Commit 0ffa798d947f5f5e40690cc9d38e678080a34f87
Exists in
master
and in
7 other branches
Merge branches 'perf/powerpc' and 'perf/bench' into perf/core
Merge reason: Both 'perf bench' and the pending PowerPC changes are now ready for the next merge window. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 37 changed files Side-by-side Diff
- arch/powerpc/Kconfig.debug
- arch/powerpc/configs/pseries_defconfig
- arch/powerpc/include/asm/emulated_ops.h
- arch/powerpc/include/asm/hvcall.h
- arch/powerpc/include/asm/reg.h
- arch/powerpc/include/asm/trace.h
- arch/powerpc/kernel/align.c
- arch/powerpc/kernel/entry_64.S
- arch/powerpc/kernel/exceptions-64s.S
- arch/powerpc/kernel/irq.c
- arch/powerpc/kernel/perf_event.c
- arch/powerpc/kernel/power5+-pmu.c
- arch/powerpc/kernel/power5-pmu.c
- arch/powerpc/kernel/power6-pmu.c
- arch/powerpc/kernel/power7-pmu.c
- arch/powerpc/kernel/ppc970-pmu.c
- arch/powerpc/kernel/setup-common.c
- arch/powerpc/kernel/time.c
- arch/powerpc/kernel/traps.c
- arch/powerpc/lib/copypage_64.S
- arch/powerpc/platforms/pseries/hvCall.S
- arch/powerpc/platforms/pseries/hvCall_inst.c
- arch/powerpc/platforms/pseries/lpar.c
- include/linux/perf_counter.h
- include/linux/perf_event.h
- kernel/perf_event.c
- tools/perf/Documentation/perf-bench.txt
- tools/perf/Makefile
- tools/perf/bench/bench.h
- tools/perf/bench/sched-messaging.c
- tools/perf/bench/sched-pipe.c
- tools/perf/builtin-bench.c
- tools/perf/builtin.h
- tools/perf/command-list.txt
- tools/perf/design.txt
- tools/perf/perf.c
- tools/perf/util/parse-events.c
arch/powerpc/Kconfig.debug
... | ... | @@ -46,7 +46,7 @@ |
46 | 46 | |
47 | 47 | config HCALL_STATS |
48 | 48 | bool "Hypervisor call instrumentation" |
49 | - depends on PPC_PSERIES && DEBUG_FS | |
49 | + depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS | |
50 | 50 | help |
51 | 51 | Adds code to keep track of the number of hypervisor calls made and |
52 | 52 | the amount of time spent in hypervisor calls. Wall time spent in |
arch/powerpc/configs/pseries_defconfig
... | ... | @@ -1683,7 +1683,7 @@ |
1683 | 1683 | CONFIG_DEBUG_STACKOVERFLOW=y |
1684 | 1684 | # CONFIG_DEBUG_STACK_USAGE is not set |
1685 | 1685 | # CONFIG_DEBUG_PAGEALLOC is not set |
1686 | -CONFIG_HCALL_STATS=y | |
1686 | +# CONFIG_HCALL_STATS is not set | |
1687 | 1687 | # CONFIG_CODE_PATCHING_SELFTEST is not set |
1688 | 1688 | # CONFIG_FTR_FIXUP_SELFTEST is not set |
1689 | 1689 | # CONFIG_MSI_BITMAP_SELFTEST is not set |
arch/powerpc/include/asm/emulated_ops.h
... | ... | @@ -19,6 +19,7 @@ |
19 | 19 | #define _ASM_POWERPC_EMULATED_OPS_H |
20 | 20 | |
21 | 21 | #include <asm/atomic.h> |
22 | +#include <linux/perf_event.h> | |
22 | 23 | |
23 | 24 | |
24 | 25 | #ifdef CONFIG_PPC_EMULATED_STATS |
... | ... | @@ -57,7 +58,7 @@ |
57 | 58 | |
58 | 59 | extern void ppc_warn_emulated_print(const char *type); |
59 | 60 | |
60 | -#define PPC_WARN_EMULATED(type) \ | |
61 | +#define __PPC_WARN_EMULATED(type) \ | |
61 | 62 | do { \ |
62 | 63 | atomic_inc(&ppc_emulated.type.val); \ |
63 | 64 | if (ppc_warn_emulated) \ |
64 | 65 | |
... | ... | @@ -66,9 +67,23 @@ |
66 | 67 | |
67 | 68 | #else /* !CONFIG_PPC_EMULATED_STATS */ |
68 | 69 | |
69 | -#define PPC_WARN_EMULATED(type) do { } while (0) | |
70 | +#define __PPC_WARN_EMULATED(type) do { } while (0) | |
70 | 71 | |
71 | 72 | #endif /* !CONFIG_PPC_EMULATED_STATS */ |
73 | + | |
74 | +#define PPC_WARN_EMULATED(type, regs) \ | |
75 | + do { \ | |
76 | + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \ | |
77 | + 1, 0, regs, 0); \ | |
78 | + __PPC_WARN_EMULATED(type); \ | |
79 | + } while (0) | |
80 | + | |
81 | +#define PPC_WARN_ALIGNMENT(type, regs) \ | |
82 | + do { \ | |
83 | + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \ | |
84 | + 1, 0, regs, regs->dar); \ | |
85 | + __PPC_WARN_EMULATED(type); \ | |
86 | + } while (0) | |
72 | 87 | |
73 | 88 | #endif /* _ASM_POWERPC_EMULATED_OPS_H */ |
arch/powerpc/include/asm/hvcall.h
... | ... | @@ -274,6 +274,8 @@ |
274 | 274 | unsigned long num_calls; /* number of calls (on this CPU) */ |
275 | 275 | unsigned long tb_total; /* total wall time (mftb) of calls. */ |
276 | 276 | unsigned long purr_total; /* total cpu time (PURR) of calls. */ |
277 | + unsigned long tb_start; | |
278 | + unsigned long purr_start; | |
277 | 279 | }; |
278 | 280 | #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) |
279 | 281 |
arch/powerpc/include/asm/reg.h
... | ... | @@ -489,6 +489,8 @@ |
489 | 489 | #define SPRN_MMCR1 798 |
490 | 490 | #define SPRN_MMCRA 0x312 |
491 | 491 | #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ |
492 | +#define MMCRA_SDAR_DCACHE_MISS 0x40000000UL | |
493 | +#define MMCRA_SDAR_ERAT_MISS 0x20000000UL | |
492 | 494 | #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ |
493 | 495 | #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ |
494 | 496 | #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ |
arch/powerpc/include/asm/trace.h
1 | +#undef TRACE_SYSTEM | |
2 | +#define TRACE_SYSTEM powerpc | |
3 | + | |
4 | +#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ) | |
5 | +#define _TRACE_POWERPC_H | |
6 | + | |
7 | +#include <linux/tracepoint.h> | |
8 | + | |
9 | +struct pt_regs; | |
10 | + | |
11 | +TRACE_EVENT(irq_entry, | |
12 | + | |
13 | + TP_PROTO(struct pt_regs *regs), | |
14 | + | |
15 | + TP_ARGS(regs), | |
16 | + | |
17 | + TP_STRUCT__entry( | |
18 | + __field(struct pt_regs *, regs) | |
19 | + ), | |
20 | + | |
21 | + TP_fast_assign( | |
22 | + __entry->regs = regs; | |
23 | + ), | |
24 | + | |
25 | + TP_printk("pt_regs=%p", __entry->regs) | |
26 | +); | |
27 | + | |
28 | +TRACE_EVENT(irq_exit, | |
29 | + | |
30 | + TP_PROTO(struct pt_regs *regs), | |
31 | + | |
32 | + TP_ARGS(regs), | |
33 | + | |
34 | + TP_STRUCT__entry( | |
35 | + __field(struct pt_regs *, regs) | |
36 | + ), | |
37 | + | |
38 | + TP_fast_assign( | |
39 | + __entry->regs = regs; | |
40 | + ), | |
41 | + | |
42 | + TP_printk("pt_regs=%p", __entry->regs) | |
43 | +); | |
44 | + | |
45 | +TRACE_EVENT(timer_interrupt_entry, | |
46 | + | |
47 | + TP_PROTO(struct pt_regs *regs), | |
48 | + | |
49 | + TP_ARGS(regs), | |
50 | + | |
51 | + TP_STRUCT__entry( | |
52 | + __field(struct pt_regs *, regs) | |
53 | + ), | |
54 | + | |
55 | + TP_fast_assign( | |
56 | + __entry->regs = regs; | |
57 | + ), | |
58 | + | |
59 | + TP_printk("pt_regs=%p", __entry->regs) | |
60 | +); | |
61 | + | |
62 | +TRACE_EVENT(timer_interrupt_exit, | |
63 | + | |
64 | + TP_PROTO(struct pt_regs *regs), | |
65 | + | |
66 | + TP_ARGS(regs), | |
67 | + | |
68 | + TP_STRUCT__entry( | |
69 | + __field(struct pt_regs *, regs) | |
70 | + ), | |
71 | + | |
72 | + TP_fast_assign( | |
73 | + __entry->regs = regs; | |
74 | + ), | |
75 | + | |
76 | + TP_printk("pt_regs=%p", __entry->regs) | |
77 | +); | |
78 | + | |
79 | +#ifdef CONFIG_PPC_PSERIES | |
80 | +extern void hcall_tracepoint_regfunc(void); | |
81 | +extern void hcall_tracepoint_unregfunc(void); | |
82 | + | |
83 | +TRACE_EVENT_FN(hcall_entry, | |
84 | + | |
85 | + TP_PROTO(unsigned long opcode, unsigned long *args), | |
86 | + | |
87 | + TP_ARGS(opcode, args), | |
88 | + | |
89 | + TP_STRUCT__entry( | |
90 | + __field(unsigned long, opcode) | |
91 | + ), | |
92 | + | |
93 | + TP_fast_assign( | |
94 | + __entry->opcode = opcode; | |
95 | + ), | |
96 | + | |
97 | + TP_printk("opcode=%lu", __entry->opcode), | |
98 | + | |
99 | + hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc | |
100 | +); | |
101 | + | |
102 | +TRACE_EVENT_FN(hcall_exit, | |
103 | + | |
104 | + TP_PROTO(unsigned long opcode, unsigned long retval, | |
105 | + unsigned long *retbuf), | |
106 | + | |
107 | + TP_ARGS(opcode, retval, retbuf), | |
108 | + | |
109 | + TP_STRUCT__entry( | |
110 | + __field(unsigned long, opcode) | |
111 | + __field(unsigned long, retval) | |
112 | + ), | |
113 | + | |
114 | + TP_fast_assign( | |
115 | + __entry->opcode = opcode; | |
116 | + __entry->retval = retval; | |
117 | + ), | |
118 | + | |
119 | + TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval), | |
120 | + | |
121 | + hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc | |
122 | +); | |
123 | +#endif | |
124 | + | |
125 | +#endif /* _TRACE_POWERPC_H */ | |
126 | + | |
127 | +#undef TRACE_INCLUDE_PATH | |
128 | +#undef TRACE_INCLUDE_FILE | |
129 | + | |
130 | +#define TRACE_INCLUDE_PATH asm | |
131 | +#define TRACE_INCLUDE_FILE trace | |
132 | + | |
133 | +#include <trace/define_trace.h> |
arch/powerpc/kernel/align.c
... | ... | @@ -732,7 +732,7 @@ |
732 | 732 | |
733 | 733 | #ifdef CONFIG_SPE |
734 | 734 | if ((instr >> 26) == 0x4) { |
735 | - PPC_WARN_EMULATED(spe); | |
735 | + PPC_WARN_ALIGNMENT(spe, regs); | |
736 | 736 | return emulate_spe(regs, reg, instr); |
737 | 737 | } |
738 | 738 | #endif |
... | ... | @@ -786,7 +786,7 @@ |
786 | 786 | flags |= SPLT; |
787 | 787 | nb = 8; |
788 | 788 | } |
789 | - PPC_WARN_EMULATED(vsx); | |
789 | + PPC_WARN_ALIGNMENT(vsx, regs); | |
790 | 790 | return emulate_vsx(addr, reg, areg, regs, flags, nb); |
791 | 791 | } |
792 | 792 | #endif |
... | ... | @@ -794,7 +794,7 @@ |
794 | 794 | * the exception of DCBZ which is handled as a special case here |
795 | 795 | */ |
796 | 796 | if (instr == DCBZ) { |
797 | - PPC_WARN_EMULATED(dcbz); | |
797 | + PPC_WARN_ALIGNMENT(dcbz, regs); | |
798 | 798 | return emulate_dcbz(regs, addr); |
799 | 799 | } |
800 | 800 | if (unlikely(nb == 0)) |
... | ... | @@ -804,7 +804,7 @@ |
804 | 804 | * function |
805 | 805 | */ |
806 | 806 | if (flags & M) { |
807 | - PPC_WARN_EMULATED(multiple); | |
807 | + PPC_WARN_ALIGNMENT(multiple, regs); | |
808 | 808 | return emulate_multiple(regs, addr, reg, nb, |
809 | 809 | flags, instr, swiz); |
810 | 810 | } |
811 | 811 | |
... | ... | @@ -825,11 +825,11 @@ |
825 | 825 | |
826 | 826 | /* Special case for 16-byte FP loads and stores */ |
827 | 827 | if (nb == 16) { |
828 | - PPC_WARN_EMULATED(fp_pair); | |
828 | + PPC_WARN_ALIGNMENT(fp_pair, regs); | |
829 | 829 | return emulate_fp_pair(addr, reg, flags); |
830 | 830 | } |
831 | 831 | |
832 | - PPC_WARN_EMULATED(unaligned); | |
832 | + PPC_WARN_ALIGNMENT(unaligned, regs); | |
833 | 833 | |
834 | 834 | /* If we are loading, get the data from user space, else |
835 | 835 | * get it from register values |
arch/powerpc/kernel/entry_64.S
... | ... | @@ -551,7 +551,7 @@ |
551 | 551 | BEGIN_FW_FTR_SECTION |
552 | 552 | ld r5,SOFTE(r1) |
553 | 553 | FW_FTR_SECTION_ELSE |
554 | - b iseries_check_pending_irqs | |
554 | + b .Liseries_check_pending_irqs | |
555 | 555 | ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) |
556 | 556 | 2: |
557 | 557 | TRACE_AND_RESTORE_IRQ(r5); |
... | ... | @@ -623,7 +623,7 @@ |
623 | 623 | |
624 | 624 | #endif /* CONFIG_PPC_BOOK3E */ |
625 | 625 | |
626 | -iseries_check_pending_irqs: | |
626 | +.Liseries_check_pending_irqs: | |
627 | 627 | #ifdef CONFIG_PPC_ISERIES |
628 | 628 | ld r5,SOFTE(r1) |
629 | 629 | cmpdi 0,r5,0 |
arch/powerpc/kernel/exceptions-64s.S
... | ... | @@ -185,12 +185,15 @@ |
185 | 185 | * prolog code of the PerformanceMonitor one. A little |
186 | 186 | * trickery is thus necessary |
187 | 187 | */ |
188 | +performance_monitor_pSeries_1: | |
188 | 189 | . = 0xf00 |
189 | 190 | b performance_monitor_pSeries |
190 | 191 | |
192 | +altivec_unavailable_pSeries_1: | |
191 | 193 | . = 0xf20 |
192 | 194 | b altivec_unavailable_pSeries |
193 | 195 | |
196 | +vsx_unavailable_pSeries_1: | |
194 | 197 | . = 0xf40 |
195 | 198 | b vsx_unavailable_pSeries |
196 | 199 |
arch/powerpc/kernel/irq.c
... | ... | @@ -70,6 +70,8 @@ |
70 | 70 | #include <asm/firmware.h> |
71 | 71 | #include <asm/lv1call.h> |
72 | 72 | #endif |
73 | +#define CREATE_TRACE_POINTS | |
74 | +#include <asm/trace.h> | |
73 | 75 | |
74 | 76 | int __irq_offset_value; |
75 | 77 | static int ppc_spurious_interrupts; |
... | ... | @@ -325,6 +327,8 @@ |
325 | 327 | struct pt_regs *old_regs = set_irq_regs(regs); |
326 | 328 | unsigned int irq; |
327 | 329 | |
330 | + trace_irq_entry(regs); | |
331 | + | |
328 | 332 | irq_enter(); |
329 | 333 | |
330 | 334 | check_stack_overflow(); |
... | ... | @@ -348,6 +352,8 @@ |
348 | 352 | timer_interrupt(regs); |
349 | 353 | } |
350 | 354 | #endif |
355 | + | |
356 | + trace_irq_exit(regs); | |
351 | 357 | } |
352 | 358 | |
353 | 359 | void __init init_IRQ(void) |
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/power5+-pmu.c
arch/powerpc/kernel/power5-pmu.c
... | ... | @@ -73,10 +73,6 @@ |
73 | 73 | #define MMCR1_PMCSEL_MSK 0x7f |
74 | 74 | |
75 | 75 | /* |
76 | - * Bits in MMCRA | |
77 | - */ | |
78 | - | |
79 | -/* | |
80 | 76 | * Layout of constraint bits: |
81 | 77 | * 6666555555555544444444443333333333222222222211111111110000000000 |
82 | 78 | * 3210987654321098765432109876543210987654321098765432109876543210 |
... | ... | @@ -390,7 +386,7 @@ |
390 | 386 | unsigned int hwc[], unsigned long mmcr[]) |
391 | 387 | { |
392 | 388 | unsigned long mmcr1 = 0; |
393 | - unsigned long mmcra = 0; | |
389 | + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; | |
394 | 390 | unsigned int pmc, unit, byte, psel; |
395 | 391 | unsigned int ttm, grp; |
396 | 392 | int i, isbus, bit, grsel; |
arch/powerpc/kernel/power6-pmu.c
... | ... | @@ -178,7 +178,7 @@ |
178 | 178 | unsigned int hwc[], unsigned long mmcr[]) |
179 | 179 | { |
180 | 180 | unsigned long mmcr1 = 0; |
181 | - unsigned long mmcra = 0; | |
181 | + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; | |
182 | 182 | int i; |
183 | 183 | unsigned int pmc, ev, b, u, s, psel; |
184 | 184 | unsigned int ttmset = 0; |
arch/powerpc/kernel/power7-pmu.c
... | ... | @@ -51,10 +51,6 @@ |
51 | 51 | #define MMCR1_PMCSEL_MSK 0xff |
52 | 52 | |
53 | 53 | /* |
54 | - * Bits in MMCRA | |
55 | - */ | |
56 | - | |
57 | -/* | |
58 | 54 | * Layout of constraint bits: |
59 | 55 | * 6666555555555544444444443333333333222222222211111111110000000000 |
60 | 56 | * 3210987654321098765432109876543210987654321098765432109876543210 |
... | ... | @@ -230,7 +226,7 @@ |
230 | 226 | unsigned int hwc[], unsigned long mmcr[]) |
231 | 227 | { |
232 | 228 | unsigned long mmcr1 = 0; |
233 | - unsigned long mmcra = 0; | |
229 | + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; | |
234 | 230 | unsigned int pmc, unit, combine, l2sel, psel; |
235 | 231 | unsigned int pmc_inuse = 0; |
236 | 232 | int i; |
arch/powerpc/kernel/ppc970-pmu.c
arch/powerpc/kernel/setup-common.c
arch/powerpc/kernel/time.c
... | ... | @@ -54,6 +54,7 @@ |
54 | 54 | #include <linux/irq.h> |
55 | 55 | #include <linux/delay.h> |
56 | 56 | #include <linux/perf_event.h> |
57 | +#include <asm/trace.h> | |
57 | 58 | |
58 | 59 | #include <asm/io.h> |
59 | 60 | #include <asm/processor.h> |
... | ... | @@ -571,6 +572,8 @@ |
571 | 572 | struct clock_event_device *evt = &decrementer->event; |
572 | 573 | u64 now; |
573 | 574 | |
575 | + trace_timer_interrupt_entry(regs); | |
576 | + | |
574 | 577 | /* Ensure a positive value is written to the decrementer, or else |
575 | 578 | * some CPUs will continue to take decrementer exceptions */ |
576 | 579 | set_dec(DECREMENTER_MAX); |
... | ... | @@ -590,6 +593,7 @@ |
590 | 593 | now = decrementer->next_tb - now; |
591 | 594 | if (now <= DECREMENTER_MAX) |
592 | 595 | set_dec((int)now); |
596 | + trace_timer_interrupt_exit(regs); | |
593 | 597 | return; |
594 | 598 | } |
595 | 599 | old_regs = set_irq_regs(regs); |
... | ... | @@ -620,6 +624,8 @@ |
620 | 624 | |
621 | 625 | irq_exit(); |
622 | 626 | set_irq_regs(old_regs); |
627 | + | |
628 | + trace_timer_interrupt_exit(regs); | |
623 | 629 | } |
624 | 630 | |
625 | 631 | void wakeup_decrementer(void) |
arch/powerpc/kernel/traps.c
... | ... | @@ -759,7 +759,7 @@ |
759 | 759 | |
760 | 760 | /* Emulate the mfspr rD, PVR. */ |
761 | 761 | if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) { |
762 | - PPC_WARN_EMULATED(mfpvr); | |
762 | + PPC_WARN_EMULATED(mfpvr, regs); | |
763 | 763 | rd = (instword >> 21) & 0x1f; |
764 | 764 | regs->gpr[rd] = mfspr(SPRN_PVR); |
765 | 765 | return 0; |
... | ... | @@ -767,7 +767,7 @@ |
767 | 767 | |
768 | 768 | /* Emulating the dcba insn is just a no-op. */ |
769 | 769 | if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) { |
770 | - PPC_WARN_EMULATED(dcba); | |
770 | + PPC_WARN_EMULATED(dcba, regs); | |
771 | 771 | return 0; |
772 | 772 | } |
773 | 773 | |
... | ... | @@ -776,7 +776,7 @@ |
776 | 776 | int shift = (instword >> 21) & 0x1c; |
777 | 777 | unsigned long msk = 0xf0000000UL >> shift; |
778 | 778 | |
779 | - PPC_WARN_EMULATED(mcrxr); | |
779 | + PPC_WARN_EMULATED(mcrxr, regs); | |
780 | 780 | regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); |
781 | 781 | regs->xer &= ~0xf0000000UL; |
782 | 782 | return 0; |
783 | 783 | |
784 | 784 | |
... | ... | @@ -784,19 +784,19 @@ |
784 | 784 | |
785 | 785 | /* Emulate load/store string insn. */ |
786 | 786 | if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) { |
787 | - PPC_WARN_EMULATED(string); | |
787 | + PPC_WARN_EMULATED(string, regs); | |
788 | 788 | return emulate_string_inst(regs, instword); |
789 | 789 | } |
790 | 790 | |
791 | 791 | /* Emulate the popcntb (Population Count Bytes) instruction. */ |
792 | 792 | if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) { |
793 | - PPC_WARN_EMULATED(popcntb); | |
793 | + PPC_WARN_EMULATED(popcntb, regs); | |
794 | 794 | return emulate_popcntb_inst(regs, instword); |
795 | 795 | } |
796 | 796 | |
797 | 797 | /* Emulate isel (Integer Select) instruction */ |
798 | 798 | if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) { |
799 | - PPC_WARN_EMULATED(isel); | |
799 | + PPC_WARN_EMULATED(isel, regs); | |
800 | 800 | return emulate_isel(regs, instword); |
801 | 801 | } |
802 | 802 | |
... | ... | @@ -995,7 +995,7 @@ |
995 | 995 | #ifdef CONFIG_MATH_EMULATION |
996 | 996 | errcode = do_mathemu(regs); |
997 | 997 | if (errcode >= 0) |
998 | - PPC_WARN_EMULATED(math); | |
998 | + PPC_WARN_EMULATED(math, regs); | |
999 | 999 | |
1000 | 1000 | switch (errcode) { |
1001 | 1001 | case 0: |
... | ... | @@ -1018,7 +1018,7 @@ |
1018 | 1018 | #elif defined(CONFIG_8XX_MINIMAL_FPEMU) |
1019 | 1019 | errcode = Soft_emulate_8xx(regs); |
1020 | 1020 | if (errcode >= 0) |
1021 | - PPC_WARN_EMULATED(8xx); | |
1021 | + PPC_WARN_EMULATED(8xx, regs); | |
1022 | 1022 | |
1023 | 1023 | switch (errcode) { |
1024 | 1024 | case 0: |
... | ... | @@ -1129,7 +1129,7 @@ |
1129 | 1129 | |
1130 | 1130 | flush_altivec_to_thread(current); |
1131 | 1131 | |
1132 | - PPC_WARN_EMULATED(altivec); | |
1132 | + PPC_WARN_EMULATED(altivec, regs); | |
1133 | 1133 | err = emulate_altivec(regs); |
1134 | 1134 | if (err == 0) { |
1135 | 1135 | regs->nip += 4; /* skip emulated instruction */ |
arch/powerpc/lib/copypage_64.S
... | ... | @@ -26,11 +26,11 @@ |
26 | 26 | srd r8,r5,r11 |
27 | 27 | |
28 | 28 | mtctr r8 |
29 | -setup: | |
29 | +.Lsetup: | |
30 | 30 | dcbt r9,r4 |
31 | 31 | dcbz r9,r3 |
32 | 32 | add r9,r9,r12 |
33 | - bdnz setup | |
33 | + bdnz .Lsetup | |
34 | 34 | END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ) |
35 | 35 | addi r3,r3,-8 |
36 | 36 | srdi r8,r5,7 /* page is copied in 128 byte strides */ |
arch/powerpc/platforms/pseries/hvCall.S
... | ... | @@ -14,68 +14,94 @@ |
14 | 14 | |
15 | 15 | #define STK_PARM(i) (48 + ((i)-3)*8) |
16 | 16 | |
17 | -#ifdef CONFIG_HCALL_STATS | |
17 | +#ifdef CONFIG_TRACEPOINTS | |
18 | + | |
19 | + .section ".toc","aw" | |
20 | + | |
21 | + .globl hcall_tracepoint_refcount | |
22 | +hcall_tracepoint_refcount: | |
23 | + .llong 0 | |
24 | + | |
25 | + .section ".text" | |
26 | + | |
18 | 27 | /* |
19 | 28 | * precall must preserve all registers. use unused STK_PARM() |
20 | - * areas to save snapshots and opcode. | |
29 | + * areas to save snapshots and opcode. We branch around this | |
30 | + * in early init (eg when populating the MMU hashtable) by using an | |
31 | + * unconditional cpu feature. | |
21 | 32 | */ |
22 | -#define HCALL_INST_PRECALL \ | |
23 | - std r3,STK_PARM(r3)(r1); /* save opcode */ \ | |
24 | - mftb r0; /* get timebase and */ \ | |
25 | - std r0,STK_PARM(r5)(r1); /* save for later */ \ | |
33 | +#define HCALL_INST_PRECALL(FIRST_REG) \ | |
26 | 34 | BEGIN_FTR_SECTION; \ |
27 | - mfspr r0,SPRN_PURR; /* get PURR and */ \ | |
28 | - std r0,STK_PARM(r6)(r1); /* save for later */ \ | |
29 | -END_FTR_SECTION_IFSET(CPU_FTR_PURR); | |
30 | - | |
35 | + b 1f; \ | |
36 | +END_FTR_SECTION(0, 1); \ | |
37 | + ld r12,hcall_tracepoint_refcount@toc(r2); \ | |
38 | + cmpdi r12,0; \ | |
39 | + beq+ 1f; \ | |
40 | + mflr r0; \ | |
41 | + std r3,STK_PARM(r3)(r1); \ | |
42 | + std r4,STK_PARM(r4)(r1); \ | |
43 | + std r5,STK_PARM(r5)(r1); \ | |
44 | + std r6,STK_PARM(r6)(r1); \ | |
45 | + std r7,STK_PARM(r7)(r1); \ | |
46 | + std r8,STK_PARM(r8)(r1); \ | |
47 | + std r9,STK_PARM(r9)(r1); \ | |
48 | + std r10,STK_PARM(r10)(r1); \ | |
49 | + std r0,16(r1); \ | |
50 | + addi r4,r1,STK_PARM(FIRST_REG); \ | |
51 | + stdu r1,-STACK_FRAME_OVERHEAD(r1); \ | |
52 | + bl .__trace_hcall_entry; \ | |
53 | + addi r1,r1,STACK_FRAME_OVERHEAD; \ | |
54 | + ld r0,16(r1); \ | |
55 | + ld r3,STK_PARM(r3)(r1); \ | |
56 | + ld r4,STK_PARM(r4)(r1); \ | |
57 | + ld r5,STK_PARM(r5)(r1); \ | |
58 | + ld r6,STK_PARM(r6)(r1); \ | |
59 | + ld r7,STK_PARM(r7)(r1); \ | |
60 | + ld r8,STK_PARM(r8)(r1); \ | |
61 | + ld r9,STK_PARM(r9)(r1); \ | |
62 | + ld r10,STK_PARM(r10)(r1); \ | |
63 | + mtlr r0; \ | |
64 | +1: | |
65 | + | |
31 | 66 | /* |
32 | 67 | * postcall is performed immediately before function return which |
33 | 68 | * allows liberal use of volatile registers. We branch around this |
34 | 69 | * in early init (eg when populating the MMU hashtable) by using an |
35 | 70 | * unconditional cpu feature. |
36 | 71 | */ |
37 | -#define HCALL_INST_POSTCALL \ | |
72 | +#define __HCALL_INST_POSTCALL \ | |
38 | 73 | BEGIN_FTR_SECTION; \ |
39 | 74 | b 1f; \ |
40 | 75 | END_FTR_SECTION(0, 1); \ |
41 | - ld r4,STK_PARM(r3)(r1); /* validate opcode */ \ | |
42 | - cmpldi cr7,r4,MAX_HCALL_OPCODE; \ | |
43 | - bgt- cr7,1f; \ | |
44 | - \ | |
45 | - /* get time and PURR snapshots after hcall */ \ | |
46 | - mftb r7; /* timebase after */ \ | |
47 | -BEGIN_FTR_SECTION; \ | |
48 | - mfspr r8,SPRN_PURR; /* PURR after */ \ | |
49 | - ld r6,STK_PARM(r6)(r1); /* PURR before */ \ | |
50 | - subf r6,r6,r8; /* delta */ \ | |
51 | -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ | |
52 | - ld r5,STK_PARM(r5)(r1); /* timebase before */ \ | |
53 | - subf r5,r5,r7; /* time delta */ \ | |
54 | - \ | |
55 | - /* calculate address of stat structure r4 = opcode */ \ | |
56 | - srdi r4,r4,2; /* index into array */ \ | |
57 | - mulli r4,r4,HCALL_STAT_SIZE; \ | |
58 | - LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \ | |
59 | - add r4,r4,r7; \ | |
60 | - ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \ | |
61 | - add r4,r4,r7; \ | |
62 | - \ | |
63 | - /* update stats */ \ | |
64 | - ld r7,HCALL_STAT_CALLS(r4); /* count */ \ | |
65 | - addi r7,r7,1; \ | |
66 | - std r7,HCALL_STAT_CALLS(r4); \ | |
67 | - ld r7,HCALL_STAT_TB(r4); /* timebase */ \ | |
68 | - add r7,r7,r5; \ | |
69 | - std r7,HCALL_STAT_TB(r4); \ | |
70 | -BEGIN_FTR_SECTION; \ | |
71 | - ld r7,HCALL_STAT_PURR(r4); /* PURR */ \ | |
72 | - add r7,r7,r6; \ | |
73 | - std r7,HCALL_STAT_PURR(r4); \ | |
74 | -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ | |
76 | + ld r12,hcall_tracepoint_refcount@toc(r2); \ | |
77 | + cmpdi r12,0; \ | |
78 | + beq+ 1f; \ | |
79 | + mflr r0; \ | |
80 | + ld r6,STK_PARM(r3)(r1); \ | |
81 | + std r3,STK_PARM(r3)(r1); \ | |
82 | + mr r4,r3; \ | |
83 | + mr r3,r6; \ | |
84 | + std r0,16(r1); \ | |
85 | + stdu r1,-STACK_FRAME_OVERHEAD(r1); \ | |
86 | + bl .__trace_hcall_exit; \ | |
87 | + addi r1,r1,STACK_FRAME_OVERHEAD; \ | |
88 | + ld r0,16(r1); \ | |
89 | + ld r3,STK_PARM(r3)(r1); \ | |
90 | + mtlr r0; \ | |
75 | 91 | 1: |
92 | + | |
93 | +#define HCALL_INST_POSTCALL_NORETS \ | |
94 | + li r5,0; \ | |
95 | + __HCALL_INST_POSTCALL | |
96 | + | |
97 | +#define HCALL_INST_POSTCALL(BUFREG) \ | |
98 | + mr r5,BUFREG; \ | |
99 | + __HCALL_INST_POSTCALL | |
100 | + | |
76 | 101 | #else |
77 | -#define HCALL_INST_PRECALL | |
78 | -#define HCALL_INST_POSTCALL | |
102 | +#define HCALL_INST_PRECALL(FIRST_ARG) | |
103 | +#define HCALL_INST_POSTCALL_NORETS | |
104 | +#define HCALL_INST_POSTCALL(BUFREG) | |
79 | 105 | #endif |
80 | 106 | |
81 | 107 | .text |
82 | 108 | |
... | ... | @@ -86,11 +112,11 @@ |
86 | 112 | mfcr r0 |
87 | 113 | stw r0,8(r1) |
88 | 114 | |
89 | - HCALL_INST_PRECALL | |
115 | + HCALL_INST_PRECALL(r4) | |
90 | 116 | |
91 | 117 | HVSC /* invoke the hypervisor */ |
92 | 118 | |
93 | - HCALL_INST_POSTCALL | |
119 | + HCALL_INST_POSTCALL_NORETS | |
94 | 120 | |
95 | 121 | lwz r0,8(r1) |
96 | 122 | mtcrf 0xff,r0 |
... | ... | @@ -102,7 +128,7 @@ |
102 | 128 | mfcr r0 |
103 | 129 | stw r0,8(r1) |
104 | 130 | |
105 | - HCALL_INST_PRECALL | |
131 | + HCALL_INST_PRECALL(r5) | |
106 | 132 | |
107 | 133 | std r4,STK_PARM(r4)(r1) /* Save ret buffer */ |
108 | 134 | |
... | ... | @@ -121,7 +147,7 @@ |
121 | 147 | std r6, 16(r12) |
122 | 148 | std r7, 24(r12) |
123 | 149 | |
124 | - HCALL_INST_POSTCALL | |
150 | + HCALL_INST_POSTCALL(r12) | |
125 | 151 | |
126 | 152 | lwz r0,8(r1) |
127 | 153 | mtcrf 0xff,r0 |
... | ... | @@ -168,7 +194,7 @@ |
168 | 194 | mfcr r0 |
169 | 195 | stw r0,8(r1) |
170 | 196 | |
171 | - HCALL_INST_PRECALL | |
197 | + HCALL_INST_PRECALL(r5) | |
172 | 198 | |
173 | 199 | std r4,STK_PARM(r4)(r1) /* Save ret buffer */ |
174 | 200 | |
... | ... | @@ -196,7 +222,7 @@ |
196 | 222 | std r11,56(r12) |
197 | 223 | std r0, 64(r12) |
198 | 224 | |
199 | - HCALL_INST_POSTCALL | |
225 | + HCALL_INST_POSTCALL(r12) | |
200 | 226 | |
201 | 227 | lwz r0,8(r1) |
202 | 228 | mtcrf 0xff,r0 |
arch/powerpc/platforms/pseries/hvCall_inst.c
... | ... | @@ -26,6 +26,7 @@ |
26 | 26 | #include <asm/hvcall.h> |
27 | 27 | #include <asm/firmware.h> |
28 | 28 | #include <asm/cputable.h> |
29 | +#include <asm/trace.h> | |
29 | 30 | |
30 | 31 | DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); |
31 | 32 | |
... | ... | @@ -100,6 +101,35 @@ |
100 | 101 | #define HCALL_ROOT_DIR "hcall_inst" |
101 | 102 | #define CPU_NAME_BUF_SIZE 32 |
102 | 103 | |
104 | + | |
105 | +static void probe_hcall_entry(unsigned long opcode, unsigned long *args) | |
106 | +{ | |
107 | + struct hcall_stats *h; | |
108 | + | |
109 | + if (opcode > MAX_HCALL_OPCODE) | |
110 | + return; | |
111 | + | |
112 | + h = &get_cpu_var(hcall_stats)[opcode / 4]; | |
113 | + h->tb_start = mftb(); | |
114 | + h->purr_start = mfspr(SPRN_PURR); | |
115 | +} | |
116 | + | |
117 | +static void probe_hcall_exit(unsigned long opcode, unsigned long retval, | |
118 | + unsigned long *retbuf) | |
119 | +{ | |
120 | + struct hcall_stats *h; | |
121 | + | |
122 | + if (opcode > MAX_HCALL_OPCODE) | |
123 | + return; | |
124 | + | |
125 | + h = &__get_cpu_var(hcall_stats)[opcode / 4]; | |
126 | + h->num_calls++; | |
127 | + h->tb_total = mftb() - h->tb_start; | |
128 | + h->purr_total = mfspr(SPRN_PURR) - h->purr_start; | |
129 | + | |
130 | + put_cpu_var(hcall_stats); | |
131 | +} | |
132 | + | |
103 | 133 | static int __init hcall_inst_init(void) |
104 | 134 | { |
105 | 135 | struct dentry *hcall_root; |
... | ... | @@ -109,6 +139,14 @@ |
109 | 139 | |
110 | 140 | if (!firmware_has_feature(FW_FEATURE_LPAR)) |
111 | 141 | return 0; |
142 | + | |
143 | + if (register_trace_hcall_entry(probe_hcall_entry)) | |
144 | + return -EINVAL; | |
145 | + | |
146 | + if (register_trace_hcall_exit(probe_hcall_exit)) { | |
147 | + unregister_trace_hcall_entry(probe_hcall_entry); | |
148 | + return -EINVAL; | |
149 | + } | |
112 | 150 | |
113 | 151 | hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); |
114 | 152 | if (!hcall_root) |
arch/powerpc/platforms/pseries/lpar.c
... | ... | @@ -39,6 +39,7 @@ |
39 | 39 | #include <asm/cputable.h> |
40 | 40 | #include <asm/udbg.h> |
41 | 41 | #include <asm/smp.h> |
42 | +#include <asm/trace.h> | |
42 | 43 | |
43 | 44 | #include "plpar_wrappers.h" |
44 | 45 | #include "pseries.h" |
... | ... | @@ -660,5 +661,37 @@ |
660 | 661 | } |
661 | 662 | EXPORT_SYMBOL(arch_free_page); |
662 | 663 | |
664 | +#endif | |
665 | + | |
666 | +#ifdef CONFIG_TRACEPOINTS | |
667 | +/* | |
668 | + * We optimise our hcall path by placing hcall_tracepoint_refcount | |
669 | + * directly in the TOC so we can check if the hcall tracepoints are | |
670 | + * enabled via a single load. | |
671 | + */ | |
672 | + | |
673 | +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ | |
674 | +extern long hcall_tracepoint_refcount; | |
675 | + | |
676 | +void hcall_tracepoint_regfunc(void) | |
677 | +{ | |
678 | + hcall_tracepoint_refcount++; | |
679 | +} | |
680 | + | |
681 | +void hcall_tracepoint_unregfunc(void) | |
682 | +{ | |
683 | + hcall_tracepoint_refcount--; | |
684 | +} | |
685 | + | |
686 | +void __trace_hcall_entry(unsigned long opcode, unsigned long *args) | |
687 | +{ | |
688 | + trace_hcall_entry(opcode, args); | |
689 | +} | |
690 | + | |
691 | +void __trace_hcall_exit(long opcode, unsigned long retval, | |
692 | + unsigned long *retbuf) | |
693 | +{ | |
694 | + trace_hcall_exit(opcode, retval, retbuf); | |
695 | +} | |
663 | 696 | #endif |
include/linux/perf_counter.h
include/linux/perf_event.h
kernel/perf_event.c
... | ... | @@ -4274,6 +4274,8 @@ |
4274 | 4274 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: |
4275 | 4275 | case PERF_COUNT_SW_CONTEXT_SWITCHES: |
4276 | 4276 | case PERF_COUNT_SW_CPU_MIGRATIONS: |
4277 | + case PERF_COUNT_SW_ALIGNMENT_FAULTS: | |
4278 | + case PERF_COUNT_SW_EMULATION_FAULTS: | |
4277 | 4279 | if (!event->parent) { |
4278 | 4280 | atomic_inc(&perf_swevent_enabled[event_id]); |
4279 | 4281 | event->destroy = sw_perf_event_destroy; |
tools/perf/Documentation/perf-bench.txt
1 | +perf-bench(1) | |
2 | +============ | |
3 | + | |
4 | +NAME | |
5 | +---- | |
6 | +perf-bench - General framework for benchmark suites | |
7 | + | |
8 | +SYNOPSIS | |
9 | +-------- | |
10 | +[verse] | |
11 | +'perf bench' [<common options>] <subsystem> <suite> [<options>] | |
12 | + | |
13 | +DESCRIPTION | |
14 | +----------- | |
15 | +This 'perf bench' command is a general framework for benchmark suites. | |
16 | + | |
17 | +COMMON OPTIONS | |
18 | +-------------- | |
19 | +-f:: | |
20 | +--format=:: | |
21 | +Specify format style. | |
22 | +Current available format styles are, | |
23 | + | |
24 | +'default':: | |
25 | +Default style. This is mainly for human reading. | |
26 | +--------------------- | |
27 | +% perf bench sched pipe # with no style specified | |
28 | +(executing 1000000 pipe operations between two tasks) | |
29 | + Total time:5.855 sec | |
30 | + 5.855061 usecs/op | |
31 | + 170792 ops/sec | |
32 | +--------------------- | |
33 | + | |
34 | +'simple':: | |
35 | +This simple style is friendly for automated | |
36 | +processing by scripts. | |
37 | +--------------------- | |
38 | +% perf bench --format=simple sched pipe # specified simple | |
39 | +5.988 | |
40 | +--------------------- | |
41 | + | |
42 | +SUBSYSTEM | |
43 | +--------- | |
44 | + | |
45 | +'sched':: | |
46 | + Scheduler and IPC mechanisms. | |
47 | + | |
48 | +SUITES FOR 'sched' | |
49 | +~~~~~~~~~~~~~~~~~~ | |
50 | +*messaging*:: | |
51 | +Suite for evaluating performance of scheduler and IPC mechanisms. | |
52 | +Based on hackbench by Rusty Russell. | |
53 | + | |
54 | +Options of *messaging* | |
55 | +^^^^^^^^^^^^^^^^^^^^^^ | |
56 | +-p:: | |
57 | +--pipe:: | |
58 | +Use pipe() instead of socketpair() | |
59 | + | |
60 | +-t:: | |
61 | +--thread:: | |
62 | +Use threads instead of processes | |
63 | + | |
64 | +-g:: | |
65 | +--group=:: | |
66 | +Specify number of groups | |
67 | + | |
68 | +-l:: | |
69 | +--loop=:: | |
70 | +Specify number of loops | |
71 | + | |
72 | +Example of *messaging* | |
73 | +^^^^^^^^^^^^^^^^^^^^^^ | |
74 | + | |
75 | +--------------------- | |
76 | +% perf bench sched messaging # run with default | |
77 | +options (20 sender and receiver processes per group) | |
78 | +(10 groups == 400 processes run) | |
79 | + | |
80 | + Total time:0.308 sec | |
81 | + | |
82 | +% perf bench sched messaging -t -g 20 # be multi-thread, with 20 groups | |
83 | +(20 sender and receiver threads per group) | |
84 | +(20 groups == 800 threads run) | |
85 | + | |
86 | + Total time:0.582 sec | |
87 | +--------------------- | |
88 | + | |
89 | +*pipe*:: | |
90 | +Suite for pipe() system call. | |
91 | +Based on pipe-test-1m.c by Ingo Molnar. | |
92 | + | |
93 | +Options of *pipe* | |
94 | +^^^^^^^^^^^^^^^^^ | |
95 | +-l:: | |
96 | +--loop=:: | |
97 | +Specify number of loops. | |
98 | + | |
99 | +Example of *pipe* | |
100 | +^^^^^^^^^^^^^^^^^ | |
101 | + | |
102 | +--------------------- | |
103 | +% perf bench sched pipe | |
104 | +(executing 1000000 pipe operations between two tasks) | |
105 | + | |
106 | + Total time:8.091 sec | |
107 | + 8.091833 usecs/op | |
108 | + 123581 ops/sec | |
109 | + | |
110 | +% perf bench sched pipe -l 1000 # loop 1000 | |
111 | +(executing 1000 pipe operations between two tasks) | |
112 | + | |
113 | + Total time:0.016 sec | |
114 | + 16.948000 usecs/op | |
115 | + 59004 ops/sec | |
116 | +--------------------- | |
117 | + | |
118 | +SEE ALSO | |
119 | +-------- | |
120 | +linkperf:perf[1] |
tools/perf/Makefile
... | ... | @@ -421,6 +421,13 @@ |
421 | 421 | LIB_OBJS += util/data_map.o |
422 | 422 | |
423 | 423 | BUILTIN_OBJS += builtin-annotate.o |
424 | + | |
425 | +BUILTIN_OBJS += builtin-bench.o | |
426 | + | |
427 | +# Benchmark modules | |
428 | +BUILTIN_OBJS += bench/sched-messaging.o | |
429 | +BUILTIN_OBJS += bench/sched-pipe.o | |
430 | + | |
424 | 431 | BUILTIN_OBJS += builtin-help.o |
425 | 432 | BUILTIN_OBJS += builtin-sched.o |
426 | 433 | BUILTIN_OBJS += builtin-list.o |
tools/perf/bench/bench.h
1 | +#ifndef BENCH_H | |
2 | +#define BENCH_H | |
3 | + | |
4 | +extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); | |
5 | +extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); | |
6 | + | |
7 | +#define BENCH_FORMAT_DEFAULT_STR "default" | |
8 | +#define BENCH_FORMAT_DEFAULT 0 | |
9 | +#define BENCH_FORMAT_SIMPLE_STR "simple" | |
10 | +#define BENCH_FORMAT_SIMPLE 1 | |
11 | + | |
12 | +#define BENCH_FORMAT_UNKNOWN -1 | |
13 | + | |
14 | +extern int bench_format; | |
15 | + | |
16 | +#endif |
tools/perf/bench/sched-messaging.c
1 | +/* | |
2 | + * | |
3 | + * sched-messaging.c | |
4 | + * | |
5 | + * messaging: Benchmark for scheduler and IPC mechanisms | |
6 | + * | |
7 | + * Based on hackbench by Rusty Russell <rusty@rustcorp.com.au> | |
8 | + * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> | |
9 | + * | |
10 | + */ | |
11 | + | |
12 | +#include "../perf.h" | |
13 | +#include "../util/util.h" | |
14 | +#include "../util/parse-options.h" | |
15 | +#include "../builtin.h" | |
16 | +#include "bench.h" | |
17 | + | |
18 | +/* Test groups of 20 processes spraying to 20 receivers */ | |
19 | +#include <pthread.h> | |
20 | +#include <stdio.h> | |
21 | +#include <stdlib.h> | |
22 | +#include <string.h> | |
23 | +#include <errno.h> | |
24 | +#include <unistd.h> | |
25 | +#include <sys/types.h> | |
26 | +#include <sys/socket.h> | |
27 | +#include <sys/wait.h> | |
28 | +#include <sys/time.h> | |
29 | +#include <sys/poll.h> | |
30 | +#include <limits.h> | |
31 | + | |
32 | +#define DATASIZE 100 | |
33 | + | |
34 | +static int use_pipes = 0; | |
35 | +static unsigned int loops = 100; | |
36 | +static unsigned int thread_mode = 0; | |
37 | +static unsigned int num_groups = 10; | |
38 | + | |
39 | +struct sender_context { | |
40 | + unsigned int num_fds; | |
41 | + int ready_out; | |
42 | + int wakefd; | |
43 | + int out_fds[0]; | |
44 | +}; | |
45 | + | |
46 | +struct receiver_context { | |
47 | + unsigned int num_packets; | |
48 | + int in_fds[2]; | |
49 | + int ready_out; | |
50 | + int wakefd; | |
51 | +}; | |
52 | + | |
53 | +static void barf(const char *msg) | |
54 | +{ | |
55 | + fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno)); | |
56 | + exit(1); | |
57 | +} | |
58 | + | |
59 | +static void fdpair(int fds[2]) | |
60 | +{ | |
61 | + if (use_pipes) { | |
62 | + if (pipe(fds) == 0) | |
63 | + return; | |
64 | + } else { | |
65 | + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0) | |
66 | + return; | |
67 | + } | |
68 | + | |
69 | + barf(use_pipes ? "pipe()" : "socketpair()"); | |
70 | +} | |
71 | + | |
72 | +/* Block until we're ready to go */ | |
73 | +static void ready(int ready_out, int wakefd) | |
74 | +{ | |
75 | + char dummy; | |
76 | + struct pollfd pollfd = { .fd = wakefd, .events = POLLIN }; | |
77 | + | |
78 | + /* Tell them we're ready. */ | |
79 | + if (write(ready_out, &dummy, 1) != 1) | |
80 | + barf("CLIENT: ready write"); | |
81 | + | |
82 | + /* Wait for "GO" signal */ | |
83 | + if (poll(&pollfd, 1, -1) != 1) | |
84 | + barf("poll"); | |
85 | +} | |
86 | + | |
87 | +/* Sender sprays loops messages down each file descriptor */ | |
88 | +static void *sender(struct sender_context *ctx) | |
89 | +{ | |
90 | + char data[DATASIZE]; | |
91 | + unsigned int i, j; | |
92 | + | |
93 | + ready(ctx->ready_out, ctx->wakefd); | |
94 | + | |
95 | + /* Now pump to every receiver. */ | |
96 | + for (i = 0; i < loops; i++) { | |
97 | + for (j = 0; j < ctx->num_fds; j++) { | |
98 | + int ret, done = 0; | |
99 | + | |
100 | +again: | |
101 | + ret = write(ctx->out_fds[j], data + done, | |
102 | + sizeof(data)-done); | |
103 | + if (ret < 0) | |
104 | + barf("SENDER: write"); | |
105 | + done += ret; | |
106 | + if (done < DATASIZE) | |
107 | + goto again; | |
108 | + } | |
109 | + } | |
110 | + | |
111 | + return NULL; | |
112 | +} | |
113 | + | |
114 | + | |
115 | +/* One receiver per fd */ | |
116 | +static void *receiver(struct receiver_context* ctx) | |
117 | +{ | |
118 | + unsigned int i; | |
119 | + | |
120 | + if (!thread_mode) | |
121 | + close(ctx->in_fds[1]); | |
122 | + | |
123 | + /* Wait for start... */ | |
124 | + ready(ctx->ready_out, ctx->wakefd); | |
125 | + | |
126 | + /* Receive them all */ | |
127 | + for (i = 0; i < ctx->num_packets; i++) { | |
128 | + char data[DATASIZE]; | |
129 | + int ret, done = 0; | |
130 | + | |
131 | +again: | |
132 | + ret = read(ctx->in_fds[0], data + done, DATASIZE - done); | |
133 | + if (ret < 0) | |
134 | + barf("SERVER: read"); | |
135 | + done += ret; | |
136 | + if (done < DATASIZE) | |
137 | + goto again; | |
138 | + } | |
139 | + | |
140 | + return NULL; | |
141 | +} | |
142 | + | |
143 | +static pthread_t create_worker(void *ctx, void *(*func)(void *)) | |
144 | +{ | |
145 | + pthread_attr_t attr; | |
146 | + pthread_t childid; | |
147 | + int err; | |
148 | + | |
149 | + if (!thread_mode) { | |
150 | + /* process mode */ | |
151 | + /* Fork the receiver. */ | |
152 | + switch (fork()) { | |
153 | + case -1: | |
154 | + barf("fork()"); | |
155 | + break; | |
156 | + case 0: | |
157 | + (*func) (ctx); | |
158 | + exit(0); | |
159 | + break; | |
160 | + default: | |
161 | + break; | |
162 | + } | |
163 | + | |
164 | + return (pthread_t)0; | |
165 | + } | |
166 | + | |
167 | + if (pthread_attr_init(&attr) != 0) | |
168 | + barf("pthread_attr_init:"); | |
169 | + | |
170 | +#ifndef __ia64__ | |
171 | + if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0) | |
172 | + barf("pthread_attr_setstacksize"); | |
173 | +#endif | |
174 | + | |
175 | + err = pthread_create(&childid, &attr, func, ctx); | |
176 | + if (err != 0) { | |
177 | + fprintf(stderr, "pthread_create failed: %s (%d)\n", | |
178 | + strerror(err), err); | |
179 | + exit(-1); | |
180 | + } | |
181 | + return childid; | |
182 | +} | |
183 | + | |
184 | +static void reap_worker(pthread_t id) | |
185 | +{ | |
186 | + int proc_status; | |
187 | + void *thread_status; | |
188 | + | |
189 | + if (!thread_mode) { | |
190 | + /* process mode */ | |
191 | + wait(&proc_status); | |
192 | + if (!WIFEXITED(proc_status)) | |
193 | + exit(1); | |
194 | + } else { | |
195 | + pthread_join(id, &thread_status); | |
196 | + } | |
197 | +} | |
198 | + | |
199 | +/* One group of senders and receivers */ | |
200 | +static unsigned int group(pthread_t *pth, | |
201 | + unsigned int num_fds, | |
202 | + int ready_out, | |
203 | + int wakefd) | |
204 | +{ | |
205 | + unsigned int i; | |
206 | + struct sender_context *snd_ctx = malloc(sizeof(struct sender_context) | |
207 | + + num_fds * sizeof(int)); | |
208 | + | |
209 | + if (!snd_ctx) | |
210 | + barf("malloc()"); | |
211 | + | |
212 | + for (i = 0; i < num_fds; i++) { | |
213 | + int fds[2]; | |
214 | + struct receiver_context *ctx = malloc(sizeof(*ctx)); | |
215 | + | |
216 | + if (!ctx) | |
217 | + barf("malloc()"); | |
218 | + | |
219 | + | |
220 | + /* Create the pipe between client and server */ | |
221 | + fdpair(fds); | |
222 | + | |
223 | + ctx->num_packets = num_fds * loops; | |
224 | + ctx->in_fds[0] = fds[0]; | |
225 | + ctx->in_fds[1] = fds[1]; | |
226 | + ctx->ready_out = ready_out; | |
227 | + ctx->wakefd = wakefd; | |
228 | + | |
229 | + pth[i] = create_worker(ctx, (void *)receiver); | |
230 | + | |
231 | + snd_ctx->out_fds[i] = fds[1]; | |
232 | + if (!thread_mode) | |
233 | + close(fds[0]); | |
234 | + } | |
235 | + | |
236 | + /* Now we have all the fds, fork the senders */ | |
237 | + for (i = 0; i < num_fds; i++) { | |
238 | + snd_ctx->ready_out = ready_out; | |
239 | + snd_ctx->wakefd = wakefd; | |
240 | + snd_ctx->num_fds = num_fds; | |
241 | + | |
242 | + pth[num_fds+i] = create_worker(snd_ctx, (void *)sender); | |
243 | + } | |
244 | + | |
245 | + /* Close the fds we have left */ | |
246 | + if (!thread_mode) | |
247 | + for (i = 0; i < num_fds; i++) | |
248 | + close(snd_ctx->out_fds[i]); | |
249 | + | |
250 | + /* Return number of children to reap */ | |
251 | + return num_fds * 2; | |
252 | +} | |
253 | + | |
254 | +static const struct option options[] = { | |
255 | + OPT_BOOLEAN('p', "pipe", &use_pipes, | |
256 | + "Use pipe() instead of socketpair()"), | |
257 | + OPT_BOOLEAN('t', "thread", &thread_mode, | |
258 | + "Be multi thread instead of multi process"), | |
259 | + OPT_INTEGER('g', "group", &num_groups, | |
260 | + "Specify number of groups"), | |
261 | + OPT_INTEGER('l', "loop", &loops, | |
262 | + "Specify number of loops"), | |
263 | + OPT_END() | |
264 | +}; | |
265 | + | |
266 | +static const char * const bench_sched_message_usage[] = { | |
267 | + "perf bench sched messaging <options>", | |
268 | + NULL | |
269 | +}; | |
270 | + | |
271 | +int bench_sched_messaging(int argc, const char **argv, | |
272 | + const char *prefix __used) | |
273 | +{ | |
274 | + unsigned int i, total_children; | |
275 | + struct timeval start, stop, diff; | |
276 | + unsigned int num_fds = 20; | |
277 | + int readyfds[2], wakefds[2]; | |
278 | + char dummy; | |
279 | + pthread_t *pth_tab; | |
280 | + | |
281 | + argc = parse_options(argc, argv, options, | |
282 | + bench_sched_message_usage, 0); | |
283 | + | |
284 | + pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t)); | |
285 | + if (!pth_tab) | |
286 | + barf("main:malloc()"); | |
287 | + | |
288 | + fdpair(readyfds); | |
289 | + fdpair(wakefds); | |
290 | + | |
291 | + total_children = 0; | |
292 | + for (i = 0; i < num_groups; i++) | |
293 | + total_children += group(pth_tab+total_children, num_fds, | |
294 | + readyfds[1], wakefds[0]); | |
295 | + | |
296 | + /* Wait for everyone to be ready */ | |
297 | + for (i = 0; i < total_children; i++) | |
298 | + if (read(readyfds[0], &dummy, 1) != 1) | |
299 | + barf("Reading for readyfds"); | |
300 | + | |
301 | + gettimeofday(&start, NULL); | |
302 | + | |
303 | + /* Kick them off */ | |
304 | + if (write(wakefds[1], &dummy, 1) != 1) | |
305 | + barf("Writing to start them"); | |
306 | + | |
307 | + /* Reap them all */ | |
308 | + for (i = 0; i < total_children; i++) | |
309 | + reap_worker(pth_tab[i]); | |
310 | + | |
311 | + gettimeofday(&stop, NULL); | |
312 | + | |
313 | + timersub(&stop, &start, &diff); | |
314 | + | |
315 | + switch (bench_format) { | |
316 | + case BENCH_FORMAT_DEFAULT: | |
317 | + printf("# %d sender and receiver %s per group\n", | |
318 | + num_fds, thread_mode ? "threads" : "processes"); | |
319 | + printf("# %d groups == %d %s run\n\n", | |
320 | + num_groups, num_groups * 2 * num_fds, | |
321 | + thread_mode ? "threads" : "processes"); | |
322 | + printf(" %14s: %lu.%03lu [sec]\n", "Total time", | |
323 | + diff.tv_sec, diff.tv_usec/1000); | |
324 | + break; | |
325 | + case BENCH_FORMAT_SIMPLE: | |
326 | + printf("%lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000); | |
327 | + break; | |
328 | + default: | |
329 | + /* reaching here is something disaster */ | |
330 | + fprintf(stderr, "Unknown format:%d\n", bench_format); | |
331 | + exit(1); | |
332 | + break; | |
333 | + } | |
334 | + | |
335 | + return 0; | |
336 | +} |
tools/perf/bench/sched-pipe.c
1 | +/* | |
2 | + * | |
3 | + * sched-pipe.c | |
4 | + * | |
5 | + * pipe: Benchmark for pipe() | |
6 | + * | |
7 | + * Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com> | |
8 | + * http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c | |
9 | + * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> | |
10 | + * | |
11 | + */ | |
12 | + | |
13 | +#include "../perf.h" | |
14 | +#include "../util/util.h" | |
15 | +#include "../util/parse-options.h" | |
16 | +#include "../builtin.h" | |
17 | +#include "bench.h" | |
18 | + | |
19 | +#include <unistd.h> | |
20 | +#include <stdio.h> | |
21 | +#include <stdlib.h> | |
22 | +#include <signal.h> | |
23 | +#include <sys/wait.h> | |
24 | +#include <linux/unistd.h> | |
25 | +#include <string.h> | |
26 | +#include <errno.h> | |
27 | +#include <assert.h> | |
28 | +#include <sys/time.h> | |
29 | +#include <sys/types.h> | |
30 | + | |
31 | +#define LOOPS_DEFAULT 1000000 | |
32 | +static int loops = LOOPS_DEFAULT; | |
33 | + | |
34 | +static const struct option options[] = { | |
35 | + OPT_INTEGER('l', "loop", &loops, | |
36 | + "Specify number of loops"), | |
37 | + OPT_END() | |
38 | +}; | |
39 | + | |
40 | +static const char * const bench_sched_pipe_usage[] = { | |
41 | + "perf bench sched pipe <options>", | |
42 | + NULL | |
43 | +}; | |
44 | + | |
45 | +int bench_sched_pipe(int argc, const char **argv, | |
46 | + const char *prefix __used) | |
47 | +{ | |
48 | + int pipe_1[2], pipe_2[2]; | |
49 | + int m = 0, i; | |
50 | + struct timeval start, stop, diff; | |
51 | + unsigned long long result_usec = 0; | |
52 | + | |
53 | + /* | |
54 | + * why does "ret" exist? | |
55 | + * discarding returned value of read(), write() | |
56 | + * causes error in building environment for perf | |
57 | + */ | |
58 | + int ret, wait_stat; | |
59 | + pid_t pid, retpid; | |
60 | + | |
61 | + argc = parse_options(argc, argv, options, | |
62 | + bench_sched_pipe_usage, 0); | |
63 | + | |
64 | + assert(!pipe(pipe_1)); | |
65 | + assert(!pipe(pipe_2)); | |
66 | + | |
67 | + pid = fork(); | |
68 | + assert(pid >= 0); | |
69 | + | |
70 | + gettimeofday(&start, NULL); | |
71 | + | |
72 | + if (!pid) { | |
73 | + for (i = 0; i < loops; i++) { | |
74 | + ret = read(pipe_1[0], &m, sizeof(int)); | |
75 | + ret = write(pipe_2[1], &m, sizeof(int)); | |
76 | + } | |
77 | + } else { | |
78 | + for (i = 0; i < loops; i++) { | |
79 | + ret = write(pipe_1[1], &m, sizeof(int)); | |
80 | + ret = read(pipe_2[0], &m, sizeof(int)); | |
81 | + } | |
82 | + } | |
83 | + | |
84 | + gettimeofday(&stop, NULL); | |
85 | + timersub(&stop, &start, &diff); | |
86 | + | |
87 | + if (pid) { | |
88 | + retpid = waitpid(pid, &wait_stat, 0); | |
89 | + assert((retpid == pid) && WIFEXITED(wait_stat)); | |
90 | + return 0; | |
91 | + } | |
92 | + | |
93 | + switch (bench_format) { | |
94 | + case BENCH_FORMAT_DEFAULT: | |
95 | + printf("# Extecuted %d pipe operations between two tasks\n\n", | |
96 | + loops); | |
97 | + | |
98 | + result_usec = diff.tv_sec * 1000000; | |
99 | + result_usec += diff.tv_usec; | |
100 | + | |
101 | + printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", | |
102 | + diff.tv_sec, diff.tv_usec/1000); | |
103 | + | |
104 | + printf(" %14lf usecs/op\n", | |
105 | + (double)result_usec / (double)loops); | |
106 | + printf(" %14d ops/sec\n", | |
107 | + (int)((double)loops / | |
108 | + ((double)result_usec / (double)1000000))); | |
109 | + break; | |
110 | + | |
111 | + case BENCH_FORMAT_SIMPLE: | |
112 | + printf("%lu.%03lu\n", | |
113 | + diff.tv_sec, diff.tv_usec / 1000); | |
114 | + break; | |
115 | + | |
116 | + default: | |
117 | + /* reaching here is something disaster */ | |
118 | + fprintf(stderr, "Unknown format:%d\n", bench_format); | |
119 | + exit(1); | |
120 | + break; | |
121 | + } | |
122 | + | |
123 | + return 0; | |
124 | +} |
tools/perf/builtin-bench.c
1 | +/* | |
2 | + * | |
3 | + * builtin-bench.c | |
4 | + * | |
5 | + * General benchmarking subsystem provided by perf | |
6 | + * | |
7 | + * Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> | |
8 | + * | |
9 | + */ | |
10 | + | |
11 | +/* | |
12 | + * | |
13 | + * Available subsystem list: | |
14 | + * sched ... scheduler and IPC mechanism | |
15 | + * | |
16 | + */ | |
17 | + | |
18 | +#include "perf.h" | |
19 | +#include "util/util.h" | |
20 | +#include "util/parse-options.h" | |
21 | +#include "builtin.h" | |
22 | +#include "bench/bench.h" | |
23 | + | |
24 | +#include <stdio.h> | |
25 | +#include <stdlib.h> | |
26 | +#include <string.h> | |
27 | + | |
28 | +struct bench_suite { | |
29 | + const char *name; | |
30 | + const char *summary; | |
31 | + int (*fn)(int, const char **, const char *); | |
32 | +}; | |
33 | + | |
34 | +static struct bench_suite sched_suites[] = { | |
35 | + { "messaging", | |
36 | + "Benchmark for scheduler and IPC mechanisms", | |
37 | + bench_sched_messaging }, | |
38 | + { "pipe", | |
39 | + "Flood of communication over pipe() between two processes", | |
40 | + bench_sched_pipe }, | |
41 | + { NULL, | |
42 | + NULL, | |
43 | + NULL } | |
44 | +}; | |
45 | + | |
46 | +struct bench_subsys { | |
47 | + const char *name; | |
48 | + const char *summary; | |
49 | + struct bench_suite *suites; | |
50 | +}; | |
51 | + | |
52 | +static struct bench_subsys subsystems[] = { | |
53 | + { "sched", | |
54 | + "scheduler and IPC mechanism", | |
55 | + sched_suites }, | |
56 | + { NULL, | |
57 | + NULL, | |
58 | + NULL } | |
59 | +}; | |
60 | + | |
61 | +static void dump_suites(int subsys_index) | |
62 | +{ | |
63 | + int i; | |
64 | + | |
65 | + printf("List of available suites for %s...\n\n", | |
66 | + subsystems[subsys_index].name); | |
67 | + | |
68 | + for (i = 0; subsystems[subsys_index].suites[i].name; i++) | |
69 | + printf("\t%s: %s\n", | |
70 | + subsystems[subsys_index].suites[i].name, | |
71 | + subsystems[subsys_index].suites[i].summary); | |
72 | + | |
73 | + printf("\n"); | |
74 | + return; | |
75 | +} | |
76 | + | |
77 | +static char *bench_format_str; | |
78 | +int bench_format = BENCH_FORMAT_DEFAULT; | |
79 | + | |
80 | +static const struct option bench_options[] = { | |
81 | + OPT_STRING('f', "format", &bench_format_str, "default", | |
82 | + "Specify format style"), | |
83 | + OPT_END() | |
84 | +}; | |
85 | + | |
86 | +static const char * const bench_usage[] = { | |
87 | + "perf bench [<common options>] <subsystem> <suite> [<options>]", | |
88 | + NULL | |
89 | +}; | |
90 | + | |
91 | +static void print_usage(void) | |
92 | +{ | |
93 | + int i; | |
94 | + | |
95 | + printf("Usage: \n"); | |
96 | + for (i = 0; bench_usage[i]; i++) | |
97 | + printf("\t%s\n", bench_usage[i]); | |
98 | + printf("\n"); | |
99 | + | |
100 | + printf("List of available subsystems...\n\n"); | |
101 | + | |
102 | + for (i = 0; subsystems[i].name; i++) | |
103 | + printf("\t%s: %s\n", | |
104 | + subsystems[i].name, subsystems[i].summary); | |
105 | + printf("\n"); | |
106 | +} | |
107 | + | |
108 | +static int bench_str2int(char *str) | |
109 | +{ | |
110 | + if (!str) | |
111 | + return BENCH_FORMAT_DEFAULT; | |
112 | + | |
113 | + if (!strcmp(str, BENCH_FORMAT_DEFAULT_STR)) | |
114 | + return BENCH_FORMAT_DEFAULT; | |
115 | + else if (!strcmp(str, BENCH_FORMAT_SIMPLE_STR)) | |
116 | + return BENCH_FORMAT_SIMPLE; | |
117 | + | |
118 | + return BENCH_FORMAT_UNKNOWN; | |
119 | +} | |
120 | + | |
121 | +int cmd_bench(int argc, const char **argv, const char *prefix __used) | |
122 | +{ | |
123 | + int i, j, status = 0; | |
124 | + | |
125 | + if (argc < 2) { | |
126 | + /* No subsystem specified. */ | |
127 | + print_usage(); | |
128 | + goto end; | |
129 | + } | |
130 | + | |
131 | + argc = parse_options(argc, argv, bench_options, bench_usage, | |
132 | + PARSE_OPT_STOP_AT_NON_OPTION); | |
133 | + | |
134 | + bench_format = bench_str2int(bench_format_str); | |
135 | + if (bench_format == BENCH_FORMAT_UNKNOWN) { | |
136 | + printf("Unknown format descriptor:%s\n", bench_format_str); | |
137 | + goto end; | |
138 | + } | |
139 | + | |
140 | + if (argc < 1) { | |
141 | + print_usage(); | |
142 | + goto end; | |
143 | + } | |
144 | + | |
145 | + for (i = 0; subsystems[i].name; i++) { | |
146 | + if (strcmp(subsystems[i].name, argv[0])) | |
147 | + continue; | |
148 | + | |
149 | + if (argc < 2) { | |
150 | + /* No suite specified. */ | |
151 | + dump_suites(i); | |
152 | + goto end; | |
153 | + } | |
154 | + | |
155 | + for (j = 0; subsystems[i].suites[j].name; j++) { | |
156 | + if (strcmp(subsystems[i].suites[j].name, argv[1])) | |
157 | + continue; | |
158 | + | |
159 | + if (bench_format == BENCH_FORMAT_DEFAULT) | |
160 | + printf("# Running %s/%s benchmark...\n", | |
161 | + subsystems[i].name, | |
162 | + subsystems[i].suites[j].name); | |
163 | + status = subsystems[i].suites[j].fn(argc - 1, | |
164 | + argv + 1, prefix); | |
165 | + goto end; | |
166 | + } | |
167 | + | |
168 | + if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { | |
169 | + dump_suites(i); | |
170 | + goto end; | |
171 | + } | |
172 | + | |
173 | + printf("Unknown suite:%s for %s\n", argv[1], argv[0]); | |
174 | + status = 1; | |
175 | + goto end; | |
176 | + } | |
177 | + | |
178 | + printf("Unknown subsystem:%s\n", argv[0]); | |
179 | + status = 1; | |
180 | + | |
181 | +end: | |
182 | + return status; | |
183 | +} |
tools/perf/builtin.h
... | ... | @@ -15,6 +15,7 @@ |
15 | 15 | extern int check_pager_config(const char *cmd); |
16 | 16 | |
17 | 17 | extern int cmd_annotate(int argc, const char **argv, const char *prefix); |
18 | +extern int cmd_bench(int argc, const char **argv, const char *prefix); | |
18 | 19 | extern int cmd_help(int argc, const char **argv, const char *prefix); |
19 | 20 | extern int cmd_sched(int argc, const char **argv, const char *prefix); |
20 | 21 | extern int cmd_list(int argc, const char **argv, const char *prefix); |
tools/perf/command-list.txt
tools/perf/design.txt
... | ... | @@ -137,6 +137,8 @@ |
137 | 137 | PERF_COUNT_SW_CPU_MIGRATIONS = 4, |
138 | 138 | PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, |
139 | 139 | PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, |
140 | + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, | |
141 | + PERF_COUNT_SW_EMULATION_FAULTS = 8, | |
140 | 142 | }; |
141 | 143 | |
142 | 144 | Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event |
tools/perf/perf.c
tools/perf/util/parse-events.c
... | ... | @@ -48,6 +48,8 @@ |
48 | 48 | { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, |
49 | 49 | { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, |
50 | 50 | { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, |
51 | + { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" }, | |
52 | + { CSW(EMULATION_FAULTS), "emulation-faults", "" }, | |
51 | 53 | }; |
52 | 54 | |
53 | 55 | #define __PERF_EVENT_FIELD(config, name) \ |
... | ... | @@ -76,6 +78,8 @@ |
76 | 78 | "CPU-migrations", |
77 | 79 | "minor-faults", |
78 | 80 | "major-faults", |
81 | + "alignment-faults", | |
82 | + "emulation-faults", | |
79 | 83 | }; |
80 | 84 | |
81 | 85 | #define MAX_ALIASES 8 |