Commit 0ffa798d947f5f5e40690cc9d38e678080a34f87

Authored by Ingo Molnar

Merge branches 'perf/powerpc' and 'perf/bench' into perf/core

Merge reason: Both 'perf bench' and the pending PowerPC changes
              are now ready for the next merge window.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 37 changed files Side-by-side Diff

arch/powerpc/Kconfig.debug
... ... @@ -46,7 +46,7 @@
46 46  
47 47 config HCALL_STATS
48 48 bool "Hypervisor call instrumentation"
49   - depends on PPC_PSERIES && DEBUG_FS
  49 + depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
50 50 help
51 51 Adds code to keep track of the number of hypervisor calls made and
52 52 the amount of time spent in hypervisor calls. Wall time spent in
arch/powerpc/configs/pseries_defconfig
... ... @@ -1683,7 +1683,7 @@
1683 1683 CONFIG_DEBUG_STACKOVERFLOW=y
1684 1684 # CONFIG_DEBUG_STACK_USAGE is not set
1685 1685 # CONFIG_DEBUG_PAGEALLOC is not set
1686   -CONFIG_HCALL_STATS=y
  1686 +# CONFIG_HCALL_STATS is not set
1687 1687 # CONFIG_CODE_PATCHING_SELFTEST is not set
1688 1688 # CONFIG_FTR_FIXUP_SELFTEST is not set
1689 1689 # CONFIG_MSI_BITMAP_SELFTEST is not set
arch/powerpc/include/asm/emulated_ops.h
... ... @@ -19,6 +19,7 @@
19 19 #define _ASM_POWERPC_EMULATED_OPS_H
20 20  
21 21 #include <asm/atomic.h>
  22 +#include <linux/perf_event.h>
22 23  
23 24  
24 25 #ifdef CONFIG_PPC_EMULATED_STATS
... ... @@ -57,7 +58,7 @@
57 58  
58 59 extern void ppc_warn_emulated_print(const char *type);
59 60  
60   -#define PPC_WARN_EMULATED(type) \
  61 +#define __PPC_WARN_EMULATED(type) \
61 62 do { \
62 63 atomic_inc(&ppc_emulated.type.val); \
63 64 if (ppc_warn_emulated) \
64 65  
... ... @@ -66,9 +67,23 @@
66 67  
67 68 #else /* !CONFIG_PPC_EMULATED_STATS */
68 69  
69   -#define PPC_WARN_EMULATED(type) do { } while (0)
  70 +#define __PPC_WARN_EMULATED(type) do { } while (0)
70 71  
71 72 #endif /* !CONFIG_PPC_EMULATED_STATS */
  73 +/* Report one PERF_COUNT_SW_EMULATION_FAULTS software event for regs (address argument 0), then update the per-type emulation statistics through __PPC_WARN_EMULATED(type). */
  74 +#define PPC_WARN_EMULATED(type, regs) \
  75 + do { \
  76 + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \
  77 + 1, 0, regs, 0); \
  78 + __PPC_WARN_EMULATED(type); \
  79 + } while (0)
  80 +
  81 +#define PPC_WARN_ALIGNMENT(type, regs) \
  82 + do { \
  83 + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \
  84 + 1, 0, regs, regs->dar); \
  85 + __PPC_WARN_EMULATED(type); \
  86 + } while (0)
72 87  
73 88 #endif /* _ASM_POWERPC_EMULATED_OPS_H */
arch/powerpc/include/asm/hvcall.h
... ... @@ -274,6 +274,8 @@
274 274 unsigned long num_calls; /* number of calls (on this CPU) */
275 275 unsigned long tb_total; /* total wall time (mftb) of calls. */
276 276 unsigned long purr_total; /* total cpu time (PURR) of calls. */
  277 + unsigned long tb_start;
  278 + unsigned long purr_start;
277 279 };
278 280 #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1)
279 281  
arch/powerpc/include/asm/reg.h
... ... @@ -489,6 +489,8 @@
489 489 #define SPRN_MMCR1 798
490 490 #define SPRN_MMCRA 0x312
491 491 #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
  492 +#define MMCRA_SDAR_DCACHE_MISS 0x40000000UL
  493 +#define MMCRA_SDAR_ERAT_MISS 0x20000000UL
492 494 #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */
493 495 #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */
494 496 #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */
arch/powerpc/include/asm/trace.h
  1 +#undef TRACE_SYSTEM
  2 +#define TRACE_SYSTEM powerpc
  3 +
  4 +#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ)
  5 +#define _TRACE_POWERPC_H
  6 +
  7 +#include <linux/tracepoint.h>
  8 +
  9 +struct pt_regs;
  10 +/* irq_entry tracepoint: takes a struct pt_regs pointer, records the pointer value itself (not the register contents) and prints it as "pt_regs=%p". */
  11 +TRACE_EVENT(irq_entry,
  12 +
  13 + TP_PROTO(struct pt_regs *regs),
  14 +
  15 + TP_ARGS(regs),
  16 +
  17 + TP_STRUCT__entry(
  18 + __field(struct pt_regs *, regs)
  19 + ),
  20 +
  21 + TP_fast_assign(
  22 + __entry->regs = regs;
  23 + ),
  24 +
  25 + TP_printk("pt_regs=%p", __entry->regs)
  26 +);
  27 +/* irq_exit tracepoint: same layout as irq_entry; records the struct pt_regs pointer value and prints it as "pt_regs=%p". */
  28 +TRACE_EVENT(irq_exit,
  29 +
  30 + TP_PROTO(struct pt_regs *regs),
  31 +
  32 + TP_ARGS(regs),
  33 +
  34 + TP_STRUCT__entry(
  35 + __field(struct pt_regs *, regs)
  36 + ),
  37 +
  38 + TP_fast_assign(
  39 + __entry->regs = regs;
  40 + ),
  41 +
  42 + TP_printk("pt_regs=%p", __entry->regs)
  43 +);
  44 +/* timer_interrupt_entry tracepoint: records the struct pt_regs pointer value it is given and prints it as "pt_regs=%p". */
  45 +TRACE_EVENT(timer_interrupt_entry,
  46 +
  47 + TP_PROTO(struct pt_regs *regs),
  48 +
  49 + TP_ARGS(regs),
  50 +
  51 + TP_STRUCT__entry(
  52 + __field(struct pt_regs *, regs)
  53 + ),
  54 +
  55 + TP_fast_assign(
  56 + __entry->regs = regs;
  57 + ),
  58 +
  59 + TP_printk("pt_regs=%p", __entry->regs)
  60 +);
  61 +/* timer_interrupt_exit tracepoint: same layout as timer_interrupt_entry; records the struct pt_regs pointer value and prints it as "pt_regs=%p". */
  62 +TRACE_EVENT(timer_interrupt_exit,
  63 +
  64 + TP_PROTO(struct pt_regs *regs),
  65 +
  66 + TP_ARGS(regs),
  67 +
  68 + TP_STRUCT__entry(
  69 + __field(struct pt_regs *, regs)
  70 + ),
  71 +
  72 + TP_fast_assign(
  73 + __entry->regs = regs;
  74 + ),
  75 +
  76 + TP_printk("pt_regs=%p", __entry->regs)
  77 +);
  78 +
  79 +#ifdef CONFIG_PPC_PSERIES
  80 +extern void hcall_tracepoint_regfunc(void);
  81 +extern void hcall_tracepoint_unregfunc(void);
  82 +/* hcall_entry tracepoint: takes the hcall opcode and an argument pointer; only the opcode is stored and printed ("opcode=%lu"), args is passed to probes but not recorded. hcall_tracepoint_regfunc/unregfunc are supplied as the registration callbacks. */
  83 +TRACE_EVENT_FN(hcall_entry,
  84 +
  85 + TP_PROTO(unsigned long opcode, unsigned long *args),
  86 +
  87 + TP_ARGS(opcode, args),
  88 +
  89 + TP_STRUCT__entry(
  90 + __field(unsigned long, opcode)
  91 + ),
  92 +
  93 + TP_fast_assign(
  94 + __entry->opcode = opcode;
  95 + ),
  96 +
  97 + TP_printk("opcode=%lu", __entry->opcode),
  98 +
  99 + hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
  100 +);
  101 +/* hcall_exit tracepoint: takes the opcode, the return value and a return-buffer pointer; stores and prints opcode and retval ("opcode=%lu retval=%lu"), retbuf is passed to probes but not recorded. NOTE(review): retval is printed unsigned; if hcall status codes can be negative they will show as large values - confirm. */
  102 +TRACE_EVENT_FN(hcall_exit,
  103 +
  104 + TP_PROTO(unsigned long opcode, unsigned long retval,
  105 + unsigned long *retbuf),
  106 +
  107 + TP_ARGS(opcode, retval, retbuf),
  108 +
  109 + TP_STRUCT__entry(
  110 + __field(unsigned long, opcode)
  111 + __field(unsigned long, retval)
  112 + ),
  113 +
  114 + TP_fast_assign(
  115 + __entry->opcode = opcode;
  116 + __entry->retval = retval;
  117 + ),
  118 +
  119 + TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
  120 +
  121 + hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
  122 +);
  123 +#endif
  124 +
  125 +#endif /* _TRACE_POWERPC_H */
  126 +
  127 +#undef TRACE_INCLUDE_PATH
  128 +#undef TRACE_INCLUDE_FILE
  129 +
  130 +#define TRACE_INCLUDE_PATH asm
  131 +#define TRACE_INCLUDE_FILE trace
  132 +
  133 +#include <trace/define_trace.h>
arch/powerpc/kernel/align.c
... ... @@ -732,7 +732,7 @@
732 732  
733 733 #ifdef CONFIG_SPE
734 734 if ((instr >> 26) == 0x4) {
735   - PPC_WARN_EMULATED(spe);
  735 + PPC_WARN_ALIGNMENT(spe, regs);
736 736 return emulate_spe(regs, reg, instr);
737 737 }
738 738 #endif
... ... @@ -786,7 +786,7 @@
786 786 flags |= SPLT;
787 787 nb = 8;
788 788 }
789   - PPC_WARN_EMULATED(vsx);
  789 + PPC_WARN_ALIGNMENT(vsx, regs);
790 790 return emulate_vsx(addr, reg, areg, regs, flags, nb);
791 791 }
792 792 #endif
... ... @@ -794,7 +794,7 @@
794 794 * the exception of DCBZ which is handled as a special case here
795 795 */
796 796 if (instr == DCBZ) {
797   - PPC_WARN_EMULATED(dcbz);
  797 + PPC_WARN_ALIGNMENT(dcbz, regs);
798 798 return emulate_dcbz(regs, addr);
799 799 }
800 800 if (unlikely(nb == 0))
... ... @@ -804,7 +804,7 @@
804 804 * function
805 805 */
806 806 if (flags & M) {
807   - PPC_WARN_EMULATED(multiple);
  807 + PPC_WARN_ALIGNMENT(multiple, regs);
808 808 return emulate_multiple(regs, addr, reg, nb,
809 809 flags, instr, swiz);
810 810 }
811 811  
... ... @@ -825,11 +825,11 @@
825 825  
826 826 /* Special case for 16-byte FP loads and stores */
827 827 if (nb == 16) {
828   - PPC_WARN_EMULATED(fp_pair);
  828 + PPC_WARN_ALIGNMENT(fp_pair, regs);
829 829 return emulate_fp_pair(addr, reg, flags);
830 830 }
831 831  
832   - PPC_WARN_EMULATED(unaligned);
  832 + PPC_WARN_ALIGNMENT(unaligned, regs);
833 833  
834 834 /* If we are loading, get the data from user space, else
835 835 * get it from register values
arch/powerpc/kernel/entry_64.S
... ... @@ -551,7 +551,7 @@
551 551 BEGIN_FW_FTR_SECTION
552 552 ld r5,SOFTE(r1)
553 553 FW_FTR_SECTION_ELSE
554   - b iseries_check_pending_irqs
  554 + b .Liseries_check_pending_irqs
555 555 ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
556 556 2:
557 557 TRACE_AND_RESTORE_IRQ(r5);
... ... @@ -623,7 +623,7 @@
623 623  
624 624 #endif /* CONFIG_PPC_BOOK3E */
625 625  
626   -iseries_check_pending_irqs:
  626 +.Liseries_check_pending_irqs:
627 627 #ifdef CONFIG_PPC_ISERIES
628 628 ld r5,SOFTE(r1)
629 629 cmpdi 0,r5,0
arch/powerpc/kernel/exceptions-64s.S
... ... @@ -185,12 +185,15 @@
185 185 * prolog code of the PerformanceMonitor one. A little
186 186 * trickery is thus necessary
187 187 */
  188 +performance_monitor_pSeries_1:
188 189 . = 0xf00
189 190 b performance_monitor_pSeries
190 191  
  192 +altivec_unavailable_pSeries_1:
191 193 . = 0xf20
192 194 b altivec_unavailable_pSeries
193 195  
  196 +vsx_unavailable_pSeries_1:
194 197 . = 0xf40
195 198 b vsx_unavailable_pSeries
196 199  
arch/powerpc/kernel/irq.c
... ... @@ -70,6 +70,8 @@
70 70 #include <asm/firmware.h>
71 71 #include <asm/lv1call.h>
72 72 #endif
  73 +#define CREATE_TRACE_POINTS
  74 +#include <asm/trace.h>
73 75  
74 76 int __irq_offset_value;
75 77 static int ppc_spurious_interrupts;
... ... @@ -325,6 +327,8 @@
325 327 struct pt_regs *old_regs = set_irq_regs(regs);
326 328 unsigned int irq;
327 329  
  330 + trace_irq_entry(regs);
  331 +
328 332 irq_enter();
329 333  
330 334 check_stack_overflow();
... ... @@ -348,6 +352,8 @@
348 352 timer_interrupt(regs);
349 353 }
350 354 #endif
  355 +
  356 + trace_irq_exit(regs);
351 357 }
352 358  
353 359 void __init init_IRQ(void)
arch/powerpc/kernel/perf_event.c
... ... @@ -1165,7 +1165,7 @@
1165 1165 */
1166 1166 if (record) {
1167 1167 struct perf_sample_data data = {
1168   - .addr = 0,
  1168 + .addr = ~0ULL,
1169 1169 .period = event->hw.last_period,
1170 1170 };
1171 1171  
arch/powerpc/kernel/power5+-pmu.c
... ... @@ -73,10 +73,6 @@
73 73 #define MMCR1_PMCSEL_MSK 0x7f
74 74  
75 75 /*
76   - * Bits in MMCRA
77   - */
78   -
79   -/*
80 76 * Layout of constraint bits:
81 77 * 6666555555555544444444443333333333222222222211111111110000000000
82 78 * 3210987654321098765432109876543210987654321098765432109876543210
arch/powerpc/kernel/power5-pmu.c
... ... @@ -73,10 +73,6 @@
73 73 #define MMCR1_PMCSEL_MSK 0x7f
74 74  
75 75 /*
76   - * Bits in MMCRA
77   - */
78   -
79   -/*
80 76 * Layout of constraint bits:
81 77 * 6666555555555544444444443333333333222222222211111111110000000000
82 78 * 3210987654321098765432109876543210987654321098765432109876543210
... ... @@ -390,7 +386,7 @@
390 386 unsigned int hwc[], unsigned long mmcr[])
391 387 {
392 388 unsigned long mmcr1 = 0;
393   - unsigned long mmcra = 0;
  389 + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
394 390 unsigned int pmc, unit, byte, psel;
395 391 unsigned int ttm, grp;
396 392 int i, isbus, bit, grsel;
arch/powerpc/kernel/power6-pmu.c
... ... @@ -178,7 +178,7 @@
178 178 unsigned int hwc[], unsigned long mmcr[])
179 179 {
180 180 unsigned long mmcr1 = 0;
181   - unsigned long mmcra = 0;
  181 + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
182 182 int i;
183 183 unsigned int pmc, ev, b, u, s, psel;
184 184 unsigned int ttmset = 0;
arch/powerpc/kernel/power7-pmu.c
... ... @@ -51,10 +51,6 @@
51 51 #define MMCR1_PMCSEL_MSK 0xff
52 52  
53 53 /*
54   - * Bits in MMCRA
55   - */
56   -
57   -/*
58 54 * Layout of constraint bits:
59 55 * 6666555555555544444444443333333333222222222211111111110000000000
60 56 * 3210987654321098765432109876543210987654321098765432109876543210
... ... @@ -230,7 +226,7 @@
230 226 unsigned int hwc[], unsigned long mmcr[])
231 227 {
232 228 unsigned long mmcr1 = 0;
233   - unsigned long mmcra = 0;
  229 + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
234 230 unsigned int pmc, unit, combine, l2sel, psel;
235 231 unsigned int pmc_inuse = 0;
236 232 int i;
arch/powerpc/kernel/ppc970-pmu.c
... ... @@ -84,10 +84,6 @@
84 84 };
85 85  
86 86 /*
87   - * Bits in MMCRA
88   - */
89   -
90   -/*
91 87 * Layout of constraint bits:
92 88 * 6666555555555544444444443333333333222222222211111111110000000000
93 89 * 3210987654321098765432109876543210987654321098765432109876543210
arch/powerpc/kernel/setup-common.c
... ... @@ -660,6 +660,7 @@
660 660  
661 661 #ifdef CONFIG_DEBUG_FS
662 662 struct dentry *powerpc_debugfs_root;
  663 +EXPORT_SYMBOL(powerpc_debugfs_root);
663 664  
664 665 static int powerpc_debugfs_init(void)
665 666 {
arch/powerpc/kernel/time.c
... ... @@ -54,6 +54,7 @@
54 54 #include <linux/irq.h>
55 55 #include <linux/delay.h>
56 56 #include <linux/perf_event.h>
  57 +#include <asm/trace.h>
57 58  
58 59 #include <asm/io.h>
59 60 #include <asm/processor.h>
... ... @@ -571,6 +572,8 @@
571 572 struct clock_event_device *evt = &decrementer->event;
572 573 u64 now;
573 574  
  575 + trace_timer_interrupt_entry(regs);
  576 +
574 577 /* Ensure a positive value is written to the decrementer, or else
575 578 * some CPUs will continue to take decrementer exceptions */
576 579 set_dec(DECREMENTER_MAX);
... ... @@ -590,6 +593,7 @@
590 593 now = decrementer->next_tb - now;
591 594 if (now <= DECREMENTER_MAX)
592 595 set_dec((int)now);
  596 + trace_timer_interrupt_exit(regs);
593 597 return;
594 598 }
595 599 old_regs = set_irq_regs(regs);
... ... @@ -620,6 +624,8 @@
620 624  
621 625 irq_exit();
622 626 set_irq_regs(old_regs);
  627 +
  628 + trace_timer_interrupt_exit(regs);
623 629 }
624 630  
625 631 void wakeup_decrementer(void)
arch/powerpc/kernel/traps.c
... ... @@ -759,7 +759,7 @@
759 759  
760 760 /* Emulate the mfspr rD, PVR. */
761 761 if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
762   - PPC_WARN_EMULATED(mfpvr);
  762 + PPC_WARN_EMULATED(mfpvr, regs);
763 763 rd = (instword >> 21) & 0x1f;
764 764 regs->gpr[rd] = mfspr(SPRN_PVR);
765 765 return 0;
... ... @@ -767,7 +767,7 @@
767 767  
768 768 /* Emulating the dcba insn is just a no-op. */
769 769 if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
770   - PPC_WARN_EMULATED(dcba);
  770 + PPC_WARN_EMULATED(dcba, regs);
771 771 return 0;
772 772 }
773 773  
... ... @@ -776,7 +776,7 @@
776 776 int shift = (instword >> 21) & 0x1c;
777 777 unsigned long msk = 0xf0000000UL >> shift;
778 778  
779   - PPC_WARN_EMULATED(mcrxr);
  779 + PPC_WARN_EMULATED(mcrxr, regs);
780 780 regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
781 781 regs->xer &= ~0xf0000000UL;
782 782 return 0;
783 783  
784 784  
... ... @@ -784,19 +784,19 @@
784 784  
785 785 /* Emulate load/store string insn. */
786 786 if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
787   - PPC_WARN_EMULATED(string);
  787 + PPC_WARN_EMULATED(string, regs);
788 788 return emulate_string_inst(regs, instword);
789 789 }
790 790  
791 791 /* Emulate the popcntb (Population Count Bytes) instruction. */
792 792 if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
793   - PPC_WARN_EMULATED(popcntb);
  793 + PPC_WARN_EMULATED(popcntb, regs);
794 794 return emulate_popcntb_inst(regs, instword);
795 795 }
796 796  
797 797 /* Emulate isel (Integer Select) instruction */
798 798 if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
799   - PPC_WARN_EMULATED(isel);
  799 + PPC_WARN_EMULATED(isel, regs);
800 800 return emulate_isel(regs, instword);
801 801 }
802 802  
... ... @@ -995,7 +995,7 @@
995 995 #ifdef CONFIG_MATH_EMULATION
996 996 errcode = do_mathemu(regs);
997 997 if (errcode >= 0)
998   - PPC_WARN_EMULATED(math);
  998 + PPC_WARN_EMULATED(math, regs);
999 999  
1000 1000 switch (errcode) {
1001 1001 case 0:
... ... @@ -1018,7 +1018,7 @@
1018 1018 #elif defined(CONFIG_8XX_MINIMAL_FPEMU)
1019 1019 errcode = Soft_emulate_8xx(regs);
1020 1020 if (errcode >= 0)
1021   - PPC_WARN_EMULATED(8xx);
  1021 + PPC_WARN_EMULATED(8xx, regs);
1022 1022  
1023 1023 switch (errcode) {
1024 1024 case 0:
... ... @@ -1129,7 +1129,7 @@
1129 1129  
1130 1130 flush_altivec_to_thread(current);
1131 1131  
1132   - PPC_WARN_EMULATED(altivec);
  1132 + PPC_WARN_EMULATED(altivec, regs);
1133 1133 err = emulate_altivec(regs);
1134 1134 if (err == 0) {
1135 1135 regs->nip += 4; /* skip emulated instruction */
arch/powerpc/lib/copypage_64.S
... ... @@ -26,11 +26,11 @@
26 26 srd r8,r5,r11
27 27  
28 28 mtctr r8
29   -setup:
  29 +.Lsetup:
30 30 dcbt r9,r4
31 31 dcbz r9,r3
32 32 add r9,r9,r12
33   - bdnz setup
  33 + bdnz .Lsetup
34 34 END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
35 35 addi r3,r3,-8
36 36 srdi r8,r5,7 /* page is copied in 128 byte strides */
arch/powerpc/platforms/pseries/hvCall.S
... ... @@ -14,68 +14,94 @@
14 14  
15 15 #define STK_PARM(i) (48 + ((i)-3)*8)
16 16  
17   -#ifdef CONFIG_HCALL_STATS
  17 +#ifdef CONFIG_TRACEPOINTS
  18 +
  19 + .section ".toc","aw"
  20 +
  21 + .globl hcall_tracepoint_refcount
  22 +hcall_tracepoint_refcount:
  23 + .llong 0
  24 +
  25 + .section ".text"
  26 +
18 27 /*
19 28 * precall must preserve all registers. use unused STK_PARM()
20   - * areas to save snapshots and opcode.
  29 + * areas to save snapshots and opcode. We branch around this
  30 + * in early init (eg when populating the MMU hashtable) by using an
  31 + * unconditional cpu feature.
21 32 */
22   -#define HCALL_INST_PRECALL \
23   - std r3,STK_PARM(r3)(r1); /* save opcode */ \
24   - mftb r0; /* get timebase and */ \
25   - std r0,STK_PARM(r5)(r1); /* save for later */ \
  33 +#define HCALL_INST_PRECALL(FIRST_REG) \
26 34 BEGIN_FTR_SECTION; \
27   - mfspr r0,SPRN_PURR; /* get PURR and */ \
28   - std r0,STK_PARM(r6)(r1); /* save for later */ \
29   -END_FTR_SECTION_IFSET(CPU_FTR_PURR);
30   -
  35 + b 1f; \
  36 +END_FTR_SECTION(0, 1); \
  37 + ld r12,hcall_tracepoint_refcount@toc(r2); \
  38 + cmpdi r12,0; \
  39 + beq+ 1f; \
  40 + mflr r0; \
  41 + std r3,STK_PARM(r3)(r1); \
  42 + std r4,STK_PARM(r4)(r1); \
  43 + std r5,STK_PARM(r5)(r1); \
  44 + std r6,STK_PARM(r6)(r1); \
  45 + std r7,STK_PARM(r7)(r1); \
  46 + std r8,STK_PARM(r8)(r1); \
  47 + std r9,STK_PARM(r9)(r1); \
  48 + std r10,STK_PARM(r10)(r1); \
  49 + std r0,16(r1); \
  50 + addi r4,r1,STK_PARM(FIRST_REG); \
  51 + stdu r1,-STACK_FRAME_OVERHEAD(r1); \
  52 + bl .__trace_hcall_entry; \
  53 + addi r1,r1,STACK_FRAME_OVERHEAD; \
  54 + ld r0,16(r1); \
  55 + ld r3,STK_PARM(r3)(r1); \
  56 + ld r4,STK_PARM(r4)(r1); \
  57 + ld r5,STK_PARM(r5)(r1); \
  58 + ld r6,STK_PARM(r6)(r1); \
  59 + ld r7,STK_PARM(r7)(r1); \
  60 + ld r8,STK_PARM(r8)(r1); \
  61 + ld r9,STK_PARM(r9)(r1); \
  62 + ld r10,STK_PARM(r10)(r1); \
  63 + mtlr r0; \
  64 +1:
  65 +
31 66 /*
32 67 * postcall is performed immediately before function return which
33 68 * allows liberal use of volatile registers. We branch around this
34 69 * in early init (eg when populating the MMU hashtable) by using an
35 70 * unconditional cpu feature.
36 71 */
37   -#define HCALL_INST_POSTCALL \
  72 +#define __HCALL_INST_POSTCALL \
38 73 BEGIN_FTR_SECTION; \
39 74 b 1f; \
40 75 END_FTR_SECTION(0, 1); \
41   - ld r4,STK_PARM(r3)(r1); /* validate opcode */ \
42   - cmpldi cr7,r4,MAX_HCALL_OPCODE; \
43   - bgt- cr7,1f; \
44   - \
45   - /* get time and PURR snapshots after hcall */ \
46   - mftb r7; /* timebase after */ \
47   -BEGIN_FTR_SECTION; \
48   - mfspr r8,SPRN_PURR; /* PURR after */ \
49   - ld r6,STK_PARM(r6)(r1); /* PURR before */ \
50   - subf r6,r6,r8; /* delta */ \
51   -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \
52   - ld r5,STK_PARM(r5)(r1); /* timebase before */ \
53   - subf r5,r5,r7; /* time delta */ \
54   - \
55   - /* calculate address of stat structure r4 = opcode */ \
56   - srdi r4,r4,2; /* index into array */ \
57   - mulli r4,r4,HCALL_STAT_SIZE; \
58   - LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \
59   - add r4,r4,r7; \
60   - ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \
61   - add r4,r4,r7; \
62   - \
63   - /* update stats */ \
64   - ld r7,HCALL_STAT_CALLS(r4); /* count */ \
65   - addi r7,r7,1; \
66   - std r7,HCALL_STAT_CALLS(r4); \
67   - ld r7,HCALL_STAT_TB(r4); /* timebase */ \
68   - add r7,r7,r5; \
69   - std r7,HCALL_STAT_TB(r4); \
70   -BEGIN_FTR_SECTION; \
71   - ld r7,HCALL_STAT_PURR(r4); /* PURR */ \
72   - add r7,r7,r6; \
73   - std r7,HCALL_STAT_PURR(r4); \
74   -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \
  76 + ld r12,hcall_tracepoint_refcount@toc(r2); \
  77 + cmpdi r12,0; \
  78 + beq+ 1f; \
  79 + mflr r0; \
  80 + ld r6,STK_PARM(r3)(r1); \
  81 + std r3,STK_PARM(r3)(r1); \
  82 + mr r4,r3; \
  83 + mr r3,r6; \
  84 + std r0,16(r1); \
  85 + stdu r1,-STACK_FRAME_OVERHEAD(r1); \
  86 + bl .__trace_hcall_exit; \
  87 + addi r1,r1,STACK_FRAME_OVERHEAD; \
  88 + ld r0,16(r1); \
  89 + ld r3,STK_PARM(r3)(r1); \
  90 + mtlr r0; \
75 91 1:
  92 +
  93 +#define HCALL_INST_POSTCALL_NORETS \
  94 + li r5,0; \
  95 + __HCALL_INST_POSTCALL
  96 +
  97 +#define HCALL_INST_POSTCALL(BUFREG) \
  98 + mr r5,BUFREG; \
  99 + __HCALL_INST_POSTCALL
  100 +
76 101 #else
77   -#define HCALL_INST_PRECALL
78   -#define HCALL_INST_POSTCALL
  102 +#define HCALL_INST_PRECALL(FIRST_ARG)
  103 +#define HCALL_INST_POSTCALL_NORETS
  104 +#define HCALL_INST_POSTCALL(BUFREG)
79 105 #endif
80 106  
81 107 .text
82 108  
... ... @@ -86,11 +112,11 @@
86 112 mfcr r0
87 113 stw r0,8(r1)
88 114  
89   - HCALL_INST_PRECALL
  115 + HCALL_INST_PRECALL(r4)
90 116  
91 117 HVSC /* invoke the hypervisor */
92 118  
93   - HCALL_INST_POSTCALL
  119 + HCALL_INST_POSTCALL_NORETS
94 120  
95 121 lwz r0,8(r1)
96 122 mtcrf 0xff,r0
... ... @@ -102,7 +128,7 @@
102 128 mfcr r0
103 129 stw r0,8(r1)
104 130  
105   - HCALL_INST_PRECALL
  131 + HCALL_INST_PRECALL(r5)
106 132  
107 133 std r4,STK_PARM(r4)(r1) /* Save ret buffer */
108 134  
... ... @@ -121,7 +147,7 @@
121 147 std r6, 16(r12)
122 148 std r7, 24(r12)
123 149  
124   - HCALL_INST_POSTCALL
  150 + HCALL_INST_POSTCALL(r12)
125 151  
126 152 lwz r0,8(r1)
127 153 mtcrf 0xff,r0
... ... @@ -168,7 +194,7 @@
168 194 mfcr r0
169 195 stw r0,8(r1)
170 196  
171   - HCALL_INST_PRECALL
  197 + HCALL_INST_PRECALL(r5)
172 198  
173 199 std r4,STK_PARM(r4)(r1) /* Save ret buffer */
174 200  
... ... @@ -196,7 +222,7 @@
196 222 std r11,56(r12)
197 223 std r0, 64(r12)
198 224  
199   - HCALL_INST_POSTCALL
  225 + HCALL_INST_POSTCALL(r12)
200 226  
201 227 lwz r0,8(r1)
202 228 mtcrf 0xff,r0
arch/powerpc/platforms/pseries/hvCall_inst.c
... ... @@ -26,6 +26,7 @@
26 26 #include <asm/hvcall.h>
27 27 #include <asm/firmware.h>
28 28 #include <asm/cputable.h>
  29 +#include <asm/trace.h>
29 30  
30 31 DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
31 32  
... ... @@ -100,6 +101,35 @@
100 101 #define HCALL_ROOT_DIR "hcall_inst"
101 102 #define CPU_NAME_BUF_SIZE 32
102 103  
  104 +/* hcall_entry probe: snapshot the timebase and PURR into this CPU's hcall_stats slot for the opcode (index opcode / 4). Opcodes above MAX_HCALL_OPCODE are ignored. */
  105 +static void probe_hcall_entry(unsigned long opcode, unsigned long *args)
  106 +{
  107 + struct hcall_stats *h;
  108 +
  109 + if (opcode > MAX_HCALL_OPCODE)
  110 + return;
  111 + /* get_cpu_var() here is paired with the put_cpu_var() in probe_hcall_exit() */
  112 + h = &get_cpu_var(hcall_stats)[opcode / 4];
  113 + h->tb_start = mftb();
  114 + h->purr_start = mfspr(SPRN_PURR);
  115 +}
  116 +
  117 +static void probe_hcall_exit(unsigned long opcode, unsigned long retval,
  118 + unsigned long *retbuf)
  119 +{
  120 + struct hcall_stats *h;
  121 +
  122 + if (opcode > MAX_HCALL_OPCODE)
  123 + return;
  124 +
  125 + h = &__get_cpu_var(hcall_stats)[opcode / 4];
  126 + h->num_calls++;
  127 + h->tb_total = mftb() - h->tb_start;
  128 + h->purr_total = mfspr(SPRN_PURR) - h->purr_start;
  129 +
  130 + put_cpu_var(hcall_stats);
  131 +}
  132 +
103 133 static int __init hcall_inst_init(void)
104 134 {
105 135 struct dentry *hcall_root;
... ... @@ -109,6 +139,14 @@
109 139  
110 140 if (!firmware_has_feature(FW_FEATURE_LPAR))
111 141 return 0;
  142 +
  143 + if (register_trace_hcall_entry(probe_hcall_entry))
  144 + return -EINVAL;
  145 +
  146 + if (register_trace_hcall_exit(probe_hcall_exit)) {
  147 + unregister_trace_hcall_entry(probe_hcall_entry);
  148 + return -EINVAL;
  149 + }
112 150  
113 151 hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
114 152 if (!hcall_root)
arch/powerpc/platforms/pseries/lpar.c
... ... @@ -39,6 +39,7 @@
39 39 #include <asm/cputable.h>
40 40 #include <asm/udbg.h>
41 41 #include <asm/smp.h>
  42 +#include <asm/trace.h>
42 43  
43 44 #include "plpar_wrappers.h"
44 45 #include "pseries.h"
... ... @@ -660,5 +661,37 @@
660 661 }
661 662 EXPORT_SYMBOL(arch_free_page);
662 663  
  664 +#endif
  665 +
  666 +#ifdef CONFIG_TRACEPOINTS
  667 +/*
  668 + * We optimise our hcall path by placing hcall_tracepoint_refcount
  669 + * directly in the TOC so we can check if the hcall tracepoints are
  670 + * enabled via a single load.
  671 + */
  672 +
  673 +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
  674 +extern long hcall_tracepoint_refcount;
  675 +/* Registration callback for the hcall tracepoints: increments hcall_tracepoint_refcount, the counter the hcall assembly path loads from the TOC to decide whether to call the trace hooks. */
  676 +void hcall_tracepoint_regfunc(void)
  677 +{
  678 + hcall_tracepoint_refcount++;
  679 +}
  680 +/* Unregistration callback for the hcall tracepoints: decrements hcall_tracepoint_refcount; the hcall assembly path skips the trace hooks while it is zero. */
  681 +void hcall_tracepoint_unregfunc(void)
  682 +{
  683 + hcall_tracepoint_refcount--;
  684 +}
  685 +/* C entry point branched to from HCALL_INST_PRECALL in hvCall.S; forwards opcode and the argument pointer to the hcall_entry tracepoint. */
  686 +void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
  687 +{
  688 + trace_hcall_entry(opcode, args);
  689 +}
  690 +/* C entry point branched to from __HCALL_INST_POSTCALL in hvCall.S; forwards opcode, return value and return buffer to the hcall_exit tracepoint. NOTE(review): opcode is declared long here but unsigned long in the tracepoint prototype. */
  691 +void __trace_hcall_exit(long opcode, unsigned long retval,
  692 + unsigned long *retbuf)
  693 +{
  694 + trace_hcall_exit(opcode, retval, retbuf);
  695 +}
663 696 #endif
include/linux/perf_counter.h
... ... @@ -106,6 +106,8 @@
106 106 PERF_COUNT_SW_CPU_MIGRATIONS = 4,
107 107 PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
108 108 PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
  109 + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
  110 + PERF_COUNT_SW_EMULATION_FAULTS = 8,
109 111  
110 112 PERF_COUNT_SW_MAX, /* non-ABI */
111 113 };
include/linux/perf_event.h
... ... @@ -102,6 +102,8 @@
102 102 PERF_COUNT_SW_CPU_MIGRATIONS = 4,
103 103 PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
104 104 PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
  105 + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
  106 + PERF_COUNT_SW_EMULATION_FAULTS = 8,
105 107  
106 108 PERF_COUNT_SW_MAX, /* non-ABI */
107 109 };
... ... @@ -4274,6 +4274,8 @@
4274 4274 case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
4275 4275 case PERF_COUNT_SW_CONTEXT_SWITCHES:
4276 4276 case PERF_COUNT_SW_CPU_MIGRATIONS:
  4277 + case PERF_COUNT_SW_ALIGNMENT_FAULTS:
  4278 + case PERF_COUNT_SW_EMULATION_FAULTS:
4277 4279 if (!event->parent) {
4278 4280 atomic_inc(&perf_swevent_enabled[event_id]);
4279 4281 event->destroy = sw_perf_event_destroy;
tools/perf/Documentation/perf-bench.txt
  1 +perf-bench(1)
  2 +============
  3 +
  4 +NAME
  5 +----
  6 +perf-bench - General framework for benchmark suites
  7 +
  8 +SYNOPSIS
  9 +--------
  10 +[verse]
  11 +'perf bench' [<common options>] <subsystem> <suite> [<options>]
  12 +
  13 +DESCRIPTION
  14 +-----------
  15 +This 'perf bench' command is a general framework for benchmark suites.
  16 +
  17 +COMMON OPTIONS
  18 +--------------
  19 +-f::
  20 +--format=::
  21 +Specify the output format style.
  22 +Currently available format styles are:
  23 +
  24 +'default'::
  25 +Default style. This is mainly for human reading.
  26 +---------------------
  27 +% perf bench sched pipe # with no style specified
  28 +(executing 1000000 pipe operations between two tasks)
  29 + Total time:5.855 sec
  30 + 5.855061 usecs/op
  31 + 170792 ops/sec
  32 +---------------------
  33 +
  34 +'simple'::
  35 +This simple style is friendly for automated
  36 +processing by scripts.
  37 +---------------------
  38 +% perf bench --format=simple sched pipe # simple style specified
  39 +5.988
  40 +---------------------
  41 +
  42 +SUBSYSTEM
  43 +---------
  44 +
  45 +'sched'::
  46 + Scheduler and IPC mechanisms.
  47 +
  48 +SUITES FOR 'sched'
  49 +~~~~~~~~~~~~~~~~~~
  50 +*messaging*::
  51 +Suite for evaluating performance of scheduler and IPC mechanisms.
  52 +Based on hackbench by Rusty Russell.
  53 +
  54 +Options of *pipe*
  55 +^^^^^^^^^^^^^^^^^
  56 +-p::
  57 +--pipe::
  58 +Use pipe() instead of socketpair()
  59 +
  60 +-t::
  61 +--thread::
  62 +Be multi-threaded instead of multi-process
  63 +
  64 +-g::
  65 +--group=::
  66 +Specify number of groups
  67 +
  68 +-l::
  69 +--loop=::
  70 +Specify number of loops
  71 +
  72 +Example of *messaging*
  73 +^^^^^^^^^^^^^^^^^^^^^^
  74 +
  75 +---------------------
  76 +% perf bench sched messaging # run with default options
  77 +(20 sender and receiver processes per group)
  78 +(10 groups == 400 processes run)
  79 +
  80 + Total time:0.308 sec
  81 +
  82 +% perf bench sched messaging -t -g 20 # be multi-threaded, with 20 groups
  83 +(20 sender and receiver threads per group)
  84 +(20 groups == 800 threads run)
  85 +
  86 + Total time:0.582 sec
  87 +---------------------
  88 +
  89 +*pipe*::
  90 +Suite for pipe() system call.
  91 +Based on pipe-test-1m.c by Ingo Molnar.
  92 +
  93 +Options of *pipe*
  94 +^^^^^^^^^^^^^^^^^
  95 +-l::
  96 +--loop=::
  97 +Specify number of loops.
  98 +
  99 +Example of *pipe*
  100 +^^^^^^^^^^^^^^^^^
  101 +
  102 +---------------------
  103 +% perf bench sched pipe
  104 +(executing 1000000 pipe operations between two tasks)
  105 +
  106 + Total time:8.091 sec
  107 + 8.091833 usecs/op
  108 + 123581 ops/sec
  109 +
  110 +% perf bench sched pipe -l 1000 # loop 1000
  111 +(executing 1000 pipe operations between two tasks)
  112 +
  113 + Total time:0.016 sec
  114 + 16.948000 usecs/op
  115 + 59004 ops/sec
  116 +---------------------
  117 +
  118 +SEE ALSO
  119 +--------
  120 +linkperf:perf[1]
... ... @@ -421,6 +421,13 @@
421 421 LIB_OBJS += util/data_map.o
422 422  
423 423 BUILTIN_OBJS += builtin-annotate.o
  424 +
  425 +BUILTIN_OBJS += builtin-bench.o
  426 +
  427 +# Benchmark modules
  428 +BUILTIN_OBJS += bench/sched-messaging.o
  429 +BUILTIN_OBJS += bench/sched-pipe.o
  430 +
424 431 BUILTIN_OBJS += builtin-help.o
425 432 BUILTIN_OBJS += builtin-sched.o
426 433 BUILTIN_OBJS += builtin-list.o
tools/perf/bench/bench.h
  1 +#ifndef BENCH_H
  2 +#define BENCH_H
  3 +/* Entry points of the individual benchmark suites. */
  4 +extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
  5 +extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
  6 +/* Output format names and their numeric identifiers. */
  7 +#define BENCH_FORMAT_DEFAULT_STR "default"
  8 +#define BENCH_FORMAT_DEFAULT 0
  9 +#define BENCH_FORMAT_SIMPLE_STR "simple"
  10 +#define BENCH_FORMAT_SIMPLE 1
  11 +/* Identifier for a format name that matches none of the above. */
  12 +#define BENCH_FORMAT_UNKNOWN -1
  13 +/* Selected output format; presumably one of the BENCH_FORMAT_* values - defined outside this header. */
  14 +extern int bench_format;
  15 +
  16 +#endif
tools/perf/bench/sched-messaging.c
  1 +/*
  2 + *
  3 + * sched-messaging.c
  4 + *
  5 + * messaging: Benchmark for scheduler and IPC mechanisms
  6 + *
  7 + * Based on hackbench by Rusty Russell <rusty@rustcorp.com.au>
  8 + * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  9 + *
  10 + */
  11 +
  12 +#include "../perf.h"
  13 +#include "../util/util.h"
  14 +#include "../util/parse-options.h"
  15 +#include "../builtin.h"
  16 +#include "bench.h"
  17 +
  18 +/* Test groups of 20 processes spraying to 20 receivers */
  19 +#include <pthread.h>
  20 +#include <stdio.h>
  21 +#include <stdlib.h>
  22 +#include <string.h>
  23 +#include <errno.h>
  24 +#include <unistd.h>
  25 +#include <sys/types.h>
  26 +#include <sys/socket.h>
  27 +#include <sys/wait.h>
  28 +#include <sys/time.h>
  29 +#include <sys/poll.h>
  30 +#include <limits.h>
  31 +/* Size in bytes of each message written by sender() and read by receiver(). */
  32 +#define DATASIZE 100
  33 +/* Benchmark settings with their defaults: fdpair() reads use_pipes, sender() reads loops, receiver() reads thread_mode; presumably set from the command-line options. */
  34 +static int use_pipes = 0;
  35 +static unsigned int loops = 100;
  36 +static unsigned int thread_mode = 0;
  37 +static unsigned int num_groups = 10;
  38 +/* Per-sender state: ready_out and wakefd are handed to ready(); out_fds is a trailing array of num_fds descriptors that sender() writes to. */
  39 +struct sender_context {
  40 + unsigned int num_fds;
  41 + int ready_out;
  42 + int wakefd;
  43 + int out_fds[0];
  44 +};
  45 +/* Per-receiver state: receiver() reads num_packets messages from in_fds[0] (closing in_fds[1] first when not in thread mode); ready_out and wakefd are handed to ready(). */
  46 +struct receiver_context {
  47 + unsigned int num_packets;
  48 + int in_fds[2];
  49 + int ready_out;
  50 + int wakefd;
  51 +};
  52 +/* Fatal-error helper: print msg followed by strerror(errno) to stderr, then exit(1). Does not return. */
  53 +static void barf(const char *msg)
  54 +{
  55 + fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno));
  56 + exit(1);
  57 +}
  58 +/* Fill fds[] with a connected descriptor pair: pipe() when use_pipes is set, otherwise an AF_UNIX SOCK_STREAM socketpair(). Returns only on success. */
  59 +static void fdpair(int fds[2])
  60 +{
  61 + if (use_pipes) {
  62 + if (pipe(fds) == 0)
  63 + return;
  64 + } else {
  65 + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0)
  66 + return;
  67 + }
  68 + /* Reached only when the chosen call failed; barf() names it and exits. */
  69 + barf(use_pipes ? "pipe()" : "socketpair()");
  70 +}
  71 +
/*
 * Announce that this worker is ready, then block until the start signal.
 *
 * One byte is written to ready_out; afterwards the worker sleeps in poll()
 * until wakefd becomes readable.  The wake-up byte is not read here, so the
 * descriptor stays readable after poll() returns.
 */
static void ready(int ready_out, int wakefd)
{
	/* Initialised so that write() never sends an indeterminate byte. */
	char dummy = 'R';
	struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };

	/* Tell them we're ready. */
	if (write(ready_out, &dummy, 1) != 1)
		barf("CLIENT: ready write");

	/* Wait for "GO" signal */
	if (poll(&pollfd, 1, -1) != 1)
		barf("poll");
}
  86 +
  87 +/* Sender sprays loops messages down each file descriptor */
  88 +static void *sender(struct sender_context *ctx)
  89 +{
  90 + char data[DATASIZE];
  91 + unsigned int i, j;
  92 +
  93 + ready(ctx->ready_out, ctx->wakefd);
  94 +
  95 + /* Now pump to every receiver. */
  96 + for (i = 0; i < loops; i++) {
  97 + for (j = 0; j < ctx->num_fds; j++) {
  98 + int ret, done = 0;
  99 +
  100 +again:
  101 + ret = write(ctx->out_fds[j], data + done,
  102 + sizeof(data)-done);
  103 + if (ret < 0)
  104 + barf("SENDER: write");
  105 + done += ret;
  106 + if (done < DATASIZE)
  107 + goto again;
  108 + }
  109 + }
  110 +
  111 + return NULL;
  112 +}
  113 +
  114 +
  115 +/* One receiver per fd */
  116 +static void *receiver(struct receiver_context* ctx)
  117 +{
  118 + unsigned int i;
  119 +
  120 + if (!thread_mode)
  121 + close(ctx->in_fds[1]);
  122 +
  123 + /* Wait for start... */
  124 + ready(ctx->ready_out, ctx->wakefd);
  125 +
  126 + /* Receive them all */
  127 + for (i = 0; i < ctx->num_packets; i++) {
  128 + char data[DATASIZE];
  129 + int ret, done = 0;
  130 +
  131 +again:
  132 + ret = read(ctx->in_fds[0], data + done, DATASIZE - done);
  133 + if (ret < 0)
  134 + barf("SERVER: read");
  135 + done += ret;
  136 + if (done < DATASIZE)
  137 + goto again;
  138 + }
  139 +
  140 + return NULL;
  141 +}
  142 +
  143 +static pthread_t create_worker(void *ctx, void *(*func)(void *))
  144 +{
  145 + pthread_attr_t attr;
  146 + pthread_t childid;
  147 + int err;
  148 +
  149 + if (!thread_mode) {
  150 + /* process mode */
  151 + /* Fork the receiver. */
  152 + switch (fork()) {
  153 + case -1:
  154 + barf("fork()");
  155 + break;
  156 + case 0:
  157 + (*func) (ctx);
  158 + exit(0);
  159 + break;
  160 + default:
  161 + break;
  162 + }
  163 +
  164 + return (pthread_t)0;
  165 + }
  166 +
  167 + if (pthread_attr_init(&attr) != 0)
  168 + barf("pthread_attr_init:");
  169 +
  170 +#ifndef __ia64__
  171 + if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
  172 + barf("pthread_attr_setstacksize");
  173 +#endif
  174 +
  175 + err = pthread_create(&childid, &attr, func, ctx);
  176 + if (err != 0) {
  177 + fprintf(stderr, "pthread_create failed: %s (%d)\n",
  178 + strerror(err), err);
  179 + exit(-1);
  180 + }
  181 + return childid;
  182 +}
  183 +
  184 +static void reap_worker(pthread_t id)
  185 +{
  186 + int proc_status;
  187 + void *thread_status;
  188 +
  189 + if (!thread_mode) {
  190 + /* process mode */
  191 + wait(&proc_status);
  192 + if (!WIFEXITED(proc_status))
  193 + exit(1);
  194 + } else {
  195 + pthread_join(id, &thread_status);
  196 + }
  197 +}
  198 +
  199 +/* One group of senders and receivers */
  200 +static unsigned int group(pthread_t *pth,
  201 + unsigned int num_fds,
  202 + int ready_out,
  203 + int wakefd)
  204 +{
  205 + unsigned int i;
  206 + struct sender_context *snd_ctx = malloc(sizeof(struct sender_context)
  207 + + num_fds * sizeof(int));
  208 +
  209 + if (!snd_ctx)
  210 + barf("malloc()");
  211 +
  212 + for (i = 0; i < num_fds; i++) {
  213 + int fds[2];
  214 + struct receiver_context *ctx = malloc(sizeof(*ctx));
  215 +
  216 + if (!ctx)
  217 + barf("malloc()");
  218 +
  219 +
  220 + /* Create the pipe between client and server */
  221 + fdpair(fds);
  222 +
  223 + ctx->num_packets = num_fds * loops;
  224 + ctx->in_fds[0] = fds[0];
  225 + ctx->in_fds[1] = fds[1];
  226 + ctx->ready_out = ready_out;
  227 + ctx->wakefd = wakefd;
  228 +
  229 + pth[i] = create_worker(ctx, (void *)receiver);
  230 +
  231 + snd_ctx->out_fds[i] = fds[1];
  232 + if (!thread_mode)
  233 + close(fds[0]);
  234 + }
  235 +
  236 + /* Now we have all the fds, fork the senders */
  237 + for (i = 0; i < num_fds; i++) {
  238 + snd_ctx->ready_out = ready_out;
  239 + snd_ctx->wakefd = wakefd;
  240 + snd_ctx->num_fds = num_fds;
  241 +
  242 + pth[num_fds+i] = create_worker(snd_ctx, (void *)sender);
  243 + }
  244 +
  245 + /* Close the fds we have left */
  246 + if (!thread_mode)
  247 + for (i = 0; i < num_fds; i++)
  248 + close(snd_ctx->out_fds[i]);
  249 +
  250 + /* Return number of children to reap */
  251 + return num_fds * 2;
  252 +}
  253 +
  254 +static const struct option options[] = {
  255 + OPT_BOOLEAN('p', "pipe", &use_pipes,
  256 + "Use pipe() instead of socketpair()"),
  257 + OPT_BOOLEAN('t', "thread", &thread_mode,
  258 + "Be multi thread instead of multi process"),
  259 + OPT_INTEGER('g', "group", &num_groups,
  260 + "Specify number of groups"),
  261 + OPT_INTEGER('l', "loop", &loops,
  262 + "Specify number of loops"),
  263 + OPT_END()
  264 +};
  265 +
/* NULL-terminated usage text handed to parse_options(). */
static const char * const bench_sched_message_usage[] = {
	"perf bench sched messaging <options>", NULL
};
  270 +
  271 +int bench_sched_messaging(int argc, const char **argv,
  272 + const char *prefix __used)
  273 +{
  274 + unsigned int i, total_children;
  275 + struct timeval start, stop, diff;
  276 + unsigned int num_fds = 20;
  277 + int readyfds[2], wakefds[2];
  278 + char dummy;
  279 + pthread_t *pth_tab;
  280 +
  281 + argc = parse_options(argc, argv, options,
  282 + bench_sched_message_usage, 0);
  283 +
  284 + pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
  285 + if (!pth_tab)
  286 + barf("main:malloc()");
  287 +
  288 + fdpair(readyfds);
  289 + fdpair(wakefds);
  290 +
  291 + total_children = 0;
  292 + for (i = 0; i < num_groups; i++)
  293 + total_children += group(pth_tab+total_children, num_fds,
  294 + readyfds[1], wakefds[0]);
  295 +
  296 + /* Wait for everyone to be ready */
  297 + for (i = 0; i < total_children; i++)
  298 + if (read(readyfds[0], &dummy, 1) != 1)
  299 + barf("Reading for readyfds");
  300 +
  301 + gettimeofday(&start, NULL);
  302 +
  303 + /* Kick them off */
  304 + if (write(wakefds[1], &dummy, 1) != 1)
  305 + barf("Writing to start them");
  306 +
  307 + /* Reap them all */
  308 + for (i = 0; i < total_children; i++)
  309 + reap_worker(pth_tab[i]);
  310 +
  311 + gettimeofday(&stop, NULL);
  312 +
  313 + timersub(&stop, &start, &diff);
  314 +
  315 + switch (bench_format) {
  316 + case BENCH_FORMAT_DEFAULT:
  317 + printf("# %d sender and receiver %s per group\n",
  318 + num_fds, thread_mode ? "threads" : "processes");
  319 + printf("# %d groups == %d %s run\n\n",
  320 + num_groups, num_groups * 2 * num_fds,
  321 + thread_mode ? "threads" : "processes");
  322 + printf(" %14s: %lu.%03lu [sec]\n", "Total time",
  323 + diff.tv_sec, diff.tv_usec/1000);
  324 + break;
  325 + case BENCH_FORMAT_SIMPLE:
  326 + printf("%lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000);
  327 + break;
  328 + default:
  329 + /* reaching here is something disaster */
  330 + fprintf(stderr, "Unknown format:%d\n", bench_format);
  331 + exit(1);
  332 + break;
  333 + }
  334 +
  335 + return 0;
  336 +}
tools/perf/bench/sched-pipe.c
  1 +/*
  2 + *
  3 + * sched-pipe.c
  4 + *
  5 + * pipe: Benchmark for pipe()
  6 + *
  7 + * Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com>
  8 + * http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
  9 + * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  10 + *
  11 + */
  12 +
  13 +#include "../perf.h"
  14 +#include "../util/util.h"
  15 +#include "../util/parse-options.h"
  16 +#include "../builtin.h"
  17 +#include "bench.h"
  18 +
  19 +#include <unistd.h>
  20 +#include <stdio.h>
  21 +#include <stdlib.h>
  22 +#include <signal.h>
  23 +#include <sys/wait.h>
  24 +#include <linux/unistd.h>
  25 +#include <string.h>
  26 +#include <errno.h>
  27 +#include <assert.h>
  28 +#include <sys/time.h>
  29 +#include <sys/types.h>
  30 +
  31 +#define LOOPS_DEFAULT 1000000
  32 +static int loops = LOOPS_DEFAULT;
  33 +
  34 +static const struct option options[] = {
  35 + OPT_INTEGER('l', "loop", &loops,
  36 + "Specify number of loops"),
  37 + OPT_END()
  38 +};
  39 +
/* NULL-terminated usage text handed to parse_options(). */
static const char * const bench_sched_pipe_usage[] = {
	"perf bench sched pipe <options>", NULL
};
  44 +
  45 +int bench_sched_pipe(int argc, const char **argv,
  46 + const char *prefix __used)
  47 +{
  48 + int pipe_1[2], pipe_2[2];
  49 + int m = 0, i;
  50 + struct timeval start, stop, diff;
  51 + unsigned long long result_usec = 0;
  52 +
  53 + /*
  54 + * why does "ret" exist?
  55 + * discarding returned value of read(), write()
  56 + * causes error in building environment for perf
  57 + */
  58 + int ret, wait_stat;
  59 + pid_t pid, retpid;
  60 +
  61 + argc = parse_options(argc, argv, options,
  62 + bench_sched_pipe_usage, 0);
  63 +
  64 + assert(!pipe(pipe_1));
  65 + assert(!pipe(pipe_2));
  66 +
  67 + pid = fork();
  68 + assert(pid >= 0);
  69 +
  70 + gettimeofday(&start, NULL);
  71 +
  72 + if (!pid) {
  73 + for (i = 0; i < loops; i++) {
  74 + ret = read(pipe_1[0], &m, sizeof(int));
  75 + ret = write(pipe_2[1], &m, sizeof(int));
  76 + }
  77 + } else {
  78 + for (i = 0; i < loops; i++) {
  79 + ret = write(pipe_1[1], &m, sizeof(int));
  80 + ret = read(pipe_2[0], &m, sizeof(int));
  81 + }
  82 + }
  83 +
  84 + gettimeofday(&stop, NULL);
  85 + timersub(&stop, &start, &diff);
  86 +
  87 + if (pid) {
  88 + retpid = waitpid(pid, &wait_stat, 0);
  89 + assert((retpid == pid) && WIFEXITED(wait_stat));
  90 + return 0;
  91 + }
  92 +
  93 + switch (bench_format) {
  94 + case BENCH_FORMAT_DEFAULT:
  95 + printf("# Extecuted %d pipe operations between two tasks\n\n",
  96 + loops);
  97 +
  98 + result_usec = diff.tv_sec * 1000000;
  99 + result_usec += diff.tv_usec;
  100 +
  101 + printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
  102 + diff.tv_sec, diff.tv_usec/1000);
  103 +
  104 + printf(" %14lf usecs/op\n",
  105 + (double)result_usec / (double)loops);
  106 + printf(" %14d ops/sec\n",
  107 + (int)((double)loops /
  108 + ((double)result_usec / (double)1000000)));
  109 + break;
  110 +
  111 + case BENCH_FORMAT_SIMPLE:
  112 + printf("%lu.%03lu\n",
  113 + diff.tv_sec, diff.tv_usec / 1000);
  114 + break;
  115 +
  116 + default:
  117 + /* reaching here is something disaster */
  118 + fprintf(stderr, "Unknown format:%d\n", bench_format);
  119 + exit(1);
  120 + break;
  121 + }
  122 +
  123 + return 0;
  124 +}
tools/perf/builtin-bench.c
  1 +/*
  2 + *
  3 + * builtin-bench.c
  4 + *
  5 + * General benchmarking subsystem provided by perf
  6 + *
  7 + * Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  8 + *
  9 + */
  10 +
  11 +/*
  12 + *
  13 + * Available subsystem list:
  14 + * sched ... scheduler and IPC mechanism
  15 + *
  16 + */
  17 +
  18 +#include "perf.h"
  19 +#include "util/util.h"
  20 +#include "util/parse-options.h"
  21 +#include "builtin.h"
  22 +#include "bench/bench.h"
  23 +
  24 +#include <stdio.h>
  25 +#include <stdlib.h>
  26 +#include <string.h>
  27 +
/* One runnable benchmark inside a subsystem. */
struct bench_suite {
	const char *name;	/* suite name matched against the command line */
	const char *summary;	/* one-line description shown in suite listings */
	/* entry point, called as fn(argc, argv, prefix) */
	int (*fn)(int, const char **, const char *);
};
  33 +
  34 +static struct bench_suite sched_suites[] = {
  35 + { "messaging",
  36 + "Benchmark for scheduler and IPC mechanisms",
  37 + bench_sched_messaging },
  38 + { "pipe",
  39 + "Flood of communication over pipe() between two processes",
  40 + bench_sched_pipe },
  41 + { NULL,
  42 + NULL,
  43 + NULL }
  44 +};
  45 +
/* A named collection of benchmark suites. */
struct bench_subsys {
	const char *name;		/* subsystem name matched against the command line */
	const char *summary;		/* one-line description shown in the usage text */
	struct bench_suite *suites;	/* table ended by an entry whose name is NULL */
};
  51 +
  52 +static struct bench_subsys subsystems[] = {
  53 + { "sched",
  54 + "scheduler and IPC mechanism",
  55 + sched_suites },
  56 + { NULL,
  57 + NULL,
  58 + NULL }
  59 +};
  60 +
  61 +static void dump_suites(int subsys_index)
  62 +{
  63 + int i;
  64 +
  65 + printf("List of available suites for %s...\n\n",
  66 + subsystems[subsys_index].name);
  67 +
  68 + for (i = 0; subsystems[subsys_index].suites[i].name; i++)
  69 + printf("\t%s: %s\n",
  70 + subsystems[subsys_index].suites[i].name,
  71 + subsystems[subsys_index].suites[i].summary);
  72 +
  73 + printf("\n");
  74 + return;
  75 +}
  76 +
  77 +static char *bench_format_str;
  78 +int bench_format = BENCH_FORMAT_DEFAULT;
  79 +
  80 +static const struct option bench_options[] = {
  81 + OPT_STRING('f', "format", &bench_format_str, "default",
  82 + "Specify format style"),
  83 + OPT_END()
  84 +};
  85 +
/* NULL-terminated usage text for "perf bench" itself. */
static const char * const bench_usage[] = {
	"perf bench [<common options>] <subsystem> <suite> [<options>]", NULL
};
  90 +
  91 +static void print_usage(void)
  92 +{
  93 + int i;
  94 +
  95 + printf("Usage: \n");
  96 + for (i = 0; bench_usage[i]; i++)
  97 + printf("\t%s\n", bench_usage[i]);
  98 + printf("\n");
  99 +
  100 + printf("List of available subsystems...\n\n");
  101 +
  102 + for (i = 0; subsystems[i].name; i++)
  103 + printf("\t%s: %s\n",
  104 + subsystems[i].name, subsystems[i].summary);
  105 + printf("\n");
  106 +}
  107 +
  108 +static int bench_str2int(char *str)
  109 +{
  110 + if (!str)
  111 + return BENCH_FORMAT_DEFAULT;
  112 +
  113 + if (!strcmp(str, BENCH_FORMAT_DEFAULT_STR))
  114 + return BENCH_FORMAT_DEFAULT;
  115 + else if (!strcmp(str, BENCH_FORMAT_SIMPLE_STR))
  116 + return BENCH_FORMAT_SIMPLE;
  117 +
  118 + return BENCH_FORMAT_UNKNOWN;
  119 +}
  120 +
  121 +int cmd_bench(int argc, const char **argv, const char *prefix __used)
  122 +{
  123 + int i, j, status = 0;
  124 +
  125 + if (argc < 2) {
  126 + /* No subsystem specified. */
  127 + print_usage();
  128 + goto end;
  129 + }
  130 +
  131 + argc = parse_options(argc, argv, bench_options, bench_usage,
  132 + PARSE_OPT_STOP_AT_NON_OPTION);
  133 +
  134 + bench_format = bench_str2int(bench_format_str);
  135 + if (bench_format == BENCH_FORMAT_UNKNOWN) {
  136 + printf("Unknown format descriptor:%s\n", bench_format_str);
  137 + goto end;
  138 + }
  139 +
  140 + if (argc < 1) {
  141 + print_usage();
  142 + goto end;
  143 + }
  144 +
  145 + for (i = 0; subsystems[i].name; i++) {
  146 + if (strcmp(subsystems[i].name, argv[0]))
  147 + continue;
  148 +
  149 + if (argc < 2) {
  150 + /* No suite specified. */
  151 + dump_suites(i);
  152 + goto end;
  153 + }
  154 +
  155 + for (j = 0; subsystems[i].suites[j].name; j++) {
  156 + if (strcmp(subsystems[i].suites[j].name, argv[1]))
  157 + continue;
  158 +
  159 + if (bench_format == BENCH_FORMAT_DEFAULT)
  160 + printf("# Running %s/%s benchmark...\n",
  161 + subsystems[i].name,
  162 + subsystems[i].suites[j].name);
  163 + status = subsystems[i].suites[j].fn(argc - 1,
  164 + argv + 1, prefix);
  165 + goto end;
  166 + }
  167 +
  168 + if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
  169 + dump_suites(i);
  170 + goto end;
  171 + }
  172 +
  173 + printf("Unknown suite:%s for %s\n", argv[1], argv[0]);
  174 + status = 1;
  175 + goto end;
  176 + }
  177 +
  178 + printf("Unknown subsystem:%s\n", argv[0]);
  179 + status = 1;
  180 +
  181 +end:
  182 + return status;
  183 +}
tools/perf/builtin.h
... ... @@ -15,6 +15,7 @@
15 15 extern int check_pager_config(const char *cmd);
16 16  
17 17 extern int cmd_annotate(int argc, const char **argv, const char *prefix);
  18 +extern int cmd_bench(int argc, const char **argv, const char *prefix);
18 19 extern int cmd_help(int argc, const char **argv, const char *prefix);
19 20 extern int cmd_sched(int argc, const char **argv, const char *prefix);
20 21 extern int cmd_list(int argc, const char **argv, const char *prefix);
tools/perf/command-list.txt
... ... @@ -3,6 +3,7 @@
3 3 # command name category [deprecated] [common]
4 4 #
5 5 perf-annotate mainporcelain common
  6 +perf-bench mainporcelain common
6 7 perf-list mainporcelain common
7 8 perf-sched mainporcelain common
8 9 perf-record mainporcelain common
tools/perf/design.txt
... ... @@ -137,6 +137,8 @@
137 137 PERF_COUNT_SW_CPU_MIGRATIONS = 4,
138 138 PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
139 139 PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
  140 + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
  141 + PERF_COUNT_SW_EMULATION_FAULTS = 8,
140 142 };
141 143  
142 144 Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
... ... @@ -289,6 +289,7 @@
289 289 { "list", cmd_list, 0 },
290 290 { "record", cmd_record, 0 },
291 291 { "report", cmd_report, 0 },
  292 + { "bench", cmd_bench, 0 },
292 293 { "stat", cmd_stat, 0 },
293 294 { "timechart", cmd_timechart, 0 },
294 295 { "top", cmd_top, 0 },
tools/perf/util/parse-events.c
... ... @@ -48,6 +48,8 @@
48 48 { CSW(PAGE_FAULTS_MAJ), "major-faults", "" },
49 49 { CSW(CONTEXT_SWITCHES), "context-switches", "cs" },
50 50 { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
  51 + { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" },
  52 + { CSW(EMULATION_FAULTS), "emulation-faults", "" },
51 53 };
52 54  
53 55 #define __PERF_EVENT_FIELD(config, name) \
... ... @@ -76,6 +78,8 @@
76 78 "CPU-migrations",
77 79 "minor-faults",
78 80 "major-faults",
  81 + "alignment-faults",
  82 + "emulation-faults",
79 83 };
80 84  
81 85 #define MAX_ALIASES 8