Commit 7a7868326d77416018e8f3b4c4697a3c57444549

Authored by Michael Ellerman
Committed by Benjamin Herrenschmidt
1 parent 240686c136

powerpc/perf: Add an explicit flag indicating presence of SLOT field

In perf_ip_adjust() we potentially use the MMCRA[SLOT] field to adjust
the reported IP of a sampled instruction.

Currently the logic is written so that if the backend does NOT have
the PPMU_ALT_SIPR flag set then we assume MMCRA[SLOT] exists.

However on power8 we do not want to set ALT_SIPR (the SIPR/HV bits live in
a third location there), and we also do not have MMCRA[SLOT].

So add a new flag which only indicates whether MMCRA[SLOT] exists.

Naively we'd set the new flag on everything except power6/7, because they
set ALT_SIPR and the new flag has the opposite polarity. But it's more
complicated than that.

mpc7450 is 32-bit, and uses its own version of perf_ip_adjust()
which doesn't use MMCRA[SLOT], so it doesn't need the new flag set and
the behaviour is unchanged.

PPC970 (and I assume power4) don't have MMCRA[SLOT], so they shouldn't have
the new flag set. This is a behaviour change on those CPUs, though we were
probably getting lucky and the bits in question were 0.

power5 and power5+ set the new flag, behaviour unchanged.

power6 & power7 do not set the new flag, behaviour unchanged.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
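
For context, the functional change is confined to the gate at the top of
perf_ip_adjust() plus the new flag definition in perf_event_server.h: the old
test was (mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR), the
new one is (ppmu->flags & PPMU_HAS_SSLOT) && (mmcra & MMCRA_SAMPLE_ENABLE).
The stand-alone C sketch below models that logic; the PPMU_* flag values and
the 4 * (slot - 1) arithmetic are taken from the patch, while the MMCRA_*
bit positions are illustrative placeholders, not the real register layout.

/*
 * Minimal user-space sketch of the SLOT adjustment gated by PPMU_HAS_SSLOT.
 * The MMCRA_* constants below are placeholders for illustration only.
 */
#include <stdio.h>

#define PPMU_ALT_SIPR          0x00000002   /* uses alternate posn for SIPR/HV */
#define PPMU_HAS_SSLOT         0x00000020   /* has sampled slot in MMCRA */

#define MMCRA_SAMPLE_ENABLE    0x00000001UL /* placeholder bit position */
#define MMCRA_SLOT             0x0000000eUL /* placeholder 3-bit slot field */
#define MMCRA_SLOT_SHIFT       1

/* New logic: only trust MMCRA[SLOT] when the backend says it exists. */
static unsigned long perf_ip_adjust(unsigned long mmcra, unsigned int flags)
{
        if ((flags & PPMU_HAS_SSLOT) && (mmcra & MMCRA_SAMPLE_ENABLE)) {
                unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
                if (slot > 1)
                        return 4 * (slot - 1); /* each slot is one 4-byte instruction */
        }
        return 0;
}

int main(void)
{
        /* Sampling enabled, slot = 3: the reported IP is adjusted by 8 bytes. */
        unsigned long mmcra = MMCRA_SAMPLE_ENABLE | (3UL << MMCRA_SLOT_SHIFT);

        printf("with PPMU_HAS_SSLOT:    adjust = %lu\n",
               perf_ip_adjust(mmcra, PPMU_HAS_SSLOT));
        printf("without PPMU_HAS_SSLOT: adjust = %lu\n",
               perf_ip_adjust(mmcra, 0)); /* e.g. power8 or PPC970 */
        return 0;
}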

Showing 4 changed files with 5 additions and 2 deletions

arch/powerpc/include/asm/perf_event_server.h
1 /* 1 /*
2 * Performance event support - PowerPC classic/server specific definitions. 2 * Performance event support - PowerPC classic/server specific definitions.
3 * 3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation. 4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12 #include <linux/types.h> 12 #include <linux/types.h>
13 #include <asm/hw_irq.h> 13 #include <asm/hw_irq.h>
14 #include <linux/device.h> 14 #include <linux/device.h>
15 15
16 #define MAX_HWEVENTS 8 16 #define MAX_HWEVENTS 8
17 #define MAX_EVENT_ALTERNATIVES 8 17 #define MAX_EVENT_ALTERNATIVES 8
18 #define MAX_LIMITED_HWCOUNTERS 2 18 #define MAX_LIMITED_HWCOUNTERS 2
19 19
20 /* 20 /*
21 * This struct provides the constants and functions needed to 21 * This struct provides the constants and functions needed to
22 * describe the PMU on a particular POWER-family CPU. 22 * describe the PMU on a particular POWER-family CPU.
23 */ 23 */
24 struct power_pmu { 24 struct power_pmu {
25 const char *name; 25 const char *name;
26 int n_counter; 26 int n_counter;
27 int max_alternatives; 27 int max_alternatives;
28 unsigned long add_fields; 28 unsigned long add_fields;
29 unsigned long test_adder; 29 unsigned long test_adder;
30 int (*compute_mmcr)(u64 events[], int n_ev, 30 int (*compute_mmcr)(u64 events[], int n_ev,
31 unsigned int hwc[], unsigned long mmcr[]); 31 unsigned int hwc[], unsigned long mmcr[]);
32 int (*get_constraint)(u64 event_id, unsigned long *mskp, 32 int (*get_constraint)(u64 event_id, unsigned long *mskp,
33 unsigned long *valp); 33 unsigned long *valp);
34 int (*get_alternatives)(u64 event_id, unsigned int flags, 34 int (*get_alternatives)(u64 event_id, unsigned int flags,
35 u64 alt[]); 35 u64 alt[]);
36 void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); 36 void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
37 int (*limited_pmc_event)(u64 event_id); 37 int (*limited_pmc_event)(u64 event_id);
38 u32 flags; 38 u32 flags;
39 const struct attribute_group **attr_groups; 39 const struct attribute_group **attr_groups;
40 int n_generic; 40 int n_generic;
41 int *generic_events; 41 int *generic_events;
42 int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] 42 int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
43 [PERF_COUNT_HW_CACHE_OP_MAX] 43 [PERF_COUNT_HW_CACHE_OP_MAX]
44 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 44 [PERF_COUNT_HW_CACHE_RESULT_MAX];
45 }; 45 };
46 46
47 /* 47 /*
48 * Values for power_pmu.flags 48 * Values for power_pmu.flags
49 */ 49 */
50 #define PPMU_LIMITED_PMC5_6 0x00000001 /* PMC5/6 have limited function */ 50 #define PPMU_LIMITED_PMC5_6 0x00000001 /* PMC5/6 have limited function */
51 #define PPMU_ALT_SIPR 0x00000002 /* uses alternate posn for SIPR/HV */ 51 #define PPMU_ALT_SIPR 0x00000002 /* uses alternate posn for SIPR/HV */
52 #define PPMU_NO_SIPR 0x00000004 /* no SIPR/HV in MMCRA at all */ 52 #define PPMU_NO_SIPR 0x00000004 /* no SIPR/HV in MMCRA at all */
53 #define PPMU_NO_CONT_SAMPLING 0x00000008 /* no continuous sampling */ 53 #define PPMU_NO_CONT_SAMPLING 0x00000008 /* no continuous sampling */
54 #define PPMU_SIAR_VALID 0x00000010 /* Processor has SIAR Valid bit */ 54 #define PPMU_SIAR_VALID 0x00000010 /* Processor has SIAR Valid bit */
55 #define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */
55 56
56 /* 57 /*
57 * Values for flags to get_alternatives() 58 * Values for flags to get_alternatives()
58 */ 59 */
59 #define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */ 60 #define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */
60 #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ 61 #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */
61 #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ 62 #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */
62 63
63 extern int register_power_pmu(struct power_pmu *); 64 extern int register_power_pmu(struct power_pmu *);
64 65
65 struct pt_regs; 66 struct pt_regs;
66 extern unsigned long perf_misc_flags(struct pt_regs *regs); 67 extern unsigned long perf_misc_flags(struct pt_regs *regs);
67 extern unsigned long perf_instruction_pointer(struct pt_regs *regs); 68 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
68 69
69 /* 70 /*
70 * Only override the default definitions in include/linux/perf_event.h 71 * Only override the default definitions in include/linux/perf_event.h
71 * if we have hardware PMU support. 72 * if we have hardware PMU support.
72 */ 73 */
73 #ifdef CONFIG_PPC_PERF_CTRS 74 #ifdef CONFIG_PPC_PERF_CTRS
74 #define perf_misc_flags(regs) perf_misc_flags(regs) 75 #define perf_misc_flags(regs) perf_misc_flags(regs)
75 #endif 76 #endif
76 77
77 /* 78 /*
78 * The power_pmu.get_constraint function returns a 32/64-bit value and 79 * The power_pmu.get_constraint function returns a 32/64-bit value and
79 * a 32/64-bit mask that express the constraints between this event_id and 80 * a 32/64-bit mask that express the constraints between this event_id and
80 * other events. 81 * other events.
81 * 82 *
82 * The value and mask are divided up into (non-overlapping) bitfields 83 * The value and mask are divided up into (non-overlapping) bitfields
83 * of three different types: 84 * of three different types:
84 * 85 *
85 * Select field: this expresses the constraint that some set of bits 86 * Select field: this expresses the constraint that some set of bits
86 * in MMCR* needs to be set to a specific value for this event_id. For a 87 * in MMCR* needs to be set to a specific value for this event_id. For a
87 * select field, the mask contains 1s in every bit of the field, and 88 * select field, the mask contains 1s in every bit of the field, and
88 * the value contains a unique value for each possible setting of the 89 * the value contains a unique value for each possible setting of the
89 * MMCR* bits. The constraint checking code will ensure that two events 90 * MMCR* bits. The constraint checking code will ensure that two events
90 * that set the same field in their masks have the same value in their 91 * that set the same field in their masks have the same value in their
91 * value dwords. 92 * value dwords.
92 * 93 *
93 * Add field: this expresses the constraint that there can be at most 94 * Add field: this expresses the constraint that there can be at most
94 * N events in a particular class. A field of k bits can be used for 95 * N events in a particular class. A field of k bits can be used for
95 * N <= 2^(k-1) - 1. The mask has the most significant bit of the field 96 * N <= 2^(k-1) - 1. The mask has the most significant bit of the field
96 * set (and the other bits 0), and the value has only the least significant 97 * set (and the other bits 0), and the value has only the least significant
97 * bit of the field set. In addition, the 'add_fields' and 'test_adder' 98 * bit of the field set. In addition, the 'add_fields' and 'test_adder'
98 * in the struct power_pmu for this processor come into play. The 99 * in the struct power_pmu for this processor come into play. The
99 * add_fields value contains 1 in the LSB of the field, and the 100 * add_fields value contains 1 in the LSB of the field, and the
100 * test_adder contains 2^(k-1) - 1 - N in the field. 101 * test_adder contains 2^(k-1) - 1 - N in the field.
101 * 102 *
102 * NAND field: this expresses the constraint that you may not have events 103 * NAND field: this expresses the constraint that you may not have events
103 * in all of a set of classes. (For example, on PPC970, you can't select 104 * in all of a set of classes. (For example, on PPC970, you can't select
104 * events from the FPU, ISU and IDU simultaneously, although any two are 105 * events from the FPU, ISU and IDU simultaneously, although any two are
105 * possible.) For N classes, the field is N+1 bits wide, and each class 106 * possible.) For N classes, the field is N+1 bits wide, and each class
106 * is assigned one bit from the least-significant N bits. The mask has 107 * is assigned one bit from the least-significant N bits. The mask has
107 * only the most-significant bit set, and the value has only the bit 108 * only the most-significant bit set, and the value has only the bit
108 * for the event_id's class set. The test_adder has the least significant 109 * for the event_id's class set. The test_adder has the least significant
109 * bit set in the field. 110 * bit set in the field.
110 * 111 *
111 * If an event_id is not subject to the constraint expressed by a particular 112 * If an event_id is not subject to the constraint expressed by a particular
112 * field, then it will have 0 in both the mask and value for that field. 113 * field, then it will have 0 in both the mask and value for that field.
113 */ 114 */
114 115
115 extern ssize_t power_events_sysfs_show(struct device *dev, 116 extern ssize_t power_events_sysfs_show(struct device *dev,
116 struct device_attribute *attr, char *page); 117 struct device_attribute *attr, char *page);
117 118
118 /* 119 /*
119 * EVENT_VAR() is same as PMU_EVENT_VAR with a suffix. 120 * EVENT_VAR() is same as PMU_EVENT_VAR with a suffix.
120 * 121 *
121 * Having a suffix allows us to have aliases in sysfs - eg: the generic 122 * Having a suffix allows us to have aliases in sysfs - eg: the generic
122 * event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and 123 * event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and
123 * 'PM_CYC' where the latter is the name by which the event is known in 124 * 'PM_CYC' where the latter is the name by which the event is known in
124 * POWER CPU specification. 125 * POWER CPU specification.
125 */ 126 */
126 #define EVENT_VAR(_id, _suffix) event_attr_##_id##_suffix 127 #define EVENT_VAR(_id, _suffix) event_attr_##_id##_suffix
127 #define EVENT_PTR(_id, _suffix) &EVENT_VAR(_id, _suffix).attr.attr 128 #define EVENT_PTR(_id, _suffix) &EVENT_VAR(_id, _suffix).attr.attr
128 129
129 #define EVENT_ATTR(_name, _id, _suffix) \ 130 #define EVENT_ATTR(_name, _id, _suffix) \
130 PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_PM_##_id, \ 131 PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_PM_##_id, \
131 power_events_sysfs_show) 132 power_events_sysfs_show)
132 133
133 #define GENERIC_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _g) 134 #define GENERIC_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _g)
134 #define GENERIC_EVENT_PTR(_id) EVENT_PTR(_id, _g) 135 #define GENERIC_EVENT_PTR(_id) EVENT_PTR(_id, _g)
135 136
136 #define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(PM_##_name, _id, _p) 137 #define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(PM_##_name, _id, _p)
137 #define POWER_EVENT_PTR(_id) EVENT_PTR(_id, _p) 138 #define POWER_EVENT_PTR(_id) EVENT_PTR(_id, _p)
138 139
arch/powerpc/perf/core-book3s.c
1 /* 1 /*
2 * Performance event support - powerpc architecture code 2 * Performance event support - powerpc architecture code
3 * 3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation. 4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 #include <linux/kernel.h> 11 #include <linux/kernel.h>
12 #include <linux/sched.h> 12 #include <linux/sched.h>
13 #include <linux/perf_event.h> 13 #include <linux/perf_event.h>
14 #include <linux/percpu.h> 14 #include <linux/percpu.h>
15 #include <linux/hardirq.h> 15 #include <linux/hardirq.h>
16 #include <asm/reg.h> 16 #include <asm/reg.h>
17 #include <asm/pmc.h> 17 #include <asm/pmc.h>
18 #include <asm/machdep.h> 18 #include <asm/machdep.h>
19 #include <asm/firmware.h> 19 #include <asm/firmware.h>
20 #include <asm/ptrace.h> 20 #include <asm/ptrace.h>
21 21
22 struct cpu_hw_events { 22 struct cpu_hw_events {
23 int n_events; 23 int n_events;
24 int n_percpu; 24 int n_percpu;
25 int disabled; 25 int disabled;
26 int n_added; 26 int n_added;
27 int n_limited; 27 int n_limited;
28 u8 pmcs_enabled; 28 u8 pmcs_enabled;
29 struct perf_event *event[MAX_HWEVENTS]; 29 struct perf_event *event[MAX_HWEVENTS];
30 u64 events[MAX_HWEVENTS]; 30 u64 events[MAX_HWEVENTS];
31 unsigned int flags[MAX_HWEVENTS]; 31 unsigned int flags[MAX_HWEVENTS];
32 unsigned long mmcr[3]; 32 unsigned long mmcr[3];
33 struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS]; 33 struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
34 u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; 34 u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
35 u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; 35 u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
36 unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; 36 unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
37 unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; 37 unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
38 38
39 unsigned int group_flag; 39 unsigned int group_flag;
40 int n_txn_start; 40 int n_txn_start;
41 }; 41 };
42 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); 42 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
43 43
44 struct power_pmu *ppmu; 44 struct power_pmu *ppmu;
45 45
46 /* 46 /*
47 * Normally, to ignore kernel events we set the FCS (freeze counters 47 * Normally, to ignore kernel events we set the FCS (freeze counters
48 * in supervisor mode) bit in MMCR0, but if the kernel runs with the 48 * in supervisor mode) bit in MMCR0, but if the kernel runs with the
49 * hypervisor bit set in the MSR, or if we are running on a processor 49 * hypervisor bit set in the MSR, or if we are running on a processor
50 * where the hypervisor bit is forced to 1 (as on Apple G5 processors), 50 * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
51 * then we need to use the FCHV bit to ignore kernel events. 51 * then we need to use the FCHV bit to ignore kernel events.
52 */ 52 */
53 static unsigned int freeze_events_kernel = MMCR0_FCS; 53 static unsigned int freeze_events_kernel = MMCR0_FCS;
54 54
55 /* 55 /*
56 * 32-bit doesn't have MMCRA but does have an MMCR2, 56 * 32-bit doesn't have MMCRA but does have an MMCR2,
57 * and a few other names are different. 57 * and a few other names are different.
58 */ 58 */
59 #ifdef CONFIG_PPC32 59 #ifdef CONFIG_PPC32
60 60
61 #define MMCR0_FCHV 0 61 #define MMCR0_FCHV 0
62 #define MMCR0_PMCjCE MMCR0_PMCnCE 62 #define MMCR0_PMCjCE MMCR0_PMCnCE
63 63
64 #define SPRN_MMCRA SPRN_MMCR2 64 #define SPRN_MMCRA SPRN_MMCR2
65 #define MMCRA_SAMPLE_ENABLE 0 65 #define MMCRA_SAMPLE_ENABLE 0
66 66
67 static inline unsigned long perf_ip_adjust(struct pt_regs *regs) 67 static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
68 { 68 {
69 return 0; 69 return 0;
70 } 70 }
71 static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } 71 static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
72 static inline u32 perf_get_misc_flags(struct pt_regs *regs) 72 static inline u32 perf_get_misc_flags(struct pt_regs *regs)
73 { 73 {
74 return 0; 74 return 0;
75 } 75 }
76 static inline void perf_read_regs(struct pt_regs *regs) 76 static inline void perf_read_regs(struct pt_regs *regs)
77 { 77 {
78 regs->result = 0; 78 regs->result = 0;
79 } 79 }
80 static inline int perf_intr_is_nmi(struct pt_regs *regs) 80 static inline int perf_intr_is_nmi(struct pt_regs *regs)
81 { 81 {
82 return 0; 82 return 0;
83 } 83 }
84 84
85 static inline int siar_valid(struct pt_regs *regs) 85 static inline int siar_valid(struct pt_regs *regs)
86 { 86 {
87 return 1; 87 return 1;
88 } 88 }
89 89
90 #endif /* CONFIG_PPC32 */ 90 #endif /* CONFIG_PPC32 */
91 91
92 /* 92 /*
93 * Things that are specific to 64-bit implementations. 93 * Things that are specific to 64-bit implementations.
94 */ 94 */
95 #ifdef CONFIG_PPC64 95 #ifdef CONFIG_PPC64
96 96
97 static inline unsigned long perf_ip_adjust(struct pt_regs *regs) 97 static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
98 { 98 {
99 unsigned long mmcra = regs->dsisr; 99 unsigned long mmcra = regs->dsisr;
100 100
101 if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { 101 if ((ppmu->flags & PPMU_HAS_SSLOT) && (mmcra & MMCRA_SAMPLE_ENABLE)) {
102 unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; 102 unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
103 if (slot > 1) 103 if (slot > 1)
104 return 4 * (slot - 1); 104 return 4 * (slot - 1);
105 } 105 }
106
106 return 0; 107 return 0;
107 } 108 }
108 109
109 /* 110 /*
110 * The user wants a data address recorded. 111 * The user wants a data address recorded.
111 * If we're not doing instruction sampling, give them the SDAR 112 * If we're not doing instruction sampling, give them the SDAR
112 * (sampled data address). If we are doing instruction sampling, then 113 * (sampled data address). If we are doing instruction sampling, then
113 * only give them the SDAR if it corresponds to the instruction 114 * only give them the SDAR if it corresponds to the instruction
114 * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC or 115 * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC or
115 * the [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA. 116 * the [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA.
116 */ 117 */
117 static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) 118 static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
118 { 119 {
119 unsigned long mmcra = regs->dsisr; 120 unsigned long mmcra = regs->dsisr;
120 unsigned long sdsync; 121 unsigned long sdsync;
121 122
122 if (ppmu->flags & PPMU_SIAR_VALID) 123 if (ppmu->flags & PPMU_SIAR_VALID)
123 sdsync = POWER7P_MMCRA_SDAR_VALID; 124 sdsync = POWER7P_MMCRA_SDAR_VALID;
124 else if (ppmu->flags & PPMU_ALT_SIPR) 125 else if (ppmu->flags & PPMU_ALT_SIPR)
125 sdsync = POWER6_MMCRA_SDSYNC; 126 sdsync = POWER6_MMCRA_SDSYNC;
126 else 127 else
127 sdsync = MMCRA_SDSYNC; 128 sdsync = MMCRA_SDSYNC;
128 129
129 if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) 130 if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
130 *addrp = mfspr(SPRN_SDAR); 131 *addrp = mfspr(SPRN_SDAR);
131 } 132 }
132 133
133 static bool mmcra_sihv(unsigned long mmcra) 134 static bool mmcra_sihv(unsigned long mmcra)
134 { 135 {
135 unsigned long sihv = MMCRA_SIHV; 136 unsigned long sihv = MMCRA_SIHV;
136 137
137 if (ppmu->flags & PPMU_ALT_SIPR) 138 if (ppmu->flags & PPMU_ALT_SIPR)
138 sihv = POWER6_MMCRA_SIHV; 139 sihv = POWER6_MMCRA_SIHV;
139 140
140 return !!(mmcra & sihv); 141 return !!(mmcra & sihv);
141 } 142 }
142 143
143 static bool mmcra_sipr(unsigned long mmcra) 144 static bool mmcra_sipr(unsigned long mmcra)
144 { 145 {
145 unsigned long sipr = MMCRA_SIPR; 146 unsigned long sipr = MMCRA_SIPR;
146 147
147 if (ppmu->flags & PPMU_ALT_SIPR) 148 if (ppmu->flags & PPMU_ALT_SIPR)
148 sipr = POWER6_MMCRA_SIPR; 149 sipr = POWER6_MMCRA_SIPR;
149 150
150 return !!(mmcra & sipr); 151 return !!(mmcra & sipr);
151 } 152 }
152 153
153 static inline u32 perf_flags_from_msr(struct pt_regs *regs) 154 static inline u32 perf_flags_from_msr(struct pt_regs *regs)
154 { 155 {
155 if (regs->msr & MSR_PR) 156 if (regs->msr & MSR_PR)
156 return PERF_RECORD_MISC_USER; 157 return PERF_RECORD_MISC_USER;
157 if ((regs->msr & MSR_HV) && freeze_events_kernel != MMCR0_FCHV) 158 if ((regs->msr & MSR_HV) && freeze_events_kernel != MMCR0_FCHV)
158 return PERF_RECORD_MISC_HYPERVISOR; 159 return PERF_RECORD_MISC_HYPERVISOR;
159 return PERF_RECORD_MISC_KERNEL; 160 return PERF_RECORD_MISC_KERNEL;
160 } 161 }
161 162
162 static inline u32 perf_get_misc_flags(struct pt_regs *regs) 163 static inline u32 perf_get_misc_flags(struct pt_regs *regs)
163 { 164 {
164 unsigned long mmcra = regs->dsisr; 165 unsigned long mmcra = regs->dsisr;
165 unsigned long use_siar = regs->result; 166 unsigned long use_siar = regs->result;
166 167
167 if (!use_siar) 168 if (!use_siar)
168 return perf_flags_from_msr(regs); 169 return perf_flags_from_msr(regs);
169 170
170 /* 171 /*
171 * If we don't have flags in MMCRA, rather than using 172 * If we don't have flags in MMCRA, rather than using
172 * the MSR, we intuit the flags from the address in 173 * the MSR, we intuit the flags from the address in
173 * SIAR which should give slightly more reliable 174 * SIAR which should give slightly more reliable
174 * results 175 * results
175 */ 176 */
176 if (ppmu->flags & PPMU_NO_SIPR) { 177 if (ppmu->flags & PPMU_NO_SIPR) {
177 unsigned long siar = mfspr(SPRN_SIAR); 178 unsigned long siar = mfspr(SPRN_SIAR);
178 if (siar >= PAGE_OFFSET) 179 if (siar >= PAGE_OFFSET)
179 return PERF_RECORD_MISC_KERNEL; 180 return PERF_RECORD_MISC_KERNEL;
180 return PERF_RECORD_MISC_USER; 181 return PERF_RECORD_MISC_USER;
181 } 182 }
182 183
183 /* PR has priority over HV, so order below is important */ 184 /* PR has priority over HV, so order below is important */
184 if (mmcra_sipr(mmcra)) 185 if (mmcra_sipr(mmcra))
185 return PERF_RECORD_MISC_USER; 186 return PERF_RECORD_MISC_USER;
186 if (mmcra_sihv(mmcra) && (freeze_events_kernel != MMCR0_FCHV)) 187 if (mmcra_sihv(mmcra) && (freeze_events_kernel != MMCR0_FCHV))
187 return PERF_RECORD_MISC_HYPERVISOR; 188 return PERF_RECORD_MISC_HYPERVISOR;
188 return PERF_RECORD_MISC_KERNEL; 189 return PERF_RECORD_MISC_KERNEL;
189 } 190 }
190 191
191 /* 192 /*
192 * Overload regs->dsisr to store MMCRA so we only need to read it once 193 * Overload regs->dsisr to store MMCRA so we only need to read it once
193 * on each interrupt. 194 * on each interrupt.
194 * Overload regs->result to specify whether we should use the MSR (result 195 * Overload regs->result to specify whether we should use the MSR (result
195 * is zero) or the SIAR (result is non zero). 196 * is zero) or the SIAR (result is non zero).
196 */ 197 */
197 static inline void perf_read_regs(struct pt_regs *regs) 198 static inline void perf_read_regs(struct pt_regs *regs)
198 { 199 {
199 unsigned long mmcra = mfspr(SPRN_MMCRA); 200 unsigned long mmcra = mfspr(SPRN_MMCRA);
200 int marked = mmcra & MMCRA_SAMPLE_ENABLE; 201 int marked = mmcra & MMCRA_SAMPLE_ENABLE;
201 int use_siar; 202 int use_siar;
202 203
203 /* 204 /*
204 * If this isn't a PMU exception (eg a software event) the SIAR is 205 * If this isn't a PMU exception (eg a software event) the SIAR is
205 * not valid. Use pt_regs. 206 * not valid. Use pt_regs.
206 * 207 *
207 * If it is a marked event use the SIAR. 208 * If it is a marked event use the SIAR.
208 * 209 *
209 * If the PMU doesn't update the SIAR for non marked events use 210 * If the PMU doesn't update the SIAR for non marked events use
210 * pt_regs. 211 * pt_regs.
211 * 212 *
212 * If the PMU has HV/PR flags then check to see if they 213 * If the PMU has HV/PR flags then check to see if they
213 * place the exception in userspace. If so, use pt_regs. In 214 * place the exception in userspace. If so, use pt_regs. In
214 * continuous sampling mode the SIAR and the PMU exception are 215 * continuous sampling mode the SIAR and the PMU exception are
215 * not synchronised, so they may be many instructions apart. 216 * not synchronised, so they may be many instructions apart.
216 * This can result in confusing backtraces. We still want 217 * This can result in confusing backtraces. We still want
217 * hypervisor samples as well as samples in the kernel with 218 * hypervisor samples as well as samples in the kernel with
218 * interrupts off hence the userspace check. 219 * interrupts off hence the userspace check.
219 */ 220 */
220 if (TRAP(regs) != 0xf00) 221 if (TRAP(regs) != 0xf00)
221 use_siar = 0; 222 use_siar = 0;
222 else if (marked) 223 else if (marked)
223 use_siar = 1; 224 use_siar = 1;
224 else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING)) 225 else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING))
225 use_siar = 0; 226 use_siar = 0;
226 else if (!(ppmu->flags & PPMU_NO_SIPR) && mmcra_sipr(mmcra)) 227 else if (!(ppmu->flags & PPMU_NO_SIPR) && mmcra_sipr(mmcra))
227 use_siar = 0; 228 use_siar = 0;
228 else 229 else
229 use_siar = 1; 230 use_siar = 1;
230 231
231 regs->dsisr = mmcra; 232 regs->dsisr = mmcra;
232 regs->result = use_siar; 233 regs->result = use_siar;
233 } 234 }
234 235
235 /* 236 /*
236 * If interrupts were soft-disabled when a PMU interrupt occurs, treat 237 * If interrupts were soft-disabled when a PMU interrupt occurs, treat
237 * it as an NMI. 238 * it as an NMI.
238 */ 239 */
239 static inline int perf_intr_is_nmi(struct pt_regs *regs) 240 static inline int perf_intr_is_nmi(struct pt_regs *regs)
240 { 241 {
241 return !regs->softe; 242 return !regs->softe;
242 } 243 }
243 244
244 /* 245 /*
245 * On processors like P7+ that have the SIAR-Valid bit, marked instructions 246 * On processors like P7+ that have the SIAR-Valid bit, marked instructions
246 * must be sampled only if the SIAR-valid bit is set. 247 * must be sampled only if the SIAR-valid bit is set.
247 * 248 *
248 * For unmarked instructions and for processors that don't have the SIAR-Valid 249 * For unmarked instructions and for processors that don't have the SIAR-Valid
249 * bit, assume that SIAR is valid. 250 * bit, assume that SIAR is valid.
250 */ 251 */
251 static inline int siar_valid(struct pt_regs *regs) 252 static inline int siar_valid(struct pt_regs *regs)
252 { 253 {
253 unsigned long mmcra = regs->dsisr; 254 unsigned long mmcra = regs->dsisr;
254 int marked = mmcra & MMCRA_SAMPLE_ENABLE; 255 int marked = mmcra & MMCRA_SAMPLE_ENABLE;
255 256
256 if ((ppmu->flags & PPMU_SIAR_VALID) && marked) 257 if ((ppmu->flags & PPMU_SIAR_VALID) && marked)
257 return mmcra & POWER7P_MMCRA_SIAR_VALID; 258 return mmcra & POWER7P_MMCRA_SIAR_VALID;
258 259
259 return 1; 260 return 1;
260 } 261 }
261 262
262 #endif /* CONFIG_PPC64 */ 263 #endif /* CONFIG_PPC64 */
263 264
264 static void perf_event_interrupt(struct pt_regs *regs); 265 static void perf_event_interrupt(struct pt_regs *regs);
265 266
266 void perf_event_print_debug(void) 267 void perf_event_print_debug(void)
267 { 268 {
268 } 269 }
269 270
270 /* 271 /*
271 * Read one performance monitor counter (PMC). 272 * Read one performance monitor counter (PMC).
272 */ 273 */
273 static unsigned long read_pmc(int idx) 274 static unsigned long read_pmc(int idx)
274 { 275 {
275 unsigned long val; 276 unsigned long val;
276 277
277 switch (idx) { 278 switch (idx) {
278 case 1: 279 case 1:
279 val = mfspr(SPRN_PMC1); 280 val = mfspr(SPRN_PMC1);
280 break; 281 break;
281 case 2: 282 case 2:
282 val = mfspr(SPRN_PMC2); 283 val = mfspr(SPRN_PMC2);
283 break; 284 break;
284 case 3: 285 case 3:
285 val = mfspr(SPRN_PMC3); 286 val = mfspr(SPRN_PMC3);
286 break; 287 break;
287 case 4: 288 case 4:
288 val = mfspr(SPRN_PMC4); 289 val = mfspr(SPRN_PMC4);
289 break; 290 break;
290 case 5: 291 case 5:
291 val = mfspr(SPRN_PMC5); 292 val = mfspr(SPRN_PMC5);
292 break; 293 break;
293 case 6: 294 case 6:
294 val = mfspr(SPRN_PMC6); 295 val = mfspr(SPRN_PMC6);
295 break; 296 break;
296 #ifdef CONFIG_PPC64 297 #ifdef CONFIG_PPC64
297 case 7: 298 case 7:
298 val = mfspr(SPRN_PMC7); 299 val = mfspr(SPRN_PMC7);
299 break; 300 break;
300 case 8: 301 case 8:
301 val = mfspr(SPRN_PMC8); 302 val = mfspr(SPRN_PMC8);
302 break; 303 break;
303 #endif /* CONFIG_PPC64 */ 304 #endif /* CONFIG_PPC64 */
304 default: 305 default:
305 printk(KERN_ERR "oops trying to read PMC%d\n", idx); 306 printk(KERN_ERR "oops trying to read PMC%d\n", idx);
306 val = 0; 307 val = 0;
307 } 308 }
308 return val; 309 return val;
309 } 310 }
310 311
311 /* 312 /*
312 * Write one PMC. 313 * Write one PMC.
313 */ 314 */
314 static void write_pmc(int idx, unsigned long val) 315 static void write_pmc(int idx, unsigned long val)
315 { 316 {
316 switch (idx) { 317 switch (idx) {
317 case 1: 318 case 1:
318 mtspr(SPRN_PMC1, val); 319 mtspr(SPRN_PMC1, val);
319 break; 320 break;
320 case 2: 321 case 2:
321 mtspr(SPRN_PMC2, val); 322 mtspr(SPRN_PMC2, val);
322 break; 323 break;
323 case 3: 324 case 3:
324 mtspr(SPRN_PMC3, val); 325 mtspr(SPRN_PMC3, val);
325 break; 326 break;
326 case 4: 327 case 4:
327 mtspr(SPRN_PMC4, val); 328 mtspr(SPRN_PMC4, val);
328 break; 329 break;
329 case 5: 330 case 5:
330 mtspr(SPRN_PMC5, val); 331 mtspr(SPRN_PMC5, val);
331 break; 332 break;
332 case 6: 333 case 6:
333 mtspr(SPRN_PMC6, val); 334 mtspr(SPRN_PMC6, val);
334 break; 335 break;
335 #ifdef CONFIG_PPC64 336 #ifdef CONFIG_PPC64
336 case 7: 337 case 7:
337 mtspr(SPRN_PMC7, val); 338 mtspr(SPRN_PMC7, val);
338 break; 339 break;
339 case 8: 340 case 8:
340 mtspr(SPRN_PMC8, val); 341 mtspr(SPRN_PMC8, val);
341 break; 342 break;
342 #endif /* CONFIG_PPC64 */ 343 #endif /* CONFIG_PPC64 */
343 default: 344 default:
344 printk(KERN_ERR "oops trying to write PMC%d\n", idx); 345 printk(KERN_ERR "oops trying to write PMC%d\n", idx);
345 } 346 }
346 } 347 }
347 348
348 /* 349 /*
349 * Check if a set of events can all go on the PMU at once. 350 * Check if a set of events can all go on the PMU at once.
350 * If they can't, this will look at alternative codes for the events 351 * If they can't, this will look at alternative codes for the events
351 * and see if any combination of alternative codes is feasible. 352 * and see if any combination of alternative codes is feasible.
352 * The feasible set is returned in event_id[]. 353 * The feasible set is returned in event_id[].
353 */ 354 */
354 static int power_check_constraints(struct cpu_hw_events *cpuhw, 355 static int power_check_constraints(struct cpu_hw_events *cpuhw,
355 u64 event_id[], unsigned int cflags[], 356 u64 event_id[], unsigned int cflags[],
356 int n_ev) 357 int n_ev)
357 { 358 {
358 unsigned long mask, value, nv; 359 unsigned long mask, value, nv;
359 unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS]; 360 unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
360 int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS]; 361 int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS];
361 int i, j; 362 int i, j;
362 unsigned long addf = ppmu->add_fields; 363 unsigned long addf = ppmu->add_fields;
363 unsigned long tadd = ppmu->test_adder; 364 unsigned long tadd = ppmu->test_adder;
364 365
365 if (n_ev > ppmu->n_counter) 366 if (n_ev > ppmu->n_counter)
366 return -1; 367 return -1;
367 368
368 /* First see if the events will go on as-is */ 369 /* First see if the events will go on as-is */
369 for (i = 0; i < n_ev; ++i) { 370 for (i = 0; i < n_ev; ++i) {
370 if ((cflags[i] & PPMU_LIMITED_PMC_REQD) 371 if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
371 && !ppmu->limited_pmc_event(event_id[i])) { 372 && !ppmu->limited_pmc_event(event_id[i])) {
372 ppmu->get_alternatives(event_id[i], cflags[i], 373 ppmu->get_alternatives(event_id[i], cflags[i],
373 cpuhw->alternatives[i]); 374 cpuhw->alternatives[i]);
374 event_id[i] = cpuhw->alternatives[i][0]; 375 event_id[i] = cpuhw->alternatives[i][0];
375 } 376 }
376 if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0], 377 if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
377 &cpuhw->avalues[i][0])) 378 &cpuhw->avalues[i][0]))
378 return -1; 379 return -1;
379 } 380 }
380 value = mask = 0; 381 value = mask = 0;
381 for (i = 0; i < n_ev; ++i) { 382 for (i = 0; i < n_ev; ++i) {
382 nv = (value | cpuhw->avalues[i][0]) + 383 nv = (value | cpuhw->avalues[i][0]) +
383 (value & cpuhw->avalues[i][0] & addf); 384 (value & cpuhw->avalues[i][0] & addf);
384 if ((((nv + tadd) ^ value) & mask) != 0 || 385 if ((((nv + tadd) ^ value) & mask) != 0 ||
385 (((nv + tadd) ^ cpuhw->avalues[i][0]) & 386 (((nv + tadd) ^ cpuhw->avalues[i][0]) &
386 cpuhw->amasks[i][0]) != 0) 387 cpuhw->amasks[i][0]) != 0)
387 break; 388 break;
388 value = nv; 389 value = nv;
389 mask |= cpuhw->amasks[i][0]; 390 mask |= cpuhw->amasks[i][0];
390 } 391 }
391 if (i == n_ev) 392 if (i == n_ev)
392 return 0; /* all OK */ 393 return 0; /* all OK */
393 394
394 /* doesn't work, gather alternatives... */ 395 /* doesn't work, gather alternatives... */
395 if (!ppmu->get_alternatives) 396 if (!ppmu->get_alternatives)
396 return -1; 397 return -1;
397 for (i = 0; i < n_ev; ++i) { 398 for (i = 0; i < n_ev; ++i) {
398 choice[i] = 0; 399 choice[i] = 0;
399 n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i], 400 n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i],
400 cpuhw->alternatives[i]); 401 cpuhw->alternatives[i]);
401 for (j = 1; j < n_alt[i]; ++j) 402 for (j = 1; j < n_alt[i]; ++j)
402 ppmu->get_constraint(cpuhw->alternatives[i][j], 403 ppmu->get_constraint(cpuhw->alternatives[i][j],
403 &cpuhw->amasks[i][j], 404 &cpuhw->amasks[i][j],
404 &cpuhw->avalues[i][j]); 405 &cpuhw->avalues[i][j]);
405 } 406 }
406 407
407 /* enumerate all possibilities and see if any will work */ 408 /* enumerate all possibilities and see if any will work */
408 i = 0; 409 i = 0;
409 j = -1; 410 j = -1;
410 value = mask = nv = 0; 411 value = mask = nv = 0;
411 while (i < n_ev) { 412 while (i < n_ev) {
412 if (j >= 0) { 413 if (j >= 0) {
413 /* we're backtracking, restore context */ 414 /* we're backtracking, restore context */
414 value = svalues[i]; 415 value = svalues[i];
415 mask = smasks[i]; 416 mask = smasks[i];
416 j = choice[i]; 417 j = choice[i];
417 } 418 }
418 /* 419 /*
419 * See if any alternative k for event_id i, 420 * See if any alternative k for event_id i,
420 * where k > j, will satisfy the constraints. 421 * where k > j, will satisfy the constraints.
421 */ 422 */
422 while (++j < n_alt[i]) { 423 while (++j < n_alt[i]) {
423 nv = (value | cpuhw->avalues[i][j]) + 424 nv = (value | cpuhw->avalues[i][j]) +
424 (value & cpuhw->avalues[i][j] & addf); 425 (value & cpuhw->avalues[i][j] & addf);
425 if ((((nv + tadd) ^ value) & mask) == 0 && 426 if ((((nv + tadd) ^ value) & mask) == 0 &&
426 (((nv + tadd) ^ cpuhw->avalues[i][j]) 427 (((nv + tadd) ^ cpuhw->avalues[i][j])
427 & cpuhw->amasks[i][j]) == 0) 428 & cpuhw->amasks[i][j]) == 0)
428 break; 429 break;
429 } 430 }
430 if (j >= n_alt[i]) { 431 if (j >= n_alt[i]) {
431 /* 432 /*
432 * No feasible alternative, backtrack 433 * No feasible alternative, backtrack
433 * to event_id i-1 and continue enumerating its 434 * to event_id i-1 and continue enumerating its
434 * alternatives from where we got up to. 435 * alternatives from where we got up to.
435 */ 436 */
436 if (--i < 0) 437 if (--i < 0)
437 return -1; 438 return -1;
438 } else { 439 } else {
439 /* 440 /*
440 * Found a feasible alternative for event_id i, 441 * Found a feasible alternative for event_id i,
441 * remember where we got up to with this event_id, 442 * remember where we got up to with this event_id,
442 * go on to the next event_id, and start with 443 * go on to the next event_id, and start with
443 * the first alternative for it. 444 * the first alternative for it.
444 */ 445 */
445 choice[i] = j; 446 choice[i] = j;
446 svalues[i] = value; 447 svalues[i] = value;
447 smasks[i] = mask; 448 smasks[i] = mask;
448 value = nv; 449 value = nv;
449 mask |= cpuhw->amasks[i][j]; 450 mask |= cpuhw->amasks[i][j];
450 ++i; 451 ++i;
451 j = -1; 452 j = -1;
452 } 453 }
453 } 454 }
454 455
455 /* OK, we have a feasible combination, tell the caller the solution */ 456 /* OK, we have a feasible combination, tell the caller the solution */
456 for (i = 0; i < n_ev; ++i) 457 for (i = 0; i < n_ev; ++i)
457 event_id[i] = cpuhw->alternatives[i][choice[i]]; 458 event_id[i] = cpuhw->alternatives[i][choice[i]];
458 return 0; 459 return 0;
459 } 460 }
460 461
461 /* 462 /*
462 * Check if newly-added events have consistent settings for 463 * Check if newly-added events have consistent settings for
463 * exclude_{user,kernel,hv} with each other and any previously 464 * exclude_{user,kernel,hv} with each other and any previously
464 * added events. 465 * added events.
465 */ 466 */
466 static int check_excludes(struct perf_event **ctrs, unsigned int cflags[], 467 static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
467 int n_prev, int n_new) 468 int n_prev, int n_new)
468 { 469 {
469 int eu = 0, ek = 0, eh = 0; 470 int eu = 0, ek = 0, eh = 0;
470 int i, n, first; 471 int i, n, first;
471 struct perf_event *event; 472 struct perf_event *event;
472 473
473 n = n_prev + n_new; 474 n = n_prev + n_new;
474 if (n <= 1) 475 if (n <= 1)
475 return 0; 476 return 0;
476 477
477 first = 1; 478 first = 1;
478 for (i = 0; i < n; ++i) { 479 for (i = 0; i < n; ++i) {
479 if (cflags[i] & PPMU_LIMITED_PMC_OK) { 480 if (cflags[i] & PPMU_LIMITED_PMC_OK) {
480 cflags[i] &= ~PPMU_LIMITED_PMC_REQD; 481 cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
481 continue; 482 continue;
482 } 483 }
483 event = ctrs[i]; 484 event = ctrs[i];
484 if (first) { 485 if (first) {
485 eu = event->attr.exclude_user; 486 eu = event->attr.exclude_user;
486 ek = event->attr.exclude_kernel; 487 ek = event->attr.exclude_kernel;
487 eh = event->attr.exclude_hv; 488 eh = event->attr.exclude_hv;
488 first = 0; 489 first = 0;
489 } else if (event->attr.exclude_user != eu || 490 } else if (event->attr.exclude_user != eu ||
490 event->attr.exclude_kernel != ek || 491 event->attr.exclude_kernel != ek ||
491 event->attr.exclude_hv != eh) { 492 event->attr.exclude_hv != eh) {
492 return -EAGAIN; 493 return -EAGAIN;
493 } 494 }
494 } 495 }
495 496
496 if (eu || ek || eh) 497 if (eu || ek || eh)
497 for (i = 0; i < n; ++i) 498 for (i = 0; i < n; ++i)
498 if (cflags[i] & PPMU_LIMITED_PMC_OK) 499 if (cflags[i] & PPMU_LIMITED_PMC_OK)
499 cflags[i] |= PPMU_LIMITED_PMC_REQD; 500 cflags[i] |= PPMU_LIMITED_PMC_REQD;
500 501
501 return 0; 502 return 0;
502 } 503 }
503 504
504 static u64 check_and_compute_delta(u64 prev, u64 val) 505 static u64 check_and_compute_delta(u64 prev, u64 val)
505 { 506 {
506 u64 delta = (val - prev) & 0xfffffffful; 507 u64 delta = (val - prev) & 0xfffffffful;
507 508
508 /* 509 /*
509 * POWER7 can roll back counter values, if the new value is smaller 510 * POWER7 can roll back counter values, if the new value is smaller
510 * than the previous value it will cause the delta and the counter to 511 * than the previous value it will cause the delta and the counter to
511 * have bogus values unless we rolled a counter over. If a coutner is 512 * have bogus values unless we rolled a counter over. If a coutner is
512 * rolled back, it will be smaller, but within 256, which is the maximum 513 * rolled back, it will be smaller, but within 256, which is the maximum
513 * number of events to rollback at once. If we dectect a rollback 514 * number of events to rollback at once. If we dectect a rollback
514 * return 0. This can lead to a small lack of precision in the 515 * return 0. This can lead to a small lack of precision in the
515 * counters. 516 * counters.
516 */ 517 */
517 if (prev > val && (prev - val) < 256) 518 if (prev > val && (prev - val) < 256)
518 delta = 0; 519 delta = 0;
519 520
520 return delta; 521 return delta;
521 } 522 }
522 523
523 static void power_pmu_read(struct perf_event *event) 524 static void power_pmu_read(struct perf_event *event)
524 { 525 {
525 s64 val, delta, prev; 526 s64 val, delta, prev;
526 527
527 if (event->hw.state & PERF_HES_STOPPED) 528 if (event->hw.state & PERF_HES_STOPPED)
528 return; 529 return;
529 530
530 if (!event->hw.idx) 531 if (!event->hw.idx)
531 return; 532 return;
532 /* 533 /*
533 * Performance monitor interrupts come even when interrupts 534 * Performance monitor interrupts come even when interrupts
534 * are soft-disabled, as long as interrupts are hard-enabled. 535 * are soft-disabled, as long as interrupts are hard-enabled.
535 * Therefore we treat them like NMIs. 536 * Therefore we treat them like NMIs.
536 */ 537 */
537 do { 538 do {
538 prev = local64_read(&event->hw.prev_count); 539 prev = local64_read(&event->hw.prev_count);
539 barrier(); 540 barrier();
540 val = read_pmc(event->hw.idx); 541 val = read_pmc(event->hw.idx);
541 delta = check_and_compute_delta(prev, val); 542 delta = check_and_compute_delta(prev, val);
542 if (!delta) 543 if (!delta)
543 return; 544 return;
544 } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); 545 } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
545 546
546 local64_add(delta, &event->count); 547 local64_add(delta, &event->count);
547 local64_sub(delta, &event->hw.period_left); 548 local64_sub(delta, &event->hw.period_left);
548 } 549 }
549 550
550 /* 551 /*
551 * On some machines, PMC5 and PMC6 can't be written, don't respect 552 * On some machines, PMC5 and PMC6 can't be written, don't respect
552 * the freeze conditions, and don't generate interrupts. This tells 553 * the freeze conditions, and don't generate interrupts. This tells
553 * us if `event' is using such a PMC. 554 * us if `event' is using such a PMC.
554 */ 555 */
555 static int is_limited_pmc(int pmcnum) 556 static int is_limited_pmc(int pmcnum)
556 { 557 {
557 return (ppmu->flags & PPMU_LIMITED_PMC5_6) 558 return (ppmu->flags & PPMU_LIMITED_PMC5_6)
558 && (pmcnum == 5 || pmcnum == 6); 559 && (pmcnum == 5 || pmcnum == 6);
559 } 560 }
560 561
561 static void freeze_limited_counters(struct cpu_hw_events *cpuhw, 562 static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
562 unsigned long pmc5, unsigned long pmc6) 563 unsigned long pmc5, unsigned long pmc6)
563 { 564 {
564 struct perf_event *event; 565 struct perf_event *event;
565 u64 val, prev, delta; 566 u64 val, prev, delta;
566 int i; 567 int i;
567 568
568 for (i = 0; i < cpuhw->n_limited; ++i) { 569 for (i = 0; i < cpuhw->n_limited; ++i) {
569 event = cpuhw->limited_counter[i]; 570 event = cpuhw->limited_counter[i];
570 if (!event->hw.idx) 571 if (!event->hw.idx)
571 continue; 572 continue;
572 val = (event->hw.idx == 5) ? pmc5 : pmc6; 573 val = (event->hw.idx == 5) ? pmc5 : pmc6;
573 prev = local64_read(&event->hw.prev_count); 574 prev = local64_read(&event->hw.prev_count);
574 event->hw.idx = 0; 575 event->hw.idx = 0;
575 delta = check_and_compute_delta(prev, val); 576 delta = check_and_compute_delta(prev, val);
576 if (delta) 577 if (delta)
577 local64_add(delta, &event->count); 578 local64_add(delta, &event->count);
578 } 579 }
579 } 580 }
580 581
581 static void thaw_limited_counters(struct cpu_hw_events *cpuhw, 582 static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
582 unsigned long pmc5, unsigned long pmc6) 583 unsigned long pmc5, unsigned long pmc6)
583 { 584 {
584 struct perf_event *event; 585 struct perf_event *event;
585 u64 val, prev; 586 u64 val, prev;
586 int i; 587 int i;
587 588
588 for (i = 0; i < cpuhw->n_limited; ++i) { 589 for (i = 0; i < cpuhw->n_limited; ++i) {
589 event = cpuhw->limited_counter[i]; 590 event = cpuhw->limited_counter[i];
590 event->hw.idx = cpuhw->limited_hwidx[i]; 591 event->hw.idx = cpuhw->limited_hwidx[i];
591 val = (event->hw.idx == 5) ? pmc5 : pmc6; 592 val = (event->hw.idx == 5) ? pmc5 : pmc6;
592 prev = local64_read(&event->hw.prev_count); 593 prev = local64_read(&event->hw.prev_count);
593 if (check_and_compute_delta(prev, val)) 594 if (check_and_compute_delta(prev, val))
594 local64_set(&event->hw.prev_count, val); 595 local64_set(&event->hw.prev_count, val);
595 perf_event_update_userpage(event); 596 perf_event_update_userpage(event);
596 } 597 }
597 } 598 }
598 599
599 /* 600 /*
600 * Since limited events don't respect the freeze conditions, we 601 * Since limited events don't respect the freeze conditions, we
601 * have to read them immediately after freezing or unfreezing the 602 * have to read them immediately after freezing or unfreezing the
602 * other events. We try to keep the values from the limited 603 * other events. We try to keep the values from the limited
603 * events as consistent as possible by keeping the delay (in 604 * events as consistent as possible by keeping the delay (in
604 * cycles and instructions) between freezing/unfreezing and reading 605 * cycles and instructions) between freezing/unfreezing and reading
605 * the limited events as small and consistent as possible. 606 * the limited events as small and consistent as possible.
606 * Therefore, if any limited events are in use, we read them 607 * Therefore, if any limited events are in use, we read them
607 * both, and always in the same order, to minimize variability, 608 * both, and always in the same order, to minimize variability,
608 * and do it inside the same asm that writes MMCR0. 609 * and do it inside the same asm that writes MMCR0.
609 */ 610 */
610 static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) 611 static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
611 { 612 {
612 unsigned long pmc5, pmc6; 613 unsigned long pmc5, pmc6;
613 614
614 if (!cpuhw->n_limited) { 615 if (!cpuhw->n_limited) {
615 mtspr(SPRN_MMCR0, mmcr0); 616 mtspr(SPRN_MMCR0, mmcr0);
616 return; 617 return;
617 } 618 }
618 619
619 /* 620 /*
620 * Write MMCR0, then read PMC5 and PMC6 immediately. 621 * Write MMCR0, then read PMC5 and PMC6 immediately.
621 * To ensure we don't get a performance monitor interrupt 622 * To ensure we don't get a performance monitor interrupt
622 * between writing MMCR0 and freezing/thawing the limited 623 * between writing MMCR0 and freezing/thawing the limited
623 * events, we first write MMCR0 with the event overflow 624 * events, we first write MMCR0 with the event overflow
624 * interrupt enable bits turned off. 625 * interrupt enable bits turned off.
625 */ 626 */
626 asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" 627 asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
627 : "=&r" (pmc5), "=&r" (pmc6) 628 : "=&r" (pmc5), "=&r" (pmc6)
628 : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)), 629 : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
629 "i" (SPRN_MMCR0), 630 "i" (SPRN_MMCR0),
630 "i" (SPRN_PMC5), "i" (SPRN_PMC6)); 631 "i" (SPRN_PMC5), "i" (SPRN_PMC6));
631 632
632 if (mmcr0 & MMCR0_FC) 633 if (mmcr0 & MMCR0_FC)
633 freeze_limited_counters(cpuhw, pmc5, pmc6); 634 freeze_limited_counters(cpuhw, pmc5, pmc6);
634 else 635 else
635 thaw_limited_counters(cpuhw, pmc5, pmc6); 636 thaw_limited_counters(cpuhw, pmc5, pmc6);
636 637
637 /* 638 /*
638 * Write the full MMCR0 including the event overflow interrupt 639 * Write the full MMCR0 including the event overflow interrupt
639 * enable bits, if necessary. 640 * enable bits, if necessary.
640 */ 641 */
641 if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE)) 642 if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
642 mtspr(SPRN_MMCR0, mmcr0); 643 mtspr(SPRN_MMCR0, mmcr0);
643 } 644 }
644 645
645 /* 646 /*
646 * Disable all events to prevent PMU interrupts and to allow 647 * Disable all events to prevent PMU interrupts and to allow
647 * events to be added or removed. 648 * events to be added or removed.
648 */ 649 */
649 static void power_pmu_disable(struct pmu *pmu) 650 static void power_pmu_disable(struct pmu *pmu)
650 { 651 {
651 struct cpu_hw_events *cpuhw; 652 struct cpu_hw_events *cpuhw;
652 unsigned long flags; 653 unsigned long flags;
653 654
654 if (!ppmu) 655 if (!ppmu)
655 return; 656 return;
656 local_irq_save(flags); 657 local_irq_save(flags);
657 cpuhw = &__get_cpu_var(cpu_hw_events); 658 cpuhw = &__get_cpu_var(cpu_hw_events);
658 659
659 if (!cpuhw->disabled) { 660 if (!cpuhw->disabled) {
660 cpuhw->disabled = 1; 661 cpuhw->disabled = 1;
661 cpuhw->n_added = 0; 662 cpuhw->n_added = 0;
662 663
663 /* 664 /*
664 * Check if we ever enabled the PMU on this cpu. 665 * Check if we ever enabled the PMU on this cpu.
665 */ 666 */
666 if (!cpuhw->pmcs_enabled) { 667 if (!cpuhw->pmcs_enabled) {
667 ppc_enable_pmcs(); 668 ppc_enable_pmcs();
668 cpuhw->pmcs_enabled = 1; 669 cpuhw->pmcs_enabled = 1;
669 } 670 }
670 671
671 /* 672 /*
672 * Disable instruction sampling if it was enabled 673 * Disable instruction sampling if it was enabled
673 */ 674 */
674 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { 675 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
675 mtspr(SPRN_MMCRA, 676 mtspr(SPRN_MMCRA,
676 cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); 677 cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
677 mb(); 678 mb();
678 } 679 }
679 680
680 /* 681 /*
681 * Set the 'freeze counters' bit. 682 * Set the 'freeze counters' bit.
682 * The barrier is to make sure the mtspr has been 683 * The barrier is to make sure the mtspr has been
683 * executed and the PMU has frozen the events 684 * executed and the PMU has frozen the events
684 * before we return. 685 * before we return.
685 */ 686 */
686 write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); 687 write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
687 mb(); 688 mb();
688 } 689 }
689 local_irq_restore(flags); 690 local_irq_restore(flags);
690 } 691 }
691 692
692 /* 693 /*
693 * Re-enable all events if disable == 0. 694 * Re-enable all events if disable == 0.
694 * If we were previously disabled and events were added, then 695 * If we were previously disabled and events were added, then
695 * put the new config on the PMU. 696 * put the new config on the PMU.
696 */ 697 */
697 static void power_pmu_enable(struct pmu *pmu) 698 static void power_pmu_enable(struct pmu *pmu)
698 { 699 {
699 struct perf_event *event; 700 struct perf_event *event;
700 struct cpu_hw_events *cpuhw; 701 struct cpu_hw_events *cpuhw;
701 unsigned long flags; 702 unsigned long flags;
702 long i; 703 long i;
703 unsigned long val; 704 unsigned long val;
704 s64 left; 705 s64 left;
705 unsigned int hwc_index[MAX_HWEVENTS]; 706 unsigned int hwc_index[MAX_HWEVENTS];
706 int n_lim; 707 int n_lim;
707 int idx; 708 int idx;
708 709
709 if (!ppmu) 710 if (!ppmu)
710 return; 711 return;
711 local_irq_save(flags); 712 local_irq_save(flags);
712 cpuhw = &__get_cpu_var(cpu_hw_events); 713 cpuhw = &__get_cpu_var(cpu_hw_events);
713 if (!cpuhw->disabled) { 714 if (!cpuhw->disabled) {
714 local_irq_restore(flags); 715 local_irq_restore(flags);
715 return; 716 return;
716 } 717 }
717 cpuhw->disabled = 0; 718 cpuhw->disabled = 0;
718 719
719 /* 720 /*
720 * If we didn't change anything, or only removed events, 721 * If we didn't change anything, or only removed events,
721 * no need to recalculate MMCR* settings and reset the PMCs. 722 * no need to recalculate MMCR* settings and reset the PMCs.
722 * Just reenable the PMU with the current MMCR* settings 723 * Just reenable the PMU with the current MMCR* settings
723 * (possibly updated for removal of events). 724 * (possibly updated for removal of events).
724 */ 725 */
725 if (!cpuhw->n_added) { 726 if (!cpuhw->n_added) {
726 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); 727 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
727 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); 728 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
728 if (cpuhw->n_events == 0) 729 if (cpuhw->n_events == 0)
729 ppc_set_pmu_inuse(0); 730 ppc_set_pmu_inuse(0);
730 goto out_enable; 731 goto out_enable;
731 } 732 }
732 733
733 /* 734 /*
734 * Compute MMCR* values for the new set of events 735 * Compute MMCR* values for the new set of events
735 */ 736 */
736 if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, 737 if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
737 cpuhw->mmcr)) { 738 cpuhw->mmcr)) {
738 /* shouldn't ever get here */ 739 /* shouldn't ever get here */
739 printk(KERN_ERR "oops compute_mmcr failed\n"); 740 printk(KERN_ERR "oops compute_mmcr failed\n");
740 goto out; 741 goto out;
741 } 742 }
742 743
743 /* 744 /*
744 * Add in MMCR0 freeze bits corresponding to the 745 * Add in MMCR0 freeze bits corresponding to the
745 * attr.exclude_* bits for the first event. 746 * attr.exclude_* bits for the first event.
746 * We have already checked that all events have the 747 * We have already checked that all events have the
747 * same values for these bits as the first event. 748 * same values for these bits as the first event.
748 */ 749 */
749 event = cpuhw->event[0]; 750 event = cpuhw->event[0];
750 if (event->attr.exclude_user) 751 if (event->attr.exclude_user)
751 cpuhw->mmcr[0] |= MMCR0_FCP; 752 cpuhw->mmcr[0] |= MMCR0_FCP;
752 if (event->attr.exclude_kernel) 753 if (event->attr.exclude_kernel)
753 cpuhw->mmcr[0] |= freeze_events_kernel; 754 cpuhw->mmcr[0] |= freeze_events_kernel;
754 if (event->attr.exclude_hv) 755 if (event->attr.exclude_hv)
755 cpuhw->mmcr[0] |= MMCR0_FCHV; 756 cpuhw->mmcr[0] |= MMCR0_FCHV;
756 757
757 /* 758 /*
758 * Write the new configuration to MMCR* with the freeze 759 * Write the new configuration to MMCR* with the freeze
759 * bit set and set the hardware events to their initial values. 760 * bit set and set the hardware events to their initial values.
760 * Then unfreeze the events. 761 * Then unfreeze the events.
761 */ 762 */
762 ppc_set_pmu_inuse(1); 763 ppc_set_pmu_inuse(1);
763 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); 764 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
764 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); 765 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
765 mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) 766 mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
766 | MMCR0_FC); 767 | MMCR0_FC);
767 768
768 /* 769 /*
769 * Read off any pre-existing events that need to move 770 * Read off any pre-existing events that need to move
770 * to another PMC. 771 * to another PMC.
771 */ 772 */
772 for (i = 0; i < cpuhw->n_events; ++i) { 773 for (i = 0; i < cpuhw->n_events; ++i) {
773 event = cpuhw->event[i]; 774 event = cpuhw->event[i];
774 if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) { 775 if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) {
775 power_pmu_read(event); 776 power_pmu_read(event);
776 write_pmc(event->hw.idx, 0); 777 write_pmc(event->hw.idx, 0);
777 event->hw.idx = 0; 778 event->hw.idx = 0;
778 } 779 }
779 } 780 }
780 781
781 /* 782 /*
782 * Initialize the PMCs for all the new and moved events. 783 * Initialize the PMCs for all the new and moved events.
783 */ 784 */
784 cpuhw->n_limited = n_lim = 0; 785 cpuhw->n_limited = n_lim = 0;
785 for (i = 0; i < cpuhw->n_events; ++i) { 786 for (i = 0; i < cpuhw->n_events; ++i) {
786 event = cpuhw->event[i]; 787 event = cpuhw->event[i];
787 if (event->hw.idx) 788 if (event->hw.idx)
788 continue; 789 continue;
789 idx = hwc_index[i] + 1; 790 idx = hwc_index[i] + 1;
790 if (is_limited_pmc(idx)) { 791 if (is_limited_pmc(idx)) {
791 cpuhw->limited_counter[n_lim] = event; 792 cpuhw->limited_counter[n_lim] = event;
792 cpuhw->limited_hwidx[n_lim] = idx; 793 cpuhw->limited_hwidx[n_lim] = idx;
793 ++n_lim; 794 ++n_lim;
794 continue; 795 continue;
795 } 796 }
796 val = 0; 797 val = 0;
797 if (event->hw.sample_period) { 798 if (event->hw.sample_period) {
798 left = local64_read(&event->hw.period_left); 799 left = local64_read(&event->hw.period_left);
799 if (left < 0x80000000L) 800 if (left < 0x80000000L)
800 val = 0x80000000L - left; 801 val = 0x80000000L - left;
801 } 802 }
802 local64_set(&event->hw.prev_count, val); 803 local64_set(&event->hw.prev_count, val);
803 event->hw.idx = idx; 804 event->hw.idx = idx;
804 if (event->hw.state & PERF_HES_STOPPED) 805 if (event->hw.state & PERF_HES_STOPPED)
805 val = 0; 806 val = 0;
806 write_pmc(idx, val); 807 write_pmc(idx, val);
807 perf_event_update_userpage(event); 808 perf_event_update_userpage(event);
808 } 809 }
809 cpuhw->n_limited = n_lim; 810 cpuhw->n_limited = n_lim;
810 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; 811 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
811 812
812 out_enable: 813 out_enable:
813 mb(); 814 mb();
814 write_mmcr0(cpuhw, cpuhw->mmcr[0]); 815 write_mmcr0(cpuhw, cpuhw->mmcr[0]);
815 816
816 /* 817 /*
817 * Enable instruction sampling if necessary 818 * Enable instruction sampling if necessary
818 */ 819 */
819 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { 820 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
820 mb(); 821 mb();
821 mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); 822 mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
822 } 823 }
823 824
824 out: 825 out:
825 local_irq_restore(flags); 826 local_irq_restore(flags);
826 } 827 }
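A note on the preload arithmetic used when the PMCs are programmed above: the counters are 32-bit and raise a performance-monitor exception when they cross 0x80000000, so writing 0x80000000 - left makes the hardware overflow after exactly "left" more events. A minimal sketch of that calculation (hypothetical helper for illustration only, not part of this file):

/*
 * Sketch: choose a PMC preload so the counter crosses 0x80000000
 * (and hence interrupts) after "left" more events.
 */
static unsigned long pmc_preload(s64 left)
{
	if (left < 0x80000000L)
		return 0x80000000L - left;	/* overflow after "left" events */
	return 0;				/* period >= 2^31: count up from 0 */
}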
827 828
828 static int collect_events(struct perf_event *group, int max_count, 829 static int collect_events(struct perf_event *group, int max_count,
829 struct perf_event *ctrs[], u64 *events, 830 struct perf_event *ctrs[], u64 *events,
830 unsigned int *flags) 831 unsigned int *flags)
831 { 832 {
832 int n = 0; 833 int n = 0;
833 struct perf_event *event; 834 struct perf_event *event;
834 835
835 if (!is_software_event(group)) { 836 if (!is_software_event(group)) {
836 if (n >= max_count) 837 if (n >= max_count)
837 return -1; 838 return -1;
838 ctrs[n] = group; 839 ctrs[n] = group;
839 flags[n] = group->hw.event_base; 840 flags[n] = group->hw.event_base;
840 events[n++] = group->hw.config; 841 events[n++] = group->hw.config;
841 } 842 }
842 list_for_each_entry(event, &group->sibling_list, group_entry) { 843 list_for_each_entry(event, &group->sibling_list, group_entry) {
843 if (!is_software_event(event) && 844 if (!is_software_event(event) &&
844 event->state != PERF_EVENT_STATE_OFF) { 845 event->state != PERF_EVENT_STATE_OFF) {
845 if (n >= max_count) 846 if (n >= max_count)
846 return -1; 847 return -1;
847 ctrs[n] = event; 848 ctrs[n] = event;
848 flags[n] = event->hw.event_base; 849 flags[n] = event->hw.event_base;
849 events[n++] = event->hw.config; 850 events[n++] = event->hw.config;
850 } 851 }
851 } 852 }
852 return n; 853 return n;
853 } 854 }
854 855
855 /* 856 /*
856 * Add an event to the PMU. 857 * Add an event to the PMU.
857 * Unless all events are already frozen, we disable and 858 * Unless all events are already frozen, we disable and
858 * re-enable the PMU in order to get hw_perf_enable to do the 859 * re-enable the PMU in order to get hw_perf_enable to do the
859 * actual work of reconfiguring the PMU. 860 * actual work of reconfiguring the PMU.
860 */ 861 */
861 static int power_pmu_add(struct perf_event *event, int ef_flags) 862 static int power_pmu_add(struct perf_event *event, int ef_flags)
862 { 863 {
863 struct cpu_hw_events *cpuhw; 864 struct cpu_hw_events *cpuhw;
864 unsigned long flags; 865 unsigned long flags;
865 int n0; 866 int n0;
866 int ret = -EAGAIN; 867 int ret = -EAGAIN;
867 868
868 local_irq_save(flags); 869 local_irq_save(flags);
869 perf_pmu_disable(event->pmu); 870 perf_pmu_disable(event->pmu);
870 871
871 /* 872 /*
872 * Add the event to the list (if there is room) 873 * Add the event to the list (if there is room)
873 * and check whether the total set is still feasible. 874 * and check whether the total set is still feasible.
874 */ 875 */
875 cpuhw = &__get_cpu_var(cpu_hw_events); 876 cpuhw = &__get_cpu_var(cpu_hw_events);
876 n0 = cpuhw->n_events; 877 n0 = cpuhw->n_events;
877 if (n0 >= ppmu->n_counter) 878 if (n0 >= ppmu->n_counter)
878 goto out; 879 goto out;
879 cpuhw->event[n0] = event; 880 cpuhw->event[n0] = event;
880 cpuhw->events[n0] = event->hw.config; 881 cpuhw->events[n0] = event->hw.config;
881 cpuhw->flags[n0] = event->hw.event_base; 882 cpuhw->flags[n0] = event->hw.event_base;
882 883
883 /* 884 /*
884 * This event may have been disabled/stopped in record_and_restart() 885 * This event may have been disabled/stopped in record_and_restart()
885 * because we exceeded the ->event_limit. If re-starting the event, 886 * because we exceeded the ->event_limit. If re-starting the event,
886 * clear the ->hw.state (STOPPED and UPTODATE flags), so the user 887 * clear the ->hw.state (STOPPED and UPTODATE flags), so the user
887 * notification is re-enabled. 888 * notification is re-enabled.
888 */ 889 */
889 if (!(ef_flags & PERF_EF_START)) 890 if (!(ef_flags & PERF_EF_START))
890 event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; 891 event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
891 else 892 else
892 event->hw.state = 0; 893 event->hw.state = 0;
893 894
894 /* 895 /*
895 * If group events scheduling transaction was started, 896 * If group events scheduling transaction was started,
896 * skip the schedulability test here, it will be performed 897 * skip the schedulability test here, it will be performed
897 * at commit time(->commit_txn) as a whole 898 * at commit time(->commit_txn) as a whole
898 */ 899 */
899 if (cpuhw->group_flag & PERF_EVENT_TXN) 900 if (cpuhw->group_flag & PERF_EVENT_TXN)
900 goto nocheck; 901 goto nocheck;
901 902
902 if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) 903 if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
903 goto out; 904 goto out;
904 if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) 905 if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
905 goto out; 906 goto out;
906 event->hw.config = cpuhw->events[n0]; 907 event->hw.config = cpuhw->events[n0];
907 908
908 nocheck: 909 nocheck:
909 ++cpuhw->n_events; 910 ++cpuhw->n_events;
910 ++cpuhw->n_added; 911 ++cpuhw->n_added;
911 912
912 ret = 0; 913 ret = 0;
913 out: 914 out:
914 perf_pmu_enable(event->pmu); 915 perf_pmu_enable(event->pmu);
915 local_irq_restore(flags); 916 local_irq_restore(flags);
916 return ret; 917 return ret;
917 } 918 }
918 919
919 /* 920 /*
920 * Remove an event from the PMU. 921 * Remove an event from the PMU.
921 */ 922 */
922 static void power_pmu_del(struct perf_event *event, int ef_flags) 923 static void power_pmu_del(struct perf_event *event, int ef_flags)
923 { 924 {
924 struct cpu_hw_events *cpuhw; 925 struct cpu_hw_events *cpuhw;
925 long i; 926 long i;
926 unsigned long flags; 927 unsigned long flags;
927 928
928 local_irq_save(flags); 929 local_irq_save(flags);
929 perf_pmu_disable(event->pmu); 930 perf_pmu_disable(event->pmu);
930 931
931 power_pmu_read(event); 932 power_pmu_read(event);
932 933
933 cpuhw = &__get_cpu_var(cpu_hw_events); 934 cpuhw = &__get_cpu_var(cpu_hw_events);
934 for (i = 0; i < cpuhw->n_events; ++i) { 935 for (i = 0; i < cpuhw->n_events; ++i) {
935 if (event == cpuhw->event[i]) { 936 if (event == cpuhw->event[i]) {
936 while (++i < cpuhw->n_events) { 937 while (++i < cpuhw->n_events) {
937 cpuhw->event[i-1] = cpuhw->event[i]; 938 cpuhw->event[i-1] = cpuhw->event[i];
938 cpuhw->events[i-1] = cpuhw->events[i]; 939 cpuhw->events[i-1] = cpuhw->events[i];
939 cpuhw->flags[i-1] = cpuhw->flags[i]; 940 cpuhw->flags[i-1] = cpuhw->flags[i];
940 } 941 }
941 --cpuhw->n_events; 942 --cpuhw->n_events;
942 ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr); 943 ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
943 if (event->hw.idx) { 944 if (event->hw.idx) {
944 write_pmc(event->hw.idx, 0); 945 write_pmc(event->hw.idx, 0);
945 event->hw.idx = 0; 946 event->hw.idx = 0;
946 } 947 }
947 perf_event_update_userpage(event); 948 perf_event_update_userpage(event);
948 break; 949 break;
949 } 950 }
950 } 951 }
951 for (i = 0; i < cpuhw->n_limited; ++i) 952 for (i = 0; i < cpuhw->n_limited; ++i)
952 if (event == cpuhw->limited_counter[i]) 953 if (event == cpuhw->limited_counter[i])
953 break; 954 break;
954 if (i < cpuhw->n_limited) { 955 if (i < cpuhw->n_limited) {
955 while (++i < cpuhw->n_limited) { 956 while (++i < cpuhw->n_limited) {
956 cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i]; 957 cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
957 cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; 958 cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
958 } 959 }
959 --cpuhw->n_limited; 960 --cpuhw->n_limited;
960 } 961 }
961 if (cpuhw->n_events == 0) { 962 if (cpuhw->n_events == 0) {
962 /* disable exceptions if no events are running */ 963 /* disable exceptions if no events are running */
963 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); 964 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
964 } 965 }
965 966
966 perf_pmu_enable(event->pmu); 967 perf_pmu_enable(event->pmu);
967 local_irq_restore(flags); 968 local_irq_restore(flags);
968 } 969 }
969 970
970 /* 971 /*
971 * POWER-PMU does not support disabling individual counters, hence 972 * POWER-PMU does not support disabling individual counters, hence
972 * program their cycle counter to their max value and ignore the interrupts. 973 * program their cycle counter to their max value and ignore the interrupts.
973 */ 974 */
974 975
975 static void power_pmu_start(struct perf_event *event, int ef_flags) 976 static void power_pmu_start(struct perf_event *event, int ef_flags)
976 { 977 {
977 unsigned long flags; 978 unsigned long flags;
978 s64 left; 979 s64 left;
979 unsigned long val; 980 unsigned long val;
980 981
981 if (!event->hw.idx || !event->hw.sample_period) 982 if (!event->hw.idx || !event->hw.sample_period)
982 return; 983 return;
983 984
984 if (!(event->hw.state & PERF_HES_STOPPED)) 985 if (!(event->hw.state & PERF_HES_STOPPED))
985 return; 986 return;
986 987
987 if (ef_flags & PERF_EF_RELOAD) 988 if (ef_flags & PERF_EF_RELOAD)
988 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); 989 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
989 990
990 local_irq_save(flags); 991 local_irq_save(flags);
991 perf_pmu_disable(event->pmu); 992 perf_pmu_disable(event->pmu);
992 993
993 event->hw.state = 0; 994 event->hw.state = 0;
994 left = local64_read(&event->hw.period_left); 995 left = local64_read(&event->hw.period_left);
995 996
996 val = 0; 997 val = 0;
997 if (left < 0x80000000L) 998 if (left < 0x80000000L)
998 val = 0x80000000L - left; 999 val = 0x80000000L - left;
999 1000
1000 write_pmc(event->hw.idx, val); 1001 write_pmc(event->hw.idx, val);
1001 1002
1002 perf_event_update_userpage(event); 1003 perf_event_update_userpage(event);
1003 perf_pmu_enable(event->pmu); 1004 perf_pmu_enable(event->pmu);
1004 local_irq_restore(flags); 1005 local_irq_restore(flags);
1005 } 1006 }
1006 1007
1007 static void power_pmu_stop(struct perf_event *event, int ef_flags) 1008 static void power_pmu_stop(struct perf_event *event, int ef_flags)
1008 { 1009 {
1009 unsigned long flags; 1010 unsigned long flags;
1010 1011
1011 if (!event->hw.idx || !event->hw.sample_period) 1012 if (!event->hw.idx || !event->hw.sample_period)
1012 return; 1013 return;
1013 1014
1014 if (event->hw.state & PERF_HES_STOPPED) 1015 if (event->hw.state & PERF_HES_STOPPED)
1015 return; 1016 return;
1016 1017
1017 local_irq_save(flags); 1018 local_irq_save(flags);
1018 perf_pmu_disable(event->pmu); 1019 perf_pmu_disable(event->pmu);
1019 1020
1020 power_pmu_read(event); 1021 power_pmu_read(event);
1021 event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; 1022 event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
1022 write_pmc(event->hw.idx, 0); 1023 write_pmc(event->hw.idx, 0);
1023 1024
1024 perf_event_update_userpage(event); 1025 perf_event_update_userpage(event);
1025 perf_pmu_enable(event->pmu); 1026 perf_pmu_enable(event->pmu);
1026 local_irq_restore(flags); 1027 local_irq_restore(flags);
1027 } 1028 }
1028 1029
1029 /* 1030 /*
1030 * Start group events scheduling transaction 1031 * Start group events scheduling transaction
1031 * Set the flag to make pmu::enable() not perform the 1032 * Set the flag to make pmu::enable() not perform the
1032 * schedulability test, it will be performed at commit time 1033 * schedulability test, it will be performed at commit time
1033 */ 1034 */
1034 void power_pmu_start_txn(struct pmu *pmu) 1035 void power_pmu_start_txn(struct pmu *pmu)
1035 { 1036 {
1036 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1037 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1037 1038
1038 perf_pmu_disable(pmu); 1039 perf_pmu_disable(pmu);
1039 cpuhw->group_flag |= PERF_EVENT_TXN; 1040 cpuhw->group_flag |= PERF_EVENT_TXN;
1040 cpuhw->n_txn_start = cpuhw->n_events; 1041 cpuhw->n_txn_start = cpuhw->n_events;
1041 } 1042 }
1042 1043
1043 /* 1044 /*
1044 * Stop group events scheduling transaction 1045 * Stop group events scheduling transaction
1045 * Clear the flag and pmu::enable() will perform the 1046 * Clear the flag and pmu::enable() will perform the
1046 * schedulability test. 1047 * schedulability test.
1047 */ 1048 */
1048 void power_pmu_cancel_txn(struct pmu *pmu) 1049 void power_pmu_cancel_txn(struct pmu *pmu)
1049 { 1050 {
1050 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1051 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1051 1052
1052 cpuhw->group_flag &= ~PERF_EVENT_TXN; 1053 cpuhw->group_flag &= ~PERF_EVENT_TXN;
1053 perf_pmu_enable(pmu); 1054 perf_pmu_enable(pmu);
1054 } 1055 }
1055 1056
1056 /* 1057 /*
1057 * Commit group events scheduling transaction 1058 * Commit group events scheduling transaction
1058 * Perform the group schedulability test as a whole 1059 * Perform the group schedulability test as a whole
1059 * Return 0 if success 1060 * Return 0 if success
1060 */ 1061 */
1061 int power_pmu_commit_txn(struct pmu *pmu) 1062 int power_pmu_commit_txn(struct pmu *pmu)
1062 { 1063 {
1063 struct cpu_hw_events *cpuhw; 1064 struct cpu_hw_events *cpuhw;
1064 long i, n; 1065 long i, n;
1065 1066
1066 if (!ppmu) 1067 if (!ppmu)
1067 return -EAGAIN; 1068 return -EAGAIN;
1068 cpuhw = &__get_cpu_var(cpu_hw_events); 1069 cpuhw = &__get_cpu_var(cpu_hw_events);
1069 n = cpuhw->n_events; 1070 n = cpuhw->n_events;
1070 if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) 1071 if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
1071 return -EAGAIN; 1072 return -EAGAIN;
1072 i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n); 1073 i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
1073 if (i < 0) 1074 if (i < 0)
1074 return -EAGAIN; 1075 return -EAGAIN;
1075 1076
1076 for (i = cpuhw->n_txn_start; i < n; ++i) 1077 for (i = cpuhw->n_txn_start; i < n; ++i)
1077 cpuhw->event[i]->hw.config = cpuhw->events[i]; 1078 cpuhw->event[i]->hw.config = cpuhw->events[i];
1078 1079
1079 cpuhw->group_flag &= ~PERF_EVENT_TXN; 1080 cpuhw->group_flag &= ~PERF_EVENT_TXN;
1080 perf_pmu_enable(pmu); 1081 perf_pmu_enable(pmu);
1081 return 0; 1082 return 0;
1082 } 1083 }
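For context, the start_txn/commit_txn/cancel_txn hooks above are driven by the generic perf core when it schedules a whole event group at once; the rough calling pattern looks like the sketch below (caller-side illustration under assumed control flow, not code from this patch — the real core also del()s any events it already added when the group fails):

/* Sketch of how the generic code drives the transaction interface. */
static int group_sched_in_sketch(struct pmu *pmu, struct perf_event *group)
{
	struct perf_event *sibling;

	pmu->start_txn(pmu);		/* defer the schedulability test */

	if (pmu->add(group, PERF_EF_START))
		goto fail;
	list_for_each_entry(sibling, &group->sibling_list, group_entry)
		if (pmu->add(sibling, PERF_EF_START))
			goto fail;

	if (!pmu->commit_txn(pmu))	/* test the whole group in one go */
		return 0;
fail:
	pmu->cancel_txn(pmu);
	return -EAGAIN;
}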
1083 1084
1084 /* 1085 /*
1085 * Return 1 if we might be able to put event on a limited PMC, 1086 * Return 1 if we might be able to put event on a limited PMC,
1086 * or 0 if not. 1087 * or 0 if not.
1087 * An event can only go on a limited PMC if it counts something 1088 * An event can only go on a limited PMC if it counts something
1088 * that a limited PMC can count, doesn't require interrupts, and 1089 * that a limited PMC can count, doesn't require interrupts, and
1089 * doesn't exclude any processor mode. 1090 * doesn't exclude any processor mode.
1090 */ 1091 */
1091 static int can_go_on_limited_pmc(struct perf_event *event, u64 ev, 1092 static int can_go_on_limited_pmc(struct perf_event *event, u64 ev,
1092 unsigned int flags) 1093 unsigned int flags)
1093 { 1094 {
1094 int n; 1095 int n;
1095 u64 alt[MAX_EVENT_ALTERNATIVES]; 1096 u64 alt[MAX_EVENT_ALTERNATIVES];
1096 1097
1097 if (event->attr.exclude_user 1098 if (event->attr.exclude_user
1098 || event->attr.exclude_kernel 1099 || event->attr.exclude_kernel
1099 || event->attr.exclude_hv 1100 || event->attr.exclude_hv
1100 || event->attr.sample_period) 1101 || event->attr.sample_period)
1101 return 0; 1102 return 0;
1102 1103
1103 if (ppmu->limited_pmc_event(ev)) 1104 if (ppmu->limited_pmc_event(ev))
1104 return 1; 1105 return 1;
1105 1106
1106 /* 1107 /*
1107 * The requested event_id isn't on a limited PMC already; 1108 * The requested event_id isn't on a limited PMC already;
1108 * see if any alternative code goes on a limited PMC. 1109 * see if any alternative code goes on a limited PMC.
1109 */ 1110 */
1110 if (!ppmu->get_alternatives) 1111 if (!ppmu->get_alternatives)
1111 return 0; 1112 return 0;
1112 1113
1113 flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; 1114 flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
1114 n = ppmu->get_alternatives(ev, flags, alt); 1115 n = ppmu->get_alternatives(ev, flags, alt);
1115 1116
1116 return n > 0; 1117 return n > 0;
1117 } 1118 }
1118 1119
1119 /* 1120 /*
1120 * Find an alternative event_id that goes on a normal PMC, if possible, 1121 * Find an alternative event_id that goes on a normal PMC, if possible,
1121 * and return the event_id code, or 0 if there is no such alternative. 1122 * and return the event_id code, or 0 if there is no such alternative.
1122 * (Note: event_id code 0 is "don't count" on all machines.) 1123 * (Note: event_id code 0 is "don't count" on all machines.)
1123 */ 1124 */
1124 static u64 normal_pmc_alternative(u64 ev, unsigned long flags) 1125 static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
1125 { 1126 {
1126 u64 alt[MAX_EVENT_ALTERNATIVES]; 1127 u64 alt[MAX_EVENT_ALTERNATIVES];
1127 int n; 1128 int n;
1128 1129
1129 flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); 1130 flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
1130 n = ppmu->get_alternatives(ev, flags, alt); 1131 n = ppmu->get_alternatives(ev, flags, alt);
1131 if (!n) 1132 if (!n)
1132 return 0; 1133 return 0;
1133 return alt[0]; 1134 return alt[0];
1134 } 1135 }
1135 1136
1136 /* Number of perf_events counting hardware events */ 1137 /* Number of perf_events counting hardware events */
1137 static atomic_t num_events; 1138 static atomic_t num_events;
1138 /* Used to avoid races in calling reserve/release_pmc_hardware */ 1139 /* Used to avoid races in calling reserve/release_pmc_hardware */
1139 static DEFINE_MUTEX(pmc_reserve_mutex); 1140 static DEFINE_MUTEX(pmc_reserve_mutex);
1140 1141
1141 /* 1142 /*
1142 * Release the PMU if this is the last perf_event. 1143 * Release the PMU if this is the last perf_event.
1143 */ 1144 */
1144 static void hw_perf_event_destroy(struct perf_event *event) 1145 static void hw_perf_event_destroy(struct perf_event *event)
1145 { 1146 {
1146 if (!atomic_add_unless(&num_events, -1, 1)) { 1147 if (!atomic_add_unless(&num_events, -1, 1)) {
1147 mutex_lock(&pmc_reserve_mutex); 1148 mutex_lock(&pmc_reserve_mutex);
1148 if (atomic_dec_return(&num_events) == 0) 1149 if (atomic_dec_return(&num_events) == 0)
1149 release_pmc_hardware(); 1150 release_pmc_hardware();
1150 mutex_unlock(&pmc_reserve_mutex); 1151 mutex_unlock(&pmc_reserve_mutex);
1151 } 1152 }
1152 } 1153 }
1153 1154
1154 /* 1155 /*
1155 * Translate a generic cache event_id config to a raw event_id code. 1156 * Translate a generic cache event_id config to a raw event_id code.
1156 */ 1157 */
1157 static int hw_perf_cache_event(u64 config, u64 *eventp) 1158 static int hw_perf_cache_event(u64 config, u64 *eventp)
1158 { 1159 {
1159 unsigned long type, op, result; 1160 unsigned long type, op, result;
1160 int ev; 1161 int ev;
1161 1162
1162 if (!ppmu->cache_events) 1163 if (!ppmu->cache_events)
1163 return -EINVAL; 1164 return -EINVAL;
1164 1165
1165 /* unpack config */ 1166 /* unpack config */
1166 type = config & 0xff; 1167 type = config & 0xff;
1167 op = (config >> 8) & 0xff; 1168 op = (config >> 8) & 0xff;
1168 result = (config >> 16) & 0xff; 1169 result = (config >> 16) & 0xff;
1169 1170
1170 if (type >= PERF_COUNT_HW_CACHE_MAX || 1171 if (type >= PERF_COUNT_HW_CACHE_MAX ||
1171 op >= PERF_COUNT_HW_CACHE_OP_MAX || 1172 op >= PERF_COUNT_HW_CACHE_OP_MAX ||
1172 result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 1173 result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
1173 return -EINVAL; 1174 return -EINVAL;
1174 1175
1175 ev = (*ppmu->cache_events)[type][op][result]; 1176 ev = (*ppmu->cache_events)[type][op][result];
1176 if (ev == 0) 1177 if (ev == 0)
1177 return -EOPNOTSUPP; 1178 return -EOPNOTSUPP;
1178 if (ev == -1) 1179 if (ev == -1)
1179 return -EINVAL; 1180 return -EINVAL;
1180 *eventp = ev; 1181 *eventp = ev;
1181 return 0; 1182 return 0;
1182 } 1183 }
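The unpacking above follows the generic perf cache-event encoding: cache id in bits 0-7, operation in bits 8-15, result in bits 16-23 of attr.config. A concrete example using the standard enum values from the perf ABI:

/* L1-data-cache read misses: 0x10000 (L1D = 0, OP_READ = 0, RESULT_MISS = 1) */
u64 config = PERF_COUNT_HW_CACHE_L1D |
	     (PERF_COUNT_HW_CACHE_OP_READ << 8) |
	     (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);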
1183 1184
1184 static int power_pmu_event_init(struct perf_event *event) 1185 static int power_pmu_event_init(struct perf_event *event)
1185 { 1186 {
1186 u64 ev; 1187 u64 ev;
1187 unsigned long flags; 1188 unsigned long flags;
1188 struct perf_event *ctrs[MAX_HWEVENTS]; 1189 struct perf_event *ctrs[MAX_HWEVENTS];
1189 u64 events[MAX_HWEVENTS]; 1190 u64 events[MAX_HWEVENTS];
1190 unsigned int cflags[MAX_HWEVENTS]; 1191 unsigned int cflags[MAX_HWEVENTS];
1191 int n; 1192 int n;
1192 int err; 1193 int err;
1193 struct cpu_hw_events *cpuhw; 1194 struct cpu_hw_events *cpuhw;
1194 1195
1195 if (!ppmu) 1196 if (!ppmu)
1196 return -ENOENT; 1197 return -ENOENT;
1197 1198
1198 /* does not support taken branch sampling */ 1199 /* does not support taken branch sampling */
1199 if (has_branch_stack(event)) 1200 if (has_branch_stack(event))
1200 return -EOPNOTSUPP; 1201 return -EOPNOTSUPP;
1201 1202
1202 switch (event->attr.type) { 1203 switch (event->attr.type) {
1203 case PERF_TYPE_HARDWARE: 1204 case PERF_TYPE_HARDWARE:
1204 ev = event->attr.config; 1205 ev = event->attr.config;
1205 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) 1206 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
1206 return -EOPNOTSUPP; 1207 return -EOPNOTSUPP;
1207 ev = ppmu->generic_events[ev]; 1208 ev = ppmu->generic_events[ev];
1208 break; 1209 break;
1209 case PERF_TYPE_HW_CACHE: 1210 case PERF_TYPE_HW_CACHE:
1210 err = hw_perf_cache_event(event->attr.config, &ev); 1211 err = hw_perf_cache_event(event->attr.config, &ev);
1211 if (err) 1212 if (err)
1212 return err; 1213 return err;
1213 break; 1214 break;
1214 case PERF_TYPE_RAW: 1215 case PERF_TYPE_RAW:
1215 ev = event->attr.config; 1216 ev = event->attr.config;
1216 break; 1217 break;
1217 default: 1218 default:
1218 return -ENOENT; 1219 return -ENOENT;
1219 } 1220 }
1220 1221
1221 event->hw.config_base = ev; 1222 event->hw.config_base = ev;
1222 event->hw.idx = 0; 1223 event->hw.idx = 0;
1223 1224
1224 /* 1225 /*
1225 * If we are not running on a hypervisor, force the 1226 * If we are not running on a hypervisor, force the
1226 * exclude_hv bit to 0 so that we don't care what 1227 * exclude_hv bit to 0 so that we don't care what
1227 * the user set it to. 1228 * the user set it to.
1228 */ 1229 */
1229 if (!firmware_has_feature(FW_FEATURE_LPAR)) 1230 if (!firmware_has_feature(FW_FEATURE_LPAR))
1230 event->attr.exclude_hv = 0; 1231 event->attr.exclude_hv = 0;
1231 1232
1232 /* 1233 /*
1233 * If this is a per-task event, then we can use 1234 * If this is a per-task event, then we can use
1234 * PM_RUN_* events interchangeably with their non RUN_* 1235 * PM_RUN_* events interchangeably with their non RUN_*
1235 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. 1236 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
1236 * XXX we should check if the task is an idle task. 1237 * XXX we should check if the task is an idle task.
1237 */ 1238 */
1238 flags = 0; 1239 flags = 0;
1239 if (event->attach_state & PERF_ATTACH_TASK) 1240 if (event->attach_state & PERF_ATTACH_TASK)
1240 flags |= PPMU_ONLY_COUNT_RUN; 1241 flags |= PPMU_ONLY_COUNT_RUN;
1241 1242
1242 /* 1243 /*
1243 * If this machine has limited events, check whether this 1244 * If this machine has limited events, check whether this
1244 * event_id could go on a limited event. 1245 * event_id could go on a limited event.
1245 */ 1246 */
1246 if (ppmu->flags & PPMU_LIMITED_PMC5_6) { 1247 if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
1247 if (can_go_on_limited_pmc(event, ev, flags)) { 1248 if (can_go_on_limited_pmc(event, ev, flags)) {
1248 flags |= PPMU_LIMITED_PMC_OK; 1249 flags |= PPMU_LIMITED_PMC_OK;
1249 } else if (ppmu->limited_pmc_event(ev)) { 1250 } else if (ppmu->limited_pmc_event(ev)) {
1250 /* 1251 /*
1251 * The requested event_id is on a limited PMC, 1252 * The requested event_id is on a limited PMC,
1252 * but we can't use a limited PMC; see if any 1253 * but we can't use a limited PMC; see if any
1253 * alternative goes on a normal PMC. 1254 * alternative goes on a normal PMC.
1254 */ 1255 */
1255 ev = normal_pmc_alternative(ev, flags); 1256 ev = normal_pmc_alternative(ev, flags);
1256 if (!ev) 1257 if (!ev)
1257 return -EINVAL; 1258 return -EINVAL;
1258 } 1259 }
1259 } 1260 }
1260 1261
1261 /* 1262 /*
1262 * If this is in a group, check if it can go on with all the 1263 * If this is in a group, check if it can go on with all the
1263 * other hardware events in the group. We assume the event 1264 * other hardware events in the group. We assume the event
1264 * hasn't been linked into its leader's sibling list at this point. 1265 * hasn't been linked into its leader's sibling list at this point.
1265 */ 1266 */
1266 n = 0; 1267 n = 0;
1267 if (event->group_leader != event) { 1268 if (event->group_leader != event) {
1268 n = collect_events(event->group_leader, ppmu->n_counter - 1, 1269 n = collect_events(event->group_leader, ppmu->n_counter - 1,
1269 ctrs, events, cflags); 1270 ctrs, events, cflags);
1270 if (n < 0) 1271 if (n < 0)
1271 return -EINVAL; 1272 return -EINVAL;
1272 } 1273 }
1273 events[n] = ev; 1274 events[n] = ev;
1274 ctrs[n] = event; 1275 ctrs[n] = event;
1275 cflags[n] = flags; 1276 cflags[n] = flags;
1276 if (check_excludes(ctrs, cflags, n, 1)) 1277 if (check_excludes(ctrs, cflags, n, 1))
1277 return -EINVAL; 1278 return -EINVAL;
1278 1279
1279 cpuhw = &get_cpu_var(cpu_hw_events); 1280 cpuhw = &get_cpu_var(cpu_hw_events);
1280 err = power_check_constraints(cpuhw, events, cflags, n + 1); 1281 err = power_check_constraints(cpuhw, events, cflags, n + 1);
1281 put_cpu_var(cpu_hw_events); 1282 put_cpu_var(cpu_hw_events);
1282 if (err) 1283 if (err)
1283 return -EINVAL; 1284 return -EINVAL;
1284 1285
1285 event->hw.config = events[n]; 1286 event->hw.config = events[n];
1286 event->hw.event_base = cflags[n]; 1287 event->hw.event_base = cflags[n];
1287 event->hw.last_period = event->hw.sample_period; 1288 event->hw.last_period = event->hw.sample_period;
1288 local64_set(&event->hw.period_left, event->hw.last_period); 1289 local64_set(&event->hw.period_left, event->hw.last_period);
1289 1290
1290 /* 1291 /*
1291 * See if we need to reserve the PMU. 1292 * See if we need to reserve the PMU.
1292 * If no events are currently in use, then we have to take a 1293 * If no events are currently in use, then we have to take a
1293 * mutex to ensure that we don't race with another task doing 1294 * mutex to ensure that we don't race with another task doing
1294 * reserve_pmc_hardware or release_pmc_hardware. 1295 * reserve_pmc_hardware or release_pmc_hardware.
1295 */ 1296 */
1296 err = 0; 1297 err = 0;
1297 if (!atomic_inc_not_zero(&num_events)) { 1298 if (!atomic_inc_not_zero(&num_events)) {
1298 mutex_lock(&pmc_reserve_mutex); 1299 mutex_lock(&pmc_reserve_mutex);
1299 if (atomic_read(&num_events) == 0 && 1300 if (atomic_read(&num_events) == 0 &&
1300 reserve_pmc_hardware(perf_event_interrupt)) 1301 reserve_pmc_hardware(perf_event_interrupt))
1301 err = -EBUSY; 1302 err = -EBUSY;
1302 else 1303 else
1303 atomic_inc(&num_events); 1304 atomic_inc(&num_events);
1304 mutex_unlock(&pmc_reserve_mutex); 1305 mutex_unlock(&pmc_reserve_mutex);
1305 } 1306 }
1306 event->destroy = hw_perf_event_destroy; 1307 event->destroy = hw_perf_event_destroy;
1307 1308
1308 return err; 1309 return err;
1309 } 1310 }
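For reference, the attr.type/attr.config values that power_pmu_event_init() dispatches on come from userspace via the perf_event_open() syscall; a minimal usage sketch (userspace illustration, not kernel code):

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Open a raw PMU event on the calling thread. */
static int open_raw_event(unsigned long long raw_code)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;	/* hits the PERF_TYPE_RAW case above */
	attr.config = raw_code;		/* becomes event->attr.config */
	attr.exclude_kernel = 1;	/* translated into MMCR0 freeze bits */

	return syscall(__NR_perf_event_open, &attr,
		       0 /* pid: self */, -1 /* cpu: any */,
		       -1 /* group_fd */, 0 /* flags */);
}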
1310 1311
1311 static int power_pmu_event_idx(struct perf_event *event) 1312 static int power_pmu_event_idx(struct perf_event *event)
1312 { 1313 {
1313 return event->hw.idx; 1314 return event->hw.idx;
1314 } 1315 }
1315 1316
1316 ssize_t power_events_sysfs_show(struct device *dev, 1317 ssize_t power_events_sysfs_show(struct device *dev,
1317 struct device_attribute *attr, char *page) 1318 struct device_attribute *attr, char *page)
1318 { 1319 {
1319 struct perf_pmu_events_attr *pmu_attr; 1320 struct perf_pmu_events_attr *pmu_attr;
1320 1321
1321 pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); 1322 pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
1322 1323
1323 return sprintf(page, "event=0x%02llx\n", pmu_attr->id); 1324 return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
1324 } 1325 }
1325 1326
1326 struct pmu power_pmu = { 1327 struct pmu power_pmu = {
1327 .pmu_enable = power_pmu_enable, 1328 .pmu_enable = power_pmu_enable,
1328 .pmu_disable = power_pmu_disable, 1329 .pmu_disable = power_pmu_disable,
1329 .event_init = power_pmu_event_init, 1330 .event_init = power_pmu_event_init,
1330 .add = power_pmu_add, 1331 .add = power_pmu_add,
1331 .del = power_pmu_del, 1332 .del = power_pmu_del,
1332 .start = power_pmu_start, 1333 .start = power_pmu_start,
1333 .stop = power_pmu_stop, 1334 .stop = power_pmu_stop,
1334 .read = power_pmu_read, 1335 .read = power_pmu_read,
1335 .start_txn = power_pmu_start_txn, 1336 .start_txn = power_pmu_start_txn,
1336 .cancel_txn = power_pmu_cancel_txn, 1337 .cancel_txn = power_pmu_cancel_txn,
1337 .commit_txn = power_pmu_commit_txn, 1338 .commit_txn = power_pmu_commit_txn,
1338 .event_idx = power_pmu_event_idx, 1339 .event_idx = power_pmu_event_idx,
1339 }; 1340 };
1340 1341
1341 1342
1342 /* 1343 /*
1343 * A counter has overflowed; update its count and record 1344 * A counter has overflowed; update its count and record
1344 * things if requested. Note that interrupts are hard-disabled 1345 * things if requested. Note that interrupts are hard-disabled
1345 * here so there is no possibility of being interrupted. 1346 * here so there is no possibility of being interrupted.
1346 */ 1347 */
1347 static void record_and_restart(struct perf_event *event, unsigned long val, 1348 static void record_and_restart(struct perf_event *event, unsigned long val,
1348 struct pt_regs *regs) 1349 struct pt_regs *regs)
1349 { 1350 {
1350 u64 period = event->hw.sample_period; 1351 u64 period = event->hw.sample_period;
1351 s64 prev, delta, left; 1352 s64 prev, delta, left;
1352 int record = 0; 1353 int record = 0;
1353 1354
1354 if (event->hw.state & PERF_HES_STOPPED) { 1355 if (event->hw.state & PERF_HES_STOPPED) {
1355 write_pmc(event->hw.idx, 0); 1356 write_pmc(event->hw.idx, 0);
1356 return; 1357 return;
1357 } 1358 }
1358 1359
1359 /* we don't have to worry about interrupts here */ 1360 /* we don't have to worry about interrupts here */
1360 prev = local64_read(&event->hw.prev_count); 1361 prev = local64_read(&event->hw.prev_count);
1361 delta = check_and_compute_delta(prev, val); 1362 delta = check_and_compute_delta(prev, val);
1362 local64_add(delta, &event->count); 1363 local64_add(delta, &event->count);
1363 1364
1364 /* 1365 /*
1365 * See if the total period for this event has expired, 1366 * See if the total period for this event has expired,
1366 * and update for the next period. 1367 * and update for the next period.
1367 */ 1368 */
1368 val = 0; 1369 val = 0;
1369 left = local64_read(&event->hw.period_left) - delta; 1370 left = local64_read(&event->hw.period_left) - delta;
1370 if (delta == 0) 1371 if (delta == 0)
1371 left++; 1372 left++;
1372 if (period) { 1373 if (period) {
1373 if (left <= 0) { 1374 if (left <= 0) {
1374 left += period; 1375 left += period;
1375 if (left <= 0) 1376 if (left <= 0)
1376 left = period; 1377 left = period;
1377 record = siar_valid(regs); 1378 record = siar_valid(regs);
1378 event->hw.last_period = event->hw.sample_period; 1379 event->hw.last_period = event->hw.sample_period;
1379 } 1380 }
1380 if (left < 0x80000000LL) 1381 if (left < 0x80000000LL)
1381 val = 0x80000000LL - left; 1382 val = 0x80000000LL - left;
1382 } 1383 }
1383 1384
1384 write_pmc(event->hw.idx, val); 1385 write_pmc(event->hw.idx, val);
1385 local64_set(&event->hw.prev_count, val); 1386 local64_set(&event->hw.prev_count, val);
1386 local64_set(&event->hw.period_left, left); 1387 local64_set(&event->hw.period_left, left);
1387 perf_event_update_userpage(event); 1388 perf_event_update_userpage(event);
1388 1389
1389 /* 1390 /*
1390 * Finally record data if requested. 1391 * Finally record data if requested.
1391 */ 1392 */
1392 if (record) { 1393 if (record) {
1393 struct perf_sample_data data; 1394 struct perf_sample_data data;
1394 1395
1395 perf_sample_data_init(&data, ~0ULL, event->hw.last_period); 1396 perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
1396 1397
1397 if (event->attr.sample_type & PERF_SAMPLE_ADDR) 1398 if (event->attr.sample_type & PERF_SAMPLE_ADDR)
1398 perf_get_data_addr(regs, &data.addr); 1399 perf_get_data_addr(regs, &data.addr);
1399 1400
1400 if (perf_event_overflow(event, &data, regs)) 1401 if (perf_event_overflow(event, &data, regs))
1401 power_pmu_stop(event, 0); 1402 power_pmu_stop(event, 0);
1402 } 1403 }
1403 } 1404 }
1404 1405
1405 /* 1406 /*
1406 * Called from generic code to get the misc flags (i.e. processor mode) 1407 * Called from generic code to get the misc flags (i.e. processor mode)
1407 * for an event_id. 1408 * for an event_id.
1408 */ 1409 */
1409 unsigned long perf_misc_flags(struct pt_regs *regs) 1410 unsigned long perf_misc_flags(struct pt_regs *regs)
1410 { 1411 {
1411 u32 flags = perf_get_misc_flags(regs); 1412 u32 flags = perf_get_misc_flags(regs);
1412 1413
1413 if (flags) 1414 if (flags)
1414 return flags; 1415 return flags;
1415 return user_mode(regs) ? PERF_RECORD_MISC_USER : 1416 return user_mode(regs) ? PERF_RECORD_MISC_USER :
1416 PERF_RECORD_MISC_KERNEL; 1417 PERF_RECORD_MISC_KERNEL;
1417 } 1418 }
1418 1419
1419 /* 1420 /*
1420 * Called from generic code to get the instruction pointer 1421 * Called from generic code to get the instruction pointer
1421 * for an event_id. 1422 * for an event_id.
1422 */ 1423 */
1423 unsigned long perf_instruction_pointer(struct pt_regs *regs) 1424 unsigned long perf_instruction_pointer(struct pt_regs *regs)
1424 { 1425 {
1425 unsigned long use_siar = regs->result; 1426 unsigned long use_siar = regs->result;
1426 1427
1427 if (use_siar && siar_valid(regs)) 1428 if (use_siar && siar_valid(regs))
1428 return mfspr(SPRN_SIAR) + perf_ip_adjust(regs); 1429 return mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
1429 else if (use_siar) 1430 else if (use_siar)
1430 return 0; // no valid instruction pointer 1431 return 0; // no valid instruction pointer
1431 else 1432 else
1432 return regs->nip; 1433 return regs->nip;
1433 } 1434 }
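perf_ip_adjust(), defined earlier in this file and outside the hunks shown here, is where the MMCRA[SLOT] field from the commit message comes into play: when sampling, SIAR points at the start of the dispatch group and SLOT identifies which instruction in the group the sample belongs to. A rough sketch of that adjustment, with the flag and macro names assumed rather than quoted from the patch:

/*
 * Sketch only: PPMU_HAS_SSLOT, MMCRA_SLOT and MMCRA_SLOT_SHIFT are
 * assumed names; see the real perf_ip_adjust() for the actual code.
 */
static unsigned long perf_ip_adjust_sketch(unsigned long mmcra)
{
	if (ppmu->flags & PPMU_HAS_SSLOT) {	/* the flag added by this patch */
		unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;

		if (slot > 1)
			return 4 * (slot - 1);	/* 4 bytes per instruction */
	}
	return 0;
}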
1434 1435
1435 static bool pmc_overflow_power7(unsigned long val) 1436 static bool pmc_overflow_power7(unsigned long val)
1436 { 1437 {
1437 /* 1438 /*
1438 * Events on POWER7 can roll back if a speculative event doesn't 1439 * Events on POWER7 can roll back if a speculative event doesn't
1439 * eventually complete. Unfortunately in some rare cases they will 1440 * eventually complete. Unfortunately in some rare cases they will
1440 * raise a performance monitor exception. We need to catch this to 1441 * raise a performance monitor exception. We need to catch this to
1441 * ensure we reset the PMC. In all cases the PMC will be 256 or less 1442 * ensure we reset the PMC. In all cases the PMC will be 256 or less
1442 * cycles from overflow. 1443 * cycles from overflow.
1443 * 1444 *
1444 * We only do this if the first pass fails to find any overflowing 1445 * We only do this if the first pass fails to find any overflowing
1445 * PMCs because a user might set a period of less than 256 and we 1446 * PMCs because a user might set a period of less than 256 and we
1446 * don't want to mistakenly reset them. 1447 * don't want to mistakenly reset them.
1447 */ 1448 */
1448 if ((0x80000000 - val) <= 256) 1449 if ((0x80000000 - val) <= 256)
1449 return true; 1450 return true;
1450 1451
1451 return false; 1452 return false;
1452 } 1453 }
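A quick numeric reading of the check above (value chosen for illustration):

/*
 * Example: val = 0x7fffff10 on a rolled-back POWER7 counter gives
 * 0x80000000 - 0x7fffff10 = 0xf0 (240) <= 256, so the PMC is treated
 * as overflowed and the caller records/resets it.
 */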
1453 1454
1454 static bool pmc_overflow(unsigned long val) 1455 static bool pmc_overflow(unsigned long val)
1455 { 1456 {
1456 if ((int)val < 0) 1457 if ((int)val < 0)
1457 return true; 1458 return true;
1458 1459
1459 return false; 1460 return false;
1460 } 1461 }
1461 1462
1462 /* 1463 /*
1463 * Performance monitor interrupt stuff 1464 * Performance monitor interrupt stuff
1464 */ 1465 */
1465 static void perf_event_interrupt(struct pt_regs *regs) 1466 static void perf_event_interrupt(struct pt_regs *regs)
1466 { 1467 {
1467 int i, j; 1468 int i, j;
1468 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1469 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1469 struct perf_event *event; 1470 struct perf_event *event;
1470 unsigned long val[8]; 1471 unsigned long val[8];
1471 int found, active; 1472 int found, active;
1472 int nmi; 1473 int nmi;
1473 1474
1474 if (cpuhw->n_limited) 1475 if (cpuhw->n_limited)
1475 freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), 1476 freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
1476 mfspr(SPRN_PMC6)); 1477 mfspr(SPRN_PMC6));
1477 1478
1478 perf_read_regs(regs); 1479 perf_read_regs(regs);
1479 1480
1480 nmi = perf_intr_is_nmi(regs); 1481 nmi = perf_intr_is_nmi(regs);
1481 if (nmi) 1482 if (nmi)
1482 nmi_enter(); 1483 nmi_enter();
1483 else 1484 else
1484 irq_enter(); 1485 irq_enter();
1485 1486
1486 /* Read all the PMCs since we'll need them a bunch of times */ 1487 /* Read all the PMCs since we'll need them a bunch of times */
1487 for (i = 0; i < ppmu->n_counter; ++i) 1488 for (i = 0; i < ppmu->n_counter; ++i)
1488 val[i] = read_pmc(i + 1); 1489 val[i] = read_pmc(i + 1);
1489 1490
1490 /* Try to find what caused the IRQ */ 1491 /* Try to find what caused the IRQ */
1491 found = 0; 1492 found = 0;
1492 for (i = 0; i < ppmu->n_counter; ++i) { 1493 for (i = 0; i < ppmu->n_counter; ++i) {
1493 if (!pmc_overflow(val[i])) 1494 if (!pmc_overflow(val[i]))
1494 continue; 1495 continue;
1495 if (is_limited_pmc(i + 1)) 1496 if (is_limited_pmc(i + 1))
1496 continue; /* these won't generate IRQs */ 1497 continue; /* these won't generate IRQs */
1497 /* 1498 /*
1498 * We've found one that's overflowed. For active 1499 * We've found one that's overflowed. For active
1499 * counters we need to log this. For inactive 1500 * counters we need to log this. For inactive
1500 * counters, we need to reset them anyway 1501 * counters, we need to reset them anyway
1501 */ 1502 */
1502 found = 1; 1503 found = 1;
1503 active = 0; 1504 active = 0;
1504 for (j = 0; j < cpuhw->n_events; ++j) { 1505 for (j = 0; j < cpuhw->n_events; ++j) {
1505 event = cpuhw->event[j]; 1506 event = cpuhw->event[j];
1506 if (event->hw.idx == (i + 1)) { 1507 if (event->hw.idx == (i + 1)) {
1507 active = 1; 1508 active = 1;
1508 record_and_restart(event, val[i], regs); 1509 record_and_restart(event, val[i], regs);
1509 break; 1510 break;
1510 } 1511 }
1511 } 1512 }
1512 if (!active) 1513 if (!active)
1513 /* reset non active counters that have overflowed */ 1514 /* reset non active counters that have overflowed */
1514 write_pmc(i + 1, 0); 1515 write_pmc(i + 1, 0);
1515 } 1516 }
1516 if (!found && pvr_version_is(PVR_POWER7)) { 1517 if (!found && pvr_version_is(PVR_POWER7)) {
1517 /* check active counters for special buggy p7 overflow */ 1518 /* check active counters for special buggy p7 overflow */
1518 for (i = 0; i < cpuhw->n_events; ++i) { 1519 for (i = 0; i < cpuhw->n_events; ++i) {
1519 event = cpuhw->event[i]; 1520 event = cpuhw->event[i];
1520 if (!event->hw.idx || is_limited_pmc(event->hw.idx)) 1521 if (!event->hw.idx || is_limited_pmc(event->hw.idx))
1521 continue; 1522 continue;
1522 if (pmc_overflow_power7(val[event->hw.idx - 1])) { 1523 if (pmc_overflow_power7(val[event->hw.idx - 1])) {
1523 /* event has overflowed in a buggy way */ 1524 /* event has overflowed in a buggy way */
1524 found = 1; 1525 found = 1;
1525 record_and_restart(event, 1526 record_and_restart(event,
1526 val[event->hw.idx - 1], 1527 val[event->hw.idx - 1],
1527 regs); 1528 regs);
1528 } 1529 }
1529 } 1530 }
1530 } 1531 }
1531 if ((!found) && printk_ratelimit()) 1532 if ((!found) && printk_ratelimit())
1532 printk(KERN_WARNING "Can't find PMC that caused IRQ\n"); 1533 printk(KERN_WARNING "Can't find PMC that caused IRQ\n");
1533 1534
1534 /* 1535 /*
1535 * Reset MMCR0 to its normal value. This will set PMXE and 1536 * Reset MMCR0 to its normal value. This will set PMXE and
1536 * clear FC (freeze counters) and PMAO (perf mon alert occurred) 1537 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
1537 * and thus allow interrupts to occur again. 1538 * and thus allow interrupts to occur again.
1538 * XXX might want to use MSR.PM to keep the events frozen until 1539 * XXX might want to use MSR.PM to keep the events frozen until
1539 * we get back out of this interrupt. 1540 * we get back out of this interrupt.
1540 */ 1541 */
1541 write_mmcr0(cpuhw, cpuhw->mmcr[0]); 1542 write_mmcr0(cpuhw, cpuhw->mmcr[0]);
1542 1543
1543 if (nmi) 1544 if (nmi)
1544 nmi_exit(); 1545 nmi_exit();
1545 else 1546 else
1546 irq_exit(); 1547 irq_exit();
1547 } 1548 }
1548 1549
1549 static void power_pmu_setup(int cpu) 1550 static void power_pmu_setup(int cpu)
1550 { 1551 {
1551 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); 1552 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
1552 1553
1553 if (!ppmu) 1554 if (!ppmu)
1554 return; 1555 return;
1555 memset(cpuhw, 0, sizeof(*cpuhw)); 1556 memset(cpuhw, 0, sizeof(*cpuhw));
1556 cpuhw->mmcr[0] = MMCR0_FC; 1557 cpuhw->mmcr[0] = MMCR0_FC;
1557 } 1558 }
1558 1559
1559 static int __cpuinit 1560 static int __cpuinit
1560 power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) 1561 power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1561 { 1562 {
1562 unsigned int cpu = (long)hcpu; 1563 unsigned int cpu = (long)hcpu;
1563 1564
1564 switch (action & ~CPU_TASKS_FROZEN) { 1565 switch (action & ~CPU_TASKS_FROZEN) {
1565 case CPU_UP_PREPARE: 1566 case CPU_UP_PREPARE:
1566 power_pmu_setup(cpu); 1567 power_pmu_setup(cpu);
1567 break; 1568 break;
1568 1569
1569 default: 1570 default:
1570 break; 1571 break;
1571 } 1572 }
1572 1573
1573 return NOTIFY_OK; 1574 return NOTIFY_OK;
1574 } 1575 }
1575 1576
1576 int __cpuinit register_power_pmu(struct power_pmu *pmu) 1577 int __cpuinit register_power_pmu(struct power_pmu *pmu)
1577 { 1578 {
1578 if (ppmu) 1579 if (ppmu)
1579 return -EBUSY; /* something's already registered */ 1580 return -EBUSY; /* something's already registered */
1580 1581
1581 ppmu = pmu; 1582 ppmu = pmu;
1582 pr_info("%s performance monitor hardware support registered\n", 1583 pr_info("%s performance monitor hardware support registered\n",
1583 pmu->name); 1584 pmu->name);
1584 1585
1585 power_pmu.attr_groups = ppmu->attr_groups; 1586 power_pmu.attr_groups = ppmu->attr_groups;
1586 1587
1587 #ifdef MSR_HV 1588 #ifdef MSR_HV
1588 /* 1589 /*
1589 * Use FCHV to ignore kernel events if MSR.HV is set. 1590 * Use FCHV to ignore kernel events if MSR.HV is set.
1590 */ 1591 */
1591 if (mfmsr() & MSR_HV) 1592 if (mfmsr() & MSR_HV)
1592 freeze_events_kernel = MMCR0_FCHV; 1593 freeze_events_kernel = MMCR0_FCHV;
1593 #endif /* CONFIG_PPC64 */ 1594 #endif /* CONFIG_PPC64 */
1594 1595
1595 perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW); 1596 perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
1596 perf_cpu_notifier(power_pmu_notifier); 1597 perf_cpu_notifier(power_pmu_notifier);
1597 1598
1598 return 0; 1599 return 0;
1599 } 1600 }
1600 1601
arch/powerpc/perf/power5+-pmu.c
1 /* 1 /*
2 * Performance counter support for POWER5+/++ (not POWER5) processors. 2 * Performance counter support for POWER5+/++ (not POWER5) processors.
3 * 3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation. 4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 #include <linux/kernel.h> 11 #include <linux/kernel.h>
12 #include <linux/perf_event.h> 12 #include <linux/perf_event.h>
13 #include <linux/string.h> 13 #include <linux/string.h>
14 #include <asm/reg.h> 14 #include <asm/reg.h>
15 #include <asm/cputable.h> 15 #include <asm/cputable.h>
16 16
17 /* 17 /*
18 * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) 18 * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
19 */ 19 */
20 #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ 20 #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
21 #define PM_PMC_MSK 0xf 21 #define PM_PMC_MSK 0xf
22 #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) 22 #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
23 #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ 23 #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
24 #define PM_UNIT_MSK 0xf 24 #define PM_UNIT_MSK 0xf
25 #define PM_BYTE_SH 12 /* Byte number of event bus to use */ 25 #define PM_BYTE_SH 12 /* Byte number of event bus to use */
26 #define PM_BYTE_MSK 7 26 #define PM_BYTE_MSK 7
27 #define PM_GRS_SH 8 /* Storage subsystem mux select */ 27 #define PM_GRS_SH 8 /* Storage subsystem mux select */
28 #define PM_GRS_MSK 7 28 #define PM_GRS_MSK 7
29 #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ 29 #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
30 #define PM_PMCSEL_MSK 0x7f 30 #define PM_PMCSEL_MSK 0x7f
31 31
32 /* Values in PM_UNIT field */ 32 /* Values in PM_UNIT field */
33 #define PM_FPU 0 33 #define PM_FPU 0
34 #define PM_ISU0 1 34 #define PM_ISU0 1
35 #define PM_IFU 2 35 #define PM_IFU 2
36 #define PM_ISU1 3 36 #define PM_ISU1 3
37 #define PM_IDU 4 37 #define PM_IDU 4
38 #define PM_ISU0_ALT 6 38 #define PM_ISU0_ALT 6
39 #define PM_GRS 7 39 #define PM_GRS 7
40 #define PM_LSU0 8 40 #define PM_LSU0 8
41 #define PM_LSU1 0xc 41 #define PM_LSU1 0xc
42 #define PM_LASTUNIT 0xc 42 #define PM_LASTUNIT 0xc
43 43
44 /* 44 /*
45 * Bits in MMCR1 for POWER5+ 45 * Bits in MMCR1 for POWER5+
46 */ 46 */
47 #define MMCR1_TTM0SEL_SH 62 47 #define MMCR1_TTM0SEL_SH 62
48 #define MMCR1_TTM1SEL_SH 60 48 #define MMCR1_TTM1SEL_SH 60
49 #define MMCR1_TTM2SEL_SH 58 49 #define MMCR1_TTM2SEL_SH 58
50 #define MMCR1_TTM3SEL_SH 56 50 #define MMCR1_TTM3SEL_SH 56
51 #define MMCR1_TTMSEL_MSK 3 51 #define MMCR1_TTMSEL_MSK 3
52 #define MMCR1_TD_CP_DBG0SEL_SH 54 52 #define MMCR1_TD_CP_DBG0SEL_SH 54
53 #define MMCR1_TD_CP_DBG1SEL_SH 52 53 #define MMCR1_TD_CP_DBG1SEL_SH 52
54 #define MMCR1_TD_CP_DBG2SEL_SH 50 54 #define MMCR1_TD_CP_DBG2SEL_SH 50
55 #define MMCR1_TD_CP_DBG3SEL_SH 48 55 #define MMCR1_TD_CP_DBG3SEL_SH 48
56 #define MMCR1_GRS_L2SEL_SH 46 56 #define MMCR1_GRS_L2SEL_SH 46
57 #define MMCR1_GRS_L2SEL_MSK 3 57 #define MMCR1_GRS_L2SEL_MSK 3
58 #define MMCR1_GRS_L3SEL_SH 44 58 #define MMCR1_GRS_L3SEL_SH 44
59 #define MMCR1_GRS_L3SEL_MSK 3 59 #define MMCR1_GRS_L3SEL_MSK 3
60 #define MMCR1_GRS_MCSEL_SH 41 60 #define MMCR1_GRS_MCSEL_SH 41
61 #define MMCR1_GRS_MCSEL_MSK 7 61 #define MMCR1_GRS_MCSEL_MSK 7
62 #define MMCR1_GRS_FABSEL_SH 39 62 #define MMCR1_GRS_FABSEL_SH 39
63 #define MMCR1_GRS_FABSEL_MSK 3 63 #define MMCR1_GRS_FABSEL_MSK 3
64 #define MMCR1_PMC1_ADDER_SEL_SH 35 64 #define MMCR1_PMC1_ADDER_SEL_SH 35
65 #define MMCR1_PMC2_ADDER_SEL_SH 34 65 #define MMCR1_PMC2_ADDER_SEL_SH 34
66 #define MMCR1_PMC3_ADDER_SEL_SH 33 66 #define MMCR1_PMC3_ADDER_SEL_SH 33
67 #define MMCR1_PMC4_ADDER_SEL_SH 32 67 #define MMCR1_PMC4_ADDER_SEL_SH 32
68 #define MMCR1_PMC1SEL_SH 25 68 #define MMCR1_PMC1SEL_SH 25
69 #define MMCR1_PMC2SEL_SH 17 69 #define MMCR1_PMC2SEL_SH 17
70 #define MMCR1_PMC3SEL_SH 9 70 #define MMCR1_PMC3SEL_SH 9
71 #define MMCR1_PMC4SEL_SH 1 71 #define MMCR1_PMC4SEL_SH 1
72 #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) 72 #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
73 #define MMCR1_PMCSEL_MSK 0x7f 73 #define MMCR1_PMCSEL_MSK 0x7f
74 74
75 /* 75 /*
76 * Layout of constraint bits: 76 * Layout of constraint bits:
77 * 6666555555555544444444443333333333222222222211111111110000000000 77 * 6666555555555544444444443333333333222222222211111111110000000000
78 * 3210987654321098765432109876543210987654321098765432109876543210 78 * 3210987654321098765432109876543210987654321098765432109876543210
79 * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><> 79 * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><>
80 * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1 80 * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1
81 * 81 *
82 * NC - number of counters 82 * NC - number of counters
83 * 51: NC error 0x0008_0000_0000_0000 83 * 51: NC error 0x0008_0000_0000_0000
84 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 84 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
85 * 85 *
86 * G0..G3 - GRS mux constraints 86 * G0..G3 - GRS mux constraints
87 * 46-47: GRS_L2SEL value 87 * 46-47: GRS_L2SEL value
88 * 44-45: GRS_L3SEL value 88 * 44-45: GRS_L3SEL value
89 * 41-43: GRS_MCSEL value 89 * 41-43: GRS_MCSEL value
90 * 39-40: GRS_FABSEL value 90 * 39-40: GRS_FABSEL value
91 * Note that these match up with their bit positions in MMCR1 91 * Note that these match up with their bit positions in MMCR1
92 * 92 *
93 * T0 - TTM0 constraint 93 * T0 - TTM0 constraint
94 * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000 94 * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000
95 * 95 *
96 * T1 - TTM1 constraint 96 * T1 - TTM1 constraint
97 * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000 97 * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000
98 * 98 *
99 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS 99 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
100 * 33: UC3 error 0x02_0000_0000 100 * 33: UC3 error 0x02_0000_0000
101 * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000 101 * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000
102 * 31: ISU0 events needed 0x00_8000_0000 102 * 31: ISU0 events needed 0x00_8000_0000
103 * 30: IDU|GRS events needed 0x00_4000_0000 103 * 30: IDU|GRS events needed 0x00_4000_0000
104 * 104 *
105 * B0 105 * B0
106 * 24-27: Byte 0 event source 0x0f00_0000 106 * 24-27: Byte 0 event source 0x0f00_0000
107 * Encoding as for the event code 107 * Encoding as for the event code
108 * 108 *
109 * B1, B2, B3 109 * B1, B2, B3
110 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources 110 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
111 * 111 *
112 * P6 112 * P6
113 * 11: P6 error 0x800 113 * 11: P6 error 0x800
114 * 10-11: Count of events needing PMC6 114 * 10-11: Count of events needing PMC6
115 * 115 *
116 * P1..P5 116 * P1..P5
117 * 0-9: Count of events needing PMC1..PMC5 117 * 0-9: Count of events needing PMC1..PMC5
118 */ 118 */
119 119
120 static const int grsel_shift[8] = { 120 static const int grsel_shift[8] = {
121 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, 121 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
122 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, 122 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
123 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH 123 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
124 }; 124 };
125 125
126 /* Masks and values for using events from the various units */ 126 /* Masks and values for using events from the various units */
127 static unsigned long unit_cons[PM_LASTUNIT+1][2] = { 127 static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
128 [PM_FPU] = { 0x3200000000ul, 0x0100000000ul }, 128 [PM_FPU] = { 0x3200000000ul, 0x0100000000ul },
129 [PM_ISU0] = { 0x0200000000ul, 0x0080000000ul }, 129 [PM_ISU0] = { 0x0200000000ul, 0x0080000000ul },
130 [PM_ISU1] = { 0x3200000000ul, 0x3100000000ul }, 130 [PM_ISU1] = { 0x3200000000ul, 0x3100000000ul },
131 [PM_IFU] = { 0x3200000000ul, 0x2100000000ul }, 131 [PM_IFU] = { 0x3200000000ul, 0x2100000000ul },
132 [PM_IDU] = { 0x0e00000000ul, 0x0040000000ul }, 132 [PM_IDU] = { 0x0e00000000ul, 0x0040000000ul },
133 [PM_GRS] = { 0x0e00000000ul, 0x0c40000000ul }, 133 [PM_GRS] = { 0x0e00000000ul, 0x0c40000000ul },
134 }; 134 };
135 135
136 static int power5p_get_constraint(u64 event, unsigned long *maskp, 136 static int power5p_get_constraint(u64 event, unsigned long *maskp,
137 unsigned long *valp) 137 unsigned long *valp)
138 { 138 {
139 int pmc, byte, unit, sh; 139 int pmc, byte, unit, sh;
140 int bit, fmask; 140 int bit, fmask;
141 unsigned long mask = 0, value = 0; 141 unsigned long mask = 0, value = 0;
142 142
143 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 143 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
144 if (pmc) { 144 if (pmc) {
145 if (pmc > 6) 145 if (pmc > 6)
146 return -1; 146 return -1;
147 sh = (pmc - 1) * 2; 147 sh = (pmc - 1) * 2;
148 mask |= 2 << sh; 148 mask |= 2 << sh;
149 value |= 1 << sh; 149 value |= 1 << sh;
150 if (pmc >= 5 && !(event == 0x500009 || event == 0x600005)) 150 if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
151 return -1; 151 return -1;
152 } 152 }
153 if (event & PM_BUSEVENT_MSK) { 153 if (event & PM_BUSEVENT_MSK) {
154 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; 154 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
155 if (unit > PM_LASTUNIT) 155 if (unit > PM_LASTUNIT)
156 return -1; 156 return -1;
157 if (unit == PM_ISU0_ALT) 157 if (unit == PM_ISU0_ALT)
158 unit = PM_ISU0; 158 unit = PM_ISU0;
159 mask |= unit_cons[unit][0]; 159 mask |= unit_cons[unit][0];
160 value |= unit_cons[unit][1]; 160 value |= unit_cons[unit][1];
161 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; 161 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
162 if (byte >= 4) { 162 if (byte >= 4) {
163 if (unit != PM_LSU1) 163 if (unit != PM_LSU1)
164 return -1; 164 return -1;
165 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ 165 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
166 ++unit; 166 ++unit;
167 byte &= 3; 167 byte &= 3;
168 } 168 }
169 if (unit == PM_GRS) { 169 if (unit == PM_GRS) {
170 bit = event & 7; 170 bit = event & 7;
171 fmask = (bit == 6)? 7: 3; 171 fmask = (bit == 6)? 7: 3;
172 sh = grsel_shift[bit]; 172 sh = grsel_shift[bit];
173 mask |= (unsigned long)fmask << sh; 173 mask |= (unsigned long)fmask << sh;
174 value |= (unsigned long)((event >> PM_GRS_SH) & fmask) 174 value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
175 << sh; 175 << sh;
176 } 176 }
177 /* Set byte lane select field */ 177 /* Set byte lane select field */
178 mask |= 0xfUL << (24 - 4 * byte); 178 mask |= 0xfUL << (24 - 4 * byte);
179 value |= (unsigned long)unit << (24 - 4 * byte); 179 value |= (unsigned long)unit << (24 - 4 * byte);
180 } 180 }
181 if (pmc < 5) { 181 if (pmc < 5) {
182 /* need a counter from PMC1-4 set */ 182 /* need a counter from PMC1-4 set */
183 mask |= 0x8000000000000ul; 183 mask |= 0x8000000000000ul;
184 value |= 0x1000000000000ul; 184 value |= 0x1000000000000ul;
185 } 185 }
186 *maskp = mask; 186 *maskp = mask;
187 *valp = value; 187 *valp = value;
188 return 0; 188 return 0;
189 } 189 }
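To make the encoding above concrete, here is a worked example for a hypothetical bus event with pmc = 2, unit = PM_IDU and byte = 1 (values chosen for illustration only, not taken from an event list):

/*
 * pmc = 2:       mask |= 2 << 2;                value |= 1 << 2;          (one user of PMC2)
 * unit = PM_IDU: mask |= 0x0e00000000ul;        value |= 0x0040000000ul;  (IDU|GRS group)
 * byte = 1:      mask |= 0xful << 20;           value |= 4ul << 20;       (lane 1 driven by IDU)
 * pmc < 5:       mask |= 0x0008000000000000ul;  value |= 0x0001000000000000ul;  (PMC1-4 pool)
 *
 * Loosely speaking, two events can be scheduled together when their values
 * agree wherever their masks overlap; the counting fields (NC, P1..P6) are
 * additionally summed through the add_fields/test_adder mechanism used by
 * the core's power_check_constraints().
 */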
190 190
191 static int power5p_limited_pmc_event(u64 event) 191 static int power5p_limited_pmc_event(u64 event)
192 { 192 {
193 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 193 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
194 194
195 return pmc == 5 || pmc == 6; 195 return pmc == 5 || pmc == 6;
196 } 196 }
197 197
198 #define MAX_ALT 3 /* at most 3 alternatives for any event */ 198 #define MAX_ALT 3 /* at most 3 alternatives for any event */
199 199
200 static const unsigned int event_alternatives[][MAX_ALT] = { 200 static const unsigned int event_alternatives[][MAX_ALT] = {
201 { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */ 201 { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */
202 { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ 202 { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
203 { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */ 203 { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */
204 { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */ 204 { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */
205 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ 205 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
206 { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */ 206 { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */
207 { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */ 207 { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */
208 { 0x100005, 0x600005 }, /* PM_RUN_CYC */ 208 { 0x100005, 0x600005 }, /* PM_RUN_CYC */
209 { 0x100009, 0x200009 }, /* PM_INST_CMPL */ 209 { 0x100009, 0x200009 }, /* PM_INST_CMPL */
210 { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */ 210 { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */
211 { 0x300009, 0x400009 }, /* PM_INST_DISP */ 211 { 0x300009, 0x400009 }, /* PM_INST_DISP */
212 }; 212 };
213 213
214 /* 214 /*
215 * Scan the alternatives table for a match and return the 215 * Scan the alternatives table for a match and return the
216 * index into the alternatives table if found, else -1. 216 * index into the alternatives table if found, else -1.
217 */ 217 */
218 static int find_alternative(unsigned int event) 218 static int find_alternative(unsigned int event)
219 { 219 {
220 int i, j; 220 int i, j;
221 221
222 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { 222 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
223 if (event < event_alternatives[i][0]) 223 if (event < event_alternatives[i][0])
224 break; 224 break;
225 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) 225 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
226 if (event == event_alternatives[i][j]) 226 if (event == event_alternatives[i][j])
227 return i; 227 return i;
228 } 228 }
229 return -1; 229 return -1;
230 } 230 }
231 231
232 static const unsigned char bytedecode_alternatives[4][4] = { 232 static const unsigned char bytedecode_alternatives[4][4] = {
233 /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, 233 /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
234 /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, 234 /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
235 /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, 235 /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
236 /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } 236 /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
237 }; 237 };
238 238
239 /* 239 /*
240 * Some direct events for decodes of event bus byte 3 have alternative 240 * Some direct events for decodes of event bus byte 3 have alternative
241 * PMCSEL values on other counters. This returns the alternative 241 * PMCSEL values on other counters. This returns the alternative
242 * event code for those that do, or -1 otherwise. This also handles 242 * event code for those that do, or -1 otherwise. This also handles
243 * alternative PMCSEL values for add events. 243 * alternative PMCSEL values for add events.
244 */ 244 */
245 static s64 find_alternative_bdecode(u64 event) 245 static s64 find_alternative_bdecode(u64 event)
246 { 246 {
247 int pmc, altpmc, pp, j; 247 int pmc, altpmc, pp, j;
248 248
249 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 249 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
250 if (pmc == 0 || pmc > 4) 250 if (pmc == 0 || pmc > 4)
251 return -1; 251 return -1;
252 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ 252 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
253 pp = event & PM_PMCSEL_MSK; 253 pp = event & PM_PMCSEL_MSK;
254 for (j = 0; j < 4; ++j) { 254 for (j = 0; j < 4; ++j) {
255 if (bytedecode_alternatives[pmc - 1][j] == pp) { 255 if (bytedecode_alternatives[pmc - 1][j] == pp) {
256 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | 256 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
257 (altpmc << PM_PMC_SH) | 257 (altpmc << PM_PMC_SH) |
258 bytedecode_alternatives[altpmc - 1][j]; 258 bytedecode_alternatives[altpmc - 1][j];
259 } 259 }
260 } 260 }
261 261
262 /* new decode alternatives for power5+ */ 262 /* new decode alternatives for power5+ */
263 if (pmc == 1 && (pp == 0x0d || pp == 0x0e)) 263 if (pmc == 1 && (pp == 0x0d || pp == 0x0e))
264 return event + (2 << PM_PMC_SH) + (0x2e - 0x0d); 264 return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
265 if (pmc == 3 && (pp == 0x2e || pp == 0x2f)) 265 if (pmc == 3 && (pp == 0x2e || pp == 0x2f))
266 return event - (2 << PM_PMC_SH) - (0x2e - 0x0d); 266 return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);
267 267
268 /* alternative add event encodings */ 268 /* alternative add event encodings */
269 if (pp == 0x10 || pp == 0x28) 269 if (pp == 0x10 || pp == 0x28)
270 return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) | 270 return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
271 (altpmc << PM_PMC_SH); 271 (altpmc << PM_PMC_SH);
272 272
273 return -1; 273 return -1;
274 } 274 }
275 275
276 static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[]) 276 static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
277 { 277 {
278 int i, j, nalt = 1; 278 int i, j, nalt = 1;
279 int nlim; 279 int nlim;
280 s64 ae; 280 s64 ae;
281 281
282 alt[0] = event; 282 alt[0] = event;
283 nalt = 1; 283 nalt = 1;
284 nlim = power5p_limited_pmc_event(event); 284 nlim = power5p_limited_pmc_event(event);
285 i = find_alternative(event); 285 i = find_alternative(event);
286 if (i >= 0) { 286 if (i >= 0) {
287 for (j = 0; j < MAX_ALT; ++j) { 287 for (j = 0; j < MAX_ALT; ++j) {
288 ae = event_alternatives[i][j]; 288 ae = event_alternatives[i][j];
289 if (ae && ae != event) 289 if (ae && ae != event)
290 alt[nalt++] = ae; 290 alt[nalt++] = ae;
291 nlim += power5p_limited_pmc_event(ae); 291 nlim += power5p_limited_pmc_event(ae);
292 } 292 }
293 } else { 293 } else {
294 ae = find_alternative_bdecode(event); 294 ae = find_alternative_bdecode(event);
295 if (ae > 0) 295 if (ae > 0)
296 alt[nalt++] = ae; 296 alt[nalt++] = ae;
297 } 297 }
298 298
299 if (flags & PPMU_ONLY_COUNT_RUN) { 299 if (flags & PPMU_ONLY_COUNT_RUN) {
300 /* 300 /*
301 * We're only counting in RUN state, 301 * We're only counting in RUN state,
302 * so PM_CYC is equivalent to PM_RUN_CYC 302 * so PM_CYC is equivalent to PM_RUN_CYC
303 * and PM_INST_CMPL === PM_RUN_INST_CMPL. 303 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
304 * This doesn't include alternatives that don't provide 304 * This doesn't include alternatives that don't provide
305 * any extra flexibility in assigning PMCs (e.g. 305 * any extra flexibility in assigning PMCs (e.g.
306 * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC). 306 * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC).
307 * Note that even with these additional alternatives 307 * Note that even with these additional alternatives
308 * we never end up with more than 3 alternatives for any event. 308 * we never end up with more than 3 alternatives for any event.
309 */ 309 */
310 j = nalt; 310 j = nalt;
311 for (i = 0; i < nalt; ++i) { 311 for (i = 0; i < nalt; ++i) {
312 switch (alt[i]) { 312 switch (alt[i]) {
313 case 0xf: /* PM_CYC */ 313 case 0xf: /* PM_CYC */
314 alt[j++] = 0x600005; /* PM_RUN_CYC */ 314 alt[j++] = 0x600005; /* PM_RUN_CYC */
315 ++nlim; 315 ++nlim;
316 break; 316 break;
317 case 0x600005: /* PM_RUN_CYC */ 317 case 0x600005: /* PM_RUN_CYC */
318 alt[j++] = 0xf; 318 alt[j++] = 0xf;
319 break; 319 break;
320 case 0x100009: /* PM_INST_CMPL */ 320 case 0x100009: /* PM_INST_CMPL */
321 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ 321 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
322 ++nlim; 322 ++nlim;
323 break; 323 break;
324 case 0x500009: /* PM_RUN_INST_CMPL */ 324 case 0x500009: /* PM_RUN_INST_CMPL */
325 alt[j++] = 0x100009; /* PM_INST_CMPL */ 325 alt[j++] = 0x100009; /* PM_INST_CMPL */
326 alt[j++] = 0x200009; 326 alt[j++] = 0x200009;
327 break; 327 break;
328 } 328 }
329 } 329 }
330 nalt = j; 330 nalt = j;
331 } 331 }
332 332
333 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { 333 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
334 /* remove the limited PMC events */ 334 /* remove the limited PMC events */
335 j = 0; 335 j = 0;
336 for (i = 0; i < nalt; ++i) { 336 for (i = 0; i < nalt; ++i) {
337 if (!power5p_limited_pmc_event(alt[i])) { 337 if (!power5p_limited_pmc_event(alt[i])) {
338 alt[j] = alt[i]; 338 alt[j] = alt[i];
339 ++j; 339 ++j;
340 } 340 }
341 } 341 }
342 nalt = j; 342 nalt = j;
343 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { 343 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
344 /* remove all but the limited PMC events */ 344 /* remove all but the limited PMC events */
345 j = 0; 345 j = 0;
346 for (i = 0; i < nalt; ++i) { 346 for (i = 0; i < nalt; ++i) {
347 if (power5p_limited_pmc_event(alt[i])) { 347 if (power5p_limited_pmc_event(alt[i])) {
348 alt[j] = alt[i]; 348 alt[j] = alt[i];
349 ++j; 349 ++j;
350 } 350 }
351 } 351 }
352 nalt = j; 352 nalt = j;
353 } 353 }
354 354
355 return nalt; 355 return nalt;
356 } 356 }
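A minimal usage sketch of the routine above (a hypothetical in-kernel test helper, not part of this patch): the caller supplies an alt[] array of MAX_EVENT_ALTERNATIVES entries plus the backend flags; for PM_CYC with run-state-only counting and limited PMCs allowed, the POWER5+ code returns two encodings.

	/* Hypothetical sketch, assuming the static helpers above are in scope. */
	static void show_pm_cyc_alternatives(void)
	{
		u64 alt[MAX_EVENT_ALTERNATIVES];
		int i, n;

		n = power5p_get_alternatives(0xf /* PM_CYC */,
				PPMU_ONLY_COUNT_RUN | PPMU_LIMITED_PMC_OK, alt);
		/* n == 2: alt[0] == 0xf (PM_CYC), alt[1] == 0x600005 (PM_RUN_CYC on PMC6) */
		for (i = 0; i < n; ++i)
			pr_info("alt[%d] = 0x%llx\n", i, alt[i]);
	}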
357 357
358 /* 358 /*
359 * Map of which direct events on which PMCs are marked instruction events. 359 * Map of which direct events on which PMCs are marked instruction events.
360 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event. 360 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
361 * Bit 0 is set if it is marked for all PMCs. 361 * Bit 0 is set if it is marked for all PMCs.
362 * The 0x80 bit indicates a byte decode PMCSEL value. 362 * The 0x80 bit indicates a byte decode PMCSEL value.
363 */ 363 */
364 static unsigned char direct_event_is_marked[0x28] = { 364 static unsigned char direct_event_is_marked[0x28] = {
365 0, /* 00 */ 365 0, /* 00 */
366 0x1f, /* 01 PM_IOPS_CMPL */ 366 0x1f, /* 01 PM_IOPS_CMPL */
367 0x2, /* 02 PM_MRK_GRP_DISP */ 367 0x2, /* 02 PM_MRK_GRP_DISP */
368 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ 368 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
369 0, /* 04 */ 369 0, /* 04 */
370 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */ 370 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
371 0x80, /* 06 */ 371 0x80, /* 06 */
372 0x80, /* 07 */ 372 0x80, /* 07 */
373 0, 0, 0,/* 08 - 0a */ 373 0, 0, 0,/* 08 - 0a */
374 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */ 374 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
375 0, /* 0c */ 375 0, /* 0c */
376 0x80, /* 0d */ 376 0x80, /* 0d */
377 0x80, /* 0e */ 377 0x80, /* 0e */
378 0, /* 0f */ 378 0, /* 0f */
379 0, /* 10 */ 379 0, /* 10 */
380 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */ 380 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
381 0, /* 12 */ 381 0, /* 12 */
382 0x10, /* 13 PM_MRK_GRP_CMPL */ 382 0x10, /* 13 PM_MRK_GRP_CMPL */
383 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */ 383 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
384 0x2, /* 15 PM_MRK_GRP_ISSUED */ 384 0x2, /* 15 PM_MRK_GRP_ISSUED */
385 0x80, /* 16 */ 385 0x80, /* 16 */
386 0x80, /* 17 */ 386 0x80, /* 17 */
387 0, 0, 0, 0, 0, 387 0, 0, 0, 0, 0,
388 0x80, /* 1d */ 388 0x80, /* 1d */
389 0x80, /* 1e */ 389 0x80, /* 1e */
390 0, /* 1f */ 390 0, /* 1f */
391 0x80, /* 20 */ 391 0x80, /* 20 */
392 0x80, /* 21 */ 392 0x80, /* 21 */
393 0x80, /* 22 */ 393 0x80, /* 22 */
394 0x80, /* 23 */ 394 0x80, /* 23 */
395 0x80, /* 24 */ 395 0x80, /* 24 */
396 0x80, /* 25 */ 396 0x80, /* 25 */
397 0x80, /* 26 */ 397 0x80, /* 26 */
398 0x80, /* 27 */ 398 0x80, /* 27 */
399 }; 399 };
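To make the encoding above concrete, a small illustrative check (not kernel code; it mirrors the lookup done in power5p_marked_instr_event() below): entry 0x03 is 0xe, i.e. bits 1-3 set, so PMCSEL 0x03 counts a marked event on PMC 1, 2 or 3 but not on PMC 4, while entries of 0x80 defer to the byte-decode path.

	#include <assert.h>

	int main(void)
	{
		unsigned char entry_03 = 0xe;	/* PMCSEL 0x03: PM_MRK_ST_* family */

		assert(entry_03 & (1 << 2));	/* marked when counted on PMC2 */
		assert(!(entry_03 & (1 << 4)));	/* not marked on PMC4 */
		return 0;
	}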
400 400
401 /* 401 /*
402 * Returns 1 if event counts things relating to marked instructions 402 * Returns 1 if event counts things relating to marked instructions
403 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. 403 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
404 */ 404 */
405 static int power5p_marked_instr_event(u64 event) 405 static int power5p_marked_instr_event(u64 event)
406 { 406 {
407 int pmc, psel; 407 int pmc, psel;
408 int bit, byte, unit; 408 int bit, byte, unit;
409 u32 mask; 409 u32 mask;
410 410
411 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 411 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
412 psel = event & PM_PMCSEL_MSK; 412 psel = event & PM_PMCSEL_MSK;
413 if (pmc >= 5) 413 if (pmc >= 5)
414 return 0; 414 return 0;
415 415
416 bit = -1; 416 bit = -1;
417 if (psel < sizeof(direct_event_is_marked)) { 417 if (psel < sizeof(direct_event_is_marked)) {
418 if (direct_event_is_marked[psel] & (1 << pmc)) 418 if (direct_event_is_marked[psel] & (1 << pmc))
419 return 1; 419 return 1;
420 if (direct_event_is_marked[psel] & 0x80) 420 if (direct_event_is_marked[psel] & 0x80)
421 bit = 4; 421 bit = 4;
422 else if (psel == 0x08) 422 else if (psel == 0x08)
423 bit = pmc - 1; 423 bit = pmc - 1;
424 else if (psel == 0x10) 424 else if (psel == 0x10)
425 bit = 4 - pmc; 425 bit = 4 - pmc;
426 else if (psel == 0x1b && (pmc == 1 || pmc == 3)) 426 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
427 bit = 4; 427 bit = 4;
428 } else if ((psel & 0x48) == 0x40) { 428 } else if ((psel & 0x48) == 0x40) {
429 bit = psel & 7; 429 bit = psel & 7;
430 } else if (psel == 0x28) { 430 } else if (psel == 0x28) {
431 bit = pmc - 1; 431 bit = pmc - 1;
432 } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) { 432 } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
433 bit = 4; 433 bit = 4;
434 } 434 }
435 435
436 if (!(event & PM_BUSEVENT_MSK) || bit == -1) 436 if (!(event & PM_BUSEVENT_MSK) || bit == -1)
437 return 0; 437 return 0;
438 438
439 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; 439 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
440 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; 440 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
441 if (unit == PM_LSU0) { 441 if (unit == PM_LSU0) {
442 /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ 442 /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
443 mask = 0x5dff00; 443 mask = 0x5dff00;
444 } else if (unit == PM_LSU1 && byte >= 4) { 444 } else if (unit == PM_LSU1 && byte >= 4) {
445 byte -= 4; 445 byte -= 4;
446 /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */ 446 /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */
447 mask = 0x5f11c000; 447 mask = 0x5f11c000;
448 } else 448 } else
449 return 0; 449 return 0;
450 450
451 return (mask >> (byte * 8 + bit)) & 1; 451 return (mask >> (byte * 8 + bit)) & 1;
452 } 452 }
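A worked example of the byte/bit masking above (a standalone sketch of the arithmetic, values taken from this file): PM_MRK_DTLB_MISS, event code 0xc50c6, has psel 0x46, so (psel & 0x48) == 0x40 gives bit 6; the unit is PM_LSU1 with byte 5, which is remapped to byte 1 of the LSU1 low-word mask, and bit 14 of 0x5f11c000 is set, so the event is treated as marked.

	#include <assert.h>

	int main(void)
	{
		unsigned int mask = 0x5f11c000;		/* LSU1 low word, as above */
		int byte = 5 - 4, bit = 6;		/* from event code 0xc50c6 */

		assert((mask >> (byte * 8 + bit)) & 1);	/* marked instruction event */
		return 0;
	}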
453 453
454 static int power5p_compute_mmcr(u64 event[], int n_ev, 454 static int power5p_compute_mmcr(u64 event[], int n_ev,
455 unsigned int hwc[], unsigned long mmcr[]) 455 unsigned int hwc[], unsigned long mmcr[])
456 { 456 {
457 unsigned long mmcr1 = 0; 457 unsigned long mmcr1 = 0;
458 unsigned long mmcra = 0; 458 unsigned long mmcra = 0;
459 unsigned int pmc, unit, byte, psel; 459 unsigned int pmc, unit, byte, psel;
460 unsigned int ttm; 460 unsigned int ttm;
461 int i, isbus, bit, grsel; 461 int i, isbus, bit, grsel;
462 unsigned int pmc_inuse = 0; 462 unsigned int pmc_inuse = 0;
463 unsigned char busbyte[4]; 463 unsigned char busbyte[4];
464 unsigned char unituse[16]; 464 unsigned char unituse[16];
465 int ttmuse; 465 int ttmuse;
466 466
467 if (n_ev > 6) 467 if (n_ev > 6)
468 return -1; 468 return -1;
469 469
470 /* First pass to count resource use */ 470 /* First pass to count resource use */
471 memset(busbyte, 0, sizeof(busbyte)); 471 memset(busbyte, 0, sizeof(busbyte));
472 memset(unituse, 0, sizeof(unituse)); 472 memset(unituse, 0, sizeof(unituse));
473 for (i = 0; i < n_ev; ++i) { 473 for (i = 0; i < n_ev; ++i) {
474 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; 474 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
475 if (pmc) { 475 if (pmc) {
476 if (pmc > 6) 476 if (pmc > 6)
477 return -1; 477 return -1;
478 if (pmc_inuse & (1 << (pmc - 1))) 478 if (pmc_inuse & (1 << (pmc - 1)))
479 return -1; 479 return -1;
480 pmc_inuse |= 1 << (pmc - 1); 480 pmc_inuse |= 1 << (pmc - 1);
481 } 481 }
482 if (event[i] & PM_BUSEVENT_MSK) { 482 if (event[i] & PM_BUSEVENT_MSK) {
483 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; 483 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
484 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; 484 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
485 if (unit > PM_LASTUNIT) 485 if (unit > PM_LASTUNIT)
486 return -1; 486 return -1;
487 if (unit == PM_ISU0_ALT) 487 if (unit == PM_ISU0_ALT)
488 unit = PM_ISU0; 488 unit = PM_ISU0;
489 if (byte >= 4) { 489 if (byte >= 4) {
490 if (unit != PM_LSU1) 490 if (unit != PM_LSU1)
491 return -1; 491 return -1;
492 ++unit; 492 ++unit;
493 byte &= 3; 493 byte &= 3;
494 } 494 }
495 if (busbyte[byte] && busbyte[byte] != unit) 495 if (busbyte[byte] && busbyte[byte] != unit)
496 return -1; 496 return -1;
497 busbyte[byte] = unit; 497 busbyte[byte] = unit;
498 unituse[unit] = 1; 498 unituse[unit] = 1;
499 } 499 }
500 } 500 }
501 501
502 /* 502 /*
503 * Assign resources and set multiplexer selects. 503 * Assign resources and set multiplexer selects.
504 * 504 *
505 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only 505 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
506 * choice we have to deal with. 506 * choice we have to deal with.
507 */ 507 */
508 if (unituse[PM_ISU0] & 508 if (unituse[PM_ISU0] &
509 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { 509 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
510 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ 510 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
511 unituse[PM_ISU0] = 0; 511 unituse[PM_ISU0] = 0;
512 } 512 }
513 /* Set TTM[01]SEL fields. */ 513 /* Set TTM[01]SEL fields. */
514 ttmuse = 0; 514 ttmuse = 0;
515 for (i = PM_FPU; i <= PM_ISU1; ++i) { 515 for (i = PM_FPU; i <= PM_ISU1; ++i) {
516 if (!unituse[i]) 516 if (!unituse[i])
517 continue; 517 continue;
518 if (ttmuse++) 518 if (ttmuse++)
519 return -1; 519 return -1;
520 mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH; 520 mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
521 } 521 }
522 ttmuse = 0; 522 ttmuse = 0;
523 for (; i <= PM_GRS; ++i) { 523 for (; i <= PM_GRS; ++i) {
524 if (!unituse[i]) 524 if (!unituse[i])
525 continue; 525 continue;
526 if (ttmuse++) 526 if (ttmuse++)
527 return -1; 527 return -1;
528 mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH; 528 mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
529 } 529 }
530 if (ttmuse > 1) 530 if (ttmuse > 1)
531 return -1; 531 return -1;
532 532
533 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ 533 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
534 for (byte = 0; byte < 4; ++byte) { 534 for (byte = 0; byte < 4; ++byte) {
535 unit = busbyte[byte]; 535 unit = busbyte[byte];
536 if (!unit) 536 if (!unit)
537 continue; 537 continue;
538 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { 538 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
539 /* get ISU0 through TTM1 rather than TTM0 */ 539 /* get ISU0 through TTM1 rather than TTM0 */
540 unit = PM_ISU0_ALT; 540 unit = PM_ISU0_ALT;
541 } else if (unit == PM_LSU1 + 1) { 541 } else if (unit == PM_LSU1 + 1) {
542 /* select lower word of LSU1 for this byte */ 542 /* select lower word of LSU1 for this byte */
543 mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte); 543 mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
544 } 544 }
545 ttm = unit >> 2; 545 ttm = unit >> 2;
546 mmcr1 |= (unsigned long)ttm 546 mmcr1 |= (unsigned long)ttm
547 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); 547 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
548 } 548 }
549 549
550 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ 550 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
551 for (i = 0; i < n_ev; ++i) { 551 for (i = 0; i < n_ev; ++i) {
552 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; 552 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
553 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; 553 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
554 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; 554 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
555 psel = event[i] & PM_PMCSEL_MSK; 555 psel = event[i] & PM_PMCSEL_MSK;
556 isbus = event[i] & PM_BUSEVENT_MSK; 556 isbus = event[i] & PM_BUSEVENT_MSK;
557 if (!pmc) { 557 if (!pmc) {
558 /* Bus event or any-PMC direct event */ 558 /* Bus event or any-PMC direct event */
559 for (pmc = 0; pmc < 4; ++pmc) { 559 for (pmc = 0; pmc < 4; ++pmc) {
560 if (!(pmc_inuse & (1 << pmc))) 560 if (!(pmc_inuse & (1 << pmc)))
561 break; 561 break;
562 } 562 }
563 if (pmc >= 4) 563 if (pmc >= 4)
564 return -1; 564 return -1;
565 pmc_inuse |= 1 << pmc; 565 pmc_inuse |= 1 << pmc;
566 } else if (pmc <= 4) { 566 } else if (pmc <= 4) {
567 /* Direct event */ 567 /* Direct event */
568 --pmc; 568 --pmc;
569 if (isbus && (byte & 2) && 569 if (isbus && (byte & 2) &&
570 (psel == 8 || psel == 0x10 || psel == 0x28)) 570 (psel == 8 || psel == 0x10 || psel == 0x28))
571 /* add events on higher-numbered bus */ 571 /* add events on higher-numbered bus */
572 mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc); 572 mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
573 } else { 573 } else {
574 /* Instructions or run cycles on PMC5/6 */ 574 /* Instructions or run cycles on PMC5/6 */
575 --pmc; 575 --pmc;
576 } 576 }
577 if (isbus && unit == PM_GRS) { 577 if (isbus && unit == PM_GRS) {
578 bit = psel & 7; 578 bit = psel & 7;
579 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; 579 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
580 mmcr1 |= (unsigned long)grsel << grsel_shift[bit]; 580 mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
581 } 581 }
582 if (power5p_marked_instr_event(event[i])) 582 if (power5p_marked_instr_event(event[i]))
583 mmcra |= MMCRA_SAMPLE_ENABLE; 583 mmcra |= MMCRA_SAMPLE_ENABLE;
584 if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1)) 584 if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
585 /* select alternate byte lane */ 585 /* select alternate byte lane */
586 psel |= 0x10; 586 psel |= 0x10;
587 if (pmc <= 3) 587 if (pmc <= 3)
588 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); 588 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
589 hwc[i] = pmc; 589 hwc[i] = pmc;
590 } 590 }
591 591
592 /* Return MMCRx values */ 592 /* Return MMCRx values */
593 mmcr[0] = 0; 593 mmcr[0] = 0;
594 if (pmc_inuse & 1) 594 if (pmc_inuse & 1)
595 mmcr[0] = MMCR0_PMC1CE; 595 mmcr[0] = MMCR0_PMC1CE;
596 if (pmc_inuse & 0x3e) 596 if (pmc_inuse & 0x3e)
597 mmcr[0] |= MMCR0_PMCjCE; 597 mmcr[0] |= MMCR0_PMCjCE;
598 mmcr[1] = mmcr1; 598 mmcr[1] = mmcr1;
599 mmcr[2] = mmcra; 599 mmcr[2] = mmcra;
600 return 0; 600 return 0;
601 } 601 }
602 602
603 static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[]) 603 static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[])
604 { 604 {
605 if (pmc <= 3) 605 if (pmc <= 3)
606 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); 606 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
607 } 607 }
608 608
609 static int power5p_generic_events[] = { 609 static int power5p_generic_events[] = {
610 [PERF_COUNT_HW_CPU_CYCLES] = 0xf, 610 [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
611 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009, 611 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
612 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */ 612 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */
613 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ 613 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
614 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ 614 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
615 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ 615 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
616 }; 616 };
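These indices are what a user-space perf_event_open() call with type PERF_TYPE_HARDWARE selects; a minimal sketch (hypothetical example, not part of this patch) requesting the cache-miss counter that the table above maps to LD_MISS_L1:

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <string.h>
	#include <stdio.h>

	int main(void)
	{
		struct perf_event_attr attr;
		long fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CACHE_MISSES; /* -> 0x3c1088 on POWER5+ */
		attr.disabled = 1;

		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0)
			perror("perf_event_open");
		return 0;
	}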
617 617
618 #define C(x) PERF_COUNT_HW_CACHE_##x 618 #define C(x) PERF_COUNT_HW_CACHE_##x
619 619
620 /* 620 /*
621 * Table of generalized cache-related events. 621 * Table of generalized cache-related events.
622 * 0 means not supported, -1 means nonsensical, other values 622 * 0 means not supported, -1 means nonsensical, other values
623 * are event codes. 623 * are event codes.
624 */ 624 */
625 static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { 625 static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
626 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ 626 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
627 [C(OP_READ)] = { 0x1c10a8, 0x3c1088 }, 627 [C(OP_READ)] = { 0x1c10a8, 0x3c1088 },
628 [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 }, 628 [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 },
629 [C(OP_PREFETCH)] = { 0xc70e7, -1 }, 629 [C(OP_PREFETCH)] = { 0xc70e7, -1 },
630 }, 630 },
631 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ 631 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
632 [C(OP_READ)] = { 0, 0 }, 632 [C(OP_READ)] = { 0, 0 },
633 [C(OP_WRITE)] = { -1, -1 }, 633 [C(OP_WRITE)] = { -1, -1 },
634 [C(OP_PREFETCH)] = { 0, 0 }, 634 [C(OP_PREFETCH)] = { 0, 0 },
635 }, 635 },
636 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ 636 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
637 [C(OP_READ)] = { 0, 0 }, 637 [C(OP_READ)] = { 0, 0 },
638 [C(OP_WRITE)] = { 0, 0 }, 638 [C(OP_WRITE)] = { 0, 0 },
639 [C(OP_PREFETCH)] = { 0xc50c3, 0 }, 639 [C(OP_PREFETCH)] = { 0xc50c3, 0 },
640 }, 640 },
641 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ 641 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
642 [C(OP_READ)] = { 0xc20e4, 0x800c4 }, 642 [C(OP_READ)] = { 0xc20e4, 0x800c4 },
643 [C(OP_WRITE)] = { -1, -1 }, 643 [C(OP_WRITE)] = { -1, -1 },
644 [C(OP_PREFETCH)] = { -1, -1 }, 644 [C(OP_PREFETCH)] = { -1, -1 },
645 }, 645 },
646 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ 646 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
647 [C(OP_READ)] = { 0, 0x800c0 }, 647 [C(OP_READ)] = { 0, 0x800c0 },
648 [C(OP_WRITE)] = { -1, -1 }, 648 [C(OP_WRITE)] = { -1, -1 },
649 [C(OP_PREFETCH)] = { -1, -1 }, 649 [C(OP_PREFETCH)] = { -1, -1 },
650 }, 650 },
651 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ 651 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
652 [C(OP_READ)] = { 0x230e4, 0x230e5 }, 652 [C(OP_READ)] = { 0x230e4, 0x230e5 },
653 [C(OP_WRITE)] = { -1, -1 }, 653 [C(OP_WRITE)] = { -1, -1 },
654 [C(OP_PREFETCH)] = { -1, -1 }, 654 [C(OP_PREFETCH)] = { -1, -1 },
655 }, 655 },
656 [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ 656 [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
657 [C(OP_READ)] = { -1, -1 }, 657 [C(OP_READ)] = { -1, -1 },
658 [C(OP_WRITE)] = { -1, -1 }, 658 [C(OP_WRITE)] = { -1, -1 },
659 [C(OP_PREFETCH)] = { -1, -1 }, 659 [C(OP_PREFETCH)] = { -1, -1 },
660 }, 660 },
661 }; 661 };
662 662
663 static struct power_pmu power5p_pmu = { 663 static struct power_pmu power5p_pmu = {
664 .name = "POWER5+/++", 664 .name = "POWER5+/++",
665 .n_counter = 6, 665 .n_counter = 6,
666 .max_alternatives = MAX_ALT, 666 .max_alternatives = MAX_ALT,
667 .add_fields = 0x7000000000055ul, 667 .add_fields = 0x7000000000055ul,
668 .test_adder = 0x3000040000000ul, 668 .test_adder = 0x3000040000000ul,
669 .compute_mmcr = power5p_compute_mmcr, 669 .compute_mmcr = power5p_compute_mmcr,
670 .get_constraint = power5p_get_constraint, 670 .get_constraint = power5p_get_constraint,
671 .get_alternatives = power5p_get_alternatives, 671 .get_alternatives = power5p_get_alternatives,
672 .disable_pmc = power5p_disable_pmc, 672 .disable_pmc = power5p_disable_pmc,
673 .limited_pmc_event = power5p_limited_pmc_event, 673 .limited_pmc_event = power5p_limited_pmc_event,
674 .flags = PPMU_LIMITED_PMC5_6, 674 .flags = PPMU_LIMITED_PMC5_6 | PPMU_HAS_SSLOT,
675 .n_generic = ARRAY_SIZE(power5p_generic_events), 675 .n_generic = ARRAY_SIZE(power5p_generic_events),
676 .generic_events = power5p_generic_events, 676 .generic_events = power5p_generic_events,
677 .cache_events = &power5p_cache_events, 677 .cache_events = &power5p_cache_events,
678 }; 678 };
679 679
680 static int __init init_power5p_pmu(void) 680 static int __init init_power5p_pmu(void)
681 { 681 {
682 if (!cur_cpu_spec->oprofile_cpu_type || 682 if (!cur_cpu_spec->oprofile_cpu_type ||
683 (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+") 683 (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
684 && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++"))) 684 && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++")))
685 return -ENODEV; 685 return -ENODEV;
686 686
687 return register_power_pmu(&power5p_pmu); 687 return register_power_pmu(&power5p_pmu);
688 } 688 }
689 689
690 early_initcall(init_power5p_pmu); 690 early_initcall(init_power5p_pmu);
691 691
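The consumer of the PPMU_HAS_SSLOT flag added to the .flags line above lives in the generic powerpc perf code (core-book3s.c, not shown in this hunk). Roughly, perf_ip_adjust() only reads MMCRA[SLOT] when the backend advertises the field; the sketch below is an approximation of the post-patch logic, assuming the existing MMCRA_SLOT/MMCRA_SAMPLE_ENABLE definitions and the convention that the sampled MMCRA value is stashed in regs->dsisr.

	static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
	{
		unsigned long mmcra = regs->dsisr;

		/* Only trust MMCRA[SLOT] if the backend says the field exists. */
		if ((ppmu->flags & PPMU_HAS_SSLOT) && (mmcra & MMCRA_SAMPLE_ENABLE)) {
			unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;

			if (slot > 1)
				return 4 * (slot - 1);
		}
		return 0;
	}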
arch/powerpc/perf/power5-pmu.c
1 /* 1 /*
2 * Performance counter support for POWER5 (not POWER5++) processors. 2 * Performance counter support for POWER5 (not POWER5++) processors.
3 * 3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation. 4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 #include <linux/kernel.h> 11 #include <linux/kernel.h>
12 #include <linux/perf_event.h> 12 #include <linux/perf_event.h>
13 #include <linux/string.h> 13 #include <linux/string.h>
14 #include <asm/reg.h> 14 #include <asm/reg.h>
15 #include <asm/cputable.h> 15 #include <asm/cputable.h>
16 16
17 /* 17 /*
18 * Bits in event code for POWER5 (not POWER5++) 18 * Bits in event code for POWER5 (not POWER5++)
19 */ 19 */
20 #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ 20 #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
21 #define PM_PMC_MSK 0xf 21 #define PM_PMC_MSK 0xf
22 #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) 22 #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
23 #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ 23 #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
24 #define PM_UNIT_MSK 0xf 24 #define PM_UNIT_MSK 0xf
25 #define PM_BYTE_SH 12 /* Byte number of event bus to use */ 25 #define PM_BYTE_SH 12 /* Byte number of event bus to use */
26 #define PM_BYTE_MSK 7 26 #define PM_BYTE_MSK 7
27 #define PM_GRS_SH 8 /* Storage subsystem mux select */ 27 #define PM_GRS_SH 8 /* Storage subsystem mux select */
28 #define PM_GRS_MSK 7 28 #define PM_GRS_MSK 7
29 #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ 29 #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
30 #define PM_PMCSEL_MSK 0x7f 30 #define PM_PMCSEL_MSK 0x7f
31 31
32 /* Values in PM_UNIT field */ 32 /* Values in PM_UNIT field */
33 #define PM_FPU 0 33 #define PM_FPU 0
34 #define PM_ISU0 1 34 #define PM_ISU0 1
35 #define PM_IFU 2 35 #define PM_IFU 2
36 #define PM_ISU1 3 36 #define PM_ISU1 3
37 #define PM_IDU 4 37 #define PM_IDU 4
38 #define PM_ISU0_ALT 6 38 #define PM_ISU0_ALT 6
39 #define PM_GRS 7 39 #define PM_GRS 7
40 #define PM_LSU0 8 40 #define PM_LSU0 8
41 #define PM_LSU1 0xc 41 #define PM_LSU1 0xc
42 #define PM_LASTUNIT 0xc 42 #define PM_LASTUNIT 0xc
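For reference, a small sketch (user-space, illustrative only) decoding one of the event codes used later in this file, 0x3c1088 (LD_MISS_L1), against the field definitions above: PMC 3, unit PM_LSU1, byte 1, PMCSEL 0x08, with the bus-event bit set.

	#include <assert.h>

	int main(void)
	{
		unsigned long event = 0x3c1088;		/* LD_MISS_L1 */

		assert(((event >> 20) & 0xf) == 3);	/* PM_PMC_SH/MSK: PMC3 */
		assert(((event >> 16) & 0xf) == 0xc);	/* PM_UNIT_SH/MSK: PM_LSU1 */
		assert(((event >> 12) & 0x7) == 1);	/* PM_BYTE_SH/MSK: byte 1 */
		assert((event & 0x7f) == 0x08);		/* PM_PMCSEL_MSK */
		assert(event & 0x80);			/* PM_BUSEVENT_MSK: uses event bus */
		return 0;
	}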
43 43
44 /* 44 /*
45 * Bits in MMCR1 for POWER5 45 * Bits in MMCR1 for POWER5
46 */ 46 */
47 #define MMCR1_TTM0SEL_SH 62 47 #define MMCR1_TTM0SEL_SH 62
48 #define MMCR1_TTM1SEL_SH 60 48 #define MMCR1_TTM1SEL_SH 60
49 #define MMCR1_TTM2SEL_SH 58 49 #define MMCR1_TTM2SEL_SH 58
50 #define MMCR1_TTM3SEL_SH 56 50 #define MMCR1_TTM3SEL_SH 56
51 #define MMCR1_TTMSEL_MSK 3 51 #define MMCR1_TTMSEL_MSK 3
52 #define MMCR1_TD_CP_DBG0SEL_SH 54 52 #define MMCR1_TD_CP_DBG0SEL_SH 54
53 #define MMCR1_TD_CP_DBG1SEL_SH 52 53 #define MMCR1_TD_CP_DBG1SEL_SH 52
54 #define MMCR1_TD_CP_DBG2SEL_SH 50 54 #define MMCR1_TD_CP_DBG2SEL_SH 50
55 #define MMCR1_TD_CP_DBG3SEL_SH 48 55 #define MMCR1_TD_CP_DBG3SEL_SH 48
56 #define MMCR1_GRS_L2SEL_SH 46 56 #define MMCR1_GRS_L2SEL_SH 46
57 #define MMCR1_GRS_L2SEL_MSK 3 57 #define MMCR1_GRS_L2SEL_MSK 3
58 #define MMCR1_GRS_L3SEL_SH 44 58 #define MMCR1_GRS_L3SEL_SH 44
59 #define MMCR1_GRS_L3SEL_MSK 3 59 #define MMCR1_GRS_L3SEL_MSK 3
60 #define MMCR1_GRS_MCSEL_SH 41 60 #define MMCR1_GRS_MCSEL_SH 41
61 #define MMCR1_GRS_MCSEL_MSK 7 61 #define MMCR1_GRS_MCSEL_MSK 7
62 #define MMCR1_GRS_FABSEL_SH 39 62 #define MMCR1_GRS_FABSEL_SH 39
63 #define MMCR1_GRS_FABSEL_MSK 3 63 #define MMCR1_GRS_FABSEL_MSK 3
64 #define MMCR1_PMC1_ADDER_SEL_SH 35 64 #define MMCR1_PMC1_ADDER_SEL_SH 35
65 #define MMCR1_PMC2_ADDER_SEL_SH 34 65 #define MMCR1_PMC2_ADDER_SEL_SH 34
66 #define MMCR1_PMC3_ADDER_SEL_SH 33 66 #define MMCR1_PMC3_ADDER_SEL_SH 33
67 #define MMCR1_PMC4_ADDER_SEL_SH 32 67 #define MMCR1_PMC4_ADDER_SEL_SH 32
68 #define MMCR1_PMC1SEL_SH 25 68 #define MMCR1_PMC1SEL_SH 25
69 #define MMCR1_PMC2SEL_SH 17 69 #define MMCR1_PMC2SEL_SH 17
70 #define MMCR1_PMC3SEL_SH 9 70 #define MMCR1_PMC3SEL_SH 9
71 #define MMCR1_PMC4SEL_SH 1 71 #define MMCR1_PMC4SEL_SH 1
72 #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) 72 #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
73 #define MMCR1_PMCSEL_MSK 0x7f 73 #define MMCR1_PMCSEL_MSK 0x7f
74 74
75 /* 75 /*
76 * Layout of constraint bits: 76 * Layout of constraint bits:
77 * 6666555555555544444444443333333333222222222211111111110000000000 77 * 6666555555555544444444443333333333222222222211111111110000000000
78 * 3210987654321098765432109876543210987654321098765432109876543210 78 * 3210987654321098765432109876543210987654321098765432109876543210
79 * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><> 79 * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><>
80 * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1 80 * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1
81 * 81 *
82 * T0 - TTM0 constraint 82 * T0 - TTM0 constraint
83 * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000 83 * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000
84 * 84 *
85 * T1 - TTM1 constraint 85 * T1 - TTM1 constraint
86 * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000 86 * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000
87 * 87 *
88 * NC - number of counters 88 * NC - number of counters
89 * 51: NC error 0x0008_0000_0000_0000 89 * 51: NC error 0x0008_0000_0000_0000
90 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 90 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
91 * 91 *
92 * G0..G3 - GRS mux constraints 92 * G0..G3 - GRS mux constraints
93 * 46-47: GRS_L2SEL value 93 * 46-47: GRS_L2SEL value
94 * 44-45: GRS_L3SEL value 94 * 44-45: GRS_L3SEL value
95 * 41-43: GRS_MCSEL value 95 * 41-43: GRS_MCSEL value
96 * 39-40: GRS_FABSEL value 96 * 39-40: GRS_FABSEL value
97 * Note that these match up with their bit positions in MMCR1 97 * Note that these match up with their bit positions in MMCR1
98 * 98 *
99 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS 99 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
100 * 37: UC3 error 0x20_0000_0000 100 * 37: UC3 error 0x20_0000_0000
101 * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000 101 * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000
102 * 35: ISU0 events needed 0x08_0000_0000 102 * 35: ISU0 events needed 0x08_0000_0000
103 * 34: IDU|GRS events needed 0x04_0000_0000 103 * 34: IDU|GRS events needed 0x04_0000_0000
104 * 104 *
105 * PS1 105 * PS1
106 * 33: PS1 error 0x2_0000_0000 106 * 33: PS1 error 0x2_0000_0000
107 * 31-32: count of events needing PMC1/2 0x1_8000_0000 107 * 31-32: count of events needing PMC1/2 0x1_8000_0000
108 * 108 *
109 * PS2 109 * PS2
110 * 30: PS2 error 0x4000_0000 110 * 30: PS2 error 0x4000_0000
111 * 28-29: count of events needing PMC3/4 0x3000_0000 111 * 28-29: count of events needing PMC3/4 0x3000_0000
112 * 112 *
113 * B0 113 * B0
114 * 24-27: Byte 0 event source 0x0f00_0000 114 * 24-27: Byte 0 event source 0x0f00_0000
115 * Encoding as for the event code 115 * Encoding as for the event code
116 * 116 *
117 * B1, B2, B3 117 * B1, B2, B3
118 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources 118 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
119 * 119 *
120 * P1..P6 120 * P1..P6
121 * 0-11: Count of events needing PMC1..PMC6 121 * 0-11: Count of events needing PMC1..PMC6
122 */ 122 */
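A worked example of this layout (a sketch checking the arithmetic, not kernel code): for PM_INST_CMPL, event 0x100009 (PMC1, not a bus event), power5_get_constraint() below sets the P1 field, the PS1 counter field and the NC field, producing the mask/value pair asserted here.

	#include <assert.h>

	int main(void)
	{
		unsigned long long mask = 0, value = 0;
		int pmc = 1, sh = (pmc - 1) * 2;

		mask  |= 2ULL << sh;			/* P1 field (bits 0-1) */
		value |= 1ULL << sh;			/* one event needs PMC1 */
		mask  |= 0x200000000ULL;		/* PS1: PMC1/2 usage counter */
		value |= 0x080000000ULL;
		mask  |= 0x8000000000000ULL;		/* NC: needs a PMC1-4 counter */
		value |= 0x1000000000000ULL;

		assert(mask  == 0x8000200000002ULL);
		assert(value == 0x1000080000001ULL);
		return 0;
	}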
123 123
124 static const int grsel_shift[8] = { 124 static const int grsel_shift[8] = {
125 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, 125 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
126 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, 126 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
127 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH 127 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
128 }; 128 };
129 129
130 /* Masks and values for using events from the various units */ 130 /* Masks and values for using events from the various units */
131 static unsigned long unit_cons[PM_LASTUNIT+1][2] = { 131 static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
132 [PM_FPU] = { 0xc0002000000000ul, 0x00001000000000ul }, 132 [PM_FPU] = { 0xc0002000000000ul, 0x00001000000000ul },
133 [PM_ISU0] = { 0x00002000000000ul, 0x00000800000000ul }, 133 [PM_ISU0] = { 0x00002000000000ul, 0x00000800000000ul },
134 [PM_ISU1] = { 0xc0002000000000ul, 0xc0001000000000ul }, 134 [PM_ISU1] = { 0xc0002000000000ul, 0xc0001000000000ul },
135 [PM_IFU] = { 0xc0002000000000ul, 0x80001000000000ul }, 135 [PM_IFU] = { 0xc0002000000000ul, 0x80001000000000ul },
136 [PM_IDU] = { 0x30002000000000ul, 0x00000400000000ul }, 136 [PM_IDU] = { 0x30002000000000ul, 0x00000400000000ul },
137 [PM_GRS] = { 0x30002000000000ul, 0x30000400000000ul }, 137 [PM_GRS] = { 0x30002000000000ul, 0x30000400000000ul },
138 }; 138 };
139 139
140 static int power5_get_constraint(u64 event, unsigned long *maskp, 140 static int power5_get_constraint(u64 event, unsigned long *maskp,
141 unsigned long *valp) 141 unsigned long *valp)
142 { 142 {
143 int pmc, byte, unit, sh; 143 int pmc, byte, unit, sh;
144 int bit, fmask; 144 int bit, fmask;
145 unsigned long mask = 0, value = 0; 145 unsigned long mask = 0, value = 0;
146 int grp = -1; 146 int grp = -1;
147 147
148 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 148 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
149 if (pmc) { 149 if (pmc) {
150 if (pmc > 6) 150 if (pmc > 6)
151 return -1; 151 return -1;
152 sh = (pmc - 1) * 2; 152 sh = (pmc - 1) * 2;
153 mask |= 2 << sh; 153 mask |= 2 << sh;
154 value |= 1 << sh; 154 value |= 1 << sh;
155 if (pmc <= 4) 155 if (pmc <= 4)
156 grp = (pmc - 1) >> 1; 156 grp = (pmc - 1) >> 1;
157 else if (event != 0x500009 && event != 0x600005) 157 else if (event != 0x500009 && event != 0x600005)
158 return -1; 158 return -1;
159 } 159 }
160 if (event & PM_BUSEVENT_MSK) { 160 if (event & PM_BUSEVENT_MSK) {
161 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; 161 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
162 if (unit > PM_LASTUNIT) 162 if (unit > PM_LASTUNIT)
163 return -1; 163 return -1;
164 if (unit == PM_ISU0_ALT) 164 if (unit == PM_ISU0_ALT)
165 unit = PM_ISU0; 165 unit = PM_ISU0;
166 mask |= unit_cons[unit][0]; 166 mask |= unit_cons[unit][0];
167 value |= unit_cons[unit][1]; 167 value |= unit_cons[unit][1];
168 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; 168 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
169 if (byte >= 4) { 169 if (byte >= 4) {
170 if (unit != PM_LSU1) 170 if (unit != PM_LSU1)
171 return -1; 171 return -1;
172 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ 172 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
173 ++unit; 173 ++unit;
174 byte &= 3; 174 byte &= 3;
175 } 175 }
176 if (unit == PM_GRS) { 176 if (unit == PM_GRS) {
177 bit = event & 7; 177 bit = event & 7;
178 fmask = (bit == 6)? 7: 3; 178 fmask = (bit == 6)? 7: 3;
179 sh = grsel_shift[bit]; 179 sh = grsel_shift[bit];
180 mask |= (unsigned long)fmask << sh; 180 mask |= (unsigned long)fmask << sh;
181 value |= (unsigned long)((event >> PM_GRS_SH) & fmask) 181 value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
182 << sh; 182 << sh;
183 } 183 }
184 /* 184 /*
185 * Bus events on bytes 0 and 2 can be counted 185 * Bus events on bytes 0 and 2 can be counted
186 * on PMC1/2; bytes 1 and 3 on PMC3/4. 186 * on PMC1/2; bytes 1 and 3 on PMC3/4.
187 */ 187 */
188 if (!pmc) 188 if (!pmc)
189 grp = byte & 1; 189 grp = byte & 1;
190 /* Set byte lane select field */ 190 /* Set byte lane select field */
191 mask |= 0xfUL << (24 - 4 * byte); 191 mask |= 0xfUL << (24 - 4 * byte);
192 value |= (unsigned long)unit << (24 - 4 * byte); 192 value |= (unsigned long)unit << (24 - 4 * byte);
193 } 193 }
194 if (grp == 0) { 194 if (grp == 0) {
195 /* increment PMC1/2 field */ 195 /* increment PMC1/2 field */
196 mask |= 0x200000000ul; 196 mask |= 0x200000000ul;
197 value |= 0x080000000ul; 197 value |= 0x080000000ul;
198 } else if (grp == 1) { 198 } else if (grp == 1) {
199 /* increment PMC3/4 field */ 199 /* increment PMC3/4 field */
200 mask |= 0x40000000ul; 200 mask |= 0x40000000ul;
201 value |= 0x10000000ul; 201 value |= 0x10000000ul;
202 } 202 }
203 if (pmc < 5) { 203 if (pmc < 5) {
204 /* need a counter from PMC1-4 set */ 204 /* need a counter from PMC1-4 set */
205 mask |= 0x8000000000000ul; 205 mask |= 0x8000000000000ul;
206 value |= 0x1000000000000ul; 206 value |= 0x1000000000000ul;
207 } 207 }
208 *maskp = mask; 208 *maskp = mask;
209 *valp = value; 209 *valp = value;
210 return 0; 210 return 0;
211 } 211 }
212 212
213 #define MAX_ALT 3 /* at most 3 alternatives for any event */ 213 #define MAX_ALT 3 /* at most 3 alternatives for any event */
214 214
215 static const unsigned int event_alternatives[][MAX_ALT] = { 215 static const unsigned int event_alternatives[][MAX_ALT] = {
216 { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ 216 { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
217 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ 217 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
218 { 0x100005, 0x600005 }, /* PM_RUN_CYC */ 218 { 0x100005, 0x600005 }, /* PM_RUN_CYC */
219 { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */ 219 { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */
220 { 0x300009, 0x400009 }, /* PM_INST_DISP */ 220 { 0x300009, 0x400009 }, /* PM_INST_DISP */
221 }; 221 };
222 222
223 /* 223 /*
224 * Scan the alternatives table for a match and return the 224 * Scan the alternatives table for a match and return the
225 * index into the alternatives table if found, else -1. 225 * index into the alternatives table if found, else -1.
226 */ 226 */
227 static int find_alternative(u64 event) 227 static int find_alternative(u64 event)
228 { 228 {
229 int i, j; 229 int i, j;
230 230
231 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { 231 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
232 if (event < event_alternatives[i][0]) 232 if (event < event_alternatives[i][0])
233 break; 233 break;
234 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) 234 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
235 if (event == event_alternatives[i][j]) 235 if (event == event_alternatives[i][j])
236 return i; 236 return i;
237 } 237 }
238 return -1; 238 return -1;
239 } 239 }
240 240
241 static const unsigned char bytedecode_alternatives[4][4] = { 241 static const unsigned char bytedecode_alternatives[4][4] = {
242 /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, 242 /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
243 /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, 243 /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
244 /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, 244 /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
245 /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } 245 /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
246 }; 246 };
247 247
248 /* 248 /*
249 * Some direct events for decodes of event bus byte 3 have alternative 249 * Some direct events for decodes of event bus byte 3 have alternative
250 * PMCSEL values on other counters. This returns the alternative 250 * PMCSEL values on other counters. This returns the alternative
251 * event code for those that do, or -1 otherwise. 251 * event code for those that do, or -1 otherwise.
252 */ 252 */
253 static s64 find_alternative_bdecode(u64 event) 253 static s64 find_alternative_bdecode(u64 event)
254 { 254 {
255 int pmc, altpmc, pp, j; 255 int pmc, altpmc, pp, j;
256 256
257 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 257 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
258 if (pmc == 0 || pmc > 4) 258 if (pmc == 0 || pmc > 4)
259 return -1; 259 return -1;
260 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ 260 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
261 pp = event & PM_PMCSEL_MSK; 261 pp = event & PM_PMCSEL_MSK;
262 for (j = 0; j < 4; ++j) { 262 for (j = 0; j < 4; ++j) {
263 if (bytedecode_alternatives[pmc - 1][j] == pp) { 263 if (bytedecode_alternatives[pmc - 1][j] == pp) {
264 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | 264 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
265 (altpmc << PM_PMC_SH) | 265 (altpmc << PM_PMC_SH) |
266 bytedecode_alternatives[altpmc - 1][j]; 266 bytedecode_alternatives[altpmc - 1][j];
267 } 267 }
268 } 268 }
269 return -1; 269 return -1;
270 } 270 }
271 271
272 static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[]) 272 static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
273 { 273 {
274 int i, j, nalt = 1; 274 int i, j, nalt = 1;
275 s64 ae; 275 s64 ae;
276 276
277 alt[0] = event; 277 alt[0] = event;
278 nalt = 1; 278 nalt = 1;
279 i = find_alternative(event); 279 i = find_alternative(event);
280 if (i >= 0) { 280 if (i >= 0) {
281 for (j = 0; j < MAX_ALT; ++j) { 281 for (j = 0; j < MAX_ALT; ++j) {
282 ae = event_alternatives[i][j]; 282 ae = event_alternatives[i][j];
283 if (ae && ae != event) 283 if (ae && ae != event)
284 alt[nalt++] = ae; 284 alt[nalt++] = ae;
285 } 285 }
286 } else { 286 } else {
287 ae = find_alternative_bdecode(event); 287 ae = find_alternative_bdecode(event);
288 if (ae > 0) 288 if (ae > 0)
289 alt[nalt++] = ae; 289 alt[nalt++] = ae;
290 } 290 }
291 return nalt; 291 return nalt;
292 } 292 }
293 293
294 /* 294 /*
295 * Map of which direct events on which PMCs are marked instruction events. 295 * Map of which direct events on which PMCs are marked instruction events.
296 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event. 296 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
297 * Bit 0 is set if it is marked for all PMCs. 297 * Bit 0 is set if it is marked for all PMCs.
298 * The 0x80 bit indicates a byte decode PMCSEL value. 298 * The 0x80 bit indicates a byte decode PMCSEL value.
299 */ 299 */
300 static unsigned char direct_event_is_marked[0x28] = { 300 static unsigned char direct_event_is_marked[0x28] = {
301 0, /* 00 */ 301 0, /* 00 */
302 0x1f, /* 01 PM_IOPS_CMPL */ 302 0x1f, /* 01 PM_IOPS_CMPL */
303 0x2, /* 02 PM_MRK_GRP_DISP */ 303 0x2, /* 02 PM_MRK_GRP_DISP */
304 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ 304 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
305 0, /* 04 */ 305 0, /* 04 */
306 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */ 306 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
307 0x80, /* 06 */ 307 0x80, /* 06 */
308 0x80, /* 07 */ 308 0x80, /* 07 */
309 0, 0, 0,/* 08 - 0a */ 309 0, 0, 0,/* 08 - 0a */
310 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */ 310 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
311 0, /* 0c */ 311 0, /* 0c */
312 0x80, /* 0d */ 312 0x80, /* 0d */
313 0x80, /* 0e */ 313 0x80, /* 0e */
314 0, /* 0f */ 314 0, /* 0f */
315 0, /* 10 */ 315 0, /* 10 */
316 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */ 316 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
317 0, /* 12 */ 317 0, /* 12 */
318 0x10, /* 13 PM_MRK_GRP_CMPL */ 318 0x10, /* 13 PM_MRK_GRP_CMPL */
319 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */ 319 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
320 0x2, /* 15 PM_MRK_GRP_ISSUED */ 320 0x2, /* 15 PM_MRK_GRP_ISSUED */
321 0x80, /* 16 */ 321 0x80, /* 16 */
322 0x80, /* 17 */ 322 0x80, /* 17 */
323 0, 0, 0, 0, 0, 323 0, 0, 0, 0, 0,
324 0x80, /* 1d */ 324 0x80, /* 1d */
325 0x80, /* 1e */ 325 0x80, /* 1e */
326 0, /* 1f */ 326 0, /* 1f */
327 0x80, /* 20 */ 327 0x80, /* 20 */
328 0x80, /* 21 */ 328 0x80, /* 21 */
329 0x80, /* 22 */ 329 0x80, /* 22 */
330 0x80, /* 23 */ 330 0x80, /* 23 */
331 0x80, /* 24 */ 331 0x80, /* 24 */
332 0x80, /* 25 */ 332 0x80, /* 25 */
333 0x80, /* 26 */ 333 0x80, /* 26 */
334 0x80, /* 27 */ 334 0x80, /* 27 */
335 }; 335 };
336 336
337 /* 337 /*
338 * Returns 1 if event counts things relating to marked instructions 338 * Returns 1 if event counts things relating to marked instructions
339 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. 339 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
340 */ 340 */
341 static int power5_marked_instr_event(u64 event) 341 static int power5_marked_instr_event(u64 event)
342 { 342 {
343 int pmc, psel; 343 int pmc, psel;
344 int bit, byte, unit; 344 int bit, byte, unit;
345 u32 mask; 345 u32 mask;
346 346
347 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 347 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
348 psel = event & PM_PMCSEL_MSK; 348 psel = event & PM_PMCSEL_MSK;
349 if (pmc >= 5) 349 if (pmc >= 5)
350 return 0; 350 return 0;
351 351
352 bit = -1; 352 bit = -1;
353 if (psel < sizeof(direct_event_is_marked)) { 353 if (psel < sizeof(direct_event_is_marked)) {
354 if (direct_event_is_marked[psel] & (1 << pmc)) 354 if (direct_event_is_marked[psel] & (1 << pmc))
355 return 1; 355 return 1;
356 if (direct_event_is_marked[psel] & 0x80) 356 if (direct_event_is_marked[psel] & 0x80)
357 bit = 4; 357 bit = 4;
358 else if (psel == 0x08) 358 else if (psel == 0x08)
359 bit = pmc - 1; 359 bit = pmc - 1;
360 else if (psel == 0x10) 360 else if (psel == 0x10)
361 bit = 4 - pmc; 361 bit = 4 - pmc;
362 else if (psel == 0x1b && (pmc == 1 || pmc == 3)) 362 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
363 bit = 4; 363 bit = 4;
364 } else if ((psel & 0x58) == 0x40) 364 } else if ((psel & 0x58) == 0x40)
365 bit = psel & 7; 365 bit = psel & 7;
366 366
367 if (!(event & PM_BUSEVENT_MSK)) 367 if (!(event & PM_BUSEVENT_MSK))
368 return 0; 368 return 0;
369 369
370 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; 370 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
371 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; 371 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
372 if (unit == PM_LSU0) { 372 if (unit == PM_LSU0) {
373 /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ 373 /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
374 mask = 0x5dff00; 374 mask = 0x5dff00;
375 } else if (unit == PM_LSU1 && byte >= 4) { 375 } else if (unit == PM_LSU1 && byte >= 4) {
376 byte -= 4; 376 byte -= 4;
377 /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */ 377 /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */
378 mask = 0x5f00c0aa; 378 mask = 0x5f00c0aa;
379 } else 379 } else
380 return 0; 380 return 0;
381 381
382 return (mask >> (byte * 8 + bit)) & 1; 382 return (mask >> (byte * 8 + bit)) & 1;
383 } 383 }
384 384
385 static int power5_compute_mmcr(u64 event[], int n_ev, 385 static int power5_compute_mmcr(u64 event[], int n_ev,
386 unsigned int hwc[], unsigned long mmcr[]) 386 unsigned int hwc[], unsigned long mmcr[])
387 { 387 {
388 unsigned long mmcr1 = 0; 388 unsigned long mmcr1 = 0;
389 unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; 389 unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
390 unsigned int pmc, unit, byte, psel; 390 unsigned int pmc, unit, byte, psel;
391 unsigned int ttm, grp; 391 unsigned int ttm, grp;
392 int i, isbus, bit, grsel; 392 int i, isbus, bit, grsel;
393 unsigned int pmc_inuse = 0; 393 unsigned int pmc_inuse = 0;
394 unsigned int pmc_grp_use[2]; 394 unsigned int pmc_grp_use[2];
395 unsigned char busbyte[4]; 395 unsigned char busbyte[4];
396 unsigned char unituse[16]; 396 unsigned char unituse[16];
397 int ttmuse; 397 int ttmuse;
398 398
399 if (n_ev > 6) 399 if (n_ev > 6)
400 return -1; 400 return -1;
401 401
402 /* First pass to count resource use */ 402 /* First pass to count resource use */
403 pmc_grp_use[0] = pmc_grp_use[1] = 0; 403 pmc_grp_use[0] = pmc_grp_use[1] = 0;
404 memset(busbyte, 0, sizeof(busbyte)); 404 memset(busbyte, 0, sizeof(busbyte));
405 memset(unituse, 0, sizeof(unituse)); 405 memset(unituse, 0, sizeof(unituse));
406 for (i = 0; i < n_ev; ++i) { 406 for (i = 0; i < n_ev; ++i) {
407 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; 407 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
408 if (pmc) { 408 if (pmc) {
409 if (pmc > 6) 409 if (pmc > 6)
410 return -1; 410 return -1;
411 if (pmc_inuse & (1 << (pmc - 1))) 411 if (pmc_inuse & (1 << (pmc - 1)))
412 return -1; 412 return -1;
413 pmc_inuse |= 1 << (pmc - 1); 413 pmc_inuse |= 1 << (pmc - 1);
414 /* count 1/2 vs 3/4 use */ 414 /* count 1/2 vs 3/4 use */
415 if (pmc <= 4) 415 if (pmc <= 4)
416 ++pmc_grp_use[(pmc - 1) >> 1]; 416 ++pmc_grp_use[(pmc - 1) >> 1];
417 } 417 }
418 if (event[i] & PM_BUSEVENT_MSK) { 418 if (event[i] & PM_BUSEVENT_MSK) {
419 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; 419 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
420 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; 420 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
421 if (unit > PM_LASTUNIT) 421 if (unit > PM_LASTUNIT)
422 return -1; 422 return -1;
423 if (unit == PM_ISU0_ALT) 423 if (unit == PM_ISU0_ALT)
424 unit = PM_ISU0; 424 unit = PM_ISU0;
425 if (byte >= 4) { 425 if (byte >= 4) {
426 if (unit != PM_LSU1) 426 if (unit != PM_LSU1)
427 return -1; 427 return -1;
428 ++unit; 428 ++unit;
429 byte &= 3; 429 byte &= 3;
430 } 430 }
431 if (!pmc) 431 if (!pmc)
432 ++pmc_grp_use[byte & 1]; 432 ++pmc_grp_use[byte & 1];
433 if (busbyte[byte] && busbyte[byte] != unit) 433 if (busbyte[byte] && busbyte[byte] != unit)
434 return -1; 434 return -1;
435 busbyte[byte] = unit; 435 busbyte[byte] = unit;
436 unituse[unit] = 1; 436 unituse[unit] = 1;
437 } 437 }
438 } 438 }
439 if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2) 439 if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
440 return -1; 440 return -1;
441 441
442 /* 442 /*
443 * Assign resources and set multiplexer selects. 443 * Assign resources and set multiplexer selects.
444 * 444 *
445 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only 445 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
446 * choice we have to deal with. 446 * choice we have to deal with.
447 */ 447 */
448 if (unituse[PM_ISU0] & 448 if (unituse[PM_ISU0] &
449 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { 449 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
450 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ 450 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
451 unituse[PM_ISU0] = 0; 451 unituse[PM_ISU0] = 0;
452 } 452 }
453 /* Set TTM[01]SEL fields. */ 453 /* Set TTM[01]SEL fields. */
454 ttmuse = 0; 454 ttmuse = 0;
455 for (i = PM_FPU; i <= PM_ISU1; ++i) { 455 for (i = PM_FPU; i <= PM_ISU1; ++i) {
456 if (!unituse[i]) 456 if (!unituse[i])
457 continue; 457 continue;
458 if (ttmuse++) 458 if (ttmuse++)
459 return -1; 459 return -1;
460 mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH; 460 mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
461 } 461 }
462 ttmuse = 0; 462 ttmuse = 0;
463 for (; i <= PM_GRS; ++i) { 463 for (; i <= PM_GRS; ++i) {
464 if (!unituse[i]) 464 if (!unituse[i])
465 continue; 465 continue;
466 if (ttmuse++) 466 if (ttmuse++)
467 return -1; 467 return -1;
468 mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH; 468 mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
469 } 469 }
470 if (ttmuse > 1) 470 if (ttmuse > 1)
471 return -1; 471 return -1;
472 472
473 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ 473 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
474 for (byte = 0; byte < 4; ++byte) { 474 for (byte = 0; byte < 4; ++byte) {
475 unit = busbyte[byte]; 475 unit = busbyte[byte];
476 if (!unit) 476 if (!unit)
477 continue; 477 continue;
478 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { 478 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
479 /* get ISU0 through TTM1 rather than TTM0 */ 479 /* get ISU0 through TTM1 rather than TTM0 */
480 unit = PM_ISU0_ALT; 480 unit = PM_ISU0_ALT;
481 } else if (unit == PM_LSU1 + 1) { 481 } else if (unit == PM_LSU1 + 1) {
482 /* select lower word of LSU1 for this byte */ 482 /* select lower word of LSU1 for this byte */
483 mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte); 483 mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
484 } 484 }
485 ttm = unit >> 2; 485 ttm = unit >> 2;
486 mmcr1 |= (unsigned long)ttm 486 mmcr1 |= (unsigned long)ttm
487 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); 487 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
488 } 488 }
489 489
490 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ 490 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
491 for (i = 0; i < n_ev; ++i) { 491 for (i = 0; i < n_ev; ++i) {
492 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; 492 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
493 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; 493 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
494 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; 494 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
495 psel = event[i] & PM_PMCSEL_MSK; 495 psel = event[i] & PM_PMCSEL_MSK;
496 isbus = event[i] & PM_BUSEVENT_MSK; 496 isbus = event[i] & PM_BUSEVENT_MSK;
497 if (!pmc) { 497 if (!pmc) {
498 /* Bus event or any-PMC direct event */ 498 /* Bus event or any-PMC direct event */
499 for (pmc = 0; pmc < 4; ++pmc) { 499 for (pmc = 0; pmc < 4; ++pmc) {
500 if (pmc_inuse & (1 << pmc)) 500 if (pmc_inuse & (1 << pmc))
501 continue; 501 continue;
502 grp = (pmc >> 1) & 1; 502 grp = (pmc >> 1) & 1;
503 if (isbus) { 503 if (isbus) {
504 if (grp == (byte & 1)) 504 if (grp == (byte & 1))
505 break; 505 break;
506 } else if (pmc_grp_use[grp] < 2) { 506 } else if (pmc_grp_use[grp] < 2) {
507 ++pmc_grp_use[grp]; 507 ++pmc_grp_use[grp];
508 break; 508 break;
509 } 509 }
510 } 510 }
511 pmc_inuse |= 1 << pmc; 511 pmc_inuse |= 1 << pmc;
512 } else if (pmc <= 4) { 512 } else if (pmc <= 4) {
513 /* Direct event */ 513 /* Direct event */
514 --pmc; 514 --pmc;
515 if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) 515 if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
516 /* add events on higher-numbered bus */ 516 /* add events on higher-numbered bus */
517 mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc); 517 mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
518 } else { 518 } else {
519 /* Instructions or run cycles on PMC5/6 */ 519 /* Instructions or run cycles on PMC5/6 */
520 --pmc; 520 --pmc;
521 } 521 }
522 if (isbus && unit == PM_GRS) { 522 if (isbus && unit == PM_GRS) {
523 bit = psel & 7; 523 bit = psel & 7;
524 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; 524 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
525 mmcr1 |= (unsigned long)grsel << grsel_shift[bit]; 525 mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
526 } 526 }
527 if (power5_marked_instr_event(event[i])) 527 if (power5_marked_instr_event(event[i]))
528 mmcra |= MMCRA_SAMPLE_ENABLE; 528 mmcra |= MMCRA_SAMPLE_ENABLE;
529 if (pmc <= 3) 529 if (pmc <= 3)
530 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); 530 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
531 hwc[i] = pmc; 531 hwc[i] = pmc;
532 } 532 }
533 533
534 /* Return MMCRx values */ 534 /* Return MMCRx values */
535 mmcr[0] = 0; 535 mmcr[0] = 0;
536 if (pmc_inuse & 1) 536 if (pmc_inuse & 1)
537 mmcr[0] = MMCR0_PMC1CE; 537 mmcr[0] = MMCR0_PMC1CE;
538 if (pmc_inuse & 0x3e) 538 if (pmc_inuse & 0x3e)
539 mmcr[0] |= MMCR0_PMCjCE; 539 mmcr[0] |= MMCR0_PMCjCE;
540 mmcr[1] = mmcr1; 540 mmcr[1] = mmcr1;
541 mmcr[2] = mmcra; 541 mmcr[2] = mmcra;
542 return 0; 542 return 0;
543 } 543 }
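/*
 * Illustration (not part of this commit's diff): power5_compute_mmcr() above
 * returns its results with the convention mmcr[0] = MMCR0, mmcr[1] = MMCR1,
 * mmcr[2] = MMCRA.  A minimal sketch, assuming the kernel's mtspr() and
 * SPRN_MMCR* definitions from <asm/reg.h>, of how a caller could program the
 * hardware from those values (the helper name is hypothetical):
 */
static void program_power5_mmcrs(unsigned long mmcr[3])
{
	mtspr(SPRN_MMCRA, mmcr[2]);	/* sampling control, e.g. MMCRA_SAMPLE_ENABLE */
	mtspr(SPRN_MMCR1, mmcr[1]);	/* TTM/byte-lane selects and PMCxSEL fields */
	mtspr(SPRN_MMCR0, mmcr[0]);	/* PMC1CE/PMCjCE enables and freeze bits */
}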
544 544
545 static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[]) 545 static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[])
546 { 546 {
547 if (pmc <= 3) 547 if (pmc <= 3)
548 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); 548 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
549 } 549 }
550 550
551 static int power5_generic_events[] = { 551 static int power5_generic_events[] = {
552 [PERF_COUNT_HW_CPU_CYCLES] = 0xf, 552 [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
553 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009, 553 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
554 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */ 554 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */
555 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ 555 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
556 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ 556 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
557 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ 557 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
558 }; 558 };
559 559
560 #define C(x) PERF_COUNT_HW_CACHE_##x 560 #define C(x) PERF_COUNT_HW_CACHE_##x
561 561
562 /* 562 /*
563 * Table of generalized cache-related events. 563 * Table of generalized cache-related events.
564 * 0 means not supported, -1 means nonsensical, other values 564 * 0 means not supported, -1 means nonsensical, other values
565 * are event codes. 565 * are event codes.
566 */ 566 */
567 static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { 567 static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
568 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ 568 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
569 [C(OP_READ)] = { 0x4c1090, 0x3c1088 }, 569 [C(OP_READ)] = { 0x4c1090, 0x3c1088 },
570 [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 }, 570 [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 },
571 [C(OP_PREFETCH)] = { 0xc70e7, 0 }, 571 [C(OP_PREFETCH)] = { 0xc70e7, 0 },
572 }, 572 },
573 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ 573 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
574 [C(OP_READ)] = { 0, 0 }, 574 [C(OP_READ)] = { 0, 0 },
575 [C(OP_WRITE)] = { -1, -1 }, 575 [C(OP_WRITE)] = { -1, -1 },
576 [C(OP_PREFETCH)] = { 0, 0 }, 576 [C(OP_PREFETCH)] = { 0, 0 },
577 }, 577 },
578 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ 578 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
579 [C(OP_READ)] = { 0, 0x3c309b }, 579 [C(OP_READ)] = { 0, 0x3c309b },
580 [C(OP_WRITE)] = { 0, 0 }, 580 [C(OP_WRITE)] = { 0, 0 },
581 [C(OP_PREFETCH)] = { 0xc50c3, 0 }, 581 [C(OP_PREFETCH)] = { 0xc50c3, 0 },
582 }, 582 },
583 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ 583 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
584 [C(OP_READ)] = { 0x2c4090, 0x800c4 }, 584 [C(OP_READ)] = { 0x2c4090, 0x800c4 },
585 [C(OP_WRITE)] = { -1, -1 }, 585 [C(OP_WRITE)] = { -1, -1 },
586 [C(OP_PREFETCH)] = { -1, -1 }, 586 [C(OP_PREFETCH)] = { -1, -1 },
587 }, 587 },
588 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ 588 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
589 [C(OP_READ)] = { 0, 0x800c0 }, 589 [C(OP_READ)] = { 0, 0x800c0 },
590 [C(OP_WRITE)] = { -1, -1 }, 590 [C(OP_WRITE)] = { -1, -1 },
591 [C(OP_PREFETCH)] = { -1, -1 }, 591 [C(OP_PREFETCH)] = { -1, -1 },
592 }, 592 },
593 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ 593 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
594 [C(OP_READ)] = { 0x230e4, 0x230e5 }, 594 [C(OP_READ)] = { 0x230e4, 0x230e5 },
595 [C(OP_WRITE)] = { -1, -1 }, 595 [C(OP_WRITE)] = { -1, -1 },
596 [C(OP_PREFETCH)] = { -1, -1 }, 596 [C(OP_PREFETCH)] = { -1, -1 },
597 }, 597 },
598 [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ 598 [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
599 [C(OP_READ)] = { -1, -1 }, 599 [C(OP_READ)] = { -1, -1 },
600 [C(OP_WRITE)] = { -1, -1 }, 600 [C(OP_WRITE)] = { -1, -1 },
601 [C(OP_PREFETCH)] = { -1, -1 }, 601 [C(OP_PREFETCH)] = { -1, -1 },
602 }, 602 },
603 }; 603 };
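/*
 * Illustration (not part of this commit's diff): the table above is indexed
 * as [cache][operation][result], mirroring a PERF_TYPE_HW_CACHE config word
 * (cache | op << 8 | result << 16); 0 marks an unsupported combination and
 * -1 a nonsensical one.  A minimal sketch of resolving such a config into a
 * raw POWER5 event code (the helper name is hypothetical; <linux/errno.h>
 * and <linux/perf_event.h> are assumed to be included):
 */
static int power5_cache_event_to_code(u64 config, u64 *codep)
{
	unsigned int cache = config & 0xff;
	unsigned int op = (config >> 8) & 0xff;
	unsigned int result = (config >> 16) & 0xff;
	int code;

	if (cache >= C(MAX) || op >= C(OP_MAX) || result >= C(RESULT_MAX))
		return -EINVAL;
	code = power5_cache_events[cache][op][result];
	if (code == 0)
		return -EOPNOTSUPP;	/* combination not counted on POWER5 */
	if (code == -1)
		return -EINVAL;		/* nonsensical, e.g. an ITLB write */
	*codep = code;			/* e.g. L1D read miss -> 0x3c1088 */
	return 0;
}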
604 604
605 static struct power_pmu power5_pmu = { 605 static struct power_pmu power5_pmu = {
606 .name = "POWER5", 606 .name = "POWER5",
607 .n_counter = 6, 607 .n_counter = 6,
608 .max_alternatives = MAX_ALT, 608 .max_alternatives = MAX_ALT,
609 .add_fields = 0x7000090000555ul, 609 .add_fields = 0x7000090000555ul,
610 .test_adder = 0x3000490000000ul, 610 .test_adder = 0x3000490000000ul,
611 .compute_mmcr = power5_compute_mmcr, 611 .compute_mmcr = power5_compute_mmcr,
612 .get_constraint = power5_get_constraint, 612 .get_constraint = power5_get_constraint,
613 .get_alternatives = power5_get_alternatives, 613 .get_alternatives = power5_get_alternatives,
614 .disable_pmc = power5_disable_pmc, 614 .disable_pmc = power5_disable_pmc,
615 .n_generic = ARRAY_SIZE(power5_generic_events), 615 .n_generic = ARRAY_SIZE(power5_generic_events),
616 .generic_events = power5_generic_events, 616 .generic_events = power5_generic_events,
617 .cache_events = &power5_cache_events, 617 .cache_events = &power5_cache_events,
618 .flags = PPMU_HAS_SSLOT,
618 }; 619 };
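/*
 * Illustration (not part of this commit's diff): a minimal sketch of the kind
 * of check the PPMU_HAS_SSLOT flag set in .flags above enables, assuming the
 * MMCRA_SLOT, MMCRA_SLOT_SHIFT and MMCRA_SAMPLE_ENABLE definitions from the
 * usual powerpc headers (the helper name is hypothetical):
 */
static unsigned long slot_ip_adjust_sketch(struct power_pmu *pmu,
					   unsigned long mmcra)
{
	if ((pmu->flags & PPMU_HAS_SSLOT) && (mmcra & MMCRA_SAMPLE_ENABLE)) {
		unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;

		if (slot > 1)			/* slots 0 and 1 need no correction */
			return 4 * (slot - 1);	/* bytes to add to the sampled IP */
	}
	return 0;	/* no usable SLOT field, or sampling not enabled */
}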
619 620
620 static int __init init_power5_pmu(void) 621 static int __init init_power5_pmu(void)
621 { 622 {
622 if (!cur_cpu_spec->oprofile_cpu_type || 623 if (!cur_cpu_spec->oprofile_cpu_type ||
623 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) 624 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
624 return -ENODEV; 625 return -ENODEV;
625 626
626 return register_power_pmu(&power5_pmu); 627 return register_power_pmu(&power5_pmu);
627 } 628 }
628 629
629 early_initcall(init_power5_pmu); 630 early_initcall(init_power5_pmu);
630 631