Commit 5a289395bf753f8a318d3a5fa335a757c16c0183
Exists in
master
and in
7 other branches
Merge branch 'oprofile/x86-oprofile-for-tip' into oprofile/oprofile-for-tip
Conflicts: arch/x86/oprofile/op_model_ppro.c
Showing 3 changed files Side-by-side Diff
arch/x86/oprofile/nmi_int.c
... | ... | @@ -415,9 +415,6 @@ |
415 | 415 | case 15: case 23: |
416 | 416 | *cpu_type = "i386/core_2"; |
417 | 417 | break; |
418 | - case 26: | |
419 | - *cpu_type = "i386/core_2"; | |
420 | - break; | |
421 | 418 | default: |
422 | 419 | /* Unknown */ |
423 | 420 | return 0; |
... | ... | @@ -427,6 +424,16 @@ |
427 | 424 | return 1; |
428 | 425 | } |
429 | 426 | |
427 | +static int __init arch_perfmon_init(char **cpu_type) | |
428 | +{ | |
429 | + if (!cpu_has_arch_perfmon) | |
430 | + return 0; | |
431 | + *cpu_type = "i386/arch_perfmon"; | |
432 | + model = &op_arch_perfmon_spec; | |
433 | + arch_perfmon_setup_counters(); | |
434 | + return 1; | |
435 | +} | |
436 | + | |
430 | 437 | /* in order to get sysfs right */ |
431 | 438 | static int using_nmi; |
432 | 439 | |
... | ... | @@ -434,7 +441,7 @@ |
434 | 441 | { |
435 | 442 | __u8 vendor = boot_cpu_data.x86_vendor; |
436 | 443 | __u8 family = boot_cpu_data.x86; |
437 | - char *cpu_type; | |
444 | + char *cpu_type = NULL; | |
438 | 445 | int ret = 0; |
439 | 446 | |
440 | 447 | if (!cpu_has_apic) |
441 | 448 | |
442 | 449 | |
443 | 450 | |
... | ... | @@ -472,19 +479,20 @@ |
472 | 479 | switch (family) { |
473 | 480 | /* Pentium IV */ |
474 | 481 | case 0xf: |
475 | - if (!p4_init(&cpu_type)) | |
476 | - return -ENODEV; | |
482 | + p4_init(&cpu_type); | |
477 | 483 | break; |
478 | 484 | |
479 | 485 | /* A P6-class processor */ |
480 | 486 | case 6: |
481 | - if (!ppro_init(&cpu_type)) | |
482 | - return -ENODEV; | |
487 | + ppro_init(&cpu_type); | |
483 | 488 | break; |
484 | 489 | |
485 | 490 | default: |
486 | - return -ENODEV; | |
491 | + break; | |
487 | 492 | } |
493 | + | |
494 | + if (!cpu_type && !arch_perfmon_init(&cpu_type)) | |
495 | + return -ENODEV; | |
488 | 496 | break; |
489 | 497 | |
490 | 498 | default: |
arch/x86/oprofile/op_model_ppro.c
1 | 1 | /* |
2 | 2 | * @file op_model_ppro.h |
3 | - * pentium pro / P6 model-specific MSR operations | |
3 | + * Family 6 perfmon and architectural perfmon MSR operations | |
4 | 4 | * |
5 | 5 | * @remark Copyright 2002 OProfile authors |
6 | + * @remark Copyright 2008 Intel Corporation | |
6 | 7 | * @remark Read the file COPYING |
7 | 8 | * |
8 | 9 | * @author John Levon |
9 | 10 | * @author Philippe Elie |
10 | 11 | * @author Graydon Hoare |
12 | + * @author Andi Kleen | |
11 | 13 | */ |
12 | 14 | |
13 | 15 | #include <linux/oprofile.h> |
16 | +#include <linux/slab.h> | |
14 | 17 | #include <asm/ptrace.h> |
15 | 18 | #include <asm/msr.h> |
16 | 19 | #include <asm/apic.h> |
17 | 20 | #include <asm/nmi.h> |
21 | +#include <asm/intel_arch_perfmon.h> | |
18 | 22 | |
19 | 23 | #include "op_x86_model.h" |
20 | 24 | #include "op_counter.h" |
21 | 25 | |
22 | -#define NUM_COUNTERS 2 | |
23 | -#define NUM_CONTROLS 2 | |
26 | +static int num_counters = 2; | |
27 | +static int counter_width = 32; | |
24 | 28 | |
25 | 29 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) |
26 | 30 | #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) |
27 | -#define CTR_32BIT_WRITE(l, msrs, c) \ | |
28 | - do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0); } while (0) | |
29 | -#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) | |
31 | +#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1)))) | |
30 | 32 | |
31 | 33 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) |
32 | 34 | #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) |
33 | 35 | |
34 | 36 | |
... | ... | @@ -40,20 +42,20 @@ |
40 | 42 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) |
41 | 43 | #define CTRL_SET_EVENT(val, e) (val |= e) |
42 | 44 | |
43 | -static unsigned long reset_value[NUM_COUNTERS]; | |
45 | +static u64 *reset_value; | |
44 | 46 | |
45 | 47 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) |
46 | 48 | { |
47 | 49 | int i; |
48 | 50 | |
49 | - for (i = 0; i < NUM_COUNTERS; i++) { | |
51 | + for (i = 0; i < num_counters; i++) { | |
50 | 52 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) |
51 | 53 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; |
52 | 54 | else |
53 | 55 | msrs->counters[i].addr = 0; |
54 | 56 | } |
55 | 57 | |
56 | - for (i = 0; i < NUM_CONTROLS; i++) { | |
58 | + for (i = 0; i < num_counters; i++) { | |
57 | 59 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) |
58 | 60 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; |
59 | 61 | else |
60 | 62 | |
... | ... | @@ -67,8 +69,22 @@ |
67 | 69 | unsigned int low, high; |
68 | 70 | int i; |
69 | 71 | |
72 | + if (!reset_value) { | |
73 | + reset_value = kmalloc(sizeof(unsigned) * num_counters, | |
74 | + GFP_ATOMIC); | |
75 | + if (!reset_value) | |
76 | + return; | |
77 | + } | |
78 | + | |
79 | + if (cpu_has_arch_perfmon) { | |
80 | + union cpuid10_eax eax; | |
81 | + eax.full = cpuid_eax(0xa); | |
82 | + if (counter_width < eax.split.bit_width) | |
83 | + counter_width = eax.split.bit_width; | |
84 | + } | |
85 | + | |
70 | 86 | /* clear all counters */ |
71 | - for (i = 0 ; i < NUM_CONTROLS; ++i) { | |
87 | + for (i = 0 ; i < num_counters; ++i) { | |
72 | 88 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) |
73 | 89 | continue; |
74 | 90 | CTRL_READ(low, high, msrs, i); |
75 | 91 | |
76 | 92 | |
77 | 93 | |
... | ... | @@ -77,18 +93,18 @@ |
77 | 93 | } |
78 | 94 | |
79 | 95 | /* avoid a false detection of ctr overflows in NMI handler */ |
80 | - for (i = 0; i < NUM_COUNTERS; ++i) { | |
96 | + for (i = 0; i < num_counters; ++i) { | |
81 | 97 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) |
82 | 98 | continue; |
83 | - CTR_32BIT_WRITE(1, msrs, i); | |
99 | + wrmsrl(msrs->counters[i].addr, -1LL); | |
84 | 100 | } |
85 | 101 | |
86 | 102 | /* enable active counters */ |
87 | - for (i = 0; i < NUM_COUNTERS; ++i) { | |
103 | + for (i = 0; i < num_counters; ++i) { | |
88 | 104 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { |
89 | 105 | reset_value[i] = counter_config[i].count; |
90 | 106 | |
91 | - CTR_32BIT_WRITE(counter_config[i].count, msrs, i); | |
107 | + wrmsrl(msrs->counters[i].addr, -reset_value[i]); | |
92 | 108 | |
93 | 109 | CTRL_READ(low, high, msrs, i); |
94 | 110 | CTRL_CLEAR(low); |
95 | 111 | |
... | ... | @@ -111,13 +127,13 @@ |
111 | 127 | unsigned int low, high; |
112 | 128 | int i; |
113 | 129 | |
114 | - for (i = 0 ; i < NUM_COUNTERS; ++i) { | |
130 | + for (i = 0 ; i < num_counters; ++i) { | |
115 | 131 | if (!reset_value[i]) |
116 | 132 | continue; |
117 | 133 | CTR_READ(low, high, msrs, i); |
118 | 134 | if (CTR_OVERFLOWED(low)) { |
119 | 135 | oprofile_add_sample(regs, i); |
120 | - CTR_32BIT_WRITE(reset_value[i], msrs, i); | |
136 | + wrmsrl(msrs->counters[i].addr, -reset_value[i]); | |
121 | 137 | } |
122 | 138 | } |
123 | 139 | |
... | ... | @@ -141,7 +157,7 @@ |
141 | 157 | unsigned int low, high; |
142 | 158 | int i; |
143 | 159 | |
144 | - for (i = 0; i < NUM_COUNTERS; ++i) { | |
160 | + for (i = 0; i < num_counters; ++i) { | |
145 | 161 | if (reset_value[i]) { |
146 | 162 | CTRL_READ(low, high, msrs, i); |
147 | 163 | CTRL_SET_ACTIVE(low); |
... | ... | @@ -156,7 +172,7 @@ |
156 | 172 | unsigned int low, high; |
157 | 173 | int i; |
158 | 174 | |
159 | - for (i = 0; i < NUM_COUNTERS; ++i) { | |
175 | + for (i = 0; i < num_counters; ++i) { | |
160 | 176 | if (!reset_value[i]) |
161 | 177 | continue; |
162 | 178 | CTRL_READ(low, high, msrs, i); |
163 | 179 | |
164 | 180 | |
165 | 181 | |
166 | 182 | |
... | ... | @@ -169,21 +185,67 @@ |
169 | 185 | { |
170 | 186 | int i; |
171 | 187 | |
172 | - for (i = 0 ; i < NUM_COUNTERS ; ++i) { | |
188 | + for (i = 0 ; i < num_counters ; ++i) { | |
173 | 189 | if (CTR_IS_RESERVED(msrs, i)) |
174 | 190 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); |
175 | 191 | } |
176 | - for (i = 0 ; i < NUM_CONTROLS ; ++i) { | |
192 | + for (i = 0 ; i < num_counters ; ++i) { | |
177 | 193 | if (CTRL_IS_RESERVED(msrs, i)) |
178 | 194 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); |
179 | 195 | } |
196 | + if (reset_value) { | |
197 | + kfree(reset_value); | |
198 | + reset_value = NULL; | |
199 | + } | |
180 | 200 | } |
181 | 201 | |
182 | 202 | |
183 | -struct op_x86_model_spec const op_ppro_spec = { | |
184 | - .num_counters = NUM_COUNTERS, | |
185 | - .num_controls = NUM_CONTROLS, | |
203 | +struct op_x86_model_spec op_ppro_spec = { | |
204 | + .num_counters = 2, /* can be overridden */ | 
205 | + .num_controls = 2, /* ditto */ | 
186 | 206 | .fill_in_addresses = &ppro_fill_in_addresses, |
207 | + .setup_ctrs = &ppro_setup_ctrs, | |
208 | + .check_ctrs = &ppro_check_ctrs, | |
209 | + .start = &ppro_start, | |
210 | + .stop = &ppro_stop, | |
211 | + .shutdown = &ppro_shutdown | |
212 | +}; | |
213 | + | |
214 | +/* | |
215 | + * Architectural performance monitoring. | |
216 | + * | |
217 | + * Newer Intel CPUs (Core1+) have support for architectural | |
218 | + * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details. | |
219 | + * The advantage of this is that it can be done without knowing about | |
220 | + * the specific CPU. | |
221 | + */ | |
222 | + | |
223 | +void arch_perfmon_setup_counters(void) | |
224 | +{ | |
225 | + union cpuid10_eax eax; | |
226 | + | |
227 | + eax.full = cpuid_eax(0xa); | |
228 | + | |
229 | + /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ | |
230 | + if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && | |
231 | + current_cpu_data.x86_model == 15) { | |
232 | + eax.split.version_id = 2; | |
233 | + eax.split.num_counters = 2; | |
234 | + eax.split.bit_width = 40; | |
235 | + } | |
236 | + | |
237 | + num_counters = eax.split.num_counters; | |
238 | + | |
239 | + op_arch_perfmon_spec.num_counters = num_counters; | |
240 | + op_arch_perfmon_spec.num_controls = num_counters; | |
241 | + op_ppro_spec.num_counters = num_counters; | |
242 | + op_ppro_spec.num_controls = num_counters; | |
243 | +} | |
244 | + | |
245 | +struct op_x86_model_spec op_arch_perfmon_spec = { | |
246 | + /* num_counters/num_controls filled in at runtime */ | |
247 | + .fill_in_addresses = &ppro_fill_in_addresses, | |
248 | + /* user space does the cpuid check for available events */ | |
187 | 249 | .setup_ctrs = &ppro_setup_ctrs, |
188 | 250 | .check_ctrs = &ppro_check_ctrs, |
189 | 251 | .start = &ppro_start, |
arch/x86/oprofile/op_x86_model.h
... | ... | @@ -34,8 +34,8 @@ |
34 | 34 | struct op_x86_model_spec { |
35 | 35 | int (*init)(struct oprofile_operations *ops); |
36 | 36 | void (*exit)(void); |
37 | - unsigned int const num_counters; | |
38 | - unsigned int const num_controls; | |
37 | + unsigned int num_counters; | |
38 | + unsigned int num_controls; | |
39 | 39 | void (*fill_in_addresses)(struct op_msrs * const msrs); |
40 | 40 | void (*setup_ctrs)(struct op_msrs const * const msrs); |
41 | 41 | int (*check_ctrs)(struct pt_regs * const regs, |
42 | 42 | |
... | ... | @@ -45,10 +45,13 @@ |
45 | 45 | void (*shutdown)(struct op_msrs const * const msrs); |
46 | 46 | }; |
47 | 47 | |
48 | -extern struct op_x86_model_spec const op_ppro_spec; | |
48 | +extern struct op_x86_model_spec op_ppro_spec; | |
49 | 49 | extern struct op_x86_model_spec const op_p4_spec; |
50 | 50 | extern struct op_x86_model_spec const op_p4_ht2_spec; |
51 | 51 | extern struct op_x86_model_spec const op_amd_spec; |
52 | +extern struct op_x86_model_spec op_arch_perfmon_spec; | |
53 | + | |
54 | +extern void arch_perfmon_setup_counters(void); | |
52 | 55 | |
53 | 56 | #endif /* OP_X86_MODEL_H */ |