Commit 5a289395bf753f8a318d3a5fa335a757c16c0183

Authored by Robert Richter

Merge branch 'oprofile/x86-oprofile-for-tip' into oprofile/oprofile-for-tip

Conflicts:
	arch/x86/oprofile/op_model_ppro.c

Showing 3 changed files Side-by-side Diff

arch/x86/oprofile/nmi_int.c
... ... @@ -415,9 +415,6 @@
415 415 case 15: case 23:
416 416 *cpu_type = "i386/core_2";
417 417 break;
418   - case 26:
419   - *cpu_type = "i386/core_2";
420   - break;
421 418 default:
422 419 /* Unknown */
423 420 return 0;
... ... @@ -427,6 +424,16 @@
427 424 return 1;
428 425 }
429 426  
  427 +static int __init arch_perfmon_init(char **cpu_type)
  428 +{
  429 + if (!cpu_has_arch_perfmon)
  430 + return 0;
  431 + *cpu_type = "i386/arch_perfmon";
  432 + model = &op_arch_perfmon_spec;
  433 + arch_perfmon_setup_counters();
  434 + return 1;
  435 +}
  436 +
430 437 /* in order to get sysfs right */
431 438 static int using_nmi;
432 439  
... ... @@ -434,7 +441,7 @@
434 441 {
435 442 __u8 vendor = boot_cpu_data.x86_vendor;
436 443 __u8 family = boot_cpu_data.x86;
437   - char *cpu_type;
  444 + char *cpu_type = NULL;
438 445 int ret = 0;
439 446  
440 447 if (!cpu_has_apic)
441 448  
442 449  
443 450  
... ... @@ -472,19 +479,20 @@
472 479 switch (family) {
473 480 /* Pentium IV */
474 481 case 0xf:
475   - if (!p4_init(&cpu_type))
476   - return -ENODEV;
  482 + p4_init(&cpu_type);
477 483 break;
478 484  
479 485 /* A P6-class processor */
480 486 case 6:
481   - if (!ppro_init(&cpu_type))
482   - return -ENODEV;
  487 + ppro_init(&cpu_type);
483 488 break;
484 489  
485 490 default:
486   - return -ENODEV;
  491 + break;
487 492 }
  493 +
  494 + if (!cpu_type && !arch_perfmon_init(&cpu_type))
  495 + return -ENODEV;
488 496 break;
489 497  
490 498 default:
arch/x86/oprofile/op_model_ppro.c
1 1 /*
2 2 * @file op_model_ppro.h
3   - * pentium pro / P6 model-specific MSR operations
  3 + * Family 6 perfmon and architectural perfmon MSR operations
4 4 *
5 5 * @remark Copyright 2002 OProfile authors
  6 + * @remark Copyright 2008 Intel Corporation
6 7 * @remark Read the file COPYING
7 8 *
8 9 * @author John Levon
9 10 * @author Philippe Elie
10 11 * @author Graydon Hoare
  12 + * @author Andi Kleen
11 13 */
12 14  
13 15 #include <linux/oprofile.h>
  16 +#include <linux/slab.h>
14 17 #include <asm/ptrace.h>
15 18 #include <asm/msr.h>
16 19 #include <asm/apic.h>
17 20 #include <asm/nmi.h>
  21 +#include <asm/intel_arch_perfmon.h>
18 22  
19 23 #include "op_x86_model.h"
20 24 #include "op_counter.h"
21 25  
22   -#define NUM_COUNTERS 2
23   -#define NUM_CONTROLS 2
  26 +static int num_counters = 2;
  27 +static int counter_width = 32;
24 28  
25 29 #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
26 30 #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
27   -#define CTR_32BIT_WRITE(l, msrs, c) \
28   - do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0); } while (0)
29   -#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
  31 +#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1))))
30 32  
31 33 #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
32 34 #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
33 35  
34 36  
... ... @@ -40,20 +42,20 @@
40 42 #define CTRL_SET_UM(val, m) (val |= (m << 8))
41 43 #define CTRL_SET_EVENT(val, e) (val |= e)
42 44  
43   -static unsigned long reset_value[NUM_COUNTERS];
  45 +static u64 *reset_value;
44 46  
45 47 static void ppro_fill_in_addresses(struct op_msrs * const msrs)
46 48 {
47 49 int i;
48 50  
49   - for (i = 0; i < NUM_COUNTERS; i++) {
  51 + for (i = 0; i < num_counters; i++) {
50 52 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
51 53 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
52 54 else
53 55 msrs->counters[i].addr = 0;
54 56 }
55 57  
56   - for (i = 0; i < NUM_CONTROLS; i++) {
  58 + for (i = 0; i < num_counters; i++) {
57 59 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
58 60 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
59 61 else
60 62  
... ... @@ -67,8 +69,22 @@
67 69 unsigned int low, high;
68 70 int i;
69 71  
  72 + if (!reset_value) {
  73 + reset_value = kmalloc(sizeof(unsigned) * num_counters,
  74 + GFP_ATOMIC);
  75 + if (!reset_value)
  76 + return;
  77 + }
  78 +
  79 + if (cpu_has_arch_perfmon) {
  80 + union cpuid10_eax eax;
  81 + eax.full = cpuid_eax(0xa);
  82 + if (counter_width < eax.split.bit_width)
  83 + counter_width = eax.split.bit_width;
  84 + }
  85 +
70 86 /* clear all counters */
71   - for (i = 0 ; i < NUM_CONTROLS; ++i) {
  87 + for (i = 0 ; i < num_counters; ++i) {
72 88 if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
73 89 continue;
74 90 CTRL_READ(low, high, msrs, i);
75 91  
76 92  
77 93  
... ... @@ -77,18 +93,18 @@
77 93 }
78 94  
79 95 /* avoid a false detection of ctr overflows in NMI handler */
80   - for (i = 0; i < NUM_COUNTERS; ++i) {
  96 + for (i = 0; i < num_counters; ++i) {
81 97 if (unlikely(!CTR_IS_RESERVED(msrs, i)))
82 98 continue;
83   - CTR_32BIT_WRITE(1, msrs, i);
  99 + wrmsrl(msrs->counters[i].addr, -1LL);
84 100 }
85 101  
86 102 /* enable active counters */
87   - for (i = 0; i < NUM_COUNTERS; ++i) {
  103 + for (i = 0; i < num_counters; ++i) {
88 104 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
89 105 reset_value[i] = counter_config[i].count;
90 106  
91   - CTR_32BIT_WRITE(counter_config[i].count, msrs, i);
  107 + wrmsrl(msrs->counters[i].addr, -reset_value[i]);
92 108  
93 109 CTRL_READ(low, high, msrs, i);
94 110 CTRL_CLEAR(low);
95 111  
... ... @@ -111,13 +127,13 @@
111 127 unsigned int low, high;
112 128 int i;
113 129  
114   - for (i = 0 ; i < NUM_COUNTERS; ++i) {
  130 + for (i = 0 ; i < num_counters; ++i) {
115 131 if (!reset_value[i])
116 132 continue;
117 133 CTR_READ(low, high, msrs, i);
118 134 if (CTR_OVERFLOWED(low)) {
119 135 oprofile_add_sample(regs, i);
120   - CTR_32BIT_WRITE(reset_value[i], msrs, i);
  136 + wrmsrl(msrs->counters[i].addr, -reset_value[i]);
121 137 }
122 138 }
123 139  
... ... @@ -141,7 +157,7 @@
141 157 unsigned int low, high;
142 158 int i;
143 159  
144   - for (i = 0; i < NUM_COUNTERS; ++i) {
  160 + for (i = 0; i < num_counters; ++i) {
145 161 if (reset_value[i]) {
146 162 CTRL_READ(low, high, msrs, i);
147 163 CTRL_SET_ACTIVE(low);
... ... @@ -156,7 +172,7 @@
156 172 unsigned int low, high;
157 173 int i;
158 174  
159   - for (i = 0; i < NUM_COUNTERS; ++i) {
  175 + for (i = 0; i < num_counters; ++i) {
160 176 if (!reset_value[i])
161 177 continue;
162 178 CTRL_READ(low, high, msrs, i);
163 179  
164 180  
165 181  
166 182  
... ... @@ -169,21 +185,67 @@
169 185 {
170 186 int i;
171 187  
172   - for (i = 0 ; i < NUM_COUNTERS ; ++i) {
  188 + for (i = 0 ; i < num_counters ; ++i) {
173 189 if (CTR_IS_RESERVED(msrs, i))
174 190 release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
175 191 }
176   - for (i = 0 ; i < NUM_CONTROLS ; ++i) {
  192 + for (i = 0 ; i < num_counters ; ++i) {
177 193 if (CTRL_IS_RESERVED(msrs, i))
178 194 release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
179 195 }
  196 + if (reset_value) {
  197 + kfree(reset_value);
  198 + reset_value = NULL;
  199 + }
180 200 }
181 201  
182 202  
183   -struct op_x86_model_spec const op_ppro_spec = {
184   - .num_counters = NUM_COUNTERS,
185   - .num_controls = NUM_CONTROLS,
  203 +struct op_x86_model_spec op_ppro_spec = {
  204 + .num_counters = 2, /* can be overridden */
  205 + .num_controls = 2, /* ditto */
186 206 .fill_in_addresses = &ppro_fill_in_addresses,
  207 + .setup_ctrs = &ppro_setup_ctrs,
  208 + .check_ctrs = &ppro_check_ctrs,
  209 + .start = &ppro_start,
  210 + .stop = &ppro_stop,
  211 + .shutdown = &ppro_shutdown
  212 +};
  213 +
  214 +/*
  215 + * Architectural performance monitoring.
  216 + *
  217 + * Newer Intel CPUs (Core1+) have support for architectural
  218 + * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
  219 + * The advantage of this is that it can be done without knowing about
  220 + * the specific CPU.
  221 + */
  222 +
  223 +void arch_perfmon_setup_counters(void)
  224 +{
  225 + union cpuid10_eax eax;
  226 +
  227 + eax.full = cpuid_eax(0xa);
  228 +
  229 + /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
  230 + if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
  231 + current_cpu_data.x86_model == 15) {
  232 + eax.split.version_id = 2;
  233 + eax.split.num_counters = 2;
  234 + eax.split.bit_width = 40;
  235 + }
  236 +
  237 + num_counters = eax.split.num_counters;
  238 +
  239 + op_arch_perfmon_spec.num_counters = num_counters;
  240 + op_arch_perfmon_spec.num_controls = num_counters;
  241 + op_ppro_spec.num_counters = num_counters;
  242 + op_ppro_spec.num_controls = num_counters;
  243 +}
  244 +
  245 +struct op_x86_model_spec op_arch_perfmon_spec = {
  246 + /* num_counters/num_controls filled in at runtime */
  247 + .fill_in_addresses = &ppro_fill_in_addresses,
  248 + /* user space does the cpuid check for available events */
187 249 .setup_ctrs = &ppro_setup_ctrs,
188 250 .check_ctrs = &ppro_check_ctrs,
189 251 .start = &ppro_start,
arch/x86/oprofile/op_x86_model.h
... ... @@ -34,8 +34,8 @@
34 34 struct op_x86_model_spec {
35 35 int (*init)(struct oprofile_operations *ops);
36 36 void (*exit)(void);
37   - unsigned int const num_counters;
38   - unsigned int const num_controls;
  37 + unsigned int num_counters;
  38 + unsigned int num_controls;
39 39 void (*fill_in_addresses)(struct op_msrs * const msrs);
40 40 void (*setup_ctrs)(struct op_msrs const * const msrs);
41 41 int (*check_ctrs)(struct pt_regs * const regs,
42 42  
... ... @@ -45,10 +45,13 @@
45 45 void (*shutdown)(struct op_msrs const * const msrs);
46 46 };
47 47  
48   -extern struct op_x86_model_spec const op_ppro_spec;
  48 +extern struct op_x86_model_spec op_ppro_spec;
49 49 extern struct op_x86_model_spec const op_p4_spec;
50 50 extern struct op_x86_model_spec const op_p4_ht2_spec;
51 51 extern struct op_x86_model_spec const op_amd_spec;
  52 +extern struct op_x86_model_spec op_arch_perfmon_spec;
  53 +
  54 +extern void arch_perfmon_setup_counters(void);
52 55  
53 56 #endif /* OP_X86_MODEL_H */