Blame view

arch/x86/oprofile/op_model_p4.c 17.7 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
  /**
   * @file op_model_p4.c
   * P4 model-specific MSR operations
   *
   * @remark Copyright 2002 OProfile authors
   * @remark Read the file COPYING
   *
   * @author Graydon Hoare
   */
  
  #include <linux/oprofile.h>
  #include <linux/smp.h>
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
13
  #include <linux/ptrace.h>
4a7863cc2   Don Zickus   x86, nmi_watchdog...
14
  #include <asm/nmi.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
15
  #include <asm/msr.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
17
  #include <asm/fixmap.h>
  #include <asm/apic.h>
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
18

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
  
  #include "op_x86_model.h"
  #include "op_counter.h"
  
  #define NUM_EVENTS 39
  
  #define NUM_COUNTERS_NON_HT 8
  #define NUM_ESCRS_NON_HT 45
  #define NUM_CCCRS_NON_HT 18
  #define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
  
  #define NUM_COUNTERS_HT2 4
  #define NUM_ESCRS_HT2 23
  #define NUM_CCCRS_HT2 9
  #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
42399adb2   Robert Richter   x86/oprofile: rep...
34
  #define OP_CTR_OVERFLOW			(1ULL<<31)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
35
  static unsigned int num_counters = NUM_COUNTERS_NON_HT;
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
36
  static unsigned int num_controls = NUM_CONTROLS_NON_HT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
37
38
39
40
41
42
43
  
  /*
   * Select the per-thread register counts.
   *
   * This has to be checked dynamically since the hyper-threadedness of a
   * chip is discovered at kernel boot-time.  When two sibling threads are
   * present, num_counters and num_controls are switched to the HT2 values;
   * otherwise they keep their non-HT initial values.  Without CONFIG_SMP
   * this is a no-op.
   */
  static inline void setup_num_counters(void)
  {
  #ifdef CONFIG_SMP
  	if (smp_num_siblings == 2) {
  		num_counters = NUM_COUNTERS_HT2;
  		num_controls = NUM_CONTROLS_HT2;
  	}
  #endif
  }
42b16b3fb   Jesper Juhl   Kill off warning:...
50
  /*
   * Step between consecutive MSR addresses owned by one thread.
   *
   * Returns 2 when two sibling threads are present (smp_num_siblings == 2),
   * so that a walk starting at "base + stagger" visits every other
   * register, and 1 otherwise.  Always 1 without CONFIG_SMP.
   */
  static inline int addr_increment(void)
  {
  #ifdef CONFIG_SMP
  	return smp_num_siblings == 2 ? 2 : 1;
  #else
  	return 1;
  #endif
  }
  
  
  /* tables to simulate simplified hardware view of p4 registers */
  /* one row of p4_counters[]: a virtual counter and the MSRs backing it */
  struct p4_counter_binding {
  	int virt_counter;	/* CTR_* bit identifying this counter */
  	int counter_address;	/* MSR address of the performance counter */
  	int cccr_address;	/* MSR address of the counter's CCCR */
  };
  
  /*
   * one row of p4_events[]: the select values for an event and up to two
   * (counter, ESCR) pairs it can be counted on.  An unused pair is { 0, 0 }.
   */
  struct p4_event_binding {
  	int escr_select;  /* value to put in CCCR */
  	int event_select; /* value to put in ESCR */
  	struct {
  		int virt_counter; /* for this counter... */
  		int escr_address; /* use this ESCR       */
  	} bindings[2];
  };
  
  /* nb: these CTR_* defines are a duplicate of defines in
     event/i386.p4*events. */
  
  
  /*
   * one bit per entry of p4_counters[], in table order: the setup code
   * tests an event's virt_counter mask against 1 << VIRT_CTR(stag, ctr).
   */
  #define CTR_BPU_0      (1 << 0)
  #define CTR_MS_0       (1 << 1)
  #define CTR_FLAME_0    (1 << 2)
  #define CTR_IQ_4       (1 << 3)
  #define CTR_BPU_2      (1 << 4)
  #define CTR_MS_2       (1 << 5)
  #define CTR_FLAME_2    (1 << 6)
  #define CTR_IQ_5       (1 << 7)
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
88
  static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
89
90
91
92
93
94
95
96
97
  	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
  	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
  	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
  	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
  	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
  	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
  	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
  	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
  };
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
98
  #define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
99

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
100
101
102
  /* p4 event codes in libop/op_event.h are indices into this table. */
  
  static struct p4_event_binding p4_events[NUM_EVENTS] = {
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
103

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
104
  	{ /* BRANCH_RETIRED */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
105
  		0x05, 0x06,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
106
107
108
  		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
  		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  	},
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
109

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
110
  	{ /* MISPRED_BRANCH_RETIRED */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
111
  		0x04, 0x03,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
112
113
114
  		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
  		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
  	},
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
115

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
116
117
  	{ /* TC_DELIVER_MODE */
  		0x01, 0x01,
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
118
  		{ { CTR_MS_0, MSR_P4_TC_ESCR0},
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
119
120
  		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
  	},
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
121

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
122
  	{ /* BPU_FETCH_REQUEST */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
123
  		0x00, 0x03,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
  		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
  		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
  	},
  
  	{ /* ITLB_REFERENCE */
  		0x03, 0x18,
  		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
  		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
  	},
  
  	{ /* MEMORY_CANCEL */
  		0x05, 0x02,
  		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
  		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
  	},
  
  	{ /* MEMORY_COMPLETE */
  		0x02, 0x08,
  		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
  		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
  	},
  
  	{ /* LOAD_PORT_REPLAY */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
147
  		0x02, 0x04,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
  		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
  		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
  	},
  
  	{ /* STORE_PORT_REPLAY */
  		0x02, 0x05,
  		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
  		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
  	},
  
  	{ /* MOB_LOAD_REPLAY */
  		0x02, 0x03,
  		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
  		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
  	},
  
  	{ /* PAGE_WALK_TYPE */
  		0x04, 0x01,
  		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
  		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
  	},
  
  	{ /* BSQ_CACHE_REFERENCE */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
171
  		0x07, 0x0c,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
172
173
174
175
176
  		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
  		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
  	},
  
  	{ /* IOQ_ALLOCATION */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
177
  		0x06, 0x03,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
178
179
180
181
182
  		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
  		  { 0, 0 } }
  	},
  
  	{ /* IOQ_ACTIVE_ENTRIES */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
183
  		0x06, 0x1a,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
184
185
186
187
188
  		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
  		  { 0, 0 } }
  	},
  
  	{ /* FSB_DATA_ACTIVITY */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
189
  		0x06, 0x17,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
190
191
192
193
194
  		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
  		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
  	},
  
  	{ /* BSQ_ALLOCATION */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
195
  		0x07, 0x05,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
196
197
198
199
200
201
  		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
  		  { 0, 0 } }
  	},
  
  	{ /* BSQ_ACTIVE_ENTRIES */
  		0x07, 0x06,
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
202
  		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
203
204
205
206
  		  { 0, 0 } }
  	},
  
  	{ /* X87_ASSIST */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
207
  		0x05, 0x03,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
208
209
210
211
212
213
214
215
216
  		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  	},
  
  	{ /* SSE_INPUT_ASSIST */
  		0x01, 0x34,
  		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  	},
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
217

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
218
  	{ /* PACKED_SP_UOP */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
219
  		0x01, 0x08,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
220
221
222
  		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  	},
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
223

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
224
  	{ /* PACKED_DP_UOP */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
225
  		0x01, 0x0c,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
226
227
228
229
230
  		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  	},
  
  	{ /* SCALAR_SP_UOP */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
231
  		0x01, 0x0a,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
232
233
234
235
236
237
238
239
240
241
242
  		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  	},
  
  	{ /* SCALAR_DP_UOP */
  		0x01, 0x0e,
  		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  	},
  
  	{ /* 64BIT_MMX_UOP */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
243
  		0x01, 0x02,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
244
245
246
  		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  	},
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
247

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
248
  	{ /* 128BIT_MMX_UOP */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
249
  		0x01, 0x1a,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
250
251
252
253
254
  		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  	},
  
  	{ /* X87_FP_UOP */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
255
  		0x01, 0x04,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
256
257
258
  		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  	},
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
259

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
260
  	{ /* X87_SIMD_MOVES_UOP */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
261
  		0x01, 0x2e,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
262
263
264
  		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  	},
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
265

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
266
  	{ /* MACHINE_CLEAR */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
267
  		0x05, 0x02,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
268
269
270
271
272
273
274
275
276
  		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  	},
  
  	{ /* GLOBAL_POWER_EVENTS */
  		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
  		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
  		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
  	},
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
277

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
278
  	{ /* TC_MS_XFER */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
279
  		0x00, 0x05,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
  		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
  		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
  	},
  
  	{ /* UOP_QUEUE_WRITES */
  		0x00, 0x09,
  		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
  		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
  	},
  
  	{ /* FRONT_END_EVENT */
  		0x05, 0x08,
  		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  	},
  
  	{ /* EXECUTION_EVENT */
  		0x05, 0x0c,
  		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  	},
  
  	{ /* REPLAY_EVENT */
  		0x05, 0x09,
  		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  	},
  
  	{ /* INSTR_RETIRED */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
309
  		0x04, 0x02,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
310
311
312
313
314
315
316
317
318
  		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
  		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
  	},
  
  	{ /* UOPS_RETIRED */
  		0x04, 0x01,
  		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
  		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
  	},
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
319
320
  	{ /* UOP_TYPE */
  		0x02, 0x02,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
321
322
323
324
325
  		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
  		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
  	},
  
  	{ /* RETIRED_MISPRED_BRANCH_TYPE */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
326
  		0x02, 0x05,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
  		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
  		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
  	},
  
  	{ /* RETIRED_BRANCH_TYPE */
  		0x02, 0x04,
  		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
  		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
  	}
  };
  
  
  /* non-zero when bit 7 of x is set; applied to the low word of
     MSR_IA32_MISC_ENABLE by the setup code */
  #define MISC_PMC_ENABLED_P(x) ((x) & (1 << 7))

  /*
   * ESCR helpers.  Every macro below modifies its first argument in place.
   * ESCR_CLEAR keeps only the bits in ESCR_RESERVED_BITS; the _0 variants
   * are used when the stagger is 0 and the _1 variants otherwise.
   */
  #define ESCR_RESERVED_BITS 0x80000003
  #define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
  #define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
  #define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
  #define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
  #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
  /* 6-bit event select at bits 25..30, 16-bit event mask at bits 9..24 */
  #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
  #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
349
350
351
352
353
354
355
356
357
  
  /*
   * CCCR helpers.  Every macro below modifies its argument in place.
   * CCCR_CLEAR keeps only the bits in CCCR_RESERVED_BITS.
   */
  #define CCCR_RESERVED_BITS 0x38030FFF
  #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
  #define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
  /* 3-bit ESCR select at bits 13..15 */
  #define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
  /* bit 26 is set when the stagger is 0, bit 27 otherwise */
  #define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
  #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
  /* bit 12 is set to start and cleared to stop the counter */
  #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
  #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
358
359
  /* bit 31 is treated as the overflow flag: test it / clear it in place */
  #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
  #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
360
361
362
363
364
365
366
367
  
  /*
   * This assigns a "stagger" to the current CPU, which is used throughout
   * the code in this module as an extra array offset, to select the "even"
   * or "odd" part of all the divided resources.
   *
   * Returns 0 when the current CPU is the first CPU in its sibling map and
   * 1 otherwise.  Always 0 without CONFIG_SMP.
   */
  static unsigned int get_stagger(void)
  {
  #ifdef CONFIG_SMP
  	int cpu = smp_processor_id();

  	return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
  #else
  	return 0;
  #endif
  }
  
  
  /* finally, mediate access to a real hardware counter
     by passing a "virtual" counter number to this macro,
     along with your stagger setting. */
  #define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
  
  static unsigned long reset_value[NUM_COUNTERS_NON_HT];
83300ce0d   Robert Richter   oprofile/x86: mov...
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
  /*
   * Release every counter and control MSR recorded in @msrs.
   * Slots whose address is zero were never reserved and are skipped.
   */
  static void p4_shutdown(struct op_msrs const * const msrs)
  {
  	unsigned int slot;

  	for (slot = 0; slot != num_counters; slot++) {
  		if (!msrs->counters[slot].addr)
  			continue;
  		release_perfctr_nmi(msrs->counters[slot].addr);
  	}

  	/*
  	 * The first num_counters control slots are reserved together with
  	 * their counter registers, so only the control slots after them
  	 * are released here.
  	 */
  	for (slot = num_counters; slot < num_controls; slot++) {
  		if (!msrs->controls[slot].addr)
  			continue;
  		release_evntsel_nmi(msrs->controls[slot].addr);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
398

8617f98c0   Robert Richter   oprofile/x86: ret...
399
  static int p4_fill_in_addresses(struct op_msrs * const msrs)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
400
  {
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
401
  	unsigned int i;
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
402
  	unsigned int addr, cccraddr, stag;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
403
404
405
  
  	setup_num_counters();
  	stag = get_stagger();
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
406
407
408
409
  	/* the counter & cccr registers we pay attention to */
  	for (i = 0; i < num_counters; ++i) {
  		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
  		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
410
  		if (reserve_perfctr_nmi(addr)) {
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
411
412
413
414
  			msrs->counters[i].addr = addr;
  			msrs->controls[i].addr = cccraddr;
  		}
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
415
416
417
  	/* 43 ESCR registers in three or four discontiguous group */
  	for (addr = MSR_P4_BSU_ESCR0 + stag;
  	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
418
419
  		if (reserve_evntsel_nmi(addr))
  			msrs->controls[i].addr = addr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
420
421
422
423
424
425
426
  	}
  
  	/* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
  	 * to avoid special case in nmi_{save|restore}_registers() */
  	if (boot_cpu_data.x86_model >= 0x3) {
  		for (addr = MSR_P4_BSU_ESCR0 + stag;
  		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
427
428
  			if (reserve_evntsel_nmi(addr))
  				msrs->controls[i].addr = addr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
429
430
431
432
  		}
  	} else {
  		for (addr = MSR_P4_IQ_ESCR0 + stag;
  		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
433
434
  			if (reserve_evntsel_nmi(addr))
  				msrs->controls[i].addr = addr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
435
436
437
438
439
  		}
  	}
  
  	for (addr = MSR_P4_RAT_ESCR0 + stag;
  	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
440
441
  		if (reserve_evntsel_nmi(addr))
  			msrs->controls[i].addr = addr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
442
  	}
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
443

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
444
  	for (addr = MSR_P4_MS_ESCR0 + stag;
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
445
  	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
446
447
  		if (reserve_evntsel_nmi(addr))
  			msrs->controls[i].addr = addr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
448
  	}
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
449

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
450
  	for (addr = MSR_P4_IX_ESCR0 + stag;
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
451
  	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
452
453
  		if (reserve_evntsel_nmi(addr))
  			msrs->controls[i].addr = addr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
454
455
456
  	}
  
  	/* there are 2 remaining non-contiguously located ESCRs */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
457
  	if (num_counters == NUM_COUNTERS_NON_HT) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
458
  		/* standard non-HT CPUs handle both remaining ESCRs*/
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
459
460
461
462
  		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
  			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
  		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
  			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
463
464
465
466
  
  	} else if (stag == 0) {
  		/* HT CPUs give the first remainder to the even thread, as
  		   the 32nd control register */
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
467
468
  		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
  			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
469
470
471
472
  
  	} else {
  		/* and two copies of the second to the odd thread,
  		   for the 22st and 23nd control registers */
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
473
474
475
476
  		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
  			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
  			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
477
  	}
8617f98c0   Robert Richter   oprofile/x86: ret...
478
479
480
481
482
483
484
485
486
487
488
489
  
  	for (i = 0; i < num_counters; ++i) {
  		if (!counter_config[i].enabled)
  			continue;
  		if (msrs->controls[i].addr)
  			continue;
  		op_x86_warn_reserved(i);
  		p4_shutdown(msrs);
  		return -EBUSY;
  	}
  
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
  }
  
  
  static void pmc_setup_one_p4_counter(unsigned int ctr)
  {
  	int i;
  	int const maxbind = 2;
  	unsigned int cccr = 0;
  	unsigned int escr = 0;
  	unsigned int high = 0;
  	unsigned int counter_bit;
  	struct p4_event_binding *ev = NULL;
  	unsigned int stag;
  
  	stag = get_stagger();
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
505

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
506
507
  	/* convert from counter *number* to counter *bit* */
  	counter_bit = 1 << VIRT_CTR(stag, ctr);
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
508

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
509
510
  	/* find our event binding structure. */
  	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
511
512
513
  		printk(KERN_ERR
  		       "oprofile: P4 event code 0x%lx out of range
  ",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
514
515
516
  		       counter_config[ctr].event);
  		return;
  	}
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
517

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
518
  	ev = &(p4_events[counter_config[ctr].event - 1]);
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
519

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
520
521
522
523
  	for (i = 0; i < maxbind; i++) {
  		if (ev->bindings[i].virt_counter & counter_bit) {
  
  			/* modify ESCR */
1131a4782   Robert Richter   x86/oprofile: rem...
524
  			rdmsr(ev->bindings[i].escr_address, escr, high);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
525
526
527
528
529
530
531
532
533
  			ESCR_CLEAR(escr);
  			if (stag == 0) {
  				ESCR_SET_USR_0(escr, counter_config[ctr].user);
  				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
  			} else {
  				ESCR_SET_USR_1(escr, counter_config[ctr].user);
  				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
  			}
  			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
534
  			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
1131a4782   Robert Richter   x86/oprofile: rem...
535
  			wrmsr(ev->bindings[i].escr_address, escr, high);
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
536

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
537
  			/* modify CCCR */
1131a4782   Robert Richter   x86/oprofile: rem...
538
539
  			rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
  			      cccr, high);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
540
541
542
  			CCCR_CLEAR(cccr);
  			CCCR_SET_REQUIRED_BITS(cccr);
  			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
543
  			if (stag == 0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
544
  				CCCR_SET_PMI_OVF_0(cccr);
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
545
  			else
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
546
  				CCCR_SET_PMI_OVF_1(cccr);
1131a4782   Robert Richter   x86/oprofile: rem...
547
548
  			wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
  			      cccr, high);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
549
550
551
  			return;
  		}
  	}
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
552
  	printk(KERN_ERR
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
553
554
555
556
  	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d
  ",
  	       counter_config[ctr].event, stag, ctr);
  }
ef8828ddf   Robert Richter   x86/oprofile: pas...
557
558
  static void p4_setup_ctrs(struct op_x86_model_spec const *model,
  			  struct op_msrs const * const msrs)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
559
560
561
  {
  	unsigned int i;
  	unsigned int low, high;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
562
563
564
565
566
  	unsigned int stag;
  
  	stag = get_stagger();
  
  	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
567
  	if (!MISC_PMC_ENABLED_P(low)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
568
569
570
571
572
573
  		printk(KERN_ERR "oprofile: P4 PMC not available
  ");
  		return;
  	}
  
  	/* clear the cccrs we will use */
6e63ea4b0   Robert Richter   x86/oprofile: Whi...
574
  	for (i = 0; i < num_counters; i++) {
217d3cfb9   Robert Richter   x86/oprofile: rep...
575
  		if (unlikely(!msrs->controls[i].addr))
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
576
  			continue;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
577
578
579
580
581
  		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
  		CCCR_CLEAR(low);
  		CCCR_SET_REQUIRED_BITS(low);
  		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
582
  	/* clear all escrs (including those outside our concern) */
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
583
  	for (i = num_counters; i < num_controls; i++) {
217d3cfb9   Robert Richter   x86/oprofile: rep...
584
  		if (unlikely(!msrs->controls[i].addr))
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
585
586
  			continue;
  		wrmsr(msrs->controls[i].addr, 0, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
587
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
588
  	/* setup all counters */
6e63ea4b0   Robert Richter   x86/oprofile: Whi...
589
  	for (i = 0; i < num_counters; ++i) {
217d3cfb9   Robert Richter   x86/oprofile: rep...
590
  		if (counter_config[i].enabled && msrs->controls[i].addr) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
591
592
  			reset_value[i] = counter_config[i].count;
  			pmc_setup_one_p4_counter(i);
bbc5986d2   Robert Richter   x86/oprofile: use...
593
  			wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
8045a4c29   Robert Richter   x86/oprofile: Fix...
594
  			       -(u64)counter_config[i].count);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
  		} else {
  			reset_value[i] = 0;
  		}
  	}
  }
  
  
  static int p4_check_ctrs(struct pt_regs * const regs,
  			 struct op_msrs const * const msrs)
  {
  	unsigned long ctr, low, high, stag, real;
  	int i;
  
  	stag = get_stagger();
  
  	for (i = 0; i < num_counters; ++i) {
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
611
612
  
  		if (!reset_value[i])
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
613
  			continue;
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
614
  		/*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
615
616
617
618
619
620
621
622
  		 * there is some eccentricity in the hardware which
  		 * requires that we perform 2 extra corrections:
  		 *
  		 * - check both the CCCR:OVF flag for overflow and the
  		 *   counter high bit for un-flagged overflows.
  		 *
  		 * - write the counter back twice to ensure it gets
  		 *   updated properly.
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
623
  		 *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
624
625
626
627
628
629
  		 * the former seems to be related to extra NMIs happening
  		 * during the current NMI; the latter is reported as errata
  		 * N15 in intel doc 249199-029, pentium 4 specification
  		 * update, though their suggested work-around does not
  		 * appear to solve the problem.
  		 */
20211e4d3   Paolo Ciarrocchi   x86: Coding style...
630

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
631
  		real = VIRT_CTR(stag, i);
1131a4782   Robert Richter   x86/oprofile: rem...
632
633
  		rdmsr(p4_counters[real].cccr_address, low, high);
  		rdmsr(p4_counters[real].counter_address, ctr, high);
42399adb2   Robert Richter   x86/oprofile: rep...
634
  		if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
635
  			oprofile_add_sample(regs, i);
bbc5986d2   Robert Richter   x86/oprofile: use...
636
  			wrmsrl(p4_counters[real].counter_address,
8045a4c29   Robert Richter   x86/oprofile: Fix...
637
  			       -(u64)reset_value[i]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
638
  			CCCR_CLEAR_OVF(low);
1131a4782   Robert Richter   x86/oprofile: rem...
639
  			wrmsr(p4_counters[real].cccr_address, low, high);
bbc5986d2   Robert Richter   x86/oprofile: use...
640
  			wrmsrl(p4_counters[real].counter_address,
8045a4c29   Robert Richter   x86/oprofile: Fix...
641
  			       -(u64)reset_value[i]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
  		}
  	}
  
  	/* P4 quirk: you have to re-unmask the apic vector */
  	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
  
  	/* See op_model_ppro.c */
  	return 1;
  }
  
  
  static void p4_start(struct op_msrs const * const msrs)
  {
  	unsigned int low, high, stag;
  	int i;
  
  	stag = get_stagger();
  
  	for (i = 0; i < num_counters; ++i) {
  		if (!reset_value[i])
  			continue;
1131a4782   Robert Richter   x86/oprofile: rem...
663
  		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
664
  		CCCR_SET_ENABLE(low);
1131a4782   Robert Richter   x86/oprofile: rem...
665
  		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
666
667
668
669
670
671
672
673
674
675
676
677
  	}
  }
  
  
  static void p4_stop(struct op_msrs const * const msrs)
  {
  	unsigned int low, high, stag;
  	int i;
  
  	stag = get_stagger();
  
  	for (i = 0; i < num_counters; ++i) {
cb9c448c6   Don Zickus   [PATCH] i386: Uti...
678
679
  		if (!reset_value[i])
  			continue;
1131a4782   Robert Richter   x86/oprofile: rem...
680
  		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
681
  		CCCR_SET_DISABLE(low);
1131a4782   Robert Richter   x86/oprofile: rem...
682
  		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
683
684
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
685
  #ifdef CONFIG_SMP
  /* model description using the HT2 register counts; SMP builds only */
  struct op_x86_model_spec op_p4_ht2_spec = {
  	.num_counters		= NUM_COUNTERS_HT2,
  	.num_controls		= NUM_CONTROLS_HT2,
  	.fill_in_addresses	= &p4_fill_in_addresses,
  	.setup_ctrs		= &p4_setup_ctrs,
  	.check_ctrs		= &p4_check_ctrs,
  	.start			= &p4_start,
  	.stop			= &p4_stop,
  	.shutdown		= &p4_shutdown
  };
  #endif
259a83a8a   Robert Richter   x86/oprofile: Rem...
697
  struct op_x86_model_spec op_p4_spec = {
c92960fcc   Robert Richter   oprofile: whitesp...
698
699
700
701
702
703
704
705
  	.num_counters		= NUM_COUNTERS_NON_HT,
  	.num_controls		= NUM_CONTROLS_NON_HT,
  	.fill_in_addresses	= &p4_fill_in_addresses,
  	.setup_ctrs		= &p4_setup_ctrs,
  	.check_ctrs		= &p4_check_ctrs,
  	.start			= &p4_start,
  	.stop			= &p4_stop,
  	.shutdown		= &p4_shutdown
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
706
  };