Commit eaa958402ea40851097d051f52ba1bb7a885efe9

Authored by Yinghai Lu
Committed by Rusty Russell
1 parent 0281b5dc03

cpumask: alloc zeroed cpumask for static cpumask_var_ts

These masks are defined as static cpumask_var_t, so when MAXSMP is not set
they are already cleared (they live in zero-initialized static storage).
Allocate them zeroed as well, to avoid surprises when MAXSMP is enabled.

Signed-off-by: Yinghai Lu <yinghai.lu@kernel.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
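
For context, here is a minimal, hypothetical sketch of the pattern this commit standardizes (the mask name and initcall are made up, not taken from any of the patched drivers): without MAXSMP, cpumask_var_t is a one-element array, so a static definition is already all-zero; with MAXSMP (CONFIG_CPUMASK_OFFSTACK) it is a pointer that must be allocated, and using the zalloc variant keeps both configurations starting from an empty mask.

#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/init.h>

/* Hypothetical mask; not one of the masks touched by this commit. */
static cpumask_var_t tracked_cpus;

static int __init tracked_cpus_init(void)
{
	/*
	 * Without MAXSMP the static definition above is already cleared.
	 * With MAXSMP, zalloc_cpumask_var() allocates the mask and returns
	 * it zeroed, so the two configurations behave identically.
	 */
	if (!zalloc_cpumask_var(&tracked_cpus, GFP_KERNEL))
		return -ENOMEM;
	return 0;
}
core_initcall(tracked_cpus_init);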

Showing 11 changed files with 11 additions and 11 deletions

arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -550,7 +550,7 @@ static int __init acpi_cpufreq_early_init(void)
 		return -ENOMEM;
 	}
 	for_each_possible_cpu(i) {
-		if (!alloc_cpumask_var_node(
+		if (!zalloc_cpumask_var_node(
 			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
 			GFP_KERNEL, cpu_to_node(i))) {
(all other lines of this file are unchanged context)
arch/x86/kernel/cpu/cpufreq/powernow-k7.c
(inline view truncated in this excerpt: only unchanged context from the top of powernow-k7.c is shown, ending before the changed hunk)
245 if (fidvidctl.bits.VID != vid) { 245 if (fidvidctl.bits.VID != vid) {
246 fidvidctl.bits.SGTC = latency; 246 fidvidctl.bits.SGTC = latency;
247 fidvidctl.bits.VID = vid; 247 fidvidctl.bits.VID = vid;
248 fidvidctl.bits.FIDC = 0; 248 fidvidctl.bits.FIDC = 0;
249 fidvidctl.bits.VIDC = 1; 249 fidvidctl.bits.VIDC = 1;
250 wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); 250 wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
251 } 251 }
252 } 252 }
253 253
254 254
255 static void change_speed(unsigned int index) 255 static void change_speed(unsigned int index)
256 { 256 {
257 u8 fid, vid; 257 u8 fid, vid;
258 struct cpufreq_freqs freqs; 258 struct cpufreq_freqs freqs;
259 union msr_fidvidstatus fidvidstatus; 259 union msr_fidvidstatus fidvidstatus;
260 int cfid; 260 int cfid;
261 261
262 /* fid are the lower 8 bits of the index we stored into 262 /* fid are the lower 8 bits of the index we stored into
263 * the cpufreq frequency table in powernow_decode_bios, 263 * the cpufreq frequency table in powernow_decode_bios,
264 * vid are the upper 8 bits. 264 * vid are the upper 8 bits.
265 */ 265 */
266 266
267 fid = powernow_table[index].index & 0xFF; 267 fid = powernow_table[index].index & 0xFF;
268 vid = (powernow_table[index].index & 0xFF00) >> 8; 268 vid = (powernow_table[index].index & 0xFF00) >> 8;
269 269
270 freqs.cpu = 0; 270 freqs.cpu = 0;
271 271
272 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); 272 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
273 cfid = fidvidstatus.bits.CFID; 273 cfid = fidvidstatus.bits.CFID;
274 freqs.old = fsb * fid_codes[cfid] / 10; 274 freqs.old = fsb * fid_codes[cfid] / 10;
275 275
276 freqs.new = powernow_table[index].frequency; 276 freqs.new = powernow_table[index].frequency;
277 277
278 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 278 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
279 279
280 /* Now do the magic poking into the MSRs. */ 280 /* Now do the magic poking into the MSRs. */
281 281
282 if (have_a0 == 1) /* A0 errata 5 */ 282 if (have_a0 == 1) /* A0 errata 5 */
283 local_irq_disable(); 283 local_irq_disable();
284 284
285 if (freqs.old > freqs.new) { 285 if (freqs.old > freqs.new) {
286 /* Going down, so change FID first */ 286 /* Going down, so change FID first */
287 change_FID(fid); 287 change_FID(fid);
288 change_VID(vid); 288 change_VID(vid);
289 } else { 289 } else {
290 /* Going up, so change VID first */ 290 /* Going up, so change VID first */
291 change_VID(vid); 291 change_VID(vid);
292 change_FID(fid); 292 change_FID(fid);
293 } 293 }
294 294
295 295
296 if (have_a0 == 1) 296 if (have_a0 == 1)
297 local_irq_enable(); 297 local_irq_enable();
298 298
299 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 299 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
300 } 300 }
301 301
302 302
303 #ifdef CONFIG_X86_POWERNOW_K7_ACPI 303 #ifdef CONFIG_X86_POWERNOW_K7_ACPI
304 304
305 static struct acpi_processor_performance *acpi_processor_perf; 305 static struct acpi_processor_performance *acpi_processor_perf;
306 306
307 static int powernow_acpi_init(void) 307 static int powernow_acpi_init(void)
308 { 308 {
309 int i; 309 int i;
310 int retval = 0; 310 int retval = 0;
311 union powernow_acpi_control_t pc; 311 union powernow_acpi_control_t pc;
312 312
313 if (acpi_processor_perf != NULL && powernow_table != NULL) { 313 if (acpi_processor_perf != NULL && powernow_table != NULL) {
314 retval = -EINVAL; 314 retval = -EINVAL;
315 goto err0; 315 goto err0;
316 } 316 }
317 317
318 acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance), 318 acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance),
319 GFP_KERNEL); 319 GFP_KERNEL);
320 if (!acpi_processor_perf) { 320 if (!acpi_processor_perf) {
321 retval = -ENOMEM; 321 retval = -ENOMEM;
322 goto err0; 322 goto err0;
323 } 323 }
324 324
325 if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, 325 if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
326 GFP_KERNEL)) { 326 GFP_KERNEL)) {
327 retval = -ENOMEM; 327 retval = -ENOMEM;
328 goto err05; 328 goto err05;
329 } 329 }
330 330
331 if (acpi_processor_register_performance(acpi_processor_perf, 0)) { 331 if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
332 retval = -EIO; 332 retval = -EIO;
333 goto err1; 333 goto err1;
334 } 334 }
335 335
336 if (acpi_processor_perf->control_register.space_id != 336 if (acpi_processor_perf->control_register.space_id !=
337 ACPI_ADR_SPACE_FIXED_HARDWARE) { 337 ACPI_ADR_SPACE_FIXED_HARDWARE) {
338 retval = -ENODEV; 338 retval = -ENODEV;
339 goto err2; 339 goto err2;
340 } 340 }
341 341
342 if (acpi_processor_perf->status_register.space_id != 342 if (acpi_processor_perf->status_register.space_id !=
343 ACPI_ADR_SPACE_FIXED_HARDWARE) { 343 ACPI_ADR_SPACE_FIXED_HARDWARE) {
344 retval = -ENODEV; 344 retval = -ENODEV;
345 goto err2; 345 goto err2;
346 } 346 }
347 347
348 number_scales = acpi_processor_perf->state_count; 348 number_scales = acpi_processor_perf->state_count;
349 349
350 if (number_scales < 2) { 350 if (number_scales < 2) {
351 retval = -ENODEV; 351 retval = -ENODEV;
352 goto err2; 352 goto err2;
353 } 353 }
354 354
355 powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * 355 powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
356 (number_scales + 1)), GFP_KERNEL); 356 (number_scales + 1)), GFP_KERNEL);
357 if (!powernow_table) { 357 if (!powernow_table) {
358 retval = -ENOMEM; 358 retval = -ENOMEM;
359 goto err2; 359 goto err2;
360 } 360 }
361 361
362 pc.val = (unsigned long) acpi_processor_perf->states[0].control; 362 pc.val = (unsigned long) acpi_processor_perf->states[0].control;
363 for (i = 0; i < number_scales; i++) { 363 for (i = 0; i < number_scales; i++) {
364 u8 fid, vid; 364 u8 fid, vid;
365 struct acpi_processor_px *state = 365 struct acpi_processor_px *state =
366 &acpi_processor_perf->states[i]; 366 &acpi_processor_perf->states[i];
367 unsigned int speed, speed_mhz; 367 unsigned int speed, speed_mhz;
368 368
369 pc.val = (unsigned long) state->control; 369 pc.val = (unsigned long) state->control;
370 dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", 370 dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
371 i, 371 i,
372 (u32) state->core_frequency, 372 (u32) state->core_frequency,
373 (u32) state->power, 373 (u32) state->power,
374 (u32) state->transition_latency, 374 (u32) state->transition_latency,
375 (u32) state->control, 375 (u32) state->control,
376 pc.bits.sgtc); 376 pc.bits.sgtc);
377 377
378 vid = pc.bits.vid; 378 vid = pc.bits.vid;
379 fid = pc.bits.fid; 379 fid = pc.bits.fid;
380 380
381 powernow_table[i].frequency = fsb * fid_codes[fid] / 10; 381 powernow_table[i].frequency = fsb * fid_codes[fid] / 10;
382 powernow_table[i].index = fid; /* lower 8 bits */ 382 powernow_table[i].index = fid; /* lower 8 bits */
383 powernow_table[i].index |= (vid << 8); /* upper 8 bits */ 383 powernow_table[i].index |= (vid << 8); /* upper 8 bits */
384 384
385 speed = powernow_table[i].frequency; 385 speed = powernow_table[i].frequency;
386 speed_mhz = speed / 1000; 386 speed_mhz = speed / 1000;
387 387
388 /* processor_perflib will multiply the MHz value by 1000 to 388 /* processor_perflib will multiply the MHz value by 1000 to
389 * get a KHz value (e.g. 1266000). However, powernow-k7 works 389 * get a KHz value (e.g. 1266000). However, powernow-k7 works
390 * with true KHz values (e.g. 1266768). To ensure that all 390 * with true KHz values (e.g. 1266768). To ensure that all
391 * powernow frequencies are available, we must ensure that 391 * powernow frequencies are available, we must ensure that
392 * ACPI doesn't restrict them, so we round up the MHz value 392 * ACPI doesn't restrict them, so we round up the MHz value
393 * to ensure that perflib's computed KHz value is greater than 393 * to ensure that perflib's computed KHz value is greater than
394 * or equal to powernow's KHz value. 394 * or equal to powernow's KHz value.
395 */ 395 */
396 if (speed % 1000 > 0) 396 if (speed % 1000 > 0)
397 speed_mhz++; 397 speed_mhz++;
398 398
399 if ((fid_codes[fid] % 10) == 5) { 399 if ((fid_codes[fid] % 10) == 5) {
400 if (have_a0 == 1) 400 if (have_a0 == 1)
401 invalidate_entry(i); 401 invalidate_entry(i);
402 } 402 }
403 403
404 dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " 404 dprintk(" FID: 0x%x (%d.%dx [%dMHz]) "
405 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, 405 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
406 fid_codes[fid] % 10, speed_mhz, vid, 406 fid_codes[fid] % 10, speed_mhz, vid,
407 mobile_vid_table[vid]/1000, 407 mobile_vid_table[vid]/1000,
408 mobile_vid_table[vid]%1000); 408 mobile_vid_table[vid]%1000);
409 409
410 if (state->core_frequency != speed_mhz) { 410 if (state->core_frequency != speed_mhz) {
411 state->core_frequency = speed_mhz; 411 state->core_frequency = speed_mhz;
412 dprintk(" Corrected ACPI frequency to %d\n", 412 dprintk(" Corrected ACPI frequency to %d\n",
413 speed_mhz); 413 speed_mhz);
414 } 414 }
415 415
416 if (latency < pc.bits.sgtc) 416 if (latency < pc.bits.sgtc)
417 latency = pc.bits.sgtc; 417 latency = pc.bits.sgtc;
418 418
419 if (speed < minimum_speed) 419 if (speed < minimum_speed)
420 minimum_speed = speed; 420 minimum_speed = speed;
421 if (speed > maximum_speed) 421 if (speed > maximum_speed)
422 maximum_speed = speed; 422 maximum_speed = speed;
423 } 423 }
424 424
425 powernow_table[i].frequency = CPUFREQ_TABLE_END; 425 powernow_table[i].frequency = CPUFREQ_TABLE_END;
426 powernow_table[i].index = 0; 426 powernow_table[i].index = 0;
427 427
428 /* notify BIOS that we exist */ 428 /* notify BIOS that we exist */
429 acpi_processor_notify_smm(THIS_MODULE); 429 acpi_processor_notify_smm(THIS_MODULE);
430 430
431 return 0; 431 return 0;
432 432
433 err2: 433 err2:
434 acpi_processor_unregister_performance(acpi_processor_perf, 0); 434 acpi_processor_unregister_performance(acpi_processor_perf, 0);
435 err1: 435 err1:
436 free_cpumask_var(acpi_processor_perf->shared_cpu_map); 436 free_cpumask_var(acpi_processor_perf->shared_cpu_map);
437 err05: 437 err05:
438 kfree(acpi_processor_perf); 438 kfree(acpi_processor_perf);
439 err0: 439 err0:
440 printk(KERN_WARNING PFX "ACPI perflib can not be used on " 440 printk(KERN_WARNING PFX "ACPI perflib can not be used on "
441 "this platform\n"); 441 "this platform\n");
442 acpi_processor_perf = NULL; 442 acpi_processor_perf = NULL;
443 return retval; 443 return retval;
444 } 444 }
445 #else 445 #else
446 static int powernow_acpi_init(void) 446 static int powernow_acpi_init(void)
447 { 447 {
448 printk(KERN_INFO PFX "no support for ACPI processor found." 448 printk(KERN_INFO PFX "no support for ACPI processor found."
449 " Please recompile your kernel with ACPI processor\n"); 449 " Please recompile your kernel with ACPI processor\n");
450 return -EINVAL; 450 return -EINVAL;
451 } 451 }
452 #endif 452 #endif
453 453
454 static void print_pst_entry(struct pst_s *pst, unsigned int j) 454 static void print_pst_entry(struct pst_s *pst, unsigned int j)
455 { 455 {
456 dprintk("PST:%d (@%p)\n", j, pst); 456 dprintk("PST:%d (@%p)\n", j, pst);
457 dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", 457 dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n",
458 pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); 458 pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
459 } 459 }
460 460
461 static int powernow_decode_bios(int maxfid, int startvid) 461 static int powernow_decode_bios(int maxfid, int startvid)
462 { 462 {
463 struct psb_s *psb; 463 struct psb_s *psb;
464 struct pst_s *pst; 464 struct pst_s *pst;
465 unsigned int i, j; 465 unsigned int i, j;
466 unsigned char *p; 466 unsigned char *p;
467 unsigned int etuple; 467 unsigned int etuple;
468 unsigned int ret; 468 unsigned int ret;
469 469
470 etuple = cpuid_eax(0x80000001); 470 etuple = cpuid_eax(0x80000001);
471 471
472 for (i = 0xC0000; i < 0xffff0 ; i += 16) { 472 for (i = 0xC0000; i < 0xffff0 ; i += 16) {
473 473
474 p = phys_to_virt(i); 474 p = phys_to_virt(i);
475 475
476 if (memcmp(p, "AMDK7PNOW!", 10) == 0) { 476 if (memcmp(p, "AMDK7PNOW!", 10) == 0) {
477 dprintk("Found PSB header at %p\n", p); 477 dprintk("Found PSB header at %p\n", p);
478 psb = (struct psb_s *) p; 478 psb = (struct psb_s *) p;
479 dprintk("Table version: 0x%x\n", psb->tableversion); 479 dprintk("Table version: 0x%x\n", psb->tableversion);
480 if (psb->tableversion != 0x12) { 480 if (psb->tableversion != 0x12) {
481 printk(KERN_INFO PFX "Sorry, only v1.2 tables" 481 printk(KERN_INFO PFX "Sorry, only v1.2 tables"
482 " supported right now\n"); 482 " supported right now\n");
483 return -ENODEV; 483 return -ENODEV;
484 } 484 }
485 485
486 dprintk("Flags: 0x%x\n", psb->flags); 486 dprintk("Flags: 0x%x\n", psb->flags);
487 if ((psb->flags & 1) == 0) 487 if ((psb->flags & 1) == 0)
488 dprintk("Mobile voltage regulator\n"); 488 dprintk("Mobile voltage regulator\n");
489 else 489 else
490 dprintk("Desktop voltage regulator\n"); 490 dprintk("Desktop voltage regulator\n");
491 491
492 latency = psb->settlingtime; 492 latency = psb->settlingtime;
493 if (latency < 100) { 493 if (latency < 100) {
494 printk(KERN_INFO PFX "BIOS set settling time " 494 printk(KERN_INFO PFX "BIOS set settling time "
495 "to %d microseconds. " 495 "to %d microseconds. "
496 "Should be at least 100. " 496 "Should be at least 100. "
497 "Correcting.\n", latency); 497 "Correcting.\n", latency);
498 latency = 100; 498 latency = 100;
499 } 499 }
500 dprintk("Settling Time: %d microseconds.\n", 500 dprintk("Settling Time: %d microseconds.\n",
501 psb->settlingtime); 501 psb->settlingtime);
502 dprintk("Has %d PST tables. (Only dumping ones " 502 dprintk("Has %d PST tables. (Only dumping ones "
503 "relevant to this CPU).\n", 503 "relevant to this CPU).\n",
504 psb->numpst); 504 psb->numpst);
505 505
506 p += sizeof(struct psb_s); 506 p += sizeof(struct psb_s);
507 507
508 pst = (struct pst_s *) p; 508 pst = (struct pst_s *) p;
509 509
510 for (j = 0; j < psb->numpst; j++) { 510 for (j = 0; j < psb->numpst; j++) {
511 pst = (struct pst_s *) p; 511 pst = (struct pst_s *) p;
512 number_scales = pst->numpstates; 512 number_scales = pst->numpstates;
513 513
514 if ((etuple == pst->cpuid) && 514 if ((etuple == pst->cpuid) &&
515 check_fsb(pst->fsbspeed) && 515 check_fsb(pst->fsbspeed) &&
516 (maxfid == pst->maxfid) && 516 (maxfid == pst->maxfid) &&
517 (startvid == pst->startvid)) { 517 (startvid == pst->startvid)) {
518 print_pst_entry(pst, j); 518 print_pst_entry(pst, j);
519 p = (char *)pst + sizeof(struct pst_s); 519 p = (char *)pst + sizeof(struct pst_s);
520 ret = get_ranges(p); 520 ret = get_ranges(p);
521 return ret; 521 return ret;
522 } else { 522 } else {
523 unsigned int k; 523 unsigned int k;
524 p = (char *)pst + sizeof(struct pst_s); 524 p = (char *)pst + sizeof(struct pst_s);
525 for (k = 0; k < number_scales; k++) 525 for (k = 0; k < number_scales; k++)
526 p += 2; 526 p += 2;
527 } 527 }
528 } 528 }
529 printk(KERN_INFO PFX "No PST tables match this cpuid " 529 printk(KERN_INFO PFX "No PST tables match this cpuid "
530 "(0x%x)\n", etuple); 530 "(0x%x)\n", etuple);
531 printk(KERN_INFO PFX "This is indicative of a broken " 531 printk(KERN_INFO PFX "This is indicative of a broken "
532 "BIOS.\n"); 532 "BIOS.\n");
533 533
534 return -EINVAL; 534 return -EINVAL;
535 } 535 }
536 p++; 536 p++;
537 } 537 }
538 538
539 return -ENODEV; 539 return -ENODEV;
540 } 540 }
541 541
542 542
543 static int powernow_target(struct cpufreq_policy *policy, 543 static int powernow_target(struct cpufreq_policy *policy,
544 unsigned int target_freq, 544 unsigned int target_freq,
545 unsigned int relation) 545 unsigned int relation)
546 { 546 {
547 unsigned int newstate; 547 unsigned int newstate;
548 548
549 if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, 549 if (cpufreq_frequency_table_target(policy, powernow_table, target_freq,
550 relation, &newstate)) 550 relation, &newstate))
551 return -EINVAL; 551 return -EINVAL;
552 552
553 change_speed(newstate); 553 change_speed(newstate);
554 554
555 return 0; 555 return 0;
556 } 556 }
557 557
558 558
559 static int powernow_verify(struct cpufreq_policy *policy) 559 static int powernow_verify(struct cpufreq_policy *policy)
560 { 560 {
561 return cpufreq_frequency_table_verify(policy, powernow_table); 561 return cpufreq_frequency_table_verify(policy, powernow_table);
562 } 562 }
563 563
564 /* 564 /*
565 * We use the fact that the bus frequency is somehow 565 * We use the fact that the bus frequency is somehow
566 * a multiple of 100000/3 khz, then we compute sgtc according 566 * a multiple of 100000/3 khz, then we compute sgtc according
567 * to this multiple. 567 * to this multiple.
568 * That way, we match more how AMD thinks all of that work. 568 * That way, we match more how AMD thinks all of that work.
569 * We will then get the same kind of behaviour already tested under 569 * We will then get the same kind of behaviour already tested under
570 * the "well-known" other OS. 570 * the "well-known" other OS.
571 */ 571 */
572 static int __init fixup_sgtc(void) 572 static int __init fixup_sgtc(void)
573 { 573 {
574 unsigned int sgtc; 574 unsigned int sgtc;
575 unsigned int m; 575 unsigned int m;
576 576
577 m = fsb / 3333; 577 m = fsb / 3333;
578 if ((m % 10) >= 5) 578 if ((m % 10) >= 5)
579 m += 5; 579 m += 5;
580 580
581 m /= 10; 581 m /= 10;
582 582
583 sgtc = 100 * m * latency; 583 sgtc = 100 * m * latency;
584 sgtc = sgtc / 3; 584 sgtc = sgtc / 3;
585 if (sgtc > 0xfffff) { 585 if (sgtc > 0xfffff) {
586 printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); 586 printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc);
587 sgtc = 0xfffff; 587 sgtc = 0xfffff;
588 } 588 }
589 return sgtc; 589 return sgtc;
590 } 590 }
591 591
592 static unsigned int powernow_get(unsigned int cpu) 592 static unsigned int powernow_get(unsigned int cpu)
593 { 593 {
594 union msr_fidvidstatus fidvidstatus; 594 union msr_fidvidstatus fidvidstatus;
595 unsigned int cfid; 595 unsigned int cfid;
596 596
597 if (cpu) 597 if (cpu)
598 return 0; 598 return 0;
599 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); 599 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
600 cfid = fidvidstatus.bits.CFID; 600 cfid = fidvidstatus.bits.CFID;
601 601
602 return fsb * fid_codes[cfid] / 10; 602 return fsb * fid_codes[cfid] / 10;
603 } 603 }
604 604
605 605
606 static int __init acer_cpufreq_pst(const struct dmi_system_id *d) 606 static int __init acer_cpufreq_pst(const struct dmi_system_id *d)
607 { 607 {
608 printk(KERN_WARNING PFX 608 printk(KERN_WARNING PFX
609 "%s laptop with broken PST tables in BIOS detected.\n", 609 "%s laptop with broken PST tables in BIOS detected.\n",
610 d->ident); 610 d->ident);
611 printk(KERN_WARNING PFX 611 printk(KERN_WARNING PFX
612 "You need to downgrade to 3A21 (09/09/2002), or try a newer " 612 "You need to downgrade to 3A21 (09/09/2002), or try a newer "
613 "BIOS than 3A71 (01/20/2003)\n"); 613 "BIOS than 3A71 (01/20/2003)\n");
614 printk(KERN_WARNING PFX 614 printk(KERN_WARNING PFX
615 "cpufreq scaling has been disabled as a result of this.\n"); 615 "cpufreq scaling has been disabled as a result of this.\n");
616 return 0; 616 return 0;
617 } 617 }
618 618
619 /* 619 /*
620 * Some Athlon laptops have really fucked PST tables. 620 * Some Athlon laptops have really fucked PST tables.
621 * A BIOS update is all that can save them. 621 * A BIOS update is all that can save them.
622 * Mention this, and disable cpufreq. 622 * Mention this, and disable cpufreq.
623 */ 623 */
624 static struct dmi_system_id __initdata powernow_dmi_table[] = { 624 static struct dmi_system_id __initdata powernow_dmi_table[] = {
625 { 625 {
626 .callback = acer_cpufreq_pst, 626 .callback = acer_cpufreq_pst,
627 .ident = "Acer Aspire", 627 .ident = "Acer Aspire",
628 .matches = { 628 .matches = {
629 DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"), 629 DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"),
630 DMI_MATCH(DMI_BIOS_VERSION, "3A71"), 630 DMI_MATCH(DMI_BIOS_VERSION, "3A71"),
631 }, 631 },
632 }, 632 },
633 { } 633 { }
634 }; 634 };
635 635
636 static int __init powernow_cpu_init(struct cpufreq_policy *policy) 636 static int __init powernow_cpu_init(struct cpufreq_policy *policy)
637 { 637 {
638 union msr_fidvidstatus fidvidstatus; 638 union msr_fidvidstatus fidvidstatus;
639 int result; 639 int result;
640 640
641 if (policy->cpu != 0) 641 if (policy->cpu != 0)
642 return -ENODEV; 642 return -ENODEV;
643 643
644 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); 644 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
645 645
646 recalibrate_cpu_khz(); 646 recalibrate_cpu_khz();
647 647
648 fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; 648 fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
649 if (!fsb) { 649 if (!fsb) {
650 printk(KERN_WARNING PFX "can not determine bus frequency\n"); 650 printk(KERN_WARNING PFX "can not determine bus frequency\n");
651 return -EINVAL; 651 return -EINVAL;
652 } 652 }
653 dprintk("FSB: %3dMHz\n", fsb/1000); 653 dprintk("FSB: %3dMHz\n", fsb/1000);
654 654
655 if (dmi_check_system(powernow_dmi_table) || acpi_force) { 655 if (dmi_check_system(powernow_dmi_table) || acpi_force) {
656 printk(KERN_INFO PFX "PSB/PST known to be broken. " 656 printk(KERN_INFO PFX "PSB/PST known to be broken. "
657 "Trying ACPI instead\n"); 657 "Trying ACPI instead\n");
658 result = powernow_acpi_init(); 658 result = powernow_acpi_init();
659 } else { 659 } else {
660 result = powernow_decode_bios(fidvidstatus.bits.MFID, 660 result = powernow_decode_bios(fidvidstatus.bits.MFID,
661 fidvidstatus.bits.SVID); 661 fidvidstatus.bits.SVID);
662 if (result) { 662 if (result) {
663 printk(KERN_INFO PFX "Trying ACPI perflib\n"); 663 printk(KERN_INFO PFX "Trying ACPI perflib\n");
664 maximum_speed = 0; 664 maximum_speed = 0;
665 minimum_speed = -1; 665 minimum_speed = -1;
666 latency = 0; 666 latency = 0;
667 result = powernow_acpi_init(); 667 result = powernow_acpi_init();
668 if (result) { 668 if (result) {
669 printk(KERN_INFO PFX 669 printk(KERN_INFO PFX
670 "ACPI and legacy methods failed\n"); 670 "ACPI and legacy methods failed\n");
671 } 671 }
672 } else { 672 } else {
673 /* SGTC use the bus clock as timer */ 673 /* SGTC use the bus clock as timer */
674 latency = fixup_sgtc(); 674 latency = fixup_sgtc();
675 printk(KERN_INFO PFX "SGTC: %d\n", latency); 675 printk(KERN_INFO PFX "SGTC: %d\n", latency);
676 } 676 }
677 } 677 }
678 678
679 if (result) 679 if (result)
680 return result; 680 return result;
681 681
682 printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", 682 printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
683 minimum_speed/1000, maximum_speed/1000); 683 minimum_speed/1000, maximum_speed/1000);
684 684
685 policy->cpuinfo.transition_latency = 685 policy->cpuinfo.transition_latency =
686 cpufreq_scale(2000000UL, fsb, latency); 686 cpufreq_scale(2000000UL, fsb, latency);
687 687
688 policy->cur = powernow_get(0); 688 policy->cur = powernow_get(0);
689 689
690 cpufreq_frequency_table_get_attr(powernow_table, policy->cpu); 690 cpufreq_frequency_table_get_attr(powernow_table, policy->cpu);
691 691
692 return cpufreq_frequency_table_cpuinfo(policy, powernow_table); 692 return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
693 } 693 }
694 694
695 static int powernow_cpu_exit(struct cpufreq_policy *policy) 695 static int powernow_cpu_exit(struct cpufreq_policy *policy)
696 { 696 {
697 cpufreq_frequency_table_put_attr(policy->cpu); 697 cpufreq_frequency_table_put_attr(policy->cpu);
698 698
699 #ifdef CONFIG_X86_POWERNOW_K7_ACPI 699 #ifdef CONFIG_X86_POWERNOW_K7_ACPI
700 if (acpi_processor_perf) { 700 if (acpi_processor_perf) {
701 acpi_processor_unregister_performance(acpi_processor_perf, 0); 701 acpi_processor_unregister_performance(acpi_processor_perf, 0);
702 free_cpumask_var(acpi_processor_perf->shared_cpu_map); 702 free_cpumask_var(acpi_processor_perf->shared_cpu_map);
703 kfree(acpi_processor_perf); 703 kfree(acpi_processor_perf);
704 } 704 }
705 #endif 705 #endif
706 706
707 kfree(powernow_table); 707 kfree(powernow_table);
708 return 0; 708 return 0;
709 } 709 }
710 710
711 static struct freq_attr *powernow_table_attr[] = { 711 static struct freq_attr *powernow_table_attr[] = {
712 &cpufreq_freq_attr_scaling_available_freqs, 712 &cpufreq_freq_attr_scaling_available_freqs,
713 NULL, 713 NULL,
714 }; 714 };
715 715
716 static struct cpufreq_driver powernow_driver = { 716 static struct cpufreq_driver powernow_driver = {
717 .verify = powernow_verify, 717 .verify = powernow_verify,
718 .target = powernow_target, 718 .target = powernow_target,
719 .get = powernow_get, 719 .get = powernow_get,
720 .init = powernow_cpu_init, 720 .init = powernow_cpu_init,
721 .exit = powernow_cpu_exit, 721 .exit = powernow_cpu_exit,
722 .name = "powernow-k7", 722 .name = "powernow-k7",
723 .owner = THIS_MODULE, 723 .owner = THIS_MODULE,
724 .attr = powernow_table_attr, 724 .attr = powernow_table_attr,
725 }; 725 };
726 726
727 static int __init powernow_init(void) 727 static int __init powernow_init(void)
728 { 728 {
729 if (check_powernow() == 0) 729 if (check_powernow() == 0)
730 return -ENODEV; 730 return -ENODEV;
731 return cpufreq_register_driver(&powernow_driver); 731 return cpufreq_register_driver(&powernow_driver);
732 } 732 }
733 733
734 734
735 static void __exit powernow_exit(void) 735 static void __exit powernow_exit(void)
736 { 736 {
737 cpufreq_unregister_driver(&powernow_driver); 737 cpufreq_unregister_driver(&powernow_driver);
738 } 738 }
739 739
740 module_param(acpi_force, int, 0444); 740 module_param(acpi_force, int, 0444);
741 MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); 741 MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
742 742
743 MODULE_AUTHOR("Dave Jones <davej@redhat.com>"); 743 MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
744 MODULE_DESCRIPTION("Powernow driver for AMD K7 processors."); 744 MODULE_DESCRIPTION("Powernow driver for AMD K7 processors.");
745 MODULE_LICENSE("GPL"); 745 MODULE_LICENSE("GPL");
746 746
747 late_initcall(powernow_init); 747 late_initcall(powernow_init);
748 module_exit(powernow_exit); 748 module_exit(powernow_exit);
749 749
750 750
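
Editorial note (not part of the diff): the only functional change in the powernow-k7.c hunk above is the switch from alloc_cpumask_var() to zalloc_cpumask_var() at line 325, for the shared_cpu_map field of the kzalloc'd acpi_processor_perf structure. When CONFIG_CPUMASK_OFFSTACK is disabled, cpumask_var_t is an embedded struct cpumask and kzalloc() already leaves it zeroed; when it is enabled (as with MAXSMP), the mask is a separately allocated buffer that alloc_cpumask_var() returns uninitialized. Below is a minimal, illustrative sketch of the two allocation patterns, assuming the usual <linux/cpumask.h> helpers of this kernel generation; the function names old_style/new_style are hypothetical and do not appear in the driver.

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>

/* Before: callers had to clear the freshly allocated mask themselves. */
static int old_style(cpumask_var_t *mask)
{
	if (!alloc_cpumask_var(mask, GFP_KERNEL))
		return -ENOMEM;
	cpumask_clear(*mask);	/* easy to forget when OFFSTACK is enabled */
	return 0;
}

/* After: zalloc_cpumask_var() hands back the mask already zeroed. */
static int new_style(cpumask_var_t *mask)
{
	if (!zalloc_cpumask_var(mask, GFP_KERNEL))
		return -ENOMEM;
	return 0;
}

Using the zeroing allocator keeps the behaviour identical in both configurations without an explicit cpumask_clear() at every call site, which is why the hunks in this commit are all one-line substitutions.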
arch/x86/kernel/cpu/cpufreq/powernow-k8.c
1 /* 1 /*
2 * (c) 2003-2006 Advanced Micro Devices, Inc. 2 * (c) 2003-2006 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the 3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or 4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html 5 * http://www.gnu.org/licenses/gpl.html
6 * 6 *
7 * Support : mark.langsdorf@amd.com 7 * Support : mark.langsdorf@amd.com
8 * 8 *
9 * Based on the powernow-k7.c module written by Dave Jones. 9 * Based on the powernow-k7.c module written by Dave Jones.
10 * (C) 2003 Dave Jones on behalf of SuSE Labs 10 * (C) 2003 Dave Jones on behalf of SuSE Labs
11 * (C) 2004 Dominik Brodowski <linux@brodo.de> 11 * (C) 2004 Dominik Brodowski <linux@brodo.de>
12 * (C) 2004 Pavel Machek <pavel@suse.cz> 12 * (C) 2004 Pavel Machek <pavel@suse.cz>
13 * Licensed under the terms of the GNU GPL License version 2. 13 * Licensed under the terms of the GNU GPL License version 2.
14 * Based upon datasheets & sample CPUs kindly provided by AMD. 14 * Based upon datasheets & sample CPUs kindly provided by AMD.
15 * 15 *
16 * Valuable input gratefully received from Dave Jones, Pavel Machek, 16 * Valuable input gratefully received from Dave Jones, Pavel Machek,
17 * Dominik Brodowski, Jacob Shin, and others. 17 * Dominik Brodowski, Jacob Shin, and others.
18 * Originally developed by Paul Devriendt. 18 * Originally developed by Paul Devriendt.
19 * Processor information obtained from Chapter 9 (Power and Thermal Management) 19 * Processor information obtained from Chapter 9 (Power and Thermal Management)
20 * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD 20 * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
21 * Opteron Processors" available for download from www.amd.com 21 * Opteron Processors" available for download from www.amd.com
22 * 22 *
23 * Tables for specific CPUs can be inferred from 23 * Tables for specific CPUs can be inferred from
24 * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf 24 * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
25 */ 25 */
26 26
27 #include <linux/kernel.h> 27 #include <linux/kernel.h>
28 #include <linux/smp.h> 28 #include <linux/smp.h>
29 #include <linux/module.h> 29 #include <linux/module.h>
30 #include <linux/init.h> 30 #include <linux/init.h>
31 #include <linux/cpufreq.h> 31 #include <linux/cpufreq.h>
32 #include <linux/slab.h> 32 #include <linux/slab.h>
33 #include <linux/string.h> 33 #include <linux/string.h>
34 #include <linux/cpumask.h> 34 #include <linux/cpumask.h>
35 #include <linux/sched.h> /* for current / set_cpus_allowed() */ 35 #include <linux/sched.h> /* for current / set_cpus_allowed() */
36 #include <linux/io.h> 36 #include <linux/io.h>
37 #include <linux/delay.h> 37 #include <linux/delay.h>
38 38
39 #include <asm/msr.h> 39 #include <asm/msr.h>
40 40
41 #include <linux/acpi.h> 41 #include <linux/acpi.h>
42 #include <linux/mutex.h> 42 #include <linux/mutex.h>
43 #include <acpi/processor.h> 43 #include <acpi/processor.h>
44 44
45 #define PFX "powernow-k8: " 45 #define PFX "powernow-k8: "
46 #define VERSION "version 2.20.00" 46 #define VERSION "version 2.20.00"
47 #include "powernow-k8.h" 47 #include "powernow-k8.h"
48 48
49 /* serialize freq changes */ 49 /* serialize freq changes */
50 static DEFINE_MUTEX(fidvid_mutex); 50 static DEFINE_MUTEX(fidvid_mutex);
51 51
52 static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); 52 static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);
53 53
54 static int cpu_family = CPU_OPTERON; 54 static int cpu_family = CPU_OPTERON;
55 55
56 #ifndef CONFIG_SMP 56 #ifndef CONFIG_SMP
57 static inline const struct cpumask *cpu_core_mask(int cpu) 57 static inline const struct cpumask *cpu_core_mask(int cpu)
58 { 58 {
59 return cpumask_of(0); 59 return cpumask_of(0);
60 } 60 }
61 #endif 61 #endif
62 62
63 /* Return a frequency in MHz, given an input fid */ 63 /* Return a frequency in MHz, given an input fid */
64 static u32 find_freq_from_fid(u32 fid) 64 static u32 find_freq_from_fid(u32 fid)
65 { 65 {
66 return 800 + (fid * 100); 66 return 800 + (fid * 100);
67 } 67 }
68 68
69 /* Return a frequency in KHz, given an input fid */ 69 /* Return a frequency in KHz, given an input fid */
70 static u32 find_khz_freq_from_fid(u32 fid) 70 static u32 find_khz_freq_from_fid(u32 fid)
71 { 71 {
72 return 1000 * find_freq_from_fid(fid); 72 return 1000 * find_freq_from_fid(fid);
73 } 73 }
74 74
75 static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, 75 static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
76 u32 pstate) 76 u32 pstate)
77 { 77 {
78 return data[pstate].frequency; 78 return data[pstate].frequency;
79 } 79 }
80 80
81 /* Return the vco fid for an input fid 81 /* Return the vco fid for an input fid
82 * 82 *
83 * Each "low" fid has corresponding "high" fid, and you can get to "low" fids 83 * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
84 * only from corresponding high fids. This returns "high" fid corresponding to 84 * only from corresponding high fids. This returns "high" fid corresponding to
85 * "low" one. 85 * "low" one.
86 */ 86 */
87 static u32 convert_fid_to_vco_fid(u32 fid) 87 static u32 convert_fid_to_vco_fid(u32 fid)
88 { 88 {
89 if (fid < HI_FID_TABLE_BOTTOM) 89 if (fid < HI_FID_TABLE_BOTTOM)
90 return 8 + (2 * fid); 90 return 8 + (2 * fid);
91 else 91 else
92 return fid; 92 return fid;
93 } 93 }
94 94
95 /* 95 /*
96 * Return 1 if the pending bit is set. Unless we just instructed the processor 96 * Return 1 if the pending bit is set. Unless we just instructed the processor
97 * to transition to a new state, seeing this bit set is really bad news. 97 * to transition to a new state, seeing this bit set is really bad news.
98 */ 98 */
99 static int pending_bit_stuck(void) 99 static int pending_bit_stuck(void)
100 { 100 {
101 u32 lo, hi; 101 u32 lo, hi;
102 102
103 if (cpu_family == CPU_HW_PSTATE) 103 if (cpu_family == CPU_HW_PSTATE)
104 return 0; 104 return 0;
105 105
106 rdmsr(MSR_FIDVID_STATUS, lo, hi); 106 rdmsr(MSR_FIDVID_STATUS, lo, hi);
107 return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; 107 return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
108 } 108 }
109 109
110 /* 110 /*
111 * Update the global current fid / vid values from the status msr. 111 * Update the global current fid / vid values from the status msr.
112 * Returns 1 on error. 112 * Returns 1 on error.
113 */ 113 */
114 static int query_current_values_with_pending_wait(struct powernow_k8_data *data) 114 static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
115 { 115 {
116 u32 lo, hi; 116 u32 lo, hi;
117 u32 i = 0; 117 u32 i = 0;
118 118
119 if (cpu_family == CPU_HW_PSTATE) { 119 if (cpu_family == CPU_HW_PSTATE) {
120 if (data->currpstate == HW_PSTATE_INVALID) { 120 if (data->currpstate == HW_PSTATE_INVALID) {
121 /* read (initial) hw pstate if not yet set */ 121 /* read (initial) hw pstate if not yet set */
122 rdmsr(MSR_PSTATE_STATUS, lo, hi); 122 rdmsr(MSR_PSTATE_STATUS, lo, hi);
123 i = lo & HW_PSTATE_MASK; 123 i = lo & HW_PSTATE_MASK;
124 124
125 /* 125 /*
126 * a workaround for family 11h erratum 311 might cause 126 * a workaround for family 11h erratum 311 might cause
127 * an "out-of-range Pstate if the core is in Pstate-0 127 * an "out-of-range Pstate if the core is in Pstate-0
128 */ 128 */
129 if (i >= data->numps) 129 if (i >= data->numps)
130 data->currpstate = HW_PSTATE_0; 130 data->currpstate = HW_PSTATE_0;
131 else 131 else
132 data->currpstate = i; 132 data->currpstate = i;
133 } 133 }
134 return 0; 134 return 0;
135 } 135 }
136 do { 136 do {
137 if (i++ > 10000) { 137 if (i++ > 10000) {
138 dprintk("detected change pending stuck\n"); 138 dprintk("detected change pending stuck\n");
139 return 1; 139 return 1;
140 } 140 }
141 rdmsr(MSR_FIDVID_STATUS, lo, hi); 141 rdmsr(MSR_FIDVID_STATUS, lo, hi);
142 } while (lo & MSR_S_LO_CHANGE_PENDING); 142 } while (lo & MSR_S_LO_CHANGE_PENDING);
143 143
144 data->currvid = hi & MSR_S_HI_CURRENT_VID; 144 data->currvid = hi & MSR_S_HI_CURRENT_VID;
145 data->currfid = lo & MSR_S_LO_CURRENT_FID; 145 data->currfid = lo & MSR_S_LO_CURRENT_FID;
146 146
147 return 0; 147 return 0;
148 } 148 }
149 149
150 /* the isochronous relief time */ 150 /* the isochronous relief time */
151 static void count_off_irt(struct powernow_k8_data *data) 151 static void count_off_irt(struct powernow_k8_data *data)
152 { 152 {
153 udelay((1 << data->irt) * 10); 153 udelay((1 << data->irt) * 10);
154 return; 154 return;
155 } 155 }
156 156
157 /* the voltage stabilization time */ 157 /* the voltage stabilization time */
158 static void count_off_vst(struct powernow_k8_data *data) 158 static void count_off_vst(struct powernow_k8_data *data)
159 { 159 {
160 udelay(data->vstable * VST_UNITS_20US); 160 udelay(data->vstable * VST_UNITS_20US);
161 return; 161 return;
162 } 162 }
163 163
164 /* need to init the control msr to a safe value (for each cpu) */ 164 /* need to init the control msr to a safe value (for each cpu) */
165 static void fidvid_msr_init(void) 165 static void fidvid_msr_init(void)
166 { 166 {
167 u32 lo, hi; 167 u32 lo, hi;
168 u8 fid, vid; 168 u8 fid, vid;
169 169
170 rdmsr(MSR_FIDVID_STATUS, lo, hi); 170 rdmsr(MSR_FIDVID_STATUS, lo, hi);
171 vid = hi & MSR_S_HI_CURRENT_VID; 171 vid = hi & MSR_S_HI_CURRENT_VID;
172 fid = lo & MSR_S_LO_CURRENT_FID; 172 fid = lo & MSR_S_LO_CURRENT_FID;
173 lo = fid | (vid << MSR_C_LO_VID_SHIFT); 173 lo = fid | (vid << MSR_C_LO_VID_SHIFT);
174 hi = MSR_C_HI_STP_GNT_BENIGN; 174 hi = MSR_C_HI_STP_GNT_BENIGN;
175 dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); 175 dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
176 wrmsr(MSR_FIDVID_CTL, lo, hi); 176 wrmsr(MSR_FIDVID_CTL, lo, hi);
177 } 177 }
178 178
179 /* write the new fid value along with the other control fields to the msr */ 179 /* write the new fid value along with the other control fields to the msr */
180 static int write_new_fid(struct powernow_k8_data *data, u32 fid) 180 static int write_new_fid(struct powernow_k8_data *data, u32 fid)
181 { 181 {
182 u32 lo; 182 u32 lo;
183 u32 savevid = data->currvid; 183 u32 savevid = data->currvid;
184 u32 i = 0; 184 u32 i = 0;
185 185
186 if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { 186 if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
187 printk(KERN_ERR PFX "internal error - overflow on fid write\n"); 187 printk(KERN_ERR PFX "internal error - overflow on fid write\n");
188 return 1; 188 return 1;
189 } 189 }
190 190
191 lo = fid; 191 lo = fid;
192 lo |= (data->currvid << MSR_C_LO_VID_SHIFT); 192 lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
193 lo |= MSR_C_LO_INIT_FID_VID; 193 lo |= MSR_C_LO_INIT_FID_VID;
194 194
195 dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", 195 dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
196 fid, lo, data->plllock * PLL_LOCK_CONVERSION); 196 fid, lo, data->plllock * PLL_LOCK_CONVERSION);
197 197
198 do { 198 do {
199 wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); 199 wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
200 if (i++ > 100) { 200 if (i++ > 100) {
201 printk(KERN_ERR PFX 201 printk(KERN_ERR PFX
202 "Hardware error - pending bit very stuck - " 202 "Hardware error - pending bit very stuck - "
203 "no further pstate changes possible\n"); 203 "no further pstate changes possible\n");
204 return 1; 204 return 1;
205 } 205 }
206 } while (query_current_values_with_pending_wait(data)); 206 } while (query_current_values_with_pending_wait(data));
207 207
208 count_off_irt(data); 208 count_off_irt(data);
209 209
210 if (savevid != data->currvid) { 210 if (savevid != data->currvid) {
211 printk(KERN_ERR PFX 211 printk(KERN_ERR PFX
212 "vid change on fid trans, old 0x%x, new 0x%x\n", 212 "vid change on fid trans, old 0x%x, new 0x%x\n",
213 savevid, data->currvid); 213 savevid, data->currvid);
214 return 1; 214 return 1;
215 } 215 }
216 216
217 if (fid != data->currfid) { 217 if (fid != data->currfid) {
218 printk(KERN_ERR PFX 218 printk(KERN_ERR PFX
219 "fid trans failed, fid 0x%x, curr 0x%x\n", fid, 219 "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
220 data->currfid); 220 data->currfid);
221 return 1; 221 return 1;
222 } 222 }
223 223
224 return 0; 224 return 0;
225 } 225 }
226 226
227 /* Write a new vid to the hardware */ 227 /* Write a new vid to the hardware */
228 static int write_new_vid(struct powernow_k8_data *data, u32 vid) 228 static int write_new_vid(struct powernow_k8_data *data, u32 vid)
229 { 229 {
230 u32 lo; 230 u32 lo;
231 u32 savefid = data->currfid; 231 u32 savefid = data->currfid;
232 int i = 0; 232 int i = 0;
233 233
234 if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { 234 if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
235 printk(KERN_ERR PFX "internal error - overflow on vid write\n"); 235 printk(KERN_ERR PFX "internal error - overflow on vid write\n");
236 return 1; 236 return 1;
237 } 237 }
238 238
239 lo = data->currfid; 239 lo = data->currfid;
240 lo |= (vid << MSR_C_LO_VID_SHIFT); 240 lo |= (vid << MSR_C_LO_VID_SHIFT);
241 lo |= MSR_C_LO_INIT_FID_VID; 241 lo |= MSR_C_LO_INIT_FID_VID;
242 242
243 dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", 243 dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
244 vid, lo, STOP_GRANT_5NS); 244 vid, lo, STOP_GRANT_5NS);
245 245
246 do { 246 do {
247 wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); 247 wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
248 if (i++ > 100) { 248 if (i++ > 100) {
249 printk(KERN_ERR PFX "internal error - pending bit " 249 printk(KERN_ERR PFX "internal error - pending bit "
250 "very stuck - no further pstate " 250 "very stuck - no further pstate "
251 "changes possible\n"); 251 "changes possible\n");
252 return 1; 252 return 1;
253 } 253 }
254 } while (query_current_values_with_pending_wait(data)); 254 } while (query_current_values_with_pending_wait(data));
255 255
256 if (savefid != data->currfid) { 256 if (savefid != data->currfid) {
257 printk(KERN_ERR PFX "fid changed on vid trans, old " 257 printk(KERN_ERR PFX "fid changed on vid trans, old "
258 "0x%x new 0x%x\n", 258 "0x%x new 0x%x\n",
259 savefid, data->currfid); 259 savefid, data->currfid);
260 return 1; 260 return 1;
261 } 261 }
262 262
263 if (vid != data->currvid) { 263 if (vid != data->currvid) {
264 printk(KERN_ERR PFX "vid trans failed, vid 0x%x, " 264 printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
265 "curr 0x%x\n", 265 "curr 0x%x\n",
266 vid, data->currvid); 266 vid, data->currvid);
267 return 1; 267 return 1;
268 } 268 }
269 269
270 return 0; 270 return 0;
271 } 271 }
272 272
273 /* 273 /*
274 * Reduce the vid by the max of step or reqvid. 274 * Reduce the vid by the max of step or reqvid.
275 * Decreasing vid codes represent increasing voltages: 275 * Decreasing vid codes represent increasing voltages:
276 * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. 276 * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
277 */ 277 */
278 static int decrease_vid_code_by_step(struct powernow_k8_data *data, 278 static int decrease_vid_code_by_step(struct powernow_k8_data *data,
279 u32 reqvid, u32 step) 279 u32 reqvid, u32 step)
280 { 280 {
281 if ((data->currvid - reqvid) > step) 281 if ((data->currvid - reqvid) > step)
282 reqvid = data->currvid - step; 282 reqvid = data->currvid - step;
283 283
284 if (write_new_vid(data, reqvid)) 284 if (write_new_vid(data, reqvid))
285 return 1; 285 return 1;
286 286
287 count_off_vst(data); 287 count_off_vst(data);
288 288
289 return 0; 289 return 0;
290 } 290 }
291 291
292 /* Change hardware pstate by single MSR write */ 292 /* Change hardware pstate by single MSR write */
293 static int transition_pstate(struct powernow_k8_data *data, u32 pstate) 293 static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
294 { 294 {
295 wrmsr(MSR_PSTATE_CTRL, pstate, 0); 295 wrmsr(MSR_PSTATE_CTRL, pstate, 0);
296 data->currpstate = pstate; 296 data->currpstate = pstate;
297 return 0; 297 return 0;
298 } 298 }
299 299
300 /* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ 300 /* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
301 static int transition_fid_vid(struct powernow_k8_data *data, 301 static int transition_fid_vid(struct powernow_k8_data *data,
302 u32 reqfid, u32 reqvid) 302 u32 reqfid, u32 reqvid)
303 { 303 {
304 if (core_voltage_pre_transition(data, reqvid)) 304 if (core_voltage_pre_transition(data, reqvid))
305 return 1; 305 return 1;
306 306
307 if (core_frequency_transition(data, reqfid)) 307 if (core_frequency_transition(data, reqfid))
308 return 1; 308 return 1;
309 309
310 if (core_voltage_post_transition(data, reqvid)) 310 if (core_voltage_post_transition(data, reqvid))
311 return 1; 311 return 1;
312 312
313 if (query_current_values_with_pending_wait(data)) 313 if (query_current_values_with_pending_wait(data))
314 return 1; 314 return 1;
315 315
316 if ((reqfid != data->currfid) || (reqvid != data->currvid)) { 316 if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
317 printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, " 317 printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
318 "curr 0x%x 0x%x\n", 318 "curr 0x%x 0x%x\n",
319 smp_processor_id(), 319 smp_processor_id(),
320 reqfid, reqvid, data->currfid, data->currvid); 320 reqfid, reqvid, data->currfid, data->currvid);
321 return 1; 321 return 1;
322 } 322 }
323 323
324 dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", 324 dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
325 smp_processor_id(), data->currfid, data->currvid); 325 smp_processor_id(), data->currfid, data->currvid);
326 326
327 return 0; 327 return 0;
328 } 328 }
329 329
330 /* Phase 1 - core voltage transition ... setup voltage */ 330 /* Phase 1 - core voltage transition ... setup voltage */
331 static int core_voltage_pre_transition(struct powernow_k8_data *data, 331 static int core_voltage_pre_transition(struct powernow_k8_data *data,
332 u32 reqvid) 332 u32 reqvid)
333 { 333 {
334 u32 rvosteps = data->rvo; 334 u32 rvosteps = data->rvo;
335 u32 savefid = data->currfid; 335 u32 savefid = data->currfid;
336 u32 maxvid, lo; 336 u32 maxvid, lo;
337 337
338 dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " 338 dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
339 "reqvid 0x%x, rvo 0x%x\n", 339 "reqvid 0x%x, rvo 0x%x\n",
340 smp_processor_id(), 340 smp_processor_id(),
341 data->currfid, data->currvid, reqvid, data->rvo); 341 data->currfid, data->currvid, reqvid, data->rvo);
342 342
343 rdmsr(MSR_FIDVID_STATUS, lo, maxvid); 343 rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
344 maxvid = 0x1f & (maxvid >> 16); 344 maxvid = 0x1f & (maxvid >> 16);
345 dprintk("ph1 maxvid=0x%x\n", maxvid); 345 dprintk("ph1 maxvid=0x%x\n", maxvid);
346 if (reqvid < maxvid) /* lower numbers are higher voltages */ 346 if (reqvid < maxvid) /* lower numbers are higher voltages */
347 reqvid = maxvid; 347 reqvid = maxvid;
348 348
349 while (data->currvid > reqvid) { 349 while (data->currvid > reqvid) {
350 dprintk("ph1: curr 0x%x, req vid 0x%x\n", 350 dprintk("ph1: curr 0x%x, req vid 0x%x\n",
351 data->currvid, reqvid); 351 data->currvid, reqvid);
352 if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) 352 if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
353 return 1; 353 return 1;
354 } 354 }
355 355
356 while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { 356 while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) {
357 if (data->currvid == maxvid) { 357 if (data->currvid == maxvid) {
358 rvosteps = 0; 358 rvosteps = 0;
359 } else { 359 } else {
360 dprintk("ph1: changing vid for rvo, req 0x%x\n", 360 dprintk("ph1: changing vid for rvo, req 0x%x\n",
361 data->currvid - 1); 361 data->currvid - 1);
362 if (decrease_vid_code_by_step(data, data->currvid-1, 1)) 362 if (decrease_vid_code_by_step(data, data->currvid-1, 1))
363 return 1; 363 return 1;
364 rvosteps--; 364 rvosteps--;
365 } 365 }
366 } 366 }
367 367
368 if (query_current_values_with_pending_wait(data)) 368 if (query_current_values_with_pending_wait(data))
369 return 1; 369 return 1;
370 370
371 if (savefid != data->currfid) { 371 if (savefid != data->currfid) {
372 printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", 372 printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
373 data->currfid); 373 data->currfid);
374 return 1; 374 return 1;
375 } 375 }
376 376
377 dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", 377 dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n",
378 data->currfid, data->currvid); 378 data->currfid, data->currvid);
379 379
380 return 0; 380 return 0;
381 } 381 }
382 382
383 /* Phase 2 - core frequency transition */ 383 /* Phase 2 - core frequency transition */
384 static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) 384 static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
385 { 385 {
386 u32 vcoreqfid, vcocurrfid, vcofiddiff; 386 u32 vcoreqfid, vcocurrfid, vcofiddiff;
387 u32 fid_interval, savevid = data->currvid; 387 u32 fid_interval, savevid = data->currvid;
388 388
389 if ((reqfid < HI_FID_TABLE_BOTTOM) && 389 if ((reqfid < HI_FID_TABLE_BOTTOM) &&
390 (data->currfid < HI_FID_TABLE_BOTTOM)) { 390 (data->currfid < HI_FID_TABLE_BOTTOM)) {
391 printk(KERN_ERR PFX "ph2: illegal lo-lo transition " 391 printk(KERN_ERR PFX "ph2: illegal lo-lo transition "
392 "0x%x 0x%x\n", reqfid, data->currfid); 392 "0x%x 0x%x\n", reqfid, data->currfid);
393 return 1; 393 return 1;
394 } 394 }
395 395
396 if (data->currfid == reqfid) { 396 if (data->currfid == reqfid) {
397 printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", 397 printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
398 data->currfid); 398 data->currfid);
399 return 0; 399 return 0;
400 } 400 }
401 401
402 dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, " 402 dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
403 "reqfid 0x%x\n", 403 "reqfid 0x%x\n",
404 smp_processor_id(), 404 smp_processor_id(),
405 data->currfid, data->currvid, reqfid); 405 data->currfid, data->currvid, reqfid);
406 406
407 vcoreqfid = convert_fid_to_vco_fid(reqfid); 407 vcoreqfid = convert_fid_to_vco_fid(reqfid);
408 vcocurrfid = convert_fid_to_vco_fid(data->currfid); 408 vcocurrfid = convert_fid_to_vco_fid(data->currfid);
409 vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid 409 vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
410 : vcoreqfid - vcocurrfid; 410 : vcoreqfid - vcocurrfid;
411 411
412 while (vcofiddiff > 2) { 412 while (vcofiddiff > 2) {
413 (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); 413 (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);
414 414
415 if (reqfid > data->currfid) { 415 if (reqfid > data->currfid) {
416 if (data->currfid > LO_FID_TABLE_TOP) { 416 if (data->currfid > LO_FID_TABLE_TOP) {
417 if (write_new_fid(data, 417 if (write_new_fid(data,
418 data->currfid + fid_interval)) 418 data->currfid + fid_interval))
419 return 1; 419 return 1;
420 } else { 420 } else {
421 if (write_new_fid 421 if (write_new_fid
422 (data, 422 (data,
423 2 + convert_fid_to_vco_fid(data->currfid))) 423 2 + convert_fid_to_vco_fid(data->currfid)))
424 return 1; 424 return 1;
425 } 425 }
426 } else { 426 } else {
427 if (write_new_fid(data, data->currfid - fid_interval)) 427 if (write_new_fid(data, data->currfid - fid_interval))
428 return 1; 428 return 1;
429 } 429 }
430 430
431 vcocurrfid = convert_fid_to_vco_fid(data->currfid); 431 vcocurrfid = convert_fid_to_vco_fid(data->currfid);
432 vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid 432 vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
433 : vcoreqfid - vcocurrfid; 433 : vcoreqfid - vcocurrfid;
434 } 434 }
435 435
436 if (write_new_fid(data, reqfid)) 436 if (write_new_fid(data, reqfid))
437 return 1; 437 return 1;
438 438
439 if (query_current_values_with_pending_wait(data)) 439 if (query_current_values_with_pending_wait(data))
440 return 1; 440 return 1;
441 441
442 if (data->currfid != reqfid) { 442 if (data->currfid != reqfid) {
443 printk(KERN_ERR PFX 443 printk(KERN_ERR PFX
444 "ph2: mismatch, failed fid transition, " 444 "ph2: mismatch, failed fid transition, "
445 "curr 0x%x, req 0x%x\n", 445 "curr 0x%x, req 0x%x\n",
446 data->currfid, reqfid); 446 data->currfid, reqfid);
447 return 1; 447 return 1;
448 } 448 }
449 449
450 if (savevid != data->currvid) { 450 if (savevid != data->currvid) {
451 printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n", 451 printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n",
452 savevid, data->currvid); 452 savevid, data->currvid);
453 return 1; 453 return 1;
454 } 454 }
455 455
456 dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", 456 dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n",
457 data->currfid, data->currvid); 457 data->currfid, data->currvid);
458 458
459 return 0; 459 return 0;
460 } 460 }
461 461
462 /* Phase 3 - core voltage transition flow ... jump to the final vid. */ 462 /* Phase 3 - core voltage transition flow ... jump to the final vid. */
463 static int core_voltage_post_transition(struct powernow_k8_data *data, 463 static int core_voltage_post_transition(struct powernow_k8_data *data,
464 u32 reqvid) 464 u32 reqvid)
465 { 465 {
466 u32 savefid = data->currfid; 466 u32 savefid = data->currfid;
467 u32 savereqvid = reqvid; 467 u32 savereqvid = reqvid;
468 468
469 dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", 469 dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
470 smp_processor_id(), 470 smp_processor_id(),
471 data->currfid, data->currvid); 471 data->currfid, data->currvid);
472 472
473 if (reqvid != data->currvid) { 473 if (reqvid != data->currvid) {
474 if (write_new_vid(data, reqvid)) 474 if (write_new_vid(data, reqvid))
475 return 1; 475 return 1;
476 476
477 if (savefid != data->currfid) { 477 if (savefid != data->currfid) {
478 printk(KERN_ERR PFX 478 printk(KERN_ERR PFX
479 "ph3: bad fid change, save 0x%x, curr 0x%x\n", 479 "ph3: bad fid change, save 0x%x, curr 0x%x\n",
480 savefid, data->currfid); 480 savefid, data->currfid);
481 return 1; 481 return 1;
482 } 482 }
483 483
484 if (data->currvid != reqvid) { 484 if (data->currvid != reqvid) {
485 printk(KERN_ERR PFX 485 printk(KERN_ERR PFX
486 "ph3: failed vid transition\n, " 486 "ph3: failed vid transition\n, "
487 "req 0x%x, curr 0x%x", 487 "req 0x%x, curr 0x%x",
488 reqvid, data->currvid); 488 reqvid, data->currvid);
489 return 1; 489 return 1;
490 } 490 }
491 } 491 }
492 492
493 if (query_current_values_with_pending_wait(data)) 493 if (query_current_values_with_pending_wait(data))
494 return 1; 494 return 1;
495 495
496 if (savereqvid != data->currvid) { 496 if (savereqvid != data->currvid) {
497 dprintk("ph3 failed, currvid 0x%x\n", data->currvid); 497 dprintk("ph3 failed, currvid 0x%x\n", data->currvid);
498 return 1; 498 return 1;
499 } 499 }
500 500
501 if (savefid != data->currfid) { 501 if (savefid != data->currfid) {
502 dprintk("ph3 failed, currfid changed 0x%x\n", 502 dprintk("ph3 failed, currfid changed 0x%x\n",
503 data->currfid); 503 data->currfid);
504 return 1; 504 return 1;
505 } 505 }
506 506
507 dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", 507 dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n",
508 data->currfid, data->currvid); 508 data->currfid, data->currvid);
509 509
510 return 0; 510 return 0;
511 } 511 }
512 512
513 static int check_supported_cpu(unsigned int cpu) 513 static int check_supported_cpu(unsigned int cpu)
514 { 514 {
515 cpumask_t oldmask; 515 cpumask_t oldmask;
516 u32 eax, ebx, ecx, edx; 516 u32 eax, ebx, ecx, edx;
517 unsigned int rc = 0; 517 unsigned int rc = 0;
518 518
519 oldmask = current->cpus_allowed; 519 oldmask = current->cpus_allowed;
520 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); 520 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
521 521
522 if (smp_processor_id() != cpu) { 522 if (smp_processor_id() != cpu) {
523 printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); 523 printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
524 goto out; 524 goto out;
525 } 525 }
526 526
527 if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) 527 if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
528 goto out; 528 goto out;
529 529
530 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); 530 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
531 if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && 531 if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
532 ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) 532 ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
533 goto out; 533 goto out;
534 534
535 if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { 535 if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
536 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || 536 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
537 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { 537 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
538 printk(KERN_INFO PFX 538 printk(KERN_INFO PFX
539 "Processor cpuid %x not supported\n", eax); 539 "Processor cpuid %x not supported\n", eax);
540 goto out; 540 goto out;
541 } 541 }
542 542
543 eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); 543 eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
544 if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { 544 if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
545 printk(KERN_INFO PFX 545 printk(KERN_INFO PFX
546 "No frequency change capabilities detected\n"); 546 "No frequency change capabilities detected\n");
547 goto out; 547 goto out;
548 } 548 }
549 549
550 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); 550 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
551 if ((edx & P_STATE_TRANSITION_CAPABLE) 551 if ((edx & P_STATE_TRANSITION_CAPABLE)
552 != P_STATE_TRANSITION_CAPABLE) { 552 != P_STATE_TRANSITION_CAPABLE) {
553 printk(KERN_INFO PFX 553 printk(KERN_INFO PFX
554 "Power state transitions not supported\n"); 554 "Power state transitions not supported\n");
555 goto out; 555 goto out;
556 } 556 }
557 } else { /* must be a HW Pstate capable processor */ 557 } else { /* must be a HW Pstate capable processor */
558 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); 558 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
559 if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) 559 if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
560 cpu_family = CPU_HW_PSTATE; 560 cpu_family = CPU_HW_PSTATE;
561 else 561 else
562 goto out; 562 goto out;
563 } 563 }
564 564
565 rc = 1; 565 rc = 1;
566 566
567 out: 567 out:
568 set_cpus_allowed_ptr(current, &oldmask); 568 set_cpus_allowed_ptr(current, &oldmask);
569 return rc; 569 return rc;
570 } 570 }
571 571
572 static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, 572 static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
573 u8 maxvid) 573 u8 maxvid)
574 { 574 {
575 unsigned int j; 575 unsigned int j;
576 u8 lastfid = 0xff; 576 u8 lastfid = 0xff;
577 577
578 for (j = 0; j < data->numps; j++) { 578 for (j = 0; j < data->numps; j++) {
579 if (pst[j].vid > LEAST_VID) { 579 if (pst[j].vid > LEAST_VID) {
580 printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n", 580 printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n",
581 j, pst[j].vid); 581 j, pst[j].vid);
582 return -EINVAL; 582 return -EINVAL;
583 } 583 }
584 if (pst[j].vid < data->rvo) { 584 if (pst[j].vid < data->rvo) {
585 /* vid + rvo >= 0 */ 585 /* vid + rvo >= 0 */
586 printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate" 586 printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
587 " %d\n", j); 587 " %d\n", j);
588 return -ENODEV; 588 return -ENODEV;
589 } 589 }
590 if (pst[j].vid < maxvid + data->rvo) { 590 if (pst[j].vid < maxvid + data->rvo) {
591 /* vid + rvo >= maxvid */ 591 /* vid + rvo >= maxvid */
592 printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate" 592 printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
593 " %d\n", j); 593 " %d\n", j);
594 return -ENODEV; 594 return -ENODEV;
595 } 595 }
596 if (pst[j].fid > MAX_FID) { 596 if (pst[j].fid > MAX_FID) {
597 printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate" 597 printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate"
598 " %d\n", j); 598 " %d\n", j);
599 return -ENODEV; 599 return -ENODEV;
600 } 600 }
601 if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { 601 if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) {
602 /* Only first fid is allowed to be in "low" range */ 602 /* Only first fid is allowed to be in "low" range */
603 printk(KERN_ERR FW_BUG PFX "two low fids - %d : " 603 printk(KERN_ERR FW_BUG PFX "two low fids - %d : "
604 "0x%x\n", j, pst[j].fid); 604 "0x%x\n", j, pst[j].fid);
605 return -EINVAL; 605 return -EINVAL;
606 } 606 }
607 if (pst[j].fid < lastfid) 607 if (pst[j].fid < lastfid)
608 lastfid = pst[j].fid; 608 lastfid = pst[j].fid;
609 } 609 }
610 if (lastfid & 1) { 610 if (lastfid & 1) {
611 printk(KERN_ERR FW_BUG PFX "lastfid invalid\n"); 611 printk(KERN_ERR FW_BUG PFX "lastfid invalid\n");
612 return -EINVAL; 612 return -EINVAL;
613 } 613 }
614 if (lastfid > LO_FID_TABLE_TOP) 614 if (lastfid > LO_FID_TABLE_TOP)
615 printk(KERN_INFO FW_BUG PFX 615 printk(KERN_INFO FW_BUG PFX
616 "first fid not from lo freq table\n"); 616 "first fid not from lo freq table\n");
617 617
618 return 0; 618 return 0;
619 } 619 }
620 620
621 static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry) 621 static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry)
622 { 622 {
623 data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; 623 data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
624 } 624 }
625 625
626 static void print_basics(struct powernow_k8_data *data) 626 static void print_basics(struct powernow_k8_data *data)
627 { 627 {
628 int j; 628 int j;
629 for (j = 0; j < data->numps; j++) { 629 for (j = 0; j < data->numps; j++) {
630 if (data->powernow_table[j].frequency != 630 if (data->powernow_table[j].frequency !=
631 CPUFREQ_ENTRY_INVALID) { 631 CPUFREQ_ENTRY_INVALID) {
632 if (cpu_family == CPU_HW_PSTATE) { 632 if (cpu_family == CPU_HW_PSTATE) {
633 printk(KERN_INFO PFX 633 printk(KERN_INFO PFX
634 " %d : pstate %d (%d MHz)\n", j, 634 " %d : pstate %d (%d MHz)\n", j,
635 data->powernow_table[j].index, 635 data->powernow_table[j].index,
636 data->powernow_table[j].frequency/1000); 636 data->powernow_table[j].frequency/1000);
637 } else { 637 } else {
638 printk(KERN_INFO PFX 638 printk(KERN_INFO PFX
639 " %d : fid 0x%x (%d MHz), vid 0x%x\n", 639 " %d : fid 0x%x (%d MHz), vid 0x%x\n",
640 j, 640 j,
641 data->powernow_table[j].index & 0xff, 641 data->powernow_table[j].index & 0xff,
642 data->powernow_table[j].frequency/1000, 642 data->powernow_table[j].frequency/1000,
643 data->powernow_table[j].index >> 8); 643 data->powernow_table[j].index >> 8);
644 } 644 }
645 } 645 }
646 } 646 }
647 if (data->batps) 647 if (data->batps)
648 printk(KERN_INFO PFX "Only %d pstates on battery\n", 648 printk(KERN_INFO PFX "Only %d pstates on battery\n",
649 data->batps); 649 data->batps);
650 } 650 }
651 651
652 static u32 freq_from_fid_did(u32 fid, u32 did) 652 static u32 freq_from_fid_did(u32 fid, u32 did)
653 { 653 {
654 u32 mhz = 0; 654 u32 mhz = 0;
655 655
656 if (boot_cpu_data.x86 == 0x10) 656 if (boot_cpu_data.x86 == 0x10)
657 mhz = (100 * (fid + 0x10)) >> did; 657 mhz = (100 * (fid + 0x10)) >> did;
658 else if (boot_cpu_data.x86 == 0x11) 658 else if (boot_cpu_data.x86 == 0x11)
659 mhz = (100 * (fid + 8)) >> did; 659 mhz = (100 * (fid + 8)) >> did;
660 else 660 else
661 BUG(); 661 BUG();
662 662
663 return mhz * 1000; 663 return mhz * 1000;
664 } 664 }
665 665
666 static int fill_powernow_table(struct powernow_k8_data *data, 666 static int fill_powernow_table(struct powernow_k8_data *data,
667 struct pst_s *pst, u8 maxvid) 667 struct pst_s *pst, u8 maxvid)
668 { 668 {
669 struct cpufreq_frequency_table *powernow_table; 669 struct cpufreq_frequency_table *powernow_table;
670 unsigned int j; 670 unsigned int j;
671 671
672 if (data->batps) { 672 if (data->batps) {
673 /* use ACPI support to get full speed on mains power */ 673 /* use ACPI support to get full speed on mains power */
674 printk(KERN_WARNING PFX 674 printk(KERN_WARNING PFX
675 "Only %d pstates usable (use ACPI driver for full " 675 "Only %d pstates usable (use ACPI driver for full "
676 "range\n", data->batps); 676 "range\n", data->batps);
677 data->numps = data->batps; 677 data->numps = data->batps;
678 } 678 }
679 679
680 for (j = 1; j < data->numps; j++) { 680 for (j = 1; j < data->numps; j++) {
681 if (pst[j-1].fid >= pst[j].fid) { 681 if (pst[j-1].fid >= pst[j].fid) {
682 printk(KERN_ERR PFX "PST out of sequence\n"); 682 printk(KERN_ERR PFX "PST out of sequence\n");
683 return -EINVAL; 683 return -EINVAL;
684 } 684 }
685 } 685 }
686 686
687 if (data->numps < 2) { 687 if (data->numps < 2) {
688 printk(KERN_ERR PFX "no p states to transition\n"); 688 printk(KERN_ERR PFX "no p states to transition\n");
689 return -ENODEV; 689 return -ENODEV;
690 } 690 }
691 691
692 if (check_pst_table(data, pst, maxvid)) 692 if (check_pst_table(data, pst, maxvid))
693 return -EINVAL; 693 return -EINVAL;
694 694
695 powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) 695 powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
696 * (data->numps + 1)), GFP_KERNEL); 696 * (data->numps + 1)), GFP_KERNEL);
697 if (!powernow_table) { 697 if (!powernow_table) {
698 printk(KERN_ERR PFX "powernow_table memory alloc failure\n"); 698 printk(KERN_ERR PFX "powernow_table memory alloc failure\n");
699 return -ENOMEM; 699 return -ENOMEM;
700 } 700 }
701 701
702 for (j = 0; j < data->numps; j++) { 702 for (j = 0; j < data->numps; j++) {
703 int freq; 703 int freq;
704 powernow_table[j].index = pst[j].fid; /* lower 8 bits */ 704 powernow_table[j].index = pst[j].fid; /* lower 8 bits */
705 powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ 705 powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
706 freq = find_khz_freq_from_fid(pst[j].fid); 706 freq = find_khz_freq_from_fid(pst[j].fid);
707 powernow_table[j].frequency = freq; 707 powernow_table[j].frequency = freq;
708 } 708 }
709 powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; 709 powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
710 powernow_table[data->numps].index = 0; 710 powernow_table[data->numps].index = 0;
711 711
712 if (query_current_values_with_pending_wait(data)) { 712 if (query_current_values_with_pending_wait(data)) {
713 kfree(powernow_table); 713 kfree(powernow_table);
714 return -EIO; 714 return -EIO;
715 } 715 }
716 716
717 dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); 717 dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
718 data->powernow_table = powernow_table; 718 data->powernow_table = powernow_table;
719 if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) 719 if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
720 print_basics(data); 720 print_basics(data);
721 721
722 for (j = 0; j < data->numps; j++) 722 for (j = 0; j < data->numps; j++)
723 if ((pst[j].fid == data->currfid) && 723 if ((pst[j].fid == data->currfid) &&
724 (pst[j].vid == data->currvid)) 724 (pst[j].vid == data->currvid))
725 return 0; 725 return 0;
726 726
727 dprintk("currfid/vid do not match PST, ignoring\n"); 727 dprintk("currfid/vid do not match PST, ignoring\n");
728 return 0; 728 return 0;
729 } 729 }
730 730
731 /* Find and validate the PSB/PST table in BIOS. */ 731 /* Find and validate the PSB/PST table in BIOS. */
732 static int find_psb_table(struct powernow_k8_data *data) 732 static int find_psb_table(struct powernow_k8_data *data)
733 { 733 {
734 struct psb_s *psb; 734 struct psb_s *psb;
735 unsigned int i; 735 unsigned int i;
736 u32 mvs; 736 u32 mvs;
737 u8 maxvid; 737 u8 maxvid;
738 u32 cpst = 0; 738 u32 cpst = 0;
739 u32 thiscpuid; 739 u32 thiscpuid;
740 740
741 for (i = 0xc0000; i < 0xffff0; i += 0x10) { 741 for (i = 0xc0000; i < 0xffff0; i += 0x10) {
742 /* Scan BIOS looking for the signature. */ 742 /* Scan BIOS looking for the signature. */
743 /* It can not be at ffff0 - it is too big. */ 743 /* It can not be at ffff0 - it is too big. */
744 744
745 psb = phys_to_virt(i); 745 psb = phys_to_virt(i);
746 if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) 746 if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
747 continue; 747 continue;
748 748
749 dprintk("found PSB header at 0x%p\n", psb); 749 dprintk("found PSB header at 0x%p\n", psb);
750 750
751 dprintk("table vers: 0x%x\n", psb->tableversion); 751 dprintk("table vers: 0x%x\n", psb->tableversion);
752 if (psb->tableversion != PSB_VERSION_1_4) { 752 if (psb->tableversion != PSB_VERSION_1_4) {
753 printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n"); 753 printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n");
754 return -ENODEV; 754 return -ENODEV;
755 } 755 }
756 756
757 dprintk("flags: 0x%x\n", psb->flags1); 757 dprintk("flags: 0x%x\n", psb->flags1);
758 if (psb->flags1) { 758 if (psb->flags1) {
759 printk(KERN_ERR FW_BUG PFX "unknown flags\n"); 759 printk(KERN_ERR FW_BUG PFX "unknown flags\n");
760 return -ENODEV; 760 return -ENODEV;
761 } 761 }
762 762
763 data->vstable = psb->vstable; 763 data->vstable = psb->vstable;
764 dprintk("voltage stabilization time: %d(*20us)\n", 764 dprintk("voltage stabilization time: %d(*20us)\n",
765 data->vstable); 765 data->vstable);
766 766
767 dprintk("flags2: 0x%x\n", psb->flags2); 767 dprintk("flags2: 0x%x\n", psb->flags2);
768 data->rvo = psb->flags2 & 3; 768 data->rvo = psb->flags2 & 3;
769 data->irt = ((psb->flags2) >> 2) & 3; 769 data->irt = ((psb->flags2) >> 2) & 3;
770 mvs = ((psb->flags2) >> 4) & 3; 770 mvs = ((psb->flags2) >> 4) & 3;
771 data->vidmvs = 1 << mvs; 771 data->vidmvs = 1 << mvs;
772 data->batps = ((psb->flags2) >> 6) & 3; 772 data->batps = ((psb->flags2) >> 6) & 3;
773 773
774 dprintk("ramp voltage offset: %d\n", data->rvo); 774 dprintk("ramp voltage offset: %d\n", data->rvo);
775 dprintk("isochronous relief time: %d\n", data->irt); 775 dprintk("isochronous relief time: %d\n", data->irt);
776 dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); 776 dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
777 777
778 dprintk("numpst: 0x%x\n", psb->num_tables); 778 dprintk("numpst: 0x%x\n", psb->num_tables);
779 cpst = psb->num_tables; 779 cpst = psb->num_tables;
780 if ((psb->cpuid == 0x00000fc0) || 780 if ((psb->cpuid == 0x00000fc0) ||
781 (psb->cpuid == 0x00000fe0)) { 781 (psb->cpuid == 0x00000fe0)) {
782 thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); 782 thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
783 if ((thiscpuid == 0x00000fc0) || 783 if ((thiscpuid == 0x00000fc0) ||
784 (thiscpuid == 0x00000fe0)) 784 (thiscpuid == 0x00000fe0))
785 cpst = 1; 785 cpst = 1;
786 } 786 }
787 if (cpst != 1) { 787 if (cpst != 1) {
788 printk(KERN_ERR FW_BUG PFX "numpst must be 1\n"); 788 printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
789 return -ENODEV; 789 return -ENODEV;
790 } 790 }
791 791
792 data->plllock = psb->plllocktime; 792 data->plllock = psb->plllocktime;
793 dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); 793 dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
794 dprintk("maxfid: 0x%x\n", psb->maxfid); 794 dprintk("maxfid: 0x%x\n", psb->maxfid);
795 dprintk("maxvid: 0x%x\n", psb->maxvid); 795 dprintk("maxvid: 0x%x\n", psb->maxvid);
796 maxvid = psb->maxvid; 796 maxvid = psb->maxvid;
797 797
798 data->numps = psb->numps; 798 data->numps = psb->numps;
799 dprintk("numpstates: 0x%x\n", data->numps); 799 dprintk("numpstates: 0x%x\n", data->numps);
800 return fill_powernow_table(data, 800 return fill_powernow_table(data,
801 (struct pst_s *)(psb+1), maxvid); 801 (struct pst_s *)(psb+1), maxvid);
802 } 802 }
803 /* 803 /*
804 * If you see this message, complain to BIOS manufacturer. If 804 * If you see this message, complain to BIOS manufacturer. If
805 * he tells you "we do not support Linux" or some similar 805 * he tells you "we do not support Linux" or some similar
806 * nonsense, remember that Windows 2000 uses the same legacy 806 * nonsense, remember that Windows 2000 uses the same legacy
807 * mechanism that the old Linux PSB driver uses. Tell them it 807 * mechanism that the old Linux PSB driver uses. Tell them it
808 * is broken with Windows 2000. 808 * is broken with Windows 2000.
809 * 809 *
810 * The reference to the AMD documentation is chapter 9 in the 810 * The reference to the AMD documentation is chapter 9 in the
811 * BIOS and Kernel Developer's Guide, which is available on 811 * BIOS and Kernel Developer's Guide, which is available on
812 * www.amd.com 812 * www.amd.com
813 */ 813 */
814 printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); 814 printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
815 return -ENODEV; 815 return -ENODEV;
816 } 816 }
817 817
818 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, 818 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
819 unsigned int index) 819 unsigned int index)
820 { 820 {
821 acpi_integer control; 821 acpi_integer control;
822 822
823 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) 823 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
824 return; 824 return;
825 825
826 control = data->acpi_data.states[index].control; 826 control = data->acpi_data.states[index].control;
827 data->irt = (control >> IRT_SHIFT) & IRT_MASK; 827 data->irt = (control >> IRT_SHIFT) & IRT_MASK;
828 data->rvo = (control >> RVO_SHIFT) & RVO_MASK; 828 data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
829 data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; 829 data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
830 data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; 830 data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
831 data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK); 831 data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
832 data->vstable = (control >> VST_SHIFT) & VST_MASK; } 832 data->vstable = (control >> VST_SHIFT) & VST_MASK; }
833 833
834 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) 834 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
835 { 835 {
836 struct cpufreq_frequency_table *powernow_table; 836 struct cpufreq_frequency_table *powernow_table;
837 int ret_val = -ENODEV; 837 int ret_val = -ENODEV;
838 acpi_integer control, status; 838 acpi_integer control, status;
839 839
840 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { 840 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
841 dprintk("register performance failed: bad ACPI data\n"); 841 dprintk("register performance failed: bad ACPI data\n");
842 return -EIO; 842 return -EIO;
843 } 843 }
844 844
845 /* verify the data contained in the ACPI structures */ 845 /* verify the data contained in the ACPI structures */
846 if (data->acpi_data.state_count <= 1) { 846 if (data->acpi_data.state_count <= 1) {
847 dprintk("No ACPI P-States\n"); 847 dprintk("No ACPI P-States\n");
848 goto err_out; 848 goto err_out;
849 } 849 }
850 850
851 control = data->acpi_data.control_register.space_id; 851 control = data->acpi_data.control_register.space_id;
852 status = data->acpi_data.status_register.space_id; 852 status = data->acpi_data.status_register.space_id;
853 853
854 if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) || 854 if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
855 (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) { 855 (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
856 dprintk("Invalid control/status registers (%x - %x)\n", 856 dprintk("Invalid control/status registers (%x - %x)\n",
857 control, status); 857 control, status);
858 goto err_out; 858 goto err_out;
859 } 859 }
860 860
861 /* fill in data->powernow_table */ 861 /* fill in data->powernow_table */
862 powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) 862 powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
863 * (data->acpi_data.state_count + 1)), GFP_KERNEL); 863 * (data->acpi_data.state_count + 1)), GFP_KERNEL);
864 if (!powernow_table) { 864 if (!powernow_table) {
865 dprintk("powernow_table memory alloc failure\n"); 865 dprintk("powernow_table memory alloc failure\n");
866 goto err_out; 866 goto err_out;
867 } 867 }
868 868
869 if (cpu_family == CPU_HW_PSTATE) 869 if (cpu_family == CPU_HW_PSTATE)
870 ret_val = fill_powernow_table_pstate(data, powernow_table); 870 ret_val = fill_powernow_table_pstate(data, powernow_table);
871 else 871 else
872 ret_val = fill_powernow_table_fidvid(data, powernow_table); 872 ret_val = fill_powernow_table_fidvid(data, powernow_table);
873 if (ret_val) 873 if (ret_val)
874 goto err_out_mem; 874 goto err_out_mem;
875 875
876 powernow_table[data->acpi_data.state_count].frequency = 876 powernow_table[data->acpi_data.state_count].frequency =
877 CPUFREQ_TABLE_END; 877 CPUFREQ_TABLE_END;
878 powernow_table[data->acpi_data.state_count].index = 0; 878 powernow_table[data->acpi_data.state_count].index = 0;
879 data->powernow_table = powernow_table; 879 data->powernow_table = powernow_table;
880 880
881 /* fill in data */ 881 /* fill in data */
882 data->numps = data->acpi_data.state_count; 882 data->numps = data->acpi_data.state_count;
883 if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) 883 if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
884 print_basics(data); 884 print_basics(data);
885 powernow_k8_acpi_pst_values(data, 0); 885 powernow_k8_acpi_pst_values(data, 0);
886 886
887 /* notify BIOS that we exist */ 887 /* notify BIOS that we exist */
888 acpi_processor_notify_smm(THIS_MODULE); 888 acpi_processor_notify_smm(THIS_MODULE);
889 889
890 if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { 890 if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
891 printk(KERN_ERR PFX 891 printk(KERN_ERR PFX
892 "unable to alloc powernow_k8_data cpumask\n"); 892 "unable to alloc powernow_k8_data cpumask\n");
893 ret_val = -ENOMEM; 893 ret_val = -ENOMEM;
894 goto err_out_mem; 894 goto err_out_mem;
895 } 895 }
896 896
897 return 0; 897 return 0;
898 898
899 err_out_mem: 899 err_out_mem:
900 kfree(powernow_table); 900 kfree(powernow_table);
901 901
902 err_out: 902 err_out:
903 acpi_processor_unregister_performance(&data->acpi_data, data->cpu); 903 acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
904 904
905 /* data->acpi_data.state_count informs us at ->exit() 905 /* data->acpi_data.state_count informs us at ->exit()
906 * whether ACPI was used */ 906 * whether ACPI was used */
907 data->acpi_data.state_count = 0; 907 data->acpi_data.state_count = 0;
908 908
909 return ret_val; 909 return ret_val;
910 } 910 }
911 911
912 static int fill_powernow_table_pstate(struct powernow_k8_data *data, 912 static int fill_powernow_table_pstate(struct powernow_k8_data *data,
913 struct cpufreq_frequency_table *powernow_table) 913 struct cpufreq_frequency_table *powernow_table)
914 { 914 {
915 int i; 915 int i;
916 u32 hi = 0, lo = 0; 916 u32 hi = 0, lo = 0;
917 rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); 917 rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo);
918 data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; 918 data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
919 919
920 for (i = 0; i < data->acpi_data.state_count; i++) { 920 for (i = 0; i < data->acpi_data.state_count; i++) {
921 u32 index; 921 u32 index;
922 922
923 index = data->acpi_data.states[i].control & HW_PSTATE_MASK; 923 index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
924 if (index > data->max_hw_pstate) { 924 if (index > data->max_hw_pstate) {
925 printk(KERN_ERR PFX "invalid pstate %d - " 925 printk(KERN_ERR PFX "invalid pstate %d - "
926 "bad value %d.\n", i, index); 926 "bad value %d.\n", i, index);
927 printk(KERN_ERR PFX "Please report to BIOS " 927 printk(KERN_ERR PFX "Please report to BIOS "
928 "manufacturer\n"); 928 "manufacturer\n");
929 invalidate_entry(data, i); 929 invalidate_entry(data, i);
930 continue; 930 continue;
931 } 931 }
932 rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); 932 rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
933 if (!(hi & HW_PSTATE_VALID_MASK)) { 933 if (!(hi & HW_PSTATE_VALID_MASK)) {
934 dprintk("invalid pstate %d, ignoring\n", index); 934 dprintk("invalid pstate %d, ignoring\n", index);
935 invalidate_entry(data, i); 935 invalidate_entry(data, i);
936 continue; 936 continue;
937 } 937 }
938 938
939 powernow_table[i].index = index; 939 powernow_table[i].index = index;
940 940
941 /* Frequency may be rounded for these */ 941 /* Frequency may be rounded for these */
942 if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) { 942 if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) {
943 powernow_table[i].frequency = 943 powernow_table[i].frequency =
944 freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); 944 freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
945 } else 945 } else
946 powernow_table[i].frequency = 946 powernow_table[i].frequency =
947 data->acpi_data.states[i].core_frequency * 1000; 947 data->acpi_data.states[i].core_frequency * 1000;
948 } 948 }
949 return 0; 949 return 0;
950 } 950 }
951 951
952 static int fill_powernow_table_fidvid(struct powernow_k8_data *data, 952 static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
953 struct cpufreq_frequency_table *powernow_table) 953 struct cpufreq_frequency_table *powernow_table)
954 { 954 {
955 int i; 955 int i;
956 int cntlofreq = 0; 956 int cntlofreq = 0;
957 957
958 for (i = 0; i < data->acpi_data.state_count; i++) { 958 for (i = 0; i < data->acpi_data.state_count; i++) {
959 u32 fid; 959 u32 fid;
960 u32 vid; 960 u32 vid;
961 u32 freq, index; 961 u32 freq, index;
962 acpi_integer status, control; 962 acpi_integer status, control;
963 963
964 if (data->exttype) { 964 if (data->exttype) {
965 status = data->acpi_data.states[i].status; 965 status = data->acpi_data.states[i].status;
966 fid = status & EXT_FID_MASK; 966 fid = status & EXT_FID_MASK;
967 vid = (status >> VID_SHIFT) & EXT_VID_MASK; 967 vid = (status >> VID_SHIFT) & EXT_VID_MASK;
968 } else { 968 } else {
969 control = data->acpi_data.states[i].control; 969 control = data->acpi_data.states[i].control;
970 fid = control & FID_MASK; 970 fid = control & FID_MASK;
971 vid = (control >> VID_SHIFT) & VID_MASK; 971 vid = (control >> VID_SHIFT) & VID_MASK;
972 } 972 }
973 973
974 dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); 974 dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
975 975
976 index = fid | (vid<<8); 976 index = fid | (vid<<8);
977 powernow_table[i].index = index; 977 powernow_table[i].index = index;
978 978
979 freq = find_khz_freq_from_fid(fid); 979 freq = find_khz_freq_from_fid(fid);
980 powernow_table[i].frequency = freq; 980 powernow_table[i].frequency = freq;
981 981
982 /* verify frequency is OK */ 982 /* verify frequency is OK */
983 if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { 983 if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
984 dprintk("invalid freq %u kHz, ignoring\n", freq); 984 dprintk("invalid freq %u kHz, ignoring\n", freq);
985 invalidate_entry(data, i); 985 invalidate_entry(data, i);
986 continue; 986 continue;
987 } 987 }
988 988
989 /* verify voltage is OK - 989 /* verify voltage is OK -
990 * BIOSs are using "off" to indicate invalid */ 990 * BIOSs are using "off" to indicate invalid */
991 if (vid == VID_OFF) { 991 if (vid == VID_OFF) {
992 dprintk("invalid vid %u, ignoring\n", vid); 992 dprintk("invalid vid %u, ignoring\n", vid);
993 invalidate_entry(data, i); 993 invalidate_entry(data, i);
994 continue; 994 continue;
995 } 995 }
996 996
997 /* verify only 1 entry from the lo frequency table */ 997 /* verify only 1 entry from the lo frequency table */
998 if (fid < HI_FID_TABLE_BOTTOM) { 998 if (fid < HI_FID_TABLE_BOTTOM) {
999 if (cntlofreq) { 999 if (cntlofreq) {
1000 /* if both entries are the same, 1000 /* if both entries are the same,
1001 * ignore this one ... */ 1001 * ignore this one ... */
1002 if ((freq != powernow_table[cntlofreq].frequency) || 1002 if ((freq != powernow_table[cntlofreq].frequency) ||
1003 (index != powernow_table[cntlofreq].index)) { 1003 (index != powernow_table[cntlofreq].index)) {
1004 printk(KERN_ERR PFX 1004 printk(KERN_ERR PFX
1005 "Too many lo freq table " 1005 "Too many lo freq table "
1006 "entries\n"); 1006 "entries\n");
1007 return 1; 1007 return 1;
1008 } 1008 }
1009 1009
1010 dprintk("double low frequency table entry, " 1010 dprintk("double low frequency table entry, "
1011 "ignoring it.\n"); 1011 "ignoring it.\n");
1012 invalidate_entry(data, i); 1012 invalidate_entry(data, i);
1013 continue; 1013 continue;
1014 } else 1014 } else
1015 cntlofreq = i; 1015 cntlofreq = i;
1016 } 1016 }
1017 1017
1018 if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { 1018 if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
1019 printk(KERN_INFO PFX "invalid freq entries " 1019 printk(KERN_INFO PFX "invalid freq entries "
1020 "%u kHz vs. %u kHz\n", freq, 1020 "%u kHz vs. %u kHz\n", freq,
1021 (unsigned int) 1021 (unsigned int)
1022 (data->acpi_data.states[i].core_frequency 1022 (data->acpi_data.states[i].core_frequency
1023 * 1000)); 1023 * 1000));
1024 invalidate_entry(data, i); 1024 invalidate_entry(data, i);
1025 continue; 1025 continue;
1026 } 1026 }
1027 } 1027 }
1028 return 0; 1028 return 0;
1029 } 1029 }
1030 1030
1031 static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) 1031 static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
1032 { 1032 {
1033 if (data->acpi_data.state_count) 1033 if (data->acpi_data.state_count)
1034 acpi_processor_unregister_performance(&data->acpi_data, 1034 acpi_processor_unregister_performance(&data->acpi_data,
1035 data->cpu); 1035 data->cpu);
1036 free_cpumask_var(data->acpi_data.shared_cpu_map); 1036 free_cpumask_var(data->acpi_data.shared_cpu_map);
1037 } 1037 }
1038 1038
1039 static int get_transition_latency(struct powernow_k8_data *data) 1039 static int get_transition_latency(struct powernow_k8_data *data)
1040 { 1040 {
1041 int max_latency = 0; 1041 int max_latency = 0;
1042 int i; 1042 int i;
1043 for (i = 0; i < data->acpi_data.state_count; i++) { 1043 for (i = 0; i < data->acpi_data.state_count; i++) {
1044 int cur_latency = data->acpi_data.states[i].transition_latency 1044 int cur_latency = data->acpi_data.states[i].transition_latency
1045 + data->acpi_data.states[i].bus_master_latency; 1045 + data->acpi_data.states[i].bus_master_latency;
1046 if (cur_latency > max_latency) 1046 if (cur_latency > max_latency)
1047 max_latency = cur_latency; 1047 max_latency = cur_latency;
1048 } 1048 }
1049 /* value in usecs, needs to be in nanoseconds */ 1049 /* value in usecs, needs to be in nanoseconds */
1050 return 1000 * max_latency; 1050 return 1000 * max_latency;
1051 } 1051 }
1052 1052
1053 /* Take a frequency, and issue the fid/vid transition command */ 1053 /* Take a frequency, and issue the fid/vid transition command */
1054 static int transition_frequency_fidvid(struct powernow_k8_data *data, 1054 static int transition_frequency_fidvid(struct powernow_k8_data *data,
1055 unsigned int index) 1055 unsigned int index)
1056 { 1056 {
1057 u32 fid = 0; 1057 u32 fid = 0;
1058 u32 vid = 0; 1058 u32 vid = 0;
1059 int res, i; 1059 int res, i;
1060 struct cpufreq_freqs freqs; 1060 struct cpufreq_freqs freqs;
1061 1061
1062 dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); 1062 dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
1063 1063
1064 /* fid/vid correctness check for k8 */ 1064 /* fid/vid correctness check for k8 */
1065 /* fid are the lower 8 bits of the index we stored into 1065 /* fid are the lower 8 bits of the index we stored into
1066 * the cpufreq frequency table in find_psb_table, vid 1066 * the cpufreq frequency table in find_psb_table, vid
1067 * are the upper 8 bits. 1067 * are the upper 8 bits.
1068 */ 1068 */
1069 fid = data->powernow_table[index].index & 0xFF; 1069 fid = data->powernow_table[index].index & 0xFF;
1070 vid = (data->powernow_table[index].index & 0xFF00) >> 8; 1070 vid = (data->powernow_table[index].index & 0xFF00) >> 8;
1071 1071
1072 dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); 1072 dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
1073 1073
1074 if (query_current_values_with_pending_wait(data)) 1074 if (query_current_values_with_pending_wait(data))
1075 return 1; 1075 return 1;
1076 1076
1077 if ((data->currvid == vid) && (data->currfid == fid)) { 1077 if ((data->currvid == vid) && (data->currfid == fid)) {
1078 dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", 1078 dprintk("target matches current values (fid 0x%x, vid 0x%x)\n",
1079 fid, vid); 1079 fid, vid);
1080 return 0; 1080 return 0;
1081 } 1081 }
1082 1082
1083 if ((fid < HI_FID_TABLE_BOTTOM) && 1083 if ((fid < HI_FID_TABLE_BOTTOM) &&
1084 (data->currfid < HI_FID_TABLE_BOTTOM)) { 1084 (data->currfid < HI_FID_TABLE_BOTTOM)) {
1085 printk(KERN_ERR PFX 1085 printk(KERN_ERR PFX
1086 "ignoring illegal change in lo freq table-%x to 0x%x\n", 1086 "ignoring illegal change in lo freq table-%x to 0x%x\n",
1087 data->currfid, fid); 1087 data->currfid, fid);
1088 return 1; 1088 return 1;
1089 } 1089 }
1090 1090
1091 dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", 1091 dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
1092 smp_processor_id(), fid, vid); 1092 smp_processor_id(), fid, vid);
1093 freqs.old = find_khz_freq_from_fid(data->currfid); 1093 freqs.old = find_khz_freq_from_fid(data->currfid);
1094 freqs.new = find_khz_freq_from_fid(fid); 1094 freqs.new = find_khz_freq_from_fid(fid);
1095 1095
1096 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1096 for_each_cpu_mask_nr(i, *(data->available_cores)) {
1097 freqs.cpu = i; 1097 freqs.cpu = i;
1098 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 1098 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1099 } 1099 }
1100 1100
1101 res = transition_fid_vid(data, fid, vid); 1101 res = transition_fid_vid(data, fid, vid);
1102 freqs.new = find_khz_freq_from_fid(data->currfid); 1102 freqs.new = find_khz_freq_from_fid(data->currfid);
1103 1103
1104 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1104 for_each_cpu_mask_nr(i, *(data->available_cores)) {
1105 freqs.cpu = i; 1105 freqs.cpu = i;
1106 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 1106 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1107 } 1107 }
1108 return res; 1108 return res;
1109 } 1109 }
1110 1110
1111 /* Take a frequency, and issue the hardware pstate transition command */ 1111 /* Take a frequency, and issue the hardware pstate transition command */
1112 static int transition_frequency_pstate(struct powernow_k8_data *data, 1112 static int transition_frequency_pstate(struct powernow_k8_data *data,
1113 unsigned int index) 1113 unsigned int index)
1114 { 1114 {
1115 u32 pstate = 0; 1115 u32 pstate = 0;
1116 int res, i; 1116 int res, i;
1117 struct cpufreq_freqs freqs; 1117 struct cpufreq_freqs freqs;
1118 1118
1119 dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); 1119 dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
1120 1120
1121 /* get MSR index for hardware pstate transition */ 1121 /* get MSR index for hardware pstate transition */
1122 pstate = index & HW_PSTATE_MASK; 1122 pstate = index & HW_PSTATE_MASK;
1123 if (pstate > data->max_hw_pstate) 1123 if (pstate > data->max_hw_pstate)
1124 return 0; 1124 return 0;
1125 freqs.old = find_khz_freq_from_pstate(data->powernow_table, 1125 freqs.old = find_khz_freq_from_pstate(data->powernow_table,
1126 data->currpstate); 1126 data->currpstate);
1127 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); 1127 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
1128 1128
1129 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1129 for_each_cpu_mask_nr(i, *(data->available_cores)) {
1130 freqs.cpu = i; 1130 freqs.cpu = i;
1131 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 1131 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1132 } 1132 }
1133 1133
1134 res = transition_pstate(data, pstate); 1134 res = transition_pstate(data, pstate);
1135 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); 1135 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
1136 1136
1137 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1137 for_each_cpu_mask_nr(i, *(data->available_cores)) {
1138 freqs.cpu = i; 1138 freqs.cpu = i;
1139 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 1139 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1140 } 1140 }
1141 return res; 1141 return res;
1142 } 1142 }
1143 1143
1144 /* Driver entry point to switch to the target frequency */ 1144 /* Driver entry point to switch to the target frequency */
1145 static int powernowk8_target(struct cpufreq_policy *pol, 1145 static int powernowk8_target(struct cpufreq_policy *pol,
1146 unsigned targfreq, unsigned relation) 1146 unsigned targfreq, unsigned relation)
1147 { 1147 {
1148 cpumask_t oldmask; 1148 cpumask_t oldmask;
1149 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); 1149 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1150 u32 checkfid; 1150 u32 checkfid;
1151 u32 checkvid; 1151 u32 checkvid;
1152 unsigned int newstate; 1152 unsigned int newstate;
1153 int ret = -EIO; 1153 int ret = -EIO;
1154 1154
1155 if (!data) 1155 if (!data)
1156 return -EINVAL; 1156 return -EINVAL;
1157 1157
1158 checkfid = data->currfid; 1158 checkfid = data->currfid;
1159 checkvid = data->currvid; 1159 checkvid = data->currvid;
1160 1160
1161 /* only run on specific CPU from here on */ 1161 /* only run on specific CPU from here on */
1162 oldmask = current->cpus_allowed; 1162 oldmask = current->cpus_allowed;
1163 set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); 1163 set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
1164 1164
1165 if (smp_processor_id() != pol->cpu) { 1165 if (smp_processor_id() != pol->cpu) {
1166 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); 1166 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
1167 goto err_out; 1167 goto err_out;
1168 } 1168 }
1169 1169
1170 if (pending_bit_stuck()) { 1170 if (pending_bit_stuck()) {
1171 printk(KERN_ERR PFX "failing targ, change pending bit set\n"); 1171 printk(KERN_ERR PFX "failing targ, change pending bit set\n");
1172 goto err_out; 1172 goto err_out;
1173 } 1173 }
1174 1174
1175 dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", 1175 dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
1176 pol->cpu, targfreq, pol->min, pol->max, relation); 1176 pol->cpu, targfreq, pol->min, pol->max, relation);
1177 1177
1178 if (query_current_values_with_pending_wait(data)) 1178 if (query_current_values_with_pending_wait(data))
1179 goto err_out; 1179 goto err_out;
1180 1180
1181 if (cpu_family != CPU_HW_PSTATE) { 1181 if (cpu_family != CPU_HW_PSTATE) {
1182 dprintk("targ: curr fid 0x%x, vid 0x%x\n", 1182 dprintk("targ: curr fid 0x%x, vid 0x%x\n",
1183 data->currfid, data->currvid); 1183 data->currfid, data->currvid);
1184 1184
1185 if ((checkvid != data->currvid) || 1185 if ((checkvid != data->currvid) ||
1186 (checkfid != data->currfid)) { 1186 (checkfid != data->currfid)) {
1187 printk(KERN_INFO PFX 1187 printk(KERN_INFO PFX
1188 "error - out of sync, fix 0x%x 0x%x, " 1188 "error - out of sync, fix 0x%x 0x%x, "
1189 "vid 0x%x 0x%x\n", 1189 "vid 0x%x 0x%x\n",
1190 checkfid, data->currfid, 1190 checkfid, data->currfid,
1191 checkvid, data->currvid); 1191 checkvid, data->currvid);
1192 } 1192 }
1193 } 1193 }
1194 1194
1195 if (cpufreq_frequency_table_target(pol, data->powernow_table, 1195 if (cpufreq_frequency_table_target(pol, data->powernow_table,
1196 targfreq, relation, &newstate)) 1196 targfreq, relation, &newstate))
1197 goto err_out; 1197 goto err_out;
1198 1198
1199 mutex_lock(&fidvid_mutex); 1199 mutex_lock(&fidvid_mutex);
1200 1200
1201 powernow_k8_acpi_pst_values(data, newstate); 1201 powernow_k8_acpi_pst_values(data, newstate);
1202 1202
1203 if (cpu_family == CPU_HW_PSTATE) 1203 if (cpu_family == CPU_HW_PSTATE)
1204 ret = transition_frequency_pstate(data, newstate); 1204 ret = transition_frequency_pstate(data, newstate);
1205 else 1205 else
1206 ret = transition_frequency_fidvid(data, newstate); 1206 ret = transition_frequency_fidvid(data, newstate);
1207 if (ret) { 1207 if (ret) {
1208 printk(KERN_ERR PFX "transition frequency failed\n"); 1208 printk(KERN_ERR PFX "transition frequency failed\n");
1209 ret = 1; 1209 ret = 1;
1210 mutex_unlock(&fidvid_mutex); 1210 mutex_unlock(&fidvid_mutex);
1211 goto err_out; 1211 goto err_out;
1212 } 1212 }
1213 mutex_unlock(&fidvid_mutex); 1213 mutex_unlock(&fidvid_mutex);
1214 1214
1215 if (cpu_family == CPU_HW_PSTATE) 1215 if (cpu_family == CPU_HW_PSTATE)
1216 pol->cur = find_khz_freq_from_pstate(data->powernow_table, 1216 pol->cur = find_khz_freq_from_pstate(data->powernow_table,
1217 newstate); 1217 newstate);
1218 else 1218 else
1219 pol->cur = find_khz_freq_from_fid(data->currfid); 1219 pol->cur = find_khz_freq_from_fid(data->currfid);
1220 ret = 0; 1220 ret = 0;
1221 1221
1222 err_out: 1222 err_out:
1223 set_cpus_allowed_ptr(current, &oldmask); 1223 set_cpus_allowed_ptr(current, &oldmask);
1224 return ret; 1224 return ret;
1225 } 1225 }
1226 1226
1227 /* Driver entry point to verify the policy and range of frequencies */ 1227 /* Driver entry point to verify the policy and range of frequencies */
1228 static int powernowk8_verify(struct cpufreq_policy *pol) 1228 static int powernowk8_verify(struct cpufreq_policy *pol)
1229 { 1229 {
1230 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); 1230 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1231 1231
1232 if (!data) 1232 if (!data)
1233 return -EINVAL; 1233 return -EINVAL;
1234 1234
1235 return cpufreq_frequency_table_verify(pol, data->powernow_table); 1235 return cpufreq_frequency_table_verify(pol, data->powernow_table);
1236 } 1236 }
1237 1237
1238 static const char ACPI_PSS_BIOS_BUG_MSG[] = 1238 static const char ACPI_PSS_BIOS_BUG_MSG[] =
1239 KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" 1239 KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
1240 KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; 1240 KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n";
1241 1241
1242 /* per CPU init entry point to the driver */ 1242 /* per CPU init entry point to the driver */
1243 static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) 1243 static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1244 { 1244 {
1245 struct powernow_k8_data *data; 1245 struct powernow_k8_data *data;
1246 cpumask_t oldmask; 1246 cpumask_t oldmask;
1247 int rc; 1247 int rc;
1248 1248
1249 if (!cpu_online(pol->cpu)) 1249 if (!cpu_online(pol->cpu))
1250 return -ENODEV; 1250 return -ENODEV;
1251 1251
1252 if (!check_supported_cpu(pol->cpu)) 1252 if (!check_supported_cpu(pol->cpu))
1253 return -ENODEV; 1253 return -ENODEV;
1254 1254
1255 data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); 1255 data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
1256 if (!data) { 1256 if (!data) {
1257 printk(KERN_ERR PFX "unable to alloc powernow_k8_data"); 1257 printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
1258 return -ENOMEM; 1258 return -ENOMEM;
1259 } 1259 }
1260 1260
1261 data->cpu = pol->cpu; 1261 data->cpu = pol->cpu;
1262 data->currpstate = HW_PSTATE_INVALID; 1262 data->currpstate = HW_PSTATE_INVALID;
1263 1263
1264 if (powernow_k8_cpu_init_acpi(data)) { 1264 if (powernow_k8_cpu_init_acpi(data)) {
1265 /* 1265 /*
1266 * Use the PSB BIOS structure. This is only available on 1266 * Use the PSB BIOS structure. This is only available on
1267 * an UP version, and is deprecated by AMD. 1267 * an UP version, and is deprecated by AMD.
1268 */ 1268 */
1269 if (num_online_cpus() != 1) { 1269 if (num_online_cpus() != 1) {
1270 printk_once(ACPI_PSS_BIOS_BUG_MSG); 1270 printk_once(ACPI_PSS_BIOS_BUG_MSG);
1271 goto err_out; 1271 goto err_out;
1272 } 1272 }
1273 if (pol->cpu != 0) { 1273 if (pol->cpu != 0) {
1274 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " 1274 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
1275 "CPU other than CPU0. Complain to your BIOS " 1275 "CPU other than CPU0. Complain to your BIOS "
1276 "vendor.\n"); 1276 "vendor.\n");
1277 goto err_out; 1277 goto err_out;
1278 } 1278 }
1279 rc = find_psb_table(data); 1279 rc = find_psb_table(data);
1280 if (rc) 1280 if (rc)
1281 goto err_out; 1281 goto err_out;
1282 1282
1283 /* Take a crude guess here. 1283 /* Take a crude guess here.
1284 * That guess was in microseconds, so multiply with 1000 */ 1284 * That guess was in microseconds, so multiply with 1000 */
1285 pol->cpuinfo.transition_latency = ( 1285 pol->cpuinfo.transition_latency = (
1286 ((data->rvo + 8) * data->vstable * VST_UNITS_20US) + 1286 ((data->rvo + 8) * data->vstable * VST_UNITS_20US) +
1287 ((1 << data->irt) * 30)) * 1000; 1287 ((1 << data->irt) * 30)) * 1000;
1288 } else /* ACPI _PSS objects available */ 1288 } else /* ACPI _PSS objects available */
1289 pol->cpuinfo.transition_latency = get_transition_latency(data); 1289 pol->cpuinfo.transition_latency = get_transition_latency(data);
1290 1290
1291 /* only run on specific CPU from here on */ 1291 /* only run on specific CPU from here on */
1292 oldmask = current->cpus_allowed; 1292 oldmask = current->cpus_allowed;
1293 set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); 1293 set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
1294 1294
1295 if (smp_processor_id() != pol->cpu) { 1295 if (smp_processor_id() != pol->cpu) {
1296 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); 1296 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
1297 goto err_out_unmask; 1297 goto err_out_unmask;
1298 } 1298 }
1299 1299
1300 if (pending_bit_stuck()) { 1300 if (pending_bit_stuck()) {
1301 printk(KERN_ERR PFX "failing init, change pending bit set\n"); 1301 printk(KERN_ERR PFX "failing init, change pending bit set\n");
1302 goto err_out_unmask; 1302 goto err_out_unmask;
1303 } 1303 }
1304 1304
1305 if (query_current_values_with_pending_wait(data)) 1305 if (query_current_values_with_pending_wait(data))
1306 goto err_out_unmask; 1306 goto err_out_unmask;
1307 1307
1308 if (cpu_family == CPU_OPTERON) 1308 if (cpu_family == CPU_OPTERON)
1309 fidvid_msr_init(); 1309 fidvid_msr_init();
1310 1310
1311 /* run on any CPU again */ 1311 /* run on any CPU again */
1312 set_cpus_allowed_ptr(current, &oldmask); 1312 set_cpus_allowed_ptr(current, &oldmask);
1313 1313
1314 if (cpu_family == CPU_HW_PSTATE) 1314 if (cpu_family == CPU_HW_PSTATE)
1315 cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); 1315 cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
1316 else 1316 else
1317 cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu)); 1317 cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
1318 data->available_cores = pol->cpus; 1318 data->available_cores = pol->cpus;
1319 1319
1320 if (cpu_family == CPU_HW_PSTATE) 1320 if (cpu_family == CPU_HW_PSTATE)
1321 pol->cur = find_khz_freq_from_pstate(data->powernow_table, 1321 pol->cur = find_khz_freq_from_pstate(data->powernow_table,
1322 data->currpstate); 1322 data->currpstate);
1323 else 1323 else
1324 pol->cur = find_khz_freq_from_fid(data->currfid); 1324 pol->cur = find_khz_freq_from_fid(data->currfid);
1325 dprintk("policy current frequency %d kHz\n", pol->cur); 1325 dprintk("policy current frequency %d kHz\n", pol->cur);
1326 1326
1327 /* min/max the cpu is capable of */ 1327 /* min/max the cpu is capable of */
1328 if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { 1328 if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
1329 printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n"); 1329 printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n");
1330 powernow_k8_cpu_exit_acpi(data); 1330 powernow_k8_cpu_exit_acpi(data);
1331 kfree(data->powernow_table); 1331 kfree(data->powernow_table);
1332 kfree(data); 1332 kfree(data);
1333 return -EINVAL; 1333 return -EINVAL;
1334 } 1334 }
1335 1335
1336 cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); 1336 cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
1337 1337
1338 if (cpu_family == CPU_HW_PSTATE) 1338 if (cpu_family == CPU_HW_PSTATE)
1339 dprintk("cpu_init done, current pstate 0x%x\n", 1339 dprintk("cpu_init done, current pstate 0x%x\n",
1340 data->currpstate); 1340 data->currpstate);
1341 else 1341 else
1342 dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", 1342 dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
1343 data->currfid, data->currvid); 1343 data->currfid, data->currvid);
1344 1344
1345 per_cpu(powernow_data, pol->cpu) = data; 1345 per_cpu(powernow_data, pol->cpu) = data;
1346 1346
1347 return 0; 1347 return 0;
1348 1348
1349 err_out_unmask: 1349 err_out_unmask:
1350 set_cpus_allowed_ptr(current, &oldmask); 1350 set_cpus_allowed_ptr(current, &oldmask);
1351 powernow_k8_cpu_exit_acpi(data); 1351 powernow_k8_cpu_exit_acpi(data);
1352 1352
1353 err_out: 1353 err_out:
1354 kfree(data); 1354 kfree(data);
1355 return -ENODEV; 1355 return -ENODEV;
1356 } 1356 }
1357 1357
1358 static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) 1358 static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
1359 { 1359 {
1360 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); 1360 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1361 1361
1362 if (!data) 1362 if (!data)
1363 return -EINVAL; 1363 return -EINVAL;
1364 1364
1365 powernow_k8_cpu_exit_acpi(data); 1365 powernow_k8_cpu_exit_acpi(data);
1366 1366
1367 cpufreq_frequency_table_put_attr(pol->cpu); 1367 cpufreq_frequency_table_put_attr(pol->cpu);
1368 1368
1369 kfree(data->powernow_table); 1369 kfree(data->powernow_table);
1370 kfree(data); 1370 kfree(data);
1371 1371
1372 return 0; 1372 return 0;
1373 } 1373 }
1374 1374
1375 static unsigned int powernowk8_get(unsigned int cpu) 1375 static unsigned int powernowk8_get(unsigned int cpu)
1376 { 1376 {
1377 struct powernow_k8_data *data; 1377 struct powernow_k8_data *data;
1378 cpumask_t oldmask = current->cpus_allowed; 1378 cpumask_t oldmask = current->cpus_allowed;
1379 unsigned int khz = 0; 1379 unsigned int khz = 0;
1380 unsigned int first; 1380 unsigned int first;
1381 1381
1382 first = cpumask_first(cpu_core_mask(cpu)); 1382 first = cpumask_first(cpu_core_mask(cpu));
1383 data = per_cpu(powernow_data, first); 1383 data = per_cpu(powernow_data, first);
1384 1384
1385 if (!data) 1385 if (!data)
1386 return -EINVAL; 1386 return -EINVAL;
1387 1387
1388 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); 1388 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
1389 if (smp_processor_id() != cpu) { 1389 if (smp_processor_id() != cpu) {
1390 printk(KERN_ERR PFX 1390 printk(KERN_ERR PFX
1391 "limiting to CPU %d failed in powernowk8_get\n", cpu); 1391 "limiting to CPU %d failed in powernowk8_get\n", cpu);
1392 set_cpus_allowed_ptr(current, &oldmask); 1392 set_cpus_allowed_ptr(current, &oldmask);
1393 return 0; 1393 return 0;
1394 } 1394 }
1395 1395
1396 if (query_current_values_with_pending_wait(data)) 1396 if (query_current_values_with_pending_wait(data))
1397 goto out; 1397 goto out;
1398 1398
1399 if (cpu_family == CPU_HW_PSTATE) 1399 if (cpu_family == CPU_HW_PSTATE)
1400 khz = find_khz_freq_from_pstate(data->powernow_table, 1400 khz = find_khz_freq_from_pstate(data->powernow_table,
1401 data->currpstate); 1401 data->currpstate);
1402 else 1402 else
1403 khz = find_khz_freq_from_fid(data->currfid); 1403 khz = find_khz_freq_from_fid(data->currfid);
1404 1404
1405 1405
1406 out: 1406 out:
1407 set_cpus_allowed_ptr(current, &oldmask); 1407 set_cpus_allowed_ptr(current, &oldmask);
1408 return khz; 1408 return khz;
1409 } 1409 }
1410 1410
1411 static struct freq_attr *powernow_k8_attr[] = { 1411 static struct freq_attr *powernow_k8_attr[] = {
1412 &cpufreq_freq_attr_scaling_available_freqs, 1412 &cpufreq_freq_attr_scaling_available_freqs,
1413 NULL, 1413 NULL,
1414 }; 1414 };
1415 1415
1416 static struct cpufreq_driver cpufreq_amd64_driver = { 1416 static struct cpufreq_driver cpufreq_amd64_driver = {
1417 .verify = powernowk8_verify, 1417 .verify = powernowk8_verify,
1418 .target = powernowk8_target, 1418 .target = powernowk8_target,
1419 .init = powernowk8_cpu_init, 1419 .init = powernowk8_cpu_init,
1420 .exit = __devexit_p(powernowk8_cpu_exit), 1420 .exit = __devexit_p(powernowk8_cpu_exit),
1421 .get = powernowk8_get, 1421 .get = powernowk8_get,
1422 .name = "powernow-k8", 1422 .name = "powernow-k8",
1423 .owner = THIS_MODULE, 1423 .owner = THIS_MODULE,
1424 .attr = powernow_k8_attr, 1424 .attr = powernow_k8_attr,
1425 }; 1425 };
1426 1426
1427 /* driver entry point for init */ 1427 /* driver entry point for init */
1428 static int __cpuinit powernowk8_init(void) 1428 static int __cpuinit powernowk8_init(void)
1429 { 1429 {
1430 unsigned int i, supported_cpus = 0; 1430 unsigned int i, supported_cpus = 0;
1431 1431
1432 for_each_online_cpu(i) { 1432 for_each_online_cpu(i) {
1433 if (check_supported_cpu(i)) 1433 if (check_supported_cpu(i))
1434 supported_cpus++; 1434 supported_cpus++;
1435 } 1435 }
1436 1436
1437 if (supported_cpus == num_online_cpus()) { 1437 if (supported_cpus == num_online_cpus()) {
1438 printk(KERN_INFO PFX "Found %d %s " 1438 printk(KERN_INFO PFX "Found %d %s "
1439 "processors (%d cpu cores) (" VERSION ")\n", 1439 "processors (%d cpu cores) (" VERSION ")\n",
1440 num_online_nodes(), 1440 num_online_nodes(),
1441 boot_cpu_data.x86_model_id, supported_cpus); 1441 boot_cpu_data.x86_model_id, supported_cpus);
1442 return cpufreq_register_driver(&cpufreq_amd64_driver); 1442 return cpufreq_register_driver(&cpufreq_amd64_driver);
1443 } 1443 }
1444 1444
1445 return -ENODEV; 1445 return -ENODEV;
1446 } 1446 }
1447 1447
1448 /* driver entry point for term */ 1448 /* driver entry point for term */
1449 static void __exit powernowk8_exit(void) 1449 static void __exit powernowk8_exit(void)
1450 { 1450 {
1451 dprintk("exit\n"); 1451 dprintk("exit\n");
1452 1452
1453 cpufreq_unregister_driver(&cpufreq_amd64_driver); 1453 cpufreq_unregister_driver(&cpufreq_amd64_driver);
1454 } 1454 }
1455 1455
1456 MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and " 1456 MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
1457 "Mark Langsdorf <mark.langsdorf@amd.com>"); 1457 "Mark Langsdorf <mark.langsdorf@amd.com>");
1458 MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); 1458 MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
1459 MODULE_LICENSE("GPL"); 1459 MODULE_LICENSE("GPL");
1460 1460
1461 late_initcall(powernowk8_init); 1461 late_initcall(powernowk8_init);
1462 module_exit(powernowk8_exit); 1462 module_exit(powernowk8_exit);
1463 1463
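
The only functional change in this file is on its line 890, where the allocation of data->acpi_data.shared_cpu_map switches from alloc_cpumask_var() to zalloc_cpumask_var(). When cpumasks are allocated off-stack (CONFIG_CPUMASK_OFFSTACK, as selected by large-NR_CPUS configurations), the plain allocator hands back uninitialized bits, while the zeroed variant guarantees a cleared mask before any CPU bit is tested or set. The userspace sketch below only mimics that distinction; the struct cpumask layout, the *_sketch helper names, and the NR_CPUS value are illustrative assumptions, not the kernel's actual implementation.

/*
 * Minimal userspace sketch (not kernel code) of why a zeroed cpumask
 * allocation matters.  The names below only mimic the kernel's
 * cpumask_var_t and alloc_cpumask_var()/zalloc_cpumask_var() helpers.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4096                       /* assumed large-NR_CPUS build */
#define BITS_PER_LONG (8 * sizeof(long))
#define MASK_LONGS ((NR_CPUS + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct cpumask { unsigned long bits[MASK_LONGS]; };

/* Off-stack flavour: the "mask" is really a pointer that must be allocated. */
typedef struct cpumask *cpumask_var_t;

static bool alloc_cpumask_var_sketch(cpumask_var_t *mask)
{
	*mask = malloc(sizeof(struct cpumask));   /* contents are undefined */
	return *mask != NULL;
}

static bool zalloc_cpumask_var_sketch(cpumask_var_t *mask)
{
	*mask = calloc(1, sizeof(struct cpumask)); /* contents are all-zero */
	return *mask != NULL;
}

int main(void)
{
	cpumask_var_t m;

	if (!zalloc_cpumask_var_sketch(&m))
		return 1;
	/* Safe to inspect immediately: every CPU bit starts cleared. */
	printf("first word of mask: %lx\n", m->bits[0]);
	free(m);
	return 0;
}

Under this analogy, calloc() plays the role of the zeroed allocator: the caller can rely on every bit starting cleared, instead of remembering to clear the freshly allocated mask in a separate step.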
arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
1 /* 1 /*
2 * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium 2 * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium
3 * M (part of the Centrino chipset). 3 * M (part of the Centrino chipset).
4 * 4 *
5 * Since the original Pentium M, most new Intel CPUs support Enhanced 5 * Since the original Pentium M, most new Intel CPUs support Enhanced
6 * SpeedStep. 6 * SpeedStep.
7 * 7 *
8 * Despite the "SpeedStep" in the name, this is almost entirely unlike 8 * Despite the "SpeedStep" in the name, this is almost entirely unlike
9 * traditional SpeedStep. 9 * traditional SpeedStep.
10 * 10 *
11 * Modelled on speedstep.c 11 * Modelled on speedstep.c
12 * 12 *
13 * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org> 13 * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
14 */ 14 */
15 15
16 #include <linux/kernel.h> 16 #include <linux/kernel.h>
17 #include <linux/module.h> 17 #include <linux/module.h>
18 #include <linux/init.h> 18 #include <linux/init.h>
19 #include <linux/cpufreq.h> 19 #include <linux/cpufreq.h>
20 #include <linux/sched.h> /* current */ 20 #include <linux/sched.h> /* current */
21 #include <linux/delay.h> 21 #include <linux/delay.h>
22 #include <linux/compiler.h> 22 #include <linux/compiler.h>
23 23
24 #include <asm/msr.h> 24 #include <asm/msr.h>
25 #include <asm/processor.h> 25 #include <asm/processor.h>
26 #include <asm/cpufeature.h> 26 #include <asm/cpufeature.h>
27 27
28 #define PFX "speedstep-centrino: " 28 #define PFX "speedstep-centrino: "
29 #define MAINTAINER "cpufreq@vger.kernel.org" 29 #define MAINTAINER "cpufreq@vger.kernel.org"
30 30
31 #define dprintk(msg...) \ 31 #define dprintk(msg...) \
32 cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) 32 cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
33 33
34 #define INTEL_MSR_RANGE (0xffff) 34 #define INTEL_MSR_RANGE (0xffff)
35 35
36 struct cpu_id 36 struct cpu_id
37 { 37 {
38 __u8 x86; /* CPU family */ 38 __u8 x86; /* CPU family */
39 __u8 x86_model; /* model */ 39 __u8 x86_model; /* model */
40 __u8 x86_mask; /* stepping */ 40 __u8 x86_mask; /* stepping */
41 }; 41 };
42 42
43 enum { 43 enum {
44 CPU_BANIAS, 44 CPU_BANIAS,
45 CPU_DOTHAN_A1, 45 CPU_DOTHAN_A1,
46 CPU_DOTHAN_A2, 46 CPU_DOTHAN_A2,
47 CPU_DOTHAN_B0, 47 CPU_DOTHAN_B0,
48 CPU_MP4HT_D0, 48 CPU_MP4HT_D0,
49 CPU_MP4HT_E0, 49 CPU_MP4HT_E0,
50 }; 50 };
51 51
52 static const struct cpu_id cpu_ids[] = { 52 static const struct cpu_id cpu_ids[] = {
53 [CPU_BANIAS] = { 6, 9, 5 }, 53 [CPU_BANIAS] = { 6, 9, 5 },
54 [CPU_DOTHAN_A1] = { 6, 13, 1 }, 54 [CPU_DOTHAN_A1] = { 6, 13, 1 },
55 [CPU_DOTHAN_A2] = { 6, 13, 2 }, 55 [CPU_DOTHAN_A2] = { 6, 13, 2 },
56 [CPU_DOTHAN_B0] = { 6, 13, 6 }, 56 [CPU_DOTHAN_B0] = { 6, 13, 6 },
57 [CPU_MP4HT_D0] = {15, 3, 4 }, 57 [CPU_MP4HT_D0] = {15, 3, 4 },
58 [CPU_MP4HT_E0] = {15, 4, 1 }, 58 [CPU_MP4HT_E0] = {15, 4, 1 },
59 }; 59 };
60 #define N_IDS ARRAY_SIZE(cpu_ids) 60 #define N_IDS ARRAY_SIZE(cpu_ids)
61 61
62 struct cpu_model 62 struct cpu_model
63 { 63 {
64 const struct cpu_id *cpu_id; 64 const struct cpu_id *cpu_id;
65 const char *model_name; 65 const char *model_name;
66 unsigned max_freq; /* max clock in kHz */ 66 unsigned max_freq; /* max clock in kHz */
67 67
68 struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ 68 struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
69 }; 69 };
70 static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, 70 static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
71 const struct cpu_id *x); 71 const struct cpu_id *x);
72 72
73 /* Operating points for current CPU */ 73 /* Operating points for current CPU */
74 static DEFINE_PER_CPU(struct cpu_model *, centrino_model); 74 static DEFINE_PER_CPU(struct cpu_model *, centrino_model);
75 static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu); 75 static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu);
76 76
77 static struct cpufreq_driver centrino_driver; 77 static struct cpufreq_driver centrino_driver;
78 78
79 #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE 79 #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE
80 80
81 /* Computes the correct form for IA32_PERF_CTL MSR for a particular 81 /* Computes the correct form for IA32_PERF_CTL MSR for a particular
82 frequency/voltage operating point; frequency in MHz, volts in mV. 82 frequency/voltage operating point; frequency in MHz, volts in mV.
83 This is stored as "index" in the structure. */ 83 This is stored as "index" in the structure. */
84 #define OP(mhz, mv) \ 84 #define OP(mhz, mv) \
85 { \ 85 { \
86 .frequency = (mhz) * 1000, \ 86 .frequency = (mhz) * 1000, \
87 .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \ 87 .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \
88 } 88 }
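As a worked check of the encoding above (a user-space sketch, not part of this diff, assuming only the arithmetic visible in the OP() macro and in extract_clock() further down): OP(1300, 1388), the top entry of banias_1300 below, packs into index 0x0d2b, because 1300/100 = 0x0d and (1388 - 700)/16 = 0x2b, and the high byte decodes back to 13 * 100000 kHz.

        /* Sketch: reproduce the OP(mhz, mv) packing used by the tables below. */
        #include <assert.h>
        #include <stdio.h>

        static unsigned op_index(unsigned mhz, unsigned mv)
        {
                return ((mhz / 100) << 8) | ((mv - 700) / 16);
        }

        int main(void)
        {
                assert(op_index(1300, 1388) == 0x0d2b);  /* banias_1300 entry */
                /* extract_clock() reads the high byte as bus ratio * 100 MHz */
                assert(((op_index(1300, 1388) >> 8) & 0xff) * 100000 == 1300000);
                printf("index = %#06x\n", op_index(1300, 1388));
                return 0;
        }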
89 89
90 /* 90 /*
91 * These voltage tables were derived from the Intel Pentium M 91 * These voltage tables were derived from the Intel Pentium M
92 * datasheet, document 25261202.pdf, Table 5. I have verified they 92 * datasheet, document 25261202.pdf, Table 5. I have verified they
93 * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium 93 * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium
94 * M. 94 * M.
95 */ 95 */
96 96
97 /* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */ 97 /* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */
98 static struct cpufreq_frequency_table banias_900[] = 98 static struct cpufreq_frequency_table banias_900[] =
99 { 99 {
100 OP(600, 844), 100 OP(600, 844),
101 OP(800, 988), 101 OP(800, 988),
102 OP(900, 1004), 102 OP(900, 1004),
103 { .frequency = CPUFREQ_TABLE_END } 103 { .frequency = CPUFREQ_TABLE_END }
104 }; 104 };
105 105
106 /* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */ 106 /* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */
107 static struct cpufreq_frequency_table banias_1000[] = 107 static struct cpufreq_frequency_table banias_1000[] =
108 { 108 {
109 OP(600, 844), 109 OP(600, 844),
110 OP(800, 972), 110 OP(800, 972),
111 OP(900, 988), 111 OP(900, 988),
112 OP(1000, 1004), 112 OP(1000, 1004),
113 { .frequency = CPUFREQ_TABLE_END } 113 { .frequency = CPUFREQ_TABLE_END }
114 }; 114 };
115 115
116 /* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */ 116 /* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */
117 static struct cpufreq_frequency_table banias_1100[] = 117 static struct cpufreq_frequency_table banias_1100[] =
118 { 118 {
119 OP( 600, 956), 119 OP( 600, 956),
120 OP( 800, 1020), 120 OP( 800, 1020),
121 OP( 900, 1100), 121 OP( 900, 1100),
122 OP(1000, 1164), 122 OP(1000, 1164),
123 OP(1100, 1180), 123 OP(1100, 1180),
124 { .frequency = CPUFREQ_TABLE_END } 124 { .frequency = CPUFREQ_TABLE_END }
125 }; 125 };
126 126
127 127
128 /* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */ 128 /* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */
129 static struct cpufreq_frequency_table banias_1200[] = 129 static struct cpufreq_frequency_table banias_1200[] =
130 { 130 {
131 OP( 600, 956), 131 OP( 600, 956),
132 OP( 800, 1004), 132 OP( 800, 1004),
133 OP( 900, 1020), 133 OP( 900, 1020),
134 OP(1000, 1100), 134 OP(1000, 1100),
135 OP(1100, 1164), 135 OP(1100, 1164),
136 OP(1200, 1180), 136 OP(1200, 1180),
137 { .frequency = CPUFREQ_TABLE_END } 137 { .frequency = CPUFREQ_TABLE_END }
138 }; 138 };
139 139
140 /* Intel Pentium M processor 1.30GHz (Banias) */ 140 /* Intel Pentium M processor 1.30GHz (Banias) */
141 static struct cpufreq_frequency_table banias_1300[] = 141 static struct cpufreq_frequency_table banias_1300[] =
142 { 142 {
143 OP( 600, 956), 143 OP( 600, 956),
144 OP( 800, 1260), 144 OP( 800, 1260),
145 OP(1000, 1292), 145 OP(1000, 1292),
146 OP(1200, 1356), 146 OP(1200, 1356),
147 OP(1300, 1388), 147 OP(1300, 1388),
148 { .frequency = CPUFREQ_TABLE_END } 148 { .frequency = CPUFREQ_TABLE_END }
149 }; 149 };
150 150
151 /* Intel Pentium M processor 1.40GHz (Banias) */ 151 /* Intel Pentium M processor 1.40GHz (Banias) */
152 static struct cpufreq_frequency_table banias_1400[] = 152 static struct cpufreq_frequency_table banias_1400[] =
153 { 153 {
154 OP( 600, 956), 154 OP( 600, 956),
155 OP( 800, 1180), 155 OP( 800, 1180),
156 OP(1000, 1308), 156 OP(1000, 1308),
157 OP(1200, 1436), 157 OP(1200, 1436),
158 OP(1400, 1484), 158 OP(1400, 1484),
159 { .frequency = CPUFREQ_TABLE_END } 159 { .frequency = CPUFREQ_TABLE_END }
160 }; 160 };
161 161
162 /* Intel Pentium M processor 1.50GHz (Banias) */ 162 /* Intel Pentium M processor 1.50GHz (Banias) */
163 static struct cpufreq_frequency_table banias_1500[] = 163 static struct cpufreq_frequency_table banias_1500[] =
164 { 164 {
165 OP( 600, 956), 165 OP( 600, 956),
166 OP( 800, 1116), 166 OP( 800, 1116),
167 OP(1000, 1228), 167 OP(1000, 1228),
168 OP(1200, 1356), 168 OP(1200, 1356),
169 OP(1400, 1452), 169 OP(1400, 1452),
170 OP(1500, 1484), 170 OP(1500, 1484),
171 { .frequency = CPUFREQ_TABLE_END } 171 { .frequency = CPUFREQ_TABLE_END }
172 }; 172 };
173 173
174 /* Intel Pentium M processor 1.60GHz (Banias) */ 174 /* Intel Pentium M processor 1.60GHz (Banias) */
175 static struct cpufreq_frequency_table banias_1600[] = 175 static struct cpufreq_frequency_table banias_1600[] =
176 { 176 {
177 OP( 600, 956), 177 OP( 600, 956),
178 OP( 800, 1036), 178 OP( 800, 1036),
179 OP(1000, 1164), 179 OP(1000, 1164),
180 OP(1200, 1276), 180 OP(1200, 1276),
181 OP(1400, 1420), 181 OP(1400, 1420),
182 OP(1600, 1484), 182 OP(1600, 1484),
183 { .frequency = CPUFREQ_TABLE_END } 183 { .frequency = CPUFREQ_TABLE_END }
184 }; 184 };
185 185
186 /* Intel Pentium M processor 1.70GHz (Banias) */ 186 /* Intel Pentium M processor 1.70GHz (Banias) */
187 static struct cpufreq_frequency_table banias_1700[] = 187 static struct cpufreq_frequency_table banias_1700[] =
188 { 188 {
189 OP( 600, 956), 189 OP( 600, 956),
190 OP( 800, 1004), 190 OP( 800, 1004),
191 OP(1000, 1116), 191 OP(1000, 1116),
192 OP(1200, 1228), 192 OP(1200, 1228),
193 OP(1400, 1308), 193 OP(1400, 1308),
194 OP(1700, 1484), 194 OP(1700, 1484),
195 { .frequency = CPUFREQ_TABLE_END } 195 { .frequency = CPUFREQ_TABLE_END }
196 }; 196 };
197 #undef OP 197 #undef OP
198 198
199 #define _BANIAS(cpuid, max, name) \ 199 #define _BANIAS(cpuid, max, name) \
200 { .cpu_id = cpuid, \ 200 { .cpu_id = cpuid, \
201 .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \ 201 .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \
202 .max_freq = (max)*1000, \ 202 .max_freq = (max)*1000, \
203 .op_points = banias_##max, \ 203 .op_points = banias_##max, \
204 } 204 }
205 #define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max) 205 #define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max)
206 206
207 /* CPU models, their operating frequency range, and freq/voltage 207 /* CPU models, their operating frequency range, and freq/voltage
208 operating points */ 208 operating points */
209 static struct cpu_model models[] = 209 static struct cpu_model models[] =
210 { 210 {
211 _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"), 211 _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"),
212 BANIAS(1000), 212 BANIAS(1000),
213 BANIAS(1100), 213 BANIAS(1100),
214 BANIAS(1200), 214 BANIAS(1200),
215 BANIAS(1300), 215 BANIAS(1300),
216 BANIAS(1400), 216 BANIAS(1400),
217 BANIAS(1500), 217 BANIAS(1500),
218 BANIAS(1600), 218 BANIAS(1600),
219 BANIAS(1700), 219 BANIAS(1700),
220 220
221 /* NULL model_name is a wildcard */ 221 /* NULL model_name is a wildcard */
222 { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, 222 { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL },
223 { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, 223 { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL },
224 { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, 224 { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL },
225 { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, 225 { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL },
226 { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, 226 { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL },
227 227
228 { NULL, } 228 { NULL, }
229 }; 229 };
230 #undef _BANIAS 230 #undef _BANIAS
231 #undef BANIAS 231 #undef BANIAS
232 232
233 static int centrino_cpu_init_table(struct cpufreq_policy *policy) 233 static int centrino_cpu_init_table(struct cpufreq_policy *policy)
234 { 234 {
235 struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); 235 struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
236 struct cpu_model *model; 236 struct cpu_model *model;
237 237
238 for(model = models; model->cpu_id != NULL; model++) 238 for(model = models; model->cpu_id != NULL; model++)
239 if (centrino_verify_cpu_id(cpu, model->cpu_id) && 239 if (centrino_verify_cpu_id(cpu, model->cpu_id) &&
240 (model->model_name == NULL || 240 (model->model_name == NULL ||
241 strcmp(cpu->x86_model_id, model->model_name) == 0)) 241 strcmp(cpu->x86_model_id, model->model_name) == 0))
242 break; 242 break;
243 243
244 if (model->cpu_id == NULL) { 244 if (model->cpu_id == NULL) {
245 /* No match at all */ 245 /* No match at all */
246 dprintk("no support for CPU model \"%s\": " 246 dprintk("no support for CPU model \"%s\": "
247 "send /proc/cpuinfo to " MAINTAINER "\n", 247 "send /proc/cpuinfo to " MAINTAINER "\n",
248 cpu->x86_model_id); 248 cpu->x86_model_id);
249 return -ENOENT; 249 return -ENOENT;
250 } 250 }
251 251
252 if (model->op_points == NULL) { 252 if (model->op_points == NULL) {
253 /* Matched a non-match */ 253 /* Matched a non-match */
254 dprintk("no table support for CPU model \"%s\"\n", 254 dprintk("no table support for CPU model \"%s\"\n",
255 cpu->x86_model_id); 255 cpu->x86_model_id);
256 dprintk("try using the acpi-cpufreq driver\n"); 256 dprintk("try using the acpi-cpufreq driver\n");
257 return -ENOENT; 257 return -ENOENT;
258 } 258 }
259 259
260 per_cpu(centrino_model, policy->cpu) = model; 260 per_cpu(centrino_model, policy->cpu) = model;
261 261
262 dprintk("found \"%s\": max frequency: %dkHz\n", 262 dprintk("found \"%s\": max frequency: %dkHz\n",
263 model->model_name, model->max_freq); 263 model->model_name, model->max_freq);
264 264
265 return 0; 265 return 0;
266 } 266 }
267 267
268 #else 268 #else
269 static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) 269 static inline int centrino_cpu_init_table(struct cpufreq_policy *policy)
270 { 270 {
271 return -ENODEV; 271 return -ENODEV;
272 } 272 }
273 #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ 273 #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
274 274
275 static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, 275 static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
276 const struct cpu_id *x) 276 const struct cpu_id *x)
277 { 277 {
278 if ((c->x86 == x->x86) && 278 if ((c->x86 == x->x86) &&
279 (c->x86_model == x->x86_model) && 279 (c->x86_model == x->x86_model) &&
280 (c->x86_mask == x->x86_mask)) 280 (c->x86_mask == x->x86_mask))
281 return 1; 281 return 1;
282 return 0; 282 return 0;
283 } 283 }
284 284
285 /* To be called only after centrino_model is initialized */ 285 /* To be called only after centrino_model is initialized */
286 static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) 286 static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
287 { 287 {
288 int i; 288 int i;
289 289
290 /* 290 /*
291 * Extract clock in kHz from PERF_CTL value 291 * Extract clock in kHz from PERF_CTL value
292 * for centrino, as some DSDTs are buggy. 292 * for centrino, as some DSDTs are buggy.
293 * Ideally, this can be done using the acpi_data structure. 293 * Ideally, this can be done using the acpi_data structure.
294 */ 294 */
295 if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) || 295 if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) ||
296 (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) || 296 (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) ||
297 (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) { 297 (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) {
298 msr = (msr >> 8) & 0xff; 298 msr = (msr >> 8) & 0xff;
299 return msr * 100000; 299 return msr * 100000;
300 } 300 }
301 301
302 if ((!per_cpu(centrino_model, cpu)) || 302 if ((!per_cpu(centrino_model, cpu)) ||
303 (!per_cpu(centrino_model, cpu)->op_points)) 303 (!per_cpu(centrino_model, cpu)->op_points))
304 return 0; 304 return 0;
305 305
306 msr &= 0xffff; 306 msr &= 0xffff;
307 for (i = 0; 307 for (i = 0;
308 per_cpu(centrino_model, cpu)->op_points[i].frequency 308 per_cpu(centrino_model, cpu)->op_points[i].frequency
309 != CPUFREQ_TABLE_END; 309 != CPUFREQ_TABLE_END;
310 i++) { 310 i++) {
311 if (msr == per_cpu(centrino_model, cpu)->op_points[i].index) 311 if (msr == per_cpu(centrino_model, cpu)->op_points[i].index)
312 return per_cpu(centrino_model, cpu)-> 312 return per_cpu(centrino_model, cpu)->
313 op_points[i].frequency; 313 op_points[i].frequency;
314 } 314 }
315 if (failsafe) 315 if (failsafe)
316 return per_cpu(centrino_model, cpu)->op_points[i-1].frequency; 316 return per_cpu(centrino_model, cpu)->op_points[i-1].frequency;
317 else 317 else
318 return 0; 318 return 0;
319 } 319 }
320 320
321 /* Return the current CPU frequency in kHz */ 321 /* Return the current CPU frequency in kHz */
322 static unsigned int get_cur_freq(unsigned int cpu) 322 static unsigned int get_cur_freq(unsigned int cpu)
323 { 323 {
324 unsigned l, h; 324 unsigned l, h;
325 unsigned clock_freq; 325 unsigned clock_freq;
326 cpumask_t saved_mask; 326 cpumask_t saved_mask;
327 327
328 saved_mask = current->cpus_allowed; 328 saved_mask = current->cpus_allowed;
329 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); 329 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
330 if (smp_processor_id() != cpu) 330 if (smp_processor_id() != cpu)
331 return 0; 331 return 0;
332 332
333 rdmsr(MSR_IA32_PERF_STATUS, l, h); 333 rdmsr(MSR_IA32_PERF_STATUS, l, h);
334 clock_freq = extract_clock(l, cpu, 0); 334 clock_freq = extract_clock(l, cpu, 0);
335 335
336 if (unlikely(clock_freq == 0)) { 336 if (unlikely(clock_freq == 0)) {
337 /* 337 /*
338 * On some CPUs, we can see transient MSR values (which are 338 * On some CPUs, we can see transient MSR values (which are
339 * not present in _PSS), while the CPU is doing some automatic 339 * not present in _PSS), while the CPU is doing some automatic
340 * P-state transition (like TM2). Get the last freq set 340 * P-state transition (like TM2). Get the last freq set
341 * in PERF_CTL. 341 * in PERF_CTL.
342 */ 342 */
343 rdmsr(MSR_IA32_PERF_CTL, l, h); 343 rdmsr(MSR_IA32_PERF_CTL, l, h);
344 clock_freq = extract_clock(l, cpu, 1); 344 clock_freq = extract_clock(l, cpu, 1);
345 } 345 }
346 346
347 set_cpus_allowed_ptr(current, &saved_mask); 347 set_cpus_allowed_ptr(current, &saved_mask);
348 return clock_freq; 348 return clock_freq;
349 } 349 }
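The save/pin/read/restore sequence in get_cur_freq() is the usual way to read a per-CPU MSR from process context. A rough user-space analogy of the same shape (assuming Linux with glibc's sched_setaffinity(); shown only to illustrate the pattern, not the driver itself):

        #define _GNU_SOURCE
        #include <sched.h>

        /* Pin the calling thread to 'cpu', run the per-CPU work, then put the
         * original affinity back, mirroring saved_mask handling above. */
        static int run_on_cpu(int cpu)
        {
                cpu_set_t saved, target;

                if (sched_getaffinity(0, sizeof(saved), &saved))
                        return -1;      /* like saving current->cpus_allowed */

                CPU_ZERO(&target);
                CPU_SET(cpu, &target);
                if (sched_setaffinity(0, sizeof(target), &target))
                        return -1;      /* like set_cpus_allowed_ptr(current, ...) */

                /* per-CPU work goes here (the driver reads MSR_IA32_PERF_STATUS) */

                return sched_setaffinity(0, sizeof(saved), &saved);     /* restore */
        }

        int main(void)
        {
                return run_on_cpu(0) ? 1 : 0;
        }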
350 350
351 351
352 static int centrino_cpu_init(struct cpufreq_policy *policy) 352 static int centrino_cpu_init(struct cpufreq_policy *policy)
353 { 353 {
354 struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); 354 struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
355 unsigned freq; 355 unsigned freq;
356 unsigned l, h; 356 unsigned l, h;
357 int ret; 357 int ret;
358 int i; 358 int i;
359 359
360 /* Only Intel makes Enhanced Speedstep-capable CPUs */ 360 /* Only Intel makes Enhanced Speedstep-capable CPUs */
361 if (cpu->x86_vendor != X86_VENDOR_INTEL || 361 if (cpu->x86_vendor != X86_VENDOR_INTEL ||
362 !cpu_has(cpu, X86_FEATURE_EST)) 362 !cpu_has(cpu, X86_FEATURE_EST))
363 return -ENODEV; 363 return -ENODEV;
364 364
365 if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) 365 if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC))
366 centrino_driver.flags |= CPUFREQ_CONST_LOOPS; 366 centrino_driver.flags |= CPUFREQ_CONST_LOOPS;
367 367
368 if (policy->cpu != 0) 368 if (policy->cpu != 0)
369 return -ENODEV; 369 return -ENODEV;
370 370
371 for (i = 0; i < N_IDS; i++) 371 for (i = 0; i < N_IDS; i++)
372 if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) 372 if (centrino_verify_cpu_id(cpu, &cpu_ids[i]))
373 break; 373 break;
374 374
375 if (i != N_IDS) 375 if (i != N_IDS)
376 per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i]; 376 per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i];
377 377
378 if (!per_cpu(centrino_cpu, policy->cpu)) { 378 if (!per_cpu(centrino_cpu, policy->cpu)) {
379 dprintk("found unsupported CPU with " 379 dprintk("found unsupported CPU with "
380 "Enhanced SpeedStep: send /proc/cpuinfo to " 380 "Enhanced SpeedStep: send /proc/cpuinfo to "
381 MAINTAINER "\n"); 381 MAINTAINER "\n");
382 return -ENODEV; 382 return -ENODEV;
383 } 383 }
384 384
385 if (centrino_cpu_init_table(policy)) { 385 if (centrino_cpu_init_table(policy)) {
386 return -ENODEV; 386 return -ENODEV;
387 } 387 }
388 388
389 /* Check to see if Enhanced SpeedStep is enabled, and try to 389 /* Check to see if Enhanced SpeedStep is enabled, and try to
390 enable it if not. */ 390 enable it if not. */
391 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 391 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
392 392
393 if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { 393 if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
394 l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; 394 l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
395 dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); 395 dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
396 wrmsr(MSR_IA32_MISC_ENABLE, l, h); 396 wrmsr(MSR_IA32_MISC_ENABLE, l, h);
397 397
398 /* check to see if it stuck */ 398 /* check to see if it stuck */
399 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 399 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
400 if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { 400 if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
401 printk(KERN_INFO PFX 401 printk(KERN_INFO PFX
402 "couldn't enable Enhanced SpeedStep\n"); 402 "couldn't enable Enhanced SpeedStep\n");
403 return -ENODEV; 403 return -ENODEV;
404 } 404 }
405 } 405 }
406 406
407 freq = get_cur_freq(policy->cpu); 407 freq = get_cur_freq(policy->cpu);
408 policy->cpuinfo.transition_latency = 10000; 408 policy->cpuinfo.transition_latency = 10000;
409 /* 10uS transition latency */ 409 /* 10uS transition latency */
410 policy->cur = freq; 410 policy->cur = freq;
411 411
412 dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); 412 dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur);
413 413
414 ret = cpufreq_frequency_table_cpuinfo(policy, 414 ret = cpufreq_frequency_table_cpuinfo(policy,
415 per_cpu(centrino_model, policy->cpu)->op_points); 415 per_cpu(centrino_model, policy->cpu)->op_points);
416 if (ret) 416 if (ret)
417 return (ret); 417 return (ret);
418 418
419 cpufreq_frequency_table_get_attr( 419 cpufreq_frequency_table_get_attr(
420 per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu); 420 per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu);
421 421
422 return 0; 422 return 0;
423 } 423 }
424 424
425 static int centrino_cpu_exit(struct cpufreq_policy *policy) 425 static int centrino_cpu_exit(struct cpufreq_policy *policy)
426 { 426 {
427 unsigned int cpu = policy->cpu; 427 unsigned int cpu = policy->cpu;
428 428
429 if (!per_cpu(centrino_model, cpu)) 429 if (!per_cpu(centrino_model, cpu))
430 return -ENODEV; 430 return -ENODEV;
431 431
432 cpufreq_frequency_table_put_attr(cpu); 432 cpufreq_frequency_table_put_attr(cpu);
433 433
434 per_cpu(centrino_model, cpu) = NULL; 434 per_cpu(centrino_model, cpu) = NULL;
435 435
436 return 0; 436 return 0;
437 } 437 }
438 438
439 /** 439 /**
440 * centrino_verify - verifies a new CPUFreq policy 440 * centrino_verify - verifies a new CPUFreq policy
441 * @policy: new policy 441 * @policy: new policy
442 * 442 *
443 * Limit must be within this model's frequency range, with at 443 * Limit must be within this model's frequency range, with at
444 * least one border included. 444 * least one border included.
445 */ 445 */
446 static int centrino_verify (struct cpufreq_policy *policy) 446 static int centrino_verify (struct cpufreq_policy *policy)
447 { 447 {
448 return cpufreq_frequency_table_verify(policy, 448 return cpufreq_frequency_table_verify(policy,
449 per_cpu(centrino_model, policy->cpu)->op_points); 449 per_cpu(centrino_model, policy->cpu)->op_points);
450 } 450 }
451 451
452 /** 452 /**
453 * centrino_target - set a new CPUFreq policy 453 * centrino_target - set a new CPUFreq policy
454 * @policy: new policy 454 * @policy: new policy
455 * @target_freq: the target frequency 455 * @target_freq: the target frequency
456 * @relation: how that frequency relates to achieved frequency 456 * @relation: how that frequency relates to achieved frequency
457 * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) 457 * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
458 * 458 *
459 * Sets a new CPUFreq policy. 459 * Sets a new CPUFreq policy.
460 */ 460 */
461 static int centrino_target (struct cpufreq_policy *policy, 461 static int centrino_target (struct cpufreq_policy *policy,
462 unsigned int target_freq, 462 unsigned int target_freq,
463 unsigned int relation) 463 unsigned int relation)
464 { 464 {
465 unsigned int newstate = 0; 465 unsigned int newstate = 0;
466 unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; 466 unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu;
467 struct cpufreq_freqs freqs; 467 struct cpufreq_freqs freqs;
468 int retval = 0; 468 int retval = 0;
469 unsigned int j, k, first_cpu, tmp; 469 unsigned int j, k, first_cpu, tmp;
470 cpumask_var_t saved_mask, covered_cpus; 470 cpumask_var_t saved_mask, covered_cpus;
471 471
472 if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL))) 472 if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL)))
473 return -ENOMEM; 473 return -ENOMEM;
474 if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) { 474 if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
475 free_cpumask_var(saved_mask); 475 free_cpumask_var(saved_mask);
476 return -ENOMEM; 476 return -ENOMEM;
477 } 477 }
478 cpumask_copy(saved_mask, &current->cpus_allowed); 478 cpumask_copy(saved_mask, &current->cpus_allowed);
479 479
480 if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { 480 if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
481 retval = -ENODEV; 481 retval = -ENODEV;
482 goto out; 482 goto out;
483 } 483 }
484 484
485 if (unlikely(cpufreq_frequency_table_target(policy, 485 if (unlikely(cpufreq_frequency_table_target(policy,
486 per_cpu(centrino_model, cpu)->op_points, 486 per_cpu(centrino_model, cpu)->op_points,
487 target_freq, 487 target_freq,
488 relation, 488 relation,
489 &newstate))) { 489 &newstate))) {
490 retval = -EINVAL; 490 retval = -EINVAL;
491 goto out; 491 goto out;
492 } 492 }
493 493
494 first_cpu = 1; 494 first_cpu = 1;
495 for_each_cpu(j, policy->cpus) { 495 for_each_cpu(j, policy->cpus) {
496 const struct cpumask *mask; 496 const struct cpumask *mask;
497 497
498 /* cpufreq holds the hotplug lock, so we are safe here */ 498 /* cpufreq holds the hotplug lock, so we are safe here */
499 if (!cpu_online(j)) 499 if (!cpu_online(j))
500 continue; 500 continue;
501 501
502 /* 502 /*
503 * Support for SMP systems. 503 * Support for SMP systems.
504 * Make sure we are running on the CPU that wants to change freq 504 * Make sure we are running on the CPU that wants to change freq
505 */ 505 */
506 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) 506 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
507 mask = policy->cpus; 507 mask = policy->cpus;
508 else 508 else
509 mask = cpumask_of(j); 509 mask = cpumask_of(j);
510 510
511 set_cpus_allowed_ptr(current, mask); 511 set_cpus_allowed_ptr(current, mask);
512 preempt_disable(); 512 preempt_disable();
513 if (unlikely(!cpu_isset(smp_processor_id(), *mask))) { 513 if (unlikely(!cpu_isset(smp_processor_id(), *mask))) {
514 dprintk("couldn't limit to CPUs in this domain\n"); 514 dprintk("couldn't limit to CPUs in this domain\n");
515 retval = -EAGAIN; 515 retval = -EAGAIN;
516 if (first_cpu) { 516 if (first_cpu) {
517 /* We haven't started the transition yet. */ 517 /* We haven't started the transition yet. */
518 goto migrate_end; 518 goto migrate_end;
519 } 519 }
520 preempt_enable(); 520 preempt_enable();
521 break; 521 break;
522 } 522 }
523 523
524 msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; 524 msr = per_cpu(centrino_model, cpu)->op_points[newstate].index;
525 525
526 if (first_cpu) { 526 if (first_cpu) {
527 rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); 527 rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
528 if (msr == (oldmsr & 0xffff)) { 528 if (msr == (oldmsr & 0xffff)) {
529 dprintk("no change needed - msr was and needs " 529 dprintk("no change needed - msr was and needs "
530 "to be %x\n", oldmsr); 530 "to be %x\n", oldmsr);
531 retval = 0; 531 retval = 0;
532 goto migrate_end; 532 goto migrate_end;
533 } 533 }
534 534
535 freqs.old = extract_clock(oldmsr, cpu, 0); 535 freqs.old = extract_clock(oldmsr, cpu, 0);
536 freqs.new = extract_clock(msr, cpu, 0); 536 freqs.new = extract_clock(msr, cpu, 0);
537 537
538 dprintk("target=%dkHz old=%d new=%d msr=%04x\n", 538 dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
539 target_freq, freqs.old, freqs.new, msr); 539 target_freq, freqs.old, freqs.new, msr);
540 540
541 for_each_cpu(k, policy->cpus) { 541 for_each_cpu(k, policy->cpus) {
542 if (!cpu_online(k)) 542 if (!cpu_online(k))
543 continue; 543 continue;
544 freqs.cpu = k; 544 freqs.cpu = k;
545 cpufreq_notify_transition(&freqs, 545 cpufreq_notify_transition(&freqs,
546 CPUFREQ_PRECHANGE); 546 CPUFREQ_PRECHANGE);
547 } 547 }
548 548
549 first_cpu = 0; 549 first_cpu = 0;
550 /* all but 16 LSB are reserved, treat them with care */ 550 /* all but 16 LSB are reserved, treat them with care */
551 oldmsr &= ~0xffff; 551 oldmsr &= ~0xffff;
552 msr &= 0xffff; 552 msr &= 0xffff;
553 oldmsr |= msr; 553 oldmsr |= msr;
554 } 554 }
555 555
556 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); 556 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
557 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { 557 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
558 preempt_enable(); 558 preempt_enable();
559 break; 559 break;
560 } 560 }
561 561
562 cpu_set(j, *covered_cpus); 562 cpu_set(j, *covered_cpus);
563 preempt_enable(); 563 preempt_enable();
564 } 564 }
565 565
566 for_each_cpu(k, policy->cpus) { 566 for_each_cpu(k, policy->cpus) {
567 if (!cpu_online(k)) 567 if (!cpu_online(k))
568 continue; 568 continue;
569 freqs.cpu = k; 569 freqs.cpu = k;
570 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 570 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
571 } 571 }
572 572
573 if (unlikely(retval)) { 573 if (unlikely(retval)) {
574 /* 574 /*
575 * We have failed halfway through the frequency change. 575 * We have failed halfway through the frequency change.
576 * We have sent callbacks to policy->cpus and 576 * We have sent callbacks to policy->cpus and
577 * MSRs have already been written on covered_cpus. 577 * MSRs have already been written on covered_cpus.
578 * Best effort undo.. 578 * Best effort undo..
579 */ 579 */
580 580
581 for_each_cpu_mask_nr(j, *covered_cpus) { 581 for_each_cpu_mask_nr(j, *covered_cpus) {
582 set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); 582 set_cpus_allowed_ptr(current, &cpumask_of_cpu(j));
583 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); 583 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
584 } 584 }
585 585
586 tmp = freqs.new; 586 tmp = freqs.new;
587 freqs.new = freqs.old; 587 freqs.new = freqs.old;
588 freqs.old = tmp; 588 freqs.old = tmp;
589 for_each_cpu(j, policy->cpus) { 589 for_each_cpu(j, policy->cpus) {
590 if (!cpu_online(j)) 590 if (!cpu_online(j))
591 continue; 591 continue;
592 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 592 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
593 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 593 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
594 } 594 }
595 } 595 }
596 set_cpus_allowed_ptr(current, saved_mask); 596 set_cpus_allowed_ptr(current, saved_mask);
597 retval = 0; 597 retval = 0;
598 goto out; 598 goto out;
599 599
600 migrate_end: 600 migrate_end:
601 preempt_enable(); 601 preempt_enable();
602 set_cpus_allowed_ptr(current, saved_mask); 602 set_cpus_allowed_ptr(current, saved_mask);
603 out: 603 out:
604 free_cpumask_var(saved_mask); 604 free_cpumask_var(saved_mask);
605 free_cpumask_var(covered_cpus); 605 free_cpumask_var(covered_cpus);
606 return retval; 606 return retval;
607 } 607 }
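The functional change in this file's hunk swaps alloc_cpumask_var() for zalloc_cpumask_var() when allocating covered_cpus. That mask is only ever OR-ed into (cpu_set()) and then walked by the best-effort undo loop, so it must start with every bit clear; zalloc_cpumask_var() hands it back zeroed, which alloc_cpumask_var() does not promise (under MAXSMP, i.e. CONFIG_CPUMASK_OFFSTACK, it is a plain heap allocation). A rough user-space analogy, not kernel code, is malloc versus calloc:

        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>

        #define MASK_BYTES 64   /* stand-in for cpumask_size() under MAXSMP */

        int main(void)
        {
                unsigned char *zeroed = calloc(1, MASK_BYTES);  /* ~ zalloc_cpumask_var() */
                unsigned char *raw    = malloc(MASK_BYTES);     /* ~ alloc_cpumask_var()  */

                if (!zeroed || !raw)
                        return 1;

                /* Nothing in centrino_target() clears bits before the undo walk,
                 * so stale data in 'raw' would read as CPUs already covered. */
                memset(raw, 0xa5, MASK_BYTES);  /* simulate uninitialized heap data */
                printf("zeroed[0] = %#x, raw[0] = %#x\n", zeroed[0], raw[0]);

                free(zeroed);
                free(raw);
                return 0;
        }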
608 608
609 static struct freq_attr* centrino_attr[] = { 609 static struct freq_attr* centrino_attr[] = {
610 &cpufreq_freq_attr_scaling_available_freqs, 610 &cpufreq_freq_attr_scaling_available_freqs,
611 NULL, 611 NULL,
612 }; 612 };
613 613
614 static struct cpufreq_driver centrino_driver = { 614 static struct cpufreq_driver centrino_driver = {
615 .name = "centrino", /* should be speedstep-centrino, 615 .name = "centrino", /* should be speedstep-centrino,
616 but there's a 16 char limit */ 616 but there's a 16 char limit */
617 .init = centrino_cpu_init, 617 .init = centrino_cpu_init,
618 .exit = centrino_cpu_exit, 618 .exit = centrino_cpu_exit,
619 .verify = centrino_verify, 619 .verify = centrino_verify,
620 .target = centrino_target, 620 .target = centrino_target,
621 .get = get_cur_freq, 621 .get = get_cur_freq,
622 .attr = centrino_attr, 622 .attr = centrino_attr,
623 .owner = THIS_MODULE, 623 .owner = THIS_MODULE,
624 }; 624 };
625 625
626 626
627 /** 627 /**
628 * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver 628 * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver
629 * 629 *
630 * Initializes the Enhanced SpeedStep support. Returns -ENODEV on 630 * Initializes the Enhanced SpeedStep support. Returns -ENODEV on
631 * unsupported devices, -ENOENT if there's no voltage table for this 631 * unsupported devices, -ENOENT if there's no voltage table for this
632 * particular CPU model, -EINVAL on problems during initialization, 632 * particular CPU model, -EINVAL on problems during initialization,
633 * and zero on success. 633 * and zero on success.
634 * 634 *
635 * This is quite picky. Not only does the CPU have to advertise the 635 * This is quite picky. Not only does the CPU have to advertise the
636 * "est" flag in the cpuid capability flags, we look for a specific 636 * "est" flag in the cpuid capability flags, we look for a specific
637 * CPU model and stepping, and we need to have the exact model name in 637 * CPU model and stepping, and we need to have the exact model name in
638 * our voltage tables. That is, be paranoid about not releasing 638 * our voltage tables. That is, be paranoid about not releasing
639 * someone's valuable magic smoke. 639 * someone's valuable magic smoke.
640 */ 640 */
641 static int __init centrino_init(void) 641 static int __init centrino_init(void)
642 { 642 {
643 struct cpuinfo_x86 *cpu = &cpu_data(0); 643 struct cpuinfo_x86 *cpu = &cpu_data(0);
644 644
645 if (!cpu_has(cpu, X86_FEATURE_EST)) 645 if (!cpu_has(cpu, X86_FEATURE_EST))
646 return -ENODEV; 646 return -ENODEV;
647 647
648 return cpufreq_register_driver(&centrino_driver); 648 return cpufreq_register_driver(&centrino_driver);
649 } 649 }
650 650
651 static void __exit centrino_exit(void) 651 static void __exit centrino_exit(void)
652 { 652 {
653 cpufreq_unregister_driver(&centrino_driver); 653 cpufreq_unregister_driver(&centrino_driver);
654 } 654 }
655 655
656 MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>"); 656 MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>");
657 MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors."); 657 MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors.");
658 MODULE_LICENSE ("GPL"); 658 MODULE_LICENSE ("GPL");
659 659
660 late_initcall(centrino_init); 660 late_initcall(centrino_init);
661 module_exit(centrino_exit); 661 module_exit(centrino_exit);
662 662
arch/x86/kernel/cpu/mcheck/mce_64.c
1 /* 1 /*
2 * Machine check handler. 2 * Machine check handler.
3 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. 3 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
4 * Rest from unknown author(s). 4 * Rest from unknown author(s).
5 * 2004 Andi Kleen. Rewrote most of it. 5 * 2004 Andi Kleen. Rewrote most of it.
6 * Copyright 2008 Intel Corporation 6 * Copyright 2008 Intel Corporation
7 * Author: Andi Kleen 7 * Author: Andi Kleen
8 */ 8 */
9 9
10 #include <linux/init.h> 10 #include <linux/init.h>
11 #include <linux/types.h> 11 #include <linux/types.h>
12 #include <linux/kernel.h> 12 #include <linux/kernel.h>
13 #include <linux/sched.h> 13 #include <linux/sched.h>
14 #include <linux/smp_lock.h> 14 #include <linux/smp_lock.h>
15 #include <linux/string.h> 15 #include <linux/string.h>
16 #include <linux/rcupdate.h> 16 #include <linux/rcupdate.h>
17 #include <linux/kallsyms.h> 17 #include <linux/kallsyms.h>
18 #include <linux/sysdev.h> 18 #include <linux/sysdev.h>
19 #include <linux/miscdevice.h> 19 #include <linux/miscdevice.h>
20 #include <linux/fs.h> 20 #include <linux/fs.h>
21 #include <linux/capability.h> 21 #include <linux/capability.h>
22 #include <linux/cpu.h> 22 #include <linux/cpu.h>
23 #include <linux/percpu.h> 23 #include <linux/percpu.h>
24 #include <linux/poll.h> 24 #include <linux/poll.h>
25 #include <linux/thread_info.h> 25 #include <linux/thread_info.h>
26 #include <linux/ctype.h> 26 #include <linux/ctype.h>
27 #include <linux/kmod.h> 27 #include <linux/kmod.h>
28 #include <linux/kdebug.h> 28 #include <linux/kdebug.h>
29 #include <linux/kobject.h> 29 #include <linux/kobject.h>
30 #include <linux/sysfs.h> 30 #include <linux/sysfs.h>
31 #include <linux/ratelimit.h> 31 #include <linux/ratelimit.h>
32 #include <asm/processor.h> 32 #include <asm/processor.h>
33 #include <asm/msr.h> 33 #include <asm/msr.h>
34 #include <asm/mce.h> 34 #include <asm/mce.h>
35 #include <asm/uaccess.h> 35 #include <asm/uaccess.h>
36 #include <asm/smp.h> 36 #include <asm/smp.h>
37 #include <asm/idle.h> 37 #include <asm/idle.h>
38 38
39 #define MISC_MCELOG_MINOR 227 39 #define MISC_MCELOG_MINOR 227
40 40
41 atomic_t mce_entry; 41 atomic_t mce_entry;
42 42
43 static int mce_dont_init; 43 static int mce_dont_init;
44 44
45 /* 45 /*
46 * Tolerant levels: 46 * Tolerant levels:
47 * 0: always panic on uncorrected errors, log corrected errors 47 * 0: always panic on uncorrected errors, log corrected errors
48 * 1: panic or SIGBUS on uncorrected errors, log corrected errors 48 * 1: panic or SIGBUS on uncorrected errors, log corrected errors
49 * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors 49 * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
50 * 3: never panic or SIGBUS, log all errors (for testing only) 50 * 3: never panic or SIGBUS, log all errors (for testing only)
51 */ 51 */
52 static int tolerant = 1; 52 static int tolerant = 1;
53 static int banks; 53 static int banks;
54 static u64 *bank; 54 static u64 *bank;
55 static unsigned long notify_user; 55 static unsigned long notify_user;
56 static int rip_msr; 56 static int rip_msr;
57 static int mce_bootlog = -1; 57 static int mce_bootlog = -1;
58 static atomic_t mce_events; 58 static atomic_t mce_events;
59 59
60 static char trigger[128]; 60 static char trigger[128];
61 static char *trigger_argv[2] = { trigger, NULL }; 61 static char *trigger_argv[2] = { trigger, NULL };
62 62
63 static DECLARE_WAIT_QUEUE_HEAD(mce_wait); 63 static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
64 64
65 /* MCA banks polled by the periodic polling timer for corrected events */ 65 /* MCA banks polled by the periodic polling timer for corrected events */
66 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { 66 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
67 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 67 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
68 }; 68 };
69 69
70 /* Do initial initialization of a struct mce */ 70 /* Do initial initialization of a struct mce */
71 void mce_setup(struct mce *m) 71 void mce_setup(struct mce *m)
72 { 72 {
73 memset(m, 0, sizeof(struct mce)); 73 memset(m, 0, sizeof(struct mce));
74 m->cpu = smp_processor_id(); 74 m->cpu = smp_processor_id();
75 rdtscll(m->tsc); 75 rdtscll(m->tsc);
76 } 76 }
77 77
78 /* 78 /*
79 * Lockless MCE logging infrastructure. 79 * Lockless MCE logging infrastructure.
80 * This avoids deadlocks on printk locks without having to break locks. Also 80 * This avoids deadlocks on printk locks without having to break locks. Also
81 * separate MCEs from kernel messages to avoid bogus bug reports. 81 * separate MCEs from kernel messages to avoid bogus bug reports.
82 */ 82 */
83 83
84 static struct mce_log mcelog = { 84 static struct mce_log mcelog = {
85 MCE_LOG_SIGNATURE, 85 MCE_LOG_SIGNATURE,
86 MCE_LOG_LEN, 86 MCE_LOG_LEN,
87 }; 87 };
88 88
89 void mce_log(struct mce *mce) 89 void mce_log(struct mce *mce)
90 { 90 {
91 unsigned next, entry; 91 unsigned next, entry;
92 atomic_inc(&mce_events); 92 atomic_inc(&mce_events);
93 mce->finished = 0; 93 mce->finished = 0;
94 wmb(); 94 wmb();
95 for (;;) { 95 for (;;) {
96 entry = rcu_dereference(mcelog.next); 96 entry = rcu_dereference(mcelog.next);
97 for (;;) { 97 for (;;) {
98 /* When the buffer fills up, discard new entries. Assume 98 /* When the buffer fills up, discard new entries. Assume
99 that the earlier errors are more interesting. */ 99 that the earlier errors are more interesting. */
100 if (entry >= MCE_LOG_LEN) { 100 if (entry >= MCE_LOG_LEN) {
101 set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); 101 set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
102 return; 102 return;
103 } 103 }
104 /* Old left over entry. Skip. */ 104 /* Old left over entry. Skip. */
105 if (mcelog.entry[entry].finished) { 105 if (mcelog.entry[entry].finished) {
106 entry++; 106 entry++;
107 continue; 107 continue;
108 } 108 }
109 break; 109 break;
110 } 110 }
111 smp_rmb(); 111 smp_rmb();
112 next = entry + 1; 112 next = entry + 1;
113 if (cmpxchg(&mcelog.next, entry, next) == entry) 113 if (cmpxchg(&mcelog.next, entry, next) == entry)
114 break; 114 break;
115 } 115 }
116 memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); 116 memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
117 wmb(); 117 wmb();
118 mcelog.entry[entry].finished = 1; 118 mcelog.entry[entry].finished = 1;
119 wmb(); 119 wmb();
120 120
121 set_bit(0, &notify_user); 121 set_bit(0, &notify_user);
122 } 122 }
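The loop in mce_log() reserves a log slot locklessly: each would-be writer reads mcelog.next, skips finished leftovers, and only the writer whose cmpxchg() on next succeeds owns that entry. A condensed user-space sketch of the claim step (C11 atomics, with the finished-entry skip omitted):

        #include <stdatomic.h>
        #include <stdio.h>

        #define LOG_LEN 32                      /* like MCE_LOG_LEN */

        static _Atomic unsigned next;
        static int entries[LOG_LEN];

        /* Returns the claimed slot, or -1 when full (the kernel sets
         * MCE_OVERFLOW and drops the record in that case). */
        static int claim_slot(void)
        {
                unsigned entry;

                for (;;) {
                        entry = atomic_load(&next);
                        if (entry >= LOG_LEN)
                                return -1;
                        /* cmpxchg(&mcelog.next, entry, entry + 1) in the kernel */
                        if (atomic_compare_exchange_weak(&next, &entry, entry + 1))
                                return (int)entry;
                }
        }

        int main(void)
        {
                int slot = claim_slot();

                if (slot >= 0)
                        entries[slot] = 42;     /* the record is now ours to fill */
                printf("claimed slot %d\n", slot);
                return 0;
        }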
123 123
124 static void print_mce(struct mce *m) 124 static void print_mce(struct mce *m)
125 { 125 {
126 printk(KERN_EMERG "\n" 126 printk(KERN_EMERG "\n"
127 KERN_EMERG "HARDWARE ERROR\n" 127 KERN_EMERG "HARDWARE ERROR\n"
128 KERN_EMERG 128 KERN_EMERG
129 "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", 129 "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
130 m->cpu, m->mcgstatus, m->bank, m->status); 130 m->cpu, m->mcgstatus, m->bank, m->status);
131 if (m->ip) { 131 if (m->ip) {
132 printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", 132 printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
133 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", 133 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
134 m->cs, m->ip); 134 m->cs, m->ip);
135 if (m->cs == __KERNEL_CS) 135 if (m->cs == __KERNEL_CS)
136 print_symbol("{%s}", m->ip); 136 print_symbol("{%s}", m->ip);
137 printk("\n"); 137 printk("\n");
138 } 138 }
139 printk(KERN_EMERG "TSC %llx ", m->tsc); 139 printk(KERN_EMERG "TSC %llx ", m->tsc);
140 if (m->addr) 140 if (m->addr)
141 printk("ADDR %llx ", m->addr); 141 printk("ADDR %llx ", m->addr);
142 if (m->misc) 142 if (m->misc)
143 printk("MISC %llx ", m->misc); 143 printk("MISC %llx ", m->misc);
144 printk("\n"); 144 printk("\n");
145 printk(KERN_EMERG "This is not a software problem!\n"); 145 printk(KERN_EMERG "This is not a software problem!\n");
146 printk(KERN_EMERG "Run through mcelog --ascii to decode " 146 printk(KERN_EMERG "Run through mcelog --ascii to decode "
147 "and contact your hardware vendor\n"); 147 "and contact your hardware vendor\n");
148 } 148 }
149 149
150 static void mce_panic(char *msg, struct mce *backup, unsigned long start) 150 static void mce_panic(char *msg, struct mce *backup, unsigned long start)
151 { 151 {
152 int i; 152 int i;
153 153
154 oops_begin(); 154 oops_begin();
155 for (i = 0; i < MCE_LOG_LEN; i++) { 155 for (i = 0; i < MCE_LOG_LEN; i++) {
156 unsigned long tsc = mcelog.entry[i].tsc; 156 unsigned long tsc = mcelog.entry[i].tsc;
157 157
158 if (time_before(tsc, start)) 158 if (time_before(tsc, start))
159 continue; 159 continue;
160 print_mce(&mcelog.entry[i]); 160 print_mce(&mcelog.entry[i]);
161 if (backup && mcelog.entry[i].tsc == backup->tsc) 161 if (backup && mcelog.entry[i].tsc == backup->tsc)
162 backup = NULL; 162 backup = NULL;
163 } 163 }
164 if (backup) 164 if (backup)
165 print_mce(backup); 165 print_mce(backup);
166 panic(msg); 166 panic(msg);
167 } 167 }
168 168
169 int mce_available(struct cpuinfo_x86 *c) 169 int mce_available(struct cpuinfo_x86 *c)
170 { 170 {
171 if (mce_dont_init) 171 if (mce_dont_init)
172 return 0; 172 return 0;
173 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); 173 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
174 } 174 }
175 175
176 static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) 176 static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
177 { 177 {
178 if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { 178 if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
179 m->ip = regs->ip; 179 m->ip = regs->ip;
180 m->cs = regs->cs; 180 m->cs = regs->cs;
181 } else { 181 } else {
182 m->ip = 0; 182 m->ip = 0;
183 m->cs = 0; 183 m->cs = 0;
184 } 184 }
185 if (rip_msr) { 185 if (rip_msr) {
186 /* Assume the RIP in the MSR is exact. Is this true? */ 186 /* Assume the RIP in the MSR is exact. Is this true? */
187 m->mcgstatus |= MCG_STATUS_EIPV; 187 m->mcgstatus |= MCG_STATUS_EIPV;
188 rdmsrl(rip_msr, m->ip); 188 rdmsrl(rip_msr, m->ip);
189 m->cs = 0; 189 m->cs = 0;
190 } 190 }
191 } 191 }
192 192
193 /* 193 /*
194 * Poll for corrected events or events that happened before reset. 194 * Poll for corrected events or events that happened before reset.
195 * Those are just logged through /dev/mcelog. 195 * Those are just logged through /dev/mcelog.
196 * 196 *
197 * This is executed in standard interrupt context. 197 * This is executed in standard interrupt context.
198 */ 198 */
199 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) 199 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
200 { 200 {
201 struct mce m; 201 struct mce m;
202 int i; 202 int i;
203 203
204 mce_setup(&m); 204 mce_setup(&m);
205 205
206 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); 206 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
207 for (i = 0; i < banks; i++) { 207 for (i = 0; i < banks; i++) {
208 if (!bank[i] || !test_bit(i, *b)) 208 if (!bank[i] || !test_bit(i, *b))
209 continue; 209 continue;
210 210
211 m.misc = 0; 211 m.misc = 0;
212 m.addr = 0; 212 m.addr = 0;
213 m.bank = i; 213 m.bank = i;
214 m.tsc = 0; 214 m.tsc = 0;
215 215
216 barrier(); 216 barrier();
217 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); 217 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
218 if (!(m.status & MCI_STATUS_VAL)) 218 if (!(m.status & MCI_STATUS_VAL))
219 continue; 219 continue;
220 220
221 /* 221 /*
222 * Uncorrected events are handled by the exception handler 222 * Uncorrected events are handled by the exception handler
223 * when it is enabled. But when the exception is disabled log 223 * when it is enabled. But when the exception is disabled log
224 * everything. 224 * everything.
225 * 225 *
226 * TBD do the same check for MCI_STATUS_EN here? 226 * TBD do the same check for MCI_STATUS_EN here?
227 */ 227 */
228 if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) 228 if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
229 continue; 229 continue;
230 230
231 if (m.status & MCI_STATUS_MISCV) 231 if (m.status & MCI_STATUS_MISCV)
232 rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); 232 rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
233 if (m.status & MCI_STATUS_ADDRV) 233 if (m.status & MCI_STATUS_ADDRV)
234 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); 234 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
235 235
236 if (!(flags & MCP_TIMESTAMP)) 236 if (!(flags & MCP_TIMESTAMP))
237 m.tsc = 0; 237 m.tsc = 0;
238 /* 238 /*
239 * Don't get the IP here because it's unlikely to 239 * Don't get the IP here because it's unlikely to
240 * have anything to do with the actual error location. 240 * have anything to do with the actual error location.
241 */ 241 */
242 if (!(flags & MCP_DONTLOG)) { 242 if (!(flags & MCP_DONTLOG)) {
243 mce_log(&m); 243 mce_log(&m);
244 add_taint(TAINT_MACHINE_CHECK); 244 add_taint(TAINT_MACHINE_CHECK);
245 } 245 }
246 246
247 /* 247 /*
248 * Clear state for this bank. 248 * Clear state for this bank.
249 */ 249 */
250 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 250 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
251 } 251 }
252 252
253 /* 253 /*
254 * Don't clear MCG_STATUS here because it's only defined for 254 * Don't clear MCG_STATUS here because it's only defined for
255 * exceptions. 255 * exceptions.
256 */ 256 */
257 } 257 }
258 258
259 /* 259 /*
260 * The actual machine check handler. This only handles real 260 * The actual machine check handler. This only handles real
261 * exceptions when something got corrupted coming in through int 18. 261 * exceptions when something got corrupted coming in through int 18.
262 * 262 *
263 * This is executed in NMI context not subject to normal locking rules. This 263 * This is executed in NMI context not subject to normal locking rules. This
264 * implies that most kernel services cannot be safely used. Don't even 264 * implies that most kernel services cannot be safely used. Don't even
265 * think about putting a printk in there! 265 * think about putting a printk in there!
266 */ 266 */
267 void do_machine_check(struct pt_regs * regs, long error_code) 267 void do_machine_check(struct pt_regs * regs, long error_code)
268 { 268 {
269 struct mce m, panicm; 269 struct mce m, panicm;
270 u64 mcestart = 0; 270 u64 mcestart = 0;
271 int i; 271 int i;
272 int panicm_found = 0; 272 int panicm_found = 0;
273 /* 273 /*
274 * If no_way_out gets set, there is no safe way to recover from this 274 * If no_way_out gets set, there is no safe way to recover from this
275 * MCE. If tolerant is cranked up, we'll try anyway. 275 * MCE. If tolerant is cranked up, we'll try anyway.
276 */ 276 */
277 int no_way_out = 0; 277 int no_way_out = 0;
278 /* 278 /*
279 * If kill_it gets set, there might be a way to recover from this 279 * If kill_it gets set, there might be a way to recover from this
280 * error. 280 * error.
281 */ 281 */
282 int kill_it = 0; 282 int kill_it = 0;
283 DECLARE_BITMAP(toclear, MAX_NR_BANKS); 283 DECLARE_BITMAP(toclear, MAX_NR_BANKS);
284 284
285 atomic_inc(&mce_entry); 285 atomic_inc(&mce_entry);
286 286
287 if (notify_die(DIE_NMI, "machine check", regs, error_code, 287 if (notify_die(DIE_NMI, "machine check", regs, error_code,
288 18, SIGKILL) == NOTIFY_STOP) 288 18, SIGKILL) == NOTIFY_STOP)
289 goto out2; 289 goto out2;
290 if (!banks) 290 if (!banks)
291 goto out2; 291 goto out2;
292 292
293 mce_setup(&m); 293 mce_setup(&m);
294 294
295 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); 295 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
296 /* if the restart IP is not valid, we're done for */ 296 /* if the restart IP is not valid, we're done for */
297 if (!(m.mcgstatus & MCG_STATUS_RIPV)) 297 if (!(m.mcgstatus & MCG_STATUS_RIPV))
298 no_way_out = 1; 298 no_way_out = 1;
299 299
300 rdtscll(mcestart); 300 rdtscll(mcestart);
301 barrier(); 301 barrier();
302 302
303 for (i = 0; i < banks; i++) { 303 for (i = 0; i < banks; i++) {
304 __clear_bit(i, toclear); 304 __clear_bit(i, toclear);
305 if (!bank[i]) 305 if (!bank[i])
306 continue; 306 continue;
307 307
308 m.misc = 0; 308 m.misc = 0;
309 m.addr = 0; 309 m.addr = 0;
310 m.bank = i; 310 m.bank = i;
311 311
312 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); 312 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
313 if ((m.status & MCI_STATUS_VAL) == 0) 313 if ((m.status & MCI_STATUS_VAL) == 0)
314 continue; 314 continue;
315 315
316 /* 316 /*
317 * Non-uncorrected errors are handled by machine_check_poll. 317 * Non-uncorrected errors are handled by machine_check_poll.
318 * Leave them alone. 318 * Leave them alone.
319 */ 319 */
320 if ((m.status & MCI_STATUS_UC) == 0) 320 if ((m.status & MCI_STATUS_UC) == 0)
321 continue; 321 continue;
322 322
323 /* 323 /*
324 * Set taint even when machine check was not enabled. 324 * Set taint even when machine check was not enabled.
325 */ 325 */
326 add_taint(TAINT_MACHINE_CHECK); 326 add_taint(TAINT_MACHINE_CHECK);
327 327
328 __set_bit(i, toclear); 328 __set_bit(i, toclear);
329 329
330 if (m.status & MCI_STATUS_EN) { 330 if (m.status & MCI_STATUS_EN) {
331 /* if PCC was set, there's no way out */ 331 /* if PCC was set, there's no way out */
332 no_way_out |= !!(m.status & MCI_STATUS_PCC); 332 no_way_out |= !!(m.status & MCI_STATUS_PCC);
333 /* 333 /*
334 * If this error was uncorrectable and there was 334 * If this error was uncorrectable and there was
335 * an overflow, we're in trouble. If no overflow, 335 * an overflow, we're in trouble. If no overflow,
336 * we might get away with just killing a task. 336 * we might get away with just killing a task.
337 */ 337 */
338 if (m.status & MCI_STATUS_UC) { 338 if (m.status & MCI_STATUS_UC) {
339 if (tolerant < 1 || m.status & MCI_STATUS_OVER) 339 if (tolerant < 1 || m.status & MCI_STATUS_OVER)
340 no_way_out = 1; 340 no_way_out = 1;
341 kill_it = 1; 341 kill_it = 1;
342 } 342 }
343 } else { 343 } else {
344 /* 344 /*
345 * Machine check event was not enabled. Clear, but 345 * Machine check event was not enabled. Clear, but
346 * ignore. 346 * ignore.
347 */ 347 */
348 continue; 348 continue;
349 } 349 }
350 350
351 if (m.status & MCI_STATUS_MISCV) 351 if (m.status & MCI_STATUS_MISCV)
352 rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); 352 rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
353 if (m.status & MCI_STATUS_ADDRV) 353 if (m.status & MCI_STATUS_ADDRV)
354 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); 354 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
355 355
356 mce_get_rip(&m, regs); 356 mce_get_rip(&m, regs);
357 mce_log(&m); 357 mce_log(&m);
358 358
359 /* Did this bank cause the exception? */ 359 /* Did this bank cause the exception? */
360 /* Assume that the bank with uncorrectable errors did it, 360 /* Assume that the bank with uncorrectable errors did it,
361 and that there is only a single one. */ 361 and that there is only a single one. */
362 if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) { 362 if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
363 panicm = m; 363 panicm = m;
364 panicm_found = 1; 364 panicm_found = 1;
365 } 365 }
366 } 366 }
367 367
368 /* If we didn't find an uncorrectable error, pick 368 /* If we didn't find an uncorrectable error, pick
369 the last one (shouldn't happen, just being safe). */ 369 the last one (shouldn't happen, just being safe). */
370 if (!panicm_found) 370 if (!panicm_found)
371 panicm = m; 371 panicm = m;
372 372
373 /* 373 /*
374 * If we have decided that we just CAN'T continue, and the user 374 * If we have decided that we just CAN'T continue, and the user
375 * has not set tolerant to an insane level, give up and die. 375 * has not set tolerant to an insane level, give up and die.
376 */ 376 */
377 if (no_way_out && tolerant < 3) 377 if (no_way_out && tolerant < 3)
378 mce_panic("Machine check", &panicm, mcestart); 378 mce_panic("Machine check", &panicm, mcestart);
379 379
380 /* 380 /*
381 * If the error seems to be unrecoverable, something should be 381 * If the error seems to be unrecoverable, something should be
382 * done. Try to kill as little as possible. If we can kill just 382 * done. Try to kill as little as possible. If we can kill just
383 * one task, do that. If the user has set the tolerance very 383 * one task, do that. If the user has set the tolerance very
384 * high, don't try to do anything at all. 384 * high, don't try to do anything at all.
385 */ 385 */
386 if (kill_it && tolerant < 3) { 386 if (kill_it && tolerant < 3) {
387 int user_space = 0; 387 int user_space = 0;
388 388
389 /* 389 /*
390 * If the EIPV bit is set, it means the saved IP is the 390 * If the EIPV bit is set, it means the saved IP is the
391 * instruction which caused the MCE. 391 * instruction which caused the MCE.
392 */ 392 */
393 if (m.mcgstatus & MCG_STATUS_EIPV) 393 if (m.mcgstatus & MCG_STATUS_EIPV)
394 user_space = panicm.ip && (panicm.cs & 3); 394 user_space = panicm.ip && (panicm.cs & 3);
395 395
396 /* 396 /*
397 * If we know that the error was in user space, send a 397 * If we know that the error was in user space, send a
398 * SIGBUS. Otherwise, panic if tolerance is low. 398 * SIGBUS. Otherwise, panic if tolerance is low.
399 * 399 *
400 * force_sig() takes an awful lot of locks and has a slight 400 * force_sig() takes an awful lot of locks and has a slight
401 * risk of deadlocking. 401 * risk of deadlocking.
402 */ 402 */
403 if (user_space) { 403 if (user_space) {
404 force_sig(SIGBUS, current); 404 force_sig(SIGBUS, current);
405 } else if (panic_on_oops || tolerant < 2) { 405 } else if (panic_on_oops || tolerant < 2) {
406 mce_panic("Uncorrected machine check", 406 mce_panic("Uncorrected machine check",
407 &panicm, mcestart); 407 &panicm, mcestart);
408 } 408 }
409 } 409 }
410 410
411 /* notify userspace ASAP */ 411 /* notify userspace ASAP */
412 set_thread_flag(TIF_MCE_NOTIFY); 412 set_thread_flag(TIF_MCE_NOTIFY);
413 413
414 /* the last thing we do is clear state */ 414 /* the last thing we do is clear state */
415 for (i = 0; i < banks; i++) { 415 for (i = 0; i < banks; i++) {
416 if (test_bit(i, toclear)) 416 if (test_bit(i, toclear))
417 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 417 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
418 } 418 }
419 wrmsrl(MSR_IA32_MCG_STATUS, 0); 419 wrmsrl(MSR_IA32_MCG_STATUS, 0);
420 out2: 420 out2:
421 atomic_dec(&mce_entry); 421 atomic_dec(&mce_entry);
422 } 422 }
423 423
424 #ifdef CONFIG_X86_MCE_INTEL 424 #ifdef CONFIG_X86_MCE_INTEL
425 /*** 425 /***
426 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog 426 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
427 * @cpu: The CPU on which the event occurred. 427 * @cpu: The CPU on which the event occurred.
428 * @status: Event status information 428 * @status: Event status information
429 * 429 *
430 * This function should be called by the thermal interrupt after the 430 * This function should be called by the thermal interrupt after the
431 * event has been processed and the decision was made to log the event 431 * event has been processed and the decision was made to log the event
432 * further. 432 * further.
433 * 433 *
434 * The status parameter will be saved to the 'status' field of 'struct mce' 434 * The status parameter will be saved to the 'status' field of 'struct mce'
435 * and historically has been the register value of the 435 * and historically has been the register value of the
436 * MSR_IA32_THERMAL_STATUS (Intel) msr. 436 * MSR_IA32_THERMAL_STATUS (Intel) msr.
437 */ 437 */
438 void mce_log_therm_throt_event(__u64 status) 438 void mce_log_therm_throt_event(__u64 status)
439 { 439 {
440 struct mce m; 440 struct mce m;
441 441
442 mce_setup(&m); 442 mce_setup(&m);
443 m.bank = MCE_THERMAL_BANK; 443 m.bank = MCE_THERMAL_BANK;
444 m.status = status; 444 m.status = status;
445 mce_log(&m); 445 mce_log(&m);
446 } 446 }
447 #endif /* CONFIG_X86_MCE_INTEL */ 447 #endif /* CONFIG_X86_MCE_INTEL */
448 448
449 /* 449 /*
450 * Periodic polling timer for "silent" machine check errors. If the 450 * Periodic polling timer for "silent" machine check errors. If the
451 * poller finds an MCE, poll 2x faster. When the poller finds no more 451 * poller finds an MCE, poll 2x faster. When the poller finds no more
452 * errors, poll 2x slower (up to check_interval seconds). 452 * errors, poll 2x slower (up to check_interval seconds).
453 */ 453 */
454 454
455 static int check_interval = 5 * 60; /* 5 minutes */ 455 static int check_interval = 5 * 60; /* 5 minutes */
456 static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ 456 static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
457 static void mcheck_timer(unsigned long); 457 static void mcheck_timer(unsigned long);
458 static DEFINE_PER_CPU(struct timer_list, mce_timer); 458 static DEFINE_PER_CPU(struct timer_list, mce_timer);
459 459
460 static void mcheck_timer(unsigned long data) 460 static void mcheck_timer(unsigned long data)
461 { 461 {
462 struct timer_list *t = &per_cpu(mce_timer, data); 462 struct timer_list *t = &per_cpu(mce_timer, data);
463 int *n; 463 int *n;
464 464
465 WARN_ON(smp_processor_id() != data); 465 WARN_ON(smp_processor_id() != data);
466 466
467 if (mce_available(&current_cpu_data)) 467 if (mce_available(&current_cpu_data))
468 machine_check_poll(MCP_TIMESTAMP, 468 machine_check_poll(MCP_TIMESTAMP,
469 &__get_cpu_var(mce_poll_banks)); 469 &__get_cpu_var(mce_poll_banks));
470 470
471 /* 471 /*
472 * Alert userspace if needed. If we logged an MCE, reduce the 472 * Alert userspace if needed. If we logged an MCE, reduce the
473 * polling interval, otherwise increase the polling interval. 473 * polling interval, otherwise increase the polling interval.
474 */ 474 */
475 n = &__get_cpu_var(next_interval); 475 n = &__get_cpu_var(next_interval);
476 if (mce_notify_user()) { 476 if (mce_notify_user()) {
477 *n = max(*n/2, HZ/100); 477 *n = max(*n/2, HZ/100);
478 } else { 478 } else {
479 *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); 479 *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
480 } 480 }
481 481
482 t->expires = jiffies + *n; 482 t->expires = jiffies + *n;
483 add_timer(t); 483 add_timer(t);
484 } 484 }
485 485
486 static void mce_do_trigger(struct work_struct *work) 486 static void mce_do_trigger(struct work_struct *work)
487 { 487 {
488 call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); 488 call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
489 } 489 }
490 490
491 static DECLARE_WORK(mce_trigger_work, mce_do_trigger); 491 static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
492 492
493 /* 493 /*
494 * Notify the user(s) about new machine check events. 494 * Notify the user(s) about new machine check events.
495 * Can be called from interrupt context, but not from machine check/NMI 495 * Can be called from interrupt context, but not from machine check/NMI
496 * context. 496 * context.
497 */ 497 */
498 int mce_notify_user(void) 498 int mce_notify_user(void)
499 { 499 {
500 /* Not more than two messages every minute */ 500 /* Not more than two messages every minute */
501 static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); 501 static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
502 502
503 clear_thread_flag(TIF_MCE_NOTIFY); 503 clear_thread_flag(TIF_MCE_NOTIFY);
504 if (test_and_clear_bit(0, &notify_user)) { 504 if (test_and_clear_bit(0, &notify_user)) {
505 wake_up_interruptible(&mce_wait); 505 wake_up_interruptible(&mce_wait);
506 506
507 /* 507 /*
508 * There is no risk of missing notifications because 508 * There is no risk of missing notifications because
509 * work_pending is always cleared before the function is 509 * work_pending is always cleared before the function is
510 * executed. 510 * executed.
511 */ 511 */
512 if (trigger[0] && !work_pending(&mce_trigger_work)) 512 if (trigger[0] && !work_pending(&mce_trigger_work))
513 schedule_work(&mce_trigger_work); 513 schedule_work(&mce_trigger_work);
514 514
515 if (__ratelimit(&ratelimit)) 515 if (__ratelimit(&ratelimit))
516 printk(KERN_INFO "Machine check events logged\n"); 516 printk(KERN_INFO "Machine check events logged\n");
517 517
518 return 1; 518 return 1;
519 } 519 }
520 return 0; 520 return 0;
521 } 521 }
522 522
523 /* see if the idle task needs to notify userspace */ 523 /* see if the idle task needs to notify userspace */
524 static int 524 static int
525 mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk) 525 mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk)
526 { 526 {
527 /* IDLE_END should be safe - interrupts are back on */ 527 /* IDLE_END should be safe - interrupts are back on */
528 if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY)) 528 if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY))
529 mce_notify_user(); 529 mce_notify_user();
530 530
531 return NOTIFY_OK; 531 return NOTIFY_OK;
532 } 532 }
533 533
534 static struct notifier_block mce_idle_notifier = { 534 static struct notifier_block mce_idle_notifier = {
535 .notifier_call = mce_idle_callback, 535 .notifier_call = mce_idle_callback,
536 }; 536 };
537 537
538 static __init int periodic_mcheck_init(void) 538 static __init int periodic_mcheck_init(void)
539 { 539 {
540 idle_notifier_register(&mce_idle_notifier); 540 idle_notifier_register(&mce_idle_notifier);
541 return 0; 541 return 0;
542 } 542 }
543 __initcall(periodic_mcheck_init); 543 __initcall(periodic_mcheck_init);
544 544
545 /* 545 /*
546 * Initialize Machine Checks for a CPU. 546 * Initialize Machine Checks for a CPU.
547 */ 547 */
548 static int mce_cap_init(void) 548 static int mce_cap_init(void)
549 { 549 {
550 u64 cap; 550 u64 cap;
551 unsigned b; 551 unsigned b;
552 552
553 rdmsrl(MSR_IA32_MCG_CAP, cap); 553 rdmsrl(MSR_IA32_MCG_CAP, cap);
554 b = cap & 0xff; 554 b = cap & 0xff;
555 if (b > MAX_NR_BANKS) { 555 if (b > MAX_NR_BANKS) {
556 printk(KERN_WARNING 556 printk(KERN_WARNING
557 "MCE: Using only %u machine check banks out of %u\n", 557 "MCE: Using only %u machine check banks out of %u\n",
558 MAX_NR_BANKS, b); 558 MAX_NR_BANKS, b);
559 b = MAX_NR_BANKS; 559 b = MAX_NR_BANKS;
560 } 560 }
561 561
562 /* Don't support asymmetric configurations today */ 562 /* Don't support asymmetric configurations today */
563 WARN_ON(banks != 0 && b != banks); 563 WARN_ON(banks != 0 && b != banks);
564 banks = b; 564 banks = b;
565 if (!bank) { 565 if (!bank) {
566 bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); 566 bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
567 if (!bank) 567 if (!bank)
568 return -ENOMEM; 568 return -ENOMEM;
569 memset(bank, 0xff, banks * sizeof(u64)); 569 memset(bank, 0xff, banks * sizeof(u64));
570 } 570 }
571 571
572 /* Use accurate RIP reporting if available. */ 572 /* Use accurate RIP reporting if available. */
573 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) 573 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
574 rip_msr = MSR_IA32_MCG_EIP; 574 rip_msr = MSR_IA32_MCG_EIP;
575 575
576 return 0; 576 return 0;
577 } 577 }
578 578
579 static void mce_init(void *dummy) 579 static void mce_init(void *dummy)
580 { 580 {
581 u64 cap; 581 u64 cap;
582 int i; 582 int i;
583 mce_banks_t all_banks; 583 mce_banks_t all_banks;
584 584
585 /* 585 /*
586 * Log the machine checks left over from the previous reset. 586 * Log the machine checks left over from the previous reset.
587 */ 587 */
588 bitmap_fill(all_banks, MAX_NR_BANKS); 588 bitmap_fill(all_banks, MAX_NR_BANKS);
589 machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); 589 machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
590 590
591 set_in_cr4(X86_CR4_MCE); 591 set_in_cr4(X86_CR4_MCE);
592 592
593 rdmsrl(MSR_IA32_MCG_CAP, cap); 593 rdmsrl(MSR_IA32_MCG_CAP, cap);
594 if (cap & MCG_CTL_P) 594 if (cap & MCG_CTL_P)
595 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 595 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
596 596
597 for (i = 0; i < banks; i++) { 597 for (i = 0; i < banks; i++) {
598 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); 598 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
599 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 599 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
600 } 600 }
601 } 601 }
602 602
603 /* Add per CPU specific workarounds here */ 603 /* Add per CPU specific workarounds here */
604 static void mce_cpu_quirks(struct cpuinfo_x86 *c) 604 static void mce_cpu_quirks(struct cpuinfo_x86 *c)
605 { 605 {
606 /* This should be disabled by the BIOS, but isn't always */ 606 /* This should be disabled by the BIOS, but isn't always */
607 if (c->x86_vendor == X86_VENDOR_AMD) { 607 if (c->x86_vendor == X86_VENDOR_AMD) {
608 if (c->x86 == 15 && banks > 4) 608 if (c->x86 == 15 && banks > 4)
609 /* disable GART TBL walk error reporting, which trips off 609 /* disable GART TBL walk error reporting, which trips off
610 incorrectly with the IOMMU & 3ware & Cerberus. */ 610 incorrectly with the IOMMU & 3ware & Cerberus. */
611 clear_bit(10, (unsigned long *)&bank[4]); 611 clear_bit(10, (unsigned long *)&bank[4]);
612 if(c->x86 <= 17 && mce_bootlog < 0) 612 if(c->x86 <= 17 && mce_bootlog < 0)
613 /* Lots of broken BIOS around that don't clear them 613 /* Lots of broken BIOS around that don't clear them
614 by default and leave crap in there. Don't log. */ 614 by default and leave crap in there. Don't log. */
615 mce_bootlog = 0; 615 mce_bootlog = 0;
616 } 616 }
617 617
618 } 618 }
619 619
620 static void mce_cpu_features(struct cpuinfo_x86 *c) 620 static void mce_cpu_features(struct cpuinfo_x86 *c)
621 { 621 {
622 switch (c->x86_vendor) { 622 switch (c->x86_vendor) {
623 case X86_VENDOR_INTEL: 623 case X86_VENDOR_INTEL:
624 mce_intel_feature_init(c); 624 mce_intel_feature_init(c);
625 break; 625 break;
626 case X86_VENDOR_AMD: 626 case X86_VENDOR_AMD:
627 mce_amd_feature_init(c); 627 mce_amd_feature_init(c);
628 break; 628 break;
629 default: 629 default:
630 break; 630 break;
631 } 631 }
632 } 632 }
633 633
634 static void mce_init_timer(void) 634 static void mce_init_timer(void)
635 { 635 {
636 struct timer_list *t = &__get_cpu_var(mce_timer); 636 struct timer_list *t = &__get_cpu_var(mce_timer);
637 int *n = &__get_cpu_var(next_interval); 637 int *n = &__get_cpu_var(next_interval);
638 638
639 *n = check_interval * HZ; 639 *n = check_interval * HZ;
640 if (!*n) 640 if (!*n)
641 return; 641 return;
642 setup_timer(t, mcheck_timer, smp_processor_id()); 642 setup_timer(t, mcheck_timer, smp_processor_id());
643 t->expires = round_jiffies(jiffies + *n); 643 t->expires = round_jiffies(jiffies + *n);
644 add_timer(t); 644 add_timer(t);
645 } 645 }
646 646
647 /* 647 /*
648 * Called for each booted CPU to set up machine checks. 648 * Called for each booted CPU to set up machine checks.
649 * Must be called with preempt off. 649 * Must be called with preempt off.
650 */ 650 */
651 void __cpuinit mcheck_init(struct cpuinfo_x86 *c) 651 void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
652 { 652 {
653 if (!mce_available(c)) 653 if (!mce_available(c))
654 return; 654 return;
655 655
656 if (mce_cap_init() < 0) { 656 if (mce_cap_init() < 0) {
657 mce_dont_init = 1; 657 mce_dont_init = 1;
658 return; 658 return;
659 } 659 }
660 mce_cpu_quirks(c); 660 mce_cpu_quirks(c);
661 661
662 mce_init(NULL); 662 mce_init(NULL);
663 mce_cpu_features(c); 663 mce_cpu_features(c);
664 mce_init_timer(); 664 mce_init_timer();
665 } 665 }
666 666
667 /* 667 /*
668 * Character device to read and clear the MCE log. 668 * Character device to read and clear the MCE log.
669 */ 669 */
670 670
671 static DEFINE_SPINLOCK(mce_state_lock); 671 static DEFINE_SPINLOCK(mce_state_lock);
672 static int open_count; /* #times opened */ 672 static int open_count; /* #times opened */
673 static int open_exclu; /* already open exclusive? */ 673 static int open_exclu; /* already open exclusive? */
674 674
675 static int mce_open(struct inode *inode, struct file *file) 675 static int mce_open(struct inode *inode, struct file *file)
676 { 676 {
677 lock_kernel(); 677 lock_kernel();
678 spin_lock(&mce_state_lock); 678 spin_lock(&mce_state_lock);
679 679
680 if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { 680 if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
681 spin_unlock(&mce_state_lock); 681 spin_unlock(&mce_state_lock);
682 unlock_kernel(); 682 unlock_kernel();
683 return -EBUSY; 683 return -EBUSY;
684 } 684 }
685 685
686 if (file->f_flags & O_EXCL) 686 if (file->f_flags & O_EXCL)
687 open_exclu = 1; 687 open_exclu = 1;
688 open_count++; 688 open_count++;
689 689
690 spin_unlock(&mce_state_lock); 690 spin_unlock(&mce_state_lock);
691 unlock_kernel(); 691 unlock_kernel();
692 692
693 return nonseekable_open(inode, file); 693 return nonseekable_open(inode, file);
694 } 694 }
695 695
696 static int mce_release(struct inode *inode, struct file *file) 696 static int mce_release(struct inode *inode, struct file *file)
697 { 697 {
698 spin_lock(&mce_state_lock); 698 spin_lock(&mce_state_lock);
699 699
700 open_count--; 700 open_count--;
701 open_exclu = 0; 701 open_exclu = 0;
702 702
703 spin_unlock(&mce_state_lock); 703 spin_unlock(&mce_state_lock);
704 704
705 return 0; 705 return 0;
706 } 706 }
707 707
708 static void collect_tscs(void *data) 708 static void collect_tscs(void *data)
709 { 709 {
710 unsigned long *cpu_tsc = (unsigned long *)data; 710 unsigned long *cpu_tsc = (unsigned long *)data;
711 711
712 rdtscll(cpu_tsc[smp_processor_id()]); 712 rdtscll(cpu_tsc[smp_processor_id()]);
713 } 713 }
714 714
715 static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, 715 static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
716 loff_t *off) 716 loff_t *off)
717 { 717 {
718 unsigned long *cpu_tsc; 718 unsigned long *cpu_tsc;
719 static DEFINE_MUTEX(mce_read_mutex); 719 static DEFINE_MUTEX(mce_read_mutex);
720 unsigned prev, next; 720 unsigned prev, next;
721 char __user *buf = ubuf; 721 char __user *buf = ubuf;
722 int i, err; 722 int i, err;
723 723
724 cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); 724 cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
725 if (!cpu_tsc) 725 if (!cpu_tsc)
726 return -ENOMEM; 726 return -ENOMEM;
727 727
728 mutex_lock(&mce_read_mutex); 728 mutex_lock(&mce_read_mutex);
729 next = rcu_dereference(mcelog.next); 729 next = rcu_dereference(mcelog.next);
730 730
731 /* Only supports full reads right now */ 731 /* Only supports full reads right now */
732 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { 732 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
733 mutex_unlock(&mce_read_mutex); 733 mutex_unlock(&mce_read_mutex);
734 kfree(cpu_tsc); 734 kfree(cpu_tsc);
735 return -EINVAL; 735 return -EINVAL;
736 } 736 }
737 737
738 err = 0; 738 err = 0;
739 prev = 0; 739 prev = 0;
740 do { 740 do {
741 for (i = prev; i < next; i++) { 741 for (i = prev; i < next; i++) {
742 unsigned long start = jiffies; 742 unsigned long start = jiffies;
743 743
744 while (!mcelog.entry[i].finished) { 744 while (!mcelog.entry[i].finished) {
745 if (time_after_eq(jiffies, start + 2)) { 745 if (time_after_eq(jiffies, start + 2)) {
746 memset(mcelog.entry + i, 0, 746 memset(mcelog.entry + i, 0,
747 sizeof(struct mce)); 747 sizeof(struct mce));
748 goto timeout; 748 goto timeout;
749 } 749 }
750 cpu_relax(); 750 cpu_relax();
751 } 751 }
752 smp_rmb(); 752 smp_rmb();
753 err |= copy_to_user(buf, mcelog.entry + i, 753 err |= copy_to_user(buf, mcelog.entry + i,
754 sizeof(struct mce)); 754 sizeof(struct mce));
755 buf += sizeof(struct mce); 755 buf += sizeof(struct mce);
756 timeout: 756 timeout:
757 ; 757 ;
758 } 758 }
759 759
760 memset(mcelog.entry + prev, 0, 760 memset(mcelog.entry + prev, 0,
761 (next - prev) * sizeof(struct mce)); 761 (next - prev) * sizeof(struct mce));
762 prev = next; 762 prev = next;
763 next = cmpxchg(&mcelog.next, prev, 0); 763 next = cmpxchg(&mcelog.next, prev, 0);
764 } while (next != prev); 764 } while (next != prev);
765 765
766 synchronize_sched(); 766 synchronize_sched();
767 767
768 /* 768 /*
769 * Collect entries that were still getting written before the 769 * Collect entries that were still getting written before the
770 * synchronize. 770 * synchronize.
771 */ 771 */
772 on_each_cpu(collect_tscs, cpu_tsc, 1); 772 on_each_cpu(collect_tscs, cpu_tsc, 1);
773 for (i = next; i < MCE_LOG_LEN; i++) { 773 for (i = next; i < MCE_LOG_LEN; i++) {
774 if (mcelog.entry[i].finished && 774 if (mcelog.entry[i].finished &&
775 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { 775 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
776 err |= copy_to_user(buf, mcelog.entry+i, 776 err |= copy_to_user(buf, mcelog.entry+i,
777 sizeof(struct mce)); 777 sizeof(struct mce));
778 smp_rmb(); 778 smp_rmb();
779 buf += sizeof(struct mce); 779 buf += sizeof(struct mce);
780 memset(&mcelog.entry[i], 0, sizeof(struct mce)); 780 memset(&mcelog.entry[i], 0, sizeof(struct mce));
781 } 781 }
782 } 782 }
783 mutex_unlock(&mce_read_mutex); 783 mutex_unlock(&mce_read_mutex);
784 kfree(cpu_tsc); 784 kfree(cpu_tsc);
785 return err ? -EFAULT : buf - ubuf; 785 return err ? -EFAULT : buf - ubuf;
786 } 786 }
787 787
788 static unsigned int mce_poll(struct file *file, poll_table *wait) 788 static unsigned int mce_poll(struct file *file, poll_table *wait)
789 { 789 {
790 poll_wait(file, &mce_wait, wait); 790 poll_wait(file, &mce_wait, wait);
791 if (rcu_dereference(mcelog.next)) 791 if (rcu_dereference(mcelog.next))
792 return POLLIN | POLLRDNORM; 792 return POLLIN | POLLRDNORM;
793 return 0; 793 return 0;
794 } 794 }
795 795
796 static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) 796 static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
797 { 797 {
798 int __user *p = (int __user *)arg; 798 int __user *p = (int __user *)arg;
799 799
800 if (!capable(CAP_SYS_ADMIN)) 800 if (!capable(CAP_SYS_ADMIN))
801 return -EPERM; 801 return -EPERM;
802 switch (cmd) { 802 switch (cmd) {
803 case MCE_GET_RECORD_LEN: 803 case MCE_GET_RECORD_LEN:
804 return put_user(sizeof(struct mce), p); 804 return put_user(sizeof(struct mce), p);
805 case MCE_GET_LOG_LEN: 805 case MCE_GET_LOG_LEN:
806 return put_user(MCE_LOG_LEN, p); 806 return put_user(MCE_LOG_LEN, p);
807 case MCE_GETCLEAR_FLAGS: { 807 case MCE_GETCLEAR_FLAGS: {
808 unsigned flags; 808 unsigned flags;
809 809
810 do { 810 do {
811 flags = mcelog.flags; 811 flags = mcelog.flags;
812 } while (cmpxchg(&mcelog.flags, flags, 0) != flags); 812 } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
813 return put_user(flags, p); 813 return put_user(flags, p);
814 } 814 }
815 default: 815 default:
816 return -ENOTTY; 816 return -ENOTTY;
817 } 817 }
818 } 818 }
819 819
820 static const struct file_operations mce_chrdev_ops = { 820 static const struct file_operations mce_chrdev_ops = {
821 .open = mce_open, 821 .open = mce_open,
822 .release = mce_release, 822 .release = mce_release,
823 .read = mce_read, 823 .read = mce_read,
824 .poll = mce_poll, 824 .poll = mce_poll,
825 .unlocked_ioctl = mce_ioctl, 825 .unlocked_ioctl = mce_ioctl,
826 }; 826 };
827 827
828 static struct miscdevice mce_log_device = { 828 static struct miscdevice mce_log_device = {
829 MISC_MCELOG_MINOR, 829 MISC_MCELOG_MINOR,
830 "mcelog", 830 "mcelog",
831 &mce_chrdev_ops, 831 &mce_chrdev_ops,
832 }; 832 };
833 833
834 /* 834 /*
835 * Old style boot options parsing. Only for compatibility. 835 * Old style boot options parsing. Only for compatibility.
836 */ 836 */
837 static int __init mcheck_disable(char *str) 837 static int __init mcheck_disable(char *str)
838 { 838 {
839 mce_dont_init = 1; 839 mce_dont_init = 1;
840 return 1; 840 return 1;
841 } 841 }
842 842
843 /* mce=off disables machine check. 843 /* mce=off disables machine check.
844 mce=TOLERANCELEVEL (number, see above) 844 mce=TOLERANCELEVEL (number, see above)
845 mce=bootlog Log MCEs from before booting. Disabled by default on AMD. 845 mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
846 mce=nobootlog Don't log MCEs from before booting. */ 846 mce=nobootlog Don't log MCEs from before booting. */
847 static int __init mcheck_enable(char *str) 847 static int __init mcheck_enable(char *str)
848 { 848 {
849 if (!strcmp(str, "off")) 849 if (!strcmp(str, "off"))
850 mce_dont_init = 1; 850 mce_dont_init = 1;
851 else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) 851 else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
852 mce_bootlog = str[0] == 'b'; 852 mce_bootlog = str[0] == 'b';
853 else if (isdigit(str[0])) 853 else if (isdigit(str[0]))
854 get_option(&str, &tolerant); 854 get_option(&str, &tolerant);
855 else 855 else
856 printk("mce= argument %s ignored. Please use /sys", str); 856 printk("mce= argument %s ignored. Please use /sys", str);
857 return 1; 857 return 1;
858 } 858 }
859 859
860 __setup("nomce", mcheck_disable); 860 __setup("nomce", mcheck_disable);
861 __setup("mce=", mcheck_enable); 861 __setup("mce=", mcheck_enable);
862 862
863 /* 863 /*
864 * Sysfs support 864 * Sysfs support
865 */ 865 */
866 866
867 /* 867 /*
868 * Disable machine checks on suspend and shutdown. We can't really handle 868 * Disable machine checks on suspend and shutdown. We can't really handle
869 * them later. 869 * them later.
870 */ 870 */
871 static int mce_disable(void) 871 static int mce_disable(void)
872 { 872 {
873 int i; 873 int i;
874 874
875 for (i = 0; i < banks; i++) 875 for (i = 0; i < banks; i++)
876 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); 876 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
877 return 0; 877 return 0;
878 } 878 }
879 879
880 static int mce_suspend(struct sys_device *dev, pm_message_t state) 880 static int mce_suspend(struct sys_device *dev, pm_message_t state)
881 { 881 {
882 return mce_disable(); 882 return mce_disable();
883 } 883 }
884 884
885 static int mce_shutdown(struct sys_device *dev) 885 static int mce_shutdown(struct sys_device *dev)
886 { 886 {
887 return mce_disable(); 887 return mce_disable();
888 } 888 }
889 889
890 /* On resume clear all MCE state. Don't want to see leftovers from the BIOS. 890 /* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
891 Only one CPU is active at this time, the others get readded later using 891 Only one CPU is active at this time, the others get readded later using
892 CPU hotplug. */ 892 CPU hotplug. */
893 static int mce_resume(struct sys_device *dev) 893 static int mce_resume(struct sys_device *dev)
894 { 894 {
895 mce_init(NULL); 895 mce_init(NULL);
896 mce_cpu_features(&current_cpu_data); 896 mce_cpu_features(&current_cpu_data);
897 return 0; 897 return 0;
898 } 898 }
899 899
900 static void mce_cpu_restart(void *data) 900 static void mce_cpu_restart(void *data)
901 { 901 {
902 del_timer_sync(&__get_cpu_var(mce_timer)); 902 del_timer_sync(&__get_cpu_var(mce_timer));
903 if (mce_available(&current_cpu_data)) 903 if (mce_available(&current_cpu_data))
904 mce_init(NULL); 904 mce_init(NULL);
905 mce_init_timer(); 905 mce_init_timer();
906 } 906 }
907 907
908 /* Reinit MCEs after user configuration changes */ 908 /* Reinit MCEs after user configuration changes */
909 static void mce_restart(void) 909 static void mce_restart(void)
910 { 910 {
911 on_each_cpu(mce_cpu_restart, NULL, 1); 911 on_each_cpu(mce_cpu_restart, NULL, 1);
912 } 912 }
913 913
914 static struct sysdev_class mce_sysclass = { 914 static struct sysdev_class mce_sysclass = {
915 .suspend = mce_suspend, 915 .suspend = mce_suspend,
916 .shutdown = mce_shutdown, 916 .shutdown = mce_shutdown,
917 .resume = mce_resume, 917 .resume = mce_resume,
918 .name = "machinecheck", 918 .name = "machinecheck",
919 }; 919 };
920 920
921 DEFINE_PER_CPU(struct sys_device, device_mce); 921 DEFINE_PER_CPU(struct sys_device, device_mce);
922 void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; 922 void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata;
923 923
924 /* Why are there no generic functions for this? */ 924 /* Why are there no generic functions for this? */
925 #define ACCESSOR(name, var, start) \ 925 #define ACCESSOR(name, var, start) \
926 static ssize_t show_ ## name(struct sys_device *s, \ 926 static ssize_t show_ ## name(struct sys_device *s, \
927 struct sysdev_attribute *attr, \ 927 struct sysdev_attribute *attr, \
928 char *buf) { \ 928 char *buf) { \
929 return sprintf(buf, "%lx\n", (unsigned long)var); \ 929 return sprintf(buf, "%lx\n", (unsigned long)var); \
930 } \ 930 } \
931 static ssize_t set_ ## name(struct sys_device *s, \ 931 static ssize_t set_ ## name(struct sys_device *s, \
932 struct sysdev_attribute *attr, \ 932 struct sysdev_attribute *attr, \
933 const char *buf, size_t siz) { \ 933 const char *buf, size_t siz) { \
934 char *end; \ 934 char *end; \
935 unsigned long new = simple_strtoul(buf, &end, 0); \ 935 unsigned long new = simple_strtoul(buf, &end, 0); \
936 if (end == buf) return -EINVAL; \ 936 if (end == buf) return -EINVAL; \
937 var = new; \ 937 var = new; \
938 start; \ 938 start; \
939 return end-buf; \ 939 return end-buf; \
940 } \ 940 } \
941 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); 941 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
942 942
943 static struct sysdev_attribute *bank_attrs; 943 static struct sysdev_attribute *bank_attrs;
944 944
945 static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, 945 static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
946 char *buf) 946 char *buf)
947 { 947 {
948 u64 b = bank[attr - bank_attrs]; 948 u64 b = bank[attr - bank_attrs];
949 return sprintf(buf, "%llx\n", b); 949 return sprintf(buf, "%llx\n", b);
950 } 950 }
951 951
952 static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, 952 static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
953 const char *buf, size_t siz) 953 const char *buf, size_t siz)
954 { 954 {
955 char *end; 955 char *end;
956 u64 new = simple_strtoull(buf, &end, 0); 956 u64 new = simple_strtoull(buf, &end, 0);
957 if (end == buf) 957 if (end == buf)
958 return -EINVAL; 958 return -EINVAL;
959 bank[attr - bank_attrs] = new; 959 bank[attr - bank_attrs] = new;
960 mce_restart(); 960 mce_restart();
961 return end-buf; 961 return end-buf;
962 } 962 }
963 963
964 static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, 964 static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
965 char *buf) 965 char *buf)
966 { 966 {
967 strcpy(buf, trigger); 967 strcpy(buf, trigger);
968 strcat(buf, "\n"); 968 strcat(buf, "\n");
969 return strlen(trigger) + 1; 969 return strlen(trigger) + 1;
970 } 970 }
971 971
972 static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, 972 static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
973 const char *buf,size_t siz) 973 const char *buf,size_t siz)
974 { 974 {
975 char *p; 975 char *p;
976 int len; 976 int len;
977 strncpy(trigger, buf, sizeof(trigger)); 977 strncpy(trigger, buf, sizeof(trigger));
978 trigger[sizeof(trigger)-1] = 0; 978 trigger[sizeof(trigger)-1] = 0;
979 len = strlen(trigger); 979 len = strlen(trigger);
980 p = strchr(trigger, '\n'); 980 p = strchr(trigger, '\n');
981 if (*p) *p = 0; 981 if (*p) *p = 0;
982 return len; 982 return len;
983 } 983 }
984 984
985 static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); 985 static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
986 static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); 986 static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
987 ACCESSOR(check_interval,check_interval,mce_restart()) 987 ACCESSOR(check_interval,check_interval,mce_restart())
988 static struct sysdev_attribute *mce_attributes[] = { 988 static struct sysdev_attribute *mce_attributes[] = {
989 &attr_tolerant.attr, &attr_check_interval, &attr_trigger, 989 &attr_tolerant.attr, &attr_check_interval, &attr_trigger,
990 NULL 990 NULL
991 }; 991 };
992 992
993 static cpumask_var_t mce_device_initialized; 993 static cpumask_var_t mce_device_initialized;
994 994
995 /* Per cpu sysdev init. All of the cpus still share the same ctl bank */ 995 /* Per cpu sysdev init. All of the cpus still share the same ctl bank */
996 static __cpuinit int mce_create_device(unsigned int cpu) 996 static __cpuinit int mce_create_device(unsigned int cpu)
997 { 997 {
998 int err; 998 int err;
999 int i; 999 int i;
1000 1000
1001 if (!mce_available(&boot_cpu_data)) 1001 if (!mce_available(&boot_cpu_data))
1002 return -EIO; 1002 return -EIO;
1003 1003
1004 memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); 1004 memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
1005 per_cpu(device_mce,cpu).id = cpu; 1005 per_cpu(device_mce,cpu).id = cpu;
1006 per_cpu(device_mce,cpu).cls = &mce_sysclass; 1006 per_cpu(device_mce,cpu).cls = &mce_sysclass;
1007 1007
1008 err = sysdev_register(&per_cpu(device_mce,cpu)); 1008 err = sysdev_register(&per_cpu(device_mce,cpu));
1009 if (err) 1009 if (err)
1010 return err; 1010 return err;
1011 1011
1012 for (i = 0; mce_attributes[i]; i++) { 1012 for (i = 0; mce_attributes[i]; i++) {
1013 err = sysdev_create_file(&per_cpu(device_mce,cpu), 1013 err = sysdev_create_file(&per_cpu(device_mce,cpu),
1014 mce_attributes[i]); 1014 mce_attributes[i]);
1015 if (err) 1015 if (err)
1016 goto error; 1016 goto error;
1017 } 1017 }
1018 for (i = 0; i < banks; i++) { 1018 for (i = 0; i < banks; i++) {
1019 err = sysdev_create_file(&per_cpu(device_mce, cpu), 1019 err = sysdev_create_file(&per_cpu(device_mce, cpu),
1020 &bank_attrs[i]); 1020 &bank_attrs[i]);
1021 if (err) 1021 if (err)
1022 goto error2; 1022 goto error2;
1023 } 1023 }
1024 cpumask_set_cpu(cpu, mce_device_initialized); 1024 cpumask_set_cpu(cpu, mce_device_initialized);
1025 1025
1026 return 0; 1026 return 0;
1027 error2: 1027 error2:
1028 while (--i >= 0) { 1028 while (--i >= 0) {
1029 sysdev_remove_file(&per_cpu(device_mce, cpu), 1029 sysdev_remove_file(&per_cpu(device_mce, cpu),
1030 &bank_attrs[i]); 1030 &bank_attrs[i]);
1031 } 1031 }
1032 error: 1032 error:
1033 while (--i >= 0) { 1033 while (--i >= 0) {
1034 sysdev_remove_file(&per_cpu(device_mce,cpu), 1034 sysdev_remove_file(&per_cpu(device_mce,cpu),
1035 mce_attributes[i]); 1035 mce_attributes[i]);
1036 } 1036 }
1037 sysdev_unregister(&per_cpu(device_mce,cpu)); 1037 sysdev_unregister(&per_cpu(device_mce,cpu));
1038 1038
1039 return err; 1039 return err;
1040 } 1040 }
1041 1041
1042 static __cpuinit void mce_remove_device(unsigned int cpu) 1042 static __cpuinit void mce_remove_device(unsigned int cpu)
1043 { 1043 {
1044 int i; 1044 int i;
1045 1045
1046 if (!cpumask_test_cpu(cpu, mce_device_initialized)) 1046 if (!cpumask_test_cpu(cpu, mce_device_initialized))
1047 return; 1047 return;
1048 1048
1049 for (i = 0; mce_attributes[i]; i++) 1049 for (i = 0; mce_attributes[i]; i++)
1050 sysdev_remove_file(&per_cpu(device_mce,cpu), 1050 sysdev_remove_file(&per_cpu(device_mce,cpu),
1051 mce_attributes[i]); 1051 mce_attributes[i]);
1052 for (i = 0; i < banks; i++) 1052 for (i = 0; i < banks; i++)
1053 sysdev_remove_file(&per_cpu(device_mce, cpu), 1053 sysdev_remove_file(&per_cpu(device_mce, cpu),
1054 &bank_attrs[i]); 1054 &bank_attrs[i]);
1055 sysdev_unregister(&per_cpu(device_mce,cpu)); 1055 sysdev_unregister(&per_cpu(device_mce,cpu));
1056 cpumask_clear_cpu(cpu, mce_device_initialized); 1056 cpumask_clear_cpu(cpu, mce_device_initialized);
1057 } 1057 }
1058 1058
1059 /* Make sure there are no machine checks on offlined CPUs. */ 1059 /* Make sure there are no machine checks on offlined CPUs. */
1060 static void mce_disable_cpu(void *h) 1060 static void mce_disable_cpu(void *h)
1061 { 1061 {
1062 int i; 1062 int i;
1063 unsigned long action = *(unsigned long *)h; 1063 unsigned long action = *(unsigned long *)h;
1064 1064
1065 if (!mce_available(&current_cpu_data)) 1065 if (!mce_available(&current_cpu_data))
1066 return; 1066 return;
1067 if (!(action & CPU_TASKS_FROZEN)) 1067 if (!(action & CPU_TASKS_FROZEN))
1068 cmci_clear(); 1068 cmci_clear();
1069 for (i = 0; i < banks; i++) 1069 for (i = 0; i < banks; i++)
1070 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); 1070 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
1071 } 1071 }
1072 1072
1073 static void mce_reenable_cpu(void *h) 1073 static void mce_reenable_cpu(void *h)
1074 { 1074 {
1075 int i; 1075 int i;
1076 unsigned long action = *(unsigned long *)h; 1076 unsigned long action = *(unsigned long *)h;
1077 1077
1078 if (!mce_available(&current_cpu_data)) 1078 if (!mce_available(&current_cpu_data))
1079 return; 1079 return;
1080 if (!(action & CPU_TASKS_FROZEN)) 1080 if (!(action & CPU_TASKS_FROZEN))
1081 cmci_reenable(); 1081 cmci_reenable();
1082 for (i = 0; i < banks; i++) 1082 for (i = 0; i < banks; i++)
1083 wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); 1083 wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
1084 } 1084 }
1085 1085
1086 /* Get notified when a cpu comes on/off. Be hotplug friendly. */ 1086 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
1087 static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, 1087 static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
1088 unsigned long action, void *hcpu) 1088 unsigned long action, void *hcpu)
1089 { 1089 {
1090 unsigned int cpu = (unsigned long)hcpu; 1090 unsigned int cpu = (unsigned long)hcpu;
1091 struct timer_list *t = &per_cpu(mce_timer, cpu); 1091 struct timer_list *t = &per_cpu(mce_timer, cpu);
1092 1092
1093 switch (action) { 1093 switch (action) {
1094 case CPU_ONLINE: 1094 case CPU_ONLINE:
1095 case CPU_ONLINE_FROZEN: 1095 case CPU_ONLINE_FROZEN:
1096 mce_create_device(cpu); 1096 mce_create_device(cpu);
1097 if (threshold_cpu_callback) 1097 if (threshold_cpu_callback)
1098 threshold_cpu_callback(action, cpu); 1098 threshold_cpu_callback(action, cpu);
1099 break; 1099 break;
1100 case CPU_DEAD: 1100 case CPU_DEAD:
1101 case CPU_DEAD_FROZEN: 1101 case CPU_DEAD_FROZEN:
1102 if (threshold_cpu_callback) 1102 if (threshold_cpu_callback)
1103 threshold_cpu_callback(action, cpu); 1103 threshold_cpu_callback(action, cpu);
1104 mce_remove_device(cpu); 1104 mce_remove_device(cpu);
1105 break; 1105 break;
1106 case CPU_DOWN_PREPARE: 1106 case CPU_DOWN_PREPARE:
1107 case CPU_DOWN_PREPARE_FROZEN: 1107 case CPU_DOWN_PREPARE_FROZEN:
1108 del_timer_sync(t); 1108 del_timer_sync(t);
1109 smp_call_function_single(cpu, mce_disable_cpu, &action, 1); 1109 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
1110 break; 1110 break;
1111 case CPU_DOWN_FAILED: 1111 case CPU_DOWN_FAILED:
1112 case CPU_DOWN_FAILED_FROZEN: 1112 case CPU_DOWN_FAILED_FROZEN:
1113 t->expires = round_jiffies(jiffies + 1113 t->expires = round_jiffies(jiffies +
1114 __get_cpu_var(next_interval)); 1114 __get_cpu_var(next_interval));
1115 add_timer_on(t, cpu); 1115 add_timer_on(t, cpu);
1116 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); 1116 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
1117 break; 1117 break;
1118 case CPU_POST_DEAD: 1118 case CPU_POST_DEAD:
1119 /* intentionally ignoring frozen here */ 1119 /* intentionally ignoring frozen here */
1120 cmci_rediscover(cpu); 1120 cmci_rediscover(cpu);
1121 break; 1121 break;
1122 } 1122 }
1123 return NOTIFY_OK; 1123 return NOTIFY_OK;
1124 } 1124 }
1125 1125
1126 static struct notifier_block mce_cpu_notifier __cpuinitdata = { 1126 static struct notifier_block mce_cpu_notifier __cpuinitdata = {
1127 .notifier_call = mce_cpu_callback, 1127 .notifier_call = mce_cpu_callback,
1128 }; 1128 };
1129 1129
1130 static __init int mce_init_banks(void) 1130 static __init int mce_init_banks(void)
1131 { 1131 {
1132 int i; 1132 int i;
1133 1133
1134 bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, 1134 bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
1135 GFP_KERNEL); 1135 GFP_KERNEL);
1136 if (!bank_attrs) 1136 if (!bank_attrs)
1137 return -ENOMEM; 1137 return -ENOMEM;
1138 1138
1139 for (i = 0; i < banks; i++) { 1139 for (i = 0; i < banks; i++) {
1140 struct sysdev_attribute *a = &bank_attrs[i]; 1140 struct sysdev_attribute *a = &bank_attrs[i];
1141 a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); 1141 a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
1142 if (!a->attr.name) 1142 if (!a->attr.name)
1143 goto nomem; 1143 goto nomem;
1144 a->attr.mode = 0644; 1144 a->attr.mode = 0644;
1145 a->show = show_bank; 1145 a->show = show_bank;
1146 a->store = set_bank; 1146 a->store = set_bank;
1147 } 1147 }
1148 return 0; 1148 return 0;
1149 1149
1150 nomem: 1150 nomem:
1151 while (--i >= 0) 1151 while (--i >= 0)
1152 kfree(bank_attrs[i].attr.name); 1152 kfree(bank_attrs[i].attr.name);
1153 kfree(bank_attrs); 1153 kfree(bank_attrs);
1154 bank_attrs = NULL; 1154 bank_attrs = NULL;
1155 return -ENOMEM; 1155 return -ENOMEM;
1156 } 1156 }
1157 1157
1158 static __init int mce_init_device(void) 1158 static __init int mce_init_device(void)
1159 { 1159 {
1160 int err; 1160 int err;
1161 int i = 0; 1161 int i = 0;
1162 1162
1163 if (!mce_available(&boot_cpu_data)) 1163 if (!mce_available(&boot_cpu_data))
1164 return -EIO; 1164 return -EIO;
1165 1165
1166 alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); 1166 zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
1167 1167
1168 err = mce_init_banks(); 1168 err = mce_init_banks();
1169 if (err) 1169 if (err)
1170 return err; 1170 return err;
1171 1171
1172 err = sysdev_class_register(&mce_sysclass); 1172 err = sysdev_class_register(&mce_sysclass);
1173 if (err) 1173 if (err)
1174 return err; 1174 return err;
1175 1175
1176 for_each_online_cpu(i) { 1176 for_each_online_cpu(i) {
1177 err = mce_create_device(i); 1177 err = mce_create_device(i);
1178 if (err) 1178 if (err)
1179 return err; 1179 return err;
1180 } 1180 }
1181 1181
1182 register_hotcpu_notifier(&mce_cpu_notifier); 1182 register_hotcpu_notifier(&mce_cpu_notifier);
1183 misc_register(&mce_log_device); 1183 misc_register(&mce_log_device);
1184 return err; 1184 return err;
1185 } 1185 }
1186 1186
1187 device_initcall(mce_init_device); 1187 device_initcall(mce_init_device);
1188 1188
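The only functional change in the mce_init_device() hunk above is the switch from alloc_cpumask_var() to zalloc_cpumask_var() for mce_device_initialized. As a hedged aside (not part of the commit), the sketch below contrasts the two helpers under the assumption that CONFIG_CPUMASK_OFFSTACK=y, so cpumask_var_t is heap-backed; the demo_* identifiers are invented purely for illustration.

/* Illustrative sketch only -- demo_* names are hypothetical, not from the commit. */
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/slab.h>

static cpumask_var_t demo_mask;

static int __init demo_init(void)
{
	/*
	 * alloc_cpumask_var() would hand back uninitialized storage here;
	 * zalloc_cpumask_var() returns it zero-filled, so the mask can be
	 * tested immediately without an explicit cpumask_clear().
	 */
	if (!zalloc_cpumask_var(&demo_mask, GFP_KERNEL))
		return -ENOMEM;

	WARN_ON(!cpumask_empty(demo_mask));	/* every bit starts cleared */
	cpumask_set_cpu(0, demo_mask);		/* mark cpu 0 as initialized */
	return 0;
}

static void __exit demo_exit(void)
{
	free_cpumask_var(demo_mask);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
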
arch/x86/kernel/tlb_uv.c
1 /* 1 /*
2 * SGI UltraViolet TLB flush routines. 2 * SGI UltraViolet TLB flush routines.
3 * 3 *
4 * (c) 2008 Cliff Wickman <cpw@sgi.com>, SGI. 4 * (c) 2008 Cliff Wickman <cpw@sgi.com>, SGI.
5 * 5 *
6 * This code is released under the GNU General Public License version 2 or 6 * This code is released under the GNU General Public License version 2 or
7 * later. 7 * later.
8 */ 8 */
9 #include <linux/seq_file.h> 9 #include <linux/seq_file.h>
10 #include <linux/proc_fs.h> 10 #include <linux/proc_fs.h>
11 #include <linux/kernel.h> 11 #include <linux/kernel.h>
12 12
13 #include <asm/mmu_context.h> 13 #include <asm/mmu_context.h>
14 #include <asm/uv/uv.h> 14 #include <asm/uv/uv.h>
15 #include <asm/uv/uv_mmrs.h> 15 #include <asm/uv/uv_mmrs.h>
16 #include <asm/uv/uv_hub.h> 16 #include <asm/uv/uv_hub.h>
17 #include <asm/uv/uv_bau.h> 17 #include <asm/uv/uv_bau.h>
18 #include <asm/apic.h> 18 #include <asm/apic.h>
19 #include <asm/idle.h> 19 #include <asm/idle.h>
20 #include <asm/tsc.h> 20 #include <asm/tsc.h>
21 #include <asm/irq_vectors.h> 21 #include <asm/irq_vectors.h>
22 22
23 static struct bau_control **uv_bau_table_bases __read_mostly; 23 static struct bau_control **uv_bau_table_bases __read_mostly;
24 static int uv_bau_retry_limit __read_mostly; 24 static int uv_bau_retry_limit __read_mostly;
25 25
26 /* position of pnode (which is nasid>>1): */ 26 /* position of pnode (which is nasid>>1): */
27 static int uv_nshift __read_mostly; 27 static int uv_nshift __read_mostly;
28 /* base pnode in this partition */ 28 /* base pnode in this partition */
29 static int uv_partition_base_pnode __read_mostly; 29 static int uv_partition_base_pnode __read_mostly;
30 30
31 static unsigned long uv_mmask __read_mostly; 31 static unsigned long uv_mmask __read_mostly;
32 32
33 static DEFINE_PER_CPU(struct ptc_stats, ptcstats); 33 static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
34 static DEFINE_PER_CPU(struct bau_control, bau_control); 34 static DEFINE_PER_CPU(struct bau_control, bau_control);
35 35
36 /* 36 /*
37 * Determine the first node on a blade. 37 * Determine the first node on a blade.
38 */ 38 */
39 static int __init blade_to_first_node(int blade) 39 static int __init blade_to_first_node(int blade)
40 { 40 {
41 int node, b; 41 int node, b;
42 42
43 for_each_online_node(node) { 43 for_each_online_node(node) {
44 b = uv_node_to_blade_id(node); 44 b = uv_node_to_blade_id(node);
45 if (blade == b) 45 if (blade == b)
46 return node; 46 return node;
47 } 47 }
48 return -1; /* shouldn't happen */ 48 return -1; /* shouldn't happen */
49 } 49 }
50 50
51 /* 51 /*
52 * Determine the apicid of the first cpu on a blade. 52 * Determine the apicid of the first cpu on a blade.
53 */ 53 */
54 static int __init blade_to_first_apicid(int blade) 54 static int __init blade_to_first_apicid(int blade)
55 { 55 {
56 int cpu; 56 int cpu;
57 57
58 for_each_present_cpu(cpu) 58 for_each_present_cpu(cpu)
59 if (blade == uv_cpu_to_blade_id(cpu)) 59 if (blade == uv_cpu_to_blade_id(cpu))
60 return per_cpu(x86_cpu_to_apicid, cpu); 60 return per_cpu(x86_cpu_to_apicid, cpu);
61 return -1; 61 return -1;
62 } 62 }
63 63
64 /* 64 /*
65 * Free a software acknowledge hardware resource by clearing its Pending 65 * Free a software acknowledge hardware resource by clearing its Pending
66 * bit. This will return a reply to the sender. 66 * bit. This will return a reply to the sender.
67 * If the message has timed out, a reply has already been sent by the 67 * If the message has timed out, a reply has already been sent by the
68 * hardware but the resource has not been released. In that case our 68 * hardware but the resource has not been released. In that case our
69 * clear of the Timeout bit (as well) will free the resource. No reply will 69 * clear of the Timeout bit (as well) will free the resource. No reply will
70 * be sent (the hardware will only do one reply per message). 70 * be sent (the hardware will only do one reply per message).
71 */ 71 */
72 static void uv_reply_to_message(int resource, 72 static void uv_reply_to_message(int resource,
73 struct bau_payload_queue_entry *msg, 73 struct bau_payload_queue_entry *msg,
74 struct bau_msg_status *msp) 74 struct bau_msg_status *msp)
75 { 75 {
76 unsigned long dw; 76 unsigned long dw;
77 77
78 dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource); 78 dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource);
79 msg->replied_to = 1; 79 msg->replied_to = 1;
80 msg->sw_ack_vector = 0; 80 msg->sw_ack_vector = 0;
81 if (msp) 81 if (msp)
82 msp->seen_by.bits = 0; 82 msp->seen_by.bits = 0;
83 uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); 83 uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw);
84 } 84 }
85 85
86 /* 86 /*
87 * Do all the things a cpu should do for a TLB shootdown message. 87 * Do all the things a cpu should do for a TLB shootdown message.
88 * Other cpu's may come here at the same time for this message. 88 * Other cpu's may come here at the same time for this message.
89 */ 89 */
90 static void uv_bau_process_message(struct bau_payload_queue_entry *msg, 90 static void uv_bau_process_message(struct bau_payload_queue_entry *msg,
91 int msg_slot, int sw_ack_slot) 91 int msg_slot, int sw_ack_slot)
92 { 92 {
93 unsigned long this_cpu_mask; 93 unsigned long this_cpu_mask;
94 struct bau_msg_status *msp; 94 struct bau_msg_status *msp;
95 int cpu; 95 int cpu;
96 96
97 msp = __get_cpu_var(bau_control).msg_statuses + msg_slot; 97 msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
98 cpu = uv_blade_processor_id(); 98 cpu = uv_blade_processor_id();
99 msg->number_of_cpus = 99 msg->number_of_cpus =
100 uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id())); 100 uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
101 this_cpu_mask = 1UL << cpu; 101 this_cpu_mask = 1UL << cpu;
102 if (msp->seen_by.bits & this_cpu_mask) 102 if (msp->seen_by.bits & this_cpu_mask)
103 return; 103 return;
104 atomic_or_long(&msp->seen_by.bits, this_cpu_mask); 104 atomic_or_long(&msp->seen_by.bits, this_cpu_mask);
105 105
106 if (msg->replied_to == 1) 106 if (msg->replied_to == 1)
107 return; 107 return;
108 108
109 if (msg->address == TLB_FLUSH_ALL) { 109 if (msg->address == TLB_FLUSH_ALL) {
110 local_flush_tlb(); 110 local_flush_tlb();
111 __get_cpu_var(ptcstats).alltlb++; 111 __get_cpu_var(ptcstats).alltlb++;
112 } else { 112 } else {
113 __flush_tlb_one(msg->address); 113 __flush_tlb_one(msg->address);
114 __get_cpu_var(ptcstats).onetlb++; 114 __get_cpu_var(ptcstats).onetlb++;
115 } 115 }
116 116
117 __get_cpu_var(ptcstats).requestee++; 117 __get_cpu_var(ptcstats).requestee++;
118 118
119 atomic_inc_short(&msg->acknowledge_count); 119 atomic_inc_short(&msg->acknowledge_count);
120 if (msg->number_of_cpus == msg->acknowledge_count) 120 if (msg->number_of_cpus == msg->acknowledge_count)
121 uv_reply_to_message(sw_ack_slot, msg, msp); 121 uv_reply_to_message(sw_ack_slot, msg, msp);
122 } 122 }
123 123
124 /* 124 /*
125 * Examine the payload queue on one distribution node to see 125 * Examine the payload queue on one distribution node to see
126 * which messages have not been seen, and which cpu(s) have not seen them. 126 * which messages have not been seen, and which cpu(s) have not seen them.
127 * 127 *
128 * Returns the number of cpu's that have not responded. 128 * Returns the number of cpu's that have not responded.
129 */ 129 */
130 static int uv_examine_destination(struct bau_control *bau_tablesp, int sender) 130 static int uv_examine_destination(struct bau_control *bau_tablesp, int sender)
131 { 131 {
132 struct bau_payload_queue_entry *msg; 132 struct bau_payload_queue_entry *msg;
133 struct bau_msg_status *msp; 133 struct bau_msg_status *msp;
134 int count = 0; 134 int count = 0;
135 int i; 135 int i;
136 int j; 136 int j;
137 137
138 for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE; 138 for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE;
139 msg++, i++) { 139 msg++, i++) {
140 if ((msg->sending_cpu == sender) && (!msg->replied_to)) { 140 if ((msg->sending_cpu == sender) && (!msg->replied_to)) {
141 msp = bau_tablesp->msg_statuses + i; 141 msp = bau_tablesp->msg_statuses + i;
142 printk(KERN_DEBUG 142 printk(KERN_DEBUG
143 "blade %d: address:%#lx %d of %d, not cpu(s): ", 143 "blade %d: address:%#lx %d of %d, not cpu(s): ",
144 i, msg->address, msg->acknowledge_count, 144 i, msg->address, msg->acknowledge_count,
145 msg->number_of_cpus); 145 msg->number_of_cpus);
146 for (j = 0; j < msg->number_of_cpus; j++) { 146 for (j = 0; j < msg->number_of_cpus; j++) {
147 if (!((1L << j) & msp->seen_by.bits)) { 147 if (!((1L << j) & msp->seen_by.bits)) {
148 count++; 148 count++;
149 printk("%d ", j); 149 printk("%d ", j);
150 } 150 }
151 } 151 }
152 printk("\n"); 152 printk("\n");
153 } 153 }
154 } 154 }
155 return count; 155 return count;
156 } 156 }
157 157
158 /* 158 /*
159 * Examine the payload queue on all the distribution nodes to see 159 * Examine the payload queue on all the distribution nodes to see
160 * which messages have not been seen, and which cpu(s) have not seen them. 160 * which messages have not been seen, and which cpu(s) have not seen them.
161 * 161 *
162 * Returns the number of cpu's that have not responded. 162 * Returns the number of cpu's that have not responded.
163 */ 163 */
164 static int uv_examine_destinations(struct bau_target_nodemask *distribution) 164 static int uv_examine_destinations(struct bau_target_nodemask *distribution)
165 { 165 {
166 int sender; 166 int sender;
167 int i; 167 int i;
168 int count = 0; 168 int count = 0;
169 169
170 sender = smp_processor_id(); 170 sender = smp_processor_id();
171 for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) { 171 for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) {
172 if (!bau_node_isset(i, distribution)) 172 if (!bau_node_isset(i, distribution))
173 continue; 173 continue;
174 count += uv_examine_destination(uv_bau_table_bases[i], sender); 174 count += uv_examine_destination(uv_bau_table_bases[i], sender);
175 } 175 }
176 return count; 176 return count;
177 } 177 }
178 178
179 /* 179 /*
180 * wait for completion of a broadcast message 180 * wait for completion of a broadcast message
181 * 181 *
182 * return COMPLETE, RETRY or GIVEUP 182 * return COMPLETE, RETRY or GIVEUP
183 */ 183 */
184 static int uv_wait_completion(struct bau_desc *bau_desc, 184 static int uv_wait_completion(struct bau_desc *bau_desc,
185 unsigned long mmr_offset, int right_shift) 185 unsigned long mmr_offset, int right_shift)
186 { 186 {
187 int exams = 0; 187 int exams = 0;
188 long destination_timeouts = 0; 188 long destination_timeouts = 0;
189 long source_timeouts = 0; 189 long source_timeouts = 0;
190 unsigned long descriptor_status; 190 unsigned long descriptor_status;
191 191
192 while ((descriptor_status = (((unsigned long) 192 while ((descriptor_status = (((unsigned long)
193 uv_read_local_mmr(mmr_offset) >> 193 uv_read_local_mmr(mmr_offset) >>
194 right_shift) & UV_ACT_STATUS_MASK)) != 194 right_shift) & UV_ACT_STATUS_MASK)) !=
195 DESC_STATUS_IDLE) { 195 DESC_STATUS_IDLE) {
196 if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { 196 if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
197 source_timeouts++; 197 source_timeouts++;
198 if (source_timeouts > SOURCE_TIMEOUT_LIMIT) 198 if (source_timeouts > SOURCE_TIMEOUT_LIMIT)
199 source_timeouts = 0; 199 source_timeouts = 0;
200 __get_cpu_var(ptcstats).s_retry++; 200 __get_cpu_var(ptcstats).s_retry++;
201 return FLUSH_RETRY; 201 return FLUSH_RETRY;
202 } 202 }
203 /* 203 /*
204 * spin here looking for progress at the destinations 204 * spin here looking for progress at the destinations
205 */ 205 */
206 if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) { 206 if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) {
207 destination_timeouts++; 207 destination_timeouts++;
208 if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) { 208 if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) {
209 /* 209 /*
210 * returns number of cpus not responding 210 * returns number of cpus not responding
211 */ 211 */
212 if (uv_examine_destinations 212 if (uv_examine_destinations
213 (&bau_desc->distribution) == 0) { 213 (&bau_desc->distribution) == 0) {
214 __get_cpu_var(ptcstats).d_retry++; 214 __get_cpu_var(ptcstats).d_retry++;
215 return FLUSH_RETRY; 215 return FLUSH_RETRY;
216 } 216 }
217 exams++; 217 exams++;
218 if (exams >= uv_bau_retry_limit) { 218 if (exams >= uv_bau_retry_limit) {
219 printk(KERN_DEBUG 219 printk(KERN_DEBUG
220 "uv_flush_tlb_others"); 220 "uv_flush_tlb_others");
221 printk("giving up on cpu %d\n", 221 printk("giving up on cpu %d\n",
222 smp_processor_id()); 222 smp_processor_id());
223 return FLUSH_GIVEUP; 223 return FLUSH_GIVEUP;
224 } 224 }
225 /* 225 /*
226 * delays can hang the simulator 226 * delays can hang the simulator
227 udelay(1000); 227 udelay(1000);
228 */ 228 */
229 destination_timeouts = 0; 229 destination_timeouts = 0;
230 } 230 }
231 } 231 }
232 cpu_relax(); 232 cpu_relax();
233 } 233 }
234 return FLUSH_COMPLETE; 234 return FLUSH_COMPLETE;
235 } 235 }
236 236
237 /** 237 /**
238 * uv_flush_send_and_wait 238 * uv_flush_send_and_wait
239 * 239 *
240 * Send a broadcast and wait for a broadcast message to complete. 240 * Send a broadcast and wait for a broadcast message to complete.
241 * 241 *
242 * The flush_mask contains the cpus the broadcast was sent to. 242 * The flush_mask contains the cpus the broadcast was sent to.
243 * 243 *
244 * Returns NULL if all remote flushing was done. The mask is zeroed. 244 * Returns NULL if all remote flushing was done. The mask is zeroed.
245 * Returns @flush_mask if some remote flushing remains to be done. The 245 * Returns @flush_mask if some remote flushing remains to be done. The
246 * mask will have some bits still set. 246 * mask will have some bits still set.
247 */ 247 */
248 const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, 248 const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode,
249 struct bau_desc *bau_desc, 249 struct bau_desc *bau_desc,
250 struct cpumask *flush_mask) 250 struct cpumask *flush_mask)
251 { 251 {
252 int completion_status = 0; 252 int completion_status = 0;
253 int right_shift; 253 int right_shift;
254 int tries = 0; 254 int tries = 0;
255 int pnode; 255 int pnode;
256 int bit; 256 int bit;
257 unsigned long mmr_offset; 257 unsigned long mmr_offset;
258 unsigned long index; 258 unsigned long index;
259 cycles_t time1; 259 cycles_t time1;
260 cycles_t time2; 260 cycles_t time2;
261 261
262 if (cpu < UV_CPUS_PER_ACT_STATUS) { 262 if (cpu < UV_CPUS_PER_ACT_STATUS) {
263 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; 263 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
264 right_shift = cpu * UV_ACT_STATUS_SIZE; 264 right_shift = cpu * UV_ACT_STATUS_SIZE;
265 } else { 265 } else {
266 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; 266 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
267 right_shift = 267 right_shift =
268 ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE); 268 ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
269 } 269 }
270 time1 = get_cycles(); 270 time1 = get_cycles();
271 do { 271 do {
272 tries++; 272 tries++;
273 index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | 273 index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
274 cpu; 274 cpu;
275 uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); 275 uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
276 completion_status = uv_wait_completion(bau_desc, mmr_offset, 276 completion_status = uv_wait_completion(bau_desc, mmr_offset,
277 right_shift); 277 right_shift);
278 } while (completion_status == FLUSH_RETRY); 278 } while (completion_status == FLUSH_RETRY);
279 time2 = get_cycles(); 279 time2 = get_cycles();
280 __get_cpu_var(ptcstats).sflush += (time2 - time1); 280 __get_cpu_var(ptcstats).sflush += (time2 - time1);
281 if (tries > 1) 281 if (tries > 1)
282 __get_cpu_var(ptcstats).retriesok++; 282 __get_cpu_var(ptcstats).retriesok++;
283 283
284 if (completion_status == FLUSH_GIVEUP) { 284 if (completion_status == FLUSH_GIVEUP) {
285 /* 285 /*
286 * Cause the caller to do an IPI-style TLB shootdown on 286 * Cause the caller to do an IPI-style TLB shootdown on
287 * the cpu's, all of which are still in the mask. 287 * the cpu's, all of which are still in the mask.
288 */ 288 */
289 __get_cpu_var(ptcstats).ptc_i++; 289 __get_cpu_var(ptcstats).ptc_i++;
290 return flush_mask; 290 return flush_mask;
291 } 291 }
292 292
293 /* 293 /*
294 * Success, so clear the remote cpu's from the mask so we don't 294 * Success, so clear the remote cpu's from the mask so we don't
295 * use the IPI method of shootdown on them. 295 * use the IPI method of shootdown on them.
296 */ 296 */
297 for_each_cpu(bit, flush_mask) { 297 for_each_cpu(bit, flush_mask) {
298 pnode = uv_cpu_to_pnode(bit); 298 pnode = uv_cpu_to_pnode(bit);
299 if (pnode == this_pnode) 299 if (pnode == this_pnode)
300 continue; 300 continue;
301 cpumask_clear_cpu(bit, flush_mask); 301 cpumask_clear_cpu(bit, flush_mask);
302 } 302 }
303 if (!cpumask_empty(flush_mask)) 303 if (!cpumask_empty(flush_mask))
304 return flush_mask; 304 return flush_mask;
305 return NULL; 305 return NULL;
306 } 306 }
307 307
308 static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); 308 static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
309 309
310 /** 310 /**
311 * uv_flush_tlb_others - globally purge translation cache of a virtual 311 * uv_flush_tlb_others - globally purge translation cache of a virtual
312 * address or all TLB's 312 * address or all TLB's
313 * @cpumask: mask of all cpu's in which the address is to be removed 313 * @cpumask: mask of all cpu's in which the address is to be removed
314 * @mm: mm_struct containing virtual address range 314 * @mm: mm_struct containing virtual address range
315 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) 315 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
316 * @cpu: the current cpu 316 * @cpu: the current cpu
317 * 317 *
318 * This is the entry point for initiating any UV global TLB shootdown. 318 * This is the entry point for initiating any UV global TLB shootdown.
319 * 319 *
320 * Purges the translation caches of all specified processors of the given 320 * Purges the translation caches of all specified processors of the given
321 * virtual address, or purges all TLB's on specified processors. 321 * virtual address, or purges all TLB's on specified processors.
322 * 322 *
323 * The caller has derived the cpumask from the mm_struct. This function 323 * The caller has derived the cpumask from the mm_struct. This function
324 * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) 324 * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
325 * 325 *
326 * The cpumask is converted into a nodemask of the nodes containing 326 * The cpumask is converted into a nodemask of the nodes containing
327 * the cpus. 327 * the cpus.
328 * 328 *
329 * Note that this function should be called with preemption disabled. 329 * Note that this function should be called with preemption disabled.
330 * 330 *
331 * Returns NULL if all remote flushing was done. 331 * Returns NULL if all remote flushing was done.
332 * Returns pointer to cpumask if some remote flushing remains to be 332 * Returns pointer to cpumask if some remote flushing remains to be
333 * done. The returned pointer is valid till preemption is re-enabled. 333 * done. The returned pointer is valid till preemption is re-enabled.
334 */ 334 */
335 const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, 335 const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
336 struct mm_struct *mm, 336 struct mm_struct *mm,
337 unsigned long va, unsigned int cpu) 337 unsigned long va, unsigned int cpu)
338 { 338 {
339 struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask); 339 struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask);
340 int i; 340 int i;
341 int bit; 341 int bit;
342 int pnode; 342 int pnode;
343 int uv_cpu; 343 int uv_cpu;
344 int this_pnode; 344 int this_pnode;
345 int locals = 0; 345 int locals = 0;
346 struct bau_desc *bau_desc; 346 struct bau_desc *bau_desc;
347 347
348 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); 348 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
349 349
350 uv_cpu = uv_blade_processor_id(); 350 uv_cpu = uv_blade_processor_id();
351 this_pnode = uv_hub_info->pnode; 351 this_pnode = uv_hub_info->pnode;
352 bau_desc = __get_cpu_var(bau_control).descriptor_base; 352 bau_desc = __get_cpu_var(bau_control).descriptor_base;
353 bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; 353 bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
354 354
355 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 355 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
356 356
357 i = 0; 357 i = 0;
358 for_each_cpu(bit, flush_mask) { 358 for_each_cpu(bit, flush_mask) {
359 pnode = uv_cpu_to_pnode(bit); 359 pnode = uv_cpu_to_pnode(bit);
360 BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1)); 360 BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1));
361 if (pnode == this_pnode) { 361 if (pnode == this_pnode) {
362 locals++; 362 locals++;
363 continue; 363 continue;
364 } 364 }
365 bau_node_set(pnode - uv_partition_base_pnode, 365 bau_node_set(pnode - uv_partition_base_pnode,
366 &bau_desc->distribution); 366 &bau_desc->distribution);
367 i++; 367 i++;
368 } 368 }
369 if (i == 0) { 369 if (i == 0) {
370 /* 370 /*
371 * no off_node flushing; return status for local node 371 * no off_node flushing; return status for local node
372 */ 372 */
373 if (locals) 373 if (locals)
374 return flush_mask; 374 return flush_mask;
375 else 375 else
376 return NULL; 376 return NULL;
377 } 377 }
378 __get_cpu_var(ptcstats).requestor++; 378 __get_cpu_var(ptcstats).requestor++;
379 __get_cpu_var(ptcstats).ntargeted += i; 379 __get_cpu_var(ptcstats).ntargeted += i;
380 380
381 bau_desc->payload.address = va; 381 bau_desc->payload.address = va;
382 bau_desc->payload.sending_cpu = cpu; 382 bau_desc->payload.sending_cpu = cpu;
383 383
384 return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask); 384 return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask);
385 } 385 }
386 386
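The contract documented above -- NULL when the hardware broadcast covered every remote cpu, otherwise a pointer whose set bits are the cpus that still need flushing -- is what lets the generic flush path fall back to IPIs only when necessary. A minimal, hypothetical caller sketch (example_flush() and ipi_flush_remaining() are placeholder names, not something defined in this commit; preemption is assumed to be disabled by the caller, as the comment block requires):

    /* illustrative wrapper only; ipi_flush_remaining() is a placeholder */
    static void example_flush(const struct cpumask *cpumask,
                              struct mm_struct *mm, unsigned long va)
    {
        const struct cpumask *remaining;

        /* preemption is assumed to be disabled here */
        remaining = uv_flush_tlb_others(cpumask, mm, va, smp_processor_id());
        if (remaining)
            ipi_flush_remaining(remaining, mm, va);  /* fall back to IPIs */
        /* 'remaining' aliases a per-cpu mask: it must not be used once
         * preemption is re-enabled */
    }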
387 /* 387 /*
388 * The BAU message interrupt comes here. (registered by set_intr_gate) 388 * The BAU message interrupt comes here. (registered by set_intr_gate)
389 * See entry_64.S 389 * See entry_64.S
390 * 390 *
391 * We received a broadcast assist message. 391 * We received a broadcast assist message.
392 * 392 *
393 * Interrupts may have been disabled; this interrupt could represent 393 * Interrupts may have been disabled; this interrupt could represent
394 * the receipt of several messages. 394 * the receipt of several messages.
395 * 395 *
396 * All cores/threads on this node get this interrupt. 396 * All cores/threads on this node get this interrupt.
397 * The last one to see it does the s/w ack. 397 * The last one to see it does the s/w ack.
398 * (the resource will not be freed until noninterruptable cpus see this 398 * (the resource will not be freed until noninterruptable cpus see this
399 * interrupt; hardware will timeout the s/w ack and reply ERROR) 399 * interrupt; hardware will timeout the s/w ack and reply ERROR)
400 */ 400 */
401 void uv_bau_message_interrupt(struct pt_regs *regs) 401 void uv_bau_message_interrupt(struct pt_regs *regs)
402 { 402 {
403 struct bau_payload_queue_entry *va_queue_first; 403 struct bau_payload_queue_entry *va_queue_first;
404 struct bau_payload_queue_entry *va_queue_last; 404 struct bau_payload_queue_entry *va_queue_last;
405 struct bau_payload_queue_entry *msg; 405 struct bau_payload_queue_entry *msg;
406 struct pt_regs *old_regs = set_irq_regs(regs); 406 struct pt_regs *old_regs = set_irq_regs(regs);
407 cycles_t time1; 407 cycles_t time1;
408 cycles_t time2; 408 cycles_t time2;
409 int msg_slot; 409 int msg_slot;
410 int sw_ack_slot; 410 int sw_ack_slot;
411 int fw; 411 int fw;
412 int count = 0; 412 int count = 0;
413 unsigned long local_pnode; 413 unsigned long local_pnode;
414 414
415 ack_APIC_irq(); 415 ack_APIC_irq();
416 exit_idle(); 416 exit_idle();
417 irq_enter(); 417 irq_enter();
418 418
419 time1 = get_cycles(); 419 time1 = get_cycles();
420 420
421 local_pnode = uv_blade_to_pnode(uv_numa_blade_id()); 421 local_pnode = uv_blade_to_pnode(uv_numa_blade_id());
422 422
423 va_queue_first = __get_cpu_var(bau_control).va_queue_first; 423 va_queue_first = __get_cpu_var(bau_control).va_queue_first;
424 va_queue_last = __get_cpu_var(bau_control).va_queue_last; 424 va_queue_last = __get_cpu_var(bau_control).va_queue_last;
425 425
426 msg = __get_cpu_var(bau_control).bau_msg_head; 426 msg = __get_cpu_var(bau_control).bau_msg_head;
427 while (msg->sw_ack_vector) { 427 while (msg->sw_ack_vector) {
428 count++; 428 count++;
429 fw = msg->sw_ack_vector; 429 fw = msg->sw_ack_vector;
430 msg_slot = msg - va_queue_first; 430 msg_slot = msg - va_queue_first;
431 sw_ack_slot = ffs(fw) - 1; 431 sw_ack_slot = ffs(fw) - 1;
432 432
433 uv_bau_process_message(msg, msg_slot, sw_ack_slot); 433 uv_bau_process_message(msg, msg_slot, sw_ack_slot);
434 434
435 msg++; 435 msg++;
436 if (msg > va_queue_last) 436 if (msg > va_queue_last)
437 msg = va_queue_first; 437 msg = va_queue_first;
438 __get_cpu_var(bau_control).bau_msg_head = msg; 438 __get_cpu_var(bau_control).bau_msg_head = msg;
439 } 439 }
440 if (!count) 440 if (!count)
441 __get_cpu_var(ptcstats).nomsg++; 441 __get_cpu_var(ptcstats).nomsg++;
442 else if (count > 1) 442 else if (count > 1)
443 __get_cpu_var(ptcstats).multmsg++; 443 __get_cpu_var(ptcstats).multmsg++;
444 444
445 time2 = get_cycles(); 445 time2 = get_cycles();
446 __get_cpu_var(ptcstats).dflush += (time2 - time1); 446 __get_cpu_var(ptcstats).dflush += (time2 - time1);
447 447
448 irq_exit(); 448 irq_exit();
449 set_irq_regs(old_regs); 449 set_irq_regs(old_regs);
450 } 450 }
451 451
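One detail of the receive loop above that is easy to misread: sw_ack_vector is a bitmask of the software-ack resources pending for the message, and ffs() is 1-based, so ffs(fw) - 1 converts the lowest set bit into a zero-based slot index, while msg - va_queue_first yields the payload-queue slot by plain pointer arithmetic. A two-line illustration of the arithmetic only:

    int fw = 0x08;                  /* bit 3 set */
    int sw_ack_slot = ffs(fw) - 1;  /* ffs() returns 4, so the slot is 3 */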
452 /* 452 /*
453 * uv_enable_timeouts 453 * uv_enable_timeouts
454 * 454 *
455 * Each target blade (i.e. blades that have cpu's) needs to have 455 * Each target blade (i.e. blades that have cpu's) needs to have
456 * shootdown message timeouts enabled. The timeout does not cause 456 * shootdown message timeouts enabled. The timeout does not cause
457 * an interrupt, but causes an error message to be returned to 457 * an interrupt, but causes an error message to be returned to
458 * the sender. 458 * the sender.
459 */ 459 */
460 static void uv_enable_timeouts(void) 460 static void uv_enable_timeouts(void)
461 { 461 {
462 int blade; 462 int blade;
463 int nblades; 463 int nblades;
464 int pnode; 464 int pnode;
465 unsigned long mmr_image; 465 unsigned long mmr_image;
466 466
467 nblades = uv_num_possible_blades(); 467 nblades = uv_num_possible_blades();
468 468
469 for (blade = 0; blade < nblades; blade++) { 469 for (blade = 0; blade < nblades; blade++) {
470 if (!uv_blade_nr_possible_cpus(blade)) 470 if (!uv_blade_nr_possible_cpus(blade))
471 continue; 471 continue;
472 472
473 pnode = uv_blade_to_pnode(blade); 473 pnode = uv_blade_to_pnode(blade);
474 mmr_image = 474 mmr_image =
475 uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); 475 uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
476 /* 476 /*
477 * Set the timeout period and then lock it in, in three 477 * Set the timeout period and then lock it in, in three
478 * steps; captures and locks in the period. 478 * steps; captures and locks in the period.
479 * 479 *
480 * To program the period, the SOFT_ACK_MODE must be off. 480 * To program the period, the SOFT_ACK_MODE must be off.
481 */ 481 */
482 mmr_image &= ~((unsigned long)1 << 482 mmr_image &= ~((unsigned long)1 <<
483 UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); 483 UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
484 uv_write_global_mmr64 484 uv_write_global_mmr64
485 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); 485 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
486 /* 486 /*
487 * Set the 4-bit period. 487 * Set the 4-bit period.
488 */ 488 */
489 mmr_image &= ~((unsigned long)0xf << 489 mmr_image &= ~((unsigned long)0xf <<
490 UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); 490 UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
491 mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << 491 mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
492 UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); 492 UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
493 uv_write_global_mmr64 493 uv_write_global_mmr64
494 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); 494 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
495 /* 495 /*
496 * Subsequent reversals of the timebase bit (3) cause an 496 * Subsequent reversals of the timebase bit (3) cause an
497 * immediate timeout of one or all INTD resources as 497 * immediate timeout of one or all INTD resources as
498 * indicated in bits 2:0 (7 causes all of them to timeout). 498 * indicated in bits 2:0 (7 causes all of them to timeout).
499 */ 499 */
500 mmr_image |= ((unsigned long)1 << 500 mmr_image |= ((unsigned long)1 <<
501 UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); 501 UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
502 uv_write_global_mmr64 502 uv_write_global_mmr64
503 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); 503 (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
504 } 504 }
505 } 505 }
506 506
507 static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset) 507 static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
508 { 508 {
509 if (*offset < num_possible_cpus()) 509 if (*offset < num_possible_cpus())
510 return offset; 510 return offset;
511 return NULL; 511 return NULL;
512 } 512 }
513 513
514 static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) 514 static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
515 { 515 {
516 (*offset)++; 516 (*offset)++;
517 if (*offset < num_possible_cpus()) 517 if (*offset < num_possible_cpus())
518 return offset; 518 return offset;
519 return NULL; 519 return NULL;
520 } 520 }
521 521
522 static void uv_ptc_seq_stop(struct seq_file *file, void *data) 522 static void uv_ptc_seq_stop(struct seq_file *file, void *data)
523 { 523 {
524 } 524 }
525 525
526 /* 526 /*
527 * Display the statistics thru /proc 527 * Display the statistics thru /proc
528 * data points to the cpu number 528 * data points to the cpu number
529 */ 529 */
530 static int uv_ptc_seq_show(struct seq_file *file, void *data) 530 static int uv_ptc_seq_show(struct seq_file *file, void *data)
531 { 531 {
532 struct ptc_stats *stat; 532 struct ptc_stats *stat;
533 int cpu; 533 int cpu;
534 534
535 cpu = *(loff_t *)data; 535 cpu = *(loff_t *)data;
536 536
537 if (!cpu) { 537 if (!cpu) {
538 seq_printf(file, 538 seq_printf(file,
539 "# cpu requestor requestee one all sretry dretry ptc_i "); 539 "# cpu requestor requestee one all sretry dretry ptc_i ");
540 seq_printf(file, 540 seq_printf(file,
541 "sw_ack sflush dflush sok dnomsg dmult starget\n"); 541 "sw_ack sflush dflush sok dnomsg dmult starget\n");
542 } 542 }
543 if (cpu < num_possible_cpus() && cpu_online(cpu)) { 543 if (cpu < num_possible_cpus() && cpu_online(cpu)) {
544 stat = &per_cpu(ptcstats, cpu); 544 stat = &per_cpu(ptcstats, cpu);
545 seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ", 545 seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ",
546 cpu, stat->requestor, 546 cpu, stat->requestor,
547 stat->requestee, stat->onetlb, stat->alltlb, 547 stat->requestee, stat->onetlb, stat->alltlb,
548 stat->s_retry, stat->d_retry, stat->ptc_i); 548 stat->s_retry, stat->d_retry, stat->ptc_i);
549 seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n", 549 seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
550 uv_read_global_mmr64(uv_cpu_to_pnode(cpu), 550 uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
551 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), 551 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
552 stat->sflush, stat->dflush, 552 stat->sflush, stat->dflush,
553 stat->retriesok, stat->nomsg, 553 stat->retriesok, stat->nomsg,
554 stat->multmsg, stat->ntargeted); 554 stat->multmsg, stat->ntargeted);
555 } 555 }
556 556
557 return 0; 557 return 0;
558 } 558 }
559 559
560 /* 560 /*
561 * 0: display meaning of the statistics 561 * 0: display meaning of the statistics
562 * >0: retry limit 562 * >0: retry limit
563 */ 563 */
564 static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, 564 static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
565 size_t count, loff_t *data) 565 size_t count, loff_t *data)
566 { 566 {
567 long newmode; 567 long newmode;
568 char optstr[64]; 568 char optstr[64];
569 569
570 if (count == 0 || count > sizeof(optstr)) 570 if (count == 0 || count > sizeof(optstr))
571 return -EINVAL; 571 return -EINVAL;
572 if (copy_from_user(optstr, user, count)) 572 if (copy_from_user(optstr, user, count))
573 return -EFAULT; 573 return -EFAULT;
574 optstr[count - 1] = '\0'; 574 optstr[count - 1] = '\0';
575 if (strict_strtoul(optstr, 10, &newmode) < 0) { 575 if (strict_strtoul(optstr, 10, &newmode) < 0) {
576 printk(KERN_DEBUG "%s is invalid\n", optstr); 576 printk(KERN_DEBUG "%s is invalid\n", optstr);
577 return -EINVAL; 577 return -EINVAL;
578 } 578 }
579 579
580 if (newmode == 0) { 580 if (newmode == 0) {
581 printk(KERN_DEBUG "# cpu: cpu number\n"); 581 printk(KERN_DEBUG "# cpu: cpu number\n");
582 printk(KERN_DEBUG 582 printk(KERN_DEBUG
583 "requestor: times this cpu was the flush requestor\n"); 583 "requestor: times this cpu was the flush requestor\n");
584 printk(KERN_DEBUG 584 printk(KERN_DEBUG
585 "requestee: times this cpu was requested to flush its TLBs\n"); 585 "requestee: times this cpu was requested to flush its TLBs\n");
586 printk(KERN_DEBUG 586 printk(KERN_DEBUG
587 "one: times requested to flush a single address\n"); 587 "one: times requested to flush a single address\n");
588 printk(KERN_DEBUG 588 printk(KERN_DEBUG
589 "all: times requested to flush all TLB's\n"); 589 "all: times requested to flush all TLB's\n");
590 printk(KERN_DEBUG 590 printk(KERN_DEBUG
591 "sretry: number of retries of source-side timeouts\n"); 591 "sretry: number of retries of source-side timeouts\n");
592 printk(KERN_DEBUG 592 printk(KERN_DEBUG
593 "dretry: number of retries of destination-side timeouts\n"); 593 "dretry: number of retries of destination-side timeouts\n");
594 printk(KERN_DEBUG 594 printk(KERN_DEBUG
595 "ptc_i: times UV fell through to IPI-style flushes\n"); 595 "ptc_i: times UV fell through to IPI-style flushes\n");
596 printk(KERN_DEBUG 596 printk(KERN_DEBUG
597 "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); 597 "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
598 printk(KERN_DEBUG 598 printk(KERN_DEBUG
599 "sflush_us: cycles spent in uv_flush_tlb_others()\n"); 599 "sflush_us: cycles spent in uv_flush_tlb_others()\n");
600 printk(KERN_DEBUG 600 printk(KERN_DEBUG
601 "dflush_us: cycles spent in handling flush requests\n"); 601 "dflush_us: cycles spent in handling flush requests\n");
602 printk(KERN_DEBUG "sok: successes on retry\n"); 602 printk(KERN_DEBUG "sok: successes on retry\n");
603 printk(KERN_DEBUG "dnomsg: interrupts with no message\n"); 603 printk(KERN_DEBUG "dnomsg: interrupts with no message\n");
604 printk(KERN_DEBUG 604 printk(KERN_DEBUG
605 "dmult: interrupts with multiple messages\n"); 605 "dmult: interrupts with multiple messages\n");
606 printk(KERN_DEBUG "starget: nodes targeted\n"); 606 printk(KERN_DEBUG "starget: nodes targeted\n");
607 } else { 607 } else {
608 uv_bau_retry_limit = newmode; 608 uv_bau_retry_limit = newmode;
609 printk(KERN_DEBUG "timeout retry limit:%d\n", 609 printk(KERN_DEBUG "timeout retry limit:%d\n",
610 uv_bau_retry_limit); 610 uv_bau_retry_limit);
611 } 611 }
612 612
613 return count; 613 return count;
614 } 614 }
615 615
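In practice this write handler is a small debugging knob: writing the string "0" to the proc file registered below dumps the legend for the statistics columns into the kernel log, while writing a positive decimal number sets uv_bau_retry_limit to that value (for example, echoing 8 into the /proc file named by UV_PTC_BASENAME, whose definition lives in a header outside this hunk). Note that the handler rejects writes larger than its 64-byte buffer and NUL-terminates over the last byte written -- for a typical echo that is the trailing newline -- before parsing with strict_strtoul().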
616 static const struct seq_operations uv_ptc_seq_ops = { 616 static const struct seq_operations uv_ptc_seq_ops = {
617 .start = uv_ptc_seq_start, 617 .start = uv_ptc_seq_start,
618 .next = uv_ptc_seq_next, 618 .next = uv_ptc_seq_next,
619 .stop = uv_ptc_seq_stop, 619 .stop = uv_ptc_seq_stop,
620 .show = uv_ptc_seq_show 620 .show = uv_ptc_seq_show
621 }; 621 };
622 622
623 static int uv_ptc_proc_open(struct inode *inode, struct file *file) 623 static int uv_ptc_proc_open(struct inode *inode, struct file *file)
624 { 624 {
625 return seq_open(file, &uv_ptc_seq_ops); 625 return seq_open(file, &uv_ptc_seq_ops);
626 } 626 }
627 627
628 static const struct file_operations proc_uv_ptc_operations = { 628 static const struct file_operations proc_uv_ptc_operations = {
629 .open = uv_ptc_proc_open, 629 .open = uv_ptc_proc_open,
630 .read = seq_read, 630 .read = seq_read,
631 .write = uv_ptc_proc_write, 631 .write = uv_ptc_proc_write,
632 .llseek = seq_lseek, 632 .llseek = seq_lseek,
633 .release = seq_release, 633 .release = seq_release,
634 }; 634 };
635 635
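Taken together, uv_ptc_seq_ops and proc_uv_ptc_operations are the standard seq_file wiring: seq_open() in the ->open handler binds the four iterator callbacks to the file, seq_read()/seq_lseek()/seq_release() then handle the buffering, and the loff_t position itself doubles as the cpu index. A stripped-down sketch of the same iterator contract, independent of the UV specifics (item_count and the ex_* names are placeholders):

    static const loff_t item_count = 4;     /* placeholder record count */

    static void *ex_seq_start(struct seq_file *file, loff_t *pos)
    {
        return (*pos < item_count) ? pos : NULL;  /* position is the index */
    }

    static void *ex_seq_next(struct seq_file *file, void *data, loff_t *pos)
    {
        (*pos)++;
        return (*pos < item_count) ? pos : NULL;
    }

    static void ex_seq_stop(struct seq_file *file, void *data)
    {
        /* nothing to release */
    }

    static int ex_seq_show(struct seq_file *file, void *data)
    {
        seq_printf(file, "item %lld\n", (long long)*(loff_t *)data);
        return 0;
    }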
636 static int __init uv_ptc_init(void) 636 static int __init uv_ptc_init(void)
637 { 637 {
638 struct proc_dir_entry *proc_uv_ptc; 638 struct proc_dir_entry *proc_uv_ptc;
639 639
640 if (!is_uv_system()) 640 if (!is_uv_system())
641 return 0; 641 return 0;
642 642
643 proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); 643 proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL);
644 if (!proc_uv_ptc) { 644 if (!proc_uv_ptc) {
645 printk(KERN_ERR "unable to create %s proc entry\n", 645 printk(KERN_ERR "unable to create %s proc entry\n",
646 UV_PTC_BASENAME); 646 UV_PTC_BASENAME);
647 return -EINVAL; 647 return -EINVAL;
648 } 648 }
649 proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; 649 proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
650 return 0; 650 return 0;
651 } 651 }
652 652
653 /* 653 /*
654 * begin the initialization of the per-blade control structures 654 * begin the initialization of the per-blade control structures
655 */ 655 */
656 static struct bau_control * __init uv_table_bases_init(int blade, int node) 656 static struct bau_control * __init uv_table_bases_init(int blade, int node)
657 { 657 {
658 int i; 658 int i;
659 struct bau_msg_status *msp; 659 struct bau_msg_status *msp;
660 struct bau_control *bau_tabp; 660 struct bau_control *bau_tabp;
661 661
662 bau_tabp = 662 bau_tabp =
663 kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node); 663 kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node);
664 BUG_ON(!bau_tabp); 664 BUG_ON(!bau_tabp);
665 665
666 bau_tabp->msg_statuses = 666 bau_tabp->msg_statuses =
667 kmalloc_node(sizeof(struct bau_msg_status) * 667 kmalloc_node(sizeof(struct bau_msg_status) *
668 DEST_Q_SIZE, GFP_KERNEL, node); 668 DEST_Q_SIZE, GFP_KERNEL, node);
669 BUG_ON(!bau_tabp->msg_statuses); 669 BUG_ON(!bau_tabp->msg_statuses);
670 670
671 for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++) 671 for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++)
672 bau_cpubits_clear(&msp->seen_by, (int) 672 bau_cpubits_clear(&msp->seen_by, (int)
673 uv_blade_nr_possible_cpus(blade)); 673 uv_blade_nr_possible_cpus(blade));
674 674
675 uv_bau_table_bases[blade] = bau_tabp; 675 uv_bau_table_bases[blade] = bau_tabp;
676 676
677 return bau_tabp; 677 return bau_tabp;
678 } 678 }
679 679
680 /* 680 /*
681 * finish the initialization of the per-blade control structures 681 * finish the initialization of the per-blade control structures
682 */ 682 */
683 static void __init 683 static void __init
684 uv_table_bases_finish(int blade, 684 uv_table_bases_finish(int blade,
685 struct bau_control *bau_tablesp, 685 struct bau_control *bau_tablesp,
686 struct bau_desc *adp) 686 struct bau_desc *adp)
687 { 687 {
688 struct bau_control *bcp; 688 struct bau_control *bcp;
689 int cpu; 689 int cpu;
690 690
691 for_each_present_cpu(cpu) { 691 for_each_present_cpu(cpu) {
692 if (blade != uv_cpu_to_blade_id(cpu)) 692 if (blade != uv_cpu_to_blade_id(cpu))
693 continue; 693 continue;
694 694
695 bcp = (struct bau_control *)&per_cpu(bau_control, cpu); 695 bcp = (struct bau_control *)&per_cpu(bau_control, cpu);
696 bcp->bau_msg_head = bau_tablesp->va_queue_first; 696 bcp->bau_msg_head = bau_tablesp->va_queue_first;
697 bcp->va_queue_first = bau_tablesp->va_queue_first; 697 bcp->va_queue_first = bau_tablesp->va_queue_first;
698 bcp->va_queue_last = bau_tablesp->va_queue_last; 698 bcp->va_queue_last = bau_tablesp->va_queue_last;
699 bcp->msg_statuses = bau_tablesp->msg_statuses; 699 bcp->msg_statuses = bau_tablesp->msg_statuses;
700 bcp->descriptor_base = adp; 700 bcp->descriptor_base = adp;
701 } 701 }
702 } 702 }
703 703
704 /* 704 /*
705 * initialize the sending side's sending buffers 705 * initialize the sending side's sending buffers
706 */ 706 */
707 static struct bau_desc * __init 707 static struct bau_desc * __init
708 uv_activation_descriptor_init(int node, int pnode) 708 uv_activation_descriptor_init(int node, int pnode)
709 { 709 {
710 int i; 710 int i;
711 unsigned long pa; 711 unsigned long pa;
712 unsigned long m; 712 unsigned long m;
713 unsigned long n; 713 unsigned long n;
714 unsigned long mmr_image; 714 unsigned long mmr_image;
715 struct bau_desc *adp; 715 struct bau_desc *adp;
716 struct bau_desc *ad2; 716 struct bau_desc *ad2;
717 717
718 adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node); 718 adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node);
719 BUG_ON(!adp); 719 BUG_ON(!adp);
720 720
721 pa = uv_gpa(adp); /* need the real nasid*/ 721 pa = uv_gpa(adp); /* need the real nasid*/
722 n = pa >> uv_nshift; 722 n = pa >> uv_nshift;
723 m = pa & uv_mmask; 723 m = pa & uv_mmask;
724 724
725 mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE); 725 mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE);
726 if (mmr_image) { 726 if (mmr_image) {
727 uv_write_global_mmr64(pnode, (unsigned long) 727 uv_write_global_mmr64(pnode, (unsigned long)
728 UVH_LB_BAU_SB_DESCRIPTOR_BASE, 728 UVH_LB_BAU_SB_DESCRIPTOR_BASE,
729 (n << UV_DESC_BASE_PNODE_SHIFT | m)); 729 (n << UV_DESC_BASE_PNODE_SHIFT | m));
730 } 730 }
731 731
732 for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) { 732 for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
733 memset(ad2, 0, sizeof(struct bau_desc)); 733 memset(ad2, 0, sizeof(struct bau_desc));
734 ad2->header.sw_ack_flag = 1; 734 ad2->header.sw_ack_flag = 1;
735 /* 735 /*
736 * base_dest_nodeid is the first node in the partition, so 736 * base_dest_nodeid is the first node in the partition, so
737 * the bit map will indicate partition-relative node numbers. 737 * the bit map will indicate partition-relative node numbers.
738 * note that base_dest_nodeid is actually a nasid. 738 * note that base_dest_nodeid is actually a nasid.
739 */ 739 */
740 ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; 740 ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
741 ad2->header.command = UV_NET_ENDPOINT_INTD; 741 ad2->header.command = UV_NET_ENDPOINT_INTD;
742 ad2->header.int_both = 1; 742 ad2->header.int_both = 1;
743 /* 743 /*
744 * all others need to be set to zero: 744 * all others need to be set to zero:
745 * fairness chaining multilevel count replied_to 745 * fairness chaining multilevel count replied_to
746 */ 746 */
747 } 747 }
748 return adp; 748 return adp;
749 } 749 }
750 750
751 /* 751 /*
752 * initialize the destination side's receiving buffers 752 * initialize the destination side's receiving buffers
753 */ 753 */
754 static struct bau_payload_queue_entry * __init 754 static struct bau_payload_queue_entry * __init
755 uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) 755 uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
756 { 756 {
757 struct bau_payload_queue_entry *pqp; 757 struct bau_payload_queue_entry *pqp;
758 unsigned long pa; 758 unsigned long pa;
759 int pn; 759 int pn;
760 char *cp; 760 char *cp;
761 761
762 pqp = (struct bau_payload_queue_entry *) kmalloc_node( 762 pqp = (struct bau_payload_queue_entry *) kmalloc_node(
763 (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), 763 (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry),
764 GFP_KERNEL, node); 764 GFP_KERNEL, node);
765 BUG_ON(!pqp); 765 BUG_ON(!pqp);
766 766
767 cp = (char *)pqp + 31; 767 cp = (char *)pqp + 31;
768 pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); 768 pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
769 bau_tablesp->va_queue_first = pqp; 769 bau_tablesp->va_queue_first = pqp;
770 /* 770 /*
771 * need the pnode of where the memory was really allocated 771 * need the pnode of where the memory was really allocated
772 */ 772 */
773 pa = uv_gpa(pqp); 773 pa = uv_gpa(pqp);
774 pn = pa >> uv_nshift; 774 pn = pa >> uv_nshift;
775 uv_write_global_mmr64(pnode, 775 uv_write_global_mmr64(pnode,
776 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, 776 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
777 ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | 777 ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
778 uv_physnodeaddr(pqp)); 778 uv_physnodeaddr(pqp));
779 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, 779 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
780 uv_physnodeaddr(pqp)); 780 uv_physnodeaddr(pqp));
781 bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1); 781 bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1);
782 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, 782 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
783 (unsigned long) 783 (unsigned long)
784 uv_physnodeaddr(bau_tablesp->va_queue_last)); 784 uv_physnodeaddr(bau_tablesp->va_queue_last));
785 memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); 785 memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE);
786 786
787 return pqp; 787 return pqp;
788 } 788 }
789 789
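The pointer arithmetic near the top of uv_payload_queue_init() is a hand-rolled align-up: the allocation asks for DEST_Q_SIZE + 1 entries of slack, 31 is added to the address, and shifting right then left by 5 clears the low five bits, yielding the first 32-byte-aligned entry inside the buffer. The same operation written the more conventional way (illustrative only):

    unsigned long addr = (unsigned long)pqp;

    addr = (addr + 31) & ~31UL;   /* round up to a multiple of 32 (1 << 5) */
    pqp = (struct bau_payload_queue_entry *)addr;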
790 /* 790 /*
791 * Initialization of each UV blade's structures 791 * Initialization of each UV blade's structures
792 */ 792 */
793 static int __init uv_init_blade(int blade) 793 static int __init uv_init_blade(int blade)
794 { 794 {
795 int node; 795 int node;
796 int pnode; 796 int pnode;
797 unsigned long pa; 797 unsigned long pa;
798 unsigned long apicid; 798 unsigned long apicid;
799 struct bau_desc *adp; 799 struct bau_desc *adp;
800 struct bau_payload_queue_entry *pqp; 800 struct bau_payload_queue_entry *pqp;
801 struct bau_control *bau_tablesp; 801 struct bau_control *bau_tablesp;
802 802
803 node = blade_to_first_node(blade); 803 node = blade_to_first_node(blade);
804 bau_tablesp = uv_table_bases_init(blade, node); 804 bau_tablesp = uv_table_bases_init(blade, node);
805 pnode = uv_blade_to_pnode(blade); 805 pnode = uv_blade_to_pnode(blade);
806 adp = uv_activation_descriptor_init(node, pnode); 806 adp = uv_activation_descriptor_init(node, pnode);
807 pqp = uv_payload_queue_init(node, pnode, bau_tablesp); 807 pqp = uv_payload_queue_init(node, pnode, bau_tablesp);
808 uv_table_bases_finish(blade, bau_tablesp, adp); 808 uv_table_bases_finish(blade, bau_tablesp, adp);
809 /* 809 /*
810 * the below initialization can't be in firmware because the 810 * the below initialization can't be in firmware because the
811 * messaging IRQ will be determined by the OS 811 * messaging IRQ will be determined by the OS
812 */ 812 */
813 apicid = blade_to_first_apicid(blade); 813 apicid = blade_to_first_apicid(blade);
814 pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); 814 pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
815 if ((pa & 0xff) != UV_BAU_MESSAGE) { 815 if ((pa & 0xff) != UV_BAU_MESSAGE) {
816 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, 816 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
817 ((apicid << 32) | UV_BAU_MESSAGE)); 817 ((apicid << 32) | UV_BAU_MESSAGE));
818 } 818 }
819 return 0; 819 return 0;
820 } 820 }
821 821
822 /* 822 /*
823 * Initialization of BAU-related structures 823 * Initialization of BAU-related structures
824 */ 824 */
825 static int __init uv_bau_init(void) 825 static int __init uv_bau_init(void)
826 { 826 {
827 int blade; 827 int blade;
828 int nblades; 828 int nblades;
829 int cur_cpu; 829 int cur_cpu;
830 830
831 if (!is_uv_system()) 831 if (!is_uv_system())
832 return 0; 832 return 0;
833 833
834 for_each_possible_cpu(cur_cpu) 834 for_each_possible_cpu(cur_cpu)
835 alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), 835 zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
836 GFP_KERNEL, cpu_to_node(cur_cpu)); 836 GFP_KERNEL, cpu_to_node(cur_cpu));
837 837
838 uv_bau_retry_limit = 1; 838 uv_bau_retry_limit = 1;
839 uv_nshift = uv_hub_info->n_val; 839 uv_nshift = uv_hub_info->n_val;
840 uv_mmask = (1UL << uv_hub_info->n_val) - 1; 840 uv_mmask = (1UL << uv_hub_info->n_val) - 1;
841 nblades = uv_num_possible_blades(); 841 nblades = uv_num_possible_blades();
842 842
843 uv_bau_table_bases = (struct bau_control **) 843 uv_bau_table_bases = (struct bau_control **)
844 kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL); 844 kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
845 BUG_ON(!uv_bau_table_bases); 845 BUG_ON(!uv_bau_table_bases);
846 846
847 uv_partition_base_pnode = 0x7fffffff; 847 uv_partition_base_pnode = 0x7fffffff;
848 for (blade = 0; blade < nblades; blade++) 848 for (blade = 0; blade < nblades; blade++)
849 if (uv_blade_nr_possible_cpus(blade) && 849 if (uv_blade_nr_possible_cpus(blade) &&
850 (uv_blade_to_pnode(blade) < uv_partition_base_pnode)) 850 (uv_blade_to_pnode(blade) < uv_partition_base_pnode))
851 uv_partition_base_pnode = uv_blade_to_pnode(blade); 851 uv_partition_base_pnode = uv_blade_to_pnode(blade);
852 for (blade = 0; blade < nblades; blade++) 852 for (blade = 0; blade < nblades; blade++)
853 if (uv_blade_nr_possible_cpus(blade)) 853 if (uv_blade_nr_possible_cpus(blade))
854 uv_init_blade(blade); 854 uv_init_blade(blade);
855 855
856 alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); 856 alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
857 uv_enable_timeouts(); 857 uv_enable_timeouts();
858 858
859 return 0; 859 return 0;
860 } 860 }
861 __initcall(uv_bau_init); 861 __initcall(uv_bau_init);
862 __initcall(uv_ptc_init); 862 __initcall(uv_ptc_init);
863 863
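The one-line substitution in uv_bau_init() -- alloc_cpumask_var_node() becoming zalloc_cpumask_var_node() -- is the whole change to this file. uv_flush_tlb_mask is declared with DEFINE_PER_CPU(cpumask_var_t, ...): when cpumask_var_t is a fixed-size array, the per-cpu storage starts out zeroed and the allocation call is effectively a no-op, but when CONFIG_CPUMASK_OFFSTACK turns it into a pointer, the plain allocator returns uninitialized memory and the mask would begin life with stray bits set. A short sketch of the two call patterns, with a hypothetical example_mask standing in for the real variable (only one of the two alternatives would appear in real code):

    static DEFINE_PER_CPU(cpumask_var_t, example_mask);   /* hypothetical */

    static int __init example_init(void)
    {
        int cpu;

        for_each_possible_cpu(cpu) {
            /* alternative 1: plain allocation may hand back garbage
             * when the mask is an off-stack allocation, so it needs
             * an explicit clear afterwards */
            alloc_cpumask_var_node(&per_cpu(example_mask, cpu),
                                   GFP_KERNEL, cpu_to_node(cpu));
            cpumask_clear(per_cpu(example_mask, cpu));

            /* alternative 2: allocate and zero in one step, matching
             * the already-zeroed array case */
            zalloc_cpumask_var_node(&per_cpu(example_mask, cpu),
                                    GFP_KERNEL, cpu_to_node(cpu));
        }
        return 0;
    }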
drivers/acpi/processor_core.c
1 /* 1 /*
2 * acpi_processor.c - ACPI Processor Driver ($Revision: 71 $) 2 * acpi_processor.c - ACPI Processor Driver ($Revision: 71 $)
3 * 3 *
4 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> 4 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
5 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> 5 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
6 * Copyright (C) 2004 Dominik Brodowski <linux@brodo.de> 6 * Copyright (C) 2004 Dominik Brodowski <linux@brodo.de>
7 * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 7 * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
8 * - Added processor hotplug support 8 * - Added processor hotplug support
9 * 9 *
10 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 10 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
11 * 11 *
12 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by 13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or (at 14 * the Free Software Foundation; either version 2 of the License, or (at
15 * your option) any later version. 15 * your option) any later version.
16 * 16 *
17 * This program is distributed in the hope that it will be useful, but 17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of 18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details. 20 * General Public License for more details.
21 * 21 *
22 * You should have received a copy of the GNU General Public License along 22 * You should have received a copy of the GNU General Public License along
23 * with this program; if not, write to the Free Software Foundation, Inc., 23 * with this program; if not, write to the Free Software Foundation, Inc.,
24 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 24 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
25 * 25 *
26 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 26 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
27 * TBD: 27 * TBD:
28 * 1. Make # power states dynamic. 28 * 1. Make # power states dynamic.
29 * 2. Support duty_cycle values that span bit 4. 29 * 2. Support duty_cycle values that span bit 4.
30 * 3. Optimize by having scheduler determine business instead of 30 * 3. Optimize by having scheduler determine business instead of
31 * having us try to calculate it here. 31 * having us try to calculate it here.
32 * 4. Need C1 timing -- must modify kernel (IRQ handler) to get this. 32 * 4. Need C1 timing -- must modify kernel (IRQ handler) to get this.
33 */ 33 */
34 34
35 #include <linux/kernel.h> 35 #include <linux/kernel.h>
36 #include <linux/module.h> 36 #include <linux/module.h>
37 #include <linux/init.h> 37 #include <linux/init.h>
38 #include <linux/types.h> 38 #include <linux/types.h>
39 #include <linux/pci.h> 39 #include <linux/pci.h>
40 #include <linux/pm.h> 40 #include <linux/pm.h>
41 #include <linux/cpufreq.h> 41 #include <linux/cpufreq.h>
42 #include <linux/cpu.h> 42 #include <linux/cpu.h>
43 #include <linux/proc_fs.h> 43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h> 44 #include <linux/seq_file.h>
45 #include <linux/dmi.h> 45 #include <linux/dmi.h>
46 #include <linux/moduleparam.h> 46 #include <linux/moduleparam.h>
47 #include <linux/cpuidle.h> 47 #include <linux/cpuidle.h>
48 48
49 #include <asm/io.h> 49 #include <asm/io.h>
50 #include <asm/system.h> 50 #include <asm/system.h>
51 #include <asm/cpu.h> 51 #include <asm/cpu.h>
52 #include <asm/delay.h> 52 #include <asm/delay.h>
53 #include <asm/uaccess.h> 53 #include <asm/uaccess.h>
54 #include <asm/processor.h> 54 #include <asm/processor.h>
55 #include <asm/smp.h> 55 #include <asm/smp.h>
56 #include <asm/acpi.h> 56 #include <asm/acpi.h>
57 57
58 #include <acpi/acpi_bus.h> 58 #include <acpi/acpi_bus.h>
59 #include <acpi/acpi_drivers.h> 59 #include <acpi/acpi_drivers.h>
60 #include <acpi/processor.h> 60 #include <acpi/processor.h>
61 61
62 #define ACPI_PROCESSOR_CLASS "processor" 62 #define ACPI_PROCESSOR_CLASS "processor"
63 #define ACPI_PROCESSOR_DEVICE_NAME "Processor" 63 #define ACPI_PROCESSOR_DEVICE_NAME "Processor"
64 #define ACPI_PROCESSOR_FILE_INFO "info" 64 #define ACPI_PROCESSOR_FILE_INFO "info"
65 #define ACPI_PROCESSOR_FILE_THROTTLING "throttling" 65 #define ACPI_PROCESSOR_FILE_THROTTLING "throttling"
66 #define ACPI_PROCESSOR_FILE_LIMIT "limit" 66 #define ACPI_PROCESSOR_FILE_LIMIT "limit"
67 #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80 67 #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80
68 #define ACPI_PROCESSOR_NOTIFY_POWER 0x81 68 #define ACPI_PROCESSOR_NOTIFY_POWER 0x81
69 #define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82 69 #define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82
70 70
71 #define ACPI_PROCESSOR_LIMIT_USER 0 71 #define ACPI_PROCESSOR_LIMIT_USER 0
72 #define ACPI_PROCESSOR_LIMIT_THERMAL 1 72 #define ACPI_PROCESSOR_LIMIT_THERMAL 1
73 73
74 #define _COMPONENT ACPI_PROCESSOR_COMPONENT 74 #define _COMPONENT ACPI_PROCESSOR_COMPONENT
75 ACPI_MODULE_NAME("processor_core"); 75 ACPI_MODULE_NAME("processor_core");
76 76
77 MODULE_AUTHOR("Paul Diefenbaugh"); 77 MODULE_AUTHOR("Paul Diefenbaugh");
78 MODULE_DESCRIPTION("ACPI Processor Driver"); 78 MODULE_DESCRIPTION("ACPI Processor Driver");
79 MODULE_LICENSE("GPL"); 79 MODULE_LICENSE("GPL");
80 80
81 static int acpi_processor_add(struct acpi_device *device); 81 static int acpi_processor_add(struct acpi_device *device);
82 static int acpi_processor_start(struct acpi_device *device); 82 static int acpi_processor_start(struct acpi_device *device);
83 static int acpi_processor_remove(struct acpi_device *device, int type); 83 static int acpi_processor_remove(struct acpi_device *device, int type);
84 static int acpi_processor_info_open_fs(struct inode *inode, struct file *file); 84 static int acpi_processor_info_open_fs(struct inode *inode, struct file *file);
85 static void acpi_processor_notify(struct acpi_device *device, u32 event); 85 static void acpi_processor_notify(struct acpi_device *device, u32 event);
86 static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu); 86 static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu);
87 static int acpi_processor_handle_eject(struct acpi_processor *pr); 87 static int acpi_processor_handle_eject(struct acpi_processor *pr);
88 88
89 89
90 static const struct acpi_device_id processor_device_ids[] = { 90 static const struct acpi_device_id processor_device_ids[] = {
91 {ACPI_PROCESSOR_OBJECT_HID, 0}, 91 {ACPI_PROCESSOR_OBJECT_HID, 0},
92 {ACPI_PROCESSOR_HID, 0}, 92 {ACPI_PROCESSOR_HID, 0},
93 {"", 0}, 93 {"", 0},
94 }; 94 };
95 MODULE_DEVICE_TABLE(acpi, processor_device_ids); 95 MODULE_DEVICE_TABLE(acpi, processor_device_ids);
96 96
97 static struct acpi_driver acpi_processor_driver = { 97 static struct acpi_driver acpi_processor_driver = {
98 .name = "processor", 98 .name = "processor",
99 .class = ACPI_PROCESSOR_CLASS, 99 .class = ACPI_PROCESSOR_CLASS,
100 .ids = processor_device_ids, 100 .ids = processor_device_ids,
101 .ops = { 101 .ops = {
102 .add = acpi_processor_add, 102 .add = acpi_processor_add,
103 .remove = acpi_processor_remove, 103 .remove = acpi_processor_remove,
104 .start = acpi_processor_start, 104 .start = acpi_processor_start,
105 .suspend = acpi_processor_suspend, 105 .suspend = acpi_processor_suspend,
106 .resume = acpi_processor_resume, 106 .resume = acpi_processor_resume,
107 .notify = acpi_processor_notify, 107 .notify = acpi_processor_notify,
108 }, 108 },
109 }; 109 };
110 110
111 #define INSTALL_NOTIFY_HANDLER 1 111 #define INSTALL_NOTIFY_HANDLER 1
112 #define UNINSTALL_NOTIFY_HANDLER 2 112 #define UNINSTALL_NOTIFY_HANDLER 2
113 113
114 static const struct file_operations acpi_processor_info_fops = { 114 static const struct file_operations acpi_processor_info_fops = {
115 .owner = THIS_MODULE, 115 .owner = THIS_MODULE,
116 .open = acpi_processor_info_open_fs, 116 .open = acpi_processor_info_open_fs,
117 .read = seq_read, 117 .read = seq_read,
118 .llseek = seq_lseek, 118 .llseek = seq_lseek,
119 .release = single_release, 119 .release = single_release,
120 }; 120 };
121 121
122 DEFINE_PER_CPU(struct acpi_processor *, processors); 122 DEFINE_PER_CPU(struct acpi_processor *, processors);
123 struct acpi_processor_errata errata __read_mostly; 123 struct acpi_processor_errata errata __read_mostly;
124 static int set_no_mwait(const struct dmi_system_id *id) 124 static int set_no_mwait(const struct dmi_system_id *id)
125 { 125 {
126 printk(KERN_NOTICE PREFIX "%s detected - " 126 printk(KERN_NOTICE PREFIX "%s detected - "
127 "disabling mwait for CPU C-states\n", id->ident); 127 "disabling mwait for CPU C-states\n", id->ident);
128 idle_nomwait = 1; 128 idle_nomwait = 1;
129 return 0; 129 return 0;
130 } 130 }
131 131
132 static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { 132 static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = {
133 { 133 {
134 set_no_mwait, "IFL91 board", { 134 set_no_mwait, "IFL91 board", {
135 DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"), 135 DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"),
136 DMI_MATCH(DMI_SYS_VENDOR, "ZEPTO"), 136 DMI_MATCH(DMI_SYS_VENDOR, "ZEPTO"),
137 DMI_MATCH(DMI_PRODUCT_VERSION, "3215W"), 137 DMI_MATCH(DMI_PRODUCT_VERSION, "3215W"),
138 DMI_MATCH(DMI_BOARD_NAME, "IFL91") }, NULL}, 138 DMI_MATCH(DMI_BOARD_NAME, "IFL91") }, NULL},
139 { 139 {
140 set_no_mwait, "Extensa 5220", { 140 set_no_mwait, "Extensa 5220", {
141 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), 141 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
142 DMI_MATCH(DMI_SYS_VENDOR, "Acer"), 142 DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
143 DMI_MATCH(DMI_PRODUCT_VERSION, "0100"), 143 DMI_MATCH(DMI_PRODUCT_VERSION, "0100"),
144 DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL}, 144 DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL},
145 {}, 145 {},
146 }; 146 };
147 147
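The DMI table above is only a table; it takes effect when something runs it through the DMI core. That call is not visible in this hunk, but the conventional consumer is dmi_check_system(), which compares each entry's DMI_MATCH fields against the running machine's DMI strings and invokes the set_no_mwait() callback for every entry that matches -- presumably along these lines somewhere in the driver's init path:

    /* exact call site not shown in this hunk */
    dmi_check_system(processor_idle_dmi_table);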
148 /* -------------------------------------------------------------------------- 148 /* --------------------------------------------------------------------------
149 Errata Handling 149 Errata Handling
150 -------------------------------------------------------------------------- */ 150 -------------------------------------------------------------------------- */
151 151
152 static int acpi_processor_errata_piix4(struct pci_dev *dev) 152 static int acpi_processor_errata_piix4(struct pci_dev *dev)
153 { 153 {
154 u8 value1 = 0; 154 u8 value1 = 0;
155 u8 value2 = 0; 155 u8 value2 = 0;
156 156
157 157
158 if (!dev) 158 if (!dev)
159 return -EINVAL; 159 return -EINVAL;
160 160
161 /* 161 /*
162 * Note that 'dev' references the PIIX4 ACPI Controller. 162 * Note that 'dev' references the PIIX4 ACPI Controller.
163 */ 163 */
164 164
165 switch (dev->revision) { 165 switch (dev->revision) {
166 case 0: 166 case 0:
167 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4 A-step\n")); 167 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4 A-step\n"));
168 break; 168 break;
169 case 1: 169 case 1:
170 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4 B-step\n")); 170 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4 B-step\n"));
171 break; 171 break;
172 case 2: 172 case 2:
173 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4E\n")); 173 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4E\n"));
174 break; 174 break;
175 case 3: 175 case 3:
176 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4M\n")); 176 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4M\n"));
177 break; 177 break;
178 default: 178 default:
179 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found unknown PIIX4\n")); 179 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found unknown PIIX4\n"));
180 break; 180 break;
181 } 181 }
182 182
183 switch (dev->revision) { 183 switch (dev->revision) {
184 184
185 case 0: /* PIIX4 A-step */ 185 case 0: /* PIIX4 A-step */
186 case 1: /* PIIX4 B-step */ 186 case 1: /* PIIX4 B-step */
187 /* 187 /*
188 * See specification changes #13 ("Manual Throttle Duty Cycle") 188 * See specification changes #13 ("Manual Throttle Duty Cycle")
189 * and #14 ("Enabling and Disabling Manual Throttle"), plus 189 * and #14 ("Enabling and Disabling Manual Throttle"), plus
190 * erratum #5 ("STPCLK# Deassertion Time") from the January 190 * erratum #5 ("STPCLK# Deassertion Time") from the January
191 * 2002 PIIX4 specification update. Applies to only older 191 * 2002 PIIX4 specification update. Applies to only older
192 * PIIX4 models. 192 * PIIX4 models.
193 */ 193 */
194 errata.piix4.throttle = 1; 194 errata.piix4.throttle = 1;
195 195
196 case 2: /* PIIX4E */ 196 case 2: /* PIIX4E */
197 case 3: /* PIIX4M */ 197 case 3: /* PIIX4M */
198 /* 198 /*
199 * See erratum #18 ("C3 Power State/BMIDE and Type-F DMA 199 * See erratum #18 ("C3 Power State/BMIDE and Type-F DMA
200 * Livelock") from the January 2002 PIIX4 specification update. 200 * Livelock") from the January 2002 PIIX4 specification update.
201 * Applies to all PIIX4 models. 201 * Applies to all PIIX4 models.
202 */ 202 */
203 203
204 /* 204 /*
205 * BM-IDE 205 * BM-IDE
206 * ------ 206 * ------
207 * Find the PIIX4 IDE Controller and get the Bus Master IDE 207 * Find the PIIX4 IDE Controller and get the Bus Master IDE
208 * Status register address. We'll use this later to read 208 * Status register address. We'll use this later to read
209 * each IDE controller's DMA status to make sure we catch all 209 * each IDE controller's DMA status to make sure we catch all
210 * DMA activity. 210 * DMA activity.
211 */ 211 */
212 dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, 212 dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
213 PCI_DEVICE_ID_INTEL_82371AB, 213 PCI_DEVICE_ID_INTEL_82371AB,
214 PCI_ANY_ID, PCI_ANY_ID, NULL); 214 PCI_ANY_ID, PCI_ANY_ID, NULL);
215 if (dev) { 215 if (dev) {
216 errata.piix4.bmisx = pci_resource_start(dev, 4); 216 errata.piix4.bmisx = pci_resource_start(dev, 4);
217 pci_dev_put(dev); 217 pci_dev_put(dev);
218 } 218 }
219 219
220 /* 220 /*
221 * Type-F DMA 221 * Type-F DMA
222 * ---------- 222 * ----------
223 * Find the PIIX4 ISA Controller and read the Motherboard 223 * Find the PIIX4 ISA Controller and read the Motherboard
224 * DMA controller's status to see if Type-F (Fast) DMA mode 224 * DMA controller's status to see if Type-F (Fast) DMA mode
225 * is enabled (bit 7) on either channel. Note that we'll 225 * is enabled (bit 7) on either channel. Note that we'll
226 * disable C3 support if this is enabled, as some legacy 226 * disable C3 support if this is enabled, as some legacy
227 * devices won't operate well if fast DMA is disabled. 227 * devices won't operate well if fast DMA is disabled.
228 */ 228 */
229 dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, 229 dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
230 PCI_DEVICE_ID_INTEL_82371AB_0, 230 PCI_DEVICE_ID_INTEL_82371AB_0,
231 PCI_ANY_ID, PCI_ANY_ID, NULL); 231 PCI_ANY_ID, PCI_ANY_ID, NULL);
232 if (dev) { 232 if (dev) {
233 pci_read_config_byte(dev, 0x76, &value1); 233 pci_read_config_byte(dev, 0x76, &value1);
234 pci_read_config_byte(dev, 0x77, &value2); 234 pci_read_config_byte(dev, 0x77, &value2);
235 if ((value1 & 0x80) || (value2 & 0x80)) 235 if ((value1 & 0x80) || (value2 & 0x80))
236 errata.piix4.fdma = 1; 236 errata.piix4.fdma = 1;
237 pci_dev_put(dev); 237 pci_dev_put(dev);
238 } 238 }
239 239
240 break; 240 break;
241 } 241 }
242 242
243 if (errata.piix4.bmisx) 243 if (errata.piix4.bmisx)
244 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 244 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
245 "Bus master activity detection (BM-IDE) erratum enabled\n")); 245 "Bus master activity detection (BM-IDE) erratum enabled\n"));
246 if (errata.piix4.fdma) 246 if (errata.piix4.fdma)
247 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 247 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
248 "Type-F DMA livelock erratum (C3 disabled)\n")); 248 "Type-F DMA livelock erratum (C3 disabled)\n"));
249 249
250 return 0; 250 return 0;
251 } 251 }
252 252
253 static int acpi_processor_errata(struct acpi_processor *pr) 253 static int acpi_processor_errata(struct acpi_processor *pr)
254 { 254 {
255 int result = 0; 255 int result = 0;
256 struct pci_dev *dev = NULL; 256 struct pci_dev *dev = NULL;
257 257
258 258
259 if (!pr) 259 if (!pr)
260 return -EINVAL; 260 return -EINVAL;
261 261
262 /* 262 /*
263 * PIIX4 263 * PIIX4
264 */ 264 */
265 dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, 265 dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
266 PCI_DEVICE_ID_INTEL_82371AB_3, PCI_ANY_ID, 266 PCI_DEVICE_ID_INTEL_82371AB_3, PCI_ANY_ID,
267 PCI_ANY_ID, NULL); 267 PCI_ANY_ID, NULL);
268 if (dev) { 268 if (dev) {
269 result = acpi_processor_errata_piix4(dev); 269 result = acpi_processor_errata_piix4(dev);
270 pci_dev_put(dev); 270 pci_dev_put(dev);
271 } 271 }
272 272
273 return result; 273 return result;
274 } 274 }
275 275
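A small note on reference counting in the two errata routines: pci_get_subsys() returns its match with an elevated reference count, which is why each successful lookup above is paired with pci_dev_put(). When more than one match has to be examined, the usual idiom is to feed the previous device back in as the final ('from') argument, which drops its reference automatically -- a hedged sketch of that loop (the vendor/device IDs here are placeholders):

    struct pci_dev *dev = NULL;

    /* each call releases the previous 'dev' and takes a reference on
     * the next match; the loop ends with dev == NULL, so nothing leaks */
    while ((dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
                                 PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
        /* ... inspect dev ... */
    }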
276 /* -------------------------------------------------------------------------- 276 /* --------------------------------------------------------------------------
277 Common ACPI processor functions 277 Common ACPI processor functions
278 -------------------------------------------------------------------------- */ 278 -------------------------------------------------------------------------- */
279 279
280 /* 280 /*
281 * _PDC is required for a BIOS-OS handshake for most of the newer 281 * _PDC is required for a BIOS-OS handshake for most of the newer
282 * ACPI processor features. 282 * ACPI processor features.
283 */ 283 */
284 static int acpi_processor_set_pdc(struct acpi_processor *pr) 284 static int acpi_processor_set_pdc(struct acpi_processor *pr)
285 { 285 {
286 struct acpi_object_list *pdc_in = pr->pdc; 286 struct acpi_object_list *pdc_in = pr->pdc;
287 acpi_status status = AE_OK; 287 acpi_status status = AE_OK;
288 288
289 289
290 if (!pdc_in) 290 if (!pdc_in)
291 return status; 291 return status;
292 if (idle_nomwait) { 292 if (idle_nomwait) {
293 /* 293 /*
294 * If mwait is disabled for CPU C-states, the C2C3_FFH access 294 * If mwait is disabled for CPU C-states, the C2C3_FFH access
295 * mode will be disabled in the parameter of _PDC object. 295 * mode will be disabled in the parameter of _PDC object.
296 * Of course C1_FFH access mode will also be disabled. 296 * Of course C1_FFH access mode will also be disabled.
297 */ 297 */
298 union acpi_object *obj; 298 union acpi_object *obj;
299 u32 *buffer = NULL; 299 u32 *buffer = NULL;
300 300
301 obj = pdc_in->pointer; 301 obj = pdc_in->pointer;
302 buffer = (u32 *)(obj->buffer.pointer); 302 buffer = (u32 *)(obj->buffer.pointer);
303 buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH); 303 buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH);
304 304
305 } 305 }
306 status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL); 306 status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL);
307 307
308 if (ACPI_FAILURE(status)) 308 if (ACPI_FAILURE(status))
309 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 309 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
310 "Could not evaluate _PDC, using legacy perf. control...\n")); 310 "Could not evaluate _PDC, using legacy perf. control...\n"));
311 311
312 return status; 312 return status;
313 } 313 }
314 314
315 /* -------------------------------------------------------------------------- 315 /* --------------------------------------------------------------------------
316 FS Interface (/proc) 316 FS Interface (/proc)
317 -------------------------------------------------------------------------- */ 317 -------------------------------------------------------------------------- */
318 318
319 static struct proc_dir_entry *acpi_processor_dir = NULL; 319 static struct proc_dir_entry *acpi_processor_dir = NULL;
320 320
321 static int acpi_processor_info_seq_show(struct seq_file *seq, void *offset) 321 static int acpi_processor_info_seq_show(struct seq_file *seq, void *offset)
322 { 322 {
323 struct acpi_processor *pr = seq->private; 323 struct acpi_processor *pr = seq->private;
324 324
325 325
326 if (!pr) 326 if (!pr)
327 goto end; 327 goto end;
328 328
329 seq_printf(seq, "processor id: %d\n" 329 seq_printf(seq, "processor id: %d\n"
330 "acpi id: %d\n" 330 "acpi id: %d\n"
331 "bus mastering control: %s\n" 331 "bus mastering control: %s\n"
332 "power management: %s\n" 332 "power management: %s\n"
333 "throttling control: %s\n" 333 "throttling control: %s\n"
334 "limit interface: %s\n", 334 "limit interface: %s\n",
335 pr->id, 335 pr->id,
336 pr->acpi_id, 336 pr->acpi_id,
337 pr->flags.bm_control ? "yes" : "no", 337 pr->flags.bm_control ? "yes" : "no",
338 pr->flags.power ? "yes" : "no", 338 pr->flags.power ? "yes" : "no",
339 pr->flags.throttling ? "yes" : "no", 339 pr->flags.throttling ? "yes" : "no",
340 pr->flags.limit ? "yes" : "no"); 340 pr->flags.limit ? "yes" : "no");
341 341
342 end: 342 end:
343 return 0; 343 return 0;
344 } 344 }
345 345
346 static int acpi_processor_info_open_fs(struct inode *inode, struct file *file) 346 static int acpi_processor_info_open_fs(struct inode *inode, struct file *file)
347 { 347 {
348 return single_open(file, acpi_processor_info_seq_show, 348 return single_open(file, acpi_processor_info_seq_show,
349 PDE(inode)->data); 349 PDE(inode)->data);
350 } 350 }
351 351
352 static int acpi_processor_add_fs(struct acpi_device *device) 352 static int acpi_processor_add_fs(struct acpi_device *device)
353 { 353 {
354 struct proc_dir_entry *entry = NULL; 354 struct proc_dir_entry *entry = NULL;
355 355
356 356
357 if (!acpi_device_dir(device)) { 357 if (!acpi_device_dir(device)) {
358 acpi_device_dir(device) = proc_mkdir(acpi_device_bid(device), 358 acpi_device_dir(device) = proc_mkdir(acpi_device_bid(device),
359 acpi_processor_dir); 359 acpi_processor_dir);
360 if (!acpi_device_dir(device)) 360 if (!acpi_device_dir(device))
361 return -ENODEV; 361 return -ENODEV;
362 } 362 }
363 363
364 /* 'info' [R] */ 364 /* 'info' [R] */
365 entry = proc_create_data(ACPI_PROCESSOR_FILE_INFO, 365 entry = proc_create_data(ACPI_PROCESSOR_FILE_INFO,
366 S_IRUGO, acpi_device_dir(device), 366 S_IRUGO, acpi_device_dir(device),
367 &acpi_processor_info_fops, 367 &acpi_processor_info_fops,
368 acpi_driver_data(device)); 368 acpi_driver_data(device));
369 if (!entry) 369 if (!entry)
370 return -EIO; 370 return -EIO;
371 371
372 /* 'throttling' [R/W] */ 372 /* 'throttling' [R/W] */
373 entry = proc_create_data(ACPI_PROCESSOR_FILE_THROTTLING, 373 entry = proc_create_data(ACPI_PROCESSOR_FILE_THROTTLING,
374 S_IFREG | S_IRUGO | S_IWUSR, 374 S_IFREG | S_IRUGO | S_IWUSR,
375 acpi_device_dir(device), 375 acpi_device_dir(device),
376 &acpi_processor_throttling_fops, 376 &acpi_processor_throttling_fops,
377 acpi_driver_data(device)); 377 acpi_driver_data(device));
378 if (!entry) 378 if (!entry)
379 return -EIO; 379 return -EIO;
380 380
381 /* 'limit' [R/W] */ 381 /* 'limit' [R/W] */
382 entry = proc_create_data(ACPI_PROCESSOR_FILE_LIMIT, 382 entry = proc_create_data(ACPI_PROCESSOR_FILE_LIMIT,
383 S_IFREG | S_IRUGO | S_IWUSR, 383 S_IFREG | S_IRUGO | S_IWUSR,
384 acpi_device_dir(device), 384 acpi_device_dir(device),
385 &acpi_processor_limit_fops, 385 &acpi_processor_limit_fops,
386 acpi_driver_data(device)); 386 acpi_driver_data(device));
387 if (!entry) 387 if (!entry)
388 return -EIO; 388 return -EIO;
389 return 0; 389 return 0;
390 } 390 }
391 391
392 static int acpi_processor_remove_fs(struct acpi_device *device) 392 static int acpi_processor_remove_fs(struct acpi_device *device)
393 { 393 {
394 394
395 if (acpi_device_dir(device)) { 395 if (acpi_device_dir(device)) {
396 remove_proc_entry(ACPI_PROCESSOR_FILE_INFO, 396 remove_proc_entry(ACPI_PROCESSOR_FILE_INFO,
397 acpi_device_dir(device)); 397 acpi_device_dir(device));
398 remove_proc_entry(ACPI_PROCESSOR_FILE_THROTTLING, 398 remove_proc_entry(ACPI_PROCESSOR_FILE_THROTTLING,
399 acpi_device_dir(device)); 399 acpi_device_dir(device));
400 remove_proc_entry(ACPI_PROCESSOR_FILE_LIMIT, 400 remove_proc_entry(ACPI_PROCESSOR_FILE_LIMIT,
401 acpi_device_dir(device)); 401 acpi_device_dir(device));
402 remove_proc_entry(acpi_device_bid(device), acpi_processor_dir); 402 remove_proc_entry(acpi_device_bid(device), acpi_processor_dir);
403 acpi_device_dir(device) = NULL; 403 acpi_device_dir(device) = NULL;
404 } 404 }
405 405
406 return 0; 406 return 0;
407 } 407 }
408 408
409 /* Use the acpiid in MADT to map cpus in case of SMP */ 409 /* Use the acpiid in MADT to map cpus in case of SMP */
410 410
411 #ifndef CONFIG_SMP 411 #ifndef CONFIG_SMP
412 static int get_cpu_id(acpi_handle handle, int type, u32 acpi_id) { return -1; } 412 static int get_cpu_id(acpi_handle handle, int type, u32 acpi_id) { return -1; }
413 #else 413 #else
414 414
415 static struct acpi_table_madt *madt; 415 static struct acpi_table_madt *madt;
416 416
417 static int map_lapic_id(struct acpi_subtable_header *entry, 417 static int map_lapic_id(struct acpi_subtable_header *entry,
418 u32 acpi_id, int *apic_id) 418 u32 acpi_id, int *apic_id)
419 { 419 {
420 struct acpi_madt_local_apic *lapic = 420 struct acpi_madt_local_apic *lapic =
421 (struct acpi_madt_local_apic *)entry; 421 (struct acpi_madt_local_apic *)entry;
422 if ((lapic->lapic_flags & ACPI_MADT_ENABLED) && 422 if ((lapic->lapic_flags & ACPI_MADT_ENABLED) &&
423 lapic->processor_id == acpi_id) { 423 lapic->processor_id == acpi_id) {
424 *apic_id = lapic->id; 424 *apic_id = lapic->id;
425 return 1; 425 return 1;
426 } 426 }
427 return 0; 427 return 0;
428 } 428 }
429 429
430 static int map_x2apic_id(struct acpi_subtable_header *entry, 430 static int map_x2apic_id(struct acpi_subtable_header *entry,
431 int device_declaration, u32 acpi_id, int *apic_id) 431 int device_declaration, u32 acpi_id, int *apic_id)
432 { 432 {
433 struct acpi_madt_local_x2apic *apic = 433 struct acpi_madt_local_x2apic *apic =
434 (struct acpi_madt_local_x2apic *)entry; 434 (struct acpi_madt_local_x2apic *)entry;
435 u32 tmp = apic->local_apic_id; 435 u32 tmp = apic->local_apic_id;
436 436
437 /* Only check enabled APICs */ 437 /* Only check enabled APICs */
438 if (!(apic->lapic_flags & ACPI_MADT_ENABLED)) 438 if (!(apic->lapic_flags & ACPI_MADT_ENABLED))
439 return 0; 439 return 0;
440 440
441 /* Device statement declaration type */ 441 /* Device statement declaration type */
442 if (device_declaration) { 442 if (device_declaration) {
443 if (apic->uid == acpi_id) 443 if (apic->uid == acpi_id)
444 goto found; 444 goto found;
445 } 445 }
446 446
447 return 0; 447 return 0;
448 found: 448 found:
449 *apic_id = tmp; 449 *apic_id = tmp;
450 return 1; 450 return 1;
451 } 451 }
452 452
453 static int map_lsapic_id(struct acpi_subtable_header *entry, 453 static int map_lsapic_id(struct acpi_subtable_header *entry,
454 int device_declaration, u32 acpi_id, int *apic_id) 454 int device_declaration, u32 acpi_id, int *apic_id)
455 { 455 {
456 struct acpi_madt_local_sapic *lsapic = 456 struct acpi_madt_local_sapic *lsapic =
457 (struct acpi_madt_local_sapic *)entry; 457 (struct acpi_madt_local_sapic *)entry;
458 u32 tmp = (lsapic->id << 8) | lsapic->eid; 458 u32 tmp = (lsapic->id << 8) | lsapic->eid;
459 459
460 /* Only check enabled APICs */ 460 /* Only check enabled APICs */
461 if (!(lsapic->lapic_flags & ACPI_MADT_ENABLED)) 461 if (!(lsapic->lapic_flags & ACPI_MADT_ENABLED))
462 return 0; 462 return 0;
463 463
464 /* Device statement declaration type */ 464 /* Device statement declaration type */
465 if (device_declaration) { 465 if (device_declaration) {
466 if (entry->length < 16) 466 if (entry->length < 16)
467 printk(KERN_ERR PREFIX 467 printk(KERN_ERR PREFIX
468 "Invalid LSAPIC with Device type processor (SAPIC ID %#x)\n", 468 "Invalid LSAPIC with Device type processor (SAPIC ID %#x)\n",
469 tmp); 469 tmp);
470 else if (lsapic->uid == acpi_id) 470 else if (lsapic->uid == acpi_id)
471 goto found; 471 goto found;
472 /* Processor statement declaration type */ 472 /* Processor statement declaration type */
473 } else if (lsapic->processor_id == acpi_id) 473 } else if (lsapic->processor_id == acpi_id)
474 goto found; 474 goto found;
475 475
476 return 0; 476 return 0;
477 found: 477 found:
478 *apic_id = tmp; 478 *apic_id = tmp;
479 return 1; 479 return 1;
480 } 480 }
481 481
482 static int map_madt_entry(int type, u32 acpi_id) 482 static int map_madt_entry(int type, u32 acpi_id)
483 { 483 {
484 unsigned long madt_end, entry; 484 unsigned long madt_end, entry;
485 int apic_id = -1; 485 int apic_id = -1;
486 486
487 if (!madt) 487 if (!madt)
488 return apic_id; 488 return apic_id;
489 489
490 entry = (unsigned long)madt; 490 entry = (unsigned long)madt;
491 madt_end = entry + madt->header.length; 491 madt_end = entry + madt->header.length;
492 492
493 /* Parse all entries looking for a match. */ 493 /* Parse all entries looking for a match. */
494 494
495 entry += sizeof(struct acpi_table_madt); 495 entry += sizeof(struct acpi_table_madt);
496 while (entry + sizeof(struct acpi_subtable_header) < madt_end) { 496 while (entry + sizeof(struct acpi_subtable_header) < madt_end) {
497 struct acpi_subtable_header *header = 497 struct acpi_subtable_header *header =
498 (struct acpi_subtable_header *)entry; 498 (struct acpi_subtable_header *)entry;
499 if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) { 499 if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) {
500 if (map_lapic_id(header, acpi_id, &apic_id)) 500 if (map_lapic_id(header, acpi_id, &apic_id))
501 break; 501 break;
502 } else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) { 502 } else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) {
503 if (map_x2apic_id(header, type, acpi_id, &apic_id)) 503 if (map_x2apic_id(header, type, acpi_id, &apic_id))
504 break; 504 break;
505 } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) { 505 } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) {
506 if (map_lsapic_id(header, type, acpi_id, &apic_id)) 506 if (map_lsapic_id(header, type, acpi_id, &apic_id))
507 break; 507 break;
508 } 508 }
509 entry += header->length; 509 entry += header->length;
510 } 510 }
511 return apic_id; 511 return apic_id;
512 } 512 }
513 513
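map_madt_entry() above walks the MADT as a flat sequence of variable-length subtables: skip the fixed table header, read each subtable's type and length, dispatch on the type, and advance by the subtable's own length until the end of the table. A minimal standalone sketch of that walking pattern, using simplified stand-in structures rather than the real ACPICA types (walk_subtables() and the fake buffer are illustrative only):

	#include <stdio.h>
	#include <stdint.h>

	/* Simplified stand-in for struct acpi_subtable_header. */
	struct fake_subtable {
		uint8_t type;
		uint8_t length;
	};

	/* Walk the byte range after the fixed header, one subtable at a time. */
	static void walk_subtables(const uint8_t *table, uint32_t total_length,
				   uint32_t header_size)
	{
		uint32_t offset = header_size;

		while (offset + sizeof(struct fake_subtable) < total_length) {
			const struct fake_subtable *sub =
				(const struct fake_subtable *)(table + offset);

			printf("subtable type %u, length %u\n",
			       (unsigned)sub->type, (unsigned)sub->length);

			if (sub->length == 0)	/* guard against a malformed table */
				break;
			offset += sub->length;	/* advance by the subtable's length */
		}
	}

	int main(void)
	{
		/* Fake table: 4-byte "header", then subtables (0,8) and (9,4). */
		uint8_t buf[16] = { [4] = 0, [5] = 8, [12] = 9, [13] = 4 };

		walk_subtables(buf, sizeof(buf), 4);
		return 0;
	}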
514 static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id) 514 static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
515 { 515 {
516 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; 516 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
517 union acpi_object *obj; 517 union acpi_object *obj;
518 struct acpi_subtable_header *header; 518 struct acpi_subtable_header *header;
519 int apic_id = -1; 519 int apic_id = -1;
520 520
521 if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) 521 if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
522 goto exit; 522 goto exit;
523 523
524 if (!buffer.length || !buffer.pointer) 524 if (!buffer.length || !buffer.pointer)
525 goto exit; 525 goto exit;
526 526
527 obj = buffer.pointer; 527 obj = buffer.pointer;
528 if (obj->type != ACPI_TYPE_BUFFER || 528 if (obj->type != ACPI_TYPE_BUFFER ||
529 obj->buffer.length < sizeof(struct acpi_subtable_header)) { 529 obj->buffer.length < sizeof(struct acpi_subtable_header)) {
530 goto exit; 530 goto exit;
531 } 531 }
532 532
533 header = (struct acpi_subtable_header *)obj->buffer.pointer; 533 header = (struct acpi_subtable_header *)obj->buffer.pointer;
534 if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) { 534 if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) {
535 map_lapic_id(header, acpi_id, &apic_id); 535 map_lapic_id(header, acpi_id, &apic_id);
536 } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) { 536 } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) {
537 map_lsapic_id(header, type, acpi_id, &apic_id); 537 map_lsapic_id(header, type, acpi_id, &apic_id);
538 } 538 }
539 539
540 exit: 540 exit:
541 if (buffer.pointer) 541 if (buffer.pointer)
542 kfree(buffer.pointer); 542 kfree(buffer.pointer);
543 return apic_id; 543 return apic_id;
544 } 544 }
545 545
546 static int get_cpu_id(acpi_handle handle, int type, u32 acpi_id) 546 static int get_cpu_id(acpi_handle handle, int type, u32 acpi_id)
547 { 547 {
548 int i; 548 int i;
549 int apic_id = -1; 549 int apic_id = -1;
550 550
551 apic_id = map_mat_entry(handle, type, acpi_id); 551 apic_id = map_mat_entry(handle, type, acpi_id);
552 if (apic_id == -1) 552 if (apic_id == -1)
553 apic_id = map_madt_entry(type, acpi_id); 553 apic_id = map_madt_entry(type, acpi_id);
554 if (apic_id == -1) 554 if (apic_id == -1)
555 return apic_id; 555 return apic_id;
556 556
557 for_each_possible_cpu(i) { 557 for_each_possible_cpu(i) {
558 if (cpu_physical_id(i) == apic_id) 558 if (cpu_physical_id(i) == apic_id)
559 return i; 559 return i;
560 } 560 }
561 return -1; 561 return -1;
562 } 562 }
563 #endif 563 #endif
564 564
565 /* -------------------------------------------------------------------------- 565 /* --------------------------------------------------------------------------
566 Driver Interface 566 Driver Interface
567 -------------------------------------------------------------------------- */ 567 -------------------------------------------------------------------------- */
568 568
569 static int acpi_processor_get_info(struct acpi_device *device) 569 static int acpi_processor_get_info(struct acpi_device *device)
570 { 570 {
571 acpi_status status = 0; 571 acpi_status status = 0;
572 union acpi_object object = { 0 }; 572 union acpi_object object = { 0 };
573 struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; 573 struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
574 struct acpi_processor *pr; 574 struct acpi_processor *pr;
575 int cpu_index, device_declaration = 0; 575 int cpu_index, device_declaration = 0;
576 static int cpu0_initialized; 576 static int cpu0_initialized;
577 577
578 pr = acpi_driver_data(device); 578 pr = acpi_driver_data(device);
579 if (!pr) 579 if (!pr)
580 return -EINVAL; 580 return -EINVAL;
581 581
582 if (num_online_cpus() > 1) 582 if (num_online_cpus() > 1)
583 errata.smp = TRUE; 583 errata.smp = TRUE;
584 584
585 acpi_processor_errata(pr); 585 acpi_processor_errata(pr);
586 586
587 /* 587 /*
588 * Check to see if we have bus mastering arbitration control. This 588 * Check to see if we have bus mastering arbitration control. This
589 * is required for proper C3 usage (to maintain cache coherency). 589 * is required for proper C3 usage (to maintain cache coherency).
590 */ 590 */
591 if (acpi_gbl_FADT.pm2_control_block && acpi_gbl_FADT.pm2_control_length) { 591 if (acpi_gbl_FADT.pm2_control_block && acpi_gbl_FADT.pm2_control_length) {
592 pr->flags.bm_control = 1; 592 pr->flags.bm_control = 1;
593 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 593 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
594 "Bus mastering arbitration control present\n")); 594 "Bus mastering arbitration control present\n"));
595 } else 595 } else
596 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 596 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
597 "No bus mastering arbitration control\n")); 597 "No bus mastering arbitration control\n"));
598 598
599 if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_HID)) { 599 if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_HID)) {
600 /* 600 /*
601 * Declared with "Device" statement; match _UID. 601 * Declared with "Device" statement; match _UID.
602 * Note that we don't handle string _UIDs yet. 602 * Note that we don't handle string _UIDs yet.
603 */ 603 */
604 unsigned long long value; 604 unsigned long long value;
605 status = acpi_evaluate_integer(pr->handle, METHOD_NAME__UID, 605 status = acpi_evaluate_integer(pr->handle, METHOD_NAME__UID,
606 NULL, &value); 606 NULL, &value);
607 if (ACPI_FAILURE(status)) { 607 if (ACPI_FAILURE(status)) {
608 printk(KERN_ERR PREFIX 608 printk(KERN_ERR PREFIX
609 "Evaluating processor _UID [%#x]\n", status); 609 "Evaluating processor _UID [%#x]\n", status);
610 return -ENODEV; 610 return -ENODEV;
611 } 611 }
612 device_declaration = 1; 612 device_declaration = 1;
613 pr->acpi_id = value; 613 pr->acpi_id = value;
614 } else { 614 } else {
615 /* Declared with "Processor" statement; match ProcessorID */ 615 /* Declared with "Processor" statement; match ProcessorID */
616 status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer); 616 status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer);
617 if (ACPI_FAILURE(status)) { 617 if (ACPI_FAILURE(status)) {
618 printk(KERN_ERR PREFIX "Evaluating processor object\n"); 618 printk(KERN_ERR PREFIX "Evaluating processor object\n");
619 return -ENODEV; 619 return -ENODEV;
620 } 620 }
621 621
622 /* 622 /*
623 * TBD: Synch processor ID (via LAPIC/LSAPIC structures) on SMP. 623 * TBD: Synch processor ID (via LAPIC/LSAPIC structures) on SMP.
624 * >>> 'acpi_get_processor_id(acpi_id, &id)' in 624 * >>> 'acpi_get_processor_id(acpi_id, &id)' in
625 * arch/xxx/acpi.c 625 * arch/xxx/acpi.c
626 */ 626 */
627 pr->acpi_id = object.processor.proc_id; 627 pr->acpi_id = object.processor.proc_id;
628 } 628 }
629 cpu_index = get_cpu_id(pr->handle, device_declaration, pr->acpi_id); 629 cpu_index = get_cpu_id(pr->handle, device_declaration, pr->acpi_id);
630 630
631 /* Handle UP system running SMP kernel, with no LAPIC in MADT */ 631 /* Handle UP system running SMP kernel, with no LAPIC in MADT */
632 if (!cpu0_initialized && (cpu_index == -1) && 632 if (!cpu0_initialized && (cpu_index == -1) &&
633 (num_online_cpus() == 1)) { 633 (num_online_cpus() == 1)) {
634 cpu_index = 0; 634 cpu_index = 0;
635 } 635 }
636 636
637 cpu0_initialized = 1; 637 cpu0_initialized = 1;
638 638
639 pr->id = cpu_index; 639 pr->id = cpu_index;
640 640
641 /* 641 /*
642 * Extra Processor objects may be enumerated on MP systems with 642 * Extra Processor objects may be enumerated on MP systems with
643 * less than the max # of CPUs. They should be ignored _iff 643 * less than the max # of CPUs. They should be ignored _iff
644 * they are physically not present. 644 * they are physically not present.
645 */ 645 */
646 if (pr->id == -1) { 646 if (pr->id == -1) {
647 if (ACPI_FAILURE 647 if (ACPI_FAILURE
648 (acpi_processor_hotadd_init(pr->handle, &pr->id))) { 648 (acpi_processor_hotadd_init(pr->handle, &pr->id))) {
649 return -ENODEV; 649 return -ENODEV;
650 } 650 }
651 } 651 }
652 652
653 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id, 653 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id,
654 pr->acpi_id)); 654 pr->acpi_id));
655 655
656 if (!object.processor.pblk_address) 656 if (!object.processor.pblk_address)
657 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No PBLK (NULL address)\n")); 657 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No PBLK (NULL address)\n"));
658 else if (object.processor.pblk_length != 6) 658 else if (object.processor.pblk_length != 6)
659 printk(KERN_ERR PREFIX "Invalid PBLK length [%d]\n", 659 printk(KERN_ERR PREFIX "Invalid PBLK length [%d]\n",
660 object.processor.pblk_length); 660 object.processor.pblk_length);
661 else { 661 else {
662 pr->throttling.address = object.processor.pblk_address; 662 pr->throttling.address = object.processor.pblk_address;
663 pr->throttling.duty_offset = acpi_gbl_FADT.duty_offset; 663 pr->throttling.duty_offset = acpi_gbl_FADT.duty_offset;
664 pr->throttling.duty_width = acpi_gbl_FADT.duty_width; 664 pr->throttling.duty_width = acpi_gbl_FADT.duty_width;
665 665
666 pr->pblk = object.processor.pblk_address; 666 pr->pblk = object.processor.pblk_address;
667 667
668 /* 668 /*
669 * We don't care about error returns - we just try to mark 669 * We don't care about error returns - we just try to mark
670 * these reserved so that nobody else is confused into thinking 670 * these reserved so that nobody else is confused into thinking
671 * that this region might be unused. 671 * that this region might be unused.
672 * 672 *
673 * (In particular, allocating the IO range for Cardbus) 673 * (In particular, allocating the IO range for Cardbus)
674 */ 674 */
675 request_region(pr->throttling.address, 6, "ACPI CPU throttle"); 675 request_region(pr->throttling.address, 6, "ACPI CPU throttle");
676 } 676 }
677 677
678 /* 678 /*
679 * If ACPI describes a slot number for this CPU, we can use it 679 * If ACPI describes a slot number for this CPU, we can use it
680 * to ensure we get the right value in the "physical id" field 680 * to ensure we get the right value in the "physical id" field
681 * of /proc/cpuinfo 681 * of /proc/cpuinfo
682 */ 682 */
683 status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer); 683 status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer);
684 if (ACPI_SUCCESS(status)) 684 if (ACPI_SUCCESS(status))
685 arch_fix_phys_package_id(pr->id, object.integer.value); 685 arch_fix_phys_package_id(pr->id, object.integer.value);
686 686
687 return 0; 687 return 0;
688 } 688 }
689 689
690 static DEFINE_PER_CPU(void *, processor_device_array); 690 static DEFINE_PER_CPU(void *, processor_device_array);
691 691
692 static int __cpuinit acpi_processor_start(struct acpi_device *device) 692 static int __cpuinit acpi_processor_start(struct acpi_device *device)
693 { 693 {
694 int result = 0; 694 int result = 0;
695 struct acpi_processor *pr; 695 struct acpi_processor *pr;
696 struct sys_device *sysdev; 696 struct sys_device *sysdev;
697 697
698 pr = acpi_driver_data(device); 698 pr = acpi_driver_data(device);
699 699
700 result = acpi_processor_get_info(device); 700 result = acpi_processor_get_info(device);
701 if (result) { 701 if (result) {
702 /* Processor is physically not present */ 702 /* Processor is physically not present */
703 return 0; 703 return 0;
704 } 704 }
705 705
706 BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0)); 706 BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0));
707 707
708 /* 708 /*
709 * Buggy BIOS check 709 * Buggy BIOS check
710 * ACPI id of processors can be reported wrongly by the BIOS. 710 * ACPI id of processors can be reported wrongly by the BIOS.
711 * Don't trust it blindly 711 * Don't trust it blindly
712 */ 712 */
713 if (per_cpu(processor_device_array, pr->id) != NULL && 713 if (per_cpu(processor_device_array, pr->id) != NULL &&
714 per_cpu(processor_device_array, pr->id) != device) { 714 per_cpu(processor_device_array, pr->id) != device) {
715 printk(KERN_WARNING "BIOS reported wrong ACPI id " 715 printk(KERN_WARNING "BIOS reported wrong ACPI id "
716 "for the processor\n"); 716 "for the processor\n");
717 return -ENODEV; 717 return -ENODEV;
718 } 718 }
719 per_cpu(processor_device_array, pr->id) = device; 719 per_cpu(processor_device_array, pr->id) = device;
720 720
721 per_cpu(processors, pr->id) = pr; 721 per_cpu(processors, pr->id) = pr;
722 722
723 result = acpi_processor_add_fs(device); 723 result = acpi_processor_add_fs(device);
724 if (result) 724 if (result)
725 goto end; 725 goto end;
726 726
727 sysdev = get_cpu_sysdev(pr->id); 727 sysdev = get_cpu_sysdev(pr->id);
728 if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) 728 if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev"))
729 return -EFAULT; 729 return -EFAULT;
730 730
731 /* _PDC call should be done before doing anything else (if reqd.). */ 731 /* _PDC call should be done before doing anything else (if reqd.). */
732 arch_acpi_processor_init_pdc(pr); 732 arch_acpi_processor_init_pdc(pr);
733 acpi_processor_set_pdc(pr); 733 acpi_processor_set_pdc(pr);
734 #ifdef CONFIG_CPU_FREQ 734 #ifdef CONFIG_CPU_FREQ
735 acpi_processor_ppc_has_changed(pr); 735 acpi_processor_ppc_has_changed(pr);
736 #endif 736 #endif
737 acpi_processor_get_throttling_info(pr); 737 acpi_processor_get_throttling_info(pr);
738 acpi_processor_get_limit_info(pr); 738 acpi_processor_get_limit_info(pr);
739 739
740 740
741 acpi_processor_power_init(pr, device); 741 acpi_processor_power_init(pr, device);
742 742
743 pr->cdev = thermal_cooling_device_register("Processor", device, 743 pr->cdev = thermal_cooling_device_register("Processor", device,
744 &processor_cooling_ops); 744 &processor_cooling_ops);
745 if (IS_ERR(pr->cdev)) { 745 if (IS_ERR(pr->cdev)) {
746 result = PTR_ERR(pr->cdev); 746 result = PTR_ERR(pr->cdev);
747 goto end; 747 goto end;
748 } 748 }
749 749
750 dev_info(&device->dev, "registered as cooling_device%d\n", 750 dev_info(&device->dev, "registered as cooling_device%d\n",
751 pr->cdev->id); 751 pr->cdev->id);
752 752
753 result = sysfs_create_link(&device->dev.kobj, 753 result = sysfs_create_link(&device->dev.kobj,
754 &pr->cdev->device.kobj, 754 &pr->cdev->device.kobj,
755 "thermal_cooling"); 755 "thermal_cooling");
756 if (result) 756 if (result)
757 printk(KERN_ERR PREFIX "Create sysfs link\n"); 757 printk(KERN_ERR PREFIX "Create sysfs link\n");
758 result = sysfs_create_link(&pr->cdev->device.kobj, 758 result = sysfs_create_link(&pr->cdev->device.kobj,
759 &device->dev.kobj, 759 &device->dev.kobj,
760 "device"); 760 "device");
761 if (result) 761 if (result)
762 printk(KERN_ERR PREFIX "Create sysfs link\n"); 762 printk(KERN_ERR PREFIX "Create sysfs link\n");
763 763
764 if (pr->flags.throttling) { 764 if (pr->flags.throttling) {
765 printk(KERN_INFO PREFIX "%s [%s] (supports", 765 printk(KERN_INFO PREFIX "%s [%s] (supports",
766 acpi_device_name(device), acpi_device_bid(device)); 766 acpi_device_name(device), acpi_device_bid(device));
767 printk(" %d throttling states", pr->throttling.state_count); 767 printk(" %d throttling states", pr->throttling.state_count);
768 printk(")\n"); 768 printk(")\n");
769 } 769 }
770 770
771 end: 771 end:
772 772
773 return result; 773 return result;
774 } 774 }
775 775
776 static void acpi_processor_notify(struct acpi_device *device, u32 event) 776 static void acpi_processor_notify(struct acpi_device *device, u32 event)
777 { 777 {
778 struct acpi_processor *pr = acpi_driver_data(device); 778 struct acpi_processor *pr = acpi_driver_data(device);
779 int saved; 779 int saved;
780 780
781 if (!pr) 781 if (!pr)
782 return; 782 return;
783 783
784 switch (event) { 784 switch (event) {
785 case ACPI_PROCESSOR_NOTIFY_PERFORMANCE: 785 case ACPI_PROCESSOR_NOTIFY_PERFORMANCE:
786 saved = pr->performance_platform_limit; 786 saved = pr->performance_platform_limit;
787 acpi_processor_ppc_has_changed(pr); 787 acpi_processor_ppc_has_changed(pr);
788 if (saved == pr->performance_platform_limit) 788 if (saved == pr->performance_platform_limit)
789 break; 789 break;
790 acpi_bus_generate_proc_event(device, event, 790 acpi_bus_generate_proc_event(device, event,
791 pr->performance_platform_limit); 791 pr->performance_platform_limit);
792 acpi_bus_generate_netlink_event(device->pnp.device_class, 792 acpi_bus_generate_netlink_event(device->pnp.device_class,
793 dev_name(&device->dev), event, 793 dev_name(&device->dev), event,
794 pr->performance_platform_limit); 794 pr->performance_platform_limit);
795 break; 795 break;
796 case ACPI_PROCESSOR_NOTIFY_POWER: 796 case ACPI_PROCESSOR_NOTIFY_POWER:
797 acpi_processor_cst_has_changed(pr); 797 acpi_processor_cst_has_changed(pr);
798 acpi_bus_generate_proc_event(device, event, 0); 798 acpi_bus_generate_proc_event(device, event, 0);
799 acpi_bus_generate_netlink_event(device->pnp.device_class, 799 acpi_bus_generate_netlink_event(device->pnp.device_class,
800 dev_name(&device->dev), event, 0); 800 dev_name(&device->dev), event, 0);
801 break; 801 break;
802 case ACPI_PROCESSOR_NOTIFY_THROTTLING: 802 case ACPI_PROCESSOR_NOTIFY_THROTTLING:
803 acpi_processor_tstate_has_changed(pr); 803 acpi_processor_tstate_has_changed(pr);
804 acpi_bus_generate_proc_event(device, event, 0); 804 acpi_bus_generate_proc_event(device, event, 0);
805 acpi_bus_generate_netlink_event(device->pnp.device_class, 805 acpi_bus_generate_netlink_event(device->pnp.device_class,
806 dev_name(&device->dev), event, 0); 806 dev_name(&device->dev), event, 0);
807 default: 807 default:
808 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 808 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
809 "Unsupported event [0x%x]\n", event)); 809 "Unsupported event [0x%x]\n", event));
810 break; 810 break;
811 } 811 }
812 812
813 return; 813 return;
814 } 814 }
815 815
816 static int acpi_cpu_soft_notify(struct notifier_block *nfb, 816 static int acpi_cpu_soft_notify(struct notifier_block *nfb,
817 unsigned long action, void *hcpu) 817 unsigned long action, void *hcpu)
818 { 818 {
819 unsigned int cpu = (unsigned long)hcpu; 819 unsigned int cpu = (unsigned long)hcpu;
820 struct acpi_processor *pr = per_cpu(processors, cpu); 820 struct acpi_processor *pr = per_cpu(processors, cpu);
821 821
822 if (action == CPU_ONLINE && pr) { 822 if (action == CPU_ONLINE && pr) {
823 acpi_processor_ppc_has_changed(pr); 823 acpi_processor_ppc_has_changed(pr);
824 acpi_processor_cst_has_changed(pr); 824 acpi_processor_cst_has_changed(pr);
825 acpi_processor_tstate_has_changed(pr); 825 acpi_processor_tstate_has_changed(pr);
826 } 826 }
827 return NOTIFY_OK; 827 return NOTIFY_OK;
828 } 828 }
829 829
830 static struct notifier_block acpi_cpu_notifier = 830 static struct notifier_block acpi_cpu_notifier =
831 { 831 {
832 .notifier_call = acpi_cpu_soft_notify, 832 .notifier_call = acpi_cpu_soft_notify,
833 }; 833 };
834 834
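acpi_cpu_soft_notify() above re-reads the _PPC, _CST and throttling state whenever a CPU comes back online, and acpi_cpu_notifier wires it into the CPU hotplug notifier chain. A minimal sketch of the same notifier_block pattern in a hypothetical module (the demo_* names and the log message are illustrative; register_hotcpu_notifier()/unregister_hotcpu_notifier() are the same helpers this driver uses further down):

	#include <linux/module.h>
	#include <linux/kernel.h>
	#include <linux/cpu.h>
	#include <linux/notifier.h>

	/* React only once the CPU is fully online, as the ACPI callback does. */
	static int demo_cpu_notify(struct notifier_block *nfb,
				   unsigned long action, void *hcpu)
	{
		unsigned int cpu = (unsigned long)hcpu;

		if (action == CPU_ONLINE)
			pr_info("demo: cpu %u came online\n", cpu);

		return NOTIFY_OK;
	}

	static struct notifier_block demo_cpu_notifier = {
		.notifier_call = demo_cpu_notify,
	};

	static int __init demo_init(void)
	{
		register_hotcpu_notifier(&demo_cpu_notifier);
		return 0;
	}

	static void __exit demo_exit(void)
	{
		unregister_hotcpu_notifier(&demo_cpu_notifier);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");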
835 static int acpi_processor_add(struct acpi_device *device) 835 static int acpi_processor_add(struct acpi_device *device)
836 { 836 {
837 struct acpi_processor *pr = NULL; 837 struct acpi_processor *pr = NULL;
838 838
839 839
840 if (!device) 840 if (!device)
841 return -EINVAL; 841 return -EINVAL;
842 842
843 pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL); 843 pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
844 if (!pr) 844 if (!pr)
845 return -ENOMEM; 845 return -ENOMEM;
846 846
847 if (!alloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) { 847 if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) {
848 kfree(pr); 848 kfree(pr);
849 return -ENOMEM; 849 return -ENOMEM;
850 } 850 }
851 851
852 pr->handle = device->handle; 852 pr->handle = device->handle;
853 strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME); 853 strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME);
854 strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS); 854 strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS);
855 device->driver_data = pr; 855 device->driver_data = pr;
856 856
857 return 0; 857 return 0;
858 } 858 }
859 859
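The zalloc_cpumask_var() call in acpi_processor_add() above is the one-line change made to this file: with CONFIG_CPUMASK_OFFSTACK (selected by MAXSMP), cpumask_var_t is heap-allocated and alloc_cpumask_var() hands back uninitialized memory, so a caller that assumes a cleared mask must ask for a zeroed one. A minimal kernel-module sketch of the allocate/use/free lifecycle (the demo_mask name is illustrative):

	#include <linux/module.h>
	#include <linux/kernel.h>
	#include <linux/errno.h>
	#include <linux/cpumask.h>
	#include <linux/gfp.h>

	static cpumask_var_t demo_mask;

	static int __init demo_mask_init(void)
	{
		/*
		 * zalloc_cpumask_var() returns the mask already cleared.  With
		 * CONFIG_CPUMASK_OFFSTACK the mask lives on the heap, and plain
		 * alloc_cpumask_var() would leave its contents undefined.
		 */
		if (!zalloc_cpumask_var(&demo_mask, GFP_KERNEL))
			return -ENOMEM;

		cpumask_set_cpu(0, demo_mask);
		pr_info("demo: cpu0 %s set in mask\n",
			cpumask_test_cpu(0, demo_mask) ? "is" : "is not");
		return 0;
	}

	static void __exit demo_mask_exit(void)
	{
		free_cpumask_var(demo_mask);	/* no-op when the mask is on-stack */
	}

	module_init(demo_mask_init);
	module_exit(demo_mask_exit);
	MODULE_LICENSE("GPL");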
860 static int acpi_processor_remove(struct acpi_device *device, int type) 860 static int acpi_processor_remove(struct acpi_device *device, int type)
861 { 861 {
862 struct acpi_processor *pr = NULL; 862 struct acpi_processor *pr = NULL;
863 863
864 864
865 if (!device || !acpi_driver_data(device)) 865 if (!device || !acpi_driver_data(device))
866 return -EINVAL; 866 return -EINVAL;
867 867
868 pr = acpi_driver_data(device); 868 pr = acpi_driver_data(device);
869 869
870 if (pr->id >= nr_cpu_ids) 870 if (pr->id >= nr_cpu_ids)
871 goto free; 871 goto free;
872 872
873 if (type == ACPI_BUS_REMOVAL_EJECT) { 873 if (type == ACPI_BUS_REMOVAL_EJECT) {
874 if (acpi_processor_handle_eject(pr)) 874 if (acpi_processor_handle_eject(pr))
875 return -EINVAL; 875 return -EINVAL;
876 } 876 }
877 877
878 acpi_processor_power_exit(pr, device); 878 acpi_processor_power_exit(pr, device);
879 879
880 sysfs_remove_link(&device->dev.kobj, "sysdev"); 880 sysfs_remove_link(&device->dev.kobj, "sysdev");
881 881
882 acpi_processor_remove_fs(device); 882 acpi_processor_remove_fs(device);
883 883
884 if (pr->cdev) { 884 if (pr->cdev) {
885 sysfs_remove_link(&device->dev.kobj, "thermal_cooling"); 885 sysfs_remove_link(&device->dev.kobj, "thermal_cooling");
886 sysfs_remove_link(&pr->cdev->device.kobj, "device"); 886 sysfs_remove_link(&pr->cdev->device.kobj, "device");
887 thermal_cooling_device_unregister(pr->cdev); 887 thermal_cooling_device_unregister(pr->cdev);
888 pr->cdev = NULL; 888 pr->cdev = NULL;
889 } 889 }
890 890
891 per_cpu(processors, pr->id) = NULL; 891 per_cpu(processors, pr->id) = NULL;
892 per_cpu(processor_device_array, pr->id) = NULL; 892 per_cpu(processor_device_array, pr->id) = NULL;
893 893
894 free: 894 free:
895 free_cpumask_var(pr->throttling.shared_cpu_map); 895 free_cpumask_var(pr->throttling.shared_cpu_map);
896 kfree(pr); 896 kfree(pr);
897 897
898 return 0; 898 return 0;
899 } 899 }
900 900
901 #ifdef CONFIG_ACPI_HOTPLUG_CPU 901 #ifdef CONFIG_ACPI_HOTPLUG_CPU
902 /**************************************************************************** 902 /****************************************************************************
903 * Acpi processor hotplug support * 903 * Acpi processor hotplug support *
904 ****************************************************************************/ 904 ****************************************************************************/
905 905
906 static int is_processor_present(acpi_handle handle) 906 static int is_processor_present(acpi_handle handle)
907 { 907 {
908 acpi_status status; 908 acpi_status status;
909 unsigned long long sta = 0; 909 unsigned long long sta = 0;
910 910
911 911
912 status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); 912 status = acpi_evaluate_integer(handle, "_STA", NULL, &sta);
913 913
914 if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_PRESENT)) 914 if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_PRESENT))
915 return 1; 915 return 1;
916 916
917 /* 917 /*
918 * _STA is mandatory for a processor that supports hot plug 918 * _STA is mandatory for a processor that supports hot plug
919 */ 919 */
920 if (status == AE_NOT_FOUND) 920 if (status == AE_NOT_FOUND)
921 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 921 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
922 "Processor does not support hot plug\n")); 922 "Processor does not support hot plug\n"));
923 else 923 else
924 ACPI_EXCEPTION((AE_INFO, status, 924 ACPI_EXCEPTION((AE_INFO, status,
925 "Processor Device is not present")); 925 "Processor Device is not present"));
926 return 0; 926 return 0;
927 } 927 }
928 928
929 static 929 static
930 int acpi_processor_device_add(acpi_handle handle, struct acpi_device **device) 930 int acpi_processor_device_add(acpi_handle handle, struct acpi_device **device)
931 { 931 {
932 acpi_handle phandle; 932 acpi_handle phandle;
933 struct acpi_device *pdev; 933 struct acpi_device *pdev;
934 struct acpi_processor *pr; 934 struct acpi_processor *pr;
935 935
936 936
937 if (acpi_get_parent(handle, &phandle)) { 937 if (acpi_get_parent(handle, &phandle)) {
938 return -ENODEV; 938 return -ENODEV;
939 } 939 }
940 940
941 if (acpi_bus_get_device(phandle, &pdev)) { 941 if (acpi_bus_get_device(phandle, &pdev)) {
942 return -ENODEV; 942 return -ENODEV;
943 } 943 }
944 944
945 if (acpi_bus_add(device, pdev, handle, ACPI_BUS_TYPE_PROCESSOR)) { 945 if (acpi_bus_add(device, pdev, handle, ACPI_BUS_TYPE_PROCESSOR)) {
946 return -ENODEV; 946 return -ENODEV;
947 } 947 }
948 948
949 acpi_bus_start(*device); 949 acpi_bus_start(*device);
950 950
951 pr = acpi_driver_data(*device); 951 pr = acpi_driver_data(*device);
952 if (!pr) 952 if (!pr)
953 return -ENODEV; 953 return -ENODEV;
954 954
955 if ((pr->id >= 0) && (pr->id < nr_cpu_ids)) { 955 if ((pr->id >= 0) && (pr->id < nr_cpu_ids)) {
956 kobject_uevent(&(*device)->dev.kobj, KOBJ_ONLINE); 956 kobject_uevent(&(*device)->dev.kobj, KOBJ_ONLINE);
957 } 957 }
958 return 0; 958 return 0;
959 } 959 }
960 960
961 static void __ref acpi_processor_hotplug_notify(acpi_handle handle, 961 static void __ref acpi_processor_hotplug_notify(acpi_handle handle,
962 u32 event, void *data) 962 u32 event, void *data)
963 { 963 {
964 struct acpi_processor *pr; 964 struct acpi_processor *pr;
965 struct acpi_device *device = NULL; 965 struct acpi_device *device = NULL;
966 int result; 966 int result;
967 967
968 968
969 switch (event) { 969 switch (event) {
970 case ACPI_NOTIFY_BUS_CHECK: 970 case ACPI_NOTIFY_BUS_CHECK:
971 case ACPI_NOTIFY_DEVICE_CHECK: 971 case ACPI_NOTIFY_DEVICE_CHECK:
972 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 972 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
973 "Processor driver received %s event\n", 973 "Processor driver received %s event\n",
974 (event == ACPI_NOTIFY_BUS_CHECK) ? 974 (event == ACPI_NOTIFY_BUS_CHECK) ?
975 "ACPI_NOTIFY_BUS_CHECK" : "ACPI_NOTIFY_DEVICE_CHECK")); 975 "ACPI_NOTIFY_BUS_CHECK" : "ACPI_NOTIFY_DEVICE_CHECK"));
976 976
977 if (!is_processor_present(handle)) 977 if (!is_processor_present(handle))
978 break; 978 break;
979 979
980 if (acpi_bus_get_device(handle, &device)) { 980 if (acpi_bus_get_device(handle, &device)) {
981 result = acpi_processor_device_add(handle, &device); 981 result = acpi_processor_device_add(handle, &device);
982 if (result) 982 if (result)
983 printk(KERN_ERR PREFIX 983 printk(KERN_ERR PREFIX
984 "Unable to add the device\n"); 984 "Unable to add the device\n");
985 break; 985 break;
986 } 986 }
987 987
988 pr = acpi_driver_data(device); 988 pr = acpi_driver_data(device);
989 if (!pr) { 989 if (!pr) {
990 printk(KERN_ERR PREFIX "Driver data is NULL\n"); 990 printk(KERN_ERR PREFIX "Driver data is NULL\n");
991 break; 991 break;
992 } 992 }
993 993
994 if (pr->id >= 0 && (pr->id < nr_cpu_ids)) { 994 if (pr->id >= 0 && (pr->id < nr_cpu_ids)) {
995 kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE); 995 kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE);
996 break; 996 break;
997 } 997 }
998 998
999 result = acpi_processor_start(device); 999 result = acpi_processor_start(device);
1000 if ((!result) && ((pr->id >= 0) && (pr->id < nr_cpu_ids))) { 1000 if ((!result) && ((pr->id >= 0) && (pr->id < nr_cpu_ids))) {
1001 kobject_uevent(&device->dev.kobj, KOBJ_ONLINE); 1001 kobject_uevent(&device->dev.kobj, KOBJ_ONLINE);
1002 } else { 1002 } else {
1003 printk(KERN_ERR PREFIX "Device [%s] failed to start\n", 1003 printk(KERN_ERR PREFIX "Device [%s] failed to start\n",
1004 acpi_device_bid(device)); 1004 acpi_device_bid(device));
1005 } 1005 }
1006 break; 1006 break;
1007 case ACPI_NOTIFY_EJECT_REQUEST: 1007 case ACPI_NOTIFY_EJECT_REQUEST:
1008 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 1008 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
1009 "received ACPI_NOTIFY_EJECT_REQUEST\n")); 1009 "received ACPI_NOTIFY_EJECT_REQUEST\n"));
1010 1010
1011 if (acpi_bus_get_device(handle, &device)) { 1011 if (acpi_bus_get_device(handle, &device)) {
1012 printk(KERN_ERR PREFIX 1012 printk(KERN_ERR PREFIX
1013 "Device doesn't exist, dropping EJECT\n"); 1013 "Device doesn't exist, dropping EJECT\n");
1014 break; 1014 break;
1015 } 1015 }
1016 pr = acpi_driver_data(device); 1016 pr = acpi_driver_data(device);
1017 if (!pr) { 1017 if (!pr) {
1018 printk(KERN_ERR PREFIX 1018 printk(KERN_ERR PREFIX
1019 "Driver data is NULL, dropping EJECT\n"); 1019 "Driver data is NULL, dropping EJECT\n");
1020 return; 1020 return;
1021 } 1021 }
1022 1022
1023 if ((pr->id < nr_cpu_ids) && (cpu_present(pr->id))) 1023 if ((pr->id < nr_cpu_ids) && (cpu_present(pr->id)))
1024 kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE); 1024 kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE);
1025 break; 1025 break;
1026 default: 1026 default:
1027 ACPI_DEBUG_PRINT((ACPI_DB_INFO, 1027 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
1028 "Unsupported event [0x%x]\n", event)); 1028 "Unsupported event [0x%x]\n", event));
1029 break; 1029 break;
1030 } 1030 }
1031 1031
1032 return; 1032 return;
1033 } 1033 }
1034 1034
1035 static acpi_status 1035 static acpi_status
1036 processor_walk_namespace_cb(acpi_handle handle, 1036 processor_walk_namespace_cb(acpi_handle handle,
1037 u32 lvl, void *context, void **rv) 1037 u32 lvl, void *context, void **rv)
1038 { 1038 {
1039 acpi_status status; 1039 acpi_status status;
1040 int *action = context; 1040 int *action = context;
1041 acpi_object_type type = 0; 1041 acpi_object_type type = 0;
1042 1042
1043 status = acpi_get_type(handle, &type); 1043 status = acpi_get_type(handle, &type);
1044 if (ACPI_FAILURE(status)) 1044 if (ACPI_FAILURE(status))
1045 return (AE_OK); 1045 return (AE_OK);
1046 1046
1047 if (type != ACPI_TYPE_PROCESSOR) 1047 if (type != ACPI_TYPE_PROCESSOR)
1048 return (AE_OK); 1048 return (AE_OK);
1049 1049
1050 switch (*action) { 1050 switch (*action) {
1051 case INSTALL_NOTIFY_HANDLER: 1051 case INSTALL_NOTIFY_HANDLER:
1052 acpi_install_notify_handler(handle, 1052 acpi_install_notify_handler(handle,
1053 ACPI_SYSTEM_NOTIFY, 1053 ACPI_SYSTEM_NOTIFY,
1054 acpi_processor_hotplug_notify, 1054 acpi_processor_hotplug_notify,
1055 NULL); 1055 NULL);
1056 break; 1056 break;
1057 case UNINSTALL_NOTIFY_HANDLER: 1057 case UNINSTALL_NOTIFY_HANDLER:
1058 acpi_remove_notify_handler(handle, 1058 acpi_remove_notify_handler(handle,
1059 ACPI_SYSTEM_NOTIFY, 1059 ACPI_SYSTEM_NOTIFY,
1060 acpi_processor_hotplug_notify); 1060 acpi_processor_hotplug_notify);
1061 break; 1061 break;
1062 default: 1062 default:
1063 break; 1063 break;
1064 } 1064 }
1065 1065
1066 return (AE_OK); 1066 return (AE_OK);
1067 } 1067 }
1068 1068
1069 static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu) 1069 static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu)
1070 { 1070 {
1071 1071
1072 if (!is_processor_present(handle)) { 1072 if (!is_processor_present(handle)) {
1073 return AE_ERROR; 1073 return AE_ERROR;
1074 } 1074 }
1075 1075
1076 if (acpi_map_lsapic(handle, p_cpu)) 1076 if (acpi_map_lsapic(handle, p_cpu))
1077 return AE_ERROR; 1077 return AE_ERROR;
1078 1078
1079 if (arch_register_cpu(*p_cpu)) { 1079 if (arch_register_cpu(*p_cpu)) {
1080 acpi_unmap_lsapic(*p_cpu); 1080 acpi_unmap_lsapic(*p_cpu);
1081 return AE_ERROR; 1081 return AE_ERROR;
1082 } 1082 }
1083 1083
1084 return AE_OK; 1084 return AE_OK;
1085 } 1085 }
1086 1086
1087 static int acpi_processor_handle_eject(struct acpi_processor *pr) 1087 static int acpi_processor_handle_eject(struct acpi_processor *pr)
1088 { 1088 {
1089 if (cpu_online(pr->id)) 1089 if (cpu_online(pr->id))
1090 cpu_down(pr->id); 1090 cpu_down(pr->id);
1091 1091
1092 arch_unregister_cpu(pr->id); 1092 arch_unregister_cpu(pr->id);
1093 acpi_unmap_lsapic(pr->id); 1093 acpi_unmap_lsapic(pr->id);
1094 return (0); 1094 return (0);
1095 } 1095 }
1096 #else 1096 #else
1097 static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu) 1097 static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu)
1098 { 1098 {
1099 return AE_ERROR; 1099 return AE_ERROR;
1100 } 1100 }
1101 static int acpi_processor_handle_eject(struct acpi_processor *pr) 1101 static int acpi_processor_handle_eject(struct acpi_processor *pr)
1102 { 1102 {
1103 return (-EINVAL); 1103 return (-EINVAL);
1104 } 1104 }
1105 #endif 1105 #endif
1106 1106
1107 static 1107 static
1108 void acpi_processor_install_hotplug_notify(void) 1108 void acpi_processor_install_hotplug_notify(void)
1109 { 1109 {
1110 #ifdef CONFIG_ACPI_HOTPLUG_CPU 1110 #ifdef CONFIG_ACPI_HOTPLUG_CPU
1111 int action = INSTALL_NOTIFY_HANDLER; 1111 int action = INSTALL_NOTIFY_HANDLER;
1112 acpi_walk_namespace(ACPI_TYPE_PROCESSOR, 1112 acpi_walk_namespace(ACPI_TYPE_PROCESSOR,
1113 ACPI_ROOT_OBJECT, 1113 ACPI_ROOT_OBJECT,
1114 ACPI_UINT32_MAX, 1114 ACPI_UINT32_MAX,
1115 processor_walk_namespace_cb, &action, NULL); 1115 processor_walk_namespace_cb, &action, NULL);
1116 #endif 1116 #endif
1117 register_hotcpu_notifier(&acpi_cpu_notifier); 1117 register_hotcpu_notifier(&acpi_cpu_notifier);
1118 } 1118 }
1119 1119
1120 static 1120 static
1121 void acpi_processor_uninstall_hotplug_notify(void) 1121 void acpi_processor_uninstall_hotplug_notify(void)
1122 { 1122 {
1123 #ifdef CONFIG_ACPI_HOTPLUG_CPU 1123 #ifdef CONFIG_ACPI_HOTPLUG_CPU
1124 int action = UNINSTALL_NOTIFY_HANDLER; 1124 int action = UNINSTALL_NOTIFY_HANDLER;
1125 acpi_walk_namespace(ACPI_TYPE_PROCESSOR, 1125 acpi_walk_namespace(ACPI_TYPE_PROCESSOR,
1126 ACPI_ROOT_OBJECT, 1126 ACPI_ROOT_OBJECT,
1127 ACPI_UINT32_MAX, 1127 ACPI_UINT32_MAX,
1128 processor_walk_namespace_cb, &action, NULL); 1128 processor_walk_namespace_cb, &action, NULL);
1129 #endif 1129 #endif
1130 unregister_hotcpu_notifier(&acpi_cpu_notifier); 1130 unregister_hotcpu_notifier(&acpi_cpu_notifier);
1131 } 1131 }
1132 1132
1133 /* 1133 /*
1134 * We keep the driver loaded even when ACPI is not running. 1134 * We keep the driver loaded even when ACPI is not running.
1135 * This is needed for the powernow-k8 driver, which works even without 1135 * This is needed for the powernow-k8 driver, which works even without
1136 * ACPI, but needs symbols from this driver. 1136 * ACPI, but needs symbols from this driver.
1137 */ 1137 */
1138 1138
1139 static int __init acpi_processor_init(void) 1139 static int __init acpi_processor_init(void)
1140 { 1140 {
1141 int result = 0; 1141 int result = 0;
1142 1142
1143 memset(&errata, 0, sizeof(errata)); 1143 memset(&errata, 0, sizeof(errata));
1144 1144
1145 #ifdef CONFIG_SMP 1145 #ifdef CONFIG_SMP
1146 if (ACPI_FAILURE(acpi_get_table(ACPI_SIG_MADT, 0, 1146 if (ACPI_FAILURE(acpi_get_table(ACPI_SIG_MADT, 0,
1147 (struct acpi_table_header **)&madt))) 1147 (struct acpi_table_header **)&madt)))
1148 madt = NULL; 1148 madt = NULL;
1149 #endif 1149 #endif
1150 1150
1151 acpi_processor_dir = proc_mkdir(ACPI_PROCESSOR_CLASS, acpi_root_dir); 1151 acpi_processor_dir = proc_mkdir(ACPI_PROCESSOR_CLASS, acpi_root_dir);
1152 if (!acpi_processor_dir) 1152 if (!acpi_processor_dir)
1153 return -ENOMEM; 1153 return -ENOMEM;
1154 1154
1155 /* 1155 /*
1156 * Check whether the system matches an entry in the DMI table. If 1156 * Check whether the system matches an entry in the DMI table. If
1157 * so, OSPM should not use mwait for CPU C-states. 1157 * so, OSPM should not use mwait for CPU C-states.
1158 */ 1158 */
1159 dmi_check_system(processor_idle_dmi_table); 1159 dmi_check_system(processor_idle_dmi_table);
1160 result = cpuidle_register_driver(&acpi_idle_driver); 1160 result = cpuidle_register_driver(&acpi_idle_driver);
1161 if (result < 0) 1161 if (result < 0)
1162 goto out_proc; 1162 goto out_proc;
1163 1163
1164 result = acpi_bus_register_driver(&acpi_processor_driver); 1164 result = acpi_bus_register_driver(&acpi_processor_driver);
1165 if (result < 0) 1165 if (result < 0)
1166 goto out_cpuidle; 1166 goto out_cpuidle;
1167 1167
1168 acpi_processor_install_hotplug_notify(); 1168 acpi_processor_install_hotplug_notify();
1169 1169
1170 acpi_thermal_cpufreq_init(); 1170 acpi_thermal_cpufreq_init();
1171 1171
1172 acpi_processor_ppc_init(); 1172 acpi_processor_ppc_init();
1173 1173
1174 acpi_processor_throttling_init(); 1174 acpi_processor_throttling_init();
1175 1175
1176 return 0; 1176 return 0;
1177 1177
1178 out_cpuidle: 1178 out_cpuidle:
1179 cpuidle_unregister_driver(&acpi_idle_driver); 1179 cpuidle_unregister_driver(&acpi_idle_driver);
1180 1180
1181 out_proc: 1181 out_proc:
1182 remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir); 1182 remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir);
1183 1183
1184 return result; 1184 return result;
1185 } 1185 }
1186 1186
1187 static void __exit acpi_processor_exit(void) 1187 static void __exit acpi_processor_exit(void)
1188 { 1188 {
1189 acpi_processor_ppc_exit(); 1189 acpi_processor_ppc_exit();
1190 1190
1191 acpi_thermal_cpufreq_exit(); 1191 acpi_thermal_cpufreq_exit();
1192 1192
1193 acpi_processor_uninstall_hotplug_notify(); 1193 acpi_processor_uninstall_hotplug_notify();
1194 1194
1195 acpi_bus_unregister_driver(&acpi_processor_driver); 1195 acpi_bus_unregister_driver(&acpi_processor_driver);
1196 1196
1197 cpuidle_unregister_driver(&acpi_idle_driver); 1197 cpuidle_unregister_driver(&acpi_idle_driver);
1198 1198
1199 remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir); 1199 remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir);
1200 1200
1201 return; 1201 return;
1202 } 1202 }
1203 1203
1204 module_init(acpi_processor_init); 1204 module_init(acpi_processor_init);
1205 module_exit(acpi_processor_exit); 1205 module_exit(acpi_processor_exit);
1206 1206
1207 EXPORT_SYMBOL(acpi_processor_set_thermal_limit); 1207 EXPORT_SYMBOL(acpi_processor_set_thermal_limit);
1208 1208
1209 MODULE_ALIAS("processor"); 1209 MODULE_ALIAS("processor");
1210 1210
drivers/cpufreq/cpufreq.c
1 /* 1 /*
2 * linux/drivers/cpufreq/cpufreq.c 2 * linux/drivers/cpufreq/cpufreq.c
3 * 3 *
4 * Copyright (C) 2001 Russell King 4 * Copyright (C) 2001 Russell King
5 * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> 5 * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6 * 6 *
7 * Oct 2005 - Ashok Raj <ashok.raj@intel.com> 7 * Oct 2005 - Ashok Raj <ashok.raj@intel.com>
8 * Added handling for CPU hotplug 8 * Added handling for CPU hotplug
9 * Feb 2006 - Jacob Shin <jacob.shin@amd.com> 9 * Feb 2006 - Jacob Shin <jacob.shin@amd.com>
10 * Fix handling for CPU hotplug -- affected CPUs 10 * Fix handling for CPU hotplug -- affected CPUs
11 * 11 *
12 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as 13 * it under the terms of the GNU General Public License version 2 as
14 * published by the Free Software Foundation. 14 * published by the Free Software Foundation.
15 * 15 *
16 */ 16 */
17 17
18 #include <linux/kernel.h> 18 #include <linux/kernel.h>
19 #include <linux/module.h> 19 #include <linux/module.h>
20 #include <linux/init.h> 20 #include <linux/init.h>
21 #include <linux/notifier.h> 21 #include <linux/notifier.h>
22 #include <linux/cpufreq.h> 22 #include <linux/cpufreq.h>
23 #include <linux/delay.h> 23 #include <linux/delay.h>
24 #include <linux/interrupt.h> 24 #include <linux/interrupt.h>
25 #include <linux/spinlock.h> 25 #include <linux/spinlock.h>
26 #include <linux/device.h> 26 #include <linux/device.h>
27 #include <linux/slab.h> 27 #include <linux/slab.h>
28 #include <linux/cpu.h> 28 #include <linux/cpu.h>
29 #include <linux/completion.h> 29 #include <linux/completion.h>
30 #include <linux/mutex.h> 30 #include <linux/mutex.h>
31 31
32 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \ 32 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
33 "cpufreq-core", msg) 33 "cpufreq-core", msg)
34 34
35 /** 35 /**
36 * The "cpufreq driver" - the arch- or hardware-dependent low 36 * The "cpufreq driver" - the arch- or hardware-dependent low
37 * level driver of CPUFreq support, and its spinlock. This lock 37 * level driver of CPUFreq support, and its spinlock. This lock
38 * also protects the cpufreq_cpu_data array. 38 * also protects the cpufreq_cpu_data array.
39 */ 39 */
40 static struct cpufreq_driver *cpufreq_driver; 40 static struct cpufreq_driver *cpufreq_driver;
41 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); 41 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
42 #ifdef CONFIG_HOTPLUG_CPU 42 #ifdef CONFIG_HOTPLUG_CPU
43 /* This one keeps track of the previously set governor of a removed CPU */ 43 /* This one keeps track of the previously set governor of a removed CPU */
44 static DEFINE_PER_CPU(struct cpufreq_governor *, cpufreq_cpu_governor); 44 static DEFINE_PER_CPU(struct cpufreq_governor *, cpufreq_cpu_governor);
45 #endif 45 #endif
46 static DEFINE_SPINLOCK(cpufreq_driver_lock); 46 static DEFINE_SPINLOCK(cpufreq_driver_lock);
47 47
48 /* 48 /*
49 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure 49 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
50 * all cpufreq/hotplug/workqueue/etc related lock issues. 50 * all cpufreq/hotplug/workqueue/etc related lock issues.
51 * 51 *
52 * The rules for this semaphore: 52 * The rules for this semaphore:
53 * - Any routine that wants to read from the policy structure will 53 * - Any routine that wants to read from the policy structure will
54 * do a down_read on this semaphore. 54 * do a down_read on this semaphore.
55 * - Any routine that will write to the policy structure and/or may take away 55 * - Any routine that will write to the policy structure and/or may take away
56 * the policy altogether (eg. CPU hotplug), will hold this lock in write 56 * the policy altogether (eg. CPU hotplug), will hold this lock in write
57 * mode before doing so. 57 * mode before doing so.
58 * 58 *
59 * Additional rules: 59 * Additional rules:
60 * - All holders of the lock should check to make sure that the CPU they 60 * - All holders of the lock should check to make sure that the CPU they
61 * are concerned with are online after they get the lock. 61 * are concerned with are online after they get the lock.
62 * - Governor routines that can be called in cpufreq hotplug path should not 62 * - Governor routines that can be called in cpufreq hotplug path should not
63 * take this sem as top level hotplug notifier handler takes this. 63 * take this sem as top level hotplug notifier handler takes this.
64 */ 64 */
65 static DEFINE_PER_CPU(int, policy_cpu); 65 static DEFINE_PER_CPU(int, policy_cpu);
66 static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); 66 static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
67 67
68 #define lock_policy_rwsem(mode, cpu) \ 68 #define lock_policy_rwsem(mode, cpu) \
69 int lock_policy_rwsem_##mode \ 69 int lock_policy_rwsem_##mode \
70 (int cpu) \ 70 (int cpu) \
71 { \ 71 { \
72 int policy_cpu = per_cpu(policy_cpu, cpu); \ 72 int policy_cpu = per_cpu(policy_cpu, cpu); \
73 BUG_ON(policy_cpu == -1); \ 73 BUG_ON(policy_cpu == -1); \
74 down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ 74 down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \
75 if (unlikely(!cpu_online(cpu))) { \ 75 if (unlikely(!cpu_online(cpu))) { \
76 up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ 76 up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \
77 return -1; \ 77 return -1; \
78 } \ 78 } \
79 \ 79 \
80 return 0; \ 80 return 0; \
81 } 81 }
82 82
83 lock_policy_rwsem(read, cpu); 83 lock_policy_rwsem(read, cpu);
84 EXPORT_SYMBOL_GPL(lock_policy_rwsem_read); 84 EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);
85 85
86 lock_policy_rwsem(write, cpu); 86 lock_policy_rwsem(write, cpu);
87 EXPORT_SYMBOL_GPL(lock_policy_rwsem_write); 87 EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
88 88
89 void unlock_policy_rwsem_read(int cpu) 89 void unlock_policy_rwsem_read(int cpu)
90 { 90 {
91 int policy_cpu = per_cpu(policy_cpu, cpu); 91 int policy_cpu = per_cpu(policy_cpu, cpu);
92 BUG_ON(policy_cpu == -1); 92 BUG_ON(policy_cpu == -1);
93 up_read(&per_cpu(cpu_policy_rwsem, policy_cpu)); 93 up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
94 } 94 }
95 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read); 95 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
96 96
97 void unlock_policy_rwsem_write(int cpu) 97 void unlock_policy_rwsem_write(int cpu)
98 { 98 {
99 int policy_cpu = per_cpu(policy_cpu, cpu); 99 int policy_cpu = per_cpu(policy_cpu, cpu);
100 BUG_ON(policy_cpu == -1); 100 BUG_ON(policy_cpu == -1);
101 up_write(&per_cpu(cpu_policy_rwsem, policy_cpu)); 101 up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
102 } 102 }
103 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write); 103 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
104 104
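The lock_policy_rwsem() macro above token-pastes the mode into both the generated function name and the down_/up_ helpers. For readers unfamiliar with the trick, the `lock_policy_rwsem(read, cpu);` invocation expands to roughly the function below; a zero return means the semaphore is held and must later be released with unlock_policy_rwsem_read(), while -1 means the CPU went offline and the lock has already been dropped:

	int lock_policy_rwsem_read(int cpu)
	{
		int policy_cpu = per_cpu(policy_cpu, cpu);

		BUG_ON(policy_cpu == -1);
		down_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
		if (unlikely(!cpu_online(cpu))) {
			up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
			return -1;
		}

		return 0;
	}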
105 105
106 /* internal prototypes */ 106 /* internal prototypes */
107 static int __cpufreq_governor(struct cpufreq_policy *policy, 107 static int __cpufreq_governor(struct cpufreq_policy *policy,
108 unsigned int event); 108 unsigned int event);
109 static unsigned int __cpufreq_get(unsigned int cpu); 109 static unsigned int __cpufreq_get(unsigned int cpu);
110 static void handle_update(struct work_struct *work); 110 static void handle_update(struct work_struct *work);
111 111
112 /** 112 /**
113 * Two notifier lists: the "policy" list is involved in the 113 * Two notifier lists: the "policy" list is involved in the
114 * validation process for a new CPU frequency policy; the 114 * validation process for a new CPU frequency policy; the
115 * "transition" list for kernel code that needs to handle 115 * "transition" list for kernel code that needs to handle
116 * changes to devices when the CPU clock speed changes. 116 * changes to devices when the CPU clock speed changes.
117 * The mutex locks both lists. 117 * The mutex locks both lists.
118 */ 118 */
119 static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list); 119 static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
120 static struct srcu_notifier_head cpufreq_transition_notifier_list; 120 static struct srcu_notifier_head cpufreq_transition_notifier_list;
121 121
122 static bool init_cpufreq_transition_notifier_list_called; 122 static bool init_cpufreq_transition_notifier_list_called;
123 static int __init init_cpufreq_transition_notifier_list(void) 123 static int __init init_cpufreq_transition_notifier_list(void)
124 { 124 {
125 srcu_init_notifier_head(&cpufreq_transition_notifier_list); 125 srcu_init_notifier_head(&cpufreq_transition_notifier_list);
126 init_cpufreq_transition_notifier_list_called = true; 126 init_cpufreq_transition_notifier_list_called = true;
127 return 0; 127 return 0;
128 } 128 }
129 pure_initcall(init_cpufreq_transition_notifier_list); 129 pure_initcall(init_cpufreq_transition_notifier_list);
130 130
131 static LIST_HEAD(cpufreq_governor_list); 131 static LIST_HEAD(cpufreq_governor_list);
132 static DEFINE_MUTEX(cpufreq_governor_mutex); 132 static DEFINE_MUTEX(cpufreq_governor_mutex);
133 133
134 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) 134 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
135 { 135 {
136 struct cpufreq_policy *data; 136 struct cpufreq_policy *data;
137 unsigned long flags; 137 unsigned long flags;
138 138
139 if (cpu >= nr_cpu_ids) 139 if (cpu >= nr_cpu_ids)
140 goto err_out; 140 goto err_out;
141 141
142 /* get the cpufreq driver */ 142 /* get the cpufreq driver */
143 spin_lock_irqsave(&cpufreq_driver_lock, flags); 143 spin_lock_irqsave(&cpufreq_driver_lock, flags);
144 144
145 if (!cpufreq_driver) 145 if (!cpufreq_driver)
146 goto err_out_unlock; 146 goto err_out_unlock;
147 147
148 if (!try_module_get(cpufreq_driver->owner)) 148 if (!try_module_get(cpufreq_driver->owner))
149 goto err_out_unlock; 149 goto err_out_unlock;
150 150
151 151
152 /* get the CPU */ 152 /* get the CPU */
153 data = per_cpu(cpufreq_cpu_data, cpu); 153 data = per_cpu(cpufreq_cpu_data, cpu);
154 154
155 if (!data) 155 if (!data)
156 goto err_out_put_module; 156 goto err_out_put_module;
157 157
158 if (!kobject_get(&data->kobj)) 158 if (!kobject_get(&data->kobj))
159 goto err_out_put_module; 159 goto err_out_put_module;
160 160
161 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 161 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
162 return data; 162 return data;
163 163
164 err_out_put_module: 164 err_out_put_module:
165 module_put(cpufreq_driver->owner); 165 module_put(cpufreq_driver->owner);
166 err_out_unlock: 166 err_out_unlock:
167 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 167 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
168 err_out: 168 err_out:
169 return NULL; 169 return NULL;
170 } 170 }
171 EXPORT_SYMBOL_GPL(cpufreq_cpu_get); 171 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
172 172
173 173
174 void cpufreq_cpu_put(struct cpufreq_policy *data) 174 void cpufreq_cpu_put(struct cpufreq_policy *data)
175 { 175 {
176 kobject_put(&data->kobj); 176 kobject_put(&data->kobj);
177 module_put(cpufreq_driver->owner); 177 module_put(cpufreq_driver->owner);
178 } 178 }
179 EXPORT_SYMBOL_GPL(cpufreq_cpu_put); 179 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
180 180
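cpufreq_cpu_get() pins both the cpufreq driver module and the per-CPU policy kobject, returning NULL if either is unavailable, and every successful call must be balanced by cpufreq_cpu_put(). A short sketch of the expected get/check/use/put pattern (the demo_query_cpu() wrapper is illustrative):

	#include <linux/kernel.h>
	#include <linux/errno.h>
	#include <linux/cpufreq.h>

	/* Illustrative wrapper: report a CPU's current policy limits, if any. */
	static int demo_query_cpu(unsigned int cpu)
	{
		struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

		if (!policy)
			return -ENODEV;	/* no driver loaded, or no policy for cpu */

		pr_info("cpu%u: %u..%u kHz\n", cpu, policy->min, policy->max);

		cpufreq_cpu_put(policy);	/* balances the get above */
		return 0;
	}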
181 181
182 /********************************************************************* 182 /*********************************************************************
183 * UNIFIED DEBUG HELPERS * 183 * UNIFIED DEBUG HELPERS *
184 *********************************************************************/ 184 *********************************************************************/
185 #ifdef CONFIG_CPU_FREQ_DEBUG 185 #ifdef CONFIG_CPU_FREQ_DEBUG
186 186
187 /* what part(s) of the CPUfreq subsystem are debugged? */ 187 /* what part(s) of the CPUfreq subsystem are debugged? */
188 static unsigned int debug; 188 static unsigned int debug;
189 189
190 /* is the debug output ratelimit'ed using printk_ratelimit? User can 190 /* is the debug output ratelimit'ed using printk_ratelimit? User can
191 * set or modify this value. 191 * set or modify this value.
192 */ 192 */
193 static unsigned int debug_ratelimit = 1; 193 static unsigned int debug_ratelimit = 1;
194 194
195 /* is the printk_ratelimit'ing enabled? It's enabled after a successful 195 /* is the printk_ratelimit'ing enabled? It's enabled after a successful
196 * loading of a cpufreq driver, temporarily disabled when a new policy 196 * loading of a cpufreq driver, temporarily disabled when a new policy
197 * is set, and disabled upon cpufreq driver removal 197 * is set, and disabled upon cpufreq driver removal
198 */ 198 */
199 static unsigned int disable_ratelimit = 1; 199 static unsigned int disable_ratelimit = 1;
200 static DEFINE_SPINLOCK(disable_ratelimit_lock); 200 static DEFINE_SPINLOCK(disable_ratelimit_lock);
201 201
202 static void cpufreq_debug_enable_ratelimit(void) 202 static void cpufreq_debug_enable_ratelimit(void)
203 { 203 {
204 unsigned long flags; 204 unsigned long flags;
205 205
206 spin_lock_irqsave(&disable_ratelimit_lock, flags); 206 spin_lock_irqsave(&disable_ratelimit_lock, flags);
207 if (disable_ratelimit) 207 if (disable_ratelimit)
208 disable_ratelimit--; 208 disable_ratelimit--;
209 spin_unlock_irqrestore(&disable_ratelimit_lock, flags); 209 spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
210 } 210 }
211 211
212 static void cpufreq_debug_disable_ratelimit(void) 212 static void cpufreq_debug_disable_ratelimit(void)
213 { 213 {
214 unsigned long flags; 214 unsigned long flags;
215 215
216 spin_lock_irqsave(&disable_ratelimit_lock, flags); 216 spin_lock_irqsave(&disable_ratelimit_lock, flags);
217 disable_ratelimit++; 217 disable_ratelimit++;
218 spin_unlock_irqrestore(&disable_ratelimit_lock, flags); 218 spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
219 } 219 }
220 220
221 void cpufreq_debug_printk(unsigned int type, const char *prefix, 221 void cpufreq_debug_printk(unsigned int type, const char *prefix,
222 const char *fmt, ...) 222 const char *fmt, ...)
223 { 223 {
224 char s[256]; 224 char s[256];
225 va_list args; 225 va_list args;
226 unsigned int len; 226 unsigned int len;
227 unsigned long flags; 227 unsigned long flags;
228 228
229 WARN_ON(!prefix); 229 WARN_ON(!prefix);
230 if (type & debug) { 230 if (type & debug) {
231 spin_lock_irqsave(&disable_ratelimit_lock, flags); 231 spin_lock_irqsave(&disable_ratelimit_lock, flags);
232 if (!disable_ratelimit && debug_ratelimit 232 if (!disable_ratelimit && debug_ratelimit
233 && !printk_ratelimit()) { 233 && !printk_ratelimit()) {
234 spin_unlock_irqrestore(&disable_ratelimit_lock, flags); 234 spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
235 return; 235 return;
236 } 236 }
237 spin_unlock_irqrestore(&disable_ratelimit_lock, flags); 237 spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
238 238
239 len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix); 239 len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix);
240 240
241 va_start(args, fmt); 241 va_start(args, fmt);
242 len += vsnprintf(&s[len], (256 - len), fmt, args); 242 len += vsnprintf(&s[len], (256 - len), fmt, args);
243 va_end(args); 243 va_end(args);
244 244
245 printk(s); 245 printk(s);
246 246
247 WARN_ON(len < 5); 247 WARN_ON(len < 5);
248 } 248 }
249 } 249 }
250 EXPORT_SYMBOL(cpufreq_debug_printk); 250 EXPORT_SYMBOL(cpufreq_debug_printk);
251 251
252 252
253 module_param(debug, uint, 0644); 253 module_param(debug, uint, 0644);
254 MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core," 254 MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core,"
255 " 2 to debug drivers, and 4 to debug governors."); 255 " 2 to debug drivers, and 4 to debug governors.");
256 256
257 module_param(debug_ratelimit, uint, 0644); 257 module_param(debug_ratelimit, uint, 0644);
258 MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:" 258 MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:"
259 " set to 0 to disable ratelimiting."); 259 " set to 0 to disable ratelimiting.");
260 260
261 #else /* !CONFIG_CPU_FREQ_DEBUG */ 261 #else /* !CONFIG_CPU_FREQ_DEBUG */
262 262
263 static inline void cpufreq_debug_enable_ratelimit(void) { return; } 263 static inline void cpufreq_debug_enable_ratelimit(void) { return; }
264 static inline void cpufreq_debug_disable_ratelimit(void) { return; } 264 static inline void cpufreq_debug_disable_ratelimit(void) { return; }
265 265
266 #endif /* CONFIG_CPU_FREQ_DEBUG */ 266 #endif /* CONFIG_CPU_FREQ_DEBUG */
267 267
268 268
269 /********************************************************************* 269 /*********************************************************************
270 * EXTERNALLY AFFECTING FREQUENCY CHANGES * 270 * EXTERNALLY AFFECTING FREQUENCY CHANGES *
271 *********************************************************************/ 271 *********************************************************************/
272 272
273 /** 273 /**
274 * adjust_jiffies - adjust the system "loops_per_jiffy" 274 * adjust_jiffies - adjust the system "loops_per_jiffy"
275 * 275 *
276 * This function alters the system "loops_per_jiffy" for the clock 276 * This function alters the system "loops_per_jiffy" for the clock
277 * speed change. Note that loops_per_jiffy cannot be updated on SMP 277 * speed change. Note that loops_per_jiffy cannot be updated on SMP
278 * systems as each CPU might be scaled differently. So, use the arch 278 * systems as each CPU might be scaled differently. So, use the arch
279 * per-CPU loops_per_jiffy value wherever possible. 279 * per-CPU loops_per_jiffy value wherever possible.
280 */ 280 */
281 #ifndef CONFIG_SMP 281 #ifndef CONFIG_SMP
282 static unsigned long l_p_j_ref; 282 static unsigned long l_p_j_ref;
283 static unsigned int l_p_j_ref_freq; 283 static unsigned int l_p_j_ref_freq;
284 284
285 static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) 285 static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
286 { 286 {
287 if (ci->flags & CPUFREQ_CONST_LOOPS) 287 if (ci->flags & CPUFREQ_CONST_LOOPS)
288 return; 288 return;
289 289
290 if (!l_p_j_ref_freq) { 290 if (!l_p_j_ref_freq) {
291 l_p_j_ref = loops_per_jiffy; 291 l_p_j_ref = loops_per_jiffy;
292 l_p_j_ref_freq = ci->old; 292 l_p_j_ref_freq = ci->old;
293 dprintk("saving %lu as reference value for loops_per_jiffy; " 293 dprintk("saving %lu as reference value for loops_per_jiffy; "
294 "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq); 294 "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
295 } 295 }
296 if ((val == CPUFREQ_PRECHANGE && ci->old < ci->new) || 296 if ((val == CPUFREQ_PRECHANGE && ci->old < ci->new) ||
297 (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) || 297 (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
298 (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { 298 (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
299 loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, 299 loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
300 ci->new); 300 ci->new);
301 dprintk("scaling loops_per_jiffy to %lu " 301 dprintk("scaling loops_per_jiffy to %lu "
302 "for frequency %u kHz\n", loops_per_jiffy, ci->new); 302 "for frequency %u kHz\n", loops_per_jiffy, ci->new);
303 } 303 }
304 } 304 }
305 #else 305 #else
306 static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) 306 static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
307 { 307 {
308 return; 308 return;
309 } 309 }
310 #endif 310 #endif
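/*
 * Worked example for the scaling above, with illustrative numbers only:
 * assume loops_per_jiffy == 4000000 was saved as the reference at
 * l_p_j_ref_freq == 1000000 kHz.  cpufreq_scale(old, div, mult) computes
 * roughly old * mult / div, so a transition to 2000000 kHz gives
 *
 *	cpufreq_scale(4000000, 1000000, 2000000) == 8000000
 *
 * which keeps udelay() roughly calibrated after the frequency change.
 */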
311 311
312 312
313 /** 313 /**
314 * cpufreq_notify_transition - call notifier chain and adjust_jiffies 314 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
315 * on frequency transition. 315 * on frequency transition.
316 * 316 *
317 * This function calls the transition notifiers and the "adjust_jiffies" 317 * This function calls the transition notifiers and the "adjust_jiffies"
318 * function. It is called twice on all CPU frequency changes that have 318 * function. It is called twice on all CPU frequency changes that have
319 * external effects. 319 * external effects.
320 */ 320 */
321 void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) 321 void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
322 { 322 {
323 struct cpufreq_policy *policy; 323 struct cpufreq_policy *policy;
324 324
325 BUG_ON(irqs_disabled()); 325 BUG_ON(irqs_disabled());
326 326
327 freqs->flags = cpufreq_driver->flags; 327 freqs->flags = cpufreq_driver->flags;
328 dprintk("notification %u of frequency transition to %u kHz\n", 328 dprintk("notification %u of frequency transition to %u kHz\n",
329 state, freqs->new); 329 state, freqs->new);
330 330
331 policy = per_cpu(cpufreq_cpu_data, freqs->cpu); 331 policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
332 switch (state) { 332 switch (state) {
333 333
334 case CPUFREQ_PRECHANGE: 334 case CPUFREQ_PRECHANGE:
335 /* detect if the driver reported a value as "old frequency" 335 /* detect if the driver reported a value as "old frequency"
336 * which is not equal to what the cpufreq core thinks is 336 * which is not equal to what the cpufreq core thinks is
337 * "old frequency". 337 * "old frequency".
338 */ 338 */
339 if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { 339 if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
340 if ((policy) && (policy->cpu == freqs->cpu) && 340 if ((policy) && (policy->cpu == freqs->cpu) &&
341 (policy->cur) && (policy->cur != freqs->old)) { 341 (policy->cur) && (policy->cur != freqs->old)) {
342 dprintk("Warning: CPU frequency is" 342 dprintk("Warning: CPU frequency is"
343 " %u, cpufreq assumed %u kHz.\n", 343 " %u, cpufreq assumed %u kHz.\n",
344 freqs->old, policy->cur); 344 freqs->old, policy->cur);
345 freqs->old = policy->cur; 345 freqs->old = policy->cur;
346 } 346 }
347 } 347 }
348 srcu_notifier_call_chain(&cpufreq_transition_notifier_list, 348 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
349 CPUFREQ_PRECHANGE, freqs); 349 CPUFREQ_PRECHANGE, freqs);
350 adjust_jiffies(CPUFREQ_PRECHANGE, freqs); 350 adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
351 break; 351 break;
352 352
353 case CPUFREQ_POSTCHANGE: 353 case CPUFREQ_POSTCHANGE:
354 adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); 354 adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
355 srcu_notifier_call_chain(&cpufreq_transition_notifier_list, 355 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
356 CPUFREQ_POSTCHANGE, freqs); 356 CPUFREQ_POSTCHANGE, freqs);
357 if (likely(policy) && likely(policy->cpu == freqs->cpu)) 357 if (likely(policy) && likely(policy->cpu == freqs->cpu))
358 policy->cur = freqs->new; 358 policy->cur = freqs->new;
359 break; 359 break;
360 } 360 }
361 } 361 }
362 EXPORT_SYMBOL_GPL(cpufreq_notify_transition); 362 EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
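/*
 * A minimal sketch of how a scaling driver is expected to bracket a
 * frequency change with the two notifications handled above.  Only the
 * function name example_driver_set_freq() and the "program the
 * hardware" step are placeholders; the struct fields and calls are the
 * ones used elsewhere in this file.
 */
static void example_driver_set_freq(unsigned int cpu, unsigned int old_khz,
				    unsigned int new_khz)
{
	struct cpufreq_freqs freqs = {
		.cpu = cpu,
		.old = old_khz,
		.new = new_khz,
	};

	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);

	/* ... program the hardware to new_khz here ... */

	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}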
363 363
364 364
365 365
366 /********************************************************************* 366 /*********************************************************************
367 * SYSFS INTERFACE * 367 * SYSFS INTERFACE *
368 *********************************************************************/ 368 *********************************************************************/
369 369
370 static struct cpufreq_governor *__find_governor(const char *str_governor) 370 static struct cpufreq_governor *__find_governor(const char *str_governor)
371 { 371 {
372 struct cpufreq_governor *t; 372 struct cpufreq_governor *t;
373 373
374 list_for_each_entry(t, &cpufreq_governor_list, governor_list) 374 list_for_each_entry(t, &cpufreq_governor_list, governor_list)
375 if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN)) 375 if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
376 return t; 376 return t;
377 377
378 return NULL; 378 return NULL;
379 } 379 }
380 380
381 /** 381 /**
382 * cpufreq_parse_governor - parse a governor string 382 * cpufreq_parse_governor - parse a governor string
383 */ 383 */
384 static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, 384 static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
385 struct cpufreq_governor **governor) 385 struct cpufreq_governor **governor)
386 { 386 {
387 int err = -EINVAL; 387 int err = -EINVAL;
388 388
389 if (!cpufreq_driver) 389 if (!cpufreq_driver)
390 goto out; 390 goto out;
391 391
392 if (cpufreq_driver->setpolicy) { 392 if (cpufreq_driver->setpolicy) {
393 if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { 393 if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
394 *policy = CPUFREQ_POLICY_PERFORMANCE; 394 *policy = CPUFREQ_POLICY_PERFORMANCE;
395 err = 0; 395 err = 0;
396 } else if (!strnicmp(str_governor, "powersave", 396 } else if (!strnicmp(str_governor, "powersave",
397 CPUFREQ_NAME_LEN)) { 397 CPUFREQ_NAME_LEN)) {
398 *policy = CPUFREQ_POLICY_POWERSAVE; 398 *policy = CPUFREQ_POLICY_POWERSAVE;
399 err = 0; 399 err = 0;
400 } 400 }
401 } else if (cpufreq_driver->target) { 401 } else if (cpufreq_driver->target) {
402 struct cpufreq_governor *t; 402 struct cpufreq_governor *t;
403 403
404 mutex_lock(&cpufreq_governor_mutex); 404 mutex_lock(&cpufreq_governor_mutex);
405 405
406 t = __find_governor(str_governor); 406 t = __find_governor(str_governor);
407 407
408 if (t == NULL) { 408 if (t == NULL) {
409 char *name = kasprintf(GFP_KERNEL, "cpufreq_%s", 409 char *name = kasprintf(GFP_KERNEL, "cpufreq_%s",
410 str_governor); 410 str_governor);
411 411
412 if (name) { 412 if (name) {
413 int ret; 413 int ret;
414 414
415 mutex_unlock(&cpufreq_governor_mutex); 415 mutex_unlock(&cpufreq_governor_mutex);
416 ret = request_module("%s", name); 416 ret = request_module("%s", name);
417 mutex_lock(&cpufreq_governor_mutex); 417 mutex_lock(&cpufreq_governor_mutex);
418 418
419 if (ret == 0) 419 if (ret == 0)
420 t = __find_governor(str_governor); 420 t = __find_governor(str_governor);
421 } 421 }
422 422
423 kfree(name); 423 kfree(name);
424 } 424 }
425 425
426 if (t != NULL) { 426 if (t != NULL) {
427 *governor = t; 427 *governor = t;
428 err = 0; 428 err = 0;
429 } 429 }
430 430
431 mutex_unlock(&cpufreq_governor_mutex); 431 mutex_unlock(&cpufreq_governor_mutex);
432 } 432 }
433 out: 433 out:
434 return err; 434 return err;
435 } 435 }
436 436
437 437
438 /** 438 /**
439 * cpufreq_per_cpu_attr_read() / show_##file_name() - 439 * cpufreq_per_cpu_attr_read() / show_##file_name() -
440 * print out cpufreq information 440 * print out cpufreq information
441 * 441 *
442 * Write out information from cpufreq_driver->policy[cpu]; object must be 442 * Write out information from cpufreq_driver->policy[cpu]; object must be
443 * "unsigned int". 443 * "unsigned int".
444 */ 444 */
445 445
446 #define show_one(file_name, object) \ 446 #define show_one(file_name, object) \
447 static ssize_t show_##file_name \ 447 static ssize_t show_##file_name \
448 (struct cpufreq_policy *policy, char *buf) \ 448 (struct cpufreq_policy *policy, char *buf) \
449 { \ 449 { \
450 return sprintf(buf, "%u\n", policy->object); \ 450 return sprintf(buf, "%u\n", policy->object); \
451 } 451 }
452 452
453 show_one(cpuinfo_min_freq, cpuinfo.min_freq); 453 show_one(cpuinfo_min_freq, cpuinfo.min_freq);
454 show_one(cpuinfo_max_freq, cpuinfo.max_freq); 454 show_one(cpuinfo_max_freq, cpuinfo.max_freq);
455 show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); 455 show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
456 show_one(scaling_min_freq, min); 456 show_one(scaling_min_freq, min);
457 show_one(scaling_max_freq, max); 457 show_one(scaling_max_freq, max);
458 show_one(scaling_cur_freq, cur); 458 show_one(scaling_cur_freq, cur);
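/*
 * For reference, what one of the show_one() instances above expands to,
 * written out by hand; this is just the macro with its arguments
 * substituted, not additional code:
 *
 *	static ssize_t show_scaling_min_freq(struct cpufreq_policy *policy,
 *					     char *buf)
 *	{
 *		return sprintf(buf, "%u\n", policy->min);
 *	}
 */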
459 459
460 static int __cpufreq_set_policy(struct cpufreq_policy *data, 460 static int __cpufreq_set_policy(struct cpufreq_policy *data,
461 struct cpufreq_policy *policy); 461 struct cpufreq_policy *policy);
462 462
463 /** 463 /**
464 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access 464 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
465 */ 465 */
466 #define store_one(file_name, object) \ 466 #define store_one(file_name, object) \
467 static ssize_t store_##file_name \ 467 static ssize_t store_##file_name \
468 (struct cpufreq_policy *policy, const char *buf, size_t count) \ 468 (struct cpufreq_policy *policy, const char *buf, size_t count) \
469 { \ 469 { \
470 unsigned int ret = -EINVAL; \ 470 unsigned int ret = -EINVAL; \
471 struct cpufreq_policy new_policy; \ 471 struct cpufreq_policy new_policy; \
472 \ 472 \
473 ret = cpufreq_get_policy(&new_policy, policy->cpu); \ 473 ret = cpufreq_get_policy(&new_policy, policy->cpu); \
474 if (ret) \ 474 if (ret) \
475 return -EINVAL; \ 475 return -EINVAL; \
476 \ 476 \
477 ret = sscanf(buf, "%u", &new_policy.object); \ 477 ret = sscanf(buf, "%u", &new_policy.object); \
478 if (ret != 1) \ 478 if (ret != 1) \
479 return -EINVAL; \ 479 return -EINVAL; \
480 \ 480 \
481 ret = __cpufreq_set_policy(policy, &new_policy); \ 481 ret = __cpufreq_set_policy(policy, &new_policy); \
482 policy->user_policy.object = policy->object; \ 482 policy->user_policy.object = policy->object; \
483 \ 483 \
484 return ret ? ret : count; \ 484 return ret ? ret : count; \
485 } 485 }
486 486
487 store_one(scaling_min_freq, min); 487 store_one(scaling_min_freq, min);
488 store_one(scaling_max_freq, max); 488 store_one(scaling_max_freq, max);
489 489
490 /** 490 /**
491 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware 491 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
492 */ 492 */
493 static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, 493 static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
494 char *buf) 494 char *buf)
495 { 495 {
496 unsigned int cur_freq = __cpufreq_get(policy->cpu); 496 unsigned int cur_freq = __cpufreq_get(policy->cpu);
497 if (!cur_freq) 497 if (!cur_freq)
498 return sprintf(buf, "<unknown>"); 498 return sprintf(buf, "<unknown>");
499 return sprintf(buf, "%u\n", cur_freq); 499 return sprintf(buf, "%u\n", cur_freq);
500 } 500 }
501 501
502 502
503 /** 503 /**
504 * show_scaling_governor - show the current policy for the specified CPU 504 * show_scaling_governor - show the current policy for the specified CPU
505 */ 505 */
506 static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) 506 static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
507 { 507 {
508 if (policy->policy == CPUFREQ_POLICY_POWERSAVE) 508 if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
509 return sprintf(buf, "powersave\n"); 509 return sprintf(buf, "powersave\n");
510 else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) 510 else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
511 return sprintf(buf, "performance\n"); 511 return sprintf(buf, "performance\n");
512 else if (policy->governor) 512 else if (policy->governor)
513 return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", 513 return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
514 policy->governor->name); 514 policy->governor->name);
515 return -EINVAL; 515 return -EINVAL;
516 } 516 }
517 517
518 518
519 /** 519 /**
520 * store_scaling_governor - store policy for the specified CPU 520 * store_scaling_governor - store policy for the specified CPU
521 */ 521 */
522 static ssize_t store_scaling_governor(struct cpufreq_policy *policy, 522 static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
523 const char *buf, size_t count) 523 const char *buf, size_t count)
524 { 524 {
525 unsigned int ret = -EINVAL; 525 unsigned int ret = -EINVAL;
526 char str_governor[16]; 526 char str_governor[16];
527 struct cpufreq_policy new_policy; 527 struct cpufreq_policy new_policy;
528 528
529 ret = cpufreq_get_policy(&new_policy, policy->cpu); 529 ret = cpufreq_get_policy(&new_policy, policy->cpu);
530 if (ret) 530 if (ret)
531 return ret; 531 return ret;
532 532
533 ret = sscanf(buf, "%15s", str_governor); 533 ret = sscanf(buf, "%15s", str_governor);
534 if (ret != 1) 534 if (ret != 1)
535 return -EINVAL; 535 return -EINVAL;
536 536
537 if (cpufreq_parse_governor(str_governor, &new_policy.policy, 537 if (cpufreq_parse_governor(str_governor, &new_policy.policy,
538 &new_policy.governor)) 538 &new_policy.governor))
539 return -EINVAL; 539 return -EINVAL;
540 540
541 /* Do not use cpufreq_set_policy here or the user_policy.max 541 /* Do not use cpufreq_set_policy here or the user_policy.max
542 will be wrongly overridden */ 542 will be wrongly overridden */
543 ret = __cpufreq_set_policy(policy, &new_policy); 543 ret = __cpufreq_set_policy(policy, &new_policy);
544 544
545 policy->user_policy.policy = policy->policy; 545 policy->user_policy.policy = policy->policy;
546 policy->user_policy.governor = policy->governor; 546 policy->user_policy.governor = policy->governor;
547 547
548 if (ret) 548 if (ret)
549 return ret; 549 return ret;
550 else 550 else
551 return count; 551 return count;
552 } 552 }
553 553
554 /** 554 /**
555 * show_scaling_driver - show the cpufreq driver currently loaded 555 * show_scaling_driver - show the cpufreq driver currently loaded
556 */ 556 */
557 static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) 557 static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
558 { 558 {
559 return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name); 559 return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
560 } 560 }
561 561
562 /** 562 /**
563 * show_scaling_available_governors - show the available CPUfreq governors 563 * show_scaling_available_governors - show the available CPUfreq governors
564 */ 564 */
565 static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, 565 static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
566 char *buf) 566 char *buf)
567 { 567 {
568 ssize_t i = 0; 568 ssize_t i = 0;
569 struct cpufreq_governor *t; 569 struct cpufreq_governor *t;
570 570
571 if (!cpufreq_driver->target) { 571 if (!cpufreq_driver->target) {
572 i += sprintf(buf, "performance powersave"); 572 i += sprintf(buf, "performance powersave");
573 goto out; 573 goto out;
574 } 574 }
575 575
576 list_for_each_entry(t, &cpufreq_governor_list, governor_list) { 576 list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
577 if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) 577 if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
578 - (CPUFREQ_NAME_LEN + 2))) 578 - (CPUFREQ_NAME_LEN + 2)))
579 goto out; 579 goto out;
580 i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name); 580 i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
581 } 581 }
582 out: 582 out:
583 i += sprintf(&buf[i], "\n"); 583 i += sprintf(&buf[i], "\n");
584 return i; 584 return i;
585 } 585 }
586 586
587 static ssize_t show_cpus(const struct cpumask *mask, char *buf) 587 static ssize_t show_cpus(const struct cpumask *mask, char *buf)
588 { 588 {
589 ssize_t i = 0; 589 ssize_t i = 0;
590 unsigned int cpu; 590 unsigned int cpu;
591 591
592 for_each_cpu(cpu, mask) { 592 for_each_cpu(cpu, mask) {
593 if (i) 593 if (i)
594 i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " "); 594 i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
595 i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu); 595 i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
596 if (i >= (PAGE_SIZE - 5)) 596 if (i >= (PAGE_SIZE - 5))
597 break; 597 break;
598 } 598 }
599 i += sprintf(&buf[i], "\n"); 599 i += sprintf(&buf[i], "\n");
600 return i; 600 return i;
601 } 601 }
602 602
603 /** 603 /**
604 * show_related_cpus - show the CPUs affected by each transition even if 604 * show_related_cpus - show the CPUs affected by each transition even if
605 * hw coordination is in use 605 * hw coordination is in use
606 */ 606 */
607 static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf) 607 static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
608 { 608 {
609 if (cpumask_empty(policy->related_cpus)) 609 if (cpumask_empty(policy->related_cpus))
610 return show_cpus(policy->cpus, buf); 610 return show_cpus(policy->cpus, buf);
611 return show_cpus(policy->related_cpus, buf); 611 return show_cpus(policy->related_cpus, buf);
612 } 612 }
613 613
614 /** 614 /**
615 * show_affected_cpus - show the CPUs affected by each transition 615 * show_affected_cpus - show the CPUs affected by each transition
616 */ 616 */
617 static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) 617 static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
618 { 618 {
619 return show_cpus(policy->cpus, buf); 619 return show_cpus(policy->cpus, buf);
620 } 620 }
621 621
622 static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy, 622 static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
623 const char *buf, size_t count) 623 const char *buf, size_t count)
624 { 624 {
625 unsigned int freq = 0; 625 unsigned int freq = 0;
626 unsigned int ret; 626 unsigned int ret;
627 627
628 if (!policy->governor || !policy->governor->store_setspeed) 628 if (!policy->governor || !policy->governor->store_setspeed)
629 return -EINVAL; 629 return -EINVAL;
630 630
631 ret = sscanf(buf, "%u", &freq); 631 ret = sscanf(buf, "%u", &freq);
632 if (ret != 1) 632 if (ret != 1)
633 return -EINVAL; 633 return -EINVAL;
634 634
635 policy->governor->store_setspeed(policy, freq); 635 policy->governor->store_setspeed(policy, freq);
636 636
637 return count; 637 return count;
638 } 638 }
639 639
640 static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf) 640 static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
641 { 641 {
642 if (!policy->governor || !policy->governor->show_setspeed) 642 if (!policy->governor || !policy->governor->show_setspeed)
643 return sprintf(buf, "<unsupported>\n"); 643 return sprintf(buf, "<unsupported>\n");
644 644
645 return policy->governor->show_setspeed(policy, buf); 645 return policy->governor->show_setspeed(policy, buf);
646 } 646 }
647 647
648 #define define_one_ro(_name) \ 648 #define define_one_ro(_name) \
649 static struct freq_attr _name = \ 649 static struct freq_attr _name = \
650 __ATTR(_name, 0444, show_##_name, NULL) 650 __ATTR(_name, 0444, show_##_name, NULL)
651 651
652 #define define_one_ro0400(_name) \ 652 #define define_one_ro0400(_name) \
653 static struct freq_attr _name = \ 653 static struct freq_attr _name = \
654 __ATTR(_name, 0400, show_##_name, NULL) 654 __ATTR(_name, 0400, show_##_name, NULL)
655 655
656 #define define_one_rw(_name) \ 656 #define define_one_rw(_name) \
657 static struct freq_attr _name = \ 657 static struct freq_attr _name = \
658 __ATTR(_name, 0644, show_##_name, store_##_name) 658 __ATTR(_name, 0644, show_##_name, store_##_name)
659 659
660 define_one_ro0400(cpuinfo_cur_freq); 660 define_one_ro0400(cpuinfo_cur_freq);
661 define_one_ro(cpuinfo_min_freq); 661 define_one_ro(cpuinfo_min_freq);
662 define_one_ro(cpuinfo_max_freq); 662 define_one_ro(cpuinfo_max_freq);
663 define_one_ro(cpuinfo_transition_latency); 663 define_one_ro(cpuinfo_transition_latency);
664 define_one_ro(scaling_available_governors); 664 define_one_ro(scaling_available_governors);
665 define_one_ro(scaling_driver); 665 define_one_ro(scaling_driver);
666 define_one_ro(scaling_cur_freq); 666 define_one_ro(scaling_cur_freq);
667 define_one_ro(related_cpus); 667 define_one_ro(related_cpus);
668 define_one_ro(affected_cpus); 668 define_one_ro(affected_cpus);
669 define_one_rw(scaling_min_freq); 669 define_one_rw(scaling_min_freq);
670 define_one_rw(scaling_max_freq); 670 define_one_rw(scaling_max_freq);
671 define_one_rw(scaling_governor); 671 define_one_rw(scaling_governor);
672 define_one_rw(scaling_setspeed); 672 define_one_rw(scaling_setspeed);
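/*
 * Likewise, define_one_rw(scaling_max_freq) above is shorthand for
 * roughly:
 *
 *	static struct freq_attr scaling_max_freq =
 *		__ATTR(scaling_max_freq, 0644, show_scaling_max_freq,
 *		       store_scaling_max_freq);
 *
 * i.e. a sysfs attribute wired to the show_/store_ handlers generated
 * by the macros earlier in this file.
 */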
673 673
674 static struct attribute *default_attrs[] = { 674 static struct attribute *default_attrs[] = {
675 &cpuinfo_min_freq.attr, 675 &cpuinfo_min_freq.attr,
676 &cpuinfo_max_freq.attr, 676 &cpuinfo_max_freq.attr,
677 &cpuinfo_transition_latency.attr, 677 &cpuinfo_transition_latency.attr,
678 &scaling_min_freq.attr, 678 &scaling_min_freq.attr,
679 &scaling_max_freq.attr, 679 &scaling_max_freq.attr,
680 &affected_cpus.attr, 680 &affected_cpus.attr,
681 &related_cpus.attr, 681 &related_cpus.attr,
682 &scaling_governor.attr, 682 &scaling_governor.attr,
683 &scaling_driver.attr, 683 &scaling_driver.attr,
684 &scaling_available_governors.attr, 684 &scaling_available_governors.attr,
685 &scaling_setspeed.attr, 685 &scaling_setspeed.attr,
686 NULL 686 NULL
687 }; 687 };
688 688
689 #define to_policy(k) container_of(k, struct cpufreq_policy, kobj) 689 #define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
690 #define to_attr(a) container_of(a, struct freq_attr, attr) 690 #define to_attr(a) container_of(a, struct freq_attr, attr)
691 691
692 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) 692 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
693 { 693 {
694 struct cpufreq_policy *policy = to_policy(kobj); 694 struct cpufreq_policy *policy = to_policy(kobj);
695 struct freq_attr *fattr = to_attr(attr); 695 struct freq_attr *fattr = to_attr(attr);
696 ssize_t ret = -EINVAL; 696 ssize_t ret = -EINVAL;
697 policy = cpufreq_cpu_get(policy->cpu); 697 policy = cpufreq_cpu_get(policy->cpu);
698 if (!policy) 698 if (!policy)
699 goto no_policy; 699 goto no_policy;
700 700
701 if (lock_policy_rwsem_read(policy->cpu) < 0) 701 if (lock_policy_rwsem_read(policy->cpu) < 0)
702 goto fail; 702 goto fail;
703 703
704 if (fattr->show) 704 if (fattr->show)
705 ret = fattr->show(policy, buf); 705 ret = fattr->show(policy, buf);
706 else 706 else
707 ret = -EIO; 707 ret = -EIO;
708 708
709 unlock_policy_rwsem_read(policy->cpu); 709 unlock_policy_rwsem_read(policy->cpu);
710 fail: 710 fail:
711 cpufreq_cpu_put(policy); 711 cpufreq_cpu_put(policy);
712 no_policy: 712 no_policy:
713 return ret; 713 return ret;
714 } 714 }
715 715
716 static ssize_t store(struct kobject *kobj, struct attribute *attr, 716 static ssize_t store(struct kobject *kobj, struct attribute *attr,
717 const char *buf, size_t count) 717 const char *buf, size_t count)
718 { 718 {
719 struct cpufreq_policy *policy = to_policy(kobj); 719 struct cpufreq_policy *policy = to_policy(kobj);
720 struct freq_attr *fattr = to_attr(attr); 720 struct freq_attr *fattr = to_attr(attr);
721 ssize_t ret = -EINVAL; 721 ssize_t ret = -EINVAL;
722 policy = cpufreq_cpu_get(policy->cpu); 722 policy = cpufreq_cpu_get(policy->cpu);
723 if (!policy) 723 if (!policy)
724 goto no_policy; 724 goto no_policy;
725 725
726 if (lock_policy_rwsem_write(policy->cpu) < 0) 726 if (lock_policy_rwsem_write(policy->cpu) < 0)
727 goto fail; 727 goto fail;
728 728
729 if (fattr->store) 729 if (fattr->store)
730 ret = fattr->store(policy, buf, count); 730 ret = fattr->store(policy, buf, count);
731 else 731 else
732 ret = -EIO; 732 ret = -EIO;
733 733
734 unlock_policy_rwsem_write(policy->cpu); 734 unlock_policy_rwsem_write(policy->cpu);
735 fail: 735 fail:
736 cpufreq_cpu_put(policy); 736 cpufreq_cpu_put(policy);
737 no_policy: 737 no_policy:
738 return ret; 738 return ret;
739 } 739 }
740 740
741 static void cpufreq_sysfs_release(struct kobject *kobj) 741 static void cpufreq_sysfs_release(struct kobject *kobj)
742 { 742 {
743 struct cpufreq_policy *policy = to_policy(kobj); 743 struct cpufreq_policy *policy = to_policy(kobj);
744 dprintk("last reference is dropped\n"); 744 dprintk("last reference is dropped\n");
745 complete(&policy->kobj_unregister); 745 complete(&policy->kobj_unregister);
746 } 746 }
747 747
748 static struct sysfs_ops sysfs_ops = { 748 static struct sysfs_ops sysfs_ops = {
749 .show = show, 749 .show = show,
750 .store = store, 750 .store = store,
751 }; 751 };
752 752
753 static struct kobj_type ktype_cpufreq = { 753 static struct kobj_type ktype_cpufreq = {
754 .sysfs_ops = &sysfs_ops, 754 .sysfs_ops = &sysfs_ops,
755 .default_attrs = default_attrs, 755 .default_attrs = default_attrs,
756 .release = cpufreq_sysfs_release, 756 .release = cpufreq_sysfs_release,
757 }; 757 };
758 758
759 759
760 /** 760 /**
761 * cpufreq_add_dev - add a CPU device 761 * cpufreq_add_dev - add a CPU device
762 * 762 *
763 * Adds the cpufreq interface for a CPU device. 763 * Adds the cpufreq interface for a CPU device.
764 */ 764 */
765 static int cpufreq_add_dev(struct sys_device *sys_dev) 765 static int cpufreq_add_dev(struct sys_device *sys_dev)
766 { 766 {
767 unsigned int cpu = sys_dev->id; 767 unsigned int cpu = sys_dev->id;
768 int ret = 0; 768 int ret = 0;
769 struct cpufreq_policy new_policy; 769 struct cpufreq_policy new_policy;
770 struct cpufreq_policy *policy; 770 struct cpufreq_policy *policy;
771 struct freq_attr **drv_attr; 771 struct freq_attr **drv_attr;
772 struct sys_device *cpu_sys_dev; 772 struct sys_device *cpu_sys_dev;
773 unsigned long flags; 773 unsigned long flags;
774 unsigned int j; 774 unsigned int j;
775 #ifdef CONFIG_SMP 775 #ifdef CONFIG_SMP
776 struct cpufreq_policy *managed_policy; 776 struct cpufreq_policy *managed_policy;
777 #endif 777 #endif
778 778
779 if (cpu_is_offline(cpu)) 779 if (cpu_is_offline(cpu))
780 return 0; 780 return 0;
781 781
782 cpufreq_debug_disable_ratelimit(); 782 cpufreq_debug_disable_ratelimit();
783 dprintk("adding CPU %u\n", cpu); 783 dprintk("adding CPU %u\n", cpu);
784 784
785 #ifdef CONFIG_SMP 785 #ifdef CONFIG_SMP
786 /* check whether a different CPU already registered this 786 /* check whether a different CPU already registered this
787 * CPU because it is in the same boat. */ 787 * CPU because it is in the same boat. */
788 policy = cpufreq_cpu_get(cpu); 788 policy = cpufreq_cpu_get(cpu);
789 if (unlikely(policy)) { 789 if (unlikely(policy)) {
790 cpufreq_cpu_put(policy); 790 cpufreq_cpu_put(policy);
791 cpufreq_debug_enable_ratelimit(); 791 cpufreq_debug_enable_ratelimit();
792 return 0; 792 return 0;
793 } 793 }
794 #endif 794 #endif
795 795
796 if (!try_module_get(cpufreq_driver->owner)) { 796 if (!try_module_get(cpufreq_driver->owner)) {
797 ret = -EINVAL; 797 ret = -EINVAL;
798 goto module_out; 798 goto module_out;
799 } 799 }
800 800
801 policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL); 801 policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
802 if (!policy) { 802 if (!policy) {
803 ret = -ENOMEM; 803 ret = -ENOMEM;
804 goto nomem_out; 804 goto nomem_out;
805 } 805 }
806 if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) { 806 if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) {
807 kfree(policy); 807 kfree(policy);
808 ret = -ENOMEM; 808 ret = -ENOMEM;
809 goto nomem_out; 809 goto nomem_out;
810 } 810 }
811 if (!alloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) { 811 if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) {
812 free_cpumask_var(policy->cpus); 812 free_cpumask_var(policy->cpus);
813 kfree(policy); 813 kfree(policy);
814 ret = -ENOMEM; 814 ret = -ENOMEM;
815 goto nomem_out; 815 goto nomem_out;
816 } 816 }
817 817
818 policy->cpu = cpu; 818 policy->cpu = cpu;
819 cpumask_copy(policy->cpus, cpumask_of(cpu)); 819 cpumask_copy(policy->cpus, cpumask_of(cpu));
820 820
821 /* Initially set CPU itself as the policy_cpu */ 821 /* Initially set CPU itself as the policy_cpu */
822 per_cpu(policy_cpu, cpu) = cpu; 822 per_cpu(policy_cpu, cpu) = cpu;
823 lock_policy_rwsem_write(cpu); 823 lock_policy_rwsem_write(cpu);
824 824
825 init_completion(&policy->kobj_unregister); 825 init_completion(&policy->kobj_unregister);
826 INIT_WORK(&policy->update, handle_update); 826 INIT_WORK(&policy->update, handle_update);
827 827
828 /* Set governor before ->init, so that driver could check it */ 828 /* Set governor before ->init, so that driver could check it */
829 policy->governor = CPUFREQ_DEFAULT_GOVERNOR; 829 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
830 /* call driver. From then on the cpufreq must be able 830 /* call driver. From then on the cpufreq must be able
831 * to accept all calls to ->verify and ->setpolicy for this CPU 831 * to accept all calls to ->verify and ->setpolicy for this CPU
832 */ 832 */
833 ret = cpufreq_driver->init(policy); 833 ret = cpufreq_driver->init(policy);
834 if (ret) { 834 if (ret) {
835 dprintk("initialization failed\n"); 835 dprintk("initialization failed\n");
836 goto err_out; 836 goto err_out;
837 } 837 }
838 policy->user_policy.min = policy->min; 838 policy->user_policy.min = policy->min;
839 policy->user_policy.max = policy->max; 839 policy->user_policy.max = policy->max;
840 840
841 blocking_notifier_call_chain(&cpufreq_policy_notifier_list, 841 blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
842 CPUFREQ_START, policy); 842 CPUFREQ_START, policy);
843 843
844 #ifdef CONFIG_SMP 844 #ifdef CONFIG_SMP
845 845
846 #ifdef CONFIG_HOTPLUG_CPU 846 #ifdef CONFIG_HOTPLUG_CPU
847 if (per_cpu(cpufreq_cpu_governor, cpu)) { 847 if (per_cpu(cpufreq_cpu_governor, cpu)) {
848 policy->governor = per_cpu(cpufreq_cpu_governor, cpu); 848 policy->governor = per_cpu(cpufreq_cpu_governor, cpu);
849 dprintk("Restoring governor %s for cpu %d\n", 849 dprintk("Restoring governor %s for cpu %d\n",
850 policy->governor->name, cpu); 850 policy->governor->name, cpu);
851 } 851 }
852 #endif 852 #endif
853 853
854 for_each_cpu(j, policy->cpus) { 854 for_each_cpu(j, policy->cpus) {
855 if (cpu == j) 855 if (cpu == j)
856 continue; 856 continue;
857 857
858 /* Check for existing affected CPUs. 858 /* Check for existing affected CPUs.
859 * They may not be aware of it due to CPU Hotplug. 859 * They may not be aware of it due to CPU Hotplug.
860 */ 860 */
861 managed_policy = cpufreq_cpu_get(j); /* FIXME: Where is this released? What about error paths? */ 861 managed_policy = cpufreq_cpu_get(j); /* FIXME: Where is this released? What about error paths? */
862 if (unlikely(managed_policy)) { 862 if (unlikely(managed_policy)) {
863 863
864 /* Set proper policy_cpu */ 864 /* Set proper policy_cpu */
865 unlock_policy_rwsem_write(cpu); 865 unlock_policy_rwsem_write(cpu);
866 per_cpu(policy_cpu, cpu) = managed_policy->cpu; 866 per_cpu(policy_cpu, cpu) = managed_policy->cpu;
867 867
868 if (lock_policy_rwsem_write(cpu) < 0) 868 if (lock_policy_rwsem_write(cpu) < 0)
869 goto err_out_driver_exit; 869 goto err_out_driver_exit;
870 870
871 spin_lock_irqsave(&cpufreq_driver_lock, flags); 871 spin_lock_irqsave(&cpufreq_driver_lock, flags);
872 cpumask_copy(managed_policy->cpus, policy->cpus); 872 cpumask_copy(managed_policy->cpus, policy->cpus);
873 per_cpu(cpufreq_cpu_data, cpu) = managed_policy; 873 per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
874 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 874 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
875 875
876 dprintk("CPU already managed, adding link\n"); 876 dprintk("CPU already managed, adding link\n");
877 ret = sysfs_create_link(&sys_dev->kobj, 877 ret = sysfs_create_link(&sys_dev->kobj,
878 &managed_policy->kobj, 878 &managed_policy->kobj,
879 "cpufreq"); 879 "cpufreq");
880 if (ret) 880 if (ret)
881 goto err_out_driver_exit; 881 goto err_out_driver_exit;
882 882
883 cpufreq_debug_enable_ratelimit(); 883 cpufreq_debug_enable_ratelimit();
884 ret = 0; 884 ret = 0;
885 goto err_out_driver_exit; /* call driver->exit() */ 885 goto err_out_driver_exit; /* call driver->exit() */
886 } 886 }
887 } 887 }
888 #endif 888 #endif
889 memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); 889 memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
890 890
891 /* prepare interface data */ 891 /* prepare interface data */
892 ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj, 892 ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj,
893 "cpufreq"); 893 "cpufreq");
894 if (ret) 894 if (ret)
895 goto err_out_driver_exit; 895 goto err_out_driver_exit;
896 896
897 /* set up files for this cpu device */ 897 /* set up files for this cpu device */
898 drv_attr = cpufreq_driver->attr; 898 drv_attr = cpufreq_driver->attr;
899 while ((drv_attr) && (*drv_attr)) { 899 while ((drv_attr) && (*drv_attr)) {
900 ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); 900 ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
901 if (ret) 901 if (ret)
902 goto err_out_driver_exit; 902 goto err_out_driver_exit;
903 drv_attr++; 903 drv_attr++;
904 } 904 }
905 if (cpufreq_driver->get) { 905 if (cpufreq_driver->get) {
906 ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); 906 ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
907 if (ret) 907 if (ret)
908 goto err_out_driver_exit; 908 goto err_out_driver_exit;
909 } 909 }
910 if (cpufreq_driver->target) { 910 if (cpufreq_driver->target) {
911 ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); 911 ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
912 if (ret) 912 if (ret)
913 goto err_out_driver_exit; 913 goto err_out_driver_exit;
914 } 914 }
915 915
916 spin_lock_irqsave(&cpufreq_driver_lock, flags); 916 spin_lock_irqsave(&cpufreq_driver_lock, flags);
917 for_each_cpu(j, policy->cpus) { 917 for_each_cpu(j, policy->cpus) {
918 per_cpu(cpufreq_cpu_data, j) = policy; 918 per_cpu(cpufreq_cpu_data, j) = policy;
919 per_cpu(policy_cpu, j) = policy->cpu; 919 per_cpu(policy_cpu, j) = policy->cpu;
920 } 920 }
921 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 921 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
922 922
923 /* symlink affected CPUs */ 923 /* symlink affected CPUs */
924 for_each_cpu(j, policy->cpus) { 924 for_each_cpu(j, policy->cpus) {
925 if (j == cpu) 925 if (j == cpu)
926 continue; 926 continue;
927 if (!cpu_online(j)) 927 if (!cpu_online(j))
928 continue; 928 continue;
929 929
930 dprintk("CPU %u already managed, adding link\n", j); 930 dprintk("CPU %u already managed, adding link\n", j);
931 cpufreq_cpu_get(cpu); 931 cpufreq_cpu_get(cpu);
932 cpu_sys_dev = get_cpu_sysdev(j); 932 cpu_sys_dev = get_cpu_sysdev(j);
933 ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, 933 ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
934 "cpufreq"); 934 "cpufreq");
935 if (ret) 935 if (ret)
936 goto err_out_unregister; 936 goto err_out_unregister;
937 } 937 }
938 938
939 policy->governor = NULL; /* to assure that the starting sequence is 939 policy->governor = NULL; /* to assure that the starting sequence is
940 * run in cpufreq_set_policy */ 940 * run in cpufreq_set_policy */
941 941
942 /* set default policy */ 942 /* set default policy */
943 ret = __cpufreq_set_policy(policy, &new_policy); 943 ret = __cpufreq_set_policy(policy, &new_policy);
944 policy->user_policy.policy = policy->policy; 944 policy->user_policy.policy = policy->policy;
945 policy->user_policy.governor = policy->governor; 945 policy->user_policy.governor = policy->governor;
946 946
947 if (ret) { 947 if (ret) {
948 dprintk("setting policy failed\n"); 948 dprintk("setting policy failed\n");
949 goto err_out_unregister; 949 goto err_out_unregister;
950 } 950 }
951 951
952 unlock_policy_rwsem_write(cpu); 952 unlock_policy_rwsem_write(cpu);
953 953
954 kobject_uevent(&policy->kobj, KOBJ_ADD); 954 kobject_uevent(&policy->kobj, KOBJ_ADD);
955 module_put(cpufreq_driver->owner); 955 module_put(cpufreq_driver->owner);
956 dprintk("initialization complete\n"); 956 dprintk("initialization complete\n");
957 cpufreq_debug_enable_ratelimit(); 957 cpufreq_debug_enable_ratelimit();
958 958
959 return 0; 959 return 0;
960 960
961 961
962 err_out_unregister: 962 err_out_unregister:
963 spin_lock_irqsave(&cpufreq_driver_lock, flags); 963 spin_lock_irqsave(&cpufreq_driver_lock, flags);
964 for_each_cpu(j, policy->cpus) 964 for_each_cpu(j, policy->cpus)
965 per_cpu(cpufreq_cpu_data, j) = NULL; 965 per_cpu(cpufreq_cpu_data, j) = NULL;
966 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 966 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
967 967
968 kobject_put(&policy->kobj); 968 kobject_put(&policy->kobj);
969 wait_for_completion(&policy->kobj_unregister); 969 wait_for_completion(&policy->kobj_unregister);
970 970
971 err_out_driver_exit: 971 err_out_driver_exit:
972 if (cpufreq_driver->exit) 972 if (cpufreq_driver->exit)
973 cpufreq_driver->exit(policy); 973 cpufreq_driver->exit(policy);
974 974
975 err_out: 975 err_out:
976 unlock_policy_rwsem_write(cpu); 976 unlock_policy_rwsem_write(cpu);
977 kfree(policy); 977 kfree(policy);
978 978
979 nomem_out: 979 nomem_out:
980 module_put(cpufreq_driver->owner); 980 module_put(cpufreq_driver->owner);
981 module_out: 981 module_out:
982 cpufreq_debug_enable_ratelimit(); 982 cpufreq_debug_enable_ratelimit();
983 return ret; 983 return ret;
984 } 984 }
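/*
 * A minimal sketch of why the hunk above switches policy->related_cpus
 * from alloc_cpumask_var() to zalloc_cpumask_var().  With
 * CONFIG_CPUMASK_OFFSTACK (MAXSMP) the mask lives in a separate
 * kmalloc() allocation, so alloc_cpumask_var() hands back undefined
 * bits, while zalloc_cpumask_var() returns a cleared mask that the
 * cpumask_empty() check in show_related_cpus() can rely on.  The
 * function below exists only for illustration.
 */
static int example_mask_init(void)
{
	cpumask_var_t uncleared, cleared;

	if (!alloc_cpumask_var(&uncleared, GFP_KERNEL))
		return -ENOMEM;
	cpumask_clear(uncleared);	/* required before testing any bits */

	if (!zalloc_cpumask_var(&cleared, GFP_KERNEL)) {
		free_cpumask_var(uncleared);
		return -ENOMEM;
	}
	WARN_ON(!cpumask_empty(cleared));	/* already zeroed */

	free_cpumask_var(cleared);
	free_cpumask_var(uncleared);
	return 0;
}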
985 985
986 986
987 /** 987 /**
988 * __cpufreq_remove_dev - remove a CPU device 988 * __cpufreq_remove_dev - remove a CPU device
989 * 989 *
990 * Removes the cpufreq interface for a CPU device. 990 * Removes the cpufreq interface for a CPU device.
991 * Caller should already have policy_rwsem in write mode for this CPU. 991 * Caller should already have policy_rwsem in write mode for this CPU.
992 * This routine frees the rwsem before returning. 992 * This routine frees the rwsem before returning.
993 */ 993 */
994 static int __cpufreq_remove_dev(struct sys_device *sys_dev) 994 static int __cpufreq_remove_dev(struct sys_device *sys_dev)
995 { 995 {
996 unsigned int cpu = sys_dev->id; 996 unsigned int cpu = sys_dev->id;
997 unsigned long flags; 997 unsigned long flags;
998 struct cpufreq_policy *data; 998 struct cpufreq_policy *data;
999 #ifdef CONFIG_SMP 999 #ifdef CONFIG_SMP
1000 struct sys_device *cpu_sys_dev; 1000 struct sys_device *cpu_sys_dev;
1001 unsigned int j; 1001 unsigned int j;
1002 #endif 1002 #endif
1003 1003
1004 cpufreq_debug_disable_ratelimit(); 1004 cpufreq_debug_disable_ratelimit();
1005 dprintk("unregistering CPU %u\n", cpu); 1005 dprintk("unregistering CPU %u\n", cpu);
1006 1006
1007 spin_lock_irqsave(&cpufreq_driver_lock, flags); 1007 spin_lock_irqsave(&cpufreq_driver_lock, flags);
1008 data = per_cpu(cpufreq_cpu_data, cpu); 1008 data = per_cpu(cpufreq_cpu_data, cpu);
1009 1009
1010 if (!data) { 1010 if (!data) {
1011 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 1011 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1012 cpufreq_debug_enable_ratelimit(); 1012 cpufreq_debug_enable_ratelimit();
1013 unlock_policy_rwsem_write(cpu); 1013 unlock_policy_rwsem_write(cpu);
1014 return -EINVAL; 1014 return -EINVAL;
1015 } 1015 }
1016 per_cpu(cpufreq_cpu_data, cpu) = NULL; 1016 per_cpu(cpufreq_cpu_data, cpu) = NULL;
1017 1017
1018 1018
1019 #ifdef CONFIG_SMP 1019 #ifdef CONFIG_SMP
1020 /* if this isn't the CPU which is the parent of the kobj, we 1020 /* if this isn't the CPU which is the parent of the kobj, we
1021 * only need to unlink, put and exit 1021 * only need to unlink, put and exit
1022 */ 1022 */
1023 if (unlikely(cpu != data->cpu)) { 1023 if (unlikely(cpu != data->cpu)) {
1024 dprintk("removing link\n"); 1024 dprintk("removing link\n");
1025 cpumask_clear_cpu(cpu, data->cpus); 1025 cpumask_clear_cpu(cpu, data->cpus);
1026 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 1026 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1027 sysfs_remove_link(&sys_dev->kobj, "cpufreq"); 1027 sysfs_remove_link(&sys_dev->kobj, "cpufreq");
1028 cpufreq_cpu_put(data); 1028 cpufreq_cpu_put(data);
1029 cpufreq_debug_enable_ratelimit(); 1029 cpufreq_debug_enable_ratelimit();
1030 unlock_policy_rwsem_write(cpu); 1030 unlock_policy_rwsem_write(cpu);
1031 return 0; 1031 return 0;
1032 } 1032 }
1033 #endif 1033 #endif
1034 1034
1035 #ifdef CONFIG_SMP 1035 #ifdef CONFIG_SMP
1036 1036
1037 #ifdef CONFIG_HOTPLUG_CPU 1037 #ifdef CONFIG_HOTPLUG_CPU
1038 per_cpu(cpufreq_cpu_governor, cpu) = data->governor; 1038 per_cpu(cpufreq_cpu_governor, cpu) = data->governor;
1039 #endif 1039 #endif
1040 1040
1041 /* if we have other CPUs still registered, we need to unlink them, 1041 /* if we have other CPUs still registered, we need to unlink them,
1042 * or else wait_for_completion below will lock up. Clean the 1042 * or else wait_for_completion below will lock up. Clean the
1043 * per_cpu(cpufreq_cpu_data) while holding the lock, and remove 1043 * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
1044 * the sysfs links afterwards. 1044 * the sysfs links afterwards.
1045 */ 1045 */
1046 if (unlikely(cpumask_weight(data->cpus) > 1)) { 1046 if (unlikely(cpumask_weight(data->cpus) > 1)) {
1047 for_each_cpu(j, data->cpus) { 1047 for_each_cpu(j, data->cpus) {
1048 if (j == cpu) 1048 if (j == cpu)
1049 continue; 1049 continue;
1050 per_cpu(cpufreq_cpu_data, j) = NULL; 1050 per_cpu(cpufreq_cpu_data, j) = NULL;
1051 } 1051 }
1052 } 1052 }
1053 1053
1054 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 1054 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1055 1055
1056 if (unlikely(cpumask_weight(data->cpus) > 1)) { 1056 if (unlikely(cpumask_weight(data->cpus) > 1)) {
1057 for_each_cpu(j, data->cpus) { 1057 for_each_cpu(j, data->cpus) {
1058 if (j == cpu) 1058 if (j == cpu)
1059 continue; 1059 continue;
1060 dprintk("removing link for cpu %u\n", j); 1060 dprintk("removing link for cpu %u\n", j);
1061 #ifdef CONFIG_HOTPLUG_CPU 1061 #ifdef CONFIG_HOTPLUG_CPU
1062 per_cpu(cpufreq_cpu_governor, j) = data->governor; 1062 per_cpu(cpufreq_cpu_governor, j) = data->governor;
1063 #endif 1063 #endif
1064 cpu_sys_dev = get_cpu_sysdev(j); 1064 cpu_sys_dev = get_cpu_sysdev(j);
1065 sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq"); 1065 sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
1066 cpufreq_cpu_put(data); 1066 cpufreq_cpu_put(data);
1067 } 1067 }
1068 } 1068 }
1069 #else 1069 #else
1070 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 1070 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1071 #endif 1071 #endif
1072 1072
1073 unlock_policy_rwsem_write(cpu); 1073 unlock_policy_rwsem_write(cpu);
1074 1074
1075 if (cpufreq_driver->target) 1075 if (cpufreq_driver->target)
1076 __cpufreq_governor(data, CPUFREQ_GOV_STOP); 1076 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1077 1077
1078 kobject_put(&data->kobj); 1078 kobject_put(&data->kobj);
1079 1079
1080 /* we need to make sure that the underlying kobj is actually 1080 /* we need to make sure that the underlying kobj is actually
1081 * not referenced anymore by anybody before we proceed with 1081 * not referenced anymore by anybody before we proceed with
1082 * unloading. 1082 * unloading.
1083 */ 1083 */
1084 dprintk("waiting for dropping of refcount\n"); 1084 dprintk("waiting for dropping of refcount\n");
1085 wait_for_completion(&data->kobj_unregister); 1085 wait_for_completion(&data->kobj_unregister);
1086 dprintk("wait complete\n"); 1086 dprintk("wait complete\n");
1087 1087
1088 if (cpufreq_driver->exit) 1088 if (cpufreq_driver->exit)
1089 cpufreq_driver->exit(data); 1089 cpufreq_driver->exit(data);
1090 1090
1091 free_cpumask_var(data->related_cpus); 1091 free_cpumask_var(data->related_cpus);
1092 free_cpumask_var(data->cpus); 1092 free_cpumask_var(data->cpus);
1093 kfree(data); 1093 kfree(data);
1094 per_cpu(cpufreq_cpu_data, cpu) = NULL; 1094 per_cpu(cpufreq_cpu_data, cpu) = NULL;
1095 1095
1096 cpufreq_debug_enable_ratelimit(); 1096 cpufreq_debug_enable_ratelimit();
1097 return 0; 1097 return 0;
1098 } 1098 }
1099 1099
1100 1100
1101 static int cpufreq_remove_dev(struct sys_device *sys_dev) 1101 static int cpufreq_remove_dev(struct sys_device *sys_dev)
1102 { 1102 {
1103 unsigned int cpu = sys_dev->id; 1103 unsigned int cpu = sys_dev->id;
1104 int retval; 1104 int retval;
1105 1105
1106 if (cpu_is_offline(cpu)) 1106 if (cpu_is_offline(cpu))
1107 return 0; 1107 return 0;
1108 1108
1109 if (unlikely(lock_policy_rwsem_write(cpu))) 1109 if (unlikely(lock_policy_rwsem_write(cpu)))
1110 BUG(); 1110 BUG();
1111 1111
1112 retval = __cpufreq_remove_dev(sys_dev); 1112 retval = __cpufreq_remove_dev(sys_dev);
1113 return retval; 1113 return retval;
1114 } 1114 }
1115 1115
1116 1116
1117 static void handle_update(struct work_struct *work) 1117 static void handle_update(struct work_struct *work)
1118 { 1118 {
1119 struct cpufreq_policy *policy = 1119 struct cpufreq_policy *policy =
1120 container_of(work, struct cpufreq_policy, update); 1120 container_of(work, struct cpufreq_policy, update);
1121 unsigned int cpu = policy->cpu; 1121 unsigned int cpu = policy->cpu;
1122 dprintk("handle_update for cpu %u called\n", cpu); 1122 dprintk("handle_update for cpu %u called\n", cpu);
1123 cpufreq_update_policy(cpu); 1123 cpufreq_update_policy(cpu);
1124 } 1124 }
1125 1125
1126 /** 1126 /**
1127 * cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble. 1127 * cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
1128 * @cpu: cpu number 1128 * @cpu: cpu number
1129 * @old_freq: CPU frequency the kernel thinks the CPU runs at 1129 * @old_freq: CPU frequency the kernel thinks the CPU runs at
1130 * @new_freq: CPU frequency the CPU actually runs at 1130 * @new_freq: CPU frequency the CPU actually runs at
1131 * 1131 *
1132 * We adjust to current frequency first, and need to clean up later. 1132 * We adjust to current frequency first, and need to clean up later.
1133 * So either call to cpufreq_update_policy() or schedule handle_update()). 1133 * So either call to cpufreq_update_policy() or schedule handle_update()).
1134 */ 1134 */
1135 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, 1135 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1136 unsigned int new_freq) 1136 unsigned int new_freq)
1137 { 1137 {
1138 struct cpufreq_freqs freqs; 1138 struct cpufreq_freqs freqs;
1139 1139
1140 dprintk("Warning: CPU frequency out of sync: cpufreq and timing " 1140 dprintk("Warning: CPU frequency out of sync: cpufreq and timing "
1141 "core thinks of %u, is %u kHz.\n", old_freq, new_freq); 1141 "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1142 1142
1143 freqs.cpu = cpu; 1143 freqs.cpu = cpu;
1144 freqs.old = old_freq; 1144 freqs.old = old_freq;
1145 freqs.new = new_freq; 1145 freqs.new = new_freq;
1146 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 1146 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1147 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 1147 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1148 } 1148 }
1149 1149
1150 1150
1151 /** 1151 /**
1152 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur 1152 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1153 * @cpu: CPU number 1153 * @cpu: CPU number
1154 * 1154 *
1155 * This is the last known freq, without actually getting it from the driver. 1155 * This is the last known freq, without actually getting it from the driver.
1156 * Return value will be same as what is shown in scaling_cur_freq in sysfs. 1156 * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1157 */ 1157 */
1158 unsigned int cpufreq_quick_get(unsigned int cpu) 1158 unsigned int cpufreq_quick_get(unsigned int cpu)
1159 { 1159 {
1160 struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); 1160 struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1161 unsigned int ret_freq = 0; 1161 unsigned int ret_freq = 0;
1162 1162
1163 if (policy) { 1163 if (policy) {
1164 ret_freq = policy->cur; 1164 ret_freq = policy->cur;
1165 cpufreq_cpu_put(policy); 1165 cpufreq_cpu_put(policy);
1166 } 1166 }
1167 1167
1168 return ret_freq; 1168 return ret_freq;
1169 } 1169 }
1170 EXPORT_SYMBOL(cpufreq_quick_get); 1170 EXPORT_SYMBOL(cpufreq_quick_get);
1171 1171
1172 1172
1173 static unsigned int __cpufreq_get(unsigned int cpu) 1173 static unsigned int __cpufreq_get(unsigned int cpu)
1174 { 1174 {
1175 struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); 1175 struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
1176 unsigned int ret_freq = 0; 1176 unsigned int ret_freq = 0;
1177 1177
1178 if (!cpufreq_driver->get) 1178 if (!cpufreq_driver->get)
1179 return ret_freq; 1179 return ret_freq;
1180 1180
1181 ret_freq = cpufreq_driver->get(cpu); 1181 ret_freq = cpufreq_driver->get(cpu);
1182 1182
1183 if (ret_freq && policy->cur && 1183 if (ret_freq && policy->cur &&
1184 !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { 1184 !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1185 /* verify no discrepancy between actual and 1185 /* verify no discrepancy between actual and
1186 saved value exists */ 1186 saved value exists */
1187 if (unlikely(ret_freq != policy->cur)) { 1187 if (unlikely(ret_freq != policy->cur)) {
1188 cpufreq_out_of_sync(cpu, policy->cur, ret_freq); 1188 cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1189 schedule_work(&policy->update); 1189 schedule_work(&policy->update);
1190 } 1190 }
1191 } 1191 }
1192 1192
1193 return ret_freq; 1193 return ret_freq;
1194 } 1194 }
1195 1195
1196 /** 1196 /**
1197 * cpufreq_get - get the current CPU frequency (in kHz) 1197 * cpufreq_get - get the current CPU frequency (in kHz)
1198 * @cpu: CPU number 1198 * @cpu: CPU number
1199 * 1199 *
1200 * Get the current (static) CPU frequency 1200 * Get the current (static) CPU frequency
1201 */ 1201 */
1202 unsigned int cpufreq_get(unsigned int cpu) 1202 unsigned int cpufreq_get(unsigned int cpu)
1203 { 1203 {
1204 unsigned int ret_freq = 0; 1204 unsigned int ret_freq = 0;
1205 struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); 1205 struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1206 1206
1207 if (!policy) 1207 if (!policy)
1208 goto out; 1208 goto out;
1209 1209
1210 if (unlikely(lock_policy_rwsem_read(cpu))) 1210 if (unlikely(lock_policy_rwsem_read(cpu)))
1211 goto out_policy; 1211 goto out_policy;
1212 1212
1213 ret_freq = __cpufreq_get(cpu); 1213 ret_freq = __cpufreq_get(cpu);
1214 1214
1215 unlock_policy_rwsem_read(cpu); 1215 unlock_policy_rwsem_read(cpu);
1216 1216
1217 out_policy: 1217 out_policy:
1218 cpufreq_cpu_put(policy); 1218 cpufreq_cpu_put(policy);
1219 out: 1219 out:
1220 return ret_freq; 1220 return ret_freq;
1221 } 1221 }
1222 EXPORT_SYMBOL(cpufreq_get); 1222 EXPORT_SYMBOL(cpufreq_get);
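/*
 * Illustrative sketch (assumed consumer code, not part of this diff): reading
 * a CPU's frequency from another subsystem.  cpufreq_quick_get() returns the
 * cached policy->cur without querying the driver; cpufreq_get() takes the
 * policy rwsem, asks the driver, and resynchronizes if the two disagree.
 * example_report_freq() is a hypothetical helper.
 */
#include <linux/cpufreq.h>
#include <linux/kernel.h>

static void example_report_freq(unsigned int cpu)
{
	unsigned int cached = cpufreq_quick_get(cpu);	/* last known value, 0 if no policy */
	unsigned int actual = cpufreq_get(cpu);		/* queried from the driver, may sleep */

	pr_info("cpu%u: cached %u kHz, driver reports %u kHz\n",
		cpu, cached, actual);
}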
1223 1223
1224 1224
1225 /** 1225 /**
1226 * cpufreq_suspend - let the low level driver prepare for suspend 1226 * cpufreq_suspend - let the low level driver prepare for suspend
1227 */ 1227 */
1228 1228
1229 static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) 1229 static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg)
1230 { 1230 {
1231 int cpu = sysdev->id; 1231 int cpu = sysdev->id;
1232 int ret = 0; 1232 int ret = 0;
1233 unsigned int cur_freq = 0; 1233 unsigned int cur_freq = 0;
1234 struct cpufreq_policy *cpu_policy; 1234 struct cpufreq_policy *cpu_policy;
1235 1235
1236 dprintk("suspending cpu %u\n", cpu); 1236 dprintk("suspending cpu %u\n", cpu);
1237 1237
1238 if (!cpu_online(cpu)) 1238 if (!cpu_online(cpu))
1239 return 0; 1239 return 0;
1240 1240
1241 /* we may be lax here as interrupts are off. Nonetheless 1241 /* we may be lax here as interrupts are off. Nonetheless
1242 * we need to grab the correct cpu policy, as to check 1242 * we need to grab the correct cpu policy, as to check
1243 * whether we really run on this CPU. 1243 * whether we really run on this CPU.
1244 */ 1244 */
1245 1245
1246 cpu_policy = cpufreq_cpu_get(cpu); 1246 cpu_policy = cpufreq_cpu_get(cpu);
1247 if (!cpu_policy) 1247 if (!cpu_policy)
1248 return -EINVAL; 1248 return -EINVAL;
1249 1249
1250 /* only handle each CPU group once */ 1250 /* only handle each CPU group once */
1251 if (unlikely(cpu_policy->cpu != cpu)) 1251 if (unlikely(cpu_policy->cpu != cpu))
1252 goto out; 1252 goto out;
1253 1253
1254 if (cpufreq_driver->suspend) { 1254 if (cpufreq_driver->suspend) {
1255 ret = cpufreq_driver->suspend(cpu_policy, pmsg); 1255 ret = cpufreq_driver->suspend(cpu_policy, pmsg);
1256 if (ret) { 1256 if (ret) {
1257 printk(KERN_ERR "cpufreq: suspend failed in ->suspend " 1257 printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1258 "step on CPU %u\n", cpu_policy->cpu); 1258 "step on CPU %u\n", cpu_policy->cpu);
1259 goto out; 1259 goto out;
1260 } 1260 }
1261 } 1261 }
1262 1262
1263 if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS) 1263 if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)
1264 goto out; 1264 goto out;
1265 1265
1266 if (cpufreq_driver->get) 1266 if (cpufreq_driver->get)
1267 cur_freq = cpufreq_driver->get(cpu_policy->cpu); 1267 cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1268 1268
1269 if (!cur_freq || !cpu_policy->cur) { 1269 if (!cur_freq || !cpu_policy->cur) {
1270 printk(KERN_ERR "cpufreq: suspend failed to assert current " 1270 printk(KERN_ERR "cpufreq: suspend failed to assert current "
1271 "frequency is what timing core thinks it is.\n"); 1271 "frequency is what timing core thinks it is.\n");
1272 goto out; 1272 goto out;
1273 } 1273 }
1274 1274
1275 if (unlikely(cur_freq != cpu_policy->cur)) { 1275 if (unlikely(cur_freq != cpu_policy->cur)) {
1276 struct cpufreq_freqs freqs; 1276 struct cpufreq_freqs freqs;
1277 1277
1278 if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) 1278 if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1279 dprintk("Warning: CPU frequency is %u, " 1279 dprintk("Warning: CPU frequency is %u, "
1280 "cpufreq assumed %u kHz.\n", 1280 "cpufreq assumed %u kHz.\n",
1281 cur_freq, cpu_policy->cur); 1281 cur_freq, cpu_policy->cur);
1282 1282
1283 freqs.cpu = cpu; 1283 freqs.cpu = cpu;
1284 freqs.old = cpu_policy->cur; 1284 freqs.old = cpu_policy->cur;
1285 freqs.new = cur_freq; 1285 freqs.new = cur_freq;
1286 1286
1287 srcu_notifier_call_chain(&cpufreq_transition_notifier_list, 1287 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
1288 CPUFREQ_SUSPENDCHANGE, &freqs); 1288 CPUFREQ_SUSPENDCHANGE, &freqs);
1289 adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs); 1289 adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs);
1290 1290
1291 cpu_policy->cur = cur_freq; 1291 cpu_policy->cur = cur_freq;
1292 } 1292 }
1293 1293
1294 out: 1294 out:
1295 cpufreq_cpu_put(cpu_policy); 1295 cpufreq_cpu_put(cpu_policy);
1296 return ret; 1296 return ret;
1297 } 1297 }
1298 1298
1299 /** 1299 /**
1300 * cpufreq_resume - restore proper CPU frequency handling after resume 1300 * cpufreq_resume - restore proper CPU frequency handling after resume
1301 * 1301 *
1302 * 1.) resume CPUfreq hardware support (cpufreq_driver->resume()) 1302 * 1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1303 * 2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync 1303 * 2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync
1304 * 3.) schedule call cpufreq_update_policy() ASAP as interrupts are 1304 * 3.) schedule call cpufreq_update_policy() ASAP as interrupts are
1305 * restored. 1305 * restored.
1306 */ 1306 */
1307 static int cpufreq_resume(struct sys_device *sysdev) 1307 static int cpufreq_resume(struct sys_device *sysdev)
1308 { 1308 {
1309 int cpu = sysdev->id; 1309 int cpu = sysdev->id;
1310 int ret = 0; 1310 int ret = 0;
1311 struct cpufreq_policy *cpu_policy; 1311 struct cpufreq_policy *cpu_policy;
1312 1312
1313 dprintk("resuming cpu %u\n", cpu); 1313 dprintk("resuming cpu %u\n", cpu);
1314 1314
1315 if (!cpu_online(cpu)) 1315 if (!cpu_online(cpu))
1316 return 0; 1316 return 0;
1317 1317
1318 /* we may be lax here as interrupts are off. Nonetheless 1318 /* we may be lax here as interrupts are off. Nonetheless
1319 * we need to grab the correct cpu policy, as to check 1319 * we need to grab the correct cpu policy, as to check
1320 * whether we really run on this CPU. 1320 * whether we really run on this CPU.
1321 */ 1321 */
1322 1322
1323 cpu_policy = cpufreq_cpu_get(cpu); 1323 cpu_policy = cpufreq_cpu_get(cpu);
1324 if (!cpu_policy) 1324 if (!cpu_policy)
1325 return -EINVAL; 1325 return -EINVAL;
1326 1326
1327 /* only handle each CPU group once */ 1327 /* only handle each CPU group once */
1328 if (unlikely(cpu_policy->cpu != cpu)) 1328 if (unlikely(cpu_policy->cpu != cpu))
1329 goto fail; 1329 goto fail;
1330 1330
1331 if (cpufreq_driver->resume) { 1331 if (cpufreq_driver->resume) {
1332 ret = cpufreq_driver->resume(cpu_policy); 1332 ret = cpufreq_driver->resume(cpu_policy);
1333 if (ret) { 1333 if (ret) {
1334 printk(KERN_ERR "cpufreq: resume failed in ->resume " 1334 printk(KERN_ERR "cpufreq: resume failed in ->resume "
1335 "step on CPU %u\n", cpu_policy->cpu); 1335 "step on CPU %u\n", cpu_policy->cpu);
1336 goto fail; 1336 goto fail;
1337 } 1337 }
1338 } 1338 }
1339 1339
1340 if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { 1340 if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1341 unsigned int cur_freq = 0; 1341 unsigned int cur_freq = 0;
1342 1342
1343 if (cpufreq_driver->get) 1343 if (cpufreq_driver->get)
1344 cur_freq = cpufreq_driver->get(cpu_policy->cpu); 1344 cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1345 1345
1346 if (!cur_freq || !cpu_policy->cur) { 1346 if (!cur_freq || !cpu_policy->cur) {
1347 printk(KERN_ERR "cpufreq: resume failed to assert " 1347 printk(KERN_ERR "cpufreq: resume failed to assert "
1348 "current frequency is what timing core " 1348 "current frequency is what timing core "
1349 "thinks it is.\n"); 1349 "thinks it is.\n");
1350 goto out; 1350 goto out;
1351 } 1351 }
1352 1352
1353 if (unlikely(cur_freq != cpu_policy->cur)) { 1353 if (unlikely(cur_freq != cpu_policy->cur)) {
1354 struct cpufreq_freqs freqs; 1354 struct cpufreq_freqs freqs;
1355 1355
1356 if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) 1356 if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1357 dprintk("Warning: CPU frequency " 1357 dprintk("Warning: CPU frequency "
1358 "is %u, cpufreq assumed %u kHz.\n", 1358 "is %u, cpufreq assumed %u kHz.\n",
1359 cur_freq, cpu_policy->cur); 1359 cur_freq, cpu_policy->cur);
1360 1360
1361 freqs.cpu = cpu; 1361 freqs.cpu = cpu;
1362 freqs.old = cpu_policy->cur; 1362 freqs.old = cpu_policy->cur;
1363 freqs.new = cur_freq; 1363 freqs.new = cur_freq;
1364 1364
1365 srcu_notifier_call_chain( 1365 srcu_notifier_call_chain(
1366 &cpufreq_transition_notifier_list, 1366 &cpufreq_transition_notifier_list,
1367 CPUFREQ_RESUMECHANGE, &freqs); 1367 CPUFREQ_RESUMECHANGE, &freqs);
1368 adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs); 1368 adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs);
1369 1369
1370 cpu_policy->cur = cur_freq; 1370 cpu_policy->cur = cur_freq;
1371 } 1371 }
1372 } 1372 }
1373 1373
1374 out: 1374 out:
1375 schedule_work(&cpu_policy->update); 1375 schedule_work(&cpu_policy->update);
1376 fail: 1376 fail:
1377 cpufreq_cpu_put(cpu_policy); 1377 cpufreq_cpu_put(cpu_policy);
1378 return ret; 1378 return ret;
1379 } 1379 }
1380 1380
1381 static struct sysdev_driver cpufreq_sysdev_driver = { 1381 static struct sysdev_driver cpufreq_sysdev_driver = {
1382 .add = cpufreq_add_dev, 1382 .add = cpufreq_add_dev,
1383 .remove = cpufreq_remove_dev, 1383 .remove = cpufreq_remove_dev,
1384 .suspend = cpufreq_suspend, 1384 .suspend = cpufreq_suspend,
1385 .resume = cpufreq_resume, 1385 .resume = cpufreq_resume,
1386 }; 1386 };
1387 1387
1388 1388
1389 /********************************************************************* 1389 /*********************************************************************
1390 * NOTIFIER LISTS INTERFACE * 1390 * NOTIFIER LISTS INTERFACE *
1391 *********************************************************************/ 1391 *********************************************************************/
1392 1392
1393 /** 1393 /**
1394 * cpufreq_register_notifier - register a driver with cpufreq 1394 * cpufreq_register_notifier - register a driver with cpufreq
1395 * @nb: notifier function to register 1395 * @nb: notifier function to register
1396 * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER 1396 * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1397 * 1397 *
1398 * Add a driver to one of two lists: either a list of drivers that 1398 * Add a driver to one of two lists: either a list of drivers that
1399 * are notified about clock rate changes (once before and once after 1399 * are notified about clock rate changes (once before and once after
1400 * the transition), or a list of drivers that are notified about 1400 * the transition), or a list of drivers that are notified about
1401 * changes in cpufreq policy. 1401 * changes in cpufreq policy.
1402 * 1402 *
1403 * This function may sleep, and has the same return conditions as 1403 * This function may sleep, and has the same return conditions as
1404 * blocking_notifier_chain_register. 1404 * blocking_notifier_chain_register.
1405 */ 1405 */
1406 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) 1406 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1407 { 1407 {
1408 int ret; 1408 int ret;
1409 1409
1410 WARN_ON(!init_cpufreq_transition_notifier_list_called); 1410 WARN_ON(!init_cpufreq_transition_notifier_list_called);
1411 1411
1412 switch (list) { 1412 switch (list) {
1413 case CPUFREQ_TRANSITION_NOTIFIER: 1413 case CPUFREQ_TRANSITION_NOTIFIER:
1414 ret = srcu_notifier_chain_register( 1414 ret = srcu_notifier_chain_register(
1415 &cpufreq_transition_notifier_list, nb); 1415 &cpufreq_transition_notifier_list, nb);
1416 break; 1416 break;
1417 case CPUFREQ_POLICY_NOTIFIER: 1417 case CPUFREQ_POLICY_NOTIFIER:
1418 ret = blocking_notifier_chain_register( 1418 ret = blocking_notifier_chain_register(
1419 &cpufreq_policy_notifier_list, nb); 1419 &cpufreq_policy_notifier_list, nb);
1420 break; 1420 break;
1421 default: 1421 default:
1422 ret = -EINVAL; 1422 ret = -EINVAL;
1423 } 1423 }
1424 1424
1425 return ret; 1425 return ret;
1426 } 1426 }
1427 EXPORT_SYMBOL(cpufreq_register_notifier); 1427 EXPORT_SYMBOL(cpufreq_register_notifier);
1428 1428
1429 1429
1430 /** 1430 /**
1431 * cpufreq_unregister_notifier - unregister a driver with cpufreq 1431 * cpufreq_unregister_notifier - unregister a driver with cpufreq
1432 * @nb: notifier block to be unregistered 1432 * @nb: notifier block to be unregistered
1433 * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER 1433 * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1434 * 1434 *
1435 * Remove a driver from the CPU frequency notifier list. 1435 * Remove a driver from the CPU frequency notifier list.
1436 * 1436 *
1437 * This function may sleep, and has the same return conditions as 1437 * This function may sleep, and has the same return conditions as
1438 * blocking_notifier_chain_unregister. 1438 * blocking_notifier_chain_unregister.
1439 */ 1439 */
1440 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list) 1440 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1441 { 1441 {
1442 int ret; 1442 int ret;
1443 1443
1444 switch (list) { 1444 switch (list) {
1445 case CPUFREQ_TRANSITION_NOTIFIER: 1445 case CPUFREQ_TRANSITION_NOTIFIER:
1446 ret = srcu_notifier_chain_unregister( 1446 ret = srcu_notifier_chain_unregister(
1447 &cpufreq_transition_notifier_list, nb); 1447 &cpufreq_transition_notifier_list, nb);
1448 break; 1448 break;
1449 case CPUFREQ_POLICY_NOTIFIER: 1449 case CPUFREQ_POLICY_NOTIFIER:
1450 ret = blocking_notifier_chain_unregister( 1450 ret = blocking_notifier_chain_unregister(
1451 &cpufreq_policy_notifier_list, nb); 1451 &cpufreq_policy_notifier_list, nb);
1452 break; 1452 break;
1453 default: 1453 default:
1454 ret = -EINVAL; 1454 ret = -EINVAL;
1455 } 1455 }
1456 1456
1457 return ret; 1457 return ret;
1458 } 1458 }
1459 EXPORT_SYMBOL(cpufreq_unregister_notifier); 1459 EXPORT_SYMBOL(cpufreq_unregister_notifier);
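/*
 * Illustrative sketch (hypothetical module, not part of this diff): hooking
 * into the transition chain registered above.  The callback runs once with
 * CPUFREQ_PRECHANGE and once with CPUFREQ_POSTCHANGE around each transition.
 */
#include <linux/cpufreq.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/notifier.h>

static int example_trans_notify(struct notifier_block *nb,
				unsigned long val, void *data)
{
	struct cpufreq_freqs *freqs = data;

	if (val == CPUFREQ_POSTCHANGE)
		pr_info("cpu%u: %u -> %u kHz\n",
			freqs->cpu, freqs->old, freqs->new);
	return NOTIFY_OK;
}

static struct notifier_block example_trans_nb = {
	.notifier_call = example_trans_notify,
};

static int __init example_notifier_init(void)
{
	return cpufreq_register_notifier(&example_trans_nb,
					 CPUFREQ_TRANSITION_NOTIFIER);
}

static void __exit example_notifier_exit(void)
{
	cpufreq_unregister_notifier(&example_trans_nb,
				    CPUFREQ_TRANSITION_NOTIFIER);
}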
1460 1460
1461 1461
1462 /********************************************************************* 1462 /*********************************************************************
1463 * GOVERNORS * 1463 * GOVERNORS *
1464 *********************************************************************/ 1464 *********************************************************************/
1465 1465
1466 1466
1467 int __cpufreq_driver_target(struct cpufreq_policy *policy, 1467 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1468 unsigned int target_freq, 1468 unsigned int target_freq,
1469 unsigned int relation) 1469 unsigned int relation)
1470 { 1470 {
1471 int retval = -EINVAL; 1471 int retval = -EINVAL;
1472 1472
1473 dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu, 1473 dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1474 target_freq, relation); 1474 target_freq, relation);
1475 if (cpu_online(policy->cpu) && cpufreq_driver->target) 1475 if (cpu_online(policy->cpu) && cpufreq_driver->target)
1476 retval = cpufreq_driver->target(policy, target_freq, relation); 1476 retval = cpufreq_driver->target(policy, target_freq, relation);
1477 1477
1478 return retval; 1478 return retval;
1479 } 1479 }
1480 EXPORT_SYMBOL_GPL(__cpufreq_driver_target); 1480 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1481 1481
1482 int cpufreq_driver_target(struct cpufreq_policy *policy, 1482 int cpufreq_driver_target(struct cpufreq_policy *policy,
1483 unsigned int target_freq, 1483 unsigned int target_freq,
1484 unsigned int relation) 1484 unsigned int relation)
1485 { 1485 {
1486 int ret = -EINVAL; 1486 int ret = -EINVAL;
1487 1487
1488 policy = cpufreq_cpu_get(policy->cpu); 1488 policy = cpufreq_cpu_get(policy->cpu);
1489 if (!policy) 1489 if (!policy)
1490 goto no_policy; 1490 goto no_policy;
1491 1491
1492 if (unlikely(lock_policy_rwsem_write(policy->cpu))) 1492 if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1493 goto fail; 1493 goto fail;
1494 1494
1495 ret = __cpufreq_driver_target(policy, target_freq, relation); 1495 ret = __cpufreq_driver_target(policy, target_freq, relation);
1496 1496
1497 unlock_policy_rwsem_write(policy->cpu); 1497 unlock_policy_rwsem_write(policy->cpu);
1498 1498
1499 fail: 1499 fail:
1500 cpufreq_cpu_put(policy); 1500 cpufreq_cpu_put(policy);
1501 no_policy: 1501 no_policy:
1502 return ret; 1502 return ret;
1503 } 1503 }
1504 EXPORT_SYMBOL_GPL(cpufreq_driver_target); 1504 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
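/*
 * Illustrative sketch (hypothetical governor helper, not part of this diff):
 * requesting a frequency change.  Governors call __cpufreq_driver_target()
 * with the policy rwsem already held; other callers use
 * cpufreq_driver_target(), which takes the write lock itself as shown above.
 */
static void example_push_to_max(struct cpufreq_policy *policy)
{
	__cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
}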
1505 1505
1506 int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu) 1506 int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
1507 { 1507 {
1508 int ret = 0; 1508 int ret = 0;
1509 1509
1510 policy = cpufreq_cpu_get(policy->cpu); 1510 policy = cpufreq_cpu_get(policy->cpu);
1511 if (!policy) 1511 if (!policy)
1512 return -EINVAL; 1512 return -EINVAL;
1513 1513
1514 if (cpu_online(cpu) && cpufreq_driver->getavg) 1514 if (cpu_online(cpu) && cpufreq_driver->getavg)
1515 ret = cpufreq_driver->getavg(policy, cpu); 1515 ret = cpufreq_driver->getavg(policy, cpu);
1516 1516
1517 cpufreq_cpu_put(policy); 1517 cpufreq_cpu_put(policy);
1518 return ret; 1518 return ret;
1519 } 1519 }
1520 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg); 1520 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1521 1521
1522 /* 1522 /*
1523 * when "event" is CPUFREQ_GOV_LIMITS 1523 * when "event" is CPUFREQ_GOV_LIMITS
1524 */ 1524 */
1525 1525
1526 static int __cpufreq_governor(struct cpufreq_policy *policy, 1526 static int __cpufreq_governor(struct cpufreq_policy *policy,
1527 unsigned int event) 1527 unsigned int event)
1528 { 1528 {
1529 int ret; 1529 int ret;
1530 1530
1531 /* Only must be defined when default governor is known to have latency 1531 /* Only must be defined when default governor is known to have latency
1532 restrictions, like e.g. conservative or ondemand. 1532 restrictions, like e.g. conservative or ondemand.
1533 That this is the case is already ensured in Kconfig 1533 That this is the case is already ensured in Kconfig
1534 */ 1534 */
1535 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE 1535 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1536 struct cpufreq_governor *gov = &cpufreq_gov_performance; 1536 struct cpufreq_governor *gov = &cpufreq_gov_performance;
1537 #else 1537 #else
1538 struct cpufreq_governor *gov = NULL; 1538 struct cpufreq_governor *gov = NULL;
1539 #endif 1539 #endif
1540 1540
1541 if (policy->governor->max_transition_latency && 1541 if (policy->governor->max_transition_latency &&
1542 policy->cpuinfo.transition_latency > 1542 policy->cpuinfo.transition_latency >
1543 policy->governor->max_transition_latency) { 1543 policy->governor->max_transition_latency) {
1544 if (!gov) 1544 if (!gov)
1545 return -EINVAL; 1545 return -EINVAL;
1546 else { 1546 else {
1547 printk(KERN_WARNING "%s governor failed, too long" 1547 printk(KERN_WARNING "%s governor failed, too long"
1548 " transition latency of HW, fallback" 1548 " transition latency of HW, fallback"
1549 " to %s governor\n", 1549 " to %s governor\n",
1550 policy->governor->name, 1550 policy->governor->name,
1551 gov->name); 1551 gov->name);
1552 policy->governor = gov; 1552 policy->governor = gov;
1553 } 1553 }
1554 } 1554 }
1555 1555
1556 if (!try_module_get(policy->governor->owner)) 1556 if (!try_module_get(policy->governor->owner))
1557 return -EINVAL; 1557 return -EINVAL;
1558 1558
1559 dprintk("__cpufreq_governor for CPU %u, event %u\n", 1559 dprintk("__cpufreq_governor for CPU %u, event %u\n",
1560 policy->cpu, event); 1560 policy->cpu, event);
1561 ret = policy->governor->governor(policy, event); 1561 ret = policy->governor->governor(policy, event);
1562 1562
1563 /* we keep one module reference alive for 1563 /* we keep one module reference alive for
1564 each CPU governed by this CPU */ 1564 each CPU governed by this CPU */
1565 if ((event != CPUFREQ_GOV_START) || ret) 1565 if ((event != CPUFREQ_GOV_START) || ret)
1566 module_put(policy->governor->owner); 1566 module_put(policy->governor->owner);
1567 if ((event == CPUFREQ_GOV_STOP) && !ret) 1567 if ((event == CPUFREQ_GOV_STOP) && !ret)
1568 module_put(policy->governor->owner); 1568 module_put(policy->governor->owner);
1569 1569
1570 return ret; 1570 return ret;
1571 } 1571 }
1572 1572
1573 1573
1574 int cpufreq_register_governor(struct cpufreq_governor *governor) 1574 int cpufreq_register_governor(struct cpufreq_governor *governor)
1575 { 1575 {
1576 int err; 1576 int err;
1577 1577
1578 if (!governor) 1578 if (!governor)
1579 return -EINVAL; 1579 return -EINVAL;
1580 1580
1581 mutex_lock(&cpufreq_governor_mutex); 1581 mutex_lock(&cpufreq_governor_mutex);
1582 1582
1583 err = -EBUSY; 1583 err = -EBUSY;
1584 if (__find_governor(governor->name) == NULL) { 1584 if (__find_governor(governor->name) == NULL) {
1585 err = 0; 1585 err = 0;
1586 list_add(&governor->governor_list, &cpufreq_governor_list); 1586 list_add(&governor->governor_list, &cpufreq_governor_list);
1587 } 1587 }
1588 1588
1589 mutex_unlock(&cpufreq_governor_mutex); 1589 mutex_unlock(&cpufreq_governor_mutex);
1590 return err; 1590 return err;
1591 } 1591 }
1592 EXPORT_SYMBOL_GPL(cpufreq_register_governor); 1592 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
1593 1593
1594 1594
1595 void cpufreq_unregister_governor(struct cpufreq_governor *governor) 1595 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1596 { 1596 {
1597 if (!governor) 1597 if (!governor)
1598 return; 1598 return;
1599 1599
1600 mutex_lock(&cpufreq_governor_mutex); 1600 mutex_lock(&cpufreq_governor_mutex);
1601 list_del(&governor->governor_list); 1601 list_del(&governor->governor_list);
1602 mutex_unlock(&cpufreq_governor_mutex); 1602 mutex_unlock(&cpufreq_governor_mutex);
1603 return; 1603 return;
1604 } 1604 }
1605 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor); 1605 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1606 1606
1607 1607
1608 1608
1609 /********************************************************************* 1609 /*********************************************************************
1610 * POLICY INTERFACE * 1610 * POLICY INTERFACE *
1611 *********************************************************************/ 1611 *********************************************************************/
1612 1612
1613 /** 1613 /**
1614 * cpufreq_get_policy - get the current cpufreq_policy 1614 * cpufreq_get_policy - get the current cpufreq_policy
1615 * @policy: struct cpufreq_policy into which the current cpufreq_policy 1615 * @policy: struct cpufreq_policy into which the current cpufreq_policy
1616 * is written 1616 * is written
1617 * 1617 *
1618 * Reads the current cpufreq policy. 1618 * Reads the current cpufreq policy.
1619 */ 1619 */
1620 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu) 1620 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1621 { 1621 {
1622 struct cpufreq_policy *cpu_policy; 1622 struct cpufreq_policy *cpu_policy;
1623 if (!policy) 1623 if (!policy)
1624 return -EINVAL; 1624 return -EINVAL;
1625 1625
1626 cpu_policy = cpufreq_cpu_get(cpu); 1626 cpu_policy = cpufreq_cpu_get(cpu);
1627 if (!cpu_policy) 1627 if (!cpu_policy)
1628 return -EINVAL; 1628 return -EINVAL;
1629 1629
1630 memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy)); 1630 memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1631 1631
1632 cpufreq_cpu_put(cpu_policy); 1632 cpufreq_cpu_put(cpu_policy);
1633 return 0; 1633 return 0;
1634 } 1634 }
1635 EXPORT_SYMBOL(cpufreq_get_policy); 1635 EXPORT_SYMBOL(cpufreq_get_policy);
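/*
 * Illustrative sketch (hypothetical consumer, not part of this diff): taking
 * a snapshot of a CPU's policy.  cpufreq_get_policy() memcpy()s the live
 * policy into the caller's buffer, so the copy can be inspected without
 * holding any cpufreq locks.
 */
static void example_show_limits(unsigned int cpu)
{
	struct cpufreq_policy policy;

	if (cpufreq_get_policy(&policy, cpu))
		return;		/* no cpufreq policy for this CPU */

	pr_info("cpu%u: limits %u - %u kHz\n", cpu, policy.min, policy.max);
}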
1636 1636
1637 1637
1638 /* 1638 /*
1639 * data : current policy. 1639 * data : current policy.
1640 * policy : policy to be set. 1640 * policy : policy to be set.
1641 */ 1641 */
1642 static int __cpufreq_set_policy(struct cpufreq_policy *data, 1642 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1643 struct cpufreq_policy *policy) 1643 struct cpufreq_policy *policy)
1644 { 1644 {
1645 int ret = 0; 1645 int ret = 0;
1646 1646
1647 cpufreq_debug_disable_ratelimit(); 1647 cpufreq_debug_disable_ratelimit();
1648 dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu, 1648 dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1649 policy->min, policy->max); 1649 policy->min, policy->max);
1650 1650
1651 memcpy(&policy->cpuinfo, &data->cpuinfo, 1651 memcpy(&policy->cpuinfo, &data->cpuinfo,
1652 sizeof(struct cpufreq_cpuinfo)); 1652 sizeof(struct cpufreq_cpuinfo));
1653 1653
1654 if (policy->min > data->max || policy->max < data->min) { 1654 if (policy->min > data->max || policy->max < data->min) {
1655 ret = -EINVAL; 1655 ret = -EINVAL;
1656 goto error_out; 1656 goto error_out;
1657 } 1657 }
1658 1658
1659 /* verify the cpu speed can be set within this limit */ 1659 /* verify the cpu speed can be set within this limit */
1660 ret = cpufreq_driver->verify(policy); 1660 ret = cpufreq_driver->verify(policy);
1661 if (ret) 1661 if (ret)
1662 goto error_out; 1662 goto error_out;
1663 1663
1664 /* adjust if necessary - all reasons */ 1664 /* adjust if necessary - all reasons */
1665 blocking_notifier_call_chain(&cpufreq_policy_notifier_list, 1665 blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1666 CPUFREQ_ADJUST, policy); 1666 CPUFREQ_ADJUST, policy);
1667 1667
1668 /* adjust if necessary - hardware incompatibility*/ 1668 /* adjust if necessary - hardware incompatibility*/
1669 blocking_notifier_call_chain(&cpufreq_policy_notifier_list, 1669 blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1670 CPUFREQ_INCOMPATIBLE, policy); 1670 CPUFREQ_INCOMPATIBLE, policy);
1671 1671
1672 /* verify the cpu speed can be set within this limit, 1672 /* verify the cpu speed can be set within this limit,
1673 which might be different to the first one */ 1673 which might be different to the first one */
1674 ret = cpufreq_driver->verify(policy); 1674 ret = cpufreq_driver->verify(policy);
1675 if (ret) 1675 if (ret)
1676 goto error_out; 1676 goto error_out;
1677 1677
1678 /* notification of the new policy */ 1678 /* notification of the new policy */
1679 blocking_notifier_call_chain(&cpufreq_policy_notifier_list, 1679 blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1680 CPUFREQ_NOTIFY, policy); 1680 CPUFREQ_NOTIFY, policy);
1681 1681
1682 data->min = policy->min; 1682 data->min = policy->min;
1683 data->max = policy->max; 1683 data->max = policy->max;
1684 1684
1685 dprintk("new min and max freqs are %u - %u kHz\n", 1685 dprintk("new min and max freqs are %u - %u kHz\n",
1686 data->min, data->max); 1686 data->min, data->max);
1687 1687
1688 if (cpufreq_driver->setpolicy) { 1688 if (cpufreq_driver->setpolicy) {
1689 data->policy = policy->policy; 1689 data->policy = policy->policy;
1690 dprintk("setting range\n"); 1690 dprintk("setting range\n");
1691 ret = cpufreq_driver->setpolicy(policy); 1691 ret = cpufreq_driver->setpolicy(policy);
1692 } else { 1692 } else {
1693 if (policy->governor != data->governor) { 1693 if (policy->governor != data->governor) {
1694 /* save old, working values */ 1694 /* save old, working values */
1695 struct cpufreq_governor *old_gov = data->governor; 1695 struct cpufreq_governor *old_gov = data->governor;
1696 1696
1697 dprintk("governor switch\n"); 1697 dprintk("governor switch\n");
1698 1698
1699 /* end old governor */ 1699 /* end old governor */
1700 if (data->governor) 1700 if (data->governor)
1701 __cpufreq_governor(data, CPUFREQ_GOV_STOP); 1701 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1702 1702
1703 /* start new governor */ 1703 /* start new governor */
1704 data->governor = policy->governor; 1704 data->governor = policy->governor;
1705 if (__cpufreq_governor(data, CPUFREQ_GOV_START)) { 1705 if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1706 /* new governor failed, so re-start old one */ 1706 /* new governor failed, so re-start old one */
1707 dprintk("starting governor %s failed\n", 1707 dprintk("starting governor %s failed\n",
1708 data->governor->name); 1708 data->governor->name);
1709 if (old_gov) { 1709 if (old_gov) {
1710 data->governor = old_gov; 1710 data->governor = old_gov;
1711 __cpufreq_governor(data, 1711 __cpufreq_governor(data,
1712 CPUFREQ_GOV_START); 1712 CPUFREQ_GOV_START);
1713 } 1713 }
1714 ret = -EINVAL; 1714 ret = -EINVAL;
1715 goto error_out; 1715 goto error_out;
1716 } 1716 }
1717 /* might be a policy change, too, so fall through */ 1717 /* might be a policy change, too, so fall through */
1718 } 1718 }
1719 dprintk("governor: change or update limits\n"); 1719 dprintk("governor: change or update limits\n");
1720 __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); 1720 __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1721 } 1721 }
1722 1722
1723 error_out: 1723 error_out:
1724 cpufreq_debug_enable_ratelimit(); 1724 cpufreq_debug_enable_ratelimit();
1725 return ret; 1725 return ret;
1726 } 1726 }
1727 1727
1728 /** 1728 /**
1729 * cpufreq_update_policy - re-evaluate an existing cpufreq policy 1729 * cpufreq_update_policy - re-evaluate an existing cpufreq policy
1730 * @cpu: CPU which shall be re-evaluated 1730 * @cpu: CPU which shall be re-evaluated
1731 * 1731 *
1732 * Useful for policy notifiers which have different necessities 1732 * Useful for policy notifiers which have different necessities
1733 * at different times. 1733 * at different times.
1734 */ 1734 */
1735 int cpufreq_update_policy(unsigned int cpu) 1735 int cpufreq_update_policy(unsigned int cpu)
1736 { 1736 {
1737 struct cpufreq_policy *data = cpufreq_cpu_get(cpu); 1737 struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1738 struct cpufreq_policy policy; 1738 struct cpufreq_policy policy;
1739 int ret; 1739 int ret;
1740 1740
1741 if (!data) { 1741 if (!data) {
1742 ret = -ENODEV; 1742 ret = -ENODEV;
1743 goto no_policy; 1743 goto no_policy;
1744 } 1744 }
1745 1745
1746 if (unlikely(lock_policy_rwsem_write(cpu))) { 1746 if (unlikely(lock_policy_rwsem_write(cpu))) {
1747 ret = -EINVAL; 1747 ret = -EINVAL;
1748 goto fail; 1748 goto fail;
1749 } 1749 }
1750 1750
1751 dprintk("updating policy for CPU %u\n", cpu); 1751 dprintk("updating policy for CPU %u\n", cpu);
1752 memcpy(&policy, data, sizeof(struct cpufreq_policy)); 1752 memcpy(&policy, data, sizeof(struct cpufreq_policy));
1753 policy.min = data->user_policy.min; 1753 policy.min = data->user_policy.min;
1754 policy.max = data->user_policy.max; 1754 policy.max = data->user_policy.max;
1755 policy.policy = data->user_policy.policy; 1755 policy.policy = data->user_policy.policy;
1756 policy.governor = data->user_policy.governor; 1756 policy.governor = data->user_policy.governor;
1757 1757
1758 /* BIOS might change freq behind our back 1758 /* BIOS might change freq behind our back
1759 -> ask driver for current freq and notify governors about a change */ 1759 -> ask driver for current freq and notify governors about a change */
1760 if (cpufreq_driver->get) { 1760 if (cpufreq_driver->get) {
1761 policy.cur = cpufreq_driver->get(cpu); 1761 policy.cur = cpufreq_driver->get(cpu);
1762 if (!data->cur) { 1762 if (!data->cur) {
1763 dprintk("Driver did not initialize current freq"); 1763 dprintk("Driver did not initialize current freq");
1764 data->cur = policy.cur; 1764 data->cur = policy.cur;
1765 } else { 1765 } else {
1766 if (data->cur != policy.cur) 1766 if (data->cur != policy.cur)
1767 cpufreq_out_of_sync(cpu, data->cur, 1767 cpufreq_out_of_sync(cpu, data->cur,
1768 policy.cur); 1768 policy.cur);
1769 } 1769 }
1770 } 1770 }
1771 1771
1772 ret = __cpufreq_set_policy(data, &policy); 1772 ret = __cpufreq_set_policy(data, &policy);
1773 1773
1774 unlock_policy_rwsem_write(cpu); 1774 unlock_policy_rwsem_write(cpu);
1775 1775
1776 fail: 1776 fail:
1777 cpufreq_cpu_put(data); 1777 cpufreq_cpu_put(data);
1778 no_policy: 1778 no_policy:
1779 return ret; 1779 return ret;
1780 } 1780 }
1781 EXPORT_SYMBOL(cpufreq_update_policy); 1781 EXPORT_SYMBOL(cpufreq_update_policy);
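/*
 * Illustrative sketch (assumed platform hook, not part of this diff): when
 * firmware changes the allowed limits behind the kernel's back (e.g. an ACPI
 * _PPC notification), platform code re-evaluates the policy, which re-runs
 * the CPUFREQ_ADJUST/CPUFREQ_NOTIFY chains seen above.
 */
static void example_limits_changed(unsigned int cpu)
{
	cpufreq_update_policy(cpu);
}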
1782 1782
1783 static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, 1783 static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1784 unsigned long action, void *hcpu) 1784 unsigned long action, void *hcpu)
1785 { 1785 {
1786 unsigned int cpu = (unsigned long)hcpu; 1786 unsigned int cpu = (unsigned long)hcpu;
1787 struct sys_device *sys_dev; 1787 struct sys_device *sys_dev;
1788 1788
1789 sys_dev = get_cpu_sysdev(cpu); 1789 sys_dev = get_cpu_sysdev(cpu);
1790 if (sys_dev) { 1790 if (sys_dev) {
1791 switch (action) { 1791 switch (action) {
1792 case CPU_ONLINE: 1792 case CPU_ONLINE:
1793 case CPU_ONLINE_FROZEN: 1793 case CPU_ONLINE_FROZEN:
1794 cpufreq_add_dev(sys_dev); 1794 cpufreq_add_dev(sys_dev);
1795 break; 1795 break;
1796 case CPU_DOWN_PREPARE: 1796 case CPU_DOWN_PREPARE:
1797 case CPU_DOWN_PREPARE_FROZEN: 1797 case CPU_DOWN_PREPARE_FROZEN:
1798 if (unlikely(lock_policy_rwsem_write(cpu))) 1798 if (unlikely(lock_policy_rwsem_write(cpu)))
1799 BUG(); 1799 BUG();
1800 1800
1801 __cpufreq_remove_dev(sys_dev); 1801 __cpufreq_remove_dev(sys_dev);
1802 break; 1802 break;
1803 case CPU_DOWN_FAILED: 1803 case CPU_DOWN_FAILED:
1804 case CPU_DOWN_FAILED_FROZEN: 1804 case CPU_DOWN_FAILED_FROZEN:
1805 cpufreq_add_dev(sys_dev); 1805 cpufreq_add_dev(sys_dev);
1806 break; 1806 break;
1807 } 1807 }
1808 } 1808 }
1809 return NOTIFY_OK; 1809 return NOTIFY_OK;
1810 } 1810 }
1811 1811
1812 static struct notifier_block __refdata cpufreq_cpu_notifier = 1812 static struct notifier_block __refdata cpufreq_cpu_notifier =
1813 { 1813 {
1814 .notifier_call = cpufreq_cpu_callback, 1814 .notifier_call = cpufreq_cpu_callback,
1815 }; 1815 };
1816 1816
1817 /********************************************************************* 1817 /*********************************************************************
1818 * REGISTER / UNREGISTER CPUFREQ DRIVER * 1818 * REGISTER / UNREGISTER CPUFREQ DRIVER *
1819 *********************************************************************/ 1819 *********************************************************************/
1820 1820
1821 /** 1821 /**
1822 * cpufreq_register_driver - register a CPU Frequency driver 1822 * cpufreq_register_driver - register a CPU Frequency driver
1823 * @driver_data: A struct cpufreq_driver containing the values 1823 * @driver_data: A struct cpufreq_driver containing the values
1824 * submitted by the CPU Frequency driver. 1824 * submitted by the CPU Frequency driver.
1825 * 1825 *
1826 * Registers a CPU Frequency driver to this core code. This code 1826 * Registers a CPU Frequency driver to this core code. This code
1827 * returns zero on success, -EBUSY when another driver got here first 1827 * returns zero on success, -EBUSY when another driver got here first
1828 * (and isn't unregistered in the meantime). 1828 * (and isn't unregistered in the meantime).
1829 * 1829 *
1830 */ 1830 */
1831 int cpufreq_register_driver(struct cpufreq_driver *driver_data) 1831 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1832 { 1832 {
1833 unsigned long flags; 1833 unsigned long flags;
1834 int ret; 1834 int ret;
1835 1835
1836 if (!driver_data || !driver_data->verify || !driver_data->init || 1836 if (!driver_data || !driver_data->verify || !driver_data->init ||
1837 ((!driver_data->setpolicy) && (!driver_data->target))) 1837 ((!driver_data->setpolicy) && (!driver_data->target)))
1838 return -EINVAL; 1838 return -EINVAL;
1839 1839
1840 dprintk("trying to register driver %s\n", driver_data->name); 1840 dprintk("trying to register driver %s\n", driver_data->name);
1841 1841
1842 if (driver_data->setpolicy) 1842 if (driver_data->setpolicy)
1843 driver_data->flags |= CPUFREQ_CONST_LOOPS; 1843 driver_data->flags |= CPUFREQ_CONST_LOOPS;
1844 1844
1845 spin_lock_irqsave(&cpufreq_driver_lock, flags); 1845 spin_lock_irqsave(&cpufreq_driver_lock, flags);
1846 if (cpufreq_driver) { 1846 if (cpufreq_driver) {
1847 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 1847 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1848 return -EBUSY; 1848 return -EBUSY;
1849 } 1849 }
1850 cpufreq_driver = driver_data; 1850 cpufreq_driver = driver_data;
1851 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 1851 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1852 1852
1853 ret = sysdev_driver_register(&cpu_sysdev_class, 1853 ret = sysdev_driver_register(&cpu_sysdev_class,
1854 &cpufreq_sysdev_driver); 1854 &cpufreq_sysdev_driver);
1855 1855
1856 if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) { 1856 if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1857 int i; 1857 int i;
1858 ret = -ENODEV; 1858 ret = -ENODEV;
1859 1859
1860 /* check for at least one working CPU */ 1860 /* check for at least one working CPU */
1861 for (i = 0; i < nr_cpu_ids; i++) 1861 for (i = 0; i < nr_cpu_ids; i++)
1862 if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) { 1862 if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
1863 ret = 0; 1863 ret = 0;
1864 break; 1864 break;
1865 } 1865 }
1866 1866
1867 /* if all ->init() calls failed, unregister */ 1867 /* if all ->init() calls failed, unregister */
1868 if (ret) { 1868 if (ret) {
1869 dprintk("no CPU initialized for driver %s\n", 1869 dprintk("no CPU initialized for driver %s\n",
1870 driver_data->name); 1870 driver_data->name);
1871 sysdev_driver_unregister(&cpu_sysdev_class, 1871 sysdev_driver_unregister(&cpu_sysdev_class,
1872 &cpufreq_sysdev_driver); 1872 &cpufreq_sysdev_driver);
1873 1873
1874 spin_lock_irqsave(&cpufreq_driver_lock, flags); 1874 spin_lock_irqsave(&cpufreq_driver_lock, flags);
1875 cpufreq_driver = NULL; 1875 cpufreq_driver = NULL;
1876 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 1876 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1877 } 1877 }
1878 } 1878 }
1879 1879
1880 if (!ret) { 1880 if (!ret) {
1881 register_hotcpu_notifier(&cpufreq_cpu_notifier); 1881 register_hotcpu_notifier(&cpufreq_cpu_notifier);
1882 dprintk("driver %s up and running\n", driver_data->name); 1882 dprintk("driver %s up and running\n", driver_data->name);
1883 cpufreq_debug_enable_ratelimit(); 1883 cpufreq_debug_enable_ratelimit();
1884 } 1884 }
1885 1885
1886 return ret; 1886 return ret;
1887 } 1887 }
1888 EXPORT_SYMBOL_GPL(cpufreq_register_driver); 1888 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
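/*
 * Illustrative sketch (hypothetical driver skeleton, not part of this diff):
 * the minimum a ->target style driver supplies before calling
 * cpufreq_register_driver().  Frequencies and latency are made-up values.
 */
static int example_verify(struct cpufreq_policy *policy)
{
	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
				     policy->cpuinfo.max_freq);
	return 0;
}

static int example_target(struct cpufreq_policy *policy,
			  unsigned int target_freq, unsigned int relation)
{
	/* program the hardware here, posting PRE/POSTCHANGE notifications */
	return 0;
}

static int example_cpu_init(struct cpufreq_policy *policy)
{
	policy->cpuinfo.min_freq = 800000;		/* kHz, assumed */
	policy->cpuinfo.max_freq = 2000000;		/* kHz, assumed */
	policy->cpuinfo.transition_latency = 100000;	/* ns, assumed */
	policy->min = policy->cpuinfo.min_freq;
	policy->max = policy->cpuinfo.max_freq;
	policy->cur = policy->cpuinfo.max_freq;
	return 0;
}

static struct cpufreq_driver example_driver = {
	.name	= "example",
	.verify	= example_verify,
	.target	= example_target,
	.init	= example_cpu_init,
};

static int __init example_driver_init(void)
{
	return cpufreq_register_driver(&example_driver);
}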
1889 1889
1890 1890
1891 /** 1891 /**
1892 * cpufreq_unregister_driver - unregister the current CPUFreq driver 1892 * cpufreq_unregister_driver - unregister the current CPUFreq driver
1893 * 1893 *
1894 * Unregister the current CPUFreq driver. Only call this if you have 1894 * Unregister the current CPUFreq driver. Only call this if you have
1895 * the right to do so, i.e. if you have succeeded in initialising before! 1895 * the right to do so, i.e. if you have succeeded in initialising before!
1896 * Returns zero if successful, and -EINVAL if the cpufreq_driver is 1896 * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1897 * currently not initialised. 1897 * currently not initialised.
1898 */ 1898 */
1899 int cpufreq_unregister_driver(struct cpufreq_driver *driver) 1899 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1900 { 1900 {
1901 unsigned long flags; 1901 unsigned long flags;
1902 1902
1903 cpufreq_debug_disable_ratelimit(); 1903 cpufreq_debug_disable_ratelimit();
1904 1904
1905 if (!cpufreq_driver || (driver != cpufreq_driver)) { 1905 if (!cpufreq_driver || (driver != cpufreq_driver)) {
1906 cpufreq_debug_enable_ratelimit(); 1906 cpufreq_debug_enable_ratelimit();
1907 return -EINVAL; 1907 return -EINVAL;
1908 } 1908 }
1909 1909
1910 dprintk("unregistering driver %s\n", driver->name); 1910 dprintk("unregistering driver %s\n", driver->name);
1911 1911
1912 sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver); 1912 sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
1913 unregister_hotcpu_notifier(&cpufreq_cpu_notifier); 1913 unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1914 1914
1915 spin_lock_irqsave(&cpufreq_driver_lock, flags); 1915 spin_lock_irqsave(&cpufreq_driver_lock, flags);
1916 cpufreq_driver = NULL; 1916 cpufreq_driver = NULL;
1917 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 1917 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1918 1918
1919 return 0; 1919 return 0;
1920 } 1920 }
1921 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); 1921 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1922 1922
1923 static int __init cpufreq_core_init(void) 1923 static int __init cpufreq_core_init(void)
1924 { 1924 {
1925 int cpu; 1925 int cpu;
1926 1926
1927 for_each_possible_cpu(cpu) { 1927 for_each_possible_cpu(cpu) {
1928 per_cpu(policy_cpu, cpu) = -1; 1928 per_cpu(policy_cpu, cpu) = -1;
1929 init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); 1929 init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1930 } 1930 }
1931 return 0; 1931 return 0;
1932 } 1932 }
1933 1933
1934 core_initcall(cpufreq_core_init); 1934 core_initcall(cpufreq_core_init);
1935 1935
kernel/sched_cpupri.c
1 /* 1 /*
2 * kernel/sched_cpupri.c 2 * kernel/sched_cpupri.c
3 * 3 *
4 * CPU priority management 4 * CPU priority management
5 * 5 *
6 * Copyright (C) 2007-2008 Novell 6 * Copyright (C) 2007-2008 Novell
7 * 7 *
8 * Author: Gregory Haskins <ghaskins@novell.com> 8 * Author: Gregory Haskins <ghaskins@novell.com>
9 * 9 *
10 * This code tracks the priority of each CPU so that global migration 10 * This code tracks the priority of each CPU so that global migration
11 * decisions are easy to calculate. Each CPU can be in a state as follows: 11 * decisions are easy to calculate. Each CPU can be in a state as follows:
12 * 12 *
13 * (INVALID), IDLE, NORMAL, RT1, ... RT99 13 * (INVALID), IDLE, NORMAL, RT1, ... RT99
14 * 14 *
15 * going from the lowest priority to the highest. CPUs in the INVALID state 15 * going from the lowest priority to the highest. CPUs in the INVALID state
16 * are not eligible for routing. The system maintains this state with 16 * are not eligible for routing. The system maintains this state with
17 * a 2 dimensional bitmap (the first for priority class, the second for cpus 17 * a 2 dimensional bitmap (the first for priority class, the second for cpus
18 * in that class). Therefore a typical application without affinity 18 * in that class). Therefore a typical application without affinity
19 * restrictions can find a suitable CPU with O(1) complexity (e.g. two bit 19 * restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
20 * searches). For tasks with affinity restrictions, the algorithm has a 20 * searches). For tasks with affinity restrictions, the algorithm has a
21 * worst case complexity of O(min(102, nr_domcpus)), though the scenario that 21 * worst case complexity of O(min(102, nr_domcpus)), though the scenario that
22 * yields the worst case search is fairly contrived. 22 * yields the worst case search is fairly contrived.
23 * 23 *
24 * This program is free software; you can redistribute it and/or 24 * This program is free software; you can redistribute it and/or
25 * modify it under the terms of the GNU General Public License 25 * modify it under the terms of the GNU General Public License
26 * as published by the Free Software Foundation; version 2 26 * as published by the Free Software Foundation; version 2
27 * of the License. 27 * of the License.
28 */ 28 */
29 29
30 #include "sched_cpupri.h" 30 #include "sched_cpupri.h"
31 31
32 /* Convert between a 140 based task->prio, and our 102 based cpupri */ 32 /* Convert between a 140 based task->prio, and our 102 based cpupri */
33 static int convert_prio(int prio) 33 static int convert_prio(int prio)
34 { 34 {
35 int cpupri; 35 int cpupri;
36 36
37 if (prio == CPUPRI_INVALID) 37 if (prio == CPUPRI_INVALID)
38 cpupri = CPUPRI_INVALID; 38 cpupri = CPUPRI_INVALID;
39 else if (prio == MAX_PRIO) 39 else if (prio == MAX_PRIO)
40 cpupri = CPUPRI_IDLE; 40 cpupri = CPUPRI_IDLE;
41 else if (prio >= MAX_RT_PRIO) 41 else if (prio >= MAX_RT_PRIO)
42 cpupri = CPUPRI_NORMAL; 42 cpupri = CPUPRI_NORMAL;
43 else 43 else
44 cpupri = MAX_RT_PRIO - prio + 1; 44 cpupri = MAX_RT_PRIO - prio + 1;
45 45
46 return cpupri; 46 return cpupri;
47 } 47 }
48 48
49 #define for_each_cpupri_active(array, idx) \ 49 #define for_each_cpupri_active(array, idx) \
50 for (idx = find_first_bit(array, CPUPRI_NR_PRIORITIES); \ 50 for (idx = find_first_bit(array, CPUPRI_NR_PRIORITIES); \
51 idx < CPUPRI_NR_PRIORITIES; \ 51 idx < CPUPRI_NR_PRIORITIES; \
52 idx = find_next_bit(array, CPUPRI_NR_PRIORITIES, idx+1)) 52 idx = find_next_bit(array, CPUPRI_NR_PRIORITIES, idx+1))
53 53
54 /** 54 /**
55 * cpupri_find - find the best (lowest-pri) CPU in the system 55 * cpupri_find - find the best (lowest-pri) CPU in the system
56 * @cp: The cpupri context 56 * @cp: The cpupri context
57 * @p: The task 57 * @p: The task
58 * @lowest_mask: A mask to fill in with selected CPUs (or NULL) 58 * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
59 * 59 *
60 * Note: This function returns the recommended CPUs as calculated during the 60 * Note: This function returns the recommended CPUs as calculated during the
61 * current invocation. By the time the call returns, the CPUs may have in 61 * current invocation. By the time the call returns, the CPUs may have in
62 * fact changed priorities any number of times. While not ideal, it is not 62 * fact changed priorities any number of times. While not ideal, it is not
63 * an issue of correctness since the normal rebalancer logic will correct 63 * an issue of correctness since the normal rebalancer logic will correct
64 * any discrepancies created by racing against the uncertainty of the current 64 * any discrepancies created by racing against the uncertainty of the current
65 * priority configuration. 65 * priority configuration.
66 * 66 *
67 * Returns: (int)bool - CPUs were found 67 * Returns: (int)bool - CPUs were found
68 */ 68 */
69 int cpupri_find(struct cpupri *cp, struct task_struct *p, 69 int cpupri_find(struct cpupri *cp, struct task_struct *p,
70 struct cpumask *lowest_mask) 70 struct cpumask *lowest_mask)
71 { 71 {
72 int idx = 0; 72 int idx = 0;
73 int task_pri = convert_prio(p->prio); 73 int task_pri = convert_prio(p->prio);
74 74
75 for_each_cpupri_active(cp->pri_active, idx) { 75 for_each_cpupri_active(cp->pri_active, idx) {
76 struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; 76 struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
77 77
78 if (idx >= task_pri) 78 if (idx >= task_pri)
79 break; 79 break;
80 80
81 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) 81 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
82 continue; 82 continue;
83 83
84 if (lowest_mask) 84 if (lowest_mask)
85 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); 85 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
86 return 1; 86 return 1;
87 } 87 }
88 88
89 return 0; 89 return 0;
90 } 90 }
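/*
 * Illustrative sketch (assumed caller, not part of this diff): this mirrors
 * how the RT scheduler's find_lowest_rq() path is expected to use
 * cpupri_find(), assuming root_domain carries a struct cpupri as in
 * kernel/sched.c and that lowest_mask is a preallocated scratch cpumask.
 */
static int example_pick_cpu(struct task_struct *p, struct cpumask *lowest_mask)
{
	struct cpupri *cp = &task_rq(p)->rd->cpupri;

	if (!cpupri_find(cp, p, lowest_mask))
		return -1;	/* every eligible CPU runs at >= p's priority */

	/* keep the task where it is if its CPU is already lowest priority */
	if (cpumask_test_cpu(task_cpu(p), lowest_mask))
		return task_cpu(p);

	return cpumask_any(lowest_mask);
}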
91 91
92 /** 92 /**
93 * cpupri_set - update the cpu priority setting 93 * cpupri_set - update the cpu priority setting
94 * @cp: The cpupri context 94 * @cp: The cpupri context
95 * @cpu: The target cpu 95 * @cpu: The target cpu
96 * @pri: The priority (INVALID-RT99) to assign to this CPU 96 * @pri: The priority (INVALID-RT99) to assign to this CPU
97 * 97 *
98 * Note: Assumes cpu_rq(cpu)->lock is locked 98 * Note: Assumes cpu_rq(cpu)->lock is locked
99 * 99 *
100 * Returns: (void) 100 * Returns: (void)
101 */ 101 */
102 void cpupri_set(struct cpupri *cp, int cpu, int newpri) 102 void cpupri_set(struct cpupri *cp, int cpu, int newpri)
103 { 103 {
104 int *currpri = &cp->cpu_to_pri[cpu]; 104 int *currpri = &cp->cpu_to_pri[cpu];
105 int oldpri = *currpri; 105 int oldpri = *currpri;
106 unsigned long flags; 106 unsigned long flags;
107 107
108 newpri = convert_prio(newpri); 108 newpri = convert_prio(newpri);
109 109
110 BUG_ON(newpri >= CPUPRI_NR_PRIORITIES); 110 BUG_ON(newpri >= CPUPRI_NR_PRIORITIES);
111 111
112 if (newpri == oldpri) 112 if (newpri == oldpri)
113 return; 113 return;
114 114
115 /* 115 /*
116 * If the cpu was currently mapped to a different value, we 116 * If the cpu was currently mapped to a different value, we
117 * first need to unmap the old value 117 * first need to unmap the old value
118 */ 118 */
119 if (likely(oldpri != CPUPRI_INVALID)) { 119 if (likely(oldpri != CPUPRI_INVALID)) {
120 struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri]; 120 struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
121 121
122 spin_lock_irqsave(&vec->lock, flags); 122 spin_lock_irqsave(&vec->lock, flags);
123 123
124 vec->count--; 124 vec->count--;
125 if (!vec->count) 125 if (!vec->count)
126 clear_bit(oldpri, cp->pri_active); 126 clear_bit(oldpri, cp->pri_active);
127 cpumask_clear_cpu(cpu, vec->mask); 127 cpumask_clear_cpu(cpu, vec->mask);
128 128
129 spin_unlock_irqrestore(&vec->lock, flags); 129 spin_unlock_irqrestore(&vec->lock, flags);
130 } 130 }
131 131
132 if (likely(newpri != CPUPRI_INVALID)) { 132 if (likely(newpri != CPUPRI_INVALID)) {
133 struct cpupri_vec *vec = &cp->pri_to_cpu[newpri]; 133 struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
134 134
135 spin_lock_irqsave(&vec->lock, flags); 135 spin_lock_irqsave(&vec->lock, flags);
136 136
137 cpumask_set_cpu(cpu, vec->mask); 137 cpumask_set_cpu(cpu, vec->mask);
138 vec->count++; 138 vec->count++;
139 if (vec->count == 1) 139 if (vec->count == 1)
140 set_bit(newpri, cp->pri_active); 140 set_bit(newpri, cp->pri_active);
141 141
142 spin_unlock_irqrestore(&vec->lock, flags); 142 spin_unlock_irqrestore(&vec->lock, flags);
143 } 143 }
144 144
145 *currpri = newpri; 145 *currpri = newpri;
146 } 146 }
147 147
148 /** 148 /**
149 * cpupri_init - initialize the cpupri structure 149 * cpupri_init - initialize the cpupri structure
150 * @cp: The cpupri context 150 * @cp: The cpupri context
151 * @bootmem: true if allocations need to use bootmem 151 * @bootmem: true if allocations need to use bootmem
152 * 152 *
153 * Returns: -ENOMEM if memory fails. 153 * Returns: -ENOMEM if memory fails.
154 */ 154 */
155 int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) 155 int __init_refok cpupri_init(struct cpupri *cp, bool bootmem)
156 { 156 {
157 int i; 157 int i;
158 158
159 memset(cp, 0, sizeof(*cp)); 159 memset(cp, 0, sizeof(*cp));
160 160
161 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { 161 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
162 struct cpupri_vec *vec = &cp->pri_to_cpu[i]; 162 struct cpupri_vec *vec = &cp->pri_to_cpu[i];
163 163
164 spin_lock_init(&vec->lock); 164 spin_lock_init(&vec->lock);
165 vec->count = 0; 165 vec->count = 0;
166 if (bootmem) 166 if (bootmem)
167 alloc_bootmem_cpumask_var(&vec->mask); 167 alloc_bootmem_cpumask_var(&vec->mask);
168 else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL)) 168 else if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
169 goto cleanup; 169 goto cleanup;
170 } 170 }
171 171
172 for_each_possible_cpu(i) 172 for_each_possible_cpu(i)
173 cp->cpu_to_pri[i] = CPUPRI_INVALID; 173 cp->cpu_to_pri[i] = CPUPRI_INVALID;
174 return 0; 174 return 0;
175 175
176 cleanup: 176 cleanup:
177 for (i--; i >= 0; i--) 177 for (i--; i >= 0; i--)
178 free_cpumask_var(cp->pri_to_cpu[i].mask); 178 free_cpumask_var(cp->pri_to_cpu[i].mask);
179 return -ENOMEM; 179 return -ENOMEM;
180 } 180 }
181 181
182 /** 182 /**
183 * cpupri_cleanup - clean up the cpupri structure 183 * cpupri_cleanup - clean up the cpupri structure
184 * @cp: The cpupri context 184 * @cp: The cpupri context
185 */ 185 */
186 void cpupri_cleanup(struct cpupri *cp) 186 void cpupri_cleanup(struct cpupri *cp)
187 { 187 {
188 int i; 188 int i;
189 189
190 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) 190 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
191 free_cpumask_var(cp->pri_to_cpu[i].mask); 191 free_cpumask_var(cp->pri_to_cpu[i].mask);
192 } 192 }
193 193
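/*
 * Illustrative sketch (not part of this diff): the allocation pattern the
 * one-character change above is guarding.  With CONFIG_CPUMASK_OFFSTACK
 * (selected by MAXSMP), cpumask_var_t is a real pointer and
 * alloc_cpumask_var() returns uninitialized memory; without it the mask is
 * embedded in the variable and static storage is already zeroed by the
 * loader.  zalloc_cpumask_var() gives a cleared mask in both configurations.
 */
#include <linux/cpumask.h>

static cpumask_var_t example_mask;

static int example_setup(void)
{
	if (!zalloc_cpumask_var(&example_mask, GFP_KERNEL))
		return -ENOMEM;		/* allocated and cleared either way */
	return 0;
}

static void example_teardown(void)
{
	free_cpumask_var(example_mask);
}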
kernel/sched_rt.c
1 /* 1 /*
2 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR 2 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
3 * policies) 3 * policies)
4 */ 4 */
5 5
6 static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se) 6 static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
7 { 7 {
8 return container_of(rt_se, struct task_struct, rt); 8 return container_of(rt_se, struct task_struct, rt);
9 } 9 }
10 10
11 #ifdef CONFIG_RT_GROUP_SCHED 11 #ifdef CONFIG_RT_GROUP_SCHED
12 12
13 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) 13 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
14 { 14 {
15 return rt_rq->rq; 15 return rt_rq->rq;
16 } 16 }
17 17
18 static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) 18 static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
19 { 19 {
20 return rt_se->rt_rq; 20 return rt_se->rt_rq;
21 } 21 }
22 22
23 #else /* CONFIG_RT_GROUP_SCHED */ 23 #else /* CONFIG_RT_GROUP_SCHED */
24 24
25 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) 25 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
26 { 26 {
27 return container_of(rt_rq, struct rq, rt); 27 return container_of(rt_rq, struct rq, rt);
28 } 28 }
29 29
30 static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) 30 static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
31 { 31 {
32 struct task_struct *p = rt_task_of(rt_se); 32 struct task_struct *p = rt_task_of(rt_se);
33 struct rq *rq = task_rq(p); 33 struct rq *rq = task_rq(p);
34 34
35 return &rq->rt; 35 return &rq->rt;
36 } 36 }
37 37
38 #endif /* CONFIG_RT_GROUP_SCHED */ 38 #endif /* CONFIG_RT_GROUP_SCHED */
39 39
40 #ifdef CONFIG_SMP 40 #ifdef CONFIG_SMP
41 41
42 static inline int rt_overloaded(struct rq *rq) 42 static inline int rt_overloaded(struct rq *rq)
43 { 43 {
44 return atomic_read(&rq->rd->rto_count); 44 return atomic_read(&rq->rd->rto_count);
45 } 45 }
46 46
47 static inline void rt_set_overload(struct rq *rq) 47 static inline void rt_set_overload(struct rq *rq)
48 { 48 {
49 if (!rq->online) 49 if (!rq->online)
50 return; 50 return;
51 51
52 cpumask_set_cpu(rq->cpu, rq->rd->rto_mask); 52 cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
53 /* 53 /*
54 * Make sure the mask is visible before we set 54 * Make sure the mask is visible before we set
55 * the overload count. That is checked to determine 55 * the overload count. That is checked to determine
56 * if we should look at the mask. It would be a shame 56 * if we should look at the mask. It would be a shame
57 * if we looked at the mask, but the mask was not 57 * if we looked at the mask, but the mask was not
58 * updated yet. 58 * updated yet.
59 */ 59 */
60 wmb(); 60 wmb();
61 atomic_inc(&rq->rd->rto_count); 61 atomic_inc(&rq->rd->rto_count);
62 } 62 }
63 63
64 static inline void rt_clear_overload(struct rq *rq) 64 static inline void rt_clear_overload(struct rq *rq)
65 { 65 {
66 if (!rq->online) 66 if (!rq->online)
67 return; 67 return;
68 68
69 /* the order here really doesn't matter */ 69 /* the order here really doesn't matter */
70 atomic_dec(&rq->rd->rto_count); 70 atomic_dec(&rq->rd->rto_count);
71 cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask); 71 cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
72 } 72 }
73 73
74 static void update_rt_migration(struct rt_rq *rt_rq) 74 static void update_rt_migration(struct rt_rq *rt_rq)
75 { 75 {
76 if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) { 76 if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) {
77 if (!rt_rq->overloaded) { 77 if (!rt_rq->overloaded) {
78 rt_set_overload(rq_of_rt_rq(rt_rq)); 78 rt_set_overload(rq_of_rt_rq(rt_rq));
79 rt_rq->overloaded = 1; 79 rt_rq->overloaded = 1;
80 } 80 }
81 } else if (rt_rq->overloaded) { 81 } else if (rt_rq->overloaded) {
82 rt_clear_overload(rq_of_rt_rq(rt_rq)); 82 rt_clear_overload(rq_of_rt_rq(rt_rq));
83 rt_rq->overloaded = 0; 83 rt_rq->overloaded = 0;
84 } 84 }
85 } 85 }
86 86
87 static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 87 static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
88 { 88 {
89 if (rt_se->nr_cpus_allowed > 1) 89 if (rt_se->nr_cpus_allowed > 1)
90 rt_rq->rt_nr_migratory++; 90 rt_rq->rt_nr_migratory++;
91 91
92 update_rt_migration(rt_rq); 92 update_rt_migration(rt_rq);
93 } 93 }
94 94
95 static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 95 static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
96 { 96 {
97 if (rt_se->nr_cpus_allowed > 1) 97 if (rt_se->nr_cpus_allowed > 1)
98 rt_rq->rt_nr_migratory--; 98 rt_rq->rt_nr_migratory--;
99 99
100 update_rt_migration(rt_rq); 100 update_rt_migration(rt_rq);
101 } 101 }
102 102
103 static void enqueue_pushable_task(struct rq *rq, struct task_struct *p) 103 static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
104 { 104 {
105 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); 105 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
106 plist_node_init(&p->pushable_tasks, p->prio); 106 plist_node_init(&p->pushable_tasks, p->prio);
107 plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks); 107 plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
108 } 108 }
109 109
110 static void dequeue_pushable_task(struct rq *rq, struct task_struct *p) 110 static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
111 { 111 {
112 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); 112 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
113 } 113 }
114 114
115 #else 115 #else
116 116
117 static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p) 117 static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
118 { 118 {
119 } 119 }
120 120
121 static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p) 121 static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
122 { 122 {
123 } 123 }
124 124
125 static inline 125 static inline
126 void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 126 void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
127 { 127 {
128 } 128 }
129 129
130 static inline 130 static inline
131 void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 131 void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
132 { 132 {
133 } 133 }
134 134
135 #endif /* CONFIG_SMP */ 135 #endif /* CONFIG_SMP */
136 136
137 static inline int on_rt_rq(struct sched_rt_entity *rt_se) 137 static inline int on_rt_rq(struct sched_rt_entity *rt_se)
138 { 138 {
139 return !list_empty(&rt_se->run_list); 139 return !list_empty(&rt_se->run_list);
140 } 140 }
141 141
142 #ifdef CONFIG_RT_GROUP_SCHED 142 #ifdef CONFIG_RT_GROUP_SCHED
143 143
144 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) 144 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
145 { 145 {
146 if (!rt_rq->tg) 146 if (!rt_rq->tg)
147 return RUNTIME_INF; 147 return RUNTIME_INF;
148 148
149 return rt_rq->rt_runtime; 149 return rt_rq->rt_runtime;
150 } 150 }
151 151
152 static inline u64 sched_rt_period(struct rt_rq *rt_rq) 152 static inline u64 sched_rt_period(struct rt_rq *rt_rq)
153 { 153 {
154 return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); 154 return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
155 } 155 }
156 156
157 #define for_each_leaf_rt_rq(rt_rq, rq) \ 157 #define for_each_leaf_rt_rq(rt_rq, rq) \
158 list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) 158 list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
159 159
160 #define for_each_sched_rt_entity(rt_se) \ 160 #define for_each_sched_rt_entity(rt_se) \
161 for (; rt_se; rt_se = rt_se->parent) 161 for (; rt_se; rt_se = rt_se->parent)
162 162
163 static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) 163 static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
164 { 164 {
165 return rt_se->my_q; 165 return rt_se->my_q;
166 } 166 }
167 167
168 static void enqueue_rt_entity(struct sched_rt_entity *rt_se); 168 static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
169 static void dequeue_rt_entity(struct sched_rt_entity *rt_se); 169 static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
170 170
171 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 171 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
172 { 172 {
173 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; 173 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
174 struct sched_rt_entity *rt_se = rt_rq->rt_se; 174 struct sched_rt_entity *rt_se = rt_rq->rt_se;
175 175
176 if (rt_rq->rt_nr_running) { 176 if (rt_rq->rt_nr_running) {
177 if (rt_se && !on_rt_rq(rt_se)) 177 if (rt_se && !on_rt_rq(rt_se))
178 enqueue_rt_entity(rt_se); 178 enqueue_rt_entity(rt_se);
179 if (rt_rq->highest_prio.curr < curr->prio) 179 if (rt_rq->highest_prio.curr < curr->prio)
180 resched_task(curr); 180 resched_task(curr);
181 } 181 }
182 } 182 }
183 183
184 static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 184 static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
185 { 185 {
186 struct sched_rt_entity *rt_se = rt_rq->rt_se; 186 struct sched_rt_entity *rt_se = rt_rq->rt_se;
187 187
188 if (rt_se && on_rt_rq(rt_se)) 188 if (rt_se && on_rt_rq(rt_se))
189 dequeue_rt_entity(rt_se); 189 dequeue_rt_entity(rt_se);
190 } 190 }
191 191
192 static inline int rt_rq_throttled(struct rt_rq *rt_rq) 192 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
193 { 193 {
194 return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; 194 return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
195 } 195 }
196 196
197 static int rt_se_boosted(struct sched_rt_entity *rt_se) 197 static int rt_se_boosted(struct sched_rt_entity *rt_se)
198 { 198 {
199 struct rt_rq *rt_rq = group_rt_rq(rt_se); 199 struct rt_rq *rt_rq = group_rt_rq(rt_se);
200 struct task_struct *p; 200 struct task_struct *p;
201 201
202 if (rt_rq) 202 if (rt_rq)
203 return !!rt_rq->rt_nr_boosted; 203 return !!rt_rq->rt_nr_boosted;
204 204
205 p = rt_task_of(rt_se); 205 p = rt_task_of(rt_se);
206 return p->prio != p->normal_prio; 206 return p->prio != p->normal_prio;
207 } 207 }
208 208
209 #ifdef CONFIG_SMP 209 #ifdef CONFIG_SMP
210 static inline const struct cpumask *sched_rt_period_mask(void) 210 static inline const struct cpumask *sched_rt_period_mask(void)
211 { 211 {
212 return cpu_rq(smp_processor_id())->rd->span; 212 return cpu_rq(smp_processor_id())->rd->span;
213 } 213 }
214 #else 214 #else
215 static inline const struct cpumask *sched_rt_period_mask(void) 215 static inline const struct cpumask *sched_rt_period_mask(void)
216 { 216 {
217 return cpu_online_mask; 217 return cpu_online_mask;
218 } 218 }
219 #endif 219 #endif
220 220
221 static inline 221 static inline
222 struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) 222 struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
223 { 223 {
224 return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu]; 224 return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
225 } 225 }
226 226
227 static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) 227 static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
228 { 228 {
229 return &rt_rq->tg->rt_bandwidth; 229 return &rt_rq->tg->rt_bandwidth;
230 } 230 }
231 231
232 #else /* !CONFIG_RT_GROUP_SCHED */ 232 #else /* !CONFIG_RT_GROUP_SCHED */
233 233
234 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) 234 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
235 { 235 {
236 return rt_rq->rt_runtime; 236 return rt_rq->rt_runtime;
237 } 237 }
238 238
239 static inline u64 sched_rt_period(struct rt_rq *rt_rq) 239 static inline u64 sched_rt_period(struct rt_rq *rt_rq)
240 { 240 {
241 return ktime_to_ns(def_rt_bandwidth.rt_period); 241 return ktime_to_ns(def_rt_bandwidth.rt_period);
242 } 242 }
243 243
244 #define for_each_leaf_rt_rq(rt_rq, rq) \ 244 #define for_each_leaf_rt_rq(rt_rq, rq) \
245 for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) 245 for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
246 246
247 #define for_each_sched_rt_entity(rt_se) \ 247 #define for_each_sched_rt_entity(rt_se) \
248 for (; rt_se; rt_se = NULL) 248 for (; rt_se; rt_se = NULL)
249 249
250 static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) 250 static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
251 { 251 {
252 return NULL; 252 return NULL;
253 } 253 }
254 254
255 static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 255 static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
256 { 256 {
257 if (rt_rq->rt_nr_running) 257 if (rt_rq->rt_nr_running)
258 resched_task(rq_of_rt_rq(rt_rq)->curr); 258 resched_task(rq_of_rt_rq(rt_rq)->curr);
259 } 259 }
260 260
261 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 261 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
262 { 262 {
263 } 263 }
264 264
265 static inline int rt_rq_throttled(struct rt_rq *rt_rq) 265 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
266 { 266 {
267 return rt_rq->rt_throttled; 267 return rt_rq->rt_throttled;
268 } 268 }
269 269
270 static inline const struct cpumask *sched_rt_period_mask(void) 270 static inline const struct cpumask *sched_rt_period_mask(void)
271 { 271 {
272 return cpu_online_mask; 272 return cpu_online_mask;
273 } 273 }
274 274
275 static inline 275 static inline
276 struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) 276 struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
277 { 277 {
278 return &cpu_rq(cpu)->rt; 278 return &cpu_rq(cpu)->rt;
279 } 279 }
280 280
281 static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) 281 static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
282 { 282 {
283 return &def_rt_bandwidth; 283 return &def_rt_bandwidth;
284 } 284 }
285 285
286 #endif /* CONFIG_RT_GROUP_SCHED */ 286 #endif /* CONFIG_RT_GROUP_SCHED */
287 287
288 #ifdef CONFIG_SMP 288 #ifdef CONFIG_SMP
289 /* 289 /*
290 * We ran out of runtime, see if we can borrow some from our neighbours. 290 * We ran out of runtime, see if we can borrow some from our neighbours.
291 */ 291 */
292 static int do_balance_runtime(struct rt_rq *rt_rq) 292 static int do_balance_runtime(struct rt_rq *rt_rq)
293 { 293 {
294 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 294 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
295 struct root_domain *rd = cpu_rq(smp_processor_id())->rd; 295 struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
296 int i, weight, more = 0; 296 int i, weight, more = 0;
297 u64 rt_period; 297 u64 rt_period;
298 298
299 weight = cpumask_weight(rd->span); 299 weight = cpumask_weight(rd->span);
300 300
301 spin_lock(&rt_b->rt_runtime_lock); 301 spin_lock(&rt_b->rt_runtime_lock);
302 rt_period = ktime_to_ns(rt_b->rt_period); 302 rt_period = ktime_to_ns(rt_b->rt_period);
303 for_each_cpu(i, rd->span) { 303 for_each_cpu(i, rd->span) {
304 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 304 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
305 s64 diff; 305 s64 diff;
306 306
307 if (iter == rt_rq) 307 if (iter == rt_rq)
308 continue; 308 continue;
309 309
310 spin_lock(&iter->rt_runtime_lock); 310 spin_lock(&iter->rt_runtime_lock);
311 /* 311 /*
312 * Either all rqs have inf runtime and there's nothing to steal 312 * Either all rqs have inf runtime and there's nothing to steal
313 * or __disable_runtime() below sets a specific rq to inf to 313 * or __disable_runtime() below sets a specific rq to inf to
314 * indicate it's been disabled and disallow stealing. 314 * indicate it's been disabled and disallow stealing.
315 */ 315 */
316 if (iter->rt_runtime == RUNTIME_INF) 316 if (iter->rt_runtime == RUNTIME_INF)
317 goto next; 317 goto next;
318 318
319 /* 319 /*
320 * From runqueues with spare time, take 1/n part of their 320 * From runqueues with spare time, take 1/n part of their
321 * spare time, but no more than our period. 321 * spare time, but no more than our period.
322 */ 322 */
323 diff = iter->rt_runtime - iter->rt_time; 323 diff = iter->rt_runtime - iter->rt_time;
324 if (diff > 0) { 324 if (diff > 0) {
325 diff = div_u64((u64)diff, weight); 325 diff = div_u64((u64)diff, weight);
326 if (rt_rq->rt_runtime + diff > rt_period) 326 if (rt_rq->rt_runtime + diff > rt_period)
327 diff = rt_period - rt_rq->rt_runtime; 327 diff = rt_period - rt_rq->rt_runtime;
328 iter->rt_runtime -= diff; 328 iter->rt_runtime -= diff;
329 rt_rq->rt_runtime += diff; 329 rt_rq->rt_runtime += diff;
330 more = 1; 330 more = 1;
331 if (rt_rq->rt_runtime == rt_period) { 331 if (rt_rq->rt_runtime == rt_period) {
332 spin_unlock(&iter->rt_runtime_lock); 332 spin_unlock(&iter->rt_runtime_lock);
333 break; 333 break;
334 } 334 }
335 } 335 }
336 next: 336 next:
337 spin_unlock(&iter->rt_runtime_lock); 337 spin_unlock(&iter->rt_runtime_lock);
338 } 338 }
339 spin_unlock(&rt_b->rt_runtime_lock); 339 spin_unlock(&rt_b->rt_runtime_lock);
340 340
341 return more; 341 return more;
342 } 342 }
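A worked example of the 1/n borrowing arithmetic above, with made-up numbers and a 1 s period:

/*
 * Hypothetical values (rt_period = 1000 ms, weight = 4 CPUs in rd->span):
 *
 *	iter->rt_runtime = 950 ms, iter->rt_time = 150 ms
 *	diff = 950 - 150 = 800 ms spare on that neighbour
 *	diff = div_u64(800, 4) = 200 ms actually taken
 *
 * The borrower gains 200 ms (clamped so rt_rq->rt_runtime never exceeds
 * rt_period), the neighbour keeps the rest of its spare time, and the
 * loop stops early once the borrower holds a full period of runtime.
 */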
343 343
344 /* 344 /*
345 * Ensure this RQ takes back all the runtime it lent to its neighbours. 345 * Ensure this RQ takes back all the runtime it lent to its neighbours.
346 */ 346 */
347 static void __disable_runtime(struct rq *rq) 347 static void __disable_runtime(struct rq *rq)
348 { 348 {
349 struct root_domain *rd = rq->rd; 349 struct root_domain *rd = rq->rd;
350 struct rt_rq *rt_rq; 350 struct rt_rq *rt_rq;
351 351
352 if (unlikely(!scheduler_running)) 352 if (unlikely(!scheduler_running))
353 return; 353 return;
354 354
355 for_each_leaf_rt_rq(rt_rq, rq) { 355 for_each_leaf_rt_rq(rt_rq, rq) {
356 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 356 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
357 s64 want; 357 s64 want;
358 int i; 358 int i;
359 359
360 spin_lock(&rt_b->rt_runtime_lock); 360 spin_lock(&rt_b->rt_runtime_lock);
361 spin_lock(&rt_rq->rt_runtime_lock); 361 spin_lock(&rt_rq->rt_runtime_lock);
362 /* 362 /*
363 * Either we're all inf and nobody needs to borrow, or we're 363 * Either we're all inf and nobody needs to borrow, or we're
364 * already disabled and thus have nothing to do, or we have 364 * already disabled and thus have nothing to do, or we have
365 * exactly the right amount of runtime to take out. 365 * exactly the right amount of runtime to take out.
366 */ 366 */
367 if (rt_rq->rt_runtime == RUNTIME_INF || 367 if (rt_rq->rt_runtime == RUNTIME_INF ||
368 rt_rq->rt_runtime == rt_b->rt_runtime) 368 rt_rq->rt_runtime == rt_b->rt_runtime)
369 goto balanced; 369 goto balanced;
370 spin_unlock(&rt_rq->rt_runtime_lock); 370 spin_unlock(&rt_rq->rt_runtime_lock);
371 371
372 /* 372 /*
373 * Calculate the difference between what we started out with 373 * Calculate the difference between what we started out with
374 * and what we currently have, that's the amount of runtime 374 * and what we currently have, that's the amount of runtime
375 * we lend and now have to reclaim. 375 * we lend and now have to reclaim.
376 */ 376 */
377 want = rt_b->rt_runtime - rt_rq->rt_runtime; 377 want = rt_b->rt_runtime - rt_rq->rt_runtime;
378 378
379 /* 379 /*
380 * Greedy reclaim, take back as much as we can. 380 * Greedy reclaim, take back as much as we can.
381 */ 381 */
382 for_each_cpu(i, rd->span) { 382 for_each_cpu(i, rd->span) {
383 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 383 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
384 s64 diff; 384 s64 diff;
385 385
386 /* 386 /*
387 * Can't reclaim from ourselves or disabled runqueues. 387 * Can't reclaim from ourselves or disabled runqueues.
388 */ 388 */
389 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF) 389 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
390 continue; 390 continue;
391 391
392 spin_lock(&iter->rt_runtime_lock); 392 spin_lock(&iter->rt_runtime_lock);
393 if (want > 0) { 393 if (want > 0) {
394 diff = min_t(s64, iter->rt_runtime, want); 394 diff = min_t(s64, iter->rt_runtime, want);
395 iter->rt_runtime -= diff; 395 iter->rt_runtime -= diff;
396 want -= diff; 396 want -= diff;
397 } else { 397 } else {
398 iter->rt_runtime -= want; 398 iter->rt_runtime -= want;
399 want -= want; 399 want -= want;
400 } 400 }
401 spin_unlock(&iter->rt_runtime_lock); 401 spin_unlock(&iter->rt_runtime_lock);
402 402
403 if (!want) 403 if (!want)
404 break; 404 break;
405 } 405 }
406 406
407 spin_lock(&rt_rq->rt_runtime_lock); 407 spin_lock(&rt_rq->rt_runtime_lock);
408 /* 408 /*
409 * We cannot be left wanting - that would mean some runtime 409 * We cannot be left wanting - that would mean some runtime
410 * leaked out of the system. 410 * leaked out of the system.
411 */ 411 */
412 BUG_ON(want); 412 BUG_ON(want);
413 balanced: 413 balanced:
414 /* 414 /*
415 * Disable all the borrow logic by pretending we have inf 415 * Disable all the borrow logic by pretending we have inf
416 * runtime - in which case borrowing doesn't make sense. 416 * runtime - in which case borrowing doesn't make sense.
417 */ 417 */
418 rt_rq->rt_runtime = RUNTIME_INF; 418 rt_rq->rt_runtime = RUNTIME_INF;
419 spin_unlock(&rt_rq->rt_runtime_lock); 419 spin_unlock(&rt_rq->rt_runtime_lock);
420 spin_unlock(&rt_b->rt_runtime_lock); 420 spin_unlock(&rt_b->rt_runtime_lock);
421 } 421 }
422 } 422 }
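The reclaim loop above takes back exactly the amount this runqueue is short of its configured share. A worked example with hypothetical numbers:

/*
 * Hypothetical reclaim (rt_b->rt_runtime = 950 ms):
 *
 *	rt_rq->rt_runtime = 750 ms  ->  want = 950 - 750 = 200 ms
 *	neighbour A: rt_runtime = 150 ms  ->  take min(150, 200) = 150, want = 50
 *	neighbour B: rt_runtime = 400 ms  ->  take min(400, 50)  =  50, want = 0
 *
 * If want is still non-zero after visiting every CPU in rd->span, runtime
 * has leaked out of the system, which is what BUG_ON(want) asserts.
 */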
423 423
424 static void disable_runtime(struct rq *rq) 424 static void disable_runtime(struct rq *rq)
425 { 425 {
426 unsigned long flags; 426 unsigned long flags;
427 427
428 spin_lock_irqsave(&rq->lock, flags); 428 spin_lock_irqsave(&rq->lock, flags);
429 __disable_runtime(rq); 429 __disable_runtime(rq);
430 spin_unlock_irqrestore(&rq->lock, flags); 430 spin_unlock_irqrestore(&rq->lock, flags);
431 } 431 }
432 432
433 static void __enable_runtime(struct rq *rq) 433 static void __enable_runtime(struct rq *rq)
434 { 434 {
435 struct rt_rq *rt_rq; 435 struct rt_rq *rt_rq;
436 436
437 if (unlikely(!scheduler_running)) 437 if (unlikely(!scheduler_running))
438 return; 438 return;
439 439
440 /* 440 /*
441 * Reset each runqueue's bandwidth settings 441 * Reset each runqueue's bandwidth settings
442 */ 442 */
443 for_each_leaf_rt_rq(rt_rq, rq) { 443 for_each_leaf_rt_rq(rt_rq, rq) {
444 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 444 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
445 445
446 spin_lock(&rt_b->rt_runtime_lock); 446 spin_lock(&rt_b->rt_runtime_lock);
447 spin_lock(&rt_rq->rt_runtime_lock); 447 spin_lock(&rt_rq->rt_runtime_lock);
448 rt_rq->rt_runtime = rt_b->rt_runtime; 448 rt_rq->rt_runtime = rt_b->rt_runtime;
449 rt_rq->rt_time = 0; 449 rt_rq->rt_time = 0;
450 rt_rq->rt_throttled = 0; 450 rt_rq->rt_throttled = 0;
451 spin_unlock(&rt_rq->rt_runtime_lock); 451 spin_unlock(&rt_rq->rt_runtime_lock);
452 spin_unlock(&rt_b->rt_runtime_lock); 452 spin_unlock(&rt_b->rt_runtime_lock);
453 } 453 }
454 } 454 }
455 455
456 static void enable_runtime(struct rq *rq) 456 static void enable_runtime(struct rq *rq)
457 { 457 {
458 unsigned long flags; 458 unsigned long flags;
459 459
460 spin_lock_irqsave(&rq->lock, flags); 460 spin_lock_irqsave(&rq->lock, flags);
461 __enable_runtime(rq); 461 __enable_runtime(rq);
462 spin_unlock_irqrestore(&rq->lock, flags); 462 spin_unlock_irqrestore(&rq->lock, flags);
463 } 463 }
464 464
465 static int balance_runtime(struct rt_rq *rt_rq) 465 static int balance_runtime(struct rt_rq *rt_rq)
466 { 466 {
467 int more = 0; 467 int more = 0;
468 468
469 if (rt_rq->rt_time > rt_rq->rt_runtime) { 469 if (rt_rq->rt_time > rt_rq->rt_runtime) {
470 spin_unlock(&rt_rq->rt_runtime_lock); 470 spin_unlock(&rt_rq->rt_runtime_lock);
471 more = do_balance_runtime(rt_rq); 471 more = do_balance_runtime(rt_rq);
472 spin_lock(&rt_rq->rt_runtime_lock); 472 spin_lock(&rt_rq->rt_runtime_lock);
473 } 473 }
474 474
475 return more; 475 return more;
476 } 476 }
477 #else /* !CONFIG_SMP */ 477 #else /* !CONFIG_SMP */
478 static inline int balance_runtime(struct rt_rq *rt_rq) 478 static inline int balance_runtime(struct rt_rq *rt_rq)
479 { 479 {
480 return 0; 480 return 0;
481 } 481 }
482 #endif /* CONFIG_SMP */ 482 #endif /* CONFIG_SMP */
483 483
484 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) 484 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
485 { 485 {
486 int i, idle = 1; 486 int i, idle = 1;
487 const struct cpumask *span; 487 const struct cpumask *span;
488 488
489 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) 489 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
490 return 1; 490 return 1;
491 491
492 span = sched_rt_period_mask(); 492 span = sched_rt_period_mask();
493 for_each_cpu(i, span) { 493 for_each_cpu(i, span) {
494 int enqueue = 0; 494 int enqueue = 0;
495 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); 495 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
496 struct rq *rq = rq_of_rt_rq(rt_rq); 496 struct rq *rq = rq_of_rt_rq(rt_rq);
497 497
498 spin_lock(&rq->lock); 498 spin_lock(&rq->lock);
499 if (rt_rq->rt_time) { 499 if (rt_rq->rt_time) {
500 u64 runtime; 500 u64 runtime;
501 501
502 spin_lock(&rt_rq->rt_runtime_lock); 502 spin_lock(&rt_rq->rt_runtime_lock);
503 if (rt_rq->rt_throttled) 503 if (rt_rq->rt_throttled)
504 balance_runtime(rt_rq); 504 balance_runtime(rt_rq);
505 runtime = rt_rq->rt_runtime; 505 runtime = rt_rq->rt_runtime;
506 rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); 506 rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
507 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { 507 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
508 rt_rq->rt_throttled = 0; 508 rt_rq->rt_throttled = 0;
509 enqueue = 1; 509 enqueue = 1;
510 } 510 }
511 if (rt_rq->rt_time || rt_rq->rt_nr_running) 511 if (rt_rq->rt_time || rt_rq->rt_nr_running)
512 idle = 0; 512 idle = 0;
513 spin_unlock(&rt_rq->rt_runtime_lock); 513 spin_unlock(&rt_rq->rt_runtime_lock);
514 } else if (rt_rq->rt_nr_running) 514 } else if (rt_rq->rt_nr_running)
515 idle = 0; 515 idle = 0;
516 516
517 if (enqueue) 517 if (enqueue)
518 sched_rt_rq_enqueue(rt_rq); 518 sched_rt_rq_enqueue(rt_rq);
519 spin_unlock(&rq->lock); 519 spin_unlock(&rq->lock);
520 } 520 }
521 521
522 return idle; 522 return idle;
523 } 523 }
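A worked example of the per-period refresh above, again with hypothetical values:

/*
 * Hypothetical period expiry (overrun = 1, runtime = 950 ms):
 *
 *	rt_rq->rt_time = 1200 ms  ->  rt_time -= min(1200, 1 * 950) = 250 ms left
 *	250 ms < 950 ms, so a throttled rt_rq is unthrottled and re-enqueued
 *
 * With overrun = 2 (the timer fell behind) two periods' worth of runtime
 * is refunded at once. idle is only returned when no rt_rq in the span
 * still has leftover rt_time or runnable tasks.
 */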
524 524
525 static inline int rt_se_prio(struct sched_rt_entity *rt_se) 525 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
526 { 526 {
527 #ifdef CONFIG_RT_GROUP_SCHED 527 #ifdef CONFIG_RT_GROUP_SCHED
528 struct rt_rq *rt_rq = group_rt_rq(rt_se); 528 struct rt_rq *rt_rq = group_rt_rq(rt_se);
529 529
530 if (rt_rq) 530 if (rt_rq)
531 return rt_rq->highest_prio.curr; 531 return rt_rq->highest_prio.curr;
532 #endif 532 #endif
533 533
534 return rt_task_of(rt_se)->prio; 534 return rt_task_of(rt_se)->prio;
535 } 535 }
536 536
537 static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) 537 static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
538 { 538 {
539 u64 runtime = sched_rt_runtime(rt_rq); 539 u64 runtime = sched_rt_runtime(rt_rq);
540 540
541 if (rt_rq->rt_throttled) 541 if (rt_rq->rt_throttled)
542 return rt_rq_throttled(rt_rq); 542 return rt_rq_throttled(rt_rq);
543 543
544 if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) 544 if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
545 return 0; 545 return 0;
546 546
547 balance_runtime(rt_rq); 547 balance_runtime(rt_rq);
548 runtime = sched_rt_runtime(rt_rq); 548 runtime = sched_rt_runtime(rt_rq);
549 if (runtime == RUNTIME_INF) 549 if (runtime == RUNTIME_INF)
550 return 0; 550 return 0;
551 551
552 if (rt_rq->rt_time > runtime) { 552 if (rt_rq->rt_time > runtime) {
553 rt_rq->rt_throttled = 1; 553 rt_rq->rt_throttled = 1;
554 if (rt_rq_throttled(rt_rq)) { 554 if (rt_rq_throttled(rt_rq)) {
555 sched_rt_rq_dequeue(rt_rq); 555 sched_rt_rq_dequeue(rt_rq);
556 return 1; 556 return 1;
557 } 557 }
558 } 558 }
559 559
560 return 0; 560 return 0;
561 } 561 }
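The throttle decision above compares accumulated execution time with the, possibly just rebalanced, runtime budget. A small worked example:

/*
 * Hypothetical check (runtime = 950 ms out of a 1000 ms period):
 *
 *	rt_rq->rt_time = 960 ms > 950 ms  ->  rt_throttled = 1, dequeue, return 1
 *	rt_rq->rt_time = 900 ms           ->  return 0, keep running
 *
 * If the budget is RUNTIME_INF, or at least a whole period, the group can
 * never be throttled, which is why those cases bail out before accounting.
 */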
562 562
563 /* 563 /*
564 * Update the current task's runtime statistics. Skip current tasks that 564 * Update the current task's runtime statistics. Skip current tasks that
565 * are not in our scheduling class. 565 * are not in our scheduling class.
566 */ 566 */
567 static void update_curr_rt(struct rq *rq) 567 static void update_curr_rt(struct rq *rq)
568 { 568 {
569 struct task_struct *curr = rq->curr; 569 struct task_struct *curr = rq->curr;
570 struct sched_rt_entity *rt_se = &curr->rt; 570 struct sched_rt_entity *rt_se = &curr->rt;
571 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 571 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
572 u64 delta_exec; 572 u64 delta_exec;
573 573
574 if (!task_has_rt_policy(curr)) 574 if (!task_has_rt_policy(curr))
575 return; 575 return;
576 576
577 delta_exec = rq->clock - curr->se.exec_start; 577 delta_exec = rq->clock - curr->se.exec_start;
578 if (unlikely((s64)delta_exec < 0)) 578 if (unlikely((s64)delta_exec < 0))
579 delta_exec = 0; 579 delta_exec = 0;
580 580
581 schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); 581 schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
582 582
583 curr->se.sum_exec_runtime += delta_exec; 583 curr->se.sum_exec_runtime += delta_exec;
584 account_group_exec_runtime(curr, delta_exec); 584 account_group_exec_runtime(curr, delta_exec);
585 585
586 curr->se.exec_start = rq->clock; 586 curr->se.exec_start = rq->clock;
587 cpuacct_charge(curr, delta_exec); 587 cpuacct_charge(curr, delta_exec);
588 588
589 if (!rt_bandwidth_enabled()) 589 if (!rt_bandwidth_enabled())
590 return; 590 return;
591 591
592 for_each_sched_rt_entity(rt_se) { 592 for_each_sched_rt_entity(rt_se) {
593 rt_rq = rt_rq_of_se(rt_se); 593 rt_rq = rt_rq_of_se(rt_se);
594 594
595 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { 595 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
596 spin_lock(&rt_rq->rt_runtime_lock); 596 spin_lock(&rt_rq->rt_runtime_lock);
597 rt_rq->rt_time += delta_exec; 597 rt_rq->rt_time += delta_exec;
598 if (sched_rt_runtime_exceeded(rt_rq)) 598 if (sched_rt_runtime_exceeded(rt_rq))
599 resched_task(curr); 599 resched_task(curr);
600 spin_unlock(&rt_rq->rt_runtime_lock); 600 spin_unlock(&rt_rq->rt_runtime_lock);
601 } 601 }
602 } 602 }
603 } 603 }
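A hedged sketch of the accounting flow above from a caller's point of view; the sequence is illustrative, not a verbatim call site:

/*
 * p->se.exec_start = rq->clock;   stamped when the task was put on the CPU
 * ... the task runs ...
 * update_curr_rt(rq);             delta_exec = rq->clock - exec_start,
 *                                 charged to every rt_rq up the hierarchy;
 *                                 the task is rescheduled as soon as one
 *                                 of those budgets is exceeded.
 */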
604 604
605 #if defined CONFIG_SMP 605 #if defined CONFIG_SMP
606 606
607 static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu); 607 static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
608 608
609 static inline int next_prio(struct rq *rq) 609 static inline int next_prio(struct rq *rq)
610 { 610 {
611 struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu); 611 struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);
612 612
613 if (next && rt_prio(next->prio)) 613 if (next && rt_prio(next->prio))
614 return next->prio; 614 return next->prio;
615 else 615 else
616 return MAX_RT_PRIO; 616 return MAX_RT_PRIO;
617 } 617 }
618 618
619 static void 619 static void
620 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) 620 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
621 { 621 {
622 struct rq *rq = rq_of_rt_rq(rt_rq); 622 struct rq *rq = rq_of_rt_rq(rt_rq);
623 623
624 if (prio < prev_prio) { 624 if (prio < prev_prio) {
625 625
626 /* 626 /*
627 * If the new task is higher in priority than anything on the 627 * If the new task is higher in priority than anything on the
628 * run-queue, we know that the previous high becomes our 628 * run-queue, we know that the previous high becomes our
629 * next-highest. 629 * next-highest.
630 */ 630 */
631 rt_rq->highest_prio.next = prev_prio; 631 rt_rq->highest_prio.next = prev_prio;
632 632
633 if (rq->online) 633 if (rq->online)
634 cpupri_set(&rq->rd->cpupri, rq->cpu, prio); 634 cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
635 635
636 } else if (prio == rt_rq->highest_prio.curr) 636 } else if (prio == rt_rq->highest_prio.curr)
637 /* 637 /*
638 * If the next task is equal in priority to the highest on 638 * If the next task is equal in priority to the highest on
639 * the run-queue, then we implicitly know that the next highest 639 * the run-queue, then we implicitly know that the next highest
640 * task cannot be any lower than current 640 * task cannot be any lower than current
641 */ 641 */
642 rt_rq->highest_prio.next = prio; 642 rt_rq->highest_prio.next = prio;
643 else if (prio < rt_rq->highest_prio.next) 643 else if (prio < rt_rq->highest_prio.next)
644 /* 644 /*
645 * Otherwise, we need to recompute next-highest 645 * Otherwise, we need to recompute next-highest
646 */ 646 */
647 rt_rq->highest_prio.next = next_prio(rq); 647 rt_rq->highest_prio.next = next_prio(rq);
648 } 648 }
649 649
650 static void 650 static void
651 dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) 651 dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
652 { 652 {
653 struct rq *rq = rq_of_rt_rq(rt_rq); 653 struct rq *rq = rq_of_rt_rq(rt_rq);
654 654
655 if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next)) 655 if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next))
656 rt_rq->highest_prio.next = next_prio(rq); 656 rt_rq->highest_prio.next = next_prio(rq);
657 657
658 if (rq->online && rt_rq->highest_prio.curr != prev_prio) 658 if (rq->online && rt_rq->highest_prio.curr != prev_prio)
659 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr); 659 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
660 } 660 }
661 661
662 #else /* CONFIG_SMP */ 662 #else /* CONFIG_SMP */
663 663
664 static inline 664 static inline
665 void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} 665 void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
666 static inline 666 static inline
667 void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} 667 void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
668 668
669 #endif /* CONFIG_SMP */ 669 #endif /* CONFIG_SMP */
670 670
671 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED 671 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
672 static void 672 static void
673 inc_rt_prio(struct rt_rq *rt_rq, int prio) 673 inc_rt_prio(struct rt_rq *rt_rq, int prio)
674 { 674 {
675 int prev_prio = rt_rq->highest_prio.curr; 675 int prev_prio = rt_rq->highest_prio.curr;
676 676
677 if (prio < prev_prio) 677 if (prio < prev_prio)
678 rt_rq->highest_prio.curr = prio; 678 rt_rq->highest_prio.curr = prio;
679 679
680 inc_rt_prio_smp(rt_rq, prio, prev_prio); 680 inc_rt_prio_smp(rt_rq, prio, prev_prio);
681 } 681 }
682 682
683 static void 683 static void
684 dec_rt_prio(struct rt_rq *rt_rq, int prio) 684 dec_rt_prio(struct rt_rq *rt_rq, int prio)
685 { 685 {
686 int prev_prio = rt_rq->highest_prio.curr; 686 int prev_prio = rt_rq->highest_prio.curr;
687 687
688 if (rt_rq->rt_nr_running) { 688 if (rt_rq->rt_nr_running) {
689 689
690 WARN_ON(prio < prev_prio); 690 WARN_ON(prio < prev_prio);
691 691
692 /* 692 /*
693 * This may have been our highest task, and therefore 693 * This may have been our highest task, and therefore
694 * we may have some recomputation to do 694 * we may have some recomputation to do
695 */ 695 */
696 if (prio == prev_prio) { 696 if (prio == prev_prio) {
697 struct rt_prio_array *array = &rt_rq->active; 697 struct rt_prio_array *array = &rt_rq->active;
698 698
699 rt_rq->highest_prio.curr = 699 rt_rq->highest_prio.curr =
700 sched_find_first_bit(array->bitmap); 700 sched_find_first_bit(array->bitmap);
701 } 701 }
702 702
703 } else 703 } else
704 rt_rq->highest_prio.curr = MAX_RT_PRIO; 704 rt_rq->highest_prio.curr = MAX_RT_PRIO;
705 705
706 dec_rt_prio_smp(rt_rq, prio, prev_prio); 706 dec_rt_prio_smp(rt_rq, prio, prev_prio);
707 } 707 }
708 708
709 #else 709 #else
710 710
711 static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {} 711 static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
712 static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {} 712 static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
713 713
714 #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */ 714 #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
715 715
716 #ifdef CONFIG_RT_GROUP_SCHED 716 #ifdef CONFIG_RT_GROUP_SCHED
717 717
718 static void 718 static void
719 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 719 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
720 { 720 {
721 if (rt_se_boosted(rt_se)) 721 if (rt_se_boosted(rt_se))
722 rt_rq->rt_nr_boosted++; 722 rt_rq->rt_nr_boosted++;
723 723
724 if (rt_rq->tg) 724 if (rt_rq->tg)
725 start_rt_bandwidth(&rt_rq->tg->rt_bandwidth); 725 start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
726 } 726 }
727 727
728 static void 728 static void
729 dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 729 dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
730 { 730 {
731 if (rt_se_boosted(rt_se)) 731 if (rt_se_boosted(rt_se))
732 rt_rq->rt_nr_boosted--; 732 rt_rq->rt_nr_boosted--;
733 733
734 WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted); 734 WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
735 } 735 }
736 736
737 #else /* CONFIG_RT_GROUP_SCHED */ 737 #else /* CONFIG_RT_GROUP_SCHED */
738 738
739 static void 739 static void
740 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 740 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
741 { 741 {
742 start_rt_bandwidth(&def_rt_bandwidth); 742 start_rt_bandwidth(&def_rt_bandwidth);
743 } 743 }
744 744
745 static inline 745 static inline
746 void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {} 746 void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
747 747
748 #endif /* CONFIG_RT_GROUP_SCHED */ 748 #endif /* CONFIG_RT_GROUP_SCHED */
749 749
750 static inline 750 static inline
751 void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 751 void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
752 { 752 {
753 int prio = rt_se_prio(rt_se); 753 int prio = rt_se_prio(rt_se);
754 754
755 WARN_ON(!rt_prio(prio)); 755 WARN_ON(!rt_prio(prio));
756 rt_rq->rt_nr_running++; 756 rt_rq->rt_nr_running++;
757 757
758 inc_rt_prio(rt_rq, prio); 758 inc_rt_prio(rt_rq, prio);
759 inc_rt_migration(rt_se, rt_rq); 759 inc_rt_migration(rt_se, rt_rq);
760 inc_rt_group(rt_se, rt_rq); 760 inc_rt_group(rt_se, rt_rq);
761 } 761 }
762 762
763 static inline 763 static inline
764 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 764 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
765 { 765 {
766 WARN_ON(!rt_prio(rt_se_prio(rt_se))); 766 WARN_ON(!rt_prio(rt_se_prio(rt_se)));
767 WARN_ON(!rt_rq->rt_nr_running); 767 WARN_ON(!rt_rq->rt_nr_running);
768 rt_rq->rt_nr_running--; 768 rt_rq->rt_nr_running--;
769 769
770 dec_rt_prio(rt_rq, rt_se_prio(rt_se)); 770 dec_rt_prio(rt_rq, rt_se_prio(rt_se));
771 dec_rt_migration(rt_se, rt_rq); 771 dec_rt_migration(rt_se, rt_rq);
772 dec_rt_group(rt_se, rt_rq); 772 dec_rt_group(rt_se, rt_rq);
773 } 773 }
774 774
775 static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) 775 static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
776 { 776 {
777 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 777 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
778 struct rt_prio_array *array = &rt_rq->active; 778 struct rt_prio_array *array = &rt_rq->active;
779 struct rt_rq *group_rq = group_rt_rq(rt_se); 779 struct rt_rq *group_rq = group_rt_rq(rt_se);
780 struct list_head *queue = array->queue + rt_se_prio(rt_se); 780 struct list_head *queue = array->queue + rt_se_prio(rt_se);
781 781
782 /* 782 /*
783 * Don't enqueue the group if it's throttled, or when empty. 783 * Don't enqueue the group if it's throttled, or when empty.
784 * The latter is a consequence of the former when a child group 784 * The latter is a consequence of the former when a child group
785 * gets throttled and the current group doesn't have any other 785 * gets throttled and the current group doesn't have any other
786 * active members. 786 * active members.
787 */ 787 */
788 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) 788 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
789 return; 789 return;
790 790
791 list_add_tail(&rt_se->run_list, queue); 791 list_add_tail(&rt_se->run_list, queue);
792 __set_bit(rt_se_prio(rt_se), array->bitmap); 792 __set_bit(rt_se_prio(rt_se), array->bitmap);
793 793
794 inc_rt_tasks(rt_se, rt_rq); 794 inc_rt_tasks(rt_se, rt_rq);
795 } 795 }
796 796
797 static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) 797 static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
798 { 798 {
799 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 799 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
800 struct rt_prio_array *array = &rt_rq->active; 800 struct rt_prio_array *array = &rt_rq->active;
801 801
802 list_del_init(&rt_se->run_list); 802 list_del_init(&rt_se->run_list);
803 if (list_empty(array->queue + rt_se_prio(rt_se))) 803 if (list_empty(array->queue + rt_se_prio(rt_se)))
804 __clear_bit(rt_se_prio(rt_se), array->bitmap); 804 __clear_bit(rt_se_prio(rt_se), array->bitmap);
805 805
806 dec_rt_tasks(rt_se, rt_rq); 806 dec_rt_tasks(rt_se, rt_rq);
807 } 807 }
808 808
809 /* 809 /*
810 * Because the prio of an upper entry depends on the lower 810 * Because the prio of an upper entry depends on the lower
811 * entries, we must remove entries top-down. 811 * entries, we must remove entries top-down.
812 */ 812 */
813 static void dequeue_rt_stack(struct sched_rt_entity *rt_se) 813 static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
814 { 814 {
815 struct sched_rt_entity *back = NULL; 815 struct sched_rt_entity *back = NULL;
816 816
817 for_each_sched_rt_entity(rt_se) { 817 for_each_sched_rt_entity(rt_se) {
818 rt_se->back = back; 818 rt_se->back = back;
819 back = rt_se; 819 back = rt_se;
820 } 820 }
821 821
822 for (rt_se = back; rt_se; rt_se = rt_se->back) { 822 for (rt_se = back; rt_se; rt_se = rt_se->back) {
823 if (on_rt_rq(rt_se)) 823 if (on_rt_rq(rt_se))
824 __dequeue_rt_entity(rt_se); 824 __dequeue_rt_entity(rt_se);
825 } 825 }
826 } 826 }
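A hedged illustration of the back-pointer walk above for a two-level hierarchy (hypothetical task p inside group A):

/*
 * Hierarchy: p's rt_se is queued on A's rt_rq, A's rt_se on the root rt_rq.
 *
 *	first loop:   p->rt.back = NULL, then A's rt_se->back = &p->rt;
 *	              'back' ends up pointing at the topmost entity (A)
 *	second loop:  dequeue A's rt_se first, then p's rt_se
 *
 * Tearing down top-down matters because an upper entity's priority is
 * derived from whatever is still queued below it.
 */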
827 827
828 static void enqueue_rt_entity(struct sched_rt_entity *rt_se) 828 static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
829 { 829 {
830 dequeue_rt_stack(rt_se); 830 dequeue_rt_stack(rt_se);
831 for_each_sched_rt_entity(rt_se) 831 for_each_sched_rt_entity(rt_se)
832 __enqueue_rt_entity(rt_se); 832 __enqueue_rt_entity(rt_se);
833 } 833 }
834 834
835 static void dequeue_rt_entity(struct sched_rt_entity *rt_se) 835 static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
836 { 836 {
837 dequeue_rt_stack(rt_se); 837 dequeue_rt_stack(rt_se);
838 838
839 for_each_sched_rt_entity(rt_se) { 839 for_each_sched_rt_entity(rt_se) {
840 struct rt_rq *rt_rq = group_rt_rq(rt_se); 840 struct rt_rq *rt_rq = group_rt_rq(rt_se);
841 841
842 if (rt_rq && rt_rq->rt_nr_running) 842 if (rt_rq && rt_rq->rt_nr_running)
843 __enqueue_rt_entity(rt_se); 843 __enqueue_rt_entity(rt_se);
844 } 844 }
845 } 845 }
846 846
847 /* 847 /*
848 * Adding/removing a task to/from a priority array: 848 * Adding/removing a task to/from a priority array:
849 */ 849 */
850 static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) 850 static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
851 { 851 {
852 struct sched_rt_entity *rt_se = &p->rt; 852 struct sched_rt_entity *rt_se = &p->rt;
853 853
854 if (wakeup) 854 if (wakeup)
855 rt_se->timeout = 0; 855 rt_se->timeout = 0;
856 856
857 enqueue_rt_entity(rt_se); 857 enqueue_rt_entity(rt_se);
858 858
859 if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) 859 if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
860 enqueue_pushable_task(rq, p); 860 enqueue_pushable_task(rq, p);
861 861
862 inc_cpu_load(rq, p->se.load.weight); 862 inc_cpu_load(rq, p->se.load.weight);
863 } 863 }
864 864
865 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) 865 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
866 { 866 {
867 struct sched_rt_entity *rt_se = &p->rt; 867 struct sched_rt_entity *rt_se = &p->rt;
868 868
869 update_curr_rt(rq); 869 update_curr_rt(rq);
870 dequeue_rt_entity(rt_se); 870 dequeue_rt_entity(rt_se);
871 871
872 dequeue_pushable_task(rq, p); 872 dequeue_pushable_task(rq, p);
873 873
874 dec_cpu_load(rq, p->se.load.weight); 874 dec_cpu_load(rq, p->se.load.weight);
875 } 875 }
876 876
877 /* 877 /*
878 * Put task to the end of the run list without the overhead of dequeue 878 * Put task to the end of the run list without the overhead of dequeue
879 * followed by enqueue. 879 * followed by enqueue.
880 */ 880 */
881 static void 881 static void
882 requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head) 882 requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
883 { 883 {
884 if (on_rt_rq(rt_se)) { 884 if (on_rt_rq(rt_se)) {
885 struct rt_prio_array *array = &rt_rq->active; 885 struct rt_prio_array *array = &rt_rq->active;
886 struct list_head *queue = array->queue + rt_se_prio(rt_se); 886 struct list_head *queue = array->queue + rt_se_prio(rt_se);
887 887
888 if (head) 888 if (head)
889 list_move(&rt_se->run_list, queue); 889 list_move(&rt_se->run_list, queue);
890 else 890 else
891 list_move_tail(&rt_se->run_list, queue); 891 list_move_tail(&rt_se->run_list, queue);
892 } 892 }
893 } 893 }
894 894
895 static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head) 895 static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
896 { 896 {
897 struct sched_rt_entity *rt_se = &p->rt; 897 struct sched_rt_entity *rt_se = &p->rt;
898 struct rt_rq *rt_rq; 898 struct rt_rq *rt_rq;
899 899
900 for_each_sched_rt_entity(rt_se) { 900 for_each_sched_rt_entity(rt_se) {
901 rt_rq = rt_rq_of_se(rt_se); 901 rt_rq = rt_rq_of_se(rt_se);
902 requeue_rt_entity(rt_rq, rt_se, head); 902 requeue_rt_entity(rt_rq, rt_se, head);
903 } 903 }
904 } 904 }
905 905
906 static void yield_task_rt(struct rq *rq) 906 static void yield_task_rt(struct rq *rq)
907 { 907 {
908 requeue_task_rt(rq, rq->curr, 0); 908 requeue_task_rt(rq, rq->curr, 0);
909 } 909 }
910 910
911 #ifdef CONFIG_SMP 911 #ifdef CONFIG_SMP
912 static int find_lowest_rq(struct task_struct *task); 912 static int find_lowest_rq(struct task_struct *task);
913 913
914 static int select_task_rq_rt(struct task_struct *p, int sync) 914 static int select_task_rq_rt(struct task_struct *p, int sync)
915 { 915 {
916 struct rq *rq = task_rq(p); 916 struct rq *rq = task_rq(p);
917 917
918 /* 918 /*
919 * If the current task is an RT task, then 919 * If the current task is an RT task, then
920 * try to see if we can wake this RT task up on another 920 * try to see if we can wake this RT task up on another
921 * runqueue. Otherwise simply start this RT task 921 * runqueue. Otherwise simply start this RT task
922 * on its current runqueue. 922 * on its current runqueue.
923 * 923 *
924 * We want to avoid overloading runqueues, even if 924 * We want to avoid overloading runqueues, even if
925 * the RT task is of higher priority than the current RT task. 925 * the RT task is of higher priority than the current RT task.
926 * RT tasks behave differently than other tasks. If 926 * RT tasks behave differently than other tasks. If
927 * one gets preempted, we try to push it off to another queue. 927 * one gets preempted, we try to push it off to another queue.
928 * So trying to keep a preempting RT task on the same 928 * So trying to keep a preempting RT task on the same
929 * cache hot CPU will force the running RT task to 929 * cache hot CPU will force the running RT task to
930 * a cold CPU. So we waste all the cache for the lower 930 * a cold CPU. So we waste all the cache for the lower
931 * RT task in hopes of saving some of an RT task 931 * RT task in hopes of saving some of an RT task
932 * that is just being woken and probably will have 932 * that is just being woken and probably will have
933 * cold cache anyway. 933 * cold cache anyway.
934 */ 934 */
935 if (unlikely(rt_task(rq->curr)) && 935 if (unlikely(rt_task(rq->curr)) &&
936 (p->rt.nr_cpus_allowed > 1)) { 936 (p->rt.nr_cpus_allowed > 1)) {
937 int cpu = find_lowest_rq(p); 937 int cpu = find_lowest_rq(p);
938 938
939 return (cpu == -1) ? task_cpu(p) : cpu; 939 return (cpu == -1) ? task_cpu(p) : cpu;
940 } 940 }
941 941
942 /* 942 /*
943 * Otherwise, just let it ride on the affined RQ and the 943 * Otherwise, just let it ride on the affined RQ and the
944 * post-schedule router will push the preempted task away 944 * post-schedule router will push the preempted task away
945 */ 945 */
946 return task_cpu(p); 946 return task_cpu(p);
947 } 947 }
948 948
949 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 949 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
950 { 950 {
951 if (rq->curr->rt.nr_cpus_allowed == 1) 951 if (rq->curr->rt.nr_cpus_allowed == 1)
952 return; 952 return;
953 953
954 if (p->rt.nr_cpus_allowed != 1 954 if (p->rt.nr_cpus_allowed != 1
955 && cpupri_find(&rq->rd->cpupri, p, NULL)) 955 && cpupri_find(&rq->rd->cpupri, p, NULL))
956 return; 956 return;
957 957
958 if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) 958 if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
959 return; 959 return;
960 960
961 /* 961 /*
962 * There appear to be other cpus that can accept 962 * There appear to be other cpus that can accept
963 * current and none to run 'p', so let's reschedule 963 * current and none to run 'p', so let's reschedule
964 * to try and push current away: 964 * to try and push current away:
965 */ 965 */
966 requeue_task_rt(rq, p, 1); 966 requeue_task_rt(rq, p, 1);
967 resched_task(rq->curr); 967 resched_task(rq->curr);
968 } 968 }
969 969
970 #endif /* CONFIG_SMP */ 970 #endif /* CONFIG_SMP */
971 971
972 /* 972 /*
973 * Preempt the current task with a newly woken task if needed: 973 * Preempt the current task with a newly woken task if needed:
974 */ 974 */
975 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync) 975 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync)
976 { 976 {
977 if (p->prio < rq->curr->prio) { 977 if (p->prio < rq->curr->prio) {
978 resched_task(rq->curr); 978 resched_task(rq->curr);
979 return; 979 return;
980 } 980 }
981 981
982 #ifdef CONFIG_SMP 982 #ifdef CONFIG_SMP
983 /* 983 /*
984 * If: 984 * If:
985 * 985 *
986 * - the newly woken task is of equal priority to the current task 986 * - the newly woken task is of equal priority to the current task
987 * - the newly woken task is non-migratable while current is migratable 987 * - the newly woken task is non-migratable while current is migratable
988 * - current will be preempted on the next reschedule 988 * - current will be preempted on the next reschedule
989 * 989 *
990 * we should check to see if current can readily move to a different 990 * we should check to see if current can readily move to a different
991 * cpu. If so, we will reschedule to allow the push logic to try 991 * cpu. If so, we will reschedule to allow the push logic to try
992 * to move current somewhere else, making room for our non-migratable 992 * to move current somewhere else, making room for our non-migratable
993 * task. 993 * task.
994 */ 994 */
995 if (p->prio == rq->curr->prio && !need_resched()) 995 if (p->prio == rq->curr->prio && !need_resched())
996 check_preempt_equal_prio(rq, p); 996 check_preempt_equal_prio(rq, p);
997 #endif 997 #endif
998 } 998 }
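A hedged scenario for the equal-priority case handled above:

/*
 * Hypothetical situation on CPU 0:
 *
 *	curr: prio 10, may run on CPUs 0-3, and cpupri says some other CPU
 *	      is currently running something of lower priority
 *	p:    prio 10, pinned to CPU 0
 *
 * p cannot go anywhere else but curr can, so check_preempt_equal_prio()
 * requeues p at the head of its list and reschedules; the push logic can
 * then move curr away and p gets CPU 0 without waiting out a full slice.
 */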
999 999
1000 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, 1000 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
1001 struct rt_rq *rt_rq) 1001 struct rt_rq *rt_rq)
1002 { 1002 {
1003 struct rt_prio_array *array = &rt_rq->active; 1003 struct rt_prio_array *array = &rt_rq->active;
1004 struct sched_rt_entity *next = NULL; 1004 struct sched_rt_entity *next = NULL;
1005 struct list_head *queue; 1005 struct list_head *queue;
1006 int idx; 1006 int idx;
1007 1007
1008 idx = sched_find_first_bit(array->bitmap); 1008 idx = sched_find_first_bit(array->bitmap);
1009 BUG_ON(idx >= MAX_RT_PRIO); 1009 BUG_ON(idx >= MAX_RT_PRIO);
1010 1010
1011 queue = array->queue + idx; 1011 queue = array->queue + idx;
1012 next = list_entry(queue->next, struct sched_rt_entity, run_list); 1012 next = list_entry(queue->next, struct sched_rt_entity, run_list);
1013 1013
1014 return next; 1014 return next;
1015 } 1015 }
1016 1016
1017 static struct task_struct *_pick_next_task_rt(struct rq *rq) 1017 static struct task_struct *_pick_next_task_rt(struct rq *rq)
1018 { 1018 {
1019 struct sched_rt_entity *rt_se; 1019 struct sched_rt_entity *rt_se;
1020 struct task_struct *p; 1020 struct task_struct *p;
1021 struct rt_rq *rt_rq; 1021 struct rt_rq *rt_rq;
1022 1022
1023 rt_rq = &rq->rt; 1023 rt_rq = &rq->rt;
1024 1024
1025 if (unlikely(!rt_rq->rt_nr_running)) 1025 if (unlikely(!rt_rq->rt_nr_running))
1026 return NULL; 1026 return NULL;
1027 1027
1028 if (rt_rq_throttled(rt_rq)) 1028 if (rt_rq_throttled(rt_rq))
1029 return NULL; 1029 return NULL;
1030 1030
1031 do { 1031 do {
1032 rt_se = pick_next_rt_entity(rq, rt_rq); 1032 rt_se = pick_next_rt_entity(rq, rt_rq);
1033 BUG_ON(!rt_se); 1033 BUG_ON(!rt_se);
1034 rt_rq = group_rt_rq(rt_se); 1034 rt_rq = group_rt_rq(rt_se);
1035 } while (rt_rq); 1035 } while (rt_rq);
1036 1036
1037 p = rt_task_of(rt_se); 1037 p = rt_task_of(rt_se);
1038 p->se.exec_start = rq->clock; 1038 p->se.exec_start = rq->clock;
1039 1039
1040 return p; 1040 return p;
1041 } 1041 }
1042 1042
1043 static struct task_struct *pick_next_task_rt(struct rq *rq) 1043 static struct task_struct *pick_next_task_rt(struct rq *rq)
1044 { 1044 {
1045 struct task_struct *p = _pick_next_task_rt(rq); 1045 struct task_struct *p = _pick_next_task_rt(rq);
1046 1046
1047 /* The running task is never eligible for pushing */ 1047 /* The running task is never eligible for pushing */
1048 if (p) 1048 if (p)
1049 dequeue_pushable_task(rq, p); 1049 dequeue_pushable_task(rq, p);
1050 1050
1051 return p; 1051 return p;
1052 } 1052 }
1053 1053
1054 static void put_prev_task_rt(struct rq *rq, struct task_struct *p) 1054 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1055 { 1055 {
1056 update_curr_rt(rq); 1056 update_curr_rt(rq);
1057 p->se.exec_start = 0; 1057 p->se.exec_start = 0;
1058 1058
1059 /* 1059 /*
1060 * The previous task needs to be made eligible for pushing 1060 * The previous task needs to be made eligible for pushing
1061 * if it is still active 1061 * if it is still active
1062 */ 1062 */
1063 if (p->se.on_rq && p->rt.nr_cpus_allowed > 1) 1063 if (p->se.on_rq && p->rt.nr_cpus_allowed > 1)
1064 enqueue_pushable_task(rq, p); 1064 enqueue_pushable_task(rq, p);
1065 } 1065 }
1066 1066
1067 #ifdef CONFIG_SMP 1067 #ifdef CONFIG_SMP
1068 1068
1069 /* Only try algorithms three times */ 1069 /* Only try algorithms three times */
1070 #define RT_MAX_TRIES 3 1070 #define RT_MAX_TRIES 3
1071 1071
1072 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); 1072 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
1073 1073
1074 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) 1074 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1075 { 1075 {
1076 if (!task_running(rq, p) && 1076 if (!task_running(rq, p) &&
1077 (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && 1077 (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
1078 (p->rt.nr_cpus_allowed > 1)) 1078 (p->rt.nr_cpus_allowed > 1))
1079 return 1; 1079 return 1;
1080 return 0; 1080 return 0;
1081 } 1081 }
1082 1082
1083 /* Return the second highest RT task, NULL otherwise */ 1083 /* Return the second highest RT task, NULL otherwise */
1084 static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) 1084 static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
1085 { 1085 {
1086 struct task_struct *next = NULL; 1086 struct task_struct *next = NULL;
1087 struct sched_rt_entity *rt_se; 1087 struct sched_rt_entity *rt_se;
1088 struct rt_prio_array *array; 1088 struct rt_prio_array *array;
1089 struct rt_rq *rt_rq; 1089 struct rt_rq *rt_rq;
1090 int idx; 1090 int idx;
1091 1091
1092 for_each_leaf_rt_rq(rt_rq, rq) { 1092 for_each_leaf_rt_rq(rt_rq, rq) {
1093 array = &rt_rq->active; 1093 array = &rt_rq->active;
1094 idx = sched_find_first_bit(array->bitmap); 1094 idx = sched_find_first_bit(array->bitmap);
1095 next_idx: 1095 next_idx:
1096 if (idx >= MAX_RT_PRIO) 1096 if (idx >= MAX_RT_PRIO)
1097 continue; 1097 continue;
1098 if (next && next->prio < idx) 1098 if (next && next->prio < idx)
1099 continue; 1099 continue;
1100 list_for_each_entry(rt_se, array->queue + idx, run_list) { 1100 list_for_each_entry(rt_se, array->queue + idx, run_list) {
1101 struct task_struct *p = rt_task_of(rt_se); 1101 struct task_struct *p = rt_task_of(rt_se);
1102 if (pick_rt_task(rq, p, cpu)) { 1102 if (pick_rt_task(rq, p, cpu)) {
1103 next = p; 1103 next = p;
1104 break; 1104 break;
1105 } 1105 }
1106 } 1106 }
1107 if (!next) { 1107 if (!next) {
1108 idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1); 1108 idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
1109 goto next_idx; 1109 goto next_idx;
1110 } 1110 }
1111 } 1111 }
1112 1112
1113 return next; 1113 return next;
1114 } 1114 }
1115 1115
1116 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); 1116 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
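local_cpu_mask is the per-cpu scratch mask consumed by find_lowest_rq() below; with an off-stack cpumask configuration it is only a pointer until backing storage is allocated. That allocation is not part of this hunk, so the following is a hedged sketch of a typical init-time loop; the function name is made up and the in-tree code may well use a NUMA-node-aware allocation variant:

static __init void alloc_local_cpu_masks_sketch(void)
{
	int i;

	for_each_possible_cpu(i)
		zalloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL);
}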
1117 1117
1118 static inline int pick_optimal_cpu(int this_cpu, 1118 static inline int pick_optimal_cpu(int this_cpu,
1119 const struct cpumask *mask) 1119 const struct cpumask *mask)
1120 { 1120 {
1121 int first; 1121 int first;
1122 1122
1123 /* "this_cpu" is cheaper to preempt than a remote processor */ 1123 /* "this_cpu" is cheaper to preempt than a remote processor */
1124 if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask)) 1124 if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask))
1125 return this_cpu; 1125 return this_cpu;
1126 1126
1127 first = cpumask_first(mask); 1127 first = cpumask_first(mask);
1128 if (first < nr_cpu_ids) 1128 if (first < nr_cpu_ids)
1129 return first; 1129 return first;
1130 1130
1131 return -1; 1131 return -1;
1132 } 1132 }
1133 1133
1134 static int find_lowest_rq(struct task_struct *task) 1134 static int find_lowest_rq(struct task_struct *task)
1135 { 1135 {
1136 struct sched_domain *sd; 1136 struct sched_domain *sd;
1137 struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); 1137 struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
1138 int this_cpu = smp_processor_id(); 1138 int this_cpu = smp_processor_id();
1139 int cpu = task_cpu(task); 1139 int cpu = task_cpu(task);
1140 cpumask_var_t domain_mask; 1140 cpumask_var_t domain_mask;
1141 1141
1142 if (task->rt.nr_cpus_allowed == 1) 1142 if (task->rt.nr_cpus_allowed == 1)
1143 return -1; /* No other targets possible */ 1143 return -1; /* No other targets possible */
1144 1144
1145 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) 1145 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
1146 return -1; /* No targets found */ 1146 return -1; /* No targets found */
1147 1147
1148 /* 1148 /*
1149 * Only consider CPUs that are usable for migration. 1149 * Only consider CPUs that are usable for migration.
1150 * I guess we might want to change cpupri_find() to ignore those 1150 * I guess we might want to change cpupri_find() to ignore those
1151 * in the first place. 1151 * in the first place.
1152 */ 1152 */
1153 cpumask_and(lowest_mask, lowest_mask, cpu_active_mask); 1153 cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
1154 1154
1155 /* 1155 /*
1156 * At this point we have built a mask of cpus representing the 1156 * At this point we have built a mask of cpus representing the
1157 * lowest priority tasks in the system. Now we want to elect 1157 * lowest priority tasks in the system. Now we want to elect
1158 * the best one based on our affinity and topology. 1158 * the best one based on our affinity and topology.
1159 * 1159 *
1160 * We prioritize the last cpu that the task executed on since 1160 * We prioritize the last cpu that the task executed on since
1161 * it is most likely cache-hot in that location. 1161 * it is most likely cache-hot in that location.
1162 */ 1162 */
1163 if (cpumask_test_cpu(cpu, lowest_mask)) 1163 if (cpumask_test_cpu(cpu, lowest_mask))
1164 return cpu; 1164 return cpu;
1165 1165
1166 /* 1166 /*
1167 * Otherwise, we consult the sched_domains span maps to figure 1167 * Otherwise, we consult the sched_domains span maps to figure
1168 * out which cpu is logically closest to our hot cache data. 1168 * out which cpu is logically closest to our hot cache data.
1169 */ 1169 */
1170 if (this_cpu == cpu) 1170 if (this_cpu == cpu)
1171 this_cpu = -1; /* Skip this_cpu opt if the same */ 1171 this_cpu = -1; /* Skip this_cpu opt if the same */
1172 1172
1173 if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) { 1173 if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) {
1174 for_each_domain(cpu, sd) { 1174 for_each_domain(cpu, sd) {
1175 if (sd->flags & SD_WAKE_AFFINE) { 1175 if (sd->flags & SD_WAKE_AFFINE) {
1176 int best_cpu; 1176 int best_cpu;
1177 1177
1178 cpumask_and(domain_mask, 1178 cpumask_and(domain_mask,
1179 sched_domain_span(sd), 1179 sched_domain_span(sd),
1180 lowest_mask); 1180 lowest_mask);
1181 1181
1182 best_cpu = pick_optimal_cpu(this_cpu, 1182 best_cpu = pick_optimal_cpu(this_cpu,
1183 domain_mask); 1183 domain_mask);
1184 1184
1185 if (best_cpu != -1) { 1185 if (best_cpu != -1) {
1186 free_cpumask_var(domain_mask); 1186 free_cpumask_var(domain_mask);
1187 return best_cpu; 1187 return best_cpu;
1188 } 1188 }
1189 } 1189 }
1190 } 1190 }
1191 free_cpumask_var(domain_mask); 1191 free_cpumask_var(domain_mask);
1192 } 1192 }
1193 1193
1194 /* 1194 /*
1195 * And finally, if there were no matches within the domains 1195 * And finally, if there were no matches within the domains
1196 * just give the caller *something* to work with from the compatible 1196 * just give the caller *something* to work with from the compatible
1197 * locations. 1197 * locations.
1198 */ 1198 */
1199 return pick_optimal_cpu(this_cpu, lowest_mask); 1199 return pick_optimal_cpu(this_cpu, lowest_mask);
1200 } 1200 }
1201 1201
1202 /* Will lock the rq it finds */ 1202 /* Will lock the rq it finds */
1203 static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) 1203 static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1204 { 1204 {
1205 struct rq *lowest_rq = NULL; 1205 struct rq *lowest_rq = NULL;
1206 int tries; 1206 int tries;
1207 int cpu; 1207 int cpu;
1208 1208
1209 for (tries = 0; tries < RT_MAX_TRIES; tries++) { 1209 for (tries = 0; tries < RT_MAX_TRIES; tries++) {
1210 cpu = find_lowest_rq(task); 1210 cpu = find_lowest_rq(task);
1211 1211
1212 if ((cpu == -1) || (cpu == rq->cpu)) 1212 if ((cpu == -1) || (cpu == rq->cpu))
1213 break; 1213 break;
1214 1214
1215 lowest_rq = cpu_rq(cpu); 1215 lowest_rq = cpu_rq(cpu);
1216 1216
1217 /* if the prio of this runqueue changed, try again */ 1217 /* if the prio of this runqueue changed, try again */
1218 if (double_lock_balance(rq, lowest_rq)) { 1218 if (double_lock_balance(rq, lowest_rq)) {
1219 /* 1219 /*
1220 * We had to unlock the run queue. In 1220 * We had to unlock the run queue. In
1221 * the mean time, task could have 1221 * the mean time, task could have
1222 * migrated already or had its affinity changed. 1222 * migrated already or had its affinity changed.
1223 * Also make sure that it wasn't scheduled on its rq. 1223 * Also make sure that it wasn't scheduled on its rq.
1224 */ 1224 */
1225 if (unlikely(task_rq(task) != rq || 1225 if (unlikely(task_rq(task) != rq ||
1226 !cpumask_test_cpu(lowest_rq->cpu, 1226 !cpumask_test_cpu(lowest_rq->cpu,
1227 &task->cpus_allowed) || 1227 &task->cpus_allowed) ||
1228 task_running(rq, task) || 1228 task_running(rq, task) ||
1229 !task->se.on_rq)) { 1229 !task->se.on_rq)) {
1230 1230
1231 spin_unlock(&lowest_rq->lock); 1231 spin_unlock(&lowest_rq->lock);
1232 lowest_rq = NULL; 1232 lowest_rq = NULL;
1233 break; 1233 break;
1234 } 1234 }
1235 } 1235 }
1236 1236
1237 /* If this rq is still suitable use it. */ 1237 /* If this rq is still suitable use it. */
1238 if (lowest_rq->rt.highest_prio.curr > task->prio) 1238 if (lowest_rq->rt.highest_prio.curr > task->prio)
1239 break; 1239 break;
1240 1240
1241 /* try again */ 1241 /* try again */
1242 double_unlock_balance(rq, lowest_rq); 1242 double_unlock_balance(rq, lowest_rq);
1243 lowest_rq = NULL; 1243 lowest_rq = NULL;
1244 } 1244 }
1245 1245
1246 return lowest_rq; 1246 return lowest_rq;
1247 } 1247 }
1248 1248
1249 static inline int has_pushable_tasks(struct rq *rq) 1249 static inline int has_pushable_tasks(struct rq *rq)
1250 { 1250 {
1251 return !plist_head_empty(&rq->rt.pushable_tasks); 1251 return !plist_head_empty(&rq->rt.pushable_tasks);
1252 } 1252 }
1253 1253
1254 static struct task_struct *pick_next_pushable_task(struct rq *rq) 1254 static struct task_struct *pick_next_pushable_task(struct rq *rq)
1255 { 1255 {
1256 struct task_struct *p; 1256 struct task_struct *p;
1257 1257
1258 if (!has_pushable_tasks(rq)) 1258 if (!has_pushable_tasks(rq))
1259 return NULL; 1259 return NULL;
1260 1260
1261 p = plist_first_entry(&rq->rt.pushable_tasks, 1261 p = plist_first_entry(&rq->rt.pushable_tasks,
1262 struct task_struct, pushable_tasks); 1262 struct task_struct, pushable_tasks);
1263 1263
1264 BUG_ON(rq->cpu != task_cpu(p)); 1264 BUG_ON(rq->cpu != task_cpu(p));
1265 BUG_ON(task_current(rq, p)); 1265 BUG_ON(task_current(rq, p));
1266 BUG_ON(p->rt.nr_cpus_allowed <= 1); 1266 BUG_ON(p->rt.nr_cpus_allowed <= 1);
1267 1267
1268 BUG_ON(!p->se.on_rq); 1268 BUG_ON(!p->se.on_rq);
1269 BUG_ON(!rt_task(p)); 1269 BUG_ON(!rt_task(p));
1270 1270
1271 return p; 1271 return p;
1272 } 1272 }
1273 1273
1274 /* 1274 /*
1275 * If the current CPU has more than one RT task, see if the non 1275 * If the current CPU has more than one RT task, see if the non
1276 * running task can migrate over to a CPU that is running a task 1276 * running task can migrate over to a CPU that is running a task
1277 * of lesser priority. 1277 * of lesser priority.
1278 */ 1278 */
1279 static int push_rt_task(struct rq *rq) 1279 static int push_rt_task(struct rq *rq)
1280 { 1280 {
1281 struct task_struct *next_task; 1281 struct task_struct *next_task;
1282 struct rq *lowest_rq; 1282 struct rq *lowest_rq;
1283 1283
1284 if (!rq->rt.overloaded) 1284 if (!rq->rt.overloaded)
1285 return 0; 1285 return 0;
1286 1286
1287 next_task = pick_next_pushable_task(rq); 1287 next_task = pick_next_pushable_task(rq);
1288 if (!next_task) 1288 if (!next_task)
1289 return 0; 1289 return 0;
1290 1290
1291 retry: 1291 retry:
1292 if (unlikely(next_task == rq->curr)) { 1292 if (unlikely(next_task == rq->curr)) {
1293 WARN_ON(1); 1293 WARN_ON(1);
1294 return 0; 1294 return 0;
1295 } 1295 }
1296 1296
1297 /* 1297 /*
1298 * It's possible that the next_task slipped in with a 1298 * It's possible that the next_task slipped in with a
1299 * higher priority than current. If that's the case 1299 * higher priority than current. If that's the case
1300 * just reschedule current. 1300 * just reschedule current.
1301 */ 1301 */
1302 if (unlikely(next_task->prio < rq->curr->prio)) { 1302 if (unlikely(next_task->prio < rq->curr->prio)) {
1303 resched_task(rq->curr); 1303 resched_task(rq->curr);
1304 return 0; 1304 return 0;
1305 } 1305 }
1306 1306
1307 /* We might release rq lock */ 1307 /* We might release rq lock */
1308 get_task_struct(next_task); 1308 get_task_struct(next_task);
1309 1309
1310 /* find_lock_lowest_rq locks the rq if found */ 1310 /* find_lock_lowest_rq locks the rq if found */
1311 lowest_rq = find_lock_lowest_rq(next_task, rq); 1311 lowest_rq = find_lock_lowest_rq(next_task, rq);
1312 if (!lowest_rq) { 1312 if (!lowest_rq) {
1313 struct task_struct *task; 1313 struct task_struct *task;
1314 /* 1314 /*
1315 * find_lock_lowest_rq releases rq->lock 1315 * find_lock_lowest_rq releases rq->lock
1316 * so it is possible that next_task has migrated. 1316 * so it is possible that next_task has migrated.
1317 * 1317 *
1318 * We need to make sure that the task is still on the same 1318 * We need to make sure that the task is still on the same
1319 * run-queue and is also still the next task eligible for 1319 * run-queue and is also still the next task eligible for
1320 * pushing. 1320 * pushing.
1321 */ 1321 */
1322 task = pick_next_pushable_task(rq); 1322 task = pick_next_pushable_task(rq);
1323 if (task_cpu(next_task) == rq->cpu && task == next_task) { 1323 if (task_cpu(next_task) == rq->cpu && task == next_task) {
1324 /* 1324 /*
1325 * If we get here, the task hasn't moved at all, but 1325 * If we get here, the task hasn't moved at all, but
1326 * it has failed to push. We will not try again, 1326 * it has failed to push. We will not try again,
1327 * since the other cpus will pull from us when they 1327 * since the other cpus will pull from us when they
1328 * are ready. 1328 * are ready.
1329 */ 1329 */
1330 dequeue_pushable_task(rq, next_task); 1330 dequeue_pushable_task(rq, next_task);
1331 goto out; 1331 goto out;
1332 } 1332 }
1333 1333
1334 if (!task) 1334 if (!task)
1335 /* No more tasks, just exit */ 1335 /* No more tasks, just exit */
1336 goto out; 1336 goto out;
1337 1337
1338 /* 1338 /*
1339 * Something has shifted, try again. 1339 * Something has shifted, try again.
1340 */ 1340 */
1341 put_task_struct(next_task); 1341 put_task_struct(next_task);
1342 next_task = task; 1342 next_task = task;
1343 goto retry; 1343 goto retry;
1344 } 1344 }
1345 1345
1346 deactivate_task(rq, next_task, 0); 1346 deactivate_task(rq, next_task, 0);
1347 set_task_cpu(next_task, lowest_rq->cpu); 1347 set_task_cpu(next_task, lowest_rq->cpu);
1348 activate_task(lowest_rq, next_task, 0); 1348 activate_task(lowest_rq, next_task, 0);
1349 1349
1350 resched_task(lowest_rq->curr); 1350 resched_task(lowest_rq->curr);
1351 1351
1352 double_unlock_balance(rq, lowest_rq); 1352 double_unlock_balance(rq, lowest_rq);
1353 1353
1354 out: 1354 out:
1355 put_task_struct(next_task); 1355 put_task_struct(next_task);
1356 1356
1357 return 1; 1357 return 1;
1358 } 1358 }
1359 1359
1360 static void push_rt_tasks(struct rq *rq) 1360 static void push_rt_tasks(struct rq *rq)
1361 { 1361 {
1362 /* push_rt_task will return true if it moved an RT */ 1362 /* push_rt_task will return true if it moved an RT */
1363 while (push_rt_task(rq)) 1363 while (push_rt_task(rq))
1364 ; 1364 ;
1365 } 1365 }
1366 1366
1367 static int pull_rt_task(struct rq *this_rq) 1367 static int pull_rt_task(struct rq *this_rq)
1368 { 1368 {
1369 int this_cpu = this_rq->cpu, ret = 0, cpu; 1369 int this_cpu = this_rq->cpu, ret = 0, cpu;
1370 struct task_struct *p; 1370 struct task_struct *p;
1371 struct rq *src_rq; 1371 struct rq *src_rq;
1372 1372
1373 if (likely(!rt_overloaded(this_rq))) 1373 if (likely(!rt_overloaded(this_rq)))
1374 return 0; 1374 return 0;
1375 1375
1376 for_each_cpu(cpu, this_rq->rd->rto_mask) { 1376 for_each_cpu(cpu, this_rq->rd->rto_mask) {
1377 if (this_cpu == cpu) 1377 if (this_cpu == cpu)
1378 continue; 1378 continue;
1379 1379
1380 src_rq = cpu_rq(cpu); 1380 src_rq = cpu_rq(cpu);
1381 1381
1382 /* 1382 /*
1383 * Don't bother taking the src_rq->lock if the next highest 1383 * Don't bother taking the src_rq->lock if the next highest
1384 * task is known to be lower-priority than our current task. 1384 * task is known to be lower-priority than our current task.
1385 * This may look racy, but if this value is about to go 1385 * This may look racy, but if this value is about to go
1386 * logically higher, the src_rq will push this task away. 1386 * logically higher, the src_rq will push this task away.
1387 * And if it's going logically lower, we do not care 1387 * And if it's going logically lower, we do not care
1388 */ 1388 */
1389 if (src_rq->rt.highest_prio.next >= 1389 if (src_rq->rt.highest_prio.next >=
1390 this_rq->rt.highest_prio.curr) 1390 this_rq->rt.highest_prio.curr)
1391 continue; 1391 continue;
1392 1392
1393 /* 1393 /*
1394 * We can potentially drop this_rq's lock in 1394 * We can potentially drop this_rq's lock in
1395 * double_lock_balance, and another CPU could 1395 * double_lock_balance, and another CPU could
1396 * alter this_rq 1396 * alter this_rq
1397 */ 1397 */
1398 double_lock_balance(this_rq, src_rq); 1398 double_lock_balance(this_rq, src_rq);
1399 1399
1400 /* 1400 /*
1401 * Are there still pullable RT tasks? 1401 * Are there still pullable RT tasks?
1402 */ 1402 */
1403 if (src_rq->rt.rt_nr_running <= 1) 1403 if (src_rq->rt.rt_nr_running <= 1)
1404 goto skip; 1404 goto skip;
1405 1405
1406 p = pick_next_highest_task_rt(src_rq, this_cpu); 1406 p = pick_next_highest_task_rt(src_rq, this_cpu);
1407 1407
1408 /* 1408 /*
1409 * Do we have an RT task that preempts 1409 * Do we have an RT task that preempts
1410 * the to-be-scheduled task? 1410 * the to-be-scheduled task?
1411 */ 1411 */
1412 if (p && (p->prio < this_rq->rt.highest_prio.curr)) { 1412 if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
1413 WARN_ON(p == src_rq->curr); 1413 WARN_ON(p == src_rq->curr);
1414 WARN_ON(!p->se.on_rq); 1414 WARN_ON(!p->se.on_rq);
1415 1415
1416 /* 1416 /*
1417 * There's a chance that p is higher in priority 1417 * There's a chance that p is higher in priority
1418 * than what's currently running on its cpu. 1418 * than what's currently running on its cpu.
1419 * This is just that p is waking up and hasn't 1419 * This is just that p is waking up and hasn't
1420 * had a chance to schedule. We only pull 1420 * had a chance to schedule. We only pull
1421 * p if it is lower in priority than the 1421 * p if it is lower in priority than the
1422 * current task on the run queue 1422 * current task on the run queue
1423 */ 1423 */
1424 if (p->prio < src_rq->curr->prio) 1424 if (p->prio < src_rq->curr->prio)
1425 goto skip; 1425 goto skip;
1426 1426
1427 ret = 1; 1427 ret = 1;
1428 1428
1429 deactivate_task(src_rq, p, 0); 1429 deactivate_task(src_rq, p, 0);
1430 set_task_cpu(p, this_cpu); 1430 set_task_cpu(p, this_cpu);
1431 activate_task(this_rq, p, 0); 1431 activate_task(this_rq, p, 0);
1432 /* 1432 /*
1433 * We continue with the search, just in 1433 * We continue with the search, just in
1434 * case there's an even higher prio task 1434 * case there's an even higher prio task
1435 * in another runqueue. (low likelihood 1435 * in another runqueue. (low likelihood
1436 * but possible) 1436 * but possible)
1437 */ 1437 */
1438 } 1438 }
1439 skip: 1439 skip:
1440 double_unlock_balance(this_rq, src_rq); 1440 double_unlock_balance(this_rq, src_rq);
1441 } 1441 }
1442 1442
1443 return ret; 1443 return ret;
1444 } 1444 }
1445 1445
1446 static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) 1446 static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
1447 { 1447 {
1448 /* Try to pull RT tasks here if we lower this rq's prio */ 1448 /* Try to pull RT tasks here if we lower this rq's prio */
1449 if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio) 1449 if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio)
1450 pull_rt_task(rq); 1450 pull_rt_task(rq);
1451 } 1451 }
1452 1452
1453 /* 1453 /*
1454 * assumes rq->lock is held 1454 * assumes rq->lock is held
1455 */ 1455 */
1456 static int needs_post_schedule_rt(struct rq *rq) 1456 static int needs_post_schedule_rt(struct rq *rq)
1457 { 1457 {
1458 return has_pushable_tasks(rq); 1458 return has_pushable_tasks(rq);
1459 } 1459 }
1460 1460
1461 static void post_schedule_rt(struct rq *rq) 1461 static void post_schedule_rt(struct rq *rq)
1462 { 1462 {
1463 /* 1463 /*
1464 * This is only called if needs_post_schedule_rt() indicates that 1464 * This is only called if needs_post_schedule_rt() indicates that
1465 * we need to push tasks away 1465 * we need to push tasks away
1466 */ 1466 */
1467 spin_lock_irq(&rq->lock); 1467 spin_lock_irq(&rq->lock);
1468 push_rt_tasks(rq); 1468 push_rt_tasks(rq);
1469 spin_unlock_irq(&rq->lock); 1469 spin_unlock_irq(&rq->lock);
1470 } 1470 }
1471 1471
1472 /* 1472 /*
1473 * If we are not running and we are not going to reschedule soon, we should 1473 * If we are not running and we are not going to reschedule soon, we should
1474 * try to push tasks away now 1474 * try to push tasks away now
1475 */ 1475 */
1476 static void task_wake_up_rt(struct rq *rq, struct task_struct *p) 1476 static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
1477 { 1477 {
1478 if (!task_running(rq, p) && 1478 if (!task_running(rq, p) &&
1479 !test_tsk_need_resched(rq->curr) && 1479 !test_tsk_need_resched(rq->curr) &&
1480 has_pushable_tasks(rq) && 1480 has_pushable_tasks(rq) &&
1481 p->rt.nr_cpus_allowed > 1) 1481 p->rt.nr_cpus_allowed > 1)
1482 push_rt_tasks(rq); 1482 push_rt_tasks(rq);
1483 } 1483 }
1484 1484
1485 static unsigned long 1485 static unsigned long
1486 load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, 1486 load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
1487 unsigned long max_load_move, 1487 unsigned long max_load_move,
1488 struct sched_domain *sd, enum cpu_idle_type idle, 1488 struct sched_domain *sd, enum cpu_idle_type idle,
1489 int *all_pinned, int *this_best_prio) 1489 int *all_pinned, int *this_best_prio)
1490 { 1490 {
1491 /* don't touch RT tasks */ 1491 /* don't touch RT tasks */
1492 return 0; 1492 return 0;
1493 } 1493 }
1494 1494
1495 static int 1495 static int
1496 move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, 1496 move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
1497 struct sched_domain *sd, enum cpu_idle_type idle) 1497 struct sched_domain *sd, enum cpu_idle_type idle)
1498 { 1498 {
1499 /* don't touch RT tasks */ 1499 /* don't touch RT tasks */
1500 return 0; 1500 return 0;
1501 } 1501 }
1502 1502
1503 static void set_cpus_allowed_rt(struct task_struct *p, 1503 static void set_cpus_allowed_rt(struct task_struct *p,
1504 const struct cpumask *new_mask) 1504 const struct cpumask *new_mask)
1505 { 1505 {
1506 int weight = cpumask_weight(new_mask); 1506 int weight = cpumask_weight(new_mask);
1507 1507
1508 BUG_ON(!rt_task(p)); 1508 BUG_ON(!rt_task(p));
1509 1509
1510 /* 1510 /*
1511 * Update the migration status of the RQ if we have an RT task 1511 * Update the migration status of the RQ if we have an RT task
1512 * which is running AND changing its weight value. 1512 * which is running AND changing its weight value.
1513 */ 1513 */
1514 if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) { 1514 if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
1515 struct rq *rq = task_rq(p); 1515 struct rq *rq = task_rq(p);
1516 1516
1517 if (!task_current(rq, p)) { 1517 if (!task_current(rq, p)) {
1518 /* 1518 /*
1519 * Make sure we dequeue this task from the pushable list 1519 * Make sure we dequeue this task from the pushable list
1520 * before going further. It will either remain off of 1520 * before going further. It will either remain off of
1521 * the list because we are no longer pushable, or it 1521 * the list because we are no longer pushable, or it
1522 * will be requeued. 1522 * will be requeued.
1523 */ 1523 */
1524 if (p->rt.nr_cpus_allowed > 1) 1524 if (p->rt.nr_cpus_allowed > 1)
1525 dequeue_pushable_task(rq, p); 1525 dequeue_pushable_task(rq, p);
1526 1526
1527 /* 1527 /*
1528 * Requeue if our weight is changing and still > 1 1528 * Requeue if our weight is changing and still > 1
1529 */ 1529 */
1530 if (weight > 1) 1530 if (weight > 1)
1531 enqueue_pushable_task(rq, p); 1531 enqueue_pushable_task(rq, p);
1532 1532
1533 } 1533 }
1534 1534
1535 if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) { 1535 if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) {
1536 rq->rt.rt_nr_migratory++; 1536 rq->rt.rt_nr_migratory++;
1537 } else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) { 1537 } else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) {
1538 BUG_ON(!rq->rt.rt_nr_migratory); 1538 BUG_ON(!rq->rt.rt_nr_migratory);
1539 rq->rt.rt_nr_migratory--; 1539 rq->rt.rt_nr_migratory--;
1540 } 1540 }
1541 1541
1542 update_rt_migration(&rq->rt); 1542 update_rt_migration(&rq->rt);
1543 } 1543 }
1544 1544
1545 cpumask_copy(&p->cpus_allowed, new_mask); 1545 cpumask_copy(&p->cpus_allowed, new_mask);
1546 p->rt.nr_cpus_allowed = weight; 1546 p->rt.nr_cpus_allowed = weight;
1547 } 1547 }
1548 1548
1549 /* Assumes rq->lock is held */ 1549 /* Assumes rq->lock is held */
1550 static void rq_online_rt(struct rq *rq) 1550 static void rq_online_rt(struct rq *rq)
1551 { 1551 {
1552 if (rq->rt.overloaded) 1552 if (rq->rt.overloaded)
1553 rt_set_overload(rq); 1553 rt_set_overload(rq);
1554 1554
1555 __enable_runtime(rq); 1555 __enable_runtime(rq);
1556 1556
1557 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr); 1557 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
1558 } 1558 }
1559 1559
1560 /* Assumes rq->lock is held */ 1560 /* Assumes rq->lock is held */
1561 static void rq_offline_rt(struct rq *rq) 1561 static void rq_offline_rt(struct rq *rq)
1562 { 1562 {
1563 if (rq->rt.overloaded) 1563 if (rq->rt.overloaded)
1564 rt_clear_overload(rq); 1564 rt_clear_overload(rq);
1565 1565
1566 __disable_runtime(rq); 1566 __disable_runtime(rq);
1567 1567
1568 cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID); 1568 cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
1569 } 1569 }
1570 1570
1571 /* 1571 /*
1572 * When switching from the rt queue, we bring ourselves to a position 1572 * When switching from the rt queue, we bring ourselves to a position
1573 * where we might want to pull RT tasks from other runqueues. 1573 * where we might want to pull RT tasks from other runqueues.
1574 */ 1574 */
1575 static void switched_from_rt(struct rq *rq, struct task_struct *p, 1575 static void switched_from_rt(struct rq *rq, struct task_struct *p,
1576 int running) 1576 int running)
1577 { 1577 {
1578 /* 1578 /*
1579 * If there are other RT tasks then we will reschedule 1579 * If there are other RT tasks then we will reschedule
1580 * and the scheduling of the other RT tasks will handle 1580 * and the scheduling of the other RT tasks will handle
1581 * the balancing. But if we are the last RT task 1581 * the balancing. But if we are the last RT task
1582 * we may need to handle the pulling of RT tasks 1582 * we may need to handle the pulling of RT tasks
1583 * now. 1583 * now.
1584 */ 1584 */
1585 if (!rq->rt.rt_nr_running) 1585 if (!rq->rt.rt_nr_running)
1586 pull_rt_task(rq); 1586 pull_rt_task(rq);
1587 } 1587 }
1588 1588
1589 static inline void init_sched_rt_class(void) 1589 static inline void init_sched_rt_class(void)
1590 { 1590 {
1591 unsigned int i; 1591 unsigned int i;
1592 1592
1593 for_each_possible_cpu(i) 1593 for_each_possible_cpu(i)
1594 alloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), 1594 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
1595 GFP_KERNEL, cpu_to_node(i)); 1595 GFP_KERNEL, cpu_to_node(i));
1596 } 1596 }
1597 #endif /* CONFIG_SMP */ 1597 #endif /* CONFIG_SMP */
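
This is the hunk the commit touches in the RT scheduling class: local_cpu_mask is a static per-cpu cpumask_var_t, so it is already zeroed when MAXSMP is off, but with CONFIG_CPUMASK_OFFSTACK (MAXSMP) alloc_cpumask_var_node() hands back uninitialized memory. Switching to zalloc_cpumask_var_node() makes both configurations start from an empty mask. A minimal sketch of the same idea outside the scheduler, with the mask and setup function invented for illustration:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>

/* hypothetical example, mirrors the per-cpu local_cpu_mask above */
static cpumask_var_t example_mask;

static int example_setup(void)
{
	/*
	 * With plain alloc_cpumask_var() an explicit cpumask_clear() would be
	 * needed here when CONFIG_CPUMASK_OFFSTACK (MAXSMP) is enabled; the
	 * zalloc variant returns the mask already empty in both configurations.
	 */
	if (!zalloc_cpumask_var(&example_mask, GFP_KERNEL))
		return -ENOMEM;

	return 0;
}
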
1598 1598
1599 /* 1599 /*
1600 * When switching a task to RT, we may overload the runqueue 1600 * When switching a task to RT, we may overload the runqueue
1601 * with RT tasks. In this case we try to push them off to 1601 * with RT tasks. In this case we try to push them off to
1602 * other runqueues. 1602 * other runqueues.
1603 */ 1603 */
1604 static void switched_to_rt(struct rq *rq, struct task_struct *p, 1604 static void switched_to_rt(struct rq *rq, struct task_struct *p,
1605 int running) 1605 int running)
1606 { 1606 {
1607 int check_resched = 1; 1607 int check_resched = 1;
1608 1608
1609 /* 1609 /*
1610 * If we are already running, then there's nothing 1610 * If we are already running, then there's nothing
1611 * that needs to be done. But if we are not running 1611 * that needs to be done. But if we are not running
1612 * we may need to preempt the current running task. 1612 * we may need to preempt the current running task.
1613 * If that current running task is also an RT task 1613 * If that current running task is also an RT task
1614 * then see if we can move to another run queue. 1614 * then see if we can move to another run queue.
1615 */ 1615 */
1616 if (!running) { 1616 if (!running) {
1617 #ifdef CONFIG_SMP 1617 #ifdef CONFIG_SMP
1618 if (rq->rt.overloaded && push_rt_task(rq) && 1618 if (rq->rt.overloaded && push_rt_task(rq) &&
1619 /* Don't resched if we changed runqueues */ 1619 /* Don't resched if we changed runqueues */
1620 rq != task_rq(p)) 1620 rq != task_rq(p))
1621 check_resched = 0; 1621 check_resched = 0;
1622 #endif /* CONFIG_SMP */ 1622 #endif /* CONFIG_SMP */
1623 if (check_resched && p->prio < rq->curr->prio) 1623 if (check_resched && p->prio < rq->curr->prio)
1624 resched_task(rq->curr); 1624 resched_task(rq->curr);
1625 } 1625 }
1626 } 1626 }
1627 1627
1628 /* 1628 /*
1629 * Priority of the task has changed. This may cause 1629 * Priority of the task has changed. This may cause
1630 * us to initiate a push or pull. 1630 * us to initiate a push or pull.
1631 */ 1631 */
1632 static void prio_changed_rt(struct rq *rq, struct task_struct *p, 1632 static void prio_changed_rt(struct rq *rq, struct task_struct *p,
1633 int oldprio, int running) 1633 int oldprio, int running)
1634 { 1634 {
1635 if (running) { 1635 if (running) {
1636 #ifdef CONFIG_SMP 1636 #ifdef CONFIG_SMP
1637 /* 1637 /*
1638 * If our priority decreases while running, we 1638 * If our priority decreases while running, we
1639 * may need to pull tasks to this runqueue. 1639 * may need to pull tasks to this runqueue.
1640 */ 1640 */
1641 if (oldprio < p->prio) 1641 if (oldprio < p->prio)
1642 pull_rt_task(rq); 1642 pull_rt_task(rq);
1643 /* 1643 /*
1644 * If there's a higher priority task waiting to run 1644 * If there's a higher priority task waiting to run
1645 * then reschedule. Note, the above pull_rt_task 1645 * then reschedule. Note, the above pull_rt_task
1646 * can release the rq lock and p could migrate. 1646 * can release the rq lock and p could migrate.
1647 * Only reschedule if p is still on the same runqueue. 1647 * Only reschedule if p is still on the same runqueue.
1648 */ 1648 */
1649 if (p->prio > rq->rt.highest_prio.curr && rq->curr == p) 1649 if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
1650 resched_task(p); 1650 resched_task(p);
1651 #else 1651 #else
1652 /* For UP simply resched on drop of prio */ 1652 /* For UP simply resched on drop of prio */
1653 if (oldprio < p->prio) 1653 if (oldprio < p->prio)
1654 resched_task(p); 1654 resched_task(p);
1655 #endif /* CONFIG_SMP */ 1655 #endif /* CONFIG_SMP */
1656 } else { 1656 } else {
1657 /* 1657 /*
1658 * This task is not running, but if it is 1658 * This task is not running, but if it is
1659 * greater than the current running task 1659 * greater than the current running task
1660 * then reschedule. 1660 * then reschedule.
1661 */ 1661 */
1662 if (p->prio < rq->curr->prio) 1662 if (p->prio < rq->curr->prio)
1663 resched_task(rq->curr); 1663 resched_task(rq->curr);
1664 } 1664 }
1665 } 1665 }
1666 1666
1667 static void watchdog(struct rq *rq, struct task_struct *p) 1667 static void watchdog(struct rq *rq, struct task_struct *p)
1668 { 1668 {
1669 unsigned long soft, hard; 1669 unsigned long soft, hard;
1670 1670
1671 if (!p->signal) 1671 if (!p->signal)
1672 return; 1672 return;
1673 1673
1674 soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur; 1674 soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur;
1675 hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max; 1675 hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max;
1676 1676
1677 if (soft != RLIM_INFINITY) { 1677 if (soft != RLIM_INFINITY) {
1678 unsigned long next; 1678 unsigned long next;
1679 1679
1680 p->rt.timeout++; 1680 p->rt.timeout++;
1681 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); 1681 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
1682 if (p->rt.timeout > next) 1682 if (p->rt.timeout > next)
1683 p->cputime_expires.sched_exp = p->se.sum_exec_runtime; 1683 p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
1684 } 1684 }
1685 } 1685 }
1686 1686
1687 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) 1687 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
1688 { 1688 {
1689 update_curr_rt(rq); 1689 update_curr_rt(rq);
1690 1690
1691 watchdog(rq, p); 1691 watchdog(rq, p);
1692 1692
1693 /* 1693 /*
1694 * RR tasks need a special form of timeslice management. 1694 * RR tasks need a special form of timeslice management.
1695 * FIFO tasks have no timeslices. 1695 * FIFO tasks have no timeslices.
1696 */ 1696 */
1697 if (p->policy != SCHED_RR) 1697 if (p->policy != SCHED_RR)
1698 return; 1698 return;
1699 1699
1700 if (--p->rt.time_slice) 1700 if (--p->rt.time_slice)
1701 return; 1701 return;
1702 1702
1703 p->rt.time_slice = DEF_TIMESLICE; 1703 p->rt.time_slice = DEF_TIMESLICE;
1704 1704
1705 /* 1705 /*
1706 * Requeue to the end of queue if we are not the only element 1706 * Requeue to the end of queue if we are not the only element
1707 * on the queue: 1707 * on the queue:
1708 */ 1708 */
1709 if (p->rt.run_list.prev != p->rt.run_list.next) { 1709 if (p->rt.run_list.prev != p->rt.run_list.next) {
1710 requeue_task_rt(rq, p, 0); 1710 requeue_task_rt(rq, p, 0);
1711 set_tsk_need_resched(p); 1711 set_tsk_need_resched(p);
1712 } 1712 }
1713 } 1713 }
1714 1714
1715 static void set_curr_task_rt(struct rq *rq) 1715 static void set_curr_task_rt(struct rq *rq)
1716 { 1716 {
1717 struct task_struct *p = rq->curr; 1717 struct task_struct *p = rq->curr;
1718 1718
1719 p->se.exec_start = rq->clock; 1719 p->se.exec_start = rq->clock;
1720 1720
1721 /* The running task is never eligible for pushing */ 1721 /* The running task is never eligible for pushing */
1722 dequeue_pushable_task(rq, p); 1722 dequeue_pushable_task(rq, p);
1723 } 1723 }
1724 1724
1725 static const struct sched_class rt_sched_class = { 1725 static const struct sched_class rt_sched_class = {
1726 .next = &fair_sched_class, 1726 .next = &fair_sched_class,
1727 .enqueue_task = enqueue_task_rt, 1727 .enqueue_task = enqueue_task_rt,
1728 .dequeue_task = dequeue_task_rt, 1728 .dequeue_task = dequeue_task_rt,
1729 .yield_task = yield_task_rt, 1729 .yield_task = yield_task_rt,
1730 1730
1731 .check_preempt_curr = check_preempt_curr_rt, 1731 .check_preempt_curr = check_preempt_curr_rt,
1732 1732
1733 .pick_next_task = pick_next_task_rt, 1733 .pick_next_task = pick_next_task_rt,
1734 .put_prev_task = put_prev_task_rt, 1734 .put_prev_task = put_prev_task_rt,
1735 1735
1736 #ifdef CONFIG_SMP 1736 #ifdef CONFIG_SMP
1737 .select_task_rq = select_task_rq_rt, 1737 .select_task_rq = select_task_rq_rt,
1738 1738
1739 .load_balance = load_balance_rt, 1739 .load_balance = load_balance_rt,
1740 .move_one_task = move_one_task_rt, 1740 .move_one_task = move_one_task_rt,
1741 .set_cpus_allowed = set_cpus_allowed_rt, 1741 .set_cpus_allowed = set_cpus_allowed_rt,
1742 .rq_online = rq_online_rt, 1742 .rq_online = rq_online_rt,
1743 .rq_offline = rq_offline_rt, 1743 .rq_offline = rq_offline_rt,
1744 .pre_schedule = pre_schedule_rt, 1744 .pre_schedule = pre_schedule_rt,
1745 .needs_post_schedule = needs_post_schedule_rt, 1745 .needs_post_schedule = needs_post_schedule_rt,
1746 .post_schedule = post_schedule_rt, 1746 .post_schedule = post_schedule_rt,
1747 .task_wake_up = task_wake_up_rt, 1747 .task_wake_up = task_wake_up_rt,
1748 .switched_from = switched_from_rt, 1748 .switched_from = switched_from_rt,
1749 #endif 1749 #endif
1750 1750
1751 .set_curr_task = set_curr_task_rt, 1751 .set_curr_task = set_curr_task_rt,
1752 .task_tick = task_tick_rt, 1752 .task_tick = task_tick_rt,
1753 1753
1754 .prio_changed = prio_changed_rt, 1754 .prio_changed = prio_changed_rt,
1755 .switched_to = switched_to_rt, 1755 .switched_to = switched_to_rt,
1756 }; 1756 };
1757 1757
1758 #ifdef CONFIG_SCHED_DEBUG 1758 #ifdef CONFIG_SCHED_DEBUG
1759 extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); 1759 extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
1760 1760
1761 static void print_rt_stats(struct seq_file *m, int cpu) 1761 static void print_rt_stats(struct seq_file *m, int cpu)
1762 { 1762 {
1763 struct rt_rq *rt_rq; 1763 struct rt_rq *rt_rq;
1764 1764
1765 rcu_read_lock(); 1765 rcu_read_lock();
1766 for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu)) 1766 for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
1767 print_rt_rq(m, cpu, rt_rq); 1767 print_rt_rq(m, cpu, rt_rq);
1768 rcu_read_unlock(); 1768 rcu_read_unlock();
1769 } 1769 }
1770 #endif /* CONFIG_SCHED_DEBUG */ 1770 #endif /* CONFIG_SCHED_DEBUG */
1771 1771
1772 1772
kernel/smp.c
1 /* 1 /*
2 * Generic helpers for smp ipi calls 2 * Generic helpers for smp ipi calls
3 * 3 *
4 * (C) Jens Axboe <jens.axboe@oracle.com> 2008 4 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
5 */ 5 */
6 #include <linux/rcupdate.h> 6 #include <linux/rcupdate.h>
7 #include <linux/rculist.h> 7 #include <linux/rculist.h>
8 #include <linux/kernel.h> 8 #include <linux/kernel.h>
9 #include <linux/module.h> 9 #include <linux/module.h>
10 #include <linux/percpu.h> 10 #include <linux/percpu.h>
11 #include <linux/init.h> 11 #include <linux/init.h>
12 #include <linux/smp.h> 12 #include <linux/smp.h>
13 #include <linux/cpu.h> 13 #include <linux/cpu.h>
14 14
15 static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); 15 static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
16 16
17 static struct { 17 static struct {
18 struct list_head queue; 18 struct list_head queue;
19 spinlock_t lock; 19 spinlock_t lock;
20 } call_function __cacheline_aligned_in_smp = 20 } call_function __cacheline_aligned_in_smp =
21 { 21 {
22 .queue = LIST_HEAD_INIT(call_function.queue), 22 .queue = LIST_HEAD_INIT(call_function.queue),
23 .lock = __SPIN_LOCK_UNLOCKED(call_function.lock), 23 .lock = __SPIN_LOCK_UNLOCKED(call_function.lock),
24 }; 24 };
25 25
26 enum { 26 enum {
27 CSD_FLAG_LOCK = 0x01, 27 CSD_FLAG_LOCK = 0x01,
28 }; 28 };
29 29
30 struct call_function_data { 30 struct call_function_data {
31 struct call_single_data csd; 31 struct call_single_data csd;
32 spinlock_t lock; 32 spinlock_t lock;
33 unsigned int refs; 33 unsigned int refs;
34 cpumask_var_t cpumask; 34 cpumask_var_t cpumask;
35 }; 35 };
36 36
37 struct call_single_queue { 37 struct call_single_queue {
38 struct list_head list; 38 struct list_head list;
39 spinlock_t lock; 39 spinlock_t lock;
40 }; 40 };
41 41
42 static DEFINE_PER_CPU(struct call_function_data, cfd_data) = { 42 static DEFINE_PER_CPU(struct call_function_data, cfd_data) = {
43 .lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock), 43 .lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock),
44 }; 44 };
45 45
46 static int 46 static int
47 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) 47 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
48 { 48 {
49 long cpu = (long)hcpu; 49 long cpu = (long)hcpu;
50 struct call_function_data *cfd = &per_cpu(cfd_data, cpu); 50 struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
51 51
52 switch (action) { 52 switch (action) {
53 case CPU_UP_PREPARE: 53 case CPU_UP_PREPARE:
54 case CPU_UP_PREPARE_FROZEN: 54 case CPU_UP_PREPARE_FROZEN:
55 if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, 55 if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
56 cpu_to_node(cpu))) 56 cpu_to_node(cpu)))
57 return NOTIFY_BAD; 57 return NOTIFY_BAD;
58 break; 58 break;
59 59
60 #ifdef CONFIG_CPU_HOTPLUG 60 #ifdef CONFIG_CPU_HOTPLUG
61 case CPU_UP_CANCELED: 61 case CPU_UP_CANCELED:
62 case CPU_UP_CANCELED_FROZEN: 62 case CPU_UP_CANCELED_FROZEN:
63 63
64 case CPU_DEAD: 64 case CPU_DEAD:
65 case CPU_DEAD_FROZEN: 65 case CPU_DEAD_FROZEN:
66 free_cpumask_var(cfd->cpumask); 66 free_cpumask_var(cfd->cpumask);
67 break; 67 break;
68 #endif 68 #endif
69 }; 69 };
70 70
71 return NOTIFY_OK; 71 return NOTIFY_OK;
72 } 72 }
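
Same pattern in the generic SMP code: cfd_data is a static per-cpu structure, so its cpumask starts out zeroed unless MAXSMP turns cpumask_var_t into a real allocation; using the zalloc variant on the CPU_UP_PREPARE path keeps generic_smp_call_function_interrupt() from ever seeing stale bits before the first caller fills the mask. As a rough, functionally equivalent sketch (illustration only, not the in-tree helper), zalloc_cpumask_var_node() behaves like the plain allocation followed by a clear:

#include <linux/cpumask.h>
#include <linux/types.h>

/* illustration of equivalent behaviour, not the real kernel implementation */
static bool example_zalloc_cpumask_node(cpumask_var_t *mask, gfp_t flags, int node)
{
	if (!alloc_cpumask_var_node(mask, flags, node))
		return false;

	cpumask_clear(*mask);	/* start with no CPUs set, like a static mask */
	return true;
}
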
73 73
74 static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { 74 static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
75 .notifier_call = hotplug_cfd, 75 .notifier_call = hotplug_cfd,
76 }; 76 };
77 77
78 static int __cpuinit init_call_single_data(void) 78 static int __cpuinit init_call_single_data(void)
79 { 79 {
80 void *cpu = (void *)(long)smp_processor_id(); 80 void *cpu = (void *)(long)smp_processor_id();
81 int i; 81 int i;
82 82
83 for_each_possible_cpu(i) { 83 for_each_possible_cpu(i) {
84 struct call_single_queue *q = &per_cpu(call_single_queue, i); 84 struct call_single_queue *q = &per_cpu(call_single_queue, i);
85 85
86 spin_lock_init(&q->lock); 86 spin_lock_init(&q->lock);
87 INIT_LIST_HEAD(&q->list); 87 INIT_LIST_HEAD(&q->list);
88 } 88 }
89 89
90 hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); 90 hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
91 register_cpu_notifier(&hotplug_cfd_notifier); 91 register_cpu_notifier(&hotplug_cfd_notifier);
92 92
93 return 0; 93 return 0;
94 } 94 }
95 early_initcall(init_call_single_data); 95 early_initcall(init_call_single_data);
96 96
97 /* 97 /*
98 * csd_lock/csd_unlock used to serialize access to per-cpu csd resources 98 * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
99 * 99 *
100 * For non-synchronous ipi calls the csd can still be in use by the 100 * For non-synchronous ipi calls the csd can still be in use by the
101 * previous function call. For multi-cpu calls it's even more interesting 101 * previous function call. For multi-cpu calls it's even more interesting
102 * as we'll have to ensure no other cpu is observing our csd. 102 * as we'll have to ensure no other cpu is observing our csd.
103 */ 103 */
104 static void csd_lock_wait(struct call_single_data *data) 104 static void csd_lock_wait(struct call_single_data *data)
105 { 105 {
106 while (data->flags & CSD_FLAG_LOCK) 106 while (data->flags & CSD_FLAG_LOCK)
107 cpu_relax(); 107 cpu_relax();
108 } 108 }
109 109
110 static void csd_lock(struct call_single_data *data) 110 static void csd_lock(struct call_single_data *data)
111 { 111 {
112 csd_lock_wait(data); 112 csd_lock_wait(data);
113 data->flags = CSD_FLAG_LOCK; 113 data->flags = CSD_FLAG_LOCK;
114 114
115 /* 115 /*
116 * prevent CPU from reordering the above assignment 116 * prevent CPU from reordering the above assignment
117 * to ->flags with any subsequent assignments to other 117 * to ->flags with any subsequent assignments to other
118 * fields of the specified call_single_data structure: 118 * fields of the specified call_single_data structure:
119 */ 119 */
120 smp_mb(); 120 smp_mb();
121 } 121 }
122 122
123 static void csd_unlock(struct call_single_data *data) 123 static void csd_unlock(struct call_single_data *data)
124 { 124 {
125 WARN_ON(!(data->flags & CSD_FLAG_LOCK)); 125 WARN_ON(!(data->flags & CSD_FLAG_LOCK));
126 126
127 /* 127 /*
128 * ensure we're all done before releasing data: 128 * ensure we're all done before releasing data:
129 */ 129 */
130 smp_mb(); 130 smp_mb();
131 131
132 data->flags &= ~CSD_FLAG_LOCK; 132 data->flags &= ~CSD_FLAG_LOCK;
133 } 133 }
134 134
135 /* 135 /*
136 * Insert a previously allocated call_single_data element 136 * Insert a previously allocated call_single_data element
137 * for execution on the given CPU. data must already have 137 * for execution on the given CPU. data must already have
138 * ->func, ->info, and ->flags set. 138 * ->func, ->info, and ->flags set.
139 */ 139 */
140 static 140 static
141 void generic_exec_single(int cpu, struct call_single_data *data, int wait) 141 void generic_exec_single(int cpu, struct call_single_data *data, int wait)
142 { 142 {
143 struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); 143 struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
144 unsigned long flags; 144 unsigned long flags;
145 int ipi; 145 int ipi;
146 146
147 spin_lock_irqsave(&dst->lock, flags); 147 spin_lock_irqsave(&dst->lock, flags);
148 ipi = list_empty(&dst->list); 148 ipi = list_empty(&dst->list);
149 list_add_tail(&data->list, &dst->list); 149 list_add_tail(&data->list, &dst->list);
150 spin_unlock_irqrestore(&dst->lock, flags); 150 spin_unlock_irqrestore(&dst->lock, flags);
151 151
152 /* 152 /*
153 * The list addition should be visible before sending the IPI 153 * The list addition should be visible before sending the IPI
154 * handler locks the list to pull the entry off it because of 154 * handler locks the list to pull the entry off it because of
155 * normal cache coherency rules implied by spinlocks. 155 * normal cache coherency rules implied by spinlocks.
156 * 156 *
157 * If IPIs can go out of order to the cache coherency protocol 157 * If IPIs can go out of order to the cache coherency protocol
158 * in an architecture, sufficient synchronisation should be added 158 * in an architecture, sufficient synchronisation should be added
159 * to arch code to make it appear to obey cache coherency WRT 159 * to arch code to make it appear to obey cache coherency WRT
160 * locking and barrier primitives. Generic code isn't really 160 * locking and barrier primitives. Generic code isn't really
161 * equipped to do the right thing... 161 * equipped to do the right thing...
162 */ 162 */
163 if (ipi) 163 if (ipi)
164 arch_send_call_function_single_ipi(cpu); 164 arch_send_call_function_single_ipi(cpu);
165 165
166 if (wait) 166 if (wait)
167 csd_lock_wait(data); 167 csd_lock_wait(data);
168 } 168 }
169 169
170 /* 170 /*
171 * Invoked by arch to handle an IPI for call function. Must be called with 171 * Invoked by arch to handle an IPI for call function. Must be called with
172 * interrupts disabled. 172 * interrupts disabled.
173 */ 173 */
174 void generic_smp_call_function_interrupt(void) 174 void generic_smp_call_function_interrupt(void)
175 { 175 {
176 struct call_function_data *data; 176 struct call_function_data *data;
177 int cpu = get_cpu(); 177 int cpu = get_cpu();
178 178
179 /* 179 /*
180 * Ensure entry is visible on call_function_queue after we have 180 * Ensure entry is visible on call_function_queue after we have
181 * entered the IPI. See comment in smp_call_function_many. 181 * entered the IPI. See comment in smp_call_function_many.
182 * If we don't have this, then we may miss an entry on the list 182 * If we don't have this, then we may miss an entry on the list
183 * and never get another IPI to process it. 183 * and never get another IPI to process it.
184 */ 184 */
185 smp_mb(); 185 smp_mb();
186 186
187 /* 187 /*
188 * It's ok to use list_for_each_rcu() here even though we may 188 * It's ok to use list_for_each_rcu() here even though we may
189 * delete 'pos', since list_del_rcu() doesn't clear ->next 189 * delete 'pos', since list_del_rcu() doesn't clear ->next
190 */ 190 */
191 list_for_each_entry_rcu(data, &call_function.queue, csd.list) { 191 list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
192 int refs; 192 int refs;
193 193
194 spin_lock(&data->lock); 194 spin_lock(&data->lock);
195 if (!cpumask_test_cpu(cpu, data->cpumask)) { 195 if (!cpumask_test_cpu(cpu, data->cpumask)) {
196 spin_unlock(&data->lock); 196 spin_unlock(&data->lock);
197 continue; 197 continue;
198 } 198 }
199 cpumask_clear_cpu(cpu, data->cpumask); 199 cpumask_clear_cpu(cpu, data->cpumask);
200 spin_unlock(&data->lock); 200 spin_unlock(&data->lock);
201 201
202 data->csd.func(data->csd.info); 202 data->csd.func(data->csd.info);
203 203
204 spin_lock(&data->lock); 204 spin_lock(&data->lock);
205 WARN_ON(data->refs == 0); 205 WARN_ON(data->refs == 0);
206 refs = --data->refs; 206 refs = --data->refs;
207 if (!refs) { 207 if (!refs) {
208 spin_lock(&call_function.lock); 208 spin_lock(&call_function.lock);
209 list_del_rcu(&data->csd.list); 209 list_del_rcu(&data->csd.list);
210 spin_unlock(&call_function.lock); 210 spin_unlock(&call_function.lock);
211 } 211 }
212 spin_unlock(&data->lock); 212 spin_unlock(&data->lock);
213 213
214 if (refs) 214 if (refs)
215 continue; 215 continue;
216 216
217 csd_unlock(&data->csd); 217 csd_unlock(&data->csd);
218 } 218 }
219 219
220 put_cpu(); 220 put_cpu();
221 } 221 }
222 222
223 /* 223 /*
224 * Invoked by arch to handle an IPI for call function single. Must be 224 * Invoked by arch to handle an IPI for call function single. Must be
225 * called from the arch with interrupts disabled. 225 * called from the arch with interrupts disabled.
226 */ 226 */
227 void generic_smp_call_function_single_interrupt(void) 227 void generic_smp_call_function_single_interrupt(void)
228 { 228 {
229 struct call_single_queue *q = &__get_cpu_var(call_single_queue); 229 struct call_single_queue *q = &__get_cpu_var(call_single_queue);
230 unsigned int data_flags; 230 unsigned int data_flags;
231 LIST_HEAD(list); 231 LIST_HEAD(list);
232 232
233 spin_lock(&q->lock); 233 spin_lock(&q->lock);
234 list_replace_init(&q->list, &list); 234 list_replace_init(&q->list, &list);
235 spin_unlock(&q->lock); 235 spin_unlock(&q->lock);
236 236
237 while (!list_empty(&list)) { 237 while (!list_empty(&list)) {
238 struct call_single_data *data; 238 struct call_single_data *data;
239 239
240 data = list_entry(list.next, struct call_single_data, list); 240 data = list_entry(list.next, struct call_single_data, list);
241 list_del(&data->list); 241 list_del(&data->list);
242 242
243 /* 243 /*
244 * 'data' can be invalid after this call if flags == 0 244 * 'data' can be invalid after this call if flags == 0
245 * (when called through generic_exec_single()), 245 * (when called through generic_exec_single()),
246 * so save them away before making the call: 246 * so save them away before making the call:
247 */ 247 */
248 data_flags = data->flags; 248 data_flags = data->flags;
249 249
250 data->func(data->info); 250 data->func(data->info);
251 251
252 /* 252 /*
253 * Unlocked CSDs are valid through generic_exec_single(): 253 * Unlocked CSDs are valid through generic_exec_single():
254 */ 254 */
255 if (data_flags & CSD_FLAG_LOCK) 255 if (data_flags & CSD_FLAG_LOCK)
256 csd_unlock(data); 256 csd_unlock(data);
257 } 257 }
258 } 258 }
259 259
260 static DEFINE_PER_CPU(struct call_single_data, csd_data); 260 static DEFINE_PER_CPU(struct call_single_data, csd_data);
261 261
262 /* 262 /*
263 * smp_call_function_single - Run a function on a specific CPU 263 * smp_call_function_single - Run a function on a specific CPU
264 * @func: The function to run. This must be fast and non-blocking. 264 * @func: The function to run. This must be fast and non-blocking.
265 * @info: An arbitrary pointer to pass to the function. 265 * @info: An arbitrary pointer to pass to the function.
266 * @wait: If true, wait until function has completed on other CPUs. 266 * @wait: If true, wait until function has completed on other CPUs.
267 * 267 *
268 * Returns 0 on success, else a negative status code. Note that @wait 268 * Returns 0 on success, else a negative status code. Note that @wait
269 * will be implicitly turned on in case of allocation failures, since 269 * will be implicitly turned on in case of allocation failures, since
270 * we fall back to on-stack allocation. 270 * we fall back to on-stack allocation.
271 */ 271 */
272 int smp_call_function_single(int cpu, void (*func) (void *info), void *info, 272 int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
273 int wait) 273 int wait)
274 { 274 {
275 struct call_single_data d = { 275 struct call_single_data d = {
276 .flags = 0, 276 .flags = 0,
277 }; 277 };
278 unsigned long flags; 278 unsigned long flags;
279 int this_cpu; 279 int this_cpu;
280 int err = 0; 280 int err = 0;
281 281
282 /* 282 /*
283 * prevent preemption and reschedule on another processor, 283 * prevent preemption and reschedule on another processor,
284 * as well as CPU removal 284 * as well as CPU removal
285 */ 285 */
286 this_cpu = get_cpu(); 286 this_cpu = get_cpu();
287 287
288 /* Can deadlock when called with interrupts disabled */ 288 /* Can deadlock when called with interrupts disabled */
289 WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); 289 WARN_ON_ONCE(irqs_disabled() && !oops_in_progress);
290 290
291 if (cpu == this_cpu) { 291 if (cpu == this_cpu) {
292 local_irq_save(flags); 292 local_irq_save(flags);
293 func(info); 293 func(info);
294 local_irq_restore(flags); 294 local_irq_restore(flags);
295 } else { 295 } else {
296 if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { 296 if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
297 struct call_single_data *data = &d; 297 struct call_single_data *data = &d;
298 298
299 if (!wait) 299 if (!wait)
300 data = &__get_cpu_var(csd_data); 300 data = &__get_cpu_var(csd_data);
301 301
302 csd_lock(data); 302 csd_lock(data);
303 303
304 data->func = func; 304 data->func = func;
305 data->info = info; 305 data->info = info;
306 generic_exec_single(cpu, data, wait); 306 generic_exec_single(cpu, data, wait);
307 } else { 307 } else {
308 err = -ENXIO; /* CPU not online */ 308 err = -ENXIO; /* CPU not online */
309 } 309 }
310 } 310 }
311 311
312 put_cpu(); 312 put_cpu();
313 313
314 return err; 314 return err;
315 } 315 }
316 EXPORT_SYMBOL(smp_call_function_single); 316 EXPORT_SYMBOL(smp_call_function_single);
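
A brief usage sketch of the API documented above; the helper names and the per-CPU work are hypothetical, and wait=1 simply means the call returns only once the function has run on the target CPU:

#include <linux/smp.h>

/* runs on @cpu in IPI context: must be fast and non-blocking */
static void example_read_cpu_id(void *info)
{
	int *out = info;

	*out = smp_processor_id();	/* stand-in for real per-CPU work */
}

static int example_query_cpu(int cpu)
{
	int value = -1;
	int err;

	/* wait=1: only return after example_read_cpu_id() has run on @cpu */
	err = smp_call_function_single(cpu, example_read_cpu_id, &value, 1);
	return err ? err : value;
}
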
317 317
318 /** 318 /**
319 * __smp_call_function_single(): Run a function on another CPU 319 * __smp_call_function_single(): Run a function on another CPU
320 * @cpu: The CPU to run on. 320 * @cpu: The CPU to run on.
321 * @data: Pre-allocated and setup data structure 321 * @data: Pre-allocated and setup data structure
322 * 322 *
323 * Like smp_call_function_single(), but allow caller to pass in a 323 * Like smp_call_function_single(), but allow caller to pass in a
324 * pre-allocated data structure. Useful for embedding @data inside 324 * pre-allocated data structure. Useful for embedding @data inside
325 * other structures, for instance. 325 * other structures, for instance.
326 */ 326 */
327 void __smp_call_function_single(int cpu, struct call_single_data *data, 327 void __smp_call_function_single(int cpu, struct call_single_data *data,
328 int wait) 328 int wait)
329 { 329 {
330 csd_lock(data); 330 csd_lock(data);
331 331
332 /* Can deadlock when called with interrupts disabled */ 332 /* Can deadlock when called with interrupts disabled */
333 WARN_ON_ONCE(wait && irqs_disabled() && !oops_in_progress); 333 WARN_ON_ONCE(wait && irqs_disabled() && !oops_in_progress);
334 334
335 generic_exec_single(cpu, data, wait); 335 generic_exec_single(cpu, data, wait);
336 } 336 }
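
The comment above suggests embedding the pre-allocated call_single_data in a larger object; a hedged sketch of that pattern, with struct example_dev and its fields invented for illustration:

#include <linux/smp.h>

struct example_dev {
	struct call_single_data csd;	/* must stay alive until the call completes */
	int pending;
};

/* executed on the target CPU in IPI context */
static void example_dev_ipi(void *info)
{
	struct example_dev *dev = info;

	dev->pending = 0;		/* remote-CPU work goes here */
}

static void example_dev_kick(struct example_dev *dev, int cpu)
{
	/* dev->csd is assumed zero-initialized (static or kzalloc'd);
	 * csd_lock() inside __smp_call_function_single() serializes reuse. */
	dev->csd.func = example_dev_ipi;
	dev->csd.info = dev;
	__smp_call_function_single(cpu, &dev->csd, 0);
}
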
337 337
338 /* Deprecated: shim for archs using old arch_send_call_function_ipi API. */ 338 /* Deprecated: shim for archs using old arch_send_call_function_ipi API. */
339 339
340 #ifndef arch_send_call_function_ipi_mask 340 #ifndef arch_send_call_function_ipi_mask
341 # define arch_send_call_function_ipi_mask(maskp) \ 341 # define arch_send_call_function_ipi_mask(maskp) \
342 arch_send_call_function_ipi(*(maskp)) 342 arch_send_call_function_ipi(*(maskp))
343 #endif 343 #endif
344 344
345 /** 345 /**
346 * smp_call_function_many(): Run a function on a set of other CPUs. 346 * smp_call_function_many(): Run a function on a set of other CPUs.
347 * @mask: The set of cpus to run on (only runs on online subset). 347 * @mask: The set of cpus to run on (only runs on online subset).
348 * @func: The function to run. This must be fast and non-blocking. 348 * @func: The function to run. This must be fast and non-blocking.
349 * @info: An arbitrary pointer to pass to the function. 349 * @info: An arbitrary pointer to pass to the function.
350 * @wait: If true, wait (atomically) until function has completed 350 * @wait: If true, wait (atomically) until function has completed
351 * on other CPUs. 351 * on other CPUs.
352 * 352 *
353 * If @wait is true, then returns once @func has returned. Note that @wait 353 * If @wait is true, then returns once @func has returned. Note that @wait
354 * will be implicitly turned on in case of allocation failures, since 354 * will be implicitly turned on in case of allocation failures, since
355 * we fall back to on-stack allocation. 355 * we fall back to on-stack allocation.
356 * 356 *
357 * You must not call this function with disabled interrupts or from a 357 * You must not call this function with disabled interrupts or from a
358 * hardware interrupt handler or from a bottom half handler. Preemption 358 * hardware interrupt handler or from a bottom half handler. Preemption
359 * must be disabled when calling this function. 359 * must be disabled when calling this function.
360 */ 360 */
361 void smp_call_function_many(const struct cpumask *mask, 361 void smp_call_function_many(const struct cpumask *mask,
362 void (*func)(void *), void *info, bool wait) 362 void (*func)(void *), void *info, bool wait)
363 { 363 {
364 struct call_function_data *data; 364 struct call_function_data *data;
365 unsigned long flags; 365 unsigned long flags;
366 int cpu, next_cpu, this_cpu = smp_processor_id(); 366 int cpu, next_cpu, this_cpu = smp_processor_id();
367 367
368 /* Can deadlock when called with interrupts disabled */ 368 /* Can deadlock when called with interrupts disabled */
369 WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); 369 WARN_ON_ONCE(irqs_disabled() && !oops_in_progress);
370 370
371 /* So, what's a CPU they want? Ignoring this one. */ 371 /* So, what's a CPU they want? Ignoring this one. */
372 cpu = cpumask_first_and(mask, cpu_online_mask); 372 cpu = cpumask_first_and(mask, cpu_online_mask);
373 if (cpu == this_cpu) 373 if (cpu == this_cpu)
374 cpu = cpumask_next_and(cpu, mask, cpu_online_mask); 374 cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
375 375
376 /* No online cpus? We're done. */ 376 /* No online cpus? We're done. */
377 if (cpu >= nr_cpu_ids) 377 if (cpu >= nr_cpu_ids)
378 return; 378 return;
379 379
380 /* Do we have another CPU which isn't us? */ 380 /* Do we have another CPU which isn't us? */
381 next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask); 381 next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
382 if (next_cpu == this_cpu) 382 if (next_cpu == this_cpu)
383 next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask); 383 next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
384 384
385 /* Fastpath: do that cpu by itself. */ 385 /* Fastpath: do that cpu by itself. */
386 if (next_cpu >= nr_cpu_ids) { 386 if (next_cpu >= nr_cpu_ids) {
387 smp_call_function_single(cpu, func, info, wait); 387 smp_call_function_single(cpu, func, info, wait);
388 return; 388 return;
389 } 389 }
390 390
391 data = &__get_cpu_var(cfd_data); 391 data = &__get_cpu_var(cfd_data);
392 csd_lock(&data->csd); 392 csd_lock(&data->csd);
393 393
394 spin_lock_irqsave(&data->lock, flags); 394 spin_lock_irqsave(&data->lock, flags);
395 data->csd.func = func; 395 data->csd.func = func;
396 data->csd.info = info; 396 data->csd.info = info;
397 cpumask_and(data->cpumask, mask, cpu_online_mask); 397 cpumask_and(data->cpumask, mask, cpu_online_mask);
398 cpumask_clear_cpu(this_cpu, data->cpumask); 398 cpumask_clear_cpu(this_cpu, data->cpumask);
399 data->refs = cpumask_weight(data->cpumask); 399 data->refs = cpumask_weight(data->cpumask);
400 400
401 spin_lock(&call_function.lock); 401 spin_lock(&call_function.lock);
402 /* 402 /*
403 * Place entry at the _HEAD_ of the list, so that any cpu still 403 * Place entry at the _HEAD_ of the list, so that any cpu still
404 * observing the entry in generic_smp_call_function_interrupt() 404 * observing the entry in generic_smp_call_function_interrupt()
405 * will not miss any other list entries: 405 * will not miss any other list entries:
406 */ 406 */
407 list_add_rcu(&data->csd.list, &call_function.queue); 407 list_add_rcu(&data->csd.list, &call_function.queue);
408 spin_unlock(&call_function.lock); 408 spin_unlock(&call_function.lock);
409 409
410 spin_unlock_irqrestore(&data->lock, flags); 410 spin_unlock_irqrestore(&data->lock, flags);
411 411
412 /* 412 /*
413 * Make the list addition visible before sending the ipi. 413 * Make the list addition visible before sending the ipi.
414 * (IPIs must obey or appear to obey normal Linux cache 414 * (IPIs must obey or appear to obey normal Linux cache
415 * coherency rules -- see comment in generic_exec_single). 415 * coherency rules -- see comment in generic_exec_single).
416 */ 416 */
417 smp_mb(); 417 smp_mb();
418 418
419 /* Send a message to all CPUs in the map */ 419 /* Send a message to all CPUs in the map */
420 arch_send_call_function_ipi_mask(data->cpumask); 420 arch_send_call_function_ipi_mask(data->cpumask);
421 421
422 /* Optionally wait for the CPUs to complete */ 422 /* Optionally wait for the CPUs to complete */
423 if (wait) 423 if (wait)
424 csd_lock_wait(&data->csd); 424 csd_lock_wait(&data->csd);
425 } 425 }
426 EXPORT_SYMBOL(smp_call_function_many); 426 EXPORT_SYMBOL(smp_call_function_many);
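
And a corresponding sketch for the many-CPU variant documented above, again with hypothetical names; note the documented requirements that preemption is disabled around the call and that the callback must not block:

#include <linux/preempt.h>
#include <linux/smp.h>

/* executed on every online CPU in the mask except the calling one */
static void example_poke(void *unused)
{
	/* fast, non-blocking per-CPU work goes here */
}

static void example_poke_mask(const struct cpumask *example_mask)
{
	preempt_disable();		/* required by smp_call_function_many() */
	smp_call_function_many(example_mask, example_poke, NULL, true);
	preempt_enable();
}
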
427 427
428 /** 428 /**
429 * smp_call_function(): Run a function on all other CPUs. 429 * smp_call_function(): Run a function on all other CPUs.
430 * @func: The function to run. This must be fast and non-blocking. 430 * @func: The function to run. This must be fast and non-blocking.
431 * @info: An arbitrary pointer to pass to the function. 431 * @info: An arbitrary pointer to pass to the function.
432 * @wait: If true, wait (atomically) until function has completed 432 * @wait: If true, wait (atomically) until function has completed
433 * on other CPUs. 433 * on other CPUs.
434 * 434 *
435 * Returns 0. 435 * Returns 0.
436 * 436 *
437 * If @wait is true, then returns once @func has returned; otherwise 437 * If @wait is true, then returns once @func has returned; otherwise
438 * it returns just before the target cpu calls @func. In case of allocation 438 * it returns just before the target cpu calls @func. In case of allocation
439 * failure, @wait will be implicitly turned on. 439 * failure, @wait will be implicitly turned on.
440 * 440 *
441 * You must not call this function with disabled interrupts or from a 441 * You must not call this function with disabled interrupts or from a
442 * hardware interrupt handler or from a bottom half handler. 442 * hardware interrupt handler or from a bottom half handler.
443 */ 443 */
444 int smp_call_function(void (*func)(void *), void *info, int wait) 444 int smp_call_function(void (*func)(void *), void *info, int wait)
445 { 445 {
446 preempt_disable(); 446 preempt_disable();
447 smp_call_function_many(cpu_online_mask, func, info, wait); 447 smp_call_function_many(cpu_online_mask, func, info, wait);
448 preempt_enable(); 448 preempt_enable();
449 449
450 return 0; 450 return 0;
451 } 451 }
452 EXPORT_SYMBOL(smp_call_function); 452 EXPORT_SYMBOL(smp_call_function);
453 453
454 void ipi_call_lock(void) 454 void ipi_call_lock(void)
455 { 455 {
456 spin_lock(&call_function.lock); 456 spin_lock(&call_function.lock);
457 } 457 }
458 458
459 void ipi_call_unlock(void) 459 void ipi_call_unlock(void)
460 { 460 {
461 spin_unlock(&call_function.lock); 461 spin_unlock(&call_function.lock);
462 } 462 }
463 463
464 void ipi_call_lock_irq(void) 464 void ipi_call_lock_irq(void)
465 { 465 {
466 spin_lock_irq(&call_function.lock); 466 spin_lock_irq(&call_function.lock);
467 } 467 }
468 468
469 void ipi_call_unlock_irq(void) 469 void ipi_call_unlock_irq(void)
470 { 470 {
471 spin_unlock_irq(&call_function.lock); 471 spin_unlock_irq(&call_function.lock);
472 } 472 }
473 473