Commit eaa958402ea40851097d051f52ba1bb7a885efe9
Committed by Rusty Russell
1 parent: 0281b5dc03
Exists in master and in 7 other branches
cpumask: alloc zeroed cpumask for static cpumask_var_ts
These are defined as static cpumask_var_t, so if MAXSMP is not used they
are already cleared. Avoid surprises when MAXSMP is enabled.

Signed-off-by: Yinghai Lu <yinghai.lu@kernel.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
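The rationale in one sketch: when CONFIG_MAXSMP (which selects CONFIG_CPUMASK_OFFSTACK) is off, cpumask_var_t is a one-element array, so a static instance lands in .bss and is implicitly zeroed; when it is on, cpumask_var_t is only a pointer and the mask must be allocated, so code that relied on the implicit zeroing must allocate it zeroed. A minimal illustration, assuming kernel context (the variable saved_mask and the init function are hypothetical names, not from this commit):

#include <linux/cpumask.h>

static cpumask_var_t saved_mask;	/* hypothetical example variable */

static int __init demo_init(void)
{
	/*
	 * zalloc_cpumask_var() == alloc_cpumask_var() + a cleared mask:
	 * with CONFIG_CPUMASK_OFFSTACK=n it just clears the static bitmap
	 * (already zero in .bss); with =y it allocates the mask zeroed,
	 * so the contents are zero either way.
	 */
	if (!zalloc_cpumask_var(&saved_mask, GFP_KERNEL))
		return -ENOMEM;
	return 0;
}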
Showing 11 changed files with 11 additions and 11 deletions
- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
- arch/x86/kernel/cpu/cpufreq/powernow-k7.c
- arch/x86/kernel/cpu/cpufreq/powernow-k8.c
- arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
- arch/x86/kernel/cpu/mcheck/mce_64.c
- arch/x86/kernel/tlb_uv.c
- drivers/acpi/processor_core.c
- drivers/cpufreq/cpufreq.c
- kernel/sched_cpupri.c
- kernel/sched_rt.c
- kernel/smp.c
arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -550,12 +550,12 @@ static int __init acpi_cpufreq_early_init(void)
 		return -ENOMEM;
 	}
 	for_each_possible_cpu(i) {
-		if (!alloc_cpumask_var_node(
+		if (!zalloc_cpumask_var_node(
 			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
 			GFP_KERNEL, cpu_to_node(i))) {
 
 			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
 			free_acpi_perf_data();
 			return -ENOMEM;
 		}
 	}
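The call site above uses the node-aware variant: zalloc_cpumask_var_node() returns a cleared mask allocated near the given NUMA node when CONFIG_CPUMASK_OFFSTACK=y, and reduces to clearing the static bitmap otherwise. A short sketch of the allocate/free pattern, under the same assumptions as above (demo_mask and demo_alloc are illustrative names, not this driver's):

#include <linux/cpumask.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(cpumask_var_t, demo_mask);	/* hypothetical */

static int __init demo_alloc(void)
{
	unsigned int i;

	for_each_possible_cpu(i) {
		/* zeroed mask, allocated on the CPU's local NUMA node */
		if (!zalloc_cpumask_var_node(&per_cpu(demo_mask, i),
					     GFP_KERNEL, cpu_to_node(i)))
			goto fail;
	}
	return 0;

fail:
	/* free_cpumask_var() tolerates never-allocated (NULL) masks */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu(demo_mask, i));
	return -ENOMEM;
}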
arch/x86/kernel/cpu/cpufreq/powernow-k7.c
1 | /* | 1 | /* |
2 | * AMD K7 Powernow driver. | 2 | * AMD K7 Powernow driver. |
3 | * (C) 2003 Dave Jones on behalf of SuSE Labs. | 3 | * (C) 2003 Dave Jones on behalf of SuSE Labs. |
4 | * (C) 2003-2004 Dave Jones <davej@redhat.com> | 4 | * (C) 2003-2004 Dave Jones <davej@redhat.com> |
5 | * | 5 | * |
6 | * Licensed under the terms of the GNU GPL License version 2. | 6 | * Licensed under the terms of the GNU GPL License version 2. |
7 | * Based upon datasheets & sample CPUs kindly provided by AMD. | 7 | * Based upon datasheets & sample CPUs kindly provided by AMD. |
8 | * | 8 | * |
9 | * Errata 5: | 9 | * Errata 5: |
10 | * CPU may fail to execute a FID/VID change in presence of interrupt. | 10 | * CPU may fail to execute a FID/VID change in presence of interrupt. |
11 | * - We cli/sti on stepping A0 CPUs around the FID/VID transition. | 11 | * - We cli/sti on stepping A0 CPUs around the FID/VID transition. |
12 | * Errata 15: | 12 | * Errata 15: |
13 | * CPU with half frequency multipliers may hang upon wakeup from disconnect. | 13 | * CPU with half frequency multipliers may hang upon wakeup from disconnect. |
14 | * - We disable half multipliers if ACPI is used on A0 stepping CPUs. | 14 | * - We disable half multipliers if ACPI is used on A0 stepping CPUs. |
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/moduleparam.h> | 19 | #include <linux/moduleparam.h> |
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/cpufreq.h> | 21 | #include <linux/cpufreq.h> |
22 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
23 | #include <linux/string.h> | 23 | #include <linux/string.h> |
24 | #include <linux/dmi.h> | 24 | #include <linux/dmi.h> |
25 | #include <linux/timex.h> | 25 | #include <linux/timex.h> |
26 | #include <linux/io.h> | 26 | #include <linux/io.h> |
27 | 27 | ||
28 | #include <asm/timer.h> /* Needed for recalibrate_cpu_khz() */ | 28 | #include <asm/timer.h> /* Needed for recalibrate_cpu_khz() */ |
29 | #include <asm/msr.h> | 29 | #include <asm/msr.h> |
30 | #include <asm/system.h> | 30 | #include <asm/system.h> |
31 | 31 | ||
32 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | 32 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI |
33 | #include <linux/acpi.h> | 33 | #include <linux/acpi.h> |
34 | #include <acpi/processor.h> | 34 | #include <acpi/processor.h> |
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | #include "powernow-k7.h" | 37 | #include "powernow-k7.h" |
38 | 38 | ||
39 | #define PFX "powernow: " | 39 | #define PFX "powernow: " |
40 | 40 | ||
41 | 41 | ||
42 | struct psb_s { | 42 | struct psb_s { |
43 | u8 signature[10]; | 43 | u8 signature[10]; |
44 | u8 tableversion; | 44 | u8 tableversion; |
45 | u8 flags; | 45 | u8 flags; |
46 | u16 settlingtime; | 46 | u16 settlingtime; |
47 | u8 reserved1; | 47 | u8 reserved1; |
48 | u8 numpst; | 48 | u8 numpst; |
49 | }; | 49 | }; |
50 | 50 | ||
51 | struct pst_s { | 51 | struct pst_s { |
52 | u32 cpuid; | 52 | u32 cpuid; |
53 | u8 fsbspeed; | 53 | u8 fsbspeed; |
54 | u8 maxfid; | 54 | u8 maxfid; |
55 | u8 startvid; | 55 | u8 startvid; |
56 | u8 numpstates; | 56 | u8 numpstates; |
57 | }; | 57 | }; |
58 | 58 | ||
59 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | 59 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI |
60 | union powernow_acpi_control_t { | 60 | union powernow_acpi_control_t { |
61 | struct { | 61 | struct { |
62 | unsigned long fid:5, | 62 | unsigned long fid:5, |
63 | vid:5, | 63 | vid:5, |
64 | sgtc:20, | 64 | sgtc:20, |
65 | res1:2; | 65 | res1:2; |
66 | } bits; | 66 | } bits; |
67 | unsigned long val; | 67 | unsigned long val; |
68 | }; | 68 | }; |
69 | #endif | 69 | #endif |
70 | 70 | ||
71 | #ifdef CONFIG_CPU_FREQ_DEBUG | 71 | #ifdef CONFIG_CPU_FREQ_DEBUG |
72 | /* divide by 1000 to get VCore voltage in V. */ | 72 | /* divide by 1000 to get VCore voltage in V. */ |
73 | static const int mobile_vid_table[32] = { | 73 | static const int mobile_vid_table[32] = { |
74 | 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, | 74 | 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, |
75 | 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, | 75 | 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, |
76 | 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, | 76 | 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, |
77 | 1075, 1050, 1025, 1000, 975, 950, 925, 0, | 77 | 1075, 1050, 1025, 1000, 975, 950, 925, 0, |
78 | }; | 78 | }; |
79 | #endif | 79 | #endif |
80 | 80 | ||
81 | /* divide by 10 to get the bus frequency multiplier. */ | 81 | /* divide by 10 to get the bus frequency multiplier. */ |
82 | static const int fid_codes[32] = { | 82 | static const int fid_codes[32] = { |
83 | 110, 115, 120, 125, 50, 55, 60, 65, | 83 | 110, 115, 120, 125, 50, 55, 60, 65, |
84 | 70, 75, 80, 85, 90, 95, 100, 105, | 84 | 70, 75, 80, 85, 90, 95, 100, 105, |
85 | 30, 190, 40, 200, 130, 135, 140, 210, | 85 | 30, 190, 40, 200, 130, 135, 140, 210, |
86 | 150, 225, 160, 165, 170, 180, -1, -1, | 86 | 150, 225, 160, 165, 170, 180, -1, -1, |
87 | }; | 87 | }; |
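The two tables above are fixed-point encodings: fid_codes holds the bus multiplier scaled by 10 (110 means 11.0x), and mobile_vid_table holds the core voltage in millivolts. A minimal sketch of how the driver decodes them, with truncated demo copies of the tables and an assumed fsb value; only the arithmetic is taken from the code below:

    /* Sketch only: truncated copies of the tables above. */
    static const int fid_codes_demo[] = { 110, 115, 120, 125 };
    static const int mobile_vid_demo[] = { 2000, 1950, 1900, 1850 };

    unsigned int fsb = 100000;                  /* bus speed in kHz (assumed) */
    unsigned int fid = 2, vid = 1;              /* example codes */

    /* kHz frequency: the multiplier table is scaled by 10 */
    unsigned int khz = fsb * fid_codes_demo[fid] / 10;      /* 1200000 kHz */

    /* volts: the vid table is in millivolts, printed as V.mmm */
    int volts = mobile_vid_demo[vid] / 1000;                /* 1 */
    int mvolt = mobile_vid_demo[vid] % 1000;                /* 950 -> 1.950 V */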
88 | 88 | ||
89 | /* This parameter is used to force ACPI instead of the legacy method for | 89 | /* This parameter is used to force ACPI instead of the legacy method for |
90 | * configuration purposes. | 90 | * configuration purposes. |
91 | */ | 91 | */ |
92 | 92 | ||
93 | static int acpi_force; | 93 | static int acpi_force; |
94 | 94 | ||
95 | static struct cpufreq_frequency_table *powernow_table; | 95 | static struct cpufreq_frequency_table *powernow_table; |
96 | 96 | ||
97 | static unsigned int can_scale_bus; | 97 | static unsigned int can_scale_bus; |
98 | static unsigned int can_scale_vid; | 98 | static unsigned int can_scale_vid; |
99 | static unsigned int minimum_speed = -1; | 99 | static unsigned int minimum_speed = -1; |
100 | static unsigned int maximum_speed; | 100 | static unsigned int maximum_speed; |
101 | static unsigned int number_scales; | 101 | static unsigned int number_scales; |
102 | static unsigned int fsb; | 102 | static unsigned int fsb; |
103 | static unsigned int latency; | 103 | static unsigned int latency; |
104 | static char have_a0; | 104 | static char have_a0; |
105 | 105 | ||
106 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | 106 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ |
107 | "powernow-k7", msg) | 107 | "powernow-k7", msg) |
108 | 108 | ||
109 | static int check_fsb(unsigned int fsbspeed) | 109 | static int check_fsb(unsigned int fsbspeed) |
110 | { | 110 | { |
111 | int delta; | 111 | int delta; |
112 | unsigned int f = fsb / 1000; | 112 | unsigned int f = fsb / 1000; |
113 | 113 | ||
114 | delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; | 114 | delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; |
115 | return delta < 5; | 115 | return delta < 5; |
116 | } | 116 | } |
117 | 117 | ||
118 | static int check_powernow(void) | 118 | static int check_powernow(void) |
119 | { | 119 | { |
120 | struct cpuinfo_x86 *c = &cpu_data(0); | 120 | struct cpuinfo_x86 *c = &cpu_data(0); |
121 | unsigned int maxei, eax, ebx, ecx, edx; | 121 | unsigned int maxei, eax, ebx, ecx, edx; |
122 | 122 | ||
123 | if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) { | 123 | if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) { |
124 | #ifdef MODULE | 124 | #ifdef MODULE |
125 | printk(KERN_INFO PFX "This module only works with " | 125 | printk(KERN_INFO PFX "This module only works with " |
126 | "AMD K7 CPUs\n"); | 126 | "AMD K7 CPUs\n"); |
127 | #endif | 127 | #endif |
128 | return 0; | 128 | return 0; |
129 | } | 129 | } |
130 | 130 | ||
131 | /* Get maximum capabilities */ | 131 | /* Get maximum capabilities */ |
132 | maxei = cpuid_eax(0x80000000); | 132 | maxei = cpuid_eax(0x80000000); |
133 | if (maxei < 0x80000007) { /* Any powernow info? */ | 133 | if (maxei < 0x80000007) { /* Any powernow info? */ |
134 | #ifdef MODULE | 134 | #ifdef MODULE |
135 | printk(KERN_INFO PFX "No powernow capabilities detected\n"); | 135 | printk(KERN_INFO PFX "No powernow capabilities detected\n"); |
136 | #endif | 136 | #endif |
137 | return 0; | 137 | return 0; |
138 | } | 138 | } |
139 | 139 | ||
140 | if ((c->x86_model == 6) && (c->x86_mask == 0)) { | 140 | if ((c->x86_model == 6) && (c->x86_mask == 0)) { |
141 | printk(KERN_INFO PFX "K7 660[A0] core detected, " | 141 | printk(KERN_INFO PFX "K7 660[A0] core detected, " |
142 | "enabling errata workarounds\n"); | 142 | "enabling errata workarounds\n"); |
143 | have_a0 = 1; | 143 | have_a0 = 1; |
144 | } | 144 | } |
145 | 145 | ||
146 | cpuid(0x80000007, &eax, &ebx, &ecx, &edx); | 146 | cpuid(0x80000007, &eax, &ebx, &ecx, &edx); |
147 | 147 | ||
148 | /* Check we can actually do something before we say anything. */ | 148 | /* Check we can actually do something before we say anything. */ |
149 | if (!(edx & (1 << 1 | 1 << 2))) | 149 | if (!(edx & (1 << 1 | 1 << 2))) |
150 | return 0; | 150 | return 0; |
151 | 151 | ||
152 | printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); | 152 | printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); |
153 | 153 | ||
154 | if (edx & 1 << 1) { | 154 | if (edx & 1 << 1) { |
155 | printk("frequency"); | 155 | printk("frequency"); |
156 | can_scale_bus = 1; | 156 | can_scale_bus = 1; |
157 | } | 157 | } |
158 | 158 | ||
159 | if ((edx & (1 << 1 | 1 << 2)) == 0x6) | 159 | if ((edx & (1 << 1 | 1 << 2)) == 0x6) |
160 | printk(" and "); | 160 | printk(" and "); |
161 | 161 | ||
162 | if (edx & 1 << 2) { | 162 | if (edx & 1 << 2) { |
163 | printk("voltage"); | 163 | printk("voltage"); |
164 | can_scale_vid = 1; | 164 | can_scale_vid = 1; |
165 | } | 165 | } |
166 | 166 | ||
167 | printk(".\n"); | 167 | printk(".\n"); |
168 | return 1; | 168 | return 1; |
169 | } | 169 | } |
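check_powernow() keys off CPUID leaf 0x80000007: EDX bit 1 advertises FID (frequency) control and bit 2 VID (voltage) control, so an EDX of 0x6 yields the combined "frequency and voltage" message. A sketch of the decode, with the EDX value assumed for illustration:

    unsigned int edx = 0x6;                     /* example cpuid(0x80000007) EDX */

    int can_scale_freq = !!(edx & (1 << 1));    /* FID (frequency) control */
    int can_scale_volt = !!(edx & (1 << 2));    /* VID (voltage) control */
    /* edx == 0x6 -> both set -> "Can scale: frequency and voltage." */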
170 | 170 | ||
171 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | 171 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI |
172 | static void invalidate_entry(unsigned int entry) | 172 | static void invalidate_entry(unsigned int entry) |
173 | { | 173 | { |
174 | powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; | 174 | powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; |
175 | } | 175 | } |
176 | #endif | 176 | #endif |
177 | 177 | ||
178 | static int get_ranges(unsigned char *pst) | 178 | static int get_ranges(unsigned char *pst) |
179 | { | 179 | { |
180 | unsigned int j; | 180 | unsigned int j; |
181 | unsigned int speed; | 181 | unsigned int speed; |
182 | u8 fid, vid; | 182 | u8 fid, vid; |
183 | 183 | ||
184 | powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * | 184 | powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * |
185 | (number_scales + 1)), GFP_KERNEL); | 185 | (number_scales + 1)), GFP_KERNEL); |
186 | if (!powernow_table) | 186 | if (!powernow_table) |
187 | return -ENOMEM; | 187 | return -ENOMEM; |
188 | 188 | ||
189 | for (j = 0 ; j < number_scales; j++) { | 189 | for (j = 0 ; j < number_scales; j++) { |
190 | fid = *pst++; | 190 | fid = *pst++; |
191 | 191 | ||
192 | powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; | 192 | powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; |
193 | powernow_table[j].index = fid; /* lower 8 bits */ | 193 | powernow_table[j].index = fid; /* lower 8 bits */ |
194 | 194 | ||
195 | speed = powernow_table[j].frequency; | 195 | speed = powernow_table[j].frequency; |
196 | 196 | ||
197 | if ((fid_codes[fid] % 10) == 5) { | 197 | if ((fid_codes[fid] % 10) == 5) { |
198 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | 198 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI |
199 | if (have_a0 == 1) | 199 | if (have_a0 == 1) |
200 | invalidate_entry(j); | 200 | invalidate_entry(j); |
201 | #endif | 201 | #endif |
202 | } | 202 | } |
203 | 203 | ||
204 | if (speed < minimum_speed) | 204 | if (speed < minimum_speed) |
205 | minimum_speed = speed; | 205 | minimum_speed = speed; |
206 | if (speed > maximum_speed) | 206 | if (speed > maximum_speed) |
207 | maximum_speed = speed; | 207 | maximum_speed = speed; |
208 | 208 | ||
209 | vid = *pst++; | 209 | vid = *pst++; |
210 | powernow_table[j].index |= (vid << 8); /* upper 8 bits */ | 210 | powernow_table[j].index |= (vid << 8); /* upper 8 bits */ |
211 | 211 | ||
212 | dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " | 212 | dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " |
213 | "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, | 213 | "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, |
214 | fid_codes[fid] % 10, speed/1000, vid, | 214 | fid_codes[fid] % 10, speed/1000, vid, |
215 | mobile_vid_table[vid]/1000, | 215 | mobile_vid_table[vid]/1000, |
216 | mobile_vid_table[vid]%1000); | 216 | mobile_vid_table[vid]%1000); |
217 | } | 217 | } |
218 | powernow_table[number_scales].frequency = CPUFREQ_TABLE_END; | 218 | powernow_table[number_scales].frequency = CPUFREQ_TABLE_END; |
219 | powernow_table[number_scales].index = 0; | 219 | powernow_table[number_scales].index = 0; |
220 | 220 | ||
221 | return 0; | 221 | return 0; |
222 | } | 222 | } |
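get_ranges() packs each P-state into the 16-bit index field of the cpufreq table, fid in the low byte and vid in the high byte; change_speed() below unpacks the same pair. A small round-trip sketch with example values:

    unsigned int index;
    unsigned char fid = 0x04, vid = 0x12;       /* example codes */

    index = fid;                                /* lower 8 bits */
    index |= vid << 8;                          /* upper 8 bits */

    /* recovery, as change_speed() does it */
    unsigned char out_fid = index & 0xFF;           /* 0x04 */
    unsigned char out_vid = (index & 0xFF00) >> 8;  /* 0x12 */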
223 | 223 | ||
224 | 224 | ||
225 | static void change_FID(int fid) | 225 | static void change_FID(int fid) |
226 | { | 226 | { |
227 | union msr_fidvidctl fidvidctl; | 227 | union msr_fidvidctl fidvidctl; |
228 | 228 | ||
229 | rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); | 229 | rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); |
230 | if (fidvidctl.bits.FID != fid) { | 230 | if (fidvidctl.bits.FID != fid) { |
231 | fidvidctl.bits.SGTC = latency; | 231 | fidvidctl.bits.SGTC = latency; |
232 | fidvidctl.bits.FID = fid; | 232 | fidvidctl.bits.FID = fid; |
233 | fidvidctl.bits.VIDC = 0; | 233 | fidvidctl.bits.VIDC = 0; |
234 | fidvidctl.bits.FIDC = 1; | 234 | fidvidctl.bits.FIDC = 1; |
235 | wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); | 235 | wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); |
236 | } | 236 | } |
237 | } | 237 | } |
238 | 238 | ||
239 | 239 | ||
240 | static void change_VID(int vid) | 240 | static void change_VID(int vid) |
241 | { | 241 | { |
242 | union msr_fidvidctl fidvidctl; | 242 | union msr_fidvidctl fidvidctl; |
243 | 243 | ||
244 | rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); | 244 | rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); |
245 | if (fidvidctl.bits.VID != vid) { | 245 | if (fidvidctl.bits.VID != vid) { |
246 | fidvidctl.bits.SGTC = latency; | 246 | fidvidctl.bits.SGTC = latency; |
247 | fidvidctl.bits.VID = vid; | 247 | fidvidctl.bits.VID = vid; |
248 | fidvidctl.bits.FIDC = 0; | 248 | fidvidctl.bits.FIDC = 0; |
249 | fidvidctl.bits.VIDC = 1; | 249 | fidvidctl.bits.VIDC = 1; |
250 | wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); | 250 | wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); |
251 | } | 251 | } |
252 | } | 252 | } |
253 | 253 | ||
254 | 254 | ||
255 | static void change_speed(unsigned int index) | 255 | static void change_speed(unsigned int index) |
256 | { | 256 | { |
257 | u8 fid, vid; | 257 | u8 fid, vid; |
258 | struct cpufreq_freqs freqs; | 258 | struct cpufreq_freqs freqs; |
259 | union msr_fidvidstatus fidvidstatus; | 259 | union msr_fidvidstatus fidvidstatus; |
260 | int cfid; | 260 | int cfid; |
261 | 261 | ||
262 | /* The fid is the lower 8 bits of the index we stored into | 262 | /* The fid is the lower 8 bits of the index we stored into |
263 | * the cpufreq frequency table in powernow_decode_bios; | 263 | * the cpufreq frequency table in powernow_decode_bios; |
264 | * the vid is the upper 8 bits. | 264 | * the vid is the upper 8 bits. |
265 | */ | 265 | */ |
266 | 266 | ||
267 | fid = powernow_table[index].index & 0xFF; | 267 | fid = powernow_table[index].index & 0xFF; |
268 | vid = (powernow_table[index].index & 0xFF00) >> 8; | 268 | vid = (powernow_table[index].index & 0xFF00) >> 8; |
269 | 269 | ||
270 | freqs.cpu = 0; | 270 | freqs.cpu = 0; |
271 | 271 | ||
272 | rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); | 272 | rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); |
273 | cfid = fidvidstatus.bits.CFID; | 273 | cfid = fidvidstatus.bits.CFID; |
274 | freqs.old = fsb * fid_codes[cfid] / 10; | 274 | freqs.old = fsb * fid_codes[cfid] / 10; |
275 | 275 | ||
276 | freqs.new = powernow_table[index].frequency; | 276 | freqs.new = powernow_table[index].frequency; |
277 | 277 | ||
278 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 278 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
279 | 279 | ||
280 | /* Now do the magic poking into the MSRs. */ | 280 | /* Now do the magic poking into the MSRs. */ |
281 | 281 | ||
282 | if (have_a0 == 1) /* A0 errata 5 */ | 282 | if (have_a0 == 1) /* A0 errata 5 */ |
283 | local_irq_disable(); | 283 | local_irq_disable(); |
284 | 284 | ||
285 | if (freqs.old > freqs.new) { | 285 | if (freqs.old > freqs.new) { |
286 | /* Going down, so change FID first */ | 286 | /* Going down, so change FID first */ |
287 | change_FID(fid); | 287 | change_FID(fid); |
288 | change_VID(vid); | 288 | change_VID(vid); |
289 | } else { | 289 | } else { |
290 | /* Going up, so change VID first */ | 290 | /* Going up, so change VID first */ |
291 | change_VID(vid); | 291 | change_VID(vid); |
292 | change_FID(fid); | 292 | change_FID(fid); |
293 | } | 293 | } |
294 | 294 | ||
295 | 295 | ||
296 | if (have_a0 == 1) | 296 | if (have_a0 == 1) |
297 | local_irq_enable(); | 297 | local_irq_enable(); |
298 | 298 | ||
299 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 299 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
300 | } | 300 | } |
301 | 301 | ||
302 | 302 | ||
303 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | 303 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI |
304 | 304 | ||
305 | static struct acpi_processor_performance *acpi_processor_perf; | 305 | static struct acpi_processor_performance *acpi_processor_perf; |
306 | 306 | ||
307 | static int powernow_acpi_init(void) | 307 | static int powernow_acpi_init(void) |
308 | { | 308 | { |
309 | int i; | 309 | int i; |
310 | int retval = 0; | 310 | int retval = 0; |
311 | union powernow_acpi_control_t pc; | 311 | union powernow_acpi_control_t pc; |
312 | 312 | ||
313 | if (acpi_processor_perf != NULL && powernow_table != NULL) { | 313 | if (acpi_processor_perf != NULL && powernow_table != NULL) { |
314 | retval = -EINVAL; | 314 | retval = -EINVAL; |
315 | goto err0; | 315 | goto err0; |
316 | } | 316 | } |
317 | 317 | ||
318 | acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance), | 318 | acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance), |
319 | GFP_KERNEL); | 319 | GFP_KERNEL); |
320 | if (!acpi_processor_perf) { | 320 | if (!acpi_processor_perf) { |
321 | retval = -ENOMEM; | 321 | retval = -ENOMEM; |
322 | goto err0; | 322 | goto err0; |
323 | } | 323 | } |
324 | 324 | ||
325 | if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, | 325 | if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, |
326 | GFP_KERNEL)) { | 326 | GFP_KERNEL)) { |
327 | retval = -ENOMEM; | 327 | retval = -ENOMEM; |
328 | goto err05; | 328 | goto err05; |
329 | } | 329 | } |
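This hunk is the commit's one functional change in this file. With CONFIG_CPUMASK_OFFSTACK=y (which MAXSMP selects), cpumask_var_t becomes a pointer and alloc_cpumask_var() hands back a bitmap with undefined contents; zalloc_cpumask_var() returns it zeroed, matching the already-cleared state these masks have when OFFSTACK is off. A sketch of the two idioms (error handling abbreviated):

    cpumask_var_t mask;

    /* old pattern: the allocation leaves the bits undefined */
    if (!alloc_cpumask_var(&mask, GFP_KERNEL))
        return -ENOMEM;
    cpumask_clear(mask);                /* an explicit clear is needed */

    /* pattern this commit switches to: allocated and zeroed in one call */
    if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
        return -ENOMEM;

    /* either way, release with free_cpumask_var(mask) when done */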
330 | 330 | ||
331 | if (acpi_processor_register_performance(acpi_processor_perf, 0)) { | 331 | if (acpi_processor_register_performance(acpi_processor_perf, 0)) { |
332 | retval = -EIO; | 332 | retval = -EIO; |
333 | goto err1; | 333 | goto err1; |
334 | } | 334 | } |
335 | 335 | ||
336 | if (acpi_processor_perf->control_register.space_id != | 336 | if (acpi_processor_perf->control_register.space_id != |
337 | ACPI_ADR_SPACE_FIXED_HARDWARE) { | 337 | ACPI_ADR_SPACE_FIXED_HARDWARE) { |
338 | retval = -ENODEV; | 338 | retval = -ENODEV; |
339 | goto err2; | 339 | goto err2; |
340 | } | 340 | } |
341 | 341 | ||
342 | if (acpi_processor_perf->status_register.space_id != | 342 | if (acpi_processor_perf->status_register.space_id != |
343 | ACPI_ADR_SPACE_FIXED_HARDWARE) { | 343 | ACPI_ADR_SPACE_FIXED_HARDWARE) { |
344 | retval = -ENODEV; | 344 | retval = -ENODEV; |
345 | goto err2; | 345 | goto err2; |
346 | } | 346 | } |
347 | 347 | ||
348 | number_scales = acpi_processor_perf->state_count; | 348 | number_scales = acpi_processor_perf->state_count; |
349 | 349 | ||
350 | if (number_scales < 2) { | 350 | if (number_scales < 2) { |
351 | retval = -ENODEV; | 351 | retval = -ENODEV; |
352 | goto err2; | 352 | goto err2; |
353 | } | 353 | } |
354 | 354 | ||
355 | powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * | 355 | powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * |
356 | (number_scales + 1)), GFP_KERNEL); | 356 | (number_scales + 1)), GFP_KERNEL); |
357 | if (!powernow_table) { | 357 | if (!powernow_table) { |
358 | retval = -ENOMEM; | 358 | retval = -ENOMEM; |
359 | goto err2; | 359 | goto err2; |
360 | } | 360 | } |
361 | 361 | ||
362 | pc.val = (unsigned long) acpi_processor_perf->states[0].control; | 362 | pc.val = (unsigned long) acpi_processor_perf->states[0].control; |
363 | for (i = 0; i < number_scales; i++) { | 363 | for (i = 0; i < number_scales; i++) { |
364 | u8 fid, vid; | 364 | u8 fid, vid; |
365 | struct acpi_processor_px *state = | 365 | struct acpi_processor_px *state = |
366 | &acpi_processor_perf->states[i]; | 366 | &acpi_processor_perf->states[i]; |
367 | unsigned int speed, speed_mhz; | 367 | unsigned int speed, speed_mhz; |
368 | 368 | ||
369 | pc.val = (unsigned long) state->control; | 369 | pc.val = (unsigned long) state->control; |
370 | dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", | 370 | dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", |
371 | i, | 371 | i, |
372 | (u32) state->core_frequency, | 372 | (u32) state->core_frequency, |
373 | (u32) state->power, | 373 | (u32) state->power, |
374 | (u32) state->transition_latency, | 374 | (u32) state->transition_latency, |
375 | (u32) state->control, | 375 | (u32) state->control, |
376 | pc.bits.sgtc); | 376 | pc.bits.sgtc); |
377 | 377 | ||
378 | vid = pc.bits.vid; | 378 | vid = pc.bits.vid; |
379 | fid = pc.bits.fid; | 379 | fid = pc.bits.fid; |
380 | 380 | ||
381 | powernow_table[i].frequency = fsb * fid_codes[fid] / 10; | 381 | powernow_table[i].frequency = fsb * fid_codes[fid] / 10; |
382 | powernow_table[i].index = fid; /* lower 8 bits */ | 382 | powernow_table[i].index = fid; /* lower 8 bits */ |
383 | powernow_table[i].index |= (vid << 8); /* upper 8 bits */ | 383 | powernow_table[i].index |= (vid << 8); /* upper 8 bits */ |
384 | 384 | ||
385 | speed = powernow_table[i].frequency; | 385 | speed = powernow_table[i].frequency; |
386 | speed_mhz = speed / 1000; | 386 | speed_mhz = speed / 1000; |
387 | 387 | ||
388 | /* processor_perflib will multiply the MHz value by 1000 to | 388 | /* processor_perflib will multiply the MHz value by 1000 to |
389 | * get a KHz value (e.g. 1266000). However, powernow-k7 works | 389 | * get a KHz value (e.g. 1266000). However, powernow-k7 works |
390 | * with true KHz values (e.g. 1266768). To ensure that all | 390 | * with true KHz values (e.g. 1266768). To ensure that all |
391 | * powernow frequencies are available, we must ensure that | 391 | * powernow frequencies are available, we must ensure that |
392 | * ACPI doesn't restrict them, so we round up the MHz value | 392 | * ACPI doesn't restrict them, so we round up the MHz value |
393 | * to ensure that perflib's computed KHz value is greater than | 393 | * to ensure that perflib's computed KHz value is greater than |
394 | * or equal to powernow's KHz value. | 394 | * or equal to powernow's KHz value. |
395 | */ | 395 | */ |
396 | if (speed % 1000 > 0) | 396 | if (speed % 1000 > 0) |
397 | speed_mhz++; | 397 | speed_mhz++; |
398 | 398 | ||
399 | if ((fid_codes[fid] % 10) == 5) { | 399 | if ((fid_codes[fid] % 10) == 5) { |
400 | if (have_a0 == 1) | 400 | if (have_a0 == 1) |
401 | invalidate_entry(i); | 401 | invalidate_entry(i); |
402 | } | 402 | } |
403 | 403 | ||
404 | dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " | 404 | dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " |
405 | "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, | 405 | "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, |
406 | fid_codes[fid] % 10, speed_mhz, vid, | 406 | fid_codes[fid] % 10, speed_mhz, vid, |
407 | mobile_vid_table[vid]/1000, | 407 | mobile_vid_table[vid]/1000, |
408 | mobile_vid_table[vid]%1000); | 408 | mobile_vid_table[vid]%1000); |
409 | 409 | ||
410 | if (state->core_frequency != speed_mhz) { | 410 | if (state->core_frequency != speed_mhz) { |
411 | state->core_frequency = speed_mhz; | 411 | state->core_frequency = speed_mhz; |
412 | dprintk(" Corrected ACPI frequency to %d\n", | 412 | dprintk(" Corrected ACPI frequency to %d\n", |
413 | speed_mhz); | 413 | speed_mhz); |
414 | } | 414 | } |
415 | 415 | ||
416 | if (latency < pc.bits.sgtc) | 416 | if (latency < pc.bits.sgtc) |
417 | latency = pc.bits.sgtc; | 417 | latency = pc.bits.sgtc; |
418 | 418 | ||
419 | if (speed < minimum_speed) | 419 | if (speed < minimum_speed) |
420 | minimum_speed = speed; | 420 | minimum_speed = speed; |
421 | if (speed > maximum_speed) | 421 | if (speed > maximum_speed) |
422 | maximum_speed = speed; | 422 | maximum_speed = speed; |
423 | } | 423 | } |
424 | 424 | ||
425 | powernow_table[i].frequency = CPUFREQ_TABLE_END; | 425 | powernow_table[i].frequency = CPUFREQ_TABLE_END; |
426 | powernow_table[i].index = 0; | 426 | powernow_table[i].index = 0; |
427 | 427 | ||
428 | /* notify BIOS that we exist */ | 428 | /* notify BIOS that we exist */ |
429 | acpi_processor_notify_smm(THIS_MODULE); | 429 | acpi_processor_notify_smm(THIS_MODULE); |
430 | 430 | ||
431 | return 0; | 431 | return 0; |
432 | 432 | ||
433 | err2: | 433 | err2: |
434 | acpi_processor_unregister_performance(acpi_processor_perf, 0); | 434 | acpi_processor_unregister_performance(acpi_processor_perf, 0); |
435 | err1: | 435 | err1: |
436 | free_cpumask_var(acpi_processor_perf->shared_cpu_map); | 436 | free_cpumask_var(acpi_processor_perf->shared_cpu_map); |
437 | err05: | 437 | err05: |
438 | kfree(acpi_processor_perf); | 438 | kfree(acpi_processor_perf); |
439 | err0: | 439 | err0: |
440 | printk(KERN_WARNING PFX "ACPI perflib cannot be used on " | 440 | printk(KERN_WARNING PFX "ACPI perflib cannot be used on " |
441 | "this platform\n"); | 441 | "this platform\n"); |
442 | acpi_processor_perf = NULL; | 442 | acpi_processor_perf = NULL; |
443 | return retval; | 443 | return retval; |
444 | } | 444 | } |
445 | #else | 445 | #else |
446 | static int powernow_acpi_init(void) | 446 | static int powernow_acpi_init(void) |
447 | { | 447 | { |
448 | printk(KERN_INFO PFX "no support for ACPI processor found." | 448 | printk(KERN_INFO PFX "no support for ACPI processor found." |
449 | " Please recompile your kernel with ACPI processor\n"); | 449 | " Please recompile your kernel with ACPI processor\n"); |
450 | return -EINVAL; | 450 | return -EINVAL; |
451 | } | 451 | } |
452 | #endif | 452 | #endif |
453 | 453 | ||
454 | static void print_pst_entry(struct pst_s *pst, unsigned int j) | 454 | static void print_pst_entry(struct pst_s *pst, unsigned int j) |
455 | { | 455 | { |
456 | dprintk("PST:%d (@%p)\n", j, pst); | 456 | dprintk("PST:%d (@%p)\n", j, pst); |
457 | dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", | 457 | dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", |
458 | pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); | 458 | pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); |
459 | } | 459 | } |
460 | 460 | ||
461 | static int powernow_decode_bios(int maxfid, int startvid) | 461 | static int powernow_decode_bios(int maxfid, int startvid) |
462 | { | 462 | { |
463 | struct psb_s *psb; | 463 | struct psb_s *psb; |
464 | struct pst_s *pst; | 464 | struct pst_s *pst; |
465 | unsigned int i, j; | 465 | unsigned int i, j; |
466 | unsigned char *p; | 466 | unsigned char *p; |
467 | unsigned int etuple; | 467 | unsigned int etuple; |
468 | unsigned int ret; | 468 | unsigned int ret; |
469 | 469 | ||
470 | etuple = cpuid_eax(0x80000001); | 470 | etuple = cpuid_eax(0x80000001); |
471 | 471 | ||
472 | for (i = 0xC0000; i < 0xffff0 ; i += 16) { | 472 | for (i = 0xC0000; i < 0xffff0 ; i += 16) { |
473 | 473 | ||
474 | p = phys_to_virt(i); | 474 | p = phys_to_virt(i); |
475 | 475 | ||
476 | if (memcmp(p, "AMDK7PNOW!", 10) == 0) { | 476 | if (memcmp(p, "AMDK7PNOW!", 10) == 0) { |
477 | dprintk("Found PSB header at %p\n", p); | 477 | dprintk("Found PSB header at %p\n", p); |
478 | psb = (struct psb_s *) p; | 478 | psb = (struct psb_s *) p; |
479 | dprintk("Table version: 0x%x\n", psb->tableversion); | 479 | dprintk("Table version: 0x%x\n", psb->tableversion); |
480 | if (psb->tableversion != 0x12) { | 480 | if (psb->tableversion != 0x12) { |
481 | printk(KERN_INFO PFX "Sorry, only v1.2 tables" | 481 | printk(KERN_INFO PFX "Sorry, only v1.2 tables" |
482 | " supported right now\n"); | 482 | " supported right now\n"); |
483 | return -ENODEV; | 483 | return -ENODEV; |
484 | } | 484 | } |
485 | 485 | ||
486 | dprintk("Flags: 0x%x\n", psb->flags); | 486 | dprintk("Flags: 0x%x\n", psb->flags); |
487 | if ((psb->flags & 1) == 0) | 487 | if ((psb->flags & 1) == 0) |
488 | dprintk("Mobile voltage regulator\n"); | 488 | dprintk("Mobile voltage regulator\n"); |
489 | else | 489 | else |
490 | dprintk("Desktop voltage regulator\n"); | 490 | dprintk("Desktop voltage regulator\n"); |
491 | 491 | ||
492 | latency = psb->settlingtime; | 492 | latency = psb->settlingtime; |
493 | if (latency < 100) { | 493 | if (latency < 100) { |
494 | printk(KERN_INFO PFX "BIOS set settling time " | 494 | printk(KERN_INFO PFX "BIOS set settling time " |
495 | "to %d microseconds. " | 495 | "to %d microseconds. " |
496 | "Should be at least 100. " | 496 | "Should be at least 100. " |
497 | "Correcting.\n", latency); | 497 | "Correcting.\n", latency); |
498 | latency = 100; | 498 | latency = 100; |
499 | } | 499 | } |
500 | dprintk("Settling Time: %d microseconds.\n", | 500 | dprintk("Settling Time: %d microseconds.\n", |
501 | psb->settlingtime); | 501 | psb->settlingtime); |
502 | dprintk("Has %d PST tables. (Only dumping ones " | 502 | dprintk("Has %d PST tables. (Only dumping ones " |
503 | "relevant to this CPU).\n", | 503 | "relevant to this CPU).\n", |
504 | psb->numpst); | 504 | psb->numpst); |
505 | 505 | ||
506 | p += sizeof(struct psb_s); | 506 | p += sizeof(struct psb_s); |
507 | 507 | ||
508 | pst = (struct pst_s *) p; | 508 | pst = (struct pst_s *) p; |
509 | 509 | ||
510 | for (j = 0; j < psb->numpst; j++) { | 510 | for (j = 0; j < psb->numpst; j++) { |
511 | pst = (struct pst_s *) p; | 511 | pst = (struct pst_s *) p; |
512 | number_scales = pst->numpstates; | 512 | number_scales = pst->numpstates; |
513 | 513 | ||
514 | if ((etuple == pst->cpuid) && | 514 | if ((etuple == pst->cpuid) && |
515 | check_fsb(pst->fsbspeed) && | 515 | check_fsb(pst->fsbspeed) && |
516 | (maxfid == pst->maxfid) && | 516 | (maxfid == pst->maxfid) && |
517 | (startvid == pst->startvid)) { | 517 | (startvid == pst->startvid)) { |
518 | print_pst_entry(pst, j); | 518 | print_pst_entry(pst, j); |
519 | p = (char *)pst + sizeof(struct pst_s); | 519 | p = (char *)pst + sizeof(struct pst_s); |
520 | ret = get_ranges(p); | 520 | ret = get_ranges(p); |
521 | return ret; | 521 | return ret; |
522 | } else { | 522 | } else { |
523 | unsigned int k; | 523 | unsigned int k; |
524 | p = (char *)pst + sizeof(struct pst_s); | 524 | p = (char *)pst + sizeof(struct pst_s); |
525 | for (k = 0; k < number_scales; k++) | 525 | for (k = 0; k < number_scales; k++) |
526 | p += 2; | 526 | p += 2; |
527 | } | 527 | } |
528 | } | 528 | } |
529 | printk(KERN_INFO PFX "No PST tables match this cpuid " | 529 | printk(KERN_INFO PFX "No PST tables match this cpuid " |
530 | "(0x%x)\n", etuple); | 530 | "(0x%x)\n", etuple); |
531 | printk(KERN_INFO PFX "This is indicative of a broken " | 531 | printk(KERN_INFO PFX "This is indicative of a broken " |
532 | "BIOS.\n"); | 532 | "BIOS.\n"); |
533 | 533 | ||
534 | return -EINVAL; | 534 | return -EINVAL; |
535 | } | 535 | } |
536 | p++; | 536 | p++; |
537 | } | 537 | } |
538 | 538 | ||
539 | return -ENODEV; | 539 | return -ENODEV; |
540 | } | 540 | } |
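For orientation, the image powernow_decode_bios() walks is a psb_s header followed by numpst pst_s headers, each trailed by numpstates two-byte (fid, vid) pairs; that is why the non-matching path above advances p by 2 per state. A sketch of the walk, reusing the structures defined near the top of this file:

    /* Sketch: skip over every PST without decoding it. */
    static void walk_psb(unsigned char *p)
    {
        struct psb_s *psb = (struct psb_s *)p;
        unsigned int j;

        p += sizeof(struct psb_s);
        for (j = 0; j < psb->numpst; j++) {
            struct pst_s *pst = (struct pst_s *)p;

            p += sizeof(struct pst_s);
            p += 2 * pst->numpstates;   /* the (fid, vid) byte pairs */
        }
    }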
541 | 541 | ||
542 | 542 | ||
543 | static int powernow_target(struct cpufreq_policy *policy, | 543 | static int powernow_target(struct cpufreq_policy *policy, |
544 | unsigned int target_freq, | 544 | unsigned int target_freq, |
545 | unsigned int relation) | 545 | unsigned int relation) |
546 | { | 546 | { |
547 | unsigned int newstate; | 547 | unsigned int newstate; |
548 | 548 | ||
549 | if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, | 549 | if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, |
550 | relation, &newstate)) | 550 | relation, &newstate)) |
551 | return -EINVAL; | 551 | return -EINVAL; |
552 | 552 | ||
553 | change_speed(newstate); | 553 | change_speed(newstate); |
554 | 554 | ||
555 | return 0; | 555 | return 0; |
556 | } | 556 | } |
557 | 557 | ||
558 | 558 | ||
559 | static int powernow_verify(struct cpufreq_policy *policy) | 559 | static int powernow_verify(struct cpufreq_policy *policy) |
560 | { | 560 | { |
561 | return cpufreq_frequency_table_verify(policy, powernow_table); | 561 | return cpufreq_frequency_table_verify(policy, powernow_table); |
562 | } | 562 | } |
563 | 563 | ||
564 | /* | 564 | /* |
565 | * We use the fact that the bus frequency is somehow | 565 | * We use the fact that the bus frequency is somehow |
566 | * a multiple of 100000/3 kHz, then compute sgtc according | 566 | * a multiple of 100000/3 kHz, then compute sgtc according |
567 | * to this multiple. | 567 | * to this multiple. |
568 | * That way we more closely match how AMD intends this to work, | 568 | * That way we more closely match how AMD intends this to work, |
569 | * and get the same kind of behaviour already tested under | 569 | * and get the same kind of behaviour already tested under |
570 | * the "well-known" other OS. | 570 | * the "well-known" other OS. |
571 | */ | 571 | */ |
572 | static int __init fixup_sgtc(void) | 572 | static int __init fixup_sgtc(void) |
573 | { | 573 | { |
574 | unsigned int sgtc; | 574 | unsigned int sgtc; |
575 | unsigned int m; | 575 | unsigned int m; |
576 | 576 | ||
577 | m = fsb / 3333; | 577 | m = fsb / 3333; |
578 | if ((m % 10) >= 5) | 578 | if ((m % 10) >= 5) |
579 | m += 5; | 579 | m += 5; |
580 | 580 | ||
581 | m /= 10; | 581 | m /= 10; |
582 | 582 | ||
583 | sgtc = 100 * m * latency; | 583 | sgtc = 100 * m * latency; |
584 | sgtc = sgtc / 3; | 584 | sgtc = sgtc / 3; |
585 | if (sgtc > 0xfffff) { | 585 | if (sgtc > 0xfffff) { |
586 | printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); | 586 | printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); |
587 | sgtc = 0xfffff; | 587 | sgtc = 0xfffff; |
588 | } | 588 | } |
589 | return sgtc; | 589 | return sgtc; |
590 | } | 590 | } |
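A worked example of the conversion, with the bus speed assumed: for a 133 MHz front-side bus, fsb = 133333 kHz, so m = 133333 / 3333 = 40; the remainder test leaves it alone and m /= 10 gives 4. With latency = 100 (microseconds of settling time), sgtc = 100 * 4 * 100 / 3 = 13333, and 13333 bus clocks at 133.33 MHz is indeed about 100 us, so the result is the settling time re-expressed in the bus-clock units the SGTC field expects:

    unsigned int fsb = 133333;          /* kHz, assumed 133 MHz bus */
    unsigned int latency = 100;         /* settling time in microseconds */

    unsigned int m = fsb / 3333;        /* = 40 */
    if ((m % 10) >= 5)                  /* round to the nearest multiple of 10 */
        m += 5;
    m /= 10;                            /* = 4 */

    unsigned int sgtc = 100 * m * latency / 3;  /* = 13333 bus clocks */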
591 | 591 | ||
592 | static unsigned int powernow_get(unsigned int cpu) | 592 | static unsigned int powernow_get(unsigned int cpu) |
593 | { | 593 | { |
594 | union msr_fidvidstatus fidvidstatus; | 594 | union msr_fidvidstatus fidvidstatus; |
595 | unsigned int cfid; | 595 | unsigned int cfid; |
596 | 596 | ||
597 | if (cpu) | 597 | if (cpu) |
598 | return 0; | 598 | return 0; |
599 | rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); | 599 | rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); |
600 | cfid = fidvidstatus.bits.CFID; | 600 | cfid = fidvidstatus.bits.CFID; |
601 | 601 | ||
602 | return fsb * fid_codes[cfid] / 10; | 602 | return fsb * fid_codes[cfid] / 10; |
603 | } | 603 | } |
604 | 604 | ||
605 | 605 | ||
606 | static int __init acer_cpufreq_pst(const struct dmi_system_id *d) | 606 | static int __init acer_cpufreq_pst(const struct dmi_system_id *d) |
607 | { | 607 | { |
608 | printk(KERN_WARNING PFX | 608 | printk(KERN_WARNING PFX |
609 | "%s laptop with broken PST tables in BIOS detected.\n", | 609 | "%s laptop with broken PST tables in BIOS detected.\n", |
610 | d->ident); | 610 | d->ident); |
611 | printk(KERN_WARNING PFX | 611 | printk(KERN_WARNING PFX |
612 | "You need to downgrade to 3A21 (09/09/2002), or try a newer " | 612 | "You need to downgrade to 3A21 (09/09/2002), or try a newer " |
613 | "BIOS than 3A71 (01/20/2003)\n"); | 613 | "BIOS than 3A71 (01/20/2003)\n"); |
614 | printk(KERN_WARNING PFX | 614 | printk(KERN_WARNING PFX |
615 | "cpufreq scaling has been disabled as a result of this.\n"); | 615 | "cpufreq scaling has been disabled as a result of this.\n"); |
616 | return 0; | 616 | return 0; |
617 | } | 617 | } |
618 | 618 | ||
619 | /* | 619 | /* |
620 | * Some Athlon laptops have hopelessly broken PST tables. | 620 | * Some Athlon laptops have hopelessly broken PST tables. |
621 | * A BIOS update is all that can save them. | 621 | * A BIOS update is all that can save them. |
622 | * Mention this, and disable cpufreq. | 622 | * Mention this, and disable cpufreq. |
623 | */ | 623 | */ |
624 | static struct dmi_system_id __initdata powernow_dmi_table[] = { | 624 | static struct dmi_system_id __initdata powernow_dmi_table[] = { |
625 | { | 625 | { |
626 | .callback = acer_cpufreq_pst, | 626 | .callback = acer_cpufreq_pst, |
627 | .ident = "Acer Aspire", | 627 | .ident = "Acer Aspire", |
628 | .matches = { | 628 | .matches = { |
629 | DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"), | 629 | DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"), |
630 | DMI_MATCH(DMI_BIOS_VERSION, "3A71"), | 630 | DMI_MATCH(DMI_BIOS_VERSION, "3A71"), |
631 | }, | 631 | }, |
632 | }, | 632 | }, |
633 | { } | 633 | { } |
634 | }; | 634 | }; |
635 | 635 | ||
636 | static int __init powernow_cpu_init(struct cpufreq_policy *policy) | 636 | static int __init powernow_cpu_init(struct cpufreq_policy *policy) |
637 | { | 637 | { |
638 | union msr_fidvidstatus fidvidstatus; | 638 | union msr_fidvidstatus fidvidstatus; |
639 | int result; | 639 | int result; |
640 | 640 | ||
641 | if (policy->cpu != 0) | 641 | if (policy->cpu != 0) |
642 | return -ENODEV; | 642 | return -ENODEV; |
643 | 643 | ||
644 | rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); | 644 | rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); |
645 | 645 | ||
646 | recalibrate_cpu_khz(); | 646 | recalibrate_cpu_khz(); |
647 | 647 | ||
648 | fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; | 648 | fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; |
649 | if (!fsb) { | 649 | if (!fsb) { |
650 | printk(KERN_WARNING PFX "cannot determine bus frequency\n"); | 650 | printk(KERN_WARNING PFX "cannot determine bus frequency\n"); |
651 | return -EINVAL; | 651 | return -EINVAL; |
652 | } | 652 | } |
653 | dprintk("FSB: %3dMHz\n", fsb/1000); | 653 | dprintk("FSB: %3dMHz\n", fsb/1000); |
654 | 654 | ||
655 | if (dmi_check_system(powernow_dmi_table) || acpi_force) { | 655 | if (dmi_check_system(powernow_dmi_table) || acpi_force) { |
656 | printk(KERN_INFO PFX "PSB/PST known to be broken. " | 656 | printk(KERN_INFO PFX "PSB/PST known to be broken. " |
657 | "Trying ACPI instead\n"); | 657 | "Trying ACPI instead\n"); |
658 | result = powernow_acpi_init(); | 658 | result = powernow_acpi_init(); |
659 | } else { | 659 | } else { |
660 | result = powernow_decode_bios(fidvidstatus.bits.MFID, | 660 | result = powernow_decode_bios(fidvidstatus.bits.MFID, |
661 | fidvidstatus.bits.SVID); | 661 | fidvidstatus.bits.SVID); |
662 | if (result) { | 662 | if (result) { |
663 | printk(KERN_INFO PFX "Trying ACPI perflib\n"); | 663 | printk(KERN_INFO PFX "Trying ACPI perflib\n"); |
664 | maximum_speed = 0; | 664 | maximum_speed = 0; |
665 | minimum_speed = -1; | 665 | minimum_speed = -1; |
666 | latency = 0; | 666 | latency = 0; |
667 | result = powernow_acpi_init(); | 667 | result = powernow_acpi_init(); |
668 | if (result) { | 668 | if (result) { |
669 | printk(KERN_INFO PFX | 669 | printk(KERN_INFO PFX |
670 | "ACPI and legacy methods failed\n"); | 670 | "ACPI and legacy methods failed\n"); |
671 | } | 671 | } |
672 | } else { | 672 | } else { |
673 | /* SGTC uses the bus clock as its timer */ | 673 | /* SGTC uses the bus clock as its timer */ |
674 | latency = fixup_sgtc(); | 674 | latency = fixup_sgtc(); |
675 | printk(KERN_INFO PFX "SGTC: %d\n", latency); | 675 | printk(KERN_INFO PFX "SGTC: %d\n", latency); |
676 | } | 676 | } |
677 | } | 677 | } |
678 | 678 | ||
679 | if (result) | 679 | if (result) |
680 | return result; | 680 | return result; |
681 | 681 | ||
682 | printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", | 682 | printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", |
683 | minimum_speed/1000, maximum_speed/1000); | 683 | minimum_speed/1000, maximum_speed/1000); |
684 | 684 | ||
685 | policy->cpuinfo.transition_latency = | 685 | policy->cpuinfo.transition_latency = |
686 | cpufreq_scale(2000000UL, fsb, latency); | 686 | cpufreq_scale(2000000UL, fsb, latency); |
687 | 687 | ||
688 | policy->cur = powernow_get(0); | 688 | policy->cur = powernow_get(0); |
689 | 689 | ||
690 | cpufreq_frequency_table_get_attr(powernow_table, policy->cpu); | 690 | cpufreq_frequency_table_get_attr(powernow_table, policy->cpu); |
691 | 691 | ||
692 | return cpufreq_frequency_table_cpuinfo(policy, powernow_table); | 692 | return cpufreq_frequency_table_cpuinfo(policy, powernow_table); |
693 | } | 693 | } |
694 | 694 | ||
695 | static int powernow_cpu_exit(struct cpufreq_policy *policy) | 695 | static int powernow_cpu_exit(struct cpufreq_policy *policy) |
696 | { | 696 | { |
697 | cpufreq_frequency_table_put_attr(policy->cpu); | 697 | cpufreq_frequency_table_put_attr(policy->cpu); |
698 | 698 | ||
699 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | 699 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI |
700 | if (acpi_processor_perf) { | 700 | if (acpi_processor_perf) { |
701 | acpi_processor_unregister_performance(acpi_processor_perf, 0); | 701 | acpi_processor_unregister_performance(acpi_processor_perf, 0); |
702 | free_cpumask_var(acpi_processor_perf->shared_cpu_map); | 702 | free_cpumask_var(acpi_processor_perf->shared_cpu_map); |
703 | kfree(acpi_processor_perf); | 703 | kfree(acpi_processor_perf); |
704 | } | 704 | } |
705 | #endif | 705 | #endif |
706 | 706 | ||
707 | kfree(powernow_table); | 707 | kfree(powernow_table); |
708 | return 0; | 708 | return 0; |
709 | } | 709 | } |
710 | 710 | ||
711 | static struct freq_attr *powernow_table_attr[] = { | 711 | static struct freq_attr *powernow_table_attr[] = { |
712 | &cpufreq_freq_attr_scaling_available_freqs, | 712 | &cpufreq_freq_attr_scaling_available_freqs, |
713 | NULL, | 713 | NULL, |
714 | }; | 714 | }; |
715 | 715 | ||
716 | static struct cpufreq_driver powernow_driver = { | 716 | static struct cpufreq_driver powernow_driver = { |
717 | .verify = powernow_verify, | 717 | .verify = powernow_verify, |
718 | .target = powernow_target, | 718 | .target = powernow_target, |
719 | .get = powernow_get, | 719 | .get = powernow_get, |
720 | .init = powernow_cpu_init, | 720 | .init = powernow_cpu_init, |
721 | .exit = powernow_cpu_exit, | 721 | .exit = powernow_cpu_exit, |
722 | .name = "powernow-k7", | 722 | .name = "powernow-k7", |
723 | .owner = THIS_MODULE, | 723 | .owner = THIS_MODULE, |
724 | .attr = powernow_table_attr, | 724 | .attr = powernow_table_attr, |
725 | }; | 725 | }; |
726 | 726 | ||
727 | static int __init powernow_init(void) | 727 | static int __init powernow_init(void) |
728 | { | 728 | { |
729 | if (check_powernow() == 0) | 729 | if (check_powernow() == 0) |
730 | return -ENODEV; | 730 | return -ENODEV; |
731 | return cpufreq_register_driver(&powernow_driver); | 731 | return cpufreq_register_driver(&powernow_driver); |
732 | } | 732 | } |
733 | 733 | ||
734 | 734 | ||
735 | static void __exit powernow_exit(void) | 735 | static void __exit powernow_exit(void) |
736 | { | 736 | { |
737 | cpufreq_unregister_driver(&powernow_driver); | 737 | cpufreq_unregister_driver(&powernow_driver); |
738 | } | 738 | } |
739 | 739 | ||
740 | module_param(acpi_force, int, 0444); | 740 | module_param(acpi_force, int, 0444); |
741 | MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); | 741 | MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); |
742 | 742 | ||
743 | MODULE_AUTHOR("Dave Jones <davej@redhat.com>"); | 743 | MODULE_AUTHOR("Dave Jones <davej@redhat.com>"); |
744 | MODULE_DESCRIPTION("Powernow driver for AMD K7 processors."); | 744 | MODULE_DESCRIPTION("Powernow driver for AMD K7 processors."); |
745 | MODULE_LICENSE("GPL"); | 745 | MODULE_LICENSE("GPL"); |
746 | 746 | ||
747 | late_initcall(powernow_init); | 747 | late_initcall(powernow_init); |
748 | module_exit(powernow_exit); | 748 | module_exit(powernow_exit); |
749 | 749 | ||
750 | 750 |
arch/x86/kernel/cpu/cpufreq/powernow-k8.c
1 | /* | 1 | /* |
2 | * (c) 2003-2006 Advanced Micro Devices, Inc. | 2 | * (c) 2003-2006 Advanced Micro Devices, Inc. |
3 | * Your use of this code is subject to the terms and conditions of the | 3 | * Your use of this code is subject to the terms and conditions of the |
4 | * GNU general public license version 2. See "COPYING" or | 4 | * GNU general public license version 2. See "COPYING" or |
5 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
6 | * | 6 | * |
7 | * Support : mark.langsdorf@amd.com | 7 | * Support : mark.langsdorf@amd.com |
8 | * | 8 | * |
9 | * Based on the powernow-k7.c module written by Dave Jones. | 9 | * Based on the powernow-k7.c module written by Dave Jones. |
10 | * (C) 2003 Dave Jones on behalf of SuSE Labs | 10 | * (C) 2003 Dave Jones on behalf of SuSE Labs |
11 | * (C) 2004 Dominik Brodowski <linux@brodo.de> | 11 | * (C) 2004 Dominik Brodowski <linux@brodo.de> |
12 | * (C) 2004 Pavel Machek <pavel@suse.cz> | 12 | * (C) 2004 Pavel Machek <pavel@suse.cz> |
13 | * Licensed under the terms of the GNU GPL License version 2. | 13 | * Licensed under the terms of the GNU GPL License version 2. |
14 | * Based upon datasheets & sample CPUs kindly provided by AMD. | 14 | * Based upon datasheets & sample CPUs kindly provided by AMD. |
15 | * | 15 | * |
16 | * Valuable input gratefully received from Dave Jones, Pavel Machek, | 16 | * Valuable input gratefully received from Dave Jones, Pavel Machek, |
17 | * Dominik Brodowski, Jacob Shin, and others. | 17 | * Dominik Brodowski, Jacob Shin, and others. |
18 | * Originally developed by Paul Devriendt. | 18 | * Originally developed by Paul Devriendt. |
19 | * Processor information obtained from Chapter 9 (Power and Thermal Management) | 19 | * Processor information obtained from Chapter 9 (Power and Thermal Management) |
20 | * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD | 20 | * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD |
21 | * Opteron Processors" available for download from www.amd.com | 21 | * Opteron Processors" available for download from www.amd.com |
22 | * | 22 | * |
23 | * Tables for specific CPUs can be inferred from | 23 | * Tables for specific CPUs can be inferred from |
24 | * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf | 24 | * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf |
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/kernel.h> | 27 | #include <linux/kernel.h> |
28 | #include <linux/smp.h> | 28 | #include <linux/smp.h> |
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <linux/init.h> | 30 | #include <linux/init.h> |
31 | #include <linux/cpufreq.h> | 31 | #include <linux/cpufreq.h> |
32 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
33 | #include <linux/string.h> | 33 | #include <linux/string.h> |
34 | #include <linux/cpumask.h> | 34 | #include <linux/cpumask.h> |
35 | #include <linux/sched.h> /* for current / set_cpus_allowed() */ | 35 | #include <linux/sched.h> /* for current / set_cpus_allowed() */ |
36 | #include <linux/io.h> | 36 | #include <linux/io.h> |
37 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
38 | 38 | ||
39 | #include <asm/msr.h> | 39 | #include <asm/msr.h> |
40 | 40 | ||
41 | #include <linux/acpi.h> | 41 | #include <linux/acpi.h> |
42 | #include <linux/mutex.h> | 42 | #include <linux/mutex.h> |
43 | #include <acpi/processor.h> | 43 | #include <acpi/processor.h> |
44 | 44 | ||
45 | #define PFX "powernow-k8: " | 45 | #define PFX "powernow-k8: " |
46 | #define VERSION "version 2.20.00" | 46 | #define VERSION "version 2.20.00" |
47 | #include "powernow-k8.h" | 47 | #include "powernow-k8.h" |
48 | 48 | ||
49 | /* serialize freq changes */ | 49 | /* serialize freq changes */ |
50 | static DEFINE_MUTEX(fidvid_mutex); | 50 | static DEFINE_MUTEX(fidvid_mutex); |
51 | 51 | ||
52 | static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); | 52 | static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); |
53 | 53 | ||
54 | static int cpu_family = CPU_OPTERON; | 54 | static int cpu_family = CPU_OPTERON; |
55 | 55 | ||
56 | #ifndef CONFIG_SMP | 56 | #ifndef CONFIG_SMP |
57 | static inline const struct cpumask *cpu_core_mask(int cpu) | 57 | static inline const struct cpumask *cpu_core_mask(int cpu) |
58 | { | 58 | { |
59 | return cpumask_of(0); | 59 | return cpumask_of(0); |
60 | } | 60 | } |
61 | #endif | 61 | #endif |
62 | 62 | ||
63 | /* Return a frequency in MHz, given an input fid */ | 63 | /* Return a frequency in MHz, given an input fid */ |
64 | static u32 find_freq_from_fid(u32 fid) | 64 | static u32 find_freq_from_fid(u32 fid) |
65 | { | 65 | { |
66 | return 800 + (fid * 100); | 66 | return 800 + (fid * 100); |
67 | } | 67 | } |
68 | 68 | ||
69 | /* Return a frequency in KHz, given an input fid */ | 69 | /* Return a frequency in KHz, given an input fid */ |
70 | static u32 find_khz_freq_from_fid(u32 fid) | 70 | static u32 find_khz_freq_from_fid(u32 fid) |
71 | { | 71 | { |
72 | return 1000 * find_freq_from_fid(fid); | 72 | return 1000 * find_freq_from_fid(fid); |
73 | } | 73 | } |
74 | 74 | ||
75 | static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, | 75 | static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, |
76 | u32 pstate) | 76 | u32 pstate) |
77 | { | 77 | { |
78 | return data[pstate].frequency; | 78 | return data[pstate].frequency; |
79 | } | 79 | } |
80 | 80 | ||
81 | /* Return the vco fid for an input fid | 81 | /* Return the vco fid for an input fid |
82 | * | 82 | * |
83 | * Each "low" fid has a corresponding "high" fid, and you can get to "low" fids | 83 | * Each "low" fid has a corresponding "high" fid, and you can get to "low" fids |
84 | * only from the corresponding high fids. This returns the "high" fid | 84 | * only from the corresponding high fids. This returns the "high" fid |
85 | * corresponding to a "low" one. | 85 | * corresponding to a "low" one. |
86 | */ | 86 | */ |
87 | static u32 convert_fid_to_vco_fid(u32 fid) | 87 | static u32 convert_fid_to_vco_fid(u32 fid) |
88 | { | 88 | { |
89 | if (fid < HI_FID_TABLE_BOTTOM) | 89 | if (fid < HI_FID_TABLE_BOTTOM) |
90 | return 8 + (2 * fid); | 90 | return 8 + (2 * fid); |
91 | else | 91 | else |
92 | return fid; | 92 | return fid; |
93 | } | 93 | } |
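Both helpers above are pure arithmetic: on K8 the core frequency in MHz is 800 + fid * 100, and a "low" fid below HI_FID_TABLE_BOTTOM maps onto the doubled "high" fid 8 + 2 * fid. A sketch, with HI_FID_TABLE_BOTTOM's value assumed for illustration (the real constant is defined in powernow-k8.h):

    #define HI_FID_TABLE_BOTTOM_DEMO 8          /* assumed for illustration */

    unsigned int fid = 4;
    unsigned int mhz = 800 + fid * 100;         /* fid 4 -> 1200 MHz */
    unsigned int khz = 1000 * mhz;              /* units the cpufreq core uses */

    unsigned int vco_fid = (fid < HI_FID_TABLE_BOTTOM_DEMO)
                            ? 8 + 2 * fid       /* low fid -> doubled high fid */
                            : fid;              /* high fids map to themselves */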
94 | 94 | ||
95 | /* | 95 | /* |
96 | * Return 1 if the pending bit is set. Unless we just instructed the processor | 96 | * Return 1 if the pending bit is set. Unless we just instructed the processor |
97 | * to transition to a new state, seeing this bit set is really bad news. | 97 | * to transition to a new state, seeing this bit set is really bad news. |
98 | */ | 98 | */ |
99 | static int pending_bit_stuck(void) | 99 | static int pending_bit_stuck(void) |
100 | { | 100 | { |
101 | u32 lo, hi; | 101 | u32 lo, hi; |
102 | 102 | ||
103 | if (cpu_family == CPU_HW_PSTATE) | 103 | if (cpu_family == CPU_HW_PSTATE) |
104 | return 0; | 104 | return 0; |
105 | 105 | ||
106 | rdmsr(MSR_FIDVID_STATUS, lo, hi); | 106 | rdmsr(MSR_FIDVID_STATUS, lo, hi); |
107 | return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; | 107 | return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; |
108 | } | 108 | } |
109 | 109 | ||
110 | /* | 110 | /* |
111 | * Update the global current fid / vid values from the status msr. | 111 | * Update the global current fid / vid values from the status msr. |
112 | * Returns 1 on error. | 112 | * Returns 1 on error. |
113 | */ | 113 | */ |
114 | static int query_current_values_with_pending_wait(struct powernow_k8_data *data) | 114 | static int query_current_values_with_pending_wait(struct powernow_k8_data *data) |
115 | { | 115 | { |
116 | u32 lo, hi; | 116 | u32 lo, hi; |
117 | u32 i = 0; | 117 | u32 i = 0; |
118 | 118 | ||
119 | if (cpu_family == CPU_HW_PSTATE) { | 119 | if (cpu_family == CPU_HW_PSTATE) { |
120 | if (data->currpstate == HW_PSTATE_INVALID) { | 120 | if (data->currpstate == HW_PSTATE_INVALID) { |
121 | /* read (initial) hw pstate if not yet set */ | 121 | /* read (initial) hw pstate if not yet set */ |
122 | rdmsr(MSR_PSTATE_STATUS, lo, hi); | 122 | rdmsr(MSR_PSTATE_STATUS, lo, hi); |
123 | i = lo & HW_PSTATE_MASK; | 123 | i = lo & HW_PSTATE_MASK; |
124 | 124 | ||
125 | /* | 125 | /* |
126 | * A workaround for family 11h erratum 311 might cause | 126 | * A workaround for family 11h erratum 311 might cause |
127 | * an "out-of-range" Pstate if the core is in Pstate-0. | 127 | * an "out-of-range" Pstate if the core is in Pstate-0. |
128 | */ | 128 | */ |
129 | if (i >= data->numps) | 129 | if (i >= data->numps) |
130 | data->currpstate = HW_PSTATE_0; | 130 | data->currpstate = HW_PSTATE_0; |
131 | else | 131 | else |
132 | data->currpstate = i; | 132 | data->currpstate = i; |
133 | } | 133 | } |
134 | return 0; | 134 | return 0; |
135 | } | 135 | } |
136 | do { | 136 | do { |
137 | if (i++ > 10000) { | 137 | if (i++ > 10000) { |
138 | dprintk("detected change pending stuck\n"); | 138 | dprintk("detected change pending stuck\n"); |
139 | return 1; | 139 | return 1; |
140 | } | 140 | } |
141 | rdmsr(MSR_FIDVID_STATUS, lo, hi); | 141 | rdmsr(MSR_FIDVID_STATUS, lo, hi); |
142 | } while (lo & MSR_S_LO_CHANGE_PENDING); | 142 | } while (lo & MSR_S_LO_CHANGE_PENDING); |
143 | 143 | ||
144 | data->currvid = hi & MSR_S_HI_CURRENT_VID; | 144 | data->currvid = hi & MSR_S_HI_CURRENT_VID; |
145 | data->currfid = lo & MSR_S_LO_CURRENT_FID; | 145 | data->currfid = lo & MSR_S_LO_CURRENT_FID; |
146 | 146 | ||
147 | return 0; | 147 | return 0; |
148 | } | 148 | } |
149 | 149 | ||
150 | /* the isochronous relief time */ | 150 | /* the isochronous relief time */ |
151 | static void count_off_irt(struct powernow_k8_data *data) | 151 | static void count_off_irt(struct powernow_k8_data *data) |
152 | { | 152 | { |
153 | udelay((1 << data->irt) * 10); | 153 | udelay((1 << data->irt) * 10); |
154 | return; | 154 | return; |
155 | } | 155 | } |
156 | 156 | ||
157 | /* the voltage stabilization time */ | 157 | /* the voltage stabilization time */ |
158 | static void count_off_vst(struct powernow_k8_data *data) | 158 | static void count_off_vst(struct powernow_k8_data *data) |
159 | { | 159 | { |
160 | udelay(data->vstable * VST_UNITS_20US); | 160 | udelay(data->vstable * VST_UNITS_20US); |
161 | return; | 161 | return; |
162 | } | 162 | } |
163 | 163 | ||
164 | /* need to init the control msr to a safe value (for each cpu) */ | 164 | /* need to init the control msr to a safe value (for each cpu) */ |
165 | static void fidvid_msr_init(void) | 165 | static void fidvid_msr_init(void) |
166 | { | 166 | { |
167 | u32 lo, hi; | 167 | u32 lo, hi; |
168 | u8 fid, vid; | 168 | u8 fid, vid; |
169 | 169 | ||
170 | rdmsr(MSR_FIDVID_STATUS, lo, hi); | 170 | rdmsr(MSR_FIDVID_STATUS, lo, hi); |
171 | vid = hi & MSR_S_HI_CURRENT_VID; | 171 | vid = hi & MSR_S_HI_CURRENT_VID; |
172 | fid = lo & MSR_S_LO_CURRENT_FID; | 172 | fid = lo & MSR_S_LO_CURRENT_FID; |
173 | lo = fid | (vid << MSR_C_LO_VID_SHIFT); | 173 | lo = fid | (vid << MSR_C_LO_VID_SHIFT); |
174 | hi = MSR_C_HI_STP_GNT_BENIGN; | 174 | hi = MSR_C_HI_STP_GNT_BENIGN; |
175 | dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); | 175 | dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); |
176 | wrmsr(MSR_FIDVID_CTL, lo, hi); | 176 | wrmsr(MSR_FIDVID_CTL, lo, hi); |
177 | } | 177 | } |
178 | 178 | ||
179 | /* write the new fid value along with the other control fields to the msr */ | 179 | /* write the new fid value along with the other control fields to the msr */ |
180 | static int write_new_fid(struct powernow_k8_data *data, u32 fid) | 180 | static int write_new_fid(struct powernow_k8_data *data, u32 fid) |
181 | { | 181 | { |
182 | u32 lo; | 182 | u32 lo; |
183 | u32 savevid = data->currvid; | 183 | u32 savevid = data->currvid; |
184 | u32 i = 0; | 184 | u32 i = 0; |
185 | 185 | ||
186 | if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { | 186 | if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { |
187 | printk(KERN_ERR PFX "internal error - overflow on fid write\n"); | 187 | printk(KERN_ERR PFX "internal error - overflow on fid write\n"); |
188 | return 1; | 188 | return 1; |
189 | } | 189 | } |
190 | 190 | ||
191 | lo = fid; | 191 | lo = fid; |
192 | lo |= (data->currvid << MSR_C_LO_VID_SHIFT); | 192 | lo |= (data->currvid << MSR_C_LO_VID_SHIFT); |
193 | lo |= MSR_C_LO_INIT_FID_VID; | 193 | lo |= MSR_C_LO_INIT_FID_VID; |
194 | 194 | ||
195 | dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", | 195 | dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", |
196 | fid, lo, data->plllock * PLL_LOCK_CONVERSION); | 196 | fid, lo, data->plllock * PLL_LOCK_CONVERSION); |
197 | 197 | ||
198 | do { | 198 | do { |
199 | wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); | 199 | wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); |
200 | if (i++ > 100) { | 200 | if (i++ > 100) { |
201 | printk(KERN_ERR PFX | 201 | printk(KERN_ERR PFX |
202 | "Hardware error - pending bit very stuck - " | 202 | "Hardware error - pending bit very stuck - " |
203 | "no further pstate changes possible\n"); | 203 | "no further pstate changes possible\n"); |
204 | return 1; | 204 | return 1; |
205 | } | 205 | } |
206 | } while (query_current_values_with_pending_wait(data)); | 206 | } while (query_current_values_with_pending_wait(data)); |
207 | 207 | ||
208 | count_off_irt(data); | 208 | count_off_irt(data); |
209 | 209 | ||
210 | if (savevid != data->currvid) { | 210 | if (savevid != data->currvid) { |
211 | printk(KERN_ERR PFX | 211 | printk(KERN_ERR PFX |
212 | "vid change on fid trans, old 0x%x, new 0x%x\n", | 212 | "vid change on fid trans, old 0x%x, new 0x%x\n", |
213 | savevid, data->currvid); | 213 | savevid, data->currvid); |
214 | return 1; | 214 | return 1; |
215 | } | 215 | } |
216 | 216 | ||
217 | if (fid != data->currfid) { | 217 | if (fid != data->currfid) { |
218 | printk(KERN_ERR PFX | 218 | printk(KERN_ERR PFX |
219 | "fid trans failed, fid 0x%x, curr 0x%x\n", fid, | 219 | "fid trans failed, fid 0x%x, curr 0x%x\n", fid, |
220 | data->currfid); | 220 | data->currfid); |
221 | return 1; | 221 | return 1; |
222 | } | 222 | } |
223 | 223 | ||
224 | return 0; | 224 | return 0; |
225 | } | 225 | } |
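
The do/while above is a bounded retry: the wrmsr is reissued while the hardware still reports a pending transition, and the driver gives up after 100 attempts rather than spinning forever. The same idiom reappears in write_new_vid() below; a generic sketch of it (hypothetical helper, not part of the driver):

    /* Reissue an operation until a status poll reports idle; bail out
     * after max_tries so a stuck pending bit cannot hang the caller. */
    static int retry_until_idle(void (*issue)(void), int (*busy)(void),
                                int max_tries)
    {
        int i = 0;

        do {
            issue();
            if (i++ > max_tries)
                return 1;   /* pending bit stuck */
        } while (busy());

        return 0;
    }
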
226 | 226 | ||
227 | /* Write a new vid to the hardware */ | 227 | /* Write a new vid to the hardware */ |
228 | static int write_new_vid(struct powernow_k8_data *data, u32 vid) | 228 | static int write_new_vid(struct powernow_k8_data *data, u32 vid) |
229 | { | 229 | { |
230 | u32 lo; | 230 | u32 lo; |
231 | u32 savefid = data->currfid; | 231 | u32 savefid = data->currfid; |
232 | int i = 0; | 232 | int i = 0; |
233 | 233 | ||
234 | if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { | 234 | if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { |
235 | printk(KERN_ERR PFX "internal error - overflow on vid write\n"); | 235 | printk(KERN_ERR PFX "internal error - overflow on vid write\n"); |
236 | return 1; | 236 | return 1; |
237 | } | 237 | } |
238 | 238 | ||
239 | lo = data->currfid; | 239 | lo = data->currfid; |
240 | lo |= (vid << MSR_C_LO_VID_SHIFT); | 240 | lo |= (vid << MSR_C_LO_VID_SHIFT); |
241 | lo |= MSR_C_LO_INIT_FID_VID; | 241 | lo |= MSR_C_LO_INIT_FID_VID; |
242 | 242 | ||
243 | dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", | 243 | dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", |
244 | vid, lo, STOP_GRANT_5NS); | 244 | vid, lo, STOP_GRANT_5NS); |
245 | 245 | ||
246 | do { | 246 | do { |
247 | wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); | 247 | wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); |
248 | if (i++ > 100) { | 248 | if (i++ > 100) { |
249 | printk(KERN_ERR PFX "internal error - pending bit " | 249 | printk(KERN_ERR PFX "internal error - pending bit " |
250 | "very stuck - no further pstate " | 250 | "very stuck - no further pstate " |
251 | "changes possible\n"); | 251 | "changes possible\n"); |
252 | return 1; | 252 | return 1; |
253 | } | 253 | } |
254 | } while (query_current_values_with_pending_wait(data)); | 254 | } while (query_current_values_with_pending_wait(data)); |
255 | 255 | ||
256 | if (savefid != data->currfid) { | 256 | if (savefid != data->currfid) { |
257 | printk(KERN_ERR PFX "fid changed on vid trans, old " | 257 | printk(KERN_ERR PFX "fid changed on vid trans, old " |
258 | "0x%x new 0x%x\n", | 258 | "0x%x new 0x%x\n", |
259 | savefid, data->currfid); | 259 | savefid, data->currfid); |
260 | return 1; | 260 | return 1; |
261 | } | 261 | } |
262 | 262 | ||
263 | if (vid != data->currvid) { | 263 | if (vid != data->currvid) { |
264 | printk(KERN_ERR PFX "vid trans failed, vid 0x%x, " | 264 | printk(KERN_ERR PFX "vid trans failed, vid 0x%x, " |
265 | "curr 0x%x\n", | 265 | "curr 0x%x\n", |
266 | vid, data->currvid); | 266 | vid, data->currvid); |
267 | return 1; | 267 | return 1; |
268 | } | 268 | } |
269 | 269 | ||
270 | return 0; | 270 | return 0; |
271 | } | 271 | } |
272 | 272 | ||
273 | /* | 273 | /* |
274 | * Reduce the vid by the max of step or reqvid. | 274 | * Reduce the vid by the max of step or reqvid. |
275 | * Decreasing vid codes represent increasing voltages: | 275 | * Decreasing vid codes represent increasing voltages: |
276 | * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. | 276 | * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. |
277 | */ | 277 | */ |
278 | static int decrease_vid_code_by_step(struct powernow_k8_data *data, | 278 | static int decrease_vid_code_by_step(struct powernow_k8_data *data, |
279 | u32 reqvid, u32 step) | 279 | u32 reqvid, u32 step) |
280 | { | 280 | { |
281 | if ((data->currvid - reqvid) > step) | 281 | if ((data->currvid - reqvid) > step) |
282 | reqvid = data->currvid - step; | 282 | reqvid = data->currvid - step; |
283 | 283 | ||
284 | if (write_new_vid(data, reqvid)) | 284 | if (write_new_vid(data, reqvid)) |
285 | return 1; | 285 | return 1; |
286 | 286 | ||
287 | count_off_vst(data); | 287 | count_off_vst(data); |
288 | 288 | ||
289 | return 0; | 289 | return 0; |
290 | } | 290 | } |
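
decrease_vid_code_by_step() clamps each write so the vid moves toward the target by at most step codes at a time (lower codes mean higher voltage, so "decreasing the vid" raises voltage). A userspace model of the caller's loop plus the clamp, assuming currvid 0x14, target 0x08, step 4:

    #include <stdio.h>

    int main(void)
    {
        unsigned int currvid = 0x14, reqvid = 0x08, step = 4;

        while (currvid > reqvid) {
            /* same clamp as decrease_vid_code_by_step() */
            unsigned int next = (currvid - reqvid) > step
                                    ? currvid - step : reqvid;
            printf("vid 0x%x -> 0x%x\n", currvid, next);
            currvid = next;
        }
        return 0;   /* prints 0x14->0x10, 0x10->0x0c, 0x0c->0x08 */
    }
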
291 | 291 | ||
292 | /* Change hardware pstate by single MSR write */ | 292 | /* Change hardware pstate by single MSR write */ |
293 | static int transition_pstate(struct powernow_k8_data *data, u32 pstate) | 293 | static int transition_pstate(struct powernow_k8_data *data, u32 pstate) |
294 | { | 294 | { |
295 | wrmsr(MSR_PSTATE_CTRL, pstate, 0); | 295 | wrmsr(MSR_PSTATE_CTRL, pstate, 0); |
296 | data->currpstate = pstate; | 296 | data->currpstate = pstate; |
297 | return 0; | 297 | return 0; |
298 | } | 298 | } |
299 | 299 | ||
300 | /* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ | 300 | /* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ |
301 | static int transition_fid_vid(struct powernow_k8_data *data, | 301 | static int transition_fid_vid(struct powernow_k8_data *data, |
302 | u32 reqfid, u32 reqvid) | 302 | u32 reqfid, u32 reqvid) |
303 | { | 303 | { |
304 | if (core_voltage_pre_transition(data, reqvid)) | 304 | if (core_voltage_pre_transition(data, reqvid)) |
305 | return 1; | 305 | return 1; |
306 | 306 | ||
307 | if (core_frequency_transition(data, reqfid)) | 307 | if (core_frequency_transition(data, reqfid)) |
308 | return 1; | 308 | return 1; |
309 | 309 | ||
310 | if (core_voltage_post_transition(data, reqvid)) | 310 | if (core_voltage_post_transition(data, reqvid)) |
311 | return 1; | 311 | return 1; |
312 | 312 | ||
313 | if (query_current_values_with_pending_wait(data)) | 313 | if (query_current_values_with_pending_wait(data)) |
314 | return 1; | 314 | return 1; |
315 | 315 | ||
316 | if ((reqfid != data->currfid) || (reqvid != data->currvid)) { | 316 | if ((reqfid != data->currfid) || (reqvid != data->currvid)) { |
317 | printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, " | 317 | printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, " |
318 | "curr 0x%x 0x%x\n", | 318 | "curr 0x%x 0x%x\n", |
319 | smp_processor_id(), | 319 | smp_processor_id(), |
320 | reqfid, reqvid, data->currfid, data->currvid); | 320 | reqfid, reqvid, data->currfid, data->currvid); |
321 | return 1; | 321 | return 1; |
322 | } | 322 | } |
323 | 323 | ||
324 | dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", | 324 | dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", |
325 | smp_processor_id(), data->currfid, data->currvid); | 325 | smp_processor_id(), data->currfid, data->currvid); |
326 | 326 | ||
327 | return 0; | 327 | return 0; |
328 | } | 328 | } |
329 | 329 | ||
330 | /* Phase 1 - core voltage transition ... setup voltage */ | 330 | /* Phase 1 - core voltage transition ... setup voltage */ |
331 | static int core_voltage_pre_transition(struct powernow_k8_data *data, | 331 | static int core_voltage_pre_transition(struct powernow_k8_data *data, |
332 | u32 reqvid) | 332 | u32 reqvid) |
333 | { | 333 | { |
334 | u32 rvosteps = data->rvo; | 334 | u32 rvosteps = data->rvo; |
335 | u32 savefid = data->currfid; | 335 | u32 savefid = data->currfid; |
336 | u32 maxvid, lo; | 336 | u32 maxvid, lo; |
337 | 337 | ||
338 | dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " | 338 | dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " |
339 | "reqvid 0x%x, rvo 0x%x\n", | 339 | "reqvid 0x%x, rvo 0x%x\n", |
340 | smp_processor_id(), | 340 | smp_processor_id(), |
341 | data->currfid, data->currvid, reqvid, data->rvo); | 341 | data->currfid, data->currvid, reqvid, data->rvo); |
342 | 342 | ||
343 | rdmsr(MSR_FIDVID_STATUS, lo, maxvid); | 343 | rdmsr(MSR_FIDVID_STATUS, lo, maxvid); |
344 | maxvid = 0x1f & (maxvid >> 16); | 344 | maxvid = 0x1f & (maxvid >> 16); |
345 | dprintk("ph1 maxvid=0x%x\n", maxvid); | 345 | dprintk("ph1 maxvid=0x%x\n", maxvid); |
346 | if (reqvid < maxvid) /* lower numbers are higher voltages */ | 346 | if (reqvid < maxvid) /* lower numbers are higher voltages */ |
347 | reqvid = maxvid; | 347 | reqvid = maxvid; |
348 | 348 | ||
349 | while (data->currvid > reqvid) { | 349 | while (data->currvid > reqvid) { |
350 | dprintk("ph1: curr 0x%x, req vid 0x%x\n", | 350 | dprintk("ph1: curr 0x%x, req vid 0x%x\n", |
351 | data->currvid, reqvid); | 351 | data->currvid, reqvid); |
352 | if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) | 352 | if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) |
353 | return 1; | 353 | return 1; |
354 | } | 354 | } |
355 | 355 | ||
356 | while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { | 356 | while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { |
357 | if (data->currvid == maxvid) { | 357 | if (data->currvid == maxvid) { |
358 | rvosteps = 0; | 358 | rvosteps = 0; |
359 | } else { | 359 | } else { |
360 | dprintk("ph1: changing vid for rvo, req 0x%x\n", | 360 | dprintk("ph1: changing vid for rvo, req 0x%x\n", |
361 | data->currvid - 1); | 361 | data->currvid - 1); |
362 | if (decrease_vid_code_by_step(data, data->currvid-1, 1)) | 362 | if (decrease_vid_code_by_step(data, data->currvid-1, 1)) |
363 | return 1; | 363 | return 1; |
364 | rvosteps--; | 364 | rvosteps--; |
365 | } | 365 | } |
366 | } | 366 | } |
367 | 367 | ||
368 | if (query_current_values_with_pending_wait(data)) | 368 | if (query_current_values_with_pending_wait(data)) |
369 | return 1; | 369 | return 1; |
370 | 370 | ||
371 | if (savefid != data->currfid) { | 371 | if (savefid != data->currfid) { |
372 | printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", | 372 | printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", |
373 | data->currfid); | 373 | data->currfid); |
374 | return 1; | 374 | return 1; |
375 | } | 375 | } |
376 | 376 | ||
377 | dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", | 377 | dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", |
378 | data->currfid, data->currvid); | 378 | data->currfid, data->currvid); |
379 | 379 | ||
380 | return 0; | 380 | return 0; |
381 | } | 381 | } |
382 | 382 | ||
383 | /* Phase 2 - core frequency transition */ | 383 | /* Phase 2 - core frequency transition */ |
384 | static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) | 384 | static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) |
385 | { | 385 | { |
386 | u32 vcoreqfid, vcocurrfid, vcofiddiff; | 386 | u32 vcoreqfid, vcocurrfid, vcofiddiff; |
387 | u32 fid_interval, savevid = data->currvid; | 387 | u32 fid_interval, savevid = data->currvid; |
388 | 388 | ||
389 | if ((reqfid < HI_FID_TABLE_BOTTOM) && | 389 | if ((reqfid < HI_FID_TABLE_BOTTOM) && |
390 | (data->currfid < HI_FID_TABLE_BOTTOM)) { | 390 | (data->currfid < HI_FID_TABLE_BOTTOM)) { |
391 | printk(KERN_ERR PFX "ph2: illegal lo-lo transition " | 391 | printk(KERN_ERR PFX "ph2: illegal lo-lo transition " |
392 | "0x%x 0x%x\n", reqfid, data->currfid); | 392 | "0x%x 0x%x\n", reqfid, data->currfid); |
393 | return 1; | 393 | return 1; |
394 | } | 394 | } |
395 | 395 | ||
396 | if (data->currfid == reqfid) { | 396 | if (data->currfid == reqfid) { |
397 | printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", | 397 | printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", |
398 | data->currfid); | 398 | data->currfid); |
399 | return 0; | 399 | return 0; |
400 | } | 400 | } |
401 | 401 | ||
402 | dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, " | 402 | dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, " |
403 | "reqfid 0x%x\n", | 403 | "reqfid 0x%x\n", |
404 | smp_processor_id(), | 404 | smp_processor_id(), |
405 | data->currfid, data->currvid, reqfid); | 405 | data->currfid, data->currvid, reqfid); |
406 | 406 | ||
407 | vcoreqfid = convert_fid_to_vco_fid(reqfid); | 407 | vcoreqfid = convert_fid_to_vco_fid(reqfid); |
408 | vcocurrfid = convert_fid_to_vco_fid(data->currfid); | 408 | vcocurrfid = convert_fid_to_vco_fid(data->currfid); |
409 | vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid | 409 | vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid |
410 | : vcoreqfid - vcocurrfid; | 410 | : vcoreqfid - vcocurrfid; |
411 | 411 | ||
412 | while (vcofiddiff > 2) { | 412 | while (vcofiddiff > 2) { |
413 | fid_interval = (data->currfid & 1) ? 1 : 2; | 413 | fid_interval = (data->currfid & 1) ? 1 : 2; |
414 | 414 | ||
415 | if (reqfid > data->currfid) { | 415 | if (reqfid > data->currfid) { |
416 | if (data->currfid > LO_FID_TABLE_TOP) { | 416 | if (data->currfid > LO_FID_TABLE_TOP) { |
417 | if (write_new_fid(data, | 417 | if (write_new_fid(data, |
418 | data->currfid + fid_interval)) | 418 | data->currfid + fid_interval)) |
419 | return 1; | 419 | return 1; |
420 | } else { | 420 | } else { |
421 | if (write_new_fid | 421 | if (write_new_fid |
422 | (data, | 422 | (data, |
423 | 2 + convert_fid_to_vco_fid(data->currfid))) | 423 | 2 + convert_fid_to_vco_fid(data->currfid))) |
424 | return 1; | 424 | return 1; |
425 | } | 425 | } |
426 | } else { | 426 | } else { |
427 | if (write_new_fid(data, data->currfid - fid_interval)) | 427 | if (write_new_fid(data, data->currfid - fid_interval)) |
428 | return 1; | 428 | return 1; |
429 | } | 429 | } |
430 | 430 | ||
431 | vcocurrfid = convert_fid_to_vco_fid(data->currfid); | 431 | vcocurrfid = convert_fid_to_vco_fid(data->currfid); |
432 | vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid | 432 | vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid |
433 | : vcoreqfid - vcocurrfid; | 433 | : vcoreqfid - vcocurrfid; |
434 | } | 434 | } |
435 | 435 | ||
436 | if (write_new_fid(data, reqfid)) | 436 | if (write_new_fid(data, reqfid)) |
437 | return 1; | 437 | return 1; |
438 | 438 | ||
439 | if (query_current_values_with_pending_wait(data)) | 439 | if (query_current_values_with_pending_wait(data)) |
440 | return 1; | 440 | return 1; |
441 | 441 | ||
442 | if (data->currfid != reqfid) { | 442 | if (data->currfid != reqfid) { |
443 | printk(KERN_ERR PFX | 443 | printk(KERN_ERR PFX |
444 | "ph2: mismatch, failed fid transition, " | 444 | "ph2: mismatch, failed fid transition, " |
445 | "curr 0x%x, req 0x%x\n", | 445 | "curr 0x%x, req 0x%x\n", |
446 | data->currfid, reqfid); | 446 | data->currfid, reqfid); |
447 | return 1; | 447 | return 1; |
448 | } | 448 | } |
449 | 449 | ||
450 | if (savevid != data->currvid) { | 450 | if (savevid != data->currvid) { |
451 | printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n", | 451 | printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n", |
452 | savevid, data->currvid); | 452 | savevid, data->currvid); |
453 | return 1; | 453 | return 1; |
454 | } | 454 | } |
455 | 455 | ||
456 | dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", | 456 | dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", |
457 | data->currfid, data->currvid); | 457 | data->currfid, data->currvid); |
458 | 458 | ||
459 | return 0; | 459 | return 0; |
460 | } | 460 | } |
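
The loop above walks the fid toward the target in VCO-fid space, never jumping more than two VCO steps per MSR write: from an odd fid the interval is 1, from an even fid it is 2 (convert_fid_to_vco_fid() is defined elsewhere in this file and not modeled here). The parity rule in isolation:

    #include <stdio.h>

    int main(void)
    {
        unsigned int fid;

        for (fid = 0; fid < 6; fid++)
            printf("currfid 0x%x -> fid_interval %d\n",
                   fid, (fid & 1) ? 1 : 2);
        return 0;
    }
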
461 | 461 | ||
462 | /* Phase 3 - core voltage transition flow ... jump to the final vid. */ | 462 | /* Phase 3 - core voltage transition flow ... jump to the final vid. */ |
463 | static int core_voltage_post_transition(struct powernow_k8_data *data, | 463 | static int core_voltage_post_transition(struct powernow_k8_data *data, |
464 | u32 reqvid) | 464 | u32 reqvid) |
465 | { | 465 | { |
466 | u32 savefid = data->currfid; | 466 | u32 savefid = data->currfid; |
467 | u32 savereqvid = reqvid; | 467 | u32 savereqvid = reqvid; |
468 | 468 | ||
469 | dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", | 469 | dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", |
470 | smp_processor_id(), | 470 | smp_processor_id(), |
471 | data->currfid, data->currvid); | 471 | data->currfid, data->currvid); |
472 | 472 | ||
473 | if (reqvid != data->currvid) { | 473 | if (reqvid != data->currvid) { |
474 | if (write_new_vid(data, reqvid)) | 474 | if (write_new_vid(data, reqvid)) |
475 | return 1; | 475 | return 1; |
476 | 476 | ||
477 | if (savefid != data->currfid) { | 477 | if (savefid != data->currfid) { |
478 | printk(KERN_ERR PFX | 478 | printk(KERN_ERR PFX |
479 | "ph3: bad fid change, save 0x%x, curr 0x%x\n", | 479 | "ph3: bad fid change, save 0x%x, curr 0x%x\n", |
480 | savefid, data->currfid); | 480 | savefid, data->currfid); |
481 | return 1; | 481 | return 1; |
482 | } | 482 | } |
483 | 483 | ||
484 | if (data->currvid != reqvid) { | 484 | if (data->currvid != reqvid) { |
485 | printk(KERN_ERR PFX | 485 | printk(KERN_ERR PFX |
486 | "ph3: failed vid transition\n, " | 486 | "ph3: failed vid transition\n, " |
487 | "req 0x%x, curr 0x%x", | 487 | "req 0x%x, curr 0x%x", |
488 | reqvid, data->currvid); | 488 | reqvid, data->currvid); |
489 | return 1; | 489 | return 1; |
490 | } | 490 | } |
491 | } | 491 | } |
492 | 492 | ||
493 | if (query_current_values_with_pending_wait(data)) | 493 | if (query_current_values_with_pending_wait(data)) |
494 | return 1; | 494 | return 1; |
495 | 495 | ||
496 | if (savereqvid != data->currvid) { | 496 | if (savereqvid != data->currvid) { |
497 | dprintk("ph3 failed, currvid 0x%x\n", data->currvid); | 497 | dprintk("ph3 failed, currvid 0x%x\n", data->currvid); |
498 | return 1; | 498 | return 1; |
499 | } | 499 | } |
500 | 500 | ||
501 | if (savefid != data->currfid) { | 501 | if (savefid != data->currfid) { |
502 | dprintk("ph3 failed, currfid changed 0x%x\n", | 502 | dprintk("ph3 failed, currfid changed 0x%x\n", |
503 | data->currfid); | 503 | data->currfid); |
504 | return 1; | 504 | return 1; |
505 | } | 505 | } |
506 | 506 | ||
507 | dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", | 507 | dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", |
508 | data->currfid, data->currvid); | 508 | data->currfid, data->currvid); |
509 | 509 | ||
510 | return 0; | 510 | return 0; |
511 | } | 511 | } |
512 | 512 | ||
513 | static int check_supported_cpu(unsigned int cpu) | 513 | static int check_supported_cpu(unsigned int cpu) |
514 | { | 514 | { |
515 | cpumask_t oldmask; | 515 | cpumask_t oldmask; |
516 | u32 eax, ebx, ecx, edx; | 516 | u32 eax, ebx, ecx, edx; |
517 | unsigned int rc = 0; | 517 | unsigned int rc = 0; |
518 | 518 | ||
519 | oldmask = current->cpus_allowed; | 519 | oldmask = current->cpus_allowed; |
520 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 520 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
521 | 521 | ||
522 | if (smp_processor_id() != cpu) { | 522 | if (smp_processor_id() != cpu) { |
523 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); | 523 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); |
524 | goto out; | 524 | goto out; |
525 | } | 525 | } |
526 | 526 | ||
527 | if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) | 527 | if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) |
528 | goto out; | 528 | goto out; |
529 | 529 | ||
530 | eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | 530 | eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); |
531 | if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && | 531 | if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && |
532 | ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) | 532 | ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) |
533 | goto out; | 533 | goto out; |
534 | 534 | ||
535 | if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { | 535 | if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { |
536 | if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || | 536 | if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || |
537 | ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { | 537 | ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { |
538 | printk(KERN_INFO PFX | 538 | printk(KERN_INFO PFX |
539 | "Processor cpuid %x not supported\n", eax); | 539 | "Processor cpuid %x not supported\n", eax); |
540 | goto out; | 540 | goto out; |
541 | } | 541 | } |
542 | 542 | ||
543 | eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); | 543 | eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); |
544 | if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { | 544 | if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { |
545 | printk(KERN_INFO PFX | 545 | printk(KERN_INFO PFX |
546 | "No frequency change capabilities detected\n"); | 546 | "No frequency change capabilities detected\n"); |
547 | goto out; | 547 | goto out; |
548 | } | 548 | } |
549 | 549 | ||
550 | cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); | 550 | cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); |
551 | if ((edx & P_STATE_TRANSITION_CAPABLE) | 551 | if ((edx & P_STATE_TRANSITION_CAPABLE) |
552 | != P_STATE_TRANSITION_CAPABLE) { | 552 | != P_STATE_TRANSITION_CAPABLE) { |
553 | printk(KERN_INFO PFX | 553 | printk(KERN_INFO PFX |
554 | "Power state transitions not supported\n"); | 554 | "Power state transitions not supported\n"); |
555 | goto out; | 555 | goto out; |
556 | } | 556 | } |
557 | } else { /* must be a HW Pstate capable processor */ | 557 | } else { /* must be a HW Pstate capable processor */ |
558 | cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); | 558 | cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); |
559 | if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) | 559 | if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) |
560 | cpu_family = CPU_HW_PSTATE; | 560 | cpu_family = CPU_HW_PSTATE; |
561 | else | 561 | else |
562 | goto out; | 562 | goto out; |
563 | } | 563 | } |
564 | 564 | ||
565 | rc = 1; | 565 | rc = 1; |
566 | 566 | ||
567 | out: | 567 | out: |
568 | set_cpus_allowed_ptr(current, &oldmask); | 568 | set_cpus_allowed_ptr(current, &oldmask); |
569 | return rc; | 569 | return rc; |
570 | } | 570 | } |
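
check_supported_cpu() has to execute its cpuid reads on the CPU being probed, so it temporarily pins the calling task there with set_cpus_allowed_ptr() and restores the saved mask on every exit path. A sketch of that affinity round-trip as a reusable helper (hypothetical, kernel context assumed, using this era's cpumask API):

    /* Run fn on the given CPU by migrating the current task there;
     * returns -EIO if the migration did not take effect. */
    static int run_on_cpu(unsigned int cpu, int (*fn)(void))
    {
        cpumask_t oldmask = current->cpus_allowed;
        int ret = -EIO;

        set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
        if (smp_processor_id() == cpu)
            ret = fn();
        set_cpus_allowed_ptr(current, &oldmask);

        return ret;
    }
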
571 | 571 | ||
572 | static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, | 572 | static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, |
573 | u8 maxvid) | 573 | u8 maxvid) |
574 | { | 574 | { |
575 | unsigned int j; | 575 | unsigned int j; |
576 | u8 lastfid = 0xff; | 576 | u8 lastfid = 0xff; |
577 | 577 | ||
578 | for (j = 0; j < data->numps; j++) { | 578 | for (j = 0; j < data->numps; j++) { |
579 | if (pst[j].vid > LEAST_VID) { | 579 | if (pst[j].vid > LEAST_VID) { |
580 | printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n", | 580 | printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n", |
581 | j, pst[j].vid); | 581 | j, pst[j].vid); |
582 | return -EINVAL; | 582 | return -EINVAL; |
583 | } | 583 | } |
584 | if (pst[j].vid < data->rvo) { | 584 | if (pst[j].vid < data->rvo) { |
585 | /* vid + rvo >= 0 */ | 585 | /* vid + rvo >= 0 */ |
586 | printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate" | 586 | printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate" |
587 | " %d\n", j); | 587 | " %d\n", j); |
588 | return -ENODEV; | 588 | return -ENODEV; |
589 | } | 589 | } |
590 | if (pst[j].vid < maxvid + data->rvo) { | 590 | if (pst[j].vid < maxvid + data->rvo) { |
591 | /* vid + rvo >= maxvid */ | 591 | /* vid + rvo >= maxvid */ |
592 | printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate" | 592 | printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate" |
593 | " %d\n", j); | 593 | " %d\n", j); |
594 | return -ENODEV; | 594 | return -ENODEV; |
595 | } | 595 | } |
596 | if (pst[j].fid > MAX_FID) { | 596 | if (pst[j].fid > MAX_FID) { |
597 | printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate" | 597 | printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate" |
598 | " %d\n", j); | 598 | " %d\n", j); |
599 | return -ENODEV; | 599 | return -ENODEV; |
600 | } | 600 | } |
601 | if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { | 601 | if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { |
602 | /* Only first fid is allowed to be in "low" range */ | 602 | /* Only first fid is allowed to be in "low" range */ |
603 | printk(KERN_ERR FW_BUG PFX "two low fids - %d : " | 603 | printk(KERN_ERR FW_BUG PFX "two low fids - %d : " |
604 | "0x%x\n", j, pst[j].fid); | 604 | "0x%x\n", j, pst[j].fid); |
605 | return -EINVAL; | 605 | return -EINVAL; |
606 | } | 606 | } |
607 | if (pst[j].fid < lastfid) | 607 | if (pst[j].fid < lastfid) |
608 | lastfid = pst[j].fid; | 608 | lastfid = pst[j].fid; |
609 | } | 609 | } |
610 | if (lastfid & 1) { | 610 | if (lastfid & 1) { |
611 | printk(KERN_ERR FW_BUG PFX "lastfid invalid\n"); | 611 | printk(KERN_ERR FW_BUG PFX "lastfid invalid\n"); |
612 | return -EINVAL; | 612 | return -EINVAL; |
613 | } | 613 | } |
614 | if (lastfid > LO_FID_TABLE_TOP) | 614 | if (lastfid > LO_FID_TABLE_TOP) |
615 | printk(KERN_INFO FW_BUG PFX | 615 | printk(KERN_INFO FW_BUG PFX |
616 | "first fid not from lo freq table\n"); | 616 | "first fid not from lo freq table\n"); |
617 | 617 | ||
618 | return 0; | 618 | return 0; |
619 | } | 619 | } |
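
The two rvo checks encode voltage headroom in vid-code arithmetic: since codes fall as voltage rises and the ramp voltage offset is subtracted from the vid at runtime, every table vid needs at least rvo codes of room above both 0 and maxvid. A quick model of the maxvid check, assuming maxvid 0x0a and rvo 2:

    #include <stdio.h>

    int main(void)
    {
        unsigned int maxvid = 0x0a, rvo = 2, vid;

        for (vid = 0x09; vid <= 0x0d; vid++)
            printf("vid 0x%x: %s\n", vid,
                   vid < maxvid + rvo ? "rejected" : "ok");
        return 0;   /* 0x09..0x0b rejected, 0x0c..0x0d ok */
    }
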
620 | 620 | ||
621 | static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry) | 621 | static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry) |
622 | { | 622 | { |
623 | data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; | 623 | data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; |
624 | } | 624 | } |
625 | 625 | ||
626 | static void print_basics(struct powernow_k8_data *data) | 626 | static void print_basics(struct powernow_k8_data *data) |
627 | { | 627 | { |
628 | int j; | 628 | int j; |
629 | for (j = 0; j < data->numps; j++) { | 629 | for (j = 0; j < data->numps; j++) { |
630 | if (data->powernow_table[j].frequency != | 630 | if (data->powernow_table[j].frequency != |
631 | CPUFREQ_ENTRY_INVALID) { | 631 | CPUFREQ_ENTRY_INVALID) { |
632 | if (cpu_family == CPU_HW_PSTATE) { | 632 | if (cpu_family == CPU_HW_PSTATE) { |
633 | printk(KERN_INFO PFX | 633 | printk(KERN_INFO PFX |
634 | " %d : pstate %d (%d MHz)\n", j, | 634 | " %d : pstate %d (%d MHz)\n", j, |
635 | data->powernow_table[j].index, | 635 | data->powernow_table[j].index, |
636 | data->powernow_table[j].frequency/1000); | 636 | data->powernow_table[j].frequency/1000); |
637 | } else { | 637 | } else { |
638 | printk(KERN_INFO PFX | 638 | printk(KERN_INFO PFX |
639 | " %d : fid 0x%x (%d MHz), vid 0x%x\n", | 639 | " %d : fid 0x%x (%d MHz), vid 0x%x\n", |
640 | j, | 640 | j, |
641 | data->powernow_table[j].index & 0xff, | 641 | data->powernow_table[j].index & 0xff, |
642 | data->powernow_table[j].frequency/1000, | 642 | data->powernow_table[j].frequency/1000, |
643 | data->powernow_table[j].index >> 8); | 643 | data->powernow_table[j].index >> 8); |
644 | } | 644 | } |
645 | } | 645 | } |
646 | } | 646 | } |
647 | if (data->batps) | 647 | if (data->batps) |
648 | printk(KERN_INFO PFX "Only %d pstates on battery\n", | 648 | printk(KERN_INFO PFX "Only %d pstates on battery\n", |
649 | data->batps); | 649 | data->batps); |
650 | } | 650 | } |
651 | 651 | ||
652 | static u32 freq_from_fid_did(u32 fid, u32 did) | 652 | static u32 freq_from_fid_did(u32 fid, u32 did) |
653 | { | 653 | { |
654 | u32 mhz = 0; | 654 | u32 mhz = 0; |
655 | 655 | ||
656 | if (boot_cpu_data.x86 == 0x10) | 656 | if (boot_cpu_data.x86 == 0x10) |
657 | mhz = (100 * (fid + 0x10)) >> did; | 657 | mhz = (100 * (fid + 0x10)) >> did; |
658 | else if (boot_cpu_data.x86 == 0x11) | 658 | else if (boot_cpu_data.x86 == 0x11) |
659 | mhz = (100 * (fid + 8)) >> did; | 659 | mhz = (100 * (fid + 8)) >> did; |
660 | else | 660 | else |
661 | BUG(); | 661 | BUG(); |
662 | 662 | ||
663 | return mhz * 1000; | 663 | return mhz * 1000; |
664 | } | 664 | } |
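
freq_from_fid_did() is pure arithmetic: on family 0x10 the core clock is 100 MHz times (fid + 0x10), divided down by 2^did, and the result is returned in kHz. A couple of worked values:

    #include <stdio.h>

    int main(void)
    {
        unsigned int fid, did;

        for (fid = 0; fid <= 2; fid++)
            for (did = 0; did <= 1; did++)
                printf("fid 0x%x did %u -> %u kHz\n", fid, did,
                       ((100 * (fid + 0x10)) >> did) * 1000);
        return 0;   /* e.g. fid 0, did 0 -> 1600000 kHz */
    }
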
665 | 665 | ||
666 | static int fill_powernow_table(struct powernow_k8_data *data, | 666 | static int fill_powernow_table(struct powernow_k8_data *data, |
667 | struct pst_s *pst, u8 maxvid) | 667 | struct pst_s *pst, u8 maxvid) |
668 | { | 668 | { |
669 | struct cpufreq_frequency_table *powernow_table; | 669 | struct cpufreq_frequency_table *powernow_table; |
670 | unsigned int j; | 670 | unsigned int j; |
671 | 671 | ||
672 | if (data->batps) { | 672 | if (data->batps) { |
673 | /* use ACPI support to get full speed on mains power */ | 673 | /* use ACPI support to get full speed on mains power */ |
674 | printk(KERN_WARNING PFX | 674 | printk(KERN_WARNING PFX |
675 | "Only %d pstates usable (use ACPI driver for full " | 675 | "Only %d pstates usable (use ACPI driver for full " |
676 | "range\n", data->batps); | 676 | "range\n", data->batps); |
677 | data->numps = data->batps; | 677 | data->numps = data->batps; |
678 | } | 678 | } |
679 | 679 | ||
680 | for (j = 1; j < data->numps; j++) { | 680 | for (j = 1; j < data->numps; j++) { |
681 | if (pst[j-1].fid >= pst[j].fid) { | 681 | if (pst[j-1].fid >= pst[j].fid) { |
682 | printk(KERN_ERR PFX "PST out of sequence\n"); | 682 | printk(KERN_ERR PFX "PST out of sequence\n"); |
683 | return -EINVAL; | 683 | return -EINVAL; |
684 | } | 684 | } |
685 | } | 685 | } |
686 | 686 | ||
687 | if (data->numps < 2) { | 687 | if (data->numps < 2) { |
688 | printk(KERN_ERR PFX "no p states to transition\n"); | 688 | printk(KERN_ERR PFX "no p states to transition\n"); |
689 | return -ENODEV; | 689 | return -ENODEV; |
690 | } | 690 | } |
691 | 691 | ||
692 | if (check_pst_table(data, pst, maxvid)) | 692 | if (check_pst_table(data, pst, maxvid)) |
693 | return -EINVAL; | 693 | return -EINVAL; |
694 | 694 | ||
695 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) | 695 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) |
696 | * (data->numps + 1)), GFP_KERNEL); | 696 | * (data->numps + 1)), GFP_KERNEL); |
697 | if (!powernow_table) { | 697 | if (!powernow_table) { |
698 | printk(KERN_ERR PFX "powernow_table memory alloc failure\n"); | 698 | printk(KERN_ERR PFX "powernow_table memory alloc failure\n"); |
699 | return -ENOMEM; | 699 | return -ENOMEM; |
700 | } | 700 | } |
701 | 701 | ||
702 | for (j = 0; j < data->numps; j++) { | 702 | for (j = 0; j < data->numps; j++) { |
703 | int freq; | 703 | int freq; |
704 | powernow_table[j].index = pst[j].fid; /* lower 8 bits */ | 704 | powernow_table[j].index = pst[j].fid; /* lower 8 bits */ |
705 | powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ | 705 | powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ |
706 | freq = find_khz_freq_from_fid(pst[j].fid); | 706 | freq = find_khz_freq_from_fid(pst[j].fid); |
707 | powernow_table[j].frequency = freq; | 707 | powernow_table[j].frequency = freq; |
708 | } | 708 | } |
709 | powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; | 709 | powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; |
710 | powernow_table[data->numps].index = 0; | 710 | powernow_table[data->numps].index = 0; |
711 | 711 | ||
712 | if (query_current_values_with_pending_wait(data)) { | 712 | if (query_current_values_with_pending_wait(data)) { |
713 | kfree(powernow_table); | 713 | kfree(powernow_table); |
714 | return -EIO; | 714 | return -EIO; |
715 | } | 715 | } |
716 | 716 | ||
717 | dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); | 717 | dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); |
718 | data->powernow_table = powernow_table; | 718 | data->powernow_table = powernow_table; |
719 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) | 719 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) |
720 | print_basics(data); | 720 | print_basics(data); |
721 | 721 | ||
722 | for (j = 0; j < data->numps; j++) | 722 | for (j = 0; j < data->numps; j++) |
723 | if ((pst[j].fid == data->currfid) && | 723 | if ((pst[j].fid == data->currfid) && |
724 | (pst[j].vid == data->currvid)) | 724 | (pst[j].vid == data->currvid)) |
725 | return 0; | 725 | return 0; |
726 | 726 | ||
727 | dprintk("currfid/vid do not match PST, ignoring\n"); | 727 | dprintk("currfid/vid do not match PST, ignoring\n"); |
728 | return 0; | 728 | return 0; |
729 | } | 729 | } |
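
Each table entry packs fid into the low byte of index and vid into the byte above it; transition_frequency_fidvid() later unpacks them the same way, and the list is terminated by a CPUFREQ_TABLE_END sentinel. The pack/unpack in isolation:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t fid = 0x0e, vid = 0x12;
        uint32_t index = fid | (vid << 8);

        printf("index 0x%x -> fid 0x%x, vid 0x%x\n",
               index, index & 0xFF, (index & 0xFF00) >> 8);
        return 0;   /* index 0x120e -> fid 0xe, vid 0x12 */
    }
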
730 | 730 | ||
731 | /* Find and validate the PSB/PST table in BIOS. */ | 731 | /* Find and validate the PSB/PST table in BIOS. */ |
732 | static int find_psb_table(struct powernow_k8_data *data) | 732 | static int find_psb_table(struct powernow_k8_data *data) |
733 | { | 733 | { |
734 | struct psb_s *psb; | 734 | struct psb_s *psb; |
735 | unsigned int i; | 735 | unsigned int i; |
736 | u32 mvs; | 736 | u32 mvs; |
737 | u8 maxvid; | 737 | u8 maxvid; |
738 | u32 cpst = 0; | 738 | u32 cpst = 0; |
739 | u32 thiscpuid; | 739 | u32 thiscpuid; |
740 | 740 | ||
741 | for (i = 0xc0000; i < 0xffff0; i += 0x10) { | 741 | for (i = 0xc0000; i < 0xffff0; i += 0x10) { |
742 | /* Scan BIOS looking for the signature. */ | 742 | /* Scan BIOS looking for the signature. */ |
743 | /* It cannot be at 0xffff0 - it is too big. */ | 743 | /* It cannot be at 0xffff0 - it is too big. */ |
744 | 744 | ||
745 | psb = phys_to_virt(i); | 745 | psb = phys_to_virt(i); |
746 | if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) | 746 | if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) |
747 | continue; | 747 | continue; |
748 | 748 | ||
749 | dprintk("found PSB header at 0x%p\n", psb); | 749 | dprintk("found PSB header at 0x%p\n", psb); |
750 | 750 | ||
751 | dprintk("table vers: 0x%x\n", psb->tableversion); | 751 | dprintk("table vers: 0x%x\n", psb->tableversion); |
752 | if (psb->tableversion != PSB_VERSION_1_4) { | 752 | if (psb->tableversion != PSB_VERSION_1_4) { |
753 | printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n"); | 753 | printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n"); |
754 | return -ENODEV; | 754 | return -ENODEV; |
755 | } | 755 | } |
756 | 756 | ||
757 | dprintk("flags: 0x%x\n", psb->flags1); | 757 | dprintk("flags: 0x%x\n", psb->flags1); |
758 | if (psb->flags1) { | 758 | if (psb->flags1) { |
759 | printk(KERN_ERR FW_BUG PFX "unknown flags\n"); | 759 | printk(KERN_ERR FW_BUG PFX "unknown flags\n"); |
760 | return -ENODEV; | 760 | return -ENODEV; |
761 | } | 761 | } |
762 | 762 | ||
763 | data->vstable = psb->vstable; | 763 | data->vstable = psb->vstable; |
764 | dprintk("voltage stabilization time: %d(*20us)\n", | 764 | dprintk("voltage stabilization time: %d(*20us)\n", |
765 | data->vstable); | 765 | data->vstable); |
766 | 766 | ||
767 | dprintk("flags2: 0x%x\n", psb->flags2); | 767 | dprintk("flags2: 0x%x\n", psb->flags2); |
768 | data->rvo = psb->flags2 & 3; | 768 | data->rvo = psb->flags2 & 3; |
769 | data->irt = ((psb->flags2) >> 2) & 3; | 769 | data->irt = ((psb->flags2) >> 2) & 3; |
770 | mvs = ((psb->flags2) >> 4) & 3; | 770 | mvs = ((psb->flags2) >> 4) & 3; |
771 | data->vidmvs = 1 << mvs; | 771 | data->vidmvs = 1 << mvs; |
772 | data->batps = ((psb->flags2) >> 6) & 3; | 772 | data->batps = ((psb->flags2) >> 6) & 3; |
773 | 773 | ||
774 | dprintk("ramp voltage offset: %d\n", data->rvo); | 774 | dprintk("ramp voltage offset: %d\n", data->rvo); |
775 | dprintk("isochronous relief time: %d\n", data->irt); | 775 | dprintk("isochronous relief time: %d\n", data->irt); |
776 | dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); | 776 | dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); |
777 | 777 | ||
778 | dprintk("numpst: 0x%x\n", psb->num_tables); | 778 | dprintk("numpst: 0x%x\n", psb->num_tables); |
779 | cpst = psb->num_tables; | 779 | cpst = psb->num_tables; |
780 | if ((psb->cpuid == 0x00000fc0) || | 780 | if ((psb->cpuid == 0x00000fc0) || |
781 | (psb->cpuid == 0x00000fe0)) { | 781 | (psb->cpuid == 0x00000fe0)) { |
782 | thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | 782 | thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); |
783 | if ((thiscpuid == 0x00000fc0) || | 783 | if ((thiscpuid == 0x00000fc0) || |
784 | (thiscpuid == 0x00000fe0)) | 784 | (thiscpuid == 0x00000fe0)) |
785 | cpst = 1; | 785 | cpst = 1; |
786 | } | 786 | } |
787 | if (cpst != 1) { | 787 | if (cpst != 1) { |
788 | printk(KERN_ERR FW_BUG PFX "numpst must be 1\n"); | 788 | printk(KERN_ERR FW_BUG PFX "numpst must be 1\n"); |
789 | return -ENODEV; | 789 | return -ENODEV; |
790 | } | 790 | } |
791 | 791 | ||
792 | data->plllock = psb->plllocktime; | 792 | data->plllock = psb->plllocktime; |
793 | dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); | 793 | dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); |
794 | dprintk("maxfid: 0x%x\n", psb->maxfid); | 794 | dprintk("maxfid: 0x%x\n", psb->maxfid); |
795 | dprintk("maxvid: 0x%x\n", psb->maxvid); | 795 | dprintk("maxvid: 0x%x\n", psb->maxvid); |
796 | maxvid = psb->maxvid; | 796 | maxvid = psb->maxvid; |
797 | 797 | ||
798 | data->numps = psb->numps; | 798 | data->numps = psb->numps; |
799 | dprintk("numpstates: 0x%x\n", data->numps); | 799 | dprintk("numpstates: 0x%x\n", data->numps); |
800 | return fill_powernow_table(data, | 800 | return fill_powernow_table(data, |
801 | (struct pst_s *)(psb+1), maxvid); | 801 | (struct pst_s *)(psb+1), maxvid); |
802 | } | 802 | } |
803 | /* | 803 | /* |
804 | * If you see this message, complain to the BIOS manufacturer. If | 804 | * If you see this message, complain to the BIOS manufacturer. If |
805 | * they tell you "we do not support Linux" or some similar | 805 | * they tell you "we do not support Linux" or some similar |
806 | * nonsense, remember that Windows 2000 uses the same legacy | 806 | * nonsense, remember that Windows 2000 uses the same legacy |
807 | * mechanism that the old Linux PSB driver uses. Tell them it | 807 | * mechanism that the old Linux PSB driver uses. Tell them it |
808 | * is broken with Windows 2000. | 808 | * is broken with Windows 2000. |
809 | * | 809 | * |
810 | * The reference to the AMD documentation is chapter 9 in the | 810 | * The reference to the AMD documentation is chapter 9 in the |
811 | * BIOS and Kernel Developer's Guide, which is available on | 811 | * BIOS and Kernel Developer's Guide, which is available on |
812 | * www.amd.com | 812 | * www.amd.com |
813 | */ | 813 | */ |
814 | printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); | 814 | printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); |
815 | return -ENODEV; | 815 | return -ENODEV; |
816 | } | 816 | } |
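
find_psb_table() walks the legacy BIOS region 0xc0000-0xffff0 in 16-byte steps, comparing each candidate against the PSB signature before trusting any of the header fields. A userspace model of the scan (the real code maps physical memory with phys_to_virt()):

    #include <stddef.h>
    #include <string.h>

    /* Return the first 16-byte-aligned offset in buf matching sig,
     * or (size_t)-1 if the signature is absent. */
    static size_t find_sig(const char *buf, size_t len,
                           const char *sig, size_t siglen)
    {
        size_t off;

        for (off = 0; off + siglen <= len; off += 0x10)
            if (memcmp(buf + off, sig, siglen) == 0)
                return off;

        return (size_t)-1;
    }
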
817 | 817 | ||
818 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, | 818 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, |
819 | unsigned int index) | 819 | unsigned int index) |
820 | { | 820 | { |
821 | acpi_integer control; | 821 | acpi_integer control; |
822 | 822 | ||
823 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) | 823 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) |
824 | return; | 824 | return; |
825 | 825 | ||
826 | control = data->acpi_data.states[index].control; | 826 | control = data->acpi_data.states[index].control; |
827 | data->irt = (control >> IRT_SHIFT) & IRT_MASK; | 827 | data->irt = (control >> IRT_SHIFT) & IRT_MASK; |
828 | data->rvo = (control >> RVO_SHIFT) & RVO_MASK; | 828 | data->rvo = (control >> RVO_SHIFT) & RVO_MASK; |
829 | data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; | 829 | data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; |
830 | data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; | 830 | data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; |
831 | data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK); | 831 | data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK); |
832 | data->vstable = (control >> VST_SHIFT) & VST_MASK; | 832 | data->vstable = (control >> VST_SHIFT) & VST_MASK; |
833 | } | 833 | } |
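
Every field above is the same two-step pull from the ACPI _PSS control word: shift the field down, then mask it off. The shift and mask constants are defined in powernow-k8.h; the generic operation is just:

    #include <stdint.h>

    /* value = (control >> SHIFT) & MASK, as used for irt, rvo, exttype,
     * plllock, mvs and vst above. */
    static inline uint32_t pss_field(uint64_t control, unsigned int shift,
                                     uint32_t mask)
    {
        return (uint32_t)(control >> shift) & mask;
    }
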
834 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | 834 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) |
835 | { | 835 | { |
836 | struct cpufreq_frequency_table *powernow_table; | 836 | struct cpufreq_frequency_table *powernow_table; |
837 | int ret_val = -ENODEV; | 837 | int ret_val = -ENODEV; |
838 | acpi_integer control, status; | 838 | acpi_integer control, status; |
839 | 839 | ||
840 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { | 840 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { |
841 | dprintk("register performance failed: bad ACPI data\n"); | 841 | dprintk("register performance failed: bad ACPI data\n"); |
842 | return -EIO; | 842 | return -EIO; |
843 | } | 843 | } |
844 | 844 | ||
845 | /* verify the data contained in the ACPI structures */ | 845 | /* verify the data contained in the ACPI structures */ |
846 | if (data->acpi_data.state_count <= 1) { | 846 | if (data->acpi_data.state_count <= 1) { |
847 | dprintk("No ACPI P-States\n"); | 847 | dprintk("No ACPI P-States\n"); |
848 | goto err_out; | 848 | goto err_out; |
849 | } | 849 | } |
850 | 850 | ||
851 | control = data->acpi_data.control_register.space_id; | 851 | control = data->acpi_data.control_register.space_id; |
852 | status = data->acpi_data.status_register.space_id; | 852 | status = data->acpi_data.status_register.space_id; |
853 | 853 | ||
854 | if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) || | 854 | if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) || |
855 | (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) { | 855 | (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) { |
856 | dprintk("Invalid control/status registers (%x - %x)\n", | 856 | dprintk("Invalid control/status registers (%x - %x)\n", |
857 | control, status); | 857 | control, status); |
858 | goto err_out; | 858 | goto err_out; |
859 | } | 859 | } |
860 | 860 | ||
861 | /* fill in data->powernow_table */ | 861 | /* fill in data->powernow_table */ |
862 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) | 862 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) |
863 | * (data->acpi_data.state_count + 1)), GFP_KERNEL); | 863 | * (data->acpi_data.state_count + 1)), GFP_KERNEL); |
864 | if (!powernow_table) { | 864 | if (!powernow_table) { |
865 | dprintk("powernow_table memory alloc failure\n"); | 865 | dprintk("powernow_table memory alloc failure\n"); |
866 | goto err_out; | 866 | goto err_out; |
867 | } | 867 | } |
868 | 868 | ||
869 | if (cpu_family == CPU_HW_PSTATE) | 869 | if (cpu_family == CPU_HW_PSTATE) |
870 | ret_val = fill_powernow_table_pstate(data, powernow_table); | 870 | ret_val = fill_powernow_table_pstate(data, powernow_table); |
871 | else | 871 | else |
872 | ret_val = fill_powernow_table_fidvid(data, powernow_table); | 872 | ret_val = fill_powernow_table_fidvid(data, powernow_table); |
873 | if (ret_val) | 873 | if (ret_val) |
874 | goto err_out_mem; | 874 | goto err_out_mem; |
875 | 875 | ||
876 | powernow_table[data->acpi_data.state_count].frequency = | 876 | powernow_table[data->acpi_data.state_count].frequency = |
877 | CPUFREQ_TABLE_END; | 877 | CPUFREQ_TABLE_END; |
878 | powernow_table[data->acpi_data.state_count].index = 0; | 878 | powernow_table[data->acpi_data.state_count].index = 0; |
879 | data->powernow_table = powernow_table; | 879 | data->powernow_table = powernow_table; |
880 | 880 | ||
881 | /* fill in data */ | 881 | /* fill in data */ |
882 | data->numps = data->acpi_data.state_count; | 882 | data->numps = data->acpi_data.state_count; |
883 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) | 883 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) |
884 | print_basics(data); | 884 | print_basics(data); |
885 | powernow_k8_acpi_pst_values(data, 0); | 885 | powernow_k8_acpi_pst_values(data, 0); |
886 | 886 | ||
887 | /* notify BIOS that we exist */ | 887 | /* notify BIOS that we exist */ |
888 | acpi_processor_notify_smm(THIS_MODULE); | 888 | acpi_processor_notify_smm(THIS_MODULE); |
889 | 889 | ||
890 | if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { | 890 | if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { |
891 | printk(KERN_ERR PFX | 891 | printk(KERN_ERR PFX |
892 | "unable to alloc powernow_k8_data cpumask\n"); | 892 | "unable to alloc powernow_k8_data cpumask\n"); |
893 | ret_val = -ENOMEM; | 893 | ret_val = -ENOMEM; |
894 | goto err_out_mem; | 894 | goto err_out_mem; |
895 | } | 895 | } |
896 | 896 | ||
897 | return 0; | 897 | return 0; |
898 | 898 | ||
899 | err_out_mem: | 899 | err_out_mem: |
900 | kfree(powernow_table); | 900 | kfree(powernow_table); |
901 | 901 | ||
902 | err_out: | 902 | err_out: |
903 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); | 903 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); |
904 | 904 | ||
905 | /* data->acpi_data.state_count informs us at ->exit() | 905 | /* data->acpi_data.state_count informs us at ->exit() |
906 | * whether ACPI was used */ | 906 | * whether ACPI was used */ |
907 | data->acpi_data.state_count = 0; | 907 | data->acpi_data.state_count = 0; |
908 | 908 | ||
909 | return ret_val; | 909 | return ret_val; |
910 | } | 910 | } |
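
The zalloc_cpumask_var() call above is this file's one functional change. With CONFIG_CPUMASK_OFFSTACK (selected by MAXSMP), cpumask_var_t becomes a real heap allocation and alloc_cpumask_var() hands it back uninitialized; code that previously relied on a static cpumask being zeroed by the loader must therefore ask for a cleared mask explicitly. A simplified model of the two allocators in that configuration (the real definitions live in lib/cpumask.c):

    /* Off-stack case: the mask is kmalloc'ed, so its bits are undefined
     * unless __GFP_ZERO is passed. */
    bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
    {
        *mask = kmalloc(cpumask_size(), flags);
        return *mask != NULL;
    }

    /* Same allocation, but with every bit cleared. */
    bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
    {
        return alloc_cpumask_var(mask, flags | __GFP_ZERO);
    }
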
911 | 911 | ||
912 | static int fill_powernow_table_pstate(struct powernow_k8_data *data, | 912 | static int fill_powernow_table_pstate(struct powernow_k8_data *data, |
913 | struct cpufreq_frequency_table *powernow_table) | 913 | struct cpufreq_frequency_table *powernow_table) |
914 | { | 914 | { |
915 | int i; | 915 | int i; |
916 | u32 hi = 0, lo = 0; | 916 | u32 hi = 0, lo = 0; |
917 | rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); | 917 | rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); |
918 | data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; | 918 | data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; |
919 | 919 | ||
920 | for (i = 0; i < data->acpi_data.state_count; i++) { | 920 | for (i = 0; i < data->acpi_data.state_count; i++) { |
921 | u32 index; | 921 | u32 index; |
922 | 922 | ||
923 | index = data->acpi_data.states[i].control & HW_PSTATE_MASK; | 923 | index = data->acpi_data.states[i].control & HW_PSTATE_MASK; |
924 | if (index > data->max_hw_pstate) { | 924 | if (index > data->max_hw_pstate) { |
925 | printk(KERN_ERR PFX "invalid pstate %d - " | 925 | printk(KERN_ERR PFX "invalid pstate %d - " |
926 | "bad value %d.\n", i, index); | 926 | "bad value %d.\n", i, index); |
927 | printk(KERN_ERR PFX "Please report to BIOS " | 927 | printk(KERN_ERR PFX "Please report to BIOS " |
928 | "manufacturer\n"); | 928 | "manufacturer\n"); |
929 | invalidate_entry(data, i); | 929 | invalidate_entry(data, i); |
930 | continue; | 930 | continue; |
931 | } | 931 | } |
932 | rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); | 932 | rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); |
933 | if (!(hi & HW_PSTATE_VALID_MASK)) { | 933 | if (!(hi & HW_PSTATE_VALID_MASK)) { |
934 | dprintk("invalid pstate %d, ignoring\n", index); | 934 | dprintk("invalid pstate %d, ignoring\n", index); |
935 | invalidate_entry(data, i); | 935 | invalidate_entry(data, i); |
936 | continue; | 936 | continue; |
937 | } | 937 | } |
938 | 938 | ||
939 | powernow_table[i].index = index; | 939 | powernow_table[i].index = index; |
940 | 940 | ||
941 | /* Frequency may be rounded for these */ | 941 | /* Frequency may be rounded for these */ |
942 | if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) { | 942 | if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) { |
943 | powernow_table[i].frequency = | 943 | powernow_table[i].frequency = |
944 | freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); | 944 | freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); |
945 | } else | 945 | } else |
946 | powernow_table[i].frequency = | 946 | powernow_table[i].frequency = |
947 | data->acpi_data.states[i].core_frequency * 1000; | 947 | data->acpi_data.states[i].core_frequency * 1000; |
948 | } | 948 | } |
949 | return 0; | 949 | return 0; |
950 | } | 950 | } |
951 | 951 | ||
952 | static int fill_powernow_table_fidvid(struct powernow_k8_data *data, | 952 | static int fill_powernow_table_fidvid(struct powernow_k8_data *data, |
953 | struct cpufreq_frequency_table *powernow_table) | 953 | struct cpufreq_frequency_table *powernow_table) |
954 | { | 954 | { |
955 | int i; | 955 | int i; |
956 | int cntlofreq = 0; | 956 | int cntlofreq = 0; |
957 | 957 | ||
958 | for (i = 0; i < data->acpi_data.state_count; i++) { | 958 | for (i = 0; i < data->acpi_data.state_count; i++) { |
959 | u32 fid; | 959 | u32 fid; |
960 | u32 vid; | 960 | u32 vid; |
961 | u32 freq, index; | 961 | u32 freq, index; |
962 | acpi_integer status, control; | 962 | acpi_integer status, control; |
963 | 963 | ||
964 | if (data->exttype) { | 964 | if (data->exttype) { |
965 | status = data->acpi_data.states[i].status; | 965 | status = data->acpi_data.states[i].status; |
966 | fid = status & EXT_FID_MASK; | 966 | fid = status & EXT_FID_MASK; |
967 | vid = (status >> VID_SHIFT) & EXT_VID_MASK; | 967 | vid = (status >> VID_SHIFT) & EXT_VID_MASK; |
968 | } else { | 968 | } else { |
969 | control = data->acpi_data.states[i].control; | 969 | control = data->acpi_data.states[i].control; |
970 | fid = control & FID_MASK; | 970 | fid = control & FID_MASK; |
971 | vid = (control >> VID_SHIFT) & VID_MASK; | 971 | vid = (control >> VID_SHIFT) & VID_MASK; |
972 | } | 972 | } |
973 | 973 | ||
974 | dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); | 974 | dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); |
975 | 975 | ||
976 | index = fid | (vid<<8); | 976 | index = fid | (vid<<8); |
977 | powernow_table[i].index = index; | 977 | powernow_table[i].index = index; |
978 | 978 | ||
979 | freq = find_khz_freq_from_fid(fid); | 979 | freq = find_khz_freq_from_fid(fid); |
980 | powernow_table[i].frequency = freq; | 980 | powernow_table[i].frequency = freq; |
981 | 981 | ||
982 | /* verify frequency is OK */ | 982 | /* verify frequency is OK */ |
983 | if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { | 983 | if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { |
984 | dprintk("invalid freq %u kHz, ignoring\n", freq); | 984 | dprintk("invalid freq %u kHz, ignoring\n", freq); |
985 | invalidate_entry(data, i); | 985 | invalidate_entry(data, i); |
986 | continue; | 986 | continue; |
987 | } | 987 | } |
988 | 988 | ||
989 | /* verify voltage is OK - | 989 | /* verify voltage is OK - |
990 | * BIOSes use "off" to indicate an invalid entry */ | 990 | * BIOSes use "off" to indicate an invalid entry */ |
991 | if (vid == VID_OFF) { | 991 | if (vid == VID_OFF) { |
992 | dprintk("invalid vid %u, ignoring\n", vid); | 992 | dprintk("invalid vid %u, ignoring\n", vid); |
993 | invalidate_entry(data, i); | 993 | invalidate_entry(data, i); |
994 | continue; | 994 | continue; |
995 | } | 995 | } |
996 | 996 | ||
997 | /* verify only 1 entry from the lo frequency table */ | 997 | /* verify only 1 entry from the lo frequency table */ |
998 | if (fid < HI_FID_TABLE_BOTTOM) { | 998 | if (fid < HI_FID_TABLE_BOTTOM) { |
999 | if (cntlofreq) { | 999 | if (cntlofreq) { |
1000 | /* if both entries are the same, | 1000 | /* if both entries are the same, |
1001 | * ignore this one ... */ | 1001 | * ignore this one ... */ |
1002 | if ((freq != powernow_table[cntlofreq].frequency) || | 1002 | if ((freq != powernow_table[cntlofreq].frequency) || |
1003 | (index != powernow_table[cntlofreq].index)) { | 1003 | (index != powernow_table[cntlofreq].index)) { |
1004 | printk(KERN_ERR PFX | 1004 | printk(KERN_ERR PFX |
1005 | "Too many lo freq table " | 1005 | "Too many lo freq table " |
1006 | "entries\n"); | 1006 | "entries\n"); |
1007 | return 1; | 1007 | return 1; |
1008 | } | 1008 | } |
1009 | 1009 | ||
1010 | dprintk("double low frequency table entry, " | 1010 | dprintk("double low frequency table entry, " |
1011 | "ignoring it.\n"); | 1011 | "ignoring it.\n"); |
1012 | invalidate_entry(data, i); | 1012 | invalidate_entry(data, i); |
1013 | continue; | 1013 | continue; |
1014 | } else | 1014 | } else |
1015 | cntlofreq = i; | 1015 | cntlofreq = i; |
1016 | } | 1016 | } |
1017 | 1017 | ||
1018 | if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { | 1018 | if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { |
1019 | printk(KERN_INFO PFX "invalid freq entries " | 1019 | printk(KERN_INFO PFX "invalid freq entries " |
1020 | "%u kHz vs. %u kHz\n", freq, | 1020 | "%u kHz vs. %u kHz\n", freq, |
1021 | (unsigned int) | 1021 | (unsigned int) |
1022 | (data->acpi_data.states[i].core_frequency | 1022 | (data->acpi_data.states[i].core_frequency |
1023 | * 1000)); | 1023 | * 1000)); |
1024 | invalidate_entry(data, i); | 1024 | invalidate_entry(data, i); |
1025 | continue; | 1025 | continue; |
1026 | } | 1026 | } |
1027 | } | 1027 | } |
1028 | return 0; | 1028 | return 0; |
1029 | } | 1029 | } |
1030 | 1030 | ||
1031 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) | 1031 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) |
1032 | { | 1032 | { |
1033 | if (data->acpi_data.state_count) | 1033 | if (data->acpi_data.state_count) |
1034 | acpi_processor_unregister_performance(&data->acpi_data, | 1034 | acpi_processor_unregister_performance(&data->acpi_data, |
1035 | data->cpu); | 1035 | data->cpu); |
1036 | free_cpumask_var(data->acpi_data.shared_cpu_map); | 1036 | free_cpumask_var(data->acpi_data.shared_cpu_map); |
1037 | } | 1037 | } |
1038 | 1038 | ||
1039 | static int get_transition_latency(struct powernow_k8_data *data) | 1039 | static int get_transition_latency(struct powernow_k8_data *data) |
1040 | { | 1040 | { |
1041 | int max_latency = 0; | 1041 | int max_latency = 0; |
1042 | int i; | 1042 | int i; |
1043 | for (i = 0; i < data->acpi_data.state_count; i++) { | 1043 | for (i = 0; i < data->acpi_data.state_count; i++) { |
1044 | int cur_latency = data->acpi_data.states[i].transition_latency | 1044 | int cur_latency = data->acpi_data.states[i].transition_latency |
1045 | + data->acpi_data.states[i].bus_master_latency; | 1045 | + data->acpi_data.states[i].bus_master_latency; |
1046 | if (cur_latency > max_latency) | 1046 | if (cur_latency > max_latency) |
1047 | max_latency = cur_latency; | 1047 | max_latency = cur_latency; |
1048 | } | 1048 | } |
1049 | /* value in usecs, needs to be in nanoseconds */ | 1049 | /* value in usecs, needs to be in nanoseconds */ |
1050 | return 1000 * max_latency; | 1050 | return 1000 * max_latency; |
1051 | } | 1051 | } |
1052 | 1052 | ||
1053 | /* Take a frequency, and issue the fid/vid transition command */ | 1053 | /* Take a frequency, and issue the fid/vid transition command */ |
1054 | static int transition_frequency_fidvid(struct powernow_k8_data *data, | 1054 | static int transition_frequency_fidvid(struct powernow_k8_data *data, |
1055 | unsigned int index) | 1055 | unsigned int index) |
1056 | { | 1056 | { |
1057 | u32 fid = 0; | 1057 | u32 fid = 0; |
1058 | u32 vid = 0; | 1058 | u32 vid = 0; |
1059 | int res, i; | 1059 | int res, i; |
1060 | struct cpufreq_freqs freqs; | 1060 | struct cpufreq_freqs freqs; |
1061 | 1061 | ||
1062 | dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); | 1062 | dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); |
1063 | 1063 | ||
1064 | /* fid/vid correctness check for k8 */ | 1064 | /* fid/vid correctness check for k8 */ |
1065 | /* fid is the lower 8 bits of the index we stored into | 1065 | /* fid is the lower 8 bits of the index we stored into |
1066 | * the cpufreq frequency table in find_psb_table; vid | 1066 | * the cpufreq frequency table in find_psb_table; vid |
1067 | * is the upper 8 bits. | 1067 | * is the upper 8 bits. |
1068 | */ | 1068 | */ |
1069 | fid = data->powernow_table[index].index & 0xFF; | 1069 | fid = data->powernow_table[index].index & 0xFF; |
1070 | vid = (data->powernow_table[index].index & 0xFF00) >> 8; | 1070 | vid = (data->powernow_table[index].index & 0xFF00) >> 8; |
1071 | 1071 | ||
1072 | dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); | 1072 | dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); |
1073 | 1073 | ||
1074 | if (query_current_values_with_pending_wait(data)) | 1074 | if (query_current_values_with_pending_wait(data)) |
1075 | return 1; | 1075 | return 1; |
1076 | 1076 | ||
1077 | if ((data->currvid == vid) && (data->currfid == fid)) { | 1077 | if ((data->currvid == vid) && (data->currfid == fid)) { |
1078 | dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", | 1078 | dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", |
1079 | fid, vid); | 1079 | fid, vid); |
1080 | return 0; | 1080 | return 0; |
1081 | } | 1081 | } |
1082 | 1082 | ||
1083 | if ((fid < HI_FID_TABLE_BOTTOM) && | 1083 | if ((fid < HI_FID_TABLE_BOTTOM) && |
1084 | (data->currfid < HI_FID_TABLE_BOTTOM)) { | 1084 | (data->currfid < HI_FID_TABLE_BOTTOM)) { |
1085 | printk(KERN_ERR PFX | 1085 | printk(KERN_ERR PFX |
1086 | "ignoring illegal change in lo freq table-%x to 0x%x\n", | 1086 | "ignoring illegal change in lo freq table-%x to 0x%x\n", |
1087 | data->currfid, fid); | 1087 | data->currfid, fid); |
1088 | return 1; | 1088 | return 1; |
1089 | } | 1089 | } |
1090 | 1090 | ||
1091 | dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", | 1091 | dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", |
1092 | smp_processor_id(), fid, vid); | 1092 | smp_processor_id(), fid, vid); |
1093 | freqs.old = find_khz_freq_from_fid(data->currfid); | 1093 | freqs.old = find_khz_freq_from_fid(data->currfid); |
1094 | freqs.new = find_khz_freq_from_fid(fid); | 1094 | freqs.new = find_khz_freq_from_fid(fid); |
1095 | 1095 | ||
1096 | for_each_cpu_mask_nr(i, *(data->available_cores)) { | 1096 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
1097 | freqs.cpu = i; | 1097 | freqs.cpu = i; |
1098 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 1098 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
1099 | } | 1099 | } |
1100 | 1100 | ||
1101 | res = transition_fid_vid(data, fid, vid); | 1101 | res = transition_fid_vid(data, fid, vid); |
1102 | freqs.new = find_khz_freq_from_fid(data->currfid); | 1102 | freqs.new = find_khz_freq_from_fid(data->currfid); |
1103 | 1103 | ||
1104 | for_each_cpu_mask_nr(i, *(data->available_cores)) { | 1104 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
1105 | freqs.cpu = i; | 1105 | freqs.cpu = i; |
1106 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 1106 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
1107 | } | 1107 | } |
1108 | return res; | 1108 | return res; |
1109 | } | 1109 | } |
1110 | 1110 | ||
1111 | /* Take a frequency, and issue the hardware pstate transition command */ | 1111 | /* Take a frequency, and issue the hardware pstate transition command */ |
1112 | static int transition_frequency_pstate(struct powernow_k8_data *data, | 1112 | static int transition_frequency_pstate(struct powernow_k8_data *data, |
1113 | unsigned int index) | 1113 | unsigned int index) |
1114 | { | 1114 | { |
1115 | u32 pstate = 0; | 1115 | u32 pstate = 0; |
1116 | int res, i; | 1116 | int res, i; |
1117 | struct cpufreq_freqs freqs; | 1117 | struct cpufreq_freqs freqs; |
1118 | 1118 | ||
1119 | dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); | 1119 | dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); |
1120 | 1120 | ||
1121 | /* get MSR index for hardware pstate transition */ | 1121 | /* get MSR index for hardware pstate transition */ |
1122 | pstate = index & HW_PSTATE_MASK; | 1122 | pstate = index & HW_PSTATE_MASK; |
1123 | if (pstate > data->max_hw_pstate) | 1123 | if (pstate > data->max_hw_pstate) |
1124 | return 0; | 1124 | return 0; |
1125 | freqs.old = find_khz_freq_from_pstate(data->powernow_table, | 1125 | freqs.old = find_khz_freq_from_pstate(data->powernow_table, |
1126 | data->currpstate); | 1126 | data->currpstate); |
1127 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | 1127 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); |
1128 | 1128 | ||
1129 | for_each_cpu_mask_nr(i, *(data->available_cores)) { | 1129 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
1130 | freqs.cpu = i; | 1130 | freqs.cpu = i; |
1131 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 1131 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
1132 | } | 1132 | } |
1133 | 1133 | ||
1134 | res = transition_pstate(data, pstate); | 1134 | res = transition_pstate(data, pstate); |
1135 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | 1135 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); |
1136 | 1136 | ||
1137 | for_each_cpu_mask_nr(i, *(data->available_cores)) { | 1137 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
1138 | freqs.cpu = i; | 1138 | freqs.cpu = i; |
1139 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 1139 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
1140 | } | 1140 | } |
1141 | return res; | 1141 | return res; |
1142 | } | 1142 | } |
1143 | 1143 | ||
1144 | /* Driver entry point to switch to the target frequency */ | 1144 | /* Driver entry point to switch to the target frequency */ |
1145 | static int powernowk8_target(struct cpufreq_policy *pol, | 1145 | static int powernowk8_target(struct cpufreq_policy *pol, |
1146 | unsigned targfreq, unsigned relation) | 1146 | unsigned targfreq, unsigned relation) |
1147 | { | 1147 | { |
1148 | cpumask_t oldmask; | 1148 | cpumask_t oldmask; |
1149 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); | 1149 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); |
1150 | u32 checkfid; | 1150 | u32 checkfid; |
1151 | u32 checkvid; | 1151 | u32 checkvid; |
1152 | unsigned int newstate; | 1152 | unsigned int newstate; |
1153 | int ret = -EIO; | 1153 | int ret = -EIO; |
1154 | 1154 | ||
1155 | if (!data) | 1155 | if (!data) |
1156 | return -EINVAL; | 1156 | return -EINVAL; |
1157 | 1157 | ||
1158 | checkfid = data->currfid; | 1158 | checkfid = data->currfid; |
1159 | checkvid = data->currvid; | 1159 | checkvid = data->currvid; |
1160 | 1160 | ||
1161 | /* only run on specific CPU from here on */ | 1161 | /* only run on specific CPU from here on */ |
1162 | oldmask = current->cpus_allowed; | 1162 | oldmask = current->cpus_allowed; |
1163 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); | 1163 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); |
1164 | 1164 | ||
1165 | if (smp_processor_id() != pol->cpu) { | 1165 | if (smp_processor_id() != pol->cpu) { |
1166 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); | 1166 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); |
1167 | goto err_out; | 1167 | goto err_out; |
1168 | } | 1168 | } |
1169 | 1169 | ||
1170 | if (pending_bit_stuck()) { | 1170 | if (pending_bit_stuck()) { |
1171 | printk(KERN_ERR PFX "failing targ, change pending bit set\n"); | 1171 | printk(KERN_ERR PFX "failing targ, change pending bit set\n"); |
1172 | goto err_out; | 1172 | goto err_out; |
1173 | } | 1173 | } |
1174 | 1174 | ||
1175 | dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", | 1175 | dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", |
1176 | pol->cpu, targfreq, pol->min, pol->max, relation); | 1176 | pol->cpu, targfreq, pol->min, pol->max, relation); |
1177 | 1177 | ||
1178 | if (query_current_values_with_pending_wait(data)) | 1178 | if (query_current_values_with_pending_wait(data)) |
1179 | goto err_out; | 1179 | goto err_out; |
1180 | 1180 | ||
1181 | if (cpu_family != CPU_HW_PSTATE) { | 1181 | if (cpu_family != CPU_HW_PSTATE) { |
1182 | dprintk("targ: curr fid 0x%x, vid 0x%x\n", | 1182 | dprintk("targ: curr fid 0x%x, vid 0x%x\n", |
1183 | data->currfid, data->currvid); | 1183 | data->currfid, data->currvid); |
1184 | 1184 | ||
1185 | if ((checkvid != data->currvid) || | 1185 | if ((checkvid != data->currvid) || |
1186 | (checkfid != data->currfid)) { | 1186 | (checkfid != data->currfid)) { |
1187 | printk(KERN_INFO PFX | 1187 | printk(KERN_INFO PFX |
1188 | "error - out of sync, fix 0x%x 0x%x, " | 1188 | "error - out of sync, fix 0x%x 0x%x, " |
1189 | "vid 0x%x 0x%x\n", | 1189 | "vid 0x%x 0x%x\n", |
1190 | checkfid, data->currfid, | 1190 | checkfid, data->currfid, |
1191 | checkvid, data->currvid); | 1191 | checkvid, data->currvid); |
1192 | } | 1192 | } |
1193 | } | 1193 | } |
1194 | 1194 | ||
1195 | if (cpufreq_frequency_table_target(pol, data->powernow_table, | 1195 | if (cpufreq_frequency_table_target(pol, data->powernow_table, |
1196 | targfreq, relation, &newstate)) | 1196 | targfreq, relation, &newstate)) |
1197 | goto err_out; | 1197 | goto err_out; |
1198 | 1198 | ||
1199 | mutex_lock(&fidvid_mutex); | 1199 | mutex_lock(&fidvid_mutex); |
1200 | 1200 | ||
1201 | powernow_k8_acpi_pst_values(data, newstate); | 1201 | powernow_k8_acpi_pst_values(data, newstate); |
1202 | 1202 | ||
1203 | if (cpu_family == CPU_HW_PSTATE) | 1203 | if (cpu_family == CPU_HW_PSTATE) |
1204 | ret = transition_frequency_pstate(data, newstate); | 1204 | ret = transition_frequency_pstate(data, newstate); |
1205 | else | 1205 | else |
1206 | ret = transition_frequency_fidvid(data, newstate); | 1206 | ret = transition_frequency_fidvid(data, newstate); |
1207 | if (ret) { | 1207 | if (ret) { |
1208 | printk(KERN_ERR PFX "transition frequency failed\n"); | 1208 | printk(KERN_ERR PFX "transition frequency failed\n"); |
1209 | ret = 1; | 1209 | ret = 1; |
1210 | mutex_unlock(&fidvid_mutex); | 1210 | mutex_unlock(&fidvid_mutex); |
1211 | goto err_out; | 1211 | goto err_out; |
1212 | } | 1212 | } |
1213 | mutex_unlock(&fidvid_mutex); | 1213 | mutex_unlock(&fidvid_mutex); |
1214 | 1214 | ||
1215 | if (cpu_family == CPU_HW_PSTATE) | 1215 | if (cpu_family == CPU_HW_PSTATE) |
1216 | pol->cur = find_khz_freq_from_pstate(data->powernow_table, | 1216 | pol->cur = find_khz_freq_from_pstate(data->powernow_table, |
1217 | newstate); | 1217 | newstate); |
1218 | else | 1218 | else |
1219 | pol->cur = find_khz_freq_from_fid(data->currfid); | 1219 | pol->cur = find_khz_freq_from_fid(data->currfid); |
1220 | ret = 0; | 1220 | ret = 0; |
1221 | 1221 | ||
1222 | err_out: | 1222 | err_out: |
1223 | set_cpus_allowed_ptr(current, &oldmask); | 1223 | set_cpus_allowed_ptr(current, &oldmask); |
1224 | return ret; | 1224 | return ret; |
1225 | } | 1225 | } |
1226 | 1226 | ||
1227 | /* Driver entry point to verify the policy and range of frequencies */ | 1227 | /* Driver entry point to verify the policy and range of frequencies */ |
1228 | static int powernowk8_verify(struct cpufreq_policy *pol) | 1228 | static int powernowk8_verify(struct cpufreq_policy *pol) |
1229 | { | 1229 | { |
1230 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); | 1230 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); |
1231 | 1231 | ||
1232 | if (!data) | 1232 | if (!data) |
1233 | return -EINVAL; | 1233 | return -EINVAL; |
1234 | 1234 | ||
1235 | return cpufreq_frequency_table_verify(pol, data->powernow_table); | 1235 | return cpufreq_frequency_table_verify(pol, data->powernow_table); |
1236 | } | 1236 | } |
1237 | 1237 | ||
1238 | static const char ACPI_PSS_BIOS_BUG_MSG[] = | 1238 | static const char ACPI_PSS_BIOS_BUG_MSG[] = |
1239 | KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" | 1239 | KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" |
1240 | KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; | 1240 | KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; |
1241 | 1241 | ||
1242 | /* per CPU init entry point to the driver */ | 1242 | /* per CPU init entry point to the driver */ |
1243 | static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | 1243 | static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) |
1244 | { | 1244 | { |
1245 | struct powernow_k8_data *data; | 1245 | struct powernow_k8_data *data; |
1246 | cpumask_t oldmask; | 1246 | cpumask_t oldmask; |
1247 | int rc; | 1247 | int rc; |
1248 | 1248 | ||
1249 | if (!cpu_online(pol->cpu)) | 1249 | if (!cpu_online(pol->cpu)) |
1250 | return -ENODEV; | 1250 | return -ENODEV; |
1251 | 1251 | ||
1252 | if (!check_supported_cpu(pol->cpu)) | 1252 | if (!check_supported_cpu(pol->cpu)) |
1253 | return -ENODEV; | 1253 | return -ENODEV; |
1254 | 1254 | ||
1255 | data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); | 1255 | data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); |
1256 | if (!data) { | 1256 | if (!data) { |
1257 | printk(KERN_ERR PFX "unable to alloc powernow_k8_data\n"); | 1257 | printk(KERN_ERR PFX "unable to alloc powernow_k8_data\n"); |
1258 | return -ENOMEM; | 1258 | return -ENOMEM; |
1259 | } | 1259 | } |
1260 | 1260 | ||
1261 | data->cpu = pol->cpu; | 1261 | data->cpu = pol->cpu; |
1262 | data->currpstate = HW_PSTATE_INVALID; | 1262 | data->currpstate = HW_PSTATE_INVALID; |
1263 | 1263 | ||
1264 | if (powernow_k8_cpu_init_acpi(data)) { | 1264 | if (powernow_k8_cpu_init_acpi(data)) { |
1265 | /* | 1265 | /* |
1266 | * Use the PSB BIOS structure. This is only available on | 1266 | * Use the PSB BIOS structure. This is only available on |
1267 | * an UP version, and is deprecated by AMD. | 1267 | * an UP version, and is deprecated by AMD. |
1268 | */ | 1268 | */ |
1269 | if (num_online_cpus() != 1) { | 1269 | if (num_online_cpus() != 1) { |
1270 | printk_once(ACPI_PSS_BIOS_BUG_MSG); | 1270 | printk_once(ACPI_PSS_BIOS_BUG_MSG); |
1271 | goto err_out; | 1271 | goto err_out; |
1272 | } | 1272 | } |
1273 | if (pol->cpu != 0) { | 1273 | if (pol->cpu != 0) { |
1274 | printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " | 1274 | printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " |
1275 | "CPU other than CPU0. Complain to your BIOS " | 1275 | "CPU other than CPU0. Complain to your BIOS " |
1276 | "vendor.\n"); | 1276 | "vendor.\n"); |
1277 | goto err_out; | 1277 | goto err_out; |
1278 | } | 1278 | } |
1279 | rc = find_psb_table(data); | 1279 | rc = find_psb_table(data); |
1280 | if (rc) | 1280 | if (rc) |
1281 | goto err_out; | 1281 | goto err_out; |
1282 | 1282 | ||
1283 | /* Take a crude guess here. | 1283 | /* Take a crude guess here. |
1284 | * That guess is in microseconds, so multiply by 1000 */ | 1284 | * That guess is in microseconds, so multiply by 1000 */ |
1285 | pol->cpuinfo.transition_latency = ( | 1285 | pol->cpuinfo.transition_latency = ( |
1286 | ((data->rvo + 8) * data->vstable * VST_UNITS_20US) + | 1286 | ((data->rvo + 8) * data->vstable * VST_UNITS_20US) + |
1287 | ((1 << data->irt) * 30)) * 1000; | 1287 | ((1 << data->irt) * 30)) * 1000; |
1288 | } else /* ACPI _PSS objects available */ | 1288 | } else /* ACPI _PSS objects available */ |
1289 | pol->cpuinfo.transition_latency = get_transition_latency(data); | 1289 | pol->cpuinfo.transition_latency = get_transition_latency(data); |
1290 | 1290 | ||
1291 | /* only run on specific CPU from here on */ | 1291 | /* only run on specific CPU from here on */ |
1292 | oldmask = current->cpus_allowed; | 1292 | oldmask = current->cpus_allowed; |
1293 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); | 1293 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); |
1294 | 1294 | ||
1295 | if (smp_processor_id() != pol->cpu) { | 1295 | if (smp_processor_id() != pol->cpu) { |
1296 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); | 1296 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); |
1297 | goto err_out_unmask; | 1297 | goto err_out_unmask; |
1298 | } | 1298 | } |
1299 | 1299 | ||
1300 | if (pending_bit_stuck()) { | 1300 | if (pending_bit_stuck()) { |
1301 | printk(KERN_ERR PFX "failing init, change pending bit set\n"); | 1301 | printk(KERN_ERR PFX "failing init, change pending bit set\n"); |
1302 | goto err_out_unmask; | 1302 | goto err_out_unmask; |
1303 | } | 1303 | } |
1304 | 1304 | ||
1305 | if (query_current_values_with_pending_wait(data)) | 1305 | if (query_current_values_with_pending_wait(data)) |
1306 | goto err_out_unmask; | 1306 | goto err_out_unmask; |
1307 | 1307 | ||
1308 | if (cpu_family == CPU_OPTERON) | 1308 | if (cpu_family == CPU_OPTERON) |
1309 | fidvid_msr_init(); | 1309 | fidvid_msr_init(); |
1310 | 1310 | ||
1311 | /* run on any CPU again */ | 1311 | /* run on any CPU again */ |
1312 | set_cpus_allowed_ptr(current, &oldmask); | 1312 | set_cpus_allowed_ptr(current, &oldmask); |
1313 | 1313 | ||
1314 | if (cpu_family == CPU_HW_PSTATE) | 1314 | if (cpu_family == CPU_HW_PSTATE) |
1315 | cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); | 1315 | cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); |
1316 | else | 1316 | else |
1317 | cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu)); | 1317 | cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu)); |
1318 | data->available_cores = pol->cpus; | 1318 | data->available_cores = pol->cpus; |
1319 | 1319 | ||
1320 | if (cpu_family == CPU_HW_PSTATE) | 1320 | if (cpu_family == CPU_HW_PSTATE) |
1321 | pol->cur = find_khz_freq_from_pstate(data->powernow_table, | 1321 | pol->cur = find_khz_freq_from_pstate(data->powernow_table, |
1322 | data->currpstate); | 1322 | data->currpstate); |
1323 | else | 1323 | else |
1324 | pol->cur = find_khz_freq_from_fid(data->currfid); | 1324 | pol->cur = find_khz_freq_from_fid(data->currfid); |
1325 | dprintk("policy current frequency %d kHz\n", pol->cur); | 1325 | dprintk("policy current frequency %d kHz\n", pol->cur); |
1326 | 1326 | ||
1327 | /* min/max the cpu is capable of */ | 1327 | /* min/max the cpu is capable of */ |
1328 | if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { | 1328 | if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { |
1329 | printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n"); | 1329 | printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n"); |
1330 | powernow_k8_cpu_exit_acpi(data); | 1330 | powernow_k8_cpu_exit_acpi(data); |
1331 | kfree(data->powernow_table); | 1331 | kfree(data->powernow_table); |
1332 | kfree(data); | 1332 | kfree(data); |
1333 | return -EINVAL; | 1333 | return -EINVAL; |
1334 | } | 1334 | } |
1335 | 1335 | ||
1336 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); | 1336 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); |
1337 | 1337 | ||
1338 | if (cpu_family == CPU_HW_PSTATE) | 1338 | if (cpu_family == CPU_HW_PSTATE) |
1339 | dprintk("cpu_init done, current pstate 0x%x\n", | 1339 | dprintk("cpu_init done, current pstate 0x%x\n", |
1340 | data->currpstate); | 1340 | data->currpstate); |
1341 | else | 1341 | else |
1342 | dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", | 1342 | dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", |
1343 | data->currfid, data->currvid); | 1343 | data->currfid, data->currvid); |
1344 | 1344 | ||
1345 | per_cpu(powernow_data, pol->cpu) = data; | 1345 | per_cpu(powernow_data, pol->cpu) = data; |
1346 | 1346 | ||
1347 | return 0; | 1347 | return 0; |
1348 | 1348 | ||
1349 | err_out_unmask: | 1349 | err_out_unmask: |
1350 | set_cpus_allowed_ptr(current, &oldmask); | 1350 | set_cpus_allowed_ptr(current, &oldmask); |
1351 | powernow_k8_cpu_exit_acpi(data); | 1351 | powernow_k8_cpu_exit_acpi(data); |
1352 | 1352 | ||
1353 | err_out: | 1353 | err_out: |
1354 | kfree(data); | 1354 | kfree(data); |
1355 | return -ENODEV; | 1355 | return -ENODEV; |
1356 | } | 1356 | } |
1357 | 1357 | ||
1358 | static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) | 1358 | static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) |
1359 | { | 1359 | { |
1360 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); | 1360 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); |
1361 | 1361 | ||
1362 | if (!data) | 1362 | if (!data) |
1363 | return -EINVAL; | 1363 | return -EINVAL; |
1364 | 1364 | ||
1365 | powernow_k8_cpu_exit_acpi(data); | 1365 | powernow_k8_cpu_exit_acpi(data); |
1366 | 1366 | ||
1367 | cpufreq_frequency_table_put_attr(pol->cpu); | 1367 | cpufreq_frequency_table_put_attr(pol->cpu); |
1368 | 1368 | ||
1369 | kfree(data->powernow_table); | 1369 | kfree(data->powernow_table); |
1370 | kfree(data); | 1370 | kfree(data); |
1371 | 1371 | ||
1372 | return 0; | 1372 | return 0; |
1373 | } | 1373 | } |
1374 | 1374 | ||
1375 | static unsigned int powernowk8_get(unsigned int cpu) | 1375 | static unsigned int powernowk8_get(unsigned int cpu) |
1376 | { | 1376 | { |
1377 | struct powernow_k8_data *data; | 1377 | struct powernow_k8_data *data; |
1378 | cpumask_t oldmask = current->cpus_allowed; | 1378 | cpumask_t oldmask = current->cpus_allowed; |
1379 | unsigned int khz = 0; | 1379 | unsigned int khz = 0; |
1380 | unsigned int first; | 1380 | unsigned int first; |
1381 | 1381 | ||
1382 | first = cpumask_first(cpu_core_mask(cpu)); | 1382 | first = cpumask_first(cpu_core_mask(cpu)); |
1383 | data = per_cpu(powernow_data, first); | 1383 | data = per_cpu(powernow_data, first); |
1384 | 1384 | ||
1385 | if (!data) | 1385 | if (!data) |
1386 | return -EINVAL; | 1386 | return -EINVAL; |
1387 | 1387 | ||
1388 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 1388 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
1389 | if (smp_processor_id() != cpu) { | 1389 | if (smp_processor_id() != cpu) { |
1390 | printk(KERN_ERR PFX | 1390 | printk(KERN_ERR PFX |
1391 | "limiting to CPU %d failed in powernowk8_get\n", cpu); | 1391 | "limiting to CPU %d failed in powernowk8_get\n", cpu); |
1392 | set_cpus_allowed_ptr(current, &oldmask); | 1392 | set_cpus_allowed_ptr(current, &oldmask); |
1393 | return 0; | 1393 | return 0; |
1394 | } | 1394 | } |
1395 | 1395 | ||
1396 | if (query_current_values_with_pending_wait(data)) | 1396 | if (query_current_values_with_pending_wait(data)) |
1397 | goto out; | 1397 | goto out; |
1398 | 1398 | ||
1399 | if (cpu_family == CPU_HW_PSTATE) | 1399 | if (cpu_family == CPU_HW_PSTATE) |
1400 | khz = find_khz_freq_from_pstate(data->powernow_table, | 1400 | khz = find_khz_freq_from_pstate(data->powernow_table, |
1401 | data->currpstate); | 1401 | data->currpstate); |
1402 | else | 1402 | else |
1403 | khz = find_khz_freq_from_fid(data->currfid); | 1403 | khz = find_khz_freq_from_fid(data->currfid); |
1404 | 1404 | ||
1405 | 1405 | ||
1406 | out: | 1406 | out: |
1407 | set_cpus_allowed_ptr(current, &oldmask); | 1407 | set_cpus_allowed_ptr(current, &oldmask); |
1408 | return khz; | 1408 | return khz; |
1409 | } | 1409 | } |
1410 | 1410 | ||
1411 | static struct freq_attr *powernow_k8_attr[] = { | 1411 | static struct freq_attr *powernow_k8_attr[] = { |
1412 | &cpufreq_freq_attr_scaling_available_freqs, | 1412 | &cpufreq_freq_attr_scaling_available_freqs, |
1413 | NULL, | 1413 | NULL, |
1414 | }; | 1414 | }; |
1415 | 1415 | ||
1416 | static struct cpufreq_driver cpufreq_amd64_driver = { | 1416 | static struct cpufreq_driver cpufreq_amd64_driver = { |
1417 | .verify = powernowk8_verify, | 1417 | .verify = powernowk8_verify, |
1418 | .target = powernowk8_target, | 1418 | .target = powernowk8_target, |
1419 | .init = powernowk8_cpu_init, | 1419 | .init = powernowk8_cpu_init, |
1420 | .exit = __devexit_p(powernowk8_cpu_exit), | 1420 | .exit = __devexit_p(powernowk8_cpu_exit), |
1421 | .get = powernowk8_get, | 1421 | .get = powernowk8_get, |
1422 | .name = "powernow-k8", | 1422 | .name = "powernow-k8", |
1423 | .owner = THIS_MODULE, | 1423 | .owner = THIS_MODULE, |
1424 | .attr = powernow_k8_attr, | 1424 | .attr = powernow_k8_attr, |
1425 | }; | 1425 | }; |
1426 | 1426 | ||
1427 | /* driver entry point for init */ | 1427 | /* driver entry point for init */ |
1428 | static int __cpuinit powernowk8_init(void) | 1428 | static int __cpuinit powernowk8_init(void) |
1429 | { | 1429 | { |
1430 | unsigned int i, supported_cpus = 0; | 1430 | unsigned int i, supported_cpus = 0; |
1431 | 1431 | ||
1432 | for_each_online_cpu(i) { | 1432 | for_each_online_cpu(i) { |
1433 | if (check_supported_cpu(i)) | 1433 | if (check_supported_cpu(i)) |
1434 | supported_cpus++; | 1434 | supported_cpus++; |
1435 | } | 1435 | } |
1436 | 1436 | ||
1437 | if (supported_cpus == num_online_cpus()) { | 1437 | if (supported_cpus == num_online_cpus()) { |
1438 | printk(KERN_INFO PFX "Found %d %s " | 1438 | printk(KERN_INFO PFX "Found %d %s " |
1439 | "processors (%d cpu cores) (" VERSION ")\n", | 1439 | "processors (%d cpu cores) (" VERSION ")\n", |
1440 | num_online_nodes(), | 1440 | num_online_nodes(), |
1441 | boot_cpu_data.x86_model_id, supported_cpus); | 1441 | boot_cpu_data.x86_model_id, supported_cpus); |
1442 | return cpufreq_register_driver(&cpufreq_amd64_driver); | 1442 | return cpufreq_register_driver(&cpufreq_amd64_driver); |
1443 | } | 1443 | } |
1444 | 1444 | ||
1445 | return -ENODEV; | 1445 | return -ENODEV; |
1446 | } | 1446 | } |
1447 | 1447 | ||
1448 | /* driver entry point for term */ | 1448 | /* driver entry point for term */ |
1449 | static void __exit powernowk8_exit(void) | 1449 | static void __exit powernowk8_exit(void) |
1450 | { | 1450 | { |
1451 | dprintk("exit\n"); | 1451 | dprintk("exit\n"); |
1452 | 1452 | ||
1453 | cpufreq_unregister_driver(&cpufreq_amd64_driver); | 1453 | cpufreq_unregister_driver(&cpufreq_amd64_driver); |
1454 | } | 1454 | } |
1455 | 1455 | ||
1456 | MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and " | 1456 | MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and " |
1457 | "Mark Langsdorf <mark.langsdorf@amd.com>"); | 1457 | "Mark Langsdorf <mark.langsdorf@amd.com>"); |
1458 | MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); | 1458 | MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); |
1459 | MODULE_LICENSE("GPL"); | 1459 | MODULE_LICENSE("GPL"); |
1460 | 1460 | ||
1461 | late_initcall(powernowk8_init); | 1461 | late_initcall(powernowk8_init); |
1462 | module_exit(powernowk8_exit); | 1462 | module_exit(powernowk8_exit); |
1463 | 1463 |
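
Why the zeroed allocation matters, in a minimal sketch (not taken from the files above): with CONFIG_CPUMASK_OFFSTACK, which MAXSMP selects, cpumask_var_t is a pointer rather than an array, so alloc_cpumask_var() hands back uninitialized memory. A mask the code expects to start out empty must therefore use zalloc_cpumask_var(). The mask name "covered" and the init function below are hypothetical, for illustration only.

#include <linux/init.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>

/* Hypothetical mask that gets tested before any bit is ever set. */
static cpumask_var_t covered;

static int __init covered_init(void)
{
	/*
	 * Without CONFIG_CPUMASK_OFFSTACK a static cpumask_var_t is a
	 * plain array in .bss and starts out all-clear.  With OFFSTACK
	 * it is a pointer, and alloc_cpumask_var() could leave stale
	 * bits behind; zalloc_cpumask_var() restores the zeroed state.
	 */
	if (!zalloc_cpumask_var(&covered, GFP_KERNEL))
		return -ENOMEM;
	return 0;
}
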
arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
1 | /* | 1 | /* |
2 | * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium | 2 | * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium |
3 | * M (part of the Centrino chipset). | 3 | * M (part of the Centrino chipset). |
4 | * | 4 | * |
5 | * Since the original Pentium M, most new Intel CPUs support Enhanced | 5 | * Since the original Pentium M, most new Intel CPUs support Enhanced |
6 | * SpeedStep. | 6 | * SpeedStep. |
7 | * | 7 | * |
8 | * Despite the "SpeedStep" in the name, this is almost entirely unlike | 8 | * Despite the "SpeedStep" in the name, this is almost entirely unlike |
9 | * traditional SpeedStep. | 9 | * traditional SpeedStep. |
10 | * | 10 | * |
11 | * Modelled on speedstep.c | 11 | * Modelled on speedstep.c |
12 | * | 12 | * |
13 | * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org> | 13 | * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org> |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/init.h> | 18 | #include <linux/init.h> |
19 | #include <linux/cpufreq.h> | 19 | #include <linux/cpufreq.h> |
20 | #include <linux/sched.h> /* current */ | 20 | #include <linux/sched.h> /* current */ |
21 | #include <linux/delay.h> | 21 | #include <linux/delay.h> |
22 | #include <linux/compiler.h> | 22 | #include <linux/compiler.h> |
23 | 23 | ||
24 | #include <asm/msr.h> | 24 | #include <asm/msr.h> |
25 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
26 | #include <asm/cpufeature.h> | 26 | #include <asm/cpufeature.h> |
27 | 27 | ||
28 | #define PFX "speedstep-centrino: " | 28 | #define PFX "speedstep-centrino: " |
29 | #define MAINTAINER "cpufreq@vger.kernel.org" | 29 | #define MAINTAINER "cpufreq@vger.kernel.org" |
30 | 30 | ||
31 | #define dprintk(msg...) \ | 31 | #define dprintk(msg...) \ |
32 | cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) | 32 | cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) |
33 | 33 | ||
34 | #define INTEL_MSR_RANGE (0xffff) | 34 | #define INTEL_MSR_RANGE (0xffff) |
35 | 35 | ||
36 | struct cpu_id | 36 | struct cpu_id |
37 | { | 37 | { |
38 | __u8 x86; /* CPU family */ | 38 | __u8 x86; /* CPU family */ |
39 | __u8 x86_model; /* model */ | 39 | __u8 x86_model; /* model */ |
40 | __u8 x86_mask; /* stepping */ | 40 | __u8 x86_mask; /* stepping */ |
41 | }; | 41 | }; |
42 | 42 | ||
43 | enum { | 43 | enum { |
44 | CPU_BANIAS, | 44 | CPU_BANIAS, |
45 | CPU_DOTHAN_A1, | 45 | CPU_DOTHAN_A1, |
46 | CPU_DOTHAN_A2, | 46 | CPU_DOTHAN_A2, |
47 | CPU_DOTHAN_B0, | 47 | CPU_DOTHAN_B0, |
48 | CPU_MP4HT_D0, | 48 | CPU_MP4HT_D0, |
49 | CPU_MP4HT_E0, | 49 | CPU_MP4HT_E0, |
50 | }; | 50 | }; |
51 | 51 | ||
52 | static const struct cpu_id cpu_ids[] = { | 52 | static const struct cpu_id cpu_ids[] = { |
53 | [CPU_BANIAS] = { 6, 9, 5 }, | 53 | [CPU_BANIAS] = { 6, 9, 5 }, |
54 | [CPU_DOTHAN_A1] = { 6, 13, 1 }, | 54 | [CPU_DOTHAN_A1] = { 6, 13, 1 }, |
55 | [CPU_DOTHAN_A2] = { 6, 13, 2 }, | 55 | [CPU_DOTHAN_A2] = { 6, 13, 2 }, |
56 | [CPU_DOTHAN_B0] = { 6, 13, 6 }, | 56 | [CPU_DOTHAN_B0] = { 6, 13, 6 }, |
57 | [CPU_MP4HT_D0] = {15, 3, 4 }, | 57 | [CPU_MP4HT_D0] = {15, 3, 4 }, |
58 | [CPU_MP4HT_E0] = {15, 4, 1 }, | 58 | [CPU_MP4HT_E0] = {15, 4, 1 }, |
59 | }; | 59 | }; |
60 | #define N_IDS ARRAY_SIZE(cpu_ids) | 60 | #define N_IDS ARRAY_SIZE(cpu_ids) |
61 | 61 | ||
62 | struct cpu_model | 62 | struct cpu_model |
63 | { | 63 | { |
64 | const struct cpu_id *cpu_id; | 64 | const struct cpu_id *cpu_id; |
65 | const char *model_name; | 65 | const char *model_name; |
66 | unsigned max_freq; /* max clock in kHz */ | 66 | unsigned max_freq; /* max clock in kHz */ |
67 | 67 | ||
68 | struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ | 68 | struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ |
69 | }; | 69 | }; |
70 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, | 70 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, |
71 | const struct cpu_id *x); | 71 | const struct cpu_id *x); |
72 | 72 | ||
73 | /* Operating points for current CPU */ | 73 | /* Operating points for current CPU */ |
74 | static DEFINE_PER_CPU(struct cpu_model *, centrino_model); | 74 | static DEFINE_PER_CPU(struct cpu_model *, centrino_model); |
75 | static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu); | 75 | static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu); |
76 | 76 | ||
77 | static struct cpufreq_driver centrino_driver; | 77 | static struct cpufreq_driver centrino_driver; |
78 | 78 | ||
79 | #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE | 79 | #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE |
80 | 80 | ||
81 | /* Computes the correct form for IA32_PERF_CTL MSR for a particular | 81 | /* Computes the correct form for IA32_PERF_CTL MSR for a particular |
82 | frequency/voltage operating point; frequency in MHz, volts in mV. | 82 | frequency/voltage operating point; frequency in MHz, volts in mV. |
83 | This is stored as "index" in the structure. */ | 83 | This is stored as "index" in the structure. */ |
84 | #define OP(mhz, mv) \ | 84 | #define OP(mhz, mv) \ |
85 | { \ | 85 | { \ |
86 | .frequency = (mhz) * 1000, \ | 86 | .frequency = (mhz) * 1000, \ |
87 | .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \ | 87 | .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \ |
88 | } | 88 | } |
89 | 89 | ||
90 | /* | 90 | /* |
91 | * These voltage tables were derived from the Intel Pentium M | 91 | * These voltage tables were derived from the Intel Pentium M |
92 | * datasheet, document 25261202.pdf, Table 5. I have verified they | 92 | * datasheet, document 25261202.pdf, Table 5. I have verified they |
93 | * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium | 93 | * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium |
94 | * M. | 94 | * M. |
95 | */ | 95 | */ |
96 | 96 | ||
97 | /* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */ | 97 | /* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */ |
98 | static struct cpufreq_frequency_table banias_900[] = | 98 | static struct cpufreq_frequency_table banias_900[] = |
99 | { | 99 | { |
100 | OP(600, 844), | 100 | OP(600, 844), |
101 | OP(800, 988), | 101 | OP(800, 988), |
102 | OP(900, 1004), | 102 | OP(900, 1004), |
103 | { .frequency = CPUFREQ_TABLE_END } | 103 | { .frequency = CPUFREQ_TABLE_END } |
104 | }; | 104 | }; |
105 | 105 | ||
106 | /* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */ | 106 | /* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */ |
107 | static struct cpufreq_frequency_table banias_1000[] = | 107 | static struct cpufreq_frequency_table banias_1000[] = |
108 | { | 108 | { |
109 | OP(600, 844), | 109 | OP(600, 844), |
110 | OP(800, 972), | 110 | OP(800, 972), |
111 | OP(900, 988), | 111 | OP(900, 988), |
112 | OP(1000, 1004), | 112 | OP(1000, 1004), |
113 | { .frequency = CPUFREQ_TABLE_END } | 113 | { .frequency = CPUFREQ_TABLE_END } |
114 | }; | 114 | }; |
115 | 115 | ||
116 | /* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */ | 116 | /* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */ |
117 | static struct cpufreq_frequency_table banias_1100[] = | 117 | static struct cpufreq_frequency_table banias_1100[] = |
118 | { | 118 | { |
119 | OP( 600, 956), | 119 | OP( 600, 956), |
120 | OP( 800, 1020), | 120 | OP( 800, 1020), |
121 | OP( 900, 1100), | 121 | OP( 900, 1100), |
122 | OP(1000, 1164), | 122 | OP(1000, 1164), |
123 | OP(1100, 1180), | 123 | OP(1100, 1180), |
124 | { .frequency = CPUFREQ_TABLE_END } | 124 | { .frequency = CPUFREQ_TABLE_END } |
125 | }; | 125 | }; |
126 | 126 | ||
127 | 127 | ||
128 | /* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */ | 128 | /* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */ |
129 | static struct cpufreq_frequency_table banias_1200[] = | 129 | static struct cpufreq_frequency_table banias_1200[] = |
130 | { | 130 | { |
131 | OP( 600, 956), | 131 | OP( 600, 956), |
132 | OP( 800, 1004), | 132 | OP( 800, 1004), |
133 | OP( 900, 1020), | 133 | OP( 900, 1020), |
134 | OP(1000, 1100), | 134 | OP(1000, 1100), |
135 | OP(1100, 1164), | 135 | OP(1100, 1164), |
136 | OP(1200, 1180), | 136 | OP(1200, 1180), |
137 | { .frequency = CPUFREQ_TABLE_END } | 137 | { .frequency = CPUFREQ_TABLE_END } |
138 | }; | 138 | }; |
139 | 139 | ||
140 | /* Intel Pentium M processor 1.30GHz (Banias) */ | 140 | /* Intel Pentium M processor 1.30GHz (Banias) */ |
141 | static struct cpufreq_frequency_table banias_1300[] = | 141 | static struct cpufreq_frequency_table banias_1300[] = |
142 | { | 142 | { |
143 | OP( 600, 956), | 143 | OP( 600, 956), |
144 | OP( 800, 1260), | 144 | OP( 800, 1260), |
145 | OP(1000, 1292), | 145 | OP(1000, 1292), |
146 | OP(1200, 1356), | 146 | OP(1200, 1356), |
147 | OP(1300, 1388), | 147 | OP(1300, 1388), |
148 | { .frequency = CPUFREQ_TABLE_END } | 148 | { .frequency = CPUFREQ_TABLE_END } |
149 | }; | 149 | }; |
150 | 150 | ||
151 | /* Intel Pentium M processor 1.40GHz (Banias) */ | 151 | /* Intel Pentium M processor 1.40GHz (Banias) */ |
152 | static struct cpufreq_frequency_table banias_1400[] = | 152 | static struct cpufreq_frequency_table banias_1400[] = |
153 | { | 153 | { |
154 | OP( 600, 956), | 154 | OP( 600, 956), |
155 | OP( 800, 1180), | 155 | OP( 800, 1180), |
156 | OP(1000, 1308), | 156 | OP(1000, 1308), |
157 | OP(1200, 1436), | 157 | OP(1200, 1436), |
158 | OP(1400, 1484), | 158 | OP(1400, 1484), |
159 | { .frequency = CPUFREQ_TABLE_END } | 159 | { .frequency = CPUFREQ_TABLE_END } |
160 | }; | 160 | }; |
161 | 161 | ||
162 | /* Intel Pentium M processor 1.50GHz (Banias) */ | 162 | /* Intel Pentium M processor 1.50GHz (Banias) */ |
163 | static struct cpufreq_frequency_table banias_1500[] = | 163 | static struct cpufreq_frequency_table banias_1500[] = |
164 | { | 164 | { |
165 | OP( 600, 956), | 165 | OP( 600, 956), |
166 | OP( 800, 1116), | 166 | OP( 800, 1116), |
167 | OP(1000, 1228), | 167 | OP(1000, 1228), |
168 | OP(1200, 1356), | 168 | OP(1200, 1356), |
169 | OP(1400, 1452), | 169 | OP(1400, 1452), |
170 | OP(1500, 1484), | 170 | OP(1500, 1484), |
171 | { .frequency = CPUFREQ_TABLE_END } | 171 | { .frequency = CPUFREQ_TABLE_END } |
172 | }; | 172 | }; |
173 | 173 | ||
174 | /* Intel Pentium M processor 1.60GHz (Banias) */ | 174 | /* Intel Pentium M processor 1.60GHz (Banias) */ |
175 | static struct cpufreq_frequency_table banias_1600[] = | 175 | static struct cpufreq_frequency_table banias_1600[] = |
176 | { | 176 | { |
177 | OP( 600, 956), | 177 | OP( 600, 956), |
178 | OP( 800, 1036), | 178 | OP( 800, 1036), |
179 | OP(1000, 1164), | 179 | OP(1000, 1164), |
180 | OP(1200, 1276), | 180 | OP(1200, 1276), |
181 | OP(1400, 1420), | 181 | OP(1400, 1420), |
182 | OP(1600, 1484), | 182 | OP(1600, 1484), |
183 | { .frequency = CPUFREQ_TABLE_END } | 183 | { .frequency = CPUFREQ_TABLE_END } |
184 | }; | 184 | }; |
185 | 185 | ||
186 | /* Intel Pentium M processor 1.70GHz (Banias) */ | 186 | /* Intel Pentium M processor 1.70GHz (Banias) */ |
187 | static struct cpufreq_frequency_table banias_1700[] = | 187 | static struct cpufreq_frequency_table banias_1700[] = |
188 | { | 188 | { |
189 | OP( 600, 956), | 189 | OP( 600, 956), |
190 | OP( 800, 1004), | 190 | OP( 800, 1004), |
191 | OP(1000, 1116), | 191 | OP(1000, 1116), |
192 | OP(1200, 1228), | 192 | OP(1200, 1228), |
193 | OP(1400, 1308), | 193 | OP(1400, 1308), |
194 | OP(1700, 1484), | 194 | OP(1700, 1484), |
195 | { .frequency = CPUFREQ_TABLE_END } | 195 | { .frequency = CPUFREQ_TABLE_END } |
196 | }; | 196 | }; |
197 | #undef OP | 197 | #undef OP |
198 | 198 | ||
199 | #define _BANIAS(cpuid, max, name) \ | 199 | #define _BANIAS(cpuid, max, name) \ |
200 | { .cpu_id = cpuid, \ | 200 | { .cpu_id = cpuid, \ |
201 | .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \ | 201 | .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \ |
202 | .max_freq = (max)*1000, \ | 202 | .max_freq = (max)*1000, \ |
203 | .op_points = banias_##max, \ | 203 | .op_points = banias_##max, \ |
204 | } | 204 | } |
205 | #define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max) | 205 | #define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max) |
206 | 206 | ||
207 | /* CPU models, their operating frequency range, and freq/voltage | 207 | /* CPU models, their operating frequency range, and freq/voltage |
208 | operating points */ | 208 | operating points */ |
209 | static struct cpu_model models[] = | 209 | static struct cpu_model models[] = |
210 | { | 210 | { |
211 | _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"), | 211 | _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"), |
212 | BANIAS(1000), | 212 | BANIAS(1000), |
213 | BANIAS(1100), | 213 | BANIAS(1100), |
214 | BANIAS(1200), | 214 | BANIAS(1200), |
215 | BANIAS(1300), | 215 | BANIAS(1300), |
216 | BANIAS(1400), | 216 | BANIAS(1400), |
217 | BANIAS(1500), | 217 | BANIAS(1500), |
218 | BANIAS(1600), | 218 | BANIAS(1600), |
219 | BANIAS(1700), | 219 | BANIAS(1700), |
220 | 220 | ||
221 | /* NULL model_name is a wildcard */ | 221 | /* NULL model_name is a wildcard */ |
222 | { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, | 222 | { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, |
223 | { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, | 223 | { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, |
224 | { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, | 224 | { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, |
225 | { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, | 225 | { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, |
226 | { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, | 226 | { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, |
227 | 227 | ||
228 | { NULL, } | 228 | { NULL, } |
229 | }; | 229 | }; |
230 | #undef _BANIAS | 230 | #undef _BANIAS |
231 | #undef BANIAS | 231 | #undef BANIAS |
232 | 232 | ||
233 | static int centrino_cpu_init_table(struct cpufreq_policy *policy) | 233 | static int centrino_cpu_init_table(struct cpufreq_policy *policy) |
234 | { | 234 | { |
235 | struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); | 235 | struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); |
236 | struct cpu_model *model; | 236 | struct cpu_model *model; |
237 | 237 | ||
238 | for(model = models; model->cpu_id != NULL; model++) | 238 | for(model = models; model->cpu_id != NULL; model++) |
239 | if (centrino_verify_cpu_id(cpu, model->cpu_id) && | 239 | if (centrino_verify_cpu_id(cpu, model->cpu_id) && |
240 | (model->model_name == NULL || | 240 | (model->model_name == NULL || |
241 | strcmp(cpu->x86_model_id, model->model_name) == 0)) | 241 | strcmp(cpu->x86_model_id, model->model_name) == 0)) |
242 | break; | 242 | break; |
243 | 243 | ||
244 | if (model->cpu_id == NULL) { | 244 | if (model->cpu_id == NULL) { |
245 | /* No match at all */ | 245 | /* No match at all */ |
246 | dprintk("no support for CPU model \"%s\": " | 246 | dprintk("no support for CPU model \"%s\": " |
247 | "send /proc/cpuinfo to " MAINTAINER "\n", | 247 | "send /proc/cpuinfo to " MAINTAINER "\n", |
248 | cpu->x86_model_id); | 248 | cpu->x86_model_id); |
249 | return -ENOENT; | 249 | return -ENOENT; |
250 | } | 250 | } |
251 | 251 | ||
252 | if (model->op_points == NULL) { | 252 | if (model->op_points == NULL) { |
253 | /* Matched a non-match */ | 253 | /* Matched a non-match */ |
254 | dprintk("no table support for CPU model \"%s\"\n", | 254 | dprintk("no table support for CPU model \"%s\"\n", |
255 | cpu->x86_model_id); | 255 | cpu->x86_model_id); |
256 | dprintk("try using the acpi-cpufreq driver\n"); | 256 | dprintk("try using the acpi-cpufreq driver\n"); |
257 | return -ENOENT; | 257 | return -ENOENT; |
258 | } | 258 | } |
259 | 259 | ||
260 | per_cpu(centrino_model, policy->cpu) = model; | 260 | per_cpu(centrino_model, policy->cpu) = model; |
261 | 261 | ||
262 | dprintk("found \"%s\": max frequency: %dkHz\n", | 262 | dprintk("found \"%s\": max frequency: %dkHz\n", |
263 | model->model_name, model->max_freq); | 263 | model->model_name, model->max_freq); |
264 | 264 | ||
265 | return 0; | 265 | return 0; |
266 | } | 266 | } |
267 | 267 | ||
268 | #else | 268 | #else |
269 | static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) | 269 | static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) |
270 | { | 270 | { |
271 | return -ENODEV; | 271 | return -ENODEV; |
272 | } | 272 | } |
273 | #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ | 273 | #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ |
274 | 274 | ||
275 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, | 275 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, |
276 | const struct cpu_id *x) | 276 | const struct cpu_id *x) |
277 | { | 277 | { |
278 | if ((c->x86 == x->x86) && | 278 | if ((c->x86 == x->x86) && |
279 | (c->x86_model == x->x86_model) && | 279 | (c->x86_model == x->x86_model) && |
280 | (c->x86_mask == x->x86_mask)) | 280 | (c->x86_mask == x->x86_mask)) |
281 | return 1; | 281 | return 1; |
282 | return 0; | 282 | return 0; |
283 | } | 283 | } |
284 | 284 | ||
285 | /* To be called only after centrino_model is initialized */ | 285 | /* To be called only after centrino_model is initialized */ |
286 | static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) | 286 | static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) |
287 | { | 287 | { |
288 | int i; | 288 | int i; |
289 | 289 | ||
290 | /* | 290 | /* |
291 | * Extract clock in kHz from PERF_CTL value | 291 | * Extract clock in kHz from PERF_CTL value |
292 | * for centrino, as some DSDTs are buggy. | 292 | * for centrino, as some DSDTs are buggy. |
293 | * Ideally, this can be done using the acpi_data structure. | 293 | * Ideally, this can be done using the acpi_data structure. |
294 | */ | 294 | */ |
295 | if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) || | 295 | if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) || |
296 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) || | 296 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) || |
297 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) { | 297 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) { |
298 | msr = (msr >> 8) & 0xff; | 298 | msr = (msr >> 8) & 0xff; |
299 | return msr * 100000; | 299 | return msr * 100000; |
300 | } | 300 | } |
301 | 301 | ||
302 | if ((!per_cpu(centrino_model, cpu)) || | 302 | if ((!per_cpu(centrino_model, cpu)) || |
303 | (!per_cpu(centrino_model, cpu)->op_points)) | 303 | (!per_cpu(centrino_model, cpu)->op_points)) |
304 | return 0; | 304 | return 0; |
305 | 305 | ||
306 | msr &= 0xffff; | 306 | msr &= 0xffff; |
307 | for (i = 0; | 307 | for (i = 0; |
308 | per_cpu(centrino_model, cpu)->op_points[i].frequency | 308 | per_cpu(centrino_model, cpu)->op_points[i].frequency |
309 | != CPUFREQ_TABLE_END; | 309 | != CPUFREQ_TABLE_END; |
310 | i++) { | 310 | i++) { |
311 | if (msr == per_cpu(centrino_model, cpu)->op_points[i].index) | 311 | if (msr == per_cpu(centrino_model, cpu)->op_points[i].index) |
312 | return per_cpu(centrino_model, cpu)-> | 312 | return per_cpu(centrino_model, cpu)-> |
313 | op_points[i].frequency; | 313 | op_points[i].frequency; |
314 | } | 314 | } |
315 | if (failsafe) | 315 | if (failsafe) |
316 | return per_cpu(centrino_model, cpu)->op_points[i-1].frequency; | 316 | return per_cpu(centrino_model, cpu)->op_points[i-1].frequency; |
317 | else | 317 | else |
318 | return 0; | 318 | return 0; |
319 | } | 319 | } |
320 | 320 | ||
321 | /* Return the current CPU frequency in kHz */ | 321 | /* Return the current CPU frequency in kHz */ |
322 | static unsigned int get_cur_freq(unsigned int cpu) | 322 | static unsigned int get_cur_freq(unsigned int cpu) |
323 | { | 323 | { |
324 | unsigned l, h; | 324 | unsigned l, h; |
325 | unsigned clock_freq; | 325 | unsigned clock_freq; |
326 | cpumask_t saved_mask; | 326 | cpumask_t saved_mask; |
327 | 327 | ||
328 | saved_mask = current->cpus_allowed; | 328 | saved_mask = current->cpus_allowed; |
329 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 329 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
330 | if (smp_processor_id() != cpu) | 330 | if (smp_processor_id() != cpu) |
331 | return 0; | 331 | return 0; |
332 | 332 | ||
333 | rdmsr(MSR_IA32_PERF_STATUS, l, h); | 333 | rdmsr(MSR_IA32_PERF_STATUS, l, h); |
334 | clock_freq = extract_clock(l, cpu, 0); | 334 | clock_freq = extract_clock(l, cpu, 0); |
335 | 335 | ||
336 | if (unlikely(clock_freq == 0)) { | 336 | if (unlikely(clock_freq == 0)) { |
337 | /* | 337 | /* |
338 | * On some CPUs, we can see transient MSR values (which are | 338 | * On some CPUs, we can see transient MSR values (which are |
339 | * not present in _PSS), while CPU is doing some automatic | 339 | * not present in _PSS), while CPU is doing some automatic |
340 | * P-state transition (like TM2). Get the last freq set | 340 | * P-state transition (like TM2). Get the last freq set |
341 | * in PERF_CTL. | 341 | * in PERF_CTL. |
342 | */ | 342 | */ |
343 | rdmsr(MSR_IA32_PERF_CTL, l, h); | 343 | rdmsr(MSR_IA32_PERF_CTL, l, h); |
344 | clock_freq = extract_clock(l, cpu, 1); | 344 | clock_freq = extract_clock(l, cpu, 1); |
345 | } | 345 | } |
346 | 346 | ||
347 | set_cpus_allowed_ptr(current, &saved_mask); | 347 | set_cpus_allowed_ptr(current, &saved_mask); |
348 | return clock_freq; | 348 | return clock_freq; |
349 | } | 349 | } |
350 | 350 | ||
351 | 351 | ||
352 | static int centrino_cpu_init(struct cpufreq_policy *policy) | 352 | static int centrino_cpu_init(struct cpufreq_policy *policy) |
353 | { | 353 | { |
354 | struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); | 354 | struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); |
355 | unsigned freq; | 355 | unsigned freq; |
356 | unsigned l, h; | 356 | unsigned l, h; |
357 | int ret; | 357 | int ret; |
358 | int i; | 358 | int i; |
359 | 359 | ||
360 | /* Only Intel makes Enhanced Speedstep-capable CPUs */ | 360 | /* Only Intel makes Enhanced Speedstep-capable CPUs */ |
361 | if (cpu->x86_vendor != X86_VENDOR_INTEL || | 361 | if (cpu->x86_vendor != X86_VENDOR_INTEL || |
362 | !cpu_has(cpu, X86_FEATURE_EST)) | 362 | !cpu_has(cpu, X86_FEATURE_EST)) |
363 | return -ENODEV; | 363 | return -ENODEV; |
364 | 364 | ||
365 | if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) | 365 | if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) |
366 | centrino_driver.flags |= CPUFREQ_CONST_LOOPS; | 366 | centrino_driver.flags |= CPUFREQ_CONST_LOOPS; |
367 | 367 | ||
368 | if (policy->cpu != 0) | 368 | if (policy->cpu != 0) |
369 | return -ENODEV; | 369 | return -ENODEV; |
370 | 370 | ||
371 | for (i = 0; i < N_IDS; i++) | 371 | for (i = 0; i < N_IDS; i++) |
372 | if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) | 372 | if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) |
373 | break; | 373 | break; |
374 | 374 | ||
375 | if (i != N_IDS) | 375 | if (i != N_IDS) |
376 | per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i]; | 376 | per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i]; |
377 | 377 | ||
378 | if (!per_cpu(centrino_cpu, policy->cpu)) { | 378 | if (!per_cpu(centrino_cpu, policy->cpu)) { |
379 | dprintk("found unsupported CPU with " | 379 | dprintk("found unsupported CPU with " |
380 | "Enhanced SpeedStep: send /proc/cpuinfo to " | 380 | "Enhanced SpeedStep: send /proc/cpuinfo to " |
381 | MAINTAINER "\n"); | 381 | MAINTAINER "\n"); |
382 | return -ENODEV; | 382 | return -ENODEV; |
383 | } | 383 | } |
384 | 384 | ||
385 | if (centrino_cpu_init_table(policy)) { | 385 | if (centrino_cpu_init_table(policy)) { |
386 | return -ENODEV; | 386 | return -ENODEV; |
387 | } | 387 | } |
388 | 388 | ||
389 | /* Check to see if Enhanced SpeedStep is enabled, and try to | 389 | /* Check to see if Enhanced SpeedStep is enabled, and try to |
390 | enable it if not. */ | 390 | enable it if not. */ |
391 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 391 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
392 | 392 | ||
393 | if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { | 393 | if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { |
394 | l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; | 394 | l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; |
395 | dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); | 395 | dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); |
396 | wrmsr(MSR_IA32_MISC_ENABLE, l, h); | 396 | wrmsr(MSR_IA32_MISC_ENABLE, l, h); |
397 | 397 | ||
398 | /* check to see if it stuck */ | 398 | /* check to see if it stuck */ |
399 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 399 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
400 | if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { | 400 | if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { |
401 | printk(KERN_INFO PFX | 401 | printk(KERN_INFO PFX |
402 | "couldn't enable Enhanced SpeedStep\n"); | 402 | "couldn't enable Enhanced SpeedStep\n"); |
403 | return -ENODEV; | 403 | return -ENODEV; |
404 | } | 404 | } |
405 | } | 405 | } |
406 | 406 | ||
407 | freq = get_cur_freq(policy->cpu); | 407 | freq = get_cur_freq(policy->cpu); |
408 | policy->cpuinfo.transition_latency = 10000; | 408 | policy->cpuinfo.transition_latency = 10000; |
409 | /* 10uS transition latency */ | 409 | /* 10uS transition latency */ |
410 | policy->cur = freq; | 410 | policy->cur = freq; |
411 | 411 | ||
412 | dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); | 412 | dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); |
413 | 413 | ||
414 | ret = cpufreq_frequency_table_cpuinfo(policy, | 414 | ret = cpufreq_frequency_table_cpuinfo(policy, |
415 | per_cpu(centrino_model, policy->cpu)->op_points); | 415 | per_cpu(centrino_model, policy->cpu)->op_points); |
416 | if (ret) | 416 | if (ret) |
417 | return (ret); | 417 | return (ret); |
418 | 418 | ||
419 | cpufreq_frequency_table_get_attr( | 419 | cpufreq_frequency_table_get_attr( |
420 | per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu); | 420 | per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu); |
421 | 421 | ||
422 | return 0; | 422 | return 0; |
423 | } | 423 | } |
424 | 424 | ||
425 | static int centrino_cpu_exit(struct cpufreq_policy *policy) | 425 | static int centrino_cpu_exit(struct cpufreq_policy *policy) |
426 | { | 426 | { |
427 | unsigned int cpu = policy->cpu; | 427 | unsigned int cpu = policy->cpu; |
428 | 428 | ||
429 | if (!per_cpu(centrino_model, cpu)) | 429 | if (!per_cpu(centrino_model, cpu)) |
430 | return -ENODEV; | 430 | return -ENODEV; |
431 | 431 | ||
432 | cpufreq_frequency_table_put_attr(cpu); | 432 | cpufreq_frequency_table_put_attr(cpu); |
433 | 433 | ||
434 | per_cpu(centrino_model, cpu) = NULL; | 434 | per_cpu(centrino_model, cpu) = NULL; |
435 | 435 | ||
436 | return 0; | 436 | return 0; |
437 | } | 437 | } |
438 | 438 | ||
439 | /** | 439 | /** |
440 | * centrino_verify - verifies a new CPUFreq policy | 440 | * centrino_verify - verifies a new CPUFreq policy |
441 | * @policy: new policy | 441 | * @policy: new policy |
442 | * | 442 | * |
443 | * Limit must be within this model's frequency range at least one | 443 | * Limit must be within this model's frequency range at least one |
444 | * border included. | 444 | * border included. |
445 | */ | 445 | */ |
446 | static int centrino_verify (struct cpufreq_policy *policy) | 446 | static int centrino_verify (struct cpufreq_policy *policy) |
447 | { | 447 | { |
448 | return cpufreq_frequency_table_verify(policy, | 448 | return cpufreq_frequency_table_verify(policy, |
449 | per_cpu(centrino_model, policy->cpu)->op_points); | 449 | per_cpu(centrino_model, policy->cpu)->op_points); |
450 | } | 450 | } |
451 | 451 | ||
452 | /** | 452 | /** |
453 | * centrino_setpolicy - set a new CPUFreq policy | 453 | * centrino_setpolicy - set a new CPUFreq policy |
454 | * @policy: new policy | 454 | * @policy: new policy |
455 | * @target_freq: the target frequency | 455 | * @target_freq: the target frequency |
456 | * @relation: how that frequency relates to achieved frequency | 456 | * @relation: how that frequency relates to achieved frequency |
457 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | 457 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) |
458 | * | 458 | * |
459 | * Sets a new CPUFreq policy. | 459 | * Sets a new CPUFreq policy. |
460 | */ | 460 | */ |
461 | static int centrino_target (struct cpufreq_policy *policy, | 461 | static int centrino_target (struct cpufreq_policy *policy, |
462 | unsigned int target_freq, | 462 | unsigned int target_freq, |
463 | unsigned int relation) | 463 | unsigned int relation) |
464 | { | 464 | { |
465 | unsigned int newstate = 0; | 465 | unsigned int newstate = 0; |
466 | unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; | 466 | unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; |
467 | struct cpufreq_freqs freqs; | 467 | struct cpufreq_freqs freqs; |
468 | int retval = 0; | 468 | int retval = 0; |
469 | unsigned int j, k, first_cpu, tmp; | 469 | unsigned int j, k, first_cpu, tmp; |
470 | cpumask_var_t saved_mask, covered_cpus; | 470 | cpumask_var_t saved_mask, covered_cpus; |
471 | 471 | ||
472 | if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL))) | 472 | if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL))) |
473 | return -ENOMEM; | 473 | return -ENOMEM; |
474 | if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) { | 474 | if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) { |
475 | free_cpumask_var(saved_mask); | 475 | free_cpumask_var(saved_mask); |
476 | return -ENOMEM; | 476 | return -ENOMEM; |
477 | } | 477 | } |
478 | cpumask_copy(saved_mask, ¤t->cpus_allowed); | 478 | cpumask_copy(saved_mask, ¤t->cpus_allowed); |
479 | 479 | ||
480 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { | 480 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { |
481 | retval = -ENODEV; | 481 | retval = -ENODEV; |
482 | goto out; | 482 | goto out; |
483 | } | 483 | } |
484 | 484 | ||
485 | if (unlikely(cpufreq_frequency_table_target(policy, | 485 | if (unlikely(cpufreq_frequency_table_target(policy, |
486 | per_cpu(centrino_model, cpu)->op_points, | 486 | per_cpu(centrino_model, cpu)->op_points, |
487 | target_freq, | 487 | target_freq, |
488 | relation, | 488 | relation, |
489 | &newstate))) { | 489 | &newstate))) { |
490 | retval = -EINVAL; | 490 | retval = -EINVAL; |
491 | goto out; | 491 | goto out; |
492 | } | 492 | } |
493 | 493 | ||
494 | first_cpu = 1; | 494 | first_cpu = 1; |
495 | for_each_cpu(j, policy->cpus) { | 495 | for_each_cpu(j, policy->cpus) { |
496 | const struct cpumask *mask; | 496 | const struct cpumask *mask; |
497 | 497 | ||
498 | /* cpufreq holds the hotplug lock, so we are safe here */ | 498 | /* cpufreq holds the hotplug lock, so we are safe here */ |
499 | if (!cpu_online(j)) | 499 | if (!cpu_online(j)) |
500 | continue; | 500 | continue; |
501 | 501 | ||
502 | /* | 502 | /* |
503 | * Support for SMP systems. | 503 | * Support for SMP systems. |
504 | * Make sure we are running on the CPU that wants to change freq | 504 | * Make sure we are running on the CPU that wants to change freq |
505 | */ | 505 | */ |
506 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) | 506 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) |
507 | mask = policy->cpus; | 507 | mask = policy->cpus; |
508 | else | 508 | else |
509 | mask = cpumask_of(j); | 509 | mask = cpumask_of(j); |
510 | 510 | ||
511 | set_cpus_allowed_ptr(current, mask); | 511 | set_cpus_allowed_ptr(current, mask); |
512 | preempt_disable(); | 512 | preempt_disable(); |
513 | if (unlikely(!cpu_isset(smp_processor_id(), *mask))) { | 513 | if (unlikely(!cpu_isset(smp_processor_id(), *mask))) { |
514 | dprintk("couldn't limit to CPUs in this domain\n"); | 514 | dprintk("couldn't limit to CPUs in this domain\n"); |
515 | retval = -EAGAIN; | 515 | retval = -EAGAIN; |
516 | if (first_cpu) { | 516 | if (first_cpu) { |
517 | /* We haven't started the transition yet. */ | 517 | /* We haven't started the transition yet. */ |
518 | goto migrate_end; | 518 | goto migrate_end; |
519 | } | 519 | } |
520 | preempt_enable(); | 520 | preempt_enable(); |
521 | break; | 521 | break; |
522 | } | 522 | } |
523 | 523 | ||
524 | msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; | 524 | msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; |
525 | 525 | ||
526 | if (first_cpu) { | 526 | if (first_cpu) { |
527 | rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 527 | rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
528 | if (msr == (oldmsr & 0xffff)) { | 528 | if (msr == (oldmsr & 0xffff)) { |
529 | dprintk("no change needed - msr was and needs " | 529 | dprintk("no change needed - msr was and needs " |
530 | "to be %x\n", oldmsr); | 530 | "to be %x\n", oldmsr); |
531 | retval = 0; | 531 | retval = 0; |
532 | goto migrate_end; | 532 | goto migrate_end; |
533 | } | 533 | } |
534 | 534 | ||
535 | freqs.old = extract_clock(oldmsr, cpu, 0); | 535 | freqs.old = extract_clock(oldmsr, cpu, 0); |
536 | freqs.new = extract_clock(msr, cpu, 0); | 536 | freqs.new = extract_clock(msr, cpu, 0); |
537 | 537 | ||
538 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", | 538 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", |
539 | target_freq, freqs.old, freqs.new, msr); | 539 | target_freq, freqs.old, freqs.new, msr); |
540 | 540 | ||
541 | for_each_cpu(k, policy->cpus) { | 541 | for_each_cpu(k, policy->cpus) { |
542 | if (!cpu_online(k)) | 542 | if (!cpu_online(k)) |
543 | continue; | 543 | continue; |
544 | freqs.cpu = k; | 544 | freqs.cpu = k; |
545 | cpufreq_notify_transition(&freqs, | 545 | cpufreq_notify_transition(&freqs, |
546 | CPUFREQ_PRECHANGE); | 546 | CPUFREQ_PRECHANGE); |
547 | } | 547 | } |
548 | 548 | ||
549 | first_cpu = 0; | 549 | first_cpu = 0; |
550 | /* all but 16 LSB are reserved, treat them with care */ | 550 | /* all but 16 LSB are reserved, treat them with care */ |
551 | oldmsr &= ~0xffff; | 551 | oldmsr &= ~0xffff; |
552 | msr &= 0xffff; | 552 | msr &= 0xffff; |
553 | oldmsr |= msr; | 553 | oldmsr |= msr; |
554 | } | 554 | } |
555 | 555 | ||
556 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 556 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
557 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { | 557 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { |
558 | preempt_enable(); | 558 | preempt_enable(); |
559 | break; | 559 | break; |
560 | } | 560 | } |
561 | 561 | ||
562 | cpu_set(j, *covered_cpus); | 562 | cpu_set(j, *covered_cpus); |
563 | preempt_enable(); | 563 | preempt_enable(); |
564 | } | 564 | } |
565 | 565 | ||
566 | for_each_cpu(k, policy->cpus) { | 566 | for_each_cpu(k, policy->cpus) { |
567 | if (!cpu_online(k)) | 567 | if (!cpu_online(k)) |
568 | continue; | 568 | continue; |
569 | freqs.cpu = k; | 569 | freqs.cpu = k; |
570 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 570 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
571 | } | 571 | } |
572 | 572 | ||
573 | if (unlikely(retval)) { | 573 | if (unlikely(retval)) { |
574 | /* | 574 | /* |
575 | * We have failed halfway through the frequency change. | 575 | * We have failed halfway through the frequency change. |
576 | * We have sent callbacks to policy->cpus and | 576 | * We have sent callbacks to policy->cpus and |
577 | * MSRs have already been written on covered_cpus. | 577 | * MSRs have already been written on covered_cpus. |
578 | * Best-effort undo. | 578 | * Best-effort undo. |
579 | */ | 579 | */ |
580 | 580 | ||
581 | for_each_cpu_mask_nr(j, *covered_cpus) { | 581 | for_each_cpu_mask_nr(j, *covered_cpus) { |
582 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); | 582 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); |
583 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 583 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
584 | } | 584 | } |
585 | 585 | ||
586 | tmp = freqs.new; | 586 | tmp = freqs.new; |
587 | freqs.new = freqs.old; | 587 | freqs.new = freqs.old; |
588 | freqs.old = tmp; | 588 | freqs.old = tmp; |
589 | for_each_cpu(j, policy->cpus) { | 589 | for_each_cpu(j, policy->cpus) { |
590 | if (!cpu_online(j)) | 590 | if (!cpu_online(j)) |
591 | continue; | 591 | continue; |
592 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 592 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
593 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 593 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
594 | } | 594 | } |
595 | } | 595 | } |
596 | set_cpus_allowed_ptr(current, saved_mask); | 596 | set_cpus_allowed_ptr(current, saved_mask); |
597 | retval = 0; | 597 | retval = 0; |
598 | goto out; | 598 | goto out; |
599 | 599 | ||
600 | migrate_end: | 600 | migrate_end: |
601 | preempt_enable(); | 601 | preempt_enable(); |
602 | set_cpus_allowed_ptr(current, saved_mask); | 602 | set_cpus_allowed_ptr(current, saved_mask); |
603 | out: | 603 | out: |
604 | free_cpumask_var(saved_mask); | 604 | free_cpumask_var(saved_mask); |
605 | free_cpumask_var(covered_cpus); | 605 | free_cpumask_var(covered_cpus); |
606 | return retval; | 606 | return retval; |
607 | } | 607 | } |
608 | 608 | ||
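A note on the MSR handling in centrino_target() above: only the 16 LSBs of MSR_IA32_PERF_CTL select the operating point; the rest are reserved and must be written back unchanged, which is why the code snapshots the old value on the first CPU and splices in only the low half. A minimal sketch of that read-modify-write, assuming the rdmsr()/wrmsr() helpers from <asm/msr.h> (the function name is illustrative, not from this file):

    static void centrino_write_op_point(u16 op_point)
    {
        u32 lo, hi;

        rdmsr(MSR_IA32_PERF_CTL, lo, hi);  /* snapshot, incl. reserved bits */
        lo = (lo & ~0xffff) | op_point;    /* replace only the 16 LSBs */
        wrmsr(MSR_IA32_PERF_CTL, lo, hi);  /* reserved bits written back */
    }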
609 | static struct freq_attr* centrino_attr[] = { | 609 | static struct freq_attr* centrino_attr[] = { |
610 | &cpufreq_freq_attr_scaling_available_freqs, | 610 | &cpufreq_freq_attr_scaling_available_freqs, |
611 | NULL, | 611 | NULL, |
612 | }; | 612 | }; |
613 | 613 | ||
614 | static struct cpufreq_driver centrino_driver = { | 614 | static struct cpufreq_driver centrino_driver = { |
615 | .name = "centrino", /* should be speedstep-centrino, | 615 | .name = "centrino", /* should be speedstep-centrino, |
616 | but there's a 16 char limit */ | 616 | but there's a 16 char limit */ |
617 | .init = centrino_cpu_init, | 617 | .init = centrino_cpu_init, |
618 | .exit = centrino_cpu_exit, | 618 | .exit = centrino_cpu_exit, |
619 | .verify = centrino_verify, | 619 | .verify = centrino_verify, |
620 | .target = centrino_target, | 620 | .target = centrino_target, |
621 | .get = get_cur_freq, | 621 | .get = get_cur_freq, |
622 | .attr = centrino_attr, | 622 | .attr = centrino_attr, |
623 | .owner = THIS_MODULE, | 623 | .owner = THIS_MODULE, |
624 | }; | 624 | }; |
625 | 625 | ||
626 | 626 | ||
627 | /** | 627 | /** |
628 | * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver | 628 | * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver |
629 | * | 629 | * |
630 | * Initializes the Enhanced SpeedStep support. Returns -ENODEV on | 630 | * Initializes the Enhanced SpeedStep support. Returns -ENODEV on |
631 | * unsupported devices, -ENOENT if there's no voltage table for this | 631 | * unsupported devices, -ENOENT if there's no voltage table for this |
632 | * particular CPU model, -EINVAL on problems during initialization, | 632 | * particular CPU model, -EINVAL on problems during initialization, |
633 | * and zero on success. | 633 | * and zero on success. |
634 | * | 634 | * |
635 | * This is quite picky. Not only does the CPU have to advertise the | 635 | * This is quite picky. Not only does the CPU have to advertise the |
636 | * "est" flag in the cpuid capability flags, we look for a specific | 636 | * "est" flag in the cpuid capability flags, we look for a specific |
637 | * CPU model and stepping, and we need to have the exact model name in | 637 | * CPU model and stepping, and we need to have the exact model name in |
638 | * our voltage tables. That is, be paranoid about not releasing | 638 | * our voltage tables. That is, be paranoid about not releasing |
639 | * someone's valuable magic smoke. | 639 | * someone's valuable magic smoke. |
640 | */ | 640 | */ |
641 | static int __init centrino_init(void) | 641 | static int __init centrino_init(void) |
642 | { | 642 | { |
643 | struct cpuinfo_x86 *cpu = &cpu_data(0); | 643 | struct cpuinfo_x86 *cpu = &cpu_data(0); |
644 | 644 | ||
645 | if (!cpu_has(cpu, X86_FEATURE_EST)) | 645 | if (!cpu_has(cpu, X86_FEATURE_EST)) |
646 | return -ENODEV; | 646 | return -ENODEV; |
647 | 647 | ||
648 | return cpufreq_register_driver(&centrino_driver); | 648 | return cpufreq_register_driver(&centrino_driver); |
649 | } | 649 | } |
650 | 650 | ||
651 | static void __exit centrino_exit(void) | 651 | static void __exit centrino_exit(void) |
652 | { | 652 | { |
653 | cpufreq_unregister_driver(&centrino_driver); | 653 | cpufreq_unregister_driver(&centrino_driver); |
654 | } | 654 | } |
655 | 655 | ||
656 | MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>"); | 656 | MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>"); |
657 | MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors."); | 657 | MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors."); |
658 | MODULE_LICENSE ("GPL"); | 658 | MODULE_LICENSE ("GPL"); |
659 | 659 | ||
660 | late_initcall(centrino_init); | 660 | late_initcall(centrino_init); |
661 | module_exit(centrino_exit); | 661 | module_exit(centrino_exit); |
662 | 662 |
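The zalloc_cpumask_var() hunk at line 474 above is this commit's change to the file. covered_cpus is allocated at function entry, only ever populated with cpu_set(), and consulted in the best-effort undo path, so it must start out empty; alloc_cpumask_var() does not guarantee that when the mask lives off-stack (MAXSMP/CONFIG_CPUMASK_OFFSTACK=y), while zalloc_cpumask_var() returns a zeroed mask in every configuration. The safe pattern, sketched under those assumptions:

    cpumask_var_t mask;

    if (!zalloc_cpumask_var(&mask, GFP_KERNEL))  /* allocate *and* zero */
        return -ENOMEM;
    /* ... cpu_set()/cpu_isset() on a mask known to start empty ... */
    free_cpumask_var(mask);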
arch/x86/kernel/cpu/mcheck/mce_64.c
1 | /* | 1 | /* |
2 | * Machine check handler. | 2 | * Machine check handler. |
3 | * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. | 3 | * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. |
4 | * Rest from unknown author(s). | 4 | * Rest from unknown author(s). |
5 | * 2004 Andi Kleen. Rewrote most of it. | 5 | * 2004 Andi Kleen. Rewrote most of it. |
6 | * Copyright 2008 Intel Corporation | 6 | * Copyright 2008 Intel Corporation |
7 | * Author: Andi Kleen | 7 | * Author: Andi Kleen |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
11 | #include <linux/types.h> | 11 | #include <linux/types.h> |
12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
13 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
14 | #include <linux/smp_lock.h> | 14 | #include <linux/smp_lock.h> |
15 | #include <linux/string.h> | 15 | #include <linux/string.h> |
16 | #include <linux/rcupdate.h> | 16 | #include <linux/rcupdate.h> |
17 | #include <linux/kallsyms.h> | 17 | #include <linux/kallsyms.h> |
18 | #include <linux/sysdev.h> | 18 | #include <linux/sysdev.h> |
19 | #include <linux/miscdevice.h> | 19 | #include <linux/miscdevice.h> |
20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
21 | #include <linux/capability.h> | 21 | #include <linux/capability.h> |
22 | #include <linux/cpu.h> | 22 | #include <linux/cpu.h> |
23 | #include <linux/percpu.h> | 23 | #include <linux/percpu.h> |
24 | #include <linux/poll.h> | 24 | #include <linux/poll.h> |
25 | #include <linux/thread_info.h> | 25 | #include <linux/thread_info.h> |
26 | #include <linux/ctype.h> | 26 | #include <linux/ctype.h> |
27 | #include <linux/kmod.h> | 27 | #include <linux/kmod.h> |
28 | #include <linux/kdebug.h> | 28 | #include <linux/kdebug.h> |
29 | #include <linux/kobject.h> | 29 | #include <linux/kobject.h> |
30 | #include <linux/sysfs.h> | 30 | #include <linux/sysfs.h> |
31 | #include <linux/ratelimit.h> | 31 | #include <linux/ratelimit.h> |
32 | #include <asm/processor.h> | 32 | #include <asm/processor.h> |
33 | #include <asm/msr.h> | 33 | #include <asm/msr.h> |
34 | #include <asm/mce.h> | 34 | #include <asm/mce.h> |
35 | #include <asm/uaccess.h> | 35 | #include <asm/uaccess.h> |
36 | #include <asm/smp.h> | 36 | #include <asm/smp.h> |
37 | #include <asm/idle.h> | 37 | #include <asm/idle.h> |
38 | 38 | ||
39 | #define MISC_MCELOG_MINOR 227 | 39 | #define MISC_MCELOG_MINOR 227 |
40 | 40 | ||
41 | atomic_t mce_entry; | 41 | atomic_t mce_entry; |
42 | 42 | ||
43 | static int mce_dont_init; | 43 | static int mce_dont_init; |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * Tolerant levels: | 46 | * Tolerant levels: |
47 | * 0: always panic on uncorrected errors, log corrected errors | 47 | * 0: always panic on uncorrected errors, log corrected errors |
48 | * 1: panic or SIGBUS on uncorrected errors, log corrected errors | 48 | * 1: panic or SIGBUS on uncorrected errors, log corrected errors |
49 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors | 49 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors |
50 | * 3: never panic or SIGBUS, log all errors (for testing only) | 50 | * 3: never panic or SIGBUS, log all errors (for testing only) |
51 | */ | 51 | */ |
52 | static int tolerant = 1; | 52 | static int tolerant = 1; |
53 | static int banks; | 53 | static int banks; |
54 | static u64 *bank; | 54 | static u64 *bank; |
55 | static unsigned long notify_user; | 55 | static unsigned long notify_user; |
56 | static int rip_msr; | 56 | static int rip_msr; |
57 | static int mce_bootlog = -1; | 57 | static int mce_bootlog = -1; |
58 | static atomic_t mce_events; | 58 | static atomic_t mce_events; |
59 | 59 | ||
60 | static char trigger[128]; | 60 | static char trigger[128]; |
61 | static char *trigger_argv[2] = { trigger, NULL }; | 61 | static char *trigger_argv[2] = { trigger, NULL }; |
62 | 62 | ||
63 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | 63 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); |
64 | 64 | ||
65 | /* MCA banks polled by the periodic polling timer for corrected events */ | 65 | /* MCA banks polled by the periodic polling timer for corrected events */ |
66 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 66 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
67 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | 67 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL |
68 | }; | 68 | }; |
69 | 69 | ||
70 | /* Do initial initialization of a struct mce */ | 70 | /* Do initial initialization of a struct mce */ |
71 | void mce_setup(struct mce *m) | 71 | void mce_setup(struct mce *m) |
72 | { | 72 | { |
73 | memset(m, 0, sizeof(struct mce)); | 73 | memset(m, 0, sizeof(struct mce)); |
74 | m->cpu = smp_processor_id(); | 74 | m->cpu = smp_processor_id(); |
75 | rdtscll(m->tsc); | 75 | rdtscll(m->tsc); |
76 | } | 76 | } |
77 | 77 | ||
78 | /* | 78 | /* |
79 | * Lockless MCE logging infrastructure. | 79 | * Lockless MCE logging infrastructure. |
80 | * This avoids deadlocks on printk locks without having to break locks. Also | 80 | * This avoids deadlocks on printk locks without having to break locks. Also |
81 | * separate MCEs from kernel messages to avoid bogus bug reports. | 81 | * separate MCEs from kernel messages to avoid bogus bug reports. |
82 | */ | 82 | */ |
83 | 83 | ||
84 | static struct mce_log mcelog = { | 84 | static struct mce_log mcelog = { |
85 | MCE_LOG_SIGNATURE, | 85 | MCE_LOG_SIGNATURE, |
86 | MCE_LOG_LEN, | 86 | MCE_LOG_LEN, |
87 | }; | 87 | }; |
88 | 88 | ||
89 | void mce_log(struct mce *mce) | 89 | void mce_log(struct mce *mce) |
90 | { | 90 | { |
91 | unsigned next, entry; | 91 | unsigned next, entry; |
92 | atomic_inc(&mce_events); | 92 | atomic_inc(&mce_events); |
93 | mce->finished = 0; | 93 | mce->finished = 0; |
94 | wmb(); | 94 | wmb(); |
95 | for (;;) { | 95 | for (;;) { |
96 | entry = rcu_dereference(mcelog.next); | 96 | entry = rcu_dereference(mcelog.next); |
97 | for (;;) { | 97 | for (;;) { |
98 | /* When the buffer fills up, discard new entries. Assume | 98 | /* When the buffer fills up, discard new entries. Assume |
99 | that the earlier errors are the more interesting ones. */ | 99 | that the earlier errors are the more interesting ones. */ |
100 | if (entry >= MCE_LOG_LEN) { | 100 | if (entry >= MCE_LOG_LEN) { |
101 | set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); | 101 | set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); |
102 | return; | 102 | return; |
103 | } | 103 | } |
104 | /* Old leftover entry. Skip. */ | 104 | /* Old leftover entry. Skip. */ |
105 | if (mcelog.entry[entry].finished) { | 105 | if (mcelog.entry[entry].finished) { |
106 | entry++; | 106 | entry++; |
107 | continue; | 107 | continue; |
108 | } | 108 | } |
109 | break; | 109 | break; |
110 | } | 110 | } |
111 | smp_rmb(); | 111 | smp_rmb(); |
112 | next = entry + 1; | 112 | next = entry + 1; |
113 | if (cmpxchg(&mcelog.next, entry, next) == entry) | 113 | if (cmpxchg(&mcelog.next, entry, next) == entry) |
114 | break; | 114 | break; |
115 | } | 115 | } |
116 | memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); | 116 | memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); |
117 | wmb(); | 117 | wmb(); |
118 | mcelog.entry[entry].finished = 1; | 118 | mcelog.entry[entry].finished = 1; |
119 | wmb(); | 119 | wmb(); |
120 | 120 | ||
121 | set_bit(0, ¬ify_user); | 121 | set_bit(0, ¬ify_user); |
122 | } | 122 | } |
123 | 123 | ||
124 | static void print_mce(struct mce *m) | 124 | static void print_mce(struct mce *m) |
125 | { | 125 | { |
126 | printk(KERN_EMERG "\n" | 126 | printk(KERN_EMERG "\n" |
127 | KERN_EMERG "HARDWARE ERROR\n" | 127 | KERN_EMERG "HARDWARE ERROR\n" |
128 | KERN_EMERG | 128 | KERN_EMERG |
129 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | 129 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", |
130 | m->cpu, m->mcgstatus, m->bank, m->status); | 130 | m->cpu, m->mcgstatus, m->bank, m->status); |
131 | if (m->ip) { | 131 | if (m->ip) { |
132 | printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", | 132 | printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", |
133 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | 133 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", |
134 | m->cs, m->ip); | 134 | m->cs, m->ip); |
135 | if (m->cs == __KERNEL_CS) | 135 | if (m->cs == __KERNEL_CS) |
136 | print_symbol("{%s}", m->ip); | 136 | print_symbol("{%s}", m->ip); |
137 | printk("\n"); | 137 | printk("\n"); |
138 | } | 138 | } |
139 | printk(KERN_EMERG "TSC %llx ", m->tsc); | 139 | printk(KERN_EMERG "TSC %llx ", m->tsc); |
140 | if (m->addr) | 140 | if (m->addr) |
141 | printk("ADDR %llx ", m->addr); | 141 | printk("ADDR %llx ", m->addr); |
142 | if (m->misc) | 142 | if (m->misc) |
143 | printk("MISC %llx ", m->misc); | 143 | printk("MISC %llx ", m->misc); |
144 | printk("\n"); | 144 | printk("\n"); |
145 | printk(KERN_EMERG "This is not a software problem!\n"); | 145 | printk(KERN_EMERG "This is not a software problem!\n"); |
146 | printk(KERN_EMERG "Run through mcelog --ascii to decode " | 146 | printk(KERN_EMERG "Run through mcelog --ascii to decode " |
147 | "and contact your hardware vendor\n"); | 147 | "and contact your hardware vendor\n"); |
148 | } | 148 | } |
149 | 149 | ||
150 | static void mce_panic(char *msg, struct mce *backup, unsigned long start) | 150 | static void mce_panic(char *msg, struct mce *backup, unsigned long start) |
151 | { | 151 | { |
152 | int i; | 152 | int i; |
153 | 153 | ||
154 | oops_begin(); | 154 | oops_begin(); |
155 | for (i = 0; i < MCE_LOG_LEN; i++) { | 155 | for (i = 0; i < MCE_LOG_LEN; i++) { |
156 | unsigned long tsc = mcelog.entry[i].tsc; | 156 | unsigned long tsc = mcelog.entry[i].tsc; |
157 | 157 | ||
158 | if (time_before(tsc, start)) | 158 | if (time_before(tsc, start)) |
159 | continue; | 159 | continue; |
160 | print_mce(&mcelog.entry[i]); | 160 | print_mce(&mcelog.entry[i]); |
161 | if (backup && mcelog.entry[i].tsc == backup->tsc) | 161 | if (backup && mcelog.entry[i].tsc == backup->tsc) |
162 | backup = NULL; | 162 | backup = NULL; |
163 | } | 163 | } |
164 | if (backup) | 164 | if (backup) |
165 | print_mce(backup); | 165 | print_mce(backup); |
166 | panic(msg); | 166 | panic(msg); |
167 | } | 167 | } |
168 | 168 | ||
169 | int mce_available(struct cpuinfo_x86 *c) | 169 | int mce_available(struct cpuinfo_x86 *c) |
170 | { | 170 | { |
171 | if (mce_dont_init) | 171 | if (mce_dont_init) |
172 | return 0; | 172 | return 0; |
173 | return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); | 173 | return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); |
174 | } | 174 | } |
175 | 175 | ||
176 | static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) | 176 | static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) |
177 | { | 177 | { |
178 | if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { | 178 | if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { |
179 | m->ip = regs->ip; | 179 | m->ip = regs->ip; |
180 | m->cs = regs->cs; | 180 | m->cs = regs->cs; |
181 | } else { | 181 | } else { |
182 | m->ip = 0; | 182 | m->ip = 0; |
183 | m->cs = 0; | 183 | m->cs = 0; |
184 | } | 184 | } |
185 | if (rip_msr) { | 185 | if (rip_msr) { |
186 | /* Assume the RIP in the MSR is exact. Is this true? */ | 186 | /* Assume the RIP in the MSR is exact. Is this true? */ |
187 | m->mcgstatus |= MCG_STATUS_EIPV; | 187 | m->mcgstatus |= MCG_STATUS_EIPV; |
188 | rdmsrl(rip_msr, m->ip); | 188 | rdmsrl(rip_msr, m->ip); |
189 | m->cs = 0; | 189 | m->cs = 0; |
190 | } | 190 | } |
191 | } | 191 | } |
192 | 192 | ||
193 | /* | 193 | /* |
194 | * Poll for corrected events or events that happened before reset. | 194 | * Poll for corrected events or events that happened before reset. |
195 | * Those are just logged through /dev/mcelog. | 195 | * Those are just logged through /dev/mcelog. |
196 | * | 196 | * |
197 | * This is executed in standard interrupt context. | 197 | * This is executed in standard interrupt context. |
198 | */ | 198 | */ |
199 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | 199 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) |
200 | { | 200 | { |
201 | struct mce m; | 201 | struct mce m; |
202 | int i; | 202 | int i; |
203 | 203 | ||
204 | mce_setup(&m); | 204 | mce_setup(&m); |
205 | 205 | ||
206 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); | 206 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); |
207 | for (i = 0; i < banks; i++) { | 207 | for (i = 0; i < banks; i++) { |
208 | if (!bank[i] || !test_bit(i, *b)) | 208 | if (!bank[i] || !test_bit(i, *b)) |
209 | continue; | 209 | continue; |
210 | 210 | ||
211 | m.misc = 0; | 211 | m.misc = 0; |
212 | m.addr = 0; | 212 | m.addr = 0; |
213 | m.bank = i; | 213 | m.bank = i; |
214 | m.tsc = 0; | 214 | m.tsc = 0; |
215 | 215 | ||
216 | barrier(); | 216 | barrier(); |
217 | rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); | 217 | rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); |
218 | if (!(m.status & MCI_STATUS_VAL)) | 218 | if (!(m.status & MCI_STATUS_VAL)) |
219 | continue; | 219 | continue; |
220 | 220 | ||
221 | /* | 221 | /* |
222 | * Uncorrected events are handled by the exception handler | 222 | * Uncorrected events are handled by the exception handler |
223 | * when it is enabled. But when the exception is disabled log | 223 | * when it is enabled. But when the exception is disabled log |
224 | * everything. | 224 | * everything. |
225 | * | 225 | * |
226 | * TBD do the same check for MCI_STATUS_EN here? | 226 | * TBD do the same check for MCI_STATUS_EN here? |
227 | */ | 227 | */ |
228 | if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) | 228 | if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) |
229 | continue; | 229 | continue; |
230 | 230 | ||
231 | if (m.status & MCI_STATUS_MISCV) | 231 | if (m.status & MCI_STATUS_MISCV) |
232 | rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); | 232 | rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); |
233 | if (m.status & MCI_STATUS_ADDRV) | 233 | if (m.status & MCI_STATUS_ADDRV) |
234 | rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); | 234 | rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); |
235 | 235 | ||
236 | if (!(flags & MCP_TIMESTAMP)) | 236 | if (!(flags & MCP_TIMESTAMP)) |
237 | m.tsc = 0; | 237 | m.tsc = 0; |
238 | /* | 238 | /* |
239 | * Don't get the IP here because it's unlikely to | 239 | * Don't get the IP here because it's unlikely to |
240 | * have anything to do with the actual error location. | 240 | * have anything to do with the actual error location. |
241 | */ | 241 | */ |
242 | if (!(flags & MCP_DONTLOG)) { | 242 | if (!(flags & MCP_DONTLOG)) { |
243 | mce_log(&m); | 243 | mce_log(&m); |
244 | add_taint(TAINT_MACHINE_CHECK); | 244 | add_taint(TAINT_MACHINE_CHECK); |
245 | } | 245 | } |
246 | 246 | ||
247 | /* | 247 | /* |
248 | * Clear state for this bank. | 248 | * Clear state for this bank. |
249 | */ | 249 | */ |
250 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 250 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); |
251 | } | 251 | } |
252 | 252 | ||
253 | /* | 253 | /* |
254 | * Don't clear MCG_STATUS here because it's only defined for | 254 | * Don't clear MCG_STATUS here because it's only defined for |
255 | * exceptions. | 255 | * exceptions. |
256 | */ | 256 | */ |
257 | } | 257 | } |
258 | 258 | ||
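The i*4 arithmetic in machine_check_poll() above reflects the MCA register layout: each bank owns a group of four consecutive MSRs starting at MSR_IA32_MC0_CTL. Hypothetical helper macros (not in this file) that spell out the stride:

    #define MCx_CTL(i)     (MSR_IA32_MC0_CTL    + (i) * 4)
    #define MCx_STATUS(i)  (MSR_IA32_MC0_STATUS + (i) * 4)
    #define MCx_ADDR(i)    (MSR_IA32_MC0_ADDR   + (i) * 4)
    #define MCx_MISC(i)    (MSR_IA32_MC0_MISC   + (i) * 4)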
259 | /* | 259 | /* |
260 | * The actual machine check handler. This only handles real | 260 | * The actual machine check handler. This only handles real |
261 | * exceptions when something got corrupted coming in through int 18. | 261 | * exceptions when something got corrupted coming in through int 18. |
262 | * | 262 | * |
263 | * This is executed in NMI context not subject to normal locking rules. This | 263 | * This is executed in NMI context not subject to normal locking rules. This |
264 | * implies that most kernel services cannot be safely used. Don't even | 264 | * implies that most kernel services cannot be safely used. Don't even |
265 | * think about putting a printk in there! | 265 | * think about putting a printk in there! |
266 | */ | 266 | */ |
267 | void do_machine_check(struct pt_regs * regs, long error_code) | 267 | void do_machine_check(struct pt_regs * regs, long error_code) |
268 | { | 268 | { |
269 | struct mce m, panicm; | 269 | struct mce m, panicm; |
270 | u64 mcestart = 0; | 270 | u64 mcestart = 0; |
271 | int i; | 271 | int i; |
272 | int panicm_found = 0; | 272 | int panicm_found = 0; |
273 | /* | 273 | /* |
274 | * If no_way_out gets set, there is no safe way to recover from this | 274 | * If no_way_out gets set, there is no safe way to recover from this |
275 | * MCE. If tolerant is cranked up, we'll try anyway. | 275 | * MCE. If tolerant is cranked up, we'll try anyway. |
276 | */ | 276 | */ |
277 | int no_way_out = 0; | 277 | int no_way_out = 0; |
278 | /* | 278 | /* |
279 | * If kill_it gets set, there might be a way to recover from this | 279 | * If kill_it gets set, there might be a way to recover from this |
280 | * error. | 280 | * error. |
281 | */ | 281 | */ |
282 | int kill_it = 0; | 282 | int kill_it = 0; |
283 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | 283 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); |
284 | 284 | ||
285 | atomic_inc(&mce_entry); | 285 | atomic_inc(&mce_entry); |
286 | 286 | ||
287 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | 287 | if (notify_die(DIE_NMI, "machine check", regs, error_code, |
288 | 18, SIGKILL) == NOTIFY_STOP) | 288 | 18, SIGKILL) == NOTIFY_STOP) |
289 | goto out2; | 289 | goto out2; |
290 | if (!banks) | 290 | if (!banks) |
291 | goto out2; | 291 | goto out2; |
292 | 292 | ||
293 | mce_setup(&m); | 293 | mce_setup(&m); |
294 | 294 | ||
295 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); | 295 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); |
296 | /* if the restart IP is not valid, we're done for */ | 296 | /* if the restart IP is not valid, we're done for */ |
297 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) | 297 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) |
298 | no_way_out = 1; | 298 | no_way_out = 1; |
299 | 299 | ||
300 | rdtscll(mcestart); | 300 | rdtscll(mcestart); |
301 | barrier(); | 301 | barrier(); |
302 | 302 | ||
303 | for (i = 0; i < banks; i++) { | 303 | for (i = 0; i < banks; i++) { |
304 | __clear_bit(i, toclear); | 304 | __clear_bit(i, toclear); |
305 | if (!bank[i]) | 305 | if (!bank[i]) |
306 | continue; | 306 | continue; |
307 | 307 | ||
308 | m.misc = 0; | 308 | m.misc = 0; |
309 | m.addr = 0; | 309 | m.addr = 0; |
310 | m.bank = i; | 310 | m.bank = i; |
311 | 311 | ||
312 | rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); | 312 | rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); |
313 | if ((m.status & MCI_STATUS_VAL) == 0) | 313 | if ((m.status & MCI_STATUS_VAL) == 0) |
314 | continue; | 314 | continue; |
315 | 315 | ||
316 | /* | 316 | /* |
317 | * Non-uncorrected errors are handled by machine_check_poll. | 317 | * Non-uncorrected errors are handled by machine_check_poll. |
318 | * Leave them alone. | 318 | * Leave them alone. |
319 | */ | 319 | */ |
320 | if ((m.status & MCI_STATUS_UC) == 0) | 320 | if ((m.status & MCI_STATUS_UC) == 0) |
321 | continue; | 321 | continue; |
322 | 322 | ||
323 | /* | 323 | /* |
324 | * Set taint even when machine check was not enabled. | 324 | * Set taint even when machine check was not enabled. |
325 | */ | 325 | */ |
326 | add_taint(TAINT_MACHINE_CHECK); | 326 | add_taint(TAINT_MACHINE_CHECK); |
327 | 327 | ||
328 | __set_bit(i, toclear); | 328 | __set_bit(i, toclear); |
329 | 329 | ||
330 | if (m.status & MCI_STATUS_EN) { | 330 | if (m.status & MCI_STATUS_EN) { |
331 | /* if PCC was set, there's no way out */ | 331 | /* if PCC was set, there's no way out */ |
332 | no_way_out |= !!(m.status & MCI_STATUS_PCC); | 332 | no_way_out |= !!(m.status & MCI_STATUS_PCC); |
333 | /* | 333 | /* |
334 | * If this error was uncorrectable and there was | 334 | * If this error was uncorrectable and there was |
335 | * an overflow, we're in trouble. If no overflow, | 335 | * an overflow, we're in trouble. If no overflow, |
336 | * we might get away with just killing a task. | 336 | * we might get away with just killing a task. |
337 | */ | 337 | */ |
338 | if (m.status & MCI_STATUS_UC) { | 338 | if (m.status & MCI_STATUS_UC) { |
339 | if (tolerant < 1 || m.status & MCI_STATUS_OVER) | 339 | if (tolerant < 1 || m.status & MCI_STATUS_OVER) |
340 | no_way_out = 1; | 340 | no_way_out = 1; |
341 | kill_it = 1; | 341 | kill_it = 1; |
342 | } | 342 | } |
343 | } else { | 343 | } else { |
344 | /* | 344 | /* |
345 | * Machine check event was not enabled. Clear, but | 345 | * Machine check event was not enabled. Clear, but |
346 | * ignore. | 346 | * ignore. |
347 | */ | 347 | */ |
348 | continue; | 348 | continue; |
349 | } | 349 | } |
350 | 350 | ||
351 | if (m.status & MCI_STATUS_MISCV) | 351 | if (m.status & MCI_STATUS_MISCV) |
352 | rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); | 352 | rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); |
353 | if (m.status & MCI_STATUS_ADDRV) | 353 | if (m.status & MCI_STATUS_ADDRV) |
354 | rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); | 354 | rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); |
355 | 355 | ||
356 | mce_get_rip(&m, regs); | 356 | mce_get_rip(&m, regs); |
357 | mce_log(&m); | 357 | mce_log(&m); |
358 | 358 | ||
359 | /* Did this bank cause the exception? */ | 359 | /* Did this bank cause the exception? */ |
360 | /* Assume that the bank with uncorrectable errors did it, | 360 | /* Assume that the bank with uncorrectable errors did it, |
361 | and that there is only a single one. */ | 361 | and that there is only a single one. */ |
362 | if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) { | 362 | if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) { |
363 | panicm = m; | 363 | panicm = m; |
364 | panicm_found = 1; | 364 | panicm_found = 1; |
365 | } | 365 | } |
366 | } | 366 | } |
367 | 367 | ||
368 | /* If we didn't find an uncorrectable error, pick | 368 | /* If we didn't find an uncorrectable error, pick |
369 | the last one (shouldn't happen, just being safe). */ | 369 | the last one (shouldn't happen, just being safe). */ |
370 | if (!panicm_found) | 370 | if (!panicm_found) |
371 | panicm = m; | 371 | panicm = m; |
372 | 372 | ||
373 | /* | 373 | /* |
374 | * If we have decided that we just CAN'T continue, and the user | 374 | * If we have decided that we just CAN'T continue, and the user |
375 | * has not set tolerant to an insane level, give up and die. | 375 | * has not set tolerant to an insane level, give up and die. |
376 | */ | 376 | */ |
377 | if (no_way_out && tolerant < 3) | 377 | if (no_way_out && tolerant < 3) |
378 | mce_panic("Machine check", &panicm, mcestart); | 378 | mce_panic("Machine check", &panicm, mcestart); |
379 | 379 | ||
380 | /* | 380 | /* |
381 | * If the error seems to be unrecoverable, something should be | 381 | * If the error seems to be unrecoverable, something should be |
382 | * done. Try to kill as little as possible. If we can kill just | 382 | * done. Try to kill as little as possible. If we can kill just |
383 | * one task, do that. If the user has set the tolerance very | 383 | * one task, do that. If the user has set the tolerance very |
384 | * high, don't try to do anything at all. | 384 | * high, don't try to do anything at all. |
385 | */ | 385 | */ |
386 | if (kill_it && tolerant < 3) { | 386 | if (kill_it && tolerant < 3) { |
387 | int user_space = 0; | 387 | int user_space = 0; |
388 | 388 | ||
389 | /* | 389 | /* |
390 | * If the EIPV bit is set, it means the saved IP is the | 390 | * If the EIPV bit is set, it means the saved IP is the |
391 | * instruction which caused the MCE. | 391 | * instruction which caused the MCE. |
392 | */ | 392 | */ |
393 | if (m.mcgstatus & MCG_STATUS_EIPV) | 393 | if (m.mcgstatus & MCG_STATUS_EIPV) |
394 | user_space = panicm.ip && (panicm.cs & 3); | 394 | user_space = panicm.ip && (panicm.cs & 3); |
395 | 395 | ||
396 | /* | 396 | /* |
397 | * If we know that the error was in user space, send a | 397 | * If we know that the error was in user space, send a |
398 | * SIGBUS. Otherwise, panic if tolerance is low. | 398 | * SIGBUS. Otherwise, panic if tolerance is low. |
399 | * | 399 | * |
400 | * force_sig() takes an awful lot of locks and has a slight | 400 | * force_sig() takes an awful lot of locks and has a slight |
401 | * risk of deadlocking. | 401 | * risk of deadlocking. |
402 | */ | 402 | */ |
403 | if (user_space) { | 403 | if (user_space) { |
404 | force_sig(SIGBUS, current); | 404 | force_sig(SIGBUS, current); |
405 | } else if (panic_on_oops || tolerant < 2) { | 405 | } else if (panic_on_oops || tolerant < 2) { |
406 | mce_panic("Uncorrected machine check", | 406 | mce_panic("Uncorrected machine check", |
407 | &panicm, mcestart); | 407 | &panicm, mcestart); |
408 | } | 408 | } |
409 | } | 409 | } |
410 | 410 | ||
411 | /* notify userspace ASAP */ | 411 | /* notify userspace ASAP */ |
412 | set_thread_flag(TIF_MCE_NOTIFY); | 412 | set_thread_flag(TIF_MCE_NOTIFY); |
413 | 413 | ||
414 | /* the last thing we do is clear state */ | 414 | /* the last thing we do is clear state */ |
415 | for (i = 0; i < banks; i++) { | 415 | for (i = 0; i < banks; i++) { |
416 | if (test_bit(i, toclear)) | 416 | if (test_bit(i, toclear)) |
417 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 417 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); |
418 | } | 418 | } |
419 | wrmsrl(MSR_IA32_MCG_STATUS, 0); | 419 | wrmsrl(MSR_IA32_MCG_STATUS, 0); |
420 | out2: | 420 | out2: |
421 | atomic_dec(&mce_entry); | 421 | atomic_dec(&mce_entry); |
422 | } | 422 | } |
423 | 423 | ||
424 | #ifdef CONFIG_X86_MCE_INTEL | 424 | #ifdef CONFIG_X86_MCE_INTEL |
425 | /*** | 425 | /*** |
426 | * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog | 426 | * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog |
427 | * @cpu: The CPU on which the event occurred. | 427 | * @cpu: The CPU on which the event occurred. |
428 | * @status: Event status information | 428 | * @status: Event status information |
429 | * | 429 | * |
430 | * This function should be called by the thermal interrupt after the | 430 | * This function should be called by the thermal interrupt after the |
431 | * event has been processed and the decision was made to log the event | 431 | * event has been processed and the decision was made to log the event |
432 | * further. | 432 | * further. |
433 | * | 433 | * |
434 | * The status parameter will be saved to the 'status' field of 'struct mce' | 434 | * The status parameter will be saved to the 'status' field of 'struct mce' |
435 | * and historically has been the register value of the | 435 | * and historically has been the register value of the |
436 | * MSR_IA32_THERMAL_STATUS (Intel) msr. | 436 | * MSR_IA32_THERMAL_STATUS (Intel) msr. |
437 | */ | 437 | */ |
438 | void mce_log_therm_throt_event(__u64 status) | 438 | void mce_log_therm_throt_event(__u64 status) |
439 | { | 439 | { |
440 | struct mce m; | 440 | struct mce m; |
441 | 441 | ||
442 | mce_setup(&m); | 442 | mce_setup(&m); |
443 | m.bank = MCE_THERMAL_BANK; | 443 | m.bank = MCE_THERMAL_BANK; |
444 | m.status = status; | 444 | m.status = status; |
445 | mce_log(&m); | 445 | mce_log(&m); |
446 | } | 446 | } |
447 | #endif /* CONFIG_X86_MCE_INTEL */ | 447 | #endif /* CONFIG_X86_MCE_INTEL */ |
448 | 448 | ||
449 | /* | 449 | /* |
450 | * Periodic polling timer for "silent" machine check errors. If the | 450 | * Periodic polling timer for "silent" machine check errors. If the |
451 | * poller finds an MCE, poll 2x faster. When the poller finds no more | 451 | * poller finds an MCE, poll 2x faster. When the poller finds no more |
452 | * errors, poll 2x slower (up to check_interval seconds). | 452 | * errors, poll 2x slower (up to check_interval seconds). |
453 | */ | 453 | */ |
454 | 454 | ||
455 | static int check_interval = 5 * 60; /* 5 minutes */ | 455 | static int check_interval = 5 * 60; /* 5 minutes */ |
456 | static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ | 456 | static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ |
457 | static void mcheck_timer(unsigned long); | 457 | static void mcheck_timer(unsigned long); |
458 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | 458 | static DEFINE_PER_CPU(struct timer_list, mce_timer); |
459 | 459 | ||
460 | static void mcheck_timer(unsigned long data) | 460 | static void mcheck_timer(unsigned long data) |
461 | { | 461 | { |
462 | struct timer_list *t = &per_cpu(mce_timer, data); | 462 | struct timer_list *t = &per_cpu(mce_timer, data); |
463 | int *n; | 463 | int *n; |
464 | 464 | ||
465 | WARN_ON(smp_processor_id() != data); | 465 | WARN_ON(smp_processor_id() != data); |
466 | 466 | ||
467 | if (mce_available(&current_cpu_data)) | 467 | if (mce_available(&current_cpu_data)) |
468 | machine_check_poll(MCP_TIMESTAMP, | 468 | machine_check_poll(MCP_TIMESTAMP, |
469 | &__get_cpu_var(mce_poll_banks)); | 469 | &__get_cpu_var(mce_poll_banks)); |
470 | 470 | ||
471 | /* | 471 | /* |
472 | * Alert userspace if needed. If we logged an MCE, reduce the | 472 | * Alert userspace if needed. If we logged an MCE, reduce the |
473 | * polling interval, otherwise increase the polling interval. | 473 | * polling interval, otherwise increase the polling interval. |
474 | */ | 474 | */ |
475 | n = &__get_cpu_var(next_interval); | 475 | n = &__get_cpu_var(next_interval); |
476 | if (mce_notify_user()) { | 476 | if (mce_notify_user()) { |
477 | *n = max(*n/2, HZ/100); | 477 | *n = max(*n/2, HZ/100); |
478 | } else { | 478 | } else { |
479 | *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); | 479 | *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); |
480 | } | 480 | } |
481 | 481 | ||
482 | t->expires = jiffies + *n; | 482 | t->expires = jiffies + *n; |
483 | add_timer(t); | 483 | add_timer(t); |
484 | } | 484 | } |
485 | 485 | ||
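The interval update in mcheck_timer() is a bounded exponential: a logged event halves the period down to a floor of HZ/100 (10 ms), and a quiet poll doubles it back up toward check_interval. For example, with HZ=1000 and the default 5-minute check_interval, a burst of errors shrinks the period from 300000 jiffies to the 10-jiffy floor in about 15 halvings, and it takes the same number of quiet polls to climb back up.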
486 | static void mce_do_trigger(struct work_struct *work) | 486 | static void mce_do_trigger(struct work_struct *work) |
487 | { | 487 | { |
488 | call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); | 488 | call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); |
489 | } | 489 | } |
490 | 490 | ||
491 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | 491 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); |
492 | 492 | ||
493 | /* | 493 | /* |
494 | * Notify the user(s) about new machine check events. | 494 | * Notify the user(s) about new machine check events. |
495 | * Can be called from interrupt context, but not from machine check/NMI | 495 | * Can be called from interrupt context, but not from machine check/NMI |
496 | * context. | 496 | * context. |
497 | */ | 497 | */ |
498 | int mce_notify_user(void) | 498 | int mce_notify_user(void) |
499 | { | 499 | { |
500 | /* Not more than two messages every minute */ | 500 | /* Not more than two messages every minute */ |
501 | static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | 501 | static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); |
502 | 502 | ||
503 | clear_thread_flag(TIF_MCE_NOTIFY); | 503 | clear_thread_flag(TIF_MCE_NOTIFY); |
504 | if (test_and_clear_bit(0, &notify_user)) { | 504 | if (test_and_clear_bit(0, &notify_user)) { |
505 | wake_up_interruptible(&mce_wait); | 505 | wake_up_interruptible(&mce_wait); |
506 | 506 | ||
507 | /* | 507 | /* |
508 | * There is no risk of missing notifications because | 508 | * There is no risk of missing notifications because |
509 | * work_pending is always cleared before the function is | 509 | * work_pending is always cleared before the function is |
510 | * executed. | 510 | * executed. |
511 | */ | 511 | */ |
512 | if (trigger[0] && !work_pending(&mce_trigger_work)) | 512 | if (trigger[0] && !work_pending(&mce_trigger_work)) |
513 | schedule_work(&mce_trigger_work); | 513 | schedule_work(&mce_trigger_work); |
514 | 514 | ||
515 | if (__ratelimit(&ratelimit)) | 515 | if (__ratelimit(&ratelimit)) |
516 | printk(KERN_INFO "Machine check events logged\n"); | 516 | printk(KERN_INFO "Machine check events logged\n"); |
517 | 517 | ||
518 | return 1; | 518 | return 1; |
519 | } | 519 | } |
520 | return 0; | 520 | return 0; |
521 | } | 521 | } |
522 | 522 | ||
523 | /* see if the idle task needs to notify userspace */ | 523 | /* see if the idle task needs to notify userspace */ |
524 | static int | 524 | static int |
525 | mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk) | 525 | mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk) |
526 | { | 526 | { |
527 | /* IDLE_END should be safe - interrupts are back on */ | 527 | /* IDLE_END should be safe - interrupts are back on */ |
528 | if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY)) | 528 | if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY)) |
529 | mce_notify_user(); | 529 | mce_notify_user(); |
530 | 530 | ||
531 | return NOTIFY_OK; | 531 | return NOTIFY_OK; |
532 | } | 532 | } |
533 | 533 | ||
534 | static struct notifier_block mce_idle_notifier = { | 534 | static struct notifier_block mce_idle_notifier = { |
535 | .notifier_call = mce_idle_callback, | 535 | .notifier_call = mce_idle_callback, |
536 | }; | 536 | }; |
537 | 537 | ||
538 | static __init int periodic_mcheck_init(void) | 538 | static __init int periodic_mcheck_init(void) |
539 | { | 539 | { |
540 | idle_notifier_register(&mce_idle_notifier); | 540 | idle_notifier_register(&mce_idle_notifier); |
541 | return 0; | 541 | return 0; |
542 | } | 542 | } |
543 | __initcall(periodic_mcheck_init); | 543 | __initcall(periodic_mcheck_init); |
544 | 544 | ||
545 | /* | 545 | /* |
546 | * Initialize Machine Checks for a CPU. | 546 | * Initialize Machine Checks for a CPU. |
547 | */ | 547 | */ |
548 | static int mce_cap_init(void) | 548 | static int mce_cap_init(void) |
549 | { | 549 | { |
550 | u64 cap; | 550 | u64 cap; |
551 | unsigned b; | 551 | unsigned b; |
552 | 552 | ||
553 | rdmsrl(MSR_IA32_MCG_CAP, cap); | 553 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
554 | b = cap & 0xff; | 554 | b = cap & 0xff; |
555 | if (b > MAX_NR_BANKS) { | 555 | if (b > MAX_NR_BANKS) { |
556 | printk(KERN_WARNING | 556 | printk(KERN_WARNING |
557 | "MCE: Using only %u machine check banks out of %u\n", | 557 | "MCE: Using only %u machine check banks out of %u\n", |
558 | MAX_NR_BANKS, b); | 558 | MAX_NR_BANKS, b); |
559 | b = MAX_NR_BANKS; | 559 | b = MAX_NR_BANKS; |
560 | } | 560 | } |
561 | 561 | ||
562 | /* Don't support asymmetric configurations today */ | 562 | /* Don't support asymmetric configurations today */ |
563 | WARN_ON(banks != 0 && b != banks); | 563 | WARN_ON(banks != 0 && b != banks); |
564 | banks = b; | 564 | banks = b; |
565 | if (!bank) { | 565 | if (!bank) { |
566 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); | 566 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); |
567 | if (!bank) | 567 | if (!bank) |
568 | return -ENOMEM; | 568 | return -ENOMEM; |
569 | memset(bank, 0xff, banks * sizeof(u64)); | 569 | memset(bank, 0xff, banks * sizeof(u64)); |
570 | } | 570 | } |
571 | 571 | ||
572 | /* Use accurate RIP reporting if available. */ | 572 | /* Use accurate RIP reporting if available. */ |
573 | if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) | 573 | if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) |
574 | rip_msr = MSR_IA32_MCG_EIP; | 574 | rip_msr = MSR_IA32_MCG_EIP; |
575 | 575 | ||
576 | return 0; | 576 | return 0; |
577 | } | 577 | } |
578 | 578 | ||
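mce_cap_init() above unpacks MSR_IA32_MCG_CAP. The fields it consumes, summarized as the code reads them (a hedged paraphrase of the MCG_CAP layout):

    /*
     * bits  7:0   Count       - number of reporting banks (cap & 0xff)
     * bit   9     MCG_EXT_P   - extended machine-check MSRs are present
     * bits 23:16  MCG_EXT_CNT - how many; >= 9 means MCG_EIP exists,
     *                           so rip_msr can give an accurate RIP
     */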
579 | static void mce_init(void *dummy) | 579 | static void mce_init(void *dummy) |
580 | { | 580 | { |
581 | u64 cap; | 581 | u64 cap; |
582 | int i; | 582 | int i; |
583 | mce_banks_t all_banks; | 583 | mce_banks_t all_banks; |
584 | 584 | ||
585 | /* | 585 | /* |
586 | * Log the machine checks left over from the previous reset. | 586 | * Log the machine checks left over from the previous reset. |
587 | */ | 587 | */ |
588 | bitmap_fill(all_banks, MAX_NR_BANKS); | 588 | bitmap_fill(all_banks, MAX_NR_BANKS); |
589 | machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); | 589 | machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); |
590 | 590 | ||
591 | set_in_cr4(X86_CR4_MCE); | 591 | set_in_cr4(X86_CR4_MCE); |
592 | 592 | ||
593 | rdmsrl(MSR_IA32_MCG_CAP, cap); | 593 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
594 | if (cap & MCG_CTL_P) | 594 | if (cap & MCG_CTL_P) |
595 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | 595 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
596 | 596 | ||
597 | for (i = 0; i < banks; i++) { | 597 | for (i = 0; i < banks; i++) { |
598 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | 598 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); |
599 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 599 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); |
600 | } | 600 | } |
601 | } | 601 | } |
602 | 602 | ||
603 | /* Add per CPU specific workarounds here */ | 603 | /* Add per CPU specific workarounds here */ |
604 | static void mce_cpu_quirks(struct cpuinfo_x86 *c) | 604 | static void mce_cpu_quirks(struct cpuinfo_x86 *c) |
605 | { | 605 | { |
606 | /* This should be disabled by the BIOS, but isn't always */ | 606 | /* This should be disabled by the BIOS, but isn't always */ |
607 | if (c->x86_vendor == X86_VENDOR_AMD) { | 607 | if (c->x86_vendor == X86_VENDOR_AMD) { |
608 | if (c->x86 == 15 && banks > 4) | 608 | if (c->x86 == 15 && banks > 4) |
609 | /* disable GART TBL walk error reporting, which trips off | 609 | /* disable GART TBL walk error reporting, which trips off |
610 | incorrectly with the IOMMU & 3ware & Cerberus. */ | 610 | incorrectly with the IOMMU & 3ware & Cerberus. */ |
611 | clear_bit(10, (unsigned long *)&bank[4]); | 611 | clear_bit(10, (unsigned long *)&bank[4]); |
612 | if (c->x86 <= 17 && mce_bootlog < 0) | 612 | if (c->x86 <= 17 && mce_bootlog < 0) |
613 | /* Lots of broken BIOSes around that don't clear them | 613 | /* Lots of broken BIOSes around that don't clear them |
614 | by default and leave crap in there. Don't log. */ | 614 | by default and leave crap in there. Don't log. */ |
615 | mce_bootlog = 0; | 615 | mce_bootlog = 0; |
616 | } | 616 | } |
617 | 617 | ||
618 | } | 618 | } |
619 | 619 | ||
620 | static void mce_cpu_features(struct cpuinfo_x86 *c) | 620 | static void mce_cpu_features(struct cpuinfo_x86 *c) |
621 | { | 621 | { |
622 | switch (c->x86_vendor) { | 622 | switch (c->x86_vendor) { |
623 | case X86_VENDOR_INTEL: | 623 | case X86_VENDOR_INTEL: |
624 | mce_intel_feature_init(c); | 624 | mce_intel_feature_init(c); |
625 | break; | 625 | break; |
626 | case X86_VENDOR_AMD: | 626 | case X86_VENDOR_AMD: |
627 | mce_amd_feature_init(c); | 627 | mce_amd_feature_init(c); |
628 | break; | 628 | break; |
629 | default: | 629 | default: |
630 | break; | 630 | break; |
631 | } | 631 | } |
632 | } | 632 | } |
633 | 633 | ||
634 | static void mce_init_timer(void) | 634 | static void mce_init_timer(void) |
635 | { | 635 | { |
636 | struct timer_list *t = &__get_cpu_var(mce_timer); | 636 | struct timer_list *t = &__get_cpu_var(mce_timer); |
637 | int *n = &__get_cpu_var(next_interval); | 637 | int *n = &__get_cpu_var(next_interval); |
638 | 638 | ||
639 | *n = check_interval * HZ; | 639 | *n = check_interval * HZ; |
640 | if (!*n) | 640 | if (!*n) |
641 | return; | 641 | return; |
642 | setup_timer(t, mcheck_timer, smp_processor_id()); | 642 | setup_timer(t, mcheck_timer, smp_processor_id()); |
643 | t->expires = round_jiffies(jiffies + *n); | 643 | t->expires = round_jiffies(jiffies + *n); |
644 | add_timer(t); | 644 | add_timer(t); |
645 | } | 645 | } |
646 | 646 | ||
647 | /* | 647 | /* |
648 | * Called for each booted CPU to set up machine checks. | 648 | * Called for each booted CPU to set up machine checks. |
649 | * Must be called with preempt off. | 649 | * Must be called with preempt off. |
650 | */ | 650 | */ |
651 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | 651 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) |
652 | { | 652 | { |
653 | if (!mce_available(c)) | 653 | if (!mce_available(c)) |
654 | return; | 654 | return; |
655 | 655 | ||
656 | if (mce_cap_init() < 0) { | 656 | if (mce_cap_init() < 0) { |
657 | mce_dont_init = 1; | 657 | mce_dont_init = 1; |
658 | return; | 658 | return; |
659 | } | 659 | } |
660 | mce_cpu_quirks(c); | 660 | mce_cpu_quirks(c); |
661 | 661 | ||
662 | mce_init(NULL); | 662 | mce_init(NULL); |
663 | mce_cpu_features(c); | 663 | mce_cpu_features(c); |
664 | mce_init_timer(); | 664 | mce_init_timer(); |
665 | } | 665 | } |
666 | 666 | ||
667 | /* | 667 | /* |
668 | * Character device to read and clear the MCE log. | 668 | * Character device to read and clear the MCE log. |
669 | */ | 669 | */ |
670 | 670 | ||
671 | static DEFINE_SPINLOCK(mce_state_lock); | 671 | static DEFINE_SPINLOCK(mce_state_lock); |
672 | static int open_count; /* #times opened */ | 672 | static int open_count; /* #times opened */ |
673 | static int open_exclu; /* already open exclusive? */ | 673 | static int open_exclu; /* already open exclusive? */ |
674 | 674 | ||
675 | static int mce_open(struct inode *inode, struct file *file) | 675 | static int mce_open(struct inode *inode, struct file *file) |
676 | { | 676 | { |
677 | lock_kernel(); | 677 | lock_kernel(); |
678 | spin_lock(&mce_state_lock); | 678 | spin_lock(&mce_state_lock); |
679 | 679 | ||
680 | if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { | 680 | if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { |
681 | spin_unlock(&mce_state_lock); | 681 | spin_unlock(&mce_state_lock); |
682 | unlock_kernel(); | 682 | unlock_kernel(); |
683 | return -EBUSY; | 683 | return -EBUSY; |
684 | } | 684 | } |
685 | 685 | ||
686 | if (file->f_flags & O_EXCL) | 686 | if (file->f_flags & O_EXCL) |
687 | open_exclu = 1; | 687 | open_exclu = 1; |
688 | open_count++; | 688 | open_count++; |
689 | 689 | ||
690 | spin_unlock(&mce_state_lock); | 690 | spin_unlock(&mce_state_lock); |
691 | unlock_kernel(); | 691 | unlock_kernel(); |
692 | 692 | ||
693 | return nonseekable_open(inode, file); | 693 | return nonseekable_open(inode, file); |
694 | } | 694 | } |
695 | 695 | ||
696 | static int mce_release(struct inode *inode, struct file *file) | 696 | static int mce_release(struct inode *inode, struct file *file) |
697 | { | 697 | { |
698 | spin_lock(&mce_state_lock); | 698 | spin_lock(&mce_state_lock); |
699 | 699 | ||
700 | open_count--; | 700 | open_count--; |
701 | open_exclu = 0; | 701 | open_exclu = 0; |
702 | 702 | ||
703 | spin_unlock(&mce_state_lock); | 703 | spin_unlock(&mce_state_lock); |
704 | 704 | ||
705 | return 0; | 705 | return 0; |
706 | } | 706 | } |
707 | 707 | ||
708 | static void collect_tscs(void *data) | 708 | static void collect_tscs(void *data) |
709 | { | 709 | { |
710 | unsigned long *cpu_tsc = (unsigned long *)data; | 710 | unsigned long *cpu_tsc = (unsigned long *)data; |
711 | 711 | ||
712 | rdtscll(cpu_tsc[smp_processor_id()]); | 712 | rdtscll(cpu_tsc[smp_processor_id()]); |
713 | } | 713 | } |
714 | 714 | ||
715 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | 715 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, |
716 | loff_t *off) | 716 | loff_t *off) |
717 | { | 717 | { |
718 | unsigned long *cpu_tsc; | 718 | unsigned long *cpu_tsc; |
719 | static DEFINE_MUTEX(mce_read_mutex); | 719 | static DEFINE_MUTEX(mce_read_mutex); |
720 | unsigned prev, next; | 720 | unsigned prev, next; |
721 | char __user *buf = ubuf; | 721 | char __user *buf = ubuf; |
722 | int i, err; | 722 | int i, err; |
723 | 723 | ||
724 | cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); | 724 | cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); |
725 | if (!cpu_tsc) | 725 | if (!cpu_tsc) |
726 | return -ENOMEM; | 726 | return -ENOMEM; |
727 | 727 | ||
728 | mutex_lock(&mce_read_mutex); | 728 | mutex_lock(&mce_read_mutex); |
729 | next = rcu_dereference(mcelog.next); | 729 | next = rcu_dereference(mcelog.next); |
730 | 730 | ||
731 | /* Only supports full reads right now */ | 731 | /* Only supports full reads right now */ |
732 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | 732 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { |
733 | mutex_unlock(&mce_read_mutex); | 733 | mutex_unlock(&mce_read_mutex); |
734 | kfree(cpu_tsc); | 734 | kfree(cpu_tsc); |
735 | return -EINVAL; | 735 | return -EINVAL; |
736 | } | 736 | } |
737 | 737 | ||
738 | err = 0; | 738 | err = 0; |
739 | prev = 0; | 739 | prev = 0; |
740 | do { | 740 | do { |
741 | for (i = prev; i < next; i++) { | 741 | for (i = prev; i < next; i++) { |
742 | unsigned long start = jiffies; | 742 | unsigned long start = jiffies; |
743 | 743 | ||
744 | while (!mcelog.entry[i].finished) { | 744 | while (!mcelog.entry[i].finished) { |
745 | if (time_after_eq(jiffies, start + 2)) { | 745 | if (time_after_eq(jiffies, start + 2)) { |
746 | memset(mcelog.entry + i, 0, | 746 | memset(mcelog.entry + i, 0, |
747 | sizeof(struct mce)); | 747 | sizeof(struct mce)); |
748 | goto timeout; | 748 | goto timeout; |
749 | } | 749 | } |
750 | cpu_relax(); | 750 | cpu_relax(); |
751 | } | 751 | } |
752 | smp_rmb(); | 752 | smp_rmb(); |
753 | err |= copy_to_user(buf, mcelog.entry + i, | 753 | err |= copy_to_user(buf, mcelog.entry + i, |
754 | sizeof(struct mce)); | 754 | sizeof(struct mce)); |
755 | buf += sizeof(struct mce); | 755 | buf += sizeof(struct mce); |
756 | timeout: | 756 | timeout: |
757 | ; | 757 | ; |
758 | } | 758 | } |
759 | 759 | ||
760 | memset(mcelog.entry + prev, 0, | 760 | memset(mcelog.entry + prev, 0, |
761 | (next - prev) * sizeof(struct mce)); | 761 | (next - prev) * sizeof(struct mce)); |
762 | prev = next; | 762 | prev = next; |
763 | next = cmpxchg(&mcelog.next, prev, 0); | 763 | next = cmpxchg(&mcelog.next, prev, 0); |
764 | } while (next != prev); | 764 | } while (next != prev); |
765 | 765 | ||
766 | synchronize_sched(); | 766 | synchronize_sched(); |
767 | 767 | ||
768 | /* | 768 | /* |
769 | * Collect entries that were still getting written before the | 769 | * Collect entries that were still getting written before the |
770 | * synchronize. | 770 | * synchronize. |
771 | */ | 771 | */ |
772 | on_each_cpu(collect_tscs, cpu_tsc, 1); | 772 | on_each_cpu(collect_tscs, cpu_tsc, 1); |
773 | for (i = next; i < MCE_LOG_LEN; i++) { | 773 | for (i = next; i < MCE_LOG_LEN; i++) { |
774 | if (mcelog.entry[i].finished && | 774 | if (mcelog.entry[i].finished && |
775 | mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { | 775 | mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { |
776 | err |= copy_to_user(buf, mcelog.entry+i, | 776 | err |= copy_to_user(buf, mcelog.entry+i, |
777 | sizeof(struct mce)); | 777 | sizeof(struct mce)); |
778 | smp_rmb(); | 778 | smp_rmb(); |
779 | buf += sizeof(struct mce); | 779 | buf += sizeof(struct mce); |
780 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | 780 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); |
781 | } | 781 | } |
782 | } | 782 | } |
783 | mutex_unlock(&mce_read_mutex); | 783 | mutex_unlock(&mce_read_mutex); |
784 | kfree(cpu_tsc); | 784 | kfree(cpu_tsc); |
785 | return err ? -EFAULT : buf - ubuf; | 785 | return err ? -EFAULT : buf - ubuf; |
786 | } | 786 | } |
787 | 787 | ||
788 | static unsigned int mce_poll(struct file *file, poll_table *wait) | 788 | static unsigned int mce_poll(struct file *file, poll_table *wait) |
789 | { | 789 | { |
790 | poll_wait(file, &mce_wait, wait); | 790 | poll_wait(file, &mce_wait, wait); |
791 | if (rcu_dereference(mcelog.next)) | 791 | if (rcu_dereference(mcelog.next)) |
792 | return POLLIN | POLLRDNORM; | 792 | return POLLIN | POLLRDNORM; |
793 | return 0; | 793 | return 0; |
794 | } | 794 | } |
795 | 795 | ||
796 | static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) | 796 | static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) |
797 | { | 797 | { |
798 | int __user *p = (int __user *)arg; | 798 | int __user *p = (int __user *)arg; |
799 | 799 | ||
800 | if (!capable(CAP_SYS_ADMIN)) | 800 | if (!capable(CAP_SYS_ADMIN)) |
801 | return -EPERM; | 801 | return -EPERM; |
802 | switch (cmd) { | 802 | switch (cmd) { |
803 | case MCE_GET_RECORD_LEN: | 803 | case MCE_GET_RECORD_LEN: |
804 | return put_user(sizeof(struct mce), p); | 804 | return put_user(sizeof(struct mce), p); |
805 | case MCE_GET_LOG_LEN: | 805 | case MCE_GET_LOG_LEN: |
806 | return put_user(MCE_LOG_LEN, p); | 806 | return put_user(MCE_LOG_LEN, p); |
807 | case MCE_GETCLEAR_FLAGS: { | 807 | case MCE_GETCLEAR_FLAGS: { |
808 | unsigned flags; | 808 | unsigned flags; |
809 | 809 | ||
810 | do { | 810 | do { |
811 | flags = mcelog.flags; | 811 | flags = mcelog.flags; |
812 | } while (cmpxchg(&mcelog.flags, flags, 0) != flags); | 812 | } while (cmpxchg(&mcelog.flags, flags, 0) != flags); |
813 | return put_user(flags, p); | 813 | return put_user(flags, p); |
814 | } | 814 | } |
815 | default: | 815 | default: |
816 | return -ENOTTY; | 816 | return -ENOTTY; |
817 | } | 817 | } |
818 | } | 818 | } |
819 | 819 | ||
820 | static const struct file_operations mce_chrdev_ops = { | 820 | static const struct file_operations mce_chrdev_ops = { |
821 | .open = mce_open, | 821 | .open = mce_open, |
822 | .release = mce_release, | 822 | .release = mce_release, |
823 | .read = mce_read, | 823 | .read = mce_read, |
824 | .poll = mce_poll, | 824 | .poll = mce_poll, |
825 | .unlocked_ioctl = mce_ioctl, | 825 | .unlocked_ioctl = mce_ioctl, |
826 | }; | 826 | }; |
827 | 827 | ||
828 | static struct miscdevice mce_log_device = { | 828 | static struct miscdevice mce_log_device = { |
829 | MISC_MCELOG_MINOR, | 829 | MISC_MCELOG_MINOR, |
830 | "mcelog", | 830 | "mcelog", |
831 | &mce_chrdev_ops, | 831 | &mce_chrdev_ops, |
832 | }; | 832 | }; |
833 | 833 | ||
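The mce_log_device table above uses positional initialization, which silently depends on the field order of struct miscdevice. A designated-initializer sketch of the same table (assuming the conventional minor/name/fops field names) reads:

static struct miscdevice mce_log_device = {
	.minor = MISC_MCELOG_MINOR,
	.name  = "mcelog",
	.fops  = &mce_chrdev_ops,
};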
834 | /* | 834 | /* |
835 | * Old style boot options parsing. Only for compatibility. | 835 | * Old style boot options parsing. Only for compatibility. |
836 | */ | 836 | */ |
837 | static int __init mcheck_disable(char *str) | 837 | static int __init mcheck_disable(char *str) |
838 | { | 838 | { |
839 | mce_dont_init = 1; | 839 | mce_dont_init = 1; |
840 | return 1; | 840 | return 1; |
841 | } | 841 | } |
842 | 842 | ||
843 | /* mce=off disables machine check. | 843 | /* mce=off disables machine check. |
844 | mce=TOLERANCELEVEL (number, see above) | 844 | mce=TOLERANCELEVEL (number, see above) |
845 | mce=bootlog Log MCEs from before booting. Disabled by default on AMD. | 845 | mce=bootlog Log MCEs from before booting. Disabled by default on AMD. |
846 | mce=nobootlog Don't log MCEs from before booting. */ | 846 | mce=nobootlog Don't log MCEs from before booting. */ |
847 | static int __init mcheck_enable(char *str) | 847 | static int __init mcheck_enable(char *str) |
848 | { | 848 | { |
849 | if (!strcmp(str, "off")) | 849 | if (!strcmp(str, "off")) |
850 | mce_dont_init = 1; | 850 | mce_dont_init = 1; |
851 | else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) | 851 | else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) |
852 | mce_bootlog = str[0] == 'b'; | 852 | mce_bootlog = str[0] == 'b'; |
853 | else if (isdigit(str[0])) | 853 | else if (isdigit(str[0])) |
854 | get_option(&str, &tolerant); | 854 | get_option(&str, &tolerant); |
855 | else | 855 | else |
856 | printk("mce= argument %s ignored. Please use /sys", str); | 856 | printk("mce= argument %s ignored. Please use /sys", str); |
857 | return 1; | 857 | return 1; |
858 | } | 858 | } |
859 | 859 | ||
860 | __setup("nomce", mcheck_disable); | 860 | __setup("nomce", mcheck_disable); |
861 | __setup("mce=", mcheck_enable); | 861 | __setup("mce=", mcheck_enable); |
862 | 862 | ||
863 | /* | 863 | /* |
864 | * Sysfs support | 864 | * Sysfs support |
865 | */ | 865 | */ |
866 | 866 | ||
867 | /* | 867 | /* |
868 | * Disable machine checks on suspend and shutdown. We can't really handle | 868 | * Disable machine checks on suspend and shutdown. We can't really handle |
869 | * them later. | 869 | * them later. |
870 | */ | 870 | */ |
871 | static int mce_disable(void) | 871 | static int mce_disable(void) |
872 | { | 872 | { |
873 | int i; | 873 | int i; |
874 | 874 | ||
875 | for (i = 0; i < banks; i++) | 875 | for (i = 0; i < banks; i++) |
876 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | 876 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); |
877 | return 0; | 877 | return 0; |
878 | } | 878 | } |
879 | 879 | ||
880 | static int mce_suspend(struct sys_device *dev, pm_message_t state) | 880 | static int mce_suspend(struct sys_device *dev, pm_message_t state) |
881 | { | 881 | { |
882 | return mce_disable(); | 882 | return mce_disable(); |
883 | } | 883 | } |
884 | 884 | ||
885 | static int mce_shutdown(struct sys_device *dev) | 885 | static int mce_shutdown(struct sys_device *dev) |
886 | { | 886 | { |
887 | return mce_disable(); | 887 | return mce_disable(); |
888 | } | 888 | } |
889 | 889 | ||
890 | /* On resume clear all MCE state. Don't want to see leftovers from the BIOS. | 890 | /* On resume clear all MCE state. Don't want to see leftovers from the BIOS. |
891 | Only one CPU is active at this time, the others get re-added later using | 891 | Only one CPU is active at this time, the others get re-added later using |
892 | CPU hotplug. */ | 892 | CPU hotplug. */ |
893 | static int mce_resume(struct sys_device *dev) | 893 | static int mce_resume(struct sys_device *dev) |
894 | { | 894 | { |
895 | mce_init(NULL); | 895 | mce_init(NULL); |
895 | 	mce_cpu_features(&current_cpu_data); | 895 | 	mce_cpu_features(&current_cpu_data); |
897 | return 0; | 897 | return 0; |
898 | } | 898 | } |
899 | 899 | ||
900 | static void mce_cpu_restart(void *data) | 900 | static void mce_cpu_restart(void *data) |
901 | { | 901 | { |
902 | del_timer_sync(&__get_cpu_var(mce_timer)); | 902 | del_timer_sync(&__get_cpu_var(mce_timer)); |
903 | 	if (mce_available(&current_cpu_data)) | 903 | 	if (mce_available(&current_cpu_data)) |
904 | mce_init(NULL); | 904 | mce_init(NULL); |
905 | mce_init_timer(); | 905 | mce_init_timer(); |
906 | } | 906 | } |
907 | 907 | ||
908 | /* Reinit MCEs after user configuration changes */ | 908 | /* Reinit MCEs after user configuration changes */ |
909 | static void mce_restart(void) | 909 | static void mce_restart(void) |
910 | { | 910 | { |
911 | on_each_cpu(mce_cpu_restart, NULL, 1); | 911 | on_each_cpu(mce_cpu_restart, NULL, 1); |
912 | } | 912 | } |
913 | 913 | ||
914 | static struct sysdev_class mce_sysclass = { | 914 | static struct sysdev_class mce_sysclass = { |
915 | .suspend = mce_suspend, | 915 | .suspend = mce_suspend, |
916 | .shutdown = mce_shutdown, | 916 | .shutdown = mce_shutdown, |
917 | .resume = mce_resume, | 917 | .resume = mce_resume, |
918 | .name = "machinecheck", | 918 | .name = "machinecheck", |
919 | }; | 919 | }; |
920 | 920 | ||
921 | DEFINE_PER_CPU(struct sys_device, device_mce); | 921 | DEFINE_PER_CPU(struct sys_device, device_mce); |
922 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; | 922 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; |
923 | 923 | ||
924 | /* Why are there no generic functions for this? */ | 924 | /* Why are there no generic functions for this? */ |
925 | #define ACCESSOR(name, var, start) \ | 925 | #define ACCESSOR(name, var, start) \ |
926 | static ssize_t show_ ## name(struct sys_device *s, \ | 926 | static ssize_t show_ ## name(struct sys_device *s, \ |
927 | struct sysdev_attribute *attr, \ | 927 | struct sysdev_attribute *attr, \ |
928 | char *buf) { \ | 928 | char *buf) { \ |
929 | return sprintf(buf, "%lx\n", (unsigned long)var); \ | 929 | return sprintf(buf, "%lx\n", (unsigned long)var); \ |
930 | } \ | 930 | } \ |
931 | static ssize_t set_ ## name(struct sys_device *s, \ | 931 | static ssize_t set_ ## name(struct sys_device *s, \ |
932 | struct sysdev_attribute *attr, \ | 932 | struct sysdev_attribute *attr, \ |
933 | const char *buf, size_t siz) { \ | 933 | const char *buf, size_t siz) { \ |
934 | char *end; \ | 934 | char *end; \ |
935 | unsigned long new = simple_strtoul(buf, &end, 0); \ | 935 | unsigned long new = simple_strtoul(buf, &end, 0); \ |
936 | if (end == buf) return -EINVAL; \ | 936 | if (end == buf) return -EINVAL; \ |
937 | var = new; \ | 937 | var = new; \ |
938 | start; \ | 938 | start; \ |
939 | return end-buf; \ | 939 | return end-buf; \ |
940 | } \ | 940 | } \ |
941 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); | 941 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); |
942 | 942 | ||
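Written out, the ACCESSOR(check_interval,check_interval,mce_restart()) invocation used further down expands to roughly the following show/store pair; this is just the macro above spelled out for readability:

static ssize_t show_check_interval(struct sys_device *s,
				   struct sysdev_attribute *attr, char *buf)
{
	return sprintf(buf, "%lx\n", (unsigned long)check_interval);
}
static ssize_t set_check_interval(struct sys_device *s,
				  struct sysdev_attribute *attr,
				  const char *buf, size_t siz)
{
	char *end;
	unsigned long new = simple_strtoul(buf, &end, 0);
	if (end == buf)
		return -EINVAL;
	check_interval = new;
	mce_restart();		/* the "start" hook passed to the macro */
	return end - buf;
}
static SYSDEV_ATTR(check_interval, 0644, show_check_interval, set_check_interval);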
943 | static struct sysdev_attribute *bank_attrs; | 943 | static struct sysdev_attribute *bank_attrs; |
944 | 944 | ||
945 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, | 945 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, |
946 | char *buf) | 946 | char *buf) |
947 | { | 947 | { |
948 | u64 b = bank[attr - bank_attrs]; | 948 | u64 b = bank[attr - bank_attrs]; |
949 | return sprintf(buf, "%llx\n", b); | 949 | return sprintf(buf, "%llx\n", b); |
950 | } | 950 | } |
951 | 951 | ||
952 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | 952 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, |
953 | const char *buf, size_t siz) | 953 | const char *buf, size_t siz) |
954 | { | 954 | { |
955 | char *end; | 955 | char *end; |
956 | u64 new = simple_strtoull(buf, &end, 0); | 956 | u64 new = simple_strtoull(buf, &end, 0); |
957 | if (end == buf) | 957 | if (end == buf) |
958 | return -EINVAL; | 958 | return -EINVAL; |
959 | bank[attr - bank_attrs] = new; | 959 | bank[attr - bank_attrs] = new; |
960 | mce_restart(); | 960 | mce_restart(); |
961 | return end-buf; | 961 | return end-buf; |
962 | } | 962 | } |
963 | 963 | ||
964 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, | 964 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
965 | char *buf) | 965 | char *buf) |
966 | { | 966 | { |
967 | strcpy(buf, trigger); | 967 | strcpy(buf, trigger); |
968 | strcat(buf, "\n"); | 968 | strcat(buf, "\n"); |
969 | return strlen(trigger) + 1; | 969 | return strlen(trigger) + 1; |
970 | } | 970 | } |
971 | 971 | ||
972 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | 972 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
973 | const char *buf,size_t siz) | 973 | const char *buf,size_t siz) |
974 | { | 974 | { |
975 | char *p; | 975 | char *p; |
976 | int len; | 976 | int len; |
977 | strncpy(trigger, buf, sizeof(trigger)); | 977 | strncpy(trigger, buf, sizeof(trigger)); |
978 | trigger[sizeof(trigger)-1] = 0; | 978 | trigger[sizeof(trigger)-1] = 0; |
979 | len = strlen(trigger); | 979 | len = strlen(trigger); |
980 | p = strchr(trigger, '\n'); | 980 | p = strchr(trigger, '\n'); |
981 | 	if (p) *p = 0;	/* strchr() returns NULL if there is no newline */ | 981 | 	if (p) *p = 0;	/* strchr() returns NULL if there is no newline */ |
982 | return len; | 982 | return len; |
983 | } | 983 | } |
984 | 984 | ||
985 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | 985 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); |
986 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | 986 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); |
987 | ACCESSOR(check_interval,check_interval,mce_restart()) | 987 | ACCESSOR(check_interval,check_interval,mce_restart()) |
988 | static struct sysdev_attribute *mce_attributes[] = { | 988 | static struct sysdev_attribute *mce_attributes[] = { |
989 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, | 989 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, |
990 | NULL | 990 | NULL |
991 | }; | 991 | }; |
992 | 992 | ||
993 | static cpumask_var_t mce_device_initialized; | 993 | static cpumask_var_t mce_device_initialized; |
994 | 994 | ||
995 | /* Per cpu sysdev init. All of the cpus still share the same ctl bank */ | 995 | /* Per cpu sysdev init. All of the cpus still share the same ctl bank */ |
996 | static __cpuinit int mce_create_device(unsigned int cpu) | 996 | static __cpuinit int mce_create_device(unsigned int cpu) |
997 | { | 997 | { |
998 | int err; | 998 | int err; |
999 | int i; | 999 | int i; |
1000 | 1000 | ||
1001 | if (!mce_available(&boot_cpu_data)) | 1001 | if (!mce_available(&boot_cpu_data)) |
1002 | return -EIO; | 1002 | return -EIO; |
1003 | 1003 | ||
1004 | memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); | 1004 | memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); |
1005 | per_cpu(device_mce,cpu).id = cpu; | 1005 | per_cpu(device_mce,cpu).id = cpu; |
1006 | per_cpu(device_mce,cpu).cls = &mce_sysclass; | 1006 | per_cpu(device_mce,cpu).cls = &mce_sysclass; |
1007 | 1007 | ||
1008 | err = sysdev_register(&per_cpu(device_mce,cpu)); | 1008 | err = sysdev_register(&per_cpu(device_mce,cpu)); |
1009 | if (err) | 1009 | if (err) |
1010 | return err; | 1010 | return err; |
1011 | 1011 | ||
1012 | for (i = 0; mce_attributes[i]; i++) { | 1012 | for (i = 0; mce_attributes[i]; i++) { |
1013 | err = sysdev_create_file(&per_cpu(device_mce,cpu), | 1013 | err = sysdev_create_file(&per_cpu(device_mce,cpu), |
1014 | mce_attributes[i]); | 1014 | mce_attributes[i]); |
1015 | if (err) | 1015 | if (err) |
1016 | goto error; | 1016 | goto error; |
1017 | } | 1017 | } |
1018 | for (i = 0; i < banks; i++) { | 1018 | for (i = 0; i < banks; i++) { |
1019 | err = sysdev_create_file(&per_cpu(device_mce, cpu), | 1019 | err = sysdev_create_file(&per_cpu(device_mce, cpu), |
1020 | &bank_attrs[i]); | 1020 | &bank_attrs[i]); |
1021 | if (err) | 1021 | if (err) |
1022 | goto error2; | 1022 | goto error2; |
1023 | } | 1023 | } |
1024 | cpumask_set_cpu(cpu, mce_device_initialized); | 1024 | cpumask_set_cpu(cpu, mce_device_initialized); |
1025 | 1025 | ||
1026 | return 0; | 1026 | return 0; |
1027 | error2: | 1027 | error2: |
1028 | 	while (--i >= 0) | 1028 | 	while (--i >= 0) |
1029 | 		sysdev_remove_file(&per_cpu(device_mce, cpu), &bank_attrs[i]); | 1029 | 		sysdev_remove_file(&per_cpu(device_mce, cpu), &bank_attrs[i]); |
1030 | 	/* all mce_attributes files were created; recount for the unwind below */ | 1030 | 	/* all mce_attributes files were created; recount for the unwind below */ |
1031 | 	for (i = 0; mce_attributes[i]; i++) ; | 1031 | 	for (i = 0; mce_attributes[i]; i++) ; |
1032 | error: | 1032 | error: |
1033 | while (--i >= 0) { | 1033 | while (--i >= 0) { |
1034 | sysdev_remove_file(&per_cpu(device_mce,cpu), | 1034 | sysdev_remove_file(&per_cpu(device_mce,cpu), |
1035 | mce_attributes[i]); | 1035 | mce_attributes[i]); |
1036 | } | 1036 | } |
1037 | sysdev_unregister(&per_cpu(device_mce,cpu)); | 1037 | sysdev_unregister(&per_cpu(device_mce,cpu)); |
1038 | 1038 | ||
1039 | return err; | 1039 | return err; |
1040 | } | 1040 | } |
1041 | 1041 | ||
1042 | static __cpuinit void mce_remove_device(unsigned int cpu) | 1042 | static __cpuinit void mce_remove_device(unsigned int cpu) |
1043 | { | 1043 | { |
1044 | int i; | 1044 | int i; |
1045 | 1045 | ||
1046 | if (!cpumask_test_cpu(cpu, mce_device_initialized)) | 1046 | if (!cpumask_test_cpu(cpu, mce_device_initialized)) |
1047 | return; | 1047 | return; |
1048 | 1048 | ||
1049 | for (i = 0; mce_attributes[i]; i++) | 1049 | for (i = 0; mce_attributes[i]; i++) |
1050 | sysdev_remove_file(&per_cpu(device_mce,cpu), | 1050 | sysdev_remove_file(&per_cpu(device_mce,cpu), |
1051 | mce_attributes[i]); | 1051 | mce_attributes[i]); |
1052 | for (i = 0; i < banks; i++) | 1052 | for (i = 0; i < banks; i++) |
1053 | sysdev_remove_file(&per_cpu(device_mce, cpu), | 1053 | sysdev_remove_file(&per_cpu(device_mce, cpu), |
1054 | &bank_attrs[i]); | 1054 | &bank_attrs[i]); |
1055 | sysdev_unregister(&per_cpu(device_mce,cpu)); | 1055 | sysdev_unregister(&per_cpu(device_mce,cpu)); |
1056 | cpumask_clear_cpu(cpu, mce_device_initialized); | 1056 | cpumask_clear_cpu(cpu, mce_device_initialized); |
1057 | } | 1057 | } |
1058 | 1058 | ||
1059 | /* Make sure there are no machine checks on offlined CPUs. */ | 1059 | /* Make sure there are no machine checks on offlined CPUs. */ |
1060 | static void mce_disable_cpu(void *h) | 1060 | static void mce_disable_cpu(void *h) |
1061 | { | 1061 | { |
1062 | int i; | 1062 | int i; |
1063 | unsigned long action = *(unsigned long *)h; | 1063 | unsigned long action = *(unsigned long *)h; |
1064 | 1064 | ||
1065 | 	if (!mce_available(&current_cpu_data)) | 1065 | 	if (!mce_available(&current_cpu_data)) |
1066 | return; | 1066 | return; |
1067 | if (!(action & CPU_TASKS_FROZEN)) | 1067 | if (!(action & CPU_TASKS_FROZEN)) |
1068 | cmci_clear(); | 1068 | cmci_clear(); |
1069 | for (i = 0; i < banks; i++) | 1069 | for (i = 0; i < banks; i++) |
1070 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | 1070 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); |
1071 | } | 1071 | } |
1072 | 1072 | ||
1073 | static void mce_reenable_cpu(void *h) | 1073 | static void mce_reenable_cpu(void *h) |
1074 | { | 1074 | { |
1075 | int i; | 1075 | int i; |
1076 | unsigned long action = *(unsigned long *)h; | 1076 | unsigned long action = *(unsigned long *)h; |
1077 | 1077 | ||
1078 | 	if (!mce_available(&current_cpu_data)) | 1078 | 	if (!mce_available(&current_cpu_data)) |
1079 | return; | 1079 | return; |
1080 | if (!(action & CPU_TASKS_FROZEN)) | 1080 | if (!(action & CPU_TASKS_FROZEN)) |
1081 | cmci_reenable(); | 1081 | cmci_reenable(); |
1082 | for (i = 0; i < banks; i++) | 1082 | for (i = 0; i < banks; i++) |
1083 | wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); | 1083 | wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); |
1084 | } | 1084 | } |
1085 | 1085 | ||
1086 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | 1086 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ |
1087 | static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, | 1087 | static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, |
1088 | unsigned long action, void *hcpu) | 1088 | unsigned long action, void *hcpu) |
1089 | { | 1089 | { |
1090 | unsigned int cpu = (unsigned long)hcpu; | 1090 | unsigned int cpu = (unsigned long)hcpu; |
1091 | struct timer_list *t = &per_cpu(mce_timer, cpu); | 1091 | struct timer_list *t = &per_cpu(mce_timer, cpu); |
1092 | 1092 | ||
1093 | switch (action) { | 1093 | switch (action) { |
1094 | case CPU_ONLINE: | 1094 | case CPU_ONLINE: |
1095 | case CPU_ONLINE_FROZEN: | 1095 | case CPU_ONLINE_FROZEN: |
1096 | mce_create_device(cpu); | 1096 | mce_create_device(cpu); |
1097 | if (threshold_cpu_callback) | 1097 | if (threshold_cpu_callback) |
1098 | threshold_cpu_callback(action, cpu); | 1098 | threshold_cpu_callback(action, cpu); |
1099 | break; | 1099 | break; |
1100 | case CPU_DEAD: | 1100 | case CPU_DEAD: |
1101 | case CPU_DEAD_FROZEN: | 1101 | case CPU_DEAD_FROZEN: |
1102 | if (threshold_cpu_callback) | 1102 | if (threshold_cpu_callback) |
1103 | threshold_cpu_callback(action, cpu); | 1103 | threshold_cpu_callback(action, cpu); |
1104 | mce_remove_device(cpu); | 1104 | mce_remove_device(cpu); |
1105 | break; | 1105 | break; |
1106 | case CPU_DOWN_PREPARE: | 1106 | case CPU_DOWN_PREPARE: |
1107 | case CPU_DOWN_PREPARE_FROZEN: | 1107 | case CPU_DOWN_PREPARE_FROZEN: |
1108 | del_timer_sync(t); | 1108 | del_timer_sync(t); |
1109 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | 1109 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); |
1110 | break; | 1110 | break; |
1111 | case CPU_DOWN_FAILED: | 1111 | case CPU_DOWN_FAILED: |
1112 | case CPU_DOWN_FAILED_FROZEN: | 1112 | case CPU_DOWN_FAILED_FROZEN: |
1113 | t->expires = round_jiffies(jiffies + | 1113 | t->expires = round_jiffies(jiffies + |
1114 | __get_cpu_var(next_interval)); | 1114 | __get_cpu_var(next_interval)); |
1115 | add_timer_on(t, cpu); | 1115 | add_timer_on(t, cpu); |
1116 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | 1116 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); |
1117 | break; | 1117 | break; |
1118 | case CPU_POST_DEAD: | 1118 | case CPU_POST_DEAD: |
1119 | /* intentionally ignoring frozen here */ | 1119 | /* intentionally ignoring frozen here */ |
1120 | cmci_rediscover(cpu); | 1120 | cmci_rediscover(cpu); |
1121 | break; | 1121 | break; |
1122 | } | 1122 | } |
1123 | return NOTIFY_OK; | 1123 | return NOTIFY_OK; |
1124 | } | 1124 | } |
1125 | 1125 | ||
1126 | static struct notifier_block mce_cpu_notifier __cpuinitdata = { | 1126 | static struct notifier_block mce_cpu_notifier __cpuinitdata = { |
1127 | .notifier_call = mce_cpu_callback, | 1127 | .notifier_call = mce_cpu_callback, |
1128 | }; | 1128 | }; |
1129 | 1129 | ||
1130 | static __init int mce_init_banks(void) | 1130 | static __init int mce_init_banks(void) |
1131 | { | 1131 | { |
1132 | int i; | 1132 | int i; |
1133 | 1133 | ||
1134 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, | 1134 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, |
1135 | GFP_KERNEL); | 1135 | GFP_KERNEL); |
1136 | if (!bank_attrs) | 1136 | if (!bank_attrs) |
1137 | return -ENOMEM; | 1137 | return -ENOMEM; |
1138 | 1138 | ||
1139 | for (i = 0; i < banks; i++) { | 1139 | for (i = 0; i < banks; i++) { |
1140 | struct sysdev_attribute *a = &bank_attrs[i]; | 1140 | struct sysdev_attribute *a = &bank_attrs[i]; |
1141 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); | 1141 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); |
1142 | if (!a->attr.name) | 1142 | if (!a->attr.name) |
1143 | goto nomem; | 1143 | goto nomem; |
1144 | a->attr.mode = 0644; | 1144 | a->attr.mode = 0644; |
1145 | a->show = show_bank; | 1145 | a->show = show_bank; |
1146 | a->store = set_bank; | 1146 | a->store = set_bank; |
1147 | } | 1147 | } |
1148 | return 0; | 1148 | return 0; |
1149 | 1149 | ||
1150 | nomem: | 1150 | nomem: |
1151 | while (--i >= 0) | 1151 | while (--i >= 0) |
1152 | kfree(bank_attrs[i].attr.name); | 1152 | kfree(bank_attrs[i].attr.name); |
1153 | kfree(bank_attrs); | 1153 | kfree(bank_attrs); |
1154 | bank_attrs = NULL; | 1154 | bank_attrs = NULL; |
1155 | return -ENOMEM; | 1155 | return -ENOMEM; |
1156 | } | 1156 | } |
1157 | 1157 | ||
1158 | static __init int mce_init_device(void) | 1158 | static __init int mce_init_device(void) |
1159 | { | 1159 | { |
1160 | int err; | 1160 | int err; |
1161 | int i = 0; | 1161 | int i = 0; |
1162 | 1162 | ||
1163 | if (!mce_available(&boot_cpu_data)) | 1163 | if (!mce_available(&boot_cpu_data)) |
1164 | return -EIO; | 1164 | return -EIO; |
1165 | 1165 | ||
1166 | alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); | 1166 | zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); |
1167 | 1167 | ||
1168 | err = mce_init_banks(); | 1168 | err = mce_init_banks(); |
1169 | if (err) | 1169 | if (err) |
1170 | return err; | 1170 | return err; |
1171 | 1171 | ||
1172 | err = sysdev_class_register(&mce_sysclass); | 1172 | err = sysdev_class_register(&mce_sysclass); |
1173 | if (err) | 1173 | if (err) |
1174 | return err; | 1174 | return err; |
1175 | 1175 | ||
1176 | for_each_online_cpu(i) { | 1176 | for_each_online_cpu(i) { |
1177 | err = mce_create_device(i); | 1177 | err = mce_create_device(i); |
1178 | if (err) | 1178 | if (err) |
1179 | return err; | 1179 | return err; |
1180 | } | 1180 | } |
1181 | 1181 | ||
1182 | register_hotcpu_notifier(&mce_cpu_notifier); | 1182 | register_hotcpu_notifier(&mce_cpu_notifier); |
1183 | misc_register(&mce_log_device); | 1183 | misc_register(&mce_log_device); |
1184 | return err; | 1184 | return err; |
1185 | } | 1185 | } |
1186 | 1186 | ||
1187 | device_initcall(mce_init_device); | 1187 | device_initcall(mce_init_device); |
1188 | 1188 |
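The one functional change in this file is at line 1166: mce_device_initialized is now allocated with zalloc_cpumask_var() instead of alloc_cpumask_var(). A minimal sketch of why the zeroed variant matters (example_mask and example_init are hypothetical, not code from this file): with CONFIG_CPUMASK_OFFSTACK=y, which MAXSMP selects, cpumask_var_t becomes a pointer and the plain allocator hands back an uninitialized bitmap, so cpumask_test_cpu() could see stale bits before the first cpumask_set_cpu().

/* hypothetical mask, for illustration only */
static cpumask_var_t example_mask;

static int __init example_init(void)
{
	/*
	 * Without CPUMASK_OFFSTACK the static storage is already
	 * zeroed; with it, this call is what clears the bitmap.
	 */
	if (!zalloc_cpumask_var(&example_mask, GFP_KERNEL))
		return -ENOMEM;

	WARN_ON(!cpumask_empty(example_mask));	/* no stale bits */
	return 0;
}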
arch/x86/kernel/tlb_uv.c
1 | /* | 1 | /* |
2 | * SGI UltraViolet TLB flush routines. | 2 | * SGI UltraViolet TLB flush routines. |
3 | * | 3 | * |
4 | * (c) 2008 Cliff Wickman <cpw@sgi.com>, SGI. | 4 | * (c) 2008 Cliff Wickman <cpw@sgi.com>, SGI. |
5 | * | 5 | * |
6 | * This code is released under the GNU General Public License version 2 or | 6 | * This code is released under the GNU General Public License version 2 or |
7 | * later. | 7 | * later. |
8 | */ | 8 | */ |
9 | #include <linux/seq_file.h> | 9 | #include <linux/seq_file.h> |
10 | #include <linux/proc_fs.h> | 10 | #include <linux/proc_fs.h> |
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | 12 | ||
13 | #include <asm/mmu_context.h> | 13 | #include <asm/mmu_context.h> |
14 | #include <asm/uv/uv.h> | 14 | #include <asm/uv/uv.h> |
15 | #include <asm/uv/uv_mmrs.h> | 15 | #include <asm/uv/uv_mmrs.h> |
16 | #include <asm/uv/uv_hub.h> | 16 | #include <asm/uv/uv_hub.h> |
17 | #include <asm/uv/uv_bau.h> | 17 | #include <asm/uv/uv_bau.h> |
18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
19 | #include <asm/idle.h> | 19 | #include <asm/idle.h> |
20 | #include <asm/tsc.h> | 20 | #include <asm/tsc.h> |
21 | #include <asm/irq_vectors.h> | 21 | #include <asm/irq_vectors.h> |
22 | 22 | ||
23 | static struct bau_control **uv_bau_table_bases __read_mostly; | 23 | static struct bau_control **uv_bau_table_bases __read_mostly; |
24 | static int uv_bau_retry_limit __read_mostly; | 24 | static int uv_bau_retry_limit __read_mostly; |
25 | 25 | ||
26 | /* position of pnode (which is nasid>>1): */ | 26 | /* position of pnode (which is nasid>>1): */ |
27 | static int uv_nshift __read_mostly; | 27 | static int uv_nshift __read_mostly; |
28 | /* base pnode in this partition */ | 28 | /* base pnode in this partition */ |
29 | static int uv_partition_base_pnode __read_mostly; | 29 | static int uv_partition_base_pnode __read_mostly; |
30 | 30 | ||
31 | static unsigned long uv_mmask __read_mostly; | 31 | static unsigned long uv_mmask __read_mostly; |
32 | 32 | ||
33 | static DEFINE_PER_CPU(struct ptc_stats, ptcstats); | 33 | static DEFINE_PER_CPU(struct ptc_stats, ptcstats); |
34 | static DEFINE_PER_CPU(struct bau_control, bau_control); | 34 | static DEFINE_PER_CPU(struct bau_control, bau_control); |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * Determine the first node on a blade. | 37 | * Determine the first node on a blade. |
38 | */ | 38 | */ |
39 | static int __init blade_to_first_node(int blade) | 39 | static int __init blade_to_first_node(int blade) |
40 | { | 40 | { |
41 | int node, b; | 41 | int node, b; |
42 | 42 | ||
43 | for_each_online_node(node) { | 43 | for_each_online_node(node) { |
44 | b = uv_node_to_blade_id(node); | 44 | b = uv_node_to_blade_id(node); |
45 | if (blade == b) | 45 | if (blade == b) |
46 | return node; | 46 | return node; |
47 | } | 47 | } |
48 | return -1; /* shouldn't happen */ | 48 | return -1; /* shouldn't happen */ |
49 | } | 49 | } |
50 | 50 | ||
51 | /* | 51 | /* |
52 | * Determine the apicid of the first cpu on a blade. | 52 | * Determine the apicid of the first cpu on a blade. |
53 | */ | 53 | */ |
54 | static int __init blade_to_first_apicid(int blade) | 54 | static int __init blade_to_first_apicid(int blade) |
55 | { | 55 | { |
56 | int cpu; | 56 | int cpu; |
57 | 57 | ||
58 | for_each_present_cpu(cpu) | 58 | for_each_present_cpu(cpu) |
59 | if (blade == uv_cpu_to_blade_id(cpu)) | 59 | if (blade == uv_cpu_to_blade_id(cpu)) |
60 | return per_cpu(x86_cpu_to_apicid, cpu); | 60 | return per_cpu(x86_cpu_to_apicid, cpu); |
61 | return -1; | 61 | return -1; |
62 | } | 62 | } |
63 | 63 | ||
64 | /* | 64 | /* |
65 | * Free a software acknowledge hardware resource by clearing its Pending | 65 | * Free a software acknowledge hardware resource by clearing its Pending |
66 | * bit. This will return a reply to the sender. | 66 | * bit. This will return a reply to the sender. |
67 | * If the message has timed out, a reply has already been sent by the | 67 | * If the message has timed out, a reply has already been sent by the |
68 | * hardware but the resource has not been released. In that case our | 68 | * hardware but the resource has not been released. In that case our |
69 | * clear of the Timeout bit (as well) will free the resource. No reply will | 69 | * clear of the Timeout bit (as well) will free the resource. No reply will |
70 | * be sent (the hardware will only do one reply per message). | 70 | * be sent (the hardware will only do one reply per message). |
71 | */ | 71 | */ |
72 | static void uv_reply_to_message(int resource, | 72 | static void uv_reply_to_message(int resource, |
73 | struct bau_payload_queue_entry *msg, | 73 | struct bau_payload_queue_entry *msg, |
74 | struct bau_msg_status *msp) | 74 | struct bau_msg_status *msp) |
75 | { | 75 | { |
76 | unsigned long dw; | 76 | unsigned long dw; |
77 | 77 | ||
78 | dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource); | 78 | dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource); |
79 | msg->replied_to = 1; | 79 | msg->replied_to = 1; |
80 | msg->sw_ack_vector = 0; | 80 | msg->sw_ack_vector = 0; |
81 | if (msp) | 81 | if (msp) |
82 | msp->seen_by.bits = 0; | 82 | msp->seen_by.bits = 0; |
83 | uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); | 83 | uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); |
84 | } | 84 | } |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * Do all the things a cpu should do for a TLB shootdown message. | 87 | * Do all the things a cpu should do for a TLB shootdown message. |
88 | * Other cpu's may come here at the same time for this message. | 88 | * Other cpu's may come here at the same time for this message. |
89 | */ | 89 | */ |
90 | static void uv_bau_process_message(struct bau_payload_queue_entry *msg, | 90 | static void uv_bau_process_message(struct bau_payload_queue_entry *msg, |
91 | int msg_slot, int sw_ack_slot) | 91 | int msg_slot, int sw_ack_slot) |
92 | { | 92 | { |
93 | unsigned long this_cpu_mask; | 93 | unsigned long this_cpu_mask; |
94 | struct bau_msg_status *msp; | 94 | struct bau_msg_status *msp; |
95 | int cpu; | 95 | int cpu; |
96 | 96 | ||
97 | msp = __get_cpu_var(bau_control).msg_statuses + msg_slot; | 97 | msp = __get_cpu_var(bau_control).msg_statuses + msg_slot; |
98 | cpu = uv_blade_processor_id(); | 98 | cpu = uv_blade_processor_id(); |
99 | msg->number_of_cpus = | 99 | msg->number_of_cpus = |
100 | uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id())); | 100 | uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id())); |
101 | this_cpu_mask = 1UL << cpu; | 101 | this_cpu_mask = 1UL << cpu; |
102 | if (msp->seen_by.bits & this_cpu_mask) | 102 | if (msp->seen_by.bits & this_cpu_mask) |
103 | return; | 103 | return; |
104 | atomic_or_long(&msp->seen_by.bits, this_cpu_mask); | 104 | atomic_or_long(&msp->seen_by.bits, this_cpu_mask); |
105 | 105 | ||
106 | if (msg->replied_to == 1) | 106 | if (msg->replied_to == 1) |
107 | return; | 107 | return; |
108 | 108 | ||
109 | if (msg->address == TLB_FLUSH_ALL) { | 109 | if (msg->address == TLB_FLUSH_ALL) { |
110 | local_flush_tlb(); | 110 | local_flush_tlb(); |
111 | __get_cpu_var(ptcstats).alltlb++; | 111 | __get_cpu_var(ptcstats).alltlb++; |
112 | } else { | 112 | } else { |
113 | __flush_tlb_one(msg->address); | 113 | __flush_tlb_one(msg->address); |
114 | __get_cpu_var(ptcstats).onetlb++; | 114 | __get_cpu_var(ptcstats).onetlb++; |
115 | } | 115 | } |
116 | 116 | ||
117 | __get_cpu_var(ptcstats).requestee++; | 117 | __get_cpu_var(ptcstats).requestee++; |
118 | 118 | ||
119 | atomic_inc_short(&msg->acknowledge_count); | 119 | atomic_inc_short(&msg->acknowledge_count); |
120 | if (msg->number_of_cpus == msg->acknowledge_count) | 120 | if (msg->number_of_cpus == msg->acknowledge_count) |
121 | uv_reply_to_message(sw_ack_slot, msg, msp); | 121 | uv_reply_to_message(sw_ack_slot, msg, msp); |
122 | } | 122 | } |
123 | 123 | ||
124 | /* | 124 | /* |
125 | * Examine the payload queue on one distribution node to see | 125 | * Examine the payload queue on one distribution node to see |
126 | * which messages have not been seen, and which cpu(s) have not seen them. | 126 | * which messages have not been seen, and which cpu(s) have not seen them. |
127 | * | 127 | * |
128 | * Returns the number of cpu's that have not responded. | 128 | * Returns the number of cpu's that have not responded. |
129 | */ | 129 | */ |
130 | static int uv_examine_destination(struct bau_control *bau_tablesp, int sender) | 130 | static int uv_examine_destination(struct bau_control *bau_tablesp, int sender) |
131 | { | 131 | { |
132 | struct bau_payload_queue_entry *msg; | 132 | struct bau_payload_queue_entry *msg; |
133 | struct bau_msg_status *msp; | 133 | struct bau_msg_status *msp; |
134 | int count = 0; | 134 | int count = 0; |
135 | int i; | 135 | int i; |
136 | int j; | 136 | int j; |
137 | 137 | ||
138 | for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE; | 138 | for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE; |
139 | msg++, i++) { | 139 | msg++, i++) { |
140 | if ((msg->sending_cpu == sender) && (!msg->replied_to)) { | 140 | if ((msg->sending_cpu == sender) && (!msg->replied_to)) { |
141 | msp = bau_tablesp->msg_statuses + i; | 141 | msp = bau_tablesp->msg_statuses + i; |
142 | printk(KERN_DEBUG | 142 | printk(KERN_DEBUG |
143 | "blade %d: address:%#lx %d of %d, not cpu(s): ", | 143 | "blade %d: address:%#lx %d of %d, not cpu(s): ", |
144 | i, msg->address, msg->acknowledge_count, | 144 | i, msg->address, msg->acknowledge_count, |
145 | msg->number_of_cpus); | 145 | msg->number_of_cpus); |
146 | for (j = 0; j < msg->number_of_cpus; j++) { | 146 | for (j = 0; j < msg->number_of_cpus; j++) { |
147 | if (!((1L << j) & msp->seen_by.bits)) { | 147 | if (!((1L << j) & msp->seen_by.bits)) { |
148 | count++; | 148 | count++; |
149 | printk("%d ", j); | 149 | printk("%d ", j); |
150 | } | 150 | } |
151 | } | 151 | } |
152 | printk("\n"); | 152 | printk("\n"); |
153 | } | 153 | } |
154 | } | 154 | } |
155 | return count; | 155 | return count; |
156 | } | 156 | } |
157 | 157 | ||
158 | /* | 158 | /* |
159 | * Examine the payload queue on all the distribution nodes to see | 159 | * Examine the payload queue on all the distribution nodes to see |
160 | * which messages have not been seen, and which cpu(s) have not seen them. | 160 | * which messages have not been seen, and which cpu(s) have not seen them. |
161 | * | 161 | * |
162 | * Returns the number of cpu's that have not responded. | 162 | * Returns the number of cpu's that have not responded. |
163 | */ | 163 | */ |
164 | static int uv_examine_destinations(struct bau_target_nodemask *distribution) | 164 | static int uv_examine_destinations(struct bau_target_nodemask *distribution) |
165 | { | 165 | { |
166 | int sender; | 166 | int sender; |
167 | int i; | 167 | int i; |
168 | int count = 0; | 168 | int count = 0; |
169 | 169 | ||
170 | sender = smp_processor_id(); | 170 | sender = smp_processor_id(); |
171 | for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) { | 171 | for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) { |
172 | if (!bau_node_isset(i, distribution)) | 172 | if (!bau_node_isset(i, distribution)) |
173 | continue; | 173 | continue; |
174 | count += uv_examine_destination(uv_bau_table_bases[i], sender); | 174 | count += uv_examine_destination(uv_bau_table_bases[i], sender); |
175 | } | 175 | } |
176 | return count; | 176 | return count; |
177 | } | 177 | } |
178 | 178 | ||
179 | /* | 179 | /* |
180 | * wait for completion of a broadcast message | 180 | * wait for completion of a broadcast message |
181 | * | 181 | * |
182 | * return COMPLETE, RETRY or GIVEUP | 182 | * return COMPLETE, RETRY or GIVEUP |
183 | */ | 183 | */ |
184 | static int uv_wait_completion(struct bau_desc *bau_desc, | 184 | static int uv_wait_completion(struct bau_desc *bau_desc, |
185 | unsigned long mmr_offset, int right_shift) | 185 | unsigned long mmr_offset, int right_shift) |
186 | { | 186 | { |
187 | int exams = 0; | 187 | int exams = 0; |
188 | long destination_timeouts = 0; | 188 | long destination_timeouts = 0; |
189 | long source_timeouts = 0; | 189 | long source_timeouts = 0; |
190 | unsigned long descriptor_status; | 190 | unsigned long descriptor_status; |
191 | 191 | ||
192 | while ((descriptor_status = (((unsigned long) | 192 | while ((descriptor_status = (((unsigned long) |
193 | uv_read_local_mmr(mmr_offset) >> | 193 | uv_read_local_mmr(mmr_offset) >> |
194 | right_shift) & UV_ACT_STATUS_MASK)) != | 194 | right_shift) & UV_ACT_STATUS_MASK)) != |
195 | DESC_STATUS_IDLE) { | 195 | DESC_STATUS_IDLE) { |
196 | if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { | 196 | if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { |
197 | source_timeouts++; | 197 | source_timeouts++; |
198 | if (source_timeouts > SOURCE_TIMEOUT_LIMIT) | 198 | if (source_timeouts > SOURCE_TIMEOUT_LIMIT) |
199 | source_timeouts = 0; | 199 | source_timeouts = 0; |
200 | __get_cpu_var(ptcstats).s_retry++; | 200 | __get_cpu_var(ptcstats).s_retry++; |
201 | return FLUSH_RETRY; | 201 | return FLUSH_RETRY; |
202 | } | 202 | } |
203 | /* | 203 | /* |
204 | * spin here looking for progress at the destinations | 204 | * spin here looking for progress at the destinations |
205 | */ | 205 | */ |
206 | if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) { | 206 | if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) { |
207 | destination_timeouts++; | 207 | destination_timeouts++; |
208 | if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) { | 208 | if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) { |
209 | /* | 209 | /* |
210 | * returns number of cpus not responding | 210 | * returns number of cpus not responding |
211 | */ | 211 | */ |
212 | if (uv_examine_destinations | 212 | if (uv_examine_destinations |
213 | (&bau_desc->distribution) == 0) { | 213 | (&bau_desc->distribution) == 0) { |
214 | __get_cpu_var(ptcstats).d_retry++; | 214 | __get_cpu_var(ptcstats).d_retry++; |
215 | return FLUSH_RETRY; | 215 | return FLUSH_RETRY; |
216 | } | 216 | } |
217 | exams++; | 217 | exams++; |
218 | if (exams >= uv_bau_retry_limit) { | 218 | if (exams >= uv_bau_retry_limit) { |
219 | printk(KERN_DEBUG | 219 | printk(KERN_DEBUG |
220 | "uv_flush_tlb_others"); | 220 | "uv_flush_tlb_others"); |
221 | printk("giving up on cpu %d\n", | 221 | printk("giving up on cpu %d\n", |
222 | smp_processor_id()); | 222 | smp_processor_id()); |
223 | return FLUSH_GIVEUP; | 223 | return FLUSH_GIVEUP; |
224 | } | 224 | } |
225 | /* | 225 | /* |
226 | * delays can hang the simulator | 226 | * delays can hang the simulator |
227 | udelay(1000); | 227 | udelay(1000); |
228 | */ | 228 | */ |
229 | destination_timeouts = 0; | 229 | destination_timeouts = 0; |
230 | } | 230 | } |
231 | } | 231 | } |
232 | cpu_relax(); | 232 | cpu_relax(); |
233 | } | 233 | } |
234 | return FLUSH_COMPLETE; | 234 | return FLUSH_COMPLETE; |
235 | } | 235 | } |
236 | 236 | ||
237 | /** | 237 | /** |
238 | * uv_flush_send_and_wait | 238 | * uv_flush_send_and_wait |
239 | * | 239 | * |
240 | * Send a broadcast and wait for a broadcast message to complete. | 240 | * Send a broadcast and wait for a broadcast message to complete. |
241 | * | 241 | * |
242 | * The flush_mask contains the cpus the broadcast was sent to. | 242 | * The flush_mask contains the cpus the broadcast was sent to. |
243 | * | 243 | * |
244 | * Returns NULL if all remote flushing was done. The mask is zeroed. | 244 | * Returns NULL if all remote flushing was done. The mask is zeroed. |
245 | * Returns @flush_mask if some remote flushing remains to be done. The | 245 | * Returns @flush_mask if some remote flushing remains to be done. The |
246 | * mask will have some bits still set. | 246 | * mask will have some bits still set. |
247 | */ | 247 | */ |
248 | const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, | 248 | const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, |
249 | struct bau_desc *bau_desc, | 249 | struct bau_desc *bau_desc, |
250 | struct cpumask *flush_mask) | 250 | struct cpumask *flush_mask) |
251 | { | 251 | { |
252 | int completion_status = 0; | 252 | int completion_status = 0; |
253 | int right_shift; | 253 | int right_shift; |
254 | int tries = 0; | 254 | int tries = 0; |
255 | int pnode; | 255 | int pnode; |
256 | int bit; | 256 | int bit; |
257 | unsigned long mmr_offset; | 257 | unsigned long mmr_offset; |
258 | unsigned long index; | 258 | unsigned long index; |
259 | cycles_t time1; | 259 | cycles_t time1; |
260 | cycles_t time2; | 260 | cycles_t time2; |
261 | 261 | ||
262 | if (cpu < UV_CPUS_PER_ACT_STATUS) { | 262 | if (cpu < UV_CPUS_PER_ACT_STATUS) { |
263 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; | 263 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; |
264 | right_shift = cpu * UV_ACT_STATUS_SIZE; | 264 | right_shift = cpu * UV_ACT_STATUS_SIZE; |
265 | } else { | 265 | } else { |
266 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; | 266 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; |
267 | right_shift = | 267 | right_shift = |
268 | ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE); | 268 | ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE); |
269 | } | 269 | } |
270 | time1 = get_cycles(); | 270 | time1 = get_cycles(); |
271 | do { | 271 | do { |
272 | tries++; | 272 | tries++; |
273 | index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | | 273 | index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | |
274 | cpu; | 274 | cpu; |
275 | uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); | 275 | uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); |
276 | completion_status = uv_wait_completion(bau_desc, mmr_offset, | 276 | completion_status = uv_wait_completion(bau_desc, mmr_offset, |
277 | right_shift); | 277 | right_shift); |
278 | } while (completion_status == FLUSH_RETRY); | 278 | } while (completion_status == FLUSH_RETRY); |
279 | time2 = get_cycles(); | 279 | time2 = get_cycles(); |
280 | __get_cpu_var(ptcstats).sflush += (time2 - time1); | 280 | __get_cpu_var(ptcstats).sflush += (time2 - time1); |
281 | if (tries > 1) | 281 | if (tries > 1) |
282 | __get_cpu_var(ptcstats).retriesok++; | 282 | __get_cpu_var(ptcstats).retriesok++; |
283 | 283 | ||
284 | if (completion_status == FLUSH_GIVEUP) { | 284 | if (completion_status == FLUSH_GIVEUP) { |
285 | /* | 285 | /* |
286 | * Cause the caller to do an IPI-style TLB shootdown on | 286 | * Cause the caller to do an IPI-style TLB shootdown on |
287 | * the cpu's, all of which are still in the mask. | 287 | * the cpu's, all of which are still in the mask. |
288 | */ | 288 | */ |
289 | __get_cpu_var(ptcstats).ptc_i++; | 289 | __get_cpu_var(ptcstats).ptc_i++; |
290 | return flush_mask; | 290 | return flush_mask; |
291 | } | 291 | } |
292 | 292 | ||
293 | /* | 293 | /* |
294 | * Success, so clear the remote cpu's from the mask so we don't | 294 | * Success, so clear the remote cpu's from the mask so we don't |
295 | * use the IPI method of shootdown on them. | 295 | * use the IPI method of shootdown on them. |
296 | */ | 296 | */ |
297 | for_each_cpu(bit, flush_mask) { | 297 | for_each_cpu(bit, flush_mask) { |
298 | pnode = uv_cpu_to_pnode(bit); | 298 | pnode = uv_cpu_to_pnode(bit); |
299 | if (pnode == this_pnode) | 299 | if (pnode == this_pnode) |
300 | continue; | 300 | continue; |
301 | cpumask_clear_cpu(bit, flush_mask); | 301 | cpumask_clear_cpu(bit, flush_mask); |
302 | } | 302 | } |
303 | if (!cpumask_empty(flush_mask)) | 303 | if (!cpumask_empty(flush_mask)) |
304 | return flush_mask; | 304 | return flush_mask; |
305 | return NULL; | 305 | return NULL; |
306 | } | 306 | } |
307 | 307 | ||
308 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | 308 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); |
309 | 309 | ||
310 | /** | 310 | /** |
311 | * uv_flush_tlb_others - globally purge translation cache of a virtual | 311 | * uv_flush_tlb_others - globally purge translation cache of a virtual |
312 | * address or all TLB's | 312 | * address or all TLB's |
313 | * @cpumask: mask of all cpu's in which the address is to be removed | 313 | * @cpumask: mask of all cpu's in which the address is to be removed |
314 | * @mm: mm_struct containing virtual address range | 314 | * @mm: mm_struct containing virtual address range |
315 | * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) | 315 | * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) |
316 | * @cpu: the current cpu | 316 | * @cpu: the current cpu |
317 | * | 317 | * |
318 | * This is the entry point for initiating any UV global TLB shootdown. | 318 | * This is the entry point for initiating any UV global TLB shootdown. |
319 | * | 319 | * |
320 | * Purges the translation caches of all specified processors of the given | 320 | * Purges the translation caches of all specified processors of the given |
321 | * virtual address, or purges all TLB's on specified processors. | 321 | * virtual address, or purges all TLB's on specified processors. |
322 | * | 322 | * |
323 | * The caller has derived the cpumask from the mm_struct. This function | 323 | * The caller has derived the cpumask from the mm_struct. This function |
324 | * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) | 324 | * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) |
325 | * | 325 | * |
326 | * The cpumask is converted into a nodemask of the nodes containing | 326 | * The cpumask is converted into a nodemask of the nodes containing |
327 | * the cpus. | 327 | * the cpus. |
328 | * | 328 | * |
329 | * Note that this function should be called with preemption disabled. | 329 | * Note that this function should be called with preemption disabled. |
330 | * | 330 | * |
331 | * Returns NULL if all remote flushing was done. | 331 | * Returns NULL if all remote flushing was done. |
332 | * Returns pointer to cpumask if some remote flushing remains to be | 332 | * Returns pointer to cpumask if some remote flushing remains to be |
333 | * done. The returned pointer is valid till preemption is re-enabled. | 333 | * done. The returned pointer is valid till preemption is re-enabled. |
334 | */ | 334 | */ |
335 | const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | 335 | const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, |
336 | struct mm_struct *mm, | 336 | struct mm_struct *mm, |
337 | unsigned long va, unsigned int cpu) | 337 | unsigned long va, unsigned int cpu) |
338 | { | 338 | { |
339 | struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask); | 339 | struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask); |
340 | int i; | 340 | int i; |
341 | int bit; | 341 | int bit; |
342 | int pnode; | 342 | int pnode; |
343 | int uv_cpu; | 343 | int uv_cpu; |
344 | int this_pnode; | 344 | int this_pnode; |
345 | int locals = 0; | 345 | int locals = 0; |
346 | struct bau_desc *bau_desc; | 346 | struct bau_desc *bau_desc; |
347 | 347 | ||
348 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); | 348 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); |
349 | 349 | ||
350 | uv_cpu = uv_blade_processor_id(); | 350 | uv_cpu = uv_blade_processor_id(); |
351 | this_pnode = uv_hub_info->pnode; | 351 | this_pnode = uv_hub_info->pnode; |
352 | bau_desc = __get_cpu_var(bau_control).descriptor_base; | 352 | bau_desc = __get_cpu_var(bau_control).descriptor_base; |
353 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; | 353 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; |
354 | 354 | ||
355 | bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | 355 | bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); |
356 | 356 | ||
357 | i = 0; | 357 | i = 0; |
358 | for_each_cpu(bit, flush_mask) { | 358 | for_each_cpu(bit, flush_mask) { |
359 | pnode = uv_cpu_to_pnode(bit); | 359 | pnode = uv_cpu_to_pnode(bit); |
360 | BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1)); | 360 | BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1)); |
361 | if (pnode == this_pnode) { | 361 | if (pnode == this_pnode) { |
362 | locals++; | 362 | locals++; |
363 | continue; | 363 | continue; |
364 | } | 364 | } |
365 | bau_node_set(pnode - uv_partition_base_pnode, | 365 | bau_node_set(pnode - uv_partition_base_pnode, |
366 | &bau_desc->distribution); | 366 | &bau_desc->distribution); |
367 | i++; | 367 | i++; |
368 | } | 368 | } |
369 | if (i == 0) { | 369 | if (i == 0) { |
370 | /* | 370 | /* |
371 | * no off_node flushing; return status for local node | 371 | * no off_node flushing; return status for local node |
372 | */ | 372 | */ |
373 | if (locals) | 373 | if (locals) |
374 | return flush_mask; | 374 | return flush_mask; |
375 | else | 375 | else |
376 | return NULL; | 376 | return NULL; |
377 | } | 377 | } |
378 | __get_cpu_var(ptcstats).requestor++; | 378 | __get_cpu_var(ptcstats).requestor++; |
379 | __get_cpu_var(ptcstats).ntargeted += i; | 379 | __get_cpu_var(ptcstats).ntargeted += i; |
380 | 380 | ||
381 | bau_desc->payload.address = va; | 381 | bau_desc->payload.address = va; |
382 | bau_desc->payload.sending_cpu = cpu; | 382 | bau_desc->payload.sending_cpu = cpu; |
383 | 383 | ||
384 | return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask); | 384 | return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask); |
385 | } | 385 | } |
386 | 386 | ||
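uv_flush_tlb_others() works on the per-cpu uv_flush_tlb_mask defined just above; under MAXSMP that cpumask_var_t is likewise a pointer that must be allocated before first use. A sketch of the per-cpu allocation pattern, assuming an init-time loop such as the one in this file's setup path (not shown in this excerpt; example_uv_mask_init is a hypothetical name):

static int __init example_uv_mask_init(void)
{
	int cur_cpu;

	/* allocate (and zero) one flush mask per possible cpu,
	   placing it on that cpu's home node */
	for_each_possible_cpu(cur_cpu)
		if (!zalloc_cpumask_var_node(
				&per_cpu(uv_flush_tlb_mask, cur_cpu),
				GFP_KERNEL, cpu_to_node(cur_cpu)))
			return -ENOMEM;
	return 0;
}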
387 | /* | 387 | /* |
388 | * The BAU message interrupt comes here. (registered by set_intr_gate) | 388 | * The BAU message interrupt comes here. (registered by set_intr_gate) |
389 | * See entry_64.S | 389 | * See entry_64.S |
390 | * | 390 | * |
391 | * We received a broadcast assist message. | 391 | * We received a broadcast assist message. |
392 | * | 392 | * |
393 | * Interrupts may have been disabled; this interrupt could represent | 393 | * Interrupts may have been disabled; this interrupt could represent |
394 | * the receipt of several messages. | 394 | * the receipt of several messages. |
395 | * | 395 | * |
396 | * All cores/threads on this node get this interrupt. | 396 | * All cores/threads on this node get this interrupt. |
397 | * The last one to see it does the s/w ack. | 397 | * The last one to see it does the s/w ack. |
398 | * (the resource will not be freed until noninterruptible cpus see this | 398 | * (the resource will not be freed until noninterruptible cpus see this |
399 | * interrupt; hardware will timeout the s/w ack and reply ERROR) | 399 | * interrupt; hardware will timeout the s/w ack and reply ERROR) |
400 | */ | 400 | */ |
401 | void uv_bau_message_interrupt(struct pt_regs *regs) | 401 | void uv_bau_message_interrupt(struct pt_regs *regs) |
402 | { | 402 | { |
403 | struct bau_payload_queue_entry *va_queue_first; | 403 | struct bau_payload_queue_entry *va_queue_first; |
404 | struct bau_payload_queue_entry *va_queue_last; | 404 | struct bau_payload_queue_entry *va_queue_last; |
405 | struct bau_payload_queue_entry *msg; | 405 | struct bau_payload_queue_entry *msg; |
406 | struct pt_regs *old_regs = set_irq_regs(regs); | 406 | struct pt_regs *old_regs = set_irq_regs(regs); |
407 | cycles_t time1; | 407 | cycles_t time1; |
408 | cycles_t time2; | 408 | cycles_t time2; |
409 | int msg_slot; | 409 | int msg_slot; |
410 | int sw_ack_slot; | 410 | int sw_ack_slot; |
411 | int fw; | 411 | int fw; |
412 | int count = 0; | 412 | int count = 0; |
413 | unsigned long local_pnode; | 413 | unsigned long local_pnode; |
414 | 414 | ||
415 | ack_APIC_irq(); | 415 | ack_APIC_irq(); |
416 | exit_idle(); | 416 | exit_idle(); |
417 | irq_enter(); | 417 | irq_enter(); |
418 | 418 | ||
419 | time1 = get_cycles(); | 419 | time1 = get_cycles(); |
420 | 420 | ||
421 | local_pnode = uv_blade_to_pnode(uv_numa_blade_id()); | 421 | local_pnode = uv_blade_to_pnode(uv_numa_blade_id()); |
422 | 422 | ||
423 | va_queue_first = __get_cpu_var(bau_control).va_queue_first; | 423 | va_queue_first = __get_cpu_var(bau_control).va_queue_first; |
424 | va_queue_last = __get_cpu_var(bau_control).va_queue_last; | 424 | va_queue_last = __get_cpu_var(bau_control).va_queue_last; |
425 | 425 | ||
426 | msg = __get_cpu_var(bau_control).bau_msg_head; | 426 | msg = __get_cpu_var(bau_control).bau_msg_head; |
427 | while (msg->sw_ack_vector) { | 427 | while (msg->sw_ack_vector) { |
428 | count++; | 428 | count++; |
429 | fw = msg->sw_ack_vector; | 429 | fw = msg->sw_ack_vector; |
430 | msg_slot = msg - va_queue_first; | 430 | msg_slot = msg - va_queue_first; |
431 | sw_ack_slot = ffs(fw) - 1; | 431 | sw_ack_slot = ffs(fw) - 1; |
432 | 432 | ||
433 | uv_bau_process_message(msg, msg_slot, sw_ack_slot); | 433 | uv_bau_process_message(msg, msg_slot, sw_ack_slot); |
434 | 434 | ||
435 | msg++; | 435 | msg++; |
436 | if (msg > va_queue_last) | 436 | if (msg > va_queue_last) |
437 | msg = va_queue_first; | 437 | msg = va_queue_first; |
438 | __get_cpu_var(bau_control).bau_msg_head = msg; | 438 | __get_cpu_var(bau_control).bau_msg_head = msg; |
439 | } | 439 | } |
440 | if (!count) | 440 | if (!count) |
441 | __get_cpu_var(ptcstats).nomsg++; | 441 | __get_cpu_var(ptcstats).nomsg++; |
442 | else if (count > 1) | 442 | else if (count > 1) |
443 | __get_cpu_var(ptcstats).multmsg++; | 443 | __get_cpu_var(ptcstats).multmsg++; |
444 | 444 | ||
445 | time2 = get_cycles(); | 445 | time2 = get_cycles(); |
446 | __get_cpu_var(ptcstats).dflush += (time2 - time1); | 446 | __get_cpu_var(ptcstats).dflush += (time2 - time1); |
447 | 447 | ||
448 | irq_exit(); | 448 | irq_exit(); |
449 | set_irq_regs(old_regs); | 449 | set_irq_regs(old_regs); |
450 | } | 450 | } |
451 | 451 | ||
452 | /* | 452 | /* |
453 | * uv_enable_timeouts | 453 | * uv_enable_timeouts |
454 | * | 454 | * |
455 | * Each target blade (i.e. blades that have cpu's) needs to have | 455 | * Each target blade (i.e. blades that have cpu's) needs to have |
456 | * shootdown message timeouts enabled. The timeout does not cause | 456 | * shootdown message timeouts enabled. The timeout does not cause |
457 | * an interrupt, but causes an error message to be returned to | 457 | * an interrupt, but causes an error message to be returned to |
458 | * the sender. | 458 | * the sender. |
459 | */ | 459 | */ |
460 | static void uv_enable_timeouts(void) | 460 | static void uv_enable_timeouts(void) |
461 | { | 461 | { |
462 | int blade; | 462 | int blade; |
463 | int nblades; | 463 | int nblades; |
464 | int pnode; | 464 | int pnode; |
465 | unsigned long mmr_image; | 465 | unsigned long mmr_image; |
466 | 466 | ||
467 | nblades = uv_num_possible_blades(); | 467 | nblades = uv_num_possible_blades(); |
468 | 468 | ||
469 | for (blade = 0; blade < nblades; blade++) { | 469 | for (blade = 0; blade < nblades; blade++) { |
470 | if (!uv_blade_nr_possible_cpus(blade)) | 470 | if (!uv_blade_nr_possible_cpus(blade)) |
471 | continue; | 471 | continue; |
472 | 472 | ||
473 | pnode = uv_blade_to_pnode(blade); | 473 | pnode = uv_blade_to_pnode(blade); |
474 | mmr_image = | 474 | mmr_image = |
475 | uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); | 475 | uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); |
476 | /* | 476 | /* |
477 | * Set the timeout period and then lock it in, in three | 477 | * Set the timeout period and then lock it in, in three |
478 | * steps; captures and locks in the period. | 478 | * steps; captures and locks in the period. |
479 | * | 479 | * |
480 | * To program the period, the SOFT_ACK_MODE must be off. | 480 | * To program the period, the SOFT_ACK_MODE must be off. |
481 | */ | 481 | */ |
482 | mmr_image &= ~((unsigned long)1 << | 482 | mmr_image &= ~((unsigned long)1 << |
483 | UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); | 483 | UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); |
484 | uv_write_global_mmr64 | 484 | uv_write_global_mmr64 |
485 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | 485 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); |
486 | /* | 486 | /* |
487 | * Set the 4-bit period. | 487 | * Set the 4-bit period. |
488 | */ | 488 | */ |
489 | mmr_image &= ~((unsigned long)0xf << | 489 | mmr_image &= ~((unsigned long)0xf << |
490 | UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); | 490 | UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); |
491 | mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << | 491 | mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << |
492 | UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); | 492 | UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); |
493 | uv_write_global_mmr64 | 493 | uv_write_global_mmr64 |
494 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | 494 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); |
495 | /* | 495 | /* |
496 | * Subsequent reversals of the timebase bit (3) cause an | 496 | * Subsequent reversals of the timebase bit (3) cause an |
497 | * immediate timeout of one or all INTD resources as | 497 | * immediate timeout of one or all INTD resources as |
498 | * indicated in bits 2:0 (7 causes all of them to timeout). | 498 | * indicated in bits 2:0 (7 causes all of them to timeout). |
499 | */ | 499 | */ |
500 | mmr_image |= ((unsigned long)1 << | 500 | mmr_image |= ((unsigned long)1 << |
501 | UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); | 501 | UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); |
502 | uv_write_global_mmr64 | 502 | uv_write_global_mmr64 |
503 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | 503 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); |
504 | } | 504 | } |
505 | } | 505 | } |
506 | 506 | ||
507 | static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset) | 507 | static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset) |
508 | { | 508 | { |
509 | if (*offset < num_possible_cpus()) | 509 | if (*offset < num_possible_cpus()) |
510 | return offset; | 510 | return offset; |
511 | return NULL; | 511 | return NULL; |
512 | } | 512 | } |
513 | 513 | ||
514 | static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) | 514 | static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) |
515 | { | 515 | { |
516 | (*offset)++; | 516 | (*offset)++; |
517 | if (*offset < num_possible_cpus()) | 517 | if (*offset < num_possible_cpus()) |
518 | return offset; | 518 | return offset; |
519 | return NULL; | 519 | return NULL; |
520 | } | 520 | } |
521 | 521 | ||
522 | static void uv_ptc_seq_stop(struct seq_file *file, void *data) | 522 | static void uv_ptc_seq_stop(struct seq_file *file, void *data) |
523 | { | 523 | { |
524 | } | 524 | } |
525 | 525 | ||
526 | /* | 526 | /* |
527 | * Display the statistics through /proc | 527 | * Display the statistics through /proc |
528 | * data points to the cpu number | 528 | * data points to the cpu number |
529 | */ | 529 | */ |
530 | static int uv_ptc_seq_show(struct seq_file *file, void *data) | 530 | static int uv_ptc_seq_show(struct seq_file *file, void *data) |
531 | { | 531 | { |
532 | struct ptc_stats *stat; | 532 | struct ptc_stats *stat; |
533 | int cpu; | 533 | int cpu; |
534 | 534 | ||
535 | cpu = *(loff_t *)data; | 535 | cpu = *(loff_t *)data; |
536 | 536 | ||
537 | if (!cpu) { | 537 | if (!cpu) { |
538 | seq_printf(file, | 538 | seq_printf(file, |
539 | "# cpu requestor requestee one all sretry dretry ptc_i "); | 539 | "# cpu requestor requestee one all sretry dretry ptc_i "); |
540 | seq_printf(file, | 540 | seq_printf(file, |
541 | "sw_ack sflush dflush sok dnomsg dmult starget\n"); | 541 | "sw_ack sflush dflush sok dnomsg dmult starget\n"); |
542 | } | 542 | } |
543 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { | 543 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { |
544 | stat = &per_cpu(ptcstats, cpu); | 544 | stat = &per_cpu(ptcstats, cpu); |
545 | seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ", | 545 | seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ", |
546 | cpu, stat->requestor, | 546 | cpu, stat->requestor, |
547 | stat->requestee, stat->onetlb, stat->alltlb, | 547 | stat->requestee, stat->onetlb, stat->alltlb, |
548 | stat->s_retry, stat->d_retry, stat->ptc_i); | 548 | stat->s_retry, stat->d_retry, stat->ptc_i); |
549 | seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n", | 549 | seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n", |
550 | uv_read_global_mmr64(uv_cpu_to_pnode(cpu), | 550 | uv_read_global_mmr64(uv_cpu_to_pnode(cpu), |
551 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), | 551 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), |
552 | stat->sflush, stat->dflush, | 552 | stat->sflush, stat->dflush, |
553 | stat->retriesok, stat->nomsg, | 553 | stat->retriesok, stat->nomsg, |
554 | stat->multmsg, stat->ntargeted); | 554 | stat->multmsg, stat->ntargeted); |
555 | } | 555 | } |
556 | 556 | ||
557 | return 0; | 557 | return 0; |
558 | } | 558 | } |
559 | 559 | ||
560 | /* | 560 | /* |
561 | * 0: display meaning of the statistics | 561 | * 0: display meaning of the statistics |
562 | * >0: retry limit | 562 | * >0: retry limit |
563 | */ | 563 | */ |
564 | static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, | 564 | static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, |
565 | size_t count, loff_t *data) | 565 | size_t count, loff_t *data) |
566 | { | 566 | { |
567 | long newmode; | 567 | long newmode; |
568 | char optstr[64]; | 568 | char optstr[64]; |
569 | 569 | ||
570 | if (count == 0 || count > sizeof(optstr)) | 570 | if (count == 0 || count > sizeof(optstr)) |
571 | return -EINVAL; | 571 | return -EINVAL; |
572 | if (copy_from_user(optstr, user, count)) | 572 | if (copy_from_user(optstr, user, count)) |
573 | return -EFAULT; | 573 | return -EFAULT; |
574 | optstr[count - 1] = '\0'; | 574 | optstr[count - 1] = '\0'; |
575 | if (strict_strtoul(optstr, 10, &newmode) < 0) { | 575 | if (strict_strtoul(optstr, 10, &newmode) < 0) { |
576 | printk(KERN_DEBUG "%s is invalid\n", optstr); | 576 | printk(KERN_DEBUG "%s is invalid\n", optstr); |
577 | return -EINVAL; | 577 | return -EINVAL; |
578 | } | 578 | } |
579 | 579 | ||
580 | if (newmode == 0) { | 580 | if (newmode == 0) { |
581 | printk(KERN_DEBUG "# cpu: cpu number\n"); | 581 | printk(KERN_DEBUG "# cpu: cpu number\n"); |
582 | printk(KERN_DEBUG | 582 | printk(KERN_DEBUG |
583 | "requestor: times this cpu was the flush requestor\n"); | 583 | "requestor: times this cpu was the flush requestor\n"); |
584 | printk(KERN_DEBUG | 584 | printk(KERN_DEBUG |
585 | "requestee: times this cpu was requested to flush its TLBs\n"); | 585 | "requestee: times this cpu was requested to flush its TLBs\n"); |
586 | printk(KERN_DEBUG | 586 | printk(KERN_DEBUG |
587 | "one: times requested to flush a single address\n"); | 587 | "one: times requested to flush a single address\n"); |
588 | printk(KERN_DEBUG | 588 | printk(KERN_DEBUG |
589 | 		       "all: times requested to flush all TLBs\n"); | 589 | 		       "all: times requested to flush all TLBs\n"); |
590 | printk(KERN_DEBUG | 590 | printk(KERN_DEBUG |
591 | "sretry: number of retries of source-side timeouts\n"); | 591 | "sretry: number of retries of source-side timeouts\n"); |
592 | printk(KERN_DEBUG | 592 | printk(KERN_DEBUG |
593 | "dretry: number of retries of destination-side timeouts\n"); | 593 | "dretry: number of retries of destination-side timeouts\n"); |
594 | printk(KERN_DEBUG | 594 | printk(KERN_DEBUG |
595 | "ptc_i: times UV fell through to IPI-style flushes\n"); | 595 | "ptc_i: times UV fell through to IPI-style flushes\n"); |
596 | printk(KERN_DEBUG | 596 | printk(KERN_DEBUG |
597 | "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); | 597 | "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); |
598 | printk(KERN_DEBUG | 598 | printk(KERN_DEBUG |
599 | "sflush_us: cycles spent in uv_flush_tlb_others()\n"); | 599 | "sflush_us: cycles spent in uv_flush_tlb_others()\n"); |
600 | printk(KERN_DEBUG | 600 | printk(KERN_DEBUG |
601 | "dflush_us: cycles spent in handling flush requests\n"); | 601 | "dflush_us: cycles spent in handling flush requests\n"); |
602 | printk(KERN_DEBUG "sok: successes on retry\n"); | 602 | printk(KERN_DEBUG "sok: successes on retry\n"); |
603 | printk(KERN_DEBUG "dnomsg: interrupts with no message\n"); | 603 | printk(KERN_DEBUG "dnomsg: interrupts with no message\n"); |
604 | printk(KERN_DEBUG | 604 | printk(KERN_DEBUG |
605 | "dmult: interrupts with multiple messages\n"); | 605 | "dmult: interrupts with multiple messages\n"); |
606 | printk(KERN_DEBUG "starget: nodes targeted\n"); | 606 | printk(KERN_DEBUG "starget: nodes targeted\n"); |
607 | } else { | 607 | } else { |
608 | uv_bau_retry_limit = newmode; | 608 | uv_bau_retry_limit = newmode; |
609 | printk(KERN_DEBUG "timeout retry limit:%d\n", | 609 | printk(KERN_DEBUG "timeout retry limit:%d\n", |
610 | uv_bau_retry_limit); | 610 | uv_bau_retry_limit); |
611 | } | 611 | } |
612 | 612 | ||
613 | return count; | 613 | return count; |
614 | } | 614 | } |
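
Read together, the write handler is a tiny command protocol: the user's decimal string is copied in, the trailing newline is replaced by a NUL, and the parsed value either dumps the field legend (0) or becomes the retry limit (>0). A hedged user-space sketch of the same parse-and-dispatch, with strtoul standing in for strict_strtoul:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static long retry_limit = 1;

    static int handle_write(char *optstr)
    {
    	long newmode;
    	char *end;

    	optstr[strlen(optstr) - 1] = '\0';	/* strip the newline */
    	newmode = strtoul(optstr, &end, 10);
    	if (end == optstr || *end != '\0')
    		return -1;			/* -EINVAL in the kernel */
    	if (newmode == 0)
    		puts("(legend would be printed here)");
    	else
    		retry_limit = newmode;
    	return 0;
    }

    int main(void)
    {
    	char buf[] = "10\n";	/* as if written by: echo 10 > <proc file> */

    	handle_write(buf);
    	printf("retry limit: %ld\n", retry_limit);
    	return 0;
    }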
615 | 615 | ||
616 | static const struct seq_operations uv_ptc_seq_ops = { | 616 | static const struct seq_operations uv_ptc_seq_ops = { |
617 | .start = uv_ptc_seq_start, | 617 | .start = uv_ptc_seq_start, |
618 | .next = uv_ptc_seq_next, | 618 | .next = uv_ptc_seq_next, |
619 | .stop = uv_ptc_seq_stop, | 619 | .stop = uv_ptc_seq_stop, |
620 | .show = uv_ptc_seq_show | 620 | .show = uv_ptc_seq_show |
621 | }; | 621 | }; |
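
The ops table ties the four callbacks into the standard seq_file loop: start() positions an iterator at *offset, show() renders one record, next() advances, and stop() runs once at the end. Here the iterator is simply the offset itself, counting possible CPUs. A self-contained model of that contract (user-space C, invented constant):

    #include <stdio.h>

    #define NCPUS 4	/* stand-in for num_possible_cpus() */

    static long *seq_start(long *pos) { return *pos < NCPUS ? pos : NULL; }
    static long *seq_next(long *pos)  { ++*pos; return *pos < NCPUS ? pos : NULL; }
    static void seq_show(long *pos)   { printf("cpu %ld ...\n", *pos); }

    int main(void)
    {
    	long pos = 0;

    	for (long *it = seq_start(&pos); it; it = seq_next(it))
    		seq_show(it);
    	/* seq_stop() would be called here; this one is a no-op */
    	return 0;
    }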
622 | 622 | ||
623 | static int uv_ptc_proc_open(struct inode *inode, struct file *file) | 623 | static int uv_ptc_proc_open(struct inode *inode, struct file *file) |
624 | { | 624 | { |
625 | return seq_open(file, &uv_ptc_seq_ops); | 625 | return seq_open(file, &uv_ptc_seq_ops); |
626 | } | 626 | } |
627 | 627 | ||
628 | static const struct file_operations proc_uv_ptc_operations = { | 628 | static const struct file_operations proc_uv_ptc_operations = { |
629 | .open = uv_ptc_proc_open, | 629 | .open = uv_ptc_proc_open, |
630 | .read = seq_read, | 630 | .read = seq_read, |
631 | .write = uv_ptc_proc_write, | 631 | .write = uv_ptc_proc_write, |
632 | .llseek = seq_lseek, | 632 | .llseek = seq_lseek, |
633 | .release = seq_release, | 633 | .release = seq_release, |
634 | }; | 634 | }; |
635 | 635 | ||
636 | static int __init uv_ptc_init(void) | 636 | static int __init uv_ptc_init(void) |
637 | { | 637 | { |
638 | struct proc_dir_entry *proc_uv_ptc; | 638 | struct proc_dir_entry *proc_uv_ptc; |
639 | 639 | ||
640 | if (!is_uv_system()) | 640 | if (!is_uv_system()) |
641 | return 0; | 641 | return 0; |
642 | 642 | ||
643 | proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); | 643 | proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); |
644 | if (!proc_uv_ptc) { | 644 | if (!proc_uv_ptc) { |
645 | printk(KERN_ERR "unable to create %s proc entry\n", | 645 | printk(KERN_ERR "unable to create %s proc entry\n", |
646 | UV_PTC_BASENAME); | 646 | UV_PTC_BASENAME); |
647 | return -EINVAL; | 647 | return -EINVAL; |
648 | } | 648 | } |
649 | proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; | 649 | proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; |
650 | return 0; | 650 | return 0; |
651 | } | 651 | } |
652 | 652 | ||
653 | /* | 653 | /* |
654 | * begin the initialization of the per-blade control structures | 654 | * begin the initialization of the per-blade control structures |
655 | */ | 655 | */ |
656 | static struct bau_control * __init uv_table_bases_init(int blade, int node) | 656 | static struct bau_control * __init uv_table_bases_init(int blade, int node) |
657 | { | 657 | { |
658 | int i; | 658 | int i; |
659 | struct bau_msg_status *msp; | 659 | struct bau_msg_status *msp; |
660 | struct bau_control *bau_tabp; | 660 | struct bau_control *bau_tabp; |
661 | 661 | ||
662 | bau_tabp = | 662 | bau_tabp = |
663 | kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node); | 663 | kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node); |
664 | BUG_ON(!bau_tabp); | 664 | BUG_ON(!bau_tabp); |
665 | 665 | ||
666 | bau_tabp->msg_statuses = | 666 | bau_tabp->msg_statuses = |
667 | kmalloc_node(sizeof(struct bau_msg_status) * | 667 | kmalloc_node(sizeof(struct bau_msg_status) * |
668 | DEST_Q_SIZE, GFP_KERNEL, node); | 668 | DEST_Q_SIZE, GFP_KERNEL, node); |
669 | BUG_ON(!bau_tabp->msg_statuses); | 669 | BUG_ON(!bau_tabp->msg_statuses); |
670 | 670 | ||
671 | for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++) | 671 | for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++) |
672 | bau_cpubits_clear(&msp->seen_by, (int) | 672 | bau_cpubits_clear(&msp->seen_by, (int) |
673 | uv_blade_nr_possible_cpus(blade)); | 673 | uv_blade_nr_possible_cpus(blade)); |
674 | 674 | ||
675 | uv_bau_table_bases[blade] = bau_tabp; | 675 | uv_bau_table_bases[blade] = bau_tabp; |
676 | 676 | ||
677 | return bau_tabp; | 677 | return bau_tabp; |
678 | } | 678 | } |
679 | 679 | ||
680 | /* | 680 | /* |
681 | * finish the initialization of the per-blade control structures | 681 | * finish the initialization of the per-blade control structures |
682 | */ | 682 | */ |
683 | static void __init | 683 | static void __init |
684 | uv_table_bases_finish(int blade, | 684 | uv_table_bases_finish(int blade, |
685 | struct bau_control *bau_tablesp, | 685 | struct bau_control *bau_tablesp, |
686 | struct bau_desc *adp) | 686 | struct bau_desc *adp) |
687 | { | 687 | { |
688 | struct bau_control *bcp; | 688 | struct bau_control *bcp; |
689 | int cpu; | 689 | int cpu; |
690 | 690 | ||
691 | for_each_present_cpu(cpu) { | 691 | for_each_present_cpu(cpu) { |
692 | if (blade != uv_cpu_to_blade_id(cpu)) | 692 | if (blade != uv_cpu_to_blade_id(cpu)) |
693 | continue; | 693 | continue; |
694 | 694 | ||
695 | bcp = (struct bau_control *)&per_cpu(bau_control, cpu); | 695 | bcp = (struct bau_control *)&per_cpu(bau_control, cpu); |
696 | bcp->bau_msg_head = bau_tablesp->va_queue_first; | 696 | bcp->bau_msg_head = bau_tablesp->va_queue_first; |
697 | bcp->va_queue_first = bau_tablesp->va_queue_first; | 697 | bcp->va_queue_first = bau_tablesp->va_queue_first; |
698 | bcp->va_queue_last = bau_tablesp->va_queue_last; | 698 | bcp->va_queue_last = bau_tablesp->va_queue_last; |
699 | bcp->msg_statuses = bau_tablesp->msg_statuses; | 699 | bcp->msg_statuses = bau_tablesp->msg_statuses; |
700 | bcp->descriptor_base = adp; | 700 | bcp->descriptor_base = adp; |
701 | } | 701 | } |
702 | } | 702 | } |
703 | 703 | ||
704 | /* | 704 | /* |
705 | * initialize the sending side's sending buffers | 705 | * initialize the sending side's sending buffers |
706 | */ | 706 | */ |
707 | static struct bau_desc * __init | 707 | static struct bau_desc * __init |
708 | uv_activation_descriptor_init(int node, int pnode) | 708 | uv_activation_descriptor_init(int node, int pnode) |
709 | { | 709 | { |
710 | int i; | 710 | int i; |
711 | unsigned long pa; | 711 | unsigned long pa; |
712 | unsigned long m; | 712 | unsigned long m; |
713 | unsigned long n; | 713 | unsigned long n; |
714 | unsigned long mmr_image; | 714 | unsigned long mmr_image; |
715 | struct bau_desc *adp; | 715 | struct bau_desc *adp; |
716 | struct bau_desc *ad2; | 716 | struct bau_desc *ad2; |
717 | 717 | ||
718 | adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node); | 718 | adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node); |
719 | BUG_ON(!adp); | 719 | BUG_ON(!adp); |
720 | 720 | ||
721 | 	pa = uv_gpa(adp); /* need the real nasid */ | 721 | 	pa = uv_gpa(adp); /* need the real nasid */ |
722 | n = pa >> uv_nshift; | 722 | n = pa >> uv_nshift; |
723 | m = pa & uv_mmask; | 723 | m = pa & uv_mmask; |
724 | 724 | ||
725 | mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE); | 725 | mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE); |
726 | if (mmr_image) { | 726 | if (mmr_image) { |
727 | uv_write_global_mmr64(pnode, (unsigned long) | 727 | uv_write_global_mmr64(pnode, (unsigned long) |
728 | UVH_LB_BAU_SB_DESCRIPTOR_BASE, | 728 | UVH_LB_BAU_SB_DESCRIPTOR_BASE, |
729 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); | 729 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); |
730 | } | 730 | } |
731 | 731 | ||
732 | for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) { | 732 | for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) { |
733 | memset(ad2, 0, sizeof(struct bau_desc)); | 733 | memset(ad2, 0, sizeof(struct bau_desc)); |
734 | ad2->header.sw_ack_flag = 1; | 734 | ad2->header.sw_ack_flag = 1; |
735 | /* | 735 | /* |
736 | * base_dest_nodeid is the first node in the partition, so | 736 | * base_dest_nodeid is the first node in the partition, so |
737 | * the bit map will indicate partition-relative node numbers. | 737 | * the bit map will indicate partition-relative node numbers. |
738 | * note that base_dest_nodeid is actually a nasid. | 738 | * note that base_dest_nodeid is actually a nasid. |
739 | */ | 739 | */ |
740 | ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; | 740 | ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; |
741 | ad2->header.command = UV_NET_ENDPOINT_INTD; | 741 | ad2->header.command = UV_NET_ENDPOINT_INTD; |
742 | ad2->header.int_both = 1; | 742 | ad2->header.int_both = 1; |
743 | /* | 743 | /* |
744 | * all others need to be set to zero: | 744 | * all others need to be set to zero: |
745 | * fairness chaining multilevel count replied_to | 745 | * fairness chaining multilevel count replied_to |
746 | */ | 746 | */ |
747 | } | 747 | } |
748 | return adp; | 748 | return adp; |
749 | } | 749 | } |
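
uv_gpa() returns a global physical address whose high bits carry the node and whose low bits carry the node-local offset; the shift-and-mask above splits those out before they are packed into the descriptor-base MMR. A toy illustration of the split, with an invented shift rather than the real n_val:

    #include <stdio.h>

    int main(void)
    {
    	unsigned long nshift = 11;			/* illustrative n_val */
    	unsigned long mmask  = (1UL << nshift) - 1;
    	unsigned long pa     = (3UL << nshift) | 0x123;	/* node 3, offset 0x123 */

    	printf("node %lu, offset %#lx\n", pa >> nshift, pa & mmask);
    	return 0;
    }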
750 | 750 | ||
751 | /* | 751 | /* |
752 | * initialize the destination side's receiving buffers | 752 | * initialize the destination side's receiving buffers |
753 | */ | 753 | */ |
754 | static struct bau_payload_queue_entry * __init | 754 | static struct bau_payload_queue_entry * __init |
755 | uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) | 755 | uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) |
756 | { | 756 | { |
757 | struct bau_payload_queue_entry *pqp; | 757 | struct bau_payload_queue_entry *pqp; |
758 | unsigned long pa; | 758 | unsigned long pa; |
759 | int pn; | 759 | int pn; |
760 | char *cp; | 760 | char *cp; |
761 | 761 | ||
762 | pqp = (struct bau_payload_queue_entry *) kmalloc_node( | 762 | pqp = (struct bau_payload_queue_entry *) kmalloc_node( |
763 | (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), | 763 | (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), |
764 | GFP_KERNEL, node); | 764 | GFP_KERNEL, node); |
765 | BUG_ON(!pqp); | 765 | BUG_ON(!pqp); |
766 | 766 | ||
767 | cp = (char *)pqp + 31; | 767 | cp = (char *)pqp + 31; |
768 | pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); | 768 | pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); |
769 | bau_tablesp->va_queue_first = pqp; | 769 | bau_tablesp->va_queue_first = pqp; |
770 | /* | 770 | /* |
771 | * need the pnode of where the memory was really allocated | 771 | * need the pnode of where the memory was really allocated |
772 | */ | 772 | */ |
773 | pa = uv_gpa(pqp); | 773 | pa = uv_gpa(pqp); |
774 | pn = pa >> uv_nshift; | 774 | pn = pa >> uv_nshift; |
775 | uv_write_global_mmr64(pnode, | 775 | uv_write_global_mmr64(pnode, |
776 | UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, | 776 | UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, |
777 | ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | | 777 | ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | |
778 | uv_physnodeaddr(pqp)); | 778 | uv_physnodeaddr(pqp)); |
779 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, | 779 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, |
780 | uv_physnodeaddr(pqp)); | 780 | uv_physnodeaddr(pqp)); |
781 | bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1); | 781 | bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1); |
782 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, | 782 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, |
783 | (unsigned long) | 783 | (unsigned long) |
784 | uv_physnodeaddr(bau_tablesp->va_queue_last)); | 784 | uv_physnodeaddr(bau_tablesp->va_queue_last)); |
785 | memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); | 785 | memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); |
786 | 786 | ||
787 | return pqp; | 787 | return pqp; |
788 | } | 788 | } |
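
The "+ 31" followed by ">> 5 << 5" arithmetic above rounds the kmalloc'd pointer up to the next 32-byte boundary (the alignment the payload queue needs); allocating DEST_Q_SIZE + 1 entries leaves room for the slack. The same round-up written as a stand-alone helper:

    #include <stdio.h>
    #include <stdint.h>

    /* round a pointer up to the next 32-byte boundary */
    static void *align_up_32(void *p)
    {
    	return (void *)(((uintptr_t)p + 31) & ~(uintptr_t)31);
    }

    int main(void)
    {
    	char buf[64];

    	printf("%p -> %p\n", (void *)(buf + 1), align_up_32(buf + 1));
    	return 0;
    }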
789 | 789 | ||
790 | /* | 790 | /* |
791 | * Initialization of each UV blade's structures | 791 | * Initialization of each UV blade's structures |
792 | */ | 792 | */ |
793 | static int __init uv_init_blade(int blade) | 793 | static int __init uv_init_blade(int blade) |
794 | { | 794 | { |
795 | int node; | 795 | int node; |
796 | int pnode; | 796 | int pnode; |
797 | unsigned long pa; | 797 | unsigned long pa; |
798 | unsigned long apicid; | 798 | unsigned long apicid; |
799 | struct bau_desc *adp; | 799 | struct bau_desc *adp; |
800 | struct bau_payload_queue_entry *pqp; | 800 | struct bau_payload_queue_entry *pqp; |
801 | struct bau_control *bau_tablesp; | 801 | struct bau_control *bau_tablesp; |
802 | 802 | ||
803 | node = blade_to_first_node(blade); | 803 | node = blade_to_first_node(blade); |
804 | bau_tablesp = uv_table_bases_init(blade, node); | 804 | bau_tablesp = uv_table_bases_init(blade, node); |
805 | pnode = uv_blade_to_pnode(blade); | 805 | pnode = uv_blade_to_pnode(blade); |
806 | adp = uv_activation_descriptor_init(node, pnode); | 806 | adp = uv_activation_descriptor_init(node, pnode); |
807 | pqp = uv_payload_queue_init(node, pnode, bau_tablesp); | 807 | pqp = uv_payload_queue_init(node, pnode, bau_tablesp); |
808 | uv_table_bases_finish(blade, bau_tablesp, adp); | 808 | uv_table_bases_finish(blade, bau_tablesp, adp); |
809 | /* | 809 | /* |
810 | * the below initialization can't be in firmware because the | 810 | * the below initialization can't be in firmware because the |
811 | * messaging IRQ will be determined by the OS | 811 | * messaging IRQ will be determined by the OS |
812 | */ | 812 | */ |
813 | apicid = blade_to_first_apicid(blade); | 813 | apicid = blade_to_first_apicid(blade); |
814 | pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); | 814 | pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); |
815 | if ((pa & 0xff) != UV_BAU_MESSAGE) { | 815 | if ((pa & 0xff) != UV_BAU_MESSAGE) { |
816 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | 816 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, |
817 | ((apicid << 32) | UV_BAU_MESSAGE)); | 817 | ((apicid << 32) | UV_BAU_MESSAGE)); |
818 | } | 818 | } |
819 | return 0; | 819 | return 0; |
820 | } | 820 | } |
821 | 821 | ||
822 | /* | 822 | /* |
823 | * Initialization of BAU-related structures | 823 | * Initialization of BAU-related structures |
824 | */ | 824 | */ |
825 | static int __init uv_bau_init(void) | 825 | static int __init uv_bau_init(void) |
826 | { | 826 | { |
827 | int blade; | 827 | int blade; |
828 | int nblades; | 828 | int nblades; |
829 | int cur_cpu; | 829 | int cur_cpu; |
830 | 830 | ||
831 | if (!is_uv_system()) | 831 | if (!is_uv_system()) |
832 | return 0; | 832 | return 0; |
833 | 833 | ||
834 | for_each_possible_cpu(cur_cpu) | 834 | for_each_possible_cpu(cur_cpu) |
835 | alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), | 835 | zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), |
836 | GFP_KERNEL, cpu_to_node(cur_cpu)); | 836 | GFP_KERNEL, cpu_to_node(cur_cpu)); |
837 | 837 | ||
838 | uv_bau_retry_limit = 1; | 838 | uv_bau_retry_limit = 1; |
839 | uv_nshift = uv_hub_info->n_val; | 839 | uv_nshift = uv_hub_info->n_val; |
840 | uv_mmask = (1UL << uv_hub_info->n_val) - 1; | 840 | uv_mmask = (1UL << uv_hub_info->n_val) - 1; |
841 | nblades = uv_num_possible_blades(); | 841 | nblades = uv_num_possible_blades(); |
842 | 842 | ||
843 | uv_bau_table_bases = (struct bau_control **) | 843 | uv_bau_table_bases = (struct bau_control **) |
844 | kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL); | 844 | kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL); |
845 | BUG_ON(!uv_bau_table_bases); | 845 | BUG_ON(!uv_bau_table_bases); |
846 | 846 | ||
847 | uv_partition_base_pnode = 0x7fffffff; | 847 | uv_partition_base_pnode = 0x7fffffff; |
848 | for (blade = 0; blade < nblades; blade++) | 848 | for (blade = 0; blade < nblades; blade++) |
849 | if (uv_blade_nr_possible_cpus(blade) && | 849 | if (uv_blade_nr_possible_cpus(blade) && |
850 | (uv_blade_to_pnode(blade) < uv_partition_base_pnode)) | 850 | (uv_blade_to_pnode(blade) < uv_partition_base_pnode)) |
851 | uv_partition_base_pnode = uv_blade_to_pnode(blade); | 851 | uv_partition_base_pnode = uv_blade_to_pnode(blade); |
852 | for (blade = 0; blade < nblades; blade++) | 852 | for (blade = 0; blade < nblades; blade++) |
853 | if (uv_blade_nr_possible_cpus(blade)) | 853 | if (uv_blade_nr_possible_cpus(blade)) |
854 | uv_init_blade(blade); | 854 | uv_init_blade(blade); |
855 | 855 | ||
856 | alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); | 856 | alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); |
857 | uv_enable_timeouts(); | 857 | uv_enable_timeouts(); |
858 | 858 | ||
859 | return 0; | 859 | return 0; |
860 | } | 860 | } |
861 | __initcall(uv_bau_init); | 861 | __initcall(uv_bau_init); |
862 | __initcall(uv_ptc_init); | 862 | __initcall(uv_ptc_init); |
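
The only functional change in this hunk is alloc_cpumask_var_node becoming zalloc_cpumask_var_node. When MAXSMP selects CONFIG_CPUMASK_OFFSTACK, cpumask_var_t becomes a heap allocation, so the plain alloc variant hands back uninitialized bits, whereas the static-array build was implicitly zeroed; zalloc restores that guarantee. A rough user-space analogy (the struct is a stand-in, not the kernel type):

    #include <stdio.h>
    #include <stdlib.h>

    struct cpumask { unsigned long bits[8]; };	/* stand-in only */

    int main(void)
    {
    	struct cpumask *m1 = malloc(sizeof(*m1));	/* like alloc_*: garbage bits */
    	struct cpumask *m2 = calloc(1, sizeof(*m2));	/* like zalloc_*: zeroed bits */

    	printf("zeroed mask word 0: %lu\n", m2->bits[0]);
    	free(m1);
    	free(m2);
    	return 0;
    }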
863 | 863 |
drivers/acpi/processor_core.c
1 | /* | 1 | /* |
2 | * acpi_processor.c - ACPI Processor Driver ($Revision: 71 $) | 2 | * acpi_processor.c - ACPI Processor Driver ($Revision: 71 $) |
3 | * | 3 | * |
4 | * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> | 4 | * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> |
5 | * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> | 5 | * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> |
6 | * Copyright (C) 2004 Dominik Brodowski <linux@brodo.de> | 6 | * Copyright (C) 2004 Dominik Brodowski <linux@brodo.de> |
7 | * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | 7 | * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> |
8 | * - Added processor hotplug support | 8 | * - Added processor hotplug support |
9 | * | 9 | * |
10 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 10 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
11 | * | 11 | * |
12 | * This program is free software; you can redistribute it and/or modify | 12 | * This program is free software; you can redistribute it and/or modify |
13 | * it under the terms of the GNU General Public License as published by | 13 | * it under the terms of the GNU General Public License as published by |
14 | * the Free Software Foundation; either version 2 of the License, or (at | 14 | * the Free Software Foundation; either version 2 of the License, or (at |
15 | * your option) any later version. | 15 | * your option) any later version. |
16 | * | 16 | * |
17 | * This program is distributed in the hope that it will be useful, but | 17 | * This program is distributed in the hope that it will be useful, but |
18 | * WITHOUT ANY WARRANTY; without even the implied warranty of | 18 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
20 | * General Public License for more details. | 20 | * General Public License for more details. |
21 | * | 21 | * |
22 | * You should have received a copy of the GNU General Public License along | 22 | * You should have received a copy of the GNU General Public License along |
23 | * with this program; if not, write to the Free Software Foundation, Inc., | 23 | * with this program; if not, write to the Free Software Foundation, Inc., |
24 | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. | 24 | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. |
25 | * | 25 | * |
26 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 26 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
27 | * TBD: | 27 | * TBD: |
28 | * 1. Make # power states dynamic. | 28 | * 1. Make # power states dynamic. |
29 | * 2. Support duty_cycle values that span bit 4. | 29 | * 2. Support duty_cycle values that span bit 4. |
30 | * 3. Optimize by having the scheduler determine busyness instead of | 30 | * 3. Optimize by having the scheduler determine busyness instead of |
31 | * having us try to calculate it here. | 31 | * having us try to calculate it here. |
32 | * 4. Need C1 timing -- must modify kernel (IRQ handler) to get this. | 32 | * 4. Need C1 timing -- must modify kernel (IRQ handler) to get this. |
33 | */ | 33 | */ |
34 | 34 | ||
35 | #include <linux/kernel.h> | 35 | #include <linux/kernel.h> |
36 | #include <linux/module.h> | 36 | #include <linux/module.h> |
37 | #include <linux/init.h> | 37 | #include <linux/init.h> |
38 | #include <linux/types.h> | 38 | #include <linux/types.h> |
39 | #include <linux/pci.h> | 39 | #include <linux/pci.h> |
40 | #include <linux/pm.h> | 40 | #include <linux/pm.h> |
41 | #include <linux/cpufreq.h> | 41 | #include <linux/cpufreq.h> |
42 | #include <linux/cpu.h> | 42 | #include <linux/cpu.h> |
43 | #include <linux/proc_fs.h> | 43 | #include <linux/proc_fs.h> |
44 | #include <linux/seq_file.h> | 44 | #include <linux/seq_file.h> |
45 | #include <linux/dmi.h> | 45 | #include <linux/dmi.h> |
46 | #include <linux/moduleparam.h> | 46 | #include <linux/moduleparam.h> |
47 | #include <linux/cpuidle.h> | 47 | #include <linux/cpuidle.h> |
48 | 48 | ||
49 | #include <asm/io.h> | 49 | #include <asm/io.h> |
50 | #include <asm/system.h> | 50 | #include <asm/system.h> |
51 | #include <asm/cpu.h> | 51 | #include <asm/cpu.h> |
52 | #include <asm/delay.h> | 52 | #include <asm/delay.h> |
53 | #include <asm/uaccess.h> | 53 | #include <asm/uaccess.h> |
54 | #include <asm/processor.h> | 54 | #include <asm/processor.h> |
55 | #include <asm/smp.h> | 55 | #include <asm/smp.h> |
56 | #include <asm/acpi.h> | 56 | #include <asm/acpi.h> |
57 | 57 | ||
58 | #include <acpi/acpi_bus.h> | 58 | #include <acpi/acpi_bus.h> |
59 | #include <acpi/acpi_drivers.h> | 59 | #include <acpi/acpi_drivers.h> |
60 | #include <acpi/processor.h> | 60 | #include <acpi/processor.h> |
61 | 61 | ||
62 | #define ACPI_PROCESSOR_CLASS "processor" | 62 | #define ACPI_PROCESSOR_CLASS "processor" |
63 | #define ACPI_PROCESSOR_DEVICE_NAME "Processor" | 63 | #define ACPI_PROCESSOR_DEVICE_NAME "Processor" |
64 | #define ACPI_PROCESSOR_FILE_INFO "info" | 64 | #define ACPI_PROCESSOR_FILE_INFO "info" |
65 | #define ACPI_PROCESSOR_FILE_THROTTLING "throttling" | 65 | #define ACPI_PROCESSOR_FILE_THROTTLING "throttling" |
66 | #define ACPI_PROCESSOR_FILE_LIMIT "limit" | 66 | #define ACPI_PROCESSOR_FILE_LIMIT "limit" |
67 | #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80 | 67 | #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80 |
68 | #define ACPI_PROCESSOR_NOTIFY_POWER 0x81 | 68 | #define ACPI_PROCESSOR_NOTIFY_POWER 0x81 |
69 | #define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82 | 69 | #define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82 |
70 | 70 | ||
71 | #define ACPI_PROCESSOR_LIMIT_USER 0 | 71 | #define ACPI_PROCESSOR_LIMIT_USER 0 |
72 | #define ACPI_PROCESSOR_LIMIT_THERMAL 1 | 72 | #define ACPI_PROCESSOR_LIMIT_THERMAL 1 |
73 | 73 | ||
74 | #define _COMPONENT ACPI_PROCESSOR_COMPONENT | 74 | #define _COMPONENT ACPI_PROCESSOR_COMPONENT |
75 | ACPI_MODULE_NAME("processor_core"); | 75 | ACPI_MODULE_NAME("processor_core"); |
76 | 76 | ||
77 | MODULE_AUTHOR("Paul Diefenbaugh"); | 77 | MODULE_AUTHOR("Paul Diefenbaugh"); |
78 | MODULE_DESCRIPTION("ACPI Processor Driver"); | 78 | MODULE_DESCRIPTION("ACPI Processor Driver"); |
79 | MODULE_LICENSE("GPL"); | 79 | MODULE_LICENSE("GPL"); |
80 | 80 | ||
81 | static int acpi_processor_add(struct acpi_device *device); | 81 | static int acpi_processor_add(struct acpi_device *device); |
82 | static int acpi_processor_start(struct acpi_device *device); | 82 | static int acpi_processor_start(struct acpi_device *device); |
83 | static int acpi_processor_remove(struct acpi_device *device, int type); | 83 | static int acpi_processor_remove(struct acpi_device *device, int type); |
84 | static int acpi_processor_info_open_fs(struct inode *inode, struct file *file); | 84 | static int acpi_processor_info_open_fs(struct inode *inode, struct file *file); |
85 | static void acpi_processor_notify(struct acpi_device *device, u32 event); | 85 | static void acpi_processor_notify(struct acpi_device *device, u32 event); |
86 | static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu); | 86 | static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu); |
87 | static int acpi_processor_handle_eject(struct acpi_processor *pr); | 87 | static int acpi_processor_handle_eject(struct acpi_processor *pr); |
88 | 88 | ||
89 | 89 | ||
90 | static const struct acpi_device_id processor_device_ids[] = { | 90 | static const struct acpi_device_id processor_device_ids[] = { |
91 | {ACPI_PROCESSOR_OBJECT_HID, 0}, | 91 | {ACPI_PROCESSOR_OBJECT_HID, 0}, |
92 | {ACPI_PROCESSOR_HID, 0}, | 92 | {ACPI_PROCESSOR_HID, 0}, |
93 | {"", 0}, | 93 | {"", 0}, |
94 | }; | 94 | }; |
95 | MODULE_DEVICE_TABLE(acpi, processor_device_ids); | 95 | MODULE_DEVICE_TABLE(acpi, processor_device_ids); |
96 | 96 | ||
97 | static struct acpi_driver acpi_processor_driver = { | 97 | static struct acpi_driver acpi_processor_driver = { |
98 | .name = "processor", | 98 | .name = "processor", |
99 | .class = ACPI_PROCESSOR_CLASS, | 99 | .class = ACPI_PROCESSOR_CLASS, |
100 | .ids = processor_device_ids, | 100 | .ids = processor_device_ids, |
101 | .ops = { | 101 | .ops = { |
102 | .add = acpi_processor_add, | 102 | .add = acpi_processor_add, |
103 | .remove = acpi_processor_remove, | 103 | .remove = acpi_processor_remove, |
104 | .start = acpi_processor_start, | 104 | .start = acpi_processor_start, |
105 | .suspend = acpi_processor_suspend, | 105 | .suspend = acpi_processor_suspend, |
106 | .resume = acpi_processor_resume, | 106 | .resume = acpi_processor_resume, |
107 | .notify = acpi_processor_notify, | 107 | .notify = acpi_processor_notify, |
108 | }, | 108 | }, |
109 | }; | 109 | }; |
110 | 110 | ||
111 | #define INSTALL_NOTIFY_HANDLER 1 | 111 | #define INSTALL_NOTIFY_HANDLER 1 |
112 | #define UNINSTALL_NOTIFY_HANDLER 2 | 112 | #define UNINSTALL_NOTIFY_HANDLER 2 |
113 | 113 | ||
114 | static const struct file_operations acpi_processor_info_fops = { | 114 | static const struct file_operations acpi_processor_info_fops = { |
115 | .owner = THIS_MODULE, | 115 | .owner = THIS_MODULE, |
116 | .open = acpi_processor_info_open_fs, | 116 | .open = acpi_processor_info_open_fs, |
117 | .read = seq_read, | 117 | .read = seq_read, |
118 | .llseek = seq_lseek, | 118 | .llseek = seq_lseek, |
119 | .release = single_release, | 119 | .release = single_release, |
120 | }; | 120 | }; |
121 | 121 | ||
122 | DEFINE_PER_CPU(struct acpi_processor *, processors); | 122 | DEFINE_PER_CPU(struct acpi_processor *, processors); |
123 | struct acpi_processor_errata errata __read_mostly; | 123 | struct acpi_processor_errata errata __read_mostly; |
124 | static int set_no_mwait(const struct dmi_system_id *id) | 124 | static int set_no_mwait(const struct dmi_system_id *id) |
125 | { | 125 | { |
126 | printk(KERN_NOTICE PREFIX "%s detected - " | 126 | printk(KERN_NOTICE PREFIX "%s detected - " |
127 | "disabling mwait for CPU C-states\n", id->ident); | 127 | "disabling mwait for CPU C-states\n", id->ident); |
128 | idle_nomwait = 1; | 128 | idle_nomwait = 1; |
129 | return 0; | 129 | return 0; |
130 | } | 130 | } |
131 | 131 | ||
132 | static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { | 132 | static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { |
133 | { | 133 | { |
134 | set_no_mwait, "IFL91 board", { | 134 | set_no_mwait, "IFL91 board", { |
135 | DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"), | 135 | DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"), |
136 | DMI_MATCH(DMI_SYS_VENDOR, "ZEPTO"), | 136 | DMI_MATCH(DMI_SYS_VENDOR, "ZEPTO"), |
137 | DMI_MATCH(DMI_PRODUCT_VERSION, "3215W"), | 137 | DMI_MATCH(DMI_PRODUCT_VERSION, "3215W"), |
138 | DMI_MATCH(DMI_BOARD_NAME, "IFL91") }, NULL}, | 138 | DMI_MATCH(DMI_BOARD_NAME, "IFL91") }, NULL}, |
139 | { | 139 | { |
140 | set_no_mwait, "Extensa 5220", { | 140 | set_no_mwait, "Extensa 5220", { |
141 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | 141 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), |
142 | DMI_MATCH(DMI_SYS_VENDOR, "Acer"), | 142 | DMI_MATCH(DMI_SYS_VENDOR, "Acer"), |
143 | DMI_MATCH(DMI_PRODUCT_VERSION, "0100"), | 143 | DMI_MATCH(DMI_PRODUCT_VERSION, "0100"), |
144 | DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL}, | 144 | DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL}, |
145 | {}, | 145 | {}, |
146 | }; | 146 | }; |
147 | 147 | ||
148 | /* -------------------------------------------------------------------------- | 148 | /* -------------------------------------------------------------------------- |
149 | Errata Handling | 149 | Errata Handling |
150 | -------------------------------------------------------------------------- */ | 150 | -------------------------------------------------------------------------- */ |
151 | 151 | ||
152 | static int acpi_processor_errata_piix4(struct pci_dev *dev) | 152 | static int acpi_processor_errata_piix4(struct pci_dev *dev) |
153 | { | 153 | { |
154 | u8 value1 = 0; | 154 | u8 value1 = 0; |
155 | u8 value2 = 0; | 155 | u8 value2 = 0; |
156 | 156 | ||
157 | 157 | ||
158 | if (!dev) | 158 | if (!dev) |
159 | return -EINVAL; | 159 | return -EINVAL; |
160 | 160 | ||
161 | /* | 161 | /* |
162 | * Note that 'dev' references the PIIX4 ACPI Controller. | 162 | * Note that 'dev' references the PIIX4 ACPI Controller. |
163 | */ | 163 | */ |
164 | 164 | ||
165 | switch (dev->revision) { | 165 | switch (dev->revision) { |
166 | case 0: | 166 | case 0: |
167 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4 A-step\n")); | 167 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4 A-step\n")); |
168 | break; | 168 | break; |
169 | case 1: | 169 | case 1: |
170 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4 B-step\n")); | 170 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4 B-step\n")); |
171 | break; | 171 | break; |
172 | case 2: | 172 | case 2: |
173 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4E\n")); | 173 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4E\n")); |
174 | break; | 174 | break; |
175 | case 3: | 175 | case 3: |
176 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4M\n")); | 176 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4M\n")); |
177 | break; | 177 | break; |
178 | default: | 178 | default: |
179 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found unknown PIIX4\n")); | 179 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found unknown PIIX4\n")); |
180 | break; | 180 | break; |
181 | } | 181 | } |
182 | 182 | ||
183 | switch (dev->revision) { | 183 | switch (dev->revision) { |
184 | 184 | ||
185 | case 0: /* PIIX4 A-step */ | 185 | case 0: /* PIIX4 A-step */ |
186 | case 1: /* PIIX4 B-step */ | 186 | case 1: /* PIIX4 B-step */ |
187 | /* | 187 | /* |
188 | * See specification changes #13 ("Manual Throttle Duty Cycle") | 188 | * See specification changes #13 ("Manual Throttle Duty Cycle") |
189 | * and #14 ("Enabling and Disabling Manual Throttle"), plus | 189 | * and #14 ("Enabling and Disabling Manual Throttle"), plus |
190 | * erratum #5 ("STPCLK# Deassertion Time") from the January | 190 | * erratum #5 ("STPCLK# Deassertion Time") from the January |
191 | * 2002 PIIX4 specification update. Applies to only older | 191 | * 2002 PIIX4 specification update. Applies to only older |
192 | * PIIX4 models. | 192 | * PIIX4 models. |
193 | */ | 193 | */ |
194 | errata.piix4.throttle = 1; | 194 | errata.piix4.throttle = 1; |
195 | 195 | ||
196 | case 2: /* PIIX4E */ | 196 | case 2: /* PIIX4E */ |
197 | case 3: /* PIIX4M */ | 197 | case 3: /* PIIX4M */ |
198 | /* | 198 | /* |
199 | * See erratum #18 ("C3 Power State/BMIDE and Type-F DMA | 199 | * See erratum #18 ("C3 Power State/BMIDE and Type-F DMA |
200 | * Livelock") from the January 2002 PIIX4 specification update. | 200 | * Livelock") from the January 2002 PIIX4 specification update. |
201 | * Applies to all PIIX4 models. | 201 | * Applies to all PIIX4 models. |
202 | */ | 202 | */ |
203 | 203 | ||
204 | /* | 204 | /* |
205 | * BM-IDE | 205 | * BM-IDE |
206 | * ------ | 206 | * ------ |
207 | * Find the PIIX4 IDE Controller and get the Bus Master IDE | 207 | * Find the PIIX4 IDE Controller and get the Bus Master IDE |
208 | * Status register address. We'll use this later to read | 208 | * Status register address. We'll use this later to read |
209 | * each IDE controller's DMA status to make sure we catch all | 209 | * each IDE controller's DMA status to make sure we catch all |
210 | * DMA activity. | 210 | * DMA activity. |
211 | */ | 211 | */ |
212 | dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, | 212 | dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, |
213 | PCI_DEVICE_ID_INTEL_82371AB, | 213 | PCI_DEVICE_ID_INTEL_82371AB, |
214 | PCI_ANY_ID, PCI_ANY_ID, NULL); | 214 | PCI_ANY_ID, PCI_ANY_ID, NULL); |
215 | if (dev) { | 215 | if (dev) { |
216 | errata.piix4.bmisx = pci_resource_start(dev, 4); | 216 | errata.piix4.bmisx = pci_resource_start(dev, 4); |
217 | pci_dev_put(dev); | 217 | pci_dev_put(dev); |
218 | } | 218 | } |
219 | 219 | ||
220 | /* | 220 | /* |
221 | * Type-F DMA | 221 | * Type-F DMA |
222 | * ---------- | 222 | * ---------- |
223 | * Find the PIIX4 ISA Controller and read the Motherboard | 223 | * Find the PIIX4 ISA Controller and read the Motherboard |
224 | * DMA controller's status to see if Type-F (Fast) DMA mode | 224 | * DMA controller's status to see if Type-F (Fast) DMA mode |
225 | * is enabled (bit 7) on either channel. Note that we'll | 225 | * is enabled (bit 7) on either channel. Note that we'll |
226 | * disable C3 support if this is enabled, as some legacy | 226 | * disable C3 support if this is enabled, as some legacy |
227 | * devices won't operate well if fast DMA is disabled. | 227 | * devices won't operate well if fast DMA is disabled. |
228 | */ | 228 | */ |
229 | dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, | 229 | dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, |
230 | PCI_DEVICE_ID_INTEL_82371AB_0, | 230 | PCI_DEVICE_ID_INTEL_82371AB_0, |
231 | PCI_ANY_ID, PCI_ANY_ID, NULL); | 231 | PCI_ANY_ID, PCI_ANY_ID, NULL); |
232 | if (dev) { | 232 | if (dev) { |
233 | pci_read_config_byte(dev, 0x76, &value1); | 233 | pci_read_config_byte(dev, 0x76, &value1); |
234 | pci_read_config_byte(dev, 0x77, &value2); | 234 | pci_read_config_byte(dev, 0x77, &value2); |
235 | if ((value1 & 0x80) || (value2 & 0x80)) | 235 | if ((value1 & 0x80) || (value2 & 0x80)) |
236 | errata.piix4.fdma = 1; | 236 | errata.piix4.fdma = 1; |
237 | pci_dev_put(dev); | 237 | pci_dev_put(dev); |
238 | } | 238 | } |
239 | 239 | ||
240 | break; | 240 | break; |
241 | } | 241 | } |
242 | 242 | ||
243 | if (errata.piix4.bmisx) | 243 | if (errata.piix4.bmisx) |
244 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 244 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
245 | "Bus master activity detection (BM-IDE) erratum enabled\n")); | 245 | "Bus master activity detection (BM-IDE) erratum enabled\n")); |
246 | if (errata.piix4.fdma) | 246 | if (errata.piix4.fdma) |
247 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 247 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
248 | "Type-F DMA livelock erratum (C3 disabled)\n")); | 248 | "Type-F DMA livelock erratum (C3 disabled)\n")); |
249 | 249 | ||
250 | return 0; | 250 | return 0; |
251 | } | 251 | } |
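
Note the control flow in the revision switch above: cases 0 and 1 set the throttle erratum and then deliberately fall through into cases 2 and 3, so A/B-step parts also get the BM-IDE and Type-F DMA checks that apply to all PIIX4 models. A reduced sketch of that shape:

    #include <stdio.h>

    int main(void)
    {
    	int revision = 0;	/* pretend A-step */

    	switch (revision) {
    	case 0:
    	case 1:
    		puts("throttle erratum");
    		/* fall through: newer-model checks apply here too */
    	case 2:
    	case 3:
    		puts("BM-IDE and Type-F DMA checks");
    		break;
    	}
    	return 0;
    }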
252 | 252 | ||
253 | static int acpi_processor_errata(struct acpi_processor *pr) | 253 | static int acpi_processor_errata(struct acpi_processor *pr) |
254 | { | 254 | { |
255 | int result = 0; | 255 | int result = 0; |
256 | struct pci_dev *dev = NULL; | 256 | struct pci_dev *dev = NULL; |
257 | 257 | ||
258 | 258 | ||
259 | if (!pr) | 259 | if (!pr) |
260 | return -EINVAL; | 260 | return -EINVAL; |
261 | 261 | ||
262 | /* | 262 | /* |
263 | * PIIX4 | 263 | * PIIX4 |
264 | */ | 264 | */ |
265 | dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, | 265 | dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, |
266 | PCI_DEVICE_ID_INTEL_82371AB_3, PCI_ANY_ID, | 266 | PCI_DEVICE_ID_INTEL_82371AB_3, PCI_ANY_ID, |
267 | PCI_ANY_ID, NULL); | 267 | PCI_ANY_ID, NULL); |
268 | if (dev) { | 268 | if (dev) { |
269 | result = acpi_processor_errata_piix4(dev); | 269 | result = acpi_processor_errata_piix4(dev); |
270 | pci_dev_put(dev); | 270 | pci_dev_put(dev); |
271 | } | 271 | } |
272 | 272 | ||
273 | return result; | 273 | return result; |
274 | } | 274 | } |
275 | 275 | ||
276 | /* -------------------------------------------------------------------------- | 276 | /* -------------------------------------------------------------------------- |
277 | Common ACPI processor functions | 277 | Common ACPI processor functions |
278 | -------------------------------------------------------------------------- */ | 278 | -------------------------------------------------------------------------- */ |
279 | 279 | ||
280 | /* | 280 | /* |
281 | * _PDC is required for a BIOS-OS handshake for most of the newer | 281 | * _PDC is required for a BIOS-OS handshake for most of the newer |
282 | * ACPI processor features. | 282 | * ACPI processor features. |
283 | */ | 283 | */ |
284 | static int acpi_processor_set_pdc(struct acpi_processor *pr) | 284 | static int acpi_processor_set_pdc(struct acpi_processor *pr) |
285 | { | 285 | { |
286 | struct acpi_object_list *pdc_in = pr->pdc; | 286 | struct acpi_object_list *pdc_in = pr->pdc; |
287 | acpi_status status = AE_OK; | 287 | acpi_status status = AE_OK; |
288 | 288 | ||
289 | 289 | ||
290 | if (!pdc_in) | 290 | if (!pdc_in) |
291 | return status; | 291 | return status; |
292 | if (idle_nomwait) { | 292 | if (idle_nomwait) { |
293 | /* | 293 | /* |
294 | * If mwait is disabled for CPU C-states, the C2C3_FFH access | 294 | * If mwait is disabled for CPU C-states, the C2C3_FFH access |
295 | * mode will be disabled in the parameter of _PDC object. | 295 | * mode will be disabled in the parameter of _PDC object. |
296 | * Of course C1_FFH access mode will also be disabled. | 296 | * Of course C1_FFH access mode will also be disabled. |
297 | */ | 297 | */ |
298 | union acpi_object *obj; | 298 | union acpi_object *obj; |
299 | u32 *buffer = NULL; | 299 | u32 *buffer = NULL; |
300 | 300 | ||
301 | obj = pdc_in->pointer; | 301 | obj = pdc_in->pointer; |
302 | buffer = (u32 *)(obj->buffer.pointer); | 302 | buffer = (u32 *)(obj->buffer.pointer); |
303 | buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH); | 303 | buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH); |
304 | 304 | ||
305 | } | 305 | } |
306 | status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL); | 306 | status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL); |
307 | 307 | ||
308 | if (ACPI_FAILURE(status)) | 308 | if (ACPI_FAILURE(status)) |
309 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 309 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
310 | "Could not evaluate _PDC, using legacy perf. control...\n")); | 310 | "Could not evaluate _PDC, using legacy perf. control...\n")); |
311 | 311 | ||
312 | return status; | 312 | return status; |
313 | } | 313 | } |
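
When mwait is disabled, the handler rewrites the capability DWORD inside the caller-supplied _PDC buffer before evaluation, clearing the two FFH access bits. The masking itself is ordinary bit arithmetic; an illustrative sketch with invented bit positions (not the real ACPI_PDC_* values):

    #include <stdio.h>

    #define PDC_C_C2C3_FFH (1u << 4)	/* illustrative position */
    #define PDC_C_C1_FFH   (1u << 5)	/* illustrative position */

    int main(void)
    {
    	unsigned int caps = 0xff;	/* pretend capability DWORD */

    	caps &= ~(PDC_C_C2C3_FFH | PDC_C_C1_FFH); /* drop FFH C-state access */
    	printf("caps now %#x\n", caps);
    	return 0;
    }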
314 | 314 | ||
315 | /* -------------------------------------------------------------------------- | 315 | /* -------------------------------------------------------------------------- |
316 | FS Interface (/proc) | 316 | FS Interface (/proc) |
317 | -------------------------------------------------------------------------- */ | 317 | -------------------------------------------------------------------------- */ |
318 | 318 | ||
319 | static struct proc_dir_entry *acpi_processor_dir = NULL; | 319 | static struct proc_dir_entry *acpi_processor_dir = NULL; |
320 | 320 | ||
321 | static int acpi_processor_info_seq_show(struct seq_file *seq, void *offset) | 321 | static int acpi_processor_info_seq_show(struct seq_file *seq, void *offset) |
322 | { | 322 | { |
323 | struct acpi_processor *pr = seq->private; | 323 | struct acpi_processor *pr = seq->private; |
324 | 324 | ||
325 | 325 | ||
326 | if (!pr) | 326 | if (!pr) |
327 | goto end; | 327 | goto end; |
328 | 328 | ||
329 | seq_printf(seq, "processor id: %d\n" | 329 | seq_printf(seq, "processor id: %d\n" |
330 | "acpi id: %d\n" | 330 | "acpi id: %d\n" |
331 | "bus mastering control: %s\n" | 331 | "bus mastering control: %s\n" |
332 | "power management: %s\n" | 332 | "power management: %s\n" |
333 | "throttling control: %s\n" | 333 | "throttling control: %s\n" |
334 | "limit interface: %s\n", | 334 | "limit interface: %s\n", |
335 | pr->id, | 335 | pr->id, |
336 | pr->acpi_id, | 336 | pr->acpi_id, |
337 | pr->flags.bm_control ? "yes" : "no", | 337 | pr->flags.bm_control ? "yes" : "no", |
338 | pr->flags.power ? "yes" : "no", | 338 | pr->flags.power ? "yes" : "no", |
339 | pr->flags.throttling ? "yes" : "no", | 339 | pr->flags.throttling ? "yes" : "no", |
340 | pr->flags.limit ? "yes" : "no"); | 340 | pr->flags.limit ? "yes" : "no"); |
341 | 341 | ||
342 | end: | 342 | end: |
343 | return 0; | 343 | return 0; |
344 | } | 344 | } |
345 | 345 | ||
346 | static int acpi_processor_info_open_fs(struct inode *inode, struct file *file) | 346 | static int acpi_processor_info_open_fs(struct inode *inode, struct file *file) |
347 | { | 347 | { |
348 | return single_open(file, acpi_processor_info_seq_show, | 348 | return single_open(file, acpi_processor_info_seq_show, |
349 | PDE(inode)->data); | 349 | PDE(inode)->data); |
350 | } | 350 | } |
351 | 351 | ||
352 | static int acpi_processor_add_fs(struct acpi_device *device) | 352 | static int acpi_processor_add_fs(struct acpi_device *device) |
353 | { | 353 | { |
354 | struct proc_dir_entry *entry = NULL; | 354 | struct proc_dir_entry *entry = NULL; |
355 | 355 | ||
356 | 356 | ||
357 | if (!acpi_device_dir(device)) { | 357 | if (!acpi_device_dir(device)) { |
358 | acpi_device_dir(device) = proc_mkdir(acpi_device_bid(device), | 358 | acpi_device_dir(device) = proc_mkdir(acpi_device_bid(device), |
359 | acpi_processor_dir); | 359 | acpi_processor_dir); |
360 | if (!acpi_device_dir(device)) | 360 | if (!acpi_device_dir(device)) |
361 | return -ENODEV; | 361 | return -ENODEV; |
362 | } | 362 | } |
363 | 363 | ||
364 | /* 'info' [R] */ | 364 | /* 'info' [R] */ |
365 | entry = proc_create_data(ACPI_PROCESSOR_FILE_INFO, | 365 | entry = proc_create_data(ACPI_PROCESSOR_FILE_INFO, |
366 | S_IRUGO, acpi_device_dir(device), | 366 | S_IRUGO, acpi_device_dir(device), |
367 | &acpi_processor_info_fops, | 367 | &acpi_processor_info_fops, |
368 | acpi_driver_data(device)); | 368 | acpi_driver_data(device)); |
369 | if (!entry) | 369 | if (!entry) |
370 | return -EIO; | 370 | return -EIO; |
371 | 371 | ||
372 | /* 'throttling' [R/W] */ | 372 | /* 'throttling' [R/W] */ |
373 | entry = proc_create_data(ACPI_PROCESSOR_FILE_THROTTLING, | 373 | entry = proc_create_data(ACPI_PROCESSOR_FILE_THROTTLING, |
374 | S_IFREG | S_IRUGO | S_IWUSR, | 374 | S_IFREG | S_IRUGO | S_IWUSR, |
375 | acpi_device_dir(device), | 375 | acpi_device_dir(device), |
376 | &acpi_processor_throttling_fops, | 376 | &acpi_processor_throttling_fops, |
377 | acpi_driver_data(device)); | 377 | acpi_driver_data(device)); |
378 | if (!entry) | 378 | if (!entry) |
379 | return -EIO; | 379 | return -EIO; |
380 | 380 | ||
381 | /* 'limit' [R/W] */ | 381 | /* 'limit' [R/W] */ |
382 | entry = proc_create_data(ACPI_PROCESSOR_FILE_LIMIT, | 382 | entry = proc_create_data(ACPI_PROCESSOR_FILE_LIMIT, |
383 | S_IFREG | S_IRUGO | S_IWUSR, | 383 | S_IFREG | S_IRUGO | S_IWUSR, |
384 | acpi_device_dir(device), | 384 | acpi_device_dir(device), |
385 | &acpi_processor_limit_fops, | 385 | &acpi_processor_limit_fops, |
386 | acpi_driver_data(device)); | 386 | acpi_driver_data(device)); |
387 | if (!entry) | 387 | if (!entry) |
388 | return -EIO; | 388 | return -EIO; |
389 | return 0; | 389 | return 0; |
390 | } | 390 | } |
391 | 391 | ||
392 | static int acpi_processor_remove_fs(struct acpi_device *device) | 392 | static int acpi_processor_remove_fs(struct acpi_device *device) |
393 | { | 393 | { |
394 | 394 | ||
395 | if (acpi_device_dir(device)) { | 395 | if (acpi_device_dir(device)) { |
396 | remove_proc_entry(ACPI_PROCESSOR_FILE_INFO, | 396 | remove_proc_entry(ACPI_PROCESSOR_FILE_INFO, |
397 | acpi_device_dir(device)); | 397 | acpi_device_dir(device)); |
398 | remove_proc_entry(ACPI_PROCESSOR_FILE_THROTTLING, | 398 | remove_proc_entry(ACPI_PROCESSOR_FILE_THROTTLING, |
399 | acpi_device_dir(device)); | 399 | acpi_device_dir(device)); |
400 | remove_proc_entry(ACPI_PROCESSOR_FILE_LIMIT, | 400 | remove_proc_entry(ACPI_PROCESSOR_FILE_LIMIT, |
401 | acpi_device_dir(device)); | 401 | acpi_device_dir(device)); |
402 | remove_proc_entry(acpi_device_bid(device), acpi_processor_dir); | 402 | remove_proc_entry(acpi_device_bid(device), acpi_processor_dir); |
403 | acpi_device_dir(device) = NULL; | 403 | acpi_device_dir(device) = NULL; |
404 | } | 404 | } |
405 | 405 | ||
406 | return 0; | 406 | return 0; |
407 | } | 407 | } |
408 | 408 | ||
409 | /* Use the acpi id in MADT to map cpus in case of SMP */ | 409 | /* Use the acpi id in MADT to map cpus in case of SMP */ |
410 | 410 | ||
411 | #ifndef CONFIG_SMP | 411 | #ifndef CONFIG_SMP |
412 | static int get_cpu_id(acpi_handle handle, int type, u32 acpi_id) { return -1; } | 412 | static int get_cpu_id(acpi_handle handle, int type, u32 acpi_id) { return -1; } |
413 | #else | 413 | #else |
414 | 414 | ||
415 | static struct acpi_table_madt *madt; | 415 | static struct acpi_table_madt *madt; |
416 | 416 | ||
417 | static int map_lapic_id(struct acpi_subtable_header *entry, | 417 | static int map_lapic_id(struct acpi_subtable_header *entry, |
418 | u32 acpi_id, int *apic_id) | 418 | u32 acpi_id, int *apic_id) |
419 | { | 419 | { |
420 | struct acpi_madt_local_apic *lapic = | 420 | struct acpi_madt_local_apic *lapic = |
421 | (struct acpi_madt_local_apic *)entry; | 421 | (struct acpi_madt_local_apic *)entry; |
422 | if ((lapic->lapic_flags & ACPI_MADT_ENABLED) && | 422 | if ((lapic->lapic_flags & ACPI_MADT_ENABLED) && |
423 | lapic->processor_id == acpi_id) { | 423 | lapic->processor_id == acpi_id) { |
424 | *apic_id = lapic->id; | 424 | *apic_id = lapic->id; |
425 | return 1; | 425 | return 1; |
426 | } | 426 | } |
427 | return 0; | 427 | return 0; |
428 | } | 428 | } |
429 | 429 | ||
430 | static int map_x2apic_id(struct acpi_subtable_header *entry, | 430 | static int map_x2apic_id(struct acpi_subtable_header *entry, |
431 | int device_declaration, u32 acpi_id, int *apic_id) | 431 | int device_declaration, u32 acpi_id, int *apic_id) |
432 | { | 432 | { |
433 | struct acpi_madt_local_x2apic *apic = | 433 | struct acpi_madt_local_x2apic *apic = |
434 | (struct acpi_madt_local_x2apic *)entry; | 434 | (struct acpi_madt_local_x2apic *)entry; |
435 | u32 tmp = apic->local_apic_id; | 435 | u32 tmp = apic->local_apic_id; |
436 | 436 | ||
437 | 	/* Only check enabled APICs */ | 437 | 	/* Only check enabled APICs */ |
438 | if (!(apic->lapic_flags & ACPI_MADT_ENABLED)) | 438 | if (!(apic->lapic_flags & ACPI_MADT_ENABLED)) |
439 | return 0; | 439 | return 0; |
440 | 440 | ||
441 | /* Device statement declaration type */ | 441 | /* Device statement declaration type */ |
442 | if (device_declaration) { | 442 | if (device_declaration) { |
443 | if (apic->uid == acpi_id) | 443 | if (apic->uid == acpi_id) |
444 | goto found; | 444 | goto found; |
445 | } | 445 | } |
446 | 446 | ||
447 | return 0; | 447 | return 0; |
448 | found: | 448 | found: |
449 | *apic_id = tmp; | 449 | *apic_id = tmp; |
450 | return 1; | 450 | return 1; |
451 | } | 451 | } |
452 | 452 | ||
453 | static int map_lsapic_id(struct acpi_subtable_header *entry, | 453 | static int map_lsapic_id(struct acpi_subtable_header *entry, |
454 | int device_declaration, u32 acpi_id, int *apic_id) | 454 | int device_declaration, u32 acpi_id, int *apic_id) |
455 | { | 455 | { |
456 | struct acpi_madt_local_sapic *lsapic = | 456 | struct acpi_madt_local_sapic *lsapic = |
457 | (struct acpi_madt_local_sapic *)entry; | 457 | (struct acpi_madt_local_sapic *)entry; |
458 | u32 tmp = (lsapic->id << 8) | lsapic->eid; | 458 | u32 tmp = (lsapic->id << 8) | lsapic->eid; |
459 | 459 | ||
460 | 	/* Only check enabled APICs */ | 460 | 	/* Only check enabled APICs */ |
461 | if (!(lsapic->lapic_flags & ACPI_MADT_ENABLED)) | 461 | if (!(lsapic->lapic_flags & ACPI_MADT_ENABLED)) |
462 | return 0; | 462 | return 0; |
463 | 463 | ||
464 | /* Device statement declaration type */ | 464 | /* Device statement declaration type */ |
465 | if (device_declaration) { | 465 | if (device_declaration) { |
466 | if (entry->length < 16) | 466 | if (entry->length < 16) |
467 | printk(KERN_ERR PREFIX | 467 | printk(KERN_ERR PREFIX |
468 | "Invalid LSAPIC with Device type processor (SAPIC ID %#x)\n", | 468 | "Invalid LSAPIC with Device type processor (SAPIC ID %#x)\n", |
469 | tmp); | 469 | tmp); |
470 | else if (lsapic->uid == acpi_id) | 470 | else if (lsapic->uid == acpi_id) |
471 | goto found; | 471 | goto found; |
472 | /* Processor statement declaration type */ | 472 | /* Processor statement declaration type */ |
473 | } else if (lsapic->processor_id == acpi_id) | 473 | } else if (lsapic->processor_id == acpi_id) |
474 | goto found; | 474 | goto found; |
475 | 475 | ||
476 | return 0; | 476 | return 0; |
477 | found: | 477 | found: |
478 | *apic_id = tmp; | 478 | *apic_id = tmp; |
479 | return 1; | 479 | return 1; |
480 | } | 480 | } |
481 | 481 | ||
482 | static int map_madt_entry(int type, u32 acpi_id) | 482 | static int map_madt_entry(int type, u32 acpi_id) |
483 | { | 483 | { |
484 | unsigned long madt_end, entry; | 484 | unsigned long madt_end, entry; |
485 | int apic_id = -1; | 485 | int apic_id = -1; |
486 | 486 | ||
487 | if (!madt) | 487 | if (!madt) |
488 | return apic_id; | 488 | return apic_id; |
489 | 489 | ||
490 | entry = (unsigned long)madt; | 490 | entry = (unsigned long)madt; |
491 | madt_end = entry + madt->header.length; | 491 | madt_end = entry + madt->header.length; |
492 | 492 | ||
493 | /* Parse all entries looking for a match. */ | 493 | /* Parse all entries looking for a match. */ |
494 | 494 | ||
495 | entry += sizeof(struct acpi_table_madt); | 495 | entry += sizeof(struct acpi_table_madt); |
496 | while (entry + sizeof(struct acpi_subtable_header) < madt_end) { | 496 | while (entry + sizeof(struct acpi_subtable_header) < madt_end) { |
497 | struct acpi_subtable_header *header = | 497 | struct acpi_subtable_header *header = |
498 | (struct acpi_subtable_header *)entry; | 498 | (struct acpi_subtable_header *)entry; |
499 | if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) { | 499 | if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) { |
500 | if (map_lapic_id(header, acpi_id, &apic_id)) | 500 | if (map_lapic_id(header, acpi_id, &apic_id)) |
501 | break; | 501 | break; |
502 | } else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) { | 502 | } else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) { |
503 | if (map_x2apic_id(header, type, acpi_id, &apic_id)) | 503 | if (map_x2apic_id(header, type, acpi_id, &apic_id)) |
504 | break; | 504 | break; |
505 | } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) { | 505 | } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) { |
506 | if (map_lsapic_id(header, type, acpi_id, &apic_id)) | 506 | if (map_lsapic_id(header, type, acpi_id, &apic_id)) |
507 | break; | 507 | break; |
508 | } | 508 | } |
509 | entry += header->length; | 509 | entry += header->length; |
510 | } | 510 | } |
511 | return apic_id; | 511 | return apic_id; |
512 | } | 512 | } |
513 | 513 | ||
514 | static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id) | 514 | static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id) |
515 | { | 515 | { |
516 | struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; | 516 | struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; |
517 | union acpi_object *obj; | 517 | union acpi_object *obj; |
518 | struct acpi_subtable_header *header; | 518 | struct acpi_subtable_header *header; |
519 | int apic_id = -1; | 519 | int apic_id = -1; |
520 | 520 | ||
521 | if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) | 521 | if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) |
522 | goto exit; | 522 | goto exit; |
523 | 523 | ||
524 | if (!buffer.length || !buffer.pointer) | 524 | if (!buffer.length || !buffer.pointer) |
525 | goto exit; | 525 | goto exit; |
526 | 526 | ||
527 | obj = buffer.pointer; | 527 | obj = buffer.pointer; |
528 | if (obj->type != ACPI_TYPE_BUFFER || | 528 | if (obj->type != ACPI_TYPE_BUFFER || |
529 | obj->buffer.length < sizeof(struct acpi_subtable_header)) { | 529 | obj->buffer.length < sizeof(struct acpi_subtable_header)) { |
530 | goto exit; | 530 | goto exit; |
531 | } | 531 | } |
532 | 532 | ||
533 | header = (struct acpi_subtable_header *)obj->buffer.pointer; | 533 | header = (struct acpi_subtable_header *)obj->buffer.pointer; |
534 | if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) { | 534 | if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) { |
535 | map_lapic_id(header, acpi_id, &apic_id); | 535 | map_lapic_id(header, acpi_id, &apic_id); |
536 | } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) { | 536 | } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) { |
537 | map_lsapic_id(header, type, acpi_id, &apic_id); | 537 | map_lsapic_id(header, type, acpi_id, &apic_id); |
538 | } | 538 | } |
539 | 539 | ||
540 | exit: | 540 | exit: |
541 | if (buffer.pointer) | 541 | if (buffer.pointer) |
542 | kfree(buffer.pointer); | 542 | kfree(buffer.pointer); |
543 | return apic_id; | 543 | return apic_id; |
544 | } | 544 | } |
545 | 545 | ||
546 | static int get_cpu_id(acpi_handle handle, int type, u32 acpi_id) | 546 | static int get_cpu_id(acpi_handle handle, int type, u32 acpi_id) |
547 | { | 547 | { |
548 | int i; | 548 | int i; |
549 | int apic_id = -1; | 549 | int apic_id = -1; |
550 | 550 | ||
551 | apic_id = map_mat_entry(handle, type, acpi_id); | 551 | apic_id = map_mat_entry(handle, type, acpi_id); |
552 | if (apic_id == -1) | 552 | if (apic_id == -1) |
553 | apic_id = map_madt_entry(type, acpi_id); | 553 | apic_id = map_madt_entry(type, acpi_id); |
554 | if (apic_id == -1) | 554 | if (apic_id == -1) |
555 | return apic_id; | 555 | return apic_id; |
556 | 556 | ||
557 | for_each_possible_cpu(i) { | 557 | for_each_possible_cpu(i) { |
558 | if (cpu_physical_id(i) == apic_id) | 558 | if (cpu_physical_id(i) == apic_id) |
559 | return i; | 559 | return i; |
560 | } | 560 | } |
561 | return -1; | 561 | return -1; |
562 | } | 562 | } |
563 | #endif | 563 | #endif |
564 | 564 | ||
565 | /* -------------------------------------------------------------------------- | 565 | /* -------------------------------------------------------------------------- |
566 | Driver Interface | 566 | Driver Interface |
567 | -------------------------------------------------------------------------- */ | 567 | -------------------------------------------------------------------------- */ |
568 | 568 | ||
569 | static int acpi_processor_get_info(struct acpi_device *device) | 569 | static int acpi_processor_get_info(struct acpi_device *device) |
570 | { | 570 | { |
571 | acpi_status status = 0; | 571 | acpi_status status = 0; |
572 | union acpi_object object = { 0 }; | 572 | union acpi_object object = { 0 }; |
573 | struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; | 573 | struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; |
574 | struct acpi_processor *pr; | 574 | struct acpi_processor *pr; |
575 | int cpu_index, device_declaration = 0; | 575 | int cpu_index, device_declaration = 0; |
576 | static int cpu0_initialized; | 576 | static int cpu0_initialized; |
577 | 577 | ||
578 | pr = acpi_driver_data(device); | 578 | pr = acpi_driver_data(device); |
579 | if (!pr) | 579 | if (!pr) |
580 | return -EINVAL; | 580 | return -EINVAL; |
581 | 581 | ||
582 | if (num_online_cpus() > 1) | 582 | if (num_online_cpus() > 1) |
583 | errata.smp = TRUE; | 583 | errata.smp = TRUE; |
584 | 584 | ||
585 | acpi_processor_errata(pr); | 585 | acpi_processor_errata(pr); |
586 | 586 | ||
587 | /* | 587 | /* |
588 | * Check to see if we have bus mastering arbitration control. This | 588 | * Check to see if we have bus mastering arbitration control. This |
589 | * is required for proper C3 usage (to maintain cache coherency). | 589 | * is required for proper C3 usage (to maintain cache coherency). |
590 | */ | 590 | */ |
591 | if (acpi_gbl_FADT.pm2_control_block && acpi_gbl_FADT.pm2_control_length) { | 591 | if (acpi_gbl_FADT.pm2_control_block && acpi_gbl_FADT.pm2_control_length) { |
592 | pr->flags.bm_control = 1; | 592 | pr->flags.bm_control = 1; |
593 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 593 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
594 | "Bus mastering arbitration control present\n")); | 594 | "Bus mastering arbitration control present\n")); |
595 | } else | 595 | } else |
596 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 596 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
597 | "No bus mastering arbitration control\n")); | 597 | "No bus mastering arbitration control\n")); |
598 | 598 | ||
599 | if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_HID)) { | 599 | if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_HID)) { |
600 | /* | 600 | /* |
601 | * Declared with "Device" statement; match _UID. | 601 | * Declared with "Device" statement; match _UID. |
602 | * Note that we don't handle string _UIDs yet. | 602 | * Note that we don't handle string _UIDs yet. |
603 | */ | 603 | */ |
604 | unsigned long long value; | 604 | unsigned long long value; |
605 | status = acpi_evaluate_integer(pr->handle, METHOD_NAME__UID, | 605 | status = acpi_evaluate_integer(pr->handle, METHOD_NAME__UID, |
606 | NULL, &value); | 606 | NULL, &value); |
607 | if (ACPI_FAILURE(status)) { | 607 | if (ACPI_FAILURE(status)) { |
608 | printk(KERN_ERR PREFIX | 608 | printk(KERN_ERR PREFIX |
609 | "Evaluating processor _UID [%#x]\n", status); | 609 | "Evaluating processor _UID [%#x]\n", status); |
610 | return -ENODEV; | 610 | return -ENODEV; |
611 | } | 611 | } |
612 | device_declaration = 1; | 612 | device_declaration = 1; |
613 | pr->acpi_id = value; | 613 | pr->acpi_id = value; |
614 | } else { | 614 | } else { |
615 | /* Declared with "Processor" statement; match ProcessorID */ | 615 | /* Declared with "Processor" statement; match ProcessorID */ |
616 | status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer); | 616 | status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer); |
617 | if (ACPI_FAILURE(status)) { | 617 | if (ACPI_FAILURE(status)) { |
618 | printk(KERN_ERR PREFIX "Evaluating processor object\n"); | 618 | printk(KERN_ERR PREFIX "Evaluating processor object\n"); |
619 | return -ENODEV; | 619 | return -ENODEV; |
620 | } | 620 | } |
621 | 621 | ||
622 | /* | 622 | /* |
623 | * TBD: Synch processor ID (via LAPIC/LSAPIC structures) on SMP. | 623 | * TBD: Synch processor ID (via LAPIC/LSAPIC structures) on SMP. |
624 | * >>> 'acpi_get_processor_id(acpi_id, &id)' in | 624 | * >>> 'acpi_get_processor_id(acpi_id, &id)' in |
625 | * arch/xxx/acpi.c | 625 | * arch/xxx/acpi.c |
626 | */ | 626 | */ |
627 | pr->acpi_id = object.processor.proc_id; | 627 | pr->acpi_id = object.processor.proc_id; |
628 | } | 628 | } |
629 | cpu_index = get_cpu_id(pr->handle, device_declaration, pr->acpi_id); | 629 | cpu_index = get_cpu_id(pr->handle, device_declaration, pr->acpi_id); |
630 | 630 | ||
631 | /* Handle UP system running SMP kernel, with no LAPIC in MADT */ | 631 | /* Handle UP system running SMP kernel, with no LAPIC in MADT */ |
632 | if (!cpu0_initialized && (cpu_index == -1) && | 632 | if (!cpu0_initialized && (cpu_index == -1) && |
633 | (num_online_cpus() == 1)) { | 633 | (num_online_cpus() == 1)) { |
634 | cpu_index = 0; | 634 | cpu_index = 0; |
635 | } | 635 | } |
636 | 636 | ||
637 | cpu0_initialized = 1; | 637 | cpu0_initialized = 1; |
638 | 638 | ||
639 | pr->id = cpu_index; | 639 | pr->id = cpu_index; |
640 | 640 | ||
641 | /* | 641 | /* |
642 | * Extra Processor objects may be enumerated on MP systems with | 642 | * Extra Processor objects may be enumerated on MP systems with |
643 | * less than the max # of CPUs. They should be ignored iff | 643 | * less than the max # of CPUs. They should be ignored iff |
644 | * they are physically not present. | 644 | * they are physically not present. |
645 | */ | 645 | */ |
646 | if (pr->id == -1) { | 646 | if (pr->id == -1) { |
647 | if (ACPI_FAILURE | 647 | if (ACPI_FAILURE |
648 | (acpi_processor_hotadd_init(pr->handle, &pr->id))) { | 648 | (acpi_processor_hotadd_init(pr->handle, &pr->id))) { |
649 | return -ENODEV; | 649 | return -ENODEV; |
650 | } | 650 | } |
651 | } | 651 | } |
652 | 652 | ||
653 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id, | 653 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id, |
654 | pr->acpi_id)); | 654 | pr->acpi_id)); |
655 | 655 | ||
656 | if (!object.processor.pblk_address) | 656 | if (!object.processor.pblk_address) |
657 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No PBLK (NULL address)\n")); | 657 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No PBLK (NULL address)\n")); |
658 | else if (object.processor.pblk_length != 6) | 658 | else if (object.processor.pblk_length != 6) |
659 | printk(KERN_ERR PREFIX "Invalid PBLK length [%d]\n", | 659 | printk(KERN_ERR PREFIX "Invalid PBLK length [%d]\n", |
660 | object.processor.pblk_length); | 660 | object.processor.pblk_length); |
661 | else { | 661 | else { |
662 | pr->throttling.address = object.processor.pblk_address; | 662 | pr->throttling.address = object.processor.pblk_address; |
663 | pr->throttling.duty_offset = acpi_gbl_FADT.duty_offset; | 663 | pr->throttling.duty_offset = acpi_gbl_FADT.duty_offset; |
664 | pr->throttling.duty_width = acpi_gbl_FADT.duty_width; | 664 | pr->throttling.duty_width = acpi_gbl_FADT.duty_width; |
665 | 665 | ||
666 | pr->pblk = object.processor.pblk_address; | 666 | pr->pblk = object.processor.pblk_address; |
667 | 667 | ||
668 | /* | 668 | /* |
669 | * We don't care about error returns - we just try to mark | 669 | * We don't care about error returns - we just try to mark |
670 | * these reserved so that nobody else is confused into thinking | 670 | * these reserved so that nobody else is confused into thinking |
671 | * that this region might be unused. | 671 | * that this region might be unused. |
672 | * | 672 | * |
673 | * (In particular, allocating the IO range for Cardbus) | 673 | * (In particular, allocating the IO range for Cardbus) |
674 | */ | 674 | */ |
675 | request_region(pr->throttling.address, 6, "ACPI CPU throttle"); | 675 | request_region(pr->throttling.address, 6, "ACPI CPU throttle"); |
676 | } | 676 | } |
677 | 677 | ||
678 | /* | 678 | /* |
679 | * If ACPI describes a slot number for this CPU, we can use it to | 679 | * If ACPI describes a slot number for this CPU, we can use it to |
680 | * ensure we get the right value in the "physical id" field | 680 | * ensure we get the right value in the "physical id" field |
681 | * of /proc/cpuinfo | 681 | * of /proc/cpuinfo |
682 | */ | 682 | */ |
683 | status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer); | 683 | status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer); |
684 | if (ACPI_SUCCESS(status)) | 684 | if (ACPI_SUCCESS(status)) |
685 | arch_fix_phys_package_id(pr->id, object.integer.value); | 685 | arch_fix_phys_package_id(pr->id, object.integer.value); |
686 | 686 | ||
687 | return 0; | 687 | return 0; |
688 | } | 688 | } |
689 | 689 | ||
690 | static DEFINE_PER_CPU(void *, processor_device_array); | 690 | static DEFINE_PER_CPU(void *, processor_device_array); |
691 | 691 | ||
692 | static int __cpuinit acpi_processor_start(struct acpi_device *device) | 692 | static int __cpuinit acpi_processor_start(struct acpi_device *device) |
693 | { | 693 | { |
694 | int result = 0; | 694 | int result = 0; |
695 | struct acpi_processor *pr; | 695 | struct acpi_processor *pr; |
696 | struct sys_device *sysdev; | 696 | struct sys_device *sysdev; |
697 | 697 | ||
698 | pr = acpi_driver_data(device); | 698 | pr = acpi_driver_data(device); |
699 | 699 | ||
700 | result = acpi_processor_get_info(device); | 700 | result = acpi_processor_get_info(device); |
701 | if (result) { | 701 | if (result) { |
702 | /* Processor is physically not present */ | 702 | /* Processor is physically not present */ |
703 | return 0; | 703 | return 0; |
704 | } | 704 | } |
705 | 705 | ||
706 | BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0)); | 706 | BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0)); |
707 | 707 | ||
708 | /* | 708 | /* |
709 | * Buggy BIOS check | 709 | * Buggy BIOS check |
710 | * ACPI id of processors can be reported wrongly by the BIOS. | 710 | * ACPI id of processors can be reported wrongly by the BIOS. |
711 | * Don't trust it blindly | 711 | * Don't trust it blindly |
712 | */ | 712 | */ |
713 | if (per_cpu(processor_device_array, pr->id) != NULL && | 713 | if (per_cpu(processor_device_array, pr->id) != NULL && |
714 | per_cpu(processor_device_array, pr->id) != device) { | 714 | per_cpu(processor_device_array, pr->id) != device) { |
715 | printk(KERN_WARNING "BIOS reported wrong ACPI id " | 715 | printk(KERN_WARNING "BIOS reported wrong ACPI id " |
716 | "for the processor\n"); | 716 | "for the processor\n"); |
717 | return -ENODEV; | 717 | return -ENODEV; |
718 | } | 718 | } |
719 | per_cpu(processor_device_array, pr->id) = device; | 719 | per_cpu(processor_device_array, pr->id) = device; |
720 | 720 | ||
721 | per_cpu(processors, pr->id) = pr; | 721 | per_cpu(processors, pr->id) = pr; |
722 | 722 | ||
723 | result = acpi_processor_add_fs(device); | 723 | result = acpi_processor_add_fs(device); |
724 | if (result) | 724 | if (result) |
725 | goto end; | 725 | goto end; |
726 | 726 | ||
727 | sysdev = get_cpu_sysdev(pr->id); | 727 | sysdev = get_cpu_sysdev(pr->id); |
728 | if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) | 728 | if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) |
729 | return -EFAULT; | 729 | return -EFAULT; |
730 | 730 | ||
731 | /* _PDC call should be done before doing anything else (if reqd.). */ | 731 | /* _PDC call should be done before doing anything else (if reqd.). */ |
732 | arch_acpi_processor_init_pdc(pr); | 732 | arch_acpi_processor_init_pdc(pr); |
733 | acpi_processor_set_pdc(pr); | 733 | acpi_processor_set_pdc(pr); |
734 | #ifdef CONFIG_CPU_FREQ | 734 | #ifdef CONFIG_CPU_FREQ |
735 | acpi_processor_ppc_has_changed(pr); | 735 | acpi_processor_ppc_has_changed(pr); |
736 | #endif | 736 | #endif |
737 | acpi_processor_get_throttling_info(pr); | 737 | acpi_processor_get_throttling_info(pr); |
738 | acpi_processor_get_limit_info(pr); | 738 | acpi_processor_get_limit_info(pr); |
739 | 739 | ||
740 | 740 | ||
741 | acpi_processor_power_init(pr, device); | 741 | acpi_processor_power_init(pr, device); |
742 | 742 | ||
743 | pr->cdev = thermal_cooling_device_register("Processor", device, | 743 | pr->cdev = thermal_cooling_device_register("Processor", device, |
744 | &processor_cooling_ops); | 744 | &processor_cooling_ops); |
745 | if (IS_ERR(pr->cdev)) { | 745 | if (IS_ERR(pr->cdev)) { |
746 | result = PTR_ERR(pr->cdev); | 746 | result = PTR_ERR(pr->cdev); |
747 | goto end; | 747 | goto end; |
748 | } | 748 | } |
749 | 749 | ||
750 | dev_info(&device->dev, "registered as cooling_device%d\n", | 750 | dev_info(&device->dev, "registered as cooling_device%d\n", |
751 | pr->cdev->id); | 751 | pr->cdev->id); |
752 | 752 | ||
753 | result = sysfs_create_link(&device->dev.kobj, | 753 | result = sysfs_create_link(&device->dev.kobj, |
754 | &pr->cdev->device.kobj, | 754 | &pr->cdev->device.kobj, |
755 | "thermal_cooling"); | 755 | "thermal_cooling"); |
756 | if (result) | 756 | if (result) |
757 | printk(KERN_ERR PREFIX "Create sysfs link\n"); | 757 | printk(KERN_ERR PREFIX "Create sysfs link\n"); |
758 | result = sysfs_create_link(&pr->cdev->device.kobj, | 758 | result = sysfs_create_link(&pr->cdev->device.kobj, |
759 | &device->dev.kobj, | 759 | &device->dev.kobj, |
760 | "device"); | 760 | "device"); |
761 | if (result) | 761 | if (result) |
762 | printk(KERN_ERR PREFIX "Create sysfs link\n"); | 762 | printk(KERN_ERR PREFIX "Create sysfs link\n"); |
763 | 763 | ||
764 | if (pr->flags.throttling) { | 764 | if (pr->flags.throttling) { |
765 | printk(KERN_INFO PREFIX "%s [%s] (supports", | 765 | printk(KERN_INFO PREFIX "%s [%s] (supports", |
766 | acpi_device_name(device), acpi_device_bid(device)); | 766 | acpi_device_name(device), acpi_device_bid(device)); |
767 | printk(" %d throttling states", pr->throttling.state_count); | 767 | printk(" %d throttling states", pr->throttling.state_count); |
768 | printk(")\n"); | 768 | printk(")\n"); |
769 | } | 769 | } |
770 | 770 | ||
771 | end: | 771 | end: |
772 | 772 | ||
773 | return result; | 773 | return result; |
774 | } | 774 | } |
775 | 775 | ||
776 | static void acpi_processor_notify(struct acpi_device *device, u32 event) | 776 | static void acpi_processor_notify(struct acpi_device *device, u32 event) |
777 | { | 777 | { |
778 | struct acpi_processor *pr = acpi_driver_data(device); | 778 | struct acpi_processor *pr = acpi_driver_data(device); |
779 | int saved; | 779 | int saved; |
780 | 780 | ||
781 | if (!pr) | 781 | if (!pr) |
782 | return; | 782 | return; |
783 | 783 | ||
784 | switch (event) { | 784 | switch (event) { |
785 | case ACPI_PROCESSOR_NOTIFY_PERFORMANCE: | 785 | case ACPI_PROCESSOR_NOTIFY_PERFORMANCE: |
786 | saved = pr->performance_platform_limit; | 786 | saved = pr->performance_platform_limit; |
787 | acpi_processor_ppc_has_changed(pr); | 787 | acpi_processor_ppc_has_changed(pr); |
788 | if (saved == pr->performance_platform_limit) | 788 | if (saved == pr->performance_platform_limit) |
789 | break; | 789 | break; |
790 | acpi_bus_generate_proc_event(device, event, | 790 | acpi_bus_generate_proc_event(device, event, |
791 | pr->performance_platform_limit); | 791 | pr->performance_platform_limit); |
792 | acpi_bus_generate_netlink_event(device->pnp.device_class, | 792 | acpi_bus_generate_netlink_event(device->pnp.device_class, |
793 | dev_name(&device->dev), event, | 793 | dev_name(&device->dev), event, |
794 | pr->performance_platform_limit); | 794 | pr->performance_platform_limit); |
795 | break; | 795 | break; |
796 | case ACPI_PROCESSOR_NOTIFY_POWER: | 796 | case ACPI_PROCESSOR_NOTIFY_POWER: |
797 | acpi_processor_cst_has_changed(pr); | 797 | acpi_processor_cst_has_changed(pr); |
798 | acpi_bus_generate_proc_event(device, event, 0); | 798 | acpi_bus_generate_proc_event(device, event, 0); |
799 | acpi_bus_generate_netlink_event(device->pnp.device_class, | 799 | acpi_bus_generate_netlink_event(device->pnp.device_class, |
800 | dev_name(&device->dev), event, 0); | 800 | dev_name(&device->dev), event, 0); |
801 | break; | 801 | break; |
802 | case ACPI_PROCESSOR_NOTIFY_THROTTLING: | 802 | case ACPI_PROCESSOR_NOTIFY_THROTTLING: |
803 | acpi_processor_tstate_has_changed(pr); | 803 | acpi_processor_tstate_has_changed(pr); |
804 | acpi_bus_generate_proc_event(device, event, 0); | 804 | acpi_bus_generate_proc_event(device, event, 0); |
805 | acpi_bus_generate_netlink_event(device->pnp.device_class, | 805 | acpi_bus_generate_netlink_event(device->pnp.device_class, |
806 | dev_name(&device->dev), event, 0); | 806 | dev_name(&device->dev), event, 0); |
807 | default: | 807 | default: |
808 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 808 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
809 | "Unsupported event [0x%x]\n", event)); | 809 | "Unsupported event [0x%x]\n", event)); |
810 | break; | 810 | break; |
811 | } | 811 | } |
812 | 812 | ||
813 | return; | 813 | return; |
814 | } | 814 | } |
815 | 815 | ||
816 | static int acpi_cpu_soft_notify(struct notifier_block *nfb, | 816 | static int acpi_cpu_soft_notify(struct notifier_block *nfb, |
817 | unsigned long action, void *hcpu) | 817 | unsigned long action, void *hcpu) |
818 | { | 818 | { |
819 | unsigned int cpu = (unsigned long)hcpu; | 819 | unsigned int cpu = (unsigned long)hcpu; |
820 | struct acpi_processor *pr = per_cpu(processors, cpu); | 820 | struct acpi_processor *pr = per_cpu(processors, cpu); |
821 | 821 | ||
822 | if (action == CPU_ONLINE && pr) { | 822 | if (action == CPU_ONLINE && pr) { |
823 | acpi_processor_ppc_has_changed(pr); | 823 | acpi_processor_ppc_has_changed(pr); |
824 | acpi_processor_cst_has_changed(pr); | 824 | acpi_processor_cst_has_changed(pr); |
825 | acpi_processor_tstate_has_changed(pr); | 825 | acpi_processor_tstate_has_changed(pr); |
826 | } | 826 | } |
827 | return NOTIFY_OK; | 827 | return NOTIFY_OK; |
828 | } | 828 | } |
829 | 829 | ||
830 | static struct notifier_block acpi_cpu_notifier = | 830 | static struct notifier_block acpi_cpu_notifier = |
831 | { | 831 | { |
832 | .notifier_call = acpi_cpu_soft_notify, | 832 | .notifier_call = acpi_cpu_soft_notify, |
833 | }; | 833 | }; |
834 | 834 | ||
835 | static int acpi_processor_add(struct acpi_device *device) | 835 | static int acpi_processor_add(struct acpi_device *device) |
836 | { | 836 | { |
837 | struct acpi_processor *pr = NULL; | 837 | struct acpi_processor *pr = NULL; |
838 | 838 | ||
839 | 839 | ||
840 | if (!device) | 840 | if (!device) |
841 | return -EINVAL; | 841 | return -EINVAL; |
842 | 842 | ||
843 | pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL); | 843 | pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL); |
844 | if (!pr) | 844 | if (!pr) |
845 | return -ENOMEM; | 845 | return -ENOMEM; |
846 | 846 | ||
847 | if (!alloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) { | 847 | if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) { |
848 | kfree(pr); | 848 | kfree(pr); |
849 | return -ENOMEM; | 849 | return -ENOMEM; |
850 | } | 850 | } |
851 | 851 | ||
852 | pr->handle = device->handle; | 852 | pr->handle = device->handle; |
853 | strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME); | 853 | strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME); |
854 | strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS); | 854 | strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS); |
855 | device->driver_data = pr; | 855 | device->driver_data = pr; |
856 | 856 | ||
857 | return 0; | 857 | return 0; |
858 | } | 858 | } |
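
The hunk above (alloc_cpumask_var -> zalloc_cpumask_var) is the only functional change in this file: the throttling shared_cpu_map is now allocated pre-zeroed. A minimal sketch of the difference, assuming only the standard <linux/cpumask.h> API (the surrounding error handling is illustrative):

    cpumask_var_t mask;

    /* Old pattern: when CONFIG_CPUMASK_OFFSTACK is set, the mask lives
     * on the heap and alloc_cpumask_var() does not zero it, so a clear
     * is needed before first use (and is easy to forget): */
    if (!alloc_cpumask_var(&mask, GFP_KERNEL))
            return -ENOMEM;
    cpumask_clear(mask);

    /* New pattern: zalloc_cpumask_var() folds the clear into the
     * allocation and guarantees an empty mask in both the off-stack
     * and on-stack configurations: */
    if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
            return -ENOMEM;
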
859 | 859 | ||
860 | static int acpi_processor_remove(struct acpi_device *device, int type) | 860 | static int acpi_processor_remove(struct acpi_device *device, int type) |
861 | { | 861 | { |
862 | struct acpi_processor *pr = NULL; | 862 | struct acpi_processor *pr = NULL; |
863 | 863 | ||
864 | 864 | ||
865 | if (!device || !acpi_driver_data(device)) | 865 | if (!device || !acpi_driver_data(device)) |
866 | return -EINVAL; | 866 | return -EINVAL; |
867 | 867 | ||
868 | pr = acpi_driver_data(device); | 868 | pr = acpi_driver_data(device); |
869 | 869 | ||
870 | if (pr->id >= nr_cpu_ids) | 870 | if (pr->id >= nr_cpu_ids) |
871 | goto free; | 871 | goto free; |
872 | 872 | ||
873 | if (type == ACPI_BUS_REMOVAL_EJECT) { | 873 | if (type == ACPI_BUS_REMOVAL_EJECT) { |
874 | if (acpi_processor_handle_eject(pr)) | 874 | if (acpi_processor_handle_eject(pr)) |
875 | return -EINVAL; | 875 | return -EINVAL; |
876 | } | 876 | } |
877 | 877 | ||
878 | acpi_processor_power_exit(pr, device); | 878 | acpi_processor_power_exit(pr, device); |
879 | 879 | ||
880 | sysfs_remove_link(&device->dev.kobj, "sysdev"); | 880 | sysfs_remove_link(&device->dev.kobj, "sysdev"); |
881 | 881 | ||
882 | acpi_processor_remove_fs(device); | 882 | acpi_processor_remove_fs(device); |
883 | 883 | ||
884 | if (pr->cdev) { | 884 | if (pr->cdev) { |
885 | sysfs_remove_link(&device->dev.kobj, "thermal_cooling"); | 885 | sysfs_remove_link(&device->dev.kobj, "thermal_cooling"); |
886 | sysfs_remove_link(&pr->cdev->device.kobj, "device"); | 886 | sysfs_remove_link(&pr->cdev->device.kobj, "device"); |
887 | thermal_cooling_device_unregister(pr->cdev); | 887 | thermal_cooling_device_unregister(pr->cdev); |
888 | pr->cdev = NULL; | 888 | pr->cdev = NULL; |
889 | } | 889 | } |
890 | 890 | ||
891 | per_cpu(processors, pr->id) = NULL; | 891 | per_cpu(processors, pr->id) = NULL; |
892 | per_cpu(processor_device_array, pr->id) = NULL; | 892 | per_cpu(processor_device_array, pr->id) = NULL; |
893 | 893 | ||
894 | free: | 894 | free: |
895 | free_cpumask_var(pr->throttling.shared_cpu_map); | 895 | free_cpumask_var(pr->throttling.shared_cpu_map); |
896 | kfree(pr); | 896 | kfree(pr); |
897 | 897 | ||
898 | return 0; | 898 | return 0; |
899 | } | 899 | } |
900 | 900 | ||
901 | #ifdef CONFIG_ACPI_HOTPLUG_CPU | 901 | #ifdef CONFIG_ACPI_HOTPLUG_CPU |
902 | /**************************************************************************** | 902 | /**************************************************************************** |
903 | * Acpi processor hotplug support * | 903 | * Acpi processor hotplug support * |
904 | ****************************************************************************/ | 904 | ****************************************************************************/ |
905 | 905 | ||
906 | static int is_processor_present(acpi_handle handle) | 906 | static int is_processor_present(acpi_handle handle) |
907 | { | 907 | { |
908 | acpi_status status; | 908 | acpi_status status; |
909 | unsigned long long sta = 0; | 909 | unsigned long long sta = 0; |
910 | 910 | ||
911 | 911 | ||
912 | status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); | 912 | status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); |
913 | 913 | ||
914 | if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_PRESENT)) | 914 | if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_PRESENT)) |
915 | return 1; | 915 | return 1; |
916 | 916 | ||
917 | /* | 917 | /* |
918 | * _STA is mandatory for a processor that supports hot plug | 918 | * _STA is mandatory for a processor that supports hot plug |
919 | */ | 919 | */ |
920 | if (status == AE_NOT_FOUND) | 920 | if (status == AE_NOT_FOUND) |
921 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 921 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
922 | "Processor does not support hot plug\n")); | 922 | "Processor does not support hot plug\n")); |
923 | else | 923 | else |
924 | ACPI_EXCEPTION((AE_INFO, status, | 924 | ACPI_EXCEPTION((AE_INFO, status, |
925 | "Processor Device is not present")); | 925 | "Processor Device is not present")); |
926 | return 0; | 926 | return 0; |
927 | } | 927 | } |
928 | 928 | ||
929 | static | 929 | static |
930 | int acpi_processor_device_add(acpi_handle handle, struct acpi_device **device) | 930 | int acpi_processor_device_add(acpi_handle handle, struct acpi_device **device) |
931 | { | 931 | { |
932 | acpi_handle phandle; | 932 | acpi_handle phandle; |
933 | struct acpi_device *pdev; | 933 | struct acpi_device *pdev; |
934 | struct acpi_processor *pr; | 934 | struct acpi_processor *pr; |
935 | 935 | ||
936 | 936 | ||
937 | if (acpi_get_parent(handle, &phandle)) { | 937 | if (acpi_get_parent(handle, &phandle)) { |
938 | return -ENODEV; | 938 | return -ENODEV; |
939 | } | 939 | } |
940 | 940 | ||
941 | if (acpi_bus_get_device(phandle, &pdev)) { | 941 | if (acpi_bus_get_device(phandle, &pdev)) { |
942 | return -ENODEV; | 942 | return -ENODEV; |
943 | } | 943 | } |
944 | 944 | ||
945 | if (acpi_bus_add(device, pdev, handle, ACPI_BUS_TYPE_PROCESSOR)) { | 945 | if (acpi_bus_add(device, pdev, handle, ACPI_BUS_TYPE_PROCESSOR)) { |
946 | return -ENODEV; | 946 | return -ENODEV; |
947 | } | 947 | } |
948 | 948 | ||
949 | acpi_bus_start(*device); | 949 | acpi_bus_start(*device); |
950 | 950 | ||
951 | pr = acpi_driver_data(*device); | 951 | pr = acpi_driver_data(*device); |
952 | if (!pr) | 952 | if (!pr) |
953 | return -ENODEV; | 953 | return -ENODEV; |
954 | 954 | ||
955 | if ((pr->id >= 0) && (pr->id < nr_cpu_ids)) { | 955 | if ((pr->id >= 0) && (pr->id < nr_cpu_ids)) { |
956 | kobject_uevent(&(*device)->dev.kobj, KOBJ_ONLINE); | 956 | kobject_uevent(&(*device)->dev.kobj, KOBJ_ONLINE); |
957 | } | 957 | } |
958 | return 0; | 958 | return 0; |
959 | } | 959 | } |
960 | 960 | ||
961 | static void __ref acpi_processor_hotplug_notify(acpi_handle handle, | 961 | static void __ref acpi_processor_hotplug_notify(acpi_handle handle, |
962 | u32 event, void *data) | 962 | u32 event, void *data) |
963 | { | 963 | { |
964 | struct acpi_processor *pr; | 964 | struct acpi_processor *pr; |
965 | struct acpi_device *device = NULL; | 965 | struct acpi_device *device = NULL; |
966 | int result; | 966 | int result; |
967 | 967 | ||
968 | 968 | ||
969 | switch (event) { | 969 | switch (event) { |
970 | case ACPI_NOTIFY_BUS_CHECK: | 970 | case ACPI_NOTIFY_BUS_CHECK: |
971 | case ACPI_NOTIFY_DEVICE_CHECK: | 971 | case ACPI_NOTIFY_DEVICE_CHECK: |
972 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 972 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
973 | "Processor driver received %s event\n", | 973 | "Processor driver received %s event\n", |
974 | (event == ACPI_NOTIFY_BUS_CHECK) ? | 974 | (event == ACPI_NOTIFY_BUS_CHECK) ? |
975 | "ACPI_NOTIFY_BUS_CHECK" : "ACPI_NOTIFY_DEVICE_CHECK")); | 975 | "ACPI_NOTIFY_BUS_CHECK" : "ACPI_NOTIFY_DEVICE_CHECK")); |
976 | 976 | ||
977 | if (!is_processor_present(handle)) | 977 | if (!is_processor_present(handle)) |
978 | break; | 978 | break; |
979 | 979 | ||
980 | if (acpi_bus_get_device(handle, &device)) { | 980 | if (acpi_bus_get_device(handle, &device)) { |
981 | result = acpi_processor_device_add(handle, &device); | 981 | result = acpi_processor_device_add(handle, &device); |
982 | if (result) | 982 | if (result) |
983 | printk(KERN_ERR PREFIX | 983 | printk(KERN_ERR PREFIX |
984 | "Unable to add the device\n"); | 984 | "Unable to add the device\n"); |
985 | break; | 985 | break; |
986 | } | 986 | } |
987 | 987 | ||
988 | pr = acpi_driver_data(device); | 988 | pr = acpi_driver_data(device); |
989 | if (!pr) { | 989 | if (!pr) { |
990 | printk(KERN_ERR PREFIX "Driver data is NULL\n"); | 990 | printk(KERN_ERR PREFIX "Driver data is NULL\n"); |
991 | break; | 991 | break; |
992 | } | 992 | } |
993 | 993 | ||
994 | if (pr->id >= 0 && (pr->id < nr_cpu_ids)) { | 994 | if (pr->id >= 0 && (pr->id < nr_cpu_ids)) { |
995 | kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE); | 995 | kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE); |
996 | break; | 996 | break; |
997 | } | 997 | } |
998 | 998 | ||
999 | result = acpi_processor_start(device); | 999 | result = acpi_processor_start(device); |
1000 | if ((!result) && ((pr->id >= 0) && (pr->id < nr_cpu_ids))) { | 1000 | if ((!result) && ((pr->id >= 0) && (pr->id < nr_cpu_ids))) { |
1001 | kobject_uevent(&device->dev.kobj, KOBJ_ONLINE); | 1001 | kobject_uevent(&device->dev.kobj, KOBJ_ONLINE); |
1002 | } else { | 1002 | } else { |
1003 | printk(KERN_ERR PREFIX "Device [%s] failed to start\n", | 1003 | printk(KERN_ERR PREFIX "Device [%s] failed to start\n", |
1004 | acpi_device_bid(device)); | 1004 | acpi_device_bid(device)); |
1005 | } | 1005 | } |
1006 | break; | 1006 | break; |
1007 | case ACPI_NOTIFY_EJECT_REQUEST: | 1007 | case ACPI_NOTIFY_EJECT_REQUEST: |
1008 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 1008 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
1009 | "received ACPI_NOTIFY_EJECT_REQUEST\n")); | 1009 | "received ACPI_NOTIFY_EJECT_REQUEST\n")); |
1010 | 1010 | ||
1011 | if (acpi_bus_get_device(handle, &device)) { | 1011 | if (acpi_bus_get_device(handle, &device)) { |
1012 | printk(KERN_ERR PREFIX | 1012 | printk(KERN_ERR PREFIX |
1013 | "Device don't exist, dropping EJECT\n"); | 1013 | "Device don't exist, dropping EJECT\n"); |
1014 | break; | 1014 | break; |
1015 | } | 1015 | } |
1016 | pr = acpi_driver_data(device); | 1016 | pr = acpi_driver_data(device); |
1017 | if (!pr) { | 1017 | if (!pr) { |
1018 | printk(KERN_ERR PREFIX | 1018 | printk(KERN_ERR PREFIX |
1019 | "Driver data is NULL, dropping EJECT\n"); | 1019 | "Driver data is NULL, dropping EJECT\n"); |
1020 | return; | 1020 | return; |
1021 | } | 1021 | } |
1022 | 1022 | ||
1023 | if ((pr->id < nr_cpu_ids) && (cpu_present(pr->id))) | 1023 | if ((pr->id < nr_cpu_ids) && (cpu_present(pr->id))) |
1024 | kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE); | 1024 | kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE); |
1025 | break; | 1025 | break; |
1026 | default: | 1026 | default: |
1027 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 1027 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
1028 | "Unsupported event [0x%x]\n", event)); | 1028 | "Unsupported event [0x%x]\n", event)); |
1029 | break; | 1029 | break; |
1030 | } | 1030 | } |
1031 | 1031 | ||
1032 | return; | 1032 | return; |
1033 | } | 1033 | } |
1034 | 1034 | ||
1035 | static acpi_status | 1035 | static acpi_status |
1036 | processor_walk_namespace_cb(acpi_handle handle, | 1036 | processor_walk_namespace_cb(acpi_handle handle, |
1037 | u32 lvl, void *context, void **rv) | 1037 | u32 lvl, void *context, void **rv) |
1038 | { | 1038 | { |
1039 | acpi_status status; | 1039 | acpi_status status; |
1040 | int *action = context; | 1040 | int *action = context; |
1041 | acpi_object_type type = 0; | 1041 | acpi_object_type type = 0; |
1042 | 1042 | ||
1043 | status = acpi_get_type(handle, &type); | 1043 | status = acpi_get_type(handle, &type); |
1044 | if (ACPI_FAILURE(status)) | 1044 | if (ACPI_FAILURE(status)) |
1045 | return (AE_OK); | 1045 | return (AE_OK); |
1046 | 1046 | ||
1047 | if (type != ACPI_TYPE_PROCESSOR) | 1047 | if (type != ACPI_TYPE_PROCESSOR) |
1048 | return (AE_OK); | 1048 | return (AE_OK); |
1049 | 1049 | ||
1050 | switch (*action) { | 1050 | switch (*action) { |
1051 | case INSTALL_NOTIFY_HANDLER: | 1051 | case INSTALL_NOTIFY_HANDLER: |
1052 | acpi_install_notify_handler(handle, | 1052 | acpi_install_notify_handler(handle, |
1053 | ACPI_SYSTEM_NOTIFY, | 1053 | ACPI_SYSTEM_NOTIFY, |
1054 | acpi_processor_hotplug_notify, | 1054 | acpi_processor_hotplug_notify, |
1055 | NULL); | 1055 | NULL); |
1056 | break; | 1056 | break; |
1057 | case UNINSTALL_NOTIFY_HANDLER: | 1057 | case UNINSTALL_NOTIFY_HANDLER: |
1058 | acpi_remove_notify_handler(handle, | 1058 | acpi_remove_notify_handler(handle, |
1059 | ACPI_SYSTEM_NOTIFY, | 1059 | ACPI_SYSTEM_NOTIFY, |
1060 | acpi_processor_hotplug_notify); | 1060 | acpi_processor_hotplug_notify); |
1061 | break; | 1061 | break; |
1062 | default: | 1062 | default: |
1063 | break; | 1063 | break; |
1064 | } | 1064 | } |
1065 | 1065 | ||
1066 | return (AE_OK); | 1066 | return (AE_OK); |
1067 | } | 1067 | } |
1068 | 1068 | ||
1069 | static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu) | 1069 | static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu) |
1070 | { | 1070 | { |
1071 | 1071 | ||
1072 | if (!is_processor_present(handle)) { | 1072 | if (!is_processor_present(handle)) { |
1073 | return AE_ERROR; | 1073 | return AE_ERROR; |
1074 | } | 1074 | } |
1075 | 1075 | ||
1076 | if (acpi_map_lsapic(handle, p_cpu)) | 1076 | if (acpi_map_lsapic(handle, p_cpu)) |
1077 | return AE_ERROR; | 1077 | return AE_ERROR; |
1078 | 1078 | ||
1079 | if (arch_register_cpu(*p_cpu)) { | 1079 | if (arch_register_cpu(*p_cpu)) { |
1080 | acpi_unmap_lsapic(*p_cpu); | 1080 | acpi_unmap_lsapic(*p_cpu); |
1081 | return AE_ERROR; | 1081 | return AE_ERROR; |
1082 | } | 1082 | } |
1083 | 1083 | ||
1084 | return AE_OK; | 1084 | return AE_OK; |
1085 | } | 1085 | } |
1086 | 1086 | ||
1087 | static int acpi_processor_handle_eject(struct acpi_processor *pr) | 1087 | static int acpi_processor_handle_eject(struct acpi_processor *pr) |
1088 | { | 1088 | { |
1089 | if (cpu_online(pr->id)) | 1089 | if (cpu_online(pr->id)) |
1090 | cpu_down(pr->id); | 1090 | cpu_down(pr->id); |
1091 | 1091 | ||
1092 | arch_unregister_cpu(pr->id); | 1092 | arch_unregister_cpu(pr->id); |
1093 | acpi_unmap_lsapic(pr->id); | 1093 | acpi_unmap_lsapic(pr->id); |
1094 | return (0); | 1094 | return (0); |
1095 | } | 1095 | } |
1096 | #else | 1096 | #else |
1097 | static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu) | 1097 | static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu) |
1098 | { | 1098 | { |
1099 | return AE_ERROR; | 1099 | return AE_ERROR; |
1100 | } | 1100 | } |
1101 | static int acpi_processor_handle_eject(struct acpi_processor *pr) | 1101 | static int acpi_processor_handle_eject(struct acpi_processor *pr) |
1102 | { | 1102 | { |
1103 | return (-EINVAL); | 1103 | return (-EINVAL); |
1104 | } | 1104 | } |
1105 | #endif | 1105 | #endif |
1106 | 1106 | ||
1107 | static | 1107 | static |
1108 | void acpi_processor_install_hotplug_notify(void) | 1108 | void acpi_processor_install_hotplug_notify(void) |
1109 | { | 1109 | { |
1110 | #ifdef CONFIG_ACPI_HOTPLUG_CPU | 1110 | #ifdef CONFIG_ACPI_HOTPLUG_CPU |
1111 | int action = INSTALL_NOTIFY_HANDLER; | 1111 | int action = INSTALL_NOTIFY_HANDLER; |
1112 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, | 1112 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, |
1113 | ACPI_ROOT_OBJECT, | 1113 | ACPI_ROOT_OBJECT, |
1114 | ACPI_UINT32_MAX, | 1114 | ACPI_UINT32_MAX, |
1115 | processor_walk_namespace_cb, &action, NULL); | 1115 | processor_walk_namespace_cb, &action, NULL); |
1116 | #endif | 1116 | #endif |
1117 | register_hotcpu_notifier(&acpi_cpu_notifier); | 1117 | register_hotcpu_notifier(&acpi_cpu_notifier); |
1118 | } | 1118 | } |
1119 | 1119 | ||
1120 | static | 1120 | static |
1121 | void acpi_processor_uninstall_hotplug_notify(void) | 1121 | void acpi_processor_uninstall_hotplug_notify(void) |
1122 | { | 1122 | { |
1123 | #ifdef CONFIG_ACPI_HOTPLUG_CPU | 1123 | #ifdef CONFIG_ACPI_HOTPLUG_CPU |
1124 | int action = UNINSTALL_NOTIFY_HANDLER; | 1124 | int action = UNINSTALL_NOTIFY_HANDLER; |
1125 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, | 1125 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, |
1126 | ACPI_ROOT_OBJECT, | 1126 | ACPI_ROOT_OBJECT, |
1127 | ACPI_UINT32_MAX, | 1127 | ACPI_UINT32_MAX, |
1128 | processor_walk_namespace_cb, &action, NULL); | 1128 | processor_walk_namespace_cb, &action, NULL); |
1129 | #endif | 1129 | #endif |
1130 | unregister_hotcpu_notifier(&acpi_cpu_notifier); | 1130 | unregister_hotcpu_notifier(&acpi_cpu_notifier); |
1131 | } | 1131 | } |
1132 | 1132 | ||
1133 | /* | 1133 | /* |
1134 | * We keep the driver loaded even when ACPI is not running. | 1134 | * We keep the driver loaded even when ACPI is not running. |
1135 | * This is needed for the powernow-k8 driver, which works even without | 1135 | * This is needed for the powernow-k8 driver, which works even without |
1136 | * ACPI, but needs symbols from this driver | 1136 | * ACPI, but needs symbols from this driver |
1137 | */ | 1137 | */ |
1138 | 1138 | ||
1139 | static int __init acpi_processor_init(void) | 1139 | static int __init acpi_processor_init(void) |
1140 | { | 1140 | { |
1141 | int result = 0; | 1141 | int result = 0; |
1142 | 1142 | ||
1143 | memset(&errata, 0, sizeof(errata)); | 1143 | memset(&errata, 0, sizeof(errata)); |
1144 | 1144 | ||
1145 | #ifdef CONFIG_SMP | 1145 | #ifdef CONFIG_SMP |
1146 | if (ACPI_FAILURE(acpi_get_table(ACPI_SIG_MADT, 0, | 1146 | if (ACPI_FAILURE(acpi_get_table(ACPI_SIG_MADT, 0, |
1147 | (struct acpi_table_header **)&madt))) | 1147 | (struct acpi_table_header **)&madt))) |
1148 | madt = NULL; | 1148 | madt = NULL; |
1149 | #endif | 1149 | #endif |
1150 | 1150 | ||
1151 | acpi_processor_dir = proc_mkdir(ACPI_PROCESSOR_CLASS, acpi_root_dir); | 1151 | acpi_processor_dir = proc_mkdir(ACPI_PROCESSOR_CLASS, acpi_root_dir); |
1152 | if (!acpi_processor_dir) | 1152 | if (!acpi_processor_dir) |
1153 | return -ENOMEM; | 1153 | return -ENOMEM; |
1154 | 1154 | ||
1155 | /* | 1155 | /* |
1156 | * Check whether the system matches an entry in the DMI table. If so, | 1156 | * Check whether the system matches an entry in the DMI table. If so, |
1157 | * OSPM should not use mwait for CPU C-states. | 1157 | * OSPM should not use mwait for CPU C-states. |
1158 | */ | 1158 | */ |
1159 | dmi_check_system(processor_idle_dmi_table); | 1159 | dmi_check_system(processor_idle_dmi_table); |
1160 | result = cpuidle_register_driver(&acpi_idle_driver); | 1160 | result = cpuidle_register_driver(&acpi_idle_driver); |
1161 | if (result < 0) | 1161 | if (result < 0) |
1162 | goto out_proc; | 1162 | goto out_proc; |
1163 | 1163 | ||
1164 | result = acpi_bus_register_driver(&acpi_processor_driver); | 1164 | result = acpi_bus_register_driver(&acpi_processor_driver); |
1165 | if (result < 0) | 1165 | if (result < 0) |
1166 | goto out_cpuidle; | 1166 | goto out_cpuidle; |
1167 | 1167 | ||
1168 | acpi_processor_install_hotplug_notify(); | 1168 | acpi_processor_install_hotplug_notify(); |
1169 | 1169 | ||
1170 | acpi_thermal_cpufreq_init(); | 1170 | acpi_thermal_cpufreq_init(); |
1171 | 1171 | ||
1172 | acpi_processor_ppc_init(); | 1172 | acpi_processor_ppc_init(); |
1173 | 1173 | ||
1174 | acpi_processor_throttling_init(); | 1174 | acpi_processor_throttling_init(); |
1175 | 1175 | ||
1176 | return 0; | 1176 | return 0; |
1177 | 1177 | ||
1178 | out_cpuidle: | 1178 | out_cpuidle: |
1179 | cpuidle_unregister_driver(&acpi_idle_driver); | 1179 | cpuidle_unregister_driver(&acpi_idle_driver); |
1180 | 1180 | ||
1181 | out_proc: | 1181 | out_proc: |
1182 | remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir); | 1182 | remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir); |
1183 | 1183 | ||
1184 | return result; | 1184 | return result; |
1185 | } | 1185 | } |
1186 | 1186 | ||
1187 | static void __exit acpi_processor_exit(void) | 1187 | static void __exit acpi_processor_exit(void) |
1188 | { | 1188 | { |
1189 | acpi_processor_ppc_exit(); | 1189 | acpi_processor_ppc_exit(); |
1190 | 1190 | ||
1191 | acpi_thermal_cpufreq_exit(); | 1191 | acpi_thermal_cpufreq_exit(); |
1192 | 1192 | ||
1193 | acpi_processor_uninstall_hotplug_notify(); | 1193 | acpi_processor_uninstall_hotplug_notify(); |
1194 | 1194 | ||
1195 | acpi_bus_unregister_driver(&acpi_processor_driver); | 1195 | acpi_bus_unregister_driver(&acpi_processor_driver); |
1196 | 1196 | ||
1197 | cpuidle_unregister_driver(&acpi_idle_driver); | 1197 | cpuidle_unregister_driver(&acpi_idle_driver); |
1198 | 1198 | ||
1199 | remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir); | 1199 | remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir); |
1200 | 1200 | ||
1201 | return; | 1201 | return; |
1202 | } | 1202 | } |
1203 | 1203 | ||
1204 | module_init(acpi_processor_init); | 1204 | module_init(acpi_processor_init); |
1205 | module_exit(acpi_processor_exit); | 1205 | module_exit(acpi_processor_exit); |
1206 | 1206 | ||
1207 | EXPORT_SYMBOL(acpi_processor_set_thermal_limit); | 1207 | EXPORT_SYMBOL(acpi_processor_set_thermal_limit); |
1208 | 1208 | ||
1209 | MODULE_ALIAS("processor"); | 1209 | MODULE_ALIAS("processor"); |
1210 | 1210 |
drivers/cpufreq/cpufreq.c
1 | /* | 1 | /* |
2 | * linux/drivers/cpufreq/cpufreq.c | 2 | * linux/drivers/cpufreq/cpufreq.c |
3 | * | 3 | * |
4 | * Copyright (C) 2001 Russell King | 4 | * Copyright (C) 2001 Russell King |
5 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | 5 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> |
6 | * | 6 | * |
7 | * Oct 2005 - Ashok Raj <ashok.raj@intel.com> | 7 | * Oct 2005 - Ashok Raj <ashok.raj@intel.com> |
8 | * Added handling for CPU hotplug | 8 | * Added handling for CPU hotplug |
9 | * Feb 2006 - Jacob Shin <jacob.shin@amd.com> | 9 | * Feb 2006 - Jacob Shin <jacob.shin@amd.com> |
10 | * Fix handling for CPU hotplug -- affected CPUs | 10 | * Fix handling for CPU hotplug -- affected CPUs |
11 | * | 11 | * |
12 | * This program is free software; you can redistribute it and/or modify | 12 | * This program is free software; you can redistribute it and/or modify |
13 | * it under the terms of the GNU General Public License version 2 as | 13 | * it under the terms of the GNU General Public License version 2 as |
14 | * published by the Free Software Foundation. | 14 | * published by the Free Software Foundation. |
15 | * | 15 | * |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/notifier.h> | 21 | #include <linux/notifier.h> |
22 | #include <linux/cpufreq.h> | 22 | #include <linux/cpufreq.h> |
23 | #include <linux/delay.h> | 23 | #include <linux/delay.h> |
24 | #include <linux/interrupt.h> | 24 | #include <linux/interrupt.h> |
25 | #include <linux/spinlock.h> | 25 | #include <linux/spinlock.h> |
26 | #include <linux/device.h> | 26 | #include <linux/device.h> |
27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
28 | #include <linux/cpu.h> | 28 | #include <linux/cpu.h> |
29 | #include <linux/completion.h> | 29 | #include <linux/completion.h> |
30 | #include <linux/mutex.h> | 30 | #include <linux/mutex.h> |
31 | 31 | ||
32 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \ | 32 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \ |
33 | "cpufreq-core", msg) | 33 | "cpufreq-core", msg) |
34 | 34 | ||
35 | /** | 35 | /** |
36 | * The "cpufreq driver" - the arch- or hardware-dependent low | 36 | * The "cpufreq driver" - the arch- or hardware-dependent low |
37 | * level driver of CPUFreq support, and its spinlock. This lock | 37 | * level driver of CPUFreq support, and its spinlock. This lock |
38 | * also protects the cpufreq_cpu_data array. | 38 | * also protects the cpufreq_cpu_data array. |
39 | */ | 39 | */ |
40 | static struct cpufreq_driver *cpufreq_driver; | 40 | static struct cpufreq_driver *cpufreq_driver; |
41 | static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); | 41 | static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); |
42 | #ifdef CONFIG_HOTPLUG_CPU | 42 | #ifdef CONFIG_HOTPLUG_CPU |
43 | /* This one keeps track of the previously set governor of a removed CPU */ | 43 | /* This one keeps track of the previously set governor of a removed CPU */ |
44 | static DEFINE_PER_CPU(struct cpufreq_governor *, cpufreq_cpu_governor); | 44 | static DEFINE_PER_CPU(struct cpufreq_governor *, cpufreq_cpu_governor); |
45 | #endif | 45 | #endif |
46 | static DEFINE_SPINLOCK(cpufreq_driver_lock); | 46 | static DEFINE_SPINLOCK(cpufreq_driver_lock); |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure | 49 | * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure |
50 | * all cpufreq/hotplug/workqueue/etc related lock issues. | 50 | * all cpufreq/hotplug/workqueue/etc related lock issues. |
51 | * | 51 | * |
52 | * The rules for this semaphore: | 52 | * The rules for this semaphore: |
53 | * - Any routine that wants to read from the policy structure will | 53 | * - Any routine that wants to read from the policy structure will |
54 | * do a down_read on this semaphore. | 54 | * do a down_read on this semaphore. |
55 | * - Any routine that will write to the policy structure and/or may take away | 55 | * - Any routine that will write to the policy structure and/or may take away |
56 | * the policy altogether (eg. CPU hotplug), will hold this lock in write | 56 | * the policy altogether (eg. CPU hotplug), will hold this lock in write |
57 | * mode before doing so. | 57 | * mode before doing so. |
58 | * | 58 | * |
59 | * Additional rules: | 59 | * Additional rules: |
60 | * - All holders of the lock should check to make sure that the CPU they | 60 | * - All holders of the lock should check to make sure that the CPU they |
61 | * are concerned with is online after they get the lock. | 61 | * are concerned with is online after they get the lock. |
62 | * - Governor routines that can be called in cpufreq hotplug path should not | 62 | * - Governor routines that can be called in cpufreq hotplug path should not |
63 | * take this sem, as the top-level hotplug notifier handler takes it. | 63 | * take this sem, as the top-level hotplug notifier handler takes it. |
64 | */ | 64 | */ |
65 | static DEFINE_PER_CPU(int, policy_cpu); | 65 | static DEFINE_PER_CPU(int, policy_cpu); |
66 | static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); | 66 | static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); |
67 | 67 | ||
68 | #define lock_policy_rwsem(mode, cpu) \ | 68 | #define lock_policy_rwsem(mode, cpu) \ |
69 | int lock_policy_rwsem_##mode \ | 69 | int lock_policy_rwsem_##mode \ |
70 | (int cpu) \ | 70 | (int cpu) \ |
71 | { \ | 71 | { \ |
72 | int policy_cpu = per_cpu(policy_cpu, cpu); \ | 72 | int policy_cpu = per_cpu(policy_cpu, cpu); \ |
73 | BUG_ON(policy_cpu == -1); \ | 73 | BUG_ON(policy_cpu == -1); \ |
74 | down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ | 74 | down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ |
75 | if (unlikely(!cpu_online(cpu))) { \ | 75 | if (unlikely(!cpu_online(cpu))) { \ |
76 | up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ | 76 | up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ |
77 | return -1; \ | 77 | return -1; \ |
78 | } \ | 78 | } \ |
79 | \ | 79 | \ |
80 | return 0; \ | 80 | return 0; \ |
81 | } | 81 | } |
82 | 82 | ||
83 | lock_policy_rwsem(read, cpu); | 83 | lock_policy_rwsem(read, cpu); |
84 | EXPORT_SYMBOL_GPL(lock_policy_rwsem_read); | 84 | EXPORT_SYMBOL_GPL(lock_policy_rwsem_read); |
85 | 85 | ||
86 | lock_policy_rwsem(write, cpu); | 86 | lock_policy_rwsem(write, cpu); |
87 | EXPORT_SYMBOL_GPL(lock_policy_rwsem_write); | 87 | EXPORT_SYMBOL_GPL(lock_policy_rwsem_write); |
88 | 88 | ||
89 | void unlock_policy_rwsem_read(int cpu) | 89 | void unlock_policy_rwsem_read(int cpu) |
90 | { | 90 | { |
91 | int policy_cpu = per_cpu(policy_cpu, cpu); | 91 | int policy_cpu = per_cpu(policy_cpu, cpu); |
92 | BUG_ON(policy_cpu == -1); | 92 | BUG_ON(policy_cpu == -1); |
93 | up_read(&per_cpu(cpu_policy_rwsem, policy_cpu)); | 93 | up_read(&per_cpu(cpu_policy_rwsem, policy_cpu)); |
94 | } | 94 | } |
95 | EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read); | 95 | EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read); |
96 | 96 | ||
97 | void unlock_policy_rwsem_write(int cpu) | 97 | void unlock_policy_rwsem_write(int cpu) |
98 | { | 98 | { |
99 | int policy_cpu = per_cpu(policy_cpu, cpu); | 99 | int policy_cpu = per_cpu(policy_cpu, cpu); |
100 | BUG_ON(policy_cpu == -1); | 100 | BUG_ON(policy_cpu == -1); |
101 | up_write(&per_cpu(cpu_policy_rwsem, policy_cpu)); | 101 | up_write(&per_cpu(cpu_policy_rwsem, policy_cpu)); |
102 | } | 102 | } |
103 | EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write); | 103 | EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write); |
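
A hedged sketch of the locking discipline described in the comment block above, using only the helpers defined here (the function name and the field read are hypothetical):

    /* Hypothetical sketch: read one policy field for 'cpu' under the
     * per-policy rwsem, per the rules documented above. */
    static unsigned int read_policy_min(int cpu)
    {
            struct cpufreq_policy *policy;
            unsigned int min = 0;

            if (lock_policy_rwsem_read(cpu) < 0)
                    return 0;   /* CPU went offline while we blocked */

            /* Safe to read policy data for 'cpu' here. */
            policy = per_cpu(cpufreq_cpu_data, cpu);
            if (policy)
                    min = policy->min;

            unlock_policy_rwsem_read(cpu);
            return min;
    }
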
104 | 104 | ||
105 | 105 | ||
106 | /* internal prototypes */ | 106 | /* internal prototypes */ |
107 | static int __cpufreq_governor(struct cpufreq_policy *policy, | 107 | static int __cpufreq_governor(struct cpufreq_policy *policy, |
108 | unsigned int event); | 108 | unsigned int event); |
109 | static unsigned int __cpufreq_get(unsigned int cpu); | 109 | static unsigned int __cpufreq_get(unsigned int cpu); |
110 | static void handle_update(struct work_struct *work); | 110 | static void handle_update(struct work_struct *work); |
111 | 111 | ||
112 | /** | 112 | /** |
113 | * Two notifier lists: the "policy" list is involved in the | 113 | * Two notifier lists: the "policy" list is involved in the |
114 | * validation process for a new CPU frequency policy; the | 114 | * validation process for a new CPU frequency policy; the |
115 | * "transition" list for kernel code that needs to handle | 115 | * "transition" list for kernel code that needs to handle |
116 | * changes to devices when the CPU clock speed changes. | 116 | * changes to devices when the CPU clock speed changes. |
117 | * The mutex locks both lists. | 117 | * The mutex locks both lists. |
118 | */ | 118 | */ |
119 | static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list); | 119 | static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list); |
120 | static struct srcu_notifier_head cpufreq_transition_notifier_list; | 120 | static struct srcu_notifier_head cpufreq_transition_notifier_list; |
121 | 121 | ||
122 | static bool init_cpufreq_transition_notifier_list_called; | 122 | static bool init_cpufreq_transition_notifier_list_called; |
123 | static int __init init_cpufreq_transition_notifier_list(void) | 123 | static int __init init_cpufreq_transition_notifier_list(void) |
124 | { | 124 | { |
125 | srcu_init_notifier_head(&cpufreq_transition_notifier_list); | 125 | srcu_init_notifier_head(&cpufreq_transition_notifier_list); |
126 | init_cpufreq_transition_notifier_list_called = true; | 126 | init_cpufreq_transition_notifier_list_called = true; |
127 | return 0; | 127 | return 0; |
128 | } | 128 | } |
129 | pure_initcall(init_cpufreq_transition_notifier_list); | 129 | pure_initcall(init_cpufreq_transition_notifier_list); |
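
The two lists above are consumed through cpufreq_register_notifier() (declared in <linux/cpufreq.h>); as a hedged sketch, a hypothetical transition-notifier client looks like this:

    #include <linux/cpufreq.h>
    #include <linux/notifier.h>

    /* Hypothetical client: log every completed frequency transition. */
    static int demo_transition(struct notifier_block *nb,
                               unsigned long event, void *data)
    {
            struct cpufreq_freqs *freqs = data;

            if (event == CPUFREQ_POSTCHANGE)
                    pr_debug("cpu%u: %u kHz -> %u kHz\n",
                             freqs->cpu, freqs->old, freqs->new);
            return NOTIFY_OK;
    }

    static struct notifier_block demo_nb = {
            .notifier_call = demo_transition,
    };

    /* CPUFREQ_TRANSITION_NOTIFIER selects the SRCU "transition" list;
     * CPUFREQ_POLICY_NOTIFIER selects the blocking "policy" list:
     * cpufreq_register_notifier(&demo_nb, CPUFREQ_TRANSITION_NOTIFIER); */
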
130 | 130 | ||
131 | static LIST_HEAD(cpufreq_governor_list); | 131 | static LIST_HEAD(cpufreq_governor_list); |
132 | static DEFINE_MUTEX(cpufreq_governor_mutex); | 132 | static DEFINE_MUTEX(cpufreq_governor_mutex); |
133 | 133 | ||
134 | struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) | 134 | struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) |
135 | { | 135 | { |
136 | struct cpufreq_policy *data; | 136 | struct cpufreq_policy *data; |
137 | unsigned long flags; | 137 | unsigned long flags; |
138 | 138 | ||
139 | if (cpu >= nr_cpu_ids) | 139 | if (cpu >= nr_cpu_ids) |
140 | goto err_out; | 140 | goto err_out; |
141 | 141 | ||
142 | /* get the cpufreq driver */ | 142 | /* get the cpufreq driver */ |
143 | spin_lock_irqsave(&cpufreq_driver_lock, flags); | 143 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
144 | 144 | ||
145 | if (!cpufreq_driver) | 145 | if (!cpufreq_driver) |
146 | goto err_out_unlock; | 146 | goto err_out_unlock; |
147 | 147 | ||
148 | if (!try_module_get(cpufreq_driver->owner)) | 148 | if (!try_module_get(cpufreq_driver->owner)) |
149 | goto err_out_unlock; | 149 | goto err_out_unlock; |
150 | 150 | ||
151 | 151 | ||
152 | /* get the CPU */ | 152 | /* get the CPU */ |
153 | data = per_cpu(cpufreq_cpu_data, cpu); | 153 | data = per_cpu(cpufreq_cpu_data, cpu); |
154 | 154 | ||
155 | if (!data) | 155 | if (!data) |
156 | goto err_out_put_module; | 156 | goto err_out_put_module; |
157 | 157 | ||
158 | if (!kobject_get(&data->kobj)) | 158 | if (!kobject_get(&data->kobj)) |
159 | goto err_out_put_module; | 159 | goto err_out_put_module; |
160 | 160 | ||
161 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 161 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
162 | return data; | 162 | return data; |
163 | 163 | ||
164 | err_out_put_module: | 164 | err_out_put_module: |
165 | module_put(cpufreq_driver->owner); | 165 | module_put(cpufreq_driver->owner); |
166 | err_out_unlock: | 166 | err_out_unlock: |
167 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 167 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
168 | err_out: | 168 | err_out: |
169 | return NULL; | 169 | return NULL; |
170 | } | 170 | } |
171 | EXPORT_SYMBOL_GPL(cpufreq_cpu_get); | 171 | EXPORT_SYMBOL_GPL(cpufreq_cpu_get); |
172 | 172 | ||
173 | 173 | ||
174 | void cpufreq_cpu_put(struct cpufreq_policy *data) | 174 | void cpufreq_cpu_put(struct cpufreq_policy *data) |
175 | { | 175 | { |
176 | kobject_put(&data->kobj); | 176 | kobject_put(&data->kobj); |
177 | module_put(cpufreq_driver->owner); | 177 | module_put(cpufreq_driver->owner); |
178 | } | 178 | } |
179 | EXPORT_SYMBOL_GPL(cpufreq_cpu_put); | 179 | EXPORT_SYMBOL_GPL(cpufreq_cpu_put); |
180 | 180 | ||
181 | 181 | ||
182 | /********************************************************************* | 182 | /********************************************************************* |
183 | * UNIFIED DEBUG HELPERS * | 183 | * UNIFIED DEBUG HELPERS * |
184 | *********************************************************************/ | 184 | *********************************************************************/ |
185 | #ifdef CONFIG_CPU_FREQ_DEBUG | 185 | #ifdef CONFIG_CPU_FREQ_DEBUG |
186 | 186 | ||
187 | /* what part(s) of the CPUfreq subsystem are debugged? */ | 187 | /* what part(s) of the CPUfreq subsystem are debugged? */ |
188 | static unsigned int debug; | 188 | static unsigned int debug; |
189 | 189 | ||
190 | /* Is the debug output rate-limited via printk_ratelimit? Users can | 190 | /* Is the debug output rate-limited via printk_ratelimit? Users can |
191 | * set or modify this value. | 191 | * set or modify this value. |
192 | */ | 192 | */ |
193 | static unsigned int debug_ratelimit = 1; | 193 | static unsigned int debug_ratelimit = 1; |
194 | 194 | ||
195 | /* Is printk rate-limiting enabled? It is enabled once a cpufreq driver | 195 | /* Is printk rate-limiting enabled? It is enabled once a cpufreq driver |
196 | * has loaded successfully, temporarily disabled while a new policy is | 196 | * has loaded successfully, temporarily disabled while a new policy is |
197 | * being set, and disabled again upon cpufreq driver removal. | 197 | * being set, and disabled again upon cpufreq driver removal. |
198 | */ | 198 | */ |
199 | static unsigned int disable_ratelimit = 1; | 199 | static unsigned int disable_ratelimit = 1; |
200 | static DEFINE_SPINLOCK(disable_ratelimit_lock); | 200 | static DEFINE_SPINLOCK(disable_ratelimit_lock); |
201 | 201 | ||
202 | static void cpufreq_debug_enable_ratelimit(void) | 202 | static void cpufreq_debug_enable_ratelimit(void) |
203 | { | 203 | { |
204 | unsigned long flags; | 204 | unsigned long flags; |
205 | 205 | ||
206 | spin_lock_irqsave(&disable_ratelimit_lock, flags); | 206 | spin_lock_irqsave(&disable_ratelimit_lock, flags); |
207 | if (disable_ratelimit) | 207 | if (disable_ratelimit) |
208 | disable_ratelimit--; | 208 | disable_ratelimit--; |
209 | spin_unlock_irqrestore(&disable_ratelimit_lock, flags); | 209 | spin_unlock_irqrestore(&disable_ratelimit_lock, flags); |
210 | } | 210 | } |
211 | 211 | ||
212 | static void cpufreq_debug_disable_ratelimit(void) | 212 | static void cpufreq_debug_disable_ratelimit(void) |
213 | { | 213 | { |
214 | unsigned long flags; | 214 | unsigned long flags; |
215 | 215 | ||
216 | spin_lock_irqsave(&disable_ratelimit_lock, flags); | 216 | spin_lock_irqsave(&disable_ratelimit_lock, flags); |
217 | disable_ratelimit++; | 217 | disable_ratelimit++; |
218 | spin_unlock_irqrestore(&disable_ratelimit_lock, flags); | 218 | spin_unlock_irqrestore(&disable_ratelimit_lock, flags); |
219 | } | 219 | } |
220 | 220 | ||
221 | void cpufreq_debug_printk(unsigned int type, const char *prefix, | 221 | void cpufreq_debug_printk(unsigned int type, const char *prefix, |
222 | const char *fmt, ...) | 222 | const char *fmt, ...) |
223 | { | 223 | { |
224 | char s[256]; | 224 | char s[256]; |
225 | va_list args; | 225 | va_list args; |
226 | unsigned int len; | 226 | unsigned int len; |
227 | unsigned long flags; | 227 | unsigned long flags; |
228 | 228 | ||
229 | WARN_ON(!prefix); | 229 | WARN_ON(!prefix); |
230 | if (type & debug) { | 230 | if (type & debug) { |
231 | spin_lock_irqsave(&disable_ratelimit_lock, flags); | 231 | spin_lock_irqsave(&disable_ratelimit_lock, flags); |
232 | if (!disable_ratelimit && debug_ratelimit | 232 | if (!disable_ratelimit && debug_ratelimit |
233 | && !printk_ratelimit()) { | 233 | && !printk_ratelimit()) { |
234 | spin_unlock_irqrestore(&disable_ratelimit_lock, flags); | 234 | spin_unlock_irqrestore(&disable_ratelimit_lock, flags); |
235 | return; | 235 | return; |
236 | } | 236 | } |
237 | spin_unlock_irqrestore(&disable_ratelimit_lock, flags); | 237 | spin_unlock_irqrestore(&disable_ratelimit_lock, flags); |
238 | 238 | ||
239 | len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix); | 239 | len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix); |
240 | 240 | ||
241 | va_start(args, fmt); | 241 | va_start(args, fmt); |
242 | len += vsnprintf(&s[len], (256 - len), fmt, args); | 242 | len += vsnprintf(&s[len], (256 - len), fmt, args); |
243 | va_end(args); | 243 | va_end(args); |
244 | 244 | ||
245 | printk("%s", s); /* not printk(s): s may contain '%' from expanded args */ | 245 | printk("%s", s); /* not printk(s): s may contain '%' from expanded args */ |
246 | 246 | ||
247 | WARN_ON(len < 5); | 247 | WARN_ON(len < 5); |
248 | } | 248 | } |
249 | } | 249 | } |
250 | EXPORT_SYMBOL(cpufreq_debug_printk); | 250 | EXPORT_SYMBOL(cpufreq_debug_printk); |
251 | 251 | ||
252 | 252 | ||
253 | module_param(debug, uint, 0644); | 253 | module_param(debug, uint, 0644); |
254 | MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core," | 254 | MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core," |
255 | " 2 to debug drivers, and 4 to debug governors."); | 255 | " 2 to debug drivers, and 4 to debug governors."); |
256 | 256 | ||
257 | module_param(debug_ratelimit, uint, 0644); | 257 | module_param(debug_ratelimit, uint, 0644); |
258 | MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:" | 258 | MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:" |
259 | " set to 0 to disable ratelimiting."); | 259 | " set to 0 to disable ratelimiting."); |
260 | 260 | ||
261 | #else /* !CONFIG_CPU_FREQ_DEBUG */ | 261 | #else /* !CONFIG_CPU_FREQ_DEBUG */ |
262 | 262 | ||
263 | static inline void cpufreq_debug_enable_ratelimit(void) { return; } | 263 | static inline void cpufreq_debug_enable_ratelimit(void) { return; } |
264 | static inline void cpufreq_debug_disable_ratelimit(void) { return; } | 264 | static inline void cpufreq_debug_disable_ratelimit(void) { return; } |
265 | 265 | ||
266 | #endif /* CONFIG_CPU_FREQ_DEBUG */ | 266 | #endif /* CONFIG_CPU_FREQ_DEBUG */ |
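
For orientation: the dprintk() calls used throughout this file funnel into cpufreq_debug_printk() above, roughly via a wrapper of this shape (the exact macro lives elsewhere in cpufreq.c):

	#define dprintk(msg...) \
		cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "cpufreq-core", msg)

With the bitmask documented in MODULE_PARM_DESC above, debug=1 enables core messages only, while debug=7 enables core, driver and governor messages.
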
267 | 267 | ||
268 | 268 | ||
269 | /********************************************************************* | 269 | /********************************************************************* |
270 | * EXTERNALLY AFFECTING FREQUENCY CHANGES * | 270 | * EXTERNALLY AFFECTING FREQUENCY CHANGES * |
271 | *********************************************************************/ | 271 | *********************************************************************/ |
272 | 272 | ||
273 | /** | 273 | /** |
274 | * adjust_jiffies - adjust the system "loops_per_jiffy" | 274 | * adjust_jiffies - adjust the system "loops_per_jiffy" |
275 | * | 275 | * |
276 | * This function adjusts the system "loops_per_jiffy" to reflect a clock | 276 | * This function adjusts the system "loops_per_jiffy" to reflect a clock |
277 | * speed change. Note that loops_per_jiffy cannot be updated on SMP | 277 | * speed change. Note that loops_per_jiffy cannot be updated on SMP |
278 | * systems as each CPU might be scaled differently. So, use the arch | 278 | * systems as each CPU might be scaled differently. So, use the arch |
279 | * per-CPU loops_per_jiffy value wherever possible. | 279 | * per-CPU loops_per_jiffy value wherever possible. |
280 | */ | 280 | */ |
281 | #ifndef CONFIG_SMP | 281 | #ifndef CONFIG_SMP |
282 | static unsigned long l_p_j_ref; | 282 | static unsigned long l_p_j_ref; |
283 | static unsigned int l_p_j_ref_freq; | 283 | static unsigned int l_p_j_ref_freq; |
284 | 284 | ||
285 | static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) | 285 | static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) |
286 | { | 286 | { |
287 | if (ci->flags & CPUFREQ_CONST_LOOPS) | 287 | if (ci->flags & CPUFREQ_CONST_LOOPS) |
288 | return; | 288 | return; |
289 | 289 | ||
290 | if (!l_p_j_ref_freq) { | 290 | if (!l_p_j_ref_freq) { |
291 | l_p_j_ref = loops_per_jiffy; | 291 | l_p_j_ref = loops_per_jiffy; |
292 | l_p_j_ref_freq = ci->old; | 292 | l_p_j_ref_freq = ci->old; |
293 | dprintk("saving %lu as reference value for loops_per_jiffy; " | 293 | dprintk("saving %lu as reference value for loops_per_jiffy; " |
294 | "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq); | 294 | "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq); |
295 | } | 295 | } |
296 | if ((val == CPUFREQ_PRECHANGE && ci->old < ci->new) || | 296 | if ((val == CPUFREQ_PRECHANGE && ci->old < ci->new) || |
297 | (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) || | 297 | (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) || |
298 | (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { | 298 | (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { |
299 | loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, | 299 | loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, |
300 | ci->new); | 300 | ci->new); |
301 | dprintk("scaling loops_per_jiffy to %lu " | 301 | dprintk("scaling loops_per_jiffy to %lu " |
302 | "for frequency %u kHz\n", loops_per_jiffy, ci->new); | 302 | "for frequency %u kHz\n", loops_per_jiffy, ci->new); |
303 | } | 303 | } |
304 | } | 304 | } |
305 | #else | 305 | #else |
306 | static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) | 306 | static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) |
307 | { | 307 | { |
308 | return; | 308 | return; |
309 | } | 309 | } |
310 | #endif | 310 | #endif |
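
The rescaling itself is plain proportionality: loops_per_jiffy scales linearly with CPU frequency. A sketch of the arithmetic, assuming the usual 64-bit-safe divide (the real helper is cpufreq_scale() from the cpufreq headers):

	#include <asm/div64.h>

	static unsigned long scale_lpj(unsigned long ref_lpj,
				       unsigned int ref_khz, unsigned int new_khz)
	{
		u64 result = (u64)ref_lpj * new_khz;

		do_div(result, ref_khz);	/* safe on 32-bit, unlike a plain '/' */
		return (unsigned long)result;
	}

For example, a reference of 4,000,000 loops measured at 2,000,000 kHz rescales to 2,000,000 loops at 1,000,000 kHz.
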
311 | 311 | ||
312 | 312 | ||
313 | /** | 313 | /** |
314 | * cpufreq_notify_transition - call notifier chain and adjust_jiffies | 314 | * cpufreq_notify_transition - call notifier chain and adjust_jiffies |
315 | * on frequency transition. | 315 | * on frequency transition. |
316 | * | 316 | * |
317 | * This function calls the transition notifiers and the "adjust_jiffies" | 317 | * This function calls the transition notifiers and the "adjust_jiffies" |
318 | * function. It is called twice on all CPU frequency changes that have | 318 | * function. It is called twice on all CPU frequency changes that have |
319 | * external effects. | 319 | * external effects. |
320 | */ | 320 | */ |
321 | void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) | 321 | void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) |
322 | { | 322 | { |
323 | struct cpufreq_policy *policy; | 323 | struct cpufreq_policy *policy; |
324 | 324 | ||
325 | BUG_ON(irqs_disabled()); | 325 | BUG_ON(irqs_disabled()); |
326 | 326 | ||
327 | freqs->flags = cpufreq_driver->flags; | 327 | freqs->flags = cpufreq_driver->flags; |
328 | dprintk("notification %u of frequency transition to %u kHz\n", | 328 | dprintk("notification %u of frequency transition to %u kHz\n", |
329 | state, freqs->new); | 329 | state, freqs->new); |
330 | 330 | ||
331 | policy = per_cpu(cpufreq_cpu_data, freqs->cpu); | 331 | policy = per_cpu(cpufreq_cpu_data, freqs->cpu); |
332 | switch (state) { | 332 | switch (state) { |
333 | 333 | ||
334 | case CPUFREQ_PRECHANGE: | 334 | case CPUFREQ_PRECHANGE: |
335 | /* detect whether the driver reported an "old frequency" that | 335 | /* detect whether the driver reported an "old frequency" that |
336 | * differs from what the cpufreq core thinks the old | 336 | * differs from what the cpufreq core thinks the old |
337 | * frequency is, and if so correct it. | 337 | * frequency is, and if so correct it. |
338 | */ | 338 | */ |
339 | if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { | 339 | if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { |
340 | if ((policy) && (policy->cpu == freqs->cpu) && | 340 | if ((policy) && (policy->cpu == freqs->cpu) && |
341 | (policy->cur) && (policy->cur != freqs->old)) { | 341 | (policy->cur) && (policy->cur != freqs->old)) { |
342 | dprintk("Warning: CPU frequency is" | 342 | dprintk("Warning: CPU frequency is" |
343 | " %u, cpufreq assumed %u kHz.\n", | 343 | " %u, cpufreq assumed %u kHz.\n", |
344 | freqs->old, policy->cur); | 344 | freqs->old, policy->cur); |
345 | freqs->old = policy->cur; | 345 | freqs->old = policy->cur; |
346 | } | 346 | } |
347 | } | 347 | } |
348 | srcu_notifier_call_chain(&cpufreq_transition_notifier_list, | 348 | srcu_notifier_call_chain(&cpufreq_transition_notifier_list, |
349 | CPUFREQ_PRECHANGE, freqs); | 349 | CPUFREQ_PRECHANGE, freqs); |
350 | adjust_jiffies(CPUFREQ_PRECHANGE, freqs); | 350 | adjust_jiffies(CPUFREQ_PRECHANGE, freqs); |
351 | break; | 351 | break; |
352 | 352 | ||
353 | case CPUFREQ_POSTCHANGE: | 353 | case CPUFREQ_POSTCHANGE: |
354 | adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); | 354 | adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); |
355 | srcu_notifier_call_chain(&cpufreq_transition_notifier_list, | 355 | srcu_notifier_call_chain(&cpufreq_transition_notifier_list, |
356 | CPUFREQ_POSTCHANGE, freqs); | 356 | CPUFREQ_POSTCHANGE, freqs); |
357 | if (likely(policy) && likely(policy->cpu == freqs->cpu)) | 357 | if (likely(policy) && likely(policy->cpu == freqs->cpu)) |
358 | policy->cur = freqs->new; | 358 | policy->cur = freqs->new; |
359 | break; | 359 | break; |
360 | } | 360 | } |
361 | } | 361 | } |
362 | EXPORT_SYMBOL_GPL(cpufreq_notify_transition); | 362 | EXPORT_SYMBOL_GPL(cpufreq_notify_transition); |
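
A hedged sketch of the other end of this chain: a transition notifier as consumers typically register one (the callback name and printout are illustrative only):

	static int sample_transition(struct notifier_block *nb,
				     unsigned long state, void *data)
	{
		struct cpufreq_freqs *freqs = data;

		if (state == CPUFREQ_POSTCHANGE)
			pr_info("cpu%u: %u -> %u kHz\n",
				freqs->cpu, freqs->old, freqs->new);
		return NOTIFY_OK;
	}

	static struct notifier_block sample_transition_nb = {
		.notifier_call = sample_transition,
	};

	/* registration, e.g. from module init:
	 * cpufreq_register_notifier(&sample_transition_nb,
	 *			     CPUFREQ_TRANSITION_NOTIFIER);
	 */
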
363 | 363 | ||
364 | 364 | ||
365 | 365 | ||
366 | /********************************************************************* | 366 | /********************************************************************* |
367 | * SYSFS INTERFACE * | 367 | * SYSFS INTERFACE * |
368 | *********************************************************************/ | 368 | *********************************************************************/ |
369 | 369 | ||
370 | static struct cpufreq_governor *__find_governor(const char *str_governor) | 370 | static struct cpufreq_governor *__find_governor(const char *str_governor) |
371 | { | 371 | { |
372 | struct cpufreq_governor *t; | 372 | struct cpufreq_governor *t; |
373 | 373 | ||
374 | list_for_each_entry(t, &cpufreq_governor_list, governor_list) | 374 | list_for_each_entry(t, &cpufreq_governor_list, governor_list) |
375 | if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN)) | 375 | if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN)) |
376 | return t; | 376 | return t; |
377 | 377 | ||
378 | return NULL; | 378 | return NULL; |
379 | } | 379 | } |
380 | 380 | ||
381 | /** | 381 | /** |
382 | * cpufreq_parse_governor - parse a governor string | 382 | * cpufreq_parse_governor - parse a governor string |
383 | */ | 383 | */ |
384 | static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, | 384 | static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, |
385 | struct cpufreq_governor **governor) | 385 | struct cpufreq_governor **governor) |
386 | { | 386 | { |
387 | int err = -EINVAL; | 387 | int err = -EINVAL; |
388 | 388 | ||
389 | if (!cpufreq_driver) | 389 | if (!cpufreq_driver) |
390 | goto out; | 390 | goto out; |
391 | 391 | ||
392 | if (cpufreq_driver->setpolicy) { | 392 | if (cpufreq_driver->setpolicy) { |
393 | if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { | 393 | if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { |
394 | *policy = CPUFREQ_POLICY_PERFORMANCE; | 394 | *policy = CPUFREQ_POLICY_PERFORMANCE; |
395 | err = 0; | 395 | err = 0; |
396 | } else if (!strnicmp(str_governor, "powersave", | 396 | } else if (!strnicmp(str_governor, "powersave", |
397 | CPUFREQ_NAME_LEN)) { | 397 | CPUFREQ_NAME_LEN)) { |
398 | *policy = CPUFREQ_POLICY_POWERSAVE; | 398 | *policy = CPUFREQ_POLICY_POWERSAVE; |
399 | err = 0; | 399 | err = 0; |
400 | } | 400 | } |
401 | } else if (cpufreq_driver->target) { | 401 | } else if (cpufreq_driver->target) { |
402 | struct cpufreq_governor *t; | 402 | struct cpufreq_governor *t; |
403 | 403 | ||
404 | mutex_lock(&cpufreq_governor_mutex); | 404 | mutex_lock(&cpufreq_governor_mutex); |
405 | 405 | ||
406 | t = __find_governor(str_governor); | 406 | t = __find_governor(str_governor); |
407 | 407 | ||
408 | if (t == NULL) { | 408 | if (t == NULL) { |
409 | char *name = kasprintf(GFP_KERNEL, "cpufreq_%s", | 409 | char *name = kasprintf(GFP_KERNEL, "cpufreq_%s", |
410 | str_governor); | 410 | str_governor); |
411 | 411 | ||
412 | if (name) { | 412 | if (name) { |
413 | int ret; | 413 | int ret; |
414 | 414 | ||
415 | mutex_unlock(&cpufreq_governor_mutex); | 415 | mutex_unlock(&cpufreq_governor_mutex); |
416 | ret = request_module("%s", name); | 416 | ret = request_module("%s", name); |
417 | mutex_lock(&cpufreq_governor_mutex); | 417 | mutex_lock(&cpufreq_governor_mutex); |
418 | 418 | ||
419 | if (ret == 0) | 419 | if (ret == 0) |
420 | t = __find_governor(str_governor); | 420 | t = __find_governor(str_governor); |
421 | } | 421 | } |
422 | 422 | ||
423 | kfree(name); | 423 | kfree(name); |
424 | } | 424 | } |
425 | 425 | ||
426 | if (t != NULL) { | 426 | if (t != NULL) { |
427 | *governor = t; | 427 | *governor = t; |
428 | err = 0; | 428 | err = 0; |
429 | } | 429 | } |
430 | 430 | ||
431 | mutex_unlock(&cpufreq_governor_mutex); | 431 | mutex_unlock(&cpufreq_governor_mutex); |
432 | } | 432 | } |
433 | out: | 433 | out: |
434 | return err; | 434 | return err; |
435 | } | 435 | } |
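
For context, __find_governor() matches against names that governors register themselves; when the name is unknown, the kasprintf()/request_module() fallback above asks for a module named e.g. "cpufreq_ondemand". A minimal registration sketch (the "sample" name and stub callback are made up):

	static int sample_governor_fn(struct cpufreq_policy *policy,
				      unsigned int event)
	{
		return 0;	/* a real governor acts on GOV_START/STOP/LIMITS */
	}

	static struct cpufreq_governor cpufreq_gov_sample = {
		.name		= "sample",
		.governor	= sample_governor_fn,
		.owner		= THIS_MODULE,
	};

	/* module init: cpufreq_register_governor(&cpufreq_gov_sample); */
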
436 | 436 | ||
437 | 437 | ||
438 | /** | 438 | /** |
439 | * cpufreq_per_cpu_attr_read() / show_##file_name() - | 439 | * cpufreq_per_cpu_attr_read() / show_##file_name() - |
440 | * print out cpufreq information | 440 | * print out cpufreq information |
441 | * | 441 | * |
442 | * Write out information from cpufreq_driver->policy[cpu]; object must be | 442 | * Write out information from cpufreq_driver->policy[cpu]; object must be |
443 | * "unsigned int". | 443 | * "unsigned int". |
444 | */ | 444 | */ |
445 | 445 | ||
446 | #define show_one(file_name, object) \ | 446 | #define show_one(file_name, object) \ |
447 | static ssize_t show_##file_name \ | 447 | static ssize_t show_##file_name \ |
448 | (struct cpufreq_policy *policy, char *buf) \ | 448 | (struct cpufreq_policy *policy, char *buf) \ |
449 | { \ | 449 | { \ |
450 | return sprintf(buf, "%u\n", policy->object); \ | 450 | return sprintf(buf, "%u\n", policy->object); \ |
451 | } | 451 | } |
452 | 452 | ||
453 | show_one(cpuinfo_min_freq, cpuinfo.min_freq); | 453 | show_one(cpuinfo_min_freq, cpuinfo.min_freq); |
454 | show_one(cpuinfo_max_freq, cpuinfo.max_freq); | 454 | show_one(cpuinfo_max_freq, cpuinfo.max_freq); |
455 | show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); | 455 | show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); |
456 | show_one(scaling_min_freq, min); | 456 | show_one(scaling_min_freq, min); |
457 | show_one(scaling_max_freq, max); | 457 | show_one(scaling_max_freq, max); |
458 | show_one(scaling_cur_freq, cur); | 458 | show_one(scaling_cur_freq, cur); |
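
Concretely, show_one(scaling_cur_freq, cur) expands to:

	static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy,
					     char *buf)
	{
		return sprintf(buf, "%u\n", policy->cur);
	}
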
459 | 459 | ||
460 | static int __cpufreq_set_policy(struct cpufreq_policy *data, | 460 | static int __cpufreq_set_policy(struct cpufreq_policy *data, |
461 | struct cpufreq_policy *policy); | 461 | struct cpufreq_policy *policy); |
462 | 462 | ||
463 | /** | 463 | /** |
464 | * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access | 464 | * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access |
465 | */ | 465 | */ |
466 | #define store_one(file_name, object) \ | 466 | #define store_one(file_name, object) \ |
467 | static ssize_t store_##file_name \ | 467 | static ssize_t store_##file_name \ |
468 | (struct cpufreq_policy *policy, const char *buf, size_t count) \ | 468 | (struct cpufreq_policy *policy, const char *buf, size_t count) \ |
469 | { \ | 469 | { \ |
470 | int ret = -EINVAL; \ | 470 | int ret = -EINVAL; \ |
471 | struct cpufreq_policy new_policy; \ | 471 | struct cpufreq_policy new_policy; \ |
472 | \ | 472 | \ |
473 | ret = cpufreq_get_policy(&new_policy, policy->cpu); \ | 473 | ret = cpufreq_get_policy(&new_policy, policy->cpu); \ |
474 | if (ret) \ | 474 | if (ret) \ |
475 | return -EINVAL; \ | 475 | return -EINVAL; \ |
476 | \ | 476 | \ |
477 | ret = sscanf(buf, "%u", &new_policy.object); \ | 477 | ret = sscanf(buf, "%u", &new_policy.object); \ |
478 | if (ret != 1) \ | 478 | if (ret != 1) \ |
479 | return -EINVAL; \ | 479 | return -EINVAL; \ |
480 | \ | 480 | \ |
481 | ret = __cpufreq_set_policy(policy, &new_policy); \ | 481 | ret = __cpufreq_set_policy(policy, &new_policy); \ |
482 | policy->user_policy.object = policy->object; \ | 482 | policy->user_policy.object = policy->object; \ |
483 | \ | 483 | \ |
484 | return ret ? ret : count; \ | 484 | return ret ? ret : count; \ |
485 | } | 485 | } |
486 | 486 | ||
487 | store_one(scaling_min_freq, min); | 487 | store_one(scaling_min_freq, min); |
488 | store_one(scaling_max_freq, max); | 488 | store_one(scaling_max_freq, max); |
489 | 489 | ||
490 | /** | 490 | /** |
491 | * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware | 491 | * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware |
492 | */ | 492 | */ |
493 | static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, | 493 | static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, |
494 | char *buf) | 494 | char *buf) |
495 | { | 495 | { |
496 | unsigned int cur_freq = __cpufreq_get(policy->cpu); | 496 | unsigned int cur_freq = __cpufreq_get(policy->cpu); |
497 | if (!cur_freq) | 497 | if (!cur_freq) |
498 | return sprintf(buf, "<unknown>\n"); | 498 | return sprintf(buf, "<unknown>\n"); |
499 | return sprintf(buf, "%u\n", cur_freq); | 499 | return sprintf(buf, "%u\n", cur_freq); |
500 | } | 500 | } |
501 | 501 | ||
502 | 502 | ||
503 | /** | 503 | /** |
504 | * show_scaling_governor - show the current policy for the specified CPU | 504 | * show_scaling_governor - show the current policy for the specified CPU |
505 | */ | 505 | */ |
506 | static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) | 506 | static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) |
507 | { | 507 | { |
508 | if (policy->policy == CPUFREQ_POLICY_POWERSAVE) | 508 | if (policy->policy == CPUFREQ_POLICY_POWERSAVE) |
509 | return sprintf(buf, "powersave\n"); | 509 | return sprintf(buf, "powersave\n"); |
510 | else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) | 510 | else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) |
511 | return sprintf(buf, "performance\n"); | 511 | return sprintf(buf, "performance\n"); |
512 | else if (policy->governor) | 512 | else if (policy->governor) |
513 | return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", | 513 | return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", |
514 | policy->governor->name); | 514 | policy->governor->name); |
515 | return -EINVAL; | 515 | return -EINVAL; |
516 | } | 516 | } |
517 | 517 | ||
518 | 518 | ||
519 | /** | 519 | /** |
520 | * store_scaling_governor - store policy for the specified CPU | 520 | * store_scaling_governor - store policy for the specified CPU |
521 | */ | 521 | */ |
522 | static ssize_t store_scaling_governor(struct cpufreq_policy *policy, | 522 | static ssize_t store_scaling_governor(struct cpufreq_policy *policy, |
523 | const char *buf, size_t count) | 523 | const char *buf, size_t count) |
524 | { | 524 | { |
525 | int ret = -EINVAL; | 525 | int ret = -EINVAL; |
526 | char str_governor[16]; | 526 | char str_governor[16]; |
527 | struct cpufreq_policy new_policy; | 527 | struct cpufreq_policy new_policy; |
528 | 528 | ||
529 | ret = cpufreq_get_policy(&new_policy, policy->cpu); | 529 | ret = cpufreq_get_policy(&new_policy, policy->cpu); |
530 | if (ret) | 530 | if (ret) |
531 | return ret; | 531 | return ret; |
532 | 532 | ||
533 | ret = sscanf(buf, "%15s", str_governor); | 533 | ret = sscanf(buf, "%15s", str_governor); |
534 | if (ret != 1) | 534 | if (ret != 1) |
535 | return -EINVAL; | 535 | return -EINVAL; |
536 | 536 | ||
537 | if (cpufreq_parse_governor(str_governor, &new_policy.policy, | 537 | if (cpufreq_parse_governor(str_governor, &new_policy.policy, |
538 | &new_policy.governor)) | 538 | &new_policy.governor)) |
539 | return -EINVAL; | 539 | return -EINVAL; |
540 | 540 | ||
541 | /* Do not use cpufreq_set_policy here or the user_policy.max | 541 | /* Do not use cpufreq_set_policy here or the user_policy.max |
542 | will be wrongly overridden */ | 542 | will be wrongly overridden */ |
543 | ret = __cpufreq_set_policy(policy, &new_policy); | 543 | ret = __cpufreq_set_policy(policy, &new_policy); |
544 | 544 | ||
545 | policy->user_policy.policy = policy->policy; | 545 | policy->user_policy.policy = policy->policy; |
546 | policy->user_policy.governor = policy->governor; | 546 | policy->user_policy.governor = policy->governor; |
547 | 547 | ||
548 | if (ret) | 548 | if (ret) |
549 | return ret; | 549 | return ret; |
550 | else | 550 | else |
551 | return count; | 551 | return count; |
552 | } | 552 | } |
553 | 553 | ||
554 | /** | 554 | /** |
555 | * show_scaling_driver - show the cpufreq driver currently loaded | 555 | * show_scaling_driver - show the cpufreq driver currently loaded |
556 | */ | 556 | */ |
557 | static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) | 557 | static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) |
558 | { | 558 | { |
559 | return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name); | 559 | return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name); |
560 | } | 560 | } |
561 | 561 | ||
562 | /** | 562 | /** |
563 | * show_scaling_available_governors - show the available CPUfreq governors | 563 | * show_scaling_available_governors - show the available CPUfreq governors |
564 | */ | 564 | */ |
565 | static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, | 565 | static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, |
566 | char *buf) | 566 | char *buf) |
567 | { | 567 | { |
568 | ssize_t i = 0; | 568 | ssize_t i = 0; |
569 | struct cpufreq_governor *t; | 569 | struct cpufreq_governor *t; |
570 | 570 | ||
571 | if (!cpufreq_driver->target) { | 571 | if (!cpufreq_driver->target) { |
572 | i += sprintf(buf, "performance powersave"); | 572 | i += sprintf(buf, "performance powersave"); |
573 | goto out; | 573 | goto out; |
574 | } | 574 | } |
575 | 575 | ||
576 | list_for_each_entry(t, &cpufreq_governor_list, governor_list) { | 576 | list_for_each_entry(t, &cpufreq_governor_list, governor_list) { |
577 | if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) | 577 | if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) |
578 | - (CPUFREQ_NAME_LEN + 2))) | 578 | - (CPUFREQ_NAME_LEN + 2))) |
579 | goto out; | 579 | goto out; |
580 | i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name); | 580 | i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name); |
581 | } | 581 | } |
582 | out: | 582 | out: |
583 | i += sprintf(&buf[i], "\n"); | 583 | i += sprintf(&buf[i], "\n"); |
584 | return i; | 584 | return i; |
585 | } | 585 | } |
586 | 586 | ||
587 | static ssize_t show_cpus(const struct cpumask *mask, char *buf) | 587 | static ssize_t show_cpus(const struct cpumask *mask, char *buf) |
588 | { | 588 | { |
589 | ssize_t i = 0; | 589 | ssize_t i = 0; |
590 | unsigned int cpu; | 590 | unsigned int cpu; |
591 | 591 | ||
592 | for_each_cpu(cpu, mask) { | 592 | for_each_cpu(cpu, mask) { |
593 | if (i) | 593 | if (i) |
594 | i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " "); | 594 | i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " "); |
595 | i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu); | 595 | i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu); |
596 | if (i >= (PAGE_SIZE - 5)) | 596 | if (i >= (PAGE_SIZE - 5)) |
597 | break; | 597 | break; |
598 | } | 598 | } |
599 | i += sprintf(&buf[i], "\n"); | 599 | i += sprintf(&buf[i], "\n"); |
600 | return i; | 600 | return i; |
601 | } | 601 | } |
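
For a policy spanning CPUs 0 and 1 this renders the one-page sysfs buffer as "0 1\n"; the PAGE_SIZE - 5 check simply leaves a few bytes of headroom for the trailing newline before bailing out.
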
602 | 602 | ||
603 | /** | 603 | /** |
604 | * show_related_cpus - show the CPUs affected by each transition even if | 604 | * show_related_cpus - show the CPUs affected by each transition even if |
605 | * hw coordination is in use | 605 | * hw coordination is in use |
606 | */ | 606 | */ |
607 | static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf) | 607 | static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf) |
608 | { | 608 | { |
609 | if (cpumask_empty(policy->related_cpus)) | 609 | if (cpumask_empty(policy->related_cpus)) |
610 | return show_cpus(policy->cpus, buf); | 610 | return show_cpus(policy->cpus, buf); |
611 | return show_cpus(policy->related_cpus, buf); | 611 | return show_cpus(policy->related_cpus, buf); |
612 | } | 612 | } |
613 | 613 | ||
614 | /** | 614 | /** |
615 | * show_affected_cpus - show the CPUs affected by each transition | 615 | * show_affected_cpus - show the CPUs affected by each transition |
616 | */ | 616 | */ |
617 | static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) | 617 | static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) |
618 | { | 618 | { |
619 | return show_cpus(policy->cpus, buf); | 619 | return show_cpus(policy->cpus, buf); |
620 | } | 620 | } |
621 | 621 | ||
622 | static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy, | 622 | static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy, |
623 | const char *buf, size_t count) | 623 | const char *buf, size_t count) |
624 | { | 624 | { |
625 | unsigned int freq = 0; | 625 | unsigned int freq = 0; |
626 | unsigned int ret; | 626 | unsigned int ret; |
627 | 627 | ||
628 | if (!policy->governor || !policy->governor->store_setspeed) | 628 | if (!policy->governor || !policy->governor->store_setspeed) |
629 | return -EINVAL; | 629 | return -EINVAL; |
630 | 630 | ||
631 | ret = sscanf(buf, "%u", &freq); | 631 | ret = sscanf(buf, "%u", &freq); |
632 | if (ret != 1) | 632 | if (ret != 1) |
633 | return -EINVAL; | 633 | return -EINVAL; |
634 | 634 | ||
635 | policy->governor->store_setspeed(policy, freq); | 635 | policy->governor->store_setspeed(policy, freq); |
636 | 636 | ||
637 | return count; | 637 | return count; |
638 | } | 638 | } |
639 | 639 | ||
640 | static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf) | 640 | static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf) |
641 | { | 641 | { |
642 | if (!policy->governor || !policy->governor->show_setspeed) | 642 | if (!policy->governor || !policy->governor->show_setspeed) |
643 | return sprintf(buf, "<unsupported>\n"); | 643 | return sprintf(buf, "<unsupported>\n"); |
644 | 644 | ||
645 | return policy->governor->show_setspeed(policy, buf); | 645 | return policy->governor->show_setspeed(policy, buf); |
646 | } | 646 | } |
647 | 647 | ||
648 | #define define_one_ro(_name) \ | 648 | #define define_one_ro(_name) \ |
649 | static struct freq_attr _name = \ | 649 | static struct freq_attr _name = \ |
650 | __ATTR(_name, 0444, show_##_name, NULL) | 650 | __ATTR(_name, 0444, show_##_name, NULL) |
651 | 651 | ||
652 | #define define_one_ro0400(_name) \ | 652 | #define define_one_ro0400(_name) \ |
653 | static struct freq_attr _name = \ | 653 | static struct freq_attr _name = \ |
654 | __ATTR(_name, 0400, show_##_name, NULL) | 654 | __ATTR(_name, 0400, show_##_name, NULL) |
655 | 655 | ||
656 | #define define_one_rw(_name) \ | 656 | #define define_one_rw(_name) \ |
657 | static struct freq_attr _name = \ | 657 | static struct freq_attr _name = \ |
658 | __ATTR(_name, 0644, show_##_name, store_##_name) | 658 | __ATTR(_name, 0644, show_##_name, store_##_name) |
659 | 659 | ||
660 | define_one_ro0400(cpuinfo_cur_freq); | 660 | define_one_ro0400(cpuinfo_cur_freq); |
661 | define_one_ro(cpuinfo_min_freq); | 661 | define_one_ro(cpuinfo_min_freq); |
662 | define_one_ro(cpuinfo_max_freq); | 662 | define_one_ro(cpuinfo_max_freq); |
663 | define_one_ro(cpuinfo_transition_latency); | 663 | define_one_ro(cpuinfo_transition_latency); |
664 | define_one_ro(scaling_available_governors); | 664 | define_one_ro(scaling_available_governors); |
665 | define_one_ro(scaling_driver); | 665 | define_one_ro(scaling_driver); |
666 | define_one_ro(scaling_cur_freq); | 666 | define_one_ro(scaling_cur_freq); |
667 | define_one_ro(related_cpus); | 667 | define_one_ro(related_cpus); |
668 | define_one_ro(affected_cpus); | 668 | define_one_ro(affected_cpus); |
669 | define_one_rw(scaling_min_freq); | 669 | define_one_rw(scaling_min_freq); |
670 | define_one_rw(scaling_max_freq); | 670 | define_one_rw(scaling_max_freq); |
671 | define_one_rw(scaling_governor); | 671 | define_one_rw(scaling_governor); |
672 | define_one_rw(scaling_setspeed); | 672 | define_one_rw(scaling_setspeed); |
673 | 673 | ||
674 | static struct attribute *default_attrs[] = { | 674 | static struct attribute *default_attrs[] = { |
675 | &cpuinfo_min_freq.attr, | 675 | &cpuinfo_min_freq.attr, |
676 | &cpuinfo_max_freq.attr, | 676 | &cpuinfo_max_freq.attr, |
677 | &cpuinfo_transition_latency.attr, | 677 | &cpuinfo_transition_latency.attr, |
678 | &scaling_min_freq.attr, | 678 | &scaling_min_freq.attr, |
679 | &scaling_max_freq.attr, | 679 | &scaling_max_freq.attr, |
680 | &affected_cpus.attr, | 680 | &affected_cpus.attr, |
681 | &related_cpus.attr, | 681 | &related_cpus.attr, |
682 | &scaling_governor.attr, | 682 | &scaling_governor.attr, |
683 | &scaling_driver.attr, | 683 | &scaling_driver.attr, |
684 | &scaling_available_governors.attr, | 684 | &scaling_available_governors.attr, |
685 | &scaling_setspeed.attr, | 685 | &scaling_setspeed.attr, |
686 | NULL | 686 | NULL |
687 | }; | 687 | }; |
688 | 688 | ||
689 | #define to_policy(k) container_of(k, struct cpufreq_policy, kobj) | 689 | #define to_policy(k) container_of(k, struct cpufreq_policy, kobj) |
690 | #define to_attr(a) container_of(a, struct freq_attr, attr) | 690 | #define to_attr(a) container_of(a, struct freq_attr, attr) |
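
to_policy() and to_attr() recover the enclosing structure from the embedded member that sysfs hands back; in effect container_of() is offset arithmetic, roughly:

	/* what to_policy(k) boils down to */
	struct cpufreq_policy *policy = (struct cpufreq_policy *)
		((char *)k - offsetof(struct cpufreq_policy, kobj));
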
691 | 691 | ||
692 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) | 692 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) |
693 | { | 693 | { |
694 | struct cpufreq_policy *policy = to_policy(kobj); | 694 | struct cpufreq_policy *policy = to_policy(kobj); |
695 | struct freq_attr *fattr = to_attr(attr); | 695 | struct freq_attr *fattr = to_attr(attr); |
696 | ssize_t ret = -EINVAL; | 696 | ssize_t ret = -EINVAL; |
697 | policy = cpufreq_cpu_get(policy->cpu); | 697 | policy = cpufreq_cpu_get(policy->cpu); |
698 | if (!policy) | 698 | if (!policy) |
699 | goto no_policy; | 699 | goto no_policy; |
700 | 700 | ||
701 | if (lock_policy_rwsem_read(policy->cpu) < 0) | 701 | if (lock_policy_rwsem_read(policy->cpu) < 0) |
702 | goto fail; | 702 | goto fail; |
703 | 703 | ||
704 | if (fattr->show) | 704 | if (fattr->show) |
705 | ret = fattr->show(policy, buf); | 705 | ret = fattr->show(policy, buf); |
706 | else | 706 | else |
707 | ret = -EIO; | 707 | ret = -EIO; |
708 | 708 | ||
709 | unlock_policy_rwsem_read(policy->cpu); | 709 | unlock_policy_rwsem_read(policy->cpu); |
710 | fail: | 710 | fail: |
711 | cpufreq_cpu_put(policy); | 711 | cpufreq_cpu_put(policy); |
712 | no_policy: | 712 | no_policy: |
713 | return ret; | 713 | return ret; |
714 | } | 714 | } |
715 | 715 | ||
716 | static ssize_t store(struct kobject *kobj, struct attribute *attr, | 716 | static ssize_t store(struct kobject *kobj, struct attribute *attr, |
717 | const char *buf, size_t count) | 717 | const char *buf, size_t count) |
718 | { | 718 | { |
719 | struct cpufreq_policy *policy = to_policy(kobj); | 719 | struct cpufreq_policy *policy = to_policy(kobj); |
720 | struct freq_attr *fattr = to_attr(attr); | 720 | struct freq_attr *fattr = to_attr(attr); |
721 | ssize_t ret = -EINVAL; | 721 | ssize_t ret = -EINVAL; |
722 | policy = cpufreq_cpu_get(policy->cpu); | 722 | policy = cpufreq_cpu_get(policy->cpu); |
723 | if (!policy) | 723 | if (!policy) |
724 | goto no_policy; | 724 | goto no_policy; |
725 | 725 | ||
726 | if (lock_policy_rwsem_write(policy->cpu) < 0) | 726 | if (lock_policy_rwsem_write(policy->cpu) < 0) |
727 | goto fail; | 727 | goto fail; |
728 | 728 | ||
729 | if (fattr->store) | 729 | if (fattr->store) |
730 | ret = fattr->store(policy, buf, count); | 730 | ret = fattr->store(policy, buf, count); |
731 | else | 731 | else |
732 | ret = -EIO; | 732 | ret = -EIO; |
733 | 733 | ||
734 | unlock_policy_rwsem_write(policy->cpu); | 734 | unlock_policy_rwsem_write(policy->cpu); |
735 | fail: | 735 | fail: |
736 | cpufreq_cpu_put(policy); | 736 | cpufreq_cpu_put(policy); |
737 | no_policy: | 737 | no_policy: |
738 | return ret; | 738 | return ret; |
739 | } | 739 | } |
740 | 740 | ||
741 | static void cpufreq_sysfs_release(struct kobject *kobj) | 741 | static void cpufreq_sysfs_release(struct kobject *kobj) |
742 | { | 742 | { |
743 | struct cpufreq_policy *policy = to_policy(kobj); | 743 | struct cpufreq_policy *policy = to_policy(kobj); |
744 | dprintk("last reference is dropped\n"); | 744 | dprintk("last reference is dropped\n"); |
745 | complete(&policy->kobj_unregister); | 745 | complete(&policy->kobj_unregister); |
746 | } | 746 | } |
747 | 747 | ||
748 | static struct sysfs_ops sysfs_ops = { | 748 | static struct sysfs_ops sysfs_ops = { |
749 | .show = show, | 749 | .show = show, |
750 | .store = store, | 750 | .store = store, |
751 | }; | 751 | }; |
752 | 752 | ||
753 | static struct kobj_type ktype_cpufreq = { | 753 | static struct kobj_type ktype_cpufreq = { |
754 | .sysfs_ops = &sysfs_ops, | 754 | .sysfs_ops = &sysfs_ops, |
755 | .default_attrs = default_attrs, | 755 | .default_attrs = default_attrs, |
756 | .release = cpufreq_sysfs_release, | 756 | .release = cpufreq_sysfs_release, |
757 | }; | 757 | }; |
758 | 758 | ||
759 | 759 | ||
760 | /** | 760 | /** |
761 | * cpufreq_add_dev - add a CPU device | 761 | * cpufreq_add_dev - add a CPU device |
762 | * | 762 | * |
763 | * Adds the cpufreq interface for a CPU device. | 763 | * Adds the cpufreq interface for a CPU device. |
764 | */ | 764 | */ |
765 | static int cpufreq_add_dev(struct sys_device *sys_dev) | 765 | static int cpufreq_add_dev(struct sys_device *sys_dev) |
766 | { | 766 | { |
767 | unsigned int cpu = sys_dev->id; | 767 | unsigned int cpu = sys_dev->id; |
768 | int ret = 0; | 768 | int ret = 0; |
769 | struct cpufreq_policy new_policy; | 769 | struct cpufreq_policy new_policy; |
770 | struct cpufreq_policy *policy; | 770 | struct cpufreq_policy *policy; |
771 | struct freq_attr **drv_attr; | 771 | struct freq_attr **drv_attr; |
772 | struct sys_device *cpu_sys_dev; | 772 | struct sys_device *cpu_sys_dev; |
773 | unsigned long flags; | 773 | unsigned long flags; |
774 | unsigned int j; | 774 | unsigned int j; |
775 | #ifdef CONFIG_SMP | 775 | #ifdef CONFIG_SMP |
776 | struct cpufreq_policy *managed_policy; | 776 | struct cpufreq_policy *managed_policy; |
777 | #endif | 777 | #endif |
778 | 778 | ||
779 | if (cpu_is_offline(cpu)) | 779 | if (cpu_is_offline(cpu)) |
780 | return 0; | 780 | return 0; |
781 | 781 | ||
782 | cpufreq_debug_disable_ratelimit(); | 782 | cpufreq_debug_disable_ratelimit(); |
783 | dprintk("adding CPU %u\n", cpu); | 783 | dprintk("adding CPU %u\n", cpu); |
784 | 784 | ||
785 | #ifdef CONFIG_SMP | 785 | #ifdef CONFIG_SMP |
786 | /* check whether a different CPU already registered this | 786 | /* check whether a different CPU already registered this |
787 | * CPU because it is in the same boat. */ | 787 | * CPU because it is in the same boat. */ |
788 | policy = cpufreq_cpu_get(cpu); | 788 | policy = cpufreq_cpu_get(cpu); |
789 | if (unlikely(policy)) { | 789 | if (unlikely(policy)) { |
790 | cpufreq_cpu_put(policy); | 790 | cpufreq_cpu_put(policy); |
791 | cpufreq_debug_enable_ratelimit(); | 791 | cpufreq_debug_enable_ratelimit(); |
792 | return 0; | 792 | return 0; |
793 | } | 793 | } |
794 | #endif | 794 | #endif |
795 | 795 | ||
796 | if (!try_module_get(cpufreq_driver->owner)) { | 796 | if (!try_module_get(cpufreq_driver->owner)) { |
797 | ret = -EINVAL; | 797 | ret = -EINVAL; |
798 | goto module_out; | 798 | goto module_out; |
799 | } | 799 | } |
800 | 800 | ||
801 | policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL); | 801 | policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL); |
802 | if (!policy) { | 802 | if (!policy) { |
803 | ret = -ENOMEM; | 803 | ret = -ENOMEM; |
804 | goto nomem_out; | 804 | goto nomem_out; |
805 | } | 805 | } |
806 | if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) { | 806 | if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) { |
807 | kfree(policy); | 807 | kfree(policy); |
808 | ret = -ENOMEM; | 808 | ret = -ENOMEM; |
809 | goto nomem_out; | 809 | goto nomem_out; |
810 | } | 810 | } |
811 | if (!alloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) { | 811 | if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) { |
812 | free_cpumask_var(policy->cpus); | 812 | free_cpumask_var(policy->cpus); |
813 | kfree(policy); | 813 | kfree(policy); |
814 | ret = -ENOMEM; | 814 | ret = -ENOMEM; |
815 | goto nomem_out; | 815 | goto nomem_out; |
816 | } | 816 | } |
817 | 817 | ||
818 | policy->cpu = cpu; | 818 | policy->cpu = cpu; |
819 | cpumask_copy(policy->cpus, cpumask_of(cpu)); | 819 | cpumask_copy(policy->cpus, cpumask_of(cpu)); |
820 | 820 | ||
821 | /* Initially set CPU itself as the policy_cpu */ | 821 | /* Initially set CPU itself as the policy_cpu */ |
822 | per_cpu(policy_cpu, cpu) = cpu; | 822 | per_cpu(policy_cpu, cpu) = cpu; |
823 | lock_policy_rwsem_write(cpu); | 823 | lock_policy_rwsem_write(cpu); |
824 | 824 | ||
825 | init_completion(&policy->kobj_unregister); | 825 | init_completion(&policy->kobj_unregister); |
826 | INIT_WORK(&policy->update, handle_update); | 826 | INIT_WORK(&policy->update, handle_update); |
827 | 827 | ||
828 | /* Set governor before ->init, so that driver could check it */ | 828 | /* Set governor before ->init, so that driver could check it */ |
829 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | 829 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; |
830 | /* call driver. From then on the cpufreq driver must be able | 830 | /* call driver. From then on the cpufreq driver must be able |
831 | * to accept all calls to ->verify and ->setpolicy for this CPU | 831 | * to accept all calls to ->verify and ->setpolicy for this CPU |
832 | */ | 832 | */ |
833 | ret = cpufreq_driver->init(policy); | 833 | ret = cpufreq_driver->init(policy); |
834 | if (ret) { | 834 | if (ret) { |
835 | dprintk("initialization failed\n"); | 835 | dprintk("initialization failed\n"); |
836 | goto err_out; | 836 | goto err_out; |
837 | } | 837 | } |
838 | policy->user_policy.min = policy->min; | 838 | policy->user_policy.min = policy->min; |
839 | policy->user_policy.max = policy->max; | 839 | policy->user_policy.max = policy->max; |
840 | 840 | ||
841 | blocking_notifier_call_chain(&cpufreq_policy_notifier_list, | 841 | blocking_notifier_call_chain(&cpufreq_policy_notifier_list, |
842 | CPUFREQ_START, policy); | 842 | CPUFREQ_START, policy); |
843 | 843 | ||
844 | #ifdef CONFIG_SMP | 844 | #ifdef CONFIG_SMP |
845 | 845 | ||
846 | #ifdef CONFIG_HOTPLUG_CPU | 846 | #ifdef CONFIG_HOTPLUG_CPU |
847 | if (per_cpu(cpufreq_cpu_governor, cpu)) { | 847 | if (per_cpu(cpufreq_cpu_governor, cpu)) { |
848 | policy->governor = per_cpu(cpufreq_cpu_governor, cpu); | 848 | policy->governor = per_cpu(cpufreq_cpu_governor, cpu); |
849 | dprintk("Restoring governor %s for cpu %d\n", | 849 | dprintk("Restoring governor %s for cpu %d\n", |
850 | policy->governor->name, cpu); | 850 | policy->governor->name, cpu); |
851 | } | 851 | } |
852 | #endif | 852 | #endif |
853 | 853 | ||
854 | for_each_cpu(j, policy->cpus) { | 854 | for_each_cpu(j, policy->cpus) { |
855 | if (cpu == j) | 855 | if (cpu == j) |
856 | continue; | 856 | continue; |
857 | 857 | ||
858 | /* Check for existing affected CPUs. | 858 | /* Check for existing affected CPUs. |
859 | * They may not be aware of it due to CPU Hotplug. | 859 | * They may not be aware of it due to CPU Hotplug. |
860 | */ | 860 | */ |
861 | managed_policy = cpufreq_cpu_get(j); /* FIXME: Where is this released? What about error paths? */ | 861 | managed_policy = cpufreq_cpu_get(j); /* FIXME: Where is this released? What about error paths? */ |
862 | if (unlikely(managed_policy)) { | 862 | if (unlikely(managed_policy)) { |
863 | 863 | ||
864 | /* Set proper policy_cpu */ | 864 | /* Set proper policy_cpu */ |
865 | unlock_policy_rwsem_write(cpu); | 865 | unlock_policy_rwsem_write(cpu); |
866 | per_cpu(policy_cpu, cpu) = managed_policy->cpu; | 866 | per_cpu(policy_cpu, cpu) = managed_policy->cpu; |
867 | 867 | ||
868 | if (lock_policy_rwsem_write(cpu) < 0) | 868 | if (lock_policy_rwsem_write(cpu) < 0) |
869 | goto err_out_driver_exit; | 869 | goto err_out_driver_exit; |
870 | 870 | ||
871 | spin_lock_irqsave(&cpufreq_driver_lock, flags); | 871 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
872 | cpumask_copy(managed_policy->cpus, policy->cpus); | 872 | cpumask_copy(managed_policy->cpus, policy->cpus); |
873 | per_cpu(cpufreq_cpu_data, cpu) = managed_policy; | 873 | per_cpu(cpufreq_cpu_data, cpu) = managed_policy; |
874 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 874 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
875 | 875 | ||
876 | dprintk("CPU already managed, adding link\n"); | 876 | dprintk("CPU already managed, adding link\n"); |
877 | ret = sysfs_create_link(&sys_dev->kobj, | 877 | ret = sysfs_create_link(&sys_dev->kobj, |
878 | &managed_policy->kobj, | 878 | &managed_policy->kobj, |
879 | "cpufreq"); | 879 | "cpufreq"); |
880 | if (ret) | 880 | if (ret) |
881 | goto err_out_driver_exit; | 881 | goto err_out_driver_exit; |
882 | 882 | ||
883 | cpufreq_debug_enable_ratelimit(); | 883 | cpufreq_debug_enable_ratelimit(); |
884 | ret = 0; | 884 | ret = 0; |
885 | goto err_out_driver_exit; /* call driver->exit() */ | 885 | goto err_out_driver_exit; /* call driver->exit() */ |
886 | } | 886 | } |
887 | } | 887 | } |
888 | #endif | 888 | #endif |
889 | memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); | 889 | memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); |
890 | 890 | ||
891 | /* prepare interface data */ | 891 | /* prepare interface data */ |
892 | ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj, | 892 | ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj, |
893 | "cpufreq"); | 893 | "cpufreq"); |
894 | if (ret) | 894 | if (ret) |
895 | goto err_out_driver_exit; | 895 | goto err_out_driver_exit; |
896 | 896 | ||
897 | /* set up files for this cpu device */ | 897 | /* set up files for this cpu device */ |
898 | drv_attr = cpufreq_driver->attr; | 898 | drv_attr = cpufreq_driver->attr; |
899 | while ((drv_attr) && (*drv_attr)) { | 899 | while ((drv_attr) && (*drv_attr)) { |
900 | ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); | 900 | ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); |
901 | if (ret) | 901 | if (ret) |
902 | goto err_out_driver_exit; | 902 | goto err_out_driver_exit; |
903 | drv_attr++; | 903 | drv_attr++; |
904 | } | 904 | } |
905 | if (cpufreq_driver->get) { | 905 | if (cpufreq_driver->get) { |
906 | ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); | 906 | ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); |
907 | if (ret) | 907 | if (ret) |
908 | goto err_out_driver_exit; | 908 | goto err_out_driver_exit; |
909 | } | 909 | } |
910 | if (cpufreq_driver->target) { | 910 | if (cpufreq_driver->target) { |
911 | ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); | 911 | ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); |
912 | if (ret) | 912 | if (ret) |
913 | goto err_out_driver_exit; | 913 | goto err_out_driver_exit; |
914 | } | 914 | } |
915 | 915 | ||
916 | spin_lock_irqsave(&cpufreq_driver_lock, flags); | 916 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
917 | for_each_cpu(j, policy->cpus) { | 917 | for_each_cpu(j, policy->cpus) { |
918 | per_cpu(cpufreq_cpu_data, j) = policy; | 918 | per_cpu(cpufreq_cpu_data, j) = policy; |
919 | per_cpu(policy_cpu, j) = policy->cpu; | 919 | per_cpu(policy_cpu, j) = policy->cpu; |
920 | } | 920 | } |
921 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 921 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
922 | 922 | ||
923 | /* symlink affected CPUs */ | 923 | /* symlink affected CPUs */ |
924 | for_each_cpu(j, policy->cpus) { | 924 | for_each_cpu(j, policy->cpus) { |
925 | if (j == cpu) | 925 | if (j == cpu) |
926 | continue; | 926 | continue; |
927 | if (!cpu_online(j)) | 927 | if (!cpu_online(j)) |
928 | continue; | 928 | continue; |
929 | 929 | ||
930 | dprintk("CPU %u already managed, adding link\n", j); | 930 | dprintk("CPU %u already managed, adding link\n", j); |
931 | cpufreq_cpu_get(cpu); | 931 | cpufreq_cpu_get(cpu); |
932 | cpu_sys_dev = get_cpu_sysdev(j); | 932 | cpu_sys_dev = get_cpu_sysdev(j); |
933 | ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, | 933 | ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, |
934 | "cpufreq"); | 934 | "cpufreq"); |
935 | if (ret) | 935 | if (ret) |
936 | goto err_out_unregister; | 936 | goto err_out_unregister; |
937 | } | 937 | } |
938 | 938 | ||
939 | policy->governor = NULL; /* to ensure that the starting sequence is | 939 | policy->governor = NULL; /* to ensure that the starting sequence is |
940 | * run in cpufreq_set_policy */ | 940 | * run in cpufreq_set_policy */ |
941 | 941 | ||
942 | /* set default policy */ | 942 | /* set default policy */ |
943 | ret = __cpufreq_set_policy(policy, &new_policy); | 943 | ret = __cpufreq_set_policy(policy, &new_policy); |
944 | policy->user_policy.policy = policy->policy; | 944 | policy->user_policy.policy = policy->policy; |
945 | policy->user_policy.governor = policy->governor; | 945 | policy->user_policy.governor = policy->governor; |
946 | 946 | ||
947 | if (ret) { | 947 | if (ret) { |
948 | dprintk("setting policy failed\n"); | 948 | dprintk("setting policy failed\n"); |
949 | goto err_out_unregister; | 949 | goto err_out_unregister; |
950 | } | 950 | } |
951 | 951 | ||
952 | unlock_policy_rwsem_write(cpu); | 952 | unlock_policy_rwsem_write(cpu); |
953 | 953 | ||
954 | kobject_uevent(&policy->kobj, KOBJ_ADD); | 954 | kobject_uevent(&policy->kobj, KOBJ_ADD); |
955 | module_put(cpufreq_driver->owner); | 955 | module_put(cpufreq_driver->owner); |
956 | dprintk("initialization complete\n"); | 956 | dprintk("initialization complete\n"); |
957 | cpufreq_debug_enable_ratelimit(); | 957 | cpufreq_debug_enable_ratelimit(); |
958 | 958 | ||
959 | return 0; | 959 | return 0; |
960 | 960 | ||
961 | 961 | ||
962 | err_out_unregister: | 962 | err_out_unregister: |
963 | spin_lock_irqsave(&cpufreq_driver_lock, flags); | 963 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
964 | for_each_cpu(j, policy->cpus) | 964 | for_each_cpu(j, policy->cpus) |
965 | per_cpu(cpufreq_cpu_data, j) = NULL; | 965 | per_cpu(cpufreq_cpu_data, j) = NULL; |
966 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 966 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
967 | 967 | ||
968 | kobject_put(&policy->kobj); | 968 | kobject_put(&policy->kobj); |
969 | wait_for_completion(&policy->kobj_unregister); | 969 | wait_for_completion(&policy->kobj_unregister); |
970 | 970 | ||
971 | err_out_driver_exit: | 971 | err_out_driver_exit: |
972 | if (cpufreq_driver->exit) | 972 | if (cpufreq_driver->exit) |
973 | cpufreq_driver->exit(policy); | 973 | cpufreq_driver->exit(policy); |
974 | 974 | ||
975 | err_out: | 975 | err_out: |
976 | unlock_policy_rwsem_write(cpu); | 976 | unlock_policy_rwsem_write(cpu); |
977 | kfree(policy); | 977 | kfree(policy); |
978 | 978 | ||
979 | nomem_out: | 979 | nomem_out: |
980 | module_put(cpufreq_driver->owner); | 980 | module_put(cpufreq_driver->owner); |
981 | module_out: | 981 | module_out: |
982 | cpufreq_debug_enable_ratelimit(); | 982 | cpufreq_debug_enable_ratelimit(); |
983 | return ret; | 983 | return ret; |
984 | } | 984 | } |
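
The zalloc_cpumask_var() hunk above (new line 811) is the one functional change in this file, and show_related_cpus() is why it matters: that handler tests cpumask_empty(policy->related_cpus), which is only meaningful if the mask starts out cleared. In outline, and assuming the usual cpumask.h definitions, the two cpumask_var_t flavours behave like:

	#ifdef CONFIG_CPUMASK_OFFSTACK		/* selected by MAXSMP */
	typedef struct cpumask *cpumask_var_t;	/* separate allocation: alloc_cpumask_var()
						 * hands back uninitialized memory */
	#else
	typedef struct cpumask cpumask_var_t[1]; /* embedded in the kzalloc()ed policy,
						  * hence already zeroed */
	#endif

zalloc_cpumask_var() clears the OFFSTACK allocation too, so the emptiness test gives the same answer on both configurations.
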
985 | 985 | ||
986 | 986 | ||
987 | /** | 987 | /** |
988 | * __cpufreq_remove_dev - remove a CPU device | 988 | * __cpufreq_remove_dev - remove a CPU device |
989 | * | 989 | * |
990 | * Removes the cpufreq interface for a CPU device. | 990 | * Removes the cpufreq interface for a CPU device. |
991 | * Caller should already have policy_rwsem in write mode for this CPU. | 991 | * Caller should already have policy_rwsem in write mode for this CPU. |
992 | * This routine releases the rwsem before returning. | 992 | * This routine releases the rwsem before returning. |
993 | */ | 993 | */ |
994 | static int __cpufreq_remove_dev(struct sys_device *sys_dev) | 994 | static int __cpufreq_remove_dev(struct sys_device *sys_dev) |
995 | { | 995 | { |
996 | unsigned int cpu = sys_dev->id; | 996 | unsigned int cpu = sys_dev->id; |
997 | unsigned long flags; | 997 | unsigned long flags; |
998 | struct cpufreq_policy *data; | 998 | struct cpufreq_policy *data; |
999 | #ifdef CONFIG_SMP | 999 | #ifdef CONFIG_SMP |
1000 | struct sys_device *cpu_sys_dev; | 1000 | struct sys_device *cpu_sys_dev; |
1001 | unsigned int j; | 1001 | unsigned int j; |
1002 | #endif | 1002 | #endif |
1003 | 1003 | ||
1004 | cpufreq_debug_disable_ratelimit(); | 1004 | cpufreq_debug_disable_ratelimit(); |
1005 | dprintk("unregistering CPU %u\n", cpu); | 1005 | dprintk("unregistering CPU %u\n", cpu); |
1006 | 1006 | ||
1007 | spin_lock_irqsave(&cpufreq_driver_lock, flags); | 1007 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
1008 | data = per_cpu(cpufreq_cpu_data, cpu); | 1008 | data = per_cpu(cpufreq_cpu_data, cpu); |
1009 | 1009 | ||
1010 | if (!data) { | 1010 | if (!data) { |
1011 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 1011 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
1012 | cpufreq_debug_enable_ratelimit(); | 1012 | cpufreq_debug_enable_ratelimit(); |
1013 | unlock_policy_rwsem_write(cpu); | 1013 | unlock_policy_rwsem_write(cpu); |
1014 | return -EINVAL; | 1014 | return -EINVAL; |
1015 | } | 1015 | } |
1016 | per_cpu(cpufreq_cpu_data, cpu) = NULL; | 1016 | per_cpu(cpufreq_cpu_data, cpu) = NULL; |
1017 | 1017 | ||
1018 | 1018 | ||
1019 | #ifdef CONFIG_SMP | 1019 | #ifdef CONFIG_SMP |
1020 | /* if this isn't the CPU which is the parent of the kobj, we | 1020 | /* if this isn't the CPU which is the parent of the kobj, we |
1021 | * only need to unlink, put and exit | 1021 | * only need to unlink, put and exit |
1022 | */ | 1022 | */ |
1023 | if (unlikely(cpu != data->cpu)) { | 1023 | if (unlikely(cpu != data->cpu)) { |
1024 | dprintk("removing link\n"); | 1024 | dprintk("removing link\n"); |
1025 | cpumask_clear_cpu(cpu, data->cpus); | 1025 | cpumask_clear_cpu(cpu, data->cpus); |
1026 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 1026 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
1027 | sysfs_remove_link(&sys_dev->kobj, "cpufreq"); | 1027 | sysfs_remove_link(&sys_dev->kobj, "cpufreq"); |
1028 | cpufreq_cpu_put(data); | 1028 | cpufreq_cpu_put(data); |
1029 | cpufreq_debug_enable_ratelimit(); | 1029 | cpufreq_debug_enable_ratelimit(); |
1030 | unlock_policy_rwsem_write(cpu); | 1030 | unlock_policy_rwsem_write(cpu); |
1031 | return 0; | 1031 | return 0; |
1032 | } | 1032 | } |
1033 | #endif | 1033 | #endif |
1034 | 1034 | ||
1035 | #ifdef CONFIG_SMP | 1035 | #ifdef CONFIG_SMP |
1036 | 1036 | ||
1037 | #ifdef CONFIG_HOTPLUG_CPU | 1037 | #ifdef CONFIG_HOTPLUG_CPU |
1038 | per_cpu(cpufreq_cpu_governor, cpu) = data->governor; | 1038 | per_cpu(cpufreq_cpu_governor, cpu) = data->governor; |
1039 | #endif | 1039 | #endif |
1040 | 1040 | ||
1041 | /* if we have other CPUs still registered, we need to unlink them, | 1041 | /* if we have other CPUs still registered, we need to unlink them, |
1042 | * or else wait_for_completion below will lock up. Clean the | 1042 | * or else wait_for_completion below will lock up. Clean the |
1043 | * per_cpu(cpufreq_cpu_data) while holding the lock, and remove | 1043 | * per_cpu(cpufreq_cpu_data) while holding the lock, and remove |
1044 | * the sysfs links afterwards. | 1044 | * the sysfs links afterwards. |
1045 | */ | 1045 | */ |
1046 | if (unlikely(cpumask_weight(data->cpus) > 1)) { | 1046 | if (unlikely(cpumask_weight(data->cpus) > 1)) { |
1047 | for_each_cpu(j, data->cpus) { | 1047 | for_each_cpu(j, data->cpus) { |
1048 | if (j == cpu) | 1048 | if (j == cpu) |
1049 | continue; | 1049 | continue; |
1050 | per_cpu(cpufreq_cpu_data, j) = NULL; | 1050 | per_cpu(cpufreq_cpu_data, j) = NULL; |
1051 | } | 1051 | } |
1052 | } | 1052 | } |
1053 | 1053 | ||
1054 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 1054 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
1055 | 1055 | ||
1056 | if (unlikely(cpumask_weight(data->cpus) > 1)) { | 1056 | if (unlikely(cpumask_weight(data->cpus) > 1)) { |
1057 | for_each_cpu(j, data->cpus) { | 1057 | for_each_cpu(j, data->cpus) { |
1058 | if (j == cpu) | 1058 | if (j == cpu) |
1059 | continue; | 1059 | continue; |
1060 | dprintk("removing link for cpu %u\n", j); | 1060 | dprintk("removing link for cpu %u\n", j); |
1061 | #ifdef CONFIG_HOTPLUG_CPU | 1061 | #ifdef CONFIG_HOTPLUG_CPU |
1062 | per_cpu(cpufreq_cpu_governor, j) = data->governor; | 1062 | per_cpu(cpufreq_cpu_governor, j) = data->governor; |
1063 | #endif | 1063 | #endif |
1064 | cpu_sys_dev = get_cpu_sysdev(j); | 1064 | cpu_sys_dev = get_cpu_sysdev(j); |
1065 | sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq"); | 1065 | sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq"); |
1066 | cpufreq_cpu_put(data); | 1066 | cpufreq_cpu_put(data); |
1067 | } | 1067 | } |
1068 | } | 1068 | } |
1069 | #else | 1069 | #else |
1070 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 1070 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
1071 | #endif | 1071 | #endif |
1072 | 1072 | ||
1073 | unlock_policy_rwsem_write(cpu); | 1073 | unlock_policy_rwsem_write(cpu); |
1074 | 1074 | ||
1075 | if (cpufreq_driver->target) | 1075 | if (cpufreq_driver->target) |
1076 | __cpufreq_governor(data, CPUFREQ_GOV_STOP); | 1076 | __cpufreq_governor(data, CPUFREQ_GOV_STOP); |
1077 | 1077 | ||
1078 | kobject_put(&data->kobj); | 1078 | kobject_put(&data->kobj); |
1079 | 1079 | ||
1080 | /* we need to make sure that the underlying kobj is actually | 1080 | /* we need to make sure that the underlying kobj is actually |
1081 | * not referenced anymore by anybody before we proceed with | 1081 | * not referenced anymore by anybody before we proceed with |
1082 | * unloading. | 1082 | * unloading. |
1083 | */ | 1083 | */ |
1084 | dprintk("waiting for dropping of refcount\n"); | 1084 | dprintk("waiting for dropping of refcount\n"); |
1085 | wait_for_completion(&data->kobj_unregister); | 1085 | wait_for_completion(&data->kobj_unregister); |
1086 | dprintk("wait complete\n"); | 1086 | dprintk("wait complete\n"); |
1087 | 1087 | ||
1088 | if (cpufreq_driver->exit) | 1088 | if (cpufreq_driver->exit) |
1089 | cpufreq_driver->exit(data); | 1089 | cpufreq_driver->exit(data); |
1090 | 1090 | ||
1091 | free_cpumask_var(data->related_cpus); | 1091 | free_cpumask_var(data->related_cpus); |
1092 | free_cpumask_var(data->cpus); | 1092 | free_cpumask_var(data->cpus); |
1093 | kfree(data); | 1093 | kfree(data); |
1094 | per_cpu(cpufreq_cpu_data, cpu) = NULL; | 1094 | per_cpu(cpufreq_cpu_data, cpu) = NULL; |
1095 | 1095 | ||
1096 | cpufreq_debug_enable_ratelimit(); | 1096 | cpufreq_debug_enable_ratelimit(); |
1097 | return 0; | 1097 | return 0; |
1098 | } | 1098 | } |
1099 | 1099 | ||
1100 | 1100 | ||
1101 | static int cpufreq_remove_dev(struct sys_device *sys_dev) | 1101 | static int cpufreq_remove_dev(struct sys_device *sys_dev) |
1102 | { | 1102 | { |
1103 | unsigned int cpu = sys_dev->id; | 1103 | unsigned int cpu = sys_dev->id; |
1104 | int retval; | 1104 | int retval; |
1105 | 1105 | ||
1106 | if (cpu_is_offline(cpu)) | 1106 | if (cpu_is_offline(cpu)) |
1107 | return 0; | 1107 | return 0; |
1108 | 1108 | ||
1109 | if (unlikely(lock_policy_rwsem_write(cpu))) | 1109 | if (unlikely(lock_policy_rwsem_write(cpu))) |
1110 | BUG(); | 1110 | BUG(); |
1111 | 1111 | ||
1112 | retval = __cpufreq_remove_dev(sys_dev); | 1112 | retval = __cpufreq_remove_dev(sys_dev); |
1113 | return retval; | 1113 | return retval; |
1114 | } | 1114 | } |
1115 | 1115 | ||
1116 | 1116 | ||
1117 | static void handle_update(struct work_struct *work) | 1117 | static void handle_update(struct work_struct *work) |
1118 | { | 1118 | { |
1119 | struct cpufreq_policy *policy = | 1119 | struct cpufreq_policy *policy = |
1120 | container_of(work, struct cpufreq_policy, update); | 1120 | container_of(work, struct cpufreq_policy, update); |
1121 | unsigned int cpu = policy->cpu; | 1121 | unsigned int cpu = policy->cpu; |
1122 | dprintk("handle_update for cpu %u called\n", cpu); | 1122 | dprintk("handle_update for cpu %u called\n", cpu); |
1123 | cpufreq_update_policy(cpu); | 1123 | cpufreq_update_policy(cpu); |
1124 | } | 1124 | } |
1125 | 1125 | ||
1126 | /** | 1126 | /** |
1127 | * cpufreq_out_of_sync - If the actual and saved CPU frequencies differ, we're in deep trouble. | 1127 | * cpufreq_out_of_sync - If the actual and saved CPU frequencies differ, we're in deep trouble. |
1128 | * @cpu: cpu number | 1128 | * @cpu: cpu number |
1129 | * @old_freq: CPU frequency the kernel thinks the CPU runs at | 1129 | * @old_freq: CPU frequency the kernel thinks the CPU runs at |
1130 | * @new_freq: CPU frequency the CPU actually runs at | 1130 | * @new_freq: CPU frequency the CPU actually runs at |
1131 | * | 1131 | * |
1132 | * We adjust to the current frequency first, and need to clean up later. | 1132 | * We adjust to the current frequency first, and need to clean up later. |
1133 | * So either call cpufreq_update_policy() or schedule handle_update(). | 1133 | * So either call cpufreq_update_policy() or schedule handle_update(). |
1134 | */ | 1134 | */ |
1135 | static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, | 1135 | static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, |
1136 | unsigned int new_freq) | 1136 | unsigned int new_freq) |
1137 | { | 1137 | { |
1138 | struct cpufreq_freqs freqs; | 1138 | struct cpufreq_freqs freqs; |
1139 | 1139 | ||
1140 | dprintk("Warning: CPU frequency out of sync: cpufreq and timing " | 1140 | dprintk("Warning: CPU frequency out of sync: cpufreq and timing " |
1141 | "core thinks of %u, is %u kHz.\n", old_freq, new_freq); | 1141 | "core thinks of %u, is %u kHz.\n", old_freq, new_freq); |
1142 | 1142 | ||
1143 | freqs.cpu = cpu; | 1143 | freqs.cpu = cpu; |
1144 | freqs.old = old_freq; | 1144 | freqs.old = old_freq; |
1145 | freqs.new = new_freq; | 1145 | freqs.new = new_freq; |
1146 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 1146 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
1147 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 1147 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
1148 | } | 1148 | } |
1149 | 1149 | ||
1150 | 1150 | ||
1151 | /** | 1151 | /** |
1152 | * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur | 1152 | * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur |
1153 | * @cpu: CPU number | 1153 | * @cpu: CPU number |
1154 | * | 1154 | * |
1155 | * This is the last known freq, without actually getting it from the driver. | 1155 | * This is the last known freq, without actually getting it from the driver. |
1156 | * The return value will be the same as what is shown in scaling_cur_freq in sysfs. | 1156 | * The return value will be the same as what is shown in scaling_cur_freq in sysfs. |
1157 | */ | 1157 | */ |
1158 | unsigned int cpufreq_quick_get(unsigned int cpu) | 1158 | unsigned int cpufreq_quick_get(unsigned int cpu) |
1159 | { | 1159 | { |
1160 | struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); | 1160 | struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); |
1161 | unsigned int ret_freq = 0; | 1161 | unsigned int ret_freq = 0; |
1162 | 1162 | ||
1163 | if (policy) { | 1163 | if (policy) { |
1164 | ret_freq = policy->cur; | 1164 | ret_freq = policy->cur; |
1165 | cpufreq_cpu_put(policy); | 1165 | cpufreq_cpu_put(policy); |
1166 | } | 1166 | } |
1167 | 1167 | ||
1168 | return ret_freq; | 1168 | return ret_freq; |
1169 | } | 1169 | } |
1170 | EXPORT_SYMBOL(cpufreq_quick_get); | 1170 | EXPORT_SYMBOL(cpufreq_quick_get); |
1171 | 1171 | ||
1172 | 1172 | ||
1173 | static unsigned int __cpufreq_get(unsigned int cpu) | 1173 | static unsigned int __cpufreq_get(unsigned int cpu) |
1174 | { | 1174 | { |
1175 | struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); | 1175 | struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); |
1176 | unsigned int ret_freq = 0; | 1176 | unsigned int ret_freq = 0; |
1177 | 1177 | ||
1178 | if (!cpufreq_driver->get) | 1178 | if (!cpufreq_driver->get) |
1179 | return ret_freq; | 1179 | return ret_freq; |
1180 | 1180 | ||
1181 | ret_freq = cpufreq_driver->get(cpu); | 1181 | ret_freq = cpufreq_driver->get(cpu); |
1182 | 1182 | ||
1183 | if (ret_freq && policy->cur && | 1183 | if (ret_freq && policy->cur && |
1184 | !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { | 1184 | !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { |
1185 | /* verify no discrepancy between actual and | 1185 | /* verify no discrepancy between actual and |
1186 | saved value exists */ | 1186 | saved value exists */ |
1187 | if (unlikely(ret_freq != policy->cur)) { | 1187 | if (unlikely(ret_freq != policy->cur)) { |
1188 | cpufreq_out_of_sync(cpu, policy->cur, ret_freq); | 1188 | cpufreq_out_of_sync(cpu, policy->cur, ret_freq); |
1189 | schedule_work(&policy->update); | 1189 | schedule_work(&policy->update); |
1190 | } | 1190 | } |
1191 | } | 1191 | } |
1192 | 1192 | ||
1193 | return ret_freq; | 1193 | return ret_freq; |
1194 | } | 1194 | } |
1195 | 1195 | ||
1196 | /** | 1196 | /** |
1197 | * cpufreq_get - get the current CPU frequency (in kHz) | 1197 | * cpufreq_get - get the current CPU frequency (in kHz) |
1198 | * @cpu: CPU number | 1198 | * @cpu: CPU number |
1199 | * | 1199 | * |
1200 | * Get the current frequency of the given CPU | 1200 | * Get the current frequency of the given CPU |
1201 | */ | 1201 | */ |
1202 | unsigned int cpufreq_get(unsigned int cpu) | 1202 | unsigned int cpufreq_get(unsigned int cpu) |
1203 | { | 1203 | { |
1204 | unsigned int ret_freq = 0; | 1204 | unsigned int ret_freq = 0; |
1205 | struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); | 1205 | struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); |
1206 | 1206 | ||
1207 | if (!policy) | 1207 | if (!policy) |
1208 | goto out; | 1208 | goto out; |
1209 | 1209 | ||
1210 | if (unlikely(lock_policy_rwsem_read(cpu))) | 1210 | if (unlikely(lock_policy_rwsem_read(cpu))) |
1211 | goto out_policy; | 1211 | goto out_policy; |
1212 | 1212 | ||
1213 | ret_freq = __cpufreq_get(cpu); | 1213 | ret_freq = __cpufreq_get(cpu); |
1214 | 1214 | ||
1215 | unlock_policy_rwsem_read(cpu); | 1215 | unlock_policy_rwsem_read(cpu); |
1216 | 1216 | ||
1217 | out_policy: | 1217 | out_policy: |
1218 | cpufreq_cpu_put(policy); | 1218 | cpufreq_cpu_put(policy); |
1219 | out: | 1219 | out: |
1220 | return ret_freq; | 1220 | return ret_freq; |
1221 | } | 1221 | } |
1222 | EXPORT_SYMBOL(cpufreq_get); | 1222 | EXPORT_SYMBOL(cpufreq_get); |
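As a usage illustration (not part of this commit): cpufreq_quick_get() only reports the cached policy->cur, while cpufreq_get() takes the policy rwsem and asks the driver. A minimal sketch of a caller in process context; compare_cpu_freq() is a hypothetical helper:

#include <linux/cpufreq.h>
#include <linux/kernel.h>

/* Hypothetical helper: compare the cached frequency with what the
 * driver currently reports for one CPU. */
static void compare_cpu_freq(unsigned int cpu)
{
	unsigned int cached = cpufreq_quick_get(cpu);	/* policy->cur, no HW access */
	unsigned int actual = cpufreq_get(cpu);		/* goes through cpufreq_driver->get() */

	if (cached && actual && cached != actual)
		pr_info("cpu%u: cached %u kHz, driver reports %u kHz\n",
			cpu, cached, actual);
}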
1223 | 1223 | ||
1224 | 1224 | ||
1225 | /** | 1225 | /** |
1226 | * cpufreq_suspend - let the low level driver prepare for suspend | 1226 | * cpufreq_suspend - let the low level driver prepare for suspend |
1227 | */ | 1227 | */ |
1228 | 1228 | ||
1229 | static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) | 1229 | static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) |
1230 | { | 1230 | { |
1231 | int cpu = sysdev->id; | 1231 | int cpu = sysdev->id; |
1232 | int ret = 0; | 1232 | int ret = 0; |
1233 | unsigned int cur_freq = 0; | 1233 | unsigned int cur_freq = 0; |
1234 | struct cpufreq_policy *cpu_policy; | 1234 | struct cpufreq_policy *cpu_policy; |
1235 | 1235 | ||
1236 | dprintk("suspending cpu %u\n", cpu); | 1236 | dprintk("suspending cpu %u\n", cpu); |
1237 | 1237 | ||
1238 | if (!cpu_online(cpu)) | 1238 | if (!cpu_online(cpu)) |
1239 | return 0; | 1239 | return 0; |
1240 | 1240 | ||
1241 | /* we may be lax here as interrupts are off. Nonetheless | 1241 | /* we may be lax here as interrupts are off. Nonetheless |
1242 | * we need to grab the correct cpu policy, to check | 1242 | * we need to grab the correct cpu policy, to check |
1243 | * whether we really run on this CPU. | 1243 | * whether we really run on this CPU. |
1244 | */ | 1244 | */ |
1245 | 1245 | ||
1246 | cpu_policy = cpufreq_cpu_get(cpu); | 1246 | cpu_policy = cpufreq_cpu_get(cpu); |
1247 | if (!cpu_policy) | 1247 | if (!cpu_policy) |
1248 | return -EINVAL; | 1248 | return -EINVAL; |
1249 | 1249 | ||
1250 | /* only handle each CPU group once */ | 1250 | /* only handle each CPU group once */ |
1251 | if (unlikely(cpu_policy->cpu != cpu)) | 1251 | if (unlikely(cpu_policy->cpu != cpu)) |
1252 | goto out; | 1252 | goto out; |
1253 | 1253 | ||
1254 | if (cpufreq_driver->suspend) { | 1254 | if (cpufreq_driver->suspend) { |
1255 | ret = cpufreq_driver->suspend(cpu_policy, pmsg); | 1255 | ret = cpufreq_driver->suspend(cpu_policy, pmsg); |
1256 | if (ret) { | 1256 | if (ret) { |
1257 | printk(KERN_ERR "cpufreq: suspend failed in ->suspend " | 1257 | printk(KERN_ERR "cpufreq: suspend failed in ->suspend " |
1258 | "step on CPU %u\n", cpu_policy->cpu); | 1258 | "step on CPU %u\n", cpu_policy->cpu); |
1259 | goto out; | 1259 | goto out; |
1260 | } | 1260 | } |
1261 | } | 1261 | } |
1262 | 1262 | ||
1263 | if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS) | 1263 | if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS) |
1264 | goto out; | 1264 | goto out; |
1265 | 1265 | ||
1266 | if (cpufreq_driver->get) | 1266 | if (cpufreq_driver->get) |
1267 | cur_freq = cpufreq_driver->get(cpu_policy->cpu); | 1267 | cur_freq = cpufreq_driver->get(cpu_policy->cpu); |
1268 | 1268 | ||
1269 | if (!cur_freq || !cpu_policy->cur) { | 1269 | if (!cur_freq || !cpu_policy->cur) { |
1270 | printk(KERN_ERR "cpufreq: suspend failed to assert current " | 1270 | printk(KERN_ERR "cpufreq: suspend failed to assert current " |
1271 | "frequency is what timing core thinks it is.\n"); | 1271 | "frequency is what timing core thinks it is.\n"); |
1272 | goto out; | 1272 | goto out; |
1273 | } | 1273 | } |
1274 | 1274 | ||
1275 | if (unlikely(cur_freq != cpu_policy->cur)) { | 1275 | if (unlikely(cur_freq != cpu_policy->cur)) { |
1276 | struct cpufreq_freqs freqs; | 1276 | struct cpufreq_freqs freqs; |
1277 | 1277 | ||
1278 | if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) | 1278 | if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) |
1279 | dprintk("Warning: CPU frequency is %u, " | 1279 | dprintk("Warning: CPU frequency is %u, " |
1280 | "cpufreq assumed %u kHz.\n", | 1280 | "cpufreq assumed %u kHz.\n", |
1281 | cur_freq, cpu_policy->cur); | 1281 | cur_freq, cpu_policy->cur); |
1282 | 1282 | ||
1283 | freqs.cpu = cpu; | 1283 | freqs.cpu = cpu; |
1284 | freqs.old = cpu_policy->cur; | 1284 | freqs.old = cpu_policy->cur; |
1285 | freqs.new = cur_freq; | 1285 | freqs.new = cur_freq; |
1286 | 1286 | ||
1287 | srcu_notifier_call_chain(&cpufreq_transition_notifier_list, | 1287 | srcu_notifier_call_chain(&cpufreq_transition_notifier_list, |
1288 | CPUFREQ_SUSPENDCHANGE, &freqs); | 1288 | CPUFREQ_SUSPENDCHANGE, &freqs); |
1289 | adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs); | 1289 | adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs); |
1290 | 1290 | ||
1291 | cpu_policy->cur = cur_freq; | 1291 | cpu_policy->cur = cur_freq; |
1292 | } | 1292 | } |
1293 | 1293 | ||
1294 | out: | 1294 | out: |
1295 | cpufreq_cpu_put(cpu_policy); | 1295 | cpufreq_cpu_put(cpu_policy); |
1296 | return ret; | 1296 | return ret; |
1297 | } | 1297 | } |
1298 | 1298 | ||
1299 | /** | 1299 | /** |
1300 | * cpufreq_resume - restore proper CPU frequency handling after resume | 1300 | * cpufreq_resume - restore proper CPU frequency handling after resume |
1301 | * | 1301 | * |
1302 | * 1.) resume CPUfreq hardware support (cpufreq_driver->resume()) | 1302 | * 1.) resume CPUfreq hardware support (cpufreq_driver->resume()) |
1303 | * 2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync | 1303 | * 2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync |
1304 | * 3.) schedule a call to cpufreq_update_policy() ASAP as interrupts are | 1304 | * 3.) schedule a call to cpufreq_update_policy() ASAP as interrupts are |
1305 | * restored. | 1305 | * restored. |
1306 | */ | 1306 | */ |
1307 | static int cpufreq_resume(struct sys_device *sysdev) | 1307 | static int cpufreq_resume(struct sys_device *sysdev) |
1308 | { | 1308 | { |
1309 | int cpu = sysdev->id; | 1309 | int cpu = sysdev->id; |
1310 | int ret = 0; | 1310 | int ret = 0; |
1311 | struct cpufreq_policy *cpu_policy; | 1311 | struct cpufreq_policy *cpu_policy; |
1312 | 1312 | ||
1313 | dprintk("resuming cpu %u\n", cpu); | 1313 | dprintk("resuming cpu %u\n", cpu); |
1314 | 1314 | ||
1315 | if (!cpu_online(cpu)) | 1315 | if (!cpu_online(cpu)) |
1316 | return 0; | 1316 | return 0; |
1317 | 1317 | ||
1318 | /* we may be lax here as interrupts are off. Nonetheless | 1318 | /* we may be lax here as interrupts are off. Nonetheless |
1319 | * we need to grab the correct cpu policy, to check | 1319 | * we need to grab the correct cpu policy, to check |
1320 | * whether we really run on this CPU. | 1320 | * whether we really run on this CPU. |
1321 | */ | 1321 | */ |
1322 | 1322 | ||
1323 | cpu_policy = cpufreq_cpu_get(cpu); | 1323 | cpu_policy = cpufreq_cpu_get(cpu); |
1324 | if (!cpu_policy) | 1324 | if (!cpu_policy) |
1325 | return -EINVAL; | 1325 | return -EINVAL; |
1326 | 1326 | ||
1327 | /* only handle each CPU group once */ | 1327 | /* only handle each CPU group once */ |
1328 | if (unlikely(cpu_policy->cpu != cpu)) | 1328 | if (unlikely(cpu_policy->cpu != cpu)) |
1329 | goto fail; | 1329 | goto fail; |
1330 | 1330 | ||
1331 | if (cpufreq_driver->resume) { | 1331 | if (cpufreq_driver->resume) { |
1332 | ret = cpufreq_driver->resume(cpu_policy); | 1332 | ret = cpufreq_driver->resume(cpu_policy); |
1333 | if (ret) { | 1333 | if (ret) { |
1334 | printk(KERN_ERR "cpufreq: resume failed in ->resume " | 1334 | printk(KERN_ERR "cpufreq: resume failed in ->resume " |
1335 | "step on CPU %u\n", cpu_policy->cpu); | 1335 | "step on CPU %u\n", cpu_policy->cpu); |
1336 | goto fail; | 1336 | goto fail; |
1337 | } | 1337 | } |
1338 | } | 1338 | } |
1339 | 1339 | ||
1340 | if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { | 1340 | if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { |
1341 | unsigned int cur_freq = 0; | 1341 | unsigned int cur_freq = 0; |
1342 | 1342 | ||
1343 | if (cpufreq_driver->get) | 1343 | if (cpufreq_driver->get) |
1344 | cur_freq = cpufreq_driver->get(cpu_policy->cpu); | 1344 | cur_freq = cpufreq_driver->get(cpu_policy->cpu); |
1345 | 1345 | ||
1346 | if (!cur_freq || !cpu_policy->cur) { | 1346 | if (!cur_freq || !cpu_policy->cur) { |
1347 | printk(KERN_ERR "cpufreq: resume failed to assert " | 1347 | printk(KERN_ERR "cpufreq: resume failed to assert " |
1348 | "current frequency is what timing core " | 1348 | "current frequency is what timing core " |
1349 | "thinks it is.\n"); | 1349 | "thinks it is.\n"); |
1350 | goto out; | 1350 | goto out; |
1351 | } | 1351 | } |
1352 | 1352 | ||
1353 | if (unlikely(cur_freq != cpu_policy->cur)) { | 1353 | if (unlikely(cur_freq != cpu_policy->cur)) { |
1354 | struct cpufreq_freqs freqs; | 1354 | struct cpufreq_freqs freqs; |
1355 | 1355 | ||
1356 | if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) | 1356 | if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) |
1357 | dprintk("Warning: CPU frequency " | 1357 | dprintk("Warning: CPU frequency " |
1358 | "is %u, cpufreq assumed %u kHz.\n", | 1358 | "is %u, cpufreq assumed %u kHz.\n", |
1359 | cur_freq, cpu_policy->cur); | 1359 | cur_freq, cpu_policy->cur); |
1360 | 1360 | ||
1361 | freqs.cpu = cpu; | 1361 | freqs.cpu = cpu; |
1362 | freqs.old = cpu_policy->cur; | 1362 | freqs.old = cpu_policy->cur; |
1363 | freqs.new = cur_freq; | 1363 | freqs.new = cur_freq; |
1364 | 1364 | ||
1365 | srcu_notifier_call_chain( | 1365 | srcu_notifier_call_chain( |
1366 | &cpufreq_transition_notifier_list, | 1366 | &cpufreq_transition_notifier_list, |
1367 | CPUFREQ_RESUMECHANGE, &freqs); | 1367 | CPUFREQ_RESUMECHANGE, &freqs); |
1368 | adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs); | 1368 | adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs); |
1369 | 1369 | ||
1370 | cpu_policy->cur = cur_freq; | 1370 | cpu_policy->cur = cur_freq; |
1371 | } | 1371 | } |
1372 | } | 1372 | } |
1373 | 1373 | ||
1374 | out: | 1374 | out: |
1375 | schedule_work(&cpu_policy->update); | 1375 | schedule_work(&cpu_policy->update); |
1376 | fail: | 1376 | fail: |
1377 | cpufreq_cpu_put(cpu_policy); | 1377 | cpufreq_cpu_put(cpu_policy); |
1378 | return ret; | 1378 | return ret; |
1379 | } | 1379 | } |
1380 | 1380 | ||
1381 | static struct sysdev_driver cpufreq_sysdev_driver = { | 1381 | static struct sysdev_driver cpufreq_sysdev_driver = { |
1382 | .add = cpufreq_add_dev, | 1382 | .add = cpufreq_add_dev, |
1383 | .remove = cpufreq_remove_dev, | 1383 | .remove = cpufreq_remove_dev, |
1384 | .suspend = cpufreq_suspend, | 1384 | .suspend = cpufreq_suspend, |
1385 | .resume = cpufreq_resume, | 1385 | .resume = cpufreq_resume, |
1386 | }; | 1386 | }; |
1387 | 1387 | ||
1388 | 1388 | ||
1389 | /********************************************************************* | 1389 | /********************************************************************* |
1390 | * NOTIFIER LISTS INTERFACE * | 1390 | * NOTIFIER LISTS INTERFACE * |
1391 | *********************************************************************/ | 1391 | *********************************************************************/ |
1392 | 1392 | ||
1393 | /** | 1393 | /** |
1394 | * cpufreq_register_notifier - register a driver with cpufreq | 1394 | * cpufreq_register_notifier - register a driver with cpufreq |
1395 | * @nb: notifier function to register | 1395 | * @nb: notifier function to register |
1396 | * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER | 1396 | * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER |
1397 | * | 1397 | * |
1398 | * Add a driver to one of two lists: either a list of drivers that | 1398 | * Add a driver to one of two lists: either a list of drivers that |
1399 | * are notified about clock rate changes (once before and once after | 1399 | * are notified about clock rate changes (once before and once after |
1400 | * the transition), or a list of drivers that are notified about | 1400 | * the transition), or a list of drivers that are notified about |
1401 | * changes in cpufreq policy. | 1401 | * changes in cpufreq policy. |
1402 | * | 1402 | * |
1403 | * This function may sleep, and has the same return conditions as | 1403 | * This function may sleep, and has the same return conditions as |
1404 | * blocking_notifier_chain_register. | 1404 | * blocking_notifier_chain_register. |
1405 | */ | 1405 | */ |
1406 | int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) | 1406 | int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) |
1407 | { | 1407 | { |
1408 | int ret; | 1408 | int ret; |
1409 | 1409 | ||
1410 | WARN_ON(!init_cpufreq_transition_notifier_list_called); | 1410 | WARN_ON(!init_cpufreq_transition_notifier_list_called); |
1411 | 1411 | ||
1412 | switch (list) { | 1412 | switch (list) { |
1413 | case CPUFREQ_TRANSITION_NOTIFIER: | 1413 | case CPUFREQ_TRANSITION_NOTIFIER: |
1414 | ret = srcu_notifier_chain_register( | 1414 | ret = srcu_notifier_chain_register( |
1415 | &cpufreq_transition_notifier_list, nb); | 1415 | &cpufreq_transition_notifier_list, nb); |
1416 | break; | 1416 | break; |
1417 | case CPUFREQ_POLICY_NOTIFIER: | 1417 | case CPUFREQ_POLICY_NOTIFIER: |
1418 | ret = blocking_notifier_chain_register( | 1418 | ret = blocking_notifier_chain_register( |
1419 | &cpufreq_policy_notifier_list, nb); | 1419 | &cpufreq_policy_notifier_list, nb); |
1420 | break; | 1420 | break; |
1421 | default: | 1421 | default: |
1422 | ret = -EINVAL; | 1422 | ret = -EINVAL; |
1423 | } | 1423 | } |
1424 | 1424 | ||
1425 | return ret; | 1425 | return ret; |
1426 | } | 1426 | } |
1427 | EXPORT_SYMBOL(cpufreq_register_notifier); | 1427 | EXPORT_SYMBOL(cpufreq_register_notifier); |
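For illustration only (not part of this commit), a minimal transition notifier as registered through the SRCU chain above; the callback is invoked once with CPUFREQ_PRECHANGE and once with CPUFREQ_POSTCHANGE per frequency change. All "my_" names are hypothetical:

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/notifier.h>

static int my_freq_transition(struct notifier_block *nb,
			      unsigned long val, void *data)
{
	struct cpufreq_freqs *freqs = data;

	if (val == CPUFREQ_POSTCHANGE)
		pr_debug("cpu%u: %u -> %u kHz\n",
			 freqs->cpu, freqs->old, freqs->new);
	return NOTIFY_OK;
}

static struct notifier_block my_freq_nb = {
	.notifier_call = my_freq_transition,
};

static int __init my_init(void)
{
	return cpufreq_register_notifier(&my_freq_nb,
					 CPUFREQ_TRANSITION_NOTIFIER);
}

static void __exit my_exit(void)
{
	cpufreq_unregister_notifier(&my_freq_nb,
				    CPUFREQ_TRANSITION_NOTIFIER);
}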
1428 | 1428 | ||
1429 | 1429 | ||
1430 | /** | 1430 | /** |
1431 | * cpufreq_unregister_notifier - unregister a driver with cpufreq | 1431 | * cpufreq_unregister_notifier - unregister a driver with cpufreq |
1432 | * @nb: notifier block to be unregistered | 1432 | * @nb: notifier block to be unregistered |
1433 | * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER | 1433 | * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER |
1434 | * | 1434 | * |
1435 | * Remove a driver from the CPU frequency notifier list. | 1435 | * Remove a driver from the CPU frequency notifier list. |
1436 | * | 1436 | * |
1437 | * This function may sleep, and has the same return conditions as | 1437 | * This function may sleep, and has the same return conditions as |
1438 | * blocking_notifier_chain_unregister. | 1438 | * blocking_notifier_chain_unregister. |
1439 | */ | 1439 | */ |
1440 | int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list) | 1440 | int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list) |
1441 | { | 1441 | { |
1442 | int ret; | 1442 | int ret; |
1443 | 1443 | ||
1444 | switch (list) { | 1444 | switch (list) { |
1445 | case CPUFREQ_TRANSITION_NOTIFIER: | 1445 | case CPUFREQ_TRANSITION_NOTIFIER: |
1446 | ret = srcu_notifier_chain_unregister( | 1446 | ret = srcu_notifier_chain_unregister( |
1447 | &cpufreq_transition_notifier_list, nb); | 1447 | &cpufreq_transition_notifier_list, nb); |
1448 | break; | 1448 | break; |
1449 | case CPUFREQ_POLICY_NOTIFIER: | 1449 | case CPUFREQ_POLICY_NOTIFIER: |
1450 | ret = blocking_notifier_chain_unregister( | 1450 | ret = blocking_notifier_chain_unregister( |
1451 | &cpufreq_policy_notifier_list, nb); | 1451 | &cpufreq_policy_notifier_list, nb); |
1452 | break; | 1452 | break; |
1453 | default: | 1453 | default: |
1454 | ret = -EINVAL; | 1454 | ret = -EINVAL; |
1455 | } | 1455 | } |
1456 | 1456 | ||
1457 | return ret; | 1457 | return ret; |
1458 | } | 1458 | } |
1459 | EXPORT_SYMBOL(cpufreq_unregister_notifier); | 1459 | EXPORT_SYMBOL(cpufreq_unregister_notifier); |
1460 | 1460 | ||
1461 | 1461 | ||
1462 | /********************************************************************* | 1462 | /********************************************************************* |
1463 | * GOVERNORS * | 1463 | * GOVERNORS * |
1464 | *********************************************************************/ | 1464 | *********************************************************************/ |
1465 | 1465 | ||
1466 | 1466 | ||
1467 | int __cpufreq_driver_target(struct cpufreq_policy *policy, | 1467 | int __cpufreq_driver_target(struct cpufreq_policy *policy, |
1468 | unsigned int target_freq, | 1468 | unsigned int target_freq, |
1469 | unsigned int relation) | 1469 | unsigned int relation) |
1470 | { | 1470 | { |
1471 | int retval = -EINVAL; | 1471 | int retval = -EINVAL; |
1472 | 1472 | ||
1473 | dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu, | 1473 | dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu, |
1474 | target_freq, relation); | 1474 | target_freq, relation); |
1475 | if (cpu_online(policy->cpu) && cpufreq_driver->target) | 1475 | if (cpu_online(policy->cpu) && cpufreq_driver->target) |
1476 | retval = cpufreq_driver->target(policy, target_freq, relation); | 1476 | retval = cpufreq_driver->target(policy, target_freq, relation); |
1477 | 1477 | ||
1478 | return retval; | 1478 | return retval; |
1479 | } | 1479 | } |
1480 | EXPORT_SYMBOL_GPL(__cpufreq_driver_target); | 1480 | EXPORT_SYMBOL_GPL(__cpufreq_driver_target); |
1481 | 1481 | ||
1482 | int cpufreq_driver_target(struct cpufreq_policy *policy, | 1482 | int cpufreq_driver_target(struct cpufreq_policy *policy, |
1483 | unsigned int target_freq, | 1483 | unsigned int target_freq, |
1484 | unsigned int relation) | 1484 | unsigned int relation) |
1485 | { | 1485 | { |
1486 | int ret = -EINVAL; | 1486 | int ret = -EINVAL; |
1487 | 1487 | ||
1488 | policy = cpufreq_cpu_get(policy->cpu); | 1488 | policy = cpufreq_cpu_get(policy->cpu); |
1489 | if (!policy) | 1489 | if (!policy) |
1490 | goto no_policy; | 1490 | goto no_policy; |
1491 | 1491 | ||
1492 | if (unlikely(lock_policy_rwsem_write(policy->cpu))) | 1492 | if (unlikely(lock_policy_rwsem_write(policy->cpu))) |
1493 | goto fail; | 1493 | goto fail; |
1494 | 1494 | ||
1495 | ret = __cpufreq_driver_target(policy, target_freq, relation); | 1495 | ret = __cpufreq_driver_target(policy, target_freq, relation); |
1496 | 1496 | ||
1497 | unlock_policy_rwsem_write(policy->cpu); | 1497 | unlock_policy_rwsem_write(policy->cpu); |
1498 | 1498 | ||
1499 | fail: | 1499 | fail: |
1500 | cpufreq_cpu_put(policy); | 1500 | cpufreq_cpu_put(policy); |
1501 | no_policy: | 1501 | no_policy: |
1502 | return ret; | 1502 | return ret; |
1503 | } | 1503 | } |
1504 | EXPORT_SYMBOL_GPL(cpufreq_driver_target); | 1504 | EXPORT_SYMBOL_GPL(cpufreq_driver_target); |
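A hedged sketch of a caller using the locked wrapper above; CPUFREQ_RELATION_L requests the lowest available frequency at or above the target. In-kernel governors typically call __cpufreq_driver_target() directly under their own locking instead:

/* Illustrative only: ask for at least 800 MHz on this policy. */
static int bump_to_800mhz(struct cpufreq_policy *policy)
{
	return cpufreq_driver_target(policy, 800000 /* kHz */,
				     CPUFREQ_RELATION_L);
}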
1505 | 1505 | ||
1506 | int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu) | 1506 | int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu) |
1507 | { | 1507 | { |
1508 | int ret = 0; | 1508 | int ret = 0; |
1509 | 1509 | ||
1510 | policy = cpufreq_cpu_get(policy->cpu); | 1510 | policy = cpufreq_cpu_get(policy->cpu); |
1511 | if (!policy) | 1511 | if (!policy) |
1512 | return -EINVAL; | 1512 | return -EINVAL; |
1513 | 1513 | ||
1514 | if (cpu_online(cpu) && cpufreq_driver->getavg) | 1514 | if (cpu_online(cpu) && cpufreq_driver->getavg) |
1515 | ret = cpufreq_driver->getavg(policy, cpu); | 1515 | ret = cpufreq_driver->getavg(policy, cpu); |
1516 | 1516 | ||
1517 | cpufreq_cpu_put(policy); | 1517 | cpufreq_cpu_put(policy); |
1518 | return ret; | 1518 | return ret; |
1519 | } | 1519 | } |
1520 | EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg); | 1520 | EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg); |
1521 | 1521 | ||
1522 | /* | 1522 | /* |
1523 | * when "event" is CPUFREQ_GOV_LIMITS | 1523 | * when "event" is CPUFREQ_GOV_LIMITS |
1524 | */ | 1524 | */ |
1525 | 1525 | ||
1526 | static int __cpufreq_governor(struct cpufreq_policy *policy, | 1526 | static int __cpufreq_governor(struct cpufreq_policy *policy, |
1527 | unsigned int event) | 1527 | unsigned int event) |
1528 | { | 1528 | { |
1529 | int ret; | 1529 | int ret; |
1530 | 1530 | ||
1531 | /* Must only be defined when the default governor is known to have latency | 1531 | /* Must only be defined when the default governor is known to have latency |
1532 | restrictions, e.g. conservative or ondemand. | 1532 | restrictions, e.g. conservative or ondemand. |
1533 | That this is the case is already ensured in Kconfig | 1533 | That this is the case is already ensured in Kconfig |
1534 | */ | 1534 | */ |
1535 | #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE | 1535 | #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE |
1536 | struct cpufreq_governor *gov = &cpufreq_gov_performance; | 1536 | struct cpufreq_governor *gov = &cpufreq_gov_performance; |
1537 | #else | 1537 | #else |
1538 | struct cpufreq_governor *gov = NULL; | 1538 | struct cpufreq_governor *gov = NULL; |
1539 | #endif | 1539 | #endif |
1540 | 1540 | ||
1541 | if (policy->governor->max_transition_latency && | 1541 | if (policy->governor->max_transition_latency && |
1542 | policy->cpuinfo.transition_latency > | 1542 | policy->cpuinfo.transition_latency > |
1543 | policy->governor->max_transition_latency) { | 1543 | policy->governor->max_transition_latency) { |
1544 | if (!gov) | 1544 | if (!gov) |
1545 | return -EINVAL; | 1545 | return -EINVAL; |
1546 | else { | 1546 | else { |
1547 | printk(KERN_WARNING "%s governor failed: HW transition" | 1547 | printk(KERN_WARNING "%s governor failed: HW transition" |
1548 | " latency too long, falling back" | 1548 | " latency too long, falling back" |
1549 | " to %s governor\n", | 1549 | " to %s governor\n", |
1550 | policy->governor->name, | 1550 | policy->governor->name, |
1551 | gov->name); | 1551 | gov->name); |
1552 | policy->governor = gov; | 1552 | policy->governor = gov; |
1553 | } | 1553 | } |
1554 | } | 1554 | } |
1555 | 1555 | ||
1556 | if (!try_module_get(policy->governor->owner)) | 1556 | if (!try_module_get(policy->governor->owner)) |
1557 | return -EINVAL; | 1557 | return -EINVAL; |
1558 | 1558 | ||
1559 | dprintk("__cpufreq_governor for CPU %u, event %u\n", | 1559 | dprintk("__cpufreq_governor for CPU %u, event %u\n", |
1560 | policy->cpu, event); | 1560 | policy->cpu, event); |
1561 | ret = policy->governor->governor(policy, event); | 1561 | ret = policy->governor->governor(policy, event); |
1562 | 1562 | ||
1563 | /* we keep one module reference alive for | 1563 | /* we keep one module reference alive for |
1564 | each CPU governed by this policy */ | 1564 | each CPU governed by this policy */ |
1565 | if ((event != CPUFREQ_GOV_START) || ret) | 1565 | if ((event != CPUFREQ_GOV_START) || ret) |
1566 | module_put(policy->governor->owner); | 1566 | module_put(policy->governor->owner); |
1567 | if ((event == CPUFREQ_GOV_STOP) && !ret) | 1567 | if ((event == CPUFREQ_GOV_STOP) && !ret) |
1568 | module_put(policy->governor->owner); | 1568 | module_put(policy->governor->owner); |
1569 | 1569 | ||
1570 | return ret; | 1570 | return ret; |
1571 | } | 1571 | } |
1572 | 1572 | ||
1573 | 1573 | ||
1574 | int cpufreq_register_governor(struct cpufreq_governor *governor) | 1574 | int cpufreq_register_governor(struct cpufreq_governor *governor) |
1575 | { | 1575 | { |
1576 | int err; | 1576 | int err; |
1577 | 1577 | ||
1578 | if (!governor) | 1578 | if (!governor) |
1579 | return -EINVAL; | 1579 | return -EINVAL; |
1580 | 1580 | ||
1581 | mutex_lock(&cpufreq_governor_mutex); | 1581 | mutex_lock(&cpufreq_governor_mutex); |
1582 | 1582 | ||
1583 | err = -EBUSY; | 1583 | err = -EBUSY; |
1584 | if (__find_governor(governor->name) == NULL) { | 1584 | if (__find_governor(governor->name) == NULL) { |
1585 | err = 0; | 1585 | err = 0; |
1586 | list_add(&governor->governor_list, &cpufreq_governor_list); | 1586 | list_add(&governor->governor_list, &cpufreq_governor_list); |
1587 | } | 1587 | } |
1588 | 1588 | ||
1589 | mutex_unlock(&cpufreq_governor_mutex); | 1589 | mutex_unlock(&cpufreq_governor_mutex); |
1590 | return err; | 1590 | return err; |
1591 | } | 1591 | } |
1592 | EXPORT_SYMBOL_GPL(cpufreq_register_governor); | 1592 | EXPORT_SYMBOL_GPL(cpufreq_register_governor); |
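As a hedged illustration of what registration expects (names hypothetical), a trivial governor whose ->governor callback handles the CPUFREQ_GOV_START/STOP/LIMITS events dispatched by __cpufreq_governor() above:

static int noop_governor(struct cpufreq_policy *policy, unsigned int event)
{
	switch (event) {
	case CPUFREQ_GOV_START:
	case CPUFREQ_GOV_LIMITS:
		/* pin to the policy maximum, purely for illustration */
		__cpufreq_driver_target(policy, policy->max,
					CPUFREQ_RELATION_H);
		break;
	case CPUFREQ_GOV_STOP:
		break;
	}
	return 0;
}

static struct cpufreq_governor cpufreq_gov_noop = {
	.name		= "noop",
	.governor	= noop_governor,
	.owner		= THIS_MODULE,
};

/* an initcall would then do:
 *	return cpufreq_register_governor(&cpufreq_gov_noop);
 */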
1593 | 1593 | ||
1594 | 1594 | ||
1595 | void cpufreq_unregister_governor(struct cpufreq_governor *governor) | 1595 | void cpufreq_unregister_governor(struct cpufreq_governor *governor) |
1596 | { | 1596 | { |
1597 | if (!governor) | 1597 | if (!governor) |
1598 | return; | 1598 | return; |
1599 | 1599 | ||
1600 | mutex_lock(&cpufreq_governor_mutex); | 1600 | mutex_lock(&cpufreq_governor_mutex); |
1601 | list_del(&governor->governor_list); | 1601 | list_del(&governor->governor_list); |
1602 | mutex_unlock(&cpufreq_governor_mutex); | 1602 | mutex_unlock(&cpufreq_governor_mutex); |
1603 | return; | 1603 | return; |
1604 | } | 1604 | } |
1605 | EXPORT_SYMBOL_GPL(cpufreq_unregister_governor); | 1605 | EXPORT_SYMBOL_GPL(cpufreq_unregister_governor); |
1606 | 1606 | ||
1607 | 1607 | ||
1608 | 1608 | ||
1609 | /********************************************************************* | 1609 | /********************************************************************* |
1610 | * POLICY INTERFACE * | 1610 | * POLICY INTERFACE * |
1611 | *********************************************************************/ | 1611 | *********************************************************************/ |
1612 | 1612 | ||
1613 | /** | 1613 | /** |
1614 | * cpufreq_get_policy - get the current cpufreq_policy | 1614 | * cpufreq_get_policy - get the current cpufreq_policy |
1615 | * @policy: struct cpufreq_policy into which the current cpufreq_policy | 1615 | * @policy: struct cpufreq_policy into which the current cpufreq_policy |
1616 | * is written | 1616 | * is written |
1617 | * | 1617 | * |
1618 | * Reads the current cpufreq policy. | 1618 | * Reads the current cpufreq policy. |
1619 | */ | 1619 | */ |
1620 | int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu) | 1620 | int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu) |
1621 | { | 1621 | { |
1622 | struct cpufreq_policy *cpu_policy; | 1622 | struct cpufreq_policy *cpu_policy; |
1623 | if (!policy) | 1623 | if (!policy) |
1624 | return -EINVAL; | 1624 | return -EINVAL; |
1625 | 1625 | ||
1626 | cpu_policy = cpufreq_cpu_get(cpu); | 1626 | cpu_policy = cpufreq_cpu_get(cpu); |
1627 | if (!cpu_policy) | 1627 | if (!cpu_policy) |
1628 | return -EINVAL; | 1628 | return -EINVAL; |
1629 | 1629 | ||
1630 | memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy)); | 1630 | memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy)); |
1631 | 1631 | ||
1632 | cpufreq_cpu_put(cpu_policy); | 1632 | cpufreq_cpu_put(cpu_policy); |
1633 | return 0; | 1633 | return 0; |
1634 | } | 1634 | } |
1635 | EXPORT_SYMBOL(cpufreq_get_policy); | 1635 | EXPORT_SYMBOL(cpufreq_get_policy); |
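A brief hedged sketch of a consumer: the snapshot is memcpy'd under the reference taken by cpufreq_cpu_get(), so the caller may inspect its copy without further locking. show_freq_limits() is a hypothetical helper:

/* Illustration: log the allowed frequency range for a CPU. */
static void show_freq_limits(unsigned int cpu)
{
	struct cpufreq_policy policy;

	if (cpufreq_get_policy(&policy, cpu) == 0)
		pr_info("cpu%u: allowed range %u - %u kHz\n",
			cpu, policy.min, policy.max);
}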
1636 | 1636 | ||
1637 | 1637 | ||
1638 | /* | 1638 | /* |
1639 | * data : current policy. | 1639 | * data : current policy. |
1640 | * policy : policy to be set. | 1640 | * policy : policy to be set. |
1641 | */ | 1641 | */ |
1642 | static int __cpufreq_set_policy(struct cpufreq_policy *data, | 1642 | static int __cpufreq_set_policy(struct cpufreq_policy *data, |
1643 | struct cpufreq_policy *policy) | 1643 | struct cpufreq_policy *policy) |
1644 | { | 1644 | { |
1645 | int ret = 0; | 1645 | int ret = 0; |
1646 | 1646 | ||
1647 | cpufreq_debug_disable_ratelimit(); | 1647 | cpufreq_debug_disable_ratelimit(); |
1648 | dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu, | 1648 | dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu, |
1649 | policy->min, policy->max); | 1649 | policy->min, policy->max); |
1650 | 1650 | ||
1651 | memcpy(&policy->cpuinfo, &data->cpuinfo, | 1651 | memcpy(&policy->cpuinfo, &data->cpuinfo, |
1652 | sizeof(struct cpufreq_cpuinfo)); | 1652 | sizeof(struct cpufreq_cpuinfo)); |
1653 | 1653 | ||
1654 | if (policy->min > data->max || policy->max < data->min) { | 1654 | if (policy->min > data->max || policy->max < data->min) { |
1655 | ret = -EINVAL; | 1655 | ret = -EINVAL; |
1656 | goto error_out; | 1656 | goto error_out; |
1657 | } | 1657 | } |
1658 | 1658 | ||
1659 | /* verify the cpu speed can be set within this limit */ | 1659 | /* verify the cpu speed can be set within this limit */ |
1660 | ret = cpufreq_driver->verify(policy); | 1660 | ret = cpufreq_driver->verify(policy); |
1661 | if (ret) | 1661 | if (ret) |
1662 | goto error_out; | 1662 | goto error_out; |
1663 | 1663 | ||
1664 | /* adjust if necessary - all reasons */ | 1664 | /* adjust if necessary - all reasons */ |
1665 | blocking_notifier_call_chain(&cpufreq_policy_notifier_list, | 1665 | blocking_notifier_call_chain(&cpufreq_policy_notifier_list, |
1666 | CPUFREQ_ADJUST, policy); | 1666 | CPUFREQ_ADJUST, policy); |
1667 | 1667 | ||
1668 | /* adjust if necessary - hardware incompatibility*/ | 1668 | /* adjust if necessary - hardware incompatibility*/ |
1669 | blocking_notifier_call_chain(&cpufreq_policy_notifier_list, | 1669 | blocking_notifier_call_chain(&cpufreq_policy_notifier_list, |
1670 | CPUFREQ_INCOMPATIBLE, policy); | 1670 | CPUFREQ_INCOMPATIBLE, policy); |
1671 | 1671 | ||
1672 | /* verify the cpu speed can be set within this limit, | 1672 | /* verify the cpu speed can be set within this limit, |
1673 | which might be different to the first one */ | 1673 | which might be different to the first one */ |
1674 | ret = cpufreq_driver->verify(policy); | 1674 | ret = cpufreq_driver->verify(policy); |
1675 | if (ret) | 1675 | if (ret) |
1676 | goto error_out; | 1676 | goto error_out; |
1677 | 1677 | ||
1678 | /* notification of the new policy */ | 1678 | /* notification of the new policy */ |
1679 | blocking_notifier_call_chain(&cpufreq_policy_notifier_list, | 1679 | blocking_notifier_call_chain(&cpufreq_policy_notifier_list, |
1680 | CPUFREQ_NOTIFY, policy); | 1680 | CPUFREQ_NOTIFY, policy); |
1681 | 1681 | ||
1682 | data->min = policy->min; | 1682 | data->min = policy->min; |
1683 | data->max = policy->max; | 1683 | data->max = policy->max; |
1684 | 1684 | ||
1685 | dprintk("new min and max freqs are %u - %u kHz\n", | 1685 | dprintk("new min and max freqs are %u - %u kHz\n", |
1686 | data->min, data->max); | 1686 | data->min, data->max); |
1687 | 1687 | ||
1688 | if (cpufreq_driver->setpolicy) { | 1688 | if (cpufreq_driver->setpolicy) { |
1689 | data->policy = policy->policy; | 1689 | data->policy = policy->policy; |
1690 | dprintk("setting range\n"); | 1690 | dprintk("setting range\n"); |
1691 | ret = cpufreq_driver->setpolicy(policy); | 1691 | ret = cpufreq_driver->setpolicy(policy); |
1692 | } else { | 1692 | } else { |
1693 | if (policy->governor != data->governor) { | 1693 | if (policy->governor != data->governor) { |
1694 | /* save old, working values */ | 1694 | /* save old, working values */ |
1695 | struct cpufreq_governor *old_gov = data->governor; | 1695 | struct cpufreq_governor *old_gov = data->governor; |
1696 | 1696 | ||
1697 | dprintk("governor switch\n"); | 1697 | dprintk("governor switch\n"); |
1698 | 1698 | ||
1699 | /* end old governor */ | 1699 | /* end old governor */ |
1700 | if (data->governor) | 1700 | if (data->governor) |
1701 | __cpufreq_governor(data, CPUFREQ_GOV_STOP); | 1701 | __cpufreq_governor(data, CPUFREQ_GOV_STOP); |
1702 | 1702 | ||
1703 | /* start new governor */ | 1703 | /* start new governor */ |
1704 | data->governor = policy->governor; | 1704 | data->governor = policy->governor; |
1705 | if (__cpufreq_governor(data, CPUFREQ_GOV_START)) { | 1705 | if (__cpufreq_governor(data, CPUFREQ_GOV_START)) { |
1706 | /* new governor failed, so re-start old one */ | 1706 | /* new governor failed, so re-start old one */ |
1707 | dprintk("starting governor %s failed\n", | 1707 | dprintk("starting governor %s failed\n", |
1708 | data->governor->name); | 1708 | data->governor->name); |
1709 | if (old_gov) { | 1709 | if (old_gov) { |
1710 | data->governor = old_gov; | 1710 | data->governor = old_gov; |
1711 | __cpufreq_governor(data, | 1711 | __cpufreq_governor(data, |
1712 | CPUFREQ_GOV_START); | 1712 | CPUFREQ_GOV_START); |
1713 | } | 1713 | } |
1714 | ret = -EINVAL; | 1714 | ret = -EINVAL; |
1715 | goto error_out; | 1715 | goto error_out; |
1716 | } | 1716 | } |
1717 | /* might be a policy change, too, so fall through */ | 1717 | /* might be a policy change, too, so fall through */ |
1718 | } | 1718 | } |
1719 | dprintk("governor: change or update limits\n"); | 1719 | dprintk("governor: change or update limits\n"); |
1720 | __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); | 1720 | __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); |
1721 | } | 1721 | } |
1722 | 1722 | ||
1723 | error_out: | 1723 | error_out: |
1724 | cpufreq_debug_enable_ratelimit(); | 1724 | cpufreq_debug_enable_ratelimit(); |
1725 | return ret; | 1725 | return ret; |
1726 | } | 1726 | } |
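The CPUFREQ_ADJUST/CPUFREQ_INCOMPATIBLE/CPUFREQ_NOTIFY sequence above is where policy notifiers may clamp the limits before they take effect. A hedged sketch of such a notifier (say, a thermal driver capping the maximum; the 1600000 kHz cap is made up), clamping via the cpufreq_verify_within_limits() helper:

static int my_policy_notify(struct notifier_block *nb,
			    unsigned long event, void *data)
{
	struct cpufreq_policy *policy = data;

	/* hypothetical cap, for illustration only */
	if (event == CPUFREQ_ADJUST && policy->max > 1600000)
		cpufreq_verify_within_limits(policy, policy->min, 1600000);

	return NOTIFY_OK;
}

static struct notifier_block my_policy_nb = {
	.notifier_call = my_policy_notify,
};

/* registered with:
 *	cpufreq_register_notifier(&my_policy_nb, CPUFREQ_POLICY_NOTIFIER);
 */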
1727 | 1727 | ||
1728 | /** | 1728 | /** |
1729 | * cpufreq_update_policy - re-evaluate an existing cpufreq policy | 1729 | * cpufreq_update_policy - re-evaluate an existing cpufreq policy |
1730 | * @cpu: CPU which shall be re-evaluated | 1730 | * @cpu: CPU which shall be re-evaluated |
1731 | * | 1731 | * |
1732 | * Useful for policy notifiers which have different necessities | 1732 | * Useful for policy notifiers which have different necessities |
1733 | * at different times. | 1733 | * at different times. |
1734 | */ | 1734 | */ |
1735 | int cpufreq_update_policy(unsigned int cpu) | 1735 | int cpufreq_update_policy(unsigned int cpu) |
1736 | { | 1736 | { |
1737 | struct cpufreq_policy *data = cpufreq_cpu_get(cpu); | 1737 | struct cpufreq_policy *data = cpufreq_cpu_get(cpu); |
1738 | struct cpufreq_policy policy; | 1738 | struct cpufreq_policy policy; |
1739 | int ret; | 1739 | int ret; |
1740 | 1740 | ||
1741 | if (!data) { | 1741 | if (!data) { |
1742 | ret = -ENODEV; | 1742 | ret = -ENODEV; |
1743 | goto no_policy; | 1743 | goto no_policy; |
1744 | } | 1744 | } |
1745 | 1745 | ||
1746 | if (unlikely(lock_policy_rwsem_write(cpu))) { | 1746 | if (unlikely(lock_policy_rwsem_write(cpu))) { |
1747 | ret = -EINVAL; | 1747 | ret = -EINVAL; |
1748 | goto fail; | 1748 | goto fail; |
1749 | } | 1749 | } |
1750 | 1750 | ||
1751 | dprintk("updating policy for CPU %u\n", cpu); | 1751 | dprintk("updating policy for CPU %u\n", cpu); |
1752 | memcpy(&policy, data, sizeof(struct cpufreq_policy)); | 1752 | memcpy(&policy, data, sizeof(struct cpufreq_policy)); |
1753 | policy.min = data->user_policy.min; | 1753 | policy.min = data->user_policy.min; |
1754 | policy.max = data->user_policy.max; | 1754 | policy.max = data->user_policy.max; |
1755 | policy.policy = data->user_policy.policy; | 1755 | policy.policy = data->user_policy.policy; |
1756 | policy.governor = data->user_policy.governor; | 1756 | policy.governor = data->user_policy.governor; |
1757 | 1757 | ||
1758 | /* BIOS might change freq behind our back | 1758 | /* BIOS might change freq behind our back |
1759 | -> ask driver for current freq and notify governors about a change */ | 1759 | -> ask driver for current freq and notify governors about a change */ |
1760 | if (cpufreq_driver->get) { | 1760 | if (cpufreq_driver->get) { |
1761 | policy.cur = cpufreq_driver->get(cpu); | 1761 | policy.cur = cpufreq_driver->get(cpu); |
1762 | if (!data->cur) { | 1762 | if (!data->cur) { |
1763 | dprintk("Driver did not initialize current freq"); | 1763 | dprintk("Driver did not initialize current freq"); |
1764 | data->cur = policy.cur; | 1764 | data->cur = policy.cur; |
1765 | } else { | 1765 | } else { |
1766 | if (data->cur != policy.cur) | 1766 | if (data->cur != policy.cur) |
1767 | cpufreq_out_of_sync(cpu, data->cur, | 1767 | cpufreq_out_of_sync(cpu, data->cur, |
1768 | policy.cur); | 1768 | policy.cur); |
1769 | } | 1769 | } |
1770 | } | 1770 | } |
1771 | 1771 | ||
1772 | ret = __cpufreq_set_policy(data, &policy); | 1772 | ret = __cpufreq_set_policy(data, &policy); |
1773 | 1773 | ||
1774 | unlock_policy_rwsem_write(cpu); | 1774 | unlock_policy_rwsem_write(cpu); |
1775 | 1775 | ||
1776 | fail: | 1776 | fail: |
1777 | cpufreq_cpu_put(data); | 1777 | cpufreq_cpu_put(data); |
1778 | no_policy: | 1778 | no_policy: |
1779 | return ret; | 1779 | return ret; |
1780 | } | 1780 | } |
1781 | EXPORT_SYMBOL(cpufreq_update_policy); | 1781 | EXPORT_SYMBOL(cpufreq_update_policy); |
1782 | 1782 | ||
1783 | static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, | 1783 | static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, |
1784 | unsigned long action, void *hcpu) | 1784 | unsigned long action, void *hcpu) |
1785 | { | 1785 | { |
1786 | unsigned int cpu = (unsigned long)hcpu; | 1786 | unsigned int cpu = (unsigned long)hcpu; |
1787 | struct sys_device *sys_dev; | 1787 | struct sys_device *sys_dev; |
1788 | 1788 | ||
1789 | sys_dev = get_cpu_sysdev(cpu); | 1789 | sys_dev = get_cpu_sysdev(cpu); |
1790 | if (sys_dev) { | 1790 | if (sys_dev) { |
1791 | switch (action) { | 1791 | switch (action) { |
1792 | case CPU_ONLINE: | 1792 | case CPU_ONLINE: |
1793 | case CPU_ONLINE_FROZEN: | 1793 | case CPU_ONLINE_FROZEN: |
1794 | cpufreq_add_dev(sys_dev); | 1794 | cpufreq_add_dev(sys_dev); |
1795 | break; | 1795 | break; |
1796 | case CPU_DOWN_PREPARE: | 1796 | case CPU_DOWN_PREPARE: |
1797 | case CPU_DOWN_PREPARE_FROZEN: | 1797 | case CPU_DOWN_PREPARE_FROZEN: |
1798 | if (unlikely(lock_policy_rwsem_write(cpu))) | 1798 | if (unlikely(lock_policy_rwsem_write(cpu))) |
1799 | BUG(); | 1799 | BUG(); |
1800 | 1800 | ||
1801 | __cpufreq_remove_dev(sys_dev); | 1801 | __cpufreq_remove_dev(sys_dev); |
1802 | break; | 1802 | break; |
1803 | case CPU_DOWN_FAILED: | 1803 | case CPU_DOWN_FAILED: |
1804 | case CPU_DOWN_FAILED_FROZEN: | 1804 | case CPU_DOWN_FAILED_FROZEN: |
1805 | cpufreq_add_dev(sys_dev); | 1805 | cpufreq_add_dev(sys_dev); |
1806 | break; | 1806 | break; |
1807 | } | 1807 | } |
1808 | } | 1808 | } |
1809 | return NOTIFY_OK; | 1809 | return NOTIFY_OK; |
1810 | } | 1810 | } |
1811 | 1811 | ||
1812 | static struct notifier_block __refdata cpufreq_cpu_notifier = | 1812 | static struct notifier_block __refdata cpufreq_cpu_notifier = |
1813 | { | 1813 | { |
1814 | .notifier_call = cpufreq_cpu_callback, | 1814 | .notifier_call = cpufreq_cpu_callback, |
1815 | }; | 1815 | }; |
1816 | 1816 | ||
1817 | /********************************************************************* | 1817 | /********************************************************************* |
1818 | * REGISTER / UNREGISTER CPUFREQ DRIVER * | 1818 | * REGISTER / UNREGISTER CPUFREQ DRIVER * |
1819 | *********************************************************************/ | 1819 | *********************************************************************/ |
1820 | 1820 | ||
1821 | /** | 1821 | /** |
1822 | * cpufreq_register_driver - register a CPU Frequency driver | 1822 | * cpufreq_register_driver - register a CPU Frequency driver |
1823 | * @driver_data: A struct cpufreq_driver containing the values | 1823 | * @driver_data: A struct cpufreq_driver containing the values |
1824 | * submitted by the CPU Frequency driver. | 1824 | * submitted by the CPU Frequency driver. |
1825 | * | 1825 | * |
1826 | * Registers a CPU Frequency driver to this core code. This code | 1826 | * Registers a CPU Frequency driver to this core code. This code |
1827 | * returns zero on success, -EBUSY when another driver got here first | 1827 | * returns zero on success, -EBUSY when another driver got here first |
1828 | * (and isn't unregistered in the meantime). | 1828 | * (and isn't unregistered in the meantime). |
1829 | * | 1829 | * |
1830 | */ | 1830 | */ |
1831 | int cpufreq_register_driver(struct cpufreq_driver *driver_data) | 1831 | int cpufreq_register_driver(struct cpufreq_driver *driver_data) |
1832 | { | 1832 | { |
1833 | unsigned long flags; | 1833 | unsigned long flags; |
1834 | int ret; | 1834 | int ret; |
1835 | 1835 | ||
1836 | if (!driver_data || !driver_data->verify || !driver_data->init || | 1836 | if (!driver_data || !driver_data->verify || !driver_data->init || |
1837 | ((!driver_data->setpolicy) && (!driver_data->target))) | 1837 | ((!driver_data->setpolicy) && (!driver_data->target))) |
1838 | return -EINVAL; | 1838 | return -EINVAL; |
1839 | 1839 | ||
1840 | dprintk("trying to register driver %s\n", driver_data->name); | 1840 | dprintk("trying to register driver %s\n", driver_data->name); |
1841 | 1841 | ||
1842 | if (driver_data->setpolicy) | 1842 | if (driver_data->setpolicy) |
1843 | driver_data->flags |= CPUFREQ_CONST_LOOPS; | 1843 | driver_data->flags |= CPUFREQ_CONST_LOOPS; |
1844 | 1844 | ||
1845 | spin_lock_irqsave(&cpufreq_driver_lock, flags); | 1845 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
1846 | if (cpufreq_driver) { | 1846 | if (cpufreq_driver) { |
1847 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 1847 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
1848 | return -EBUSY; | 1848 | return -EBUSY; |
1849 | } | 1849 | } |
1850 | cpufreq_driver = driver_data; | 1850 | cpufreq_driver = driver_data; |
1851 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 1851 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
1852 | 1852 | ||
1853 | ret = sysdev_driver_register(&cpu_sysdev_class, | 1853 | ret = sysdev_driver_register(&cpu_sysdev_class, |
1854 | &cpufreq_sysdev_driver); | 1854 | &cpufreq_sysdev_driver); |
1855 | 1855 | ||
1856 | if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) { | 1856 | if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) { |
1857 | int i; | 1857 | int i; |
1858 | ret = -ENODEV; | 1858 | ret = -ENODEV; |
1859 | 1859 | ||
1860 | /* check for at least one working CPU */ | 1860 | /* check for at least one working CPU */ |
1861 | for (i = 0; i < nr_cpu_ids; i++) | 1861 | for (i = 0; i < nr_cpu_ids; i++) |
1862 | if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) { | 1862 | if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) { |
1863 | ret = 0; | 1863 | ret = 0; |
1864 | break; | 1864 | break; |
1865 | } | 1865 | } |
1866 | 1866 | ||
1867 | /* if all ->init() calls failed, unregister */ | 1867 | /* if all ->init() calls failed, unregister */ |
1868 | if (ret) { | 1868 | if (ret) { |
1869 | dprintk("no CPU initialized for driver %s\n", | 1869 | dprintk("no CPU initialized for driver %s\n", |
1870 | driver_data->name); | 1870 | driver_data->name); |
1871 | sysdev_driver_unregister(&cpu_sysdev_class, | 1871 | sysdev_driver_unregister(&cpu_sysdev_class, |
1872 | &cpufreq_sysdev_driver); | 1872 | &cpufreq_sysdev_driver); |
1873 | 1873 | ||
1874 | spin_lock_irqsave(&cpufreq_driver_lock, flags); | 1874 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
1875 | cpufreq_driver = NULL; | 1875 | cpufreq_driver = NULL; |
1876 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 1876 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
1877 | } | 1877 | } |
1878 | } | 1878 | } |
1879 | 1879 | ||
1880 | if (!ret) { | 1880 | if (!ret) { |
1881 | register_hotcpu_notifier(&cpufreq_cpu_notifier); | 1881 | register_hotcpu_notifier(&cpufreq_cpu_notifier); |
1882 | dprintk("driver %s up and running\n", driver_data->name); | 1882 | dprintk("driver %s up and running\n", driver_data->name); |
1883 | cpufreq_debug_enable_ratelimit(); | 1883 | cpufreq_debug_enable_ratelimit(); |
1884 | } | 1884 | } |
1885 | 1885 | ||
1886 | return ret; | 1886 | return ret; |
1887 | } | 1887 | } |
1888 | EXPORT_SYMBOL_GPL(cpufreq_register_driver); | 1888 | EXPORT_SYMBOL_GPL(cpufreq_register_driver); |
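A hedged skeleton of the minimum a driver must provide to pass the checks at the top of cpufreq_register_driver() — ->verify, ->init, and one of ->setpolicy/->target. All names and frequencies here are illustrative:

static int mydrv_verify(struct cpufreq_policy *policy)
{
	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
				     policy->cpuinfo.max_freq);
	return 0;
}

static int mydrv_init(struct cpufreq_policy *policy)
{
	policy->cpuinfo.min_freq = 200000;		/* assumed values, kHz */
	policy->cpuinfo.max_freq = 1000000;
	policy->cpuinfo.transition_latency = 100000;	/* ns */
	policy->min = policy->cpuinfo.min_freq;
	policy->max = policy->cpuinfo.max_freq;
	policy->cur = 1000000;
	return 0;
}

static int mydrv_target(struct cpufreq_policy *policy,
			unsigned int target_freq, unsigned int relation)
{
	/* a real driver posts CPUFREQ_PRECHANGE/POSTCHANGE notifications
	 * around the hardware reprogramming here */
	return 0;
}

static struct cpufreq_driver mydrv_driver = {
	.name	= "mydrv",
	.verify	= mydrv_verify,
	.init	= mydrv_init,
	.target	= mydrv_target,
	.owner	= THIS_MODULE,
};

/* module init: return cpufreq_register_driver(&mydrv_driver); */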
1889 | 1889 | ||
1890 | 1890 | ||
1891 | /** | 1891 | /** |
1892 | * cpufreq_unregister_driver - unregister the current CPUFreq driver | 1892 | * cpufreq_unregister_driver - unregister the current CPUFreq driver |
1893 | * | 1893 | * |
1894 | * Unregister the current CPUFreq driver. Only call this if you have | 1894 | * Unregister the current CPUFreq driver. Only call this if you have |
1895 | * the right to do so, i.e. if you have succeeded in initialising before! | 1895 | * the right to do so, i.e. if you have succeeded in initialising before! |
1896 | * Returns zero if successful, and -EINVAL if the cpufreq_driver is | 1896 | * Returns zero if successful, and -EINVAL if the cpufreq_driver is |
1897 | * currently not initialised. | 1897 | * currently not initialised. |
1898 | */ | 1898 | */ |
1899 | int cpufreq_unregister_driver(struct cpufreq_driver *driver) | 1899 | int cpufreq_unregister_driver(struct cpufreq_driver *driver) |
1900 | { | 1900 | { |
1901 | unsigned long flags; | 1901 | unsigned long flags; |
1902 | 1902 | ||
1903 | cpufreq_debug_disable_ratelimit(); | 1903 | cpufreq_debug_disable_ratelimit(); |
1904 | 1904 | ||
1905 | if (!cpufreq_driver || (driver != cpufreq_driver)) { | 1905 | if (!cpufreq_driver || (driver != cpufreq_driver)) { |
1906 | cpufreq_debug_enable_ratelimit(); | 1906 | cpufreq_debug_enable_ratelimit(); |
1907 | return -EINVAL; | 1907 | return -EINVAL; |
1908 | } | 1908 | } |
1909 | 1909 | ||
1910 | dprintk("unregistering driver %s\n", driver->name); | 1910 | dprintk("unregistering driver %s\n", driver->name); |
1911 | 1911 | ||
1912 | sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver); | 1912 | sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver); |
1913 | unregister_hotcpu_notifier(&cpufreq_cpu_notifier); | 1913 | unregister_hotcpu_notifier(&cpufreq_cpu_notifier); |
1914 | 1914 | ||
1915 | spin_lock_irqsave(&cpufreq_driver_lock, flags); | 1915 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
1916 | cpufreq_driver = NULL; | 1916 | cpufreq_driver = NULL; |
1917 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); | 1917 | spin_unlock_irqrestore(&cpufreq_driver_lock, flags); |
1918 | 1918 | ||
1919 | return 0; | 1919 | return 0; |
1920 | } | 1920 | } |
1921 | EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); | 1921 | EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); |
1922 | 1922 | ||
1923 | static int __init cpufreq_core_init(void) | 1923 | static int __init cpufreq_core_init(void) |
1924 | { | 1924 | { |
1925 | int cpu; | 1925 | int cpu; |
1926 | 1926 | ||
1927 | for_each_possible_cpu(cpu) { | 1927 | for_each_possible_cpu(cpu) { |
1928 | per_cpu(policy_cpu, cpu) = -1; | 1928 | per_cpu(policy_cpu, cpu) = -1; |
1929 | init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); | 1929 | init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); |
1930 | } | 1930 | } |
1931 | return 0; | 1931 | return 0; |
1932 | } | 1932 | } |
1933 | 1933 | ||
1934 | core_initcall(cpufreq_core_init); | 1934 | core_initcall(cpufreq_core_init); |
1935 | 1935 |
kernel/sched_cpupri.c
1 | /* | 1 | /* |
2 | * kernel/sched_cpupri.c | 2 | * kernel/sched_cpupri.c |
3 | * | 3 | * |
4 | * CPU priority management | 4 | * CPU priority management |
5 | * | 5 | * |
6 | * Copyright (C) 2007-2008 Novell | 6 | * Copyright (C) 2007-2008 Novell |
7 | * | 7 | * |
8 | * Author: Gregory Haskins <ghaskins@novell.com> | 8 | * Author: Gregory Haskins <ghaskins@novell.com> |
9 | * | 9 | * |
10 | * This code tracks the priority of each CPU so that global migration | 10 | * This code tracks the priority of each CPU so that global migration |
11 | * decisions are easy to calculate. Each CPU can be in a state as follows: | 11 | * decisions are easy to calculate. Each CPU can be in a state as follows: |
12 | * | 12 | * |
13 | * (INVALID), IDLE, NORMAL, RT1, ... RT99 | 13 | * (INVALID), IDLE, NORMAL, RT1, ... RT99 |
14 | * | 14 | * |
15 | * going from the lowest priority to the highest. CPUs in the INVALID state | 15 | * going from the lowest priority to the highest. CPUs in the INVALID state |
16 | * are not eligible for routing. The system maintains this state with | 16 | * are not eligible for routing. The system maintains this state with |
17 | * a 2 dimensional bitmap (the first for priority class, the second for cpus | 17 | * a 2 dimensional bitmap (the first for priority class, the second for cpus |
18 | * in that class). Therefore a typical application without affinity | 18 | * in that class). Therefore a typical application without affinity |
19 | * restrictions can find a suitable CPU with O(1) complexity (e.g. two bit | 19 | * restrictions can find a suitable CPU with O(1) complexity (e.g. two bit |
20 | * searches). For tasks with affinity restrictions, the algorithm has a | 20 | * searches). For tasks with affinity restrictions, the algorithm has a |
21 | * worst case complexity of O(min(102, nr_domcpus)), though the scenario that | 21 | * worst case complexity of O(min(102, nr_domcpus)), though the scenario that |
22 | * yields the worst case search is fairly contrived. | 22 | * yields the worst case search is fairly contrived. |
23 | * | 23 | * |
24 | * This program is free software; you can redistribute it and/or | 24 | * This program is free software; you can redistribute it and/or |
25 | * modify it under the terms of the GNU General Public License | 25 | * modify it under the terms of the GNU General Public License |
26 | * as published by the Free Software Foundation; version 2 | 26 | * as published by the Free Software Foundation; version 2 |
27 | * of the License. | 27 | * of the License. |
28 | */ | 28 | */ |
29 | 29 | ||
30 | #include "sched_cpupri.h" | 30 | #include "sched_cpupri.h" |
31 | 31 | ||
32 | /* Convert between a 140 based task->prio, and our 102 based cpupri */ | 32 | /* Convert between a 140 based task->prio, and our 102 based cpupri */ |
33 | static int convert_prio(int prio) | 33 | static int convert_prio(int prio) |
34 | { | 34 | { |
35 | int cpupri; | 35 | int cpupri; |
36 | 36 | ||
37 | if (prio == CPUPRI_INVALID) | 37 | if (prio == CPUPRI_INVALID) |
38 | cpupri = CPUPRI_INVALID; | 38 | cpupri = CPUPRI_INVALID; |
39 | else if (prio == MAX_PRIO) | 39 | else if (prio == MAX_PRIO) |
40 | cpupri = CPUPRI_IDLE; | 40 | cpupri = CPUPRI_IDLE; |
41 | else if (prio >= MAX_RT_PRIO) | 41 | else if (prio >= MAX_RT_PRIO) |
42 | cpupri = CPUPRI_NORMAL; | 42 | cpupri = CPUPRI_NORMAL; |
43 | else | 43 | else |
44 | cpupri = MAX_RT_PRIO - prio + 1; | 44 | cpupri = MAX_RT_PRIO - prio + 1; |
45 | 45 | ||
46 | return cpupri; | 46 | return cpupri; |
47 | } | 47 | } |
48 | 48 | ||
49 | #define for_each_cpupri_active(array, idx) \ | 49 | #define for_each_cpupri_active(array, idx) \ |
50 | for (idx = find_first_bit(array, CPUPRI_NR_PRIORITIES); \ | 50 | for (idx = find_first_bit(array, CPUPRI_NR_PRIORITIES); \ |
51 | idx < CPUPRI_NR_PRIORITIES; \ | 51 | idx < CPUPRI_NR_PRIORITIES; \ |
52 | idx = find_next_bit(array, CPUPRI_NR_PRIORITIES, idx+1)) | 52 | idx = find_next_bit(array, CPUPRI_NR_PRIORITIES, idx+1)) |
53 | 53 | ||
54 | /** | 54 | /** |
55 | * cpupri_find - find the best (lowest-pri) CPU in the system | 55 | * cpupri_find - find the best (lowest-pri) CPU in the system |
56 | * @cp: The cpupri context | 56 | * @cp: The cpupri context |
57 | * @p: The task | 57 | * @p: The task |
58 | * @lowest_mask: A mask to fill in with selected CPUs (or NULL) | 58 | * @lowest_mask: A mask to fill in with selected CPUs (or NULL) |
59 | * | 59 | * |
60 | * Note: This function returns the recommended CPUs as calculated during the | 60 | * Note: This function returns the recommended CPUs as calculated during the |
61 | * current invocation. By the time the call returns, the CPUs may have in | 61 | * current invocation. By the time the call returns, the CPUs may have in |
62 | * fact changed priorities any number of times. While not ideal, it is not | 62 | * fact changed priorities any number of times. While not ideal, it is not |
63 | * an issue of correctness since the normal rebalancer logic will correct | 63 | * an issue of correctness since the normal rebalancer logic will correct |
64 | * any discrepancies created by racing against the uncertainty of the current | 64 | * any discrepancies created by racing against the uncertainty of the current |
65 | * priority configuration. | 65 | * priority configuration. |
66 | * | 66 | * |
67 | * Returns: (int)bool - CPUs were found | 67 | * Returns: (int)bool - CPUs were found |
68 | */ | 68 | */ |
69 | int cpupri_find(struct cpupri *cp, struct task_struct *p, | 69 | int cpupri_find(struct cpupri *cp, struct task_struct *p, |
70 | struct cpumask *lowest_mask) | 70 | struct cpumask *lowest_mask) |
71 | { | 71 | { |
72 | int idx = 0; | 72 | int idx = 0; |
73 | int task_pri = convert_prio(p->prio); | 73 | int task_pri = convert_prio(p->prio); |
74 | 74 | ||
75 | for_each_cpupri_active(cp->pri_active, idx) { | 75 | for_each_cpupri_active(cp->pri_active, idx) { |
76 | struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; | 76 | struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; |
77 | 77 | ||
78 | if (idx >= task_pri) | 78 | if (idx >= task_pri) |
79 | break; | 79 | break; |
80 | 80 | ||
81 | if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) | 81 | if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) |
82 | continue; | 82 | continue; |
83 | 83 | ||
84 | if (lowest_mask) | 84 | if (lowest_mask) |
85 | cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); | 85 | cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); |
86 | return 1; | 86 | return 1; |
87 | } | 87 | } |
88 | 88 | ||
89 | return 0; | 89 | return 0; |
90 | } | 90 | } |
91 | 91 | ||
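To make the two-level lookup above concrete, here is a minimal userspace sketch of the same idea under a scaled-down priority space; NR_PRI, pri_to_cpu, pri_active and find_lowest() are invented stand-ins for CPUPRI_NR_PRIORITIES, cp->pri_to_cpu, cp->pri_active and cpupri_find(), not kernel APIs:

    #include <stdio.h>
    #include <strings.h>                     /* ffs() */

    #define NR_PRI 8                         /* scaled-down CPUPRI_NR_PRIORITIES */

    static unsigned int pri_to_cpu[NR_PRI];  /* per-level CPU mask (vec->mask) */
    static unsigned int pri_active;          /* one bit per non-empty level */

    /* Walk the active levels from lowest up, stop at the task's own level,
     * and intersect each level's mask with the task's affinity - the same
     * two bit-searches cpupri_find() performs. */
    static int find_lowest(int task_pri, unsigned int allowed, unsigned int *out)
    {
        unsigned int act;
        int idx;

        for (act = pri_active; act; act &= act - 1) {
            idx = ffs(act) - 1;              /* lowest remaining active level */
            if (idx >= task_pri)
                break;
            if (pri_to_cpu[idx] & allowed) {
                *out = pri_to_cpu[idx] & allowed;
                return 1;
            }
        }
        return 0;
    }

    int main(void)
    {
        unsigned int mask;

        pri_to_cpu[1] = 0x3;                 /* CPUs 0-1 are idle            */
        pri_to_cpu[5] = 0x4;                 /* CPU 2 runs a low-prio RT task */
        pri_active = (1u << 1) | (1u << 5);

        if (find_lowest(5, 0x7, &mask))      /* task at level 5, unpinned */
            printf("lowest-pri CPUs: %#x\n", mask);   /* prints 0x3 */
        return 0;
    }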
92 | /** | 92 | /** |
93 | * cpupri_set - update the cpu priority setting | 93 | * cpupri_set - update the cpu priority setting |
94 | * @cp: The cpupri context | 94 | * @cp: The cpupri context |
95 | * @cpu: The target cpu | 95 | * @cpu: The target cpu |
96 | * @pri: The priority (INVALID-RT99) to assign to this CPU | 96 | * @pri: The priority (INVALID-RT99) to assign to this CPU |
97 | * | 97 | * |
98 | * Note: Assumes cpu_rq(cpu)->lock is locked | 98 | * Note: Assumes cpu_rq(cpu)->lock is locked |
99 | * | 99 | * |
100 | * Returns: (void) | 100 | * Returns: (void) |
101 | */ | 101 | */ |
102 | void cpupri_set(struct cpupri *cp, int cpu, int newpri) | 102 | void cpupri_set(struct cpupri *cp, int cpu, int newpri) |
103 | { | 103 | { |
104 | int *currpri = &cp->cpu_to_pri[cpu]; | 104 | int *currpri = &cp->cpu_to_pri[cpu]; |
105 | int oldpri = *currpri; | 105 | int oldpri = *currpri; |
106 | unsigned long flags; | 106 | unsigned long flags; |
107 | 107 | ||
108 | newpri = convert_prio(newpri); | 108 | newpri = convert_prio(newpri); |
109 | 109 | ||
110 | BUG_ON(newpri >= CPUPRI_NR_PRIORITIES); | 110 | BUG_ON(newpri >= CPUPRI_NR_PRIORITIES); |
111 | 111 | ||
112 | if (newpri == oldpri) | 112 | if (newpri == oldpri) |
113 | return; | 113 | return; |
114 | 114 | ||
115 | /* | 115 | /* |
116 | * If the cpu was currently mapped to a different value, we | 116 | * If the cpu was currently mapped to a different value, we |
117 | * first need to unmap the old value | 117 | * first need to unmap the old value |
118 | */ | 118 | */ |
119 | if (likely(oldpri != CPUPRI_INVALID)) { | 119 | if (likely(oldpri != CPUPRI_INVALID)) { |
120 | struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri]; | 120 | struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri]; |
121 | 121 | ||
122 | spin_lock_irqsave(&vec->lock, flags); | 122 | spin_lock_irqsave(&vec->lock, flags); |
123 | 123 | ||
124 | vec->count--; | 124 | vec->count--; |
125 | if (!vec->count) | 125 | if (!vec->count) |
126 | clear_bit(oldpri, cp->pri_active); | 126 | clear_bit(oldpri, cp->pri_active); |
127 | cpumask_clear_cpu(cpu, vec->mask); | 127 | cpumask_clear_cpu(cpu, vec->mask); |
128 | 128 | ||
129 | spin_unlock_irqrestore(&vec->lock, flags); | 129 | spin_unlock_irqrestore(&vec->lock, flags); |
130 | } | 130 | } |
131 | 131 | ||
132 | if (likely(newpri != CPUPRI_INVALID)) { | 132 | if (likely(newpri != CPUPRI_INVALID)) { |
133 | struct cpupri_vec *vec = &cp->pri_to_cpu[newpri]; | 133 | struct cpupri_vec *vec = &cp->pri_to_cpu[newpri]; |
134 | 134 | ||
135 | spin_lock_irqsave(&vec->lock, flags); | 135 | spin_lock_irqsave(&vec->lock, flags); |
136 | 136 | ||
137 | cpumask_set_cpu(cpu, vec->mask); | 137 | cpumask_set_cpu(cpu, vec->mask); |
138 | vec->count++; | 138 | vec->count++; |
139 | if (vec->count == 1) | 139 | if (vec->count == 1) |
140 | set_bit(newpri, cp->pri_active); | 140 | set_bit(newpri, cp->pri_active); |
141 | 141 | ||
142 | spin_unlock_irqrestore(&vec->lock, flags); | 142 | spin_unlock_irqrestore(&vec->lock, flags); |
143 | } | 143 | } |
144 | 144 | ||
145 | *currpri = newpri; | 145 | *currpri = newpri; |
146 | } | 146 | } |
147 | 147 | ||
148 | /** | 148 | /** |
149 | * cpupri_init - initialize the cpupri structure | 149 | * cpupri_init - initialize the cpupri structure |
150 | * @cp: The cpupri context | 150 | * @cp: The cpupri context |
151 | * @bootmem: true if allocations need to use bootmem | 151 | * @bootmem: true if allocations need to use bootmem |
152 | * | 152 | * |
153 | * Returns: -ENOMEM if memory fails. | 153 | * Returns: -ENOMEM if memory fails. |
154 | */ | 154 | */ |
155 | int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) | 155 | int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) |
156 | { | 156 | { |
157 | int i; | 157 | int i; |
158 | 158 | ||
159 | memset(cp, 0, sizeof(*cp)); | 159 | memset(cp, 0, sizeof(*cp)); |
160 | 160 | ||
161 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { | 161 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { |
162 | struct cpupri_vec *vec = &cp->pri_to_cpu[i]; | 162 | struct cpupri_vec *vec = &cp->pri_to_cpu[i]; |
163 | 163 | ||
164 | spin_lock_init(&vec->lock); | 164 | spin_lock_init(&vec->lock); |
165 | vec->count = 0; | 165 | vec->count = 0; |
166 | if (bootmem) | 166 | if (bootmem) |
167 | alloc_bootmem_cpumask_var(&vec->mask); | 167 | alloc_bootmem_cpumask_var(&vec->mask); |
168 | else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL)) | 168 | else if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL)) |
169 | goto cleanup; | 169 | goto cleanup; |
170 | } | 170 | } |
171 | 171 | ||
172 | for_each_possible_cpu(i) | 172 | for_each_possible_cpu(i) |
173 | cp->cpu_to_pri[i] = CPUPRI_INVALID; | 173 | cp->cpu_to_pri[i] = CPUPRI_INVALID; |
174 | return 0; | 174 | return 0; |
175 | 175 | ||
176 | cleanup: | 176 | cleanup: |
177 | for (i--; i >= 0; i--) | 177 | for (i--; i >= 0; i--) |
178 | free_cpumask_var(cp->pri_to_cpu[i].mask); | 178 | free_cpumask_var(cp->pri_to_cpu[i].mask); |
179 | return -ENOMEM; | 179 | return -ENOMEM; |
180 | } | 180 | } |
181 | 181 | ||
182 | /** | 182 | /** |
183 | * cpupri_cleanup - clean up the cpupri structure | 183 | * cpupri_cleanup - clean up the cpupri structure |
184 | * @cp: The cpupri context | 184 | * @cp: The cpupri context |
185 | */ | 185 | */ |
186 | void cpupri_cleanup(struct cpupri *cp) | 186 | void cpupri_cleanup(struct cpupri *cp) |
187 | { | 187 | { |
188 | int i; | 188 | int i; |
189 | 189 | ||
190 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) | 190 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) |
191 | free_cpumask_var(cp->pri_to_cpu[i].mask); | 191 | free_cpumask_var(cp->pri_to_cpu[i].mask); |
192 | } | 192 | } |
193 | 193 |
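The one-line hunk in cpupri_init() above (alloc_cpumask_var() -> zalloc_cpumask_var()) comes down to where the mask bits live. Without CONFIG_CPUMASK_OFFSTACK a cpumask_var_t is a fixed-size array, so a static instance sits in BSS and starts out all-zero; with it, the variable is a pointer and alloc_cpumask_var() hands back uninitialized heap memory, while zalloc_cpumask_var() clears it. A minimal userspace model of that difference, assuming mask_t, model_alloc() and model_zalloc() as invented stand-ins rather than kernel APIs:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef unsigned long mask_t;            /* stand-in for struct cpumask */

    /* Like alloc_cpumask_var() with CONFIG_CPUMASK_OFFSTACK: heap memory
     * whose previous contents survive (simulated here with 0xff bytes). */
    static int model_alloc(mask_t **m)
    {
        *m = malloc(sizeof(**m));
        if (*m)
            memset(*m, 0xff, sizeof(**m));   /* pretend the heap was dirty */
        return *m != NULL;
    }

    /* Like zalloc_cpumask_var(): the same allocation, but cleared. */
    static int model_zalloc(mask_t **m)
    {
        *m = calloc(1, sizeof(**m));
        return *m != NULL;
    }

    int main(void)
    {
        static mask_t static_mask;           /* BSS: zeroed by the C runtime,
                                                like a !OFFSTACK cpumask_var_t */
        mask_t *dirty, *clean;

        if (!model_alloc(&dirty) || !model_zalloc(&clean))
            return 1;

        printf("static %#lx, alloc %#lx, zalloc %#lx\n",
               static_mask, *dirty, *clean);
        free(dirty);
        free(clean);
        return 0;
    }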
kernel/sched_rt.c
1 | /* | 1 | /* |
2 | * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR | 2 | * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR |
3 | * policies) | 3 | * policies) |
4 | */ | 4 | */ |
5 | 5 | ||
6 | static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se) | 6 | static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se) |
7 | { | 7 | { |
8 | return container_of(rt_se, struct task_struct, rt); | 8 | return container_of(rt_se, struct task_struct, rt); |
9 | } | 9 | } |
10 | 10 | ||
11 | #ifdef CONFIG_RT_GROUP_SCHED | 11 | #ifdef CONFIG_RT_GROUP_SCHED |
12 | 12 | ||
13 | static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) | 13 | static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) |
14 | { | 14 | { |
15 | return rt_rq->rq; | 15 | return rt_rq->rq; |
16 | } | 16 | } |
17 | 17 | ||
18 | static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) | 18 | static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) |
19 | { | 19 | { |
20 | return rt_se->rt_rq; | 20 | return rt_se->rt_rq; |
21 | } | 21 | } |
22 | 22 | ||
23 | #else /* CONFIG_RT_GROUP_SCHED */ | 23 | #else /* CONFIG_RT_GROUP_SCHED */ |
24 | 24 | ||
25 | static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) | 25 | static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) |
26 | { | 26 | { |
27 | return container_of(rt_rq, struct rq, rt); | 27 | return container_of(rt_rq, struct rq, rt); |
28 | } | 28 | } |
29 | 29 | ||
30 | static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) | 30 | static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) |
31 | { | 31 | { |
32 | struct task_struct *p = rt_task_of(rt_se); | 32 | struct task_struct *p = rt_task_of(rt_se); |
33 | struct rq *rq = task_rq(p); | 33 | struct rq *rq = task_rq(p); |
34 | 34 | ||
35 | return &rq->rt; | 35 | return &rq->rt; |
36 | } | 36 | } |
37 | 37 | ||
38 | #endif /* CONFIG_RT_GROUP_SCHED */ | 38 | #endif /* CONFIG_RT_GROUP_SCHED */ |
39 | 39 | ||
40 | #ifdef CONFIG_SMP | 40 | #ifdef CONFIG_SMP |
41 | 41 | ||
42 | static inline int rt_overloaded(struct rq *rq) | 42 | static inline int rt_overloaded(struct rq *rq) |
43 | { | 43 | { |
44 | return atomic_read(&rq->rd->rto_count); | 44 | return atomic_read(&rq->rd->rto_count); |
45 | } | 45 | } |
46 | 46 | ||
47 | static inline void rt_set_overload(struct rq *rq) | 47 | static inline void rt_set_overload(struct rq *rq) |
48 | { | 48 | { |
49 | if (!rq->online) | 49 | if (!rq->online) |
50 | return; | 50 | return; |
51 | 51 | ||
52 | cpumask_set_cpu(rq->cpu, rq->rd->rto_mask); | 52 | cpumask_set_cpu(rq->cpu, rq->rd->rto_mask); |
53 | /* | 53 | /* |
54 | * Make sure the mask is visible before we set | 54 | * Make sure the mask is visible before we set |
55 | * the overload count. That is checked to determine | 55 | * the overload count. That is checked to determine |
56 | * if we should look at the mask. It would be a shame | 56 | * if we should look at the mask. It would be a shame |
57 | * if we looked at the mask, but the mask was not | 57 | * if we looked at the mask, but the mask was not |
58 | * updated yet. | 58 | * updated yet. |
59 | */ | 59 | */ |
60 | wmb(); | 60 | wmb(); |
61 | atomic_inc(&rq->rd->rto_count); | 61 | atomic_inc(&rq->rd->rto_count); |
62 | } | 62 | } |
63 | 63 | ||
64 | static inline void rt_clear_overload(struct rq *rq) | 64 | static inline void rt_clear_overload(struct rq *rq) |
65 | { | 65 | { |
66 | if (!rq->online) | 66 | if (!rq->online) |
67 | return; | 67 | return; |
68 | 68 | ||
69 | /* the order here really doesn't matter */ | 69 | /* the order here really doesn't matter */ |
70 | atomic_dec(&rq->rd->rto_count); | 70 | atomic_dec(&rq->rd->rto_count); |
71 | cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask); | 71 | cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask); |
72 | } | 72 | } |
73 | 73 | ||
74 | static void update_rt_migration(struct rt_rq *rt_rq) | 74 | static void update_rt_migration(struct rt_rq *rt_rq) |
75 | { | 75 | { |
76 | if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) { | 76 | if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) { |
77 | if (!rt_rq->overloaded) { | 77 | if (!rt_rq->overloaded) { |
78 | rt_set_overload(rq_of_rt_rq(rt_rq)); | 78 | rt_set_overload(rq_of_rt_rq(rt_rq)); |
79 | rt_rq->overloaded = 1; | 79 | rt_rq->overloaded = 1; |
80 | } | 80 | } |
81 | } else if (rt_rq->overloaded) { | 81 | } else if (rt_rq->overloaded) { |
82 | rt_clear_overload(rq_of_rt_rq(rt_rq)); | 82 | rt_clear_overload(rq_of_rt_rq(rt_rq)); |
83 | rt_rq->overloaded = 0; | 83 | rt_rq->overloaded = 0; |
84 | } | 84 | } |
85 | } | 85 | } |
86 | 86 | ||
87 | static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | 87 | static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) |
88 | { | 88 | { |
89 | if (rt_se->nr_cpus_allowed > 1) | 89 | if (rt_se->nr_cpus_allowed > 1) |
90 | rt_rq->rt_nr_migratory++; | 90 | rt_rq->rt_nr_migratory++; |
91 | 91 | ||
92 | update_rt_migration(rt_rq); | 92 | update_rt_migration(rt_rq); |
93 | } | 93 | } |
94 | 94 | ||
95 | static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | 95 | static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) |
96 | { | 96 | { |
97 | if (rt_se->nr_cpus_allowed > 1) | 97 | if (rt_se->nr_cpus_allowed > 1) |
98 | rt_rq->rt_nr_migratory--; | 98 | rt_rq->rt_nr_migratory--; |
99 | 99 | ||
100 | update_rt_migration(rt_rq); | 100 | update_rt_migration(rt_rq); |
101 | } | 101 | } |
102 | 102 | ||
103 | static void enqueue_pushable_task(struct rq *rq, struct task_struct *p) | 103 | static void enqueue_pushable_task(struct rq *rq, struct task_struct *p) |
104 | { | 104 | { |
105 | plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); | 105 | plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); |
106 | plist_node_init(&p->pushable_tasks, p->prio); | 106 | plist_node_init(&p->pushable_tasks, p->prio); |
107 | plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks); | 107 | plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks); |
108 | } | 108 | } |
109 | 109 | ||
110 | static void dequeue_pushable_task(struct rq *rq, struct task_struct *p) | 110 | static void dequeue_pushable_task(struct rq *rq, struct task_struct *p) |
111 | { | 111 | { |
112 | plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); | 112 | plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); |
113 | } | 113 | } |
114 | 114 | ||
115 | #else | 115 | #else |
116 | 116 | ||
117 | static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p) | 117 | static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p) |
118 | { | 118 | { |
119 | } | 119 | } |
120 | 120 | ||
121 | static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p) | 121 | static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p) |
122 | { | 122 | { |
123 | } | 123 | } |
124 | 124 | ||
125 | static inline | 125 | static inline |
126 | void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | 126 | void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) |
127 | { | 127 | { |
128 | } | 128 | } |
129 | 129 | ||
130 | static inline | 130 | static inline |
131 | void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | 131 | void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) |
132 | { | 132 | { |
133 | } | 133 | } |
134 | 134 | ||
135 | #endif /* CONFIG_SMP */ | 135 | #endif /* CONFIG_SMP */ |
136 | 136 | ||
137 | static inline int on_rt_rq(struct sched_rt_entity *rt_se) | 137 | static inline int on_rt_rq(struct sched_rt_entity *rt_se) |
138 | { | 138 | { |
139 | return !list_empty(&rt_se->run_list); | 139 | return !list_empty(&rt_se->run_list); |
140 | } | 140 | } |
141 | 141 | ||
142 | #ifdef CONFIG_RT_GROUP_SCHED | 142 | #ifdef CONFIG_RT_GROUP_SCHED |
143 | 143 | ||
144 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) | 144 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) |
145 | { | 145 | { |
146 | if (!rt_rq->tg) | 146 | if (!rt_rq->tg) |
147 | return RUNTIME_INF; | 147 | return RUNTIME_INF; |
148 | 148 | ||
149 | return rt_rq->rt_runtime; | 149 | return rt_rq->rt_runtime; |
150 | } | 150 | } |
151 | 151 | ||
152 | static inline u64 sched_rt_period(struct rt_rq *rt_rq) | 152 | static inline u64 sched_rt_period(struct rt_rq *rt_rq) |
153 | { | 153 | { |
154 | return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); | 154 | return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); |
155 | } | 155 | } |
156 | 156 | ||
157 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 157 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
158 | list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) | 158 | list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) |
159 | 159 | ||
160 | #define for_each_sched_rt_entity(rt_se) \ | 160 | #define for_each_sched_rt_entity(rt_se) \ |
161 | for (; rt_se; rt_se = rt_se->parent) | 161 | for (; rt_se; rt_se = rt_se->parent) |
162 | 162 | ||
163 | static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) | 163 | static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) |
164 | { | 164 | { |
165 | return rt_se->my_q; | 165 | return rt_se->my_q; |
166 | } | 166 | } |
167 | 167 | ||
168 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se); | 168 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se); |
169 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se); | 169 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se); |
170 | 170 | ||
171 | static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) | 171 | static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) |
172 | { | 172 | { |
173 | struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; | 173 | struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; |
174 | struct sched_rt_entity *rt_se = rt_rq->rt_se; | 174 | struct sched_rt_entity *rt_se = rt_rq->rt_se; |
175 | 175 | ||
176 | if (rt_rq->rt_nr_running) { | 176 | if (rt_rq->rt_nr_running) { |
177 | if (rt_se && !on_rt_rq(rt_se)) | 177 | if (rt_se && !on_rt_rq(rt_se)) |
178 | enqueue_rt_entity(rt_se); | 178 | enqueue_rt_entity(rt_se); |
179 | if (rt_rq->highest_prio.curr < curr->prio) | 179 | if (rt_rq->highest_prio.curr < curr->prio) |
180 | resched_task(curr); | 180 | resched_task(curr); |
181 | } | 181 | } |
182 | } | 182 | } |
183 | 183 | ||
184 | static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) | 184 | static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) |
185 | { | 185 | { |
186 | struct sched_rt_entity *rt_se = rt_rq->rt_se; | 186 | struct sched_rt_entity *rt_se = rt_rq->rt_se; |
187 | 187 | ||
188 | if (rt_se && on_rt_rq(rt_se)) | 188 | if (rt_se && on_rt_rq(rt_se)) |
189 | dequeue_rt_entity(rt_se); | 189 | dequeue_rt_entity(rt_se); |
190 | } | 190 | } |
191 | 191 | ||
192 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | 192 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) |
193 | { | 193 | { |
194 | return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; | 194 | return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; |
195 | } | 195 | } |
196 | 196 | ||
197 | static int rt_se_boosted(struct sched_rt_entity *rt_se) | 197 | static int rt_se_boosted(struct sched_rt_entity *rt_se) |
198 | { | 198 | { |
199 | struct rt_rq *rt_rq = group_rt_rq(rt_se); | 199 | struct rt_rq *rt_rq = group_rt_rq(rt_se); |
200 | struct task_struct *p; | 200 | struct task_struct *p; |
201 | 201 | ||
202 | if (rt_rq) | 202 | if (rt_rq) |
203 | return !!rt_rq->rt_nr_boosted; | 203 | return !!rt_rq->rt_nr_boosted; |
204 | 204 | ||
205 | p = rt_task_of(rt_se); | 205 | p = rt_task_of(rt_se); |
206 | return p->prio != p->normal_prio; | 206 | return p->prio != p->normal_prio; |
207 | } | 207 | } |
208 | 208 | ||
209 | #ifdef CONFIG_SMP | 209 | #ifdef CONFIG_SMP |
210 | static inline const struct cpumask *sched_rt_period_mask(void) | 210 | static inline const struct cpumask *sched_rt_period_mask(void) |
211 | { | 211 | { |
212 | return cpu_rq(smp_processor_id())->rd->span; | 212 | return cpu_rq(smp_processor_id())->rd->span; |
213 | } | 213 | } |
214 | #else | 214 | #else |
215 | static inline const struct cpumask *sched_rt_period_mask(void) | 215 | static inline const struct cpumask *sched_rt_period_mask(void) |
216 | { | 216 | { |
217 | return cpu_online_mask; | 217 | return cpu_online_mask; |
218 | } | 218 | } |
219 | #endif | 219 | #endif |
220 | 220 | ||
221 | static inline | 221 | static inline |
222 | struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) | 222 | struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) |
223 | { | 223 | { |
224 | return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu]; | 224 | return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu]; |
225 | } | 225 | } |
226 | 226 | ||
227 | static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) | 227 | static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) |
228 | { | 228 | { |
229 | return &rt_rq->tg->rt_bandwidth; | 229 | return &rt_rq->tg->rt_bandwidth; |
230 | } | 230 | } |
231 | 231 | ||
232 | #else /* !CONFIG_RT_GROUP_SCHED */ | 232 | #else /* !CONFIG_RT_GROUP_SCHED */ |
233 | 233 | ||
234 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) | 234 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) |
235 | { | 235 | { |
236 | return rt_rq->rt_runtime; | 236 | return rt_rq->rt_runtime; |
237 | } | 237 | } |
238 | 238 | ||
239 | static inline u64 sched_rt_period(struct rt_rq *rt_rq) | 239 | static inline u64 sched_rt_period(struct rt_rq *rt_rq) |
240 | { | 240 | { |
241 | return ktime_to_ns(def_rt_bandwidth.rt_period); | 241 | return ktime_to_ns(def_rt_bandwidth.rt_period); |
242 | } | 242 | } |
243 | 243 | ||
244 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 244 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
245 | for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) | 245 | for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) |
246 | 246 | ||
247 | #define for_each_sched_rt_entity(rt_se) \ | 247 | #define for_each_sched_rt_entity(rt_se) \ |
248 | for (; rt_se; rt_se = NULL) | 248 | for (; rt_se; rt_se = NULL) |
249 | 249 | ||
250 | static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) | 250 | static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) |
251 | { | 251 | { |
252 | return NULL; | 252 | return NULL; |
253 | } | 253 | } |
254 | 254 | ||
255 | static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) | 255 | static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) |
256 | { | 256 | { |
257 | if (rt_rq->rt_nr_running) | 257 | if (rt_rq->rt_nr_running) |
258 | resched_task(rq_of_rt_rq(rt_rq)->curr); | 258 | resched_task(rq_of_rt_rq(rt_rq)->curr); |
259 | } | 259 | } |
260 | 260 | ||
261 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) | 261 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) |
262 | { | 262 | { |
263 | } | 263 | } |
264 | 264 | ||
265 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | 265 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) |
266 | { | 266 | { |
267 | return rt_rq->rt_throttled; | 267 | return rt_rq->rt_throttled; |
268 | } | 268 | } |
269 | 269 | ||
270 | static inline const struct cpumask *sched_rt_period_mask(void) | 270 | static inline const struct cpumask *sched_rt_period_mask(void) |
271 | { | 271 | { |
272 | return cpu_online_mask; | 272 | return cpu_online_mask; |
273 | } | 273 | } |
274 | 274 | ||
275 | static inline | 275 | static inline |
276 | struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) | 276 | struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) |
277 | { | 277 | { |
278 | return &cpu_rq(cpu)->rt; | 278 | return &cpu_rq(cpu)->rt; |
279 | } | 279 | } |
280 | 280 | ||
281 | static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) | 281 | static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) |
282 | { | 282 | { |
283 | return &def_rt_bandwidth; | 283 | return &def_rt_bandwidth; |
284 | } | 284 | } |
285 | 285 | ||
286 | #endif /* CONFIG_RT_GROUP_SCHED */ | 286 | #endif /* CONFIG_RT_GROUP_SCHED */ |
287 | 287 | ||
288 | #ifdef CONFIG_SMP | 288 | #ifdef CONFIG_SMP |
289 | /* | 289 | /* |
290 | * We ran out of runtime; see if we can borrow some from our neighbours. | 290 | * We ran out of runtime; see if we can borrow some from our neighbours. |
291 | */ | 291 | */ |
292 | static int do_balance_runtime(struct rt_rq *rt_rq) | 292 | static int do_balance_runtime(struct rt_rq *rt_rq) |
293 | { | 293 | { |
294 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | 294 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); |
295 | struct root_domain *rd = cpu_rq(smp_processor_id())->rd; | 295 | struct root_domain *rd = cpu_rq(smp_processor_id())->rd; |
296 | int i, weight, more = 0; | 296 | int i, weight, more = 0; |
297 | u64 rt_period; | 297 | u64 rt_period; |
298 | 298 | ||
299 | weight = cpumask_weight(rd->span); | 299 | weight = cpumask_weight(rd->span); |
300 | 300 | ||
301 | spin_lock(&rt_b->rt_runtime_lock); | 301 | spin_lock(&rt_b->rt_runtime_lock); |
302 | rt_period = ktime_to_ns(rt_b->rt_period); | 302 | rt_period = ktime_to_ns(rt_b->rt_period); |
303 | for_each_cpu(i, rd->span) { | 303 | for_each_cpu(i, rd->span) { |
304 | struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); | 304 | struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); |
305 | s64 diff; | 305 | s64 diff; |
306 | 306 | ||
307 | if (iter == rt_rq) | 307 | if (iter == rt_rq) |
308 | continue; | 308 | continue; |
309 | 309 | ||
310 | spin_lock(&iter->rt_runtime_lock); | 310 | spin_lock(&iter->rt_runtime_lock); |
311 | /* | 311 | /* |
312 | * Either all rqs have inf runtime and there's nothing to steal | 312 | * Either all rqs have inf runtime and there's nothing to steal |
313 | * or __disable_runtime() below sets a specific rq to inf to | 313 | * or __disable_runtime() below sets a specific rq to inf to |
314 | * indicate it's been disabled and disallow stealing. | 314 | * indicate it's been disabled and disallow stealing. |
315 | */ | 315 | */ |
316 | if (iter->rt_runtime == RUNTIME_INF) | 316 | if (iter->rt_runtime == RUNTIME_INF) |
317 | goto next; | 317 | goto next; |
318 | 318 | ||
319 | /* | 319 | /* |
320 | * From runqueues with spare time, take 1/n part of their | 320 | * From runqueues with spare time, take 1/n part of their |
321 | * spare time, but no more than our period. | 321 | * spare time, but no more than our period. |
322 | */ | 322 | */ |
323 | diff = iter->rt_runtime - iter->rt_time; | 323 | diff = iter->rt_runtime - iter->rt_time; |
324 | if (diff > 0) { | 324 | if (diff > 0) { |
325 | diff = div_u64((u64)diff, weight); | 325 | diff = div_u64((u64)diff, weight); |
326 | if (rt_rq->rt_runtime + diff > rt_period) | 326 | if (rt_rq->rt_runtime + diff > rt_period) |
327 | diff = rt_period - rt_rq->rt_runtime; | 327 | diff = rt_period - rt_rq->rt_runtime; |
328 | iter->rt_runtime -= diff; | 328 | iter->rt_runtime -= diff; |
329 | rt_rq->rt_runtime += diff; | 329 | rt_rq->rt_runtime += diff; |
330 | more = 1; | 330 | more = 1; |
331 | if (rt_rq->rt_runtime == rt_period) { | 331 | if (rt_rq->rt_runtime == rt_period) { |
332 | spin_unlock(&iter->rt_runtime_lock); | 332 | spin_unlock(&iter->rt_runtime_lock); |
333 | break; | 333 | break; |
334 | } | 334 | } |
335 | } | 335 | } |
336 | next: | 336 | next: |
337 | spin_unlock(&iter->rt_runtime_lock); | 337 | spin_unlock(&iter->rt_runtime_lock); |
338 | } | 338 | } |
339 | spin_unlock(&rt_b->rt_runtime_lock); | 339 | spin_unlock(&rt_b->rt_runtime_lock); |
340 | 340 | ||
341 | return more; | 341 | return more; |
342 | } | 342 | } |
343 | 343 | ||
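A worked example of the 1/n split above, with invented numbers: in a 4-CPU root domain (weight = 4) with a 1000000us period, a neighbour holding 950000us of runtime with 350000us used has 600000us spare, so the borrower asks for 600000 / 4 = 150000us; the rt_period clamp then trims that so its own runtime never exceeds one full period:

    #include <stdio.h>

    int main(void)
    {
        long long rt_period    = 1000000;    /* us, invented for illustration */
        long long weight       = 4;          /* CPUs in the root domain */
        long long iter_runtime = 950000;     /* the neighbour's budget... */
        long long iter_time    = 350000;     /* ...of which this much is used */
        long long my_runtime   = 900000;     /* the throttled rq's budget */

        long long diff = (iter_runtime - iter_time) / weight;  /* 150000 */
        if (my_runtime + diff > rt_period)
            diff = rt_period - my_runtime;   /* clamp: only 100000 is taken */

        iter_runtime -= diff;
        my_runtime   += diff;
        printf("borrowed %lldus: mine=%lldus, neighbour=%lldus\n",
               diff, my_runtime, iter_runtime);
        return 0;
    }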
344 | /* | 344 | /* |
345 | * Ensure this RQ takes back all the runtime it lent to its neighbours. | 345 | * Ensure this RQ takes back all the runtime it lent to its neighbours. |
346 | */ | 346 | */ |
347 | static void __disable_runtime(struct rq *rq) | 347 | static void __disable_runtime(struct rq *rq) |
348 | { | 348 | { |
349 | struct root_domain *rd = rq->rd; | 349 | struct root_domain *rd = rq->rd; |
350 | struct rt_rq *rt_rq; | 350 | struct rt_rq *rt_rq; |
351 | 351 | ||
352 | if (unlikely(!scheduler_running)) | 352 | if (unlikely(!scheduler_running)) |
353 | return; | 353 | return; |
354 | 354 | ||
355 | for_each_leaf_rt_rq(rt_rq, rq) { | 355 | for_each_leaf_rt_rq(rt_rq, rq) { |
356 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | 356 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); |
357 | s64 want; | 357 | s64 want; |
358 | int i; | 358 | int i; |
359 | 359 | ||
360 | spin_lock(&rt_b->rt_runtime_lock); | 360 | spin_lock(&rt_b->rt_runtime_lock); |
361 | spin_lock(&rt_rq->rt_runtime_lock); | 361 | spin_lock(&rt_rq->rt_runtime_lock); |
362 | /* | 362 | /* |
363 | * Either we're all inf and nobody needs to borrow, or we're | 363 | * Either we're all inf and nobody needs to borrow, or we're |
364 | * already disabled and thus have nothing to do, or we have | 364 | * already disabled and thus have nothing to do, or we have |
365 | * exactly the right amount of runtime to take out. | 365 | * exactly the right amount of runtime to take out. |
366 | */ | 366 | */ |
367 | if (rt_rq->rt_runtime == RUNTIME_INF || | 367 | if (rt_rq->rt_runtime == RUNTIME_INF || |
368 | rt_rq->rt_runtime == rt_b->rt_runtime) | 368 | rt_rq->rt_runtime == rt_b->rt_runtime) |
369 | goto balanced; | 369 | goto balanced; |
370 | spin_unlock(&rt_rq->rt_runtime_lock); | 370 | spin_unlock(&rt_rq->rt_runtime_lock); |
371 | 371 | ||
372 | /* | 372 | /* |
373 | * Calculate the difference between what we started out with | 373 | * Calculate the difference between what we started out with |
374 | * and what we currently have, that's the amount of runtime | 374 | * and what we currently have, that's the amount of runtime |
375 | * we lent and now have to reclaim. | 375 | * we lent and now have to reclaim. |
376 | */ | 376 | */ |
377 | want = rt_b->rt_runtime - rt_rq->rt_runtime; | 377 | want = rt_b->rt_runtime - rt_rq->rt_runtime; |
378 | 378 | ||
379 | /* | 379 | /* |
380 | * Greedy reclaim, take back as much as we can. | 380 | * Greedy reclaim, take back as much as we can. |
381 | */ | 381 | */ |
382 | for_each_cpu(i, rd->span) { | 382 | for_each_cpu(i, rd->span) { |
383 | struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); | 383 | struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); |
384 | s64 diff; | 384 | s64 diff; |
385 | 385 | ||
386 | /* | 386 | /* |
387 | * Can't reclaim from ourselves or disabled runqueues. | 387 | * Can't reclaim from ourselves or disabled runqueues. |
388 | */ | 388 | */ |
389 | if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF) | 389 | if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF) |
390 | continue; | 390 | continue; |
391 | 391 | ||
392 | spin_lock(&iter->rt_runtime_lock); | 392 | spin_lock(&iter->rt_runtime_lock); |
393 | if (want > 0) { | 393 | if (want > 0) { |
394 | diff = min_t(s64, iter->rt_runtime, want); | 394 | diff = min_t(s64, iter->rt_runtime, want); |
395 | iter->rt_runtime -= diff; | 395 | iter->rt_runtime -= diff; |
396 | want -= diff; | 396 | want -= diff; |
397 | } else { | 397 | } else { |
398 | iter->rt_runtime -= want; | 398 | iter->rt_runtime -= want; |
399 | want -= want; | 399 | want -= want; |
400 | } | 400 | } |
401 | spin_unlock(&iter->rt_runtime_lock); | 401 | spin_unlock(&iter->rt_runtime_lock); |
402 | 402 | ||
403 | if (!want) | 403 | if (!want) |
404 | break; | 404 | break; |
405 | } | 405 | } |
406 | 406 | ||
407 | spin_lock(&rt_rq->rt_runtime_lock); | 407 | spin_lock(&rt_rq->rt_runtime_lock); |
408 | /* | 408 | /* |
409 | * We cannot be left wanting - that would mean some runtime | 409 | * We cannot be left wanting - that would mean some runtime |
410 | * leaked out of the system. | 410 | * leaked out of the system. |
411 | */ | 411 | */ |
412 | BUG_ON(want); | 412 | BUG_ON(want); |
413 | balanced: | 413 | balanced: |
414 | /* | 414 | /* |
415 | * Disable all the borrow logic by pretending we have inf | 415 | * Disable all the borrow logic by pretending we have inf |
416 | * runtime - in which case borrowing doesn't make sense. | 416 | * runtime - in which case borrowing doesn't make sense. |
417 | */ | 417 | */ |
418 | rt_rq->rt_runtime = RUNTIME_INF; | 418 | rt_rq->rt_runtime = RUNTIME_INF; |
419 | spin_unlock(&rt_rq->rt_runtime_lock); | 419 | spin_unlock(&rt_rq->rt_runtime_lock); |
420 | spin_unlock(&rt_b->rt_runtime_lock); | 420 | spin_unlock(&rt_b->rt_runtime_lock); |
421 | } | 421 | } |
422 | } | 422 | } |
423 | 423 | ||
424 | static void disable_runtime(struct rq *rq) | 424 | static void disable_runtime(struct rq *rq) |
425 | { | 425 | { |
426 | unsigned long flags; | 426 | unsigned long flags; |
427 | 427 | ||
428 | spin_lock_irqsave(&rq->lock, flags); | 428 | spin_lock_irqsave(&rq->lock, flags); |
429 | __disable_runtime(rq); | 429 | __disable_runtime(rq); |
430 | spin_unlock_irqrestore(&rq->lock, flags); | 430 | spin_unlock_irqrestore(&rq->lock, flags); |
431 | } | 431 | } |
432 | 432 | ||
433 | static void __enable_runtime(struct rq *rq) | 433 | static void __enable_runtime(struct rq *rq) |
434 | { | 434 | { |
435 | struct rt_rq *rt_rq; | 435 | struct rt_rq *rt_rq; |
436 | 436 | ||
437 | if (unlikely(!scheduler_running)) | 437 | if (unlikely(!scheduler_running)) |
438 | return; | 438 | return; |
439 | 439 | ||
440 | /* | 440 | /* |
441 | * Reset each runqueue's bandwidth settings | 441 | * Reset each runqueue's bandwidth settings |
442 | */ | 442 | */ |
443 | for_each_leaf_rt_rq(rt_rq, rq) { | 443 | for_each_leaf_rt_rq(rt_rq, rq) { |
444 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | 444 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); |
445 | 445 | ||
446 | spin_lock(&rt_b->rt_runtime_lock); | 446 | spin_lock(&rt_b->rt_runtime_lock); |
447 | spin_lock(&rt_rq->rt_runtime_lock); | 447 | spin_lock(&rt_rq->rt_runtime_lock); |
448 | rt_rq->rt_runtime = rt_b->rt_runtime; | 448 | rt_rq->rt_runtime = rt_b->rt_runtime; |
449 | rt_rq->rt_time = 0; | 449 | rt_rq->rt_time = 0; |
450 | rt_rq->rt_throttled = 0; | 450 | rt_rq->rt_throttled = 0; |
451 | spin_unlock(&rt_rq->rt_runtime_lock); | 451 | spin_unlock(&rt_rq->rt_runtime_lock); |
452 | spin_unlock(&rt_b->rt_runtime_lock); | 452 | spin_unlock(&rt_b->rt_runtime_lock); |
453 | } | 453 | } |
454 | } | 454 | } |
455 | 455 | ||
456 | static void enable_runtime(struct rq *rq) | 456 | static void enable_runtime(struct rq *rq) |
457 | { | 457 | { |
458 | unsigned long flags; | 458 | unsigned long flags; |
459 | 459 | ||
460 | spin_lock_irqsave(&rq->lock, flags); | 460 | spin_lock_irqsave(&rq->lock, flags); |
461 | __enable_runtime(rq); | 461 | __enable_runtime(rq); |
462 | spin_unlock_irqrestore(&rq->lock, flags); | 462 | spin_unlock_irqrestore(&rq->lock, flags); |
463 | } | 463 | } |
464 | 464 | ||
465 | static int balance_runtime(struct rt_rq *rt_rq) | 465 | static int balance_runtime(struct rt_rq *rt_rq) |
466 | { | 466 | { |
467 | int more = 0; | 467 | int more = 0; |
468 | 468 | ||
469 | if (rt_rq->rt_time > rt_rq->rt_runtime) { | 469 | if (rt_rq->rt_time > rt_rq->rt_runtime) { |
470 | spin_unlock(&rt_rq->rt_runtime_lock); | 470 | spin_unlock(&rt_rq->rt_runtime_lock); |
471 | more = do_balance_runtime(rt_rq); | 471 | more = do_balance_runtime(rt_rq); |
472 | spin_lock(&rt_rq->rt_runtime_lock); | 472 | spin_lock(&rt_rq->rt_runtime_lock); |
473 | } | 473 | } |
474 | 474 | ||
475 | return more; | 475 | return more; |
476 | } | 476 | } |
477 | #else /* !CONFIG_SMP */ | 477 | #else /* !CONFIG_SMP */ |
478 | static inline int balance_runtime(struct rt_rq *rt_rq) | 478 | static inline int balance_runtime(struct rt_rq *rt_rq) |
479 | { | 479 | { |
480 | return 0; | 480 | return 0; |
481 | } | 481 | } |
482 | #endif /* CONFIG_SMP */ | 482 | #endif /* CONFIG_SMP */ |
483 | 483 | ||
484 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | 484 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) |
485 | { | 485 | { |
486 | int i, idle = 1; | 486 | int i, idle = 1; |
487 | const struct cpumask *span; | 487 | const struct cpumask *span; |
488 | 488 | ||
489 | if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) | 489 | if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) |
490 | return 1; | 490 | return 1; |
491 | 491 | ||
492 | span = sched_rt_period_mask(); | 492 | span = sched_rt_period_mask(); |
493 | for_each_cpu(i, span) { | 493 | for_each_cpu(i, span) { |
494 | int enqueue = 0; | 494 | int enqueue = 0; |
495 | struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); | 495 | struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); |
496 | struct rq *rq = rq_of_rt_rq(rt_rq); | 496 | struct rq *rq = rq_of_rt_rq(rt_rq); |
497 | 497 | ||
498 | spin_lock(&rq->lock); | 498 | spin_lock(&rq->lock); |
499 | if (rt_rq->rt_time) { | 499 | if (rt_rq->rt_time) { |
500 | u64 runtime; | 500 | u64 runtime; |
501 | 501 | ||
502 | spin_lock(&rt_rq->rt_runtime_lock); | 502 | spin_lock(&rt_rq->rt_runtime_lock); |
503 | if (rt_rq->rt_throttled) | 503 | if (rt_rq->rt_throttled) |
504 | balance_runtime(rt_rq); | 504 | balance_runtime(rt_rq); |
505 | runtime = rt_rq->rt_runtime; | 505 | runtime = rt_rq->rt_runtime; |
506 | rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); | 506 | rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); |
507 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { | 507 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { |
508 | rt_rq->rt_throttled = 0; | 508 | rt_rq->rt_throttled = 0; |
509 | enqueue = 1; | 509 | enqueue = 1; |
510 | } | 510 | } |
511 | if (rt_rq->rt_time || rt_rq->rt_nr_running) | 511 | if (rt_rq->rt_time || rt_rq->rt_nr_running) |
512 | idle = 0; | 512 | idle = 0; |
513 | spin_unlock(&rt_rq->rt_runtime_lock); | 513 | spin_unlock(&rt_rq->rt_runtime_lock); |
514 | } else if (rt_rq->rt_nr_running) | 514 | } else if (rt_rq->rt_nr_running) |
515 | idle = 0; | 515 | idle = 0; |
516 | 516 | ||
517 | if (enqueue) | 517 | if (enqueue) |
518 | sched_rt_rq_enqueue(rt_rq); | 518 | sched_rt_rq_enqueue(rt_rq); |
519 | spin_unlock(&rq->lock); | 519 | spin_unlock(&rq->lock); |
520 | } | 520 | } |
521 | 521 | ||
522 | return idle; | 522 | return idle; |
523 | } | 523 | } |
524 | 524 | ||
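For the replenishment arithmetic just above, with invented numbers: if a runqueue missed two timer periods (overrun = 2) with runtime = 950000us and had accumulated rt_time = 1500000us, the min() clamps the refund to min(1500000, 2 * 950000) = 1500000us, so rt_time drops to zero; once rt_time is below runtime the throttle is lifted and the rt_rq is re-enqueued.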
525 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) | 525 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) |
526 | { | 526 | { |
527 | #ifdef CONFIG_RT_GROUP_SCHED | 527 | #ifdef CONFIG_RT_GROUP_SCHED |
528 | struct rt_rq *rt_rq = group_rt_rq(rt_se); | 528 | struct rt_rq *rt_rq = group_rt_rq(rt_se); |
529 | 529 | ||
530 | if (rt_rq) | 530 | if (rt_rq) |
531 | return rt_rq->highest_prio.curr; | 531 | return rt_rq->highest_prio.curr; |
532 | #endif | 532 | #endif |
533 | 533 | ||
534 | return rt_task_of(rt_se)->prio; | 534 | return rt_task_of(rt_se)->prio; |
535 | } | 535 | } |
536 | 536 | ||
537 | static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) | 537 | static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) |
538 | { | 538 | { |
539 | u64 runtime = sched_rt_runtime(rt_rq); | 539 | u64 runtime = sched_rt_runtime(rt_rq); |
540 | 540 | ||
541 | if (rt_rq->rt_throttled) | 541 | if (rt_rq->rt_throttled) |
542 | return rt_rq_throttled(rt_rq); | 542 | return rt_rq_throttled(rt_rq); |
543 | 543 | ||
544 | if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) | 544 | if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) |
545 | return 0; | 545 | return 0; |
546 | 546 | ||
547 | balance_runtime(rt_rq); | 547 | balance_runtime(rt_rq); |
548 | runtime = sched_rt_runtime(rt_rq); | 548 | runtime = sched_rt_runtime(rt_rq); |
549 | if (runtime == RUNTIME_INF) | 549 | if (runtime == RUNTIME_INF) |
550 | return 0; | 550 | return 0; |
551 | 551 | ||
552 | if (rt_rq->rt_time > runtime) { | 552 | if (rt_rq->rt_time > runtime) { |
553 | rt_rq->rt_throttled = 1; | 553 | rt_rq->rt_throttled = 1; |
554 | if (rt_rq_throttled(rt_rq)) { | 554 | if (rt_rq_throttled(rt_rq)) { |
555 | sched_rt_rq_dequeue(rt_rq); | 555 | sched_rt_rq_dequeue(rt_rq); |
556 | return 1; | 556 | return 1; |
557 | } | 557 | } |
558 | } | 558 | } |
559 | 559 | ||
560 | return 0; | 560 | return 0; |
561 | } | 561 | } |
562 | 562 | ||
563 | /* | 563 | /* |
564 | * Update the current task's runtime statistics. Skip current tasks that | 564 | * Update the current task's runtime statistics. Skip current tasks that |
565 | * are not in our scheduling class. | 565 | * are not in our scheduling class. |
566 | */ | 566 | */ |
567 | static void update_curr_rt(struct rq *rq) | 567 | static void update_curr_rt(struct rq *rq) |
568 | { | 568 | { |
569 | struct task_struct *curr = rq->curr; | 569 | struct task_struct *curr = rq->curr; |
570 | struct sched_rt_entity *rt_se = &curr->rt; | 570 | struct sched_rt_entity *rt_se = &curr->rt; |
571 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); | 571 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); |
572 | u64 delta_exec; | 572 | u64 delta_exec; |
573 | 573 | ||
574 | if (!task_has_rt_policy(curr)) | 574 | if (!task_has_rt_policy(curr)) |
575 | return; | 575 | return; |
576 | 576 | ||
577 | delta_exec = rq->clock - curr->se.exec_start; | 577 | delta_exec = rq->clock - curr->se.exec_start; |
578 | if (unlikely((s64)delta_exec < 0)) | 578 | if (unlikely((s64)delta_exec < 0)) |
579 | delta_exec = 0; | 579 | delta_exec = 0; |
580 | 580 | ||
581 | schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); | 581 | schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); |
582 | 582 | ||
583 | curr->se.sum_exec_runtime += delta_exec; | 583 | curr->se.sum_exec_runtime += delta_exec; |
584 | account_group_exec_runtime(curr, delta_exec); | 584 | account_group_exec_runtime(curr, delta_exec); |
585 | 585 | ||
586 | curr->se.exec_start = rq->clock; | 586 | curr->se.exec_start = rq->clock; |
587 | cpuacct_charge(curr, delta_exec); | 587 | cpuacct_charge(curr, delta_exec); |
588 | 588 | ||
589 | if (!rt_bandwidth_enabled()) | 589 | if (!rt_bandwidth_enabled()) |
590 | return; | 590 | return; |
591 | 591 | ||
592 | for_each_sched_rt_entity(rt_se) { | 592 | for_each_sched_rt_entity(rt_se) { |
593 | rt_rq = rt_rq_of_se(rt_se); | 593 | rt_rq = rt_rq_of_se(rt_se); |
594 | 594 | ||
595 | if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { | 595 | if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { |
596 | spin_lock(&rt_rq->rt_runtime_lock); | 596 | spin_lock(&rt_rq->rt_runtime_lock); |
597 | rt_rq->rt_time += delta_exec; | 597 | rt_rq->rt_time += delta_exec; |
598 | if (sched_rt_runtime_exceeded(rt_rq)) | 598 | if (sched_rt_runtime_exceeded(rt_rq)) |
599 | resched_task(curr); | 599 | resched_task(curr); |
600 | spin_unlock(&rt_rq->rt_runtime_lock); | 600 | spin_unlock(&rt_rq->rt_runtime_lock); |
601 | } | 601 | } |
602 | } | 602 | } |
603 | } | 603 | } |
604 | 604 | ||
605 | #if defined CONFIG_SMP | 605 | #if defined CONFIG_SMP |
606 | 606 | ||
607 | static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu); | 607 | static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu); |
608 | 608 | ||
609 | static inline int next_prio(struct rq *rq) | 609 | static inline int next_prio(struct rq *rq) |
610 | { | 610 | { |
611 | struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu); | 611 | struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu); |
612 | 612 | ||
613 | if (next && rt_prio(next->prio)) | 613 | if (next && rt_prio(next->prio)) |
614 | return next->prio; | 614 | return next->prio; |
615 | else | 615 | else |
616 | return MAX_RT_PRIO; | 616 | return MAX_RT_PRIO; |
617 | } | 617 | } |
618 | 618 | ||
619 | static void | 619 | static void |
620 | inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) | 620 | inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) |
621 | { | 621 | { |
622 | struct rq *rq = rq_of_rt_rq(rt_rq); | 622 | struct rq *rq = rq_of_rt_rq(rt_rq); |
623 | 623 | ||
624 | if (prio < prev_prio) { | 624 | if (prio < prev_prio) { |
625 | 625 | ||
626 | /* | 626 | /* |
627 | * If the new task is higher in priority than anything on the | 627 | * If the new task is higher in priority than anything on the |
628 | * run-queue, we know that the previous high becomes our | 628 | * run-queue, we know that the previous high becomes our |
629 | * next-highest. | 629 | * next-highest. |
630 | */ | 630 | */ |
631 | rt_rq->highest_prio.next = prev_prio; | 631 | rt_rq->highest_prio.next = prev_prio; |
632 | 632 | ||
633 | if (rq->online) | 633 | if (rq->online) |
634 | cpupri_set(&rq->rd->cpupri, rq->cpu, prio); | 634 | cpupri_set(&rq->rd->cpupri, rq->cpu, prio); |
635 | 635 | ||
636 | } else if (prio == rt_rq->highest_prio.curr) | 636 | } else if (prio == rt_rq->highest_prio.curr) |
637 | /* | 637 | /* |
638 | * If the next task is equal in priority to the highest on | 638 | * If the next task is equal in priority to the highest on |
639 | * the run-queue, then we implicitly know that the next highest | 639 | * the run-queue, then we implicitly know that the next highest |
640 | * task cannot be any lower than current | 640 | * task cannot be any lower than current |
641 | */ | 641 | */ |
642 | rt_rq->highest_prio.next = prio; | 642 | rt_rq->highest_prio.next = prio; |
643 | else if (prio < rt_rq->highest_prio.next) | 643 | else if (prio < rt_rq->highest_prio.next) |
644 | /* | 644 | /* |
645 | * Otherwise, we need to recompute next-highest | 645 | * Otherwise, we need to recompute next-highest |
646 | */ | 646 | */ |
647 | rt_rq->highest_prio.next = next_prio(rq); | 647 | rt_rq->highest_prio.next = next_prio(rq); |
648 | } | 648 | } |
649 | 649 | ||
650 | static void | 650 | static void |
651 | dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) | 651 | dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) |
652 | { | 652 | { |
653 | struct rq *rq = rq_of_rt_rq(rt_rq); | 653 | struct rq *rq = rq_of_rt_rq(rt_rq); |
654 | 654 | ||
655 | if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next)) | 655 | if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next)) |
656 | rt_rq->highest_prio.next = next_prio(rq); | 656 | rt_rq->highest_prio.next = next_prio(rq); |
657 | 657 | ||
658 | if (rq->online && rt_rq->highest_prio.curr != prev_prio) | 658 | if (rq->online && rt_rq->highest_prio.curr != prev_prio) |
659 | cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr); | 659 | cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr); |
660 | } | 660 | } |
661 | 661 | ||
662 | #else /* CONFIG_SMP */ | 662 | #else /* CONFIG_SMP */ |
663 | 663 | ||
664 | static inline | 664 | static inline |
665 | void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} | 665 | void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} |
666 | static inline | 666 | static inline |
667 | void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} | 667 | void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} |
668 | 668 | ||
669 | #endif /* CONFIG_SMP */ | 669 | #endif /* CONFIG_SMP */ |
670 | 670 | ||
671 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED | 671 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
672 | static void | 672 | static void |
673 | inc_rt_prio(struct rt_rq *rt_rq, int prio) | 673 | inc_rt_prio(struct rt_rq *rt_rq, int prio) |
674 | { | 674 | { |
675 | int prev_prio = rt_rq->highest_prio.curr; | 675 | int prev_prio = rt_rq->highest_prio.curr; |
676 | 676 | ||
677 | if (prio < prev_prio) | 677 | if (prio < prev_prio) |
678 | rt_rq->highest_prio.curr = prio; | 678 | rt_rq->highest_prio.curr = prio; |
679 | 679 | ||
680 | inc_rt_prio_smp(rt_rq, prio, prev_prio); | 680 | inc_rt_prio_smp(rt_rq, prio, prev_prio); |
681 | } | 681 | } |
682 | 682 | ||
683 | static void | 683 | static void |
684 | dec_rt_prio(struct rt_rq *rt_rq, int prio) | 684 | dec_rt_prio(struct rt_rq *rt_rq, int prio) |
685 | { | 685 | { |
686 | int prev_prio = rt_rq->highest_prio.curr; | 686 | int prev_prio = rt_rq->highest_prio.curr; |
687 | 687 | ||
688 | if (rt_rq->rt_nr_running) { | 688 | if (rt_rq->rt_nr_running) { |
689 | 689 | ||
690 | WARN_ON(prio < prev_prio); | 690 | WARN_ON(prio < prev_prio); |
691 | 691 | ||
692 | /* | 692 | /* |
693 | * This may have been our highest task, and therefore | 693 | * This may have been our highest task, and therefore |
694 | * we may have some recomputation to do | 694 | * we may have some recomputation to do |
695 | */ | 695 | */ |
696 | if (prio == prev_prio) { | 696 | if (prio == prev_prio) { |
697 | struct rt_prio_array *array = &rt_rq->active; | 697 | struct rt_prio_array *array = &rt_rq->active; |
698 | 698 | ||
699 | rt_rq->highest_prio.curr = | 699 | rt_rq->highest_prio.curr = |
700 | sched_find_first_bit(array->bitmap); | 700 | sched_find_first_bit(array->bitmap); |
701 | } | 701 | } |
702 | 702 | ||
703 | } else | 703 | } else |
704 | rt_rq->highest_prio.curr = MAX_RT_PRIO; | 704 | rt_rq->highest_prio.curr = MAX_RT_PRIO; |
705 | 705 | ||
706 | dec_rt_prio_smp(rt_rq, prio, prev_prio); | 706 | dec_rt_prio_smp(rt_rq, prio, prev_prio); |
707 | } | 707 | } |
708 | 708 | ||
709 | #else | 709 | #else |
710 | 710 | ||
711 | static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {} | 711 | static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {} |
712 | static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {} | 712 | static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {} |
713 | 713 | ||
714 | #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */ | 714 | #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */ |
715 | 715 | ||
716 | #ifdef CONFIG_RT_GROUP_SCHED | 716 | #ifdef CONFIG_RT_GROUP_SCHED |
717 | 717 | ||
718 | static void | 718 | static void |
719 | inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | 719 | inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) |
720 | { | 720 | { |
721 | if (rt_se_boosted(rt_se)) | 721 | if (rt_se_boosted(rt_se)) |
722 | rt_rq->rt_nr_boosted++; | 722 | rt_rq->rt_nr_boosted++; |
723 | 723 | ||
724 | if (rt_rq->tg) | 724 | if (rt_rq->tg) |
725 | start_rt_bandwidth(&rt_rq->tg->rt_bandwidth); | 725 | start_rt_bandwidth(&rt_rq->tg->rt_bandwidth); |
726 | } | 726 | } |
727 | 727 | ||
728 | static void | 728 | static void |
729 | dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | 729 | dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) |
730 | { | 730 | { |
731 | if (rt_se_boosted(rt_se)) | 731 | if (rt_se_boosted(rt_se)) |
732 | rt_rq->rt_nr_boosted--; | 732 | rt_rq->rt_nr_boosted--; |
733 | 733 | ||
734 | WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted); | 734 | WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted); |
735 | } | 735 | } |
736 | 736 | ||
737 | #else /* CONFIG_RT_GROUP_SCHED */ | 737 | #else /* CONFIG_RT_GROUP_SCHED */ |
738 | 738 | ||
739 | static void | 739 | static void |
740 | inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | 740 | inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) |
741 | { | 741 | { |
742 | start_rt_bandwidth(&def_rt_bandwidth); | 742 | start_rt_bandwidth(&def_rt_bandwidth); |
743 | } | 743 | } |
744 | 744 | ||
745 | static inline | 745 | static inline |
746 | void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {} | 746 | void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {} |
747 | 747 | ||
748 | #endif /* CONFIG_RT_GROUP_SCHED */ | 748 | #endif /* CONFIG_RT_GROUP_SCHED */ |
749 | 749 | ||
750 | static inline | 750 | static inline |
751 | void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | 751 | void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) |
752 | { | 752 | { |
753 | int prio = rt_se_prio(rt_se); | 753 | int prio = rt_se_prio(rt_se); |
754 | 754 | ||
755 | WARN_ON(!rt_prio(prio)); | 755 | WARN_ON(!rt_prio(prio)); |
756 | rt_rq->rt_nr_running++; | 756 | rt_rq->rt_nr_running++; |
757 | 757 | ||
758 | inc_rt_prio(rt_rq, prio); | 758 | inc_rt_prio(rt_rq, prio); |
759 | inc_rt_migration(rt_se, rt_rq); | 759 | inc_rt_migration(rt_se, rt_rq); |
760 | inc_rt_group(rt_se, rt_rq); | 760 | inc_rt_group(rt_se, rt_rq); |
761 | } | 761 | } |
762 | 762 | ||
763 | static inline | 763 | static inline |
764 | void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | 764 | void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) |
765 | { | 765 | { |
766 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 766 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
767 | WARN_ON(!rt_rq->rt_nr_running); | 767 | WARN_ON(!rt_rq->rt_nr_running); |
768 | rt_rq->rt_nr_running--; | 768 | rt_rq->rt_nr_running--; |
769 | 769 | ||
770 | dec_rt_prio(rt_rq, rt_se_prio(rt_se)); | 770 | dec_rt_prio(rt_rq, rt_se_prio(rt_se)); |
771 | dec_rt_migration(rt_se, rt_rq); | 771 | dec_rt_migration(rt_se, rt_rq); |
772 | dec_rt_group(rt_se, rt_rq); | 772 | dec_rt_group(rt_se, rt_rq); |
773 | } | 773 | } |
774 | 774 | ||
775 | static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) | 775 | static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) |
776 | { | 776 | { |
777 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); | 777 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); |
778 | struct rt_prio_array *array = &rt_rq->active; | 778 | struct rt_prio_array *array = &rt_rq->active; |
779 | struct rt_rq *group_rq = group_rt_rq(rt_se); | 779 | struct rt_rq *group_rq = group_rt_rq(rt_se); |
780 | struct list_head *queue = array->queue + rt_se_prio(rt_se); | 780 | struct list_head *queue = array->queue + rt_se_prio(rt_se); |
781 | 781 | ||
782 | /* | 782 | /* |
783 | * Don't enqueue the group if it's throttled, or when empty. | 783 | * Don't enqueue the group if it's throttled, or when empty. |
784 | * The latter is a consequence of the former when a child group | 784 | * The latter is a consequence of the former when a child group |
785 | * gets throttled and the current group doesn't have any other | 785 | * gets throttled and the current group doesn't have any other |
786 | * active members. | 786 | * active members. |
787 | */ | 787 | */ |
788 | if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) | 788 | if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) |
789 | return; | 789 | return; |
790 | 790 | ||
791 | list_add_tail(&rt_se->run_list, queue); | 791 | list_add_tail(&rt_se->run_list, queue); |
792 | __set_bit(rt_se_prio(rt_se), array->bitmap); | 792 | __set_bit(rt_se_prio(rt_se), array->bitmap); |
793 | 793 | ||
794 | inc_rt_tasks(rt_se, rt_rq); | 794 | inc_rt_tasks(rt_se, rt_rq); |
795 | } | 795 | } |
796 | 796 | ||
797 | static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) | 797 | static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) |
798 | { | 798 | { |
799 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); | 799 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); |
800 | struct rt_prio_array *array = &rt_rq->active; | 800 | struct rt_prio_array *array = &rt_rq->active; |
801 | 801 | ||
802 | list_del_init(&rt_se->run_list); | 802 | list_del_init(&rt_se->run_list); |
803 | if (list_empty(array->queue + rt_se_prio(rt_se))) | 803 | if (list_empty(array->queue + rt_se_prio(rt_se))) |
804 | __clear_bit(rt_se_prio(rt_se), array->bitmap); | 804 | __clear_bit(rt_se_prio(rt_se), array->bitmap); |
805 | 805 | ||
806 | dec_rt_tasks(rt_se, rt_rq); | 806 | dec_rt_tasks(rt_se, rt_rq); |
807 | } | 807 | } |
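
A minimal userspace model of the O(1) RT priority array that __enqueue_rt_entity() and __dequeue_rt_entity() manipulate above: one FIFO list per priority level, plus a bitmap recording which levels are non-empty so the highest runnable priority is a find-first-bit away. Illustrative sketch only -- MAX_RT_PRIO mirrors the scheduler constant, everything else is invented:

/* Bitmap + per-priority FIFO, modelling struct rt_prio_array. */
#include <assert.h>
#include <stdio.h>

#define MAX_RT_PRIO 100                  /* as in the scheduler */
#define BITS (sizeof(unsigned long) * 8)

struct node { struct node *next, *prev; int prio; };

static struct node queue[MAX_RT_PRIO];   /* circular list heads */
static unsigned long bitmap[(MAX_RT_PRIO + BITS - 1) / BITS];

static void enqueue(struct node *n)
{
	struct node *head = &queue[n->prio];

	n->prev = head->prev;                /* add at tail */
	n->next = head;
	head->prev->next = n;
	head->prev = n;
	bitmap[n->prio / BITS] |= 1UL << (n->prio % BITS);
}

static void dequeue(struct node *n)
{
	struct node *head = &queue[n->prio];

	n->prev->next = n->next;
	n->next->prev = n->prev;
	if (head->next == head)              /* level now empty */
		bitmap[n->prio / BITS] &= ~(1UL << (n->prio % BITS));
}

static int first_prio(void)              /* ~sched_find_first_bit() */
{
	for (unsigned int i = 0; i < sizeof(bitmap) / sizeof(bitmap[0]); i++)
		if (bitmap[i])
			return i * BITS + __builtin_ctzl(bitmap[i]);
	return MAX_RT_PRIO;                  /* nothing queued */
}

int main(void)
{
	for (int i = 0; i < MAX_RT_PRIO; i++)
		queue[i].next = queue[i].prev = &queue[i];

	struct node a = { .prio = 10 }, b = { .prio = 3 };
	enqueue(&a);
	enqueue(&b);
	assert(first_prio() == 3);           /* lower value = higher prio */
	dequeue(&b);
	assert(first_prio() == 10);
	puts("ok");
	return 0;
}
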
808 | 808 | ||
809 | /* | 809 | /* |
810 | * Because the prio of an upper entry depends on the lower | 810 | * Because the prio of an upper entry depends on the lower |
811 | * entries, we must remove entries top-down. | 811 | * entries, we must remove entries top-down. |
812 | */ | 812 | */ |
813 | static void dequeue_rt_stack(struct sched_rt_entity *rt_se) | 813 | static void dequeue_rt_stack(struct sched_rt_entity *rt_se) |
814 | { | 814 | { |
815 | struct sched_rt_entity *back = NULL; | 815 | struct sched_rt_entity *back = NULL; |
816 | 816 | ||
817 | for_each_sched_rt_entity(rt_se) { | 817 | for_each_sched_rt_entity(rt_se) { |
818 | rt_se->back = back; | 818 | rt_se->back = back; |
819 | back = rt_se; | 819 | back = rt_se; |
820 | } | 820 | } |
821 | 821 | ||
822 | for (rt_se = back; rt_se; rt_se = rt_se->back) { | 822 | for (rt_se = back; rt_se; rt_se = rt_se->back) { |
823 | if (on_rt_rq(rt_se)) | 823 | if (on_rt_rq(rt_se)) |
824 | __dequeue_rt_entity(rt_se); | 824 | __dequeue_rt_entity(rt_se); |
825 | } | 825 | } |
826 | } | 826 | } |
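
dequeue_rt_stack() has to work around the fact that for_each_sched_rt_entity() only walks leaf-to-root via the parent link: it first threads ->back pointers, then replays the chain root-first. A standalone sketch of that two-pass reversal (the three-level hierarchy here is invented for the demo):

/* Two-pass reversal: record ->back links leaf->root, then walk
 * them root->leaf.  Illustrative only. */
#include <stdio.h>

struct entity { const char *name; struct entity *parent, *back; };

static void dequeue_top_down(struct entity *se)
{
	struct entity *back = NULL;

	for (; se; se = se->parent) {        /* pass 1: leaf to root */
		se->back = back;
		back = se;
	}
	for (se = back; se; se = se->back)   /* pass 2: root to leaf */
		printf("dequeue %s\n", se->name);
}

int main(void)
{
	struct entity root = { "root", NULL, NULL };
	struct entity mid  = { "mid",  &root, NULL };
	struct entity leaf = { "leaf", &mid,  NULL };

	dequeue_top_down(&leaf);             /* prints root, mid, leaf */
	return 0;
}
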
827 | 827 | ||
828 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se) | 828 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se) |
829 | { | 829 | { |
830 | dequeue_rt_stack(rt_se); | 830 | dequeue_rt_stack(rt_se); |
831 | for_each_sched_rt_entity(rt_se) | 831 | for_each_sched_rt_entity(rt_se) |
832 | __enqueue_rt_entity(rt_se); | 832 | __enqueue_rt_entity(rt_se); |
833 | } | 833 | } |
834 | 834 | ||
835 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se) | 835 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se) |
836 | { | 836 | { |
837 | dequeue_rt_stack(rt_se); | 837 | dequeue_rt_stack(rt_se); |
838 | 838 | ||
839 | for_each_sched_rt_entity(rt_se) { | 839 | for_each_sched_rt_entity(rt_se) { |
840 | struct rt_rq *rt_rq = group_rt_rq(rt_se); | 840 | struct rt_rq *rt_rq = group_rt_rq(rt_se); |
841 | 841 | ||
842 | if (rt_rq && rt_rq->rt_nr_running) | 842 | if (rt_rq && rt_rq->rt_nr_running) |
843 | __enqueue_rt_entity(rt_se); | 843 | __enqueue_rt_entity(rt_se); |
844 | } | 844 | } |
845 | } | 845 | } |
846 | 846 | ||
847 | /* | 847 | /* |
848 | * Adding/removing a task to/from a priority array: | 848 | * Adding/removing a task to/from a priority array: |
849 | */ | 849 | */ |
850 | static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) | 850 | static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) |
851 | { | 851 | { |
852 | struct sched_rt_entity *rt_se = &p->rt; | 852 | struct sched_rt_entity *rt_se = &p->rt; |
853 | 853 | ||
854 | if (wakeup) | 854 | if (wakeup) |
855 | rt_se->timeout = 0; | 855 | rt_se->timeout = 0; |
856 | 856 | ||
857 | enqueue_rt_entity(rt_se); | 857 | enqueue_rt_entity(rt_se); |
858 | 858 | ||
859 | if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) | 859 | if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) |
860 | enqueue_pushable_task(rq, p); | 860 | enqueue_pushable_task(rq, p); |
861 | 861 | ||
862 | inc_cpu_load(rq, p->se.load.weight); | 862 | inc_cpu_load(rq, p->se.load.weight); |
863 | } | 863 | } |
864 | 864 | ||
865 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | 865 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) |
866 | { | 866 | { |
867 | struct sched_rt_entity *rt_se = &p->rt; | 867 | struct sched_rt_entity *rt_se = &p->rt; |
868 | 868 | ||
869 | update_curr_rt(rq); | 869 | update_curr_rt(rq); |
870 | dequeue_rt_entity(rt_se); | 870 | dequeue_rt_entity(rt_se); |
871 | 871 | ||
872 | dequeue_pushable_task(rq, p); | 872 | dequeue_pushable_task(rq, p); |
873 | 873 | ||
874 | dec_cpu_load(rq, p->se.load.weight); | 874 | dec_cpu_load(rq, p->se.load.weight); |
875 | } | 875 | } |
876 | 876 | ||
877 | /* | 877 | /* |
878 | * Put the task at the end of the run list without the overhead of dequeue | 878 | * Put the task at the end of the run list without the overhead of dequeue |
879 | * followed by enqueue. | 879 | * followed by enqueue. |
880 | */ | 880 | */ |
881 | static void | 881 | static void |
882 | requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head) | 882 | requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head) |
883 | { | 883 | { |
884 | if (on_rt_rq(rt_se)) { | 884 | if (on_rt_rq(rt_se)) { |
885 | struct rt_prio_array *array = &rt_rq->active; | 885 | struct rt_prio_array *array = &rt_rq->active; |
886 | struct list_head *queue = array->queue + rt_se_prio(rt_se); | 886 | struct list_head *queue = array->queue + rt_se_prio(rt_se); |
887 | 887 | ||
888 | if (head) | 888 | if (head) |
889 | list_move(&rt_se->run_list, queue); | 889 | list_move(&rt_se->run_list, queue); |
890 | else | 890 | else |
891 | list_move_tail(&rt_se->run_list, queue); | 891 | list_move_tail(&rt_se->run_list, queue); |
892 | } | 892 | } |
893 | } | 893 | } |
894 | 894 | ||
895 | static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head) | 895 | static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head) |
896 | { | 896 | { |
897 | struct sched_rt_entity *rt_se = &p->rt; | 897 | struct sched_rt_entity *rt_se = &p->rt; |
898 | struct rt_rq *rt_rq; | 898 | struct rt_rq *rt_rq; |
899 | 899 | ||
900 | for_each_sched_rt_entity(rt_se) { | 900 | for_each_sched_rt_entity(rt_se) { |
901 | rt_rq = rt_rq_of_se(rt_se); | 901 | rt_rq = rt_rq_of_se(rt_se); |
902 | requeue_rt_entity(rt_rq, rt_se, head); | 902 | requeue_rt_entity(rt_rq, rt_se, head); |
903 | } | 903 | } |
904 | } | 904 | } |
905 | 905 | ||
906 | static void yield_task_rt(struct rq *rq) | 906 | static void yield_task_rt(struct rq *rq) |
907 | { | 907 | { |
908 | requeue_task_rt(rq, rq->curr, 0); | 908 | requeue_task_rt(rq, rq->curr, 0); |
909 | } | 909 | } |
910 | 910 | ||
911 | #ifdef CONFIG_SMP | 911 | #ifdef CONFIG_SMP |
912 | static int find_lowest_rq(struct task_struct *task); | 912 | static int find_lowest_rq(struct task_struct *task); |
913 | 913 | ||
914 | static int select_task_rq_rt(struct task_struct *p, int sync) | 914 | static int select_task_rq_rt(struct task_struct *p, int sync) |
915 | { | 915 | { |
916 | struct rq *rq = task_rq(p); | 916 | struct rq *rq = task_rq(p); |
917 | 917 | ||
918 | /* | 918 | /* |
919 | * If the current task is an RT task, then | 919 | * If the current task is an RT task, then |
920 | * try to see if we can wake this RT task up on another | 920 | * try to see if we can wake this RT task up on another |
921 | * runqueue. Otherwise simply start this RT task | 921 | * runqueue. Otherwise simply start this RT task |
922 | * on its current runqueue. | 922 | * on its current runqueue. |
923 | * | 923 | * |
924 | * We want to avoid overloading runqueues, even if | 924 | * We want to avoid overloading runqueues, even if |
925 | * the RT task is of higher priority than the current RT task. | 925 | * the RT task is of higher priority than the current RT task. |
926 | * RT tasks behave differently than other tasks. If | 926 | * RT tasks behave differently than other tasks. If |
927 | * one gets preempted, we try to push it off to another queue. | 927 | * one gets preempted, we try to push it off to another queue. |
928 | * So trying to keep a preempting RT task on the same | 928 | * So trying to keep a preempting RT task on the same |
929 | * cache hot CPU will force the running RT task to | 929 | * cache hot CPU will force the running RT task to |
930 | * a cold CPU. So we waste all the cache for the lower | 930 | * a cold CPU. So we waste all the cache for the lower |
931 | * RT task in hopes of saving some cache for an RT task | 931 | * RT task in hopes of saving some cache for an RT task |
932 | * that is just being woken and will probably have a | 932 | * that is just being woken and will probably have a |
933 | * cold cache anyway. | 933 | * cold cache anyway. |
934 | */ | 934 | */ |
935 | if (unlikely(rt_task(rq->curr)) && | 935 | if (unlikely(rt_task(rq->curr)) && |
936 | (p->rt.nr_cpus_allowed > 1)) { | 936 | (p->rt.nr_cpus_allowed > 1)) { |
937 | int cpu = find_lowest_rq(p); | 937 | int cpu = find_lowest_rq(p); |
938 | 938 | ||
939 | return (cpu == -1) ? task_cpu(p) : cpu; | 939 | return (cpu == -1) ? task_cpu(p) : cpu; |
940 | } | 940 | } |
941 | 941 | ||
942 | /* | 942 | /* |
943 | * Otherwise, just let it ride on the affined RQ and the | 943 | * Otherwise, just let it ride on the affined RQ and the |
944 | * post-schedule router will push the preempted task away | 944 | * post-schedule router will push the preempted task away |
945 | */ | 945 | */ |
946 | return task_cpu(p); | 946 | return task_cpu(p); |
947 | } | 947 | } |
948 | 948 | ||
949 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | 949 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) |
950 | { | 950 | { |
951 | if (rq->curr->rt.nr_cpus_allowed == 1) | 951 | if (rq->curr->rt.nr_cpus_allowed == 1) |
952 | return; | 952 | return; |
953 | 953 | ||
954 | if (p->rt.nr_cpus_allowed != 1 | 954 | if (p->rt.nr_cpus_allowed != 1 |
955 | && cpupri_find(&rq->rd->cpupri, p, NULL)) | 955 | && cpupri_find(&rq->rd->cpupri, p, NULL)) |
956 | return; | 956 | return; |
957 | 957 | ||
958 | if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) | 958 | if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) |
959 | return; | 959 | return; |
960 | 960 | ||
961 | /* | 961 | /* |
962 | * There appear to be other CPUs that can accept | 962 | * There appear to be other CPUs that can accept |
963 | * current and none to run 'p', so let's reschedule | 963 | * current and none to run 'p', so let's reschedule |
964 | * to try and push current away: | 964 | * to try and push current away: |
965 | */ | 965 | */ |
966 | requeue_task_rt(rq, p, 1); | 966 | requeue_task_rt(rq, p, 1); |
967 | resched_task(rq->curr); | 967 | resched_task(rq->curr); |
968 | } | 968 | } |
969 | 969 | ||
970 | #endif /* CONFIG_SMP */ | 970 | #endif /* CONFIG_SMP */ |
971 | 971 | ||
972 | /* | 972 | /* |
973 | * Preempt the current task with a newly woken task if needed: | 973 | * Preempt the current task with a newly woken task if needed: |
974 | */ | 974 | */ |
975 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync) | 975 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync) |
976 | { | 976 | { |
977 | if (p->prio < rq->curr->prio) { | 977 | if (p->prio < rq->curr->prio) { |
978 | resched_task(rq->curr); | 978 | resched_task(rq->curr); |
979 | return; | 979 | return; |
980 | } | 980 | } |
981 | 981 | ||
982 | #ifdef CONFIG_SMP | 982 | #ifdef CONFIG_SMP |
983 | /* | 983 | /* |
984 | * If: | 984 | * If: |
985 | * | 985 | * |
986 | * - the newly woken task is of equal priority to the current task | 986 | * - the newly woken task is of equal priority to the current task |
987 | * - the newly woken task is non-migratable while current is migratable | 987 | * - the newly woken task is non-migratable while current is migratable |
988 | * - current will be preempted on the next reschedule | 988 | * - current will be preempted on the next reschedule |
989 | * | 989 | * |
990 | * we should check to see if current can readily move to a different | 990 | * we should check to see if current can readily move to a different |
991 | * cpu. If so, we will reschedule to allow the push logic to try | 991 | * cpu. If so, we will reschedule to allow the push logic to try |
992 | * to move current somewhere else, making room for our non-migratable | 992 | * to move current somewhere else, making room for our non-migratable |
993 | * task. | 993 | * task. |
994 | */ | 994 | */ |
995 | if (p->prio == rq->curr->prio && !need_resched()) | 995 | if (p->prio == rq->curr->prio && !need_resched()) |
996 | check_preempt_equal_prio(rq, p); | 996 | check_preempt_equal_prio(rq, p); |
997 | #endif | 997 | #endif |
998 | } | 998 | } |
999 | 999 | ||
1000 | static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, | 1000 | static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, |
1001 | struct rt_rq *rt_rq) | 1001 | struct rt_rq *rt_rq) |
1002 | { | 1002 | { |
1003 | struct rt_prio_array *array = &rt_rq->active; | 1003 | struct rt_prio_array *array = &rt_rq->active; |
1004 | struct sched_rt_entity *next = NULL; | 1004 | struct sched_rt_entity *next = NULL; |
1005 | struct list_head *queue; | 1005 | struct list_head *queue; |
1006 | int idx; | 1006 | int idx; |
1007 | 1007 | ||
1008 | idx = sched_find_first_bit(array->bitmap); | 1008 | idx = sched_find_first_bit(array->bitmap); |
1009 | BUG_ON(idx >= MAX_RT_PRIO); | 1009 | BUG_ON(idx >= MAX_RT_PRIO); |
1010 | 1010 | ||
1011 | queue = array->queue + idx; | 1011 | queue = array->queue + idx; |
1012 | next = list_entry(queue->next, struct sched_rt_entity, run_list); | 1012 | next = list_entry(queue->next, struct sched_rt_entity, run_list); |
1013 | 1013 | ||
1014 | return next; | 1014 | return next; |
1015 | } | 1015 | } |
1016 | 1016 | ||
1017 | static struct task_struct *_pick_next_task_rt(struct rq *rq) | 1017 | static struct task_struct *_pick_next_task_rt(struct rq *rq) |
1018 | { | 1018 | { |
1019 | struct sched_rt_entity *rt_se; | 1019 | struct sched_rt_entity *rt_se; |
1020 | struct task_struct *p; | 1020 | struct task_struct *p; |
1021 | struct rt_rq *rt_rq; | 1021 | struct rt_rq *rt_rq; |
1022 | 1022 | ||
1023 | rt_rq = &rq->rt; | 1023 | rt_rq = &rq->rt; |
1024 | 1024 | ||
1025 | if (unlikely(!rt_rq->rt_nr_running)) | 1025 | if (unlikely(!rt_rq->rt_nr_running)) |
1026 | return NULL; | 1026 | return NULL; |
1027 | 1027 | ||
1028 | if (rt_rq_throttled(rt_rq)) | 1028 | if (rt_rq_throttled(rt_rq)) |
1029 | return NULL; | 1029 | return NULL; |
1030 | 1030 | ||
1031 | do { | 1031 | do { |
1032 | rt_se = pick_next_rt_entity(rq, rt_rq); | 1032 | rt_se = pick_next_rt_entity(rq, rt_rq); |
1033 | BUG_ON(!rt_se); | 1033 | BUG_ON(!rt_se); |
1034 | rt_rq = group_rt_rq(rt_se); | 1034 | rt_rq = group_rt_rq(rt_se); |
1035 | } while (rt_rq); | 1035 | } while (rt_rq); |
1036 | 1036 | ||
1037 | p = rt_task_of(rt_se); | 1037 | p = rt_task_of(rt_se); |
1038 | p->se.exec_start = rq->clock; | 1038 | p->se.exec_start = rq->clock; |
1039 | 1039 | ||
1040 | return p; | 1040 | return p; |
1041 | } | 1041 | } |
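
With group scheduling, the entity picked at each level may itself represent a child rt_rq, so the loop above descends level by level until it reaches a plain task. A tiny standalone model of that descent (structures pared down to the bare minimum):

/* Hierarchical pick of _pick_next_task_rt(): descend through group
 * run-queues until an entity with no child rt_rq (a task) remains. */
#include <stdio.h>

struct rt_rq;
struct entity { const char *name; struct rt_rq *group; };
struct rt_rq { struct entity *top; };    /* highest-prio entity */

static struct entity *pick_task(struct rt_rq *rt_rq)
{
	struct entity *se;

	do {
		se = rt_rq->top;             /* ~pick_next_rt_entity() */
		rt_rq = se->group;           /* NULL once se is a task */
	} while (rt_rq);
	return se;
}

int main(void)
{
	struct entity task = { "task", NULL };
	struct rt_rq child = { &task };
	struct entity grp  = { "group", &child };
	struct rt_rq root  = { &grp };

	printf("picked %s\n", pick_task(&root)->name);  /* task */
	return 0;
}
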
1042 | 1042 | ||
1043 | static struct task_struct *pick_next_task_rt(struct rq *rq) | 1043 | static struct task_struct *pick_next_task_rt(struct rq *rq) |
1044 | { | 1044 | { |
1045 | struct task_struct *p = _pick_next_task_rt(rq); | 1045 | struct task_struct *p = _pick_next_task_rt(rq); |
1046 | 1046 | ||
1047 | /* The running task is never eligible for pushing */ | 1047 | /* The running task is never eligible for pushing */ |
1048 | if (p) | 1048 | if (p) |
1049 | dequeue_pushable_task(rq, p); | 1049 | dequeue_pushable_task(rq, p); |
1050 | 1050 | ||
1051 | return p; | 1051 | return p; |
1052 | } | 1052 | } |
1053 | 1053 | ||
1054 | static void put_prev_task_rt(struct rq *rq, struct task_struct *p) | 1054 | static void put_prev_task_rt(struct rq *rq, struct task_struct *p) |
1055 | { | 1055 | { |
1056 | update_curr_rt(rq); | 1056 | update_curr_rt(rq); |
1057 | p->se.exec_start = 0; | 1057 | p->se.exec_start = 0; |
1058 | 1058 | ||
1059 | /* | 1059 | /* |
1060 | * The previous task needs to be made eligible for pushing | 1060 | * The previous task needs to be made eligible for pushing |
1061 | * if it is still active | 1061 | * if it is still active |
1062 | */ | 1062 | */ |
1063 | if (p->se.on_rq && p->rt.nr_cpus_allowed > 1) | 1063 | if (p->se.on_rq && p->rt.nr_cpus_allowed > 1) |
1064 | enqueue_pushable_task(rq, p); | 1064 | enqueue_pushable_task(rq, p); |
1065 | } | 1065 | } |
1066 | 1066 | ||
1067 | #ifdef CONFIG_SMP | 1067 | #ifdef CONFIG_SMP |
1068 | 1068 | ||
1069 | /* Only try algorithms three times */ | 1069 | /* Only try algorithms three times */ |
1070 | #define RT_MAX_TRIES 3 | 1070 | #define RT_MAX_TRIES 3 |
1071 | 1071 | ||
1072 | static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); | 1072 | static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); |
1073 | 1073 | ||
1074 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) | 1074 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) |
1075 | { | 1075 | { |
1076 | if (!task_running(rq, p) && | 1076 | if (!task_running(rq, p) && |
1077 | (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && | 1077 | (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && |
1078 | (p->rt.nr_cpus_allowed > 1)) | 1078 | (p->rt.nr_cpus_allowed > 1)) |
1079 | return 1; | 1079 | return 1; |
1080 | return 0; | 1080 | return 0; |
1081 | } | 1081 | } |
1082 | 1082 | ||
1083 | /* Return the second highest RT task, NULL otherwise */ | 1083 | /* Return the second highest RT task, NULL otherwise */ |
1084 | static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) | 1084 | static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) |
1085 | { | 1085 | { |
1086 | struct task_struct *next = NULL; | 1086 | struct task_struct *next = NULL; |
1087 | struct sched_rt_entity *rt_se; | 1087 | struct sched_rt_entity *rt_se; |
1088 | struct rt_prio_array *array; | 1088 | struct rt_prio_array *array; |
1089 | struct rt_rq *rt_rq; | 1089 | struct rt_rq *rt_rq; |
1090 | int idx; | 1090 | int idx; |
1091 | 1091 | ||
1092 | for_each_leaf_rt_rq(rt_rq, rq) { | 1092 | for_each_leaf_rt_rq(rt_rq, rq) { |
1093 | array = &rt_rq->active; | 1093 | array = &rt_rq->active; |
1094 | idx = sched_find_first_bit(array->bitmap); | 1094 | idx = sched_find_first_bit(array->bitmap); |
1095 | next_idx: | 1095 | next_idx: |
1096 | if (idx >= MAX_RT_PRIO) | 1096 | if (idx >= MAX_RT_PRIO) |
1097 | continue; | 1097 | continue; |
1098 | if (next && next->prio < idx) | 1098 | if (next && next->prio < idx) |
1099 | continue; | 1099 | continue; |
1100 | list_for_each_entry(rt_se, array->queue + idx, run_list) { | 1100 | list_for_each_entry(rt_se, array->queue + idx, run_list) { |
1101 | struct task_struct *p = rt_task_of(rt_se); | 1101 | struct task_struct *p = rt_task_of(rt_se); |
1102 | if (pick_rt_task(rq, p, cpu)) { | 1102 | if (pick_rt_task(rq, p, cpu)) { |
1103 | next = p; | 1103 | next = p; |
1104 | break; | 1104 | break; |
1105 | } | 1105 | } |
1106 | } | 1106 | } |
1107 | if (!next) { | 1107 | if (!next) { |
1108 | idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1); | 1108 | idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1); |
1109 | goto next_idx; | 1109 | goto next_idx; |
1110 | } | 1110 | } |
1111 | } | 1111 | } |
1112 | 1112 | ||
1113 | return next; | 1113 | return next; |
1114 | } | 1114 | } |
1115 | 1115 | ||
1116 | static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); | 1116 | static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); |
1117 | 1117 | ||
1118 | static inline int pick_optimal_cpu(int this_cpu, | 1118 | static inline int pick_optimal_cpu(int this_cpu, |
1119 | const struct cpumask *mask) | 1119 | const struct cpumask *mask) |
1120 | { | 1120 | { |
1121 | int first; | 1121 | int first; |
1122 | 1122 | ||
1123 | /* "this_cpu" is cheaper to preempt than a remote processor */ | 1123 | /* "this_cpu" is cheaper to preempt than a remote processor */ |
1124 | if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask)) | 1124 | if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask)) |
1125 | return this_cpu; | 1125 | return this_cpu; |
1126 | 1126 | ||
1127 | first = cpumask_first(mask); | 1127 | first = cpumask_first(mask); |
1128 | if (first < nr_cpu_ids) | 1128 | if (first < nr_cpu_ids) |
1129 | return first; | 1129 | return first; |
1130 | 1130 | ||
1131 | return -1; | 1131 | return -1; |
1132 | } | 1132 | } |
1133 | 1133 | ||
1134 | static int find_lowest_rq(struct task_struct *task) | 1134 | static int find_lowest_rq(struct task_struct *task) |
1135 | { | 1135 | { |
1136 | struct sched_domain *sd; | 1136 | struct sched_domain *sd; |
1137 | struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); | 1137 | struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); |
1138 | int this_cpu = smp_processor_id(); | 1138 | int this_cpu = smp_processor_id(); |
1139 | int cpu = task_cpu(task); | 1139 | int cpu = task_cpu(task); |
1140 | cpumask_var_t domain_mask; | 1140 | cpumask_var_t domain_mask; |
1141 | 1141 | ||
1142 | if (task->rt.nr_cpus_allowed == 1) | 1142 | if (task->rt.nr_cpus_allowed == 1) |
1143 | return -1; /* No other targets possible */ | 1143 | return -1; /* No other targets possible */ |
1144 | 1144 | ||
1145 | if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) | 1145 | if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) |
1146 | return -1; /* No targets found */ | 1146 | return -1; /* No targets found */ |
1147 | 1147 | ||
1148 | /* | 1148 | /* |
1149 | * Only consider CPUs that are usable for migration. | 1149 | * Only consider CPUs that are usable for migration. |
1150 | * I guess we might want to change cpupri_find() to ignore those | 1150 | * I guess we might want to change cpupri_find() to ignore those |
1151 | * in the first place. | 1151 | * in the first place. |
1152 | */ | 1152 | */ |
1153 | cpumask_and(lowest_mask, lowest_mask, cpu_active_mask); | 1153 | cpumask_and(lowest_mask, lowest_mask, cpu_active_mask); |
1154 | 1154 | ||
1155 | /* | 1155 | /* |
1156 | * At this point we have built a mask of cpus representing the | 1156 | * At this point we have built a mask of cpus representing the |
1157 | * lowest priority tasks in the system. Now we want to elect | 1157 | * lowest priority tasks in the system. Now we want to elect |
1158 | * the best one based on our affinity and topology. | 1158 | * the best one based on our affinity and topology. |
1159 | * | 1159 | * |
1160 | * We prioritize the last cpu that the task executed on since | 1160 | * We prioritize the last cpu that the task executed on since |
1161 | * it is most likely cache-hot in that location. | 1161 | * it is most likely cache-hot in that location. |
1162 | */ | 1162 | */ |
1163 | if (cpumask_test_cpu(cpu, lowest_mask)) | 1163 | if (cpumask_test_cpu(cpu, lowest_mask)) |
1164 | return cpu; | 1164 | return cpu; |
1165 | 1165 | ||
1166 | /* | 1166 | /* |
1167 | * Otherwise, we consult the sched_domains span maps to figure | 1167 | * Otherwise, we consult the sched_domains span maps to figure |
1168 | * out which cpu is logically closest to our hot cache data. | 1168 | * out which cpu is logically closest to our hot cache data. |
1169 | */ | 1169 | */ |
1170 | if (this_cpu == cpu) | 1170 | if (this_cpu == cpu) |
1171 | this_cpu = -1; /* Skip this_cpu opt if the same */ | 1171 | this_cpu = -1; /* Skip this_cpu opt if the same */ |
1172 | 1172 | ||
1173 | if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) { | 1173 | if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) { |
1174 | for_each_domain(cpu, sd) { | 1174 | for_each_domain(cpu, sd) { |
1175 | if (sd->flags & SD_WAKE_AFFINE) { | 1175 | if (sd->flags & SD_WAKE_AFFINE) { |
1176 | int best_cpu; | 1176 | int best_cpu; |
1177 | 1177 | ||
1178 | cpumask_and(domain_mask, | 1178 | cpumask_and(domain_mask, |
1179 | sched_domain_span(sd), | 1179 | sched_domain_span(sd), |
1180 | lowest_mask); | 1180 | lowest_mask); |
1181 | 1181 | ||
1182 | best_cpu = pick_optimal_cpu(this_cpu, | 1182 | best_cpu = pick_optimal_cpu(this_cpu, |
1183 | domain_mask); | 1183 | domain_mask); |
1184 | 1184 | ||
1185 | if (best_cpu != -1) { | 1185 | if (best_cpu != -1) { |
1186 | free_cpumask_var(domain_mask); | 1186 | free_cpumask_var(domain_mask); |
1187 | return best_cpu; | 1187 | return best_cpu; |
1188 | } | 1188 | } |
1189 | } | 1189 | } |
1190 | } | 1190 | } |
1191 | free_cpumask_var(domain_mask); | 1191 | free_cpumask_var(domain_mask); |
1192 | } | 1192 | } |
1193 | 1193 | ||
1194 | /* | 1194 | /* |
1195 | * And finally, if there were no matches within the domains | 1195 | * And finally, if there were no matches within the domains |
1196 | * just give the caller *something* to work with from the compatible | 1196 | * just give the caller *something* to work with from the compatible |
1197 | * locations. | 1197 | * locations. |
1198 | */ | 1198 | */ |
1199 | return pick_optimal_cpu(this_cpu, lowest_mask); | 1199 | return pick_optimal_cpu(this_cpu, lowest_mask); |
1200 | } | 1200 | } |
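
Taken together, find_lowest_rq() encodes a three-tier preference: the task's last CPU if it appears in the lowest-priority mask, then a CPU sharing a wake-affine sched_domain, then any CPU left in the mask. A hedged distillation using plain bitmasks instead of cpumask_t (the domain spans below are made up):

/* Three-tier CPU choice, modelled with word-sized bitmasks. */
#include <stdio.h>

static int first_cpu(unsigned int mask)
{
	return mask ? __builtin_ctz(mask) : -1;
}

static int find_lowest(unsigned int lowest_mask, int last_cpu,
		       const unsigned int *domain_span, int ndomains)
{
	if (lowest_mask & (1u << last_cpu))  /* tier 1: cache-hot CPU */
		return last_cpu;

	for (int d = 0; d < ndomains; d++) { /* tier 2: nearby domains */
		int cpu = first_cpu(domain_span[d] & lowest_mask);
		if (cpu != -1)
			return cpu;
	}
	return first_cpu(lowest_mask);       /* tier 3: anything left */
}

int main(void)
{
	unsigned int domains[] = { 0x3, 0xf };  /* {0,1} then {0..3} */

	/* Last ran on CPU 1, but only CPUs 2 and 4 are low priority,
	 * so the smallest enclosing domain gives CPU 2. */
	printf("%d\n", find_lowest(0x14, 1, domains, 2));
	return 0;
}
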
1201 | 1201 | ||
1202 | /* Will lock the rq it finds */ | 1202 | /* Will lock the rq it finds */ |
1203 | static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) | 1203 | static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) |
1204 | { | 1204 | { |
1205 | struct rq *lowest_rq = NULL; | 1205 | struct rq *lowest_rq = NULL; |
1206 | int tries; | 1206 | int tries; |
1207 | int cpu; | 1207 | int cpu; |
1208 | 1208 | ||
1209 | for (tries = 0; tries < RT_MAX_TRIES; tries++) { | 1209 | for (tries = 0; tries < RT_MAX_TRIES; tries++) { |
1210 | cpu = find_lowest_rq(task); | 1210 | cpu = find_lowest_rq(task); |
1211 | 1211 | ||
1212 | if ((cpu == -1) || (cpu == rq->cpu)) | 1212 | if ((cpu == -1) || (cpu == rq->cpu)) |
1213 | break; | 1213 | break; |
1214 | 1214 | ||
1215 | lowest_rq = cpu_rq(cpu); | 1215 | lowest_rq = cpu_rq(cpu); |
1216 | 1216 | ||
1217 | /* if the prio of this runqueue changed, try again */ | 1217 | /* if the prio of this runqueue changed, try again */ |
1218 | if (double_lock_balance(rq, lowest_rq)) { | 1218 | if (double_lock_balance(rq, lowest_rq)) { |
1219 | /* | 1219 | /* |
1220 | * We had to unlock the run queue. In | 1220 | * We had to unlock the run queue. In |
1221 | * the meantime, the task could have | 1221 | * the meantime, the task could have |
1222 | * migrated already or had its affinity changed. | 1222 | * migrated already or had its affinity changed. |
1223 | * Also make sure that it wasn't scheduled on its rq. | 1223 | * Also make sure that it wasn't scheduled on its rq. |
1224 | */ | 1224 | */ |
1225 | if (unlikely(task_rq(task) != rq || | 1225 | if (unlikely(task_rq(task) != rq || |
1226 | !cpumask_test_cpu(lowest_rq->cpu, | 1226 | !cpumask_test_cpu(lowest_rq->cpu, |
1227 | &task->cpus_allowed) || | 1227 | &task->cpus_allowed) || |
1228 | task_running(rq, task) || | 1228 | task_running(rq, task) || |
1229 | !task->se.on_rq)) { | 1229 | !task->se.on_rq)) { |
1230 | 1230 | ||
1231 | spin_unlock(&lowest_rq->lock); | 1231 | spin_unlock(&lowest_rq->lock); |
1232 | lowest_rq = NULL; | 1232 | lowest_rq = NULL; |
1233 | break; | 1233 | break; |
1234 | } | 1234 | } |
1235 | } | 1235 | } |
1236 | 1236 | ||
1237 | /* If this rq is still suitable use it. */ | 1237 | /* If this rq is still suitable use it. */ |
1238 | if (lowest_rq->rt.highest_prio.curr > task->prio) | 1238 | if (lowest_rq->rt.highest_prio.curr > task->prio) |
1239 | break; | 1239 | break; |
1240 | 1240 | ||
1241 | /* try again */ | 1241 | /* try again */ |
1242 | double_unlock_balance(rq, lowest_rq); | 1242 | double_unlock_balance(rq, lowest_rq); |
1243 | lowest_rq = NULL; | 1243 | lowest_rq = NULL; |
1244 | } | 1244 | } |
1245 | 1245 | ||
1246 | return lowest_rq; | 1246 | return lowest_rq; |
1247 | } | 1247 | } |
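
find_lock_lowest_rq() leans on double_lock_balance(), which may drop rq->lock in order to take both runqueue locks in a deadlock-safe order -- hence all the re-validation above. A simplified pthread sketch of the take-both-in-a-fixed-order idea (the real helper is more involved; this only illustrates the pattern):

/* Take two rq-like locks in address order to avoid AB-BA deadlock. */
#include <pthread.h>
#include <stdio.h>

struct rq { pthread_mutex_t lock; int cpu; };

/* Caller holds this_rq->lock; returns with both held.  A nonzero
 * return means we dropped our lock, so state must be re-checked. */
static int double_lock(struct rq *this_rq, struct rq *busiest)
{
	int dropped = 0;

	if (pthread_mutex_trylock(&busiest->lock) != 0) {
		dropped = 1;
		pthread_mutex_unlock(&this_rq->lock);
		if (busiest < this_rq) {     /* low address first */
			pthread_mutex_lock(&busiest->lock);
			pthread_mutex_lock(&this_rq->lock);
		} else {
			pthread_mutex_lock(&this_rq->lock);
			pthread_mutex_lock(&busiest->lock);
		}
	}
	return dropped;
}

int main(void)
{
	struct rq a = { PTHREAD_MUTEX_INITIALIZER, 0 };
	struct rq b = { PTHREAD_MUTEX_INITIALIZER, 1 };

	pthread_mutex_lock(&a.lock);
	printf("dropped=%d\n", double_lock(&a, &b));
	pthread_mutex_unlock(&b.lock);
	pthread_mutex_unlock(&a.lock);
	return 0;
}
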
1248 | 1248 | ||
1249 | static inline int has_pushable_tasks(struct rq *rq) | 1249 | static inline int has_pushable_tasks(struct rq *rq) |
1250 | { | 1250 | { |
1251 | return !plist_head_empty(&rq->rt.pushable_tasks); | 1251 | return !plist_head_empty(&rq->rt.pushable_tasks); |
1252 | } | 1252 | } |
1253 | 1253 | ||
1254 | static struct task_struct *pick_next_pushable_task(struct rq *rq) | 1254 | static struct task_struct *pick_next_pushable_task(struct rq *rq) |
1255 | { | 1255 | { |
1256 | struct task_struct *p; | 1256 | struct task_struct *p; |
1257 | 1257 | ||
1258 | if (!has_pushable_tasks(rq)) | 1258 | if (!has_pushable_tasks(rq)) |
1259 | return NULL; | 1259 | return NULL; |
1260 | 1260 | ||
1261 | p = plist_first_entry(&rq->rt.pushable_tasks, | 1261 | p = plist_first_entry(&rq->rt.pushable_tasks, |
1262 | struct task_struct, pushable_tasks); | 1262 | struct task_struct, pushable_tasks); |
1263 | 1263 | ||
1264 | BUG_ON(rq->cpu != task_cpu(p)); | 1264 | BUG_ON(rq->cpu != task_cpu(p)); |
1265 | BUG_ON(task_current(rq, p)); | 1265 | BUG_ON(task_current(rq, p)); |
1266 | BUG_ON(p->rt.nr_cpus_allowed <= 1); | 1266 | BUG_ON(p->rt.nr_cpus_allowed <= 1); |
1267 | 1267 | ||
1268 | BUG_ON(!p->se.on_rq); | 1268 | BUG_ON(!p->se.on_rq); |
1269 | BUG_ON(!rt_task(p)); | 1269 | BUG_ON(!rt_task(p)); |
1270 | 1270 | ||
1271 | return p; | 1271 | return p; |
1272 | } | 1272 | } |
1273 | 1273 | ||
1274 | /* | 1274 | /* |
1275 | * If the current CPU has more than one RT task, see if the non- | 1275 | * If the current CPU has more than one RT task, see if the non- |
1276 | * running task can migrate over to a CPU that is running a task | 1276 | * running task can migrate over to a CPU that is running a task |
1277 | * of lesser priority. | 1277 | * of lesser priority. |
1278 | */ | 1278 | */ |
1279 | static int push_rt_task(struct rq *rq) | 1279 | static int push_rt_task(struct rq *rq) |
1280 | { | 1280 | { |
1281 | struct task_struct *next_task; | 1281 | struct task_struct *next_task; |
1282 | struct rq *lowest_rq; | 1282 | struct rq *lowest_rq; |
1283 | 1283 | ||
1284 | if (!rq->rt.overloaded) | 1284 | if (!rq->rt.overloaded) |
1285 | return 0; | 1285 | return 0; |
1286 | 1286 | ||
1287 | next_task = pick_next_pushable_task(rq); | 1287 | next_task = pick_next_pushable_task(rq); |
1288 | if (!next_task) | 1288 | if (!next_task) |
1289 | return 0; | 1289 | return 0; |
1290 | 1290 | ||
1291 | retry: | 1291 | retry: |
1292 | if (unlikely(next_task == rq->curr)) { | 1292 | if (unlikely(next_task == rq->curr)) { |
1293 | WARN_ON(1); | 1293 | WARN_ON(1); |
1294 | return 0; | 1294 | return 0; |
1295 | } | 1295 | } |
1296 | 1296 | ||
1297 | /* | 1297 | /* |
1298 | * It's possible that next_task slipped in with a | 1298 | * It's possible that next_task slipped in with a |
1299 | * higher priority than current. If that's the case | 1299 | * higher priority than current. If that's the case |
1300 | * just reschedule current. | 1300 | * just reschedule current. |
1301 | */ | 1301 | */ |
1302 | if (unlikely(next_task->prio < rq->curr->prio)) { | 1302 | if (unlikely(next_task->prio < rq->curr->prio)) { |
1303 | resched_task(rq->curr); | 1303 | resched_task(rq->curr); |
1304 | return 0; | 1304 | return 0; |
1305 | } | 1305 | } |
1306 | 1306 | ||
1307 | /* We might release rq lock */ | 1307 | /* We might release rq lock */ |
1308 | get_task_struct(next_task); | 1308 | get_task_struct(next_task); |
1309 | 1309 | ||
1310 | /* find_lock_lowest_rq locks the rq if found */ | 1310 | /* find_lock_lowest_rq locks the rq if found */ |
1311 | lowest_rq = find_lock_lowest_rq(next_task, rq); | 1311 | lowest_rq = find_lock_lowest_rq(next_task, rq); |
1312 | if (!lowest_rq) { | 1312 | if (!lowest_rq) { |
1313 | struct task_struct *task; | 1313 | struct task_struct *task; |
1314 | /* | 1314 | /* |
1315 | * find_lock_lowest_rq releases rq->lock | 1315 | * find_lock_lowest_rq releases rq->lock |
1316 | * so it is possible that next_task has migrated. | 1316 | * so it is possible that next_task has migrated. |
1317 | * | 1317 | * |
1318 | * We need to make sure that the task is still on the same | 1318 | * We need to make sure that the task is still on the same |
1319 | * run-queue and is also still the next task eligible for | 1319 | * run-queue and is also still the next task eligible for |
1320 | * pushing. | 1320 | * pushing. |
1321 | */ | 1321 | */ |
1322 | task = pick_next_pushable_task(rq); | 1322 | task = pick_next_pushable_task(rq); |
1323 | if (task_cpu(next_task) == rq->cpu && task == next_task) { | 1323 | if (task_cpu(next_task) == rq->cpu && task == next_task) { |
1324 | /* | 1324 | /* |
1325 | * If we get here, the task hasn't moved at all, but | 1325 | * If we get here, the task hasn't moved at all, but |
1326 | * it has failed to push. We will not try again, | 1326 | * it has failed to push. We will not try again, |
1327 | * since the other cpus will pull from us when they | 1327 | * since the other cpus will pull from us when they |
1328 | * are ready. | 1328 | * are ready. |
1329 | */ | 1329 | */ |
1330 | dequeue_pushable_task(rq, next_task); | 1330 | dequeue_pushable_task(rq, next_task); |
1331 | goto out; | 1331 | goto out; |
1332 | } | 1332 | } |
1333 | 1333 | ||
1334 | if (!task) | 1334 | if (!task) |
1335 | /* No more tasks, just exit */ | 1335 | /* No more tasks, just exit */ |
1336 | goto out; | 1336 | goto out; |
1337 | 1337 | ||
1338 | /* | 1338 | /* |
1339 | * Something has shifted, try again. | 1339 | * Something has shifted, try again. |
1340 | */ | 1340 | */ |
1341 | put_task_struct(next_task); | 1341 | put_task_struct(next_task); |
1342 | next_task = task; | 1342 | next_task = task; |
1343 | goto retry; | 1343 | goto retry; |
1344 | } | 1344 | } |
1345 | 1345 | ||
1346 | deactivate_task(rq, next_task, 0); | 1346 | deactivate_task(rq, next_task, 0); |
1347 | set_task_cpu(next_task, lowest_rq->cpu); | 1347 | set_task_cpu(next_task, lowest_rq->cpu); |
1348 | activate_task(lowest_rq, next_task, 0); | 1348 | activate_task(lowest_rq, next_task, 0); |
1349 | 1349 | ||
1350 | resched_task(lowest_rq->curr); | 1350 | resched_task(lowest_rq->curr); |
1351 | 1351 | ||
1352 | double_unlock_balance(rq, lowest_rq); | 1352 | double_unlock_balance(rq, lowest_rq); |
1353 | 1353 | ||
1354 | out: | 1354 | out: |
1355 | put_task_struct(next_task); | 1355 | put_task_struct(next_task); |
1356 | 1356 | ||
1357 | return 1; | 1357 | return 1; |
1358 | } | 1358 | } |
1359 | 1359 | ||
1360 | static void push_rt_tasks(struct rq *rq) | 1360 | static void push_rt_tasks(struct rq *rq) |
1361 | { | 1361 | { |
1362 | /* push_rt_task will return true if it moved an RT */ | 1362 | /* push_rt_task will return true if it moved an RT */ |
1363 | while (push_rt_task(rq)) | 1363 | while (push_rt_task(rq)) |
1364 | ; | 1364 | ; |
1365 | } | 1365 | } |
1366 | 1366 | ||
1367 | static int pull_rt_task(struct rq *this_rq) | 1367 | static int pull_rt_task(struct rq *this_rq) |
1368 | { | 1368 | { |
1369 | int this_cpu = this_rq->cpu, ret = 0, cpu; | 1369 | int this_cpu = this_rq->cpu, ret = 0, cpu; |
1370 | struct task_struct *p; | 1370 | struct task_struct *p; |
1371 | struct rq *src_rq; | 1371 | struct rq *src_rq; |
1372 | 1372 | ||
1373 | if (likely(!rt_overloaded(this_rq))) | 1373 | if (likely(!rt_overloaded(this_rq))) |
1374 | return 0; | 1374 | return 0; |
1375 | 1375 | ||
1376 | for_each_cpu(cpu, this_rq->rd->rto_mask) { | 1376 | for_each_cpu(cpu, this_rq->rd->rto_mask) { |
1377 | if (this_cpu == cpu) | 1377 | if (this_cpu == cpu) |
1378 | continue; | 1378 | continue; |
1379 | 1379 | ||
1380 | src_rq = cpu_rq(cpu); | 1380 | src_rq = cpu_rq(cpu); |
1381 | 1381 | ||
1382 | /* | 1382 | /* |
1383 | * Don't bother taking the src_rq->lock if the next highest | 1383 | * Don't bother taking the src_rq->lock if the next highest |
1384 | * task is known to be lower-priority than our current task. | 1384 | * task is known to be lower-priority than our current task. |
1385 | * This may look racy, but if this value is about to go | 1385 | * This may look racy, but if this value is about to go |
1386 | * logically higher, the src_rq will push this task away. | 1386 | * logically higher, the src_rq will push this task away. |
1387 | * And if it's going logically lower, we do not care | 1387 | * And if it's going logically lower, we do not care |
1388 | */ | 1388 | */ |
1389 | if (src_rq->rt.highest_prio.next >= | 1389 | if (src_rq->rt.highest_prio.next >= |
1390 | this_rq->rt.highest_prio.curr) | 1390 | this_rq->rt.highest_prio.curr) |
1391 | continue; | 1391 | continue; |
1392 | 1392 | ||
1393 | /* | 1393 | /* |
1394 | * We can potentially drop this_rq's lock in | 1394 | * We can potentially drop this_rq's lock in |
1395 | * double_lock_balance, and another CPU could | 1395 | * double_lock_balance, and another CPU could |
1396 | * alter this_rq | 1396 | * alter this_rq |
1397 | */ | 1397 | */ |
1398 | double_lock_balance(this_rq, src_rq); | 1398 | double_lock_balance(this_rq, src_rq); |
1399 | 1399 | ||
1400 | /* | 1400 | /* |
1401 | * Are there still pullable RT tasks? | 1401 | * Are there still pullable RT tasks? |
1402 | */ | 1402 | */ |
1403 | if (src_rq->rt.rt_nr_running <= 1) | 1403 | if (src_rq->rt.rt_nr_running <= 1) |
1404 | goto skip; | 1404 | goto skip; |
1405 | 1405 | ||
1406 | p = pick_next_highest_task_rt(src_rq, this_cpu); | 1406 | p = pick_next_highest_task_rt(src_rq, this_cpu); |
1407 | 1407 | ||
1408 | /* | 1408 | /* |
1409 | * Do we have an RT task that preempts | 1409 | * Do we have an RT task that preempts |
1410 | * the to-be-scheduled task? | 1410 | * the to-be-scheduled task? |
1411 | */ | 1411 | */ |
1412 | if (p && (p->prio < this_rq->rt.highest_prio.curr)) { | 1412 | if (p && (p->prio < this_rq->rt.highest_prio.curr)) { |
1413 | WARN_ON(p == src_rq->curr); | 1413 | WARN_ON(p == src_rq->curr); |
1414 | WARN_ON(!p->se.on_rq); | 1414 | WARN_ON(!p->se.on_rq); |
1415 | 1415 | ||
1416 | /* | 1416 | /* |
1417 | * There's a chance that p is higher in priority | 1417 | * There's a chance that p is higher in priority |
1418 | * than what's currently running on its cpu. | 1418 | * than what's currently running on its cpu. |
1419 | * This is just because p is waking up and hasn't | 1419 | * This is just because p is waking up and hasn't |
1420 | * had a chance to schedule. We only pull | 1420 | * had a chance to schedule. We only pull |
1421 | * p if it is lower in priority than the | 1421 | * p if it is lower in priority than the |
1422 | * current task on the run queue | 1422 | * current task on the run queue |
1423 | */ | 1423 | */ |
1424 | if (p->prio < src_rq->curr->prio) | 1424 | if (p->prio < src_rq->curr->prio) |
1425 | goto skip; | 1425 | goto skip; |
1426 | 1426 | ||
1427 | ret = 1; | 1427 | ret = 1; |
1428 | 1428 | ||
1429 | deactivate_task(src_rq, p, 0); | 1429 | deactivate_task(src_rq, p, 0); |
1430 | set_task_cpu(p, this_cpu); | 1430 | set_task_cpu(p, this_cpu); |
1431 | activate_task(this_rq, p, 0); | 1431 | activate_task(this_rq, p, 0); |
1432 | /* | 1432 | /* |
1433 | * We continue with the search, just in | 1433 | * We continue with the search, just in |
1434 | * case there's an even higher prio task | 1434 | * case there's an even higher prio task |
1435 | * in another runqueue. (low likelihood | 1435 | * in another runqueue. (low likelihood |
1436 | * but possible) | 1436 | * but possible) |
1437 | */ | 1437 | */ |
1438 | } | 1438 | } |
1439 | skip: | 1439 | skip: |
1440 | double_unlock_balance(this_rq, src_rq); | 1440 | double_unlock_balance(this_rq, src_rq); |
1441 | } | 1441 | } |
1442 | 1442 | ||
1443 | return ret; | 1443 | return ret; |
1444 | } | 1444 | } |
1445 | 1445 | ||
1446 | static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) | 1446 | static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) |
1447 | { | 1447 | { |
1448 | /* Try to pull RT tasks here if we lower this rq's prio */ | 1448 | /* Try to pull RT tasks here if we lower this rq's prio */ |
1449 | if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio) | 1449 | if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio) |
1450 | pull_rt_task(rq); | 1450 | pull_rt_task(rq); |
1451 | } | 1451 | } |
1452 | 1452 | ||
1453 | /* | 1453 | /* |
1454 | * assumes rq->lock is held | 1454 | * assumes rq->lock is held |
1455 | */ | 1455 | */ |
1456 | static int needs_post_schedule_rt(struct rq *rq) | 1456 | static int needs_post_schedule_rt(struct rq *rq) |
1457 | { | 1457 | { |
1458 | return has_pushable_tasks(rq); | 1458 | return has_pushable_tasks(rq); |
1459 | } | 1459 | } |
1460 | 1460 | ||
1461 | static void post_schedule_rt(struct rq *rq) | 1461 | static void post_schedule_rt(struct rq *rq) |
1462 | { | 1462 | { |
1463 | /* | 1463 | /* |
1464 | * This is only called if needs_post_schedule_rt() indicates that | 1464 | * This is only called if needs_post_schedule_rt() indicates that |
1465 | * we need to push tasks away | 1465 | * we need to push tasks away |
1466 | */ | 1466 | */ |
1467 | spin_lock_irq(&rq->lock); | 1467 | spin_lock_irq(&rq->lock); |
1468 | push_rt_tasks(rq); | 1468 | push_rt_tasks(rq); |
1469 | spin_unlock_irq(&rq->lock); | 1469 | spin_unlock_irq(&rq->lock); |
1470 | } | 1470 | } |
1471 | 1471 | ||
1472 | /* | 1472 | /* |
1473 | * If we are not running and we are not going to reschedule soon, we should | 1473 | * If we are not running and we are not going to reschedule soon, we should |
1474 | * try to push tasks away now | 1474 | * try to push tasks away now |
1475 | */ | 1475 | */ |
1476 | static void task_wake_up_rt(struct rq *rq, struct task_struct *p) | 1476 | static void task_wake_up_rt(struct rq *rq, struct task_struct *p) |
1477 | { | 1477 | { |
1478 | if (!task_running(rq, p) && | 1478 | if (!task_running(rq, p) && |
1479 | !test_tsk_need_resched(rq->curr) && | 1479 | !test_tsk_need_resched(rq->curr) && |
1480 | has_pushable_tasks(rq) && | 1480 | has_pushable_tasks(rq) && |
1481 | p->rt.nr_cpus_allowed > 1) | 1481 | p->rt.nr_cpus_allowed > 1) |
1482 | push_rt_tasks(rq); | 1482 | push_rt_tasks(rq); |
1483 | } | 1483 | } |
1484 | 1484 | ||
1485 | static unsigned long | 1485 | static unsigned long |
1486 | load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1486 | load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, |
1487 | unsigned long max_load_move, | 1487 | unsigned long max_load_move, |
1488 | struct sched_domain *sd, enum cpu_idle_type idle, | 1488 | struct sched_domain *sd, enum cpu_idle_type idle, |
1489 | int *all_pinned, int *this_best_prio) | 1489 | int *all_pinned, int *this_best_prio) |
1490 | { | 1490 | { |
1491 | /* don't touch RT tasks */ | 1491 | /* don't touch RT tasks */ |
1492 | return 0; | 1492 | return 0; |
1493 | } | 1493 | } |
1494 | 1494 | ||
1495 | static int | 1495 | static int |
1496 | move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1496 | move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, |
1497 | struct sched_domain *sd, enum cpu_idle_type idle) | 1497 | struct sched_domain *sd, enum cpu_idle_type idle) |
1498 | { | 1498 | { |
1499 | /* don't touch RT tasks */ | 1499 | /* don't touch RT tasks */ |
1500 | return 0; | 1500 | return 0; |
1501 | } | 1501 | } |
1502 | 1502 | ||
1503 | static void set_cpus_allowed_rt(struct task_struct *p, | 1503 | static void set_cpus_allowed_rt(struct task_struct *p, |
1504 | const struct cpumask *new_mask) | 1504 | const struct cpumask *new_mask) |
1505 | { | 1505 | { |
1506 | int weight = cpumask_weight(new_mask); | 1506 | int weight = cpumask_weight(new_mask); |
1507 | 1507 | ||
1508 | BUG_ON(!rt_task(p)); | 1508 | BUG_ON(!rt_task(p)); |
1509 | 1509 | ||
1510 | /* | 1510 | /* |
1511 | * Update the migration status of the RQ if we have an RT task | 1511 | * Update the migration status of the RQ if we have an RT task |
1512 | * which is running AND changing its weight value. | 1512 | * which is running AND changing its weight value. |
1513 | */ | 1513 | */ |
1514 | if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) { | 1514 | if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) { |
1515 | struct rq *rq = task_rq(p); | 1515 | struct rq *rq = task_rq(p); |
1516 | 1516 | ||
1517 | if (!task_current(rq, p)) { | 1517 | if (!task_current(rq, p)) { |
1518 | /* | 1518 | /* |
1519 | * Make sure we dequeue this task from the pushable list | 1519 | * Make sure we dequeue this task from the pushable list |
1520 | * before going further. It will either remain off of | 1520 | * before going further. It will either remain off of |
1521 | * the list because we are no longer pushable, or it | 1521 | * the list because we are no longer pushable, or it |
1522 | * will be requeued. | 1522 | * will be requeued. |
1523 | */ | 1523 | */ |
1524 | if (p->rt.nr_cpus_allowed > 1) | 1524 | if (p->rt.nr_cpus_allowed > 1) |
1525 | dequeue_pushable_task(rq, p); | 1525 | dequeue_pushable_task(rq, p); |
1526 | 1526 | ||
1527 | /* | 1527 | /* |
1528 | * Requeue if our weight is changing and still > 1 | 1528 | * Requeue if our weight is changing and still > 1 |
1529 | */ | 1529 | */ |
1530 | if (weight > 1) | 1530 | if (weight > 1) |
1531 | enqueue_pushable_task(rq, p); | 1531 | enqueue_pushable_task(rq, p); |
1532 | 1532 | ||
1533 | } | 1533 | } |
1534 | 1534 | ||
1535 | if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) { | 1535 | if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) { |
1536 | rq->rt.rt_nr_migratory++; | 1536 | rq->rt.rt_nr_migratory++; |
1537 | } else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) { | 1537 | } else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) { |
1538 | BUG_ON(!rq->rt.rt_nr_migratory); | 1538 | BUG_ON(!rq->rt.rt_nr_migratory); |
1539 | rq->rt.rt_nr_migratory--; | 1539 | rq->rt.rt_nr_migratory--; |
1540 | } | 1540 | } |
1541 | 1541 | ||
1542 | update_rt_migration(&rq->rt); | 1542 | update_rt_migration(&rq->rt); |
1543 | } | 1543 | } |
1544 | 1544 | ||
1545 | cpumask_copy(&p->cpus_allowed, new_mask); | 1545 | cpumask_copy(&p->cpus_allowed, new_mask); |
1546 | p->rt.nr_cpus_allowed = weight; | 1546 | p->rt.nr_cpus_allowed = weight; |
1547 | } | 1547 | } |
1548 | 1548 | ||
1549 | /* Assumes rq->lock is held */ | 1549 | /* Assumes rq->lock is held */ |
1550 | static void rq_online_rt(struct rq *rq) | 1550 | static void rq_online_rt(struct rq *rq) |
1551 | { | 1551 | { |
1552 | if (rq->rt.overloaded) | 1552 | if (rq->rt.overloaded) |
1553 | rt_set_overload(rq); | 1553 | rt_set_overload(rq); |
1554 | 1554 | ||
1555 | __enable_runtime(rq); | 1555 | __enable_runtime(rq); |
1556 | 1556 | ||
1557 | cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr); | 1557 | cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr); |
1558 | } | 1558 | } |
1559 | 1559 | ||
1560 | /* Assumes rq->lock is held */ | 1560 | /* Assumes rq->lock is held */ |
1561 | static void rq_offline_rt(struct rq *rq) | 1561 | static void rq_offline_rt(struct rq *rq) |
1562 | { | 1562 | { |
1563 | if (rq->rt.overloaded) | 1563 | if (rq->rt.overloaded) |
1564 | rt_clear_overload(rq); | 1564 | rt_clear_overload(rq); |
1565 | 1565 | ||
1566 | __disable_runtime(rq); | 1566 | __disable_runtime(rq); |
1567 | 1567 | ||
1568 | cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID); | 1568 | cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID); |
1569 | } | 1569 | } |
1570 | 1570 | ||
1571 | /* | 1571 | /* |
1572 | * When switching from the rt queue, we may be in a position | 1572 | * When switching from the rt queue, we may be in a position |
1573 | * where we want to pull RT tasks from other runqueues. | 1573 | * where we want to pull RT tasks from other runqueues. |
1574 | */ | 1574 | */ |
1575 | static void switched_from_rt(struct rq *rq, struct task_struct *p, | 1575 | static void switched_from_rt(struct rq *rq, struct task_struct *p, |
1576 | int running) | 1576 | int running) |
1577 | { | 1577 | { |
1578 | /* | 1578 | /* |
1579 | * If there are other RT tasks then we will reschedule | 1579 | * If there are other RT tasks then we will reschedule |
1580 | * and the scheduling of the other RT tasks will handle | 1580 | * and the scheduling of the other RT tasks will handle |
1581 | * the balancing. But if we are the last RT task | 1581 | * the balancing. But if we are the last RT task |
1582 | * we may need to handle the pulling of RT tasks | 1582 | * we may need to handle the pulling of RT tasks |
1583 | * now. | 1583 | * now. |
1584 | */ | 1584 | */ |
1585 | if (!rq->rt.rt_nr_running) | 1585 | if (!rq->rt.rt_nr_running) |
1586 | pull_rt_task(rq); | 1586 | pull_rt_task(rq); |
1587 | } | 1587 | } |
1588 | 1588 | ||
1589 | static inline void init_sched_rt_class(void) | 1589 | static inline void init_sched_rt_class(void) |
1590 | { | 1590 | { |
1591 | unsigned int i; | 1591 | unsigned int i; |
1592 | 1592 | ||
1593 | for_each_possible_cpu(i) | 1593 | for_each_possible_cpu(i) |
1594 | alloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), | 1594 | zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), |
1595 | GFP_KERNEL, cpu_to_node(i)); | 1595 | GFP_KERNEL, cpu_to_node(i)); |
1596 | } | 1596 | } |
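
This hunk is the one-line change this commit makes in sched_rt.c: under CONFIG_CPUMASK_OFFSTACK (selected by MAXSMP), cpumask_var_t becomes a pointer and alloc_cpumask_var_node() hands back uninitialized memory, whereas the non-OFFSTACK static per-cpu array is already zeroed. Switching to zalloc_cpumask_var_node() makes both configurations start from a cleared mask. A rough userspace model of the two configurations (illustrative only; the real definitions live in <linux/cpumask.h>):

/* Why zalloc matters: cpumask_var_t in both configurations. */
#include <stdlib.h>

#define NR_CPUS 4096

#ifdef CPUMASK_OFFSTACK                  /* the MAXSMP case */
typedef unsigned long *cpumask_var_t;

static int zalloc_cpumask_var(cpumask_var_t *mask)
{
	*mask = calloc(NR_CPUS / 8, 1);  /* zeroed, unlike malloc() */
	return *mask != NULL;
}
#else                                    /* small-NR_CPUS case */
typedef unsigned long cpumask_var_t[NR_CPUS / (8 * sizeof(long))];

static int zalloc_cpumask_var(cpumask_var_t *mask)
{
	/* Static storage is already zeroed; nothing to allocate. */
	return 1;
}
#endif

static cpumask_var_t local_cpu_mask;     /* static, like the per-cpu var */

int main(void)
{
	/* With a plain (non-zeroing) alloc, only the OFFSTACK build
	 * would see garbage bits here; zalloc keeps both consistent. */
	return zalloc_cpumask_var(&local_cpu_mask) ? 0 : 1;
}
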
1597 | #endif /* CONFIG_SMP */ | 1597 | #endif /* CONFIG_SMP */ |
1598 | 1598 | ||
1599 | /* | 1599 | /* |
1600 | * When switching a task to RT, we may overload the runqueue | 1600 | * When switching a task to RT, we may overload the runqueue |
1601 | * with RT tasks. In this case we try to push them off to | 1601 | * with RT tasks. In this case we try to push them off to |
1602 | * other runqueues. | 1602 | * other runqueues. |
1603 | */ | 1603 | */ |
1604 | static void switched_to_rt(struct rq *rq, struct task_struct *p, | 1604 | static void switched_to_rt(struct rq *rq, struct task_struct *p, |
1605 | int running) | 1605 | int running) |
1606 | { | 1606 | { |
1607 | int check_resched = 1; | 1607 | int check_resched = 1; |
1608 | 1608 | ||
1609 | /* | 1609 | /* |
1610 | * If we are already running, then there's nothing | 1610 | * If we are already running, then there's nothing |
1611 | * that needs to be done. But if we are not running | 1611 | * that needs to be done. But if we are not running |
1612 | * we may need to preempt the current running task. | 1612 | * we may need to preempt the current running task. |
1613 | * If that current running task is also an RT task | 1613 | * If that current running task is also an RT task |
1614 | * then see if we can move to another run queue. | 1614 | * then see if we can move to another run queue. |
1615 | */ | 1615 | */ |
1616 | if (!running) { | 1616 | if (!running) { |
1617 | #ifdef CONFIG_SMP | 1617 | #ifdef CONFIG_SMP |
1618 | if (rq->rt.overloaded && push_rt_task(rq) && | 1618 | if (rq->rt.overloaded && push_rt_task(rq) && |
1619 | /* Don't resched if we changed runqueues */ | 1619 | /* Don't resched if we changed runqueues */ |
1620 | rq != task_rq(p)) | 1620 | rq != task_rq(p)) |
1621 | check_resched = 0; | 1621 | check_resched = 0; |
1622 | #endif /* CONFIG_SMP */ | 1622 | #endif /* CONFIG_SMP */ |
1623 | if (check_resched && p->prio < rq->curr->prio) | 1623 | if (check_resched && p->prio < rq->curr->prio) |
1624 | resched_task(rq->curr); | 1624 | resched_task(rq->curr); |
1625 | } | 1625 | } |
1626 | } | 1626 | } |
1627 | 1627 | ||
1628 | /* | 1628 | /* |
1629 | * Priority of the task has changed. This may cause | 1629 | * Priority of the task has changed. This may cause |
1630 | * us to initiate a push or pull. | 1630 | * us to initiate a push or pull. |
1631 | */ | 1631 | */ |
1632 | static void prio_changed_rt(struct rq *rq, struct task_struct *p, | 1632 | static void prio_changed_rt(struct rq *rq, struct task_struct *p, |
1633 | int oldprio, int running) | 1633 | int oldprio, int running) |
1634 | { | 1634 | { |
1635 | if (running) { | 1635 | if (running) { |
1636 | #ifdef CONFIG_SMP | 1636 | #ifdef CONFIG_SMP |
1637 | /* | 1637 | /* |
1638 | * If our priority decreases while running, we | 1638 | * If our priority decreases while running, we |
1639 | * may need to pull tasks to this runqueue. | 1639 | * may need to pull tasks to this runqueue. |
1640 | */ | 1640 | */ |
1641 | if (oldprio < p->prio) | 1641 | if (oldprio < p->prio) |
1642 | pull_rt_task(rq); | 1642 | pull_rt_task(rq); |
1643 | /* | 1643 | /* |
1644 | * If there's a higher priority task waiting to run | 1644 | * If there's a higher priority task waiting to run |
1645 | * then reschedule. Note, the above pull_rt_task | 1645 | * then reschedule. Note, the above pull_rt_task |
1646 | * can release the rq lock and p could migrate. | 1646 | * can release the rq lock and p could migrate. |
1647 | * Only reschedule if p is still on the same runqueue. | 1647 | * Only reschedule if p is still on the same runqueue. |
1648 | */ | 1648 | */ |
1649 | if (p->prio > rq->rt.highest_prio.curr && rq->curr == p) | 1649 | if (p->prio > rq->rt.highest_prio.curr && rq->curr == p) |
1650 | resched_task(p); | 1650 | resched_task(p); |
1651 | #else | 1651 | #else |
1652 | /* For UP simply resched on drop of prio */ | 1652 | /* For UP simply resched on drop of prio */ |
1653 | if (oldprio < p->prio) | 1653 | if (oldprio < p->prio) |
1654 | resched_task(p); | 1654 | resched_task(p); |
1655 | #endif /* CONFIG_SMP */ | 1655 | #endif /* CONFIG_SMP */ |
1656 | } else { | 1656 | } else { |
1657 | /* | 1657 | /* |
1658 | * This task is not running, but if its priority | 1658 | * This task is not running, but if its priority |
1659 | * is higher than that of the current running task, | 1659 | * is higher than that of the current running task, |
1660 | * then reschedule. | 1660 | * then reschedule. |
1661 | */ | 1661 | */ |
1662 | if (p->prio < rq->curr->prio) | 1662 | if (p->prio < rq->curr->prio) |
1663 | resched_task(rq->curr); | 1663 | resched_task(rq->curr); |
1664 | } | 1664 | } |
1665 | } | 1665 | } |
1666 | 1666 | ||
1667 | static void watchdog(struct rq *rq, struct task_struct *p) | 1667 | static void watchdog(struct rq *rq, struct task_struct *p) |
1668 | { | 1668 | { |
1669 | unsigned long soft, hard; | 1669 | unsigned long soft, hard; |
1670 | 1670 | ||
1671 | if (!p->signal) | 1671 | if (!p->signal) |
1672 | return; | 1672 | return; |
1673 | 1673 | ||
1674 | soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur; | 1674 | soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur; |
1675 | hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max; | 1675 | hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max; |
1676 | 1676 | ||
1677 | if (soft != RLIM_INFINITY) { | 1677 | if (soft != RLIM_INFINITY) { |
1678 | unsigned long next; | 1678 | unsigned long next; |
1679 | 1679 | ||
1680 | p->rt.timeout++; | 1680 | p->rt.timeout++; |
1681 | next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); | 1681 | next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); |
1682 | if (p->rt.timeout > next) | 1682 | if (p->rt.timeout > next) |
1683 | p->cputime_expires.sched_exp = p->se.sum_exec_runtime; | 1683 | p->cputime_expires.sched_exp = p->se.sum_exec_runtime; |
1684 | } | 1684 | } |
1685 | } | 1685 | } |
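
The threshold computation above converts the RLIMIT_RTTIME limit from microseconds into scheduler ticks: next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ). A small worked example, assuming HZ=1000 (HZ is a kernel configuration choice):

/* Worked example of the watchdog() tick-limit computation. */
#include <stdio.h>

#define HZ           1000UL
#define USEC_PER_SEC 1000000UL
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long soft = 950000;     /* RLIMIT_RTTIME soft, in us */
	unsigned long hard = 1000000;    /* hard limit, in us */
	unsigned long min  = soft < hard ? soft : hard;

	/* 950000us / (1000000/1000)us-per-tick = 950 ticks */
	unsigned long next = DIV_ROUND_UP(min, USEC_PER_SEC / HZ);

	printf("timeout threshold: %lu ticks\n", next);  /* 950 */
	return 0;
}
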
1686 | 1686 | ||
1687 | static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) | 1687 | static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) |
1688 | { | 1688 | { |
1689 | update_curr_rt(rq); | 1689 | update_curr_rt(rq); |
1690 | 1690 | ||
1691 | watchdog(rq, p); | 1691 | watchdog(rq, p); |
1692 | 1692 | ||
1693 | /* | 1693 | /* |
1694 | * RR tasks need a special form of timeslice management. | 1694 | * RR tasks need a special form of timeslice management. |
1695 | * FIFO tasks have no timeslices. | 1695 | * FIFO tasks have no timeslices. |
1696 | */ | 1696 | */ |
1697 | if (p->policy != SCHED_RR) | 1697 | if (p->policy != SCHED_RR) |
1698 | return; | 1698 | return; |
1699 | 1699 | ||
1700 | if (--p->rt.time_slice) | 1700 | if (--p->rt.time_slice) |
1701 | return; | 1701 | return; |
1702 | 1702 | ||
1703 | p->rt.time_slice = DEF_TIMESLICE; | 1703 | p->rt.time_slice = DEF_TIMESLICE; |
1704 | 1704 | ||
1705 | /* | 1705 | /* |
1706 | * Requeue to the end of queue if we are not the only element | 1706 | * Requeue to the end of queue if we are not the only element |
1707 | * on the queue: | 1707 | * on the queue: |
1708 | */ | 1708 | */ |
1709 | if (p->rt.run_list.prev != p->rt.run_list.next) { | 1709 | if (p->rt.run_list.prev != p->rt.run_list.next) { |
1710 | requeue_task_rt(rq, p, 0); | 1710 | requeue_task_rt(rq, p, 0); |
1711 | set_tsk_need_resched(p); | 1711 | set_tsk_need_resched(p); |
1712 | } | 1712 | } |
1713 | } | 1713 | } |
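
task_tick_rt() implements SCHED_RR: the timeslice is decremented on every tick, and only on expiry is it refilled and the task rotated to the tail of its priority queue; SCHED_FIFO tasks return early and never rotate. A compact standalone model of that rotation (the two-task queue and the DEF_TIMESLICE value are invented for the demo):

/* SCHED_RR tick handling in miniature: refill the slice and rotate
 * only when it expires.  Illustrative only. */
#include <stdio.h>

#define DEF_TIMESLICE 5                  /* ticks; the kernel uses 100ms */

struct task { const char *name; int slice; };

static struct task rr[2] = { { "A", DEF_TIMESLICE },
			     { "B", DEF_TIMESLICE } };
static int head;                         /* index of the running task */

static void task_tick_rr(void)
{
	struct task *p = &rr[head];

	if (--p->slice)
		return;                  /* slice not yet used up */

	p->slice = DEF_TIMESLICE;        /* refill ... */
	head ^= 1;                       /* ... and requeue to the tail */
}

int main(void)
{
	for (int tick = 1; tick <= 12; tick++) {
		task_tick_rr();
		printf("tick %2d -> running %s\n", tick, rr[head].name);
	}
	return 0;
}
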
1714 | 1714 | ||
1715 | static void set_curr_task_rt(struct rq *rq) | 1715 | static void set_curr_task_rt(struct rq *rq) |
1716 | { | 1716 | { |
1717 | struct task_struct *p = rq->curr; | 1717 | struct task_struct *p = rq->curr; |
1718 | 1718 | ||
1719 | p->se.exec_start = rq->clock; | 1719 | p->se.exec_start = rq->clock; |
1720 | 1720 | ||
1721 | /* The running task is never eligible for pushing */ | 1721 | /* The running task is never eligible for pushing */ |
1722 | dequeue_pushable_task(rq, p); | 1722 | dequeue_pushable_task(rq, p); |
1723 | } | 1723 | } |
1724 | 1724 | ||
1725 | static const struct sched_class rt_sched_class = { | 1725 | static const struct sched_class rt_sched_class = { |
1726 | .next = &fair_sched_class, | 1726 | .next = &fair_sched_class, |
1727 | .enqueue_task = enqueue_task_rt, | 1727 | .enqueue_task = enqueue_task_rt, |
1728 | .dequeue_task = dequeue_task_rt, | 1728 | .dequeue_task = dequeue_task_rt, |
1729 | .yield_task = yield_task_rt, | 1729 | .yield_task = yield_task_rt, |
1730 | 1730 | ||
1731 | .check_preempt_curr = check_preempt_curr_rt, | 1731 | .check_preempt_curr = check_preempt_curr_rt, |
1732 | 1732 | ||
1733 | .pick_next_task = pick_next_task_rt, | 1733 | .pick_next_task = pick_next_task_rt, |
1734 | .put_prev_task = put_prev_task_rt, | 1734 | .put_prev_task = put_prev_task_rt, |
1735 | 1735 | ||
1736 | #ifdef CONFIG_SMP | 1736 | #ifdef CONFIG_SMP |
1737 | .select_task_rq = select_task_rq_rt, | 1737 | .select_task_rq = select_task_rq_rt, |
1738 | 1738 | ||
1739 | .load_balance = load_balance_rt, | 1739 | .load_balance = load_balance_rt, |
1740 | .move_one_task = move_one_task_rt, | 1740 | .move_one_task = move_one_task_rt, |
1741 | .set_cpus_allowed = set_cpus_allowed_rt, | 1741 | .set_cpus_allowed = set_cpus_allowed_rt, |
1742 | .rq_online = rq_online_rt, | 1742 | .rq_online = rq_online_rt, |
1743 | .rq_offline = rq_offline_rt, | 1743 | .rq_offline = rq_offline_rt, |
1744 | .pre_schedule = pre_schedule_rt, | 1744 | .pre_schedule = pre_schedule_rt, |
1745 | .needs_post_schedule = needs_post_schedule_rt, | 1745 | .needs_post_schedule = needs_post_schedule_rt, |
1746 | .post_schedule = post_schedule_rt, | 1746 | .post_schedule = post_schedule_rt, |
1747 | .task_wake_up = task_wake_up_rt, | 1747 | .task_wake_up = task_wake_up_rt, |
1748 | .switched_from = switched_from_rt, | 1748 | .switched_from = switched_from_rt, |
1749 | #endif | 1749 | #endif |
1750 | 1750 | ||
1751 | .set_curr_task = set_curr_task_rt, | 1751 | .set_curr_task = set_curr_task_rt, |
1752 | .task_tick = task_tick_rt, | 1752 | .task_tick = task_tick_rt, |
1753 | 1753 | ||
1754 | .prio_changed = prio_changed_rt, | 1754 | .prio_changed = prio_changed_rt, |
1755 | .switched_to = switched_to_rt, | 1755 | .switched_to = switched_to_rt, |
1756 | }; | 1756 | }; |
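rt_sched_class is one link in the scheduler's chain of class descriptors: every class exposes the same hooks and points at the next-lower class through .next (here, fair_sched_class). The core walks that chain in priority order. A stripped-down sketch of the dispatch pattern, using hypothetical types rather than the real sched_class:

    #include <stddef.h>

    struct rq;                      /* opaque in this sketch */
    struct task_struct;

    struct sched_class_sketch {
            const struct sched_class_sketch *next;
            struct task_struct *(*pick_next_task)(struct rq *rq);
    };

    /* Walk the classes, highest priority first, until one of them has a
     * runnable task; the idle class at the tail never comes up empty. */
    static struct task_struct *
    pick_next(struct rq *rq, const struct sched_class_sketch *top)
    {
            const struct sched_class_sketch *class;

            for (class = top; class; class = class->next) {
                    struct task_struct *p = class->pick_next_task(rq);

                    if (p)
                            return p;
            }
            return NULL;            /* unreachable with an idle class */
    }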
1757 | 1757 | ||
1758 | #ifdef CONFIG_SCHED_DEBUG | 1758 | #ifdef CONFIG_SCHED_DEBUG |
1759 | extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); | 1759 | extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); |
1760 | 1760 | ||
1761 | static void print_rt_stats(struct seq_file *m, int cpu) | 1761 | static void print_rt_stats(struct seq_file *m, int cpu) |
1762 | { | 1762 | { |
1763 | struct rt_rq *rt_rq; | 1763 | struct rt_rq *rt_rq; |
1764 | 1764 | ||
1765 | rcu_read_lock(); | 1765 | rcu_read_lock(); |
1766 | for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu)) | 1766 | for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu)) |
1767 | print_rt_rq(m, cpu, rt_rq); | 1767 | print_rt_rq(m, cpu, rt_rq); |
1768 | rcu_read_unlock(); | 1768 | rcu_read_unlock(); |
1769 | } | 1769 | } |
1770 | #endif /* CONFIG_SCHED_DEBUG */ | 1770 | #endif /* CONFIG_SCHED_DEBUG */ |
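With CONFIG_SCHED_DEBUG=y this hook is called from the scheduler debug code, so the per-rt_rq lines emitted by print_rt_rq() show up in /proc/sched_debug alongside the CFS run-queue statistics.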
1771 | 1771 | ||
1772 | 1772 |
kernel/smp.c
1 | /* | 1 | /* |
2 | * Generic helpers for smp ipi calls | 2 | * Generic helpers for smp ipi calls |
3 | * | 3 | * |
4 | * (C) Jens Axboe <jens.axboe@oracle.com> 2008 | 4 | * (C) Jens Axboe <jens.axboe@oracle.com> 2008 |
5 | */ | 5 | */ |
6 | #include <linux/rcupdate.h> | 6 | #include <linux/rcupdate.h> |
7 | #include <linux/rculist.h> | 7 | #include <linux/rculist.h> |
8 | #include <linux/kernel.h> | 8 | #include <linux/kernel.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/percpu.h> | 10 | #include <linux/percpu.h> |
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
13 | #include <linux/cpu.h> | 13 | #include <linux/cpu.h> |
14 | 14 | ||
15 | static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); | 15 | static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); |
16 | 16 | ||
17 | static struct { | 17 | static struct { |
18 | struct list_head queue; | 18 | struct list_head queue; |
19 | spinlock_t lock; | 19 | spinlock_t lock; |
20 | } call_function __cacheline_aligned_in_smp = | 20 | } call_function __cacheline_aligned_in_smp = |
21 | { | 21 | { |
22 | .queue = LIST_HEAD_INIT(call_function.queue), | 22 | .queue = LIST_HEAD_INIT(call_function.queue), |
23 | .lock = __SPIN_LOCK_UNLOCKED(call_function.lock), | 23 | .lock = __SPIN_LOCK_UNLOCKED(call_function.lock), |
24 | }; | 24 | }; |
25 | 25 | ||
26 | enum { | 26 | enum { |
27 | CSD_FLAG_LOCK = 0x01, | 27 | CSD_FLAG_LOCK = 0x01, |
28 | }; | 28 | }; |
29 | 29 | ||
30 | struct call_function_data { | 30 | struct call_function_data { |
31 | struct call_single_data csd; | 31 | struct call_single_data csd; |
32 | spinlock_t lock; | 32 | spinlock_t lock; |
33 | unsigned int refs; | 33 | unsigned int refs; |
34 | cpumask_var_t cpumask; | 34 | cpumask_var_t cpumask; |
35 | }; | 35 | }; |
36 | 36 | ||
37 | struct call_single_queue { | 37 | struct call_single_queue { |
38 | struct list_head list; | 38 | struct list_head list; |
39 | spinlock_t lock; | 39 | spinlock_t lock; |
40 | }; | 40 | }; |
41 | 41 | ||
42 | static DEFINE_PER_CPU(struct call_function_data, cfd_data) = { | 42 | static DEFINE_PER_CPU(struct call_function_data, cfd_data) = { |
43 | .lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock), | 43 | .lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock), |
44 | }; | 44 | }; |
45 | 45 | ||
46 | static int | 46 | static int |
47 | hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) | 47 | hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) |
48 | { | 48 | { |
49 | long cpu = (long)hcpu; | 49 | long cpu = (long)hcpu; |
50 | struct call_function_data *cfd = &per_cpu(cfd_data, cpu); | 50 | struct call_function_data *cfd = &per_cpu(cfd_data, cpu); |
51 | 51 | ||
52 | switch (action) { | 52 | switch (action) { |
53 | case CPU_UP_PREPARE: | 53 | case CPU_UP_PREPARE: |
54 | case CPU_UP_PREPARE_FROZEN: | 54 | case CPU_UP_PREPARE_FROZEN: |
55 | if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, | 55 | if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, |
56 | cpu_to_node(cpu))) | 56 | cpu_to_node(cpu))) |
57 | return NOTIFY_BAD; | 57 | return NOTIFY_BAD; |
58 | break; | 58 | break; |
59 | 59 | ||
60 | #ifdef CONFIG_HOTPLUG_CPU | 60 | #ifdef CONFIG_HOTPLUG_CPU |
61 | case CPU_UP_CANCELED: | 61 | case CPU_UP_CANCELED: |
62 | case CPU_UP_CANCELED_FROZEN: | 62 | case CPU_UP_CANCELED_FROZEN: |
63 | 63 | ||
64 | case CPU_DEAD: | 64 | case CPU_DEAD: |
65 | case CPU_DEAD_FROZEN: | 65 | case CPU_DEAD_FROZEN: |
66 | free_cpumask_var(cfd->cpumask); | 66 | free_cpumask_var(cfd->cpumask); |
67 | break; | 67 | break; |
68 | #endif | 68 | #endif |
69 | } | 69 | } |
70 | 70 | ||
71 | return NOTIFY_OK; | 71 | return NOTIFY_OK; |
72 | } | 72 | } |
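The one-line change in this hunk is this file's part of the series: zalloc_cpumask_var_node() hands back a mask that is already zeroed, whereas alloc_cpumask_var_node() leaves the CONFIG_CPUMASK_OFFSTACK heap allocation uninitialized. A sketch of the equivalence, written as hypothetical helpers rather than code from this file:

    #include <linux/cpumask.h>
    #include <linux/errno.h>
    #include <linux/gfp.h>

    /* Old pattern: allocate, then clear by hand before first use. */
    static int old_way(cpumask_var_t *mask, int node)
    {
            if (!alloc_cpumask_var_node(mask, GFP_KERNEL, node))
                    return -ENOMEM;
            cpumask_clear(*mask);
            return 0;
    }

    /* New pattern, as in the hunk above: one call, already zeroed. */
    static int new_way(cpumask_var_t *mask, int node)
    {
            return zalloc_cpumask_var_node(mask, GFP_KERNEL, node) ?
                    0 : -ENOMEM;
    }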
73 | 73 | ||
74 | static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { | 74 | static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { |
75 | .notifier_call = hotplug_cfd, | 75 | .notifier_call = hotplug_cfd, |
76 | }; | 76 | }; |
77 | 77 | ||
78 | static int __cpuinit init_call_single_data(void) | 78 | static int __cpuinit init_call_single_data(void) |
79 | { | 79 | { |
80 | void *cpu = (void *)(long)smp_processor_id(); | 80 | void *cpu = (void *)(long)smp_processor_id(); |
81 | int i; | 81 | int i; |
82 | 82 | ||
83 | for_each_possible_cpu(i) { | 83 | for_each_possible_cpu(i) { |
84 | struct call_single_queue *q = &per_cpu(call_single_queue, i); | 84 | struct call_single_queue *q = &per_cpu(call_single_queue, i); |
85 | 85 | ||
86 | spin_lock_init(&q->lock); | 86 | spin_lock_init(&q->lock); |
87 | INIT_LIST_HEAD(&q->list); | 87 | INIT_LIST_HEAD(&q->list); |
88 | } | 88 | } |
89 | 89 | ||
90 | hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); | 90 | hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); |
91 | register_cpu_notifier(&hotplug_cfd_notifier); | 91 | register_cpu_notifier(&hotplug_cfd_notifier); |
92 | 92 | ||
93 | return 0; | 93 | return 0; |
94 | } | 94 | } |
95 | early_initcall(init_call_single_data); | 95 | early_initcall(init_call_single_data); |
96 | 96 | ||
97 | /* | 97 | /* |
98 | * csd_lock/csd_unlock used to serialize access to per-cpu csd resources | 98 | * csd_lock/csd_unlock used to serialize access to per-cpu csd resources |
99 | * | 99 | * |
100 | * For non-synchronous ipi calls the csd can still be in use by the | 100 | * For non-synchronous ipi calls the csd can still be in use by the |
101 | * previous function call. For multi-cpu calls it's even more interesting | 101 | * previous function call. For multi-cpu calls it's even more interesting |
102 | * as we'll have to ensure no other cpu is observing our csd. | 102 | * as we'll have to ensure no other cpu is observing our csd. |
103 | */ | 103 | */ |
104 | static void csd_lock_wait(struct call_single_data *data) | 104 | static void csd_lock_wait(struct call_single_data *data) |
105 | { | 105 | { |
106 | while (data->flags & CSD_FLAG_LOCK) | 106 | while (data->flags & CSD_FLAG_LOCK) |
107 | cpu_relax(); | 107 | cpu_relax(); |
108 | } | 108 | } |
109 | 109 | ||
110 | static void csd_lock(struct call_single_data *data) | 110 | static void csd_lock(struct call_single_data *data) |
111 | { | 111 | { |
112 | csd_lock_wait(data); | 112 | csd_lock_wait(data); |
113 | data->flags = CSD_FLAG_LOCK; | 113 | data->flags = CSD_FLAG_LOCK; |
114 | 114 | ||
115 | /* | 115 | /* |
116 | * prevent CPU from reordering the above assignment | 116 | * prevent CPU from reordering the above assignment |
117 | * to ->flags with any subsequent assignments to other | 117 | * to ->flags with any subsequent assignments to other |
118 | * fields of the specified call_single_data structure: | 118 | * fields of the specified call_single_data structure: |
119 | */ | 119 | */ |
120 | smp_mb(); | 120 | smp_mb(); |
121 | } | 121 | } |
122 | 122 | ||
123 | static void csd_unlock(struct call_single_data *data) | 123 | static void csd_unlock(struct call_single_data *data) |
124 | { | 124 | { |
125 | WARN_ON(!(data->flags & CSD_FLAG_LOCK)); | 125 | WARN_ON(!(data->flags & CSD_FLAG_LOCK)); |
126 | 126 | ||
127 | /* | 127 | /* |
128 | * ensure we're all done before releasing data: | 128 | * ensure we're all done before releasing data: |
129 | */ | 129 | */ |
130 | smp_mb(); | 130 | smp_mb(); |
131 | 131 | ||
132 | data->flags &= ~CSD_FLAG_LOCK; | 132 | data->flags &= ~CSD_FLAG_LOCK; |
133 | } | 133 | } |
134 | 134 | ||
135 | /* | 135 | /* |
136 | * Insert a previously allocated call_single_data element | 136 | * Insert a previously allocated call_single_data element |
137 | * for execution on the given CPU. data must already have | 137 | * for execution on the given CPU. data must already have |
138 | * ->func, ->info, and ->flags set. | 138 | * ->func, ->info, and ->flags set. |
139 | */ | 139 | */ |
140 | static | 140 | static |
141 | void generic_exec_single(int cpu, struct call_single_data *data, int wait) | 141 | void generic_exec_single(int cpu, struct call_single_data *data, int wait) |
142 | { | 142 | { |
143 | struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); | 143 | struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); |
144 | unsigned long flags; | 144 | unsigned long flags; |
145 | int ipi; | 145 | int ipi; |
146 | 146 | ||
147 | spin_lock_irqsave(&dst->lock, flags); | 147 | spin_lock_irqsave(&dst->lock, flags); |
148 | ipi = list_empty(&dst->list); | 148 | ipi = list_empty(&dst->list); |
149 | list_add_tail(&data->list, &dst->list); | 149 | list_add_tail(&data->list, &dst->list); |
150 | spin_unlock_irqrestore(&dst->lock, flags); | 150 | spin_unlock_irqrestore(&dst->lock, flags); |
151 | 151 | ||
152 | /* | 152 | /* |
153 | * The list addition should be visible before sending the IPI | 153 | * The list addition should be visible before sending the IPI |
154 | * handler locks the list to pull the entry off it because of | 154 | * handler locks the list to pull the entry off it because of |
155 | * normal cache coherency rules implied by spinlocks. | 155 | * normal cache coherency rules implied by spinlocks. |
156 | * | 156 | * |
157 | * If IPIs can go out of order to the cache coherency protocol | 157 | * If IPIs can go out of order to the cache coherency protocol |
158 | * in an architecture, sufficient synchronisation should be added | 158 | * in an architecture, sufficient synchronisation should be added |
159 | * to arch code to make it appear to obey cache coherency WRT | 159 | * to arch code to make it appear to obey cache coherency WRT |
160 | * locking and barrier primitives. Generic code isn't really | 160 | * locking and barrier primitives. Generic code isn't really |
161 | * equipped to do the right thing... | 161 | * equipped to do the right thing... |
162 | */ | 162 | */ |
163 | if (ipi) | 163 | if (ipi) |
164 | arch_send_call_function_single_ipi(cpu); | 164 | arch_send_call_function_single_ipi(cpu); |
165 | 165 | ||
166 | if (wait) | 166 | if (wait) |
167 | csd_lock_wait(data); | 167 | csd_lock_wait(data); |
168 | } | 168 | } |
169 | 169 | ||
170 | /* | 170 | /* |
171 | * Invoked by arch to handle an IPI for call function. Must be called with | 171 | * Invoked by arch to handle an IPI for call function. Must be called with |
172 | * interrupts disabled. | 172 | * interrupts disabled. |
173 | */ | 173 | */ |
174 | void generic_smp_call_function_interrupt(void) | 174 | void generic_smp_call_function_interrupt(void) |
175 | { | 175 | { |
176 | struct call_function_data *data; | 176 | struct call_function_data *data; |
177 | int cpu = get_cpu(); | 177 | int cpu = get_cpu(); |
178 | 178 | ||
179 | /* | 179 | /* |
180 | * Ensure entry is visible on call_function_queue after we have | 180 | * Ensure entry is visible on call_function_queue after we have |
181 | * entered the IPI. See comment in smp_call_function_many. | 181 | * entered the IPI. See comment in smp_call_function_many. |
182 | * If we don't have this, then we may miss an entry on the list | 182 | * If we don't have this, then we may miss an entry on the list |
183 | * and never get another IPI to process it. | 183 | * and never get another IPI to process it. |
184 | */ | 184 | */ |
185 | smp_mb(); | 185 | smp_mb(); |
186 | 186 | ||
187 | /* | 187 | /* |
188 | * It's ok to use list_for_each_entry_rcu() here even though we may | 188 | * It's ok to use list_for_each_entry_rcu() here even though we may |
189 | * delete 'data', since list_del_rcu() doesn't clear ->next | 189 | * delete 'data', since list_del_rcu() doesn't clear ->next |
190 | */ | 190 | */ |
191 | list_for_each_entry_rcu(data, &call_function.queue, csd.list) { | 191 | list_for_each_entry_rcu(data, &call_function.queue, csd.list) { |
192 | int refs; | 192 | int refs; |
193 | 193 | ||
194 | spin_lock(&data->lock); | 194 | spin_lock(&data->lock); |
195 | if (!cpumask_test_cpu(cpu, data->cpumask)) { | 195 | if (!cpumask_test_cpu(cpu, data->cpumask)) { |
196 | spin_unlock(&data->lock); | 196 | spin_unlock(&data->lock); |
197 | continue; | 197 | continue; |
198 | } | 198 | } |
199 | cpumask_clear_cpu(cpu, data->cpumask); | 199 | cpumask_clear_cpu(cpu, data->cpumask); |
200 | spin_unlock(&data->lock); | 200 | spin_unlock(&data->lock); |
201 | 201 | ||
202 | data->csd.func(data->csd.info); | 202 | data->csd.func(data->csd.info); |
203 | 203 | ||
204 | spin_lock(&data->lock); | 204 | spin_lock(&data->lock); |
205 | WARN_ON(data->refs == 0); | 205 | WARN_ON(data->refs == 0); |
206 | refs = --data->refs; | 206 | refs = --data->refs; |
207 | if (!refs) { | 207 | if (!refs) { |
208 | spin_lock(&call_function.lock); | 208 | spin_lock(&call_function.lock); |
209 | list_del_rcu(&data->csd.list); | 209 | list_del_rcu(&data->csd.list); |
210 | spin_unlock(&call_function.lock); | 210 | spin_unlock(&call_function.lock); |
211 | } | 211 | } |
212 | spin_unlock(&data->lock); | 212 | spin_unlock(&data->lock); |
213 | 213 | ||
214 | if (refs) | 214 | if (refs) |
215 | continue; | 215 | continue; |
216 | 216 | ||
217 | csd_unlock(&data->csd); | 217 | csd_unlock(&data->csd); |
218 | } | 218 | } |
219 | 219 | ||
220 | put_cpu(); | 220 | put_cpu(); |
221 | } | 221 | } |
222 | 222 | ||
223 | /* | 223 | /* |
224 | * Invoked by arch to handle an IPI for call function single. Must be | 224 | * Invoked by arch to handle an IPI for call function single. Must be |
225 | * called from the arch with interrupts disabled. | 225 | * called from the arch with interrupts disabled. |
226 | */ | 226 | */ |
227 | void generic_smp_call_function_single_interrupt(void) | 227 | void generic_smp_call_function_single_interrupt(void) |
228 | { | 228 | { |
229 | struct call_single_queue *q = &__get_cpu_var(call_single_queue); | 229 | struct call_single_queue *q = &__get_cpu_var(call_single_queue); |
230 | unsigned int data_flags; | 230 | unsigned int data_flags; |
231 | LIST_HEAD(list); | 231 | LIST_HEAD(list); |
232 | 232 | ||
233 | spin_lock(&q->lock); | 233 | spin_lock(&q->lock); |
234 | list_replace_init(&q->list, &list); | 234 | list_replace_init(&q->list, &list); |
235 | spin_unlock(&q->lock); | 235 | spin_unlock(&q->lock); |
236 | 236 | ||
237 | while (!list_empty(&list)) { | 237 | while (!list_empty(&list)) { |
238 | struct call_single_data *data; | 238 | struct call_single_data *data; |
239 | 239 | ||
240 | data = list_entry(list.next, struct call_single_data, list); | 240 | data = list_entry(list.next, struct call_single_data, list); |
241 | list_del(&data->list); | 241 | list_del(&data->list); |
242 | 242 | ||
243 | /* | 243 | /* |
244 | * 'data' can be invalid after this call if flags == 0 | 244 | * 'data' can be invalid after this call if flags == 0 |
245 | * (when called through generic_exec_single()), | 245 | * (when called through generic_exec_single()), |
246 | * so save them away before making the call: | 246 | * so save them away before making the call: |
247 | */ | 247 | */ |
248 | data_flags = data->flags; | 248 | data_flags = data->flags; |
249 | 249 | ||
250 | data->func(data->info); | 250 | data->func(data->info); |
251 | 251 | ||
252 | /* | 252 | /* |
253 | * Unlocked CSDs are valid through generic_exec_single(): | 253 | * Unlocked CSDs are valid through generic_exec_single(): |
254 | */ | 254 | */ |
255 | if (data_flags & CSD_FLAG_LOCK) | 255 | if (data_flags & CSD_FLAG_LOCK) |
256 | csd_unlock(data); | 256 | csd_unlock(data); |
257 | } | 257 | } |
258 | } | 258 | } |
259 | 259 | ||
260 | static DEFINE_PER_CPU(struct call_single_data, csd_data); | 260 | static DEFINE_PER_CPU(struct call_single_data, csd_data); |
261 | 261 | ||
262 | /* | 262 | /* |
263 | * smp_call_function_single - Run a function on a specific CPU | 263 | * smp_call_function_single - Run a function on a specific CPU |
264 | * @func: The function to run. This must be fast and non-blocking. | 264 | * @func: The function to run. This must be fast and non-blocking. |
265 | * @info: An arbitrary pointer to pass to the function. | 265 | * @info: An arbitrary pointer to pass to the function. |
266 | * @wait: If true, wait until function has completed on the target CPU. | 266 | * @wait: If true, wait until function has completed on the target CPU. |
267 | * | 267 | * |
268 | * Returns 0 on success, else a negative status code. Note that even | 268 | * Returns 0 on success, else a negative status code. Note that even |
269 | * with @wait == 0 the call may spin briefly: the per-cpu csd is reused, | 269 | * with @wait == 0 the call may spin briefly: the per-cpu csd is reused, |
270 | * so a still-locked csd from a previous call must drain first. | 270 | * so a still-locked csd from a previous call must drain first. |
271 | */ | 271 | */ |
272 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | 272 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, |
273 | int wait) | 273 | int wait) |
274 | { | 274 | { |
275 | struct call_single_data d = { | 275 | struct call_single_data d = { |
276 | .flags = 0, | 276 | .flags = 0, |
277 | }; | 277 | }; |
278 | unsigned long flags; | 278 | unsigned long flags; |
279 | int this_cpu; | 279 | int this_cpu; |
280 | int err = 0; | 280 | int err = 0; |
281 | 281 | ||
282 | /* | 282 | /* |
283 | * prevent preemption and reschedule on another processor, | 283 | * prevent preemption and reschedule on another processor, |
284 | * as well as CPU removal | 284 | * as well as CPU removal |
285 | */ | 285 | */ |
286 | this_cpu = get_cpu(); | 286 | this_cpu = get_cpu(); |
287 | 287 | ||
288 | /* Can deadlock when called with interrupts disabled */ | 288 | /* Can deadlock when called with interrupts disabled */ |
289 | WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); | 289 | WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); |
290 | 290 | ||
291 | if (cpu == this_cpu) { | 291 | if (cpu == this_cpu) { |
292 | local_irq_save(flags); | 292 | local_irq_save(flags); |
293 | func(info); | 293 | func(info); |
294 | local_irq_restore(flags); | 294 | local_irq_restore(flags); |
295 | } else { | 295 | } else { |
296 | if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { | 296 | if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { |
297 | struct call_single_data *data = &d; | 297 | struct call_single_data *data = &d; |
298 | 298 | ||
299 | if (!wait) | 299 | if (!wait) |
300 | data = &__get_cpu_var(csd_data); | 300 | data = &__get_cpu_var(csd_data); |
301 | 301 | ||
302 | csd_lock(data); | 302 | csd_lock(data); |
303 | 303 | ||
304 | data->func = func; | 304 | data->func = func; |
305 | data->info = info; | 305 | data->info = info; |
306 | generic_exec_single(cpu, data, wait); | 306 | generic_exec_single(cpu, data, wait); |
307 | } else { | 307 | } else { |
308 | err = -ENXIO; /* CPU not online */ | 308 | err = -ENXIO; /* CPU not online */ |
309 | } | 309 | } |
310 | } | 310 | } |
311 | 311 | ||
312 | put_cpu(); | 312 | put_cpu(); |
313 | 313 | ||
314 | return err; | 314 | return err; |
315 | } | 315 | } |
316 | EXPORT_SYMBOL(smp_call_function_single); | 316 | EXPORT_SYMBOL(smp_call_function_single); |
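A usage sketch (hypothetical module code, not part of this file): run a helper on one specific CPU and wait, so the on-stack result stays valid until the helper has written it:

    #include <linux/jiffies.h>
    #include <linux/kernel.h>
    #include <linux/smp.h>

    static void read_jiffies_there(void *info)
    {
            *(unsigned long *)info = jiffies;  /* runs on the target CPU */
    }

    static int sample_cpu(int cpu)
    {
            unsigned long val = 0;
            int err;

            /* wait=1 because 'val' lives on our stack: the helper must
             * have finished before this frame can unwind. */
            err = smp_call_function_single(cpu, read_jiffies_there, &val, 1);
            if (!err)
                    printk(KERN_INFO "cpu%d: jiffies=%lu\n", cpu, val);
            return err;     /* -ENXIO if the CPU was offline */
    }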
317 | 317 | ||
318 | /** | 318 | /** |
319 | * __smp_call_function_single(): Run a function on another CPU | 319 | * __smp_call_function_single(): Run a function on another CPU |
320 | * @cpu: The CPU to run on. | 320 | * @cpu: The CPU to run on. |
321 | * @data: Pre-allocated and setup data structure | 321 | * @data: Pre-allocated and setup data structure |
322 | * @wait: If true, wait until @data->func has completed on @cpu. | 322 | * @wait: If true, wait until @data->func has completed on @cpu. |
323 | * Like smp_call_function_single(), but allow caller to pass in a | 323 | * Like smp_call_function_single(), but allow caller to pass in a |
324 | * pre-allocated data structure. Useful for embedding @data inside | 324 | * pre-allocated data structure. Useful for embedding @data inside |
325 | * other structures, for instance. | 325 | * other structures, for instance. |
326 | */ | 326 | */ |
327 | void __smp_call_function_single(int cpu, struct call_single_data *data, | 327 | void __smp_call_function_single(int cpu, struct call_single_data *data, |
328 | int wait) | 328 | int wait) |
329 | { | 329 | { |
330 | csd_lock(data); | 330 | csd_lock(data); |
331 | 331 | ||
332 | /* Can deadlock when called with interrupts disabled */ | 332 | /* Can deadlock when called with interrupts disabled */ |
333 | WARN_ON_ONCE(wait && irqs_disabled() && !oops_in_progress); | 333 | WARN_ON_ONCE(wait && irqs_disabled() && !oops_in_progress); |
334 | 334 | ||
335 | generic_exec_single(cpu, data, wait); | 335 | generic_exec_single(cpu, data, wait); |
336 | } | 336 | } |
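The embedding that the comment above suggests might look like this (hypothetical structure and names; the object must stay alive until the remote CPU has processed it):

    #include <linux/smp.h>

    struct remote_work {
            struct call_single_data csd;
            int payload;
    };

    static void remote_func(void *info)
    {
            struct remote_work *w = info;

            /* IPI context on the remote CPU: no sleeping here */
            (void)w->payload;
    }

    static void queue_remote(int cpu, struct remote_work *w)
    {
            w->csd.flags = 0;       /* csd_lock() takes it from here */
            w->csd.func = remote_func;
            w->csd.info = w;
            __smp_call_function_single(cpu, &w->csd, 0); /* don't wait */
    }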
337 | 337 | ||
338 | /* Deprecated: shim for archs using old arch_send_call_function_ipi API. */ | 338 | /* Deprecated: shim for archs using old arch_send_call_function_ipi API. */ |
339 | 339 | ||
340 | #ifndef arch_send_call_function_ipi_mask | 340 | #ifndef arch_send_call_function_ipi_mask |
341 | # define arch_send_call_function_ipi_mask(maskp) \ | 341 | # define arch_send_call_function_ipi_mask(maskp) \ |
342 | arch_send_call_function_ipi(*(maskp)) | 342 | arch_send_call_function_ipi(*(maskp)) |
343 | #endif | 343 | #endif |
344 | 344 | ||
345 | /** | 345 | /** |
346 | * smp_call_function_many(): Run a function on a set of other CPUs. | 346 | * smp_call_function_many(): Run a function on a set of other CPUs. |
347 | * @mask: The set of cpus to run on (only runs on online subset). | 347 | * @mask: The set of cpus to run on (only runs on online subset). |
348 | * @func: The function to run. This must be fast and non-blocking. | 348 | * @func: The function to run. This must be fast and non-blocking. |
349 | * @info: An arbitrary pointer to pass to the function. | 349 | * @info: An arbitrary pointer to pass to the function. |
350 | * @wait: If true, wait (atomically) until function has completed | 350 | * @wait: If true, wait (atomically) until function has completed |
351 | * on other CPUs. | 351 | * on other CPUs. |
352 | * | 352 | * |
353 | * If @wait is true, then returns once @func has returned. Note that the | 353 | * If @wait is true, then returns once @func has returned. Note that the |
354 | * per-cpu call data is reused, so the call may spin briefly until a | 354 | * per-cpu call data is reused, so the call may spin briefly until a |
355 | * previous invocation from this CPU has drained. | 355 | * previous invocation from this CPU has drained. |
356 | * | 356 | * |
357 | * You must not call this function with disabled interrupts or from a | 357 | * You must not call this function with disabled interrupts or from a |
358 | * hardware interrupt handler or from a bottom half handler. Preemption | 358 | * hardware interrupt handler or from a bottom half handler. Preemption |
359 | * must be disabled when calling this function. | 359 | * must be disabled when calling this function. |
360 | */ | 360 | */ |
361 | void smp_call_function_many(const struct cpumask *mask, | 361 | void smp_call_function_many(const struct cpumask *mask, |
362 | void (*func)(void *), void *info, bool wait) | 362 | void (*func)(void *), void *info, bool wait) |
363 | { | 363 | { |
364 | struct call_function_data *data; | 364 | struct call_function_data *data; |
365 | unsigned long flags; | 365 | unsigned long flags; |
366 | int cpu, next_cpu, this_cpu = smp_processor_id(); | 366 | int cpu, next_cpu, this_cpu = smp_processor_id(); |
367 | 367 | ||
368 | /* Can deadlock when called with interrupts disabled */ | 368 | /* Can deadlock when called with interrupts disabled */ |
369 | WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); | 369 | WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); |
370 | 370 | ||
371 | /* So, which CPU do they want? Ignoring this one. */ | 371 | /* So, which CPU do they want? Ignoring this one. */ |
372 | cpu = cpumask_first_and(mask, cpu_online_mask); | 372 | cpu = cpumask_first_and(mask, cpu_online_mask); |
373 | if (cpu == this_cpu) | 373 | if (cpu == this_cpu) |
374 | cpu = cpumask_next_and(cpu, mask, cpu_online_mask); | 374 | cpu = cpumask_next_and(cpu, mask, cpu_online_mask); |
375 | 375 | ||
376 | /* No online cpus? We're done. */ | 376 | /* No online cpus? We're done. */ |
377 | if (cpu >= nr_cpu_ids) | 377 | if (cpu >= nr_cpu_ids) |
378 | return; | 378 | return; |
379 | 379 | ||
380 | /* Do we have another CPU which isn't us? */ | 380 | /* Do we have another CPU which isn't us? */ |
381 | next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask); | 381 | next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask); |
382 | if (next_cpu == this_cpu) | 382 | if (next_cpu == this_cpu) |
383 | next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask); | 383 | next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask); |
384 | 384 | ||
385 | /* Fastpath: do that cpu by itself. */ | 385 | /* Fastpath: do that cpu by itself. */ |
386 | if (next_cpu >= nr_cpu_ids) { | 386 | if (next_cpu >= nr_cpu_ids) { |
387 | smp_call_function_single(cpu, func, info, wait); | 387 | smp_call_function_single(cpu, func, info, wait); |
388 | return; | 388 | return; |
389 | } | 389 | } |
390 | 390 | ||
391 | data = &__get_cpu_var(cfd_data); | 391 | data = &__get_cpu_var(cfd_data); |
392 | csd_lock(&data->csd); | 392 | csd_lock(&data->csd); |
393 | 393 | ||
394 | spin_lock_irqsave(&data->lock, flags); | 394 | spin_lock_irqsave(&data->lock, flags); |
395 | data->csd.func = func; | 395 | data->csd.func = func; |
396 | data->csd.info = info; | 396 | data->csd.info = info; |
397 | cpumask_and(data->cpumask, mask, cpu_online_mask); | 397 | cpumask_and(data->cpumask, mask, cpu_online_mask); |
398 | cpumask_clear_cpu(this_cpu, data->cpumask); | 398 | cpumask_clear_cpu(this_cpu, data->cpumask); |
399 | data->refs = cpumask_weight(data->cpumask); | 399 | data->refs = cpumask_weight(data->cpumask); |
400 | 400 | ||
401 | spin_lock(&call_function.lock); | 401 | spin_lock(&call_function.lock); |
402 | /* | 402 | /* |
403 | * Place entry at the _HEAD_ of the list, so that any cpu still | 403 | * Place entry at the _HEAD_ of the list, so that any cpu still |
404 | * observing the entry in generic_smp_call_function_interrupt() | 404 | * observing the entry in generic_smp_call_function_interrupt() |
405 | * will not miss any other list entries: | 405 | * will not miss any other list entries: |
406 | */ | 406 | */ |
407 | list_add_rcu(&data->csd.list, &call_function.queue); | 407 | list_add_rcu(&data->csd.list, &call_function.queue); |
408 | spin_unlock(&call_function.lock); | 408 | spin_unlock(&call_function.lock); |
409 | 409 | ||
410 | spin_unlock_irqrestore(&data->lock, flags); | 410 | spin_unlock_irqrestore(&data->lock, flags); |
411 | 411 | ||
412 | /* | 412 | /* |
413 | * Make the list addition visible before sending the ipi. | 413 | * Make the list addition visible before sending the ipi. |
414 | * (IPIs must obey or appear to obey normal Linux cache | 414 | * (IPIs must obey or appear to obey normal Linux cache |
415 | * coherency rules -- see comment in generic_exec_single). | 415 | * coherency rules -- see comment in generic_exec_single). |
416 | */ | 416 | */ |
417 | smp_mb(); | 417 | smp_mb(); |
418 | 418 | ||
419 | /* Send a message to all CPUs in the map */ | 419 | /* Send a message to all CPUs in the map */ |
420 | arch_send_call_function_ipi_mask(data->cpumask); | 420 | arch_send_call_function_ipi_mask(data->cpumask); |
421 | 421 | ||
422 | /* Optionally wait for the CPUs to complete */ | 422 | /* Optionally wait for the CPUs to complete */ |
423 | if (wait) | 423 | if (wait) |
424 | csd_lock_wait(&data->csd); | 424 | csd_lock_wait(&data->csd); |
425 | } | 425 | } |
426 | EXPORT_SYMBOL(smp_call_function_many); | 426 | EXPORT_SYMBOL(smp_call_function_many); |
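A usage sketch for the mask-based call (hypothetical helper). Note that smp_call_function_many() never runs @func on the calling CPU, so the caller covers itself when the local CPU is in the mask, and preemption must already be disabled:

    #include <linux/cpumask.h>
    #include <linux/kernel.h>
    #include <linux/smp.h>

    static void drain_local(void *unused)
    {
            /* per-cpu work; runs in IPI context, must not sleep */
    }

    static void drain_cpus(const struct cpumask *cpus)
    {
            preempt_disable();      /* required by smp_call_function_many() */
            smp_call_function_many(cpus, drain_local, NULL, true);
            if (cpumask_test_cpu(smp_processor_id(), cpus))
                    drain_local(NULL);      /* 'many' skips the caller */
            preempt_enable();
    }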
427 | 427 | ||
428 | /** | 428 | /** |
429 | * smp_call_function(): Run a function on all other CPUs. | 429 | * smp_call_function(): Run a function on all other CPUs. |
430 | * @func: The function to run. This must be fast and non-blocking. | 430 | * @func: The function to run. This must be fast and non-blocking. |
431 | * @info: An arbitrary pointer to pass to the function. | 431 | * @info: An arbitrary pointer to pass to the function. |
432 | * @wait: If true, wait (atomically) until function has completed | 432 | * @wait: If true, wait (atomically) until function has completed |
433 | * on other CPUs. | 433 | * on other CPUs. |
434 | * | 434 | * |
435 | * Returns 0. | 435 | * Returns 0. |
436 | * | 436 | * |
437 | * If @wait is true, then returns once @func has returned; otherwise | 437 | * If @wait is true, then returns once @func has returned; otherwise |
438 | * it returns just before the target cpus call @func. No allocation is | 438 | * it returns just before the target cpus call @func. No allocation is |
439 | * performed; the per-cpu call data is reused, as in smp_call_function_many(). | 439 | * performed; the per-cpu call data is reused, as in smp_call_function_many(). |
440 | * | 440 | * |
441 | * You must not call this function with disabled interrupts or from a | 441 | * You must not call this function with disabled interrupts or from a |
442 | * hardware interrupt handler or from a bottom half handler. | 442 | * hardware interrupt handler or from a bottom half handler. |
443 | */ | 443 | */ |
444 | int smp_call_function(void (*func)(void *), void *info, int wait) | 444 | int smp_call_function(void (*func)(void *), void *info, int wait) |
445 | { | 445 | { |
446 | preempt_disable(); | 446 | preempt_disable(); |
447 | smp_call_function_many(cpu_online_mask, func, info, wait); | 447 | smp_call_function_many(cpu_online_mask, func, info, wait); |
448 | preempt_enable(); | 448 | preempt_enable(); |
449 | 449 | ||
450 | return 0; | 450 | return 0; |
451 | } | 451 | } |
452 | EXPORT_SYMBOL(smp_call_function); | 452 | EXPORT_SYMBOL(smp_call_function); |
453 | 453 | ||
454 | void ipi_call_lock(void) | 454 | void ipi_call_lock(void) |
455 | { | 455 | { |
456 | spin_lock(&call_function.lock); | 456 | spin_lock(&call_function.lock); |
457 | } | 457 | } |
458 | 458 | ||
459 | void ipi_call_unlock(void) | 459 | void ipi_call_unlock(void) |
460 | { | 460 | { |
461 | spin_unlock(&call_function.lock); | 461 | spin_unlock(&call_function.lock); |
462 | } | 462 | } |
463 | 463 | ||
464 | void ipi_call_lock_irq(void) | 464 | void ipi_call_lock_irq(void) |
465 | { | 465 | { |
466 | spin_lock_irq(&call_function.lock); | 466 | spin_lock_irq(&call_function.lock); |
467 | } | 467 | } |
468 | 468 | ||
469 | void ipi_call_unlock_irq(void) | 469 | void ipi_call_unlock_irq(void) |
470 | { | 470 | { |
471 | spin_unlock_irq(&call_function.lock); | 471 | spin_unlock_irq(&call_function.lock); |
472 | } | 472 | } |
473 | 473 |