Commit b565201cf75210614903ef2ae5917b4379681647
Committed by
Ingo Molnar
1 parent
1ea7c6737c
Exists in
master
and in
6 other branches
x86: Reduce clock calibration time during slave cpu startup
Reduce the startup time for slave cpus. Add hooks for an arch-specific function for clock calibration. These hooks are used on x86. If a newly started cpu has the same phys_proc_id as a core that is already active, uses the TSC for the delay loop, and has CONSTANT_TSC, then the already-calculated value of loops_per_jiffy is reused instead of recalibrating. This patch reduces the time required to start slave cpus on a 4096 cpu system from 465 sec (OLD) to 62 sec (NEW). This reduces boot time on a 4096p system by almost 7 minutes. Nice... Signed-off-by: Jack Steiner <steiner@sgi.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: John Stultz <john.stultz@linaro.org> [fix CONFIG_SMP=n build] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 3 changed files with 46 additions and 5 deletions Side-by-side Diff
arch/x86/kernel/smpboot.c
... | ... | @@ -207,21 +207,27 @@ |
207 | 207 | * Need to setup vector mappings before we enable interrupts. |
208 | 208 | */ |
209 | 209 | setup_vector_irq(smp_processor_id()); |
210 | + | |
210 | 211 | /* |
212 | + * Save our processor parameters. Note: this information | |
213 | + * is needed for clock calibration. | |
214 | + */ | |
215 | + smp_store_cpu_info(cpuid); | |
216 | + | |
217 | + /* | |
211 | 218 | * Get our bogomips. |
219 | + * Update loops_per_jiffy in cpu_data. Previous call to | |
220 | + * smp_store_cpu_info() stored a value that is close but not as | |
221 | + * accurate as the value just calculated. | |
212 | 222 | * |
213 | 223 | * Need to enable IRQs because it can take longer and then |
214 | 224 | * the NMI watchdog might kill us. |
215 | 225 | */ |
216 | 226 | local_irq_enable(); |
217 | 227 | calibrate_delay(); |
228 | + cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; | |
218 | 229 | local_irq_disable(); |
219 | 230 | pr_debug("Stack at about %p\n", &cpuid); |
220 | - | |
221 | - /* | |
222 | - * Save our processor parameters | |
223 | - */ | |
224 | - smp_store_cpu_info(cpuid); | |
225 | 231 | |
226 | 232 | /* |
227 | 233 | * This must be done before setting cpu_online_mask |
arch/x86/kernel/tsc.c
... | ... | @@ -994,4 +994,25 @@ |
994 | 994 | |
995 | 995 | check_system_tsc_reliable(); |
996 | 996 | } |
997 | + | |
998 | +#ifdef CONFIG_SMP | |
999 | +/* | |
1000 | + * If we have a constant TSC and are using the TSC for the delay loop, return | |
1001 | + * the loops_per_jiffy of an online cpu in the same socket that has already | |
1002 | + * been calibrated; otherwise return 0. This assumes that CONSTANT_TSC applies | |
1003 | + * to all cpus in the socket - this should be a safe assumption. | |
1004 | + */ | |
1005 | +unsigned long __cpuinit calibrate_delay_is_known(void) | |
1006 | +{ | |
1007 | + int i, cpu = smp_processor_id(); | |
1008 | + | |
1009 | + if (tsc_disabled || !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) | |
1010 | + return 0; | |
1011 | + | |
1012 | + for_each_online_cpu(i) | |
1013 | + if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id) | |
1014 | + return cpu_data(i).loops_per_jiffy; | |
1015 | + return 0; | |
1016 | +} | |
1017 | +#endif |
init/calibrate.c
... | ... | @@ -246,6 +246,19 @@ |
246 | 246 | |
247 | 247 | static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 }; |
248 | 248 | |
249 | +/* | |
250 | + * Return the cpu's calibration delay if it is already known, or 0 if it | |
251 | + * is not and must be measured. For example, some processors with | |
252 | + * multi-core sockets may have all cores with the same calibration delay. | |
253 | + * | |
254 | + * This weak default always returns 0. Architectures should override this | |
255 | + * function if a faster calibration method is available. | |
256 | + */ | |
257 | +unsigned long __attribute__((weak)) __cpuinit calibrate_delay_is_known(void) | |
258 | +{ | |
259 | + return 0; | |
260 | +} | |
261 | + | |
249 | 262 | void __cpuinit calibrate_delay(void) |
250 | 263 | { |
251 | 264 | unsigned long lpj; |
... | ... | @@ -265,6 +278,8 @@ |
265 | 278 | lpj = lpj_fine; |
266 | 279 | pr_info("Calibrating delay loop (skipped), " |
267 | 280 | "value calculated using timer frequency.. "); |
281 | + } else if ((lpj = calibrate_delay_is_known())) { | |
282 | + ; | |
268 | 283 | } else if ((lpj = calibrate_delay_direct()) != 0) { |
269 | 284 | if (!printed) |
270 | 285 | pr_info("Calibrating delay using timer " |