Commit b565201cf75210614903ef2ae5917b4379681647

Authored by Jack Steiner
Committed by Ingo Molnar
1 parent 1ea7c6737c

x86: Reduce clock calibration time during slave cpu startup

Reduce the startup time for slave cpus.

Add a hook for an arch-specific clock calibration function and
implement it on x86.  If a newly started cpu has the same
phys_proc_id as an already-active core, uses the TSC for the
delay loop, and has a constant TSC (CONSTANT_TSC), reuse the
already-calculated value of loops_per_jiffy.
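
The hook is a weak symbol: init/calibrate.c (below) provides a weak
default that returns 0 ("not known"), and arch/x86/kernel/tsc.c links
in a strong definition that overrides it.  A minimal standalone sketch
of that pattern, with hypothetical demo file names (not kernel code):

/* calibrate_default.c -- generic weak fallback */
unsigned long __attribute__((weak)) calibrate_delay_is_known(void)
{
        return 0;       /* 0 means "unknown, run the full calibration" */
}

/* calibrate_arch.c -- arch override; the strong symbol wins at link time */
unsigned long calibrate_delay_is_known(void)
{
        return 4589490; /* pretend a value is already known */
}

/* main.c */
#include <stdio.h>
unsigned long calibrate_delay_is_known(void);

int main(void)
{
        printf("lpj = %lu\n", calibrate_delay_is_known());
        return 0;
}

Linking all three files prints the arch value; dropping
calibrate_arch.c from the link falls back to the weak default of 0.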

This patch reduces the time required to start the slave cpus on a
4096 cpu system from 465 seconds (old) to 62 seconds (new).

This reduces boot time on a 4096p system by almost 7 minutes.
Nice...

Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: John Stultz <john.stultz@linaro.org>
[fix CONFIG_SMP=n build]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 3 changed files with 46 additions and 5 deletions

arch/x86/kernel/smpboot.c
... ... @@ -207,21 +207,27 @@
207 207 * Need to setup vector mappings before we enable interrupts.
208 208 */
209 209 setup_vector_irq(smp_processor_id());
  210 +
210 211 /*
  212 + * Save our processor parameters. Note: this information
  213 + * is needed for clock calibration.
  214 + */
  215 + smp_store_cpu_info(cpuid);
  216 +
  217 + /*
211 218 * Get our bogomips.
  219 + * Update loops_per_jiffy in cpu_data. Previous call to
  220 + * smp_store_cpu_info() stored a value that is close but not as
  221 + * accurate as the value just calculated.
212 222 *
213 223 * Need to enable IRQs because it can take longer and then
214 224 * the NMI watchdog might kill us.
215 225 */
216 226 local_irq_enable();
217 227 calibrate_delay();
  228 + cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
218 229 local_irq_disable();
219 230 pr_debug("Stack at about %p\n", &cpuid);
220   -
221   - /*
222   - * Save our processor parameters
223   - */
224   - smp_store_cpu_info(cpuid);
225 231  
226 232 /*
227 233 * This must be done before setting cpu_online_mask
arch/x86/kernel/tsc.c
... ... @@ -994,4 +994,25 @@
994 994  
995 995 check_system_tsc_reliable();
996 996 }
  997 +
  998 +#ifdef CONFIG_SMP
  999 +/*
  1000 + * If we have a constant TSC and are using the TSC for the delay loop,
  1001 + * we can skip clock calibration if another cpu in the same socket has already
  1002 + * been calibrated. This assumes that CONSTANT_TSC applies to all
  1003 + * cpus in the socket - this should be a safe assumption.
  1004 + */
  1005 +unsigned long __cpuinit calibrate_delay_is_known(void)
  1006 +{
  1007 + int i, cpu = smp_processor_id();
  1008 +
  1009 + if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
  1010 + return 0;
  1011 +
  1012 + for_each_online_cpu(i)
  1013 + if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
  1014 + return cpu_data(i).loops_per_jiffy;
  1015 + return 0;
  1016 +}
  1017 +#endif
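
The search loop reuses the first online cpu that shares phys_proc_id
(i.e. the same socket) with the booting cpu; the booting cpu is not
itself online yet, so the first core on each socket still runs a full
calibration.  A toy userspace model of that reuse logic, with
hypothetical names such as boot_cpu() (not kernel code):

#include <stdio.h>

#define NR_CPUS 4

struct cpu_info {
        int online;                     /* finished booting */
        int phys_proc_id;               /* socket id */
        unsigned long loops_per_jiffy;
};

static struct cpu_info cpu_data[NR_CPUS];

/* analogue of the x86 calibrate_delay_is_known() above */
static unsigned long lpj_is_known(int cpu)
{
        for (int i = 0; i < NR_CPUS; i++)
                if (cpu_data[i].online &&
                    cpu_data[i].phys_proc_id == cpu_data[cpu].phys_proc_id)
                        return cpu_data[i].loops_per_jiffy;
        return 0;
}

static void boot_cpu(int cpu, int socket)
{
        /* analogue of smp_store_cpu_info(): must run first so that
         * lpj_is_known() can see this cpu's socket id */
        cpu_data[cpu].phys_proc_id = socket;

        unsigned long lpj = lpj_is_known(cpu);
        if (!lpj)
                lpj = 4589490;  /* pretend we ran the slow calibration */

        /* publish the value for siblings that boot later */
        cpu_data[cpu].loops_per_jiffy = lpj;
        cpu_data[cpu].online = 1;
        printf("cpu%d (socket %d): lpj=%lu\n", cpu, socket, lpj);
}

int main(void)
{
        boot_cpu(0, 0);         /* full calibration */
        boot_cpu(1, 0);         /* same socket: reuses cpu0's value */
        boot_cpu(2, 1);         /* new socket: calibrates again */
        return 0;
}

This also shows why the smpboot.c hunk moves smp_store_cpu_info()
ahead of calibrate_delay(): the fast path reads cpu_data for the
booting cpu before its calibration has run.
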
init/calibrate.c
... ... @@ -246,6 +246,19 @@
246 246  
247 247 static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 };
248 248  
  249 +/*
  250 + * Check if cpu calibration delay is already known. For example,
  251 + * some processors with multi-core sockets may have all cores
  252 + * with the same calibration delay.
  253 + *
  254 + * Architectures should override this function if a faster calibration
  255 + * method is available.
  256 + */
  257 +unsigned long __attribute__((weak)) __cpuinit calibrate_delay_is_known(void)
  258 +{
  259 + return 0;
  260 +}
  261 +
249 262 void __cpuinit calibrate_delay(void)
250 263 {
251 264 unsigned long lpj;
... ... @@ -265,6 +278,8 @@
265 278 lpj = lpj_fine;
266 279 pr_info("Calibrating delay loop (skipped), "
267 280 "value calculated using timer frequency.. ");
  281 + } else if ((lpj = calibrate_delay_is_known())) {
  282 + ;
268 283 } else if ((lpj = calibrate_delay_direct()) != 0) {
269 284 if (!printed)
270 285 pr_info("Calibrating delay using timer "
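
For context, here is the resulting decision order in calibrate_delay():
the new branch sits after the lpj_fine shortcut and before the
timer-based direct calibration, so a cpu whose value is already known
skips both slow paths.  A condensed, compilable sketch with stubs in
place of the kernel's helpers (earlier branches paraphrased from the
init/calibrate.c of this era; pick_lpj() is a hypothetical name):

#include <stdio.h>

static unsigned long preset_lpj;        /* "lpj=" kernel parameter */
static unsigned long lpj_fine;          /* early fine-grained-timer value */

static unsigned long calibrate_delay_is_known(void) { return 0; }       /* arch hook */
static unsigned long calibrate_delay_direct(void)   { return 0; }       /* timer-based */
static unsigned long calibrate_delay_converge(void) { return 4589490; } /* bogomips loop */

unsigned long pick_lpj(void)
{
        unsigned long lpj;

        if (preset_lpj)                                 /* command-line override */
                lpj = preset_lpj;
        else if (lpj_fine)                              /* timer-frequency value */
                lpj = lpj_fine;
        else if ((lpj = calibrate_delay_is_known()))    /* NEW: sibling's value */
                ;
        else if ((lpj = calibrate_delay_direct()))      /* direct measurement */
                ;
        else
                lpj = calibrate_delay_converge();       /* classic converge loop */
        return lpj;
}

int main(void)
{
        printf("lpj = %lu\n", pick_lpj());      /* all stubs 0: falls through to converge */
        return 0;
}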