Commit ca74a6f84e68b44867022f4a4f3ec17c087c864e
Committed by
Ingo Molnar
1 parent
7517527891
Exists in
master
and in
7 other branches
x86: optimize lock prefix switching to run less frequently
On VMs implemented using JITs that cache translated code, changing the lock prefixes is a quite costly operation that forces the JIT to throw away and retranslate a lot of code. Previously an SMP kernel would rewrite the locks once for each CPU, which is quite unnecessary. This patch changes the code to never switch at boot in the normal case (SMP kernel booting with >1 CPU) or to switch only once for an SMP kernel on UP. This makes a significant difference in boot-up performance on AMD SimNow! I also expect it to be a little faster on native systems, because an SMP switch does a lot of text_poke()s, each of which synchronizes the pipeline. v1->v2: Rename max_cpus. v1->v2: Fix off-by-one in UP check (Thomas Gleixner). Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Showing 3 changed files with 24 additions and 10 deletions Side-by-side Diff
arch/x86/kernel/alternative.c
... | ... | @@ -273,6 +273,7 @@ |
273 | 273 | }; |
274 | 274 | static LIST_HEAD(smp_alt_modules); |
275 | 275 | static DEFINE_SPINLOCK(smp_alt); |
276 | +static int smp_mode = 1; /* protected by smp_alt */ | |
276 | 277 | |
277 | 278 | void alternatives_smp_module_add(struct module *mod, char *name, |
278 | 279 | void *locks, void *locks_end, |
... | ... | @@ -354,7 +355,14 @@ |
354 | 355 | BUG_ON(!smp && (num_online_cpus() > 1)); |
355 | 356 | |
356 | 357 | spin_lock_irqsave(&smp_alt, flags); |
357 | - if (smp) { | |
358 | + | |
359 | + /* | |
360 | + * Avoid unnecessary switches because it forces JIT based VMs to | |
361 | + * throw away all cached translations, which can be quite costly. | |
362 | + */ | |
363 | + if (smp == smp_mode) { | |
364 | + /* nothing */ | |
365 | + } else if (smp) { | |
358 | 366 | printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); |
359 | 367 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); |
360 | 368 | clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); |
... | ... | @@ -369,6 +377,7 @@ |
369 | 377 | alternatives_smp_unlock(mod->locks, mod->locks_end, |
370 | 378 | mod->text, mod->text_end); |
371 | 379 | } |
380 | + smp_mode = smp; | |
372 | 381 | spin_unlock_irqrestore(&smp_alt, flags); |
373 | 382 | } |
374 | 383 | |
... | ... | @@ -441,7 +450,10 @@ |
441 | 450 | alternatives_smp_module_add(NULL, "core kernel", |
442 | 451 | __smp_locks, __smp_locks_end, |
443 | 452 | _text, _etext); |
444 | - alternatives_smp_switch(0); | |
453 | + | |
454 | + /* Only switch to UP mode if we don't immediately boot others */ | |
455 | + if (num_possible_cpus() == 1 || setup_max_cpus <= 1) | |
456 | + alternatives_smp_switch(0); | |
445 | 457 | } |
446 | 458 | #endif |
447 | 459 | apply_paravirt(__parainstructions, __parainstructions_end); |
include/linux/smp.h
init/main.c
... | ... | @@ -128,7 +128,7 @@ |
128 | 128 | |
129 | 129 | #ifdef CONFIG_SMP |
130 | 130 | /* Setup configured maximum number of CPUs to activate */ |
131 | -static unsigned int __initdata max_cpus = NR_CPUS; | |
131 | +unsigned int __initdata setup_max_cpus = NR_CPUS; | |
132 | 132 | |
133 | 133 | /* |
134 | 134 | * Setup routine for controlling SMP activation |
... | ... | @@ -146,7 +146,7 @@ |
146 | 146 | |
147 | 147 | static int __init nosmp(char *str) |
148 | 148 | { |
149 | - max_cpus = 0; | |
149 | + setup_max_cpus = 0; | |
150 | 150 | disable_ioapic_setup(); |
151 | 151 | return 0; |
152 | 152 | } |
... | ... | @@ -155,8 +155,8 @@ |
155 | 155 | |
156 | 156 | static int __init maxcpus(char *str) |
157 | 157 | { |
158 | - get_option(&str, &max_cpus); | |
159 | - if (max_cpus == 0) | |
158 | + get_option(&str, &setup_max_cpus); | |
159 | + if (setup_max_cpus == 0) | |
160 | 160 | disable_ioapic_setup(); |
161 | 161 | |
162 | 162 | return 0; |
... | ... | @@ -164,7 +164,7 @@ |
164 | 164 | |
165 | 165 | early_param("maxcpus", maxcpus); |
166 | 166 | #else |
167 | -#define max_cpus NR_CPUS | |
167 | +#define setup_max_cpus NR_CPUS | |
168 | 168 | #endif |
169 | 169 | |
170 | 170 | /* |
... | ... | @@ -393,7 +393,7 @@ |
393 | 393 | |
394 | 394 | /* FIXME: This should be done in userspace --RR */ |
395 | 395 | for_each_present_cpu(cpu) { |
396 | - if (num_online_cpus() >= max_cpus) | |
396 | + if (num_online_cpus() >= setup_max_cpus) | |
397 | 397 | break; |
398 | 398 | if (!cpu_online(cpu)) |
399 | 399 | cpu_up(cpu); |
... | ... | @@ -401,7 +401,7 @@ |
401 | 401 | |
402 | 402 | /* Any cleanup work */ |
403 | 403 | printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus()); |
404 | - smp_cpus_done(max_cpus); | |
404 | + smp_cpus_done(setup_max_cpus); | |
405 | 405 | } |
406 | 406 | |
407 | 407 | #endif |
... | ... | @@ -824,7 +824,7 @@ |
824 | 824 | __set_special_pids(1, 1); |
825 | 825 | cad_pid = task_pid(current); |
826 | 826 | |
827 | - smp_prepare_cpus(max_cpus); | |
827 | + smp_prepare_cpus(setup_max_cpus); | |
828 | 828 | |
829 | 829 | do_pre_smp_initcalls(); |
830 | 830 |