Commit 14746306afe705f1d41bd51774a4378b1aec562d
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Thomas Gleixner:
 "Hopefully the last round of fixes for 3.19

   - regression fix for the LDT changes
   - regression fix for XEN interrupt handling caused by the APIC changes
   - regression fixes for the PAT changes
   - last minute fixes for the new MPX support
   - regression fix for 32bit UP
   - fix for a long standing relocation issue on 64bit tagged for stable
   - functional fix for the Hyper-V clocksource tagged for stable
   - downgrade of a pr_err which tends to confuse users

  Looks a bit on the large side, but almost half of it are valuable comments"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  - x86/tsc: Change Fast TSC calibration failed from error to info
  - x86/apic: Re-enable PCI_MSI support for non-SMP X86_32
  - x86, mm: Change cachemode exports to non-gpl
  - x86, tls: Interpret an all-zero struct user_desc as "no segment"
  - x86, tls, ldt: Stop checking lm in LDT_empty
  - x86, mpx: Strictly enforce empty prctl() args
  - x86, mpx: Fix potential performance issue on unmaps
  - x86, mpx: Explicitly disable 32-bit MPX support on 64-bit kernels
  - x86, hyperv: Mark the Hyper-V clocksource as being continuous
  - x86: Don't rely on VMWare emulating PAT MSR correctly
  - x86, irq: Properly tag virtualization entry in /proc/interrupts
  - x86, boot: Skip relocs when load address unchanged
  - x86/xen: Override ACPI IRQ management callback __acpi_unregister_gsi
  - ACPI: pci: Do not clear pci_dev->irq in acpi_pci_irq_disable()
  - x86/xen: Treat SCI interrupt as normal GSI interrupt
Showing 16 changed files Side-by-side Diff
- arch/x86/Kconfig
- arch/x86/boot/compressed/misc.c
- arch/x86/include/asm/acpi.h
- arch/x86/include/asm/desc.h
- arch/x86/include/asm/mmu_context.h
- arch/x86/kernel/acpi/boot.c
- arch/x86/kernel/cpu/mshyperv.c
- arch/x86/kernel/irq.c
- arch/x86/kernel/tls.c
- arch/x86/kernel/tsc.c
- arch/x86/mm/init.c
- arch/x86/mm/mpx.c
- arch/x86/mm/pat.c
- arch/x86/pci/xen.c
- drivers/acpi/pci_irq.c
- kernel/sys.c
arch/x86/Kconfig
... | ... | @@ -857,7 +857,7 @@ |
857 | 857 | |
858 | 858 | config X86_UP_APIC |
859 | 859 | bool "Local APIC support on uniprocessors" |
860 | - depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI | |
860 | + depends on X86_32 && !SMP && !X86_32_NON_STANDARD | |
861 | 861 | ---help--- |
862 | 862 | A local APIC (Advanced Programmable Interrupt Controller) is an |
863 | 863 | integrated interrupt controller in the CPU. If you have a single-CPU |
... | ... | @@ -867,6 +867,10 @@ |
867 | 867 | all. The local APIC supports CPU-generated self-interrupts (timer, |
868 | 868 | performance counters), and the NMI watchdog which detects hard |
869 | 869 | lockups. |
870 | + | |
871 | +config X86_UP_APIC_MSI | |
872 | + def_bool y | |
873 | + select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI | |
870 | 874 | |
871 | 875 | config X86_UP_IOAPIC |
872 | 876 | bool "IO-APIC support on uniprocessors" |
arch/x86/boot/compressed/misc.c
... | ... | @@ -373,6 +373,8 @@ |
373 | 373 | unsigned long output_len, |
374 | 374 | unsigned long run_size) |
375 | 375 | { |
376 | + unsigned char *output_orig = output; | |
377 | + | |
376 | 378 | real_mode = rmode; |
377 | 379 | |
378 | 380 | sanitize_boot_params(real_mode); |
... | ... | @@ -421,7 +423,12 @@ |
421 | 423 | debug_putstr("\nDecompressing Linux... "); |
422 | 424 | decompress(input_data, input_len, NULL, NULL, output, NULL, error); |
423 | 425 | parse_elf(output); |
424 | - handle_relocations(output, output_len); | |
426 | + /* | |
427 | + * 32-bit always performs relocations. 64-bit relocations are only | |
428 | + * needed if kASLR has chosen a different load address. | |
429 | + */ | |
430 | + if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig) | |
431 | + handle_relocations(output, output_len); | |
425 | 432 | debug_putstr("done.\nBooting the kernel.\n"); |
426 | 433 | return output; |
427 | 434 | } |
arch/x86/include/asm/acpi.h
arch/x86/include/asm/desc.h
... | ... | @@ -251,7 +251,8 @@ |
251 | 251 | gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; |
252 | 252 | } |
253 | 253 | |
254 | -#define _LDT_empty(info) \ | |
254 | +/* This intentionally ignores lm, since 32-bit apps don't have that field. */ | |
255 | +#define LDT_empty(info) \ | |
255 | 256 | ((info)->base_addr == 0 && \ |
256 | 257 | (info)->limit == 0 && \ |
257 | 258 | (info)->contents == 0 && \ |
... | ... | @@ -261,11 +262,18 @@ |
261 | 262 | (info)->seg_not_present == 1 && \ |
262 | 263 | (info)->useable == 0) |
263 | 264 | |
264 | -#ifdef CONFIG_X86_64 | |
265 | -#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) | |
266 | -#else | |
267 | -#define LDT_empty(info) (_LDT_empty(info)) | |
268 | -#endif | |
265 | +/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */ | |
266 | +static inline bool LDT_zero(const struct user_desc *info) | |
267 | +{ | |
268 | + return (info->base_addr == 0 && | |
269 | + info->limit == 0 && | |
270 | + info->contents == 0 && | |
271 | + info->read_exec_only == 0 && | |
272 | + info->seg_32bit == 0 && | |
273 | + info->limit_in_pages == 0 && | |
274 | + info->seg_not_present == 0 && | |
275 | + info->useable == 0); | |
276 | +} | |
269 | 277 | |
270 | 278 | static inline void clear_LDT(void) |
271 | 279 | { |
arch/x86/include/asm/mmu_context.h
... | ... | @@ -130,7 +130,25 @@ |
130 | 130 | static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, |
131 | 131 | unsigned long start, unsigned long end) |
132 | 132 | { |
133 | - mpx_notify_unmap(mm, vma, start, end); | |
133 | + /* | |
134 | + * mpx_notify_unmap() goes and reads a rarely-hot | |
135 | + * cacheline in the mm_struct. That can be expensive | |
136 | + * enough to be seen in profiles. | |
137 | + * | |
138 | + * The mpx_notify_unmap() call and its contents have been | |
139 | + * observed to affect munmap() performance on hardware | |
140 | + * where MPX is not present. | |
141 | + * | |
142 | + * The unlikely() optimizes for the fast case: no MPX | |
143 | + * in the CPU, or no MPX use in the process. Even if | |
144 | + * we get this wrong (in the unlikely event that MPX | |
145 | + * is widely enabled on some system) the overhead of | |
146 | + * MPX itself (reading bounds tables) is expected to | |
147 | + * overwhelm the overhead of getting this unlikely() | |
148 | + * consistently wrong. | |
149 | + */ | |
150 | + if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX))) | |
151 | + mpx_notify_unmap(mm, vma, start, end); | |
134 | 152 | } |
135 | 153 | |
136 | 154 | #endif /* _ASM_X86_MMU_CONTEXT_H */ |
arch/x86/kernel/acpi/boot.c
... | ... | @@ -611,20 +611,20 @@ |
611 | 611 | |
612 | 612 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) |
613 | 613 | { |
614 | - int irq; | |
614 | + int rc, irq, trigger, polarity; | |
615 | 615 | |
616 | - if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { | |
617 | - *irqp = gsi; | |
618 | - } else { | |
619 | - mutex_lock(&acpi_ioapic_lock); | |
620 | - irq = mp_map_gsi_to_irq(gsi, | |
621 | - IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); | |
622 | - mutex_unlock(&acpi_ioapic_lock); | |
623 | - if (irq < 0) | |
624 | - return -1; | |
625 | - *irqp = irq; | |
616 | + rc = acpi_get_override_irq(gsi, &trigger, &polarity); | |
617 | + if (rc == 0) { | |
618 | + trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; | |
619 | + polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; | |
620 | + irq = acpi_register_gsi(NULL, gsi, trigger, polarity); | |
621 | + if (irq >= 0) { | |
622 | + *irqp = irq; | |
623 | + return 0; | |
624 | + } | |
626 | 625 | } |
627 | - return 0; | |
626 | + | |
627 | + return -1; | |
628 | 628 | } |
629 | 629 | EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); |
630 | 630 |
arch/x86/kernel/cpu/mshyperv.c
arch/x86/kernel/irq.c
... | ... | @@ -127,7 +127,7 @@ |
127 | 127 | seq_puts(p, " Machine check polls\n"); |
128 | 128 | #endif |
129 | 129 | #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) |
130 | - seq_printf(p, "%*s: ", prec, "THR"); | |
130 | + seq_printf(p, "%*s: ", prec, "HYP"); | |
131 | 131 | for_each_online_cpu(j) |
132 | 132 | seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count); |
133 | 133 | seq_puts(p, " Hypervisor callback interrupts\n"); |
arch/x86/kernel/tls.c
... | ... | @@ -29,7 +29,28 @@ |
29 | 29 | |
30 | 30 | static bool tls_desc_okay(const struct user_desc *info) |
31 | 31 | { |
32 | - if (LDT_empty(info)) | |
32 | + /* | |
33 | + * For historical reasons (i.e. no one ever documented how any | |
34 | + * of the segmentation APIs work), user programs can and do | |
35 | + * assume that a struct user_desc that's all zeros except for | |
36 | + * entry_number means "no segment at all". This never actually | |
37 | + * worked. In fact, up to Linux 3.19, a struct user_desc like | |
38 | + * this would create a 16-bit read-write segment with base and | |
39 | + * limit both equal to zero. | |
40 | + * | |
41 | + * That was close enough to "no segment at all" until we | |
42 | + * hardened this function to disallow 16-bit TLS segments. Fix | |
43 | + * it up by interpreting these zeroed segments the way that they | |
44 | + * were almost certainly intended to be interpreted. | |
45 | + * | |
46 | + * The correct way to ask for "no segment at all" is to specify | |
47 | + * a user_desc that satisfies LDT_empty. To keep everything | |
48 | + * working, we accept both. | |
49 | + * | |
50 | + * Note that there's a similar kludge in modify_ldt -- look at | |
51 | + * the distinction between modes 1 and 0x11. | |
52 | + */ | |
53 | + if (LDT_empty(info) || LDT_zero(info)) | |
33 | 54 | return true; |
34 | 55 | |
35 | 56 | /* |
... | ... | @@ -71,7 +92,7 @@ |
71 | 92 | cpu = get_cpu(); |
72 | 93 | |
73 | 94 | while (n-- > 0) { |
74 | - if (LDT_empty(info)) | |
95 | + if (LDT_empty(info) || LDT_zero(info)) | |
75 | 96 | desc->a = desc->b = 0; |
76 | 97 | else |
77 | 98 | fill_ldt(desc, info); |
arch/x86/kernel/tsc.c
arch/x86/mm/init.c
... | ... | @@ -43,7 +43,7 @@ |
43 | 43 | [_PAGE_CACHE_MODE_WT] = _PAGE_PCD, |
44 | 44 | [_PAGE_CACHE_MODE_WP] = _PAGE_PCD, |
45 | 45 | }; |
46 | -EXPORT_SYMBOL_GPL(__cachemode2pte_tbl); | |
46 | +EXPORT_SYMBOL(__cachemode2pte_tbl); | |
47 | 47 | uint8_t __pte2cachemode_tbl[8] = { |
48 | 48 | [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB, |
49 | 49 | [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC, |
... | ... | @@ -54,7 +54,7 @@ |
54 | 54 | [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, |
55 | 55 | [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, |
56 | 56 | }; |
57 | -EXPORT_SYMBOL_GPL(__pte2cachemode_tbl); | |
57 | +EXPORT_SYMBOL(__pte2cachemode_tbl); | |
58 | 58 | |
59 | 59 | static unsigned long __initdata pgt_buf_start; |
60 | 60 | static unsigned long __initdata pgt_buf_end; |
arch/x86/mm/mpx.c
... | ... | @@ -349,6 +349,12 @@ |
349 | 349 | return MPX_INVALID_BOUNDS_DIR; |
350 | 350 | |
351 | 351 | /* |
352 | + * 32-bit binaries on 64-bit kernels are currently | |
353 | + * unsupported. | |
354 | + */ | |
355 | + if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32)) | |
356 | + return MPX_INVALID_BOUNDS_DIR; | |
357 | + /* | |
352 | 358 | * The bounds directory pointer is stored in a register |
353 | 359 | * only accessible if we first do an xsave. |
354 | 360 | */ |
arch/x86/mm/pat.c
... | ... | @@ -234,8 +234,13 @@ |
234 | 234 | PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); |
235 | 235 | |
236 | 236 | /* Boot CPU check */ |
237 | - if (!boot_pat_state) | |
237 | + if (!boot_pat_state) { | |
238 | 238 | rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); |
239 | + if (!boot_pat_state) { | |
240 | + pat_disable("PAT read returns always zero, disabled."); | |
241 | + return; | |
242 | + } | |
243 | + } | |
239 | 244 | |
240 | 245 | wrmsrl(MSR_IA32_CR_PAT, pat); |
241 | 246 |
arch/x86/pci/xen.c
... | ... | @@ -458,6 +458,7 @@ |
458 | 458 | * just how GSIs get registered. |
459 | 459 | */ |
460 | 460 | __acpi_register_gsi = acpi_register_gsi_xen_hvm; |
461 | + __acpi_unregister_gsi = NULL; | |
461 | 462 | #endif |
462 | 463 | |
463 | 464 | #ifdef CONFIG_PCI_MSI |
... | ... | @@ -471,52 +472,6 @@ |
471 | 472 | } |
472 | 473 | |
473 | 474 | #ifdef CONFIG_XEN_DOM0 |
474 | -static __init void xen_setup_acpi_sci(void) | |
475 | -{ | |
476 | - int rc; | |
477 | - int trigger, polarity; | |
478 | - int gsi = acpi_sci_override_gsi; | |
479 | - int irq = -1; | |
480 | - int gsi_override = -1; | |
481 | - | |
482 | - if (!gsi) | |
483 | - return; | |
484 | - | |
485 | - rc = acpi_get_override_irq(gsi, &trigger, &polarity); | |
486 | - if (rc) { | |
487 | - printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi" | |
488 | - " sci, rc=%d\n", rc); | |
489 | - return; | |
490 | - } | |
491 | - trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; | |
492 | - polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; | |
493 | - | |
494 | - printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d " | |
495 | - "polarity=%d\n", gsi, trigger, polarity); | |
496 | - | |
497 | - /* Before we bind the GSI to a Linux IRQ, check whether | |
498 | - * we need to override it with bus_irq (IRQ) value. Usually for | |
499 | - * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so: | |
500 | - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level) | |
501 | - * but there are oddballs where the IRQ != GSI: | |
502 | - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) | |
503 | - * which ends up being: gsi_to_irq[9] == 20 | |
504 | - * (which is what acpi_gsi_to_irq ends up calling when starting the | |
505 | - * the ACPI interpreter and keels over since IRQ 9 has not been | |
506 | - * setup as we had setup IRQ 20 for it). | |
507 | - */ | |
508 | - if (acpi_gsi_to_irq(gsi, &irq) == 0) { | |
509 | - /* Use the provided value if it's valid. */ | |
510 | - if (irq >= 0) | |
511 | - gsi_override = irq; | |
512 | - } | |
513 | - | |
514 | - gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity); | |
515 | - printk(KERN_INFO "xen: acpi sci %d\n", gsi); | |
516 | - | |
517 | - return; | |
518 | -} | |
519 | - | |
520 | 475 | int __init pci_xen_initial_domain(void) |
521 | 476 | { |
522 | 477 | int irq; |
523 | 478 | |
... | ... | @@ -527,8 +482,8 @@ |
527 | 482 | x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs; |
528 | 483 | pci_msi_ignore_mask = 1; |
529 | 484 | #endif |
530 | - xen_setup_acpi_sci(); | |
531 | 485 | __acpi_register_gsi = acpi_register_gsi_xen; |
486 | + __acpi_unregister_gsi = NULL; | |
532 | 487 | /* Pre-allocate legacy irqs */ |
533 | 488 | for (irq = 0; irq < nr_legacy_irqs(); irq++) { |
534 | 489 | int trigger, polarity; |
drivers/acpi/pci_irq.c
kernel/sys.c
... | ... | @@ -2210,9 +2210,13 @@ |
2210 | 2210 | up_write(&me->mm->mmap_sem); |
2211 | 2211 | break; |
2212 | 2212 | case PR_MPX_ENABLE_MANAGEMENT: |
2213 | + if (arg2 || arg3 || arg4 || arg5) | |
2214 | + return -EINVAL; | |
2213 | 2215 | error = MPX_ENABLE_MANAGEMENT(me); |
2214 | 2216 | break; |
2215 | 2217 | case PR_MPX_DISABLE_MANAGEMENT: |
2218 | + if (arg2 || arg3 || arg4 || arg5) | |
2219 | + return -EINVAL; | |
2216 | 2220 | error = MPX_DISABLE_MANAGEMENT(me); |
2217 | 2221 | break; |
2218 | 2222 | default: |