Commit 14746306afe705f1d41bd51774a4378b1aec562d

Authored by Linus Torvalds

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:
 "Hopefully the last round of fixes for 3.19

   - regression fix for the LDT changes
   - regression fix for XEN interrupt handling caused by the APIC
     changes
   - regression fixes for the PAT changes
   - last minute fixes for the new MPX support
   - regression fix for 32bit UP
   - fix for a long standing relocation issue on 64bit tagged for stable
   - functional fix for the Hyper-V clocksource tagged for stable
   - downgrade of a pr_err which tends to confuse users

  Looks a bit on the large side, but almost half of it is valuable
  comments"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tsc: Change Fast TSC calibration failed from error to info
  x86/apic: Re-enable PCI_MSI support for non-SMP X86_32
  x86, mm: Change cachemode exports to non-gpl
  x86, tls: Interpret an all-zero struct user_desc as "no segment"
  x86, tls, ldt: Stop checking lm in LDT_empty
  x86, mpx: Strictly enforce empty prctl() args
  x86, mpx: Fix potential performance issue on unmaps
  x86, mpx: Explicitly disable 32-bit MPX support on 64-bit kernels
  x86, hyperv: Mark the Hyper-V clocksource as being continuous
  x86: Don't rely on VMWare emulating PAT MSR correctly
  x86, irq: Properly tag virtualization entry in /proc/interrupts
  x86, boot: Skip relocs when load address unchanged
  x86/xen: Override ACPI IRQ management callback __acpi_unregister_gsi
  ACPI: pci: Do not clear pci_dev->irq in acpi_pci_irq_disable()
  x86/xen: Treat SCI interrupt as normal GSI interrupt

Showing 16 changed files Side-by-side Diff

... ... @@ -857,7 +857,7 @@
857 857  
858 858 config X86_UP_APIC
859 859 bool "Local APIC support on uniprocessors"
860   - depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI
  860 + depends on X86_32 && !SMP && !X86_32_NON_STANDARD
861 861 ---help---
862 862 A local APIC (Advanced Programmable Interrupt Controller) is an
863 863 integrated interrupt controller in the CPU. If you have a single-CPU
... ... @@ -867,6 +867,10 @@
867 867 all. The local APIC supports CPU-generated self-interrupts (timer,
868 868 performance counters), and the NMI watchdog which detects hard
869 869 lockups.
  870 +
  871 +config X86_UP_APIC_MSI
  872 + def_bool y
  873 + select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI
870 874  
871 875 config X86_UP_IOAPIC
872 876 bool "IO-APIC support on uniprocessors"
arch/x86/boot/compressed/misc.c
... ... @@ -373,6 +373,8 @@
373 373 unsigned long output_len,
374 374 unsigned long run_size)
375 375 {
  376 + unsigned char *output_orig = output;
  377 +
376 378 real_mode = rmode;
377 379  
378 380 sanitize_boot_params(real_mode);
... ... @@ -421,7 +423,12 @@
421 423 debug_putstr("\nDecompressing Linux... ");
422 424 decompress(input_data, input_len, NULL, NULL, output, NULL, error);
423 425 parse_elf(output);
424   - handle_relocations(output, output_len);
  426 + /*
  427 + * 32-bit always performs relocations. 64-bit relocations are only
  428 + * needed if kASLR has chosen a different load address.
  429 + */
  430 + if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig)
  431 + handle_relocations(output, output_len);
425 432 debug_putstr("done.\nBooting the kernel.\n");
426 433 return output;
427 434 }
arch/x86/include/asm/acpi.h
... ... @@ -50,6 +50,7 @@
50 50  
51 51 extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
52 52 int trigger, int polarity);
  53 +extern void (*__acpi_unregister_gsi)(u32 gsi);
53 54  
54 55 static inline void disable_acpi(void)
55 56 {
arch/x86/include/asm/desc.h
... ... @@ -251,7 +251,8 @@
251 251 gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
252 252 }
253 253  
254   -#define _LDT_empty(info) \
  254 +/* This intentionally ignores lm, since 32-bit apps don't have that field. */
  255 +#define LDT_empty(info) \
255 256 ((info)->base_addr == 0 && \
256 257 (info)->limit == 0 && \
257 258 (info)->contents == 0 && \
... ... @@ -261,11 +262,18 @@
261 262 (info)->seg_not_present == 1 && \
262 263 (info)->useable == 0)
263 264  
264   -#ifdef CONFIG_X86_64
265   -#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
266   -#else
267   -#define LDT_empty(info) (_LDT_empty(info))
268   -#endif
  265 +/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */
  266 +static inline bool LDT_zero(const struct user_desc *info)
  267 +{
  268 + return (info->base_addr == 0 &&
  269 + info->limit == 0 &&
  270 + info->contents == 0 &&
  271 + info->read_exec_only == 0 &&
  272 + info->seg_32bit == 0 &&
  273 + info->limit_in_pages == 0 &&
  274 + info->seg_not_present == 0 &&
  275 + info->useable == 0);
  276 +}
269 277  
270 278 static inline void clear_LDT(void)
271 279 {
arch/x86/include/asm/mmu_context.h
... ... @@ -130,7 +130,25 @@
130 130 static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
131 131 unsigned long start, unsigned long end)
132 132 {
133   - mpx_notify_unmap(mm, vma, start, end);
  133 + /*
  134 + * mpx_notify_unmap() goes and reads a rarely-hot
  135 + * cacheline in the mm_struct. That can be expensive
  136 + * enough to be seen in profiles.
  137 + *
  138 + * The mpx_notify_unmap() call and its contents have been
  139 + * observed to affect munmap() performance on hardware
  140 + * where MPX is not present.
  141 + *
  142 + * The unlikely() optimizes for the fast case: no MPX
  143 + * in the CPU, or no MPX use in the process. Even if
  144 + * we get this wrong (in the unlikely event that MPX
  145 + * is widely enabled on some system) the overhead of
  146 + * MPX itself (reading bounds tables) is expected to
  147 + * overwhelm the overhead of getting this unlikely()
  148 + * consistently wrong.
  149 + */
  150 + if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
  151 + mpx_notify_unmap(mm, vma, start, end);
134 152 }
135 153  
136 154 #endif /* _ASM_X86_MMU_CONTEXT_H */
arch/x86/kernel/acpi/boot.c
... ... @@ -611,20 +611,20 @@
611 611  
612 612 int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
613 613 {
614   - int irq;
  614 + int rc, irq, trigger, polarity;
615 615  
616   - if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
617   - *irqp = gsi;
618   - } else {
619   - mutex_lock(&acpi_ioapic_lock);
620   - irq = mp_map_gsi_to_irq(gsi,
621   - IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
622   - mutex_unlock(&acpi_ioapic_lock);
623   - if (irq < 0)
624   - return -1;
625   - *irqp = irq;
  616 + rc = acpi_get_override_irq(gsi, &trigger, &polarity);
  617 + if (rc == 0) {
  618 + trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
  619 + polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
  620 + irq = acpi_register_gsi(NULL, gsi, trigger, polarity);
  621 + if (irq >= 0) {
  622 + *irqp = irq;
  623 + return 0;
  624 + }
626 625 }
627   - return 0;
  626 +
  627 + return -1;
628 628 }
629 629 EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
630 630  
arch/x86/kernel/cpu/mshyperv.c
... ... @@ -107,6 +107,7 @@
107 107 .rating = 400, /* use this when running on Hyperv*/
108 108 .read = read_hv_clock,
109 109 .mask = CLOCKSOURCE_MASK(64),
  110 + .flags = CLOCK_SOURCE_IS_CONTINUOUS,
110 111 };
111 112  
112 113 static void __init ms_hyperv_init_platform(void)
arch/x86/kernel/irq.c
... ... @@ -127,7 +127,7 @@
127 127 seq_puts(p, " Machine check polls\n");
128 128 #endif
129 129 #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
130   - seq_printf(p, "%*s: ", prec, "THR");
  130 + seq_printf(p, "%*s: ", prec, "HYP");
131 131 for_each_online_cpu(j)
132 132 seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count);
133 133 seq_puts(p, " Hypervisor callback interrupts\n");
arch/x86/kernel/tls.c
... ... @@ -29,7 +29,28 @@
29 29  
30 30 static bool tls_desc_okay(const struct user_desc *info)
31 31 {
32   - if (LDT_empty(info))
  32 + /*
  33 + * For historical reasons (i.e. no one ever documented how any
  34 + * of the segmentation APIs work), user programs can and do
  35 + * assume that a struct user_desc that's all zeros except for
  36 + * entry_number means "no segment at all". This never actually
  37 + * worked. In fact, up to Linux 3.19, a struct user_desc like
  38 + * this would create a 16-bit read-write segment with base and
  39 + * limit both equal to zero.
  40 + *
  41 + * That was close enough to "no segment at all" until we
  42 + * hardened this function to disallow 16-bit TLS segments. Fix
  43 + * it up by interpreting these zeroed segments the way that they
  44 + * were almost certainly intended to be interpreted.
  45 + *
  46 + * The correct way to ask for "no segment at all" is to specify
  47 + * a user_desc that satisfies LDT_empty. To keep everything
  48 + * working, we accept both.
  49 + *
  50 + * Note that there's a similar kludge in modify_ldt -- look at
  51 + * the distinction between modes 1 and 0x11.
  52 + */
  53 + if (LDT_empty(info) || LDT_zero(info))
33 54 return true;
34 55  
35 56 /*
... ... @@ -71,7 +92,7 @@
71 92 cpu = get_cpu();
72 93  
73 94 while (n-- > 0) {
74   - if (LDT_empty(info))
  95 + if (LDT_empty(info) || LDT_zero(info))
75 96 desc->a = desc->b = 0;
76 97 else
77 98 fill_ldt(desc, info);
arch/x86/kernel/tsc.c
... ... @@ -617,7 +617,7 @@
617 617 goto success;
618 618 }
619 619 }
620   - pr_err("Fast TSC calibration failed\n");
  620 + pr_info("Fast TSC calibration failed\n");
621 621 return 0;
622 622  
623 623 success:
... ... @@ -43,7 +43,7 @@
43 43 [_PAGE_CACHE_MODE_WT] = _PAGE_PCD,
44 44 [_PAGE_CACHE_MODE_WP] = _PAGE_PCD,
45 45 };
46   -EXPORT_SYMBOL_GPL(__cachemode2pte_tbl);
  46 +EXPORT_SYMBOL(__cachemode2pte_tbl);
47 47 uint8_t __pte2cachemode_tbl[8] = {
48 48 [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB,
49 49 [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC,
... ... @@ -54,7 +54,7 @@
54 54 [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
55 55 [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
56 56 };
57   -EXPORT_SYMBOL_GPL(__pte2cachemode_tbl);
  57 +EXPORT_SYMBOL(__pte2cachemode_tbl);
58 58  
59 59 static unsigned long __initdata pgt_buf_start;
60 60 static unsigned long __initdata pgt_buf_end;
... ... @@ -349,6 +349,12 @@
349 349 return MPX_INVALID_BOUNDS_DIR;
350 350  
351 351 /*
  352 + * 32-bit binaries on 64-bit kernels are currently
  353 + * unsupported.
  354 + */
  355 + if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32))
  356 + return MPX_INVALID_BOUNDS_DIR;
  357 + /*
352 358 * The bounds directory pointer is stored in a register
353 359 * only accessible if we first do an xsave.
354 360 */
... ... @@ -234,8 +234,13 @@
234 234 PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
235 235  
236 236 /* Boot CPU check */
237   - if (!boot_pat_state)
  237 + if (!boot_pat_state) {
238 238 rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
  239 + if (!boot_pat_state) {
  240 + pat_disable("PAT read returns always zero, disabled.");
  241 + return;
  242 + }
  243 + }
239 244  
240 245 wrmsrl(MSR_IA32_CR_PAT, pat);
241 246  
... ... @@ -458,6 +458,7 @@
458 458 * just how GSIs get registered.
459 459 */
460 460 __acpi_register_gsi = acpi_register_gsi_xen_hvm;
  461 + __acpi_unregister_gsi = NULL;
461 462 #endif
462 463  
463 464 #ifdef CONFIG_PCI_MSI
... ... @@ -471,52 +472,6 @@
471 472 }
472 473  
473 474 #ifdef CONFIG_XEN_DOM0
474   -static __init void xen_setup_acpi_sci(void)
475   -{
476   - int rc;
477   - int trigger, polarity;
478   - int gsi = acpi_sci_override_gsi;
479   - int irq = -1;
480   - int gsi_override = -1;
481   -
482   - if (!gsi)
483   - return;
484   -
485   - rc = acpi_get_override_irq(gsi, &trigger, &polarity);
486   - if (rc) {
487   - printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi"
488   - " sci, rc=%d\n", rc);
489   - return;
490   - }
491   - trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
492   - polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
493   -
494   - printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d "
495   - "polarity=%d\n", gsi, trigger, polarity);
496   -
497   - /* Before we bind the GSI to a Linux IRQ, check whether
498   - * we need to override it with bus_irq (IRQ) value. Usually for
499   - * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so:
500   - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
501   - * but there are oddballs where the IRQ != GSI:
502   - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level)
503   - * which ends up being: gsi_to_irq[9] == 20
504   - * (which is what acpi_gsi_to_irq ends up calling when starting the
505   - * the ACPI interpreter and keels over since IRQ 9 has not been
506   - * setup as we had setup IRQ 20 for it).
507   - */
508   - if (acpi_gsi_to_irq(gsi, &irq) == 0) {
509   - /* Use the provided value if it's valid. */
510   - if (irq >= 0)
511   - gsi_override = irq;
512   - }
513   -
514   - gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity);
515   - printk(KERN_INFO "xen: acpi sci %d\n", gsi);
516   -
517   - return;
518   -}
519   -
520 475 int __init pci_xen_initial_domain(void)
521 476 {
522 477 int irq;
523 478  
... ... @@ -527,8 +482,8 @@
527 482 x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
528 483 pci_msi_ignore_mask = 1;
529 484 #endif
530   - xen_setup_acpi_sci();
531 485 __acpi_register_gsi = acpi_register_gsi_xen;
  486 + __acpi_unregister_gsi = NULL;
532 487 /* Pre-allocate legacy irqs */
533 488 for (irq = 0; irq < nr_legacy_irqs(); irq++) {
534 489 int trigger, polarity;
drivers/acpi/pci_irq.c
... ... @@ -512,7 +512,6 @@
512 512 dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
513 513 if (gsi >= 0) {
514 514 acpi_unregister_gsi(gsi);
515   - dev->irq = 0;
516 515 dev->irq_managed = 0;
517 516 }
518 517 }
... ... @@ -2210,9 +2210,13 @@
2210 2210 up_write(&me->mm->mmap_sem);
2211 2211 break;
2212 2212 case PR_MPX_ENABLE_MANAGEMENT:
  2213 + if (arg2 || arg3 || arg4 || arg5)
  2214 + return -EINVAL;
2213 2215 error = MPX_ENABLE_MANAGEMENT(me);
2214 2216 break;
2215 2217 case PR_MPX_DISABLE_MANAGEMENT:
  2218 + if (arg2 || arg3 || arg4 || arg5)
  2219 + return -EINVAL;
2216 2220 error = MPX_DISABLE_MANAGEMENT(me);
2217 2221 break;
2218 2222 default: