Commit bc2b0331e077f576369a2b6c75d15ed4de4ef91f
Committed by H. Peter Anvin · 1 parent db34bbb767 · Exists in smarc-l5.0.0_1.0.0-ga and in 5 other branches
X86: Handle Hyper-V vmbus interrupts as special hypervisor interrupts
Starting with win8, vmbus interrupts can be delivered on any VCPU in the guest and furthermore can be concurrently active on multiple VCPUs. Support this interrupt delivery model by setting up a separate IDT entry for Hyper-V vmbus interrupts.

I would like to thank Jan Beulich <JBeulich@suse.com> and Thomas Gleixner <tglx@linutronix.de> for their help. In this version of the patch, based on the feedback, I have merged the IDT vector for Xen and Hyper-V and made the necessary adjustments. Furthermore, based on Jan's feedback I have added the necessary compilation switches.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Link: http://lkml.kernel.org/r/1359940959-32168-3-git-send-email-kys@microsoft.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
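For context on how the new export is meant to be consumed: the companion vmbus driver change (not part of this diff) registers its interrupt and handler through hv_register_vmbus_handler(). A minimal sketch, assuming a hypothetical vmbus_isr() and an already-allocated irq number:

/*
 * Sketch only -- not part of this commit. vmbus_isr() and the irq
 * argument are hypothetical placeholders for the vmbus driver's own
 * interrupt handler and its allocated irq.
 */
#include <linux/interrupt.h>
#include <asm/mshyperv.h>

static irqreturn_t vmbus_isr(int irq, void *dev_id)
{
	/* scan the synthetic interrupt controller's event page here */
	return IRQ_HANDLED;
}

static int vmbus_setup_irq(int irq)
{
	int ret = request_irq(irq, vmbus_isr, 0, "vmbus", NULL);

	if (ret)
		return ret;

	/* hyperv_vector_handler() will now route the vector to this irq */
	hv_register_vmbus_handler(irq, vmbus_isr);
	return 0;
}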
Showing 6 changed files with 68 additions and 7 deletions (inline diff view)
arch/x86/include/asm/irq_vectors.h
1 | #ifndef _ASM_X86_IRQ_VECTORS_H | 1 | #ifndef _ASM_X86_IRQ_VECTORS_H |
2 | #define _ASM_X86_IRQ_VECTORS_H | 2 | #define _ASM_X86_IRQ_VECTORS_H |
3 | 3 | ||
4 | #include <linux/threads.h> | 4 | #include <linux/threads.h> |
5 | /* | 5 | /* |
6 | * Linux IRQ vector layout. | 6 | * Linux IRQ vector layout. |
7 | * | 7 | * |
8 | * There are 256 IDT entries (per CPU - each entry is 8 bytes) which can | 8 | * There are 256 IDT entries (per CPU - each entry is 8 bytes) which can |
9 | * be defined by Linux. They are used as a jump table by the CPU when a | 9 | * be defined by Linux. They are used as a jump table by the CPU when a |
10 | * given vector is triggered - by a CPU-external, CPU-internal or | 10 | * given vector is triggered - by a CPU-external, CPU-internal or |
11 | * software-triggered event. | 11 | * software-triggered event. |
12 | * | 12 | * |
13 | * Linux sets the kernel code address each entry jumps to early during | 13 | * Linux sets the kernel code address each entry jumps to early during |
14 | * bootup, and never changes them. This is the general layout of the | 14 | * bootup, and never changes them. This is the general layout of the |
15 | * IDT entries: | 15 | * IDT entries: |
16 | * | 16 | * |
17 | * Vectors 0 ... 31 : system traps and exceptions - hardcoded events | 17 | * Vectors 0 ... 31 : system traps and exceptions - hardcoded events |
18 | * Vectors 32 ... 127 : device interrupts | 18 | * Vectors 32 ... 127 : device interrupts |
19 | * Vector 128 : legacy int80 syscall interface | 19 | * Vector 128 : legacy int80 syscall interface |
20 | * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts | 20 | * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts |
21 | * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts | 21 | * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts |
22 | * | 22 | * |
23 | * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. | 23 | * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. |
24 | * | 24 | * |
25 | * This file enumerates the exact layout of them: | 25 | * This file enumerates the exact layout of them: |
26 | */ | 26 | */ |
27 | 27 | ||
28 | #define NMI_VECTOR 0x02 | 28 | #define NMI_VECTOR 0x02 |
29 | #define MCE_VECTOR 0x12 | 29 | #define MCE_VECTOR 0x12 |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * IDT vectors usable for external interrupt sources start at 0x20. | 32 | * IDT vectors usable for external interrupt sources start at 0x20. |
33 | * (0x80 is the syscall vector, 0x30-0x3f are for ISA) | 33 | * (0x80 is the syscall vector, 0x30-0x3f are for ISA) |
34 | */ | 34 | */ |
35 | #define FIRST_EXTERNAL_VECTOR 0x20 | 35 | #define FIRST_EXTERNAL_VECTOR 0x20 |
36 | /* | 36 | /* |
37 | * We start allocating at 0x21 to spread out vectors evenly between | 37 | * We start allocating at 0x21 to spread out vectors evenly between |
38 | * priority levels. (0x80 is the syscall vector) | 38 | * priority levels. (0x80 is the syscall vector) |
39 | */ | 39 | */ |
40 | #define VECTOR_OFFSET_START 1 | 40 | #define VECTOR_OFFSET_START 1 |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * Reserve the lowest usable vector (and hence lowest priority) 0x20 for | 43 | * Reserve the lowest usable vector (and hence lowest priority) 0x20 for |
44 | * triggering cleanup after irq migration. 0x21-0x2f will still be used | 44 | * triggering cleanup after irq migration. 0x21-0x2f will still be used |
45 | * for device interrupts. | 45 | * for device interrupts. |
46 | */ | 46 | */ |
47 | #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR | 47 | #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR |
48 | 48 | ||
49 | #define IA32_SYSCALL_VECTOR 0x80 | 49 | #define IA32_SYSCALL_VECTOR 0x80 |
50 | #ifdef CONFIG_X86_32 | 50 | #ifdef CONFIG_X86_32 |
51 | # define SYSCALL_VECTOR 0x80 | 51 | # define SYSCALL_VECTOR 0x80 |
52 | #endif | 52 | #endif |
53 | 53 | ||
54 | /* | 54 | /* |
55 | * Vectors 0x30-0x3f are used for ISA interrupts. | 55 | * Vectors 0x30-0x3f are used for ISA interrupts. |
56 | * round up to the next 16-vector boundary | 56 | * round up to the next 16-vector boundary |
57 | */ | 57 | */ |
58 | #define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) | 58 | #define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) |
59 | 59 | ||
60 | #define IRQ1_VECTOR (IRQ0_VECTOR + 1) | 60 | #define IRQ1_VECTOR (IRQ0_VECTOR + 1) |
61 | #define IRQ2_VECTOR (IRQ0_VECTOR + 2) | 61 | #define IRQ2_VECTOR (IRQ0_VECTOR + 2) |
62 | #define IRQ3_VECTOR (IRQ0_VECTOR + 3) | 62 | #define IRQ3_VECTOR (IRQ0_VECTOR + 3) |
63 | #define IRQ4_VECTOR (IRQ0_VECTOR + 4) | 63 | #define IRQ4_VECTOR (IRQ0_VECTOR + 4) |
64 | #define IRQ5_VECTOR (IRQ0_VECTOR + 5) | 64 | #define IRQ5_VECTOR (IRQ0_VECTOR + 5) |
65 | #define IRQ6_VECTOR (IRQ0_VECTOR + 6) | 65 | #define IRQ6_VECTOR (IRQ0_VECTOR + 6) |
66 | #define IRQ7_VECTOR (IRQ0_VECTOR + 7) | 66 | #define IRQ7_VECTOR (IRQ0_VECTOR + 7) |
67 | #define IRQ8_VECTOR (IRQ0_VECTOR + 8) | 67 | #define IRQ8_VECTOR (IRQ0_VECTOR + 8) |
68 | #define IRQ9_VECTOR (IRQ0_VECTOR + 9) | 68 | #define IRQ9_VECTOR (IRQ0_VECTOR + 9) |
69 | #define IRQ10_VECTOR (IRQ0_VECTOR + 10) | 69 | #define IRQ10_VECTOR (IRQ0_VECTOR + 10) |
70 | #define IRQ11_VECTOR (IRQ0_VECTOR + 11) | 70 | #define IRQ11_VECTOR (IRQ0_VECTOR + 11) |
71 | #define IRQ12_VECTOR (IRQ0_VECTOR + 12) | 71 | #define IRQ12_VECTOR (IRQ0_VECTOR + 12) |
72 | #define IRQ13_VECTOR (IRQ0_VECTOR + 13) | 72 | #define IRQ13_VECTOR (IRQ0_VECTOR + 13) |
73 | #define IRQ14_VECTOR (IRQ0_VECTOR + 14) | 73 | #define IRQ14_VECTOR (IRQ0_VECTOR + 14) |
74 | #define IRQ15_VECTOR (IRQ0_VECTOR + 15) | 74 | #define IRQ15_VECTOR (IRQ0_VECTOR + 15) |
75 | 75 | ||
76 | /* | 76 | /* |
77 | * Special IRQ vectors used by the SMP architecture, 0xf0-0xff | 77 | * Special IRQ vectors used by the SMP architecture, 0xf0-0xff |
78 | * | 78 | * |
79 | * some of the following vectors are 'rare', they are merged | 79 | * some of the following vectors are 'rare', they are merged |
80 | * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. | 80 | * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. |
81 | * TLB, reschedule and local APIC vectors are performance-critical. | 81 | * TLB, reschedule and local APIC vectors are performance-critical. |
82 | */ | 82 | */ |
83 | 83 | ||
84 | #define SPURIOUS_APIC_VECTOR 0xff | 84 | #define SPURIOUS_APIC_VECTOR 0xff |
85 | /* | 85 | /* |
86 | * Sanity check | 86 | * Sanity check |
87 | */ | 87 | */ |
88 | #if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) | 88 | #if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) |
89 | # error SPURIOUS_APIC_VECTOR definition error | 89 | # error SPURIOUS_APIC_VECTOR definition error |
90 | #endif | 90 | #endif |
91 | 91 | ||
92 | #define ERROR_APIC_VECTOR 0xfe | 92 | #define ERROR_APIC_VECTOR 0xfe |
93 | #define RESCHEDULE_VECTOR 0xfd | 93 | #define RESCHEDULE_VECTOR 0xfd |
94 | #define CALL_FUNCTION_VECTOR 0xfc | 94 | #define CALL_FUNCTION_VECTOR 0xfc |
95 | #define CALL_FUNCTION_SINGLE_VECTOR 0xfb | 95 | #define CALL_FUNCTION_SINGLE_VECTOR 0xfb |
96 | #define THERMAL_APIC_VECTOR 0xfa | 96 | #define THERMAL_APIC_VECTOR 0xfa |
97 | #define THRESHOLD_APIC_VECTOR 0xf9 | 97 | #define THRESHOLD_APIC_VECTOR 0xf9 |
98 | #define REBOOT_VECTOR 0xf8 | 98 | #define REBOOT_VECTOR 0xf8 |
99 | 99 | ||
100 | /* | 100 | /* |
101 | * Generic system vector for platform specific use | 101 | * Generic system vector for platform specific use |
102 | */ | 102 | */ |
103 | #define X86_PLATFORM_IPI_VECTOR 0xf7 | 103 | #define X86_PLATFORM_IPI_VECTOR 0xf7 |
104 | 104 | ||
105 | /* | 105 | /* |
106 | * IRQ work vector: | 106 | * IRQ work vector: |
107 | */ | 107 | */ |
108 | #define IRQ_WORK_VECTOR 0xf6 | 108 | #define IRQ_WORK_VECTOR 0xf6 |
109 | 109 | ||
110 | #define UV_BAU_MESSAGE 0xf5 | 110 | #define UV_BAU_MESSAGE 0xf5 |
111 | 111 | ||
112 | /* Xen vector callback to receive events in a HVM domain */ | 112 | /* Vector on which hypervisor callbacks will be delivered */ |
113 | #define XEN_HVM_EVTCHN_CALLBACK 0xf3 | 113 | #define HYPERVISOR_CALLBACK_VECTOR 0xf3 |
114 | 114 | ||
115 | /* | 115 | /* |
116 | * Local APIC timer IRQ vector is on a different priority level, | 116 | * Local APIC timer IRQ vector is on a different priority level, |
117 | * to work around the 'lost local interrupt if more than 2 IRQ | 117 | * to work around the 'lost local interrupt if more than 2 IRQ |
118 | * sources per level' errata. | 118 | * sources per level' errata. |
119 | */ | 119 | */ |
120 | #define LOCAL_TIMER_VECTOR 0xef | 120 | #define LOCAL_TIMER_VECTOR 0xef |
121 | 121 | ||
122 | #define NR_VECTORS 256 | 122 | #define NR_VECTORS 256 |
123 | 123 | ||
124 | #define FPU_IRQ 13 | 124 | #define FPU_IRQ 13 |
125 | 125 | ||
126 | #define FIRST_VM86_IRQ 3 | 126 | #define FIRST_VM86_IRQ 3 |
127 | #define LAST_VM86_IRQ 15 | 127 | #define LAST_VM86_IRQ 15 |
128 | 128 | ||
129 | #ifndef __ASSEMBLY__ | 129 | #ifndef __ASSEMBLY__ |
130 | static inline int invalid_vm86_irq(int irq) | 130 | static inline int invalid_vm86_irq(int irq) |
131 | { | 131 | { |
132 | return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; | 132 | return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; |
133 | } | 133 | } |
134 | #endif | 134 | #endif |
135 | 135 | ||
136 | /* | 136 | /* |
137 | * Size the maximum number of interrupts. | 137 | * Size the maximum number of interrupts. |
138 | * | 138 | * |
139 | * If the irq_desc[] array has a sparse layout, we can size things | 139 | * If the irq_desc[] array has a sparse layout, we can size things |
140 | * generously - it scales up linearly with the maximum number of CPUs, | 140 | * generously - it scales up linearly with the maximum number of CPUs, |
141 | * and the maximum number of IO-APICs, whichever is higher. | 141 | * and the maximum number of IO-APICs, whichever is higher. |
142 | * | 142 | * |
143 | * In other cases we size more conservatively, to not create too large | 143 | * In other cases we size more conservatively, to not create too large |
144 | * static arrays. | 144 | * static arrays. |
145 | */ | 145 | */ |
146 | 146 | ||
147 | #define NR_IRQS_LEGACY 16 | 147 | #define NR_IRQS_LEGACY 16 |
148 | 148 | ||
149 | #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) | 149 | #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) |
150 | 150 | ||
151 | #ifdef CONFIG_X86_IO_APIC | 151 | #ifdef CONFIG_X86_IO_APIC |
152 | # define CPU_VECTOR_LIMIT (64 * NR_CPUS) | 152 | # define CPU_VECTOR_LIMIT (64 * NR_CPUS) |
153 | # define NR_IRQS \ | 153 | # define NR_IRQS \ |
154 | (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ | 154 | (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ |
155 | (NR_VECTORS + CPU_VECTOR_LIMIT) : \ | 155 | (NR_VECTORS + CPU_VECTOR_LIMIT) : \ |
156 | (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) | 156 | (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) |
157 | #else /* !CONFIG_X86_IO_APIC: */ | 157 | #else /* !CONFIG_X86_IO_APIC: */ |
158 | # define NR_IRQS NR_IRQS_LEGACY | 158 | # define NR_IRQS NR_IRQS_LEGACY |
159 | #endif | 159 | #endif |
160 | 160 | ||
161 | #endif /* _ASM_X86_IRQ_VECTORS_H */ | 161 | #endif /* _ASM_X86_IRQ_VECTORS_H */ |
162 | 162 |
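The only change in this file is the rename of the 0xf3 slot: the Xen-specific XEN_HVM_EVTCHN_CALLBACK becomes the generic HYPERVISOR_CALLBACK_VECTOR, so Xen and Hyper-V share a single IDT vector instead of each consuming one. On the Xen side the rename is mechanical; a sketch of the counterpart call (assumed, not shown in this diff; the xen_hvm_callback_vector entry stub already exists in the tree):

/* Same call as before, only the constant name changes; the vector
 * value stays 0xf3. */
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, xen_hvm_callback_vector);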
arch/x86/include/asm/mshyperv.h
1 | #ifndef _ASM_X86_MSHYPER_H | 1 | #ifndef _ASM_X86_MSHYPER_H |
2 | #define _ASM_X86_MSHYPER_H | 2 | #define _ASM_X86_MSHYPER_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include <asm/hyperv.h> | 5 | #include <asm/hyperv.h> |
6 | 6 | ||
7 | struct ms_hyperv_info { | 7 | struct ms_hyperv_info { |
8 | u32 features; | 8 | u32 features; |
9 | u32 hints; | 9 | u32 hints; |
10 | }; | 10 | }; |
11 | 11 | ||
12 | extern struct ms_hyperv_info ms_hyperv; | 12 | extern struct ms_hyperv_info ms_hyperv; |
13 | 13 | ||
14 | void hyperv_callback_vector(void); | ||
15 | void hyperv_vector_handler(struct pt_regs *regs); | ||
16 | void hv_register_vmbus_handler(int irq, irq_handler_t handler); | ||
17 | |||
14 | #endif | 18 | #endif |
15 | 19 |
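The header now declares the three pieces of the new path: hyperv_callback_vector (the assembly entry stub), hyperv_vector_handler (the C-level handler), and hv_register_vmbus_handler (the registration hook for the vmbus driver). Note that irq_handler_t and struct pt_regs are not provided by this header's own includes, so its users must pull them in themselves; a sketch of what a fully self-contained version would add (an assumption, not part of this commit):

#include <linux/interrupt.h>	/* irq_handler_t */
struct pt_regs;			/* forward declaration for the handler */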
arch/x86/kernel/cpu/mshyperv.c
1 | /* | 1 | /* |
2 | * HyperV Detection code. | 2 | * HyperV Detection code. |
3 | * | 3 | * |
4 | * Copyright (C) 2010, Novell, Inc. | 4 | * Copyright (C) 2010, Novell, Inc. |
5 | * Author : K. Y. Srinivasan <ksrinivasan@novell.com> | 5 | * Author : K. Y. Srinivasan <ksrinivasan@novell.com> |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by | 8 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; version 2 of the License. | 9 | * the Free Software Foundation; version 2 of the License. |
10 | * | 10 | * |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/types.h> | 13 | #include <linux/types.h> |
14 | #include <linux/time.h> | 14 | #include <linux/time.h> |
15 | #include <linux/clocksource.h> | 15 | #include <linux/clocksource.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/hardirq.h> | ||
18 | #include <linux/interrupt.h> | ||
17 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
18 | #include <asm/hypervisor.h> | 20 | #include <asm/hypervisor.h> |
19 | #include <asm/hyperv.h> | 21 | #include <asm/hyperv.h> |
20 | #include <asm/mshyperv.h> | 22 | #include <asm/mshyperv.h> |
23 | #include <asm/desc.h> | ||
24 | #include <asm/idle.h> | ||
25 | #include <asm/irq_regs.h> | ||
21 | 26 | ||
22 | struct ms_hyperv_info ms_hyperv; | 27 | struct ms_hyperv_info ms_hyperv; |
23 | EXPORT_SYMBOL_GPL(ms_hyperv); | 28 | EXPORT_SYMBOL_GPL(ms_hyperv); |
24 | 29 | ||
25 | static bool __init ms_hyperv_platform(void) | 30 | static bool __init ms_hyperv_platform(void) |
26 | { | 31 | { |
27 | u32 eax; | 32 | u32 eax; |
28 | u32 hyp_signature[3]; | 33 | u32 hyp_signature[3]; |
29 | 34 | ||
30 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) | 35 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) |
31 | return false; | 36 | return false; |
32 | 37 | ||
33 | /* | 38 | /* |
34 | * Xen emulates Hyper-V to support enlightened Windows. | 39 | * Xen emulates Hyper-V to support enlightened Windows. |
35 | * Check to see first if we are on a Xen Hypervisor. | 40 | * Check to see first if we are on a Xen Hypervisor. |
36 | */ | 41 | */ |
37 | if (xen_cpuid_base()) | 42 | if (xen_cpuid_base()) |
38 | return false; | 43 | return false; |
39 | 44 | ||
40 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, | 45 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, |
41 | &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); | 46 | &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); |
42 | 47 | ||
43 | return eax >= HYPERV_CPUID_MIN && | 48 | return eax >= HYPERV_CPUID_MIN && |
44 | eax <= HYPERV_CPUID_MAX && | 49 | eax <= HYPERV_CPUID_MAX && |
45 | !memcmp("Microsoft Hv", hyp_signature, 12); | 50 | !memcmp("Microsoft Hv", hyp_signature, 12); |
46 | } | 51 | } |
47 | 52 | ||
48 | static cycle_t read_hv_clock(struct clocksource *arg) | 53 | static cycle_t read_hv_clock(struct clocksource *arg) |
49 | { | 54 | { |
50 | cycle_t current_tick; | 55 | cycle_t current_tick; |
51 | /* | 56 | /* |
52 | * Read the partition counter to get the current tick count. This count | 57 | * Read the partition counter to get the current tick count. This count |
53 | * is set to 0 when the partition is created and is incremented in | 58 | * is set to 0 when the partition is created and is incremented in |
54 | * 100 nanosecond units. | 59 | * 100 nanosecond units. |
55 | */ | 60 | */ |
56 | rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); | 61 | rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); |
57 | return current_tick; | 62 | return current_tick; |
58 | } | 63 | } |
59 | 64 | ||
60 | static struct clocksource hyperv_cs = { | 65 | static struct clocksource hyperv_cs = { |
61 | .name = "hyperv_clocksource", | 66 | .name = "hyperv_clocksource", |
62 | .rating = 400, /* use this when running on Hyperv*/ | 67 | .rating = 400, /* use this when running on Hyperv*/ |
63 | .read = read_hv_clock, | 68 | .read = read_hv_clock, |
64 | .mask = CLOCKSOURCE_MASK(64), | 69 | .mask = CLOCKSOURCE_MASK(64), |
65 | }; | 70 | }; |
66 | 71 | ||
67 | static void __init ms_hyperv_init_platform(void) | 72 | static void __init ms_hyperv_init_platform(void) |
68 | { | 73 | { |
69 | /* | 74 | /* |
70 | * Extract the features and hints | 75 | * Extract the features and hints |
71 | */ | 76 | */ |
72 | ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); | 77 | ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); |
73 | ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); | 78 | ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); |
74 | 79 | ||
75 | printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", | 80 | printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", |
76 | ms_hyperv.features, ms_hyperv.hints); | 81 | ms_hyperv.features, ms_hyperv.hints); |
77 | 82 | ||
78 | if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) | 83 | if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) |
79 | clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); | 84 | clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); |
85 | #if IS_ENABLED(CONFIG_HYPERV) | ||
86 | /* | ||
87 | * Setup the IDT for hypervisor callback. | ||
88 | */ | ||
89 | alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); | ||
90 | #endif | ||
80 | } | 91 | } |
81 | 92 | ||
82 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | 93 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { |
83 | .name = "Microsoft HyperV", | 94 | .name = "Microsoft HyperV", |
84 | .detect = ms_hyperv_platform, | 95 | .detect = ms_hyperv_platform, |
85 | .init_platform = ms_hyperv_init_platform, | 96 | .init_platform = ms_hyperv_init_platform, |
86 | }; | 97 | }; |
87 | EXPORT_SYMBOL(x86_hyper_ms_hyperv); | 98 | EXPORT_SYMBOL(x86_hyper_ms_hyperv); |
99 | |||
100 | #if IS_ENABLED(CONFIG_HYPERV) | ||
101 | static int vmbus_irq = -1; | ||
102 | static irq_handler_t vmbus_isr; | ||
103 | |||
104 | void hv_register_vmbus_handler(int irq, irq_handler_t handler) | ||
105 | { | ||
106 | vmbus_irq = irq; | ||
107 | vmbus_isr = handler; | ||
108 | } | ||
109 | |||
110 | void hyperv_vector_handler(struct pt_regs *regs) | ||
111 | { | ||
112 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
113 | struct irq_desc *desc; | ||
114 | |||
115 | irq_enter(); | ||
116 | exit_idle(); | ||
117 | |||
118 | desc = irq_to_desc(vmbus_irq); | ||
119 | |||
120 | if (desc) | ||
121 | generic_handle_irq_desc(vmbus_irq, desc); | ||
122 | |||
123 | irq_exit(); | ||
124 | set_irq_regs(old_regs); | ||
125 | } | ||
126 | #else | ||
127 | void hv_register_vmbus_handler(int irq, irq_handler_t handler) | ||
128 | { | ||
129 | } | ||
130 | #endif | ||
131 | EXPORT_SYMBOL_GPL(hv_register_vmbus_handler); | ||
88 | 132 |
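hyperv_vector_handler() runs directly off the IDT vector rather than through the normal do_IRQ() path, so it brackets the dispatch with irq_enter()/irq_exit() itself and hands the event to whatever irq the vmbus driver registered via hv_register_vmbus_handler(). The assembly stub that gets it there, hyperv_callback_vector, is generated in the entry code (the entry_32.S hunk is truncated in this view). A sketch of that wiring, assuming the kernel's BUILD_INTERRUPT3 macro:

#if IS_ENABLED(CONFIG_HYPERV)
/* Sketch: emit an entry stub named hyperv_callback_vector for vector
 * HYPERVISOR_CALLBACK_VECTOR that calls hyperv_vector_handler(). */
BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 hyperv_vector_handler)
#endif /* CONFIG_HYPERV */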
arch/x86/kernel/entry_32.S
1 | /* | 1 | /* |
2 | * | 2 | * |
3 | * Copyright (C) 1991, 1992 Linus Torvalds | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
4 | */ | 4 | */ |
5 | 5 | ||
6 | /* | 6 | /* |
7 | * entry.S contains the system-call and fault low-level handling routines. | 7 | * entry.S contains the system-call and fault low-level handling routines. |
8 | * This also contains the timer-interrupt handler, as well as all interrupts | 8 | * This also contains the timer-interrupt handler, as well as all interrupts |
9 | * and faults that can result in a task-switch. | 9 | * and faults that can result in a task-switch. |
10 | * | 10 | * |
11 | * NOTE: This code handles signal-recognition, which happens every time | 11 | * NOTE: This code handles signal-recognition, which happens every time |
12 | * after a timer-interrupt and after each system call. | 12 | * after a timer-interrupt and after each system call. |
13 | * | 13 | * |
14 | * I changed all the .align's to 4 (16 byte alignment), as that's faster | 14 | * I changed all the .align's to 4 (16 byte alignment), as that's faster |
15 | * on a 486. | 15 | * on a 486. |
16 | * | 16 | * |
17 | * Stack layout in 'syscall_exit': | 17 | * Stack layout in 'syscall_exit': |
18 | * ptrace needs to have all regs on the stack. | 18 | * ptrace needs to have all regs on the stack. |
19 | * if the order here is changed, it needs to be | 19 | * if the order here is changed, it needs to be |
20 | * updated in fork.c:copy_process, signal.c:do_signal, | 20 | * updated in fork.c:copy_process, signal.c:do_signal, |
21 | * ptrace.c and ptrace.h | 21 | * ptrace.c and ptrace.h |
22 | * | 22 | * |
23 | * 0(%esp) - %ebx | 23 | * 0(%esp) - %ebx |
24 | * 4(%esp) - %ecx | 24 | * 4(%esp) - %ecx |
25 | * 8(%esp) - %edx | 25 | * 8(%esp) - %edx |
26 | * C(%esp) - %esi | 26 | * C(%esp) - %esi |
27 | * 10(%esp) - %edi | 27 | * 10(%esp) - %edi |
28 | * 14(%esp) - %ebp | 28 | * 14(%esp) - %ebp |
29 | * 18(%esp) - %eax | 29 | * 18(%esp) - %eax |
30 | * 1C(%esp) - %ds | 30 | * 1C(%esp) - %ds |
31 | * 20(%esp) - %es | 31 | * 20(%esp) - %es |
32 | * 24(%esp) - %fs | 32 | * 24(%esp) - %fs |
33 | * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS | 33 | * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS |
34 | * 2C(%esp) - orig_eax | 34 | * 2C(%esp) - orig_eax |
35 | * 30(%esp) - %eip | 35 | * 30(%esp) - %eip |
36 | * 34(%esp) - %cs | 36 | * 34(%esp) - %cs |
37 | * 38(%esp) - %eflags | 37 | * 38(%esp) - %eflags |
38 | * 3C(%esp) - %oldesp | 38 | * 3C(%esp) - %oldesp |
39 | * 40(%esp) - %oldss | 39 | * 40(%esp) - %oldss |
40 | * | 40 | * |
41 | * "current" is in register %ebx during any slow entries. | 41 | * "current" is in register %ebx during any slow entries. |
42 | */ | 42 | */ |
43 | 43 | ||
44 | #include <linux/linkage.h> | 44 | #include <linux/linkage.h> |
45 | #include <linux/err.h> | 45 | #include <linux/err.h> |
46 | #include <asm/thread_info.h> | 46 | #include <asm/thread_info.h> |
47 | #include <asm/irqflags.h> | 47 | #include <asm/irqflags.h> |
48 | #include <asm/errno.h> | 48 | #include <asm/errno.h> |
49 | #include <asm/segment.h> | 49 | #include <asm/segment.h> |
50 | #include <asm/smp.h> | 50 | #include <asm/smp.h> |
51 | #include <asm/page_types.h> | 51 | #include <asm/page_types.h> |
52 | #include <asm/percpu.h> | 52 | #include <asm/percpu.h> |
53 | #include <asm/dwarf2.h> | 53 | #include <asm/dwarf2.h> |
54 | #include <asm/processor-flags.h> | 54 | #include <asm/processor-flags.h> |
55 | #include <asm/ftrace.h> | 55 | #include <asm/ftrace.h> |
56 | #include <asm/irq_vectors.h> | 56 | #include <asm/irq_vectors.h> |
57 | #include <asm/cpufeature.h> | 57 | #include <asm/cpufeature.h> |
58 | #include <asm/alternative-asm.h> | 58 | #include <asm/alternative-asm.h> |
59 | #include <asm/asm.h> | 59 | #include <asm/asm.h> |
60 | #include <asm/smap.h> | 60 | #include <asm/smap.h> |
61 | 61 | ||
62 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 62 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
63 | #include <linux/elf-em.h> | 63 | #include <linux/elf-em.h> |
64 | #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) | 64 | #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) |
65 | #define __AUDIT_ARCH_LE 0x40000000 | 65 | #define __AUDIT_ARCH_LE 0x40000000 |
66 | 66 | ||
67 | #ifndef CONFIG_AUDITSYSCALL | 67 | #ifndef CONFIG_AUDITSYSCALL |
68 | #define sysenter_audit syscall_trace_entry | 68 | #define sysenter_audit syscall_trace_entry |
69 | #define sysexit_audit syscall_exit_work | 69 | #define sysexit_audit syscall_exit_work |
70 | #endif | 70 | #endif |
71 | 71 | ||
72 | .section .entry.text, "ax" | 72 | .section .entry.text, "ax" |
73 | 73 | ||
74 | /* | 74 | /* |
75 | * We use macros for low-level operations which need to be overridden | 75 | * We use macros for low-level operations which need to be overridden |
76 | * for paravirtualization. The following will never clobber any registers: | 76 | * for paravirtualization. The following will never clobber any registers: |
77 | * INTERRUPT_RETURN (aka. "iret") | 77 | * INTERRUPT_RETURN (aka. "iret") |
78 | * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") | 78 | * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") |
79 | * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). | 79 | * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). |
80 | * | 80 | * |
81 | * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must | 81 | * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must |
82 | * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). | 82 | * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). |
83 | * Allowing a register to be clobbered can shrink the paravirt replacement | 83 | * Allowing a register to be clobbered can shrink the paravirt replacement |
84 | * enough to patch inline, increasing performance. | 84 | * enough to patch inline, increasing performance. |
85 | */ | 85 | */ |
86 | 86 | ||
87 | #ifdef CONFIG_PREEMPT | 87 | #ifdef CONFIG_PREEMPT |
88 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF | 88 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF |
89 | #else | 89 | #else |
90 | #define preempt_stop(clobbers) | 90 | #define preempt_stop(clobbers) |
91 | #define resume_kernel restore_all | 91 | #define resume_kernel restore_all |
92 | #endif | 92 | #endif |
93 | 93 | ||
94 | .macro TRACE_IRQS_IRET | 94 | .macro TRACE_IRQS_IRET |
95 | #ifdef CONFIG_TRACE_IRQFLAGS | 95 | #ifdef CONFIG_TRACE_IRQFLAGS |
96 | testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off? | 96 | testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off? |
97 | jz 1f | 97 | jz 1f |
98 | TRACE_IRQS_ON | 98 | TRACE_IRQS_ON |
99 | 1: | 99 | 1: |
100 | #endif | 100 | #endif |
101 | .endm | 101 | .endm |
102 | 102 | ||
103 | /* | 103 | /* |
104 | * User gs save/restore | 104 | * User gs save/restore |
105 | * | 105 | * |
106 | * %gs is used for userland TLS and kernel only uses it for stack | 106 | * %gs is used for userland TLS and kernel only uses it for stack |
107 | * canary which is required to be at %gs:20 by gcc. Read the comment | 107 | * canary which is required to be at %gs:20 by gcc. Read the comment |
108 | * at the top of stackprotector.h for more info. | 108 | * at the top of stackprotector.h for more info. |
109 | * | 109 | * |
110 | * Local labels 98 and 99 are used. | 110 | * Local labels 98 and 99 are used. |
111 | */ | 111 | */ |
112 | #ifdef CONFIG_X86_32_LAZY_GS | 112 | #ifdef CONFIG_X86_32_LAZY_GS |
113 | 113 | ||
114 | /* unfortunately push/pop can't be no-op */ | 114 | /* unfortunately push/pop can't be no-op */ |
115 | .macro PUSH_GS | 115 | .macro PUSH_GS |
116 | pushl_cfi $0 | 116 | pushl_cfi $0 |
117 | .endm | 117 | .endm |
118 | .macro POP_GS pop=0 | 118 | .macro POP_GS pop=0 |
119 | addl $(4 + \pop), %esp | 119 | addl $(4 + \pop), %esp |
120 | CFI_ADJUST_CFA_OFFSET -(4 + \pop) | 120 | CFI_ADJUST_CFA_OFFSET -(4 + \pop) |
121 | .endm | 121 | .endm |
122 | .macro POP_GS_EX | 122 | .macro POP_GS_EX |
123 | .endm | 123 | .endm |
124 | 124 | ||
125 | /* all the rest are no-op */ | 125 | /* all the rest are no-op */ |
126 | .macro PTGS_TO_GS | 126 | .macro PTGS_TO_GS |
127 | .endm | 127 | .endm |
128 | .macro PTGS_TO_GS_EX | 128 | .macro PTGS_TO_GS_EX |
129 | .endm | 129 | .endm |
130 | .macro GS_TO_REG reg | 130 | .macro GS_TO_REG reg |
131 | .endm | 131 | .endm |
132 | .macro REG_TO_PTGS reg | 132 | .macro REG_TO_PTGS reg |
133 | .endm | 133 | .endm |
134 | .macro SET_KERNEL_GS reg | 134 | .macro SET_KERNEL_GS reg |
135 | .endm | 135 | .endm |
136 | 136 | ||
137 | #else /* CONFIG_X86_32_LAZY_GS */ | 137 | #else /* CONFIG_X86_32_LAZY_GS */ |
138 | 138 | ||
139 | .macro PUSH_GS | 139 | .macro PUSH_GS |
140 | pushl_cfi %gs | 140 | pushl_cfi %gs |
141 | /*CFI_REL_OFFSET gs, 0*/ | 141 | /*CFI_REL_OFFSET gs, 0*/ |
142 | .endm | 142 | .endm |
143 | 143 | ||
144 | .macro POP_GS pop=0 | 144 | .macro POP_GS pop=0 |
145 | 98: popl_cfi %gs | 145 | 98: popl_cfi %gs |
146 | /*CFI_RESTORE gs*/ | 146 | /*CFI_RESTORE gs*/ |
147 | .if \pop <> 0 | 147 | .if \pop <> 0 |
148 | add $\pop, %esp | 148 | add $\pop, %esp |
149 | CFI_ADJUST_CFA_OFFSET -\pop | 149 | CFI_ADJUST_CFA_OFFSET -\pop |
150 | .endif | 150 | .endif |
151 | .endm | 151 | .endm |
152 | .macro POP_GS_EX | 152 | .macro POP_GS_EX |
153 | .pushsection .fixup, "ax" | 153 | .pushsection .fixup, "ax" |
154 | 99: movl $0, (%esp) | 154 | 99: movl $0, (%esp) |
155 | jmp 98b | 155 | jmp 98b |
156 | .popsection | 156 | .popsection |
157 | _ASM_EXTABLE(98b,99b) | 157 | _ASM_EXTABLE(98b,99b) |
158 | .endm | 158 | .endm |
159 | 159 | ||
160 | .macro PTGS_TO_GS | 160 | .macro PTGS_TO_GS |
161 | 98: mov PT_GS(%esp), %gs | 161 | 98: mov PT_GS(%esp), %gs |
162 | .endm | 162 | .endm |
163 | .macro PTGS_TO_GS_EX | 163 | .macro PTGS_TO_GS_EX |
164 | .pushsection .fixup, "ax" | 164 | .pushsection .fixup, "ax" |
165 | 99: movl $0, PT_GS(%esp) | 165 | 99: movl $0, PT_GS(%esp) |
166 | jmp 98b | 166 | jmp 98b |
167 | .popsection | 167 | .popsection |
168 | _ASM_EXTABLE(98b,99b) | 168 | _ASM_EXTABLE(98b,99b) |
169 | .endm | 169 | .endm |
170 | 170 | ||
171 | .macro GS_TO_REG reg | 171 | .macro GS_TO_REG reg |
172 | movl %gs, \reg | 172 | movl %gs, \reg |
173 | /*CFI_REGISTER gs, \reg*/ | 173 | /*CFI_REGISTER gs, \reg*/ |
174 | .endm | 174 | .endm |
175 | .macro REG_TO_PTGS reg | 175 | .macro REG_TO_PTGS reg |
176 | movl \reg, PT_GS(%esp) | 176 | movl \reg, PT_GS(%esp) |
177 | /*CFI_REL_OFFSET gs, PT_GS*/ | 177 | /*CFI_REL_OFFSET gs, PT_GS*/ |
178 | .endm | 178 | .endm |
179 | .macro SET_KERNEL_GS reg | 179 | .macro SET_KERNEL_GS reg |
180 | movl $(__KERNEL_STACK_CANARY), \reg | 180 | movl $(__KERNEL_STACK_CANARY), \reg |
181 | movl \reg, %gs | 181 | movl \reg, %gs |
182 | .endm | 182 | .endm |
183 | 183 | ||
184 | #endif /* CONFIG_X86_32_LAZY_GS */ | 184 | #endif /* CONFIG_X86_32_LAZY_GS */ |
185 | 185 | ||
186 | .macro SAVE_ALL | 186 | .macro SAVE_ALL |
187 | cld | 187 | cld |
188 | PUSH_GS | 188 | PUSH_GS |
189 | pushl_cfi %fs | 189 | pushl_cfi %fs |
190 | /*CFI_REL_OFFSET fs, 0;*/ | 190 | /*CFI_REL_OFFSET fs, 0;*/ |
191 | pushl_cfi %es | 191 | pushl_cfi %es |
192 | /*CFI_REL_OFFSET es, 0;*/ | 192 | /*CFI_REL_OFFSET es, 0;*/ |
193 | pushl_cfi %ds | 193 | pushl_cfi %ds |
194 | /*CFI_REL_OFFSET ds, 0;*/ | 194 | /*CFI_REL_OFFSET ds, 0;*/ |
195 | pushl_cfi %eax | 195 | pushl_cfi %eax |
196 | CFI_REL_OFFSET eax, 0 | 196 | CFI_REL_OFFSET eax, 0 |
197 | pushl_cfi %ebp | 197 | pushl_cfi %ebp |
198 | CFI_REL_OFFSET ebp, 0 | 198 | CFI_REL_OFFSET ebp, 0 |
199 | pushl_cfi %edi | 199 | pushl_cfi %edi |
200 | CFI_REL_OFFSET edi, 0 | 200 | CFI_REL_OFFSET edi, 0 |
201 | pushl_cfi %esi | 201 | pushl_cfi %esi |
202 | CFI_REL_OFFSET esi, 0 | 202 | CFI_REL_OFFSET esi, 0 |
203 | pushl_cfi %edx | 203 | pushl_cfi %edx |
204 | CFI_REL_OFFSET edx, 0 | 204 | CFI_REL_OFFSET edx, 0 |
205 | pushl_cfi %ecx | 205 | pushl_cfi %ecx |
206 | CFI_REL_OFFSET ecx, 0 | 206 | CFI_REL_OFFSET ecx, 0 |
207 | pushl_cfi %ebx | 207 | pushl_cfi %ebx |
208 | CFI_REL_OFFSET ebx, 0 | 208 | CFI_REL_OFFSET ebx, 0 |
209 | movl $(__USER_DS), %edx | 209 | movl $(__USER_DS), %edx |
210 | movl %edx, %ds | 210 | movl %edx, %ds |
211 | movl %edx, %es | 211 | movl %edx, %es |
212 | movl $(__KERNEL_PERCPU), %edx | 212 | movl $(__KERNEL_PERCPU), %edx |
213 | movl %edx, %fs | 213 | movl %edx, %fs |
214 | SET_KERNEL_GS %edx | 214 | SET_KERNEL_GS %edx |
215 | .endm | 215 | .endm |
216 | 216 | ||
217 | .macro RESTORE_INT_REGS | 217 | .macro RESTORE_INT_REGS |
218 | popl_cfi %ebx | 218 | popl_cfi %ebx |
219 | CFI_RESTORE ebx | 219 | CFI_RESTORE ebx |
220 | popl_cfi %ecx | 220 | popl_cfi %ecx |
221 | CFI_RESTORE ecx | 221 | CFI_RESTORE ecx |
222 | popl_cfi %edx | 222 | popl_cfi %edx |
223 | CFI_RESTORE edx | 223 | CFI_RESTORE edx |
224 | popl_cfi %esi | 224 | popl_cfi %esi |
225 | CFI_RESTORE esi | 225 | CFI_RESTORE esi |
226 | popl_cfi %edi | 226 | popl_cfi %edi |
227 | CFI_RESTORE edi | 227 | CFI_RESTORE edi |
228 | popl_cfi %ebp | 228 | popl_cfi %ebp |
229 | CFI_RESTORE ebp | 229 | CFI_RESTORE ebp |
230 | popl_cfi %eax | 230 | popl_cfi %eax |
231 | CFI_RESTORE eax | 231 | CFI_RESTORE eax |
232 | .endm | 232 | .endm |
233 | 233 | ||
234 | .macro RESTORE_REGS pop=0 | 234 | .macro RESTORE_REGS pop=0 |
235 | RESTORE_INT_REGS | 235 | RESTORE_INT_REGS |
236 | 1: popl_cfi %ds | 236 | 1: popl_cfi %ds |
237 | /*CFI_RESTORE ds;*/ | 237 | /*CFI_RESTORE ds;*/ |
238 | 2: popl_cfi %es | 238 | 2: popl_cfi %es |
239 | /*CFI_RESTORE es;*/ | 239 | /*CFI_RESTORE es;*/ |
240 | 3: popl_cfi %fs | 240 | 3: popl_cfi %fs |
241 | /*CFI_RESTORE fs;*/ | 241 | /*CFI_RESTORE fs;*/ |
242 | POP_GS \pop | 242 | POP_GS \pop |
243 | .pushsection .fixup, "ax" | 243 | .pushsection .fixup, "ax" |
244 | 4: movl $0, (%esp) | 244 | 4: movl $0, (%esp) |
245 | jmp 1b | 245 | jmp 1b |
246 | 5: movl $0, (%esp) | 246 | 5: movl $0, (%esp) |
247 | jmp 2b | 247 | jmp 2b |
248 | 6: movl $0, (%esp) | 248 | 6: movl $0, (%esp) |
249 | jmp 3b | 249 | jmp 3b |
250 | .popsection | 250 | .popsection |
251 | _ASM_EXTABLE(1b,4b) | 251 | _ASM_EXTABLE(1b,4b) |
252 | _ASM_EXTABLE(2b,5b) | 252 | _ASM_EXTABLE(2b,5b) |
253 | _ASM_EXTABLE(3b,6b) | 253 | _ASM_EXTABLE(3b,6b) |
254 | POP_GS_EX | 254 | POP_GS_EX |
255 | .endm | 255 | .endm |
256 | 256 | ||
257 | .macro RING0_INT_FRAME | 257 | .macro RING0_INT_FRAME |
258 | CFI_STARTPROC simple | 258 | CFI_STARTPROC simple |
259 | CFI_SIGNAL_FRAME | 259 | CFI_SIGNAL_FRAME |
260 | CFI_DEF_CFA esp, 3*4 | 260 | CFI_DEF_CFA esp, 3*4 |
261 | /*CFI_OFFSET cs, -2*4;*/ | 261 | /*CFI_OFFSET cs, -2*4;*/ |
262 | CFI_OFFSET eip, -3*4 | 262 | CFI_OFFSET eip, -3*4 |
263 | .endm | 263 | .endm |
264 | 264 | ||
265 | .macro RING0_EC_FRAME | 265 | .macro RING0_EC_FRAME |
266 | CFI_STARTPROC simple | 266 | CFI_STARTPROC simple |
267 | CFI_SIGNAL_FRAME | 267 | CFI_SIGNAL_FRAME |
268 | CFI_DEF_CFA esp, 4*4 | 268 | CFI_DEF_CFA esp, 4*4 |
269 | /*CFI_OFFSET cs, -2*4;*/ | 269 | /*CFI_OFFSET cs, -2*4;*/ |
270 | CFI_OFFSET eip, -3*4 | 270 | CFI_OFFSET eip, -3*4 |
271 | .endm | 271 | .endm |
272 | 272 | ||
273 | .macro RING0_PTREGS_FRAME | 273 | .macro RING0_PTREGS_FRAME |
274 | CFI_STARTPROC simple | 274 | CFI_STARTPROC simple |
275 | CFI_SIGNAL_FRAME | 275 | CFI_SIGNAL_FRAME |
276 | CFI_DEF_CFA esp, PT_OLDESP-PT_EBX | 276 | CFI_DEF_CFA esp, PT_OLDESP-PT_EBX |
277 | /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ | 277 | /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ |
278 | CFI_OFFSET eip, PT_EIP-PT_OLDESP | 278 | CFI_OFFSET eip, PT_EIP-PT_OLDESP |
279 | /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/ | 279 | /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/ |
280 | /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ | 280 | /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ |
281 | CFI_OFFSET eax, PT_EAX-PT_OLDESP | 281 | CFI_OFFSET eax, PT_EAX-PT_OLDESP |
282 | CFI_OFFSET ebp, PT_EBP-PT_OLDESP | 282 | CFI_OFFSET ebp, PT_EBP-PT_OLDESP |
283 | CFI_OFFSET edi, PT_EDI-PT_OLDESP | 283 | CFI_OFFSET edi, PT_EDI-PT_OLDESP |
284 | CFI_OFFSET esi, PT_ESI-PT_OLDESP | 284 | CFI_OFFSET esi, PT_ESI-PT_OLDESP |
285 | CFI_OFFSET edx, PT_EDX-PT_OLDESP | 285 | CFI_OFFSET edx, PT_EDX-PT_OLDESP |
286 | CFI_OFFSET ecx, PT_ECX-PT_OLDESP | 286 | CFI_OFFSET ecx, PT_ECX-PT_OLDESP |
287 | CFI_OFFSET ebx, PT_EBX-PT_OLDESP | 287 | CFI_OFFSET ebx, PT_EBX-PT_OLDESP |
288 | .endm | 288 | .endm |
289 | 289 | ||
290 | ENTRY(ret_from_fork) | 290 | ENTRY(ret_from_fork) |
291 | CFI_STARTPROC | 291 | CFI_STARTPROC |
292 | pushl_cfi %eax | 292 | pushl_cfi %eax |
293 | call schedule_tail | 293 | call schedule_tail |
294 | GET_THREAD_INFO(%ebp) | 294 | GET_THREAD_INFO(%ebp) |
295 | popl_cfi %eax | 295 | popl_cfi %eax |
296 | pushl_cfi $0x0202 # Reset kernel eflags | 296 | pushl_cfi $0x0202 # Reset kernel eflags |
297 | popfl_cfi | 297 | popfl_cfi |
298 | jmp syscall_exit | 298 | jmp syscall_exit |
299 | CFI_ENDPROC | 299 | CFI_ENDPROC |
300 | END(ret_from_fork) | 300 | END(ret_from_fork) |
301 | 301 | ||
302 | ENTRY(ret_from_kernel_thread) | 302 | ENTRY(ret_from_kernel_thread) |
303 | CFI_STARTPROC | 303 | CFI_STARTPROC |
304 | pushl_cfi %eax | 304 | pushl_cfi %eax |
305 | call schedule_tail | 305 | call schedule_tail |
306 | GET_THREAD_INFO(%ebp) | 306 | GET_THREAD_INFO(%ebp) |
307 | popl_cfi %eax | 307 | popl_cfi %eax |
308 | pushl_cfi $0x0202 # Reset kernel eflags | 308 | pushl_cfi $0x0202 # Reset kernel eflags |
309 | popfl_cfi | 309 | popfl_cfi |
310 | movl PT_EBP(%esp),%eax | 310 | movl PT_EBP(%esp),%eax |
311 | call *PT_EBX(%esp) | 311 | call *PT_EBX(%esp) |
312 | movl $0,PT_EAX(%esp) | 312 | movl $0,PT_EAX(%esp) |
313 | jmp syscall_exit | 313 | jmp syscall_exit |
314 | CFI_ENDPROC | 314 | CFI_ENDPROC |
315 | ENDPROC(ret_from_kernel_thread) | 315 | ENDPROC(ret_from_kernel_thread) |
316 | 316 | ||
317 | /* | 317 | /* |
318 | * Interrupt exit functions should be protected against kprobes | 318 | * Interrupt exit functions should be protected against kprobes |
319 | */ | 319 | */ |
320 | .pushsection .kprobes.text, "ax" | 320 | .pushsection .kprobes.text, "ax" |
321 | /* | 321 | /* |
322 | * Return to user mode is not as complex as all this looks, | 322 | * Return to user mode is not as complex as all this looks, |
323 | * but we want the default path for a system call return to | 323 | * but we want the default path for a system call return to |
324 | * go as quickly as possible which is why some of this is | 324 | * go as quickly as possible which is why some of this is |
325 | * less clear than it otherwise should be. | 325 | * less clear than it otherwise should be. |
326 | */ | 326 | */ |
327 | 327 | ||
328 | # userspace resumption stub bypassing syscall exit tracing | 328 | # userspace resumption stub bypassing syscall exit tracing |
329 | ALIGN | 329 | ALIGN |
330 | RING0_PTREGS_FRAME | 330 | RING0_PTREGS_FRAME |
331 | ret_from_exception: | 331 | ret_from_exception: |
332 | preempt_stop(CLBR_ANY) | 332 | preempt_stop(CLBR_ANY) |
333 | ret_from_intr: | 333 | ret_from_intr: |
334 | GET_THREAD_INFO(%ebp) | 334 | GET_THREAD_INFO(%ebp) |
335 | #ifdef CONFIG_VM86 | 335 | #ifdef CONFIG_VM86 |
336 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS | 336 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS |
337 | movb PT_CS(%esp), %al | 337 | movb PT_CS(%esp), %al |
338 | andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax | 338 | andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax |
339 | #else | 339 | #else |
340 | /* | 340 | /* |
341 | * We can be coming here from child spawned by kernel_thread(). | 341 | * We can be coming here from child spawned by kernel_thread(). |
342 | */ | 342 | */ |
343 | movl PT_CS(%esp), %eax | 343 | movl PT_CS(%esp), %eax |
344 | andl $SEGMENT_RPL_MASK, %eax | 344 | andl $SEGMENT_RPL_MASK, %eax |
345 | #endif | 345 | #endif |
346 | cmpl $USER_RPL, %eax | 346 | cmpl $USER_RPL, %eax |
347 | jb resume_kernel # not returning to v8086 or userspace | 347 | jb resume_kernel # not returning to v8086 or userspace |
348 | 348 | ||
349 | ENTRY(resume_userspace) | 349 | ENTRY(resume_userspace) |
350 | LOCKDEP_SYS_EXIT | 350 | LOCKDEP_SYS_EXIT |
351 | DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | 351 | DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt |
352 | # setting need_resched or sigpending | 352 | # setting need_resched or sigpending |
353 | # between sampling and the iret | 353 | # between sampling and the iret |
354 | TRACE_IRQS_OFF | 354 | TRACE_IRQS_OFF |
355 | movl TI_flags(%ebp), %ecx | 355 | movl TI_flags(%ebp), %ecx |
356 | andl $_TIF_WORK_MASK, %ecx # is there any work to be done on | 356 | andl $_TIF_WORK_MASK, %ecx # is there any work to be done on |
357 | # int/exception return? | 357 | # int/exception return? |
358 | jne work_pending | 358 | jne work_pending |
359 | jmp restore_all | 359 | jmp restore_all |
360 | END(ret_from_exception) | 360 | END(ret_from_exception) |
361 | 361 | ||
362 | #ifdef CONFIG_PREEMPT | 362 | #ifdef CONFIG_PREEMPT |
363 | ENTRY(resume_kernel) | 363 | ENTRY(resume_kernel) |
364 | DISABLE_INTERRUPTS(CLBR_ANY) | 364 | DISABLE_INTERRUPTS(CLBR_ANY) |
365 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? | 365 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? |
366 | jnz restore_all | 366 | jnz restore_all |
367 | need_resched: | 367 | need_resched: |
368 | movl TI_flags(%ebp), %ecx # need_resched set ? | 368 | movl TI_flags(%ebp), %ecx # need_resched set ? |
369 | testb $_TIF_NEED_RESCHED, %cl | 369 | testb $_TIF_NEED_RESCHED, %cl |
370 | jz restore_all | 370 | jz restore_all |
371 | testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? | 371 | testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? |
372 | jz restore_all | 372 | jz restore_all |
373 | call preempt_schedule_irq | 373 | call preempt_schedule_irq |
374 | jmp need_resched | 374 | jmp need_resched |
375 | END(resume_kernel) | 375 | END(resume_kernel) |
376 | #endif | 376 | #endif |
377 | CFI_ENDPROC | 377 | CFI_ENDPROC |
378 | /* | 378 | /* |
379 | * End of kprobes section | 379 | * End of kprobes section |
380 | */ | 380 | */ |
381 | .popsection | 381 | .popsection |
382 | 382 | ||
383 | /* SYSENTER_RETURN points to after the "sysenter" instruction in | 383 | /* SYSENTER_RETURN points to after the "sysenter" instruction in |
384 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ | 384 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ |
385 | 385 | ||
386 | # sysenter call handler stub | 386 | # sysenter call handler stub |
387 | ENTRY(ia32_sysenter_target) | 387 | ENTRY(ia32_sysenter_target) |
388 | CFI_STARTPROC simple | 388 | CFI_STARTPROC simple |
389 | CFI_SIGNAL_FRAME | 389 | CFI_SIGNAL_FRAME |
390 | CFI_DEF_CFA esp, 0 | 390 | CFI_DEF_CFA esp, 0 |
391 | CFI_REGISTER esp, ebp | 391 | CFI_REGISTER esp, ebp |
392 | movl TSS_sysenter_sp0(%esp),%esp | 392 | movl TSS_sysenter_sp0(%esp),%esp |
393 | sysenter_past_esp: | 393 | sysenter_past_esp: |
394 | /* | 394 | /* |
395 | * Interrupts are disabled here, but we can't trace it until | 395 | * Interrupts are disabled here, but we can't trace it until |
396 | * enough kernel state to call TRACE_IRQS_OFF can be called - but | 396 | * enough kernel state to call TRACE_IRQS_OFF can be called - but |
397 | * we immediately enable interrupts at that point anyway. | 397 | * we immediately enable interrupts at that point anyway. |
398 | */ | 398 | */ |
399 | pushl_cfi $__USER_DS | 399 | pushl_cfi $__USER_DS |
400 | /*CFI_REL_OFFSET ss, 0*/ | 400 | /*CFI_REL_OFFSET ss, 0*/ |
401 | pushl_cfi %ebp | 401 | pushl_cfi %ebp |
402 | CFI_REL_OFFSET esp, 0 | 402 | CFI_REL_OFFSET esp, 0 |
403 | pushfl_cfi | 403 | pushfl_cfi |
404 | orl $X86_EFLAGS_IF, (%esp) | 404 | orl $X86_EFLAGS_IF, (%esp) |
405 | pushl_cfi $__USER_CS | 405 | pushl_cfi $__USER_CS |
406 | /*CFI_REL_OFFSET cs, 0*/ | 406 | /*CFI_REL_OFFSET cs, 0*/ |
407 | /* | 407 | /* |
408 | * Push current_thread_info()->sysenter_return to the stack. | 408 | * Push current_thread_info()->sysenter_return to the stack. |
409 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | 409 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words |
410 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | 410 | * pushed above; +8 corresponds to copy_thread's esp0 setting. |
411 | */ | 411 | */ |
412 | pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp) | 412 | pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp) |
413 | CFI_REL_OFFSET eip, 0 | 413 | CFI_REL_OFFSET eip, 0 |
414 | 414 | ||
415 | pushl_cfi %eax | 415 | pushl_cfi %eax |
416 | SAVE_ALL | 416 | SAVE_ALL |
417 | ENABLE_INTERRUPTS(CLBR_NONE) | 417 | ENABLE_INTERRUPTS(CLBR_NONE) |
418 | 418 | ||
419 | /* | 419 | /* |
420 | * Load the potential sixth argument from user stack. | 420 | * Load the potential sixth argument from user stack. |
421 | * Careful about security. | 421 | * Careful about security. |
422 | */ | 422 | */ |
423 | cmpl $__PAGE_OFFSET-3,%ebp | 423 | cmpl $__PAGE_OFFSET-3,%ebp |
424 | jae syscall_fault | 424 | jae syscall_fault |
425 | ASM_STAC | 425 | ASM_STAC |
426 | 1: movl (%ebp),%ebp | 426 | 1: movl (%ebp),%ebp |
427 | ASM_CLAC | 427 | ASM_CLAC |
428 | movl %ebp,PT_EBP(%esp) | 428 | movl %ebp,PT_EBP(%esp) |
429 | _ASM_EXTABLE(1b,syscall_fault) | 429 | _ASM_EXTABLE(1b,syscall_fault) |
430 | 430 | ||
431 | GET_THREAD_INFO(%ebp) | 431 | GET_THREAD_INFO(%ebp) |
432 | 432 | ||
433 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) | 433 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
434 | jnz sysenter_audit | 434 | jnz sysenter_audit |
435 | sysenter_do_call: | 435 | sysenter_do_call: |
436 | cmpl $(NR_syscalls), %eax | 436 | cmpl $(NR_syscalls), %eax |
437 | jae syscall_badsys | 437 | jae syscall_badsys |
438 | call *sys_call_table(,%eax,4) | 438 | call *sys_call_table(,%eax,4) |
439 | movl %eax,PT_EAX(%esp) | 439 | movl %eax,PT_EAX(%esp) |
440 | LOCKDEP_SYS_EXIT | 440 | LOCKDEP_SYS_EXIT |
441 | DISABLE_INTERRUPTS(CLBR_ANY) | 441 | DISABLE_INTERRUPTS(CLBR_ANY) |
442 | TRACE_IRQS_OFF | 442 | TRACE_IRQS_OFF |
443 | movl TI_flags(%ebp), %ecx | 443 | movl TI_flags(%ebp), %ecx |
444 | testl $_TIF_ALLWORK_MASK, %ecx | 444 | testl $_TIF_ALLWORK_MASK, %ecx |
445 | jne sysexit_audit | 445 | jne sysexit_audit |
446 | sysenter_exit: | 446 | sysenter_exit: |
447 | /* if something modifies registers it must also disable sysexit */ | 447 | /* if something modifies registers it must also disable sysexit */ |
448 | movl PT_EIP(%esp), %edx | 448 | movl PT_EIP(%esp), %edx |
449 | movl PT_OLDESP(%esp), %ecx | 449 | movl PT_OLDESP(%esp), %ecx |
450 | xorl %ebp,%ebp | 450 | xorl %ebp,%ebp |
451 | TRACE_IRQS_ON | 451 | TRACE_IRQS_ON |
452 | 1: mov PT_FS(%esp), %fs | 452 | 1: mov PT_FS(%esp), %fs |
453 | PTGS_TO_GS | 453 | PTGS_TO_GS |
454 | ENABLE_INTERRUPTS_SYSEXIT | 454 | ENABLE_INTERRUPTS_SYSEXIT |
455 | 455 | ||
456 | #ifdef CONFIG_AUDITSYSCALL | 456 | #ifdef CONFIG_AUDITSYSCALL |
457 | sysenter_audit: | 457 | sysenter_audit: |
458 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | 458 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) |
459 | jnz syscall_trace_entry | 459 | jnz syscall_trace_entry |
460 | addl $4,%esp | 460 | addl $4,%esp |
461 | CFI_ADJUST_CFA_OFFSET -4 | 461 | CFI_ADJUST_CFA_OFFSET -4 |
462 | /* %esi already in 8(%esp) 6th arg: 4th syscall arg */ | 462 | /* %esi already in 8(%esp) 6th arg: 4th syscall arg */ |
463 | /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */ | 463 | /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */ |
464 | /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */ | 464 | /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */ |
465 | movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ | 465 | movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ |
466 | movl %eax,%edx /* 2nd arg: syscall number */ | 466 | movl %eax,%edx /* 2nd arg: syscall number */ |
467 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ | 467 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ |
468 | call __audit_syscall_entry | 468 | call __audit_syscall_entry |
469 | pushl_cfi %ebx | 469 | pushl_cfi %ebx |
470 | movl PT_EAX(%esp),%eax /* reload syscall number */ | 470 | movl PT_EAX(%esp),%eax /* reload syscall number */ |
471 | jmp sysenter_do_call | 471 | jmp sysenter_do_call |
472 | 472 | ||
473 | sysexit_audit: | 473 | sysexit_audit: |
474 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx | 474 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx |
475 | jne syscall_exit_work | 475 | jne syscall_exit_work |
476 | TRACE_IRQS_ON | 476 | TRACE_IRQS_ON |
477 | ENABLE_INTERRUPTS(CLBR_ANY) | 477 | ENABLE_INTERRUPTS(CLBR_ANY) |
478 | movl %eax,%edx /* second arg, syscall return value */ | 478 | movl %eax,%edx /* second arg, syscall return value */ |
479 | cmpl $-MAX_ERRNO,%eax /* is it an error ? */ | 479 | cmpl $-MAX_ERRNO,%eax /* is it an error ? */ |
480 | setbe %al /* 1 if so, 0 if not */ | 480 | setbe %al /* 1 if so, 0 if not */ |
481 | movzbl %al,%eax /* zero-extend that */ | 481 | movzbl %al,%eax /* zero-extend that */ |
482 | call __audit_syscall_exit | 482 | call __audit_syscall_exit |
483 | DISABLE_INTERRUPTS(CLBR_ANY) | 483 | DISABLE_INTERRUPTS(CLBR_ANY) |
484 | TRACE_IRQS_OFF | 484 | TRACE_IRQS_OFF |
485 | movl TI_flags(%ebp), %ecx | 485 | movl TI_flags(%ebp), %ecx |
486 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx | 486 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx |
487 | jne syscall_exit_work | 487 | jne syscall_exit_work |
488 | movl PT_EAX(%esp),%eax /* reload syscall return value */ | 488 | movl PT_EAX(%esp),%eax /* reload syscall return value */ |
489 | jmp sysenter_exit | 489 | jmp sysenter_exit |
490 | #endif | 490 | #endif |
491 | 491 | ||
492 | CFI_ENDPROC | 492 | CFI_ENDPROC |
493 | .pushsection .fixup,"ax" | 493 | .pushsection .fixup,"ax" |
494 | 2: movl $0,PT_FS(%esp) | 494 | 2: movl $0,PT_FS(%esp) |
495 | jmp 1b | 495 | jmp 1b |
496 | .popsection | 496 | .popsection |
497 | _ASM_EXTABLE(1b,2b) | 497 | _ASM_EXTABLE(1b,2b) |
498 | PTGS_TO_GS_EX | 498 | PTGS_TO_GS_EX |
499 | ENDPROC(ia32_sysenter_target) | 499 | ENDPROC(ia32_sysenter_target) |
500 | 500 | ||
501 | /* | 501 | /* |
502 | * syscall stub including irq exit should be protected against kprobes | 502 | * syscall stub including irq exit should be protected against kprobes |
503 | */ | 503 | */ |
504 | .pushsection .kprobes.text, "ax" | 504 | .pushsection .kprobes.text, "ax" |
505 | # system call handler stub | 505 | # system call handler stub |
506 | ENTRY(system_call) | 506 | ENTRY(system_call) |
507 | RING0_INT_FRAME # can't unwind into user space anyway | 507 | RING0_INT_FRAME # can't unwind into user space anyway |
508 | ASM_CLAC | 508 | ASM_CLAC |
509 | pushl_cfi %eax # save orig_eax | 509 | pushl_cfi %eax # save orig_eax |
510 | SAVE_ALL | 510 | SAVE_ALL |
511 | GET_THREAD_INFO(%ebp) | 511 | GET_THREAD_INFO(%ebp) |
512 | # system call tracing in operation / emulation | 512 | # system call tracing in operation / emulation |
513 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) | 513 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
514 | jnz syscall_trace_entry | 514 | jnz syscall_trace_entry |
515 | cmpl $(NR_syscalls), %eax | 515 | cmpl $(NR_syscalls), %eax |
516 | jae syscall_badsys | 516 | jae syscall_badsys |
517 | syscall_call: | 517 | syscall_call: |
518 | call *sys_call_table(,%eax,4) | 518 | call *sys_call_table(,%eax,4) |
519 | movl %eax,PT_EAX(%esp) # store the return value | 519 | movl %eax,PT_EAX(%esp) # store the return value |
520 | syscall_exit: | 520 | syscall_exit: |
521 | LOCKDEP_SYS_EXIT | 521 | LOCKDEP_SYS_EXIT |
522 | DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | 522 | DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt |
523 | # setting need_resched or sigpending | 523 | # setting need_resched or sigpending |
524 | # between sampling and the iret | 524 | # between sampling and the iret |
525 | TRACE_IRQS_OFF | 525 | TRACE_IRQS_OFF |
526 | movl TI_flags(%ebp), %ecx | 526 | movl TI_flags(%ebp), %ecx |
527 | testl $_TIF_ALLWORK_MASK, %ecx # current->work | 527 | testl $_TIF_ALLWORK_MASK, %ecx # current->work |
528 | jne syscall_exit_work | 528 | jne syscall_exit_work |
529 | 529 | ||
530 | restore_all: | 530 | restore_all: |
531 | TRACE_IRQS_IRET | 531 | TRACE_IRQS_IRET |
532 | restore_all_notrace: | 532 | restore_all_notrace: |
533 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS | 533 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS |
534 | # Warning: PT_OLDSS(%esp) contains the wrong/random values if we | 534 | # Warning: PT_OLDSS(%esp) contains the wrong/random values if we |
535 | # are returning to the kernel. | 535 | # are returning to the kernel. |
536 | # See comments in process.c:copy_thread() for details. | 536 | # See comments in process.c:copy_thread() for details. |
537 | movb PT_OLDSS(%esp), %ah | 537 | movb PT_OLDSS(%esp), %ah |
538 | movb PT_CS(%esp), %al | 538 | movb PT_CS(%esp), %al |
539 | andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax | 539 | andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax |
540 | cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax | 540 | cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax |
541 | CFI_REMEMBER_STATE | 541 | CFI_REMEMBER_STATE |
542 | je ldt_ss # returning to user-space with LDT SS | 542 | je ldt_ss # returning to user-space with LDT SS |
543 | restore_nocheck: | 543 | restore_nocheck: |
544 | RESTORE_REGS 4 # skip orig_eax/error_code | 544 | RESTORE_REGS 4 # skip orig_eax/error_code |
545 | irq_return: | 545 | irq_return: |
546 | INTERRUPT_RETURN | 546 | INTERRUPT_RETURN |
547 | .section .fixup,"ax" | 547 | .section .fixup,"ax" |
548 | ENTRY(iret_exc) | 548 | ENTRY(iret_exc) |
549 | pushl $0 # no error code | 549 | pushl $0 # no error code |
550 | pushl $do_iret_error | 550 | pushl $do_iret_error |
551 | jmp error_code | 551 | jmp error_code |
552 | .previous | 552 | .previous |
553 | _ASM_EXTABLE(irq_return,iret_exc) | 553 | _ASM_EXTABLE(irq_return,iret_exc) |
554 | 554 | ||
555 | CFI_RESTORE_STATE | 555 | CFI_RESTORE_STATE |
556 | ldt_ss: | 556 | ldt_ss: |
557 | larl PT_OLDSS(%esp), %eax | 557 | larl PT_OLDSS(%esp), %eax |
558 | jnz restore_nocheck | 558 | jnz restore_nocheck |
559 | testl $0x00400000, %eax # returning to 32bit stack? | 559 | testl $0x00400000, %eax # returning to 32bit stack? |
560 | jnz restore_nocheck # allright, normal return | 560 | jnz restore_nocheck # allright, normal return |
561 | 561 | ||
562 | #ifdef CONFIG_PARAVIRT | 562 | #ifdef CONFIG_PARAVIRT |
563 | /* | 563 | /* |
564 | * The kernel can't run on a non-flat stack if paravirt mode | 564 | * The kernel can't run on a non-flat stack if paravirt mode |
565 | * is active. Rather than try to fixup the high bits of | 565 | * is active. Rather than try to fixup the high bits of |
566 | * ESP, bypass this code entirely. This may break DOSemu | 566 | * ESP, bypass this code entirely. This may break DOSemu |
567 | * and/or Wine support in a paravirt VM, although the option | 567 | * and/or Wine support in a paravirt VM, although the option |
568 | * is still available to implement the setting of the high | 568 | * is still available to implement the setting of the high |
569 | * 16-bits in the INTERRUPT_RETURN paravirt-op. | 569 | * 16-bits in the INTERRUPT_RETURN paravirt-op. |
570 | */ | 570 | */ |
571 | cmpl $0, pv_info+PARAVIRT_enabled | 571 | cmpl $0, pv_info+PARAVIRT_enabled |
572 | jne restore_nocheck | 572 | jne restore_nocheck |
573 | #endif | 573 | #endif |
574 | 574 | ||
575 | /* | 575 | /* |
576 | * Setup and switch to ESPFIX stack | 576 | * Setup and switch to ESPFIX stack |
577 | * | 577 | * |
578 | * We're returning to userspace with a 16 bit stack. The CPU will not | 578 | * We're returning to userspace with a 16 bit stack. The CPU will not |
579 | * restore the high word of ESP for us on executing iret... This is an | 579 | * restore the high word of ESP for us on executing iret... This is an |
580 | * "official" bug of all the x86-compatible CPUs, which we can work | 580 | * "official" bug of all the x86-compatible CPUs, which we can work |
581 | * around to make dosemu and wine happy. We do this by preloading the | 581 | * around to make dosemu and wine happy. We do this by preloading the |
582 | * high word of ESP with the high word of the userspace ESP while | 582 | * high word of ESP with the high word of the userspace ESP while |
583 | * compensating for the offset by changing to the ESPFIX segment with | 583 | * compensating for the offset by changing to the ESPFIX segment with |
584 | * a base address that matches for the difference. | 584 | * a base address that matches for the difference. |
585 | */ | 585 | */ |
586 | #define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8) | 586 | #define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8) |
587 | mov %esp, %edx /* load kernel esp */ | 587 | mov %esp, %edx /* load kernel esp */ |
588 | mov PT_OLDESP(%esp), %eax /* load userspace esp */ | 588 | mov PT_OLDESP(%esp), %eax /* load userspace esp */ |
589 | mov %dx, %ax /* eax: new kernel esp */ | 589 | mov %dx, %ax /* eax: new kernel esp */ |
590 | sub %eax, %edx /* offset (low word is 0) */ | 590 | sub %eax, %edx /* offset (low word is 0) */ |
591 | shr $16, %edx | 591 | shr $16, %edx |
592 | mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ | 592 | mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ |
593 | mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ | 593 | mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ |
594 | pushl_cfi $__ESPFIX_SS | 594 | pushl_cfi $__ESPFIX_SS |
595 | pushl_cfi %eax /* new kernel esp */ | 595 | pushl_cfi %eax /* new kernel esp */ |
596 | /* Disable interrupts, but do not irqtrace this section: we | 596 | /* Disable interrupts, but do not irqtrace this section: we |
597 | * will soon execute iret and the tracer was already set to | 597 | * will soon execute iret and the tracer was already set to |
598 | * the irqstate after the iret */ | 598 | * the irqstate after the iret */ |
599 | DISABLE_INTERRUPTS(CLBR_EAX) | 599 | DISABLE_INTERRUPTS(CLBR_EAX) |
600 | lss (%esp), %esp /* switch to espfix segment */ | 600 | lss (%esp), %esp /* switch to espfix segment */ |
601 | CFI_ADJUST_CFA_OFFSET -8 | 601 | CFI_ADJUST_CFA_OFFSET -8 |
602 | jmp restore_nocheck | 602 | jmp restore_nocheck |
603 | CFI_ENDPROC | 603 | CFI_ENDPROC |
604 | ENDPROC(system_call) | 604 | ENDPROC(system_call) |
605 | 605 | ||
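
The ESPFIX arithmetic above is easy to sanity-check in isolation. Below is a minimal user-space C sketch (not kernel code; the esp values are made up) showing that the segment base written into the GDT plus the new ESP lands exactly back on the kernel stack, while the new ESP's high word already holds the user value that iret will fail to restore:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t kernel_esp = 0xc1234a10;	/* made-up example values */
		uint32_t user_esp   = 0x00567b20;

		/* mov %dx,%ax: user high word, kernel low word */
		uint32_t eax  = (user_esp & 0xffff0000u) | (kernel_esp & 0xffffu);
		/* sub %eax,%edx: the segment base; low word is 0 by construction */
		uint32_t base = kernel_esp - eax;

		assert((base & 0xffffu) == 0);
		/* lss loads SS.base = base and ESP = eax, so the linear address
		 * of the stack top is unchanged... */
		assert(base + eax == kernel_esp);
		/* ...while the high word that iret won't restore is already
		 * the user's high word. */
		assert((eax >> 16) == (user_esp >> 16));
		printf("base=%#x new_esp=%#x\n", base, eax);
		return 0;
	}
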
606 | # perform work that needs to be done immediately before resumption | 606 | # perform work that needs to be done immediately before resumption |
607 | ALIGN | 607 | ALIGN |
608 | RING0_PTREGS_FRAME # can't unwind into user space anyway | 608 | RING0_PTREGS_FRAME # can't unwind into user space anyway |
609 | work_pending: | 609 | work_pending: |
610 | testb $_TIF_NEED_RESCHED, %cl | 610 | testb $_TIF_NEED_RESCHED, %cl |
611 | jz work_notifysig | 611 | jz work_notifysig |
612 | work_resched: | 612 | work_resched: |
613 | call schedule | 613 | call schedule |
614 | LOCKDEP_SYS_EXIT | 614 | LOCKDEP_SYS_EXIT |
615 | DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | 615 | DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt |
616 | # setting need_resched or sigpending | 616 | # setting need_resched or sigpending |
617 | # between sampling and the iret | 617 | # between sampling and the iret |
618 | TRACE_IRQS_OFF | 618 | TRACE_IRQS_OFF |
619 | movl TI_flags(%ebp), %ecx | 619 | movl TI_flags(%ebp), %ecx |
620 | andl $_TIF_WORK_MASK, %ecx # is there any work to be done other | 620 | andl $_TIF_WORK_MASK, %ecx # is there any work to be done other |
621 | # than syscall tracing? | 621 | # than syscall tracing? |
622 | jz restore_all | 622 | jz restore_all |
623 | testb $_TIF_NEED_RESCHED, %cl | 623 | testb $_TIF_NEED_RESCHED, %cl |
624 | jnz work_resched | 624 | jnz work_resched |
625 | 625 | ||
626 | work_notifysig: # deal with pending signals and | 626 | work_notifysig: # deal with pending signals and |
627 | # notify-resume requests | 627 | # notify-resume requests |
628 | #ifdef CONFIG_VM86 | 628 | #ifdef CONFIG_VM86 |
629 | testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) | 629 | testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) |
630 | movl %esp, %eax | 630 | movl %esp, %eax |
631 | jne work_notifysig_v86 # returning to kernel-space or | 631 | jne work_notifysig_v86 # returning to kernel-space or |
632 | # vm86-space | 632 | # vm86-space |
633 | 1: | 633 | 1: |
634 | #else | 634 | #else |
635 | movl %esp, %eax | 635 | movl %esp, %eax |
636 | #endif | 636 | #endif |
637 | TRACE_IRQS_ON | 637 | TRACE_IRQS_ON |
638 | ENABLE_INTERRUPTS(CLBR_NONE) | 638 | ENABLE_INTERRUPTS(CLBR_NONE) |
639 | movb PT_CS(%esp), %bl | 639 | movb PT_CS(%esp), %bl |
640 | andb $SEGMENT_RPL_MASK, %bl | 640 | andb $SEGMENT_RPL_MASK, %bl |
641 | cmpb $USER_RPL, %bl | 641 | cmpb $USER_RPL, %bl |
642 | jb resume_kernel | 642 | jb resume_kernel |
643 | xorl %edx, %edx | 643 | xorl %edx, %edx |
644 | call do_notify_resume | 644 | call do_notify_resume |
645 | jmp resume_userspace | 645 | jmp resume_userspace |
646 | 646 | ||
647 | #ifdef CONFIG_VM86 | 647 | #ifdef CONFIG_VM86 |
648 | ALIGN | 648 | ALIGN |
649 | work_notifysig_v86: | 649 | work_notifysig_v86: |
650 | pushl_cfi %ecx # save ti_flags for do_notify_resume | 650 | pushl_cfi %ecx # save ti_flags for do_notify_resume |
651 | call save_v86_state # %eax contains pt_regs pointer | 651 | call save_v86_state # %eax contains pt_regs pointer |
652 | popl_cfi %ecx | 652 | popl_cfi %ecx |
653 | movl %eax, %esp | 653 | movl %eax, %esp |
654 | jmp 1b | 654 | jmp 1b |
655 | #endif | 655 | #endif |
656 | END(work_pending) | 656 | END(work_pending) |
657 | 657 | ||
658 | # perform syscall exit tracing | 658 | # perform syscall exit tracing |
659 | ALIGN | 659 | ALIGN |
660 | syscall_trace_entry: | 660 | syscall_trace_entry: |
661 | movl $-ENOSYS,PT_EAX(%esp) | 661 | movl $-ENOSYS,PT_EAX(%esp) |
662 | movl %esp, %eax | 662 | movl %esp, %eax |
663 | call syscall_trace_enter | 663 | call syscall_trace_enter |
664 | /* What it returned is what we'll actually use. */ | 664 | /* What it returned is what we'll actually use. */ |
665 | cmpl $(NR_syscalls), %eax | 665 | cmpl $(NR_syscalls), %eax |
666 | jnae syscall_call | 666 | jnae syscall_call |
667 | jmp syscall_exit | 667 | jmp syscall_exit |
668 | END(syscall_trace_entry) | 668 | END(syscall_trace_entry) |
669 | 669 | ||
670 | # perform syscall exit tracing | 670 | # perform syscall exit tracing |
671 | ALIGN | 671 | ALIGN |
672 | syscall_exit_work: | 672 | syscall_exit_work: |
673 | testl $_TIF_WORK_SYSCALL_EXIT, %ecx | 673 | testl $_TIF_WORK_SYSCALL_EXIT, %ecx |
674 | jz work_pending | 674 | jz work_pending |
675 | TRACE_IRQS_ON | 675 | TRACE_IRQS_ON |
676 | ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call | 676 | ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call |
677 | # schedule() instead | 677 | # schedule() instead |
678 | movl %esp, %eax | 678 | movl %esp, %eax |
679 | call syscall_trace_leave | 679 | call syscall_trace_leave |
680 | jmp resume_userspace | 680 | jmp resume_userspace |
681 | END(syscall_exit_work) | 681 | END(syscall_exit_work) |
682 | CFI_ENDPROC | 682 | CFI_ENDPROC |
683 | 683 | ||
684 | RING0_INT_FRAME # can't unwind into user space anyway | 684 | RING0_INT_FRAME # can't unwind into user space anyway |
685 | syscall_fault: | 685 | syscall_fault: |
686 | ASM_CLAC | 686 | ASM_CLAC |
687 | GET_THREAD_INFO(%ebp) | 687 | GET_THREAD_INFO(%ebp) |
688 | movl $-EFAULT,PT_EAX(%esp) | 688 | movl $-EFAULT,PT_EAX(%esp) |
689 | jmp resume_userspace | 689 | jmp resume_userspace |
690 | END(syscall_fault) | 690 | END(syscall_fault) |
691 | 691 | ||
692 | syscall_badsys: | 692 | syscall_badsys: |
693 | movl $-ENOSYS,PT_EAX(%esp) | 693 | movl $-ENOSYS,PT_EAX(%esp) |
694 | jmp resume_userspace | 694 | jmp resume_userspace |
695 | END(syscall_badsys) | 695 | END(syscall_badsys) |
696 | CFI_ENDPROC | 696 | CFI_ENDPROC |
697 | /* | 697 | /* |
698 | * End of kprobes section | 698 | * End of kprobes section |
699 | */ | 699 | */ |
700 | .popsection | 700 | .popsection |
701 | 701 | ||
702 | /* | 702 | /* |
703 | * System calls that need a pt_regs pointer. | 703 | * System calls that need a pt_regs pointer. |
704 | */ | 704 | */ |
705 | #define PTREGSCALL0(name) \ | 705 | #define PTREGSCALL0(name) \ |
706 | ENTRY(ptregs_##name) ; \ | 706 | ENTRY(ptregs_##name) ; \ |
707 | leal 4(%esp),%eax; \ | 707 | leal 4(%esp),%eax; \ |
708 | jmp sys_##name; \ | 708 | jmp sys_##name; \ |
709 | ENDPROC(ptregs_##name) | 709 | ENDPROC(ptregs_##name) |
710 | 710 | ||
711 | #define PTREGSCALL1(name) \ | 711 | #define PTREGSCALL1(name) \ |
712 | ENTRY(ptregs_##name) ; \ | 712 | ENTRY(ptregs_##name) ; \ |
713 | leal 4(%esp),%edx; \ | 713 | leal 4(%esp),%edx; \ |
714 | movl (PT_EBX+4)(%esp),%eax; \ | 714 | movl (PT_EBX+4)(%esp),%eax; \ |
715 | jmp sys_##name; \ | 715 | jmp sys_##name; \ |
716 | ENDPROC(ptregs_##name) | 716 | ENDPROC(ptregs_##name) |
717 | 717 | ||
718 | #define PTREGSCALL2(name) \ | 718 | #define PTREGSCALL2(name) \ |
719 | ENTRY(ptregs_##name) ; \ | 719 | ENTRY(ptregs_##name) ; \ |
720 | leal 4(%esp),%ecx; \ | 720 | leal 4(%esp),%ecx; \ |
721 | movl (PT_ECX+4)(%esp),%edx; \ | 721 | movl (PT_ECX+4)(%esp),%edx; \ |
722 | movl (PT_EBX+4)(%esp),%eax; \ | 722 | movl (PT_EBX+4)(%esp),%eax; \ |
723 | jmp sys_##name; \ | 723 | jmp sys_##name; \ |
724 | ENDPROC(ptregs_##name) | 724 | ENDPROC(ptregs_##name) |
725 | 725 | ||
726 | #define PTREGSCALL3(name) \ | 726 | #define PTREGSCALL3(name) \ |
727 | ENTRY(ptregs_##name) ; \ | 727 | ENTRY(ptregs_##name) ; \ |
728 | CFI_STARTPROC; \ | 728 | CFI_STARTPROC; \ |
729 | leal 4(%esp),%eax; \ | 729 | leal 4(%esp),%eax; \ |
730 | pushl_cfi %eax; \ | 730 | pushl_cfi %eax; \ |
731 | movl PT_EDX(%eax),%ecx; \ | 731 | movl PT_EDX(%eax),%ecx; \ |
732 | movl PT_ECX(%eax),%edx; \ | 732 | movl PT_ECX(%eax),%edx; \ |
733 | movl PT_EBX(%eax),%eax; \ | 733 | movl PT_EBX(%eax),%eax; \ |
734 | call sys_##name; \ | 734 | call sys_##name; \ |
735 | addl $4,%esp; \ | 735 | addl $4,%esp; \ |
736 | CFI_ADJUST_CFA_OFFSET -4; \ | 736 | CFI_ADJUST_CFA_OFFSET -4; \ |
737 | ret; \ | 737 | ret; \ |
738 | CFI_ENDPROC; \ | 738 | CFI_ENDPROC; \ |
739 | ENDPROC(ptregs_##name) | 739 | ENDPROC(ptregs_##name) |
740 | 740 | ||
741 | PTREGSCALL1(iopl) | 741 | PTREGSCALL1(iopl) |
742 | PTREGSCALL0(sigreturn) | 742 | PTREGSCALL0(sigreturn) |
743 | PTREGSCALL0(rt_sigreturn) | 743 | PTREGSCALL0(rt_sigreturn) |
744 | PTREGSCALL2(vm86) | 744 | PTREGSCALL2(vm86) |
745 | PTREGSCALL1(vm86old) | 745 | PTREGSCALL1(vm86old) |
746 | 746 | ||
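
These stubs rely on the 32-bit kernel's register calling convention (built with -mregparm=3, so the first three C arguments travel in %eax, %edx, %ecx) and on the fact that each stub is reached by a call, so pt_regs begins 4 bytes above %esp, hence the +4 in the offsets. As a rough sketch, the C prototypes the stubs target would look like the following (signatures are illustrative, not copied from syscalls.h):

	struct pt_regs;

	/* PTREGSCALL0(sigreturn): %eax = &regs (arg 1) */
	int sys_sigreturn(struct pt_regs *regs);

	/* PTREGSCALL1(iopl): %eax = saved EBX (arg 1), %edx = &regs (arg 2) */
	int sys_iopl(unsigned int level, struct pt_regs *regs);

	/* PTREGSCALL2(vm86): %eax = saved EBX, %edx = saved ECX, %ecx = &regs */
	int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs);
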
747 | .macro FIXUP_ESPFIX_STACK | 747 | .macro FIXUP_ESPFIX_STACK |
748 | /* | 748 | /* |
749 | * Switch back from the ESPFIX stack to the normal zero-based stack | 749 | * Switch back from the ESPFIX stack to the normal zero-based stack |
750 | * | 750 | * |
751 | * We can't call C functions using the ESPFIX stack. This code reads | 751 | * We can't call C functions using the ESPFIX stack. This code reads |
752 | * the high word of the segment base from the GDT and switches to the | 752 | * the high word of the segment base from the GDT and switches to the |
753 | * normal stack and adjusts ESP with the matching offset. | 753 | * normal stack and adjusts ESP with the matching offset. |
754 | */ | 754 | */ |
755 | /* fixup the stack */ | 755 | /* fixup the stack */ |
756 | mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ | 756 | mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ |
757 | mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ | 757 | mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ |
758 | shl $16, %eax | 758 | shl $16, %eax |
759 | addl %esp, %eax /* the adjusted stack pointer */ | 759 | addl %esp, %eax /* the adjusted stack pointer */ |
760 | pushl_cfi $__KERNEL_DS | 760 | pushl_cfi $__KERNEL_DS |
761 | pushl_cfi %eax | 761 | pushl_cfi %eax |
762 | lss (%esp), %esp /* switch to the normal stack segment */ | 762 | lss (%esp), %esp /* switch to the normal stack segment */ |
763 | CFI_ADJUST_CFA_OFFSET -8 | 763 | CFI_ADJUST_CFA_OFFSET -8 |
764 | .endm | 764 | .endm |
765 | .macro UNWIND_ESPFIX_STACK | 765 | .macro UNWIND_ESPFIX_STACK |
766 | movl %ss, %eax | 766 | movl %ss, %eax |
767 | /* see if on espfix stack */ | 767 | /* see if on espfix stack */ |
768 | cmpw $__ESPFIX_SS, %ax | 768 | cmpw $__ESPFIX_SS, %ax |
769 | jne 27f | 769 | jne 27f |
770 | movl $__KERNEL_DS, %eax | 770 | movl $__KERNEL_DS, %eax |
771 | movl %eax, %ds | 771 | movl %eax, %ds |
772 | movl %eax, %es | 772 | movl %eax, %es |
773 | /* switch to normal stack */ | 773 | /* switch to normal stack */ |
774 | FIXUP_ESPFIX_STACK | 774 | FIXUP_ESPFIX_STACK |
775 | 27: | 775 | 27: |
776 | .endm | 776 | .endm |
777 | 777 | ||
778 | /* | 778 | /* |
779 | * Build the entry stubs and pointer table with some assembler magic. | 779 | * Build the entry stubs and pointer table with some assembler magic. |
780 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a | 780 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a |
781 | * single cache line on all modern x86 implementations. | 781 | * single cache line on all modern x86 implementations. |
782 | */ | 782 | */ |
783 | .section .init.rodata,"a" | 783 | .section .init.rodata,"a" |
784 | ENTRY(interrupt) | 784 | ENTRY(interrupt) |
785 | .section .entry.text, "ax" | 785 | .section .entry.text, "ax" |
786 | .p2align 5 | 786 | .p2align 5 |
787 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 787 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
788 | ENTRY(irq_entries_start) | 788 | ENTRY(irq_entries_start) |
789 | RING0_INT_FRAME | 789 | RING0_INT_FRAME |
790 | vector=FIRST_EXTERNAL_VECTOR | 790 | vector=FIRST_EXTERNAL_VECTOR |
791 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 | 791 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 |
792 | .balign 32 | 792 | .balign 32 |
793 | .rept 7 | 793 | .rept 7 |
794 | .if vector < NR_VECTORS | 794 | .if vector < NR_VECTORS |
795 | .if vector <> FIRST_EXTERNAL_VECTOR | 795 | .if vector <> FIRST_EXTERNAL_VECTOR |
796 | CFI_ADJUST_CFA_OFFSET -4 | 796 | CFI_ADJUST_CFA_OFFSET -4 |
797 | .endif | 797 | .endif |
798 | 1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ | 798 | 1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ |
799 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 | 799 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 |
800 | jmp 2f | 800 | jmp 2f |
801 | .endif | 801 | .endif |
802 | .previous | 802 | .previous |
803 | .long 1b | 803 | .long 1b |
804 | .section .entry.text, "ax" | 804 | .section .entry.text, "ax" |
805 | vector=vector+1 | 805 | vector=vector+1 |
806 | .endif | 806 | .endif |
807 | .endr | 807 | .endr |
808 | 2: jmp common_interrupt | 808 | 2: jmp common_interrupt |
809 | .endr | 809 | .endr |
810 | END(irq_entries_start) | 810 | END(irq_entries_start) |
811 | 811 | ||
812 | .previous | 812 | .previous |
813 | END(interrupt) | 813 | END(interrupt) |
814 | .previous | 814 | .previous |
815 | 815 | ||
816 | /* | 816 | /* |
817 | * the CPU automatically disables interrupts when executing an IRQ vector, | 817 | * the CPU automatically disables interrupts when executing an IRQ vector, |
818 | * so IRQ-flags tracing has to follow that: | 818 | * so IRQ-flags tracing has to follow that: |
819 | */ | 819 | */ |
820 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 820 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
821 | common_interrupt: | 821 | common_interrupt: |
822 | ASM_CLAC | 822 | ASM_CLAC |
823 | addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ | 823 | addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ |
824 | SAVE_ALL | 824 | SAVE_ALL |
825 | TRACE_IRQS_OFF | 825 | TRACE_IRQS_OFF |
826 | movl %esp,%eax | 826 | movl %esp,%eax |
827 | call do_IRQ | 827 | call do_IRQ |
828 | jmp ret_from_intr | 828 | jmp ret_from_intr |
829 | ENDPROC(common_interrupt) | 829 | ENDPROC(common_interrupt) |
830 | CFI_ENDPROC | 830 | CFI_ENDPROC |
831 | 831 | ||
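
The stub encoding above is worth spelling out. Each stub pushes ~vector + 0x80, which for vectors 0x20..0xff stays in [-128, 95], so the assembler can use the two-byte push-imm8 form and seven stubs really do fit in a 32-byte chunk ((256 - 0x20 + 6)/7 = 32 chunks of 7). common_interrupt then adds -0x80, leaving ~vector in [-256, -1], and the C side recovers the vector by complementing again. A small stand-alone C check of that round trip (FIRST_EXTERNAL_VECTOR is 0x20 per irq_vectors.h):

	#include <assert.h>
	#include <stdio.h>

	#define FIRST_EXTERNAL_VECTOR 0x20
	#define NR_VECTORS 256

	int main(void)
	{
		for (int vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
			int pushed = ~vector + 0x80;		 /* what the stub pushes */
			assert(pushed >= -128 && pushed <= 127); /* fits push imm8 */

			int adjusted = pushed - 0x80;		 /* addl $-0x80,(%esp) */
			assert(adjusted >= -256 && adjusted <= -1);
			assert(~adjusted == vector);		 /* recovered by do_IRQ */
		}
		printf("all %d vector encodings round-trip\n",
		       NR_VECTORS - FIRST_EXTERNAL_VECTOR);
		return 0;
	}
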
832 | /* | 832 | /* |
833 | * Irq entries should be protected against kprobes | 833 | * Irq entries should be protected against kprobes |
834 | */ | 834 | */ |
835 | .pushsection .kprobes.text, "ax" | 835 | .pushsection .kprobes.text, "ax" |
836 | #define BUILD_INTERRUPT3(name, nr, fn) \ | 836 | #define BUILD_INTERRUPT3(name, nr, fn) \ |
837 | ENTRY(name) \ | 837 | ENTRY(name) \ |
838 | RING0_INT_FRAME; \ | 838 | RING0_INT_FRAME; \ |
839 | ASM_CLAC; \ | 839 | ASM_CLAC; \ |
840 | pushl_cfi $~(nr); \ | 840 | pushl_cfi $~(nr); \ |
841 | SAVE_ALL; \ | 841 | SAVE_ALL; \ |
842 | TRACE_IRQS_OFF \ | 842 | TRACE_IRQS_OFF \ |
843 | movl %esp,%eax; \ | 843 | movl %esp,%eax; \ |
844 | call fn; \ | 844 | call fn; \ |
845 | jmp ret_from_intr; \ | 845 | jmp ret_from_intr; \ |
846 | CFI_ENDPROC; \ | 846 | CFI_ENDPROC; \ |
847 | ENDPROC(name) | 847 | ENDPROC(name) |
848 | 848 | ||
849 | #define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) | 849 | #define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) |
850 | 850 | ||
851 | /* The include is where all of the SMP etc. interrupts come from */ | 851 | /* The include is where all of the SMP etc. interrupts come from */ |
852 | #include <asm/entry_arch.h> | 852 | #include <asm/entry_arch.h> |
853 | 853 | ||
854 | ENTRY(coprocessor_error) | 854 | ENTRY(coprocessor_error) |
855 | RING0_INT_FRAME | 855 | RING0_INT_FRAME |
856 | ASM_CLAC | 856 | ASM_CLAC |
857 | pushl_cfi $0 | 857 | pushl_cfi $0 |
858 | pushl_cfi $do_coprocessor_error | 858 | pushl_cfi $do_coprocessor_error |
859 | jmp error_code | 859 | jmp error_code |
860 | CFI_ENDPROC | 860 | CFI_ENDPROC |
861 | END(coprocessor_error) | 861 | END(coprocessor_error) |
862 | 862 | ||
863 | ENTRY(simd_coprocessor_error) | 863 | ENTRY(simd_coprocessor_error) |
864 | RING0_INT_FRAME | 864 | RING0_INT_FRAME |
865 | ASM_CLAC | 865 | ASM_CLAC |
866 | pushl_cfi $0 | 866 | pushl_cfi $0 |
867 | #ifdef CONFIG_X86_INVD_BUG | 867 | #ifdef CONFIG_X86_INVD_BUG |
868 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ | 868 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ |
869 | 661: pushl_cfi $do_general_protection | 869 | 661: pushl_cfi $do_general_protection |
870 | 662: | 870 | 662: |
871 | .section .altinstructions,"a" | 871 | .section .altinstructions,"a" |
872 | altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f | 872 | altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f |
873 | .previous | 873 | .previous |
874 | .section .altinstr_replacement,"ax" | 874 | .section .altinstr_replacement,"ax" |
875 | 663: pushl $do_simd_coprocessor_error | 875 | 663: pushl $do_simd_coprocessor_error |
876 | 664: | 876 | 664: |
877 | .previous | 877 | .previous |
878 | #else | 878 | #else |
879 | pushl_cfi $do_simd_coprocessor_error | 879 | pushl_cfi $do_simd_coprocessor_error |
880 | #endif | 880 | #endif |
881 | jmp error_code | 881 | jmp error_code |
882 | CFI_ENDPROC | 882 | CFI_ENDPROC |
883 | END(simd_coprocessor_error) | 883 | END(simd_coprocessor_error) |
884 | 884 | ||
885 | ENTRY(device_not_available) | 885 | ENTRY(device_not_available) |
886 | RING0_INT_FRAME | 886 | RING0_INT_FRAME |
887 | ASM_CLAC | 887 | ASM_CLAC |
888 | pushl_cfi $-1 # mark this as an int | 888 | pushl_cfi $-1 # mark this as an int |
889 | pushl_cfi $do_device_not_available | 889 | pushl_cfi $do_device_not_available |
890 | jmp error_code | 890 | jmp error_code |
891 | CFI_ENDPROC | 891 | CFI_ENDPROC |
892 | END(device_not_available) | 892 | END(device_not_available) |
893 | 893 | ||
894 | #ifdef CONFIG_PARAVIRT | 894 | #ifdef CONFIG_PARAVIRT |
895 | ENTRY(native_iret) | 895 | ENTRY(native_iret) |
896 | iret | 896 | iret |
897 | _ASM_EXTABLE(native_iret, iret_exc) | 897 | _ASM_EXTABLE(native_iret, iret_exc) |
898 | END(native_iret) | 898 | END(native_iret) |
899 | 899 | ||
900 | ENTRY(native_irq_enable_sysexit) | 900 | ENTRY(native_irq_enable_sysexit) |
901 | sti | 901 | sti |
902 | sysexit | 902 | sysexit |
903 | END(native_irq_enable_sysexit) | 903 | END(native_irq_enable_sysexit) |
904 | #endif | 904 | #endif |
905 | 905 | ||
906 | ENTRY(overflow) | 906 | ENTRY(overflow) |
907 | RING0_INT_FRAME | 907 | RING0_INT_FRAME |
908 | ASM_CLAC | 908 | ASM_CLAC |
909 | pushl_cfi $0 | 909 | pushl_cfi $0 |
910 | pushl_cfi $do_overflow | 910 | pushl_cfi $do_overflow |
911 | jmp error_code | 911 | jmp error_code |
912 | CFI_ENDPROC | 912 | CFI_ENDPROC |
913 | END(overflow) | 913 | END(overflow) |
914 | 914 | ||
915 | ENTRY(bounds) | 915 | ENTRY(bounds) |
916 | RING0_INT_FRAME | 916 | RING0_INT_FRAME |
917 | ASM_CLAC | 917 | ASM_CLAC |
918 | pushl_cfi $0 | 918 | pushl_cfi $0 |
919 | pushl_cfi $do_bounds | 919 | pushl_cfi $do_bounds |
920 | jmp error_code | 920 | jmp error_code |
921 | CFI_ENDPROC | 921 | CFI_ENDPROC |
922 | END(bounds) | 922 | END(bounds) |
923 | 923 | ||
924 | ENTRY(invalid_op) | 924 | ENTRY(invalid_op) |
925 | RING0_INT_FRAME | 925 | RING0_INT_FRAME |
926 | ASM_CLAC | 926 | ASM_CLAC |
927 | pushl_cfi $0 | 927 | pushl_cfi $0 |
928 | pushl_cfi $do_invalid_op | 928 | pushl_cfi $do_invalid_op |
929 | jmp error_code | 929 | jmp error_code |
930 | CFI_ENDPROC | 930 | CFI_ENDPROC |
931 | END(invalid_op) | 931 | END(invalid_op) |
932 | 932 | ||
933 | ENTRY(coprocessor_segment_overrun) | 933 | ENTRY(coprocessor_segment_overrun) |
934 | RING0_INT_FRAME | 934 | RING0_INT_FRAME |
935 | ASM_CLAC | 935 | ASM_CLAC |
936 | pushl_cfi $0 | 936 | pushl_cfi $0 |
937 | pushl_cfi $do_coprocessor_segment_overrun | 937 | pushl_cfi $do_coprocessor_segment_overrun |
938 | jmp error_code | 938 | jmp error_code |
939 | CFI_ENDPROC | 939 | CFI_ENDPROC |
940 | END(coprocessor_segment_overrun) | 940 | END(coprocessor_segment_overrun) |
941 | 941 | ||
942 | ENTRY(invalid_TSS) | 942 | ENTRY(invalid_TSS) |
943 | RING0_EC_FRAME | 943 | RING0_EC_FRAME |
944 | ASM_CLAC | 944 | ASM_CLAC |
945 | pushl_cfi $do_invalid_TSS | 945 | pushl_cfi $do_invalid_TSS |
946 | jmp error_code | 946 | jmp error_code |
947 | CFI_ENDPROC | 947 | CFI_ENDPROC |
948 | END(invalid_TSS) | 948 | END(invalid_TSS) |
949 | 949 | ||
950 | ENTRY(segment_not_present) | 950 | ENTRY(segment_not_present) |
951 | RING0_EC_FRAME | 951 | RING0_EC_FRAME |
952 | ASM_CLAC | 952 | ASM_CLAC |
953 | pushl_cfi $do_segment_not_present | 953 | pushl_cfi $do_segment_not_present |
954 | jmp error_code | 954 | jmp error_code |
955 | CFI_ENDPROC | 955 | CFI_ENDPROC |
956 | END(segment_not_present) | 956 | END(segment_not_present) |
957 | 957 | ||
958 | ENTRY(stack_segment) | 958 | ENTRY(stack_segment) |
959 | RING0_EC_FRAME | 959 | RING0_EC_FRAME |
960 | ASM_CLAC | 960 | ASM_CLAC |
961 | pushl_cfi $do_stack_segment | 961 | pushl_cfi $do_stack_segment |
962 | jmp error_code | 962 | jmp error_code |
963 | CFI_ENDPROC | 963 | CFI_ENDPROC |
964 | END(stack_segment) | 964 | END(stack_segment) |
965 | 965 | ||
966 | ENTRY(alignment_check) | 966 | ENTRY(alignment_check) |
967 | RING0_EC_FRAME | 967 | RING0_EC_FRAME |
968 | ASM_CLAC | 968 | ASM_CLAC |
969 | pushl_cfi $do_alignment_check | 969 | pushl_cfi $do_alignment_check |
970 | jmp error_code | 970 | jmp error_code |
971 | CFI_ENDPROC | 971 | CFI_ENDPROC |
972 | END(alignment_check) | 972 | END(alignment_check) |
973 | 973 | ||
974 | ENTRY(divide_error) | 974 | ENTRY(divide_error) |
975 | RING0_INT_FRAME | 975 | RING0_INT_FRAME |
976 | ASM_CLAC | 976 | ASM_CLAC |
977 | pushl_cfi $0 # no error code | 977 | pushl_cfi $0 # no error code |
978 | pushl_cfi $do_divide_error | 978 | pushl_cfi $do_divide_error |
979 | jmp error_code | 979 | jmp error_code |
980 | CFI_ENDPROC | 980 | CFI_ENDPROC |
981 | END(divide_error) | 981 | END(divide_error) |
982 | 982 | ||
983 | #ifdef CONFIG_X86_MCE | 983 | #ifdef CONFIG_X86_MCE |
984 | ENTRY(machine_check) | 984 | ENTRY(machine_check) |
985 | RING0_INT_FRAME | 985 | RING0_INT_FRAME |
986 | ASM_CLAC | 986 | ASM_CLAC |
987 | pushl_cfi $0 | 987 | pushl_cfi $0 |
988 | pushl_cfi machine_check_vector | 988 | pushl_cfi machine_check_vector |
989 | jmp error_code | 989 | jmp error_code |
990 | CFI_ENDPROC | 990 | CFI_ENDPROC |
991 | END(machine_check) | 991 | END(machine_check) |
992 | #endif | 992 | #endif |
993 | 993 | ||
994 | ENTRY(spurious_interrupt_bug) | 994 | ENTRY(spurious_interrupt_bug) |
995 | RING0_INT_FRAME | 995 | RING0_INT_FRAME |
996 | ASM_CLAC | 996 | ASM_CLAC |
997 | pushl_cfi $0 | 997 | pushl_cfi $0 |
998 | pushl_cfi $do_spurious_interrupt_bug | 998 | pushl_cfi $do_spurious_interrupt_bug |
999 | jmp error_code | 999 | jmp error_code |
1000 | CFI_ENDPROC | 1000 | CFI_ENDPROC |
1001 | END(spurious_interrupt_bug) | 1001 | END(spurious_interrupt_bug) |
1002 | /* | 1002 | /* |
1003 | * End of kprobes section | 1003 | * End of kprobes section |
1004 | */ | 1004 | */ |
1005 | .popsection | 1005 | .popsection |
1006 | 1006 | ||
1007 | #ifdef CONFIG_XEN | 1007 | #ifdef CONFIG_XEN |
1008 | /* Xen doesn't set %esp to be precisely what the normal sysenter | 1008 | /* Xen doesn't set %esp to be precisely what the normal sysenter |
1009 | entrypoint expects, so fix it up before using the normal path. */ | 1009 | entrypoint expects, so fix it up before using the normal path. */ |
1010 | ENTRY(xen_sysenter_target) | 1010 | ENTRY(xen_sysenter_target) |
1011 | RING0_INT_FRAME | 1011 | RING0_INT_FRAME |
1012 | addl $5*4, %esp /* remove xen-provided frame */ | 1012 | addl $5*4, %esp /* remove xen-provided frame */ |
1013 | CFI_ADJUST_CFA_OFFSET -5*4 | 1013 | CFI_ADJUST_CFA_OFFSET -5*4 |
1014 | jmp sysenter_past_esp | 1014 | jmp sysenter_past_esp |
1015 | CFI_ENDPROC | 1015 | CFI_ENDPROC |
1016 | 1016 | ||
1017 | ENTRY(xen_hypervisor_callback) | 1017 | ENTRY(xen_hypervisor_callback) |
1018 | CFI_STARTPROC | 1018 | CFI_STARTPROC |
1019 | pushl_cfi $-1 /* orig_ax = -1 => not a system call */ | 1019 | pushl_cfi $-1 /* orig_ax = -1 => not a system call */ |
1020 | SAVE_ALL | 1020 | SAVE_ALL |
1021 | TRACE_IRQS_OFF | 1021 | TRACE_IRQS_OFF |
1022 | 1022 | ||
1023 | /* Check to see if we got the event in the critical | 1023 | /* Check to see if we got the event in the critical |
1024 | region in xen_iret_direct, after we've reenabled | 1024 | region in xen_iret_direct, after we've reenabled |
1025 | events and checked for pending events. This simulates | 1025 | events and checked for pending events. This simulates |
1026 | iret instruction's behaviour where it delivers a | 1026 | iret instruction's behaviour where it delivers a |
1027 | pending interrupt when enabling interrupts. */ | 1027 | pending interrupt when enabling interrupts. */ |
1028 | movl PT_EIP(%esp),%eax | 1028 | movl PT_EIP(%esp),%eax |
1029 | cmpl $xen_iret_start_crit,%eax | 1029 | cmpl $xen_iret_start_crit,%eax |
1030 | jb 1f | 1030 | jb 1f |
1031 | cmpl $xen_iret_end_crit,%eax | 1031 | cmpl $xen_iret_end_crit,%eax |
1032 | jae 1f | 1032 | jae 1f |
1033 | 1033 | ||
1034 | jmp xen_iret_crit_fixup | 1034 | jmp xen_iret_crit_fixup |
1035 | 1035 | ||
1036 | ENTRY(xen_do_upcall) | 1036 | ENTRY(xen_do_upcall) |
1037 | 1: mov %esp, %eax | 1037 | 1: mov %esp, %eax |
1038 | call xen_evtchn_do_upcall | 1038 | call xen_evtchn_do_upcall |
1039 | jmp ret_from_intr | 1039 | jmp ret_from_intr |
1040 | CFI_ENDPROC | 1040 | CFI_ENDPROC |
1041 | ENDPROC(xen_hypervisor_callback) | 1041 | ENDPROC(xen_hypervisor_callback) |
1042 | 1042 | ||
1043 | # Hypervisor uses this for application faults while it executes. | 1043 | # Hypervisor uses this for application faults while it executes. |
1044 | # We get here for two reasons: | 1044 | # We get here for two reasons: |
1045 | # 1. Fault while reloading DS, ES, FS or GS | 1045 | # 1. Fault while reloading DS, ES, FS or GS |
1046 | # 2. Fault while executing IRET | 1046 | # 2. Fault while executing IRET |
1047 | # Category 1 we fix up by reattempting the load, and zeroing the segment | 1047 | # Category 1 we fix up by reattempting the load, and zeroing the segment |
1048 | # register if the load fails. | 1048 | # register if the load fails. |
1049 | # Category 2 we fix up by jumping to do_iret_error. We cannot use the | 1049 | # Category 2 we fix up by jumping to do_iret_error. We cannot use the |
1050 | # normal Linux return path in this case because if we use the IRET hypercall | 1050 | # normal Linux return path in this case because if we use the IRET hypercall |
1051 | # to pop the stack frame we end up in an infinite loop of failsafe callbacks. | 1051 | # to pop the stack frame we end up in an infinite loop of failsafe callbacks. |
1052 | # We distinguish between categories by maintaining a status value in EAX. | 1052 | # We distinguish between categories by maintaining a status value in EAX. |
1053 | ENTRY(xen_failsafe_callback) | 1053 | ENTRY(xen_failsafe_callback) |
1054 | CFI_STARTPROC | 1054 | CFI_STARTPROC |
1055 | pushl_cfi %eax | 1055 | pushl_cfi %eax |
1056 | movl $1,%eax | 1056 | movl $1,%eax |
1057 | 1: mov 4(%esp),%ds | 1057 | 1: mov 4(%esp),%ds |
1058 | 2: mov 8(%esp),%es | 1058 | 2: mov 8(%esp),%es |
1059 | 3: mov 12(%esp),%fs | 1059 | 3: mov 12(%esp),%fs |
1060 | 4: mov 16(%esp),%gs | 1060 | 4: mov 16(%esp),%gs |
1061 | /* EAX == 0 => Category 1 (Bad segment) | 1061 | /* EAX == 0 => Category 1 (Bad segment) |
1062 | EAX != 0 => Category 2 (Bad IRET) */ | 1062 | EAX != 0 => Category 2 (Bad IRET) */ |
1063 | testl %eax,%eax | 1063 | testl %eax,%eax |
1064 | popl_cfi %eax | 1064 | popl_cfi %eax |
1065 | lea 16(%esp),%esp | 1065 | lea 16(%esp),%esp |
1066 | CFI_ADJUST_CFA_OFFSET -16 | 1066 | CFI_ADJUST_CFA_OFFSET -16 |
1067 | jz 5f | 1067 | jz 5f |
1068 | jmp iret_exc | 1068 | jmp iret_exc |
1069 | 5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ | 1069 | 5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ |
1070 | SAVE_ALL | 1070 | SAVE_ALL |
1071 | jmp ret_from_exception | 1071 | jmp ret_from_exception |
1072 | CFI_ENDPROC | 1072 | CFI_ENDPROC |
1073 | 1073 | ||
1074 | .section .fixup,"ax" | 1074 | .section .fixup,"ax" |
1075 | 6: xorl %eax,%eax | 1075 | 6: xorl %eax,%eax |
1076 | movl %eax,4(%esp) | 1076 | movl %eax,4(%esp) |
1077 | jmp 1b | 1077 | jmp 1b |
1078 | 7: xorl %eax,%eax | 1078 | 7: xorl %eax,%eax |
1079 | movl %eax,8(%esp) | 1079 | movl %eax,8(%esp) |
1080 | jmp 2b | 1080 | jmp 2b |
1081 | 8: xorl %eax,%eax | 1081 | 8: xorl %eax,%eax |
1082 | movl %eax,12(%esp) | 1082 | movl %eax,12(%esp) |
1083 | jmp 3b | 1083 | jmp 3b |
1084 | 9: xorl %eax,%eax | 1084 | 9: xorl %eax,%eax |
1085 | movl %eax,16(%esp) | 1085 | movl %eax,16(%esp) |
1086 | jmp 4b | 1086 | jmp 4b |
1087 | .previous | 1087 | .previous |
1088 | _ASM_EXTABLE(1b,6b) | 1088 | _ASM_EXTABLE(1b,6b) |
1089 | _ASM_EXTABLE(2b,7b) | 1089 | _ASM_EXTABLE(2b,7b) |
1090 | _ASM_EXTABLE(3b,8b) | 1090 | _ASM_EXTABLE(3b,8b) |
1091 | _ASM_EXTABLE(4b,9b) | 1091 | _ASM_EXTABLE(4b,9b) |
1092 | ENDPROC(xen_failsafe_callback) | 1092 | ENDPROC(xen_failsafe_callback) |
1093 | 1093 | ||
1094 | BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, | 1094 | BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, |
1095 | xen_evtchn_do_upcall) | 1095 | xen_evtchn_do_upcall) |
1096 | 1096 | ||
1097 | #endif /* CONFIG_XEN */ | 1097 | #endif /* CONFIG_XEN */ |
1098 | |||
1099 | #if IS_ENABLED(CONFIG_HYPERV) | ||
1100 | |||
1101 | BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR, | ||
1102 | hyperv_vector_handler) | ||
1103 | |||
1104 | #endif /* CONFIG_HYPERV */ | ||
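
This hunk is the core of the patch: the Hyper-V vmbus callback gets its own IDT stub, built with the same BUILD_INTERRUPT3 machinery and sharing HYPERVISOR_CALLBACK_VECTOR with the Xen HVM callback just above (a guest runs on at most one of the two hypervisors, so the shared vector is safe). The C-side hyperv_vector_handler is added elsewhere in this series, in arch/x86/kernel/cpu/mshyperv.c; as a hedged sketch of the shape such a handler takes (the helper calls are standard kernel APIs of this era, but the body here is an illustration, not the patch itself):

	#include <linux/hardirq.h>
	#include <asm/irq_regs.h>
	#include <asm/desc.h>
	#include <asm/idle.h>

	/* sketch only: the real definitions live in mshyperv.c */
	static void (*vmbus_handler)(void);

	void hyperv_vector_handler(struct pt_regs *regs)
	{
		struct pt_regs *old_regs = set_irq_regs(regs);

		irq_enter();			/* enter interrupt context */
		exit_idle();
		if (vmbus_handler)
			vmbus_handler();	/* drivers/hv hooks in here */
		irq_exit();
		set_irq_regs(old_regs);
	}

	void hv_setup_vmbus_irq(void (*handler)(void))
	{
		vmbus_handler = handler;
		/* wire the vector to the stub built by BUILD_INTERRUPT3 above */
		alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
	}
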
1098 | 1105 | ||
1099 | #ifdef CONFIG_FUNCTION_TRACER | 1106 | #ifdef CONFIG_FUNCTION_TRACER |
1100 | #ifdef CONFIG_DYNAMIC_FTRACE | 1107 | #ifdef CONFIG_DYNAMIC_FTRACE |
1101 | 1108 | ||
1102 | ENTRY(mcount) | 1109 | ENTRY(mcount) |
1103 | ret | 1110 | ret |
1104 | END(mcount) | 1111 | END(mcount) |
1105 | 1112 | ||
1106 | ENTRY(ftrace_caller) | 1113 | ENTRY(ftrace_caller) |
1107 | cmpl $0, function_trace_stop | 1114 | cmpl $0, function_trace_stop |
1108 | jne ftrace_stub | 1115 | jne ftrace_stub |
1109 | 1116 | ||
1110 | pushl %eax | 1117 | pushl %eax |
1111 | pushl %ecx | 1118 | pushl %ecx |
1112 | pushl %edx | 1119 | pushl %edx |
1113 | pushl $0 /* Pass NULL as regs pointer */ | 1120 | pushl $0 /* Pass NULL as regs pointer */ |
1114 | movl 4*4(%esp), %eax | 1121 | movl 4*4(%esp), %eax |
1115 | movl 0x4(%ebp), %edx | 1122 | movl 0x4(%ebp), %edx |
1116 | leal function_trace_op, %ecx | 1123 | leal function_trace_op, %ecx |
1117 | subl $MCOUNT_INSN_SIZE, %eax | 1124 | subl $MCOUNT_INSN_SIZE, %eax |
1118 | 1125 | ||
1119 | .globl ftrace_call | 1126 | .globl ftrace_call |
1120 | ftrace_call: | 1127 | ftrace_call: |
1121 | call ftrace_stub | 1128 | call ftrace_stub |
1122 | 1129 | ||
1123 | addl $4,%esp /* skip NULL pointer */ | 1130 | addl $4,%esp /* skip NULL pointer */ |
1124 | popl %edx | 1131 | popl %edx |
1125 | popl %ecx | 1132 | popl %ecx |
1126 | popl %eax | 1133 | popl %eax |
1127 | ftrace_ret: | 1134 | ftrace_ret: |
1128 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 1135 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1129 | .globl ftrace_graph_call | 1136 | .globl ftrace_graph_call |
1130 | ftrace_graph_call: | 1137 | ftrace_graph_call: |
1131 | jmp ftrace_stub | 1138 | jmp ftrace_stub |
1132 | #endif | 1139 | #endif |
1133 | 1140 | ||
1134 | .globl ftrace_stub | 1141 | .globl ftrace_stub |
1135 | ftrace_stub: | 1142 | ftrace_stub: |
1136 | ret | 1143 | ret |
1137 | END(ftrace_caller) | 1144 | END(ftrace_caller) |
1138 | 1145 | ||
1139 | ENTRY(ftrace_regs_caller) | 1146 | ENTRY(ftrace_regs_caller) |
1140 | pushf /* push flags before compare (in cs location) */ | 1147 | pushf /* push flags before compare (in cs location) */ |
1141 | cmpl $0, function_trace_stop | 1148 | cmpl $0, function_trace_stop |
1142 | jne ftrace_restore_flags | 1149 | jne ftrace_restore_flags |
1143 | 1150 | ||
1144 | /* | 1151 | /* |
1145 | * i386 does not save SS and ESP when coming from kernel. | 1152 | * i386 does not save SS and ESP when coming from kernel. |
1146 | * Instead, to get sp, &regs->sp is used (see ptrace.h). | 1153 | * Instead, to get sp, &regs->sp is used (see ptrace.h). |
1147 | * Unfortunately, that means eflags must be at the same location | 1154 | * Unfortunately, that means eflags must be at the same location |
1148 | * as the current return ip is. We move the return ip into the | 1155 | * as the current return ip is. We move the return ip into the |
1149 | * ip location, and move flags into the return ip location. | 1156 | * ip location, and move flags into the return ip location. |
1150 | */ | 1157 | */ |
1151 | pushl 4(%esp) /* save return ip into ip slot */ | 1158 | pushl 4(%esp) /* save return ip into ip slot */ |
1152 | 1159 | ||
1153 | pushl $0 /* Load 0 into orig_ax */ | 1160 | pushl $0 /* Load 0 into orig_ax */ |
1154 | pushl %gs | 1161 | pushl %gs |
1155 | pushl %fs | 1162 | pushl %fs |
1156 | pushl %es | 1163 | pushl %es |
1157 | pushl %ds | 1164 | pushl %ds |
1158 | pushl %eax | 1165 | pushl %eax |
1159 | pushl %ebp | 1166 | pushl %ebp |
1160 | pushl %edi | 1167 | pushl %edi |
1161 | pushl %esi | 1168 | pushl %esi |
1162 | pushl %edx | 1169 | pushl %edx |
1163 | pushl %ecx | 1170 | pushl %ecx |
1164 | pushl %ebx | 1171 | pushl %ebx |
1165 | 1172 | ||
1166 | movl 13*4(%esp), %eax /* Get the saved flags */ | 1173 | movl 13*4(%esp), %eax /* Get the saved flags */ |
1167 | movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ | 1174 | movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ |
1168 | /* clobbering return ip */ | 1175 | /* clobbering return ip */ |
1169 | movl $__KERNEL_CS,13*4(%esp) | 1176 | movl $__KERNEL_CS,13*4(%esp) |
1170 | 1177 | ||
1171 | movl 12*4(%esp), %eax /* Load ip (1st parameter) */ | 1178 | movl 12*4(%esp), %eax /* Load ip (1st parameter) */ |
1172 | subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ | 1179 | subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ |
1173 | movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ | 1180 | movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ |
1174 | leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ | 1181 | leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ |
1175 | pushl %esp /* Save pt_regs as 4th parameter */ | 1182 | pushl %esp /* Save pt_regs as 4th parameter */ |
1176 | 1183 | ||
1177 | GLOBAL(ftrace_regs_call) | 1184 | GLOBAL(ftrace_regs_call) |
1178 | call ftrace_stub | 1185 | call ftrace_stub |
1179 | 1186 | ||
1180 | addl $4, %esp /* Skip pt_regs */ | 1187 | addl $4, %esp /* Skip pt_regs */ |
1181 | movl 14*4(%esp), %eax /* Move flags back into cs */ | 1188 | movl 14*4(%esp), %eax /* Move flags back into cs */ |
1182 | movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ | 1189 | movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ |
1183 | movl 12*4(%esp), %eax /* Get return ip from regs->ip */ | 1190 | movl 12*4(%esp), %eax /* Get return ip from regs->ip */ |
1184 | movl %eax, 14*4(%esp) /* Put return ip back for ret */ | 1191 | movl %eax, 14*4(%esp) /* Put return ip back for ret */ |
1185 | 1192 | ||
1186 | popl %ebx | 1193 | popl %ebx |
1187 | popl %ecx | 1194 | popl %ecx |
1188 | popl %edx | 1195 | popl %edx |
1189 | popl %esi | 1196 | popl %esi |
1190 | popl %edi | 1197 | popl %edi |
1191 | popl %ebp | 1198 | popl %ebp |
1192 | popl %eax | 1199 | popl %eax |
1193 | popl %ds | 1200 | popl %ds |
1194 | popl %es | 1201 | popl %es |
1195 | popl %fs | 1202 | popl %fs |
1196 | popl %gs | 1203 | popl %gs |
1197 | addl $8, %esp /* Skip orig_ax and ip */ | 1204 | addl $8, %esp /* Skip orig_ax and ip */ |
1198 | popf /* Pop flags at end (no addl to corrupt flags) */ | 1205 | popf /* Pop flags at end (no addl to corrupt flags) */ |
1199 | jmp ftrace_ret | 1206 | jmp ftrace_ret |
1200 | 1207 | ||
1201 | ftrace_restore_flags: | 1208 | ftrace_restore_flags: |
1202 | popf | 1209 | popf |
1203 | jmp ftrace_stub | 1210 | jmp ftrace_stub |
1204 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 1211 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
1205 | 1212 | ||
1206 | ENTRY(mcount) | 1213 | ENTRY(mcount) |
1207 | cmpl $0, function_trace_stop | 1214 | cmpl $0, function_trace_stop |
1208 | jne ftrace_stub | 1215 | jne ftrace_stub |
1209 | 1216 | ||
1210 | cmpl $ftrace_stub, ftrace_trace_function | 1217 | cmpl $ftrace_stub, ftrace_trace_function |
1211 | jnz trace | 1218 | jnz trace |
1212 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 1219 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1213 | cmpl $ftrace_stub, ftrace_graph_return | 1220 | cmpl $ftrace_stub, ftrace_graph_return |
1214 | jnz ftrace_graph_caller | 1221 | jnz ftrace_graph_caller |
1215 | 1222 | ||
1216 | cmpl $ftrace_graph_entry_stub, ftrace_graph_entry | 1223 | cmpl $ftrace_graph_entry_stub, ftrace_graph_entry |
1217 | jnz ftrace_graph_caller | 1224 | jnz ftrace_graph_caller |
1218 | #endif | 1225 | #endif |
1219 | .globl ftrace_stub | 1226 | .globl ftrace_stub |
1220 | ftrace_stub: | 1227 | ftrace_stub: |
1221 | ret | 1228 | ret |
1222 | 1229 | ||
1223 | /* taken from glibc */ | 1230 | /* taken from glibc */ |
1224 | trace: | 1231 | trace: |
1225 | pushl %eax | 1232 | pushl %eax |
1226 | pushl %ecx | 1233 | pushl %ecx |
1227 | pushl %edx | 1234 | pushl %edx |
1228 | movl 0xc(%esp), %eax | 1235 | movl 0xc(%esp), %eax |
1229 | movl 0x4(%ebp), %edx | 1236 | movl 0x4(%ebp), %edx |
1230 | subl $MCOUNT_INSN_SIZE, %eax | 1237 | subl $MCOUNT_INSN_SIZE, %eax |
1231 | 1238 | ||
1232 | call *ftrace_trace_function | 1239 | call *ftrace_trace_function |
1233 | 1240 | ||
1234 | popl %edx | 1241 | popl %edx |
1235 | popl %ecx | 1242 | popl %ecx |
1236 | popl %eax | 1243 | popl %eax |
1237 | jmp ftrace_stub | 1244 | jmp ftrace_stub |
1238 | END(mcount) | 1245 | END(mcount) |
1239 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 1246 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
1240 | #endif /* CONFIG_FUNCTION_TRACER */ | 1247 | #endif /* CONFIG_FUNCTION_TRACER */ |
1241 | 1248 | ||
1242 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 1249 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
1243 | ENTRY(ftrace_graph_caller) | 1250 | ENTRY(ftrace_graph_caller) |
1244 | pushl %eax | 1251 | pushl %eax |
1245 | pushl %ecx | 1252 | pushl %ecx |
1246 | pushl %edx | 1253 | pushl %edx |
1247 | movl 0xc(%esp), %edx | 1254 | movl 0xc(%esp), %edx |
1248 | lea 0x4(%ebp), %eax | 1255 | lea 0x4(%ebp), %eax |
1249 | movl (%ebp), %ecx | 1256 | movl (%ebp), %ecx |
1250 | subl $MCOUNT_INSN_SIZE, %edx | 1257 | subl $MCOUNT_INSN_SIZE, %edx |
1251 | call prepare_ftrace_return | 1258 | call prepare_ftrace_return |
1252 | popl %edx | 1259 | popl %edx |
1253 | popl %ecx | 1260 | popl %ecx |
1254 | popl %eax | 1261 | popl %eax |
1255 | ret | 1262 | ret |
1256 | END(ftrace_graph_caller) | 1263 | END(ftrace_graph_caller) |
1257 | 1264 | ||
1258 | .globl return_to_handler | 1265 | .globl return_to_handler |
1259 | return_to_handler: | 1266 | return_to_handler: |
1260 | pushl %eax | 1267 | pushl %eax |
1261 | pushl %edx | 1268 | pushl %edx |
1262 | movl %ebp, %eax | 1269 | movl %ebp, %eax |
1263 | call ftrace_return_to_handler | 1270 | call ftrace_return_to_handler |
1264 | movl %eax, %ecx | 1271 | movl %eax, %ecx |
1265 | popl %edx | 1272 | popl %edx |
1266 | popl %eax | 1273 | popl %eax |
1267 | jmp *%ecx | 1274 | jmp *%ecx |
1268 | #endif | 1275 | #endif |
1269 | 1276 | ||
1270 | /* | 1277 | /* |
1271 | * Some functions should be protected against kprobes | 1278 | * Some functions should be protected against kprobes |
1272 | */ | 1279 | */ |
1273 | .pushsection .kprobes.text, "ax" | 1280 | .pushsection .kprobes.text, "ax" |
1274 | 1281 | ||
1275 | ENTRY(page_fault) | 1282 | ENTRY(page_fault) |
1276 | RING0_EC_FRAME | 1283 | RING0_EC_FRAME |
1277 | ASM_CLAC | 1284 | ASM_CLAC |
1278 | pushl_cfi $do_page_fault | 1285 | pushl_cfi $do_page_fault |
1279 | ALIGN | 1286 | ALIGN |
1280 | error_code: | 1287 | error_code: |
1281 | /* the function address is in %gs's slot on the stack */ | 1288 | /* the function address is in %gs's slot on the stack */ |
1282 | pushl_cfi %fs | 1289 | pushl_cfi %fs |
1283 | /*CFI_REL_OFFSET fs, 0*/ | 1290 | /*CFI_REL_OFFSET fs, 0*/ |
1284 | pushl_cfi %es | 1291 | pushl_cfi %es |
1285 | /*CFI_REL_OFFSET es, 0*/ | 1292 | /*CFI_REL_OFFSET es, 0*/ |
1286 | pushl_cfi %ds | 1293 | pushl_cfi %ds |
1287 | /*CFI_REL_OFFSET ds, 0*/ | 1294 | /*CFI_REL_OFFSET ds, 0*/ |
1288 | pushl_cfi %eax | 1295 | pushl_cfi %eax |
1289 | CFI_REL_OFFSET eax, 0 | 1296 | CFI_REL_OFFSET eax, 0 |
1290 | pushl_cfi %ebp | 1297 | pushl_cfi %ebp |
1291 | CFI_REL_OFFSET ebp, 0 | 1298 | CFI_REL_OFFSET ebp, 0 |
1292 | pushl_cfi %edi | 1299 | pushl_cfi %edi |
1293 | CFI_REL_OFFSET edi, 0 | 1300 | CFI_REL_OFFSET edi, 0 |
1294 | pushl_cfi %esi | 1301 | pushl_cfi %esi |
1295 | CFI_REL_OFFSET esi, 0 | 1302 | CFI_REL_OFFSET esi, 0 |
1296 | pushl_cfi %edx | 1303 | pushl_cfi %edx |
1297 | CFI_REL_OFFSET edx, 0 | 1304 | CFI_REL_OFFSET edx, 0 |
1298 | pushl_cfi %ecx | 1305 | pushl_cfi %ecx |
1299 | CFI_REL_OFFSET ecx, 0 | 1306 | CFI_REL_OFFSET ecx, 0 |
1300 | pushl_cfi %ebx | 1307 | pushl_cfi %ebx |
1301 | CFI_REL_OFFSET ebx, 0 | 1308 | CFI_REL_OFFSET ebx, 0 |
1302 | cld | 1309 | cld |
1303 | movl $(__KERNEL_PERCPU), %ecx | 1310 | movl $(__KERNEL_PERCPU), %ecx |
1304 | movl %ecx, %fs | 1311 | movl %ecx, %fs |
1305 | UNWIND_ESPFIX_STACK | 1312 | UNWIND_ESPFIX_STACK |
1306 | GS_TO_REG %ecx | 1313 | GS_TO_REG %ecx |
1307 | movl PT_GS(%esp), %edi # get the function address | 1314 | movl PT_GS(%esp), %edi # get the function address |
1308 | movl PT_ORIG_EAX(%esp), %edx # get the error code | 1315 | movl PT_ORIG_EAX(%esp), %edx # get the error code |
1309 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | 1316 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart |
1310 | REG_TO_PTGS %ecx | 1317 | REG_TO_PTGS %ecx |
1311 | SET_KERNEL_GS %ecx | 1318 | SET_KERNEL_GS %ecx |
1312 | movl $(__USER_DS), %ecx | 1319 | movl $(__USER_DS), %ecx |
1313 | movl %ecx, %ds | 1320 | movl %ecx, %ds |
1314 | movl %ecx, %es | 1321 | movl %ecx, %es |
1315 | TRACE_IRQS_OFF | 1322 | TRACE_IRQS_OFF |
1316 | movl %esp,%eax # pt_regs pointer | 1323 | movl %esp,%eax # pt_regs pointer |
1317 | call *%edi | 1324 | call *%edi |
1318 | jmp ret_from_exception | 1325 | jmp ret_from_exception |
1319 | CFI_ENDPROC | 1326 | CFI_ENDPROC |
1320 | END(page_fault) | 1327 | END(page_fault) |
1321 | 1328 | ||
1322 | /* | 1329 | /* |
1323 | * Debug traps and NMI can happen at the one SYSENTER instruction | 1330 | * Debug traps and NMI can happen at the one SYSENTER instruction |
1324 | * that sets up the real kernel stack. Check here, since we can't | 1331 | * that sets up the real kernel stack. Check here, since we can't |
1325 | * allow the wrong stack to be used. | 1332 | * allow the wrong stack to be used. |
1326 | * | 1333 | * |
1327 | * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have | 1334 | * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have |
1328 | * already pushed 3 words if it hits on the sysenter instruction: | 1335 | * already pushed 3 words if it hits on the sysenter instruction: |
1329 | * eflags, cs and eip. | 1336 | * eflags, cs and eip. |
1330 | * | 1337 | * |
1331 | * We just load the right stack, and push the three (known) values | 1338 | * We just load the right stack, and push the three (known) values |
1332 | * by hand onto the new stack - while updating the return eip past | 1339 | * by hand onto the new stack - while updating the return eip past |
1333 | * the instruction that would have done it for sysenter. | 1340 | * the instruction that would have done it for sysenter. |
1334 | */ | 1341 | */ |
1335 | .macro FIX_STACK offset ok label | 1342 | .macro FIX_STACK offset ok label |
1336 | cmpw $__KERNEL_CS, 4(%esp) | 1343 | cmpw $__KERNEL_CS, 4(%esp) |
1337 | jne \ok | 1344 | jne \ok |
1338 | \label: | 1345 | \label: |
1339 | movl TSS_sysenter_sp0 + \offset(%esp), %esp | 1346 | movl TSS_sysenter_sp0 + \offset(%esp), %esp |
1340 | CFI_DEF_CFA esp, 0 | 1347 | CFI_DEF_CFA esp, 0 |
1341 | CFI_UNDEFINED eip | 1348 | CFI_UNDEFINED eip |
1342 | pushfl_cfi | 1349 | pushfl_cfi |
1343 | pushl_cfi $__KERNEL_CS | 1350 | pushl_cfi $__KERNEL_CS |
1344 | pushl_cfi $sysenter_past_esp | 1351 | pushl_cfi $sysenter_past_esp |
1345 | CFI_REL_OFFSET eip, 0 | 1352 | CFI_REL_OFFSET eip, 0 |
1346 | .endm | 1353 | .endm |
1347 | 1354 | ||
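
The \offset parameter counts how many hardware frames sit between the current %esp and the saved-sp0 slot; the 24-byte case used later in the NMI path is never spelled out in the file, so as a hedged reading: the debug handler's own 3-word frame (eflags, cs, eip) gives 12, and an NMI that lands on debug's esp-fixup instruction stacks a second 3-word frame on top of it, giving 24. In C terms:

	/* sketch of the FIX_STACK offset arithmetic (4-byte words assumed) */
	enum {
		HW_FRAME          = 3 * 4,	  /* eflags, cs, eip           */
		DEBUG_ON_SYSENTER = HW_FRAME,	  /* FIX_STACK 12 in debug     */
		NMI_ON_DEBUG_FIX  = 2 * HW_FRAME, /* FIX_STACK 24 in nmi path  */
	};
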
1348 | ENTRY(debug) | 1355 | ENTRY(debug) |
1349 | RING0_INT_FRAME | 1356 | RING0_INT_FRAME |
1350 | ASM_CLAC | 1357 | ASM_CLAC |
1351 | cmpl $ia32_sysenter_target,(%esp) | 1358 | cmpl $ia32_sysenter_target,(%esp) |
1352 | jne debug_stack_correct | 1359 | jne debug_stack_correct |
1353 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn | 1360 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn |
1354 | debug_stack_correct: | 1361 | debug_stack_correct: |
1355 | pushl_cfi $-1 # mark this as an int | 1362 | pushl_cfi $-1 # mark this as an int |
1356 | SAVE_ALL | 1363 | SAVE_ALL |
1357 | TRACE_IRQS_OFF | 1364 | TRACE_IRQS_OFF |
1358 | xorl %edx,%edx # error code 0 | 1365 | xorl %edx,%edx # error code 0 |
1359 | movl %esp,%eax # pt_regs pointer | 1366 | movl %esp,%eax # pt_regs pointer |
1360 | call do_debug | 1367 | call do_debug |
1361 | jmp ret_from_exception | 1368 | jmp ret_from_exception |
1362 | CFI_ENDPROC | 1369 | CFI_ENDPROC |
1363 | END(debug) | 1370 | END(debug) |
1364 | 1371 | ||
1365 | /* | 1372 | /* |
1366 | * NMI is doubly nasty. It can happen _while_ we're handling | 1373 | * NMI is doubly nasty. It can happen _while_ we're handling |
1367 | * a debug fault, and the debug fault hasn't yet been able to | 1374 | * a debug fault, and the debug fault hasn't yet been able to |
1368 | * clear up the stack. So we first check whether we got an | 1375 | * clear up the stack. So we first check whether we got an |
1369 | * NMI on the sysenter entry path, but after that we need to | 1376 | * NMI on the sysenter entry path, but after that we need to |
1370 | * check whether we got an NMI on the debug path where the debug | 1377 | * check whether we got an NMI on the debug path where the debug |
1371 | * fault happened on the sysenter path. | 1378 | * fault happened on the sysenter path. |
1372 | */ | 1379 | */ |
1373 | ENTRY(nmi) | 1380 | ENTRY(nmi) |
1374 | RING0_INT_FRAME | 1381 | RING0_INT_FRAME |
1375 | ASM_CLAC | 1382 | ASM_CLAC |
1376 | pushl_cfi %eax | 1383 | pushl_cfi %eax |
1377 | movl %ss, %eax | 1384 | movl %ss, %eax |
1378 | cmpw $__ESPFIX_SS, %ax | 1385 | cmpw $__ESPFIX_SS, %ax |
1379 | popl_cfi %eax | 1386 | popl_cfi %eax |
1380 | je nmi_espfix_stack | 1387 | je nmi_espfix_stack |
1381 | cmpl $ia32_sysenter_target,(%esp) | 1388 | cmpl $ia32_sysenter_target,(%esp) |
1382 | je nmi_stack_fixup | 1389 | je nmi_stack_fixup |
1383 | pushl_cfi %eax | 1390 | pushl_cfi %eax |
1384 | movl %esp,%eax | 1391 | movl %esp,%eax |
1385 | /* Do not access memory above the end of our stack page, | 1392 | /* Do not access memory above the end of our stack page, |
1386 | * it might not exist. | 1393 | * it might not exist. |
1387 | */ | 1394 | */ |
1388 | andl $(THREAD_SIZE-1),%eax | 1395 | andl $(THREAD_SIZE-1),%eax |
1389 | cmpl $(THREAD_SIZE-20),%eax | 1396 | cmpl $(THREAD_SIZE-20),%eax |
1390 | popl_cfi %eax | 1397 | popl_cfi %eax |
1391 | jae nmi_stack_correct | 1398 | jae nmi_stack_correct |
1392 | cmpl $ia32_sysenter_target,12(%esp) | 1399 | cmpl $ia32_sysenter_target,12(%esp) |
1393 | je nmi_debug_stack_check | 1400 | je nmi_debug_stack_check |
1394 | nmi_stack_correct: | 1401 | nmi_stack_correct: |
1395 | /* We have a RING0_INT_FRAME here */ | 1402 | /* We have a RING0_INT_FRAME here */ |
1396 | pushl_cfi %eax | 1403 | pushl_cfi %eax |
1397 | SAVE_ALL | 1404 | SAVE_ALL |
1398 | xorl %edx,%edx # zero error code | 1405 | xorl %edx,%edx # zero error code |
1399 | movl %esp,%eax # pt_regs pointer | 1406 | movl %esp,%eax # pt_regs pointer |
1400 | call do_nmi | 1407 | call do_nmi |
1401 | jmp restore_all_notrace | 1408 | jmp restore_all_notrace |
1402 | CFI_ENDPROC | 1409 | CFI_ENDPROC |
1403 | 1410 | ||
1404 | nmi_stack_fixup: | 1411 | nmi_stack_fixup: |
1405 | RING0_INT_FRAME | 1412 | RING0_INT_FRAME |
1406 | FIX_STACK 12, nmi_stack_correct, 1 | 1413 | FIX_STACK 12, nmi_stack_correct, 1 |
1407 | jmp nmi_stack_correct | 1414 | jmp nmi_stack_correct |
1408 | 1415 | ||
1409 | nmi_debug_stack_check: | 1416 | nmi_debug_stack_check: |
1410 | /* We have a RING0_INT_FRAME here */ | 1417 | /* We have a RING0_INT_FRAME here */ |
1411 | cmpw $__KERNEL_CS,16(%esp) | 1418 | cmpw $__KERNEL_CS,16(%esp) |
1412 | jne nmi_stack_correct | 1419 | jne nmi_stack_correct |
1413 | cmpl $debug,(%esp) | 1420 | cmpl $debug,(%esp) |
1414 | jb nmi_stack_correct | 1421 | jb nmi_stack_correct |
1415 | cmpl $debug_esp_fix_insn,(%esp) | 1422 | cmpl $debug_esp_fix_insn,(%esp) |
1416 | ja nmi_stack_correct | 1423 | ja nmi_stack_correct |
1417 | FIX_STACK 24, nmi_stack_correct, 1 | 1424 | FIX_STACK 24, nmi_stack_correct, 1 |
1418 | jmp nmi_stack_correct | 1425 | jmp nmi_stack_correct |
1419 | 1426 | ||
1420 | nmi_espfix_stack: | 1427 | nmi_espfix_stack: |
1421 | /* We have a RING0_INT_FRAME here. | 1428 | /* We have a RING0_INT_FRAME here. |
1422 | * | 1429 | * |
1423 | * build the ss:esp pointer that lss uses to switch back | 1430 | * build the ss:esp pointer that lss uses to switch back |
1424 | */ | 1431 | */ |
1425 | pushl_cfi %ss | 1432 | pushl_cfi %ss |
1426 | pushl_cfi %esp | 1433 | pushl_cfi %esp |
1427 | addl $4, (%esp) | 1434 | addl $4, (%esp) |
1428 | /* copy the iret frame of 12 bytes */ | 1435 | /* copy the iret frame of 12 bytes */ |
1429 | .rept 3 | 1436 | .rept 3 |
1430 | pushl_cfi 16(%esp) | 1437 | pushl_cfi 16(%esp) |
1431 | .endr | 1438 | .endr |
1432 | pushl_cfi %eax | 1439 | pushl_cfi %eax |
1433 | SAVE_ALL | 1440 | SAVE_ALL |
1434 | FIXUP_ESPFIX_STACK # %eax == %esp | 1441 | FIXUP_ESPFIX_STACK # %eax == %esp |
1435 | xorl %edx,%edx # zero error code | 1442 | xorl %edx,%edx # zero error code |
1436 | call do_nmi | 1443 | call do_nmi |
1437 | RESTORE_REGS | 1444 | RESTORE_REGS |
1438 | lss 12+4(%esp), %esp # back to espfix stack | 1445 | lss 12+4(%esp), %esp # back to espfix stack |
1439 | CFI_ADJUST_CFA_OFFSET -24 | 1446 | CFI_ADJUST_CFA_OFFSET -24 |
1440 | jmp irq_return | 1447 | jmp irq_return |
1441 | CFI_ENDPROC | 1448 | CFI_ENDPROC |
1442 | END(nmi) | 1449 | END(nmi) |
1443 | 1450 | ||
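
One subtlety in the path above: before probing 12(%esp) for the sysenter return address, the code checks how close %esp is to the top of the stack area, because the memory just above the stack may not be mapped. A rough C rendering of that guard (assuming THREAD_SIZE is the power-of-two stack size; 8 KiB is used here purely for illustration):

	#define THREAD_SIZE 8192	/* illustrative; the real value is per-config */

	/* mirrors: andl $(THREAD_SIZE-1),%eax; cmpl $(THREAD_SIZE-20),%eax */
	static int safe_to_probe(unsigned long esp)
	{
		unsigned long off = esp & (THREAD_SIZE - 1);
		return off < THREAD_SIZE - 20;	/* else: jae nmi_stack_correct */
	}
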
1444 | ENTRY(int3) | 1451 | ENTRY(int3) |
1445 | RING0_INT_FRAME | 1452 | RING0_INT_FRAME |
1446 | ASM_CLAC | 1453 | ASM_CLAC |
1447 | pushl_cfi $-1 # mark this as an int | 1454 | pushl_cfi $-1 # mark this as an int |
1448 | SAVE_ALL | 1455 | SAVE_ALL |
1449 | TRACE_IRQS_OFF | 1456 | TRACE_IRQS_OFF |
1450 | xorl %edx,%edx # zero error code | 1457 | xorl %edx,%edx # zero error code |
1451 | movl %esp,%eax # pt_regs pointer | 1458 | movl %esp,%eax # pt_regs pointer |
1452 | call do_int3 | 1459 | call do_int3 |
1453 | jmp ret_from_exception | 1460 | jmp ret_from_exception |
1454 | CFI_ENDPROC | 1461 | CFI_ENDPROC |
1455 | END(int3) | 1462 | END(int3) |
1456 | 1463 | ||
1457 | ENTRY(general_protection) | 1464 | ENTRY(general_protection) |
1458 | RING0_EC_FRAME | 1465 | RING0_EC_FRAME |
1459 | pushl_cfi $do_general_protection | 1466 | pushl_cfi $do_general_protection |
1460 | jmp error_code | 1467 | jmp error_code |
1461 | CFI_ENDPROC | 1468 | CFI_ENDPROC |
1462 | END(general_protection) | 1469 | END(general_protection) |
1463 | 1470 | ||
1464 | #ifdef CONFIG_KVM_GUEST | 1471 | #ifdef CONFIG_KVM_GUEST |
1465 | ENTRY(async_page_fault) | 1472 | ENTRY(async_page_fault) |
1466 | RING0_EC_FRAME | 1473 | RING0_EC_FRAME |
1467 | ASM_CLAC | 1474 | ASM_CLAC |
1468 | pushl_cfi $do_async_page_fault | 1475 | pushl_cfi $do_async_page_fault |
1469 | jmp error_code | 1476 | jmp error_code |
1470 | CFI_ENDPROC | 1477 | CFI_ENDPROC |
1471 | END(async_page_fault) | 1478 | END(async_page_fault) |
1472 | #endif | 1479 | #endif |
1473 | 1480 | ||
1474 | /* | 1481 | /* |
1475 | * End of kprobes section | 1482 | * End of kprobes section |
1476 | */ | 1483 | */ |
1477 | .popsection | 1484 | .popsection |
1478 | 1485 |
arch/x86/kernel/entry_64.S
1 | /* | 1 | /* |
2 | * linux/arch/x86_64/entry.S | 2 | * linux/arch/x86_64/entry.S |
3 | * | 3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs | 5 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs |
6 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> | 6 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> |
7 | */ | 7 | */ |
8 | 8 | ||
9 | /* | 9 | /* |
10 | * entry.S contains the system-call and fault low-level handling routines. | 10 | * entry.S contains the system-call and fault low-level handling routines. |
11 | * | 11 | * |
12 | * Some of this is documented in Documentation/x86/entry_64.txt | 12 | * Some of this is documented in Documentation/x86/entry_64.txt |
13 | * | 13 | * |
14 | * NOTE: This code handles signal-recognition, which happens every time | 14 | * NOTE: This code handles signal-recognition, which happens every time |
15 | * after an interrupt and after each system call. | 15 | * after an interrupt and after each system call. |
16 | * | 16 | * |
17 | * Normal syscalls and interrupts don't save a full stack frame, this is | 17 | * Normal syscalls and interrupts don't save a full stack frame, this is |
18 | * only done for syscall tracing, signals or fork/exec et al. | 18 | * only done for syscall tracing, signals or fork/exec et al. |
19 | * | 19 | * |
20 | * A note on terminology: | 20 | * A note on terminology: |
21 | * - top of stack: Architecture defined interrupt frame from SS to RIP | 21 | * - top of stack: Architecture defined interrupt frame from SS to RIP |
22 | * at the top of the kernel process stack. | 22 | * at the top of the kernel process stack. |
23 | * - partial stack frame: partially saved registers up to R11. | 23 | * - partial stack frame: partially saved registers up to R11. |
24 | * - full stack frame: Like partial stack frame, but all register saved. | 24 | * - full stack frame: Like partial stack frame, but all register saved. |
25 | * | 25 | * |
26 | * Some macro usage: | 26 | * Some macro usage: |
27 | * - CFI macros are used to generate dwarf2 unwind information for better | 27 | * - CFI macros are used to generate dwarf2 unwind information for better |
28 | * backtraces. They don't change any code. | 28 | * backtraces. They don't change any code. |
29 | * - SAVE_ALL/RESTORE_ALL - Save/restore all registers | 29 | * - SAVE_ALL/RESTORE_ALL - Save/restore all registers |
30 | * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. | 30 | * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. |
31 | * There are unfortunately lots of special cases where some registers | 31 | * There are unfortunately lots of special cases where some registers |
32 | * are not touched. The macro is a big mess that should be cleaned up. | 32 | * are not touched. The macro is a big mess that should be cleaned up. |
33 | * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. | 33 | * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. |
34 | * Gives a full stack frame. | 34 | * Gives a full stack frame. |
35 | * - ENTRY/END Define functions in the symbol table. | 35 | * - ENTRY/END Define functions in the symbol table. |
36 | * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack | 36 | * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack |
37 | * frame that is otherwise undefined after a SYSCALL | 37 | * frame that is otherwise undefined after a SYSCALL |
38 | * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. | 38 | * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. |
39 | * - errorentry/paranoidentry/zeroentry - Define exception entry points. | 39 | * - errorentry/paranoidentry/zeroentry - Define exception entry points. |
40 | */ | 40 | */ |
41 | 41 | ||
42 | #include <linux/linkage.h> | 42 | #include <linux/linkage.h> |
43 | #include <asm/segment.h> | 43 | #include <asm/segment.h> |
44 | #include <asm/cache.h> | 44 | #include <asm/cache.h> |
45 | #include <asm/errno.h> | 45 | #include <asm/errno.h> |
46 | #include <asm/dwarf2.h> | 46 | #include <asm/dwarf2.h> |
47 | #include <asm/calling.h> | 47 | #include <asm/calling.h> |
48 | #include <asm/asm-offsets.h> | 48 | #include <asm/asm-offsets.h> |
49 | #include <asm/msr.h> | 49 | #include <asm/msr.h> |
50 | #include <asm/unistd.h> | 50 | #include <asm/unistd.h> |
51 | #include <asm/thread_info.h> | 51 | #include <asm/thread_info.h> |
52 | #include <asm/hw_irq.h> | 52 | #include <asm/hw_irq.h> |
53 | #include <asm/page_types.h> | 53 | #include <asm/page_types.h> |
54 | #include <asm/irqflags.h> | 54 | #include <asm/irqflags.h> |
55 | #include <asm/paravirt.h> | 55 | #include <asm/paravirt.h> |
56 | #include <asm/ftrace.h> | 56 | #include <asm/ftrace.h> |
57 | #include <asm/percpu.h> | 57 | #include <asm/percpu.h> |
58 | #include <asm/asm.h> | 58 | #include <asm/asm.h> |
59 | #include <asm/context_tracking.h> | 59 | #include <asm/context_tracking.h> |
60 | #include <asm/smap.h> | 60 | #include <asm/smap.h> |
61 | #include <linux/err.h> | 61 | #include <linux/err.h> |
62 | 62 | ||
63 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 63 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
64 | #include <linux/elf-em.h> | 64 | #include <linux/elf-em.h> |
65 | #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) | 65 | #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) |
66 | #define __AUDIT_ARCH_64BIT 0x80000000 | 66 | #define __AUDIT_ARCH_64BIT 0x80000000 |
67 | #define __AUDIT_ARCH_LE 0x40000000 | 67 | #define __AUDIT_ARCH_LE 0x40000000 |
68 | 68 | ||
69 | .code64 | 69 | .code64 |
70 | .section .entry.text, "ax" | 70 | .section .entry.text, "ax" |
71 | 71 | ||
72 | #ifdef CONFIG_FUNCTION_TRACER | 72 | #ifdef CONFIG_FUNCTION_TRACER |
73 | 73 | ||
74 | #ifdef CC_USING_FENTRY | 74 | #ifdef CC_USING_FENTRY |
75 | # define function_hook __fentry__ | 75 | # define function_hook __fentry__ |
76 | #else | 76 | #else |
77 | # define function_hook mcount | 77 | # define function_hook mcount |
78 | #endif | 78 | #endif |
79 | 79 | ||
80 | #ifdef CONFIG_DYNAMIC_FTRACE | 80 | #ifdef CONFIG_DYNAMIC_FTRACE |
81 | 81 | ||
82 | ENTRY(function_hook) | 82 | ENTRY(function_hook) |
83 | retq | 83 | retq |
84 | END(function_hook) | 84 | END(function_hook) |
85 | 85 | ||
86 | /* skip is set if stack has been adjusted */ | 86 | /* skip is set if stack has been adjusted */ |
87 | .macro ftrace_caller_setup skip=0 | 87 | .macro ftrace_caller_setup skip=0 |
88 | MCOUNT_SAVE_FRAME \skip | 88 | MCOUNT_SAVE_FRAME \skip |
89 | 89 | ||
90 | /* Load the ftrace_ops into the 3rd parameter */ | 90 | /* Load the ftrace_ops into the 3rd parameter */ |
91 | leaq function_trace_op, %rdx | 91 | leaq function_trace_op, %rdx |
92 | 92 | ||
93 | /* Load ip into the first parameter */ | 93 | /* Load ip into the first parameter */ |
94 | movq RIP(%rsp), %rdi | 94 | movq RIP(%rsp), %rdi |
95 | subq $MCOUNT_INSN_SIZE, %rdi | 95 | subq $MCOUNT_INSN_SIZE, %rdi |
96 | /* Load the parent_ip into the second parameter */ | 96 | /* Load the parent_ip into the second parameter */ |
97 | #ifdef CC_USING_FENTRY | 97 | #ifdef CC_USING_FENTRY |
98 | movq SS+16(%rsp), %rsi | 98 | movq SS+16(%rsp), %rsi |
99 | #else | 99 | #else |
100 | movq 8(%rbp), %rsi | 100 | movq 8(%rbp), %rsi |
101 | #endif | 101 | #endif |
102 | .endm | 102 | .endm |
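The macro above marshals the System V argument registers for the C tracer callback: %rdi receives the traced function's address (return address minus MCOUNT_INSN_SIZE, the length of the call mcount/__fentry__ instruction), %rsi the caller's return address, and %rdx the ftrace_ops pointer. A hedged C sketch of the callback these registers feed (signature as I recall it from include/linux/ftrace.h; verify against your tree, and the my_tracer name is mine):

    struct ftrace_ops;      /* opaque here; real type in linux/ftrace.h */
    struct pt_regs;

    static void my_tracer(unsigned long ip, unsigned long parent_ip,
                          struct ftrace_ops *op, struct pt_regs *regs)
    {
            (void)op; (void)regs;
            /* ip        <- %rdi: the traced function's call site
             * parent_ip <- %rsi: return address into the caller
             * op        <- %rdx: &function_trace_op (loaded above)
             * regs      <- %rcx: NULL on the fast path, a full pt_regs
             *                    when entered via ftrace_regs_caller */
    }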
103 | 103 | ||
104 | ENTRY(ftrace_caller) | 104 | ENTRY(ftrace_caller) |
105 | /* Check if tracing was disabled (quick check) */ | 105 | /* Check if tracing was disabled (quick check) */ |
106 | cmpl $0, function_trace_stop | 106 | cmpl $0, function_trace_stop |
107 | jne ftrace_stub | 107 | jne ftrace_stub |
108 | 108 | ||
109 | ftrace_caller_setup | 109 | ftrace_caller_setup |
110 | /* regs go into 4th parameter (but make it NULL) */ | 110 | /* regs go into 4th parameter (but make it NULL) */ |
111 | movq $0, %rcx | 111 | movq $0, %rcx |
112 | 112 | ||
113 | GLOBAL(ftrace_call) | 113 | GLOBAL(ftrace_call) |
114 | call ftrace_stub | 114 | call ftrace_stub |
115 | 115 | ||
116 | MCOUNT_RESTORE_FRAME | 116 | MCOUNT_RESTORE_FRAME |
117 | ftrace_return: | 117 | ftrace_return: |
118 | 118 | ||
119 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 119 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
120 | GLOBAL(ftrace_graph_call) | 120 | GLOBAL(ftrace_graph_call) |
121 | jmp ftrace_stub | 121 | jmp ftrace_stub |
122 | #endif | 122 | #endif |
123 | 123 | ||
124 | GLOBAL(ftrace_stub) | 124 | GLOBAL(ftrace_stub) |
125 | retq | 125 | retq |
126 | END(ftrace_caller) | 126 | END(ftrace_caller) |
127 | 127 | ||
128 | ENTRY(ftrace_regs_caller) | 128 | ENTRY(ftrace_regs_caller) |
129 | /* Save the current flags before compare (in SS location)*/ | 129 | /* Save the current flags before compare (in SS location)*/ |
130 | pushfq | 130 | pushfq |
131 | 131 | ||
132 | /* Check if tracing was disabled (quick check) */ | 132 | /* Check if tracing was disabled (quick check) */ |
133 | cmpl $0, function_trace_stop | 133 | cmpl $0, function_trace_stop |
134 | jne ftrace_restore_flags | 134 | jne ftrace_restore_flags |
135 | 135 | ||
136 | /* skip=8 to skip flags saved in SS */ | 136 | /* skip=8 to skip flags saved in SS */ |
137 | ftrace_caller_setup 8 | 137 | ftrace_caller_setup 8 |
138 | 138 | ||
139 | /* Save the rest of pt_regs */ | 139 | /* Save the rest of pt_regs */ |
140 | movq %r15, R15(%rsp) | 140 | movq %r15, R15(%rsp) |
141 | movq %r14, R14(%rsp) | 141 | movq %r14, R14(%rsp) |
142 | movq %r13, R13(%rsp) | 142 | movq %r13, R13(%rsp) |
143 | movq %r12, R12(%rsp) | 143 | movq %r12, R12(%rsp) |
144 | movq %r11, R11(%rsp) | 144 | movq %r11, R11(%rsp) |
145 | movq %r10, R10(%rsp) | 145 | movq %r10, R10(%rsp) |
146 | movq %rbp, RBP(%rsp) | 146 | movq %rbp, RBP(%rsp) |
147 | movq %rbx, RBX(%rsp) | 147 | movq %rbx, RBX(%rsp) |
148 | /* Copy saved flags */ | 148 | /* Copy saved flags */ |
149 | movq SS(%rsp), %rcx | 149 | movq SS(%rsp), %rcx |
150 | movq %rcx, EFLAGS(%rsp) | 150 | movq %rcx, EFLAGS(%rsp) |
151 | /* Kernel segments */ | 151 | /* Kernel segments */ |
152 | movq $__KERNEL_DS, %rcx | 152 | movq $__KERNEL_DS, %rcx |
153 | movq %rcx, SS(%rsp) | 153 | movq %rcx, SS(%rsp) |
154 | movq $__KERNEL_CS, %rcx | 154 | movq $__KERNEL_CS, %rcx |
155 | movq %rcx, CS(%rsp) | 155 | movq %rcx, CS(%rsp) |
156 | /* Stack - skipping return address */ | 156 | /* Stack - skipping return address */ |
157 | leaq SS+16(%rsp), %rcx | 157 | leaq SS+16(%rsp), %rcx |
158 | movq %rcx, RSP(%rsp) | 158 | movq %rcx, RSP(%rsp) |
159 | 159 | ||
160 | /* regs go into 4th parameter */ | 160 | /* regs go into 4th parameter */ |
161 | leaq (%rsp), %rcx | 161 | leaq (%rsp), %rcx |
162 | 162 | ||
163 | GLOBAL(ftrace_regs_call) | 163 | GLOBAL(ftrace_regs_call) |
164 | call ftrace_stub | 164 | call ftrace_stub |
165 | 165 | ||
166 | /* Copy flags back to SS, to restore them */ | 166 | /* Copy flags back to SS, to restore them */ |
167 | movq EFLAGS(%rsp), %rax | 167 | movq EFLAGS(%rsp), %rax |
168 | movq %rax, SS(%rsp) | 168 | movq %rax, SS(%rsp) |
169 | 169 | ||
170 | /* Handlers can change the RIP */ | 170 | /* Handlers can change the RIP */ |
171 | movq RIP(%rsp), %rax | 171 | movq RIP(%rsp), %rax |
172 | movq %rax, SS+8(%rsp) | 172 | movq %rax, SS+8(%rsp) |
173 | 173 | ||
174 | /* restore the rest of pt_regs */ | 174 | /* restore the rest of pt_regs */ |
175 | movq R15(%rsp), %r15 | 175 | movq R15(%rsp), %r15 |
176 | movq R14(%rsp), %r14 | 176 | movq R14(%rsp), %r14 |
177 | movq R13(%rsp), %r13 | 177 | movq R13(%rsp), %r13 |
178 | movq R12(%rsp), %r12 | 178 | movq R12(%rsp), %r12 |
179 | movq R10(%rsp), %r10 | 179 | movq R10(%rsp), %r10 |
180 | movq RBP(%rsp), %rbp | 180 | movq RBP(%rsp), %rbp |
181 | movq RBX(%rsp), %rbx | 181 | movq RBX(%rsp), %rbx |
182 | 182 | ||
183 | /* skip=8 to skip flags saved in SS */ | 183 | /* skip=8 to skip flags saved in SS */ |
184 | MCOUNT_RESTORE_FRAME 8 | 184 | MCOUNT_RESTORE_FRAME 8 |
185 | 185 | ||
186 | /* Restore flags */ | 186 | /* Restore flags */ |
187 | popfq | 187 | popfq |
188 | 188 | ||
189 | jmp ftrace_return | 189 | jmp ftrace_return |
190 | ftrace_restore_flags: | 190 | ftrace_restore_flags: |
191 | popfq | 191 | popfq |
192 | jmp ftrace_stub | 192 | jmp ftrace_stub |
193 | 193 | ||
194 | END(ftrace_regs_caller) | 194 | END(ftrace_regs_caller) |
195 | 195 | ||
196 | 196 | ||
197 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 197 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
198 | 198 | ||
199 | ENTRY(function_hook) | 199 | ENTRY(function_hook) |
200 | cmpl $0, function_trace_stop | 200 | cmpl $0, function_trace_stop |
201 | jne ftrace_stub | 201 | jne ftrace_stub |
202 | 202 | ||
203 | cmpq $ftrace_stub, ftrace_trace_function | 203 | cmpq $ftrace_stub, ftrace_trace_function |
204 | jnz trace | 204 | jnz trace |
205 | 205 | ||
206 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 206 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
207 | cmpq $ftrace_stub, ftrace_graph_return | 207 | cmpq $ftrace_stub, ftrace_graph_return |
208 | jnz ftrace_graph_caller | 208 | jnz ftrace_graph_caller |
209 | 209 | ||
210 | cmpq $ftrace_graph_entry_stub, ftrace_graph_entry | 210 | cmpq $ftrace_graph_entry_stub, ftrace_graph_entry |
211 | jnz ftrace_graph_caller | 211 | jnz ftrace_graph_caller |
212 | #endif | 212 | #endif |
213 | 213 | ||
214 | GLOBAL(ftrace_stub) | 214 | GLOBAL(ftrace_stub) |
215 | retq | 215 | retq |
216 | 216 | ||
217 | trace: | 217 | trace: |
218 | MCOUNT_SAVE_FRAME | 218 | MCOUNT_SAVE_FRAME |
219 | 219 | ||
220 | movq RIP(%rsp), %rdi | 220 | movq RIP(%rsp), %rdi |
221 | #ifdef CC_USING_FENTRY | 221 | #ifdef CC_USING_FENTRY |
222 | movq SS+16(%rsp), %rsi | 222 | movq SS+16(%rsp), %rsi |
223 | #else | 223 | #else |
224 | movq 8(%rbp), %rsi | 224 | movq 8(%rbp), %rsi |
225 | #endif | 225 | #endif |
226 | subq $MCOUNT_INSN_SIZE, %rdi | 226 | subq $MCOUNT_INSN_SIZE, %rdi |
227 | 227 | ||
228 | call *ftrace_trace_function | 228 | call *ftrace_trace_function |
229 | 229 | ||
230 | MCOUNT_RESTORE_FRAME | 230 | MCOUNT_RESTORE_FRAME |
231 | 231 | ||
232 | jmp ftrace_stub | 232 | jmp ftrace_stub |
233 | END(function_hook) | 233 | END(function_hook) |
234 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 234 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
235 | #endif /* CONFIG_FUNCTION_TRACER */ | 235 | #endif /* CONFIG_FUNCTION_TRACER */ |
236 | 236 | ||
237 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 237 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
238 | ENTRY(ftrace_graph_caller) | 238 | ENTRY(ftrace_graph_caller) |
239 | MCOUNT_SAVE_FRAME | 239 | MCOUNT_SAVE_FRAME |
240 | 240 | ||
241 | #ifdef CC_USING_FENTRY | 241 | #ifdef CC_USING_FENTRY |
242 | leaq SS+16(%rsp), %rdi | 242 | leaq SS+16(%rsp), %rdi |
243 | movq $0, %rdx /* No framepointers needed */ | 243 | movq $0, %rdx /* No framepointers needed */ |
244 | #else | 244 | #else |
245 | leaq 8(%rbp), %rdi | 245 | leaq 8(%rbp), %rdi |
246 | movq (%rbp), %rdx | 246 | movq (%rbp), %rdx |
247 | #endif | 247 | #endif |
248 | movq RIP(%rsp), %rsi | 248 | movq RIP(%rsp), %rsi |
249 | subq $MCOUNT_INSN_SIZE, %rsi | 249 | subq $MCOUNT_INSN_SIZE, %rsi |
250 | 250 | ||
251 | call prepare_ftrace_return | 251 | call prepare_ftrace_return |
252 | 252 | ||
253 | MCOUNT_RESTORE_FRAME | 253 | MCOUNT_RESTORE_FRAME |
254 | 254 | ||
255 | retq | 255 | retq |
256 | END(ftrace_graph_caller) | 256 | END(ftrace_graph_caller) |
257 | 257 | ||
258 | GLOBAL(return_to_handler) | 258 | GLOBAL(return_to_handler) |
259 | subq $24, %rsp | 259 | subq $24, %rsp |
260 | 260 | ||
261 | /* Save the return values */ | 261 | /* Save the return values */ |
262 | movq %rax, (%rsp) | 262 | movq %rax, (%rsp) |
263 | movq %rdx, 8(%rsp) | 263 | movq %rdx, 8(%rsp) |
264 | movq %rbp, %rdi | 264 | movq %rbp, %rdi |
265 | 265 | ||
266 | call ftrace_return_to_handler | 266 | call ftrace_return_to_handler |
267 | 267 | ||
268 | movq %rax, %rdi | 268 | movq %rax, %rdi |
269 | movq 8(%rsp), %rdx | 269 | movq 8(%rsp), %rdx |
270 | movq (%rsp), %rax | 270 | movq (%rsp), %rax |
271 | addq $24, %rsp | 271 | addq $24, %rsp |
272 | jmp *%rdi | 272 | jmp *%rdi |
273 | #endif | 273 | #endif |
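For context on return_to_handler above: prepare_ftrace_return replaces a traced function's real return address with this trampoline. When the function returns here, %rax and %rdx are saved because they may carry the function's return value, ftrace_return_to_handler hands back the original return address, and the final jmp *%rdi resumes the caller as if nothing had been interposed.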
274 | 274 | ||
275 | 275 | ||
276 | #ifndef CONFIG_PREEMPT | 276 | #ifndef CONFIG_PREEMPT |
277 | #define retint_kernel retint_restore_args | 277 | #define retint_kernel retint_restore_args |
278 | #endif | 278 | #endif |
279 | 279 | ||
280 | #ifdef CONFIG_PARAVIRT | 280 | #ifdef CONFIG_PARAVIRT |
281 | ENTRY(native_usergs_sysret64) | 281 | ENTRY(native_usergs_sysret64) |
282 | swapgs | 282 | swapgs |
283 | sysretq | 283 | sysretq |
284 | ENDPROC(native_usergs_sysret64) | 284 | ENDPROC(native_usergs_sysret64) |
285 | #endif /* CONFIG_PARAVIRT */ | 285 | #endif /* CONFIG_PARAVIRT */ |
286 | 286 | ||
287 | 287 | ||
288 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET | 288 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET |
289 | #ifdef CONFIG_TRACE_IRQFLAGS | 289 | #ifdef CONFIG_TRACE_IRQFLAGS |
290 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | 290 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ |
291 | jnc 1f | 291 | jnc 1f |
292 | TRACE_IRQS_ON | 292 | TRACE_IRQS_ON |
293 | 1: | 293 | 1: |
294 | #endif | 294 | #endif |
295 | .endm | 295 | .endm |
296 | 296 | ||
297 | /* | 297 | /* |
298 | * When dynamic function tracer is enabled it will add a breakpoint | 298 | * When dynamic function tracer is enabled it will add a breakpoint |
299 | * to all locations that it is about to modify, sync CPUs, update | 299 | * to all locations that it is about to modify, sync CPUs, update |
300 | * all the code, sync CPUs, then remove the breakpoints. During this window, | 300 | * all the code, sync CPUs, then remove the breakpoints. During this window, |
301 | * if lockdep is enabled, TRACE_IRQS_ON/OFF might jump back into the debug | 301 | * if lockdep is enabled, TRACE_IRQS_ON/OFF might jump back into the debug |
302 | * handler outside of the IST protection update. | 302 | * handler outside of the IST protection update. |
303 | * | 303 | * |
304 | * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to | 304 | * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to |
305 | * make sure the stack pointer does not get reset back to the top | 305 | * make sure the stack pointer does not get reset back to the top |
306 | * of the debug stack, and instead just reuses the current stack. | 306 | * of the debug stack, and instead just reuses the current stack. |
307 | */ | 307 | */ |
308 | #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) | 308 | #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) |
309 | 309 | ||
310 | .macro TRACE_IRQS_OFF_DEBUG | 310 | .macro TRACE_IRQS_OFF_DEBUG |
311 | call debug_stack_set_zero | 311 | call debug_stack_set_zero |
312 | TRACE_IRQS_OFF | 312 | TRACE_IRQS_OFF |
313 | call debug_stack_reset | 313 | call debug_stack_reset |
314 | .endm | 314 | .endm |
315 | 315 | ||
316 | .macro TRACE_IRQS_ON_DEBUG | 316 | .macro TRACE_IRQS_ON_DEBUG |
317 | call debug_stack_set_zero | 317 | call debug_stack_set_zero |
318 | TRACE_IRQS_ON | 318 | TRACE_IRQS_ON |
319 | call debug_stack_reset | 319 | call debug_stack_reset |
320 | .endm | 320 | .endm |
321 | 321 | ||
322 | .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET | 322 | .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET |
323 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | 323 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ |
324 | jnc 1f | 324 | jnc 1f |
325 | TRACE_IRQS_ON_DEBUG | 325 | TRACE_IRQS_ON_DEBUG |
326 | 1: | 326 | 1: |
327 | .endm | 327 | .endm |
328 | 328 | ||
329 | #else | 329 | #else |
330 | # define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF | 330 | # define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF |
331 | # define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON | 331 | # define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON |
332 | # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ | 332 | # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ |
333 | #endif | 333 | #endif |
334 | 334 | ||
335 | /* | 335 | /* |
336 | * C code is not supposed to know about the undefined top of stack. Every time | 336 | * C code is not supposed to know about the undefined top of stack. Every time |
337 | * a C function with a pt_regs argument is called from the SYSCALL-based | 337 | * a C function with a pt_regs argument is called from the SYSCALL-based |
338 | * fast path FIXUP_TOP_OF_STACK is needed. | 338 | * fast path FIXUP_TOP_OF_STACK is needed. |
339 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs | 339 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs |
340 | * manipulation. | 340 | * manipulation. |
341 | */ | 341 | */ |
342 | 342 | ||
343 | /* %rsp:at FRAMEEND */ | 343 | /* %rsp:at FRAMEEND */ |
344 | .macro FIXUP_TOP_OF_STACK tmp offset=0 | 344 | .macro FIXUP_TOP_OF_STACK tmp offset=0 |
345 | movq PER_CPU_VAR(old_rsp),\tmp | 345 | movq PER_CPU_VAR(old_rsp),\tmp |
346 | movq \tmp,RSP+\offset(%rsp) | 346 | movq \tmp,RSP+\offset(%rsp) |
347 | movq $__USER_DS,SS+\offset(%rsp) | 347 | movq $__USER_DS,SS+\offset(%rsp) |
348 | movq $__USER_CS,CS+\offset(%rsp) | 348 | movq $__USER_CS,CS+\offset(%rsp) |
349 | movq $-1,RCX+\offset(%rsp) | 349 | movq $-1,RCX+\offset(%rsp) |
350 | movq R11+\offset(%rsp),\tmp /* get eflags */ | 350 | movq R11+\offset(%rsp),\tmp /* get eflags */ |
351 | movq \tmp,EFLAGS+\offset(%rsp) | 351 | movq \tmp,EFLAGS+\offset(%rsp) |
352 | .endm | 352 | .endm |
353 | 353 | ||
354 | .macro RESTORE_TOP_OF_STACK tmp offset=0 | 354 | .macro RESTORE_TOP_OF_STACK tmp offset=0 |
355 | movq RSP+\offset(%rsp),\tmp | 355 | movq RSP+\offset(%rsp),\tmp |
356 | movq \tmp,PER_CPU_VAR(old_rsp) | 356 | movq \tmp,PER_CPU_VAR(old_rsp) |
357 | movq EFLAGS+\offset(%rsp),\tmp | 357 | movq EFLAGS+\offset(%rsp),\tmp |
358 | movq \tmp,R11+\offset(%rsp) | 358 | movq \tmp,R11+\offset(%rsp) |
359 | .endm | 359 | .endm |
360 | 360 | ||
361 | .macro FAKE_STACK_FRAME child_rip | 361 | .macro FAKE_STACK_FRAME child_rip |
362 | /* push in order ss, rsp, eflags, cs, rip */ | 362 | /* push in order ss, rsp, eflags, cs, rip */ |
363 | xorl %eax, %eax | 363 | xorl %eax, %eax |
364 | pushq_cfi $__KERNEL_DS /* ss */ | 364 | pushq_cfi $__KERNEL_DS /* ss */ |
365 | /*CFI_REL_OFFSET ss,0*/ | 365 | /*CFI_REL_OFFSET ss,0*/ |
366 | pushq_cfi %rax /* rsp */ | 366 | pushq_cfi %rax /* rsp */ |
367 | CFI_REL_OFFSET rsp,0 | 367 | CFI_REL_OFFSET rsp,0 |
368 | pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */ | 368 | pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */ |
369 | /*CFI_REL_OFFSET rflags,0*/ | 369 | /*CFI_REL_OFFSET rflags,0*/ |
370 | pushq_cfi $__KERNEL_CS /* cs */ | 370 | pushq_cfi $__KERNEL_CS /* cs */ |
371 | /*CFI_REL_OFFSET cs,0*/ | 371 | /*CFI_REL_OFFSET cs,0*/ |
372 | pushq_cfi \child_rip /* rip */ | 372 | pushq_cfi \child_rip /* rip */ |
373 | CFI_REL_OFFSET rip,0 | 373 | CFI_REL_OFFSET rip,0 |
374 | pushq_cfi %rax /* orig rax */ | 374 | pushq_cfi %rax /* orig rax */ |
375 | .endm | 375 | .endm |
376 | 376 | ||
377 | .macro UNFAKE_STACK_FRAME | 377 | .macro UNFAKE_STACK_FRAME |
378 | addq $8*6, %rsp | 378 | addq $8*6, %rsp |
379 | CFI_ADJUST_CFA_OFFSET -(6*8) | 379 | CFI_ADJUST_CFA_OFFSET -(6*8) |
380 | .endm | 380 | .endm |
381 | 381 | ||
382 | /* | 382 | /* |
383 | * initial frame state for interrupts (and exceptions without error code) | 383 | * initial frame state for interrupts (and exceptions without error code) |
384 | */ | 384 | */ |
385 | .macro EMPTY_FRAME start=1 offset=0 | 385 | .macro EMPTY_FRAME start=1 offset=0 |
386 | .if \start | 386 | .if \start |
387 | CFI_STARTPROC simple | 387 | CFI_STARTPROC simple |
388 | CFI_SIGNAL_FRAME | 388 | CFI_SIGNAL_FRAME |
389 | CFI_DEF_CFA rsp,8+\offset | 389 | CFI_DEF_CFA rsp,8+\offset |
390 | .else | 390 | .else |
391 | CFI_DEF_CFA_OFFSET 8+\offset | 391 | CFI_DEF_CFA_OFFSET 8+\offset |
392 | .endif | 392 | .endif |
393 | .endm | 393 | .endm |
394 | 394 | ||
395 | /* | 395 | /* |
396 | * initial frame state for interrupts (and exceptions without error code) | 396 | * initial frame state for interrupts (and exceptions without error code) |
397 | */ | 397 | */ |
398 | .macro INTR_FRAME start=1 offset=0 | 398 | .macro INTR_FRAME start=1 offset=0 |
399 | EMPTY_FRAME \start, SS+8+\offset-RIP | 399 | EMPTY_FRAME \start, SS+8+\offset-RIP |
400 | /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ | 400 | /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ |
401 | CFI_REL_OFFSET rsp, RSP+\offset-RIP | 401 | CFI_REL_OFFSET rsp, RSP+\offset-RIP |
402 | /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ | 402 | /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ |
403 | /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ | 403 | /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ |
404 | CFI_REL_OFFSET rip, RIP+\offset-RIP | 404 | CFI_REL_OFFSET rip, RIP+\offset-RIP |
405 | .endm | 405 | .endm |
406 | 406 | ||
407 | /* | 407 | /* |
408 | * initial frame state for exceptions with error code (and interrupts | 408 | * initial frame state for exceptions with error code (and interrupts |
409 | * with vector already pushed) | 409 | * with vector already pushed) |
410 | */ | 410 | */ |
411 | .macro XCPT_FRAME start=1 offset=0 | 411 | .macro XCPT_FRAME start=1 offset=0 |
412 | INTR_FRAME \start, RIP+\offset-ORIG_RAX | 412 | INTR_FRAME \start, RIP+\offset-ORIG_RAX |
413 | /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ | 413 | /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ |
414 | .endm | 414 | .endm |
415 | 415 | ||
416 | /* | 416 | /* |
417 | * frame that enables calling into C. | 417 | * frame that enables calling into C. |
418 | */ | 418 | */ |
419 | .macro PARTIAL_FRAME start=1 offset=0 | 419 | .macro PARTIAL_FRAME start=1 offset=0 |
420 | XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET | 420 | XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET |
421 | CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET | 421 | CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET |
422 | CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET | 422 | CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET |
423 | CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET | 423 | CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET |
424 | CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET | 424 | CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET |
425 | CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET | 425 | CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET |
426 | CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET | 426 | CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET |
427 | CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET | 427 | CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET |
428 | CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET | 428 | CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET |
429 | CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET | 429 | CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET |
430 | .endm | 430 | .endm |
431 | 431 | ||
432 | /* | 432 | /* |
433 | * frame that enables passing a complete pt_regs to a C function. | 433 | * frame that enables passing a complete pt_regs to a C function. |
434 | */ | 434 | */ |
435 | .macro DEFAULT_FRAME start=1 offset=0 | 435 | .macro DEFAULT_FRAME start=1 offset=0 |
436 | PARTIAL_FRAME \start, R11+\offset-R15 | 436 | PARTIAL_FRAME \start, R11+\offset-R15 |
437 | CFI_REL_OFFSET rbx, RBX+\offset | 437 | CFI_REL_OFFSET rbx, RBX+\offset |
438 | CFI_REL_OFFSET rbp, RBP+\offset | 438 | CFI_REL_OFFSET rbp, RBP+\offset |
439 | CFI_REL_OFFSET r12, R12+\offset | 439 | CFI_REL_OFFSET r12, R12+\offset |
440 | CFI_REL_OFFSET r13, R13+\offset | 440 | CFI_REL_OFFSET r13, R13+\offset |
441 | CFI_REL_OFFSET r14, R14+\offset | 441 | CFI_REL_OFFSET r14, R14+\offset |
442 | CFI_REL_OFFSET r15, R15+\offset | 442 | CFI_REL_OFFSET r15, R15+\offset |
443 | .endm | 443 | .endm |
444 | 444 | ||
445 | /* save partial stack frame */ | 445 | /* save partial stack frame */ |
446 | .macro SAVE_ARGS_IRQ | 446 | .macro SAVE_ARGS_IRQ |
447 | cld | 447 | cld |
448 | /* start from rbp in pt_regs and jump over */ | 448 | /* start from rbp in pt_regs and jump over */ |
449 | movq_cfi rdi, (RDI-RBP) | 449 | movq_cfi rdi, (RDI-RBP) |
450 | movq_cfi rsi, (RSI-RBP) | 450 | movq_cfi rsi, (RSI-RBP) |
451 | movq_cfi rdx, (RDX-RBP) | 451 | movq_cfi rdx, (RDX-RBP) |
452 | movq_cfi rcx, (RCX-RBP) | 452 | movq_cfi rcx, (RCX-RBP) |
453 | movq_cfi rax, (RAX-RBP) | 453 | movq_cfi rax, (RAX-RBP) |
454 | movq_cfi r8, (R8-RBP) | 454 | movq_cfi r8, (R8-RBP) |
455 | movq_cfi r9, (R9-RBP) | 455 | movq_cfi r9, (R9-RBP) |
456 | movq_cfi r10, (R10-RBP) | 456 | movq_cfi r10, (R10-RBP) |
457 | movq_cfi r11, (R11-RBP) | 457 | movq_cfi r11, (R11-RBP) |
458 | 458 | ||
459 | /* Save rbp so that we can unwind from get_irq_regs() */ | 459 | /* Save rbp so that we can unwind from get_irq_regs() */ |
460 | movq_cfi rbp, 0 | 460 | movq_cfi rbp, 0 |
461 | 461 | ||
462 | /* Save previous stack value */ | 462 | /* Save previous stack value */ |
463 | movq %rsp, %rsi | 463 | movq %rsp, %rsi |
464 | 464 | ||
465 | leaq -RBP(%rsp),%rdi /* arg1 for handler */ | 465 | leaq -RBP(%rsp),%rdi /* arg1 for handler */ |
466 | testl $3, CS-RBP(%rsi) | 466 | testl $3, CS-RBP(%rsi) |
467 | je 1f | 467 | je 1f |
468 | SWAPGS | 468 | SWAPGS |
469 | /* | 469 | /* |
470 | * irq_count is used to check if a CPU is already on an interrupt stack | 470 | * irq_count is used to check if a CPU is already on an interrupt stack |
471 | * or not. While this is essentially redundant with preempt_count, it is | 471 | * or not. While this is essentially redundant with preempt_count, it is |
472 | * a little cheaper to use a separate counter in the PDA (short of moving | 472 | * a little cheaper to use a separate counter in the PDA (short of moving |
473 | * irq_enter into assembly, which would be too much work). irq_count starts | 473 | * irq_enter into assembly, which would be too much work). irq_count starts |
474 | * at -1, so the incl below sets ZF only on the outermost entry, letting cmovzq switch to the IRQ stack only then. */ | 474 | * at -1, so the incl below sets ZF only on the outermost entry, letting cmovzq switch to the IRQ stack only then. */ |
475 | 1: incl PER_CPU_VAR(irq_count) | 475 | 1: incl PER_CPU_VAR(irq_count) |
476 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp | 476 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp |
477 | CFI_DEF_CFA_REGISTER rsi | 477 | CFI_DEF_CFA_REGISTER rsi |
478 | 478 | ||
479 | /* Store previous stack value */ | 479 | /* Store previous stack value */ |
480 | pushq %rsi | 480 | pushq %rsi |
481 | CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ | 481 | CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ |
482 | 0x77 /* DW_OP_breg7 */, 0, \ | 482 | 0x77 /* DW_OP_breg7 */, 0, \ |
483 | 0x06 /* DW_OP_deref */, \ | 483 | 0x06 /* DW_OP_deref */, \ |
484 | 0x08 /* DW_OP_const1u */, SS+8-RBP, \ | 484 | 0x08 /* DW_OP_const1u */, SS+8-RBP, \ |
485 | 0x22 /* DW_OP_plus */ | 485 | 0x22 /* DW_OP_plus */ |
486 | /* We entered an interrupt context - irqs are off: */ | 486 | /* We entered an interrupt context - irqs are off: */ |
487 | TRACE_IRQS_OFF | 487 | TRACE_IRQS_OFF |
488 | .endm | 488 | .endm |
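A note on the CFI_ESCAPE above, since raw DWARF bytes are hard to read: the expression computes CFA = *(%rsp + 0) + (SS+8-RBP). DW_OP_breg7 0 pushes %rsp (DWARF register 7) plus zero, DW_OP_deref replaces that with the word stored there (the previous stack pointer just saved by pushq %rsi), DW_OP_const1u pushes the constant SS+8-RBP, and DW_OP_plus adds the two. The unwinder can therefore find the pre-interrupt frame through the saved stack pointer even though %rsp now points into the per-CPU IRQ stack.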
489 | 489 | ||
490 | ENTRY(save_rest) | 490 | ENTRY(save_rest) |
491 | PARTIAL_FRAME 1 (REST_SKIP+8) | 491 | PARTIAL_FRAME 1 (REST_SKIP+8) |
492 | movq 5*8+16(%rsp), %r11 /* save return address */ | 492 | movq 5*8+16(%rsp), %r11 /* save return address */ |
493 | movq_cfi rbx, RBX+16 | 493 | movq_cfi rbx, RBX+16 |
494 | movq_cfi rbp, RBP+16 | 494 | movq_cfi rbp, RBP+16 |
495 | movq_cfi r12, R12+16 | 495 | movq_cfi r12, R12+16 |
496 | movq_cfi r13, R13+16 | 496 | movq_cfi r13, R13+16 |
497 | movq_cfi r14, R14+16 | 497 | movq_cfi r14, R14+16 |
498 | movq_cfi r15, R15+16 | 498 | movq_cfi r15, R15+16 |
499 | movq %r11, 8(%rsp) /* return address */ | 499 | movq %r11, 8(%rsp) /* return address */ |
500 | FIXUP_TOP_OF_STACK %r11, 16 | 500 | FIXUP_TOP_OF_STACK %r11, 16 |
501 | ret | 501 | ret |
502 | CFI_ENDPROC | 502 | CFI_ENDPROC |
503 | END(save_rest) | 503 | END(save_rest) |
504 | 504 | ||
505 | /* save complete stack frame */ | 505 | /* save complete stack frame */ |
506 | .pushsection .kprobes.text, "ax" | 506 | .pushsection .kprobes.text, "ax" |
507 | ENTRY(save_paranoid) | 507 | ENTRY(save_paranoid) |
508 | XCPT_FRAME 1 RDI+8 | 508 | XCPT_FRAME 1 RDI+8 |
509 | cld | 509 | cld |
510 | movq_cfi rdi, RDI+8 | 510 | movq_cfi rdi, RDI+8 |
511 | movq_cfi rsi, RSI+8 | 511 | movq_cfi rsi, RSI+8 |
512 | movq_cfi rdx, RDX+8 | 512 | movq_cfi rdx, RDX+8 |
513 | movq_cfi rcx, RCX+8 | 513 | movq_cfi rcx, RCX+8 |
514 | movq_cfi rax, RAX+8 | 514 | movq_cfi rax, RAX+8 |
515 | movq_cfi r8, R8+8 | 515 | movq_cfi r8, R8+8 |
516 | movq_cfi r9, R9+8 | 516 | movq_cfi r9, R9+8 |
517 | movq_cfi r10, R10+8 | 517 | movq_cfi r10, R10+8 |
518 | movq_cfi r11, R11+8 | 518 | movq_cfi r11, R11+8 |
519 | movq_cfi rbx, RBX+8 | 519 | movq_cfi rbx, RBX+8 |
520 | movq_cfi rbp, RBP+8 | 520 | movq_cfi rbp, RBP+8 |
521 | movq_cfi r12, R12+8 | 521 | movq_cfi r12, R12+8 |
522 | movq_cfi r13, R13+8 | 522 | movq_cfi r13, R13+8 |
523 | movq_cfi r14, R14+8 | 523 | movq_cfi r14, R14+8 |
524 | movq_cfi r15, R15+8 | 524 | movq_cfi r15, R15+8 |
525 | movl $1,%ebx | 525 | movl $1,%ebx |
526 | movl $MSR_GS_BASE,%ecx | 526 | movl $MSR_GS_BASE,%ecx |
527 | rdmsr | 527 | rdmsr |
528 | testl %edx,%edx | 528 | testl %edx,%edx |
529 | js 1f /* negative -> in kernel: kernel GS base has bit 63 set */ | 529 | js 1f /* negative -> in kernel: kernel GS base has bit 63 set */ |
530 | SWAPGS | 530 | SWAPGS |
531 | xorl %ebx,%ebx | 531 | xorl %ebx,%ebx |
532 | 1: ret | 532 | 1: ret |
533 | CFI_ENDPROC | 533 | CFI_ENDPROC |
534 | END(save_paranoid) | 534 | END(save_paranoid) |
535 | .popsection | 535 | .popsection |
536 | 536 | ||
537 | /* | 537 | /* |
538 | * A newly forked process directly context switches into this address. | 538 | * A newly forked process directly context switches into this address. |
539 | * | 539 | * |
540 | * rdi: prev task we switched from | 540 | * rdi: prev task we switched from |
541 | */ | 541 | */ |
542 | ENTRY(ret_from_fork) | 542 | ENTRY(ret_from_fork) |
543 | DEFAULT_FRAME | 543 | DEFAULT_FRAME |
544 | 544 | ||
545 | LOCK ; btr $TIF_FORK,TI_flags(%r8) | 545 | LOCK ; btr $TIF_FORK,TI_flags(%r8) |
546 | 546 | ||
547 | pushq_cfi $0x0002 | 547 | pushq_cfi $0x0002 |
548 | popfq_cfi # reset kernel eflags | 548 | popfq_cfi # reset kernel eflags |
549 | 549 | ||
550 | call schedule_tail # rdi: 'prev' task parameter | 550 | call schedule_tail # rdi: 'prev' task parameter |
551 | 551 | ||
552 | GET_THREAD_INFO(%rcx) | 552 | GET_THREAD_INFO(%rcx) |
553 | 553 | ||
554 | RESTORE_REST | 554 | RESTORE_REST |
555 | 555 | ||
556 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? | 556 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? |
557 | jz 1f | 557 | jz 1f |
558 | 558 | ||
559 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET | 559 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET |
560 | jnz int_ret_from_sys_call | 560 | jnz int_ret_from_sys_call |
561 | 561 | ||
562 | RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET | 562 | RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET |
563 | jmp ret_from_sys_call # go to the SYSRET fastpath | 563 | jmp ret_from_sys_call # go to the SYSRET fastpath |
564 | 564 | ||
565 | 1: | 565 | 1: |
566 | subq $REST_SKIP, %rsp # leave space for volatiles | 566 | subq $REST_SKIP, %rsp # leave space for volatiles |
567 | CFI_ADJUST_CFA_OFFSET REST_SKIP | 567 | CFI_ADJUST_CFA_OFFSET REST_SKIP |
568 | movq %rbp, %rdi | 568 | movq %rbp, %rdi |
569 | call *%rbx | 569 | call *%rbx |
570 | movl $0, RAX(%rsp) | 570 | movl $0, RAX(%rsp) |
571 | RESTORE_REST | 571 | RESTORE_REST |
572 | jmp int_ret_from_sys_call | 572 | jmp int_ret_from_sys_call |
573 | CFI_ENDPROC | 573 | CFI_ENDPROC |
574 | END(ret_from_fork) | 574 | END(ret_from_fork) |
575 | 575 | ||
576 | /* | 576 | /* |
577 | * System call entry. Up to 6 arguments in registers are supported. | 577 | * System call entry. Up to 6 arguments in registers are supported. |
578 | * | 578 | * |
579 | * SYSCALL does not save anything on the stack and does not change the | 579 | * SYSCALL does not save anything on the stack and does not change the |
580 | * stack pointer. However, it does mask the flags register for us, so | 580 | * stack pointer. However, it does mask the flags register for us, so |
581 | * CLD and CLAC are not needed. | 581 | * CLD and CLAC are not needed. |
582 | */ | 582 | */ |
583 | 583 | ||
584 | /* | 584 | /* |
585 | * Register setup: | 585 | * Register setup: |
586 | * rax system call number | 586 | * rax system call number |
587 | * rdi arg0 | 587 | * rdi arg0 |
588 | * rcx return address for syscall/sysret, C arg3 | 588 | * rcx return address for syscall/sysret, C arg3 |
589 | * rsi arg1 | 589 | * rsi arg1 |
590 | * rdx arg2 | 590 | * rdx arg2 |
591 | * r10 arg3 (--> moved to rcx for C) | 591 | * r10 arg3 (--> moved to rcx for C) |
592 | * r8 arg4 | 592 | * r8 arg4 |
593 | * r9 arg5 | 593 | * r9 arg5 |
594 | * r11 eflags for syscall/sysret, temporary for C | 594 | * r11 eflags for syscall/sysret, temporary for C |
595 | * r12-r15,rbp,rbx saved by C code, not touched. | 595 | * r12-r15,rbp,rbx saved by C code, not touched. |
596 | * | 596 | * |
597 | * Interrupts are off on entry. | 597 | * Interrupts are off on entry. |
598 | * Only called from user space. | 598 | * Only called from user space. |
599 | * | 599 | * |
600 | * XXX if we had a free scratch register we could save the RSP into the stack frame | 600 | * XXX if we had a free scratch register we could save the RSP into the stack frame |
601 | * and report it properly in ps. Unfortunately we have none. | 601 | * and report it properly in ps. Unfortunately we have none. |
602 | * | 602 | * |
603 | * When the user can change the frames, always force IRET. That is because | 603 | * When the user can change the frames, always force IRET. That is because |
604 | * IRET deals with non-canonical addresses better. SYSRET has trouble | 604 | * IRET deals with non-canonical addresses better. SYSRET has trouble |
605 | * with them due to bugs in both AMD and Intel CPUs. | 605 | * with them due to bugs in both AMD and Intel CPUs. |
606 | */ | 606 | */ |
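To make the table above concrete, a minimal userspace sketch (mine, not part of this patch) that invokes write(2) via GCC inline assembly; rcx and r11 appear as clobbers because SYSCALL itself uses them for the return RIP and RFLAGS, exactly as listed above:

    /* Hedged userspace sketch of the syscall register convention. */
    static long raw_write(int fd, const void *buf, unsigned long len)
    {
            long ret;
            asm volatile("syscall"
                         : "=a" (ret)               /* rax: nr in, retval out */
                         : "0" (1L /* __NR_write */),
                           "D" ((long)fd),          /* rdi: arg0 */
                           "S" (buf),               /* rsi: arg1 */
                           "d" (len)                /* rdx: arg2 */
                         : "rcx", "r11", "memory"); /* clobbered by SYSCALL */
            return ret;
    }

A fourth argument would go in r10 rather than rcx, since rcx is consumed by SYSCALL; that is exactly the movq %r10,%rcx fixup visible in system_call_fastpath below.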
607 | 607 | ||
608 | ENTRY(system_call) | 608 | ENTRY(system_call) |
609 | CFI_STARTPROC simple | 609 | CFI_STARTPROC simple |
610 | CFI_SIGNAL_FRAME | 610 | CFI_SIGNAL_FRAME |
611 | CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET | 611 | CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET |
612 | CFI_REGISTER rip,rcx | 612 | CFI_REGISTER rip,rcx |
613 | /*CFI_REGISTER rflags,r11*/ | 613 | /*CFI_REGISTER rflags,r11*/ |
614 | SWAPGS_UNSAFE_STACK | 614 | SWAPGS_UNSAFE_STACK |
615 | /* | 615 | /* |
616 | * A hypervisor implementation might want to use a label | 616 | * A hypervisor implementation might want to use a label |
617 | * after the swapgs, so that it can do the swapgs | 617 | * after the swapgs, so that it can do the swapgs |
618 | * for the guest and jump here on syscall. | 618 | * for the guest and jump here on syscall. |
619 | */ | 619 | */ |
620 | GLOBAL(system_call_after_swapgs) | 620 | GLOBAL(system_call_after_swapgs) |
621 | 621 | ||
622 | movq %rsp,PER_CPU_VAR(old_rsp) | 622 | movq %rsp,PER_CPU_VAR(old_rsp) |
623 | movq PER_CPU_VAR(kernel_stack),%rsp | 623 | movq PER_CPU_VAR(kernel_stack),%rsp |
624 | /* | 624 | /* |
625 | * No need to follow this irqs off/on section - it's straight | 625 | * No need to follow this irqs off/on section - it's straight |
626 | * and short: | 626 | * and short: |
627 | */ | 627 | */ |
628 | ENABLE_INTERRUPTS(CLBR_NONE) | 628 | ENABLE_INTERRUPTS(CLBR_NONE) |
629 | SAVE_ARGS 8,0 | 629 | SAVE_ARGS 8,0 |
630 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 630 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
631 | movq %rcx,RIP-ARGOFFSET(%rsp) | 631 | movq %rcx,RIP-ARGOFFSET(%rsp) |
632 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 632 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
633 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 633 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
634 | jnz tracesys | 634 | jnz tracesys |
635 | system_call_fastpath: | 635 | system_call_fastpath: |
636 | #if __SYSCALL_MASK == ~0 | 636 | #if __SYSCALL_MASK == ~0 |
637 | cmpq $__NR_syscall_max,%rax | 637 | cmpq $__NR_syscall_max,%rax |
638 | #else | 638 | #else |
639 | andl $__SYSCALL_MASK,%eax | 639 | andl $__SYSCALL_MASK,%eax |
640 | cmpl $__NR_syscall_max,%eax | 640 | cmpl $__NR_syscall_max,%eax |
641 | #endif | 641 | #endif |
642 | ja badsys | 642 | ja badsys |
643 | movq %r10,%rcx | 643 | movq %r10,%rcx |
644 | call *sys_call_table(,%rax,8) # XXX: rip relative | 644 | call *sys_call_table(,%rax,8) # XXX: rip relative |
645 | movq %rax,RAX-ARGOFFSET(%rsp) | 645 | movq %rax,RAX-ARGOFFSET(%rsp) |
646 | /* | 646 | /* |
647 | * Syscall return path ending with SYSRET (fast path) | 647 | * Syscall return path ending with SYSRET (fast path) |
648 | * Has incomplete stack frame and undefined top of stack. | 648 | * Has incomplete stack frame and undefined top of stack. |
649 | */ | 649 | */ |
650 | ret_from_sys_call: | 650 | ret_from_sys_call: |
651 | movl $_TIF_ALLWORK_MASK,%edi | 651 | movl $_TIF_ALLWORK_MASK,%edi |
652 | /* edi: flagmask */ | 652 | /* edi: flagmask */ |
653 | sysret_check: | 653 | sysret_check: |
654 | LOCKDEP_SYS_EXIT | 654 | LOCKDEP_SYS_EXIT |
655 | DISABLE_INTERRUPTS(CLBR_NONE) | 655 | DISABLE_INTERRUPTS(CLBR_NONE) |
656 | TRACE_IRQS_OFF | 656 | TRACE_IRQS_OFF |
657 | movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx | 657 | movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx |
658 | andl %edi,%edx | 658 | andl %edi,%edx |
659 | jnz sysret_careful | 659 | jnz sysret_careful |
660 | CFI_REMEMBER_STATE | 660 | CFI_REMEMBER_STATE |
661 | /* | 661 | /* |
662 | * sysretq will re-enable interrupts: | 662 | * sysretq will re-enable interrupts: |
663 | */ | 663 | */ |
664 | TRACE_IRQS_ON | 664 | TRACE_IRQS_ON |
665 | movq RIP-ARGOFFSET(%rsp),%rcx | 665 | movq RIP-ARGOFFSET(%rsp),%rcx |
666 | CFI_REGISTER rip,rcx | 666 | CFI_REGISTER rip,rcx |
667 | RESTORE_ARGS 1,-ARG_SKIP,0 | 667 | RESTORE_ARGS 1,-ARG_SKIP,0 |
668 | /*CFI_REGISTER rflags,r11*/ | 668 | /*CFI_REGISTER rflags,r11*/ |
669 | movq PER_CPU_VAR(old_rsp), %rsp | 669 | movq PER_CPU_VAR(old_rsp), %rsp |
670 | USERGS_SYSRET64 | 670 | USERGS_SYSRET64 |
671 | 671 | ||
672 | CFI_RESTORE_STATE | 672 | CFI_RESTORE_STATE |
673 | /* Handle reschedules */ | 673 | /* Handle reschedules */ |
674 | /* edx: work, edi: workmask */ | 674 | /* edx: work, edi: workmask */ |
675 | sysret_careful: | 675 | sysret_careful: |
676 | bt $TIF_NEED_RESCHED,%edx | 676 | bt $TIF_NEED_RESCHED,%edx |
677 | jnc sysret_signal | 677 | jnc sysret_signal |
678 | TRACE_IRQS_ON | 678 | TRACE_IRQS_ON |
679 | ENABLE_INTERRUPTS(CLBR_NONE) | 679 | ENABLE_INTERRUPTS(CLBR_NONE) |
680 | pushq_cfi %rdi | 680 | pushq_cfi %rdi |
681 | SCHEDULE_USER | 681 | SCHEDULE_USER |
682 | popq_cfi %rdi | 682 | popq_cfi %rdi |
683 | jmp sysret_check | 683 | jmp sysret_check |
684 | 684 | ||
685 | /* Handle a signal */ | 685 | /* Handle a signal */ |
686 | sysret_signal: | 686 | sysret_signal: |
687 | TRACE_IRQS_ON | 687 | TRACE_IRQS_ON |
688 | ENABLE_INTERRUPTS(CLBR_NONE) | 688 | ENABLE_INTERRUPTS(CLBR_NONE) |
689 | #ifdef CONFIG_AUDITSYSCALL | 689 | #ifdef CONFIG_AUDITSYSCALL |
690 | bt $TIF_SYSCALL_AUDIT,%edx | 690 | bt $TIF_SYSCALL_AUDIT,%edx |
691 | jc sysret_audit | 691 | jc sysret_audit |
692 | #endif | 692 | #endif |
693 | /* | 693 | /* |
694 | * We have a signal, exit tracing, or single-step. | 694 | * We have a signal, exit tracing, or single-step. |
695 | * These all wind up with the iret return path anyway, | 695 | * These all wind up with the iret return path anyway, |
696 | * so just join that path right now. | 696 | * so just join that path right now. |
697 | */ | 697 | */ |
698 | FIXUP_TOP_OF_STACK %r11, -ARGOFFSET | 698 | FIXUP_TOP_OF_STACK %r11, -ARGOFFSET |
699 | jmp int_check_syscall_exit_work | 699 | jmp int_check_syscall_exit_work |
700 | 700 | ||
701 | badsys: | 701 | badsys: |
702 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 702 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) |
703 | jmp ret_from_sys_call | 703 | jmp ret_from_sys_call |
704 | 704 | ||
705 | #ifdef CONFIG_AUDITSYSCALL | 705 | #ifdef CONFIG_AUDITSYSCALL |
706 | /* | 706 | /* |
707 | * Fast path for syscall audit without full syscall trace. | 707 | * Fast path for syscall audit without full syscall trace. |
708 | * We just call __audit_syscall_entry() directly, and then | 708 | * We just call __audit_syscall_entry() directly, and then |
709 | * jump back to the normal fast path. | 709 | * jump back to the normal fast path. |
710 | */ | 710 | */ |
711 | auditsys: | 711 | auditsys: |
712 | movq %r10,%r9 /* 6th arg: 4th syscall arg */ | 712 | movq %r10,%r9 /* 6th arg: 4th syscall arg */ |
713 | movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ | 713 | movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ |
714 | movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ | 714 | movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ |
715 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ | 715 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ |
716 | movq %rax,%rsi /* 2nd arg: syscall number */ | 716 | movq %rax,%rsi /* 2nd arg: syscall number */ |
717 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ | 717 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ |
718 | call __audit_syscall_entry | 718 | call __audit_syscall_entry |
719 | LOAD_ARGS 0 /* reload call-clobbered registers */ | 719 | LOAD_ARGS 0 /* reload call-clobbered registers */ |
720 | jmp system_call_fastpath | 720 | jmp system_call_fastpath |
721 | 721 | ||
722 | /* | 722 | /* |
723 | * Return fast path for syscall audit. Call __audit_syscall_exit() | 723 | * Return fast path for syscall audit. Call __audit_syscall_exit() |
724 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT | 724 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT |
725 | * masked off. | 725 | * masked off. |
726 | */ | 726 | */ |
727 | sysret_audit: | 727 | sysret_audit: |
728 | movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ | 728 | movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ |
729 | cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */ | 729 | cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */ |
730 | setbe %al /* 1 if so, 0 if not */ | 730 | setbe %al /* 1 if so, 0 if not */ |
731 | movzbl %al,%edi /* zero-extend that into %edi */ | 731 | movzbl %al,%edi /* zero-extend that into %edi */ |
732 | call __audit_syscall_exit | 732 | call __audit_syscall_exit |
733 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | 733 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi |
734 | jmp sysret_check | 734 | jmp sysret_check |
735 | #endif /* CONFIG_AUDITSYSCALL */ | 735 | #endif /* CONFIG_AUDITSYSCALL */ |
736 | 736 | ||
737 | /* Do syscall tracing */ | 737 | /* Do syscall tracing */ |
738 | tracesys: | 738 | tracesys: |
739 | #ifdef CONFIG_AUDITSYSCALL | 739 | #ifdef CONFIG_AUDITSYSCALL |
740 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 740 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
741 | jz auditsys | 741 | jz auditsys |
742 | #endif | 742 | #endif |
743 | SAVE_REST | 743 | SAVE_REST |
744 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 744 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
745 | FIXUP_TOP_OF_STACK %rdi | 745 | FIXUP_TOP_OF_STACK %rdi |
746 | movq %rsp,%rdi | 746 | movq %rsp,%rdi |
747 | call syscall_trace_enter | 747 | call syscall_trace_enter |
748 | /* | 748 | /* |
749 | * Reload arg registers from stack in case ptrace changed them. | 749 | * Reload arg registers from stack in case ptrace changed them. |
750 | * We don't reload %rax because syscall_trace_enter() returned | 750 | * We don't reload %rax because syscall_trace_enter() returned |
751 | * the value it wants us to use in the table lookup. | 751 | * the value it wants us to use in the table lookup. |
752 | */ | 752 | */ |
753 | LOAD_ARGS ARGOFFSET, 1 | 753 | LOAD_ARGS ARGOFFSET, 1 |
754 | RESTORE_REST | 754 | RESTORE_REST |
755 | #if __SYSCALL_MASK == ~0 | 755 | #if __SYSCALL_MASK == ~0 |
756 | cmpq $__NR_syscall_max,%rax | 756 | cmpq $__NR_syscall_max,%rax |
757 | #else | 757 | #else |
758 | andl $__SYSCALL_MASK,%eax | 758 | andl $__SYSCALL_MASK,%eax |
759 | cmpl $__NR_syscall_max,%eax | 759 | cmpl $__NR_syscall_max,%eax |
760 | #endif | 760 | #endif |
761 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ | 761 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ |
762 | movq %r10,%rcx /* fixup for C */ | 762 | movq %r10,%rcx /* fixup for C */ |
763 | call *sys_call_table(,%rax,8) | 763 | call *sys_call_table(,%rax,8) |
764 | movq %rax,RAX-ARGOFFSET(%rsp) | 764 | movq %rax,RAX-ARGOFFSET(%rsp) |
765 | /* Use IRET because user could have changed frame */ | 765 | /* Use IRET because user could have changed frame */ |
766 | 766 | ||
767 | /* | 767 | /* |
768 | * Syscall return path ending with IRET. | 768 | * Syscall return path ending with IRET. |
769 | * Has correct top of stack, but partial stack frame. | 769 | * Has correct top of stack, but partial stack frame. |
770 | */ | 770 | */ |
771 | GLOBAL(int_ret_from_sys_call) | 771 | GLOBAL(int_ret_from_sys_call) |
772 | DISABLE_INTERRUPTS(CLBR_NONE) | 772 | DISABLE_INTERRUPTS(CLBR_NONE) |
773 | TRACE_IRQS_OFF | 773 | TRACE_IRQS_OFF |
774 | movl $_TIF_ALLWORK_MASK,%edi | 774 | movl $_TIF_ALLWORK_MASK,%edi |
775 | /* edi: mask to check */ | 775 | /* edi: mask to check */ |
776 | GLOBAL(int_with_check) | 776 | GLOBAL(int_with_check) |
777 | LOCKDEP_SYS_EXIT_IRQ | 777 | LOCKDEP_SYS_EXIT_IRQ |
778 | GET_THREAD_INFO(%rcx) | 778 | GET_THREAD_INFO(%rcx) |
779 | movl TI_flags(%rcx),%edx | 779 | movl TI_flags(%rcx),%edx |
780 | andl %edi,%edx | 780 | andl %edi,%edx |
781 | jnz int_careful | 781 | jnz int_careful |
782 | andl $~TS_COMPAT,TI_status(%rcx) | 782 | andl $~TS_COMPAT,TI_status(%rcx) |
783 | jmp retint_swapgs | 783 | jmp retint_swapgs |
784 | 784 | ||
785 | /* Either reschedule or signal or syscall exit tracking needed. */ | 785 | /* Either reschedule or signal or syscall exit tracking needed. */ |
786 | /* First do a reschedule test. */ | 786 | /* First do a reschedule test. */ |
787 | /* edx: work, edi: workmask */ | 787 | /* edx: work, edi: workmask */ |
788 | int_careful: | 788 | int_careful: |
789 | bt $TIF_NEED_RESCHED,%edx | 789 | bt $TIF_NEED_RESCHED,%edx |
790 | jnc int_very_careful | 790 | jnc int_very_careful |
791 | TRACE_IRQS_ON | 791 | TRACE_IRQS_ON |
792 | ENABLE_INTERRUPTS(CLBR_NONE) | 792 | ENABLE_INTERRUPTS(CLBR_NONE) |
793 | pushq_cfi %rdi | 793 | pushq_cfi %rdi |
794 | SCHEDULE_USER | 794 | SCHEDULE_USER |
795 | popq_cfi %rdi | 795 | popq_cfi %rdi |
796 | DISABLE_INTERRUPTS(CLBR_NONE) | 796 | DISABLE_INTERRUPTS(CLBR_NONE) |
797 | TRACE_IRQS_OFF | 797 | TRACE_IRQS_OFF |
798 | jmp int_with_check | 798 | jmp int_with_check |
799 | 799 | ||
800 | /* handle signals and tracing -- both require a full stack frame */ | 800 | /* handle signals and tracing -- both require a full stack frame */ |
801 | int_very_careful: | 801 | int_very_careful: |
802 | TRACE_IRQS_ON | 802 | TRACE_IRQS_ON |
803 | ENABLE_INTERRUPTS(CLBR_NONE) | 803 | ENABLE_INTERRUPTS(CLBR_NONE) |
804 | int_check_syscall_exit_work: | 804 | int_check_syscall_exit_work: |
805 | SAVE_REST | 805 | SAVE_REST |
806 | /* Check for syscall exit trace */ | 806 | /* Check for syscall exit trace */ |
807 | testl $_TIF_WORK_SYSCALL_EXIT,%edx | 807 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
808 | jz int_signal | 808 | jz int_signal |
809 | pushq_cfi %rdi | 809 | pushq_cfi %rdi |
810 | leaq 8(%rsp),%rdi # &ptregs -> arg1 | 810 | leaq 8(%rsp),%rdi # &ptregs -> arg1 |
811 | call syscall_trace_leave | 811 | call syscall_trace_leave |
812 | popq_cfi %rdi | 812 | popq_cfi %rdi |
813 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi | 813 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi |
814 | jmp int_restore_rest | 814 | jmp int_restore_rest |
815 | 815 | ||
816 | int_signal: | 816 | int_signal: |
817 | testl $_TIF_DO_NOTIFY_MASK,%edx | 817 | testl $_TIF_DO_NOTIFY_MASK,%edx |
818 | jz 1f | 818 | jz 1f |
819 | movq %rsp,%rdi # &ptregs -> arg1 | 819 | movq %rsp,%rdi # &ptregs -> arg1 |
820 | xorl %esi,%esi # oldset -> arg2 | 820 | xorl %esi,%esi # oldset -> arg2 |
821 | call do_notify_resume | 821 | call do_notify_resume |
822 | 1: movl $_TIF_WORK_MASK,%edi | 822 | 1: movl $_TIF_WORK_MASK,%edi |
823 | int_restore_rest: | 823 | int_restore_rest: |
824 | RESTORE_REST | 824 | RESTORE_REST |
825 | DISABLE_INTERRUPTS(CLBR_NONE) | 825 | DISABLE_INTERRUPTS(CLBR_NONE) |
826 | TRACE_IRQS_OFF | 826 | TRACE_IRQS_OFF |
827 | jmp int_with_check | 827 | jmp int_with_check |
828 | CFI_ENDPROC | 828 | CFI_ENDPROC |
829 | END(system_call) | 829 | END(system_call) |
830 | 830 | ||
831 | /* | 831 | /* |
832 | * Certain special system calls need to save a complete full stack frame. | 832 | * Certain special system calls need to save a complete full stack frame. |
833 | */ | 833 | */ |
834 | .macro PTREGSCALL label,func,arg | 834 | .macro PTREGSCALL label,func,arg |
835 | ENTRY(\label) | 835 | ENTRY(\label) |
836 | PARTIAL_FRAME 1 8 /* offset 8: return address */ | 836 | PARTIAL_FRAME 1 8 /* offset 8: return address */ |
837 | subq $REST_SKIP, %rsp | 837 | subq $REST_SKIP, %rsp |
838 | CFI_ADJUST_CFA_OFFSET REST_SKIP | 838 | CFI_ADJUST_CFA_OFFSET REST_SKIP |
839 | call save_rest | 839 | call save_rest |
840 | DEFAULT_FRAME 0 8 /* offset 8: return address */ | 840 | DEFAULT_FRAME 0 8 /* offset 8: return address */ |
841 | leaq 8(%rsp), \arg /* pt_regs pointer */ | 841 | leaq 8(%rsp), \arg /* pt_regs pointer */ |
842 | call \func | 842 | call \func |
843 | jmp ptregscall_common | 843 | jmp ptregscall_common |
844 | CFI_ENDPROC | 844 | CFI_ENDPROC |
845 | END(\label) | 845 | END(\label) |
846 | .endm | 846 | .endm |
847 | 847 | ||
848 | .macro FORK_LIKE func | 848 | .macro FORK_LIKE func |
849 | ENTRY(stub_\func) | 849 | ENTRY(stub_\func) |
850 | CFI_STARTPROC | 850 | CFI_STARTPROC |
851 | popq %r11 /* save return address */ | 851 | popq %r11 /* save return address */ |
852 | PARTIAL_FRAME 0 | 852 | PARTIAL_FRAME 0 |
853 | SAVE_REST | 853 | SAVE_REST |
854 | pushq %r11 /* put it back on stack */ | 854 | pushq %r11 /* put it back on stack */ |
855 | FIXUP_TOP_OF_STACK %r11, 8 | 855 | FIXUP_TOP_OF_STACK %r11, 8 |
856 | DEFAULT_FRAME 0 8 /* offset 8: return address */ | 856 | DEFAULT_FRAME 0 8 /* offset 8: return address */ |
857 | call sys_\func | 857 | call sys_\func |
858 | RESTORE_TOP_OF_STACK %r11, 8 | 858 | RESTORE_TOP_OF_STACK %r11, 8 |
859 | ret $REST_SKIP /* pop extended registers */ | 859 | ret $REST_SKIP /* pop extended registers */ |
860 | CFI_ENDPROC | 860 | CFI_ENDPROC |
861 | END(stub_\func) | 861 | END(stub_\func) |
862 | .endm | 862 | .endm |
863 | 863 | ||
864 | FORK_LIKE clone | 864 | FORK_LIKE clone |
865 | FORK_LIKE fork | 865 | FORK_LIKE fork |
866 | FORK_LIKE vfork | 866 | FORK_LIKE vfork |
867 | PTREGSCALL stub_iopl, sys_iopl, %rsi | 867 | PTREGSCALL stub_iopl, sys_iopl, %rsi |
868 | 868 | ||
869 | ENTRY(ptregscall_common) | 869 | ENTRY(ptregscall_common) |
870 | DEFAULT_FRAME 1 8 /* offset 8: return address */ | 870 | DEFAULT_FRAME 1 8 /* offset 8: return address */ |
871 | RESTORE_TOP_OF_STACK %r11, 8 | 871 | RESTORE_TOP_OF_STACK %r11, 8 |
872 | movq_cfi_restore R15+8, r15 | 872 | movq_cfi_restore R15+8, r15 |
873 | movq_cfi_restore R14+8, r14 | 873 | movq_cfi_restore R14+8, r14 |
874 | movq_cfi_restore R13+8, r13 | 874 | movq_cfi_restore R13+8, r13 |
875 | movq_cfi_restore R12+8, r12 | 875 | movq_cfi_restore R12+8, r12 |
876 | movq_cfi_restore RBP+8, rbp | 876 | movq_cfi_restore RBP+8, rbp |
877 | movq_cfi_restore RBX+8, rbx | 877 | movq_cfi_restore RBX+8, rbx |
878 | ret $REST_SKIP /* pop extended registers */ | 878 | ret $REST_SKIP /* pop extended registers */ |
879 | CFI_ENDPROC | 879 | CFI_ENDPROC |
880 | END(ptregscall_common) | 880 | END(ptregscall_common) |
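A note on the ret $REST_SKIP idiom used by both stubs above: RET with an immediate pops the return address and then adds the immediate to %rsp, so the six callee-saved register slots materialized by SAVE_REST are discarded in the same instruction that returns.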
881 | 881 | ||
882 | ENTRY(stub_execve) | 882 | ENTRY(stub_execve) |
883 | CFI_STARTPROC | 883 | CFI_STARTPROC |
884 | addq $8, %rsp | 884 | addq $8, %rsp |
885 | PARTIAL_FRAME 0 | 885 | PARTIAL_FRAME 0 |
886 | SAVE_REST | 886 | SAVE_REST |
887 | FIXUP_TOP_OF_STACK %r11 | 887 | FIXUP_TOP_OF_STACK %r11 |
888 | call sys_execve | 888 | call sys_execve |
889 | RESTORE_TOP_OF_STACK %r11 | 889 | RESTORE_TOP_OF_STACK %r11 |
890 | movq %rax,RAX(%rsp) | 890 | movq %rax,RAX(%rsp) |
891 | RESTORE_REST | 891 | RESTORE_REST |
892 | jmp int_ret_from_sys_call | 892 | jmp int_ret_from_sys_call |
893 | CFI_ENDPROC | 893 | CFI_ENDPROC |
894 | END(stub_execve) | 894 | END(stub_execve) |
895 | 895 | ||
896 | /* | 896 | /* |
897 | * sigreturn is special because it needs to restore all registers on return. | 897 | * sigreturn is special because it needs to restore all registers on return. |
898 | * This cannot be done with SYSRET, so use the IRET return path instead. | 898 | * This cannot be done with SYSRET, so use the IRET return path instead. |
899 | */ | 899 | */ |
900 | ENTRY(stub_rt_sigreturn) | 900 | ENTRY(stub_rt_sigreturn) |
901 | CFI_STARTPROC | 901 | CFI_STARTPROC |
902 | addq $8, %rsp | 902 | addq $8, %rsp |
903 | PARTIAL_FRAME 0 | 903 | PARTIAL_FRAME 0 |
904 | SAVE_REST | 904 | SAVE_REST |
905 | movq %rsp,%rdi | 905 | movq %rsp,%rdi |
906 | FIXUP_TOP_OF_STACK %r11 | 906 | FIXUP_TOP_OF_STACK %r11 |
907 | call sys_rt_sigreturn | 907 | call sys_rt_sigreturn |
908 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer | 908 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer |
909 | RESTORE_REST | 909 | RESTORE_REST |
910 | jmp int_ret_from_sys_call | 910 | jmp int_ret_from_sys_call |
911 | CFI_ENDPROC | 911 | CFI_ENDPROC |
912 | END(stub_rt_sigreturn) | 912 | END(stub_rt_sigreturn) |
913 | 913 | ||
914 | #ifdef CONFIG_X86_X32_ABI | 914 | #ifdef CONFIG_X86_X32_ABI |
915 | ENTRY(stub_x32_rt_sigreturn) | 915 | ENTRY(stub_x32_rt_sigreturn) |
916 | CFI_STARTPROC | 916 | CFI_STARTPROC |
917 | addq $8, %rsp | 917 | addq $8, %rsp |
918 | PARTIAL_FRAME 0 | 918 | PARTIAL_FRAME 0 |
919 | SAVE_REST | 919 | SAVE_REST |
920 | movq %rsp,%rdi | 920 | movq %rsp,%rdi |
921 | FIXUP_TOP_OF_STACK %r11 | 921 | FIXUP_TOP_OF_STACK %r11 |
922 | call sys32_x32_rt_sigreturn | 922 | call sys32_x32_rt_sigreturn |
923 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer | 923 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer |
924 | RESTORE_REST | 924 | RESTORE_REST |
925 | jmp int_ret_from_sys_call | 925 | jmp int_ret_from_sys_call |
926 | CFI_ENDPROC | 926 | CFI_ENDPROC |
927 | END(stub_x32_rt_sigreturn) | 927 | END(stub_x32_rt_sigreturn) |
928 | 928 | ||
929 | ENTRY(stub_x32_execve) | 929 | ENTRY(stub_x32_execve) |
930 | CFI_STARTPROC | 930 | CFI_STARTPROC |
931 | addq $8, %rsp | 931 | addq $8, %rsp |
932 | PARTIAL_FRAME 0 | 932 | PARTIAL_FRAME 0 |
933 | SAVE_REST | 933 | SAVE_REST |
934 | FIXUP_TOP_OF_STACK %r11 | 934 | FIXUP_TOP_OF_STACK %r11 |
935 | call compat_sys_execve | 935 | call compat_sys_execve |
936 | RESTORE_TOP_OF_STACK %r11 | 936 | RESTORE_TOP_OF_STACK %r11 |
937 | movq %rax,RAX(%rsp) | 937 | movq %rax,RAX(%rsp) |
938 | RESTORE_REST | 938 | RESTORE_REST |
939 | jmp int_ret_from_sys_call | 939 | jmp int_ret_from_sys_call |
940 | CFI_ENDPROC | 940 | CFI_ENDPROC |
941 | END(stub_x32_execve) | 941 | END(stub_x32_execve) |
942 | 942 | ||
943 | #endif | 943 | #endif |
944 | 944 | ||
945 | /* | 945 | /* |
946 | * Build the entry stubs and pointer table with some assembler magic. | 946 | * Build the entry stubs and pointer table with some assembler magic. |
947 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a | 947 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a |
948 | * single cache line on all modern x86 implementations. | 948 | * single cache line on all modern x86 implementations. |
949 | */ | 949 | */ |
950 | .section .init.rodata,"a" | 950 | .section .init.rodata,"a" |
951 | ENTRY(interrupt) | 951 | ENTRY(interrupt) |
952 | .section .entry.text | 952 | .section .entry.text |
953 | .p2align 5 | 953 | .p2align 5 |
954 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 954 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
955 | ENTRY(irq_entries_start) | 955 | ENTRY(irq_entries_start) |
956 | INTR_FRAME | 956 | INTR_FRAME |
957 | vector=FIRST_EXTERNAL_VECTOR | 957 | vector=FIRST_EXTERNAL_VECTOR |
958 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 | 958 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 |
959 | .balign 32 | 959 | .balign 32 |
960 | .rept 7 | 960 | .rept 7 |
961 | .if vector < NR_VECTORS | 961 | .if vector < NR_VECTORS |
962 | .if vector <> FIRST_EXTERNAL_VECTOR | 962 | .if vector <> FIRST_EXTERNAL_VECTOR |
963 | CFI_ADJUST_CFA_OFFSET -8 | 963 | CFI_ADJUST_CFA_OFFSET -8 |
964 | .endif | 964 | .endif |
965 | 1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ | 965 | 1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ |
966 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 | 966 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 |
967 | jmp 2f | 967 | jmp 2f |
968 | .endif | 968 | .endif |
969 | .previous | 969 | .previous |
970 | .quad 1b | 970 | .quad 1b |
971 | .section .entry.text | 971 | .section .entry.text |
972 | vector=vector+1 | 972 | vector=vector+1 |
973 | .endif | 973 | .endif |
974 | .endr | 974 | .endr |
975 | 2: jmp common_interrupt | 975 | 2: jmp common_interrupt |
976 | .endr | 976 | .endr |
977 | CFI_ENDPROC | 977 | CFI_ENDPROC |
978 | END(irq_entries_start) | 978 | END(irq_entries_start) |
979 | 979 | ||
980 | .previous | 980 | .previous |
981 | END(interrupt) | 981 | END(interrupt) |
982 | .previous | 982 | .previous |
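
A minimal userspace sketch (illustrative only, not part of this commit) of the packing arithmetic described above. FIRST_EXTERNAL_VECTOR (0x20) and NR_VECTORS (256) are this tree's values from irq_vectors.h; the per-stub byte counts assume the usual x86-64 encodings (pushq imm8 and short jmp are 2 bytes each, the near jmp to common_interrupt is 5 bytes):

```c
#include <stdio.h>

#define FIRST_EXTERNAL_VECTOR 0x20
#define NR_VECTORS            256

int main(void)
{
	int stubs  = NR_VECTORS - FIRST_EXTERNAL_VECTOR; /* 224 stubs */
	int chunks = (stubs + 6) / 7;                    /* the .rept count: 32 */

	/*
	 * Per 32-byte chunk: six stubs of pushq imm8 + short jmp
	 * (4 bytes each), then a seventh pushq imm8 (2 bytes) that
	 * falls through to the 5-byte near jmp to common_interrupt.
	 */
	int chunk_bytes = 6 * 4 + 2 + 5;                 /* 31 <= 32 */

	printf("%d stubs in %d chunks of %d bytes\n",
	       stubs, chunks, chunk_bytes);
	return 0;
}
```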
983 | 983 | ||
984 | /* | 984 | /* |
985 | * Interrupt entry/exit. | 985 | * Interrupt entry/exit. |
986 | * | 986 | * |
987 | * Interrupt entry points save only callee-clobbered registers in the fast path. | 987 | * Interrupt entry points save only callee-clobbered registers in the fast path.
988 | * | 988 | * |
989 | * Entry runs with interrupts off. | 989 | * Entry runs with interrupts off. |
990 | */ | 990 | */ |
991 | 991 | ||
992 | /* 0(%rsp): ~(interrupt number) */ | 992 | /* 0(%rsp): ~(interrupt number) */ |
993 | .macro interrupt func | 993 | .macro interrupt func |
994 | /* reserve pt_regs for scratch regs and rbp */ | 994 | /* reserve pt_regs for scratch regs and rbp */ |
995 | subq $ORIG_RAX-RBP, %rsp | 995 | subq $ORIG_RAX-RBP, %rsp |
996 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP | 996 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP |
997 | SAVE_ARGS_IRQ | 997 | SAVE_ARGS_IRQ |
998 | call \func | 998 | call \func |
999 | .endm | 999 | .endm |
1000 | 1000 | ||
1001 | /* | 1001 | /* |
1002 | * Interrupt entry/exit should be protected against kprobes | 1002 | * Interrupt entry/exit should be protected against kprobes |
1003 | */ | 1003 | */ |
1004 | .pushsection .kprobes.text, "ax" | 1004 | .pushsection .kprobes.text, "ax" |
1005 | /* | 1005 | /* |
1006 | * The interrupt stubs push (~vector+0x80) onto the stack and | 1006 | * The interrupt stubs push (~vector+0x80) onto the stack and |
1007 | * then jump to common_interrupt. | 1007 | * then jump to common_interrupt. |
1008 | */ | 1008 | */ |
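
A hedged userspace check (not from the patch) of that encoding round trip: the stub pushes ~vector+0x80 so the immediate fits in a signed byte, common_interrupt's addq $-0x80 turns it back into ~vector in the [-256,-1] range, and the C side can recover the vector by complementing orig_ax again:

```c
#include <assert.h>
#include <stdint.h>

int main(void)
{
	for (int vector = 0x20; vector < 256; vector++) {
		int pushed = ~vector + 0x80;	/* pushq_cfi $(~vector+0x80) */
		assert(pushed >= -128 && pushed <= 127);  /* signed byte */

		int64_t orig_ax = pushed - 0x80; /* addq $-0x80,(%rsp) */
		assert(orig_ax >= -256 && orig_ax <= -1); /* [-256,-1] */

		assert((int)~orig_ax == vector); /* recovered by the handler */
	}
	return 0;
}
```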
1009 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 1009 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
1010 | common_interrupt: | 1010 | common_interrupt: |
1011 | XCPT_FRAME | 1011 | XCPT_FRAME |
1012 | ASM_CLAC | 1012 | ASM_CLAC |
1013 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ | 1013 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ |
1014 | interrupt do_IRQ | 1014 | interrupt do_IRQ |
1015 | /* 0(%rsp): old_rsp-ARGOFFSET */ | 1015 | /* 0(%rsp): old_rsp-ARGOFFSET */ |
1016 | ret_from_intr: | 1016 | ret_from_intr: |
1017 | DISABLE_INTERRUPTS(CLBR_NONE) | 1017 | DISABLE_INTERRUPTS(CLBR_NONE) |
1018 | TRACE_IRQS_OFF | 1018 | TRACE_IRQS_OFF |
1019 | decl PER_CPU_VAR(irq_count) | 1019 | decl PER_CPU_VAR(irq_count) |
1020 | 1020 | ||
1021 | /* Restore saved previous stack */ | 1021 | /* Restore saved previous stack */ |
1022 | popq %rsi | 1022 | popq %rsi |
1023 | CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */ | 1023 | CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */ |
1024 | leaq ARGOFFSET-RBP(%rsi), %rsp | 1024 | leaq ARGOFFSET-RBP(%rsi), %rsp |
1025 | CFI_DEF_CFA_REGISTER rsp | 1025 | CFI_DEF_CFA_REGISTER rsp |
1026 | CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET | 1026 | CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET |
1027 | 1027 | ||
1028 | exit_intr: | 1028 | exit_intr: |
1029 | GET_THREAD_INFO(%rcx) | 1029 | GET_THREAD_INFO(%rcx) |
1030 | testl $3,CS-ARGOFFSET(%rsp) | 1030 | testl $3,CS-ARGOFFSET(%rsp) |
1031 | je retint_kernel | 1031 | je retint_kernel |
1032 | 1032 | ||
1033 | /* Interrupt came from user space */ | 1033 | /* Interrupt came from user space */ |
1034 | /* | 1034 | /* |
1035 | * Has a correct top of stack, but a partial stack frame | 1035 | * Has a correct top of stack, but a partial stack frame |
1036 | * %rcx: thread info. Interrupts off. | 1036 | * %rcx: thread info. Interrupts off. |
1037 | */ | 1037 | */ |
1038 | retint_with_reschedule: | 1038 | retint_with_reschedule: |
1039 | movl $_TIF_WORK_MASK,%edi | 1039 | movl $_TIF_WORK_MASK,%edi |
1040 | retint_check: | 1040 | retint_check: |
1041 | LOCKDEP_SYS_EXIT_IRQ | 1041 | LOCKDEP_SYS_EXIT_IRQ |
1042 | movl TI_flags(%rcx),%edx | 1042 | movl TI_flags(%rcx),%edx |
1043 | andl %edi,%edx | 1043 | andl %edi,%edx |
1044 | CFI_REMEMBER_STATE | 1044 | CFI_REMEMBER_STATE |
1045 | jnz retint_careful | 1045 | jnz retint_careful |
1046 | 1046 | ||
1047 | retint_swapgs: /* return to user-space */ | 1047 | retint_swapgs: /* return to user-space */ |
1048 | /* | 1048 | /* |
1049 | * The iretq could re-enable interrupts: | 1049 | * The iretq could re-enable interrupts: |
1050 | */ | 1050 | */ |
1051 | DISABLE_INTERRUPTS(CLBR_ANY) | 1051 | DISABLE_INTERRUPTS(CLBR_ANY) |
1052 | TRACE_IRQS_IRETQ | 1052 | TRACE_IRQS_IRETQ |
1053 | SWAPGS | 1053 | SWAPGS |
1054 | jmp restore_args | 1054 | jmp restore_args |
1055 | 1055 | ||
1056 | retint_restore_args: /* return to kernel space */ | 1056 | retint_restore_args: /* return to kernel space */ |
1057 | DISABLE_INTERRUPTS(CLBR_ANY) | 1057 | DISABLE_INTERRUPTS(CLBR_ANY) |
1058 | /* | 1058 | /* |
1059 | * The iretq could re-enable interrupts: | 1059 | * The iretq could re-enable interrupts: |
1060 | */ | 1060 | */ |
1061 | TRACE_IRQS_IRETQ | 1061 | TRACE_IRQS_IRETQ |
1062 | restore_args: | 1062 | restore_args: |
1063 | RESTORE_ARGS 1,8,1 | 1063 | RESTORE_ARGS 1,8,1 |
1064 | 1064 | ||
1065 | irq_return: | 1065 | irq_return: |
1066 | INTERRUPT_RETURN | 1066 | INTERRUPT_RETURN |
1067 | _ASM_EXTABLE(irq_return, bad_iret) | 1067 | _ASM_EXTABLE(irq_return, bad_iret) |
1068 | 1068 | ||
1069 | #ifdef CONFIG_PARAVIRT | 1069 | #ifdef CONFIG_PARAVIRT |
1070 | ENTRY(native_iret) | 1070 | ENTRY(native_iret) |
1071 | iretq | 1071 | iretq |
1072 | _ASM_EXTABLE(native_iret, bad_iret) | 1072 | _ASM_EXTABLE(native_iret, bad_iret) |
1073 | #endif | 1073 | #endif |
1074 | 1074 | ||
1075 | .section .fixup,"ax" | 1075 | .section .fixup,"ax" |
1076 | bad_iret: | 1076 | bad_iret: |
1077 | /* | 1077 | /* |
1078 | * The iret traps when the %cs or %ss being restored is bogus. | 1078 | * The iret traps when the %cs or %ss being restored is bogus. |
1079 | * We've lost the original trap vector and error code. | 1079 | * We've lost the original trap vector and error code. |
1080 | * #GPF is the most likely one to get for an invalid selector. | 1080 | * #GPF is the most likely one to get for an invalid selector. |
1081 | * So pretend we completed the iret and took the #GPF in user mode. | 1081 | * So pretend we completed the iret and took the #GPF in user mode. |
1082 | * | 1082 | * |
1083 | * We are now running with the kernel GS after exception recovery. | 1083 | * We are now running with the kernel GS after exception recovery. |
1084 | * But error_entry expects us to have user GS to match the user %cs, | 1084 | * But error_entry expects us to have user GS to match the user %cs, |
1085 | * so swap back. | 1085 | * so swap back. |
1086 | */ | 1086 | */ |
1087 | pushq $0 | 1087 | pushq $0 |
1088 | 1088 | ||
1089 | SWAPGS | 1089 | SWAPGS |
1090 | jmp general_protection | 1090 | jmp general_protection |
1091 | 1091 | ||
1092 | .previous | 1092 | .previous |
1093 | 1093 | ||
1094 | /* edi: workmask, edx: work */ | 1094 | /* edi: workmask, edx: work */ |
1095 | retint_careful: | 1095 | retint_careful: |
1096 | CFI_RESTORE_STATE | 1096 | CFI_RESTORE_STATE |
1097 | bt $TIF_NEED_RESCHED,%edx | 1097 | bt $TIF_NEED_RESCHED,%edx |
1098 | jnc retint_signal | 1098 | jnc retint_signal |
1099 | TRACE_IRQS_ON | 1099 | TRACE_IRQS_ON |
1100 | ENABLE_INTERRUPTS(CLBR_NONE) | 1100 | ENABLE_INTERRUPTS(CLBR_NONE) |
1101 | pushq_cfi %rdi | 1101 | pushq_cfi %rdi |
1102 | SCHEDULE_USER | 1102 | SCHEDULE_USER |
1103 | popq_cfi %rdi | 1103 | popq_cfi %rdi |
1104 | GET_THREAD_INFO(%rcx) | 1104 | GET_THREAD_INFO(%rcx) |
1105 | DISABLE_INTERRUPTS(CLBR_NONE) | 1105 | DISABLE_INTERRUPTS(CLBR_NONE) |
1106 | TRACE_IRQS_OFF | 1106 | TRACE_IRQS_OFF |
1107 | jmp retint_check | 1107 | jmp retint_check |
1108 | 1108 | ||
1109 | retint_signal: | 1109 | retint_signal: |
1110 | testl $_TIF_DO_NOTIFY_MASK,%edx | 1110 | testl $_TIF_DO_NOTIFY_MASK,%edx |
1111 | jz retint_swapgs | 1111 | jz retint_swapgs |
1112 | TRACE_IRQS_ON | 1112 | TRACE_IRQS_ON |
1113 | ENABLE_INTERRUPTS(CLBR_NONE) | 1113 | ENABLE_INTERRUPTS(CLBR_NONE) |
1114 | SAVE_REST | 1114 | SAVE_REST |
1115 | movq $-1,ORIG_RAX(%rsp) | 1115 | movq $-1,ORIG_RAX(%rsp) |
1116 | xorl %esi,%esi # oldset | 1116 | xorl %esi,%esi # oldset |
1117 | movq %rsp,%rdi # &pt_regs | 1117 | movq %rsp,%rdi # &pt_regs |
1118 | call do_notify_resume | 1118 | call do_notify_resume |
1119 | RESTORE_REST | 1119 | RESTORE_REST |
1120 | DISABLE_INTERRUPTS(CLBR_NONE) | 1120 | DISABLE_INTERRUPTS(CLBR_NONE) |
1121 | TRACE_IRQS_OFF | 1121 | TRACE_IRQS_OFF |
1122 | GET_THREAD_INFO(%rcx) | 1122 | GET_THREAD_INFO(%rcx) |
1123 | jmp retint_with_reschedule | 1123 | jmp retint_with_reschedule |
1124 | 1124 | ||
1125 | #ifdef CONFIG_PREEMPT | 1125 | #ifdef CONFIG_PREEMPT |
1126 | /* Returning to kernel space. Check if we need preemption */ | 1126 | /* Returning to kernel space. Check if we need preemption */ |
1127 | /* rcx: threadinfo. interrupts off. */ | 1127 | /* rcx: threadinfo. interrupts off. */ |
1128 | ENTRY(retint_kernel) | 1128 | ENTRY(retint_kernel) |
1129 | cmpl $0,TI_preempt_count(%rcx) | 1129 | cmpl $0,TI_preempt_count(%rcx) |
1130 | jnz retint_restore_args | 1130 | jnz retint_restore_args |
1131 | bt $TIF_NEED_RESCHED,TI_flags(%rcx) | 1131 | bt $TIF_NEED_RESCHED,TI_flags(%rcx) |
1132 | jnc retint_restore_args | 1132 | jnc retint_restore_args |
1133 | bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ | 1133 | bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ |
1134 | jnc retint_restore_args | 1134 | jnc retint_restore_args |
1135 | call preempt_schedule_irq | 1135 | call preempt_schedule_irq |
1136 | jmp exit_intr | 1136 | jmp exit_intr |
1137 | #endif | 1137 | #endif |
1138 | 1138 | ||
1139 | CFI_ENDPROC | 1139 | CFI_ENDPROC |
1140 | END(common_interrupt) | 1140 | END(common_interrupt) |
1141 | /* | 1141 | /* |
1142 | * End of kprobes section | 1142 | * End of kprobes section |
1143 | */ | 1143 | */ |
1144 | .popsection | 1144 | .popsection |
1145 | 1145 | ||
1146 | /* | 1146 | /* |
1147 | * APIC interrupts. | 1147 | * APIC interrupts. |
1148 | */ | 1148 | */ |
1149 | .macro apicinterrupt num sym do_sym | 1149 | .macro apicinterrupt num sym do_sym |
1150 | ENTRY(\sym) | 1150 | ENTRY(\sym) |
1151 | INTR_FRAME | 1151 | INTR_FRAME |
1152 | ASM_CLAC | 1152 | ASM_CLAC |
1153 | pushq_cfi $~(\num) | 1153 | pushq_cfi $~(\num) |
1154 | .Lcommon_\sym: | 1154 | .Lcommon_\sym: |
1155 | interrupt \do_sym | 1155 | interrupt \do_sym |
1156 | jmp ret_from_intr | 1156 | jmp ret_from_intr |
1157 | CFI_ENDPROC | 1157 | CFI_ENDPROC |
1158 | END(\sym) | 1158 | END(\sym) |
1159 | .endm | 1159 | .endm |
1160 | 1160 | ||
1161 | #ifdef CONFIG_SMP | 1161 | #ifdef CONFIG_SMP |
1162 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ | 1162 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ |
1163 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt | 1163 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt |
1164 | apicinterrupt REBOOT_VECTOR \ | 1164 | apicinterrupt REBOOT_VECTOR \ |
1165 | reboot_interrupt smp_reboot_interrupt | 1165 | reboot_interrupt smp_reboot_interrupt |
1166 | #endif | 1166 | #endif |
1167 | 1167 | ||
1168 | #ifdef CONFIG_X86_UV | 1168 | #ifdef CONFIG_X86_UV |
1169 | apicinterrupt UV_BAU_MESSAGE \ | 1169 | apicinterrupt UV_BAU_MESSAGE \ |
1170 | uv_bau_message_intr1 uv_bau_message_interrupt | 1170 | uv_bau_message_intr1 uv_bau_message_interrupt |
1171 | #endif | 1171 | #endif |
1172 | apicinterrupt LOCAL_TIMER_VECTOR \ | 1172 | apicinterrupt LOCAL_TIMER_VECTOR \ |
1173 | apic_timer_interrupt smp_apic_timer_interrupt | 1173 | apic_timer_interrupt smp_apic_timer_interrupt |
1174 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ | 1174 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ |
1175 | x86_platform_ipi smp_x86_platform_ipi | 1175 | x86_platform_ipi smp_x86_platform_ipi |
1176 | 1176 | ||
1177 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 1177 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
1178 | threshold_interrupt smp_threshold_interrupt | 1178 | threshold_interrupt smp_threshold_interrupt |
1179 | apicinterrupt THERMAL_APIC_VECTOR \ | 1179 | apicinterrupt THERMAL_APIC_VECTOR \ |
1180 | thermal_interrupt smp_thermal_interrupt | 1180 | thermal_interrupt smp_thermal_interrupt |
1181 | 1181 | ||
1182 | #ifdef CONFIG_SMP | 1182 | #ifdef CONFIG_SMP |
1183 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ | 1183 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ |
1184 | call_function_single_interrupt smp_call_function_single_interrupt | 1184 | call_function_single_interrupt smp_call_function_single_interrupt |
1185 | apicinterrupt CALL_FUNCTION_VECTOR \ | 1185 | apicinterrupt CALL_FUNCTION_VECTOR \ |
1186 | call_function_interrupt smp_call_function_interrupt | 1186 | call_function_interrupt smp_call_function_interrupt |
1187 | apicinterrupt RESCHEDULE_VECTOR \ | 1187 | apicinterrupt RESCHEDULE_VECTOR \ |
1188 | reschedule_interrupt smp_reschedule_interrupt | 1188 | reschedule_interrupt smp_reschedule_interrupt |
1189 | #endif | 1189 | #endif |
1190 | 1190 | ||
1191 | apicinterrupt ERROR_APIC_VECTOR \ | 1191 | apicinterrupt ERROR_APIC_VECTOR \ |
1192 | error_interrupt smp_error_interrupt | 1192 | error_interrupt smp_error_interrupt |
1193 | apicinterrupt SPURIOUS_APIC_VECTOR \ | 1193 | apicinterrupt SPURIOUS_APIC_VECTOR \ |
1194 | spurious_interrupt smp_spurious_interrupt | 1194 | spurious_interrupt smp_spurious_interrupt |
1195 | 1195 | ||
1196 | #ifdef CONFIG_IRQ_WORK | 1196 | #ifdef CONFIG_IRQ_WORK |
1197 | apicinterrupt IRQ_WORK_VECTOR \ | 1197 | apicinterrupt IRQ_WORK_VECTOR \ |
1198 | irq_work_interrupt smp_irq_work_interrupt | 1198 | irq_work_interrupt smp_irq_work_interrupt |
1199 | #endif | 1199 | #endif |
1200 | 1200 | ||
1201 | /* | 1201 | /* |
1202 | * Exception entry points. | 1202 | * Exception entry points. |
1203 | */ | 1203 | */ |
1204 | .macro zeroentry sym do_sym | 1204 | .macro zeroentry sym do_sym |
1205 | ENTRY(\sym) | 1205 | ENTRY(\sym) |
1206 | INTR_FRAME | 1206 | INTR_FRAME |
1207 | ASM_CLAC | 1207 | ASM_CLAC |
1208 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1208 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1209 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1209 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1210 | subq $ORIG_RAX-R15, %rsp | 1210 | subq $ORIG_RAX-R15, %rsp |
1211 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1211 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1212 | call error_entry | 1212 | call error_entry |
1213 | DEFAULT_FRAME 0 | 1213 | DEFAULT_FRAME 0 |
1214 | movq %rsp,%rdi /* pt_regs pointer */ | 1214 | movq %rsp,%rdi /* pt_regs pointer */ |
1215 | xorl %esi,%esi /* no error code */ | 1215 | xorl %esi,%esi /* no error code */ |
1216 | call \do_sym | 1216 | call \do_sym |
1217 | jmp error_exit /* %ebx: no swapgs flag */ | 1217 | jmp error_exit /* %ebx: no swapgs flag */ |
1218 | CFI_ENDPROC | 1218 | CFI_ENDPROC |
1219 | END(\sym) | 1219 | END(\sym) |
1220 | .endm | 1220 | .endm |
1221 | 1221 | ||
1222 | .macro paranoidzeroentry sym do_sym | 1222 | .macro paranoidzeroentry sym do_sym |
1223 | ENTRY(\sym) | 1223 | ENTRY(\sym) |
1224 | INTR_FRAME | 1224 | INTR_FRAME |
1225 | ASM_CLAC | 1225 | ASM_CLAC |
1226 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1226 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1227 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1227 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1228 | subq $ORIG_RAX-R15, %rsp | 1228 | subq $ORIG_RAX-R15, %rsp |
1229 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1229 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1230 | call save_paranoid | 1230 | call save_paranoid |
1231 | TRACE_IRQS_OFF | 1231 | TRACE_IRQS_OFF |
1232 | movq %rsp,%rdi /* pt_regs pointer */ | 1232 | movq %rsp,%rdi /* pt_regs pointer */ |
1233 | xorl %esi,%esi /* no error code */ | 1233 | xorl %esi,%esi /* no error code */ |
1234 | call \do_sym | 1234 | call \do_sym |
1235 | jmp paranoid_exit /* %ebx: no swapgs flag */ | 1235 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
1236 | CFI_ENDPROC | 1236 | CFI_ENDPROC |
1237 | END(\sym) | 1237 | END(\sym) |
1238 | .endm | 1238 | .endm |
1239 | 1239 | ||
1240 | #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) | 1240 | #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) |
1241 | .macro paranoidzeroentry_ist sym do_sym ist | 1241 | .macro paranoidzeroentry_ist sym do_sym ist |
1242 | ENTRY(\sym) | 1242 | ENTRY(\sym) |
1243 | INTR_FRAME | 1243 | INTR_FRAME |
1244 | ASM_CLAC | 1244 | ASM_CLAC |
1245 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1245 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1246 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1246 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1247 | subq $ORIG_RAX-R15, %rsp | 1247 | subq $ORIG_RAX-R15, %rsp |
1248 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1248 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1249 | call save_paranoid | 1249 | call save_paranoid |
1250 | TRACE_IRQS_OFF_DEBUG | 1250 | TRACE_IRQS_OFF_DEBUG |
1251 | movq %rsp,%rdi /* pt_regs pointer */ | 1251 | movq %rsp,%rdi /* pt_regs pointer */ |
1252 | xorl %esi,%esi /* no error code */ | 1252 | xorl %esi,%esi /* no error code */ |
1253 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) | 1253 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
1254 | call \do_sym | 1254 | call \do_sym |
1255 | addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) | 1255 | addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
1256 | jmp paranoid_exit /* %ebx: no swapgs flag */ | 1256 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
1257 | CFI_ENDPROC | 1257 | CFI_ENDPROC |
1258 | END(\sym) | 1258 | END(\sym) |
1259 | .endm | 1259 | .endm |
1260 | 1260 | ||
1261 | .macro errorentry sym do_sym | 1261 | .macro errorentry sym do_sym |
1262 | ENTRY(\sym) | 1262 | ENTRY(\sym) |
1263 | XCPT_FRAME | 1263 | XCPT_FRAME |
1264 | ASM_CLAC | 1264 | ASM_CLAC |
1265 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1265 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1266 | subq $ORIG_RAX-R15, %rsp | 1266 | subq $ORIG_RAX-R15, %rsp |
1267 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1267 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1268 | call error_entry | 1268 | call error_entry |
1269 | DEFAULT_FRAME 0 | 1269 | DEFAULT_FRAME 0 |
1270 | movq %rsp,%rdi /* pt_regs pointer */ | 1270 | movq %rsp,%rdi /* pt_regs pointer */ |
1271 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | 1271 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
1272 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ | 1272 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ |
1273 | call \do_sym | 1273 | call \do_sym |
1274 | jmp error_exit /* %ebx: no swapgs flag */ | 1274 | jmp error_exit /* %ebx: no swapgs flag */ |
1275 | CFI_ENDPROC | 1275 | CFI_ENDPROC |
1276 | END(\sym) | 1276 | END(\sym) |
1277 | .endm | 1277 | .endm |
1278 | 1278 | ||
1279 | /* error code is on the stack already */ | 1279 | /* error code is on the stack already */ |
1280 | .macro paranoiderrorentry sym do_sym | 1280 | .macro paranoiderrorentry sym do_sym |
1281 | ENTRY(\sym) | 1281 | ENTRY(\sym) |
1282 | XCPT_FRAME | 1282 | XCPT_FRAME |
1283 | ASM_CLAC | 1283 | ASM_CLAC |
1284 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1284 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1285 | subq $ORIG_RAX-R15, %rsp | 1285 | subq $ORIG_RAX-R15, %rsp |
1286 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1286 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1287 | call save_paranoid | 1287 | call save_paranoid |
1288 | DEFAULT_FRAME 0 | 1288 | DEFAULT_FRAME 0 |
1289 | TRACE_IRQS_OFF | 1289 | TRACE_IRQS_OFF |
1290 | movq %rsp,%rdi /* pt_regs pointer */ | 1290 | movq %rsp,%rdi /* pt_regs pointer */ |
1291 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | 1291 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
1292 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ | 1292 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ |
1293 | call \do_sym | 1293 | call \do_sym |
1294 | jmp paranoid_exit /* %ebx: no swapgs flag */ | 1294 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
1295 | CFI_ENDPROC | 1295 | CFI_ENDPROC |
1296 | END(\sym) | 1296 | END(\sym) |
1297 | .endm | 1297 | .endm |
1298 | 1298 | ||
1299 | zeroentry divide_error do_divide_error | 1299 | zeroentry divide_error do_divide_error |
1300 | zeroentry overflow do_overflow | 1300 | zeroentry overflow do_overflow |
1301 | zeroentry bounds do_bounds | 1301 | zeroentry bounds do_bounds |
1302 | zeroentry invalid_op do_invalid_op | 1302 | zeroentry invalid_op do_invalid_op |
1303 | zeroentry device_not_available do_device_not_available | 1303 | zeroentry device_not_available do_device_not_available |
1304 | paranoiderrorentry double_fault do_double_fault | 1304 | paranoiderrorentry double_fault do_double_fault |
1305 | zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun | 1305 | zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun |
1306 | errorentry invalid_TSS do_invalid_TSS | 1306 | errorentry invalid_TSS do_invalid_TSS |
1307 | errorentry segment_not_present do_segment_not_present | 1307 | errorentry segment_not_present do_segment_not_present |
1308 | zeroentry spurious_interrupt_bug do_spurious_interrupt_bug | 1308 | zeroentry spurious_interrupt_bug do_spurious_interrupt_bug |
1309 | zeroentry coprocessor_error do_coprocessor_error | 1309 | zeroentry coprocessor_error do_coprocessor_error |
1310 | errorentry alignment_check do_alignment_check | 1310 | errorentry alignment_check do_alignment_check |
1311 | zeroentry simd_coprocessor_error do_simd_coprocessor_error | 1311 | zeroentry simd_coprocessor_error do_simd_coprocessor_error |
1312 | 1312 | ||
1313 | 1313 | ||
1314 | /* Reload gs selector with exception handling */ | 1314 | /* Reload gs selector with exception handling */ |
1315 | /* edi: new selector */ | 1315 | /* edi: new selector */ |
1316 | ENTRY(native_load_gs_index) | 1316 | ENTRY(native_load_gs_index) |
1317 | CFI_STARTPROC | 1317 | CFI_STARTPROC |
1318 | pushfq_cfi | 1318 | pushfq_cfi |
1319 | DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) | 1319 | DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) |
1320 | SWAPGS | 1320 | SWAPGS |
1321 | gs_change: | 1321 | gs_change: |
1322 | movl %edi,%gs | 1322 | movl %edi,%gs |
1323 | 2: mfence /* workaround */ | 1323 | 2: mfence /* workaround */ |
1324 | SWAPGS | 1324 | SWAPGS |
1325 | popfq_cfi | 1325 | popfq_cfi |
1326 | ret | 1326 | ret |
1327 | CFI_ENDPROC | 1327 | CFI_ENDPROC |
1328 | END(native_load_gs_index) | 1328 | END(native_load_gs_index) |
1329 | 1329 | ||
1330 | _ASM_EXTABLE(gs_change,bad_gs) | 1330 | _ASM_EXTABLE(gs_change,bad_gs) |
1331 | .section .fixup,"ax" | 1331 | .section .fixup,"ax" |
1332 | /* running with kernelgs */ | 1332 | /* running with kernelgs */ |
1333 | bad_gs: | 1333 | bad_gs: |
1334 | SWAPGS /* switch back to user gs */ | 1334 | SWAPGS /* switch back to user gs */ |
1335 | xorl %eax,%eax | 1335 | xorl %eax,%eax |
1336 | movl %eax,%gs | 1336 | movl %eax,%gs |
1337 | jmp 2b | 1337 | jmp 2b |
1338 | .previous | 1338 | .previous |
1339 | 1339 | ||
1340 | /* Call softirq on interrupt stack. Interrupts are off. */ | 1340 | /* Call softirq on interrupt stack. Interrupts are off. */ |
1341 | ENTRY(call_softirq) | 1341 | ENTRY(call_softirq) |
1342 | CFI_STARTPROC | 1342 | CFI_STARTPROC |
1343 | pushq_cfi %rbp | 1343 | pushq_cfi %rbp |
1344 | CFI_REL_OFFSET rbp,0 | 1344 | CFI_REL_OFFSET rbp,0 |
1345 | mov %rsp,%rbp | 1345 | mov %rsp,%rbp |
1346 | CFI_DEF_CFA_REGISTER rbp | 1346 | CFI_DEF_CFA_REGISTER rbp |
1347 | incl PER_CPU_VAR(irq_count) | 1347 | incl PER_CPU_VAR(irq_count) |
1348 | cmove PER_CPU_VAR(irq_stack_ptr),%rsp | 1348 | cmove PER_CPU_VAR(irq_stack_ptr),%rsp |
1349 | push %rbp # backlink for old unwinder | 1349 | push %rbp # backlink for old unwinder |
1350 | call __do_softirq | 1350 | call __do_softirq |
1351 | leaveq | 1351 | leaveq |
1352 | CFI_RESTORE rbp | 1352 | CFI_RESTORE rbp |
1353 | CFI_DEF_CFA_REGISTER rsp | 1353 | CFI_DEF_CFA_REGISTER rsp |
1354 | CFI_ADJUST_CFA_OFFSET -8 | 1354 | CFI_ADJUST_CFA_OFFSET -8 |
1355 | decl PER_CPU_VAR(irq_count) | 1355 | decl PER_CPU_VAR(irq_count) |
1356 | ret | 1356 | ret |
1357 | CFI_ENDPROC | 1357 | CFI_ENDPROC |
1358 | END(call_softirq) | 1358 | END(call_softirq) |
1359 | 1359 | ||
1360 | #ifdef CONFIG_XEN | 1360 | #ifdef CONFIG_XEN |
1361 | zeroentry xen_hypervisor_callback xen_do_hypervisor_callback | 1361 | zeroentry xen_hypervisor_callback xen_do_hypervisor_callback |
1362 | 1362 | ||
1363 | /* | 1363 | /* |
1364 | * A note on the "critical region" in our callback handler. | 1364 | * A note on the "critical region" in our callback handler. |
1365 | * We want to avoid stacking callback handlers due to events occurring | 1365 | * We want to avoid stacking callback handlers due to events occurring |
1366 | * during handling of the last event. To do this, we keep events disabled | 1366 | * during handling of the last event. To do this, we keep events disabled |
1367 | * until we've done all processing. HOWEVER, we must enable events before | 1367 | * until we've done all processing. HOWEVER, we must enable events before |
1368 | * popping the stack frame (can't be done atomically) and so it would still | 1368 | * popping the stack frame (can't be done atomically) and so it would still |
1369 | * be possible to get enough handler activations to overflow the stack. | 1369 | * be possible to get enough handler activations to overflow the stack. |
1370 | * Although unlikely, bugs of that kind are hard to track down, so we'd | 1370 | * Although unlikely, bugs of that kind are hard to track down, so we'd |
1371 | * like to avoid the possibility. | 1371 | * like to avoid the possibility. |
1372 | * So, on entry to the handler we detect whether we interrupted an | 1372 | * So, on entry to the handler we detect whether we interrupted an |
1373 | * existing activation in its critical region -- if so, we pop the current | 1373 | * existing activation in its critical region -- if so, we pop the current |
1374 | * activation and restart the handler using the previous one. | 1374 | * activation and restart the handler using the previous one. |
1375 | */ | 1375 | */ |
1376 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *) | 1376 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
1377 | CFI_STARTPROC | 1377 | CFI_STARTPROC |
1378 | /* | 1378 | /* |
1379 | * Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will | 1379 | * Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will
1380 | * see the correct pointer to the pt_regs | 1380 | * see the correct pointer to the pt_regs |
1381 | */ | 1381 | */ |
1382 | movq %rdi, %rsp # we don't return, adjust the stack frame | 1382 | movq %rdi, %rsp # we don't return, adjust the stack frame |
1383 | CFI_ENDPROC | 1383 | CFI_ENDPROC |
1384 | DEFAULT_FRAME | 1384 | DEFAULT_FRAME |
1385 | 11: incl PER_CPU_VAR(irq_count) | 1385 | 11: incl PER_CPU_VAR(irq_count) |
1386 | movq %rsp,%rbp | 1386 | movq %rsp,%rbp |
1387 | CFI_DEF_CFA_REGISTER rbp | 1387 | CFI_DEF_CFA_REGISTER rbp |
1388 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp | 1388 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp |
1389 | pushq %rbp # backlink for old unwinder | 1389 | pushq %rbp # backlink for old unwinder |
1390 | call xen_evtchn_do_upcall | 1390 | call xen_evtchn_do_upcall |
1391 | popq %rsp | 1391 | popq %rsp |
1392 | CFI_DEF_CFA_REGISTER rsp | 1392 | CFI_DEF_CFA_REGISTER rsp |
1393 | decl PER_CPU_VAR(irq_count) | 1393 | decl PER_CPU_VAR(irq_count) |
1394 | jmp error_exit | 1394 | jmp error_exit |
1395 | CFI_ENDPROC | 1395 | CFI_ENDPROC |
1396 | END(xen_do_hypervisor_callback) | 1396 | END(xen_do_hypervisor_callback) |
1397 | 1397 | ||
1398 | /* | 1398 | /* |
1399 | * Hypervisor uses this for application faults while it executes. | 1399 | * Hypervisor uses this for application faults while it executes. |
1400 | * We get here for two reasons: | 1400 | * We get here for two reasons: |
1401 | * 1. Fault while reloading DS, ES, FS or GS | 1401 | * 1. Fault while reloading DS, ES, FS or GS |
1402 | * 2. Fault while executing IRET | 1402 | * 2. Fault while executing IRET |
1403 | * Category 1 we do not need to fix up as Xen has already reloaded all segment | 1403 | * Category 1 we do not need to fix up as Xen has already reloaded all segment |
1404 | * registers that could be reloaded and zeroed the others. | 1404 | * registers that could be reloaded and zeroed the others. |
1405 | * Category 2 we fix up by killing the current process. We cannot use the | 1405 | * Category 2 we fix up by killing the current process. We cannot use the |
1406 | * normal Linux return path in this case because if we use the IRET hypercall | 1406 | * normal Linux return path in this case because if we use the IRET hypercall |
1407 | * to pop the stack frame we end up in an infinite loop of failsafe callbacks. | 1407 | * to pop the stack frame we end up in an infinite loop of failsafe callbacks. |
1408 | * We distinguish between categories by comparing each saved segment register | 1408 | * We distinguish between categories by comparing each saved segment register |
1409 | * with its current contents: any discrepancy means we are in category 1. | 1409 | * with its current contents: any discrepancy means we are in category 1.
1410 | */ | 1410 | */ |
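
A small illustrative model (not part of the patch) of that category test: all four saved selectors matching their current values means the IRET itself faulted (category 2); any mismatch means Xen already reloaded or zeroed the segments (category 1). The selector values below are made up for the example:

```c
#include <stdbool.h>
#include <stdio.h>

struct sels { unsigned short ds, es, fs, gs; };

static bool bad_iret(struct sels saved, struct sels cur)
{
	return saved.ds == cur.ds && saved.es == cur.es &&
	       saved.fs == cur.fs && saved.gs == cur.gs;
}

int main(void)
{
	struct sels cur    = { 0x2b, 0x2b, 0, 0 };
	struct sels zeroed = { 0, 0, 0, 0 };	/* Xen zeroed %ds/%es */

	printf("category %d\n", bad_iret(zeroed, cur) ? 2 : 1);	/* 1 */
	printf("category %d\n", bad_iret(cur, cur) ? 2 : 1);	/* 2 */
	return 0;
}
```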
1411 | ENTRY(xen_failsafe_callback) | 1411 | ENTRY(xen_failsafe_callback) |
1412 | INTR_FRAME 1 (6*8) | 1412 | INTR_FRAME 1 (6*8) |
1413 | /*CFI_REL_OFFSET gs,GS*/ | 1413 | /*CFI_REL_OFFSET gs,GS*/ |
1414 | /*CFI_REL_OFFSET fs,FS*/ | 1414 | /*CFI_REL_OFFSET fs,FS*/ |
1415 | /*CFI_REL_OFFSET es,ES*/ | 1415 | /*CFI_REL_OFFSET es,ES*/ |
1416 | /*CFI_REL_OFFSET ds,DS*/ | 1416 | /*CFI_REL_OFFSET ds,DS*/ |
1417 | CFI_REL_OFFSET r11,8 | 1417 | CFI_REL_OFFSET r11,8 |
1418 | CFI_REL_OFFSET rcx,0 | 1418 | CFI_REL_OFFSET rcx,0 |
1419 | movw %ds,%cx | 1419 | movw %ds,%cx |
1420 | cmpw %cx,0x10(%rsp) | 1420 | cmpw %cx,0x10(%rsp) |
1421 | CFI_REMEMBER_STATE | 1421 | CFI_REMEMBER_STATE |
1422 | jne 1f | 1422 | jne 1f |
1423 | movw %es,%cx | 1423 | movw %es,%cx |
1424 | cmpw %cx,0x18(%rsp) | 1424 | cmpw %cx,0x18(%rsp) |
1425 | jne 1f | 1425 | jne 1f |
1426 | movw %fs,%cx | 1426 | movw %fs,%cx |
1427 | cmpw %cx,0x20(%rsp) | 1427 | cmpw %cx,0x20(%rsp) |
1428 | jne 1f | 1428 | jne 1f |
1429 | movw %gs,%cx | 1429 | movw %gs,%cx |
1430 | cmpw %cx,0x28(%rsp) | 1430 | cmpw %cx,0x28(%rsp) |
1431 | jne 1f | 1431 | jne 1f |
1432 | /* All segments match their saved values => Category 2 (Bad IRET). */ | 1432 | /* All segments match their saved values => Category 2 (Bad IRET). */ |
1433 | movq (%rsp),%rcx | 1433 | movq (%rsp),%rcx |
1434 | CFI_RESTORE rcx | 1434 | CFI_RESTORE rcx |
1435 | movq 8(%rsp),%r11 | 1435 | movq 8(%rsp),%r11 |
1436 | CFI_RESTORE r11 | 1436 | CFI_RESTORE r11 |
1437 | addq $0x30,%rsp | 1437 | addq $0x30,%rsp |
1438 | CFI_ADJUST_CFA_OFFSET -0x30 | 1438 | CFI_ADJUST_CFA_OFFSET -0x30 |
1439 | pushq_cfi $0 /* RIP */ | 1439 | pushq_cfi $0 /* RIP */ |
1440 | pushq_cfi %r11 | 1440 | pushq_cfi %r11 |
1441 | pushq_cfi %rcx | 1441 | pushq_cfi %rcx |
1442 | jmp general_protection | 1442 | jmp general_protection |
1443 | CFI_RESTORE_STATE | 1443 | CFI_RESTORE_STATE |
1444 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ | 1444 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ |
1445 | movq (%rsp),%rcx | 1445 | movq (%rsp),%rcx |
1446 | CFI_RESTORE rcx | 1446 | CFI_RESTORE rcx |
1447 | movq 8(%rsp),%r11 | 1447 | movq 8(%rsp),%r11 |
1448 | CFI_RESTORE r11 | 1448 | CFI_RESTORE r11 |
1449 | addq $0x30,%rsp | 1449 | addq $0x30,%rsp |
1450 | CFI_ADJUST_CFA_OFFSET -0x30 | 1450 | CFI_ADJUST_CFA_OFFSET -0x30 |
1451 | pushq_cfi $-1 /* orig_ax = -1 => not a system call */ | 1451 | pushq_cfi $-1 /* orig_ax = -1 => not a system call */ |
1452 | SAVE_ALL | 1452 | SAVE_ALL |
1453 | jmp error_exit | 1453 | jmp error_exit |
1454 | CFI_ENDPROC | 1454 | CFI_ENDPROC |
1455 | END(xen_failsafe_callback) | 1455 | END(xen_failsafe_callback) |
1456 | 1456 | ||
1457 | apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ | 1457 | apicinterrupt HYPERVISOR_CALLBACK_VECTOR \ |
1458 | xen_hvm_callback_vector xen_evtchn_do_upcall | 1458 | xen_hvm_callback_vector xen_evtchn_do_upcall |
1459 | 1459 | ||
1460 | #endif /* CONFIG_XEN */ | 1460 | #endif /* CONFIG_XEN */ |
1461 | |||
1462 | #if IS_ENABLED(CONFIG_HYPERV) | ||
1463 | apicinterrupt HYPERVISOR_CALLBACK_VECTOR \ | ||
1464 | hyperv_callback_vector hyperv_vector_handler | ||
1465 | #endif /* CONFIG_HYPERV */ | ||
1461 | 1466 | ||
1462 | /* | 1467 | /* |
1463 | * Some functions should be protected against kprobes | 1468 | * Some functions should be protected against kprobes |
1464 | */ | 1469 | */ |
1465 | .pushsection .kprobes.text, "ax" | 1470 | .pushsection .kprobes.text, "ax" |
1466 | 1471 | ||
1467 | paranoidzeroentry_ist debug do_debug DEBUG_STACK | 1472 | paranoidzeroentry_ist debug do_debug DEBUG_STACK |
1468 | paranoidzeroentry_ist int3 do_int3 DEBUG_STACK | 1473 | paranoidzeroentry_ist int3 do_int3 DEBUG_STACK |
1469 | paranoiderrorentry stack_segment do_stack_segment | 1474 | paranoiderrorentry stack_segment do_stack_segment |
1470 | #ifdef CONFIG_XEN | 1475 | #ifdef CONFIG_XEN |
1471 | zeroentry xen_debug do_debug | 1476 | zeroentry xen_debug do_debug |
1472 | zeroentry xen_int3 do_int3 | 1477 | zeroentry xen_int3 do_int3 |
1473 | errorentry xen_stack_segment do_stack_segment | 1478 | errorentry xen_stack_segment do_stack_segment |
1474 | #endif | 1479 | #endif |
1475 | errorentry general_protection do_general_protection | 1480 | errorentry general_protection do_general_protection |
1476 | errorentry page_fault do_page_fault | 1481 | errorentry page_fault do_page_fault |
1477 | #ifdef CONFIG_KVM_GUEST | 1482 | #ifdef CONFIG_KVM_GUEST |
1478 | errorentry async_page_fault do_async_page_fault | 1483 | errorentry async_page_fault do_async_page_fault |
1479 | #endif | 1484 | #endif |
1480 | #ifdef CONFIG_X86_MCE | 1485 | #ifdef CONFIG_X86_MCE |
1481 | paranoidzeroentry machine_check *machine_check_vector(%rip) | 1486 | paranoidzeroentry machine_check *machine_check_vector(%rip) |
1482 | #endif | 1487 | #endif |
1483 | 1488 | ||
1484 | /* | 1489 | /* |
1485 | * "Paranoid" exit path from exception stack. | 1490 | * "Paranoid" exit path from exception stack. |
1486 | * Paranoid because this is used by NMIs and cannot take | 1491 | * Paranoid because this is used by NMIs and cannot take |
1487 | * any kernel state for granted. | 1492 | * any kernel state for granted. |
1488 | * We don't do kernel preemption checks here, because only the | 1493 | * We don't do kernel preemption checks here, because only the
1489 | * NMI path is expected to be common, and it does not enable IRQs | 1494 | * NMI path is expected to be common, and it does not enable IRQs
1490 | * and cannot get reschedule ticks. | 1495 | * and cannot get reschedule ticks.
1491 | * | 1496 | * |
1492 | * "trace" is 0 for the NMI handler only, because irq-tracing | 1497 | * "trace" is 0 for the NMI handler only, because irq-tracing |
1493 | * is fundamentally NMI-unsafe. (we cannot change the soft and | 1498 | * is fundamentally NMI-unsafe. (we cannot change the soft and |
1494 | * hard flags at once, atomically) | 1499 | * hard flags at once, atomically) |
1495 | */ | 1500 | */ |
1496 | 1501 | ||
1497 | /* ebx: no swapgs flag */ | 1502 | /* ebx: no swapgs flag */ |
1498 | ENTRY(paranoid_exit) | 1503 | ENTRY(paranoid_exit) |
1499 | DEFAULT_FRAME | 1504 | DEFAULT_FRAME |
1500 | DISABLE_INTERRUPTS(CLBR_NONE) | 1505 | DISABLE_INTERRUPTS(CLBR_NONE) |
1501 | TRACE_IRQS_OFF_DEBUG | 1506 | TRACE_IRQS_OFF_DEBUG |
1502 | testl %ebx,%ebx /* swapgs needed? */ | 1507 | testl %ebx,%ebx /* swapgs needed? */ |
1503 | jnz paranoid_restore | 1508 | jnz paranoid_restore |
1504 | testl $3,CS(%rsp) | 1509 | testl $3,CS(%rsp) |
1505 | jnz paranoid_userspace | 1510 | jnz paranoid_userspace |
1506 | paranoid_swapgs: | 1511 | paranoid_swapgs: |
1507 | TRACE_IRQS_IRETQ 0 | 1512 | TRACE_IRQS_IRETQ 0 |
1508 | SWAPGS_UNSAFE_STACK | 1513 | SWAPGS_UNSAFE_STACK |
1509 | RESTORE_ALL 8 | 1514 | RESTORE_ALL 8 |
1510 | jmp irq_return | 1515 | jmp irq_return |
1511 | paranoid_restore: | 1516 | paranoid_restore: |
1512 | TRACE_IRQS_IRETQ_DEBUG 0 | 1517 | TRACE_IRQS_IRETQ_DEBUG 0 |
1513 | RESTORE_ALL 8 | 1518 | RESTORE_ALL 8 |
1514 | jmp irq_return | 1519 | jmp irq_return |
1515 | paranoid_userspace: | 1520 | paranoid_userspace: |
1516 | GET_THREAD_INFO(%rcx) | 1521 | GET_THREAD_INFO(%rcx) |
1517 | movl TI_flags(%rcx),%ebx | 1522 | movl TI_flags(%rcx),%ebx |
1518 | andl $_TIF_WORK_MASK,%ebx | 1523 | andl $_TIF_WORK_MASK,%ebx |
1519 | jz paranoid_swapgs | 1524 | jz paranoid_swapgs |
1520 | movq %rsp,%rdi /* &pt_regs */ | 1525 | movq %rsp,%rdi /* &pt_regs */ |
1521 | call sync_regs | 1526 | call sync_regs |
1522 | movq %rax,%rsp /* switch stack for scheduling */ | 1527 | movq %rax,%rsp /* switch stack for scheduling */ |
1523 | testl $_TIF_NEED_RESCHED,%ebx | 1528 | testl $_TIF_NEED_RESCHED,%ebx |
1524 | jnz paranoid_schedule | 1529 | jnz paranoid_schedule |
1525 | movl %ebx,%edx /* arg3: thread flags */ | 1530 | movl %ebx,%edx /* arg3: thread flags */ |
1526 | TRACE_IRQS_ON | 1531 | TRACE_IRQS_ON |
1527 | ENABLE_INTERRUPTS(CLBR_NONE) | 1532 | ENABLE_INTERRUPTS(CLBR_NONE) |
1528 | xorl %esi,%esi /* arg2: oldset */ | 1533 | xorl %esi,%esi /* arg2: oldset */ |
1529 | movq %rsp,%rdi /* arg1: &pt_regs */ | 1534 | movq %rsp,%rdi /* arg1: &pt_regs */ |
1530 | call do_notify_resume | 1535 | call do_notify_resume |
1531 | DISABLE_INTERRUPTS(CLBR_NONE) | 1536 | DISABLE_INTERRUPTS(CLBR_NONE) |
1532 | TRACE_IRQS_OFF | 1537 | TRACE_IRQS_OFF |
1533 | jmp paranoid_userspace | 1538 | jmp paranoid_userspace |
1534 | paranoid_schedule: | 1539 | paranoid_schedule: |
1535 | TRACE_IRQS_ON | 1540 | TRACE_IRQS_ON |
1536 | ENABLE_INTERRUPTS(CLBR_ANY) | 1541 | ENABLE_INTERRUPTS(CLBR_ANY) |
1537 | SCHEDULE_USER | 1542 | SCHEDULE_USER |
1538 | DISABLE_INTERRUPTS(CLBR_ANY) | 1543 | DISABLE_INTERRUPTS(CLBR_ANY) |
1539 | TRACE_IRQS_OFF | 1544 | TRACE_IRQS_OFF |
1540 | jmp paranoid_userspace | 1545 | jmp paranoid_userspace |
1541 | CFI_ENDPROC | 1546 | CFI_ENDPROC |
1542 | END(paranoid_exit) | 1547 | END(paranoid_exit) |
1543 | 1548 | ||
1544 | /* | 1549 | /* |
1545 | * Exception entry point. This expects an error code/orig_rax on the stack. | 1550 | * Exception entry point. This expects an error code/orig_rax on the stack. |
1546 | * Returns the "no swapgs flag" in %ebx. | 1551 | * Returns the "no swapgs flag" in %ebx.
1547 | */ | 1552 | */ |
1548 | ENTRY(error_entry) | 1553 | ENTRY(error_entry) |
1549 | XCPT_FRAME | 1554 | XCPT_FRAME |
1550 | CFI_ADJUST_CFA_OFFSET 15*8 | 1555 | CFI_ADJUST_CFA_OFFSET 15*8 |
1551 | /* oldrax contains error code */ | 1556 | /* oldrax contains error code */ |
1552 | cld | 1557 | cld |
1553 | movq_cfi rdi, RDI+8 | 1558 | movq_cfi rdi, RDI+8 |
1554 | movq_cfi rsi, RSI+8 | 1559 | movq_cfi rsi, RSI+8 |
1555 | movq_cfi rdx, RDX+8 | 1560 | movq_cfi rdx, RDX+8 |
1556 | movq_cfi rcx, RCX+8 | 1561 | movq_cfi rcx, RCX+8 |
1557 | movq_cfi rax, RAX+8 | 1562 | movq_cfi rax, RAX+8 |
1558 | movq_cfi r8, R8+8 | 1563 | movq_cfi r8, R8+8 |
1559 | movq_cfi r9, R9+8 | 1564 | movq_cfi r9, R9+8 |
1560 | movq_cfi r10, R10+8 | 1565 | movq_cfi r10, R10+8 |
1561 | movq_cfi r11, R11+8 | 1566 | movq_cfi r11, R11+8 |
1562 | movq_cfi rbx, RBX+8 | 1567 | movq_cfi rbx, RBX+8 |
1563 | movq_cfi rbp, RBP+8 | 1568 | movq_cfi rbp, RBP+8 |
1564 | movq_cfi r12, R12+8 | 1569 | movq_cfi r12, R12+8 |
1565 | movq_cfi r13, R13+8 | 1570 | movq_cfi r13, R13+8 |
1566 | movq_cfi r14, R14+8 | 1571 | movq_cfi r14, R14+8 |
1567 | movq_cfi r15, R15+8 | 1572 | movq_cfi r15, R15+8 |
1568 | xorl %ebx,%ebx | 1573 | xorl %ebx,%ebx |
1569 | testl $3,CS+8(%rsp) | 1574 | testl $3,CS+8(%rsp) |
1570 | je error_kernelspace | 1575 | je error_kernelspace |
1571 | error_swapgs: | 1576 | error_swapgs: |
1572 | SWAPGS | 1577 | SWAPGS |
1573 | error_sti: | 1578 | error_sti: |
1574 | TRACE_IRQS_OFF | 1579 | TRACE_IRQS_OFF |
1575 | ret | 1580 | ret |
1576 | 1581 | ||
1577 | /* | 1582 | /* |
1578 | * There are two places in the kernel that can potentially fault with | 1583 | * There are two places in the kernel that can potentially fault with |
1579 | * usergs. Handle them here. The exception handlers after iret run with | 1584 | * usergs. Handle them here. The exception handlers after iret run with |
1580 | * kernel gs again, so don't set the user space flag. B stepping K8s | 1585 | * kernel gs again, so don't set the user space flag. B stepping K8s |
1581 | * sometimes report a truncated RIP for IRET exceptions returning to | 1586 | * sometimes report a truncated RIP for IRET exceptions returning to
1582 | * compat mode. Check for these here too. | 1587 | * compat mode. Check for these here too. |
1583 | */ | 1588 | */ |
1584 | error_kernelspace: | 1589 | error_kernelspace: |
1585 | incl %ebx | 1590 | incl %ebx |
1586 | leaq irq_return(%rip),%rcx | 1591 | leaq irq_return(%rip),%rcx |
1587 | cmpq %rcx,RIP+8(%rsp) | 1592 | cmpq %rcx,RIP+8(%rsp) |
1588 | je error_swapgs | 1593 | je error_swapgs |
1589 | movl %ecx,%eax /* zero extend */ | 1594 | movl %ecx,%eax /* zero extend */ |
1590 | cmpq %rax,RIP+8(%rsp) | 1595 | cmpq %rax,RIP+8(%rsp) |
1591 | je bstep_iret | 1596 | je bstep_iret |
1592 | cmpq $gs_change,RIP+8(%rsp) | 1597 | cmpq $gs_change,RIP+8(%rsp) |
1593 | je error_swapgs | 1598 | je error_swapgs |
1594 | jmp error_sti | 1599 | jmp error_sti |
1595 | 1600 | ||
1596 | bstep_iret: | 1601 | bstep_iret: |
1597 | /* Fix truncated RIP */ | 1602 | /* Fix truncated RIP */ |
1598 | movq %rcx,RIP+8(%rsp) | 1603 | movq %rcx,RIP+8(%rsp) |
1599 | jmp error_swapgs | 1604 | jmp error_swapgs |
1600 | CFI_ENDPROC | 1605 | CFI_ENDPROC |
1601 | END(error_entry) | 1606 | END(error_entry) |
1602 | 1607 | ||
1603 | 1608 | ||
1604 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | 1609 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ |
1605 | ENTRY(error_exit) | 1610 | ENTRY(error_exit) |
1606 | DEFAULT_FRAME | 1611 | DEFAULT_FRAME |
1607 | movl %ebx,%eax | 1612 | movl %ebx,%eax |
1608 | RESTORE_REST | 1613 | RESTORE_REST |
1609 | DISABLE_INTERRUPTS(CLBR_NONE) | 1614 | DISABLE_INTERRUPTS(CLBR_NONE) |
1610 | TRACE_IRQS_OFF | 1615 | TRACE_IRQS_OFF |
1611 | GET_THREAD_INFO(%rcx) | 1616 | GET_THREAD_INFO(%rcx) |
1612 | testl %eax,%eax | 1617 | testl %eax,%eax |
1613 | jne retint_kernel | 1618 | jne retint_kernel |
1614 | LOCKDEP_SYS_EXIT_IRQ | 1619 | LOCKDEP_SYS_EXIT_IRQ |
1615 | movl TI_flags(%rcx),%edx | 1620 | movl TI_flags(%rcx),%edx |
1616 | movl $_TIF_WORK_MASK,%edi | 1621 | movl $_TIF_WORK_MASK,%edi |
1617 | andl %edi,%edx | 1622 | andl %edi,%edx |
1618 | jnz retint_careful | 1623 | jnz retint_careful |
1619 | jmp retint_swapgs | 1624 | jmp retint_swapgs |
1620 | CFI_ENDPROC | 1625 | CFI_ENDPROC |
1621 | END(error_exit) | 1626 | END(error_exit) |
1622 | 1627 | ||
1623 | /* | 1628 | /* |
1624 | * Test if a given stack is an NMI stack or not. | 1629 | * Test if a given stack is an NMI stack or not. |
1625 | */ | 1630 | */ |
1626 | .macro test_in_nmi reg stack nmi_ret normal_ret | 1631 | .macro test_in_nmi reg stack nmi_ret normal_ret |
1627 | cmpq %\reg, \stack | 1632 | cmpq %\reg, \stack |
1628 | ja \normal_ret | 1633 | ja \normal_ret |
1629 | subq $EXCEPTION_STKSZ, %\reg | 1634 | subq $EXCEPTION_STKSZ, %\reg |
1630 | cmpq %\reg, \stack | 1635 | cmpq %\reg, \stack |
1631 | jb \normal_ret | 1636 | jb \normal_ret |
1632 | jmp \nmi_ret | 1637 | jmp \nmi_ret |
1633 | .endm | 1638 | .endm |
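
The macro above is an unsigned range check. A userspace rendering (not from the patch; EXCEPTION_STKSZ hard-coded to this tree's 4 KiB value, and the stack-top address made up, purely for illustration):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EXCEPTION_STKSZ 4096UL	/* assumed: one page in this tree */

/* 'top' plays %\reg (top of the NMI stack), 'stack' the saved %rsp. */
static bool in_nmi_stack(uint64_t top, uint64_t stack)
{
	if (stack > top)			/* ja \normal_ret */
		return false;
	if (stack < top - EXCEPTION_STKSZ)	/* jb \normal_ret */
		return false;
	return true;				/* jmp \nmi_ret */
}

int main(void)
{
	uint64_t top = 0xffff880000010000UL;	/* made-up stack top */

	printf("%d %d\n", in_nmi_stack(top, top - 64),
			  in_nmi_stack(top, top + 64));	/* prints: 1 0 */
	return 0;
}
```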
1634 | 1639 | ||
1635 | /* runs on exception stack */ | 1640 | /* runs on exception stack */ |
1636 | ENTRY(nmi) | 1641 | ENTRY(nmi) |
1637 | INTR_FRAME | 1642 | INTR_FRAME |
1638 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1643 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1639 | /* | 1644 | /* |
1640 | * We allow breakpoints in NMIs. If a breakpoint occurs, then | 1645 | * We allow breakpoints in NMIs. If a breakpoint occurs, then |
1641 | * the iretq it performs will take us out of NMI context. | 1646 | * the iretq it performs will take us out of NMI context. |
1642 | * This means that we can have nested NMIs where the next | 1647 | * This means that we can have nested NMIs where the next |
1643 | * NMI is using the top of the stack of the previous NMI. We | 1648 | * NMI is using the top of the stack of the previous NMI. We |
1644 | * can't let it execute because the nested NMI will corrupt the | 1649 | * can't let it execute because the nested NMI will corrupt the |
1645 | * stack of the previous NMI. NMI handlers are not re-entrant | 1650 | * stack of the previous NMI. NMI handlers are not re-entrant |
1646 | * anyway. | 1651 | * anyway. |
1647 | * | 1652 | * |
1648 | * To handle this case we do the following: | 1653 | * To handle this case we do the following: |
1649 | * Check a special location on the stack that contains | 1654 | * Check a special location on the stack that contains
1650 | * a variable that is set when NMIs are executing. | 1655 | * a variable that is set when NMIs are executing. |
1651 | * The interrupted task's stack is also checked to see if it | 1656 | * The interrupted task's stack is also checked to see if it |
1652 | * is an NMI stack. | 1657 | * is an NMI stack. |
1653 | * If the variable is not set and the stack is not the NMI | 1658 | * If the variable is not set and the stack is not the NMI |
1654 | * stack then: | 1659 | * stack then: |
1655 | * o Set the special variable on the stack | 1660 | * o Set the special variable on the stack |
1656 | * o Copy the interrupt frame into a "saved" location on the stack | 1661 | * o Copy the interrupt frame into a "saved" location on the stack |
1657 | * o Copy the interrupt frame into a "copy" location on the stack | 1662 | * o Copy the interrupt frame into a "copy" location on the stack |
1658 | * o Continue processing the NMI | 1663 | * o Continue processing the NMI |
1659 | * If the variable is set or the previous stack is the NMI stack: | 1664 | * If the variable is set or the previous stack is the NMI stack: |
1660 | * o Modify the "copy" location to jump to repeat_nmi | 1665 | * o Modify the "copy" location to jump to repeat_nmi
1661 | * o return back to the first NMI | 1666 | * o return back to the first NMI |
1662 | * | 1667 | * |
1663 | * Now on exit of the first NMI, we first clear the stack variable. | 1668 | * Now on exit of the first NMI, we first clear the stack variable.
1664 | * The NMI stack will tell any nested NMIs at that point that it is | 1669 | * The NMI stack will tell any nested NMIs at that point that it is |
1665 | * nested. Then we pop the stack normally with iret, and if there was | 1670 | * nested. Then we pop the stack normally with iret, and if there was |
1666 | * a nested NMI that updated the copy interrupt stack frame, a | 1671 | * a nested NMI that updated the copy interrupt stack frame, a |
1667 | * jump will be made to the repeat_nmi code that will handle the second | 1672 | * jump will be made to the repeat_nmi code that will handle the second |
1668 | * NMI. | 1673 | * NMI. |
1669 | */ | 1674 | */ |
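
The decision tree this comment describes, as a hedged C sketch (illustrative only; the three inputs correspond to the %cs check, the on-stack variable, and test_in_nmi below):

```c
#include <stdbool.h>

enum nmi_path { FIRST_NMI, NESTED_NMI };

static enum nmi_path classify(bool kernel_cs, bool nmi_executing,
			      bool on_nmi_stack)
{
	if (!kernel_cs)		/* from user space: cannot be nested */
		return FIRST_NMI;
	if (nmi_executing)	/* the special stack variable is set */
		return NESTED_NMI;
	if (on_nmi_stack)	/* interrupted stack is the NMI stack */
		return NESTED_NMI;
	return FIRST_NMI;
}

int main(void)
{
	return classify(true, false, true) == NESTED_NMI ? 0 : 1;
}
```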
1670 | 1675 | ||
1671 | /* Use %rdx as our temp variable throughout */ | 1676 | /* Use %rdx as our temp variable throughout */
1672 | pushq_cfi %rdx | 1677 | pushq_cfi %rdx |
1673 | CFI_REL_OFFSET rdx, 0 | 1678 | CFI_REL_OFFSET rdx, 0 |
1674 | 1679 | ||
1675 | /* | 1680 | /* |
1676 | * If %cs was not the kernel segment, then the NMI triggered in user | 1681 | * If %cs was not the kernel segment, then the NMI triggered in user |
1677 | * space, which means it is definitely not nested. | 1682 | * space, which means it is definitely not nested. |
1678 | */ | 1683 | */ |
1679 | cmpl $__KERNEL_CS, 16(%rsp) | 1684 | cmpl $__KERNEL_CS, 16(%rsp) |
1680 | jne first_nmi | 1685 | jne first_nmi |
1681 | 1686 | ||
1682 | /* | 1687 | /* |
1683 | * Check the special variable on the stack to see if NMIs are | 1688 | * Check the special variable on the stack to see if NMIs are |
1684 | * executing. | 1689 | * executing. |
1685 | */ | 1690 | */ |
1686 | cmpl $1, -8(%rsp) | 1691 | cmpl $1, -8(%rsp) |
1687 | je nested_nmi | 1692 | je nested_nmi |
1688 | 1693 | ||
1689 | /* | 1694 | /* |
1690 | * Now test if the previous stack was an NMI stack. | 1695 | * Now test if the previous stack was an NMI stack. |
1691 | * We need the double check. We check the NMI stack to satisfy the | 1696 | * We need the double check. We check the NMI stack to satisfy the |
1692 | * race when the first NMI clears the variable before returning. | 1697 | * race when the first NMI clears the variable before returning. |
1693 | * We check the variable because the first NMI could be in a | 1698 | * We check the variable because the first NMI could be in a |
1694 | * breakpoint routine using a breakpoint stack. | 1699 | * breakpoint routine using a breakpoint stack. |
1695 | */ | 1700 | */ |
1696 | lea 6*8(%rsp), %rdx | 1701 | lea 6*8(%rsp), %rdx |
1697 | test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi | 1702 | test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi |
1698 | CFI_REMEMBER_STATE | 1703 | CFI_REMEMBER_STATE |
1699 | 1704 | ||
1700 | nested_nmi: | 1705 | nested_nmi: |
1701 | /* | 1706 | /* |
1702 | * Do nothing if we interrupted the fixup in repeat_nmi. | 1707 | * Do nothing if we interrupted the fixup in repeat_nmi. |
1703 | * It's about to repeat the NMI handler, so we are fine | 1708 | * It's about to repeat the NMI handler, so we are fine |
1704 | * with ignoring this one. | 1709 | * with ignoring this one. |
1705 | */ | 1710 | */ |
1706 | movq $repeat_nmi, %rdx | 1711 | movq $repeat_nmi, %rdx |
1707 | cmpq 8(%rsp), %rdx | 1712 | cmpq 8(%rsp), %rdx |
1708 | ja 1f | 1713 | ja 1f |
1709 | movq $end_repeat_nmi, %rdx | 1714 | movq $end_repeat_nmi, %rdx |
1710 | cmpq 8(%rsp), %rdx | 1715 | cmpq 8(%rsp), %rdx |
1711 | ja nested_nmi_out | 1716 | ja nested_nmi_out |
1712 | 1717 | ||
1713 | 1: | 1718 | 1: |
1714 | /* Set up the interrupted NMI's stack to jump to repeat_nmi */ | 1719 | /* Set up the interrupted NMI's stack to jump to repeat_nmi */
1715 | leaq -1*8(%rsp), %rdx | 1720 | leaq -1*8(%rsp), %rdx |
1716 | movq %rdx, %rsp | 1721 | movq %rdx, %rsp |
1717 | CFI_ADJUST_CFA_OFFSET 1*8 | 1722 | CFI_ADJUST_CFA_OFFSET 1*8 |
1718 | leaq -10*8(%rsp), %rdx | 1723 | leaq -10*8(%rsp), %rdx |
1719 | pushq_cfi $__KERNEL_DS | 1724 | pushq_cfi $__KERNEL_DS |
1720 | pushq_cfi %rdx | 1725 | pushq_cfi %rdx |
1721 | pushfq_cfi | 1726 | pushfq_cfi |
1722 | pushq_cfi $__KERNEL_CS | 1727 | pushq_cfi $__KERNEL_CS |
1723 | pushq_cfi $repeat_nmi | 1728 | pushq_cfi $repeat_nmi |
1724 | 1729 | ||
1725 | /* Put stack back */ | 1730 | /* Put stack back */ |
1726 | addq $(6*8), %rsp | 1731 | addq $(6*8), %rsp |
1727 | CFI_ADJUST_CFA_OFFSET -6*8 | 1732 | CFI_ADJUST_CFA_OFFSET -6*8 |
1728 | 1733 | ||
1729 | nested_nmi_out: | 1734 | nested_nmi_out: |
1730 | popq_cfi %rdx | 1735 | popq_cfi %rdx |
1731 | CFI_RESTORE rdx | 1736 | CFI_RESTORE rdx |
1732 | 1737 | ||
1733 | /* No need to check faults here */ | 1738 | /* No need to check faults here */ |
1734 | INTERRUPT_RETURN | 1739 | INTERRUPT_RETURN |
1735 | 1740 | ||
1736 | CFI_RESTORE_STATE | 1741 | CFI_RESTORE_STATE |
1737 | first_nmi: | 1742 | first_nmi: |
1738 | /* | 1743 | /* |
1739 | * Because nested NMIs will use the pushed location that we | 1744 | * Because nested NMIs will use the pushed location that we |
1740 | * stored in rdx, we must keep that space available. | 1745 | * stored in rdx, we must keep that space available. |
1741 | * Here's what our stack frame will look like: | 1746 | * Here's what our stack frame will look like: |
1742 | * +-------------------------+ | 1747 | * +-------------------------+ |
1743 | * | original SS | | 1748 | * | original SS | |
1744 | * | original Return RSP | | 1749 | * | original Return RSP | |
1745 | * | original RFLAGS | | 1750 | * | original RFLAGS | |
1746 | * | original CS | | 1751 | * | original CS | |
1747 | * | original RIP | | 1752 | * | original RIP | |
1748 | * +-------------------------+ | 1753 | * +-------------------------+ |
1749 | * | temp storage for rdx | | 1754 | * | temp storage for rdx | |
1750 | * +-------------------------+ | 1755 | * +-------------------------+ |
1751 | * | NMI executing variable | | 1756 | * | NMI executing variable | |
1752 | * +-------------------------+ | 1757 | * +-------------------------+ |
1753 | * | copied SS | | 1758 | * | copied SS | |
1754 | * | copied Return RSP | | 1759 | * | copied Return RSP | |
1755 | * | copied RFLAGS | | 1760 | * | copied RFLAGS | |
1756 | * | copied CS | | 1761 | * | copied CS | |
1757 | * | copied RIP | | 1762 | * | copied RIP | |
1758 | * +-------------------------+ | 1763 | * +-------------------------+ |
1759 | * | Saved SS | | 1764 | * | Saved SS | |
1760 | * | Saved Return RSP | | 1765 | * | Saved Return RSP | |
1761 | * | Saved RFLAGS | | 1766 | * | Saved RFLAGS | |
1762 | * | Saved CS | | 1767 | * | Saved CS | |
1763 | * | Saved RIP | | 1768 | * | Saved RIP | |
1764 | * +-------------------------+ | 1769 | * +-------------------------+ |
1765 | * | pt_regs | | 1770 | * | pt_regs | |
1766 | * +-------------------------+ | 1771 | * +-------------------------+ |
1767 | * | 1772 | * |
1768 | * The saved stack frame is used to fix up the copied stack frame | 1773 | * The saved stack frame is used to fix up the copied stack frame |
1769 | * that a nested NMI may change to make the interrupted NMI iret jump | 1774 | * that a nested NMI may change to make the interrupted NMI iret jump |
1770 | * to the repeat_nmi. The original stack frame and the temp storage | 1775 | * to the repeat_nmi. The original stack frame and the temp storage |
1771 | * is also used by nested NMIs and can not be trusted on exit. | 1776 | * is also used by nested NMIs and can not be trusted on exit. |
1772 | */ | 1777 | */ |
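
For reference, the three five-word frames in the diagram, modeled as C structs (a reading aid only, not code from the patch; on the real stack the addresses run downward, so field order here is just the diagram's top-to-bottom order):

```c
#include <stdio.h>

struct iret_frame {			/* one five-word hardware frame */
	unsigned long ss, rsp, rflags, cs, rip;
};

struct nmi_frames {			/* top of the diagram first */
	struct iret_frame original;	/* pushed by hardware */
	unsigned long saved_rdx;	/* temp storage for rdx */
	unsigned long nmi_executing;	/* the special variable */
	struct iret_frame copied;	/* may be rewritten by nested NMIs */
	struct iret_frame saved;	/* pristine copy used for fix-ups */
	/* struct pt_regs follows */
};

int main(void)
{
	/* 17 words = 136 bytes above the pt_regs area */
	printf("%zu bytes before pt_regs\n", sizeof(struct nmi_frames));
	return 0;
}
```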
1773 | /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ | 1778 | /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ |
1774 | movq (%rsp), %rdx | 1779 | movq (%rsp), %rdx |
1775 | CFI_RESTORE rdx | 1780 | CFI_RESTORE rdx |
1776 | 1781 | ||
1777 | /* Set the NMI executing variable on the stack. */ | 1782 | /* Set the NMI executing variable on the stack. */ |
1778 | pushq_cfi $1 | 1783 | pushq_cfi $1 |
1779 | 1784 | ||
1780 | /* | 1785 | /* |
1781 | * Leave room for the "copied" frame | 1786 | * Leave room for the "copied" frame |
1782 | */ | 1787 | */ |
1783 | subq $(5*8), %rsp | 1788 | subq $(5*8), %rsp |
1784 | CFI_ADJUST_CFA_OFFSET 5*8 | 1789 | CFI_ADJUST_CFA_OFFSET 5*8 |
1785 | 1790 | ||
1786 | /* Copy the stack frame to the Saved frame */ | 1791 | /* Copy the stack frame to the Saved frame */ |
1787 | .rept 5 | 1792 | .rept 5 |
1788 | pushq_cfi 11*8(%rsp) | 1793 | pushq_cfi 11*8(%rsp) |
1789 | .endr | 1794 | .endr |
1790 | CFI_DEF_CFA_OFFSET SS+8-RIP | 1795 | CFI_DEF_CFA_OFFSET SS+8-RIP |
1791 | 1796 | ||
1792 | /* Everything up to here is safe from nested NMIs */ | 1797 | /* Everything up to here is safe from nested NMIs */ |
1793 | 1798 | ||
1794 | /* | 1799 | /* |
1795 | * If there was a nested NMI, the first NMI's iret will return | 1800 | * If there was a nested NMI, the first NMI's iret will return |
1796 | * here. But NMIs are still enabled and we can take another | 1801 | * here. But NMIs are still enabled and we can take another |
1797 | * nested NMI. The nested NMI checks the interrupted RIP to see | 1802 | * nested NMI. The nested NMI checks the interrupted RIP to see |
1798 | * if it is between repeat_nmi and end_repeat_nmi, and if so | 1803 | * if it is between repeat_nmi and end_repeat_nmi, and if so |
1799 | * it will just return, as we are about to repeat an NMI anyway. | 1804 | * it will just return, as we are about to repeat an NMI anyway. |
1800 | * This makes it safe to copy to the stack frame that a nested | 1805 | * This makes it safe to copy to the stack frame that a nested |
1801 | * NMI will update. | 1806 | * NMI will update. |
1802 | */ | 1807 | */ |
1803 | repeat_nmi: | 1808 | repeat_nmi: |
1804 | /* | 1809 | /* |
1805 | * Update the stack variable to say we are still in NMI (the update | 1810 | * Update the stack variable to say we are still in NMI (the update |
1806 | * is benign for the non-repeat case, where 1 was pushed just above | 1811 | * is benign for the non-repeat case, where 1 was pushed just above |
1807 | * to this very stack slot). | 1812 | * to this very stack slot). |
1808 | */ | 1813 | */ |
1809 | movq $1, 10*8(%rsp) | 1814 | movq $1, 10*8(%rsp) |
1810 | 1815 | ||
1811 | /* Make another copy, this one may be modified by nested NMIs */ | 1816 | /* Make another copy, this one may be modified by nested NMIs */ |
1812 | addq $(10*8), %rsp | 1817 | addq $(10*8), %rsp |
1813 | CFI_ADJUST_CFA_OFFSET -10*8 | 1818 | CFI_ADJUST_CFA_OFFSET -10*8 |
1814 | .rept 5 | 1819 | .rept 5 |
1815 | pushq_cfi -6*8(%rsp) | 1820 | pushq_cfi -6*8(%rsp) |
1816 | .endr | 1821 | .endr |
1817 | subq $(5*8), %rsp | 1822 | subq $(5*8), %rsp |
1818 | CFI_DEF_CFA_OFFSET SS+8-RIP | 1823 | CFI_DEF_CFA_OFFSET SS+8-RIP |
1819 | end_repeat_nmi: | 1824 | end_repeat_nmi: |
1820 | 1825 | ||
1821 | /* | 1826 | /* |
1822 | * Everything below this point can be preempted by a nested | 1827 | * Everything below this point can be preempted by a nested |
1823 | * NMI if the first NMI took an exception and reset our iret stack | 1828 | * NMI if the first NMI took an exception and reset our iret stack |
1824 | * so that we repeat another NMI. | 1829 | * so that we repeat another NMI. |
1825 | */ | 1830 | */ |
1826 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1831 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1827 | subq $ORIG_RAX-R15, %rsp | 1832 | subq $ORIG_RAX-R15, %rsp |
1828 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1833 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1829 | /* | 1834 | /* |
1830 | * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit | 1835 | * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit |
1831 | * as we should not be calling schedule in NMI context, | 1836 | * as we should not be calling schedule in NMI context, |
1832 | * even with normal interrupts enabled. An NMI should not be | 1837 | * even with normal interrupts enabled. An NMI should not be |
1833 | * setting NEED_RESCHED or anything that normal interrupts and | 1838 | * setting NEED_RESCHED or anything that normal interrupts and |
1834 | * exceptions might do. | 1839 | * exceptions might do. |
1835 | */ | 1840 | */ |
1836 | call save_paranoid | 1841 | call save_paranoid |
1837 | DEFAULT_FRAME 0 | 1842 | DEFAULT_FRAME 0 |
1838 | 1843 | ||
1839 | /* | 1844 | /* |
1840 | * Save off the CR2 register. If we take a page fault in the NMI then | 1845 | * Save off the CR2 register. If we take a page fault in the NMI then |
1841 | * it could corrupt the CR2 value. If the NMI preempts a page fault | 1846 | * it could corrupt the CR2 value. If the NMI preempts a page fault |
1842 | * handler before it was able to read the CR2 register, and then the | 1847 | * handler before it was able to read the CR2 register, and then the |
1843 | * NMI itself takes a page fault, the page fault that was preempted | 1848 | * NMI itself takes a page fault, the page fault that was preempted |
1844 | * will read the information from the NMI page fault and not the | 1849 | * will read the information from the NMI page fault and not the |
1845 | * original fault. Save it off and restore it if it changes. | 1850 | * original fault. Save it off and restore it if it changes. |
1846 | * Use the r12 callee-saved register. | 1851 | * Use the r12 callee-saved register. |
1847 | */ | 1852 | */ |
1848 | movq %cr2, %r12 | 1853 | movq %cr2, %r12 |
1849 | 1854 | ||
1850 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | 1855 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ |
1851 | movq %rsp,%rdi | 1856 | movq %rsp,%rdi |
1852 | movq $-1,%rsi | 1857 | movq $-1,%rsi |
1853 | call do_nmi | 1858 | call do_nmi |
1854 | 1859 | ||
1855 | /* Did the NMI take a page fault? Restore cr2 if it did */ | 1860 | /* Did the NMI take a page fault? Restore cr2 if it did */ |
1856 | movq %cr2, %rcx | 1861 | movq %cr2, %rcx |
1857 | cmpq %rcx, %r12 | 1862 | cmpq %rcx, %r12 |
1858 | je 1f | 1863 | je 1f |
1859 | movq %r12, %cr2 | 1864 | movq %r12, %cr2 |
1860 | 1: | 1865 | 1: |
1861 | 1866 | ||
1862 | testl %ebx,%ebx /* swapgs needed? */ | 1867 | testl %ebx,%ebx /* swapgs needed? */ |
1863 | jnz nmi_restore | 1868 | jnz nmi_restore |
1864 | nmi_swapgs: | 1869 | nmi_swapgs: |
1865 | SWAPGS_UNSAFE_STACK | 1870 | SWAPGS_UNSAFE_STACK |
1866 | nmi_restore: | 1871 | nmi_restore: |
1867 | /* Pop the extra iret frame at once */ | 1872 | /* Pop the extra iret frame at once */ |
1868 | RESTORE_ALL 6*8 | 1873 | RESTORE_ALL 6*8 |
1869 | 1874 | ||
1870 | /* Clear the NMI executing stack variable */ | 1875 | /* Clear the NMI executing stack variable */ |
1871 | movq $0, 5*8(%rsp) | 1876 | movq $0, 5*8(%rsp) |
1872 | jmp irq_return | 1877 | jmp irq_return |
1873 | CFI_ENDPROC | 1878 | CFI_ENDPROC |
1874 | END(nmi) | 1879 | END(nmi) |
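The CR2 save/restore around do_nmi above is the subtlest part of this exit path, so here is the same logic as a stand-alone C model. read_cr2()/write_cr2()/do_nmi_body() are simulated stand-ins for the privileged %cr2 moves and the handler call, not kernel APIs:

#include <stdio.h>

static unsigned long fake_cr2;                   /* simulated %cr2 */
static unsigned long read_cr2(void)   { return fake_cr2; }
static void write_cr2(unsigned long v) { fake_cr2 = v; }

static void do_nmi_body(void)
{
        /* Pretend the NMI handler itself took a page fault, which on
         * real hardware would overwrite %cr2. */
        fake_cr2 = 0xdeadbeef;
}

int main(void)
{
        fake_cr2 = 0x1000;                  /* address of the preempted #PF */
        unsigned long saved = read_cr2();   /* movq %cr2, %r12 */

        do_nmi_body();                      /* call do_nmi */

        if (read_cr2() != saved)            /* cmpq %rcx, %r12 */
                write_cr2(saved);           /* movq %r12, %cr2 */

        printf("cr2 after NMI: %#lx\n", fake_cr2);  /* 0x1000 again */
        return 0;
}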
1875 | 1880 | ||
1876 | ENTRY(ignore_sysret) | 1881 | ENTRY(ignore_sysret) |
1877 | CFI_STARTPROC | 1882 | CFI_STARTPROC |
1878 | mov $-ENOSYS,%eax | 1883 | mov $-ENOSYS,%eax |
1879 | sysret | 1884 | sysret |
1880 | CFI_ENDPROC | 1885 | CFI_ENDPROC |
1881 | END(ignore_sysret) | 1886 | END(ignore_sysret) |
1882 | 1887 | ||
1883 | /* | 1888 | /* |
1884 | * End of kprobes section | 1889 | * End of kprobes section |
1885 | */ | 1890 | */ |
1886 | .popsection | 1891 | .popsection |
1887 | 1892 |
drivers/xen/events.c
1 | /* | 1 | /* |
2 | * Xen event channels | 2 | * Xen event channels |
3 | * | 3 | * |
4 | * Xen models interrupts with abstract event channels. Because each | 4 | * Xen models interrupts with abstract event channels. Because each |
5 | * domain gets 1024 event channels, but NR_IRQS is not that large, we | 5 | * domain gets 1024 event channels, but NR_IRQS is not that large, we |
6 | * must dynamically map irqs<->event channels. The event channels | 6 | * must dynamically map irqs<->event channels. The event channels |
7 | * interface with the rest of the kernel by defining a xen interrupt | 7 | * interface with the rest of the kernel by defining a xen interrupt |
8 | * chip. When an event is received, it is mapped to an irq and sent | 8 | * chip. When an event is received, it is mapped to an irq and sent |
9 | * through the normal interrupt processing path. | 9 | * through the normal interrupt processing path. |
10 | * | 10 | * |
11 | * There are four kinds of events which can be mapped to an event | 11 | * There are four kinds of events which can be mapped to an event |
12 | * channel: | 12 | * channel: |
13 | * | 13 | * |
14 | * 1. Inter-domain notifications. This includes all the virtual | 14 | * 1. Inter-domain notifications. This includes all the virtual |
15 | * device events, since they're driven by front-ends in another domain | 15 | * device events, since they're driven by front-ends in another domain |
16 | * (typically dom0). | 16 | * (typically dom0). |
17 | * 2. VIRQs, typically used for timers. These are per-cpu events. | 17 | * 2. VIRQs, typically used for timers. These are per-cpu events. |
18 | * 3. IPIs. | 18 | * 3. IPIs. |
19 | * 4. PIRQs - Hardware interrupts. | 19 | * 4. PIRQs - Hardware interrupts. |
20 | * | 20 | * |
21 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 | 21 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/linkage.h> | 24 | #include <linux/linkage.h> |
25 | #include <linux/interrupt.h> | 25 | #include <linux/interrupt.h> |
26 | #include <linux/irq.h> | 26 | #include <linux/irq.h> |
27 | #include <linux/module.h> | 27 | #include <linux/module.h> |
28 | #include <linux/string.h> | 28 | #include <linux/string.h> |
29 | #include <linux/bootmem.h> | 29 | #include <linux/bootmem.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/irqnr.h> | 31 | #include <linux/irqnr.h> |
32 | #include <linux/pci.h> | 32 | #include <linux/pci.h> |
33 | 33 | ||
34 | #ifdef CONFIG_X86 | 34 | #ifdef CONFIG_X86 |
35 | #include <asm/desc.h> | 35 | #include <asm/desc.h> |
36 | #include <asm/ptrace.h> | 36 | #include <asm/ptrace.h> |
37 | #include <asm/irq.h> | 37 | #include <asm/irq.h> |
38 | #include <asm/idle.h> | 38 | #include <asm/idle.h> |
39 | #include <asm/io_apic.h> | 39 | #include <asm/io_apic.h> |
40 | #include <asm/xen/page.h> | 40 | #include <asm/xen/page.h> |
41 | #include <asm/xen/pci.h> | 41 | #include <asm/xen/pci.h> |
42 | #endif | 42 | #endif |
43 | #include <asm/sync_bitops.h> | 43 | #include <asm/sync_bitops.h> |
44 | #include <asm/xen/hypercall.h> | 44 | #include <asm/xen/hypercall.h> |
45 | #include <asm/xen/hypervisor.h> | 45 | #include <asm/xen/hypervisor.h> |
46 | 46 | ||
47 | #include <xen/xen.h> | 47 | #include <xen/xen.h> |
48 | #include <xen/hvm.h> | 48 | #include <xen/hvm.h> |
49 | #include <xen/xen-ops.h> | 49 | #include <xen/xen-ops.h> |
50 | #include <xen/events.h> | 50 | #include <xen/events.h> |
51 | #include <xen/interface/xen.h> | 51 | #include <xen/interface/xen.h> |
52 | #include <xen/interface/event_channel.h> | 52 | #include <xen/interface/event_channel.h> |
53 | #include <xen/interface/hvm/hvm_op.h> | 53 | #include <xen/interface/hvm/hvm_op.h> |
54 | #include <xen/interface/hvm/params.h> | 54 | #include <xen/interface/hvm/params.h> |
55 | #include <xen/interface/physdev.h> | 55 | #include <xen/interface/physdev.h> |
56 | #include <xen/interface/sched.h> | 56 | #include <xen/interface/sched.h> |
57 | #include <asm/hw_irq.h> | 57 | #include <asm/hw_irq.h> |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * This lock protects updates to the following mapping and reference-count | 60 | * This lock protects updates to the following mapping and reference-count |
61 | * arrays. The lock does not need to be acquired to read the mapping tables. | 61 | * arrays. The lock does not need to be acquired to read the mapping tables. |
62 | */ | 62 | */ |
63 | static DEFINE_MUTEX(irq_mapping_update_lock); | 63 | static DEFINE_MUTEX(irq_mapping_update_lock); |
64 | 64 | ||
65 | static LIST_HEAD(xen_irq_list_head); | 65 | static LIST_HEAD(xen_irq_list_head); |
66 | 66 | ||
67 | /* IRQ <-> VIRQ mapping. */ | 67 | /* IRQ <-> VIRQ mapping. */ |
68 | static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1}; | 68 | static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1}; |
69 | 69 | ||
70 | /* IRQ <-> IPI mapping */ | 70 | /* IRQ <-> IPI mapping */ |
71 | static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; | 71 | static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; |
72 | 72 | ||
73 | /* Interrupt types. */ | 73 | /* Interrupt types. */ |
74 | enum xen_irq_type { | 74 | enum xen_irq_type { |
75 | IRQT_UNBOUND = 0, | 75 | IRQT_UNBOUND = 0, |
76 | IRQT_PIRQ, | 76 | IRQT_PIRQ, |
77 | IRQT_VIRQ, | 77 | IRQT_VIRQ, |
78 | IRQT_IPI, | 78 | IRQT_IPI, |
79 | IRQT_EVTCHN | 79 | IRQT_EVTCHN |
80 | }; | 80 | }; |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * Packed IRQ information: | 83 | * Packed IRQ information: |
84 | * type - enum xen_irq_type | 84 | * type - enum xen_irq_type |
85 | * event channel - irq->event channel mapping | 85 | * event channel - irq->event channel mapping |
86 | * cpu - cpu this event channel is bound to | 86 | * cpu - cpu this event channel is bound to |
87 | * index - type-specific information: | 87 | * index - type-specific information: |
88 | * PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM | 88 | * PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM |
89 | * guest, or GSI (real passthrough IRQ) of the device. | 89 | * guest, or GSI (real passthrough IRQ) of the device. |
90 | * VIRQ - virq number | 90 | * VIRQ - virq number |
91 | * IPI - IPI vector | 91 | * IPI - IPI vector |
92 | * EVTCHN - | 92 | * EVTCHN - |
93 | */ | 93 | */ |
94 | struct irq_info { | 94 | struct irq_info { |
95 | struct list_head list; | 95 | struct list_head list; |
96 | int refcnt; | 96 | int refcnt; |
97 | enum xen_irq_type type; /* type */ | 97 | enum xen_irq_type type; /* type */ |
98 | unsigned irq; | 98 | unsigned irq; |
99 | unsigned short evtchn; /* event channel */ | 99 | unsigned short evtchn; /* event channel */ |
100 | unsigned short cpu; /* cpu bound */ | 100 | unsigned short cpu; /* cpu bound */ |
101 | 101 | ||
102 | union { | 102 | union { |
103 | unsigned short virq; | 103 | unsigned short virq; |
104 | enum ipi_vector ipi; | 104 | enum ipi_vector ipi; |
105 | struct { | 105 | struct { |
106 | unsigned short pirq; | 106 | unsigned short pirq; |
107 | unsigned short gsi; | 107 | unsigned short gsi; |
108 | unsigned char vector; | 108 | unsigned char vector; |
109 | unsigned char flags; | 109 | unsigned char flags; |
110 | uint16_t domid; | 110 | uint16_t domid; |
111 | } pirq; | 111 | } pirq; |
112 | } u; | 112 | } u; |
113 | }; | 113 | }; |
114 | #define PIRQ_NEEDS_EOI (1 << 0) | 114 | #define PIRQ_NEEDS_EOI (1 << 0) |
115 | #define PIRQ_SHAREABLE (1 << 1) | 115 | #define PIRQ_SHAREABLE (1 << 1) |
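struct irq_info is a small tagged union: 'type' selects which member of 'u' is valid, and the accessors further down (ipi_from_irq(), virq_from_irq(), ...) assert the tag before reading. A stripped-down, stand-alone sketch of the same pattern (names here are invented for illustration):

#include <assert.h>
#include <stdio.h>

enum irq_type { T_UNBOUND = 0, T_VIRQ, T_IPI };

struct info {
        enum irq_type type;        /* tag: says which union member is live */
        union {
                unsigned short virq;
                unsigned short ipi;
        } u;
};

static unsigned virq_of(const struct info *i)
{
        assert(i->type == T_VIRQ);   /* plays the role of BUG_ON() */
        return i->u.virq;
}

int main(void)
{
        struct info timer = { .type = T_VIRQ, .u.virq = 0 /* VIRQ_TIMER */ };
        printf("virq = %u\n", virq_of(&timer));
        return 0;
}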
116 | 116 | ||
117 | static int *evtchn_to_irq; | 117 | static int *evtchn_to_irq; |
118 | #ifdef CONFIG_X86 | 118 | #ifdef CONFIG_X86 |
119 | static unsigned long *pirq_eoi_map; | 119 | static unsigned long *pirq_eoi_map; |
120 | #endif | 120 | #endif |
121 | static bool (*pirq_needs_eoi)(unsigned irq); | 121 | static bool (*pirq_needs_eoi)(unsigned irq); |
122 | 122 | ||
123 | static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG], | 123 | static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG], |
124 | cpu_evtchn_mask); | 124 | cpu_evtchn_mask); |
125 | 125 | ||
126 | /* Xen will never allocate port zero for any purpose. */ | 126 | /* Xen will never allocate port zero for any purpose. */ |
127 | #define VALID_EVTCHN(chn) ((chn) != 0) | 127 | #define VALID_EVTCHN(chn) ((chn) != 0) |
128 | 128 | ||
129 | static struct irq_chip xen_dynamic_chip; | 129 | static struct irq_chip xen_dynamic_chip; |
130 | static struct irq_chip xen_percpu_chip; | 130 | static struct irq_chip xen_percpu_chip; |
131 | static struct irq_chip xen_pirq_chip; | 131 | static struct irq_chip xen_pirq_chip; |
132 | static void enable_dynirq(struct irq_data *data); | 132 | static void enable_dynirq(struct irq_data *data); |
133 | static void disable_dynirq(struct irq_data *data); | 133 | static void disable_dynirq(struct irq_data *data); |
134 | 134 | ||
135 | /* Get info for IRQ */ | 135 | /* Get info for IRQ */ |
136 | static struct irq_info *info_for_irq(unsigned irq) | 136 | static struct irq_info *info_for_irq(unsigned irq) |
137 | { | 137 | { |
138 | return irq_get_handler_data(irq); | 138 | return irq_get_handler_data(irq); |
139 | } | 139 | } |
140 | 140 | ||
141 | /* Constructors for packed IRQ information. */ | 141 | /* Constructors for packed IRQ information. */ |
142 | static void xen_irq_info_common_init(struct irq_info *info, | 142 | static void xen_irq_info_common_init(struct irq_info *info, |
143 | unsigned irq, | 143 | unsigned irq, |
144 | enum xen_irq_type type, | 144 | enum xen_irq_type type, |
145 | unsigned short evtchn, | 145 | unsigned short evtchn, |
146 | unsigned short cpu) | 146 | unsigned short cpu) |
147 | { | 147 | { |
148 | 148 | ||
149 | BUG_ON(info->type != IRQT_UNBOUND && info->type != type); | 149 | BUG_ON(info->type != IRQT_UNBOUND && info->type != type); |
150 | 150 | ||
151 | info->type = type; | 151 | info->type = type; |
152 | info->irq = irq; | 152 | info->irq = irq; |
153 | info->evtchn = evtchn; | 153 | info->evtchn = evtchn; |
154 | info->cpu = cpu; | 154 | info->cpu = cpu; |
155 | 155 | ||
156 | evtchn_to_irq[evtchn] = irq; | 156 | evtchn_to_irq[evtchn] = irq; |
157 | } | 157 | } |
158 | 158 | ||
159 | static void xen_irq_info_evtchn_init(unsigned irq, | 159 | static void xen_irq_info_evtchn_init(unsigned irq, |
160 | unsigned short evtchn) | 160 | unsigned short evtchn) |
161 | { | 161 | { |
162 | struct irq_info *info = info_for_irq(irq); | 162 | struct irq_info *info = info_for_irq(irq); |
163 | 163 | ||
164 | xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0); | 164 | xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0); |
165 | } | 165 | } |
166 | 166 | ||
167 | static void xen_irq_info_ipi_init(unsigned cpu, | 167 | static void xen_irq_info_ipi_init(unsigned cpu, |
168 | unsigned irq, | 168 | unsigned irq, |
169 | unsigned short evtchn, | 169 | unsigned short evtchn, |
170 | enum ipi_vector ipi) | 170 | enum ipi_vector ipi) |
171 | { | 171 | { |
172 | struct irq_info *info = info_for_irq(irq); | 172 | struct irq_info *info = info_for_irq(irq); |
173 | 173 | ||
174 | xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0); | 174 | xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0); |
175 | 175 | ||
176 | info->u.ipi = ipi; | 176 | info->u.ipi = ipi; |
177 | 177 | ||
178 | per_cpu(ipi_to_irq, cpu)[ipi] = irq; | 178 | per_cpu(ipi_to_irq, cpu)[ipi] = irq; |
179 | } | 179 | } |
180 | 180 | ||
181 | static void xen_irq_info_virq_init(unsigned cpu, | 181 | static void xen_irq_info_virq_init(unsigned cpu, |
182 | unsigned irq, | 182 | unsigned irq, |
183 | unsigned short evtchn, | 183 | unsigned short evtchn, |
184 | unsigned short virq) | 184 | unsigned short virq) |
185 | { | 185 | { |
186 | struct irq_info *info = info_for_irq(irq); | 186 | struct irq_info *info = info_for_irq(irq); |
187 | 187 | ||
188 | xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0); | 188 | xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0); |
189 | 189 | ||
190 | info->u.virq = virq; | 190 | info->u.virq = virq; |
191 | 191 | ||
192 | per_cpu(virq_to_irq, cpu)[virq] = irq; | 192 | per_cpu(virq_to_irq, cpu)[virq] = irq; |
193 | } | 193 | } |
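The per-cpu virq_to_irq/ipi_to_irq tables that these constructors fill in are just lookup arrays initialized to -1 meaning "no irq bound yet". Modeled in plain C (the NR_CPUS/NR_VIRQS values and the flat 2-D array are illustrative stand-ins for the per-cpu machinery):

#include <stdio.h>

#define NR_CPUS  4
#define NR_VIRQS 24

/* per_cpu(virq_to_irq, cpu)[virq] becomes a plain 2-D array here */
static int virq_to_irq[NR_CPUS][NR_VIRQS];

int main(void)
{
        for (int c = 0; c < NR_CPUS; c++)
                for (int v = 0; v < NR_VIRQS; v++)
                        virq_to_irq[c][v] = -1;   /* unbound */

        virq_to_irq[1][0] = 42;  /* bind VIRQ 0 on CPU 1 to Linux irq 42 */

        printf("cpu1/virq0 -> irq %d, cpu0/virq0 -> irq %d\n",
               virq_to_irq[1][0], virq_to_irq[0][0]);
        return 0;
}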
194 | 194 | ||
195 | static void xen_irq_info_pirq_init(unsigned irq, | 195 | static void xen_irq_info_pirq_init(unsigned irq, |
196 | unsigned short evtchn, | 196 | unsigned short evtchn, |
197 | unsigned short pirq, | 197 | unsigned short pirq, |
198 | unsigned short gsi, | 198 | unsigned short gsi, |
199 | unsigned short vector, | 199 | unsigned short vector, |
200 | uint16_t domid, | 200 | uint16_t domid, |
201 | unsigned char flags) | 201 | unsigned char flags) |
202 | { | 202 | { |
203 | struct irq_info *info = info_for_irq(irq); | 203 | struct irq_info *info = info_for_irq(irq); |
204 | 204 | ||
205 | xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0); | 205 | xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0); |
206 | 206 | ||
207 | info->u.pirq.pirq = pirq; | 207 | info->u.pirq.pirq = pirq; |
208 | info->u.pirq.gsi = gsi; | 208 | info->u.pirq.gsi = gsi; |
209 | info->u.pirq.vector = vector; | 209 | info->u.pirq.vector = vector; |
210 | info->u.pirq.domid = domid; | 210 | info->u.pirq.domid = domid; |
211 | info->u.pirq.flags = flags; | 211 | info->u.pirq.flags = flags; |
212 | } | 212 | } |
213 | 213 | ||
214 | /* | 214 | /* |
215 | * Accessors for packed IRQ information. | 215 | * Accessors for packed IRQ information. |
216 | */ | 216 | */ |
217 | static unsigned int evtchn_from_irq(unsigned irq) | 217 | static unsigned int evtchn_from_irq(unsigned irq) |
218 | { | 218 | { |
219 | if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq))) | 219 | if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq))) |
220 | return 0; | 220 | return 0; |
221 | 221 | ||
222 | return info_for_irq(irq)->evtchn; | 222 | return info_for_irq(irq)->evtchn; |
223 | } | 223 | } |
224 | 224 | ||
225 | unsigned irq_from_evtchn(unsigned int evtchn) | 225 | unsigned irq_from_evtchn(unsigned int evtchn) |
226 | { | 226 | { |
227 | return evtchn_to_irq[evtchn]; | 227 | return evtchn_to_irq[evtchn]; |
228 | } | 228 | } |
229 | EXPORT_SYMBOL_GPL(irq_from_evtchn); | 229 | EXPORT_SYMBOL_GPL(irq_from_evtchn); |
230 | 230 | ||
231 | static enum ipi_vector ipi_from_irq(unsigned irq) | 231 | static enum ipi_vector ipi_from_irq(unsigned irq) |
232 | { | 232 | { |
233 | struct irq_info *info = info_for_irq(irq); | 233 | struct irq_info *info = info_for_irq(irq); |
234 | 234 | ||
235 | BUG_ON(info == NULL); | 235 | BUG_ON(info == NULL); |
236 | BUG_ON(info->type != IRQT_IPI); | 236 | BUG_ON(info->type != IRQT_IPI); |
237 | 237 | ||
238 | return info->u.ipi; | 238 | return info->u.ipi; |
239 | } | 239 | } |
240 | 240 | ||
241 | static unsigned virq_from_irq(unsigned irq) | 241 | static unsigned virq_from_irq(unsigned irq) |
242 | { | 242 | { |
243 | struct irq_info *info = info_for_irq(irq); | 243 | struct irq_info *info = info_for_irq(irq); |
244 | 244 | ||
245 | BUG_ON(info == NULL); | 245 | BUG_ON(info == NULL); |
246 | BUG_ON(info->type != IRQT_VIRQ); | 246 | BUG_ON(info->type != IRQT_VIRQ); |
247 | 247 | ||
248 | return info->u.virq; | 248 | return info->u.virq; |
249 | } | 249 | } |
250 | 250 | ||
251 | static unsigned pirq_from_irq(unsigned irq) | 251 | static unsigned pirq_from_irq(unsigned irq) |
252 | { | 252 | { |
253 | struct irq_info *info = info_for_irq(irq); | 253 | struct irq_info *info = info_for_irq(irq); |
254 | 254 | ||
255 | BUG_ON(info == NULL); | 255 | BUG_ON(info == NULL); |
256 | BUG_ON(info->type != IRQT_PIRQ); | 256 | BUG_ON(info->type != IRQT_PIRQ); |
257 | 257 | ||
258 | return info->u.pirq.pirq; | 258 | return info->u.pirq.pirq; |
259 | } | 259 | } |
260 | 260 | ||
261 | static enum xen_irq_type type_from_irq(unsigned irq) | 261 | static enum xen_irq_type type_from_irq(unsigned irq) |
262 | { | 262 | { |
263 | return info_for_irq(irq)->type; | 263 | return info_for_irq(irq)->type; |
264 | } | 264 | } |
265 | 265 | ||
266 | static unsigned cpu_from_irq(unsigned irq) | 266 | static unsigned cpu_from_irq(unsigned irq) |
267 | { | 267 | { |
268 | return info_for_irq(irq)->cpu; | 268 | return info_for_irq(irq)->cpu; |
269 | } | 269 | } |
270 | 270 | ||
271 | static unsigned int cpu_from_evtchn(unsigned int evtchn) | 271 | static unsigned int cpu_from_evtchn(unsigned int evtchn) |
272 | { | 272 | { |
273 | int irq = evtchn_to_irq[evtchn]; | 273 | int irq = evtchn_to_irq[evtchn]; |
274 | unsigned ret = 0; | 274 | unsigned ret = 0; |
275 | 275 | ||
276 | if (irq != -1) | 276 | if (irq != -1) |
277 | ret = cpu_from_irq(irq); | 277 | ret = cpu_from_irq(irq); |
278 | 278 | ||
279 | return ret; | 279 | return ret; |
280 | } | 280 | } |
281 | 281 | ||
282 | #ifdef CONFIG_X86 | 282 | #ifdef CONFIG_X86 |
283 | static bool pirq_check_eoi_map(unsigned irq) | 283 | static bool pirq_check_eoi_map(unsigned irq) |
284 | { | 284 | { |
285 | return test_bit(pirq_from_irq(irq), pirq_eoi_map); | 285 | return test_bit(pirq_from_irq(irq), pirq_eoi_map); |
286 | } | 286 | } |
287 | #endif | 287 | #endif |
288 | 288 | ||
289 | static bool pirq_needs_eoi_flag(unsigned irq) | 289 | static bool pirq_needs_eoi_flag(unsigned irq) |
290 | { | 290 | { |
291 | struct irq_info *info = info_for_irq(irq); | 291 | struct irq_info *info = info_for_irq(irq); |
292 | BUG_ON(info->type != IRQT_PIRQ); | 292 | BUG_ON(info->type != IRQT_PIRQ); |
293 | 293 | ||
294 | return info->u.pirq.flags & PIRQ_NEEDS_EOI; | 294 | return info->u.pirq.flags & PIRQ_NEEDS_EOI; |
295 | } | 295 | } |
296 | 296 | ||
297 | static inline unsigned long active_evtchns(unsigned int cpu, | 297 | static inline unsigned long active_evtchns(unsigned int cpu, |
298 | struct shared_info *sh, | 298 | struct shared_info *sh, |
299 | unsigned int idx) | 299 | unsigned int idx) |
300 | { | 300 | { |
301 | return sh->evtchn_pending[idx] & | 301 | return sh->evtchn_pending[idx] & |
302 | per_cpu(cpu_evtchn_mask, cpu)[idx] & | 302 | per_cpu(cpu_evtchn_mask, cpu)[idx] & |
303 | ~sh->evtchn_mask[idx]; | 303 | ~sh->evtchn_mask[idx]; |
304 | } | 304 | } |
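active_evtchns() ANDs three bitmaps word by word: globally pending, bound to this CPU, and not masked. A word-sized, runnable demo of the same expression:

#include <stdio.h>

int main(void)
{
        unsigned long pending  = 0xF;  /* sh->evtchn_pending[idx]       */
        unsigned long cpu_mask = 0x6;  /* cpu_evtchn_mask for this CPU  */
        unsigned long masked   = 0x4;  /* sh->evtchn_mask[idx]          */

        /* same expression as active_evtchns() */
        unsigned long active = pending & cpu_mask & ~masked;

        printf("active = %#lx\n", active);  /* 0x2: only channel 1 fires */
        return 0;
}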
305 | 305 | ||
306 | static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) | 306 | static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) |
307 | { | 307 | { |
308 | int irq = evtchn_to_irq[chn]; | 308 | int irq = evtchn_to_irq[chn]; |
309 | 309 | ||
310 | BUG_ON(irq == -1); | 310 | BUG_ON(irq == -1); |
311 | #ifdef CONFIG_SMP | 311 | #ifdef CONFIG_SMP |
312 | cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu)); | 312 | cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu)); |
313 | #endif | 313 | #endif |
314 | 314 | ||
315 | clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_from_irq(irq))); | 315 | clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_from_irq(irq))); |
316 | set_bit(chn, per_cpu(cpu_evtchn_mask, cpu)); | 316 | set_bit(chn, per_cpu(cpu_evtchn_mask, cpu)); |
317 | 317 | ||
318 | info_for_irq(irq)->cpu = cpu; | 318 | info_for_irq(irq)->cpu = cpu; |
319 | } | 319 | } |
320 | 320 | ||
321 | static void init_evtchn_cpu_bindings(void) | 321 | static void init_evtchn_cpu_bindings(void) |
322 | { | 322 | { |
323 | int i; | 323 | int i; |
324 | #ifdef CONFIG_SMP | 324 | #ifdef CONFIG_SMP |
325 | struct irq_info *info; | 325 | struct irq_info *info; |
326 | 326 | ||
327 | /* By default all event channels notify CPU#0. */ | 327 | /* By default all event channels notify CPU#0. */ |
328 | list_for_each_entry(info, &xen_irq_list_head, list) { | 328 | list_for_each_entry(info, &xen_irq_list_head, list) { |
329 | struct irq_desc *desc = irq_to_desc(info->irq); | 329 | struct irq_desc *desc = irq_to_desc(info->irq); |
330 | cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); | 330 | cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); |
331 | } | 331 | } |
332 | #endif | 332 | #endif |
333 | 333 | ||
334 | for_each_possible_cpu(i) | 334 | for_each_possible_cpu(i) |
335 | memset(per_cpu(cpu_evtchn_mask, i), | 335 | memset(per_cpu(cpu_evtchn_mask, i), |
336 | (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i))); | 336 | (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i))); |
337 | } | 337 | } |
338 | 338 | ||
339 | static inline void clear_evtchn(int port) | 339 | static inline void clear_evtchn(int port) |
340 | { | 340 | { |
341 | struct shared_info *s = HYPERVISOR_shared_info; | 341 | struct shared_info *s = HYPERVISOR_shared_info; |
342 | sync_clear_bit(port, &s->evtchn_pending[0]); | 342 | sync_clear_bit(port, &s->evtchn_pending[0]); |
343 | } | 343 | } |
344 | 344 | ||
345 | static inline void set_evtchn(int port) | 345 | static inline void set_evtchn(int port) |
346 | { | 346 | { |
347 | struct shared_info *s = HYPERVISOR_shared_info; | 347 | struct shared_info *s = HYPERVISOR_shared_info; |
348 | sync_set_bit(port, &s->evtchn_pending[0]); | 348 | sync_set_bit(port, &s->evtchn_pending[0]); |
349 | } | 349 | } |
350 | 350 | ||
351 | static inline int test_evtchn(int port) | 351 | static inline int test_evtchn(int port) |
352 | { | 352 | { |
353 | struct shared_info *s = HYPERVISOR_shared_info; | 353 | struct shared_info *s = HYPERVISOR_shared_info; |
354 | return sync_test_bit(port, &s->evtchn_pending[0]); | 354 | return sync_test_bit(port, &s->evtchn_pending[0]); |
355 | } | 355 | } |
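clear/set/test_evtchn operate on a bitmap shared with the hypervisor and other VCPUs, which is why they use the sync_* (locked) bit operations rather than plain ones. A user-space analog using C11 atomics; the kernel helpers themselves are arch-provided, so this is only a sketch of the semantics:

#include <stdatomic.h>
#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

static _Atomic unsigned long pending[4];  /* stand-in for evtchn_pending[] */

static void set_port(int port)
{
        atomic_fetch_or(&pending[port / BITS_PER_LONG],
                        1UL << (port % BITS_PER_LONG));
}

static void clear_port(int port)
{
        atomic_fetch_and(&pending[port / BITS_PER_LONG],
                         ~(1UL << (port % BITS_PER_LONG)));
}

static int test_port(int port)
{
        return (atomic_load(&pending[port / BITS_PER_LONG])
                >> (port % BITS_PER_LONG)) & 1;
}

int main(void)
{
        set_port(67);
        printf("port 67: %d\n", test_port(67));  /* 1 */
        clear_port(67);
        printf("port 67: %d\n", test_port(67));  /* 0 */
        return 0;
}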
356 | 356 | ||
357 | 357 | ||
358 | /** | 358 | /** |
359 | * notify_remote_via_irq - send event to remote end of event channel via irq | 359 | * notify_remote_via_irq - send event to remote end of event channel via irq |
360 | * @irq: irq of event channel to send event to | 360 | * @irq: irq of event channel to send event to |
361 | * | 361 | * |
362 | * Unlike notify_remote_via_evtchn(), this is safe to use across | 362 | * Unlike notify_remote_via_evtchn(), this is safe to use across |
363 | * save/restore. Notifications on a broken connection are silently | 363 | * save/restore. Notifications on a broken connection are silently |
364 | * dropped. | 364 | * dropped. |
365 | */ | 365 | */ |
366 | void notify_remote_via_irq(int irq) | 366 | void notify_remote_via_irq(int irq) |
367 | { | 367 | { |
368 | int evtchn = evtchn_from_irq(irq); | 368 | int evtchn = evtchn_from_irq(irq); |
369 | 369 | ||
370 | if (VALID_EVTCHN(evtchn)) | 370 | if (VALID_EVTCHN(evtchn)) |
371 | notify_remote_via_evtchn(evtchn); | 371 | notify_remote_via_evtchn(evtchn); |
372 | } | 372 | } |
373 | EXPORT_SYMBOL_GPL(notify_remote_via_irq); | 373 | EXPORT_SYMBOL_GPL(notify_remote_via_irq); |
374 | 374 | ||
375 | static void mask_evtchn(int port) | 375 | static void mask_evtchn(int port) |
376 | { | 376 | { |
377 | struct shared_info *s = HYPERVISOR_shared_info; | 377 | struct shared_info *s = HYPERVISOR_shared_info; |
378 | sync_set_bit(port, &s->evtchn_mask[0]); | 378 | sync_set_bit(port, &s->evtchn_mask[0]); |
379 | } | 379 | } |
380 | 380 | ||
381 | static void unmask_evtchn(int port) | 381 | static void unmask_evtchn(int port) |
382 | { | 382 | { |
383 | struct shared_info *s = HYPERVISOR_shared_info; | 383 | struct shared_info *s = HYPERVISOR_shared_info; |
384 | unsigned int cpu = get_cpu(); | 384 | unsigned int cpu = get_cpu(); |
385 | int do_hypercall = 0, evtchn_pending = 0; | 385 | int do_hypercall = 0, evtchn_pending = 0; |
386 | 386 | ||
387 | BUG_ON(!irqs_disabled()); | 387 | BUG_ON(!irqs_disabled()); |
388 | 388 | ||
389 | if (unlikely((cpu != cpu_from_evtchn(port)))) | 389 | if (unlikely((cpu != cpu_from_evtchn(port)))) |
390 | do_hypercall = 1; | 390 | do_hypercall = 1; |
391 | else | 391 | else |
392 | evtchn_pending = sync_test_bit(port, &s->evtchn_pending[0]); | 392 | evtchn_pending = sync_test_bit(port, &s->evtchn_pending[0]); |
393 | 393 | ||
394 | if (unlikely(evtchn_pending && xen_hvm_domain())) | 394 | if (unlikely(evtchn_pending && xen_hvm_domain())) |
395 | do_hypercall = 1; | 395 | do_hypercall = 1; |
396 | 396 | ||
397 | /* Slow path (hypercall) if this is a non-local port or if this is | 397 | /* Slow path (hypercall) if this is a non-local port or if this is |
398 | * an hvm domain and an event is pending (hvm domains don't have | 398 | * an hvm domain and an event is pending (hvm domains don't have |
399 | * their own implementation of irq_enable). */ | 399 | * their own implementation of irq_enable). */ |
400 | if (do_hypercall) { | 400 | if (do_hypercall) { |
401 | struct evtchn_unmask unmask = { .port = port }; | 401 | struct evtchn_unmask unmask = { .port = port }; |
402 | (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); | 402 | (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); |
403 | } else { | 403 | } else { |
404 | struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); | 404 | struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); |
405 | 405 | ||
406 | sync_clear_bit(port, &s->evtchn_mask[0]); | 406 | sync_clear_bit(port, &s->evtchn_mask[0]); |
407 | 407 | ||
408 | /* | 408 | /* |
409 | * The following is basically the equivalent of | 409 | * The following is basically the equivalent of |
410 | * 'hw_resend_irq'. Just like a real IO-APIC we 'lose | 410 | * 'hw_resend_irq'. Just like a real IO-APIC we 'lose |
411 | * the interrupt edge' if the channel is masked. | 411 | * the interrupt edge' if the channel is masked. |
412 | */ | 412 | */ |
413 | if (evtchn_pending && | 413 | if (evtchn_pending && |
414 | !sync_test_and_set_bit(port / BITS_PER_LONG, | 414 | !sync_test_and_set_bit(port / BITS_PER_LONG, |
415 | &vcpu_info->evtchn_pending_sel)) | 415 | &vcpu_info->evtchn_pending_sel)) |
416 | vcpu_info->evtchn_upcall_pending = 1; | 416 | vcpu_info->evtchn_upcall_pending = 1; |
417 | } | 417 | } |
418 | 418 | ||
419 | put_cpu(); | 419 | put_cpu(); |
420 | } | 420 | } |
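The decision logic in unmask_evtchn() condenses to: take the hypercall when the unmask cannot safely be finished locally, otherwise clear the mask bit and re-raise any pending event by hand. A hedged sketch of just the branch structure; is_local_port(), hvm_domain() and port_pending() are placeholders for the checks above, not real kernel functions:

#include <stdio.h>

/* Placeholders standing in for the real checks in unmask_evtchn(). */
static int is_local_port(int port) { (void)port; return 1; }
static int hvm_domain(void)        { return 0; }
static int port_pending(int port)  { (void)port; return 1; }

/* Same shape as unmask_evtchn()'s do_hypercall decision. */
static int need_hypercall(int port)
{
        if (!is_local_port(port))    /* remote vcpu: must ask Xen         */
                return 1;
        if (port_pending(port) && hvm_domain())
                return 1;            /* HVM has no local irq_enable path  */
        return 0;                    /* fast path: clear mask + resend    */
}

int main(void)
{
        printf("port 5 needs hypercall: %d\n", need_hypercall(5));
        return 0;
}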
421 | 421 | ||
422 | static void xen_irq_init(unsigned irq) | 422 | static void xen_irq_init(unsigned irq) |
423 | { | 423 | { |
424 | struct irq_info *info; | 424 | struct irq_info *info; |
425 | #ifdef CONFIG_SMP | 425 | #ifdef CONFIG_SMP |
426 | struct irq_desc *desc = irq_to_desc(irq); | 426 | struct irq_desc *desc = irq_to_desc(irq); |
427 | 427 | ||
428 | /* By default all event channels notify CPU#0. */ | 428 | /* By default all event channels notify CPU#0. */ |
429 | cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); | 429 | cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); |
430 | #endif | 430 | #endif |
431 | 431 | ||
432 | info = kzalloc(sizeof(*info), GFP_KERNEL); | 432 | info = kzalloc(sizeof(*info), GFP_KERNEL); |
433 | if (info == NULL) | 433 | if (info == NULL) |
434 | panic("Unable to allocate metadata for IRQ%d\n", irq); | 434 | panic("Unable to allocate metadata for IRQ%d\n", irq); |
435 | 435 | ||
436 | info->type = IRQT_UNBOUND; | 436 | info->type = IRQT_UNBOUND; |
437 | info->refcnt = -1; | 437 | info->refcnt = -1; |
438 | 438 | ||
439 | irq_set_handler_data(irq, info); | 439 | irq_set_handler_data(irq, info); |
440 | 440 | ||
441 | list_add_tail(&info->list, &xen_irq_list_head); | 441 | list_add_tail(&info->list, &xen_irq_list_head); |
442 | } | 442 | } |
443 | 443 | ||
444 | static int __must_check xen_allocate_irq_dynamic(void) | 444 | static int __must_check xen_allocate_irq_dynamic(void) |
445 | { | 445 | { |
446 | int first = 0; | 446 | int first = 0; |
447 | int irq; | 447 | int irq; |
448 | 448 | ||
449 | #ifdef CONFIG_X86_IO_APIC | 449 | #ifdef CONFIG_X86_IO_APIC |
450 | /* | 450 | /* |
451 | * For an HVM guest or domain 0 which see "real" (emulated or | 451 | * For an HVM guest or domain 0 which see "real" (emulated or |
452 | * actual respectively) GSIs we allocate dynamic IRQs | 452 | * actual respectively) GSIs we allocate dynamic IRQs |
453 | * e.g. those corresponding to event channels or MSIs | 453 | * e.g. those corresponding to event channels or MSIs |
454 | * etc. from the range above those "real" GSIs to avoid | 454 | * etc. from the range above those "real" GSIs to avoid |
455 | * collisions. | 455 | * collisions. |
456 | */ | 456 | */ |
457 | if (xen_initial_domain() || xen_hvm_domain()) | 457 | if (xen_initial_domain() || xen_hvm_domain()) |
458 | first = get_nr_irqs_gsi(); | 458 | first = get_nr_irqs_gsi(); |
459 | #endif | 459 | #endif |
460 | 460 | ||
461 | irq = irq_alloc_desc_from(first, -1); | 461 | irq = irq_alloc_desc_from(first, -1); |
462 | 462 | ||
463 | if (irq >= 0) | 463 | if (irq >= 0) |
464 | xen_irq_init(irq); | 464 | xen_irq_init(irq); |
465 | 465 | ||
466 | return irq; | 466 | return irq; |
467 | } | 467 | } |
468 | 468 | ||
469 | static int __must_check xen_allocate_irq_gsi(unsigned gsi) | 469 | static int __must_check xen_allocate_irq_gsi(unsigned gsi) |
470 | { | 470 | { |
471 | int irq; | 471 | int irq; |
472 | 472 | ||
473 | /* | 473 | /* |
474 | * A PV guest has no concept of a GSI (since it has no ACPI | 474 | * A PV guest has no concept of a GSI (since it has no ACPI |
475 | * nor access to/knowledge of the physical APICs). Therefore | 475 | * nor access to/knowledge of the physical APICs). Therefore |
476 | * all IRQs are dynamically allocated from the entire IRQ | 476 | * all IRQs are dynamically allocated from the entire IRQ |
477 | * space. | 477 | * space. |
478 | */ | 478 | */ |
479 | if (xen_pv_domain() && !xen_initial_domain()) | 479 | if (xen_pv_domain() && !xen_initial_domain()) |
480 | return xen_allocate_irq_dynamic(); | 480 | return xen_allocate_irq_dynamic(); |
481 | 481 | ||
482 | /* Legacy IRQ descriptors are already allocated by the arch. */ | 482 | /* Legacy IRQ descriptors are already allocated by the arch. */ |
483 | if (gsi < NR_IRQS_LEGACY) | 483 | if (gsi < NR_IRQS_LEGACY) |
484 | irq = gsi; | 484 | irq = gsi; |
485 | else | 485 | else |
486 | irq = irq_alloc_desc_at(gsi, -1); | 486 | irq = irq_alloc_desc_at(gsi, -1); |
487 | 487 | ||
488 | xen_irq_init(irq); | 488 | xen_irq_init(irq); |
489 | 489 | ||
490 | return irq; | 490 | return irq; |
491 | } | 491 | } |
492 | 492 | ||
493 | static void xen_free_irq(unsigned irq) | 493 | static void xen_free_irq(unsigned irq) |
494 | { | 494 | { |
495 | struct irq_info *info = irq_get_handler_data(irq); | 495 | struct irq_info *info = irq_get_handler_data(irq); |
496 | 496 | ||
497 | list_del(&info->list); | 497 | list_del(&info->list); |
498 | 498 | ||
499 | irq_set_handler_data(irq, NULL); | 499 | irq_set_handler_data(irq, NULL); |
500 | 500 | ||
501 | WARN_ON(info->refcnt > 0); | 501 | WARN_ON(info->refcnt > 0); |
502 | 502 | ||
503 | kfree(info); | 503 | kfree(info); |
504 | 504 | ||
505 | /* Legacy IRQ descriptors are managed by the arch. */ | 505 | /* Legacy IRQ descriptors are managed by the arch. */ |
506 | if (irq < NR_IRQS_LEGACY) | 506 | if (irq < NR_IRQS_LEGACY) |
507 | return; | 507 | return; |
508 | 508 | ||
509 | irq_free_desc(irq); | 509 | irq_free_desc(irq); |
510 | } | 510 | } |
511 | 511 | ||
512 | static void pirq_query_unmask(int irq) | 512 | static void pirq_query_unmask(int irq) |
513 | { | 513 | { |
514 | struct physdev_irq_status_query irq_status; | 514 | struct physdev_irq_status_query irq_status; |
515 | struct irq_info *info = info_for_irq(irq); | 515 | struct irq_info *info = info_for_irq(irq); |
516 | 516 | ||
517 | BUG_ON(info->type != IRQT_PIRQ); | 517 | BUG_ON(info->type != IRQT_PIRQ); |
518 | 518 | ||
519 | irq_status.irq = pirq_from_irq(irq); | 519 | irq_status.irq = pirq_from_irq(irq); |
520 | if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) | 520 | if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) |
521 | irq_status.flags = 0; | 521 | irq_status.flags = 0; |
522 | 522 | ||
523 | info->u.pirq.flags &= ~PIRQ_NEEDS_EOI; | 523 | info->u.pirq.flags &= ~PIRQ_NEEDS_EOI; |
524 | if (irq_status.flags & XENIRQSTAT_needs_eoi) | 524 | if (irq_status.flags & XENIRQSTAT_needs_eoi) |
525 | info->u.pirq.flags |= PIRQ_NEEDS_EOI; | 525 | info->u.pirq.flags |= PIRQ_NEEDS_EOI; |
526 | } | 526 | } |
527 | 527 | ||
528 | static bool probing_irq(int irq) | 528 | static bool probing_irq(int irq) |
529 | { | 529 | { |
530 | struct irq_desc *desc = irq_to_desc(irq); | 530 | struct irq_desc *desc = irq_to_desc(irq); |
531 | 531 | ||
532 | return desc && desc->action == NULL; | 532 | return desc && desc->action == NULL; |
533 | } | 533 | } |
534 | 534 | ||
535 | static void eoi_pirq(struct irq_data *data) | 535 | static void eoi_pirq(struct irq_data *data) |
536 | { | 536 | { |
537 | int evtchn = evtchn_from_irq(data->irq); | 537 | int evtchn = evtchn_from_irq(data->irq); |
538 | struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; | 538 | struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; |
539 | int rc = 0; | 539 | int rc = 0; |
540 | 540 | ||
541 | irq_move_irq(data); | 541 | irq_move_irq(data); |
542 | 542 | ||
543 | if (VALID_EVTCHN(evtchn)) | 543 | if (VALID_EVTCHN(evtchn)) |
544 | clear_evtchn(evtchn); | 544 | clear_evtchn(evtchn); |
545 | 545 | ||
546 | if (pirq_needs_eoi(data->irq)) { | 546 | if (pirq_needs_eoi(data->irq)) { |
547 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); | 547 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); |
548 | WARN_ON(rc); | 548 | WARN_ON(rc); |
549 | } | 549 | } |
550 | } | 550 | } |
551 | 551 | ||
552 | static void mask_ack_pirq(struct irq_data *data) | 552 | static void mask_ack_pirq(struct irq_data *data) |
553 | { | 553 | { |
554 | disable_dynirq(data); | 554 | disable_dynirq(data); |
555 | eoi_pirq(data); | 555 | eoi_pirq(data); |
556 | } | 556 | } |
557 | 557 | ||
558 | static unsigned int __startup_pirq(unsigned int irq) | 558 | static unsigned int __startup_pirq(unsigned int irq) |
559 | { | 559 | { |
560 | struct evtchn_bind_pirq bind_pirq; | 560 | struct evtchn_bind_pirq bind_pirq; |
561 | struct irq_info *info = info_for_irq(irq); | 561 | struct irq_info *info = info_for_irq(irq); |
562 | int evtchn = evtchn_from_irq(irq); | 562 | int evtchn = evtchn_from_irq(irq); |
563 | int rc; | 563 | int rc; |
564 | 564 | ||
565 | BUG_ON(info->type != IRQT_PIRQ); | 565 | BUG_ON(info->type != IRQT_PIRQ); |
566 | 566 | ||
567 | if (VALID_EVTCHN(evtchn)) | 567 | if (VALID_EVTCHN(evtchn)) |
568 | goto out; | 568 | goto out; |
569 | 569 | ||
570 | bind_pirq.pirq = pirq_from_irq(irq); | 570 | bind_pirq.pirq = pirq_from_irq(irq); |
571 | /* NB. We are happy to share unless we are probing. */ | 571 | /* NB. We are happy to share unless we are probing. */ |
572 | bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ? | 572 | bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ? |
573 | BIND_PIRQ__WILL_SHARE : 0; | 573 | BIND_PIRQ__WILL_SHARE : 0; |
574 | rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq); | 574 | rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq); |
575 | if (rc != 0) { | 575 | if (rc != 0) { |
576 | if (!probing_irq(irq)) | 576 | if (!probing_irq(irq)) |
577 | printk(KERN_INFO "Failed to obtain physical IRQ %d\n", | 577 | printk(KERN_INFO "Failed to obtain physical IRQ %d\n", |
578 | irq); | 578 | irq); |
579 | return 0; | 579 | return 0; |
580 | } | 580 | } |
581 | evtchn = bind_pirq.port; | 581 | evtchn = bind_pirq.port; |
582 | 582 | ||
583 | pirq_query_unmask(irq); | 583 | pirq_query_unmask(irq); |
584 | 584 | ||
585 | evtchn_to_irq[evtchn] = irq; | 585 | evtchn_to_irq[evtchn] = irq; |
586 | bind_evtchn_to_cpu(evtchn, 0); | 586 | bind_evtchn_to_cpu(evtchn, 0); |
587 | info->evtchn = evtchn; | 587 | info->evtchn = evtchn; |
588 | 588 | ||
589 | out: | 589 | out: |
590 | unmask_evtchn(evtchn); | 590 | unmask_evtchn(evtchn); |
591 | eoi_pirq(irq_get_irq_data(irq)); | 591 | eoi_pirq(irq_get_irq_data(irq)); |
592 | 592 | ||
593 | return 0; | 593 | return 0; |
594 | } | 594 | } |
595 | 595 | ||
596 | static unsigned int startup_pirq(struct irq_data *data) | 596 | static unsigned int startup_pirq(struct irq_data *data) |
597 | { | 597 | { |
598 | return __startup_pirq(data->irq); | 598 | return __startup_pirq(data->irq); |
599 | } | 599 | } |
600 | 600 | ||
601 | static void shutdown_pirq(struct irq_data *data) | 601 | static void shutdown_pirq(struct irq_data *data) |
602 | { | 602 | { |
603 | struct evtchn_close close; | 603 | struct evtchn_close close; |
604 | unsigned int irq = data->irq; | 604 | unsigned int irq = data->irq; |
605 | struct irq_info *info = info_for_irq(irq); | 605 | struct irq_info *info = info_for_irq(irq); |
606 | int evtchn = evtchn_from_irq(irq); | 606 | int evtchn = evtchn_from_irq(irq); |
607 | 607 | ||
608 | BUG_ON(info->type != IRQT_PIRQ); | 608 | BUG_ON(info->type != IRQT_PIRQ); |
609 | 609 | ||
610 | if (!VALID_EVTCHN(evtchn)) | 610 | if (!VALID_EVTCHN(evtchn)) |
611 | return; | 611 | return; |
612 | 612 | ||
613 | mask_evtchn(evtchn); | 613 | mask_evtchn(evtchn); |
614 | 614 | ||
615 | close.port = evtchn; | 615 | close.port = evtchn; |
616 | if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) | 616 | if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) |
617 | BUG(); | 617 | BUG(); |
618 | 618 | ||
619 | bind_evtchn_to_cpu(evtchn, 0); | 619 | bind_evtchn_to_cpu(evtchn, 0); |
620 | evtchn_to_irq[evtchn] = -1; | 620 | evtchn_to_irq[evtchn] = -1; |
621 | info->evtchn = 0; | 621 | info->evtchn = 0; |
622 | } | 622 | } |
623 | 623 | ||
624 | static void enable_pirq(struct irq_data *data) | 624 | static void enable_pirq(struct irq_data *data) |
625 | { | 625 | { |
626 | startup_pirq(data); | 626 | startup_pirq(data); |
627 | } | 627 | } |
628 | 628 | ||
629 | static void disable_pirq(struct irq_data *data) | 629 | static void disable_pirq(struct irq_data *data) |
630 | { | 630 | { |
631 | disable_dynirq(data); | 631 | disable_dynirq(data); |
632 | } | 632 | } |
633 | 633 | ||
634 | int xen_irq_from_gsi(unsigned gsi) | 634 | int xen_irq_from_gsi(unsigned gsi) |
635 | { | 635 | { |
636 | struct irq_info *info; | 636 | struct irq_info *info; |
637 | 637 | ||
638 | list_for_each_entry(info, &xen_irq_list_head, list) { | 638 | list_for_each_entry(info, &xen_irq_list_head, list) { |
639 | if (info->type != IRQT_PIRQ) | 639 | if (info->type != IRQT_PIRQ) |
640 | continue; | 640 | continue; |
641 | 641 | ||
642 | if (info->u.pirq.gsi == gsi) | 642 | if (info->u.pirq.gsi == gsi) |
643 | return info->irq; | 643 | return info->irq; |
644 | } | 644 | } |
645 | 645 | ||
646 | return -1; | 646 | return -1; |
647 | } | 647 | } |
648 | EXPORT_SYMBOL_GPL(xen_irq_from_gsi); | 648 | EXPORT_SYMBOL_GPL(xen_irq_from_gsi); |
649 | 649 | ||
650 | /* | 650 | /* |
651 | * Do not make any assumptions regarding the relationship between the | 651 | * Do not make any assumptions regarding the relationship between the |
652 | * IRQ number returned here and the Xen pirq argument. | 652 | * IRQ number returned here and the Xen pirq argument. |
653 | * | 653 | * |
654 | * Note: We don't assign an event channel until the irq is actually started | 654 | * Note: We don't assign an event channel until the irq is actually started |
655 | * up. Return an existing irq if we've already got one for the gsi. | 655 | * up. Return an existing irq if we've already got one for the gsi. |
656 | * | 656 | * |
657 | * Shareable implies level triggered, not shareable implies edge | 657 | * Shareable implies level triggered, not shareable implies edge |
658 | * triggered here. | 658 | * triggered here. |
659 | */ | 659 | */ |
660 | int xen_bind_pirq_gsi_to_irq(unsigned gsi, | 660 | int xen_bind_pirq_gsi_to_irq(unsigned gsi, |
661 | unsigned pirq, int shareable, char *name) | 661 | unsigned pirq, int shareable, char *name) |
662 | { | 662 | { |
663 | int irq = -1; | 663 | int irq = -1; |
664 | struct physdev_irq irq_op; | 664 | struct physdev_irq irq_op; |
665 | 665 | ||
666 | mutex_lock(&irq_mapping_update_lock); | 666 | mutex_lock(&irq_mapping_update_lock); |
667 | 667 | ||
668 | irq = xen_irq_from_gsi(gsi); | 668 | irq = xen_irq_from_gsi(gsi); |
669 | if (irq != -1) { | 669 | if (irq != -1) { |
670 | printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n", | 670 | printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n", |
671 | irq, gsi); | 671 | irq, gsi); |
672 | goto out; | 672 | goto out; |
673 | } | 673 | } |
674 | 674 | ||
675 | irq = xen_allocate_irq_gsi(gsi); | 675 | irq = xen_allocate_irq_gsi(gsi); |
676 | if (irq < 0) | 676 | if (irq < 0) |
677 | goto out; | 677 | goto out; |
678 | 678 | ||
679 | irq_op.irq = irq; | 679 | irq_op.irq = irq; |
680 | irq_op.vector = 0; | 680 | irq_op.vector = 0; |
681 | 681 | ||
682 | /* Only the privileged domain can do this. For non-priv, the pcifront | 682 | /* Only the privileged domain can do this. For non-priv, the pcifront |
683 | * driver provides a PCI bus that does the call to do exactly | 683 | * driver provides a PCI bus that does the call to do exactly |
684 | * this in the priv domain. */ | 684 | * this in the priv domain. */ |
685 | if (xen_initial_domain() && | 685 | if (xen_initial_domain() && |
686 | HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { | 686 | HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { |
687 | xen_free_irq(irq); | 687 | xen_free_irq(irq); |
688 | irq = -ENOSPC; | 688 | irq = -ENOSPC; |
689 | goto out; | 689 | goto out; |
690 | } | 690 | } |
691 | 691 | ||
692 | xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF, | 692 | xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF, |
693 | shareable ? PIRQ_SHAREABLE : 0); | 693 | shareable ? PIRQ_SHAREABLE : 0); |
694 | 694 | ||
695 | pirq_query_unmask(irq); | 695 | pirq_query_unmask(irq); |
696 | /* We try to use the handler with the appropriate semantic for the | 696 | /* We try to use the handler with the appropriate semantic for the |
697 | * type of interrupt: if the interrupt is an edge triggered | 697 | * type of interrupt: if the interrupt is an edge triggered |
698 | * interrupt we use handle_edge_irq. | 698 | * interrupt we use handle_edge_irq. |
699 | * | 699 | * |
700 | * On the other hand if the interrupt is level triggered we use | 700 | * On the other hand if the interrupt is level triggered we use |
701 | * handle_fasteoi_irq like the native code does for this kind of | 701 | * handle_fasteoi_irq like the native code does for this kind of |
702 | * interrupts. | 702 | * interrupts. |
703 | * | 703 | * |
704 | * Depending on the Xen version, pirq_needs_eoi might return true | 704 | * Depending on the Xen version, pirq_needs_eoi might return true |
705 | * not only for level triggered interrupts but for edge triggered | 705 | * not only for level triggered interrupts but for edge triggered |
706 | * interrupts too. In any case Xen always honors the eoi mechanism, | 706 | * interrupts too. In any case Xen always honors the eoi mechanism, |
707 | * not injecting any more pirqs of the same kind if the first one | 707 | * not injecting any more pirqs of the same kind if the first one |
708 | * hasn't received an eoi yet. Therefore using the fasteoi handler | 708 | * hasn't received an eoi yet. Therefore using the fasteoi handler |
709 | * is the right choice either way. | 709 | * is the right choice either way. |
710 | */ | 710 | */ |
711 | if (shareable) | 711 | if (shareable) |
712 | irq_set_chip_and_handler_name(irq, &xen_pirq_chip, | 712 | irq_set_chip_and_handler_name(irq, &xen_pirq_chip, |
713 | handle_fasteoi_irq, name); | 713 | handle_fasteoi_irq, name); |
714 | else | 714 | else |
715 | irq_set_chip_and_handler_name(irq, &xen_pirq_chip, | 715 | irq_set_chip_and_handler_name(irq, &xen_pirq_chip, |
716 | handle_edge_irq, name); | 716 | handle_edge_irq, name); |
717 | 717 | ||
718 | out: | 718 | out: |
719 | mutex_unlock(&irq_mapping_update_lock); | 719 | mutex_unlock(&irq_mapping_update_lock); |
720 | 720 | ||
721 | return irq; | 721 | return irq; |
722 | } | 722 | } |
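Callers see this interface roughly as follows; the GSI/PIRQ numbers below are invented for illustration and the whole function is a hypothetical caller, not code from this commit:

/* Hypothetical caller: bind a level-triggered, shareable GSI. */
static int example_bind_acpi_sci(void)
{
        int irq = xen_bind_pirq_gsi_to_irq(9 /* gsi */, 9 /* pirq */,
                                           1 /* shareable */, "acpi");
        if (irq < 0)
                pr_err("binding GSI 9 failed: %d\n", irq);
        return irq;
}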
723 | 723 | ||
724 | #ifdef CONFIG_PCI_MSI | 724 | #ifdef CONFIG_PCI_MSI |
725 | int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc) | 725 | int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc) |
726 | { | 726 | { |
727 | int rc; | 727 | int rc; |
728 | struct physdev_get_free_pirq op_get_free_pirq; | 728 | struct physdev_get_free_pirq op_get_free_pirq; |
729 | 729 | ||
730 | op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI; | 730 | op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI; |
731 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); | 731 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); |
732 | 732 | ||
733 | WARN_ONCE(rc == -ENOSYS, | 733 | WARN_ONCE(rc == -ENOSYS, |
734 | "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n"); | 734 | "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n"); |
735 | 735 | ||
736 | return rc ? -1 : op_get_free_pirq.pirq; | 736 | return rc ? -1 : op_get_free_pirq.pirq; |
737 | } | 737 | } |
738 | 738 | ||
739 | int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, | 739 | int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, |
740 | int pirq, int vector, const char *name, | 740 | int pirq, int vector, const char *name, |
741 | domid_t domid) | 741 | domid_t domid) |
742 | { | 742 | { |
743 | int irq, ret; | 743 | int irq, ret; |
744 | 744 | ||
745 | mutex_lock(&irq_mapping_update_lock); | 745 | mutex_lock(&irq_mapping_update_lock); |
746 | 746 | ||
747 | irq = xen_allocate_irq_dynamic(); | 747 | irq = xen_allocate_irq_dynamic(); |
748 | if (irq < 0) | 748 | if (irq < 0) |
749 | goto out; | 749 | goto out; |
750 | 750 | ||
751 | irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq, | 751 | irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq, |
752 | name); | 752 | name); |
753 | 753 | ||
754 | xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0); | 754 | xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0); |
755 | ret = irq_set_msi_desc(irq, msidesc); | 755 | ret = irq_set_msi_desc(irq, msidesc); |
756 | if (ret < 0) | 756 | if (ret < 0) |
757 | goto error_irq; | 757 | goto error_irq; |
758 | out: | 758 | out: |
759 | mutex_unlock(&irq_mapping_update_lock); | 759 | mutex_unlock(&irq_mapping_update_lock); |
760 | return irq; | 760 | return irq; |
761 | error_irq: | 761 | error_irq: |
762 | mutex_unlock(&irq_mapping_update_lock); | 762 | mutex_unlock(&irq_mapping_update_lock); |
763 | xen_free_irq(irq); | 763 | xen_free_irq(irq); |
764 | return ret; | 764 | return ret; |
765 | } | 765 | } |
766 | #endif | 766 | #endif |
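Under CONFIG_PCI_MSI the two helpers are typically used back to back: first ask Xen for a free PIRQ, then wire it to a Linux irq. A hedged sketch with error handling trimmed; example_setup_msi() is hypothetical, and dev/msidesc are assumed to come from the PCI core:

static int example_setup_msi(struct pci_dev *dev, struct msi_desc *msidesc)
{
        int pirq = xen_allocate_pirq_msi(dev, msidesc);  /* -1 on failure */
        if (pirq < 0)
                return pirq;

        return xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0 /* vector */,
                                        "example-msi", DOMID_SELF);
}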
767 | 767 | ||
768 | int xen_destroy_irq(int irq) | 768 | int xen_destroy_irq(int irq) |
769 | { | 769 | { |
770 | struct irq_desc *desc; | 770 | struct irq_desc *desc; |
771 | struct physdev_unmap_pirq unmap_irq; | 771 | struct physdev_unmap_pirq unmap_irq; |
772 | struct irq_info *info = info_for_irq(irq); | 772 | struct irq_info *info = info_for_irq(irq); |
773 | int rc = -ENOENT; | 773 | int rc = -ENOENT; |
774 | 774 | ||
775 | mutex_lock(&irq_mapping_update_lock); | 775 | mutex_lock(&irq_mapping_update_lock); |
776 | 776 | ||
777 | desc = irq_to_desc(irq); | 777 | desc = irq_to_desc(irq); |
778 | if (!desc) | 778 | if (!desc) |
779 | goto out; | 779 | goto out; |
780 | 780 | ||
781 | if (xen_initial_domain()) { | 781 | if (xen_initial_domain()) { |
782 | unmap_irq.pirq = info->u.pirq.pirq; | 782 | unmap_irq.pirq = info->u.pirq.pirq; |
783 | unmap_irq.domid = info->u.pirq.domid; | 783 | unmap_irq.domid = info->u.pirq.domid; |
784 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); | 784 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); |
785 | /* If another domain quits without making the pci_disable_msix | 785 | /* If another domain quits without making the pci_disable_msix |
786 | * call, the Xen hypervisor takes care of freeing the PIRQs | 786 | * call, the Xen hypervisor takes care of freeing the PIRQs |
787 | * (free_domain_pirqs). | 787 | * (free_domain_pirqs). |
788 | */ | 788 | */ |
789 | if (rc == -ESRCH && info->u.pirq.domid != DOMID_SELF) | 789 | if (rc == -ESRCH && info->u.pirq.domid != DOMID_SELF) |
790 | printk(KERN_INFO "domain %d does not have %d anymore\n", | 790 | printk(KERN_INFO "domain %d does not have %d anymore\n", |
791 | info->u.pirq.domid, info->u.pirq.pirq); | 791 | info->u.pirq.domid, info->u.pirq.pirq); |
792 | else if (rc) { | 792 | else if (rc) { |
793 | printk(KERN_WARNING "unmap irq failed %d\n", rc); | 793 | printk(KERN_WARNING "unmap irq failed %d\n", rc); |
794 | goto out; | 794 | goto out; |
795 | } | 795 | } |
796 | } | 796 | } |
797 | 797 | ||
798 | xen_free_irq(irq); | 798 | xen_free_irq(irq); |
799 | 799 | ||
800 | out: | 800 | out: |
801 | mutex_unlock(&irq_mapping_update_lock); | 801 | mutex_unlock(&irq_mapping_update_lock); |
802 | return rc; | 802 | return rc; |
803 | } | 803 | } |
804 | 804 | ||
805 | int xen_irq_from_pirq(unsigned pirq) | 805 | int xen_irq_from_pirq(unsigned pirq) |
806 | { | 806 | { |
807 | int irq; | 807 | int irq; |
808 | 808 | ||
809 | struct irq_info *info; | 809 | struct irq_info *info; |
810 | 810 | ||
811 | mutex_lock(&irq_mapping_update_lock); | 811 | mutex_lock(&irq_mapping_update_lock); |
812 | 812 | ||
813 | list_for_each_entry(info, &xen_irq_list_head, list) { | 813 | list_for_each_entry(info, &xen_irq_list_head, list) { |
814 | if (info->type != IRQT_PIRQ) | 814 | if (info->type != IRQT_PIRQ) |
815 | continue; | 815 | continue; |
816 | irq = info->irq; | 816 | irq = info->irq; |
817 | if (info->u.pirq.pirq == pirq) | 817 | if (info->u.pirq.pirq == pirq) |
818 | goto out; | 818 | goto out; |
819 | } | 819 | } |
820 | irq = -1; | 820 | irq = -1; |
821 | out: | 821 | out: |
822 | mutex_unlock(&irq_mapping_update_lock); | 822 | mutex_unlock(&irq_mapping_update_lock); |
823 | 823 | ||
824 | return irq; | 824 | return irq; |
825 | } | 825 | } |
826 | 826 | ||
827 | 827 | ||
828 | int xen_pirq_from_irq(unsigned irq) | 828 | int xen_pirq_from_irq(unsigned irq) |
829 | { | 829 | { |
830 | return pirq_from_irq(irq); | 830 | return pirq_from_irq(irq); |
831 | } | 831 | } |
832 | EXPORT_SYMBOL_GPL(xen_pirq_from_irq); | 832 | EXPORT_SYMBOL_GPL(xen_pirq_from_irq); |
833 | int bind_evtchn_to_irq(unsigned int evtchn) | 833 | int bind_evtchn_to_irq(unsigned int evtchn) |
834 | { | 834 | { |
835 | int irq; | 835 | int irq; |
836 | 836 | ||
837 | mutex_lock(&irq_mapping_update_lock); | 837 | mutex_lock(&irq_mapping_update_lock); |
838 | 838 | ||
839 | irq = evtchn_to_irq[evtchn]; | 839 | irq = evtchn_to_irq[evtchn]; |
840 | 840 | ||
841 | if (irq == -1) { | 841 | if (irq == -1) { |
842 | irq = xen_allocate_irq_dynamic(); | 842 | irq = xen_allocate_irq_dynamic(); |
843 | if (irq < 0) | 843 | if (irq < 0) |
844 | goto out; | 844 | goto out; |
845 | 845 | ||
846 | irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, | 846 | irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, |
847 | handle_edge_irq, "event"); | 847 | handle_edge_irq, "event"); |
848 | 848 | ||
849 | xen_irq_info_evtchn_init(irq, evtchn); | 849 | xen_irq_info_evtchn_init(irq, evtchn); |
850 | } else { | 850 | } else { |
851 | struct irq_info *info = info_for_irq(irq); | 851 | struct irq_info *info = info_for_irq(irq); |
852 | WARN_ON(info == NULL || info->type != IRQT_EVTCHN); | 852 | WARN_ON(info == NULL || info->type != IRQT_EVTCHN); |
853 | } | 853 | } |
854 | irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); | 854 | irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); |
855 | 855 | ||
856 | out: | 856 | out: |
857 | mutex_unlock(&irq_mapping_update_lock); | 857 | mutex_unlock(&irq_mapping_update_lock); |
858 | 858 | ||
859 | return irq; | 859 | return irq; |
860 | } | 860 | } |
861 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); | 861 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); |
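A frontend driver that has been handed an event-channel port would then do something like the following to receive interrupts from it. This is a hypothetical sketch: example_connect(), my_handler and my_dev are invented names, and the request_irq() step is the caller's usual follow-up, not part of this file:

extern irqreturn_t my_handler(int irq, void *dev);  /* assumed to exist */

static int example_connect(unsigned int evtchn, void *my_dev)
{
        int irq = bind_evtchn_to_irq(evtchn);
        if (irq < 0)
                return irq;

        return request_irq(irq, my_handler, 0, "example-frontend", my_dev);
}

In-tree frontends usually reach for the bind_evtchn_to_irqhandler() convenience wrapper, which combines these two steps.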
862 | 862 | ||
863 | static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) | 863 | static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) |
864 | { | 864 | { |
865 | struct evtchn_bind_ipi bind_ipi; | 865 | struct evtchn_bind_ipi bind_ipi; |
866 | int evtchn, irq; | 866 | int evtchn, irq; |
867 | 867 | ||
868 | mutex_lock(&irq_mapping_update_lock); | 868 | mutex_lock(&irq_mapping_update_lock); |
869 | 869 | ||
870 | irq = per_cpu(ipi_to_irq, cpu)[ipi]; | 870 | irq = per_cpu(ipi_to_irq, cpu)[ipi]; |
871 | 871 | ||
872 | if (irq == -1) { | 872 | if (irq == -1) { |
873 | irq = xen_allocate_irq_dynamic(); | 873 | irq = xen_allocate_irq_dynamic(); |
874 | if (irq < 0) | 874 | if (irq < 0) |
875 | goto out; | 875 | goto out; |
876 | 876 | ||
877 | irq_set_chip_and_handler_name(irq, &xen_percpu_chip, | 877 | irq_set_chip_and_handler_name(irq, &xen_percpu_chip, |
878 | handle_percpu_irq, "ipi"); | 878 | handle_percpu_irq, "ipi"); |
879 | 879 | ||
880 | bind_ipi.vcpu = cpu; | 880 | bind_ipi.vcpu = cpu; |
881 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, | 881 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, |
882 | &bind_ipi) != 0) | 882 | &bind_ipi) != 0) |
883 | BUG(); | 883 | BUG(); |
884 | evtchn = bind_ipi.port; | 884 | evtchn = bind_ipi.port; |
885 | 885 | ||
886 | xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); | 886 | xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); |
887 | 887 | ||
888 | bind_evtchn_to_cpu(evtchn, cpu); | 888 | bind_evtchn_to_cpu(evtchn, cpu); |
889 | } else { | 889 | } else { |
890 | struct irq_info *info = info_for_irq(irq); | 890 | struct irq_info *info = info_for_irq(irq); |
891 | WARN_ON(info == NULL || info->type != IRQT_IPI); | 891 | WARN_ON(info == NULL || info->type != IRQT_IPI); |
892 | } | 892 | } |
893 | 893 | ||
894 | out: | 894 | out: |
895 | mutex_unlock(&irq_mapping_update_lock); | 895 | mutex_unlock(&irq_mapping_update_lock); |
896 | return irq; | 896 | return irq; |
897 | } | 897 | } |
898 | 898 | ||
899 | static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, | 899 | static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, |
900 | unsigned int remote_port) | 900 | unsigned int remote_port) |
901 | { | 901 | { |
902 | struct evtchn_bind_interdomain bind_interdomain; | 902 | struct evtchn_bind_interdomain bind_interdomain; |
903 | int err; | 903 | int err; |
904 | 904 | ||
905 | bind_interdomain.remote_dom = remote_domain; | 905 | bind_interdomain.remote_dom = remote_domain; |
906 | bind_interdomain.remote_port = remote_port; | 906 | bind_interdomain.remote_port = remote_port; |
907 | 907 | ||
908 | err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, | 908 | err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, |
909 | &bind_interdomain); | 909 | &bind_interdomain); |
910 | 910 | ||
911 | return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); | 911 | return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); |
912 | } | 912 | } |
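Editor's note: the return statement above uses GCC's conditional operator with an omitted middle operand: `err ? : expr` yields err when the hypercall failed (err nonzero) and only evaluates bind_evtchn_to_irq() on success. The portable equivalent is:

    if (err)
            return err;
    return bind_evtchn_to_irq(bind_interdomain.local_port);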
913 | 913 | ||
914 | static int find_virq(unsigned int virq, unsigned int cpu) | 914 | static int find_virq(unsigned int virq, unsigned int cpu) |
915 | { | 915 | { |
916 | struct evtchn_status status; | 916 | struct evtchn_status status; |
917 | int port, rc = -ENOENT; | 917 | int port, rc = -ENOENT; |
918 | 918 | ||
919 | memset(&status, 0, sizeof(status)); | 919 | memset(&status, 0, sizeof(status)); |
920 | for (port = 0; port <= NR_EVENT_CHANNELS; port++) { | 920 | for (port = 0; port <= NR_EVENT_CHANNELS; port++) { |
921 | status.dom = DOMID_SELF; | 921 | status.dom = DOMID_SELF; |
922 | status.port = port; | 922 | status.port = port; |
923 | rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status); | 923 | rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status); |
924 | if (rc < 0) | 924 | if (rc < 0) |
925 | continue; | 925 | continue; |
926 | if (status.status != EVTCHNSTAT_virq) | 926 | if (status.status != EVTCHNSTAT_virq) |
927 | continue; | 927 | continue; |
928 | if (status.u.virq == virq && status.vcpu == cpu) { | 928 | if (status.u.virq == virq && status.vcpu == cpu) { |
929 | rc = port; | 929 | rc = port; |
930 | break; | 930 | break; |
931 | } | 931 | } |
932 | } | 932 | } |
933 | return rc; | 933 | return rc; |
934 | } | 934 | } |
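Editor's note: find_virq() recovers the port of a VIRQ the hypervisor reports as already bound (the -EEXIST path in bind_virq_to_irq() below) by querying the status of every event channel. Note the loop bound `port <= NR_EVENT_CHANNELS` probes one port past the valid range 0..NR_EVENT_CHANNELS-1; this is harmless only because EVTCHNOP_status fails for an invalid port and the error is skipped. A tighter sketch of the same scan, using the same structures:

    for (port = 0; port < NR_EVENT_CHANNELS; port++) {    /* strict bound */
            struct evtchn_status status = {
                    .dom  = DOMID_SELF,
                    .port = port,
            };

            if (HYPERVISOR_event_channel_op(EVTCHNOP_status, &status))
                    continue;                /* unqueryable port */
            if (status.status == EVTCHNSTAT_virq &&
                status.u.virq == virq && status.vcpu == cpu)
                    return port;
    }
    return -ENOENT;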
935 | 935 | ||
936 | int bind_virq_to_irq(unsigned int virq, unsigned int cpu) | 936 | int bind_virq_to_irq(unsigned int virq, unsigned int cpu) |
937 | { | 937 | { |
938 | struct evtchn_bind_virq bind_virq; | 938 | struct evtchn_bind_virq bind_virq; |
939 | int evtchn, irq, ret; | 939 | int evtchn, irq, ret; |
940 | 940 | ||
941 | mutex_lock(&irq_mapping_update_lock); | 941 | mutex_lock(&irq_mapping_update_lock); |
942 | 942 | ||
943 | irq = per_cpu(virq_to_irq, cpu)[virq]; | 943 | irq = per_cpu(virq_to_irq, cpu)[virq]; |
944 | 944 | ||
945 | if (irq == -1) { | 945 | if (irq == -1) { |
946 | irq = xen_allocate_irq_dynamic(); | 946 | irq = xen_allocate_irq_dynamic(); |
947 | if (irq < 0) | 947 | if (irq < 0) |
948 | goto out; | 948 | goto out; |
949 | 949 | ||
950 | irq_set_chip_and_handler_name(irq, &xen_percpu_chip, | 950 | irq_set_chip_and_handler_name(irq, &xen_percpu_chip, |
951 | handle_percpu_irq, "virq"); | 951 | handle_percpu_irq, "virq"); |
952 | 952 | ||
953 | bind_virq.virq = virq; | 953 | bind_virq.virq = virq; |
954 | bind_virq.vcpu = cpu; | 954 | bind_virq.vcpu = cpu; |
955 | ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, | 955 | ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, |
956 | &bind_virq); | 956 | &bind_virq); |
957 | if (ret == 0) | 957 | if (ret == 0) |
958 | evtchn = bind_virq.port; | 958 | evtchn = bind_virq.port; |
959 | else { | 959 | else { |
960 | if (ret == -EEXIST) | 960 | if (ret == -EEXIST) |
961 | ret = find_virq(virq, cpu); | 961 | ret = find_virq(virq, cpu); |
962 | BUG_ON(ret < 0); | 962 | BUG_ON(ret < 0); |
963 | evtchn = ret; | 963 | evtchn = ret; |
964 | } | 964 | } |
965 | 965 | ||
966 | xen_irq_info_virq_init(cpu, irq, evtchn, virq); | 966 | xen_irq_info_virq_init(cpu, irq, evtchn, virq); |
967 | 967 | ||
968 | bind_evtchn_to_cpu(evtchn, cpu); | 968 | bind_evtchn_to_cpu(evtchn, cpu); |
969 | } else { | 969 | } else { |
970 | struct irq_info *info = info_for_irq(irq); | 970 | struct irq_info *info = info_for_irq(irq); |
971 | WARN_ON(info == NULL || info->type != IRQT_VIRQ); | 971 | WARN_ON(info == NULL || info->type != IRQT_VIRQ); |
972 | } | 972 | } |
973 | 973 | ||
974 | out: | 974 | out: |
975 | mutex_unlock(&irq_mapping_update_lock); | 975 | mutex_unlock(&irq_mapping_update_lock); |
976 | 976 | ||
977 | return irq; | 977 | return irq; |
978 | } | 978 | } |
979 | 979 | ||
980 | static void unbind_from_irq(unsigned int irq) | 980 | static void unbind_from_irq(unsigned int irq) |
981 | { | 981 | { |
982 | struct evtchn_close close; | 982 | struct evtchn_close close; |
983 | int evtchn = evtchn_from_irq(irq); | 983 | int evtchn = evtchn_from_irq(irq); |
984 | struct irq_info *info = irq_get_handler_data(irq); | 984 | struct irq_info *info = irq_get_handler_data(irq); |
985 | 985 | ||
986 | mutex_lock(&irq_mapping_update_lock); | 986 | mutex_lock(&irq_mapping_update_lock); |
987 | 987 | ||
988 | if (info->refcnt > 0) { | 988 | if (info->refcnt > 0) { |
989 | info->refcnt--; | 989 | info->refcnt--; |
990 | if (info->refcnt != 0) | 990 | if (info->refcnt != 0) |
991 | goto done; | 991 | goto done; |
992 | } | 992 | } |
993 | 993 | ||
994 | if (VALID_EVTCHN(evtchn)) { | 994 | if (VALID_EVTCHN(evtchn)) { |
995 | close.port = evtchn; | 995 | close.port = evtchn; |
996 | if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) | 996 | if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) |
997 | BUG(); | 997 | BUG(); |
998 | 998 | ||
999 | switch (type_from_irq(irq)) { | 999 | switch (type_from_irq(irq)) { |
1000 | case IRQT_VIRQ: | 1000 | case IRQT_VIRQ: |
1001 | per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) | 1001 | per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) |
1002 | [virq_from_irq(irq)] = -1; | 1002 | [virq_from_irq(irq)] = -1; |
1003 | break; | 1003 | break; |
1004 | case IRQT_IPI: | 1004 | case IRQT_IPI: |
1005 | per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn)) | 1005 | per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn)) |
1006 | [ipi_from_irq(irq)] = -1; | 1006 | [ipi_from_irq(irq)] = -1; |
1007 | break; | 1007 | break; |
1008 | default: | 1008 | default: |
1009 | break; | 1009 | break; |
1010 | } | 1010 | } |
1011 | 1011 | ||
1012 | /* Closed ports are implicitly re-bound to VCPU0. */ | 1012 | /* Closed ports are implicitly re-bound to VCPU0. */ |
1013 | bind_evtchn_to_cpu(evtchn, 0); | 1013 | bind_evtchn_to_cpu(evtchn, 0); |
1014 | 1014 | ||
1015 | evtchn_to_irq[evtchn] = -1; | 1015 | evtchn_to_irq[evtchn] = -1; |
1016 | } | 1016 | } |
1017 | 1017 | ||
1018 | BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND); | 1018 | BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND); |
1019 | 1019 | ||
1020 | xen_free_irq(irq); | 1020 | xen_free_irq(irq); |
1021 | 1021 | ||
1022 | done: | 1022 | done: |
1023 | mutex_unlock(&irq_mapping_update_lock); | 1023 | mutex_unlock(&irq_mapping_update_lock); |
1024 | } | 1024 | } |
1025 | 1025 | ||
1026 | int bind_evtchn_to_irqhandler(unsigned int evtchn, | 1026 | int bind_evtchn_to_irqhandler(unsigned int evtchn, |
1027 | irq_handler_t handler, | 1027 | irq_handler_t handler, |
1028 | unsigned long irqflags, | 1028 | unsigned long irqflags, |
1029 | const char *devname, void *dev_id) | 1029 | const char *devname, void *dev_id) |
1030 | { | 1030 | { |
1031 | int irq, retval; | 1031 | int irq, retval; |
1032 | 1032 | ||
1033 | irq = bind_evtchn_to_irq(evtchn); | 1033 | irq = bind_evtchn_to_irq(evtchn); |
1034 | if (irq < 0) | 1034 | if (irq < 0) |
1035 | return irq; | 1035 | return irq; |
1036 | retval = request_irq(irq, handler, irqflags, devname, dev_id); | 1036 | retval = request_irq(irq, handler, irqflags, devname, dev_id); |
1037 | if (retval != 0) { | 1037 | if (retval != 0) { |
1038 | unbind_from_irq(irq); | 1038 | unbind_from_irq(irq); |
1039 | return retval; | 1039 | return retval; |
1040 | } | 1040 | } |
1041 | 1041 | ||
1042 | return irq; | 1042 | return irq; |
1043 | } | 1043 | } |
1044 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); | 1044 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); |
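Editor's note: bind_evtchn_to_irqhandler() is the one-call entry point most frontend drivers use — it maps the event channel to an irq and immediately installs a handler, undoing the binding if request_irq() fails. A hedged usage sketch; the handler, device structure, and event-channel source below are illustrative, not from this file:

    static irqreturn_t mydev_interrupt(int irq, void *dev_id)  /* hypothetical */
    {
            struct mydev *dev = dev_id;
            /* acknowledge the ring and queue work for dev here */
            return IRQ_HANDLED;
    }

    /* evtchn typically comes from xenbus negotiation (assumption) */
    irq = bind_evtchn_to_irqhandler(evtchn, mydev_interrupt,
                                    0, "mydev", dev);
    if (irq < 0)
            return irq;
    ...
    unbind_from_irqhandler(irq, dev);   /* free_irq() + drop the binding */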
1045 | 1045 | ||
1046 | int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, | 1046 | int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, |
1047 | unsigned int remote_port, | 1047 | unsigned int remote_port, |
1048 | irq_handler_t handler, | 1048 | irq_handler_t handler, |
1049 | unsigned long irqflags, | 1049 | unsigned long irqflags, |
1050 | const char *devname, | 1050 | const char *devname, |
1051 | void *dev_id) | 1051 | void *dev_id) |
1052 | { | 1052 | { |
1053 | int irq, retval; | 1053 | int irq, retval; |
1054 | 1054 | ||
1055 | irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); | 1055 | irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); |
1056 | if (irq < 0) | 1056 | if (irq < 0) |
1057 | return irq; | 1057 | return irq; |
1058 | 1058 | ||
1059 | retval = request_irq(irq, handler, irqflags, devname, dev_id); | 1059 | retval = request_irq(irq, handler, irqflags, devname, dev_id); |
1060 | if (retval != 0) { | 1060 | if (retval != 0) { |
1061 | unbind_from_irq(irq); | 1061 | unbind_from_irq(irq); |
1062 | return retval; | 1062 | return retval; |
1063 | } | 1063 | } |
1064 | 1064 | ||
1065 | return irq; | 1065 | return irq; |
1066 | } | 1066 | } |
1067 | EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); | 1067 | EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); |
1068 | 1068 | ||
1069 | int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, | 1069 | int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, |
1070 | irq_handler_t handler, | 1070 | irq_handler_t handler, |
1071 | unsigned long irqflags, const char *devname, void *dev_id) | 1071 | unsigned long irqflags, const char *devname, void *dev_id) |
1072 | { | 1072 | { |
1073 | int irq, retval; | 1073 | int irq, retval; |
1074 | 1074 | ||
1075 | irq = bind_virq_to_irq(virq, cpu); | 1075 | irq = bind_virq_to_irq(virq, cpu); |
1076 | if (irq < 0) | 1076 | if (irq < 0) |
1077 | return irq; | 1077 | return irq; |
1078 | retval = request_irq(irq, handler, irqflags, devname, dev_id); | 1078 | retval = request_irq(irq, handler, irqflags, devname, dev_id); |
1079 | if (retval != 0) { | 1079 | if (retval != 0) { |
1080 | unbind_from_irq(irq); | 1080 | unbind_from_irq(irq); |
1081 | return retval; | 1081 | return retval; |
1082 | } | 1082 | } |
1083 | 1083 | ||
1084 | return irq; | 1084 | return irq; |
1085 | } | 1085 | } |
1086 | EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); | 1086 | EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); |
1087 | 1087 | ||
1088 | int bind_ipi_to_irqhandler(enum ipi_vector ipi, | 1088 | int bind_ipi_to_irqhandler(enum ipi_vector ipi, |
1089 | unsigned int cpu, | 1089 | unsigned int cpu, |
1090 | irq_handler_t handler, | 1090 | irq_handler_t handler, |
1091 | unsigned long irqflags, | 1091 | unsigned long irqflags, |
1092 | const char *devname, | 1092 | const char *devname, |
1093 | void *dev_id) | 1093 | void *dev_id) |
1094 | { | 1094 | { |
1095 | int irq, retval; | 1095 | int irq, retval; |
1096 | 1096 | ||
1097 | irq = bind_ipi_to_irq(ipi, cpu); | 1097 | irq = bind_ipi_to_irq(ipi, cpu); |
1098 | if (irq < 0) | 1098 | if (irq < 0) |
1099 | return irq; | 1099 | return irq; |
1100 | 1100 | ||
1101 | irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME; | 1101 | irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME; |
1102 | retval = request_irq(irq, handler, irqflags, devname, dev_id); | 1102 | retval = request_irq(irq, handler, irqflags, devname, dev_id); |
1103 | if (retval != 0) { | 1103 | if (retval != 0) { |
1104 | unbind_from_irq(irq); | 1104 | unbind_from_irq(irq); |
1105 | return retval; | 1105 | return retval; |
1106 | } | 1106 | } |
1107 | 1107 | ||
1108 | return irq; | 1108 | return irq; |
1109 | } | 1109 | } |
1110 | 1110 | ||
1111 | void unbind_from_irqhandler(unsigned int irq, void *dev_id) | 1111 | void unbind_from_irqhandler(unsigned int irq, void *dev_id) |
1112 | { | 1112 | { |
1113 | free_irq(irq, dev_id); | 1113 | free_irq(irq, dev_id); |
1114 | unbind_from_irq(irq); | 1114 | unbind_from_irq(irq); |
1115 | } | 1115 | } |
1116 | EXPORT_SYMBOL_GPL(unbind_from_irqhandler); | 1116 | EXPORT_SYMBOL_GPL(unbind_from_irqhandler); |
1117 | 1117 | ||
1118 | int evtchn_make_refcounted(unsigned int evtchn) | 1118 | int evtchn_make_refcounted(unsigned int evtchn) |
1119 | { | 1119 | { |
1120 | int irq = evtchn_to_irq[evtchn]; | 1120 | int irq = evtchn_to_irq[evtchn]; |
1121 | struct irq_info *info; | 1121 | struct irq_info *info; |
1122 | 1122 | ||
1123 | if (irq == -1) | 1123 | if (irq == -1) |
1124 | return -ENOENT; | 1124 | return -ENOENT; |
1125 | 1125 | ||
1126 | info = irq_get_handler_data(irq); | 1126 | info = irq_get_handler_data(irq); |
1127 | 1127 | ||
1128 | if (!info) | 1128 | if (!info) |
1129 | return -ENOENT; | 1129 | return -ENOENT; |
1130 | 1130 | ||
1131 | WARN_ON(info->refcnt != -1); | 1131 | WARN_ON(info->refcnt != -1); |
1132 | 1132 | ||
1133 | info->refcnt = 1; | 1133 | info->refcnt = 1; |
1134 | 1134 | ||
1135 | return 0; | 1135 | return 0; |
1136 | } | 1136 | } |
1137 | EXPORT_SYMBOL_GPL(evtchn_make_refcounted); | 1137 | EXPORT_SYMBOL_GPL(evtchn_make_refcounted); |
1138 | 1138 | ||
1139 | int evtchn_get(unsigned int evtchn) | 1139 | int evtchn_get(unsigned int evtchn) |
1140 | { | 1140 | { |
1141 | int irq; | 1141 | int irq; |
1142 | struct irq_info *info; | 1142 | struct irq_info *info; |
1143 | int err = -ENOENT; | 1143 | int err = -ENOENT; |
1144 | 1144 | ||
1145 | if (evtchn >= NR_EVENT_CHANNELS) | 1145 | if (evtchn >= NR_EVENT_CHANNELS) |
1146 | return -EINVAL; | 1146 | return -EINVAL; |
1147 | 1147 | ||
1148 | mutex_lock(&irq_mapping_update_lock); | 1148 | mutex_lock(&irq_mapping_update_lock); |
1149 | 1149 | ||
1150 | irq = evtchn_to_irq[evtchn]; | 1150 | irq = evtchn_to_irq[evtchn]; |
1151 | if (irq == -1) | 1151 | if (irq == -1) |
1152 | goto done; | 1152 | goto done; |
1153 | 1153 | ||
1154 | info = irq_get_handler_data(irq); | 1154 | info = irq_get_handler_data(irq); |
1155 | 1155 | ||
1156 | if (!info) | 1156 | if (!info) |
1157 | goto done; | 1157 | goto done; |
1158 | 1158 | ||
1159 | err = -EINVAL; | 1159 | err = -EINVAL; |
1160 | if (info->refcnt <= 0) | 1160 | if (info->refcnt <= 0) |
1161 | goto done; | 1161 | goto done; |
1162 | 1162 | ||
1163 | info->refcnt++; | 1163 | info->refcnt++; |
1164 | err = 0; | 1164 | err = 0; |
1165 | done: | 1165 | done: |
1166 | mutex_unlock(&irq_mapping_update_lock); | 1166 | mutex_unlock(&irq_mapping_update_lock); |
1167 | 1167 | ||
1168 | return err; | 1168 | return err; |
1169 | } | 1169 | } |
1170 | EXPORT_SYMBOL_GPL(evtchn_get); | 1170 | EXPORT_SYMBOL_GPL(evtchn_get); |
1171 | 1171 | ||
1172 | void evtchn_put(unsigned int evtchn) | 1172 | void evtchn_put(unsigned int evtchn) |
1173 | { | 1173 | { |
1174 | int irq = evtchn_to_irq[evtchn]; | 1174 | int irq = evtchn_to_irq[evtchn]; |
1175 | if (WARN_ON(irq == -1)) | 1175 | if (WARN_ON(irq == -1)) |
1176 | return; | 1176 | return; |
1177 | unbind_from_irq(irq); | 1177 | unbind_from_irq(irq); |
1178 | } | 1178 | } |
1179 | EXPORT_SYMBOL_GPL(evtchn_put); | 1179 | EXPORT_SYMBOL_GPL(evtchn_put); |
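Editor's note: an irq_info starts life with refcnt == -1, meaning "not refcounted": the first unbind_from_irq() closes the channel. evtchn_make_refcounted() switches it to explicit counting, after which evtchn_get()/evtchn_put() nest and the teardown in unbind_from_irq() is deferred until the count reaches zero. The expected call pattern, sketched:

    err = evtchn_make_refcounted(evtchn);   /* refcnt: -1 -> 1 */
    if (err)
            return err;

    if (evtchn_get(evtchn) == 0) {          /* refcnt: 1 -> 2 */
            /* channel is guaranteed to stay bound here */
            evtchn_put(evtchn);             /* refcnt: 2 -> 1 */
    }
    ...
    evtchn_put(evtchn);                     /* refcnt: 1 -> 0, irq torn down */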
1180 | 1180 | ||
1181 | void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) | 1181 | void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) |
1182 | { | 1182 | { |
1183 | int irq = per_cpu(ipi_to_irq, cpu)[vector]; | 1183 | int irq = per_cpu(ipi_to_irq, cpu)[vector]; |
1184 | BUG_ON(irq < 0); | 1184 | BUG_ON(irq < 0); |
1185 | notify_remote_via_irq(irq); | 1185 | notify_remote_via_irq(irq); |
1186 | } | 1186 | } |
1187 | 1187 | ||
1188 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id) | 1188 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id) |
1189 | { | 1189 | { |
1190 | struct shared_info *sh = HYPERVISOR_shared_info; | 1190 | struct shared_info *sh = HYPERVISOR_shared_info; |
1191 | int cpu = smp_processor_id(); | 1191 | int cpu = smp_processor_id(); |
1192 | unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu); | 1192 | unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu); |
1193 | int i; | 1193 | int i; |
1194 | unsigned long flags; | 1194 | unsigned long flags; |
1195 | static DEFINE_SPINLOCK(debug_lock); | 1195 | static DEFINE_SPINLOCK(debug_lock); |
1196 | struct vcpu_info *v; | 1196 | struct vcpu_info *v; |
1197 | 1197 | ||
1198 | spin_lock_irqsave(&debug_lock, flags); | 1198 | spin_lock_irqsave(&debug_lock, flags); |
1199 | 1199 | ||
1200 | printk("\nvcpu %d\n ", cpu); | 1200 | printk("\nvcpu %d\n ", cpu); |
1201 | 1201 | ||
1202 | for_each_online_cpu(i) { | 1202 | for_each_online_cpu(i) { |
1203 | int pending; | 1203 | int pending; |
1204 | v = per_cpu(xen_vcpu, i); | 1204 | v = per_cpu(xen_vcpu, i); |
1205 | pending = (get_irq_regs() && i == cpu) | 1205 | pending = (get_irq_regs() && i == cpu) |
1206 | ? xen_irqs_disabled(get_irq_regs()) | 1206 | ? xen_irqs_disabled(get_irq_regs()) |
1207 | : v->evtchn_upcall_mask; | 1207 | : v->evtchn_upcall_mask; |
1208 | printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i, | 1208 | printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i, |
1209 | pending, v->evtchn_upcall_pending, | 1209 | pending, v->evtchn_upcall_pending, |
1210 | (int)(sizeof(v->evtchn_pending_sel)*2), | 1210 | (int)(sizeof(v->evtchn_pending_sel)*2), |
1211 | v->evtchn_pending_sel); | 1211 | v->evtchn_pending_sel); |
1212 | } | 1212 | } |
1213 | v = per_cpu(xen_vcpu, cpu); | 1213 | v = per_cpu(xen_vcpu, cpu); |
1214 | 1214 | ||
1215 | printk("\npending:\n "); | 1215 | printk("\npending:\n "); |
1216 | for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--) | 1216 | for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--) |
1217 | printk("%0*lx%s", (int)sizeof(sh->evtchn_pending[0])*2, | 1217 | printk("%0*lx%s", (int)sizeof(sh->evtchn_pending[0])*2, |
1218 | sh->evtchn_pending[i], | 1218 | sh->evtchn_pending[i], |
1219 | i % 8 == 0 ? "\n " : " "); | 1219 | i % 8 == 0 ? "\n " : " "); |
1220 | printk("\nglobal mask:\n "); | 1220 | printk("\nglobal mask:\n "); |
1221 | for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) | 1221 | for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) |
1222 | printk("%0*lx%s", | 1222 | printk("%0*lx%s", |
1223 | (int)(sizeof(sh->evtchn_mask[0])*2), | 1223 | (int)(sizeof(sh->evtchn_mask[0])*2), |
1224 | sh->evtchn_mask[i], | 1224 | sh->evtchn_mask[i], |
1225 | i % 8 == 0 ? "\n " : " "); | 1225 | i % 8 == 0 ? "\n " : " "); |
1226 | 1226 | ||
1227 | printk("\nglobally unmasked:\n "); | 1227 | printk("\nglobally unmasked:\n "); |
1228 | for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) | 1228 | for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) |
1229 | printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2), | 1229 | printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2), |
1230 | sh->evtchn_pending[i] & ~sh->evtchn_mask[i], | 1230 | sh->evtchn_pending[i] & ~sh->evtchn_mask[i], |
1231 | i % 8 == 0 ? "\n " : " "); | 1231 | i % 8 == 0 ? "\n " : " "); |
1232 | 1232 | ||
1233 | printk("\nlocal cpu%d mask:\n ", cpu); | 1233 | printk("\nlocal cpu%d mask:\n ", cpu); |
1234 | for (i = (NR_EVENT_CHANNELS/BITS_PER_LONG)-1; i >= 0; i--) | 1234 | for (i = (NR_EVENT_CHANNELS/BITS_PER_LONG)-1; i >= 0; i--) |
1235 | printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2), | 1235 | printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2), |
1236 | cpu_evtchn[i], | 1236 | cpu_evtchn[i], |
1237 | i % 8 == 0 ? "\n " : " "); | 1237 | i % 8 == 0 ? "\n " : " "); |
1238 | 1238 | ||
1239 | printk("\nlocally unmasked:\n "); | 1239 | printk("\nlocally unmasked:\n "); |
1240 | for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) { | 1240 | for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) { |
1241 | unsigned long pending = sh->evtchn_pending[i] | 1241 | unsigned long pending = sh->evtchn_pending[i] |
1242 | & ~sh->evtchn_mask[i] | 1242 | & ~sh->evtchn_mask[i] |
1243 | & cpu_evtchn[i]; | 1243 | & cpu_evtchn[i]; |
1244 | printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2), | 1244 | printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2), |
1245 | pending, i % 8 == 0 ? "\n " : " "); | 1245 | pending, i % 8 == 0 ? "\n " : " "); |
1246 | } | 1246 | } |
1247 | 1247 | ||
1248 | printk("\npending list:\n"); | 1248 | printk("\npending list:\n"); |
1249 | for (i = 0; i < NR_EVENT_CHANNELS; i++) { | 1249 | for (i = 0; i < NR_EVENT_CHANNELS; i++) { |
1250 | if (sync_test_bit(i, sh->evtchn_pending)) { | 1250 | if (sync_test_bit(i, sh->evtchn_pending)) { |
1251 | int word_idx = i / BITS_PER_LONG; | 1251 | int word_idx = i / BITS_PER_LONG; |
1252 | printk(" %d: event %d -> irq %d%s%s%s\n", | 1252 | printk(" %d: event %d -> irq %d%s%s%s\n", |
1253 | cpu_from_evtchn(i), i, | 1253 | cpu_from_evtchn(i), i, |
1254 | evtchn_to_irq[i], | 1254 | evtchn_to_irq[i], |
1255 | sync_test_bit(word_idx, &v->evtchn_pending_sel) | 1255 | sync_test_bit(word_idx, &v->evtchn_pending_sel) |
1256 | ? "" : " l2-clear", | 1256 | ? "" : " l2-clear", |
1257 | !sync_test_bit(i, sh->evtchn_mask) | 1257 | !sync_test_bit(i, sh->evtchn_mask) |
1258 | ? "" : " globally-masked", | 1258 | ? "" : " globally-masked", |
1259 | sync_test_bit(i, cpu_evtchn) | 1259 | sync_test_bit(i, cpu_evtchn) |
1260 | ? "" : " locally-masked"); | 1260 | ? "" : " locally-masked"); |
1261 | } | 1261 | } |
1262 | } | 1262 | } |
1263 | 1263 | ||
1264 | spin_unlock_irqrestore(&debug_lock, flags); | 1264 | spin_unlock_irqrestore(&debug_lock, flags); |
1265 | 1265 | ||
1266 | return IRQ_HANDLED; | 1266 | return IRQ_HANDLED; |
1267 | } | 1267 | } |
1268 | 1268 | ||
1269 | static DEFINE_PER_CPU(unsigned, xed_nesting_count); | 1269 | static DEFINE_PER_CPU(unsigned, xed_nesting_count); |
1270 | static DEFINE_PER_CPU(unsigned int, current_word_idx); | 1270 | static DEFINE_PER_CPU(unsigned int, current_word_idx); |
1271 | static DEFINE_PER_CPU(unsigned int, current_bit_idx); | 1271 | static DEFINE_PER_CPU(unsigned int, current_bit_idx); |
1272 | 1272 | ||
1273 | /* | 1273 | /* |
1274 | * Mask out the i least significant bits of w | 1274 | * Mask out the i least significant bits of w |
1275 | */ | 1275 | */ |
1276 | #define MASK_LSBS(w, i) (w & ((~0UL) << i)) | 1276 | #define MASK_LSBS(w, i) (w & ((~0UL) << i)) |
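Editor's note: MASK_LSBS(w, i) clears bit positions 0..i-1 of w, so the __ffs() that follows it in the scan loop lands on the first candidate at or above index i. A worked example:

    unsigned long w = 0xb6;              /* 1011 0110 */
    unsigned long m = MASK_LSBS(w, 3);   /* 1011 0000: bits 0..2 dropped */
    int next = __ffs(m);                 /* 4: first set bit not below 3 */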
1277 | 1277 | ||
1278 | /* | 1278 | /* |
1279 | * Search the CPU's pending events bitmasks. For each one found, map | 1279 | * Search the CPU's pending events bitmasks. For each one found, map |
1280 | * the event number to an irq, and feed it into do_IRQ() for | 1280 | * the event number to an irq, and feed it into do_IRQ() for |
1281 | * handling. | 1281 | * handling. |
1282 | * | 1282 | * |
1283 | * Xen uses a two-level bitmap to speed searching. The first level is | 1283 | * Xen uses a two-level bitmap to speed searching. The first level is |
1284 | * a bitset of words which contain pending event bits. The second | 1284 | * a bitset of words which contain pending event bits. The second |
1285 | * level is a bitset of pending events themselves. | 1285 | * level is a bitset of pending events themselves. |
1286 | */ | 1286 | */ |
1287 | static void __xen_evtchn_do_upcall(void) | 1287 | static void __xen_evtchn_do_upcall(void) |
1288 | { | 1288 | { |
1289 | int start_word_idx, start_bit_idx; | 1289 | int start_word_idx, start_bit_idx; |
1290 | int word_idx, bit_idx; | 1290 | int word_idx, bit_idx; |
1291 | int i; | 1291 | int i; |
1292 | int cpu = get_cpu(); | 1292 | int cpu = get_cpu(); |
1293 | struct shared_info *s = HYPERVISOR_shared_info; | 1293 | struct shared_info *s = HYPERVISOR_shared_info; |
1294 | struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); | 1294 | struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); |
1295 | unsigned count; | 1295 | unsigned count; |
1296 | 1296 | ||
1297 | do { | 1297 | do { |
1298 | unsigned long pending_words; | 1298 | unsigned long pending_words; |
1299 | 1299 | ||
1300 | vcpu_info->evtchn_upcall_pending = 0; | 1300 | vcpu_info->evtchn_upcall_pending = 0; |
1301 | 1301 | ||
1302 | if (__this_cpu_inc_return(xed_nesting_count) - 1) | 1302 | if (__this_cpu_inc_return(xed_nesting_count) - 1) |
1303 | goto out; | 1303 | goto out; |
1304 | 1304 | ||
1305 | #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ | 1305 | #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ |
1306 | /* Clear master flag /before/ clearing selector flag. */ | 1306 | /* Clear master flag /before/ clearing selector flag. */ |
1307 | wmb(); | 1307 | wmb(); |
1308 | #endif | 1308 | #endif |
1309 | pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); | 1309 | pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); |
1310 | 1310 | ||
1311 | start_word_idx = __this_cpu_read(current_word_idx); | 1311 | start_word_idx = __this_cpu_read(current_word_idx); |
1312 | start_bit_idx = __this_cpu_read(current_bit_idx); | 1312 | start_bit_idx = __this_cpu_read(current_bit_idx); |
1313 | 1313 | ||
1314 | word_idx = start_word_idx; | 1314 | word_idx = start_word_idx; |
1315 | 1315 | ||
1316 | for (i = 0; pending_words != 0; i++) { | 1316 | for (i = 0; pending_words != 0; i++) { |
1317 | unsigned long pending_bits; | 1317 | unsigned long pending_bits; |
1318 | unsigned long words; | 1318 | unsigned long words; |
1319 | 1319 | ||
1320 | words = MASK_LSBS(pending_words, word_idx); | 1320 | words = MASK_LSBS(pending_words, word_idx); |
1321 | 1321 | ||
1322 | /* | 1322 | /* |
1323 | * If we masked out all events, wrap to beginning. | 1323 | * If we masked out all events, wrap to beginning. |
1324 | */ | 1324 | */ |
1325 | if (words == 0) { | 1325 | if (words == 0) { |
1326 | word_idx = 0; | 1326 | word_idx = 0; |
1327 | bit_idx = 0; | 1327 | bit_idx = 0; |
1328 | continue; | 1328 | continue; |
1329 | } | 1329 | } |
1330 | word_idx = __ffs(words); | 1330 | word_idx = __ffs(words); |
1331 | 1331 | ||
1332 | pending_bits = active_evtchns(cpu, s, word_idx); | 1332 | pending_bits = active_evtchns(cpu, s, word_idx); |
1333 | bit_idx = 0; /* usually scan entire word from start */ | 1333 | bit_idx = 0; /* usually scan entire word from start */ |
1334 | if (word_idx == start_word_idx) { | 1334 | if (word_idx == start_word_idx) { |
1335 | /* We scan the starting word in two parts */ | 1335 | /* We scan the starting word in two parts */ |
1336 | if (i == 0) | 1336 | if (i == 0) |
1337 | /* 1st time: start in the middle */ | 1337 | /* 1st time: start in the middle */ |
1338 | bit_idx = start_bit_idx; | 1338 | bit_idx = start_bit_idx; |
1339 | else | 1339 | else |
1340 | /* 2nd time: mask bits done already */ | 1340 | /* 2nd time: mask bits done already */ |
1341 | bit_idx &= (1UL << start_bit_idx) - 1; | 1341 | bit_idx &= (1UL << start_bit_idx) - 1; |
1342 | } | 1342 | } |
1343 | 1343 | ||
1344 | do { | 1344 | do { |
1345 | unsigned long bits; | 1345 | unsigned long bits; |
1346 | int port, irq; | 1346 | int port, irq; |
1347 | struct irq_desc *desc; | 1347 | struct irq_desc *desc; |
1348 | 1348 | ||
1349 | bits = MASK_LSBS(pending_bits, bit_idx); | 1349 | bits = MASK_LSBS(pending_bits, bit_idx); |
1350 | 1350 | ||
1351 | /* If we masked out all events, move on. */ | 1351 | /* If we masked out all events, move on. */ |
1352 | if (bits == 0) | 1352 | if (bits == 0) |
1353 | break; | 1353 | break; |
1354 | 1354 | ||
1355 | bit_idx = __ffs(bits); | 1355 | bit_idx = __ffs(bits); |
1356 | 1356 | ||
1357 | /* Process port. */ | 1357 | /* Process port. */ |
1358 | port = (word_idx * BITS_PER_LONG) + bit_idx; | 1358 | port = (word_idx * BITS_PER_LONG) + bit_idx; |
1359 | irq = evtchn_to_irq[port]; | 1359 | irq = evtchn_to_irq[port]; |
1360 | 1360 | ||
1361 | if (irq != -1) { | 1361 | if (irq != -1) { |
1362 | desc = irq_to_desc(irq); | 1362 | desc = irq_to_desc(irq); |
1363 | if (desc) | 1363 | if (desc) |
1364 | generic_handle_irq_desc(irq, desc); | 1364 | generic_handle_irq_desc(irq, desc); |
1365 | } | 1365 | } |
1366 | 1366 | ||
1367 | bit_idx = (bit_idx + 1) % BITS_PER_LONG; | 1367 | bit_idx = (bit_idx + 1) % BITS_PER_LONG; |
1368 | 1368 | ||
1369 | /* Next caller starts at last processed + 1 */ | 1369 | /* Next caller starts at last processed + 1 */ |
1370 | __this_cpu_write(current_word_idx, | 1370 | __this_cpu_write(current_word_idx, |
1371 | bit_idx ? word_idx : | 1371 | bit_idx ? word_idx : |
1372 | (word_idx+1) % BITS_PER_LONG); | 1372 | (word_idx+1) % BITS_PER_LONG); |
1373 | __this_cpu_write(current_bit_idx, bit_idx); | 1373 | __this_cpu_write(current_bit_idx, bit_idx); |
1374 | } while (bit_idx != 0); | 1374 | } while (bit_idx != 0); |
1375 | 1375 | ||
1376 | /* Scan start_word_idx twice; all others once. */ | 1376 | /* Scan start_word_idx twice; all others once. */ |
1377 | if ((word_idx != start_word_idx) || (i != 0)) | 1377 | if ((word_idx != start_word_idx) || (i != 0)) |
1378 | pending_words &= ~(1UL << word_idx); | 1378 | pending_words &= ~(1UL << word_idx); |
1379 | 1379 | ||
1380 | word_idx = (word_idx + 1) % BITS_PER_LONG; | 1380 | word_idx = (word_idx + 1) % BITS_PER_LONG; |
1381 | } | 1381 | } |
1382 | 1382 | ||
1383 | BUG_ON(!irqs_disabled()); | 1383 | BUG_ON(!irqs_disabled()); |
1384 | 1384 | ||
1385 | count = __this_cpu_read(xed_nesting_count); | 1385 | count = __this_cpu_read(xed_nesting_count); |
1386 | __this_cpu_write(xed_nesting_count, 0); | 1386 | __this_cpu_write(xed_nesting_count, 0); |
1387 | } while (count != 1 || vcpu_info->evtchn_upcall_pending); | 1387 | } while (count != 1 || vcpu_info->evtchn_upcall_pending); |
1388 | 1388 | ||
1389 | out: | 1389 | out: |
1390 | 1390 | ||
1391 | put_cpu(); | 1391 | put_cpu(); |
1392 | } | 1392 | } |
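Editor's note: stripped of the per-cpu fairness state (current_word_idx/current_bit_idx) and the nesting counter, the core of __xen_evtchn_do_upcall() is a plain two-level walk: __ffs() on the selector word picks a pending word, __ffs() on that word picks a pending bit, and word * BITS_PER_LONG + bit is the port. A simplified sketch assuming the same locals and helpers as the function above; it omits the round-robin resume logic, so it is illustrative only:

    pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
    while (pending_words) {
            int word = __ffs(pending_words);
            unsigned long bits = active_evtchns(cpu, s, word);

            while (bits) {
                    int bit  = __ffs(bits);
                    int port = word * BITS_PER_LONG + bit;
                    int irq  = evtchn_to_irq[port];

                    if (irq != -1) {
                            struct irq_desc *desc = irq_to_desc(irq);
                            if (desc)
                                    generic_handle_irq_desc(irq, desc);
                    }
                    bits &= bits - 1;            /* clear the handled bit */
            }
            pending_words &= ~(1UL << word);
    }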
1393 | 1393 | ||
1394 | void xen_evtchn_do_upcall(struct pt_regs *regs) | 1394 | void xen_evtchn_do_upcall(struct pt_regs *regs) |
1395 | { | 1395 | { |
1396 | struct pt_regs *old_regs = set_irq_regs(regs); | 1396 | struct pt_regs *old_regs = set_irq_regs(regs); |
1397 | 1397 | ||
1398 | irq_enter(); | 1398 | irq_enter(); |
1399 | #ifdef CONFIG_X86 | 1399 | #ifdef CONFIG_X86 |
1400 | exit_idle(); | 1400 | exit_idle(); |
1401 | #endif | 1401 | #endif |
1402 | 1402 | ||
1403 | __xen_evtchn_do_upcall(); | 1403 | __xen_evtchn_do_upcall(); |
1404 | 1404 | ||
1405 | irq_exit(); | 1405 | irq_exit(); |
1406 | set_irq_regs(old_regs); | 1406 | set_irq_regs(old_regs); |
1407 | } | 1407 | } |
1408 | 1408 | ||
1409 | void xen_hvm_evtchn_do_upcall(void) | 1409 | void xen_hvm_evtchn_do_upcall(void) |
1410 | { | 1410 | { |
1411 | __xen_evtchn_do_upcall(); | 1411 | __xen_evtchn_do_upcall(); |
1412 | } | 1412 | } |
1413 | EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall); | 1413 | EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall); |
1414 | 1414 | ||
1415 | /* Rebind a new event channel to an existing irq. */ | 1415 | /* Rebind a new event channel to an existing irq. */ |
1416 | void rebind_evtchn_irq(int evtchn, int irq) | 1416 | void rebind_evtchn_irq(int evtchn, int irq) |
1417 | { | 1417 | { |
1418 | struct irq_info *info = info_for_irq(irq); | 1418 | struct irq_info *info = info_for_irq(irq); |
1419 | 1419 | ||
1420 | /* Make sure the irq is masked, since the new event channel | 1420 | /* Make sure the irq is masked, since the new event channel |
1421 | will also be masked. */ | 1421 | will also be masked. */ |
1422 | disable_irq(irq); | 1422 | disable_irq(irq); |
1423 | 1423 | ||
1424 | mutex_lock(&irq_mapping_update_lock); | 1424 | mutex_lock(&irq_mapping_update_lock); |
1425 | 1425 | ||
1426 | /* After resume the irq<->evtchn mappings are all cleared out */ | 1426 | /* After resume the irq<->evtchn mappings are all cleared out */ |
1427 | BUG_ON(evtchn_to_irq[evtchn] != -1); | 1427 | BUG_ON(evtchn_to_irq[evtchn] != -1); |
1428 | /* Expect irq to have been bound before, | 1428 | /* Expect irq to have been bound before, |
1429 | so there should be a proper type */ | 1429 | so there should be a proper type */ |
1430 | BUG_ON(info->type == IRQT_UNBOUND); | 1430 | BUG_ON(info->type == IRQT_UNBOUND); |
1431 | 1431 | ||
1432 | xen_irq_info_evtchn_init(irq, evtchn); | 1432 | xen_irq_info_evtchn_init(irq, evtchn); |
1433 | 1433 | ||
1434 | mutex_unlock(&irq_mapping_update_lock); | 1434 | mutex_unlock(&irq_mapping_update_lock); |
1435 | 1435 | ||
1436 | /* new event channels are always bound to cpu 0 */ | 1436 | /* new event channels are always bound to cpu 0 */ |
1437 | irq_set_affinity(irq, cpumask_of(0)); | 1437 | irq_set_affinity(irq, cpumask_of(0)); |
1438 | 1438 | ||
1439 | /* Unmask the event channel. */ | 1439 | /* Unmask the event channel. */ |
1440 | enable_irq(irq); | 1440 | enable_irq(irq); |
1441 | } | 1441 | } |
1442 | 1442 | ||
1443 | /* Rebind an evtchn so that it gets delivered to a specific cpu */ | 1443 | /* Rebind an evtchn so that it gets delivered to a specific cpu */ |
1444 | static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) | 1444 | static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) |
1445 | { | 1445 | { |
1446 | struct evtchn_bind_vcpu bind_vcpu; | 1446 | struct evtchn_bind_vcpu bind_vcpu; |
1447 | int evtchn = evtchn_from_irq(irq); | 1447 | int evtchn = evtchn_from_irq(irq); |
1448 | 1448 | ||
1449 | if (!VALID_EVTCHN(evtchn)) | 1449 | if (!VALID_EVTCHN(evtchn)) |
1450 | return -1; | 1450 | return -1; |
1451 | 1451 | ||
1452 | /* | 1452 | /* |
1453 | * Events delivered via platform PCI interrupts are always | 1453 | * Events delivered via platform PCI interrupts are always |
1454 | * routed to vcpu 0 and hence cannot be rebound. | 1454 | * routed to vcpu 0 and hence cannot be rebound. |
1455 | */ | 1455 | */ |
1456 | if (xen_hvm_domain() && !xen_have_vector_callback) | 1456 | if (xen_hvm_domain() && !xen_have_vector_callback) |
1457 | return -1; | 1457 | return -1; |
1458 | 1458 | ||
1459 | /* Send future instances of this interrupt to the target vcpu. */ | 1459 | /* Send future instances of this interrupt to the target vcpu. */ |
1460 | bind_vcpu.port = evtchn; | 1460 | bind_vcpu.port = evtchn; |
1461 | bind_vcpu.vcpu = tcpu; | 1461 | bind_vcpu.vcpu = tcpu; |
1462 | 1462 | ||
1463 | /* | 1463 | /* |
1464 | * If this fails, it usually just indicates that we're dealing with a | 1464 | * If this fails, it usually just indicates that we're dealing with a |
1465 | * virq or IPI channel, which don't actually need to be rebound. Ignore | 1465 | * virq or IPI channel, which don't actually need to be rebound. Ignore |
1466 | * it, but don't do the xenlinux-level rebind in that case. | 1466 | * it, but don't do the xenlinux-level rebind in that case. |
1467 | */ | 1467 | */ |
1468 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) | 1468 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) |
1469 | bind_evtchn_to_cpu(evtchn, tcpu); | 1469 | bind_evtchn_to_cpu(evtchn, tcpu); |
1470 | 1470 | ||
1471 | return 0; | 1471 | return 0; |
1472 | } | 1472 | } |
1473 | 1473 | ||
1474 | static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, | 1474 | static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, |
1475 | bool force) | 1475 | bool force) |
1476 | { | 1476 | { |
1477 | unsigned tcpu = cpumask_first(dest); | 1477 | unsigned tcpu = cpumask_first(dest); |
1478 | 1478 | ||
1479 | return rebind_irq_to_cpu(data->irq, tcpu); | 1479 | return rebind_irq_to_cpu(data->irq, tcpu); |
1480 | } | 1480 | } |
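Editor's note: set_affinity_irq() is the irq_chip hook behind the generic irq_set_affinity() API. Because an event channel is delivered to exactly one VCPU, only the first CPU of the requested mask is honoured. Illustrative call, not from the file:

    cpumask_var_t mask;

    if (zalloc_cpumask_var(&mask, GFP_KERNEL)) {
            cpumask_set_cpu(2, mask);
            cpumask_set_cpu(5, mask);
            irq_set_affinity(irq, mask);   /* only cpumask_first() == 2 is used */
            free_cpumask_var(mask);
    }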
1481 | 1481 | ||
1482 | int resend_irq_on_evtchn(unsigned int irq) | 1482 | int resend_irq_on_evtchn(unsigned int irq) |
1483 | { | 1483 | { |
1484 | int masked, evtchn = evtchn_from_irq(irq); | 1484 | int masked, evtchn = evtchn_from_irq(irq); |
1485 | struct shared_info *s = HYPERVISOR_shared_info; | 1485 | struct shared_info *s = HYPERVISOR_shared_info; |
1486 | 1486 | ||
1487 | if (!VALID_EVTCHN(evtchn)) | 1487 | if (!VALID_EVTCHN(evtchn)) |
1488 | return 1; | 1488 | return 1; |
1489 | 1489 | ||
1490 | masked = sync_test_and_set_bit(evtchn, s->evtchn_mask); | 1490 | masked = sync_test_and_set_bit(evtchn, s->evtchn_mask); |
1491 | sync_set_bit(evtchn, s->evtchn_pending); | 1491 | sync_set_bit(evtchn, s->evtchn_pending); |
1492 | if (!masked) | 1492 | if (!masked) |
1493 | unmask_evtchn(evtchn); | 1493 | unmask_evtchn(evtchn); |
1494 | 1494 | ||
1495 | return 1; | 1495 | return 1; |
1496 | } | 1496 | } |
1497 | 1497 | ||
1498 | static void enable_dynirq(struct irq_data *data) | 1498 | static void enable_dynirq(struct irq_data *data) |
1499 | { | 1499 | { |
1500 | int evtchn = evtchn_from_irq(data->irq); | 1500 | int evtchn = evtchn_from_irq(data->irq); |
1501 | 1501 | ||
1502 | if (VALID_EVTCHN(evtchn)) | 1502 | if (VALID_EVTCHN(evtchn)) |
1503 | unmask_evtchn(evtchn); | 1503 | unmask_evtchn(evtchn); |
1504 | } | 1504 | } |
1505 | 1505 | ||
1506 | static void disable_dynirq(struct irq_data *data) | 1506 | static void disable_dynirq(struct irq_data *data) |
1507 | { | 1507 | { |
1508 | int evtchn = evtchn_from_irq(data->irq); | 1508 | int evtchn = evtchn_from_irq(data->irq); |
1509 | 1509 | ||
1510 | if (VALID_EVTCHN(evtchn)) | 1510 | if (VALID_EVTCHN(evtchn)) |
1511 | mask_evtchn(evtchn); | 1511 | mask_evtchn(evtchn); |
1512 | } | 1512 | } |
1513 | 1513 | ||
1514 | static void ack_dynirq(struct irq_data *data) | 1514 | static void ack_dynirq(struct irq_data *data) |
1515 | { | 1515 | { |
1516 | int evtchn = evtchn_from_irq(data->irq); | 1516 | int evtchn = evtchn_from_irq(data->irq); |
1517 | 1517 | ||
1518 | irq_move_irq(data); | 1518 | irq_move_irq(data); |
1519 | 1519 | ||
1520 | if (VALID_EVTCHN(evtchn)) | 1520 | if (VALID_EVTCHN(evtchn)) |
1521 | clear_evtchn(evtchn); | 1521 | clear_evtchn(evtchn); |
1522 | } | 1522 | } |
1523 | 1523 | ||
1524 | static void mask_ack_dynirq(struct irq_data *data) | 1524 | static void mask_ack_dynirq(struct irq_data *data) |
1525 | { | 1525 | { |
1526 | disable_dynirq(data); | 1526 | disable_dynirq(data); |
1527 | ack_dynirq(data); | 1527 | ack_dynirq(data); |
1528 | } | 1528 | } |
1529 | 1529 | ||
1530 | static int retrigger_dynirq(struct irq_data *data) | 1530 | static int retrigger_dynirq(struct irq_data *data) |
1531 | { | 1531 | { |
1532 | int evtchn = evtchn_from_irq(data->irq); | 1532 | int evtchn = evtchn_from_irq(data->irq); |
1533 | struct shared_info *sh = HYPERVISOR_shared_info; | 1533 | struct shared_info *sh = HYPERVISOR_shared_info; |
1534 | int ret = 0; | 1534 | int ret = 0; |
1535 | 1535 | ||
1536 | if (VALID_EVTCHN(evtchn)) { | 1536 | if (VALID_EVTCHN(evtchn)) { |
1537 | int masked; | 1537 | int masked; |
1538 | 1538 | ||
1539 | masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask); | 1539 | masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask); |
1540 | sync_set_bit(evtchn, sh->evtchn_pending); | 1540 | sync_set_bit(evtchn, sh->evtchn_pending); |
1541 | if (!masked) | 1541 | if (!masked) |
1542 | unmask_evtchn(evtchn); | 1542 | unmask_evtchn(evtchn); |
1543 | ret = 1; | 1543 | ret = 1; |
1544 | } | 1544 | } |
1545 | 1545 | ||
1546 | return ret; | 1546 | return ret; |
1547 | } | 1547 | } |
1548 | 1548 | ||
1549 | static void restore_pirqs(void) | 1549 | static void restore_pirqs(void) |
1550 | { | 1550 | { |
1551 | int pirq, rc, irq, gsi; | 1551 | int pirq, rc, irq, gsi; |
1552 | struct physdev_map_pirq map_irq; | 1552 | struct physdev_map_pirq map_irq; |
1553 | struct irq_info *info; | 1553 | struct irq_info *info; |
1554 | 1554 | ||
1555 | list_for_each_entry(info, &xen_irq_list_head, list) { | 1555 | list_for_each_entry(info, &xen_irq_list_head, list) { |
1556 | if (info->type != IRQT_PIRQ) | 1556 | if (info->type != IRQT_PIRQ) |
1557 | continue; | 1557 | continue; |
1558 | 1558 | ||
1559 | pirq = info->u.pirq.pirq; | 1559 | pirq = info->u.pirq.pirq; |
1560 | gsi = info->u.pirq.gsi; | 1560 | gsi = info->u.pirq.gsi; |
1561 | irq = info->irq; | 1561 | irq = info->irq; |
1562 | 1562 | ||
1563 | /* save/restore of PT devices doesn't work, so at this point the | 1563 | /* save/restore of PT devices doesn't work, so at this point the |
1564 | * only devices present are GSI-based emulated devices */ | 1564 | * only devices present are GSI-based emulated devices */ |
1565 | if (!gsi) | 1565 | if (!gsi) |
1566 | continue; | 1566 | continue; |
1567 | 1567 | ||
1568 | map_irq.domid = DOMID_SELF; | 1568 | map_irq.domid = DOMID_SELF; |
1569 | map_irq.type = MAP_PIRQ_TYPE_GSI; | 1569 | map_irq.type = MAP_PIRQ_TYPE_GSI; |
1570 | map_irq.index = gsi; | 1570 | map_irq.index = gsi; |
1571 | map_irq.pirq = pirq; | 1571 | map_irq.pirq = pirq; |
1572 | 1572 | ||
1573 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); | 1573 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); |
1574 | if (rc) { | 1574 | if (rc) { |
1575 | printk(KERN_WARNING "xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n", | 1575 | printk(KERN_WARNING "xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n", |
1576 | gsi, irq, pirq, rc); | 1576 | gsi, irq, pirq, rc); |
1577 | xen_free_irq(irq); | 1577 | xen_free_irq(irq); |
1578 | continue; | 1578 | continue; |
1579 | } | 1579 | } |
1580 | 1580 | ||
1581 | printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); | 1581 | printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); |
1582 | 1582 | ||
1583 | __startup_pirq(irq); | 1583 | __startup_pirq(irq); |
1584 | } | 1584 | } |
1585 | } | 1585 | } |
1586 | 1586 | ||
1587 | static void restore_cpu_virqs(unsigned int cpu) | 1587 | static void restore_cpu_virqs(unsigned int cpu) |
1588 | { | 1588 | { |
1589 | struct evtchn_bind_virq bind_virq; | 1589 | struct evtchn_bind_virq bind_virq; |
1590 | int virq, irq, evtchn; | 1590 | int virq, irq, evtchn; |
1591 | 1591 | ||
1592 | for (virq = 0; virq < NR_VIRQS; virq++) { | 1592 | for (virq = 0; virq < NR_VIRQS; virq++) { |
1593 | if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) | 1593 | if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) |
1594 | continue; | 1594 | continue; |
1595 | 1595 | ||
1596 | BUG_ON(virq_from_irq(irq) != virq); | 1596 | BUG_ON(virq_from_irq(irq) != virq); |
1597 | 1597 | ||
1598 | /* Get a new binding from Xen. */ | 1598 | /* Get a new binding from Xen. */ |
1599 | bind_virq.virq = virq; | 1599 | bind_virq.virq = virq; |
1600 | bind_virq.vcpu = cpu; | 1600 | bind_virq.vcpu = cpu; |
1601 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, | 1601 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, |
1602 | &bind_virq) != 0) | 1602 | &bind_virq) != 0) |
1603 | BUG(); | 1603 | BUG(); |
1604 | evtchn = bind_virq.port; | 1604 | evtchn = bind_virq.port; |
1605 | 1605 | ||
1606 | /* Record the new mapping. */ | 1606 | /* Record the new mapping. */ |
1607 | xen_irq_info_virq_init(cpu, irq, evtchn, virq); | 1607 | xen_irq_info_virq_init(cpu, irq, evtchn, virq); |
1608 | bind_evtchn_to_cpu(evtchn, cpu); | 1608 | bind_evtchn_to_cpu(evtchn, cpu); |
1609 | } | 1609 | } |
1610 | } | 1610 | } |
1611 | 1611 | ||
1612 | static void restore_cpu_ipis(unsigned int cpu) | 1612 | static void restore_cpu_ipis(unsigned int cpu) |
1613 | { | 1613 | { |
1614 | struct evtchn_bind_ipi bind_ipi; | 1614 | struct evtchn_bind_ipi bind_ipi; |
1615 | int ipi, irq, evtchn; | 1615 | int ipi, irq, evtchn; |
1616 | 1616 | ||
1617 | for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { | 1617 | for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { |
1618 | if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) | 1618 | if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) |
1619 | continue; | 1619 | continue; |
1620 | 1620 | ||
1621 | BUG_ON(ipi_from_irq(irq) != ipi); | 1621 | BUG_ON(ipi_from_irq(irq) != ipi); |
1622 | 1622 | ||
1623 | /* Get a new binding from Xen. */ | 1623 | /* Get a new binding from Xen. */ |
1624 | bind_ipi.vcpu = cpu; | 1624 | bind_ipi.vcpu = cpu; |
1625 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, | 1625 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, |
1626 | &bind_ipi) != 0) | 1626 | &bind_ipi) != 0) |
1627 | BUG(); | 1627 | BUG(); |
1628 | evtchn = bind_ipi.port; | 1628 | evtchn = bind_ipi.port; |
1629 | 1629 | ||
1630 | /* Record the new mapping. */ | 1630 | /* Record the new mapping. */ |
1631 | xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); | 1631 | xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); |
1632 | bind_evtchn_to_cpu(evtchn, cpu); | 1632 | bind_evtchn_to_cpu(evtchn, cpu); |
1633 | } | 1633 | } |
1634 | } | 1634 | } |
1635 | 1635 | ||
1636 | /* Clear an irq's pending state, in preparation for polling on it */ | 1636 | /* Clear an irq's pending state, in preparation for polling on it */ |
1637 | void xen_clear_irq_pending(int irq) | 1637 | void xen_clear_irq_pending(int irq) |
1638 | { | 1638 | { |
1639 | int evtchn = evtchn_from_irq(irq); | 1639 | int evtchn = evtchn_from_irq(irq); |
1640 | 1640 | ||
1641 | if (VALID_EVTCHN(evtchn)) | 1641 | if (VALID_EVTCHN(evtchn)) |
1642 | clear_evtchn(evtchn); | 1642 | clear_evtchn(evtchn); |
1643 | } | 1643 | } |
1644 | EXPORT_SYMBOL(xen_clear_irq_pending); | 1644 | EXPORT_SYMBOL(xen_clear_irq_pending); |
1645 | void xen_set_irq_pending(int irq) | 1645 | void xen_set_irq_pending(int irq) |
1646 | { | 1646 | { |
1647 | int evtchn = evtchn_from_irq(irq); | 1647 | int evtchn = evtchn_from_irq(irq); |
1648 | 1648 | ||
1649 | if (VALID_EVTCHN(evtchn)) | 1649 | if (VALID_EVTCHN(evtchn)) |
1650 | set_evtchn(evtchn); | 1650 | set_evtchn(evtchn); |
1651 | } | 1651 | } |
1652 | 1652 | ||
1653 | bool xen_test_irq_pending(int irq) | 1653 | bool xen_test_irq_pending(int irq) |
1654 | { | 1654 | { |
1655 | int evtchn = evtchn_from_irq(irq); | 1655 | int evtchn = evtchn_from_irq(irq); |
1656 | bool ret = false; | 1656 | bool ret = false; |
1657 | 1657 | ||
1658 | if (VALID_EVTCHN(evtchn)) | 1658 | if (VALID_EVTCHN(evtchn)) |
1659 | ret = test_evtchn(evtchn); | 1659 | ret = test_evtchn(evtchn); |
1660 | 1660 | ||
1661 | return ret; | 1661 | return ret; |
1662 | } | 1662 | } |
1663 | 1663 | ||
1664 | /* Poll waiting for an irq to become pending with timeout. In the usual case, | 1664 | /* Poll waiting for an irq to become pending with timeout. In the usual case, |
1665 | * the irq will be disabled so it won't deliver an interrupt. */ | 1665 | * the irq will be disabled so it won't deliver an interrupt. */ |
1666 | void xen_poll_irq_timeout(int irq, u64 timeout) | 1666 | void xen_poll_irq_timeout(int irq, u64 timeout) |
1667 | { | 1667 | { |
1668 | evtchn_port_t evtchn = evtchn_from_irq(irq); | 1668 | evtchn_port_t evtchn = evtchn_from_irq(irq); |
1669 | 1669 | ||
1670 | if (VALID_EVTCHN(evtchn)) { | 1670 | if (VALID_EVTCHN(evtchn)) { |
1671 | struct sched_poll poll; | 1671 | struct sched_poll poll; |
1672 | 1672 | ||
1673 | poll.nr_ports = 1; | 1673 | poll.nr_ports = 1; |
1674 | poll.timeout = timeout; | 1674 | poll.timeout = timeout; |
1675 | set_xen_guest_handle(poll.ports, &evtchn); | 1675 | set_xen_guest_handle(poll.ports, &evtchn); |
1676 | 1676 | ||
1677 | if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0) | 1677 | if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0) |
1678 | BUG(); | 1678 | BUG(); |
1679 | } | 1679 | } |
1680 | } | 1680 | } |
1681 | EXPORT_SYMBOL(xen_poll_irq_timeout); | 1681 | EXPORT_SYMBOL(xen_poll_irq_timeout); |
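Editor's note: xen_poll_irq_timeout() parks the calling VCPU in the hypervisor via SCHEDOP_poll until the event becomes pending or the timeout expires, which is how the paravirtualized spinlock slow path waits without burning cycles. The usual pattern is to clear any stale pending state first and to re-check on return, since the poll may wake spuriously. Sketch; do_wakeup_work() is hypothetical:

    xen_clear_irq_pending(irq);        /* don't consume a stale event */
    ...
    xen_poll_irq(irq);                 /* block until the evtchn fires */
    if (xen_test_irq_pending(irq))
            do_wakeup_work();          /* woken by the event itself */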
1682 | /* Poll waiting for an irq to become pending. In the usual case, the | 1682 | /* Poll waiting for an irq to become pending. In the usual case, the |
1683 | * irq will be disabled so it won't deliver an interrupt. */ | 1683 | * irq will be disabled so it won't deliver an interrupt. */ |
1684 | void xen_poll_irq(int irq) | 1684 | void xen_poll_irq(int irq) |
1685 | { | 1685 | { |
1686 | xen_poll_irq_timeout(irq, 0 /* no timeout */); | 1686 | xen_poll_irq_timeout(irq, 0 /* no timeout */); |
1687 | } | 1687 | } |
1688 | 1688 | ||
1689 | /* Check whether the IRQ line is shared with other guests. */ | 1689 | /* Check whether the IRQ line is shared with other guests. */ |
1690 | int xen_test_irq_shared(int irq) | 1690 | int xen_test_irq_shared(int irq) |
1691 | { | 1691 | { |
1692 | struct irq_info *info = info_for_irq(irq); | 1692 | struct irq_info *info = info_for_irq(irq); |
1693 | struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq }; | 1693 | struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq }; |
1694 | 1694 | ||
1695 | if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) | 1695 | if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) |
1696 | return 0; | 1696 | return 0; |
1697 | return !(irq_status.flags & XENIRQSTAT_shared); | 1697 | return !(irq_status.flags & XENIRQSTAT_shared); |
1698 | } | 1698 | } |
1699 | EXPORT_SYMBOL_GPL(xen_test_irq_shared); | 1699 | EXPORT_SYMBOL_GPL(xen_test_irq_shared); |
1700 | 1700 | ||
1701 | void xen_irq_resume(void) | 1701 | void xen_irq_resume(void) |
1702 | { | 1702 | { |
1703 | unsigned int cpu, evtchn; | 1703 | unsigned int cpu, evtchn; |
1704 | struct irq_info *info; | 1704 | struct irq_info *info; |
1705 | 1705 | ||
1706 | init_evtchn_cpu_bindings(); | 1706 | init_evtchn_cpu_bindings(); |
1707 | 1707 | ||
1708 | /* New event-channel space is not 'live' yet. */ | 1708 | /* New event-channel space is not 'live' yet. */ |
1709 | for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) | 1709 | for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) |
1710 | mask_evtchn(evtchn); | 1710 | mask_evtchn(evtchn); |
1711 | 1711 | ||
1712 | /* No IRQ <-> event-channel mappings. */ | 1712 | /* No IRQ <-> event-channel mappings. */ |
1713 | list_for_each_entry(info, &xen_irq_list_head, list) | 1713 | list_for_each_entry(info, &xen_irq_list_head, list) |
1714 | info->evtchn = 0; /* zap event-channel binding */ | 1714 | info->evtchn = 0; /* zap event-channel binding */ |
1715 | 1715 | ||
1716 | for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) | 1716 | for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) |
1717 | evtchn_to_irq[evtchn] = -1; | 1717 | evtchn_to_irq[evtchn] = -1; |
1718 | 1718 | ||
1719 | for_each_possible_cpu(cpu) { | 1719 | for_each_possible_cpu(cpu) { |
1720 | restore_cpu_virqs(cpu); | 1720 | restore_cpu_virqs(cpu); |
1721 | restore_cpu_ipis(cpu); | 1721 | restore_cpu_ipis(cpu); |
1722 | } | 1722 | } |
1723 | 1723 | ||
1724 | restore_pirqs(); | 1724 | restore_pirqs(); |
1725 | } | 1725 | } |
1726 | 1726 | ||
1727 | static struct irq_chip xen_dynamic_chip __read_mostly = { | 1727 | static struct irq_chip xen_dynamic_chip __read_mostly = { |
1728 | .name = "xen-dyn", | 1728 | .name = "xen-dyn", |
1729 | 1729 | ||
1730 | .irq_disable = disable_dynirq, | 1730 | .irq_disable = disable_dynirq, |
1731 | .irq_mask = disable_dynirq, | 1731 | .irq_mask = disable_dynirq, |
1732 | .irq_unmask = enable_dynirq, | 1732 | .irq_unmask = enable_dynirq, |
1733 | 1733 | ||
1734 | .irq_ack = ack_dynirq, | 1734 | .irq_ack = ack_dynirq, |
1735 | .irq_mask_ack = mask_ack_dynirq, | 1735 | .irq_mask_ack = mask_ack_dynirq, |
1736 | 1736 | ||
1737 | .irq_set_affinity = set_affinity_irq, | 1737 | .irq_set_affinity = set_affinity_irq, |
1738 | .irq_retrigger = retrigger_dynirq, | 1738 | .irq_retrigger = retrigger_dynirq, |
1739 | }; | 1739 | }; |
1740 | 1740 | ||
1741 | static struct irq_chip xen_pirq_chip __read_mostly = { | 1741 | static struct irq_chip xen_pirq_chip __read_mostly = { |
1742 | .name = "xen-pirq", | 1742 | .name = "xen-pirq", |
1743 | 1743 | ||
1744 | .irq_startup = startup_pirq, | 1744 | .irq_startup = startup_pirq, |
1745 | .irq_shutdown = shutdown_pirq, | 1745 | .irq_shutdown = shutdown_pirq, |
1746 | .irq_enable = enable_pirq, | 1746 | .irq_enable = enable_pirq, |
1747 | .irq_disable = disable_pirq, | 1747 | .irq_disable = disable_pirq, |
1748 | 1748 | ||
1749 | .irq_mask = disable_dynirq, | 1749 | .irq_mask = disable_dynirq, |
1750 | .irq_unmask = enable_dynirq, | 1750 | .irq_unmask = enable_dynirq, |
1751 | 1751 | ||
1752 | .irq_ack = eoi_pirq, | 1752 | .irq_ack = eoi_pirq, |
1753 | .irq_eoi = eoi_pirq, | 1753 | .irq_eoi = eoi_pirq, |
1754 | .irq_mask_ack = mask_ack_pirq, | 1754 | .irq_mask_ack = mask_ack_pirq, |
1755 | 1755 | ||
1756 | .irq_set_affinity = set_affinity_irq, | 1756 | .irq_set_affinity = set_affinity_irq, |
1757 | 1757 | ||
1758 | .irq_retrigger = retrigger_dynirq, | 1758 | .irq_retrigger = retrigger_dynirq, |
1759 | }; | 1759 | }; |
1760 | 1760 | ||
1761 | static struct irq_chip xen_percpu_chip __read_mostly = { | 1761 | static struct irq_chip xen_percpu_chip __read_mostly = { |
1762 | .name = "xen-percpu", | 1762 | .name = "xen-percpu", |
1763 | 1763 | ||
1764 | .irq_disable = disable_dynirq, | 1764 | .irq_disable = disable_dynirq, |
1765 | .irq_mask = disable_dynirq, | 1765 | .irq_mask = disable_dynirq, |
1766 | .irq_unmask = enable_dynirq, | 1766 | .irq_unmask = enable_dynirq, |
1767 | 1767 | ||
1768 | .irq_ack = ack_dynirq, | 1768 | .irq_ack = ack_dynirq, |
1769 | }; | 1769 | }; |
1770 | 1770 | ||
1771 | int xen_set_callback_via(uint64_t via) | 1771 | int xen_set_callback_via(uint64_t via) |
1772 | { | 1772 | { |
1773 | struct xen_hvm_param a; | 1773 | struct xen_hvm_param a; |
1774 | a.domid = DOMID_SELF; | 1774 | a.domid = DOMID_SELF; |
1775 | a.index = HVM_PARAM_CALLBACK_IRQ; | 1775 | a.index = HVM_PARAM_CALLBACK_IRQ; |
1776 | a.value = via; | 1776 | a.value = via; |
1777 | return HYPERVISOR_hvm_op(HVMOP_set_param, &a); | 1777 | return HYPERVISOR_hvm_op(HVMOP_set_param, &a); |
1778 | } | 1778 | } |
1779 | EXPORT_SYMBOL_GPL(xen_set_callback_via); | 1779 | EXPORT_SYMBOL_GPL(xen_set_callback_via); |
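Editor's note: the 64-bit `via` value written to HVM_PARAM_CALLBACK_IRQ encodes the delivery method in its top byte; for vector delivery (type 2) the low byte is the vector number, which is what the HVM_CALLBACK_VECTOR() macro used below expands to. A sketch of the encoding, assuming the usual Xen ABI constants:

    #define HVM_CALLBACK_VIA_TYPE_VECTOR  0x2   /* Xen ABI: top-byte type */
    #define HVM_CALLBACK_VIA_TYPE_SHIFT   56

    uint64_t via = ((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR
                    << HVM_CALLBACK_VIA_TYPE_SHIFT)
                   | HYPERVISOR_CALLBACK_VECTOR;     /* low byte: vector */

    int rc = xen_set_callback_via(via);   /* same as HVM_CALLBACK_VECTOR(...) */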
1780 | 1780 | ||
1781 | #ifdef CONFIG_XEN_PVHVM | 1781 | #ifdef CONFIG_XEN_PVHVM |
1782 | /* Vector callbacks are better than PCI interrupts for receiving event | 1782 | /* Vector callbacks are better than PCI interrupts for receiving event |
1783 | * channel notifications because they can be delivered on any vcpu and | 1783 | * channel notifications because they can be delivered on any vcpu and |
1784 | * need no PCI support or APIC interactions. */ | 1784 | * need no PCI support or APIC interactions. */ |
1785 | void xen_callback_vector(void) | 1785 | void xen_callback_vector(void) |
1786 | { | 1786 | { |
1787 | int rc; | 1787 | int rc; |
1788 | uint64_t callback_via; | 1788 | uint64_t callback_via; |
1789 | if (xen_have_vector_callback) { | 1789 | if (xen_have_vector_callback) { |
1790 | callback_via = HVM_CALLBACK_VECTOR(XEN_HVM_EVTCHN_CALLBACK); | 1790 | callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR); |
1791 | rc = xen_set_callback_via(callback_via); | 1791 | rc = xen_set_callback_via(callback_via); |
1792 | if (rc) { | 1792 | if (rc) { |
1793 | printk(KERN_ERR "Request for Xen HVM callback vector" | 1793 | printk(KERN_ERR "Request for Xen HVM callback vector" |
1794 | " failed.\n"); | 1794 | " failed.\n"); |
1795 | xen_have_vector_callback = 0; | 1795 | xen_have_vector_callback = 0; |
1796 | return; | 1796 | return; |
1797 | } | 1797 | } |
1798 | printk(KERN_INFO "Xen HVM callback vector for event delivery is " | 1798 | printk(KERN_INFO "Xen HVM callback vector for event delivery is " |
1799 | "enabled\n"); | 1799 | "enabled\n"); |
1800 | /* in the restore case the vector has already been allocated */ | 1800 | /* in the restore case the vector has already been allocated */ |
1801 | if (!test_bit(XEN_HVM_EVTCHN_CALLBACK, used_vectors)) | 1801 | if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) |
1802 | alloc_intr_gate(XEN_HVM_EVTCHN_CALLBACK, xen_hvm_callback_vector); | 1802 | alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, |
1803 | xen_hvm_callback_vector); | ||
1803 | } | 1804 | } |
1804 | } | 1805 | } |
1805 | #else | 1806 | #else |
1806 | void xen_callback_vector(void) {} | 1807 | void xen_callback_vector(void) {} |
1807 | #endif | 1808 | #endif |
1808 | 1809 | ||
1809 | void __init xen_init_IRQ(void) | 1810 | void __init xen_init_IRQ(void) |
1810 | { | 1811 | { |
1811 | int i; | 1812 | int i; |
1812 | 1813 | ||
1813 | evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), | 1814 | evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), |
1814 | GFP_KERNEL); | 1815 | GFP_KERNEL); |
1815 | BUG_ON(!evtchn_to_irq); | 1816 | BUG_ON(!evtchn_to_irq); |
1816 | for (i = 0; i < NR_EVENT_CHANNELS; i++) | 1817 | for (i = 0; i < NR_EVENT_CHANNELS; i++) |
1817 | evtchn_to_irq[i] = -1; | 1818 | evtchn_to_irq[i] = -1; |
1818 | 1819 | ||
1819 | init_evtchn_cpu_bindings(); | 1820 | init_evtchn_cpu_bindings(); |
1820 | 1821 | ||
1821 | /* No event channels are 'live' right now. */ | 1822 | /* No event channels are 'live' right now. */ |
1822 | for (i = 0; i < NR_EVENT_CHANNELS; i++) | 1823 | for (i = 0; i < NR_EVENT_CHANNELS; i++) |
1823 | mask_evtchn(i); | 1824 | mask_evtchn(i); |
1824 | 1825 | ||
1825 | pirq_needs_eoi = pirq_needs_eoi_flag; | 1826 | pirq_needs_eoi = pirq_needs_eoi_flag; |
1826 | 1827 | ||
1827 | #ifdef CONFIG_X86 | 1828 | #ifdef CONFIG_X86 |
1828 | if (xen_hvm_domain()) { | 1829 | if (xen_hvm_domain()) { |
1829 | xen_callback_vector(); | 1830 | xen_callback_vector(); |
1830 | native_init_IRQ(); | 1831 | native_init_IRQ(); |
1831 | /* pci_xen_hvm_init must be called after native_init_IRQ so that | 1832 | /* pci_xen_hvm_init must be called after native_init_IRQ so that |
1832 | * __acpi_register_gsi can point at the right function */ | 1833 | * __acpi_register_gsi can point at the right function */ |
1833 | pci_xen_hvm_init(); | 1834 | pci_xen_hvm_init(); |
1834 | } else { | 1835 | } else { |
1835 | int rc; | 1836 | int rc; |
1836 | struct physdev_pirq_eoi_gmfn eoi_gmfn; | 1837 | struct physdev_pirq_eoi_gmfn eoi_gmfn; |
1837 | 1838 | ||
1838 | irq_ctx_init(smp_processor_id()); | 1839 | irq_ctx_init(smp_processor_id()); |
1839 | if (xen_initial_domain()) | 1840 | if (xen_initial_domain()) |
1840 | pci_xen_initial_domain(); | 1841 | pci_xen_initial_domain(); |
1841 | 1842 | ||
1842 | pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); | 1843 | pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); |
1843 | eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map); | 1844 | eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map); |
1844 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn); | 1845 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn); |
1845 | if (rc != 0) { | 1846 | if (rc != 0) { |
1846 | free_page((unsigned long) pirq_eoi_map); | 1847 | free_page((unsigned long) pirq_eoi_map); |
1847 | pirq_eoi_map = NULL; | 1848 | pirq_eoi_map = NULL; |
1848 | } else | 1849 | } else |
1849 | pirq_needs_eoi = pirq_check_eoi_map; | 1850 | pirq_needs_eoi = pirq_check_eoi_map; |
1850 | } | 1851 | } |
1851 | #endif | 1852 | #endif |
1852 | } | 1853 | } |
1853 | 1854 |