Commit 9f9d489a3e78b49d897734eaaf9dea568dbea66e
Committed by: Ingo Molnar
Parent: fab58420ac
Exists in: master and 4 other branches

x86/paravirt, 64-bit: make load_gs_index() a paravirt operation

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 5 changed files with 18 additions and 4 deletions (inline diff below)
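Only the arch/x86/kernel/entry_64.S diff is reproduced below. Its visible change renames ENTRY(load_gs_index) and ENDPROC(load_gs_index) to native_load_gs_index, turning the assembly routine into the native backend of a paravirt hook. As a rough, self-contained illustration of that indirection (the struct layout and the name xen_load_gs_index here are assumptions made for the sketch, not the commit's actual code in the other four files), the pattern looks like this:

/*
 * Illustrative sketch of the paravirt indirection applied to
 * load_gs_index(): callers go through an ops table whose default entry
 * is the native routine, and a hypervisor port may install its own
 * implementation instead.  Identifiers are stand-ins, not the kernel's
 * exact code.
 */
#include <stdio.h>

struct pv_cpu_ops {
	void (*load_gs_index)(unsigned int selector);
};

/* Stand-in for the renamed assembly routine native_load_gs_index(). */
static void native_load_gs_index(unsigned int selector)
{
	printf("native: reload %%gs with selector %#x via the swapgs sequence\n",
	       selector);
}

/* Stand-in for a hypervisor backend overriding the hook. */
static void xen_load_gs_index(unsigned int selector)
{
	printf("xen: ask the hypervisor to set guest GS to %#x\n", selector);
}

/* The default ops table points the hook at the native implementation. */
static struct pv_cpu_ops pv_cpu_ops = {
	.load_gs_index = native_load_gs_index,
};

/* What callers keep using; only the plumbing underneath changes. */
static void load_gs_index(unsigned int selector)
{
	pv_cpu_ops.load_gs_index(selector);
}

int main(void)
{
	load_gs_index(0x33);                          /* native path */
	pv_cpu_ops.load_gs_index = xen_load_gs_index; /* paravirt override */
	load_gs_index(0x33);
	return 0;
}

Run as-is, the program takes the native path first and the overridden path second, which is the flexibility a Xen-style 64-bit guest needs when it cannot execute the raw swapgs and segment-register load sequence directly.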
arch/x86/kernel/entry_64.S
1 | /* | 1 | /* |
2 | * linux/arch/x86_64/entry.S | 2 | * linux/arch/x86_64/entry.S |
3 | * | 3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs | 5 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs |
6 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> | 6 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> |
7 | */ | 7 | */ |
8 | 8 | ||
9 | /* | 9 | /* |
10 | * entry.S contains the system-call and fault low-level handling routines. | 10 | * entry.S contains the system-call and fault low-level handling routines. |
11 | * | 11 | * |
12 | * NOTE: This code handles signal-recognition, which happens every time | 12 | * NOTE: This code handles signal-recognition, which happens every time |
13 | * after an interrupt and after each system call. | 13 | * after an interrupt and after each system call. |
14 | * | 14 | * |
15 | * Normal syscalls and interrupts don't save a full stack frame, this is | 15 | * Normal syscalls and interrupts don't save a full stack frame, this is |
16 | * only done for syscall tracing, signals or fork/exec et.al. | 16 | * only done for syscall tracing, signals or fork/exec et.al. |
17 | * | 17 | * |
18 | * A note on terminology: | 18 | * A note on terminology: |
19 | * - top of stack: Architecture defined interrupt frame from SS to RIP | 19 | * - top of stack: Architecture defined interrupt frame from SS to RIP |
20 | * at the top of the kernel process stack. | 20 | * at the top of the kernel process stack. |
21 | * - partial stack frame: partially saved registers upto R11. | 21 | * - partial stack frame: partially saved registers upto R11. |
22 | * - full stack frame: Like partial stack frame, but all register saved. | 22 | * - full stack frame: Like partial stack frame, but all register saved. |
23 | * | 23 | * |
24 | * Some macro usage: | 24 | * Some macro usage: |
25 | * - CFI macros are used to generate dwarf2 unwind information for better | 25 | * - CFI macros are used to generate dwarf2 unwind information for better |
26 | * backtraces. They don't change any code. | 26 | * backtraces. They don't change any code. |
27 | * - SAVE_ALL/RESTORE_ALL - Save/restore all registers | 27 | * - SAVE_ALL/RESTORE_ALL - Save/restore all registers |
28 | * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. | 28 | * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. |
29 | * There are unfortunately lots of special cases where some registers | 29 | * There are unfortunately lots of special cases where some registers |
30 | * not touched. The macro is a big mess that should be cleaned up. | 30 | * not touched. The macro is a big mess that should be cleaned up. |
31 | * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. | 31 | * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. |
32 | * Gives a full stack frame. | 32 | * Gives a full stack frame. |
33 | * - ENTRY/END Define functions in the symbol table. | 33 | * - ENTRY/END Define functions in the symbol table. |
34 | * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack | 34 | * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack |
35 | * frame that is otherwise undefined after a SYSCALL | 35 | * frame that is otherwise undefined after a SYSCALL |
36 | * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. | 36 | * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. |
37 | * - errorentry/paranoidentry/zeroentry - Define exception entry points. | 37 | * - errorentry/paranoidentry/zeroentry - Define exception entry points. |
38 | */ | 38 | */ |
39 | 39 | ||
40 | #include <linux/linkage.h> | 40 | #include <linux/linkage.h> |
41 | #include <asm/segment.h> | 41 | #include <asm/segment.h> |
42 | #include <asm/cache.h> | 42 | #include <asm/cache.h> |
43 | #include <asm/errno.h> | 43 | #include <asm/errno.h> |
44 | #include <asm/dwarf2.h> | 44 | #include <asm/dwarf2.h> |
45 | #include <asm/calling.h> | 45 | #include <asm/calling.h> |
46 | #include <asm/asm-offsets.h> | 46 | #include <asm/asm-offsets.h> |
47 | #include <asm/msr.h> | 47 | #include <asm/msr.h> |
48 | #include <asm/unistd.h> | 48 | #include <asm/unistd.h> |
49 | #include <asm/thread_info.h> | 49 | #include <asm/thread_info.h> |
50 | #include <asm/hw_irq.h> | 50 | #include <asm/hw_irq.h> |
51 | #include <asm/page.h> | 51 | #include <asm/page.h> |
52 | #include <asm/irqflags.h> | 52 | #include <asm/irqflags.h> |
53 | #include <asm/paravirt.h> | 53 | #include <asm/paravirt.h> |
54 | 54 | ||
55 | .code64 | 55 | .code64 |
56 | 56 | ||
57 | #ifndef CONFIG_PREEMPT | 57 | #ifndef CONFIG_PREEMPT |
58 | #define retint_kernel retint_restore_args | 58 | #define retint_kernel retint_restore_args |
59 | #endif | 59 | #endif |
60 | 60 | ||
61 | #ifdef CONFIG_PARAVIRT | 61 | #ifdef CONFIG_PARAVIRT |
62 | ENTRY(native_usergs_sysret64) | 62 | ENTRY(native_usergs_sysret64) |
63 | swapgs | 63 | swapgs |
64 | sysretq | 64 | sysretq |
65 | #endif /* CONFIG_PARAVIRT */ | 65 | #endif /* CONFIG_PARAVIRT */ |
66 | 66 | ||
67 | 67 | ||
68 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET | 68 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET |
69 | #ifdef CONFIG_TRACE_IRQFLAGS | 69 | #ifdef CONFIG_TRACE_IRQFLAGS |
70 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | 70 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ |
71 | jnc 1f | 71 | jnc 1f |
72 | TRACE_IRQS_ON | 72 | TRACE_IRQS_ON |
73 | 1: | 73 | 1: |
74 | #endif | 74 | #endif |
75 | .endm | 75 | .endm |
76 | 76 | ||
77 | /* | 77 | /* |
78 | * C code is not supposed to know about undefined top of stack. Every time | 78 | * C code is not supposed to know about undefined top of stack. Every time |
79 | * a C function with an pt_regs argument is called from the SYSCALL based | 79 | * a C function with an pt_regs argument is called from the SYSCALL based |
80 | * fast path FIXUP_TOP_OF_STACK is needed. | 80 | * fast path FIXUP_TOP_OF_STACK is needed. |
81 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs | 81 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs |
82 | * manipulation. | 82 | * manipulation. |
83 | */ | 83 | */ |
84 | 84 | ||
85 | /* %rsp:at FRAMEEND */ | 85 | /* %rsp:at FRAMEEND */ |
86 | .macro FIXUP_TOP_OF_STACK tmp | 86 | .macro FIXUP_TOP_OF_STACK tmp |
87 | movq %gs:pda_oldrsp,\tmp | 87 | movq %gs:pda_oldrsp,\tmp |
88 | movq \tmp,RSP(%rsp) | 88 | movq \tmp,RSP(%rsp) |
89 | movq $__USER_DS,SS(%rsp) | 89 | movq $__USER_DS,SS(%rsp) |
90 | movq $__USER_CS,CS(%rsp) | 90 | movq $__USER_CS,CS(%rsp) |
91 | movq $-1,RCX(%rsp) | 91 | movq $-1,RCX(%rsp) |
92 | movq R11(%rsp),\tmp /* get eflags */ | 92 | movq R11(%rsp),\tmp /* get eflags */ |
93 | movq \tmp,EFLAGS(%rsp) | 93 | movq \tmp,EFLAGS(%rsp) |
94 | .endm | 94 | .endm |
95 | 95 | ||
96 | .macro RESTORE_TOP_OF_STACK tmp,offset=0 | 96 | .macro RESTORE_TOP_OF_STACK tmp,offset=0 |
97 | movq RSP-\offset(%rsp),\tmp | 97 | movq RSP-\offset(%rsp),\tmp |
98 | movq \tmp,%gs:pda_oldrsp | 98 | movq \tmp,%gs:pda_oldrsp |
99 | movq EFLAGS-\offset(%rsp),\tmp | 99 | movq EFLAGS-\offset(%rsp),\tmp |
100 | movq \tmp,R11-\offset(%rsp) | 100 | movq \tmp,R11-\offset(%rsp) |
101 | .endm | 101 | .endm |
102 | 102 | ||
103 | .macro FAKE_STACK_FRAME child_rip | 103 | .macro FAKE_STACK_FRAME child_rip |
104 | /* push in order ss, rsp, eflags, cs, rip */ | 104 | /* push in order ss, rsp, eflags, cs, rip */ |
105 | xorl %eax, %eax | 105 | xorl %eax, %eax |
106 | pushq $__KERNEL_DS /* ss */ | 106 | pushq $__KERNEL_DS /* ss */ |
107 | CFI_ADJUST_CFA_OFFSET 8 | 107 | CFI_ADJUST_CFA_OFFSET 8 |
108 | /*CFI_REL_OFFSET ss,0*/ | 108 | /*CFI_REL_OFFSET ss,0*/ |
109 | pushq %rax /* rsp */ | 109 | pushq %rax /* rsp */ |
110 | CFI_ADJUST_CFA_OFFSET 8 | 110 | CFI_ADJUST_CFA_OFFSET 8 |
111 | CFI_REL_OFFSET rsp,0 | 111 | CFI_REL_OFFSET rsp,0 |
112 | pushq $(1<<9) /* eflags - interrupts on */ | 112 | pushq $(1<<9) /* eflags - interrupts on */ |
113 | CFI_ADJUST_CFA_OFFSET 8 | 113 | CFI_ADJUST_CFA_OFFSET 8 |
114 | /*CFI_REL_OFFSET rflags,0*/ | 114 | /*CFI_REL_OFFSET rflags,0*/ |
115 | pushq $__KERNEL_CS /* cs */ | 115 | pushq $__KERNEL_CS /* cs */ |
116 | CFI_ADJUST_CFA_OFFSET 8 | 116 | CFI_ADJUST_CFA_OFFSET 8 |
117 | /*CFI_REL_OFFSET cs,0*/ | 117 | /*CFI_REL_OFFSET cs,0*/ |
118 | pushq \child_rip /* rip */ | 118 | pushq \child_rip /* rip */ |
119 | CFI_ADJUST_CFA_OFFSET 8 | 119 | CFI_ADJUST_CFA_OFFSET 8 |
120 | CFI_REL_OFFSET rip,0 | 120 | CFI_REL_OFFSET rip,0 |
121 | pushq %rax /* orig rax */ | 121 | pushq %rax /* orig rax */ |
122 | CFI_ADJUST_CFA_OFFSET 8 | 122 | CFI_ADJUST_CFA_OFFSET 8 |
123 | .endm | 123 | .endm |
124 | 124 | ||
125 | .macro UNFAKE_STACK_FRAME | 125 | .macro UNFAKE_STACK_FRAME |
126 | addq $8*6, %rsp | 126 | addq $8*6, %rsp |
127 | CFI_ADJUST_CFA_OFFSET -(6*8) | 127 | CFI_ADJUST_CFA_OFFSET -(6*8) |
128 | .endm | 128 | .endm |
129 | 129 | ||
130 | .macro CFI_DEFAULT_STACK start=1 | 130 | .macro CFI_DEFAULT_STACK start=1 |
131 | .if \start | 131 | .if \start |
132 | CFI_STARTPROC simple | 132 | CFI_STARTPROC simple |
133 | CFI_SIGNAL_FRAME | 133 | CFI_SIGNAL_FRAME |
134 | CFI_DEF_CFA rsp,SS+8 | 134 | CFI_DEF_CFA rsp,SS+8 |
135 | .else | 135 | .else |
136 | CFI_DEF_CFA_OFFSET SS+8 | 136 | CFI_DEF_CFA_OFFSET SS+8 |
137 | .endif | 137 | .endif |
138 | CFI_REL_OFFSET r15,R15 | 138 | CFI_REL_OFFSET r15,R15 |
139 | CFI_REL_OFFSET r14,R14 | 139 | CFI_REL_OFFSET r14,R14 |
140 | CFI_REL_OFFSET r13,R13 | 140 | CFI_REL_OFFSET r13,R13 |
141 | CFI_REL_OFFSET r12,R12 | 141 | CFI_REL_OFFSET r12,R12 |
142 | CFI_REL_OFFSET rbp,RBP | 142 | CFI_REL_OFFSET rbp,RBP |
143 | CFI_REL_OFFSET rbx,RBX | 143 | CFI_REL_OFFSET rbx,RBX |
144 | CFI_REL_OFFSET r11,R11 | 144 | CFI_REL_OFFSET r11,R11 |
145 | CFI_REL_OFFSET r10,R10 | 145 | CFI_REL_OFFSET r10,R10 |
146 | CFI_REL_OFFSET r9,R9 | 146 | CFI_REL_OFFSET r9,R9 |
147 | CFI_REL_OFFSET r8,R8 | 147 | CFI_REL_OFFSET r8,R8 |
148 | CFI_REL_OFFSET rax,RAX | 148 | CFI_REL_OFFSET rax,RAX |
149 | CFI_REL_OFFSET rcx,RCX | 149 | CFI_REL_OFFSET rcx,RCX |
150 | CFI_REL_OFFSET rdx,RDX | 150 | CFI_REL_OFFSET rdx,RDX |
151 | CFI_REL_OFFSET rsi,RSI | 151 | CFI_REL_OFFSET rsi,RSI |
152 | CFI_REL_OFFSET rdi,RDI | 152 | CFI_REL_OFFSET rdi,RDI |
153 | CFI_REL_OFFSET rip,RIP | 153 | CFI_REL_OFFSET rip,RIP |
154 | /*CFI_REL_OFFSET cs,CS*/ | 154 | /*CFI_REL_OFFSET cs,CS*/ |
155 | /*CFI_REL_OFFSET rflags,EFLAGS*/ | 155 | /*CFI_REL_OFFSET rflags,EFLAGS*/ |
156 | CFI_REL_OFFSET rsp,RSP | 156 | CFI_REL_OFFSET rsp,RSP |
157 | /*CFI_REL_OFFSET ss,SS*/ | 157 | /*CFI_REL_OFFSET ss,SS*/ |
158 | .endm | 158 | .endm |
159 | /* | 159 | /* |
160 | * A newly forked process directly context switches into this. | 160 | * A newly forked process directly context switches into this. |
161 | */ | 161 | */ |
162 | /* rdi: prev */ | 162 | /* rdi: prev */ |
163 | ENTRY(ret_from_fork) | 163 | ENTRY(ret_from_fork) |
164 | CFI_DEFAULT_STACK | 164 | CFI_DEFAULT_STACK |
165 | push kernel_eflags(%rip) | 165 | push kernel_eflags(%rip) |
166 | CFI_ADJUST_CFA_OFFSET 4 | 166 | CFI_ADJUST_CFA_OFFSET 4 |
167 | popf # reset kernel eflags | 167 | popf # reset kernel eflags |
168 | CFI_ADJUST_CFA_OFFSET -4 | 168 | CFI_ADJUST_CFA_OFFSET -4 |
169 | call schedule_tail | 169 | call schedule_tail |
170 | GET_THREAD_INFO(%rcx) | 170 | GET_THREAD_INFO(%rcx) |
171 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) | 171 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) |
172 | jnz rff_trace | 172 | jnz rff_trace |
173 | rff_action: | 173 | rff_action: |
174 | RESTORE_REST | 174 | RESTORE_REST |
175 | testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? | 175 | testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? |
176 | je int_ret_from_sys_call | 176 | je int_ret_from_sys_call |
177 | testl $_TIF_IA32,threadinfo_flags(%rcx) | 177 | testl $_TIF_IA32,threadinfo_flags(%rcx) |
178 | jnz int_ret_from_sys_call | 178 | jnz int_ret_from_sys_call |
179 | RESTORE_TOP_OF_STACK %rdi,ARGOFFSET | 179 | RESTORE_TOP_OF_STACK %rdi,ARGOFFSET |
180 | jmp ret_from_sys_call | 180 | jmp ret_from_sys_call |
181 | rff_trace: | 181 | rff_trace: |
182 | movq %rsp,%rdi | 182 | movq %rsp,%rdi |
183 | call syscall_trace_leave | 183 | call syscall_trace_leave |
184 | GET_THREAD_INFO(%rcx) | 184 | GET_THREAD_INFO(%rcx) |
185 | jmp rff_action | 185 | jmp rff_action |
186 | CFI_ENDPROC | 186 | CFI_ENDPROC |
187 | END(ret_from_fork) | 187 | END(ret_from_fork) |
188 | 188 | ||
189 | /* | 189 | /* |
190 | * System call entry. Upto 6 arguments in registers are supported. | 190 | * System call entry. Upto 6 arguments in registers are supported. |
191 | * | 191 | * |
192 | * SYSCALL does not save anything on the stack and does not change the | 192 | * SYSCALL does not save anything on the stack and does not change the |
193 | * stack pointer. | 193 | * stack pointer. |
194 | */ | 194 | */ |
195 | 195 | ||
196 | /* | 196 | /* |
197 | * Register setup: | 197 | * Register setup: |
198 | * rax system call number | 198 | * rax system call number |
199 | * rdi arg0 | 199 | * rdi arg0 |
200 | * rcx return address for syscall/sysret, C arg3 | 200 | * rcx return address for syscall/sysret, C arg3 |
201 | * rsi arg1 | 201 | * rsi arg1 |
202 | * rdx arg2 | 202 | * rdx arg2 |
203 | * r10 arg3 (--> moved to rcx for C) | 203 | * r10 arg3 (--> moved to rcx for C) |
204 | * r8 arg4 | 204 | * r8 arg4 |
205 | * r9 arg5 | 205 | * r9 arg5 |
206 | * r11 eflags for syscall/sysret, temporary for C | 206 | * r11 eflags for syscall/sysret, temporary for C |
207 | * r12-r15,rbp,rbx saved by C code, not touched. | 207 | * r12-r15,rbp,rbx saved by C code, not touched. |
208 | * | 208 | * |
209 | * Interrupts are off on entry. | 209 | * Interrupts are off on entry. |
210 | * Only called from user space. | 210 | * Only called from user space. |
211 | * | 211 | * |
212 | * XXX if we had a free scratch register we could save the RSP into the stack frame | 212 | * XXX if we had a free scratch register we could save the RSP into the stack frame |
213 | * and report it properly in ps. Unfortunately we haven't. | 213 | * and report it properly in ps. Unfortunately we haven't. |
214 | * | 214 | * |
215 | * When user can change the frames always force IRET. That is because | 215 | * When user can change the frames always force IRET. That is because |
216 | * it deals with uncanonical addresses better. SYSRET has trouble | 216 | * it deals with uncanonical addresses better. SYSRET has trouble |
217 | * with them due to bugs in both AMD and Intel CPUs. | 217 | * with them due to bugs in both AMD and Intel CPUs. |
218 | */ | 218 | */ |
219 | 219 | ||
220 | ENTRY(system_call) | 220 | ENTRY(system_call) |
221 | CFI_STARTPROC simple | 221 | CFI_STARTPROC simple |
222 | CFI_SIGNAL_FRAME | 222 | CFI_SIGNAL_FRAME |
223 | CFI_DEF_CFA rsp,PDA_STACKOFFSET | 223 | CFI_DEF_CFA rsp,PDA_STACKOFFSET |
224 | CFI_REGISTER rip,rcx | 224 | CFI_REGISTER rip,rcx |
225 | /*CFI_REGISTER rflags,r11*/ | 225 | /*CFI_REGISTER rflags,r11*/ |
226 | SWAPGS_UNSAFE_STACK | 226 | SWAPGS_UNSAFE_STACK |
227 | /* | 227 | /* |
228 | * A hypervisor implementation might want to use a label | 228 | * A hypervisor implementation might want to use a label |
229 | * after the swapgs, so that it can do the swapgs | 229 | * after the swapgs, so that it can do the swapgs |
230 | * for the guest and jump here on syscall. | 230 | * for the guest and jump here on syscall. |
231 | */ | 231 | */ |
232 | ENTRY(system_call_after_swapgs) | 232 | ENTRY(system_call_after_swapgs) |
233 | 233 | ||
234 | movq %rsp,%gs:pda_oldrsp | 234 | movq %rsp,%gs:pda_oldrsp |
235 | movq %gs:pda_kernelstack,%rsp | 235 | movq %gs:pda_kernelstack,%rsp |
236 | /* | 236 | /* |
237 | * No need to follow this irqs off/on section - it's straight | 237 | * No need to follow this irqs off/on section - it's straight |
238 | * and short: | 238 | * and short: |
239 | */ | 239 | */ |
240 | ENABLE_INTERRUPTS(CLBR_NONE) | 240 | ENABLE_INTERRUPTS(CLBR_NONE) |
241 | SAVE_ARGS 8,1 | 241 | SAVE_ARGS 8,1 |
242 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 242 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
243 | movq %rcx,RIP-ARGOFFSET(%rsp) | 243 | movq %rcx,RIP-ARGOFFSET(%rsp) |
244 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 244 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
245 | GET_THREAD_INFO(%rcx) | 245 | GET_THREAD_INFO(%rcx) |
246 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) | 246 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) |
247 | jnz tracesys | 247 | jnz tracesys |
248 | cmpq $__NR_syscall_max,%rax | 248 | cmpq $__NR_syscall_max,%rax |
249 | ja badsys | 249 | ja badsys |
250 | movq %r10,%rcx | 250 | movq %r10,%rcx |
251 | call *sys_call_table(,%rax,8) # XXX: rip relative | 251 | call *sys_call_table(,%rax,8) # XXX: rip relative |
252 | movq %rax,RAX-ARGOFFSET(%rsp) | 252 | movq %rax,RAX-ARGOFFSET(%rsp) |
253 | /* | 253 | /* |
254 | * Syscall return path ending with SYSRET (fast path) | 254 | * Syscall return path ending with SYSRET (fast path) |
255 | * Has incomplete stack frame and undefined top of stack. | 255 | * Has incomplete stack frame and undefined top of stack. |
256 | */ | 256 | */ |
257 | ret_from_sys_call: | 257 | ret_from_sys_call: |
258 | movl $_TIF_ALLWORK_MASK,%edi | 258 | movl $_TIF_ALLWORK_MASK,%edi |
259 | /* edi: flagmask */ | 259 | /* edi: flagmask */ |
260 | sysret_check: | 260 | sysret_check: |
261 | LOCKDEP_SYS_EXIT | 261 | LOCKDEP_SYS_EXIT |
262 | GET_THREAD_INFO(%rcx) | 262 | GET_THREAD_INFO(%rcx) |
263 | DISABLE_INTERRUPTS(CLBR_NONE) | 263 | DISABLE_INTERRUPTS(CLBR_NONE) |
264 | TRACE_IRQS_OFF | 264 | TRACE_IRQS_OFF |
265 | movl threadinfo_flags(%rcx),%edx | 265 | movl threadinfo_flags(%rcx),%edx |
266 | andl %edi,%edx | 266 | andl %edi,%edx |
267 | jnz sysret_careful | 267 | jnz sysret_careful |
268 | CFI_REMEMBER_STATE | 268 | CFI_REMEMBER_STATE |
269 | /* | 269 | /* |
270 | * sysretq will re-enable interrupts: | 270 | * sysretq will re-enable interrupts: |
271 | */ | 271 | */ |
272 | TRACE_IRQS_ON | 272 | TRACE_IRQS_ON |
273 | movq RIP-ARGOFFSET(%rsp),%rcx | 273 | movq RIP-ARGOFFSET(%rsp),%rcx |
274 | CFI_REGISTER rip,rcx | 274 | CFI_REGISTER rip,rcx |
275 | RESTORE_ARGS 0,-ARG_SKIP,1 | 275 | RESTORE_ARGS 0,-ARG_SKIP,1 |
276 | /*CFI_REGISTER rflags,r11*/ | 276 | /*CFI_REGISTER rflags,r11*/ |
277 | movq %gs:pda_oldrsp, %rsp | 277 | movq %gs:pda_oldrsp, %rsp |
278 | USERGS_SYSRET64 | 278 | USERGS_SYSRET64 |
279 | 279 | ||
280 | CFI_RESTORE_STATE | 280 | CFI_RESTORE_STATE |
281 | /* Handle reschedules */ | 281 | /* Handle reschedules */ |
282 | /* edx: work, edi: workmask */ | 282 | /* edx: work, edi: workmask */ |
283 | sysret_careful: | 283 | sysret_careful: |
284 | bt $TIF_NEED_RESCHED,%edx | 284 | bt $TIF_NEED_RESCHED,%edx |
285 | jnc sysret_signal | 285 | jnc sysret_signal |
286 | TRACE_IRQS_ON | 286 | TRACE_IRQS_ON |
287 | ENABLE_INTERRUPTS(CLBR_NONE) | 287 | ENABLE_INTERRUPTS(CLBR_NONE) |
288 | pushq %rdi | 288 | pushq %rdi |
289 | CFI_ADJUST_CFA_OFFSET 8 | 289 | CFI_ADJUST_CFA_OFFSET 8 |
290 | call schedule | 290 | call schedule |
291 | popq %rdi | 291 | popq %rdi |
292 | CFI_ADJUST_CFA_OFFSET -8 | 292 | CFI_ADJUST_CFA_OFFSET -8 |
293 | jmp sysret_check | 293 | jmp sysret_check |
294 | 294 | ||
295 | /* Handle a signal */ | 295 | /* Handle a signal */ |
296 | sysret_signal: | 296 | sysret_signal: |
297 | TRACE_IRQS_ON | 297 | TRACE_IRQS_ON |
298 | ENABLE_INTERRUPTS(CLBR_NONE) | 298 | ENABLE_INTERRUPTS(CLBR_NONE) |
299 | testl $_TIF_DO_NOTIFY_MASK,%edx | 299 | testl $_TIF_DO_NOTIFY_MASK,%edx |
300 | jz 1f | 300 | jz 1f |
301 | 301 | ||
302 | /* Really a signal */ | 302 | /* Really a signal */ |
303 | /* edx: work flags (arg3) */ | 303 | /* edx: work flags (arg3) */ |
304 | leaq do_notify_resume(%rip),%rax | 304 | leaq do_notify_resume(%rip),%rax |
305 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | 305 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 |
306 | xorl %esi,%esi # oldset -> arg2 | 306 | xorl %esi,%esi # oldset -> arg2 |
307 | call ptregscall_common | 307 | call ptregscall_common |
308 | 1: movl $_TIF_NEED_RESCHED,%edi | 308 | 1: movl $_TIF_NEED_RESCHED,%edi |
309 | /* Use IRET because user could have changed frame. This | 309 | /* Use IRET because user could have changed frame. This |
310 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 310 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ |
311 | DISABLE_INTERRUPTS(CLBR_NONE) | 311 | DISABLE_INTERRUPTS(CLBR_NONE) |
312 | TRACE_IRQS_OFF | 312 | TRACE_IRQS_OFF |
313 | jmp int_with_check | 313 | jmp int_with_check |
314 | 314 | ||
315 | badsys: | 315 | badsys: |
316 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 316 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) |
317 | jmp ret_from_sys_call | 317 | jmp ret_from_sys_call |
318 | 318 | ||
319 | /* Do syscall tracing */ | 319 | /* Do syscall tracing */ |
320 | tracesys: | 320 | tracesys: |
321 | SAVE_REST | 321 | SAVE_REST |
322 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 322 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
323 | FIXUP_TOP_OF_STACK %rdi | 323 | FIXUP_TOP_OF_STACK %rdi |
324 | movq %rsp,%rdi | 324 | movq %rsp,%rdi |
325 | call syscall_trace_enter | 325 | call syscall_trace_enter |
326 | LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ | 326 | LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ |
327 | RESTORE_REST | 327 | RESTORE_REST |
328 | cmpq $__NR_syscall_max,%rax | 328 | cmpq $__NR_syscall_max,%rax |
329 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ | 329 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ |
330 | movq %r10,%rcx /* fixup for C */ | 330 | movq %r10,%rcx /* fixup for C */ |
331 | call *sys_call_table(,%rax,8) | 331 | call *sys_call_table(,%rax,8) |
332 | movq %rax,RAX-ARGOFFSET(%rsp) | 332 | movq %rax,RAX-ARGOFFSET(%rsp) |
333 | /* Use IRET because user could have changed frame */ | 333 | /* Use IRET because user could have changed frame */ |
334 | 334 | ||
335 | /* | 335 | /* |
336 | * Syscall return path ending with IRET. | 336 | * Syscall return path ending with IRET. |
337 | * Has correct top of stack, but partial stack frame. | 337 | * Has correct top of stack, but partial stack frame. |
338 | */ | 338 | */ |
339 | .globl int_ret_from_sys_call | 339 | .globl int_ret_from_sys_call |
340 | int_ret_from_sys_call: | 340 | int_ret_from_sys_call: |
341 | DISABLE_INTERRUPTS(CLBR_NONE) | 341 | DISABLE_INTERRUPTS(CLBR_NONE) |
342 | TRACE_IRQS_OFF | 342 | TRACE_IRQS_OFF |
343 | testl $3,CS-ARGOFFSET(%rsp) | 343 | testl $3,CS-ARGOFFSET(%rsp) |
344 | je retint_restore_args | 344 | je retint_restore_args |
345 | movl $_TIF_ALLWORK_MASK,%edi | 345 | movl $_TIF_ALLWORK_MASK,%edi |
346 | /* edi: mask to check */ | 346 | /* edi: mask to check */ |
347 | int_with_check: | 347 | int_with_check: |
348 | LOCKDEP_SYS_EXIT_IRQ | 348 | LOCKDEP_SYS_EXIT_IRQ |
349 | GET_THREAD_INFO(%rcx) | 349 | GET_THREAD_INFO(%rcx) |
350 | movl threadinfo_flags(%rcx),%edx | 350 | movl threadinfo_flags(%rcx),%edx |
351 | andl %edi,%edx | 351 | andl %edi,%edx |
352 | jnz int_careful | 352 | jnz int_careful |
353 | andl $~TS_COMPAT,threadinfo_status(%rcx) | 353 | andl $~TS_COMPAT,threadinfo_status(%rcx) |
354 | jmp retint_swapgs | 354 | jmp retint_swapgs |
355 | 355 | ||
356 | /* Either reschedule or signal or syscall exit tracking needed. */ | 356 | /* Either reschedule or signal or syscall exit tracking needed. */ |
357 | /* First do a reschedule test. */ | 357 | /* First do a reschedule test. */ |
358 | /* edx: work, edi: workmask */ | 358 | /* edx: work, edi: workmask */ |
359 | int_careful: | 359 | int_careful: |
360 | bt $TIF_NEED_RESCHED,%edx | 360 | bt $TIF_NEED_RESCHED,%edx |
361 | jnc int_very_careful | 361 | jnc int_very_careful |
362 | TRACE_IRQS_ON | 362 | TRACE_IRQS_ON |
363 | ENABLE_INTERRUPTS(CLBR_NONE) | 363 | ENABLE_INTERRUPTS(CLBR_NONE) |
364 | pushq %rdi | 364 | pushq %rdi |
365 | CFI_ADJUST_CFA_OFFSET 8 | 365 | CFI_ADJUST_CFA_OFFSET 8 |
366 | call schedule | 366 | call schedule |
367 | popq %rdi | 367 | popq %rdi |
368 | CFI_ADJUST_CFA_OFFSET -8 | 368 | CFI_ADJUST_CFA_OFFSET -8 |
369 | DISABLE_INTERRUPTS(CLBR_NONE) | 369 | DISABLE_INTERRUPTS(CLBR_NONE) |
370 | TRACE_IRQS_OFF | 370 | TRACE_IRQS_OFF |
371 | jmp int_with_check | 371 | jmp int_with_check |
372 | 372 | ||
373 | /* handle signals and tracing -- both require a full stack frame */ | 373 | /* handle signals and tracing -- both require a full stack frame */ |
374 | int_very_careful: | 374 | int_very_careful: |
375 | TRACE_IRQS_ON | 375 | TRACE_IRQS_ON |
376 | ENABLE_INTERRUPTS(CLBR_NONE) | 376 | ENABLE_INTERRUPTS(CLBR_NONE) |
377 | SAVE_REST | 377 | SAVE_REST |
378 | /* Check for syscall exit trace */ | 378 | /* Check for syscall exit trace */ |
379 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx | 379 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx |
380 | jz int_signal | 380 | jz int_signal |
381 | pushq %rdi | 381 | pushq %rdi |
382 | CFI_ADJUST_CFA_OFFSET 8 | 382 | CFI_ADJUST_CFA_OFFSET 8 |
383 | leaq 8(%rsp),%rdi # &ptregs -> arg1 | 383 | leaq 8(%rsp),%rdi # &ptregs -> arg1 |
384 | call syscall_trace_leave | 384 | call syscall_trace_leave |
385 | popq %rdi | 385 | popq %rdi |
386 | CFI_ADJUST_CFA_OFFSET -8 | 386 | CFI_ADJUST_CFA_OFFSET -8 |
387 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi | 387 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi |
388 | jmp int_restore_rest | 388 | jmp int_restore_rest |
389 | 389 | ||
390 | int_signal: | 390 | int_signal: |
391 | testl $_TIF_DO_NOTIFY_MASK,%edx | 391 | testl $_TIF_DO_NOTIFY_MASK,%edx |
392 | jz 1f | 392 | jz 1f |
393 | movq %rsp,%rdi # &ptregs -> arg1 | 393 | movq %rsp,%rdi # &ptregs -> arg1 |
394 | xorl %esi,%esi # oldset -> arg2 | 394 | xorl %esi,%esi # oldset -> arg2 |
395 | call do_notify_resume | 395 | call do_notify_resume |
396 | 1: movl $_TIF_NEED_RESCHED,%edi | 396 | 1: movl $_TIF_NEED_RESCHED,%edi |
397 | int_restore_rest: | 397 | int_restore_rest: |
398 | RESTORE_REST | 398 | RESTORE_REST |
399 | DISABLE_INTERRUPTS(CLBR_NONE) | 399 | DISABLE_INTERRUPTS(CLBR_NONE) |
400 | TRACE_IRQS_OFF | 400 | TRACE_IRQS_OFF |
401 | jmp int_with_check | 401 | jmp int_with_check |
402 | CFI_ENDPROC | 402 | CFI_ENDPROC |
403 | END(system_call) | 403 | END(system_call) |
404 | 404 | ||
405 | /* | 405 | /* |
406 | * Certain special system calls that need to save a complete full stack frame. | 406 | * Certain special system calls that need to save a complete full stack frame. |
407 | */ | 407 | */ |
408 | 408 | ||
409 | .macro PTREGSCALL label,func,arg | 409 | .macro PTREGSCALL label,func,arg |
410 | .globl \label | 410 | .globl \label |
411 | \label: | 411 | \label: |
412 | leaq \func(%rip),%rax | 412 | leaq \func(%rip),%rax |
413 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ | 413 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ |
414 | jmp ptregscall_common | 414 | jmp ptregscall_common |
415 | END(\label) | 415 | END(\label) |
416 | .endm | 416 | .endm |
417 | 417 | ||
418 | CFI_STARTPROC | 418 | CFI_STARTPROC |
419 | 419 | ||
420 | PTREGSCALL stub_clone, sys_clone, %r8 | 420 | PTREGSCALL stub_clone, sys_clone, %r8 |
421 | PTREGSCALL stub_fork, sys_fork, %rdi | 421 | PTREGSCALL stub_fork, sys_fork, %rdi |
422 | PTREGSCALL stub_vfork, sys_vfork, %rdi | 422 | PTREGSCALL stub_vfork, sys_vfork, %rdi |
423 | PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx | 423 | PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx |
424 | PTREGSCALL stub_iopl, sys_iopl, %rsi | 424 | PTREGSCALL stub_iopl, sys_iopl, %rsi |
425 | 425 | ||
426 | ENTRY(ptregscall_common) | 426 | ENTRY(ptregscall_common) |
427 | popq %r11 | 427 | popq %r11 |
428 | CFI_ADJUST_CFA_OFFSET -8 | 428 | CFI_ADJUST_CFA_OFFSET -8 |
429 | CFI_REGISTER rip, r11 | 429 | CFI_REGISTER rip, r11 |
430 | SAVE_REST | 430 | SAVE_REST |
431 | movq %r11, %r15 | 431 | movq %r11, %r15 |
432 | CFI_REGISTER rip, r15 | 432 | CFI_REGISTER rip, r15 |
433 | FIXUP_TOP_OF_STACK %r11 | 433 | FIXUP_TOP_OF_STACK %r11 |
434 | call *%rax | 434 | call *%rax |
435 | RESTORE_TOP_OF_STACK %r11 | 435 | RESTORE_TOP_OF_STACK %r11 |
436 | movq %r15, %r11 | 436 | movq %r15, %r11 |
437 | CFI_REGISTER rip, r11 | 437 | CFI_REGISTER rip, r11 |
438 | RESTORE_REST | 438 | RESTORE_REST |
439 | pushq %r11 | 439 | pushq %r11 |
440 | CFI_ADJUST_CFA_OFFSET 8 | 440 | CFI_ADJUST_CFA_OFFSET 8 |
441 | CFI_REL_OFFSET rip, 0 | 441 | CFI_REL_OFFSET rip, 0 |
442 | ret | 442 | ret |
443 | CFI_ENDPROC | 443 | CFI_ENDPROC |
444 | END(ptregscall_common) | 444 | END(ptregscall_common) |
445 | 445 | ||
446 | ENTRY(stub_execve) | 446 | ENTRY(stub_execve) |
447 | CFI_STARTPROC | 447 | CFI_STARTPROC |
448 | popq %r11 | 448 | popq %r11 |
449 | CFI_ADJUST_CFA_OFFSET -8 | 449 | CFI_ADJUST_CFA_OFFSET -8 |
450 | CFI_REGISTER rip, r11 | 450 | CFI_REGISTER rip, r11 |
451 | SAVE_REST | 451 | SAVE_REST |
452 | FIXUP_TOP_OF_STACK %r11 | 452 | FIXUP_TOP_OF_STACK %r11 |
453 | movq %rsp, %rcx | 453 | movq %rsp, %rcx |
454 | call sys_execve | 454 | call sys_execve |
455 | RESTORE_TOP_OF_STACK %r11 | 455 | RESTORE_TOP_OF_STACK %r11 |
456 | movq %rax,RAX(%rsp) | 456 | movq %rax,RAX(%rsp) |
457 | RESTORE_REST | 457 | RESTORE_REST |
458 | jmp int_ret_from_sys_call | 458 | jmp int_ret_from_sys_call |
459 | CFI_ENDPROC | 459 | CFI_ENDPROC |
460 | END(stub_execve) | 460 | END(stub_execve) |
461 | 461 | ||
462 | /* | 462 | /* |
463 | * sigreturn is special because it needs to restore all registers on return. | 463 | * sigreturn is special because it needs to restore all registers on return. |
464 | * This cannot be done with SYSRET, so use the IRET return path instead. | 464 | * This cannot be done with SYSRET, so use the IRET return path instead. |
465 | */ | 465 | */ |
466 | ENTRY(stub_rt_sigreturn) | 466 | ENTRY(stub_rt_sigreturn) |
467 | CFI_STARTPROC | 467 | CFI_STARTPROC |
468 | addq $8, %rsp | 468 | addq $8, %rsp |
469 | CFI_ADJUST_CFA_OFFSET -8 | 469 | CFI_ADJUST_CFA_OFFSET -8 |
470 | SAVE_REST | 470 | SAVE_REST |
471 | movq %rsp,%rdi | 471 | movq %rsp,%rdi |
472 | FIXUP_TOP_OF_STACK %r11 | 472 | FIXUP_TOP_OF_STACK %r11 |
473 | call sys_rt_sigreturn | 473 | call sys_rt_sigreturn |
474 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer | 474 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer |
475 | RESTORE_REST | 475 | RESTORE_REST |
476 | jmp int_ret_from_sys_call | 476 | jmp int_ret_from_sys_call |
477 | CFI_ENDPROC | 477 | CFI_ENDPROC |
478 | END(stub_rt_sigreturn) | 478 | END(stub_rt_sigreturn) |
479 | 479 | ||
480 | /* | 480 | /* |
481 | * initial frame state for interrupts and exceptions | 481 | * initial frame state for interrupts and exceptions |
482 | */ | 482 | */ |
483 | .macro _frame ref | 483 | .macro _frame ref |
484 | CFI_STARTPROC simple | 484 | CFI_STARTPROC simple |
485 | CFI_SIGNAL_FRAME | 485 | CFI_SIGNAL_FRAME |
486 | CFI_DEF_CFA rsp,SS+8-\ref | 486 | CFI_DEF_CFA rsp,SS+8-\ref |
487 | /*CFI_REL_OFFSET ss,SS-\ref*/ | 487 | /*CFI_REL_OFFSET ss,SS-\ref*/ |
488 | CFI_REL_OFFSET rsp,RSP-\ref | 488 | CFI_REL_OFFSET rsp,RSP-\ref |
489 | /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ | 489 | /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ |
490 | /*CFI_REL_OFFSET cs,CS-\ref*/ | 490 | /*CFI_REL_OFFSET cs,CS-\ref*/ |
491 | CFI_REL_OFFSET rip,RIP-\ref | 491 | CFI_REL_OFFSET rip,RIP-\ref |
492 | .endm | 492 | .endm |
493 | 493 | ||
494 | /* initial frame state for interrupts (and exceptions without error code) */ | 494 | /* initial frame state for interrupts (and exceptions without error code) */ |
495 | #define INTR_FRAME _frame RIP | 495 | #define INTR_FRAME _frame RIP |
496 | /* initial frame state for exceptions with error code (and interrupts with | 496 | /* initial frame state for exceptions with error code (and interrupts with |
497 | vector already pushed) */ | 497 | vector already pushed) */ |
498 | #define XCPT_FRAME _frame ORIG_RAX | 498 | #define XCPT_FRAME _frame ORIG_RAX |
499 | 499 | ||
500 | /* | 500 | /* |
501 | * Interrupt entry/exit. | 501 | * Interrupt entry/exit. |
502 | * | 502 | * |
503 | * Interrupt entry points save only callee clobbered registers in fast path. | 503 | * Interrupt entry points save only callee clobbered registers in fast path. |
504 | * | 504 | * |
505 | * Entry runs with interrupts off. | 505 | * Entry runs with interrupts off. |
506 | */ | 506 | */ |
507 | 507 | ||
508 | /* 0(%rsp): interrupt number */ | 508 | /* 0(%rsp): interrupt number */ |
509 | .macro interrupt func | 509 | .macro interrupt func |
510 | cld | 510 | cld |
511 | SAVE_ARGS | 511 | SAVE_ARGS |
512 | leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler | 512 | leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler |
513 | pushq %rbp | 513 | pushq %rbp |
514 | CFI_ADJUST_CFA_OFFSET 8 | 514 | CFI_ADJUST_CFA_OFFSET 8 |
515 | CFI_REL_OFFSET rbp, 0 | 515 | CFI_REL_OFFSET rbp, 0 |
516 | movq %rsp,%rbp | 516 | movq %rsp,%rbp |
517 | CFI_DEF_CFA_REGISTER rbp | 517 | CFI_DEF_CFA_REGISTER rbp |
518 | testl $3,CS(%rdi) | 518 | testl $3,CS(%rdi) |
519 | je 1f | 519 | je 1f |
520 | SWAPGS | 520 | SWAPGS |
521 | /* irqcount is used to check if a CPU is already on an interrupt | 521 | /* irqcount is used to check if a CPU is already on an interrupt |
522 | stack or not. While this is essentially redundant with preempt_count | 522 | stack or not. While this is essentially redundant with preempt_count |
523 | it is a little cheaper to use a separate counter in the PDA | 523 | it is a little cheaper to use a separate counter in the PDA |
524 | (short of moving irq_enter into assembly, which would be too | 524 | (short of moving irq_enter into assembly, which would be too |
525 | much work) */ | 525 | much work) */ |
526 | 1: incl %gs:pda_irqcount | 526 | 1: incl %gs:pda_irqcount |
527 | cmoveq %gs:pda_irqstackptr,%rsp | 527 | cmoveq %gs:pda_irqstackptr,%rsp |
528 | push %rbp # backlink for old unwinder | 528 | push %rbp # backlink for old unwinder |
529 | /* | 529 | /* |
530 | * We entered an interrupt context - irqs are off: | 530 | * We entered an interrupt context - irqs are off: |
531 | */ | 531 | */ |
532 | TRACE_IRQS_OFF | 532 | TRACE_IRQS_OFF |
533 | call \func | 533 | call \func |
534 | .endm | 534 | .endm |
535 | 535 | ||
536 | ENTRY(common_interrupt) | 536 | ENTRY(common_interrupt) |
537 | XCPT_FRAME | 537 | XCPT_FRAME |
538 | interrupt do_IRQ | 538 | interrupt do_IRQ |
539 | /* 0(%rsp): oldrsp-ARGOFFSET */ | 539 | /* 0(%rsp): oldrsp-ARGOFFSET */ |
540 | ret_from_intr: | 540 | ret_from_intr: |
541 | DISABLE_INTERRUPTS(CLBR_NONE) | 541 | DISABLE_INTERRUPTS(CLBR_NONE) |
542 | TRACE_IRQS_OFF | 542 | TRACE_IRQS_OFF |
543 | decl %gs:pda_irqcount | 543 | decl %gs:pda_irqcount |
544 | leaveq | 544 | leaveq |
545 | CFI_DEF_CFA_REGISTER rsp | 545 | CFI_DEF_CFA_REGISTER rsp |
546 | CFI_ADJUST_CFA_OFFSET -8 | 546 | CFI_ADJUST_CFA_OFFSET -8 |
547 | exit_intr: | 547 | exit_intr: |
548 | GET_THREAD_INFO(%rcx) | 548 | GET_THREAD_INFO(%rcx) |
549 | testl $3,CS-ARGOFFSET(%rsp) | 549 | testl $3,CS-ARGOFFSET(%rsp) |
550 | je retint_kernel | 550 | je retint_kernel |
551 | 551 | ||
552 | /* Interrupt came from user space */ | 552 | /* Interrupt came from user space */ |
553 | /* | 553 | /* |
554 | * Has a correct top of stack, but a partial stack frame | 554 | * Has a correct top of stack, but a partial stack frame |
555 | * %rcx: thread info. Interrupts off. | 555 | * %rcx: thread info. Interrupts off. |
556 | */ | 556 | */ |
557 | retint_with_reschedule: | 557 | retint_with_reschedule: |
558 | movl $_TIF_WORK_MASK,%edi | 558 | movl $_TIF_WORK_MASK,%edi |
559 | retint_check: | 559 | retint_check: |
560 | LOCKDEP_SYS_EXIT_IRQ | 560 | LOCKDEP_SYS_EXIT_IRQ |
561 | movl threadinfo_flags(%rcx),%edx | 561 | movl threadinfo_flags(%rcx),%edx |
562 | andl %edi,%edx | 562 | andl %edi,%edx |
563 | CFI_REMEMBER_STATE | 563 | CFI_REMEMBER_STATE |
564 | jnz retint_careful | 564 | jnz retint_careful |
565 | 565 | ||
566 | retint_swapgs: /* return to user-space */ | 566 | retint_swapgs: /* return to user-space */ |
567 | /* | 567 | /* |
568 | * The iretq could re-enable interrupts: | 568 | * The iretq could re-enable interrupts: |
569 | */ | 569 | */ |
570 | DISABLE_INTERRUPTS(CLBR_ANY) | 570 | DISABLE_INTERRUPTS(CLBR_ANY) |
571 | TRACE_IRQS_IRETQ | 571 | TRACE_IRQS_IRETQ |
572 | SWAPGS | 572 | SWAPGS |
573 | jmp restore_args | 573 | jmp restore_args |
574 | 574 | ||
575 | retint_restore_args: /* return to kernel space */ | 575 | retint_restore_args: /* return to kernel space */ |
576 | DISABLE_INTERRUPTS(CLBR_ANY) | 576 | DISABLE_INTERRUPTS(CLBR_ANY) |
577 | /* | 577 | /* |
578 | * The iretq could re-enable interrupts: | 578 | * The iretq could re-enable interrupts: |
579 | */ | 579 | */ |
580 | TRACE_IRQS_IRETQ | 580 | TRACE_IRQS_IRETQ |
581 | restore_args: | 581 | restore_args: |
582 | RESTORE_ARGS 0,8,0 | 582 | RESTORE_ARGS 0,8,0 |
583 | 583 | ||
584 | irq_return: | 584 | irq_return: |
585 | INTERRUPT_RETURN | 585 | INTERRUPT_RETURN |
586 | 586 | ||
587 | .section __ex_table, "a" | 587 | .section __ex_table, "a" |
588 | .quad irq_return, bad_iret | 588 | .quad irq_return, bad_iret |
589 | .previous | 589 | .previous |
590 | 590 | ||
591 | #ifdef CONFIG_PARAVIRT | 591 | #ifdef CONFIG_PARAVIRT |
592 | ENTRY(native_iret) | 592 | ENTRY(native_iret) |
593 | iretq | 593 | iretq |
594 | 594 | ||
595 | .section __ex_table,"a" | 595 | .section __ex_table,"a" |
596 | .quad native_iret, bad_iret | 596 | .quad native_iret, bad_iret |
597 | .previous | 597 | .previous |
598 | #endif | 598 | #endif |
599 | 599 | ||
600 | .section .fixup,"ax" | 600 | .section .fixup,"ax" |
601 | bad_iret: | 601 | bad_iret: |
602 | /* | 602 | /* |
603 | * The iret traps when the %cs or %ss being restored is bogus. | 603 | * The iret traps when the %cs or %ss being restored is bogus. |
604 | * We've lost the original trap vector and error code. | 604 | * We've lost the original trap vector and error code. |
605 | * #GPF is the most likely one to get for an invalid selector. | 605 | * #GPF is the most likely one to get for an invalid selector. |
606 | * So pretend we completed the iret and took the #GPF in user mode. | 606 | * So pretend we completed the iret and took the #GPF in user mode. |
607 | * | 607 | * |
608 | * We are now running with the kernel GS after exception recovery. | 608 | * We are now running with the kernel GS after exception recovery. |
609 | * But error_entry expects us to have user GS to match the user %cs, | 609 | * But error_entry expects us to have user GS to match the user %cs, |
610 | * so swap back. | 610 | * so swap back. |
611 | */ | 611 | */ |
612 | pushq $0 | 612 | pushq $0 |
613 | 613 | ||
614 | SWAPGS | 614 | SWAPGS |
615 | jmp general_protection | 615 | jmp general_protection |
616 | 616 | ||
617 | .previous | 617 | .previous |
618 | 618 | ||
619 | /* edi: workmask, edx: work */ | 619 | /* edi: workmask, edx: work */ |
620 | retint_careful: | 620 | retint_careful: |
621 | CFI_RESTORE_STATE | 621 | CFI_RESTORE_STATE |
622 | bt $TIF_NEED_RESCHED,%edx | 622 | bt $TIF_NEED_RESCHED,%edx |
623 | jnc retint_signal | 623 | jnc retint_signal |
624 | TRACE_IRQS_ON | 624 | TRACE_IRQS_ON |
625 | ENABLE_INTERRUPTS(CLBR_NONE) | 625 | ENABLE_INTERRUPTS(CLBR_NONE) |
626 | pushq %rdi | 626 | pushq %rdi |
627 | CFI_ADJUST_CFA_OFFSET 8 | 627 | CFI_ADJUST_CFA_OFFSET 8 |
628 | call schedule | 628 | call schedule |
629 | popq %rdi | 629 | popq %rdi |
630 | CFI_ADJUST_CFA_OFFSET -8 | 630 | CFI_ADJUST_CFA_OFFSET -8 |
631 | GET_THREAD_INFO(%rcx) | 631 | GET_THREAD_INFO(%rcx) |
632 | DISABLE_INTERRUPTS(CLBR_NONE) | 632 | DISABLE_INTERRUPTS(CLBR_NONE) |
633 | TRACE_IRQS_OFF | 633 | TRACE_IRQS_OFF |
634 | jmp retint_check | 634 | jmp retint_check |
635 | 635 | ||
636 | retint_signal: | 636 | retint_signal: |
637 | testl $_TIF_DO_NOTIFY_MASK,%edx | 637 | testl $_TIF_DO_NOTIFY_MASK,%edx |
638 | jz retint_swapgs | 638 | jz retint_swapgs |
639 | TRACE_IRQS_ON | 639 | TRACE_IRQS_ON |
640 | ENABLE_INTERRUPTS(CLBR_NONE) | 640 | ENABLE_INTERRUPTS(CLBR_NONE) |
641 | SAVE_REST | 641 | SAVE_REST |
642 | movq $-1,ORIG_RAX(%rsp) | 642 | movq $-1,ORIG_RAX(%rsp) |
643 | xorl %esi,%esi # oldset | 643 | xorl %esi,%esi # oldset |
644 | movq %rsp,%rdi # &pt_regs | 644 | movq %rsp,%rdi # &pt_regs |
645 | call do_notify_resume | 645 | call do_notify_resume |
646 | RESTORE_REST | 646 | RESTORE_REST |
647 | DISABLE_INTERRUPTS(CLBR_NONE) | 647 | DISABLE_INTERRUPTS(CLBR_NONE) |
648 | TRACE_IRQS_OFF | 648 | TRACE_IRQS_OFF |
649 | movl $_TIF_NEED_RESCHED,%edi | 649 | movl $_TIF_NEED_RESCHED,%edi |
650 | GET_THREAD_INFO(%rcx) | 650 | GET_THREAD_INFO(%rcx) |
651 | jmp retint_check | 651 | jmp retint_check |
652 | 652 | ||
653 | #ifdef CONFIG_PREEMPT | 653 | #ifdef CONFIG_PREEMPT |
654 | /* Returning to kernel space. Check if we need preemption */ | 654 | /* Returning to kernel space. Check if we need preemption */ |
655 | /* rcx: threadinfo. interrupts off. */ | 655 | /* rcx: threadinfo. interrupts off. */ |
656 | ENTRY(retint_kernel) | 656 | ENTRY(retint_kernel) |
657 | cmpl $0,threadinfo_preempt_count(%rcx) | 657 | cmpl $0,threadinfo_preempt_count(%rcx) |
658 | jnz retint_restore_args | 658 | jnz retint_restore_args |
659 | bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) | 659 | bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) |
660 | jnc retint_restore_args | 660 | jnc retint_restore_args |
661 | bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ | 661 | bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ |
662 | jnc retint_restore_args | 662 | jnc retint_restore_args |
663 | call preempt_schedule_irq | 663 | call preempt_schedule_irq |
664 | jmp exit_intr | 664 | jmp exit_intr |
665 | #endif | 665 | #endif |
666 | 666 | ||
667 | CFI_ENDPROC | 667 | CFI_ENDPROC |
668 | END(common_interrupt) | 668 | END(common_interrupt) |
669 | 669 | ||
670 | /* | 670 | /* |
671 | * APIC interrupts. | 671 | * APIC interrupts. |
672 | */ | 672 | */ |
673 | .macro apicinterrupt num,func | 673 | .macro apicinterrupt num,func |
674 | INTR_FRAME | 674 | INTR_FRAME |
675 | pushq $~(\num) | 675 | pushq $~(\num) |
676 | CFI_ADJUST_CFA_OFFSET 8 | 676 | CFI_ADJUST_CFA_OFFSET 8 |
677 | interrupt \func | 677 | interrupt \func |
678 | jmp ret_from_intr | 678 | jmp ret_from_intr |
679 | CFI_ENDPROC | 679 | CFI_ENDPROC |
680 | .endm | 680 | .endm |
681 | 681 | ||
682 | ENTRY(thermal_interrupt) | 682 | ENTRY(thermal_interrupt) |
683 | apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt | 683 | apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt |
684 | END(thermal_interrupt) | 684 | END(thermal_interrupt) |
685 | 685 | ||
686 | ENTRY(threshold_interrupt) | 686 | ENTRY(threshold_interrupt) |
687 | apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt | 687 | apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt |
688 | END(threshold_interrupt) | 688 | END(threshold_interrupt) |
689 | 689 | ||
690 | #ifdef CONFIG_SMP | 690 | #ifdef CONFIG_SMP |
691 | ENTRY(reschedule_interrupt) | 691 | ENTRY(reschedule_interrupt) |
692 | apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt | 692 | apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt |
693 | END(reschedule_interrupt) | 693 | END(reschedule_interrupt) |
694 | 694 | ||
695 | .macro INVALIDATE_ENTRY num | 695 | .macro INVALIDATE_ENTRY num |
696 | ENTRY(invalidate_interrupt\num) | 696 | ENTRY(invalidate_interrupt\num) |
697 | apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt | 697 | apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt |
698 | END(invalidate_interrupt\num) | 698 | END(invalidate_interrupt\num) |
699 | .endm | 699 | .endm |
700 | 700 | ||
701 | INVALIDATE_ENTRY 0 | 701 | INVALIDATE_ENTRY 0 |
702 | INVALIDATE_ENTRY 1 | 702 | INVALIDATE_ENTRY 1 |
703 | INVALIDATE_ENTRY 2 | 703 | INVALIDATE_ENTRY 2 |
704 | INVALIDATE_ENTRY 3 | 704 | INVALIDATE_ENTRY 3 |
705 | INVALIDATE_ENTRY 4 | 705 | INVALIDATE_ENTRY 4 |
706 | INVALIDATE_ENTRY 5 | 706 | INVALIDATE_ENTRY 5 |
707 | INVALIDATE_ENTRY 6 | 707 | INVALIDATE_ENTRY 6 |
708 | INVALIDATE_ENTRY 7 | 708 | INVALIDATE_ENTRY 7 |
709 | 709 | ||
710 | ENTRY(call_function_interrupt) | 710 | ENTRY(call_function_interrupt) |
711 | apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt | 711 | apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt |
712 | END(call_function_interrupt) | 712 | END(call_function_interrupt) |
713 | ENTRY(irq_move_cleanup_interrupt) | 713 | ENTRY(irq_move_cleanup_interrupt) |
714 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt | 714 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt |
715 | END(irq_move_cleanup_interrupt) | 715 | END(irq_move_cleanup_interrupt) |
716 | #endif | 716 | #endif |
717 | 717 | ||
718 | ENTRY(apic_timer_interrupt) | 718 | ENTRY(apic_timer_interrupt) |
719 | apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt | 719 | apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt |
720 | END(apic_timer_interrupt) | 720 | END(apic_timer_interrupt) |
721 | 721 | ||
722 | ENTRY(uv_bau_message_intr1) | 722 | ENTRY(uv_bau_message_intr1) |
723 | apicinterrupt 220,uv_bau_message_interrupt | 723 | apicinterrupt 220,uv_bau_message_interrupt |
724 | END(uv_bau_message_intr1) | 724 | END(uv_bau_message_intr1) |
725 | 725 | ||
726 | ENTRY(error_interrupt) | 726 | ENTRY(error_interrupt) |
727 | apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt | 727 | apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt |
728 | END(error_interrupt) | 728 | END(error_interrupt) |
729 | 729 | ||
730 | ENTRY(spurious_interrupt) | 730 | ENTRY(spurious_interrupt) |
731 | apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt | 731 | apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt |
732 | END(spurious_interrupt) | 732 | END(spurious_interrupt) |
733 | 733 | ||
734 | /* | 734 | /* |
735 | * Exception entry points. | 735 | * Exception entry points. |
736 | */ | 736 | */ |
737 | .macro zeroentry sym | 737 | .macro zeroentry sym |
738 | INTR_FRAME | 738 | INTR_FRAME |
739 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 739 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
740 | pushq $0 /* push error code/oldrax */ | 740 | pushq $0 /* push error code/oldrax */ |
741 | CFI_ADJUST_CFA_OFFSET 8 | 741 | CFI_ADJUST_CFA_OFFSET 8 |
742 | pushq %rax /* push real oldrax to the rdi slot */ | 742 | pushq %rax /* push real oldrax to the rdi slot */ |
743 | CFI_ADJUST_CFA_OFFSET 8 | 743 | CFI_ADJUST_CFA_OFFSET 8 |
744 | CFI_REL_OFFSET rax,0 | 744 | CFI_REL_OFFSET rax,0 |
745 | leaq \sym(%rip),%rax | 745 | leaq \sym(%rip),%rax |
746 | jmp error_entry | 746 | jmp error_entry |
747 | CFI_ENDPROC | 747 | CFI_ENDPROC |
748 | .endm | 748 | .endm |
749 | 749 | ||
750 | .macro errorentry sym | 750 | .macro errorentry sym |
751 | XCPT_FRAME | 751 | XCPT_FRAME |
752 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 752 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
753 | pushq %rax | 753 | pushq %rax |
754 | CFI_ADJUST_CFA_OFFSET 8 | 754 | CFI_ADJUST_CFA_OFFSET 8 |
755 | CFI_REL_OFFSET rax,0 | 755 | CFI_REL_OFFSET rax,0 |
756 | leaq \sym(%rip),%rax | 756 | leaq \sym(%rip),%rax |
757 | jmp error_entry | 757 | jmp error_entry |
758 | CFI_ENDPROC | 758 | CFI_ENDPROC |
759 | .endm | 759 | .endm |
760 | 760 | ||
761 | /* error code is on the stack already */ | 761 | /* error code is on the stack already */ |
762 | /* handle NMI like exceptions that can happen everywhere */ | 762 | /* handle NMI like exceptions that can happen everywhere */ |
763 | .macro paranoidentry sym, ist=0, irqtrace=1 | 763 | .macro paranoidentry sym, ist=0, irqtrace=1 |
764 | SAVE_ALL | 764 | SAVE_ALL |
765 | cld | 765 | cld |
766 | movl $1,%ebx | 766 | movl $1,%ebx |
767 | movl $MSR_GS_BASE,%ecx | 767 | movl $MSR_GS_BASE,%ecx |
768 | rdmsr | 768 | rdmsr |
769 | testl %edx,%edx | 769 | testl %edx,%edx |
770 | js 1f | 770 | js 1f |
771 | SWAPGS | 771 | SWAPGS |
772 | xorl %ebx,%ebx | 772 | xorl %ebx,%ebx |
773 | 1: | 773 | 1: |
774 | .if \ist | 774 | .if \ist |
775 | movq %gs:pda_data_offset, %rbp | 775 | movq %gs:pda_data_offset, %rbp |
776 | .endif | 776 | .endif |
777 | movq %rsp,%rdi | 777 | movq %rsp,%rdi |
778 | movq ORIG_RAX(%rsp),%rsi | 778 | movq ORIG_RAX(%rsp),%rsi |
779 | movq $-1,ORIG_RAX(%rsp) | 779 | movq $-1,ORIG_RAX(%rsp) |
780 | .if \ist | 780 | .if \ist |
781 | subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | 781 | subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) |
782 | .endif | 782 | .endif |
783 | call \sym | 783 | call \sym |
784 | .if \ist | 784 | .if \ist |
785 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | 785 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) |
786 | .endif | 786 | .endif |
787 | DISABLE_INTERRUPTS(CLBR_NONE) | 787 | DISABLE_INTERRUPTS(CLBR_NONE) |
788 | .if \irqtrace | 788 | .if \irqtrace |
789 | TRACE_IRQS_OFF | 789 | TRACE_IRQS_OFF |
790 | .endif | 790 | .endif |
791 | .endm | 791 | .endm |
792 | 792 | ||
793 | /* | 793 | /* |
794 | * "Paranoid" exit path from exception stack. | 794 | * "Paranoid" exit path from exception stack. |
795 | * Paranoid because this is used by NMIs and cannot take | 795 | * Paranoid because this is used by NMIs and cannot take |
796 | * any kernel state for granted. | 796 | * any kernel state for granted. |
797 | * We don't do kernel preemption checks here, because only | 797 | * We don't do kernel preemption checks here, because only |
798 | * NMI should be common and it does not enable IRQs and | 798 | * NMI should be common and it does not enable IRQs and |
799 | * cannot get reschedule ticks. | 799 | * cannot get reschedule ticks. |
800 | * | 800 | * |
801 | * "trace" is 0 for the NMI handler only, because irq-tracing | 801 | * "trace" is 0 for the NMI handler only, because irq-tracing |
802 | * is fundamentally NMI-unsafe. (we cannot change the soft and | 802 | * is fundamentally NMI-unsafe. (we cannot change the soft and |
803 | * hard flags at once, atomically) | 803 | * hard flags at once, atomically) |
804 | */ | 804 | */ |
805 | .macro paranoidexit trace=1 | 805 | .macro paranoidexit trace=1 |
806 | /* ebx: no swapgs flag */ | 806 | /* ebx: no swapgs flag */ |
807 | paranoid_exit\trace: | 807 | paranoid_exit\trace: |
808 | testl %ebx,%ebx /* swapgs needed? */ | 808 | testl %ebx,%ebx /* swapgs needed? */ |
809 | jnz paranoid_restore\trace | 809 | jnz paranoid_restore\trace |
810 | testl $3,CS(%rsp) | 810 | testl $3,CS(%rsp) |
811 | jnz paranoid_userspace\trace | 811 | jnz paranoid_userspace\trace |
812 | paranoid_swapgs\trace: | 812 | paranoid_swapgs\trace: |
813 | .if \trace | 813 | .if \trace |
814 | TRACE_IRQS_IRETQ 0 | 814 | TRACE_IRQS_IRETQ 0 |
815 | .endif | 815 | .endif |
816 | SWAPGS_UNSAFE_STACK | 816 | SWAPGS_UNSAFE_STACK |
817 | paranoid_restore\trace: | 817 | paranoid_restore\trace: |
818 | RESTORE_ALL 8 | 818 | RESTORE_ALL 8 |
819 | jmp irq_return | 819 | jmp irq_return |
820 | paranoid_userspace\trace: | 820 | paranoid_userspace\trace: |
821 | GET_THREAD_INFO(%rcx) | 821 | GET_THREAD_INFO(%rcx) |
822 | movl threadinfo_flags(%rcx),%ebx | 822 | movl threadinfo_flags(%rcx),%ebx |
823 | andl $_TIF_WORK_MASK,%ebx | 823 | andl $_TIF_WORK_MASK,%ebx |
824 | jz paranoid_swapgs\trace | 824 | jz paranoid_swapgs\trace |
825 | movq %rsp,%rdi /* &pt_regs */ | 825 | movq %rsp,%rdi /* &pt_regs */ |
826 | call sync_regs | 826 | call sync_regs |
827 | movq %rax,%rsp /* switch stack for scheduling */ | 827 | movq %rax,%rsp /* switch stack for scheduling */ |
828 | testl $_TIF_NEED_RESCHED,%ebx | 828 | testl $_TIF_NEED_RESCHED,%ebx |
829 | jnz paranoid_schedule\trace | 829 | jnz paranoid_schedule\trace |
830 | movl %ebx,%edx /* arg3: thread flags */ | 830 | movl %ebx,%edx /* arg3: thread flags */ |
831 | .if \trace | 831 | .if \trace |
832 | TRACE_IRQS_ON | 832 | TRACE_IRQS_ON |
833 | .endif | 833 | .endif |
834 | ENABLE_INTERRUPTS(CLBR_NONE) | 834 | ENABLE_INTERRUPTS(CLBR_NONE) |
835 | xorl %esi,%esi /* arg2: oldset */ | 835 | xorl %esi,%esi /* arg2: oldset */ |
836 | movq %rsp,%rdi /* arg1: &pt_regs */ | 836 | movq %rsp,%rdi /* arg1: &pt_regs */ |
837 | call do_notify_resume | 837 | call do_notify_resume |
838 | DISABLE_INTERRUPTS(CLBR_NONE) | 838 | DISABLE_INTERRUPTS(CLBR_NONE) |
839 | .if \trace | 839 | .if \trace |
840 | TRACE_IRQS_OFF | 840 | TRACE_IRQS_OFF |
841 | .endif | 841 | .endif |
842 | jmp paranoid_userspace\trace | 842 | jmp paranoid_userspace\trace |
843 | paranoid_schedule\trace: | 843 | paranoid_schedule\trace: |
844 | .if \trace | 844 | .if \trace |
845 | TRACE_IRQS_ON | 845 | TRACE_IRQS_ON |
846 | .endif | 846 | .endif |
847 | ENABLE_INTERRUPTS(CLBR_ANY) | 847 | ENABLE_INTERRUPTS(CLBR_ANY) |
848 | call schedule | 848 | call schedule |
849 | DISABLE_INTERRUPTS(CLBR_ANY) | 849 | DISABLE_INTERRUPTS(CLBR_ANY) |
850 | .if \trace | 850 | .if \trace |
851 | TRACE_IRQS_OFF | 851 | TRACE_IRQS_OFF |
852 | .endif | 852 | .endif |
853 | jmp paranoid_userspace\trace | 853 | jmp paranoid_userspace\trace |
854 | CFI_ENDPROC | 854 | CFI_ENDPROC |
855 | .endm | 855 | .endm |
856 | 856 | ||
857 | /* | 857 | /* |
858 | * Exception entry point. This expects an error code/orig_rax on the stack | 858 | * Exception entry point. This expects an error code/orig_rax on the stack |
859 | * and the exception handler in %rax. | 859 | * and the exception handler in %rax. |
860 | */ | 860 | */ |
861 | KPROBE_ENTRY(error_entry) | 861 | KPROBE_ENTRY(error_entry) |
862 | _frame RDI | 862 | _frame RDI |
863 | CFI_REL_OFFSET rax,0 | 863 | CFI_REL_OFFSET rax,0 |
864 | /* rdi slot contains rax, oldrax contains error code */ | 864 | /* rdi slot contains rax, oldrax contains error code */ |
865 | cld | 865 | cld |
866 | subq $14*8,%rsp | 866 | subq $14*8,%rsp |
867 | CFI_ADJUST_CFA_OFFSET (14*8) | 867 | CFI_ADJUST_CFA_OFFSET (14*8) |
868 | movq %rsi,13*8(%rsp) | 868 | movq %rsi,13*8(%rsp) |
869 | CFI_REL_OFFSET rsi,RSI | 869 | CFI_REL_OFFSET rsi,RSI |
870 | movq 14*8(%rsp),%rsi /* load rax from rdi slot */ | 870 | movq 14*8(%rsp),%rsi /* load rax from rdi slot */ |
871 | CFI_REGISTER rax,rsi | 871 | CFI_REGISTER rax,rsi |
872 | movq %rdx,12*8(%rsp) | 872 | movq %rdx,12*8(%rsp) |
873 | CFI_REL_OFFSET rdx,RDX | 873 | CFI_REL_OFFSET rdx,RDX |
874 | movq %rcx,11*8(%rsp) | 874 | movq %rcx,11*8(%rsp) |
875 | CFI_REL_OFFSET rcx,RCX | 875 | CFI_REL_OFFSET rcx,RCX |
876 | movq %rsi,10*8(%rsp) /* store rax */ | 876 | movq %rsi,10*8(%rsp) /* store rax */ |
877 | CFI_REL_OFFSET rax,RAX | 877 | CFI_REL_OFFSET rax,RAX |
878 | movq %r8, 9*8(%rsp) | 878 | movq %r8, 9*8(%rsp) |
879 | CFI_REL_OFFSET r8,R8 | 879 | CFI_REL_OFFSET r8,R8 |
880 | movq %r9, 8*8(%rsp) | 880 | movq %r9, 8*8(%rsp) |
881 | CFI_REL_OFFSET r9,R9 | 881 | CFI_REL_OFFSET r9,R9 |
882 | movq %r10,7*8(%rsp) | 882 | movq %r10,7*8(%rsp) |
883 | CFI_REL_OFFSET r10,R10 | 883 | CFI_REL_OFFSET r10,R10 |
884 | movq %r11,6*8(%rsp) | 884 | movq %r11,6*8(%rsp) |
885 | CFI_REL_OFFSET r11,R11 | 885 | CFI_REL_OFFSET r11,R11 |
886 | movq %rbx,5*8(%rsp) | 886 | movq %rbx,5*8(%rsp) |
887 | CFI_REL_OFFSET rbx,RBX | 887 | CFI_REL_OFFSET rbx,RBX |
888 | movq %rbp,4*8(%rsp) | 888 | movq %rbp,4*8(%rsp) |
889 | CFI_REL_OFFSET rbp,RBP | 889 | CFI_REL_OFFSET rbp,RBP |
890 | movq %r12,3*8(%rsp) | 890 | movq %r12,3*8(%rsp) |
891 | CFI_REL_OFFSET r12,R12 | 891 | CFI_REL_OFFSET r12,R12 |
892 | movq %r13,2*8(%rsp) | 892 | movq %r13,2*8(%rsp) |
893 | CFI_REL_OFFSET r13,R13 | 893 | CFI_REL_OFFSET r13,R13 |
894 | movq %r14,1*8(%rsp) | 894 | movq %r14,1*8(%rsp) |
895 | CFI_REL_OFFSET r14,R14 | 895 | CFI_REL_OFFSET r14,R14 |
896 | movq %r15,(%rsp) | 896 | movq %r15,(%rsp) |
897 | CFI_REL_OFFSET r15,R15 | 897 | CFI_REL_OFFSET r15,R15 |
898 | xorl %ebx,%ebx | 898 | xorl %ebx,%ebx |
899 | testl $3,CS(%rsp) | 899 | testl $3,CS(%rsp) |
900 | je error_kernelspace | 900 | je error_kernelspace |
901 | error_swapgs: | 901 | error_swapgs: |
902 | SWAPGS | 902 | SWAPGS |
903 | error_sti: | 903 | error_sti: |
904 | movq %rdi,RDI(%rsp) | 904 | movq %rdi,RDI(%rsp) |
905 | CFI_REL_OFFSET rdi,RDI | 905 | CFI_REL_OFFSET rdi,RDI |
906 | movq %rsp,%rdi | 906 | movq %rsp,%rdi |
907 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | 907 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
908 | movq $-1,ORIG_RAX(%rsp) | 908 | movq $-1,ORIG_RAX(%rsp) |
909 | call *%rax | 909 | call *%rax |
910 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | 910 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ |
911 | error_exit: | 911 | error_exit: |
912 | movl %ebx,%eax | 912 | movl %ebx,%eax |
913 | RESTORE_REST | 913 | RESTORE_REST |
914 | DISABLE_INTERRUPTS(CLBR_NONE) | 914 | DISABLE_INTERRUPTS(CLBR_NONE) |
915 | TRACE_IRQS_OFF | 915 | TRACE_IRQS_OFF |
916 | GET_THREAD_INFO(%rcx) | 916 | GET_THREAD_INFO(%rcx) |
917 | testl %eax,%eax | 917 | testl %eax,%eax |
918 | jne retint_kernel | 918 | jne retint_kernel |
919 | LOCKDEP_SYS_EXIT_IRQ | 919 | LOCKDEP_SYS_EXIT_IRQ |
920 | movl threadinfo_flags(%rcx),%edx | 920 | movl threadinfo_flags(%rcx),%edx |
921 | movl $_TIF_WORK_MASK,%edi | 921 | movl $_TIF_WORK_MASK,%edi |
922 | andl %edi,%edx | 922 | andl %edi,%edx |
923 | jnz retint_careful | 923 | jnz retint_careful |
924 | jmp retint_swapgs | 924 | jmp retint_swapgs |
925 | CFI_ENDPROC | 925 | CFI_ENDPROC |
926 | 926 | ||
927 | error_kernelspace: | 927 | error_kernelspace: |
928 | incl %ebx | 928 | incl %ebx |
929 | /* There are two places in the kernel that can potentially fault with | 929 | /* There are two places in the kernel that can potentially fault with |
930 | usergs. Handle them here. The exception handlers after | 930 | usergs. Handle them here. The exception handlers after |
931 | iret run with kernel gs again, so don't set the user space flag. | 931 | iret run with kernel gs again, so don't set the user space flag. |
932 | B stepping K8s sometimes report a truncated RIP for IRET | 932 | B stepping K8s sometimes report a truncated RIP for IRET |
933 | exceptions returning to compat mode. Check for these here too. */ | 933 | exceptions returning to compat mode. Check for these here too. */ |
934 | leaq irq_return(%rip),%rcx | 934 | leaq irq_return(%rip),%rcx |
935 | cmpq %rcx,RIP(%rsp) | 935 | cmpq %rcx,RIP(%rsp) |
936 | je error_swapgs | 936 | je error_swapgs |
937 | movl %ecx,%ecx /* zero extend */ | 937 | movl %ecx,%ecx /* zero extend */ |
938 | cmpq %rcx,RIP(%rsp) | 938 | cmpq %rcx,RIP(%rsp) |
939 | je error_swapgs | 939 | je error_swapgs |
940 | cmpq $gs_change,RIP(%rsp) | 940 | cmpq $gs_change,RIP(%rsp) |
941 | je error_swapgs | 941 | je error_swapgs |
942 | jmp error_sti | 942 | jmp error_sti |
943 | KPROBE_END(error_entry) | 943 | KPROBE_END(error_entry) |
944 | 944 | ||
945 | /* Reload gs selector with exception handling */ | 945 | /* Reload gs selector with exception handling */ |
946 | /* edi: new selector */ | 946 | /* edi: new selector */ |
947 | ENTRY(load_gs_index) | 947 | ENTRY(native_load_gs_index) |
948 | CFI_STARTPROC | 948 | CFI_STARTPROC |
949 | pushf | 949 | pushf |
950 | CFI_ADJUST_CFA_OFFSET 8 | 950 | CFI_ADJUST_CFA_OFFSET 8 |
951 | DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) | 951 | DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) |
952 | SWAPGS | 952 | SWAPGS |
953 | gs_change: | 953 | gs_change: |
954 | movl %edi,%gs | 954 | movl %edi,%gs |
955 | 2: mfence /* workaround */ | 955 | 2: mfence /* workaround */ |
956 | SWAPGS | 956 | SWAPGS |
957 | popf | 957 | popf |
958 | CFI_ADJUST_CFA_OFFSET -8 | 958 | CFI_ADJUST_CFA_OFFSET -8 |
959 | ret | 959 | ret |
960 | CFI_ENDPROC | 960 | CFI_ENDPROC |
961 | ENDPROC(load_gs_index) | 961 | ENDPROC(native_load_gs_index) |
962 | 962 | ||
963 | .section __ex_table,"a" | 963 | .section __ex_table,"a" |
964 | .align 8 | 964 | .align 8 |
965 | .quad gs_change,bad_gs | 965 | .quad gs_change,bad_gs |
966 | .previous | 966 | .previous |
967 | .section .fixup,"ax" | 967 | .section .fixup,"ax" |
968 | /* running with kernelgs */ | 968 | /* running with kernelgs */ |
969 | bad_gs: | 969 | bad_gs: |
970 | SWAPGS /* switch back to user gs */ | 970 | SWAPGS /* switch back to user gs */ |
971 | xorl %eax,%eax | 971 | xorl %eax,%eax |
972 | movl %eax,%gs | 972 | movl %eax,%gs |
973 | jmp 2b | 973 | jmp 2b |
974 | .previous | 974 | .previous |
975 | 975 | ||
976 | /* | 976 | /* |
977 | * Create a kernel thread. | 977 | * Create a kernel thread. |
978 | * | 978 | * |
979 | * C extern interface: | 979 | * C extern interface: |
980 | * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | 980 | * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) |
981 | * | 981 | * |
982 | * asm input arguments: | 982 | * asm input arguments: |
983 | * rdi: fn, rsi: arg, rdx: flags | 983 | * rdi: fn, rsi: arg, rdx: flags |
984 | */ | 984 | */ |
985 | ENTRY(kernel_thread) | 985 | ENTRY(kernel_thread) |
986 | CFI_STARTPROC | 986 | CFI_STARTPROC |
987 | FAKE_STACK_FRAME $child_rip | 987 | FAKE_STACK_FRAME $child_rip |
988 | SAVE_ALL | 988 | SAVE_ALL |
989 | 989 | ||
990 | # rdi: flags, rsi: usp, rdx: will be &pt_regs | 990 | # rdi: flags, rsi: usp, rdx: will be &pt_regs |
991 | movq %rdx,%rdi | 991 | movq %rdx,%rdi |
992 | orq kernel_thread_flags(%rip),%rdi | 992 | orq kernel_thread_flags(%rip),%rdi |
993 | movq $-1, %rsi | 993 | movq $-1, %rsi |
994 | movq %rsp, %rdx | 994 | movq %rsp, %rdx |
995 | 995 | ||
996 | xorl %r8d,%r8d | 996 | xorl %r8d,%r8d |
997 | xorl %r9d,%r9d | 997 | xorl %r9d,%r9d |
998 | 998 | ||
999 | # clone now | 999 | # clone now |
1000 | call do_fork | 1000 | call do_fork |
1001 | movq %rax,RAX(%rsp) | 1001 | movq %rax,RAX(%rsp) |
1002 | xorl %edi,%edi | 1002 | xorl %edi,%edi |
1003 | 1003 | ||
1004 | /* | 1004 | /* |
1005 | * It isn't worth it to check for reschedule here, | 1005 | * It isn't worth it to check for reschedule here, |
1006 | * so internally to the x86_64 port you can rely on kernel_thread() | 1006 | * so internally to the x86_64 port you can rely on kernel_thread() |
1007 | * not to reschedule the child before returning; this avoids the need | 1007 | * not to reschedule the child before returning; this avoids the need |
1008 | * for hacks, for example to fork off the per-CPU idle tasks. | 1008 | * for hacks, for example to fork off the per-CPU idle tasks. |
1009 | * [Hopefully no generic code relies on the reschedule -AK] | 1009 | * [Hopefully no generic code relies on the reschedule -AK] |
1010 | */ | 1010 | */ |
1011 | RESTORE_ALL | 1011 | RESTORE_ALL |
1012 | UNFAKE_STACK_FRAME | 1012 | UNFAKE_STACK_FRAME |
1013 | ret | 1013 | ret |
1014 | CFI_ENDPROC | 1014 | CFI_ENDPROC |
1015 | ENDPROC(kernel_thread) | 1015 | ENDPROC(kernel_thread) |
1016 | 1016 | ||
1017 | child_rip: | 1017 | child_rip: |
1018 | pushq $0 # fake return address | 1018 | pushq $0 # fake return address |
1019 | CFI_STARTPROC | 1019 | CFI_STARTPROC |
1020 | /* | 1020 | /* |
1021 | * Here we are in the child and the registers are set as they were | 1021 | * Here we are in the child and the registers are set as they were |
1022 | * at kernel_thread() invocation in the parent. | 1022 | * at kernel_thread() invocation in the parent. |
1023 | */ | 1023 | */ |
1024 | movq %rdi, %rax | 1024 | movq %rdi, %rax |
1025 | movq %rsi, %rdi | 1025 | movq %rsi, %rdi |
1026 | call *%rax | 1026 | call *%rax |
1027 | # exit | 1027 | # exit |
1028 | mov %eax, %edi | 1028 | mov %eax, %edi |
1029 | call do_exit | 1029 | call do_exit |
1030 | CFI_ENDPROC | 1030 | CFI_ENDPROC |
1031 | ENDPROC(child_rip) | 1031 | ENDPROC(child_rip) |
1032 | 1032 | ||
1033 | /* | 1033 | /* |
1034 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. | 1034 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. |
1035 | * | 1035 | * |
1036 | * C extern interface: | 1036 | * C extern interface: |
1037 | * extern long execve(char *name, char **argv, char **envp) | 1037 | * extern long execve(char *name, char **argv, char **envp) |
1038 | * | 1038 | * |
1039 | * asm input arguments: | 1039 | * asm input arguments: |
1040 | * rdi: name, rsi: argv, rdx: envp | 1040 | * rdi: name, rsi: argv, rdx: envp |
1041 | * | 1041 | * |
1042 | * We want to fall back into: | 1042 | * We want to fall back into: |
1043 | * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs) | 1043 | * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs) |
1044 | * | 1044 | * |
1045 | * do_sys_execve asm fallback arguments: | 1045 | * do_sys_execve asm fallback arguments: |
1046 | * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack | 1046 | * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack |
1047 | */ | 1047 | */ |
1048 | ENTRY(kernel_execve) | 1048 | ENTRY(kernel_execve) |
1049 | CFI_STARTPROC | 1049 | CFI_STARTPROC |
1050 | FAKE_STACK_FRAME $0 | 1050 | FAKE_STACK_FRAME $0 |
1051 | SAVE_ALL | 1051 | SAVE_ALL |
1052 | movq %rsp,%rcx | 1052 | movq %rsp,%rcx |
1053 | call sys_execve | 1053 | call sys_execve |
1054 | movq %rax, RAX(%rsp) | 1054 | movq %rax, RAX(%rsp) |
1055 | RESTORE_REST | 1055 | RESTORE_REST |
1056 | testq %rax,%rax | 1056 | testq %rax,%rax |
1057 | je int_ret_from_sys_call | 1057 | je int_ret_from_sys_call |
1058 | RESTORE_ARGS | 1058 | RESTORE_ARGS |
1059 | UNFAKE_STACK_FRAME | 1059 | UNFAKE_STACK_FRAME |
1060 | ret | 1060 | ret |
1061 | CFI_ENDPROC | 1061 | CFI_ENDPROC |
1062 | ENDPROC(kernel_execve) | 1062 | ENDPROC(kernel_execve) |
1063 | 1063 | ||
1064 | KPROBE_ENTRY(page_fault) | 1064 | KPROBE_ENTRY(page_fault) |
1065 | errorentry do_page_fault | 1065 | errorentry do_page_fault |
1066 | KPROBE_END(page_fault) | 1066 | KPROBE_END(page_fault) |
1067 | 1067 | ||
1068 | ENTRY(coprocessor_error) | 1068 | ENTRY(coprocessor_error) |
1069 | zeroentry do_coprocessor_error | 1069 | zeroentry do_coprocessor_error |
1070 | END(coprocessor_error) | 1070 | END(coprocessor_error) |
1071 | 1071 | ||
1072 | ENTRY(simd_coprocessor_error) | 1072 | ENTRY(simd_coprocessor_error) |
1073 | zeroentry do_simd_coprocessor_error | 1073 | zeroentry do_simd_coprocessor_error |
1074 | END(simd_coprocessor_error) | 1074 | END(simd_coprocessor_error) |
1075 | 1075 | ||
1076 | ENTRY(device_not_available) | 1076 | ENTRY(device_not_available) |
1077 | zeroentry math_state_restore | 1077 | zeroentry math_state_restore |
1078 | END(device_not_available) | 1078 | END(device_not_available) |
1079 | 1079 | ||
1080 | /* runs on exception stack */ | 1080 | /* runs on exception stack */ |
1081 | KPROBE_ENTRY(debug) | 1081 | KPROBE_ENTRY(debug) |
1082 | INTR_FRAME | 1082 | INTR_FRAME |
1083 | pushq $0 | 1083 | pushq $0 |
1084 | CFI_ADJUST_CFA_OFFSET 8 | 1084 | CFI_ADJUST_CFA_OFFSET 8 |
1085 | paranoidentry do_debug, DEBUG_STACK | 1085 | paranoidentry do_debug, DEBUG_STACK |
1086 | paranoidexit | 1086 | paranoidexit |
1087 | KPROBE_END(debug) | 1087 | KPROBE_END(debug) |
1088 | 1088 | ||
1089 | /* runs on exception stack */ | 1089 | /* runs on exception stack */ |
1090 | KPROBE_ENTRY(nmi) | 1090 | KPROBE_ENTRY(nmi) |
1091 | INTR_FRAME | 1091 | INTR_FRAME |
1092 | pushq $-1 | 1092 | pushq $-1 |
1093 | CFI_ADJUST_CFA_OFFSET 8 | 1093 | CFI_ADJUST_CFA_OFFSET 8 |
1094 | paranoidentry do_nmi, 0, 0 | 1094 | paranoidentry do_nmi, 0, 0 |
1095 | #ifdef CONFIG_TRACE_IRQFLAGS | 1095 | #ifdef CONFIG_TRACE_IRQFLAGS |
1096 | paranoidexit 0 | 1096 | paranoidexit 0 |
1097 | #else | 1097 | #else |
1098 | jmp paranoid_exit1 | 1098 | jmp paranoid_exit1 |
1099 | CFI_ENDPROC | 1099 | CFI_ENDPROC |
1100 | #endif | 1100 | #endif |
1101 | KPROBE_END(nmi) | 1101 | KPROBE_END(nmi) |
1102 | 1102 | ||
1103 | KPROBE_ENTRY(int3) | 1103 | KPROBE_ENTRY(int3) |
1104 | INTR_FRAME | 1104 | INTR_FRAME |
1105 | pushq $0 | 1105 | pushq $0 |
1106 | CFI_ADJUST_CFA_OFFSET 8 | 1106 | CFI_ADJUST_CFA_OFFSET 8 |
1107 | paranoidentry do_int3, DEBUG_STACK | 1107 | paranoidentry do_int3, DEBUG_STACK |
1108 | jmp paranoid_exit1 | 1108 | jmp paranoid_exit1 |
1109 | CFI_ENDPROC | 1109 | CFI_ENDPROC |
1110 | KPROBE_END(int3) | 1110 | KPROBE_END(int3) |
1111 | 1111 | ||
1112 | ENTRY(overflow) | 1112 | ENTRY(overflow) |
1113 | zeroentry do_overflow | 1113 | zeroentry do_overflow |
1114 | END(overflow) | 1114 | END(overflow) |
1115 | 1115 | ||
1116 | ENTRY(bounds) | 1116 | ENTRY(bounds) |
1117 | zeroentry do_bounds | 1117 | zeroentry do_bounds |
1118 | END(bounds) | 1118 | END(bounds) |
1119 | 1119 | ||
1120 | ENTRY(invalid_op) | 1120 | ENTRY(invalid_op) |
1121 | zeroentry do_invalid_op | 1121 | zeroentry do_invalid_op |
1122 | END(invalid_op) | 1122 | END(invalid_op) |
1123 | 1123 | ||
1124 | ENTRY(coprocessor_segment_overrun) | 1124 | ENTRY(coprocessor_segment_overrun) |
1125 | zeroentry do_coprocessor_segment_overrun | 1125 | zeroentry do_coprocessor_segment_overrun |
1126 | END(coprocessor_segment_overrun) | 1126 | END(coprocessor_segment_overrun) |
1127 | 1127 | ||
1128 | /* runs on exception stack */ | 1128 | /* runs on exception stack */ |
1129 | ENTRY(double_fault) | 1129 | ENTRY(double_fault) |
1130 | XCPT_FRAME | 1130 | XCPT_FRAME |
1131 | paranoidentry do_double_fault | 1131 | paranoidentry do_double_fault |
1132 | jmp paranoid_exit1 | 1132 | jmp paranoid_exit1 |
1133 | CFI_ENDPROC | 1133 | CFI_ENDPROC |
1134 | END(double_fault) | 1134 | END(double_fault) |
1135 | 1135 | ||
1136 | ENTRY(invalid_TSS) | 1136 | ENTRY(invalid_TSS) |
1137 | errorentry do_invalid_TSS | 1137 | errorentry do_invalid_TSS |
1138 | END(invalid_TSS) | 1138 | END(invalid_TSS) |
1139 | 1139 | ||
1140 | ENTRY(segment_not_present) | 1140 | ENTRY(segment_not_present) |
1141 | errorentry do_segment_not_present | 1141 | errorentry do_segment_not_present |
1142 | END(segment_not_present) | 1142 | END(segment_not_present) |
1143 | 1143 | ||
1144 | /* runs on exception stack */ | 1144 | /* runs on exception stack */ |
1145 | ENTRY(stack_segment) | 1145 | ENTRY(stack_segment) |
1146 | XCPT_FRAME | 1146 | XCPT_FRAME |
1147 | paranoidentry do_stack_segment | 1147 | paranoidentry do_stack_segment |
1148 | jmp paranoid_exit1 | 1148 | jmp paranoid_exit1 |
1149 | CFI_ENDPROC | 1149 | CFI_ENDPROC |
1150 | END(stack_segment) | 1150 | END(stack_segment) |
1151 | 1151 | ||
1152 | KPROBE_ENTRY(general_protection) | 1152 | KPROBE_ENTRY(general_protection) |
1153 | errorentry do_general_protection | 1153 | errorentry do_general_protection |
1154 | KPROBE_END(general_protection) | 1154 | KPROBE_END(general_protection) |
1155 | 1155 | ||
1156 | ENTRY(alignment_check) | 1156 | ENTRY(alignment_check) |
1157 | errorentry do_alignment_check | 1157 | errorentry do_alignment_check |
1158 | END(alignment_check) | 1158 | END(alignment_check) |
1159 | 1159 | ||
1160 | ENTRY(divide_error) | 1160 | ENTRY(divide_error) |
1161 | zeroentry do_divide_error | 1161 | zeroentry do_divide_error |
1162 | END(divide_error) | 1162 | END(divide_error) |
1163 | 1163 | ||
1164 | ENTRY(spurious_interrupt_bug) | 1164 | ENTRY(spurious_interrupt_bug) |
1165 | zeroentry do_spurious_interrupt_bug | 1165 | zeroentry do_spurious_interrupt_bug |
1166 | END(spurious_interrupt_bug) | 1166 | END(spurious_interrupt_bug) |
1167 | 1167 | ||
1168 | #ifdef CONFIG_X86_MCE | 1168 | #ifdef CONFIG_X86_MCE |
1169 | /* runs on exception stack */ | 1169 | /* runs on exception stack */ |
1170 | ENTRY(machine_check) | 1170 | ENTRY(machine_check) |
1171 | INTR_FRAME | 1171 | INTR_FRAME |
1172 | pushq $0 | 1172 | pushq $0 |
1173 | CFI_ADJUST_CFA_OFFSET 8 | 1173 | CFI_ADJUST_CFA_OFFSET 8 |
1174 | paranoidentry do_machine_check | 1174 | paranoidentry do_machine_check |
1175 | jmp paranoid_exit1 | 1175 | jmp paranoid_exit1 |
1176 | CFI_ENDPROC | 1176 | CFI_ENDPROC |
1177 | END(machine_check) | 1177 | END(machine_check) |
1178 | #endif | 1178 | #endif |
1179 | 1179 | ||
1180 | /* Call softirq on interrupt stack. Interrupts are off. */ | 1180 | /* Call softirq on interrupt stack. Interrupts are off. */ |
1181 | ENTRY(call_softirq) | 1181 | ENTRY(call_softirq) |
1182 | CFI_STARTPROC | 1182 | CFI_STARTPROC |
1183 | push %rbp | 1183 | push %rbp |
1184 | CFI_ADJUST_CFA_OFFSET 8 | 1184 | CFI_ADJUST_CFA_OFFSET 8 |
1185 | CFI_REL_OFFSET rbp,0 | 1185 | CFI_REL_OFFSET rbp,0 |
1186 | mov %rsp,%rbp | 1186 | mov %rsp,%rbp |
1187 | CFI_DEF_CFA_REGISTER rbp | 1187 | CFI_DEF_CFA_REGISTER rbp |
1188 | incl %gs:pda_irqcount | 1188 | incl %gs:pda_irqcount |
1189 | cmove %gs:pda_irqstackptr,%rsp | 1189 | cmove %gs:pda_irqstackptr,%rsp |
1190 | push %rbp # backlink for old unwinder | 1190 | push %rbp # backlink for old unwinder |
1191 | call __do_softirq | 1191 | call __do_softirq |
1192 | leaveq | 1192 | leaveq |
1193 | CFI_DEF_CFA_REGISTER rsp | 1193 | CFI_DEF_CFA_REGISTER rsp |
1194 | CFI_ADJUST_CFA_OFFSET -8 | 1194 | CFI_ADJUST_CFA_OFFSET -8 |
1195 | decl %gs:pda_irqcount | 1195 | decl %gs:pda_irqcount |
1196 | ret | 1196 | ret |
1197 | CFI_ENDPROC | 1197 | CFI_ENDPROC |
1198 | ENDPROC(call_softirq) | 1198 | ENDPROC(call_softirq) |
1199 | 1199 | ||
1200 | KPROBE_ENTRY(ignore_sysret) | 1200 | KPROBE_ENTRY(ignore_sysret) |
1201 | CFI_STARTPROC | 1201 | CFI_STARTPROC |
1202 | mov $-ENOSYS,%eax | 1202 | mov $-ENOSYS,%eax |
1203 | sysret | 1203 | sysret |
1204 | CFI_ENDPROC | 1204 | CFI_ENDPROC |
1205 | ENDPROC(ignore_sysret) | 1205 | ENDPROC(ignore_sysret) |
1206 | 1206 |
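
Note on the entry_64.S hunk: it renames the low-level GS selector reload from load_gs_index to native_load_gs_index; the body (SWAPGS, mov %edi,%gs, the mfence workaround, and the bad_gs fixup reached through the __ex_table entry at gs_change) is untouched. The rename frees the generic load_gs_index() name so it can be dispatched through pv_cpu_ops, which the paravirt.c hunk below wires up. As a rough illustration only, here is a minimal, self-contained user-space sketch of that op-table indirection; struct cpu_ops, xen_load_gs_index and the printf bodies are invented stand-ins for this sketch, not kernel code:

    #include <stdio.h>

    struct cpu_ops {
            void (*load_gs_index)(unsigned int sel);
    };

    /* stand-in for the renamed assembly routine in entry_64.S */
    static void native_load_gs_index(unsigned int sel)
    {
            printf("native: swapgs; mov $0x%x,%%gs; swapgs\n", sel);
    }

    /* stand-in for a hypervisor guest's replacement (hypothetical) */
    static void xen_load_gs_index(unsigned int sel)
    {
            printf("guest: ask the hypervisor to set GS selector 0x%x\n", sel);
    }

    static struct cpu_ops cpu_ops = {
            .load_gs_index = native_load_gs_index, /* default, as in pv_cpu_ops below */
    };

    /* what C callers such as start_ia32_thread() keep using, unchanged */
    static void load_gs_index(unsigned int sel)
    {
            cpu_ops.load_gs_index(sel);
    }

    int main(void)
    {
            load_gs_index(0);                           /* bare-hardware path */
            cpu_ops.load_gs_index = xen_load_gs_index;  /* what a pv guest would do at boot */
            load_gs_index(0);                           /* now routed to the guest hook */
            return 0;
    }

In the real kernel the indirect call site can additionally be rewritten into a direct call by the paravirt_patch_* machinery shown in paravirt.c below.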
arch/x86/kernel/paravirt.c
1 | /* Paravirtualization interfaces | 1 | /* Paravirtualization interfaces |
2 | Copyright (C) 2006 Rusty Russell IBM Corporation | 2 | Copyright (C) 2006 Rusty Russell IBM Corporation |
3 | 3 | ||
4 | This program is free software; you can redistribute it and/or modify | 4 | This program is free software; you can redistribute it and/or modify |
5 | it under the terms of the GNU General Public License as published by | 5 | it under the terms of the GNU General Public License as published by |
6 | the Free Software Foundation; either version 2 of the License, or | 6 | the Free Software Foundation; either version 2 of the License, or |
7 | (at your option) any later version. | 7 | (at your option) any later version. |
8 | 8 | ||
9 | This program is distributed in the hope that it will be useful, | 9 | This program is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | GNU General Public License for more details. | 12 | GNU General Public License for more details. |
13 | 13 | ||
14 | You should have received a copy of the GNU General Public License | 14 | You should have received a copy of the GNU General Public License |
15 | along with this program; if not, write to the Free Software | 15 | along with this program; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | 17 | ||
18 | 2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc | 18 | 2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/efi.h> | 23 | #include <linux/efi.h> |
24 | #include <linux/bcd.h> | 24 | #include <linux/bcd.h> |
25 | #include <linux/highmem.h> | 25 | #include <linux/highmem.h> |
26 | 26 | ||
27 | #include <asm/bug.h> | 27 | #include <asm/bug.h> |
28 | #include <asm/paravirt.h> | 28 | #include <asm/paravirt.h> |
29 | #include <asm/desc.h> | 29 | #include <asm/desc.h> |
30 | #include <asm/setup.h> | 30 | #include <asm/setup.h> |
31 | #include <asm/arch_hooks.h> | 31 | #include <asm/arch_hooks.h> |
32 | #include <asm/time.h> | 32 | #include <asm/time.h> |
33 | #include <asm/pgalloc.h> | 33 | #include <asm/pgalloc.h> |
34 | #include <asm/irq.h> | 34 | #include <asm/irq.h> |
35 | #include <asm/delay.h> | 35 | #include <asm/delay.h> |
36 | #include <asm/fixmap.h> | 36 | #include <asm/fixmap.h> |
37 | #include <asm/apic.h> | 37 | #include <asm/apic.h> |
38 | #include <asm/tlbflush.h> | 38 | #include <asm/tlbflush.h> |
39 | #include <asm/timer.h> | 39 | #include <asm/timer.h> |
40 | 40 | ||
41 | /* nop stub */ | 41 | /* nop stub */ |
42 | void _paravirt_nop(void) | 42 | void _paravirt_nop(void) |
43 | { | 43 | { |
44 | } | 44 | } |
45 | 45 | ||
46 | static void __init default_banner(void) | 46 | static void __init default_banner(void) |
47 | { | 47 | { |
48 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 48 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", |
49 | pv_info.name); | 49 | pv_info.name); |
50 | } | 50 | } |
51 | 51 | ||
52 | char *memory_setup(void) | 52 | char *memory_setup(void) |
53 | { | 53 | { |
54 | return pv_init_ops.memory_setup(); | 54 | return pv_init_ops.memory_setup(); |
55 | } | 55 | } |
56 | 56 | ||
57 | /* Simple instruction patching code. */ | 57 | /* Simple instruction patching code. */ |
58 | #define DEF_NATIVE(ops, name, code) \ | 58 | #define DEF_NATIVE(ops, name, code) \ |
59 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ | 59 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ |
60 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") | 60 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") |
61 | 61 | ||
62 | /* Undefined instruction for dealing with missing ops pointers. */ | 62 | /* Undefined instruction for dealing with missing ops pointers. */ |
63 | static const unsigned char ud2a[] = { 0x0f, 0x0b }; | 63 | static const unsigned char ud2a[] = { 0x0f, 0x0b }; |
64 | 64 | ||
65 | unsigned paravirt_patch_nop(void) | 65 | unsigned paravirt_patch_nop(void) |
66 | { | 66 | { |
67 | return 0; | 67 | return 0; |
68 | } | 68 | } |
69 | 69 | ||
70 | unsigned paravirt_patch_ignore(unsigned len) | 70 | unsigned paravirt_patch_ignore(unsigned len) |
71 | { | 71 | { |
72 | return len; | 72 | return len; |
73 | } | 73 | } |
74 | 74 | ||
75 | struct branch { | 75 | struct branch { |
76 | unsigned char opcode; | 76 | unsigned char opcode; |
77 | u32 delta; | 77 | u32 delta; |
78 | } __attribute__((packed)); | 78 | } __attribute__((packed)); |
79 | 79 | ||
80 | unsigned paravirt_patch_call(void *insnbuf, | 80 | unsigned paravirt_patch_call(void *insnbuf, |
81 | const void *target, u16 tgt_clobbers, | 81 | const void *target, u16 tgt_clobbers, |
82 | unsigned long addr, u16 site_clobbers, | 82 | unsigned long addr, u16 site_clobbers, |
83 | unsigned len) | 83 | unsigned len) |
84 | { | 84 | { |
85 | struct branch *b = insnbuf; | 85 | struct branch *b = insnbuf; |
86 | unsigned long delta = (unsigned long)target - (addr+5); | 86 | unsigned long delta = (unsigned long)target - (addr+5); |
87 | 87 | ||
88 | if (tgt_clobbers & ~site_clobbers) | 88 | if (tgt_clobbers & ~site_clobbers) |
89 | return len; /* target would clobber too much for this site */ | 89 | return len; /* target would clobber too much for this site */ |
90 | if (len < 5) | 90 | if (len < 5) |
91 | return len; /* call too long for patch site */ | 91 | return len; /* call too long for patch site */ |
92 | 92 | ||
93 | b->opcode = 0xe8; /* call */ | 93 | b->opcode = 0xe8; /* call */ |
94 | b->delta = delta; | 94 | b->delta = delta; |
95 | BUILD_BUG_ON(sizeof(*b) != 5); | 95 | BUILD_BUG_ON(sizeof(*b) != 5); |
96 | 96 | ||
97 | return 5; | 97 | return 5; |
98 | } | 98 | } |
99 | 99 | ||
100 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, | 100 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, |
101 | unsigned long addr, unsigned len) | 101 | unsigned long addr, unsigned len) |
102 | { | 102 | { |
103 | struct branch *b = insnbuf; | 103 | struct branch *b = insnbuf; |
104 | unsigned long delta = (unsigned long)target - (addr+5); | 104 | unsigned long delta = (unsigned long)target - (addr+5); |
105 | 105 | ||
106 | if (len < 5) | 106 | if (len < 5) |
107 | return len; /* call too long for patch site */ | 107 | return len; /* call too long for patch site */ |
108 | 108 | ||
109 | b->opcode = 0xe9; /* jmp */ | 109 | b->opcode = 0xe9; /* jmp */ |
110 | b->delta = delta; | 110 | b->delta = delta; |
111 | 111 | ||
112 | return 5; | 112 | return 5; |
113 | } | 113 | } |
114 | 114 | ||
115 | /* Neat trick to map patch type back to the call within the | 115 | /* Neat trick to map patch type back to the call within the |
116 | * corresponding structure. */ | 116 | * corresponding structure. */ |
117 | static void *get_call_destination(u8 type) | 117 | static void *get_call_destination(u8 type) |
118 | { | 118 | { |
119 | struct paravirt_patch_template tmpl = { | 119 | struct paravirt_patch_template tmpl = { |
120 | .pv_init_ops = pv_init_ops, | 120 | .pv_init_ops = pv_init_ops, |
121 | .pv_time_ops = pv_time_ops, | 121 | .pv_time_ops = pv_time_ops, |
122 | .pv_cpu_ops = pv_cpu_ops, | 122 | .pv_cpu_ops = pv_cpu_ops, |
123 | .pv_irq_ops = pv_irq_ops, | 123 | .pv_irq_ops = pv_irq_ops, |
124 | .pv_apic_ops = pv_apic_ops, | 124 | .pv_apic_ops = pv_apic_ops, |
125 | .pv_mmu_ops = pv_mmu_ops, | 125 | .pv_mmu_ops = pv_mmu_ops, |
126 | }; | 126 | }; |
127 | return *((void **)&tmpl + type); | 127 | return *((void **)&tmpl + type); |
128 | } | 128 | } |
129 | 129 | ||
130 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, | 130 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, |
131 | unsigned long addr, unsigned len) | 131 | unsigned long addr, unsigned len) |
132 | { | 132 | { |
133 | void *opfunc = get_call_destination(type); | 133 | void *opfunc = get_call_destination(type); |
134 | unsigned ret; | 134 | unsigned ret; |
135 | 135 | ||
136 | if (opfunc == NULL) | 136 | if (opfunc == NULL) |
137 | /* If there's no function, patch it with a ud2a (BUG) */ | 137 | /* If there's no function, patch it with a ud2a (BUG) */ |
138 | ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); | 138 | ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); |
139 | else if (opfunc == paravirt_nop) | 139 | else if (opfunc == paravirt_nop) |
140 | /* If the operation is a nop, then nop the callsite */ | 140 | /* If the operation is a nop, then nop the callsite */ |
141 | ret = paravirt_patch_nop(); | 141 | ret = paravirt_patch_nop(); |
142 | else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || | 142 | else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || |
143 | type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || | 143 | type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || |
144 | type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || | 144 | type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || |
145 | type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) | 145 | type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) |
146 | /* If operation requires a jmp, then jmp */ | 146 | /* If operation requires a jmp, then jmp */ |
147 | ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); | 147 | ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); |
148 | else | 148 | else |
149 | /* Otherwise call the function; assume target could | 149 | /* Otherwise call the function; assume target could |
150 | clobber any caller-save reg */ | 150 | clobber any caller-save reg */ |
151 | ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY, | 151 | ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY, |
152 | addr, clobbers, len); | 152 | addr, clobbers, len); |
153 | 153 | ||
154 | return ret; | 154 | return ret; |
155 | } | 155 | } |
156 | 156 | ||
157 | unsigned paravirt_patch_insns(void *insnbuf, unsigned len, | 157 | unsigned paravirt_patch_insns(void *insnbuf, unsigned len, |
158 | const char *start, const char *end) | 158 | const char *start, const char *end) |
159 | { | 159 | { |
160 | unsigned insn_len = end - start; | 160 | unsigned insn_len = end - start; |
161 | 161 | ||
162 | if (insn_len > len || start == NULL) | 162 | if (insn_len > len || start == NULL) |
163 | insn_len = len; | 163 | insn_len = len; |
164 | else | 164 | else |
165 | memcpy(insnbuf, start, insn_len); | 165 | memcpy(insnbuf, start, insn_len); |
166 | 166 | ||
167 | return insn_len; | 167 | return insn_len; |
168 | } | 168 | } |
169 | 169 | ||
170 | void init_IRQ(void) | 170 | void init_IRQ(void) |
171 | { | 171 | { |
172 | pv_irq_ops.init_IRQ(); | 172 | pv_irq_ops.init_IRQ(); |
173 | } | 173 | } |
174 | 174 | ||
175 | static void native_flush_tlb(void) | 175 | static void native_flush_tlb(void) |
176 | { | 176 | { |
177 | __native_flush_tlb(); | 177 | __native_flush_tlb(); |
178 | } | 178 | } |
179 | 179 | ||
180 | /* | 180 | /* |
181 | * Global pages have to be flushed a bit differently. Not a real | 181 | * Global pages have to be flushed a bit differently. Not a real |
182 | * performance problem because this does not happen often. | 182 | * performance problem because this does not happen often. |
183 | */ | 183 | */ |
184 | static void native_flush_tlb_global(void) | 184 | static void native_flush_tlb_global(void) |
185 | { | 185 | { |
186 | __native_flush_tlb_global(); | 186 | __native_flush_tlb_global(); |
187 | } | 187 | } |
188 | 188 | ||
189 | static void native_flush_tlb_single(unsigned long addr) | 189 | static void native_flush_tlb_single(unsigned long addr) |
190 | { | 190 | { |
191 | __native_flush_tlb_single(addr); | 191 | __native_flush_tlb_single(addr); |
192 | } | 192 | } |
193 | 193 | ||
194 | /* These are in entry.S */ | 194 | /* These are in entry.S */ |
195 | extern void native_iret(void); | 195 | extern void native_iret(void); |
196 | extern void native_irq_enable_sysexit(void); | 196 | extern void native_irq_enable_sysexit(void); |
197 | extern void native_usergs_sysret32(void); | 197 | extern void native_usergs_sysret32(void); |
198 | extern void native_usergs_sysret64(void); | 198 | extern void native_usergs_sysret64(void); |
199 | 199 | ||
200 | static int __init print_banner(void) | 200 | static int __init print_banner(void) |
201 | { | 201 | { |
202 | pv_init_ops.banner(); | 202 | pv_init_ops.banner(); |
203 | return 0; | 203 | return 0; |
204 | } | 204 | } |
205 | core_initcall(print_banner); | 205 | core_initcall(print_banner); |
206 | 206 | ||
207 | static struct resource reserve_ioports = { | 207 | static struct resource reserve_ioports = { |
208 | .start = 0, | 208 | .start = 0, |
209 | .end = IO_SPACE_LIMIT, | 209 | .end = IO_SPACE_LIMIT, |
210 | .name = "paravirt-ioport", | 210 | .name = "paravirt-ioport", |
211 | .flags = IORESOURCE_IO | IORESOURCE_BUSY, | 211 | .flags = IORESOURCE_IO | IORESOURCE_BUSY, |
212 | }; | 212 | }; |
213 | 213 | ||
214 | /* | 214 | /* |
215 | * Reserve the whole legacy IO space to prevent any legacy drivers | 215 | * Reserve the whole legacy IO space to prevent any legacy drivers |
216 | * from wasting time probing for their hardware. This is a fairly | 216 | * from wasting time probing for their hardware. This is a fairly |
217 | * brute-force approach to disabling all non-virtual drivers. | 217 | * brute-force approach to disabling all non-virtual drivers. |
218 | * | 218 | * |
219 | * Note that this must be called very early to have any effect. | 219 | * Note that this must be called very early to have any effect. |
220 | */ | 220 | */ |
221 | int paravirt_disable_iospace(void) | 221 | int paravirt_disable_iospace(void) |
222 | { | 222 | { |
223 | return request_resource(&ioport_resource, &reserve_ioports); | 223 | return request_resource(&ioport_resource, &reserve_ioports); |
224 | } | 224 | } |
225 | 225 | ||
226 | static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; | 226 | static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; |
227 | 227 | ||
228 | static inline void enter_lazy(enum paravirt_lazy_mode mode) | 228 | static inline void enter_lazy(enum paravirt_lazy_mode mode) |
229 | { | 229 | { |
230 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); | 230 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); |
231 | BUG_ON(preemptible()); | 231 | BUG_ON(preemptible()); |
232 | 232 | ||
233 | __get_cpu_var(paravirt_lazy_mode) = mode; | 233 | __get_cpu_var(paravirt_lazy_mode) = mode; |
234 | } | 234 | } |
235 | 235 | ||
236 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode) | 236 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode) |
237 | { | 237 | { |
238 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); | 238 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); |
239 | BUG_ON(preemptible()); | 239 | BUG_ON(preemptible()); |
240 | 240 | ||
241 | __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; | 241 | __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; |
242 | } | 242 | } |
243 | 243 | ||
244 | void paravirt_enter_lazy_mmu(void) | 244 | void paravirt_enter_lazy_mmu(void) |
245 | { | 245 | { |
246 | enter_lazy(PARAVIRT_LAZY_MMU); | 246 | enter_lazy(PARAVIRT_LAZY_MMU); |
247 | } | 247 | } |
248 | 248 | ||
249 | void paravirt_leave_lazy_mmu(void) | 249 | void paravirt_leave_lazy_mmu(void) |
250 | { | 250 | { |
251 | paravirt_leave_lazy(PARAVIRT_LAZY_MMU); | 251 | paravirt_leave_lazy(PARAVIRT_LAZY_MMU); |
252 | } | 252 | } |
253 | 253 | ||
254 | void paravirt_enter_lazy_cpu(void) | 254 | void paravirt_enter_lazy_cpu(void) |
255 | { | 255 | { |
256 | enter_lazy(PARAVIRT_LAZY_CPU); | 256 | enter_lazy(PARAVIRT_LAZY_CPU); |
257 | } | 257 | } |
258 | 258 | ||
259 | void paravirt_leave_lazy_cpu(void) | 259 | void paravirt_leave_lazy_cpu(void) |
260 | { | 260 | { |
261 | paravirt_leave_lazy(PARAVIRT_LAZY_CPU); | 261 | paravirt_leave_lazy(PARAVIRT_LAZY_CPU); |
262 | } | 262 | } |
263 | 263 | ||
264 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | 264 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void) |
265 | { | 265 | { |
266 | return __get_cpu_var(paravirt_lazy_mode); | 266 | return __get_cpu_var(paravirt_lazy_mode); |
267 | } | 267 | } |
268 | 268 | ||
269 | struct pv_info pv_info = { | 269 | struct pv_info pv_info = { |
270 | .name = "bare hardware", | 270 | .name = "bare hardware", |
271 | .paravirt_enabled = 0, | 271 | .paravirt_enabled = 0, |
272 | .kernel_rpl = 0, | 272 | .kernel_rpl = 0, |
273 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ | 273 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ |
274 | }; | 274 | }; |
275 | 275 | ||
276 | struct pv_init_ops pv_init_ops = { | 276 | struct pv_init_ops pv_init_ops = { |
277 | .patch = native_patch, | 277 | .patch = native_patch, |
278 | .banner = default_banner, | 278 | .banner = default_banner, |
279 | .arch_setup = paravirt_nop, | 279 | .arch_setup = paravirt_nop, |
280 | .memory_setup = machine_specific_memory_setup, | 280 | .memory_setup = machine_specific_memory_setup, |
281 | }; | 281 | }; |
282 | 282 | ||
283 | struct pv_time_ops pv_time_ops = { | 283 | struct pv_time_ops pv_time_ops = { |
284 | .time_init = hpet_time_init, | 284 | .time_init = hpet_time_init, |
285 | .get_wallclock = native_get_wallclock, | 285 | .get_wallclock = native_get_wallclock, |
286 | .set_wallclock = native_set_wallclock, | 286 | .set_wallclock = native_set_wallclock, |
287 | .sched_clock = native_sched_clock, | 287 | .sched_clock = native_sched_clock, |
288 | .get_cpu_khz = native_calculate_cpu_khz, | 288 | .get_cpu_khz = native_calculate_cpu_khz, |
289 | }; | 289 | }; |
290 | 290 | ||
291 | struct pv_irq_ops pv_irq_ops = { | 291 | struct pv_irq_ops pv_irq_ops = { |
292 | .init_IRQ = native_init_IRQ, | 292 | .init_IRQ = native_init_IRQ, |
293 | .save_fl = native_save_fl, | 293 | .save_fl = native_save_fl, |
294 | .restore_fl = native_restore_fl, | 294 | .restore_fl = native_restore_fl, |
295 | .irq_disable = native_irq_disable, | 295 | .irq_disable = native_irq_disable, |
296 | .irq_enable = native_irq_enable, | 296 | .irq_enable = native_irq_enable, |
297 | .safe_halt = native_safe_halt, | 297 | .safe_halt = native_safe_halt, |
298 | .halt = native_halt, | 298 | .halt = native_halt, |
299 | #ifdef CONFIG_X86_64 | 299 | #ifdef CONFIG_X86_64 |
300 | .adjust_exception_frame = paravirt_nop, | 300 | .adjust_exception_frame = paravirt_nop, |
301 | #endif | 301 | #endif |
302 | }; | 302 | }; |
303 | 303 | ||
304 | struct pv_cpu_ops pv_cpu_ops = { | 304 | struct pv_cpu_ops pv_cpu_ops = { |
305 | .cpuid = native_cpuid, | 305 | .cpuid = native_cpuid, |
306 | .get_debugreg = native_get_debugreg, | 306 | .get_debugreg = native_get_debugreg, |
307 | .set_debugreg = native_set_debugreg, | 307 | .set_debugreg = native_set_debugreg, |
308 | .clts = native_clts, | 308 | .clts = native_clts, |
309 | .read_cr0 = native_read_cr0, | 309 | .read_cr0 = native_read_cr0, |
310 | .write_cr0 = native_write_cr0, | 310 | .write_cr0 = native_write_cr0, |
311 | .read_cr4 = native_read_cr4, | 311 | .read_cr4 = native_read_cr4, |
312 | .read_cr4_safe = native_read_cr4_safe, | 312 | .read_cr4_safe = native_read_cr4_safe, |
313 | .write_cr4 = native_write_cr4, | 313 | .write_cr4 = native_write_cr4, |
314 | #ifdef CONFIG_X86_64 | 314 | #ifdef CONFIG_X86_64 |
315 | .read_cr8 = native_read_cr8, | 315 | .read_cr8 = native_read_cr8, |
316 | .write_cr8 = native_write_cr8, | 316 | .write_cr8 = native_write_cr8, |
317 | #endif | 317 | #endif |
318 | .wbinvd = native_wbinvd, | 318 | .wbinvd = native_wbinvd, |
319 | .read_msr = native_read_msr_safe, | 319 | .read_msr = native_read_msr_safe, |
320 | .write_msr = native_write_msr_safe, | 320 | .write_msr = native_write_msr_safe, |
321 | .read_tsc = native_read_tsc, | 321 | .read_tsc = native_read_tsc, |
322 | .read_pmc = native_read_pmc, | 322 | .read_pmc = native_read_pmc, |
323 | .read_tscp = native_read_tscp, | 323 | .read_tscp = native_read_tscp, |
324 | .load_tr_desc = native_load_tr_desc, | 324 | .load_tr_desc = native_load_tr_desc, |
325 | .set_ldt = native_set_ldt, | 325 | .set_ldt = native_set_ldt, |
326 | .load_gdt = native_load_gdt, | 326 | .load_gdt = native_load_gdt, |
327 | .load_idt = native_load_idt, | 327 | .load_idt = native_load_idt, |
328 | .store_gdt = native_store_gdt, | 328 | .store_gdt = native_store_gdt, |
329 | .store_idt = native_store_idt, | 329 | .store_idt = native_store_idt, |
330 | .store_tr = native_store_tr, | 330 | .store_tr = native_store_tr, |
331 | .load_tls = native_load_tls, | 331 | .load_tls = native_load_tls, |
332 | #ifdef CONFIG_X86_64 | ||
333 | .load_gs_index = native_load_gs_index, | ||
334 | #endif | ||
332 | .write_ldt_entry = native_write_ldt_entry, | 335 | .write_ldt_entry = native_write_ldt_entry, |
333 | .write_gdt_entry = native_write_gdt_entry, | 336 | .write_gdt_entry = native_write_gdt_entry, |
334 | .write_idt_entry = native_write_idt_entry, | 337 | .write_idt_entry = native_write_idt_entry, |
335 | .load_sp0 = native_load_sp0, | 338 | .load_sp0 = native_load_sp0, |
336 | 339 | ||
337 | .irq_enable_sysexit = native_irq_enable_sysexit, | 340 | .irq_enable_sysexit = native_irq_enable_sysexit, |
338 | #ifdef CONFIG_X86_64 | 341 | #ifdef CONFIG_X86_64 |
339 | .usergs_sysret32 = native_usergs_sysret32, | 342 | .usergs_sysret32 = native_usergs_sysret32, |
340 | .usergs_sysret64 = native_usergs_sysret64, | 343 | .usergs_sysret64 = native_usergs_sysret64, |
341 | #endif | 344 | #endif |
342 | .iret = native_iret, | 345 | .iret = native_iret, |
343 | .swapgs = native_swapgs, | 346 | .swapgs = native_swapgs, |
344 | 347 | ||
345 | .set_iopl_mask = native_set_iopl_mask, | 348 | .set_iopl_mask = native_set_iopl_mask, |
346 | .io_delay = native_io_delay, | 349 | .io_delay = native_io_delay, |
347 | 350 | ||
348 | .lazy_mode = { | 351 | .lazy_mode = { |
349 | .enter = paravirt_nop, | 352 | .enter = paravirt_nop, |
350 | .leave = paravirt_nop, | 353 | .leave = paravirt_nop, |
351 | }, | 354 | }, |
352 | }; | 355 | }; |
353 | 356 | ||
354 | struct pv_apic_ops pv_apic_ops = { | 357 | struct pv_apic_ops pv_apic_ops = { |
355 | #ifdef CONFIG_X86_LOCAL_APIC | 358 | #ifdef CONFIG_X86_LOCAL_APIC |
356 | .apic_write = native_apic_write, | 359 | .apic_write = native_apic_write, |
357 | .apic_write_atomic = native_apic_write_atomic, | 360 | .apic_write_atomic = native_apic_write_atomic, |
358 | .apic_read = native_apic_read, | 361 | .apic_read = native_apic_read, |
359 | .setup_boot_clock = setup_boot_APIC_clock, | 362 | .setup_boot_clock = setup_boot_APIC_clock, |
360 | .setup_secondary_clock = setup_secondary_APIC_clock, | 363 | .setup_secondary_clock = setup_secondary_APIC_clock, |
361 | .startup_ipi_hook = paravirt_nop, | 364 | .startup_ipi_hook = paravirt_nop, |
362 | #endif | 365 | #endif |
363 | }; | 366 | }; |
364 | 367 | ||
365 | struct pv_mmu_ops pv_mmu_ops = { | 368 | struct pv_mmu_ops pv_mmu_ops = { |
366 | #ifndef CONFIG_X86_64 | 369 | #ifndef CONFIG_X86_64 |
367 | .pagetable_setup_start = native_pagetable_setup_start, | 370 | .pagetable_setup_start = native_pagetable_setup_start, |
368 | .pagetable_setup_done = native_pagetable_setup_done, | 371 | .pagetable_setup_done = native_pagetable_setup_done, |
369 | #endif | 372 | #endif |
370 | 373 | ||
371 | .read_cr2 = native_read_cr2, | 374 | .read_cr2 = native_read_cr2, |
372 | .write_cr2 = native_write_cr2, | 375 | .write_cr2 = native_write_cr2, |
373 | .read_cr3 = native_read_cr3, | 376 | .read_cr3 = native_read_cr3, |
374 | .write_cr3 = native_write_cr3, | 377 | .write_cr3 = native_write_cr3, |
375 | 378 | ||
376 | .flush_tlb_user = native_flush_tlb, | 379 | .flush_tlb_user = native_flush_tlb, |
377 | .flush_tlb_kernel = native_flush_tlb_global, | 380 | .flush_tlb_kernel = native_flush_tlb_global, |
378 | .flush_tlb_single = native_flush_tlb_single, | 381 | .flush_tlb_single = native_flush_tlb_single, |
379 | .flush_tlb_others = native_flush_tlb_others, | 382 | .flush_tlb_others = native_flush_tlb_others, |
380 | 383 | ||
381 | .pgd_alloc = __paravirt_pgd_alloc, | 384 | .pgd_alloc = __paravirt_pgd_alloc, |
382 | .pgd_free = paravirt_nop, | 385 | .pgd_free = paravirt_nop, |
383 | 386 | ||
384 | .alloc_pte = paravirt_nop, | 387 | .alloc_pte = paravirt_nop, |
385 | .alloc_pmd = paravirt_nop, | 388 | .alloc_pmd = paravirt_nop, |
386 | .alloc_pmd_clone = paravirt_nop, | 389 | .alloc_pmd_clone = paravirt_nop, |
387 | .alloc_pud = paravirt_nop, | 390 | .alloc_pud = paravirt_nop, |
388 | .release_pte = paravirt_nop, | 391 | .release_pte = paravirt_nop, |
389 | .release_pmd = paravirt_nop, | 392 | .release_pmd = paravirt_nop, |
390 | .release_pud = paravirt_nop, | 393 | .release_pud = paravirt_nop, |
391 | 394 | ||
392 | .set_pte = native_set_pte, | 395 | .set_pte = native_set_pte, |
393 | .set_pte_at = native_set_pte_at, | 396 | .set_pte_at = native_set_pte_at, |
394 | .set_pmd = native_set_pmd, | 397 | .set_pmd = native_set_pmd, |
395 | .pte_update = paravirt_nop, | 398 | .pte_update = paravirt_nop, |
396 | .pte_update_defer = paravirt_nop, | 399 | .pte_update_defer = paravirt_nop, |
397 | 400 | ||
398 | .ptep_modify_prot_start = __ptep_modify_prot_start, | 401 | .ptep_modify_prot_start = __ptep_modify_prot_start, |
399 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | 402 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, |
400 | 403 | ||
401 | #ifdef CONFIG_HIGHPTE | 404 | #ifdef CONFIG_HIGHPTE |
402 | .kmap_atomic_pte = kmap_atomic, | 405 | .kmap_atomic_pte = kmap_atomic, |
403 | #endif | 406 | #endif |
404 | 407 | ||
405 | #if PAGETABLE_LEVELS >= 3 | 408 | #if PAGETABLE_LEVELS >= 3 |
406 | #ifdef CONFIG_X86_PAE | 409 | #ifdef CONFIG_X86_PAE |
407 | .set_pte_atomic = native_set_pte_atomic, | 410 | .set_pte_atomic = native_set_pte_atomic, |
408 | .set_pte_present = native_set_pte_present, | 411 | .set_pte_present = native_set_pte_present, |
409 | .pte_clear = native_pte_clear, | 412 | .pte_clear = native_pte_clear, |
410 | .pmd_clear = native_pmd_clear, | 413 | .pmd_clear = native_pmd_clear, |
411 | #endif | 414 | #endif |
412 | .set_pud = native_set_pud, | 415 | .set_pud = native_set_pud, |
413 | .pmd_val = native_pmd_val, | 416 | .pmd_val = native_pmd_val, |
414 | .make_pmd = native_make_pmd, | 417 | .make_pmd = native_make_pmd, |
415 | 418 | ||
416 | #if PAGETABLE_LEVELS == 4 | 419 | #if PAGETABLE_LEVELS == 4 |
417 | .pud_val = native_pud_val, | 420 | .pud_val = native_pud_val, |
418 | .make_pud = native_make_pud, | 421 | .make_pud = native_make_pud, |
419 | .set_pgd = native_set_pgd, | 422 | .set_pgd = native_set_pgd, |
420 | #endif | 423 | #endif |
421 | #endif /* PAGETABLE_LEVELS >= 3 */ | 424 | #endif /* PAGETABLE_LEVELS >= 3 */ |
422 | 425 | ||
423 | .pte_val = native_pte_val, | 426 | .pte_val = native_pte_val, |
424 | .pte_flags = native_pte_val, | 427 | .pte_flags = native_pte_val, |
425 | .pgd_val = native_pgd_val, | 428 | .pgd_val = native_pgd_val, |
426 | 429 | ||
427 | .make_pte = native_make_pte, | 430 | .make_pte = native_make_pte, |
428 | .make_pgd = native_make_pgd, | 431 | .make_pgd = native_make_pgd, |
429 | 432 | ||
430 | .dup_mmap = paravirt_nop, | 433 | .dup_mmap = paravirt_nop, |
431 | .exit_mmap = paravirt_nop, | 434 | .exit_mmap = paravirt_nop, |
432 | .activate_mm = paravirt_nop, | 435 | .activate_mm = paravirt_nop, |
433 | 436 | ||
434 | .lazy_mode = { | 437 | .lazy_mode = { |
435 | .enter = paravirt_nop, | 438 | .enter = paravirt_nop, |
436 | .leave = paravirt_nop, | 439 | .leave = paravirt_nop, |
437 | }, | 440 | }, |
438 | 441 | ||
439 | .set_fixmap = native_set_fixmap, | 442 | .set_fixmap = native_set_fixmap, |
440 | }; | 443 | }; |
441 | 444 | ||
442 | EXPORT_SYMBOL_GPL(pv_time_ops); | 445 | EXPORT_SYMBOL_GPL(pv_time_ops); |
443 | EXPORT_SYMBOL (pv_cpu_ops); | 446 | EXPORT_SYMBOL (pv_cpu_ops); |
444 | EXPORT_SYMBOL (pv_mmu_ops); | 447 | EXPORT_SYMBOL (pv_mmu_ops); |
445 | EXPORT_SYMBOL_GPL(pv_apic_ops); | 448 | EXPORT_SYMBOL_GPL(pv_apic_ops); |
446 | EXPORT_SYMBOL_GPL(pv_info); | 449 | EXPORT_SYMBOL_GPL(pv_info); |
447 | EXPORT_SYMBOL (pv_irq_ops); | 450 | EXPORT_SYMBOL (pv_irq_ops); |
448 | 451 |
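
The only functional change to paravirt.c is the new .load_gs_index = native_load_gs_index slot in pv_cpu_ops under CONFIG_X86_64, so the default op table still reaches the entry_64.S routine while a hypervisor port can override that single op; the header-side wrapper that turns load_gs_index() calls into this op lives in the include/asm-x86 files of this commit that are not shown in this excerpt. Since most of the file shown here is the call-site patching machinery, here is a small self-contained sketch of the rel32 encoding that paravirt_patch_call()/paravirt_patch_jmp() perform; the 0x1000/0x1234 addresses are made up, and the buffer is only printed, never executed:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* same layout as struct branch above: opcode byte + 32-bit displacement */
    struct branch {
            unsigned char opcode;
            uint32_t delta;
    } __attribute__((packed));

    /* mirrors paravirt_patch_call(): a 5-byte "call rel32", with the
     * displacement measured from the end of the instruction (addr + 5) */
    static unsigned patch_call(void *insnbuf, unsigned long target,
                               unsigned long addr, unsigned len)
    {
            struct branch b;

            if (len < 5)
                    return len;     /* patch site too small, leave it alone */

            b.opcode = 0xe8;        /* 0xe9 would give the jmp variant */
            b.delta = (uint32_t)(target - (addr + 5));
            memcpy(insnbuf, &b, sizeof(b));
            return 5;
    }

    int main(void)
    {
            unsigned char buf[5];
            unsigned used = patch_call(buf, 0x1234, 0x1000, sizeof(buf));

            printf("emitted %u bytes:", used);
            for (unsigned i = 0; i < used; i++)
                    printf(" %02x", buf[i]);
            printf("\n");   /* e8 2f 02 00 00: call with delta 0x22f from 0x1005 */
            return 0;
    }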
include/asm-x86/elf.h
1 | #ifndef _ASM_X86_ELF_H | 1 | #ifndef _ASM_X86_ELF_H |
2 | #define _ASM_X86_ELF_H | 2 | #define _ASM_X86_ELF_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * ELF register definitions.. | 5 | * ELF register definitions.. |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <asm/ptrace.h> | 8 | #include <asm/ptrace.h> |
9 | #include <asm/user.h> | 9 | #include <asm/user.h> |
10 | #include <asm/auxvec.h> | 10 | #include <asm/auxvec.h> |
11 | 11 | ||
12 | typedef unsigned long elf_greg_t; | 12 | typedef unsigned long elf_greg_t; |
13 | 13 | ||
14 | #define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t)) | 14 | #define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t)) |
15 | typedef elf_greg_t elf_gregset_t[ELF_NGREG]; | 15 | typedef elf_greg_t elf_gregset_t[ELF_NGREG]; |
16 | 16 | ||
17 | typedef struct user_i387_struct elf_fpregset_t; | 17 | typedef struct user_i387_struct elf_fpregset_t; |
18 | 18 | ||
19 | #ifdef __i386__ | 19 | #ifdef __i386__ |
20 | 20 | ||
21 | typedef struct user_fxsr_struct elf_fpxregset_t; | 21 | typedef struct user_fxsr_struct elf_fpxregset_t; |
22 | 22 | ||
23 | #define R_386_NONE 0 | 23 | #define R_386_NONE 0 |
24 | #define R_386_32 1 | 24 | #define R_386_32 1 |
25 | #define R_386_PC32 2 | 25 | #define R_386_PC32 2 |
26 | #define R_386_GOT32 3 | 26 | #define R_386_GOT32 3 |
27 | #define R_386_PLT32 4 | 27 | #define R_386_PLT32 4 |
28 | #define R_386_COPY 5 | 28 | #define R_386_COPY 5 |
29 | #define R_386_GLOB_DAT 6 | 29 | #define R_386_GLOB_DAT 6 |
30 | #define R_386_JMP_SLOT 7 | 30 | #define R_386_JMP_SLOT 7 |
31 | #define R_386_RELATIVE 8 | 31 | #define R_386_RELATIVE 8 |
32 | #define R_386_GOTOFF 9 | 32 | #define R_386_GOTOFF 9 |
33 | #define R_386_GOTPC 10 | 33 | #define R_386_GOTPC 10 |
34 | #define R_386_NUM 11 | 34 | #define R_386_NUM 11 |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * These are used to set parameters in the core dumps. | 37 | * These are used to set parameters in the core dumps. |
38 | */ | 38 | */ |
39 | #define ELF_CLASS ELFCLASS32 | 39 | #define ELF_CLASS ELFCLASS32 |
40 | #define ELF_DATA ELFDATA2LSB | 40 | #define ELF_DATA ELFDATA2LSB |
41 | #define ELF_ARCH EM_386 | 41 | #define ELF_ARCH EM_386 |
42 | 42 | ||
43 | #else | 43 | #else |
44 | 44 | ||
45 | /* x86-64 relocation types */ | 45 | /* x86-64 relocation types */ |
46 | #define R_X86_64_NONE 0 /* No reloc */ | 46 | #define R_X86_64_NONE 0 /* No reloc */ |
47 | #define R_X86_64_64 1 /* Direct 64 bit */ | 47 | #define R_X86_64_64 1 /* Direct 64 bit */ |
48 | #define R_X86_64_PC32 2 /* PC relative 32 bit signed */ | 48 | #define R_X86_64_PC32 2 /* PC relative 32 bit signed */ |
49 | #define R_X86_64_GOT32 3 /* 32 bit GOT entry */ | 49 | #define R_X86_64_GOT32 3 /* 32 bit GOT entry */ |
50 | #define R_X86_64_PLT32 4 /* 32 bit PLT address */ | 50 | #define R_X86_64_PLT32 4 /* 32 bit PLT address */ |
51 | #define R_X86_64_COPY 5 /* Copy symbol at runtime */ | 51 | #define R_X86_64_COPY 5 /* Copy symbol at runtime */ |
52 | #define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ | 52 | #define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ |
53 | #define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ | 53 | #define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ |
54 | #define R_X86_64_RELATIVE 8 /* Adjust by program base */ | 54 | #define R_X86_64_RELATIVE 8 /* Adjust by program base */ |
55 | #define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative | 55 | #define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative |
56 | offset to GOT */ | 56 | offset to GOT */ |
57 | #define R_X86_64_32 10 /* Direct 32 bit zero extended */ | 57 | #define R_X86_64_32 10 /* Direct 32 bit zero extended */ |
58 | #define R_X86_64_32S 11 /* Direct 32 bit sign extended */ | 58 | #define R_X86_64_32S 11 /* Direct 32 bit sign extended */ |
59 | #define R_X86_64_16 12 /* Direct 16 bit zero extended */ | 59 | #define R_X86_64_16 12 /* Direct 16 bit zero extended */ |
60 | #define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ | 60 | #define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ |
61 | #define R_X86_64_8 14 /* Direct 8 bit sign extended */ | 61 | #define R_X86_64_8 14 /* Direct 8 bit sign extended */ |
62 | #define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ | 62 | #define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ |
63 | 63 | ||
64 | #define R_X86_64_NUM 16 | 64 | #define R_X86_64_NUM 16 |
65 | 65 | ||
66 | /* | 66 | /* |
67 | * These are used to set parameters in the core dumps. | 67 | * These are used to set parameters in the core dumps. |
68 | */ | 68 | */ |
69 | #define ELF_CLASS ELFCLASS64 | 69 | #define ELF_CLASS ELFCLASS64 |
70 | #define ELF_DATA ELFDATA2LSB | 70 | #define ELF_DATA ELFDATA2LSB |
71 | #define ELF_ARCH EM_X86_64 | 71 | #define ELF_ARCH EM_X86_64 |
72 | 72 | ||
73 | #endif | 73 | #endif |
74 | 74 | ||
75 | #include <asm/vdso.h> | 75 | #include <asm/vdso.h> |
76 | 76 | ||
77 | extern unsigned int vdso_enabled; | 77 | extern unsigned int vdso_enabled; |
78 | 78 | ||
79 | /* | 79 | /* |
80 | * This is used to ensure we don't load something for the wrong architecture. | 80 | * This is used to ensure we don't load something for the wrong architecture. |
81 | */ | 81 | */ |
82 | #define elf_check_arch_ia32(x) \ | 82 | #define elf_check_arch_ia32(x) \ |
83 | (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) | 83 | (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) |
84 | 84 | ||
85 | #include <asm/processor.h> | 85 | #include <asm/processor.h> |
86 | #include <asm/system.h> | ||
86 | 87 | ||
87 | #ifdef CONFIG_X86_32 | 88 | #ifdef CONFIG_X86_32 |
88 | #include <asm/system.h> /* for savesegment */ | ||
89 | #include <asm/desc.h> | 89 | #include <asm/desc.h> |
90 | 90 | ||
91 | #define elf_check_arch(x) elf_check_arch_ia32(x) | 91 | #define elf_check_arch(x) elf_check_arch_ia32(x) |
92 | 92 | ||
93 | /* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx | 93 | /* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx |
94 | contains a pointer to a function which might be registered using `atexit'. | 94 | contains a pointer to a function which might be registered using `atexit'. |
95 | This provides a means for the dynamic linker to call DT_FINI functions for | 95 | This provides a means for the dynamic linker to call DT_FINI functions for |
96 | shared libraries that have been loaded before the code runs. | 96 | shared libraries that have been loaded before the code runs. |
97 | 97 | ||
98 | A value of 0 tells us we have no such handler. | 98 | A value of 0 tells us we have no such handler. |
99 | 99 | ||
100 | We might as well make sure everything else is cleared too (except for %esp), | 100 | We might as well make sure everything else is cleared too (except for %esp), |
101 | just to make things more deterministic. | 101 | just to make things more deterministic. |
102 | */ | 102 | */ |
103 | #define ELF_PLAT_INIT(_r, load_addr) \ | 103 | #define ELF_PLAT_INIT(_r, load_addr) \ |
104 | do { \ | 104 | do { \ |
105 | _r->bx = 0; _r->cx = 0; _r->dx = 0; \ | 105 | _r->bx = 0; _r->cx = 0; _r->dx = 0; \ |
106 | _r->si = 0; _r->di = 0; _r->bp = 0; \ | 106 | _r->si = 0; _r->di = 0; _r->bp = 0; \ |
107 | _r->ax = 0; \ | 107 | _r->ax = 0; \ |
108 | } while (0) | 108 | } while (0) |
109 | 109 | ||
110 | /* | 110 | /* |
111 | * regs is struct pt_regs, pr_reg is elf_gregset_t (which is | 111 | * regs is struct pt_regs, pr_reg is elf_gregset_t (which is |
112 | * now struct_user_regs, they are different) | 112 | * now struct_user_regs, they are different) |
113 | */ | 113 | */ |
114 | 114 | ||
115 | #define ELF_CORE_COPY_REGS(pr_reg, regs) \ | 115 | #define ELF_CORE_COPY_REGS(pr_reg, regs) \ |
116 | do { \ | 116 | do { \ |
117 | pr_reg[0] = regs->bx; \ | 117 | pr_reg[0] = regs->bx; \ |
118 | pr_reg[1] = regs->cx; \ | 118 | pr_reg[1] = regs->cx; \ |
119 | pr_reg[2] = regs->dx; \ | 119 | pr_reg[2] = regs->dx; \ |
120 | pr_reg[3] = regs->si; \ | 120 | pr_reg[3] = regs->si; \ |
121 | pr_reg[4] = regs->di; \ | 121 | pr_reg[4] = regs->di; \ |
122 | pr_reg[5] = regs->bp; \ | 122 | pr_reg[5] = regs->bp; \ |
123 | pr_reg[6] = regs->ax; \ | 123 | pr_reg[6] = regs->ax; \ |
124 | pr_reg[7] = regs->ds & 0xffff; \ | 124 | pr_reg[7] = regs->ds & 0xffff; \ |
125 | pr_reg[8] = regs->es & 0xffff; \ | 125 | pr_reg[8] = regs->es & 0xffff; \ |
126 | pr_reg[9] = regs->fs & 0xffff; \ | 126 | pr_reg[9] = regs->fs & 0xffff; \ |
127 | savesegment(gs, pr_reg[10]); \ | 127 | savesegment(gs, pr_reg[10]); \ |
128 | pr_reg[11] = regs->orig_ax; \ | 128 | pr_reg[11] = regs->orig_ax; \ |
129 | pr_reg[12] = regs->ip; \ | 129 | pr_reg[12] = regs->ip; \ |
130 | pr_reg[13] = regs->cs & 0xffff; \ | 130 | pr_reg[13] = regs->cs & 0xffff; \ |
131 | pr_reg[14] = regs->flags; \ | 131 | pr_reg[14] = regs->flags; \ |
132 | pr_reg[15] = regs->sp; \ | 132 | pr_reg[15] = regs->sp; \ |
133 | pr_reg[16] = regs->ss & 0xffff; \ | 133 | pr_reg[16] = regs->ss & 0xffff; \ |
134 | } while (0); | 134 | } while (0); |
135 | 135 | ||
136 | #define ELF_PLATFORM (utsname()->machine) | 136 | #define ELF_PLATFORM (utsname()->machine) |
137 | #define set_personality_64bit() do { } while (0) | 137 | #define set_personality_64bit() do { } while (0) |
138 | 138 | ||
139 | #else /* CONFIG_X86_32 */ | 139 | #else /* CONFIG_X86_32 */ |
140 | 140 | ||
141 | /* | 141 | /* |
142 | * This is used to ensure we don't load something for the wrong architecture. | 142 | * This is used to ensure we don't load something for the wrong architecture. |
143 | */ | 143 | */ |
144 | #define elf_check_arch(x) \ | 144 | #define elf_check_arch(x) \ |
145 | ((x)->e_machine == EM_X86_64) | 145 | ((x)->e_machine == EM_X86_64) |
146 | 146 | ||
147 | #define compat_elf_check_arch(x) elf_check_arch_ia32(x) | 147 | #define compat_elf_check_arch(x) elf_check_arch_ia32(x) |
148 | 148 | ||
149 | static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp) | 149 | static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp) |
150 | { | 150 | { |
151 | asm volatile("movl %0,%%fs" :: "r" (0)); | 151 | asm volatile("movl %0,%%fs" :: "r" (0)); |
152 | asm volatile("movl %0,%%es; movl %0,%%ds" : : "r" (__USER32_DS)); | 152 | asm volatile("movl %0,%%es; movl %0,%%ds" : : "r" (__USER32_DS)); |
153 | load_gs_index(0); | 153 | load_gs_index(0); |
154 | regs->ip = ip; | 154 | regs->ip = ip; |
155 | regs->sp = sp; | 155 | regs->sp = sp; |
156 | regs->flags = X86_EFLAGS_IF; | 156 | regs->flags = X86_EFLAGS_IF; |
157 | regs->cs = __USER32_CS; | 157 | regs->cs = __USER32_CS; |
158 | regs->ss = __USER32_DS; | 158 | regs->ss = __USER32_DS; |
159 | } | 159 | } |
160 | 160 | ||
161 | static inline void elf_common_init(struct thread_struct *t, | 161 | static inline void elf_common_init(struct thread_struct *t, |
162 | struct pt_regs *regs, const u16 ds) | 162 | struct pt_regs *regs, const u16 ds) |
163 | { | 163 | { |
164 | regs->ax = regs->bx = regs->cx = regs->dx = 0; | 164 | regs->ax = regs->bx = regs->cx = regs->dx = 0; |
165 | regs->si = regs->di = regs->bp = 0; | 165 | regs->si = regs->di = regs->bp = 0; |
166 | regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; | 166 | regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; |
167 | regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; | 167 | regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; |
168 | t->fs = t->gs = 0; | 168 | t->fs = t->gs = 0; |
169 | t->fsindex = t->gsindex = 0; | 169 | t->fsindex = t->gsindex = 0; |
170 | t->ds = t->es = ds; | 170 | t->ds = t->es = ds; |
171 | } | 171 | } |
172 | 172 | ||
173 | #define ELF_PLAT_INIT(_r, load_addr) \ | 173 | #define ELF_PLAT_INIT(_r, load_addr) \ |
174 | do { \ | 174 | do { \ |
175 | elf_common_init(&current->thread, _r, 0); \ | 175 | elf_common_init(&current->thread, _r, 0); \ |
176 | clear_thread_flag(TIF_IA32); \ | 176 | clear_thread_flag(TIF_IA32); \ |
177 | } while (0) | 177 | } while (0) |
178 | 178 | ||
179 | #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ | 179 | #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ |
180 | elf_common_init(&current->thread, regs, __USER_DS) | 180 | elf_common_init(&current->thread, regs, __USER_DS) |
181 | 181 | ||
182 | #define compat_start_thread(regs, ip, sp) \ | 182 | #define compat_start_thread(regs, ip, sp) \ |
183 | do { \ | 183 | do { \ |
184 | start_ia32_thread(regs, ip, sp); \ | 184 | start_ia32_thread(regs, ip, sp); \ |
185 | set_fs(USER_DS); \ | 185 | set_fs(USER_DS); \ |
186 | } while (0) | 186 | } while (0) |
187 | 187 | ||
188 | #define COMPAT_SET_PERSONALITY(ex, ibcs2) \ | 188 | #define COMPAT_SET_PERSONALITY(ex, ibcs2) \ |
189 | do { \ | 189 | do { \ |
190 | if (test_thread_flag(TIF_IA32)) \ | 190 | if (test_thread_flag(TIF_IA32)) \ |
191 | clear_thread_flag(TIF_ABI_PENDING); \ | 191 | clear_thread_flag(TIF_ABI_PENDING); \ |
192 | else \ | 192 | else \ |
193 | set_thread_flag(TIF_ABI_PENDING); \ | 193 | set_thread_flag(TIF_ABI_PENDING); \ |
194 | current->personality |= force_personality32; \ | 194 | current->personality |= force_personality32; \ |
195 | } while (0) | 195 | } while (0) |
196 | 196 | ||
197 | #define COMPAT_ELF_PLATFORM ("i686") | 197 | #define COMPAT_ELF_PLATFORM ("i686") |
198 | 198 | ||
199 | /* | 199 | /* |
200 | * regs is struct pt_regs, pr_reg is elf_gregset_t (which is | 200 | * regs is struct pt_regs, pr_reg is elf_gregset_t (which is |
201 | * now struct_user_regs, they are different). Assumes current is the process | 201 | * now struct_user_regs, they are different). Assumes current is the process |
202 | * getting dumped. | 202 | * getting dumped. |
203 | */ | 203 | */ |
204 | 204 | ||
205 | #define ELF_CORE_COPY_REGS(pr_reg, regs) \ | 205 | #define ELF_CORE_COPY_REGS(pr_reg, regs) \ |
206 | do { \ | 206 | do { \ |
207 | unsigned v; \ | 207 | unsigned v; \ |
208 | (pr_reg)[0] = (regs)->r15; \ | 208 | (pr_reg)[0] = (regs)->r15; \ |
209 | (pr_reg)[1] = (regs)->r14; \ | 209 | (pr_reg)[1] = (regs)->r14; \ |
210 | (pr_reg)[2] = (regs)->r13; \ | 210 | (pr_reg)[2] = (regs)->r13; \ |
211 | (pr_reg)[3] = (regs)->r12; \ | 211 | (pr_reg)[3] = (regs)->r12; \ |
212 | (pr_reg)[4] = (regs)->bp; \ | 212 | (pr_reg)[4] = (regs)->bp; \ |
213 | (pr_reg)[5] = (regs)->bx; \ | 213 | (pr_reg)[5] = (regs)->bx; \ |
214 | (pr_reg)[6] = (regs)->r11; \ | 214 | (pr_reg)[6] = (regs)->r11; \ |
215 | (pr_reg)[7] = (regs)->r10; \ | 215 | (pr_reg)[7] = (regs)->r10; \ |
216 | (pr_reg)[8] = (regs)->r9; \ | 216 | (pr_reg)[8] = (regs)->r9; \ |
217 | (pr_reg)[9] = (regs)->r8; \ | 217 | (pr_reg)[9] = (regs)->r8; \ |
218 | (pr_reg)[10] = (regs)->ax; \ | 218 | (pr_reg)[10] = (regs)->ax; \ |
219 | (pr_reg)[11] = (regs)->cx; \ | 219 | (pr_reg)[11] = (regs)->cx; \ |
220 | (pr_reg)[12] = (regs)->dx; \ | 220 | (pr_reg)[12] = (regs)->dx; \ |
221 | (pr_reg)[13] = (regs)->si; \ | 221 | (pr_reg)[13] = (regs)->si; \ |
222 | (pr_reg)[14] = (regs)->di; \ | 222 | (pr_reg)[14] = (regs)->di; \ |
223 | (pr_reg)[15] = (regs)->orig_ax; \ | 223 | (pr_reg)[15] = (regs)->orig_ax; \ |
224 | (pr_reg)[16] = (regs)->ip; \ | 224 | (pr_reg)[16] = (regs)->ip; \ |
225 | (pr_reg)[17] = (regs)->cs; \ | 225 | (pr_reg)[17] = (regs)->cs; \ |
226 | (pr_reg)[18] = (regs)->flags; \ | 226 | (pr_reg)[18] = (regs)->flags; \ |
227 | (pr_reg)[19] = (regs)->sp; \ | 227 | (pr_reg)[19] = (regs)->sp; \ |
228 | (pr_reg)[20] = (regs)->ss; \ | 228 | (pr_reg)[20] = (regs)->ss; \ |
229 | (pr_reg)[21] = current->thread.fs; \ | 229 | (pr_reg)[21] = current->thread.fs; \ |
230 | (pr_reg)[22] = current->thread.gs; \ | 230 | (pr_reg)[22] = current->thread.gs; \ |
231 | asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \ | 231 | asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \ |
232 | asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ | 232 | asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ |
233 | asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ | 233 | asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ |
234 | asm("movl %%gs,%0" : "=r" (v)); (pr_reg)[26] = v; \ | 234 | asm("movl %%gs,%0" : "=r" (v)); (pr_reg)[26] = v; \ |
235 | } while (0); | 235 | } while (0); |
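
This is the same register layout a debugger sees when it reads the thread state; a small user-space sketch using the standard ptrace(2) API (not part of this patch) that dumps a stopped child's instruction and stack pointers:

    #include <stdio.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/user.h>

    /* child must already be stopped under ptrace (e.g. after PTRACE_ATTACH) */
    static void show_regs_of(pid_t child)
    {
            struct user_regs_struct regs;

            if (ptrace(PTRACE_GETREGS, child, NULL, &regs) == 0)
                    printf("rip=%#llx rsp=%#llx\n",
                           (unsigned long long)regs.rip,
                           (unsigned long long)regs.rsp);
    }
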
236 | 236 | ||
237 | /* I'm not sure if we can use '-' here */ | 237 | /* I'm not sure if we can use '-' here */ |
238 | #define ELF_PLATFORM ("x86_64") | 238 | #define ELF_PLATFORM ("x86_64") |
239 | extern void set_personality_64bit(void); | 239 | extern void set_personality_64bit(void); |
240 | extern unsigned int sysctl_vsyscall32; | 240 | extern unsigned int sysctl_vsyscall32; |
241 | extern int force_personality32; | 241 | extern int force_personality32; |
242 | 242 | ||
243 | #endif /* !CONFIG_X86_32 */ | 243 | #endif /* !CONFIG_X86_32 */ |
244 | 244 | ||
245 | #define CORE_DUMP_USE_REGSET | 245 | #define CORE_DUMP_USE_REGSET |
246 | #define USE_ELF_CORE_DUMP | 246 | #define USE_ELF_CORE_DUMP |
247 | #define ELF_EXEC_PAGESIZE 4096 | 247 | #define ELF_EXEC_PAGESIZE 4096 |
248 | 248 | ||
249 | /* This is the location that an ET_DYN program is loaded if exec'ed. Typical | 249 | /* This is the location that an ET_DYN program is loaded if exec'ed. Typical |
250 | use of this is to invoke "./ld.so someprog" to test out a new version of | 250 | use of this is to invoke "./ld.so someprog" to test out a new version of |
251 | the loader. We need to make sure that it is out of the way of the program | 251 | the loader. We need to make sure that it is out of the way of the program |
252 | that it will "exec", and that there is sufficient room for the brk. */ | 252 | that it will "exec", and that there is sufficient room for the brk. */ |
253 | 253 | ||
254 | #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) | 254 | #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) |
255 | 255 | ||
256 | /* This yields a mask that user programs can use to figure out what | 256 | /* This yields a mask that user programs can use to figure out what |
257 | instruction set this CPU supports. This could be done in user space, | 257 | instruction set this CPU supports. This could be done in user space, |
258 | but it's not easy, and we've already done it here. */ | 258 | but it's not easy, and we've already done it here. */ |
259 | 259 | ||
260 | #define ELF_HWCAP (boot_cpu_data.x86_capability[0]) | 260 | #define ELF_HWCAP (boot_cpu_data.x86_capability[0]) |
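
ELF_HWCAP is simply capability word 0, i.e. the CPUID.1:EDX feature bits, and it reaches user space through the AT_HWCAP auxv entry. A short sketch of reading it back with glibc's getauxval (the SSE2 bit position is the usual CPUID one, stated here as an assumption):

    #include <stdio.h>
    #include <sys/auxv.h>

    int main(void)
    {
            unsigned long hwcap = getauxval(AT_HWCAP);

            /* bit 26 of CPUID.1:EDX is SSE2 */
            printf("AT_HWCAP=%#lx sse2=%s\n", hwcap,
                   (hwcap & (1UL << 26)) ? "yes" : "no");
            return 0;
    }
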
261 | 261 | ||
262 | /* This yields a string that ld.so will use to load implementation | 262 | /* This yields a string that ld.so will use to load implementation |
263 | specific libraries for optimization. This is more specific in | 263 | specific libraries for optimization. This is more specific in |
264 | intent than poking at uname or /proc/cpuinfo. | 264 | intent than poking at uname or /proc/cpuinfo. |
265 | 265 | ||
266 | For the moment, we have only optimizations for the Intel generations, | 266 | For the moment, we have only optimizations for the Intel generations, |
267 | but that could change... */ | 267 | but that could change... */ |
268 | 268 | ||
269 | #define SET_PERSONALITY(ex, ibcs2) set_personality_64bit() | 269 | #define SET_PERSONALITY(ex, ibcs2) set_personality_64bit() |
270 | 270 | ||
271 | /* | 271 | /* |
272 | * An executable for which elf_read_implies_exec() returns TRUE will | 272 | * An executable for which elf_read_implies_exec() returns TRUE will |
273 | * have the READ_IMPLIES_EXEC personality flag set automatically. | 273 | * have the READ_IMPLIES_EXEC personality flag set automatically. |
274 | */ | 274 | */ |
275 | #define elf_read_implies_exec(ex, executable_stack) \ | 275 | #define elf_read_implies_exec(ex, executable_stack) \ |
276 | (executable_stack != EXSTACK_DISABLE_X) | 276 | (executable_stack != EXSTACK_DISABLE_X) |
277 | 277 | ||
278 | struct task_struct; | 278 | struct task_struct; |
279 | 279 | ||
280 | #define ARCH_DLINFO_IA32(vdso_enabled) \ | 280 | #define ARCH_DLINFO_IA32(vdso_enabled) \ |
281 | do { \ | 281 | do { \ |
282 | if (vdso_enabled) { \ | 282 | if (vdso_enabled) { \ |
283 | NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ | 283 | NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ |
284 | NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ | 284 | NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ |
285 | } \ | 285 | } \ |
286 | } while (0) | 286 | } while (0) |
287 | 287 | ||
288 | #ifdef CONFIG_X86_32 | 288 | #ifdef CONFIG_X86_32 |
289 | 289 | ||
290 | #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) | 290 | #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) |
291 | 291 | ||
292 | #define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled) | 292 | #define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled) |
293 | 293 | ||
294 | /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ | 294 | /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ |
295 | 295 | ||
296 | #else /* CONFIG_X86_32 */ | 296 | #else /* CONFIG_X86_32 */ |
297 | 297 | ||
298 | #define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */ | 298 | #define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */ |
299 | 299 | ||
300 | /* 1GB for 64bit, 8MB for 32bit */ | 300 | /* 1GB for 64bit, 8MB for 32bit */ |
301 | #define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff) | 301 | #define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff) |
302 | 302 | ||
303 | #define ARCH_DLINFO \ | 303 | #define ARCH_DLINFO \ |
304 | do { \ | 304 | do { \ |
305 | if (vdso_enabled) \ | 305 | if (vdso_enabled) \ |
306 | NEW_AUX_ENT(AT_SYSINFO_EHDR, \ | 306 | NEW_AUX_ENT(AT_SYSINFO_EHDR, \ |
307 | (unsigned long)current->mm->context.vdso); \ | 307 | (unsigned long)current->mm->context.vdso); \ |
308 | } while (0) | 308 | } while (0) |
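
The AT_SYSINFO_EHDR entry published here is how the dynamic linker (or any interested program) locates the vDSO; a minimal user-space sketch that reads it back and sanity-checks the ELF magic:

    #include <elf.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/auxv.h>

    int main(void)
    {
            const unsigned char *vdso =
                    (const unsigned char *)getauxval(AT_SYSINFO_EHDR);

            /* the aux value is the address of an ELF header the kernel mapped */
            if (vdso && memcmp(vdso, ELFMAG, SELFMAG) == 0)
                    printf("vDSO mapped at %p\n", (const void *)vdso);
            return 0;
    }
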
309 | 309 | ||
310 | #define AT_SYSINFO 32 | 310 | #define AT_SYSINFO 32 |
311 | 311 | ||
312 | #define COMPAT_ARCH_DLINFO ARCH_DLINFO_IA32(sysctl_vsyscall32) | 312 | #define COMPAT_ARCH_DLINFO ARCH_DLINFO_IA32(sysctl_vsyscall32) |
313 | 313 | ||
314 | #define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) | 314 | #define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) |
315 | 315 | ||
316 | #endif /* !CONFIG_X86_32 */ | 316 | #endif /* !CONFIG_X86_32 */ |
317 | 317 | ||
318 | #define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) | 318 | #define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) |
319 | 319 | ||
320 | #define VDSO_ENTRY \ | 320 | #define VDSO_ENTRY \ |
321 | ((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) | 321 | ((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) |
322 | 322 | ||
323 | struct linux_binprm; | 323 | struct linux_binprm; |
324 | 324 | ||
325 | #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 | 325 | #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 |
326 | extern int arch_setup_additional_pages(struct linux_binprm *bprm, | 326 | extern int arch_setup_additional_pages(struct linux_binprm *bprm, |
327 | int executable_stack); | 327 | int executable_stack); |
328 | 328 | ||
329 | extern int syscall32_setup_pages(struct linux_binprm *, int exstack); | 329 | extern int syscall32_setup_pages(struct linux_binprm *, int exstack); |
330 | #define compat_arch_setup_additional_pages syscall32_setup_pages | 330 | #define compat_arch_setup_additional_pages syscall32_setup_pages |
331 | 331 | ||
332 | extern unsigned long arch_randomize_brk(struct mm_struct *mm); | 332 | extern unsigned long arch_randomize_brk(struct mm_struct *mm); |
333 | #define arch_randomize_brk arch_randomize_brk | 333 | #define arch_randomize_brk arch_randomize_brk |
334 | 334 | ||
335 | #endif | 335 | #endif |
include/asm-x86/paravirt.h
1 | #ifndef __ASM_PARAVIRT_H | 1 | #ifndef __ASM_PARAVIRT_H |
2 | #define __ASM_PARAVIRT_H | 2 | #define __ASM_PARAVIRT_H |
3 | /* Various instructions on x86 need to be replaced for | 3 | /* Various instructions on x86 need to be replaced for |
4 | * para-virtualization: those hooks are defined here. */ | 4 | * para-virtualization: those hooks are defined here. */ |
5 | 5 | ||
6 | #ifdef CONFIG_PARAVIRT | 6 | #ifdef CONFIG_PARAVIRT |
7 | #include <asm/page.h> | 7 | #include <asm/page.h> |
8 | #include <asm/asm.h> | 8 | #include <asm/asm.h> |
9 | 9 | ||
10 | /* Bitmask of what can be clobbered: usually at least eax. */ | 10 | /* Bitmask of what can be clobbered: usually at least eax. */ |
11 | #define CLBR_NONE 0 | 11 | #define CLBR_NONE 0 |
12 | #define CLBR_EAX (1 << 0) | 12 | #define CLBR_EAX (1 << 0) |
13 | #define CLBR_ECX (1 << 1) | 13 | #define CLBR_ECX (1 << 1) |
14 | #define CLBR_EDX (1 << 2) | 14 | #define CLBR_EDX (1 << 2) |
15 | 15 | ||
16 | #ifdef CONFIG_X86_64 | 16 | #ifdef CONFIG_X86_64 |
17 | #define CLBR_RSI (1 << 3) | 17 | #define CLBR_RSI (1 << 3) |
18 | #define CLBR_RDI (1 << 4) | 18 | #define CLBR_RDI (1 << 4) |
19 | #define CLBR_R8 (1 << 5) | 19 | #define CLBR_R8 (1 << 5) |
20 | #define CLBR_R9 (1 << 6) | 20 | #define CLBR_R9 (1 << 6) |
21 | #define CLBR_R10 (1 << 7) | 21 | #define CLBR_R10 (1 << 7) |
22 | #define CLBR_R11 (1 << 8) | 22 | #define CLBR_R11 (1 << 8) |
23 | #define CLBR_ANY ((1 << 9) - 1) | 23 | #define CLBR_ANY ((1 << 9) - 1) |
24 | #include <asm/desc_defs.h> | 24 | #include <asm/desc_defs.h> |
25 | #else | 25 | #else |
26 | /* CLBR_ANY should match all regs the platform has. For i386, that's all of them */ | 26 | /* CLBR_ANY should match all regs the platform has. For i386, that's all of them */ |
27 | #define CLBR_ANY ((1 << 3) - 1) | 27 | #define CLBR_ANY ((1 << 3) - 1) |
28 | #endif /* X86_64 */ | 28 | #endif /* X86_64 */ |
29 | 29 | ||
30 | #ifndef __ASSEMBLY__ | 30 | #ifndef __ASSEMBLY__ |
31 | #include <linux/types.h> | 31 | #include <linux/types.h> |
32 | #include <linux/cpumask.h> | 32 | #include <linux/cpumask.h> |
33 | #include <asm/kmap_types.h> | 33 | #include <asm/kmap_types.h> |
34 | #include <asm/desc_defs.h> | 34 | #include <asm/desc_defs.h> |
35 | 35 | ||
36 | struct page; | 36 | struct page; |
37 | struct thread_struct; | 37 | struct thread_struct; |
38 | struct desc_ptr; | 38 | struct desc_ptr; |
39 | struct tss_struct; | 39 | struct tss_struct; |
40 | struct mm_struct; | 40 | struct mm_struct; |
41 | struct desc_struct; | 41 | struct desc_struct; |
42 | 42 | ||
43 | /* general info */ | 43 | /* general info */ |
44 | struct pv_info { | 44 | struct pv_info { |
45 | unsigned int kernel_rpl; | 45 | unsigned int kernel_rpl; |
46 | int shared_kernel_pmd; | 46 | int shared_kernel_pmd; |
47 | int paravirt_enabled; | 47 | int paravirt_enabled; |
48 | const char *name; | 48 | const char *name; |
49 | }; | 49 | }; |
50 | 50 | ||
51 | struct pv_init_ops { | 51 | struct pv_init_ops { |
52 | /* | 52 | /* |
53 | * Patch may replace one of the defined code sequences with | 53 | * Patch may replace one of the defined code sequences with |
54 | * arbitrary code, subject to the same register constraints. | 54 | * arbitrary code, subject to the same register constraints. |
55 | * This generally means the code is not free to clobber any | 55 | * This generally means the code is not free to clobber any |
56 | * registers other than EAX. The patch function should return | 56 | * registers other than EAX. The patch function should return |
57 | * the number of bytes of code generated, as we nop pad the | 57 | * the number of bytes of code generated, as we nop pad the |
58 | * rest in generic code. | 58 | * rest in generic code. |
59 | */ | 59 | */ |
60 | unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, | 60 | unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, |
61 | unsigned long addr, unsigned len); | 61 | unsigned long addr, unsigned len); |
62 | 62 | ||
63 | /* Basic arch-specific setup */ | 63 | /* Basic arch-specific setup */ |
64 | void (*arch_setup)(void); | 64 | void (*arch_setup)(void); |
65 | char *(*memory_setup)(void); | 65 | char *(*memory_setup)(void); |
66 | void (*post_allocator_init)(void); | 66 | void (*post_allocator_init)(void); |
67 | 67 | ||
68 | /* Print a banner to identify the environment */ | 68 | /* Print a banner to identify the environment */ |
69 | void (*banner)(void); | 69 | void (*banner)(void); |
70 | }; | 70 | }; |
71 | 71 | ||
72 | 72 | ||
73 | struct pv_lazy_ops { | 73 | struct pv_lazy_ops { |
74 | /* Set deferred update mode, used for batching operations. */ | 74 | /* Set deferred update mode, used for batching operations. */ |
75 | void (*enter)(void); | 75 | void (*enter)(void); |
76 | void (*leave)(void); | 76 | void (*leave)(void); |
77 | }; | 77 | }; |
78 | 78 | ||
79 | struct pv_time_ops { | 79 | struct pv_time_ops { |
80 | void (*time_init)(void); | 80 | void (*time_init)(void); |
81 | 81 | ||
82 | /* Get and set time of day */ | 82 | /* Get and set time of day */ |
83 | unsigned long (*get_wallclock)(void); | 83 | unsigned long (*get_wallclock)(void); |
84 | int (*set_wallclock)(unsigned long); | 84 | int (*set_wallclock)(unsigned long); |
85 | 85 | ||
86 | unsigned long long (*sched_clock)(void); | 86 | unsigned long long (*sched_clock)(void); |
87 | unsigned long (*get_cpu_khz)(void); | 87 | unsigned long (*get_cpu_khz)(void); |
88 | }; | 88 | }; |
89 | 89 | ||
90 | struct pv_cpu_ops { | 90 | struct pv_cpu_ops { |
91 | /* hooks for various privileged instructions */ | 91 | /* hooks for various privileged instructions */ |
92 | unsigned long (*get_debugreg)(int regno); | 92 | unsigned long (*get_debugreg)(int regno); |
93 | void (*set_debugreg)(int regno, unsigned long value); | 93 | void (*set_debugreg)(int regno, unsigned long value); |
94 | 94 | ||
95 | void (*clts)(void); | 95 | void (*clts)(void); |
96 | 96 | ||
97 | unsigned long (*read_cr0)(void); | 97 | unsigned long (*read_cr0)(void); |
98 | void (*write_cr0)(unsigned long); | 98 | void (*write_cr0)(unsigned long); |
99 | 99 | ||
100 | unsigned long (*read_cr4_safe)(void); | 100 | unsigned long (*read_cr4_safe)(void); |
101 | unsigned long (*read_cr4)(void); | 101 | unsigned long (*read_cr4)(void); |
102 | void (*write_cr4)(unsigned long); | 102 | void (*write_cr4)(unsigned long); |
103 | 103 | ||
104 | #ifdef CONFIG_X86_64 | 104 | #ifdef CONFIG_X86_64 |
105 | unsigned long (*read_cr8)(void); | 105 | unsigned long (*read_cr8)(void); |
106 | void (*write_cr8)(unsigned long); | 106 | void (*write_cr8)(unsigned long); |
107 | #endif | 107 | #endif |
108 | 108 | ||
109 | /* Segment descriptor handling */ | 109 | /* Segment descriptor handling */ |
110 | void (*load_tr_desc)(void); | 110 | void (*load_tr_desc)(void); |
111 | void (*load_gdt)(const struct desc_ptr *); | 111 | void (*load_gdt)(const struct desc_ptr *); |
112 | void (*load_idt)(const struct desc_ptr *); | 112 | void (*load_idt)(const struct desc_ptr *); |
113 | void (*store_gdt)(struct desc_ptr *); | 113 | void (*store_gdt)(struct desc_ptr *); |
114 | void (*store_idt)(struct desc_ptr *); | 114 | void (*store_idt)(struct desc_ptr *); |
115 | void (*set_ldt)(const void *desc, unsigned entries); | 115 | void (*set_ldt)(const void *desc, unsigned entries); |
116 | unsigned long (*store_tr)(void); | 116 | unsigned long (*store_tr)(void); |
117 | void (*load_tls)(struct thread_struct *t, unsigned int cpu); | 117 | void (*load_tls)(struct thread_struct *t, unsigned int cpu); |
118 | #ifdef CONFIG_X86_64 | ||
119 | void (*load_gs_index)(unsigned int idx); | ||
120 | #endif | ||
118 | void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, | 121 | void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, |
119 | const void *desc); | 122 | const void *desc); |
120 | void (*write_gdt_entry)(struct desc_struct *, | 123 | void (*write_gdt_entry)(struct desc_struct *, |
121 | int entrynum, const void *desc, int size); | 124 | int entrynum, const void *desc, int size); |
122 | void (*write_idt_entry)(gate_desc *, | 125 | void (*write_idt_entry)(gate_desc *, |
123 | int entrynum, const gate_desc *gate); | 126 | int entrynum, const gate_desc *gate); |
124 | void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); | 127 | void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); |
125 | 128 | ||
126 | void (*set_iopl_mask)(unsigned mask); | 129 | void (*set_iopl_mask)(unsigned mask); |
127 | 130 | ||
128 | void (*wbinvd)(void); | 131 | void (*wbinvd)(void); |
129 | void (*io_delay)(void); | 132 | void (*io_delay)(void); |
130 | 133 | ||
131 | /* cpuid emulation, mostly so that caps bits can be disabled */ | 134 | /* cpuid emulation, mostly so that caps bits can be disabled */ |
132 | void (*cpuid)(unsigned int *eax, unsigned int *ebx, | 135 | void (*cpuid)(unsigned int *eax, unsigned int *ebx, |
133 | unsigned int *ecx, unsigned int *edx); | 136 | unsigned int *ecx, unsigned int *edx); |
134 | 137 | ||
135 | /* MSR, PMC and TSC operations. | 138 | /* MSR, PMC and TSC operations. |
136 | err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ | 139 | err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ |
137 | u64 (*read_msr)(unsigned int msr, int *err); | 140 | u64 (*read_msr)(unsigned int msr, int *err); |
138 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); | 141 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); |
139 | 142 | ||
140 | u64 (*read_tsc)(void); | 143 | u64 (*read_tsc)(void); |
141 | u64 (*read_pmc)(int counter); | 144 | u64 (*read_pmc)(int counter); |
142 | unsigned long long (*read_tscp)(unsigned int *aux); | 145 | unsigned long long (*read_tscp)(unsigned int *aux); |
143 | 146 | ||
144 | /* | 147 | /* |
145 | * Atomically enable interrupts and return to userspace. This | 148 | * Atomically enable interrupts and return to userspace. This |
146 | * is only ever used to return to 32-bit processes; in a | 149 | * is only ever used to return to 32-bit processes; in a |
147 | * 64-bit kernel, it's used for 32-on-64 compat processes, but | 150 | * 64-bit kernel, it's used for 32-on-64 compat processes, but |
148 | * never native 64-bit processes. (Jump, not call.) | 151 | * never native 64-bit processes. (Jump, not call.) |
149 | */ | 152 | */ |
150 | void (*irq_enable_sysexit)(void); | 153 | void (*irq_enable_sysexit)(void); |
151 | 154 | ||
152 | /* | 155 | /* |
153 | * Switch to usermode gs and return to 64-bit usermode using | 156 | * Switch to usermode gs and return to 64-bit usermode using |
154 | * sysret. Only used in 64-bit kernels to return to 64-bit | 157 | * sysret. Only used in 64-bit kernels to return to 64-bit |
155 | * processes. Usermode register state, including %rsp, must | 158 | * processes. Usermode register state, including %rsp, must |
156 | * already be restored. | 159 | * already be restored. |
157 | */ | 160 | */ |
158 | void (*usergs_sysret64)(void); | 161 | void (*usergs_sysret64)(void); |
159 | 162 | ||
160 | /* | 163 | /* |
161 | * Switch to usermode gs and return to 32-bit usermode using | 164 | * Switch to usermode gs and return to 32-bit usermode using |
162 | * sysret. Used to return to 32-on-64 compat processes. | 165 | * sysret. Used to return to 32-on-64 compat processes. |
163 | * Other usermode register state, including %esp, must already | 166 | * Other usermode register state, including %esp, must already |
164 | * be restored. | 167 | * be restored. |
165 | */ | 168 | */ |
166 | void (*usergs_sysret32)(void); | 169 | void (*usergs_sysret32)(void); |
167 | 170 | ||
168 | /* Normal iret. Jump to this with the standard iret stack | 171 | /* Normal iret. Jump to this with the standard iret stack |
169 | frame set up. */ | 172 | frame set up. */ |
170 | void (*iret)(void); | 173 | void (*iret)(void); |
171 | 174 | ||
172 | void (*swapgs)(void); | 175 | void (*swapgs)(void); |
173 | 176 | ||
174 | struct pv_lazy_ops lazy_mode; | 177 | struct pv_lazy_ops lazy_mode; |
175 | }; | 178 | }; |
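
The newly added load_gs_index slot above only declares the hook; the native side of the patch presumably wires it to the existing assembly helper in arch/x86/kernel/paravirt.c, roughly like this (a sketch of the expected hunk, not copied from it):

    /* native ops: the hook dispatches straight to the assembly routine */
    struct pv_cpu_ops pv_cpu_ops = {
            /* ... unchanged entries ... */
    #ifdef CONFIG_X86_64
            .load_gs_index = native_load_gs_index,
    #endif
            /* ... */
    };
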
176 | 179 | ||
177 | struct pv_irq_ops { | 180 | struct pv_irq_ops { |
178 | void (*init_IRQ)(void); | 181 | void (*init_IRQ)(void); |
179 | 182 | ||
180 | /* | 183 | /* |
181 | * Get/set interrupt state. save_fl and restore_fl are only | 184 | * Get/set interrupt state. save_fl and restore_fl are only |
182 | * expected to use X86_EFLAGS_IF; all other bits | 185 | * expected to use X86_EFLAGS_IF; all other bits |
183 | * returned from save_fl are undefined, and may be ignored by | 186 | * returned from save_fl are undefined, and may be ignored by |
184 | * restore_fl. | 187 | * restore_fl. |
185 | */ | 188 | */ |
186 | unsigned long (*save_fl)(void); | 189 | unsigned long (*save_fl)(void); |
187 | void (*restore_fl)(unsigned long); | 190 | void (*restore_fl)(unsigned long); |
188 | void (*irq_disable)(void); | 191 | void (*irq_disable)(void); |
189 | void (*irq_enable)(void); | 192 | void (*irq_enable)(void); |
190 | void (*safe_halt)(void); | 193 | void (*safe_halt)(void); |
191 | void (*halt)(void); | 194 | void (*halt)(void); |
192 | 195 | ||
193 | #ifdef CONFIG_X86_64 | 196 | #ifdef CONFIG_X86_64 |
194 | void (*adjust_exception_frame)(void); | 197 | void (*adjust_exception_frame)(void); |
195 | #endif | 198 | #endif |
196 | }; | 199 | }; |
197 | 200 | ||
198 | struct pv_apic_ops { | 201 | struct pv_apic_ops { |
199 | #ifdef CONFIG_X86_LOCAL_APIC | 202 | #ifdef CONFIG_X86_LOCAL_APIC |
200 | /* | 203 | /* |
201 | * Direct APIC operations, principally for VMI. Ideally | 204 | * Direct APIC operations, principally for VMI. Ideally |
202 | * these shouldn't be in this interface. | 205 | * these shouldn't be in this interface. |
203 | */ | 206 | */ |
204 | void (*apic_write)(unsigned long reg, u32 v); | 207 | void (*apic_write)(unsigned long reg, u32 v); |
205 | void (*apic_write_atomic)(unsigned long reg, u32 v); | 208 | void (*apic_write_atomic)(unsigned long reg, u32 v); |
206 | u32 (*apic_read)(unsigned long reg); | 209 | u32 (*apic_read)(unsigned long reg); |
207 | void (*setup_boot_clock)(void); | 210 | void (*setup_boot_clock)(void); |
208 | void (*setup_secondary_clock)(void); | 211 | void (*setup_secondary_clock)(void); |
209 | 212 | ||
210 | void (*startup_ipi_hook)(int phys_apicid, | 213 | void (*startup_ipi_hook)(int phys_apicid, |
211 | unsigned long start_eip, | 214 | unsigned long start_eip, |
212 | unsigned long start_esp); | 215 | unsigned long start_esp); |
213 | #endif | 216 | #endif |
214 | }; | 217 | }; |
215 | 218 | ||
216 | struct pv_mmu_ops { | 219 | struct pv_mmu_ops { |
217 | /* | 220 | /* |
218 | * Called before/after init_mm pagetable setup. setup_start | 221 | * Called before/after init_mm pagetable setup. setup_start |
219 | * may reset %cr3, and may pre-install parts of the pagetable; | 222 | * may reset %cr3, and may pre-install parts of the pagetable; |
220 | * pagetable setup is expected to preserve any existing | 223 | * pagetable setup is expected to preserve any existing |
221 | * mapping. | 224 | * mapping. |
222 | */ | 225 | */ |
223 | void (*pagetable_setup_start)(pgd_t *pgd_base); | 226 | void (*pagetable_setup_start)(pgd_t *pgd_base); |
224 | void (*pagetable_setup_done)(pgd_t *pgd_base); | 227 | void (*pagetable_setup_done)(pgd_t *pgd_base); |
225 | 228 | ||
226 | unsigned long (*read_cr2)(void); | 229 | unsigned long (*read_cr2)(void); |
227 | void (*write_cr2)(unsigned long); | 230 | void (*write_cr2)(unsigned long); |
228 | 231 | ||
229 | unsigned long (*read_cr3)(void); | 232 | unsigned long (*read_cr3)(void); |
230 | void (*write_cr3)(unsigned long); | 233 | void (*write_cr3)(unsigned long); |
231 | 234 | ||
232 | /* | 235 | /* |
233 | * Hooks for intercepting the creation/use/destruction of an | 236 | * Hooks for intercepting the creation/use/destruction of an |
234 | * mm_struct. | 237 | * mm_struct. |
235 | */ | 238 | */ |
236 | void (*activate_mm)(struct mm_struct *prev, | 239 | void (*activate_mm)(struct mm_struct *prev, |
237 | struct mm_struct *next); | 240 | struct mm_struct *next); |
238 | void (*dup_mmap)(struct mm_struct *oldmm, | 241 | void (*dup_mmap)(struct mm_struct *oldmm, |
239 | struct mm_struct *mm); | 242 | struct mm_struct *mm); |
240 | void (*exit_mmap)(struct mm_struct *mm); | 243 | void (*exit_mmap)(struct mm_struct *mm); |
241 | 244 | ||
242 | 245 | ||
243 | /* TLB operations */ | 246 | /* TLB operations */ |
244 | void (*flush_tlb_user)(void); | 247 | void (*flush_tlb_user)(void); |
245 | void (*flush_tlb_kernel)(void); | 248 | void (*flush_tlb_kernel)(void); |
246 | void (*flush_tlb_single)(unsigned long addr); | 249 | void (*flush_tlb_single)(unsigned long addr); |
247 | void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, | 250 | void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, |
248 | unsigned long va); | 251 | unsigned long va); |
249 | 252 | ||
250 | /* Hooks for allocating and freeing a pagetable top-level */ | 253 | /* Hooks for allocating and freeing a pagetable top-level */ |
251 | int (*pgd_alloc)(struct mm_struct *mm); | 254 | int (*pgd_alloc)(struct mm_struct *mm); |
252 | void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); | 255 | void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); |
253 | 256 | ||
254 | /* | 257 | /* |
255 | * Hooks for allocating/releasing pagetable pages when they're | 258 | * Hooks for allocating/releasing pagetable pages when they're |
256 | * attached to a pagetable | 259 | * attached to a pagetable |
257 | */ | 260 | */ |
258 | void (*alloc_pte)(struct mm_struct *mm, u32 pfn); | 261 | void (*alloc_pte)(struct mm_struct *mm, u32 pfn); |
259 | void (*alloc_pmd)(struct mm_struct *mm, u32 pfn); | 262 | void (*alloc_pmd)(struct mm_struct *mm, u32 pfn); |
260 | void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); | 263 | void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); |
261 | void (*alloc_pud)(struct mm_struct *mm, u32 pfn); | 264 | void (*alloc_pud)(struct mm_struct *mm, u32 pfn); |
262 | void (*release_pte)(u32 pfn); | 265 | void (*release_pte)(u32 pfn); |
263 | void (*release_pmd)(u32 pfn); | 266 | void (*release_pmd)(u32 pfn); |
264 | void (*release_pud)(u32 pfn); | 267 | void (*release_pud)(u32 pfn); |
265 | 268 | ||
266 | /* Pagetable manipulation functions */ | 269 | /* Pagetable manipulation functions */ |
267 | void (*set_pte)(pte_t *ptep, pte_t pteval); | 270 | void (*set_pte)(pte_t *ptep, pte_t pteval); |
268 | void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, | 271 | void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, |
269 | pte_t *ptep, pte_t pteval); | 272 | pte_t *ptep, pte_t pteval); |
270 | void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); | 273 | void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); |
271 | void (*pte_update)(struct mm_struct *mm, unsigned long addr, | 274 | void (*pte_update)(struct mm_struct *mm, unsigned long addr, |
272 | pte_t *ptep); | 275 | pte_t *ptep); |
273 | void (*pte_update_defer)(struct mm_struct *mm, | 276 | void (*pte_update_defer)(struct mm_struct *mm, |
274 | unsigned long addr, pte_t *ptep); | 277 | unsigned long addr, pte_t *ptep); |
275 | 278 | ||
276 | pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, | 279 | pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, |
277 | pte_t *ptep); | 280 | pte_t *ptep); |
278 | void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, | 281 | void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, |
279 | pte_t *ptep, pte_t pte); | 282 | pte_t *ptep, pte_t pte); |
280 | 283 | ||
281 | pteval_t (*pte_val)(pte_t); | 284 | pteval_t (*pte_val)(pte_t); |
282 | pteval_t (*pte_flags)(pte_t); | 285 | pteval_t (*pte_flags)(pte_t); |
283 | pte_t (*make_pte)(pteval_t pte); | 286 | pte_t (*make_pte)(pteval_t pte); |
284 | 287 | ||
285 | pgdval_t (*pgd_val)(pgd_t); | 288 | pgdval_t (*pgd_val)(pgd_t); |
286 | pgd_t (*make_pgd)(pgdval_t pgd); | 289 | pgd_t (*make_pgd)(pgdval_t pgd); |
287 | 290 | ||
288 | #if PAGETABLE_LEVELS >= 3 | 291 | #if PAGETABLE_LEVELS >= 3 |
289 | #ifdef CONFIG_X86_PAE | 292 | #ifdef CONFIG_X86_PAE |
290 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); | 293 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); |
291 | void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, | 294 | void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, |
292 | pte_t *ptep, pte_t pte); | 295 | pte_t *ptep, pte_t pte); |
293 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, | 296 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, |
294 | pte_t *ptep); | 297 | pte_t *ptep); |
295 | void (*pmd_clear)(pmd_t *pmdp); | 298 | void (*pmd_clear)(pmd_t *pmdp); |
296 | 299 | ||
297 | #endif /* CONFIG_X86_PAE */ | 300 | #endif /* CONFIG_X86_PAE */ |
298 | 301 | ||
299 | void (*set_pud)(pud_t *pudp, pud_t pudval); | 302 | void (*set_pud)(pud_t *pudp, pud_t pudval); |
300 | 303 | ||
301 | pmdval_t (*pmd_val)(pmd_t); | 304 | pmdval_t (*pmd_val)(pmd_t); |
302 | pmd_t (*make_pmd)(pmdval_t pmd); | 305 | pmd_t (*make_pmd)(pmdval_t pmd); |
303 | 306 | ||
304 | #if PAGETABLE_LEVELS == 4 | 307 | #if PAGETABLE_LEVELS == 4 |
305 | pudval_t (*pud_val)(pud_t); | 308 | pudval_t (*pud_val)(pud_t); |
306 | pud_t (*make_pud)(pudval_t pud); | 309 | pud_t (*make_pud)(pudval_t pud); |
307 | 310 | ||
308 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); | 311 | void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); |
309 | #endif /* PAGETABLE_LEVELS == 4 */ | 312 | #endif /* PAGETABLE_LEVELS == 4 */ |
310 | #endif /* PAGETABLE_LEVELS >= 3 */ | 313 | #endif /* PAGETABLE_LEVELS >= 3 */ |
311 | 314 | ||
312 | #ifdef CONFIG_HIGHPTE | 315 | #ifdef CONFIG_HIGHPTE |
313 | void *(*kmap_atomic_pte)(struct page *page, enum km_type type); | 316 | void *(*kmap_atomic_pte)(struct page *page, enum km_type type); |
314 | #endif | 317 | #endif |
315 | 318 | ||
316 | struct pv_lazy_ops lazy_mode; | 319 | struct pv_lazy_ops lazy_mode; |
317 | 320 | ||
318 | /* dom0 ops */ | 321 | /* dom0 ops */ |
319 | 322 | ||
320 | /* Sometimes the physical address is a pfn, and sometimes it's | 323 | /* Sometimes the physical address is a pfn, and sometimes it's |
321 | an mfn. We can tell which is which from the index. */ | 324 | an mfn. We can tell which is which from the index. */ |
322 | void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, | 325 | void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, |
323 | unsigned long phys, pgprot_t flags); | 326 | unsigned long phys, pgprot_t flags); |
324 | }; | 327 | }; |
325 | 328 | ||
326 | /* This contains all the paravirt structures: we get a convenient | 329 | /* This contains all the paravirt structures: we get a convenient |
327 | * number for each function using the offset which we use to indicate | 330 | * number for each function using the offset which we use to indicate |
328 | * what to patch. */ | 331 | * what to patch. */ |
329 | struct paravirt_patch_template { | 332 | struct paravirt_patch_template { |
330 | struct pv_init_ops pv_init_ops; | 333 | struct pv_init_ops pv_init_ops; |
331 | struct pv_time_ops pv_time_ops; | 334 | struct pv_time_ops pv_time_ops; |
332 | struct pv_cpu_ops pv_cpu_ops; | 335 | struct pv_cpu_ops pv_cpu_ops; |
333 | struct pv_irq_ops pv_irq_ops; | 336 | struct pv_irq_ops pv_irq_ops; |
334 | struct pv_apic_ops pv_apic_ops; | 337 | struct pv_apic_ops pv_apic_ops; |
335 | struct pv_mmu_ops pv_mmu_ops; | 338 | struct pv_mmu_ops pv_mmu_ops; |
336 | }; | 339 | }; |
337 | 340 | ||
338 | extern struct pv_info pv_info; | 341 | extern struct pv_info pv_info; |
339 | extern struct pv_init_ops pv_init_ops; | 342 | extern struct pv_init_ops pv_init_ops; |
340 | extern struct pv_time_ops pv_time_ops; | 343 | extern struct pv_time_ops pv_time_ops; |
341 | extern struct pv_cpu_ops pv_cpu_ops; | 344 | extern struct pv_cpu_ops pv_cpu_ops; |
342 | extern struct pv_irq_ops pv_irq_ops; | 345 | extern struct pv_irq_ops pv_irq_ops; |
343 | extern struct pv_apic_ops pv_apic_ops; | 346 | extern struct pv_apic_ops pv_apic_ops; |
344 | extern struct pv_mmu_ops pv_mmu_ops; | 347 | extern struct pv_mmu_ops pv_mmu_ops; |
345 | 348 | ||
346 | #define PARAVIRT_PATCH(x) \ | 349 | #define PARAVIRT_PATCH(x) \ |
347 | (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) | 350 | (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) |
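
In other words, each hook's type number is just its index, in pointer-sized units, into paravirt_patch_template. A tiny compile-time illustration (hypothetical check, assuming a C11 _Static_assert is acceptable):

    #include <stddef.h>

    /* pv_init_ops is the first member and .patch its first field, so the
     * very first patchable slot gets type number 0 */
    _Static_assert(PARAVIRT_PATCH(pv_init_ops.patch) == 0,
                   "type numbering starts at the first function pointer");
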
348 | 351 | ||
349 | #define paravirt_type(op) \ | 352 | #define paravirt_type(op) \ |
350 | [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ | 353 | [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ |
351 | [paravirt_opptr] "m" (op) | 354 | [paravirt_opptr] "m" (op) |
352 | #define paravirt_clobber(clobber) \ | 355 | #define paravirt_clobber(clobber) \ |
353 | [paravirt_clobber] "i" (clobber) | 356 | [paravirt_clobber] "i" (clobber) |
354 | 357 | ||
355 | /* | 358 | /* |
356 | * Generate some code, and mark it as patchable by the | 359 | * Generate some code, and mark it as patchable by the |
357 | * apply_paravirt() alternate instruction patcher. | 360 | * apply_paravirt() alternate instruction patcher. |
358 | */ | 361 | */ |
359 | #define _paravirt_alt(insn_string, type, clobber) \ | 362 | #define _paravirt_alt(insn_string, type, clobber) \ |
360 | "771:\n\t" insn_string "\n" "772:\n" \ | 363 | "771:\n\t" insn_string "\n" "772:\n" \ |
361 | ".pushsection .parainstructions,\"a\"\n" \ | 364 | ".pushsection .parainstructions,\"a\"\n" \ |
362 | _ASM_ALIGN "\n" \ | 365 | _ASM_ALIGN "\n" \ |
363 | _ASM_PTR " 771b\n" \ | 366 | _ASM_PTR " 771b\n" \ |
364 | " .byte " type "\n" \ | 367 | " .byte " type "\n" \ |
365 | " .byte 772b-771b\n" \ | 368 | " .byte 772b-771b\n" \ |
366 | " .short " clobber "\n" \ | 369 | " .short " clobber "\n" \ |
367 | ".popsection\n" | 370 | ".popsection\n" |
368 | 371 | ||
369 | /* Generate patchable code, with the default asm parameters. */ | 372 | /* Generate patchable code, with the default asm parameters. */ |
370 | #define paravirt_alt(insn_string) \ | 373 | #define paravirt_alt(insn_string) \ |
371 | _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") | 374 | _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") |
372 | 375 | ||
373 | /* Simple instruction patching code. */ | 376 | /* Simple instruction patching code. */ |
374 | #define DEF_NATIVE(ops, name, code) \ | 377 | #define DEF_NATIVE(ops, name, code) \ |
375 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ | 378 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ |
376 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") | 379 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") |
377 | 380 | ||
378 | unsigned paravirt_patch_nop(void); | 381 | unsigned paravirt_patch_nop(void); |
379 | unsigned paravirt_patch_ignore(unsigned len); | 382 | unsigned paravirt_patch_ignore(unsigned len); |
380 | unsigned paravirt_patch_call(void *insnbuf, | 383 | unsigned paravirt_patch_call(void *insnbuf, |
381 | const void *target, u16 tgt_clobbers, | 384 | const void *target, u16 tgt_clobbers, |
382 | unsigned long addr, u16 site_clobbers, | 385 | unsigned long addr, u16 site_clobbers, |
383 | unsigned len); | 386 | unsigned len); |
384 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, | 387 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, |
385 | unsigned long addr, unsigned len); | 388 | unsigned long addr, unsigned len); |
386 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, | 389 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, |
387 | unsigned long addr, unsigned len); | 390 | unsigned long addr, unsigned len); |
388 | 391 | ||
389 | unsigned paravirt_patch_insns(void *insnbuf, unsigned len, | 392 | unsigned paravirt_patch_insns(void *insnbuf, unsigned len, |
390 | const char *start, const char *end); | 393 | const char *start, const char *end); |
391 | 394 | ||
392 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | 395 | unsigned native_patch(u8 type, u16 clobbers, void *ibuf, |
393 | unsigned long addr, unsigned len); | 396 | unsigned long addr, unsigned len); |
394 | 397 | ||
395 | int paravirt_disable_iospace(void); | 398 | int paravirt_disable_iospace(void); |
396 | 399 | ||
397 | /* | 400 | /* |
398 | * This generates an indirect call based on the operation type number. | 401 | * This generates an indirect call based on the operation type number. |
399 | * The type number, computed in PARAVIRT_PATCH, is derived from the | 402 | * The type number, computed in PARAVIRT_PATCH, is derived from the |
400 | * offset into the paravirt_patch_template structure, and can therefore be | 403 | * offset into the paravirt_patch_template structure, and can therefore be |
401 | * freely converted back into a structure offset. | 404 | * freely converted back into a structure offset. |
402 | */ | 405 | */ |
403 | #define PARAVIRT_CALL "call *%[paravirt_opptr];" | 406 | #define PARAVIRT_CALL "call *%[paravirt_opptr];" |
404 | 407 | ||
405 | /* | 408 | /* |
406 | * These macros are intended to wrap calls through one of the paravirt | 409 | * These macros are intended to wrap calls through one of the paravirt |
407 | * ops structs, so that they can be later identified and patched at | 410 | * ops structs, so that they can be later identified and patched at |
408 | * runtime. | 411 | * runtime. |
409 | * | 412 | * |
410 | * Normally, a call to a pv_op function is a simple indirect call: | 413 | * Normally, a call to a pv_op function is a simple indirect call: |
411 | * (pv_op_struct.operations)(args...). | 414 | * (pv_op_struct.operations)(args...). |
412 | * | 415 | * |
413 | * Unfortunately, this is a relatively slow operation for modern CPUs, | 416 | * Unfortunately, this is a relatively slow operation for modern CPUs, |
414 | * because it cannot necessarily determine what the destination | 417 | * because it cannot necessarily determine what the destination |
415 | * address is. In this case, the address is a runtime constant, so at | 418 | * address is. In this case, the address is a runtime constant, so at |
416 | * the very least we can patch the call to be a simple direct call, or | 419 | * the very least we can patch the call to be a simple direct call, or |
417 | * ideally, patch an inline implementation into the callsite. (Direct | 420 | * ideally, patch an inline implementation into the callsite. (Direct |
418 | * calls are essentially free, because the call and return addresses | 421 | * calls are essentially free, because the call and return addresses |
419 | * are completely predictable.) | 422 | * are completely predictable.) |
420 | * | 423 | * |
421 | * For i386, these macros rely on the standard gcc "regparm(3)" calling | 424 | * For i386, these macros rely on the standard gcc "regparm(3)" calling |
422 | * convention, in which the first three arguments are placed in %eax, | 425 | * convention, in which the first three arguments are placed in %eax, |
423 | * %edx, %ecx (in that order), and the remaining arguments are placed | 426 | * %edx, %ecx (in that order), and the remaining arguments are placed |
424 | * on the stack. All caller-save registers (eax,edx,ecx) are expected | 427 | * on the stack. All caller-save registers (eax,edx,ecx) are expected |
425 | * to be modified (either clobbered or used for return values). | 428 | * to be modified (either clobbered or used for return values). |
426 | * X86_64, on the other hand, already specifies a register-based calling | 429 | * X86_64, on the other hand, already specifies a register-based calling |
427 | * convention, returning in %rax, with parameters going in %rdi, %rsi, | 430 | * convention, returning in %rax, with parameters going in %rdi, %rsi, |
428 | * %rdx, and %rcx. Note that for this reason, x86_64 does not need any | 431 | * %rdx, and %rcx. Note that for this reason, x86_64 does not need any |
429 | * special handling for dealing with 4 arguments, unlike i386. | 432 | * special handling for dealing with 4 arguments, unlike i386. |
430 | * However, x86_64 also has to clobber all caller-saved registers, which | 433 | * However, x86_64 also has to clobber all caller-saved registers, which |
431 | * unfortunately is quite a few (r8 - r11). | 434 | * unfortunately is quite a few (r8 - r11). |
432 | * | 435 | * |
433 | * The call instruction itself is marked by placing its start address | 436 | * The call instruction itself is marked by placing its start address |
434 | * and size into the .parainstructions section, so that | 437 | * and size into the .parainstructions section, so that |
435 | * apply_paravirt() in arch/i386/kernel/alternative.c can do the | 438 | * apply_paravirt() in arch/i386/kernel/alternative.c can do the |
436 | * appropriate patching under the control of the backend pv_init_ops | 439 | * appropriate patching under the control of the backend pv_init_ops |
437 | * implementation. | 440 | * implementation. |
438 | * | 441 | * |
439 | * Unfortunately there's no way to get gcc to generate the args setup | 442 | * Unfortunately there's no way to get gcc to generate the args setup |
440 | * for the call, and then allow the call itself to be generated by an | 443 | * for the call, and then allow the call itself to be generated by an |
441 | * inline asm. Because of this, we must do the complete arg setup and | 444 | * inline asm. Because of this, we must do the complete arg setup and |
442 | * return value handling from within these macros. This is fairly | 445 | * return value handling from within these macros. This is fairly |
443 | * cumbersome. | 446 | * cumbersome. |
444 | * | 447 | * |
445 | * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. | 448 | * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. |
446 | * It could be extended to more arguments, but there would be little | 449 | * It could be extended to more arguments, but there would be little |
447 | * to be gained from that. For each number of arguments, there are | 450 | * to be gained from that. For each number of arguments, there are |
448 | * the two VCALL and CALL variants for void and non-void functions. | 451 | * the two VCALL and CALL variants for void and non-void functions. |
449 | * | 452 | * |
450 | * When there is a return value, the invoker of the macro must specify | 453 | * When there is a return value, the invoker of the macro must specify |
451 | * the return type. The macro then uses sizeof() on that type to | 454 | * the return type. The macro then uses sizeof() on that type to |
452 | * determine whether it's a 32 or 64 bit value, and places the return | 455 | * determine whether it's a 32 or 64 bit value, and places the return |
453 | * in the right register(s) (just %eax for 32-bit, and %edx:%eax for | 456 | * in the right register(s) (just %eax for 32-bit, and %edx:%eax for |
454 | * 64-bit). For x86_64 machines, it just returns in %rax regardless of | 457 | * 64-bit). For x86_64 machines, it just returns in %rax regardless of |
455 | * the return value size. | 458 | * the return value size. |
456 | * | 459 | * |
457 | * 64-bit arguments are passed as a pair of adjacent 32-bit arguments | 460 | * 64-bit arguments are passed as a pair of adjacent 32-bit arguments |
458 | * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments | 461 | * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments |
459 | * in low,high order | 462 | * in low,high order |
460 | * | 463 | * |
461 | * Small structures are passed and returned in registers. The macro | 464 | * Small structures are passed and returned in registers. The macro |
462 | * calling convention can't directly deal with this, so the wrapper | 465 | * calling convention can't directly deal with this, so the wrapper |
463 | * functions must do this. | 466 | * functions must do this. |
464 | * | 467 | * |
465 | * These PVOP_* macros are only defined within this header. This | 468 | * These PVOP_* macros are only defined within this header. This |
466 | * means that all uses must be wrapped in inline functions. This also | 469 | * means that all uses must be wrapped in inline functions. This also |
467 | * makes sure the incoming and outgoing types are always correct. | 470 | * makes sure the incoming and outgoing types are always correct. |
468 | */ | 471 | */ |
469 | #ifdef CONFIG_X86_32 | 472 | #ifdef CONFIG_X86_32 |
470 | #define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx | 473 | #define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx |
471 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS | 474 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS |
472 | #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ | 475 | #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ |
473 | "=c" (__ecx) | 476 | "=c" (__ecx) |
474 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS | 477 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS |
475 | #define EXTRA_CLOBBERS | 478 | #define EXTRA_CLOBBERS |
476 | #define VEXTRA_CLOBBERS | 479 | #define VEXTRA_CLOBBERS |
477 | #else | 480 | #else |
478 | #define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx | 481 | #define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx |
479 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax | 482 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax |
480 | #define PVOP_VCALL_CLOBBERS "=D" (__edi), \ | 483 | #define PVOP_VCALL_CLOBBERS "=D" (__edi), \ |
481 | "=S" (__esi), "=d" (__edx), \ | 484 | "=S" (__esi), "=d" (__edx), \ |
482 | "=c" (__ecx) | 485 | "=c" (__ecx) |
483 | 486 | ||
484 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) | 487 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) |
485 | 488 | ||
486 | #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" | 489 | #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" |
487 | #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" | 490 | #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" |
488 | #endif | 491 | #endif |
489 | 492 | ||
490 | #ifdef CONFIG_PARAVIRT_DEBUG | 493 | #ifdef CONFIG_PARAVIRT_DEBUG |
491 | #define PVOP_TEST_NULL(op) BUG_ON(op == NULL) | 494 | #define PVOP_TEST_NULL(op) BUG_ON(op == NULL) |
492 | #else | 495 | #else |
493 | #define PVOP_TEST_NULL(op) ((void)op) | 496 | #define PVOP_TEST_NULL(op) ((void)op) |
494 | #endif | 497 | #endif |
495 | 498 | ||
496 | #define __PVOP_CALL(rettype, op, pre, post, ...) \ | 499 | #define __PVOP_CALL(rettype, op, pre, post, ...) \ |
497 | ({ \ | 500 | ({ \ |
498 | rettype __ret; \ | 501 | rettype __ret; \ |
499 | PVOP_CALL_ARGS; \ | 502 | PVOP_CALL_ARGS; \ |
500 | PVOP_TEST_NULL(op); \ | 503 | PVOP_TEST_NULL(op); \ |
501 | /* This is 32-bit specific, but is okay in 64-bit */ \ | 504 | /* This is 32-bit specific, but is okay in 64-bit */ \ |
502 | /* since this condition will never hold */ \ | 505 | /* since this condition will never hold */ \ |
503 | if (sizeof(rettype) > sizeof(unsigned long)) { \ | 506 | if (sizeof(rettype) > sizeof(unsigned long)) { \ |
504 | asm volatile(pre \ | 507 | asm volatile(pre \ |
505 | paravirt_alt(PARAVIRT_CALL) \ | 508 | paravirt_alt(PARAVIRT_CALL) \ |
506 | post \ | 509 | post \ |
507 | : PVOP_CALL_CLOBBERS \ | 510 | : PVOP_CALL_CLOBBERS \ |
508 | : paravirt_type(op), \ | 511 | : paravirt_type(op), \ |
509 | paravirt_clobber(CLBR_ANY), \ | 512 | paravirt_clobber(CLBR_ANY), \ |
510 | ##__VA_ARGS__ \ | 513 | ##__VA_ARGS__ \ |
511 | : "memory", "cc" EXTRA_CLOBBERS); \ | 514 | : "memory", "cc" EXTRA_CLOBBERS); \ |
512 | __ret = (rettype)((((u64)__edx) << 32) | __eax); \ | 515 | __ret = (rettype)((((u64)__edx) << 32) | __eax); \ |
513 | } else { \ | 516 | } else { \ |
514 | asm volatile(pre \ | 517 | asm volatile(pre \ |
515 | paravirt_alt(PARAVIRT_CALL) \ | 518 | paravirt_alt(PARAVIRT_CALL) \ |
516 | post \ | 519 | post \ |
517 | : PVOP_CALL_CLOBBERS \ | 520 | : PVOP_CALL_CLOBBERS \ |
518 | : paravirt_type(op), \ | 521 | : paravirt_type(op), \ |
519 | paravirt_clobber(CLBR_ANY), \ | 522 | paravirt_clobber(CLBR_ANY), \ |
520 | ##__VA_ARGS__ \ | 523 | ##__VA_ARGS__ \ |
521 | : "memory", "cc" EXTRA_CLOBBERS); \ | 524 | : "memory", "cc" EXTRA_CLOBBERS); \ |
522 | __ret = (rettype)__eax; \ | 525 | __ret = (rettype)__eax; \ |
523 | } \ | 526 | } \ |
524 | __ret; \ | 527 | __ret; \ |
525 | }) | 528 | }) |
526 | #define __PVOP_VCALL(op, pre, post, ...) \ | 529 | #define __PVOP_VCALL(op, pre, post, ...) \ |
527 | ({ \ | 530 | ({ \ |
528 | PVOP_VCALL_ARGS; \ | 531 | PVOP_VCALL_ARGS; \ |
529 | PVOP_TEST_NULL(op); \ | 532 | PVOP_TEST_NULL(op); \ |
530 | asm volatile(pre \ | 533 | asm volatile(pre \ |
531 | paravirt_alt(PARAVIRT_CALL) \ | 534 | paravirt_alt(PARAVIRT_CALL) \ |
532 | post \ | 535 | post \ |
533 | : PVOP_VCALL_CLOBBERS \ | 536 | : PVOP_VCALL_CLOBBERS \ |
534 | : paravirt_type(op), \ | 537 | : paravirt_type(op), \ |
535 | paravirt_clobber(CLBR_ANY), \ | 538 | paravirt_clobber(CLBR_ANY), \ |
536 | ##__VA_ARGS__ \ | 539 | ##__VA_ARGS__ \ |
537 | : "memory", "cc" VEXTRA_CLOBBERS); \ | 540 | : "memory", "cc" VEXTRA_CLOBBERS); \ |
538 | }) | 541 | }) |
539 | 542 | ||
540 | #define PVOP_CALL0(rettype, op) \ | 543 | #define PVOP_CALL0(rettype, op) \ |
541 | __PVOP_CALL(rettype, op, "", "") | 544 | __PVOP_CALL(rettype, op, "", "") |
542 | #define PVOP_VCALL0(op) \ | 545 | #define PVOP_VCALL0(op) \ |
543 | __PVOP_VCALL(op, "", "") | 546 | __PVOP_VCALL(op, "", "") |
544 | 547 | ||
545 | #define PVOP_CALL1(rettype, op, arg1) \ | 548 | #define PVOP_CALL1(rettype, op, arg1) \ |
546 | __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1))) | 549 | __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1))) |
547 | #define PVOP_VCALL1(op, arg1) \ | 550 | #define PVOP_VCALL1(op, arg1) \ |
548 | __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1))) | 551 | __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1))) |
549 | 552 | ||
550 | #define PVOP_CALL2(rettype, op, arg1, arg2) \ | 553 | #define PVOP_CALL2(rettype, op, arg1, arg2) \ |
551 | __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ | 554 | __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ |
552 | "1" ((unsigned long)(arg2))) | 555 | "1" ((unsigned long)(arg2))) |
553 | #define PVOP_VCALL2(op, arg1, arg2) \ | 556 | #define PVOP_VCALL2(op, arg1, arg2) \ |
554 | __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ | 557 | __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ |
555 | "1" ((unsigned long)(arg2))) | 558 | "1" ((unsigned long)(arg2))) |
556 | 559 | ||
557 | #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ | 560 | #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ |
558 | __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ | 561 | __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ |
559 | "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) | 562 | "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) |
560 | #define PVOP_VCALL3(op, arg1, arg2, arg3) \ | 563 | #define PVOP_VCALL3(op, arg1, arg2, arg3) \ |
561 | __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ | 564 | __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ |
562 | "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) | 565 | "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) |
563 | 566 | ||
564 | /* This is the only difference in x86_64. We can make it much simpler */ | 567 | /* This is the only difference in x86_64. We can make it much simpler */ |
565 | #ifdef CONFIG_X86_32 | 568 | #ifdef CONFIG_X86_32 |
566 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ | 569 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ |
567 | __PVOP_CALL(rettype, op, \ | 570 | __PVOP_CALL(rettype, op, \ |
568 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ | 571 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ |
569 | "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ | 572 | "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ |
570 | "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) | 573 | "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) |
571 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ | 574 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ |
572 | __PVOP_VCALL(op, \ | 575 | __PVOP_VCALL(op, \ |
573 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ | 576 | "push %[_arg4];", "lea 4(%%esp),%%esp;", \ |
574 | "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ | 577 | "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ |
575 | "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) | 578 | "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) |
576 | #else | 579 | #else |
577 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ | 580 | #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ |
578 | __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ | 581 | __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ |
579 | "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ | 582 | "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ |
580 | "3"((unsigned long)(arg4))) | 583 | "3"((unsigned long)(arg4))) |
581 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ | 584 | #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ |
582 | __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ | 585 | __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ |
583 | "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ | 586 | "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ |
584 | "3"((unsigned long)(arg4))) | 587 | "3"((unsigned long)(arg4))) |
585 | #endif | 588 | #endif |
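The PVOP_CALL*/PVOP_VCALL* wrappers above marshal up to four arguments into the fixed input registers of the indirect paravirt call (the fourth argument is spilled to the stack on 32-bit). A minimal sketch of how a new accessor is typically wrapped, using a hypothetical pv_cpu_ops.read_foo member that does not exist in this header:

/* Hypothetical wrapper for illustration only; pv_cpu_ops.read_foo is assumed. */
static inline unsigned long read_foo(unsigned int idx)
{
	return PVOP_CALL1(unsigned long, pv_cpu_ops.read_foo, idx);
}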
586 | 589 | ||
587 | static inline int paravirt_enabled(void) | 590 | static inline int paravirt_enabled(void) |
588 | { | 591 | { |
589 | return pv_info.paravirt_enabled; | 592 | return pv_info.paravirt_enabled; |
590 | } | 593 | } |
591 | 594 | ||
592 | static inline void load_sp0(struct tss_struct *tss, | 595 | static inline void load_sp0(struct tss_struct *tss, |
593 | struct thread_struct *thread) | 596 | struct thread_struct *thread) |
594 | { | 597 | { |
595 | PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); | 598 | PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); |
596 | } | 599 | } |
597 | 600 | ||
598 | #define ARCH_SETUP pv_init_ops.arch_setup(); | 601 | #define ARCH_SETUP pv_init_ops.arch_setup(); |
599 | static inline unsigned long get_wallclock(void) | 602 | static inline unsigned long get_wallclock(void) |
600 | { | 603 | { |
601 | return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock); | 604 | return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock); |
602 | } | 605 | } |
603 | 606 | ||
604 | static inline int set_wallclock(unsigned long nowtime) | 607 | static inline int set_wallclock(unsigned long nowtime) |
605 | { | 608 | { |
606 | return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime); | 609 | return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime); |
607 | } | 610 | } |
608 | 611 | ||
609 | static inline void (*choose_time_init(void))(void) | 612 | static inline void (*choose_time_init(void))(void) |
610 | { | 613 | { |
611 | return pv_time_ops.time_init; | 614 | return pv_time_ops.time_init; |
612 | } | 615 | } |
613 | 616 | ||
614 | /* The paravirtualized CPUID instruction. */ | 617 | /* The paravirtualized CPUID instruction. */ |
615 | static inline void __cpuid(unsigned int *eax, unsigned int *ebx, | 618 | static inline void __cpuid(unsigned int *eax, unsigned int *ebx, |
616 | unsigned int *ecx, unsigned int *edx) | 619 | unsigned int *ecx, unsigned int *edx) |
617 | { | 620 | { |
618 | PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx); | 621 | PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx); |
619 | } | 622 | } |
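__cpuid() expects the leaf (and sub-leaf) preloaded in *eax/*ecx and overwrites all four words with the result. A hedged usage sketch:

/* Query the highest standard CPUID leaf through the paravirtualized CPUID. */
static inline unsigned int example_max_std_leaf(void)
{
	unsigned int eax = 0, ebx, ecx = 0, edx;

	__cpuid(&eax, &ebx, &ecx, &edx);
	return eax;
}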
620 | 623 | ||
621 | /* | 624 | /* |
622 | * These special macros can be used to get or set a debugging register | 625 | * These special macros can be used to get or set a debugging register |
623 | */ | 626 | */ |
624 | static inline unsigned long paravirt_get_debugreg(int reg) | 627 | static inline unsigned long paravirt_get_debugreg(int reg) |
625 | { | 628 | { |
626 | return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg); | 629 | return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg); |
627 | } | 630 | } |
628 | #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) | 631 | #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) |
629 | static inline void set_debugreg(unsigned long val, int reg) | 632 | static inline void set_debugreg(unsigned long val, int reg) |
630 | { | 633 | { |
631 | PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val); | 634 | PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val); |
632 | } | 635 | } |
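A short usage sketch for the debug-register accessors above; DR7 is picked purely as an example register:

static inline unsigned long example_read_dr7(void)
{
	unsigned long dr7;

	get_debugreg(dr7, 7);	/* expands to paravirt_get_debugreg(7) */
	return dr7;
}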
633 | 636 | ||
634 | static inline void clts(void) | 637 | static inline void clts(void) |
635 | { | 638 | { |
636 | PVOP_VCALL0(pv_cpu_ops.clts); | 639 | PVOP_VCALL0(pv_cpu_ops.clts); |
637 | } | 640 | } |
638 | 641 | ||
639 | static inline unsigned long read_cr0(void) | 642 | static inline unsigned long read_cr0(void) |
640 | { | 643 | { |
641 | return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0); | 644 | return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0); |
642 | } | 645 | } |
643 | 646 | ||
644 | static inline void write_cr0(unsigned long x) | 647 | static inline void write_cr0(unsigned long x) |
645 | { | 648 | { |
646 | PVOP_VCALL1(pv_cpu_ops.write_cr0, x); | 649 | PVOP_VCALL1(pv_cpu_ops.write_cr0, x); |
647 | } | 650 | } |
648 | 651 | ||
649 | static inline unsigned long read_cr2(void) | 652 | static inline unsigned long read_cr2(void) |
650 | { | 653 | { |
651 | return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2); | 654 | return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2); |
652 | } | 655 | } |
653 | 656 | ||
654 | static inline void write_cr2(unsigned long x) | 657 | static inline void write_cr2(unsigned long x) |
655 | { | 658 | { |
656 | PVOP_VCALL1(pv_mmu_ops.write_cr2, x); | 659 | PVOP_VCALL1(pv_mmu_ops.write_cr2, x); |
657 | } | 660 | } |
658 | 661 | ||
659 | static inline unsigned long read_cr3(void) | 662 | static inline unsigned long read_cr3(void) |
660 | { | 663 | { |
661 | return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3); | 664 | return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3); |
662 | } | 665 | } |
663 | 666 | ||
664 | static inline void write_cr3(unsigned long x) | 667 | static inline void write_cr3(unsigned long x) |
665 | { | 668 | { |
666 | PVOP_VCALL1(pv_mmu_ops.write_cr3, x); | 669 | PVOP_VCALL1(pv_mmu_ops.write_cr3, x); |
667 | } | 670 | } |
668 | 671 | ||
669 | static inline unsigned long read_cr4(void) | 672 | static inline unsigned long read_cr4(void) |
670 | { | 673 | { |
671 | return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); | 674 | return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); |
672 | } | 675 | } |
673 | static inline unsigned long read_cr4_safe(void) | 676 | static inline unsigned long read_cr4_safe(void) |
674 | { | 677 | { |
675 | return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe); | 678 | return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe); |
676 | } | 679 | } |
677 | 680 | ||
678 | static inline void write_cr4(unsigned long x) | 681 | static inline void write_cr4(unsigned long x) |
679 | { | 682 | { |
680 | PVOP_VCALL1(pv_cpu_ops.write_cr4, x); | 683 | PVOP_VCALL1(pv_cpu_ops.write_cr4, x); |
681 | } | 684 | } |
682 | 685 | ||
683 | #ifdef CONFIG_X86_64 | 686 | #ifdef CONFIG_X86_64 |
684 | static inline unsigned long read_cr8(void) | 687 | static inline unsigned long read_cr8(void) |
685 | { | 688 | { |
686 | return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8); | 689 | return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8); |
687 | } | 690 | } |
688 | 691 | ||
689 | static inline void write_cr8(unsigned long x) | 692 | static inline void write_cr8(unsigned long x) |
690 | { | 693 | { |
691 | PVOP_VCALL1(pv_cpu_ops.write_cr8, x); | 694 | PVOP_VCALL1(pv_cpu_ops.write_cr8, x); |
692 | } | 695 | } |
693 | #endif | 696 | #endif |
694 | 697 | ||
695 | static inline void raw_safe_halt(void) | 698 | static inline void raw_safe_halt(void) |
696 | { | 699 | { |
697 | PVOP_VCALL0(pv_irq_ops.safe_halt); | 700 | PVOP_VCALL0(pv_irq_ops.safe_halt); |
698 | } | 701 | } |
699 | 702 | ||
700 | static inline void halt(void) | 703 | static inline void halt(void) |
701 | { | 704 | { |
702 | PVOP_VCALL0(pv_irq_ops.safe_halt); | 705 | PVOP_VCALL0(pv_irq_ops.safe_halt); |
703 | } | 706 | } |
704 | 707 | ||
705 | static inline void wbinvd(void) | 708 | static inline void wbinvd(void) |
706 | { | 709 | { |
707 | PVOP_VCALL0(pv_cpu_ops.wbinvd); | 710 | PVOP_VCALL0(pv_cpu_ops.wbinvd); |
708 | } | 711 | } |
709 | 712 | ||
710 | #define get_kernel_rpl() (pv_info.kernel_rpl) | 713 | #define get_kernel_rpl() (pv_info.kernel_rpl) |
711 | 714 | ||
712 | static inline u64 paravirt_read_msr(unsigned msr, int *err) | 715 | static inline u64 paravirt_read_msr(unsigned msr, int *err) |
713 | { | 716 | { |
714 | return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); | 717 | return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); |
715 | } | 718 | } |
716 | static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) | 719 | static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) |
717 | { | 720 | { |
718 | return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); | 721 | return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); |
719 | } | 722 | } |
720 | 723 | ||
721 | /* These should all do BUG_ON(_err), but our headers are too tangled. */ | 724 | /* These should all do BUG_ON(_err), but our headers are too tangled. */ |
722 | #define rdmsr(msr, val1, val2) \ | 725 | #define rdmsr(msr, val1, val2) \ |
723 | do { \ | 726 | do { \ |
724 | int _err; \ | 727 | int _err; \ |
725 | u64 _l = paravirt_read_msr(msr, &_err); \ | 728 | u64 _l = paravirt_read_msr(msr, &_err); \ |
726 | val1 = (u32)_l; \ | 729 | val1 = (u32)_l; \ |
727 | val2 = _l >> 32; \ | 730 | val2 = _l >> 32; \ |
728 | } while (0) | 731 | } while (0) |
729 | 732 | ||
730 | #define wrmsr(msr, val1, val2) \ | 733 | #define wrmsr(msr, val1, val2) \ |
731 | do { \ | 734 | do { \ |
732 | paravirt_write_msr(msr, val1, val2); \ | 735 | paravirt_write_msr(msr, val1, val2); \ |
733 | } while (0) | 736 | } while (0) |
734 | 737 | ||
735 | #define rdmsrl(msr, val) \ | 738 | #define rdmsrl(msr, val) \ |
736 | do { \ | 739 | do { \ |
737 | int _err; \ | 740 | int _err; \ |
738 | val = paravirt_read_msr(msr, &_err); \ | 741 | val = paravirt_read_msr(msr, &_err); \ |
739 | } while (0) | 742 | } while (0) |
740 | 743 | ||
741 | #define wrmsrl(msr, val) wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32) | 744 | #define wrmsrl(msr, val) wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32) |
742 | #define wrmsr_safe(msr, a, b) paravirt_write_msr(msr, a, b) | 745 | #define wrmsr_safe(msr, a, b) paravirt_write_msr(msr, a, b) |
743 | 746 | ||
744 | /* rdmsr with exception handling */ | 747 | /* rdmsr with exception handling */ |
745 | #define rdmsr_safe(msr, a, b) \ | 748 | #define rdmsr_safe(msr, a, b) \ |
746 | ({ \ | 749 | ({ \ |
747 | int _err; \ | 750 | int _err; \ |
748 | u64 _l = paravirt_read_msr(msr, &_err); \ | 751 | u64 _l = paravirt_read_msr(msr, &_err); \ |
749 | (*a) = (u32)_l; \ | 752 | (*a) = (u32)_l; \ |
750 | (*b) = _l >> 32; \ | 753 | (*b) = _l >> 32; \ |
751 | _err; \ | 754 | _err; \ |
752 | }) | 755 | }) |
753 | 756 | ||
754 | static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | 757 | static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) |
755 | { | 758 | { |
756 | int err; | 759 | int err; |
757 | 760 | ||
758 | *p = paravirt_read_msr(msr, &err); | 761 | *p = paravirt_read_msr(msr, &err); |
759 | return err; | 762 | return err; |
760 | } | 763 | } |
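A usage sketch for the safe MSR helpers above; 0xc0000080 (IA32_EFER) is used purely as an example MSR number:

static inline void example_touch_efer(void)
{
	u32 lo, hi;

	if (rdmsr_safe(0xc0000080, &lo, &hi) == 0)
		wrmsr(0xc0000080, lo, hi);	/* write the value back unchanged */
}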
761 | 764 | ||
762 | static inline u64 paravirt_read_tsc(void) | 765 | static inline u64 paravirt_read_tsc(void) |
763 | { | 766 | { |
764 | return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); | 767 | return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); |
765 | } | 768 | } |
766 | 769 | ||
767 | #define rdtscl(low) \ | 770 | #define rdtscl(low) \ |
768 | do { \ | 771 | do { \ |
769 | u64 _l = paravirt_read_tsc(); \ | 772 | u64 _l = paravirt_read_tsc(); \ |
770 | low = (int)_l; \ | 773 | low = (int)_l; \ |
771 | } while (0) | 774 | } while (0) |
772 | 775 | ||
773 | #define rdtscll(val) (val = paravirt_read_tsc()) | 776 | #define rdtscll(val) (val = paravirt_read_tsc()) |
774 | 777 | ||
775 | static inline unsigned long long paravirt_sched_clock(void) | 778 | static inline unsigned long long paravirt_sched_clock(void) |
776 | { | 779 | { |
777 | return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); | 780 | return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); |
778 | } | 781 | } |
779 | #define calculate_cpu_khz() (pv_time_ops.get_cpu_khz()) | 782 | #define calculate_cpu_khz() (pv_time_ops.get_cpu_khz()) |
780 | 783 | ||
781 | static inline unsigned long long paravirt_read_pmc(int counter) | 784 | static inline unsigned long long paravirt_read_pmc(int counter) |
782 | { | 785 | { |
783 | return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter); | 786 | return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter); |
784 | } | 787 | } |
785 | 788 | ||
786 | #define rdpmc(counter, low, high) \ | 789 | #define rdpmc(counter, low, high) \ |
787 | do { \ | 790 | do { \ |
788 | u64 _l = paravirt_read_pmc(counter); \ | 791 | u64 _l = paravirt_read_pmc(counter); \ |
789 | low = (u32)_l; \ | 792 | low = (u32)_l; \ |
790 | high = _l >> 32; \ | 793 | high = _l >> 32; \ |
791 | } while (0) | 794 | } while (0) |
792 | 795 | ||
793 | static inline unsigned long long paravirt_rdtscp(unsigned int *aux) | 796 | static inline unsigned long long paravirt_rdtscp(unsigned int *aux) |
794 | { | 797 | { |
795 | return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); | 798 | return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); |
796 | } | 799 | } |
797 | 800 | ||
798 | #define rdtscp(low, high, aux) \ | 801 | #define rdtscp(low, high, aux) \ |
799 | do { \ | 802 | do { \ |
800 | int __aux; \ | 803 | int __aux; \ |
801 | unsigned long __val = paravirt_rdtscp(&__aux); \ | 804 | unsigned long __val = paravirt_rdtscp(&__aux); \ |
802 | (low) = (u32)__val; \ | 805 | (low) = (u32)__val; \ |
803 | (high) = (u32)(__val >> 32); \ | 806 | (high) = (u32)(__val >> 32); \ |
804 | (aux) = __aux; \ | 807 | (aux) = __aux; \ |
805 | } while (0) | 808 | } while (0) |
806 | 809 | ||
807 | #define rdtscpll(val, aux) \ | 810 | #define rdtscpll(val, aux) \ |
808 | do { \ | 811 | do { \ |
809 | unsigned long __aux; \ | 812 | unsigned long __aux; \ |
810 | val = paravirt_rdtscp(&__aux); \ | 813 | val = paravirt_rdtscp(&__aux); \ |
811 | (aux) = __aux; \ | 814 | (aux) = __aux; \ |
812 | } while (0) | 815 | } while (0) |
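The rdtscp()/rdtscpll() macros wrap paravirt_rdtscp(); when only the TSC value plus the TSC_AUX tag is wanted, the underlying call can be used directly, as in this sketch:

static inline u64 example_tsc_with_cpu(unsigned int *aux)
{
	return paravirt_rdtscp(aux);	/* TSC value; CPU signature stored in *aux */
}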
813 | 816 | ||
814 | static inline void load_TR_desc(void) | 817 | static inline void load_TR_desc(void) |
815 | { | 818 | { |
816 | PVOP_VCALL0(pv_cpu_ops.load_tr_desc); | 819 | PVOP_VCALL0(pv_cpu_ops.load_tr_desc); |
817 | } | 820 | } |
818 | static inline void load_gdt(const struct desc_ptr *dtr) | 821 | static inline void load_gdt(const struct desc_ptr *dtr) |
819 | { | 822 | { |
820 | PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr); | 823 | PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr); |
821 | } | 824 | } |
822 | static inline void load_idt(const struct desc_ptr *dtr) | 825 | static inline void load_idt(const struct desc_ptr *dtr) |
823 | { | 826 | { |
824 | PVOP_VCALL1(pv_cpu_ops.load_idt, dtr); | 827 | PVOP_VCALL1(pv_cpu_ops.load_idt, dtr); |
825 | } | 828 | } |
826 | static inline void set_ldt(const void *addr, unsigned entries) | 829 | static inline void set_ldt(const void *addr, unsigned entries) |
827 | { | 830 | { |
828 | PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); | 831 | PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); |
829 | } | 832 | } |
830 | static inline void store_gdt(struct desc_ptr *dtr) | 833 | static inline void store_gdt(struct desc_ptr *dtr) |
831 | { | 834 | { |
832 | PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr); | 835 | PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr); |
833 | } | 836 | } |
834 | static inline void store_idt(struct desc_ptr *dtr) | 837 | static inline void store_idt(struct desc_ptr *dtr) |
835 | { | 838 | { |
836 | PVOP_VCALL1(pv_cpu_ops.store_idt, dtr); | 839 | PVOP_VCALL1(pv_cpu_ops.store_idt, dtr); |
837 | } | 840 | } |
838 | static inline unsigned long paravirt_store_tr(void) | 841 | static inline unsigned long paravirt_store_tr(void) |
839 | { | 842 | { |
840 | return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr); | 843 | return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr); |
841 | } | 844 | } |
842 | #define store_tr(tr) ((tr) = paravirt_store_tr()) | 845 | #define store_tr(tr) ((tr) = paravirt_store_tr()) |
843 | static inline void load_TLS(struct thread_struct *t, unsigned cpu) | 846 | static inline void load_TLS(struct thread_struct *t, unsigned cpu) |
844 | { | 847 | { |
845 | PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu); | 848 | PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu); |
846 | } | 849 | } |
850 | |||
851 | #ifdef CONFIG_X86_64 | ||
852 | static inline void load_gs_index(unsigned int gs) | ||
853 | { | ||
854 | PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs); | ||
855 | } | ||
856 | #endif | ||
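load_gs_index() is the operation this commit turns into a pvop on 64-bit: the native version has to wrap the selector load in swapgs, which a paravirtualized guest may not be able to execute directly, so a backend can now substitute its own reload path. A sketch of how a backend might hook it; the helper name and its body are assumptions for illustration, not code from this patch:

/* Illustrative backend hook only; the helper name is an assumption. */
static void example_load_gs_index(unsigned int index)
{
	/* a guest-specific user %gs selector reload would go here */
}

static void __init example_install_pvops(void)
{
	pv_cpu_ops.load_gs_index = example_load_gs_index;
}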
847 | 857 | ||
848 | static inline void write_ldt_entry(struct desc_struct *dt, int entry, | 858 | static inline void write_ldt_entry(struct desc_struct *dt, int entry, |
849 | const void *desc) | 859 | const void *desc) |
850 | { | 860 | { |
851 | PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc); | 861 | PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc); |
852 | } | 862 | } |
853 | 863 | ||
854 | static inline void write_gdt_entry(struct desc_struct *dt, int entry, | 864 | static inline void write_gdt_entry(struct desc_struct *dt, int entry, |
855 | void *desc, int type) | 865 | void *desc, int type) |
856 | { | 866 | { |
857 | PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type); | 867 | PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type); |
858 | } | 868 | } |
859 | 869 | ||
860 | static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) | 870 | static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) |
861 | { | 871 | { |
862 | PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g); | 872 | PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g); |
863 | } | 873 | } |
864 | static inline void set_iopl_mask(unsigned mask) | 874 | static inline void set_iopl_mask(unsigned mask) |
865 | { | 875 | { |
866 | PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask); | 876 | PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask); |
867 | } | 877 | } |
868 | 878 | ||
869 | /* The paravirtualized I/O functions */ | 879 | /* The paravirtualized I/O functions */ |
870 | static inline void slow_down_io(void) | 880 | static inline void slow_down_io(void) |
871 | { | 881 | { |
872 | pv_cpu_ops.io_delay(); | 882 | pv_cpu_ops.io_delay(); |
873 | #ifdef REALLY_SLOW_IO | 883 | #ifdef REALLY_SLOW_IO |
874 | pv_cpu_ops.io_delay(); | 884 | pv_cpu_ops.io_delay(); |
875 | pv_cpu_ops.io_delay(); | 885 | pv_cpu_ops.io_delay(); |
876 | pv_cpu_ops.io_delay(); | 886 | pv_cpu_ops.io_delay(); |
877 | #endif | 887 | #endif |
878 | } | 888 | } |
879 | 889 | ||
880 | #ifdef CONFIG_X86_LOCAL_APIC | 890 | #ifdef CONFIG_X86_LOCAL_APIC |
881 | /* | 891 | /* |
882 | * Basic functions accessing APICs. | 892 | * Basic functions accessing APICs. |
883 | */ | 893 | */ |
884 | static inline void apic_write(unsigned long reg, u32 v) | 894 | static inline void apic_write(unsigned long reg, u32 v) |
885 | { | 895 | { |
886 | PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); | 896 | PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); |
887 | } | 897 | } |
888 | 898 | ||
889 | static inline void apic_write_atomic(unsigned long reg, u32 v) | 899 | static inline void apic_write_atomic(unsigned long reg, u32 v) |
890 | { | 900 | { |
891 | PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v); | 901 | PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v); |
892 | } | 902 | } |
893 | 903 | ||
894 | static inline u32 apic_read(unsigned long reg) | 904 | static inline u32 apic_read(unsigned long reg) |
895 | { | 905 | { |
896 | return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); | 906 | return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); |
897 | } | 907 | } |
898 | 908 | ||
899 | static inline void setup_boot_clock(void) | 909 | static inline void setup_boot_clock(void) |
900 | { | 910 | { |
901 | PVOP_VCALL0(pv_apic_ops.setup_boot_clock); | 911 | PVOP_VCALL0(pv_apic_ops.setup_boot_clock); |
902 | } | 912 | } |
903 | 913 | ||
904 | static inline void setup_secondary_clock(void) | 914 | static inline void setup_secondary_clock(void) |
905 | { | 915 | { |
906 | PVOP_VCALL0(pv_apic_ops.setup_secondary_clock); | 916 | PVOP_VCALL0(pv_apic_ops.setup_secondary_clock); |
907 | } | 917 | } |
908 | #endif | 918 | #endif |
909 | 919 | ||
910 | static inline void paravirt_post_allocator_init(void) | 920 | static inline void paravirt_post_allocator_init(void) |
911 | { | 921 | { |
912 | if (pv_init_ops.post_allocator_init) | 922 | if (pv_init_ops.post_allocator_init) |
913 | (*pv_init_ops.post_allocator_init)(); | 923 | (*pv_init_ops.post_allocator_init)(); |
914 | } | 924 | } |
915 | 925 | ||
916 | static inline void paravirt_pagetable_setup_start(pgd_t *base) | 926 | static inline void paravirt_pagetable_setup_start(pgd_t *base) |
917 | { | 927 | { |
918 | (*pv_mmu_ops.pagetable_setup_start)(base); | 928 | (*pv_mmu_ops.pagetable_setup_start)(base); |
919 | } | 929 | } |
920 | 930 | ||
921 | static inline void paravirt_pagetable_setup_done(pgd_t *base) | 931 | static inline void paravirt_pagetable_setup_done(pgd_t *base) |
922 | { | 932 | { |
923 | (*pv_mmu_ops.pagetable_setup_done)(base); | 933 | (*pv_mmu_ops.pagetable_setup_done)(base); |
924 | } | 934 | } |
925 | 935 | ||
926 | #ifdef CONFIG_SMP | 936 | #ifdef CONFIG_SMP |
927 | static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, | 937 | static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, |
928 | unsigned long start_esp) | 938 | unsigned long start_esp) |
929 | { | 939 | { |
930 | PVOP_VCALL3(pv_apic_ops.startup_ipi_hook, | 940 | PVOP_VCALL3(pv_apic_ops.startup_ipi_hook, |
931 | phys_apicid, start_eip, start_esp); | 941 | phys_apicid, start_eip, start_esp); |
932 | } | 942 | } |
933 | #endif | 943 | #endif |
934 | 944 | ||
935 | static inline void paravirt_activate_mm(struct mm_struct *prev, | 945 | static inline void paravirt_activate_mm(struct mm_struct *prev, |
936 | struct mm_struct *next) | 946 | struct mm_struct *next) |
937 | { | 947 | { |
938 | PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next); | 948 | PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next); |
939 | } | 949 | } |
940 | 950 | ||
941 | static inline void arch_dup_mmap(struct mm_struct *oldmm, | 951 | static inline void arch_dup_mmap(struct mm_struct *oldmm, |
942 | struct mm_struct *mm) | 952 | struct mm_struct *mm) |
943 | { | 953 | { |
944 | PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm); | 954 | PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm); |
945 | } | 955 | } |
946 | 956 | ||
947 | static inline void arch_exit_mmap(struct mm_struct *mm) | 957 | static inline void arch_exit_mmap(struct mm_struct *mm) |
948 | { | 958 | { |
949 | PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm); | 959 | PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm); |
950 | } | 960 | } |
951 | 961 | ||
952 | static inline void __flush_tlb(void) | 962 | static inline void __flush_tlb(void) |
953 | { | 963 | { |
954 | PVOP_VCALL0(pv_mmu_ops.flush_tlb_user); | 964 | PVOP_VCALL0(pv_mmu_ops.flush_tlb_user); |
955 | } | 965 | } |
956 | static inline void __flush_tlb_global(void) | 966 | static inline void __flush_tlb_global(void) |
957 | { | 967 | { |
958 | PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel); | 968 | PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel); |
959 | } | 969 | } |
960 | static inline void __flush_tlb_single(unsigned long addr) | 970 | static inline void __flush_tlb_single(unsigned long addr) |
961 | { | 971 | { |
962 | PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); | 972 | PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); |
963 | } | 973 | } |
964 | 974 | ||
965 | static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | 975 | static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, |
966 | unsigned long va) | 976 | unsigned long va) |
967 | { | 977 | { |
968 | PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); | 978 | PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); |
969 | } | 979 | } |
970 | 980 | ||
971 | static inline int paravirt_pgd_alloc(struct mm_struct *mm) | 981 | static inline int paravirt_pgd_alloc(struct mm_struct *mm) |
972 | { | 982 | { |
973 | return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm); | 983 | return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm); |
974 | } | 984 | } |
975 | 985 | ||
976 | static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) | 986 | static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) |
977 | { | 987 | { |
978 | PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd); | 988 | PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd); |
979 | } | 989 | } |
980 | 990 | ||
981 | static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn) | 991 | static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn) |
982 | { | 992 | { |
983 | PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn); | 993 | PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn); |
984 | } | 994 | } |
985 | static inline void paravirt_release_pte(unsigned pfn) | 995 | static inline void paravirt_release_pte(unsigned pfn) |
986 | { | 996 | { |
987 | PVOP_VCALL1(pv_mmu_ops.release_pte, pfn); | 997 | PVOP_VCALL1(pv_mmu_ops.release_pte, pfn); |
988 | } | 998 | } |
989 | 999 | ||
990 | static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn) | 1000 | static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn) |
991 | { | 1001 | { |
992 | PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); | 1002 | PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); |
993 | } | 1003 | } |
994 | 1004 | ||
995 | static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn, | 1005 | static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn, |
996 | unsigned start, unsigned count) | 1006 | unsigned start, unsigned count) |
997 | { | 1007 | { |
998 | PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count); | 1008 | PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count); |
999 | } | 1009 | } |
1000 | static inline void paravirt_release_pmd(unsigned pfn) | 1010 | static inline void paravirt_release_pmd(unsigned pfn) |
1001 | { | 1011 | { |
1002 | PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); | 1012 | PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); |
1003 | } | 1013 | } |
1004 | 1014 | ||
1005 | static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn) | 1015 | static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn) |
1006 | { | 1016 | { |
1007 | PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn); | 1017 | PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn); |
1008 | } | 1018 | } |
1009 | static inline void paravirt_release_pud(unsigned pfn) | 1019 | static inline void paravirt_release_pud(unsigned pfn) |
1010 | { | 1020 | { |
1011 | PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); | 1021 | PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); |
1012 | } | 1022 | } |
1013 | 1023 | ||
1014 | #ifdef CONFIG_HIGHPTE | 1024 | #ifdef CONFIG_HIGHPTE |
1015 | static inline void *kmap_atomic_pte(struct page *page, enum km_type type) | 1025 | static inline void *kmap_atomic_pte(struct page *page, enum km_type type) |
1016 | { | 1026 | { |
1017 | unsigned long ret; | 1027 | unsigned long ret; |
1018 | ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type); | 1028 | ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type); |
1019 | return (void *)ret; | 1029 | return (void *)ret; |
1020 | } | 1030 | } |
1021 | #endif | 1031 | #endif |
1022 | 1032 | ||
1023 | static inline void pte_update(struct mm_struct *mm, unsigned long addr, | 1033 | static inline void pte_update(struct mm_struct *mm, unsigned long addr, |
1024 | pte_t *ptep) | 1034 | pte_t *ptep) |
1025 | { | 1035 | { |
1026 | PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); | 1036 | PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); |
1027 | } | 1037 | } |
1028 | 1038 | ||
1029 | static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, | 1039 | static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, |
1030 | pte_t *ptep) | 1040 | pte_t *ptep) |
1031 | { | 1041 | { |
1032 | PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep); | 1042 | PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep); |
1033 | } | 1043 | } |
1034 | 1044 | ||
1035 | static inline pte_t __pte(pteval_t val) | 1045 | static inline pte_t __pte(pteval_t val) |
1036 | { | 1046 | { |
1037 | pteval_t ret; | 1047 | pteval_t ret; |
1038 | 1048 | ||
1039 | if (sizeof(pteval_t) > sizeof(long)) | 1049 | if (sizeof(pteval_t) > sizeof(long)) |
1040 | ret = PVOP_CALL2(pteval_t, | 1050 | ret = PVOP_CALL2(pteval_t, |
1041 | pv_mmu_ops.make_pte, | 1051 | pv_mmu_ops.make_pte, |
1042 | val, (u64)val >> 32); | 1052 | val, (u64)val >> 32); |
1043 | else | 1053 | else |
1044 | ret = PVOP_CALL1(pteval_t, | 1054 | ret = PVOP_CALL1(pteval_t, |
1045 | pv_mmu_ops.make_pte, | 1055 | pv_mmu_ops.make_pte, |
1046 | val); | 1056 | val); |
1047 | 1057 | ||
1048 | return (pte_t) { .pte = ret }; | 1058 | return (pte_t) { .pte = ret }; |
1049 | } | 1059 | } |
1050 | 1060 | ||
1051 | static inline pteval_t pte_val(pte_t pte) | 1061 | static inline pteval_t pte_val(pte_t pte) |
1052 | { | 1062 | { |
1053 | pteval_t ret; | 1063 | pteval_t ret; |
1054 | 1064 | ||
1055 | if (sizeof(pteval_t) > sizeof(long)) | 1065 | if (sizeof(pteval_t) > sizeof(long)) |
1056 | ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val, | 1066 | ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val, |
1057 | pte.pte, (u64)pte.pte >> 32); | 1067 | pte.pte, (u64)pte.pte >> 32); |
1058 | else | 1068 | else |
1059 | ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val, | 1069 | ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val, |
1060 | pte.pte); | 1070 | pte.pte); |
1061 | 1071 | ||
1062 | return ret; | 1072 | return ret; |
1063 | } | 1073 | } |
1064 | 1074 | ||
1065 | static inline pteval_t pte_flags(pte_t pte) | 1075 | static inline pteval_t pte_flags(pte_t pte) |
1066 | { | 1076 | { |
1067 | pteval_t ret; | 1077 | pteval_t ret; |
1068 | 1078 | ||
1069 | if (sizeof(pteval_t) > sizeof(long)) | 1079 | if (sizeof(pteval_t) > sizeof(long)) |
1070 | ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, | 1080 | ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, |
1071 | pte.pte, (u64)pte.pte >> 32); | 1081 | pte.pte, (u64)pte.pte >> 32); |
1072 | else | 1082 | else |
1073 | ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, | 1083 | ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, |
1074 | pte.pte); | 1084 | pte.pte); |
1075 | 1085 | ||
1076 | return ret; | 1086 | return ret; |
1077 | } | 1087 | } |
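__pte()/pte_val()/pte_flags() split a wide pte across two register arguments when pteval_t is wider than a long (PAE) and pass a single word otherwise. A small sketch that uses only these helpers:

/* Compare two ptes by their raw values, going through the pvop accessors. */
static inline int example_pte_same(pte_t a, pte_t b)
{
	return pte_val(a) == pte_val(b);
}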
1078 | 1088 | ||
1079 | static inline pgd_t __pgd(pgdval_t val) | 1089 | static inline pgd_t __pgd(pgdval_t val) |
1080 | { | 1090 | { |
1081 | pgdval_t ret; | 1091 | pgdval_t ret; |
1082 | 1092 | ||
1083 | if (sizeof(pgdval_t) > sizeof(long)) | 1093 | if (sizeof(pgdval_t) > sizeof(long)) |
1084 | ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd, | 1094 | ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd, |
1085 | val, (u64)val >> 32); | 1095 | val, (u64)val >> 32); |
1086 | else | 1096 | else |
1087 | ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd, | 1097 | ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd, |
1088 | val); | 1098 | val); |
1089 | 1099 | ||
1090 | return (pgd_t) { ret }; | 1100 | return (pgd_t) { ret }; |
1091 | } | 1101 | } |
1092 | 1102 | ||
1093 | static inline pgdval_t pgd_val(pgd_t pgd) | 1103 | static inline pgdval_t pgd_val(pgd_t pgd) |
1094 | { | 1104 | { |
1095 | pgdval_t ret; | 1105 | pgdval_t ret; |
1096 | 1106 | ||
1097 | if (sizeof(pgdval_t) > sizeof(long)) | 1107 | if (sizeof(pgdval_t) > sizeof(long)) |
1098 | ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val, | 1108 | ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val, |
1099 | pgd.pgd, (u64)pgd.pgd >> 32); | 1109 | pgd.pgd, (u64)pgd.pgd >> 32); |
1100 | else | 1110 | else |
1101 | ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val, | 1111 | ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val, |
1102 | pgd.pgd); | 1112 | pgd.pgd); |
1103 | 1113 | ||
1104 | return ret; | 1114 | return ret; |
1105 | } | 1115 | } |
1106 | 1116 | ||
1107 | #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION | 1117 | #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION |
1108 | static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, | 1118 | static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, |
1109 | pte_t *ptep) | 1119 | pte_t *ptep) |
1110 | { | 1120 | { |
1111 | pteval_t ret; | 1121 | pteval_t ret; |
1112 | 1122 | ||
1113 | ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start, | 1123 | ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start, |
1114 | mm, addr, ptep); | 1124 | mm, addr, ptep); |
1115 | 1125 | ||
1116 | return (pte_t) { .pte = ret }; | 1126 | return (pte_t) { .pte = ret }; |
1117 | } | 1127 | } |
1118 | 1128 | ||
1119 | static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | 1129 | static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, |
1120 | pte_t *ptep, pte_t pte) | 1130 | pte_t *ptep, pte_t pte) |
1121 | { | 1131 | { |
1122 | if (sizeof(pteval_t) > sizeof(long)) | 1132 | if (sizeof(pteval_t) > sizeof(long)) |
1123 | /* 5 arg words */ | 1133 | /* 5 arg words */ |
1124 | pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte); | 1134 | pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte); |
1125 | else | 1135 | else |
1126 | PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit, | 1136 | PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit, |
1127 | mm, addr, ptep, pte.pte); | 1137 | mm, addr, ptep, pte.pte); |
1128 | } | 1138 | } |
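ptep_modify_prot_start()/..._commit() bracket a read-modify-write of a live pte so a hypervisor backend can batch or trap the update. A sketch of the intended pattern; pte_wrprotect() is defined elsewhere in the pgtable headers and is assumed here:

static inline void example_wrprotect(struct mm_struct *mm, unsigned long addr,
				     pte_t *ptep)
{
	pte_t pte = ptep_modify_prot_start(mm, addr, ptep);

	pte = pte_wrprotect(pte);		/* clear the write bit */
	ptep_modify_prot_commit(mm, addr, ptep, pte);
}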
1129 | 1139 | ||
1130 | static inline void set_pte(pte_t *ptep, pte_t pte) | 1140 | static inline void set_pte(pte_t *ptep, pte_t pte) |
1131 | { | 1141 | { |
1132 | if (sizeof(pteval_t) > sizeof(long)) | 1142 | if (sizeof(pteval_t) > sizeof(long)) |
1133 | PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, | 1143 | PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, |
1134 | pte.pte, (u64)pte.pte >> 32); | 1144 | pte.pte, (u64)pte.pte >> 32); |
1135 | else | 1145 | else |
1136 | PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, | 1146 | PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, |
1137 | pte.pte); | 1147 | pte.pte); |
1138 | } | 1148 | } |
1139 | 1149 | ||
1140 | static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, | 1150 | static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, |
1141 | pte_t *ptep, pte_t pte) | 1151 | pte_t *ptep, pte_t pte) |
1142 | { | 1152 | { |
1143 | if (sizeof(pteval_t) > sizeof(long)) | 1153 | if (sizeof(pteval_t) > sizeof(long)) |
1144 | /* 5 arg words */ | 1154 | /* 5 arg words */ |
1145 | pv_mmu_ops.set_pte_at(mm, addr, ptep, pte); | 1155 | pv_mmu_ops.set_pte_at(mm, addr, ptep, pte); |
1146 | else | 1156 | else |
1147 | PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); | 1157 | PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); |
1148 | } | 1158 | } |
1149 | 1159 | ||
1150 | static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) | 1160 | static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) |
1151 | { | 1161 | { |
1152 | pmdval_t val = native_pmd_val(pmd); | 1162 | pmdval_t val = native_pmd_val(pmd); |
1153 | 1163 | ||
1154 | if (sizeof(pmdval_t) > sizeof(long)) | 1164 | if (sizeof(pmdval_t) > sizeof(long)) |
1155 | PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32); | 1165 | PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32); |
1156 | else | 1166 | else |
1157 | PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); | 1167 | PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); |
1158 | } | 1168 | } |
1159 | 1169 | ||
1160 | #if PAGETABLE_LEVELS >= 3 | 1170 | #if PAGETABLE_LEVELS >= 3 |
1161 | static inline pmd_t __pmd(pmdval_t val) | 1171 | static inline pmd_t __pmd(pmdval_t val) |
1162 | { | 1172 | { |
1163 | pmdval_t ret; | 1173 | pmdval_t ret; |
1164 | 1174 | ||
1165 | if (sizeof(pmdval_t) > sizeof(long)) | 1175 | if (sizeof(pmdval_t) > sizeof(long)) |
1166 | ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd, | 1176 | ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd, |
1167 | val, (u64)val >> 32); | 1177 | val, (u64)val >> 32); |
1168 | else | 1178 | else |
1169 | ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd, | 1179 | ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd, |
1170 | val); | 1180 | val); |
1171 | 1181 | ||
1172 | return (pmd_t) { ret }; | 1182 | return (pmd_t) { ret }; |
1173 | } | 1183 | } |
1174 | 1184 | ||
1175 | static inline pmdval_t pmd_val(pmd_t pmd) | 1185 | static inline pmdval_t pmd_val(pmd_t pmd) |
1176 | { | 1186 | { |
1177 | pmdval_t ret; | 1187 | pmdval_t ret; |
1178 | 1188 | ||
1179 | if (sizeof(pmdval_t) > sizeof(long)) | 1189 | if (sizeof(pmdval_t) > sizeof(long)) |
1180 | ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val, | 1190 | ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val, |
1181 | pmd.pmd, (u64)pmd.pmd >> 32); | 1191 | pmd.pmd, (u64)pmd.pmd >> 32); |
1182 | else | 1192 | else |
1183 | ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val, | 1193 | ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val, |
1184 | pmd.pmd); | 1194 | pmd.pmd); |
1185 | 1195 | ||
1186 | return ret; | 1196 | return ret; |
1187 | } | 1197 | } |
1188 | 1198 | ||
1189 | static inline void set_pud(pud_t *pudp, pud_t pud) | 1199 | static inline void set_pud(pud_t *pudp, pud_t pud) |
1190 | { | 1200 | { |
1191 | pudval_t val = native_pud_val(pud); | 1201 | pudval_t val = native_pud_val(pud); |
1192 | 1202 | ||
1193 | if (sizeof(pudval_t) > sizeof(long)) | 1203 | if (sizeof(pudval_t) > sizeof(long)) |
1194 | PVOP_VCALL3(pv_mmu_ops.set_pud, pudp, | 1204 | PVOP_VCALL3(pv_mmu_ops.set_pud, pudp, |
1195 | val, (u64)val >> 32); | 1205 | val, (u64)val >> 32); |
1196 | else | 1206 | else |
1197 | PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, | 1207 | PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, |
1198 | val); | 1208 | val); |
1199 | } | 1209 | } |
1200 | #if PAGETABLE_LEVELS == 4 | 1210 | #if PAGETABLE_LEVELS == 4 |
1201 | static inline pud_t __pud(pudval_t val) | 1211 | static inline pud_t __pud(pudval_t val) |
1202 | { | 1212 | { |
1203 | pudval_t ret; | 1213 | pudval_t ret; |
1204 | 1214 | ||
1205 | if (sizeof(pudval_t) > sizeof(long)) | 1215 | if (sizeof(pudval_t) > sizeof(long)) |
1206 | ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud, | 1216 | ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud, |
1207 | val, (u64)val >> 32); | 1217 | val, (u64)val >> 32); |
1208 | else | 1218 | else |
1209 | ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud, | 1219 | ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud, |
1210 | val); | 1220 | val); |
1211 | 1221 | ||
1212 | return (pud_t) { ret }; | 1222 | return (pud_t) { ret }; |
1213 | } | 1223 | } |
1214 | 1224 | ||
1215 | static inline pudval_t pud_val(pud_t pud) | 1225 | static inline pudval_t pud_val(pud_t pud) |
1216 | { | 1226 | { |
1217 | pudval_t ret; | 1227 | pudval_t ret; |
1218 | 1228 | ||
1219 | if (sizeof(pudval_t) > sizeof(long)) | 1229 | if (sizeof(pudval_t) > sizeof(long)) |
1220 | ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val, | 1230 | ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val, |
1221 | pud.pud, (u64)pud.pud >> 32); | 1231 | pud.pud, (u64)pud.pud >> 32); |
1222 | else | 1232 | else |
1223 | ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val, | 1233 | ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val, |
1224 | pud.pud); | 1234 | pud.pud); |
1225 | 1235 | ||
1226 | return ret; | 1236 | return ret; |
1227 | } | 1237 | } |
1228 | 1238 | ||
1229 | static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) | 1239 | static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) |
1230 | { | 1240 | { |
1231 | pgdval_t val = native_pgd_val(pgd); | 1241 | pgdval_t val = native_pgd_val(pgd); |
1232 | 1242 | ||
1233 | if (sizeof(pgdval_t) > sizeof(long)) | 1243 | if (sizeof(pgdval_t) > sizeof(long)) |
1234 | PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp, | 1244 | PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp, |
1235 | val, (u64)val >> 32); | 1245 | val, (u64)val >> 32); |
1236 | else | 1246 | else |
1237 | PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, | 1247 | PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, |
1238 | val); | 1248 | val); |
1239 | } | 1249 | } |
1240 | 1250 | ||
1241 | static inline void pgd_clear(pgd_t *pgdp) | 1251 | static inline void pgd_clear(pgd_t *pgdp) |
1242 | { | 1252 | { |
1243 | set_pgd(pgdp, __pgd(0)); | 1253 | set_pgd(pgdp, __pgd(0)); |
1244 | } | 1254 | } |
1245 | 1255 | ||
1246 | static inline void pud_clear(pud_t *pudp) | 1256 | static inline void pud_clear(pud_t *pudp) |
1247 | { | 1257 | { |
1248 | set_pud(pudp, __pud(0)); | 1258 | set_pud(pudp, __pud(0)); |
1249 | } | 1259 | } |
1250 | 1260 | ||
1251 | #endif /* PAGETABLE_LEVELS == 4 */ | 1261 | #endif /* PAGETABLE_LEVELS == 4 */ |
1252 | 1262 | ||
1253 | #endif /* PAGETABLE_LEVELS >= 3 */ | 1263 | #endif /* PAGETABLE_LEVELS >= 3 */ |
1254 | 1264 | ||
1255 | #ifdef CONFIG_X86_PAE | 1265 | #ifdef CONFIG_X86_PAE |
1256 | /* Special-case pte-setting operations for PAE, which can't update a | 1266 | /* Special-case pte-setting operations for PAE, which can't update a |
1257 | 64-bit pte atomically */ | 1267 | 64-bit pte atomically */ |
1258 | static inline void set_pte_atomic(pte_t *ptep, pte_t pte) | 1268 | static inline void set_pte_atomic(pte_t *ptep, pte_t pte) |
1259 | { | 1269 | { |
1260 | PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep, | 1270 | PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep, |
1261 | pte.pte, pte.pte >> 32); | 1271 | pte.pte, pte.pte >> 32); |
1262 | } | 1272 | } |
1263 | 1273 | ||
1264 | static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, | 1274 | static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, |
1265 | pte_t *ptep, pte_t pte) | 1275 | pte_t *ptep, pte_t pte) |
1266 | { | 1276 | { |
1267 | /* 5 arg words */ | 1277 | /* 5 arg words */ |
1268 | pv_mmu_ops.set_pte_present(mm, addr, ptep, pte); | 1278 | pv_mmu_ops.set_pte_present(mm, addr, ptep, pte); |
1269 | } | 1279 | } |
1270 | 1280 | ||
1271 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, | 1281 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, |
1272 | pte_t *ptep) | 1282 | pte_t *ptep) |
1273 | { | 1283 | { |
1274 | PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep); | 1284 | PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep); |
1275 | } | 1285 | } |
1276 | 1286 | ||
1277 | static inline void pmd_clear(pmd_t *pmdp) | 1287 | static inline void pmd_clear(pmd_t *pmdp) |
1278 | { | 1288 | { |
1279 | PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp); | 1289 | PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp); |
1280 | } | 1290 | } |
1281 | #else /* !CONFIG_X86_PAE */ | 1291 | #else /* !CONFIG_X86_PAE */ |
1282 | static inline void set_pte_atomic(pte_t *ptep, pte_t pte) | 1292 | static inline void set_pte_atomic(pte_t *ptep, pte_t pte) |
1283 | { | 1293 | { |
1284 | set_pte(ptep, pte); | 1294 | set_pte(ptep, pte); |
1285 | } | 1295 | } |
1286 | 1296 | ||
1287 | static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, | 1297 | static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, |
1288 | pte_t *ptep, pte_t pte) | 1298 | pte_t *ptep, pte_t pte) |
1289 | { | 1299 | { |
1290 | set_pte(ptep, pte); | 1300 | set_pte(ptep, pte); |
1291 | } | 1301 | } |
1292 | 1302 | ||
1293 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, | 1303 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, |
1294 | pte_t *ptep) | 1304 | pte_t *ptep) |
1295 | { | 1305 | { |
1296 | set_pte_at(mm, addr, ptep, __pte(0)); | 1306 | set_pte_at(mm, addr, ptep, __pte(0)); |
1297 | } | 1307 | } |
1298 | 1308 | ||
1299 | static inline void pmd_clear(pmd_t *pmdp) | 1309 | static inline void pmd_clear(pmd_t *pmdp) |
1300 | { | 1310 | { |
1301 | set_pmd(pmdp, __pmd(0)); | 1311 | set_pmd(pmdp, __pmd(0)); |
1302 | } | 1312 | } |
1303 | #endif /* CONFIG_X86_PAE */ | 1313 | #endif /* CONFIG_X86_PAE */ |
1304 | 1314 | ||
1305 | /* Lazy mode for batching updates / context switch */ | 1315 | /* Lazy mode for batching updates / context switch */ |
1306 | enum paravirt_lazy_mode { | 1316 | enum paravirt_lazy_mode { |
1307 | PARAVIRT_LAZY_NONE, | 1317 | PARAVIRT_LAZY_NONE, |
1308 | PARAVIRT_LAZY_MMU, | 1318 | PARAVIRT_LAZY_MMU, |
1309 | PARAVIRT_LAZY_CPU, | 1319 | PARAVIRT_LAZY_CPU, |
1310 | }; | 1320 | }; |
1311 | 1321 | ||
1312 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void); | 1322 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void); |
1313 | void paravirt_enter_lazy_cpu(void); | 1323 | void paravirt_enter_lazy_cpu(void); |
1314 | void paravirt_leave_lazy_cpu(void); | 1324 | void paravirt_leave_lazy_cpu(void); |
1315 | void paravirt_enter_lazy_mmu(void); | 1325 | void paravirt_enter_lazy_mmu(void); |
1316 | void paravirt_leave_lazy_mmu(void); | 1326 | void paravirt_leave_lazy_mmu(void); |
1317 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode); | 1327 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode); |
1318 | 1328 | ||
1319 | #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE | 1329 | #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE |
1320 | static inline void arch_enter_lazy_cpu_mode(void) | 1330 | static inline void arch_enter_lazy_cpu_mode(void) |
1321 | { | 1331 | { |
1322 | PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); | 1332 | PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); |
1323 | } | 1333 | } |
1324 | 1334 | ||
1325 | static inline void arch_leave_lazy_cpu_mode(void) | 1335 | static inline void arch_leave_lazy_cpu_mode(void) |
1326 | { | 1336 | { |
1327 | PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); | 1337 | PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); |
1328 | } | 1338 | } |
1329 | 1339 | ||
1330 | static inline void arch_flush_lazy_cpu_mode(void) | 1340 | static inline void arch_flush_lazy_cpu_mode(void) |
1331 | { | 1341 | { |
1332 | if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) { | 1342 | if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) { |
1333 | arch_leave_lazy_cpu_mode(); | 1343 | arch_leave_lazy_cpu_mode(); |
1334 | arch_enter_lazy_cpu_mode(); | 1344 | arch_enter_lazy_cpu_mode(); |
1335 | } | 1345 | } |
1336 | } | 1346 | } |
1337 | 1347 | ||
1338 | 1348 | ||
1339 | #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE | 1349 | #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE |
1340 | static inline void arch_enter_lazy_mmu_mode(void) | 1350 | static inline void arch_enter_lazy_mmu_mode(void) |
1341 | { | 1351 | { |
1342 | PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter); | 1352 | PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter); |
1343 | } | 1353 | } |
1344 | 1354 | ||
1345 | static inline void arch_leave_lazy_mmu_mode(void) | 1355 | static inline void arch_leave_lazy_mmu_mode(void) |
1346 | { | 1356 | { |
1347 | PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); | 1357 | PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); |
1348 | } | 1358 | } |
1349 | 1359 | ||
1350 | static inline void arch_flush_lazy_mmu_mode(void) | 1360 | static inline void arch_flush_lazy_mmu_mode(void) |
1351 | { | 1361 | { |
1352 | if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) { | 1362 | if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) { |
1353 | arch_leave_lazy_mmu_mode(); | 1363 | arch_leave_lazy_mmu_mode(); |
1354 | arch_enter_lazy_mmu_mode(); | 1364 | arch_enter_lazy_mmu_mode(); |
1355 | } | 1365 | } |
1356 | } | 1366 | } |
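The lazy-MMU enter/leave hooks let a run of page-table writes be queued and flushed in one go by a backend that supports batching. A usage sketch, assuming contiguous ptes and the usual PAGE_SIZE constant:

static inline void example_set_pte_range(struct mm_struct *mm, unsigned long addr,
					 pte_t *ptep, pte_t pte, int nr)
{
	int i;

	arch_enter_lazy_mmu_mode();
	for (i = 0; i < nr; i++)
		set_pte_at(mm, addr + i * PAGE_SIZE, ptep + i, pte);
	arch_leave_lazy_mmu_mode();
}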
1357 | 1367 | ||
1358 | static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, | 1368 | static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, |
1359 | unsigned long phys, pgprot_t flags) | 1369 | unsigned long phys, pgprot_t flags) |
1360 | { | 1370 | { |
1361 | pv_mmu_ops.set_fixmap(idx, phys, flags); | 1371 | pv_mmu_ops.set_fixmap(idx, phys, flags); |
1362 | } | 1372 | } |
1363 | 1373 | ||
1364 | void _paravirt_nop(void); | 1374 | void _paravirt_nop(void); |
1365 | #define paravirt_nop ((void *)_paravirt_nop) | 1375 | #define paravirt_nop ((void *)_paravirt_nop) |
1366 | 1376 | ||
1367 | /* These all sit in the .parainstructions section to tell us what to patch. */ | 1377 | /* These all sit in the .parainstructions section to tell us what to patch. */ |
1368 | struct paravirt_patch_site { | 1378 | struct paravirt_patch_site { |
1369 | u8 *instr; /* original instructions */ | 1379 | u8 *instr; /* original instructions */ |
1370 | u8 instrtype; /* type of this instruction */ | 1380 | u8 instrtype; /* type of this instruction */ |
1371 | u8 len; /* length of original instruction */ | 1381 | u8 len; /* length of original instruction */ |
1372 | u16 clobbers; /* what registers you may clobber */ | 1382 | u16 clobbers; /* what registers you may clobber */ |
1373 | }; | 1383 | }; |
1374 | 1384 | ||
1375 | extern struct paravirt_patch_site __parainstructions[], | 1385 | extern struct paravirt_patch_site __parainstructions[], |
1376 | __parainstructions_end[]; | 1386 | __parainstructions_end[]; |
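Each patch site records where an indirect pvop call sits, which op it is, its length and the registers it may clobber; the patcher walks the table between __parainstructions and __parainstructions_end at boot. A minimal walker sketch, for illustration only:

static void __init example_count_patch_sites(void)
{
	struct paravirt_patch_site *p;
	unsigned int n = 0;

	for (p = __parainstructions; p < __parainstructions_end; p++)
		n++;
	printk(KERN_DEBUG "paravirt: %u patch sites\n", n);
}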
1377 | 1387 | ||
1378 | #ifdef CONFIG_X86_32 | 1388 | #ifdef CONFIG_X86_32 |
1379 | #define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;" | 1389 | #define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;" |
1380 | #define PV_RESTORE_REGS "popl %%edx; popl %%ecx" | 1390 | #define PV_RESTORE_REGS "popl %%edx; popl %%ecx" |
1381 | #define PV_FLAGS_ARG "0" | 1391 | #define PV_FLAGS_ARG "0" |
1382 | #define PV_EXTRA_CLOBBERS | 1392 | #define PV_EXTRA_CLOBBERS |
1383 | #define PV_VEXTRA_CLOBBERS | 1393 | #define PV_VEXTRA_CLOBBERS |
1384 | #else | 1394 | #else |
1385 | /* We save only a few registers; saving all of them would be too much. Instead | 1395 | /* We save only a few registers; saving all of them would be too much. Instead |
1386 |  * we mark all caller-saved registers as clobbered, except the argument register */ | 1396 |  * we mark all caller-saved registers as clobbered, except the argument register */ |
1387 | #define PV_SAVE_REGS "pushq %%rdi;" | 1397 | #define PV_SAVE_REGS "pushq %%rdi;" |
1388 | #define PV_RESTORE_REGS "popq %%rdi;" | 1398 | #define PV_RESTORE_REGS "popq %%rdi;" |
1389 | #define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx" | 1399 | #define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx" |
1390 | #define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx" | 1400 | #define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx" |
1391 | #define PV_FLAGS_ARG "D" | 1401 | #define PV_FLAGS_ARG "D" |
1392 | #endif | 1402 | #endif |
1393 | 1403 | ||
1394 | static inline unsigned long __raw_local_save_flags(void) | 1404 | static inline unsigned long __raw_local_save_flags(void) |
1395 | { | 1405 | { |
1396 | unsigned long f; | 1406 | unsigned long f; |
1397 | 1407 | ||
1398 | asm volatile(paravirt_alt(PV_SAVE_REGS | 1408 | asm volatile(paravirt_alt(PV_SAVE_REGS |
1399 | PARAVIRT_CALL | 1409 | PARAVIRT_CALL |
1400 | PV_RESTORE_REGS) | 1410 | PV_RESTORE_REGS) |
1401 | : "=a"(f) | 1411 | : "=a"(f) |
1402 | : paravirt_type(pv_irq_ops.save_fl), | 1412 | : paravirt_type(pv_irq_ops.save_fl), |
1403 | paravirt_clobber(CLBR_EAX) | 1413 | paravirt_clobber(CLBR_EAX) |
1404 | : "memory", "cc" PV_VEXTRA_CLOBBERS); | 1414 | : "memory", "cc" PV_VEXTRA_CLOBBERS); |
1405 | return f; | 1415 | return f; |
1406 | } | 1416 | } |
1407 | 1417 | ||
1408 | static inline void raw_local_irq_restore(unsigned long f) | 1418 | static inline void raw_local_irq_restore(unsigned long f) |
1409 | { | 1419 | { |
1410 | asm volatile(paravirt_alt(PV_SAVE_REGS | 1420 | asm volatile(paravirt_alt(PV_SAVE_REGS |
1411 | PARAVIRT_CALL | 1421 | PARAVIRT_CALL |
1412 | PV_RESTORE_REGS) | 1422 | PV_RESTORE_REGS) |
1413 | : "=a"(f) | 1423 | : "=a"(f) |
1414 | : PV_FLAGS_ARG(f), | 1424 | : PV_FLAGS_ARG(f), |
1415 | paravirt_type(pv_irq_ops.restore_fl), | 1425 | paravirt_type(pv_irq_ops.restore_fl), |
1416 | paravirt_clobber(CLBR_EAX) | 1426 | paravirt_clobber(CLBR_EAX) |
1417 | : "memory", "cc" PV_EXTRA_CLOBBERS); | 1427 | : "memory", "cc" PV_EXTRA_CLOBBERS); |
1418 | } | 1428 | } |
1419 | 1429 | ||
1420 | static inline void raw_local_irq_disable(void) | 1430 | static inline void raw_local_irq_disable(void) |
1421 | { | 1431 | { |
1422 | asm volatile(paravirt_alt(PV_SAVE_REGS | 1432 | asm volatile(paravirt_alt(PV_SAVE_REGS |
1423 | PARAVIRT_CALL | 1433 | PARAVIRT_CALL |
1424 | PV_RESTORE_REGS) | 1434 | PV_RESTORE_REGS) |
1425 | : | 1435 | : |
1426 | : paravirt_type(pv_irq_ops.irq_disable), | 1436 | : paravirt_type(pv_irq_ops.irq_disable), |
1427 | paravirt_clobber(CLBR_EAX) | 1437 | paravirt_clobber(CLBR_EAX) |
1428 | : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); | 1438 | : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); |
1429 | } | 1439 | } |
1430 | 1440 | ||
1431 | static inline void raw_local_irq_enable(void) | 1441 | static inline void raw_local_irq_enable(void) |
1432 | { | 1442 | { |
1433 | asm volatile(paravirt_alt(PV_SAVE_REGS | 1443 | asm volatile(paravirt_alt(PV_SAVE_REGS |
1434 | PARAVIRT_CALL | 1444 | PARAVIRT_CALL |
1435 | PV_RESTORE_REGS) | 1445 | PV_RESTORE_REGS) |
1436 | : | 1446 | : |
1437 | : paravirt_type(pv_irq_ops.irq_enable), | 1447 | : paravirt_type(pv_irq_ops.irq_enable), |
1438 | paravirt_clobber(CLBR_EAX) | 1448 | paravirt_clobber(CLBR_EAX) |
1439 | : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); | 1449 | : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); |
1440 | } | 1450 | } |
1441 | 1451 | ||
1442 | static inline unsigned long __raw_local_irq_save(void) | 1452 | static inline unsigned long __raw_local_irq_save(void) |
1443 | { | 1453 | { |
1444 | unsigned long f; | 1454 | unsigned long f; |
1445 | 1455 | ||
1446 | f = __raw_local_save_flags(); | 1456 | f = __raw_local_save_flags(); |
1447 | raw_local_irq_disable(); | 1457 | raw_local_irq_disable(); |
1448 | return f; | 1458 | return f; |
1449 | } | 1459 | } |
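A usage sketch for the flag helpers above, pairing __raw_local_irq_save() with raw_local_irq_restore() around a critical section:

static inline void example_with_irqs_off(void (*fn)(void *), void *arg)
{
	unsigned long flags;

	flags = __raw_local_irq_save();	/* save IF and disable interrupts */
	fn(arg);
	raw_local_irq_restore(flags);
}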
1450 | 1460 | ||
1451 | /* Make sure as little as possible of this mess escapes. */ | 1461 | /* Make sure as little as possible of this mess escapes. */ |
1452 | #undef PARAVIRT_CALL | 1462 | #undef PARAVIRT_CALL |
1453 | #undef __PVOP_CALL | 1463 | #undef __PVOP_CALL |
1454 | #undef __PVOP_VCALL | 1464 | #undef __PVOP_VCALL |
1455 | #undef PVOP_VCALL0 | 1465 | #undef PVOP_VCALL0 |
1456 | #undef PVOP_CALL0 | 1466 | #undef PVOP_CALL0 |
1457 | #undef PVOP_VCALL1 | 1467 | #undef PVOP_VCALL1 |
1458 | #undef PVOP_CALL1 | 1468 | #undef PVOP_CALL1 |
1459 | #undef PVOP_VCALL2 | 1469 | #undef PVOP_VCALL2 |
1460 | #undef PVOP_CALL2 | 1470 | #undef PVOP_CALL2 |
1461 | #undef PVOP_VCALL3 | 1471 | #undef PVOP_VCALL3 |
1462 | #undef PVOP_CALL3 | 1472 | #undef PVOP_CALL3 |
1463 | #undef PVOP_VCALL4 | 1473 | #undef PVOP_VCALL4 |
1464 | #undef PVOP_CALL4 | 1474 | #undef PVOP_CALL4 |
1465 | 1475 | ||
1466 | #else /* __ASSEMBLY__ */ | 1476 | #else /* __ASSEMBLY__ */ |
1467 | 1477 | ||
1468 | #define _PVSITE(ptype, clobbers, ops, word, algn) \ | 1478 | #define _PVSITE(ptype, clobbers, ops, word, algn) \ |
1469 | 771:; \ | 1479 | 771:; \ |
1470 | ops; \ | 1480 | ops; \ |
1471 | 772:; \ | 1481 | 772:; \ |
1472 | .pushsection .parainstructions,"a"; \ | 1482 | .pushsection .parainstructions,"a"; \ |
1473 | .align algn; \ | 1483 | .align algn; \ |
1474 | word 771b; \ | 1484 | word 771b; \ |
1475 | .byte ptype; \ | 1485 | .byte ptype; \ |
1476 | .byte 772b-771b; \ | 1486 | .byte 772b-771b; \ |
1477 | .short clobbers; \ | 1487 | .short clobbers; \ |
1478 | .popsection | 1488 | .popsection |
1479 | 1489 | ||
1480 | 1490 | ||
1481 | #ifdef CONFIG_X86_64 | 1491 | #ifdef CONFIG_X86_64 |
1482 | #define PV_SAVE_REGS pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx | 1492 | #define PV_SAVE_REGS pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx |
1483 | #define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax | 1493 | #define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax |
1484 | #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) | 1494 | #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) |
1485 | #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) | 1495 | #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) |
1486 | #define PARA_INDIRECT(addr) *addr(%rip) | 1496 | #define PARA_INDIRECT(addr) *addr(%rip) |
1487 | #else | 1497 | #else |
1488 | #define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx | 1498 | #define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx |
1489 | #define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax | 1499 | #define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax |
1490 | #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) | 1500 | #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) |
1491 | #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) | 1501 | #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) |
1492 | #define PARA_INDIRECT(addr) *%cs:addr | 1502 | #define PARA_INDIRECT(addr) *%cs:addr |
1493 | #endif | 1503 | #endif |
1494 | 1504 | ||
1495 | #define INTERRUPT_RETURN \ | 1505 | #define INTERRUPT_RETURN \ |
1496 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ | 1506 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ |
1497 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) | 1507 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) |
1498 | 1508 | ||
1499 | #define DISABLE_INTERRUPTS(clobbers) \ | 1509 | #define DISABLE_INTERRUPTS(clobbers) \ |
1500 | PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ | 1510 | PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ |
1501 | PV_SAVE_REGS; \ | 1511 | PV_SAVE_REGS; \ |
1502 | call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ | 1512 | call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ |
1503 | PV_RESTORE_REGS;) \ | 1513 | PV_RESTORE_REGS;) \ |
1504 | 1514 | ||
1505 | #define ENABLE_INTERRUPTS(clobbers) \ | 1515 | #define ENABLE_INTERRUPTS(clobbers) \ |
1506 | PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ | 1516 | PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ |
1507 | PV_SAVE_REGS; \ | 1517 | PV_SAVE_REGS; \ |
1508 | call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ | 1518 | call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ |
1509 | PV_RESTORE_REGS;) | 1519 | PV_RESTORE_REGS;) |
1510 | 1520 | ||
1511 | #define USERGS_SYSRET32 \ | 1521 | #define USERGS_SYSRET32 \ |
1512 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ | 1522 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ |
1513 | CLBR_NONE, \ | 1523 | CLBR_NONE, \ |
1514 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32)) | 1524 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32)) |
1515 | 1525 | ||
1516 | #ifdef CONFIG_X86_32 | 1526 | #ifdef CONFIG_X86_32 |
1517 | #define GET_CR0_INTO_EAX \ | 1527 | #define GET_CR0_INTO_EAX \ |
1518 | push %ecx; push %edx; \ | 1528 | push %ecx; push %edx; \ |
1519 | call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ | 1529 | call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ |
1520 | pop %edx; pop %ecx | 1530 | pop %edx; pop %ecx |
1521 | 1531 | ||
1522 | #define ENABLE_INTERRUPTS_SYSEXIT \ | 1532 | #define ENABLE_INTERRUPTS_SYSEXIT \ |
1523 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ | 1533 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ |
1524 | CLBR_NONE, \ | 1534 | CLBR_NONE, \ |
1525 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) | 1535 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) |
1526 | 1536 | ||
1527 | 1537 | ||
1528 | #else /* !CONFIG_X86_32 */ | 1538 | #else /* !CONFIG_X86_32 */ |
1529 | 1539 | ||
1530 | /* | 1540 | /* |
1531 | * If swapgs is used while the userspace stack is still current, | 1541 | * If swapgs is used while the userspace stack is still current, |
1532 | * there's no way to call a pvop. The PV replacement *must* be | 1542 | * there's no way to call a pvop. The PV replacement *must* be |
1533 | * inlined, or the swapgs instruction must be trapped and emulated. | 1543 | * inlined, or the swapgs instruction must be trapped and emulated. |
1534 | */ | 1544 | */ |
1535 | #define SWAPGS_UNSAFE_STACK \ | 1545 | #define SWAPGS_UNSAFE_STACK \ |
1536 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ | 1546 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ |
1537 | swapgs) | 1547 | swapgs) |
1538 | 1548 | ||
1539 | #define SWAPGS \ | 1549 | #define SWAPGS \ |
1540 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ | 1550 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ |
1541 | PV_SAVE_REGS; \ | 1551 | PV_SAVE_REGS; \ |
1542 | call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ | 1552 | call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ |
1543 | PV_RESTORE_REGS \ | 1553 | PV_RESTORE_REGS \ |
1544 | ) | 1554 | ) |
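The comment above explains why SWAPGS_UNSAFE_STACK must patch in a bare swapgs rather than call through the op table. For orientation, here is a minimal C sketch of what the slot being dispatched to amounts to on the C side; the struct and function names are illustrative stand-ins, not the kernel's exact definitions.

/* Illustrative sketch only -- the real slot lives in pv_cpu_ops. */
struct cpu_ops_sketch {
	void (*swapgs)(void);			/* slot the PARA_SITEs above dispatch to */
};

static void native_swapgs_sketch(void)
{
	asm volatile("swapgs" ::: "memory");	/* the instruction patched inline */
}

static struct cpu_ops_sketch cpu_ops_sketch = {
	.swapgs = native_swapgs_sketch,		/* a hypervisor would install its own hook */
};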
1545 | 1555 | ||
1546 | #define GET_CR2_INTO_RCX \ | 1556 | #define GET_CR2_INTO_RCX \ |
1547 | call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \ | 1557 | call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \ |
1548 | movq %rax, %rcx; \ | 1558 | movq %rax, %rcx; \ |
1549 | xorq %rax, %rax; | 1559 | xorq %rax, %rax; |
1550 | 1560 | ||
1551 | #define PARAVIRT_ADJUST_EXCEPTION_FRAME \ | 1561 | #define PARAVIRT_ADJUST_EXCEPTION_FRAME \ |
1552 | PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \ | 1562 | PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \ |
1553 | CLBR_NONE, \ | 1563 | CLBR_NONE, \ |
1554 | call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame)) | 1564 | call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame)) |
1555 | 1565 | ||
1556 | #define USERGS_SYSRET64 \ | 1566 | #define USERGS_SYSRET64 \ |
1557 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ | 1567 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ |
1558 | CLBR_NONE, \ | 1568 | CLBR_NONE, \ |
1559 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) | 1569 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) |
1560 | 1570 | ||
1561 | #define ENABLE_INTERRUPTS_SYSEXIT32 \ | 1571 | #define ENABLE_INTERRUPTS_SYSEXIT32 \ |
1562 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ | 1572 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ |
1563 | CLBR_NONE, \ | 1573 | CLBR_NONE, \ |
1564 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) | 1574 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) |
1565 | #endif /* CONFIG_X86_32 */ | 1575 | #endif /* CONFIG_X86_32 */ |
1566 | 1576 | ||
1567 | #endif /* __ASSEMBLY__ */ | 1577 | #endif /* __ASSEMBLY__ */ |
1568 | #endif /* CONFIG_PARAVIRT */ | 1578 | #endif /* CONFIG_PARAVIRT */ |
1569 | #endif /* __ASM_PARAVIRT_H */ | 1579 | #endif /* __ASM_PARAVIRT_H */ |
1570 | 1580 |
include/asm-x86/system.h
1 | #ifndef _ASM_X86_SYSTEM_H_ | 1 | #ifndef _ASM_X86_SYSTEM_H_ |
2 | #define _ASM_X86_SYSTEM_H_ | 2 | #define _ASM_X86_SYSTEM_H_ |
3 | 3 | ||
4 | #include <asm/asm.h> | 4 | #include <asm/asm.h> |
5 | #include <asm/segment.h> | 5 | #include <asm/segment.h> |
6 | #include <asm/cpufeature.h> | 6 | #include <asm/cpufeature.h> |
7 | #include <asm/cmpxchg.h> | 7 | #include <asm/cmpxchg.h> |
8 | #include <asm/nops.h> | 8 | #include <asm/nops.h> |
9 | 9 | ||
10 | #include <linux/kernel.h> | 10 | #include <linux/kernel.h> |
11 | #include <linux/irqflags.h> | 11 | #include <linux/irqflags.h> |
12 | 12 | ||
13 | /* entries in ARCH_DLINFO: */ | 13 | /* entries in ARCH_DLINFO: */ |
14 | #ifdef CONFIG_IA32_EMULATION | 14 | #ifdef CONFIG_IA32_EMULATION |
15 | # define AT_VECTOR_SIZE_ARCH 2 | 15 | # define AT_VECTOR_SIZE_ARCH 2 |
16 | #else | 16 | #else |
17 | # define AT_VECTOR_SIZE_ARCH 1 | 17 | # define AT_VECTOR_SIZE_ARCH 1 |
18 | #endif | 18 | #endif |
19 | 19 | ||
20 | #ifdef CONFIG_X86_32 | 20 | #ifdef CONFIG_X86_32 |
21 | 21 | ||
22 | struct task_struct; /* one of the stranger aspects of C forward declarations */ | 22 | struct task_struct; /* one of the stranger aspects of C forward declarations */ |
23 | struct task_struct *__switch_to(struct task_struct *prev, | 23 | struct task_struct *__switch_to(struct task_struct *prev, |
24 | struct task_struct *next); | 24 | struct task_struct *next); |
25 | 25 | ||
26 | /* | 26 | /* |
27 | * Saving eflags is important. It switches not only IOPL between tasks, | 27 | * Saving eflags is important. It switches not only IOPL between tasks, |
28 | * it also protects other tasks from NT leaking through sysenter etc. | 28 | * it also protects other tasks from NT leaking through sysenter etc. |
29 | */ | 29 | */ |
30 | #define switch_to(prev, next, last) \ | 30 | #define switch_to(prev, next, last) \ |
31 | do { \ | 31 | do { \ |
32 | /* \ | 32 | /* \ |
33 | * Context-switching clobbers all registers, so we clobber \ | 33 | * Context-switching clobbers all registers, so we clobber \ |
34 | * them explicitly, via unused output variables. \ | 34 | * them explicitly, via unused output variables. \ |
35 | * (EAX and EBP are not listed because EBP is saved/restored \ | 35 | * (EAX and EBP are not listed because EBP is saved/restored \ |
36 | * explicitly for wchan access and EAX is the return value of \ | 36 | * explicitly for wchan access and EAX is the return value of \ |
37 | * __switch_to()) \ | 37 | * __switch_to()) \ |
38 | */ \ | 38 | */ \ |
39 | unsigned long ebx, ecx, edx, esi, edi; \ | 39 | unsigned long ebx, ecx, edx, esi, edi; \ |
40 | \ | 40 | \ |
41 | asm volatile("pushfl\n\t" /* save flags */ \ | 41 | asm volatile("pushfl\n\t" /* save flags */ \ |
42 | "pushl %%ebp\n\t" /* save EBP */ \ | 42 | "pushl %%ebp\n\t" /* save EBP */ \ |
43 | "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ | 43 | "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ |
44 | "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ | 44 | "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ |
45 | "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ | 45 | "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ |
46 | "pushl %[next_ip]\n\t" /* restore EIP */ \ | 46 | "pushl %[next_ip]\n\t" /* restore EIP */ \ |
47 | "jmp __switch_to\n" /* regparm call */ \ | 47 | "jmp __switch_to\n" /* regparm call */ \ |
48 | "1:\t" \ | 48 | "1:\t" \ |
49 | "popl %%ebp\n\t" /* restore EBP */ \ | 49 | "popl %%ebp\n\t" /* restore EBP */ \ |
50 | "popfl\n" /* restore flags */ \ | 50 | "popfl\n" /* restore flags */ \ |
51 | \ | 51 | \ |
52 | /* output parameters */ \ | 52 | /* output parameters */ \ |
53 | : [prev_sp] "=m" (prev->thread.sp), \ | 53 | : [prev_sp] "=m" (prev->thread.sp), \ |
54 | [prev_ip] "=m" (prev->thread.ip), \ | 54 | [prev_ip] "=m" (prev->thread.ip), \ |
55 | "=a" (last), \ | 55 | "=a" (last), \ |
56 | \ | 56 | \ |
57 | /* clobbered output registers: */ \ | 57 | /* clobbered output registers: */ \ |
58 | "=b" (ebx), "=c" (ecx), "=d" (edx), \ | 58 | "=b" (ebx), "=c" (ecx), "=d" (edx), \ |
59 | "=S" (esi), "=D" (edi) \ | 59 | "=S" (esi), "=D" (edi) \ |
60 | \ | 60 | \ |
61 | /* input parameters: */ \ | 61 | /* input parameters: */ \ |
62 | : [next_sp] "m" (next->thread.sp), \ | 62 | : [next_sp] "m" (next->thread.sp), \ |
63 | [next_ip] "m" (next->thread.ip), \ | 63 | [next_ip] "m" (next->thread.ip), \ |
64 | \ | 64 | \ |
65 | /* regparm parameters for __switch_to(): */ \ | 65 | /* regparm parameters for __switch_to(): */ \ |
66 | [prev] "a" (prev), \ | 66 | [prev] "a" (prev), \ |
67 | [next] "d" (next)); \ | 67 | [next] "d" (next)); \ |
68 | } while (0) | 68 | } while (0) |
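One usage note, since the macro's third argument is easy to misread: the stack, and with it every local variable, changes underneath this asm, so the scheduler passes prev a second time and reads back the task it actually switched away from through the "=a" output. A hedged sketch of the call site (paraphrased, not quoted from the scheduler):

	/* roughly what the scheduler's context_switch() does: */
	switch_to(prev, next, prev);	/* on return, prev names the task we came from */
	barrier();			/* prev is the only valid handle to it here */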
69 | 69 | ||
70 | /* | 70 | /* |
71 | * disable hlt during certain critical i/o operations | 71 | * disable hlt during certain critical i/o operations |
72 | */ | 72 | */ |
73 | #define HAVE_DISABLE_HLT | 73 | #define HAVE_DISABLE_HLT |
74 | #else | 74 | #else |
75 | #define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" | 75 | #define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" |
76 | #define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" | 76 | #define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" |
77 | 77 | ||
78 | /* frame pointer must be last for get_wchan */ | 78 | /* frame pointer must be last for get_wchan */ |
79 | #define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" | 79 | #define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" |
80 | #define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" | 80 | #define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" |
81 | 81 | ||
82 | #define __EXTRA_CLOBBER \ | 82 | #define __EXTRA_CLOBBER \ |
83 | , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ | 83 | , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ |
84 | "r12", "r13", "r14", "r15" | 84 | "r12", "r13", "r14", "r15" |
85 | 85 | ||
86 | /* Save and restore flags to keep a set NT bit from leaking between tasks */ | 86 | /* Save and restore flags to keep a set NT bit from leaking between tasks */ |
87 | #define switch_to(prev, next, last) \ | 87 | #define switch_to(prev, next, last) \ |
88 | asm volatile(SAVE_CONTEXT \ | 88 | asm volatile(SAVE_CONTEXT \ |
89 | "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ | 89 | "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ |
90 | "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ | 90 | "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ |
91 | "call __switch_to\n\t" \ | 91 | "call __switch_to\n\t" \ |
92 | ".globl thread_return\n" \ | 92 | ".globl thread_return\n" \ |
93 | "thread_return:\n\t" \ | 93 | "thread_return:\n\t" \ |
94 | "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ | 94 | "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ |
95 | "movq %P[thread_info](%%rsi),%%r8\n\t" \ | 95 | "movq %P[thread_info](%%rsi),%%r8\n\t" \ |
96 | LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ | 96 | LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ |
97 | "movq %%rax,%%rdi\n\t" \ | 97 | "movq %%rax,%%rdi\n\t" \ |
98 | "jc ret_from_fork\n\t" \ | 98 | "jc ret_from_fork\n\t" \ |
99 | RESTORE_CONTEXT \ | 99 | RESTORE_CONTEXT \ |
100 | : "=a" (last) \ | 100 | : "=a" (last) \ |
101 | : [next] "S" (next), [prev] "D" (prev), \ | 101 | : [next] "S" (next), [prev] "D" (prev), \ |
102 | [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ | 102 | [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ |
103 | [ti_flags] "i" (offsetof(struct thread_info, flags)), \ | 103 | [ti_flags] "i" (offsetof(struct thread_info, flags)), \ |
104 | [tif_fork] "i" (TIF_FORK), \ | 104 | [tif_fork] "i" (TIF_FORK), \ |
105 | [thread_info] "i" (offsetof(struct task_struct, stack)), \ | 105 | [thread_info] "i" (offsetof(struct task_struct, stack)), \ |
106 | [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ | 106 | [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ |
107 | : "memory", "cc" __EXTRA_CLOBBER) | 107 | : "memory", "cc" __EXTRA_CLOBBER) |
108 | #endif | 108 | #endif |
109 | 109 | ||
110 | #ifdef __KERNEL__ | 110 | #ifdef __KERNEL__ |
111 | #define _set_base(addr, base) do { unsigned long __pr; \ | 111 | #define _set_base(addr, base) do { unsigned long __pr; \ |
112 | __asm__ __volatile__ ("movw %%dx,%1\n\t" \ | 112 | __asm__ __volatile__ ("movw %%dx,%1\n\t" \ |
113 | "rorl $16,%%edx\n\t" \ | 113 | "rorl $16,%%edx\n\t" \ |
114 | "movb %%dl,%2\n\t" \ | 114 | "movb %%dl,%2\n\t" \ |
115 | "movb %%dh,%3" \ | 115 | "movb %%dh,%3" \ |
116 | :"=&d" (__pr) \ | 116 | :"=&d" (__pr) \ |
117 | :"m" (*((addr)+2)), \ | 117 | :"m" (*((addr)+2)), \ |
118 | "m" (*((addr)+4)), \ | 118 | "m" (*((addr)+4)), \ |
119 | "m" (*((addr)+7)), \ | 119 | "m" (*((addr)+7)), \ |
120 | "0" (base) \ | 120 | "0" (base) \ |
121 | ); } while (0) | 121 | ); } while (0) |
122 | 122 | ||
123 | #define _set_limit(addr, limit) do { unsigned long __lr; \ | 123 | #define _set_limit(addr, limit) do { unsigned long __lr; \ |
124 | __asm__ __volatile__ ("movw %%dx,%1\n\t" \ | 124 | __asm__ __volatile__ ("movw %%dx,%1\n\t" \ |
125 | "rorl $16,%%edx\n\t" \ | 125 | "rorl $16,%%edx\n\t" \ |
126 | "movb %2,%%dh\n\t" \ | 126 | "movb %2,%%dh\n\t" \ |
127 | "andb $0xf0,%%dh\n\t" \ | 127 | "andb $0xf0,%%dh\n\t" \ |
128 | "orb %%dh,%%dl\n\t" \ | 128 | "orb %%dh,%%dl\n\t" \ |
129 | "movb %%dl,%2" \ | 129 | "movb %%dl,%2" \ |
130 | :"=&d" (__lr) \ | 130 | :"=&d" (__lr) \ |
131 | :"m" (*(addr)), \ | 131 | :"m" (*(addr)), \ |
132 | "m" (*((addr)+6)), \ | 132 | "m" (*((addr)+6)), \ |
133 | "0" (limit) \ | 133 | "0" (limit) \ |
134 | ); } while (0) | 134 | ); } while (0) |
135 | 135 | ||
136 | #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) | 136 | #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) |
137 | #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) | 137 | #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) |
138 | 138 | ||
139 | extern void load_gs_index(unsigned); | 139 | extern void native_load_gs_index(unsigned); |
140 | 140 | ||
141 | /* | 141 | /* |
142 | * Load a segment. Fall back on loading the zero | 142 | * Load a segment. Fall back on loading the zero |
143 | * segment if something goes wrong. | 143 | * segment if something goes wrong. |
144 | */ | 144 | */ |
145 | #define loadsegment(seg, value) \ | 145 | #define loadsegment(seg, value) \ |
146 | asm volatile("\n" \ | 146 | asm volatile("\n" \ |
147 | "1:\t" \ | 147 | "1:\t" \ |
148 | "movl %k0,%%" #seg "\n" \ | 148 | "movl %k0,%%" #seg "\n" \ |
149 | "2:\n" \ | 149 | "2:\n" \ |
150 | ".section .fixup,\"ax\"\n" \ | 150 | ".section .fixup,\"ax\"\n" \ |
151 | "3:\t" \ | 151 | "3:\t" \ |
152 | "movl %k1, %%" #seg "\n\t" \ | 152 | "movl %k1, %%" #seg "\n\t" \ |
153 | "jmp 2b\n" \ | 153 | "jmp 2b\n" \ |
154 | ".previous\n" \ | 154 | ".previous\n" \ |
155 | _ASM_EXTABLE(1b,3b) \ | 155 | _ASM_EXTABLE(1b,3b) \ |
156 | : :"r" (value), "r" (0) : "memory") | 156 | : :"r" (value), "r" (0) : "memory") |
157 | 157 | ||
158 | 158 | ||
159 | /* | 159 | /* |
160 | * Save a segment register away | 160 | * Save a segment register away |
161 | */ | 161 | */ |
162 | #define savesegment(seg, value) \ | 162 | #define savesegment(seg, value) \ |
163 | asm("mov %%" #seg ",%0":"=rm" (value) : : "memory") | 163 | asm("mov %%" #seg ",%0":"=rm" (value) : : "memory") |
164 | 164 | ||
165 | static inline unsigned long get_limit(unsigned long segment) | 165 | static inline unsigned long get_limit(unsigned long segment) |
166 | { | 166 | { |
167 | unsigned long __limit; | 167 | unsigned long __limit; |
168 | asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); | 168 | asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); |
169 | return __limit + 1; | 169 | return __limit + 1; |
170 | } | 170 | } |
171 | 171 | ||
172 | static inline void native_clts(void) | 172 | static inline void native_clts(void) |
173 | { | 173 | { |
174 | asm volatile("clts"); | 174 | asm volatile("clts"); |
175 | } | 175 | } |
176 | 176 | ||
177 | /* | 177 | /* |
178 | * Volatile isn't enough to prevent the compiler from reordering the | 178 | * Volatile isn't enough to prevent the compiler from reordering the |
179 | * read/write functions for the control registers and messing everything up. | 179 | * read/write functions for the control registers and messing everything up. |
180 | * A memory clobber would solve the problem, but would prevent reordering of | 180 | * A memory clobber would solve the problem, but would prevent reordering of |
181 | * all loads/stores around it, which can hurt performance. The solution is | 181 | * all loads/stores around it, which can hurt performance. The solution is |
182 | * to use a variable and mimic reads and writes to it to enforce serialization. | 182 | * to use a variable and mimic reads and writes to it to enforce serialization. |
183 | */ | 183 | */ |
184 | static unsigned long __force_order; | 184 | static unsigned long __force_order; |
185 | 185 | ||
186 | static inline unsigned long native_read_cr0(void) | 186 | static inline unsigned long native_read_cr0(void) |
187 | { | 187 | { |
188 | unsigned long val; | 188 | unsigned long val; |
189 | asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); | 189 | asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); |
190 | return val; | 190 | return val; |
191 | } | 191 | } |
192 | 192 | ||
193 | static inline void native_write_cr0(unsigned long val) | 193 | static inline void native_write_cr0(unsigned long val) |
194 | { | 194 | { |
195 | asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); | 195 | asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); |
196 | } | 196 | } |
197 | 197 | ||
198 | static inline unsigned long native_read_cr2(void) | 198 | static inline unsigned long native_read_cr2(void) |
199 | { | 199 | { |
200 | unsigned long val; | 200 | unsigned long val; |
201 | asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); | 201 | asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); |
202 | return val; | 202 | return val; |
203 | } | 203 | } |
204 | 204 | ||
205 | static inline void native_write_cr2(unsigned long val) | 205 | static inline void native_write_cr2(unsigned long val) |
206 | { | 206 | { |
207 | asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); | 207 | asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); |
208 | } | 208 | } |
209 | 209 | ||
210 | static inline unsigned long native_read_cr3(void) | 210 | static inline unsigned long native_read_cr3(void) |
211 | { | 211 | { |
212 | unsigned long val; | 212 | unsigned long val; |
213 | asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); | 213 | asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); |
214 | return val; | 214 | return val; |
215 | } | 215 | } |
216 | 216 | ||
217 | static inline void native_write_cr3(unsigned long val) | 217 | static inline void native_write_cr3(unsigned long val) |
218 | { | 218 | { |
219 | asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); | 219 | asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); |
220 | } | 220 | } |
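To make the __force_order trick above concrete: because every accessor names the same dummy variable, even accesses to different control registers keep their program order, without the cost of a full "memory" clobber. A hedged illustration (not kernel code):

static inline unsigned long demo_cr0_then_cr3(unsigned long new_cr3)
{
	unsigned long cr0 = native_read_cr0();	/* "=m"(__force_order): fake write */

	native_write_cr3(new_cr3);		/* "m"(__force_order): fake read, so the
						 * compiler cannot hoist it above the
						 * cr0 read */
	return cr0;
}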
221 | 221 | ||
222 | static inline unsigned long native_read_cr4(void) | 222 | static inline unsigned long native_read_cr4(void) |
223 | { | 223 | { |
224 | unsigned long val; | 224 | unsigned long val; |
225 | asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); | 225 | asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); |
226 | return val; | 226 | return val; |
227 | } | 227 | } |
228 | 228 | ||
229 | static inline unsigned long native_read_cr4_safe(void) | 229 | static inline unsigned long native_read_cr4_safe(void) |
230 | { | 230 | { |
231 | unsigned long val; | 231 | unsigned long val; |
232 | /* This could fault if %cr4 does not exist. On x86_64, cr4 always | 232 | /* This could fault if %cr4 does not exist. On x86_64, cr4 always |
233 | * exists, so it will never fail. */ | 233 | * exists, so it will never fail. */ |
234 | #ifdef CONFIG_X86_32 | 234 | #ifdef CONFIG_X86_32 |
235 | asm volatile("1: mov %%cr4, %0\n" | 235 | asm volatile("1: mov %%cr4, %0\n" |
236 | "2:\n" | 236 | "2:\n" |
237 | _ASM_EXTABLE(1b, 2b) | 237 | _ASM_EXTABLE(1b, 2b) |
238 | : "=r" (val), "=m" (__force_order) : "0" (0)); | 238 | : "=r" (val), "=m" (__force_order) : "0" (0)); |
239 | #else | 239 | #else |
240 | val = native_read_cr4(); | 240 | val = native_read_cr4(); |
241 | #endif | 241 | #endif |
242 | return val; | 242 | return val; |
243 | } | 243 | } |
244 | 244 | ||
245 | static inline void native_write_cr4(unsigned long val) | 245 | static inline void native_write_cr4(unsigned long val) |
246 | { | 246 | { |
247 | asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); | 247 | asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); |
248 | } | 248 | } |
249 | 249 | ||
250 | #ifdef CONFIG_X86_64 | 250 | #ifdef CONFIG_X86_64 |
251 | static inline unsigned long native_read_cr8(void) | 251 | static inline unsigned long native_read_cr8(void) |
252 | { | 252 | { |
253 | unsigned long cr8; | 253 | unsigned long cr8; |
254 | asm volatile("movq %%cr8,%0" : "=r" (cr8)); | 254 | asm volatile("movq %%cr8,%0" : "=r" (cr8)); |
255 | return cr8; | 255 | return cr8; |
256 | } | 256 | } |
257 | 257 | ||
258 | static inline void native_write_cr8(unsigned long val) | 258 | static inline void native_write_cr8(unsigned long val) |
259 | { | 259 | { |
260 | asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); | 260 | asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); |
261 | } | 261 | } |
262 | #endif | 262 | #endif |
263 | 263 | ||
264 | static inline void native_wbinvd(void) | 264 | static inline void native_wbinvd(void) |
265 | { | 265 | { |
266 | asm volatile("wbinvd": : :"memory"); | 266 | asm volatile("wbinvd": : :"memory"); |
267 | } | 267 | } |
268 | 268 | ||
269 | #ifdef CONFIG_PARAVIRT | 269 | #ifdef CONFIG_PARAVIRT |
270 | #include <asm/paravirt.h> | 270 | #include <asm/paravirt.h> |
271 | #else | 271 | #else |
272 | #define read_cr0() (native_read_cr0()) | 272 | #define read_cr0() (native_read_cr0()) |
273 | #define write_cr0(x) (native_write_cr0(x)) | 273 | #define write_cr0(x) (native_write_cr0(x)) |
274 | #define read_cr2() (native_read_cr2()) | 274 | #define read_cr2() (native_read_cr2()) |
275 | #define write_cr2(x) (native_write_cr2(x)) | 275 | #define write_cr2(x) (native_write_cr2(x)) |
276 | #define read_cr3() (native_read_cr3()) | 276 | #define read_cr3() (native_read_cr3()) |
277 | #define write_cr3(x) (native_write_cr3(x)) | 277 | #define write_cr3(x) (native_write_cr3(x)) |
278 | #define read_cr4() (native_read_cr4()) | 278 | #define read_cr4() (native_read_cr4()) |
279 | #define read_cr4_safe() (native_read_cr4_safe()) | 279 | #define read_cr4_safe() (native_read_cr4_safe()) |
280 | #define write_cr4(x) (native_write_cr4(x)) | 280 | #define write_cr4(x) (native_write_cr4(x)) |
281 | #define wbinvd() (native_wbinvd()) | 281 | #define wbinvd() (native_wbinvd()) |
282 | #ifdef CONFIG_X86_64 | 282 | #ifdef CONFIG_X86_64 |
283 | #define read_cr8() (native_read_cr8()) | 283 | #define read_cr8() (native_read_cr8()) |
284 | #define write_cr8(x) (native_write_cr8(x)) | 284 | #define write_cr8(x) (native_write_cr8(x)) |
| | 285 | #define load_gs_index native_load_gs_index |
285 | #endif | 286 | #endif |
286 | 287 | ||
287 | /* Clear the 'TS' bit */ | 288 | /* Clear the 'TS' bit */ |
288 | #define clts() (native_clts()) | 289 | #define clts() (native_clts()) |
289 | 290 | ||
290 | #endif/* CONFIG_PARAVIRT */ | 291 | #endif/* CONFIG_PARAVIRT */ |
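This is the heart of the change on the !CONFIG_PARAVIRT side: load_gs_index now resolves to native_load_gs_index. With CONFIG_PARAVIRT the name is provided by <asm/paravirt.h> instead; assuming that side of the commit follows the same pattern as the other pv_cpu_ops accessors, the wrapper would look roughly like this (a hedged sketch, not a quote of the patch):

/* Hedged sketch of the assumed paravirt-side wrapper: */
static inline void load_gs_index(unsigned gs)
{
	PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs);
}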
291 | 292 | ||
292 | #define stts() write_cr0(read_cr0() | X86_CR0_TS) | 293 | #define stts() write_cr0(read_cr0() | X86_CR0_TS) |
293 | 294 | ||
294 | #endif /* __KERNEL__ */ | 295 | #endif /* __KERNEL__ */ |
295 | 296 | ||
296 | static inline void clflush(volatile void *__p) | 297 | static inline void clflush(volatile void *__p) |
297 | { | 298 | { |
298 | asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); | 299 | asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); |
299 | } | 300 | } |
300 | 301 | ||
301 | #define nop() asm volatile ("nop") | 302 | #define nop() asm volatile ("nop") |
302 | 303 | ||
303 | void disable_hlt(void); | 304 | void disable_hlt(void); |
304 | void enable_hlt(void); | 305 | void enable_hlt(void); |
305 | 306 | ||
306 | void cpu_idle_wait(void); | 307 | void cpu_idle_wait(void); |
307 | 308 | ||
308 | extern unsigned long arch_align_stack(unsigned long sp); | 309 | extern unsigned long arch_align_stack(unsigned long sp); |
309 | extern void free_init_pages(char *what, unsigned long begin, unsigned long end); | 310 | extern void free_init_pages(char *what, unsigned long begin, unsigned long end); |
310 | 311 | ||
311 | void default_idle(void); | 312 | void default_idle(void); |
312 | 313 | ||
313 | /* | 314 | /* |
314 | * Force strict CPU ordering. | 315 | * Force strict CPU ordering. |
315 | * And yes, this is required on UP too when we're talking | 316 | * And yes, this is required on UP too when we're talking |
316 | * to devices. | 317 | * to devices. |
317 | */ | 318 | */ |
318 | #ifdef CONFIG_X86_32 | 319 | #ifdef CONFIG_X86_32 |
319 | /* | 320 | /* |
320 | * Some non-Intel clones support out of order store. wmb() ceases to be a | 321 | * Some non-Intel clones support out of order store. wmb() ceases to be a |
321 | * nop for these. | 322 | * nop for these. |
322 | */ | 323 | */ |
323 | #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) | 324 | #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) |
324 | #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) | 325 | #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) |
325 | #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) | 326 | #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) |
326 | #else | 327 | #else |
327 | #define mb() asm volatile("mfence":::"memory") | 328 | #define mb() asm volatile("mfence":::"memory") |
328 | #define rmb() asm volatile("lfence":::"memory") | 329 | #define rmb() asm volatile("lfence":::"memory") |
329 | #define wmb() asm volatile("sfence" ::: "memory") | 330 | #define wmb() asm volatile("sfence" ::: "memory") |
330 | #endif | 331 | #endif |
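As a usage illustration for the primitives above, here is the classic producer pattern a driver uses when handing a descriptor to hardware; the structure and field names are hypothetical, not from the kernel:

struct demo_desc {			/* hypothetical DMA descriptor */
	unsigned long addr;
	unsigned int  len;
	unsigned int  owner;		/* 1 = owned by the device */
};

static inline void demo_post(struct demo_desc *d,
			     unsigned long addr, unsigned int len)
{
	d->addr = addr;
	d->len  = len;
	wmb();				/* payload must be visible before the flag */
	d->owner = 1;
}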
331 | 332 | ||
332 | /** | 333 | /** |
333 | * read_barrier_depends - Flush all pending reads that subsequent reads | 334 | * read_barrier_depends - Flush all pending reads that subsequent reads |
334 | * depend on. | 335 | * depend on. |
335 | * | 336 | * |
336 | * No data-dependent reads from memory-like regions are ever reordered | 337 | * No data-dependent reads from memory-like regions are ever reordered |
337 | * over this barrier. All reads preceding this primitive are guaranteed | 338 | * over this barrier. All reads preceding this primitive are guaranteed |
338 | * to access memory (but not necessarily other CPUs' caches) before any | 339 | * to access memory (but not necessarily other CPUs' caches) before any |
339 | * reads following this primitive that depend on the data returned by | 340 | * reads following this primitive that depend on the data returned by |
340 | * any of the preceding reads. This primitive is much lighter weight than | 341 | * any of the preceding reads. This primitive is much lighter weight than |
341 | * rmb() on most CPUs, and is never heavier weight than is | 342 | * rmb() on most CPUs, and is never heavier weight than is |
342 | * rmb(). | 343 | * rmb(). |
343 | * | 344 | * |
344 | * These ordering constraints are respected by both the local CPU | 345 | * These ordering constraints are respected by both the local CPU |
345 | * and the compiler. | 346 | * and the compiler. |
346 | * | 347 | * |
347 | * Ordering is not guaranteed by anything other than these primitives, | 348 | * Ordering is not guaranteed by anything other than these primitives, |
348 | * not even by data dependencies. See the documentation for | 349 | * not even by data dependencies. See the documentation for |
349 | * memory_barrier() for examples and URLs to more information. | 350 | * memory_barrier() for examples and URLs to more information. |
350 | * | 351 | * |
351 | * For example, the following code would force ordering (the initial | 352 | * For example, the following code would force ordering (the initial |
352 | * value of "a" is zero, "b" is one, and "p" is "&a"): | 353 | * value of "a" is zero, "b" is one, and "p" is "&a"): |
353 | * | 354 | * |
354 | * <programlisting> | 355 | * <programlisting> |
355 | * CPU 0 CPU 1 | 356 | * CPU 0 CPU 1 |
356 | * | 357 | * |
357 | * b = 2; | 358 | * b = 2; |
358 | * memory_barrier(); | 359 | * memory_barrier(); |
359 | * p = &b; q = p; | 360 | * p = &b; q = p; |
360 | * read_barrier_depends(); | 361 | * read_barrier_depends(); |
361 | * d = *q; | 362 | * d = *q; |
362 | * </programlisting> | 363 | * </programlisting> |
363 | * | 364 | * |
364 | * because the read of "*q" depends on the read of "p" and these | 365 | * because the read of "*q" depends on the read of "p" and these |
365 | * two reads are separated by a read_barrier_depends(). However, | 366 | * two reads are separated by a read_barrier_depends(). However, |
366 | * the following code, with the same initial values for "a" and "b": | 367 | * the following code, with the same initial values for "a" and "b": |
367 | * | 368 | * |
368 | * <programlisting> | 369 | * <programlisting> |
369 | * CPU 0 CPU 1 | 370 | * CPU 0 CPU 1 |
370 | * | 371 | * |
371 | * a = 2; | 372 | * a = 2; |
372 | * memory_barrier(); | 373 | * memory_barrier(); |
373 | * b = 3; y = b; | 374 | * b = 3; y = b; |
374 | * read_barrier_depends(); | 375 | * read_barrier_depends(); |
375 | * x = a; | 376 | * x = a; |
376 | * </programlisting> | 377 | * </programlisting> |
377 | * | 378 | * |
378 | * does not enforce ordering, since there is no data dependency between | 379 | * does not enforce ordering, since there is no data dependency between |
379 | * the read of "a" and the read of "b". Therefore, on some CPUs, such | 380 | * the read of "a" and the read of "b". Therefore, on some CPUs, such |
380 | * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() | 381 | * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() |
381 | * in cases like this where there are no data dependencies. | 382 | * in cases like this where there are no data dependencies. |
382 | **/ | 383 | **/ |
383 | 384 | ||
384 | #define read_barrier_depends() do { } while (0) | 385 | #define read_barrier_depends() do { } while (0) |
385 | 386 | ||
386 | #ifdef CONFIG_SMP | 387 | #ifdef CONFIG_SMP |
387 | #define smp_mb() mb() | 388 | #define smp_mb() mb() |
388 | #ifdef CONFIG_X86_PPRO_FENCE | 389 | #ifdef CONFIG_X86_PPRO_FENCE |
389 | # define smp_rmb() rmb() | 390 | # define smp_rmb() rmb() |
390 | #else | 391 | #else |
391 | # define smp_rmb() barrier() | 392 | # define smp_rmb() barrier() |
392 | #endif | 393 | #endif |
393 | #ifdef CONFIG_X86_OOSTORE | 394 | #ifdef CONFIG_X86_OOSTORE |
394 | # define smp_wmb() wmb() | 395 | # define smp_wmb() wmb() |
395 | #else | 396 | #else |
396 | # define smp_wmb() barrier() | 397 | # define smp_wmb() barrier() |
397 | #endif | 398 | #endif |
398 | #define smp_read_barrier_depends() read_barrier_depends() | 399 | #define smp_read_barrier_depends() read_barrier_depends() |
399 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) | 400 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) |
400 | #else | 401 | #else |
401 | #define smp_mb() barrier() | 402 | #define smp_mb() barrier() |
402 | #define smp_rmb() barrier() | 403 | #define smp_rmb() barrier() |
403 | #define smp_wmb() barrier() | 404 | #define smp_wmb() barrier() |
404 | #define smp_read_barrier_depends() do { } while (0) | 405 | #define smp_read_barrier_depends() do { } while (0) |
405 | #define set_mb(var, value) do { var = value; barrier(); } while (0) | 406 | #define set_mb(var, value) do { var = value; barrier(); } while (0) |
406 | #endif | 407 | #endif |
407 | 408 | ||
408 | /* | 409 | /* |
409 | * Stop RDTSC speculation. This is needed when you need to use RDTSC | 410 | * Stop RDTSC speculation. This is needed when you need to use RDTSC |
410 | * (or get_cycles or vread that possibly accesses the TSC) in a defined | 411 | * (or get_cycles or vread that possibly accesses the TSC) in a defined |
411 | * code region. | 412 | * code region. |
412 | * | 413 | * |
413 | * (Could use a three-way alternative() for this if there was one.) | 414 | * (Could use a three-way alternative() for this if there was one.) |
414 | */ | 415 | */ |
415 | static inline void rdtsc_barrier(void) | 416 | static inline void rdtsc_barrier(void) |
416 | { | 417 | { |
417 | alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); | 418 | alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); |
418 | alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); | 419 | alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); |
419 | } | 420 | } |
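A small usage sketch for rdtsc_barrier(): fencing both sides of the TSC read keeps the measurement from drifting outside the region being timed. The rdtsc outputs in %edx:%eax are architectural; everything else is illustrative.

static inline unsigned long long demo_timestamp(void)
{
	unsigned int lo, hi;

	rdtsc_barrier();		/* don't let the read float up into earlier code */
	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	rdtsc_barrier();		/* don't let later loads be speculated before it */
	return ((unsigned long long)hi << 32) | lo;
}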
420 | 421 | ||
421 | #endif | 422 | #endif |
422 | 423 |