Commit 9f9d489a3e78b49d897734eaaf9dea568dbea66e

Authored by Jeremy Fitzhardinge
Committed by Ingo Molnar
1 parent fab58420ac

x86/paravirt, 64-bit: make load_gs_index() a paravirt operation

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 5 changed files with 18 additions and 4 deletions
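Only the arch/x86/kernel/entry_64.S portion of the change is reproduced below: the assembly routine that reloads the GS selector is renamed to native_load_gs_index so that the C-level load_gs_index() can be routed through the paravirt ops table. As background, here is a minimal stand-alone C sketch of that pattern (an illustration only, not the kernel's code; the real hook is a function pointer added to the paravirt ops structure in the other changed files of this commit, which are not shown in this section, so the struct and names below are simplified assumptions):

/* Sketch of the paravirt-op pattern applied to load_gs_index(). */
#include <stdio.h>

struct cpu_ops {                                  /* stand-in for the kernel's pv_cpu_ops */
	void (*load_gs_index)(unsigned int idx);
};

static void native_load_gs_index(unsigned int idx)  /* the real one is the asm routine below */
{
	printf("native: swapgs; load selector %#x into %%gs; swapgs\n", idx);
}

static struct cpu_ops cpu_ops = {
	.load_gs_index = native_load_gs_index,       /* native backend is the default */
};

static void load_gs_index(unsigned int idx)       /* what C callers keep using */
{
	cpu_ops.load_gs_index(idx);                  /* indirect call through the ops table */
}

int main(void)
{
	load_gs_index(0x33);                         /* example selector value, arbitrary */
	return 0;
}

A hypervisor backend such as Xen would install its own function in place of the native one at boot, while native boots keep calling the renamed assembly routine with no behavioural change.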

arch/x86/kernel/entry_64.S
1 /* 1 /*
2 * linux/arch/x86_64/entry.S 2 * linux/arch/x86_64/entry.S
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs 5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> 6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 */ 7 */
8 8
9 /* 9 /*
10 * entry.S contains the system-call and fault low-level handling routines. 10 * entry.S contains the system-call and fault low-level handling routines.
11 * 11 *
12 * NOTE: This code handles signal-recognition, which happens every time 12 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call. 13 * after an interrupt and after each system call.
14 * 14 *
15 * Normal syscalls and interrupts don't save a full stack frame, this is 15 * Normal syscalls and interrupts don't save a full stack frame, this is
16 * only done for syscall tracing, signals or fork/exec et.al. 16 * only done for syscall tracing, signals or fork/exec et.al.
17 * 17 *
18 * A note on terminology: 18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP 19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack. 20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers upto R11. 21 * - partial stack frame: partially saved registers upto R11.
22 * - full stack frame: Like partial stack frame, but all register saved. 22 * - full stack frame: Like partial stack frame, but all register saved.
23 * 23 *
24 * Some macro usage: 24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better 25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code. 26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers 27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. 28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29 * There are unfortunately lots of special cases where some registers 29 * There are unfortunately lots of special cases where some registers
30 * not touched. The macro is a big mess that should be cleaned up. 30 * not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. 31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame. 32 * Gives a full stack frame.
33 * - ENTRY/END Define functions in the symbol table. 33 * - ENTRY/END Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack 34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL 35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. 36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points. 37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
38 */ 38 */
39 39
40 #include <linux/linkage.h> 40 #include <linux/linkage.h>
41 #include <asm/segment.h> 41 #include <asm/segment.h>
42 #include <asm/cache.h> 42 #include <asm/cache.h>
43 #include <asm/errno.h> 43 #include <asm/errno.h>
44 #include <asm/dwarf2.h> 44 #include <asm/dwarf2.h>
45 #include <asm/calling.h> 45 #include <asm/calling.h>
46 #include <asm/asm-offsets.h> 46 #include <asm/asm-offsets.h>
47 #include <asm/msr.h> 47 #include <asm/msr.h>
48 #include <asm/unistd.h> 48 #include <asm/unistd.h>
49 #include <asm/thread_info.h> 49 #include <asm/thread_info.h>
50 #include <asm/hw_irq.h> 50 #include <asm/hw_irq.h>
51 #include <asm/page.h> 51 #include <asm/page.h>
52 #include <asm/irqflags.h> 52 #include <asm/irqflags.h>
53 #include <asm/paravirt.h> 53 #include <asm/paravirt.h>
54 54
55 .code64 55 .code64
56 56
57 #ifndef CONFIG_PREEMPT 57 #ifndef CONFIG_PREEMPT
58 #define retint_kernel retint_restore_args 58 #define retint_kernel retint_restore_args
59 #endif 59 #endif
60 60
61 #ifdef CONFIG_PARAVIRT 61 #ifdef CONFIG_PARAVIRT
62 ENTRY(native_usergs_sysret64) 62 ENTRY(native_usergs_sysret64)
63 swapgs 63 swapgs
64 sysretq 64 sysretq
65 #endif /* CONFIG_PARAVIRT */ 65 #endif /* CONFIG_PARAVIRT */
66 66
67 67
68 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET 68 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
69 #ifdef CONFIG_TRACE_IRQFLAGS 69 #ifdef CONFIG_TRACE_IRQFLAGS
70 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ 70 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
71 jnc 1f 71 jnc 1f
72 TRACE_IRQS_ON 72 TRACE_IRQS_ON
73 1: 73 1:
74 #endif 74 #endif
75 .endm 75 .endm
76 76
77 /* 77 /*
78 * C code is not supposed to know about undefined top of stack. Every time 78 * C code is not supposed to know about undefined top of stack. Every time
79 * a C function with an pt_regs argument is called from the SYSCALL based 79 * a C function with an pt_regs argument is called from the SYSCALL based
80 * fast path FIXUP_TOP_OF_STACK is needed. 80 * fast path FIXUP_TOP_OF_STACK is needed.
81 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs 81 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
82 * manipulation. 82 * manipulation.
83 */ 83 */
84 84
85 /* %rsp:at FRAMEEND */ 85 /* %rsp:at FRAMEEND */
86 .macro FIXUP_TOP_OF_STACK tmp 86 .macro FIXUP_TOP_OF_STACK tmp
87 movq %gs:pda_oldrsp,\tmp 87 movq %gs:pda_oldrsp,\tmp
88 movq \tmp,RSP(%rsp) 88 movq \tmp,RSP(%rsp)
89 movq $__USER_DS,SS(%rsp) 89 movq $__USER_DS,SS(%rsp)
90 movq $__USER_CS,CS(%rsp) 90 movq $__USER_CS,CS(%rsp)
91 movq $-1,RCX(%rsp) 91 movq $-1,RCX(%rsp)
92 movq R11(%rsp),\tmp /* get eflags */ 92 movq R11(%rsp),\tmp /* get eflags */
93 movq \tmp,EFLAGS(%rsp) 93 movq \tmp,EFLAGS(%rsp)
94 .endm 94 .endm
95 95
96 .macro RESTORE_TOP_OF_STACK tmp,offset=0 96 .macro RESTORE_TOP_OF_STACK tmp,offset=0
97 movq RSP-\offset(%rsp),\tmp 97 movq RSP-\offset(%rsp),\tmp
98 movq \tmp,%gs:pda_oldrsp 98 movq \tmp,%gs:pda_oldrsp
99 movq EFLAGS-\offset(%rsp),\tmp 99 movq EFLAGS-\offset(%rsp),\tmp
100 movq \tmp,R11-\offset(%rsp) 100 movq \tmp,R11-\offset(%rsp)
101 .endm 101 .endm
102 102
103 .macro FAKE_STACK_FRAME child_rip 103 .macro FAKE_STACK_FRAME child_rip
104 /* push in order ss, rsp, eflags, cs, rip */ 104 /* push in order ss, rsp, eflags, cs, rip */
105 xorl %eax, %eax 105 xorl %eax, %eax
106 pushq $__KERNEL_DS /* ss */ 106 pushq $__KERNEL_DS /* ss */
107 CFI_ADJUST_CFA_OFFSET 8 107 CFI_ADJUST_CFA_OFFSET 8
108 /*CFI_REL_OFFSET ss,0*/ 108 /*CFI_REL_OFFSET ss,0*/
109 pushq %rax /* rsp */ 109 pushq %rax /* rsp */
110 CFI_ADJUST_CFA_OFFSET 8 110 CFI_ADJUST_CFA_OFFSET 8
111 CFI_REL_OFFSET rsp,0 111 CFI_REL_OFFSET rsp,0
112 pushq $(1<<9) /* eflags - interrupts on */ 112 pushq $(1<<9) /* eflags - interrupts on */
113 CFI_ADJUST_CFA_OFFSET 8 113 CFI_ADJUST_CFA_OFFSET 8
114 /*CFI_REL_OFFSET rflags,0*/ 114 /*CFI_REL_OFFSET rflags,0*/
115 pushq $__KERNEL_CS /* cs */ 115 pushq $__KERNEL_CS /* cs */
116 CFI_ADJUST_CFA_OFFSET 8 116 CFI_ADJUST_CFA_OFFSET 8
117 /*CFI_REL_OFFSET cs,0*/ 117 /*CFI_REL_OFFSET cs,0*/
118 pushq \child_rip /* rip */ 118 pushq \child_rip /* rip */
119 CFI_ADJUST_CFA_OFFSET 8 119 CFI_ADJUST_CFA_OFFSET 8
120 CFI_REL_OFFSET rip,0 120 CFI_REL_OFFSET rip,0
121 pushq %rax /* orig rax */ 121 pushq %rax /* orig rax */
122 CFI_ADJUST_CFA_OFFSET 8 122 CFI_ADJUST_CFA_OFFSET 8
123 .endm 123 .endm
124 124
125 .macro UNFAKE_STACK_FRAME 125 .macro UNFAKE_STACK_FRAME
126 addq $8*6, %rsp 126 addq $8*6, %rsp
127 CFI_ADJUST_CFA_OFFSET -(6*8) 127 CFI_ADJUST_CFA_OFFSET -(6*8)
128 .endm 128 .endm
129 129
130 .macro CFI_DEFAULT_STACK start=1 130 .macro CFI_DEFAULT_STACK start=1
131 .if \start 131 .if \start
132 CFI_STARTPROC simple 132 CFI_STARTPROC simple
133 CFI_SIGNAL_FRAME 133 CFI_SIGNAL_FRAME
134 CFI_DEF_CFA rsp,SS+8 134 CFI_DEF_CFA rsp,SS+8
135 .else 135 .else
136 CFI_DEF_CFA_OFFSET SS+8 136 CFI_DEF_CFA_OFFSET SS+8
137 .endif 137 .endif
138 CFI_REL_OFFSET r15,R15 138 CFI_REL_OFFSET r15,R15
139 CFI_REL_OFFSET r14,R14 139 CFI_REL_OFFSET r14,R14
140 CFI_REL_OFFSET r13,R13 140 CFI_REL_OFFSET r13,R13
141 CFI_REL_OFFSET r12,R12 141 CFI_REL_OFFSET r12,R12
142 CFI_REL_OFFSET rbp,RBP 142 CFI_REL_OFFSET rbp,RBP
143 CFI_REL_OFFSET rbx,RBX 143 CFI_REL_OFFSET rbx,RBX
144 CFI_REL_OFFSET r11,R11 144 CFI_REL_OFFSET r11,R11
145 CFI_REL_OFFSET r10,R10 145 CFI_REL_OFFSET r10,R10
146 CFI_REL_OFFSET r9,R9 146 CFI_REL_OFFSET r9,R9
147 CFI_REL_OFFSET r8,R8 147 CFI_REL_OFFSET r8,R8
148 CFI_REL_OFFSET rax,RAX 148 CFI_REL_OFFSET rax,RAX
149 CFI_REL_OFFSET rcx,RCX 149 CFI_REL_OFFSET rcx,RCX
150 CFI_REL_OFFSET rdx,RDX 150 CFI_REL_OFFSET rdx,RDX
151 CFI_REL_OFFSET rsi,RSI 151 CFI_REL_OFFSET rsi,RSI
152 CFI_REL_OFFSET rdi,RDI 152 CFI_REL_OFFSET rdi,RDI
153 CFI_REL_OFFSET rip,RIP 153 CFI_REL_OFFSET rip,RIP
154 /*CFI_REL_OFFSET cs,CS*/ 154 /*CFI_REL_OFFSET cs,CS*/
155 /*CFI_REL_OFFSET rflags,EFLAGS*/ 155 /*CFI_REL_OFFSET rflags,EFLAGS*/
156 CFI_REL_OFFSET rsp,RSP 156 CFI_REL_OFFSET rsp,RSP
157 /*CFI_REL_OFFSET ss,SS*/ 157 /*CFI_REL_OFFSET ss,SS*/
158 .endm 158 .endm
159 /* 159 /*
160 * A newly forked process directly context switches into this. 160 * A newly forked process directly context switches into this.
161 */ 161 */
162 /* rdi: prev */ 162 /* rdi: prev */
163 ENTRY(ret_from_fork) 163 ENTRY(ret_from_fork)
164 CFI_DEFAULT_STACK 164 CFI_DEFAULT_STACK
165 push kernel_eflags(%rip) 165 push kernel_eflags(%rip)
166 CFI_ADJUST_CFA_OFFSET 4 166 CFI_ADJUST_CFA_OFFSET 4
167 popf # reset kernel eflags 167 popf # reset kernel eflags
168 CFI_ADJUST_CFA_OFFSET -4 168 CFI_ADJUST_CFA_OFFSET -4
169 call schedule_tail 169 call schedule_tail
170 GET_THREAD_INFO(%rcx) 170 GET_THREAD_INFO(%rcx)
171 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) 171 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
172 jnz rff_trace 172 jnz rff_trace
173 rff_action: 173 rff_action:
174 RESTORE_REST 174 RESTORE_REST
175 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? 175 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
176 je int_ret_from_sys_call 176 je int_ret_from_sys_call
177 testl $_TIF_IA32,threadinfo_flags(%rcx) 177 testl $_TIF_IA32,threadinfo_flags(%rcx)
178 jnz int_ret_from_sys_call 178 jnz int_ret_from_sys_call
179 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET 179 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
180 jmp ret_from_sys_call 180 jmp ret_from_sys_call
181 rff_trace: 181 rff_trace:
182 movq %rsp,%rdi 182 movq %rsp,%rdi
183 call syscall_trace_leave 183 call syscall_trace_leave
184 GET_THREAD_INFO(%rcx) 184 GET_THREAD_INFO(%rcx)
185 jmp rff_action 185 jmp rff_action
186 CFI_ENDPROC 186 CFI_ENDPROC
187 END(ret_from_fork) 187 END(ret_from_fork)
188 188
189 /* 189 /*
190 * System call entry. Upto 6 arguments in registers are supported. 190 * System call entry. Upto 6 arguments in registers are supported.
191 * 191 *
192 * SYSCALL does not save anything on the stack and does not change the 192 * SYSCALL does not save anything on the stack and does not change the
193 * stack pointer. 193 * stack pointer.
194 */ 194 */
195 195
196 /* 196 /*
197 * Register setup: 197 * Register setup:
198 * rax system call number 198 * rax system call number
199 * rdi arg0 199 * rdi arg0
200 * rcx return address for syscall/sysret, C arg3 200 * rcx return address for syscall/sysret, C arg3
201 * rsi arg1 201 * rsi arg1
202 * rdx arg2 202 * rdx arg2
203 * r10 arg3 (--> moved to rcx for C) 203 * r10 arg3 (--> moved to rcx for C)
204 * r8 arg4 204 * r8 arg4
205 * r9 arg5 205 * r9 arg5
206 * r11 eflags for syscall/sysret, temporary for C 206 * r11 eflags for syscall/sysret, temporary for C
207 * r12-r15,rbp,rbx saved by C code, not touched. 207 * r12-r15,rbp,rbx saved by C code, not touched.
208 * 208 *
209 * Interrupts are off on entry. 209 * Interrupts are off on entry.
210 * Only called from user space. 210 * Only called from user space.
211 * 211 *
212 * XXX if we had a free scratch register we could save the RSP into the stack frame 212 * XXX if we had a free scratch register we could save the RSP into the stack frame
213 * and report it properly in ps. Unfortunately we haven't. 213 * and report it properly in ps. Unfortunately we haven't.
214 * 214 *
215 * When user can change the frames always force IRET. That is because 215 * When user can change the frames always force IRET. That is because
216 * it deals with uncanonical addresses better. SYSRET has trouble 216 * it deals with uncanonical addresses better. SYSRET has trouble
217 * with them due to bugs in both AMD and Intel CPUs. 217 * with them due to bugs in both AMD and Intel CPUs.
218 */ 218 */
219 219
220 ENTRY(system_call) 220 ENTRY(system_call)
221 CFI_STARTPROC simple 221 CFI_STARTPROC simple
222 CFI_SIGNAL_FRAME 222 CFI_SIGNAL_FRAME
223 CFI_DEF_CFA rsp,PDA_STACKOFFSET 223 CFI_DEF_CFA rsp,PDA_STACKOFFSET
224 CFI_REGISTER rip,rcx 224 CFI_REGISTER rip,rcx
225 /*CFI_REGISTER rflags,r11*/ 225 /*CFI_REGISTER rflags,r11*/
226 SWAPGS_UNSAFE_STACK 226 SWAPGS_UNSAFE_STACK
227 /* 227 /*
228 * A hypervisor implementation might want to use a label 228 * A hypervisor implementation might want to use a label
229 * after the swapgs, so that it can do the swapgs 229 * after the swapgs, so that it can do the swapgs
230 * for the guest and jump here on syscall. 230 * for the guest and jump here on syscall.
231 */ 231 */
232 ENTRY(system_call_after_swapgs) 232 ENTRY(system_call_after_swapgs)
233 233
234 movq %rsp,%gs:pda_oldrsp 234 movq %rsp,%gs:pda_oldrsp
235 movq %gs:pda_kernelstack,%rsp 235 movq %gs:pda_kernelstack,%rsp
236 /* 236 /*
237 * No need to follow this irqs off/on section - it's straight 237 * No need to follow this irqs off/on section - it's straight
238 * and short: 238 * and short:
239 */ 239 */
240 ENABLE_INTERRUPTS(CLBR_NONE) 240 ENABLE_INTERRUPTS(CLBR_NONE)
241 SAVE_ARGS 8,1 241 SAVE_ARGS 8,1
242 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 242 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
243 movq %rcx,RIP-ARGOFFSET(%rsp) 243 movq %rcx,RIP-ARGOFFSET(%rsp)
244 CFI_REL_OFFSET rip,RIP-ARGOFFSET 244 CFI_REL_OFFSET rip,RIP-ARGOFFSET
245 GET_THREAD_INFO(%rcx) 245 GET_THREAD_INFO(%rcx)
246 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) 246 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
247 jnz tracesys 247 jnz tracesys
248 cmpq $__NR_syscall_max,%rax 248 cmpq $__NR_syscall_max,%rax
249 ja badsys 249 ja badsys
250 movq %r10,%rcx 250 movq %r10,%rcx
251 call *sys_call_table(,%rax,8) # XXX: rip relative 251 call *sys_call_table(,%rax,8) # XXX: rip relative
252 movq %rax,RAX-ARGOFFSET(%rsp) 252 movq %rax,RAX-ARGOFFSET(%rsp)
253 /* 253 /*
254 * Syscall return path ending with SYSRET (fast path) 254 * Syscall return path ending with SYSRET (fast path)
255 * Has incomplete stack frame and undefined top of stack. 255 * Has incomplete stack frame and undefined top of stack.
256 */ 256 */
257 ret_from_sys_call: 257 ret_from_sys_call:
258 movl $_TIF_ALLWORK_MASK,%edi 258 movl $_TIF_ALLWORK_MASK,%edi
259 /* edi: flagmask */ 259 /* edi: flagmask */
260 sysret_check: 260 sysret_check:
261 LOCKDEP_SYS_EXIT 261 LOCKDEP_SYS_EXIT
262 GET_THREAD_INFO(%rcx) 262 GET_THREAD_INFO(%rcx)
263 DISABLE_INTERRUPTS(CLBR_NONE) 263 DISABLE_INTERRUPTS(CLBR_NONE)
264 TRACE_IRQS_OFF 264 TRACE_IRQS_OFF
265 movl threadinfo_flags(%rcx),%edx 265 movl threadinfo_flags(%rcx),%edx
266 andl %edi,%edx 266 andl %edi,%edx
267 jnz sysret_careful 267 jnz sysret_careful
268 CFI_REMEMBER_STATE 268 CFI_REMEMBER_STATE
269 /* 269 /*
270 * sysretq will re-enable interrupts: 270 * sysretq will re-enable interrupts:
271 */ 271 */
272 TRACE_IRQS_ON 272 TRACE_IRQS_ON
273 movq RIP-ARGOFFSET(%rsp),%rcx 273 movq RIP-ARGOFFSET(%rsp),%rcx
274 CFI_REGISTER rip,rcx 274 CFI_REGISTER rip,rcx
275 RESTORE_ARGS 0,-ARG_SKIP,1 275 RESTORE_ARGS 0,-ARG_SKIP,1
276 /*CFI_REGISTER rflags,r11*/ 276 /*CFI_REGISTER rflags,r11*/
277 movq %gs:pda_oldrsp, %rsp 277 movq %gs:pda_oldrsp, %rsp
278 USERGS_SYSRET64 278 USERGS_SYSRET64
279 279
280 CFI_RESTORE_STATE 280 CFI_RESTORE_STATE
281 /* Handle reschedules */ 281 /* Handle reschedules */
282 /* edx: work, edi: workmask */ 282 /* edx: work, edi: workmask */
283 sysret_careful: 283 sysret_careful:
284 bt $TIF_NEED_RESCHED,%edx 284 bt $TIF_NEED_RESCHED,%edx
285 jnc sysret_signal 285 jnc sysret_signal
286 TRACE_IRQS_ON 286 TRACE_IRQS_ON
287 ENABLE_INTERRUPTS(CLBR_NONE) 287 ENABLE_INTERRUPTS(CLBR_NONE)
288 pushq %rdi 288 pushq %rdi
289 CFI_ADJUST_CFA_OFFSET 8 289 CFI_ADJUST_CFA_OFFSET 8
290 call schedule 290 call schedule
291 popq %rdi 291 popq %rdi
292 CFI_ADJUST_CFA_OFFSET -8 292 CFI_ADJUST_CFA_OFFSET -8
293 jmp sysret_check 293 jmp sysret_check
294 294
295 /* Handle a signal */ 295 /* Handle a signal */
296 sysret_signal: 296 sysret_signal:
297 TRACE_IRQS_ON 297 TRACE_IRQS_ON
298 ENABLE_INTERRUPTS(CLBR_NONE) 298 ENABLE_INTERRUPTS(CLBR_NONE)
299 testl $_TIF_DO_NOTIFY_MASK,%edx 299 testl $_TIF_DO_NOTIFY_MASK,%edx
300 jz 1f 300 jz 1f
301 301
302 /* Really a signal */ 302 /* Really a signal */
303 /* edx: work flags (arg3) */ 303 /* edx: work flags (arg3) */
304 leaq do_notify_resume(%rip),%rax 304 leaq do_notify_resume(%rip),%rax
305 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 305 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
306 xorl %esi,%esi # oldset -> arg2 306 xorl %esi,%esi # oldset -> arg2
307 call ptregscall_common 307 call ptregscall_common
308 1: movl $_TIF_NEED_RESCHED,%edi 308 1: movl $_TIF_NEED_RESCHED,%edi
309 /* Use IRET because user could have changed frame. This 309 /* Use IRET because user could have changed frame. This
310 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ 310 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
311 DISABLE_INTERRUPTS(CLBR_NONE) 311 DISABLE_INTERRUPTS(CLBR_NONE)
312 TRACE_IRQS_OFF 312 TRACE_IRQS_OFF
313 jmp int_with_check 313 jmp int_with_check
314 314
315 badsys: 315 badsys:
316 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 316 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
317 jmp ret_from_sys_call 317 jmp ret_from_sys_call
318 318
319 /* Do syscall tracing */ 319 /* Do syscall tracing */
320 tracesys: 320 tracesys:
321 SAVE_REST 321 SAVE_REST
322 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ 322 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
323 FIXUP_TOP_OF_STACK %rdi 323 FIXUP_TOP_OF_STACK %rdi
324 movq %rsp,%rdi 324 movq %rsp,%rdi
325 call syscall_trace_enter 325 call syscall_trace_enter
326 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ 326 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
327 RESTORE_REST 327 RESTORE_REST
328 cmpq $__NR_syscall_max,%rax 328 cmpq $__NR_syscall_max,%rax
329 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ 329 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
330 movq %r10,%rcx /* fixup for C */ 330 movq %r10,%rcx /* fixup for C */
331 call *sys_call_table(,%rax,8) 331 call *sys_call_table(,%rax,8)
332 movq %rax,RAX-ARGOFFSET(%rsp) 332 movq %rax,RAX-ARGOFFSET(%rsp)
333 /* Use IRET because user could have changed frame */ 333 /* Use IRET because user could have changed frame */
334 334
335 /* 335 /*
336 * Syscall return path ending with IRET. 336 * Syscall return path ending with IRET.
337 * Has correct top of stack, but partial stack frame. 337 * Has correct top of stack, but partial stack frame.
338 */ 338 */
339 .globl int_ret_from_sys_call 339 .globl int_ret_from_sys_call
340 int_ret_from_sys_call: 340 int_ret_from_sys_call:
341 DISABLE_INTERRUPTS(CLBR_NONE) 341 DISABLE_INTERRUPTS(CLBR_NONE)
342 TRACE_IRQS_OFF 342 TRACE_IRQS_OFF
343 testl $3,CS-ARGOFFSET(%rsp) 343 testl $3,CS-ARGOFFSET(%rsp)
344 je retint_restore_args 344 je retint_restore_args
345 movl $_TIF_ALLWORK_MASK,%edi 345 movl $_TIF_ALLWORK_MASK,%edi
346 /* edi: mask to check */ 346 /* edi: mask to check */
347 int_with_check: 347 int_with_check:
348 LOCKDEP_SYS_EXIT_IRQ 348 LOCKDEP_SYS_EXIT_IRQ
349 GET_THREAD_INFO(%rcx) 349 GET_THREAD_INFO(%rcx)
350 movl threadinfo_flags(%rcx),%edx 350 movl threadinfo_flags(%rcx),%edx
351 andl %edi,%edx 351 andl %edi,%edx
352 jnz int_careful 352 jnz int_careful
353 andl $~TS_COMPAT,threadinfo_status(%rcx) 353 andl $~TS_COMPAT,threadinfo_status(%rcx)
354 jmp retint_swapgs 354 jmp retint_swapgs
355 355
356 /* Either reschedule or signal or syscall exit tracking needed. */ 356 /* Either reschedule or signal or syscall exit tracking needed. */
357 /* First do a reschedule test. */ 357 /* First do a reschedule test. */
358 /* edx: work, edi: workmask */ 358 /* edx: work, edi: workmask */
359 int_careful: 359 int_careful:
360 bt $TIF_NEED_RESCHED,%edx 360 bt $TIF_NEED_RESCHED,%edx
361 jnc int_very_careful 361 jnc int_very_careful
362 TRACE_IRQS_ON 362 TRACE_IRQS_ON
363 ENABLE_INTERRUPTS(CLBR_NONE) 363 ENABLE_INTERRUPTS(CLBR_NONE)
364 pushq %rdi 364 pushq %rdi
365 CFI_ADJUST_CFA_OFFSET 8 365 CFI_ADJUST_CFA_OFFSET 8
366 call schedule 366 call schedule
367 popq %rdi 367 popq %rdi
368 CFI_ADJUST_CFA_OFFSET -8 368 CFI_ADJUST_CFA_OFFSET -8
369 DISABLE_INTERRUPTS(CLBR_NONE) 369 DISABLE_INTERRUPTS(CLBR_NONE)
370 TRACE_IRQS_OFF 370 TRACE_IRQS_OFF
371 jmp int_with_check 371 jmp int_with_check
372 372
373 /* handle signals and tracing -- both require a full stack frame */ 373 /* handle signals and tracing -- both require a full stack frame */
374 int_very_careful: 374 int_very_careful:
375 TRACE_IRQS_ON 375 TRACE_IRQS_ON
376 ENABLE_INTERRUPTS(CLBR_NONE) 376 ENABLE_INTERRUPTS(CLBR_NONE)
377 SAVE_REST 377 SAVE_REST
378 /* Check for syscall exit trace */ 378 /* Check for syscall exit trace */
379 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx 379 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
380 jz int_signal 380 jz int_signal
381 pushq %rdi 381 pushq %rdi
382 CFI_ADJUST_CFA_OFFSET 8 382 CFI_ADJUST_CFA_OFFSET 8
383 leaq 8(%rsp),%rdi # &ptregs -> arg1 383 leaq 8(%rsp),%rdi # &ptregs -> arg1
384 call syscall_trace_leave 384 call syscall_trace_leave
385 popq %rdi 385 popq %rdi
386 CFI_ADJUST_CFA_OFFSET -8 386 CFI_ADJUST_CFA_OFFSET -8
387 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi 387 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
388 jmp int_restore_rest 388 jmp int_restore_rest
389 389
390 int_signal: 390 int_signal:
391 testl $_TIF_DO_NOTIFY_MASK,%edx 391 testl $_TIF_DO_NOTIFY_MASK,%edx
392 jz 1f 392 jz 1f
393 movq %rsp,%rdi # &ptregs -> arg1 393 movq %rsp,%rdi # &ptregs -> arg1
394 xorl %esi,%esi # oldset -> arg2 394 xorl %esi,%esi # oldset -> arg2
395 call do_notify_resume 395 call do_notify_resume
396 1: movl $_TIF_NEED_RESCHED,%edi 396 1: movl $_TIF_NEED_RESCHED,%edi
397 int_restore_rest: 397 int_restore_rest:
398 RESTORE_REST 398 RESTORE_REST
399 DISABLE_INTERRUPTS(CLBR_NONE) 399 DISABLE_INTERRUPTS(CLBR_NONE)
400 TRACE_IRQS_OFF 400 TRACE_IRQS_OFF
401 jmp int_with_check 401 jmp int_with_check
402 CFI_ENDPROC 402 CFI_ENDPROC
403 END(system_call) 403 END(system_call)
404 404
405 /* 405 /*
406 * Certain special system calls that need to save a complete full stack frame. 406 * Certain special system calls that need to save a complete full stack frame.
407 */ 407 */
408 408
409 .macro PTREGSCALL label,func,arg 409 .macro PTREGSCALL label,func,arg
410 .globl \label 410 .globl \label
411 \label: 411 \label:
412 leaq \func(%rip),%rax 412 leaq \func(%rip),%rax
413 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ 413 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
414 jmp ptregscall_common 414 jmp ptregscall_common
415 END(\label) 415 END(\label)
416 .endm 416 .endm
417 417
418 CFI_STARTPROC 418 CFI_STARTPROC
419 419
420 PTREGSCALL stub_clone, sys_clone, %r8 420 PTREGSCALL stub_clone, sys_clone, %r8
421 PTREGSCALL stub_fork, sys_fork, %rdi 421 PTREGSCALL stub_fork, sys_fork, %rdi
422 PTREGSCALL stub_vfork, sys_vfork, %rdi 422 PTREGSCALL stub_vfork, sys_vfork, %rdi
423 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx 423 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
424 PTREGSCALL stub_iopl, sys_iopl, %rsi 424 PTREGSCALL stub_iopl, sys_iopl, %rsi
425 425
426 ENTRY(ptregscall_common) 426 ENTRY(ptregscall_common)
427 popq %r11 427 popq %r11
428 CFI_ADJUST_CFA_OFFSET -8 428 CFI_ADJUST_CFA_OFFSET -8
429 CFI_REGISTER rip, r11 429 CFI_REGISTER rip, r11
430 SAVE_REST 430 SAVE_REST
431 movq %r11, %r15 431 movq %r11, %r15
432 CFI_REGISTER rip, r15 432 CFI_REGISTER rip, r15
433 FIXUP_TOP_OF_STACK %r11 433 FIXUP_TOP_OF_STACK %r11
434 call *%rax 434 call *%rax
435 RESTORE_TOP_OF_STACK %r11 435 RESTORE_TOP_OF_STACK %r11
436 movq %r15, %r11 436 movq %r15, %r11
437 CFI_REGISTER rip, r11 437 CFI_REGISTER rip, r11
438 RESTORE_REST 438 RESTORE_REST
439 pushq %r11 439 pushq %r11
440 CFI_ADJUST_CFA_OFFSET 8 440 CFI_ADJUST_CFA_OFFSET 8
441 CFI_REL_OFFSET rip, 0 441 CFI_REL_OFFSET rip, 0
442 ret 442 ret
443 CFI_ENDPROC 443 CFI_ENDPROC
444 END(ptregscall_common) 444 END(ptregscall_common)
445 445
446 ENTRY(stub_execve) 446 ENTRY(stub_execve)
447 CFI_STARTPROC 447 CFI_STARTPROC
448 popq %r11 448 popq %r11
449 CFI_ADJUST_CFA_OFFSET -8 449 CFI_ADJUST_CFA_OFFSET -8
450 CFI_REGISTER rip, r11 450 CFI_REGISTER rip, r11
451 SAVE_REST 451 SAVE_REST
452 FIXUP_TOP_OF_STACK %r11 452 FIXUP_TOP_OF_STACK %r11
453 movq %rsp, %rcx 453 movq %rsp, %rcx
454 call sys_execve 454 call sys_execve
455 RESTORE_TOP_OF_STACK %r11 455 RESTORE_TOP_OF_STACK %r11
456 movq %rax,RAX(%rsp) 456 movq %rax,RAX(%rsp)
457 RESTORE_REST 457 RESTORE_REST
458 jmp int_ret_from_sys_call 458 jmp int_ret_from_sys_call
459 CFI_ENDPROC 459 CFI_ENDPROC
460 END(stub_execve) 460 END(stub_execve)
461 461
462 /* 462 /*
463 * sigreturn is special because it needs to restore all registers on return. 463 * sigreturn is special because it needs to restore all registers on return.
464 * This cannot be done with SYSRET, so use the IRET return path instead. 464 * This cannot be done with SYSRET, so use the IRET return path instead.
465 */ 465 */
466 ENTRY(stub_rt_sigreturn) 466 ENTRY(stub_rt_sigreturn)
467 CFI_STARTPROC 467 CFI_STARTPROC
468 addq $8, %rsp 468 addq $8, %rsp
469 CFI_ADJUST_CFA_OFFSET -8 469 CFI_ADJUST_CFA_OFFSET -8
470 SAVE_REST 470 SAVE_REST
471 movq %rsp,%rdi 471 movq %rsp,%rdi
472 FIXUP_TOP_OF_STACK %r11 472 FIXUP_TOP_OF_STACK %r11
473 call sys_rt_sigreturn 473 call sys_rt_sigreturn
474 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer 474 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
475 RESTORE_REST 475 RESTORE_REST
476 jmp int_ret_from_sys_call 476 jmp int_ret_from_sys_call
477 CFI_ENDPROC 477 CFI_ENDPROC
478 END(stub_rt_sigreturn) 478 END(stub_rt_sigreturn)
479 479
480 /* 480 /*
481 * initial frame state for interrupts and exceptions 481 * initial frame state for interrupts and exceptions
482 */ 482 */
483 .macro _frame ref 483 .macro _frame ref
484 CFI_STARTPROC simple 484 CFI_STARTPROC simple
485 CFI_SIGNAL_FRAME 485 CFI_SIGNAL_FRAME
486 CFI_DEF_CFA rsp,SS+8-\ref 486 CFI_DEF_CFA rsp,SS+8-\ref
487 /*CFI_REL_OFFSET ss,SS-\ref*/ 487 /*CFI_REL_OFFSET ss,SS-\ref*/
488 CFI_REL_OFFSET rsp,RSP-\ref 488 CFI_REL_OFFSET rsp,RSP-\ref
489 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ 489 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
490 /*CFI_REL_OFFSET cs,CS-\ref*/ 490 /*CFI_REL_OFFSET cs,CS-\ref*/
491 CFI_REL_OFFSET rip,RIP-\ref 491 CFI_REL_OFFSET rip,RIP-\ref
492 .endm 492 .endm
493 493
494 /* initial frame state for interrupts (and exceptions without error code) */ 494 /* initial frame state for interrupts (and exceptions without error code) */
495 #define INTR_FRAME _frame RIP 495 #define INTR_FRAME _frame RIP
496 /* initial frame state for exceptions with error code (and interrupts with 496 /* initial frame state for exceptions with error code (and interrupts with
497 vector already pushed) */ 497 vector already pushed) */
498 #define XCPT_FRAME _frame ORIG_RAX 498 #define XCPT_FRAME _frame ORIG_RAX
499 499
500 /* 500 /*
501 * Interrupt entry/exit. 501 * Interrupt entry/exit.
502 * 502 *
503 * Interrupt entry points save only callee clobbered registers in fast path. 503 * Interrupt entry points save only callee clobbered registers in fast path.
504 * 504 *
505 * Entry runs with interrupts off. 505 * Entry runs with interrupts off.
506 */ 506 */
507 507
508 /* 0(%rsp): interrupt number */ 508 /* 0(%rsp): interrupt number */
509 .macro interrupt func 509 .macro interrupt func
510 cld 510 cld
511 SAVE_ARGS 511 SAVE_ARGS
512 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler 512 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
513 pushq %rbp 513 pushq %rbp
514 CFI_ADJUST_CFA_OFFSET 8 514 CFI_ADJUST_CFA_OFFSET 8
515 CFI_REL_OFFSET rbp, 0 515 CFI_REL_OFFSET rbp, 0
516 movq %rsp,%rbp 516 movq %rsp,%rbp
517 CFI_DEF_CFA_REGISTER rbp 517 CFI_DEF_CFA_REGISTER rbp
518 testl $3,CS(%rdi) 518 testl $3,CS(%rdi)
519 je 1f 519 je 1f
520 SWAPGS 520 SWAPGS
521 /* irqcount is used to check if a CPU is already on an interrupt 521 /* irqcount is used to check if a CPU is already on an interrupt
522 stack or not. While this is essentially redundant with preempt_count 522 stack or not. While this is essentially redundant with preempt_count
523 it is a little cheaper to use a separate counter in the PDA 523 it is a little cheaper to use a separate counter in the PDA
524 (short of moving irq_enter into assembly, which would be too 524 (short of moving irq_enter into assembly, which would be too
525 much work) */ 525 much work) */
526 1: incl %gs:pda_irqcount 526 1: incl %gs:pda_irqcount
527 cmoveq %gs:pda_irqstackptr,%rsp 527 cmoveq %gs:pda_irqstackptr,%rsp
528 push %rbp # backlink for old unwinder 528 push %rbp # backlink for old unwinder
529 /* 529 /*
530 * We entered an interrupt context - irqs are off: 530 * We entered an interrupt context - irqs are off:
531 */ 531 */
532 TRACE_IRQS_OFF 532 TRACE_IRQS_OFF
533 call \func 533 call \func
534 .endm 534 .endm
535 535
536 ENTRY(common_interrupt) 536 ENTRY(common_interrupt)
537 XCPT_FRAME 537 XCPT_FRAME
538 interrupt do_IRQ 538 interrupt do_IRQ
539 /* 0(%rsp): oldrsp-ARGOFFSET */ 539 /* 0(%rsp): oldrsp-ARGOFFSET */
540 ret_from_intr: 540 ret_from_intr:
541 DISABLE_INTERRUPTS(CLBR_NONE) 541 DISABLE_INTERRUPTS(CLBR_NONE)
542 TRACE_IRQS_OFF 542 TRACE_IRQS_OFF
543 decl %gs:pda_irqcount 543 decl %gs:pda_irqcount
544 leaveq 544 leaveq
545 CFI_DEF_CFA_REGISTER rsp 545 CFI_DEF_CFA_REGISTER rsp
546 CFI_ADJUST_CFA_OFFSET -8 546 CFI_ADJUST_CFA_OFFSET -8
547 exit_intr: 547 exit_intr:
548 GET_THREAD_INFO(%rcx) 548 GET_THREAD_INFO(%rcx)
549 testl $3,CS-ARGOFFSET(%rsp) 549 testl $3,CS-ARGOFFSET(%rsp)
550 je retint_kernel 550 je retint_kernel
551 551
552 /* Interrupt came from user space */ 552 /* Interrupt came from user space */
553 /* 553 /*
554 * Has a correct top of stack, but a partial stack frame 554 * Has a correct top of stack, but a partial stack frame
555 * %rcx: thread info. Interrupts off. 555 * %rcx: thread info. Interrupts off.
556 */ 556 */
557 retint_with_reschedule: 557 retint_with_reschedule:
558 movl $_TIF_WORK_MASK,%edi 558 movl $_TIF_WORK_MASK,%edi
559 retint_check: 559 retint_check:
560 LOCKDEP_SYS_EXIT_IRQ 560 LOCKDEP_SYS_EXIT_IRQ
561 movl threadinfo_flags(%rcx),%edx 561 movl threadinfo_flags(%rcx),%edx
562 andl %edi,%edx 562 andl %edi,%edx
563 CFI_REMEMBER_STATE 563 CFI_REMEMBER_STATE
564 jnz retint_careful 564 jnz retint_careful
565 565
566 retint_swapgs: /* return to user-space */ 566 retint_swapgs: /* return to user-space */
567 /* 567 /*
568 * The iretq could re-enable interrupts: 568 * The iretq could re-enable interrupts:
569 */ 569 */
570 DISABLE_INTERRUPTS(CLBR_ANY) 570 DISABLE_INTERRUPTS(CLBR_ANY)
571 TRACE_IRQS_IRETQ 571 TRACE_IRQS_IRETQ
572 SWAPGS 572 SWAPGS
573 jmp restore_args 573 jmp restore_args
574 574
575 retint_restore_args: /* return to kernel space */ 575 retint_restore_args: /* return to kernel space */
576 DISABLE_INTERRUPTS(CLBR_ANY) 576 DISABLE_INTERRUPTS(CLBR_ANY)
577 /* 577 /*
578 * The iretq could re-enable interrupts: 578 * The iretq could re-enable interrupts:
579 */ 579 */
580 TRACE_IRQS_IRETQ 580 TRACE_IRQS_IRETQ
581 restore_args: 581 restore_args:
582 RESTORE_ARGS 0,8,0 582 RESTORE_ARGS 0,8,0
583 583
584 irq_return: 584 irq_return:
585 INTERRUPT_RETURN 585 INTERRUPT_RETURN
586 586
587 .section __ex_table, "a" 587 .section __ex_table, "a"
588 .quad irq_return, bad_iret 588 .quad irq_return, bad_iret
589 .previous 589 .previous
590 590
591 #ifdef CONFIG_PARAVIRT 591 #ifdef CONFIG_PARAVIRT
592 ENTRY(native_iret) 592 ENTRY(native_iret)
593 iretq 593 iretq
594 594
595 .section __ex_table,"a" 595 .section __ex_table,"a"
596 .quad native_iret, bad_iret 596 .quad native_iret, bad_iret
597 .previous 597 .previous
598 #endif 598 #endif
599 599
600 .section .fixup,"ax" 600 .section .fixup,"ax"
601 bad_iret: 601 bad_iret:
602 /* 602 /*
603 * The iret traps when the %cs or %ss being restored is bogus. 603 * The iret traps when the %cs or %ss being restored is bogus.
604 * We've lost the original trap vector and error code. 604 * We've lost the original trap vector and error code.
605 * #GPF is the most likely one to get for an invalid selector. 605 * #GPF is the most likely one to get for an invalid selector.
606 * So pretend we completed the iret and took the #GPF in user mode. 606 * So pretend we completed the iret and took the #GPF in user mode.
607 * 607 *
608 * We are now running with the kernel GS after exception recovery. 608 * We are now running with the kernel GS after exception recovery.
609 * But error_entry expects us to have user GS to match the user %cs, 609 * But error_entry expects us to have user GS to match the user %cs,
610 * so swap back. 610 * so swap back.
611 */ 611 */
612 pushq $0 612 pushq $0
613 613
614 SWAPGS 614 SWAPGS
615 jmp general_protection 615 jmp general_protection
616 616
617 .previous 617 .previous
618 618
619 /* edi: workmask, edx: work */ 619 /* edi: workmask, edx: work */
620 retint_careful: 620 retint_careful:
621 CFI_RESTORE_STATE 621 CFI_RESTORE_STATE
622 bt $TIF_NEED_RESCHED,%edx 622 bt $TIF_NEED_RESCHED,%edx
623 jnc retint_signal 623 jnc retint_signal
624 TRACE_IRQS_ON 624 TRACE_IRQS_ON
625 ENABLE_INTERRUPTS(CLBR_NONE) 625 ENABLE_INTERRUPTS(CLBR_NONE)
626 pushq %rdi 626 pushq %rdi
627 CFI_ADJUST_CFA_OFFSET 8 627 CFI_ADJUST_CFA_OFFSET 8
628 call schedule 628 call schedule
629 popq %rdi 629 popq %rdi
630 CFI_ADJUST_CFA_OFFSET -8 630 CFI_ADJUST_CFA_OFFSET -8
631 GET_THREAD_INFO(%rcx) 631 GET_THREAD_INFO(%rcx)
632 DISABLE_INTERRUPTS(CLBR_NONE) 632 DISABLE_INTERRUPTS(CLBR_NONE)
633 TRACE_IRQS_OFF 633 TRACE_IRQS_OFF
634 jmp retint_check 634 jmp retint_check
635 635
636 retint_signal: 636 retint_signal:
637 testl $_TIF_DO_NOTIFY_MASK,%edx 637 testl $_TIF_DO_NOTIFY_MASK,%edx
638 jz retint_swapgs 638 jz retint_swapgs
639 TRACE_IRQS_ON 639 TRACE_IRQS_ON
640 ENABLE_INTERRUPTS(CLBR_NONE) 640 ENABLE_INTERRUPTS(CLBR_NONE)
641 SAVE_REST 641 SAVE_REST
642 movq $-1,ORIG_RAX(%rsp) 642 movq $-1,ORIG_RAX(%rsp)
643 xorl %esi,%esi # oldset 643 xorl %esi,%esi # oldset
644 movq %rsp,%rdi # &pt_regs 644 movq %rsp,%rdi # &pt_regs
645 call do_notify_resume 645 call do_notify_resume
646 RESTORE_REST 646 RESTORE_REST
647 DISABLE_INTERRUPTS(CLBR_NONE) 647 DISABLE_INTERRUPTS(CLBR_NONE)
648 TRACE_IRQS_OFF 648 TRACE_IRQS_OFF
649 movl $_TIF_NEED_RESCHED,%edi 649 movl $_TIF_NEED_RESCHED,%edi
650 GET_THREAD_INFO(%rcx) 650 GET_THREAD_INFO(%rcx)
651 jmp retint_check 651 jmp retint_check
652 652
653 #ifdef CONFIG_PREEMPT 653 #ifdef CONFIG_PREEMPT
654 /* Returning to kernel space. Check if we need preemption */ 654 /* Returning to kernel space. Check if we need preemption */
655 /* rcx: threadinfo. interrupts off. */ 655 /* rcx: threadinfo. interrupts off. */
656 ENTRY(retint_kernel) 656 ENTRY(retint_kernel)
657 cmpl $0,threadinfo_preempt_count(%rcx) 657 cmpl $0,threadinfo_preempt_count(%rcx)
658 jnz retint_restore_args 658 jnz retint_restore_args
659 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) 659 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
660 jnc retint_restore_args 660 jnc retint_restore_args
661 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ 661 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
662 jnc retint_restore_args 662 jnc retint_restore_args
663 call preempt_schedule_irq 663 call preempt_schedule_irq
664 jmp exit_intr 664 jmp exit_intr
665 #endif 665 #endif
666 666
667 CFI_ENDPROC 667 CFI_ENDPROC
668 END(common_interrupt) 668 END(common_interrupt)
669 669
670 /* 670 /*
671 * APIC interrupts. 671 * APIC interrupts.
672 */ 672 */
673 .macro apicinterrupt num,func 673 .macro apicinterrupt num,func
674 INTR_FRAME 674 INTR_FRAME
675 pushq $~(\num) 675 pushq $~(\num)
676 CFI_ADJUST_CFA_OFFSET 8 676 CFI_ADJUST_CFA_OFFSET 8
677 interrupt \func 677 interrupt \func
678 jmp ret_from_intr 678 jmp ret_from_intr
679 CFI_ENDPROC 679 CFI_ENDPROC
680 .endm 680 .endm
681 681
682 ENTRY(thermal_interrupt) 682 ENTRY(thermal_interrupt)
683 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt 683 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
684 END(thermal_interrupt) 684 END(thermal_interrupt)
685 685
686 ENTRY(threshold_interrupt) 686 ENTRY(threshold_interrupt)
687 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt 687 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
688 END(threshold_interrupt) 688 END(threshold_interrupt)
689 689
690 #ifdef CONFIG_SMP 690 #ifdef CONFIG_SMP
691 ENTRY(reschedule_interrupt) 691 ENTRY(reschedule_interrupt)
692 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt 692 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
693 END(reschedule_interrupt) 693 END(reschedule_interrupt)
694 694
695 .macro INVALIDATE_ENTRY num 695 .macro INVALIDATE_ENTRY num
696 ENTRY(invalidate_interrupt\num) 696 ENTRY(invalidate_interrupt\num)
697 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt 697 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
698 END(invalidate_interrupt\num) 698 END(invalidate_interrupt\num)
699 .endm 699 .endm
700 700
701 INVALIDATE_ENTRY 0 701 INVALIDATE_ENTRY 0
702 INVALIDATE_ENTRY 1 702 INVALIDATE_ENTRY 1
703 INVALIDATE_ENTRY 2 703 INVALIDATE_ENTRY 2
704 INVALIDATE_ENTRY 3 704 INVALIDATE_ENTRY 3
705 INVALIDATE_ENTRY 4 705 INVALIDATE_ENTRY 4
706 INVALIDATE_ENTRY 5 706 INVALIDATE_ENTRY 5
707 INVALIDATE_ENTRY 6 707 INVALIDATE_ENTRY 6
708 INVALIDATE_ENTRY 7 708 INVALIDATE_ENTRY 7
709 709
710 ENTRY(call_function_interrupt) 710 ENTRY(call_function_interrupt)
711 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt 711 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
712 END(call_function_interrupt) 712 END(call_function_interrupt)
713 ENTRY(irq_move_cleanup_interrupt) 713 ENTRY(irq_move_cleanup_interrupt)
714 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt 714 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
715 END(irq_move_cleanup_interrupt) 715 END(irq_move_cleanup_interrupt)
716 #endif 716 #endif
717 717
718 ENTRY(apic_timer_interrupt) 718 ENTRY(apic_timer_interrupt)
719 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt 719 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
720 END(apic_timer_interrupt) 720 END(apic_timer_interrupt)
721 721
722 ENTRY(uv_bau_message_intr1) 722 ENTRY(uv_bau_message_intr1)
723 apicinterrupt 220,uv_bau_message_interrupt 723 apicinterrupt 220,uv_bau_message_interrupt
724 END(uv_bau_message_intr1) 724 END(uv_bau_message_intr1)
725 725
726 ENTRY(error_interrupt) 726 ENTRY(error_interrupt)
727 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt 727 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
728 END(error_interrupt) 728 END(error_interrupt)
729 729
730 ENTRY(spurious_interrupt) 730 ENTRY(spurious_interrupt)
731 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt 731 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
732 END(spurious_interrupt) 732 END(spurious_interrupt)
733 733
734 /* 734 /*
735 * Exception entry points. 735 * Exception entry points.
736 */ 736 */
737 .macro zeroentry sym 737 .macro zeroentry sym
738 INTR_FRAME 738 INTR_FRAME
739 PARAVIRT_ADJUST_EXCEPTION_FRAME 739 PARAVIRT_ADJUST_EXCEPTION_FRAME
740 pushq $0 /* push error code/oldrax */ 740 pushq $0 /* push error code/oldrax */
741 CFI_ADJUST_CFA_OFFSET 8 741 CFI_ADJUST_CFA_OFFSET 8
742 pushq %rax /* push real oldrax to the rdi slot */ 742 pushq %rax /* push real oldrax to the rdi slot */
743 CFI_ADJUST_CFA_OFFSET 8 743 CFI_ADJUST_CFA_OFFSET 8
744 CFI_REL_OFFSET rax,0 744 CFI_REL_OFFSET rax,0
745 leaq \sym(%rip),%rax 745 leaq \sym(%rip),%rax
746 jmp error_entry 746 jmp error_entry
747 CFI_ENDPROC 747 CFI_ENDPROC
748 .endm 748 .endm
749 749
750 .macro errorentry sym 750 .macro errorentry sym
751 XCPT_FRAME 751 XCPT_FRAME
752 PARAVIRT_ADJUST_EXCEPTION_FRAME 752 PARAVIRT_ADJUST_EXCEPTION_FRAME
753 pushq %rax 753 pushq %rax
754 CFI_ADJUST_CFA_OFFSET 8 754 CFI_ADJUST_CFA_OFFSET 8
755 CFI_REL_OFFSET rax,0 755 CFI_REL_OFFSET rax,0
756 leaq \sym(%rip),%rax 756 leaq \sym(%rip),%rax
757 jmp error_entry 757 jmp error_entry
758 CFI_ENDPROC 758 CFI_ENDPROC
759 .endm 759 .endm
760 760
761 /* error code is on the stack already */ 761 /* error code is on the stack already */
762 /* handle NMI like exceptions that can happen everywhere */ 762 /* handle NMI like exceptions that can happen everywhere */
763 .macro paranoidentry sym, ist=0, irqtrace=1 763 .macro paranoidentry sym, ist=0, irqtrace=1
764 SAVE_ALL 764 SAVE_ALL
765 cld 765 cld
766 movl $1,%ebx 766 movl $1,%ebx
767 movl $MSR_GS_BASE,%ecx 767 movl $MSR_GS_BASE,%ecx
768 rdmsr 768 rdmsr
769 testl %edx,%edx 769 testl %edx,%edx
770 js 1f 770 js 1f
771 SWAPGS 771 SWAPGS
772 xorl %ebx,%ebx 772 xorl %ebx,%ebx
773 1: 773 1:
774 .if \ist 774 .if \ist
775 movq %gs:pda_data_offset, %rbp 775 movq %gs:pda_data_offset, %rbp
776 .endif 776 .endif
777 movq %rsp,%rdi 777 movq %rsp,%rdi
778 movq ORIG_RAX(%rsp),%rsi 778 movq ORIG_RAX(%rsp),%rsi
779 movq $-1,ORIG_RAX(%rsp) 779 movq $-1,ORIG_RAX(%rsp)
780 .if \ist 780 .if \ist
781 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 781 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
782 .endif 782 .endif
783 call \sym 783 call \sym
784 .if \ist 784 .if \ist
785 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 785 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
786 .endif 786 .endif
787 DISABLE_INTERRUPTS(CLBR_NONE) 787 DISABLE_INTERRUPTS(CLBR_NONE)
788 .if \irqtrace 788 .if \irqtrace
789 TRACE_IRQS_OFF 789 TRACE_IRQS_OFF
790 .endif 790 .endif
791 .endm 791 .endm
792 792
793 /* 793 /*
794 * "Paranoid" exit path from exception stack. 794 * "Paranoid" exit path from exception stack.
795 * Paranoid because this is used by NMIs and cannot take 795 * Paranoid because this is used by NMIs and cannot take
796 * any kernel state for granted. 796 * any kernel state for granted.
797 * We don't do kernel preemption checks here, because only 797 * We don't do kernel preemption checks here, because only
798 * NMI should be common and it does not enable IRQs and 798 * NMI should be common and it does not enable IRQs and
799 * cannot get reschedule ticks. 799 * cannot get reschedule ticks.
800 * 800 *
801 * "trace" is 0 for the NMI handler only, because irq-tracing 801 * "trace" is 0 for the NMI handler only, because irq-tracing
802 * is fundamentally NMI-unsafe. (we cannot change the soft and 802 * is fundamentally NMI-unsafe. (we cannot change the soft and
803 * hard flags at once, atomically) 803 * hard flags at once, atomically)
804 */ 804 */
805 .macro paranoidexit trace=1 805 .macro paranoidexit trace=1
806 /* ebx: no swapgs flag */ 806 /* ebx: no swapgs flag */
807 paranoid_exit\trace: 807 paranoid_exit\trace:
808 testl %ebx,%ebx /* swapgs needed? */ 808 testl %ebx,%ebx /* swapgs needed? */
809 jnz paranoid_restore\trace 809 jnz paranoid_restore\trace
810 testl $3,CS(%rsp) 810 testl $3,CS(%rsp)
811 jnz paranoid_userspace\trace 811 jnz paranoid_userspace\trace
812 paranoid_swapgs\trace: 812 paranoid_swapgs\trace:
813 .if \trace 813 .if \trace
814 TRACE_IRQS_IRETQ 0 814 TRACE_IRQS_IRETQ 0
815 .endif 815 .endif
816 SWAPGS_UNSAFE_STACK 816 SWAPGS_UNSAFE_STACK
817 paranoid_restore\trace: 817 paranoid_restore\trace:
818 RESTORE_ALL 8 818 RESTORE_ALL 8
819 jmp irq_return 819 jmp irq_return
820 paranoid_userspace\trace: 820 paranoid_userspace\trace:
821 GET_THREAD_INFO(%rcx) 821 GET_THREAD_INFO(%rcx)
822 movl threadinfo_flags(%rcx),%ebx 822 movl threadinfo_flags(%rcx),%ebx
823 andl $_TIF_WORK_MASK,%ebx 823 andl $_TIF_WORK_MASK,%ebx
824 jz paranoid_swapgs\trace 824 jz paranoid_swapgs\trace
825 movq %rsp,%rdi /* &pt_regs */ 825 movq %rsp,%rdi /* &pt_regs */
826 call sync_regs 826 call sync_regs
827 movq %rax,%rsp /* switch stack for scheduling */ 827 movq %rax,%rsp /* switch stack for scheduling */
828 testl $_TIF_NEED_RESCHED,%ebx 828 testl $_TIF_NEED_RESCHED,%ebx
829 jnz paranoid_schedule\trace 829 jnz paranoid_schedule\trace
830 movl %ebx,%edx /* arg3: thread flags */ 830 movl %ebx,%edx /* arg3: thread flags */
831 .if \trace 831 .if \trace
832 TRACE_IRQS_ON 832 TRACE_IRQS_ON
833 .endif 833 .endif
834 ENABLE_INTERRUPTS(CLBR_NONE) 834 ENABLE_INTERRUPTS(CLBR_NONE)
835 xorl %esi,%esi /* arg2: oldset */ 835 xorl %esi,%esi /* arg2: oldset */
836 movq %rsp,%rdi /* arg1: &pt_regs */ 836 movq %rsp,%rdi /* arg1: &pt_regs */
837 call do_notify_resume 837 call do_notify_resume
838 DISABLE_INTERRUPTS(CLBR_NONE) 838 DISABLE_INTERRUPTS(CLBR_NONE)
839 .if \trace 839 .if \trace
840 TRACE_IRQS_OFF 840 TRACE_IRQS_OFF
841 .endif 841 .endif
842 jmp paranoid_userspace\trace 842 jmp paranoid_userspace\trace
843 paranoid_schedule\trace: 843 paranoid_schedule\trace:
844 .if \trace 844 .if \trace
845 TRACE_IRQS_ON 845 TRACE_IRQS_ON
846 .endif 846 .endif
847 ENABLE_INTERRUPTS(CLBR_ANY) 847 ENABLE_INTERRUPTS(CLBR_ANY)
848 call schedule 848 call schedule
849 DISABLE_INTERRUPTS(CLBR_ANY) 849 DISABLE_INTERRUPTS(CLBR_ANY)
850 .if \trace 850 .if \trace
851 TRACE_IRQS_OFF 851 TRACE_IRQS_OFF
852 .endif 852 .endif
853 jmp paranoid_userspace\trace 853 jmp paranoid_userspace\trace
854 CFI_ENDPROC 854 CFI_ENDPROC
855 .endm 855 .endm
856 856
857 /* 857 /*
858 * Exception entry point. This expects an error code/orig_rax on the stack 858 * Exception entry point. This expects an error code/orig_rax on the stack
859 * and the exception handler in %rax. 859 * and the exception handler in %rax.
860 */ 860 */
861 KPROBE_ENTRY(error_entry) 861 KPROBE_ENTRY(error_entry)
862 _frame RDI 862 _frame RDI
863 CFI_REL_OFFSET rax,0 863 CFI_REL_OFFSET rax,0
864 /* rdi slot contains rax, oldrax contains error code */ 864 /* rdi slot contains rax, oldrax contains error code */
865 cld 865 cld
866 subq $14*8,%rsp 866 subq $14*8,%rsp
867 CFI_ADJUST_CFA_OFFSET (14*8) 867 CFI_ADJUST_CFA_OFFSET (14*8)
868 movq %rsi,13*8(%rsp) 868 movq %rsi,13*8(%rsp)
869 CFI_REL_OFFSET rsi,RSI 869 CFI_REL_OFFSET rsi,RSI
870 movq 14*8(%rsp),%rsi /* load rax from rdi slot */ 870 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
871 CFI_REGISTER rax,rsi 871 CFI_REGISTER rax,rsi
872 movq %rdx,12*8(%rsp) 872 movq %rdx,12*8(%rsp)
873 CFI_REL_OFFSET rdx,RDX 873 CFI_REL_OFFSET rdx,RDX
874 movq %rcx,11*8(%rsp) 874 movq %rcx,11*8(%rsp)
875 CFI_REL_OFFSET rcx,RCX 875 CFI_REL_OFFSET rcx,RCX
876 movq %rsi,10*8(%rsp) /* store rax */ 876 movq %rsi,10*8(%rsp) /* store rax */
877 CFI_REL_OFFSET rax,RAX 877 CFI_REL_OFFSET rax,RAX
878 movq %r8, 9*8(%rsp) 878 movq %r8, 9*8(%rsp)
879 CFI_REL_OFFSET r8,R8 879 CFI_REL_OFFSET r8,R8
880 movq %r9, 8*8(%rsp) 880 movq %r9, 8*8(%rsp)
881 CFI_REL_OFFSET r9,R9 881 CFI_REL_OFFSET r9,R9
882 movq %r10,7*8(%rsp) 882 movq %r10,7*8(%rsp)
883 CFI_REL_OFFSET r10,R10 883 CFI_REL_OFFSET r10,R10
884 movq %r11,6*8(%rsp) 884 movq %r11,6*8(%rsp)
885 CFI_REL_OFFSET r11,R11 885 CFI_REL_OFFSET r11,R11
886 movq %rbx,5*8(%rsp) 886 movq %rbx,5*8(%rsp)
887 CFI_REL_OFFSET rbx,RBX 887 CFI_REL_OFFSET rbx,RBX
888 movq %rbp,4*8(%rsp) 888 movq %rbp,4*8(%rsp)
889 CFI_REL_OFFSET rbp,RBP 889 CFI_REL_OFFSET rbp,RBP
890 movq %r12,3*8(%rsp) 890 movq %r12,3*8(%rsp)
891 CFI_REL_OFFSET r12,R12 891 CFI_REL_OFFSET r12,R12
892 movq %r13,2*8(%rsp) 892 movq %r13,2*8(%rsp)
893 CFI_REL_OFFSET r13,R13 893 CFI_REL_OFFSET r13,R13
894 movq %r14,1*8(%rsp) 894 movq %r14,1*8(%rsp)
895 CFI_REL_OFFSET r14,R14 895 CFI_REL_OFFSET r14,R14
896 movq %r15,(%rsp) 896 movq %r15,(%rsp)
897 CFI_REL_OFFSET r15,R15 897 CFI_REL_OFFSET r15,R15
898 xorl %ebx,%ebx 898 xorl %ebx,%ebx
899 testl $3,CS(%rsp) 899 testl $3,CS(%rsp)
900 je error_kernelspace 900 je error_kernelspace
901 error_swapgs: 901 error_swapgs:
902 SWAPGS 902 SWAPGS
903 error_sti: 903 error_sti:
904 movq %rdi,RDI(%rsp) 904 movq %rdi,RDI(%rsp)
905 CFI_REL_OFFSET rdi,RDI 905 CFI_REL_OFFSET rdi,RDI
906 movq %rsp,%rdi 906 movq %rsp,%rdi
907 movq ORIG_RAX(%rsp),%rsi /* get error code */ 907 movq ORIG_RAX(%rsp),%rsi /* get error code */
908 movq $-1,ORIG_RAX(%rsp) 908 movq $-1,ORIG_RAX(%rsp)
909 call *%rax 909 call *%rax
910 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ 910 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
911 error_exit: 911 error_exit:
912 movl %ebx,%eax 912 movl %ebx,%eax
913 RESTORE_REST 913 RESTORE_REST
914 DISABLE_INTERRUPTS(CLBR_NONE) 914 DISABLE_INTERRUPTS(CLBR_NONE)
915 TRACE_IRQS_OFF 915 TRACE_IRQS_OFF
916 GET_THREAD_INFO(%rcx) 916 GET_THREAD_INFO(%rcx)
917 testl %eax,%eax 917 testl %eax,%eax
918 jne retint_kernel 918 jne retint_kernel
919 LOCKDEP_SYS_EXIT_IRQ 919 LOCKDEP_SYS_EXIT_IRQ
920 movl threadinfo_flags(%rcx),%edx 920 movl threadinfo_flags(%rcx),%edx
921 movl $_TIF_WORK_MASK,%edi 921 movl $_TIF_WORK_MASK,%edi
922 andl %edi,%edx 922 andl %edi,%edx
923 jnz retint_careful 923 jnz retint_careful
924 jmp retint_swapgs 924 jmp retint_swapgs
925 CFI_ENDPROC 925 CFI_ENDPROC
926 926
927 error_kernelspace: 927 error_kernelspace:
928 incl %ebx 928 incl %ebx
929 /* There are two places in the kernel that can potentially fault with 929 /* There are two places in the kernel that can potentially fault with
930 usergs. Handle them here. The exception handlers after 930 usergs. Handle them here. The exception handlers after
931 iret run with kernel gs again, so don't set the user space flag. 931 iret run with kernel gs again, so don't set the user space flag.
932 B stepping K8s sometimes report an truncated RIP for IRET 932 B stepping K8s sometimes report an truncated RIP for IRET
933 exceptions returning to compat mode. Check for these here too. */ 933 exceptions returning to compat mode. Check for these here too. */
934 leaq irq_return(%rip),%rcx 934 leaq irq_return(%rip),%rcx
935 cmpq %rcx,RIP(%rsp) 935 cmpq %rcx,RIP(%rsp)
936 je error_swapgs 936 je error_swapgs
937 movl %ecx,%ecx /* zero extend */ 937 movl %ecx,%ecx /* zero extend */
938 cmpq %rcx,RIP(%rsp) 938 cmpq %rcx,RIP(%rsp)
939 je error_swapgs 939 je error_swapgs
940 cmpq $gs_change,RIP(%rsp) 940 cmpq $gs_change,RIP(%rsp)
941 je error_swapgs 941 je error_swapgs
942 jmp error_sti 942 jmp error_sti
943 KPROBE_END(error_entry) 943 KPROBE_END(error_entry)
944 944
945 /* Reload gs selector with exception handling */ 945 /* Reload gs selector with exception handling */
946 /* edi: new selector */ 946 /* edi: new selector */
947 ENTRY(load_gs_index) 947 ENTRY(native_load_gs_index)
948 CFI_STARTPROC 948 CFI_STARTPROC
949 pushf 949 pushf
950 CFI_ADJUST_CFA_OFFSET 8 950 CFI_ADJUST_CFA_OFFSET 8
951 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) 951 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
952 SWAPGS 952 SWAPGS
953 gs_change: 953 gs_change:
954 movl %edi,%gs 954 movl %edi,%gs
955 2: mfence /* workaround */ 955 2: mfence /* workaround */
956 SWAPGS 956 SWAPGS
957 popf 957 popf
958 CFI_ADJUST_CFA_OFFSET -8 958 CFI_ADJUST_CFA_OFFSET -8
959 ret 959 ret
960 CFI_ENDPROC 960 CFI_ENDPROC
961 ENDPROC(load_gs_index) 961 ENDPROC(native_load_gs_index)
962 962
963 .section __ex_table,"a" 963 .section __ex_table,"a"
964 .align 8 964 .align 8
965 .quad gs_change,bad_gs 965 .quad gs_change,bad_gs
966 .previous 966 .previous
967 .section .fixup,"ax" 967 .section .fixup,"ax"
968 /* running with kernelgs */ 968 /* running with kernelgs */
969 bad_gs: 969 bad_gs:
970 SWAPGS /* switch back to user gs */ 970 SWAPGS /* switch back to user gs */
971 xorl %eax,%eax 971 xorl %eax,%eax
972 movl %eax,%gs 972 movl %eax,%gs
973 jmp 2b 973 jmp 2b
974 .previous 974 .previous
975 975
976 /* 976 /*
977 * Create a kernel thread. 977 * Create a kernel thread.
978 * 978 *
979 * C extern interface: 979 * C extern interface:
980 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) 980 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
981 * 981 *
982 * asm input arguments: 982 * asm input arguments:
983 * rdi: fn, rsi: arg, rdx: flags 983 * rdi: fn, rsi: arg, rdx: flags
984 */ 984 */
985 ENTRY(kernel_thread) 985 ENTRY(kernel_thread)
986 CFI_STARTPROC 986 CFI_STARTPROC
987 FAKE_STACK_FRAME $child_rip 987 FAKE_STACK_FRAME $child_rip
988 SAVE_ALL 988 SAVE_ALL
989 989
990 # rdi: flags, rsi: usp, rdx: will be &pt_regs 990 # rdi: flags, rsi: usp, rdx: will be &pt_regs
991 movq %rdx,%rdi 991 movq %rdx,%rdi
992 orq kernel_thread_flags(%rip),%rdi 992 orq kernel_thread_flags(%rip),%rdi
993 movq $-1, %rsi 993 movq $-1, %rsi
994 movq %rsp, %rdx 994 movq %rsp, %rdx
995 995
996 xorl %r8d,%r8d 996 xorl %r8d,%r8d
997 xorl %r9d,%r9d 997 xorl %r9d,%r9d
998 998
999 # clone now 999 # clone now
1000 call do_fork 1000 call do_fork
1001 movq %rax,RAX(%rsp) 1001 movq %rax,RAX(%rsp)
1002 xorl %edi,%edi 1002 xorl %edi,%edi
1003 1003
1004 /* 1004 /*
1005 * It isn't worth to check for reschedule here, 1005 * It isn't worth to check for reschedule here,
1006 * so internally to the x86_64 port you can rely on kernel_thread() 1006 * so internally to the x86_64 port you can rely on kernel_thread()
1007 * not to reschedule the child before returning, this avoids the need 1007 * not to reschedule the child before returning, this avoids the need
1008 * of hacks for example to fork off the per-CPU idle tasks. 1008 * of hacks for example to fork off the per-CPU idle tasks.
1009 * [Hopefully no generic code relies on the reschedule -AK] 1009 * [Hopefully no generic code relies on the reschedule -AK]
1010 */ 1010 */
1011 RESTORE_ALL 1011 RESTORE_ALL
1012 UNFAKE_STACK_FRAME 1012 UNFAKE_STACK_FRAME
1013 ret 1013 ret
1014 CFI_ENDPROC 1014 CFI_ENDPROC
1015 ENDPROC(kernel_thread) 1015 ENDPROC(kernel_thread)
1016 1016
1017 child_rip: 1017 child_rip:
1018 pushq $0 # fake return address 1018 pushq $0 # fake return address
1019 CFI_STARTPROC 1019 CFI_STARTPROC
1020 /* 1020 /*
1021 * Here we are in the child and the registers are set as they were 1021 * Here we are in the child and the registers are set as they were
1022 * at kernel_thread() invocation in the parent. 1022 * at kernel_thread() invocation in the parent.
1023 */ 1023 */
1024 movq %rdi, %rax 1024 movq %rdi, %rax
1025 movq %rsi, %rdi 1025 movq %rsi, %rdi
1026 call *%rax 1026 call *%rax
1027 # exit 1027 # exit
1028 mov %eax, %edi 1028 mov %eax, %edi
1029 call do_exit 1029 call do_exit
1030 CFI_ENDPROC 1030 CFI_ENDPROC
1031 ENDPROC(child_rip) 1031 ENDPROC(child_rip)
1032 1032
1033 /* 1033 /*
1034 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. 1034 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1035 * 1035 *
1036 * C extern interface: 1036 * C extern interface:
1037 * extern long execve(char *name, char **argv, char **envp) 1037 * extern long execve(char *name, char **argv, char **envp)
1038 * 1038 *
1039 * asm input arguments: 1039 * asm input arguments:
1040 * rdi: name, rsi: argv, rdx: envp 1040 * rdi: name, rsi: argv, rdx: envp
1041 * 1041 *
1042 * We want to fallback into: 1042 * We want to fallback into:
1043 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs) 1043 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
1044 * 1044 *
1045 * do_sys_execve asm fallback arguments: 1045 * do_sys_execve asm fallback arguments:
1046 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack 1046 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
1047 */ 1047 */
1048 ENTRY(kernel_execve) 1048 ENTRY(kernel_execve)
1049 CFI_STARTPROC 1049 CFI_STARTPROC
1050 FAKE_STACK_FRAME $0 1050 FAKE_STACK_FRAME $0
1051 SAVE_ALL 1051 SAVE_ALL
1052 movq %rsp,%rcx 1052 movq %rsp,%rcx
1053 call sys_execve 1053 call sys_execve
1054 movq %rax, RAX(%rsp) 1054 movq %rax, RAX(%rsp)
1055 RESTORE_REST 1055 RESTORE_REST
1056 testq %rax,%rax 1056 testq %rax,%rax
1057 je int_ret_from_sys_call 1057 je int_ret_from_sys_call
1058 RESTORE_ARGS 1058 RESTORE_ARGS
1059 UNFAKE_STACK_FRAME 1059 UNFAKE_STACK_FRAME
1060 ret 1060 ret
1061 CFI_ENDPROC 1061 CFI_ENDPROC
1062 ENDPROC(kernel_execve) 1062 ENDPROC(kernel_execve)
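
kernel_execve() lets kernel code exec a user binary when there is no real syscall frame to return through: it fakes one, calls sys_execve(), and on success (zero in %rax) leaves via int_ret_from_sys_call so the freshly built user register state is restored with IRET; on failure it unwinds the fake frame and returns the negative errno. A hedged usage sketch (the path and vectors are examples, not from this commit):

    /* Hedged sketch: running a user program from kernel context.
     * On success control never comes back here; the negative-errno
     * path is the only return the caller actually sees. */
    static long try_run_init(void)
    {
            char *argv[] = { "/sbin/init", NULL };
            char *envp[] = { "HOME=/", "TERM=linux", NULL };

            return kernel_execve("/sbin/init", argv, envp);
    }
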
1063 1063
1064 KPROBE_ENTRY(page_fault) 1064 KPROBE_ENTRY(page_fault)
1065 errorentry do_page_fault 1065 errorentry do_page_fault
1066 KPROBE_END(page_fault) 1066 KPROBE_END(page_fault)
1067 1067
1068 ENTRY(coprocessor_error) 1068 ENTRY(coprocessor_error)
1069 zeroentry do_coprocessor_error 1069 zeroentry do_coprocessor_error
1070 END(coprocessor_error) 1070 END(coprocessor_error)
1071 1071
1072 ENTRY(simd_coprocessor_error) 1072 ENTRY(simd_coprocessor_error)
1073 zeroentry do_simd_coprocessor_error 1073 zeroentry do_simd_coprocessor_error
1074 END(simd_coprocessor_error) 1074 END(simd_coprocessor_error)
1075 1075
1076 ENTRY(device_not_available) 1076 ENTRY(device_not_available)
1077 zeroentry math_state_restore 1077 zeroentry math_state_restore
1078 END(device_not_available) 1078 END(device_not_available)
1079 1079
1080 /* runs on exception stack */ 1080 /* runs on exception stack */
1081 KPROBE_ENTRY(debug) 1081 KPROBE_ENTRY(debug)
1082 INTR_FRAME 1082 INTR_FRAME
1083 pushq $0 1083 pushq $0
1084 CFI_ADJUST_CFA_OFFSET 8 1084 CFI_ADJUST_CFA_OFFSET 8
1085 paranoidentry do_debug, DEBUG_STACK 1085 paranoidentry do_debug, DEBUG_STACK
1086 paranoidexit 1086 paranoidexit
1087 KPROBE_END(debug) 1087 KPROBE_END(debug)
1088 1088
1089 /* runs on exception stack */ 1089 /* runs on exception stack */
1090 KPROBE_ENTRY(nmi) 1090 KPROBE_ENTRY(nmi)
1091 INTR_FRAME 1091 INTR_FRAME
1092 pushq $-1 1092 pushq $-1
1093 CFI_ADJUST_CFA_OFFSET 8 1093 CFI_ADJUST_CFA_OFFSET 8
1094 paranoidentry do_nmi, 0, 0 1094 paranoidentry do_nmi, 0, 0
1095 #ifdef CONFIG_TRACE_IRQFLAGS 1095 #ifdef CONFIG_TRACE_IRQFLAGS
1096 paranoidexit 0 1096 paranoidexit 0
1097 #else 1097 #else
1098 jmp paranoid_exit1 1098 jmp paranoid_exit1
1099 CFI_ENDPROC 1099 CFI_ENDPROC
1100 #endif 1100 #endif
1101 KPROBE_END(nmi) 1101 KPROBE_END(nmi)
1102 1102
1103 KPROBE_ENTRY(int3) 1103 KPROBE_ENTRY(int3)
1104 INTR_FRAME 1104 INTR_FRAME
1105 pushq $0 1105 pushq $0
1106 CFI_ADJUST_CFA_OFFSET 8 1106 CFI_ADJUST_CFA_OFFSET 8
1107 paranoidentry do_int3, DEBUG_STACK 1107 paranoidentry do_int3, DEBUG_STACK
1108 jmp paranoid_exit1 1108 jmp paranoid_exit1
1109 CFI_ENDPROC 1109 CFI_ENDPROC
1110 KPROBE_END(int3) 1110 KPROBE_END(int3)
1111 1111
1112 ENTRY(overflow) 1112 ENTRY(overflow)
1113 zeroentry do_overflow 1113 zeroentry do_overflow
1114 END(overflow) 1114 END(overflow)
1115 1115
1116 ENTRY(bounds) 1116 ENTRY(bounds)
1117 zeroentry do_bounds 1117 zeroentry do_bounds
1118 END(bounds) 1118 END(bounds)
1119 1119
1120 ENTRY(invalid_op) 1120 ENTRY(invalid_op)
1121 zeroentry do_invalid_op 1121 zeroentry do_invalid_op
1122 END(invalid_op) 1122 END(invalid_op)
1123 1123
1124 ENTRY(coprocessor_segment_overrun) 1124 ENTRY(coprocessor_segment_overrun)
1125 zeroentry do_coprocessor_segment_overrun 1125 zeroentry do_coprocessor_segment_overrun
1126 END(coprocessor_segment_overrun) 1126 END(coprocessor_segment_overrun)
1127 1127
1128 /* runs on exception stack */ 1128 /* runs on exception stack */
1129 ENTRY(double_fault) 1129 ENTRY(double_fault)
1130 XCPT_FRAME 1130 XCPT_FRAME
1131 paranoidentry do_double_fault 1131 paranoidentry do_double_fault
1132 jmp paranoid_exit1 1132 jmp paranoid_exit1
1133 CFI_ENDPROC 1133 CFI_ENDPROC
1134 END(double_fault) 1134 END(double_fault)
1135 1135
1136 ENTRY(invalid_TSS) 1136 ENTRY(invalid_TSS)
1137 errorentry do_invalid_TSS 1137 errorentry do_invalid_TSS
1138 END(invalid_TSS) 1138 END(invalid_TSS)
1139 1139
1140 ENTRY(segment_not_present) 1140 ENTRY(segment_not_present)
1141 errorentry do_segment_not_present 1141 errorentry do_segment_not_present
1142 END(segment_not_present) 1142 END(segment_not_present)
1143 1143
1144 /* runs on exception stack */ 1144 /* runs on exception stack */
1145 ENTRY(stack_segment) 1145 ENTRY(stack_segment)
1146 XCPT_FRAME 1146 XCPT_FRAME
1147 paranoidentry do_stack_segment 1147 paranoidentry do_stack_segment
1148 jmp paranoid_exit1 1148 jmp paranoid_exit1
1149 CFI_ENDPROC 1149 CFI_ENDPROC
1150 END(stack_segment) 1150 END(stack_segment)
1151 1151
1152 KPROBE_ENTRY(general_protection) 1152 KPROBE_ENTRY(general_protection)
1153 errorentry do_general_protection 1153 errorentry do_general_protection
1154 KPROBE_END(general_protection) 1154 KPROBE_END(general_protection)
1155 1155
1156 ENTRY(alignment_check) 1156 ENTRY(alignment_check)
1157 errorentry do_alignment_check 1157 errorentry do_alignment_check
1158 END(alignment_check) 1158 END(alignment_check)
1159 1159
1160 ENTRY(divide_error) 1160 ENTRY(divide_error)
1161 zeroentry do_divide_error 1161 zeroentry do_divide_error
1162 END(divide_error) 1162 END(divide_error)
1163 1163
1164 ENTRY(spurious_interrupt_bug) 1164 ENTRY(spurious_interrupt_bug)
1165 zeroentry do_spurious_interrupt_bug 1165 zeroentry do_spurious_interrupt_bug
1166 END(spurious_interrupt_bug) 1166 END(spurious_interrupt_bug)
1167 1167
1168 #ifdef CONFIG_X86_MCE 1168 #ifdef CONFIG_X86_MCE
1169 /* runs on exception stack */ 1169 /* runs on exception stack */
1170 ENTRY(machine_check) 1170 ENTRY(machine_check)
1171 INTR_FRAME 1171 INTR_FRAME
1172 pushq $0 1172 pushq $0
1173 CFI_ADJUST_CFA_OFFSET 8 1173 CFI_ADJUST_CFA_OFFSET 8
1174 paranoidentry do_machine_check 1174 paranoidentry do_machine_check
1175 jmp paranoid_exit1 1175 jmp paranoid_exit1
1176 CFI_ENDPROC 1176 CFI_ENDPROC
1177 END(machine_check) 1177 END(machine_check)
1178 #endif 1178 #endif
1179 1179
1180 /* Call softirq on interrupt stack. Interrupts are off. */ 1180 /* Call softirq on interrupt stack. Interrupts are off. */
1181 ENTRY(call_softirq) 1181 ENTRY(call_softirq)
1182 CFI_STARTPROC 1182 CFI_STARTPROC
1183 push %rbp 1183 push %rbp
1184 CFI_ADJUST_CFA_OFFSET 8 1184 CFI_ADJUST_CFA_OFFSET 8
1185 CFI_REL_OFFSET rbp,0 1185 CFI_REL_OFFSET rbp,0
1186 mov %rsp,%rbp 1186 mov %rsp,%rbp
1187 CFI_DEF_CFA_REGISTER rbp 1187 CFI_DEF_CFA_REGISTER rbp
1188 incl %gs:pda_irqcount 1188 incl %gs:pda_irqcount
1189 cmove %gs:pda_irqstackptr,%rsp 1189 cmove %gs:pda_irqstackptr,%rsp
1190 push %rbp # backlink for old unwinder 1190 push %rbp # backlink for old unwinder
1191 call __do_softirq 1191 call __do_softirq
1192 leaveq 1192 leaveq
1193 CFI_DEF_CFA_REGISTER rsp 1193 CFI_DEF_CFA_REGISTER rsp
1194 CFI_ADJUST_CFA_OFFSET -8 1194 CFI_ADJUST_CFA_OFFSET -8
1195 decl %gs:pda_irqcount 1195 decl %gs:pda_irqcount
1196 ret 1196 ret
1197 CFI_ENDPROC 1197 CFI_ENDPROC
1198 ENDPROC(call_softirq) 1198 ENDPROC(call_softirq)
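
call_softirq's only job is to run __do_softirq() on the per-CPU IRQ stack: pda_irqcount is bumped and the cmove switches %rsp to pda_irqstackptr only on the outermost entry, so nested calls stay on the stack they are already using, and the old %rbp is pushed as a backlink for the unwinder. The C-side caller looks roughly like this (a sketch of the arch do_softirq(), not quoted from this diff):

    /* Hedged sketch of the C caller that hands pending softirqs to the
     * assembly stub above (roughly arch/x86/kernel/irq_64.c). */
    extern void call_softirq(void);

    asmlinkage void do_softirq(void)
    {
            unsigned long flags;

            if (in_interrupt())
                    return;                 /* already in IRQ context */

            local_irq_save(flags);
            if (local_softirq_pending())
                    call_softirq();         /* __do_softirq() on the IRQ stack */
            local_irq_restore(flags);
    }
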
1199 1199
1200 KPROBE_ENTRY(ignore_sysret) 1200 KPROBE_ENTRY(ignore_sysret)
1201 CFI_STARTPROC 1201 CFI_STARTPROC
1202 mov $-ENOSYS,%eax 1202 mov $-ENOSYS,%eax
1203 sysret 1203 sysret
1204 CFI_ENDPROC 1204 CFI_ENDPROC
1205 ENDPROC(ignore_sysret) 1205 ENDPROC(ignore_sysret)
1206 1206
arch/x86/kernel/paravirt.c
1 /* Paravirtualization interfaces 1 /* Paravirtualization interfaces
2 Copyright (C) 2006 Rusty Russell IBM Corporation 2 Copyright (C) 2006 Rusty Russell IBM Corporation
3 3
4 This program is free software; you can redistribute it and/or modify 4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by 5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or 6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version. 7 (at your option) any later version.
8 8
9 This program is distributed in the hope that it will be useful, 9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details. 12 GNU General Public License for more details.
13 13
14 You should have received a copy of the GNU General Public License 14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software 15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 17
18 2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc 18 2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc
19 */ 19 */
20 20
21 #include <linux/errno.h> 21 #include <linux/errno.h>
22 #include <linux/module.h> 22 #include <linux/module.h>
23 #include <linux/efi.h> 23 #include <linux/efi.h>
24 #include <linux/bcd.h> 24 #include <linux/bcd.h>
25 #include <linux/highmem.h> 25 #include <linux/highmem.h>
26 26
27 #include <asm/bug.h> 27 #include <asm/bug.h>
28 #include <asm/paravirt.h> 28 #include <asm/paravirt.h>
29 #include <asm/desc.h> 29 #include <asm/desc.h>
30 #include <asm/setup.h> 30 #include <asm/setup.h>
31 #include <asm/arch_hooks.h> 31 #include <asm/arch_hooks.h>
32 #include <asm/time.h> 32 #include <asm/time.h>
33 #include <asm/pgalloc.h> 33 #include <asm/pgalloc.h>
34 #include <asm/irq.h> 34 #include <asm/irq.h>
35 #include <asm/delay.h> 35 #include <asm/delay.h>
36 #include <asm/fixmap.h> 36 #include <asm/fixmap.h>
37 #include <asm/apic.h> 37 #include <asm/apic.h>
38 #include <asm/tlbflush.h> 38 #include <asm/tlbflush.h>
39 #include <asm/timer.h> 39 #include <asm/timer.h>
40 40
41 /* nop stub */ 41 /* nop stub */
42 void _paravirt_nop(void) 42 void _paravirt_nop(void)
43 { 43 {
44 } 44 }
45 45
46 static void __init default_banner(void) 46 static void __init default_banner(void)
47 { 47 {
48 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 48 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
49 pv_info.name); 49 pv_info.name);
50 } 50 }
51 51
52 char *memory_setup(void) 52 char *memory_setup(void)
53 { 53 {
54 return pv_init_ops.memory_setup(); 54 return pv_init_ops.memory_setup();
55 } 55 }
56 56
57 /* Simple instruction patching code. */ 57 /* Simple instruction patching code. */
58 #define DEF_NATIVE(ops, name, code) \ 58 #define DEF_NATIVE(ops, name, code) \
59 extern const char start_##ops##_##name[], end_##ops##_##name[]; \ 59 extern const char start_##ops##_##name[], end_##ops##_##name[]; \
60 asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") 60 asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
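
DEF_NATIVE captures a short native instruction sequence between start_/end_ labels so native_patch() can copy it straight over a paravirt call site via paravirt_patch_insns() instead of leaving an indirect call in place. The snippets themselves live in the per-arch paravirt_patch_32.c/paravirt_patch_64.c files rather than here; a hedged sketch of the pattern:

    /* Hedged sketch: typical DEF_NATIVE users on 32-bit; the exact snippet
     * list is in paravirt_patch_32.c, not in this hunk. */
    DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
    DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
    DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");

    /* a patch routine can then do something like:
     *      paravirt_patch_insns(insnbuf, len,
     *                           start_pv_irq_ops_irq_disable,
     *                           end_pv_irq_ops_irq_disable);
     */
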
61 61
62 /* Undefined instruction for dealing with missing ops pointers. */ 62 /* Undefined instruction for dealing with missing ops pointers. */
63 static const unsigned char ud2a[] = { 0x0f, 0x0b }; 63 static const unsigned char ud2a[] = { 0x0f, 0x0b };
64 64
65 unsigned paravirt_patch_nop(void) 65 unsigned paravirt_patch_nop(void)
66 { 66 {
67 return 0; 67 return 0;
68 } 68 }
69 69
70 unsigned paravirt_patch_ignore(unsigned len) 70 unsigned paravirt_patch_ignore(unsigned len)
71 { 71 {
72 return len; 72 return len;
73 } 73 }
74 74
75 struct branch { 75 struct branch {
76 unsigned char opcode; 76 unsigned char opcode;
77 u32 delta; 77 u32 delta;
78 } __attribute__((packed)); 78 } __attribute__((packed));
79 79
80 unsigned paravirt_patch_call(void *insnbuf, 80 unsigned paravirt_patch_call(void *insnbuf,
81 const void *target, u16 tgt_clobbers, 81 const void *target, u16 tgt_clobbers,
82 unsigned long addr, u16 site_clobbers, 82 unsigned long addr, u16 site_clobbers,
83 unsigned len) 83 unsigned len)
84 { 84 {
85 struct branch *b = insnbuf; 85 struct branch *b = insnbuf;
86 unsigned long delta = (unsigned long)target - (addr+5); 86 unsigned long delta = (unsigned long)target - (addr+5);
87 87
88 if (tgt_clobbers & ~site_clobbers) 88 if (tgt_clobbers & ~site_clobbers)
89 return len; /* target would clobber too much for this site */ 89 return len; /* target would clobber too much for this site */
90 if (len < 5) 90 if (len < 5)
91 return len; /* call too long for patch site */ 91 return len; /* call too long for patch site */
92 92
93 b->opcode = 0xe8; /* call */ 93 b->opcode = 0xe8; /* call */
94 b->delta = delta; 94 b->delta = delta;
95 BUILD_BUG_ON(sizeof(*b) != 5); 95 BUILD_BUG_ON(sizeof(*b) != 5);
96 96
97 return 5; 97 return 5;
98 } 98 }
99 99
100 unsigned paravirt_patch_jmp(void *insnbuf, const void *target, 100 unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
101 unsigned long addr, unsigned len) 101 unsigned long addr, unsigned len)
102 { 102 {
103 struct branch *b = insnbuf; 103 struct branch *b = insnbuf;
104 unsigned long delta = (unsigned long)target - (addr+5); 104 unsigned long delta = (unsigned long)target - (addr+5);
105 105
106 if (len < 5) 106 if (len < 5)
107 return len; /* call too long for patch site */ 107 return len; /* call too long for patch site */
108 108
109 b->opcode = 0xe9; /* jmp */ 109 b->opcode = 0xe9; /* jmp */
110 b->delta = delta; 110 b->delta = delta;
111 111
112 return 5; 112 return 5;
113 } 113 }
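
Both patchers emit the same 5-byte shape, opcode plus a 32-bit displacement: 0xe8 for a near call, 0xe9 for a near jmp, with the displacement measured from the first byte after the instruction, which is where the addr+5 comes from. A standalone sketch of that arithmetic, with made-up addresses purely for illustration:

    /* Hedged sketch: the rel32 math behind paravirt_patch_call/jmp.
     * The addresses are invented; only the formula matters. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t site   = 0xffffffff81000100ULL;  /* patch site (addr)   */
            uint64_t target = 0xffffffff81234560ULL;  /* op's native routine */

            /* displacement is relative to the byte after the 5-byte insn */
            int32_t delta = (int32_t)(target - (site + 5));

            printf("opcode e8/e9, rel32 = 0x%08x\n", (uint32_t)delta);
            return 0;
    }
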
114 114
115 /* Neat trick to map patch type back to the call within the 115 /* Neat trick to map patch type back to the call within the
116 * corresponding structure. */ 116 * corresponding structure. */
117 static void *get_call_destination(u8 type) 117 static void *get_call_destination(u8 type)
118 { 118 {
119 struct paravirt_patch_template tmpl = { 119 struct paravirt_patch_template tmpl = {
120 .pv_init_ops = pv_init_ops, 120 .pv_init_ops = pv_init_ops,
121 .pv_time_ops = pv_time_ops, 121 .pv_time_ops = pv_time_ops,
122 .pv_cpu_ops = pv_cpu_ops, 122 .pv_cpu_ops = pv_cpu_ops,
123 .pv_irq_ops = pv_irq_ops, 123 .pv_irq_ops = pv_irq_ops,
124 .pv_apic_ops = pv_apic_ops, 124 .pv_apic_ops = pv_apic_ops,
125 .pv_mmu_ops = pv_mmu_ops, 125 .pv_mmu_ops = pv_mmu_ops,
126 }; 126 };
127 return *((void **)&tmpl + type); 127 return *((void **)&tmpl + type);
128 } 128 }
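
The trick works because a patch-site "type" is just the word index of an op inside struct paravirt_patch_template: building a template from the live pv_*_ops structures and indexing it by type yields whatever function pointer is currently installed for that op. On the header side the index is produced with offsetof; the PARAVIRT_PATCH() macro used below boils down to roughly:

    /* Sketch of the header-side encoding that get_call_destination()
     * inverts (PARAVIRT_PATCH comes from paravirt.h). */
    #define PARAVIRT_PATCH(x) \
            (offsetof(struct paravirt_patch_template, x) / sizeof(void *))

    /* e.g. the type recorded for the op this commit introduces:
     *      PARAVIRT_PATCH(pv_cpu_ops.load_gs_index)
     */
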
129 129
130 unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, 130 unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
131 unsigned long addr, unsigned len) 131 unsigned long addr, unsigned len)
132 { 132 {
133 void *opfunc = get_call_destination(type); 133 void *opfunc = get_call_destination(type);
134 unsigned ret; 134 unsigned ret;
135 135
136 if (opfunc == NULL) 136 if (opfunc == NULL)
137 /* If there's no function, patch it with a ud2a (BUG) */ 137 /* If there's no function, patch it with a ud2a (BUG) */
138 ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); 138 ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
139 else if (opfunc == paravirt_nop) 139 else if (opfunc == paravirt_nop)
140 /* If the operation is a nop, then nop the callsite */ 140 /* If the operation is a nop, then nop the callsite */
141 ret = paravirt_patch_nop(); 141 ret = paravirt_patch_nop();
142 else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || 142 else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
143 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || 143 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
144 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || 144 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
145 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) 145 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
146 /* If operation requires a jmp, then jmp */ 146 /* If operation requires a jmp, then jmp */
147 ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); 147 ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
148 else 148 else
149 /* Otherwise call the function; assume target could 149 /* Otherwise call the function; assume target could
150 clobber any caller-save reg */ 150 clobber any caller-save reg */
151 ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY, 151 ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY,
152 addr, clobbers, len); 152 addr, clobbers, len);
153 153
154 return ret; 154 return ret;
155 } 155 }
156 156
157 unsigned paravirt_patch_insns(void *insnbuf, unsigned len, 157 unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
158 const char *start, const char *end) 158 const char *start, const char *end)
159 { 159 {
160 unsigned insn_len = end - start; 160 unsigned insn_len = end - start;
161 161
162 if (insn_len > len || start == NULL) 162 if (insn_len > len || start == NULL)
163 insn_len = len; 163 insn_len = len;
164 else 164 else
165 memcpy(insnbuf, start, insn_len); 165 memcpy(insnbuf, start, insn_len);
166 166
167 return insn_len; 167 return insn_len;
168 } 168 }
169 169
170 void init_IRQ(void) 170 void init_IRQ(void)
171 { 171 {
172 pv_irq_ops.init_IRQ(); 172 pv_irq_ops.init_IRQ();
173 } 173 }
174 174
175 static void native_flush_tlb(void) 175 static void native_flush_tlb(void)
176 { 176 {
177 __native_flush_tlb(); 177 __native_flush_tlb();
178 } 178 }
179 179
180 /* 180 /*
181 * Global pages have to be flushed a bit differently. Not a real 181 * Global pages have to be flushed a bit differently. Not a real
182 * performance problem because this does not happen often. 182 * performance problem because this does not happen often.
183 */ 183 */
184 static void native_flush_tlb_global(void) 184 static void native_flush_tlb_global(void)
185 { 185 {
186 __native_flush_tlb_global(); 186 __native_flush_tlb_global();
187 } 187 }
188 188
189 static void native_flush_tlb_single(unsigned long addr) 189 static void native_flush_tlb_single(unsigned long addr)
190 { 190 {
191 __native_flush_tlb_single(addr); 191 __native_flush_tlb_single(addr);
192 } 192 }
193 193
194 /* These are in entry.S */ 194 /* These are in entry.S */
195 extern void native_iret(void); 195 extern void native_iret(void);
196 extern void native_irq_enable_sysexit(void); 196 extern void native_irq_enable_sysexit(void);
197 extern void native_usergs_sysret32(void); 197 extern void native_usergs_sysret32(void);
198 extern void native_usergs_sysret64(void); 198 extern void native_usergs_sysret64(void);
199 199
200 static int __init print_banner(void) 200 static int __init print_banner(void)
201 { 201 {
202 pv_init_ops.banner(); 202 pv_init_ops.banner();
203 return 0; 203 return 0;
204 } 204 }
205 core_initcall(print_banner); 205 core_initcall(print_banner);
206 206
207 static struct resource reserve_ioports = { 207 static struct resource reserve_ioports = {
208 .start = 0, 208 .start = 0,
209 .end = IO_SPACE_LIMIT, 209 .end = IO_SPACE_LIMIT,
210 .name = "paravirt-ioport", 210 .name = "paravirt-ioport",
211 .flags = IORESOURCE_IO | IORESOURCE_BUSY, 211 .flags = IORESOURCE_IO | IORESOURCE_BUSY,
212 }; 212 };
213 213
214 /* 214 /*
215 * Reserve the whole legacy IO space to prevent any legacy drivers 215 * Reserve the whole legacy IO space to prevent any legacy drivers
216 * from wasting time probing for their hardware. This is a fairly 216 * from wasting time probing for their hardware. This is a fairly
217 * brute-force approach to disabling all non-virtual drivers. 217 * brute-force approach to disabling all non-virtual drivers.
218 * 218 *
219 * Note that this must be called very early to have any effect. 219 * Note that this must be called very early to have any effect.
220 */ 220 */
221 int paravirt_disable_iospace(void) 221 int paravirt_disable_iospace(void)
222 { 222 {
223 return request_resource(&ioport_resource, &reserve_ioports); 223 return request_resource(&ioport_resource, &reserve_ioports);
224 } 224 }
225 225
226 static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; 226 static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
227 227
228 static inline void enter_lazy(enum paravirt_lazy_mode mode) 228 static inline void enter_lazy(enum paravirt_lazy_mode mode)
229 { 229 {
230 BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); 230 BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
231 BUG_ON(preemptible()); 231 BUG_ON(preemptible());
232 232
233 __get_cpu_var(paravirt_lazy_mode) = mode; 233 __get_cpu_var(paravirt_lazy_mode) = mode;
234 } 234 }
235 235
236 void paravirt_leave_lazy(enum paravirt_lazy_mode mode) 236 void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
237 { 237 {
238 BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); 238 BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode);
239 BUG_ON(preemptible()); 239 BUG_ON(preemptible());
240 240
241 __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; 241 __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
242 } 242 }
243 243
244 void paravirt_enter_lazy_mmu(void) 244 void paravirt_enter_lazy_mmu(void)
245 { 245 {
246 enter_lazy(PARAVIRT_LAZY_MMU); 246 enter_lazy(PARAVIRT_LAZY_MMU);
247 } 247 }
248 248
249 void paravirt_leave_lazy_mmu(void) 249 void paravirt_leave_lazy_mmu(void)
250 { 250 {
251 paravirt_leave_lazy(PARAVIRT_LAZY_MMU); 251 paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
252 } 252 }
253 253
254 void paravirt_enter_lazy_cpu(void) 254 void paravirt_enter_lazy_cpu(void)
255 { 255 {
256 enter_lazy(PARAVIRT_LAZY_CPU); 256 enter_lazy(PARAVIRT_LAZY_CPU);
257 } 257 }
258 258
259 void paravirt_leave_lazy_cpu(void) 259 void paravirt_leave_lazy_cpu(void)
260 { 260 {
261 paravirt_leave_lazy(PARAVIRT_LAZY_CPU); 261 paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
262 } 262 }
263 263
264 enum paravirt_lazy_mode paravirt_get_lazy_mode(void) 264 enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
265 { 265 {
266 return __get_cpu_var(paravirt_lazy_mode); 266 return __get_cpu_var(paravirt_lazy_mode);
267 } 267 }
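
These helpers only track which lazy mode the CPU is in; the actual batching and flushing is up to the backend's enter/leave hooks. Generic page-table code brackets a run of updates with the arch_*_lazy_mmu_mode wrappers, which on paravirt kernels land in pv_mmu_ops.lazy_mode; a hedged sketch of that usage pattern (update_range() is an invented example):

    #include <linux/mm.h>

    /* Hedged sketch: batching PTE updates under lazy MMU mode so a
     * hypervisor backend can queue them and flush in one go. */
    static void update_range(struct mm_struct *mm, unsigned long addr,
                             pte_t *ptep, pte_t *vals, int n)
    {
            int i;

            arch_enter_lazy_mmu_mode();     /* pv_mmu_ops.lazy_mode.enter */
            for (i = 0; i < n; i++)
                    set_pte_at(mm, addr + i * PAGE_SIZE, ptep + i, vals[i]);
            arch_leave_lazy_mmu_mode();     /* pv_mmu_ops.lazy_mode.leave */
    }
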
268 268
269 struct pv_info pv_info = { 269 struct pv_info pv_info = {
270 .name = "bare hardware", 270 .name = "bare hardware",
271 .paravirt_enabled = 0, 271 .paravirt_enabled = 0,
272 .kernel_rpl = 0, 272 .kernel_rpl = 0,
273 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ 273 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
274 }; 274 };
275 275
276 struct pv_init_ops pv_init_ops = { 276 struct pv_init_ops pv_init_ops = {
277 .patch = native_patch, 277 .patch = native_patch,
278 .banner = default_banner, 278 .banner = default_banner,
279 .arch_setup = paravirt_nop, 279 .arch_setup = paravirt_nop,
280 .memory_setup = machine_specific_memory_setup, 280 .memory_setup = machine_specific_memory_setup,
281 }; 281 };
282 282
283 struct pv_time_ops pv_time_ops = { 283 struct pv_time_ops pv_time_ops = {
284 .time_init = hpet_time_init, 284 .time_init = hpet_time_init,
285 .get_wallclock = native_get_wallclock, 285 .get_wallclock = native_get_wallclock,
286 .set_wallclock = native_set_wallclock, 286 .set_wallclock = native_set_wallclock,
287 .sched_clock = native_sched_clock, 287 .sched_clock = native_sched_clock,
288 .get_cpu_khz = native_calculate_cpu_khz, 288 .get_cpu_khz = native_calculate_cpu_khz,
289 }; 289 };
290 290
291 struct pv_irq_ops pv_irq_ops = { 291 struct pv_irq_ops pv_irq_ops = {
292 .init_IRQ = native_init_IRQ, 292 .init_IRQ = native_init_IRQ,
293 .save_fl = native_save_fl, 293 .save_fl = native_save_fl,
294 .restore_fl = native_restore_fl, 294 .restore_fl = native_restore_fl,
295 .irq_disable = native_irq_disable, 295 .irq_disable = native_irq_disable,
296 .irq_enable = native_irq_enable, 296 .irq_enable = native_irq_enable,
297 .safe_halt = native_safe_halt, 297 .safe_halt = native_safe_halt,
298 .halt = native_halt, 298 .halt = native_halt,
299 #ifdef CONFIG_X86_64 299 #ifdef CONFIG_X86_64
300 .adjust_exception_frame = paravirt_nop, 300 .adjust_exception_frame = paravirt_nop,
301 #endif 301 #endif
302 }; 302 };
303 303
304 struct pv_cpu_ops pv_cpu_ops = { 304 struct pv_cpu_ops pv_cpu_ops = {
305 .cpuid = native_cpuid, 305 .cpuid = native_cpuid,
306 .get_debugreg = native_get_debugreg, 306 .get_debugreg = native_get_debugreg,
307 .set_debugreg = native_set_debugreg, 307 .set_debugreg = native_set_debugreg,
308 .clts = native_clts, 308 .clts = native_clts,
309 .read_cr0 = native_read_cr0, 309 .read_cr0 = native_read_cr0,
310 .write_cr0 = native_write_cr0, 310 .write_cr0 = native_write_cr0,
311 .read_cr4 = native_read_cr4, 311 .read_cr4 = native_read_cr4,
312 .read_cr4_safe = native_read_cr4_safe, 312 .read_cr4_safe = native_read_cr4_safe,
313 .write_cr4 = native_write_cr4, 313 .write_cr4 = native_write_cr4,
314 #ifdef CONFIG_X86_64 314 #ifdef CONFIG_X86_64
315 .read_cr8 = native_read_cr8, 315 .read_cr8 = native_read_cr8,
316 .write_cr8 = native_write_cr8, 316 .write_cr8 = native_write_cr8,
317 #endif 317 #endif
318 .wbinvd = native_wbinvd, 318 .wbinvd = native_wbinvd,
319 .read_msr = native_read_msr_safe, 319 .read_msr = native_read_msr_safe,
320 .write_msr = native_write_msr_safe, 320 .write_msr = native_write_msr_safe,
321 .read_tsc = native_read_tsc, 321 .read_tsc = native_read_tsc,
322 .read_pmc = native_read_pmc, 322 .read_pmc = native_read_pmc,
323 .read_tscp = native_read_tscp, 323 .read_tscp = native_read_tscp,
324 .load_tr_desc = native_load_tr_desc, 324 .load_tr_desc = native_load_tr_desc,
325 .set_ldt = native_set_ldt, 325 .set_ldt = native_set_ldt,
326 .load_gdt = native_load_gdt, 326 .load_gdt = native_load_gdt,
327 .load_idt = native_load_idt, 327 .load_idt = native_load_idt,
328 .store_gdt = native_store_gdt, 328 .store_gdt = native_store_gdt,
329 .store_idt = native_store_idt, 329 .store_idt = native_store_idt,
330 .store_tr = native_store_tr, 330 .store_tr = native_store_tr,
331 .load_tls = native_load_tls, 331 .load_tls = native_load_tls,
332 #ifdef CONFIG_X86_64
333 .load_gs_index = native_load_gs_index,
334 #endif
332 .write_ldt_entry = native_write_ldt_entry, 335 .write_ldt_entry = native_write_ldt_entry,
333 .write_gdt_entry = native_write_gdt_entry, 336 .write_gdt_entry = native_write_gdt_entry,
334 .write_idt_entry = native_write_idt_entry, 337 .write_idt_entry = native_write_idt_entry,
335 .load_sp0 = native_load_sp0, 338 .load_sp0 = native_load_sp0,
336 339
337 .irq_enable_sysexit = native_irq_enable_sysexit, 340 .irq_enable_sysexit = native_irq_enable_sysexit,
338 #ifdef CONFIG_X86_64 341 #ifdef CONFIG_X86_64
339 .usergs_sysret32 = native_usergs_sysret32, 342 .usergs_sysret32 = native_usergs_sysret32,
340 .usergs_sysret64 = native_usergs_sysret64, 343 .usergs_sysret64 = native_usergs_sysret64,
341 #endif 344 #endif
342 .iret = native_iret, 345 .iret = native_iret,
343 .swapgs = native_swapgs, 346 .swapgs = native_swapgs,
344 347
345 .set_iopl_mask = native_set_iopl_mask, 348 .set_iopl_mask = native_set_iopl_mask,
346 .io_delay = native_io_delay, 349 .io_delay = native_io_delay,
347 350
348 .lazy_mode = { 351 .lazy_mode = {
349 .enter = paravirt_nop, 352 .enter = paravirt_nop,
350 .leave = paravirt_nop, 353 .leave = paravirt_nop,
351 }, 354 },
352 }; 355 };
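
The line this commit adds to the table above is .load_gs_index: on bare hardware it points at native_load_gs_index, the low-level %gs reload in entry_64.S, while a hypervisor port such as Xen can substitute its own handler. Callers no longer reload %gs directly; conceptually the dispatch is as below (the real header goes through the PVOP_VCALL wrapper machinery rather than a plain indirect call, so this is a sketch, not the literal paravirt.h code):

    /* Hedged sketch of the 64-bit call path for the new op. */
    #ifdef CONFIG_X86_64
    static inline void load_gs_index(unsigned int gs)
    {
            pv_cpu_ops.load_gs_index(gs);   /* native_load_gs_index() by default */
    }
    #endif
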
353 356
354 struct pv_apic_ops pv_apic_ops = { 357 struct pv_apic_ops pv_apic_ops = {
355 #ifdef CONFIG_X86_LOCAL_APIC 358 #ifdef CONFIG_X86_LOCAL_APIC
356 .apic_write = native_apic_write, 359 .apic_write = native_apic_write,
357 .apic_write_atomic = native_apic_write_atomic, 360 .apic_write_atomic = native_apic_write_atomic,
358 .apic_read = native_apic_read, 361 .apic_read = native_apic_read,
359 .setup_boot_clock = setup_boot_APIC_clock, 362 .setup_boot_clock = setup_boot_APIC_clock,
360 .setup_secondary_clock = setup_secondary_APIC_clock, 363 .setup_secondary_clock = setup_secondary_APIC_clock,
361 .startup_ipi_hook = paravirt_nop, 364 .startup_ipi_hook = paravirt_nop,
362 #endif 365 #endif
363 }; 366 };
364 367
365 struct pv_mmu_ops pv_mmu_ops = { 368 struct pv_mmu_ops pv_mmu_ops = {
366 #ifndef CONFIG_X86_64 369 #ifndef CONFIG_X86_64
367 .pagetable_setup_start = native_pagetable_setup_start, 370 .pagetable_setup_start = native_pagetable_setup_start,
368 .pagetable_setup_done = native_pagetable_setup_done, 371 .pagetable_setup_done = native_pagetable_setup_done,
369 #endif 372 #endif
370 373
371 .read_cr2 = native_read_cr2, 374 .read_cr2 = native_read_cr2,
372 .write_cr2 = native_write_cr2, 375 .write_cr2 = native_write_cr2,
373 .read_cr3 = native_read_cr3, 376 .read_cr3 = native_read_cr3,
374 .write_cr3 = native_write_cr3, 377 .write_cr3 = native_write_cr3,
375 378
376 .flush_tlb_user = native_flush_tlb, 379 .flush_tlb_user = native_flush_tlb,
377 .flush_tlb_kernel = native_flush_tlb_global, 380 .flush_tlb_kernel = native_flush_tlb_global,
378 .flush_tlb_single = native_flush_tlb_single, 381 .flush_tlb_single = native_flush_tlb_single,
379 .flush_tlb_others = native_flush_tlb_others, 382 .flush_tlb_others = native_flush_tlb_others,
380 383
381 .pgd_alloc = __paravirt_pgd_alloc, 384 .pgd_alloc = __paravirt_pgd_alloc,
382 .pgd_free = paravirt_nop, 385 .pgd_free = paravirt_nop,
383 386
384 .alloc_pte = paravirt_nop, 387 .alloc_pte = paravirt_nop,
385 .alloc_pmd = paravirt_nop, 388 .alloc_pmd = paravirt_nop,
386 .alloc_pmd_clone = paravirt_nop, 389 .alloc_pmd_clone = paravirt_nop,
387 .alloc_pud = paravirt_nop, 390 .alloc_pud = paravirt_nop,
388 .release_pte = paravirt_nop, 391 .release_pte = paravirt_nop,
389 .release_pmd = paravirt_nop, 392 .release_pmd = paravirt_nop,
390 .release_pud = paravirt_nop, 393 .release_pud = paravirt_nop,
391 394
392 .set_pte = native_set_pte, 395 .set_pte = native_set_pte,
393 .set_pte_at = native_set_pte_at, 396 .set_pte_at = native_set_pte_at,
394 .set_pmd = native_set_pmd, 397 .set_pmd = native_set_pmd,
395 .pte_update = paravirt_nop, 398 .pte_update = paravirt_nop,
396 .pte_update_defer = paravirt_nop, 399 .pte_update_defer = paravirt_nop,
397 400
398 .ptep_modify_prot_start = __ptep_modify_prot_start, 401 .ptep_modify_prot_start = __ptep_modify_prot_start,
399 .ptep_modify_prot_commit = __ptep_modify_prot_commit, 402 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
400 403
401 #ifdef CONFIG_HIGHPTE 404 #ifdef CONFIG_HIGHPTE
402 .kmap_atomic_pte = kmap_atomic, 405 .kmap_atomic_pte = kmap_atomic,
403 #endif 406 #endif
404 407
405 #if PAGETABLE_LEVELS >= 3 408 #if PAGETABLE_LEVELS >= 3
406 #ifdef CONFIG_X86_PAE 409 #ifdef CONFIG_X86_PAE
407 .set_pte_atomic = native_set_pte_atomic, 410 .set_pte_atomic = native_set_pte_atomic,
408 .set_pte_present = native_set_pte_present, 411 .set_pte_present = native_set_pte_present,
409 .pte_clear = native_pte_clear, 412 .pte_clear = native_pte_clear,
410 .pmd_clear = native_pmd_clear, 413 .pmd_clear = native_pmd_clear,
411 #endif 414 #endif
412 .set_pud = native_set_pud, 415 .set_pud = native_set_pud,
413 .pmd_val = native_pmd_val, 416 .pmd_val = native_pmd_val,
414 .make_pmd = native_make_pmd, 417 .make_pmd = native_make_pmd,
415 418
416 #if PAGETABLE_LEVELS == 4 419 #if PAGETABLE_LEVELS == 4
417 .pud_val = native_pud_val, 420 .pud_val = native_pud_val,
418 .make_pud = native_make_pud, 421 .make_pud = native_make_pud,
419 .set_pgd = native_set_pgd, 422 .set_pgd = native_set_pgd,
420 #endif 423 #endif
421 #endif /* PAGETABLE_LEVELS >= 3 */ 424 #endif /* PAGETABLE_LEVELS >= 3 */
422 425
423 .pte_val = native_pte_val, 426 .pte_val = native_pte_val,
424 .pte_flags = native_pte_val, 427 .pte_flags = native_pte_val,
425 .pgd_val = native_pgd_val, 428 .pgd_val = native_pgd_val,
426 429
427 .make_pte = native_make_pte, 430 .make_pte = native_make_pte,
428 .make_pgd = native_make_pgd, 431 .make_pgd = native_make_pgd,
429 432
430 .dup_mmap = paravirt_nop, 433 .dup_mmap = paravirt_nop,
431 .exit_mmap = paravirt_nop, 434 .exit_mmap = paravirt_nop,
432 .activate_mm = paravirt_nop, 435 .activate_mm = paravirt_nop,
433 436
434 .lazy_mode = { 437 .lazy_mode = {
435 .enter = paravirt_nop, 438 .enter = paravirt_nop,
436 .leave = paravirt_nop, 439 .leave = paravirt_nop,
437 }, 440 },
438 441
439 .set_fixmap = native_set_fixmap, 442 .set_fixmap = native_set_fixmap,
440 }; 443 };
441 444
442 EXPORT_SYMBOL_GPL(pv_time_ops); 445 EXPORT_SYMBOL_GPL(pv_time_ops);
443 EXPORT_SYMBOL (pv_cpu_ops); 446 EXPORT_SYMBOL (pv_cpu_ops);
444 EXPORT_SYMBOL (pv_mmu_ops); 447 EXPORT_SYMBOL (pv_mmu_ops);
445 EXPORT_SYMBOL_GPL(pv_apic_ops); 448 EXPORT_SYMBOL_GPL(pv_apic_ops);
446 EXPORT_SYMBOL_GPL(pv_info); 449 EXPORT_SYMBOL_GPL(pv_info);
447 EXPORT_SYMBOL (pv_irq_ops); 450 EXPORT_SYMBOL (pv_irq_ops);
448 451
include/asm-x86/elf.h
1 #ifndef _ASM_X86_ELF_H 1 #ifndef _ASM_X86_ELF_H
2 #define _ASM_X86_ELF_H 2 #define _ASM_X86_ELF_H
3 3
4 /* 4 /*
5 * ELF register definitions.. 5 * ELF register definitions..
6 */ 6 */
7 7
8 #include <asm/ptrace.h> 8 #include <asm/ptrace.h>
9 #include <asm/user.h> 9 #include <asm/user.h>
10 #include <asm/auxvec.h> 10 #include <asm/auxvec.h>
11 11
12 typedef unsigned long elf_greg_t; 12 typedef unsigned long elf_greg_t;
13 13
14 #define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t)) 14 #define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
15 typedef elf_greg_t elf_gregset_t[ELF_NGREG]; 15 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
16 16
17 typedef struct user_i387_struct elf_fpregset_t; 17 typedef struct user_i387_struct elf_fpregset_t;
18 18
19 #ifdef __i386__ 19 #ifdef __i386__
20 20
21 typedef struct user_fxsr_struct elf_fpxregset_t; 21 typedef struct user_fxsr_struct elf_fpxregset_t;
22 22
23 #define R_386_NONE 0 23 #define R_386_NONE 0
24 #define R_386_32 1 24 #define R_386_32 1
25 #define R_386_PC32 2 25 #define R_386_PC32 2
26 #define R_386_GOT32 3 26 #define R_386_GOT32 3
27 #define R_386_PLT32 4 27 #define R_386_PLT32 4
28 #define R_386_COPY 5 28 #define R_386_COPY 5
29 #define R_386_GLOB_DAT 6 29 #define R_386_GLOB_DAT 6
30 #define R_386_JMP_SLOT 7 30 #define R_386_JMP_SLOT 7
31 #define R_386_RELATIVE 8 31 #define R_386_RELATIVE 8
32 #define R_386_GOTOFF 9 32 #define R_386_GOTOFF 9
33 #define R_386_GOTPC 10 33 #define R_386_GOTPC 10
34 #define R_386_NUM 11 34 #define R_386_NUM 11
35 35
36 /* 36 /*
37 * These are used to set parameters in the core dumps. 37 * These are used to set parameters in the core dumps.
38 */ 38 */
39 #define ELF_CLASS ELFCLASS32 39 #define ELF_CLASS ELFCLASS32
40 #define ELF_DATA ELFDATA2LSB 40 #define ELF_DATA ELFDATA2LSB
41 #define ELF_ARCH EM_386 41 #define ELF_ARCH EM_386
42 42
43 #else 43 #else
44 44
45 /* x86-64 relocation types */ 45 /* x86-64 relocation types */
46 #define R_X86_64_NONE 0 /* No reloc */ 46 #define R_X86_64_NONE 0 /* No reloc */
47 #define R_X86_64_64 1 /* Direct 64 bit */ 47 #define R_X86_64_64 1 /* Direct 64 bit */
48 #define R_X86_64_PC32 2 /* PC relative 32 bit signed */ 48 #define R_X86_64_PC32 2 /* PC relative 32 bit signed */
49 #define R_X86_64_GOT32 3 /* 32 bit GOT entry */ 49 #define R_X86_64_GOT32 3 /* 32 bit GOT entry */
50 #define R_X86_64_PLT32 4 /* 32 bit PLT address */ 50 #define R_X86_64_PLT32 4 /* 32 bit PLT address */
51 #define R_X86_64_COPY 5 /* Copy symbol at runtime */ 51 #define R_X86_64_COPY 5 /* Copy symbol at runtime */
52 #define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ 52 #define R_X86_64_GLOB_DAT 6 /* Create GOT entry */
53 #define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ 53 #define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */
54 #define R_X86_64_RELATIVE 8 /* Adjust by program base */ 54 #define R_X86_64_RELATIVE 8 /* Adjust by program base */
55 #define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative 55 #define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative
56 offset to GOT */ 56 offset to GOT */
57 #define R_X86_64_32 10 /* Direct 32 bit zero extended */ 57 #define R_X86_64_32 10 /* Direct 32 bit zero extended */
58 #define R_X86_64_32S 11 /* Direct 32 bit sign extended */ 58 #define R_X86_64_32S 11 /* Direct 32 bit sign extended */
59 #define R_X86_64_16 12 /* Direct 16 bit zero extended */ 59 #define R_X86_64_16 12 /* Direct 16 bit zero extended */
60 #define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ 60 #define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */
61 #define R_X86_64_8 14 /* Direct 8 bit sign extended */ 61 #define R_X86_64_8 14 /* Direct 8 bit sign extended */
62 #define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ 62 #define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */
63 63
64 #define R_X86_64_NUM 16 64 #define R_X86_64_NUM 16
65 65
66 /* 66 /*
67 * These are used to set parameters in the core dumps. 67 * These are used to set parameters in the core dumps.
68 */ 68 */
69 #define ELF_CLASS ELFCLASS64 69 #define ELF_CLASS ELFCLASS64
70 #define ELF_DATA ELFDATA2LSB 70 #define ELF_DATA ELFDATA2LSB
71 #define ELF_ARCH EM_X86_64 71 #define ELF_ARCH EM_X86_64
72 72
73 #endif 73 #endif
74 74
75 #include <asm/vdso.h> 75 #include <asm/vdso.h>
76 76
77 extern unsigned int vdso_enabled; 77 extern unsigned int vdso_enabled;
78 78
79 /* 79 /*
80 * This is used to ensure we don't load something for the wrong architecture. 80 * This is used to ensure we don't load something for the wrong architecture.
81 */ 81 */
82 #define elf_check_arch_ia32(x) \ 82 #define elf_check_arch_ia32(x) \
83 (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) 83 (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
84 84
85 #include <asm/processor.h> 85 #include <asm/processor.h>
86 #include <asm/system.h>
86 87
87 #ifdef CONFIG_X86_32 88 #ifdef CONFIG_X86_32
88 #include <asm/system.h> /* for savesegment */
89 #include <asm/desc.h> 89 #include <asm/desc.h>
90 90
91 #define elf_check_arch(x) elf_check_arch_ia32(x) 91 #define elf_check_arch(x) elf_check_arch_ia32(x)
92 92
93 /* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx 93 /* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx
94 contains a pointer to a function which might be registered using `atexit'. 94 contains a pointer to a function which might be registered using `atexit'.
95 This provides a means for the dynamic linker to call DT_FINI functions for 95 This provides a means for the dynamic linker to call DT_FINI functions for
96 shared libraries that have been loaded before the code runs. 96 shared libraries that have been loaded before the code runs.
97 97
98 A value of 0 tells us we have no such handler. 98 A value of 0 tells us we have no such handler.
99 99
100 We might as well make sure everything else is cleared too (except for %esp), 100 We might as well make sure everything else is cleared too (except for %esp),
101 just to make things more deterministic. 101 just to make things more deterministic.
102 */ 102 */
103 #define ELF_PLAT_INIT(_r, load_addr) \ 103 #define ELF_PLAT_INIT(_r, load_addr) \
104 do { \ 104 do { \
105 _r->bx = 0; _r->cx = 0; _r->dx = 0; \ 105 _r->bx = 0; _r->cx = 0; _r->dx = 0; \
106 _r->si = 0; _r->di = 0; _r->bp = 0; \ 106 _r->si = 0; _r->di = 0; _r->bp = 0; \
107 _r->ax = 0; \ 107 _r->ax = 0; \
108 } while (0) 108 } while (0)
109 109
110 /* 110 /*
111 * regs is struct pt_regs, pr_reg is elf_gregset_t (which is 111 * regs is struct pt_regs, pr_reg is elf_gregset_t (which is
111 * now struct_user_regs; they are different) 111 * now struct_user_regs; they are different)
113 */ 113 */
114 114
115 #define ELF_CORE_COPY_REGS(pr_reg, regs) \ 115 #define ELF_CORE_COPY_REGS(pr_reg, regs) \
116 do { \ 116 do { \
117 pr_reg[0] = regs->bx; \ 117 pr_reg[0] = regs->bx; \
118 pr_reg[1] = regs->cx; \ 118 pr_reg[1] = regs->cx; \
119 pr_reg[2] = regs->dx; \ 119 pr_reg[2] = regs->dx; \
120 pr_reg[3] = regs->si; \ 120 pr_reg[3] = regs->si; \
121 pr_reg[4] = regs->di; \ 121 pr_reg[4] = regs->di; \
122 pr_reg[5] = regs->bp; \ 122 pr_reg[5] = regs->bp; \
123 pr_reg[6] = regs->ax; \ 123 pr_reg[6] = regs->ax; \
124 pr_reg[7] = regs->ds & 0xffff; \ 124 pr_reg[7] = regs->ds & 0xffff; \
125 pr_reg[8] = regs->es & 0xffff; \ 125 pr_reg[8] = regs->es & 0xffff; \
126 pr_reg[9] = regs->fs & 0xffff; \ 126 pr_reg[9] = regs->fs & 0xffff; \
127 savesegment(gs, pr_reg[10]); \ 127 savesegment(gs, pr_reg[10]); \
128 pr_reg[11] = regs->orig_ax; \ 128 pr_reg[11] = regs->orig_ax; \
129 pr_reg[12] = regs->ip; \ 129 pr_reg[12] = regs->ip; \
130 pr_reg[13] = regs->cs & 0xffff; \ 130 pr_reg[13] = regs->cs & 0xffff; \
131 pr_reg[14] = regs->flags; \ 131 pr_reg[14] = regs->flags; \
132 pr_reg[15] = regs->sp; \ 132 pr_reg[15] = regs->sp; \
133 pr_reg[16] = regs->ss & 0xffff; \ 133 pr_reg[16] = regs->ss & 0xffff; \
134 } while (0); 134 } while (0);
135 135
136 #define ELF_PLATFORM (utsname()->machine) 136 #define ELF_PLATFORM (utsname()->machine)
137 #define set_personality_64bit() do { } while (0) 137 #define set_personality_64bit() do { } while (0)
138 138
139 #else /* CONFIG_X86_32 */ 139 #else /* CONFIG_X86_32 */
140 140
141 /* 141 /*
142 * This is used to ensure we don't load something for the wrong architecture. 142 * This is used to ensure we don't load something for the wrong architecture.
143 */ 143 */
144 #define elf_check_arch(x) \ 144 #define elf_check_arch(x) \
145 ((x)->e_machine == EM_X86_64) 145 ((x)->e_machine == EM_X86_64)
146 146
147 #define compat_elf_check_arch(x) elf_check_arch_ia32(x) 147 #define compat_elf_check_arch(x) elf_check_arch_ia32(x)
148 148
149 static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp) 149 static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp)
150 { 150 {
151 asm volatile("movl %0,%%fs" :: "r" (0)); 151 asm volatile("movl %0,%%fs" :: "r" (0));
152 asm volatile("movl %0,%%es; movl %0,%%ds" : : "r" (__USER32_DS)); 152 asm volatile("movl %0,%%es; movl %0,%%ds" : : "r" (__USER32_DS));
153 load_gs_index(0); 153 load_gs_index(0);
154 regs->ip = ip; 154 regs->ip = ip;
155 regs->sp = sp; 155 regs->sp = sp;
156 regs->flags = X86_EFLAGS_IF; 156 regs->flags = X86_EFLAGS_IF;
157 regs->cs = __USER32_CS; 157 regs->cs = __USER32_CS;
158 regs->ss = __USER32_DS; 158 regs->ss = __USER32_DS;
159 } 159 }
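
start_ia32_thread() is one of the call sites this commit is really about: load_gs_index(0) now goes through pv_cpu_ops.load_gs_index on paravirt kernels, which is also why <asm/system.h> is pulled in unconditionally above instead of only under CONFIG_X86_32. With CONFIG_PARAVIRT disabled, the same name presumably falls straight through to the native helper; a sketch of that fallback (the system.h hunk is not part of this excerpt):

    /* Hedged sketch: expected non-paravirt wiring in <asm/system.h>. */
    #ifdef CONFIG_PARAVIRT
    #include <asm/paravirt.h>
    #else
    #define load_gs_index(gs)       native_load_gs_index(gs)
    #endif
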
160 160
161 static inline void elf_common_init(struct thread_struct *t, 161 static inline void elf_common_init(struct thread_struct *t,
162 struct pt_regs *regs, const u16 ds) 162 struct pt_regs *regs, const u16 ds)
163 { 163 {
164 regs->ax = regs->bx = regs->cx = regs->dx = 0; 164 regs->ax = regs->bx = regs->cx = regs->dx = 0;
165 regs->si = regs->di = regs->bp = 0; 165 regs->si = regs->di = regs->bp = 0;
166 regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; 166 regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
167 regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; 167 regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
168 t->fs = t->gs = 0; 168 t->fs = t->gs = 0;
169 t->fsindex = t->gsindex = 0; 169 t->fsindex = t->gsindex = 0;
170 t->ds = t->es = ds; 170 t->ds = t->es = ds;
171 } 171 }
172 172
173 #define ELF_PLAT_INIT(_r, load_addr) \ 173 #define ELF_PLAT_INIT(_r, load_addr) \
174 do { \ 174 do { \
175 elf_common_init(&current->thread, _r, 0); \ 175 elf_common_init(&current->thread, _r, 0); \
176 clear_thread_flag(TIF_IA32); \ 176 clear_thread_flag(TIF_IA32); \
177 } while (0) 177 } while (0)
178 178
179 #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ 179 #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \
180 elf_common_init(&current->thread, regs, __USER_DS) 180 elf_common_init(&current->thread, regs, __USER_DS)
181 181
182 #define compat_start_thread(regs, ip, sp) \ 182 #define compat_start_thread(regs, ip, sp) \
183 do { \ 183 do { \
184 start_ia32_thread(regs, ip, sp); \ 184 start_ia32_thread(regs, ip, sp); \
185 set_fs(USER_DS); \ 185 set_fs(USER_DS); \
186 } while (0) 186 } while (0)
187 187
188 #define COMPAT_SET_PERSONALITY(ex, ibcs2) \ 188 #define COMPAT_SET_PERSONALITY(ex, ibcs2) \
189 do { \ 189 do { \
190 if (test_thread_flag(TIF_IA32)) \ 190 if (test_thread_flag(TIF_IA32)) \
191 clear_thread_flag(TIF_ABI_PENDING); \ 191 clear_thread_flag(TIF_ABI_PENDING); \
192 else \ 192 else \
193 set_thread_flag(TIF_ABI_PENDING); \ 193 set_thread_flag(TIF_ABI_PENDING); \
194 current->personality |= force_personality32; \ 194 current->personality |= force_personality32; \
195 } while (0) 195 } while (0)
196 196
197 #define COMPAT_ELF_PLATFORM ("i686") 197 #define COMPAT_ELF_PLATFORM ("i686")
198 198
199 /* 199 /*
200 * regs is struct pt_regs, pr_reg is elf_gregset_t (which is 200 * regs is struct pt_regs, pr_reg is elf_gregset_t (which is
201 * now struct_user_regs, they are different). Assumes current is the process 201 * now struct_user_regs, they are different). Assumes current is the process
202 * getting dumped. 202 * getting dumped.
203 */ 203 */
204 204
205 #define ELF_CORE_COPY_REGS(pr_reg, regs) \ 205 #define ELF_CORE_COPY_REGS(pr_reg, regs) \
206 do { \ 206 do { \
207 unsigned v; \ 207 unsigned v; \
208 (pr_reg)[0] = (regs)->r15; \ 208 (pr_reg)[0] = (regs)->r15; \
209 (pr_reg)[1] = (regs)->r14; \ 209 (pr_reg)[1] = (regs)->r14; \
210 (pr_reg)[2] = (regs)->r13; \ 210 (pr_reg)[2] = (regs)->r13; \
211 (pr_reg)[3] = (regs)->r12; \ 211 (pr_reg)[3] = (regs)->r12; \
212 (pr_reg)[4] = (regs)->bp; \ 212 (pr_reg)[4] = (regs)->bp; \
213 (pr_reg)[5] = (regs)->bx; \ 213 (pr_reg)[5] = (regs)->bx; \
214 (pr_reg)[6] = (regs)->r11; \ 214 (pr_reg)[6] = (regs)->r11; \
215 (pr_reg)[7] = (regs)->r10; \ 215 (pr_reg)[7] = (regs)->r10; \
216 (pr_reg)[8] = (regs)->r9; \ 216 (pr_reg)[8] = (regs)->r9; \
217 (pr_reg)[9] = (regs)->r8; \ 217 (pr_reg)[9] = (regs)->r8; \
218 (pr_reg)[10] = (regs)->ax; \ 218 (pr_reg)[10] = (regs)->ax; \
219 (pr_reg)[11] = (regs)->cx; \ 219 (pr_reg)[11] = (regs)->cx; \
220 (pr_reg)[12] = (regs)->dx; \ 220 (pr_reg)[12] = (regs)->dx; \
221 (pr_reg)[13] = (regs)->si; \ 221 (pr_reg)[13] = (regs)->si; \
222 (pr_reg)[14] = (regs)->di; \ 222 (pr_reg)[14] = (regs)->di; \
223 (pr_reg)[15] = (regs)->orig_ax; \ 223 (pr_reg)[15] = (regs)->orig_ax; \
224 (pr_reg)[16] = (regs)->ip; \ 224 (pr_reg)[16] = (regs)->ip; \
225 (pr_reg)[17] = (regs)->cs; \ 225 (pr_reg)[17] = (regs)->cs; \
226 (pr_reg)[18] = (regs)->flags; \ 226 (pr_reg)[18] = (regs)->flags; \
227 (pr_reg)[19] = (regs)->sp; \ 227 (pr_reg)[19] = (regs)->sp; \
228 (pr_reg)[20] = (regs)->ss; \ 228 (pr_reg)[20] = (regs)->ss; \
229 (pr_reg)[21] = current->thread.fs; \ 229 (pr_reg)[21] = current->thread.fs; \
230 (pr_reg)[22] = current->thread.gs; \ 230 (pr_reg)[22] = current->thread.gs; \
231 asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \ 231 asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \
232 asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ 232 asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \
233 asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ 233 asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \
234 asm("movl %%gs,%0" : "=r" (v)); (pr_reg)[26] = v; \ 234 asm("movl %%gs,%0" : "=r" (v)); (pr_reg)[26] = v; \
235 } while (0); 235 } while (0);
236 236
237 /* I'm not sure if we can use '-' here */ 237 /* I'm not sure if we can use '-' here */
238 #define ELF_PLATFORM ("x86_64") 238 #define ELF_PLATFORM ("x86_64")
239 extern void set_personality_64bit(void); 239 extern void set_personality_64bit(void);
240 extern unsigned int sysctl_vsyscall32; 240 extern unsigned int sysctl_vsyscall32;
241 extern int force_personality32; 241 extern int force_personality32;
242 242
243 #endif /* !CONFIG_X86_32 */ 243 #endif /* !CONFIG_X86_32 */
244 244
245 #define CORE_DUMP_USE_REGSET 245 #define CORE_DUMP_USE_REGSET
246 #define USE_ELF_CORE_DUMP 246 #define USE_ELF_CORE_DUMP
247 #define ELF_EXEC_PAGESIZE 4096 247 #define ELF_EXEC_PAGESIZE 4096
248 248
249 /* This is the location that an ET_DYN program is loaded if exec'ed. Typical 249 /* This is the location that an ET_DYN program is loaded if exec'ed. Typical
250 use of this is to invoke "./ld.so someprog" to test out a new version of 250 use of this is to invoke "./ld.so someprog" to test out a new version of
251 the loader. We need to make sure that it is out of the way of the program 251 the loader. We need to make sure that it is out of the way of the program
252 that it will "exec", and that there is sufficient room for the brk. */ 252 that it will "exec", and that there is sufficient room for the brk. */
253 253
254 #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) 254 #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
255 255
256 /* This yields a mask that user programs can use to figure out what 256 /* This yields a mask that user programs can use to figure out what
257 instruction set this CPU supports. This could be done in user space, 257 instruction set this CPU supports. This could be done in user space,
258 but it's not easy, and we've already done it here. */ 258 but it's not easy, and we've already done it here. */
259 259
260 #define ELF_HWCAP (boot_cpu_data.x86_capability[0]) 260 #define ELF_HWCAP (boot_cpu_data.x86_capability[0])
261 261
262 /* This yields a string that ld.so will use to load implementation 262 /* This yields a string that ld.so will use to load implementation
263 specific libraries for optimization. This is more specific in 263 specific libraries for optimization. This is more specific in
264 intent than poking at uname or /proc/cpuinfo. 264 intent than poking at uname or /proc/cpuinfo.
265 265
266 For the moment, we have only optimizations for the Intel generations, 266 For the moment, we have only optimizations for the Intel generations,
267 but that could change... */ 267 but that could change... */
268 268
269 #define SET_PERSONALITY(ex, ibcs2) set_personality_64bit() 269 #define SET_PERSONALITY(ex, ibcs2) set_personality_64bit()
270 270
271 /* 271 /*
272 * An executable for which elf_read_implies_exec() returns TRUE will 272 * An executable for which elf_read_implies_exec() returns TRUE will
273 * have the READ_IMPLIES_EXEC personality flag set automatically. 273 * have the READ_IMPLIES_EXEC personality flag set automatically.
274 */ 274 */
275 #define elf_read_implies_exec(ex, executable_stack) \ 275 #define elf_read_implies_exec(ex, executable_stack) \
276 (executable_stack != EXSTACK_DISABLE_X) 276 (executable_stack != EXSTACK_DISABLE_X)
277 277
278 struct task_struct; 278 struct task_struct;
279 279
280 #define ARCH_DLINFO_IA32(vdso_enabled) \ 280 #define ARCH_DLINFO_IA32(vdso_enabled) \
281 do { \ 281 do { \
282 if (vdso_enabled) { \ 282 if (vdso_enabled) { \
283 NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ 283 NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \
284 NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ 284 NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \
285 } \ 285 } \
286 } while (0) 286 } while (0)
287 287
288 #ifdef CONFIG_X86_32 288 #ifdef CONFIG_X86_32
289 289
290 #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) 290 #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO))
291 291
292 #define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled) 292 #define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled)
293 293
294 /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ 294 /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
295 295
296 #else /* CONFIG_X86_32 */ 296 #else /* CONFIG_X86_32 */
297 297
298 #define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */ 298 #define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */
299 299
300 /* 1GB for 64bit, 8MB for 32bit */ 300 /* 1GB for 64bit, 8MB for 32bit */
301 #define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff) 301 #define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff)
302 302
303 #define ARCH_DLINFO \ 303 #define ARCH_DLINFO \
304 do { \ 304 do { \
305 if (vdso_enabled) \ 305 if (vdso_enabled) \
306 NEW_AUX_ENT(AT_SYSINFO_EHDR, \ 306 NEW_AUX_ENT(AT_SYSINFO_EHDR, \
307 (unsigned long)current->mm->context.vdso); \ 307 (unsigned long)current->mm->context.vdso); \
308 } while (0) 308 } while (0)
309 309
310 #define AT_SYSINFO 32 310 #define AT_SYSINFO 32
311 311
312 #define COMPAT_ARCH_DLINFO ARCH_DLINFO_IA32(sysctl_vsyscall32) 312 #define COMPAT_ARCH_DLINFO ARCH_DLINFO_IA32(sysctl_vsyscall32)
313 313
314 #define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) 314 #define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
315 315
316 #endif /* !CONFIG_X86_32 */ 316 #endif /* !CONFIG_X86_32 */
317 317
318 #define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) 318 #define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso)
319 319
320 #define VDSO_ENTRY \ 320 #define VDSO_ENTRY \
321 ((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) 321 ((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall))
322 322
323 struct linux_binprm; 323 struct linux_binprm;
324 324
325 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 325 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
326 extern int arch_setup_additional_pages(struct linux_binprm *bprm, 326 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
327 int executable_stack); 327 int executable_stack);
328 328
329 extern int syscall32_setup_pages(struct linux_binprm *, int exstack); 329 extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
330 #define compat_arch_setup_additional_pages syscall32_setup_pages 330 #define compat_arch_setup_additional_pages syscall32_setup_pages
331 331
332 extern unsigned long arch_randomize_brk(struct mm_struct *mm); 332 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
333 #define arch_randomize_brk arch_randomize_brk 333 #define arch_randomize_brk arch_randomize_brk
334 334
335 #endif 335 #endif
include/asm-x86/paravirt.h
1 #ifndef __ASM_PARAVIRT_H 1 #ifndef __ASM_PARAVIRT_H
2 #define __ASM_PARAVIRT_H 2 #define __ASM_PARAVIRT_H
3 /* Various instructions on x86 need to be replaced for 3 /* Various instructions on x86 need to be replaced for
4 * para-virtualization: those hooks are defined here. */ 4 * para-virtualization: those hooks are defined here. */
5 5
6 #ifdef CONFIG_PARAVIRT 6 #ifdef CONFIG_PARAVIRT
7 #include <asm/page.h> 7 #include <asm/page.h>
8 #include <asm/asm.h> 8 #include <asm/asm.h>
9 9
10 /* Bitmask of what can be clobbered: usually at least eax. */ 10 /* Bitmask of what can be clobbered: usually at least eax. */
11 #define CLBR_NONE 0 11 #define CLBR_NONE 0
12 #define CLBR_EAX (1 << 0) 12 #define CLBR_EAX (1 << 0)
13 #define CLBR_ECX (1 << 1) 13 #define CLBR_ECX (1 << 1)
14 #define CLBR_EDX (1 << 2) 14 #define CLBR_EDX (1 << 2)
15 15
16 #ifdef CONFIG_X86_64 16 #ifdef CONFIG_X86_64
17 #define CLBR_RSI (1 << 3) 17 #define CLBR_RSI (1 << 3)
18 #define CLBR_RDI (1 << 4) 18 #define CLBR_RDI (1 << 4)
19 #define CLBR_R8 (1 << 5) 19 #define CLBR_R8 (1 << 5)
20 #define CLBR_R9 (1 << 6) 20 #define CLBR_R9 (1 << 6)
21 #define CLBR_R10 (1 << 7) 21 #define CLBR_R10 (1 << 7)
22 #define CLBR_R11 (1 << 8) 22 #define CLBR_R11 (1 << 8)
23 #define CLBR_ANY ((1 << 9) - 1) 23 #define CLBR_ANY ((1 << 9) - 1)
24 #include <asm/desc_defs.h> 24 #include <asm/desc_defs.h>
25 #else 25 #else
26 /* CLBR_ANY should match all regs platform has. For i386, that's just it */ 26 /* CLBR_ANY should match all regs platform has. For i386, that's just it */
27 #define CLBR_ANY ((1 << 3) - 1) 27 #define CLBR_ANY ((1 << 3) - 1)
28 #endif /* X86_64 */ 28 #endif /* X86_64 */
29 29
30 #ifndef __ASSEMBLY__ 30 #ifndef __ASSEMBLY__
31 #include <linux/types.h> 31 #include <linux/types.h>
32 #include <linux/cpumask.h> 32 #include <linux/cpumask.h>
33 #include <asm/kmap_types.h> 33 #include <asm/kmap_types.h>
34 #include <asm/desc_defs.h> 34 #include <asm/desc_defs.h>
35 35
36 struct page; 36 struct page;
37 struct thread_struct; 37 struct thread_struct;
38 struct desc_ptr; 38 struct desc_ptr;
39 struct tss_struct; 39 struct tss_struct;
40 struct mm_struct; 40 struct mm_struct;
41 struct desc_struct; 41 struct desc_struct;
42 42
43 /* general info */ 43 /* general info */
44 struct pv_info { 44 struct pv_info {
45 unsigned int kernel_rpl; 45 unsigned int kernel_rpl;
46 int shared_kernel_pmd; 46 int shared_kernel_pmd;
47 int paravirt_enabled; 47 int paravirt_enabled;
48 const char *name; 48 const char *name;
49 }; 49 };
50 50
51 struct pv_init_ops { 51 struct pv_init_ops {
52 /* 52 /*
53 * Patch may replace one of the defined code sequences with 53 * Patch may replace one of the defined code sequences with
54 * arbitrary code, subject to the same register constraints. 54 * arbitrary code, subject to the same register constraints.
55 * This generally means the code is not free to clobber any 55 * This generally means the code is not free to clobber any
56 * registers other than EAX. The patch function should return 56 * registers other than EAX. The patch function should return
57 * the number of bytes of code generated, as we nop pad the 57 * the number of bytes of code generated, as we nop pad the
58 * rest in generic code. 58 * rest in generic code.
59 */ 59 */
60 unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, 60 unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
61 unsigned long addr, unsigned len); 61 unsigned long addr, unsigned len);
62 62
63 /* Basic arch-specific setup */ 63 /* Basic arch-specific setup */
64 void (*arch_setup)(void); 64 void (*arch_setup)(void);
65 char *(*memory_setup)(void); 65 char *(*memory_setup)(void);
66 void (*post_allocator_init)(void); 66 void (*post_allocator_init)(void);
67 67
68 /* Print a banner to identify the environment */ 68 /* Print a banner to identify the environment */
69 void (*banner)(void); 69 void (*banner)(void);
70 }; 70 };
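A minimal sketch of a patch hook (not from this patch; it assumes paravirt_patch_default(), declared further down in this header): the hook returns how many bytes of code it emitted, and generic code nop-pads the rest of the site.

/* Hypothetical backend patch hook, deferring everything to the generic
 * patcher, which emits a direct call/jmp (or nothing) and reports the
 * number of bytes written. */
static unsigned example_patch(u8 type, u16 clobbers, void *insnbuf,
			      unsigned long addr, unsigned len)
{
	return paravirt_patch_default(type, clobbers, insnbuf, addr, len);
}

/* A backend would install it with: pv_init_ops.patch = example_patch; */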
71 71
72 72
73 struct pv_lazy_ops { 73 struct pv_lazy_ops {
74 /* Set deferred update mode, used for batching operations. */ 74 /* Set deferred update mode, used for batching operations. */
75 void (*enter)(void); 75 void (*enter)(void);
76 void (*leave)(void); 76 void (*leave)(void);
77 }; 77 };
78 78
79 struct pv_time_ops { 79 struct pv_time_ops {
80 void (*time_init)(void); 80 void (*time_init)(void);
81 81
82 /* Get and set time of day */ 82 /* Get and set time of day */
83 unsigned long (*get_wallclock)(void); 83 unsigned long (*get_wallclock)(void);
84 int (*set_wallclock)(unsigned long); 84 int (*set_wallclock)(unsigned long);
85 85
86 unsigned long long (*sched_clock)(void); 86 unsigned long long (*sched_clock)(void);
87 unsigned long (*get_cpu_khz)(void); 87 unsigned long (*get_cpu_khz)(void);
88 }; 88 };
89 89
90 struct pv_cpu_ops { 90 struct pv_cpu_ops {
91 /* hooks for various privileged instructions */ 91 /* hooks for various privileged instructions */
92 unsigned long (*get_debugreg)(int regno); 92 unsigned long (*get_debugreg)(int regno);
93 void (*set_debugreg)(int regno, unsigned long value); 93 void (*set_debugreg)(int regno, unsigned long value);
94 94
95 void (*clts)(void); 95 void (*clts)(void);
96 96
97 unsigned long (*read_cr0)(void); 97 unsigned long (*read_cr0)(void);
98 void (*write_cr0)(unsigned long); 98 void (*write_cr0)(unsigned long);
99 99
100 unsigned long (*read_cr4_safe)(void); 100 unsigned long (*read_cr4_safe)(void);
101 unsigned long (*read_cr4)(void); 101 unsigned long (*read_cr4)(void);
102 void (*write_cr4)(unsigned long); 102 void (*write_cr4)(unsigned long);
103 103
104 #ifdef CONFIG_X86_64 104 #ifdef CONFIG_X86_64
105 unsigned long (*read_cr8)(void); 105 unsigned long (*read_cr8)(void);
106 void (*write_cr8)(unsigned long); 106 void (*write_cr8)(unsigned long);
107 #endif 107 #endif
108 108
109 /* Segment descriptor handling */ 109 /* Segment descriptor handling */
110 void (*load_tr_desc)(void); 110 void (*load_tr_desc)(void);
111 void (*load_gdt)(const struct desc_ptr *); 111 void (*load_gdt)(const struct desc_ptr *);
112 void (*load_idt)(const struct desc_ptr *); 112 void (*load_idt)(const struct desc_ptr *);
113 void (*store_gdt)(struct desc_ptr *); 113 void (*store_gdt)(struct desc_ptr *);
114 void (*store_idt)(struct desc_ptr *); 114 void (*store_idt)(struct desc_ptr *);
115 void (*set_ldt)(const void *desc, unsigned entries); 115 void (*set_ldt)(const void *desc, unsigned entries);
116 unsigned long (*store_tr)(void); 116 unsigned long (*store_tr)(void);
117 void (*load_tls)(struct thread_struct *t, unsigned int cpu); 117 void (*load_tls)(struct thread_struct *t, unsigned int cpu);
118 #ifdef CONFIG_X86_64
119 void (*load_gs_index)(unsigned int idx);
120 #endif
118 void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, 121 void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
119 const void *desc); 122 const void *desc);
120 void (*write_gdt_entry)(struct desc_struct *, 123 void (*write_gdt_entry)(struct desc_struct *,
121 int entrynum, const void *desc, int size); 124 int entrynum, const void *desc, int size);
122 void (*write_idt_entry)(gate_desc *, 125 void (*write_idt_entry)(gate_desc *,
123 int entrynum, const gate_desc *gate); 126 int entrynum, const gate_desc *gate);
124 void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); 127 void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
125 128
126 void (*set_iopl_mask)(unsigned mask); 129 void (*set_iopl_mask)(unsigned mask);
127 130
128 void (*wbinvd)(void); 131 void (*wbinvd)(void);
129 void (*io_delay)(void); 132 void (*io_delay)(void);
130 133
131 /* cpuid emulation, mostly so that caps bits can be disabled */ 134 /* cpuid emulation, mostly so that caps bits can be disabled */
132 void (*cpuid)(unsigned int *eax, unsigned int *ebx, 135 void (*cpuid)(unsigned int *eax, unsigned int *ebx,
133 unsigned int *ecx, unsigned int *edx); 136 unsigned int *ecx, unsigned int *edx);
134 137
135 /* MSR, PMC and TSC operations. 138 /* MSR, PMC and TSC operations.
136 err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ 139 err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
137 u64 (*read_msr)(unsigned int msr, int *err); 140 u64 (*read_msr)(unsigned int msr, int *err);
138 int (*write_msr)(unsigned int msr, unsigned low, unsigned high); 141 int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
139 142
140 u64 (*read_tsc)(void); 143 u64 (*read_tsc)(void);
141 u64 (*read_pmc)(int counter); 144 u64 (*read_pmc)(int counter);
142 unsigned long long (*read_tscp)(unsigned int *aux); 145 unsigned long long (*read_tscp)(unsigned int *aux);
143 146
144 /* 147 /*
145 * Atomically enable interrupts and return to userspace. This 148 * Atomically enable interrupts and return to userspace. This
146 * is only ever used to return to 32-bit processes; in a 149 * is only ever used to return to 32-bit processes; in a
147 * 64-bit kernel, it's used for 32-on-64 compat processes, but 150 * 64-bit kernel, it's used for 32-on-64 compat processes, but
148 * never native 64-bit processes. (Jump, not call.) 151 * never native 64-bit processes. (Jump, not call.)
149 */ 152 */
150 void (*irq_enable_sysexit)(void); 153 void (*irq_enable_sysexit)(void);
151 154
152 /* 155 /*
153 * Switch to usermode gs and return to 64-bit usermode using 156 * Switch to usermode gs and return to 64-bit usermode using
154 * sysret. Only used in 64-bit kernels to return to 64-bit 157 * sysret. Only used in 64-bit kernels to return to 64-bit
155 * processes. Usermode register state, including %rsp, must 158 * processes. Usermode register state, including %rsp, must
156 * already be restored. 159 * already be restored.
157 */ 160 */
158 void (*usergs_sysret64)(void); 161 void (*usergs_sysret64)(void);
159 162
160 /* 163 /*
161 * Switch to usermode gs and return to 32-bit usermode using 164 * Switch to usermode gs and return to 32-bit usermode using
162 * sysret. Used to return to 32-on-64 compat processes. 165 * sysret. Used to return to 32-on-64 compat processes.
163 * Other usermode register state, including %esp, must already 166 * Other usermode register state, including %esp, must already
164 * be restored. 167 * be restored.
165 */ 168 */
166 void (*usergs_sysret32)(void); 169 void (*usergs_sysret32)(void);
167 170
168 /* Normal iret. Jump to this with the standard iret stack 171 /* Normal iret. Jump to this with the standard iret stack
169 frame set up. */ 172 frame set up. */
170 void (*iret)(void); 173 void (*iret)(void);
171 174
172 void (*swapgs)(void); 175 void (*swapgs)(void);
173 176
174 struct pv_lazy_ops lazy_mode; 177 struct pv_lazy_ops lazy_mode;
175 }; 178 };
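A hedged sketch (not from this patch) of how a 64-bit backend would override the new load_gs_index hook while leaving the rest of pv_cpu_ops at its native values; the example_* names are invented, and native_load_gs_index() is assumed to be the native helper introduced alongside this change.

#ifdef CONFIG_X86_64
/* Hypothetical backend hook: a hypervisor would reload the user GS
 * selector through its own safe mechanism; here we simply fall back to
 * the native helper to keep the sketch honest. */
static void example_load_gs_index(unsigned int idx)
{
	native_load_gs_index(idx);
}

static void __init example_backend_setup(void)
{
	/* Override only the hooks this backend cares about. */
	pv_cpu_ops.load_gs_index = example_load_gs_index;
}
#endif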
176 179
177 struct pv_irq_ops { 180 struct pv_irq_ops {
178 void (*init_IRQ)(void); 181 void (*init_IRQ)(void);
179 182
180 /* 183 /*
181 * Get/set interrupt state. save_fl and restore_fl are only 184 * Get/set interrupt state. save_fl and restore_fl are only
182 * expected to use X86_EFLAGS_IF; all other bits 185 * expected to use X86_EFLAGS_IF; all other bits
183 * returned from save_fl are undefined, and may be ignored by 186 * returned from save_fl are undefined, and may be ignored by
184 * restore_fl. 187 * restore_fl.
185 */ 188 */
186 unsigned long (*save_fl)(void); 189 unsigned long (*save_fl)(void);
187 void (*restore_fl)(unsigned long); 190 void (*restore_fl)(unsigned long);
188 void (*irq_disable)(void); 191 void (*irq_disable)(void);
189 void (*irq_enable)(void); 192 void (*irq_enable)(void);
190 void (*safe_halt)(void); 193 void (*safe_halt)(void);
191 void (*halt)(void); 194 void (*halt)(void);
192 195
193 #ifdef CONFIG_X86_64 196 #ifdef CONFIG_X86_64
194 void (*adjust_exception_frame)(void); 197 void (*adjust_exception_frame)(void);
195 #endif 198 #endif
196 }; 199 };
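A hedged sketch (invented names, not from this patch) of the save_fl/restore_fl contract described above: a backend that virtualizes only X86_EFLAGS_IF, which is all these hooks are required to honour.

/* Hypothetical backend that tracks a per-CPU virtual interrupt flag. */
static DEFINE_PER_CPU(unsigned long, example_virq_enabled);

static unsigned long example_save_fl(void)
{
	/* Only the IF bit is meaningful; all other bits are undefined. */
	return __get_cpu_var(example_virq_enabled) ? X86_EFLAGS_IF : 0;
}

static void example_restore_fl(unsigned long flags)
{
	__get_cpu_var(example_virq_enabled) = !!(flags & X86_EFLAGS_IF);
}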
197 200
198 struct pv_apic_ops { 201 struct pv_apic_ops {
199 #ifdef CONFIG_X86_LOCAL_APIC 202 #ifdef CONFIG_X86_LOCAL_APIC
200 /* 203 /*
201 * Direct APIC operations, principally for VMI. Ideally 204 * Direct APIC operations, principally for VMI. Ideally
202 * these shouldn't be in this interface. 205 * these shouldn't be in this interface.
203 */ 206 */
204 void (*apic_write)(unsigned long reg, u32 v); 207 void (*apic_write)(unsigned long reg, u32 v);
205 void (*apic_write_atomic)(unsigned long reg, u32 v); 208 void (*apic_write_atomic)(unsigned long reg, u32 v);
206 u32 (*apic_read)(unsigned long reg); 209 u32 (*apic_read)(unsigned long reg);
207 void (*setup_boot_clock)(void); 210 void (*setup_boot_clock)(void);
208 void (*setup_secondary_clock)(void); 211 void (*setup_secondary_clock)(void);
209 212
210 void (*startup_ipi_hook)(int phys_apicid, 213 void (*startup_ipi_hook)(int phys_apicid,
211 unsigned long start_eip, 214 unsigned long start_eip,
212 unsigned long start_esp); 215 unsigned long start_esp);
213 #endif 216 #endif
214 }; 217 };
215 218
216 struct pv_mmu_ops { 219 struct pv_mmu_ops {
217 /* 220 /*
218 * Called before/after init_mm pagetable setup. setup_start 221 * Called before/after init_mm pagetable setup. setup_start
219 * may reset %cr3, and may pre-install parts of the pagetable; 222 * may reset %cr3, and may pre-install parts of the pagetable;
220 * pagetable setup is expected to preserve any existing 223 * pagetable setup is expected to preserve any existing
221 * mapping. 224 * mapping.
222 */ 225 */
223 void (*pagetable_setup_start)(pgd_t *pgd_base); 226 void (*pagetable_setup_start)(pgd_t *pgd_base);
224 void (*pagetable_setup_done)(pgd_t *pgd_base); 227 void (*pagetable_setup_done)(pgd_t *pgd_base);
225 228
226 unsigned long (*read_cr2)(void); 229 unsigned long (*read_cr2)(void);
227 void (*write_cr2)(unsigned long); 230 void (*write_cr2)(unsigned long);
228 231
229 unsigned long (*read_cr3)(void); 232 unsigned long (*read_cr3)(void);
230 void (*write_cr3)(unsigned long); 233 void (*write_cr3)(unsigned long);
231 234
232 /* 235 /*
233 * Hooks for intercepting the creation/use/destruction of an 236 * Hooks for intercepting the creation/use/destruction of an
234 * mm_struct. 237 * mm_struct.
235 */ 238 */
236 void (*activate_mm)(struct mm_struct *prev, 239 void (*activate_mm)(struct mm_struct *prev,
237 struct mm_struct *next); 240 struct mm_struct *next);
238 void (*dup_mmap)(struct mm_struct *oldmm, 241 void (*dup_mmap)(struct mm_struct *oldmm,
239 struct mm_struct *mm); 242 struct mm_struct *mm);
240 void (*exit_mmap)(struct mm_struct *mm); 243 void (*exit_mmap)(struct mm_struct *mm);
241 244
242 245
243 /* TLB operations */ 246 /* TLB operations */
244 void (*flush_tlb_user)(void); 247 void (*flush_tlb_user)(void);
245 void (*flush_tlb_kernel)(void); 248 void (*flush_tlb_kernel)(void);
246 void (*flush_tlb_single)(unsigned long addr); 249 void (*flush_tlb_single)(unsigned long addr);
247 void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, 250 void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
248 unsigned long va); 251 unsigned long va);
249 252
250 /* Hooks for allocating and freeing a pagetable top-level */ 253 /* Hooks for allocating and freeing a pagetable top-level */
251 int (*pgd_alloc)(struct mm_struct *mm); 254 int (*pgd_alloc)(struct mm_struct *mm);
252 void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd); 255 void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd);
253 256
254 /* 257 /*
255 * Hooks for allocating/releasing pagetable pages when they're 258 * Hooks for allocating/releasing pagetable pages when they're
256 * attached to a pagetable 259 * attached to a pagetable
257 */ 260 */
258 void (*alloc_pte)(struct mm_struct *mm, u32 pfn); 261 void (*alloc_pte)(struct mm_struct *mm, u32 pfn);
259 void (*alloc_pmd)(struct mm_struct *mm, u32 pfn); 262 void (*alloc_pmd)(struct mm_struct *mm, u32 pfn);
260 void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); 263 void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
261 void (*alloc_pud)(struct mm_struct *mm, u32 pfn); 264 void (*alloc_pud)(struct mm_struct *mm, u32 pfn);
262 void (*release_pte)(u32 pfn); 265 void (*release_pte)(u32 pfn);
263 void (*release_pmd)(u32 pfn); 266 void (*release_pmd)(u32 pfn);
264 void (*release_pud)(u32 pfn); 267 void (*release_pud)(u32 pfn);
265 268
266 /* Pagetable manipulation functions */ 269 /* Pagetable manipulation functions */
267 void (*set_pte)(pte_t *ptep, pte_t pteval); 270 void (*set_pte)(pte_t *ptep, pte_t pteval);
268 void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, 271 void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
269 pte_t *ptep, pte_t pteval); 272 pte_t *ptep, pte_t pteval);
270 void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); 273 void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
271 void (*pte_update)(struct mm_struct *mm, unsigned long addr, 274 void (*pte_update)(struct mm_struct *mm, unsigned long addr,
272 pte_t *ptep); 275 pte_t *ptep);
273 void (*pte_update_defer)(struct mm_struct *mm, 276 void (*pte_update_defer)(struct mm_struct *mm,
274 unsigned long addr, pte_t *ptep); 277 unsigned long addr, pte_t *ptep);
275 278
276 pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, 279 pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
277 pte_t *ptep); 280 pte_t *ptep);
278 void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, 281 void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
279 pte_t *ptep, pte_t pte); 282 pte_t *ptep, pte_t pte);
280 283
281 pteval_t (*pte_val)(pte_t); 284 pteval_t (*pte_val)(pte_t);
282 pteval_t (*pte_flags)(pte_t); 285 pteval_t (*pte_flags)(pte_t);
283 pte_t (*make_pte)(pteval_t pte); 286 pte_t (*make_pte)(pteval_t pte);
284 287
285 pgdval_t (*pgd_val)(pgd_t); 288 pgdval_t (*pgd_val)(pgd_t);
286 pgd_t (*make_pgd)(pgdval_t pgd); 289 pgd_t (*make_pgd)(pgdval_t pgd);
287 290
288 #if PAGETABLE_LEVELS >= 3 291 #if PAGETABLE_LEVELS >= 3
289 #ifdef CONFIG_X86_PAE 292 #ifdef CONFIG_X86_PAE
290 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); 293 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
291 void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, 294 void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
292 pte_t *ptep, pte_t pte); 295 pte_t *ptep, pte_t pte);
293 void (*pte_clear)(struct mm_struct *mm, unsigned long addr, 296 void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
294 pte_t *ptep); 297 pte_t *ptep);
295 void (*pmd_clear)(pmd_t *pmdp); 298 void (*pmd_clear)(pmd_t *pmdp);
296 299
297 #endif /* CONFIG_X86_PAE */ 300 #endif /* CONFIG_X86_PAE */
298 301
299 void (*set_pud)(pud_t *pudp, pud_t pudval); 302 void (*set_pud)(pud_t *pudp, pud_t pudval);
300 303
301 pmdval_t (*pmd_val)(pmd_t); 304 pmdval_t (*pmd_val)(pmd_t);
302 pmd_t (*make_pmd)(pmdval_t pmd); 305 pmd_t (*make_pmd)(pmdval_t pmd);
303 306
304 #if PAGETABLE_LEVELS == 4 307 #if PAGETABLE_LEVELS == 4
305 pudval_t (*pud_val)(pud_t); 308 pudval_t (*pud_val)(pud_t);
306 pud_t (*make_pud)(pudval_t pud); 309 pud_t (*make_pud)(pudval_t pud);
307 310
308 void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); 311 void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
309 #endif /* PAGETABLE_LEVELS == 4 */ 312 #endif /* PAGETABLE_LEVELS == 4 */
310 #endif /* PAGETABLE_LEVELS >= 3 */ 313 #endif /* PAGETABLE_LEVELS >= 3 */
311 314
312 #ifdef CONFIG_HIGHPTE 315 #ifdef CONFIG_HIGHPTE
313 void *(*kmap_atomic_pte)(struct page *page, enum km_type type); 316 void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
314 #endif 317 #endif
315 318
316 struct pv_lazy_ops lazy_mode; 319 struct pv_lazy_ops lazy_mode;
317 320
318 /* dom0 ops */ 321 /* dom0 ops */
319 322
320 /* Sometimes the physical address is a pfn, and sometimes it's 323 /* Sometimes the physical address is a pfn, and sometimes it's
321 an mfn. We can tell which is which from the index. */ 324 an mfn. We can tell which is which from the index. */
322 void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, 325 void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
323 unsigned long phys, pgprot_t flags); 326 unsigned long phys, pgprot_t flags);
324 }; 327 };
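Not from this patch, but a hedged sketch of the ptep_modify_prot_start/commit pairing declared above: the two hooks bracket a read-modify-write of a live pte so a backend can batch or trap the update; pte_mkyoung() is just an arbitrary modification chosen for the example.

/* Hypothetical caller, shown only to illustrate the protocol. */
static void example_touch_pte(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	pte_t pte = ptep_modify_prot_start(mm, addr, ptep);

	pte = pte_mkyoung(pte);			/* any modification */
	ptep_modify_prot_commit(mm, addr, ptep, pte);
}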
325 328
326 /* This contains all the paravirt structures: we get a convenient 329 /* This contains all the paravirt structures: we get a convenient
327 * number for each function using the offset which we use to indicate 330 * number for each function using the offset which we use to indicate
328 * what to patch. */ 331 * what to patch. */
329 struct paravirt_patch_template { 332 struct paravirt_patch_template {
330 struct pv_init_ops pv_init_ops; 333 struct pv_init_ops pv_init_ops;
331 struct pv_time_ops pv_time_ops; 334 struct pv_time_ops pv_time_ops;
332 struct pv_cpu_ops pv_cpu_ops; 335 struct pv_cpu_ops pv_cpu_ops;
333 struct pv_irq_ops pv_irq_ops; 336 struct pv_irq_ops pv_irq_ops;
334 struct pv_apic_ops pv_apic_ops; 337 struct pv_apic_ops pv_apic_ops;
335 struct pv_mmu_ops pv_mmu_ops; 338 struct pv_mmu_ops pv_mmu_ops;
336 }; 339 };
337 340
338 extern struct pv_info pv_info; 341 extern struct pv_info pv_info;
339 extern struct pv_init_ops pv_init_ops; 342 extern struct pv_init_ops pv_init_ops;
340 extern struct pv_time_ops pv_time_ops; 343 extern struct pv_time_ops pv_time_ops;
341 extern struct pv_cpu_ops pv_cpu_ops; 344 extern struct pv_cpu_ops pv_cpu_ops;
342 extern struct pv_irq_ops pv_irq_ops; 345 extern struct pv_irq_ops pv_irq_ops;
343 extern struct pv_apic_ops pv_apic_ops; 346 extern struct pv_apic_ops pv_apic_ops;
344 extern struct pv_mmu_ops pv_mmu_ops; 347 extern struct pv_mmu_ops pv_mmu_ops;
345 348
346 #define PARAVIRT_PATCH(x) \ 349 #define PARAVIRT_PATCH(x) \
347 (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) 350 (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
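A worked example of the slot-number computation (illustrative; it follows directly from the structure layout above):

/*
 * PARAVIRT_PATCH(pv_cpu_ops.iret)
 *	== offsetof(struct paravirt_patch_template, pv_cpu_ops.iret)
 *	   / sizeof(void *)
 *
 * i.e. the index of the iret slot counted in pointer-sized units from the
 * start of paravirt_patch_template; multiplying back by sizeof(void *)
 * recovers the structure offset, and hence the hook to call or patch in.
 */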
348 351
349 #define paravirt_type(op) \ 352 #define paravirt_type(op) \
350 [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ 353 [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
351 [paravirt_opptr] "m" (op) 354 [paravirt_opptr] "m" (op)
352 #define paravirt_clobber(clobber) \ 355 #define paravirt_clobber(clobber) \
353 [paravirt_clobber] "i" (clobber) 356 [paravirt_clobber] "i" (clobber)
354 357
355 /* 358 /*
356 * Generate some code, and mark it as patchable by the 359 * Generate some code, and mark it as patchable by the
357 * apply_paravirt() alternate instruction patcher. 360 * apply_paravirt() alternate instruction patcher.
358 */ 361 */
359 #define _paravirt_alt(insn_string, type, clobber) \ 362 #define _paravirt_alt(insn_string, type, clobber) \
360 "771:\n\t" insn_string "\n" "772:\n" \ 363 "771:\n\t" insn_string "\n" "772:\n" \
361 ".pushsection .parainstructions,\"a\"\n" \ 364 ".pushsection .parainstructions,\"a\"\n" \
362 _ASM_ALIGN "\n" \ 365 _ASM_ALIGN "\n" \
363 _ASM_PTR " 771b\n" \ 366 _ASM_PTR " 771b\n" \
364 " .byte " type "\n" \ 367 " .byte " type "\n" \
365 " .byte 772b-771b\n" \ 368 " .byte 772b-771b\n" \
366 " .short " clobber "\n" \ 369 " .short " clobber "\n" \
367 ".popsection\n" 370 ".popsection\n"
368 371
369 /* Generate patchable code, with the default asm parameters. */ 372 /* Generate patchable code, with the default asm parameters. */
370 #define paravirt_alt(insn_string) \ 373 #define paravirt_alt(insn_string) \
371 _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") 374 _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
372 375
373 /* Simple instruction patching code. */ 376 /* Simple instruction patching code. */
374 #define DEF_NATIVE(ops, name, code) \ 377 #define DEF_NATIVE(ops, name, code) \
375 extern const char start_##ops##_##name[], end_##ops##_##name[]; \ 378 extern const char start_##ops##_##name[], end_##ops##_##name[]; \
376 asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") 379 asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
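Usage is along these lines (a sketch patterned on the native patch tables; the exact instances live in arch/x86/kernel/paravirt_patch_*.c, so treat these as illustrative):

/* Capture the raw native instruction bytes for a hook, so native_patch()
 * can splice them directly over the indirect call site. */
DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_cpu_ops, clts, "clts");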
377 380
378 unsigned paravirt_patch_nop(void); 381 unsigned paravirt_patch_nop(void);
379 unsigned paravirt_patch_ignore(unsigned len); 382 unsigned paravirt_patch_ignore(unsigned len);
380 unsigned paravirt_patch_call(void *insnbuf, 383 unsigned paravirt_patch_call(void *insnbuf,
381 const void *target, u16 tgt_clobbers, 384 const void *target, u16 tgt_clobbers,
382 unsigned long addr, u16 site_clobbers, 385 unsigned long addr, u16 site_clobbers,
383 unsigned len); 386 unsigned len);
384 unsigned paravirt_patch_jmp(void *insnbuf, const void *target, 387 unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
385 unsigned long addr, unsigned len); 388 unsigned long addr, unsigned len);
386 unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, 389 unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
387 unsigned long addr, unsigned len); 390 unsigned long addr, unsigned len);
388 391
389 unsigned paravirt_patch_insns(void *insnbuf, unsigned len, 392 unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
390 const char *start, const char *end); 393 const char *start, const char *end);
391 394
392 unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 395 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
393 unsigned long addr, unsigned len); 396 unsigned long addr, unsigned len);
394 397
395 int paravirt_disable_iospace(void); 398 int paravirt_disable_iospace(void);
396 399
397 /* 400 /*
398 * This generates an indirect call based on the operation type number. 401 * This generates an indirect call based on the operation type number.
399 * The type number, computed in PARAVIRT_PATCH, is derived from the 402 * The type number, computed in PARAVIRT_PATCH, is derived from the
400 * offset into the paravirt_patch_template structure, and can therefore be 403 * offset into the paravirt_patch_template structure, and can therefore be
401 * freely converted back into a structure offset. 404 * freely converted back into a structure offset.
402 */ 405 */
403 #define PARAVIRT_CALL "call *%[paravirt_opptr];" 406 #define PARAVIRT_CALL "call *%[paravirt_opptr];"
404 407
405 /* 408 /*
406 * These macros are intended to wrap calls through one of the paravirt 409 * These macros are intended to wrap calls through one of the paravirt
407 * ops structs, so that they can be later identified and patched at 410 * ops structs, so that they can be later identified and patched at
408 * runtime. 411 * runtime.
409 * 412 *
410 * Normally, a call to a pv_op function is a simple indirect call: 413 * Normally, a call to a pv_op function is a simple indirect call:
411 * (pv_op_struct.operations)(args...). 414 * (pv_op_struct.operations)(args...).
412 * 415 *
413 * Unfortunately, this is a relatively slow operation for modern CPUs, 416 * Unfortunately, this is a relatively slow operation for modern CPUs,
414 * because the CPU cannot necessarily predict what the destination 417 * because the CPU cannot necessarily predict what the destination
415 * address is. In this case, the address is a runtime constant, so at 418 * address is. In this case, the address is a runtime constant, so at
416 * the very least we can patch the call to be a simple direct call, or 419 * the very least we can patch the call to be a simple direct call, or
417 * ideally, patch an inline implementation into the callsite. (Direct 420 * ideally, patch an inline implementation into the callsite. (Direct
418 * calls are essentially free, because the call and return addresses 421 * calls are essentially free, because the call and return addresses
419 * are completely predictable.) 422 * are completely predictable.)
420 * 423 *
421 * For i386, these macros rely on the standard gcc "regparm(3)" calling 424 * For i386, these macros rely on the standard gcc "regparm(3)" calling
422 * convention, in which the first three arguments are placed in %eax, 425 * convention, in which the first three arguments are placed in %eax,
423 * %edx, %ecx (in that order), and the remaining arguments are placed 426 * %edx, %ecx (in that order), and the remaining arguments are placed
424 * on the stack. All caller-save registers (eax,edx,ecx) are expected 427 * on the stack. All caller-save registers (eax,edx,ecx) are expected
425 * to be modified (either clobbered or used for return values). 428 * to be modified (either clobbered or used for return values).
426 * X86_64, on the other hand, already specifies a register-based calling 429 * X86_64, on the other hand, already specifies a register-based calling
427 * convention, returning in %rax, with parameters passed in %rdi, %rsi, 430 * convention, returning in %rax, with parameters passed in %rdi, %rsi,
428 * %rdx, and %rcx. Note that for this reason, x86_64 does not need any 431 * %rdx, and %rcx. Note that for this reason, x86_64 does not need any
429 * special handling for dealing with 4 arguments, unlike i386. 432 * special handling for dealing with 4 arguments, unlike i386.
430 * However, x86_64 also has to clobber all caller-saved registers, which 433 * However, x86_64 also has to clobber all caller-saved registers, which
431 * unfortunately are quite a few (r8 - r11). 434 * unfortunately are quite a few (r8 - r11).
432 * 435 *
433 * The call instruction itself is marked by placing its start address 436 * The call instruction itself is marked by placing its start address
434 * and size into the .parainstructions section, so that 437 * and size into the .parainstructions section, so that
435 * apply_paravirt() in arch/x86/kernel/alternative.c can do the 438 * apply_paravirt() in arch/x86/kernel/alternative.c can do the
436 * appropriate patching under the control of the backend pv_init_ops 439 * appropriate patching under the control of the backend pv_init_ops
437 * implementation. 440 * implementation.
438 * 441 *
439 * Unfortunately there's no way to get gcc to generate the args setup 442 * Unfortunately there's no way to get gcc to generate the args setup
440 * for the call, and then allow the call itself to be generated by an 443 * for the call, and then allow the call itself to be generated by an
441 * inline asm. Because of this, we must do the complete arg setup and 444 * inline asm. Because of this, we must do the complete arg setup and
442 * return value handling from within these macros. This is fairly 445 * return value handling from within these macros. This is fairly
443 * cumbersome. 446 * cumbersome.
444 * 447 *
445 * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments. 448 * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
446 * It could be extended to more arguments, but there would be little 449 * It could be extended to more arguments, but there would be little
447 * to be gained from that. For each number of arguments, there are 450 * to be gained from that. For each number of arguments, there are
448 * the two VCALL and CALL variants for void and non-void functions. 451 * the two VCALL and CALL variants for void and non-void functions.
449 * 452 *
450 * When there is a return value, the invoker of the macro must specify 453 * When there is a return value, the invoker of the macro must specify
451 * the return type. The macro then uses sizeof() on that type to 454 * the return type. The macro then uses sizeof() on that type to
452 * determine whether it's a 32- or 64-bit value, and places the return 455 * determine whether it's a 32- or 64-bit value, and places the return
453 * in the right register(s) (just %eax for 32-bit, and %edx:%eax for 456 * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
454 * 64-bit). For x86_64 machines, it just returns in %rax regardless of 457 * 64-bit). For x86_64 machines, it just returns in %rax regardless of
455 * the return value size. 458 * the return value size.
456 * 459 *
457 * 64-bit arguments are passed as a pair of adjacent 32-bit arguments; 460 * 64-bit arguments are passed as a pair of adjacent 32-bit arguments;
458 * on i386 the pair is passed 461 * on i386 the pair is passed
459 * in low,high order. 462 * in low,high order.
460 * 463 *
461 * Small structures are passed and returned in registers. The macro 464 * Small structures are passed and returned in registers. The macro
462 * calling convention can't directly deal with this, so the wrapper 465 * calling convention can't directly deal with this, so the wrapper
463 * functions must do this. 466 * functions must do this.
464 * 467 *
465 * These PVOP_* macros are only defined within this header. This 468 * These PVOP_* macros are only defined within this header. This
466 * means that all uses must be wrapped in inline functions. This also 469 * means that all uses must be wrapped in inline functions. This also
467 * makes sure the incoming and outgoing types are always correct. 470 * makes sure the incoming and outgoing types are always correct.
468 */ 471 */
469 #ifdef CONFIG_X86_32 472 #ifdef CONFIG_X86_32
470 #define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx 473 #define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx
471 #define PVOP_CALL_ARGS PVOP_VCALL_ARGS 474 #define PVOP_CALL_ARGS PVOP_VCALL_ARGS
472 #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ 475 #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \
473 "=c" (__ecx) 476 "=c" (__ecx)
474 #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS 477 #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS
475 #define EXTRA_CLOBBERS 478 #define EXTRA_CLOBBERS
476 #define VEXTRA_CLOBBERS 479 #define VEXTRA_CLOBBERS
477 #else 480 #else
478 #define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx 481 #define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx
479 #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax 482 #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax
480 #define PVOP_VCALL_CLOBBERS "=D" (__edi), \ 483 #define PVOP_VCALL_CLOBBERS "=D" (__edi), \
481 "=S" (__esi), "=d" (__edx), \ 484 "=S" (__esi), "=d" (__edx), \
482 "=c" (__ecx) 485 "=c" (__ecx)
483 486
484 #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) 487 #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
485 488
486 #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" 489 #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11"
487 #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" 490 #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11"
488 #endif 491 #endif
489 492
490 #ifdef CONFIG_PARAVIRT_DEBUG 493 #ifdef CONFIG_PARAVIRT_DEBUG
491 #define PVOP_TEST_NULL(op) BUG_ON(op == NULL) 494 #define PVOP_TEST_NULL(op) BUG_ON(op == NULL)
492 #else 495 #else
493 #define PVOP_TEST_NULL(op) ((void)op) 496 #define PVOP_TEST_NULL(op) ((void)op)
494 #endif 497 #endif
495 498
496 #define __PVOP_CALL(rettype, op, pre, post, ...) \ 499 #define __PVOP_CALL(rettype, op, pre, post, ...) \
497 ({ \ 500 ({ \
498 rettype __ret; \ 501 rettype __ret; \
499 PVOP_CALL_ARGS; \ 502 PVOP_CALL_ARGS; \
500 PVOP_TEST_NULL(op); \ 503 PVOP_TEST_NULL(op); \
501 /* This is 32-bit specific, but is okay in 64-bit */ \ 504 /* This is 32-bit specific, but is okay in 64-bit */ \
502 /* since this condition will never hold */ \ 505 /* since this condition will never hold */ \
503 if (sizeof(rettype) > sizeof(unsigned long)) { \ 506 if (sizeof(rettype) > sizeof(unsigned long)) { \
504 asm volatile(pre \ 507 asm volatile(pre \
505 paravirt_alt(PARAVIRT_CALL) \ 508 paravirt_alt(PARAVIRT_CALL) \
506 post \ 509 post \
507 : PVOP_CALL_CLOBBERS \ 510 : PVOP_CALL_CLOBBERS \
508 : paravirt_type(op), \ 511 : paravirt_type(op), \
509 paravirt_clobber(CLBR_ANY), \ 512 paravirt_clobber(CLBR_ANY), \
510 ##__VA_ARGS__ \ 513 ##__VA_ARGS__ \
511 : "memory", "cc" EXTRA_CLOBBERS); \ 514 : "memory", "cc" EXTRA_CLOBBERS); \
512 __ret = (rettype)((((u64)__edx) << 32) | __eax); \ 515 __ret = (rettype)((((u64)__edx) << 32) | __eax); \
513 } else { \ 516 } else { \
514 asm volatile(pre \ 517 asm volatile(pre \
515 paravirt_alt(PARAVIRT_CALL) \ 518 paravirt_alt(PARAVIRT_CALL) \
516 post \ 519 post \
517 : PVOP_CALL_CLOBBERS \ 520 : PVOP_CALL_CLOBBERS \
518 : paravirt_type(op), \ 521 : paravirt_type(op), \
519 paravirt_clobber(CLBR_ANY), \ 522 paravirt_clobber(CLBR_ANY), \
520 ##__VA_ARGS__ \ 523 ##__VA_ARGS__ \
521 : "memory", "cc" EXTRA_CLOBBERS); \ 524 : "memory", "cc" EXTRA_CLOBBERS); \
522 __ret = (rettype)__eax; \ 525 __ret = (rettype)__eax; \
523 } \ 526 } \
524 __ret; \ 527 __ret; \
525 }) 528 })
526 #define __PVOP_VCALL(op, pre, post, ...) \ 529 #define __PVOP_VCALL(op, pre, post, ...) \
527 ({ \ 530 ({ \
528 PVOP_VCALL_ARGS; \ 531 PVOP_VCALL_ARGS; \
529 PVOP_TEST_NULL(op); \ 532 PVOP_TEST_NULL(op); \
530 asm volatile(pre \ 533 asm volatile(pre \
531 paravirt_alt(PARAVIRT_CALL) \ 534 paravirt_alt(PARAVIRT_CALL) \
532 post \ 535 post \
533 : PVOP_VCALL_CLOBBERS \ 536 : PVOP_VCALL_CLOBBERS \
534 : paravirt_type(op), \ 537 : paravirt_type(op), \
535 paravirt_clobber(CLBR_ANY), \ 538 paravirt_clobber(CLBR_ANY), \
536 ##__VA_ARGS__ \ 539 ##__VA_ARGS__ \
537 : "memory", "cc" VEXTRA_CLOBBERS); \ 540 : "memory", "cc" VEXTRA_CLOBBERS); \
538 }) 541 })
539 542
540 #define PVOP_CALL0(rettype, op) \ 543 #define PVOP_CALL0(rettype, op) \
541 __PVOP_CALL(rettype, op, "", "") 544 __PVOP_CALL(rettype, op, "", "")
542 #define PVOP_VCALL0(op) \ 545 #define PVOP_VCALL0(op) \
543 __PVOP_VCALL(op, "", "") 546 __PVOP_VCALL(op, "", "")
544 547
545 #define PVOP_CALL1(rettype, op, arg1) \ 548 #define PVOP_CALL1(rettype, op, arg1) \
546 __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1))) 549 __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)))
547 #define PVOP_VCALL1(op, arg1) \ 550 #define PVOP_VCALL1(op, arg1) \
548 __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1))) 551 __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)))
549 552
550 #define PVOP_CALL2(rettype, op, arg1, arg2) \ 553 #define PVOP_CALL2(rettype, op, arg1, arg2) \
551 __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ 554 __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
552 "1" ((unsigned long)(arg2))) 555 "1" ((unsigned long)(arg2)))
553 #define PVOP_VCALL2(op, arg1, arg2) \ 556 #define PVOP_VCALL2(op, arg1, arg2) \
554 __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ 557 __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
555 "1" ((unsigned long)(arg2))) 558 "1" ((unsigned long)(arg2)))
556 559
557 #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ 560 #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \
558 __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ 561 __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
559 "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) 562 "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
560 #define PVOP_VCALL3(op, arg1, arg2, arg3) \ 563 #define PVOP_VCALL3(op, arg1, arg2, arg3) \
561 __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ 564 __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
562 "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) 565 "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
563 566
564 /* This is the only difference on x86_64, where we can make it much simpler */ 567 /* This is the only difference on x86_64, where we can make it much simpler */
565 #ifdef CONFIG_X86_32 568 #ifdef CONFIG_X86_32
566 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ 569 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
567 __PVOP_CALL(rettype, op, \ 570 __PVOP_CALL(rettype, op, \
568 "push %[_arg4];", "lea 4(%%esp),%%esp;", \ 571 "push %[_arg4];", "lea 4(%%esp),%%esp;", \
569 "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ 572 "0" ((u32)(arg1)), "1" ((u32)(arg2)), \
570 "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) 573 "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
571 #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ 574 #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
572 __PVOP_VCALL(op, \ 575 __PVOP_VCALL(op, \
573 "push %[_arg4];", "lea 4(%%esp),%%esp;", \ 576 "push %[_arg4];", "lea 4(%%esp),%%esp;", \
574 "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ 577 "0" ((u32)(arg1)), "1" ((u32)(arg2)), \
575 "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) 578 "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
576 #else 579 #else
577 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ 580 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
578 __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ 581 __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \
579 "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ 582 "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \
580 "3"((unsigned long)(arg4))) 583 "3"((unsigned long)(arg4)))
581 #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ 584 #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
582 __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ 585 __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \
583 "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ 586 "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \
584 "3"((unsigned long)(arg4))) 587 "3"((unsigned long)(arg4)))
585 #endif 588 #endif
586 589
587 static inline int paravirt_enabled(void) 590 static inline int paravirt_enabled(void)
588 { 591 {
589 return pv_info.paravirt_enabled; 592 return pv_info.paravirt_enabled;
590 } 593 }
591 594
592 static inline void load_sp0(struct tss_struct *tss, 595 static inline void load_sp0(struct tss_struct *tss,
593 struct thread_struct *thread) 596 struct thread_struct *thread)
594 { 597 {
595 PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); 598 PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
596 } 599 }
597 600
598 #define ARCH_SETUP pv_init_ops.arch_setup(); 601 #define ARCH_SETUP pv_init_ops.arch_setup();
599 static inline unsigned long get_wallclock(void) 602 static inline unsigned long get_wallclock(void)
600 { 603 {
601 return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock); 604 return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock);
602 } 605 }
603 606
604 static inline int set_wallclock(unsigned long nowtime) 607 static inline int set_wallclock(unsigned long nowtime)
605 { 608 {
606 return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime); 609 return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime);
607 } 610 }
608 611
609 static inline void (*choose_time_init(void))(void) 612 static inline void (*choose_time_init(void))(void)
610 { 613 {
611 return pv_time_ops.time_init; 614 return pv_time_ops.time_init;
612 } 615 }
613 616
614 /* The paravirtualized CPUID instruction. */ 617 /* The paravirtualized CPUID instruction. */
615 static inline void __cpuid(unsigned int *eax, unsigned int *ebx, 618 static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
616 unsigned int *ecx, unsigned int *edx) 619 unsigned int *ecx, unsigned int *edx)
617 { 620 {
618 PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx); 621 PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx);
619 } 622 }
620 623
621 /* 624 /*
622 * These special macros can be used to get or set a debugging register 625 * These special macros can be used to get or set a debugging register
623 */ 626 */
624 static inline unsigned long paravirt_get_debugreg(int reg) 627 static inline unsigned long paravirt_get_debugreg(int reg)
625 { 628 {
626 return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg); 629 return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
627 } 630 }
628 #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) 631 #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
629 static inline void set_debugreg(unsigned long val, int reg) 632 static inline void set_debugreg(unsigned long val, int reg)
630 { 633 {
631 PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val); 634 PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
632 } 635 }
633 636
634 static inline void clts(void) 637 static inline void clts(void)
635 { 638 {
636 PVOP_VCALL0(pv_cpu_ops.clts); 639 PVOP_VCALL0(pv_cpu_ops.clts);
637 } 640 }
638 641
639 static inline unsigned long read_cr0(void) 642 static inline unsigned long read_cr0(void)
640 { 643 {
641 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0); 644 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
642 } 645 }
643 646
644 static inline void write_cr0(unsigned long x) 647 static inline void write_cr0(unsigned long x)
645 { 648 {
646 PVOP_VCALL1(pv_cpu_ops.write_cr0, x); 649 PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
647 } 650 }
648 651
649 static inline unsigned long read_cr2(void) 652 static inline unsigned long read_cr2(void)
650 { 653 {
651 return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2); 654 return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
652 } 655 }
653 656
654 static inline void write_cr2(unsigned long x) 657 static inline void write_cr2(unsigned long x)
655 { 658 {
656 PVOP_VCALL1(pv_mmu_ops.write_cr2, x); 659 PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
657 } 660 }
658 661
659 static inline unsigned long read_cr3(void) 662 static inline unsigned long read_cr3(void)
660 { 663 {
661 return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3); 664 return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
662 } 665 }
663 666
664 static inline void write_cr3(unsigned long x) 667 static inline void write_cr3(unsigned long x)
665 { 668 {
666 PVOP_VCALL1(pv_mmu_ops.write_cr3, x); 669 PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
667 } 670 }
668 671
669 static inline unsigned long read_cr4(void) 672 static inline unsigned long read_cr4(void)
670 { 673 {
671 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); 674 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
672 } 675 }
673 static inline unsigned long read_cr4_safe(void) 676 static inline unsigned long read_cr4_safe(void)
674 { 677 {
675 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe); 678 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
676 } 679 }
677 680
678 static inline void write_cr4(unsigned long x) 681 static inline void write_cr4(unsigned long x)
679 { 682 {
680 PVOP_VCALL1(pv_cpu_ops.write_cr4, x); 683 PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
681 } 684 }
682 685
683 #ifdef CONFIG_X86_64 686 #ifdef CONFIG_X86_64
684 static inline unsigned long read_cr8(void) 687 static inline unsigned long read_cr8(void)
685 { 688 {
686 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8); 689 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8);
687 } 690 }
688 691
689 static inline void write_cr8(unsigned long x) 692 static inline void write_cr8(unsigned long x)
690 { 693 {
691 PVOP_VCALL1(pv_cpu_ops.write_cr8, x); 694 PVOP_VCALL1(pv_cpu_ops.write_cr8, x);
692 } 695 }
693 #endif 696 #endif
694 697
695 static inline void raw_safe_halt(void) 698 static inline void raw_safe_halt(void)
696 { 699 {
697 PVOP_VCALL0(pv_irq_ops.safe_halt); 700 PVOP_VCALL0(pv_irq_ops.safe_halt);
698 } 701 }
699 702
700 static inline void halt(void) 703 static inline void halt(void)
701 { 704 {
702 PVOP_VCALL0(pv_irq_ops.safe_halt); 705 PVOP_VCALL0(pv_irq_ops.safe_halt);
703 } 706 }
704 707
705 static inline void wbinvd(void) 708 static inline void wbinvd(void)
706 { 709 {
707 PVOP_VCALL0(pv_cpu_ops.wbinvd); 710 PVOP_VCALL0(pv_cpu_ops.wbinvd);
708 } 711 }
709 712
710 #define get_kernel_rpl() (pv_info.kernel_rpl) 713 #define get_kernel_rpl() (pv_info.kernel_rpl)
711 714
712 static inline u64 paravirt_read_msr(unsigned msr, int *err) 715 static inline u64 paravirt_read_msr(unsigned msr, int *err)
713 { 716 {
714 return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); 717 return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
715 } 718 }
716 static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) 719 static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
717 { 720 {
718 return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); 721 return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
719 } 722 }
720 723
721 /* These should all do BUG_ON(_err), but our headers are too tangled. */ 724 /* These should all do BUG_ON(_err), but our headers are too tangled. */
722 #define rdmsr(msr, val1, val2) \ 725 #define rdmsr(msr, val1, val2) \
723 do { \ 726 do { \
724 int _err; \ 727 int _err; \
725 u64 _l = paravirt_read_msr(msr, &_err); \ 728 u64 _l = paravirt_read_msr(msr, &_err); \
726 val1 = (u32)_l; \ 729 val1 = (u32)_l; \
727 val2 = _l >> 32; \ 730 val2 = _l >> 32; \
728 } while (0) 731 } while (0)
729 732
730 #define wrmsr(msr, val1, val2) \ 733 #define wrmsr(msr, val1, val2) \
731 do { \ 734 do { \
732 paravirt_write_msr(msr, val1, val2); \ 735 paravirt_write_msr(msr, val1, val2); \
733 } while (0) 736 } while (0)
734 737
735 #define rdmsrl(msr, val) \ 738 #define rdmsrl(msr, val) \
736 do { \ 739 do { \
737 int _err; \ 740 int _err; \
738 val = paravirt_read_msr(msr, &_err); \ 741 val = paravirt_read_msr(msr, &_err); \
739 } while (0) 742 } while (0)
740 743
741 #define wrmsrl(msr, val) wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32) 744 #define wrmsrl(msr, val) wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32)
742 #define wrmsr_safe(msr, a, b) paravirt_write_msr(msr, a, b) 745 #define wrmsr_safe(msr, a, b) paravirt_write_msr(msr, a, b)
743 746
744 /* rdmsr with exception handling */ 747 /* rdmsr with exception handling */
745 #define rdmsr_safe(msr, a, b) \ 748 #define rdmsr_safe(msr, a, b) \
746 ({ \ 749 ({ \
747 int _err; \ 750 int _err; \
748 u64 _l = paravirt_read_msr(msr, &_err); \ 751 u64 _l = paravirt_read_msr(msr, &_err); \
749 (*a) = (u32)_l; \ 752 (*a) = (u32)_l; \
750 (*b) = _l >> 32; \ 753 (*b) = _l >> 32; \
751 _err; \ 754 _err; \
752 }) 755 })
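A hedged usage sketch (caller name and error code are invented): rdmsr_safe() returns the fault status so the caller can probe MSRs that may not exist on this CPU.

/* Illustrative only: read an MSR that may fault on this CPU. */
static int example_read_msr_checked(unsigned msr, u64 *val)
{
	u32 lo, hi;

	if (rdmsr_safe(msr, &lo, &hi))
		return -EIO;		/* the rdmsr faulted */

	*val = ((u64)hi << 32) | lo;
	return 0;
}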
753 756
754 static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) 757 static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
755 { 758 {
756 int err; 759 int err;
757 760
758 *p = paravirt_read_msr(msr, &err); 761 *p = paravirt_read_msr(msr, &err);
759 return err; 762 return err;
760 } 763 }
761 764
762 static inline u64 paravirt_read_tsc(void) 765 static inline u64 paravirt_read_tsc(void)
763 { 766 {
764 return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); 767 return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
765 } 768 }
766 769
767 #define rdtscl(low) \ 770 #define rdtscl(low) \
768 do { \ 771 do { \
769 u64 _l = paravirt_read_tsc(); \ 772 u64 _l = paravirt_read_tsc(); \
770 low = (int)_l; \ 773 low = (int)_l; \
771 } while (0) 774 } while (0)
772 775
773 #define rdtscll(val) (val = paravirt_read_tsc()) 776 #define rdtscll(val) (val = paravirt_read_tsc())
774 777
775 static inline unsigned long long paravirt_sched_clock(void) 778 static inline unsigned long long paravirt_sched_clock(void)
776 { 779 {
777 return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); 780 return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
778 } 781 }
779 #define calculate_cpu_khz() (pv_time_ops.get_cpu_khz()) 782 #define calculate_cpu_khz() (pv_time_ops.get_cpu_khz())
780 783
781 static inline unsigned long long paravirt_read_pmc(int counter) 784 static inline unsigned long long paravirt_read_pmc(int counter)
782 { 785 {
783 return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter); 786 return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
784 } 787 }
785 788
786 #define rdpmc(counter, low, high) \ 789 #define rdpmc(counter, low, high) \
787 do { \ 790 do { \
788 u64 _l = paravirt_read_pmc(counter); \ 791 u64 _l = paravirt_read_pmc(counter); \
789 low = (u32)_l; \ 792 low = (u32)_l; \
790 high = _l >> 32; \ 793 high = _l >> 32; \
791 } while (0) 794 } while (0)
792 795
793 static inline unsigned long long paravirt_rdtscp(unsigned int *aux) 796 static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
794 { 797 {
795 return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); 798 return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
796 } 799 }
797 800
798 #define rdtscp(low, high, aux) \ 801 #define rdtscp(low, high, aux) \
799 do { \ 802 do { \
800 unsigned int __aux; \ 803 unsigned int __aux; \
801 unsigned long __val = paravirt_rdtscp(&__aux); \ 804 unsigned long __val = paravirt_rdtscp(&__aux); \
802 (low) = (u32)__val; \ 805 (low) = (u32)__val; \
803 (high) = (u32)(__val >> 32); \ 806 (high) = (u32)(__val >> 32); \
804 (aux) = __aux; \ 807 (aux) = __aux; \
805 } while (0) 808 } while (0)
806 809
807 #define rdtscpll(val, aux) \ 810 #define rdtscpll(val, aux) \
808 do { \ 811 do { \
809 unsigned long __aux; \ 812 unsigned long __aux; \
810 val = paravirt_rdtscp(&__aux); \ 813 val = paravirt_rdtscp(&__aux); \
811 (aux) = __aux; \ 814 (aux) = __aux; \
812 } while (0) 815 } while (0)
813 816
814 static inline void load_TR_desc(void) 817 static inline void load_TR_desc(void)
815 { 818 {
816 PVOP_VCALL0(pv_cpu_ops.load_tr_desc); 819 PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
817 } 820 }
818 static inline void load_gdt(const struct desc_ptr *dtr) 821 static inline void load_gdt(const struct desc_ptr *dtr)
819 { 822 {
820 PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr); 823 PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
821 } 824 }
822 static inline void load_idt(const struct desc_ptr *dtr) 825 static inline void load_idt(const struct desc_ptr *dtr)
823 { 826 {
824 PVOP_VCALL1(pv_cpu_ops.load_idt, dtr); 827 PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
825 } 828 }
826 static inline void set_ldt(const void *addr, unsigned entries) 829 static inline void set_ldt(const void *addr, unsigned entries)
827 { 830 {
828 PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); 831 PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
829 } 832 }
830 static inline void store_gdt(struct desc_ptr *dtr) 833 static inline void store_gdt(struct desc_ptr *dtr)
831 { 834 {
832 PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr); 835 PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
833 } 836 }
834 static inline void store_idt(struct desc_ptr *dtr) 837 static inline void store_idt(struct desc_ptr *dtr)
835 { 838 {
836 PVOP_VCALL1(pv_cpu_ops.store_idt, dtr); 839 PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
837 } 840 }
838 static inline unsigned long paravirt_store_tr(void) 841 static inline unsigned long paravirt_store_tr(void)
839 { 842 {
840 return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr); 843 return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
841 } 844 }
842 #define store_tr(tr) ((tr) = paravirt_store_tr()) 845 #define store_tr(tr) ((tr) = paravirt_store_tr())
843 static inline void load_TLS(struct thread_struct *t, unsigned cpu) 846 static inline void load_TLS(struct thread_struct *t, unsigned cpu)
844 { 847 {
845 PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu); 848 PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
846 } 849 }
850
851 #ifdef CONFIG_X86_64
852 static inline void load_gs_index(unsigned int gs)
853 {
854 PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs);
855 }
856 #endif
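For context (a hedged sketch, not part of the diff): existing callers such as the 64-bit context-switch path keep calling load_gs_index() unchanged; under CONFIG_PARAVIRT the call now dispatches through pv_cpu_ops.load_gs_index, and natively it ends up in native_load_gs_index().

/* Hypothetical call site, shown only to illustrate the dispatch. */
static inline void example_reload_user_gs(unsigned short gsindex)
{
	load_gs_index(gsindex);	/* -> pv_cpu_ops.load_gs_index under paravirt */
}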
847 857
848 static inline void write_ldt_entry(struct desc_struct *dt, int entry, 858 static inline void write_ldt_entry(struct desc_struct *dt, int entry,
849 const void *desc) 859 const void *desc)
850 { 860 {
851 PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc); 861 PVOP_VCALL3(pv_cpu_ops.write_ldt_entry, dt, entry, desc);
852 } 862 }
853 863
854 static inline void write_gdt_entry(struct desc_struct *dt, int entry, 864 static inline void write_gdt_entry(struct desc_struct *dt, int entry,
855 void *desc, int type) 865 void *desc, int type)
856 { 866 {
857 PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type); 867 PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, desc, type);
858 } 868 }
859 869
860 static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) 870 static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
861 { 871 {
862 PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g); 872 PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
863 } 873 }
864 static inline void set_iopl_mask(unsigned mask) 874 static inline void set_iopl_mask(unsigned mask)
865 { 875 {
866 PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask); 876 PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
867 } 877 }
868 878
869 /* The paravirtualized I/O functions */ 879 /* The paravirtualized I/O functions */
870 static inline void slow_down_io(void) 880 static inline void slow_down_io(void)
871 { 881 {
872 pv_cpu_ops.io_delay(); 882 pv_cpu_ops.io_delay();
873 #ifdef REALLY_SLOW_IO 883 #ifdef REALLY_SLOW_IO
874 pv_cpu_ops.io_delay(); 884 pv_cpu_ops.io_delay();
875 pv_cpu_ops.io_delay(); 885 pv_cpu_ops.io_delay();
876 pv_cpu_ops.io_delay(); 886 pv_cpu_ops.io_delay();
877 #endif 887 #endif
878 } 888 }
879 889
880 #ifdef CONFIG_X86_LOCAL_APIC 890 #ifdef CONFIG_X86_LOCAL_APIC
881 /* 891 /*
882 * Basic functions accessing APICs. 892 * Basic functions accessing APICs.
883 */ 893 */
884 static inline void apic_write(unsigned long reg, u32 v) 894 static inline void apic_write(unsigned long reg, u32 v)
885 { 895 {
886 PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); 896 PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
887 } 897 }
888 898
889 static inline void apic_write_atomic(unsigned long reg, u32 v) 899 static inline void apic_write_atomic(unsigned long reg, u32 v)
890 { 900 {
891 PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v); 901 PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
892 } 902 }
893 903
894 static inline u32 apic_read(unsigned long reg) 904 static inline u32 apic_read(unsigned long reg)
895 { 905 {
896 return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); 906 return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
897 } 907 }
898 908
899 static inline void setup_boot_clock(void) 909 static inline void setup_boot_clock(void)
900 { 910 {
901 PVOP_VCALL0(pv_apic_ops.setup_boot_clock); 911 PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
902 } 912 }
903 913
904 static inline void setup_secondary_clock(void) 914 static inline void setup_secondary_clock(void)
905 { 915 {
906 PVOP_VCALL0(pv_apic_ops.setup_secondary_clock); 916 PVOP_VCALL0(pv_apic_ops.setup_secondary_clock);
907 } 917 }
908 #endif 918 #endif
909 919
910 static inline void paravirt_post_allocator_init(void) 920 static inline void paravirt_post_allocator_init(void)
911 { 921 {
912 if (pv_init_ops.post_allocator_init) 922 if (pv_init_ops.post_allocator_init)
913 (*pv_init_ops.post_allocator_init)(); 923 (*pv_init_ops.post_allocator_init)();
914 } 924 }
915 925
916 static inline void paravirt_pagetable_setup_start(pgd_t *base) 926 static inline void paravirt_pagetable_setup_start(pgd_t *base)
917 { 927 {
918 (*pv_mmu_ops.pagetable_setup_start)(base); 928 (*pv_mmu_ops.pagetable_setup_start)(base);
919 } 929 }
920 930
921 static inline void paravirt_pagetable_setup_done(pgd_t *base) 931 static inline void paravirt_pagetable_setup_done(pgd_t *base)
922 { 932 {
923 (*pv_mmu_ops.pagetable_setup_done)(base); 933 (*pv_mmu_ops.pagetable_setup_done)(base);
924 } 934 }
925 935
926 #ifdef CONFIG_SMP 936 #ifdef CONFIG_SMP
927 static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, 937 static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
928 unsigned long start_esp) 938 unsigned long start_esp)
929 { 939 {
930 PVOP_VCALL3(pv_apic_ops.startup_ipi_hook, 940 PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
931 phys_apicid, start_eip, start_esp); 941 phys_apicid, start_eip, start_esp);
932 } 942 }
933 #endif 943 #endif
934 944
935 static inline void paravirt_activate_mm(struct mm_struct *prev, 945 static inline void paravirt_activate_mm(struct mm_struct *prev,
936 struct mm_struct *next) 946 struct mm_struct *next)
937 { 947 {
938 PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next); 948 PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
939 } 949 }
940 950
941 static inline void arch_dup_mmap(struct mm_struct *oldmm, 951 static inline void arch_dup_mmap(struct mm_struct *oldmm,
942 struct mm_struct *mm) 952 struct mm_struct *mm)
943 { 953 {
944 PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm); 954 PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
945 } 955 }
946 956
947 static inline void arch_exit_mmap(struct mm_struct *mm) 957 static inline void arch_exit_mmap(struct mm_struct *mm)
948 { 958 {
949 PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm); 959 PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
950 } 960 }
951 961
952 static inline void __flush_tlb(void) 962 static inline void __flush_tlb(void)
953 { 963 {
954 PVOP_VCALL0(pv_mmu_ops.flush_tlb_user); 964 PVOP_VCALL0(pv_mmu_ops.flush_tlb_user);
955 } 965 }
956 static inline void __flush_tlb_global(void) 966 static inline void __flush_tlb_global(void)
957 { 967 {
958 PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel); 968 PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
959 } 969 }
960 static inline void __flush_tlb_single(unsigned long addr) 970 static inline void __flush_tlb_single(unsigned long addr)
961 { 971 {
962 PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); 972 PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
963 } 973 }
964 974
965 static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, 975 static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
966 unsigned long va) 976 unsigned long va)
967 { 977 {
968 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); 978 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
969 } 979 }
970 980
971 static inline int paravirt_pgd_alloc(struct mm_struct *mm) 981 static inline int paravirt_pgd_alloc(struct mm_struct *mm)
972 { 982 {
973 return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm); 983 return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm);
974 } 984 }
975 985
976 static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) 986 static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd)
977 { 987 {
978 PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd); 988 PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
979 } 989 }
980 990
981 static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn) 991 static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn)
982 { 992 {
983 PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn); 993 PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
984 } 994 }
985 static inline void paravirt_release_pte(unsigned pfn) 995 static inline void paravirt_release_pte(unsigned pfn)
986 { 996 {
987 PVOP_VCALL1(pv_mmu_ops.release_pte, pfn); 997 PVOP_VCALL1(pv_mmu_ops.release_pte, pfn);
988 } 998 }
989 999
990 static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn) 1000 static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn)
991 { 1001 {
992 PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); 1002 PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
993 } 1003 }
994 1004
995 static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn, 1005 static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn,
996 unsigned start, unsigned count) 1006 unsigned start, unsigned count)
997 { 1007 {
998 PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count); 1008 PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count);
999 } 1009 }
1000 static inline void paravirt_release_pmd(unsigned pfn) 1010 static inline void paravirt_release_pmd(unsigned pfn)
1001 { 1011 {
1002 PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); 1012 PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
1003 } 1013 }
1004 1014
1005 static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn) 1015 static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn)
1006 { 1016 {
1007 PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn); 1017 PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn);
1008 } 1018 }
1009 static inline void paravirt_release_pud(unsigned pfn) 1019 static inline void paravirt_release_pud(unsigned pfn)
1010 { 1020 {
1011 PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); 1021 PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
1012 } 1022 }
1013 1023
1014 #ifdef CONFIG_HIGHPTE 1024 #ifdef CONFIG_HIGHPTE
1015 static inline void *kmap_atomic_pte(struct page *page, enum km_type type) 1025 static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
1016 { 1026 {
1017 unsigned long ret; 1027 unsigned long ret;
1018 ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type); 1028 ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type);
1019 return (void *)ret; 1029 return (void *)ret;
1020 } 1030 }
1021 #endif 1031 #endif
1022 1032
1023 static inline void pte_update(struct mm_struct *mm, unsigned long addr, 1033 static inline void pte_update(struct mm_struct *mm, unsigned long addr,
1024 pte_t *ptep) 1034 pte_t *ptep)
1025 { 1035 {
1026 PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); 1036 PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
1027 } 1037 }
1028 1038
1029 static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, 1039 static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
1030 pte_t *ptep) 1040 pte_t *ptep)
1031 { 1041 {
1032 PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep); 1042 PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
1033 } 1043 }
1034 1044
1035 static inline pte_t __pte(pteval_t val) 1045 static inline pte_t __pte(pteval_t val)
1036 { 1046 {
1037 pteval_t ret; 1047 pteval_t ret;
1038 1048
1039 if (sizeof(pteval_t) > sizeof(long)) 1049 if (sizeof(pteval_t) > sizeof(long))
1040 ret = PVOP_CALL2(pteval_t, 1050 ret = PVOP_CALL2(pteval_t,
1041 pv_mmu_ops.make_pte, 1051 pv_mmu_ops.make_pte,
1042 val, (u64)val >> 32); 1052 val, (u64)val >> 32);
1043 else 1053 else
1044 ret = PVOP_CALL1(pteval_t, 1054 ret = PVOP_CALL1(pteval_t,
1045 pv_mmu_ops.make_pte, 1055 pv_mmu_ops.make_pte,
1046 val); 1056 val);
1047 1057
1048 return (pte_t) { .pte = ret }; 1058 return (pte_t) { .pte = ret };
1049 } 1059 }
1050 1060
1051 static inline pteval_t pte_val(pte_t pte) 1061 static inline pteval_t pte_val(pte_t pte)
1052 { 1062 {
1053 pteval_t ret; 1063 pteval_t ret;
1054 1064
1055 if (sizeof(pteval_t) > sizeof(long)) 1065 if (sizeof(pteval_t) > sizeof(long))
1056 ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val, 1066 ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val,
1057 pte.pte, (u64)pte.pte >> 32); 1067 pte.pte, (u64)pte.pte >> 32);
1058 else 1068 else
1059 ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val, 1069 ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val,
1060 pte.pte); 1070 pte.pte);
1061 1071
1062 return ret; 1072 return ret;
1063 } 1073 }
1064 1074
1065 static inline pteval_t pte_flags(pte_t pte) 1075 static inline pteval_t pte_flags(pte_t pte)
1066 { 1076 {
1067 pteval_t ret; 1077 pteval_t ret;
1068 1078
1069 if (sizeof(pteval_t) > sizeof(long)) 1079 if (sizeof(pteval_t) > sizeof(long))
1070 ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, 1080 ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
1071 pte.pte, (u64)pte.pte >> 32); 1081 pte.pte, (u64)pte.pte >> 32);
1072 else 1082 else
1073 ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, 1083 ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
1074 pte.pte); 1084 pte.pte);
1075 1085
1076 return ret; 1086 return ret;
1077 } 1087 }
1078 1088
1079 static inline pgd_t __pgd(pgdval_t val) 1089 static inline pgd_t __pgd(pgdval_t val)
1080 { 1090 {
1081 pgdval_t ret; 1091 pgdval_t ret;
1082 1092
1083 if (sizeof(pgdval_t) > sizeof(long)) 1093 if (sizeof(pgdval_t) > sizeof(long))
1084 ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd, 1094 ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd,
1085 val, (u64)val >> 32); 1095 val, (u64)val >> 32);
1086 else 1096 else
1087 ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd, 1097 ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd,
1088 val); 1098 val);
1089 1099
1090 return (pgd_t) { ret }; 1100 return (pgd_t) { ret };
1091 } 1101 }
1092 1102
1093 static inline pgdval_t pgd_val(pgd_t pgd) 1103 static inline pgdval_t pgd_val(pgd_t pgd)
1094 { 1104 {
1095 pgdval_t ret; 1105 pgdval_t ret;
1096 1106
1097 if (sizeof(pgdval_t) > sizeof(long)) 1107 if (sizeof(pgdval_t) > sizeof(long))
1098 ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val, 1108 ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val,
1099 pgd.pgd, (u64)pgd.pgd >> 32); 1109 pgd.pgd, (u64)pgd.pgd >> 32);
1100 else 1110 else
1101 ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val, 1111 ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val,
1102 pgd.pgd); 1112 pgd.pgd);
1103 1113
1104 return ret; 1114 return ret;
1105 } 1115 }
1106 1116
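/*
 * Illustrative sketch (hypothetical helper, not from this patch): callers
 * never split wide values themselves; they go through __pte()/pte_val()
 * above, and the sizeof(pteval_t) > sizeof(long) test selects either the
 * two-argument form (low/high 32-bit halves, e.g. 32-bit PAE) or the
 * one-argument form at compile time.  A caller just round-trips the value:
 */
static inline pte_t make_readonly_pte(pte_t pte)
{
        pteval_t val = pte_val(pte);    /* pvop: unpack to a plain integer */

        val &= ~_PAGE_RW;               /* manipulate it as an integer */

        return __pte(val);              /* pvop: repack into a pte_t */
}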
1107 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION 1117 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
1108 static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, 1118 static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
1109 pte_t *ptep) 1119 pte_t *ptep)
1110 { 1120 {
1111 pteval_t ret; 1121 pteval_t ret;
1112 1122
1113 ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start, 1123 ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start,
1114 mm, addr, ptep); 1124 mm, addr, ptep);
1115 1125
1116 return (pte_t) { .pte = ret }; 1126 return (pte_t) { .pte = ret };
1117 } 1127 }
1118 1128
1119 static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, 1129 static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
1120 pte_t *ptep, pte_t pte) 1130 pte_t *ptep, pte_t pte)
1121 { 1131 {
1122 if (sizeof(pteval_t) > sizeof(long)) 1132 if (sizeof(pteval_t) > sizeof(long))
1123 /* 5 arg words */ 1133 /* 5 arg words */
1124 pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte); 1134 pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte);
1125 else 1135 else
1126 PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit, 1136 PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit,
1127 mm, addr, ptep, pte.pte); 1137 mm, addr, ptep, pte.pte);
1128 } 1138 }
1129 1139
1130 static inline void set_pte(pte_t *ptep, pte_t pte) 1140 static inline void set_pte(pte_t *ptep, pte_t pte)
1131 { 1141 {
1132 if (sizeof(pteval_t) > sizeof(long)) 1142 if (sizeof(pteval_t) > sizeof(long))
1133 PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, 1143 PVOP_VCALL3(pv_mmu_ops.set_pte, ptep,
1134 pte.pte, (u64)pte.pte >> 32); 1144 pte.pte, (u64)pte.pte >> 32);
1135 else 1145 else
1136 PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, 1146 PVOP_VCALL2(pv_mmu_ops.set_pte, ptep,
1137 pte.pte); 1147 pte.pte);
1138 } 1148 }
1139 1149
1140 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, 1150 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
1141 pte_t *ptep, pte_t pte) 1151 pte_t *ptep, pte_t pte)
1142 { 1152 {
1143 if (sizeof(pteval_t) > sizeof(long)) 1153 if (sizeof(pteval_t) > sizeof(long))
1144 /* 5 arg words */ 1154 /* 5 arg words */
1145 pv_mmu_ops.set_pte_at(mm, addr, ptep, pte); 1155 pv_mmu_ops.set_pte_at(mm, addr, ptep, pte);
1146 else 1156 else
1147 PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); 1157 PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
1148 } 1158 }
1149 1159
1150 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) 1160 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
1151 { 1161 {
1152 pmdval_t val = native_pmd_val(pmd); 1162 pmdval_t val = native_pmd_val(pmd);
1153 1163
1154 if (sizeof(pmdval_t) > sizeof(long)) 1164 if (sizeof(pmdval_t) > sizeof(long))
1155 PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32); 1165 PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp, val, (u64)val >> 32);
1156 else 1166 else
1157 PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); 1167 PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val);
1158 } 1168 }
1159 1169
1160 #if PAGETABLE_LEVELS >= 3 1170 #if PAGETABLE_LEVELS >= 3
1161 static inline pmd_t __pmd(pmdval_t val) 1171 static inline pmd_t __pmd(pmdval_t val)
1162 { 1172 {
1163 pmdval_t ret; 1173 pmdval_t ret;
1164 1174
1165 if (sizeof(pmdval_t) > sizeof(long)) 1175 if (sizeof(pmdval_t) > sizeof(long))
1166 ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd, 1176 ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
1167 val, (u64)val >> 32); 1177 val, (u64)val >> 32);
1168 else 1178 else
1169 ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd, 1179 ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd,
1170 val); 1180 val);
1171 1181
1172 return (pmd_t) { ret }; 1182 return (pmd_t) { ret };
1173 } 1183 }
1174 1184
1175 static inline pmdval_t pmd_val(pmd_t pmd) 1185 static inline pmdval_t pmd_val(pmd_t pmd)
1176 { 1186 {
1177 pmdval_t ret; 1187 pmdval_t ret;
1178 1188
1179 if (sizeof(pmdval_t) > sizeof(long)) 1189 if (sizeof(pmdval_t) > sizeof(long))
1180 ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val, 1190 ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val,
1181 pmd.pmd, (u64)pmd.pmd >> 32); 1191 pmd.pmd, (u64)pmd.pmd >> 32);
1182 else 1192 else
1183 ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val, 1193 ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val,
1184 pmd.pmd); 1194 pmd.pmd);
1185 1195
1186 return ret; 1196 return ret;
1187 } 1197 }
1188 1198
1189 static inline void set_pud(pud_t *pudp, pud_t pud) 1199 static inline void set_pud(pud_t *pudp, pud_t pud)
1190 { 1200 {
1191 pudval_t val = native_pud_val(pud); 1201 pudval_t val = native_pud_val(pud);
1192 1202
1193 if (sizeof(pudval_t) > sizeof(long)) 1203 if (sizeof(pudval_t) > sizeof(long))
1194 PVOP_VCALL3(pv_mmu_ops.set_pud, pudp, 1204 PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
1195 val, (u64)val >> 32); 1205 val, (u64)val >> 32);
1196 else 1206 else
1197 PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, 1207 PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
1198 val); 1208 val);
1199 } 1209 }
1200 #if PAGETABLE_LEVELS == 4 1210 #if PAGETABLE_LEVELS == 4
1201 static inline pud_t __pud(pudval_t val) 1211 static inline pud_t __pud(pudval_t val)
1202 { 1212 {
1203 pudval_t ret; 1213 pudval_t ret;
1204 1214
1205 if (sizeof(pudval_t) > sizeof(long)) 1215 if (sizeof(pudval_t) > sizeof(long))
1206 ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud, 1216 ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud,
1207 val, (u64)val >> 32); 1217 val, (u64)val >> 32);
1208 else 1218 else
1209 ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud, 1219 ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud,
1210 val); 1220 val);
1211 1221
1212 return (pud_t) { ret }; 1222 return (pud_t) { ret };
1213 } 1223 }
1214 1224
1215 static inline pudval_t pud_val(pud_t pud) 1225 static inline pudval_t pud_val(pud_t pud)
1216 { 1226 {
1217 pudval_t ret; 1227 pudval_t ret;
1218 1228
1219 if (sizeof(pudval_t) > sizeof(long)) 1229 if (sizeof(pudval_t) > sizeof(long))
1220 ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val, 1230 ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val,
1221 pud.pud, (u64)pud.pud >> 32); 1231 pud.pud, (u64)pud.pud >> 32);
1222 else 1232 else
1223 ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val, 1233 ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val,
1224 pud.pud); 1234 pud.pud);
1225 1235
1226 return ret; 1236 return ret;
1227 } 1237 }
1228 1238
1229 static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) 1239 static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
1230 { 1240 {
1231 pgdval_t val = native_pgd_val(pgd); 1241 pgdval_t val = native_pgd_val(pgd);
1232 1242
1233 if (sizeof(pgdval_t) > sizeof(long)) 1243 if (sizeof(pgdval_t) > sizeof(long))
1234 PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp, 1244 PVOP_VCALL3(pv_mmu_ops.set_pgd, pgdp,
1235 val, (u64)val >> 32); 1245 val, (u64)val >> 32);
1236 else 1246 else
1237 PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, 1247 PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp,
1238 val); 1248 val);
1239 } 1249 }
1240 1250
1241 static inline void pgd_clear(pgd_t *pgdp) 1251 static inline void pgd_clear(pgd_t *pgdp)
1242 { 1252 {
1243 set_pgd(pgdp, __pgd(0)); 1253 set_pgd(pgdp, __pgd(0));
1244 } 1254 }
1245 1255
1246 static inline void pud_clear(pud_t *pudp) 1256 static inline void pud_clear(pud_t *pudp)
1247 { 1257 {
1248 set_pud(pudp, __pud(0)); 1258 set_pud(pudp, __pud(0));
1249 } 1259 }
1250 1260
1251 #endif /* PAGETABLE_LEVELS == 4 */ 1261 #endif /* PAGETABLE_LEVELS == 4 */
1252 1262
1253 #endif /* PAGETABLE_LEVELS >= 3 */ 1263 #endif /* PAGETABLE_LEVELS >= 3 */
1254 1264
1255 #ifdef CONFIG_X86_PAE 1265 #ifdef CONFIG_X86_PAE
1256 /* Special-case pte-setting operations for PAE, which can't update a 1266 /* Special-case pte-setting operations for PAE, which can't update a
1257 64-bit pte atomically */ 1267 64-bit pte atomically */
1258 static inline void set_pte_atomic(pte_t *ptep, pte_t pte) 1268 static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
1259 { 1269 {
1260 PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep, 1270 PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
1261 pte.pte, pte.pte >> 32); 1271 pte.pte, pte.pte >> 32);
1262 } 1272 }
1263 1273
1264 static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, 1274 static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
1265 pte_t *ptep, pte_t pte) 1275 pte_t *ptep, pte_t pte)
1266 { 1276 {
1267 /* 5 arg words */ 1277 /* 5 arg words */
1268 pv_mmu_ops.set_pte_present(mm, addr, ptep, pte); 1278 pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
1269 } 1279 }
1270 1280
1271 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, 1281 static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
1272 pte_t *ptep) 1282 pte_t *ptep)
1273 { 1283 {
1274 PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep); 1284 PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
1275 } 1285 }
1276 1286
1277 static inline void pmd_clear(pmd_t *pmdp) 1287 static inline void pmd_clear(pmd_t *pmdp)
1278 { 1288 {
1279 PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp); 1289 PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
1280 } 1290 }
1281 #else /* !CONFIG_X86_PAE */ 1291 #else /* !CONFIG_X86_PAE */
1282 static inline void set_pte_atomic(pte_t *ptep, pte_t pte) 1292 static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
1283 { 1293 {
1284 set_pte(ptep, pte); 1294 set_pte(ptep, pte);
1285 } 1295 }
1286 1296
1287 static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, 1297 static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
1288 pte_t *ptep, pte_t pte) 1298 pte_t *ptep, pte_t pte)
1289 { 1299 {
1290 set_pte(ptep, pte); 1300 set_pte(ptep, pte);
1291 } 1301 }
1292 1302
1293 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, 1303 static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
1294 pte_t *ptep) 1304 pte_t *ptep)
1295 { 1305 {
1296 set_pte_at(mm, addr, ptep, __pte(0)); 1306 set_pte_at(mm, addr, ptep, __pte(0));
1297 } 1307 }
1298 1308
1299 static inline void pmd_clear(pmd_t *pmdp) 1309 static inline void pmd_clear(pmd_t *pmdp)
1300 { 1310 {
1301 set_pmd(pmdp, __pmd(0)); 1311 set_pmd(pmdp, __pmd(0));
1302 } 1312 }
1303 #endif /* CONFIG_X86_PAE */ 1313 #endif /* CONFIG_X86_PAE */
1304 1314
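/*
 * Background sketch, assuming the native PAE behaviour these pvops wrap: a
 * PAE pte is 64 bits wide but a 32-bit CPU stores it as two 32-bit writes,
 * so a plain store to a live mapping could expose a half-updated entry.
 * The non-atomic path writes the halves in an order that keeps the entry
 * non-present until the final store; set_pte_atomic() instead uses a single
 * atomic 64-bit store.  Roughly:
 */
static inline void pae_set_pte_sketch(pte_t *ptep, pte_t pte)
{
        ptep->pte_high = pte.pte_high;  /* high half first: entry still not present */
        smp_wmb();
        ptep->pte_low = pte.pte_low;    /* low half (present bit) last */
}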
1305 /* Lazy mode for batching updates / context switch */ 1315 /* Lazy mode for batching updates / context switch */
1306 enum paravirt_lazy_mode { 1316 enum paravirt_lazy_mode {
1307 PARAVIRT_LAZY_NONE, 1317 PARAVIRT_LAZY_NONE,
1308 PARAVIRT_LAZY_MMU, 1318 PARAVIRT_LAZY_MMU,
1309 PARAVIRT_LAZY_CPU, 1319 PARAVIRT_LAZY_CPU,
1310 }; 1320 };
1311 1321
1312 enum paravirt_lazy_mode paravirt_get_lazy_mode(void); 1322 enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
1313 void paravirt_enter_lazy_cpu(void); 1323 void paravirt_enter_lazy_cpu(void);
1314 void paravirt_leave_lazy_cpu(void); 1324 void paravirt_leave_lazy_cpu(void);
1315 void paravirt_enter_lazy_mmu(void); 1325 void paravirt_enter_lazy_mmu(void);
1316 void paravirt_leave_lazy_mmu(void); 1326 void paravirt_leave_lazy_mmu(void);
1317 void paravirt_leave_lazy(enum paravirt_lazy_mode mode); 1327 void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
1318 1328
1319 #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE 1329 #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE
1320 static inline void arch_enter_lazy_cpu_mode(void) 1330 static inline void arch_enter_lazy_cpu_mode(void)
1321 { 1331 {
1322 PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); 1332 PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
1323 } 1333 }
1324 1334
1325 static inline void arch_leave_lazy_cpu_mode(void) 1335 static inline void arch_leave_lazy_cpu_mode(void)
1326 { 1336 {
1327 PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); 1337 PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
1328 } 1338 }
1329 1339
1330 static inline void arch_flush_lazy_cpu_mode(void) 1340 static inline void arch_flush_lazy_cpu_mode(void)
1331 { 1341 {
1332 if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) { 1342 if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
1333 arch_leave_lazy_cpu_mode(); 1343 arch_leave_lazy_cpu_mode();
1334 arch_enter_lazy_cpu_mode(); 1344 arch_enter_lazy_cpu_mode();
1335 } 1345 }
1336 } 1346 }
1337 1347
1338 1348
1339 #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE 1349 #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
1340 static inline void arch_enter_lazy_mmu_mode(void) 1350 static inline void arch_enter_lazy_mmu_mode(void)
1341 { 1351 {
1342 PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter); 1352 PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter);
1343 } 1353 }
1344 1354
1345 static inline void arch_leave_lazy_mmu_mode(void) 1355 static inline void arch_leave_lazy_mmu_mode(void)
1346 { 1356 {
1347 PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); 1357 PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
1348 } 1358 }
1349 1359
1350 static inline void arch_flush_lazy_mmu_mode(void) 1360 static inline void arch_flush_lazy_mmu_mode(void)
1351 { 1361 {
1352 if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) { 1362 if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
1353 arch_leave_lazy_mmu_mode(); 1363 arch_leave_lazy_mmu_mode();
1354 arch_enter_lazy_mmu_mode(); 1364 arch_enter_lazy_mmu_mode();
1355 } 1365 }
1356 } 1366 }
1357 1367
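/*
 * Usage sketch (hypothetical caller): lazy MMU mode lets a hypervisor batch
 * a run of page-table updates and issue them in one hypercall when the mode
 * is left.  Generic code brackets a loop of updates with the
 * arch_enter/leave_lazy_mmu_mode() hooks defined above:
 */
static void remap_range_sketch(struct mm_struct *mm, unsigned long addr,
                               pte_t *ptep, pte_t pte, int nr)
{
        int i;

        arch_enter_lazy_mmu_mode();
        for (i = 0; i < nr; i++)
                set_pte_at(mm, addr + i * PAGE_SIZE, ptep + i, pte);
        arch_leave_lazy_mmu_mode();     /* queued updates are flushed here */
}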
1358 static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, 1368 static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
1359 unsigned long phys, pgprot_t flags) 1369 unsigned long phys, pgprot_t flags)
1360 { 1370 {
1361 pv_mmu_ops.set_fixmap(idx, phys, flags); 1371 pv_mmu_ops.set_fixmap(idx, phys, flags);
1362 } 1372 }
1363 1373
1364 void _paravirt_nop(void); 1374 void _paravirt_nop(void);
1365 #define paravirt_nop ((void *)_paravirt_nop) 1375 #define paravirt_nop ((void *)_paravirt_nop)
1366 1376
1367 /* These all sit in the .parainstructions section to tell us what to patch. */ 1377 /* These all sit in the .parainstructions section to tell us what to patch. */
1368 struct paravirt_patch_site { 1378 struct paravirt_patch_site {
1369 u8 *instr; /* original instructions */ 1379 u8 *instr; /* original instructions */
1370 u8 instrtype; /* type of this instruction */ 1380 u8 instrtype; /* type of this instruction */
1371 u8 len; /* length of original instruction */ 1381 u8 len; /* length of original instruction */
1372 u16 clobbers; /* what registers you may clobber */ 1382 u16 clobbers; /* what registers you may clobber */
1373 }; 1383 };
1374 1384
1375 extern struct paravirt_patch_site __parainstructions[], 1385 extern struct paravirt_patch_site __parainstructions[],
1376 __parainstructions_end[]; 1386 __parainstructions_end[];
1377 1387
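/*
 * Sketch of how the patch-site records are consumed, loosely modelled on
 * apply_paravirt() and simplified (the real code patches via a bounce
 * buffer and handles partial patches): at boot, each recorded call site is
 * rewritten in place with whatever pv_init_ops.patch() decides - a direct
 * call, inlined code, or nops.
 */
static void __init patch_parainstructions_sketch(void)
{
        struct paravirt_patch_site *p;

        for (p = __parainstructions; p < __parainstructions_end; p++) {
                unsigned used;

                used = pv_init_ops.patch(p->instrtype, p->clobbers,
                                         p->instr, (unsigned long)p->instr,
                                         p->len);
                /* pad the rest of the original site with nops */
                add_nops(p->instr + used, p->len - used);
        }
}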
1378 #ifdef CONFIG_X86_32 1388 #ifdef CONFIG_X86_32
1379 #define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;" 1389 #define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;"
1380 #define PV_RESTORE_REGS "popl %%edx; popl %%ecx" 1390 #define PV_RESTORE_REGS "popl %%edx; popl %%ecx"
1381 #define PV_FLAGS_ARG "0" 1391 #define PV_FLAGS_ARG "0"
1382 #define PV_EXTRA_CLOBBERS 1392 #define PV_EXTRA_CLOBBERS
1383 #define PV_VEXTRA_CLOBBERS 1393 #define PV_VEXTRA_CLOBBERS
1384 #else 1394 #else
1385 /* We save some registers; saving all of them would be too much. We clobber 1395 /* We save some registers; saving all of them would be too much. We clobber
1386 * all caller-saved registers except the argument register */ 1396 * all caller-saved registers except the argument register */
1387 #define PV_SAVE_REGS "pushq %%rdi;" 1397 #define PV_SAVE_REGS "pushq %%rdi;"
1388 #define PV_RESTORE_REGS "popq %%rdi;" 1398 #define PV_RESTORE_REGS "popq %%rdi;"
1389 #define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx" 1399 #define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx"
1390 #define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx" 1400 #define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx"
1391 #define PV_FLAGS_ARG "D" 1401 #define PV_FLAGS_ARG "D"
1392 #endif 1402 #endif
1393 1403
1394 static inline unsigned long __raw_local_save_flags(void) 1404 static inline unsigned long __raw_local_save_flags(void)
1395 { 1405 {
1396 unsigned long f; 1406 unsigned long f;
1397 1407
1398 asm volatile(paravirt_alt(PV_SAVE_REGS 1408 asm volatile(paravirt_alt(PV_SAVE_REGS
1399 PARAVIRT_CALL 1409 PARAVIRT_CALL
1400 PV_RESTORE_REGS) 1410 PV_RESTORE_REGS)
1401 : "=a"(f) 1411 : "=a"(f)
1402 : paravirt_type(pv_irq_ops.save_fl), 1412 : paravirt_type(pv_irq_ops.save_fl),
1403 paravirt_clobber(CLBR_EAX) 1413 paravirt_clobber(CLBR_EAX)
1404 : "memory", "cc" PV_VEXTRA_CLOBBERS); 1414 : "memory", "cc" PV_VEXTRA_CLOBBERS);
1405 return f; 1415 return f;
1406 } 1416 }
1407 1417
1408 static inline void raw_local_irq_restore(unsigned long f) 1418 static inline void raw_local_irq_restore(unsigned long f)
1409 { 1419 {
1410 asm volatile(paravirt_alt(PV_SAVE_REGS 1420 asm volatile(paravirt_alt(PV_SAVE_REGS
1411 PARAVIRT_CALL 1421 PARAVIRT_CALL
1412 PV_RESTORE_REGS) 1422 PV_RESTORE_REGS)
1413 : "=a"(f) 1423 : "=a"(f)
1414 : PV_FLAGS_ARG(f), 1424 : PV_FLAGS_ARG(f),
1415 paravirt_type(pv_irq_ops.restore_fl), 1425 paravirt_type(pv_irq_ops.restore_fl),
1416 paravirt_clobber(CLBR_EAX) 1426 paravirt_clobber(CLBR_EAX)
1417 : "memory", "cc" PV_EXTRA_CLOBBERS); 1427 : "memory", "cc" PV_EXTRA_CLOBBERS);
1418 } 1428 }
1419 1429
1420 static inline void raw_local_irq_disable(void) 1430 static inline void raw_local_irq_disable(void)
1421 { 1431 {
1422 asm volatile(paravirt_alt(PV_SAVE_REGS 1432 asm volatile(paravirt_alt(PV_SAVE_REGS
1423 PARAVIRT_CALL 1433 PARAVIRT_CALL
1424 PV_RESTORE_REGS) 1434 PV_RESTORE_REGS)
1425 : 1435 :
1426 : paravirt_type(pv_irq_ops.irq_disable), 1436 : paravirt_type(pv_irq_ops.irq_disable),
1427 paravirt_clobber(CLBR_EAX) 1437 paravirt_clobber(CLBR_EAX)
1428 : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); 1438 : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
1429 } 1439 }
1430 1440
1431 static inline void raw_local_irq_enable(void) 1441 static inline void raw_local_irq_enable(void)
1432 { 1442 {
1433 asm volatile(paravirt_alt(PV_SAVE_REGS 1443 asm volatile(paravirt_alt(PV_SAVE_REGS
1434 PARAVIRT_CALL 1444 PARAVIRT_CALL
1435 PV_RESTORE_REGS) 1445 PV_RESTORE_REGS)
1436 : 1446 :
1437 : paravirt_type(pv_irq_ops.irq_enable), 1447 : paravirt_type(pv_irq_ops.irq_enable),
1438 paravirt_clobber(CLBR_EAX) 1448 paravirt_clobber(CLBR_EAX)
1439 : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); 1449 : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
1440 } 1450 }
1441 1451
1442 static inline unsigned long __raw_local_irq_save(void) 1452 static inline unsigned long __raw_local_irq_save(void)
1443 { 1453 {
1444 unsigned long f; 1454 unsigned long f;
1445 1455
1446 f = __raw_local_save_flags(); 1456 f = __raw_local_save_flags();
1447 raw_local_irq_disable(); 1457 raw_local_irq_disable();
1448 return f; 1458 return f;
1449 } 1459 }
1450 1460
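/*
 * Usage sketch (hypothetical caller): the save/disable/restore helpers
 * above are what the generic local_irq_save()/local_irq_restore() macros
 * expand to when CONFIG_PARAVIRT is enabled, so a critical section looks
 * the same as on native hardware:
 */
static inline void poke_percpu_state_sketch(void)
{
        unsigned long flags;

        flags = __raw_local_irq_save(); /* save IF, then disable */
        /* ... touch state that must not race with this CPU's interrupts ... */
        raw_local_irq_restore(flags);   /* re-enable only if it was enabled */
}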
1451 /* Make sure as little as possible of this mess escapes. */ 1461 /* Make sure as little as possible of this mess escapes. */
1452 #undef PARAVIRT_CALL 1462 #undef PARAVIRT_CALL
1453 #undef __PVOP_CALL 1463 #undef __PVOP_CALL
1454 #undef __PVOP_VCALL 1464 #undef __PVOP_VCALL
1455 #undef PVOP_VCALL0 1465 #undef PVOP_VCALL0
1456 #undef PVOP_CALL0 1466 #undef PVOP_CALL0
1457 #undef PVOP_VCALL1 1467 #undef PVOP_VCALL1
1458 #undef PVOP_CALL1 1468 #undef PVOP_CALL1
1459 #undef PVOP_VCALL2 1469 #undef PVOP_VCALL2
1460 #undef PVOP_CALL2 1470 #undef PVOP_CALL2
1461 #undef PVOP_VCALL3 1471 #undef PVOP_VCALL3
1462 #undef PVOP_CALL3 1472 #undef PVOP_CALL3
1463 #undef PVOP_VCALL4 1473 #undef PVOP_VCALL4
1464 #undef PVOP_CALL4 1474 #undef PVOP_CALL4
1465 1475
1466 #else /* __ASSEMBLY__ */ 1476 #else /* __ASSEMBLY__ */
1467 1477
1468 #define _PVSITE(ptype, clobbers, ops, word, algn) \ 1478 #define _PVSITE(ptype, clobbers, ops, word, algn) \
1469 771:; \ 1479 771:; \
1470 ops; \ 1480 ops; \
1471 772:; \ 1481 772:; \
1472 .pushsection .parainstructions,"a"; \ 1482 .pushsection .parainstructions,"a"; \
1473 .align algn; \ 1483 .align algn; \
1474 word 771b; \ 1484 word 771b; \
1475 .byte ptype; \ 1485 .byte ptype; \
1476 .byte 772b-771b; \ 1486 .byte 772b-771b; \
1477 .short clobbers; \ 1487 .short clobbers; \
1478 .popsection 1488 .popsection
1479 1489
1480 1490
1481 #ifdef CONFIG_X86_64 1491 #ifdef CONFIG_X86_64
1482 #define PV_SAVE_REGS pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx 1492 #define PV_SAVE_REGS pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx
1483 #define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax 1493 #define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax
1484 #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) 1494 #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8)
1485 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) 1495 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
1486 #define PARA_INDIRECT(addr) *addr(%rip) 1496 #define PARA_INDIRECT(addr) *addr(%rip)
1487 #else 1497 #else
1488 #define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx 1498 #define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx
1489 #define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax 1499 #define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
1490 #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) 1500 #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4)
1491 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) 1501 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
1492 #define PARA_INDIRECT(addr) *%cs:addr 1502 #define PARA_INDIRECT(addr) *%cs:addr
1493 #endif 1503 #endif
1494 1504
1495 #define INTERRUPT_RETURN \ 1505 #define INTERRUPT_RETURN \
1496 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ 1506 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
1497 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) 1507 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
1498 1508
1499 #define DISABLE_INTERRUPTS(clobbers) \ 1509 #define DISABLE_INTERRUPTS(clobbers) \
1500 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ 1510 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
1501 PV_SAVE_REGS; \ 1511 PV_SAVE_REGS; \
1502 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ 1512 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
1503 PV_RESTORE_REGS;) \ 1513 PV_RESTORE_REGS;) \
1504 1514
1505 #define ENABLE_INTERRUPTS(clobbers) \ 1515 #define ENABLE_INTERRUPTS(clobbers) \
1506 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ 1516 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
1507 PV_SAVE_REGS; \ 1517 PV_SAVE_REGS; \
1508 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ 1518 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
1509 PV_RESTORE_REGS;) 1519 PV_RESTORE_REGS;)
1510 1520
1511 #define USERGS_SYSRET32 \ 1521 #define USERGS_SYSRET32 \
1512 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ 1522 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
1513 CLBR_NONE, \ 1523 CLBR_NONE, \
1514 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32)) 1524 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
1515 1525
1516 #ifdef CONFIG_X86_32 1526 #ifdef CONFIG_X86_32
1517 #define GET_CR0_INTO_EAX \ 1527 #define GET_CR0_INTO_EAX \
1518 push %ecx; push %edx; \ 1528 push %ecx; push %edx; \
1519 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ 1529 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
1520 pop %edx; pop %ecx 1530 pop %edx; pop %ecx
1521 1531
1522 #define ENABLE_INTERRUPTS_SYSEXIT \ 1532 #define ENABLE_INTERRUPTS_SYSEXIT \
1523 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ 1533 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
1524 CLBR_NONE, \ 1534 CLBR_NONE, \
1525 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) 1535 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
1526 1536
1527 1537
1528 #else /* !CONFIG_X86_32 */ 1538 #else /* !CONFIG_X86_32 */
1529 1539
1530 /* 1540 /*
1531 * If swapgs is used while the userspace stack is still current, 1541 * If swapgs is used while the userspace stack is still current,
1532 * there's no way to call a pvop. The PV replacement *must* be 1542 * there's no way to call a pvop. The PV replacement *must* be
1533 * inlined, or the swapgs instruction must be trapped and emulated. 1543 * inlined, or the swapgs instruction must be trapped and emulated.
1534 */ 1544 */
1535 #define SWAPGS_UNSAFE_STACK \ 1545 #define SWAPGS_UNSAFE_STACK \
1536 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ 1546 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
1537 swapgs) 1547 swapgs)
1538 1548
1539 #define SWAPGS \ 1549 #define SWAPGS \
1540 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ 1550 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
1541 PV_SAVE_REGS; \ 1551 PV_SAVE_REGS; \
1542 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ 1552 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
1543 PV_RESTORE_REGS \ 1553 PV_RESTORE_REGS \
1544 ) 1554 )
1545 1555
1546 #define GET_CR2_INTO_RCX \ 1556 #define GET_CR2_INTO_RCX \
1547 call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \ 1557 call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \
1548 movq %rax, %rcx; \ 1558 movq %rax, %rcx; \
1549 xorq %rax, %rax; 1559 xorq %rax, %rax;
1550 1560
1551 #define PARAVIRT_ADJUST_EXCEPTION_FRAME \ 1561 #define PARAVIRT_ADJUST_EXCEPTION_FRAME \
1552 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \ 1562 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
1553 CLBR_NONE, \ 1563 CLBR_NONE, \
1554 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame)) 1564 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
1555 1565
1556 #define USERGS_SYSRET64 \ 1566 #define USERGS_SYSRET64 \
1557 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ 1567 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
1558 CLBR_NONE, \ 1568 CLBR_NONE, \
1559 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) 1569 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
1560 1570
1561 #define ENABLE_INTERRUPTS_SYSEXIT32 \ 1571 #define ENABLE_INTERRUPTS_SYSEXIT32 \
1562 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ 1572 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
1563 CLBR_NONE, \ 1573 CLBR_NONE, \
1564 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) 1574 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
1565 #endif /* CONFIG_X86_32 */ 1575 #endif /* CONFIG_X86_32 */
1566 1576
1567 #endif /* __ASSEMBLY__ */ 1577 #endif /* __ASSEMBLY__ */
1568 #endif /* CONFIG_PARAVIRT */ 1578 #endif /* CONFIG_PARAVIRT */
1569 #endif /* __ASM_PARAVIRT_H */ 1579 #endif /* __ASM_PARAVIRT_H */
1570 1580
include/asm-x86/system.h
1 #ifndef _ASM_X86_SYSTEM_H_ 1 #ifndef _ASM_X86_SYSTEM_H_
2 #define _ASM_X86_SYSTEM_H_ 2 #define _ASM_X86_SYSTEM_H_
3 3
4 #include <asm/asm.h> 4 #include <asm/asm.h>
5 #include <asm/segment.h> 5 #include <asm/segment.h>
6 #include <asm/cpufeature.h> 6 #include <asm/cpufeature.h>
7 #include <asm/cmpxchg.h> 7 #include <asm/cmpxchg.h>
8 #include <asm/nops.h> 8 #include <asm/nops.h>
9 9
10 #include <linux/kernel.h> 10 #include <linux/kernel.h>
11 #include <linux/irqflags.h> 11 #include <linux/irqflags.h>
12 12
13 /* entries in ARCH_DLINFO: */ 13 /* entries in ARCH_DLINFO: */
14 #ifdef CONFIG_IA32_EMULATION 14 #ifdef CONFIG_IA32_EMULATION
15 # define AT_VECTOR_SIZE_ARCH 2 15 # define AT_VECTOR_SIZE_ARCH 2
16 #else 16 #else
17 # define AT_VECTOR_SIZE_ARCH 1 17 # define AT_VECTOR_SIZE_ARCH 1
18 #endif 18 #endif
19 19
20 #ifdef CONFIG_X86_32 20 #ifdef CONFIG_X86_32
21 21
22 struct task_struct; /* one of the stranger aspects of C forward declarations */ 22 struct task_struct; /* one of the stranger aspects of C forward declarations */
23 struct task_struct *__switch_to(struct task_struct *prev, 23 struct task_struct *__switch_to(struct task_struct *prev,
24 struct task_struct *next); 24 struct task_struct *next);
25 25
26 /* 26 /*
27 * Saving eflags is important. It switches not only IOPL between tasks, 27 * Saving eflags is important. It switches not only IOPL between tasks,
28 * it also protects other tasks from NT leaking through sysenter etc. 28 * it also protects other tasks from NT leaking through sysenter etc.
29 */ 29 */
30 #define switch_to(prev, next, last) \ 30 #define switch_to(prev, next, last) \
31 do { \ 31 do { \
32 /* \ 32 /* \
33 * Context-switching clobbers all registers, so we clobber \ 33 * Context-switching clobbers all registers, so we clobber \
34 * them explicitly, via unused output variables. \ 34 * them explicitly, via unused output variables. \
35 * (EAX and EBP are not listed because EBP is saved/restored \ 35 * (EAX and EBP are not listed because EBP is saved/restored \
36 * explicitly for wchan access and EAX is the return value of \ 36 * explicitly for wchan access and EAX is the return value of \
37 * __switch_to()) \ 37 * __switch_to()) \
38 */ \ 38 */ \
39 unsigned long ebx, ecx, edx, esi, edi; \ 39 unsigned long ebx, ecx, edx, esi, edi; \
40 \ 40 \
41 asm volatile("pushfl\n\t" /* save flags */ \ 41 asm volatile("pushfl\n\t" /* save flags */ \
42 "pushl %%ebp\n\t" /* save EBP */ \ 42 "pushl %%ebp\n\t" /* save EBP */ \
43 "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ 43 "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
44 "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ 44 "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
45 "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ 45 "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
46 "pushl %[next_ip]\n\t" /* restore EIP */ \ 46 "pushl %[next_ip]\n\t" /* restore EIP */ \
47 "jmp __switch_to\n" /* regparm call */ \ 47 "jmp __switch_to\n" /* regparm call */ \
48 "1:\t" \ 48 "1:\t" \
49 "popl %%ebp\n\t" /* restore EBP */ \ 49 "popl %%ebp\n\t" /* restore EBP */ \
50 "popfl\n" /* restore flags */ \ 50 "popfl\n" /* restore flags */ \
51 \ 51 \
52 /* output parameters */ \ 52 /* output parameters */ \
53 : [prev_sp] "=m" (prev->thread.sp), \ 53 : [prev_sp] "=m" (prev->thread.sp), \
54 [prev_ip] "=m" (prev->thread.ip), \ 54 [prev_ip] "=m" (prev->thread.ip), \
55 "=a" (last), \ 55 "=a" (last), \
56 \ 56 \
57 /* clobbered output registers: */ \ 57 /* clobbered output registers: */ \
58 "=b" (ebx), "=c" (ecx), "=d" (edx), \ 58 "=b" (ebx), "=c" (ecx), "=d" (edx), \
59 "=S" (esi), "=D" (edi) \ 59 "=S" (esi), "=D" (edi) \
60 \ 60 \
61 /* input parameters: */ \ 61 /* input parameters: */ \
62 : [next_sp] "m" (next->thread.sp), \ 62 : [next_sp] "m" (next->thread.sp), \
63 [next_ip] "m" (next->thread.ip), \ 63 [next_ip] "m" (next->thread.ip), \
64 \ 64 \
65 /* regparm parameters for __switch_to(): */ \ 65 /* regparm parameters for __switch_to(): */ \
66 [prev] "a" (prev), \ 66 [prev] "a" (prev), \
67 [next] "d" (next)); \ 67 [next] "d" (next)); \
68 } while (0) 68 } while (0)
69 69
70 /* 70 /*
71 * disable hlt during certain critical i/o operations 71 * disable hlt during certain critical i/o operations
72 */ 72 */
73 #define HAVE_DISABLE_HLT 73 #define HAVE_DISABLE_HLT
74 #else 74 #else
75 #define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" 75 #define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
76 #define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" 76 #define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
77 77
78 /* frame pointer must be last for get_wchan */ 78 /* frame pointer must be last for get_wchan */
79 #define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" 79 #define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
80 #define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" 80 #define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
81 81
82 #define __EXTRA_CLOBBER \ 82 #define __EXTRA_CLOBBER \
83 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ 83 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
84 "r12", "r13", "r14", "r15" 84 "r12", "r13", "r14", "r15"
85 85
86 /* Save and restore flags to clear NT and keep it from leaking */ 86 /* Save and restore flags to clear NT and keep it from leaking */
87 #define switch_to(prev, next, last) \ 87 #define switch_to(prev, next, last) \
88 asm volatile(SAVE_CONTEXT \ 88 asm volatile(SAVE_CONTEXT \
89 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ 89 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
90 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ 90 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
91 "call __switch_to\n\t" \ 91 "call __switch_to\n\t" \
92 ".globl thread_return\n" \ 92 ".globl thread_return\n" \
93 "thread_return:\n\t" \ 93 "thread_return:\n\t" \
94 "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ 94 "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
95 "movq %P[thread_info](%%rsi),%%r8\n\t" \ 95 "movq %P[thread_info](%%rsi),%%r8\n\t" \
96 LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ 96 LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
97 "movq %%rax,%%rdi\n\t" \ 97 "movq %%rax,%%rdi\n\t" \
98 "jc ret_from_fork\n\t" \ 98 "jc ret_from_fork\n\t" \
99 RESTORE_CONTEXT \ 99 RESTORE_CONTEXT \
100 : "=a" (last) \ 100 : "=a" (last) \
101 : [next] "S" (next), [prev] "D" (prev), \ 101 : [next] "S" (next), [prev] "D" (prev), \
102 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ 102 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
103 [ti_flags] "i" (offsetof(struct thread_info, flags)), \ 103 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
104 [tif_fork] "i" (TIF_FORK), \ 104 [tif_fork] "i" (TIF_FORK), \
105 [thread_info] "i" (offsetof(struct task_struct, stack)), \ 105 [thread_info] "i" (offsetof(struct task_struct, stack)), \
106 [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ 106 [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
107 : "memory", "cc" __EXTRA_CLOBBER) 107 : "memory", "cc" __EXTRA_CLOBBER)
108 #endif 108 #endif
109 109
110 #ifdef __KERNEL__ 110 #ifdef __KERNEL__
111 #define _set_base(addr, base) do { unsigned long __pr; \ 111 #define _set_base(addr, base) do { unsigned long __pr; \
112 __asm__ __volatile__ ("movw %%dx,%1\n\t" \ 112 __asm__ __volatile__ ("movw %%dx,%1\n\t" \
113 "rorl $16,%%edx\n\t" \ 113 "rorl $16,%%edx\n\t" \
114 "movb %%dl,%2\n\t" \ 114 "movb %%dl,%2\n\t" \
115 "movb %%dh,%3" \ 115 "movb %%dh,%3" \
116 :"=&d" (__pr) \ 116 :"=&d" (__pr) \
117 :"m" (*((addr)+2)), \ 117 :"m" (*((addr)+2)), \
118 "m" (*((addr)+4)), \ 118 "m" (*((addr)+4)), \
119 "m" (*((addr)+7)), \ 119 "m" (*((addr)+7)), \
120 "0" (base) \ 120 "0" (base) \
121 ); } while (0) 121 ); } while (0)
122 122
123 #define _set_limit(addr, limit) do { unsigned long __lr; \ 123 #define _set_limit(addr, limit) do { unsigned long __lr; \
124 __asm__ __volatile__ ("movw %%dx,%1\n\t" \ 124 __asm__ __volatile__ ("movw %%dx,%1\n\t" \
125 "rorl $16,%%edx\n\t" \ 125 "rorl $16,%%edx\n\t" \
126 "movb %2,%%dh\n\t" \ 126 "movb %2,%%dh\n\t" \
127 "andb $0xf0,%%dh\n\t" \ 127 "andb $0xf0,%%dh\n\t" \
128 "orb %%dh,%%dl\n\t" \ 128 "orb %%dh,%%dl\n\t" \
129 "movb %%dl,%2" \ 129 "movb %%dl,%2" \
130 :"=&d" (__lr) \ 130 :"=&d" (__lr) \
131 :"m" (*(addr)), \ 131 :"m" (*(addr)), \
132 "m" (*((addr)+6)), \ 132 "m" (*((addr)+6)), \
133 "0" (limit) \ 133 "0" (limit) \
134 ); } while (0) 134 ); } while (0)
135 135
136 #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) 136 #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
137 #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) 137 #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
138 138
139 extern void load_gs_index(unsigned); 139 extern void native_load_gs_index(unsigned);
140 140
141 /* 141 /*
142 * Load a segment. Fall back on loading the zero 142 * Load a segment. Fall back on loading the zero
143 * segment if something goes wrong. 143 * segment if something goes wrong.
144 */ 144 */
145 #define loadsegment(seg, value) \ 145 #define loadsegment(seg, value) \
146 asm volatile("\n" \ 146 asm volatile("\n" \
147 "1:\t" \ 147 "1:\t" \
148 "movl %k0,%%" #seg "\n" \ 148 "movl %k0,%%" #seg "\n" \
149 "2:\n" \ 149 "2:\n" \
150 ".section .fixup,\"ax\"\n" \ 150 ".section .fixup,\"ax\"\n" \
151 "3:\t" \ 151 "3:\t" \
152 "movl %k1, %%" #seg "\n\t" \ 152 "movl %k1, %%" #seg "\n\t" \
153 "jmp 2b\n" \ 153 "jmp 2b\n" \
154 ".previous\n" \ 154 ".previous\n" \
155 _ASM_EXTABLE(1b,3b) \ 155 _ASM_EXTABLE(1b,3b) \
156 : :"r" (value), "r" (0) : "memory") 156 : :"r" (value), "r" (0) : "memory")
157 157
158 158
159 /* 159 /*
160 * Save a segment register away 160 * Save a segment register away
161 */ 161 */
162 #define savesegment(seg, value) \ 162 #define savesegment(seg, value) \
163 asm("mov %%" #seg ",%0":"=rm" (value) : : "memory") 163 asm("mov %%" #seg ",%0":"=rm" (value) : : "memory")
164 164
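/*
 * Usage sketch (hypothetical helper and values): loadsegment() and
 * savesegment() are the usual pair for juggling data segment registers
 * around a context switch or user copy.  The fixup in loadsegment() means
 * a stale selector silently degrades to the null segment instead of
 * faulting:
 */
static inline void swap_fs_sketch(unsigned *saved_fs, unsigned new_fs)
{
        savesegment(fs, *saved_fs);     /* remember the old %fs selector */
        loadsegment(fs, new_fs);        /* load the new one, or 0 on fault */
}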
165 static inline unsigned long get_limit(unsigned long segment) 165 static inline unsigned long get_limit(unsigned long segment)
166 { 166 {
167 unsigned long __limit; 167 unsigned long __limit;
168 asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); 168 asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
169 return __limit + 1; 169 return __limit + 1;
170 } 170 }
171 171
172 static inline void native_clts(void) 172 static inline void native_clts(void)
173 { 173 {
174 asm volatile("clts"); 174 asm volatile("clts");
175 } 175 }
176 176
177 /* 177 /*
178 * Volatile isn't enough to prevent the compiler from reordering the 178 * Volatile isn't enough to prevent the compiler from reordering the
179 * read/write functions for the control registers and messing everything up. 179 * read/write functions for the control registers and messing everything up.
180 * A memory clobber would solve the problem, but would prevent reordering of 180 * A memory clobber would solve the problem, but would prevent reordering of
181 * all loads and stores around it, which can hurt performance. The solution is to 181 * all loads and stores around it, which can hurt performance. The solution is to
182 * use a variable and mimic reads and writes to it to enforce serialization 182 * use a variable and mimic reads and writes to it to enforce serialization
183 */ 183 */
184 static unsigned long __force_order; 184 static unsigned long __force_order;
185 185
186 static inline unsigned long native_read_cr0(void) 186 static inline unsigned long native_read_cr0(void)
187 { 187 {
188 unsigned long val; 188 unsigned long val;
189 asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); 189 asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
190 return val; 190 return val;
191 } 191 }
192 192
193 static inline void native_write_cr0(unsigned long val) 193 static inline void native_write_cr0(unsigned long val)
194 { 194 {
195 asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); 195 asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
196 } 196 }
197 197
198 static inline unsigned long native_read_cr2(void) 198 static inline unsigned long native_read_cr2(void)
199 { 199 {
200 unsigned long val; 200 unsigned long val;
201 asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); 201 asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
202 return val; 202 return val;
203 } 203 }
204 204
205 static inline void native_write_cr2(unsigned long val) 205 static inline void native_write_cr2(unsigned long val)
206 { 206 {
207 asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); 207 asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
208 } 208 }
209 209
210 static inline unsigned long native_read_cr3(void) 210 static inline unsigned long native_read_cr3(void)
211 { 211 {
212 unsigned long val; 212 unsigned long val;
213 asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); 213 asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
214 return val; 214 return val;
215 } 215 }
216 216
217 static inline void native_write_cr3(unsigned long val) 217 static inline void native_write_cr3(unsigned long val)
218 { 218 {
219 asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); 219 asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
220 } 220 }
221 221
222 static inline unsigned long native_read_cr4(void) 222 static inline unsigned long native_read_cr4(void)
223 { 223 {
224 unsigned long val; 224 unsigned long val;
225 asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); 225 asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
226 return val; 226 return val;
227 } 227 }
228 228
229 static inline unsigned long native_read_cr4_safe(void) 229 static inline unsigned long native_read_cr4_safe(void)
230 { 230 {
231 unsigned long val; 231 unsigned long val;
232 /* This could fault if %cr4 does not exist. On x86_64, cr4 always 232 /* This could fault if %cr4 does not exist. On x86_64, cr4 always
233 * exists, so it will never fail. */ 233 * exists, so it will never fail. */
234 #ifdef CONFIG_X86_32 234 #ifdef CONFIG_X86_32
235 asm volatile("1: mov %%cr4, %0\n" 235 asm volatile("1: mov %%cr4, %0\n"
236 "2:\n" 236 "2:\n"
237 _ASM_EXTABLE(1b, 2b) 237 _ASM_EXTABLE(1b, 2b)
238 : "=r" (val), "=m" (__force_order) : "0" (0)); 238 : "=r" (val), "=m" (__force_order) : "0" (0));
239 #else 239 #else
240 val = native_read_cr4(); 240 val = native_read_cr4();
241 #endif 241 #endif
242 return val; 242 return val;
243 } 243 }
244 244
245 static inline void native_write_cr4(unsigned long val) 245 static inline void native_write_cr4(unsigned long val)
246 { 246 {
247 asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); 247 asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
248 } 248 }
249 249
250 #ifdef CONFIG_X86_64 250 #ifdef CONFIG_X86_64
251 static inline unsigned long native_read_cr8(void) 251 static inline unsigned long native_read_cr8(void)
252 { 252 {
253 unsigned long cr8; 253 unsigned long cr8;
254 asm volatile("movq %%cr8,%0" : "=r" (cr8)); 254 asm volatile("movq %%cr8,%0" : "=r" (cr8));
255 return cr8; 255 return cr8;
256 } 256 }
257 257
258 static inline void native_write_cr8(unsigned long val) 258 static inline void native_write_cr8(unsigned long val)
259 { 259 {
260 asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); 260 asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
261 } 261 }
262 #endif 262 #endif
263 263
264 static inline void native_wbinvd(void) 264 static inline void native_wbinvd(void)
265 { 265 {
266 asm volatile("wbinvd": : :"memory"); 266 asm volatile("wbinvd": : :"memory");
267 } 267 }
268 268
269 #ifdef CONFIG_PARAVIRT 269 #ifdef CONFIG_PARAVIRT
270 #include <asm/paravirt.h> 270 #include <asm/paravirt.h>
271 #else 271 #else
272 #define read_cr0() (native_read_cr0()) 272 #define read_cr0() (native_read_cr0())
273 #define write_cr0(x) (native_write_cr0(x)) 273 #define write_cr0(x) (native_write_cr0(x))
274 #define read_cr2() (native_read_cr2()) 274 #define read_cr2() (native_read_cr2())
275 #define write_cr2(x) (native_write_cr2(x)) 275 #define write_cr2(x) (native_write_cr2(x))
276 #define read_cr3() (native_read_cr3()) 276 #define read_cr3() (native_read_cr3())
277 #define write_cr3(x) (native_write_cr3(x)) 277 #define write_cr3(x) (native_write_cr3(x))
278 #define read_cr4() (native_read_cr4()) 278 #define read_cr4() (native_read_cr4())
279 #define read_cr4_safe() (native_read_cr4_safe()) 279 #define read_cr4_safe() (native_read_cr4_safe())
280 #define write_cr4(x) (native_write_cr4(x)) 280 #define write_cr4(x) (native_write_cr4(x))
281 #define wbinvd() (native_wbinvd()) 281 #define wbinvd() (native_wbinvd())
282 #ifdef CONFIG_X86_64 282 #ifdef CONFIG_X86_64
283 #define read_cr8() (native_read_cr8()) 283 #define read_cr8() (native_read_cr8())
284 #define write_cr8(x) (native_write_cr8(x)) 284 #define write_cr8(x) (native_write_cr8(x))
285 #define load_gs_index native_load_gs_index
285 #endif 286 #endif
286 287
287 /* Clear the 'TS' bit */ 288 /* Clear the 'TS' bit */
288 #define clts() (native_clts()) 289 #define clts() (native_clts())
289 290
290 #endif/* CONFIG_PARAVIRT */ 291 #endif/* CONFIG_PARAVIRT */
291 292
292 #define stts() write_cr0(read_cr0() | X86_CR0_TS) 293 #define stts() write_cr0(read_cr0() | X86_CR0_TS)
293 294
294 #endif /* __KERNEL__ */ 295 #endif /* __KERNEL__ */
295 296
296 static inline void clflush(volatile void *__p) 297 static inline void clflush(volatile void *__p)
297 { 298 {
298 asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); 299 asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
299 } 300 }
300 301
301 #define nop() asm volatile ("nop") 302 #define nop() asm volatile ("nop")
302 303
303 void disable_hlt(void); 304 void disable_hlt(void);
304 void enable_hlt(void); 305 void enable_hlt(void);
305 306
306 void cpu_idle_wait(void); 307 void cpu_idle_wait(void);
307 308
308 extern unsigned long arch_align_stack(unsigned long sp); 309 extern unsigned long arch_align_stack(unsigned long sp);
309 extern void free_init_pages(char *what, unsigned long begin, unsigned long end); 310 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
310 311
311 void default_idle(void); 312 void default_idle(void);
312 313
313 /* 314 /*
314 * Force strict CPU ordering. 315 * Force strict CPU ordering.
315 * And yes, this is required on UP too when we're talking 316 * And yes, this is required on UP too when we're talking
316 * to devices. 317 * to devices.
317 */ 318 */
318 #ifdef CONFIG_X86_32 319 #ifdef CONFIG_X86_32
319 /* 320 /*
320 * Some non-Intel clones support out of order store. wmb() ceases to be a 321 * Some non-Intel clones support out of order store. wmb() ceases to be a
321 * nop for these. 322 * nop for these.
322 */ 323 */
323 #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) 324 #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
324 #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) 325 #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
325 #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) 326 #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
326 #else 327 #else
327 #define mb() asm volatile("mfence":::"memory") 328 #define mb() asm volatile("mfence":::"memory")
328 #define rmb() asm volatile("lfence":::"memory") 329 #define rmb() asm volatile("lfence":::"memory")
329 #define wmb() asm volatile("sfence" ::: "memory") 330 #define wmb() asm volatile("sfence" ::: "memory")
330 #endif 331 #endif
331 332
332 /** 333 /**
333 * read_barrier_depends - Flush all pending reads that subsequent reads 334 * read_barrier_depends - Flush all pending reads that subsequent reads
334 * depend on. 335 * depend on.
335 * 336 *
336 * No data-dependent reads from memory-like regions are ever reordered 337 * No data-dependent reads from memory-like regions are ever reordered
337 * over this barrier. All reads preceding this primitive are guaranteed 338 * over this barrier. All reads preceding this primitive are guaranteed
338 * to access memory (but not necessarily other CPUs' caches) before any 339 * to access memory (but not necessarily other CPUs' caches) before any
339 * reads following this primitive that depend on the data returned by 340 * reads following this primitive that depend on the data returned by
340 * any of the preceding reads. This primitive is much lighter weight than 341 * any of the preceding reads. This primitive is much lighter weight than
341 * rmb() on most CPUs, and is never heavier weight than is 342 * rmb() on most CPUs, and is never heavier weight than is
342 * rmb(). 343 * rmb().
343 * 344 *
344 * These ordering constraints are respected by both the local CPU 345 * These ordering constraints are respected by both the local CPU
345 * and the compiler. 346 * and the compiler.
346 * 347 *
347 * Ordering is not guaranteed by anything other than these primitives, 348 * Ordering is not guaranteed by anything other than these primitives,
348 * not even by data dependencies. See the documentation for 349 * not even by data dependencies. See the documentation for
349 * memory_barrier() for examples and URLs to more information. 350 * memory_barrier() for examples and URLs to more information.
350 * 351 *
351 * For example, the following code would force ordering (the initial 352 * For example, the following code would force ordering (the initial
352 * value of "a" is zero, "b" is one, and "p" is "&a"): 353 * value of "a" is zero, "b" is one, and "p" is "&a"):
353 * 354 *
354 * <programlisting> 355 * <programlisting>
355 * CPU 0 CPU 1 356 * CPU 0 CPU 1
356 * 357 *
357 * b = 2; 358 * b = 2;
358 * memory_barrier(); 359 * memory_barrier();
359 * p = &b; q = p; 360 * p = &b; q = p;
360 * read_barrier_depends(); 361 * read_barrier_depends();
361 * d = *q; 362 * d = *q;
362 * </programlisting> 363 * </programlisting>
363 * 364 *
364 * because the read of "*q" depends on the read of "p" and these 365 * because the read of "*q" depends on the read of "p" and these
365 * two reads are separated by a read_barrier_depends(). However, 366 * two reads are separated by a read_barrier_depends(). However,
366 * the following code, with the same initial values for "a" and "b": 367 * the following code, with the same initial values for "a" and "b":
367 * 368 *
368 * <programlisting> 369 * <programlisting>
369 * CPU 0 CPU 1 370 * CPU 0 CPU 1
370 * 371 *
371 * a = 2; 372 * a = 2;
372 * memory_barrier(); 373 * memory_barrier();
373 * b = 3; y = b; 374 * b = 3; y = b;
374 * read_barrier_depends(); 375 * read_barrier_depends();
375 * x = a; 376 * x = a;
376 * </programlisting> 377 * </programlisting>
377 * 378 *
378 * does not enforce ordering, since there is no data dependency between 379 * does not enforce ordering, since there is no data dependency between
379 * the read of "a" and the read of "b". Therefore, on some CPUs, such 380 * the read of "a" and the read of "b". Therefore, on some CPUs, such
380 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() 381 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
381 * in cases like this where there are no data dependencies. 382 * in cases like this where there are no data dependencies.
382 **/ 383 **/
383 384
384 #define read_barrier_depends() do { } while (0) 385 #define read_barrier_depends() do { } while (0)
385 386
386 #ifdef CONFIG_SMP 387 #ifdef CONFIG_SMP
387 #define smp_mb() mb() 388 #define smp_mb() mb()
388 #ifdef CONFIG_X86_PPRO_FENCE 389 #ifdef CONFIG_X86_PPRO_FENCE
389 # define smp_rmb() rmb() 390 # define smp_rmb() rmb()
390 #else 391 #else
391 # define smp_rmb() barrier() 392 # define smp_rmb() barrier()
392 #endif 393 #endif
393 #ifdef CONFIG_X86_OOSTORE 394 #ifdef CONFIG_X86_OOSTORE
394 # define smp_wmb() wmb() 395 # define smp_wmb() wmb()
395 #else 396 #else
396 # define smp_wmb() barrier() 397 # define smp_wmb() barrier()
397 #endif 398 #endif
398 #define smp_read_barrier_depends() read_barrier_depends() 399 #define smp_read_barrier_depends() read_barrier_depends()
399 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) 400 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
400 #else 401 #else
401 #define smp_mb() barrier() 402 #define smp_mb() barrier()
402 #define smp_rmb() barrier() 403 #define smp_rmb() barrier()
403 #define smp_wmb() barrier() 404 #define smp_wmb() barrier()
404 #define smp_read_barrier_depends() do { } while (0) 405 #define smp_read_barrier_depends() do { } while (0)
405 #define set_mb(var, value) do { var = value; barrier(); } while (0) 406 #define set_mb(var, value) do { var = value; barrier(); } while (0)
406 #endif 407 #endif
407 408
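/*
 * Usage sketch (hypothetical data/flag variables): the classic pairing of
 * the SMP barriers defined above.  The producer publishes data before the
 * flag; the consumer checks the flag before reading the data.
 */
static int sketch_data;
static int sketch_ready;

static void producer_sketch(int v)
{
        sketch_data = v;
        smp_wmb();              /* data must be visible before the flag */
        sketch_ready = 1;
}

static int consumer_sketch(void)
{
        if (!sketch_ready)
                return -1;
        smp_rmb();              /* flag read must not pass the data read */
        return sketch_data;
}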
408 /* 409 /*
409 * Stop RDTSC speculation. This is needed when you need to use RDTSC 410 * Stop RDTSC speculation. This is needed when you need to use RDTSC
410 * (or get_cycles or vread that possibly accesses the TSC) in a defined 411 * (or get_cycles or vread that possibly accesses the TSC) in a defined
411 * code region. 412 * code region.
412 * 413 *
413 * (Could use a three-way alternative() for this if there were one.) 414 * (Could use a three-way alternative() for this if there were one.)
414 */ 415 */
415 static inline void rdtsc_barrier(void) 416 static inline void rdtsc_barrier(void)
416 { 417 {
417 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); 418 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
418 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); 419 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
419 } 420 }
420 421
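/*
 * Usage sketch (hypothetical helper): bracketing the TSC read with
 * rdtsc_barrier() keeps the timestamp from being hoisted above earlier
 * instructions or sunk below later ones, so the measured region is well
 * defined.
 */
static inline unsigned long long ordered_rdtsc_sketch(void)
{
        unsigned long long t;

        rdtsc_barrier();
        t = native_read_tsc();
        rdtsc_barrier();
        return t;
}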
421 #endif 422 #endif
422 423