Commit 63bcff2a307b9bcc712a8251eb27df8b2e117967

Authored by H. Peter Anvin
1 parent a052858fab

x86, smap: Add STAC and CLAC instructions to control user space access

When Supervisor Mode Access Prevention (SMAP) is enabled, access to
userspace from the kernel is controlled by the AC flag.  To make the
performance of manipulating that flag acceptable, there are two new
instructions, STAC and CLAC, to set and clear it.

This patch adds those instructions, via alternative(), when the SMAP
feature is enabled.  It also adds X86_EFLAGS_AC unconditionally to the
SYSCALL entry mask; there is simply no reason to make that one
conditional.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348256595-29119-9-git-send-email-hpa@linux.intel.com
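For context, the <asm/smap.h> header that the files below start including is introduced earlier in this same series and is not part of this diff. Below is a minimal sketch, under that assumption, of what its C-side helpers look like: the __ASM_STAC/__ASM_CLAC opcode strings and the X86_FEATURE_SMAP bit are defined by the series, and the exact header contents shown here are illustrative rather than quoted from the patch. The ASM_STAC and ASM_CLAC tokens used in the assembly and inline-asm hunks below are the corresponding macro forms, built on the same alternatives machinery, so CPUs without SMAP simply execute a 3-byte NOP.

#include <linux/compiler.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/nops.h>

/* STAC = 0f 01 cb (sets EFLAGS.AC), CLAC = 0f 01 ca (clears EFLAGS.AC) */
#define __ASM_STAC	".byte 0x0f,0x01,0xcb"
#define __ASM_CLAC	".byte 0x0f,0x01,0xca"

static __always_inline void stac(void)
{
	/* Patched in via alternative() only when X86_FEATURE_SMAP is set */
	alternative(ASM_NOP3, __ASM_STAC, X86_FEATURE_SMAP);
}

static __always_inline void clac(void)
{
	/* NOP on CPUs without SMAP, CLAC otherwise */
	alternative(ASM_NOP3, __ASM_CLAC, X86_FEATURE_SMAP);
}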

Showing 14 changed files with 106 additions and 32 deletions

arch/x86/ia32/ia32entry.S
1 /* 1 /*
2 * Compatibility mode system call entry point for x86-64. 2 * Compatibility mode system call entry point for x86-64.
3 * 3 *
4 * Copyright 2000-2002 Andi Kleen, SuSE Labs. 4 * Copyright 2000-2002 Andi Kleen, SuSE Labs.
5 */ 5 */
6 6
7 #include <asm/dwarf2.h> 7 #include <asm/dwarf2.h>
8 #include <asm/calling.h> 8 #include <asm/calling.h>
9 #include <asm/asm-offsets.h> 9 #include <asm/asm-offsets.h>
10 #include <asm/current.h> 10 #include <asm/current.h>
11 #include <asm/errno.h> 11 #include <asm/errno.h>
12 #include <asm/ia32_unistd.h> 12 #include <asm/ia32_unistd.h>
13 #include <asm/thread_info.h> 13 #include <asm/thread_info.h>
14 #include <asm/segment.h> 14 #include <asm/segment.h>
15 #include <asm/irqflags.h> 15 #include <asm/irqflags.h>
16 #include <asm/asm.h> 16 #include <asm/asm.h>
17 #include <asm/smap.h>
17 #include <linux/linkage.h> 18 #include <linux/linkage.h>
18 #include <linux/err.h> 19 #include <linux/err.h>
19 20
20 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 21 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
21 #include <linux/elf-em.h> 22 #include <linux/elf-em.h>
22 #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) 23 #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
23 #define __AUDIT_ARCH_LE 0x40000000 24 #define __AUDIT_ARCH_LE 0x40000000
24 25
25 #ifndef CONFIG_AUDITSYSCALL 26 #ifndef CONFIG_AUDITSYSCALL
26 #define sysexit_audit ia32_ret_from_sys_call 27 #define sysexit_audit ia32_ret_from_sys_call
27 #define sysretl_audit ia32_ret_from_sys_call 28 #define sysretl_audit ia32_ret_from_sys_call
28 #endif 29 #endif
29 30
30 .section .entry.text, "ax" 31 .section .entry.text, "ax"
31 32
32 .macro IA32_ARG_FIXUP noebp=0 33 .macro IA32_ARG_FIXUP noebp=0
33 movl %edi,%r8d 34 movl %edi,%r8d
34 .if \noebp 35 .if \noebp
35 .else 36 .else
36 movl %ebp,%r9d 37 movl %ebp,%r9d
37 .endif 38 .endif
38 xchg %ecx,%esi 39 xchg %ecx,%esi
39 movl %ebx,%edi 40 movl %ebx,%edi
40 movl %edx,%edx /* zero extension */ 41 movl %edx,%edx /* zero extension */
41 .endm 42 .endm
42 43
43 /* clobbers %eax */ 44 /* clobbers %eax */
44 .macro CLEAR_RREGS offset=0, _r9=rax 45 .macro CLEAR_RREGS offset=0, _r9=rax
45 xorl %eax,%eax 46 xorl %eax,%eax
46 movq %rax,\offset+R11(%rsp) 47 movq %rax,\offset+R11(%rsp)
47 movq %rax,\offset+R10(%rsp) 48 movq %rax,\offset+R10(%rsp)
48 movq %\_r9,\offset+R9(%rsp) 49 movq %\_r9,\offset+R9(%rsp)
49 movq %rax,\offset+R8(%rsp) 50 movq %rax,\offset+R8(%rsp)
50 .endm 51 .endm
51 52
52 /* 53 /*
53 * Reload arg registers from stack in case ptrace changed them. 54 * Reload arg registers from stack in case ptrace changed them.
54 * We don't reload %eax because syscall_trace_enter() returned 55 * We don't reload %eax because syscall_trace_enter() returned
55 * the %rax value we should see. Instead, we just truncate that 56 * the %rax value we should see. Instead, we just truncate that
56 * value to 32 bits again as we did on entry from user mode. 57 * value to 32 bits again as we did on entry from user mode.
57 * If it's a new value set by user_regset during entry tracing, 58 * If it's a new value set by user_regset during entry tracing,
58 * this matches the normal truncation of the user-mode value. 59 * this matches the normal truncation of the user-mode value.
59 * If it's -1 to make us punt the syscall, then (u32)-1 is still 60 * If it's -1 to make us punt the syscall, then (u32)-1 is still
60 * an appropriately invalid value. 61 * an appropriately invalid value.
61 */ 62 */
62 .macro LOAD_ARGS32 offset, _r9=0 63 .macro LOAD_ARGS32 offset, _r9=0
63 .if \_r9 64 .if \_r9
64 movl \offset+16(%rsp),%r9d 65 movl \offset+16(%rsp),%r9d
65 .endif 66 .endif
66 movl \offset+40(%rsp),%ecx 67 movl \offset+40(%rsp),%ecx
67 movl \offset+48(%rsp),%edx 68 movl \offset+48(%rsp),%edx
68 movl \offset+56(%rsp),%esi 69 movl \offset+56(%rsp),%esi
69 movl \offset+64(%rsp),%edi 70 movl \offset+64(%rsp),%edi
70 movl %eax,%eax /* zero extension */ 71 movl %eax,%eax /* zero extension */
71 .endm 72 .endm
72 73
73 .macro CFI_STARTPROC32 simple 74 .macro CFI_STARTPROC32 simple
74 CFI_STARTPROC \simple 75 CFI_STARTPROC \simple
75 CFI_UNDEFINED r8 76 CFI_UNDEFINED r8
76 CFI_UNDEFINED r9 77 CFI_UNDEFINED r9
77 CFI_UNDEFINED r10 78 CFI_UNDEFINED r10
78 CFI_UNDEFINED r11 79 CFI_UNDEFINED r11
79 CFI_UNDEFINED r12 80 CFI_UNDEFINED r12
80 CFI_UNDEFINED r13 81 CFI_UNDEFINED r13
81 CFI_UNDEFINED r14 82 CFI_UNDEFINED r14
82 CFI_UNDEFINED r15 83 CFI_UNDEFINED r15
83 .endm 84 .endm
84 85
85 #ifdef CONFIG_PARAVIRT 86 #ifdef CONFIG_PARAVIRT
86 ENTRY(native_usergs_sysret32) 87 ENTRY(native_usergs_sysret32)
87 swapgs 88 swapgs
88 sysretl 89 sysretl
89 ENDPROC(native_usergs_sysret32) 90 ENDPROC(native_usergs_sysret32)
90 91
91 ENTRY(native_irq_enable_sysexit) 92 ENTRY(native_irq_enable_sysexit)
92 swapgs 93 swapgs
93 sti 94 sti
94 sysexit 95 sysexit
95 ENDPROC(native_irq_enable_sysexit) 96 ENDPROC(native_irq_enable_sysexit)
96 #endif 97 #endif
97 98
98 /* 99 /*
99 * 32bit SYSENTER instruction entry. 100 * 32bit SYSENTER instruction entry.
100 * 101 *
101 * Arguments: 102 * Arguments:
102 * %eax System call number. 103 * %eax System call number.
103 * %ebx Arg1 104 * %ebx Arg1
104 * %ecx Arg2 105 * %ecx Arg2
105 * %edx Arg3 106 * %edx Arg3
106 * %esi Arg4 107 * %esi Arg4
107 * %edi Arg5 108 * %edi Arg5
108 * %ebp user stack 109 * %ebp user stack
109 * 0(%ebp) Arg6 110 * 0(%ebp) Arg6
110 * 111 *
111 * Interrupts off. 112 * Interrupts off.
112 * 113 *
113 * This is purely a fast path. For anything complicated we use the int 0x80 114 * This is purely a fast path. For anything complicated we use the int 0x80
114 * path below. Set up a complete hardware stack frame to share code 115 * path below. Set up a complete hardware stack frame to share code
115 * with the int 0x80 path. 116 * with the int 0x80 path.
116 */ 117 */
117 ENTRY(ia32_sysenter_target) 118 ENTRY(ia32_sysenter_target)
118 CFI_STARTPROC32 simple 119 CFI_STARTPROC32 simple
119 CFI_SIGNAL_FRAME 120 CFI_SIGNAL_FRAME
120 CFI_DEF_CFA rsp,0 121 CFI_DEF_CFA rsp,0
121 CFI_REGISTER rsp,rbp 122 CFI_REGISTER rsp,rbp
122 SWAPGS_UNSAFE_STACK 123 SWAPGS_UNSAFE_STACK
123 movq PER_CPU_VAR(kernel_stack), %rsp 124 movq PER_CPU_VAR(kernel_stack), %rsp
124 addq $(KERNEL_STACK_OFFSET),%rsp 125 addq $(KERNEL_STACK_OFFSET),%rsp
125 /* 126 /*
126 * No need to follow this irqs on/off section: the syscall 127 * No need to follow this irqs on/off section: the syscall
127 * disabled irqs, here we enable it straight after entry: 128 * disabled irqs, here we enable it straight after entry:
128 */ 129 */
129 ENABLE_INTERRUPTS(CLBR_NONE) 130 ENABLE_INTERRUPTS(CLBR_NONE)
130 movl %ebp,%ebp /* zero extension */ 131 movl %ebp,%ebp /* zero extension */
131 pushq_cfi $__USER32_DS 132 pushq_cfi $__USER32_DS
132 /*CFI_REL_OFFSET ss,0*/ 133 /*CFI_REL_OFFSET ss,0*/
133 pushq_cfi %rbp 134 pushq_cfi %rbp
134 CFI_REL_OFFSET rsp,0 135 CFI_REL_OFFSET rsp,0
135 pushfq_cfi 136 pushfq_cfi
136 /*CFI_REL_OFFSET rflags,0*/ 137 /*CFI_REL_OFFSET rflags,0*/
137 movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d 138 movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
138 CFI_REGISTER rip,r10 139 CFI_REGISTER rip,r10
139 pushq_cfi $__USER32_CS 140 pushq_cfi $__USER32_CS
140 /*CFI_REL_OFFSET cs,0*/ 141 /*CFI_REL_OFFSET cs,0*/
141 movl %eax, %eax 142 movl %eax, %eax
142 pushq_cfi %r10 143 pushq_cfi %r10
143 CFI_REL_OFFSET rip,0 144 CFI_REL_OFFSET rip,0
144 pushq_cfi %rax 145 pushq_cfi %rax
145 cld 146 cld
146 SAVE_ARGS 0,1,0 147 SAVE_ARGS 0,1,0
147 /* no need to do an access_ok check here because rbp has been 148 /* no need to do an access_ok check here because rbp has been
148 32bit zero extended */ 149 32bit zero extended */
150 ASM_STAC
149 1: movl (%rbp),%ebp 151 1: movl (%rbp),%ebp
150 _ASM_EXTABLE(1b,ia32_badarg) 152 _ASM_EXTABLE(1b,ia32_badarg)
153 ASM_CLAC
151 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) 154 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
152 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 155 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
153 CFI_REMEMBER_STATE 156 CFI_REMEMBER_STATE
154 jnz sysenter_tracesys 157 jnz sysenter_tracesys
155 cmpq $(IA32_NR_syscalls-1),%rax 158 cmpq $(IA32_NR_syscalls-1),%rax
156 ja ia32_badsys 159 ja ia32_badsys
157 sysenter_do_call: 160 sysenter_do_call:
158 IA32_ARG_FIXUP 161 IA32_ARG_FIXUP
159 sysenter_dispatch: 162 sysenter_dispatch:
160 call *ia32_sys_call_table(,%rax,8) 163 call *ia32_sys_call_table(,%rax,8)
161 movq %rax,RAX-ARGOFFSET(%rsp) 164 movq %rax,RAX-ARGOFFSET(%rsp)
162 DISABLE_INTERRUPTS(CLBR_NONE) 165 DISABLE_INTERRUPTS(CLBR_NONE)
163 TRACE_IRQS_OFF 166 TRACE_IRQS_OFF
164 testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 167 testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
165 jnz sysexit_audit 168 jnz sysexit_audit
166 sysexit_from_sys_call: 169 sysexit_from_sys_call:
167 andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) 170 andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
168 /* clear IF, that popfq doesn't enable interrupts early */ 171 /* clear IF, that popfq doesn't enable interrupts early */
169 andl $~0x200,EFLAGS-R11(%rsp) 172 andl $~0x200,EFLAGS-R11(%rsp)
170 movl RIP-R11(%rsp),%edx /* User %eip */ 173 movl RIP-R11(%rsp),%edx /* User %eip */
171 CFI_REGISTER rip,rdx 174 CFI_REGISTER rip,rdx
172 RESTORE_ARGS 0,24,0,0,0,0 175 RESTORE_ARGS 0,24,0,0,0,0
173 xorq %r8,%r8 176 xorq %r8,%r8
174 xorq %r9,%r9 177 xorq %r9,%r9
175 xorq %r10,%r10 178 xorq %r10,%r10
176 xorq %r11,%r11 179 xorq %r11,%r11
177 popfq_cfi 180 popfq_cfi
178 /*CFI_RESTORE rflags*/ 181 /*CFI_RESTORE rflags*/
179 popq_cfi %rcx /* User %esp */ 182 popq_cfi %rcx /* User %esp */
180 CFI_REGISTER rsp,rcx 183 CFI_REGISTER rsp,rcx
181 TRACE_IRQS_ON 184 TRACE_IRQS_ON
182 ENABLE_INTERRUPTS_SYSEXIT32 185 ENABLE_INTERRUPTS_SYSEXIT32
183 186
184 #ifdef CONFIG_AUDITSYSCALL 187 #ifdef CONFIG_AUDITSYSCALL
185 .macro auditsys_entry_common 188 .macro auditsys_entry_common
186 movl %esi,%r9d /* 6th arg: 4th syscall arg */ 189 movl %esi,%r9d /* 6th arg: 4th syscall arg */
187 movl %edx,%r8d /* 5th arg: 3rd syscall arg */ 190 movl %edx,%r8d /* 5th arg: 3rd syscall arg */
188 /* (already in %ecx) 4th arg: 2nd syscall arg */ 191 /* (already in %ecx) 4th arg: 2nd syscall arg */
189 movl %ebx,%edx /* 3rd arg: 1st syscall arg */ 192 movl %ebx,%edx /* 3rd arg: 1st syscall arg */
190 movl %eax,%esi /* 2nd arg: syscall number */ 193 movl %eax,%esi /* 2nd arg: syscall number */
191 movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ 194 movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
192 call __audit_syscall_entry 195 call __audit_syscall_entry
193 movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ 196 movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
194 cmpq $(IA32_NR_syscalls-1),%rax 197 cmpq $(IA32_NR_syscalls-1),%rax
195 ja ia32_badsys 198 ja ia32_badsys
196 movl %ebx,%edi /* reload 1st syscall arg */ 199 movl %ebx,%edi /* reload 1st syscall arg */
197 movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ 200 movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
198 movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */ 201 movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */
199 movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */ 202 movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */
200 movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ 203 movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */
201 .endm 204 .endm
202 205
203 .macro auditsys_exit exit 206 .macro auditsys_exit exit
204 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 207 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
205 jnz ia32_ret_from_sys_call 208 jnz ia32_ret_from_sys_call
206 TRACE_IRQS_ON 209 TRACE_IRQS_ON
207 sti 210 sti
208 movl %eax,%esi /* second arg, syscall return value */ 211 movl %eax,%esi /* second arg, syscall return value */
209 cmpl $-MAX_ERRNO,%eax /* is it an error ? */ 212 cmpl $-MAX_ERRNO,%eax /* is it an error ? */
210 jbe 1f 213 jbe 1f
211 movslq %eax, %rsi /* if error sign extend to 64 bits */ 214 movslq %eax, %rsi /* if error sign extend to 64 bits */
212 1: setbe %al /* 1 if error, 0 if not */ 215 1: setbe %al /* 1 if error, 0 if not */
213 movzbl %al,%edi /* zero-extend that into %edi */ 216 movzbl %al,%edi /* zero-extend that into %edi */
214 call __audit_syscall_exit 217 call __audit_syscall_exit
215 movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ 218 movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */
216 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi 219 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
217 cli 220 cli
218 TRACE_IRQS_OFF 221 TRACE_IRQS_OFF
219 testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 222 testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
220 jz \exit 223 jz \exit
221 CLEAR_RREGS -ARGOFFSET 224 CLEAR_RREGS -ARGOFFSET
222 jmp int_with_check 225 jmp int_with_check
223 .endm 226 .endm
224 227
225 sysenter_auditsys: 228 sysenter_auditsys:
226 CFI_RESTORE_STATE 229 CFI_RESTORE_STATE
227 auditsys_entry_common 230 auditsys_entry_common
228 movl %ebp,%r9d /* reload 6th syscall arg */ 231 movl %ebp,%r9d /* reload 6th syscall arg */
229 jmp sysenter_dispatch 232 jmp sysenter_dispatch
230 233
231 sysexit_audit: 234 sysexit_audit:
232 auditsys_exit sysexit_from_sys_call 235 auditsys_exit sysexit_from_sys_call
233 #endif 236 #endif
234 237
235 sysenter_tracesys: 238 sysenter_tracesys:
236 #ifdef CONFIG_AUDITSYSCALL 239 #ifdef CONFIG_AUDITSYSCALL
237 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 240 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
238 jz sysenter_auditsys 241 jz sysenter_auditsys
239 #endif 242 #endif
240 SAVE_REST 243 SAVE_REST
241 CLEAR_RREGS 244 CLEAR_RREGS
242 movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ 245 movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
243 movq %rsp,%rdi /* &pt_regs -> arg1 */ 246 movq %rsp,%rdi /* &pt_regs -> arg1 */
244 call syscall_trace_enter 247 call syscall_trace_enter
245 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ 248 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
246 RESTORE_REST 249 RESTORE_REST
247 cmpq $(IA32_NR_syscalls-1),%rax 250 cmpq $(IA32_NR_syscalls-1),%rax
248 ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ 251 ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
249 jmp sysenter_do_call 252 jmp sysenter_do_call
250 CFI_ENDPROC 253 CFI_ENDPROC
251 ENDPROC(ia32_sysenter_target) 254 ENDPROC(ia32_sysenter_target)
252 255
253 /* 256 /*
254 * 32bit SYSCALL instruction entry. 257 * 32bit SYSCALL instruction entry.
255 * 258 *
256 * Arguments: 259 * Arguments:
257 * %eax System call number. 260 * %eax System call number.
258 * %ebx Arg1 261 * %ebx Arg1
259 * %ecx return EIP 262 * %ecx return EIP
260 * %edx Arg3 263 * %edx Arg3
261 * %esi Arg4 264 * %esi Arg4
262 * %edi Arg5 265 * %edi Arg5
263 * %ebp Arg2 [note: not saved in the stack frame, should not be touched] 266 * %ebp Arg2 [note: not saved in the stack frame, should not be touched]
264 * %esp user stack 267 * %esp user stack
265 * 0(%esp) Arg6 268 * 0(%esp) Arg6
266 * 269 *
267 * Interrupts off. 270 * Interrupts off.
268 * 271 *
269 * This is purely a fast path. For anything complicated we use the int 0x80 272 * This is purely a fast path. For anything complicated we use the int 0x80
270 * path below. Set up a complete hardware stack frame to share code 273 * path below. Set up a complete hardware stack frame to share code
271 * with the int 0x80 path. 274 * with the int 0x80 path.
272 */ 275 */
273 ENTRY(ia32_cstar_target) 276 ENTRY(ia32_cstar_target)
274 CFI_STARTPROC32 simple 277 CFI_STARTPROC32 simple
275 CFI_SIGNAL_FRAME 278 CFI_SIGNAL_FRAME
276 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET 279 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
277 CFI_REGISTER rip,rcx 280 CFI_REGISTER rip,rcx
278 /*CFI_REGISTER rflags,r11*/ 281 /*CFI_REGISTER rflags,r11*/
279 SWAPGS_UNSAFE_STACK 282 SWAPGS_UNSAFE_STACK
280 movl %esp,%r8d 283 movl %esp,%r8d
281 CFI_REGISTER rsp,r8 284 CFI_REGISTER rsp,r8
282 movq PER_CPU_VAR(kernel_stack),%rsp 285 movq PER_CPU_VAR(kernel_stack),%rsp
283 /* 286 /*
284 * No need to follow this irqs on/off section: the syscall 287 * No need to follow this irqs on/off section: the syscall
285 * disabled irqs and here we enable it straight after entry: 288 * disabled irqs and here we enable it straight after entry:
286 */ 289 */
287 ENABLE_INTERRUPTS(CLBR_NONE) 290 ENABLE_INTERRUPTS(CLBR_NONE)
288 SAVE_ARGS 8,0,0 291 SAVE_ARGS 8,0,0
289 movl %eax,%eax /* zero extension */ 292 movl %eax,%eax /* zero extension */
290 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 293 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
291 movq %rcx,RIP-ARGOFFSET(%rsp) 294 movq %rcx,RIP-ARGOFFSET(%rsp)
292 CFI_REL_OFFSET rip,RIP-ARGOFFSET 295 CFI_REL_OFFSET rip,RIP-ARGOFFSET
293 movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */ 296 movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
294 movl %ebp,%ecx 297 movl %ebp,%ecx
295 movq $__USER32_CS,CS-ARGOFFSET(%rsp) 298 movq $__USER32_CS,CS-ARGOFFSET(%rsp)
296 movq $__USER32_DS,SS-ARGOFFSET(%rsp) 299 movq $__USER32_DS,SS-ARGOFFSET(%rsp)
297 movq %r11,EFLAGS-ARGOFFSET(%rsp) 300 movq %r11,EFLAGS-ARGOFFSET(%rsp)
298 /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ 301 /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
299 movq %r8,RSP-ARGOFFSET(%rsp) 302 movq %r8,RSP-ARGOFFSET(%rsp)
300 CFI_REL_OFFSET rsp,RSP-ARGOFFSET 303 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
301 /* no need to do an access_ok check here because r8 has been 304 /* no need to do an access_ok check here because r8 has been
302 32bit zero extended */ 305 32bit zero extended */
303 /* hardware stack frame is complete now */ 306 /* hardware stack frame is complete now */
307 ASM_STAC
304 1: movl (%r8),%r9d 308 1: movl (%r8),%r9d
305 _ASM_EXTABLE(1b,ia32_badarg) 309 _ASM_EXTABLE(1b,ia32_badarg)
310 ASM_CLAC
306 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) 311 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
307 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 312 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
308 CFI_REMEMBER_STATE 313 CFI_REMEMBER_STATE
309 jnz cstar_tracesys 314 jnz cstar_tracesys
310 cmpq $IA32_NR_syscalls-1,%rax 315 cmpq $IA32_NR_syscalls-1,%rax
311 ja ia32_badsys 316 ja ia32_badsys
312 cstar_do_call: 317 cstar_do_call:
313 IA32_ARG_FIXUP 1 318 IA32_ARG_FIXUP 1
314 cstar_dispatch: 319 cstar_dispatch:
315 call *ia32_sys_call_table(,%rax,8) 320 call *ia32_sys_call_table(,%rax,8)
316 movq %rax,RAX-ARGOFFSET(%rsp) 321 movq %rax,RAX-ARGOFFSET(%rsp)
317 DISABLE_INTERRUPTS(CLBR_NONE) 322 DISABLE_INTERRUPTS(CLBR_NONE)
318 TRACE_IRQS_OFF 323 TRACE_IRQS_OFF
319 testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 324 testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
320 jnz sysretl_audit 325 jnz sysretl_audit
321 sysretl_from_sys_call: 326 sysretl_from_sys_call:
322 andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) 327 andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
323 RESTORE_ARGS 0,-ARG_SKIP,0,0,0 328 RESTORE_ARGS 0,-ARG_SKIP,0,0,0
324 movl RIP-ARGOFFSET(%rsp),%ecx 329 movl RIP-ARGOFFSET(%rsp),%ecx
325 CFI_REGISTER rip,rcx 330 CFI_REGISTER rip,rcx
326 movl EFLAGS-ARGOFFSET(%rsp),%r11d 331 movl EFLAGS-ARGOFFSET(%rsp),%r11d
327 /*CFI_REGISTER rflags,r11*/ 332 /*CFI_REGISTER rflags,r11*/
328 xorq %r10,%r10 333 xorq %r10,%r10
329 xorq %r9,%r9 334 xorq %r9,%r9
330 xorq %r8,%r8 335 xorq %r8,%r8
331 TRACE_IRQS_ON 336 TRACE_IRQS_ON
332 movl RSP-ARGOFFSET(%rsp),%esp 337 movl RSP-ARGOFFSET(%rsp),%esp
333 CFI_RESTORE rsp 338 CFI_RESTORE rsp
334 USERGS_SYSRET32 339 USERGS_SYSRET32
335 340
336 #ifdef CONFIG_AUDITSYSCALL 341 #ifdef CONFIG_AUDITSYSCALL
337 cstar_auditsys: 342 cstar_auditsys:
338 CFI_RESTORE_STATE 343 CFI_RESTORE_STATE
339 movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */ 344 movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */
340 auditsys_entry_common 345 auditsys_entry_common
341 movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */ 346 movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */
342 jmp cstar_dispatch 347 jmp cstar_dispatch
343 348
344 sysretl_audit: 349 sysretl_audit:
345 auditsys_exit sysretl_from_sys_call 350 auditsys_exit sysretl_from_sys_call
346 #endif 351 #endif
347 352
348 cstar_tracesys: 353 cstar_tracesys:
349 #ifdef CONFIG_AUDITSYSCALL 354 #ifdef CONFIG_AUDITSYSCALL
350 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 355 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
351 jz cstar_auditsys 356 jz cstar_auditsys
352 #endif 357 #endif
353 xchgl %r9d,%ebp 358 xchgl %r9d,%ebp
354 SAVE_REST 359 SAVE_REST
355 CLEAR_RREGS 0, r9 360 CLEAR_RREGS 0, r9
356 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ 361 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
357 movq %rsp,%rdi /* &pt_regs -> arg1 */ 362 movq %rsp,%rdi /* &pt_regs -> arg1 */
358 call syscall_trace_enter 363 call syscall_trace_enter
359 LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ 364 LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
360 RESTORE_REST 365 RESTORE_REST
361 xchgl %ebp,%r9d 366 xchgl %ebp,%r9d
362 cmpq $(IA32_NR_syscalls-1),%rax 367 cmpq $(IA32_NR_syscalls-1),%rax
363 ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ 368 ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
364 jmp cstar_do_call 369 jmp cstar_do_call
365 END(ia32_cstar_target) 370 END(ia32_cstar_target)
366 371
367 ia32_badarg: 372 ia32_badarg:
373 ASM_CLAC
368 movq $-EFAULT,%rax 374 movq $-EFAULT,%rax
369 jmp ia32_sysret 375 jmp ia32_sysret
370 CFI_ENDPROC 376 CFI_ENDPROC
371 377
372 /* 378 /*
373 * Emulated IA32 system calls via int 0x80. 379 * Emulated IA32 system calls via int 0x80.
374 * 380 *
375 * Arguments: 381 * Arguments:
376 * %eax System call number. 382 * %eax System call number.
377 * %ebx Arg1 383 * %ebx Arg1
378 * %ecx Arg2 384 * %ecx Arg2
379 * %edx Arg3 385 * %edx Arg3
380 * %esi Arg4 386 * %esi Arg4
381 * %edi Arg5 387 * %edi Arg5
382 * %ebp Arg6 [note: not saved in the stack frame, should not be touched] 388 * %ebp Arg6 [note: not saved in the stack frame, should not be touched]
383 * 389 *
384 * Notes: 390 * Notes:
385 * Uses the same stack frame as the x86-64 version. 391 * Uses the same stack frame as the x86-64 version.
386 * All registers except %eax must be saved (but ptrace may violate that) 392 * All registers except %eax must be saved (but ptrace may violate that)
387 * Arguments are zero extended. For system calls that want sign extension and 393 * Arguments are zero extended. For system calls that want sign extension and
388 * take long arguments a wrapper is needed. Most calls can just be called 394 * take long arguments a wrapper is needed. Most calls can just be called
389 * directly. 395 * directly.
390 * Assumes it is only called from user space and entered with interrupts off. 396 * Assumes it is only called from user space and entered with interrupts off.
391 */ 397 */
392 398
393 ENTRY(ia32_syscall) 399 ENTRY(ia32_syscall)
394 CFI_STARTPROC32 simple 400 CFI_STARTPROC32 simple
395 CFI_SIGNAL_FRAME 401 CFI_SIGNAL_FRAME
396 CFI_DEF_CFA rsp,SS+8-RIP 402 CFI_DEF_CFA rsp,SS+8-RIP
397 /*CFI_REL_OFFSET ss,SS-RIP*/ 403 /*CFI_REL_OFFSET ss,SS-RIP*/
398 CFI_REL_OFFSET rsp,RSP-RIP 404 CFI_REL_OFFSET rsp,RSP-RIP
399 /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ 405 /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
400 /*CFI_REL_OFFSET cs,CS-RIP*/ 406 /*CFI_REL_OFFSET cs,CS-RIP*/
401 CFI_REL_OFFSET rip,RIP-RIP 407 CFI_REL_OFFSET rip,RIP-RIP
402 PARAVIRT_ADJUST_EXCEPTION_FRAME 408 PARAVIRT_ADJUST_EXCEPTION_FRAME
403 SWAPGS 409 SWAPGS
404 /* 410 /*
405 * No need to follow this irqs on/off section: the syscall 411 * No need to follow this irqs on/off section: the syscall
406 * disabled irqs and here we enable it straight after entry: 412 * disabled irqs and here we enable it straight after entry:
407 */ 413 */
408 ENABLE_INTERRUPTS(CLBR_NONE) 414 ENABLE_INTERRUPTS(CLBR_NONE)
409 movl %eax,%eax 415 movl %eax,%eax
410 pushq_cfi %rax 416 pushq_cfi %rax
411 cld 417 cld
412 /* note the registers are not zero extended to the sf. 418 /* note the registers are not zero extended to the sf.
413 this could be a problem. */ 419 this could be a problem. */
414 SAVE_ARGS 0,1,0 420 SAVE_ARGS 0,1,0
415 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) 421 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
416 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 422 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
417 jnz ia32_tracesys 423 jnz ia32_tracesys
418 cmpq $(IA32_NR_syscalls-1),%rax 424 cmpq $(IA32_NR_syscalls-1),%rax
419 ja ia32_badsys 425 ja ia32_badsys
420 ia32_do_call: 426 ia32_do_call:
421 IA32_ARG_FIXUP 427 IA32_ARG_FIXUP
422 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative 428 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
423 ia32_sysret: 429 ia32_sysret:
424 movq %rax,RAX-ARGOFFSET(%rsp) 430 movq %rax,RAX-ARGOFFSET(%rsp)
425 ia32_ret_from_sys_call: 431 ia32_ret_from_sys_call:
426 CLEAR_RREGS -ARGOFFSET 432 CLEAR_RREGS -ARGOFFSET
427 jmp int_ret_from_sys_call 433 jmp int_ret_from_sys_call
428 434
429 ia32_tracesys: 435 ia32_tracesys:
430 SAVE_REST 436 SAVE_REST
431 CLEAR_RREGS 437 CLEAR_RREGS
432 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ 438 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
433 movq %rsp,%rdi /* &pt_regs -> arg1 */ 439 movq %rsp,%rdi /* &pt_regs -> arg1 */
434 call syscall_trace_enter 440 call syscall_trace_enter
435 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ 441 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
436 RESTORE_REST 442 RESTORE_REST
437 cmpq $(IA32_NR_syscalls-1),%rax 443 cmpq $(IA32_NR_syscalls-1),%rax
438 ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ 444 ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
439 jmp ia32_do_call 445 jmp ia32_do_call
440 END(ia32_syscall) 446 END(ia32_syscall)
441 447
442 ia32_badsys: 448 ia32_badsys:
443 movq $0,ORIG_RAX-ARGOFFSET(%rsp) 449 movq $0,ORIG_RAX-ARGOFFSET(%rsp)
444 movq $-ENOSYS,%rax 450 movq $-ENOSYS,%rax
445 jmp ia32_sysret 451 jmp ia32_sysret
446 452
447 CFI_ENDPROC 453 CFI_ENDPROC
448 454
449 .macro PTREGSCALL label, func, arg 455 .macro PTREGSCALL label, func, arg
450 ALIGN 456 ALIGN
451 GLOBAL(\label) 457 GLOBAL(\label)
452 leaq \func(%rip),%rax 458 leaq \func(%rip),%rax
453 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ 459 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
454 jmp ia32_ptregs_common 460 jmp ia32_ptregs_common
455 .endm 461 .endm
456 462
457 CFI_STARTPROC32 463 CFI_STARTPROC32
458 464
459 PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi 465 PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
460 PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi 466 PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
461 PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx 467 PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
462 PTREGSCALL stub32_execve, sys32_execve, %rcx 468 PTREGSCALL stub32_execve, sys32_execve, %rcx
463 PTREGSCALL stub32_fork, sys_fork, %rdi 469 PTREGSCALL stub32_fork, sys_fork, %rdi
464 PTREGSCALL stub32_clone, sys32_clone, %rdx 470 PTREGSCALL stub32_clone, sys32_clone, %rdx
465 PTREGSCALL stub32_vfork, sys_vfork, %rdi 471 PTREGSCALL stub32_vfork, sys_vfork, %rdi
466 PTREGSCALL stub32_iopl, sys_iopl, %rsi 472 PTREGSCALL stub32_iopl, sys_iopl, %rsi
467 473
468 ALIGN 474 ALIGN
469 ia32_ptregs_common: 475 ia32_ptregs_common:
470 popq %r11 476 popq %r11
471 CFI_ENDPROC 477 CFI_ENDPROC
472 CFI_STARTPROC32 simple 478 CFI_STARTPROC32 simple
473 CFI_SIGNAL_FRAME 479 CFI_SIGNAL_FRAME
474 CFI_DEF_CFA rsp,SS+8-ARGOFFSET 480 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
475 CFI_REL_OFFSET rax,RAX-ARGOFFSET 481 CFI_REL_OFFSET rax,RAX-ARGOFFSET
476 CFI_REL_OFFSET rcx,RCX-ARGOFFSET 482 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
477 CFI_REL_OFFSET rdx,RDX-ARGOFFSET 483 CFI_REL_OFFSET rdx,RDX-ARGOFFSET
478 CFI_REL_OFFSET rsi,RSI-ARGOFFSET 484 CFI_REL_OFFSET rsi,RSI-ARGOFFSET
479 CFI_REL_OFFSET rdi,RDI-ARGOFFSET 485 CFI_REL_OFFSET rdi,RDI-ARGOFFSET
480 CFI_REL_OFFSET rip,RIP-ARGOFFSET 486 CFI_REL_OFFSET rip,RIP-ARGOFFSET
481 /* CFI_REL_OFFSET cs,CS-ARGOFFSET*/ 487 /* CFI_REL_OFFSET cs,CS-ARGOFFSET*/
482 /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ 488 /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
483 CFI_REL_OFFSET rsp,RSP-ARGOFFSET 489 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
484 /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ 490 /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/
485 SAVE_REST 491 SAVE_REST
486 call *%rax 492 call *%rax
487 RESTORE_REST 493 RESTORE_REST
488 jmp ia32_sysret /* misbalances the return cache */ 494 jmp ia32_sysret /* misbalances the return cache */
489 CFI_ENDPROC 495 CFI_ENDPROC
490 END(ia32_ptregs_common) 496 END(ia32_ptregs_common)
491 497
arch/x86/include/asm/fpu-internal.h
1 /* 1 /*
2 * Copyright (C) 1994 Linus Torvalds 2 * Copyright (C) 1994 Linus Torvalds
3 * 3 *
4 * Pentium III FXSR, SSE support 4 * Pentium III FXSR, SSE support
5 * General FPU state handling cleanups 5 * General FPU state handling cleanups
6 * Gareth Hughes <gareth@valinux.com>, May 2000 6 * Gareth Hughes <gareth@valinux.com>, May 2000
7 * x86-64 work by Andi Kleen 2002 7 * x86-64 work by Andi Kleen 2002
8 */ 8 */
9 9
10 #ifndef _FPU_INTERNAL_H 10 #ifndef _FPU_INTERNAL_H
11 #define _FPU_INTERNAL_H 11 #define _FPU_INTERNAL_H
12 12
13 #include <linux/kernel_stat.h> 13 #include <linux/kernel_stat.h>
14 #include <linux/regset.h> 14 #include <linux/regset.h>
15 #include <linux/slab.h> 15 #include <linux/slab.h>
16 #include <asm/asm.h> 16 #include <asm/asm.h>
17 #include <asm/cpufeature.h> 17 #include <asm/cpufeature.h>
18 #include <asm/processor.h> 18 #include <asm/processor.h>
19 #include <asm/sigcontext.h> 19 #include <asm/sigcontext.h>
20 #include <asm/user.h> 20 #include <asm/user.h>
21 #include <asm/uaccess.h> 21 #include <asm/uaccess.h>
22 #include <asm/xsave.h> 22 #include <asm/xsave.h>
23 23
24 extern unsigned int sig_xstate_size; 24 extern unsigned int sig_xstate_size;
25 extern void fpu_init(void); 25 extern void fpu_init(void);
26 26
27 DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); 27 DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
28 28
29 extern user_regset_active_fn fpregs_active, xfpregs_active; 29 extern user_regset_active_fn fpregs_active, xfpregs_active;
30 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, 30 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
31 xstateregs_get; 31 xstateregs_get;
32 extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, 32 extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
33 xstateregs_set; 33 xstateregs_set;
34 34
35 35
36 /* 36 /*
37 * xstateregs_active == fpregs_active. Please refer to the comment 37 * xstateregs_active == fpregs_active. Please refer to the comment
38 * at the definition of fpregs_active. 38 * at the definition of fpregs_active.
39 */ 39 */
40 #define xstateregs_active fpregs_active 40 #define xstateregs_active fpregs_active
41 41
42 extern struct _fpx_sw_bytes fx_sw_reserved; 42 extern struct _fpx_sw_bytes fx_sw_reserved;
43 #ifdef CONFIG_IA32_EMULATION 43 #ifdef CONFIG_IA32_EMULATION
44 extern unsigned int sig_xstate_ia32_size; 44 extern unsigned int sig_xstate_ia32_size;
45 extern struct _fpx_sw_bytes fx_sw_reserved_ia32; 45 extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
46 struct _fpstate_ia32; 46 struct _fpstate_ia32;
47 struct _xstate_ia32; 47 struct _xstate_ia32;
48 extern int save_i387_xstate_ia32(void __user *buf); 48 extern int save_i387_xstate_ia32(void __user *buf);
49 extern int restore_i387_xstate_ia32(void __user *buf); 49 extern int restore_i387_xstate_ia32(void __user *buf);
50 #endif 50 #endif
51 51
52 #ifdef CONFIG_MATH_EMULATION 52 #ifdef CONFIG_MATH_EMULATION
53 extern void finit_soft_fpu(struct i387_soft_struct *soft); 53 extern void finit_soft_fpu(struct i387_soft_struct *soft);
54 #else 54 #else
55 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} 55 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
56 #endif 56 #endif
57 57
58 #define X87_FSW_ES (1 << 7) /* Exception Summary */ 58 #define X87_FSW_ES (1 << 7) /* Exception Summary */
59 59
60 static __always_inline __pure bool use_xsaveopt(void) 60 static __always_inline __pure bool use_xsaveopt(void)
61 { 61 {
62 return static_cpu_has(X86_FEATURE_XSAVEOPT); 62 return static_cpu_has(X86_FEATURE_XSAVEOPT);
63 } 63 }
64 64
65 static __always_inline __pure bool use_xsave(void) 65 static __always_inline __pure bool use_xsave(void)
66 { 66 {
67 return static_cpu_has(X86_FEATURE_XSAVE); 67 return static_cpu_has(X86_FEATURE_XSAVE);
68 } 68 }
69 69
70 static __always_inline __pure bool use_fxsr(void) 70 static __always_inline __pure bool use_fxsr(void)
71 { 71 {
72 return static_cpu_has(X86_FEATURE_FXSR); 72 return static_cpu_has(X86_FEATURE_FXSR);
73 } 73 }
74 74
75 extern void __sanitize_i387_state(struct task_struct *); 75 extern void __sanitize_i387_state(struct task_struct *);
76 76
77 static inline void sanitize_i387_state(struct task_struct *tsk) 77 static inline void sanitize_i387_state(struct task_struct *tsk)
78 { 78 {
79 if (!use_xsaveopt()) 79 if (!use_xsaveopt())
80 return; 80 return;
81 __sanitize_i387_state(tsk); 81 __sanitize_i387_state(tsk);
82 } 82 }
83 83
84 #ifdef CONFIG_X86_64 84 #ifdef CONFIG_X86_64
85 static inline int fxrstor_checking(struct i387_fxsave_struct *fx) 85 static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
86 { 86 {
87 int err; 87 int err;
88 88
89 /* See comment in fxsave() below. */ 89 /* See comment in fxsave() below. */
90 #ifdef CONFIG_AS_FXSAVEQ 90 #ifdef CONFIG_AS_FXSAVEQ
91 asm volatile("1: fxrstorq %[fx]\n\t" 91 asm volatile("1: fxrstorq %[fx]\n\t"
92 "2:\n" 92 "2:\n"
93 ".section .fixup,\"ax\"\n" 93 ".section .fixup,\"ax\"\n"
94 "3: movl $-1,%[err]\n" 94 "3: movl $-1,%[err]\n"
95 " jmp 2b\n" 95 " jmp 2b\n"
96 ".previous\n" 96 ".previous\n"
97 _ASM_EXTABLE(1b, 3b) 97 _ASM_EXTABLE(1b, 3b)
98 : [err] "=r" (err) 98 : [err] "=r" (err)
99 : [fx] "m" (*fx), "0" (0)); 99 : [fx] "m" (*fx), "0" (0));
100 #else 100 #else
101 asm volatile("1: rex64/fxrstor (%[fx])\n\t" 101 asm volatile("1: rex64/fxrstor (%[fx])\n\t"
102 "2:\n" 102 "2:\n"
103 ".section .fixup,\"ax\"\n" 103 ".section .fixup,\"ax\"\n"
104 "3: movl $-1,%[err]\n" 104 "3: movl $-1,%[err]\n"
105 " jmp 2b\n" 105 " jmp 2b\n"
106 ".previous\n" 106 ".previous\n"
107 _ASM_EXTABLE(1b, 3b) 107 _ASM_EXTABLE(1b, 3b)
108 : [err] "=r" (err) 108 : [err] "=r" (err)
109 : [fx] "R" (fx), "m" (*fx), "0" (0)); 109 : [fx] "R" (fx), "m" (*fx), "0" (0));
110 #endif 110 #endif
111 return err; 111 return err;
112 } 112 }
113 113
114 static inline int fxsave_user(struct i387_fxsave_struct __user *fx) 114 static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
115 { 115 {
116 int err; 116 int err;
117 117
118 /* 118 /*
119 * Clear the bytes not touched by the fxsave and reserved 119 * Clear the bytes not touched by the fxsave and reserved
120 * for the SW usage. 120 * for the SW usage.
121 */ 121 */
122 err = __clear_user(&fx->sw_reserved, 122 err = __clear_user(&fx->sw_reserved,
123 sizeof(struct _fpx_sw_bytes)); 123 sizeof(struct _fpx_sw_bytes));
124 if (unlikely(err)) 124 if (unlikely(err))
125 return -EFAULT; 125 return -EFAULT;
126 126
127 /* See comment in fxsave() below. */ 127 /* See comment in fxsave() below. */
128 #ifdef CONFIG_AS_FXSAVEQ 128 #ifdef CONFIG_AS_FXSAVEQ
129 asm volatile("1: fxsaveq %[fx]\n\t" 129 asm volatile(ASM_STAC "\n"
130 "2:\n" 130 "1: fxsaveq %[fx]\n\t"
131 "2: " ASM_CLAC "\n"
131 ".section .fixup,\"ax\"\n" 132 ".section .fixup,\"ax\"\n"
132 "3: movl $-1,%[err]\n" 133 "3: movl $-1,%[err]\n"
133 " jmp 2b\n" 134 " jmp 2b\n"
134 ".previous\n" 135 ".previous\n"
135 _ASM_EXTABLE(1b, 3b) 136 _ASM_EXTABLE(1b, 3b)
136 : [err] "=r" (err), [fx] "=m" (*fx) 137 : [err] "=r" (err), [fx] "=m" (*fx)
137 : "0" (0)); 138 : "0" (0));
138 #else 139 #else
139 asm volatile("1: rex64/fxsave (%[fx])\n\t" 140 asm volatile(ASM_STAC "\n"
140 "2:\n" 141 "1: rex64/fxsave (%[fx])\n\t"
142 "2: " ASM_CLAC "\n"
141 ".section .fixup,\"ax\"\n" 143 ".section .fixup,\"ax\"\n"
142 "3: movl $-1,%[err]\n" 144 "3: movl $-1,%[err]\n"
143 " jmp 2b\n" 145 " jmp 2b\n"
144 ".previous\n" 146 ".previous\n"
145 _ASM_EXTABLE(1b, 3b) 147 _ASM_EXTABLE(1b, 3b)
146 : [err] "=r" (err), "=m" (*fx) 148 : [err] "=r" (err), "=m" (*fx)
147 : [fx] "R" (fx), "0" (0)); 149 : [fx] "R" (fx), "0" (0));
148 #endif 150 #endif
149 if (unlikely(err) && 151 if (unlikely(err) &&
150 __clear_user(fx, sizeof(struct i387_fxsave_struct))) 152 __clear_user(fx, sizeof(struct i387_fxsave_struct)))
151 err = -EFAULT; 153 err = -EFAULT;
152 /* No need to clear here because the caller clears USED_MATH */ 154 /* No need to clear here because the caller clears USED_MATH */
153 return err; 155 return err;
154 } 156 }
155 157
156 static inline void fpu_fxsave(struct fpu *fpu) 158 static inline void fpu_fxsave(struct fpu *fpu)
157 { 159 {
158 /* Using "rex64; fxsave %0" is broken because, if the memory operand 160 /* Using "rex64; fxsave %0" is broken because, if the memory operand
159 uses any extended registers for addressing, a second REX prefix 161 uses any extended registers for addressing, a second REX prefix
160 will be generated (to the assembler, rex64 followed by semicolon 162 will be generated (to the assembler, rex64 followed by semicolon
161 is a separate instruction), and hence the 64-bitness is lost. */ 163 is a separate instruction), and hence the 64-bitness is lost. */
162 164
163 #ifdef CONFIG_AS_FXSAVEQ 165 #ifdef CONFIG_AS_FXSAVEQ
164 /* Using "fxsaveq %0" would be the ideal choice, but is only supported 166 /* Using "fxsaveq %0" would be the ideal choice, but is only supported
165 starting with gas 2.16. */ 167 starting with gas 2.16. */
166 __asm__ __volatile__("fxsaveq %0" 168 __asm__ __volatile__("fxsaveq %0"
167 : "=m" (fpu->state->fxsave)); 169 : "=m" (fpu->state->fxsave));
168 #else 170 #else
169 /* Using, as a workaround, the properly prefixed form below isn't 171 /* Using, as a workaround, the properly prefixed form below isn't
170 accepted by any binutils version so far released, complaining that 172 accepted by any binutils version so far released, complaining that
171 the same type of prefix is used twice if an extended register is 173 the same type of prefix is used twice if an extended register is
172 needed for addressing (fix submitted to mainline 2005-11-21). 174 needed for addressing (fix submitted to mainline 2005-11-21).
173 asm volatile("rex64/fxsave %0" 175 asm volatile("rex64/fxsave %0"
174 : "=m" (fpu->state->fxsave)); 176 : "=m" (fpu->state->fxsave));
175 This, however, we can work around by forcing the compiler to select 177 This, however, we can work around by forcing the compiler to select
176 an addressing mode that doesn't require extended registers. */ 178 an addressing mode that doesn't require extended registers. */
177 asm volatile("rex64/fxsave (%[fx])" 179 asm volatile("rex64/fxsave (%[fx])"
178 : "=m" (fpu->state->fxsave) 180 : "=m" (fpu->state->fxsave)
179 : [fx] "R" (&fpu->state->fxsave)); 181 : [fx] "R" (&fpu->state->fxsave));
180 #endif 182 #endif
181 } 183 }
182 184
183 #else /* CONFIG_X86_32 */ 185 #else /* CONFIG_X86_32 */
184 186
185 /* perform fxrstor iff the processor has extended states, otherwise frstor */ 187 /* perform fxrstor iff the processor has extended states, otherwise frstor */
186 static inline int fxrstor_checking(struct i387_fxsave_struct *fx) 188 static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
187 { 189 {
188 /* 190 /*
189 * The "nop" is needed to make the instructions the same 191 * The "nop" is needed to make the instructions the same
190 * length. 192 * length.
191 */ 193 */
192 alternative_input( 194 alternative_input(
193 "nop ; frstor %1", 195 "nop ; frstor %1",
194 "fxrstor %1", 196 "fxrstor %1",
195 X86_FEATURE_FXSR, 197 X86_FEATURE_FXSR,
196 "m" (*fx)); 198 "m" (*fx));
197 199
198 return 0; 200 return 0;
199 } 201 }
200 202
201 static inline void fpu_fxsave(struct fpu *fpu) 203 static inline void fpu_fxsave(struct fpu *fpu)
202 { 204 {
203 asm volatile("fxsave %[fx]" 205 asm volatile("fxsave %[fx]"
204 : [fx] "=m" (fpu->state->fxsave)); 206 : [fx] "=m" (fpu->state->fxsave));
205 } 207 }
206 208
207 #endif /* CONFIG_X86_64 */ 209 #endif /* CONFIG_X86_64 */
208 210
209 /* 211 /*
210 * These must be called with preempt disabled. Returns 212 * These must be called with preempt disabled. Returns
211 * 'true' if the FPU state is still intact. 213 * 'true' if the FPU state is still intact.
212 */ 214 */
213 static inline int fpu_save_init(struct fpu *fpu) 215 static inline int fpu_save_init(struct fpu *fpu)
214 { 216 {
215 if (use_xsave()) { 217 if (use_xsave()) {
216 fpu_xsave(fpu); 218 fpu_xsave(fpu);
217 219
218 /* 220 /*
219 * xsave header may indicate the init state of the FP. 221 * xsave header may indicate the init state of the FP.
220 */ 222 */
221 if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) 223 if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
222 return 1; 224 return 1;
223 } else if (use_fxsr()) { 225 } else if (use_fxsr()) {
224 fpu_fxsave(fpu); 226 fpu_fxsave(fpu);
225 } else { 227 } else {
226 asm volatile("fnsave %[fx]; fwait" 228 asm volatile("fnsave %[fx]; fwait"
227 : [fx] "=m" (fpu->state->fsave)); 229 : [fx] "=m" (fpu->state->fsave));
228 return 0; 230 return 0;
229 } 231 }
230 232
231 /* 233 /*
232 * If exceptions are pending, we need to clear them so 234 * If exceptions are pending, we need to clear them so
233 * that we don't randomly get exceptions later. 235 * that we don't randomly get exceptions later.
234 * 236 *
235 * FIXME! Is this perhaps only true for the old-style 237 * FIXME! Is this perhaps only true for the old-style
236 * irq13 case? Maybe we could leave the x87 state 238 * irq13 case? Maybe we could leave the x87 state
237 * intact otherwise? 239 * intact otherwise?
238 */ 240 */
239 if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { 241 if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
240 asm volatile("fnclex"); 242 asm volatile("fnclex");
241 return 0; 243 return 0;
242 } 244 }
243 return 1; 245 return 1;
244 } 246 }
245 247
246 static inline int __save_init_fpu(struct task_struct *tsk) 248 static inline int __save_init_fpu(struct task_struct *tsk)
247 { 249 {
248 return fpu_save_init(&tsk->thread.fpu); 250 return fpu_save_init(&tsk->thread.fpu);
249 } 251 }
250 252
251 static inline int fpu_fxrstor_checking(struct fpu *fpu) 253 static inline int fpu_fxrstor_checking(struct fpu *fpu)
252 { 254 {
253 return fxrstor_checking(&fpu->state->fxsave); 255 return fxrstor_checking(&fpu->state->fxsave);
254 } 256 }
255 257
256 static inline int fpu_restore_checking(struct fpu *fpu) 258 static inline int fpu_restore_checking(struct fpu *fpu)
257 { 259 {
258 if (use_xsave()) 260 if (use_xsave())
259 return fpu_xrstor_checking(fpu); 261 return fpu_xrstor_checking(fpu);
260 else 262 else
261 return fpu_fxrstor_checking(fpu); 263 return fpu_fxrstor_checking(fpu);
262 } 264 }
263 265
264 static inline int restore_fpu_checking(struct task_struct *tsk) 266 static inline int restore_fpu_checking(struct task_struct *tsk)
265 { 267 {
266 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception 268 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
267 is pending. Clear the x87 state here by setting it to fixed 269 is pending. Clear the x87 state here by setting it to fixed
268 values. "m" is a random variable that should be in L1 */ 270 values. "m" is a random variable that should be in L1 */
269 alternative_input( 271 alternative_input(
270 ASM_NOP8 ASM_NOP2, 272 ASM_NOP8 ASM_NOP2,
271 "emms\n\t" /* clear stack tags */ 273 "emms\n\t" /* clear stack tags */
272 "fildl %P[addr]", /* set F?P to defined value */ 274 "fildl %P[addr]", /* set F?P to defined value */
273 X86_FEATURE_FXSAVE_LEAK, 275 X86_FEATURE_FXSAVE_LEAK,
274 [addr] "m" (tsk->thread.fpu.has_fpu)); 276 [addr] "m" (tsk->thread.fpu.has_fpu));
275 277
276 return fpu_restore_checking(&tsk->thread.fpu); 278 return fpu_restore_checking(&tsk->thread.fpu);
277 } 279 }
278 280
279 /* 281 /*
280 * Software FPU state helpers. Careful: these need to 282 * Software FPU state helpers. Careful: these need to
281 * be preemption protection *and* they need to be 283 * be preemption protection *and* they need to be
282 * properly paired with the CR0.TS changes! 284 * properly paired with the CR0.TS changes!
283 */ 285 */
284 static inline int __thread_has_fpu(struct task_struct *tsk) 286 static inline int __thread_has_fpu(struct task_struct *tsk)
285 { 287 {
286 return tsk->thread.fpu.has_fpu; 288 return tsk->thread.fpu.has_fpu;
287 } 289 }
288 290
289 /* Must be paired with an 'stts' after! */ 291 /* Must be paired with an 'stts' after! */
290 static inline void __thread_clear_has_fpu(struct task_struct *tsk) 292 static inline void __thread_clear_has_fpu(struct task_struct *tsk)
291 { 293 {
292 tsk->thread.fpu.has_fpu = 0; 294 tsk->thread.fpu.has_fpu = 0;
293 this_cpu_write(fpu_owner_task, NULL); 295 this_cpu_write(fpu_owner_task, NULL);
294 } 296 }
295 297
296 /* Must be paired with a 'clts' before! */ 298 /* Must be paired with a 'clts' before! */
297 static inline void __thread_set_has_fpu(struct task_struct *tsk) 299 static inline void __thread_set_has_fpu(struct task_struct *tsk)
298 { 300 {
299 tsk->thread.fpu.has_fpu = 1; 301 tsk->thread.fpu.has_fpu = 1;
300 this_cpu_write(fpu_owner_task, tsk); 302 this_cpu_write(fpu_owner_task, tsk);
301 } 303 }
302 304
303 /* 305 /*
304 * Encapsulate the CR0.TS handling together with the 306 * Encapsulate the CR0.TS handling together with the
305 * software flag. 307 * software flag.
306 * 308 *
307 * These generally need preemption protection to work, 309 * These generally need preemption protection to work,
308 * do try to avoid using these on their own. 310 * do try to avoid using these on their own.
309 */ 311 */
310 static inline void __thread_fpu_end(struct task_struct *tsk) 312 static inline void __thread_fpu_end(struct task_struct *tsk)
311 { 313 {
312 __thread_clear_has_fpu(tsk); 314 __thread_clear_has_fpu(tsk);
313 stts(); 315 stts();
314 } 316 }
315 317
316 static inline void __thread_fpu_begin(struct task_struct *tsk) 318 static inline void __thread_fpu_begin(struct task_struct *tsk)
317 { 319 {
318 clts(); 320 clts();
319 __thread_set_has_fpu(tsk); 321 __thread_set_has_fpu(tsk);
320 } 322 }
321 323
322 /* 324 /*
323 * FPU state switching for scheduling. 325 * FPU state switching for scheduling.
324 * 326 *
325 * This is a two-stage process: 327 * This is a two-stage process:
326 * 328 *
327 * - switch_fpu_prepare() saves the old state and 329 * - switch_fpu_prepare() saves the old state and
328 * sets the new state of the CR0.TS bit. This is 330 * sets the new state of the CR0.TS bit. This is
329 * done within the context of the old process. 331 * done within the context of the old process.
330 * 332 *
331 * - switch_fpu_finish() restores the new state as 333 * - switch_fpu_finish() restores the new state as
332 * necessary. 334 * necessary.
333 */ 335 */
334 typedef struct { int preload; } fpu_switch_t; 336 typedef struct { int preload; } fpu_switch_t;
335 337
336 /* 338 /*
337 * FIXME! We could do a totally lazy restore, but we need to 339 * FIXME! We could do a totally lazy restore, but we need to
338 * add a per-cpu "this was the task that last touched the FPU 340 * add a per-cpu "this was the task that last touched the FPU
339 * on this CPU" variable, and the task needs to have a "I last 341 * on this CPU" variable, and the task needs to have a "I last
340 * touched the FPU on this CPU" and check them. 342 * touched the FPU on this CPU" and check them.
341 * 343 *
342 * We don't do that yet, so "fpu_lazy_restore()" always returns 344 * We don't do that yet, so "fpu_lazy_restore()" always returns
343 * false, but some day.. 345 * false, but some day..
344 */ 346 */
345 static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) 347 static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
346 { 348 {
347 return new == this_cpu_read_stable(fpu_owner_task) && 349 return new == this_cpu_read_stable(fpu_owner_task) &&
348 cpu == new->thread.fpu.last_cpu; 350 cpu == new->thread.fpu.last_cpu;
349 } 351 }
350 352
351 static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) 353 static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
352 { 354 {
353 fpu_switch_t fpu; 355 fpu_switch_t fpu;
354 356
355 fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; 357 fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
356 if (__thread_has_fpu(old)) { 358 if (__thread_has_fpu(old)) {
357 if (!__save_init_fpu(old)) 359 if (!__save_init_fpu(old))
358 cpu = ~0; 360 cpu = ~0;
359 old->thread.fpu.last_cpu = cpu; 361 old->thread.fpu.last_cpu = cpu;
360 old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */ 362 old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */
361 363
362 /* Don't change CR0.TS if we just switch! */ 364 /* Don't change CR0.TS if we just switch! */
363 if (fpu.preload) { 365 if (fpu.preload) {
364 new->fpu_counter++; 366 new->fpu_counter++;
365 __thread_set_has_fpu(new); 367 __thread_set_has_fpu(new);
366 prefetch(new->thread.fpu.state); 368 prefetch(new->thread.fpu.state);
367 } else 369 } else
368 stts(); 370 stts();
369 } else { 371 } else {
370 old->fpu_counter = 0; 372 old->fpu_counter = 0;
371 old->thread.fpu.last_cpu = ~0; 373 old->thread.fpu.last_cpu = ~0;
372 if (fpu.preload) { 374 if (fpu.preload) {
373 new->fpu_counter++; 375 new->fpu_counter++;
374 if (fpu_lazy_restore(new, cpu)) 376 if (fpu_lazy_restore(new, cpu))
375 fpu.preload = 0; 377 fpu.preload = 0;
376 else 378 else
377 prefetch(new->thread.fpu.state); 379 prefetch(new->thread.fpu.state);
378 __thread_fpu_begin(new); 380 __thread_fpu_begin(new);
379 } 381 }
380 } 382 }
381 return fpu; 383 return fpu;
382 } 384 }
383 385
384 /* 386 /*
385 * By the time this gets called, we've already cleared CR0.TS and 387 * By the time this gets called, we've already cleared CR0.TS and
386 * given the process the FPU if we are going to preload the FPU 388 * given the process the FPU if we are going to preload the FPU
387 * state - all we need to do is to conditionally restore the register 389 * state - all we need to do is to conditionally restore the register
388 * state itself. 390 * state itself.
389 */ 391 */
390 static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) 392 static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
391 { 393 {
392 if (fpu.preload) { 394 if (fpu.preload) {
393 if (unlikely(restore_fpu_checking(new))) 395 if (unlikely(restore_fpu_checking(new)))
394 __thread_fpu_end(new); 396 __thread_fpu_end(new);
395 } 397 }
396 } 398 }
397 399
398 /* 400 /*
399 * Signal frame handlers... 401 * Signal frame handlers...
400 */ 402 */
401 extern int save_i387_xstate(void __user *buf); 403 extern int save_i387_xstate(void __user *buf);
402 extern int restore_i387_xstate(void __user *buf); 404 extern int restore_i387_xstate(void __user *buf);
403 405
404 static inline void __clear_fpu(struct task_struct *tsk) 406 static inline void __clear_fpu(struct task_struct *tsk)
405 { 407 {
406 if (__thread_has_fpu(tsk)) { 408 if (__thread_has_fpu(tsk)) {
407 /* Ignore delayed exceptions from user space */ 409 /* Ignore delayed exceptions from user space */
408 asm volatile("1: fwait\n" 410 asm volatile("1: fwait\n"
409 "2:\n" 411 "2:\n"
410 _ASM_EXTABLE(1b, 2b)); 412 _ASM_EXTABLE(1b, 2b));
411 __thread_fpu_end(tsk); 413 __thread_fpu_end(tsk);
412 } 414 }
413 } 415 }
414 416
415 /* 417 /*
416 * The actual user_fpu_begin/end() functions 418 * The actual user_fpu_begin/end() functions
417 * need to be preemption-safe. 419 * need to be preemption-safe.
418 * 420 *
419 * NOTE! user_fpu_end() must be used only after you 421 * NOTE! user_fpu_end() must be used only after you
420 * have saved the FP state, and user_fpu_begin() must 422 * have saved the FP state, and user_fpu_begin() must
421 * be used only immediately before restoring it. 423 * be used only immediately before restoring it.
422 * These functions do not do any save/restore on 424 * These functions do not do any save/restore on
423 * their own. 425 * their own.
424 */ 426 */
425 static inline void user_fpu_end(void) 427 static inline void user_fpu_end(void)
426 { 428 {
427 preempt_disable(); 429 preempt_disable();
428 __thread_fpu_end(current); 430 __thread_fpu_end(current);
429 preempt_enable(); 431 preempt_enable();
430 } 432 }
431 433
432 static inline void user_fpu_begin(void) 434 static inline void user_fpu_begin(void)
433 { 435 {
434 preempt_disable(); 436 preempt_disable();
435 if (!user_has_fpu()) 437 if (!user_has_fpu())
436 __thread_fpu_begin(current); 438 __thread_fpu_begin(current);
437 preempt_enable(); 439 preempt_enable();
438 } 440 }
439 441
440 /* 442 /*
441 * These disable preemption on their own and are safe 443 * These disable preemption on their own and are safe
442 */ 444 */
443 static inline void save_init_fpu(struct task_struct *tsk) 445 static inline void save_init_fpu(struct task_struct *tsk)
444 { 446 {
445 WARN_ON_ONCE(!__thread_has_fpu(tsk)); 447 WARN_ON_ONCE(!__thread_has_fpu(tsk));
446 preempt_disable(); 448 preempt_disable();
447 __save_init_fpu(tsk); 449 __save_init_fpu(tsk);
448 __thread_fpu_end(tsk); 450 __thread_fpu_end(tsk);
449 preempt_enable(); 451 preempt_enable();
450 } 452 }
451 453
452 static inline void clear_fpu(struct task_struct *tsk) 454 static inline void clear_fpu(struct task_struct *tsk)
453 { 455 {
454 preempt_disable(); 456 preempt_disable();
455 __clear_fpu(tsk); 457 __clear_fpu(tsk);
456 preempt_enable(); 458 preempt_enable();
457 } 459 }
458 460
459 /* 461 /*
460 * i387 state interaction 462 * i387 state interaction
461 */ 463 */
462 static inline unsigned short get_fpu_cwd(struct task_struct *tsk) 464 static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
463 { 465 {
464 if (cpu_has_fxsr) { 466 if (cpu_has_fxsr) {
465 return tsk->thread.fpu.state->fxsave.cwd; 467 return tsk->thread.fpu.state->fxsave.cwd;
466 } else { 468 } else {
467 return (unsigned short)tsk->thread.fpu.state->fsave.cwd; 469 return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
468 } 470 }
469 } 471 }
470 472
471 static inline unsigned short get_fpu_swd(struct task_struct *tsk) 473 static inline unsigned short get_fpu_swd(struct task_struct *tsk)
472 { 474 {
473 if (cpu_has_fxsr) { 475 if (cpu_has_fxsr) {
474 return tsk->thread.fpu.state->fxsave.swd; 476 return tsk->thread.fpu.state->fxsave.swd;
475 } else { 477 } else {
476 return (unsigned short)tsk->thread.fpu.state->fsave.swd; 478 return (unsigned short)tsk->thread.fpu.state->fsave.swd;
477 } 479 }
478 } 480 }
479 481
480 static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) 482 static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
481 { 483 {
482 if (cpu_has_xmm) { 484 if (cpu_has_xmm) {
483 return tsk->thread.fpu.state->fxsave.mxcsr; 485 return tsk->thread.fpu.state->fxsave.mxcsr;
484 } else { 486 } else {
485 return MXCSR_DEFAULT; 487 return MXCSR_DEFAULT;
486 } 488 }
487 } 489 }
488 490
489 static bool fpu_allocated(struct fpu *fpu) 491 static bool fpu_allocated(struct fpu *fpu)
490 { 492 {
491 return fpu->state != NULL; 493 return fpu->state != NULL;
492 } 494 }
493 495
494 static inline int fpu_alloc(struct fpu *fpu) 496 static inline int fpu_alloc(struct fpu *fpu)
495 { 497 {
496 if (fpu_allocated(fpu)) 498 if (fpu_allocated(fpu))
497 return 0; 499 return 0;
498 fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); 500 fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
499 if (!fpu->state) 501 if (!fpu->state)
500 return -ENOMEM; 502 return -ENOMEM;
501 WARN_ON((unsigned long)fpu->state & 15); 503 WARN_ON((unsigned long)fpu->state & 15);
502 return 0; 504 return 0;
503 } 505 }
504 506
505 static inline void fpu_free(struct fpu *fpu) 507 static inline void fpu_free(struct fpu *fpu)
506 { 508 {
507 if (fpu->state) { 509 if (fpu->state) {
508 kmem_cache_free(task_xstate_cachep, fpu->state); 510 kmem_cache_free(task_xstate_cachep, fpu->state);
509 fpu->state = NULL; 511 fpu->state = NULL;
510 } 512 }
511 } 513 }
512 514
513 static inline void fpu_copy(struct fpu *dst, struct fpu *src) 515 static inline void fpu_copy(struct fpu *dst, struct fpu *src)
514 { 516 {
515 memcpy(dst->state, src->state, xstate_size); 517 memcpy(dst->state, src->state, xstate_size);
516 } 518 }
517 519
518 extern void fpu_finit(struct fpu *fpu); 520 extern void fpu_finit(struct fpu *fpu);
519 521
520 #endif 522 #endif
521 523
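
A minimal usage sketch of the FPU-state helpers shown above (fpu_alloc(), fpu_copy(), fpu_free()); the surrounding function is a hypothetical caller, e.g. a fork-style duplication path, and is not part of this commit:

static int example_dup_fpu_state(struct fpu *dst, struct fpu *src)
{
	int err;

	/* Allocate dst->state from task_xstate_cachep; no-op if already allocated. */
	err = fpu_alloc(dst);
	if (err)
		return err;		/* -ENOMEM */

	/* Copy the full xstate_size image; dst is released later with fpu_free(). */
	fpu_copy(dst, src);
	return 0;
}
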
arch/x86/include/asm/futex.h
1 #ifndef _ASM_X86_FUTEX_H 1 #ifndef _ASM_X86_FUTEX_H
2 #define _ASM_X86_FUTEX_H 2 #define _ASM_X86_FUTEX_H
3 3
4 #ifdef __KERNEL__ 4 #ifdef __KERNEL__
5 5
6 #include <linux/futex.h> 6 #include <linux/futex.h>
7 #include <linux/uaccess.h> 7 #include <linux/uaccess.h>
8 8
9 #include <asm/asm.h> 9 #include <asm/asm.h>
10 #include <asm/errno.h> 10 #include <asm/errno.h>
11 #include <asm/processor.h> 11 #include <asm/processor.h>
12 #include <asm/smap.h>
12 13
13 #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ 14 #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
14 asm volatile("1:\t" insn "\n" \ 15 asm volatile("\t" ASM_STAC "\n" \
15 "2:\t.section .fixup,\"ax\"\n" \ 16 "1:\t" insn "\n" \
17 "2:\t" ASM_CLAC "\n" \
18 "\t.section .fixup,\"ax\"\n" \
16 "3:\tmov\t%3, %1\n" \ 19 "3:\tmov\t%3, %1\n" \
17 "\tjmp\t2b\n" \ 20 "\tjmp\t2b\n" \
18 "\t.previous\n" \ 21 "\t.previous\n" \
19 _ASM_EXTABLE(1b, 3b) \ 22 _ASM_EXTABLE(1b, 3b) \
20 : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ 23 : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
21 : "i" (-EFAULT), "0" (oparg), "1" (0)) 24 : "i" (-EFAULT), "0" (oparg), "1" (0))
22 25
23 #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ 26 #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
24 asm volatile("1:\tmovl %2, %0\n" \ 27 asm volatile("\t" ASM_STAC "\n" \
28 "1:\tmovl %2, %0\n" \
25 "\tmovl\t%0, %3\n" \ 29 "\tmovl\t%0, %3\n" \
26 "\t" insn "\n" \ 30 "\t" insn "\n" \
27 "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ 31 "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \
28 "\tjnz\t1b\n" \ 32 "\tjnz\t1b\n" \
29 "3:\t.section .fixup,\"ax\"\n" \ 33 "3:\t" ASM_CLAC "\n" \
34 "\t.section .fixup,\"ax\"\n" \
30 "4:\tmov\t%5, %1\n" \ 35 "4:\tmov\t%5, %1\n" \
31 "\tjmp\t3b\n" \ 36 "\tjmp\t3b\n" \
32 "\t.previous\n" \ 37 "\t.previous\n" \
33 _ASM_EXTABLE(1b, 4b) \ 38 _ASM_EXTABLE(1b, 4b) \
34 _ASM_EXTABLE(2b, 4b) \ 39 _ASM_EXTABLE(2b, 4b) \
35 : "=&a" (oldval), "=&r" (ret), \ 40 : "=&a" (oldval), "=&r" (ret), \
36 "+m" (*uaddr), "=&r" (tem) \ 41 "+m" (*uaddr), "=&r" (tem) \
37 : "r" (oparg), "i" (-EFAULT), "1" (0)) 42 : "r" (oparg), "i" (-EFAULT), "1" (0))
38 43
39 static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) 44 static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
40 { 45 {
41 int op = (encoded_op >> 28) & 7; 46 int op = (encoded_op >> 28) & 7;
42 int cmp = (encoded_op >> 24) & 15; 47 int cmp = (encoded_op >> 24) & 15;
43 int oparg = (encoded_op << 8) >> 20; 48 int oparg = (encoded_op << 8) >> 20;
44 int cmparg = (encoded_op << 20) >> 20; 49 int cmparg = (encoded_op << 20) >> 20;
45 int oldval = 0, ret, tem; 50 int oldval = 0, ret, tem;
46 51
47 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 52 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
48 oparg = 1 << oparg; 53 oparg = 1 << oparg;
49 54
50 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) 55 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
51 return -EFAULT; 56 return -EFAULT;
52 57
53 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) 58 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
54 /* Real i386 machines can only support FUTEX_OP_SET */ 59 /* Real i386 machines can only support FUTEX_OP_SET */
55 if (op != FUTEX_OP_SET && boot_cpu_data.x86 == 3) 60 if (op != FUTEX_OP_SET && boot_cpu_data.x86 == 3)
56 return -ENOSYS; 61 return -ENOSYS;
57 #endif 62 #endif
58 63
59 pagefault_disable(); 64 pagefault_disable();
60 65
61 switch (op) { 66 switch (op) {
62 case FUTEX_OP_SET: 67 case FUTEX_OP_SET:
63 __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); 68 __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
64 break; 69 break;
65 case FUTEX_OP_ADD: 70 case FUTEX_OP_ADD:
66 __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, 71 __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval,
67 uaddr, oparg); 72 uaddr, oparg);
68 break; 73 break;
69 case FUTEX_OP_OR: 74 case FUTEX_OP_OR:
70 __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); 75 __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg);
71 break; 76 break;
72 case FUTEX_OP_ANDN: 77 case FUTEX_OP_ANDN:
73 __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg); 78 __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg);
74 break; 79 break;
75 case FUTEX_OP_XOR: 80 case FUTEX_OP_XOR:
76 __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg); 81 __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg);
77 break; 82 break;
78 default: 83 default:
79 ret = -ENOSYS; 84 ret = -ENOSYS;
80 } 85 }
81 86
82 pagefault_enable(); 87 pagefault_enable();
83 88
84 if (!ret) { 89 if (!ret) {
85 switch (cmp) { 90 switch (cmp) {
86 case FUTEX_OP_CMP_EQ: 91 case FUTEX_OP_CMP_EQ:
87 ret = (oldval == cmparg); 92 ret = (oldval == cmparg);
88 break; 93 break;
89 case FUTEX_OP_CMP_NE: 94 case FUTEX_OP_CMP_NE:
90 ret = (oldval != cmparg); 95 ret = (oldval != cmparg);
91 break; 96 break;
92 case FUTEX_OP_CMP_LT: 97 case FUTEX_OP_CMP_LT:
93 ret = (oldval < cmparg); 98 ret = (oldval < cmparg);
94 break; 99 break;
95 case FUTEX_OP_CMP_GE: 100 case FUTEX_OP_CMP_GE:
96 ret = (oldval >= cmparg); 101 ret = (oldval >= cmparg);
97 break; 102 break;
98 case FUTEX_OP_CMP_LE: 103 case FUTEX_OP_CMP_LE:
99 ret = (oldval <= cmparg); 104 ret = (oldval <= cmparg);
100 break; 105 break;
101 case FUTEX_OP_CMP_GT: 106 case FUTEX_OP_CMP_GT:
102 ret = (oldval > cmparg); 107 ret = (oldval > cmparg);
103 break; 108 break;
104 default: 109 default:
105 ret = -ENOSYS; 110 ret = -ENOSYS;
106 } 111 }
107 } 112 }
108 return ret; 113 return ret;
109 } 114 }
110 115
111 static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, 116 static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
112 u32 oldval, u32 newval) 117 u32 oldval, u32 newval)
113 { 118 {
114 int ret = 0; 119 int ret = 0;
115 120
116 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) 121 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
117 /* Real i386 machines have no cmpxchg instruction */ 122 /* Real i386 machines have no cmpxchg instruction */
118 if (boot_cpu_data.x86 == 3) 123 if (boot_cpu_data.x86 == 3)
119 return -ENOSYS; 124 return -ENOSYS;
120 #endif 125 #endif
121 126
122 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) 127 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
123 return -EFAULT; 128 return -EFAULT;
124 129
125 asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" 130 asm volatile("\t" ASM_STAC "\n"
126 "2:\t.section .fixup, \"ax\"\n" 131 "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
132 "2:\t" ASM_CLAC "\n"
133 "\t.section .fixup, \"ax\"\n"
127 "3:\tmov %3, %0\n" 134 "3:\tmov %3, %0\n"
128 "\tjmp 2b\n" 135 "\tjmp 2b\n"
129 "\t.previous\n" 136 "\t.previous\n"
130 _ASM_EXTABLE(1b, 3b) 137 _ASM_EXTABLE(1b, 3b)
131 : "+r" (ret), "=a" (oldval), "+m" (*uaddr) 138 : "+r" (ret), "=a" (oldval), "+m" (*uaddr)
132 : "i" (-EFAULT), "r" (newval), "1" (oldval) 139 : "i" (-EFAULT), "r" (newval), "1" (oldval)
133 : "memory" 140 : "memory"
134 ); 141 );
135 142
136 *uval = oldval; 143 *uval = oldval;
137 return ret; 144 return ret;
138 } 145 }
139 146
140 #endif 147 #endif
141 #endif /* _ASM_X86_FUTEX_H */ 148 #endif /* _ASM_X86_FUTEX_H */
142 149
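
The pattern added to the futex macros above is uniform: ASM_STAC goes immediately before the first instruction that may fault, and ASM_CLAC sits at the shared exit label, so both the success path and the .fixup branch leave with the AC flag cleared. A minimal sketch of the same bracketing around a single user-space load; the helper name is hypothetical and not part of the commit:

static inline int example_get_user_u32(u32 *val, const u32 __user *uaddr)
{
	int ret;

	asm volatile("\t" ASM_STAC "\n"
		     "1:\tmovl %2, %1\n"	/* the only instruction run with AC set */
		     "2:\t" ASM_CLAC "\n"	/* reached on success and from the fixup */
		     "\t.section .fixup,\"ax\"\n"
		     "3:\tmov %3, %0\n"
		     "\tjmp 2b\n"
		     "\t.previous\n"
		     _ASM_EXTABLE(1b, 3b)
		     : "=r" (ret), "=r" (*val)
		     : "m" (*uaddr), "i" (-EFAULT), "0" (0));

	return ret;
}
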
arch/x86/include/asm/smap.h
1 /* 1 /*
2 * Supervisor Mode Access Prevention support 2 * Supervisor Mode Access Prevention support
3 * 3 *
4 * Copyright (C) 2012 Intel Corporation 4 * Copyright (C) 2012 Intel Corporation
5 * Author: H. Peter Anvin <hpa@linux.intel.com> 5 * Author: H. Peter Anvin <hpa@linux.intel.com>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2 9 * as published by the Free Software Foundation; version 2
10 * of the License. 10 * of the License.
11 */ 11 */
12 12
13 #ifndef _ASM_X86_SMAP_H 13 #ifndef _ASM_X86_SMAP_H
14 #define _ASM_X86_SMAP_H 14 #define _ASM_X86_SMAP_H
15 15
16 #include <linux/stringify.h> 16 #include <linux/stringify.h>
17 #include <asm/nops.h> 17 #include <asm/nops.h>
18 #include <asm/cpufeature.h> 18 #include <asm/cpufeature.h>
19 19
20 /* "Raw" instruction opcodes */ 20 /* "Raw" instruction opcodes */
21 #define __ASM_CLAC .byte 0x0f,0x01,0xca 21 #define __ASM_CLAC .byte 0x0f,0x01,0xca
22 #define __ASM_STAC .byte 0x0f,0x01,0xcb 22 #define __ASM_STAC .byte 0x0f,0x01,0xcb
23 23
24 #ifdef __ASSEMBLY__ 24 #ifdef __ASSEMBLY__
25 25
26 #include <asm/alternative-asm.h> 26 #include <asm/alternative-asm.h>
27 27
28 #ifdef CONFIG_X86_SMAP 28 #ifdef CONFIG_X86_SMAP
29 29
30 #define ASM_CLAC \ 30 #define ASM_CLAC \
31 661: ASM_NOP3 ; \ 31 661: ASM_NOP3 ; \
32 .pushsection .altinstr_replacement, "ax" ; \ 32 .pushsection .altinstr_replacement, "ax" ; \
33 662: __ASM_CLAC ; \ 33 662: __ASM_CLAC ; \
34 .popsection ; \ 34 .popsection ; \
35 .pushsection .altinstructions, "a" ; \ 35 .pushsection .altinstructions, "a" ; \
36 altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ 36 altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
37 .popsection 37 .popsection
38 38
39 #define ASM_STAC \ 39 #define ASM_STAC \
40 661: ASM_NOP3 ; \ 40 661: ASM_NOP3 ; \
41 .pushsection .altinstr_replacement, "ax" ; \ 41 .pushsection .altinstr_replacement, "ax" ; \
42 662: __ASM_STAC ; \ 42 662: __ASM_STAC ; \
43 .popsection ; \ 43 .popsection ; \
44 .pushsection .altinstructions, "a" ; \ 44 .pushsection .altinstructions, "a" ; \
45 altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ 45 altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
46 .popsection 46 .popsection
47 47
48 #else /* CONFIG_X86_SMAP */ 48 #else /* CONFIG_X86_SMAP */
49 49
50 #define ASM_CLAC 50 #define ASM_CLAC
51 #define ASM_STAC 51 #define ASM_STAC
52 52
53 #endif /* CONFIG_X86_SMAP */ 53 #endif /* CONFIG_X86_SMAP */
54 54
55 #else /* __ASSEMBLY__ */ 55 #else /* __ASSEMBLY__ */
56 56
57 #include <asm/alternative.h> 57 #include <asm/alternative.h>
58 58
59 #ifdef CONFIG_X86_SMAP 59 #ifdef CONFIG_X86_SMAP
60 60
61 static inline void clac(void) 61 static __always_inline void clac(void)
62 { 62 {
63 /* Note: a barrier is implicit in alternative() */ 63 /* Note: a barrier is implicit in alternative() */
64 alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); 64 alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
65 } 65 }
66 66
67 static inline void stac(void) 67 static __always_inline void stac(void)
68 { 68 {
69 /* Note: a barrier is implicit in alternative() */ 69 /* Note: a barrier is implicit in alternative() */
70 alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); 70 alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
71 } 71 }
72 72
73 /* These macros can be used in asm() statements */ 73 /* These macros can be used in asm() statements */
74 #define ASM_CLAC \ 74 #define ASM_CLAC \
75 ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP) 75 ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
76 #define ASM_STAC \ 76 #define ASM_STAC \
77 ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP) 77 ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP)
78 78
79 #else /* CONFIG_X86_SMAP */ 79 #else /* CONFIG_X86_SMAP */
80 80
81 static inline void clac(void) { } 81 static inline void clac(void) { }
82 static inline void stac(void) { } 82 static inline void stac(void) { }
83 83
84 #define ASM_CLAC 84 #define ASM_CLAC
85 #define ASM_STAC 85 #define ASM_STAC
86 86
87 #endif /* CONFIG_X86_SMAP */ 87 #endif /* CONFIG_X86_SMAP */
88 88
89 #endif /* __ASSEMBLY__ */ 89 #endif /* __ASSEMBLY__ */
90 90
91 #endif /* _ASM_X86_SMAP_H */ 91 #endif /* _ASM_X86_SMAP_H */
92 92
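
On the C side, stac() and clac() above compile to a 3-byte NOP that alternative() patches to the 3-byte STAC/CLAC opcodes only when X86_FEATURE_SMAP is set, so callers can bracket user accesses unconditionally. A usage sketch only, not taken from this commit: a routine doing a burst of open-coded user accesses could open the AC window once around the whole burst; example_raw_user_op() is a hypothetical access primitive that performs the faulting accesses itself and issues no STAC/CLAC of its own.

int example_raw_user_op(void __user *dst, const void *src, unsigned long len);

static inline int example_user_burst(void __user *dst, const void *src,
				     unsigned long len)
{
	int err;

	stac();			/* open the user-access window (a NOP without SMAP) */
	err = example_raw_user_op(dst, src, len);
	clac();			/* close it again on every return path */

	return err;
}
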
arch/x86/include/asm/uaccess.h
1 #ifndef _ASM_X86_UACCESS_H 1 #ifndef _ASM_X86_UACCESS_H
2 #define _ASM_X86_UACCESS_H 2 #define _ASM_X86_UACCESS_H
3 /* 3 /*
4 * User space memory access functions 4 * User space memory access functions
5 */ 5 */
6 #include <linux/errno.h> 6 #include <linux/errno.h>
7 #include <linux/compiler.h> 7 #include <linux/compiler.h>
8 #include <linux/thread_info.h> 8 #include <linux/thread_info.h>
9 #include <linux/string.h> 9 #include <linux/string.h>
10 #include <asm/asm.h> 10 #include <asm/asm.h>
11 #include <asm/page.h> 11 #include <asm/page.h>
12 #include <asm/smap.h>
12 13
13 #define VERIFY_READ 0 14 #define VERIFY_READ 0
14 #define VERIFY_WRITE 1 15 #define VERIFY_WRITE 1
15 16
16 /* 17 /*
17 * The fs value determines whether argument validity checking should be 18 * The fs value determines whether argument validity checking should be
18 * performed or not. If get_fs() == USER_DS, checking is performed, with 19 * performed or not. If get_fs() == USER_DS, checking is performed, with
19 * get_fs() == KERNEL_DS, checking is bypassed. 20 * get_fs() == KERNEL_DS, checking is bypassed.
20 * 21 *
21 * For historical reasons, these macros are grossly misnamed. 22 * For historical reasons, these macros are grossly misnamed.
22 */ 23 */
23 24
24 #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) 25 #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
25 26
26 #define KERNEL_DS MAKE_MM_SEG(-1UL) 27 #define KERNEL_DS MAKE_MM_SEG(-1UL)
27 #define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) 28 #define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX)
28 29
29 #define get_ds() (KERNEL_DS) 30 #define get_ds() (KERNEL_DS)
30 #define get_fs() (current_thread_info()->addr_limit) 31 #define get_fs() (current_thread_info()->addr_limit)
31 #define set_fs(x) (current_thread_info()->addr_limit = (x)) 32 #define set_fs(x) (current_thread_info()->addr_limit = (x))
32 33
33 #define segment_eq(a, b) ((a).seg == (b).seg) 34 #define segment_eq(a, b) ((a).seg == (b).seg)
34 35
35 #define user_addr_max() (current_thread_info()->addr_limit.seg) 36 #define user_addr_max() (current_thread_info()->addr_limit.seg)
36 #define __addr_ok(addr) \ 37 #define __addr_ok(addr) \
37 ((unsigned long __force)(addr) < user_addr_max()) 38 ((unsigned long __force)(addr) < user_addr_max())
38 39
39 /* 40 /*
40 * Test whether a block of memory is a valid user space address. 41 * Test whether a block of memory is a valid user space address.
41 * Returns 0 if the range is valid, nonzero otherwise. 42 * Returns 0 if the range is valid, nonzero otherwise.
42 * 43 *
43 * This is equivalent to the following test: 44 * This is equivalent to the following test:
44 * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64) 45 * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64)
45 * 46 *
46 * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... 47 * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
47 */ 48 */
48 49
49 #define __range_not_ok(addr, size, limit) \ 50 #define __range_not_ok(addr, size, limit) \
50 ({ \ 51 ({ \
51 unsigned long flag, roksum; \ 52 unsigned long flag, roksum; \
52 __chk_user_ptr(addr); \ 53 __chk_user_ptr(addr); \
53 asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ 54 asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \
54 : "=&r" (flag), "=r" (roksum) \ 55 : "=&r" (flag), "=r" (roksum) \
55 : "1" (addr), "g" ((long)(size)), \ 56 : "1" (addr), "g" ((long)(size)), \
56 "rm" (limit)); \ 57 "rm" (limit)); \
57 flag; \ 58 flag; \
58 }) 59 })
59 60
60 /** 61 /**
61 * access_ok: - Checks if a user space pointer is valid 62 * access_ok: - Checks if a user space pointer is valid
62 * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that 63 * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
63 * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe 64 * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
64 * to write to a block, it is always safe to read from it. 65 * to write to a block, it is always safe to read from it.
65 * @addr: User space pointer to start of block to check 66 * @addr: User space pointer to start of block to check
66 * @size: Size of block to check 67 * @size: Size of block to check
67 * 68 *
68 * Context: User context only. This function may sleep. 69 * Context: User context only. This function may sleep.
69 * 70 *
70 * Checks if a pointer to a block of memory in user space is valid. 71 * Checks if a pointer to a block of memory in user space is valid.
71 * 72 *
72 * Returns true (nonzero) if the memory block may be valid, false (zero) 73 * Returns true (nonzero) if the memory block may be valid, false (zero)
73 * if it is definitely invalid. 74 * if it is definitely invalid.
74 * 75 *
75 * Note that, depending on architecture, this function probably just 76 * Note that, depending on architecture, this function probably just
76 * checks that the pointer is in the user space range - after calling 77 * checks that the pointer is in the user space range - after calling
77 * this function, memory access functions may still return -EFAULT. 78 * this function, memory access functions may still return -EFAULT.
78 */ 79 */
79 #define access_ok(type, addr, size) \ 80 #define access_ok(type, addr, size) \
80 (likely(__range_not_ok(addr, size, user_addr_max()) == 0)) 81 (likely(__range_not_ok(addr, size, user_addr_max()) == 0))
81 82
82 /* 83 /*
83 * The exception table consists of pairs of addresses relative to the 84 * The exception table consists of pairs of addresses relative to the
84 * exception table entry itself: the first is the address of an 85 * exception table entry itself: the first is the address of an
85 * instruction that is allowed to fault, and the second is the address 86 * instruction that is allowed to fault, and the second is the address
86 * at which the program should continue. No registers are modified, 87 * at which the program should continue. No registers are modified,
87 * so it is entirely up to the continuation code to figure out what to 88 * so it is entirely up to the continuation code to figure out what to
88 * do. 89 * do.
89 * 90 *
90 * All the routines below use bits of fixup code that are out of line 91 * All the routines below use bits of fixup code that are out of line
91 * with the main instruction path. This means when everything is well, 92 * with the main instruction path. This means when everything is well,
92 * we don't even have to jump over them. Further, they do not intrude 93 * we don't even have to jump over them. Further, they do not intrude
93 * on our cache or tlb entries. 94 * on our cache or tlb entries.
94 */ 95 */
95 96
96 struct exception_table_entry { 97 struct exception_table_entry {
97 int insn, fixup; 98 int insn, fixup;
98 }; 99 };
99 /* This is not the generic standard exception_table_entry format */ 100 /* This is not the generic standard exception_table_entry format */
100 #define ARCH_HAS_SORT_EXTABLE 101 #define ARCH_HAS_SORT_EXTABLE
101 #define ARCH_HAS_SEARCH_EXTABLE 102 #define ARCH_HAS_SEARCH_EXTABLE
102 103
103 extern int fixup_exception(struct pt_regs *regs); 104 extern int fixup_exception(struct pt_regs *regs);
104 extern int early_fixup_exception(unsigned long *ip); 105 extern int early_fixup_exception(unsigned long *ip);
105 106
106 /* 107 /*
107 * These are the main single-value transfer routines. They automatically 108 * These are the main single-value transfer routines. They automatically
108 * use the right size if we just have the right pointer type. 109 * use the right size if we just have the right pointer type.
109 * 110 *
110 * This gets kind of ugly. We want to return _two_ values in "get_user()" 111 * This gets kind of ugly. We want to return _two_ values in "get_user()"
111 * and yet we don't want to do any pointers, because that is too much 112 * and yet we don't want to do any pointers, because that is too much
112 * of a performance impact. Thus we have a few rather ugly macros here, 113 * of a performance impact. Thus we have a few rather ugly macros here,
113 * and hide all the ugliness from the user. 114 * and hide all the ugliness from the user.
114 * 115 *
115 * The "__xxx" versions of the user access functions are versions that 116 * The "__xxx" versions of the user access functions are versions that
116 * do not verify the address space, that must have been done previously 117 * do not verify the address space, that must have been done previously
117 * with a separate "access_ok()" call (this is used when we do multiple 118 * with a separate "access_ok()" call (this is used when we do multiple
118 * accesses to the same area of user memory). 119 * accesses to the same area of user memory).
119 */ 120 */
120 121
121 extern int __get_user_1(void); 122 extern int __get_user_1(void);
122 extern int __get_user_2(void); 123 extern int __get_user_2(void);
123 extern int __get_user_4(void); 124 extern int __get_user_4(void);
124 extern int __get_user_8(void); 125 extern int __get_user_8(void);
125 extern int __get_user_bad(void); 126 extern int __get_user_bad(void);
126 127
127 #define __get_user_x(size, ret, x, ptr) \ 128 #define __get_user_x(size, ret, x, ptr) \
128 asm volatile("call __get_user_" #size \ 129 asm volatile("call __get_user_" #size \
129 : "=a" (ret), "=d" (x) \ 130 : "=a" (ret), "=d" (x) \
130 : "0" (ptr)) \ 131 : "0" (ptr)) \
131 132
132 /* Careful: we have to cast the result to the type of the pointer 133 /* Careful: we have to cast the result to the type of the pointer
133 * for sign reasons */ 134 * for sign reasons */
134 135
135 /** 136 /**
136 * get_user: - Get a simple variable from user space. 137 * get_user: - Get a simple variable from user space.
137 * @x: Variable to store result. 138 * @x: Variable to store result.
138 * @ptr: Source address, in user space. 139 * @ptr: Source address, in user space.
139 * 140 *
140 * Context: User context only. This function may sleep. 141 * Context: User context only. This function may sleep.
141 * 142 *
142 * This macro copies a single simple variable from user space to kernel 143 * This macro copies a single simple variable from user space to kernel
143 * space. It supports simple types like char and int, but not larger 144 * space. It supports simple types like char and int, but not larger
144 * data types like structures or arrays. 145 * data types like structures or arrays.
145 * 146 *
146 * @ptr must have pointer-to-simple-variable type, and the result of 147 * @ptr must have pointer-to-simple-variable type, and the result of
147 * dereferencing @ptr must be assignable to @x without a cast. 148 * dereferencing @ptr must be assignable to @x without a cast.
148 * 149 *
149 * Returns zero on success, or -EFAULT on error. 150 * Returns zero on success, or -EFAULT on error.
150 * On error, the variable @x is set to zero. 151 * On error, the variable @x is set to zero.
151 */ 152 */
152 #ifdef CONFIG_X86_32 153 #ifdef CONFIG_X86_32
153 #define __get_user_8(__ret_gu, __val_gu, ptr) \ 154 #define __get_user_8(__ret_gu, __val_gu, ptr) \
154 __get_user_x(X, __ret_gu, __val_gu, ptr) 155 __get_user_x(X, __ret_gu, __val_gu, ptr)
155 #else 156 #else
156 #define __get_user_8(__ret_gu, __val_gu, ptr) \ 157 #define __get_user_8(__ret_gu, __val_gu, ptr) \
157 __get_user_x(8, __ret_gu, __val_gu, ptr) 158 __get_user_x(8, __ret_gu, __val_gu, ptr)
158 #endif 159 #endif
159 160
160 #define get_user(x, ptr) \ 161 #define get_user(x, ptr) \
161 ({ \ 162 ({ \
162 int __ret_gu; \ 163 int __ret_gu; \
163 unsigned long __val_gu; \ 164 unsigned long __val_gu; \
164 __chk_user_ptr(ptr); \ 165 __chk_user_ptr(ptr); \
165 might_fault(); \ 166 might_fault(); \
166 switch (sizeof(*(ptr))) { \ 167 switch (sizeof(*(ptr))) { \
167 case 1: \ 168 case 1: \
168 __get_user_x(1, __ret_gu, __val_gu, ptr); \ 169 __get_user_x(1, __ret_gu, __val_gu, ptr); \
169 break; \ 170 break; \
170 case 2: \ 171 case 2: \
171 __get_user_x(2, __ret_gu, __val_gu, ptr); \ 172 __get_user_x(2, __ret_gu, __val_gu, ptr); \
172 break; \ 173 break; \
173 case 4: \ 174 case 4: \
174 __get_user_x(4, __ret_gu, __val_gu, ptr); \ 175 __get_user_x(4, __ret_gu, __val_gu, ptr); \
175 break; \ 176 break; \
176 case 8: \ 177 case 8: \
177 __get_user_8(__ret_gu, __val_gu, ptr); \ 178 __get_user_8(__ret_gu, __val_gu, ptr); \
178 break; \ 179 break; \
179 default: \ 180 default: \
180 __get_user_x(X, __ret_gu, __val_gu, ptr); \ 181 __get_user_x(X, __ret_gu, __val_gu, ptr); \
181 break; \ 182 break; \
182 } \ 183 } \
183 (x) = (__typeof__(*(ptr)))__val_gu; \ 184 (x) = (__typeof__(*(ptr)))__val_gu; \
184 __ret_gu; \ 185 __ret_gu; \
185 }) 186 })
186 187
187 #define __put_user_x(size, x, ptr, __ret_pu) \ 188 #define __put_user_x(size, x, ptr, __ret_pu) \
188 asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ 189 asm volatile("call __put_user_" #size : "=a" (__ret_pu) \
189 : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") 190 : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
190 191
191 192
192 193
193 #ifdef CONFIG_X86_32 194 #ifdef CONFIG_X86_32
194 #define __put_user_asm_u64(x, addr, err, errret) \ 195 #define __put_user_asm_u64(x, addr, err, errret) \
195 asm volatile("1: movl %%eax,0(%2)\n" \ 196 asm volatile(ASM_STAC "\n" \
197 "1: movl %%eax,0(%2)\n" \
196 "2: movl %%edx,4(%2)\n" \ 198 "2: movl %%edx,4(%2)\n" \
197 "3:\n" \ 199 "3: " ASM_CLAC "\n" \
198 ".section .fixup,\"ax\"\n" \ 200 ".section .fixup,\"ax\"\n" \
199 "4: movl %3,%0\n" \ 201 "4: movl %3,%0\n" \
200 " jmp 3b\n" \ 202 " jmp 3b\n" \
201 ".previous\n" \ 203 ".previous\n" \
202 _ASM_EXTABLE(1b, 4b) \ 204 _ASM_EXTABLE(1b, 4b) \
203 _ASM_EXTABLE(2b, 4b) \ 205 _ASM_EXTABLE(2b, 4b) \
204 : "=r" (err) \ 206 : "=r" (err) \
205 : "A" (x), "r" (addr), "i" (errret), "0" (err)) 207 : "A" (x), "r" (addr), "i" (errret), "0" (err))
206 208
207 #define __put_user_asm_ex_u64(x, addr) \ 209 #define __put_user_asm_ex_u64(x, addr) \
208 asm volatile("1: movl %%eax,0(%1)\n" \ 210 asm volatile(ASM_STAC "\n" \
211 "1: movl %%eax,0(%1)\n" \
209 "2: movl %%edx,4(%1)\n" \ 212 "2: movl %%edx,4(%1)\n" \
210 "3:\n" \ 213 "3: " ASM_CLAC "\n" \
211 _ASM_EXTABLE_EX(1b, 2b) \ 214 _ASM_EXTABLE_EX(1b, 2b) \
212 _ASM_EXTABLE_EX(2b, 3b) \ 215 _ASM_EXTABLE_EX(2b, 3b) \
213 : : "A" (x), "r" (addr)) 216 : : "A" (x), "r" (addr))
214 217
215 #define __put_user_x8(x, ptr, __ret_pu) \ 218 #define __put_user_x8(x, ptr, __ret_pu) \
216 asm volatile("call __put_user_8" : "=a" (__ret_pu) \ 219 asm volatile("call __put_user_8" : "=a" (__ret_pu) \
217 : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") 220 : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
218 #else 221 #else
219 #define __put_user_asm_u64(x, ptr, retval, errret) \ 222 #define __put_user_asm_u64(x, ptr, retval, errret) \
220 __put_user_asm(x, ptr, retval, "q", "", "er", errret) 223 __put_user_asm(x, ptr, retval, "q", "", "er", errret)
221 #define __put_user_asm_ex_u64(x, addr) \ 224 #define __put_user_asm_ex_u64(x, addr) \
222 __put_user_asm_ex(x, addr, "q", "", "er") 225 __put_user_asm_ex(x, addr, "q", "", "er")
223 #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) 226 #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
224 #endif 227 #endif
225 228
226 extern void __put_user_bad(void); 229 extern void __put_user_bad(void);
227 230
228 /* 231 /*
229 * Strange magic calling convention: pointer in %ecx, 232 * Strange magic calling convention: pointer in %ecx,
230 * value in %eax(:%edx), return value in %eax. clobbers %rbx 233 * value in %eax(:%edx), return value in %eax. clobbers %rbx
231 */ 234 */
232 extern void __put_user_1(void); 235 extern void __put_user_1(void);
233 extern void __put_user_2(void); 236 extern void __put_user_2(void);
234 extern void __put_user_4(void); 237 extern void __put_user_4(void);
235 extern void __put_user_8(void); 238 extern void __put_user_8(void);
236 239
237 #ifdef CONFIG_X86_WP_WORKS_OK 240 #ifdef CONFIG_X86_WP_WORKS_OK
238 241
239 /** 242 /**
240 * put_user: - Write a simple value into user space. 243 * put_user: - Write a simple value into user space.
241 * @x: Value to copy to user space. 244 * @x: Value to copy to user space.
242 * @ptr: Destination address, in user space. 245 * @ptr: Destination address, in user space.
243 * 246 *
244 * Context: User context only. This function may sleep. 247 * Context: User context only. This function may sleep.
245 * 248 *
246 * This macro copies a single simple value from kernel space to user 249 * This macro copies a single simple value from kernel space to user
247 * space. It supports simple types like char and int, but not larger 250 * space. It supports simple types like char and int, but not larger
248 * data types like structures or arrays. 251 * data types like structures or arrays.
249 * 252 *
250 * @ptr must have pointer-to-simple-variable type, and @x must be assignable 253 * @ptr must have pointer-to-simple-variable type, and @x must be assignable
251 * to the result of dereferencing @ptr. 254 * to the result of dereferencing @ptr.
252 * 255 *
253 * Returns zero on success, or -EFAULT on error. 256 * Returns zero on success, or -EFAULT on error.
254 */ 257 */
255 #define put_user(x, ptr) \ 258 #define put_user(x, ptr) \
256 ({ \ 259 ({ \
257 int __ret_pu; \ 260 int __ret_pu; \
258 __typeof__(*(ptr)) __pu_val; \ 261 __typeof__(*(ptr)) __pu_val; \
259 __chk_user_ptr(ptr); \ 262 __chk_user_ptr(ptr); \
260 might_fault(); \ 263 might_fault(); \
261 __pu_val = x; \ 264 __pu_val = x; \
262 switch (sizeof(*(ptr))) { \ 265 switch (sizeof(*(ptr))) { \
263 case 1: \ 266 case 1: \
264 __put_user_x(1, __pu_val, ptr, __ret_pu); \ 267 __put_user_x(1, __pu_val, ptr, __ret_pu); \
265 break; \ 268 break; \
266 case 2: \ 269 case 2: \
267 __put_user_x(2, __pu_val, ptr, __ret_pu); \ 270 __put_user_x(2, __pu_val, ptr, __ret_pu); \
268 break; \ 271 break; \
269 case 4: \ 272 case 4: \
270 __put_user_x(4, __pu_val, ptr, __ret_pu); \ 273 __put_user_x(4, __pu_val, ptr, __ret_pu); \
271 break; \ 274 break; \
272 case 8: \ 275 case 8: \
273 __put_user_x8(__pu_val, ptr, __ret_pu); \ 276 __put_user_x8(__pu_val, ptr, __ret_pu); \
274 break; \ 277 break; \
275 default: \ 278 default: \
276 __put_user_x(X, __pu_val, ptr, __ret_pu); \ 279 __put_user_x(X, __pu_val, ptr, __ret_pu); \
277 break; \ 280 break; \
278 } \ 281 } \
279 __ret_pu; \ 282 __ret_pu; \
280 }) 283 })
281 284
282 #define __put_user_size(x, ptr, size, retval, errret) \ 285 #define __put_user_size(x, ptr, size, retval, errret) \
283 do { \ 286 do { \
284 retval = 0; \ 287 retval = 0; \
285 __chk_user_ptr(ptr); \ 288 __chk_user_ptr(ptr); \
286 switch (size) { \ 289 switch (size) { \
287 case 1: \ 290 case 1: \
288 __put_user_asm(x, ptr, retval, "b", "b", "iq", errret); \ 291 __put_user_asm(x, ptr, retval, "b", "b", "iq", errret); \
289 break; \ 292 break; \
290 case 2: \ 293 case 2: \
291 __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \ 294 __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \
292 break; \ 295 break; \
293 case 4: \ 296 case 4: \
294 __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ 297 __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \
295 break; \ 298 break; \
296 case 8: \ 299 case 8: \
297 __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \ 300 __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \
298 errret); \ 301 errret); \
299 break; \ 302 break; \
300 default: \ 303 default: \
301 __put_user_bad(); \ 304 __put_user_bad(); \
302 } \ 305 } \
303 } while (0) 306 } while (0)
304 307
305 #define __put_user_size_ex(x, ptr, size) \ 308 #define __put_user_size_ex(x, ptr, size) \
306 do { \ 309 do { \
307 __chk_user_ptr(ptr); \ 310 __chk_user_ptr(ptr); \
308 switch (size) { \ 311 switch (size) { \
309 case 1: \ 312 case 1: \
310 __put_user_asm_ex(x, ptr, "b", "b", "iq"); \ 313 __put_user_asm_ex(x, ptr, "b", "b", "iq"); \
311 break; \ 314 break; \
312 case 2: \ 315 case 2: \
313 __put_user_asm_ex(x, ptr, "w", "w", "ir"); \ 316 __put_user_asm_ex(x, ptr, "w", "w", "ir"); \
314 break; \ 317 break; \
315 case 4: \ 318 case 4: \
316 __put_user_asm_ex(x, ptr, "l", "k", "ir"); \ 319 __put_user_asm_ex(x, ptr, "l", "k", "ir"); \
317 break; \ 320 break; \
318 case 8: \ 321 case 8: \
319 __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \ 322 __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \
320 break; \ 323 break; \
321 default: \ 324 default: \
322 __put_user_bad(); \ 325 __put_user_bad(); \
323 } \ 326 } \
324 } while (0) 327 } while (0)
325 328
326 #else 329 #else
327 330
328 #define __put_user_size(x, ptr, size, retval, errret) \ 331 #define __put_user_size(x, ptr, size, retval, errret) \
329 do { \ 332 do { \
330 __typeof__(*(ptr))__pus_tmp = x; \ 333 __typeof__(*(ptr))__pus_tmp = x; \
331 retval = 0; \ 334 retval = 0; \
332 \ 335 \
333 if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \ 336 if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \
334 retval = errret; \ 337 retval = errret; \
335 } while (0) 338 } while (0)
336 339
337 #define put_user(x, ptr) \ 340 #define put_user(x, ptr) \
338 ({ \ 341 ({ \
339 int __ret_pu; \ 342 int __ret_pu; \
340 __typeof__(*(ptr))__pus_tmp = x; \ 343 __typeof__(*(ptr))__pus_tmp = x; \
341 __ret_pu = 0; \ 344 __ret_pu = 0; \
342 if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, \ 345 if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, \
343 sizeof(*(ptr))) != 0)) \ 346 sizeof(*(ptr))) != 0)) \
344 __ret_pu = -EFAULT; \ 347 __ret_pu = -EFAULT; \
345 __ret_pu; \ 348 __ret_pu; \
346 }) 349 })
347 #endif 350 #endif
348 351
349 #ifdef CONFIG_X86_32 352 #ifdef CONFIG_X86_32
350 #define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() 353 #define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad()
351 #define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() 354 #define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad()
352 #else 355 #else
353 #define __get_user_asm_u64(x, ptr, retval, errret) \ 356 #define __get_user_asm_u64(x, ptr, retval, errret) \
354 __get_user_asm(x, ptr, retval, "q", "", "=r", errret) 357 __get_user_asm(x, ptr, retval, "q", "", "=r", errret)
355 #define __get_user_asm_ex_u64(x, ptr) \ 358 #define __get_user_asm_ex_u64(x, ptr) \
356 __get_user_asm_ex(x, ptr, "q", "", "=r") 359 __get_user_asm_ex(x, ptr, "q", "", "=r")
357 #endif 360 #endif
358 361
359 #define __get_user_size(x, ptr, size, retval, errret) \ 362 #define __get_user_size(x, ptr, size, retval, errret) \
360 do { \ 363 do { \
361 retval = 0; \ 364 retval = 0; \
362 __chk_user_ptr(ptr); \ 365 __chk_user_ptr(ptr); \
363 switch (size) { \ 366 switch (size) { \
364 case 1: \ 367 case 1: \
365 __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); \ 368 __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); \
366 break; \ 369 break; \
367 case 2: \ 370 case 2: \
368 __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); \ 371 __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); \
369 break; \ 372 break; \
370 case 4: \ 373 case 4: \
371 __get_user_asm(x, ptr, retval, "l", "k", "=r", errret); \ 374 __get_user_asm(x, ptr, retval, "l", "k", "=r", errret); \
372 break; \ 375 break; \
373 case 8: \ 376 case 8: \
374 __get_user_asm_u64(x, ptr, retval, errret); \ 377 __get_user_asm_u64(x, ptr, retval, errret); \
375 break; \ 378 break; \
376 default: \ 379 default: \
377 (x) = __get_user_bad(); \ 380 (x) = __get_user_bad(); \
378 } \ 381 } \
379 } while (0) 382 } while (0)
380 383
381 #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ 384 #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \
382 asm volatile("1: mov"itype" %2,%"rtype"1\n" \ 385 asm volatile(ASM_STAC "\n" \
383 "2:\n" \ 386 "1: mov"itype" %2,%"rtype"1\n" \
387 "2: " ASM_CLAC "\n" \
384 ".section .fixup,\"ax\"\n" \ 388 ".section .fixup,\"ax\"\n" \
385 "3: mov %3,%0\n" \ 389 "3: mov %3,%0\n" \
386 " xor"itype" %"rtype"1,%"rtype"1\n" \ 390 " xor"itype" %"rtype"1,%"rtype"1\n" \
387 " jmp 2b\n" \ 391 " jmp 2b\n" \
388 ".previous\n" \ 392 ".previous\n" \
389 _ASM_EXTABLE(1b, 3b) \ 393 _ASM_EXTABLE(1b, 3b) \
390 : "=r" (err), ltype(x) \ 394 : "=r" (err), ltype(x) \
391 : "m" (__m(addr)), "i" (errret), "0" (err)) 395 : "m" (__m(addr)), "i" (errret), "0" (err))
392 396
393 #define __get_user_size_ex(x, ptr, size) \ 397 #define __get_user_size_ex(x, ptr, size) \
394 do { \ 398 do { \
395 __chk_user_ptr(ptr); \ 399 __chk_user_ptr(ptr); \
396 switch (size) { \ 400 switch (size) { \
397 case 1: \ 401 case 1: \
398 __get_user_asm_ex(x, ptr, "b", "b", "=q"); \ 402 __get_user_asm_ex(x, ptr, "b", "b", "=q"); \
399 break; \ 403 break; \
400 case 2: \ 404 case 2: \
401 __get_user_asm_ex(x, ptr, "w", "w", "=r"); \ 405 __get_user_asm_ex(x, ptr, "w", "w", "=r"); \
402 break; \ 406 break; \
403 case 4: \ 407 case 4: \
404 __get_user_asm_ex(x, ptr, "l", "k", "=r"); \ 408 __get_user_asm_ex(x, ptr, "l", "k", "=r"); \
405 break; \ 409 break; \
406 case 8: \ 410 case 8: \
407 __get_user_asm_ex_u64(x, ptr); \ 411 __get_user_asm_ex_u64(x, ptr); \
408 break; \ 412 break; \
409 default: \ 413 default: \
410 (x) = __get_user_bad(); \ 414 (x) = __get_user_bad(); \
411 } \ 415 } \
412 } while (0) 416 } while (0)
413 417
414 #define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ 418 #define __get_user_asm_ex(x, addr, itype, rtype, ltype) \
415 asm volatile("1: mov"itype" %1,%"rtype"0\n" \ 419 asm volatile(ASM_STAC "\n" \
416 "2:\n" \ 420 "1: mov"itype" %1,%"rtype"0\n" \
421 "2: " ASM_CLAC "\n" \
417 _ASM_EXTABLE_EX(1b, 2b) \ 422 _ASM_EXTABLE_EX(1b, 2b) \
418 : ltype(x) : "m" (__m(addr))) 423 : ltype(x) : "m" (__m(addr)))
419 424
420 #define __put_user_nocheck(x, ptr, size) \ 425 #define __put_user_nocheck(x, ptr, size) \
421 ({ \ 426 ({ \
422 int __pu_err; \ 427 int __pu_err; \
423 __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ 428 __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
424 __pu_err; \ 429 __pu_err; \
425 }) 430 })
426 431
427 #define __get_user_nocheck(x, ptr, size) \ 432 #define __get_user_nocheck(x, ptr, size) \
428 ({ \ 433 ({ \
429 int __gu_err; \ 434 int __gu_err; \
430 unsigned long __gu_val; \ 435 unsigned long __gu_val; \
431 __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ 436 __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
432 (x) = (__force __typeof__(*(ptr)))__gu_val; \ 437 (x) = (__force __typeof__(*(ptr)))__gu_val; \
433 __gu_err; \ 438 __gu_err; \
434 }) 439 })
435 440
436 /* FIXME: this hack is definitely wrong -AK */ 441 /* FIXME: this hack is definitely wrong -AK */
437 struct __large_struct { unsigned long buf[100]; }; 442 struct __large_struct { unsigned long buf[100]; };
438 #define __m(x) (*(struct __large_struct __user *)(x)) 443 #define __m(x) (*(struct __large_struct __user *)(x))
439 444
440 /* 445 /*
441 * Tell gcc we read from memory instead of writing: this is because 446 * Tell gcc we read from memory instead of writing: this is because
442 * we do not write to any memory gcc knows about, so there are no 447 * we do not write to any memory gcc knows about, so there are no
443 * aliasing issues. 448 * aliasing issues.
444 */ 449 */
445 #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ 450 #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \
446 asm volatile("1: mov"itype" %"rtype"1,%2\n" \ 451 asm volatile(ASM_STAC "\n" \
447 "2:\n" \ 452 "1: mov"itype" %"rtype"1,%2\n" \
453 "2: " ASM_CLAC "\n" \
448 ".section .fixup,\"ax\"\n" \ 454 ".section .fixup,\"ax\"\n" \
449 "3: mov %3,%0\n" \ 455 "3: mov %3,%0\n" \
450 " jmp 2b\n" \ 456 " jmp 2b\n" \
451 ".previous\n" \ 457 ".previous\n" \
452 _ASM_EXTABLE(1b, 3b) \ 458 _ASM_EXTABLE(1b, 3b) \
453 : "=r"(err) \ 459 : "=r"(err) \
454 : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) 460 : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
455 461
456 #define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ 462 #define __put_user_asm_ex(x, addr, itype, rtype, ltype) \
457 asm volatile("1: mov"itype" %"rtype"0,%1\n" \ 463 asm volatile(ASM_STAC "\n" \
458 "2:\n" \ 464 "1: mov"itype" %"rtype"0,%1\n" \
465 "2: " ASM_CLAC "\n" \
459 _ASM_EXTABLE_EX(1b, 2b) \ 466 _ASM_EXTABLE_EX(1b, 2b) \
460 : : ltype(x), "m" (__m(addr))) 467 : : ltype(x), "m" (__m(addr)))
461 468
462 /* 469 /*
463 * uaccess_try and catch 470 * uaccess_try and catch
464 */ 471 */
465 #define uaccess_try do { \ 472 #define uaccess_try do { \
466 int prev_err = current_thread_info()->uaccess_err; \ 473 int prev_err = current_thread_info()->uaccess_err; \
467 current_thread_info()->uaccess_err = 0; \ 474 current_thread_info()->uaccess_err = 0; \
468 barrier(); 475 barrier();
469 476
470 #define uaccess_catch(err) \ 477 #define uaccess_catch(err) \
471 (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ 478 (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \
472 current_thread_info()->uaccess_err = prev_err; \ 479 current_thread_info()->uaccess_err = prev_err; \
473 } while (0) 480 } while (0)
474 481
475 /** 482 /**
476 * __get_user: - Get a simple variable from user space, with less checking. 483 * __get_user: - Get a simple variable from user space, with less checking.
477 * @x: Variable to store result. 484 * @x: Variable to store result.
478 * @ptr: Source address, in user space. 485 * @ptr: Source address, in user space.
479 * 486 *
480 * Context: User context only. This function may sleep. 487 * Context: User context only. This function may sleep.
481 * 488 *
482 * This macro copies a single simple variable from user space to kernel 489 * This macro copies a single simple variable from user space to kernel
483 * space. It supports simple types like char and int, but not larger 490 * space. It supports simple types like char and int, but not larger
484 * data types like structures or arrays. 491 * data types like structures or arrays.
485 * 492 *
486 * @ptr must have pointer-to-simple-variable type, and the result of 493 * @ptr must have pointer-to-simple-variable type, and the result of
487 * dereferencing @ptr must be assignable to @x without a cast. 494 * dereferencing @ptr must be assignable to @x without a cast.
488 * 495 *
489 * Caller must check the pointer with access_ok() before calling this 496 * Caller must check the pointer with access_ok() before calling this
490 * function. 497 * function.
491 * 498 *
492 * Returns zero on success, or -EFAULT on error. 499 * Returns zero on success, or -EFAULT on error.
493 * On error, the variable @x is set to zero. 500 * On error, the variable @x is set to zero.
494 */ 501 */
495 502
496 #define __get_user(x, ptr) \ 503 #define __get_user(x, ptr) \
497 __get_user_nocheck((x), (ptr), sizeof(*(ptr))) 504 __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
498 505
499 /** 506 /**
500 * __put_user: - Write a simple value into user space, with less checking. 507 * __put_user: - Write a simple value into user space, with less checking.
501 * @x: Value to copy to user space. 508 * @x: Value to copy to user space.
502 * @ptr: Destination address, in user space. 509 * @ptr: Destination address, in user space.
503 * 510 *
504 * Context: User context only. This function may sleep. 511 * Context: User context only. This function may sleep.
505 * 512 *
506 * This macro copies a single simple value from kernel space to user 513 * This macro copies a single simple value from kernel space to user
507 * space. It supports simple types like char and int, but not larger 514 * space. It supports simple types like char and int, but not larger
508 * data types like structures or arrays. 515 * data types like structures or arrays.
509 * 516 *
510 * @ptr must have pointer-to-simple-variable type, and @x must be assignable 517 * @ptr must have pointer-to-simple-variable type, and @x must be assignable
511 * to the result of dereferencing @ptr. 518 * to the result of dereferencing @ptr.
512 * 519 *
513 * Caller must check the pointer with access_ok() before calling this 520 * Caller must check the pointer with access_ok() before calling this
514 * function. 521 * function.
515 * 522 *
516 * Returns zero on success, or -EFAULT on error. 523 * Returns zero on success, or -EFAULT on error.
517 */ 524 */
518 525
519 #define __put_user(x, ptr) \ 526 #define __put_user(x, ptr) \
520 __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) 527 __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
521 528
522 #define __get_user_unaligned __get_user 529 #define __get_user_unaligned __get_user
523 #define __put_user_unaligned __put_user 530 #define __put_user_unaligned __put_user
524 531
525 /* 532 /*
526 * {get|put}_user_try and catch 533 * {get|put}_user_try and catch
527 * 534 *
528 * get_user_try { 535 * get_user_try {
529 * get_user_ex(...); 536 * get_user_ex(...);
530 * } get_user_catch(err) 537 * } get_user_catch(err)
531 */ 538 */
532 #define get_user_try uaccess_try 539 #define get_user_try uaccess_try
533 #define get_user_catch(err) uaccess_catch(err) 540 #define get_user_catch(err) uaccess_catch(err)
534 541
535 #define get_user_ex(x, ptr) do { \ 542 #define get_user_ex(x, ptr) do { \
536 unsigned long __gue_val; \ 543 unsigned long __gue_val; \
537 __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \ 544 __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \
538 (x) = (__force __typeof__(*(ptr)))__gue_val; \ 545 (x) = (__force __typeof__(*(ptr)))__gue_val; \
539 } while (0) 546 } while (0)
540 547
541 #ifdef CONFIG_X86_WP_WORKS_OK 548 #ifdef CONFIG_X86_WP_WORKS_OK
542 549
543 #define put_user_try uaccess_try 550 #define put_user_try uaccess_try
544 #define put_user_catch(err) uaccess_catch(err) 551 #define put_user_catch(err) uaccess_catch(err)
545 552
546 #define put_user_ex(x, ptr) \ 553 #define put_user_ex(x, ptr) \
547 __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) 554 __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
548 555
549 #else /* !CONFIG_X86_WP_WORKS_OK */ 556 #else /* !CONFIG_X86_WP_WORKS_OK */
550 557
551 #define put_user_try do { \ 558 #define put_user_try do { \
552 int __uaccess_err = 0; 559 int __uaccess_err = 0;
553 560
554 #define put_user_catch(err) \ 561 #define put_user_catch(err) \
555 (err) |= __uaccess_err; \ 562 (err) |= __uaccess_err; \
556 } while (0) 563 } while (0)
557 564
558 #define put_user_ex(x, ptr) do { \ 565 #define put_user_ex(x, ptr) do { \
559 __uaccess_err |= __put_user(x, ptr); \ 566 __uaccess_err |= __put_user(x, ptr); \
560 } while (0) 567 } while (0)
561 568
562 #endif /* CONFIG_X86_WP_WORKS_OK */ 569 #endif /* CONFIG_X86_WP_WORKS_OK */
563 570
564 extern unsigned long 571 extern unsigned long
565 copy_from_user_nmi(void *to, const void __user *from, unsigned long n); 572 copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
566 extern __must_check long 573 extern __must_check long
567 strncpy_from_user(char *dst, const char __user *src, long count); 574 strncpy_from_user(char *dst, const char __user *src, long count);
568 575
569 extern __must_check long strlen_user(const char __user *str); 576 extern __must_check long strlen_user(const char __user *str);
570 extern __must_check long strnlen_user(const char __user *str, long n); 577 extern __must_check long strnlen_user(const char __user *str, long n);
571 578
572 unsigned long __must_check clear_user(void __user *mem, unsigned long len); 579 unsigned long __must_check clear_user(void __user *mem, unsigned long len);
573 unsigned long __must_check __clear_user(void __user *mem, unsigned long len); 580 unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
574 581
575 /* 582 /*
576 * movsl can be slow when source and dest are not both 8-byte aligned 583 * movsl can be slow when source and dest are not both 8-byte aligned
577 */ 584 */
578 #ifdef CONFIG_X86_INTEL_USERCOPY 585 #ifdef CONFIG_X86_INTEL_USERCOPY
579 extern struct movsl_mask { 586 extern struct movsl_mask {
580 int mask; 587 int mask;
581 } ____cacheline_aligned_in_smp movsl_mask; 588 } ____cacheline_aligned_in_smp movsl_mask;
582 #endif 589 #endif
583 590
584 #define ARCH_HAS_NOCACHE_UACCESS 1 591 #define ARCH_HAS_NOCACHE_UACCESS 1
585 592
586 #ifdef CONFIG_X86_32 593 #ifdef CONFIG_X86_32
587 # include "uaccess_32.h" 594 # include "uaccess_32.h"
588 #else 595 #else
589 # include "uaccess_64.h" 596 # include "uaccess_64.h"
590 #endif 597 #endif
591 598
592 #endif /* _ASM_X86_UACCESS_H */ 599 #endif /* _ASM_X86_UACCESS_H */
593 600
594 601
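
The __range_not_ok() asm above folds the carry out of addr + size into the result so that a wrapping range can never pass the limit check; on a 32-bit kernel, for example, addr = 0xfffff000 with size = 0x2000 wraps to 0x1000 and must still be rejected. A portable C sketch of the same test, for illustration only and not part of this commit (the kernel uses the branch-free add/sbb sequence instead); it returns nonzero when the range is not OK, like the macro:

static inline unsigned long example_range_not_ok(unsigned long addr,
						 unsigned long size,
						 unsigned long limit)
{
	unsigned long sum = addr + size;

	/* A carry out of the addition means the range wraps past the top. */
	if (sum < addr)
		return 1;

	/* Otherwise the end of the range must not exceed the limit. */
	return sum > limit;
}
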
arch/x86/include/asm/xsave.h
1 #ifndef __ASM_X86_XSAVE_H 1 #ifndef __ASM_X86_XSAVE_H
2 #define __ASM_X86_XSAVE_H 2 #define __ASM_X86_XSAVE_H
3 3
4 #include <linux/types.h> 4 #include <linux/types.h>
5 #include <asm/processor.h> 5 #include <asm/processor.h>
6 6
7 #define XSTATE_CPUID 0x0000000d 7 #define XSTATE_CPUID 0x0000000d
8 8
9 #define XSTATE_FP 0x1 9 #define XSTATE_FP 0x1
10 #define XSTATE_SSE 0x2 10 #define XSTATE_SSE 0x2
11 #define XSTATE_YMM 0x4 11 #define XSTATE_YMM 0x4
12 12
13 #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) 13 #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
14 14
15 #define FXSAVE_SIZE 512 15 #define FXSAVE_SIZE 512
16 16
17 #define XSAVE_HDR_SIZE 64 17 #define XSAVE_HDR_SIZE 64
18 #define XSAVE_HDR_OFFSET FXSAVE_SIZE 18 #define XSAVE_HDR_OFFSET FXSAVE_SIZE
19 19
20 #define XSAVE_YMM_SIZE 256 20 #define XSAVE_YMM_SIZE 256
21 #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) 21 #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
22 22
23 /* 23 /*
24 * These are the features that the OS can handle currently. 24 * These are the features that the OS can handle currently.
25 */ 25 */
26 #define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) 26 #define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
27 27
28 #ifdef CONFIG_X86_64 28 #ifdef CONFIG_X86_64
29 #define REX_PREFIX "0x48, " 29 #define REX_PREFIX "0x48, "
30 #else 30 #else
31 #define REX_PREFIX 31 #define REX_PREFIX
32 #endif 32 #endif
33 33
34 extern unsigned int xstate_size; 34 extern unsigned int xstate_size;
35 extern u64 pcntxt_mask; 35 extern u64 pcntxt_mask;
36 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; 36 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
37 37
38 extern void xsave_init(void); 38 extern void xsave_init(void);
39 extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); 39 extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
40 extern int init_fpu(struct task_struct *child); 40 extern int init_fpu(struct task_struct *child);
41 extern int check_for_xstate(struct i387_fxsave_struct __user *buf, 41 extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
42 void __user *fpstate, 42 void __user *fpstate,
43 struct _fpx_sw_bytes *sw); 43 struct _fpx_sw_bytes *sw);
44 44
45 static inline int fpu_xrstor_checking(struct fpu *fpu) 45 static inline int fpu_xrstor_checking(struct fpu *fpu)
46 { 46 {
47 struct xsave_struct *fx = &fpu->state->xsave; 47 struct xsave_struct *fx = &fpu->state->xsave;
48 int err; 48 int err;
49 49
50 asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" 50 asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
51 "2:\n" 51 "2:\n"
52 ".section .fixup,\"ax\"\n" 52 ".section .fixup,\"ax\"\n"
53 "3: movl $-1,%[err]\n" 53 "3: movl $-1,%[err]\n"
54 " jmp 2b\n" 54 " jmp 2b\n"
55 ".previous\n" 55 ".previous\n"
56 _ASM_EXTABLE(1b, 3b) 56 _ASM_EXTABLE(1b, 3b)
57 : [err] "=r" (err) 57 : [err] "=r" (err)
58 : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0) 58 : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0)
59 : "memory"); 59 : "memory");
60 60
61 return err; 61 return err;
62 } 62 }
63 63
64 static inline int xsave_user(struct xsave_struct __user *buf) 64 static inline int xsave_user(struct xsave_struct __user *buf)
65 { 65 {
66 int err; 66 int err;
67 67
68 /* 68 /*
69 * Clear the xsave header first, so that reserved fields are 69 * Clear the xsave header first, so that reserved fields are
70 * initialized to zero. 70 * initialized to zero.
71 */ 71 */
72 err = __clear_user(&buf->xsave_hdr, 72 err = __clear_user(&buf->xsave_hdr,
73 sizeof(struct xsave_hdr_struct)); 73 sizeof(struct xsave_hdr_struct));
74 if (unlikely(err)) 74 if (unlikely(err))
75 return -EFAULT; 75 return -EFAULT;
76 76
77 __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" 77 __asm__ __volatile__(ASM_STAC "\n"
78 "2:\n" 78 "1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
79 "2: " ASM_CLAC "\n"
79 ".section .fixup,\"ax\"\n" 80 ".section .fixup,\"ax\"\n"
80 "3: movl $-1,%[err]\n" 81 "3: movl $-1,%[err]\n"
81 " jmp 2b\n" 82 " jmp 2b\n"
82 ".previous\n" 83 ".previous\n"
83 _ASM_EXTABLE(1b,3b) 84 _ASM_EXTABLE(1b,3b)
84 : [err] "=r" (err) 85 : [err] "=r" (err)
85 : "D" (buf), "a" (-1), "d" (-1), "0" (0) 86 : "D" (buf), "a" (-1), "d" (-1), "0" (0)
86 : "memory"); 87 : "memory");
87 if (unlikely(err) && __clear_user(buf, xstate_size)) 88 if (unlikely(err) && __clear_user(buf, xstate_size))
88 err = -EFAULT; 89 err = -EFAULT;
89 /* No need to clear here because the caller clears USED_MATH */ 90 /* No need to clear here because the caller clears USED_MATH */
90 return err; 91 return err;
91 } 92 }
92 93
93 static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) 94 static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
94 { 95 {
95 int err; 96 int err;
96 struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); 97 struct xsave_struct *xstate = ((__force struct xsave_struct *)buf);
97 u32 lmask = mask; 98 u32 lmask = mask;
98 u32 hmask = mask >> 32; 99 u32 hmask = mask >> 32;
99 100
100 __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" 101 __asm__ __volatile__(ASM_STAC "\n"
101 "2:\n" 102 "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
103 "2: " ASM_CLAC "\n"
102 ".section .fixup,\"ax\"\n" 104 ".section .fixup,\"ax\"\n"
103 "3: movl $-1,%[err]\n" 105 "3: movl $-1,%[err]\n"
104 " jmp 2b\n" 106 " jmp 2b\n"
105 ".previous\n" 107 ".previous\n"
106 _ASM_EXTABLE(1b,3b) 108 _ASM_EXTABLE(1b,3b)
107 : [err] "=r" (err) 109 : [err] "=r" (err)
108 : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) 110 : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
109 : "memory"); /* memory required? */ 111 : "memory"); /* memory required? */
110 return err; 112 return err;
111 } 113 }
112 114
113 static inline void xrstor_state(struct xsave_struct *fx, u64 mask) 115 static inline void xrstor_state(struct xsave_struct *fx, u64 mask)
114 { 116 {
115 u32 lmask = mask; 117 u32 lmask = mask;
116 u32 hmask = mask >> 32; 118 u32 hmask = mask >> 32;
117 119
118 asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" 120 asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
119 : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) 121 : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
120 : "memory"); 122 : "memory");
121 } 123 }
122 124
123 static inline void xsave_state(struct xsave_struct *fx, u64 mask) 125 static inline void xsave_state(struct xsave_struct *fx, u64 mask)
124 { 126 {
125 u32 lmask = mask; 127 u32 lmask = mask;
126 u32 hmask = mask >> 32; 128 u32 hmask = mask >> 32;
127 129
128 asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" 130 asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
129 : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) 131 : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
130 : "memory"); 132 : "memory");
131 } 133 }
132 134
133 static inline void fpu_xsave(struct fpu *fpu) 135 static inline void fpu_xsave(struct fpu *fpu)
134 { 136 {
135 /* This, however, we can work around by forcing the compiler to select 137 /* This, however, we can work around by forcing the compiler to select
136 an addressing mode that doesn't require extended registers. */ 138 an addressing mode that doesn't require extended registers. */
137 alternative_input( 139 alternative_input(
138 ".byte " REX_PREFIX "0x0f,0xae,0x27", 140 ".byte " REX_PREFIX "0x0f,0xae,0x27",
139 ".byte " REX_PREFIX "0x0f,0xae,0x37", 141 ".byte " REX_PREFIX "0x0f,0xae,0x37",
140 X86_FEATURE_XSAVEOPT, 142 X86_FEATURE_XSAVEOPT,
141 [fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) : 143 [fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) :
142 "memory"); 144 "memory");
143 } 145 }
144 #endif 146 #endif
145 147
arch/x86/kernel/cpu/common.c
1 #include <linux/bootmem.h> 1 #include <linux/bootmem.h>
2 #include <linux/linkage.h> 2 #include <linux/linkage.h>
3 #include <linux/bitops.h> 3 #include <linux/bitops.h>
4 #include <linux/kernel.h> 4 #include <linux/kernel.h>
5 #include <linux/module.h> 5 #include <linux/module.h>
6 #include <linux/percpu.h> 6 #include <linux/percpu.h>
7 #include <linux/string.h> 7 #include <linux/string.h>
8 #include <linux/delay.h> 8 #include <linux/delay.h>
9 #include <linux/sched.h> 9 #include <linux/sched.h>
10 #include <linux/init.h> 10 #include <linux/init.h>
11 #include <linux/kgdb.h> 11 #include <linux/kgdb.h>
12 #include <linux/smp.h> 12 #include <linux/smp.h>
13 #include <linux/io.h> 13 #include <linux/io.h>
14 14
15 #include <asm/stackprotector.h> 15 #include <asm/stackprotector.h>
16 #include <asm/perf_event.h> 16 #include <asm/perf_event.h>
17 #include <asm/mmu_context.h> 17 #include <asm/mmu_context.h>
18 #include <asm/archrandom.h> 18 #include <asm/archrandom.h>
19 #include <asm/hypervisor.h> 19 #include <asm/hypervisor.h>
20 #include <asm/processor.h> 20 #include <asm/processor.h>
21 #include <asm/debugreg.h> 21 #include <asm/debugreg.h>
22 #include <asm/sections.h> 22 #include <asm/sections.h>
23 #include <linux/topology.h> 23 #include <linux/topology.h>
24 #include <linux/cpumask.h> 24 #include <linux/cpumask.h>
25 #include <asm/pgtable.h> 25 #include <asm/pgtable.h>
26 #include <linux/atomic.h> 26 #include <linux/atomic.h>
27 #include <asm/proto.h> 27 #include <asm/proto.h>
28 #include <asm/setup.h> 28 #include <asm/setup.h>
29 #include <asm/apic.h> 29 #include <asm/apic.h>
30 #include <asm/desc.h> 30 #include <asm/desc.h>
31 #include <asm/i387.h> 31 #include <asm/i387.h>
32 #include <asm/fpu-internal.h> 32 #include <asm/fpu-internal.h>
33 #include <asm/mtrr.h> 33 #include <asm/mtrr.h>
34 #include <linux/numa.h> 34 #include <linux/numa.h>
35 #include <asm/asm.h> 35 #include <asm/asm.h>
36 #include <asm/cpu.h> 36 #include <asm/cpu.h>
37 #include <asm/mce.h> 37 #include <asm/mce.h>
38 #include <asm/msr.h> 38 #include <asm/msr.h>
39 #include <asm/pat.h> 39 #include <asm/pat.h>
40 40
41 #ifdef CONFIG_X86_LOCAL_APIC 41 #ifdef CONFIG_X86_LOCAL_APIC
42 #include <asm/uv/uv.h> 42 #include <asm/uv/uv.h>
43 #endif 43 #endif
44 44
45 #include "cpu.h" 45 #include "cpu.h"
46 46
47 /* all of these masks are initialized in setup_cpu_local_masks() */ 47 /* all of these masks are initialized in setup_cpu_local_masks() */
48 cpumask_var_t cpu_initialized_mask; 48 cpumask_var_t cpu_initialized_mask;
49 cpumask_var_t cpu_callout_mask; 49 cpumask_var_t cpu_callout_mask;
50 cpumask_var_t cpu_callin_mask; 50 cpumask_var_t cpu_callin_mask;
51 51
52 /* representing cpus for which sibling maps can be computed */ 52 /* representing cpus for which sibling maps can be computed */
53 cpumask_var_t cpu_sibling_setup_mask; 53 cpumask_var_t cpu_sibling_setup_mask;
54 54
55 /* correctly size the local cpu masks */ 55 /* correctly size the local cpu masks */
56 void __init setup_cpu_local_masks(void) 56 void __init setup_cpu_local_masks(void)
57 { 57 {
58 alloc_bootmem_cpumask_var(&cpu_initialized_mask); 58 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
59 alloc_bootmem_cpumask_var(&cpu_callin_mask); 59 alloc_bootmem_cpumask_var(&cpu_callin_mask);
60 alloc_bootmem_cpumask_var(&cpu_callout_mask); 60 alloc_bootmem_cpumask_var(&cpu_callout_mask);
61 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); 61 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
62 } 62 }
63 63
64 static void __cpuinit default_init(struct cpuinfo_x86 *c) 64 static void __cpuinit default_init(struct cpuinfo_x86 *c)
65 { 65 {
66 #ifdef CONFIG_X86_64 66 #ifdef CONFIG_X86_64
67 cpu_detect_cache_sizes(c); 67 cpu_detect_cache_sizes(c);
68 #else 68 #else
69 /* Not much we can do here... */ 69 /* Not much we can do here... */
70 /* Check if at least it has cpuid */ 70 /* Check if at least it has cpuid */
71 if (c->cpuid_level == -1) { 71 if (c->cpuid_level == -1) {
72 /* No cpuid. It must be an ancient CPU */ 72 /* No cpuid. It must be an ancient CPU */
73 if (c->x86 == 4) 73 if (c->x86 == 4)
74 strcpy(c->x86_model_id, "486"); 74 strcpy(c->x86_model_id, "486");
75 else if (c->x86 == 3) 75 else if (c->x86 == 3)
76 strcpy(c->x86_model_id, "386"); 76 strcpy(c->x86_model_id, "386");
77 } 77 }
78 #endif 78 #endif
79 } 79 }
80 80
81 static const struct cpu_dev __cpuinitconst default_cpu = { 81 static const struct cpu_dev __cpuinitconst default_cpu = {
82 .c_init = default_init, 82 .c_init = default_init,
83 .c_vendor = "Unknown", 83 .c_vendor = "Unknown",
84 .c_x86_vendor = X86_VENDOR_UNKNOWN, 84 .c_x86_vendor = X86_VENDOR_UNKNOWN,
85 }; 85 };
86 86
87 static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; 87 static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
88 88
89 DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { 89 DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
90 #ifdef CONFIG_X86_64 90 #ifdef CONFIG_X86_64
91 /* 91 /*
92 * We need valid kernel segments for data and code in long mode too 92 * We need valid kernel segments for data and code in long mode too
93 * IRET will check the segment types kkeil 2000/10/28 93 * IRET will check the segment types kkeil 2000/10/28
94 * Also sysret mandates a special GDT layout 94 * Also sysret mandates a special GDT layout
95 * 95 *
96 * TLS descriptors are currently at a different place compared to i386. 96 * TLS descriptors are currently at a different place compared to i386.
97 * Hopefully nobody expects them at a fixed place (Wine?) 97 * Hopefully nobody expects them at a fixed place (Wine?)
98 */ 98 */
99 [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), 99 [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
100 [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), 100 [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
101 [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), 101 [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
102 [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), 102 [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff),
103 [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), 103 [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff),
104 [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), 104 [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff),
105 #else 105 #else
106 [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), 106 [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff),
107 [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), 107 [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
108 [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), 108 [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff),
109 [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), 109 [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff),
110 /* 110 /*
111 * Segments used for calling PnP BIOS have byte granularity. 111 * Segments used for calling PnP BIOS have byte granularity.
112 * The code segments and data segments have fixed 64k limits, 112 * The code segments and data segments have fixed 64k limits,

113 * the transfer segment sizes are set at run time. 113 * the transfer segment sizes are set at run time.
114 */ 114 */
115 /* 32-bit code */ 115 /* 32-bit code */
116 [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), 116 [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
117 /* 16-bit code */ 117 /* 16-bit code */
118 [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), 118 [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
119 /* 16-bit data */ 119 /* 16-bit data */
120 [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), 120 [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff),
121 /* 16-bit data */ 121 /* 16-bit data */
122 [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), 122 [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0),
123 /* 16-bit data */ 123 /* 16-bit data */
124 [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), 124 [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0),
125 /* 125 /*
126 * The APM segments have byte granularity and their bases 126 * The APM segments have byte granularity and their bases
127 * are set at run time. All have 64k limits. 127 * are set at run time. All have 64k limits.
128 */ 128 */
129 /* 32-bit code */ 129 /* 32-bit code */
130 [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), 130 [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
131 /* 16-bit code */ 131 /* 16-bit code */
132 [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), 132 [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
133 /* data */ 133 /* data */
134 [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), 134 [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff),
135 135
136 [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), 136 [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
137 [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), 137 [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
138 GDT_STACK_CANARY_INIT 138 GDT_STACK_CANARY_INIT
139 #endif 139 #endif
140 } }; 140 } };
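For readers decoding the GDT_ENTRY_INIT() flag values above (a reading of the descriptor encoding, not something stated in this file): the low byte is the access byte and the high nibble carries the granularity/size bits, so 0xc09b should break down as present, DPL 0, execute/read code with 4K granularity and a 32-bit default size, 0xa09b swaps the 32-bit default bit for the long-mode L bit used by the 64-bit kernel code segment, and 0xc093/0xc092 are the corresponding read/write data descriptors.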
141 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 141 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
142 142
143 static int __init x86_xsave_setup(char *s) 143 static int __init x86_xsave_setup(char *s)
144 { 144 {
145 setup_clear_cpu_cap(X86_FEATURE_XSAVE); 145 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
146 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); 146 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
147 setup_clear_cpu_cap(X86_FEATURE_AVX); 147 setup_clear_cpu_cap(X86_FEATURE_AVX);
148 setup_clear_cpu_cap(X86_FEATURE_AVX2); 148 setup_clear_cpu_cap(X86_FEATURE_AVX2);
149 return 1; 149 return 1;
150 } 150 }
151 __setup("noxsave", x86_xsave_setup); 151 __setup("noxsave", x86_xsave_setup);
152 152
153 static int __init x86_xsaveopt_setup(char *s) 153 static int __init x86_xsaveopt_setup(char *s)
154 { 154 {
155 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); 155 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
156 return 1; 156 return 1;
157 } 157 }
158 __setup("noxsaveopt", x86_xsaveopt_setup); 158 __setup("noxsaveopt", x86_xsaveopt_setup);
159 159
160 #ifdef CONFIG_X86_32 160 #ifdef CONFIG_X86_32
161 static int cachesize_override __cpuinitdata = -1; 161 static int cachesize_override __cpuinitdata = -1;
162 static int disable_x86_serial_nr __cpuinitdata = 1; 162 static int disable_x86_serial_nr __cpuinitdata = 1;
163 163
164 static int __init cachesize_setup(char *str) 164 static int __init cachesize_setup(char *str)
165 { 165 {
166 get_option(&str, &cachesize_override); 166 get_option(&str, &cachesize_override);
167 return 1; 167 return 1;
168 } 168 }
169 __setup("cachesize=", cachesize_setup); 169 __setup("cachesize=", cachesize_setup);
170 170
171 static int __init x86_fxsr_setup(char *s) 171 static int __init x86_fxsr_setup(char *s)
172 { 172 {
173 setup_clear_cpu_cap(X86_FEATURE_FXSR); 173 setup_clear_cpu_cap(X86_FEATURE_FXSR);
174 setup_clear_cpu_cap(X86_FEATURE_XMM); 174 setup_clear_cpu_cap(X86_FEATURE_XMM);
175 return 1; 175 return 1;
176 } 176 }
177 __setup("nofxsr", x86_fxsr_setup); 177 __setup("nofxsr", x86_fxsr_setup);
178 178
179 static int __init x86_sep_setup(char *s) 179 static int __init x86_sep_setup(char *s)
180 { 180 {
181 setup_clear_cpu_cap(X86_FEATURE_SEP); 181 setup_clear_cpu_cap(X86_FEATURE_SEP);
182 return 1; 182 return 1;
183 } 183 }
184 __setup("nosep", x86_sep_setup); 184 __setup("nosep", x86_sep_setup);
185 185
186 /* Standard macro to see if a specific flag is changeable */ 186 /* Standard macro to see if a specific flag is changeable */
187 static inline int flag_is_changeable_p(u32 flag) 187 static inline int flag_is_changeable_p(u32 flag)
188 { 188 {
189 u32 f1, f2; 189 u32 f1, f2;
190 190
191 /* 191 /*
192 * Cyrix and IDT cpus allow disabling of CPUID 192 * Cyrix and IDT cpus allow disabling of CPUID
193 * so the code below may return different results 193 * so the code below may return different results
194 * when it is executed before and after enabling 194 * when it is executed before and after enabling
195 * the CPUID. Add "volatile" to not allow gcc to 195 * the CPUID. Add "volatile" to not allow gcc to
196 * optimize the subsequent calls to this function. 196 * optimize the subsequent calls to this function.
197 */ 197 */
198 asm volatile ("pushfl \n\t" 198 asm volatile ("pushfl \n\t"
199 "pushfl \n\t" 199 "pushfl \n\t"
200 "popl %0 \n\t" 200 "popl %0 \n\t"
201 "movl %0, %1 \n\t" 201 "movl %0, %1 \n\t"
202 "xorl %2, %0 \n\t" 202 "xorl %2, %0 \n\t"
203 "pushl %0 \n\t" 203 "pushl %0 \n\t"
204 "popfl \n\t" 204 "popfl \n\t"
205 "pushfl \n\t" 205 "pushfl \n\t"
206 "popl %0 \n\t" 206 "popl %0 \n\t"
207 "popfl \n\t" 207 "popfl \n\t"
208 208
209 : "=&r" (f1), "=&r" (f2) 209 : "=&r" (f1), "=&r" (f2)
210 : "ir" (flag)); 210 : "ir" (flag));
211 211
212 return ((f1^f2) & flag) != 0; 212 return ((f1^f2) & flag) != 0;
213 } 213 }
214 214
215 /* Probe for the CPUID instruction */ 215 /* Probe for the CPUID instruction */
216 static int __cpuinit have_cpuid_p(void) 216 static int __cpuinit have_cpuid_p(void)
217 { 217 {
218 return flag_is_changeable_p(X86_EFLAGS_ID); 218 return flag_is_changeable_p(X86_EFLAGS_ID);
219 } 219 }
220 220
221 static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 221 static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
222 { 222 {
223 unsigned long lo, hi; 223 unsigned long lo, hi;
224 224
225 if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr) 225 if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr)
226 return; 226 return;
227 227
228 /* Disable processor serial number: */ 228 /* Disable processor serial number: */
229 229
230 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); 230 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
231 lo |= 0x200000; 231 lo |= 0x200000;
232 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); 232 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
233 233
234 printk(KERN_NOTICE "CPU serial number disabled.\n"); 234 printk(KERN_NOTICE "CPU serial number disabled.\n");
235 clear_cpu_cap(c, X86_FEATURE_PN); 235 clear_cpu_cap(c, X86_FEATURE_PN);
236 236
237 /* Disabling the serial number may affect the cpuid level */ 237 /* Disabling the serial number may affect the cpuid level */
238 c->cpuid_level = cpuid_eax(0); 238 c->cpuid_level = cpuid_eax(0);
239 } 239 }
240 240
241 static int __init x86_serial_nr_setup(char *s) 241 static int __init x86_serial_nr_setup(char *s)
242 { 242 {
243 disable_x86_serial_nr = 0; 243 disable_x86_serial_nr = 0;
244 return 1; 244 return 1;
245 } 245 }
246 __setup("serialnumber", x86_serial_nr_setup); 246 __setup("serialnumber", x86_serial_nr_setup);
247 #else 247 #else
248 static inline int flag_is_changeable_p(u32 flag) 248 static inline int flag_is_changeable_p(u32 flag)
249 { 249 {
250 return 1; 250 return 1;
251 } 251 }
252 /* Probe for the CPUID instruction */ 252 /* Probe for the CPUID instruction */
253 static inline int have_cpuid_p(void) 253 static inline int have_cpuid_p(void)
254 { 254 {
255 return 1; 255 return 1;
256 } 256 }
257 static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 257 static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
258 { 258 {
259 } 259 }
260 #endif 260 #endif
261 261
262 static int disable_smep __cpuinitdata; 262 static int disable_smep __cpuinitdata;
263 static __init int setup_disable_smep(char *arg) 263 static __init int setup_disable_smep(char *arg)
264 { 264 {
265 disable_smep = 1; 265 disable_smep = 1;
266 return 1; 266 return 1;
267 } 267 }
268 __setup("nosmep", setup_disable_smep); 268 __setup("nosmep", setup_disable_smep);
269 269
270 static __cpuinit void setup_smep(struct cpuinfo_x86 *c) 270 static __cpuinit void setup_smep(struct cpuinfo_x86 *c)
271 { 271 {
272 if (cpu_has(c, X86_FEATURE_SMEP)) { 272 if (cpu_has(c, X86_FEATURE_SMEP)) {
273 if (unlikely(disable_smep)) { 273 if (unlikely(disable_smep)) {
274 setup_clear_cpu_cap(X86_FEATURE_SMEP); 274 setup_clear_cpu_cap(X86_FEATURE_SMEP);
275 clear_in_cr4(X86_CR4_SMEP); 275 clear_in_cr4(X86_CR4_SMEP);
276 } else 276 } else
277 set_in_cr4(X86_CR4_SMEP); 277 set_in_cr4(X86_CR4_SMEP);
278 } 278 }
279 } 279 }
280 280
281 /* 281 /*
282 * Some CPU features depend on higher CPUID levels, which may not always 282 * Some CPU features depend on higher CPUID levels, which may not always
283 * be available due to CPUID level capping or broken virtualization 283 * be available due to CPUID level capping or broken virtualization
284 * software. Add those features to this table to auto-disable them. 284 * software. Add those features to this table to auto-disable them.
285 */ 285 */
286 struct cpuid_dependent_feature { 286 struct cpuid_dependent_feature {
287 u32 feature; 287 u32 feature;
288 u32 level; 288 u32 level;
289 }; 289 };
290 290
291 static const struct cpuid_dependent_feature __cpuinitconst 291 static const struct cpuid_dependent_feature __cpuinitconst
292 cpuid_dependent_features[] = { 292 cpuid_dependent_features[] = {
293 { X86_FEATURE_MWAIT, 0x00000005 }, 293 { X86_FEATURE_MWAIT, 0x00000005 },
294 { X86_FEATURE_DCA, 0x00000009 }, 294 { X86_FEATURE_DCA, 0x00000009 },
295 { X86_FEATURE_XSAVE, 0x0000000d }, 295 { X86_FEATURE_XSAVE, 0x0000000d },
296 { 0, 0 } 296 { 0, 0 }
297 }; 297 };
298 298
299 static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) 299 static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
300 { 300 {
301 const struct cpuid_dependent_feature *df; 301 const struct cpuid_dependent_feature *df;
302 302
303 for (df = cpuid_dependent_features; df->feature; df++) { 303 for (df = cpuid_dependent_features; df->feature; df++) {
304 304
305 if (!cpu_has(c, df->feature)) 305 if (!cpu_has(c, df->feature))
306 continue; 306 continue;
307 /* 307 /*
308 * Note: cpuid_level is set to -1 if unavailable, but 308 * Note: cpuid_level is set to -1 if unavailable, but
309 * extended_cpuid_level is set to 0 if unavailable 310 * extended_cpuid_level is set to 0 if unavailable
310 * and the legitimate extended levels are all negative 310 * and the legitimate extended levels are all negative
311 * when signed; hence the weird messing around with 311 * when signed; hence the weird messing around with
312 * signs here... 312 * signs here...
313 */ 313 */
314 if (!((s32)df->level < 0 ? 314 if (!((s32)df->level < 0 ?
315 (u32)df->level > (u32)c->extended_cpuid_level : 315 (u32)df->level > (u32)c->extended_cpuid_level :
316 (s32)df->level > (s32)c->cpuid_level)) 316 (s32)df->level > (s32)c->cpuid_level))
317 continue; 317 continue;
318 318
319 clear_cpu_cap(c, df->feature); 319 clear_cpu_cap(c, df->feature);
320 if (!warn) 320 if (!warn)
321 continue; 321 continue;
322 322
323 printk(KERN_WARNING 323 printk(KERN_WARNING
324 "CPU: CPU feature %s disabled, no CPUID level 0x%x\n", 324 "CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
325 x86_cap_flags[df->feature], df->level); 325 x86_cap_flags[df->feature], df->level);
326 } 326 }
327 } 327 }
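The sign trick described in that comment is the heart of filter_cpuid_features(): basic CPUID levels are small positive numbers, while extended levels (0x8000xxxx) become negative when viewed as s32, which is how the two ranges are told apart. Untangled into a hypothetical helper (illustration only, not in the tree), the same test reads:

	static int cpuid_level_missing(struct cpuinfo_x86 *c, u32 level)
	{
		if ((s32)level < 0)	/* 0x8000xxxx: an extended leaf */
			return (u32)level > (u32)c->extended_cpuid_level;
		/* basic leaf: cpuid_level is -1 when CPUID itself is absent */
		return (s32)level > (s32)c->cpuid_level;
	}

A feature from the dependency table is cleared exactly when this check would return true for its df->level.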
328 328
329 /* 329 /*
330 * Naming convention should be: <Name> [(<Codename>)] 330 * Naming convention should be: <Name> [(<Codename>)]
331 * This table is only used if init_<vendor>() below doesn't set it; 331 * This table is only used if init_<vendor>() below doesn't set it;
332 * in particular, if CPUID levels 0x80000002..4 are supported, this 332 * in particular, if CPUID levels 0x80000002..4 are supported, this
333 * isn't used 333 * isn't used
334 */ 334 */
335 335
336 /* Look up CPU names by table lookup. */ 336 /* Look up CPU names by table lookup. */
337 static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c) 337 static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
338 { 338 {
339 const struct cpu_model_info *info; 339 const struct cpu_model_info *info;
340 340
341 if (c->x86_model >= 16) 341 if (c->x86_model >= 16)
342 return NULL; /* Range check */ 342 return NULL; /* Range check */
343 343
344 if (!this_cpu) 344 if (!this_cpu)
345 return NULL; 345 return NULL;
346 346
347 info = this_cpu->c_models; 347 info = this_cpu->c_models;
348 348
349 while (info && info->family) { 349 while (info && info->family) {
350 if (info->family == c->x86) 350 if (info->family == c->x86)
351 return info->model_names[c->x86_model]; 351 return info->model_names[c->x86_model];
352 info++; 352 info++;
353 } 353 }
354 return NULL; /* Not found */ 354 return NULL; /* Not found */
355 } 355 }
356 356
357 __u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata; 357 __u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata;
358 __u32 cpu_caps_set[NCAPINTS] __cpuinitdata; 358 __u32 cpu_caps_set[NCAPINTS] __cpuinitdata;
359 359
360 void load_percpu_segment(int cpu) 360 void load_percpu_segment(int cpu)
361 { 361 {
362 #ifdef CONFIG_X86_32 362 #ifdef CONFIG_X86_32
363 loadsegment(fs, __KERNEL_PERCPU); 363 loadsegment(fs, __KERNEL_PERCPU);
364 #else 364 #else
365 loadsegment(gs, 0); 365 loadsegment(gs, 0);
366 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); 366 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
367 #endif 367 #endif
368 load_stack_canary_segment(); 368 load_stack_canary_segment();
369 } 369 }
370 370
371 /* 371 /*
372 * Current gdt points %fs at the "master" per-cpu area: after this, 372 * Current gdt points %fs at the "master" per-cpu area: after this,
373 * it's on the real one. 373 * it's on the real one.
374 */ 374 */
375 void switch_to_new_gdt(int cpu) 375 void switch_to_new_gdt(int cpu)
376 { 376 {
377 struct desc_ptr gdt_descr; 377 struct desc_ptr gdt_descr;
378 378
379 gdt_descr.address = (long)get_cpu_gdt_table(cpu); 379 gdt_descr.address = (long)get_cpu_gdt_table(cpu);
380 gdt_descr.size = GDT_SIZE - 1; 380 gdt_descr.size = GDT_SIZE - 1;
381 load_gdt(&gdt_descr); 381 load_gdt(&gdt_descr);
382 /* Reload the per-cpu base */ 382 /* Reload the per-cpu base */
383 383
384 load_percpu_segment(cpu); 384 load_percpu_segment(cpu);
385 } 385 }
386 386
387 static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; 387 static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
388 388
389 static void __cpuinit get_model_name(struct cpuinfo_x86 *c) 389 static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
390 { 390 {
391 unsigned int *v; 391 unsigned int *v;
392 char *p, *q; 392 char *p, *q;
393 393
394 if (c->extended_cpuid_level < 0x80000004) 394 if (c->extended_cpuid_level < 0x80000004)
395 return; 395 return;
396 396
397 v = (unsigned int *)c->x86_model_id; 397 v = (unsigned int *)c->x86_model_id;
398 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); 398 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
399 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); 399 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
400 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); 400 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
401 c->x86_model_id[48] = 0; 401 c->x86_model_id[48] = 0;
402 402
403 /* 403 /*
404 * Intel chips right-justify this string for some dumb reason; 404 * Intel chips right-justify this string for some dumb reason;
405 * undo that brain damage: 405 * undo that brain damage:
406 */ 406 */
407 p = q = &c->x86_model_id[0]; 407 p = q = &c->x86_model_id[0];
408 while (*p == ' ') 408 while (*p == ' ')
409 p++; 409 p++;
410 if (p != q) { 410 if (p != q) {
411 while (*p) 411 while (*p)
412 *q++ = *p++; 412 *q++ = *p++;
413 while (q <= &c->x86_model_id[48]) 413 while (q <= &c->x86_model_id[48])
414 *q++ = '\0'; /* Zero-pad the rest */ 414 *q++ = '\0'; /* Zero-pad the rest */
415 } 415 }
416 } 416 }
417 417
418 void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) 418 void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
419 { 419 {
420 unsigned int n, dummy, ebx, ecx, edx, l2size; 420 unsigned int n, dummy, ebx, ecx, edx, l2size;
421 421
422 n = c->extended_cpuid_level; 422 n = c->extended_cpuid_level;
423 423
424 if (n >= 0x80000005) { 424 if (n >= 0x80000005) {
425 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); 425 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
426 c->x86_cache_size = (ecx>>24) + (edx>>24); 426 c->x86_cache_size = (ecx>>24) + (edx>>24);
427 #ifdef CONFIG_X86_64 427 #ifdef CONFIG_X86_64
428 /* On K8 L1 TLB is inclusive, so don't count it */ 428 /* On K8 L1 TLB is inclusive, so don't count it */
429 c->x86_tlbsize = 0; 429 c->x86_tlbsize = 0;
430 #endif 430 #endif
431 } 431 }
432 432
433 if (n < 0x80000006) /* Some chips just have a large L1. */ 433 if (n < 0x80000006) /* Some chips just have a large L1. */
434 return; 434 return;
435 435
436 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); 436 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
437 l2size = ecx >> 16; 437 l2size = ecx >> 16;
438 438
439 #ifdef CONFIG_X86_64 439 #ifdef CONFIG_X86_64
440 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); 440 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
441 #else 441 #else
442 /* do processor-specific cache resizing */ 442 /* do processor-specific cache resizing */
443 if (this_cpu->c_size_cache) 443 if (this_cpu->c_size_cache)
444 l2size = this_cpu->c_size_cache(c, l2size); 444 l2size = this_cpu->c_size_cache(c, l2size);
445 445
446 /* Allow user to override all this if necessary. */ 446 /* Allow user to override all this if necessary. */
447 if (cachesize_override != -1) 447 if (cachesize_override != -1)
448 l2size = cachesize_override; 448 l2size = cachesize_override;
449 449
450 if (l2size == 0) 450 if (l2size == 0)
451 return; /* Again, no L2 cache is possible */ 451 return; /* Again, no L2 cache is possible */
452 #endif 452 #endif
453 453
454 c->x86_cache_size = l2size; 454 c->x86_cache_size = l2size;
455 } 455 }
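As a worked example of the 0x80000006 decode above (register value made up for illustration): with ECX = 0x04004140, l2size = ECX >> 16 = 0x0400 = 1024, i.e. a 1024 KB L2, before any vendor-specific c_size_cache() or cachesize= override is applied; on 64-bit the two 12-bit fields of EBX are summed into x86_tlbsize instead.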
456 456
457 u16 __read_mostly tlb_lli_4k[NR_INFO]; 457 u16 __read_mostly tlb_lli_4k[NR_INFO];
458 u16 __read_mostly tlb_lli_2m[NR_INFO]; 458 u16 __read_mostly tlb_lli_2m[NR_INFO];
459 u16 __read_mostly tlb_lli_4m[NR_INFO]; 459 u16 __read_mostly tlb_lli_4m[NR_INFO];
460 u16 __read_mostly tlb_lld_4k[NR_INFO]; 460 u16 __read_mostly tlb_lld_4k[NR_INFO];
461 u16 __read_mostly tlb_lld_2m[NR_INFO]; 461 u16 __read_mostly tlb_lld_2m[NR_INFO];
462 u16 __read_mostly tlb_lld_4m[NR_INFO]; 462 u16 __read_mostly tlb_lld_4m[NR_INFO];
463 463
464 /* 464 /*
465 * tlb_flushall_shift shows the balance point in replacing cr3 write 465 * tlb_flushall_shift shows the balance point in replacing cr3 write
466 * with multiple 'invlpg'. It will do this replacement when 466 * with multiple 'invlpg'. It will do this replacement when
467 * flush_tlb_lines <= active_lines/2^tlb_flushall_shift. 467 * flush_tlb_lines <= active_lines/2^tlb_flushall_shift.
468 * If tlb_flushall_shift is -1, the replacement is disabled. 468 * If tlb_flushall_shift is -1, the replacement is disabled.
469 */ 469 */
470 s8 __read_mostly tlb_flushall_shift = -1; 470 s8 __read_mostly tlb_flushall_shift = -1;
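To make that balance point concrete, the decision the flush code is expected to take can be sketched as below (helper name and numbers are illustrative only): with 512 active dTLB lines and tlb_flushall_shift == 5, a flush covering up to 512 >> 5 == 16 pages would use individual invlpg instructions, while anything larger would fall back to a full cr3 write.

	static bool use_invlpg(unsigned long flush_lines, unsigned long active_lines)
	{
		if (tlb_flushall_shift < 0)	/* -1: replacement disabled */
			return false;
		return flush_lines <= (active_lines >> tlb_flushall_shift);
	}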
471 471
472 void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) 472 void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
473 { 473 {
474 if (this_cpu->c_detect_tlb) 474 if (this_cpu->c_detect_tlb)
475 this_cpu->c_detect_tlb(c); 475 this_cpu->c_detect_tlb(c);
476 476
477 printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ 477 printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
478 "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ 478 "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
479 "tlb_flushall_shift is 0x%x\n", 479 "tlb_flushall_shift is 0x%x\n",
480 tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], 480 tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
481 tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], 481 tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
482 tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], 482 tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
483 tlb_flushall_shift); 483 tlb_flushall_shift);
484 } 484 }
485 485
486 void __cpuinit detect_ht(struct cpuinfo_x86 *c) 486 void __cpuinit detect_ht(struct cpuinfo_x86 *c)
487 { 487 {
488 #ifdef CONFIG_X86_HT 488 #ifdef CONFIG_X86_HT
489 u32 eax, ebx, ecx, edx; 489 u32 eax, ebx, ecx, edx;
490 int index_msb, core_bits; 490 int index_msb, core_bits;
491 static bool printed; 491 static bool printed;
492 492
493 if (!cpu_has(c, X86_FEATURE_HT)) 493 if (!cpu_has(c, X86_FEATURE_HT))
494 return; 494 return;
495 495
496 if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) 496 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
497 goto out; 497 goto out;
498 498
499 if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) 499 if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
500 return; 500 return;
501 501
502 cpuid(1, &eax, &ebx, &ecx, &edx); 502 cpuid(1, &eax, &ebx, &ecx, &edx);
503 503
504 smp_num_siblings = (ebx & 0xff0000) >> 16; 504 smp_num_siblings = (ebx & 0xff0000) >> 16;
505 505
506 if (smp_num_siblings == 1) { 506 if (smp_num_siblings == 1) {
507 printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n"); 507 printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
508 goto out; 508 goto out;
509 } 509 }
510 510
511 if (smp_num_siblings <= 1) 511 if (smp_num_siblings <= 1)
512 goto out; 512 goto out;
513 513
514 index_msb = get_count_order(smp_num_siblings); 514 index_msb = get_count_order(smp_num_siblings);
515 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); 515 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
516 516
517 smp_num_siblings = smp_num_siblings / c->x86_max_cores; 517 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
518 518
519 index_msb = get_count_order(smp_num_siblings); 519 index_msb = get_count_order(smp_num_siblings);
520 520
521 core_bits = get_count_order(c->x86_max_cores); 521 core_bits = get_count_order(c->x86_max_cores);
522 522
523 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & 523 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
524 ((1 << core_bits) - 1); 524 ((1 << core_bits) - 1);
525 525
526 out: 526 out:
527 if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { 527 if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
528 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", 528 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
529 c->phys_proc_id); 529 c->phys_proc_id);
530 printk(KERN_INFO "CPU: Processor Core ID: %d\n", 530 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
531 c->cpu_core_id); 531 c->cpu_core_id);
532 printed = 1; 532 printed = 1;
533 } 533 }
534 #endif 534 #endif
535 } 535 }
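As a worked example of the topology math above (numbers chosen for illustration): on a package reporting smp_num_siblings = 8 and x86_max_cores = 4, the first get_count_order() gives 3, so phys_proc_id = apicid >> 3; siblings per core become 8 / 4 = 2, giving index_msb = 1 and core_bits = 2, so cpu_core_id = (apicid >> 1) & 3. An initial APIC ID of 5 (binary 0101) would then map to package 0, core 2, thread 1.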
536 536
537 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) 537 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
538 { 538 {
539 char *v = c->x86_vendor_id; 539 char *v = c->x86_vendor_id;
540 int i; 540 int i;
541 541
542 for (i = 0; i < X86_VENDOR_NUM; i++) { 542 for (i = 0; i < X86_VENDOR_NUM; i++) {
543 if (!cpu_devs[i]) 543 if (!cpu_devs[i])
544 break; 544 break;
545 545
546 if (!strcmp(v, cpu_devs[i]->c_ident[0]) || 546 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
547 (cpu_devs[i]->c_ident[1] && 547 (cpu_devs[i]->c_ident[1] &&
548 !strcmp(v, cpu_devs[i]->c_ident[1]))) { 548 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
549 549
550 this_cpu = cpu_devs[i]; 550 this_cpu = cpu_devs[i];
551 c->x86_vendor = this_cpu->c_x86_vendor; 551 c->x86_vendor = this_cpu->c_x86_vendor;
552 return; 552 return;
553 } 553 }
554 } 554 }
555 555
556 printk_once(KERN_ERR 556 printk_once(KERN_ERR
557 "CPU: vendor_id '%s' unknown, using generic init.\n" \ 557 "CPU: vendor_id '%s' unknown, using generic init.\n" \
558 "CPU: Your system may be unstable.\n", v); 558 "CPU: Your system may be unstable.\n", v);
559 559
560 c->x86_vendor = X86_VENDOR_UNKNOWN; 560 c->x86_vendor = X86_VENDOR_UNKNOWN;
561 this_cpu = &default_cpu; 561 this_cpu = &default_cpu;
562 } 562 }
563 563
564 void __cpuinit cpu_detect(struct cpuinfo_x86 *c) 564 void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
565 { 565 {
566 /* Get vendor name */ 566 /* Get vendor name */
567 cpuid(0x00000000, (unsigned int *)&c->cpuid_level, 567 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
568 (unsigned int *)&c->x86_vendor_id[0], 568 (unsigned int *)&c->x86_vendor_id[0],
569 (unsigned int *)&c->x86_vendor_id[8], 569 (unsigned int *)&c->x86_vendor_id[8],
570 (unsigned int *)&c->x86_vendor_id[4]); 570 (unsigned int *)&c->x86_vendor_id[4]);
571 571
572 c->x86 = 4; 572 c->x86 = 4;
573 /* Intel-defined flags: level 0x00000001 */ 573 /* Intel-defined flags: level 0x00000001 */
574 if (c->cpuid_level >= 0x00000001) { 574 if (c->cpuid_level >= 0x00000001) {
575 u32 junk, tfms, cap0, misc; 575 u32 junk, tfms, cap0, misc;
576 576
577 cpuid(0x00000001, &tfms, &misc, &junk, &cap0); 577 cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
578 c->x86 = (tfms >> 8) & 0xf; 578 c->x86 = (tfms >> 8) & 0xf;
579 c->x86_model = (tfms >> 4) & 0xf; 579 c->x86_model = (tfms >> 4) & 0xf;
580 c->x86_mask = tfms & 0xf; 580 c->x86_mask = tfms & 0xf;
581 581
582 if (c->x86 == 0xf) 582 if (c->x86 == 0xf)
583 c->x86 += (tfms >> 20) & 0xff; 583 c->x86 += (tfms >> 20) & 0xff;
584 if (c->x86 >= 0x6) 584 if (c->x86 >= 0x6)
585 c->x86_model += ((tfms >> 16) & 0xf) << 4; 585 c->x86_model += ((tfms >> 16) & 0xf) << 4;
586 586
587 if (cap0 & (1<<19)) { 587 if (cap0 & (1<<19)) {
588 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 588 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
589 c->x86_cache_alignment = c->x86_clflush_size; 589 c->x86_cache_alignment = c->x86_clflush_size;
590 } 590 }
591 } 591 }
592 } 592 }
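As a worked example of the family/model assembly above: a CPUID leaf 1 EAX (tfms) value of 0x000206a7 yields family (tfms >> 8) & 0xf = 6; since the family is >= 0x6 the extended model nibble from bits 16-19 is folded in, giving model 0xa + (0x2 << 4) = 0x2a and stepping 7. Only family 0xf parts additionally add the extended family byte from bits 20-27.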
593 593
594 void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) 594 void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
595 { 595 {
596 u32 tfms, xlvl; 596 u32 tfms, xlvl;
597 u32 ebx; 597 u32 ebx;
598 598
599 /* Intel-defined flags: level 0x00000001 */ 599 /* Intel-defined flags: level 0x00000001 */
600 if (c->cpuid_level >= 0x00000001) { 600 if (c->cpuid_level >= 0x00000001) {
601 u32 capability, excap; 601 u32 capability, excap;
602 602
603 cpuid(0x00000001, &tfms, &ebx, &excap, &capability); 603 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
604 c->x86_capability[0] = capability; 604 c->x86_capability[0] = capability;
605 c->x86_capability[4] = excap; 605 c->x86_capability[4] = excap;
606 } 606 }
607 607
608 /* Additional Intel-defined flags: level 0x00000007 */ 608 /* Additional Intel-defined flags: level 0x00000007 */
609 if (c->cpuid_level >= 0x00000007) { 609 if (c->cpuid_level >= 0x00000007) {
610 u32 eax, ebx, ecx, edx; 610 u32 eax, ebx, ecx, edx;
611 611
612 cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); 612 cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
613 613
614 c->x86_capability[9] = ebx; 614 c->x86_capability[9] = ebx;
615 } 615 }
616 616
617 /* AMD-defined flags: level 0x80000001 */ 617 /* AMD-defined flags: level 0x80000001 */
618 xlvl = cpuid_eax(0x80000000); 618 xlvl = cpuid_eax(0x80000000);
619 c->extended_cpuid_level = xlvl; 619 c->extended_cpuid_level = xlvl;
620 620
621 if ((xlvl & 0xffff0000) == 0x80000000) { 621 if ((xlvl & 0xffff0000) == 0x80000000) {
622 if (xlvl >= 0x80000001) { 622 if (xlvl >= 0x80000001) {
623 c->x86_capability[1] = cpuid_edx(0x80000001); 623 c->x86_capability[1] = cpuid_edx(0x80000001);
624 c->x86_capability[6] = cpuid_ecx(0x80000001); 624 c->x86_capability[6] = cpuid_ecx(0x80000001);
625 } 625 }
626 } 626 }
627 627
628 if (c->extended_cpuid_level >= 0x80000008) { 628 if (c->extended_cpuid_level >= 0x80000008) {
629 u32 eax = cpuid_eax(0x80000008); 629 u32 eax = cpuid_eax(0x80000008);
630 630
631 c->x86_virt_bits = (eax >> 8) & 0xff; 631 c->x86_virt_bits = (eax >> 8) & 0xff;
632 c->x86_phys_bits = eax & 0xff; 632 c->x86_phys_bits = eax & 0xff;
633 } 633 }
634 #ifdef CONFIG_X86_32 634 #ifdef CONFIG_X86_32
635 else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) 635 else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
636 c->x86_phys_bits = 36; 636 c->x86_phys_bits = 36;
637 #endif 637 #endif
638 638
639 if (c->extended_cpuid_level >= 0x80000007) 639 if (c->extended_cpuid_level >= 0x80000007)
640 c->x86_power = cpuid_edx(0x80000007); 640 c->x86_power = cpuid_edx(0x80000007);
641 641
642 init_scattered_cpuid_features(c); 642 init_scattered_cpuid_features(c);
643 } 643 }
644 644
645 static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) 645 static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
646 { 646 {
647 #ifdef CONFIG_X86_32 647 #ifdef CONFIG_X86_32
648 int i; 648 int i;
649 649
650 /* 650 /*
651 * First of all, decide if this is a 486 or higher 651 * First of all, decide if this is a 486 or higher
652 * It's a 486 if we can modify the AC flag 652 * It's a 486 if we can modify the AC flag
653 */ 653 */
654 if (flag_is_changeable_p(X86_EFLAGS_AC)) 654 if (flag_is_changeable_p(X86_EFLAGS_AC))
655 c->x86 = 4; 655 c->x86 = 4;
656 else 656 else
657 c->x86 = 3; 657 c->x86 = 3;
658 658
659 for (i = 0; i < X86_VENDOR_NUM; i++) 659 for (i = 0; i < X86_VENDOR_NUM; i++)
660 if (cpu_devs[i] && cpu_devs[i]->c_identify) { 660 if (cpu_devs[i] && cpu_devs[i]->c_identify) {
661 c->x86_vendor_id[0] = 0; 661 c->x86_vendor_id[0] = 0;
662 cpu_devs[i]->c_identify(c); 662 cpu_devs[i]->c_identify(c);
663 if (c->x86_vendor_id[0]) { 663 if (c->x86_vendor_id[0]) {
664 get_cpu_vendor(c); 664 get_cpu_vendor(c);
665 break; 665 break;
666 } 666 }
667 } 667 }
668 #endif 668 #endif
669 } 669 }
670 670
671 /* 671 /*
672 * Do minimum CPU detection early. 672 * Do minimum CPU detection early.
673 * Fields really needed: vendor, cpuid_level, family, model, mask, 673 * Fields really needed: vendor, cpuid_level, family, model, mask,
674 * cache alignment. 674 * cache alignment.
675 * The others are not touched to avoid unwanted side effects. 675 * The others are not touched to avoid unwanted side effects.
676 * 676 *
677 * WARNING: this function is only called on the BP. Don't add code here 677 * WARNING: this function is only called on the BP. Don't add code here
678 * that is supposed to run on all CPUs. 678 * that is supposed to run on all CPUs.
679 */ 679 */
680 static void __init early_identify_cpu(struct cpuinfo_x86 *c) 680 static void __init early_identify_cpu(struct cpuinfo_x86 *c)
681 { 681 {
682 #ifdef CONFIG_X86_64 682 #ifdef CONFIG_X86_64
683 c->x86_clflush_size = 64; 683 c->x86_clflush_size = 64;
684 c->x86_phys_bits = 36; 684 c->x86_phys_bits = 36;
685 c->x86_virt_bits = 48; 685 c->x86_virt_bits = 48;
686 #else 686 #else
687 c->x86_clflush_size = 32; 687 c->x86_clflush_size = 32;
688 c->x86_phys_bits = 32; 688 c->x86_phys_bits = 32;
689 c->x86_virt_bits = 32; 689 c->x86_virt_bits = 32;
690 #endif 690 #endif
691 c->x86_cache_alignment = c->x86_clflush_size; 691 c->x86_cache_alignment = c->x86_clflush_size;
692 692
693 memset(&c->x86_capability, 0, sizeof c->x86_capability); 693 memset(&c->x86_capability, 0, sizeof c->x86_capability);
694 c->extended_cpuid_level = 0; 694 c->extended_cpuid_level = 0;
695 695
696 if (!have_cpuid_p()) 696 if (!have_cpuid_p())
697 identify_cpu_without_cpuid(c); 697 identify_cpu_without_cpuid(c);
698 698
699 /* cyrix could have cpuid enabled via c_identify()*/ 699 /* cyrix could have cpuid enabled via c_identify()*/
700 if (!have_cpuid_p()) 700 if (!have_cpuid_p())
701 return; 701 return;
702 702
703 cpu_detect(c); 703 cpu_detect(c);
704 704
705 get_cpu_vendor(c); 705 get_cpu_vendor(c);
706 706
707 get_cpu_cap(c); 707 get_cpu_cap(c);
708 708
709 if (this_cpu->c_early_init) 709 if (this_cpu->c_early_init)
710 this_cpu->c_early_init(c); 710 this_cpu->c_early_init(c);
711 711
712 c->cpu_index = 0; 712 c->cpu_index = 0;
713 filter_cpuid_features(c, false); 713 filter_cpuid_features(c, false);
714 714
715 setup_smep(c); 715 setup_smep(c);
716 716
717 if (this_cpu->c_bsp_init) 717 if (this_cpu->c_bsp_init)
718 this_cpu->c_bsp_init(c); 718 this_cpu->c_bsp_init(c);
719 } 719 }
720 720
721 void __init early_cpu_init(void) 721 void __init early_cpu_init(void)
722 { 722 {
723 const struct cpu_dev *const *cdev; 723 const struct cpu_dev *const *cdev;
724 int count = 0; 724 int count = 0;
725 725
726 #ifdef CONFIG_PROCESSOR_SELECT 726 #ifdef CONFIG_PROCESSOR_SELECT
727 printk(KERN_INFO "KERNEL supported cpus:\n"); 727 printk(KERN_INFO "KERNEL supported cpus:\n");
728 #endif 728 #endif
729 729
730 for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { 730 for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
731 const struct cpu_dev *cpudev = *cdev; 731 const struct cpu_dev *cpudev = *cdev;
732 732
733 if (count >= X86_VENDOR_NUM) 733 if (count >= X86_VENDOR_NUM)
734 break; 734 break;
735 cpu_devs[count] = cpudev; 735 cpu_devs[count] = cpudev;
736 count++; 736 count++;
737 737
738 #ifdef CONFIG_PROCESSOR_SELECT 738 #ifdef CONFIG_PROCESSOR_SELECT
739 { 739 {
740 unsigned int j; 740 unsigned int j;
741 741
742 for (j = 0; j < 2; j++) { 742 for (j = 0; j < 2; j++) {
743 if (!cpudev->c_ident[j]) 743 if (!cpudev->c_ident[j])
744 continue; 744 continue;
745 printk(KERN_INFO " %s %s\n", cpudev->c_vendor, 745 printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
746 cpudev->c_ident[j]); 746 cpudev->c_ident[j]);
747 } 747 }
748 } 748 }
749 #endif 749 #endif
750 } 750 }
751 early_identify_cpu(&boot_cpu_data); 751 early_identify_cpu(&boot_cpu_data);
752 } 752 }
753 753
754 /* 754 /*
755 * The NOPL instruction is supposed to exist on all CPUs of family >= 6; 755 * The NOPL instruction is supposed to exist on all CPUs of family >= 6;
756 * unfortunately, that's not true in practice because of early VIA 756 * unfortunately, that's not true in practice because of early VIA
757 * chips and (more importantly) broken virtualizers that are not easy 757 * chips and (more importantly) broken virtualizers that are not easy
758 * to detect. In the latter case it doesn't even *fail* reliably, so 758 * to detect. In the latter case it doesn't even *fail* reliably, so
759 * probing for it doesn't even work. Disable it completely on 32-bit 759 * probing for it doesn't even work. Disable it completely on 32-bit
760 * unless we can find a reliable way to detect all the broken cases. 760 * unless we can find a reliable way to detect all the broken cases.
761 * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). 761 * Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
762 */ 762 */
763 static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) 763 static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
764 { 764 {
765 #ifdef CONFIG_X86_32 765 #ifdef CONFIG_X86_32
766 clear_cpu_cap(c, X86_FEATURE_NOPL); 766 clear_cpu_cap(c, X86_FEATURE_NOPL);
767 #else 767 #else
768 set_cpu_cap(c, X86_FEATURE_NOPL); 768 set_cpu_cap(c, X86_FEATURE_NOPL);
769 #endif 769 #endif
770 } 770 }
771 771
772 static void __cpuinit generic_identify(struct cpuinfo_x86 *c) 772 static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
773 { 773 {
774 c->extended_cpuid_level = 0; 774 c->extended_cpuid_level = 0;
775 775
776 if (!have_cpuid_p()) 776 if (!have_cpuid_p())
777 identify_cpu_without_cpuid(c); 777 identify_cpu_without_cpuid(c);
778 778
779 /* cyrix could have cpuid enabled via c_identify()*/ 779 /* cyrix could have cpuid enabled via c_identify()*/
780 if (!have_cpuid_p()) 780 if (!have_cpuid_p())
781 return; 781 return;
782 782
783 cpu_detect(c); 783 cpu_detect(c);
784 784
785 get_cpu_vendor(c); 785 get_cpu_vendor(c);
786 786
787 get_cpu_cap(c); 787 get_cpu_cap(c);
788 788
789 if (c->cpuid_level >= 0x00000001) { 789 if (c->cpuid_level >= 0x00000001) {
790 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; 790 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
791 #ifdef CONFIG_X86_32 791 #ifdef CONFIG_X86_32
792 # ifdef CONFIG_X86_HT 792 # ifdef CONFIG_X86_HT
793 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); 793 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
794 # else 794 # else
795 c->apicid = c->initial_apicid; 795 c->apicid = c->initial_apicid;
796 # endif 796 # endif
797 #endif 797 #endif
798 c->phys_proc_id = c->initial_apicid; 798 c->phys_proc_id = c->initial_apicid;
799 } 799 }
800 800
801 setup_smep(c); 801 setup_smep(c);
802 802
803 get_model_name(c); /* Default name */ 803 get_model_name(c); /* Default name */
804 804
805 detect_nopl(c); 805 detect_nopl(c);
806 } 806 }
807 807
808 /* 808 /*
809 * This does the hard work of actually picking apart the CPU stuff... 809 * This does the hard work of actually picking apart the CPU stuff...
810 */ 810 */
811 static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) 811 static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
812 { 812 {
813 int i; 813 int i;
814 814
815 c->loops_per_jiffy = loops_per_jiffy; 815 c->loops_per_jiffy = loops_per_jiffy;
816 c->x86_cache_size = -1; 816 c->x86_cache_size = -1;
817 c->x86_vendor = X86_VENDOR_UNKNOWN; 817 c->x86_vendor = X86_VENDOR_UNKNOWN;
818 c->x86_model = c->x86_mask = 0; /* So far unknown... */ 818 c->x86_model = c->x86_mask = 0; /* So far unknown... */
819 c->x86_vendor_id[0] = '\0'; /* Unset */ 819 c->x86_vendor_id[0] = '\0'; /* Unset */
820 c->x86_model_id[0] = '\0'; /* Unset */ 820 c->x86_model_id[0] = '\0'; /* Unset */
821 c->x86_max_cores = 1; 821 c->x86_max_cores = 1;
822 c->x86_coreid_bits = 0; 822 c->x86_coreid_bits = 0;
823 #ifdef CONFIG_X86_64 823 #ifdef CONFIG_X86_64
824 c->x86_clflush_size = 64; 824 c->x86_clflush_size = 64;
825 c->x86_phys_bits = 36; 825 c->x86_phys_bits = 36;
826 c->x86_virt_bits = 48; 826 c->x86_virt_bits = 48;
827 #else 827 #else
828 c->cpuid_level = -1; /* CPUID not detected */ 828 c->cpuid_level = -1; /* CPUID not detected */
829 c->x86_clflush_size = 32; 829 c->x86_clflush_size = 32;
830 c->x86_phys_bits = 32; 830 c->x86_phys_bits = 32;
831 c->x86_virt_bits = 32; 831 c->x86_virt_bits = 32;
832 #endif 832 #endif
833 c->x86_cache_alignment = c->x86_clflush_size; 833 c->x86_cache_alignment = c->x86_clflush_size;
834 memset(&c->x86_capability, 0, sizeof c->x86_capability); 834 memset(&c->x86_capability, 0, sizeof c->x86_capability);
835 835
836 generic_identify(c); 836 generic_identify(c);
837 837
838 if (this_cpu->c_identify) 838 if (this_cpu->c_identify)
839 this_cpu->c_identify(c); 839 this_cpu->c_identify(c);
840 840
841 /* Clear/Set all flags overridden by options, after probe */ 841 /* Clear/Set all flags overridden by options, after probe */
842 for (i = 0; i < NCAPINTS; i++) { 842 for (i = 0; i < NCAPINTS; i++) {
843 c->x86_capability[i] &= ~cpu_caps_cleared[i]; 843 c->x86_capability[i] &= ~cpu_caps_cleared[i];
844 c->x86_capability[i] |= cpu_caps_set[i]; 844 c->x86_capability[i] |= cpu_caps_set[i];
845 } 845 }
846 846
847 #ifdef CONFIG_X86_64 847 #ifdef CONFIG_X86_64
848 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); 848 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
849 #endif 849 #endif
850 850
851 /* 851 /*
852 * Vendor-specific initialization. In this section we 852 * Vendor-specific initialization. In this section we
853 * canonicalize the feature flags, meaning if there are 853 * canonicalize the feature flags, meaning if there are
854 * features a certain CPU supports which CPUID doesn't 854 * features a certain CPU supports which CPUID doesn't
855 * tell us, CPUID claiming incorrect flags, or other bugs, 855 * tell us, CPUID claiming incorrect flags, or other bugs,
856 * we handle them here. 856 * we handle them here.
857 * 857 *
858 * At the end of this section, c->x86_capability better 858 * At the end of this section, c->x86_capability better
859 * indicate the features this CPU genuinely supports! 859 * indicate the features this CPU genuinely supports!
860 */ 860 */
861 if (this_cpu->c_init) 861 if (this_cpu->c_init)
862 this_cpu->c_init(c); 862 this_cpu->c_init(c);
863 863
864 /* Disable the PN if appropriate */ 864 /* Disable the PN if appropriate */
865 squash_the_stupid_serial_number(c); 865 squash_the_stupid_serial_number(c);
866 866
867 /* 867 /*
868 * The vendor-specific functions might have changed features. 868 * The vendor-specific functions might have changed features.
869 * Now we do "generic changes." 869 * Now we do "generic changes."
870 */ 870 */
871 871
872 /* Filter out anything that depends on CPUID levels we don't have */ 872 /* Filter out anything that depends on CPUID levels we don't have */
873 filter_cpuid_features(c, true); 873 filter_cpuid_features(c, true);
874 874
875 /* If the model name is still unset, do table lookup. */ 875 /* If the model name is still unset, do table lookup. */
876 if (!c->x86_model_id[0]) { 876 if (!c->x86_model_id[0]) {
877 const char *p; 877 const char *p;
878 p = table_lookup_model(c); 878 p = table_lookup_model(c);
879 if (p) 879 if (p)
880 strcpy(c->x86_model_id, p); 880 strcpy(c->x86_model_id, p);
881 else 881 else
882 /* Last resort... */ 882 /* Last resort... */
883 sprintf(c->x86_model_id, "%02x/%02x", 883 sprintf(c->x86_model_id, "%02x/%02x",
884 c->x86, c->x86_model); 884 c->x86, c->x86_model);
885 } 885 }
886 886
887 #ifdef CONFIG_X86_64 887 #ifdef CONFIG_X86_64
888 detect_ht(c); 888 detect_ht(c);
889 #endif 889 #endif
890 890
891 init_hypervisor(c); 891 init_hypervisor(c);
892 x86_init_rdrand(c); 892 x86_init_rdrand(c);
893 893
894 /* 894 /*
895 * Clear/Set all flags overridden by options; this needs to be done 895 * Clear/Set all flags overridden by options; this needs to be done
896 * before the SMP AND of all CPUs' capabilities below. 896 * before the SMP AND of all CPUs' capabilities below.
897 */ 897 */
898 for (i = 0; i < NCAPINTS; i++) { 898 for (i = 0; i < NCAPINTS; i++) {
899 c->x86_capability[i] &= ~cpu_caps_cleared[i]; 899 c->x86_capability[i] &= ~cpu_caps_cleared[i];
900 c->x86_capability[i] |= cpu_caps_set[i]; 900 c->x86_capability[i] |= cpu_caps_set[i];
901 } 901 }
902 902
903 /* 903 /*
904 * On SMP, boot_cpu_data holds the common feature set between 904 * On SMP, boot_cpu_data holds the common feature set between
905 * all CPUs; so make sure that we indicate which features are 905 * all CPUs; so make sure that we indicate which features are
906 * common between the CPUs. The first time this routine gets 906 * common between the CPUs. The first time this routine gets
907 * executed, c == &boot_cpu_data. 907 * executed, c == &boot_cpu_data.
908 */ 908 */
909 if (c != &boot_cpu_data) { 909 if (c != &boot_cpu_data) {
910 /* AND the already accumulated flags with these */ 910 /* AND the already accumulated flags with these */
911 for (i = 0; i < NCAPINTS; i++) 911 for (i = 0; i < NCAPINTS; i++)
912 boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; 912 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
913 } 913 }
914 914
915 /* Init Machine Check Exception if available. */ 915 /* Init Machine Check Exception if available. */
916 mcheck_cpu_init(c); 916 mcheck_cpu_init(c);
917 917
918 select_idle_routine(c); 918 select_idle_routine(c);
919 919
920 #ifdef CONFIG_NUMA 920 #ifdef CONFIG_NUMA
921 numa_add_cpu(smp_processor_id()); 921 numa_add_cpu(smp_processor_id());
922 #endif 922 #endif
923 } 923 }
924 924
925 #ifdef CONFIG_X86_64 925 #ifdef CONFIG_X86_64
926 static void vgetcpu_set_mode(void) 926 static void vgetcpu_set_mode(void)
927 { 927 {
928 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) 928 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
929 vgetcpu_mode = VGETCPU_RDTSCP; 929 vgetcpu_mode = VGETCPU_RDTSCP;
930 else 930 else
931 vgetcpu_mode = VGETCPU_LSL; 931 vgetcpu_mode = VGETCPU_LSL;
932 } 932 }
933 #endif 933 #endif
934 934
935 void __init identify_boot_cpu(void) 935 void __init identify_boot_cpu(void)
936 { 936 {
937 identify_cpu(&boot_cpu_data); 937 identify_cpu(&boot_cpu_data);
938 init_amd_e400_c1e_mask(); 938 init_amd_e400_c1e_mask();
939 #ifdef CONFIG_X86_32 939 #ifdef CONFIG_X86_32
940 sysenter_setup(); 940 sysenter_setup();
941 enable_sep_cpu(); 941 enable_sep_cpu();
942 #else 942 #else
943 vgetcpu_set_mode(); 943 vgetcpu_set_mode();
944 #endif 944 #endif
945 if (boot_cpu_data.cpuid_level >= 2) 945 if (boot_cpu_data.cpuid_level >= 2)
946 cpu_detect_tlb(&boot_cpu_data); 946 cpu_detect_tlb(&boot_cpu_data);
947 } 947 }
948 948
949 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 949 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
950 { 950 {
951 BUG_ON(c == &boot_cpu_data); 951 BUG_ON(c == &boot_cpu_data);
952 identify_cpu(c); 952 identify_cpu(c);
953 #ifdef CONFIG_X86_32 953 #ifdef CONFIG_X86_32
954 enable_sep_cpu(); 954 enable_sep_cpu();
955 #endif 955 #endif
956 mtrr_ap_init(); 956 mtrr_ap_init();
957 } 957 }
958 958
959 struct msr_range { 959 struct msr_range {
960 unsigned min; 960 unsigned min;
961 unsigned max; 961 unsigned max;
962 }; 962 };
963 963
964 static const struct msr_range msr_range_array[] __cpuinitconst = { 964 static const struct msr_range msr_range_array[] __cpuinitconst = {
965 { 0x00000000, 0x00000418}, 965 { 0x00000000, 0x00000418},
966 { 0xc0000000, 0xc000040b}, 966 { 0xc0000000, 0xc000040b},
967 { 0xc0010000, 0xc0010142}, 967 { 0xc0010000, 0xc0010142},
968 { 0xc0011000, 0xc001103b}, 968 { 0xc0011000, 0xc001103b},
969 }; 969 };
970 970
971 static void __cpuinit __print_cpu_msr(void) 971 static void __cpuinit __print_cpu_msr(void)
972 { 972 {
973 unsigned index_min, index_max; 973 unsigned index_min, index_max;
974 unsigned index; 974 unsigned index;
975 u64 val; 975 u64 val;
976 int i; 976 int i;
977 977
978 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { 978 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
979 index_min = msr_range_array[i].min; 979 index_min = msr_range_array[i].min;
980 index_max = msr_range_array[i].max; 980 index_max = msr_range_array[i].max;
981 981
982 for (index = index_min; index < index_max; index++) { 982 for (index = index_min; index < index_max; index++) {
983 if (rdmsrl_safe(index, &val)) 983 if (rdmsrl_safe(index, &val))
984 continue; 984 continue;
985 printk(KERN_INFO " MSR%08x: %016llx\n", index, val); 985 printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
986 } 986 }
987 } 987 }
988 } 988 }
989 989
990 static int show_msr __cpuinitdata; 990 static int show_msr __cpuinitdata;
991 991
992 static __init int setup_show_msr(char *arg) 992 static __init int setup_show_msr(char *arg)
993 { 993 {
994 int num; 994 int num;
995 995
996 get_option(&arg, &num); 996 get_option(&arg, &num);
997 997
998 if (num > 0) 998 if (num > 0)
999 show_msr = num; 999 show_msr = num;
1000 return 1; 1000 return 1;
1001 } 1001 }
1002 __setup("show_msr=", setup_show_msr); 1002 __setup("show_msr=", setup_show_msr);
1003 1003
1004 static __init int setup_noclflush(char *arg) 1004 static __init int setup_noclflush(char *arg)
1005 { 1005 {
1006 setup_clear_cpu_cap(X86_FEATURE_CLFLSH); 1006 setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
1007 return 1; 1007 return 1;
1008 } 1008 }
1009 __setup("noclflush", setup_noclflush); 1009 __setup("noclflush", setup_noclflush);
1010 1010
1011 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) 1011 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
1012 { 1012 {
1013 const char *vendor = NULL; 1013 const char *vendor = NULL;
1014 1014
1015 if (c->x86_vendor < X86_VENDOR_NUM) { 1015 if (c->x86_vendor < X86_VENDOR_NUM) {
1016 vendor = this_cpu->c_vendor; 1016 vendor = this_cpu->c_vendor;
1017 } else { 1017 } else {
1018 if (c->cpuid_level >= 0) 1018 if (c->cpuid_level >= 0)
1019 vendor = c->x86_vendor_id; 1019 vendor = c->x86_vendor_id;
1020 } 1020 }
1021 1021
1022 if (vendor && !strstr(c->x86_model_id, vendor)) 1022 if (vendor && !strstr(c->x86_model_id, vendor))
1023 printk(KERN_CONT "%s ", vendor); 1023 printk(KERN_CONT "%s ", vendor);
1024 1024
1025 if (c->x86_model_id[0]) 1025 if (c->x86_model_id[0])
1026 printk(KERN_CONT "%s", c->x86_model_id); 1026 printk(KERN_CONT "%s", c->x86_model_id);
1027 else 1027 else
1028 printk(KERN_CONT "%d86", c->x86); 1028 printk(KERN_CONT "%d86", c->x86);
1029 1029
1030 if (c->x86_mask || c->cpuid_level >= 0) 1030 if (c->x86_mask || c->cpuid_level >= 0)
1031 printk(KERN_CONT " stepping %02x\n", c->x86_mask); 1031 printk(KERN_CONT " stepping %02x\n", c->x86_mask);
1032 else 1032 else
1033 printk(KERN_CONT "\n"); 1033 printk(KERN_CONT "\n");
1034 1034
1035 print_cpu_msr(c); 1035 print_cpu_msr(c);
1036 } 1036 }
1037 1037
1038 void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c) 1038 void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c)
1039 { 1039 {
1040 if (c->cpu_index < show_msr) 1040 if (c->cpu_index < show_msr)
1041 __print_cpu_msr(); 1041 __print_cpu_msr();
1042 } 1042 }
1043 1043
1044 static __init int setup_disablecpuid(char *arg) 1044 static __init int setup_disablecpuid(char *arg)
1045 { 1045 {
1046 int bit; 1046 int bit;
1047 1047
1048 if (get_option(&arg, &bit) && bit < NCAPINTS*32) 1048 if (get_option(&arg, &bit) && bit < NCAPINTS*32)
1049 setup_clear_cpu_cap(bit); 1049 setup_clear_cpu_cap(bit);
1050 else 1050 else
1051 return 0; 1051 return 0;
1052 1052
1053 return 1; 1053 return 1;
1054 } 1054 }
1055 __setup("clearcpuid=", setup_disablecpuid); 1055 __setup("clearcpuid=", setup_disablecpuid);
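
The number passed to clearcpuid= is a flat x86_capability bit, i.e. 32 * feature-word + bit, and setup_disablecpuid() only accepts values below NCAPINTS*32. A small illustration of the arithmetic; the word/bit location of SMAP used here (feature word 9, bit 20 of CPUID.(EAX=7,ECX=0):EBX) is an assumption for the example, not something taken from this patch:

	#include <stdio.h>

	int main(void)
	{
		unsigned int word = 9, bit = 20;	/* assumed location of X86_FEATURE_SMAP */

		/* clearcpuid= takes the flat capability bit used by setup_clear_cpu_cap() */
		printf("clearcpuid=%u\n", 32 * word + bit);	/* prints clearcpuid=308 */
		return 0;
	}

Clearing a capability this early means feature-gated alternatives for that bit are never applied, so the corresponding patch sites are left as NOPs.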
1056 1056
1057 #ifdef CONFIG_X86_64 1057 #ifdef CONFIG_X86_64
1058 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; 1058 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
1059 struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1, 1059 struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
1060 (unsigned long) nmi_idt_table }; 1060 (unsigned long) nmi_idt_table };
1061 1061
1062 DEFINE_PER_CPU_FIRST(union irq_stack_union, 1062 DEFINE_PER_CPU_FIRST(union irq_stack_union,
1063 irq_stack_union) __aligned(PAGE_SIZE); 1063 irq_stack_union) __aligned(PAGE_SIZE);
1064 1064
1065 /* 1065 /*
1066 * The following four percpu variables are hot. Align current_task to 1066 * The following four percpu variables are hot. Align current_task to
1067 * cacheline size such that all four fall in the same cacheline. 1067 * cacheline size such that all four fall in the same cacheline.
1068 */ 1068 */
1069 DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = 1069 DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
1070 &init_task; 1070 &init_task;
1071 EXPORT_PER_CPU_SYMBOL(current_task); 1071 EXPORT_PER_CPU_SYMBOL(current_task);
1072 1072
1073 DEFINE_PER_CPU(unsigned long, kernel_stack) = 1073 DEFINE_PER_CPU(unsigned long, kernel_stack) =
1074 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; 1074 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
1075 EXPORT_PER_CPU_SYMBOL(kernel_stack); 1075 EXPORT_PER_CPU_SYMBOL(kernel_stack);
1076 1076
1077 DEFINE_PER_CPU(char *, irq_stack_ptr) = 1077 DEFINE_PER_CPU(char *, irq_stack_ptr) =
1078 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; 1078 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
1079 1079
1080 DEFINE_PER_CPU(unsigned int, irq_count) = -1; 1080 DEFINE_PER_CPU(unsigned int, irq_count) = -1;
1081 1081
1082 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); 1082 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
1083 1083
1084 /* 1084 /*
1085 * Special IST stacks which the CPU switches to when it calls 1085 * Special IST stacks which the CPU switches to when it calls
1086 * an IST-marked descriptor entry. Up to 7 stacks (hardware 1086 * an IST-marked descriptor entry. Up to 7 stacks (hardware
1087 * limit); all of them are 4K, except the debug stack, which 1088 * limit); all of them are 4K, except the debug stack, which
1088 * is 8K. 1088 * is 8K.
1089 */ 1089 */
1090 static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { 1090 static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
1091 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, 1091 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1092 [DEBUG_STACK - 1] = DEBUG_STKSZ 1092 [DEBUG_STACK - 1] = DEBUG_STKSZ
1093 }; 1093 };
1094 1094
1095 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks 1095 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
1096 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); 1096 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
1097 1097
1098 /* May not be marked __init: used by software suspend */ 1098 /* May not be marked __init: used by software suspend */
1099 void syscall_init(void) 1099 void syscall_init(void)
1100 { 1100 {
1101 /* 1101 /*
1102 * LSTAR and STAR live in a somewhat strange symbiosis. 1102 * LSTAR and STAR live in a somewhat strange symbiosis.
1103 * They both write to the same internal register. STAR allows setting 1103 * They both write to the same internal register. STAR allows setting
1104 * CS/DS, but only with a 32-bit target. LSTAR sets the 64-bit rip. 1104 * CS/DS, but only with a 32-bit target. LSTAR sets the 64-bit rip.
1105 */ 1105 */
1106 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); 1106 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
1107 wrmsrl(MSR_LSTAR, system_call); 1107 wrmsrl(MSR_LSTAR, system_call);
1108 wrmsrl(MSR_CSTAR, ignore_sysret); 1108 wrmsrl(MSR_CSTAR, ignore_sysret);
1109 1109
1110 #ifdef CONFIG_IA32_EMULATION 1110 #ifdef CONFIG_IA32_EMULATION
1111 syscall32_cpu_init(); 1111 syscall32_cpu_init();
1112 #endif 1112 #endif
1113 1113
1114 /* Flags to clear on syscall */ 1114 /* Flags to clear on syscall */
1115 wrmsrl(MSR_SYSCALL_MASK, 1115 wrmsrl(MSR_SYSCALL_MASK,
1116 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); 1116 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
1117 X86_EFLAGS_IOPL|X86_EFLAGS_AC);
1117 } 1118 }
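
The flag-mask write above is what lets the 64-bit SYSCALL path skip an explicit CLAC: on SYSCALL the CPU clears every RFLAGS bit that is set in MSR_SYSCALL_MASK, so with X86_EFLAGS_AC included the kernel starts the syscall with AC=0. A standalone sketch of that masking, for illustration only, using the architectural EFLAGS bit values:

	#include <stdint.h>
	#include <stdio.h>

	#define X86_EFLAGS_TF	0x00000100UL	/* trap flag */
	#define X86_EFLAGS_IF	0x00000200UL	/* interrupt enable */
	#define X86_EFLAGS_DF	0x00000400UL	/* direction flag */
	#define X86_EFLAGS_IOPL	0x00003000UL	/* I/O privilege level */
	#define X86_EFLAGS_AC	0x00040000UL	/* alignment check / SMAP access */

	int main(void)
	{
		uint64_t mask = X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF |
				X86_EFLAGS_IOPL | X86_EFLAGS_AC;
		uint64_t user_rflags = 0x40246;	/* example user RFLAGS: AC and IF set */

		/* the CPU computes RFLAGS &= ~IA32_FMASK on SYSCALL entry */
		printf("rflags in kernel: %#llx\n",
		       (unsigned long long)(user_rflags & ~mask));	/* AC and IF now clear */
		return 0;
	}
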
1118 1119
1119 unsigned long kernel_eflags; 1120 unsigned long kernel_eflags;
1120 1121
1121 /* 1122 /*
1122 * Copies of the original ist values from the tss are only accessed during 1123 * Copies of the original ist values from the tss are only accessed during
1123 * debugging, no special alignment required. 1124 * debugging, no special alignment required.
1124 */ 1125 */
1125 DEFINE_PER_CPU(struct orig_ist, orig_ist); 1126 DEFINE_PER_CPU(struct orig_ist, orig_ist);
1126 1127
1127 static DEFINE_PER_CPU(unsigned long, debug_stack_addr); 1128 static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
1128 DEFINE_PER_CPU(int, debug_stack_usage); 1129 DEFINE_PER_CPU(int, debug_stack_usage);
1129 1130
1130 int is_debug_stack(unsigned long addr) 1131 int is_debug_stack(unsigned long addr)
1131 { 1132 {
1132 return __get_cpu_var(debug_stack_usage) || 1133 return __get_cpu_var(debug_stack_usage) ||
1133 (addr <= __get_cpu_var(debug_stack_addr) && 1134 (addr <= __get_cpu_var(debug_stack_addr) &&
1134 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); 1135 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
1135 } 1136 }
1136 1137
1137 static DEFINE_PER_CPU(u32, debug_stack_use_ctr); 1138 static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
1138 1139
1139 void debug_stack_set_zero(void) 1140 void debug_stack_set_zero(void)
1140 { 1141 {
1141 this_cpu_inc(debug_stack_use_ctr); 1142 this_cpu_inc(debug_stack_use_ctr);
1142 load_idt((const struct desc_ptr *)&nmi_idt_descr); 1143 load_idt((const struct desc_ptr *)&nmi_idt_descr);
1143 } 1144 }
1144 1145
1145 void debug_stack_reset(void) 1146 void debug_stack_reset(void)
1146 { 1147 {
1147 if (WARN_ON(!this_cpu_read(debug_stack_use_ctr))) 1148 if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
1148 return; 1149 return;
1149 if (this_cpu_dec_return(debug_stack_use_ctr) == 0) 1150 if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
1150 load_idt((const struct desc_ptr *)&idt_descr); 1151 load_idt((const struct desc_ptr *)&idt_descr);
1151 } 1152 }
1152 1153
1153 #else /* CONFIG_X86_64 */ 1154 #else /* CONFIG_X86_64 */
1154 1155
1155 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 1156 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
1156 EXPORT_PER_CPU_SYMBOL(current_task); 1157 EXPORT_PER_CPU_SYMBOL(current_task);
1157 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); 1158 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
1158 1159
1159 #ifdef CONFIG_CC_STACKPROTECTOR 1160 #ifdef CONFIG_CC_STACKPROTECTOR
1160 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); 1161 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
1161 #endif 1162 #endif
1162 1163
1163 /* Make sure %fs and %gs are initialized properly in idle threads */ 1164 /* Make sure %fs and %gs are initialized properly in idle threads */
1164 struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) 1165 struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
1165 { 1166 {
1166 memset(regs, 0, sizeof(struct pt_regs)); 1167 memset(regs, 0, sizeof(struct pt_regs));
1167 regs->fs = __KERNEL_PERCPU; 1168 regs->fs = __KERNEL_PERCPU;
1168 regs->gs = __KERNEL_STACK_CANARY; 1169 regs->gs = __KERNEL_STACK_CANARY;
1169 1170
1170 return regs; 1171 return regs;
1171 } 1172 }
1172 #endif /* CONFIG_X86_64 */ 1173 #endif /* CONFIG_X86_64 */
1173 1174
1174 /* 1175 /*
1175 * Clear all 6 debug registers: 1176 * Clear all 6 debug registers:
1176 */ 1177 */
1177 static void clear_all_debug_regs(void) 1178 static void clear_all_debug_regs(void)
1178 { 1179 {
1179 int i; 1180 int i;
1180 1181
1181 for (i = 0; i < 8; i++) { 1182 for (i = 0; i < 8; i++) {
1182 /* Ignore db4, db5 */ 1183 /* Ignore db4, db5 */
1183 if ((i == 4) || (i == 5)) 1184 if ((i == 4) || (i == 5))
1184 continue; 1185 continue;
1185 1186
1186 set_debugreg(0, i); 1187 set_debugreg(0, i);
1187 } 1188 }
1188 } 1189 }
1189 1190
1190 #ifdef CONFIG_KGDB 1191 #ifdef CONFIG_KGDB
1191 /* 1192 /*
1192 * Restore debug regs if using kgdbwait and you have a kernel debugger 1193 * Restore debug regs if using kgdbwait and you have a kernel debugger
1193 * connection established. 1194 * connection established.
1194 */ 1195 */
1195 static void dbg_restore_debug_regs(void) 1196 static void dbg_restore_debug_regs(void)
1196 { 1197 {
1197 if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break)) 1198 if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break))
1198 arch_kgdb_ops.correct_hw_break(); 1199 arch_kgdb_ops.correct_hw_break();
1199 } 1200 }
1200 #else /* ! CONFIG_KGDB */ 1201 #else /* ! CONFIG_KGDB */
1201 #define dbg_restore_debug_regs() 1202 #define dbg_restore_debug_regs()
1202 #endif /* ! CONFIG_KGDB */ 1203 #endif /* ! CONFIG_KGDB */
1203 1204
1204 /* 1205 /*
1205 * cpu_init() initializes state that is per-CPU. Some data is already 1206 * cpu_init() initializes state that is per-CPU. Some data is already
1206 * initialized (naturally) in the bootstrap process, such as the GDT 1207 * initialized (naturally) in the bootstrap process, such as the GDT
1207 * and IDT. We reload them nevertheless; this function acts as a 1208 * and IDT. We reload them nevertheless; this function acts as a
1208 * 'CPU state barrier': nothing should get across. 1209 * 'CPU state barrier': nothing should get across.
1209 * A lot of state is already set up in PDA init for 64-bit. 1210 * A lot of state is already set up in PDA init for 64-bit.
1210 */ 1211 */
1211 #ifdef CONFIG_X86_64 1212 #ifdef CONFIG_X86_64
1212 1213
1213 void __cpuinit cpu_init(void) 1214 void __cpuinit cpu_init(void)
1214 { 1215 {
1215 struct orig_ist *oist; 1216 struct orig_ist *oist;
1216 struct task_struct *me; 1217 struct task_struct *me;
1217 struct tss_struct *t; 1218 struct tss_struct *t;
1218 unsigned long v; 1219 unsigned long v;
1219 int cpu; 1220 int cpu;
1220 int i; 1221 int i;
1221 1222
1222 cpu = stack_smp_processor_id(); 1223 cpu = stack_smp_processor_id();
1223 t = &per_cpu(init_tss, cpu); 1224 t = &per_cpu(init_tss, cpu);
1224 oist = &per_cpu(orig_ist, cpu); 1225 oist = &per_cpu(orig_ist, cpu);
1225 1226
1226 #ifdef CONFIG_NUMA 1227 #ifdef CONFIG_NUMA
1227 if (cpu != 0 && this_cpu_read(numa_node) == 0 && 1228 if (cpu != 0 && this_cpu_read(numa_node) == 0 &&
1228 early_cpu_to_node(cpu) != NUMA_NO_NODE) 1229 early_cpu_to_node(cpu) != NUMA_NO_NODE)
1229 set_numa_node(early_cpu_to_node(cpu)); 1230 set_numa_node(early_cpu_to_node(cpu));
1230 #endif 1231 #endif
1231 1232
1232 me = current; 1233 me = current;
1233 1234
1234 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) 1235 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
1235 panic("CPU#%d already initialized!\n", cpu); 1236 panic("CPU#%d already initialized!\n", cpu);
1236 1237
1237 pr_debug("Initializing CPU#%d\n", cpu); 1238 pr_debug("Initializing CPU#%d\n", cpu);
1238 1239
1239 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1240 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
1240 1241
1241 /* 1242 /*
1242 * Initialize the per-CPU GDT with the boot GDT, 1243 * Initialize the per-CPU GDT with the boot GDT,
1243 * and set up the GDT descriptor: 1244 * and set up the GDT descriptor:
1244 */ 1245 */
1245 1246
1246 switch_to_new_gdt(cpu); 1247 switch_to_new_gdt(cpu);
1247 loadsegment(fs, 0); 1248 loadsegment(fs, 0);
1248 1249
1249 load_idt((const struct desc_ptr *)&idt_descr); 1250 load_idt((const struct desc_ptr *)&idt_descr);
1250 1251
1251 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); 1252 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
1252 syscall_init(); 1253 syscall_init();
1253 1254
1254 wrmsrl(MSR_FS_BASE, 0); 1255 wrmsrl(MSR_FS_BASE, 0);
1255 wrmsrl(MSR_KERNEL_GS_BASE, 0); 1256 wrmsrl(MSR_KERNEL_GS_BASE, 0);
1256 barrier(); 1257 barrier();
1257 1258
1258 x86_configure_nx(); 1259 x86_configure_nx();
1259 if (cpu != 0) 1260 if (cpu != 0)
1260 enable_x2apic(); 1261 enable_x2apic();
1261 1262
1262 /* 1263 /*
1263 * set up and load the per-CPU TSS 1264 * set up and load the per-CPU TSS
1264 */ 1265 */
1265 if (!oist->ist[0]) { 1266 if (!oist->ist[0]) {
1266 char *estacks = per_cpu(exception_stacks, cpu); 1267 char *estacks = per_cpu(exception_stacks, cpu);
1267 1268
1268 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1269 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1269 estacks += exception_stack_sizes[v]; 1270 estacks += exception_stack_sizes[v];
1270 oist->ist[v] = t->x86_tss.ist[v] = 1271 oist->ist[v] = t->x86_tss.ist[v] =
1271 (unsigned long)estacks; 1272 (unsigned long)estacks;
1272 if (v == DEBUG_STACK-1) 1273 if (v == DEBUG_STACK-1)
1273 per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; 1274 per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
1274 } 1275 }
1275 } 1276 }
1276 1277
1277 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1278 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
1278 1279
1279 /* 1280 /*
1280 * <= is required because the CPU will access up to 1281 * <= is required because the CPU will access up to
1281 * 8 bits beyond the end of the IO permission bitmap. 1282 * 8 bits beyond the end of the IO permission bitmap.
1282 */ 1283 */
1283 for (i = 0; i <= IO_BITMAP_LONGS; i++) 1284 for (i = 0; i <= IO_BITMAP_LONGS; i++)
1284 t->io_bitmap[i] = ~0UL; 1285 t->io_bitmap[i] = ~0UL;
1285 1286
1286 atomic_inc(&init_mm.mm_count); 1287 atomic_inc(&init_mm.mm_count);
1287 me->active_mm = &init_mm; 1288 me->active_mm = &init_mm;
1288 BUG_ON(me->mm); 1289 BUG_ON(me->mm);
1289 enter_lazy_tlb(&init_mm, me); 1290 enter_lazy_tlb(&init_mm, me);
1290 1291
1291 load_sp0(t, &current->thread); 1292 load_sp0(t, &current->thread);
1292 set_tss_desc(cpu, t); 1293 set_tss_desc(cpu, t);
1293 load_TR_desc(); 1294 load_TR_desc();
1294 load_LDT(&init_mm.context); 1295 load_LDT(&init_mm.context);
1295 1296
1296 clear_all_debug_regs(); 1297 clear_all_debug_regs();
1297 dbg_restore_debug_regs(); 1298 dbg_restore_debug_regs();
1298 1299
1299 fpu_init(); 1300 fpu_init();
1300 xsave_init(); 1301 xsave_init();
1301 1302
1302 raw_local_save_flags(kernel_eflags); 1303 raw_local_save_flags(kernel_eflags);
1303 1304
1304 if (is_uv_system()) 1305 if (is_uv_system())
1305 uv_cpu_init(); 1306 uv_cpu_init();
1306 } 1307 }
1307 1308
1308 #else 1309 #else
1309 1310
1310 void __cpuinit cpu_init(void) 1311 void __cpuinit cpu_init(void)
1311 { 1312 {
1312 int cpu = smp_processor_id(); 1313 int cpu = smp_processor_id();
1313 struct task_struct *curr = current; 1314 struct task_struct *curr = current;
1314 struct tss_struct *t = &per_cpu(init_tss, cpu); 1315 struct tss_struct *t = &per_cpu(init_tss, cpu);
1315 struct thread_struct *thread = &curr->thread; 1316 struct thread_struct *thread = &curr->thread;
1316 1317
1317 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { 1318 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
1318 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); 1319 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
1319 for (;;) 1320 for (;;)
1320 local_irq_enable(); 1321 local_irq_enable();
1321 } 1322 }
1322 1323
1323 printk(KERN_INFO "Initializing CPU#%d\n", cpu); 1324 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
1324 1325
1325 if (cpu_has_vme || cpu_has_tsc || cpu_has_de) 1326 if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
1326 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1327 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
1327 1328
1328 load_idt(&idt_descr); 1329 load_idt(&idt_descr);
1329 switch_to_new_gdt(cpu); 1330 switch_to_new_gdt(cpu);
1330 1331
1331 /* 1332 /*
1332 * Set up and load the per-CPU TSS and LDT 1333 * Set up and load the per-CPU TSS and LDT
1333 */ 1334 */
1334 atomic_inc(&init_mm.mm_count); 1335 atomic_inc(&init_mm.mm_count);
1335 curr->active_mm = &init_mm; 1336 curr->active_mm = &init_mm;
1336 BUG_ON(curr->mm); 1337 BUG_ON(curr->mm);
1337 enter_lazy_tlb(&init_mm, curr); 1338 enter_lazy_tlb(&init_mm, curr);
1338 1339
1339 load_sp0(t, thread); 1340 load_sp0(t, thread);
1340 set_tss_desc(cpu, t); 1341 set_tss_desc(cpu, t);
1341 load_TR_desc(); 1342 load_TR_desc();
1342 load_LDT(&init_mm.context); 1343 load_LDT(&init_mm.context);
1343 1344
1344 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1345 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
1345 1346
1346 #ifdef CONFIG_DOUBLEFAULT 1347 #ifdef CONFIG_DOUBLEFAULT
1347 /* Set up doublefault TSS pointer in the GDT */ 1348 /* Set up doublefault TSS pointer in the GDT */
1348 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 1349 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
1349 #endif 1350 #endif
1350 1351
1351 clear_all_debug_regs(); 1352 clear_all_debug_regs();
1352 dbg_restore_debug_regs(); 1353 dbg_restore_debug_regs();
1353 1354
1354 fpu_init(); 1355 fpu_init();
1355 xsave_init(); 1356 xsave_init();
1356 } 1357 }
1357 #endif 1358 #endif
1358 1359
arch/x86/kernel/entry_64.S
1 /* 1 /*
2 * linux/arch/x86_64/entry.S 2 * linux/arch/x86_64/entry.S
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs 5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> 6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 */ 7 */
8 8
9 /* 9 /*
10 * entry.S contains the system-call and fault low-level handling routines. 10 * entry.S contains the system-call and fault low-level handling routines.
11 * 11 *
12 * Some of this is documented in Documentation/x86/entry_64.txt 12 * Some of this is documented in Documentation/x86/entry_64.txt
13 * 13 *
14 * NOTE: This code handles signal-recognition, which happens every time 14 * NOTE: This code handles signal-recognition, which happens every time
15 * after an interrupt and after each system call. 15 * after an interrupt and after each system call.
16 * 16 *
17 * Normal syscalls and interrupts don't save a full stack frame, this is 17 * Normal syscalls and interrupts don't save a full stack frame, this is
18 * only done for syscall tracing, signals, or fork/exec et al. 18 * only done for syscall tracing, signals, or fork/exec et al.
19 * 19 *
20 * A note on terminology: 20 * A note on terminology:
21 * - top of stack: Architecture defined interrupt frame from SS to RIP 21 * - top of stack: Architecture defined interrupt frame from SS to RIP
22 * at the top of the kernel process stack. 22 * at the top of the kernel process stack.
23 * - partial stack frame: partially saved registers up to R11. 23 * - partial stack frame: partially saved registers up to R11.
24 * - full stack frame: Like partial stack frame, but all registers saved. 24 * - full stack frame: Like partial stack frame, but all registers saved.
25 * 25 *
26 * Some macro usage: 26 * Some macro usage:
27 * - CFI macros are used to generate dwarf2 unwind information for better 27 * - CFI macros are used to generate dwarf2 unwind information for better
28 * backtraces. They don't change any code. 28 * backtraces. They don't change any code.
29 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers 29 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
30 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. 30 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
31 * There are unfortunately lots of special cases where some registers 31 * There are unfortunately lots of special cases where some registers
32 * are not touched. The macro is a big mess that should be cleaned up. 32 * are not touched. The macro is a big mess that should be cleaned up.
33 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. 33 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
34 * Gives a full stack frame. 34 * Gives a full stack frame.
35 * - ENTRY/END Define functions in the symbol table. 35 * - ENTRY/END Define functions in the symbol table.
36 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack 36 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
37 * frame that is otherwise undefined after a SYSCALL 37 * frame that is otherwise undefined after a SYSCALL
38 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. 38 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
39 * - errorentry/paranoidentry/zeroentry - Define exception entry points. 39 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
40 */ 40 */
41 41
42 #include <linux/linkage.h> 42 #include <linux/linkage.h>
43 #include <asm/segment.h> 43 #include <asm/segment.h>
44 #include <asm/cache.h> 44 #include <asm/cache.h>
45 #include <asm/errno.h> 45 #include <asm/errno.h>
46 #include <asm/dwarf2.h> 46 #include <asm/dwarf2.h>
47 #include <asm/calling.h> 47 #include <asm/calling.h>
48 #include <asm/asm-offsets.h> 48 #include <asm/asm-offsets.h>
49 #include <asm/msr.h> 49 #include <asm/msr.h>
50 #include <asm/unistd.h> 50 #include <asm/unistd.h>
51 #include <asm/thread_info.h> 51 #include <asm/thread_info.h>
52 #include <asm/hw_irq.h> 52 #include <asm/hw_irq.h>
53 #include <asm/page_types.h> 53 #include <asm/page_types.h>
54 #include <asm/irqflags.h> 54 #include <asm/irqflags.h>
55 #include <asm/paravirt.h> 55 #include <asm/paravirt.h>
56 #include <asm/ftrace.h> 56 #include <asm/ftrace.h>
57 #include <asm/percpu.h> 57 #include <asm/percpu.h>
58 #include <asm/asm.h> 58 #include <asm/asm.h>
59 #include <asm/smap.h>
59 #include <linux/err.h> 60 #include <linux/err.h>
60 61
61 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 62 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
62 #include <linux/elf-em.h> 63 #include <linux/elf-em.h>
63 #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) 64 #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
64 #define __AUDIT_ARCH_64BIT 0x80000000 65 #define __AUDIT_ARCH_64BIT 0x80000000
65 #define __AUDIT_ARCH_LE 0x40000000 66 #define __AUDIT_ARCH_LE 0x40000000
66 67
67 .code64 68 .code64
68 .section .entry.text, "ax" 69 .section .entry.text, "ax"
69 70
70 #ifdef CONFIG_FUNCTION_TRACER 71 #ifdef CONFIG_FUNCTION_TRACER
71 #ifdef CONFIG_DYNAMIC_FTRACE 72 #ifdef CONFIG_DYNAMIC_FTRACE
72 ENTRY(mcount) 73 ENTRY(mcount)
73 retq 74 retq
74 END(mcount) 75 END(mcount)
75 76
76 ENTRY(ftrace_caller) 77 ENTRY(ftrace_caller)
77 cmpl $0, function_trace_stop 78 cmpl $0, function_trace_stop
78 jne ftrace_stub 79 jne ftrace_stub
79 80
80 MCOUNT_SAVE_FRAME 81 MCOUNT_SAVE_FRAME
81 82
82 movq 0x38(%rsp), %rdi 83 movq 0x38(%rsp), %rdi
83 movq 8(%rbp), %rsi 84 movq 8(%rbp), %rsi
84 subq $MCOUNT_INSN_SIZE, %rdi 85 subq $MCOUNT_INSN_SIZE, %rdi
85 86
86 GLOBAL(ftrace_call) 87 GLOBAL(ftrace_call)
87 call ftrace_stub 88 call ftrace_stub
88 89
89 MCOUNT_RESTORE_FRAME 90 MCOUNT_RESTORE_FRAME
90 91
91 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 92 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
92 GLOBAL(ftrace_graph_call) 93 GLOBAL(ftrace_graph_call)
93 jmp ftrace_stub 94 jmp ftrace_stub
94 #endif 95 #endif
95 96
96 GLOBAL(ftrace_stub) 97 GLOBAL(ftrace_stub)
97 retq 98 retq
98 END(ftrace_caller) 99 END(ftrace_caller)
99 100
100 #else /* ! CONFIG_DYNAMIC_FTRACE */ 101 #else /* ! CONFIG_DYNAMIC_FTRACE */
101 ENTRY(mcount) 102 ENTRY(mcount)
102 cmpl $0, function_trace_stop 103 cmpl $0, function_trace_stop
103 jne ftrace_stub 104 jne ftrace_stub
104 105
105 cmpq $ftrace_stub, ftrace_trace_function 106 cmpq $ftrace_stub, ftrace_trace_function
106 jnz trace 107 jnz trace
107 108
108 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 109 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
109 cmpq $ftrace_stub, ftrace_graph_return 110 cmpq $ftrace_stub, ftrace_graph_return
110 jnz ftrace_graph_caller 111 jnz ftrace_graph_caller
111 112
112 cmpq $ftrace_graph_entry_stub, ftrace_graph_entry 113 cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
113 jnz ftrace_graph_caller 114 jnz ftrace_graph_caller
114 #endif 115 #endif
115 116
116 GLOBAL(ftrace_stub) 117 GLOBAL(ftrace_stub)
117 retq 118 retq
118 119
119 trace: 120 trace:
120 MCOUNT_SAVE_FRAME 121 MCOUNT_SAVE_FRAME
121 122
122 movq 0x38(%rsp), %rdi 123 movq 0x38(%rsp), %rdi
123 movq 8(%rbp), %rsi 124 movq 8(%rbp), %rsi
124 subq $MCOUNT_INSN_SIZE, %rdi 125 subq $MCOUNT_INSN_SIZE, %rdi
125 126
126 call *ftrace_trace_function 127 call *ftrace_trace_function
127 128
128 MCOUNT_RESTORE_FRAME 129 MCOUNT_RESTORE_FRAME
129 130
130 jmp ftrace_stub 131 jmp ftrace_stub
131 END(mcount) 132 END(mcount)
132 #endif /* CONFIG_DYNAMIC_FTRACE */ 133 #endif /* CONFIG_DYNAMIC_FTRACE */
133 #endif /* CONFIG_FUNCTION_TRACER */ 134 #endif /* CONFIG_FUNCTION_TRACER */
134 135
135 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 136 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
136 ENTRY(ftrace_graph_caller) 137 ENTRY(ftrace_graph_caller)
137 cmpl $0, function_trace_stop 138 cmpl $0, function_trace_stop
138 jne ftrace_stub 139 jne ftrace_stub
139 140
140 MCOUNT_SAVE_FRAME 141 MCOUNT_SAVE_FRAME
141 142
142 leaq 8(%rbp), %rdi 143 leaq 8(%rbp), %rdi
143 movq 0x38(%rsp), %rsi 144 movq 0x38(%rsp), %rsi
144 movq (%rbp), %rdx 145 movq (%rbp), %rdx
145 subq $MCOUNT_INSN_SIZE, %rsi 146 subq $MCOUNT_INSN_SIZE, %rsi
146 147
147 call prepare_ftrace_return 148 call prepare_ftrace_return
148 149
149 MCOUNT_RESTORE_FRAME 150 MCOUNT_RESTORE_FRAME
150 151
151 retq 152 retq
152 END(ftrace_graph_caller) 153 END(ftrace_graph_caller)
153 154
154 GLOBAL(return_to_handler) 155 GLOBAL(return_to_handler)
155 subq $24, %rsp 156 subq $24, %rsp
156 157
157 /* Save the return values */ 158 /* Save the return values */
158 movq %rax, (%rsp) 159 movq %rax, (%rsp)
159 movq %rdx, 8(%rsp) 160 movq %rdx, 8(%rsp)
160 movq %rbp, %rdi 161 movq %rbp, %rdi
161 162
162 call ftrace_return_to_handler 163 call ftrace_return_to_handler
163 164
164 movq %rax, %rdi 165 movq %rax, %rdi
165 movq 8(%rsp), %rdx 166 movq 8(%rsp), %rdx
166 movq (%rsp), %rax 167 movq (%rsp), %rax
167 addq $24, %rsp 168 addq $24, %rsp
168 jmp *%rdi 169 jmp *%rdi
169 #endif 170 #endif
170 171
171 172
172 #ifndef CONFIG_PREEMPT 173 #ifndef CONFIG_PREEMPT
173 #define retint_kernel retint_restore_args 174 #define retint_kernel retint_restore_args
174 #endif 175 #endif
175 176
176 #ifdef CONFIG_PARAVIRT 177 #ifdef CONFIG_PARAVIRT
177 ENTRY(native_usergs_sysret64) 178 ENTRY(native_usergs_sysret64)
178 swapgs 179 swapgs
179 sysretq 180 sysretq
180 ENDPROC(native_usergs_sysret64) 181 ENDPROC(native_usergs_sysret64)
181 #endif /* CONFIG_PARAVIRT */ 182 #endif /* CONFIG_PARAVIRT */
182 183
183 184
184 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET 185 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
185 #ifdef CONFIG_TRACE_IRQFLAGS 186 #ifdef CONFIG_TRACE_IRQFLAGS
186 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ 187 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
187 jnc 1f 188 jnc 1f
188 TRACE_IRQS_ON 189 TRACE_IRQS_ON
189 1: 190 1:
190 #endif 191 #endif
191 .endm 192 .endm
192 193
193 /* 194 /*
194 * When dynamic function tracer is enabled it will add a breakpoint 195 * When dynamic function tracer is enabled it will add a breakpoint
195 * to all locations that it is about to modify, sync CPUs, update 196 * to all locations that it is about to modify, sync CPUs, update
196 * all the code, sync CPUs, then remove the breakpoints. In this time 197 * all the code, sync CPUs, then remove the breakpoints. In this time
197 * if lockdep is enabled, it might jump back into the debug handler 198 * if lockdep is enabled, it might jump back into the debug handler
198 * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF). 199 * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
199 * 200 *
200 * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to 201 * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
201 * make sure the stack pointer does not get reset back to the top 202 * make sure the stack pointer does not get reset back to the top
202 * of the debug stack, and instead just reuses the current stack. 203 * of the debug stack, and instead just reuses the current stack.
203 */ 204 */
204 #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) 205 #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
205 206
206 .macro TRACE_IRQS_OFF_DEBUG 207 .macro TRACE_IRQS_OFF_DEBUG
207 call debug_stack_set_zero 208 call debug_stack_set_zero
208 TRACE_IRQS_OFF 209 TRACE_IRQS_OFF
209 call debug_stack_reset 210 call debug_stack_reset
210 .endm 211 .endm
211 212
212 .macro TRACE_IRQS_ON_DEBUG 213 .macro TRACE_IRQS_ON_DEBUG
213 call debug_stack_set_zero 214 call debug_stack_set_zero
214 TRACE_IRQS_ON 215 TRACE_IRQS_ON
215 call debug_stack_reset 216 call debug_stack_reset
216 .endm 217 .endm
217 218
218 .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET 219 .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
219 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ 220 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
220 jnc 1f 221 jnc 1f
221 TRACE_IRQS_ON_DEBUG 222 TRACE_IRQS_ON_DEBUG
222 1: 223 1:
223 .endm 224 .endm
224 225
225 #else 226 #else
226 # define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF 227 # define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
227 # define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON 228 # define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
228 # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ 229 # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
229 #endif 230 #endif
230 231
231 /* 232 /*
232 * C code is not supposed to know about undefined top of stack. Every time 233 * C code is not supposed to know about undefined top of stack. Every time
233 * a C function with an pt_regs argument is called from the SYSCALL based 234 * a C function with an pt_regs argument is called from the SYSCALL based
234 * fast path FIXUP_TOP_OF_STACK is needed. 235 * fast path FIXUP_TOP_OF_STACK is needed.
235 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs 236 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
236 * manipulation. 237 * manipulation.
237 */ 238 */
238 239
239 /* %rsp:at FRAMEEND */ 240 /* %rsp:at FRAMEEND */
240 .macro FIXUP_TOP_OF_STACK tmp offset=0 241 .macro FIXUP_TOP_OF_STACK tmp offset=0
241 movq PER_CPU_VAR(old_rsp),\tmp 242 movq PER_CPU_VAR(old_rsp),\tmp
242 movq \tmp,RSP+\offset(%rsp) 243 movq \tmp,RSP+\offset(%rsp)
243 movq $__USER_DS,SS+\offset(%rsp) 244 movq $__USER_DS,SS+\offset(%rsp)
244 movq $__USER_CS,CS+\offset(%rsp) 245 movq $__USER_CS,CS+\offset(%rsp)
245 movq $-1,RCX+\offset(%rsp) 246 movq $-1,RCX+\offset(%rsp)
246 movq R11+\offset(%rsp),\tmp /* get eflags */ 247 movq R11+\offset(%rsp),\tmp /* get eflags */
247 movq \tmp,EFLAGS+\offset(%rsp) 248 movq \tmp,EFLAGS+\offset(%rsp)
248 .endm 249 .endm
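
In pt_regs terms, FIXUP_TOP_OF_STACK reconstructs the fields that the SYSCALL fast path never saved. A rough C sketch of the same assignments; the struct and helper here are stand-ins for illustration, not kernel code:

	#include <stdint.h>

	/* cut-down stand-in for the pt_regs fields FIXUP_TOP_OF_STACK touches */
	struct fake_pt_regs {
		uint64_t r11, cx, cs, flags, sp, ss;
	};

	static void fixup_top_of_stack(struct fake_pt_regs *regs, uint64_t old_rsp,
				       uint64_t user_cs, uint64_t user_ds)
	{
		regs->sp    = old_rsp;		/* PER_CPU_VAR(old_rsp): user RSP at SYSCALL */
		regs->ss    = user_ds;		/* $__USER_DS */
		regs->cs    = user_cs;		/* $__USER_CS */
		regs->cx    = (uint64_t)-1;	/* RCX was clobbered by SYSCALL, mark it invalid */
		regs->flags = regs->r11;	/* SYSCALL stashed RFLAGS in R11 */
	}

RESTORE_TOP_OF_STACK below does the inverse for the SYSRET path, copying the (possibly ptrace-modified) RSP back into old_rsp and EFLAGS back into the R11 slot.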
249 250
250 .macro RESTORE_TOP_OF_STACK tmp offset=0 251 .macro RESTORE_TOP_OF_STACK tmp offset=0
251 movq RSP+\offset(%rsp),\tmp 252 movq RSP+\offset(%rsp),\tmp
252 movq \tmp,PER_CPU_VAR(old_rsp) 253 movq \tmp,PER_CPU_VAR(old_rsp)
253 movq EFLAGS+\offset(%rsp),\tmp 254 movq EFLAGS+\offset(%rsp),\tmp
254 movq \tmp,R11+\offset(%rsp) 255 movq \tmp,R11+\offset(%rsp)
255 .endm 256 .endm
256 257
257 .macro FAKE_STACK_FRAME child_rip 258 .macro FAKE_STACK_FRAME child_rip
258 /* push in order ss, rsp, eflags, cs, rip */ 259 /* push in order ss, rsp, eflags, cs, rip */
259 xorl %eax, %eax 260 xorl %eax, %eax
260 pushq_cfi $__KERNEL_DS /* ss */ 261 pushq_cfi $__KERNEL_DS /* ss */
261 /*CFI_REL_OFFSET ss,0*/ 262 /*CFI_REL_OFFSET ss,0*/
262 pushq_cfi %rax /* rsp */ 263 pushq_cfi %rax /* rsp */
263 CFI_REL_OFFSET rsp,0 264 CFI_REL_OFFSET rsp,0
264 pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */ 265 pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
265 /*CFI_REL_OFFSET rflags,0*/ 266 /*CFI_REL_OFFSET rflags,0*/
266 pushq_cfi $__KERNEL_CS /* cs */ 267 pushq_cfi $__KERNEL_CS /* cs */
267 /*CFI_REL_OFFSET cs,0*/ 268 /*CFI_REL_OFFSET cs,0*/
268 pushq_cfi \child_rip /* rip */ 269 pushq_cfi \child_rip /* rip */
269 CFI_REL_OFFSET rip,0 270 CFI_REL_OFFSET rip,0
270 pushq_cfi %rax /* orig rax */ 271 pushq_cfi %rax /* orig rax */
271 .endm 272 .endm
272 273
273 .macro UNFAKE_STACK_FRAME 274 .macro UNFAKE_STACK_FRAME
274 addq $8*6, %rsp 275 addq $8*6, %rsp
275 CFI_ADJUST_CFA_OFFSET -(6*8) 276 CFI_ADJUST_CFA_OFFSET -(6*8)
276 .endm 277 .endm
277 278
278 /* 279 /*
279 * initial frame state for interrupts (and exceptions without error code) 280 * initial frame state for interrupts (and exceptions without error code)
280 */ 281 */
281 .macro EMPTY_FRAME start=1 offset=0 282 .macro EMPTY_FRAME start=1 offset=0
282 .if \start 283 .if \start
283 CFI_STARTPROC simple 284 CFI_STARTPROC simple
284 CFI_SIGNAL_FRAME 285 CFI_SIGNAL_FRAME
285 CFI_DEF_CFA rsp,8+\offset 286 CFI_DEF_CFA rsp,8+\offset
286 .else 287 .else
287 CFI_DEF_CFA_OFFSET 8+\offset 288 CFI_DEF_CFA_OFFSET 8+\offset
288 .endif 289 .endif
289 .endm 290 .endm
290 291
291 /* 292 /*
292 * initial frame state for interrupts (and exceptions without error code) 293 * initial frame state for interrupts (and exceptions without error code)
293 */ 294 */
294 .macro INTR_FRAME start=1 offset=0 295 .macro INTR_FRAME start=1 offset=0
295 EMPTY_FRAME \start, SS+8+\offset-RIP 296 EMPTY_FRAME \start, SS+8+\offset-RIP
296 /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ 297 /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
297 CFI_REL_OFFSET rsp, RSP+\offset-RIP 298 CFI_REL_OFFSET rsp, RSP+\offset-RIP
298 /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ 299 /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
299 /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ 300 /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
300 CFI_REL_OFFSET rip, RIP+\offset-RIP 301 CFI_REL_OFFSET rip, RIP+\offset-RIP
301 .endm 302 .endm
302 303
303 /* 304 /*
304 * initial frame state for exceptions with error code (and interrupts 305 * initial frame state for exceptions with error code (and interrupts
305 * with vector already pushed) 306 * with vector already pushed)
306 */ 307 */
307 .macro XCPT_FRAME start=1 offset=0 308 .macro XCPT_FRAME start=1 offset=0
308 INTR_FRAME \start, RIP+\offset-ORIG_RAX 309 INTR_FRAME \start, RIP+\offset-ORIG_RAX
309 /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ 310 /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
310 .endm 311 .endm
311 312
312 /* 313 /*
313 * frame that enables calling into C. 314 * frame that enables calling into C.
314 */ 315 */
315 .macro PARTIAL_FRAME start=1 offset=0 316 .macro PARTIAL_FRAME start=1 offset=0
316 XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET 317 XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
317 CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET 318 CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
318 CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET 319 CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
319 CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET 320 CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
320 CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET 321 CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
321 CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET 322 CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
322 CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET 323 CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
323 CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET 324 CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
324 CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET 325 CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
325 CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET 326 CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
326 .endm 327 .endm
327 328
328 /* 329 /*
329 * frame that enables passing a complete pt_regs to a C function. 330 * frame that enables passing a complete pt_regs to a C function.
330 */ 331 */
331 .macro DEFAULT_FRAME start=1 offset=0 332 .macro DEFAULT_FRAME start=1 offset=0
332 PARTIAL_FRAME \start, R11+\offset-R15 333 PARTIAL_FRAME \start, R11+\offset-R15
333 CFI_REL_OFFSET rbx, RBX+\offset 334 CFI_REL_OFFSET rbx, RBX+\offset
334 CFI_REL_OFFSET rbp, RBP+\offset 335 CFI_REL_OFFSET rbp, RBP+\offset
335 CFI_REL_OFFSET r12, R12+\offset 336 CFI_REL_OFFSET r12, R12+\offset
336 CFI_REL_OFFSET r13, R13+\offset 337 CFI_REL_OFFSET r13, R13+\offset
337 CFI_REL_OFFSET r14, R14+\offset 338 CFI_REL_OFFSET r14, R14+\offset
338 CFI_REL_OFFSET r15, R15+\offset 339 CFI_REL_OFFSET r15, R15+\offset
339 .endm 340 .endm
340 341
341 /* save partial stack frame */ 342 /* save partial stack frame */
342 .macro SAVE_ARGS_IRQ 343 .macro SAVE_ARGS_IRQ
343 cld 344 cld
344 /* start from rbp in pt_regs and jump over */ 345 /* start from rbp in pt_regs and jump over */
345 movq_cfi rdi, RDI-RBP 346 movq_cfi rdi, RDI-RBP
346 movq_cfi rsi, RSI-RBP 347 movq_cfi rsi, RSI-RBP
347 movq_cfi rdx, RDX-RBP 348 movq_cfi rdx, RDX-RBP
348 movq_cfi rcx, RCX-RBP 349 movq_cfi rcx, RCX-RBP
349 movq_cfi rax, RAX-RBP 350 movq_cfi rax, RAX-RBP
350 movq_cfi r8, R8-RBP 351 movq_cfi r8, R8-RBP
351 movq_cfi r9, R9-RBP 352 movq_cfi r9, R9-RBP
352 movq_cfi r10, R10-RBP 353 movq_cfi r10, R10-RBP
353 movq_cfi r11, R11-RBP 354 movq_cfi r11, R11-RBP
354 355
355 /* Save rbp so that we can unwind from get_irq_regs() */ 356 /* Save rbp so that we can unwind from get_irq_regs() */
356 movq_cfi rbp, 0 357 movq_cfi rbp, 0
357 358
358 /* Save previous stack value */ 359 /* Save previous stack value */
359 movq %rsp, %rsi 360 movq %rsp, %rsi
360 361
361 leaq -RBP(%rsp),%rdi /* arg1 for handler */ 362 leaq -RBP(%rsp),%rdi /* arg1 for handler */
362 testl $3, CS-RBP(%rsi) 363 testl $3, CS-RBP(%rsi)
363 je 1f 364 je 1f
364 SWAPGS 365 SWAPGS
365 /* 366 /*
366 * irq_count is used to check if a CPU is already on an interrupt stack 367 * irq_count is used to check if a CPU is already on an interrupt stack
367 * or not. While this is essentially redundant with preempt_count, it is 368 * or not. While this is essentially redundant with preempt_count, it is
368 * a little cheaper to use a separate counter in the PDA (short of 369 * a little cheaper to use a separate counter in the PDA (short of
369 * moving irq_enter into assembly, which would be too much work) 370 * moving irq_enter into assembly, which would be too much work)
370 */ 371 */
371 1: incl PER_CPU_VAR(irq_count) 372 1: incl PER_CPU_VAR(irq_count)
372 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp 373 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
373 CFI_DEF_CFA_REGISTER rsi 374 CFI_DEF_CFA_REGISTER rsi
374 375
375 /* Store previous stack value */ 376 /* Store previous stack value */
376 pushq %rsi 377 pushq %rsi
377 CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ 378 CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
378 0x77 /* DW_OP_breg7 */, 0, \ 379 0x77 /* DW_OP_breg7 */, 0, \
379 0x06 /* DW_OP_deref */, \ 380 0x06 /* DW_OP_deref */, \
380 0x08 /* DW_OP_const1u */, SS+8-RBP, \ 381 0x08 /* DW_OP_const1u */, SS+8-RBP, \
381 0x22 /* DW_OP_plus */ 382 0x22 /* DW_OP_plus */
382 /* We entered an interrupt context - irqs are off: */ 383 /* We entered an interrupt context - irqs are off: */
383 TRACE_IRQS_OFF 384 TRACE_IRQS_OFF
384 .endm 385 .endm
385 386
386 ENTRY(save_rest) 387 ENTRY(save_rest)
387 PARTIAL_FRAME 1 REST_SKIP+8 388 PARTIAL_FRAME 1 REST_SKIP+8
388 movq 5*8+16(%rsp), %r11 /* save return address */ 389 movq 5*8+16(%rsp), %r11 /* save return address */
389 movq_cfi rbx, RBX+16 390 movq_cfi rbx, RBX+16
390 movq_cfi rbp, RBP+16 391 movq_cfi rbp, RBP+16
391 movq_cfi r12, R12+16 392 movq_cfi r12, R12+16
392 movq_cfi r13, R13+16 393 movq_cfi r13, R13+16
393 movq_cfi r14, R14+16 394 movq_cfi r14, R14+16
394 movq_cfi r15, R15+16 395 movq_cfi r15, R15+16
395 movq %r11, 8(%rsp) /* return address */ 396 movq %r11, 8(%rsp) /* return address */
396 FIXUP_TOP_OF_STACK %r11, 16 397 FIXUP_TOP_OF_STACK %r11, 16
397 ret 398 ret
398 CFI_ENDPROC 399 CFI_ENDPROC
399 END(save_rest) 400 END(save_rest)
400 401
401 /* save complete stack frame */ 402 /* save complete stack frame */
402 .pushsection .kprobes.text, "ax" 403 .pushsection .kprobes.text, "ax"
403 ENTRY(save_paranoid) 404 ENTRY(save_paranoid)
404 XCPT_FRAME 1 RDI+8 405 XCPT_FRAME 1 RDI+8
405 cld 406 cld
406 movq_cfi rdi, RDI+8 407 movq_cfi rdi, RDI+8
407 movq_cfi rsi, RSI+8 408 movq_cfi rsi, RSI+8
408 movq_cfi rdx, RDX+8 409 movq_cfi rdx, RDX+8
409 movq_cfi rcx, RCX+8 410 movq_cfi rcx, RCX+8
410 movq_cfi rax, RAX+8 411 movq_cfi rax, RAX+8
411 movq_cfi r8, R8+8 412 movq_cfi r8, R8+8
412 movq_cfi r9, R9+8 413 movq_cfi r9, R9+8
413 movq_cfi r10, R10+8 414 movq_cfi r10, R10+8
414 movq_cfi r11, R11+8 415 movq_cfi r11, R11+8
415 movq_cfi rbx, RBX+8 416 movq_cfi rbx, RBX+8
416 movq_cfi rbp, RBP+8 417 movq_cfi rbp, RBP+8
417 movq_cfi r12, R12+8 418 movq_cfi r12, R12+8
418 movq_cfi r13, R13+8 419 movq_cfi r13, R13+8
419 movq_cfi r14, R14+8 420 movq_cfi r14, R14+8
420 movq_cfi r15, R15+8 421 movq_cfi r15, R15+8
421 movl $1,%ebx 422 movl $1,%ebx
422 movl $MSR_GS_BASE,%ecx 423 movl $MSR_GS_BASE,%ecx
423 rdmsr 424 rdmsr
424 testl %edx,%edx 425 testl %edx,%edx
425 js 1f /* negative -> in kernel */ 426 js 1f /* negative -> in kernel */
426 SWAPGS 427 SWAPGS
427 xorl %ebx,%ebx 428 xorl %ebx,%ebx
428 1: ret 429 1: ret
429 CFI_ENDPROC 430 CFI_ENDPROC
430 END(save_paranoid) 431 END(save_paranoid)
431 .popsection 432 .popsection
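
The rdmsr/js test in save_paranoid relies on kernel virtual addresses living in the upper, sign-extended half of the canonical address space: the high half of MSR_GS_BASE is negative exactly when GS already points at the kernel per-cpu area, so SWAPGS can be skipped. A one-line C equivalent of that sign test, illustrative only:

	#include <stdint.h>

	static inline int gs_base_is_kernel(uint64_t gs_base)
	{
		return (int64_t)gs_base < 0;	/* bit 63 set => kernel-half address */
	}
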
432 433
433 /* 434 /*
434 * A newly forked process directly context switches into this address. 435 * A newly forked process directly context switches into this address.
435 * 436 *
436 * rdi: prev task we switched from 437 * rdi: prev task we switched from
437 */ 438 */
438 ENTRY(ret_from_fork) 439 ENTRY(ret_from_fork)
439 DEFAULT_FRAME 440 DEFAULT_FRAME
440 441
441 LOCK ; btr $TIF_FORK,TI_flags(%r8) 442 LOCK ; btr $TIF_FORK,TI_flags(%r8)
442 443
443 pushq_cfi kernel_eflags(%rip) 444 pushq_cfi kernel_eflags(%rip)
444 popfq_cfi # reset kernel eflags 445 popfq_cfi # reset kernel eflags
445 446
446 call schedule_tail # rdi: 'prev' task parameter 447 call schedule_tail # rdi: 'prev' task parameter
447 448
448 GET_THREAD_INFO(%rcx) 449 GET_THREAD_INFO(%rcx)
449 450
450 RESTORE_REST 451 RESTORE_REST
451 452
452 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? 453 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
453 jz retint_restore_args 454 jz retint_restore_args
454 455
455 testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET 456 testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
456 jnz int_ret_from_sys_call 457 jnz int_ret_from_sys_call
457 458
458 RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET 459 RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
459 jmp ret_from_sys_call # go to the SYSRET fastpath 460 jmp ret_from_sys_call # go to the SYSRET fastpath
460 461
461 CFI_ENDPROC 462 CFI_ENDPROC
462 END(ret_from_fork) 463 END(ret_from_fork)
463 464
464 /* 465 /*
465 * System call entry. Up to 6 arguments in registers are supported. 466 * System call entry. Up to 6 arguments in registers are supported.
466 * 467 *
467 * SYSCALL does not save anything on the stack and does not change the 468 * SYSCALL does not save anything on the stack and does not change the
468 * stack pointer. 469 * stack pointer. However, it does mask the flags register for us, so
470 * CLD and CLAC are not needed.
469 */ 471 */
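
By contrast, code that deliberately touches user memory under SMAP has to raise AC around the access and drop it afterwards; the uaccess changes elsewhere in this patch do that with STAC/CLAC patched in on X86_FEATURE_SMAP. A self-contained sketch of such helpers, hard-coding the instruction encodings (stac = 0f 01 cb, clac = 0f 01 ca) only so the example compiles on its own rather than going through the kernel's alternatives machinery in <asm/smap.h>:

	/* illustrative helpers, not the kernel's definitions */
	static inline void stac(void)
	{
		asm volatile(".byte 0x0f,0x01,0xcb" ::: "memory");	/* set EFLAGS.AC */
	}

	static inline void clac(void)
	{
		asm volatile(".byte 0x0f,0x01,0xca" ::: "memory");	/* clear EFLAGS.AC */
	}

	/* usage sketch: stac(); ...access user memory...; clac(); */

On the SYSCALL path itself this bracketing is unnecessary because, as noted above, the entry mask already leaves AC clear.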
470 472
471 /* 473 /*
472 * Register setup: 474 * Register setup:
473 * rax system call number 475 * rax system call number
474 * rdi arg0 476 * rdi arg0
475 * rcx return address for syscall/sysret, C arg3 477 * rcx return address for syscall/sysret, C arg3
476 * rsi arg1 478 * rsi arg1
477 * rdx arg2 479 * rdx arg2
478 * r10 arg3 (--> moved to rcx for C) 480 * r10 arg3 (--> moved to rcx for C)
479 * r8 arg4 481 * r8 arg4
480 * r9 arg5 482 * r9 arg5
481 * r11 eflags for syscall/sysret, temporary for C 483 * r11 eflags for syscall/sysret, temporary for C
482 * r12-r15,rbp,rbx saved by C code, not touched. 484 * r12-r15,rbp,rbx saved by C code, not touched.
483 * 485 *
484 * Interrupts are off on entry. 486 * Interrupts are off on entry.
485 * Only called from user space. 487 * Only called from user space.
486 * 488 *
487 * XXX if we had a free scratch register we could save the RSP into the stack frame 489 * XXX if we had a free scratch register we could save the RSP into the stack frame
488 * and report it properly in ps. Unfortunately we don't. 490 * and report it properly in ps. Unfortunately we don't.
489 * 491 *
490 * When the user can change the frame, always force IRET. That is because 492 * When the user can change the frame, always force IRET. That is because
491 * it deals with non-canonical addresses better. SYSRET has trouble 493 * it deals with non-canonical addresses better. SYSRET has trouble
492 * with them due to bugs in both AMD and Intel CPUs. 494 * with them due to bugs in both AMD and Intel CPUs.
493 */ 495 */
494 496
495 ENTRY(system_call) 497 ENTRY(system_call)
496 CFI_STARTPROC simple 498 CFI_STARTPROC simple
497 CFI_SIGNAL_FRAME 499 CFI_SIGNAL_FRAME
498 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET 500 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
499 CFI_REGISTER rip,rcx 501 CFI_REGISTER rip,rcx
500 /*CFI_REGISTER rflags,r11*/ 502 /*CFI_REGISTER rflags,r11*/
501 SWAPGS_UNSAFE_STACK 503 SWAPGS_UNSAFE_STACK
502 /* 504 /*
503 * A hypervisor implementation might want to use a label 505 * A hypervisor implementation might want to use a label
504 * after the swapgs, so that it can do the swapgs 506 * after the swapgs, so that it can do the swapgs
505 * for the guest and jump here on syscall. 507 * for the guest and jump here on syscall.
506 */ 508 */
507 GLOBAL(system_call_after_swapgs) 509 GLOBAL(system_call_after_swapgs)
508 510
509 movq %rsp,PER_CPU_VAR(old_rsp) 511 movq %rsp,PER_CPU_VAR(old_rsp)
510 movq PER_CPU_VAR(kernel_stack),%rsp 512 movq PER_CPU_VAR(kernel_stack),%rsp
511 /* 513 /*
512 * No need to follow this irqs off/on section - it's straight 514 * No need to follow this irqs off/on section - it's straight
513 * and short: 515 * and short:
514 */ 516 */
515 ENABLE_INTERRUPTS(CLBR_NONE) 517 ENABLE_INTERRUPTS(CLBR_NONE)
516 SAVE_ARGS 8,0 518 SAVE_ARGS 8,0
517 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 519 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
518 movq %rcx,RIP-ARGOFFSET(%rsp) 520 movq %rcx,RIP-ARGOFFSET(%rsp)
519 CFI_REL_OFFSET rip,RIP-ARGOFFSET 521 CFI_REL_OFFSET rip,RIP-ARGOFFSET
520 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 522 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
521 jnz tracesys 523 jnz tracesys
522 system_call_fastpath: 524 system_call_fastpath:
523 #if __SYSCALL_MASK == ~0 525 #if __SYSCALL_MASK == ~0
524 cmpq $__NR_syscall_max,%rax 526 cmpq $__NR_syscall_max,%rax
525 #else 527 #else
526 andl $__SYSCALL_MASK,%eax 528 andl $__SYSCALL_MASK,%eax
527 cmpl $__NR_syscall_max,%eax 529 cmpl $__NR_syscall_max,%eax
528 #endif 530 #endif
529 ja badsys 531 ja badsys
530 movq %r10,%rcx 532 movq %r10,%rcx
531 call *sys_call_table(,%rax,8) # XXX: rip relative 533 call *sys_call_table(,%rax,8) # XXX: rip relative
532 movq %rax,RAX-ARGOFFSET(%rsp) 534 movq %rax,RAX-ARGOFFSET(%rsp)
533 /* 535 /*
534 * Syscall return path ending with SYSRET (fast path) 536 * Syscall return path ending with SYSRET (fast path)
535 * Has incomplete stack frame and undefined top of stack. 537 * Has incomplete stack frame and undefined top of stack.
536 */ 538 */
537 ret_from_sys_call: 539 ret_from_sys_call:
538 movl $_TIF_ALLWORK_MASK,%edi 540 movl $_TIF_ALLWORK_MASK,%edi
539 /* edi: flagmask */ 541 /* edi: flagmask */
540 sysret_check: 542 sysret_check:
541 LOCKDEP_SYS_EXIT 543 LOCKDEP_SYS_EXIT
542 DISABLE_INTERRUPTS(CLBR_NONE) 544 DISABLE_INTERRUPTS(CLBR_NONE)
543 TRACE_IRQS_OFF 545 TRACE_IRQS_OFF
544 movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx 546 movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
545 andl %edi,%edx 547 andl %edi,%edx
546 jnz sysret_careful 548 jnz sysret_careful
547 CFI_REMEMBER_STATE 549 CFI_REMEMBER_STATE
548 /* 550 /*
549 * sysretq will re-enable interrupts: 551 * sysretq will re-enable interrupts:
550 */ 552 */
551 TRACE_IRQS_ON 553 TRACE_IRQS_ON
552 movq RIP-ARGOFFSET(%rsp),%rcx 554 movq RIP-ARGOFFSET(%rsp),%rcx
553 CFI_REGISTER rip,rcx 555 CFI_REGISTER rip,rcx
554 RESTORE_ARGS 1,-ARG_SKIP,0 556 RESTORE_ARGS 1,-ARG_SKIP,0
555 /*CFI_REGISTER rflags,r11*/ 557 /*CFI_REGISTER rflags,r11*/
556 movq PER_CPU_VAR(old_rsp), %rsp 558 movq PER_CPU_VAR(old_rsp), %rsp
557 USERGS_SYSRET64 559 USERGS_SYSRET64
558 560
559 CFI_RESTORE_STATE 561 CFI_RESTORE_STATE
560 /* Handle reschedules */ 562 /* Handle reschedules */
561 /* edx: work, edi: workmask */ 563 /* edx: work, edi: workmask */
562 sysret_careful: 564 sysret_careful:
563 bt $TIF_NEED_RESCHED,%edx 565 bt $TIF_NEED_RESCHED,%edx
564 jnc sysret_signal 566 jnc sysret_signal
565 TRACE_IRQS_ON 567 TRACE_IRQS_ON
566 ENABLE_INTERRUPTS(CLBR_NONE) 568 ENABLE_INTERRUPTS(CLBR_NONE)
567 pushq_cfi %rdi 569 pushq_cfi %rdi
568 call schedule 570 call schedule
569 popq_cfi %rdi 571 popq_cfi %rdi
570 jmp sysret_check 572 jmp sysret_check
571 573
572 /* Handle a signal */ 574 /* Handle a signal */
573 sysret_signal: 575 sysret_signal:
574 TRACE_IRQS_ON 576 TRACE_IRQS_ON
575 ENABLE_INTERRUPTS(CLBR_NONE) 577 ENABLE_INTERRUPTS(CLBR_NONE)
576 #ifdef CONFIG_AUDITSYSCALL 578 #ifdef CONFIG_AUDITSYSCALL
577 bt $TIF_SYSCALL_AUDIT,%edx 579 bt $TIF_SYSCALL_AUDIT,%edx
578 jc sysret_audit 580 jc sysret_audit
579 #endif 581 #endif
580 /* 582 /*
581 * We have a signal, or exit tracing or single-step. 583 * We have a signal, or exit tracing or single-step.
582 * These all wind up with the iret return path anyway, 584 * These all wind up with the iret return path anyway,
583 * so just join that path right now. 585 * so just join that path right now.
584 */ 586 */
585 FIXUP_TOP_OF_STACK %r11, -ARGOFFSET 587 FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
586 jmp int_check_syscall_exit_work 588 jmp int_check_syscall_exit_work
587 589
588 badsys: 590 badsys:
589 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 591 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
590 jmp ret_from_sys_call 592 jmp ret_from_sys_call
591 593
592 #ifdef CONFIG_AUDITSYSCALL 594 #ifdef CONFIG_AUDITSYSCALL
593 /* 595 /*
594 * Fast path for syscall audit without full syscall trace. 596 * Fast path for syscall audit without full syscall trace.
595 * We just call __audit_syscall_entry() directly, and then 597 * We just call __audit_syscall_entry() directly, and then
596 * jump back to the normal fast path. 598 * jump back to the normal fast path.
597 */ 599 */
598 auditsys: 600 auditsys:
599 movq %r10,%r9 /* 6th arg: 4th syscall arg */ 601 movq %r10,%r9 /* 6th arg: 4th syscall arg */
600 movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ 602 movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
601 movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ 603 movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
602 movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ 604 movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
603 movq %rax,%rsi /* 2nd arg: syscall number */ 605 movq %rax,%rsi /* 2nd arg: syscall number */
604 movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ 606 movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
605 call __audit_syscall_entry 607 call __audit_syscall_entry
606 LOAD_ARGS 0 /* reload call-clobbered registers */ 608 LOAD_ARGS 0 /* reload call-clobbered registers */
607 jmp system_call_fastpath 609 jmp system_call_fastpath
608 610
609 /* 611 /*
610 * Return fast path for syscall audit. Call __audit_syscall_exit() 612 * Return fast path for syscall audit. Call __audit_syscall_exit()
611 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT 613 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
612 * masked off. 614 * masked off.
613 */ 615 */
614 sysret_audit: 616 sysret_audit:
615 movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ 617 movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */
616 cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */ 618 cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */
617 setbe %al /* 1 if so, 0 if not */ 619 setbe %al /* 1 if so, 0 if not */
618 movzbl %al,%edi /* zero-extend that into %edi */ 620 movzbl %al,%edi /* zero-extend that into %edi */
619 call __audit_syscall_exit 621 call __audit_syscall_exit
620 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi 622 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
621 jmp sysret_check 623 jmp sysret_check
622 #endif /* CONFIG_AUDITSYSCALL */ 624 #endif /* CONFIG_AUDITSYSCALL */
623 625
624 /* Do syscall tracing */ 626 /* Do syscall tracing */
625 tracesys: 627 tracesys:
626 #ifdef CONFIG_AUDITSYSCALL 628 #ifdef CONFIG_AUDITSYSCALL
627 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) 629 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
628 jz auditsys 630 jz auditsys
629 #endif 631 #endif
630 SAVE_REST 632 SAVE_REST
631 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ 633 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
632 FIXUP_TOP_OF_STACK %rdi 634 FIXUP_TOP_OF_STACK %rdi
633 movq %rsp,%rdi 635 movq %rsp,%rdi
634 call syscall_trace_enter 636 call syscall_trace_enter
635 /* 637 /*
636 * Reload arg registers from stack in case ptrace changed them. 638 * Reload arg registers from stack in case ptrace changed them.
637 * We don't reload %rax because syscall_trace_enter() returned 639 * We don't reload %rax because syscall_trace_enter() returned
638 * the value it wants us to use in the table lookup. 640 * the value it wants us to use in the table lookup.
639 */ 641 */
640 LOAD_ARGS ARGOFFSET, 1 642 LOAD_ARGS ARGOFFSET, 1
641 RESTORE_REST 643 RESTORE_REST
642 #if __SYSCALL_MASK == ~0 644 #if __SYSCALL_MASK == ~0
643 cmpq $__NR_syscall_max,%rax 645 cmpq $__NR_syscall_max,%rax
644 #else 646 #else
645 andl $__SYSCALL_MASK,%eax 647 andl $__SYSCALL_MASK,%eax
646 cmpl $__NR_syscall_max,%eax 648 cmpl $__NR_syscall_max,%eax
647 #endif 649 #endif
648 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ 650 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
649 movq %r10,%rcx /* fixup for C */ 651 movq %r10,%rcx /* fixup for C */
650 call *sys_call_table(,%rax,8) 652 call *sys_call_table(,%rax,8)
651 movq %rax,RAX-ARGOFFSET(%rsp) 653 movq %rax,RAX-ARGOFFSET(%rsp)
652 /* Use IRET because user could have changed frame */ 654 /* Use IRET because user could have changed frame */
653 655
654 /* 656 /*
655 * Syscall return path ending with IRET. 657 * Syscall return path ending with IRET.
656 * Has correct top of stack, but partial stack frame. 658 * Has correct top of stack, but partial stack frame.
657 */ 659 */
658 GLOBAL(int_ret_from_sys_call) 660 GLOBAL(int_ret_from_sys_call)
659 DISABLE_INTERRUPTS(CLBR_NONE) 661 DISABLE_INTERRUPTS(CLBR_NONE)
660 TRACE_IRQS_OFF 662 TRACE_IRQS_OFF
661 movl $_TIF_ALLWORK_MASK,%edi 663 movl $_TIF_ALLWORK_MASK,%edi
662 /* edi: mask to check */ 664 /* edi: mask to check */
663 GLOBAL(int_with_check) 665 GLOBAL(int_with_check)
664 LOCKDEP_SYS_EXIT_IRQ 666 LOCKDEP_SYS_EXIT_IRQ
665 GET_THREAD_INFO(%rcx) 667 GET_THREAD_INFO(%rcx)
666 movl TI_flags(%rcx),%edx 668 movl TI_flags(%rcx),%edx
667 andl %edi,%edx 669 andl %edi,%edx
668 jnz int_careful 670 jnz int_careful
669 andl $~TS_COMPAT,TI_status(%rcx) 671 andl $~TS_COMPAT,TI_status(%rcx)
670 jmp retint_swapgs 672 jmp retint_swapgs
671 673
672 /* Either reschedule or signal or syscall exit tracking needed. */ 674 /* Either reschedule or signal or syscall exit tracking needed. */
673 /* First do a reschedule test. */ 675 /* First do a reschedule test. */
674 /* edx: work, edi: workmask */ 676 /* edx: work, edi: workmask */
675 int_careful: 677 int_careful:
676 bt $TIF_NEED_RESCHED,%edx 678 bt $TIF_NEED_RESCHED,%edx
677 jnc int_very_careful 679 jnc int_very_careful
678 TRACE_IRQS_ON 680 TRACE_IRQS_ON
679 ENABLE_INTERRUPTS(CLBR_NONE) 681 ENABLE_INTERRUPTS(CLBR_NONE)
680 pushq_cfi %rdi 682 pushq_cfi %rdi
681 call schedule 683 call schedule
682 popq_cfi %rdi 684 popq_cfi %rdi
683 DISABLE_INTERRUPTS(CLBR_NONE) 685 DISABLE_INTERRUPTS(CLBR_NONE)
684 TRACE_IRQS_OFF 686 TRACE_IRQS_OFF
685 jmp int_with_check 687 jmp int_with_check
686 688
687 /* handle signals and tracing -- both require a full stack frame */ 689 /* handle signals and tracing -- both require a full stack frame */
688 int_very_careful: 690 int_very_careful:
689 TRACE_IRQS_ON 691 TRACE_IRQS_ON
690 ENABLE_INTERRUPTS(CLBR_NONE) 692 ENABLE_INTERRUPTS(CLBR_NONE)
691 int_check_syscall_exit_work: 693 int_check_syscall_exit_work:
692 SAVE_REST 694 SAVE_REST
693 /* Check for syscall exit trace */ 695 /* Check for syscall exit trace */
694 testl $_TIF_WORK_SYSCALL_EXIT,%edx 696 testl $_TIF_WORK_SYSCALL_EXIT,%edx
695 jz int_signal 697 jz int_signal
696 pushq_cfi %rdi 698 pushq_cfi %rdi
697 leaq 8(%rsp),%rdi # &ptregs -> arg1 699 leaq 8(%rsp),%rdi # &ptregs -> arg1
698 call syscall_trace_leave 700 call syscall_trace_leave
699 popq_cfi %rdi 701 popq_cfi %rdi
700 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi 702 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
701 jmp int_restore_rest 703 jmp int_restore_rest
702 704
703 int_signal: 705 int_signal:
704 testl $_TIF_DO_NOTIFY_MASK,%edx 706 testl $_TIF_DO_NOTIFY_MASK,%edx
705 jz 1f 707 jz 1f
706 movq %rsp,%rdi # &ptregs -> arg1 708 movq %rsp,%rdi # &ptregs -> arg1
707 xorl %esi,%esi # oldset -> arg2 709 xorl %esi,%esi # oldset -> arg2
708 call do_notify_resume 710 call do_notify_resume
709 1: movl $_TIF_WORK_MASK,%edi 711 1: movl $_TIF_WORK_MASK,%edi
710 int_restore_rest: 712 int_restore_rest:
711 RESTORE_REST 713 RESTORE_REST
712 DISABLE_INTERRUPTS(CLBR_NONE) 714 DISABLE_INTERRUPTS(CLBR_NONE)
713 TRACE_IRQS_OFF 715 TRACE_IRQS_OFF
714 jmp int_with_check 716 jmp int_with_check
715 CFI_ENDPROC 717 CFI_ENDPROC
716 END(system_call) 718 END(system_call)
717 719
718 /* 720 /*
719 * Certain special system calls need to save a complete stack frame. 721 * Certain special system calls need to save a complete stack frame.
720 */ 722 */
721 .macro PTREGSCALL label,func,arg 723 .macro PTREGSCALL label,func,arg
722 ENTRY(\label) 724 ENTRY(\label)
723 PARTIAL_FRAME 1 8 /* offset 8: return address */ 725 PARTIAL_FRAME 1 8 /* offset 8: return address */
724 subq $REST_SKIP, %rsp 726 subq $REST_SKIP, %rsp
725 CFI_ADJUST_CFA_OFFSET REST_SKIP 727 CFI_ADJUST_CFA_OFFSET REST_SKIP
726 call save_rest 728 call save_rest
727 DEFAULT_FRAME 0 8 /* offset 8: return address */ 729 DEFAULT_FRAME 0 8 /* offset 8: return address */
728 leaq 8(%rsp), \arg /* pt_regs pointer */ 730 leaq 8(%rsp), \arg /* pt_regs pointer */
729 call \func 731 call \func
730 jmp ptregscall_common 732 jmp ptregscall_common
731 CFI_ENDPROC 733 CFI_ENDPROC
732 END(\label) 734 END(\label)
733 .endm 735 .endm
734 736
735 PTREGSCALL stub_clone, sys_clone, %r8 737 PTREGSCALL stub_clone, sys_clone, %r8
736 PTREGSCALL stub_fork, sys_fork, %rdi 738 PTREGSCALL stub_fork, sys_fork, %rdi
737 PTREGSCALL stub_vfork, sys_vfork, %rdi 739 PTREGSCALL stub_vfork, sys_vfork, %rdi
738 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx 740 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
739 PTREGSCALL stub_iopl, sys_iopl, %rsi 741 PTREGSCALL stub_iopl, sys_iopl, %rsi
740 742
741 ENTRY(ptregscall_common) 743 ENTRY(ptregscall_common)
742 DEFAULT_FRAME 1 8 /* offset 8: return address */ 744 DEFAULT_FRAME 1 8 /* offset 8: return address */
743 RESTORE_TOP_OF_STACK %r11, 8 745 RESTORE_TOP_OF_STACK %r11, 8
744 movq_cfi_restore R15+8, r15 746 movq_cfi_restore R15+8, r15
745 movq_cfi_restore R14+8, r14 747 movq_cfi_restore R14+8, r14
746 movq_cfi_restore R13+8, r13 748 movq_cfi_restore R13+8, r13
747 movq_cfi_restore R12+8, r12 749 movq_cfi_restore R12+8, r12
748 movq_cfi_restore RBP+8, rbp 750 movq_cfi_restore RBP+8, rbp
749 movq_cfi_restore RBX+8, rbx 751 movq_cfi_restore RBX+8, rbx
750 ret $REST_SKIP /* pop extended registers */ 752 ret $REST_SKIP /* pop extended registers */
751 CFI_ENDPROC 753 CFI_ENDPROC
752 END(ptregscall_common) 754 END(ptregscall_common)
753 755
754 ENTRY(stub_execve) 756 ENTRY(stub_execve)
755 CFI_STARTPROC 757 CFI_STARTPROC
756 addq $8, %rsp 758 addq $8, %rsp
757 PARTIAL_FRAME 0 759 PARTIAL_FRAME 0
758 SAVE_REST 760 SAVE_REST
759 FIXUP_TOP_OF_STACK %r11 761 FIXUP_TOP_OF_STACK %r11
760 movq %rsp, %rcx 762 movq %rsp, %rcx
761 call sys_execve 763 call sys_execve
762 RESTORE_TOP_OF_STACK %r11 764 RESTORE_TOP_OF_STACK %r11
763 movq %rax,RAX(%rsp) 765 movq %rax,RAX(%rsp)
764 RESTORE_REST 766 RESTORE_REST
765 jmp int_ret_from_sys_call 767 jmp int_ret_from_sys_call
766 CFI_ENDPROC 768 CFI_ENDPROC
767 END(stub_execve) 769 END(stub_execve)
768 770
769 /* 771 /*
770 * sigreturn is special because it needs to restore all registers on return. 772 * sigreturn is special because it needs to restore all registers on return.
771 * This cannot be done with SYSRET, so use the IRET return path instead. 773 * This cannot be done with SYSRET, so use the IRET return path instead.
772 */ 774 */
773 ENTRY(stub_rt_sigreturn) 775 ENTRY(stub_rt_sigreturn)
774 CFI_STARTPROC 776 CFI_STARTPROC
775 addq $8, %rsp 777 addq $8, %rsp
776 PARTIAL_FRAME 0 778 PARTIAL_FRAME 0
777 SAVE_REST 779 SAVE_REST
778 movq %rsp,%rdi 780 movq %rsp,%rdi
779 FIXUP_TOP_OF_STACK %r11 781 FIXUP_TOP_OF_STACK %r11
780 call sys_rt_sigreturn 782 call sys_rt_sigreturn
781 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer 783 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
782 RESTORE_REST 784 RESTORE_REST
783 jmp int_ret_from_sys_call 785 jmp int_ret_from_sys_call
784 CFI_ENDPROC 786 CFI_ENDPROC
785 END(stub_rt_sigreturn) 787 END(stub_rt_sigreturn)
786 788
787 #ifdef CONFIG_X86_X32_ABI 789 #ifdef CONFIG_X86_X32_ABI
788 PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx 790 PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx
789 791
790 ENTRY(stub_x32_rt_sigreturn) 792 ENTRY(stub_x32_rt_sigreturn)
791 CFI_STARTPROC 793 CFI_STARTPROC
792 addq $8, %rsp 794 addq $8, %rsp
793 PARTIAL_FRAME 0 795 PARTIAL_FRAME 0
794 SAVE_REST 796 SAVE_REST
795 movq %rsp,%rdi 797 movq %rsp,%rdi
796 FIXUP_TOP_OF_STACK %r11 798 FIXUP_TOP_OF_STACK %r11
797 call sys32_x32_rt_sigreturn 799 call sys32_x32_rt_sigreturn
798 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer 800 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
799 RESTORE_REST 801 RESTORE_REST
800 jmp int_ret_from_sys_call 802 jmp int_ret_from_sys_call
801 CFI_ENDPROC 803 CFI_ENDPROC
802 END(stub_x32_rt_sigreturn) 804 END(stub_x32_rt_sigreturn)
803 805
804 ENTRY(stub_x32_execve) 806 ENTRY(stub_x32_execve)
805 CFI_STARTPROC 807 CFI_STARTPROC
806 addq $8, %rsp 808 addq $8, %rsp
807 PARTIAL_FRAME 0 809 PARTIAL_FRAME 0
808 SAVE_REST 810 SAVE_REST
809 FIXUP_TOP_OF_STACK %r11 811 FIXUP_TOP_OF_STACK %r11
810 movq %rsp, %rcx 812 movq %rsp, %rcx
811 call sys32_execve 813 call sys32_execve
812 RESTORE_TOP_OF_STACK %r11 814 RESTORE_TOP_OF_STACK %r11
813 movq %rax,RAX(%rsp) 815 movq %rax,RAX(%rsp)
814 RESTORE_REST 816 RESTORE_REST
815 jmp int_ret_from_sys_call 817 jmp int_ret_from_sys_call
816 CFI_ENDPROC 818 CFI_ENDPROC
817 END(stub_x32_execve) 819 END(stub_x32_execve)
818 820
819 #endif 821 #endif
820 822
821 /* 823 /*
822 * Build the entry stubs and pointer table with some assembler magic. 824 * Build the entry stubs and pointer table with some assembler magic.
823 * We pack 7 stubs into a single 32-byte chunk, which will fit in a 825 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
824 * single cache line on all modern x86 implementations. 826 * single cache line on all modern x86 implementations.
825 */ 827 */
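As a rough size check on that packing (the instruction sizes below are the usual encodings, not stated anywhere in this diff): six stubs of a 2-byte pushq with a signed-byte immediate plus a 2-byte short jmp, then a seventh 2-byte pushq that falls through to an up-to-5-byte jmp to common_interrupt, for at most 31 bytes per chunk.

#include <assert.h>

int main(void)
{
	/* 6 stubs of "pushq imm8; jmp 2f" (2 + 2 bytes each), plus a final
	 * 2-byte pushq followed by an up-to-5-byte jmp common_interrupt. */
	int chunk_bytes = 6 * (2 + 2) + 2 + 5;
	assert(chunk_bytes <= 32);	/* fits the .balign 32 chunks below */
	return 0;
}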
826 .section .init.rodata,"a" 828 .section .init.rodata,"a"
827 ENTRY(interrupt) 829 ENTRY(interrupt)
828 .section .entry.text 830 .section .entry.text
829 .p2align 5 831 .p2align 5
830 .p2align CONFIG_X86_L1_CACHE_SHIFT 832 .p2align CONFIG_X86_L1_CACHE_SHIFT
831 ENTRY(irq_entries_start) 833 ENTRY(irq_entries_start)
832 INTR_FRAME 834 INTR_FRAME
833 vector=FIRST_EXTERNAL_VECTOR 835 vector=FIRST_EXTERNAL_VECTOR
834 .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 836 .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
835 .balign 32 837 .balign 32
836 .rept 7 838 .rept 7
837 .if vector < NR_VECTORS 839 .if vector < NR_VECTORS
838 .if vector <> FIRST_EXTERNAL_VECTOR 840 .if vector <> FIRST_EXTERNAL_VECTOR
839 CFI_ADJUST_CFA_OFFSET -8 841 CFI_ADJUST_CFA_OFFSET -8
840 .endif 842 .endif
841 1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ 843 1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */
842 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 844 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
843 jmp 2f 845 jmp 2f
844 .endif 846 .endif
845 .previous 847 .previous
846 .quad 1b 848 .quad 1b
847 .section .entry.text 849 .section .entry.text
848 vector=vector+1 850 vector=vector+1
849 .endif 851 .endif
850 .endr 852 .endr
851 2: jmp common_interrupt 853 2: jmp common_interrupt
852 .endr 854 .endr
853 CFI_ENDPROC 855 CFI_ENDPROC
854 END(irq_entries_start) 856 END(irq_entries_start)
855 857
856 .previous 858 .previous
857 END(interrupt) 859 END(interrupt)
858 .previous 860 .previous
859 861
860 /* 862 /*
861 * Interrupt entry/exit. 863 * Interrupt entry/exit.
862 * 864 *
863 * Interrupt entry points save only callee-clobbered registers in the fast path. 865 * Interrupt entry points save only callee-clobbered registers in the fast path.
864 * 866 *
865 * Entry runs with interrupts off. 867 * Entry runs with interrupts off.
866 */ 868 */
867 869
868 /* 0(%rsp): ~(interrupt number) */ 870 /* 0(%rsp): ~(interrupt number) */
869 .macro interrupt func 871 .macro interrupt func
870 /* reserve pt_regs for scratch regs and rbp */ 872 /* reserve pt_regs for scratch regs and rbp */
871 subq $ORIG_RAX-RBP, %rsp 873 subq $ORIG_RAX-RBP, %rsp
872 CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP 874 CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
873 SAVE_ARGS_IRQ 875 SAVE_ARGS_IRQ
874 call \func 876 call \func
875 .endm 877 .endm
876 878
877 /* 879 /*
878 * Interrupt entry/exit should be protected against kprobes 880 * Interrupt entry/exit should be protected against kprobes
879 */ 881 */
880 .pushsection .kprobes.text, "ax" 882 .pushsection .kprobes.text, "ax"
881 /* 883 /*
882 * The interrupt stubs push (~vector+0x80) onto the stack and 884 * The interrupt stubs push (~vector+0x80) onto the stack and
883 * then jump to common_interrupt. 885 * then jump to common_interrupt.
884 */ 886 */
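A worked example of that encoding (0x20 is just a sample vector): the stub pushes ~vector + 0x80 so the immediate always fits in a signed byte, and the addq $-0x80 at common_interrupt removes the bias again, leaving ~vector in the [-256,-1] range for the handler to invert.

#include <stdio.h>

int main(void)
{
	int vector   = 0x20;             /* sample external vector            */
	int pushed   = ~vector + 0x80;   /* -33 + 128 = 95: signed-byte range */
	int adjusted = pushed - 0x80;    /* the addq $-0x80 step: back to -33 */
	printf("recovered vector = %#x\n", ~adjusted);	/* prints 0x20 */
	return 0;
}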
885 .p2align CONFIG_X86_L1_CACHE_SHIFT 887 .p2align CONFIG_X86_L1_CACHE_SHIFT
886 common_interrupt: 888 common_interrupt:
889 ASM_CLAC
887 XCPT_FRAME 890 XCPT_FRAME
888 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ 891 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
889 interrupt do_IRQ 892 interrupt do_IRQ
890 /* 0(%rsp): old_rsp-ARGOFFSET */ 893 /* 0(%rsp): old_rsp-ARGOFFSET */
891 ret_from_intr: 894 ret_from_intr:
892 DISABLE_INTERRUPTS(CLBR_NONE) 895 DISABLE_INTERRUPTS(CLBR_NONE)
893 TRACE_IRQS_OFF 896 TRACE_IRQS_OFF
894 decl PER_CPU_VAR(irq_count) 897 decl PER_CPU_VAR(irq_count)
895 898
896 /* Restore saved previous stack */ 899 /* Restore saved previous stack */
897 popq %rsi 900 popq %rsi
898 CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */ 901 CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */
899 leaq ARGOFFSET-RBP(%rsi), %rsp 902 leaq ARGOFFSET-RBP(%rsi), %rsp
900 CFI_DEF_CFA_REGISTER rsp 903 CFI_DEF_CFA_REGISTER rsp
901 CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET 904 CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET
902 905
903 exit_intr: 906 exit_intr:
904 GET_THREAD_INFO(%rcx) 907 GET_THREAD_INFO(%rcx)
905 testl $3,CS-ARGOFFSET(%rsp) 908 testl $3,CS-ARGOFFSET(%rsp)
906 je retint_kernel 909 je retint_kernel
907 910
908 /* Interrupt came from user space */ 911 /* Interrupt came from user space */
909 /* 912 /*
910 * Has a correct top of stack, but a partial stack frame 913 * Has a correct top of stack, but a partial stack frame
911 * %rcx: thread info. Interrupts off. 914 * %rcx: thread info. Interrupts off.
912 */ 915 */
913 retint_with_reschedule: 916 retint_with_reschedule:
914 movl $_TIF_WORK_MASK,%edi 917 movl $_TIF_WORK_MASK,%edi
915 retint_check: 918 retint_check:
916 LOCKDEP_SYS_EXIT_IRQ 919 LOCKDEP_SYS_EXIT_IRQ
917 movl TI_flags(%rcx),%edx 920 movl TI_flags(%rcx),%edx
918 andl %edi,%edx 921 andl %edi,%edx
919 CFI_REMEMBER_STATE 922 CFI_REMEMBER_STATE
920 jnz retint_careful 923 jnz retint_careful
921 924
922 retint_swapgs: /* return to user-space */ 925 retint_swapgs: /* return to user-space */
923 /* 926 /*
924 * The iretq could re-enable interrupts: 927 * The iretq could re-enable interrupts:
925 */ 928 */
926 DISABLE_INTERRUPTS(CLBR_ANY) 929 DISABLE_INTERRUPTS(CLBR_ANY)
927 TRACE_IRQS_IRETQ 930 TRACE_IRQS_IRETQ
928 SWAPGS 931 SWAPGS
929 jmp restore_args 932 jmp restore_args
930 933
931 retint_restore_args: /* return to kernel space */ 934 retint_restore_args: /* return to kernel space */
932 DISABLE_INTERRUPTS(CLBR_ANY) 935 DISABLE_INTERRUPTS(CLBR_ANY)
933 /* 936 /*
934 * The iretq could re-enable interrupts: 937 * The iretq could re-enable interrupts:
935 */ 938 */
936 TRACE_IRQS_IRETQ 939 TRACE_IRQS_IRETQ
937 restore_args: 940 restore_args:
938 RESTORE_ARGS 1,8,1 941 RESTORE_ARGS 1,8,1
939 942
940 irq_return: 943 irq_return:
941 INTERRUPT_RETURN 944 INTERRUPT_RETURN
942 _ASM_EXTABLE(irq_return, bad_iret) 945 _ASM_EXTABLE(irq_return, bad_iret)
943 946
944 #ifdef CONFIG_PARAVIRT 947 #ifdef CONFIG_PARAVIRT
945 ENTRY(native_iret) 948 ENTRY(native_iret)
946 iretq 949 iretq
947 _ASM_EXTABLE(native_iret, bad_iret) 950 _ASM_EXTABLE(native_iret, bad_iret)
948 #endif 951 #endif
949 952
950 .section .fixup,"ax" 953 .section .fixup,"ax"
951 bad_iret: 954 bad_iret:
952 /* 955 /*
953 * The iret traps when the %cs or %ss being restored is bogus. 956 * The iret traps when the %cs or %ss being restored is bogus.
954 * We've lost the original trap vector and error code. 957 * We've lost the original trap vector and error code.
955 * #GPF is the most likely one to get for an invalid selector. 958 * #GPF is the most likely one to get for an invalid selector.
956 * So pretend we completed the iret and took the #GPF in user mode. 959 * So pretend we completed the iret and took the #GPF in user mode.
957 * 960 *
958 * We are now running with the kernel GS after exception recovery. 961 * We are now running with the kernel GS after exception recovery.
959 * But error_entry expects us to have user GS to match the user %cs, 962 * But error_entry expects us to have user GS to match the user %cs,
960 * so swap back. 963 * so swap back.
961 */ 964 */
962 pushq $0 965 pushq $0
963 966
964 SWAPGS 967 SWAPGS
965 jmp general_protection 968 jmp general_protection
966 969
967 .previous 970 .previous
968 971
969 /* edi: workmask, edx: work */ 972 /* edi: workmask, edx: work */
970 retint_careful: 973 retint_careful:
971 CFI_RESTORE_STATE 974 CFI_RESTORE_STATE
972 bt $TIF_NEED_RESCHED,%edx 975 bt $TIF_NEED_RESCHED,%edx
973 jnc retint_signal 976 jnc retint_signal
974 TRACE_IRQS_ON 977 TRACE_IRQS_ON
975 ENABLE_INTERRUPTS(CLBR_NONE) 978 ENABLE_INTERRUPTS(CLBR_NONE)
976 pushq_cfi %rdi 979 pushq_cfi %rdi
977 call schedule 980 call schedule
978 popq_cfi %rdi 981 popq_cfi %rdi
979 GET_THREAD_INFO(%rcx) 982 GET_THREAD_INFO(%rcx)
980 DISABLE_INTERRUPTS(CLBR_NONE) 983 DISABLE_INTERRUPTS(CLBR_NONE)
981 TRACE_IRQS_OFF 984 TRACE_IRQS_OFF
982 jmp retint_check 985 jmp retint_check
983 986
984 retint_signal: 987 retint_signal:
985 testl $_TIF_DO_NOTIFY_MASK,%edx 988 testl $_TIF_DO_NOTIFY_MASK,%edx
986 jz retint_swapgs 989 jz retint_swapgs
987 TRACE_IRQS_ON 990 TRACE_IRQS_ON
988 ENABLE_INTERRUPTS(CLBR_NONE) 991 ENABLE_INTERRUPTS(CLBR_NONE)
989 SAVE_REST 992 SAVE_REST
990 movq $-1,ORIG_RAX(%rsp) 993 movq $-1,ORIG_RAX(%rsp)
991 xorl %esi,%esi # oldset 994 xorl %esi,%esi # oldset
992 movq %rsp,%rdi # &pt_regs 995 movq %rsp,%rdi # &pt_regs
993 call do_notify_resume 996 call do_notify_resume
994 RESTORE_REST 997 RESTORE_REST
995 DISABLE_INTERRUPTS(CLBR_NONE) 998 DISABLE_INTERRUPTS(CLBR_NONE)
996 TRACE_IRQS_OFF 999 TRACE_IRQS_OFF
997 GET_THREAD_INFO(%rcx) 1000 GET_THREAD_INFO(%rcx)
998 jmp retint_with_reschedule 1001 jmp retint_with_reschedule
999 1002
1000 #ifdef CONFIG_PREEMPT 1003 #ifdef CONFIG_PREEMPT
1001 /* Returning to kernel space. Check if we need preemption */ 1004 /* Returning to kernel space. Check if we need preemption */
1002 /* rcx: threadinfo. interrupts off. */ 1005 /* rcx: threadinfo. interrupts off. */
1003 ENTRY(retint_kernel) 1006 ENTRY(retint_kernel)
1004 cmpl $0,TI_preempt_count(%rcx) 1007 cmpl $0,TI_preempt_count(%rcx)
1005 jnz retint_restore_args 1008 jnz retint_restore_args
1006 bt $TIF_NEED_RESCHED,TI_flags(%rcx) 1009 bt $TIF_NEED_RESCHED,TI_flags(%rcx)
1007 jnc retint_restore_args 1010 jnc retint_restore_args
1008 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ 1011 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
1009 jnc retint_restore_args 1012 jnc retint_restore_args
1010 call preempt_schedule_irq 1013 call preempt_schedule_irq
1011 jmp exit_intr 1014 jmp exit_intr
1012 #endif 1015 #endif
1013 1016
1014 CFI_ENDPROC 1017 CFI_ENDPROC
1015 END(common_interrupt) 1018 END(common_interrupt)
1016 /* 1019 /*
1017 * End of kprobes section 1020 * End of kprobes section
1018 */ 1021 */
1019 .popsection 1022 .popsection
1020 1023
1021 /* 1024 /*
1022 * APIC interrupts. 1025 * APIC interrupts.
1023 */ 1026 */
1024 .macro apicinterrupt num sym do_sym 1027 .macro apicinterrupt num sym do_sym
1025 ENTRY(\sym) 1028 ENTRY(\sym)
1029 ASM_CLAC
1026 INTR_FRAME 1030 INTR_FRAME
1027 pushq_cfi $~(\num) 1031 pushq_cfi $~(\num)
1028 .Lcommon_\sym: 1032 .Lcommon_\sym:
1029 interrupt \do_sym 1033 interrupt \do_sym
1030 jmp ret_from_intr 1034 jmp ret_from_intr
1031 CFI_ENDPROC 1035 CFI_ENDPROC
1032 END(\sym) 1036 END(\sym)
1033 .endm 1037 .endm
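common_interrupt above and every entry macro in this file now open with ASM_CLAC, whose definition is not part of this hunk. Going by the commit description, it should be a no-op that the alternatives machinery patches into a CLAC instruction only when the CPU supports SMAP, so non-SMAP hardware never pays for it. A minimal C-level sketch of the same idea (ASM_NOP3, X86_FEATURE_SMAP and the CLAC byte encoding follow general x86 kernel conventions rather than this diff, so treat the exact spelling as an assumption):

#include <linux/compiler.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/nops.h>

/* Sketch only: clear EFLAGS.AC (CLAC, opcode 0f 01 ca) on SMAP-capable
 * CPUs, leave a 3-byte NOP behind on everything else. */
static __always_inline void clac(void)
{
	alternative(ASM_NOP3, ".byte 0x0f,0x01,0xca", X86_FEATURE_SMAP);
}

The assembly-side ASM_CLAC used at these entry points presumably emits the same alternative through the .altinstructions section so it can be dropped straight into the entry paths.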
1034 1038
1035 #ifdef CONFIG_SMP 1039 #ifdef CONFIG_SMP
1036 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ 1040 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
1037 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt 1041 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
1038 apicinterrupt REBOOT_VECTOR \ 1042 apicinterrupt REBOOT_VECTOR \
1039 reboot_interrupt smp_reboot_interrupt 1043 reboot_interrupt smp_reboot_interrupt
1040 #endif 1044 #endif
1041 1045
1042 #ifdef CONFIG_X86_UV 1046 #ifdef CONFIG_X86_UV
1043 apicinterrupt UV_BAU_MESSAGE \ 1047 apicinterrupt UV_BAU_MESSAGE \
1044 uv_bau_message_intr1 uv_bau_message_interrupt 1048 uv_bau_message_intr1 uv_bau_message_interrupt
1045 #endif 1049 #endif
1046 apicinterrupt LOCAL_TIMER_VECTOR \ 1050 apicinterrupt LOCAL_TIMER_VECTOR \
1047 apic_timer_interrupt smp_apic_timer_interrupt 1051 apic_timer_interrupt smp_apic_timer_interrupt
1048 apicinterrupt X86_PLATFORM_IPI_VECTOR \ 1052 apicinterrupt X86_PLATFORM_IPI_VECTOR \
1049 x86_platform_ipi smp_x86_platform_ipi 1053 x86_platform_ipi smp_x86_platform_ipi
1050 1054
1051 apicinterrupt THRESHOLD_APIC_VECTOR \ 1055 apicinterrupt THRESHOLD_APIC_VECTOR \
1052 threshold_interrupt smp_threshold_interrupt 1056 threshold_interrupt smp_threshold_interrupt
1053 apicinterrupt THERMAL_APIC_VECTOR \ 1057 apicinterrupt THERMAL_APIC_VECTOR \
1054 thermal_interrupt smp_thermal_interrupt 1058 thermal_interrupt smp_thermal_interrupt
1055 1059
1056 #ifdef CONFIG_SMP 1060 #ifdef CONFIG_SMP
1057 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ 1061 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
1058 call_function_single_interrupt smp_call_function_single_interrupt 1062 call_function_single_interrupt smp_call_function_single_interrupt
1059 apicinterrupt CALL_FUNCTION_VECTOR \ 1063 apicinterrupt CALL_FUNCTION_VECTOR \
1060 call_function_interrupt smp_call_function_interrupt 1064 call_function_interrupt smp_call_function_interrupt
1061 apicinterrupt RESCHEDULE_VECTOR \ 1065 apicinterrupt RESCHEDULE_VECTOR \
1062 reschedule_interrupt smp_reschedule_interrupt 1066 reschedule_interrupt smp_reschedule_interrupt
1063 #endif 1067 #endif
1064 1068
1065 apicinterrupt ERROR_APIC_VECTOR \ 1069 apicinterrupt ERROR_APIC_VECTOR \
1066 error_interrupt smp_error_interrupt 1070 error_interrupt smp_error_interrupt
1067 apicinterrupt SPURIOUS_APIC_VECTOR \ 1071 apicinterrupt SPURIOUS_APIC_VECTOR \
1068 spurious_interrupt smp_spurious_interrupt 1072 spurious_interrupt smp_spurious_interrupt
1069 1073
1070 #ifdef CONFIG_IRQ_WORK 1074 #ifdef CONFIG_IRQ_WORK
1071 apicinterrupt IRQ_WORK_VECTOR \ 1075 apicinterrupt IRQ_WORK_VECTOR \
1072 irq_work_interrupt smp_irq_work_interrupt 1076 irq_work_interrupt smp_irq_work_interrupt
1073 #endif 1077 #endif
1074 1078
1075 /* 1079 /*
1076 * Exception entry points. 1080 * Exception entry points.
1077 */ 1081 */
1078 .macro zeroentry sym do_sym 1082 .macro zeroentry sym do_sym
1079 ENTRY(\sym) 1083 ENTRY(\sym)
1084 ASM_CLAC
1080 INTR_FRAME 1085 INTR_FRAME
1081 PARAVIRT_ADJUST_EXCEPTION_FRAME 1086 PARAVIRT_ADJUST_EXCEPTION_FRAME
1082 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1087 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1083 subq $ORIG_RAX-R15, %rsp 1088 subq $ORIG_RAX-R15, %rsp
1084 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1089 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1085 call error_entry 1090 call error_entry
1086 DEFAULT_FRAME 0 1091 DEFAULT_FRAME 0
1087 movq %rsp,%rdi /* pt_regs pointer */ 1092 movq %rsp,%rdi /* pt_regs pointer */
1088 xorl %esi,%esi /* no error code */ 1093 xorl %esi,%esi /* no error code */
1089 call \do_sym 1094 call \do_sym
1090 jmp error_exit /* %ebx: no swapgs flag */ 1095 jmp error_exit /* %ebx: no swapgs flag */
1091 CFI_ENDPROC 1096 CFI_ENDPROC
1092 END(\sym) 1097 END(\sym)
1093 .endm 1098 .endm
1094 1099
1095 .macro paranoidzeroentry sym do_sym 1100 .macro paranoidzeroentry sym do_sym
1096 ENTRY(\sym) 1101 ENTRY(\sym)
1102 ASM_CLAC
1097 INTR_FRAME 1103 INTR_FRAME
1098 PARAVIRT_ADJUST_EXCEPTION_FRAME 1104 PARAVIRT_ADJUST_EXCEPTION_FRAME
1099 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1105 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1100 subq $ORIG_RAX-R15, %rsp 1106 subq $ORIG_RAX-R15, %rsp
1101 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1107 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1102 call save_paranoid 1108 call save_paranoid
1103 TRACE_IRQS_OFF 1109 TRACE_IRQS_OFF
1104 movq %rsp,%rdi /* pt_regs pointer */ 1110 movq %rsp,%rdi /* pt_regs pointer */
1105 xorl %esi,%esi /* no error code */ 1111 xorl %esi,%esi /* no error code */
1106 call \do_sym 1112 call \do_sym
1107 jmp paranoid_exit /* %ebx: no swapgs flag */ 1113 jmp paranoid_exit /* %ebx: no swapgs flag */
1108 CFI_ENDPROC 1114 CFI_ENDPROC
1109 END(\sym) 1115 END(\sym)
1110 .endm 1116 .endm
1111 1117
1112 #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) 1118 #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
1113 .macro paranoidzeroentry_ist sym do_sym ist 1119 .macro paranoidzeroentry_ist sym do_sym ist
1114 ENTRY(\sym) 1120 ENTRY(\sym)
1121 ASM_CLAC
1115 INTR_FRAME 1122 INTR_FRAME
1116 PARAVIRT_ADJUST_EXCEPTION_FRAME 1123 PARAVIRT_ADJUST_EXCEPTION_FRAME
1117 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1124 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1118 subq $ORIG_RAX-R15, %rsp 1125 subq $ORIG_RAX-R15, %rsp
1119 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1126 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1120 call save_paranoid 1127 call save_paranoid
1121 TRACE_IRQS_OFF_DEBUG 1128 TRACE_IRQS_OFF_DEBUG
1122 movq %rsp,%rdi /* pt_regs pointer */ 1129 movq %rsp,%rdi /* pt_regs pointer */
1123 xorl %esi,%esi /* no error code */ 1130 xorl %esi,%esi /* no error code */
1124 subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) 1131 subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
1125 call \do_sym 1132 call \do_sym
1126 addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) 1133 addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
1127 jmp paranoid_exit /* %ebx: no swapgs flag */ 1134 jmp paranoid_exit /* %ebx: no swapgs flag */
1128 CFI_ENDPROC 1135 CFI_ENDPROC
1129 END(\sym) 1136 END(\sym)
1130 .endm 1137 .endm
1131 1138
1132 .macro errorentry sym do_sym 1139 .macro errorentry sym do_sym
1133 ENTRY(\sym) 1140 ENTRY(\sym)
1141 ASM_CLAC
1134 XCPT_FRAME 1142 XCPT_FRAME
1135 PARAVIRT_ADJUST_EXCEPTION_FRAME 1143 PARAVIRT_ADJUST_EXCEPTION_FRAME
1136 subq $ORIG_RAX-R15, %rsp 1144 subq $ORIG_RAX-R15, %rsp
1137 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1145 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1138 call error_entry 1146 call error_entry
1139 DEFAULT_FRAME 0 1147 DEFAULT_FRAME 0
1140 movq %rsp,%rdi /* pt_regs pointer */ 1148 movq %rsp,%rdi /* pt_regs pointer */
1141 movq ORIG_RAX(%rsp),%rsi /* get error code */ 1149 movq ORIG_RAX(%rsp),%rsi /* get error code */
1142 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ 1150 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
1143 call \do_sym 1151 call \do_sym
1144 jmp error_exit /* %ebx: no swapgs flag */ 1152 jmp error_exit /* %ebx: no swapgs flag */
1145 CFI_ENDPROC 1153 CFI_ENDPROC
1146 END(\sym) 1154 END(\sym)
1147 .endm 1155 .endm
1148 1156
1149 /* error code is on the stack already */ 1157 /* error code is on the stack already */
1150 .macro paranoiderrorentry sym do_sym 1158 .macro paranoiderrorentry sym do_sym
1151 ENTRY(\sym) 1159 ENTRY(\sym)
1160 ASM_CLAC
1152 XCPT_FRAME 1161 XCPT_FRAME
1153 PARAVIRT_ADJUST_EXCEPTION_FRAME 1162 PARAVIRT_ADJUST_EXCEPTION_FRAME
1154 subq $ORIG_RAX-R15, %rsp 1163 subq $ORIG_RAX-R15, %rsp
1155 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1164 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1156 call save_paranoid 1165 call save_paranoid
1157 DEFAULT_FRAME 0 1166 DEFAULT_FRAME 0
1158 TRACE_IRQS_OFF 1167 TRACE_IRQS_OFF
1159 movq %rsp,%rdi /* pt_regs pointer */ 1168 movq %rsp,%rdi /* pt_regs pointer */
1160 movq ORIG_RAX(%rsp),%rsi /* get error code */ 1169 movq ORIG_RAX(%rsp),%rsi /* get error code */
1161 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ 1170 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
1162 call \do_sym 1171 call \do_sym
1163 jmp paranoid_exit /* %ebx: no swapgs flag */ 1172 jmp paranoid_exit /* %ebx: no swapgs flag */
1164 CFI_ENDPROC 1173 CFI_ENDPROC
1165 END(\sym) 1174 END(\sym)
1166 .endm 1175 .endm
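All five entry macros above hand off to C handlers of the same shape, which is why each one loads %rdi with the pt_regs pointer and %rsi with either zero or the error code pulled from ORIG_RAX. A sketch of the C side (do_general_protection and dotraplinkage are the usual kernel spellings, shown purely as an illustration of the calling convention):

#include <asm/traps.h>		/* dotraplinkage, struct pt_regs */

/* Shape shared by the do_* handlers dispatched above: the zeroentry
 * variants pass 0 for error_code, errorentry/paranoiderrorentry pass
 * the value the CPU pushed and the stub saved in ORIG_RAX. */
dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code);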
1167 1176
1168 zeroentry divide_error do_divide_error 1177 zeroentry divide_error do_divide_error
1169 zeroentry overflow do_overflow 1178 zeroentry overflow do_overflow
1170 zeroentry bounds do_bounds 1179 zeroentry bounds do_bounds
1171 zeroentry invalid_op do_invalid_op 1180 zeroentry invalid_op do_invalid_op
1172 zeroentry device_not_available do_device_not_available 1181 zeroentry device_not_available do_device_not_available
1173 paranoiderrorentry double_fault do_double_fault 1182 paranoiderrorentry double_fault do_double_fault
1174 zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun 1183 zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
1175 errorentry invalid_TSS do_invalid_TSS 1184 errorentry invalid_TSS do_invalid_TSS
1176 errorentry segment_not_present do_segment_not_present 1185 errorentry segment_not_present do_segment_not_present
1177 zeroentry spurious_interrupt_bug do_spurious_interrupt_bug 1186 zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
1178 zeroentry coprocessor_error do_coprocessor_error 1187 zeroentry coprocessor_error do_coprocessor_error
1179 errorentry alignment_check do_alignment_check 1188 errorentry alignment_check do_alignment_check
1180 zeroentry simd_coprocessor_error do_simd_coprocessor_error 1189 zeroentry simd_coprocessor_error do_simd_coprocessor_error
1181 1190
1182 1191
1183 /* Reload gs selector with exception handling */ 1192 /* Reload gs selector with exception handling */
1184 /* edi: new selector */ 1193 /* edi: new selector */
1185 ENTRY(native_load_gs_index) 1194 ENTRY(native_load_gs_index)
1186 CFI_STARTPROC 1195 CFI_STARTPROC
1187 pushfq_cfi 1196 pushfq_cfi
1188 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) 1197 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
1189 SWAPGS 1198 SWAPGS
1190 gs_change: 1199 gs_change:
1191 movl %edi,%gs 1200 movl %edi,%gs
1192 2: mfence /* workaround */ 1201 2: mfence /* workaround */
1193 SWAPGS 1202 SWAPGS
1194 popfq_cfi 1203 popfq_cfi
1195 ret 1204 ret
1196 CFI_ENDPROC 1205 CFI_ENDPROC
1197 END(native_load_gs_index) 1206 END(native_load_gs_index)
1198 1207
1199 _ASM_EXTABLE(gs_change,bad_gs) 1208 _ASM_EXTABLE(gs_change,bad_gs)
1200 .section .fixup,"ax" 1209 .section .fixup,"ax"
1201 /* running with kernelgs */ 1210 /* running with kernelgs */
1202 bad_gs: 1211 bad_gs:
1203 SWAPGS /* switch back to user gs */ 1212 SWAPGS /* switch back to user gs */
1204 xorl %eax,%eax 1213 xorl %eax,%eax
1205 movl %eax,%gs 1214 movl %eax,%gs
1206 jmp 2b 1215 jmp 2b
1207 .previous 1216 .previous
1208 1217
1209 ENTRY(kernel_thread_helper) 1218 ENTRY(kernel_thread_helper)
1210 pushq $0 # fake return address 1219 pushq $0 # fake return address
1211 CFI_STARTPROC 1220 CFI_STARTPROC
1212 /* 1221 /*
1213 * Here we are in the child and the registers are set as they were 1222 * Here we are in the child and the registers are set as they were
1214 * at kernel_thread() invocation in the parent. 1223 * at kernel_thread() invocation in the parent.
1215 */ 1224 */
1216 call *%rsi 1225 call *%rsi
1217 # exit 1226 # exit
1218 mov %eax, %edi 1227 mov %eax, %edi
1219 call do_exit 1228 call do_exit
1220 ud2 # padding for call trace 1229 ud2 # padding for call trace
1221 CFI_ENDPROC 1230 CFI_ENDPROC
1222 END(kernel_thread_helper) 1231 END(kernel_thread_helper)
1223 1232
1224 /* 1233 /*
1225 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. 1234 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1226 * 1235 *
1227 * C extern interface: 1236 * C extern interface:
1228 * extern long execve(const char *name, char **argv, char **envp) 1237 * extern long execve(const char *name, char **argv, char **envp)
1229 * 1238 *
1230 * asm input arguments: 1239 * asm input arguments:
1231 * rdi: name, rsi: argv, rdx: envp 1240 * rdi: name, rsi: argv, rdx: envp
1232 * 1241 *
1233 * We want to fall back into: 1242 * We want to fall back into:
1234 * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs) 1243 * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)
1235 * 1244 *
1236 * do_sys_execve asm fallback arguments: 1245 * do_sys_execve asm fallback arguments:
1237 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack 1246 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
1238 */ 1247 */
1239 ENTRY(kernel_execve) 1248 ENTRY(kernel_execve)
1240 CFI_STARTPROC 1249 CFI_STARTPROC
1241 FAKE_STACK_FRAME $0 1250 FAKE_STACK_FRAME $0
1242 SAVE_ALL 1251 SAVE_ALL
1243 movq %rsp,%rcx 1252 movq %rsp,%rcx
1244 call sys_execve 1253 call sys_execve
1245 movq %rax, RAX(%rsp) 1254 movq %rax, RAX(%rsp)
1246 RESTORE_REST 1255 RESTORE_REST
1247 testq %rax,%rax 1256 testq %rax,%rax
1248 je int_ret_from_sys_call 1257 je int_ret_from_sys_call
1249 RESTORE_ARGS 1258 RESTORE_ARGS
1250 UNFAKE_STACK_FRAME 1259 UNFAKE_STACK_FRAME
1251 ret 1260 ret
1252 CFI_ENDPROC 1261 CFI_ENDPROC
1253 END(kernel_execve) 1262 END(kernel_execve)
1254 1263
1255 /* Call softirq on interrupt stack. Interrupts are off. */ 1264 /* Call softirq on interrupt stack. Interrupts are off. */
1256 ENTRY(call_softirq) 1265 ENTRY(call_softirq)
1257 CFI_STARTPROC 1266 CFI_STARTPROC
1258 pushq_cfi %rbp 1267 pushq_cfi %rbp
1259 CFI_REL_OFFSET rbp,0 1268 CFI_REL_OFFSET rbp,0
1260 mov %rsp,%rbp 1269 mov %rsp,%rbp
1261 CFI_DEF_CFA_REGISTER rbp 1270 CFI_DEF_CFA_REGISTER rbp
1262 incl PER_CPU_VAR(irq_count) 1271 incl PER_CPU_VAR(irq_count)
1263 cmove PER_CPU_VAR(irq_stack_ptr),%rsp 1272 cmove PER_CPU_VAR(irq_stack_ptr),%rsp
1264 push %rbp # backlink for old unwinder 1273 push %rbp # backlink for old unwinder
1265 call __do_softirq 1274 call __do_softirq
1266 leaveq 1275 leaveq
1267 CFI_RESTORE rbp 1276 CFI_RESTORE rbp
1268 CFI_DEF_CFA_REGISTER rsp 1277 CFI_DEF_CFA_REGISTER rsp
1269 CFI_ADJUST_CFA_OFFSET -8 1278 CFI_ADJUST_CFA_OFFSET -8
1270 decl PER_CPU_VAR(irq_count) 1279 decl PER_CPU_VAR(irq_count)
1271 ret 1280 ret
1272 CFI_ENDPROC 1281 CFI_ENDPROC
1273 END(call_softirq) 1282 END(call_softirq)
1274 1283
1275 #ifdef CONFIG_XEN 1284 #ifdef CONFIG_XEN
1276 zeroentry xen_hypervisor_callback xen_do_hypervisor_callback 1285 zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
1277 1286
1278 /* 1287 /*
1279 * A note on the "critical region" in our callback handler. 1288 * A note on the "critical region" in our callback handler.
1280 * We want to avoid stacking callback handlers due to events occurring 1289 * We want to avoid stacking callback handlers due to events occurring
1281 * during handling of the last event. To do this, we keep events disabled 1290 * during handling of the last event. To do this, we keep events disabled
1282 * until we've done all processing. HOWEVER, we must enable events before 1291 * until we've done all processing. HOWEVER, we must enable events before
1283 * popping the stack frame (can't be done atomically) and so it would still 1292 * popping the stack frame (can't be done atomically) and so it would still
1284 * be possible to get enough handler activations to overflow the stack. 1293 * be possible to get enough handler activations to overflow the stack.
1285 * Although unlikely, bugs of that kind are hard to track down, so we'd 1294 * Although unlikely, bugs of that kind are hard to track down, so we'd
1286 * like to avoid the possibility. 1295 * like to avoid the possibility.
1287 * So, on entry to the handler we detect whether we interrupted an 1296 * So, on entry to the handler we detect whether we interrupted an
1288 * existing activation in its critical region -- if so, we pop the current 1297 * existing activation in its critical region -- if so, we pop the current
1289 * activation and restart the handler using the previous one. 1298 * activation and restart the handler using the previous one.
1290 */ 1299 */
1291 ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *) 1300 ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
1292 CFI_STARTPROC 1301 CFI_STARTPROC
1293 /* 1302 /*
1294 * Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will 1303 * Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
1295 * see the correct pointer to the pt_regs. 1304 * see the correct pointer to the pt_regs.
1296 */ 1305 */
1297 movq %rdi, %rsp # we don't return, adjust the stack frame 1306 movq %rdi, %rsp # we don't return, adjust the stack frame
1298 CFI_ENDPROC 1307 CFI_ENDPROC
1299 DEFAULT_FRAME 1308 DEFAULT_FRAME
1300 11: incl PER_CPU_VAR(irq_count) 1309 11: incl PER_CPU_VAR(irq_count)
1301 movq %rsp,%rbp 1310 movq %rsp,%rbp
1302 CFI_DEF_CFA_REGISTER rbp 1311 CFI_DEF_CFA_REGISTER rbp
1303 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp 1312 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
1304 pushq %rbp # backlink for old unwinder 1313 pushq %rbp # backlink for old unwinder
1305 call xen_evtchn_do_upcall 1314 call xen_evtchn_do_upcall
1306 popq %rsp 1315 popq %rsp
1307 CFI_DEF_CFA_REGISTER rsp 1316 CFI_DEF_CFA_REGISTER rsp
1308 decl PER_CPU_VAR(irq_count) 1317 decl PER_CPU_VAR(irq_count)
1309 jmp error_exit 1318 jmp error_exit
1310 CFI_ENDPROC 1319 CFI_ENDPROC
1311 END(xen_do_hypervisor_callback) 1320 END(xen_do_hypervisor_callback)
1312 1321
1313 /* 1322 /*
1314 * Hypervisor uses this for application faults while it executes. 1323 * Hypervisor uses this for application faults while it executes.
1315 * We get here for two reasons: 1324 * We get here for two reasons:
1316 * 1. Fault while reloading DS, ES, FS or GS 1325 * 1. Fault while reloading DS, ES, FS or GS
1317 * 2. Fault while executing IRET 1326 * 2. Fault while executing IRET
1318 * Category 1 we do not need to fix up as Xen has already reloaded all segment 1327 * Category 1 we do not need to fix up as Xen has already reloaded all segment
1319 * registers that could be reloaded and zeroed the others. 1328 * registers that could be reloaded and zeroed the others.
1320 * Category 2 we fix up by killing the current process. We cannot use the 1329 * Category 2 we fix up by killing the current process. We cannot use the
1321 * normal Linux return path in this case because if we use the IRET hypercall 1330 * normal Linux return path in this case because if we use the IRET hypercall
1322 * to pop the stack frame we end up in an infinite loop of failsafe callbacks. 1331 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1323 * We distinguish between categories by comparing each saved segment register 1332 * We distinguish between categories by comparing each saved segment register
1324 * with its current contents: any discrepancy means we are in category 1. 1333 * with its current contents: any discrepancy means we are in category 1.
1325 */ 1334 */
1326 ENTRY(xen_failsafe_callback) 1335 ENTRY(xen_failsafe_callback)
1327 INTR_FRAME 1 (6*8) 1336 INTR_FRAME 1 (6*8)
1328 /*CFI_REL_OFFSET gs,GS*/ 1337 /*CFI_REL_OFFSET gs,GS*/
1329 /*CFI_REL_OFFSET fs,FS*/ 1338 /*CFI_REL_OFFSET fs,FS*/
1330 /*CFI_REL_OFFSET es,ES*/ 1339 /*CFI_REL_OFFSET es,ES*/
1331 /*CFI_REL_OFFSET ds,DS*/ 1340 /*CFI_REL_OFFSET ds,DS*/
1332 CFI_REL_OFFSET r11,8 1341 CFI_REL_OFFSET r11,8
1333 CFI_REL_OFFSET rcx,0 1342 CFI_REL_OFFSET rcx,0
1334 movw %ds,%cx 1343 movw %ds,%cx
1335 cmpw %cx,0x10(%rsp) 1344 cmpw %cx,0x10(%rsp)
1336 CFI_REMEMBER_STATE 1345 CFI_REMEMBER_STATE
1337 jne 1f 1346 jne 1f
1338 movw %es,%cx 1347 movw %es,%cx
1339 cmpw %cx,0x18(%rsp) 1348 cmpw %cx,0x18(%rsp)
1340 jne 1f 1349 jne 1f
1341 movw %fs,%cx 1350 movw %fs,%cx
1342 cmpw %cx,0x20(%rsp) 1351 cmpw %cx,0x20(%rsp)
1343 jne 1f 1352 jne 1f
1344 movw %gs,%cx 1353 movw %gs,%cx
1345 cmpw %cx,0x28(%rsp) 1354 cmpw %cx,0x28(%rsp)
1346 jne 1f 1355 jne 1f
1347 /* All segments match their saved values => Category 2 (Bad IRET). */ 1356 /* All segments match their saved values => Category 2 (Bad IRET). */
1348 movq (%rsp),%rcx 1357 movq (%rsp),%rcx
1349 CFI_RESTORE rcx 1358 CFI_RESTORE rcx
1350 movq 8(%rsp),%r11 1359 movq 8(%rsp),%r11
1351 CFI_RESTORE r11 1360 CFI_RESTORE r11
1352 addq $0x30,%rsp 1361 addq $0x30,%rsp
1353 CFI_ADJUST_CFA_OFFSET -0x30 1362 CFI_ADJUST_CFA_OFFSET -0x30
1354 pushq_cfi $0 /* RIP */ 1363 pushq_cfi $0 /* RIP */
1355 pushq_cfi %r11 1364 pushq_cfi %r11
1356 pushq_cfi %rcx 1365 pushq_cfi %rcx
1357 jmp general_protection 1366 jmp general_protection
1358 CFI_RESTORE_STATE 1367 CFI_RESTORE_STATE
1359 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ 1368 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
1360 movq (%rsp),%rcx 1369 movq (%rsp),%rcx
1361 CFI_RESTORE rcx 1370 CFI_RESTORE rcx
1362 movq 8(%rsp),%r11 1371 movq 8(%rsp),%r11
1363 CFI_RESTORE r11 1372 CFI_RESTORE r11
1364 addq $0x30,%rsp 1373 addq $0x30,%rsp
1365 CFI_ADJUST_CFA_OFFSET -0x30 1374 CFI_ADJUST_CFA_OFFSET -0x30
1366 pushq_cfi $0 1375 pushq_cfi $0
1367 SAVE_ALL 1376 SAVE_ALL
1368 jmp error_exit 1377 jmp error_exit
1369 CFI_ENDPROC 1378 CFI_ENDPROC
1370 END(xen_failsafe_callback) 1379 END(xen_failsafe_callback)
1371 1380
1372 apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ 1381 apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
1373 xen_hvm_callback_vector xen_evtchn_do_upcall 1382 xen_hvm_callback_vector xen_evtchn_do_upcall
1374 1383
1375 #endif /* CONFIG_XEN */ 1384 #endif /* CONFIG_XEN */
1376 1385
1377 /* 1386 /*
1378 * Some functions should be protected against kprobes 1387 * Some functions should be protected against kprobes
1379 */ 1388 */
1380 .pushsection .kprobes.text, "ax" 1389 .pushsection .kprobes.text, "ax"
1381 1390
1382 paranoidzeroentry_ist debug do_debug DEBUG_STACK 1391 paranoidzeroentry_ist debug do_debug DEBUG_STACK
1383 paranoidzeroentry_ist int3 do_int3 DEBUG_STACK 1392 paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
1384 paranoiderrorentry stack_segment do_stack_segment 1393 paranoiderrorentry stack_segment do_stack_segment
1385 #ifdef CONFIG_XEN 1394 #ifdef CONFIG_XEN
1386 zeroentry xen_debug do_debug 1395 zeroentry xen_debug do_debug
1387 zeroentry xen_int3 do_int3 1396 zeroentry xen_int3 do_int3
1388 errorentry xen_stack_segment do_stack_segment 1397 errorentry xen_stack_segment do_stack_segment
1389 #endif 1398 #endif
1390 errorentry general_protection do_general_protection 1399 errorentry general_protection do_general_protection
1391 errorentry page_fault do_page_fault 1400 errorentry page_fault do_page_fault
1392 #ifdef CONFIG_KVM_GUEST 1401 #ifdef CONFIG_KVM_GUEST
1393 errorentry async_page_fault do_async_page_fault 1402 errorentry async_page_fault do_async_page_fault
1394 #endif 1403 #endif
1395 #ifdef CONFIG_X86_MCE 1404 #ifdef CONFIG_X86_MCE
1396 paranoidzeroentry machine_check *machine_check_vector(%rip) 1405 paranoidzeroentry machine_check *machine_check_vector(%rip)
1397 #endif 1406 #endif
1398 1407
1399 /* 1408 /*
1400 * "Paranoid" exit path from exception stack. 1409 * "Paranoid" exit path from exception stack.
1401 * Paranoid because this is used by NMIs and cannot take 1410 * Paranoid because this is used by NMIs and cannot take
1402 * any kernel state for granted. 1411 * any kernel state for granted.
1403 * We don't do kernel preemption checks here, because only 1412 * We don't do kernel preemption checks here, because only
1404 * NMI should be common and it does not enable IRQs and 1413 * NMI should be common and it does not enable IRQs and
1405 * cannot get reschedule ticks. 1414 * cannot get reschedule ticks.
1406 * 1415 *
1407 * "trace" is 0 for the NMI handler only, because irq-tracing 1416 * "trace" is 0 for the NMI handler only, because irq-tracing
1408 * is fundamentally NMI-unsafe. (we cannot change the soft and 1417 * is fundamentally NMI-unsafe. (we cannot change the soft and
1409 * hard flags at once, atomically) 1418 * hard flags at once, atomically)
1410 */ 1419 */
1411 1420
1412 /* ebx: no swapgs flag */ 1421 /* ebx: no swapgs flag */
1413 ENTRY(paranoid_exit) 1422 ENTRY(paranoid_exit)
1414 DEFAULT_FRAME 1423 DEFAULT_FRAME
1415 DISABLE_INTERRUPTS(CLBR_NONE) 1424 DISABLE_INTERRUPTS(CLBR_NONE)
1416 TRACE_IRQS_OFF_DEBUG 1425 TRACE_IRQS_OFF_DEBUG
1417 testl %ebx,%ebx /* swapgs needed? */ 1426 testl %ebx,%ebx /* swapgs needed? */
1418 jnz paranoid_restore 1427 jnz paranoid_restore
1419 testl $3,CS(%rsp) 1428 testl $3,CS(%rsp)
1420 jnz paranoid_userspace 1429 jnz paranoid_userspace
1421 paranoid_swapgs: 1430 paranoid_swapgs:
1422 TRACE_IRQS_IRETQ 0 1431 TRACE_IRQS_IRETQ 0
1423 SWAPGS_UNSAFE_STACK 1432 SWAPGS_UNSAFE_STACK
1424 RESTORE_ALL 8 1433 RESTORE_ALL 8
1425 jmp irq_return 1434 jmp irq_return
1426 paranoid_restore: 1435 paranoid_restore:
1427 TRACE_IRQS_IRETQ_DEBUG 0 1436 TRACE_IRQS_IRETQ_DEBUG 0
1428 RESTORE_ALL 8 1437 RESTORE_ALL 8
1429 jmp irq_return 1438 jmp irq_return
1430 paranoid_userspace: 1439 paranoid_userspace:
1431 GET_THREAD_INFO(%rcx) 1440 GET_THREAD_INFO(%rcx)
1432 movl TI_flags(%rcx),%ebx 1441 movl TI_flags(%rcx),%ebx
1433 andl $_TIF_WORK_MASK,%ebx 1442 andl $_TIF_WORK_MASK,%ebx
1434 jz paranoid_swapgs 1443 jz paranoid_swapgs
1435 movq %rsp,%rdi /* &pt_regs */ 1444 movq %rsp,%rdi /* &pt_regs */
1436 call sync_regs 1445 call sync_regs
1437 movq %rax,%rsp /* switch stack for scheduling */ 1446 movq %rax,%rsp /* switch stack for scheduling */
1438 testl $_TIF_NEED_RESCHED,%ebx 1447 testl $_TIF_NEED_RESCHED,%ebx
1439 jnz paranoid_schedule 1448 jnz paranoid_schedule
1440 movl %ebx,%edx /* arg3: thread flags */ 1449 movl %ebx,%edx /* arg3: thread flags */
1441 TRACE_IRQS_ON 1450 TRACE_IRQS_ON
1442 ENABLE_INTERRUPTS(CLBR_NONE) 1451 ENABLE_INTERRUPTS(CLBR_NONE)
1443 xorl %esi,%esi /* arg2: oldset */ 1452 xorl %esi,%esi /* arg2: oldset */
1444 movq %rsp,%rdi /* arg1: &pt_regs */ 1453 movq %rsp,%rdi /* arg1: &pt_regs */
1445 call do_notify_resume 1454 call do_notify_resume
1446 DISABLE_INTERRUPTS(CLBR_NONE) 1455 DISABLE_INTERRUPTS(CLBR_NONE)
1447 TRACE_IRQS_OFF 1456 TRACE_IRQS_OFF
1448 jmp paranoid_userspace 1457 jmp paranoid_userspace
1449 paranoid_schedule: 1458 paranoid_schedule:
1450 TRACE_IRQS_ON 1459 TRACE_IRQS_ON
1451 ENABLE_INTERRUPTS(CLBR_ANY) 1460 ENABLE_INTERRUPTS(CLBR_ANY)
1452 call schedule 1461 call schedule
1453 DISABLE_INTERRUPTS(CLBR_ANY) 1462 DISABLE_INTERRUPTS(CLBR_ANY)
1454 TRACE_IRQS_OFF 1463 TRACE_IRQS_OFF
1455 jmp paranoid_userspace 1464 jmp paranoid_userspace
1456 CFI_ENDPROC 1465 CFI_ENDPROC
1457 END(paranoid_exit) 1466 END(paranoid_exit)
1458 1467
1459 /* 1468 /*
1460 * Exception entry point. This expects an error code/orig_rax on the stack. 1469 * Exception entry point. This expects an error code/orig_rax on the stack.
1461 * returns in "no swapgs flag" in %ebx. 1470 * returns in "no swapgs flag" in %ebx.
1462 */ 1471 */
1463 ENTRY(error_entry) 1472 ENTRY(error_entry)
1464 XCPT_FRAME 1473 XCPT_FRAME
1465 CFI_ADJUST_CFA_OFFSET 15*8 1474 CFI_ADJUST_CFA_OFFSET 15*8
1466 /* oldrax contains error code */ 1475 /* oldrax contains error code */
1467 cld 1476 cld
1468 movq_cfi rdi, RDI+8 1477 movq_cfi rdi, RDI+8
1469 movq_cfi rsi, RSI+8 1478 movq_cfi rsi, RSI+8
1470 movq_cfi rdx, RDX+8 1479 movq_cfi rdx, RDX+8
1471 movq_cfi rcx, RCX+8 1480 movq_cfi rcx, RCX+8
1472 movq_cfi rax, RAX+8 1481 movq_cfi rax, RAX+8
1473 movq_cfi r8, R8+8 1482 movq_cfi r8, R8+8
1474 movq_cfi r9, R9+8 1483 movq_cfi r9, R9+8
1475 movq_cfi r10, R10+8 1484 movq_cfi r10, R10+8
1476 movq_cfi r11, R11+8 1485 movq_cfi r11, R11+8
1477 movq_cfi rbx, RBX+8 1486 movq_cfi rbx, RBX+8
1478 movq_cfi rbp, RBP+8 1487 movq_cfi rbp, RBP+8
1479 movq_cfi r12, R12+8 1488 movq_cfi r12, R12+8
1480 movq_cfi r13, R13+8 1489 movq_cfi r13, R13+8
1481 movq_cfi r14, R14+8 1490 movq_cfi r14, R14+8
1482 movq_cfi r15, R15+8 1491 movq_cfi r15, R15+8
1483 xorl %ebx,%ebx 1492 xorl %ebx,%ebx
1484 testl $3,CS+8(%rsp) 1493 testl $3,CS+8(%rsp)
1485 je error_kernelspace 1494 je error_kernelspace
1486 error_swapgs: 1495 error_swapgs:
1487 SWAPGS 1496 SWAPGS
1488 error_sti: 1497 error_sti:
1489 TRACE_IRQS_OFF 1498 TRACE_IRQS_OFF
1490 ret 1499 ret
1491 1500
1492 /* 1501 /*
1493 * There are two places in the kernel that can potentially fault with 1502 * There are two places in the kernel that can potentially fault with
1494 * usergs. Handle them here. The exception handlers after iret run with 1503 * usergs. Handle them here. The exception handlers after iret run with
1495 * kernel gs again, so don't set the user space flag. B stepping K8s 1504 * kernel gs again, so don't set the user space flag. B stepping K8s
1496 * sometimes report an truncated RIP for IRET exceptions returning to 1505 * sometimes report an truncated RIP for IRET exceptions returning to
1497 * compat mode. Check for these here too. 1506 * compat mode. Check for these here too.
1498 */ 1507 */
1499 error_kernelspace: 1508 error_kernelspace:
1500 incl %ebx 1509 incl %ebx
1501 leaq irq_return(%rip),%rcx 1510 leaq irq_return(%rip),%rcx
1502 cmpq %rcx,RIP+8(%rsp) 1511 cmpq %rcx,RIP+8(%rsp)
1503 je error_swapgs 1512 je error_swapgs
1504 movl %ecx,%eax /* zero extend */ 1513 movl %ecx,%eax /* zero extend */
1505 cmpq %rax,RIP+8(%rsp) 1514 cmpq %rax,RIP+8(%rsp)
1506 je bstep_iret 1515 je bstep_iret
1507 cmpq $gs_change,RIP+8(%rsp) 1516 cmpq $gs_change,RIP+8(%rsp)
1508 je error_swapgs 1517 je error_swapgs
1509 jmp error_sti 1518 jmp error_sti
1510 1519
1511 bstep_iret: 1520 bstep_iret:
1512 /* Fix truncated RIP */ 1521 /* Fix truncated RIP */
1513 movq %rcx,RIP+8(%rsp) 1522 movq %rcx,RIP+8(%rsp)
1514 jmp error_swapgs 1523 jmp error_swapgs
1515 CFI_ENDPROC 1524 CFI_ENDPROC
1516 END(error_entry) 1525 END(error_entry)
1517 1526
1518 1527
1519 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ 1528 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1520 ENTRY(error_exit) 1529 ENTRY(error_exit)
1521 DEFAULT_FRAME 1530 DEFAULT_FRAME
1522 movl %ebx,%eax 1531 movl %ebx,%eax
1523 RESTORE_REST 1532 RESTORE_REST
1524 DISABLE_INTERRUPTS(CLBR_NONE) 1533 DISABLE_INTERRUPTS(CLBR_NONE)
1525 TRACE_IRQS_OFF 1534 TRACE_IRQS_OFF
1526 GET_THREAD_INFO(%rcx) 1535 GET_THREAD_INFO(%rcx)
1527 testl %eax,%eax 1536 testl %eax,%eax
1528 jne retint_kernel 1537 jne retint_kernel
1529 LOCKDEP_SYS_EXIT_IRQ 1538 LOCKDEP_SYS_EXIT_IRQ
1530 movl TI_flags(%rcx),%edx 1539 movl TI_flags(%rcx),%edx
1531 movl $_TIF_WORK_MASK,%edi 1540 movl $_TIF_WORK_MASK,%edi
1532 andl %edi,%edx 1541 andl %edi,%edx
1533 jnz retint_careful 1542 jnz retint_careful
1534 jmp retint_swapgs 1543 jmp retint_swapgs
1535 CFI_ENDPROC 1544 CFI_ENDPROC
1536 END(error_exit) 1545 END(error_exit)
1537 1546
1538 /* 1547 /*
1539 * Test if a given stack is an NMI stack or not. 1548 * Test if a given stack is an NMI stack or not.
1540 */ 1549 */
1541 .macro test_in_nmi reg stack nmi_ret normal_ret 1550 .macro test_in_nmi reg stack nmi_ret normal_ret
1542 cmpq %\reg, \stack 1551 cmpq %\reg, \stack
1543 ja \normal_ret 1552 ja \normal_ret
1544 subq $EXCEPTION_STKSZ, %\reg 1553 subq $EXCEPTION_STKSZ, %\reg
1545 cmpq %\reg, \stack 1554 cmpq %\reg, \stack
1546 jb \normal_ret 1555 jb \normal_ret
1547 jmp \nmi_ret 1556 jmp \nmi_ret
1548 .endm 1557 .endm
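Restated in C (the helper name is made up for illustration), test_in_nmi reports the NMI stack when the interrupted stack pointer sits at or below the value passed in \reg and no more than EXCEPTION_STKSZ below it; both comparisons above are unsigned (ja/jb), hence the unsigned longs here.

#include <linux/types.h>

/* C-level equivalent of the test_in_nmi macro above (hypothetical helper);
 * stksz stands in for EXCEPTION_STKSZ. */
static inline bool stack_in_nmi_range(unsigned long top, unsigned long sp,
				      unsigned long stksz)
{
	return sp <= top && sp >= top - stksz;
}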
1549 1558
1550 /* runs on exception stack */ 1559 /* runs on exception stack */
1551 ENTRY(nmi) 1560 ENTRY(nmi)
1552 INTR_FRAME 1561 INTR_FRAME
1553 PARAVIRT_ADJUST_EXCEPTION_FRAME 1562 PARAVIRT_ADJUST_EXCEPTION_FRAME
1554 /* 1563 /*
1555 * We allow breakpoints in NMIs. If a breakpoint occurs, then 1564 * We allow breakpoints in NMIs. If a breakpoint occurs, then
1556 * the iretq it performs will take us out of NMI context. 1565 * the iretq it performs will take us out of NMI context.
1557 * This means that we can have nested NMIs where the next 1566 * This means that we can have nested NMIs where the next
1558 * NMI is using the top of the stack of the previous NMI. We 1567 * NMI is using the top of the stack of the previous NMI. We
1559 * can't let it execute because the nested NMI will corrupt the 1568 * can't let it execute because the nested NMI will corrupt the
1560 * stack of the previous NMI. NMI handlers are not re-entrant 1569 * stack of the previous NMI. NMI handlers are not re-entrant
1561 * anyway. 1570 * anyway.
1562 * 1571 *
1563 * To handle this case we do the following: 1572 * To handle this case we do the following:
1564 * Check a special location on the stack that contains 1573 * Check a special location on the stack that contains
1565 * a variable that is set when NMIs are executing. 1574 * a variable that is set when NMIs are executing.
1566 * The interrupted task's stack is also checked to see if it 1575 * The interrupted task's stack is also checked to see if it
1567 * is an NMI stack. 1576 * is an NMI stack.
1568 * If the variable is not set and the stack is not the NMI 1577 * If the variable is not set and the stack is not the NMI
1569 * stack then: 1578 * stack then:
1570 * o Set the special variable on the stack 1579 * o Set the special variable on the stack
1571 * o Copy the interrupt frame into a "saved" location on the stack 1580 * o Copy the interrupt frame into a "saved" location on the stack
1572 * o Copy the interrupt frame into a "copy" location on the stack 1581 * o Copy the interrupt frame into a "copy" location on the stack
1573 * o Continue processing the NMI 1582 * o Continue processing the NMI
1574 * If the variable is set or the previous stack is the NMI stack: 1583 * If the variable is set or the previous stack is the NMI stack:
1575 * o Modify the "copy" location to jump to repeat_nmi 1584 * o Modify the "copy" location to jump to repeat_nmi
1576 * o return back to the first NMI 1585 * o return back to the first NMI
1577 * 1586 *
1578 * Now on exit of the first NMI, we first clear the stack variable. 1587 * Now on exit of the first NMI, we first clear the stack variable.
1579 * The NMI stack will tell any nested NMIs at that point that it is 1588 * The NMI stack will tell any nested NMIs at that point that it is
1580 * nested. Then we pop the stack normally with iret, and if there was 1589 * nested. Then we pop the stack normally with iret, and if there was
1581 * a nested NMI that updated the copy interrupt stack frame, a 1590 * a nested NMI that updated the copy interrupt stack frame, a
1582 * jump will be made to the repeat_nmi code that will handle the second 1591 * jump will be made to the repeat_nmi code that will handle the second
1583 * NMI. 1592 * NMI.
1584 */ 1593 */
1585 1594
1586 /* Use %rdx as our temp variable throughout */ 1595 /* Use %rdx as our temp variable throughout */
1587 pushq_cfi %rdx 1596 pushq_cfi %rdx
1588 CFI_REL_OFFSET rdx, 0 1597 CFI_REL_OFFSET rdx, 0
1589 1598
1590 /* 1599 /*
1591 * If %cs was not the kernel segment, then the NMI triggered in user 1600 * If %cs was not the kernel segment, then the NMI triggered in user
1592 * space, which means it is definitely not nested. 1601 * space, which means it is definitely not nested.
1593 */ 1602 */
1594 cmpl $__KERNEL_CS, 16(%rsp) 1603 cmpl $__KERNEL_CS, 16(%rsp)
1595 jne first_nmi 1604 jne first_nmi
1596 1605
1597 /* 1606 /*
1598 * Check the special variable on the stack to see if NMIs are 1607 * Check the special variable on the stack to see if NMIs are
1599 * executing. 1608 * executing.
1600 */ 1609 */
1601 cmpl $1, -8(%rsp) 1610 cmpl $1, -8(%rsp)
1602 je nested_nmi 1611 je nested_nmi
1603 1612
1604 /* 1613 /*
1605 * Now test if the previous stack was an NMI stack. 1614 * Now test if the previous stack was an NMI stack.
1606 * We need the double check. We check the NMI stack to cover the 1615 * We need the double check. We check the NMI stack to cover the
1607 * race where the first NMI clears the variable before returning. 1616 * race where the first NMI clears the variable before returning.
1608 * We check the variable because the first NMI could be in a 1617 * We check the variable because the first NMI could be in a
1609 * breakpoint routine using a breakpoint stack. 1618 * breakpoint routine using a breakpoint stack.
1610 */ 1619 */
1611 lea 6*8(%rsp), %rdx 1620 lea 6*8(%rsp), %rdx
1612 test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi 1621 test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
1613 CFI_REMEMBER_STATE 1622 CFI_REMEMBER_STATE
1614 1623
1615 nested_nmi: 1624 nested_nmi:
1616 /* 1625 /*
1617 * Do nothing if we interrupted the fixup in repeat_nmi. 1626 * Do nothing if we interrupted the fixup in repeat_nmi.
1618 * It's about to repeat the NMI handler, so we are fine 1627 * It's about to repeat the NMI handler, so we are fine
1619 * with ignoring this one. 1628 * with ignoring this one.
1620 */ 1629 */
1621 movq $repeat_nmi, %rdx 1630 movq $repeat_nmi, %rdx
1622 cmpq 8(%rsp), %rdx 1631 cmpq 8(%rsp), %rdx
1623 ja 1f 1632 ja 1f
1624 movq $end_repeat_nmi, %rdx 1633 movq $end_repeat_nmi, %rdx
1625 cmpq 8(%rsp), %rdx 1634 cmpq 8(%rsp), %rdx
1626 ja nested_nmi_out 1635 ja nested_nmi_out
1627 1636
1628 1: 1637 1:
1629 /* Set up the interrupted NMI's stack to jump to repeat_nmi */ 1638 /* Set up the interrupted NMI's stack to jump to repeat_nmi */
1630 leaq -6*8(%rsp), %rdx 1639 leaq -6*8(%rsp), %rdx
1631 movq %rdx, %rsp 1640 movq %rdx, %rsp
1632 CFI_ADJUST_CFA_OFFSET 6*8 1641 CFI_ADJUST_CFA_OFFSET 6*8
1633 pushq_cfi $__KERNEL_DS 1642 pushq_cfi $__KERNEL_DS
1634 pushq_cfi %rdx 1643 pushq_cfi %rdx
1635 pushfq_cfi 1644 pushfq_cfi
1636 pushq_cfi $__KERNEL_CS 1645 pushq_cfi $__KERNEL_CS
1637 pushq_cfi $repeat_nmi 1646 pushq_cfi $repeat_nmi
1638 1647
1639 /* Put stack back */ 1648 /* Put stack back */
1640 addq $(11*8), %rsp 1649 addq $(11*8), %rsp
1641 CFI_ADJUST_CFA_OFFSET -11*8 1650 CFI_ADJUST_CFA_OFFSET -11*8
1642 1651
1643 nested_nmi_out: 1652 nested_nmi_out:
1644 popq_cfi %rdx 1653 popq_cfi %rdx
1645 CFI_RESTORE rdx 1654 CFI_RESTORE rdx
1646 1655
1647 /* No need to check faults here */ 1656 /* No need to check faults here */
1648 INTERRUPT_RETURN 1657 INTERRUPT_RETURN
1649 1658
1650 CFI_RESTORE_STATE 1659 CFI_RESTORE_STATE
1651 first_nmi: 1660 first_nmi:
1652 /* 1661 /*
1653 * Because nested NMIs will use the pushed location that we 1662 * Because nested NMIs will use the pushed location that we
1654 * stored in rdx, we must keep that space available. 1663 * stored in rdx, we must keep that space available.
1655 * Here's what our stack frame will look like: 1664 * Here's what our stack frame will look like:
1656 * +-------------------------+ 1665 * +-------------------------+
1657 * | original SS | 1666 * | original SS |
1658 * | original Return RSP | 1667 * | original Return RSP |
1659 * | original RFLAGS | 1668 * | original RFLAGS |
1660 * | original CS | 1669 * | original CS |
1661 * | original RIP | 1670 * | original RIP |
1662 * +-------------------------+ 1671 * +-------------------------+
1663 * | temp storage for rdx | 1672 * | temp storage for rdx |
1664 * +-------------------------+ 1673 * +-------------------------+
1665 * | NMI executing variable | 1674 * | NMI executing variable |
1666 * +-------------------------+ 1675 * +-------------------------+
1667 * | Saved SS | 1676 * | Saved SS |
1668 * | Saved Return RSP | 1677 * | Saved Return RSP |
1669 * | Saved RFLAGS | 1678 * | Saved RFLAGS |
1670 * | Saved CS | 1679 * | Saved CS |
1671 * | Saved RIP | 1680 * | Saved RIP |
1672 * +-------------------------+ 1681 * +-------------------------+
1673 * | copied SS | 1682 * | copied SS |
1674 * | copied Return RSP | 1683 * | copied Return RSP |
1675 * | copied RFLAGS | 1684 * | copied RFLAGS |
1676 * | copied CS | 1685 * | copied CS |
1677 * | copied RIP | 1686 * | copied RIP |
1678 * +-------------------------+ 1687 * +-------------------------+
1679 * | pt_regs | 1688 * | pt_regs |
1680 * +-------------------------+ 1689 * +-------------------------+
1681 * 1690 *
1682 * The saved stack frame is used to fix up the copied stack frame 1691 * The saved stack frame is used to fix up the copied stack frame
1683 * that a nested NMI may change to make the interrupted NMI iret jump 1692 * that a nested NMI may change to make the interrupted NMI iret jump
1684 * to repeat_nmi. The original stack frame and the temp storage 1693 * to repeat_nmi. The original stack frame and the temp storage
1685 * are also used by nested NMIs and cannot be trusted on exit. 1694 * are also used by nested NMIs and cannot be trusted on exit.
1686 */ 1695 */
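The three iret frames in the diagram above can also be read as a plain C layout; the sketch below is only illustrative (the struct and field names are invented here, not kernel types), listing the 8-byte slots from the lowest stack address upward, with the pt_regs area omitted:

    #include <stdint.h>

    /* Illustrative model of the NMI stack layout drawn above.  The real
     * layout lives on the stack and grows downward; this struct simply
     * mirrors the ordering of the 8-byte slots. */
    struct hw_iret_frame {                 /* what the CPU pushes for an NMI */
            uint64_t rip;
            uint64_t cs;
            uint64_t rflags;
            uint64_t rsp;
            uint64_t ss;
    };

    struct nmi_stack_model {               /* hypothetical, illustration only */
            struct hw_iret_frame copied;   /* rewritten by nested NMIs        */
            struct hw_iret_frame saved;    /* pristine copy used for repair   */
            uint64_t nmi_executing;        /* the "NMI executing variable"    */
            uint64_t rdx_tmp;              /* temp storage for rdx            */
            struct hw_iret_frame original; /* frame pushed by the hardware    */
    };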
1687 /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ 1696 /* Do not pop rdx, nested NMIs will corrupt that part of the stack */
1688 movq (%rsp), %rdx 1697 movq (%rsp), %rdx
1689 CFI_RESTORE rdx 1698 CFI_RESTORE rdx
1690 1699
1691 /* Set the NMI executing variable on the stack. */ 1700 /* Set the NMI executing variable on the stack. */
1692 pushq_cfi $1 1701 pushq_cfi $1
1693 1702
1694 /* Copy the stack frame to the Saved frame */ 1703 /* Copy the stack frame to the Saved frame */
1695 .rept 5 1704 .rept 5
1696 pushq_cfi 6*8(%rsp) 1705 pushq_cfi 6*8(%rsp)
1697 .endr 1706 .endr
1698 CFI_DEF_CFA_OFFSET SS+8-RIP 1707 CFI_DEF_CFA_OFFSET SS+8-RIP
1699 1708
1700 /* Everything up to here is safe from nested NMIs */ 1709 /* Everything up to here is safe from nested NMIs */
1701 1710
1702 /* 1711 /*
1703 * If there was a nested NMI, the first NMI's iret will return 1712 * If there was a nested NMI, the first NMI's iret will return
1704 * here. But NMIs are still enabled and we can take another 1713 * here. But NMIs are still enabled and we can take another
1705 * nested NMI. The nested NMI checks the interrupted RIP to see 1714 * nested NMI. The nested NMI checks the interrupted RIP to see
1706 * if it is between repeat_nmi and end_repeat_nmi, and if so 1715 * if it is between repeat_nmi and end_repeat_nmi, and if so
1707 * it will just return, as we are about to repeat an NMI anyway. 1716 * it will just return, as we are about to repeat an NMI anyway.
1708 * This makes it safe to copy to the stack frame that a nested 1717 * This makes it safe to copy to the stack frame that a nested
1709 * NMI will update. 1718 * NMI will update.
1710 */ 1719 */
1711 repeat_nmi: 1720 repeat_nmi:
1712 /* 1721 /*
1713 * Update the stack variable to say we are still in NMI (the update 1722 * Update the stack variable to say we are still in NMI (the update
1714 * is benign for the non-repeat case, where 1 was pushed just above 1723 * is benign for the non-repeat case, where 1 was pushed just above
1715 * to this very stack slot). 1724 * to this very stack slot).
1716 */ 1725 */
1717 movq $1, 5*8(%rsp) 1726 movq $1, 5*8(%rsp)
1718 1727
1719 /* Make another copy, this one may be modified by nested NMIs */ 1728 /* Make another copy, this one may be modified by nested NMIs */
1720 .rept 5 1729 .rept 5
1721 pushq_cfi 4*8(%rsp) 1730 pushq_cfi 4*8(%rsp)
1722 .endr 1731 .endr
1723 CFI_DEF_CFA_OFFSET SS+8-RIP 1732 CFI_DEF_CFA_OFFSET SS+8-RIP
1724 end_repeat_nmi: 1733 end_repeat_nmi:
1725 1734
1726 /* 1735 /*
1727 * Everything below this point can be preempted by a nested 1736 * Everything below this point can be preempted by a nested
1728 * NMI if the first NMI took an exception and reset our iret stack 1737 * NMI if the first NMI took an exception and reset our iret stack
1729 * so that we repeat another NMI. 1738 * so that we repeat another NMI.
1730 */ 1739 */
1731 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1740 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1732 subq $ORIG_RAX-R15, %rsp 1741 subq $ORIG_RAX-R15, %rsp
1733 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1742 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1734 /* 1743 /*
1735 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit 1744 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
1736 * as we should not be calling schedule in NMI context, even 1745 * as we should not be calling schedule in NMI context, even
1737 * with normal interrupts enabled. An NMI should not be 1746 * with normal interrupts enabled. An NMI should not be
1738 * setting NEED_RESCHED or anything that normal interrupts and 1747 * setting NEED_RESCHED or anything that normal interrupts and
1739 * exceptions might do. 1748 * exceptions might do.
1740 */ 1749 */
1741 call save_paranoid 1750 call save_paranoid
1742 DEFAULT_FRAME 0 1751 DEFAULT_FRAME 0
1743 1752
1744 /* 1753 /*
1745 * Save off the CR2 register. If we take a page fault in the NMI then 1754 * Save off the CR2 register. If we take a page fault in the NMI then
1746 * it could corrupt the CR2 value. If the NMI preempts a page fault 1755 * it could corrupt the CR2 value. If the NMI preempts a page fault
1747 * handler before it was able to read the CR2 register, and then the 1756 * handler before it was able to read the CR2 register, and then the
1748 * NMI itself takes a page fault, the page fault that was preempted 1757 * NMI itself takes a page fault, the page fault that was preempted
1749 * will read the information from the NMI page fault and not the 1758 * will read the information from the NMI page fault and not the
1750 * original fault. Save it off and restore it if it changes. 1759 * original fault. Save it off and restore it if it changes.
1751 * Use the r12 callee-saved register. 1760 * Use the r12 callee-saved register.
1752 */ 1761 */
1753 movq %cr2, %r12 1762 movq %cr2, %r12
1754 1763
1755 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ 1764 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1756 movq %rsp,%rdi 1765 movq %rsp,%rdi
1757 movq $-1,%rsi 1766 movq $-1,%rsi
1758 call do_nmi 1767 call do_nmi
1759 1768
1760 /* Did the NMI take a page fault? Restore cr2 if it did */ 1769 /* Did the NMI take a page fault? Restore cr2 if it did */
1761 movq %cr2, %rcx 1770 movq %cr2, %rcx
1762 cmpq %rcx, %r12 1771 cmpq %rcx, %r12
1763 je 1f 1772 je 1f
1764 movq %r12, %cr2 1773 movq %r12, %cr2
1765 1: 1774 1:
1766 1775
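The CR2 save/restore sequence just above reduces to a few lines of C. This is a standalone model under the assumption that %cr2 can be treated as a plain variable; do_nmi_model and the surrounding names are invented for illustration, not kernel APIs:

    #include <stdint.h>

    static uint64_t cr2;                      /* stands in for the real %cr2  */

    static void do_nmi_model(void)            /* hypothetical handler body    */
    {
            /* may take a page fault internally and overwrite cr2 */
    }

    static void nmi_with_cr2_protection(void)
    {
            uint64_t saved_cr2 = cr2;         /* movq %cr2, %r12              */

            do_nmi_model();                   /* call do_nmi                  */

            if (cr2 != saved_cr2)             /* cmpq %rcx, %r12 ; je 1f      */
                    cr2 = saved_cr2;          /* movq %r12, %cr2              */
    }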
1767 testl %ebx,%ebx /* swapgs needed? */ 1776 testl %ebx,%ebx /* swapgs needed? */
1768 jnz nmi_restore 1777 jnz nmi_restore
1769 nmi_swapgs: 1778 nmi_swapgs:
1770 SWAPGS_UNSAFE_STACK 1779 SWAPGS_UNSAFE_STACK
1771 nmi_restore: 1780 nmi_restore:
1772 RESTORE_ALL 8 1781 RESTORE_ALL 8
1773 /* Clear the NMI executing stack variable */ 1782 /* Clear the NMI executing stack variable */
1774 movq $0, 10*8(%rsp) 1783 movq $0, 10*8(%rsp)
1775 jmp irq_return 1784 jmp irq_return
1776 CFI_ENDPROC 1785 CFI_ENDPROC
1777 END(nmi) 1786 END(nmi)
1778 1787
1779 ENTRY(ignore_sysret) 1788 ENTRY(ignore_sysret)
1780 CFI_STARTPROC 1789 CFI_STARTPROC
1781 mov $-ENOSYS,%eax 1790 mov $-ENOSYS,%eax
1782 sysret 1791 sysret
1783 CFI_ENDPROC 1792 CFI_ENDPROC
1784 END(ignore_sysret) 1793 END(ignore_sysret)
1785 1794
1786 /* 1795 /*
1787 * End of kprobes section 1796 * End of kprobes section
1788 */ 1797 */
1789 .popsection 1798 .popsection
1790 1799
arch/x86/lib/copy_user_64.S
1 /* 1 /*
2 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com> 2 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
3 * Copyright 2002 Andi Kleen, SuSE Labs. 3 * Copyright 2002 Andi Kleen, SuSE Labs.
4 * Subject to the GNU Public License v2. 4 * Subject to the GNU Public License v2.
5 * 5 *
6 * Functions to copy from and to user space. 6 * Functions to copy from and to user space.
7 */ 7 */
8 8
9 #include <linux/linkage.h> 9 #include <linux/linkage.h>
10 #include <asm/dwarf2.h> 10 #include <asm/dwarf2.h>
11 11
12 #define FIX_ALIGNMENT 1 12 #define FIX_ALIGNMENT 1
13 13
14 #include <asm/current.h> 14 #include <asm/current.h>
15 #include <asm/asm-offsets.h> 15 #include <asm/asm-offsets.h>
16 #include <asm/thread_info.h> 16 #include <asm/thread_info.h>
17 #include <asm/cpufeature.h> 17 #include <asm/cpufeature.h>
18 #include <asm/alternative-asm.h> 18 #include <asm/alternative-asm.h>
19 #include <asm/asm.h> 19 #include <asm/asm.h>
20 #include <asm/smap.h>
20 21
21 /* 22 /*
22 * By placing feature2 after feature1 in altinstructions section, we logically 23 * By placing feature2 after feature1 in altinstructions section, we logically
23 * implement: 24 * implement:
24 * If CPU has feature2, jmp to alt2 is used 25 * If CPU has feature2, jmp to alt2 is used
25 * else if CPU has feature1, jmp to alt1 is used 26 * else if CPU has feature1, jmp to alt1 is used
26 * else jmp to orig is used. 27 * else jmp to orig is used.
27 */ 28 */
28 .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2 29 .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
29 0: 30 0:
30 .byte 0xe9 /* 32bit jump */ 31 .byte 0xe9 /* 32bit jump */
31 .long \orig-1f /* by default jump to orig */ 32 .long \orig-1f /* by default jump to orig */
32 1: 33 1:
33 .section .altinstr_replacement,"ax" 34 .section .altinstr_replacement,"ax"
34 2: .byte 0xe9 /* near jump with 32bit immediate */ 35 2: .byte 0xe9 /* near jump with 32bit immediate */
35 .long \alt1-1b /* offset */ /* or alternatively to alt1 */ 36 .long \alt1-1b /* offset */ /* or alternatively to alt1 */
36 3: .byte 0xe9 /* near jump with 32bit immediate */ 37 3: .byte 0xe9 /* near jump with 32bit immediate */
37 .long \alt2-1b /* offset */ /* or alternatively to alt2 */ 38 .long \alt2-1b /* offset */ /* or alternatively to alt2 */
38 .previous 39 .previous
39 40
40 .section .altinstructions,"a" 41 .section .altinstructions,"a"
41 altinstruction_entry 0b,2b,\feature1,5,5 42 altinstruction_entry 0b,2b,\feature1,5,5
42 altinstruction_entry 0b,3b,\feature2,5,5 43 altinstruction_entry 0b,3b,\feature2,5,5
43 .previous 44 .previous
44 .endm 45 .endm
45 46
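The precedence the macro comment describes (feature2 over feature1 over the original code) amounts to a simple dispatch. Here is a hedged C model of that choice, with invented function names standing in for the three copy_user variants and plain booleans standing in for the CPU feature bits:

    #include <stdbool.h>
    #include <stddef.h>

    typedef size_t (*copy_fn)(void *dst, const void *src, size_t len);

    /* Illustrative stand-ins for the three copy_user implementations. */
    static size_t copy_unrolled(void *d, const void *s, size_t n)
    { (void)d; (void)s; (void)n; return 0; }
    static size_t copy_string(void *d, const void *s, size_t n)
    { (void)d; (void)s; (void)n; return 0; }
    static size_t copy_erms(void *d, const void *s, size_t n)
    { (void)d; (void)s; (void)n; return 0; }

    /* Same precedence as ALTERNATIVE_JUMP: feature2 wins over feature1,
     * which wins over the original code. */
    static copy_fn pick_copy(bool has_rep_good, bool has_erms)
    {
            if (has_erms)
                    return copy_erms;       /* X86_FEATURE_ERMS     -> alt2 */
            if (has_rep_good)
                    return copy_string;     /* X86_FEATURE_REP_GOOD -> alt1 */
            return copy_unrolled;           /* default              -> orig */
    }

In the real kernel the choice is patched into the call site at boot by the alternatives machinery rather than decided at run time, but the precedence is the same.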
46 .macro ALIGN_DESTINATION 47 .macro ALIGN_DESTINATION
47 #ifdef FIX_ALIGNMENT 48 #ifdef FIX_ALIGNMENT
48 /* check for bad alignment of destination */ 49 /* check for bad alignment of destination */
49 movl %edi,%ecx 50 movl %edi,%ecx
50 andl $7,%ecx 51 andl $7,%ecx
51 jz 102f /* already aligned */ 52 jz 102f /* already aligned */
52 subl $8,%ecx 53 subl $8,%ecx
53 negl %ecx 54 negl %ecx
54 subl %ecx,%edx 55 subl %ecx,%edx
55 100: movb (%rsi),%al 56 100: movb (%rsi),%al
56 101: movb %al,(%rdi) 57 101: movb %al,(%rdi)
57 incq %rsi 58 incq %rsi
58 incq %rdi 59 incq %rdi
59 decl %ecx 60 decl %ecx
60 jnz 100b 61 jnz 100b
61 102: 62 102:
62 .section .fixup,"ax" 63 .section .fixup,"ax"
63 103: addl %ecx,%edx /* ecx is zerorest also */ 64 103: addl %ecx,%edx /* ecx is zerorest also */
64 jmp copy_user_handle_tail 65 jmp copy_user_handle_tail
65 .previous 66 .previous
66 67
67 _ASM_EXTABLE(100b,103b) 68 _ASM_EXTABLE(100b,103b)
68 _ASM_EXTABLE(101b,103b) 69 _ASM_EXTABLE(101b,103b)
69 #endif 70 #endif
70 .endm 71 .endm
71 72
72 /* Standard copy_to_user with segment limit checking */ 73 /* Standard copy_to_user with segment limit checking */
73 ENTRY(_copy_to_user) 74 ENTRY(_copy_to_user)
74 CFI_STARTPROC 75 CFI_STARTPROC
75 GET_THREAD_INFO(%rax) 76 GET_THREAD_INFO(%rax)
76 movq %rdi,%rcx 77 movq %rdi,%rcx
77 addq %rdx,%rcx 78 addq %rdx,%rcx
78 jc bad_to_user 79 jc bad_to_user
79 cmpq TI_addr_limit(%rax),%rcx 80 cmpq TI_addr_limit(%rax),%rcx
80 ja bad_to_user 81 ja bad_to_user
81 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ 82 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
82 copy_user_generic_unrolled,copy_user_generic_string, \ 83 copy_user_generic_unrolled,copy_user_generic_string, \
83 copy_user_enhanced_fast_string 84 copy_user_enhanced_fast_string
84 CFI_ENDPROC 85 CFI_ENDPROC
85 ENDPROC(_copy_to_user) 86 ENDPROC(_copy_to_user)
86 87
87 /* Standard copy_from_user with segment limit checking */ 88 /* Standard copy_from_user with segment limit checking */
88 ENTRY(_copy_from_user) 89 ENTRY(_copy_from_user)
89 CFI_STARTPROC 90 CFI_STARTPROC
90 GET_THREAD_INFO(%rax) 91 GET_THREAD_INFO(%rax)
91 movq %rsi,%rcx 92 movq %rsi,%rcx
92 addq %rdx,%rcx 93 addq %rdx,%rcx
93 jc bad_from_user 94 jc bad_from_user
94 cmpq TI_addr_limit(%rax),%rcx 95 cmpq TI_addr_limit(%rax),%rcx
95 ja bad_from_user 96 ja bad_from_user
96 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ 97 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
97 copy_user_generic_unrolled,copy_user_generic_string, \ 98 copy_user_generic_unrolled,copy_user_generic_string, \
98 copy_user_enhanced_fast_string 99 copy_user_enhanced_fast_string
99 CFI_ENDPROC 100 CFI_ENDPROC
100 ENDPROC(_copy_from_user) 101 ENDPROC(_copy_from_user)
101 102
102 .section .fixup,"ax" 103 .section .fixup,"ax"
103 /* must zero dest */ 104 /* must zero dest */
104 ENTRY(bad_from_user) 105 ENTRY(bad_from_user)
105 bad_from_user: 106 bad_from_user:
106 CFI_STARTPROC 107 CFI_STARTPROC
107 movl %edx,%ecx 108 movl %edx,%ecx
108 xorl %eax,%eax 109 xorl %eax,%eax
109 rep 110 rep
110 stosb 111 stosb
111 bad_to_user: 112 bad_to_user:
112 movl %edx,%eax 113 movl %edx,%eax
113 ret 114 ret
114 CFI_ENDPROC 115 CFI_ENDPROC
115 ENDPROC(bad_from_user) 116 ENDPROC(bad_from_user)
116 .previous 117 .previous
117 118
118 /* 119 /*
119 * copy_user_generic_unrolled - memory copy with exception handling. 120 * copy_user_generic_unrolled - memory copy with exception handling.
120 * This version is for CPUs like P4 that don't have efficient 121 * This version is for CPUs like P4 that don't have efficient
121 * microcode for rep movsq 122 * microcode for rep movsq
122 * 123 *
123 * Input: 124 * Input:
124 * rdi destination 125 * rdi destination
125 * rsi source 126 * rsi source
126 * rdx count 127 * rdx count
127 * 128 *
128 * Output: 129 * Output:
129 * eax uncopied bytes or 0 if successful. 130 * eax uncopied bytes or 0 if successful.
130 */ 131 */
131 ENTRY(copy_user_generic_unrolled) 132 ENTRY(copy_user_generic_unrolled)
132 CFI_STARTPROC 133 CFI_STARTPROC
134 ASM_STAC
133 cmpl $8,%edx 135 cmpl $8,%edx
134 jb 20f /* less than 8 bytes, go to byte copy loop */ 136 jb 20f /* less than 8 bytes, go to byte copy loop */
135 ALIGN_DESTINATION 137 ALIGN_DESTINATION
136 movl %edx,%ecx 138 movl %edx,%ecx
137 andl $63,%edx 139 andl $63,%edx
138 shrl $6,%ecx 140 shrl $6,%ecx
139 jz 17f 141 jz 17f
140 1: movq (%rsi),%r8 142 1: movq (%rsi),%r8
141 2: movq 1*8(%rsi),%r9 143 2: movq 1*8(%rsi),%r9
142 3: movq 2*8(%rsi),%r10 144 3: movq 2*8(%rsi),%r10
143 4: movq 3*8(%rsi),%r11 145 4: movq 3*8(%rsi),%r11
144 5: movq %r8,(%rdi) 146 5: movq %r8,(%rdi)
145 6: movq %r9,1*8(%rdi) 147 6: movq %r9,1*8(%rdi)
146 7: movq %r10,2*8(%rdi) 148 7: movq %r10,2*8(%rdi)
147 8: movq %r11,3*8(%rdi) 149 8: movq %r11,3*8(%rdi)
148 9: movq 4*8(%rsi),%r8 150 9: movq 4*8(%rsi),%r8
149 10: movq 5*8(%rsi),%r9 151 10: movq 5*8(%rsi),%r9
150 11: movq 6*8(%rsi),%r10 152 11: movq 6*8(%rsi),%r10
151 12: movq 7*8(%rsi),%r11 153 12: movq 7*8(%rsi),%r11
152 13: movq %r8,4*8(%rdi) 154 13: movq %r8,4*8(%rdi)
153 14: movq %r9,5*8(%rdi) 155 14: movq %r9,5*8(%rdi)
154 15: movq %r10,6*8(%rdi) 156 15: movq %r10,6*8(%rdi)
155 16: movq %r11,7*8(%rdi) 157 16: movq %r11,7*8(%rdi)
156 leaq 64(%rsi),%rsi 158 leaq 64(%rsi),%rsi
157 leaq 64(%rdi),%rdi 159 leaq 64(%rdi),%rdi
158 decl %ecx 160 decl %ecx
159 jnz 1b 161 jnz 1b
160 17: movl %edx,%ecx 162 17: movl %edx,%ecx
161 andl $7,%edx 163 andl $7,%edx
162 shrl $3,%ecx 164 shrl $3,%ecx
163 jz 20f 165 jz 20f
164 18: movq (%rsi),%r8 166 18: movq (%rsi),%r8
165 19: movq %r8,(%rdi) 167 19: movq %r8,(%rdi)
166 leaq 8(%rsi),%rsi 168 leaq 8(%rsi),%rsi
167 leaq 8(%rdi),%rdi 169 leaq 8(%rdi),%rdi
168 decl %ecx 170 decl %ecx
169 jnz 18b 171 jnz 18b
170 20: andl %edx,%edx 172 20: andl %edx,%edx
171 jz 23f 173 jz 23f
172 movl %edx,%ecx 174 movl %edx,%ecx
173 21: movb (%rsi),%al 175 21: movb (%rsi),%al
174 22: movb %al,(%rdi) 176 22: movb %al,(%rdi)
175 incq %rsi 177 incq %rsi
176 incq %rdi 178 incq %rdi
177 decl %ecx 179 decl %ecx
178 jnz 21b 180 jnz 21b
179 23: xor %eax,%eax 181 23: xor %eax,%eax
182 ASM_CLAC
180 ret 183 ret
181 184
182 .section .fixup,"ax" 185 .section .fixup,"ax"
183 30: shll $6,%ecx 186 30: shll $6,%ecx
184 addl %ecx,%edx 187 addl %ecx,%edx
185 jmp 60f 188 jmp 60f
186 40: lea (%rdx,%rcx,8),%rdx 189 40: lea (%rdx,%rcx,8),%rdx
187 jmp 60f 190 jmp 60f
188 50: movl %ecx,%edx 191 50: movl %ecx,%edx
189 60: jmp copy_user_handle_tail /* ecx is zerorest also */ 192 60: jmp copy_user_handle_tail /* ecx is zerorest also */
190 .previous 193 .previous
191 194
192 _ASM_EXTABLE(1b,30b) 195 _ASM_EXTABLE(1b,30b)
193 _ASM_EXTABLE(2b,30b) 196 _ASM_EXTABLE(2b,30b)
194 _ASM_EXTABLE(3b,30b) 197 _ASM_EXTABLE(3b,30b)
195 _ASM_EXTABLE(4b,30b) 198 _ASM_EXTABLE(4b,30b)
196 _ASM_EXTABLE(5b,30b) 199 _ASM_EXTABLE(5b,30b)
197 _ASM_EXTABLE(6b,30b) 200 _ASM_EXTABLE(6b,30b)
198 _ASM_EXTABLE(7b,30b) 201 _ASM_EXTABLE(7b,30b)
199 _ASM_EXTABLE(8b,30b) 202 _ASM_EXTABLE(8b,30b)
200 _ASM_EXTABLE(9b,30b) 203 _ASM_EXTABLE(9b,30b)
201 _ASM_EXTABLE(10b,30b) 204 _ASM_EXTABLE(10b,30b)
202 _ASM_EXTABLE(11b,30b) 205 _ASM_EXTABLE(11b,30b)
203 _ASM_EXTABLE(12b,30b) 206 _ASM_EXTABLE(12b,30b)
204 _ASM_EXTABLE(13b,30b) 207 _ASM_EXTABLE(13b,30b)
205 _ASM_EXTABLE(14b,30b) 208 _ASM_EXTABLE(14b,30b)
206 _ASM_EXTABLE(15b,30b) 209 _ASM_EXTABLE(15b,30b)
207 _ASM_EXTABLE(16b,30b) 210 _ASM_EXTABLE(16b,30b)
208 _ASM_EXTABLE(18b,40b) 211 _ASM_EXTABLE(18b,40b)
209 _ASM_EXTABLE(19b,40b) 212 _ASM_EXTABLE(19b,40b)
210 _ASM_EXTABLE(21b,50b) 213 _ASM_EXTABLE(21b,50b)
211 _ASM_EXTABLE(22b,50b) 214 _ASM_EXTABLE(22b,50b)
212 CFI_ENDPROC 215 CFI_ENDPROC
213 ENDPROC(copy_user_generic_unrolled) 216 ENDPROC(copy_user_generic_unrolled)
214 217
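Stripped of the fault handling, SMAP brackets and destination alignment, the unrolled routine above reduces to a three-stage copy: 64-byte blocks, then 8-byte words, then single bytes. A minimal C model of that structure follows (names invented; this model cannot fault, so it always reports zero uncopied bytes):

    #include <stddef.h>
    #include <string.h>

    /* Mirrors the shape of copy_user_generic_unrolled: 64-byte blocks,
     * then 8-byte words, then a byte tail.  Returns the number of bytes
     * NOT copied, which is always 0 here. */
    static size_t copy_unrolled_model(void *dst, const void *src, size_t len)
    {
            unsigned char *d = dst;
            const unsigned char *s = src;

            while (len >= 64) {                  /* the 8x movq loop (1..16)  */
                    memcpy(d, s, 64);
                    d += 64; s += 64; len -= 64;
            }
            while (len >= 8) {                   /* labels 18/19              */
                    memcpy(d, s, 8);
                    d += 8; s += 8; len -= 8;
            }
            while (len) {                        /* byte tail, labels 21/22   */
                    *d++ = *s++;
                    len--;
            }
            return 0;                            /* eax: uncopied bytes       */
    }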
215 /* Some CPUs run faster using the string copy instructions. 218 /* Some CPUs run faster using the string copy instructions.
216 * This is also a lot simpler. Use them when possible. 219 * This is also a lot simpler. Use them when possible.
217 * 220 *
218 * Only 4GB of copy is supported. This shouldn't be a problem 221 * Only 4GB of copy is supported. This shouldn't be a problem
219 * because the kernel normally only writes from/to page sized chunks 222 * because the kernel normally only writes from/to page sized chunks
220 * even if user space passed a longer buffer. 223 * even if user space passed a longer buffer.
221 * And more would be dangerous because both Intel and AMD have 224 * And more would be dangerous because both Intel and AMD have
222 * errata with rep movsq > 4GB. If someone feels the need to fix 225 * errata with rep movsq > 4GB. If someone feels the need to fix
223 * this, please keep the errata in mind. 226 * this, please keep the errata in mind.
224 * 227 *
225 * Input: 228 * Input:
226 * rdi destination 229 * rdi destination
227 * rsi source 230 * rsi source
228 * rdx count 231 * rdx count
229 * 232 *
230 * Output: 233 * Output:
231 * eax uncopied bytes or 0 if successful. 234 * eax uncopied bytes or 0 if successful.
232 */ 235 */
233 ENTRY(copy_user_generic_string) 236 ENTRY(copy_user_generic_string)
234 CFI_STARTPROC 237 CFI_STARTPROC
238 ASM_STAC
235 andl %edx,%edx 239 andl %edx,%edx
236 jz 4f 240 jz 4f
237 cmpl $8,%edx 241 cmpl $8,%edx
238 jb 2f /* less than 8 bytes, go to byte copy loop */ 242 jb 2f /* less than 8 bytes, go to byte copy loop */
239 ALIGN_DESTINATION 243 ALIGN_DESTINATION
240 movl %edx,%ecx 244 movl %edx,%ecx
241 shrl $3,%ecx 245 shrl $3,%ecx
242 andl $7,%edx 246 andl $7,%edx
243 1: rep 247 1: rep
244 movsq 248 movsq
245 2: movl %edx,%ecx 249 2: movl %edx,%ecx
246 3: rep 250 3: rep
247 movsb 251 movsb
248 4: xorl %eax,%eax 252 4: xorl %eax,%eax
253 ASM_CLAC
249 ret 254 ret
250 255
251 .section .fixup,"ax" 256 .section .fixup,"ax"
252 11: lea (%rdx,%rcx,8),%rcx 257 11: lea (%rdx,%rcx,8),%rcx
253 12: movl %ecx,%edx /* ecx is zerorest also */ 258 12: movl %ecx,%edx /* ecx is zerorest also */
254 jmp copy_user_handle_tail 259 jmp copy_user_handle_tail
255 .previous 260 .previous
256 261
257 _ASM_EXTABLE(1b,11b) 262 _ASM_EXTABLE(1b,11b)
258 _ASM_EXTABLE(3b,12b) 263 _ASM_EXTABLE(3b,12b)
259 CFI_ENDPROC 264 CFI_ENDPROC
260 ENDPROC(copy_user_generic_string) 265 ENDPROC(copy_user_generic_string)
261 266
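The quadword/byte split performed before the two rep instructions is just integer arithmetic; a small illustrative helper (not kernel code):

    #include <stddef.h>

    /* How the string variant splits the count:
     * len/8 iterations of rep movsq, then len%8 iterations of rep movsb. */
    static void split_count(size_t len, size_t *quadwords, size_t *bytes)
    {
            *quadwords = len >> 3;  /* shrl $3,%ecx -> rep movsq count */
            *bytes     = len & 7;   /* andl $7,%edx -> rep movsb count */
    }

For example, len = 20 gives 2 quadwords plus 4 trailing bytes.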
262 /* 267 /*
263 * Some CPUs provide enhanced REP MOVSB/STOSB instructions. 268 * Some CPUs provide enhanced REP MOVSB/STOSB instructions.
264 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled. 269 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
265 * 270 *
266 * Input: 271 * Input:
267 * rdi destination 272 * rdi destination
268 * rsi source 273 * rsi source
269 * rdx count 274 * rdx count
270 * 275 *
271 * Output: 276 * Output:
272 * eax uncopied bytes or 0 if successful. 277 * eax uncopied bytes or 0 if successful.
273 */ 278 */
274 ENTRY(copy_user_enhanced_fast_string) 279 ENTRY(copy_user_enhanced_fast_string)
275 CFI_STARTPROC 280 CFI_STARTPROC
281 ASM_STAC
276 andl %edx,%edx 282 andl %edx,%edx
277 jz 2f 283 jz 2f
278 movl %edx,%ecx 284 movl %edx,%ecx
279 1: rep 285 1: rep
280 movsb 286 movsb
281 2: xorl %eax,%eax 287 2: xorl %eax,%eax
288 ASM_CLAC
282 ret 289 ret
283 290
284 .section .fixup,"ax" 291 .section .fixup,"ax"
285 12: movl %ecx,%edx /* ecx is zerorest also */ 292 12: movl %ecx,%edx /* ecx is zerorest also */
286 jmp copy_user_handle_tail 293 jmp copy_user_handle_tail
287 .previous 294 .previous
288 295
289 _ASM_EXTABLE(1b,12b) 296 _ASM_EXTABLE(1b,12b)
290 CFI_ENDPROC 297 CFI_ENDPROC
291 ENDPROC(copy_user_enhanced_fast_string) 298 ENDPROC(copy_user_enhanced_fast_string)
292 299
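The change common to all three variants in this file is the same discipline: ASM_STAC immediately after the prologue, ASM_CLAC on the way out, so user memory is only writable/readable inside that window. A hedged, userspace-flavoured sketch of the pattern (stac(), clac() and do_copy() are placeholders defined below, not the kernel macros):

    #include <stddef.h>
    #include <string.h>

    static void stac(void) { /* would be the STAC instruction under SMAP */ }
    static void clac(void) { /* would be the CLAC instruction under SMAP */ }

    static size_t do_copy(void *dst, const void *src, size_t len)
    {
            memcpy(dst, src, len);
            return 0;                       /* bytes not copied */
    }

    /* Every path that touches user memory is bracketed by stac()/clac(). */
    static size_t copy_with_smap_window(void *dst, const void *src, size_t len)
    {
            size_t uncopied;

            stac();                         /* ASM_STAC: allow user access  */
            uncopied = do_copy(dst, src, len);
            clac();                         /* ASM_CLAC: forbid it again    */
            return uncopied;
    }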
arch/x86/lib/copy_user_nocache_64.S
1 /* 1 /*
2 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com> 2 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
3 * Copyright 2002 Andi Kleen, SuSE Labs. 3 * Copyright 2002 Andi Kleen, SuSE Labs.
4 * Subject to the GNU Public License v2. 4 * Subject to the GNU Public License v2.
5 * 5 *
6 * Functions to copy from and to user space. 6 * Functions to copy from and to user space.
7 */ 7 */
8 8
9 #include <linux/linkage.h> 9 #include <linux/linkage.h>
10 #include <asm/dwarf2.h> 10 #include <asm/dwarf2.h>
11 11
12 #define FIX_ALIGNMENT 1 12 #define FIX_ALIGNMENT 1
13 13
14 #include <asm/current.h> 14 #include <asm/current.h>
15 #include <asm/asm-offsets.h> 15 #include <asm/asm-offsets.h>
16 #include <asm/thread_info.h> 16 #include <asm/thread_info.h>
17 #include <asm/asm.h> 17 #include <asm/asm.h>
18 #include <asm/smap.h>
18 19
19 .macro ALIGN_DESTINATION 20 .macro ALIGN_DESTINATION
20 #ifdef FIX_ALIGNMENT 21 #ifdef FIX_ALIGNMENT
21 /* check for bad alignment of destination */ 22 /* check for bad alignment of destination */
22 movl %edi,%ecx 23 movl %edi,%ecx
23 andl $7,%ecx 24 andl $7,%ecx
24 jz 102f /* already aligned */ 25 jz 102f /* already aligned */
25 subl $8,%ecx 26 subl $8,%ecx
26 negl %ecx 27 negl %ecx
27 subl %ecx,%edx 28 subl %ecx,%edx
28 100: movb (%rsi),%al 29 100: movb (%rsi),%al
29 101: movb %al,(%rdi) 30 101: movb %al,(%rdi)
30 incq %rsi 31 incq %rsi
31 incq %rdi 32 incq %rdi
32 decl %ecx 33 decl %ecx
33 jnz 100b 34 jnz 100b
34 102: 35 102:
35 .section .fixup,"ax" 36 .section .fixup,"ax"
36 103: addl %ecx,%edx /* ecx is zerorest also */ 37 103: addl %ecx,%edx /* ecx is zerorest also */
37 jmp copy_user_handle_tail 38 jmp copy_user_handle_tail
38 .previous 39 .previous
39 40
40 _ASM_EXTABLE(100b,103b) 41 _ASM_EXTABLE(100b,103b)
41 _ASM_EXTABLE(101b,103b) 42 _ASM_EXTABLE(101b,103b)
42 #endif 43 #endif
43 .endm 44 .endm
44 45
45 /* 46 /*
46 * copy_user_nocache - Uncached memory copy with exception handling 47 * copy_user_nocache - Uncached memory copy with exception handling
47 * This forces the destination/source out of the cache to improve performance. 48 * This forces the destination/source out of the cache to improve performance.
48 */ 49 */
49 ENTRY(__copy_user_nocache) 50 ENTRY(__copy_user_nocache)
50 CFI_STARTPROC 51 CFI_STARTPROC
52 ASM_STAC
51 cmpl $8,%edx 53 cmpl $8,%edx
52 jb 20f /* less than 8 bytes, go to byte copy loop */ 54 jb 20f /* less than 8 bytes, go to byte copy loop */
53 ALIGN_DESTINATION 55 ALIGN_DESTINATION
54 movl %edx,%ecx 56 movl %edx,%ecx
55 andl $63,%edx 57 andl $63,%edx
56 shrl $6,%ecx 58 shrl $6,%ecx
57 jz 17f 59 jz 17f
58 1: movq (%rsi),%r8 60 1: movq (%rsi),%r8
59 2: movq 1*8(%rsi),%r9 61 2: movq 1*8(%rsi),%r9
60 3: movq 2*8(%rsi),%r10 62 3: movq 2*8(%rsi),%r10
61 4: movq 3*8(%rsi),%r11 63 4: movq 3*8(%rsi),%r11
62 5: movnti %r8,(%rdi) 64 5: movnti %r8,(%rdi)
63 6: movnti %r9,1*8(%rdi) 65 6: movnti %r9,1*8(%rdi)
64 7: movnti %r10,2*8(%rdi) 66 7: movnti %r10,2*8(%rdi)
65 8: movnti %r11,3*8(%rdi) 67 8: movnti %r11,3*8(%rdi)
66 9: movq 4*8(%rsi),%r8 68 9: movq 4*8(%rsi),%r8
67 10: movq 5*8(%rsi),%r9 69 10: movq 5*8(%rsi),%r9
68 11: movq 6*8(%rsi),%r10 70 11: movq 6*8(%rsi),%r10
69 12: movq 7*8(%rsi),%r11 71 12: movq 7*8(%rsi),%r11
70 13: movnti %r8,4*8(%rdi) 72 13: movnti %r8,4*8(%rdi)
71 14: movnti %r9,5*8(%rdi) 73 14: movnti %r9,5*8(%rdi)
72 15: movnti %r10,6*8(%rdi) 74 15: movnti %r10,6*8(%rdi)
73 16: movnti %r11,7*8(%rdi) 75 16: movnti %r11,7*8(%rdi)
74 leaq 64(%rsi),%rsi 76 leaq 64(%rsi),%rsi
75 leaq 64(%rdi),%rdi 77 leaq 64(%rdi),%rdi
76 decl %ecx 78 decl %ecx
77 jnz 1b 79 jnz 1b
78 17: movl %edx,%ecx 80 17: movl %edx,%ecx
79 andl $7,%edx 81 andl $7,%edx
80 shrl $3,%ecx 82 shrl $3,%ecx
81 jz 20f 83 jz 20f
82 18: movq (%rsi),%r8 84 18: movq (%rsi),%r8
83 19: movnti %r8,(%rdi) 85 19: movnti %r8,(%rdi)
84 leaq 8(%rsi),%rsi 86 leaq 8(%rsi),%rsi
85 leaq 8(%rdi),%rdi 87 leaq 8(%rdi),%rdi
86 decl %ecx 88 decl %ecx
87 jnz 18b 89 jnz 18b
88 20: andl %edx,%edx 90 20: andl %edx,%edx
89 jz 23f 91 jz 23f
90 movl %edx,%ecx 92 movl %edx,%ecx
91 21: movb (%rsi),%al 93 21: movb (%rsi),%al
92 22: movb %al,(%rdi) 94 22: movb %al,(%rdi)
93 incq %rsi 95 incq %rsi
94 incq %rdi 96 incq %rdi
95 decl %ecx 97 decl %ecx
96 jnz 21b 98 jnz 21b
97 23: xorl %eax,%eax 99 23: xorl %eax,%eax
100 ASM_CLAC
98 sfence 101 sfence
99 ret 102 ret
100 103
101 .section .fixup,"ax" 104 .section .fixup,"ax"
102 30: shll $6,%ecx 105 30: shll $6,%ecx
103 addl %ecx,%edx 106 addl %ecx,%edx
104 jmp 60f 107 jmp 60f
105 40: lea (%rdx,%rcx,8),%rdx 108 40: lea (%rdx,%rcx,8),%rdx
106 jmp 60f 109 jmp 60f
107 50: movl %ecx,%edx 110 50: movl %ecx,%edx
108 60: sfence 111 60: sfence
109 jmp copy_user_handle_tail 112 jmp copy_user_handle_tail
110 .previous 113 .previous
111 114
112 _ASM_EXTABLE(1b,30b) 115 _ASM_EXTABLE(1b,30b)
113 _ASM_EXTABLE(2b,30b) 116 _ASM_EXTABLE(2b,30b)
114 _ASM_EXTABLE(3b,30b) 117 _ASM_EXTABLE(3b,30b)
115 _ASM_EXTABLE(4b,30b) 118 _ASM_EXTABLE(4b,30b)
116 _ASM_EXTABLE(5b,30b) 119 _ASM_EXTABLE(5b,30b)
117 _ASM_EXTABLE(6b,30b) 120 _ASM_EXTABLE(6b,30b)
118 _ASM_EXTABLE(7b,30b) 121 _ASM_EXTABLE(7b,30b)
119 _ASM_EXTABLE(8b,30b) 122 _ASM_EXTABLE(8b,30b)
120 _ASM_EXTABLE(9b,30b) 123 _ASM_EXTABLE(9b,30b)
121 _ASM_EXTABLE(10b,30b) 124 _ASM_EXTABLE(10b,30b)
122 _ASM_EXTABLE(11b,30b) 125 _ASM_EXTABLE(11b,30b)
123 _ASM_EXTABLE(12b,30b) 126 _ASM_EXTABLE(12b,30b)
124 _ASM_EXTABLE(13b,30b) 127 _ASM_EXTABLE(13b,30b)
125 _ASM_EXTABLE(14b,30b) 128 _ASM_EXTABLE(14b,30b)
126 _ASM_EXTABLE(15b,30b) 129 _ASM_EXTABLE(15b,30b)
127 _ASM_EXTABLE(16b,30b) 130 _ASM_EXTABLE(16b,30b)
128 _ASM_EXTABLE(18b,40b) 131 _ASM_EXTABLE(18b,40b)
129 _ASM_EXTABLE(19b,40b) 132 _ASM_EXTABLE(19b,40b)
130 _ASM_EXTABLE(21b,50b) 133 _ASM_EXTABLE(21b,50b)
131 _ASM_EXTABLE(22b,50b) 134 _ASM_EXTABLE(22b,50b)
132 CFI_ENDPROC 135 CFI_ENDPROC
133 ENDPROC(__copy_user_nocache) 136 ENDPROC(__copy_user_nocache)
134 137
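__copy_user_nocache ends with sfence on both the normal and the fixup path because movnti stores are weakly ordered. A hedged userspace sketch of the same ordering rule using SSE2 intrinsics (a model, not the kernel routine; it assumes an 8-byte-aligned buffer whose length is a multiple of 8):

    #include <emmintrin.h>   /* _mm_stream_si64 */
    #include <xmmintrin.h>   /* _mm_sfence      */
    #include <stddef.h>

    /* Non-temporal copy of n 8-byte words.  The trailing sfence mirrors
     * the one in __copy_user_nocache: make the weakly-ordered movnti
     * stores globally visible before returning. */
    static void copy_nocache_model(long long *dst, const long long *src, size_t n)
    {
            size_t i;

            for (i = 0; i < n; i++)
                    _mm_stream_si64(&dst[i], src[i]);   /* movnti            */

            _mm_sfence();                               /* sfence before ret */
    }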
arch/x86/lib/getuser.S
1 /* 1 /*
2 * __get_user functions. 2 * __get_user functions.
3 * 3 *
4 * (C) Copyright 1998 Linus Torvalds 4 * (C) Copyright 1998 Linus Torvalds
5 * (C) Copyright 2005 Andi Kleen 5 * (C) Copyright 2005 Andi Kleen
6 * (C) Copyright 2008 Glauber Costa 6 * (C) Copyright 2008 Glauber Costa
7 * 7 *
8 * These functions have a non-standard call interface 8 * These functions have a non-standard call interface
9 * to make them more efficient, especially as they 9 * to make them more efficient, especially as they
10 * return an error value in addition to the "real" 10 * return an error value in addition to the "real"
11 * return value. 11 * return value.
12 */ 12 */
13 13
14 /* 14 /*
15 * __get_user_X 15 * __get_user_X
16 * 16 *
17 * Inputs: %[r|e]ax contains the address. 17 * Inputs: %[r|e]ax contains the address.
18 * The register is modified, but all changes are undone 18 * The register is modified, but all changes are undone
19 * before returning because the C code doesn't know about it. 19 * before returning because the C code doesn't know about it.
20 * 20 *
21 * Outputs: %[r|e]ax is error code (0 or -EFAULT) 21 * Outputs: %[r|e]ax is error code (0 or -EFAULT)
22 * %[r|e]dx contains zero-extended value 22 * %[r|e]dx contains zero-extended value
23 * 23 *
24 * 24 *
25 * These functions should not modify any other registers, 25 * These functions should not modify any other registers,
26 * as they get called from within inline assembly. 26 * as they get called from within inline assembly.
27 */ 27 */
28 28
29 #include <linux/linkage.h> 29 #include <linux/linkage.h>
30 #include <asm/dwarf2.h> 30 #include <asm/dwarf2.h>
31 #include <asm/page_types.h> 31 #include <asm/page_types.h>
32 #include <asm/errno.h> 32 #include <asm/errno.h>
33 #include <asm/asm-offsets.h> 33 #include <asm/asm-offsets.h>
34 #include <asm/thread_info.h> 34 #include <asm/thread_info.h>
35 #include <asm/asm.h> 35 #include <asm/asm.h>
36 #include <asm/smap.h>
36 37
37 .text 38 .text
38 ENTRY(__get_user_1) 39 ENTRY(__get_user_1)
39 CFI_STARTPROC 40 CFI_STARTPROC
40 GET_THREAD_INFO(%_ASM_DX) 41 GET_THREAD_INFO(%_ASM_DX)
41 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 42 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
42 jae bad_get_user 43 jae bad_get_user
44 ASM_STAC
43 1: movzb (%_ASM_AX),%edx 45 1: movzb (%_ASM_AX),%edx
44 xor %eax,%eax 46 xor %eax,%eax
47 ASM_CLAC
45 ret 48 ret
46 CFI_ENDPROC 49 CFI_ENDPROC
47 ENDPROC(__get_user_1) 50 ENDPROC(__get_user_1)
48 51
49 ENTRY(__get_user_2) 52 ENTRY(__get_user_2)
50 CFI_STARTPROC 53 CFI_STARTPROC
51 add $1,%_ASM_AX 54 add $1,%_ASM_AX
52 jc bad_get_user 55 jc bad_get_user
53 GET_THREAD_INFO(%_ASM_DX) 56 GET_THREAD_INFO(%_ASM_DX)
54 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 57 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
55 jae bad_get_user 58 jae bad_get_user
59 ASM_STAC
56 2: movzwl -1(%_ASM_AX),%edx 60 2: movzwl -1(%_ASM_AX),%edx
57 xor %eax,%eax 61 xor %eax,%eax
62 ASM_CLAC
58 ret 63 ret
59 CFI_ENDPROC 64 CFI_ENDPROC
60 ENDPROC(__get_user_2) 65 ENDPROC(__get_user_2)
61 66
62 ENTRY(__get_user_4) 67 ENTRY(__get_user_4)
63 CFI_STARTPROC 68 CFI_STARTPROC
64 add $3,%_ASM_AX 69 add $3,%_ASM_AX
65 jc bad_get_user 70 jc bad_get_user
66 GET_THREAD_INFO(%_ASM_DX) 71 GET_THREAD_INFO(%_ASM_DX)
67 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 72 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
68 jae bad_get_user 73 jae bad_get_user
74 ASM_STAC
69 3: mov -3(%_ASM_AX),%edx 75 3: mov -3(%_ASM_AX),%edx
70 xor %eax,%eax 76 xor %eax,%eax
77 ASM_CLAC
71 ret 78 ret
72 CFI_ENDPROC 79 CFI_ENDPROC
73 ENDPROC(__get_user_4) 80 ENDPROC(__get_user_4)
74 81
75 #ifdef CONFIG_X86_64 82 #ifdef CONFIG_X86_64
76 ENTRY(__get_user_8) 83 ENTRY(__get_user_8)
77 CFI_STARTPROC 84 CFI_STARTPROC
78 add $7,%_ASM_AX 85 add $7,%_ASM_AX
79 jc bad_get_user 86 jc bad_get_user
80 GET_THREAD_INFO(%_ASM_DX) 87 GET_THREAD_INFO(%_ASM_DX)
81 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX 88 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
82 jae bad_get_user 89 jae bad_get_user
90 ASM_STAC
83 4: movq -7(%_ASM_AX),%_ASM_DX 91 4: movq -7(%_ASM_AX),%_ASM_DX
84 xor %eax,%eax 92 xor %eax,%eax
93 ASM_CLAC
85 ret 94 ret
86 CFI_ENDPROC 95 CFI_ENDPROC
87 ENDPROC(__get_user_8) 96 ENDPROC(__get_user_8)
88 #endif 97 #endif
89 98
90 bad_get_user: 99 bad_get_user:
91 CFI_STARTPROC 100 CFI_STARTPROC
92 xor %edx,%edx 101 xor %edx,%edx
93 mov $(-EFAULT),%_ASM_AX 102 mov $(-EFAULT),%_ASM_AX
103 ASM_CLAC
94 ret 104 ret
95 CFI_ENDPROC 105 CFI_ENDPROC
96 END(bad_get_user) 106 END(bad_get_user)
97 107
98 _ASM_EXTABLE(1b,bad_get_user) 108 _ASM_EXTABLE(1b,bad_get_user)
99 _ASM_EXTABLE(2b,bad_get_user) 109 _ASM_EXTABLE(2b,bad_get_user)
100 _ASM_EXTABLE(3b,bad_get_user) 110 _ASM_EXTABLE(3b,bad_get_user)
101 #ifdef CONFIG_X86_64 111 #ifdef CONFIG_X86_64
102 _ASM_EXTABLE(4b,bad_get_user) 112 _ASM_EXTABLE(4b,bad_get_user)
103 #endif 113 #endif
104 114
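The calling convention documented at the top of getuser.S (address in, error code plus zero-extended value out) can be restated as ordinary C. The sketch below is purely illustrative; the struct, function and parameter names are invented and it is not the kernel's get_user macro. It models the limit check, the overflow check and the sized load that sit between ASM_STAC and ASM_CLAC:

    #include <errno.h>
    #include <stdint.h>

    /* Model of the __get_user_N contract: one input (the user address),
     * two results (error code and zero-extended value).  The real helpers
     * return these in %rax/%rdx; here they come back through a struct. */
    struct getuser_result {
            long     err;     /* 0 or -EFAULT               */
            uint64_t value;   /* zero-extended loaded value */
    };

    static struct getuser_result get_user_model(const void *uaddr, unsigned size,
                                                uintptr_t limit)
    {
            struct getuser_result r = { 0, 0 };
            uintptr_t a = (uintptr_t)uaddr;

            /* add $size-1 + jc, then the compare against TI_addr_limit */
            if (a + size - 1 < a || a + size - 1 >= limit) {
                    r.err = -EFAULT;
                    return r;
            }
            /* Between ASM_STAC and ASM_CLAC the asm performs the load;
             * modeled here as a plain memory read. */
            switch (size) {
            case 1: r.value = *(const uint8_t  *)uaddr; break;
            case 2: r.value = *(const uint16_t *)uaddr; break;
            case 4: r.value = *(const uint32_t *)uaddr; break;
            case 8: r.value = *(const uint64_t *)uaddr; break;
            default: r.err = -EFAULT;
            }
            return r;
    }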
arch/x86/lib/putuser.S
1 /* 1 /*
2 * __put_user functions. 2 * __put_user functions.
3 * 3 *
4 * (C) Copyright 2005 Linus Torvalds 4 * (C) Copyright 2005 Linus Torvalds
5 * (C) Copyright 2005 Andi Kleen 5 * (C) Copyright 2005 Andi Kleen
6 * (C) Copyright 2008 Glauber Costa 6 * (C) Copyright 2008 Glauber Costa
7 * 7 *
8 * These functions have a non-standard call interface 8 * These functions have a non-standard call interface
9 * to make them more efficient, especially as they 9 * to make them more efficient, especially as they
10 * return an error value in addition to the "real" 10 * return an error value in addition to the "real"
11 * return value. 11 * return value.
12 */ 12 */
13 #include <linux/linkage.h> 13 #include <linux/linkage.h>
14 #include <asm/dwarf2.h> 14 #include <asm/dwarf2.h>
15 #include <asm/thread_info.h> 15 #include <asm/thread_info.h>
16 #include <asm/errno.h> 16 #include <asm/errno.h>
17 #include <asm/asm.h> 17 #include <asm/asm.h>
18 #include <asm/smap.h>
18 19
19 20
20 /* 21 /*
21 * __put_user_X 22 * __put_user_X
22 * 23 *
23 * Inputs: %eax[:%edx] contains the data 24 * Inputs: %eax[:%edx] contains the data
24 * %ecx contains the address 25 * %ecx contains the address
25 * 26 *
26 * Outputs: %eax is error code (0 or -EFAULT) 27 * Outputs: %eax is error code (0 or -EFAULT)
27 * 28 *
28 * These functions should not modify any other registers, 29 * These functions should not modify any other registers,
29 * as they get called from within inline assembly. 30 * as they get called from within inline assembly.
30 */ 31 */
31 32
32 #define ENTER CFI_STARTPROC ; \ 33 #define ENTER CFI_STARTPROC ; \
33 GET_THREAD_INFO(%_ASM_BX) 34 GET_THREAD_INFO(%_ASM_BX)
34 #define EXIT ret ; \ 35 #define EXIT ASM_CLAC ; \
36 ret ; \
35 CFI_ENDPROC 37 CFI_ENDPROC
36 38
37 .text 39 .text
38 ENTRY(__put_user_1) 40 ENTRY(__put_user_1)
39 ENTER 41 ENTER
40 cmp TI_addr_limit(%_ASM_BX),%_ASM_CX 42 cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
41 jae bad_put_user 43 jae bad_put_user
44 ASM_STAC
42 1: movb %al,(%_ASM_CX) 45 1: movb %al,(%_ASM_CX)
43 xor %eax,%eax 46 xor %eax,%eax
44 EXIT 47 EXIT
45 ENDPROC(__put_user_1) 48 ENDPROC(__put_user_1)
46 49
47 ENTRY(__put_user_2) 50 ENTRY(__put_user_2)
48 ENTER 51 ENTER
49 mov TI_addr_limit(%_ASM_BX),%_ASM_BX 52 mov TI_addr_limit(%_ASM_BX),%_ASM_BX
50 sub $1,%_ASM_BX 53 sub $1,%_ASM_BX
51 cmp %_ASM_BX,%_ASM_CX 54 cmp %_ASM_BX,%_ASM_CX
52 jae bad_put_user 55 jae bad_put_user
56 ASM_STAC
53 2: movw %ax,(%_ASM_CX) 57 2: movw %ax,(%_ASM_CX)
54 xor %eax,%eax 58 xor %eax,%eax
55 EXIT 59 EXIT
56 ENDPROC(__put_user_2) 60 ENDPROC(__put_user_2)
57 61
58 ENTRY(__put_user_4) 62 ENTRY(__put_user_4)
59 ENTER 63 ENTER
60 mov TI_addr_limit(%_ASM_BX),%_ASM_BX 64 mov TI_addr_limit(%_ASM_BX),%_ASM_BX
61 sub $3,%_ASM_BX 65 sub $3,%_ASM_BX
62 cmp %_ASM_BX,%_ASM_CX 66 cmp %_ASM_BX,%_ASM_CX
63 jae bad_put_user 67 jae bad_put_user
68 ASM_STAC
64 3: movl %eax,(%_ASM_CX) 69 3: movl %eax,(%_ASM_CX)
65 xor %eax,%eax 70 xor %eax,%eax
66 EXIT 71 EXIT
67 ENDPROC(__put_user_4) 72 ENDPROC(__put_user_4)
68 73
69 ENTRY(__put_user_8) 74 ENTRY(__put_user_8)
70 ENTER 75 ENTER
71 mov TI_addr_limit(%_ASM_BX),%_ASM_BX 76 mov TI_addr_limit(%_ASM_BX),%_ASM_BX
72 sub $7,%_ASM_BX 77 sub $7,%_ASM_BX
73 cmp %_ASM_BX,%_ASM_CX 78 cmp %_ASM_BX,%_ASM_CX
74 jae bad_put_user 79 jae bad_put_user
80 ASM_STAC
75 4: mov %_ASM_AX,(%_ASM_CX) 81 4: mov %_ASM_AX,(%_ASM_CX)
76 #ifdef CONFIG_X86_32 82 #ifdef CONFIG_X86_32
77 5: movl %edx,4(%_ASM_CX) 83 5: movl %edx,4(%_ASM_CX)
78 #endif 84 #endif
79 xor %eax,%eax 85 xor %eax,%eax
80 EXIT 86 EXIT
81 ENDPROC(__put_user_8) 87 ENDPROC(__put_user_8)
82 88
83 bad_put_user: 89 bad_put_user:
84 CFI_STARTPROC 90 CFI_STARTPROC
85 movl $-EFAULT,%eax 91 movl $-EFAULT,%eax
86 EXIT 92 EXIT
87 END(bad_put_user) 93 END(bad_put_user)
88 94
89 _ASM_EXTABLE(1b,bad_put_user) 95 _ASM_EXTABLE(1b,bad_put_user)
90 _ASM_EXTABLE(2b,bad_put_user) 96 _ASM_EXTABLE(2b,bad_put_user)
91 _ASM_EXTABLE(3b,bad_put_user) 97 _ASM_EXTABLE(3b,bad_put_user)
92 _ASM_EXTABLE(4b,bad_put_user) 98 _ASM_EXTABLE(4b,bad_put_user)
93 #ifdef CONFIG_X86_32 99 #ifdef CONFIG_X86_32
94 _ASM_EXTABLE(5b,bad_put_user) 100 _ASM_EXTABLE(5b,bad_put_user)
95 #endif 101 #endif
96 102
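Because EXIT now expands to ASM_CLAC followed by ret, every return path in this file, including bad_put_user, leaves with the AC flag cleared. A minimal sketch of that single-exit discipline (stac() and clac() are placeholders, not the kernel macros; CLAC is harmless when AC is already clear, which is why the error path can share the exit):

    #include <errno.h>
    #include <stdbool.h>

    static void stac(void) { /* STAC: open the user-access window  */ }
    static void clac(void) { /* CLAC: close the user-access window */ }

    /* All returns funnel through one exit so CLAC can never be skipped. */
    static long put_user_model(unsigned char *uaddr, unsigned char v, bool in_range)
    {
            long err = 0;

            if (!in_range) {          /* jae bad_put_user               */
                    err = -EFAULT;
                    goto out;         /* bad_put_user also ends in EXIT */
            }
            stac();                   /* ASM_STAC                       */
            *uaddr = v;               /* 1: movb %al,(%_ASM_CX)         */
    out:
            clac();                   /* EXIT: ASM_CLAC ; ret           */
            return err;
    }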
arch/x86/lib/usercopy_32.c
1 /* 1 /*
2 * User address space access functions. 2 * User address space access functions.
3 * The non-inlined parts of asm-i386/uaccess.h are here. 3 * The non-inlined parts of asm-i386/uaccess.h are here.
4 * 4 *
5 * Copyright 1997 Andi Kleen <ak@muc.de> 5 * Copyright 1997 Andi Kleen <ak@muc.de>
6 * Copyright 1997 Linus Torvalds 6 * Copyright 1997 Linus Torvalds
7 */ 7 */
8 #include <linux/mm.h> 8 #include <linux/mm.h>
9 #include <linux/highmem.h> 9 #include <linux/highmem.h>
10 #include <linux/blkdev.h> 10 #include <linux/blkdev.h>
11 #include <linux/module.h> 11 #include <linux/module.h>
12 #include <linux/backing-dev.h> 12 #include <linux/backing-dev.h>
13 #include <linux/interrupt.h> 13 #include <linux/interrupt.h>
14 #include <asm/uaccess.h> 14 #include <asm/uaccess.h>
15 #include <asm/mmx.h> 15 #include <asm/mmx.h>
16 #include <asm/asm.h> 16 #include <asm/asm.h>
17 17
18 #ifdef CONFIG_X86_INTEL_USERCOPY 18 #ifdef CONFIG_X86_INTEL_USERCOPY
19 /* 19 /*
20 * Alignment at which movsl is preferred for bulk memory copies. 20 * Alignment at which movsl is preferred for bulk memory copies.
21 */ 21 */
22 struct movsl_mask movsl_mask __read_mostly; 22 struct movsl_mask movsl_mask __read_mostly;
23 #endif 23 #endif
24 24
25 static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) 25 static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n)
26 { 26 {
27 #ifdef CONFIG_X86_INTEL_USERCOPY 27 #ifdef CONFIG_X86_INTEL_USERCOPY
28 if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask)) 28 if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask))
29 return 0; 29 return 0;
30 #endif 30 #endif
31 return 1; 31 return 1;
32 } 32 }
33 #define movsl_is_ok(a1, a2, n) \ 33 #define movsl_is_ok(a1, a2, n) \
34 __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) 34 __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n))
35 35
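__movsl_is_ok only vetoes large copies whose source and destination disagree in the low bits selected by movsl_mask.mask. A standalone example of the same test, with an arbitrary mask of 7 standing in for the tuned movsl_mask.mask:

    #include <stdio.h>

    /* Same test as __movsl_is_ok, with a hard-coded example mask. */
    static int movsl_is_ok_model(unsigned long a1, unsigned long a2,
                                 unsigned long n)
    {
            const unsigned long mask = 7;   /* stand-in for movsl_mask.mask */

            if (n >= 64 && ((a1 ^ a2) & mask))
                    return 0;               /* misaligned relative to each other */
            return 1;
    }

    int main(void)
    {
            printf("%d\n", movsl_is_ok_model(0x1000, 0x2000, 128)); /* 1: same alignment    */
            printf("%d\n", movsl_is_ok_model(0x1001, 0x2000, 128)); /* 0: low bits differ   */
            printf("%d\n", movsl_is_ok_model(0x1001, 0x2000, 32));  /* 1: small copy is ok  */
            return 0;
    }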
36 /* 36 /*
37 * Zero Userspace 37 * Zero Userspace
38 */ 38 */
39 39
40 #define __do_clear_user(addr,size) \ 40 #define __do_clear_user(addr,size) \
41 do { \ 41 do { \
42 int __d0; \ 42 int __d0; \
43 might_fault(); \ 43 might_fault(); \
44 __asm__ __volatile__( \ 44 __asm__ __volatile__( \
45 ASM_STAC "\n" \
45 "0: rep; stosl\n" \ 46 "0: rep; stosl\n" \
46 " movl %2,%0\n" \ 47 " movl %2,%0\n" \
47 "1: rep; stosb\n" \ 48 "1: rep; stosb\n" \
48 "2:\n" \ 49 "2: " ASM_CLAC "\n" \
49 ".section .fixup,\"ax\"\n" \ 50 ".section .fixup,\"ax\"\n" \
50 "3: lea 0(%2,%0,4),%0\n" \ 51 "3: lea 0(%2,%0,4),%0\n" \
51 " jmp 2b\n" \ 52 " jmp 2b\n" \
52 ".previous\n" \ 53 ".previous\n" \
53 _ASM_EXTABLE(0b,3b) \ 54 _ASM_EXTABLE(0b,3b) \
54 _ASM_EXTABLE(1b,2b) \ 55 _ASM_EXTABLE(1b,2b) \
55 : "=&c"(size), "=&D" (__d0) \ 56 : "=&c"(size), "=&D" (__d0) \
56 : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ 57 : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
57 } while (0) 58 } while (0)
58 59
59 /** 60 /**
60 * clear_user: - Zero a block of memory in user space. 61 * clear_user: - Zero a block of memory in user space.
61 * @to: Destination address, in user space. 62 * @to: Destination address, in user space.
62 * @n: Number of bytes to zero. 63 * @n: Number of bytes to zero.
63 * 64 *
64 * Zero a block of memory in user space. 65 * Zero a block of memory in user space.
65 * 66 *
66 * Returns number of bytes that could not be cleared. 67 * Returns number of bytes that could not be cleared.
67 * On success, this will be zero. 68 * On success, this will be zero.
68 */ 69 */
69 unsigned long 70 unsigned long
70 clear_user(void __user *to, unsigned long n) 71 clear_user(void __user *to, unsigned long n)
71 { 72 {
72 might_fault(); 73 might_fault();
73 if (access_ok(VERIFY_WRITE, to, n)) 74 if (access_ok(VERIFY_WRITE, to, n))
74 __do_clear_user(to, n); 75 __do_clear_user(to, n);
75 return n; 76 return n;
76 } 77 }
77 EXPORT_SYMBOL(clear_user); 78 EXPORT_SYMBOL(clear_user);
78 79
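The return convention above (number of bytes that could not be cleared, zero on success) is usually turned into -EFAULT by the caller. A hedged usage sketch follows; zero_user_buffer, ubuf and len are invented names, not kernel API:

    #include <linux/errno.h>
    #include <linux/uaccess.h>

    /* Hedged usage sketch: returns 0 on success, -EFAULT if any byte of
     * the user buffer could not be zeroed. */
    static int zero_user_buffer(void __user *ubuf, unsigned long len)
    {
            if (clear_user(ubuf, len))      /* nonzero => bytes left uncleared */
                    return -EFAULT;
            return 0;
    }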
79 /** 80 /**
80 * __clear_user: - Zero a block of memory in user space, with less checking. 81 * __clear_user: - Zero a block of memory in user space, with less checking.
81 * @to: Destination address, in user space. 82 * @to: Destination address, in user space.
82 * @n: Number of bytes to zero. 83 * @n: Number of bytes to zero.
83 * 84 *
84 * Zero a block of memory in user space. Caller must check 85 * Zero a block of memory in user space. Caller must check
85 * the specified block with access_ok() before calling this function. 86 * the specified block with access_ok() before calling this function.
86 * 87 *
87 * Returns number of bytes that could not be cleared. 88 * Returns number of bytes that could not be cleared.
88 * On success, this will be zero. 89 * On success, this will be zero.
89 */ 90 */
90 unsigned long 91 unsigned long
91 __clear_user(void __user *to, unsigned long n) 92 __clear_user(void __user *to, unsigned long n)
92 { 93 {
93 __do_clear_user(to, n); 94 __do_clear_user(to, n);
94 return n; 95 return n;
95 } 96 }
96 EXPORT_SYMBOL(__clear_user); 97 EXPORT_SYMBOL(__clear_user);
97 98
98 #ifdef CONFIG_X86_INTEL_USERCOPY 99 #ifdef CONFIG_X86_INTEL_USERCOPY
99 static unsigned long 100 static unsigned long
100 __copy_user_intel(void __user *to, const void *from, unsigned long size) 101 __copy_user_intel(void __user *to, const void *from, unsigned long size)
101 { 102 {
102 int d0, d1; 103 int d0, d1;
103 __asm__ __volatile__( 104 __asm__ __volatile__(
104 " .align 2,0x90\n" 105 " .align 2,0x90\n"
105 "1: movl 32(%4), %%eax\n" 106 "1: movl 32(%4), %%eax\n"
106 " cmpl $67, %0\n" 107 " cmpl $67, %0\n"
107 " jbe 3f\n" 108 " jbe 3f\n"
108 "2: movl 64(%4), %%eax\n" 109 "2: movl 64(%4), %%eax\n"
109 " .align 2,0x90\n" 110 " .align 2,0x90\n"
110 "3: movl 0(%4), %%eax\n" 111 "3: movl 0(%4), %%eax\n"
111 "4: movl 4(%4), %%edx\n" 112 "4: movl 4(%4), %%edx\n"
112 "5: movl %%eax, 0(%3)\n" 113 "5: movl %%eax, 0(%3)\n"
113 "6: movl %%edx, 4(%3)\n" 114 "6: movl %%edx, 4(%3)\n"
114 "7: movl 8(%4), %%eax\n" 115 "7: movl 8(%4), %%eax\n"
115 "8: movl 12(%4),%%edx\n" 116 "8: movl 12(%4),%%edx\n"
116 "9: movl %%eax, 8(%3)\n" 117 "9: movl %%eax, 8(%3)\n"
117 "10: movl %%edx, 12(%3)\n" 118 "10: movl %%edx, 12(%3)\n"
118 "11: movl 16(%4), %%eax\n" 119 "11: movl 16(%4), %%eax\n"
119 "12: movl 20(%4), %%edx\n" 120 "12: movl 20(%4), %%edx\n"
120 "13: movl %%eax, 16(%3)\n" 121 "13: movl %%eax, 16(%3)\n"
121 "14: movl %%edx, 20(%3)\n" 122 "14: movl %%edx, 20(%3)\n"
122 "15: movl 24(%4), %%eax\n" 123 "15: movl 24(%4), %%eax\n"
123 "16: movl 28(%4), %%edx\n" 124 "16: movl 28(%4), %%edx\n"
124 "17: movl %%eax, 24(%3)\n" 125 "17: movl %%eax, 24(%3)\n"
125 "18: movl %%edx, 28(%3)\n" 126 "18: movl %%edx, 28(%3)\n"
126 "19: movl 32(%4), %%eax\n" 127 "19: movl 32(%4), %%eax\n"
127 "20: movl 36(%4), %%edx\n" 128 "20: movl 36(%4), %%edx\n"
128 "21: movl %%eax, 32(%3)\n" 129 "21: movl %%eax, 32(%3)\n"
129 "22: movl %%edx, 36(%3)\n" 130 "22: movl %%edx, 36(%3)\n"
130 "23: movl 40(%4), %%eax\n" 131 "23: movl 40(%4), %%eax\n"
131 "24: movl 44(%4), %%edx\n" 132 "24: movl 44(%4), %%edx\n"
132 "25: movl %%eax, 40(%3)\n" 133 "25: movl %%eax, 40(%3)\n"
133 "26: movl %%edx, 44(%3)\n" 134 "26: movl %%edx, 44(%3)\n"
134 "27: movl 48(%4), %%eax\n" 135 "27: movl 48(%4), %%eax\n"
135 "28: movl 52(%4), %%edx\n" 136 "28: movl 52(%4), %%edx\n"
136 "29: movl %%eax, 48(%3)\n" 137 "29: movl %%eax, 48(%3)\n"
137 "30: movl %%edx, 52(%3)\n" 138 "30: movl %%edx, 52(%3)\n"
138 "31: movl 56(%4), %%eax\n" 139 "31: movl 56(%4), %%eax\n"
139 "32: movl 60(%4), %%edx\n" 140 "32: movl 60(%4), %%edx\n"
140 "33: movl %%eax, 56(%3)\n" 141 "33: movl %%eax, 56(%3)\n"
141 "34: movl %%edx, 60(%3)\n" 142 "34: movl %%edx, 60(%3)\n"
142 " addl $-64, %0\n" 143 " addl $-64, %0\n"
143 " addl $64, %4\n" 144 " addl $64, %4\n"
144 " addl $64, %3\n" 145 " addl $64, %3\n"
145 " cmpl $63, %0\n" 146 " cmpl $63, %0\n"
146 " ja 1b\n" 147 " ja 1b\n"
147 "35: movl %0, %%eax\n" 148 "35: movl %0, %%eax\n"
148 " shrl $2, %0\n" 149 " shrl $2, %0\n"
149 " andl $3, %%eax\n" 150 " andl $3, %%eax\n"
150 " cld\n" 151 " cld\n"
151 "99: rep; movsl\n" 152 "99: rep; movsl\n"
152 "36: movl %%eax, %0\n" 153 "36: movl %%eax, %0\n"
153 "37: rep; movsb\n" 154 "37: rep; movsb\n"
154 "100:\n" 155 "100:\n"
155 ".section .fixup,\"ax\"\n" 156 ".section .fixup,\"ax\"\n"
156 "101: lea 0(%%eax,%0,4),%0\n" 157 "101: lea 0(%%eax,%0,4),%0\n"
157 " jmp 100b\n" 158 " jmp 100b\n"
158 ".previous\n" 159 ".previous\n"
159 _ASM_EXTABLE(1b,100b) 160 _ASM_EXTABLE(1b,100b)
160 _ASM_EXTABLE(2b,100b) 161 _ASM_EXTABLE(2b,100b)
161 _ASM_EXTABLE(3b,100b) 162 _ASM_EXTABLE(3b,100b)
162 _ASM_EXTABLE(4b,100b) 163 _ASM_EXTABLE(4b,100b)
163 _ASM_EXTABLE(5b,100b) 164 _ASM_EXTABLE(5b,100b)
164 _ASM_EXTABLE(6b,100b) 165 _ASM_EXTABLE(6b,100b)
165 _ASM_EXTABLE(7b,100b) 166 _ASM_EXTABLE(7b,100b)
166 _ASM_EXTABLE(8b,100b) 167 _ASM_EXTABLE(8b,100b)
167 _ASM_EXTABLE(9b,100b) 168 _ASM_EXTABLE(9b,100b)
168 _ASM_EXTABLE(10b,100b) 169 _ASM_EXTABLE(10b,100b)
169 _ASM_EXTABLE(11b,100b) 170 _ASM_EXTABLE(11b,100b)
170 _ASM_EXTABLE(12b,100b) 171 _ASM_EXTABLE(12b,100b)
171 _ASM_EXTABLE(13b,100b) 172 _ASM_EXTABLE(13b,100b)
172 _ASM_EXTABLE(14b,100b) 173 _ASM_EXTABLE(14b,100b)
173 _ASM_EXTABLE(15b,100b) 174 _ASM_EXTABLE(15b,100b)
174 _ASM_EXTABLE(16b,100b) 175 _ASM_EXTABLE(16b,100b)
175 _ASM_EXTABLE(17b,100b) 176 _ASM_EXTABLE(17b,100b)
176 _ASM_EXTABLE(18b,100b) 177 _ASM_EXTABLE(18b,100b)
177 _ASM_EXTABLE(19b,100b) 178 _ASM_EXTABLE(19b,100b)
178 _ASM_EXTABLE(20b,100b) 179 _ASM_EXTABLE(20b,100b)
179 _ASM_EXTABLE(21b,100b) 180 _ASM_EXTABLE(21b,100b)
180 _ASM_EXTABLE(22b,100b) 181 _ASM_EXTABLE(22b,100b)
181 _ASM_EXTABLE(23b,100b) 182 _ASM_EXTABLE(23b,100b)
182 _ASM_EXTABLE(24b,100b) 183 _ASM_EXTABLE(24b,100b)
183 _ASM_EXTABLE(25b,100b) 184 _ASM_EXTABLE(25b,100b)
184 _ASM_EXTABLE(26b,100b) 185 _ASM_EXTABLE(26b,100b)
185 _ASM_EXTABLE(27b,100b) 186 _ASM_EXTABLE(27b,100b)
186 _ASM_EXTABLE(28b,100b) 187 _ASM_EXTABLE(28b,100b)
187 _ASM_EXTABLE(29b,100b) 188 _ASM_EXTABLE(29b,100b)
188 _ASM_EXTABLE(30b,100b) 189 _ASM_EXTABLE(30b,100b)
189 _ASM_EXTABLE(31b,100b) 190 _ASM_EXTABLE(31b,100b)
190 _ASM_EXTABLE(32b,100b) 191 _ASM_EXTABLE(32b,100b)
191 _ASM_EXTABLE(33b,100b) 192 _ASM_EXTABLE(33b,100b)
192 _ASM_EXTABLE(34b,100b) 193 _ASM_EXTABLE(34b,100b)
193 _ASM_EXTABLE(35b,100b) 194 _ASM_EXTABLE(35b,100b)
194 _ASM_EXTABLE(36b,100b) 195 _ASM_EXTABLE(36b,100b)
195 _ASM_EXTABLE(37b,100b) 196 _ASM_EXTABLE(37b,100b)
196 _ASM_EXTABLE(99b,101b) 197 _ASM_EXTABLE(99b,101b)
197 : "=&c"(size), "=&D" (d0), "=&S" (d1) 198 : "=&c"(size), "=&D" (d0), "=&S" (d1)
198 : "1"(to), "2"(from), "0"(size) 199 : "1"(to), "2"(from), "0"(size)
199 : "eax", "edx", "memory"); 200 : "eax", "edx", "memory");
200 return size; 201 return size;
201 } 202 }
202 203
203 static unsigned long 204 static unsigned long
204 __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) 205 __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
205 { 206 {
206 int d0, d1; 207 int d0, d1;
207 __asm__ __volatile__( 208 __asm__ __volatile__(
208 " .align 2,0x90\n" 209 " .align 2,0x90\n"
209 "0: movl 32(%4), %%eax\n" 210 "0: movl 32(%4), %%eax\n"
210 " cmpl $67, %0\n" 211 " cmpl $67, %0\n"
211 " jbe 2f\n" 212 " jbe 2f\n"
212 "1: movl 64(%4), %%eax\n" 213 "1: movl 64(%4), %%eax\n"
213 " .align 2,0x90\n" 214 " .align 2,0x90\n"
214 "2: movl 0(%4), %%eax\n" 215 "2: movl 0(%4), %%eax\n"
215 "21: movl 4(%4), %%edx\n" 216 "21: movl 4(%4), %%edx\n"
216 " movl %%eax, 0(%3)\n" 217 " movl %%eax, 0(%3)\n"
217 " movl %%edx, 4(%3)\n" 218 " movl %%edx, 4(%3)\n"
218 "3: movl 8(%4), %%eax\n" 219 "3: movl 8(%4), %%eax\n"
219 "31: movl 12(%4),%%edx\n" 220 "31: movl 12(%4),%%edx\n"
220 " movl %%eax, 8(%3)\n" 221 " movl %%eax, 8(%3)\n"
221 " movl %%edx, 12(%3)\n" 222 " movl %%edx, 12(%3)\n"
222 "4: movl 16(%4), %%eax\n" 223 "4: movl 16(%4), %%eax\n"
223 "41: movl 20(%4), %%edx\n" 224 "41: movl 20(%4), %%edx\n"
224 " movl %%eax, 16(%3)\n" 225 " movl %%eax, 16(%3)\n"
225 " movl %%edx, 20(%3)\n" 226 " movl %%edx, 20(%3)\n"
226 "10: movl 24(%4), %%eax\n" 227 "10: movl 24(%4), %%eax\n"
227 "51: movl 28(%4), %%edx\n" 228 "51: movl 28(%4), %%edx\n"
228 " movl %%eax, 24(%3)\n" 229 " movl %%eax, 24(%3)\n"
229 " movl %%edx, 28(%3)\n" 230 " movl %%edx, 28(%3)\n"
230 "11: movl 32(%4), %%eax\n" 231 "11: movl 32(%4), %%eax\n"
231 "61: movl 36(%4), %%edx\n" 232 "61: movl 36(%4), %%edx\n"
232 " movl %%eax, 32(%3)\n" 233 " movl %%eax, 32(%3)\n"
233 " movl %%edx, 36(%3)\n" 234 " movl %%edx, 36(%3)\n"
234 "12: movl 40(%4), %%eax\n" 235 "12: movl 40(%4), %%eax\n"
235 "71: movl 44(%4), %%edx\n" 236 "71: movl 44(%4), %%edx\n"
236 " movl %%eax, 40(%3)\n" 237 " movl %%eax, 40(%3)\n"
237 " movl %%edx, 44(%3)\n" 238 " movl %%edx, 44(%3)\n"
238 "13: movl 48(%4), %%eax\n" 239 "13: movl 48(%4), %%eax\n"
239 "81: movl 52(%4), %%edx\n" 240 "81: movl 52(%4), %%edx\n"
240 " movl %%eax, 48(%3)\n" 241 " movl %%eax, 48(%3)\n"
241 " movl %%edx, 52(%3)\n" 242 " movl %%edx, 52(%3)\n"
242 "14: movl 56(%4), %%eax\n" 243 "14: movl 56(%4), %%eax\n"
243 "91: movl 60(%4), %%edx\n" 244 "91: movl 60(%4), %%edx\n"
244 " movl %%eax, 56(%3)\n" 245 " movl %%eax, 56(%3)\n"
245 " movl %%edx, 60(%3)\n" 246 " movl %%edx, 60(%3)\n"
246 " addl $-64, %0\n" 247 " addl $-64, %0\n"
247 " addl $64, %4\n" 248 " addl $64, %4\n"
248 " addl $64, %3\n" 249 " addl $64, %3\n"
249 " cmpl $63, %0\n" 250 " cmpl $63, %0\n"
250 " ja 0b\n" 251 " ja 0b\n"
251 "5: movl %0, %%eax\n" 252 "5: movl %0, %%eax\n"
252 " shrl $2, %0\n" 253 " shrl $2, %0\n"
253 " andl $3, %%eax\n" 254 " andl $3, %%eax\n"
254 " cld\n" 255 " cld\n"
255 "6: rep; movsl\n" 256 "6: rep; movsl\n"
256 " movl %%eax,%0\n" 257 " movl %%eax,%0\n"
257 "7: rep; movsb\n" 258 "7: rep; movsb\n"
258 "8:\n" 259 "8:\n"
259 ".section .fixup,\"ax\"\n" 260 ".section .fixup,\"ax\"\n"
260 "9: lea 0(%%eax,%0,4),%0\n" 261 "9: lea 0(%%eax,%0,4),%0\n"
261 "16: pushl %0\n" 262 "16: pushl %0\n"
262 " pushl %%eax\n" 263 " pushl %%eax\n"
263 " xorl %%eax,%%eax\n" 264 " xorl %%eax,%%eax\n"
264 " rep; stosb\n" 265 " rep; stosb\n"
265 " popl %%eax\n" 266 " popl %%eax\n"
266 " popl %0\n" 267 " popl %0\n"
267 " jmp 8b\n" 268 " jmp 8b\n"
268 ".previous\n" 269 ".previous\n"
269 _ASM_EXTABLE(0b,16b) 270 _ASM_EXTABLE(0b,16b)
270 _ASM_EXTABLE(1b,16b) 271 _ASM_EXTABLE(1b,16b)
271 _ASM_EXTABLE(2b,16b) 272 _ASM_EXTABLE(2b,16b)
272 _ASM_EXTABLE(21b,16b) 273 _ASM_EXTABLE(21b,16b)
273 _ASM_EXTABLE(3b,16b) 274 _ASM_EXTABLE(3b,16b)
274 _ASM_EXTABLE(31b,16b) 275 _ASM_EXTABLE(31b,16b)
275 _ASM_EXTABLE(4b,16b) 276 _ASM_EXTABLE(4b,16b)
276 _ASM_EXTABLE(41b,16b) 277 _ASM_EXTABLE(41b,16b)
277 _ASM_EXTABLE(10b,16b) 278 _ASM_EXTABLE(10b,16b)
278 _ASM_EXTABLE(51b,16b) 279 _ASM_EXTABLE(51b,16b)
279 _ASM_EXTABLE(11b,16b) 280 _ASM_EXTABLE(11b,16b)
280 _ASM_EXTABLE(61b,16b) 281 _ASM_EXTABLE(61b,16b)
281 _ASM_EXTABLE(12b,16b) 282 _ASM_EXTABLE(12b,16b)
282 _ASM_EXTABLE(71b,16b) 283 _ASM_EXTABLE(71b,16b)
283 _ASM_EXTABLE(13b,16b) 284 _ASM_EXTABLE(13b,16b)
284 _ASM_EXTABLE(81b,16b) 285 _ASM_EXTABLE(81b,16b)
285 _ASM_EXTABLE(14b,16b) 286 _ASM_EXTABLE(14b,16b)
286 _ASM_EXTABLE(91b,16b) 287 _ASM_EXTABLE(91b,16b)
287 _ASM_EXTABLE(6b,9b) 288 _ASM_EXTABLE(6b,9b)
288 _ASM_EXTABLE(7b,16b) 289 _ASM_EXTABLE(7b,16b)
289 : "=&c"(size), "=&D" (d0), "=&S" (d1) 290 : "=&c"(size), "=&D" (d0), "=&S" (d1)
290 : "1"(to), "2"(from), "0"(size) 291 : "1"(to), "2"(from), "0"(size)
291 : "eax", "edx", "memory"); 292 : "eax", "edx", "memory");
292 return size; 293 return size;
293 } 294 }
294 295
295 /* 296 /*
296 * Non Temporal Hint version of __copy_user_zeroing_intel. It is cache aware. 297 * Non Temporal Hint version of __copy_user_zeroing_intel. It is cache aware.
297 * hyoshiok@miraclelinux.com 298 * hyoshiok@miraclelinux.com
298 */ 299 */
299 300
300 static unsigned long __copy_user_zeroing_intel_nocache(void *to, 301 static unsigned long __copy_user_zeroing_intel_nocache(void *to,
301 const void __user *from, unsigned long size) 302 const void __user *from, unsigned long size)
302 { 303 {
303 int d0, d1; 304 int d0, d1;
304 305
305 __asm__ __volatile__( 306 __asm__ __volatile__(
306 " .align 2,0x90\n" 307 " .align 2,0x90\n"
307 "0: movl 32(%4), %%eax\n" 308 "0: movl 32(%4), %%eax\n"
308 " cmpl $67, %0\n" 309 " cmpl $67, %0\n"
309 " jbe 2f\n" 310 " jbe 2f\n"
310 "1: movl 64(%4), %%eax\n" 311 "1: movl 64(%4), %%eax\n"
311 " .align 2,0x90\n" 312 " .align 2,0x90\n"
312 "2: movl 0(%4), %%eax\n" 313 "2: movl 0(%4), %%eax\n"
313 "21: movl 4(%4), %%edx\n" 314 "21: movl 4(%4), %%edx\n"
314 " movnti %%eax, 0(%3)\n" 315 " movnti %%eax, 0(%3)\n"
315 " movnti %%edx, 4(%3)\n" 316 " movnti %%edx, 4(%3)\n"
316 "3: movl 8(%4), %%eax\n" 317 "3: movl 8(%4), %%eax\n"
317 "31: movl 12(%4),%%edx\n" 318 "31: movl 12(%4),%%edx\n"
318 " movnti %%eax, 8(%3)\n" 319 " movnti %%eax, 8(%3)\n"
319 " movnti %%edx, 12(%3)\n" 320 " movnti %%edx, 12(%3)\n"
320 "4: movl 16(%4), %%eax\n" 321 "4: movl 16(%4), %%eax\n"
321 "41: movl 20(%4), %%edx\n" 322 "41: movl 20(%4), %%edx\n"
322 " movnti %%eax, 16(%3)\n" 323 " movnti %%eax, 16(%3)\n"
323 " movnti %%edx, 20(%3)\n" 324 " movnti %%edx, 20(%3)\n"
324 "10: movl 24(%4), %%eax\n" 325 "10: movl 24(%4), %%eax\n"
325 "51: movl 28(%4), %%edx\n" 326 "51: movl 28(%4), %%edx\n"
326 " movnti %%eax, 24(%3)\n" 327 " movnti %%eax, 24(%3)\n"
327 " movnti %%edx, 28(%3)\n" 328 " movnti %%edx, 28(%3)\n"
328 "11: movl 32(%4), %%eax\n" 329 "11: movl 32(%4), %%eax\n"
329 "61: movl 36(%4), %%edx\n" 330 "61: movl 36(%4), %%edx\n"
330 " movnti %%eax, 32(%3)\n" 331 " movnti %%eax, 32(%3)\n"
331 " movnti %%edx, 36(%3)\n" 332 " movnti %%edx, 36(%3)\n"
332 "12: movl 40(%4), %%eax\n" 333 "12: movl 40(%4), %%eax\n"
333 "71: movl 44(%4), %%edx\n" 334 "71: movl 44(%4), %%edx\n"
334 " movnti %%eax, 40(%3)\n" 335 " movnti %%eax, 40(%3)\n"
335 " movnti %%edx, 44(%3)\n" 336 " movnti %%edx, 44(%3)\n"
336 "13: movl 48(%4), %%eax\n" 337 "13: movl 48(%4), %%eax\n"
337 "81: movl 52(%4), %%edx\n" 338 "81: movl 52(%4), %%edx\n"
338 " movnti %%eax, 48(%3)\n" 339 " movnti %%eax, 48(%3)\n"
339 " movnti %%edx, 52(%3)\n" 340 " movnti %%edx, 52(%3)\n"
340 "14: movl 56(%4), %%eax\n" 341 "14: movl 56(%4), %%eax\n"
341 "91: movl 60(%4), %%edx\n" 342 "91: movl 60(%4), %%edx\n"
342 " movnti %%eax, 56(%3)\n" 343 " movnti %%eax, 56(%3)\n"
343 " movnti %%edx, 60(%3)\n" 344 " movnti %%edx, 60(%3)\n"
344 " addl $-64, %0\n" 345 " addl $-64, %0\n"
345 " addl $64, %4\n" 346 " addl $64, %4\n"
346 " addl $64, %3\n" 347 " addl $64, %3\n"
347 " cmpl $63, %0\n" 348 " cmpl $63, %0\n"
348 " ja 0b\n" 349 " ja 0b\n"
349 " sfence \n" 350 " sfence \n"
350 "5: movl %0, %%eax\n" 351 "5: movl %0, %%eax\n"
351 " shrl $2, %0\n" 352 " shrl $2, %0\n"
352 " andl $3, %%eax\n" 353 " andl $3, %%eax\n"
353 " cld\n" 354 " cld\n"
354 "6: rep; movsl\n" 355 "6: rep; movsl\n"
355 " movl %%eax,%0\n" 356 " movl %%eax,%0\n"
356 "7: rep; movsb\n" 357 "7: rep; movsb\n"
357 "8:\n" 358 "8:\n"
358 ".section .fixup,\"ax\"\n" 359 ".section .fixup,\"ax\"\n"
359 "9: lea 0(%%eax,%0,4),%0\n" 360 "9: lea 0(%%eax,%0,4),%0\n"
360 "16: pushl %0\n" 361 "16: pushl %0\n"
361 " pushl %%eax\n" 362 " pushl %%eax\n"
362 " xorl %%eax,%%eax\n" 363 " xorl %%eax,%%eax\n"
363 " rep; stosb\n" 364 " rep; stosb\n"
364 " popl %%eax\n" 365 " popl %%eax\n"
365 " popl %0\n" 366 " popl %0\n"
366 " jmp 8b\n" 367 " jmp 8b\n"
367 ".previous\n" 368 ".previous\n"
368 _ASM_EXTABLE(0b,16b) 369 _ASM_EXTABLE(0b,16b)
369 _ASM_EXTABLE(1b,16b) 370 _ASM_EXTABLE(1b,16b)
370 _ASM_EXTABLE(2b,16b) 371 _ASM_EXTABLE(2b,16b)
371 _ASM_EXTABLE(21b,16b) 372 _ASM_EXTABLE(21b,16b)
372 _ASM_EXTABLE(3b,16b) 373 _ASM_EXTABLE(3b,16b)
373 _ASM_EXTABLE(31b,16b) 374 _ASM_EXTABLE(31b,16b)
374 _ASM_EXTABLE(4b,16b) 375 _ASM_EXTABLE(4b,16b)
375 _ASM_EXTABLE(41b,16b) 376 _ASM_EXTABLE(41b,16b)
376 _ASM_EXTABLE(10b,16b) 377 _ASM_EXTABLE(10b,16b)
377 _ASM_EXTABLE(51b,16b) 378 _ASM_EXTABLE(51b,16b)
378 _ASM_EXTABLE(11b,16b) 379 _ASM_EXTABLE(11b,16b)
379 _ASM_EXTABLE(61b,16b) 380 _ASM_EXTABLE(61b,16b)
380 _ASM_EXTABLE(12b,16b) 381 _ASM_EXTABLE(12b,16b)
381 _ASM_EXTABLE(71b,16b) 382 _ASM_EXTABLE(71b,16b)
382 _ASM_EXTABLE(13b,16b) 383 _ASM_EXTABLE(13b,16b)
383 _ASM_EXTABLE(81b,16b) 384 _ASM_EXTABLE(81b,16b)
384 _ASM_EXTABLE(14b,16b) 385 _ASM_EXTABLE(14b,16b)
385 _ASM_EXTABLE(91b,16b) 386 _ASM_EXTABLE(91b,16b)
386 _ASM_EXTABLE(6b,9b) 387 _ASM_EXTABLE(6b,9b)
387 _ASM_EXTABLE(7b,16b) 388 _ASM_EXTABLE(7b,16b)
388 : "=&c"(size), "=&D" (d0), "=&S" (d1) 389 : "=&c"(size), "=&D" (d0), "=&S" (d1)
389 : "1"(to), "2"(from), "0"(size) 390 : "1"(to), "2"(from), "0"(size)
390 : "eax", "edx", "memory"); 391 : "eax", "edx", "memory");
391 return size; 392 return size;
392 } 393 }
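Note on the fixup section above (labels 9 and 16): it recovers a byte count from whichever loop faulted — label 9 rebuilds it from the dword count left over in the "rep; movsl" — and then zero-fills that many bytes at the current destination before returning it as the uncopied size. A small userspace model of that recovery, with a hypothetical helper name, might look like this:

    /* Hypothetical model of the fixup arithmetic above: on a fault in the
     * dword copy (label 6), %0 holds the dwords still to move and %eax the
     * odd bytes, so "lea 0(%%eax,%0,4),%0" rebuilds
     * bytes_left = odd_bytes + 4 * dwords_left, and label 16 then clears
     * that many destination bytes with "rep; stosb". */
    #include <stddef.h>
    #include <string.h>

    static size_t fixup_zero_tail(void *to, size_t dwords_left, size_t odd_bytes)
    {
            size_t bytes_left = odd_bytes + 4 * dwords_left;   /* lea 0(%eax,%0,4) */

            memset(to, 0, bytes_left);     /* rep; stosb with %eax == 0 */
            return bytes_left;             /* reported back as "size" */
    }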
393 394
394 static unsigned long __copy_user_intel_nocache(void *to, 395 static unsigned long __copy_user_intel_nocache(void *to,
395 const void __user *from, unsigned long size) 396 const void __user *from, unsigned long size)
396 { 397 {
397 int d0, d1; 398 int d0, d1;
398 399
399 __asm__ __volatile__( 400 __asm__ __volatile__(
400 " .align 2,0x90\n" 401 " .align 2,0x90\n"
401 "0: movl 32(%4), %%eax\n" 402 "0: movl 32(%4), %%eax\n"
402 " cmpl $67, %0\n" 403 " cmpl $67, %0\n"
403 " jbe 2f\n" 404 " jbe 2f\n"
404 "1: movl 64(%4), %%eax\n" 405 "1: movl 64(%4), %%eax\n"
405 " .align 2,0x90\n" 406 " .align 2,0x90\n"
406 "2: movl 0(%4), %%eax\n" 407 "2: movl 0(%4), %%eax\n"
407 "21: movl 4(%4), %%edx\n" 408 "21: movl 4(%4), %%edx\n"
408 " movnti %%eax, 0(%3)\n" 409 " movnti %%eax, 0(%3)\n"
409 " movnti %%edx, 4(%3)\n" 410 " movnti %%edx, 4(%3)\n"
410 "3: movl 8(%4), %%eax\n" 411 "3: movl 8(%4), %%eax\n"
411 "31: movl 12(%4),%%edx\n" 412 "31: movl 12(%4),%%edx\n"
412 " movnti %%eax, 8(%3)\n" 413 " movnti %%eax, 8(%3)\n"
413 " movnti %%edx, 12(%3)\n" 414 " movnti %%edx, 12(%3)\n"
414 "4: movl 16(%4), %%eax\n" 415 "4: movl 16(%4), %%eax\n"
415 "41: movl 20(%4), %%edx\n" 416 "41: movl 20(%4), %%edx\n"
416 " movnti %%eax, 16(%3)\n" 417 " movnti %%eax, 16(%3)\n"
417 " movnti %%edx, 20(%3)\n" 418 " movnti %%edx, 20(%3)\n"
418 "10: movl 24(%4), %%eax\n" 419 "10: movl 24(%4), %%eax\n"
419 "51: movl 28(%4), %%edx\n" 420 "51: movl 28(%4), %%edx\n"
420 " movnti %%eax, 24(%3)\n" 421 " movnti %%eax, 24(%3)\n"
421 " movnti %%edx, 28(%3)\n" 422 " movnti %%edx, 28(%3)\n"
422 "11: movl 32(%4), %%eax\n" 423 "11: movl 32(%4), %%eax\n"
423 "61: movl 36(%4), %%edx\n" 424 "61: movl 36(%4), %%edx\n"
424 " movnti %%eax, 32(%3)\n" 425 " movnti %%eax, 32(%3)\n"
425 " movnti %%edx, 36(%3)\n" 426 " movnti %%edx, 36(%3)\n"
426 "12: movl 40(%4), %%eax\n" 427 "12: movl 40(%4), %%eax\n"
427 "71: movl 44(%4), %%edx\n" 428 "71: movl 44(%4), %%edx\n"
428 " movnti %%eax, 40(%3)\n" 429 " movnti %%eax, 40(%3)\n"
429 " movnti %%edx, 44(%3)\n" 430 " movnti %%edx, 44(%3)\n"
430 "13: movl 48(%4), %%eax\n" 431 "13: movl 48(%4), %%eax\n"
431 "81: movl 52(%4), %%edx\n" 432 "81: movl 52(%4), %%edx\n"
432 " movnti %%eax, 48(%3)\n" 433 " movnti %%eax, 48(%3)\n"
433 " movnti %%edx, 52(%3)\n" 434 " movnti %%edx, 52(%3)\n"
434 "14: movl 56(%4), %%eax\n" 435 "14: movl 56(%4), %%eax\n"
435 "91: movl 60(%4), %%edx\n" 436 "91: movl 60(%4), %%edx\n"
436 " movnti %%eax, 56(%3)\n" 437 " movnti %%eax, 56(%3)\n"
437 " movnti %%edx, 60(%3)\n" 438 " movnti %%edx, 60(%3)\n"
438 " addl $-64, %0\n" 439 " addl $-64, %0\n"
439 " addl $64, %4\n" 440 " addl $64, %4\n"
440 " addl $64, %3\n" 441 " addl $64, %3\n"
441 " cmpl $63, %0\n" 442 " cmpl $63, %0\n"
442 " ja 0b\n" 443 " ja 0b\n"
443 " sfence \n" 444 " sfence \n"
444 "5: movl %0, %%eax\n" 445 "5: movl %0, %%eax\n"
445 " shrl $2, %0\n" 446 " shrl $2, %0\n"
446 " andl $3, %%eax\n" 447 " andl $3, %%eax\n"
447 " cld\n" 448 " cld\n"
448 "6: rep; movsl\n" 449 "6: rep; movsl\n"
449 " movl %%eax,%0\n" 450 " movl %%eax,%0\n"
450 "7: rep; movsb\n" 451 "7: rep; movsb\n"
451 "8:\n" 452 "8:\n"
452 ".section .fixup,\"ax\"\n" 453 ".section .fixup,\"ax\"\n"
453 "9: lea 0(%%eax,%0,4),%0\n" 454 "9: lea 0(%%eax,%0,4),%0\n"
454 "16: jmp 8b\n" 455 "16: jmp 8b\n"
455 ".previous\n" 456 ".previous\n"
456 _ASM_EXTABLE(0b,16b) 457 _ASM_EXTABLE(0b,16b)
457 _ASM_EXTABLE(1b,16b) 458 _ASM_EXTABLE(1b,16b)
458 _ASM_EXTABLE(2b,16b) 459 _ASM_EXTABLE(2b,16b)
459 _ASM_EXTABLE(21b,16b) 460 _ASM_EXTABLE(21b,16b)
460 _ASM_EXTABLE(3b,16b) 461 _ASM_EXTABLE(3b,16b)
461 _ASM_EXTABLE(31b,16b) 462 _ASM_EXTABLE(31b,16b)
462 _ASM_EXTABLE(4b,16b) 463 _ASM_EXTABLE(4b,16b)
463 _ASM_EXTABLE(41b,16b) 464 _ASM_EXTABLE(41b,16b)
464 _ASM_EXTABLE(10b,16b) 465 _ASM_EXTABLE(10b,16b)
465 _ASM_EXTABLE(51b,16b) 466 _ASM_EXTABLE(51b,16b)
466 _ASM_EXTABLE(11b,16b) 467 _ASM_EXTABLE(11b,16b)
467 _ASM_EXTABLE(61b,16b) 468 _ASM_EXTABLE(61b,16b)
468 _ASM_EXTABLE(12b,16b) 469 _ASM_EXTABLE(12b,16b)
469 _ASM_EXTABLE(71b,16b) 470 _ASM_EXTABLE(71b,16b)
470 _ASM_EXTABLE(13b,16b) 471 _ASM_EXTABLE(13b,16b)
471 _ASM_EXTABLE(81b,16b) 472 _ASM_EXTABLE(81b,16b)
472 _ASM_EXTABLE(14b,16b) 473 _ASM_EXTABLE(14b,16b)
473 _ASM_EXTABLE(91b,16b) 474 _ASM_EXTABLE(91b,16b)
474 _ASM_EXTABLE(6b,9b) 475 _ASM_EXTABLE(6b,9b)
475 _ASM_EXTABLE(7b,16b) 476 _ASM_EXTABLE(7b,16b)
476 : "=&c"(size), "=&D" (d0), "=&S" (d1) 477 : "=&c"(size), "=&D" (d0), "=&S" (d1)
477 : "1"(to), "2"(from), "0"(size) 478 : "1"(to), "2"(from), "0"(size)
478 : "eax", "edx", "memory"); 479 : "eax", "edx", "memory");
479 return size; 480 return size;
480 } 481 }
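The two *_intel_nocache variants above share the same inner loop: the leading movl loads at 32(%4)/64(%4) appear to pre-touch the source lines, 64-byte blocks are then written with movnti so the stores bypass the cache, an sfence orders the streaming stores, and rep movsl/movsb finish the 0..63-byte tail. They differ only in the fixup section: the zeroing variant clears what it could not copy, the plain variant just reports it. A plain-C sketch of the loop shape, illustrative only, with SSE2 intrinsics standing in for movnti/sfence:

    /* Illustrative sketch (not the kernel code): copy 64-byte blocks with
     * non-temporal stores, fence, then fall back to a simple tail copy. */
    #include <emmintrin.h>   /* _mm_stream_si32, _mm_sfence (SSE2) */
    #include <stddef.h>
    #include <string.h>

    static void copy_nocache_sketch(void *to, const void *from, size_t size)
    {
            int *d = to;
            const int *s = from;

            while (size > 63) {                       /* "cmpl $63, %0 ; ja 0b" */
                    for (int i = 0; i < 16; i++)      /* 16 dwords = 64 bytes */
                            _mm_stream_si32(&d[i], s[i]);   /* movnti */
                    d += 16;
                    s += 16;
                    size -= 64;
            }
            _mm_sfence();                             /* order the streaming stores */
            memcpy(d, s, size);                       /* "rep; movsl" + "rep; movsb" tail */
    }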
481 482
482 #else 483 #else
483 484
484 /* 485 /*
485 * Leave these declared but undefined. There should not be any references to 486 * Leave these declared but undefined. There should not be any references to
486 * them 487 * them
487 */ 488 */
488 unsigned long __copy_user_zeroing_intel(void *to, const void __user *from, 489 unsigned long __copy_user_zeroing_intel(void *to, const void __user *from,
489 unsigned long size); 490 unsigned long size);
490 unsigned long __copy_user_intel(void __user *to, const void *from, 491 unsigned long __copy_user_intel(void __user *to, const void *from,
491 unsigned long size); 492 unsigned long size);
492 unsigned long __copy_user_zeroing_intel_nocache(void *to, 493 unsigned long __copy_user_zeroing_intel_nocache(void *to,
493 const void __user *from, unsigned long size); 494 const void __user *from, unsigned long size);
494 #endif /* CONFIG_X86_INTEL_USERCOPY */ 495 #endif /* CONFIG_X86_INTEL_USERCOPY */
495 496
496 /* Generic arbitrary sized copy. */ 497 /* Generic arbitrary sized copy. */
497 #define __copy_user(to, from, size) \ 498 #define __copy_user(to, from, size) \
498 do { \ 499 do { \
499 int __d0, __d1, __d2; \ 500 int __d0, __d1, __d2; \
500 __asm__ __volatile__( \ 501 __asm__ __volatile__( \
501 " cmp $7,%0\n" \ 502 " cmp $7,%0\n" \
502 " jbe 1f\n" \ 503 " jbe 1f\n" \
503 " movl %1,%0\n" \ 504 " movl %1,%0\n" \
504 " negl %0\n" \ 505 " negl %0\n" \
505 " andl $7,%0\n" \ 506 " andl $7,%0\n" \
506 " subl %0,%3\n" \ 507 " subl %0,%3\n" \
507 "4: rep; movsb\n" \ 508 "4: rep; movsb\n" \
508 " movl %3,%0\n" \ 509 " movl %3,%0\n" \
509 " shrl $2,%0\n" \ 510 " shrl $2,%0\n" \
510 " andl $3,%3\n" \ 511 " andl $3,%3\n" \
511 " .align 2,0x90\n" \ 512 " .align 2,0x90\n" \
512 "0: rep; movsl\n" \ 513 "0: rep; movsl\n" \
513 " movl %3,%0\n" \ 514 " movl %3,%0\n" \
514 "1: rep; movsb\n" \ 515 "1: rep; movsb\n" \
515 "2:\n" \ 516 "2:\n" \
516 ".section .fixup,\"ax\"\n" \ 517 ".section .fixup,\"ax\"\n" \
517 "5: addl %3,%0\n" \ 518 "5: addl %3,%0\n" \
518 " jmp 2b\n" \ 519 " jmp 2b\n" \
519 "3: lea 0(%3,%0,4),%0\n" \ 520 "3: lea 0(%3,%0,4),%0\n" \
520 " jmp 2b\n" \ 521 " jmp 2b\n" \
521 ".previous\n" \ 522 ".previous\n" \
522 _ASM_EXTABLE(4b,5b) \ 523 _ASM_EXTABLE(4b,5b) \
523 _ASM_EXTABLE(0b,3b) \ 524 _ASM_EXTABLE(0b,3b) \
524 _ASM_EXTABLE(1b,2b) \ 525 _ASM_EXTABLE(1b,2b) \
525 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ 526 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
526 : "3"(size), "0"(size), "1"(to), "2"(from) \ 527 : "3"(size), "0"(size), "1"(to), "2"(from) \
527 : "memory"); \ 528 : "memory"); \
528 } while (0) 529 } while (0)
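The generic __copy_user() above first byte-copies until the destination is 8-byte aligned (the negl/andl $7 sequence), moves the bulk as 32-bit words, and finishes the last 0..3 bytes. A plain-C model of that layout, for illustration only:

    /* Plain-C model (illustrative only) of the generic __copy_user() shape. */
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static void copy_user_model(void *to, const void *from, size_t size)
    {
            unsigned char *d = to;
            const unsigned char *s = from;

            if (size > 7) {
                    size_t head = (size_t)(-(uintptr_t)d & 7); /* negl %0; andl $7,%0 */

                    size -= head;
                    while (head--)                             /* "4: rep; movsb" */
                            *d++ = *s++;
            }
            for (size_t words = size / 4; words--; d += 4, s += 4)
                    memcpy(d, s, 4);                           /* "0: rep; movsl" */
            for (size_t tail = size & 3; tail--; )             /* "1: rep; movsb" */
                    *d++ = *s++;
    }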
529 530
530 #define __copy_user_zeroing(to, from, size) \ 531 #define __copy_user_zeroing(to, from, size) \
531 do { \ 532 do { \
532 int __d0, __d1, __d2; \ 533 int __d0, __d1, __d2; \
533 __asm__ __volatile__( \ 534 __asm__ __volatile__( \
534 " cmp $7,%0\n" \ 535 " cmp $7,%0\n" \
535 " jbe 1f\n" \ 536 " jbe 1f\n" \
536 " movl %1,%0\n" \ 537 " movl %1,%0\n" \
537 " negl %0\n" \ 538 " negl %0\n" \
538 " andl $7,%0\n" \ 539 " andl $7,%0\n" \
539 " subl %0,%3\n" \ 540 " subl %0,%3\n" \
540 "4: rep; movsb\n" \ 541 "4: rep; movsb\n" \
541 " movl %3,%0\n" \ 542 " movl %3,%0\n" \
542 " shrl $2,%0\n" \ 543 " shrl $2,%0\n" \
543 " andl $3,%3\n" \ 544 " andl $3,%3\n" \
544 " .align 2,0x90\n" \ 545 " .align 2,0x90\n" \
545 "0: rep; movsl\n" \ 546 "0: rep; movsl\n" \
546 " movl %3,%0\n" \ 547 " movl %3,%0\n" \
547 "1: rep; movsb\n" \ 548 "1: rep; movsb\n" \
548 "2:\n" \ 549 "2:\n" \
549 ".section .fixup,\"ax\"\n" \ 550 ".section .fixup,\"ax\"\n" \
550 "5: addl %3,%0\n" \ 551 "5: addl %3,%0\n" \
551 " jmp 6f\n" \ 552 " jmp 6f\n" \
552 "3: lea 0(%3,%0,4),%0\n" \ 553 "3: lea 0(%3,%0,4),%0\n" \
553 "6: pushl %0\n" \ 554 "6: pushl %0\n" \
554 " pushl %%eax\n" \ 555 " pushl %%eax\n" \
555 " xorl %%eax,%%eax\n" \ 556 " xorl %%eax,%%eax\n" \
556 " rep; stosb\n" \ 557 " rep; stosb\n" \
557 " popl %%eax\n" \ 558 " popl %%eax\n" \
558 " popl %0\n" \ 559 " popl %0\n" \
559 " jmp 2b\n" \ 560 " jmp 2b\n" \
560 ".previous\n" \ 561 ".previous\n" \
561 _ASM_EXTABLE(4b,5b) \ 562 _ASM_EXTABLE(4b,5b) \
562 _ASM_EXTABLE(0b,3b) \ 563 _ASM_EXTABLE(0b,3b) \
563 _ASM_EXTABLE(1b,6b) \ 564 _ASM_EXTABLE(1b,6b) \
564 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ 565 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
565 : "3"(size), "0"(size), "1"(to), "2"(from) \ 566 : "3"(size), "0"(size), "1"(to), "2"(from) \
566 : "memory"); \ 567 : "memory"); \
567 } while (0) 568 } while (0)
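__copy_user_zeroing() follows the same copy layout but changes the fault contract: whatever could not be read from user space is replaced by zeroes in the kernel destination, so the caller never sees stale kernel memory. A contract model, with a hypothetical "faulted_at" parameter standing in for the fault point:

    /* Contract model (illustrative only; 'faulted_at' is invented for the
     * sketch): copy the part that succeeds, zero the rest, return the
     * number of bytes that were not copied. */
    #include <stddef.h>
    #include <string.h>

    static size_t copy_user_zeroing_model(void *to, const void *from,
                                          size_t size, size_t faulted_at)
    {
            size_t copied = faulted_at < size ? faulted_at : size;
            size_t left   = size - copied;

            memcpy(to, from, copied);             /* the successful part */
            memset((char *)to + copied, 0, left); /* the ".fixup" rep; stosb */
            return left;                          /* bytes not copied */
    }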
568 569
569 unsigned long __copy_to_user_ll(void __user *to, const void *from, 570 unsigned long __copy_to_user_ll(void __user *to, const void *from,
570 unsigned long n) 571 unsigned long n)
571 { 572 {
572 #ifndef CONFIG_X86_WP_WORKS_OK 573 #ifndef CONFIG_X86_WP_WORKS_OK
573 if (unlikely(boot_cpu_data.wp_works_ok == 0) && 574 if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
574 ((unsigned long)to) < TASK_SIZE) { 575 ((unsigned long)to) < TASK_SIZE) {
575 /* 576 /*
576 * When we are in an atomic section (see 577 * When we are in an atomic section (see
577 * mm/filemap.c:file_read_actor), return the full 578 * mm/filemap.c:file_read_actor), return the full
578 * length to take the slow path. 579 * length to take the slow path.
579 */ 580 */
580 if (in_atomic()) 581 if (in_atomic())
581 return n; 582 return n;
582 583
583 /* 584 /*
584 * CPU does not honor the WP bit when writing 585 * CPU does not honor the WP bit when writing
585 * from supervisory mode, and due to preemption or SMP, 586 * from supervisory mode, and due to preemption or SMP,
586 * the page tables can change at any time. 587 * the page tables can change at any time.
587 * Do it manually. Manfred <manfred@colorfullife.com> 588 * Do it manually. Manfred <manfred@colorfullife.com>
588 */ 589 */
589 while (n) { 590 while (n) {
590 unsigned long offset = ((unsigned long)to)%PAGE_SIZE; 591 unsigned long offset = ((unsigned long)to)%PAGE_SIZE;
591 unsigned long len = PAGE_SIZE - offset; 592 unsigned long len = PAGE_SIZE - offset;
592 int retval; 593 int retval;
593 struct page *pg; 594 struct page *pg;
594 void *maddr; 595 void *maddr;
595 596
596 if (len > n) 597 if (len > n)
597 len = n; 598 len = n;
598 599
599 survive: 600 survive:
600 down_read(&current->mm->mmap_sem); 601 down_read(&current->mm->mmap_sem);
601 retval = get_user_pages(current, current->mm, 602 retval = get_user_pages(current, current->mm,
602 (unsigned long)to, 1, 1, 0, &pg, NULL); 603 (unsigned long)to, 1, 1, 0, &pg, NULL);
603 604
604 if (retval == -ENOMEM && is_global_init(current)) { 605 if (retval == -ENOMEM && is_global_init(current)) {
605 up_read(&current->mm->mmap_sem); 606 up_read(&current->mm->mmap_sem);
606 congestion_wait(BLK_RW_ASYNC, HZ/50); 607 congestion_wait(BLK_RW_ASYNC, HZ/50);
607 goto survive; 608 goto survive;
608 } 609 }
609 610
610 if (retval != 1) { 611 if (retval != 1) {
611 up_read(&current->mm->mmap_sem); 612 up_read(&current->mm->mmap_sem);
612 break; 613 break;
613 } 614 }
614 615
615 maddr = kmap_atomic(pg); 616 maddr = kmap_atomic(pg);
616 memcpy(maddr + offset, from, len); 617 memcpy(maddr + offset, from, len);
617 kunmap_atomic(maddr); 618 kunmap_atomic(maddr);
618 set_page_dirty_lock(pg); 619 set_page_dirty_lock(pg);
619 put_page(pg); 620 put_page(pg);
620 up_read(&current->mm->mmap_sem); 621 up_read(&current->mm->mmap_sem);
621 622
622 from += len; 623 from += len;
623 to += len; 624 to += len;
624 n -= len; 625 n -= len;
625 } 626 }
626 return n; 627 return n;
627 } 628 }
628 #endif 629 #endif
630 stac();
629 if (movsl_is_ok(to, from, n)) 631 if (movsl_is_ok(to, from, n))
630 __copy_user(to, from, n); 632 __copy_user(to, from, n);
631 else 633 else
632 n = __copy_user_intel(to, from, n); 634 n = __copy_user_intel(to, from, n);
635 clac();
633 return n; 636 return n;
634 } 637 }
635 EXPORT_SYMBOL(__copy_to_user_ll); 638 EXPORT_SYMBOL(__copy_to_user_ll);
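The stac()/clac() pair added above comes from the new <asm/smap.h>: stac() sets EFLAGS.AC so the user-space accesses inside __copy_user()/__copy_user_intel() are permitted under SMAP, and clac() clears it again as soon as the copy is done. As the commit message notes, the instructions are patched in via alternative(), so CPUs without SMAP see only a no-op. Roughly, as a sketch only (see the header added elsewhere in this series for the exact definitions):

    /* Sketch of the <asm/smap.h> helpers: a no-op that alternative()
     * replaces with the real STAC/CLAC instruction when
     * X86_FEATURE_SMAP is present. */
    static __always_inline void stac(void)
    {
            /* Set EFLAGS.AC: user-space accesses are allowed from here on */
            alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
    }

    static __always_inline void clac(void)
    {
            /* Clear EFLAGS.AC: user-space accesses fault again under SMAP */
            alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
    }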
636 639
637 unsigned long __copy_from_user_ll(void *to, const void __user *from, 640 unsigned long __copy_from_user_ll(void *to, const void __user *from,
638 unsigned long n) 641 unsigned long n)
639 { 642 {
643 stac();
640 if (movsl_is_ok(to, from, n)) 644 if (movsl_is_ok(to, from, n))
641 __copy_user_zeroing(to, from, n); 645 __copy_user_zeroing(to, from, n);
642 else 646 else
643 n = __copy_user_zeroing_intel(to, from, n); 647 n = __copy_user_zeroing_intel(to, from, n);
648 clac();
644 return n; 649 return n;
645 } 650 }
646 EXPORT_SYMBOL(__copy_from_user_ll); 651 EXPORT_SYMBOL(__copy_from_user_ll);
647 652
648 unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, 653 unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from,
649 unsigned long n) 654 unsigned long n)
650 { 655 {
656 stac();
651 if (movsl_is_ok(to, from, n)) 657 if (movsl_is_ok(to, from, n))
652 __copy_user(to, from, n); 658 __copy_user(to, from, n);
653 else 659 else
654 n = __copy_user_intel((void __user *)to, 660 n = __copy_user_intel((void __user *)to,
655 (const void *)from, n); 661 (const void *)from, n);
662 clac();
656 return n; 663 return n;
657 } 664 }
658 EXPORT_SYMBOL(__copy_from_user_ll_nozero); 665 EXPORT_SYMBOL(__copy_from_user_ll_nozero);
659 666
660 unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, 667 unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
661 unsigned long n) 668 unsigned long n)
662 { 669 {
670 stac();
663 #ifdef CONFIG_X86_INTEL_USERCOPY 671 #ifdef CONFIG_X86_INTEL_USERCOPY
664 if (n > 64 && cpu_has_xmm2) 672 if (n > 64 && cpu_has_xmm2)
665 n = __copy_user_zeroing_intel_nocache(to, from, n); 673 n = __copy_user_zeroing_intel_nocache(to, from, n);
666 else 674 else
667 __copy_user_zeroing(to, from, n); 675 __copy_user_zeroing(to, from, n);
668 #else 676 #else
669 __copy_user_zeroing(to, from, n); 677 __copy_user_zeroing(to, from, n);
670 #endif 678 #endif
679 clac();
671 return n; 680 return n;
672 } 681 }
673 EXPORT_SYMBOL(__copy_from_user_ll_nocache); 682 EXPORT_SYMBOL(__copy_from_user_ll_nocache);
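The _nocache entry point above is intended for copies whose destination will not be read again soon, so filling the cache with it would be wasted work. A hedged, hypothetical example of the calling pattern (function and variable names invented for illustration):

    /* Hypothetical caller: pull a large user buffer into a staging area
     * that will shortly be handed to a device, bypassing the CPU cache
     * on the kernel side. */
    static int stage_user_payload(void *staging, const void __user *ubuf,
                                  unsigned long len)
    {
            unsigned long left;

            if (!access_ok(VERIFY_READ, ubuf, len))
                    return -EFAULT;

            left = __copy_from_user_ll_nocache(staging, ubuf, len);
            return left ? -EFAULT : 0;   /* uncopied tail was zero-filled */
    }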
674 683
675 unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, 684 unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
676 unsigned long n) 685 unsigned long n)
677 { 686 {
687 stac();
678 #ifdef CONFIG_X86_INTEL_USERCOPY 688 #ifdef CONFIG_X86_INTEL_USERCOPY
679 if (n > 64 && cpu_has_xmm2) 689 if (n > 64 && cpu_has_xmm2)
680 n = __copy_user_intel_nocache(to, from, n); 690 n = __copy_user_intel_nocache(to, from, n);
681 else 691 else
682 __copy_user(to, from, n); 692 __copy_user(to, from, n);
683 #else 693 #else
684 __copy_user(to, from, n); 694 __copy_user(to, from, n);
685 #endif 695 #endif
696 clac();
686 return n; 697 return n;
687 } 698 }
688 EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); 699 EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
689 700
690 /** 701 /**
691 * copy_to_user: - Copy a block of data into user space. 702 * copy_to_user: - Copy a block of data into user space.
692 * @to: Destination address, in user space. 703 * @to: Destination address, in user space.
693 * @from: Source address, in kernel space. 704 * @from: Source address, in kernel space.
694 * @n: Number of bytes to copy. 705 * @n: Number of bytes to copy.
695 * 706 *
696 * Context: User context only. This function may sleep. 707 * Context: User context only. This function may sleep.
697 * 708 *
698 * Copy data from kernel space to user space. 709 * Copy data from kernel space to user space.
699 * 710 *
700 * Returns number of bytes that could not be copied. 711 * Returns number of bytes that could not be copied.
701 * On success, this will be zero. 712 * On success, this will be zero.
702 */ 713 */
703 unsigned long 714 unsigned long
704 copy_to_user(void __user *to, const void *from, unsigned long n) 715 copy_to_user(void __user *to, const void *from, unsigned long n)
705 { 716 {
706 if (access_ok(VERIFY_WRITE, to, n)) 717 if (access_ok(VERIFY_WRITE, to, n))
707 n = __copy_to_user(to, from, n); 718 n = __copy_to_user(to, from, n);
708 return n; 719 return n;
709 } 720 }
710 EXPORT_SYMBOL(copy_to_user); 721 EXPORT_SYMBOL(copy_to_user);
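As the kernel-doc above says, copy_to_user() returns the number of bytes it could not copy; callers conventionally turn a non-zero result into -EFAULT. A hypothetical read-style handler, names invented for illustration:

    /* Hypothetical example only: hand a kernel buffer back to user space. */
    static ssize_t demo_read(char __user *ubuf, size_t count,
                             const void *kbuf, size_t avail)
    {
            size_t n = count < avail ? count : avail;

            if (copy_to_user(ubuf, kbuf, n))
                    return -EFAULT;         /* some bytes were not copied */
            return n;                       /* bytes actually delivered */
    }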
711 722
712 /** 723 /**
713 * copy_from_user: - Copy a block of data from user space. 724 * copy_from_user: - Copy a block of data from user space.
714 * @to: Destination address, in kernel space. 725 * @to: Destination address, in kernel space.
715 * @from: Source address, in user space. 726 * @from: Source address, in user space.
716 * @n: Number of bytes to copy. 727 * @n: Number of bytes to copy.
717 * 728 *
718 * Context: User context only. This function may sleep. 729 * Context: User context only. This function may sleep.
719 * 730 *
720 * Copy data from user space to kernel space. 731 * Copy data from user space to kernel space.
721 * 732 *
722 * Returns number of bytes that could not be copied. 733 * Returns number of bytes that could not be copied.
723 * On success, this will be zero. 734 * On success, this will be zero.
724 * 735 *
725 * If some data could not be copied, this function will pad the copied 736 * If some data could not be copied, this function will pad the copied
726 * data to the requested size using zero bytes. 737 * data to the requested size using zero bytes.
727 */ 738 */
728 unsigned long 739 unsigned long
729 _copy_from_user(void *to, const void __user *from, unsigned long n) 740 _copy_from_user(void *to, const void __user *from, unsigned long n)
730 { 741 {
731 if (access_ok(VERIFY_READ, from, n)) 742 if (access_ok(VERIFY_READ, from, n))
732 n = __copy_from_user(to, from, n); 743 n = __copy_from_user(to, from, n);
733 else 744 else
734 memset(to, 0, n); 745 memset(to, 0, n);
735 return n; 746 return n;
736 } 747 }
737 EXPORT_SYMBOL(_copy_from_user); 748 EXPORT_SYMBOL(_copy_from_user);
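A matching hypothetical caller of copy_from_user(), again with invented names, which relies on the documented behaviour above: on a partial fault the uncopied tail of the destination is zero-filled, so the structure is never left holding stale kernel memory.

    /* Hypothetical example only: fetch a fixed-size request from user space. */
    struct demo_req {
            unsigned int cmd;
            unsigned int len;
    };

    static int demo_get_request(struct demo_req *req, const void __user *ubuf)
    {
            if (copy_from_user(req, ubuf, sizeof(*req)))
                    return -EFAULT;         /* partial copy; tail of *req is zeroed */
            return 0;
    }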
738 749
739 void copy_from_user_overflow(void) 750 void copy_from_user_overflow(void)
740 { 751 {
741 WARN(1, "Buffer overflow detected!\n"); 752 WARN(1, "Buffer overflow detected!\n");
742 } 753 }
743 EXPORT_SYMBOL(copy_from_user_overflow); 754 EXPORT_SYMBOL(copy_from_user_overflow);
744 755
arch/x86/lib/usercopy_64.c
1 /* 1 /*
2 * User address space access functions. 2 * User address space access functions.
3 * 3 *
4 * Copyright 1997 Andi Kleen <ak@muc.de> 4 * Copyright 1997 Andi Kleen <ak@muc.de>
5 * Copyright 1997 Linus Torvalds 5 * Copyright 1997 Linus Torvalds
6 * Copyright 2002 Andi Kleen <ak@suse.de> 6 * Copyright 2002 Andi Kleen <ak@suse.de>
7 */ 7 */
8 #include <linux/module.h> 8 #include <linux/module.h>
9 #include <asm/uaccess.h> 9 #include <asm/uaccess.h>
10 10
11 /* 11 /*
12 * Zero Userspace 12 * Zero Userspace
13 */ 13 */
14 14
15 unsigned long __clear_user(void __user *addr, unsigned long size) 15 unsigned long __clear_user(void __user *addr, unsigned long size)
16 { 16 {
17 long __d0; 17 long __d0;
18 might_fault(); 18 might_fault();
19 /* no memory constraint because it doesn't change any memory gcc knows 19 /* no memory constraint because it doesn't change any memory gcc knows
20 about */ 20 about */
21 stac();
21 asm volatile( 22 asm volatile(
22 " testq %[size8],%[size8]\n" 23 " testq %[size8],%[size8]\n"
23 " jz 4f\n" 24 " jz 4f\n"
24 "0: movq %[zero],(%[dst])\n" 25 "0: movq %[zero],(%[dst])\n"
25 " addq %[eight],%[dst]\n" 26 " addq %[eight],%[dst]\n"
26 " decl %%ecx ; jnz 0b\n" 27 " decl %%ecx ; jnz 0b\n"
27 "4: movq %[size1],%%rcx\n" 28 "4: movq %[size1],%%rcx\n"
28 " testl %%ecx,%%ecx\n" 29 " testl %%ecx,%%ecx\n"
29 " jz 2f\n" 30 " jz 2f\n"
30 "1: movb %b[zero],(%[dst])\n" 31 "1: movb %b[zero],(%[dst])\n"
31 " incq %[dst]\n" 32 " incq %[dst]\n"
32 " decl %%ecx ; jnz 1b\n" 33 " decl %%ecx ; jnz 1b\n"
33 "2:\n" 34 "2:\n"
34 ".section .fixup,\"ax\"\n" 35 ".section .fixup,\"ax\"\n"
35 "3: lea 0(%[size1],%[size8],8),%[size8]\n" 36 "3: lea 0(%[size1],%[size8],8),%[size8]\n"
36 " jmp 2b\n" 37 " jmp 2b\n"
37 ".previous\n" 38 ".previous\n"
38 _ASM_EXTABLE(0b,3b) 39 _ASM_EXTABLE(0b,3b)
39 _ASM_EXTABLE(1b,2b) 40 _ASM_EXTABLE(1b,2b)
40 : [size8] "=&c"(size), [dst] "=&D" (__d0) 41 : [size8] "=&c"(size), [dst] "=&D" (__d0)
41 : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), 42 : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
42 [zero] "r" (0UL), [eight] "r" (8UL)); 43 [zero] "r" (0UL), [eight] "r" (8UL));
44 clac();
43 return size; 45 return size;
44 } 46 }
45 EXPORT_SYMBOL(__clear_user); 47 EXPORT_SYMBOL(__clear_user);
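__clear_user() above zeroes the user buffer eight bytes at a time and then finishes the remaining 0..7 bytes individually, returning how many bytes it could not clear. A standalone C model of that loop structure, illustrative only:

    /* Userspace model of the __clear_user() loop shape (no faults modelled). */
    #include <stddef.h>
    #include <string.h>

    static size_t clear_user_model(void *addr, size_t size)
    {
            static const unsigned char zeros[8];
            unsigned char *p = addr;

            for (size_t qwords = size / 8; qwords--; p += 8)
                    memcpy(p, zeros, 8);        /* "0: movq %[zero],(%[dst])" */
            for (size_t rest = size & 7; rest--; )
                    *p++ = 0;                   /* "1: movb %b[zero],(%[dst])" */
            return 0;                           /* bytes left unzeroed */
    }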
46 48
47 unsigned long clear_user(void __user *to, unsigned long n) 49 unsigned long clear_user(void __user *to, unsigned long n)
48 { 50 {
49 if (access_ok(VERIFY_WRITE, to, n)) 51 if (access_ok(VERIFY_WRITE, to, n))
50 return __clear_user(to, n); 52 return __clear_user(to, n);
51 return n; 53 return n;
52 } 54 }
53 EXPORT_SYMBOL(clear_user); 55 EXPORT_SYMBOL(clear_user);
54 56
55 unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) 57 unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len)
56 { 58 {
57 if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { 59 if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) {
58 return copy_user_generic((__force void *)to, (__force void *)from, len); 60 return copy_user_generic((__force void *)to, (__force void *)from, len);
59 } 61 }
60 return len; 62 return len;
61 } 63 }
62 EXPORT_SYMBOL(copy_in_user); 64 EXPORT_SYMBOL(copy_in_user);
63 65
64 /* 66 /*
65 * Try to copy last bytes and clear the rest if needed. 67 * Try to copy last bytes and clear the rest if needed.
66 * Since protection fault in copy_from/to_user is not a normal situation, 68 * Since protection fault in copy_from/to_user is not a normal situation,
67 * it is not necessary to optimize tail handling. 69 * it is not necessary to optimize tail handling.
68 */ 70 */
69 unsigned long 71 unsigned long
70 copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) 72 copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
71 { 73 {
72 char c; 74 char c;
73 unsigned zero_len; 75 unsigned zero_len;
74 76
75 for (; len; --len) { 77 for (; len; --len) {
76 if (__get_user_nocheck(c, from++, sizeof(char))) 78 if (__get_user_nocheck(c, from++, sizeof(char)))
77 break; 79 break;
78 if (__put_user_nocheck(c, to++, sizeof(char))) 80 if (__put_user_nocheck(c, to++, sizeof(char)))
79 break; 81 break;
80 } 82 }
81 83
82 for (c = 0, zero_len = len; zerorest && zero_len; --zero_len) 84 for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
83 if (__put_user_nocheck(c, to++, sizeof(char))) 85 if (__put_user_nocheck(c, to++, sizeof(char)))
84 break; 86 break;
87 clac();
85 return len; 88 return len;
86 } 89 }
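copy_user_handle_tail() is the C slow path reached from the fixup sections of the 64-bit assembly copy routines after a fault; those callers are entered with the AC flag already set (their STAC is added elsewhere in this patch), which is why only a clac() appears here to close the bracket once the byte-by-byte retry is done. A plain model of the retry itself, with a hypothetical "copyable" parameter standing in for the fault point:

    /* Illustrative model: copy what can still be copied one byte at a time,
     * optionally zero whatever remains, and report the bytes not copied. */
    #include <stddef.h>

    static size_t handle_tail_model(char *to, const char *from,
                                    size_t len, int zerorest, size_t copyable)
    {
            size_t i;

            for (i = 0; i < len && i < copyable; i++)
                    to[i] = from[i];            /* __get_user/__put_user pair */
            if (zerorest)
                    for (size_t j = i; j < len; j++)
                            to[j] = 0;          /* zero the rest on request */
            return len - i;                     /* bytes still not copied */
    }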
87 90