Commit 63bcff2a307b9bcc712a8251eb27df8b2e117967
1 parent a052858fab
Exists in smarc-l5.0.0_1.0.0-ga and in 5 other branches
x86, smap: Add STAC and CLAC instructions to control user space access
When Supervisor Mode Access Prevention (SMAP) is enabled, access to userspace from the kernel is controlled by the AC flag. To make the performance of manipulating that flag acceptable, there are two new instructions, STAC and CLAC, to set and clear it.

This patch adds those instructions, via alternative(), when the SMAP feature is enabled. It also adds X86_EFLAGS_AC unconditionally to the SYSCALL entry mask; there is simply no reason to make that one conditional.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1348256595-29119-9-git-send-email-hpa@linux.intel.com
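The entry-code and uaccess hunks below use the ASM_STAC/ASM_CLAC macros from the new arch/x86/include/asm/smap.h header; that hunk is not reproduced on this page. As a rough sketch of the alternative()-based patching the message describes (the bodies below are illustrative, not quoted from the commit), the C-side helpers look roughly like this:

/*
 * Illustrative sketch only -- not the verbatim contents of the new
 * arch/x86/include/asm/smap.h hunk (not shown on this page).
 */
#include <linux/stringify.h>
#include <asm/alternative.h>	/* alternative() */
#include <asm/cpufeature.h>	/* X86_FEATURE_SMAP */
#include <asm/nops.h>		/* ASM_NOP3 */

/* CLAC/STAC opcodes spelled as raw bytes so older assemblers can emit them. */
#define __ASM_CLAC	.byte 0x0f,0x01,0xca
#define __ASM_STAC	.byte 0x0f,0x01,0xcb

static __always_inline void clac(void)
{
	/* Patched from a 3-byte NOP to CLAC when X86_FEATURE_SMAP is set. */
	alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
}

static __always_inline void stac(void)
{
	/* Patched from a 3-byte NOP to STAC when X86_FEATURE_SMAP is set. */
	alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
}

On CPUs without the SMAP feature bit the alternatives framework leaves the NOPs in place, so each STAC/CLAC site costs only three bytes of padding; with SMAP enabled the bytes are patched to the real instructions at boot.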
Showing 14 changed files with 106 additions and 32 deletions
- arch/x86/ia32/ia32entry.S
- arch/x86/include/asm/fpu-internal.h
- arch/x86/include/asm/futex.h
- arch/x86/include/asm/smap.h
- arch/x86/include/asm/uaccess.h
- arch/x86/include/asm/xsave.h
- arch/x86/kernel/cpu/common.c
- arch/x86/kernel/entry_64.S
- arch/x86/lib/copy_user_64.S
- arch/x86/lib/copy_user_nocache_64.S
- arch/x86/lib/getuser.S
- arch/x86/lib/putuser.S
- arch/x86/lib/usercopy_32.c
- arch/x86/lib/usercopy_64.c
arch/x86/ia32/ia32entry.S
1 | /* | 1 | /* |
2 | * Compatibility mode system call entry point for x86-64. | 2 | * Compatibility mode system call entry point for x86-64. |
3 | * | 3 | * |
4 | * Copyright 2000-2002 Andi Kleen, SuSE Labs. | 4 | * Copyright 2000-2002 Andi Kleen, SuSE Labs. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <asm/dwarf2.h> | 7 | #include <asm/dwarf2.h> |
8 | #include <asm/calling.h> | 8 | #include <asm/calling.h> |
9 | #include <asm/asm-offsets.h> | 9 | #include <asm/asm-offsets.h> |
10 | #include <asm/current.h> | 10 | #include <asm/current.h> |
11 | #include <asm/errno.h> | 11 | #include <asm/errno.h> |
12 | #include <asm/ia32_unistd.h> | 12 | #include <asm/ia32_unistd.h> |
13 | #include <asm/thread_info.h> | 13 | #include <asm/thread_info.h> |
14 | #include <asm/segment.h> | 14 | #include <asm/segment.h> |
15 | #include <asm/irqflags.h> | 15 | #include <asm/irqflags.h> |
16 | #include <asm/asm.h> | 16 | #include <asm/asm.h> |
17 | #include <asm/smap.h> | ||
17 | #include <linux/linkage.h> | 18 | #include <linux/linkage.h> |
18 | #include <linux/err.h> | 19 | #include <linux/err.h> |
19 | 20 | ||
20 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 21 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
21 | #include <linux/elf-em.h> | 22 | #include <linux/elf-em.h> |
22 | #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) | 23 | #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) |
23 | #define __AUDIT_ARCH_LE 0x40000000 | 24 | #define __AUDIT_ARCH_LE 0x40000000 |
24 | 25 | ||
25 | #ifndef CONFIG_AUDITSYSCALL | 26 | #ifndef CONFIG_AUDITSYSCALL |
26 | #define sysexit_audit ia32_ret_from_sys_call | 27 | #define sysexit_audit ia32_ret_from_sys_call |
27 | #define sysretl_audit ia32_ret_from_sys_call | 28 | #define sysretl_audit ia32_ret_from_sys_call |
28 | #endif | 29 | #endif |
29 | 30 | ||
30 | .section .entry.text, "ax" | 31 | .section .entry.text, "ax" |
31 | 32 | ||
32 | .macro IA32_ARG_FIXUP noebp=0 | 33 | .macro IA32_ARG_FIXUP noebp=0 |
33 | movl %edi,%r8d | 34 | movl %edi,%r8d |
34 | .if \noebp | 35 | .if \noebp |
35 | .else | 36 | .else |
36 | movl %ebp,%r9d | 37 | movl %ebp,%r9d |
37 | .endif | 38 | .endif |
38 | xchg %ecx,%esi | 39 | xchg %ecx,%esi |
39 | movl %ebx,%edi | 40 | movl %ebx,%edi |
40 | movl %edx,%edx /* zero extension */ | 41 | movl %edx,%edx /* zero extension */ |
41 | .endm | 42 | .endm |
42 | 43 | ||
43 | /* clobbers %eax */ | 44 | /* clobbers %eax */ |
44 | .macro CLEAR_RREGS offset=0, _r9=rax | 45 | .macro CLEAR_RREGS offset=0, _r9=rax |
45 | xorl %eax,%eax | 46 | xorl %eax,%eax |
46 | movq %rax,\offset+R11(%rsp) | 47 | movq %rax,\offset+R11(%rsp) |
47 | movq %rax,\offset+R10(%rsp) | 48 | movq %rax,\offset+R10(%rsp) |
48 | movq %\_r9,\offset+R9(%rsp) | 49 | movq %\_r9,\offset+R9(%rsp) |
49 | movq %rax,\offset+R8(%rsp) | 50 | movq %rax,\offset+R8(%rsp) |
50 | .endm | 51 | .endm |
51 | 52 | ||
52 | /* | 53 | /* |
53 | * Reload arg registers from stack in case ptrace changed them. | 54 | * Reload arg registers from stack in case ptrace changed them. |
54 | * We don't reload %eax because syscall_trace_enter() returned | 55 | * We don't reload %eax because syscall_trace_enter() returned |
55 | * the %rax value we should see. Instead, we just truncate that | 56 | * the %rax value we should see. Instead, we just truncate that |
56 | * value to 32 bits again as we did on entry from user mode. | 57 | * value to 32 bits again as we did on entry from user mode. |
57 | * If it's a new value set by user_regset during entry tracing, | 58 | * If it's a new value set by user_regset during entry tracing, |
58 | * this matches the normal truncation of the user-mode value. | 59 | * this matches the normal truncation of the user-mode value. |
59 | * If it's -1 to make us punt the syscall, then (u32)-1 is still | 60 | * If it's -1 to make us punt the syscall, then (u32)-1 is still |
60 | * an appropriately invalid value. | 61 | * an appropriately invalid value. |
61 | */ | 62 | */ |
62 | .macro LOAD_ARGS32 offset, _r9=0 | 63 | .macro LOAD_ARGS32 offset, _r9=0 |
63 | .if \_r9 | 64 | .if \_r9 |
64 | movl \offset+16(%rsp),%r9d | 65 | movl \offset+16(%rsp),%r9d |
65 | .endif | 66 | .endif |
66 | movl \offset+40(%rsp),%ecx | 67 | movl \offset+40(%rsp),%ecx |
67 | movl \offset+48(%rsp),%edx | 68 | movl \offset+48(%rsp),%edx |
68 | movl \offset+56(%rsp),%esi | 69 | movl \offset+56(%rsp),%esi |
69 | movl \offset+64(%rsp),%edi | 70 | movl \offset+64(%rsp),%edi |
70 | movl %eax,%eax /* zero extension */ | 71 | movl %eax,%eax /* zero extension */ |
71 | .endm | 72 | .endm |
72 | 73 | ||
73 | .macro CFI_STARTPROC32 simple | 74 | .macro CFI_STARTPROC32 simple |
74 | CFI_STARTPROC \simple | 75 | CFI_STARTPROC \simple |
75 | CFI_UNDEFINED r8 | 76 | CFI_UNDEFINED r8 |
76 | CFI_UNDEFINED r9 | 77 | CFI_UNDEFINED r9 |
77 | CFI_UNDEFINED r10 | 78 | CFI_UNDEFINED r10 |
78 | CFI_UNDEFINED r11 | 79 | CFI_UNDEFINED r11 |
79 | CFI_UNDEFINED r12 | 80 | CFI_UNDEFINED r12 |
80 | CFI_UNDEFINED r13 | 81 | CFI_UNDEFINED r13 |
81 | CFI_UNDEFINED r14 | 82 | CFI_UNDEFINED r14 |
82 | CFI_UNDEFINED r15 | 83 | CFI_UNDEFINED r15 |
83 | .endm | 84 | .endm |
84 | 85 | ||
85 | #ifdef CONFIG_PARAVIRT | 86 | #ifdef CONFIG_PARAVIRT |
86 | ENTRY(native_usergs_sysret32) | 87 | ENTRY(native_usergs_sysret32) |
87 | swapgs | 88 | swapgs |
88 | sysretl | 89 | sysretl |
89 | ENDPROC(native_usergs_sysret32) | 90 | ENDPROC(native_usergs_sysret32) |
90 | 91 | ||
91 | ENTRY(native_irq_enable_sysexit) | 92 | ENTRY(native_irq_enable_sysexit) |
92 | swapgs | 93 | swapgs |
93 | sti | 94 | sti |
94 | sysexit | 95 | sysexit |
95 | ENDPROC(native_irq_enable_sysexit) | 96 | ENDPROC(native_irq_enable_sysexit) |
96 | #endif | 97 | #endif |
97 | 98 | ||
98 | /* | 99 | /* |
99 | * 32bit SYSENTER instruction entry. | 100 | * 32bit SYSENTER instruction entry. |
100 | * | 101 | * |
101 | * Arguments: | 102 | * Arguments: |
102 | * %eax System call number. | 103 | * %eax System call number. |
103 | * %ebx Arg1 | 104 | * %ebx Arg1 |
104 | * %ecx Arg2 | 105 | * %ecx Arg2 |
105 | * %edx Arg3 | 106 | * %edx Arg3 |
106 | * %esi Arg4 | 107 | * %esi Arg4 |
107 | * %edi Arg5 | 108 | * %edi Arg5 |
108 | * %ebp user stack | 109 | * %ebp user stack |
109 | * 0(%ebp) Arg6 | 110 | * 0(%ebp) Arg6 |
110 | * | 111 | * |
111 | * Interrupts off. | 112 | * Interrupts off. |
112 | * | 113 | * |
113 | * This is purely a fast path. For anything complicated we use the int 0x80 | 114 | * This is purely a fast path. For anything complicated we use the int 0x80 |
114 | * path below. Set up a complete hardware stack frame to share code | 115 | * path below. Set up a complete hardware stack frame to share code |
115 | * with the int 0x80 path. | 116 | * with the int 0x80 path. |
116 | */ | 117 | */ |
117 | ENTRY(ia32_sysenter_target) | 118 | ENTRY(ia32_sysenter_target) |
118 | CFI_STARTPROC32 simple | 119 | CFI_STARTPROC32 simple |
119 | CFI_SIGNAL_FRAME | 120 | CFI_SIGNAL_FRAME |
120 | CFI_DEF_CFA rsp,0 | 121 | CFI_DEF_CFA rsp,0 |
121 | CFI_REGISTER rsp,rbp | 122 | CFI_REGISTER rsp,rbp |
122 | SWAPGS_UNSAFE_STACK | 123 | SWAPGS_UNSAFE_STACK |
123 | movq PER_CPU_VAR(kernel_stack), %rsp | 124 | movq PER_CPU_VAR(kernel_stack), %rsp |
124 | addq $(KERNEL_STACK_OFFSET),%rsp | 125 | addq $(KERNEL_STACK_OFFSET),%rsp |
125 | /* | 126 | /* |
126 | * No need to follow this irqs on/off section: the syscall | 127 | * No need to follow this irqs on/off section: the syscall |
127 | * disabled irqs, here we enable it straight after entry: | 128 | * disabled irqs, here we enable it straight after entry: |
128 | */ | 129 | */ |
129 | ENABLE_INTERRUPTS(CLBR_NONE) | 130 | ENABLE_INTERRUPTS(CLBR_NONE) |
130 | movl %ebp,%ebp /* zero extension */ | 131 | movl %ebp,%ebp /* zero extension */ |
131 | pushq_cfi $__USER32_DS | 132 | pushq_cfi $__USER32_DS |
132 | /*CFI_REL_OFFSET ss,0*/ | 133 | /*CFI_REL_OFFSET ss,0*/ |
133 | pushq_cfi %rbp | 134 | pushq_cfi %rbp |
134 | CFI_REL_OFFSET rsp,0 | 135 | CFI_REL_OFFSET rsp,0 |
135 | pushfq_cfi | 136 | pushfq_cfi |
136 | /*CFI_REL_OFFSET rflags,0*/ | 137 | /*CFI_REL_OFFSET rflags,0*/ |
137 | movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d | 138 | movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d |
138 | CFI_REGISTER rip,r10 | 139 | CFI_REGISTER rip,r10 |
139 | pushq_cfi $__USER32_CS | 140 | pushq_cfi $__USER32_CS |
140 | /*CFI_REL_OFFSET cs,0*/ | 141 | /*CFI_REL_OFFSET cs,0*/ |
141 | movl %eax, %eax | 142 | movl %eax, %eax |
142 | pushq_cfi %r10 | 143 | pushq_cfi %r10 |
143 | CFI_REL_OFFSET rip,0 | 144 | CFI_REL_OFFSET rip,0 |
144 | pushq_cfi %rax | 145 | pushq_cfi %rax |
145 | cld | 146 | cld |
146 | SAVE_ARGS 0,1,0 | 147 | SAVE_ARGS 0,1,0 |
147 | /* no need to do an access_ok check here because rbp has been | 148 | /* no need to do an access_ok check here because rbp has been |
148 | 32bit zero extended */ | 149 | 32bit zero extended */ |
150 | ASM_STAC | ||
149 | 1: movl (%rbp),%ebp | 151 | 1: movl (%rbp),%ebp |
150 | _ASM_EXTABLE(1b,ia32_badarg) | 152 | _ASM_EXTABLE(1b,ia32_badarg) |
153 | ASM_CLAC | ||
151 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 154 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
152 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 155 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
153 | CFI_REMEMBER_STATE | 156 | CFI_REMEMBER_STATE |
154 | jnz sysenter_tracesys | 157 | jnz sysenter_tracesys |
155 | cmpq $(IA32_NR_syscalls-1),%rax | 158 | cmpq $(IA32_NR_syscalls-1),%rax |
156 | ja ia32_badsys | 159 | ja ia32_badsys |
157 | sysenter_do_call: | 160 | sysenter_do_call: |
158 | IA32_ARG_FIXUP | 161 | IA32_ARG_FIXUP |
159 | sysenter_dispatch: | 162 | sysenter_dispatch: |
160 | call *ia32_sys_call_table(,%rax,8) | 163 | call *ia32_sys_call_table(,%rax,8) |
161 | movq %rax,RAX-ARGOFFSET(%rsp) | 164 | movq %rax,RAX-ARGOFFSET(%rsp) |
162 | DISABLE_INTERRUPTS(CLBR_NONE) | 165 | DISABLE_INTERRUPTS(CLBR_NONE) |
163 | TRACE_IRQS_OFF | 166 | TRACE_IRQS_OFF |
164 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 167 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
165 | jnz sysexit_audit | 168 | jnz sysexit_audit |
166 | sysexit_from_sys_call: | 169 | sysexit_from_sys_call: |
167 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 170 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
168 | /* clear IF, that popfq doesn't enable interrupts early */ | 171 | /* clear IF, that popfq doesn't enable interrupts early */ |
169 | andl $~0x200,EFLAGS-R11(%rsp) | 172 | andl $~0x200,EFLAGS-R11(%rsp) |
170 | movl RIP-R11(%rsp),%edx /* User %eip */ | 173 | movl RIP-R11(%rsp),%edx /* User %eip */ |
171 | CFI_REGISTER rip,rdx | 174 | CFI_REGISTER rip,rdx |
172 | RESTORE_ARGS 0,24,0,0,0,0 | 175 | RESTORE_ARGS 0,24,0,0,0,0 |
173 | xorq %r8,%r8 | 176 | xorq %r8,%r8 |
174 | xorq %r9,%r9 | 177 | xorq %r9,%r9 |
175 | xorq %r10,%r10 | 178 | xorq %r10,%r10 |
176 | xorq %r11,%r11 | 179 | xorq %r11,%r11 |
177 | popfq_cfi | 180 | popfq_cfi |
178 | /*CFI_RESTORE rflags*/ | 181 | /*CFI_RESTORE rflags*/ |
179 | popq_cfi %rcx /* User %esp */ | 182 | popq_cfi %rcx /* User %esp */ |
180 | CFI_REGISTER rsp,rcx | 183 | CFI_REGISTER rsp,rcx |
181 | TRACE_IRQS_ON | 184 | TRACE_IRQS_ON |
182 | ENABLE_INTERRUPTS_SYSEXIT32 | 185 | ENABLE_INTERRUPTS_SYSEXIT32 |
183 | 186 | ||
184 | #ifdef CONFIG_AUDITSYSCALL | 187 | #ifdef CONFIG_AUDITSYSCALL |
185 | .macro auditsys_entry_common | 188 | .macro auditsys_entry_common |
186 | movl %esi,%r9d /* 6th arg: 4th syscall arg */ | 189 | movl %esi,%r9d /* 6th arg: 4th syscall arg */ |
187 | movl %edx,%r8d /* 5th arg: 3rd syscall arg */ | 190 | movl %edx,%r8d /* 5th arg: 3rd syscall arg */ |
188 | /* (already in %ecx) 4th arg: 2nd syscall arg */ | 191 | /* (already in %ecx) 4th arg: 2nd syscall arg */ |
189 | movl %ebx,%edx /* 3rd arg: 1st syscall arg */ | 192 | movl %ebx,%edx /* 3rd arg: 1st syscall arg */ |
190 | movl %eax,%esi /* 2nd arg: syscall number */ | 193 | movl %eax,%esi /* 2nd arg: syscall number */ |
191 | movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ | 194 | movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ |
192 | call __audit_syscall_entry | 195 | call __audit_syscall_entry |
193 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ | 196 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ |
194 | cmpq $(IA32_NR_syscalls-1),%rax | 197 | cmpq $(IA32_NR_syscalls-1),%rax |
195 | ja ia32_badsys | 198 | ja ia32_badsys |
196 | movl %ebx,%edi /* reload 1st syscall arg */ | 199 | movl %ebx,%edi /* reload 1st syscall arg */ |
197 | movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ | 200 | movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ |
198 | movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */ | 201 | movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */ |
199 | movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */ | 202 | movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */ |
200 | movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ | 203 | movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ |
201 | .endm | 204 | .endm |
202 | 205 | ||
203 | .macro auditsys_exit exit | 206 | .macro auditsys_exit exit |
204 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 207 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
205 | jnz ia32_ret_from_sys_call | 208 | jnz ia32_ret_from_sys_call |
206 | TRACE_IRQS_ON | 209 | TRACE_IRQS_ON |
207 | sti | 210 | sti |
208 | movl %eax,%esi /* second arg, syscall return value */ | 211 | movl %eax,%esi /* second arg, syscall return value */ |
209 | cmpl $-MAX_ERRNO,%eax /* is it an error ? */ | 212 | cmpl $-MAX_ERRNO,%eax /* is it an error ? */ |
210 | jbe 1f | 213 | jbe 1f |
211 | movslq %eax, %rsi /* if error sign extend to 64 bits */ | 214 | movslq %eax, %rsi /* if error sign extend to 64 bits */ |
212 | 1: setbe %al /* 1 if error, 0 if not */ | 215 | 1: setbe %al /* 1 if error, 0 if not */ |
213 | movzbl %al,%edi /* zero-extend that into %edi */ | 216 | movzbl %al,%edi /* zero-extend that into %edi */ |
214 | call __audit_syscall_exit | 217 | call __audit_syscall_exit |
215 | movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ | 218 | movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ |
216 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | 219 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi |
217 | cli | 220 | cli |
218 | TRACE_IRQS_OFF | 221 | TRACE_IRQS_OFF |
219 | testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 222 | testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
220 | jz \exit | 223 | jz \exit |
221 | CLEAR_RREGS -ARGOFFSET | 224 | CLEAR_RREGS -ARGOFFSET |
222 | jmp int_with_check | 225 | jmp int_with_check |
223 | .endm | 226 | .endm |
224 | 227 | ||
225 | sysenter_auditsys: | 228 | sysenter_auditsys: |
226 | CFI_RESTORE_STATE | 229 | CFI_RESTORE_STATE |
227 | auditsys_entry_common | 230 | auditsys_entry_common |
228 | movl %ebp,%r9d /* reload 6th syscall arg */ | 231 | movl %ebp,%r9d /* reload 6th syscall arg */ |
229 | jmp sysenter_dispatch | 232 | jmp sysenter_dispatch |
230 | 233 | ||
231 | sysexit_audit: | 234 | sysexit_audit: |
232 | auditsys_exit sysexit_from_sys_call | 235 | auditsys_exit sysexit_from_sys_call |
233 | #endif | 236 | #endif |
234 | 237 | ||
235 | sysenter_tracesys: | 238 | sysenter_tracesys: |
236 | #ifdef CONFIG_AUDITSYSCALL | 239 | #ifdef CONFIG_AUDITSYSCALL |
237 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 240 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
238 | jz sysenter_auditsys | 241 | jz sysenter_auditsys |
239 | #endif | 242 | #endif |
240 | SAVE_REST | 243 | SAVE_REST |
241 | CLEAR_RREGS | 244 | CLEAR_RREGS |
242 | movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ | 245 | movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ |
243 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 246 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
244 | call syscall_trace_enter | 247 | call syscall_trace_enter |
245 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ | 248 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ |
246 | RESTORE_REST | 249 | RESTORE_REST |
247 | cmpq $(IA32_NR_syscalls-1),%rax | 250 | cmpq $(IA32_NR_syscalls-1),%rax |
248 | ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ | 251 | ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ |
249 | jmp sysenter_do_call | 252 | jmp sysenter_do_call |
250 | CFI_ENDPROC | 253 | CFI_ENDPROC |
251 | ENDPROC(ia32_sysenter_target) | 254 | ENDPROC(ia32_sysenter_target) |
252 | 255 | ||
253 | /* | 256 | /* |
254 | * 32bit SYSCALL instruction entry. | 257 | * 32bit SYSCALL instruction entry. |
255 | * | 258 | * |
256 | * Arguments: | 259 | * Arguments: |
257 | * %eax System call number. | 260 | * %eax System call number. |
258 | * %ebx Arg1 | 261 | * %ebx Arg1 |
259 | * %ecx return EIP | 262 | * %ecx return EIP |
260 | * %edx Arg3 | 263 | * %edx Arg3 |
261 | * %esi Arg4 | 264 | * %esi Arg4 |
262 | * %edi Arg5 | 265 | * %edi Arg5 |
263 | * %ebp Arg2 [note: not saved in the stack frame, should not be touched] | 266 | * %ebp Arg2 [note: not saved in the stack frame, should not be touched] |
264 | * %esp user stack | 267 | * %esp user stack |
265 | * 0(%esp) Arg6 | 268 | * 0(%esp) Arg6 |
266 | * | 269 | * |
267 | * Interrupts off. | 270 | * Interrupts off. |
268 | * | 271 | * |
269 | * This is purely a fast path. For anything complicated we use the int 0x80 | 272 | * This is purely a fast path. For anything complicated we use the int 0x80 |
270 | * path below. Set up a complete hardware stack frame to share code | 273 | * path below. Set up a complete hardware stack frame to share code |
271 | * with the int 0x80 path. | 274 | * with the int 0x80 path. |
272 | */ | 275 | */ |
273 | ENTRY(ia32_cstar_target) | 276 | ENTRY(ia32_cstar_target) |
274 | CFI_STARTPROC32 simple | 277 | CFI_STARTPROC32 simple |
275 | CFI_SIGNAL_FRAME | 278 | CFI_SIGNAL_FRAME |
276 | CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET | 279 | CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET |
277 | CFI_REGISTER rip,rcx | 280 | CFI_REGISTER rip,rcx |
278 | /*CFI_REGISTER rflags,r11*/ | 281 | /*CFI_REGISTER rflags,r11*/ |
279 | SWAPGS_UNSAFE_STACK | 282 | SWAPGS_UNSAFE_STACK |
280 | movl %esp,%r8d | 283 | movl %esp,%r8d |
281 | CFI_REGISTER rsp,r8 | 284 | CFI_REGISTER rsp,r8 |
282 | movq PER_CPU_VAR(kernel_stack),%rsp | 285 | movq PER_CPU_VAR(kernel_stack),%rsp |
283 | /* | 286 | /* |
284 | * No need to follow this irqs on/off section: the syscall | 287 | * No need to follow this irqs on/off section: the syscall |
285 | * disabled irqs and here we enable it straight after entry: | 288 | * disabled irqs and here we enable it straight after entry: |
286 | */ | 289 | */ |
287 | ENABLE_INTERRUPTS(CLBR_NONE) | 290 | ENABLE_INTERRUPTS(CLBR_NONE) |
288 | SAVE_ARGS 8,0,0 | 291 | SAVE_ARGS 8,0,0 |
289 | movl %eax,%eax /* zero extension */ | 292 | movl %eax,%eax /* zero extension */ |
290 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 293 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
291 | movq %rcx,RIP-ARGOFFSET(%rsp) | 294 | movq %rcx,RIP-ARGOFFSET(%rsp) |
292 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 295 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
293 | movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */ | 296 | movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */ |
294 | movl %ebp,%ecx | 297 | movl %ebp,%ecx |
295 | movq $__USER32_CS,CS-ARGOFFSET(%rsp) | 298 | movq $__USER32_CS,CS-ARGOFFSET(%rsp) |
296 | movq $__USER32_DS,SS-ARGOFFSET(%rsp) | 299 | movq $__USER32_DS,SS-ARGOFFSET(%rsp) |
297 | movq %r11,EFLAGS-ARGOFFSET(%rsp) | 300 | movq %r11,EFLAGS-ARGOFFSET(%rsp) |
298 | /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ | 301 | /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ |
299 | movq %r8,RSP-ARGOFFSET(%rsp) | 302 | movq %r8,RSP-ARGOFFSET(%rsp) |
300 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET | 303 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET |
301 | /* no need to do an access_ok check here because r8 has been | 304 | /* no need to do an access_ok check here because r8 has been |
302 | 32bit zero extended */ | 305 | 32bit zero extended */ |
303 | /* hardware stack frame is complete now */ | 306 | /* hardware stack frame is complete now */ |
307 | ASM_STAC | ||
304 | 1: movl (%r8),%r9d | 308 | 1: movl (%r8),%r9d |
305 | _ASM_EXTABLE(1b,ia32_badarg) | 309 | _ASM_EXTABLE(1b,ia32_badarg) |
310 | ASM_CLAC | ||
306 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 311 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
307 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 312 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
308 | CFI_REMEMBER_STATE | 313 | CFI_REMEMBER_STATE |
309 | jnz cstar_tracesys | 314 | jnz cstar_tracesys |
310 | cmpq $IA32_NR_syscalls-1,%rax | 315 | cmpq $IA32_NR_syscalls-1,%rax |
311 | ja ia32_badsys | 316 | ja ia32_badsys |
312 | cstar_do_call: | 317 | cstar_do_call: |
313 | IA32_ARG_FIXUP 1 | 318 | IA32_ARG_FIXUP 1 |
314 | cstar_dispatch: | 319 | cstar_dispatch: |
315 | call *ia32_sys_call_table(,%rax,8) | 320 | call *ia32_sys_call_table(,%rax,8) |
316 | movq %rax,RAX-ARGOFFSET(%rsp) | 321 | movq %rax,RAX-ARGOFFSET(%rsp) |
317 | DISABLE_INTERRUPTS(CLBR_NONE) | 322 | DISABLE_INTERRUPTS(CLBR_NONE) |
318 | TRACE_IRQS_OFF | 323 | TRACE_IRQS_OFF |
319 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 324 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
320 | jnz sysretl_audit | 325 | jnz sysretl_audit |
321 | sysretl_from_sys_call: | 326 | sysretl_from_sys_call: |
322 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 327 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
323 | RESTORE_ARGS 0,-ARG_SKIP,0,0,0 | 328 | RESTORE_ARGS 0,-ARG_SKIP,0,0,0 |
324 | movl RIP-ARGOFFSET(%rsp),%ecx | 329 | movl RIP-ARGOFFSET(%rsp),%ecx |
325 | CFI_REGISTER rip,rcx | 330 | CFI_REGISTER rip,rcx |
326 | movl EFLAGS-ARGOFFSET(%rsp),%r11d | 331 | movl EFLAGS-ARGOFFSET(%rsp),%r11d |
327 | /*CFI_REGISTER rflags,r11*/ | 332 | /*CFI_REGISTER rflags,r11*/ |
328 | xorq %r10,%r10 | 333 | xorq %r10,%r10 |
329 | xorq %r9,%r9 | 334 | xorq %r9,%r9 |
330 | xorq %r8,%r8 | 335 | xorq %r8,%r8 |
331 | TRACE_IRQS_ON | 336 | TRACE_IRQS_ON |
332 | movl RSP-ARGOFFSET(%rsp),%esp | 337 | movl RSP-ARGOFFSET(%rsp),%esp |
333 | CFI_RESTORE rsp | 338 | CFI_RESTORE rsp |
334 | USERGS_SYSRET32 | 339 | USERGS_SYSRET32 |
335 | 340 | ||
336 | #ifdef CONFIG_AUDITSYSCALL | 341 | #ifdef CONFIG_AUDITSYSCALL |
337 | cstar_auditsys: | 342 | cstar_auditsys: |
338 | CFI_RESTORE_STATE | 343 | CFI_RESTORE_STATE |
339 | movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */ | 344 | movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */ |
340 | auditsys_entry_common | 345 | auditsys_entry_common |
341 | movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */ | 346 | movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */ |
342 | jmp cstar_dispatch | 347 | jmp cstar_dispatch |
343 | 348 | ||
344 | sysretl_audit: | 349 | sysretl_audit: |
345 | auditsys_exit sysretl_from_sys_call | 350 | auditsys_exit sysretl_from_sys_call |
346 | #endif | 351 | #endif |
347 | 352 | ||
348 | cstar_tracesys: | 353 | cstar_tracesys: |
349 | #ifdef CONFIG_AUDITSYSCALL | 354 | #ifdef CONFIG_AUDITSYSCALL |
350 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 355 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
351 | jz cstar_auditsys | 356 | jz cstar_auditsys |
352 | #endif | 357 | #endif |
353 | xchgl %r9d,%ebp | 358 | xchgl %r9d,%ebp |
354 | SAVE_REST | 359 | SAVE_REST |
355 | CLEAR_RREGS 0, r9 | 360 | CLEAR_RREGS 0, r9 |
356 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 361 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
357 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 362 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
358 | call syscall_trace_enter | 363 | call syscall_trace_enter |
359 | LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ | 364 | LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ |
360 | RESTORE_REST | 365 | RESTORE_REST |
361 | xchgl %ebp,%r9d | 366 | xchgl %ebp,%r9d |
362 | cmpq $(IA32_NR_syscalls-1),%rax | 367 | cmpq $(IA32_NR_syscalls-1),%rax |
363 | ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ | 368 | ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ |
364 | jmp cstar_do_call | 369 | jmp cstar_do_call |
365 | END(ia32_cstar_target) | 370 | END(ia32_cstar_target) |
366 | 371 | ||
367 | ia32_badarg: | 372 | ia32_badarg: |
373 | ASM_CLAC | ||
368 | movq $-EFAULT,%rax | 374 | movq $-EFAULT,%rax |
369 | jmp ia32_sysret | 375 | jmp ia32_sysret |
370 | CFI_ENDPROC | 376 | CFI_ENDPROC |
371 | 377 | ||
372 | /* | 378 | /* |
373 | * Emulated IA32 system calls via int 0x80. | 379 | * Emulated IA32 system calls via int 0x80. |
374 | * | 380 | * |
375 | * Arguments: | 381 | * Arguments: |
376 | * %eax System call number. | 382 | * %eax System call number. |
377 | * %ebx Arg1 | 383 | * %ebx Arg1 |
378 | * %ecx Arg2 | 384 | * %ecx Arg2 |
379 | * %edx Arg3 | 385 | * %edx Arg3 |
380 | * %esi Arg4 | 386 | * %esi Arg4 |
381 | * %edi Arg5 | 387 | * %edi Arg5 |
382 | * %ebp Arg6 [note: not saved in the stack frame, should not be touched] | 388 | * %ebp Arg6 [note: not saved in the stack frame, should not be touched] |
383 | * | 389 | * |
384 | * Notes: | 390 | * Notes: |
385 | * Uses the same stack frame as the x86-64 version. | 391 | * Uses the same stack frame as the x86-64 version. |
386 | * All registers except %eax must be saved (but ptrace may violate that) | 392 | * All registers except %eax must be saved (but ptrace may violate that) |
387 | * Arguments are zero extended. For system calls that want sign extension and | 393 | * Arguments are zero extended. For system calls that want sign extension and |
388 | * take long arguments a wrapper is needed. Most calls can just be called | 394 | * take long arguments a wrapper is needed. Most calls can just be called |
389 | * directly. | 395 | * directly. |
390 | * Assumes it is only called from user space and entered with interrupts off. | 396 | * Assumes it is only called from user space and entered with interrupts off. |
391 | */ | 397 | */ |
392 | 398 | ||
393 | ENTRY(ia32_syscall) | 399 | ENTRY(ia32_syscall) |
394 | CFI_STARTPROC32 simple | 400 | CFI_STARTPROC32 simple |
395 | CFI_SIGNAL_FRAME | 401 | CFI_SIGNAL_FRAME |
396 | CFI_DEF_CFA rsp,SS+8-RIP | 402 | CFI_DEF_CFA rsp,SS+8-RIP |
397 | /*CFI_REL_OFFSET ss,SS-RIP*/ | 403 | /*CFI_REL_OFFSET ss,SS-RIP*/ |
398 | CFI_REL_OFFSET rsp,RSP-RIP | 404 | CFI_REL_OFFSET rsp,RSP-RIP |
399 | /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ | 405 | /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ |
400 | /*CFI_REL_OFFSET cs,CS-RIP*/ | 406 | /*CFI_REL_OFFSET cs,CS-RIP*/ |
401 | CFI_REL_OFFSET rip,RIP-RIP | 407 | CFI_REL_OFFSET rip,RIP-RIP |
402 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 408 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
403 | SWAPGS | 409 | SWAPGS |
404 | /* | 410 | /* |
405 | * No need to follow this irqs on/off section: the syscall | 411 | * No need to follow this irqs on/off section: the syscall |
406 | * disabled irqs and here we enable it straight after entry: | 412 | * disabled irqs and here we enable it straight after entry: |
407 | */ | 413 | */ |
408 | ENABLE_INTERRUPTS(CLBR_NONE) | 414 | ENABLE_INTERRUPTS(CLBR_NONE) |
409 | movl %eax,%eax | 415 | movl %eax,%eax |
410 | pushq_cfi %rax | 416 | pushq_cfi %rax |
411 | cld | 417 | cld |
412 | /* note the registers are not zero extended to the sf. | 418 | /* note the registers are not zero extended to the sf. |
413 | this could be a problem. */ | 419 | this could be a problem. */ |
414 | SAVE_ARGS 0,1,0 | 420 | SAVE_ARGS 0,1,0 |
415 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 421 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
416 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 422 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
417 | jnz ia32_tracesys | 423 | jnz ia32_tracesys |
418 | cmpq $(IA32_NR_syscalls-1),%rax | 424 | cmpq $(IA32_NR_syscalls-1),%rax |
419 | ja ia32_badsys | 425 | ja ia32_badsys |
420 | ia32_do_call: | 426 | ia32_do_call: |
421 | IA32_ARG_FIXUP | 427 | IA32_ARG_FIXUP |
422 | call *ia32_sys_call_table(,%rax,8) # xxx: rip relative | 428 | call *ia32_sys_call_table(,%rax,8) # xxx: rip relative |
423 | ia32_sysret: | 429 | ia32_sysret: |
424 | movq %rax,RAX-ARGOFFSET(%rsp) | 430 | movq %rax,RAX-ARGOFFSET(%rsp) |
425 | ia32_ret_from_sys_call: | 431 | ia32_ret_from_sys_call: |
426 | CLEAR_RREGS -ARGOFFSET | 432 | CLEAR_RREGS -ARGOFFSET |
427 | jmp int_ret_from_sys_call | 433 | jmp int_ret_from_sys_call |
428 | 434 | ||
429 | ia32_tracesys: | 435 | ia32_tracesys: |
430 | SAVE_REST | 436 | SAVE_REST |
431 | CLEAR_RREGS | 437 | CLEAR_RREGS |
432 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 438 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
433 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 439 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
434 | call syscall_trace_enter | 440 | call syscall_trace_enter |
435 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ | 441 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ |
436 | RESTORE_REST | 442 | RESTORE_REST |
437 | cmpq $(IA32_NR_syscalls-1),%rax | 443 | cmpq $(IA32_NR_syscalls-1),%rax |
438 | ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ | 444 | ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ |
439 | jmp ia32_do_call | 445 | jmp ia32_do_call |
440 | END(ia32_syscall) | 446 | END(ia32_syscall) |
441 | 447 | ||
442 | ia32_badsys: | 448 | ia32_badsys: |
443 | movq $0,ORIG_RAX-ARGOFFSET(%rsp) | 449 | movq $0,ORIG_RAX-ARGOFFSET(%rsp) |
444 | movq $-ENOSYS,%rax | 450 | movq $-ENOSYS,%rax |
445 | jmp ia32_sysret | 451 | jmp ia32_sysret |
446 | 452 | ||
447 | CFI_ENDPROC | 453 | CFI_ENDPROC |
448 | 454 | ||
449 | .macro PTREGSCALL label, func, arg | 455 | .macro PTREGSCALL label, func, arg |
450 | ALIGN | 456 | ALIGN |
451 | GLOBAL(\label) | 457 | GLOBAL(\label) |
452 | leaq \func(%rip),%rax | 458 | leaq \func(%rip),%rax |
453 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ | 459 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ |
454 | jmp ia32_ptregs_common | 460 | jmp ia32_ptregs_common |
455 | .endm | 461 | .endm |
456 | 462 | ||
457 | CFI_STARTPROC32 | 463 | CFI_STARTPROC32 |
458 | 464 | ||
459 | PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi | 465 | PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi |
460 | PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi | 466 | PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi |
461 | PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx | 467 | PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx |
462 | PTREGSCALL stub32_execve, sys32_execve, %rcx | 468 | PTREGSCALL stub32_execve, sys32_execve, %rcx |
463 | PTREGSCALL stub32_fork, sys_fork, %rdi | 469 | PTREGSCALL stub32_fork, sys_fork, %rdi |
464 | PTREGSCALL stub32_clone, sys32_clone, %rdx | 470 | PTREGSCALL stub32_clone, sys32_clone, %rdx |
465 | PTREGSCALL stub32_vfork, sys_vfork, %rdi | 471 | PTREGSCALL stub32_vfork, sys_vfork, %rdi |
466 | PTREGSCALL stub32_iopl, sys_iopl, %rsi | 472 | PTREGSCALL stub32_iopl, sys_iopl, %rsi |
467 | 473 | ||
468 | ALIGN | 474 | ALIGN |
469 | ia32_ptregs_common: | 475 | ia32_ptregs_common: |
470 | popq %r11 | 476 | popq %r11 |
471 | CFI_ENDPROC | 477 | CFI_ENDPROC |
472 | CFI_STARTPROC32 simple | 478 | CFI_STARTPROC32 simple |
473 | CFI_SIGNAL_FRAME | 479 | CFI_SIGNAL_FRAME |
474 | CFI_DEF_CFA rsp,SS+8-ARGOFFSET | 480 | CFI_DEF_CFA rsp,SS+8-ARGOFFSET |
475 | CFI_REL_OFFSET rax,RAX-ARGOFFSET | 481 | CFI_REL_OFFSET rax,RAX-ARGOFFSET |
476 | CFI_REL_OFFSET rcx,RCX-ARGOFFSET | 482 | CFI_REL_OFFSET rcx,RCX-ARGOFFSET |
477 | CFI_REL_OFFSET rdx,RDX-ARGOFFSET | 483 | CFI_REL_OFFSET rdx,RDX-ARGOFFSET |
478 | CFI_REL_OFFSET rsi,RSI-ARGOFFSET | 484 | CFI_REL_OFFSET rsi,RSI-ARGOFFSET |
479 | CFI_REL_OFFSET rdi,RDI-ARGOFFSET | 485 | CFI_REL_OFFSET rdi,RDI-ARGOFFSET |
480 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 486 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
481 | /* CFI_REL_OFFSET cs,CS-ARGOFFSET*/ | 487 | /* CFI_REL_OFFSET cs,CS-ARGOFFSET*/ |
482 | /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ | 488 | /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ |
483 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET | 489 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET |
484 | /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ | 490 | /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ |
485 | SAVE_REST | 491 | SAVE_REST |
486 | call *%rax | 492 | call *%rax |
487 | RESTORE_REST | 493 | RESTORE_REST |
488 | jmp ia32_sysret /* misbalances the return cache */ | 494 | jmp ia32_sysret /* misbalances the return cache */ |
489 | CFI_ENDPROC | 495 | CFI_ENDPROC |
490 | END(ia32_ptregs_common) | 496 | END(ia32_ptregs_common) |
491 | 497 |
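The same bracketing discipline applies to the C-level user accessors (futex.h, uaccess.h, getuser.S, putuser.S and the usercopy files in the list above), whose hunks are not all reproduced here. A minimal sketch of the pattern with a hypothetical helper name, assuming only that ASM_STAC/ASM_CLAC expand to instruction strings usable in inline asm as the fxsave_user() hunk below shows, looks like this:

/*
 * Hypothetical helper for illustration -- not a hunk from this commit.
 * STAC opens the user-access window, CLAC closes it on both the success
 * path and (via the fixup) the fault path.
 */
#include <linux/errno.h>
#include <linux/types.h>
#include <asm/asm.h>		/* _ASM_EXTABLE */
#include <asm/smap.h>		/* ASM_STAC, ASM_CLAC */

static inline int sketch_get_user_u32(u32 *val, const u32 __user *uaddr)
{
	int err = 0;
	u32 tmp;

	asm volatile(ASM_STAC "\n"
		     "1:	movl %2,%1\n"
		     "2:	" ASM_CLAC "\n"
		     ".section .fixup,\"ax\"\n"
		     "3:	movl %3,%0\n"
		     "	xorl %1,%1\n"
		     "	jmp 2b\n"
		     ".previous\n"
		     _ASM_EXTABLE(1b, 3b)
		     : "+r" (err), "=r" (tmp)
		     : "m" (*uaddr), "i" (-EFAULT));
	*val = tmp;
	return err;
}

Keeping the single CLAC on the common exit (label 2) and routing the fault through it is the same choice the fxsave_user() change below makes: whichever way the access ends, the AC flag is cleared again before returning to the caller.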
arch/x86/include/asm/fpu-internal.h
1 | /* | 1 | /* |
2 | * Copyright (C) 1994 Linus Torvalds | 2 | * Copyright (C) 1994 Linus Torvalds |
3 | * | 3 | * |
4 | * Pentium III FXSR, SSE support | 4 | * Pentium III FXSR, SSE support |
5 | * General FPU state handling cleanups | 5 | * General FPU state handling cleanups |
6 | * Gareth Hughes <gareth@valinux.com>, May 2000 | 6 | * Gareth Hughes <gareth@valinux.com>, May 2000 |
7 | * x86-64 work by Andi Kleen 2002 | 7 | * x86-64 work by Andi Kleen 2002 |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #ifndef _FPU_INTERNAL_H | 10 | #ifndef _FPU_INTERNAL_H |
11 | #define _FPU_INTERNAL_H | 11 | #define _FPU_INTERNAL_H |
12 | 12 | ||
13 | #include <linux/kernel_stat.h> | 13 | #include <linux/kernel_stat.h> |
14 | #include <linux/regset.h> | 14 | #include <linux/regset.h> |
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <asm/asm.h> | 16 | #include <asm/asm.h> |
17 | #include <asm/cpufeature.h> | 17 | #include <asm/cpufeature.h> |
18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
19 | #include <asm/sigcontext.h> | 19 | #include <asm/sigcontext.h> |
20 | #include <asm/user.h> | 20 | #include <asm/user.h> |
21 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
22 | #include <asm/xsave.h> | 22 | #include <asm/xsave.h> |
23 | 23 | ||
24 | extern unsigned int sig_xstate_size; | 24 | extern unsigned int sig_xstate_size; |
25 | extern void fpu_init(void); | 25 | extern void fpu_init(void); |
26 | 26 | ||
27 | DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); | 27 | DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); |
28 | 28 | ||
29 | extern user_regset_active_fn fpregs_active, xfpregs_active; | 29 | extern user_regset_active_fn fpregs_active, xfpregs_active; |
30 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, | 30 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, |
31 | xstateregs_get; | 31 | xstateregs_get; |
32 | extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, | 32 | extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, |
33 | xstateregs_set; | 33 | xstateregs_set; |
34 | 34 | ||
35 | 35 | ||
36 | /* | 36 | /* |
37 | * xstateregs_active == fpregs_active. Please refer to the comment | 37 | * xstateregs_active == fpregs_active. Please refer to the comment |
38 | * at the definition of fpregs_active. | 38 | * at the definition of fpregs_active. |
39 | */ | 39 | */ |
40 | #define xstateregs_active fpregs_active | 40 | #define xstateregs_active fpregs_active |
41 | 41 | ||
42 | extern struct _fpx_sw_bytes fx_sw_reserved; | 42 | extern struct _fpx_sw_bytes fx_sw_reserved; |
43 | #ifdef CONFIG_IA32_EMULATION | 43 | #ifdef CONFIG_IA32_EMULATION |
44 | extern unsigned int sig_xstate_ia32_size; | 44 | extern unsigned int sig_xstate_ia32_size; |
45 | extern struct _fpx_sw_bytes fx_sw_reserved_ia32; | 45 | extern struct _fpx_sw_bytes fx_sw_reserved_ia32; |
46 | struct _fpstate_ia32; | 46 | struct _fpstate_ia32; |
47 | struct _xstate_ia32; | 47 | struct _xstate_ia32; |
48 | extern int save_i387_xstate_ia32(void __user *buf); | 48 | extern int save_i387_xstate_ia32(void __user *buf); |
49 | extern int restore_i387_xstate_ia32(void __user *buf); | 49 | extern int restore_i387_xstate_ia32(void __user *buf); |
50 | #endif | 50 | #endif |
51 | 51 | ||
52 | #ifdef CONFIG_MATH_EMULATION | 52 | #ifdef CONFIG_MATH_EMULATION |
53 | extern void finit_soft_fpu(struct i387_soft_struct *soft); | 53 | extern void finit_soft_fpu(struct i387_soft_struct *soft); |
54 | #else | 54 | #else |
55 | static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} | 55 | static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} |
56 | #endif | 56 | #endif |
57 | 57 | ||
58 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ | 58 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ |
59 | 59 | ||
60 | static __always_inline __pure bool use_xsaveopt(void) | 60 | static __always_inline __pure bool use_xsaveopt(void) |
61 | { | 61 | { |
62 | return static_cpu_has(X86_FEATURE_XSAVEOPT); | 62 | return static_cpu_has(X86_FEATURE_XSAVEOPT); |
63 | } | 63 | } |
64 | 64 | ||
65 | static __always_inline __pure bool use_xsave(void) | 65 | static __always_inline __pure bool use_xsave(void) |
66 | { | 66 | { |
67 | return static_cpu_has(X86_FEATURE_XSAVE); | 67 | return static_cpu_has(X86_FEATURE_XSAVE); |
68 | } | 68 | } |
69 | 69 | ||
70 | static __always_inline __pure bool use_fxsr(void) | 70 | static __always_inline __pure bool use_fxsr(void) |
71 | { | 71 | { |
72 | return static_cpu_has(X86_FEATURE_FXSR); | 72 | return static_cpu_has(X86_FEATURE_FXSR); |
73 | } | 73 | } |
74 | 74 | ||
75 | extern void __sanitize_i387_state(struct task_struct *); | 75 | extern void __sanitize_i387_state(struct task_struct *); |
76 | 76 | ||
77 | static inline void sanitize_i387_state(struct task_struct *tsk) | 77 | static inline void sanitize_i387_state(struct task_struct *tsk) |
78 | { | 78 | { |
79 | if (!use_xsaveopt()) | 79 | if (!use_xsaveopt()) |
80 | return; | 80 | return; |
81 | __sanitize_i387_state(tsk); | 81 | __sanitize_i387_state(tsk); |
82 | } | 82 | } |
83 | 83 | ||
84 | #ifdef CONFIG_X86_64 | 84 | #ifdef CONFIG_X86_64 |
85 | static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | 85 | static inline int fxrstor_checking(struct i387_fxsave_struct *fx) |
86 | { | 86 | { |
87 | int err; | 87 | int err; |
88 | 88 | ||
89 | /* See comment in fxsave() below. */ | 89 | /* See comment in fxsave() below. */ |
90 | #ifdef CONFIG_AS_FXSAVEQ | 90 | #ifdef CONFIG_AS_FXSAVEQ |
91 | asm volatile("1: fxrstorq %[fx]\n\t" | 91 | asm volatile("1: fxrstorq %[fx]\n\t" |
92 | "2:\n" | 92 | "2:\n" |
93 | ".section .fixup,\"ax\"\n" | 93 | ".section .fixup,\"ax\"\n" |
94 | "3: movl $-1,%[err]\n" | 94 | "3: movl $-1,%[err]\n" |
95 | " jmp 2b\n" | 95 | " jmp 2b\n" |
96 | ".previous\n" | 96 | ".previous\n" |
97 | _ASM_EXTABLE(1b, 3b) | 97 | _ASM_EXTABLE(1b, 3b) |
98 | : [err] "=r" (err) | 98 | : [err] "=r" (err) |
99 | : [fx] "m" (*fx), "0" (0)); | 99 | : [fx] "m" (*fx), "0" (0)); |
100 | #else | 100 | #else |
101 | asm volatile("1: rex64/fxrstor (%[fx])\n\t" | 101 | asm volatile("1: rex64/fxrstor (%[fx])\n\t" |
102 | "2:\n" | 102 | "2:\n" |
103 | ".section .fixup,\"ax\"\n" | 103 | ".section .fixup,\"ax\"\n" |
104 | "3: movl $-1,%[err]\n" | 104 | "3: movl $-1,%[err]\n" |
105 | " jmp 2b\n" | 105 | " jmp 2b\n" |
106 | ".previous\n" | 106 | ".previous\n" |
107 | _ASM_EXTABLE(1b, 3b) | 107 | _ASM_EXTABLE(1b, 3b) |
108 | : [err] "=r" (err) | 108 | : [err] "=r" (err) |
109 | : [fx] "R" (fx), "m" (*fx), "0" (0)); | 109 | : [fx] "R" (fx), "m" (*fx), "0" (0)); |
110 | #endif | 110 | #endif |
111 | return err; | 111 | return err; |
112 | } | 112 | } |
113 | 113 | ||
114 | static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | 114 | static inline int fxsave_user(struct i387_fxsave_struct __user *fx) |
115 | { | 115 | { |
116 | int err; | 116 | int err; |
117 | 117 | ||
118 | /* | 118 | /* |
119 | * Clear the bytes not touched by the fxsave and reserved | 119 | * Clear the bytes not touched by the fxsave and reserved |
120 | * for the SW usage. | 120 | * for the SW usage. |
121 | */ | 121 | */ |
122 | err = __clear_user(&fx->sw_reserved, | 122 | err = __clear_user(&fx->sw_reserved, |
123 | sizeof(struct _fpx_sw_bytes)); | 123 | sizeof(struct _fpx_sw_bytes)); |
124 | if (unlikely(err)) | 124 | if (unlikely(err)) |
125 | return -EFAULT; | 125 | return -EFAULT; |
126 | 126 | ||
127 | /* See comment in fxsave() below. */ | 127 | /* See comment in fxsave() below. */ |
128 | #ifdef CONFIG_AS_FXSAVEQ | 128 | #ifdef CONFIG_AS_FXSAVEQ |
129 | asm volatile("1: fxsaveq %[fx]\n\t" | 129 | asm volatile(ASM_STAC "\n" |
130 | "2:\n" | 130 | "1: fxsaveq %[fx]\n\t" |
131 | "2: " ASM_CLAC "\n" | ||
131 | ".section .fixup,\"ax\"\n" | 132 | ".section .fixup,\"ax\"\n" |
132 | "3: movl $-1,%[err]\n" | 133 | "3: movl $-1,%[err]\n" |
133 | " jmp 2b\n" | 134 | " jmp 2b\n" |
134 | ".previous\n" | 135 | ".previous\n" |
135 | _ASM_EXTABLE(1b, 3b) | 136 | _ASM_EXTABLE(1b, 3b) |
136 | : [err] "=r" (err), [fx] "=m" (*fx) | 137 | : [err] "=r" (err), [fx] "=m" (*fx) |
137 | : "0" (0)); | 138 | : "0" (0)); |
138 | #else | 139 | #else |
139 | asm volatile("1: rex64/fxsave (%[fx])\n\t" | 140 | asm volatile(ASM_STAC "\n" |
140 | "2:\n" | 141 | "1: rex64/fxsave (%[fx])\n\t" |
142 | "2: " ASM_CLAC "\n" | ||
141 | ".section .fixup,\"ax\"\n" | 143 | ".section .fixup,\"ax\"\n" |
142 | "3: movl $-1,%[err]\n" | 144 | "3: movl $-1,%[err]\n" |
143 | " jmp 2b\n" | 145 | " jmp 2b\n" |
144 | ".previous\n" | 146 | ".previous\n" |
145 | _ASM_EXTABLE(1b, 3b) | 147 | _ASM_EXTABLE(1b, 3b) |
146 | : [err] "=r" (err), "=m" (*fx) | 148 | : [err] "=r" (err), "=m" (*fx) |
147 | : [fx] "R" (fx), "0" (0)); | 149 | : [fx] "R" (fx), "0" (0)); |
148 | #endif | 150 | #endif |
149 | if (unlikely(err) && | 151 | if (unlikely(err) && |
150 | __clear_user(fx, sizeof(struct i387_fxsave_struct))) | 152 | __clear_user(fx, sizeof(struct i387_fxsave_struct))) |
151 | err = -EFAULT; | 153 | err = -EFAULT; |
152 | /* No need to clear here because the caller clears USED_MATH */ | 154 | /* No need to clear here because the caller clears USED_MATH */ |
153 | return err; | 155 | return err; |
154 | } | 156 | } |
155 | 157 | ||
156 | static inline void fpu_fxsave(struct fpu *fpu) | 158 | static inline void fpu_fxsave(struct fpu *fpu) |
157 | { | 159 | { |
158 | /* Using "rex64; fxsave %0" is broken because, if the memory operand | 160 | /* Using "rex64; fxsave %0" is broken because, if the memory operand |
159 | uses any extended registers for addressing, a second REX prefix | 161 | uses any extended registers for addressing, a second REX prefix |
160 | will be generated (to the assembler, rex64 followed by semicolon | 162 | will be generated (to the assembler, rex64 followed by semicolon |
161 | is a separate instruction), and hence the 64-bitness is lost. */ | 163 | is a separate instruction), and hence the 64-bitness is lost. */ |
162 | 164 | ||
163 | #ifdef CONFIG_AS_FXSAVEQ | 165 | #ifdef CONFIG_AS_FXSAVEQ |
164 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported | 166 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported |
165 | starting with gas 2.16. */ | 167 | starting with gas 2.16. */ |
166 | __asm__ __volatile__("fxsaveq %0" | 168 | __asm__ __volatile__("fxsaveq %0" |
167 | : "=m" (fpu->state->fxsave)); | 169 | : "=m" (fpu->state->fxsave)); |
168 | #else | 170 | #else |
169 | /* Using, as a workaround, the properly prefixed form below isn't | 171 | /* Using, as a workaround, the properly prefixed form below isn't |
170 | accepted by any binutils version so far released, complaining that | 172 | accepted by any binutils version so far released, complaining that |
171 | the same type of prefix is used twice if an extended register is | 173 | the same type of prefix is used twice if an extended register is |
172 | needed for addressing (fix submitted to mainline 2005-11-21). | 174 | needed for addressing (fix submitted to mainline 2005-11-21). |
173 | asm volatile("rex64/fxsave %0" | 175 | asm volatile("rex64/fxsave %0" |
174 | : "=m" (fpu->state->fxsave)); | 176 | : "=m" (fpu->state->fxsave)); |
175 | This, however, we can work around by forcing the compiler to select | 177 | This, however, we can work around by forcing the compiler to select |
176 | an addressing mode that doesn't require extended registers. */ | 178 | an addressing mode that doesn't require extended registers. */ |
177 | asm volatile("rex64/fxsave (%[fx])" | 179 | asm volatile("rex64/fxsave (%[fx])" |
178 | : "=m" (fpu->state->fxsave) | 180 | : "=m" (fpu->state->fxsave) |
179 | : [fx] "R" (&fpu->state->fxsave)); | 181 | : [fx] "R" (&fpu->state->fxsave)); |
180 | #endif | 182 | #endif |
181 | } | 183 | } |
182 | 184 | ||
183 | #else /* CONFIG_X86_32 */ | 185 | #else /* CONFIG_X86_32 */ |
184 | 186 | ||
185 | /* perform fxrstor iff the processor has extended states, otherwise frstor */ | 187 | /* perform fxrstor iff the processor has extended states, otherwise frstor */ |
186 | static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | 188 | static inline int fxrstor_checking(struct i387_fxsave_struct *fx) |
187 | { | 189 | { |
188 | /* | 190 | /* |
189 | * The "nop" is needed to make the instructions the same | 191 | * The "nop" is needed to make the instructions the same |
190 | * length. | 192 | * length. |
191 | */ | 193 | */ |
192 | alternative_input( | 194 | alternative_input( |
193 | "nop ; frstor %1", | 195 | "nop ; frstor %1", |
194 | "fxrstor %1", | 196 | "fxrstor %1", |
195 | X86_FEATURE_FXSR, | 197 | X86_FEATURE_FXSR, |
196 | "m" (*fx)); | 198 | "m" (*fx)); |
197 | 199 | ||
198 | return 0; | 200 | return 0; |
199 | } | 201 | } |
200 | 202 | ||
201 | static inline void fpu_fxsave(struct fpu *fpu) | 203 | static inline void fpu_fxsave(struct fpu *fpu) |
202 | { | 204 | { |
203 | asm volatile("fxsave %[fx]" | 205 | asm volatile("fxsave %[fx]" |
204 | : [fx] "=m" (fpu->state->fxsave)); | 206 | : [fx] "=m" (fpu->state->fxsave)); |
205 | } | 207 | } |
206 | 208 | ||
207 | #endif /* CONFIG_X86_64 */ | 209 | #endif /* CONFIG_X86_64 */ |
208 | 210 | ||
209 | /* | 211 | /* |
210 | * These must be called with preempt disabled. Returns | 212 | * These must be called with preempt disabled. Returns |
211 | * 'true' if the FPU state is still intact. | 213 | * 'true' if the FPU state is still intact. |
212 | */ | 214 | */ |
213 | static inline int fpu_save_init(struct fpu *fpu) | 215 | static inline int fpu_save_init(struct fpu *fpu) |
214 | { | 216 | { |
215 | if (use_xsave()) { | 217 | if (use_xsave()) { |
216 | fpu_xsave(fpu); | 218 | fpu_xsave(fpu); |
217 | 219 | ||
218 | /* | 220 | /* |
219 | * xsave header may indicate the init state of the FP. | 221 | * xsave header may indicate the init state of the FP. |
220 | */ | 222 | */ |
221 | if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) | 223 | if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) |
222 | return 1; | 224 | return 1; |
223 | } else if (use_fxsr()) { | 225 | } else if (use_fxsr()) { |
224 | fpu_fxsave(fpu); | 226 | fpu_fxsave(fpu); |
225 | } else { | 227 | } else { |
226 | asm volatile("fnsave %[fx]; fwait" | 228 | asm volatile("fnsave %[fx]; fwait" |
227 | : [fx] "=m" (fpu->state->fsave)); | 229 | : [fx] "=m" (fpu->state->fsave)); |
228 | return 0; | 230 | return 0; |
229 | } | 231 | } |
230 | 232 | ||
231 | /* | 233 | /* |
232 | * If exceptions are pending, we need to clear them so | 234 | * If exceptions are pending, we need to clear them so |
233 | * that we don't randomly get exceptions later. | 235 | * that we don't randomly get exceptions later. |
234 | * | 236 | * |
235 | * FIXME! Is this perhaps only true for the old-style | 237 | * FIXME! Is this perhaps only true for the old-style |
236 | * irq13 case? Maybe we could leave the x87 state | 238 | * irq13 case? Maybe we could leave the x87 state |
237 | * intact otherwise? | 239 | * intact otherwise? |
238 | */ | 240 | */ |
239 | if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { | 241 | if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { |
240 | asm volatile("fnclex"); | 242 | asm volatile("fnclex"); |
241 | return 0; | 243 | return 0; |
242 | } | 244 | } |
243 | return 1; | 245 | return 1; |
244 | } | 246 | } |
245 | 247 | ||
246 | static inline int __save_init_fpu(struct task_struct *tsk) | 248 | static inline int __save_init_fpu(struct task_struct *tsk) |
247 | { | 249 | { |
248 | return fpu_save_init(&tsk->thread.fpu); | 250 | return fpu_save_init(&tsk->thread.fpu); |
249 | } | 251 | } |
250 | 252 | ||
251 | static inline int fpu_fxrstor_checking(struct fpu *fpu) | 253 | static inline int fpu_fxrstor_checking(struct fpu *fpu) |
252 | { | 254 | { |
253 | return fxrstor_checking(&fpu->state->fxsave); | 255 | return fxrstor_checking(&fpu->state->fxsave); |
254 | } | 256 | } |
255 | 257 | ||
256 | static inline int fpu_restore_checking(struct fpu *fpu) | 258 | static inline int fpu_restore_checking(struct fpu *fpu) |
257 | { | 259 | { |
258 | if (use_xsave()) | 260 | if (use_xsave()) |
259 | return fpu_xrstor_checking(fpu); | 261 | return fpu_xrstor_checking(fpu); |
260 | else | 262 | else |
261 | return fpu_fxrstor_checking(fpu); | 263 | return fpu_fxrstor_checking(fpu); |
262 | } | 264 | } |
263 | 265 | ||
264 | static inline int restore_fpu_checking(struct task_struct *tsk) | 266 | static inline int restore_fpu_checking(struct task_struct *tsk) |
265 | { | 267 | { |
266 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | 268 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception |
267 | is pending. Clear the x87 state here by setting it to fixed | 269 | is pending. Clear the x87 state here by setting it to fixed |
268 | values. "m" is a random variable that should be in L1 */ | 270 | values. "m" is a random variable that should be in L1 */ |
269 | alternative_input( | 271 | alternative_input( |
270 | ASM_NOP8 ASM_NOP2, | 272 | ASM_NOP8 ASM_NOP2, |
271 | "emms\n\t" /* clear stack tags */ | 273 | "emms\n\t" /* clear stack tags */ |
272 | "fildl %P[addr]", /* set F?P to defined value */ | 274 | "fildl %P[addr]", /* set F?P to defined value */ |
273 | X86_FEATURE_FXSAVE_LEAK, | 275 | X86_FEATURE_FXSAVE_LEAK, |
274 | [addr] "m" (tsk->thread.fpu.has_fpu)); | 276 | [addr] "m" (tsk->thread.fpu.has_fpu)); |
275 | 277 | ||
276 | return fpu_restore_checking(&tsk->thread.fpu); | 278 | return fpu_restore_checking(&tsk->thread.fpu); |
277 | } | 279 | } |
278 | 280 | ||
279 | /* | 281 | /* |
280 | * Software FPU state helpers. Careful: these need to | 282 | * Software FPU state helpers. Careful: these need to |
281 | * be preemption protection *and* they need to be | 283 | * be preemption protection *and* they need to be |
282 | * properly paired with the CR0.TS changes! | 284 | * properly paired with the CR0.TS changes! |
283 | */ | 285 | */ |
284 | static inline int __thread_has_fpu(struct task_struct *tsk) | 286 | static inline int __thread_has_fpu(struct task_struct *tsk) |
285 | { | 287 | { |
286 | return tsk->thread.fpu.has_fpu; | 288 | return tsk->thread.fpu.has_fpu; |
287 | } | 289 | } |
288 | 290 | ||
289 | /* Must be paired with an 'stts' after! */ | 291 | /* Must be paired with an 'stts' after! */ |
290 | static inline void __thread_clear_has_fpu(struct task_struct *tsk) | 292 | static inline void __thread_clear_has_fpu(struct task_struct *tsk) |
291 | { | 293 | { |
292 | tsk->thread.fpu.has_fpu = 0; | 294 | tsk->thread.fpu.has_fpu = 0; |
293 | this_cpu_write(fpu_owner_task, NULL); | 295 | this_cpu_write(fpu_owner_task, NULL); |
294 | } | 296 | } |
295 | 297 | ||
296 | /* Must be paired with a 'clts' before! */ | 298 | /* Must be paired with a 'clts' before! */ |
297 | static inline void __thread_set_has_fpu(struct task_struct *tsk) | 299 | static inline void __thread_set_has_fpu(struct task_struct *tsk) |
298 | { | 300 | { |
299 | tsk->thread.fpu.has_fpu = 1; | 301 | tsk->thread.fpu.has_fpu = 1; |
300 | this_cpu_write(fpu_owner_task, tsk); | 302 | this_cpu_write(fpu_owner_task, tsk); |
301 | } | 303 | } |
302 | 304 | ||
303 | /* | 305 | /* |
304 | * Encapsulate the CR0.TS handling together with the | 306 | * Encapsulate the CR0.TS handling together with the |
305 | * software flag. | 307 | * software flag. |
306 | * | 308 | * |
307 | * These generally need preemption protection to work, | 309 | * These generally need preemption protection to work, |
308 | * do try to avoid using these on their own. | 310 | * do try to avoid using these on their own. |
309 | */ | 311 | */ |
310 | static inline void __thread_fpu_end(struct task_struct *tsk) | 312 | static inline void __thread_fpu_end(struct task_struct *tsk) |
311 | { | 313 | { |
312 | __thread_clear_has_fpu(tsk); | 314 | __thread_clear_has_fpu(tsk); |
313 | stts(); | 315 | stts(); |
314 | } | 316 | } |
315 | 317 | ||
316 | static inline void __thread_fpu_begin(struct task_struct *tsk) | 318 | static inline void __thread_fpu_begin(struct task_struct *tsk) |
317 | { | 319 | { |
318 | clts(); | 320 | clts(); |
319 | __thread_set_has_fpu(tsk); | 321 | __thread_set_has_fpu(tsk); |
320 | } | 322 | } |
321 | 323 | ||
322 | /* | 324 | /* |
323 | * FPU state switching for scheduling. | 325 | * FPU state switching for scheduling. |
324 | * | 326 | * |
325 | * This is a two-stage process: | 327 | * This is a two-stage process: |
326 | * | 328 | * |
327 | * - switch_fpu_prepare() saves the old state and | 329 | * - switch_fpu_prepare() saves the old state and |
328 | * sets the new state of the CR0.TS bit. This is | 330 | * sets the new state of the CR0.TS bit. This is |
329 | * done within the context of the old process. | 331 | * done within the context of the old process. |
330 | * | 332 | * |
331 | * - switch_fpu_finish() restores the new state as | 333 | * - switch_fpu_finish() restores the new state as |
332 | * necessary. | 334 | * necessary. |
333 | */ | 335 | */ |
334 | typedef struct { int preload; } fpu_switch_t; | 336 | typedef struct { int preload; } fpu_switch_t; |
335 | 337 | ||
336 | /* | 338 | /* |
337 | * FIXME! We could do a totally lazy restore, but we need to | 339 | * FIXME! We could do a totally lazy restore, but we need to |
338 | * add a per-cpu "this was the task that last touched the FPU | 340 | * add a per-cpu "this was the task that last touched the FPU |
339 | * on this CPU" variable, and the task needs to have a "I last | 341 | * on this CPU" variable, and the task needs to have a "I last |
340 | * touched the FPU on this CPU" and check them. | 342 | * touched the FPU on this CPU" and check them. |
341 | * | 343 | * |
342 | * We don't do that yet, so "fpu_lazy_restore()" always returns | 344 | * We don't do that yet, so "fpu_lazy_restore()" always returns |
343 | * false, but some day.. | 345 | * false, but some day.. |
344 | */ | 346 | */ |
345 | static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) | 347 | static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) |
346 | { | 348 | { |
347 | return new == this_cpu_read_stable(fpu_owner_task) && | 349 | return new == this_cpu_read_stable(fpu_owner_task) && |
348 | cpu == new->thread.fpu.last_cpu; | 350 | cpu == new->thread.fpu.last_cpu; |
349 | } | 351 | } |
350 | 352 | ||
351 | static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) | 353 | static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) |
352 | { | 354 | { |
353 | fpu_switch_t fpu; | 355 | fpu_switch_t fpu; |
354 | 356 | ||
355 | fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; | 357 | fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; |
356 | if (__thread_has_fpu(old)) { | 358 | if (__thread_has_fpu(old)) { |
357 | if (!__save_init_fpu(old)) | 359 | if (!__save_init_fpu(old)) |
358 | cpu = ~0; | 360 | cpu = ~0; |
359 | old->thread.fpu.last_cpu = cpu; | 361 | old->thread.fpu.last_cpu = cpu; |
360 | old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */ | 362 | old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */ |
361 | 363 | ||
362 | /* Don't change CR0.TS if we just switch! */ | 364 | /* Don't change CR0.TS if we just switch! */ |
363 | if (fpu.preload) { | 365 | if (fpu.preload) { |
364 | new->fpu_counter++; | 366 | new->fpu_counter++; |
365 | __thread_set_has_fpu(new); | 367 | __thread_set_has_fpu(new); |
366 | prefetch(new->thread.fpu.state); | 368 | prefetch(new->thread.fpu.state); |
367 | } else | 369 | } else |
368 | stts(); | 370 | stts(); |
369 | } else { | 371 | } else { |
370 | old->fpu_counter = 0; | 372 | old->fpu_counter = 0; |
371 | old->thread.fpu.last_cpu = ~0; | 373 | old->thread.fpu.last_cpu = ~0; |
372 | if (fpu.preload) { | 374 | if (fpu.preload) { |
373 | new->fpu_counter++; | 375 | new->fpu_counter++; |
374 | if (fpu_lazy_restore(new, cpu)) | 376 | if (fpu_lazy_restore(new, cpu)) |
375 | fpu.preload = 0; | 377 | fpu.preload = 0; |
376 | else | 378 | else |
377 | prefetch(new->thread.fpu.state); | 379 | prefetch(new->thread.fpu.state); |
378 | __thread_fpu_begin(new); | 380 | __thread_fpu_begin(new); |
379 | } | 381 | } |
380 | } | 382 | } |
381 | return fpu; | 383 | return fpu; |
382 | } | 384 | } |
383 | 385 | ||
384 | /* | 386 | /* |
385 | * By the time this gets called, we've already cleared CR0.TS and | 387 | * By the time this gets called, we've already cleared CR0.TS and |
386 | * given the process the FPU if we are going to preload the FPU | 388 | * given the process the FPU if we are going to preload the FPU |
387 | * state - all we need to do is to conditionally restore the register | 389 | * state - all we need to do is to conditionally restore the register |
388 | * state itself. | 390 | * state itself. |
389 | */ | 391 | */ |
390 | static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) | 392 | static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) |
391 | { | 393 | { |
392 | if (fpu.preload) { | 394 | if (fpu.preload) { |
393 | if (unlikely(restore_fpu_checking(new))) | 395 | if (unlikely(restore_fpu_checking(new))) |
394 | __thread_fpu_end(new); | 396 | __thread_fpu_end(new); |
395 | } | 397 | } |
396 | } | 398 | } |
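The two-stage protocol described above is easiest to see at a call site. The real callers are the context-switch paths, which this hunk does not touch; the following is only a hypothetical sketch (the example_* name is invented) of the intended ordering:

/* Hypothetical sketch of a caller; the actual call sites live in the
 * __switch_to() code and are not part of this diff. */
static void example_fpu_context_switch(struct task_struct *prev,
                                       struct task_struct *next, int cpu)
{
        fpu_switch_t fpu;

        /* Stage 1: runs while 'prev' is still current; saves its state and
         * decides whether 'next' should get the FPU preloaded. */
        fpu = switch_fpu_prepare(prev, next, cpu);

        /* ... the rest of the register/stack switch happens here ... */

        /* Stage 2: runs with 'next' as current; restores the register
         * state only if switch_fpu_prepare() asked for a preload. */
        switch_fpu_finish(next, fpu);
}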
397 | 399 | ||
398 | /* | 400 | /* |
399 | * Signal frame handlers... | 401 | * Signal frame handlers... |
400 | */ | 402 | */ |
401 | extern int save_i387_xstate(void __user *buf); | 403 | extern int save_i387_xstate(void __user *buf); |
402 | extern int restore_i387_xstate(void __user *buf); | 404 | extern int restore_i387_xstate(void __user *buf); |
403 | 405 | ||
404 | static inline void __clear_fpu(struct task_struct *tsk) | 406 | static inline void __clear_fpu(struct task_struct *tsk) |
405 | { | 407 | { |
406 | if (__thread_has_fpu(tsk)) { | 408 | if (__thread_has_fpu(tsk)) { |
407 | /* Ignore delayed exceptions from user space */ | 409 | /* Ignore delayed exceptions from user space */ |
408 | asm volatile("1: fwait\n" | 410 | asm volatile("1: fwait\n" |
409 | "2:\n" | 411 | "2:\n" |
410 | _ASM_EXTABLE(1b, 2b)); | 412 | _ASM_EXTABLE(1b, 2b)); |
411 | __thread_fpu_end(tsk); | 413 | __thread_fpu_end(tsk); |
412 | } | 414 | } |
413 | } | 415 | } |
414 | 416 | ||
415 | /* | 417 | /* |
416 | * The actual user_fpu_begin/end() functions | 418 | * The actual user_fpu_begin/end() functions |
417 | * need to be preemption-safe. | 419 | * need to be preemption-safe. |
418 | * | 420 | * |
419 | * NOTE! user_fpu_end() must be used only after you | 421 | * NOTE! user_fpu_end() must be used only after you |
420 | * have saved the FP state, and user_fpu_begin() must | 422 | * have saved the FP state, and user_fpu_begin() must |
421 | * be used only immediately before restoring it. | 423 | * be used only immediately before restoring it. |
422 | * These functions do not do any save/restore on | 424 | * These functions do not do any save/restore on |
423 | * their own. | 425 | * their own. |
424 | */ | 426 | */ |
425 | static inline void user_fpu_end(void) | 427 | static inline void user_fpu_end(void) |
426 | { | 428 | { |
427 | preempt_disable(); | 429 | preempt_disable(); |
428 | __thread_fpu_end(current); | 430 | __thread_fpu_end(current); |
429 | preempt_enable(); | 431 | preempt_enable(); |
430 | } | 432 | } |
431 | 433 | ||
432 | static inline void user_fpu_begin(void) | 434 | static inline void user_fpu_begin(void) |
433 | { | 435 | { |
434 | preempt_disable(); | 436 | preempt_disable(); |
435 | if (!user_has_fpu()) | 437 | if (!user_has_fpu()) |
436 | __thread_fpu_begin(current); | 438 | __thread_fpu_begin(current); |
437 | preempt_enable(); | 439 | preempt_enable(); |
438 | } | 440 | } |
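The ordering requirement spelled out in the NOTE above — save before user_fpu_end(), user_fpu_begin() immediately before the restore — looks like this at a hypothetical call site. The example_* helpers are invented; the save_i387_xstate()/restore_i387_xstate() declarations appear earlier in this header, and the real users are the signal-frame paths:

/* Hypothetical sketch: the save/end and begin/restore pairings required
 * by the comment above. Not the actual signal-frame code. */
static inline int example_save_fpu_to_sigframe(void __user *buf)
{
        int err = save_i387_xstate(buf);        /* 1. write the live state out */

        user_fpu_end();                         /* 2. only then drop ownership */
        return err;
}

static inline int example_restore_fpu_from_sigframe(void __user *buf)
{
        user_fpu_begin();                       /* 1. claim the FPU first       */
        return restore_i387_xstate(buf);        /* 2. then reload the registers */
}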
439 | 441 | ||
440 | /* | 442 | /* |
441 | * These disable preemption on their own and are safe | 443 | * These disable preemption on their own and are safe |
442 | */ | 444 | */ |
443 | static inline void save_init_fpu(struct task_struct *tsk) | 445 | static inline void save_init_fpu(struct task_struct *tsk) |
444 | { | 446 | { |
445 | WARN_ON_ONCE(!__thread_has_fpu(tsk)); | 447 | WARN_ON_ONCE(!__thread_has_fpu(tsk)); |
446 | preempt_disable(); | 448 | preempt_disable(); |
447 | __save_init_fpu(tsk); | 449 | __save_init_fpu(tsk); |
448 | __thread_fpu_end(tsk); | 450 | __thread_fpu_end(tsk); |
449 | preempt_enable(); | 451 | preempt_enable(); |
450 | } | 452 | } |
451 | 453 | ||
452 | static inline void clear_fpu(struct task_struct *tsk) | 454 | static inline void clear_fpu(struct task_struct *tsk) |
453 | { | 455 | { |
454 | preempt_disable(); | 456 | preempt_disable(); |
455 | __clear_fpu(tsk); | 457 | __clear_fpu(tsk); |
456 | preempt_enable(); | 458 | preempt_enable(); |
457 | } | 459 | } |
458 | 460 | ||
459 | /* | 461 | /* |
460 | * i387 state interaction | 462 | * i387 state interaction |
461 | */ | 463 | */ |
462 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) | 464 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) |
463 | { | 465 | { |
464 | if (cpu_has_fxsr) { | 466 | if (cpu_has_fxsr) { |
465 | return tsk->thread.fpu.state->fxsave.cwd; | 467 | return tsk->thread.fpu.state->fxsave.cwd; |
466 | } else { | 468 | } else { |
467 | return (unsigned short)tsk->thread.fpu.state->fsave.cwd; | 469 | return (unsigned short)tsk->thread.fpu.state->fsave.cwd; |
468 | } | 470 | } |
469 | } | 471 | } |
470 | 472 | ||
471 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) | 473 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) |
472 | { | 474 | { |
473 | if (cpu_has_fxsr) { | 475 | if (cpu_has_fxsr) { |
474 | return tsk->thread.fpu.state->fxsave.swd; | 476 | return tsk->thread.fpu.state->fxsave.swd; |
475 | } else { | 477 | } else { |
476 | return (unsigned short)tsk->thread.fpu.state->fsave.swd; | 478 | return (unsigned short)tsk->thread.fpu.state->fsave.swd; |
477 | } | 479 | } |
478 | } | 480 | } |
479 | 481 | ||
480 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) | 482 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) |
481 | { | 483 | { |
482 | if (cpu_has_xmm) { | 484 | if (cpu_has_xmm) { |
483 | return tsk->thread.fpu.state->fxsave.mxcsr; | 485 | return tsk->thread.fpu.state->fxsave.mxcsr; |
484 | } else { | 486 | } else { |
485 | return MXCSR_DEFAULT; | 487 | return MXCSR_DEFAULT; |
486 | } | 488 | } |
487 | } | 489 | } |
488 | 490 | ||
489 | static bool fpu_allocated(struct fpu *fpu) | 491 | static bool fpu_allocated(struct fpu *fpu) |
490 | { | 492 | { |
491 | return fpu->state != NULL; | 493 | return fpu->state != NULL; |
492 | } | 494 | } |
493 | 495 | ||
494 | static inline int fpu_alloc(struct fpu *fpu) | 496 | static inline int fpu_alloc(struct fpu *fpu) |
495 | { | 497 | { |
496 | if (fpu_allocated(fpu)) | 498 | if (fpu_allocated(fpu)) |
497 | return 0; | 499 | return 0; |
498 | fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); | 500 | fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); |
499 | if (!fpu->state) | 501 | if (!fpu->state) |
500 | return -ENOMEM; | 502 | return -ENOMEM; |
501 | WARN_ON((unsigned long)fpu->state & 15); | 503 | WARN_ON((unsigned long)fpu->state & 15); |
502 | return 0; | 504 | return 0; |
503 | } | 505 | } |
504 | 506 | ||
505 | static inline void fpu_free(struct fpu *fpu) | 507 | static inline void fpu_free(struct fpu *fpu) |
506 | { | 508 | { |
507 | if (fpu->state) { | 509 | if (fpu->state) { |
508 | kmem_cache_free(task_xstate_cachep, fpu->state); | 510 | kmem_cache_free(task_xstate_cachep, fpu->state); |
509 | fpu->state = NULL; | 511 | fpu->state = NULL; |
510 | } | 512 | } |
511 | } | 513 | } |
512 | 514 | ||
513 | static inline void fpu_copy(struct fpu *dst, struct fpu *src) | 515 | static inline void fpu_copy(struct fpu *dst, struct fpu *src) |
514 | { | 516 | { |
515 | memcpy(dst->state, src->state, xstate_size); | 517 | memcpy(dst->state, src->state, xstate_size); |
516 | } | 518 | } |
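Together, fpu_alloc()/fpu_copy()/fpu_free() cover the lifetime of the xstate buffer; a hypothetical fork-style duplication built only on these helpers (example_dup_fpu is an invented name) would be:

/* Hypothetical sketch: duplicate a parent's FPU buffer into a child. */
static int example_dup_fpu(struct fpu *dst, struct fpu *src)
{
        int err = fpu_alloc(dst);       /* kmem_cache-backed, 16-byte aligned */

        if (err)
                return err;             /* -ENOMEM */
        fpu_copy(dst, src);             /* copies xstate_size bytes of state */
        return 0;
}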
517 | 519 | ||
518 | extern void fpu_finit(struct fpu *fpu); | 520 | extern void fpu_finit(struct fpu *fpu); |
519 | 521 | ||
520 | #endif | 522 | #endif |
521 | 523 |
arch/x86/include/asm/futex.h
1 | #ifndef _ASM_X86_FUTEX_H | 1 | #ifndef _ASM_X86_FUTEX_H |
2 | #define _ASM_X86_FUTEX_H | 2 | #define _ASM_X86_FUTEX_H |
3 | 3 | ||
4 | #ifdef __KERNEL__ | 4 | #ifdef __KERNEL__ |
5 | 5 | ||
6 | #include <linux/futex.h> | 6 | #include <linux/futex.h> |
7 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
8 | 8 | ||
9 | #include <asm/asm.h> | 9 | #include <asm/asm.h> |
10 | #include <asm/errno.h> | 10 | #include <asm/errno.h> |
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/smap.h> | ||
12 | 13 | ||
13 | #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ | 14 | #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ |
14 | asm volatile("1:\t" insn "\n" \ | 15 | asm volatile("\t" ASM_STAC "\n" \ |
15 | "2:\t.section .fixup,\"ax\"\n" \ | 16 | "1:\t" insn "\n" \ |
17 | "2:\t" ASM_CLAC "\n" \ | ||
18 | "\t.section .fixup,\"ax\"\n" \ | ||
16 | "3:\tmov\t%3, %1\n" \ | 19 | "3:\tmov\t%3, %1\n" \ |
17 | "\tjmp\t2b\n" \ | 20 | "\tjmp\t2b\n" \ |
18 | "\t.previous\n" \ | 21 | "\t.previous\n" \ |
19 | _ASM_EXTABLE(1b, 3b) \ | 22 | _ASM_EXTABLE(1b, 3b) \ |
20 | : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ | 23 | : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ |
21 | : "i" (-EFAULT), "0" (oparg), "1" (0)) | 24 | : "i" (-EFAULT), "0" (oparg), "1" (0)) |
22 | 25 | ||
23 | #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ | 26 | #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ |
24 | asm volatile("1:\tmovl %2, %0\n" \ | 27 | asm volatile("\t" ASM_STAC "\n" \ |
28 | "1:\tmovl %2, %0\n" \ | ||
25 | "\tmovl\t%0, %3\n" \ | 29 | "\tmovl\t%0, %3\n" \ |
26 | "\t" insn "\n" \ | 30 | "\t" insn "\n" \ |
27 | "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ | 31 | "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ |
28 | "\tjnz\t1b\n" \ | 32 | "\tjnz\t1b\n" \ |
29 | "3:\t.section .fixup,\"ax\"\n" \ | 33 | "3:\t" ASM_CLAC "\n" \ |
34 | "\t.section .fixup,\"ax\"\n" \ | ||
30 | "4:\tmov\t%5, %1\n" \ | 35 | "4:\tmov\t%5, %1\n" \ |
31 | "\tjmp\t3b\n" \ | 36 | "\tjmp\t3b\n" \ |
32 | "\t.previous\n" \ | 37 | "\t.previous\n" \ |
33 | _ASM_EXTABLE(1b, 4b) \ | 38 | _ASM_EXTABLE(1b, 4b) \ |
34 | _ASM_EXTABLE(2b, 4b) \ | 39 | _ASM_EXTABLE(2b, 4b) \ |
35 | : "=&a" (oldval), "=&r" (ret), \ | 40 | : "=&a" (oldval), "=&r" (ret), \ |
36 | "+m" (*uaddr), "=&r" (tem) \ | 41 | "+m" (*uaddr), "=&r" (tem) \ |
37 | : "r" (oparg), "i" (-EFAULT), "1" (0)) | 42 | : "r" (oparg), "i" (-EFAULT), "1" (0)) |
38 | 43 | ||
39 | static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | 44 | static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) |
40 | { | 45 | { |
41 | int op = (encoded_op >> 28) & 7; | 46 | int op = (encoded_op >> 28) & 7; |
42 | int cmp = (encoded_op >> 24) & 15; | 47 | int cmp = (encoded_op >> 24) & 15; |
43 | int oparg = (encoded_op << 8) >> 20; | 48 | int oparg = (encoded_op << 8) >> 20; |
44 | int cmparg = (encoded_op << 20) >> 20; | 49 | int cmparg = (encoded_op << 20) >> 20; |
45 | int oldval = 0, ret, tem; | 50 | int oldval = 0, ret, tem; |
46 | 51 | ||
47 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | 52 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) |
48 | oparg = 1 << oparg; | 53 | oparg = 1 << oparg; |
49 | 54 | ||
50 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | 55 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) |
51 | return -EFAULT; | 56 | return -EFAULT; |
52 | 57 | ||
53 | #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) | 58 | #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) |
54 | /* Real i386 machines can only support FUTEX_OP_SET */ | 59 | /* Real i386 machines can only support FUTEX_OP_SET */ |
55 | if (op != FUTEX_OP_SET && boot_cpu_data.x86 == 3) | 60 | if (op != FUTEX_OP_SET && boot_cpu_data.x86 == 3) |
56 | return -ENOSYS; | 61 | return -ENOSYS; |
57 | #endif | 62 | #endif |
58 | 63 | ||
59 | pagefault_disable(); | 64 | pagefault_disable(); |
60 | 65 | ||
61 | switch (op) { | 66 | switch (op) { |
62 | case FUTEX_OP_SET: | 67 | case FUTEX_OP_SET: |
63 | __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); | 68 | __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); |
64 | break; | 69 | break; |
65 | case FUTEX_OP_ADD: | 70 | case FUTEX_OP_ADD: |
66 | __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, | 71 | __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, |
67 | uaddr, oparg); | 72 | uaddr, oparg); |
68 | break; | 73 | break; |
69 | case FUTEX_OP_OR: | 74 | case FUTEX_OP_OR: |
70 | __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); | 75 | __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); |
71 | break; | 76 | break; |
72 | case FUTEX_OP_ANDN: | 77 | case FUTEX_OP_ANDN: |
73 | __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg); | 78 | __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg); |
74 | break; | 79 | break; |
75 | case FUTEX_OP_XOR: | 80 | case FUTEX_OP_XOR: |
76 | __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg); | 81 | __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg); |
77 | break; | 82 | break; |
78 | default: | 83 | default: |
79 | ret = -ENOSYS; | 84 | ret = -ENOSYS; |
80 | } | 85 | } |
81 | 86 | ||
82 | pagefault_enable(); | 87 | pagefault_enable(); |
83 | 88 | ||
84 | if (!ret) { | 89 | if (!ret) { |
85 | switch (cmp) { | 90 | switch (cmp) { |
86 | case FUTEX_OP_CMP_EQ: | 91 | case FUTEX_OP_CMP_EQ: |
87 | ret = (oldval == cmparg); | 92 | ret = (oldval == cmparg); |
88 | break; | 93 | break; |
89 | case FUTEX_OP_CMP_NE: | 94 | case FUTEX_OP_CMP_NE: |
90 | ret = (oldval != cmparg); | 95 | ret = (oldval != cmparg); |
91 | break; | 96 | break; |
92 | case FUTEX_OP_CMP_LT: | 97 | case FUTEX_OP_CMP_LT: |
93 | ret = (oldval < cmparg); | 98 | ret = (oldval < cmparg); |
94 | break; | 99 | break; |
95 | case FUTEX_OP_CMP_GE: | 100 | case FUTEX_OP_CMP_GE: |
96 | ret = (oldval >= cmparg); | 101 | ret = (oldval >= cmparg); |
97 | break; | 102 | break; |
98 | case FUTEX_OP_CMP_LE: | 103 | case FUTEX_OP_CMP_LE: |
99 | ret = (oldval <= cmparg); | 104 | ret = (oldval <= cmparg); |
100 | break; | 105 | break; |
101 | case FUTEX_OP_CMP_GT: | 106 | case FUTEX_OP_CMP_GT: |
102 | ret = (oldval > cmparg); | 107 | ret = (oldval > cmparg); |
103 | break; | 108 | break; |
104 | default: | 109 | default: |
105 | ret = -ENOSYS; | 110 | ret = -ENOSYS; |
106 | } | 111 | } |
107 | } | 112 | } |
108 | return ret; | 113 | return ret; |
109 | } | 114 | } |
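The shifts at the top of futex_atomic_op_inuser() undo the packing done by the generic futex code (the FUTEX_OP() macro in <linux/futex.h>). A hypothetical encoder that matches this decoder, for reference only:

/* Hypothetical sketch of the layout being decoded above:
 * op in bits 31..28, cmp in 27..24, oparg in 23..12, cmparg in 11..0
 * (oparg and cmparg are 12-bit signed fields, hence the shift pairs). */
static inline int example_encode_futex_op(int op, int cmp, int oparg, int cmparg)
{
        return ((op & 0xf) << 28) | ((cmp & 0xf) << 24) |
               ((oparg & 0xfff) << 12) | (cmparg & 0xfff);
}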
110 | 115 | ||
111 | static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, | 116 | static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, |
112 | u32 oldval, u32 newval) | 117 | u32 oldval, u32 newval) |
113 | { | 118 | { |
114 | int ret = 0; | 119 | int ret = 0; |
115 | 120 | ||
116 | #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) | 121 | #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) |
117 | /* Real i386 machines have no cmpxchg instruction */ | 122 | /* Real i386 machines have no cmpxchg instruction */ |
118 | if (boot_cpu_data.x86 == 3) | 123 | if (boot_cpu_data.x86 == 3) |
119 | return -ENOSYS; | 124 | return -ENOSYS; |
120 | #endif | 125 | #endif |
121 | 126 | ||
122 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | 127 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) |
123 | return -EFAULT; | 128 | return -EFAULT; |
124 | 129 | ||
125 | asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" | 130 | asm volatile("\t" ASM_STAC "\n" |
126 | "2:\t.section .fixup, \"ax\"\n" | 131 | "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" |
132 | "2:\t" ASM_CLAC "\n" | ||
133 | "\t.section .fixup, \"ax\"\n" | ||
127 | "3:\tmov %3, %0\n" | 134 | "3:\tmov %3, %0\n" |
128 | "\tjmp 2b\n" | 135 | "\tjmp 2b\n" |
129 | "\t.previous\n" | 136 | "\t.previous\n" |
130 | _ASM_EXTABLE(1b, 3b) | 137 | _ASM_EXTABLE(1b, 3b) |
131 | : "+r" (ret), "=a" (oldval), "+m" (*uaddr) | 138 | : "+r" (ret), "=a" (oldval), "+m" (*uaddr) |
132 | : "i" (-EFAULT), "r" (newval), "1" (oldval) | 139 | : "i" (-EFAULT), "r" (newval), "1" (oldval) |
133 | : "memory" | 140 | : "memory" |
134 | ); | 141 | ); |
135 | 142 | ||
136 | *uval = oldval; | 143 | *uval = oldval; |
137 | return ret; | 144 | return ret; |
138 | } | 145 | } |
139 | 146 | ||
140 | #endif | 147 | #endif |
141 | #endif /* _ASM_X86_FUTEX_H */ | 148 | #endif /* _ASM_X86_FUTEX_H */ |
142 | 149 |
arch/x86/include/asm/smap.h
1 | /* | 1 | /* |
2 | * Supervisor Mode Access Prevention support | 2 | * Supervisor Mode Access Prevention support |
3 | * | 3 | * |
4 | * Copyright (C) 2012 Intel Corporation | 4 | * Copyright (C) 2012 Intel Corporation |
5 | * Author: H. Peter Anvin <hpa@linux.intel.com> | 5 | * Author: H. Peter Anvin <hpa@linux.intel.com> |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or | 7 | * This program is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU General Public License | 8 | * modify it under the terms of the GNU General Public License |
9 | * as published by the Free Software Foundation; version 2 | 9 | * as published by the Free Software Foundation; version 2 |
10 | * of the License. | 10 | * of the License. |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #ifndef _ASM_X86_SMAP_H | 13 | #ifndef _ASM_X86_SMAP_H |
14 | #define _ASM_X86_SMAP_H | 14 | #define _ASM_X86_SMAP_H |
15 | 15 | ||
16 | #include <linux/stringify.h> | 16 | #include <linux/stringify.h> |
17 | #include <asm/nops.h> | 17 | #include <asm/nops.h> |
18 | #include <asm/cpufeature.h> | 18 | #include <asm/cpufeature.h> |
19 | 19 | ||
20 | /* "Raw" instruction opcodes */ | 20 | /* "Raw" instruction opcodes */ |
21 | #define __ASM_CLAC .byte 0x0f,0x01,0xca | 21 | #define __ASM_CLAC .byte 0x0f,0x01,0xca |
22 | #define __ASM_STAC .byte 0x0f,0x01,0xcb | 22 | #define __ASM_STAC .byte 0x0f,0x01,0xcb |
23 | 23 | ||
24 | #ifdef __ASSEMBLY__ | 24 | #ifdef __ASSEMBLY__ |
25 | 25 | ||
26 | #include <asm/alternative-asm.h> | 26 | #include <asm/alternative-asm.h> |
27 | 27 | ||
28 | #ifdef CONFIG_X86_SMAP | 28 | #ifdef CONFIG_X86_SMAP |
29 | 29 | ||
30 | #define ASM_CLAC \ | 30 | #define ASM_CLAC \ |
31 | 661: ASM_NOP3 ; \ | 31 | 661: ASM_NOP3 ; \ |
32 | .pushsection .altinstr_replacement, "ax" ; \ | 32 | .pushsection .altinstr_replacement, "ax" ; \ |
33 | 662: __ASM_CLAC ; \ | 33 | 662: __ASM_CLAC ; \ |
34 | .popsection ; \ | 34 | .popsection ; \ |
35 | .pushsection .altinstructions, "a" ; \ | 35 | .pushsection .altinstructions, "a" ; \ |
36 | altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ | 36 | altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ |
37 | .popsection | 37 | .popsection |
38 | 38 | ||
39 | #define ASM_STAC \ | 39 | #define ASM_STAC \ |
40 | 661: ASM_NOP3 ; \ | 40 | 661: ASM_NOP3 ; \ |
41 | .pushsection .altinstr_replacement, "ax" ; \ | 41 | .pushsection .altinstr_replacement, "ax" ; \ |
42 | 662: __ASM_STAC ; \ | 42 | 662: __ASM_STAC ; \ |
43 | .popsection ; \ | 43 | .popsection ; \ |
44 | .pushsection .altinstructions, "a" ; \ | 44 | .pushsection .altinstructions, "a" ; \ |
45 | altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ | 45 | altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ |
46 | .popsection | 46 | .popsection |
47 | 47 | ||
48 | #else /* CONFIG_X86_SMAP */ | 48 | #else /* CONFIG_X86_SMAP */ |
49 | 49 | ||
50 | #define ASM_CLAC | 50 | #define ASM_CLAC |
51 | #define ASM_STAC | 51 | #define ASM_STAC |
52 | 52 | ||
53 | #endif /* CONFIG_X86_SMAP */ | 53 | #endif /* CONFIG_X86_SMAP */ |
54 | 54 | ||
55 | #else /* __ASSEMBLY__ */ | 55 | #else /* __ASSEMBLY__ */ |
56 | 56 | ||
57 | #include <asm/alternative.h> | 57 | #include <asm/alternative.h> |
58 | 58 | ||
59 | #ifdef CONFIG_X86_SMAP | 59 | #ifdef CONFIG_X86_SMAP |
60 | 60 | ||
61 | static inline void clac(void) | 61 | static __always_inline void clac(void) |
62 | { | 62 | { |
63 | /* Note: a barrier is implicit in alternative() */ | 63 | /* Note: a barrier is implicit in alternative() */ |
64 | alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); | 64 | alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); |
65 | } | 65 | } |
66 | 66 | ||
67 | static inline void stac(void) | 67 | static __always_inline void stac(void) |
68 | { | 68 | { |
69 | /* Note: a barrier is implicit in alternative() */ | 69 | /* Note: a barrier is implicit in alternative() */ |
70 | alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); | 70 | alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); |
71 | } | 71 | } |
72 | 72 | ||
73 | /* These macros can be used in asm() statements */ | 73 | /* These macros can be used in asm() statements */ |
74 | #define ASM_CLAC \ | 74 | #define ASM_CLAC \ |
75 | ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP) | 75 | ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP) |
76 | #define ASM_STAC \ | 76 | #define ASM_STAC \ |
77 | ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP) | 77 | ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP) |
78 | 78 | ||
79 | #else /* CONFIG_X86_SMAP */ | 79 | #else /* CONFIG_X86_SMAP */ |
80 | 80 | ||
81 | static inline void clac(void) { } | 81 | static inline void clac(void) { } |
82 | static inline void stac(void) { } | 82 | static inline void stac(void) { } |
83 | 83 | ||
84 | #define ASM_CLAC | 84 | #define ASM_CLAC |
85 | #define ASM_STAC | 85 | #define ASM_STAC |
86 | 86 | ||
87 | #endif /* CONFIG_X86_SMAP */ | 87 | #endif /* CONFIG_X86_SMAP */ |
88 | 88 | ||
89 | #endif /* __ASSEMBLY__ */ | 89 | #endif /* __ASSEMBLY__ */ |
90 | 90 | ||
91 | #endif /* _ASM_X86_SMAP_H */ | 91 | #endif /* _ASM_X86_SMAP_H */ |
92 | 92 |
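The C-level pattern these helpers enable — open the user-access window, perform the access, close the window again — is what the usercopy changes later in this commit apply around their inline-asm loops. A minimal, hypothetical illustration (example_peek_user_long is invented, and fault handling plus access_ok() are omitted for brevity):

/* Hypothetical sketch only: real code pairs this with access_ok() and an
 * exception-table fixup. On non-SMAP CPUs both calls patch down to NOPs. */
static inline unsigned long example_peek_user_long(const unsigned long __user *p)
{
        unsigned long val;

        stac();                                         /* EFLAGS.AC = 1: user access allowed   */
        val = *(const unsigned long __force *)p;        /* the raw access SMAP would otherwise trap */
        clac();                                         /* EFLAGS.AC = 0: user access forbidden  */
        return val;
}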
arch/x86/include/asm/uaccess.h
1 | #ifndef _ASM_X86_UACCESS_H | 1 | #ifndef _ASM_X86_UACCESS_H |
2 | #define _ASM_X86_UACCESS_H | 2 | #define _ASM_X86_UACCESS_H |
3 | /* | 3 | /* |
4 | * User space memory access functions | 4 | * User space memory access functions |
5 | */ | 5 | */ |
6 | #include <linux/errno.h> | 6 | #include <linux/errno.h> |
7 | #include <linux/compiler.h> | 7 | #include <linux/compiler.h> |
8 | #include <linux/thread_info.h> | 8 | #include <linux/thread_info.h> |
9 | #include <linux/string.h> | 9 | #include <linux/string.h> |
10 | #include <asm/asm.h> | 10 | #include <asm/asm.h> |
11 | #include <asm/page.h> | 11 | #include <asm/page.h> |
12 | #include <asm/smap.h> | ||
12 | 13 | ||
13 | #define VERIFY_READ 0 | 14 | #define VERIFY_READ 0 |
14 | #define VERIFY_WRITE 1 | 15 | #define VERIFY_WRITE 1 |
15 | 16 | ||
16 | /* | 17 | /* |
17 | * The fs value determines whether argument validity checking should be | 18 | * The fs value determines whether argument validity checking should be |
18 | * performed or not. If get_fs() == USER_DS, checking is performed, with | 19 | * performed or not. If get_fs() == USER_DS, checking is performed, with |
19 | * get_fs() == KERNEL_DS, checking is bypassed. | 20 | * get_fs() == KERNEL_DS, checking is bypassed. |
20 | * | 21 | * |
21 | * For historical reasons, these macros are grossly misnamed. | 22 | * For historical reasons, these macros are grossly misnamed. |
22 | */ | 23 | */ |
23 | 24 | ||
24 | #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) | 25 | #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) |
25 | 26 | ||
26 | #define KERNEL_DS MAKE_MM_SEG(-1UL) | 27 | #define KERNEL_DS MAKE_MM_SEG(-1UL) |
27 | #define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) | 28 | #define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) |
28 | 29 | ||
29 | #define get_ds() (KERNEL_DS) | 30 | #define get_ds() (KERNEL_DS) |
30 | #define get_fs() (current_thread_info()->addr_limit) | 31 | #define get_fs() (current_thread_info()->addr_limit) |
31 | #define set_fs(x) (current_thread_info()->addr_limit = (x)) | 32 | #define set_fs(x) (current_thread_info()->addr_limit = (x)) |
32 | 33 | ||
33 | #define segment_eq(a, b) ((a).seg == (b).seg) | 34 | #define segment_eq(a, b) ((a).seg == (b).seg) |
34 | 35 | ||
35 | #define user_addr_max() (current_thread_info()->addr_limit.seg) | 36 | #define user_addr_max() (current_thread_info()->addr_limit.seg) |
36 | #define __addr_ok(addr) \ | 37 | #define __addr_ok(addr) \ |
37 | ((unsigned long __force)(addr) < user_addr_max()) | 38 | ((unsigned long __force)(addr) < user_addr_max()) |
38 | 39 | ||
39 | /* | 40 | /* |
40 | * Test whether a block of memory is a valid user space address. | 41 | * Test whether a block of memory is a valid user space address. |
41 | * Returns 0 if the range is valid, nonzero otherwise. | 42 | * Returns 0 if the range is valid, nonzero otherwise. |
42 | * | 43 | * |
43 | * This is equivalent to the following test: | 44 | * This is equivalent to the following test: |
44 | * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64) | 45 | * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64) |
45 | * | 46 | * |
46 | * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... | 47 | * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... |
47 | */ | 48 | */ |
48 | 49 | ||
49 | #define __range_not_ok(addr, size, limit) \ | 50 | #define __range_not_ok(addr, size, limit) \ |
50 | ({ \ | 51 | ({ \ |
51 | unsigned long flag, roksum; \ | 52 | unsigned long flag, roksum; \ |
52 | __chk_user_ptr(addr); \ | 53 | __chk_user_ptr(addr); \ |
53 | asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ | 54 | asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ |
54 | : "=&r" (flag), "=r" (roksum) \ | 55 | : "=&r" (flag), "=r" (roksum) \ |
55 | : "1" (addr), "g" ((long)(size)), \ | 56 | : "1" (addr), "g" ((long)(size)), \ |
56 | "rm" (limit)); \ | 57 | "rm" (limit)); \ |
57 | flag; \ | 58 | flag; \ |
58 | }) | 59 | }) |
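The inline asm implements the 33/65-bit comparison from the comment above with an add-with-carry trick. In plain C, the same check reads as follows (a hypothetical equivalent for illustration, not the code the kernel uses):

/* Hypothetical C equivalent of __range_not_ok(): nonzero when addr + size
 * wraps around or ends past limit, using the carry out of the addition
 * instead of a wider integer type. */
static inline unsigned long example_range_not_ok(unsigned long addr,
                                                 unsigned long size,
                                                 unsigned long limit)
{
        unsigned long sum = addr + size;

        if (sum < addr)         /* carry set: the sum wrapped past the top */
                return 1;
        return sum > limit;     /* strictly greater, as in the asm's sbb chain */
}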
59 | 60 | ||
60 | /** | 61 | /** |
61 | * access_ok: - Checks if a user space pointer is valid | 62 | * access_ok: - Checks if a user space pointer is valid |
62 | * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that | 63 | * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that |
63 | * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe | 64 | * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe |
64 | * to write to a block, it is always safe to read from it. | 65 | * to write to a block, it is always safe to read from it. |
65 | * @addr: User space pointer to start of block to check | 66 | * @addr: User space pointer to start of block to check |
66 | * @size: Size of block to check | 67 | * @size: Size of block to check |
67 | * | 68 | * |
68 | * Context: User context only. This function may sleep. | 69 | * Context: User context only. This function may sleep. |
69 | * | 70 | * |
70 | * Checks if a pointer to a block of memory in user space is valid. | 71 | * Checks if a pointer to a block of memory in user space is valid. |
71 | * | 72 | * |
72 | * Returns true (nonzero) if the memory block may be valid, false (zero) | 73 | * Returns true (nonzero) if the memory block may be valid, false (zero) |
73 | * if it is definitely invalid. | 74 | * if it is definitely invalid. |
74 | * | 75 | * |
75 | * Note that, depending on architecture, this function probably just | 76 | * Note that, depending on architecture, this function probably just |
76 | * checks that the pointer is in the user space range - after calling | 77 | * checks that the pointer is in the user space range - after calling |
77 | * this function, memory access functions may still return -EFAULT. | 78 | * this function, memory access functions may still return -EFAULT. |
78 | */ | 79 | */ |
79 | #define access_ok(type, addr, size) \ | 80 | #define access_ok(type, addr, size) \ |
80 | (likely(__range_not_ok(addr, size, user_addr_max()) == 0)) | 81 | (likely(__range_not_ok(addr, size, user_addr_max()) == 0)) |
81 | 82 | ||
82 | /* | 83 | /* |
83 | * The exception table consists of pairs of addresses relative to the | 84 | * The exception table consists of pairs of addresses relative to the |
84 | * exception table entry itself: the first is the address of an | 85 | * exception table entry itself: the first is the address of an |
85 | * instruction that is allowed to fault, and the second is the address | 86 | * instruction that is allowed to fault, and the second is the address |
86 | * at which the program should continue. No registers are modified, | 87 | * at which the program should continue. No registers are modified, |
87 | * so it is entirely up to the continuation code to figure out what to | 88 | * so it is entirely up to the continuation code to figure out what to |
88 | * do. | 89 | * do. |
89 | * | 90 | * |
90 | * All the routines below use bits of fixup code that are out of line | 91 | * All the routines below use bits of fixup code that are out of line |
91 | * with the main instruction path. This means when everything is well, | 92 | * with the main instruction path. This means when everything is well, |
92 | * we don't even have to jump over them. Further, they do not intrude | 93 | * we don't even have to jump over them. Further, they do not intrude |
93 | * on our cache or tlb entries. | 94 | * on our cache or tlb entries. |
94 | */ | 95 | */ |
95 | 96 | ||
96 | struct exception_table_entry { | 97 | struct exception_table_entry { |
97 | int insn, fixup; | 98 | int insn, fixup; |
98 | }; | 99 | }; |
99 | /* This is not the generic standard exception_table_entry format */ | 100 | /* This is not the generic standard exception_table_entry format */ |
100 | #define ARCH_HAS_SORT_EXTABLE | 101 | #define ARCH_HAS_SORT_EXTABLE |
101 | #define ARCH_HAS_SEARCH_EXTABLE | 102 | #define ARCH_HAS_SEARCH_EXTABLE |
102 | 103 | ||
103 | extern int fixup_exception(struct pt_regs *regs); | 104 | extern int fixup_exception(struct pt_regs *regs); |
104 | extern int early_fixup_exception(unsigned long *ip); | 105 | extern int early_fixup_exception(unsigned long *ip); |
105 | 106 | ||
106 | /* | 107 | /* |
107 | * These are the main single-value transfer routines. They automatically | 108 | * These are the main single-value transfer routines. They automatically |
108 | * use the right size if we just have the right pointer type. | 109 | * use the right size if we just have the right pointer type. |
109 | * | 110 | * |
110 | * This gets kind of ugly. We want to return _two_ values in "get_user()" | 111 | * This gets kind of ugly. We want to return _two_ values in "get_user()" |
111 | * and yet we don't want to do any pointers, because that is too much | 112 | * and yet we don't want to do any pointers, because that is too much |
112 | * of a performance impact. Thus we have a few rather ugly macros here, | 113 | * of a performance impact. Thus we have a few rather ugly macros here, |
113 | * and hide all the ugliness from the user. | 114 | * and hide all the ugliness from the user. |
114 | * | 115 | * |
115 | * The "__xxx" versions of the user access functions are versions that | 116 | * The "__xxx" versions of the user access functions are versions that |
116 | * do not verify the address space, that must have been done previously | 117 | * do not verify the address space, that must have been done previously |
117 | * with a separate "access_ok()" call (this is used when we do multiple | 118 | * with a separate "access_ok()" call (this is used when we do multiple |
118 | * accesses to the same area of user memory). | 119 | * accesses to the same area of user memory). |
119 | */ | 120 | */ |
120 | 121 | ||
121 | extern int __get_user_1(void); | 122 | extern int __get_user_1(void); |
122 | extern int __get_user_2(void); | 123 | extern int __get_user_2(void); |
123 | extern int __get_user_4(void); | 124 | extern int __get_user_4(void); |
124 | extern int __get_user_8(void); | 125 | extern int __get_user_8(void); |
125 | extern int __get_user_bad(void); | 126 | extern int __get_user_bad(void); |
126 | 127 | ||
127 | #define __get_user_x(size, ret, x, ptr) \ | 128 | #define __get_user_x(size, ret, x, ptr) \ |
128 | asm volatile("call __get_user_" #size \ | 129 | asm volatile("call __get_user_" #size \ |
129 | : "=a" (ret), "=d" (x) \ | 130 | : "=a" (ret), "=d" (x) \ |
130 | : "0" (ptr)) \ | 131 | : "0" (ptr)) \ |
131 | 132 | ||
132 | /* Careful: we have to cast the result to the type of the pointer | 133 | /* Careful: we have to cast the result to the type of the pointer |
133 | * for sign reasons */ | 134 | * for sign reasons */ |
134 | 135 | ||
135 | /** | 136 | /** |
136 | * get_user: - Get a simple variable from user space. | 137 | * get_user: - Get a simple variable from user space. |
137 | * @x: Variable to store result. | 138 | * @x: Variable to store result. |
138 | * @ptr: Source address, in user space. | 139 | * @ptr: Source address, in user space. |
139 | * | 140 | * |
140 | * Context: User context only. This function may sleep. | 141 | * Context: User context only. This function may sleep. |
141 | * | 142 | * |
142 | * This macro copies a single simple variable from user space to kernel | 143 | * This macro copies a single simple variable from user space to kernel |
143 | * space. It supports simple types like char and int, but not larger | 144 | * space. It supports simple types like char and int, but not larger |
144 | * data types like structures or arrays. | 145 | * data types like structures or arrays. |
145 | * | 146 | * |
146 | * @ptr must have pointer-to-simple-variable type, and the result of | 147 | * @ptr must have pointer-to-simple-variable type, and the result of |
147 | * dereferencing @ptr must be assignable to @x without a cast. | 148 | * dereferencing @ptr must be assignable to @x without a cast. |
148 | * | 149 | * |
149 | * Returns zero on success, or -EFAULT on error. | 150 | * Returns zero on success, or -EFAULT on error. |
150 | * On error, the variable @x is set to zero. | 151 | * On error, the variable @x is set to zero. |
151 | */ | 152 | */ |
152 | #ifdef CONFIG_X86_32 | 153 | #ifdef CONFIG_X86_32 |
153 | #define __get_user_8(__ret_gu, __val_gu, ptr) \ | 154 | #define __get_user_8(__ret_gu, __val_gu, ptr) \ |
154 | __get_user_x(X, __ret_gu, __val_gu, ptr) | 155 | __get_user_x(X, __ret_gu, __val_gu, ptr) |
155 | #else | 156 | #else |
156 | #define __get_user_8(__ret_gu, __val_gu, ptr) \ | 157 | #define __get_user_8(__ret_gu, __val_gu, ptr) \ |
157 | __get_user_x(8, __ret_gu, __val_gu, ptr) | 158 | __get_user_x(8, __ret_gu, __val_gu, ptr) |
158 | #endif | 159 | #endif |
159 | 160 | ||
160 | #define get_user(x, ptr) \ | 161 | #define get_user(x, ptr) \ |
161 | ({ \ | 162 | ({ \ |
162 | int __ret_gu; \ | 163 | int __ret_gu; \ |
163 | unsigned long __val_gu; \ | 164 | unsigned long __val_gu; \ |
164 | __chk_user_ptr(ptr); \ | 165 | __chk_user_ptr(ptr); \ |
165 | might_fault(); \ | 166 | might_fault(); \ |
166 | switch (sizeof(*(ptr))) { \ | 167 | switch (sizeof(*(ptr))) { \ |
167 | case 1: \ | 168 | case 1: \ |
168 | __get_user_x(1, __ret_gu, __val_gu, ptr); \ | 169 | __get_user_x(1, __ret_gu, __val_gu, ptr); \ |
169 | break; \ | 170 | break; \ |
170 | case 2: \ | 171 | case 2: \ |
171 | __get_user_x(2, __ret_gu, __val_gu, ptr); \ | 172 | __get_user_x(2, __ret_gu, __val_gu, ptr); \ |
172 | break; \ | 173 | break; \ |
173 | case 4: \ | 174 | case 4: \ |
174 | __get_user_x(4, __ret_gu, __val_gu, ptr); \ | 175 | __get_user_x(4, __ret_gu, __val_gu, ptr); \ |
175 | break; \ | 176 | break; \ |
176 | case 8: \ | 177 | case 8: \ |
177 | __get_user_8(__ret_gu, __val_gu, ptr); \ | 178 | __get_user_8(__ret_gu, __val_gu, ptr); \ |
178 | break; \ | 179 | break; \ |
179 | default: \ | 180 | default: \ |
180 | __get_user_x(X, __ret_gu, __val_gu, ptr); \ | 181 | __get_user_x(X, __ret_gu, __val_gu, ptr); \ |
181 | break; \ | 182 | break; \ |
182 | } \ | 183 | } \ |
183 | (x) = (__typeof__(*(ptr)))__val_gu; \ | 184 | (x) = (__typeof__(*(ptr)))__val_gu; \ |
184 | __ret_gu; \ | 185 | __ret_gu; \ |
185 | }) | 186 | }) |
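A typical call site for the macro above, as a hypothetical example (example_read_user_int is invented): the caller passes an untrusted pointer and gets either the value or -EFAULT, never touching user memory directly.

/* Hypothetical sketch: read one int from user space with full checking.
 * The __get_user_N stubs this dispatches to gain their STAC/CLAC in
 * getuser.S elsewhere in this commit. */
static long example_read_user_int(const int __user *uptr)
{
        int val;

        if (get_user(val, uptr))
                return -EFAULT;         /* bad address or fault during the load */
        return val;
}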
186 | 187 | ||
187 | #define __put_user_x(size, x, ptr, __ret_pu) \ | 188 | #define __put_user_x(size, x, ptr, __ret_pu) \ |
188 | asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ | 189 | asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ |
189 | : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") | 190 | : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") |
190 | 191 | ||
191 | 192 | ||
192 | 193 | ||
193 | #ifdef CONFIG_X86_32 | 194 | #ifdef CONFIG_X86_32 |
194 | #define __put_user_asm_u64(x, addr, err, errret) \ | 195 | #define __put_user_asm_u64(x, addr, err, errret) \ |
195 | asm volatile("1: movl %%eax,0(%2)\n" \ | 196 | asm volatile(ASM_STAC "\n" \ |
197 | "1: movl %%eax,0(%2)\n" \ | ||
196 | "2: movl %%edx,4(%2)\n" \ | 198 | "2: movl %%edx,4(%2)\n" \ |
197 | "3:\n" \ | 199 | "3: " ASM_CLAC "\n" \ |
198 | ".section .fixup,\"ax\"\n" \ | 200 | ".section .fixup,\"ax\"\n" \ |
199 | "4: movl %3,%0\n" \ | 201 | "4: movl %3,%0\n" \ |
200 | " jmp 3b\n" \ | 202 | " jmp 3b\n" \ |
201 | ".previous\n" \ | 203 | ".previous\n" \ |
202 | _ASM_EXTABLE(1b, 4b) \ | 204 | _ASM_EXTABLE(1b, 4b) \ |
203 | _ASM_EXTABLE(2b, 4b) \ | 205 | _ASM_EXTABLE(2b, 4b) \ |
204 | : "=r" (err) \ | 206 | : "=r" (err) \ |
205 | : "A" (x), "r" (addr), "i" (errret), "0" (err)) | 207 | : "A" (x), "r" (addr), "i" (errret), "0" (err)) |
206 | 208 | ||
207 | #define __put_user_asm_ex_u64(x, addr) \ | 209 | #define __put_user_asm_ex_u64(x, addr) \ |
208 | asm volatile("1: movl %%eax,0(%1)\n" \ | 210 | asm volatile(ASM_STAC "\n" \ |
211 | "1: movl %%eax,0(%1)\n" \ | ||
209 | "2: movl %%edx,4(%1)\n" \ | 212 | "2: movl %%edx,4(%1)\n" \ |
210 | "3:\n" \ | 213 | "3: " ASM_CLAC "\n" \ |
211 | _ASM_EXTABLE_EX(1b, 2b) \ | 214 | _ASM_EXTABLE_EX(1b, 2b) \ |
212 | _ASM_EXTABLE_EX(2b, 3b) \ | 215 | _ASM_EXTABLE_EX(2b, 3b) \ |
213 | : : "A" (x), "r" (addr)) | 216 | : : "A" (x), "r" (addr)) |
214 | 217 | ||
215 | #define __put_user_x8(x, ptr, __ret_pu) \ | 218 | #define __put_user_x8(x, ptr, __ret_pu) \ |
216 | asm volatile("call __put_user_8" : "=a" (__ret_pu) \ | 219 | asm volatile("call __put_user_8" : "=a" (__ret_pu) \ |
217 | : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") | 220 | : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") |
218 | #else | 221 | #else |
219 | #define __put_user_asm_u64(x, ptr, retval, errret) \ | 222 | #define __put_user_asm_u64(x, ptr, retval, errret) \ |
220 | __put_user_asm(x, ptr, retval, "q", "", "er", errret) | 223 | __put_user_asm(x, ptr, retval, "q", "", "er", errret) |
221 | #define __put_user_asm_ex_u64(x, addr) \ | 224 | #define __put_user_asm_ex_u64(x, addr) \ |
222 | __put_user_asm_ex(x, addr, "q", "", "er") | 225 | __put_user_asm_ex(x, addr, "q", "", "er") |
223 | #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) | 226 | #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) |
224 | #endif | 227 | #endif |
225 | 228 | ||
226 | extern void __put_user_bad(void); | 229 | extern void __put_user_bad(void); |
227 | 230 | ||
228 | /* | 231 | /* |
229 | * Strange magic calling convention: pointer in %ecx, | 232 | * Strange magic calling convention: pointer in %ecx, |
230 | * value in %eax(:%edx), return value in %eax. clobbers %rbx | 233 | * value in %eax(:%edx), return value in %eax. clobbers %rbx |
231 | */ | 234 | */ |
232 | extern void __put_user_1(void); | 235 | extern void __put_user_1(void); |
233 | extern void __put_user_2(void); | 236 | extern void __put_user_2(void); |
234 | extern void __put_user_4(void); | 237 | extern void __put_user_4(void); |
235 | extern void __put_user_8(void); | 238 | extern void __put_user_8(void); |
236 | 239 | ||
237 | #ifdef CONFIG_X86_WP_WORKS_OK | 240 | #ifdef CONFIG_X86_WP_WORKS_OK |
238 | 241 | ||
239 | /** | 242 | /** |
240 | * put_user: - Write a simple value into user space. | 243 | * put_user: - Write a simple value into user space. |
241 | * @x: Value to copy to user space. | 244 | * @x: Value to copy to user space. |
242 | * @ptr: Destination address, in user space. | 245 | * @ptr: Destination address, in user space. |
243 | * | 246 | * |
244 | * Context: User context only. This function may sleep. | 247 | * Context: User context only. This function may sleep. |
245 | * | 248 | * |
246 | * This macro copies a single simple value from kernel space to user | 249 | * This macro copies a single simple value from kernel space to user |
247 | * space. It supports simple types like char and int, but not larger | 250 | * space. It supports simple types like char and int, but not larger |
248 | * data types like structures or arrays. | 251 | * data types like structures or arrays. |
249 | * | 252 | * |
250 | * @ptr must have pointer-to-simple-variable type, and @x must be assignable | 253 | * @ptr must have pointer-to-simple-variable type, and @x must be assignable |
251 | * to the result of dereferencing @ptr. | 254 | * to the result of dereferencing @ptr. |
252 | * | 255 | * |
253 | * Returns zero on success, or -EFAULT on error. | 256 | * Returns zero on success, or -EFAULT on error. |
254 | */ | 257 | */ |
255 | #define put_user(x, ptr) \ | 258 | #define put_user(x, ptr) \ |
256 | ({ \ | 259 | ({ \ |
257 | int __ret_pu; \ | 260 | int __ret_pu; \ |
258 | __typeof__(*(ptr)) __pu_val; \ | 261 | __typeof__(*(ptr)) __pu_val; \ |
259 | __chk_user_ptr(ptr); \ | 262 | __chk_user_ptr(ptr); \ |
260 | might_fault(); \ | 263 | might_fault(); \ |
261 | __pu_val = x; \ | 264 | __pu_val = x; \ |
262 | switch (sizeof(*(ptr))) { \ | 265 | switch (sizeof(*(ptr))) { \ |
263 | case 1: \ | 266 | case 1: \ |
264 | __put_user_x(1, __pu_val, ptr, __ret_pu); \ | 267 | __put_user_x(1, __pu_val, ptr, __ret_pu); \ |
265 | break; \ | 268 | break; \ |
266 | case 2: \ | 269 | case 2: \ |
267 | __put_user_x(2, __pu_val, ptr, __ret_pu); \ | 270 | __put_user_x(2, __pu_val, ptr, __ret_pu); \ |
268 | break; \ | 271 | break; \ |
269 | case 4: \ | 272 | case 4: \ |
270 | __put_user_x(4, __pu_val, ptr, __ret_pu); \ | 273 | __put_user_x(4, __pu_val, ptr, __ret_pu); \ |
271 | break; \ | 274 | break; \ |
272 | case 8: \ | 275 | case 8: \ |
273 | __put_user_x8(__pu_val, ptr, __ret_pu); \ | 276 | __put_user_x8(__pu_val, ptr, __ret_pu); \ |
274 | break; \ | 277 | break; \ |
275 | default: \ | 278 | default: \ |
276 | __put_user_x(X, __pu_val, ptr, __ret_pu); \ | 279 | __put_user_x(X, __pu_val, ptr, __ret_pu); \ |
277 | break; \ | 280 | break; \ |
278 | } \ | 281 | } \ |
279 | __ret_pu; \ | 282 | __ret_pu; \ |
280 | }) | 283 | }) |
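And the mirror-image call site for put_user(), again only a hypothetical example with an invented name:

/* Hypothetical sketch: write one int to user space; the __put_user_N
 * stubs in putuser.S perform the store under STAC/CLAC. */
static int example_write_user_int(int __user *uptr, int val)
{
        return put_user(val, uptr);     /* 0 on success, -EFAULT on fault */
}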
281 | 284 | ||
282 | #define __put_user_size(x, ptr, size, retval, errret) \ | 285 | #define __put_user_size(x, ptr, size, retval, errret) \ |
283 | do { \ | 286 | do { \ |
284 | retval = 0; \ | 287 | retval = 0; \ |
285 | __chk_user_ptr(ptr); \ | 288 | __chk_user_ptr(ptr); \ |
286 | switch (size) { \ | 289 | switch (size) { \ |
287 | case 1: \ | 290 | case 1: \ |
288 | __put_user_asm(x, ptr, retval, "b", "b", "iq", errret); \ | 291 | __put_user_asm(x, ptr, retval, "b", "b", "iq", errret); \ |
289 | break; \ | 292 | break; \ |
290 | case 2: \ | 293 | case 2: \ |
291 | __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \ | 294 | __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \ |
292 | break; \ | 295 | break; \ |
293 | case 4: \ | 296 | case 4: \ |
294 | __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ | 297 | __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ |
295 | break; \ | 298 | break; \ |
296 | case 8: \ | 299 | case 8: \ |
297 | __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \ | 300 | __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \ |
298 | errret); \ | 301 | errret); \ |
299 | break; \ | 302 | break; \ |
300 | default: \ | 303 | default: \ |
301 | __put_user_bad(); \ | 304 | __put_user_bad(); \ |
302 | } \ | 305 | } \ |
303 | } while (0) | 306 | } while (0) |
304 | 307 | ||
305 | #define __put_user_size_ex(x, ptr, size) \ | 308 | #define __put_user_size_ex(x, ptr, size) \ |
306 | do { \ | 309 | do { \ |
307 | __chk_user_ptr(ptr); \ | 310 | __chk_user_ptr(ptr); \ |
308 | switch (size) { \ | 311 | switch (size) { \ |
309 | case 1: \ | 312 | case 1: \ |
310 | __put_user_asm_ex(x, ptr, "b", "b", "iq"); \ | 313 | __put_user_asm_ex(x, ptr, "b", "b", "iq"); \ |
311 | break; \ | 314 | break; \ |
312 | case 2: \ | 315 | case 2: \ |
313 | __put_user_asm_ex(x, ptr, "w", "w", "ir"); \ | 316 | __put_user_asm_ex(x, ptr, "w", "w", "ir"); \ |
314 | break; \ | 317 | break; \ |
315 | case 4: \ | 318 | case 4: \ |
316 | __put_user_asm_ex(x, ptr, "l", "k", "ir"); \ | 319 | __put_user_asm_ex(x, ptr, "l", "k", "ir"); \ |
317 | break; \ | 320 | break; \ |
318 | case 8: \ | 321 | case 8: \ |
319 | __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \ | 322 | __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \ |
320 | break; \ | 323 | break; \ |
321 | default: \ | 324 | default: \ |
322 | __put_user_bad(); \ | 325 | __put_user_bad(); \ |
323 | } \ | 326 | } \ |
324 | } while (0) | 327 | } while (0) |
325 | 328 | ||
326 | #else | 329 | #else |
327 | 330 | ||
328 | #define __put_user_size(x, ptr, size, retval, errret) \ | 331 | #define __put_user_size(x, ptr, size, retval, errret) \ |
329 | do { \ | 332 | do { \ |
330 | __typeof__(*(ptr))__pus_tmp = x; \ | 333 | __typeof__(*(ptr))__pus_tmp = x; \ |
331 | retval = 0; \ | 334 | retval = 0; \ |
332 | \ | 335 | \ |
333 | if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \ | 336 | if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \ |
334 | retval = errret; \ | 337 | retval = errret; \ |
335 | } while (0) | 338 | } while (0) |
336 | 339 | ||
337 | #define put_user(x, ptr) \ | 340 | #define put_user(x, ptr) \ |
338 | ({ \ | 341 | ({ \ |
339 | int __ret_pu; \ | 342 | int __ret_pu; \ |
340 | __typeof__(*(ptr))__pus_tmp = x; \ | 343 | __typeof__(*(ptr))__pus_tmp = x; \ |
341 | __ret_pu = 0; \ | 344 | __ret_pu = 0; \ |
342 | if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, \ | 345 | if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, \ |
343 | sizeof(*(ptr))) != 0)) \ | 346 | sizeof(*(ptr))) != 0)) \ |
344 | __ret_pu = -EFAULT; \ | 347 | __ret_pu = -EFAULT; \ |
345 | __ret_pu; \ | 348 | __ret_pu; \ |
346 | }) | 349 | }) |
347 | #endif | 350 | #endif |
348 | 351 | ||
349 | #ifdef CONFIG_X86_32 | 352 | #ifdef CONFIG_X86_32 |
350 | #define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() | 353 | #define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() |
351 | #define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() | 354 | #define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() |
352 | #else | 355 | #else |
353 | #define __get_user_asm_u64(x, ptr, retval, errret) \ | 356 | #define __get_user_asm_u64(x, ptr, retval, errret) \ |
354 | __get_user_asm(x, ptr, retval, "q", "", "=r", errret) | 357 | __get_user_asm(x, ptr, retval, "q", "", "=r", errret) |
355 | #define __get_user_asm_ex_u64(x, ptr) \ | 358 | #define __get_user_asm_ex_u64(x, ptr) \ |
356 | __get_user_asm_ex(x, ptr, "q", "", "=r") | 359 | __get_user_asm_ex(x, ptr, "q", "", "=r") |
357 | #endif | 360 | #endif |
358 | 361 | ||
359 | #define __get_user_size(x, ptr, size, retval, errret) \ | 362 | #define __get_user_size(x, ptr, size, retval, errret) \ |
360 | do { \ | 363 | do { \ |
361 | retval = 0; \ | 364 | retval = 0; \ |
362 | __chk_user_ptr(ptr); \ | 365 | __chk_user_ptr(ptr); \ |
363 | switch (size) { \ | 366 | switch (size) { \ |
364 | case 1: \ | 367 | case 1: \ |
365 | __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); \ | 368 | __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); \ |
366 | break; \ | 369 | break; \ |
367 | case 2: \ | 370 | case 2: \ |
368 | __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); \ | 371 | __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); \ |
369 | break; \ | 372 | break; \ |
370 | case 4: \ | 373 | case 4: \ |
371 | __get_user_asm(x, ptr, retval, "l", "k", "=r", errret); \ | 374 | __get_user_asm(x, ptr, retval, "l", "k", "=r", errret); \ |
372 | break; \ | 375 | break; \ |
373 | case 8: \ | 376 | case 8: \ |
374 | __get_user_asm_u64(x, ptr, retval, errret); \ | 377 | __get_user_asm_u64(x, ptr, retval, errret); \ |
375 | break; \ | 378 | break; \ |
376 | default: \ | 379 | default: \ |
377 | (x) = __get_user_bad(); \ | 380 | (x) = __get_user_bad(); \ |
378 | } \ | 381 | } \ |
379 | } while (0) | 382 | } while (0) |
380 | 383 | ||
381 | #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ | 384 | #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ |
382 | asm volatile("1: mov"itype" %2,%"rtype"1\n" \ | 385 | asm volatile(ASM_STAC "\n" \ |
383 | "2:\n" \ | 386 | "1: mov"itype" %2,%"rtype"1\n" \ |
387 | "2: " ASM_CLAC "\n" \ | ||
384 | ".section .fixup,\"ax\"\n" \ | 388 | ".section .fixup,\"ax\"\n" \ |
385 | "3: mov %3,%0\n" \ | 389 | "3: mov %3,%0\n" \ |
386 | " xor"itype" %"rtype"1,%"rtype"1\n" \ | 390 | " xor"itype" %"rtype"1,%"rtype"1\n" \ |
387 | " jmp 2b\n" \ | 391 | " jmp 2b\n" \ |
388 | ".previous\n" \ | 392 | ".previous\n" \ |
389 | _ASM_EXTABLE(1b, 3b) \ | 393 | _ASM_EXTABLE(1b, 3b) \ |
390 | : "=r" (err), ltype(x) \ | 394 | : "=r" (err), ltype(x) \ |
391 | : "m" (__m(addr)), "i" (errret), "0" (err)) | 395 | : "m" (__m(addr)), "i" (errret), "0" (err)) |
392 | 396 | ||
393 | #define __get_user_size_ex(x, ptr, size) \ | 397 | #define __get_user_size_ex(x, ptr, size) \ |
394 | do { \ | 398 | do { \ |
395 | __chk_user_ptr(ptr); \ | 399 | __chk_user_ptr(ptr); \ |
396 | switch (size) { \ | 400 | switch (size) { \ |
397 | case 1: \ | 401 | case 1: \ |
398 | __get_user_asm_ex(x, ptr, "b", "b", "=q"); \ | 402 | __get_user_asm_ex(x, ptr, "b", "b", "=q"); \ |
399 | break; \ | 403 | break; \ |
400 | case 2: \ | 404 | case 2: \ |
401 | __get_user_asm_ex(x, ptr, "w", "w", "=r"); \ | 405 | __get_user_asm_ex(x, ptr, "w", "w", "=r"); \ |
402 | break; \ | 406 | break; \ |
403 | case 4: \ | 407 | case 4: \ |
404 | __get_user_asm_ex(x, ptr, "l", "k", "=r"); \ | 408 | __get_user_asm_ex(x, ptr, "l", "k", "=r"); \ |
405 | break; \ | 409 | break; \ |
406 | case 8: \ | 410 | case 8: \ |
407 | __get_user_asm_ex_u64(x, ptr); \ | 411 | __get_user_asm_ex_u64(x, ptr); \ |
408 | break; \ | 412 | break; \ |
409 | default: \ | 413 | default: \ |
410 | (x) = __get_user_bad(); \ | 414 | (x) = __get_user_bad(); \ |
411 | } \ | 415 | } \ |
412 | } while (0) | 416 | } while (0) |
413 | 417 | ||
414 | #define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ | 418 | #define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ |
415 | asm volatile("1: mov"itype" %1,%"rtype"0\n" \ | 419 | asm volatile(ASM_STAC "\n" \ |
416 | "2:\n" \ | 420 | "1: mov"itype" %1,%"rtype"0\n" \ |
421 | "2: " ASM_CLAC "\n" \ | ||
417 | _ASM_EXTABLE_EX(1b, 2b) \ | 422 | _ASM_EXTABLE_EX(1b, 2b) \ |
418 | : ltype(x) : "m" (__m(addr))) | 423 | : ltype(x) : "m" (__m(addr))) |
419 | 424 | ||
420 | #define __put_user_nocheck(x, ptr, size) \ | 425 | #define __put_user_nocheck(x, ptr, size) \ |
421 | ({ \ | 426 | ({ \ |
422 | int __pu_err; \ | 427 | int __pu_err; \ |
423 | __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ | 428 | __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ |
424 | __pu_err; \ | 429 | __pu_err; \ |
425 | }) | 430 | }) |
426 | 431 | ||
427 | #define __get_user_nocheck(x, ptr, size) \ | 432 | #define __get_user_nocheck(x, ptr, size) \ |
428 | ({ \ | 433 | ({ \ |
429 | int __gu_err; \ | 434 | int __gu_err; \ |
430 | unsigned long __gu_val; \ | 435 | unsigned long __gu_val; \ |
431 | __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ | 436 | __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ |
432 | (x) = (__force __typeof__(*(ptr)))__gu_val; \ | 437 | (x) = (__force __typeof__(*(ptr)))__gu_val; \ |
433 | __gu_err; \ | 438 | __gu_err; \ |
434 | }) | 439 | }) |
435 | 440 | ||
436 | /* FIXME: this hack is definitely wrong -AK */ | 441 | /* FIXME: this hack is definitely wrong -AK */ |
437 | struct __large_struct { unsigned long buf[100]; }; | 442 | struct __large_struct { unsigned long buf[100]; }; |
438 | #define __m(x) (*(struct __large_struct __user *)(x)) | 443 | #define __m(x) (*(struct __large_struct __user *)(x)) |
439 | 444 | ||
440 | /* | 445 | /* |
441 | * Tell gcc we read from memory instead of writing: this is because | 446 | * Tell gcc we read from memory instead of writing: this is because |
442 | * we do not write to any memory gcc knows about, so there are no | 447 | * we do not write to any memory gcc knows about, so there are no |
443 | * aliasing issues. | 448 | * aliasing issues. |
444 | */ | 449 | */ |
445 | #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ | 450 | #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ |
446 | asm volatile("1: mov"itype" %"rtype"1,%2\n" \ | 451 | asm volatile(ASM_STAC "\n" \ |
447 | "2:\n" \ | 452 | "1: mov"itype" %"rtype"1,%2\n" \ |
453 | "2: " ASM_CLAC "\n" \ | ||
448 | ".section .fixup,\"ax\"\n" \ | 454 | ".section .fixup,\"ax\"\n" \ |
449 | "3: mov %3,%0\n" \ | 455 | "3: mov %3,%0\n" \ |
450 | " jmp 2b\n" \ | 456 | " jmp 2b\n" \ |
451 | ".previous\n" \ | 457 | ".previous\n" \ |
452 | _ASM_EXTABLE(1b, 3b) \ | 458 | _ASM_EXTABLE(1b, 3b) \ |
453 | : "=r"(err) \ | 459 | : "=r"(err) \ |
454 | : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) | 460 | : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) |
455 | 461 | ||
456 | #define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ | 462 | #define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ |
457 | asm volatile("1: mov"itype" %"rtype"0,%1\n" \ | 463 | asm volatile(ASM_STAC "\n" \ |
458 | "2:\n" \ | 464 | "1: mov"itype" %"rtype"0,%1\n" \ |
465 | "2: " ASM_CLAC "\n" \ | ||
459 | _ASM_EXTABLE_EX(1b, 2b) \ | 466 | _ASM_EXTABLE_EX(1b, 2b) \ |
460 | : : ltype(x), "m" (__m(addr))) | 467 | : : ltype(x), "m" (__m(addr))) |
461 | 468 | ||
462 | /* | 469 | /* |
463 | * uaccess_try and catch | 470 | * uaccess_try and catch |
464 | */ | 471 | */ |
465 | #define uaccess_try do { \ | 472 | #define uaccess_try do { \ |
466 | int prev_err = current_thread_info()->uaccess_err; \ | 473 | int prev_err = current_thread_info()->uaccess_err; \ |
467 | current_thread_info()->uaccess_err = 0; \ | 474 | current_thread_info()->uaccess_err = 0; \ |
468 | barrier(); | 475 | barrier(); |
469 | 476 | ||
470 | #define uaccess_catch(err) \ | 477 | #define uaccess_catch(err) \ |
471 | (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ | 478 | (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ |
472 | current_thread_info()->uaccess_err = prev_err; \ | 479 | current_thread_info()->uaccess_err = prev_err; \ |
473 | } while (0) | 480 | } while (0) |
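These two macros are what the *_ex access variants above sit inside: faults only set uaccess_err, and the catch converts that into an error code. A hypothetical use of the pattern, assuming the get_user_try/get_user_ex/get_user_catch wrappers defined a little further down in this header (outside this hunk) and an access_ok() check done by the caller:

/* Hypothetical sketch of the try/catch style built on uaccess_try and
 * uaccess_catch; example_read_two_words is an invented helper. */
static int example_read_two_words(const u32 __user *uptr, u32 *a, u32 *b)
{
        int err = 0;

        get_user_try {
                get_user_ex(*a, &uptr[0]);      /* a fault sets uaccess_err...  */
                get_user_ex(*b, &uptr[1]);      /* ...instead of branching out  */
        } get_user_catch(err);                  /* turned into -EFAULT here     */

        return err;
}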
474 | 481 | ||
475 | /** | 482 | /** |
476 | * __get_user: - Get a simple variable from user space, with less checking. | 483 | * __get_user: - Get a simple variable from user space, with less checking. |
477 | * @x: Variable to store result. | 484 | * @x: Variable to store result. |
478 | * @ptr: Source address, in user space. | 485 | * @ptr: Source address, in user space. |
479 | * | 486 | * |
480 | * Context: User context only. This function may sleep. | 487 | * Context: User context only. This function may sleep. |
481 | * | 488 | * |
482 | * This macro copies a single simple variable from user space to kernel | 489 | * This macro copies a single simple variable from user space to kernel |
483 | * space. It supports simple types like char and int, but not larger | 490 | * space. It supports simple types like char and int, but not larger |
484 | * data types like structures or arrays. | 491 | * data types like structures or arrays. |
485 | * | 492 | * |
486 | * @ptr must have pointer-to-simple-variable type, and the result of | 493 | * @ptr must have pointer-to-simple-variable type, and the result of |
487 | * dereferencing @ptr must be assignable to @x without a cast. | 494 | * dereferencing @ptr must be assignable to @x without a cast. |
488 | * | 495 | * |
489 | * Caller must check the pointer with access_ok() before calling this | 496 | * Caller must check the pointer with access_ok() before calling this |
490 | * function. | 497 | * function. |
491 | * | 498 | * |
492 | * Returns zero on success, or -EFAULT on error. | 499 | * Returns zero on success, or -EFAULT on error. |
493 | * On error, the variable @x is set to zero. | 500 | * On error, the variable @x is set to zero. |
494 | */ | 501 | */ |
495 | 502 | ||
496 | #define __get_user(x, ptr) \ | 503 | #define __get_user(x, ptr) \ |
497 | __get_user_nocheck((x), (ptr), sizeof(*(ptr))) | 504 | __get_user_nocheck((x), (ptr), sizeof(*(ptr))) |
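The kerneldoc above spells out the contract: the caller validates the pointer with access_ok(), __get_user() returns 0 or -EFAULT, and the destination is zeroed on a fault. A minimal usage sketch follows; the helper name and calling convention are made up for illustration.

	/* Illustration only: read one u32 handed in from user space. */
	static int read_user_flag(const u32 __user *uptr, u32 *out)
	{
		u32 val;

		if (!access_ok(VERIFY_READ, uptr, sizeof(*uptr)))
			return -EFAULT;
		if (__get_user(val, uptr))	/* 0 on success; val zeroed on fault */
			return -EFAULT;
		*out = val;
		return 0;
	}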
498 | 505 | ||
499 | /** | 506 | /** |
500 | * __put_user: - Write a simple value into user space, with less checking. | 507 | * __put_user: - Write a simple value into user space, with less checking. |
501 | * @x: Value to copy to user space. | 508 | * @x: Value to copy to user space. |
502 | * @ptr: Destination address, in user space. | 509 | * @ptr: Destination address, in user space. |
503 | * | 510 | * |
504 | * Context: User context only. This function may sleep. | 511 | * Context: User context only. This function may sleep. |
505 | * | 512 | * |
506 | * This macro copies a single simple value from kernel space to user | 513 | * This macro copies a single simple value from kernel space to user |
507 | * space. It supports simple types like char and int, but not larger | 514 | * space. It supports simple types like char and int, but not larger |
508 | * data types like structures or arrays. | 515 | * data types like structures or arrays. |
509 | * | 516 | * |
510 | * @ptr must have pointer-to-simple-variable type, and @x must be assignable | 517 | * @ptr must have pointer-to-simple-variable type, and @x must be assignable |
511 | * to the result of dereferencing @ptr. | 518 | * to the result of dereferencing @ptr. |
512 | * | 519 | * |
513 | * Caller must check the pointer with access_ok() before calling this | 520 | * Caller must check the pointer with access_ok() before calling this |
514 | * function. | 521 | * function. |
515 | * | 522 | * |
516 | * Returns zero on success, or -EFAULT on error. | 523 | * Returns zero on success, or -EFAULT on error. |
517 | */ | 524 | */ |
518 | 525 | ||
519 | #define __put_user(x, ptr) \ | 526 | #define __put_user(x, ptr) \ |
520 | __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) | 527 | __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) |
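The write side mirrors this; a correspondingly hypothetical sketch:

	/* Illustration only: store a status word at a user-supplied address. */
	static int write_user_status(u32 __user *uptr, u32 status)
	{
		if (!access_ok(VERIFY_WRITE, uptr, sizeof(*uptr)))
			return -EFAULT;
		return __put_user(status, uptr);	/* 0 or -EFAULT */
	}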
521 | 528 | ||
522 | #define __get_user_unaligned __get_user | 529 | #define __get_user_unaligned __get_user |
523 | #define __put_user_unaligned __put_user | 530 | #define __put_user_unaligned __put_user |
524 | 531 | ||
525 | /* | 532 | /* |
526 | * {get|put}_user_try and catch | 533 | * {get|put}_user_try and catch |
527 | * | 534 | * |
528 | * get_user_try { | 535 | * get_user_try { |
529 | * get_user_ex(...); | 536 | * get_user_ex(...); |
530 | * } get_user_catch(err) | 537 | * } get_user_catch(err) |
531 | */ | 538 | */ |
532 | #define get_user_try uaccess_try | 539 | #define get_user_try uaccess_try |
533 | #define get_user_catch(err) uaccess_catch(err) | 540 | #define get_user_catch(err) uaccess_catch(err) |
534 | 541 | ||
535 | #define get_user_ex(x, ptr) do { \ | 542 | #define get_user_ex(x, ptr) do { \ |
536 | unsigned long __gue_val; \ | 543 | unsigned long __gue_val; \ |
537 | __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \ | 544 | __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \ |
538 | (x) = (__force __typeof__(*(ptr)))__gue_val; \ | 545 | (x) = (__force __typeof__(*(ptr)))__gue_val; \ |
539 | } while (0) | 546 | } while (0) |
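The block comment above only sketches the syntax. Concretely, the *_ex() forms let a run of accesses share a single fault check instead of testing a return value after every access, which is why signal-frame code favours them. A hedged example with a made-up structure:

	/* Illustration only: two reads under one try/catch.  The struct and the
	 * helper are hypothetical; the caller is assumed to have done access_ok(). */
	struct demo_uarea {
		u32 a;
		u32 b;
	};

	static int read_demo_uarea(const struct demo_uarea __user *u, u32 *a, u32 *b)
	{
		int err = 0;

		get_user_try {
			get_user_ex(*a, &u->a);
			get_user_ex(*b, &u->b);
		} get_user_catch(err);

		return err;
	}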
540 | 547 | ||
541 | #ifdef CONFIG_X86_WP_WORKS_OK | 548 | #ifdef CONFIG_X86_WP_WORKS_OK |
542 | 549 | ||
543 | #define put_user_try uaccess_try | 550 | #define put_user_try uaccess_try |
544 | #define put_user_catch(err) uaccess_catch(err) | 551 | #define put_user_catch(err) uaccess_catch(err) |
545 | 552 | ||
546 | #define put_user_ex(x, ptr) \ | 553 | #define put_user_ex(x, ptr) \ |
547 | __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) | 554 | __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) |
548 | 555 | ||
549 | #else /* !CONFIG_X86_WP_WORKS_OK */ | 556 | #else /* !CONFIG_X86_WP_WORKS_OK */ |
550 | 557 | ||
551 | #define put_user_try do { \ | 558 | #define put_user_try do { \ |
552 | int __uaccess_err = 0; | 559 | int __uaccess_err = 0; |
553 | 560 | ||
554 | #define put_user_catch(err) \ | 561 | #define put_user_catch(err) \ |
555 | (err) |= __uaccess_err; \ | 562 | (err) |= __uaccess_err; \ |
556 | } while (0) | 563 | } while (0) |
557 | 564 | ||
558 | #define put_user_ex(x, ptr) do { \ | 565 | #define put_user_ex(x, ptr) do { \ |
559 | __uaccess_err |= __put_user(x, ptr); \ | 566 | __uaccess_err |= __put_user(x, ptr); \ |
560 | } while (0) | 567 | } while (0) |
561 | 568 | ||
562 | #endif /* CONFIG_X86_WP_WORKS_OK */ | 569 | #endif /* CONFIG_X86_WP_WORKS_OK */ |
563 | 570 | ||
564 | extern unsigned long | 571 | extern unsigned long |
565 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n); | 572 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n); |
566 | extern __must_check long | 573 | extern __must_check long |
567 | strncpy_from_user(char *dst, const char __user *src, long count); | 574 | strncpy_from_user(char *dst, const char __user *src, long count); |
568 | 575 | ||
569 | extern __must_check long strlen_user(const char __user *str); | 576 | extern __must_check long strlen_user(const char __user *str); |
570 | extern __must_check long strnlen_user(const char __user *str, long n); | 577 | extern __must_check long strnlen_user(const char __user *str, long n); |
571 | 578 | ||
572 | unsigned long __must_check clear_user(void __user *mem, unsigned long len); | 579 | unsigned long __must_check clear_user(void __user *mem, unsigned long len); |
573 | unsigned long __must_check __clear_user(void __user *mem, unsigned long len); | 580 | unsigned long __must_check __clear_user(void __user *mem, unsigned long len); |
574 | 581 | ||
575 | /* | 582 | /* |
576 | * movsl can be slow when source and dest are not both 8-byte aligned | 583 | * movsl can be slow when source and dest are not both 8-byte aligned |
577 | */ | 584 | */ |
578 | #ifdef CONFIG_X86_INTEL_USERCOPY | 585 | #ifdef CONFIG_X86_INTEL_USERCOPY |
579 | extern struct movsl_mask { | 586 | extern struct movsl_mask { |
580 | int mask; | 587 | int mask; |
581 | } ____cacheline_aligned_in_smp movsl_mask; | 588 | } ____cacheline_aligned_in_smp movsl_mask; |
582 | #endif | 589 | #endif |
583 | 590 | ||
584 | #define ARCH_HAS_NOCACHE_UACCESS 1 | 591 | #define ARCH_HAS_NOCACHE_UACCESS 1 |
585 | 592 | ||
586 | #ifdef CONFIG_X86_32 | 593 | #ifdef CONFIG_X86_32 |
587 | # include "uaccess_32.h" | 594 | # include "uaccess_32.h" |
588 | #else | 595 | #else |
589 | # include "uaccess_64.h" | 596 | # include "uaccess_64.h" |
590 | #endif | 597 | #endif |
591 | 598 | ||
592 | #endif /* _ASM_X86_UACCESS_H */ | 599 | #endif /* _ASM_X86_UACCESS_H */ |
593 | 600 | ||
594 | 601 |
arch/x86/include/asm/xsave.h
1 | #ifndef __ASM_X86_XSAVE_H | 1 | #ifndef __ASM_X86_XSAVE_H |
2 | #define __ASM_X86_XSAVE_H | 2 | #define __ASM_X86_XSAVE_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include <asm/processor.h> | 5 | #include <asm/processor.h> |
6 | 6 | ||
7 | #define XSTATE_CPUID 0x0000000d | 7 | #define XSTATE_CPUID 0x0000000d |
8 | 8 | ||
9 | #define XSTATE_FP 0x1 | 9 | #define XSTATE_FP 0x1 |
10 | #define XSTATE_SSE 0x2 | 10 | #define XSTATE_SSE 0x2 |
11 | #define XSTATE_YMM 0x4 | 11 | #define XSTATE_YMM 0x4 |
12 | 12 | ||
13 | #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) | 13 | #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) |
14 | 14 | ||
15 | #define FXSAVE_SIZE 512 | 15 | #define FXSAVE_SIZE 512 |
16 | 16 | ||
17 | #define XSAVE_HDR_SIZE 64 | 17 | #define XSAVE_HDR_SIZE 64 |
18 | #define XSAVE_HDR_OFFSET FXSAVE_SIZE | 18 | #define XSAVE_HDR_OFFSET FXSAVE_SIZE |
19 | 19 | ||
20 | #define XSAVE_YMM_SIZE 256 | 20 | #define XSAVE_YMM_SIZE 256 |
21 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) | 21 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) |
22 | 22 | ||
23 | /* | 23 | /* |
24 | * These are the features that the OS can handle currently. | 24 | * These are the features that the OS can handle currently. |
25 | */ | 25 | */ |
26 | #define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) | 26 | #define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) |
27 | 27 | ||
28 | #ifdef CONFIG_X86_64 | 28 | #ifdef CONFIG_X86_64 |
29 | #define REX_PREFIX "0x48, " | 29 | #define REX_PREFIX "0x48, " |
30 | #else | 30 | #else |
31 | #define REX_PREFIX | 31 | #define REX_PREFIX |
32 | #endif | 32 | #endif |
33 | 33 | ||
34 | extern unsigned int xstate_size; | 34 | extern unsigned int xstate_size; |
35 | extern u64 pcntxt_mask; | 35 | extern u64 pcntxt_mask; |
36 | extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; | 36 | extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; |
37 | 37 | ||
38 | extern void xsave_init(void); | 38 | extern void xsave_init(void); |
39 | extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); | 39 | extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); |
40 | extern int init_fpu(struct task_struct *child); | 40 | extern int init_fpu(struct task_struct *child); |
41 | extern int check_for_xstate(struct i387_fxsave_struct __user *buf, | 41 | extern int check_for_xstate(struct i387_fxsave_struct __user *buf, |
42 | void __user *fpstate, | 42 | void __user *fpstate, |
43 | struct _fpx_sw_bytes *sw); | 43 | struct _fpx_sw_bytes *sw); |
44 | 44 | ||
45 | static inline int fpu_xrstor_checking(struct fpu *fpu) | 45 | static inline int fpu_xrstor_checking(struct fpu *fpu) |
46 | { | 46 | { |
47 | struct xsave_struct *fx = &fpu->state->xsave; | 47 | struct xsave_struct *fx = &fpu->state->xsave; |
48 | int err; | 48 | int err; |
49 | 49 | ||
50 | asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" | 50 | asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" |
51 | "2:\n" | 51 | "2:\n" |
52 | ".section .fixup,\"ax\"\n" | 52 | ".section .fixup,\"ax\"\n" |
53 | "3: movl $-1,%[err]\n" | 53 | "3: movl $-1,%[err]\n" |
54 | " jmp 2b\n" | 54 | " jmp 2b\n" |
55 | ".previous\n" | 55 | ".previous\n" |
56 | _ASM_EXTABLE(1b, 3b) | 56 | _ASM_EXTABLE(1b, 3b) |
57 | : [err] "=r" (err) | 57 | : [err] "=r" (err) |
58 | : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0) | 58 | : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0) |
59 | : "memory"); | 59 | : "memory"); |
60 | 60 | ||
61 | return err; | 61 | return err; |
62 | } | 62 | } |
63 | 63 | ||
64 | static inline int xsave_user(struct xsave_struct __user *buf) | 64 | static inline int xsave_user(struct xsave_struct __user *buf) |
65 | { | 65 | { |
66 | int err; | 66 | int err; |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * Clear the xsave header first, so that reserved fields are | 69 | * Clear the xsave header first, so that reserved fields are |
70 | * initialized to zero. | 70 | * initialized to zero. |
71 | */ | 71 | */ |
72 | err = __clear_user(&buf->xsave_hdr, | 72 | err = __clear_user(&buf->xsave_hdr, |
73 | sizeof(struct xsave_hdr_struct)); | 73 | sizeof(struct xsave_hdr_struct)); |
74 | if (unlikely(err)) | 74 | if (unlikely(err)) |
75 | return -EFAULT; | 75 | return -EFAULT; |
76 | 76 | ||
77 | __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" | 77 | __asm__ __volatile__(ASM_STAC "\n" |
78 | "2:\n" | 78 | "1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" |
79 | "2: " ASM_CLAC "\n" | ||
79 | ".section .fixup,\"ax\"\n" | 80 | ".section .fixup,\"ax\"\n" |
80 | "3: movl $-1,%[err]\n" | 81 | "3: movl $-1,%[err]\n" |
81 | " jmp 2b\n" | 82 | " jmp 2b\n" |
82 | ".previous\n" | 83 | ".previous\n" |
83 | _ASM_EXTABLE(1b,3b) | 84 | _ASM_EXTABLE(1b,3b) |
84 | : [err] "=r" (err) | 85 | : [err] "=r" (err) |
85 | : "D" (buf), "a" (-1), "d" (-1), "0" (0) | 86 | : "D" (buf), "a" (-1), "d" (-1), "0" (0) |
86 | : "memory"); | 87 | : "memory"); |
87 | if (unlikely(err) && __clear_user(buf, xstate_size)) | 88 | if (unlikely(err) && __clear_user(buf, xstate_size)) |
88 | err = -EFAULT; | 89 | err = -EFAULT; |
89 | /* No need to clear here because the caller clears USED_MATH */ | 90 | /* No need to clear here because the caller clears USED_MATH */ |
90 | return err; | 91 | return err; |
91 | } | 92 | } |
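For orientation: the hand-encoded .byte sequence here is xsave with RDI/EDI as the memory operand (opcode 0F AE /4; the REX.W prefix on 64-bit selects the 64-bit form), and the 0x2f variant below is the matching xrstor (0F AE /5), presumably spelled out in bytes because assemblers of the era could not be assumed to know the mnemonics. Since the destination is a user buffer, the instruction now sits inside an ASM_STAC/ASM_CLAC window. A hypothetical caller, illustration only (the real users live in the signal code):

	/* Illustration only: save the current task's extended state into a
	 * user-space buffer the caller controls. */
	static int save_xstate_to_user(struct xsave_struct __user *buf)
	{
		if (!access_ok(VERIFY_WRITE, buf, xstate_size))
			return -EFAULT;
		return xsave_user(buf);		/* 0 on success, negative on fault */
	}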
92 | 93 | ||
93 | static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) | 94 | static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) |
94 | { | 95 | { |
95 | int err; | 96 | int err; |
96 | struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); | 97 | struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); |
97 | u32 lmask = mask; | 98 | u32 lmask = mask; |
98 | u32 hmask = mask >> 32; | 99 | u32 hmask = mask >> 32; |
99 | 100 | ||
100 | __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" | 101 | __asm__ __volatile__(ASM_STAC "\n" |
101 | "2:\n" | 102 | "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" |
103 | "2: " ASM_CLAC "\n" | ||
102 | ".section .fixup,\"ax\"\n" | 104 | ".section .fixup,\"ax\"\n" |
103 | "3: movl $-1,%[err]\n" | 105 | "3: movl $-1,%[err]\n" |
104 | " jmp 2b\n" | 106 | " jmp 2b\n" |
105 | ".previous\n" | 107 | ".previous\n" |
106 | _ASM_EXTABLE(1b,3b) | 108 | _ASM_EXTABLE(1b,3b) |
107 | : [err] "=r" (err) | 109 | : [err] "=r" (err) |
108 | : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) | 110 | : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) |
109 | : "memory"); /* memory required? */ | 111 | : "memory"); /* memory required? */ |
110 | return err; | 112 | return err; |
111 | } | 113 | } |
112 | 114 | ||
113 | static inline void xrstor_state(struct xsave_struct *fx, u64 mask) | 115 | static inline void xrstor_state(struct xsave_struct *fx, u64 mask) |
114 | { | 116 | { |
115 | u32 lmask = mask; | 117 | u32 lmask = mask; |
116 | u32 hmask = mask >> 32; | 118 | u32 hmask = mask >> 32; |
117 | 119 | ||
118 | asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" | 120 | asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" |
119 | : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) | 121 | : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) |
120 | : "memory"); | 122 | : "memory"); |
121 | } | 123 | } |
122 | 124 | ||
123 | static inline void xsave_state(struct xsave_struct *fx, u64 mask) | 125 | static inline void xsave_state(struct xsave_struct *fx, u64 mask) |
124 | { | 126 | { |
125 | u32 lmask = mask; | 127 | u32 lmask = mask; |
126 | u32 hmask = mask >> 32; | 128 | u32 hmask = mask >> 32; |
127 | 129 | ||
128 | asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" | 130 | asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" |
129 | : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) | 131 | : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) |
130 | : "memory"); | 132 | : "memory"); |
131 | } | 133 | } |
132 | 134 | ||
133 | static inline void fpu_xsave(struct fpu *fpu) | 135 | static inline void fpu_xsave(struct fpu *fpu) |
134 | { | 136 | { |
135 | /* This, however, we can work around by forcing the compiler to select | 137 | /* This, however, we can work around by forcing the compiler to select |
136 | an addressing mode that doesn't require extended registers. */ | 138 | an addressing mode that doesn't require extended registers. */ |
137 | alternative_input( | 139 | alternative_input( |
138 | ".byte " REX_PREFIX "0x0f,0xae,0x27", | 140 | ".byte " REX_PREFIX "0x0f,0xae,0x27", |
139 | ".byte " REX_PREFIX "0x0f,0xae,0x37", | 141 | ".byte " REX_PREFIX "0x0f,0xae,0x37", |
140 | X86_FEATURE_XSAVEOPT, | 142 | X86_FEATURE_XSAVEOPT, |
141 | [fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) : | 143 | [fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) : |
142 | "memory"); | 144 | "memory"); |
143 | } | 145 | } |
144 | #endif | 146 | #endif |
145 | 147 |
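fpu_xsave() above leans on alternative_input(): the first byte sequence (xsave, 0F AE /4) is what gets assembled, and at boot it is patched to the second (xsaveopt, 0F AE /6) on CPUs that advertise X86_FEATURE_XSAVEOPT. The sketch below is modelled on the kernel's prefetch() helpers and is illustrative only; the helper name is invented.

	/* Illustration only: emit prefetcht0 by default, patch in prefetchnta
	 * at boot when SSE (X86_FEATURE_XMM) is present.  Inputs start at %1
	 * because alternative_input() inserts a dummy %0 operand. */
	static inline void prefetch_nta_sketch(const void *p)
	{
		alternative_input("prefetcht0 (%1)",
				  "prefetchnta (%1)",
				  X86_FEATURE_XMM,
				  "r" (p));
	}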
arch/x86/kernel/cpu/common.c
1 | #include <linux/bootmem.h> | 1 | #include <linux/bootmem.h> |
2 | #include <linux/linkage.h> | 2 | #include <linux/linkage.h> |
3 | #include <linux/bitops.h> | 3 | #include <linux/bitops.h> |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/percpu.h> | 6 | #include <linux/percpu.h> |
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/delay.h> | 8 | #include <linux/delay.h> |
9 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
11 | #include <linux/kgdb.h> | 11 | #include <linux/kgdb.h> |
12 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
13 | #include <linux/io.h> | 13 | #include <linux/io.h> |
14 | 14 | ||
15 | #include <asm/stackprotector.h> | 15 | #include <asm/stackprotector.h> |
16 | #include <asm/perf_event.h> | 16 | #include <asm/perf_event.h> |
17 | #include <asm/mmu_context.h> | 17 | #include <asm/mmu_context.h> |
18 | #include <asm/archrandom.h> | 18 | #include <asm/archrandom.h> |
19 | #include <asm/hypervisor.h> | 19 | #include <asm/hypervisor.h> |
20 | #include <asm/processor.h> | 20 | #include <asm/processor.h> |
21 | #include <asm/debugreg.h> | 21 | #include <asm/debugreg.h> |
22 | #include <asm/sections.h> | 22 | #include <asm/sections.h> |
23 | #include <linux/topology.h> | 23 | #include <linux/topology.h> |
24 | #include <linux/cpumask.h> | 24 | #include <linux/cpumask.h> |
25 | #include <asm/pgtable.h> | 25 | #include <asm/pgtable.h> |
26 | #include <linux/atomic.h> | 26 | #include <linux/atomic.h> |
27 | #include <asm/proto.h> | 27 | #include <asm/proto.h> |
28 | #include <asm/setup.h> | 28 | #include <asm/setup.h> |
29 | #include <asm/apic.h> | 29 | #include <asm/apic.h> |
30 | #include <asm/desc.h> | 30 | #include <asm/desc.h> |
31 | #include <asm/i387.h> | 31 | #include <asm/i387.h> |
32 | #include <asm/fpu-internal.h> | 32 | #include <asm/fpu-internal.h> |
33 | #include <asm/mtrr.h> | 33 | #include <asm/mtrr.h> |
34 | #include <linux/numa.h> | 34 | #include <linux/numa.h> |
35 | #include <asm/asm.h> | 35 | #include <asm/asm.h> |
36 | #include <asm/cpu.h> | 36 | #include <asm/cpu.h> |
37 | #include <asm/mce.h> | 37 | #include <asm/mce.h> |
38 | #include <asm/msr.h> | 38 | #include <asm/msr.h> |
39 | #include <asm/pat.h> | 39 | #include <asm/pat.h> |
40 | 40 | ||
41 | #ifdef CONFIG_X86_LOCAL_APIC | 41 | #ifdef CONFIG_X86_LOCAL_APIC |
42 | #include <asm/uv/uv.h> | 42 | #include <asm/uv/uv.h> |
43 | #endif | 43 | #endif |
44 | 44 | ||
45 | #include "cpu.h" | 45 | #include "cpu.h" |
46 | 46 | ||
47 | /* all of these masks are initialized in setup_cpu_local_masks() */ | 47 | /* all of these masks are initialized in setup_cpu_local_masks() */ |
48 | cpumask_var_t cpu_initialized_mask; | 48 | cpumask_var_t cpu_initialized_mask; |
49 | cpumask_var_t cpu_callout_mask; | 49 | cpumask_var_t cpu_callout_mask; |
50 | cpumask_var_t cpu_callin_mask; | 50 | cpumask_var_t cpu_callin_mask; |
51 | 51 | ||
52 | /* representing cpus for which sibling maps can be computed */ | 52 | /* representing cpus for which sibling maps can be computed */ |
53 | cpumask_var_t cpu_sibling_setup_mask; | 53 | cpumask_var_t cpu_sibling_setup_mask; |
54 | 54 | ||
55 | /* correctly size the local cpu masks */ | 55 | /* correctly size the local cpu masks */ |
56 | void __init setup_cpu_local_masks(void) | 56 | void __init setup_cpu_local_masks(void) |
57 | { | 57 | { |
58 | alloc_bootmem_cpumask_var(&cpu_initialized_mask); | 58 | alloc_bootmem_cpumask_var(&cpu_initialized_mask); |
59 | alloc_bootmem_cpumask_var(&cpu_callin_mask); | 59 | alloc_bootmem_cpumask_var(&cpu_callin_mask); |
60 | alloc_bootmem_cpumask_var(&cpu_callout_mask); | 60 | alloc_bootmem_cpumask_var(&cpu_callout_mask); |
61 | alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); | 61 | alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); |
62 | } | 62 | } |
63 | 63 | ||
64 | static void __cpuinit default_init(struct cpuinfo_x86 *c) | 64 | static void __cpuinit default_init(struct cpuinfo_x86 *c) |
65 | { | 65 | { |
66 | #ifdef CONFIG_X86_64 | 66 | #ifdef CONFIG_X86_64 |
67 | cpu_detect_cache_sizes(c); | 67 | cpu_detect_cache_sizes(c); |
68 | #else | 68 | #else |
69 | /* Not much we can do here... */ | 69 | /* Not much we can do here... */ |
70 | /* Check if at least it has cpuid */ | 70 | /* Check if at least it has cpuid */ |
71 | if (c->cpuid_level == -1) { | 71 | if (c->cpuid_level == -1) { |
72 | /* No cpuid. It must be an ancient CPU */ | 72 | /* No cpuid. It must be an ancient CPU */ |
73 | if (c->x86 == 4) | 73 | if (c->x86 == 4) |
74 | strcpy(c->x86_model_id, "486"); | 74 | strcpy(c->x86_model_id, "486"); |
75 | else if (c->x86 == 3) | 75 | else if (c->x86 == 3) |
76 | strcpy(c->x86_model_id, "386"); | 76 | strcpy(c->x86_model_id, "386"); |
77 | } | 77 | } |
78 | #endif | 78 | #endif |
79 | } | 79 | } |
80 | 80 | ||
81 | static const struct cpu_dev __cpuinitconst default_cpu = { | 81 | static const struct cpu_dev __cpuinitconst default_cpu = { |
82 | .c_init = default_init, | 82 | .c_init = default_init, |
83 | .c_vendor = "Unknown", | 83 | .c_vendor = "Unknown", |
84 | .c_x86_vendor = X86_VENDOR_UNKNOWN, | 84 | .c_x86_vendor = X86_VENDOR_UNKNOWN, |
85 | }; | 85 | }; |
86 | 86 | ||
87 | static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; | 87 | static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; |
88 | 88 | ||
89 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | 89 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { |
90 | #ifdef CONFIG_X86_64 | 90 | #ifdef CONFIG_X86_64 |
91 | /* | 91 | /* |
92 | * We need valid kernel segments for data and code in long mode too | 92 | * We need valid kernel segments for data and code in long mode too |
93 | * IRET will check the segment types kkeil 2000/10/28 | 93 | * IRET will check the segment types kkeil 2000/10/28 |
94 | * Also sysret mandates a special GDT layout | 94 | * Also sysret mandates a special GDT layout |
95 | * | 95 | * |
96 | * TLS descriptors are currently at a different place compared to i386. | 96 | * TLS descriptors are currently at a different place compared to i386. |
97 | * Hopefully nobody expects them at a fixed place (Wine?) | 97 | * Hopefully nobody expects them at a fixed place (Wine?) |
98 | */ | 98 | */ |
99 | [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), | 99 | [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), |
100 | [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), | 100 | [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), |
101 | [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), | 101 | [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), |
102 | [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), | 102 | [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), |
103 | [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), | 103 | [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), |
104 | [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), | 104 | [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), |
105 | #else | 105 | #else |
106 | [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), | 106 | [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), |
107 | [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), | 107 | [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), |
108 | [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), | 108 | [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), |
109 | [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), | 109 | [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), |
110 | /* | 110 | /* |
111 | * Segments used for calling PnP BIOS have byte granularity. | 111 | * Segments used for calling PnP BIOS have byte granularity. |
112 | * The code segments and data segments have fixed 64k limits, | 112 | * The code segments and data segments have fixed 64k limits, |
113 | * the transfer segment sizes are set at run time. | 113 | * the transfer segment sizes are set at run time. |
114 | */ | 114 | */ |
115 | /* 32-bit code */ | 115 | /* 32-bit code */ |
116 | [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), | 116 | [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), |
117 | /* 16-bit code */ | 117 | /* 16-bit code */ |
118 | [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), | 118 | [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), |
119 | /* 16-bit data */ | 119 | /* 16-bit data */ |
120 | [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), | 120 | [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), |
121 | /* 16-bit data */ | 121 | /* 16-bit data */ |
122 | [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), | 122 | [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), |
123 | /* 16-bit data */ | 123 | /* 16-bit data */ |
124 | [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), | 124 | [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), |
125 | /* | 125 | /* |
126 | * The APM segments have byte granularity and their bases | 126 | * The APM segments have byte granularity and their bases |
127 | * are set at run time. All have 64k limits. | 127 | * are set at run time. All have 64k limits. |
128 | */ | 128 | */ |
129 | /* 32-bit code */ | 129 | /* 32-bit code */ |
130 | [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), | 130 | [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), |
131 | /* 16-bit code */ | 131 | /* 16-bit code */ |
132 | [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), | 132 | [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), |
133 | /* data */ | 133 | /* data */ |
134 | [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), | 134 | [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), |
135 | 135 | ||
136 | [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), | 136 | [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), |
137 | [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), | 137 | [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), |
138 | GDT_STACK_CANARY_INIT | 138 | GDT_STACK_CANARY_INIT |
139 | #endif | 139 | #endif |
140 | } }; | 140 | } }; |
141 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | 141 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); |
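For readers decoding the magic flag words in the table above (per the usual x86 descriptor layout, as packed by GDT_ENTRY_INIT(): bits 0-7 are the access byte, bits 12-15 the AVL/L/D/G attributes): 0xc09b is a present, DPL-0, execute/read code segment with D (32-bit) and G (4 KiB granularity) set; 0xa09b is the same access byte with L instead of D, i.e. 64-bit code; 0xc093/0xc092 are the corresponding read/write data segments; and the user variants 0xc0fb, 0xa0fb and 0xc0f3/0xc0f2 differ only in DPL = 3.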
142 | 142 | ||
143 | static int __init x86_xsave_setup(char *s) | 143 | static int __init x86_xsave_setup(char *s) |
144 | { | 144 | { |
145 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | 145 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); |
146 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | 146 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); |
147 | setup_clear_cpu_cap(X86_FEATURE_AVX); | 147 | setup_clear_cpu_cap(X86_FEATURE_AVX); |
148 | setup_clear_cpu_cap(X86_FEATURE_AVX2); | 148 | setup_clear_cpu_cap(X86_FEATURE_AVX2); |
149 | return 1; | 149 | return 1; |
150 | } | 150 | } |
151 | __setup("noxsave", x86_xsave_setup); | 151 | __setup("noxsave", x86_xsave_setup); |
152 | 152 | ||
153 | static int __init x86_xsaveopt_setup(char *s) | 153 | static int __init x86_xsaveopt_setup(char *s) |
154 | { | 154 | { |
155 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | 155 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); |
156 | return 1; | 156 | return 1; |
157 | } | 157 | } |
158 | __setup("noxsaveopt", x86_xsaveopt_setup); | 158 | __setup("noxsaveopt", x86_xsaveopt_setup); |
159 | 159 | ||
160 | #ifdef CONFIG_X86_32 | 160 | #ifdef CONFIG_X86_32 |
161 | static int cachesize_override __cpuinitdata = -1; | 161 | static int cachesize_override __cpuinitdata = -1; |
162 | static int disable_x86_serial_nr __cpuinitdata = 1; | 162 | static int disable_x86_serial_nr __cpuinitdata = 1; |
163 | 163 | ||
164 | static int __init cachesize_setup(char *str) | 164 | static int __init cachesize_setup(char *str) |
165 | { | 165 | { |
166 | get_option(&str, &cachesize_override); | 166 | get_option(&str, &cachesize_override); |
167 | return 1; | 167 | return 1; |
168 | } | 168 | } |
169 | __setup("cachesize=", cachesize_setup); | 169 | __setup("cachesize=", cachesize_setup); |
170 | 170 | ||
171 | static int __init x86_fxsr_setup(char *s) | 171 | static int __init x86_fxsr_setup(char *s) |
172 | { | 172 | { |
173 | setup_clear_cpu_cap(X86_FEATURE_FXSR); | 173 | setup_clear_cpu_cap(X86_FEATURE_FXSR); |
174 | setup_clear_cpu_cap(X86_FEATURE_XMM); | 174 | setup_clear_cpu_cap(X86_FEATURE_XMM); |
175 | return 1; | 175 | return 1; |
176 | } | 176 | } |
177 | __setup("nofxsr", x86_fxsr_setup); | 177 | __setup("nofxsr", x86_fxsr_setup); |
178 | 178 | ||
179 | static int __init x86_sep_setup(char *s) | 179 | static int __init x86_sep_setup(char *s) |
180 | { | 180 | { |
181 | setup_clear_cpu_cap(X86_FEATURE_SEP); | 181 | setup_clear_cpu_cap(X86_FEATURE_SEP); |
182 | return 1; | 182 | return 1; |
183 | } | 183 | } |
184 | __setup("nosep", x86_sep_setup); | 184 | __setup("nosep", x86_sep_setup); |
185 | 185 | ||
186 | /* Standard macro to see if a specific flag is changeable */ | 186 | /* Standard macro to see if a specific flag is changeable */ |
187 | static inline int flag_is_changeable_p(u32 flag) | 187 | static inline int flag_is_changeable_p(u32 flag) |
188 | { | 188 | { |
189 | u32 f1, f2; | 189 | u32 f1, f2; |
190 | 190 | ||
191 | /* | 191 | /* |
192 | * Cyrix and IDT cpus allow disabling of CPUID | 192 | * Cyrix and IDT cpus allow disabling of CPUID |
193 | * so the code below may return different results | 193 | * so the code below may return different results |
194 | * when it is executed before and after enabling | 194 | * when it is executed before and after enabling |
195 | * the CPUID. Add "volatile" to not allow gcc to | 195 | * the CPUID. Add "volatile" to not allow gcc to |
196 | * optimize the subsequent calls to this function. | 196 | * optimize the subsequent calls to this function. |
197 | */ | 197 | */ |
198 | asm volatile ("pushfl \n\t" | 198 | asm volatile ("pushfl \n\t" |
199 | "pushfl \n\t" | 199 | "pushfl \n\t" |
200 | "popl %0 \n\t" | 200 | "popl %0 \n\t" |
201 | "movl %0, %1 \n\t" | 201 | "movl %0, %1 \n\t" |
202 | "xorl %2, %0 \n\t" | 202 | "xorl %2, %0 \n\t" |
203 | "pushl %0 \n\t" | 203 | "pushl %0 \n\t" |
204 | "popfl \n\t" | 204 | "popfl \n\t" |
205 | "pushfl \n\t" | 205 | "pushfl \n\t" |
206 | "popl %0 \n\t" | 206 | "popl %0 \n\t" |
207 | "popfl \n\t" | 207 | "popfl \n\t" |
208 | 208 | ||
209 | : "=&r" (f1), "=&r" (f2) | 209 | : "=&r" (f1), "=&r" (f2) |
210 | : "ir" (flag)); | 210 | : "ir" (flag)); |
211 | 211 | ||
212 | return ((f1^f2) & flag) != 0; | 212 | return ((f1^f2) & flag) != 0; |
213 | } | 213 | } |
214 | 214 | ||
215 | /* Probe for the CPUID instruction */ | 215 | /* Probe for the CPUID instruction */ |
216 | static int __cpuinit have_cpuid_p(void) | 216 | static int __cpuinit have_cpuid_p(void) |
217 | { | 217 | { |
218 | return flag_is_changeable_p(X86_EFLAGS_ID); | 218 | return flag_is_changeable_p(X86_EFLAGS_ID); |
219 | } | 219 | } |
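The trick is the classic CPUID-detection sequence: copy EFLAGS, flip the bit of interest, write it back, and check whether the flip survived the round trip; bits the CPU does not implement read back unchanged. A standalone user-space restatement, 64-bit and for illustration only (the kernel routine above is 32-bit only, the sole case where CPUID may genuinely be absent):

	/* Illustration only: probe whether an EFLAGS bit can be toggled. */
	#include <stdio.h>

	static int eflags_bit_changeable(unsigned long flag)
	{
		unsigned long f1, f2;

		asm volatile("pushfq		\n\t"
			     "pushfq		\n\t"
			     "popq %0		\n\t"
			     "movq %0, %1	\n\t"
			     "xorq %2, %0	\n\t"
			     "pushq %0		\n\t"
			     "popfq		\n\t"
			     "pushfq		\n\t"
			     "popq %0		\n\t"
			     "popfq		\n\t"
			     : "=&r" (f1), "=&r" (f2)
			     : "ir" (flag));

		return ((f1 ^ f2) & flag) != 0;
	}

	int main(void)
	{
		/* EFLAGS.ID (bit 21): always changeable on anything 64-bit capable. */
		printf("ID flag changeable: %d\n", eflags_bit_changeable(1UL << 21));
		return 0;
	}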
220 | 220 | ||
221 | static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | 221 | static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) |
222 | { | 222 | { |
223 | unsigned long lo, hi; | 223 | unsigned long lo, hi; |
224 | 224 | ||
225 | if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr) | 225 | if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr) |
226 | return; | 226 | return; |
227 | 227 | ||
228 | /* Disable processor serial number: */ | 228 | /* Disable processor serial number: */ |
229 | 229 | ||
230 | rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); | 230 | rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); |
231 | lo |= 0x200000; | 231 | lo |= 0x200000; |
232 | wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); | 232 | wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); |
233 | 233 | ||
234 | printk(KERN_NOTICE "CPU serial number disabled.\n"); | 234 | printk(KERN_NOTICE "CPU serial number disabled.\n"); |
235 | clear_cpu_cap(c, X86_FEATURE_PN); | 235 | clear_cpu_cap(c, X86_FEATURE_PN); |
236 | 236 | ||
237 | /* Disabling the serial number may affect the cpuid level */ | 237 | /* Disabling the serial number may affect the cpuid level */ |
238 | c->cpuid_level = cpuid_eax(0); | 238 | c->cpuid_level = cpuid_eax(0); |
239 | } | 239 | } |
240 | 240 | ||
241 | static int __init x86_serial_nr_setup(char *s) | 241 | static int __init x86_serial_nr_setup(char *s) |
242 | { | 242 | { |
243 | disable_x86_serial_nr = 0; | 243 | disable_x86_serial_nr = 0; |
244 | return 1; | 244 | return 1; |
245 | } | 245 | } |
246 | __setup("serialnumber", x86_serial_nr_setup); | 246 | __setup("serialnumber", x86_serial_nr_setup); |
247 | #else | 247 | #else |
248 | static inline int flag_is_changeable_p(u32 flag) | 248 | static inline int flag_is_changeable_p(u32 flag) |
249 | { | 249 | { |
250 | return 1; | 250 | return 1; |
251 | } | 251 | } |
252 | /* Probe for the CPUID instruction */ | 252 | /* Probe for the CPUID instruction */ |
253 | static inline int have_cpuid_p(void) | 253 | static inline int have_cpuid_p(void) |
254 | { | 254 | { |
255 | return 1; | 255 | return 1; |
256 | } | 256 | } |
257 | static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | 257 | static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) |
258 | { | 258 | { |
259 | } | 259 | } |
260 | #endif | 260 | #endif |
261 | 261 | ||
262 | static int disable_smep __cpuinitdata; | 262 | static int disable_smep __cpuinitdata; |
263 | static __init int setup_disable_smep(char *arg) | 263 | static __init int setup_disable_smep(char *arg) |
264 | { | 264 | { |
265 | disable_smep = 1; | 265 | disable_smep = 1; |
266 | return 1; | 266 | return 1; |
267 | } | 267 | } |
268 | __setup("nosmep", setup_disable_smep); | 268 | __setup("nosmep", setup_disable_smep); |
269 | 269 | ||
270 | static __cpuinit void setup_smep(struct cpuinfo_x86 *c) | 270 | static __cpuinit void setup_smep(struct cpuinfo_x86 *c) |
271 | { | 271 | { |
272 | if (cpu_has(c, X86_FEATURE_SMEP)) { | 272 | if (cpu_has(c, X86_FEATURE_SMEP)) { |
273 | if (unlikely(disable_smep)) { | 273 | if (unlikely(disable_smep)) { |
274 | setup_clear_cpu_cap(X86_FEATURE_SMEP); | 274 | setup_clear_cpu_cap(X86_FEATURE_SMEP); |
275 | clear_in_cr4(X86_CR4_SMEP); | 275 | clear_in_cr4(X86_CR4_SMEP); |
276 | } else | 276 | } else |
277 | set_in_cr4(X86_CR4_SMEP); | 277 | set_in_cr4(X86_CR4_SMEP); |
278 | } | 278 | } |
279 | } | 279 | } |
280 | 280 | ||
281 | /* | 281 | /* |
282 | * Some CPU features depend on higher CPUID levels, which may not always | 282 | * Some CPU features depend on higher CPUID levels, which may not always |
283 | * be available due to CPUID level capping or broken virtualization | 283 | * be available due to CPUID level capping or broken virtualization |
284 | * software. Add those features to this table to auto-disable them. | 284 | * software. Add those features to this table to auto-disable them. |
285 | */ | 285 | */ |
286 | struct cpuid_dependent_feature { | 286 | struct cpuid_dependent_feature { |
287 | u32 feature; | 287 | u32 feature; |
288 | u32 level; | 288 | u32 level; |
289 | }; | 289 | }; |
290 | 290 | ||
291 | static const struct cpuid_dependent_feature __cpuinitconst | 291 | static const struct cpuid_dependent_feature __cpuinitconst |
292 | cpuid_dependent_features[] = { | 292 | cpuid_dependent_features[] = { |
293 | { X86_FEATURE_MWAIT, 0x00000005 }, | 293 | { X86_FEATURE_MWAIT, 0x00000005 }, |
294 | { X86_FEATURE_DCA, 0x00000009 }, | 294 | { X86_FEATURE_DCA, 0x00000009 }, |
295 | { X86_FEATURE_XSAVE, 0x0000000d }, | 295 | { X86_FEATURE_XSAVE, 0x0000000d }, |
296 | { 0, 0 } | 296 | { 0, 0 } |
297 | }; | 297 | }; |
298 | 298 | ||
299 | static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) | 299 | static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) |
300 | { | 300 | { |
301 | const struct cpuid_dependent_feature *df; | 301 | const struct cpuid_dependent_feature *df; |
302 | 302 | ||
303 | for (df = cpuid_dependent_features; df->feature; df++) { | 303 | for (df = cpuid_dependent_features; df->feature; df++) { |
304 | 304 | ||
305 | if (!cpu_has(c, df->feature)) | 305 | if (!cpu_has(c, df->feature)) |
306 | continue; | 306 | continue; |
307 | /* | 307 | /* |
308 | * Note: cpuid_level is set to -1 if unavailable, but | 308 | * Note: cpuid_level is set to -1 if unavailable, but |
309 | * extended_cpuid_level is set to 0 if unavailable | 309 | * extended_cpuid_level is set to 0 if unavailable |
310 | * and the legitimate extended levels are all negative | 310 | * and the legitimate extended levels are all negative |
311 | * when signed; hence the weird messing around with | 311 | * when signed; hence the weird messing around with |
312 | * signs here... | 312 | * signs here... |
313 | */ | 313 | */ |
314 | if (!((s32)df->level < 0 ? | 314 | if (!((s32)df->level < 0 ? |
315 | (u32)df->level > (u32)c->extended_cpuid_level : | 315 | (u32)df->level > (u32)c->extended_cpuid_level : |
316 | (s32)df->level > (s32)c->cpuid_level)) | 316 | (s32)df->level > (s32)c->cpuid_level)) |
317 | continue; | 317 | continue; |
318 | 318 | ||
319 | clear_cpu_cap(c, df->feature); | 319 | clear_cpu_cap(c, df->feature); |
320 | if (!warn) | 320 | if (!warn) |
321 | continue; | 321 | continue; |
322 | 322 | ||
323 | printk(KERN_WARNING | 323 | printk(KERN_WARNING |
324 | "CPU: CPU feature %s disabled, no CPUID level 0x%x\n", | 324 | "CPU: CPU feature %s disabled, no CPUID level 0x%x\n", |
325 | x86_cap_flags[df->feature], df->level); | 325 | x86_cap_flags[df->feature], df->level); |
326 | } | 326 | } |
327 | } | 327 | } |
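The sign games in the comment deserve a worked case. X86_FEATURE_XSAVE needs level 0x0000000d, which is positive as an s32, so it is compared signed against cpuid_level: on a hypervisor that caps cpuid_level at 0x5, 0xd > 0x5 and XSAVE is cleared with the warning above (a missing CPUID is encoded as cpuid_level = -1 and fails the same way). A requirement keyed to an extended leaf, say a hypothetical 0x80000008, is negative as an s32, so the comparison flips to unsigned against extended_cpuid_level, where "missing" is encoded as 0 and again always fails. A restatement of the predicate, illustration only (s32/u32 are the <linux/types.h> typedefs):

	/* Illustration only: true when the required CPUID level is absent. */
	static int cpuid_level_missing(s32 required, s32 cpuid_level,
				       u32 extended_cpuid_level)
	{
		if (required < 0)	/* 0x80000000+ leaves look negative as s32 */
			return (u32)required > extended_cpuid_level;
		return required > cpuid_level;
	}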
328 | 328 | ||
329 | /* | 329 | /* |
330 | * Naming convention should be: <Name> [(<Codename>)] | 330 | * Naming convention should be: <Name> [(<Codename>)] |
331 | * This table is only used if init_<vendor>() below doesn't set it; | 331 | * This table is only used if init_<vendor>() below doesn't set it; |
332 | * in particular, if CPUID levels 0x80000002..4 are supported, this | 332 | * in particular, if CPUID levels 0x80000002..4 are supported, this |
333 | * isn't used | 333 | * isn't used |
334 | */ | 334 | */ |
335 | 335 | ||
336 | /* Look up CPU names by table lookup. */ | 336 | /* Look up CPU names by table lookup. */ |
337 | static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c) | 337 | static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c) |
338 | { | 338 | { |
339 | const struct cpu_model_info *info; | 339 | const struct cpu_model_info *info; |
340 | 340 | ||
341 | if (c->x86_model >= 16) | 341 | if (c->x86_model >= 16) |
342 | return NULL; /* Range check */ | 342 | return NULL; /* Range check */ |
343 | 343 | ||
344 | if (!this_cpu) | 344 | if (!this_cpu) |
345 | return NULL; | 345 | return NULL; |
346 | 346 | ||
347 | info = this_cpu->c_models; | 347 | info = this_cpu->c_models; |
348 | 348 | ||
349 | while (info && info->family) { | 349 | while (info && info->family) { |
350 | if (info->family == c->x86) | 350 | if (info->family == c->x86) |
351 | return info->model_names[c->x86_model]; | 351 | return info->model_names[c->x86_model]; |
352 | info++; | 352 | info++; |
353 | } | 353 | } |
354 | return NULL; /* Not found */ | 354 | return NULL; /* Not found */ |
355 | } | 355 | } |
356 | 356 | ||
357 | __u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata; | 357 | __u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata; |
358 | __u32 cpu_caps_set[NCAPINTS] __cpuinitdata; | 358 | __u32 cpu_caps_set[NCAPINTS] __cpuinitdata; |
359 | 359 | ||
360 | void load_percpu_segment(int cpu) | 360 | void load_percpu_segment(int cpu) |
361 | { | 361 | { |
362 | #ifdef CONFIG_X86_32 | 362 | #ifdef CONFIG_X86_32 |
363 | loadsegment(fs, __KERNEL_PERCPU); | 363 | loadsegment(fs, __KERNEL_PERCPU); |
364 | #else | 364 | #else |
365 | loadsegment(gs, 0); | 365 | loadsegment(gs, 0); |
366 | wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); | 366 | wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); |
367 | #endif | 367 | #endif |
368 | load_stack_canary_segment(); | 368 | load_stack_canary_segment(); |
369 | } | 369 | } |
370 | 370 | ||
371 | /* | 371 | /* |
372 | * Current gdt points %fs at the "master" per-cpu area: after this, | 372 | * Current gdt points %fs at the "master" per-cpu area: after this, |
373 | * it's on the real one. | 373 | * it's on the real one. |
374 | */ | 374 | */ |
375 | void switch_to_new_gdt(int cpu) | 375 | void switch_to_new_gdt(int cpu) |
376 | { | 376 | { |
377 | struct desc_ptr gdt_descr; | 377 | struct desc_ptr gdt_descr; |
378 | 378 | ||
379 | gdt_descr.address = (long)get_cpu_gdt_table(cpu); | 379 | gdt_descr.address = (long)get_cpu_gdt_table(cpu); |
380 | gdt_descr.size = GDT_SIZE - 1; | 380 | gdt_descr.size = GDT_SIZE - 1; |
381 | load_gdt(&gdt_descr); | 381 | load_gdt(&gdt_descr); |
382 | /* Reload the per-cpu base */ | 382 | /* Reload the per-cpu base */ |
383 | 383 | ||
384 | load_percpu_segment(cpu); | 384 | load_percpu_segment(cpu); |
385 | } | 385 | } |
386 | 386 | ||
387 | static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; | 387 | static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; |
388 | 388 | ||
389 | static void __cpuinit get_model_name(struct cpuinfo_x86 *c) | 389 | static void __cpuinit get_model_name(struct cpuinfo_x86 *c) |
390 | { | 390 | { |
391 | unsigned int *v; | 391 | unsigned int *v; |
392 | char *p, *q; | 392 | char *p, *q; |
393 | 393 | ||
394 | if (c->extended_cpuid_level < 0x80000004) | 394 | if (c->extended_cpuid_level < 0x80000004) |
395 | return; | 395 | return; |
396 | 396 | ||
397 | v = (unsigned int *)c->x86_model_id; | 397 | v = (unsigned int *)c->x86_model_id; |
398 | cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); | 398 | cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); |
399 | cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); | 399 | cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); |
400 | cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); | 400 | cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); |
401 | c->x86_model_id[48] = 0; | 401 | c->x86_model_id[48] = 0; |
402 | 402 | ||
403 | /* | 403 | /* |
404 | * Intel chips right-justify this string for some dumb reason; | 404 | * Intel chips right-justify this string for some dumb reason; |
405 | * undo that brain damage: | 405 | * undo that brain damage: |
406 | */ | 406 | */ |
407 | p = q = &c->x86_model_id[0]; | 407 | p = q = &c->x86_model_id[0]; |
408 | while (*p == ' ') | 408 | while (*p == ' ') |
409 | p++; | 409 | p++; |
410 | if (p != q) { | 410 | if (p != q) { |
411 | while (*p) | 411 | while (*p) |
412 | *q++ = *p++; | 412 | *q++ = *p++; |
413 | while (q <= &c->x86_model_id[48]) | 413 | while (q <= &c->x86_model_id[48]) |
414 | *q++ = '\0'; /* Zero-pad the rest */ | 414 | *q++ = '\0'; /* Zero-pad the rest */ |
415 | } | 415 | } |
416 | } | 416 | } |
417 | 417 | ||
418 | void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) | 418 | void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) |
419 | { | 419 | { |
420 | unsigned int n, dummy, ebx, ecx, edx, l2size; | 420 | unsigned int n, dummy, ebx, ecx, edx, l2size; |
421 | 421 | ||
422 | n = c->extended_cpuid_level; | 422 | n = c->extended_cpuid_level; |
423 | 423 | ||
424 | if (n >= 0x80000005) { | 424 | if (n >= 0x80000005) { |
425 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); | 425 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); |
426 | c->x86_cache_size = (ecx>>24) + (edx>>24); | 426 | c->x86_cache_size = (ecx>>24) + (edx>>24); |
427 | #ifdef CONFIG_X86_64 | 427 | #ifdef CONFIG_X86_64 |
428 | /* On K8 L1 TLB is inclusive, so don't count it */ | 428 | /* On K8 L1 TLB is inclusive, so don't count it */ |
429 | c->x86_tlbsize = 0; | 429 | c->x86_tlbsize = 0; |
430 | #endif | 430 | #endif |
431 | } | 431 | } |
432 | 432 | ||
433 | if (n < 0x80000006) /* Some chips just have a large L1. */ | 433 | if (n < 0x80000006) /* Some chips just have a large L1. */ |
434 | return; | 434 | return; |
435 | 435 | ||
436 | cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); | 436 | cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); |
437 | l2size = ecx >> 16; | 437 | l2size = ecx >> 16; |
438 | 438 | ||
439 | #ifdef CONFIG_X86_64 | 439 | #ifdef CONFIG_X86_64 |
440 | c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); | 440 | c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); |
441 | #else | 441 | #else |
442 | /* do processor-specific cache resizing */ | 442 | /* do processor-specific cache resizing */ |
443 | if (this_cpu->c_size_cache) | 443 | if (this_cpu->c_size_cache) |
444 | l2size = this_cpu->c_size_cache(c, l2size); | 444 | l2size = this_cpu->c_size_cache(c, l2size); |
445 | 445 | ||
446 | /* Allow user to override all this if necessary. */ | 446 | /* Allow user to override all this if necessary. */ |
447 | if (cachesize_override != -1) | 447 | if (cachesize_override != -1) |
448 | l2size = cachesize_override; | 448 | l2size = cachesize_override; |
449 | 449 | ||
450 | if (l2size == 0) | 450 | if (l2size == 0) |
451 | return; /* Again, no L2 cache is possible */ | 451 | return; /* Again, no L2 cache is possible */ |
452 | #endif | 452 | #endif |
453 | 453 | ||
454 | c->x86_cache_size = l2size; | 454 | c->x86_cache_size = l2size; |
455 | } | 455 | } |
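For orientation, leaf 0x80000005 reports the L1 data and instruction cache sizes in KB in the top bytes of ECX and EDX (hence the two >>24 shifts), and leaf 0x80000006 reports the L2 size in KB in ECX[31:16]. A worked decode with a made-up register value:

	/* Illustration only: decode the L2 size from a sample 0x80000006 ECX. */
	#include <stdio.h>

	int main(void)
	{
		unsigned int ecx = 0x02006140;		/* hypothetical CPUID value   */
		unsigned int l2size_kb = ecx >> 16;	/* bits 31..16: L2 size in KB */

		printf("L2 cache: %u KB\n", l2size_kb);	/* prints 512 */
		return 0;
	}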
456 | 456 | ||
457 | u16 __read_mostly tlb_lli_4k[NR_INFO]; | 457 | u16 __read_mostly tlb_lli_4k[NR_INFO]; |
458 | u16 __read_mostly tlb_lli_2m[NR_INFO]; | 458 | u16 __read_mostly tlb_lli_2m[NR_INFO]; |
459 | u16 __read_mostly tlb_lli_4m[NR_INFO]; | 459 | u16 __read_mostly tlb_lli_4m[NR_INFO]; |
460 | u16 __read_mostly tlb_lld_4k[NR_INFO]; | 460 | u16 __read_mostly tlb_lld_4k[NR_INFO]; |
461 | u16 __read_mostly tlb_lld_2m[NR_INFO]; | 461 | u16 __read_mostly tlb_lld_2m[NR_INFO]; |
462 | u16 __read_mostly tlb_lld_4m[NR_INFO]; | 462 | u16 __read_mostly tlb_lld_4m[NR_INFO]; |
463 | 463 | ||
464 | /* | 464 | /* |
465 | * tlb_flushall_shift shows the balance point in replacing cr3 write | 465 | * tlb_flushall_shift shows the balance point in replacing cr3 write |
466 | * with multiple 'invlpg'. It will do this replacement when | 466 | * with multiple 'invlpg'. It will do this replacement when |
467 | * flush_tlb_lines <= active_lines/2^tlb_flushall_shift. | 467 | * flush_tlb_lines <= active_lines/2^tlb_flushall_shift. |
468 | * If tlb_flushall_shift is -1, the replacement is disabled. | 468 | * If tlb_flushall_shift is -1, the replacement is disabled. |
469 | */ | 469 | */ |
470 | s8 __read_mostly tlb_flushall_shift = -1; | 470 | s8 __read_mostly tlb_flushall_shift = -1; |
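Read concretely: with a hypothetical tlb_flushall_shift of 5 and 512 active TLB lines, a flush covering at most 512 / 2^5 = 16 pages is done with individual invlpg instructions, and anything larger falls back to a full CR3 reload; -1 disables the per-page path entirely. A one-line restatement of the rule, illustration only:

	/* Illustration only: the balance test described above. */
	static int use_invlpg(unsigned long flush_tlb_lines,
			      unsigned long active_lines, s8 shift)
	{
		if (shift < 0)		/* -1: per-page replacement disabled */
			return 0;
		return flush_tlb_lines <= (active_lines >> shift);
	}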
471 | 471 | ||
472 | void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) | 472 | void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) |
473 | { | 473 | { |
474 | if (this_cpu->c_detect_tlb) | 474 | if (this_cpu->c_detect_tlb) |
475 | this_cpu->c_detect_tlb(c); | 475 | this_cpu->c_detect_tlb(c); |
476 | 476 | ||
477 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ | 477 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ |
478 | "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ | 478 | "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ |
479 | "tlb_flushall_shift is 0x%x\n", | 479 | "tlb_flushall_shift is 0x%x\n", |
480 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], | 480 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], |
481 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], | 481 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], |
482 | tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], | 482 | tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], |
483 | tlb_flushall_shift); | 483 | tlb_flushall_shift); |
484 | } | 484 | } |
485 | 485 | ||
486 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 486 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
487 | { | 487 | { |
488 | #ifdef CONFIG_X86_HT | 488 | #ifdef CONFIG_X86_HT |
489 | u32 eax, ebx, ecx, edx; | 489 | u32 eax, ebx, ecx, edx; |
490 | int index_msb, core_bits; | 490 | int index_msb, core_bits; |
491 | static bool printed; | 491 | static bool printed; |
492 | 492 | ||
493 | if (!cpu_has(c, X86_FEATURE_HT)) | 493 | if (!cpu_has(c, X86_FEATURE_HT)) |
494 | return; | 494 | return; |
495 | 495 | ||
496 | if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 496 | if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
497 | goto out; | 497 | goto out; |
498 | 498 | ||
499 | if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) | 499 | if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) |
500 | return; | 500 | return; |
501 | 501 | ||
502 | cpuid(1, &eax, &ebx, &ecx, &edx); | 502 | cpuid(1, &eax, &ebx, &ecx, &edx); |
503 | 503 | ||
504 | smp_num_siblings = (ebx & 0xff0000) >> 16; | 504 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
505 | 505 | ||
506 | if (smp_num_siblings == 1) { | 506 | if (smp_num_siblings == 1) { |
507 | printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n"); | 507 | printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n"); |
508 | goto out; | 508 | goto out; |
509 | } | 509 | } |
510 | 510 | ||
511 | if (smp_num_siblings <= 1) | 511 | if (smp_num_siblings <= 1) |
512 | goto out; | 512 | goto out; |
513 | 513 | ||
514 | index_msb = get_count_order(smp_num_siblings); | 514 | index_msb = get_count_order(smp_num_siblings); |
515 | c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); | 515 | c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); |
516 | 516 | ||
517 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; | 517 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; |
518 | 518 | ||
519 | index_msb = get_count_order(smp_num_siblings); | 519 | index_msb = get_count_order(smp_num_siblings); |
520 | 520 | ||
521 | core_bits = get_count_order(c->x86_max_cores); | 521 | core_bits = get_count_order(c->x86_max_cores); |
522 | 522 | ||
523 | c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & | 523 | c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & |
524 | ((1 << core_bits) - 1); | 524 | ((1 << core_bits) - 1); |
525 | 525 | ||
526 | out: | 526 | out: |
527 | if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { | 527 | if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { |
528 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | 528 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", |
529 | c->phys_proc_id); | 529 | c->phys_proc_id); |
530 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | 530 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", |
531 | c->cpu_core_id); | 531 | c->cpu_core_id); |
532 | printed = 1; | 532 | printed = 1; |
533 | } | 533 | } |
534 | #endif | 534 | #endif |
535 | } | 535 | } |
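A worked trace may help (numbers are hypothetical): suppose CPUID.1 EBX[23:16] reports smp_num_siblings = 8 and the part has x86_max_cores = 4. The first get_count_order(8) = 3, so phys_proc_id = initial_apicid >> 3. Dividing by the core count leaves 2 threads per core, so the second get_count_order(2) = 1 while core_bits = get_count_order(4) = 2, giving cpu_core_id = (initial_apicid >> 1) & 3. An initial APIC ID of 11 (0b1011) therefore lands in package 1, core 1, as that core's second thread.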
536 | 536 | ||
537 | static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) | 537 | static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) |
538 | { | 538 | { |
539 | char *v = c->x86_vendor_id; | 539 | char *v = c->x86_vendor_id; |
540 | int i; | 540 | int i; |
541 | 541 | ||
542 | for (i = 0; i < X86_VENDOR_NUM; i++) { | 542 | for (i = 0; i < X86_VENDOR_NUM; i++) { |
543 | if (!cpu_devs[i]) | 543 | if (!cpu_devs[i]) |
544 | break; | 544 | break; |
545 | 545 | ||
546 | if (!strcmp(v, cpu_devs[i]->c_ident[0]) || | 546 | if (!strcmp(v, cpu_devs[i]->c_ident[0]) || |
547 | (cpu_devs[i]->c_ident[1] && | 547 | (cpu_devs[i]->c_ident[1] && |
548 | !strcmp(v, cpu_devs[i]->c_ident[1]))) { | 548 | !strcmp(v, cpu_devs[i]->c_ident[1]))) { |
549 | 549 | ||
550 | this_cpu = cpu_devs[i]; | 550 | this_cpu = cpu_devs[i]; |
551 | c->x86_vendor = this_cpu->c_x86_vendor; | 551 | c->x86_vendor = this_cpu->c_x86_vendor; |
552 | return; | 552 | return; |
553 | } | 553 | } |
554 | } | 554 | } |
555 | 555 | ||
556 | printk_once(KERN_ERR | 556 | printk_once(KERN_ERR |
557 | "CPU: vendor_id '%s' unknown, using generic init.\n" \ | 557 | "CPU: vendor_id '%s' unknown, using generic init.\n" \ |
558 | "CPU: Your system may be unstable.\n", v); | 558 | "CPU: Your system may be unstable.\n", v); |
559 | 559 | ||
560 | c->x86_vendor = X86_VENDOR_UNKNOWN; | 560 | c->x86_vendor = X86_VENDOR_UNKNOWN; |
561 | this_cpu = &default_cpu; | 561 | this_cpu = &default_cpu; |
562 | } | 562 | } |
563 | 563 | ||
564 | void __cpuinit cpu_detect(struct cpuinfo_x86 *c) | 564 | void __cpuinit cpu_detect(struct cpuinfo_x86 *c) |
565 | { | 565 | { |
566 | /* Get vendor name */ | 566 | /* Get vendor name */ |
567 | cpuid(0x00000000, (unsigned int *)&c->cpuid_level, | 567 | cpuid(0x00000000, (unsigned int *)&c->cpuid_level, |
568 | (unsigned int *)&c->x86_vendor_id[0], | 568 | (unsigned int *)&c->x86_vendor_id[0], |
569 | (unsigned int *)&c->x86_vendor_id[8], | 569 | (unsigned int *)&c->x86_vendor_id[8], |
570 | (unsigned int *)&c->x86_vendor_id[4]); | 570 | (unsigned int *)&c->x86_vendor_id[4]); |
571 | 571 | ||
572 | c->x86 = 4; | 572 | c->x86 = 4; |
573 | /* Intel-defined flags: level 0x00000001 */ | 573 | /* Intel-defined flags: level 0x00000001 */ |
574 | if (c->cpuid_level >= 0x00000001) { | 574 | if (c->cpuid_level >= 0x00000001) { |
575 | u32 junk, tfms, cap0, misc; | 575 | u32 junk, tfms, cap0, misc; |
576 | 576 | ||
577 | cpuid(0x00000001, &tfms, &misc, &junk, &cap0); | 577 | cpuid(0x00000001, &tfms, &misc, &junk, &cap0); |
578 | c->x86 = (tfms >> 8) & 0xf; | 578 | c->x86 = (tfms >> 8) & 0xf; |
579 | c->x86_model = (tfms >> 4) & 0xf; | 579 | c->x86_model = (tfms >> 4) & 0xf; |
580 | c->x86_mask = tfms & 0xf; | 580 | c->x86_mask = tfms & 0xf; |
581 | 581 | ||
582 | if (c->x86 == 0xf) | 582 | if (c->x86 == 0xf) |
583 | c->x86 += (tfms >> 20) & 0xff; | 583 | c->x86 += (tfms >> 20) & 0xff; |
584 | if (c->x86 >= 0x6) | 584 | if (c->x86 >= 0x6) |
585 | c->x86_model += ((tfms >> 16) & 0xf) << 4; | 585 | c->x86_model += ((tfms >> 16) & 0xf) << 4; |
586 | 586 | ||
587 | if (cap0 & (1<<19)) { | 587 | if (cap0 & (1<<19)) { |
588 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | 588 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; |
589 | c->x86_cache_alignment = c->x86_clflush_size; | 589 | c->x86_cache_alignment = c->x86_clflush_size; |
590 | } | 590 | } |
591 | } | 591 | } |
592 | } | 592 | } |
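The bit-fiddling on tfms (CPUID.1 EAX) is easiest to follow with a sample value. For a hypothetical tfms of 0x000306a9, the base family is (tfms >> 8) & 0xf = 6, the base model (tfms >> 4) & 0xf = 0xa and the stepping 9; since the family is >= 6 the extended model bits are folded in and the model becomes 0xa + (3 << 4) = 0x3a, while the extended family bits are only added for family 0xf. When cap0 bit 19 (CLFLUSH) is set, EBX[15:8] gives the flush line size in 8-byte units, so a value of 8 there means 64-byte cache lines. The same decode as a standalone snippet, illustration only:

	/* Illustration only: family/model/stepping decode from a sample CPUID.1 EAX. */
	#include <stdio.h>

	int main(void)
	{
		unsigned int tfms = 0x000306a9;		/* sample value */
		unsigned int family = (tfms >> 8) & 0xf;
		unsigned int model  = (tfms >> 4) & 0xf;
		unsigned int mask   = tfms & 0xf;

		if (family == 0xf)			/* extended family, family 15 only */
			family += (tfms >> 20) & 0xff;
		if (family >= 0x6)			/* fold in the extended model bits */
			model += ((tfms >> 16) & 0xf) << 4;

		printf("family 0x%x, model 0x%x, stepping 0x%x\n", family, model, mask);
		return 0;
	}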
593 | 593 | ||
594 | void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | 594 | void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) |
595 | { | 595 | { |
596 | u32 tfms, xlvl; | 596 | u32 tfms, xlvl; |
597 | u32 ebx; | 597 | u32 ebx; |
598 | 598 | ||
599 | /* Intel-defined flags: level 0x00000001 */ | 599 | /* Intel-defined flags: level 0x00000001 */ |
600 | if (c->cpuid_level >= 0x00000001) { | 600 | if (c->cpuid_level >= 0x00000001) { |
601 | u32 capability, excap; | 601 | u32 capability, excap; |
602 | 602 | ||
603 | cpuid(0x00000001, &tfms, &ebx, &excap, &capability); | 603 | cpuid(0x00000001, &tfms, &ebx, &excap, &capability); |
604 | c->x86_capability[0] = capability; | 604 | c->x86_capability[0] = capability; |
605 | c->x86_capability[4] = excap; | 605 | c->x86_capability[4] = excap; |
606 | } | 606 | } |
607 | 607 | ||
608 | /* Additional Intel-defined flags: level 0x00000007 */ | 608 | /* Additional Intel-defined flags: level 0x00000007 */ |
609 | if (c->cpuid_level >= 0x00000007) { | 609 | if (c->cpuid_level >= 0x00000007) { |
610 | u32 eax, ebx, ecx, edx; | 610 | u32 eax, ebx, ecx, edx; |
611 | 611 | ||
612 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); | 612 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); |
613 | 613 | ||
614 | c->x86_capability[9] = ebx; | 614 | c->x86_capability[9] = ebx; |
615 | } | 615 | } |
616 | 616 | ||
617 | /* AMD-defined flags: level 0x80000001 */ | 617 | /* AMD-defined flags: level 0x80000001 */ |
618 | xlvl = cpuid_eax(0x80000000); | 618 | xlvl = cpuid_eax(0x80000000); |
619 | c->extended_cpuid_level = xlvl; | 619 | c->extended_cpuid_level = xlvl; |
620 | 620 | ||
621 | if ((xlvl & 0xffff0000) == 0x80000000) { | 621 | if ((xlvl & 0xffff0000) == 0x80000000) { |
622 | if (xlvl >= 0x80000001) { | 622 | if (xlvl >= 0x80000001) { |
623 | c->x86_capability[1] = cpuid_edx(0x80000001); | 623 | c->x86_capability[1] = cpuid_edx(0x80000001); |
624 | c->x86_capability[6] = cpuid_ecx(0x80000001); | 624 | c->x86_capability[6] = cpuid_ecx(0x80000001); |
625 | } | 625 | } |
626 | } | 626 | } |
627 | 627 | ||
628 | if (c->extended_cpuid_level >= 0x80000008) { | 628 | if (c->extended_cpuid_level >= 0x80000008) { |
629 | u32 eax = cpuid_eax(0x80000008); | 629 | u32 eax = cpuid_eax(0x80000008); |
630 | 630 | ||
631 | c->x86_virt_bits = (eax >> 8) & 0xff; | 631 | c->x86_virt_bits = (eax >> 8) & 0xff; |
632 | c->x86_phys_bits = eax & 0xff; | 632 | c->x86_phys_bits = eax & 0xff; |
633 | } | 633 | } |
634 | #ifdef CONFIG_X86_32 | 634 | #ifdef CONFIG_X86_32 |
635 | else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) | 635 | else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) |
636 | c->x86_phys_bits = 36; | 636 | c->x86_phys_bits = 36; |
637 | #endif | 637 | #endif |
638 | 638 | ||
639 | if (c->extended_cpuid_level >= 0x80000007) | 639 | if (c->extended_cpuid_level >= 0x80000007) |
640 | c->x86_power = cpuid_edx(0x80000007); | 640 | c->x86_power = cpuid_edx(0x80000007); |
641 | 641 | ||
642 | init_scattered_cpuid_features(c); | 642 | init_scattered_cpuid_features(c); |
643 | } | 643 | } |
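
get_cpu_cap() trusts the 0x8000000x leaves only after checking that leaf 0x80000000 really reports an extended range, and it pulls the physical/virtual address widths out of leaf 0x80000008 EAX. A hedged user-space sketch of the same checks (again using GCC/Clang's <cpuid.h>; variable names are illustrative):

    /* Sketch only; __get_cpuid() is GCC/Clang's <cpuid.h> helper. */
    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx, xlvl;

            if (!__get_cpuid(0x80000000, &xlvl, &ebx, &ecx, &edx))
                    return 1;
            if ((xlvl & 0xffff0000) != 0x80000000)  /* no extended range */
                    return 1;

            if (xlvl >= 0x80000008) {
                    __get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
                    printf("phys bits %u, virt bits %u\n",
                           eax & 0xff, (eax >> 8) & 0xff);
            }
            return 0;
    }
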
644 | 644 | ||
645 | static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) | 645 | static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) |
646 | { | 646 | { |
647 | #ifdef CONFIG_X86_32 | 647 | #ifdef CONFIG_X86_32 |
648 | int i; | 648 | int i; |
649 | 649 | ||
650 | /* | 650 | /* |
651 | * First of all, decide if this is a 486 or higher | 651 | * First of all, decide if this is a 486 or higher |
652 | * It's a 486 if we can modify the AC flag | 652 | * It's a 486 if we can modify the AC flag |
653 | */ | 653 | */ |
654 | if (flag_is_changeable_p(X86_EFLAGS_AC)) | 654 | if (flag_is_changeable_p(X86_EFLAGS_AC)) |
655 | c->x86 = 4; | 655 | c->x86 = 4; |
656 | else | 656 | else |
657 | c->x86 = 3; | 657 | c->x86 = 3; |
658 | 658 | ||
659 | for (i = 0; i < X86_VENDOR_NUM; i++) | 659 | for (i = 0; i < X86_VENDOR_NUM; i++) |
660 | if (cpu_devs[i] && cpu_devs[i]->c_identify) { | 660 | if (cpu_devs[i] && cpu_devs[i]->c_identify) { |
661 | c->x86_vendor_id[0] = 0; | 661 | c->x86_vendor_id[0] = 0; |
662 | cpu_devs[i]->c_identify(c); | 662 | cpu_devs[i]->c_identify(c); |
663 | if (c->x86_vendor_id[0]) { | 663 | if (c->x86_vendor_id[0]) { |
664 | get_cpu_vendor(c); | 664 | get_cpu_vendor(c); |
665 | break; | 665 | break; |
666 | } | 666 | } |
667 | } | 667 | } |
668 | #endif | 668 | #endif |
669 | } | 669 | } |
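
The flag_is_changeable_p(X86_EFLAGS_AC) probe above is the classic 386-vs-486 test: a 386 keeps EFLAGS.AC (bit 18) hard-wired to zero, while a 486 lets it be toggled. A sketch of that probe, assuming a 32-bit x86 build like the #ifdef above (the function name is ours, not the kernel's):

    /* Sketch of the AC-toggle probe; intended for a 32-bit x86 build,
     * mirroring flag_is_changeable_p().  Returns nonzero on 486+. */
    #define X86_EFLAGS_AC   (1UL << 18)

    static int ac_flag_changeable(void)
    {
            unsigned long f1, f2;

            asm volatile("pushf\n\t"
                         "pop %0\n\t"           /* f1 = EFLAGS */
                         "mov %0, %1\n\t"       /* f2 = original copy */
                         "xor %2, %0\n\t"       /* flip AC */
                         "push %0\n\t"
                         "popf\n\t"             /* try to write it back */
                         "pushf\n\t"
                         "pop %0\n\t"           /* re-read EFLAGS */
                         "push %1\n\t"
                         "popf"                 /* restore original flags */
                         : "=&r" (f1), "=&r" (f2)
                         : "ir" (X86_EFLAGS_AC)
                         : "cc");

            return ((f1 ^ f2) & X86_EFLAGS_AC) != 0;
    }
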
670 | 670 | ||
671 | /* | 671 | /* |
672 | * Do minimum CPU detection early. | 672 | * Do minimum CPU detection early. |
673 | * Fields really needed: vendor, cpuid_level, family, model, mask, | 673 | * Fields really needed: vendor, cpuid_level, family, model, mask, |
674 | * cache alignment. | 674 | * cache alignment. |
675 | * The others are not touched to avoid unwanted side effects. | 675 | * The others are not touched to avoid unwanted side effects. |
676 | * | 676 | * |
677 | * WARNING: this function is only called on the BP. Don't add code here | 677 | * WARNING: this function is only called on the BP. Don't add code here |
678 | * that is supposed to run on all CPUs. | 678 | * that is supposed to run on all CPUs. |
679 | */ | 679 | */ |
680 | static void __init early_identify_cpu(struct cpuinfo_x86 *c) | 680 | static void __init early_identify_cpu(struct cpuinfo_x86 *c) |
681 | { | 681 | { |
682 | #ifdef CONFIG_X86_64 | 682 | #ifdef CONFIG_X86_64 |
683 | c->x86_clflush_size = 64; | 683 | c->x86_clflush_size = 64; |
684 | c->x86_phys_bits = 36; | 684 | c->x86_phys_bits = 36; |
685 | c->x86_virt_bits = 48; | 685 | c->x86_virt_bits = 48; |
686 | #else | 686 | #else |
687 | c->x86_clflush_size = 32; | 687 | c->x86_clflush_size = 32; |
688 | c->x86_phys_bits = 32; | 688 | c->x86_phys_bits = 32; |
689 | c->x86_virt_bits = 32; | 689 | c->x86_virt_bits = 32; |
690 | #endif | 690 | #endif |
691 | c->x86_cache_alignment = c->x86_clflush_size; | 691 | c->x86_cache_alignment = c->x86_clflush_size; |
692 | 692 | ||
693 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | 693 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
694 | c->extended_cpuid_level = 0; | 694 | c->extended_cpuid_level = 0; |
695 | 695 | ||
696 | if (!have_cpuid_p()) | 696 | if (!have_cpuid_p()) |
697 | identify_cpu_without_cpuid(c); | 697 | identify_cpu_without_cpuid(c); |
698 | 698 | ||
699 | /* Cyrix could have cpuid enabled via c_identify() */ | 699 | /* Cyrix could have cpuid enabled via c_identify() */ |
INTERNAL_SKIP
700 | if (!have_cpuid_p()) | 700 | if (!have_cpuid_p()) |
701 | return; | 701 | return; |
702 | 702 | ||
703 | cpu_detect(c); | 703 | cpu_detect(c); |
704 | 704 | ||
705 | get_cpu_vendor(c); | 705 | get_cpu_vendor(c); |
706 | 706 | ||
707 | get_cpu_cap(c); | 707 | get_cpu_cap(c); |
708 | 708 | ||
709 | if (this_cpu->c_early_init) | 709 | if (this_cpu->c_early_init) |
710 | this_cpu->c_early_init(c); | 710 | this_cpu->c_early_init(c); |
711 | 711 | ||
712 | c->cpu_index = 0; | 712 | c->cpu_index = 0; |
713 | filter_cpuid_features(c, false); | 713 | filter_cpuid_features(c, false); |
714 | 714 | ||
715 | setup_smep(c); | 715 | setup_smep(c); |
716 | 716 | ||
717 | if (this_cpu->c_bsp_init) | 717 | if (this_cpu->c_bsp_init) |
718 | this_cpu->c_bsp_init(c); | 718 | this_cpu->c_bsp_init(c); |
719 | } | 719 | } |
720 | 720 | ||
721 | void __init early_cpu_init(void) | 721 | void __init early_cpu_init(void) |
722 | { | 722 | { |
723 | const struct cpu_dev *const *cdev; | 723 | const struct cpu_dev *const *cdev; |
724 | int count = 0; | 724 | int count = 0; |
725 | 725 | ||
726 | #ifdef CONFIG_PROCESSOR_SELECT | 726 | #ifdef CONFIG_PROCESSOR_SELECT |
727 | printk(KERN_INFO "KERNEL supported cpus:\n"); | 727 | printk(KERN_INFO "KERNEL supported cpus:\n"); |
728 | #endif | 728 | #endif |
729 | 729 | ||
730 | for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { | 730 | for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { |
731 | const struct cpu_dev *cpudev = *cdev; | 731 | const struct cpu_dev *cpudev = *cdev; |
732 | 732 | ||
733 | if (count >= X86_VENDOR_NUM) | 733 | if (count >= X86_VENDOR_NUM) |
734 | break; | 734 | break; |
735 | cpu_devs[count] = cpudev; | 735 | cpu_devs[count] = cpudev; |
736 | count++; | 736 | count++; |
737 | 737 | ||
738 | #ifdef CONFIG_PROCESSOR_SELECT | 738 | #ifdef CONFIG_PROCESSOR_SELECT |
739 | { | 739 | { |
740 | unsigned int j; | 740 | unsigned int j; |
741 | 741 | ||
742 | for (j = 0; j < 2; j++) { | 742 | for (j = 0; j < 2; j++) { |
743 | if (!cpudev->c_ident[j]) | 743 | if (!cpudev->c_ident[j]) |
744 | continue; | 744 | continue; |
745 | printk(KERN_INFO " %s %s\n", cpudev->c_vendor, | 745 | printk(KERN_INFO " %s %s\n", cpudev->c_vendor, |
746 | cpudev->c_ident[j]); | 746 | cpudev->c_ident[j]); |
747 | } | 747 | } |
748 | } | 748 | } |
749 | #endif | 749 | #endif |
750 | } | 750 | } |
751 | early_identify_cpu(&boot_cpu_data); | 751 | early_identify_cpu(&boot_cpu_data); |
752 | } | 752 | } |
753 | 753 | ||
754 | /* | 754 | /* |
755 | * The NOPL instruction is supposed to exist on all CPUs of family >= 6; | 755 | * The NOPL instruction is supposed to exist on all CPUs of family >= 6; |
756 | * unfortunately, that's not true in practice because of early VIA | 756 | * unfortunately, that's not true in practice because of early VIA |
757 | * chips and (more importantly) broken virtualizers that are not easy | 757 | * chips and (more importantly) broken virtualizers that are not easy |
758 | * to detect. In the latter case it doesn't even *fail* reliably, so | 758 | * to detect. In the latter case it doesn't even *fail* reliably, so |
759 | * probing for it doesn't even work. Disable it completely on 32-bit | 759 | * probing for it doesn't even work. Disable it completely on 32-bit |
760 | * unless we can find a reliable way to detect all the broken cases. | 760 | * unless we can find a reliable way to detect all the broken cases. |
761 | * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). | 761 | * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). |
762 | */ | 762 | */ |
763 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) | 763 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) |
764 | { | 764 | { |
765 | #ifdef CONFIG_X86_32 | 765 | #ifdef CONFIG_X86_32 |
766 | clear_cpu_cap(c, X86_FEATURE_NOPL); | 766 | clear_cpu_cap(c, X86_FEATURE_NOPL); |
767 | #else | 767 | #else |
768 | set_cpu_cap(c, X86_FEATURE_NOPL); | 768 | set_cpu_cap(c, X86_FEATURE_NOPL); |
769 | #endif | 769 | #endif |
770 | } | 770 | } |
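
The comment above refers to the multi-byte NOPL instruction, whose canonical 3-byte encoding is 0F 1F /0. Purely as an illustration of what a probe would execute (assumption: the CPU or hypervisor actually implements NOPL; on the broken setups described above this may raise #UD instead, which is exactly why the kernel does not probe):

    /* Illustration only: the canonical 3-byte NOPL, 0F 1F 00
     * ("nopl (%eax)").  Assumes a CPU/VMM that implements it. */
    static inline void exercise_nopl(void)
    {
            asm volatile(".byte 0x0f, 0x1f, 0x00");
    }
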
771 | 771 | ||
772 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | 772 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) |
773 | { | 773 | { |
774 | c->extended_cpuid_level = 0; | 774 | c->extended_cpuid_level = 0; |
775 | 775 | ||
776 | if (!have_cpuid_p()) | 776 | if (!have_cpuid_p()) |
777 | identify_cpu_without_cpuid(c); | 777 | identify_cpu_without_cpuid(c); |
778 | 778 | ||
779 | /* Cyrix could have cpuid enabled via c_identify() */ | 779 | /* Cyrix could have cpuid enabled via c_identify() */ |
780 | if (!have_cpuid_p()) | 780 | if (!have_cpuid_p()) |
781 | return; | 781 | return; |
782 | 782 | ||
783 | cpu_detect(c); | 783 | cpu_detect(c); |
784 | 784 | ||
785 | get_cpu_vendor(c); | 785 | get_cpu_vendor(c); |
786 | 786 | ||
787 | get_cpu_cap(c); | 787 | get_cpu_cap(c); |
788 | 788 | ||
789 | if (c->cpuid_level >= 0x00000001) { | 789 | if (c->cpuid_level >= 0x00000001) { |
790 | c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; | 790 | c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; |
791 | #ifdef CONFIG_X86_32 | 791 | #ifdef CONFIG_X86_32 |
792 | # ifdef CONFIG_X86_HT | 792 | # ifdef CONFIG_X86_HT |
793 | c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); | 793 | c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); |
794 | # else | 794 | # else |
795 | c->apicid = c->initial_apicid; | 795 | c->apicid = c->initial_apicid; |
796 | # endif | 796 | # endif |
797 | #endif | 797 | #endif |
798 | c->phys_proc_id = c->initial_apicid; | 798 | c->phys_proc_id = c->initial_apicid; |
799 | } | 799 | } |
800 | 800 | ||
801 | setup_smep(c); | 801 | setup_smep(c); |
802 | 802 | ||
803 | get_model_name(c); /* Default name */ | 803 | get_model_name(c); /* Default name */ |
804 | 804 | ||
805 | detect_nopl(c); | 805 | detect_nopl(c); |
806 | } | 806 | } |
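
generic_identify() reads the initial APIC ID from CPUID leaf 1 EBX[31:24]. A small user-space sketch of that extraction, again leaning on GCC/Clang's <cpuid.h>:

    /* Sketch: CPUID.1 EBX[31:24] is the initial APIC ID. */
    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                    printf("initial APIC ID %u\n", (ebx >> 24) & 0xff);
            return 0;
    }
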
807 | 807 | ||
808 | /* | 808 | /* |
809 | * This does the hard work of actually picking apart the CPU stuff... | 809 | * This does the hard work of actually picking apart the CPU stuff... |
810 | */ | 810 | */ |
811 | static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | 811 | static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) |
812 | { | 812 | { |
813 | int i; | 813 | int i; |
814 | 814 | ||
815 | c->loops_per_jiffy = loops_per_jiffy; | 815 | c->loops_per_jiffy = loops_per_jiffy; |
816 | c->x86_cache_size = -1; | 816 | c->x86_cache_size = -1; |
817 | c->x86_vendor = X86_VENDOR_UNKNOWN; | 817 | c->x86_vendor = X86_VENDOR_UNKNOWN; |
818 | c->x86_model = c->x86_mask = 0; /* So far unknown... */ | 818 | c->x86_model = c->x86_mask = 0; /* So far unknown... */ |
819 | c->x86_vendor_id[0] = '\0'; /* Unset */ | 819 | c->x86_vendor_id[0] = '\0'; /* Unset */ |
820 | c->x86_model_id[0] = '\0'; /* Unset */ | 820 | c->x86_model_id[0] = '\0'; /* Unset */ |
821 | c->x86_max_cores = 1; | 821 | c->x86_max_cores = 1; |
822 | c->x86_coreid_bits = 0; | 822 | c->x86_coreid_bits = 0; |
823 | #ifdef CONFIG_X86_64 | 823 | #ifdef CONFIG_X86_64 |
824 | c->x86_clflush_size = 64; | 824 | c->x86_clflush_size = 64; |
825 | c->x86_phys_bits = 36; | 825 | c->x86_phys_bits = 36; |
826 | c->x86_virt_bits = 48; | 826 | c->x86_virt_bits = 48; |
827 | #else | 827 | #else |
828 | c->cpuid_level = -1; /* CPUID not detected */ | 828 | c->cpuid_level = -1; /* CPUID not detected */ |
829 | c->x86_clflush_size = 32; | 829 | c->x86_clflush_size = 32; |
830 | c->x86_phys_bits = 32; | 830 | c->x86_phys_bits = 32; |
831 | c->x86_virt_bits = 32; | 831 | c->x86_virt_bits = 32; |
832 | #endif | 832 | #endif |
833 | c->x86_cache_alignment = c->x86_clflush_size; | 833 | c->x86_cache_alignment = c->x86_clflush_size; |
834 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | 834 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
835 | 835 | ||
836 | generic_identify(c); | 836 | generic_identify(c); |
837 | 837 | ||
838 | if (this_cpu->c_identify) | 838 | if (this_cpu->c_identify) |
839 | this_cpu->c_identify(c); | 839 | this_cpu->c_identify(c); |
840 | 840 | ||
841 | /* Clear/Set all flags overridden by options, after probe */ | 841 | /* Clear/Set all flags overridden by options, after probe */ |
842 | for (i = 0; i < NCAPINTS; i++) { | 842 | for (i = 0; i < NCAPINTS; i++) { |
843 | c->x86_capability[i] &= ~cpu_caps_cleared[i]; | 843 | c->x86_capability[i] &= ~cpu_caps_cleared[i]; |
844 | c->x86_capability[i] |= cpu_caps_set[i]; | 844 | c->x86_capability[i] |= cpu_caps_set[i]; |
845 | } | 845 | } |
846 | 846 | ||
847 | #ifdef CONFIG_X86_64 | 847 | #ifdef CONFIG_X86_64 |
848 | c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); | 848 | c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); |
849 | #endif | 849 | #endif |
850 | 850 | ||
851 | /* | 851 | /* |
852 | * Vendor-specific initialization. In this section we | 852 | * Vendor-specific initialization. In this section we |
853 | * canonicalize the feature flags, meaning that if there are | 853 | * canonicalize the feature flags, meaning that if there are |
854 | * features a certain CPU supports which CPUID doesn't tell us | 854 | * features a certain CPU supports which CPUID doesn't tell us |
855 | * about, if CPUID claims incorrect flags, or if there are other | 855 | * about, if CPUID claims incorrect flags, or if there are other |
856 | * bugs, we handle them here. | 856 | * bugs, we handle them here. |
857 | * | 857 | * |
858 | * At the end of this section, c->x86_capability better | 858 | * At the end of this section, c->x86_capability better |
859 | * indicate the features this CPU genuinely supports! | 859 | * indicate the features this CPU genuinely supports! |
860 | */ | 860 | */ |
861 | if (this_cpu->c_init) | 861 | if (this_cpu->c_init) |
862 | this_cpu->c_init(c); | 862 | this_cpu->c_init(c); |
863 | 863 | ||
864 | /* Disable the PN if appropriate */ | 864 | /* Disable the PN if appropriate */ |
865 | squash_the_stupid_serial_number(c); | 865 | squash_the_stupid_serial_number(c); |
866 | 866 | ||
867 | /* | 867 | /* |
868 | * The vendor-specific functions might have changed features. | 868 | * The vendor-specific functions might have changed features. |
869 | * Now we do "generic changes." | 869 | * Now we do "generic changes." |
870 | */ | 870 | */ |
871 | 871 | ||
872 | /* Filter out anything that depends on CPUID levels we don't have */ | 872 | /* Filter out anything that depends on CPUID levels we don't have */ |
873 | filter_cpuid_features(c, true); | 873 | filter_cpuid_features(c, true); |
874 | 874 | ||
875 | /* If the model name is still unset, do table lookup. */ | 875 | /* If the model name is still unset, do table lookup. */ |
876 | if (!c->x86_model_id[0]) { | 876 | if (!c->x86_model_id[0]) { |
877 | const char *p; | 877 | const char *p; |
878 | p = table_lookup_model(c); | 878 | p = table_lookup_model(c); |
879 | if (p) | 879 | if (p) |
880 | strcpy(c->x86_model_id, p); | 880 | strcpy(c->x86_model_id, p); |
881 | else | 881 | else |
882 | /* Last resort... */ | 882 | /* Last resort... */ |
883 | sprintf(c->x86_model_id, "%02x/%02x", | 883 | sprintf(c->x86_model_id, "%02x/%02x", |
884 | c->x86, c->x86_model); | 884 | c->x86, c->x86_model); |
885 | } | 885 | } |
886 | 886 | ||
887 | #ifdef CONFIG_X86_64 | 887 | #ifdef CONFIG_X86_64 |
888 | detect_ht(c); | 888 | detect_ht(c); |
889 | #endif | 889 | #endif |
890 | 890 | ||
891 | init_hypervisor(c); | 891 | init_hypervisor(c); |
892 | x86_init_rdrand(c); | 892 | x86_init_rdrand(c); |
893 | 893 | ||
894 | /* | 894 | /* |
895 | * Clear/Set all flags overridden by options; this needs to be | 895 | * Clear/Set all flags overridden by options; this needs to be |
896 | * done before the SMP all-CPUs capability AND below. | 896 | * done before the SMP all-CPUs capability AND below. |
897 | */ | 897 | */ |
898 | for (i = 0; i < NCAPINTS; i++) { | 898 | for (i = 0; i < NCAPINTS; i++) { |
899 | c->x86_capability[i] &= ~cpu_caps_cleared[i]; | 899 | c->x86_capability[i] &= ~cpu_caps_cleared[i]; |
900 | c->x86_capability[i] |= cpu_caps_set[i]; | 900 | c->x86_capability[i] |= cpu_caps_set[i]; |
901 | } | 901 | } |
902 | 902 | ||
903 | /* | 903 | /* |
904 | * On SMP, boot_cpu_data holds the common feature set between | 904 | * On SMP, boot_cpu_data holds the common feature set between |
905 | * all CPUs; so make sure that we indicate which features are | 905 | * all CPUs; so make sure that we indicate which features are |
906 | * common between the CPUs. The first time this routine gets | 906 | * common between the CPUs. The first time this routine gets |
907 | * executed, c == &boot_cpu_data. | 907 | * executed, c == &boot_cpu_data. |
908 | */ | 908 | */ |
909 | if (c != &boot_cpu_data) { | 909 | if (c != &boot_cpu_data) { |
910 | /* AND the already accumulated flags with these */ | 910 | /* AND the already accumulated flags with these */ |
911 | for (i = 0; i < NCAPINTS; i++) | 911 | for (i = 0; i < NCAPINTS; i++) |
912 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | 912 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; |
913 | } | 913 | } |
914 | 914 | ||
915 | /* Init Machine Check Exception if available. */ | 915 | /* Init Machine Check Exception if available. */ |
916 | mcheck_cpu_init(c); | 916 | mcheck_cpu_init(c); |
917 | 917 | ||
918 | select_idle_routine(c); | 918 | select_idle_routine(c); |
919 | 919 | ||
920 | #ifdef CONFIG_NUMA | 920 | #ifdef CONFIG_NUMA |
921 | numa_add_cpu(smp_processor_id()); | 921 | numa_add_cpu(smp_processor_id()); |
922 | #endif | 922 | #endif |
923 | } | 923 | } |
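
The tail of identify_cpu() ANDs each secondary CPU's capability words into boot_cpu_data, so only features present on every CPU stay advertised. A toy illustration of that accumulation step (array size and bit patterns are invented for the example):

    /* Toy illustration; NCAPINTS_DEMO and the bit patterns are invented. */
    #include <stdio.h>

    #define NCAPINTS_DEMO 2

    int main(void)
    {
            unsigned int boot[NCAPINTS_DEMO]      = { 0xffffffff, 0x0000000f };
            unsigned int secondary[NCAPINTS_DEMO] = { 0xfffffffe, 0x0000000d };
            int i;

            for (i = 0; i < NCAPINTS_DEMO; i++)
                    boot[i] &= secondary[i];        /* keep only common bits */

            printf("%#x %#x\n", boot[0], boot[1]);  /* 0xfffffffe 0xd */
            return 0;
    }
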
924 | 924 | ||
925 | #ifdef CONFIG_X86_64 | 925 | #ifdef CONFIG_X86_64 |
926 | static void vgetcpu_set_mode(void) | 926 | static void vgetcpu_set_mode(void) |
927 | { | 927 | { |
928 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) | 928 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) |
929 | vgetcpu_mode = VGETCPU_RDTSCP; | 929 | vgetcpu_mode = VGETCPU_RDTSCP; |
930 | else | 930 | else |
931 | vgetcpu_mode = VGETCPU_LSL; | 931 | vgetcpu_mode = VGETCPU_LSL; |
932 | } | 932 | } |
933 | #endif | 933 | #endif |
934 | 934 | ||
935 | void __init identify_boot_cpu(void) | 935 | void __init identify_boot_cpu(void) |
936 | { | 936 | { |
937 | identify_cpu(&boot_cpu_data); | 937 | identify_cpu(&boot_cpu_data); |
938 | init_amd_e400_c1e_mask(); | 938 | init_amd_e400_c1e_mask(); |
939 | #ifdef CONFIG_X86_32 | 939 | #ifdef CONFIG_X86_32 |
940 | sysenter_setup(); | 940 | sysenter_setup(); |
941 | enable_sep_cpu(); | 941 | enable_sep_cpu(); |
942 | #else | 942 | #else |
943 | vgetcpu_set_mode(); | 943 | vgetcpu_set_mode(); |
944 | #endif | 944 | #endif |
945 | if (boot_cpu_data.cpuid_level >= 2) | 945 | if (boot_cpu_data.cpuid_level >= 2) |
946 | cpu_detect_tlb(&boot_cpu_data); | 946 | cpu_detect_tlb(&boot_cpu_data); |
947 | } | 947 | } |
948 | 948 | ||
949 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 949 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
950 | { | 950 | { |
951 | BUG_ON(c == &boot_cpu_data); | 951 | BUG_ON(c == &boot_cpu_data); |
952 | identify_cpu(c); | 952 | identify_cpu(c); |
953 | #ifdef CONFIG_X86_32 | 953 | #ifdef CONFIG_X86_32 |
954 | enable_sep_cpu(); | 954 | enable_sep_cpu(); |
955 | #endif | 955 | #endif |
956 | mtrr_ap_init(); | 956 | mtrr_ap_init(); |
957 | } | 957 | } |
958 | 958 | ||
959 | struct msr_range { | 959 | struct msr_range { |
960 | unsigned min; | 960 | unsigned min; |
961 | unsigned max; | 961 | unsigned max; |
962 | }; | 962 | }; |
963 | 963 | ||
964 | static const struct msr_range msr_range_array[] __cpuinitconst = { | 964 | static const struct msr_range msr_range_array[] __cpuinitconst = { |
965 | { 0x00000000, 0x00000418}, | 965 | { 0x00000000, 0x00000418}, |
966 | { 0xc0000000, 0xc000040b}, | 966 | { 0xc0000000, 0xc000040b}, |
967 | { 0xc0010000, 0xc0010142}, | 967 | { 0xc0010000, 0xc0010142}, |
968 | { 0xc0011000, 0xc001103b}, | 968 | { 0xc0011000, 0xc001103b}, |
969 | }; | 969 | }; |
970 | 970 | ||
971 | static void __cpuinit __print_cpu_msr(void) | 971 | static void __cpuinit __print_cpu_msr(void) |
972 | { | 972 | { |
973 | unsigned index_min, index_max; | 973 | unsigned index_min, index_max; |
974 | unsigned index; | 974 | unsigned index; |
975 | u64 val; | 975 | u64 val; |
976 | int i; | 976 | int i; |
977 | 977 | ||
978 | for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { | 978 | for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { |
979 | index_min = msr_range_array[i].min; | 979 | index_min = msr_range_array[i].min; |
980 | index_max = msr_range_array[i].max; | 980 | index_max = msr_range_array[i].max; |
981 | 981 | ||
982 | for (index = index_min; index < index_max; index++) { | 982 | for (index = index_min; index < index_max; index++) { |
983 | if (rdmsrl_safe(index, &val)) | 983 | if (rdmsrl_safe(index, &val)) |
984 | continue; | 984 | continue; |
985 | printk(KERN_INFO " MSR%08x: %016llx\n", index, val); | 985 | printk(KERN_INFO " MSR%08x: %016llx\n", index, val); |
986 | } | 986 | } |
987 | } | 987 | } |
988 | } | 988 | } |
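
__print_cpu_msr() walks fixed MSR ranges with rdmsrl_safe() so that unimplemented MSRs are skipped silently. The closest user-space analogue is the msr driver's /dev/cpu/N/msr interface, where the file offset selects the MSR index; a sketch, assuming the msr module is loaded and the caller is privileged:

    /* Sketch: read one MSR through the msr character device.  Assumes
     * CONFIG_X86_MSR (the msr module) and sufficient privileges. */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            uint64_t val;
            int fd = open("/dev/cpu/0/msr", O_RDONLY);

            if (fd < 0)
                    return 1;
            /* The file offset selects the MSR; 0x10 is the TSC MSR. */
            if (pread(fd, &val, sizeof(val), 0x10) == sizeof(val))
                    printf("MSR 0x10 = %#llx\n", (unsigned long long)val);
            close(fd);
            return 0;
    }
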
989 | 989 | ||
990 | static int show_msr __cpuinitdata; | 990 | static int show_msr __cpuinitdata; |
991 | 991 | ||
992 | static __init int setup_show_msr(char *arg) | 992 | static __init int setup_show_msr(char *arg) |
993 | { | 993 | { |
994 | int num; | 994 | int num; |
995 | 995 | ||
996 | get_option(&arg, &num); | 996 | get_option(&arg, &num); |
997 | 997 | ||
998 | if (num > 0) | 998 | if (num > 0) |
999 | show_msr = num; | 999 | show_msr = num; |
1000 | return 1; | 1000 | return 1; |
1001 | } | 1001 | } |
1002 | __setup("show_msr=", setup_show_msr); | 1002 | __setup("show_msr=", setup_show_msr); |
1003 | 1003 | ||
1004 | static __init int setup_noclflush(char *arg) | 1004 | static __init int setup_noclflush(char *arg) |
1005 | { | 1005 | { |
1006 | setup_clear_cpu_cap(X86_FEATURE_CLFLSH); | 1006 | setup_clear_cpu_cap(X86_FEATURE_CLFLSH); |
1007 | return 1; | 1007 | return 1; |
1008 | } | 1008 | } |
1009 | __setup("noclflush", setup_noclflush); | 1009 | __setup("noclflush", setup_noclflush); |
1010 | 1010 | ||
1011 | void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | 1011 | void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) |
1012 | { | 1012 | { |
1013 | const char *vendor = NULL; | 1013 | const char *vendor = NULL; |
1014 | 1014 | ||
1015 | if (c->x86_vendor < X86_VENDOR_NUM) { | 1015 | if (c->x86_vendor < X86_VENDOR_NUM) { |
1016 | vendor = this_cpu->c_vendor; | 1016 | vendor = this_cpu->c_vendor; |
1017 | } else { | 1017 | } else { |
1018 | if (c->cpuid_level >= 0) | 1018 | if (c->cpuid_level >= 0) |
1019 | vendor = c->x86_vendor_id; | 1019 | vendor = c->x86_vendor_id; |
1020 | } | 1020 | } |
1021 | 1021 | ||
1022 | if (vendor && !strstr(c->x86_model_id, vendor)) | 1022 | if (vendor && !strstr(c->x86_model_id, vendor)) |
1023 | printk(KERN_CONT "%s ", vendor); | 1023 | printk(KERN_CONT "%s ", vendor); |
1024 | 1024 | ||
1025 | if (c->x86_model_id[0]) | 1025 | if (c->x86_model_id[0]) |
1026 | printk(KERN_CONT "%s", c->x86_model_id); | 1026 | printk(KERN_CONT "%s", c->x86_model_id); |
1027 | else | 1027 | else |
1028 | printk(KERN_CONT "%d86", c->x86); | 1028 | printk(KERN_CONT "%d86", c->x86); |
1029 | 1029 | ||
1030 | if (c->x86_mask || c->cpuid_level >= 0) | 1030 | if (c->x86_mask || c->cpuid_level >= 0) |
1031 | printk(KERN_CONT " stepping %02x\n", c->x86_mask); | 1031 | printk(KERN_CONT " stepping %02x\n", c->x86_mask); |
1032 | else | 1032 | else |
1033 | printk(KERN_CONT "\n"); | 1033 | printk(KERN_CONT "\n"); |
1034 | 1034 | ||
1035 | print_cpu_msr(c); | 1035 | print_cpu_msr(c); |
1036 | } | 1036 | } |
1037 | 1037 | ||
1038 | void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c) | 1038 | void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c) |
1039 | { | 1039 | { |
1040 | if (c->cpu_index < show_msr) | 1040 | if (c->cpu_index < show_msr) |
1041 | __print_cpu_msr(); | 1041 | __print_cpu_msr(); |
1042 | } | 1042 | } |
1043 | 1043 | ||
1044 | static __init int setup_disablecpuid(char *arg) | 1044 | static __init int setup_disablecpuid(char *arg) |
1045 | { | 1045 | { |
1046 | int bit; | 1046 | int bit; |
1047 | 1047 | ||
1048 | if (get_option(&arg, &bit) && bit < NCAPINTS*32) | 1048 | if (get_option(&arg, &bit) && bit < NCAPINTS*32) |
1049 | setup_clear_cpu_cap(bit); | 1049 | setup_clear_cpu_cap(bit); |
1050 | else | 1050 | else |
1051 | return 0; | 1051 | return 0; |
1052 | 1052 | ||
1053 | return 1; | 1053 | return 1; |
1054 | } | 1054 | } |
1055 | __setup("clearcpuid=", setup_disablecpuid); | 1055 | __setup("clearcpuid=", setup_disablecpuid); |
1056 | 1056 | ||
1057 | #ifdef CONFIG_X86_64 | 1057 | #ifdef CONFIG_X86_64 |
1058 | struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; | 1058 | struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; |
1059 | struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1, | 1059 | struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1, |
1060 | (unsigned long) nmi_idt_table }; | 1060 | (unsigned long) nmi_idt_table }; |
1061 | 1061 | ||
1062 | DEFINE_PER_CPU_FIRST(union irq_stack_union, | 1062 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
1063 | irq_stack_union) __aligned(PAGE_SIZE); | 1063 | irq_stack_union) __aligned(PAGE_SIZE); |
1064 | 1064 | ||
1065 | /* | 1065 | /* |
1066 | * The following four percpu variables are hot. Align current_task to | 1066 | * The following four percpu variables are hot. Align current_task to |
1067 | * cacheline size such that all four fall in the same cacheline. | 1067 | * cacheline size such that all four fall in the same cacheline. |
1068 | */ | 1068 | */ |
1069 | DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = | 1069 | DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = |
1070 | &init_task; | 1070 | &init_task; |
1071 | EXPORT_PER_CPU_SYMBOL(current_task); | 1071 | EXPORT_PER_CPU_SYMBOL(current_task); |
1072 | 1072 | ||
1073 | DEFINE_PER_CPU(unsigned long, kernel_stack) = | 1073 | DEFINE_PER_CPU(unsigned long, kernel_stack) = |
1074 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; | 1074 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; |
1075 | EXPORT_PER_CPU_SYMBOL(kernel_stack); | 1075 | EXPORT_PER_CPU_SYMBOL(kernel_stack); |
1076 | 1076 | ||
1077 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | 1077 | DEFINE_PER_CPU(char *, irq_stack_ptr) = |
1078 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; | 1078 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; |
1079 | 1079 | ||
1080 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; | 1080 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; |
1081 | 1081 | ||
1082 | DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); | 1082 | DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); |
1083 | 1083 | ||
1084 | /* | 1084 | /* |
1085 | * Special IST stacks which the CPU switches to when it calls | 1085 | * Special IST stacks which the CPU switches to when it calls |
1086 | * an IST-marked descriptor entry. Up to 7 stacks (hardware | 1086 | * an IST-marked descriptor entry. Up to 7 stacks (hardware |
1087 | * limit), all of them are 4K, except the debug stack which | 1087 | * limit), all of them are 4K, except the debug stack which |
1088 | * is 8K. | 1088 | * is 8K. |
1089 | */ | 1089 | */ |
1090 | static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { | 1090 | static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { |
1091 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, | 1091 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, |
1092 | [DEBUG_STACK - 1] = DEBUG_STKSZ | 1092 | [DEBUG_STACK - 1] = DEBUG_STKSZ |
1093 | }; | 1093 | }; |
1094 | 1094 | ||
1095 | static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks | 1095 | static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks |
1096 | [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); | 1096 | [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); |
1097 | 1097 | ||
1098 | /* May not be marked __init: used by software suspend */ | 1098 | /* May not be marked __init: used by software suspend */ |
1099 | void syscall_init(void) | 1099 | void syscall_init(void) |
1100 | { | 1100 | { |
1101 | /* | 1101 | /* |
1102 | * LSTAR and STAR live in a somewhat strange symbiosis. | 1102 | * LSTAR and STAR live in a somewhat strange symbiosis. |
1103 | * They both write to the same internal register. STAR allows | 1103 | * They both write to the same internal register. STAR allows |
1104 | * setting CS/DS, but only a 32-bit target. LSTAR sets the 64-bit rip. | 1104 | * setting CS/DS, but only a 32-bit target. LSTAR sets the 64-bit rip. |
1105 | */ | 1105 | */ |
1106 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); | 1106 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); |
1107 | wrmsrl(MSR_LSTAR, system_call); | 1107 | wrmsrl(MSR_LSTAR, system_call); |
1108 | wrmsrl(MSR_CSTAR, ignore_sysret); | 1108 | wrmsrl(MSR_CSTAR, ignore_sysret); |
1109 | 1109 | ||
1110 | #ifdef CONFIG_IA32_EMULATION | 1110 | #ifdef CONFIG_IA32_EMULATION |
1111 | syscall32_cpu_init(); | 1111 | syscall32_cpu_init(); |
1112 | #endif | 1112 | #endif |
1113 | 1113 | ||
1114 | /* Flags to clear on syscall */ | 1114 | /* Flags to clear on syscall */ |
1115 | wrmsrl(MSR_SYSCALL_MASK, | 1115 | wrmsrl(MSR_SYSCALL_MASK, |
1116 | X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); | 1116 | X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| |
1117 | X86_EFLAGS_IOPL|X86_EFLAGS_AC); | ||
1117 | } | 1118 | } |
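
The hunk above is this patch's change to common.c: X86_EFLAGS_AC joins MSR_SYSCALL_MASK unconditionally, so SYSCALL always clears AC and the fast path can never enter the kernel with user-space access implicitly open under SMAP. For a concrete value, the mask works out as follows (the constants mirror <asm/processor-flags.h>):

    /* Worked value of the SYSCALL flag mask after this patch; the
     * constants mirror <asm/processor-flags.h>. */
    #include <stdio.h>

    #define X86_EFLAGS_TF   0x00000100      /* trap flag */
    #define X86_EFLAGS_IF   0x00000200      /* interrupt enable */
    #define X86_EFLAGS_DF   0x00000400      /* direction flag */
    #define X86_EFLAGS_IOPL 0x00003000      /* I/O privilege level */
    #define X86_EFLAGS_AC   0x00040000      /* alignment check */

    int main(void)
    {
            unsigned long mask = X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF |
                                 X86_EFLAGS_IOPL | X86_EFLAGS_AC;

            printf("MSR_SYSCALL_MASK = %#lx\n", mask);      /* 0x43700 */
            return 0;
    }
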
1118 | 1119 | ||
1119 | unsigned long kernel_eflags; | 1120 | unsigned long kernel_eflags; |
1120 | 1121 | ||
1121 | /* | 1122 | /* |
1122 | * Copies of the original ist values from the tss are only accessed during | 1123 | * Copies of the original ist values from the tss are only accessed during |
1123 | * debugging, no special alignment required. | 1124 | * debugging, no special alignment required. |
1124 | */ | 1125 | */ |
1125 | DEFINE_PER_CPU(struct orig_ist, orig_ist); | 1126 | DEFINE_PER_CPU(struct orig_ist, orig_ist); |
1126 | 1127 | ||
1127 | static DEFINE_PER_CPU(unsigned long, debug_stack_addr); | 1128 | static DEFINE_PER_CPU(unsigned long, debug_stack_addr); |
1128 | DEFINE_PER_CPU(int, debug_stack_usage); | 1129 | DEFINE_PER_CPU(int, debug_stack_usage); |
1129 | 1130 | ||
1130 | int is_debug_stack(unsigned long addr) | 1131 | int is_debug_stack(unsigned long addr) |
1131 | { | 1132 | { |
1132 | return __get_cpu_var(debug_stack_usage) || | 1133 | return __get_cpu_var(debug_stack_usage) || |
1133 | (addr <= __get_cpu_var(debug_stack_addr) && | 1134 | (addr <= __get_cpu_var(debug_stack_addr) && |
1134 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); | 1135 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); |
1135 | } | 1136 | } |
1136 | 1137 | ||
1137 | static DEFINE_PER_CPU(u32, debug_stack_use_ctr); | 1138 | static DEFINE_PER_CPU(u32, debug_stack_use_ctr); |
1138 | 1139 | ||
1139 | void debug_stack_set_zero(void) | 1140 | void debug_stack_set_zero(void) |
1140 | { | 1141 | { |
1141 | this_cpu_inc(debug_stack_use_ctr); | 1142 | this_cpu_inc(debug_stack_use_ctr); |
1142 | load_idt((const struct desc_ptr *)&nmi_idt_descr); | 1143 | load_idt((const struct desc_ptr *)&nmi_idt_descr); |
1143 | } | 1144 | } |
1144 | 1145 | ||
1145 | void debug_stack_reset(void) | 1146 | void debug_stack_reset(void) |
1146 | { | 1147 | { |
1147 | if (WARN_ON(!this_cpu_read(debug_stack_use_ctr))) | 1148 | if (WARN_ON(!this_cpu_read(debug_stack_use_ctr))) |
1148 | return; | 1149 | return; |
1149 | if (this_cpu_dec_return(debug_stack_use_ctr) == 0) | 1150 | if (this_cpu_dec_return(debug_stack_use_ctr) == 0) |
1150 | load_idt((const struct desc_ptr *)&idt_descr); | 1151 | load_idt((const struct desc_ptr *)&idt_descr); |
1151 | } | 1152 | } |
1152 | 1153 | ||
1153 | #else /* CONFIG_X86_64 */ | 1154 | #else /* CONFIG_X86_64 */ |
1154 | 1155 | ||
1155 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | 1156 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; |
1156 | EXPORT_PER_CPU_SYMBOL(current_task); | 1157 | EXPORT_PER_CPU_SYMBOL(current_task); |
1157 | DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); | 1158 | DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); |
1158 | 1159 | ||
1159 | #ifdef CONFIG_CC_STACKPROTECTOR | 1160 | #ifdef CONFIG_CC_STACKPROTECTOR |
1160 | DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); | 1161 | DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); |
1161 | #endif | 1162 | #endif |
1162 | 1163 | ||
1163 | /* Make sure %fs and %gs are initialized properly in idle threads */ | 1164 | /* Make sure %fs and %gs are initialized properly in idle threads */ |
1164 | struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) | 1165 | struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) |
1165 | { | 1166 | { |
1166 | memset(regs, 0, sizeof(struct pt_regs)); | 1167 | memset(regs, 0, sizeof(struct pt_regs)); |
1167 | regs->fs = __KERNEL_PERCPU; | 1168 | regs->fs = __KERNEL_PERCPU; |
1168 | regs->gs = __KERNEL_STACK_CANARY; | 1169 | regs->gs = __KERNEL_STACK_CANARY; |
1169 | 1170 | ||
1170 | return regs; | 1171 | return regs; |
1171 | } | 1172 | } |
1172 | #endif /* CONFIG_X86_64 */ | 1173 | #endif /* CONFIG_X86_64 */ |
1173 | 1174 | ||
1174 | /* | 1175 | /* |
1175 | * Clear all 6 debug registers: | 1176 | * Clear all 6 debug registers: |
1176 | */ | 1177 | */ |
1177 | static void clear_all_debug_regs(void) | 1178 | static void clear_all_debug_regs(void) |
1178 | { | 1179 | { |
1179 | int i; | 1180 | int i; |
1180 | 1181 | ||
1181 | for (i = 0; i < 8; i++) { | 1182 | for (i = 0; i < 8; i++) { |
1182 | /* Ignore db4, db5 */ | 1183 | /* Ignore db4, db5 */ |
1183 | if ((i == 4) || (i == 5)) | 1184 | if ((i == 4) || (i == 5)) |
1184 | continue; | 1185 | continue; |
1185 | 1186 | ||
1186 | set_debugreg(0, i); | 1187 | set_debugreg(0, i); |
1187 | } | 1188 | } |
1188 | } | 1189 | } |
1189 | 1190 | ||
1190 | #ifdef CONFIG_KGDB | 1191 | #ifdef CONFIG_KGDB |
1191 | /* | 1192 | /* |
1192 | * Restore debug regs if using kgdbwait and you have a kernel debugger | 1193 | * Restore debug regs if using kgdbwait and you have a kernel debugger |
1193 | * connection established. | 1194 | * connection established. |
1194 | */ | 1195 | */ |
1195 | static void dbg_restore_debug_regs(void) | 1196 | static void dbg_restore_debug_regs(void) |
1196 | { | 1197 | { |
1197 | if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break)) | 1198 | if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break)) |
1198 | arch_kgdb_ops.correct_hw_break(); | 1199 | arch_kgdb_ops.correct_hw_break(); |
1199 | } | 1200 | } |
1200 | #else /* ! CONFIG_KGDB */ | 1201 | #else /* ! CONFIG_KGDB */ |
1201 | #define dbg_restore_debug_regs() | 1202 | #define dbg_restore_debug_regs() |
1202 | #endif /* ! CONFIG_KGDB */ | 1203 | #endif /* ! CONFIG_KGDB */ |
1203 | 1204 | ||
1204 | /* | 1205 | /* |
1205 | * cpu_init() initializes state that is per-CPU. Some data is already | 1206 | * cpu_init() initializes state that is per-CPU. Some data is already |
1206 | * initialized (naturally) in the bootstrap process, such as the GDT | 1207 | * initialized (naturally) in the bootstrap process, such as the GDT |
1207 | * and IDT. We reload them nevertheless; this function acts as a | 1208 | * and IDT. We reload them nevertheless; this function acts as a |
1208 | * 'CPU state barrier': nothing should get across. | 1209 | * 'CPU state barrier': nothing should get across. |
1209 | * A lot of state is already set up in PDA init for 64-bit. | 1210 | * A lot of state is already set up in PDA init for 64-bit. |
1210 | */ | 1211 | */ |
1211 | #ifdef CONFIG_X86_64 | 1212 | #ifdef CONFIG_X86_64 |
1212 | 1213 | ||
1213 | void __cpuinit cpu_init(void) | 1214 | void __cpuinit cpu_init(void) |
1214 | { | 1215 | { |
1215 | struct orig_ist *oist; | 1216 | struct orig_ist *oist; |
1216 | struct task_struct *me; | 1217 | struct task_struct *me; |
1217 | struct tss_struct *t; | 1218 | struct tss_struct *t; |
1218 | unsigned long v; | 1219 | unsigned long v; |
1219 | int cpu; | 1220 | int cpu; |
1220 | int i; | 1221 | int i; |
1221 | 1222 | ||
1222 | cpu = stack_smp_processor_id(); | 1223 | cpu = stack_smp_processor_id(); |
1223 | t = &per_cpu(init_tss, cpu); | 1224 | t = &per_cpu(init_tss, cpu); |
1224 | oist = &per_cpu(orig_ist, cpu); | 1225 | oist = &per_cpu(orig_ist, cpu); |
1225 | 1226 | ||
1226 | #ifdef CONFIG_NUMA | 1227 | #ifdef CONFIG_NUMA |
1227 | if (cpu != 0 && this_cpu_read(numa_node) == 0 && | 1228 | if (cpu != 0 && this_cpu_read(numa_node) == 0 && |
1228 | early_cpu_to_node(cpu) != NUMA_NO_NODE) | 1229 | early_cpu_to_node(cpu) != NUMA_NO_NODE) |
1229 | set_numa_node(early_cpu_to_node(cpu)); | 1230 | set_numa_node(early_cpu_to_node(cpu)); |
1230 | #endif | 1231 | #endif |
1231 | 1232 | ||
1232 | me = current; | 1233 | me = current; |
1233 | 1234 | ||
1234 | if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) | 1235 | if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) |
1235 | panic("CPU#%d already initialized!\n", cpu); | 1236 | panic("CPU#%d already initialized!\n", cpu); |
1236 | 1237 | ||
1237 | pr_debug("Initializing CPU#%d\n", cpu); | 1238 | pr_debug("Initializing CPU#%d\n", cpu); |
1238 | 1239 | ||
1239 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | 1240 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
1240 | 1241 | ||
1241 | /* | 1242 | /* |
1242 | * Initialize the per-CPU GDT with the boot GDT, | 1243 | * Initialize the per-CPU GDT with the boot GDT, |
1243 | * and set up the GDT descriptor: | 1244 | * and set up the GDT descriptor: |
1244 | */ | 1245 | */ |
1245 | 1246 | ||
1246 | switch_to_new_gdt(cpu); | 1247 | switch_to_new_gdt(cpu); |
1247 | loadsegment(fs, 0); | 1248 | loadsegment(fs, 0); |
1248 | 1249 | ||
1249 | load_idt((const struct desc_ptr *)&idt_descr); | 1250 | load_idt((const struct desc_ptr *)&idt_descr); |
1250 | 1251 | ||
1251 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); | 1252 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); |
1252 | syscall_init(); | 1253 | syscall_init(); |
1253 | 1254 | ||
1254 | wrmsrl(MSR_FS_BASE, 0); | 1255 | wrmsrl(MSR_FS_BASE, 0); |
1255 | wrmsrl(MSR_KERNEL_GS_BASE, 0); | 1256 | wrmsrl(MSR_KERNEL_GS_BASE, 0); |
1256 | barrier(); | 1257 | barrier(); |
1257 | 1258 | ||
1258 | x86_configure_nx(); | 1259 | x86_configure_nx(); |
1259 | if (cpu != 0) | 1260 | if (cpu != 0) |
1260 | enable_x2apic(); | 1261 | enable_x2apic(); |
1261 | 1262 | ||
1262 | /* | 1263 | /* |
1263 | * set up and load the per-CPU TSS | 1264 | * set up and load the per-CPU TSS |
1264 | */ | 1265 | */ |
1265 | if (!oist->ist[0]) { | 1266 | if (!oist->ist[0]) { |
1266 | char *estacks = per_cpu(exception_stacks, cpu); | 1267 | char *estacks = per_cpu(exception_stacks, cpu); |
1267 | 1268 | ||
1268 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | 1269 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { |
1269 | estacks += exception_stack_sizes[v]; | 1270 | estacks += exception_stack_sizes[v]; |
1270 | oist->ist[v] = t->x86_tss.ist[v] = | 1271 | oist->ist[v] = t->x86_tss.ist[v] = |
1271 | (unsigned long)estacks; | 1272 | (unsigned long)estacks; |
1272 | if (v == DEBUG_STACK-1) | 1273 | if (v == DEBUG_STACK-1) |
1273 | per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; | 1274 | per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; |
1274 | } | 1275 | } |
1275 | } | 1276 | } |
1276 | 1277 | ||
1277 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | 1278 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); |
1278 | 1279 | ||
1279 | /* | 1280 | /* |
1280 | * <= is required because the CPU will access up to | 1281 | * <= is required because the CPU will access up to |
1281 | * 8 bits beyond the end of the IO permission bitmap. | 1282 | * 8 bits beyond the end of the IO permission bitmap. |
1282 | */ | 1283 | */ |
1283 | for (i = 0; i <= IO_BITMAP_LONGS; i++) | 1284 | for (i = 0; i <= IO_BITMAP_LONGS; i++) |
1284 | t->io_bitmap[i] = ~0UL; | 1285 | t->io_bitmap[i] = ~0UL; |
1285 | 1286 | ||
1286 | atomic_inc(&init_mm.mm_count); | 1287 | atomic_inc(&init_mm.mm_count); |
1287 | me->active_mm = &init_mm; | 1288 | me->active_mm = &init_mm; |
1288 | BUG_ON(me->mm); | 1289 | BUG_ON(me->mm); |
1289 | enter_lazy_tlb(&init_mm, me); | 1290 | enter_lazy_tlb(&init_mm, me); |
1290 | 1291 | ||
1291 | load_sp0(t, ¤t->thread); | 1292 | load_sp0(t, ¤t->thread); |
1292 | set_tss_desc(cpu, t); | 1293 | set_tss_desc(cpu, t); |
1293 | load_TR_desc(); | 1294 | load_TR_desc(); |
1294 | load_LDT(&init_mm.context); | 1295 | load_LDT(&init_mm.context); |
1295 | 1296 | ||
1296 | clear_all_debug_regs(); | 1297 | clear_all_debug_regs(); |
1297 | dbg_restore_debug_regs(); | 1298 | dbg_restore_debug_regs(); |
1298 | 1299 | ||
1299 | fpu_init(); | 1300 | fpu_init(); |
1300 | xsave_init(); | 1301 | xsave_init(); |
1301 | 1302 | ||
1302 | raw_local_save_flags(kernel_eflags); | 1303 | raw_local_save_flags(kernel_eflags); |
1303 | 1304 | ||
1304 | if (is_uv_system()) | 1305 | if (is_uv_system()) |
1305 | uv_cpu_init(); | 1306 | uv_cpu_init(); |
1306 | } | 1307 | } |
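
One detail in cpu_init() above worth spelling out is the "<=" in the I/O-bitmap fill loop: the TSS bitmap carries one extra word because the CPU may read up to 8 bits past its nominal end. A toy model of that loop (sizes mirror the usual 65536-port layout; names are illustrative):

    /* Toy model of the "<= IO_BITMAP_LONGS" fill; 65536 ports, one
     * spare word for the byte the CPU reads past the end. */
    #include <stdio.h>

    #define IO_BITMAP_BITS  65536
    #define BITS_PER_LONG   (8 * sizeof(unsigned long))
    #define IO_BITMAP_LONGS (IO_BITMAP_BITS / BITS_PER_LONG)

    static unsigned long io_bitmap[IO_BITMAP_LONGS + 1];

    int main(void)
    {
            size_t i;

            for (i = 0; i <= IO_BITMAP_LONGS; i++)  /* note the <= */
                    io_bitmap[i] = ~0UL;            /* deny every port */

            printf("%zu words filled\n", (size_t)IO_BITMAP_LONGS + 1);
            return 0;
    }
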
1307 | 1308 | ||
1308 | #else | 1309 | #else |
1309 | 1310 | ||
1310 | void __cpuinit cpu_init(void) | 1311 | void __cpuinit cpu_init(void) |
1311 | { | 1312 | { |
1312 | int cpu = smp_processor_id(); | 1313 | int cpu = smp_processor_id(); |
1313 | struct task_struct *curr = current; | 1314 | struct task_struct *curr = current; |
1314 | struct tss_struct *t = &per_cpu(init_tss, cpu); | 1315 | struct tss_struct *t = &per_cpu(init_tss, cpu); |
1315 | struct thread_struct *thread = &curr->thread; | 1316 | struct thread_struct *thread = &curr->thread; |
1316 | 1317 | ||
1317 | if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { | 1318 | if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { |
1318 | printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); | 1319 | printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); |
1319 | for (;;) | 1320 | for (;;) |
1320 | local_irq_enable(); | 1321 | local_irq_enable(); |
1321 | } | 1322 | } |
1322 | 1323 | ||
1323 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); | 1324 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); |
1324 | 1325 | ||
1325 | if (cpu_has_vme || cpu_has_tsc || cpu_has_de) | 1326 | if (cpu_has_vme || cpu_has_tsc || cpu_has_de) |
1326 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | 1327 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
1327 | 1328 | ||
1328 | load_idt(&idt_descr); | 1329 | load_idt(&idt_descr); |
1329 | switch_to_new_gdt(cpu); | 1330 | switch_to_new_gdt(cpu); |
1330 | 1331 | ||
1331 | /* | 1332 | /* |
1332 | * Set up and load the per-CPU TSS and LDT | 1333 | * Set up and load the per-CPU TSS and LDT |
1333 | */ | 1334 | */ |
1334 | atomic_inc(&init_mm.mm_count); | 1335 | atomic_inc(&init_mm.mm_count); |
1335 | curr->active_mm = &init_mm; | 1336 | curr->active_mm = &init_mm; |
1336 | BUG_ON(curr->mm); | 1337 | BUG_ON(curr->mm); |
1337 | enter_lazy_tlb(&init_mm, curr); | 1338 | enter_lazy_tlb(&init_mm, curr); |
1338 | 1339 | ||
1339 | load_sp0(t, thread); | 1340 | load_sp0(t, thread); |
1340 | set_tss_desc(cpu, t); | 1341 | set_tss_desc(cpu, t); |
1341 | load_TR_desc(); | 1342 | load_TR_desc(); |
1342 | load_LDT(&init_mm.context); | 1343 | load_LDT(&init_mm.context); |
1343 | 1344 | ||
1344 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | 1345 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); |
1345 | 1346 | ||
1346 | #ifdef CONFIG_DOUBLEFAULT | 1347 | #ifdef CONFIG_DOUBLEFAULT |
1347 | /* Set up doublefault TSS pointer in the GDT */ | 1348 | /* Set up doublefault TSS pointer in the GDT */ |
1348 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); | 1349 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); |
1349 | #endif | 1350 | #endif |
1350 | 1351 | ||
1351 | clear_all_debug_regs(); | 1352 | clear_all_debug_regs(); |
1352 | dbg_restore_debug_regs(); | 1353 | dbg_restore_debug_regs(); |
1353 | 1354 | ||
1354 | fpu_init(); | 1355 | fpu_init(); |
1355 | xsave_init(); | 1356 | xsave_init(); |
1356 | } | 1357 | } |
1357 | #endif | 1358 | #endif |
1358 | 1359 |
arch/x86/kernel/entry_64.S
1 | /* | 1 | /* |
2 | * linux/arch/x86_64/entry.S | 2 | * linux/arch/x86_64/entry.S |
3 | * | 3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs | 5 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs |
6 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> | 6 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> |
7 | */ | 7 | */ |
8 | 8 | ||
9 | /* | 9 | /* |
10 | * entry.S contains the system-call and fault low-level handling routines. | 10 | * entry.S contains the system-call and fault low-level handling routines. |
11 | * | 11 | * |
12 | * Some of this is documented in Documentation/x86/entry_64.txt | 12 | * Some of this is documented in Documentation/x86/entry_64.txt |
13 | * | 13 | * |
14 | * NOTE: This code handles signal-recognition, which happens every time | 14 | * NOTE: This code handles signal-recognition, which happens every time |
15 | * after an interrupt and after each system call. | 15 | * after an interrupt and after each system call. |
16 | * | 16 | * |
17 | * Normal syscalls and interrupts don't save a full stack frame; this is | 17 | * Normal syscalls and interrupts don't save a full stack frame; this is |
18 | * only done for syscall tracing, signals or fork/exec et al. | 18 | * only done for syscall tracing, signals or fork/exec et al. |
19 | * | 19 | * |
20 | * A note on terminology: | 20 | * A note on terminology: |
21 | * - top of stack: Architecture defined interrupt frame from SS to RIP | 21 | * - top of stack: Architecture defined interrupt frame from SS to RIP |
22 | * at the top of the kernel process stack. | 22 | * at the top of the kernel process stack. |
23 | * - partial stack frame: partially saved registers up to R11. | 23 | * - partial stack frame: partially saved registers up to R11. |
24 | * - full stack frame: Like partial stack frame, but all register saved. | 24 | * - full stack frame: Like partial stack frame, but all register saved. |
25 | * | 25 | * |
26 | * Some macro usage: | 26 | * Some macro usage: |
27 | * - CFI macros are used to generate dwarf2 unwind information for better | 27 | * - CFI macros are used to generate dwarf2 unwind information for better |
28 | * backtraces. They don't change any code. | 28 | * backtraces. They don't change any code. |
29 | * - SAVE_ALL/RESTORE_ALL - Save/restore all registers | 29 | * - SAVE_ALL/RESTORE_ALL - Save/restore all registers |
30 | * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. | 30 | * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. |
31 | * There are unfortunately lots of special cases where some registers are | 31 | * There are unfortunately lots of special cases where some registers are |
32 | * not touched. The macro is a big mess that should be cleaned up. | 32 | * not touched. The macro is a big mess that should be cleaned up. |
33 | * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. | 33 | * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. |
34 | * Gives a full stack frame. | 34 | * Gives a full stack frame. |
35 | * - ENTRY/END Define functions in the symbol table. | 35 | * - ENTRY/END Define functions in the symbol table. |
36 | * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack | 36 | * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack |
37 | * frame that is otherwise undefined after a SYSCALL | 37 | * frame that is otherwise undefined after a SYSCALL |
38 | * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. | 38 | * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. |
39 | * - errorentry/paranoidentry/zeroentry - Define exception entry points. | 39 | * - errorentry/paranoidentry/zeroentry - Define exception entry points. |
40 | */ | 40 | */ |
41 | 41 | ||
42 | #include <linux/linkage.h> | 42 | #include <linux/linkage.h> |
43 | #include <asm/segment.h> | 43 | #include <asm/segment.h> |
44 | #include <asm/cache.h> | 44 | #include <asm/cache.h> |
45 | #include <asm/errno.h> | 45 | #include <asm/errno.h> |
46 | #include <asm/dwarf2.h> | 46 | #include <asm/dwarf2.h> |
47 | #include <asm/calling.h> | 47 | #include <asm/calling.h> |
48 | #include <asm/asm-offsets.h> | 48 | #include <asm/asm-offsets.h> |
49 | #include <asm/msr.h> | 49 | #include <asm/msr.h> |
50 | #include <asm/unistd.h> | 50 | #include <asm/unistd.h> |
51 | #include <asm/thread_info.h> | 51 | #include <asm/thread_info.h> |
52 | #include <asm/hw_irq.h> | 52 | #include <asm/hw_irq.h> |
53 | #include <asm/page_types.h> | 53 | #include <asm/page_types.h> |
54 | #include <asm/irqflags.h> | 54 | #include <asm/irqflags.h> |
55 | #include <asm/paravirt.h> | 55 | #include <asm/paravirt.h> |
56 | #include <asm/ftrace.h> | 56 | #include <asm/ftrace.h> |
57 | #include <asm/percpu.h> | 57 | #include <asm/percpu.h> |
58 | #include <asm/asm.h> | 58 | #include <asm/asm.h> |
59 | #include <asm/smap.h> | ||
59 | #include <linux/err.h> | 60 | #include <linux/err.h> |
60 | 61 | ||
61 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 62 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
62 | #include <linux/elf-em.h> | 63 | #include <linux/elf-em.h> |
63 | #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) | 64 | #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) |
64 | #define __AUDIT_ARCH_64BIT 0x80000000 | 65 | #define __AUDIT_ARCH_64BIT 0x80000000 |
65 | #define __AUDIT_ARCH_LE 0x40000000 | 66 | #define __AUDIT_ARCH_LE 0x40000000 |
66 | 67 | ||
67 | .code64 | 68 | .code64 |
68 | .section .entry.text, "ax" | 69 | .section .entry.text, "ax" |
69 | 70 | ||
70 | #ifdef CONFIG_FUNCTION_TRACER | 71 | #ifdef CONFIG_FUNCTION_TRACER |
71 | #ifdef CONFIG_DYNAMIC_FTRACE | 72 | #ifdef CONFIG_DYNAMIC_FTRACE |
72 | ENTRY(mcount) | 73 | ENTRY(mcount) |
73 | retq | 74 | retq |
74 | END(mcount) | 75 | END(mcount) |
75 | 76 | ||
76 | ENTRY(ftrace_caller) | 77 | ENTRY(ftrace_caller) |
77 | cmpl $0, function_trace_stop | 78 | cmpl $0, function_trace_stop |
78 | jne ftrace_stub | 79 | jne ftrace_stub |
79 | 80 | ||
80 | MCOUNT_SAVE_FRAME | 81 | MCOUNT_SAVE_FRAME |
81 | 82 | ||
82 | movq 0x38(%rsp), %rdi | 83 | movq 0x38(%rsp), %rdi |
83 | movq 8(%rbp), %rsi | 84 | movq 8(%rbp), %rsi |
84 | subq $MCOUNT_INSN_SIZE, %rdi | 85 | subq $MCOUNT_INSN_SIZE, %rdi |
85 | 86 | ||
86 | GLOBAL(ftrace_call) | 87 | GLOBAL(ftrace_call) |
87 | call ftrace_stub | 88 | call ftrace_stub |
88 | 89 | ||
89 | MCOUNT_RESTORE_FRAME | 90 | MCOUNT_RESTORE_FRAME |
90 | 91 | ||
91 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 92 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
92 | GLOBAL(ftrace_graph_call) | 93 | GLOBAL(ftrace_graph_call) |
93 | jmp ftrace_stub | 94 | jmp ftrace_stub |
94 | #endif | 95 | #endif |
95 | 96 | ||
96 | GLOBAL(ftrace_stub) | 97 | GLOBAL(ftrace_stub) |
97 | retq | 98 | retq |
98 | END(ftrace_caller) | 99 | END(ftrace_caller) |
99 | 100 | ||
100 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 101 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
101 | ENTRY(mcount) | 102 | ENTRY(mcount) |
102 | cmpl $0, function_trace_stop | 103 | cmpl $0, function_trace_stop |
103 | jne ftrace_stub | 104 | jne ftrace_stub |
104 | 105 | ||
105 | cmpq $ftrace_stub, ftrace_trace_function | 106 | cmpq $ftrace_stub, ftrace_trace_function |
106 | jnz trace | 107 | jnz trace |
107 | 108 | ||
108 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 109 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
109 | cmpq $ftrace_stub, ftrace_graph_return | 110 | cmpq $ftrace_stub, ftrace_graph_return |
110 | jnz ftrace_graph_caller | 111 | jnz ftrace_graph_caller |
111 | 112 | ||
112 | cmpq $ftrace_graph_entry_stub, ftrace_graph_entry | 113 | cmpq $ftrace_graph_entry_stub, ftrace_graph_entry |
113 | jnz ftrace_graph_caller | 114 | jnz ftrace_graph_caller |
114 | #endif | 115 | #endif |
115 | 116 | ||
116 | GLOBAL(ftrace_stub) | 117 | GLOBAL(ftrace_stub) |
117 | retq | 118 | retq |
118 | 119 | ||
119 | trace: | 120 | trace: |
120 | MCOUNT_SAVE_FRAME | 121 | MCOUNT_SAVE_FRAME |
121 | 122 | ||
122 | movq 0x38(%rsp), %rdi | 123 | movq 0x38(%rsp), %rdi |
123 | movq 8(%rbp), %rsi | 124 | movq 8(%rbp), %rsi |
124 | subq $MCOUNT_INSN_SIZE, %rdi | 125 | subq $MCOUNT_INSN_SIZE, %rdi |
125 | 126 | ||
126 | call *ftrace_trace_function | 127 | call *ftrace_trace_function |
127 | 128 | ||
128 | MCOUNT_RESTORE_FRAME | 129 | MCOUNT_RESTORE_FRAME |
129 | 130 | ||
130 | jmp ftrace_stub | 131 | jmp ftrace_stub |
131 | END(mcount) | 132 | END(mcount) |
132 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 133 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
133 | #endif /* CONFIG_FUNCTION_TRACER */ | 134 | #endif /* CONFIG_FUNCTION_TRACER */ |
134 | 135 | ||
135 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 136 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
136 | ENTRY(ftrace_graph_caller) | 137 | ENTRY(ftrace_graph_caller) |
137 | cmpl $0, function_trace_stop | 138 | cmpl $0, function_trace_stop |
138 | jne ftrace_stub | 139 | jne ftrace_stub |
139 | 140 | ||
140 | MCOUNT_SAVE_FRAME | 141 | MCOUNT_SAVE_FRAME |
141 | 142 | ||
142 | leaq 8(%rbp), %rdi | 143 | leaq 8(%rbp), %rdi |
143 | movq 0x38(%rsp), %rsi | 144 | movq 0x38(%rsp), %rsi |
144 | movq (%rbp), %rdx | 145 | movq (%rbp), %rdx |
145 | subq $MCOUNT_INSN_SIZE, %rsi | 146 | subq $MCOUNT_INSN_SIZE, %rsi |
146 | 147 | ||
147 | call prepare_ftrace_return | 148 | call prepare_ftrace_return |
148 | 149 | ||
149 | MCOUNT_RESTORE_FRAME | 150 | MCOUNT_RESTORE_FRAME |
150 | 151 | ||
151 | retq | 152 | retq |
152 | END(ftrace_graph_caller) | 153 | END(ftrace_graph_caller) |
153 | 154 | ||
154 | GLOBAL(return_to_handler) | 155 | GLOBAL(return_to_handler) |
155 | subq $24, %rsp | 156 | subq $24, %rsp |
156 | 157 | ||
157 | /* Save the return values */ | 158 | /* Save the return values */ |
158 | movq %rax, (%rsp) | 159 | movq %rax, (%rsp) |
159 | movq %rdx, 8(%rsp) | 160 | movq %rdx, 8(%rsp) |
160 | movq %rbp, %rdi | 161 | movq %rbp, %rdi |
161 | 162 | ||
162 | call ftrace_return_to_handler | 163 | call ftrace_return_to_handler |
163 | 164 | ||
164 | movq %rax, %rdi | 165 | movq %rax, %rdi |
165 | movq 8(%rsp), %rdx | 166 | movq 8(%rsp), %rdx |
166 | movq (%rsp), %rax | 167 | movq (%rsp), %rax |
167 | addq $24, %rsp | 168 | addq $24, %rsp |
168 | jmp *%rdi | 169 | jmp *%rdi |
169 | #endif | 170 | #endif |
170 | 171 | ||
171 | 172 | ||
172 | #ifndef CONFIG_PREEMPT | 173 | #ifndef CONFIG_PREEMPT |
173 | #define retint_kernel retint_restore_args | 174 | #define retint_kernel retint_restore_args |
174 | #endif | 175 | #endif |
175 | 176 | ||
176 | #ifdef CONFIG_PARAVIRT | 177 | #ifdef CONFIG_PARAVIRT |
177 | ENTRY(native_usergs_sysret64) | 178 | ENTRY(native_usergs_sysret64) |
178 | swapgs | 179 | swapgs |
179 | sysretq | 180 | sysretq |
180 | ENDPROC(native_usergs_sysret64) | 181 | ENDPROC(native_usergs_sysret64) |
181 | #endif /* CONFIG_PARAVIRT */ | 182 | #endif /* CONFIG_PARAVIRT */ |
182 | 183 | ||
183 | 184 | ||
184 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET | 185 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET |
185 | #ifdef CONFIG_TRACE_IRQFLAGS | 186 | #ifdef CONFIG_TRACE_IRQFLAGS |
186 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | 187 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ |
187 | jnc 1f | 188 | jnc 1f |
188 | TRACE_IRQS_ON | 189 | TRACE_IRQS_ON |
189 | 1: | 190 | 1: |
190 | #endif | 191 | #endif |
191 | .endm | 192 | .endm |
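
TRACE_IRQS_IRETQ tests bit 9 of the saved EFLAGS image, i.e. the IF flag, so TRACE_IRQS_ON only runs when returning to a context that had interrupts enabled. In C terms the test is simply (the constant mirrors the architectural flag value):

    /* C rendering of the "interrupts off?" test; EFLAGS.IF is bit 9. */
    #define X86_EFLAGS_IF 0x00000200

    static inline int saved_irqs_enabled(unsigned long saved_eflags)
    {
            return (saved_eflags & X86_EFLAGS_IF) != 0;
    }
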
192 | 193 | ||
193 | /* | 194 | /* |
194 | * When dynamic function tracer is enabled it will add a breakpoint | 195 | * When dynamic function tracer is enabled it will add a breakpoint |
195 | * to all locations that it is about to modify, sync CPUs, update | 196 | * to all locations that it is about to modify, sync CPUs, update |
196 | * all the code, sync CPUs, then remove the breakpoints. In this time | 197 | * all the code, sync CPUs, then remove the breakpoints. In this time |
197 | * if lockdep is enabled, it might jump back into the debug handler | 198 | * if lockdep is enabled, it might jump back into the debug handler |
198 | * outside the updating of the IST protection (TRACE_IRQS_ON/OFF). | 199 | * outside the updating of the IST protection (TRACE_IRQS_ON/OFF). |
199 | * | 200 | * |
200 | * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to | 201 | * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to |
201 | * make sure the stack pointer does not get reset back to the top | 202 | * make sure the stack pointer does not get reset back to the top |
202 | * of the debug stack, and instead just reuses the current stack. | 203 | * of the debug stack, and instead just reuses the current stack. |
203 | */ | 204 | */ |
204 | #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) | 205 | #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) |
205 | 206 | ||
206 | .macro TRACE_IRQS_OFF_DEBUG | 207 | .macro TRACE_IRQS_OFF_DEBUG |
207 | call debug_stack_set_zero | 208 | call debug_stack_set_zero |
208 | TRACE_IRQS_OFF | 209 | TRACE_IRQS_OFF |
209 | call debug_stack_reset | 210 | call debug_stack_reset |
210 | .endm | 211 | .endm |
211 | 212 | ||
212 | .macro TRACE_IRQS_ON_DEBUG | 213 | .macro TRACE_IRQS_ON_DEBUG |
213 | call debug_stack_set_zero | 214 | call debug_stack_set_zero |
214 | TRACE_IRQS_ON | 215 | TRACE_IRQS_ON |
215 | call debug_stack_reset | 216 | call debug_stack_reset |
216 | .endm | 217 | .endm |
217 | 218 | ||
218 | .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET | 219 | .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET |
219 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | 220 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ |
220 | jnc 1f | 221 | jnc 1f |
221 | TRACE_IRQS_ON_DEBUG | 222 | TRACE_IRQS_ON_DEBUG |
222 | 1: | 223 | 1: |
223 | .endm | 224 | .endm |
224 | 225 | ||
225 | #else | 226 | #else |
226 | # define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF | 227 | # define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF |
227 | # define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON | 228 | # define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON |
228 | # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ | 229 | # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ |
229 | #endif | 230 | #endif |
230 | 231 | ||
231 | /* | 232 | /* |
232 | * C code is not supposed to know about undefined top of stack. Every time | 233 | * C code is not supposed to know about undefined top of stack. Every time |
233 | * a C function with a pt_regs argument is called from the SYSCALL-based | 234 | * a C function with a pt_regs argument is called from the SYSCALL-based |
234 | * fast path, FIXUP_TOP_OF_STACK is needed. | 235 | * fast path, FIXUP_TOP_OF_STACK is needed. |
235 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs | 236 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs |
236 | * manipulation. | 237 | * manipulation. |
237 | */ | 238 | */ |
238 | 239 | ||
239 | /* %rsp:at FRAMEEND */ | 240 | /* %rsp:at FRAMEEND */ |
240 | .macro FIXUP_TOP_OF_STACK tmp offset=0 | 241 | .macro FIXUP_TOP_OF_STACK tmp offset=0 |
241 | movq PER_CPU_VAR(old_rsp),\tmp | 242 | movq PER_CPU_VAR(old_rsp),\tmp |
242 | movq \tmp,RSP+\offset(%rsp) | 243 | movq \tmp,RSP+\offset(%rsp) |
243 | movq $__USER_DS,SS+\offset(%rsp) | 244 | movq $__USER_DS,SS+\offset(%rsp) |
244 | movq $__USER_CS,CS+\offset(%rsp) | 245 | movq $__USER_CS,CS+\offset(%rsp) |
245 | movq $-1,RCX+\offset(%rsp) | 246 | movq $-1,RCX+\offset(%rsp) |
246 | movq R11+\offset(%rsp),\tmp /* get eflags */ | 247 | movq R11+\offset(%rsp),\tmp /* get eflags */ |
247 | movq \tmp,EFLAGS+\offset(%rsp) | 248 | movq \tmp,EFLAGS+\offset(%rsp) |
248 | .endm | 249 | .endm |
249 | 250 | ||
250 | .macro RESTORE_TOP_OF_STACK tmp offset=0 | 251 | .macro RESTORE_TOP_OF_STACK tmp offset=0 |
251 | movq RSP+\offset(%rsp),\tmp | 252 | movq RSP+\offset(%rsp),\tmp |
252 | movq \tmp,PER_CPU_VAR(old_rsp) | 253 | movq \tmp,PER_CPU_VAR(old_rsp) |
253 | movq EFLAGS+\offset(%rsp),\tmp | 254 | movq EFLAGS+\offset(%rsp),\tmp |
254 | movq \tmp,R11+\offset(%rsp) | 255 | movq \tmp,R11+\offset(%rsp) |
255 | .endm | 256 | .endm |
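
As a rough C view of what FIXUP_TOP_OF_STACK fills in (a sketch only: the struct keeps just the pt_regs slots touched here, and the __USER_CS/__USER_DS values are the usual x86-64 GDT selectors, not taken from this file):

#include <stdint.h>

/* Cut-down pt_regs view, keeping only the slots FIXUP_TOP_OF_STACK touches. */
struct ptregs_top {
        uint64_t r11;      /* user RFLAGS, stashed in %r11 by SYSCALL        */
        uint64_t cx;       /* pt_regs slot for %rcx (clobbered by SYSCALL)   */
        uint64_t flags;
        uint64_t sp;
        uint64_t ss;
        uint64_t cs;
};

#define __USER_CS 0x33     /* usual x86-64 GDT selectors (assumed here)      */
#define __USER_DS 0x2b

/* What FIXUP_TOP_OF_STACK does, in C: complete the partial SYSCALL frame so
 * it looks like a normal interrupt frame to C code that takes a pt_regs. */
static void fixup_top_of_stack(struct ptregs_top *regs, uint64_t old_rsp)
{
        regs->sp    = old_rsp;       /* user stack pointer saved at entry    */
        regs->ss    = __USER_DS;
        regs->cs    = __USER_CS;
        regs->cx    = (uint64_t)-1;  /* poison: user %rcx is unrecoverable   */
        regs->flags = regs->r11;     /* RFLAGS was parked in %r11            */
}

RESTORE_TOP_OF_STACK is the inverse for the two slots a ptregs-taking call may legitimately change: it copies the RSP slot back into old_rsp and the EFLAGS slot back into the R11 slot before the SYSRET path consumes them.
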
256 | 257 | ||
257 | .macro FAKE_STACK_FRAME child_rip | 258 | .macro FAKE_STACK_FRAME child_rip |
258 | /* push in order ss, rsp, eflags, cs, rip */ | 259 | /* push in order ss, rsp, eflags, cs, rip */ |
259 | xorl %eax, %eax | 260 | xorl %eax, %eax |
260 | pushq_cfi $__KERNEL_DS /* ss */ | 261 | pushq_cfi $__KERNEL_DS /* ss */ |
261 | /*CFI_REL_OFFSET ss,0*/ | 262 | /*CFI_REL_OFFSET ss,0*/ |
262 | pushq_cfi %rax /* rsp */ | 263 | pushq_cfi %rax /* rsp */ |
263 | CFI_REL_OFFSET rsp,0 | 264 | CFI_REL_OFFSET rsp,0 |
264 | pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */ | 265 | pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */ |
265 | /*CFI_REL_OFFSET rflags,0*/ | 266 | /*CFI_REL_OFFSET rflags,0*/ |
266 | pushq_cfi $__KERNEL_CS /* cs */ | 267 | pushq_cfi $__KERNEL_CS /* cs */ |
267 | /*CFI_REL_OFFSET cs,0*/ | 268 | /*CFI_REL_OFFSET cs,0*/ |
268 | pushq_cfi \child_rip /* rip */ | 269 | pushq_cfi \child_rip /* rip */ |
269 | CFI_REL_OFFSET rip,0 | 270 | CFI_REL_OFFSET rip,0 |
270 | pushq_cfi %rax /* orig rax */ | 271 | pushq_cfi %rax /* orig rax */ |
271 | .endm | 272 | .endm |
272 | 273 | ||
273 | .macro UNFAKE_STACK_FRAME | 274 | .macro UNFAKE_STACK_FRAME |
274 | addq $8*6, %rsp | 275 | addq $8*6, %rsp |
275 | CFI_ADJUST_CFA_OFFSET -(6*8) | 276 | CFI_ADJUST_CFA_OFFSET -(6*8) |
276 | .endm | 277 | .endm |
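
For reference, the six quadwords FAKE_STACK_FRAME pushes form a synthetic interrupt frame plus orig_rax; a minimal C sketch of the resulting layout (lowest stack address, i.e. last push, first) shows why UNFAKE_STACK_FRAME pops exactly 8*6 bytes:

#include <stdint.h>

/* sizeof(struct fake_frame) == 8*6, which is what UNFAKE_STACK_FRAME
 * adds back to %rsp. */
struct fake_frame {
        uint64_t orig_rax;   /* %rax, zeroed above                     */
        uint64_t rip;        /* \child_rip                             */
        uint64_t cs;         /* __KERNEL_CS                            */
        uint64_t eflags;     /* X86_EFLAGS_IF | X86_EFLAGS_BIT1        */
        uint64_t rsp;        /* 0 (also from the zeroed %rax)          */
        uint64_t ss;         /* __KERNEL_DS, pushed first              */
};
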
277 | 278 | ||
278 | /* | 279 | /* |
279 | * initial frame state for interrupts (and exceptions without error code) | 280 | * initial frame state for interrupts (and exceptions without error code) |
280 | */ | 281 | */ |
281 | .macro EMPTY_FRAME start=1 offset=0 | 282 | .macro EMPTY_FRAME start=1 offset=0 |
282 | .if \start | 283 | .if \start |
283 | CFI_STARTPROC simple | 284 | CFI_STARTPROC simple |
284 | CFI_SIGNAL_FRAME | 285 | CFI_SIGNAL_FRAME |
285 | CFI_DEF_CFA rsp,8+\offset | 286 | CFI_DEF_CFA rsp,8+\offset |
286 | .else | 287 | .else |
287 | CFI_DEF_CFA_OFFSET 8+\offset | 288 | CFI_DEF_CFA_OFFSET 8+\offset |
288 | .endif | 289 | .endif |
289 | .endm | 290 | .endm |
290 | 291 | ||
291 | /* | 292 | /* |
292 | * initial frame state for interrupts (and exceptions without error code) | 293 | * initial frame state for interrupts (and exceptions without error code) |
293 | */ | 294 | */ |
294 | .macro INTR_FRAME start=1 offset=0 | 295 | .macro INTR_FRAME start=1 offset=0 |
295 | EMPTY_FRAME \start, SS+8+\offset-RIP | 296 | EMPTY_FRAME \start, SS+8+\offset-RIP |
296 | /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ | 297 | /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ |
297 | CFI_REL_OFFSET rsp, RSP+\offset-RIP | 298 | CFI_REL_OFFSET rsp, RSP+\offset-RIP |
298 | /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ | 299 | /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ |
299 | /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ | 300 | /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ |
300 | CFI_REL_OFFSET rip, RIP+\offset-RIP | 301 | CFI_REL_OFFSET rip, RIP+\offset-RIP |
301 | .endm | 302 | .endm |
302 | 303 | ||
303 | /* | 304 | /* |
304 | * initial frame state for exceptions with error code (and interrupts | 305 | * initial frame state for exceptions with error code (and interrupts |
305 | * with vector already pushed) | 306 | * with vector already pushed) |
306 | */ | 307 | */ |
307 | .macro XCPT_FRAME start=1 offset=0 | 308 | .macro XCPT_FRAME start=1 offset=0 |
308 | INTR_FRAME \start, RIP+\offset-ORIG_RAX | 309 | INTR_FRAME \start, RIP+\offset-ORIG_RAX |
309 | /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ | 310 | /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ |
310 | .endm | 311 | .endm |
311 | 312 | ||
312 | /* | 313 | /* |
313 | * frame that enables calling into C. | 314 | * frame that enables calling into C. |
314 | */ | 315 | */ |
315 | .macro PARTIAL_FRAME start=1 offset=0 | 316 | .macro PARTIAL_FRAME start=1 offset=0 |
316 | XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET | 317 | XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET |
317 | CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET | 318 | CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET |
318 | CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET | 319 | CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET |
319 | CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET | 320 | CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET |
320 | CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET | 321 | CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET |
321 | CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET | 322 | CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET |
322 | CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET | 323 | CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET |
323 | CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET | 324 | CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET |
324 | CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET | 325 | CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET |
325 | CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET | 326 | CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET |
326 | .endm | 327 | .endm |
327 | 328 | ||
328 | /* | 329 | /* |
329 | * frame that enables passing a complete pt_regs to a C function. | 330 | * frame that enables passing a complete pt_regs to a C function. |
330 | */ | 331 | */ |
331 | .macro DEFAULT_FRAME start=1 offset=0 | 332 | .macro DEFAULT_FRAME start=1 offset=0 |
332 | PARTIAL_FRAME \start, R11+\offset-R15 | 333 | PARTIAL_FRAME \start, R11+\offset-R15 |
333 | CFI_REL_OFFSET rbx, RBX+\offset | 334 | CFI_REL_OFFSET rbx, RBX+\offset |
334 | CFI_REL_OFFSET rbp, RBP+\offset | 335 | CFI_REL_OFFSET rbp, RBP+\offset |
335 | CFI_REL_OFFSET r12, R12+\offset | 336 | CFI_REL_OFFSET r12, R12+\offset |
336 | CFI_REL_OFFSET r13, R13+\offset | 337 | CFI_REL_OFFSET r13, R13+\offset |
337 | CFI_REL_OFFSET r14, R14+\offset | 338 | CFI_REL_OFFSET r14, R14+\offset |
338 | CFI_REL_OFFSET r15, R15+\offset | 339 | CFI_REL_OFFSET r15, R15+\offset |
339 | .endm | 340 | .endm |
340 | 341 | ||
341 | /* save partial stack frame */ | 342 | /* save partial stack frame */ |
342 | .macro SAVE_ARGS_IRQ | 343 | .macro SAVE_ARGS_IRQ |
343 | cld | 344 | cld |
344 | /* start from rbp in pt_regs and jump over */ | 345 | /* start from rbp in pt_regs and jump over */ |
345 | movq_cfi rdi, RDI-RBP | 346 | movq_cfi rdi, RDI-RBP |
346 | movq_cfi rsi, RSI-RBP | 347 | movq_cfi rsi, RSI-RBP |
347 | movq_cfi rdx, RDX-RBP | 348 | movq_cfi rdx, RDX-RBP |
348 | movq_cfi rcx, RCX-RBP | 349 | movq_cfi rcx, RCX-RBP |
349 | movq_cfi rax, RAX-RBP | 350 | movq_cfi rax, RAX-RBP |
350 | movq_cfi r8, R8-RBP | 351 | movq_cfi r8, R8-RBP |
351 | movq_cfi r9, R9-RBP | 352 | movq_cfi r9, R9-RBP |
352 | movq_cfi r10, R10-RBP | 353 | movq_cfi r10, R10-RBP |
353 | movq_cfi r11, R11-RBP | 354 | movq_cfi r11, R11-RBP |
354 | 355 | ||
355 | /* Save rbp so that we can unwind from get_irq_regs() */ | 356 | /* Save rbp so that we can unwind from get_irq_regs() */ |
356 | movq_cfi rbp, 0 | 357 | movq_cfi rbp, 0 |
357 | 358 | ||
358 | /* Save previous stack value */ | 359 | /* Save previous stack value */ |
359 | movq %rsp, %rsi | 360 | movq %rsp, %rsi |
360 | 361 | ||
361 | leaq -RBP(%rsp),%rdi /* arg1 for handler */ | 362 | leaq -RBP(%rsp),%rdi /* arg1 for handler */ |
362 | testl $3, CS-RBP(%rsi) | 363 | testl $3, CS-RBP(%rsi) |
363 | je 1f | 364 | je 1f |
364 | SWAPGS | 365 | SWAPGS |
365 | /* | 366 | /* |
366 | * irq_count is used to check if a CPU is already on an interrupt stack | 367 | * irq_count is used to check if a CPU is already on an interrupt stack |
367 | * or not. While this is essentially redundant with preempt_count it is | 368 | * or not. While this is essentially redundant with preempt_count it is |
368 | * a little cheaper to use a separate counter in the PDA (short of | 369 | * a little cheaper to use a separate counter in the PDA (short of |
369 | * moving irq_enter into assembly, which would be too much work) | 370 | * moving irq_enter into assembly, which would be too much work) |
370 | */ | 371 | */ |
371 | 1: incl PER_CPU_VAR(irq_count) | 372 | 1: incl PER_CPU_VAR(irq_count) |
372 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp | 373 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp |
373 | CFI_DEF_CFA_REGISTER rsi | 374 | CFI_DEF_CFA_REGISTER rsi |
374 | 375 | ||
375 | /* Store previous stack value */ | 376 | /* Store previous stack value */ |
376 | pushq %rsi | 377 | pushq %rsi |
377 | CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ | 378 | CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ |
378 | 0x77 /* DW_OP_breg7 */, 0, \ | 379 | 0x77 /* DW_OP_breg7 */, 0, \ |
379 | 0x06 /* DW_OP_deref */, \ | 380 | 0x06 /* DW_OP_deref */, \ |
380 | 0x08 /* DW_OP_const1u */, SS+8-RBP, \ | 381 | 0x08 /* DW_OP_const1u */, SS+8-RBP, \ |
381 | 0x22 /* DW_OP_plus */ | 382 | 0x22 /* DW_OP_plus */ |
382 | /* We entered an interrupt context - irqs are off: */ | 383 | /* We entered an interrupt context - irqs are off: */ |
383 | TRACE_IRQS_OFF | 384 | TRACE_IRQS_OFF |
384 | .endm | 385 | .endm |
385 | 386 | ||
386 | ENTRY(save_rest) | 387 | ENTRY(save_rest) |
387 | PARTIAL_FRAME 1 REST_SKIP+8 | 388 | PARTIAL_FRAME 1 REST_SKIP+8 |
388 | movq 5*8+16(%rsp), %r11 /* save return address */ | 389 | movq 5*8+16(%rsp), %r11 /* save return address */ |
389 | movq_cfi rbx, RBX+16 | 390 | movq_cfi rbx, RBX+16 |
390 | movq_cfi rbp, RBP+16 | 391 | movq_cfi rbp, RBP+16 |
391 | movq_cfi r12, R12+16 | 392 | movq_cfi r12, R12+16 |
392 | movq_cfi r13, R13+16 | 393 | movq_cfi r13, R13+16 |
393 | movq_cfi r14, R14+16 | 394 | movq_cfi r14, R14+16 |
394 | movq_cfi r15, R15+16 | 395 | movq_cfi r15, R15+16 |
395 | movq %r11, 8(%rsp) /* return address */ | 396 | movq %r11, 8(%rsp) /* return address */ |
396 | FIXUP_TOP_OF_STACK %r11, 16 | 397 | FIXUP_TOP_OF_STACK %r11, 16 |
397 | ret | 398 | ret |
398 | CFI_ENDPROC | 399 | CFI_ENDPROC |
399 | END(save_rest) | 400 | END(save_rest) |
400 | 401 | ||
401 | /* save complete stack frame */ | 402 | /* save complete stack frame */ |
402 | .pushsection .kprobes.text, "ax" | 403 | .pushsection .kprobes.text, "ax" |
403 | ENTRY(save_paranoid) | 404 | ENTRY(save_paranoid) |
404 | XCPT_FRAME 1 RDI+8 | 405 | XCPT_FRAME 1 RDI+8 |
405 | cld | 406 | cld |
406 | movq_cfi rdi, RDI+8 | 407 | movq_cfi rdi, RDI+8 |
407 | movq_cfi rsi, RSI+8 | 408 | movq_cfi rsi, RSI+8 |
408 | movq_cfi rdx, RDX+8 | 409 | movq_cfi rdx, RDX+8 |
409 | movq_cfi rcx, RCX+8 | 410 | movq_cfi rcx, RCX+8 |
410 | movq_cfi rax, RAX+8 | 411 | movq_cfi rax, RAX+8 |
411 | movq_cfi r8, R8+8 | 412 | movq_cfi r8, R8+8 |
412 | movq_cfi r9, R9+8 | 413 | movq_cfi r9, R9+8 |
413 | movq_cfi r10, R10+8 | 414 | movq_cfi r10, R10+8 |
414 | movq_cfi r11, R11+8 | 415 | movq_cfi r11, R11+8 |
415 | movq_cfi rbx, RBX+8 | 416 | movq_cfi rbx, RBX+8 |
416 | movq_cfi rbp, RBP+8 | 417 | movq_cfi rbp, RBP+8 |
417 | movq_cfi r12, R12+8 | 418 | movq_cfi r12, R12+8 |
418 | movq_cfi r13, R13+8 | 419 | movq_cfi r13, R13+8 |
419 | movq_cfi r14, R14+8 | 420 | movq_cfi r14, R14+8 |
420 | movq_cfi r15, R15+8 | 421 | movq_cfi r15, R15+8 |
421 | movl $1,%ebx | 422 | movl $1,%ebx |
422 | movl $MSR_GS_BASE,%ecx | 423 | movl $MSR_GS_BASE,%ecx |
423 | rdmsr | 424 | rdmsr |
424 | testl %edx,%edx | 425 | testl %edx,%edx |
425 | js 1f /* negative -> in kernel */ | 426 | js 1f /* negative -> in kernel */ |
426 | SWAPGS | 427 | SWAPGS |
427 | xorl %ebx,%ebx | 428 | xorl %ebx,%ebx |
428 | 1: ret | 429 | 1: ret |
429 | CFI_ENDPROC | 430 | CFI_ENDPROC |
430 | END(save_paranoid) | 431 | END(save_paranoid) |
431 | .popsection | 432 | .popsection |
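
save_paranoid's rdmsr/js test encodes one fact: kernel GS bases are kernel virtual addresses, which have bit 63 set, so the high half of MSR_GS_BASE shows up negative in %edx exactly when GS already points at the kernel per-CPU area and SWAPGS must be skipped. A small C sketch of that predicate (the helper name is invented; rdmsr itself is assumed wrapped elsewhere):

#include <stdbool.h>
#include <stdint.h>

/* What "rdmsr; testl %edx,%edx; js 1f" decides: the high 32 bits of
 * MSR_GS_BASE are negative exactly when bit 63 is set, i.e. when GS
 * already points into kernel space. */
static bool gs_already_kernel(uint64_t gs_base)
{
        return (int32_t)(gs_base >> 32) < 0;
}
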
432 | 433 | ||
433 | /* | 434 | /* |
434 | * A newly forked process directly context switches into this address. | 435 | * A newly forked process directly context switches into this address. |
435 | * | 436 | * |
436 | * rdi: prev task we switched from | 437 | * rdi: prev task we switched from |
437 | */ | 438 | */ |
438 | ENTRY(ret_from_fork) | 439 | ENTRY(ret_from_fork) |
439 | DEFAULT_FRAME | 440 | DEFAULT_FRAME |
440 | 441 | ||
441 | LOCK ; btr $TIF_FORK,TI_flags(%r8) | 442 | LOCK ; btr $TIF_FORK,TI_flags(%r8) |
442 | 443 | ||
443 | pushq_cfi kernel_eflags(%rip) | 444 | pushq_cfi kernel_eflags(%rip) |
444 | popfq_cfi # reset kernel eflags | 445 | popfq_cfi # reset kernel eflags |
445 | 446 | ||
446 | call schedule_tail # rdi: 'prev' task parameter | 447 | call schedule_tail # rdi: 'prev' task parameter |
447 | 448 | ||
448 | GET_THREAD_INFO(%rcx) | 449 | GET_THREAD_INFO(%rcx) |
449 | 450 | ||
450 | RESTORE_REST | 451 | RESTORE_REST |
451 | 452 | ||
452 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? | 453 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? |
453 | jz retint_restore_args | 454 | jz retint_restore_args |
454 | 455 | ||
455 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET | 456 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET |
456 | jnz int_ret_from_sys_call | 457 | jnz int_ret_from_sys_call |
457 | 458 | ||
458 | RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET | 459 | RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET |
459 | jmp ret_from_sys_call # go to the SYSRET fastpath | 460 | jmp ret_from_sys_call # go to the SYSRET fastpath |
460 | 461 | ||
461 | CFI_ENDPROC | 462 | CFI_ENDPROC |
462 | END(ret_from_fork) | 463 | END(ret_from_fork) |
463 | 464 | ||
464 | /* | 465 | /* |
465 | * System call entry. Up to 6 arguments in registers are supported. | 466 | * System call entry. Up to 6 arguments in registers are supported. |
466 | * | 467 | * |
467 | * SYSCALL does not save anything on the stack and does not change the | 468 | * SYSCALL does not save anything on the stack and does not change the |
468 | * stack pointer. | 469 | * stack pointer. However, it does mask the flags register for us, so |
470 | * CLD and CLAC are not needed. | ||
469 | */ | 471 | */ |
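
This is where the commit's change to the SYSCALL entry mask pays off: the CPU saves RFLAGS into %r11 and then clears every bit set in MSR_SYSCALL_MASK (IA32_FMASK), so with X86_EFLAGS_AC in that mask the kernel always enters with SMAP protection engaged and DF clear. A tiny C model of the masking, for illustration only (the example mask shows just the two bits relevant here):

#include <stdint.h>

#define X86_EFLAGS_DF (1UL << 10)   /* direction flag                        */
#define X86_EFLAGS_AC (1UL << 18)   /* alignment check / SMAP override flag  */

/* SYSCALL saves RFLAGS into %r11, then clears every bit that is set in
 * MSR_SYSCALL_MASK (IA32_FMASK).  With X86_EFLAGS_AC in the mask, entry
 * code no longer needs an explicit CLAC (nor CLD, for DF). */
static uint64_t rflags_after_syscall(uint64_t user_rflags, uint64_t syscall_mask)
{
        return user_rflags & ~syscall_mask;
}

/* e.g. rflags_after_syscall(flags, X86_EFLAGS_DF | X86_EFLAGS_AC | ...) */
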
470 | 472 | ||
471 | /* | 473 | /* |
472 | * Register setup: | 474 | * Register setup: |
473 | * rax system call number | 475 | * rax system call number |
474 | * rdi arg0 | 476 | * rdi arg0 |
475 | * rcx return address for syscall/sysret, C arg3 | 477 | * rcx return address for syscall/sysret, C arg3 |
476 | * rsi arg1 | 478 | * rsi arg1 |
477 | * rdx arg2 | 479 | * rdx arg2 |
478 | * r10 arg3 (--> moved to rcx for C) | 480 | * r10 arg3 (--> moved to rcx for C) |
479 | * r8 arg4 | 481 | * r8 arg4 |
480 | * r9 arg5 | 482 | * r9 arg5 |
481 | * r11 eflags for syscall/sysret, temporary for C | 483 | * r11 eflags for syscall/sysret, temporary for C |
482 | * r12-r15,rbp,rbx saved by C code, not touched. | 484 | * r12-r15,rbp,rbx saved by C code, not touched. |
483 | * | 485 | * |
484 | * Interrupts are off on entry. | 486 | * Interrupts are off on entry. |
485 | * Only called from user space. | 487 | * Only called from user space. |
486 | * | 488 | * |
487 | * XXX if we had a free scratch register we could save the RSP into the stack frame | 489 | * XXX if we had a free scratch register we could save the RSP into the stack frame |
488 | * and report it properly in ps. Unfortunately we don't have one. | 490 | * and report it properly in ps. Unfortunately we don't have one. |
489 | * | 491 | * |
490 | * When the user can change the frames, always force IRET. That is because | 492 | * When the user can change the frames, always force IRET. That is because |
491 | * it deals with non-canonical addresses better. SYSRET has trouble | 493 | * it deals with non-canonical addresses better. SYSRET has trouble |
492 | * with them due to bugs in both AMD and Intel CPUs. | 494 | * with them due to bugs in both AMD and Intel CPUs. |
493 | */ | 495 | */ |
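
For illustration, a user-space sketch that follows the convention above for a three-argument call; the wrapper name is made up, and %rcx/%r11 must be listed as clobbers because SYSCALL uses them for the return RIP and RFLAGS:

/* Minimal sketch of the 64-bit convention described above: number in %rax,
 * the first three arguments in %rdi/%rsi/%rdx. */
static long raw_syscall3(long nr, long a0, long a1, long a2)
{
        long ret;

        asm volatile("syscall"
                     : "=a" (ret)
                     : "a" (nr), "D" (a0), "S" (a1), "d" (a2)
                     : "rcx", "r11", "memory");
        return ret;
}

/* Example: raw_syscall3(1, 1, (long)"hi\n", 3) issues write(1, "hi\n", 3),
 * since __NR_write is 1 on x86-64.  A fourth argument would be pinned to
 * %r10 with a register variable, matching the "movq %r10,%rcx" fixup the
 * fast path does before calling the C handler. */
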
494 | 496 | ||
495 | ENTRY(system_call) | 497 | ENTRY(system_call) |
496 | CFI_STARTPROC simple | 498 | CFI_STARTPROC simple |
497 | CFI_SIGNAL_FRAME | 499 | CFI_SIGNAL_FRAME |
498 | CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET | 500 | CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET |
499 | CFI_REGISTER rip,rcx | 501 | CFI_REGISTER rip,rcx |
500 | /*CFI_REGISTER rflags,r11*/ | 502 | /*CFI_REGISTER rflags,r11*/ |
501 | SWAPGS_UNSAFE_STACK | 503 | SWAPGS_UNSAFE_STACK |
502 | /* | 504 | /* |
503 | * A hypervisor implementation might want to use a label | 505 | * A hypervisor implementation might want to use a label |
504 | * after the swapgs, so that it can do the swapgs | 506 | * after the swapgs, so that it can do the swapgs |
505 | * for the guest and jump here on syscall. | 507 | * for the guest and jump here on syscall. |
506 | */ | 508 | */ |
507 | GLOBAL(system_call_after_swapgs) | 509 | GLOBAL(system_call_after_swapgs) |
508 | 510 | ||
509 | movq %rsp,PER_CPU_VAR(old_rsp) | 511 | movq %rsp,PER_CPU_VAR(old_rsp) |
510 | movq PER_CPU_VAR(kernel_stack),%rsp | 512 | movq PER_CPU_VAR(kernel_stack),%rsp |
511 | /* | 513 | /* |
512 | * No need to follow this irqs off/on section - it's straight | 514 | * No need to follow this irqs off/on section - it's straight |
513 | * and short: | 515 | * and short: |
514 | */ | 516 | */ |
515 | ENABLE_INTERRUPTS(CLBR_NONE) | 517 | ENABLE_INTERRUPTS(CLBR_NONE) |
516 | SAVE_ARGS 8,0 | 518 | SAVE_ARGS 8,0 |
517 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 519 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
518 | movq %rcx,RIP-ARGOFFSET(%rsp) | 520 | movq %rcx,RIP-ARGOFFSET(%rsp) |
519 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 521 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
520 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 522 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
521 | jnz tracesys | 523 | jnz tracesys |
522 | system_call_fastpath: | 524 | system_call_fastpath: |
523 | #if __SYSCALL_MASK == ~0 | 525 | #if __SYSCALL_MASK == ~0 |
524 | cmpq $__NR_syscall_max,%rax | 526 | cmpq $__NR_syscall_max,%rax |
525 | #else | 527 | #else |
526 | andl $__SYSCALL_MASK,%eax | 528 | andl $__SYSCALL_MASK,%eax |
527 | cmpl $__NR_syscall_max,%eax | 529 | cmpl $__NR_syscall_max,%eax |
528 | #endif | 530 | #endif |
529 | ja badsys | 531 | ja badsys |
530 | movq %r10,%rcx | 532 | movq %r10,%rcx |
531 | call *sys_call_table(,%rax,8) # XXX: rip relative | 533 | call *sys_call_table(,%rax,8) # XXX: rip relative |
532 | movq %rax,RAX-ARGOFFSET(%rsp) | 534 | movq %rax,RAX-ARGOFFSET(%rsp) |
533 | /* | 535 | /* |
534 | * Syscall return path ending with SYSRET (fast path) | 536 | * Syscall return path ending with SYSRET (fast path) |
535 | * Has incomplete stack frame and undefined top of stack. | 537 | * Has incomplete stack frame and undefined top of stack. |
536 | */ | 538 | */ |
537 | ret_from_sys_call: | 539 | ret_from_sys_call: |
538 | movl $_TIF_ALLWORK_MASK,%edi | 540 | movl $_TIF_ALLWORK_MASK,%edi |
539 | /* edi: flagmask */ | 541 | /* edi: flagmask */ |
540 | sysret_check: | 542 | sysret_check: |
541 | LOCKDEP_SYS_EXIT | 543 | LOCKDEP_SYS_EXIT |
542 | DISABLE_INTERRUPTS(CLBR_NONE) | 544 | DISABLE_INTERRUPTS(CLBR_NONE) |
543 | TRACE_IRQS_OFF | 545 | TRACE_IRQS_OFF |
544 | movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx | 546 | movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx |
545 | andl %edi,%edx | 547 | andl %edi,%edx |
546 | jnz sysret_careful | 548 | jnz sysret_careful |
547 | CFI_REMEMBER_STATE | 549 | CFI_REMEMBER_STATE |
548 | /* | 550 | /* |
549 | * sysretq will re-enable interrupts: | 551 | * sysretq will re-enable interrupts: |
550 | */ | 552 | */ |
551 | TRACE_IRQS_ON | 553 | TRACE_IRQS_ON |
552 | movq RIP-ARGOFFSET(%rsp),%rcx | 554 | movq RIP-ARGOFFSET(%rsp),%rcx |
553 | CFI_REGISTER rip,rcx | 555 | CFI_REGISTER rip,rcx |
554 | RESTORE_ARGS 1,-ARG_SKIP,0 | 556 | RESTORE_ARGS 1,-ARG_SKIP,0 |
555 | /*CFI_REGISTER rflags,r11*/ | 557 | /*CFI_REGISTER rflags,r11*/ |
556 | movq PER_CPU_VAR(old_rsp), %rsp | 558 | movq PER_CPU_VAR(old_rsp), %rsp |
557 | USERGS_SYSRET64 | 559 | USERGS_SYSRET64 |
558 | 560 | ||
559 | CFI_RESTORE_STATE | 561 | CFI_RESTORE_STATE |
560 | /* Handle reschedules */ | 562 | /* Handle reschedules */ |
561 | /* edx: work, edi: workmask */ | 563 | /* edx: work, edi: workmask */ |
562 | sysret_careful: | 564 | sysret_careful: |
563 | bt $TIF_NEED_RESCHED,%edx | 565 | bt $TIF_NEED_RESCHED,%edx |
564 | jnc sysret_signal | 566 | jnc sysret_signal |
565 | TRACE_IRQS_ON | 567 | TRACE_IRQS_ON |
566 | ENABLE_INTERRUPTS(CLBR_NONE) | 568 | ENABLE_INTERRUPTS(CLBR_NONE) |
567 | pushq_cfi %rdi | 569 | pushq_cfi %rdi |
568 | call schedule | 570 | call schedule |
569 | popq_cfi %rdi | 571 | popq_cfi %rdi |
570 | jmp sysret_check | 572 | jmp sysret_check |
571 | 573 | ||
572 | /* Handle a signal */ | 574 | /* Handle a signal */ |
573 | sysret_signal: | 575 | sysret_signal: |
574 | TRACE_IRQS_ON | 576 | TRACE_IRQS_ON |
575 | ENABLE_INTERRUPTS(CLBR_NONE) | 577 | ENABLE_INTERRUPTS(CLBR_NONE) |
576 | #ifdef CONFIG_AUDITSYSCALL | 578 | #ifdef CONFIG_AUDITSYSCALL |
577 | bt $TIF_SYSCALL_AUDIT,%edx | 579 | bt $TIF_SYSCALL_AUDIT,%edx |
578 | jc sysret_audit | 580 | jc sysret_audit |
579 | #endif | 581 | #endif |
580 | /* | 582 | /* |
581 | * We have a signal, or exit tracing or single-step. | 583 | * We have a signal, or exit tracing or single-step. |
582 | * These all wind up with the iret return path anyway, | 584 | * These all wind up with the iret return path anyway, |
583 | * so just join that path right now. | 585 | * so just join that path right now. |
584 | */ | 586 | */ |
585 | FIXUP_TOP_OF_STACK %r11, -ARGOFFSET | 587 | FIXUP_TOP_OF_STACK %r11, -ARGOFFSET |
586 | jmp int_check_syscall_exit_work | 588 | jmp int_check_syscall_exit_work |
587 | 589 | ||
588 | badsys: | 590 | badsys: |
589 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 591 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) |
590 | jmp ret_from_sys_call | 592 | jmp ret_from_sys_call |
591 | 593 | ||
592 | #ifdef CONFIG_AUDITSYSCALL | 594 | #ifdef CONFIG_AUDITSYSCALL |
593 | /* | 595 | /* |
594 | * Fast path for syscall audit without full syscall trace. | 596 | * Fast path for syscall audit without full syscall trace. |
595 | * We just call __audit_syscall_entry() directly, and then | 597 | * We just call __audit_syscall_entry() directly, and then |
596 | * jump back to the normal fast path. | 598 | * jump back to the normal fast path. |
597 | */ | 599 | */ |
598 | auditsys: | 600 | auditsys: |
599 | movq %r10,%r9 /* 6th arg: 4th syscall arg */ | 601 | movq %r10,%r9 /* 6th arg: 4th syscall arg */ |
600 | movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ | 602 | movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ |
601 | movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ | 603 | movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ |
602 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ | 604 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ |
603 | movq %rax,%rsi /* 2nd arg: syscall number */ | 605 | movq %rax,%rsi /* 2nd arg: syscall number */ |
604 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ | 606 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ |
605 | call __audit_syscall_entry | 607 | call __audit_syscall_entry |
606 | LOAD_ARGS 0 /* reload call-clobbered registers */ | 608 | LOAD_ARGS 0 /* reload call-clobbered registers */ |
607 | jmp system_call_fastpath | 609 | jmp system_call_fastpath |
608 | 610 | ||
609 | /* | 611 | /* |
610 | * Return fast path for syscall audit. Call __audit_syscall_exit() | 612 | * Return fast path for syscall audit. Call __audit_syscall_exit() |
611 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT | 613 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT |
612 | * masked off. | 614 | * masked off. |
613 | */ | 615 | */ |
614 | sysret_audit: | 616 | sysret_audit: |
615 | movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ | 617 | movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ |
616 | cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */ | 618 | cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */ |
617 | setbe %al /* 1 if so, 0 if not */ | 619 | setbe %al /* 1 if so, 0 if not */ |
618 | movzbl %al,%edi /* zero-extend that into %edi */ | 620 | movzbl %al,%edi /* zero-extend that into %edi */ |
619 | call __audit_syscall_exit | 621 | call __audit_syscall_exit |
620 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | 622 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi |
621 | jmp sysret_check | 623 | jmp sysret_check |
622 | #endif /* CONFIG_AUDITSYSCALL */ | 624 | #endif /* CONFIG_AUDITSYSCALL */ |
623 | 625 | ||
624 | /* Do syscall tracing */ | 626 | /* Do syscall tracing */ |
625 | tracesys: | 627 | tracesys: |
626 | #ifdef CONFIG_AUDITSYSCALL | 628 | #ifdef CONFIG_AUDITSYSCALL |
627 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 629 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
628 | jz auditsys | 630 | jz auditsys |
629 | #endif | 631 | #endif |
630 | SAVE_REST | 632 | SAVE_REST |
631 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 633 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
632 | FIXUP_TOP_OF_STACK %rdi | 634 | FIXUP_TOP_OF_STACK %rdi |
633 | movq %rsp,%rdi | 635 | movq %rsp,%rdi |
634 | call syscall_trace_enter | 636 | call syscall_trace_enter |
635 | /* | 637 | /* |
636 | * Reload arg registers from stack in case ptrace changed them. | 638 | * Reload arg registers from stack in case ptrace changed them. |
637 | * We don't reload %rax because syscall_trace_enter() returned | 639 | * We don't reload %rax because syscall_trace_enter() returned |
638 | * the value it wants us to use in the table lookup. | 640 | * the value it wants us to use in the table lookup. |
639 | */ | 641 | */ |
640 | LOAD_ARGS ARGOFFSET, 1 | 642 | LOAD_ARGS ARGOFFSET, 1 |
641 | RESTORE_REST | 643 | RESTORE_REST |
642 | #if __SYSCALL_MASK == ~0 | 644 | #if __SYSCALL_MASK == ~0 |
643 | cmpq $__NR_syscall_max,%rax | 645 | cmpq $__NR_syscall_max,%rax |
644 | #else | 646 | #else |
645 | andl $__SYSCALL_MASK,%eax | 647 | andl $__SYSCALL_MASK,%eax |
646 | cmpl $__NR_syscall_max,%eax | 648 | cmpl $__NR_syscall_max,%eax |
647 | #endif | 649 | #endif |
648 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ | 650 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ |
649 | movq %r10,%rcx /* fixup for C */ | 651 | movq %r10,%rcx /* fixup for C */ |
650 | call *sys_call_table(,%rax,8) | 652 | call *sys_call_table(,%rax,8) |
651 | movq %rax,RAX-ARGOFFSET(%rsp) | 653 | movq %rax,RAX-ARGOFFSET(%rsp) |
652 | /* Use IRET because user could have changed frame */ | 654 | /* Use IRET because user could have changed frame */ |
653 | 655 | ||
654 | /* | 656 | /* |
655 | * Syscall return path ending with IRET. | 657 | * Syscall return path ending with IRET. |
656 | * Has correct top of stack, but partial stack frame. | 658 | * Has correct top of stack, but partial stack frame. |
657 | */ | 659 | */ |
658 | GLOBAL(int_ret_from_sys_call) | 660 | GLOBAL(int_ret_from_sys_call) |
659 | DISABLE_INTERRUPTS(CLBR_NONE) | 661 | DISABLE_INTERRUPTS(CLBR_NONE) |
660 | TRACE_IRQS_OFF | 662 | TRACE_IRQS_OFF |
661 | movl $_TIF_ALLWORK_MASK,%edi | 663 | movl $_TIF_ALLWORK_MASK,%edi |
662 | /* edi: mask to check */ | 664 | /* edi: mask to check */ |
663 | GLOBAL(int_with_check) | 665 | GLOBAL(int_with_check) |
664 | LOCKDEP_SYS_EXIT_IRQ | 666 | LOCKDEP_SYS_EXIT_IRQ |
665 | GET_THREAD_INFO(%rcx) | 667 | GET_THREAD_INFO(%rcx) |
666 | movl TI_flags(%rcx),%edx | 668 | movl TI_flags(%rcx),%edx |
667 | andl %edi,%edx | 669 | andl %edi,%edx |
668 | jnz int_careful | 670 | jnz int_careful |
669 | andl $~TS_COMPAT,TI_status(%rcx) | 671 | andl $~TS_COMPAT,TI_status(%rcx) |
670 | jmp retint_swapgs | 672 | jmp retint_swapgs |
671 | 673 | ||
672 | /* Either reschedule or signal or syscall exit tracking needed. */ | 674 | /* Either reschedule or signal or syscall exit tracking needed. */ |
673 | /* First do a reschedule test. */ | 675 | /* First do a reschedule test. */ |
674 | /* edx: work, edi: workmask */ | 676 | /* edx: work, edi: workmask */ |
675 | int_careful: | 677 | int_careful: |
676 | bt $TIF_NEED_RESCHED,%edx | 678 | bt $TIF_NEED_RESCHED,%edx |
677 | jnc int_very_careful | 679 | jnc int_very_careful |
678 | TRACE_IRQS_ON | 680 | TRACE_IRQS_ON |
679 | ENABLE_INTERRUPTS(CLBR_NONE) | 681 | ENABLE_INTERRUPTS(CLBR_NONE) |
680 | pushq_cfi %rdi | 682 | pushq_cfi %rdi |
681 | call schedule | 683 | call schedule |
682 | popq_cfi %rdi | 684 | popq_cfi %rdi |
683 | DISABLE_INTERRUPTS(CLBR_NONE) | 685 | DISABLE_INTERRUPTS(CLBR_NONE) |
684 | TRACE_IRQS_OFF | 686 | TRACE_IRQS_OFF |
685 | jmp int_with_check | 687 | jmp int_with_check |
686 | 688 | ||
687 | /* handle signals and tracing -- both require a full stack frame */ | 689 | /* handle signals and tracing -- both require a full stack frame */ |
688 | int_very_careful: | 690 | int_very_careful: |
689 | TRACE_IRQS_ON | 691 | TRACE_IRQS_ON |
690 | ENABLE_INTERRUPTS(CLBR_NONE) | 692 | ENABLE_INTERRUPTS(CLBR_NONE) |
691 | int_check_syscall_exit_work: | 693 | int_check_syscall_exit_work: |
692 | SAVE_REST | 694 | SAVE_REST |
693 | /* Check for syscall exit trace */ | 695 | /* Check for syscall exit trace */ |
694 | testl $_TIF_WORK_SYSCALL_EXIT,%edx | 696 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
695 | jz int_signal | 697 | jz int_signal |
696 | pushq_cfi %rdi | 698 | pushq_cfi %rdi |
697 | leaq 8(%rsp),%rdi # &ptregs -> arg1 | 699 | leaq 8(%rsp),%rdi # &ptregs -> arg1 |
698 | call syscall_trace_leave | 700 | call syscall_trace_leave |
699 | popq_cfi %rdi | 701 | popq_cfi %rdi |
700 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi | 702 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi |
701 | jmp int_restore_rest | 703 | jmp int_restore_rest |
702 | 704 | ||
703 | int_signal: | 705 | int_signal: |
704 | testl $_TIF_DO_NOTIFY_MASK,%edx | 706 | testl $_TIF_DO_NOTIFY_MASK,%edx |
705 | jz 1f | 707 | jz 1f |
706 | movq %rsp,%rdi # &ptregs -> arg1 | 708 | movq %rsp,%rdi # &ptregs -> arg1 |
707 | xorl %esi,%esi # oldset -> arg2 | 709 | xorl %esi,%esi # oldset -> arg2 |
708 | call do_notify_resume | 710 | call do_notify_resume |
709 | 1: movl $_TIF_WORK_MASK,%edi | 711 | 1: movl $_TIF_WORK_MASK,%edi |
710 | int_restore_rest: | 712 | int_restore_rest: |
711 | RESTORE_REST | 713 | RESTORE_REST |
712 | DISABLE_INTERRUPTS(CLBR_NONE) | 714 | DISABLE_INTERRUPTS(CLBR_NONE) |
713 | TRACE_IRQS_OFF | 715 | TRACE_IRQS_OFF |
714 | jmp int_with_check | 716 | jmp int_with_check |
715 | CFI_ENDPROC | 717 | CFI_ENDPROC |
716 | END(system_call) | 718 | END(system_call) |
717 | 719 | ||
718 | /* | 720 | /* |
719 | * Certain special system calls need to save a complete full stack frame. | 721 | * Certain special system calls need to save a complete full stack frame. |
720 | */ | 722 | */ |
721 | .macro PTREGSCALL label,func,arg | 723 | .macro PTREGSCALL label,func,arg |
722 | ENTRY(\label) | 724 | ENTRY(\label) |
723 | PARTIAL_FRAME 1 8 /* offset 8: return address */ | 725 | PARTIAL_FRAME 1 8 /* offset 8: return address */ |
724 | subq $REST_SKIP, %rsp | 726 | subq $REST_SKIP, %rsp |
725 | CFI_ADJUST_CFA_OFFSET REST_SKIP | 727 | CFI_ADJUST_CFA_OFFSET REST_SKIP |
726 | call save_rest | 728 | call save_rest |
727 | DEFAULT_FRAME 0 8 /* offset 8: return address */ | 729 | DEFAULT_FRAME 0 8 /* offset 8: return address */ |
728 | leaq 8(%rsp), \arg /* pt_regs pointer */ | 730 | leaq 8(%rsp), \arg /* pt_regs pointer */ |
729 | call \func | 731 | call \func |
730 | jmp ptregscall_common | 732 | jmp ptregscall_common |
731 | CFI_ENDPROC | 733 | CFI_ENDPROC |
732 | END(\label) | 734 | END(\label) |
733 | .endm | 735 | .endm |
734 | 736 | ||
735 | PTREGSCALL stub_clone, sys_clone, %r8 | 737 | PTREGSCALL stub_clone, sys_clone, %r8 |
736 | PTREGSCALL stub_fork, sys_fork, %rdi | 738 | PTREGSCALL stub_fork, sys_fork, %rdi |
737 | PTREGSCALL stub_vfork, sys_vfork, %rdi | 739 | PTREGSCALL stub_vfork, sys_vfork, %rdi |
738 | PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx | 740 | PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx |
739 | PTREGSCALL stub_iopl, sys_iopl, %rsi | 741 | PTREGSCALL stub_iopl, sys_iopl, %rsi |
740 | 742 | ||
741 | ENTRY(ptregscall_common) | 743 | ENTRY(ptregscall_common) |
742 | DEFAULT_FRAME 1 8 /* offset 8: return address */ | 744 | DEFAULT_FRAME 1 8 /* offset 8: return address */ |
743 | RESTORE_TOP_OF_STACK %r11, 8 | 745 | RESTORE_TOP_OF_STACK %r11, 8 |
744 | movq_cfi_restore R15+8, r15 | 746 | movq_cfi_restore R15+8, r15 |
745 | movq_cfi_restore R14+8, r14 | 747 | movq_cfi_restore R14+8, r14 |
746 | movq_cfi_restore R13+8, r13 | 748 | movq_cfi_restore R13+8, r13 |
747 | movq_cfi_restore R12+8, r12 | 749 | movq_cfi_restore R12+8, r12 |
748 | movq_cfi_restore RBP+8, rbp | 750 | movq_cfi_restore RBP+8, rbp |
749 | movq_cfi_restore RBX+8, rbx | 751 | movq_cfi_restore RBX+8, rbx |
750 | ret $REST_SKIP /* pop extended registers */ | 752 | ret $REST_SKIP /* pop extended registers */ |
751 | CFI_ENDPROC | 753 | CFI_ENDPROC |
752 | END(ptregscall_common) | 754 | END(ptregscall_common) |
753 | 755 | ||
754 | ENTRY(stub_execve) | 756 | ENTRY(stub_execve) |
755 | CFI_STARTPROC | 757 | CFI_STARTPROC |
756 | addq $8, %rsp | 758 | addq $8, %rsp |
757 | PARTIAL_FRAME 0 | 759 | PARTIAL_FRAME 0 |
758 | SAVE_REST | 760 | SAVE_REST |
759 | FIXUP_TOP_OF_STACK %r11 | 761 | FIXUP_TOP_OF_STACK %r11 |
760 | movq %rsp, %rcx | 762 | movq %rsp, %rcx |
761 | call sys_execve | 763 | call sys_execve |
762 | RESTORE_TOP_OF_STACK %r11 | 764 | RESTORE_TOP_OF_STACK %r11 |
763 | movq %rax,RAX(%rsp) | 765 | movq %rax,RAX(%rsp) |
764 | RESTORE_REST | 766 | RESTORE_REST |
765 | jmp int_ret_from_sys_call | 767 | jmp int_ret_from_sys_call |
766 | CFI_ENDPROC | 768 | CFI_ENDPROC |
767 | END(stub_execve) | 769 | END(stub_execve) |
768 | 770 | ||
769 | /* | 771 | /* |
770 | * sigreturn is special because it needs to restore all registers on return. | 772 | * sigreturn is special because it needs to restore all registers on return. |
771 | * This cannot be done with SYSRET, so use the IRET return path instead. | 773 | * This cannot be done with SYSRET, so use the IRET return path instead. |
772 | */ | 774 | */ |
773 | ENTRY(stub_rt_sigreturn) | 775 | ENTRY(stub_rt_sigreturn) |
774 | CFI_STARTPROC | 776 | CFI_STARTPROC |
775 | addq $8, %rsp | 777 | addq $8, %rsp |
776 | PARTIAL_FRAME 0 | 778 | PARTIAL_FRAME 0 |
777 | SAVE_REST | 779 | SAVE_REST |
778 | movq %rsp,%rdi | 780 | movq %rsp,%rdi |
779 | FIXUP_TOP_OF_STACK %r11 | 781 | FIXUP_TOP_OF_STACK %r11 |
780 | call sys_rt_sigreturn | 782 | call sys_rt_sigreturn |
781 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer | 783 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer |
782 | RESTORE_REST | 784 | RESTORE_REST |
783 | jmp int_ret_from_sys_call | 785 | jmp int_ret_from_sys_call |
784 | CFI_ENDPROC | 786 | CFI_ENDPROC |
785 | END(stub_rt_sigreturn) | 787 | END(stub_rt_sigreturn) |
786 | 788 | ||
787 | #ifdef CONFIG_X86_X32_ABI | 789 | #ifdef CONFIG_X86_X32_ABI |
788 | PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx | 790 | PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx |
789 | 791 | ||
790 | ENTRY(stub_x32_rt_sigreturn) | 792 | ENTRY(stub_x32_rt_sigreturn) |
791 | CFI_STARTPROC | 793 | CFI_STARTPROC |
792 | addq $8, %rsp | 794 | addq $8, %rsp |
793 | PARTIAL_FRAME 0 | 795 | PARTIAL_FRAME 0 |
794 | SAVE_REST | 796 | SAVE_REST |
795 | movq %rsp,%rdi | 797 | movq %rsp,%rdi |
796 | FIXUP_TOP_OF_STACK %r11 | 798 | FIXUP_TOP_OF_STACK %r11 |
797 | call sys32_x32_rt_sigreturn | 799 | call sys32_x32_rt_sigreturn |
798 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer | 800 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer |
799 | RESTORE_REST | 801 | RESTORE_REST |
800 | jmp int_ret_from_sys_call | 802 | jmp int_ret_from_sys_call |
801 | CFI_ENDPROC | 803 | CFI_ENDPROC |
802 | END(stub_x32_rt_sigreturn) | 804 | END(stub_x32_rt_sigreturn) |
803 | 805 | ||
804 | ENTRY(stub_x32_execve) | 806 | ENTRY(stub_x32_execve) |
805 | CFI_STARTPROC | 807 | CFI_STARTPROC |
806 | addq $8, %rsp | 808 | addq $8, %rsp |
807 | PARTIAL_FRAME 0 | 809 | PARTIAL_FRAME 0 |
808 | SAVE_REST | 810 | SAVE_REST |
809 | FIXUP_TOP_OF_STACK %r11 | 811 | FIXUP_TOP_OF_STACK %r11 |
810 | movq %rsp, %rcx | 812 | movq %rsp, %rcx |
811 | call sys32_execve | 813 | call sys32_execve |
812 | RESTORE_TOP_OF_STACK %r11 | 814 | RESTORE_TOP_OF_STACK %r11 |
813 | movq %rax,RAX(%rsp) | 815 | movq %rax,RAX(%rsp) |
814 | RESTORE_REST | 816 | RESTORE_REST |
815 | jmp int_ret_from_sys_call | 817 | jmp int_ret_from_sys_call |
816 | CFI_ENDPROC | 818 | CFI_ENDPROC |
817 | END(stub_x32_execve) | 819 | END(stub_x32_execve) |
818 | 820 | ||
819 | #endif | 821 | #endif |
820 | 822 | ||
821 | /* | 823 | /* |
822 | * Build the entry stubs and pointer table with some assembler magic. | 824 | * Build the entry stubs and pointer table with some assembler magic. |
823 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a | 825 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a |
824 | * single cache line on all modern x86 implementations. | 826 | * single cache line on all modern x86 implementations. |
825 | */ | 827 | */ |
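
Two details make the packing work: each stub pushes ~vector+0x80, which always fits in a signed 8-bit immediate and so keeps the stubs short enough for seven per 32-byte chunk, and common_interrupt later undoes the +0x80 (leaving ~vector, a value in [-256,-1], in orig_ax) so the handler can recover the vector by inverting it. A standalone C check of that round trip, assuming the usual FIRST_EXTERNAL_VECTOR/NR_VECTORS values:

#include <assert.h>
#include <stdint.h>

#define FIRST_EXTERNAL_VECTOR 0x20   /* usual values, assumed for the test */
#define NR_VECTORS            256

int main(void)
{
        for (int vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
                /* What the stub pushes: always a valid signed-byte immediate. */
                int8_t pushed = (int8_t)(~vector + 0x80);
                assert(pushed == ~vector + 0x80);

                /* common_interrupt: addq $-0x80 leaves ~vector in orig_ax... */
                int64_t orig_ax = (int64_t)pushed - 0x80;
                assert(orig_ax >= -256 && orig_ax <= -1);

                /* ...and the handler recovers the vector by inverting it. */
                assert((int)~orig_ax == vector);
        }
        return 0;
}
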
826 | .section .init.rodata,"a" | 828 | .section .init.rodata,"a" |
827 | ENTRY(interrupt) | 829 | ENTRY(interrupt) |
828 | .section .entry.text | 830 | .section .entry.text |
829 | .p2align 5 | 831 | .p2align 5 |
830 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 832 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
831 | ENTRY(irq_entries_start) | 833 | ENTRY(irq_entries_start) |
832 | INTR_FRAME | 834 | INTR_FRAME |
833 | vector=FIRST_EXTERNAL_VECTOR | 835 | vector=FIRST_EXTERNAL_VECTOR |
834 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 | 836 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 |
835 | .balign 32 | 837 | .balign 32 |
836 | .rept 7 | 838 | .rept 7 |
837 | .if vector < NR_VECTORS | 839 | .if vector < NR_VECTORS |
838 | .if vector <> FIRST_EXTERNAL_VECTOR | 840 | .if vector <> FIRST_EXTERNAL_VECTOR |
839 | CFI_ADJUST_CFA_OFFSET -8 | 841 | CFI_ADJUST_CFA_OFFSET -8 |
840 | .endif | 842 | .endif |
841 | 1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ | 843 | 1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ |
842 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 | 844 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 |
843 | jmp 2f | 845 | jmp 2f |
844 | .endif | 846 | .endif |
845 | .previous | 847 | .previous |
846 | .quad 1b | 848 | .quad 1b |
847 | .section .entry.text | 849 | .section .entry.text |
848 | vector=vector+1 | 850 | vector=vector+1 |
849 | .endif | 851 | .endif |
850 | .endr | 852 | .endr |
851 | 2: jmp common_interrupt | 853 | 2: jmp common_interrupt |
852 | .endr | 854 | .endr |
853 | CFI_ENDPROC | 855 | CFI_ENDPROC |
854 | END(irq_entries_start) | 856 | END(irq_entries_start) |
855 | 857 | ||
856 | .previous | 858 | .previous |
857 | END(interrupt) | 859 | END(interrupt) |
858 | .previous | 860 | .previous |
859 | 861 | ||
860 | /* | 862 | /* |
861 | * Interrupt entry/exit. | 863 | * Interrupt entry/exit. |
862 | * | 864 | * |
863 | * Interrupt entry points save only callee-clobbered registers in the fast path. | 865 | * Interrupt entry points save only callee-clobbered registers in the fast path. |
864 | * | 866 | * |
865 | * Entry runs with interrupts off. | 867 | * Entry runs with interrupts off. |
866 | */ | 868 | */ |
867 | 869 | ||
868 | /* 0(%rsp): ~(interrupt number) */ | 870 | /* 0(%rsp): ~(interrupt number) */ |
869 | .macro interrupt func | 871 | .macro interrupt func |
870 | /* reserve pt_regs for scratch regs and rbp */ | 872 | /* reserve pt_regs for scratch regs and rbp */ |
871 | subq $ORIG_RAX-RBP, %rsp | 873 | subq $ORIG_RAX-RBP, %rsp |
872 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP | 874 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP |
873 | SAVE_ARGS_IRQ | 875 | SAVE_ARGS_IRQ |
874 | call \func | 876 | call \func |
875 | .endm | 877 | .endm |
876 | 878 | ||
877 | /* | 879 | /* |
878 | * Interrupt entry/exit should be protected against kprobes | 880 | * Interrupt entry/exit should be protected against kprobes |
879 | */ | 881 | */ |
880 | .pushsection .kprobes.text, "ax" | 882 | .pushsection .kprobes.text, "ax" |
881 | /* | 883 | /* |
882 | * The interrupt stubs push (~vector+0x80) onto the stack and | 884 | * The interrupt stubs push (~vector+0x80) onto the stack and |
883 | * then jump to common_interrupt. | 885 | * then jump to common_interrupt. |
884 | */ | 886 | */ |
885 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 887 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
886 | common_interrupt: | 888 | common_interrupt: |
889 | ASM_CLAC | ||
887 | XCPT_FRAME | 890 | XCPT_FRAME |
888 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ | 891 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ |
889 | interrupt do_IRQ | 892 | interrupt do_IRQ |
890 | /* 0(%rsp): old_rsp-ARGOFFSET */ | 893 | /* 0(%rsp): old_rsp-ARGOFFSET */ |
891 | ret_from_intr: | 894 | ret_from_intr: |
892 | DISABLE_INTERRUPTS(CLBR_NONE) | 895 | DISABLE_INTERRUPTS(CLBR_NONE) |
893 | TRACE_IRQS_OFF | 896 | TRACE_IRQS_OFF |
894 | decl PER_CPU_VAR(irq_count) | 897 | decl PER_CPU_VAR(irq_count) |
895 | 898 | ||
896 | /* Restore saved previous stack */ | 899 | /* Restore saved previous stack */ |
897 | popq %rsi | 900 | popq %rsi |
898 | CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */ | 901 | CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */ |
899 | leaq ARGOFFSET-RBP(%rsi), %rsp | 902 | leaq ARGOFFSET-RBP(%rsi), %rsp |
900 | CFI_DEF_CFA_REGISTER rsp | 903 | CFI_DEF_CFA_REGISTER rsp |
901 | CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET | 904 | CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET |
902 | 905 | ||
903 | exit_intr: | 906 | exit_intr: |
904 | GET_THREAD_INFO(%rcx) | 907 | GET_THREAD_INFO(%rcx) |
905 | testl $3,CS-ARGOFFSET(%rsp) | 908 | testl $3,CS-ARGOFFSET(%rsp) |
906 | je retint_kernel | 909 | je retint_kernel |
907 | 910 | ||
908 | /* Interrupt came from user space */ | 911 | /* Interrupt came from user space */ |
909 | /* | 912 | /* |
910 | * Has a correct top of stack, but a partial stack frame | 913 | * Has a correct top of stack, but a partial stack frame |
911 | * %rcx: thread info. Interrupts off. | 914 | * %rcx: thread info. Interrupts off. |
912 | */ | 915 | */ |
913 | retint_with_reschedule: | 916 | retint_with_reschedule: |
914 | movl $_TIF_WORK_MASK,%edi | 917 | movl $_TIF_WORK_MASK,%edi |
915 | retint_check: | 918 | retint_check: |
916 | LOCKDEP_SYS_EXIT_IRQ | 919 | LOCKDEP_SYS_EXIT_IRQ |
917 | movl TI_flags(%rcx),%edx | 920 | movl TI_flags(%rcx),%edx |
918 | andl %edi,%edx | 921 | andl %edi,%edx |
919 | CFI_REMEMBER_STATE | 922 | CFI_REMEMBER_STATE |
920 | jnz retint_careful | 923 | jnz retint_careful |
921 | 924 | ||
922 | retint_swapgs: /* return to user-space */ | 925 | retint_swapgs: /* return to user-space */ |
923 | /* | 926 | /* |
924 | * The iretq could re-enable interrupts: | 927 | * The iretq could re-enable interrupts: |
925 | */ | 928 | */ |
926 | DISABLE_INTERRUPTS(CLBR_ANY) | 929 | DISABLE_INTERRUPTS(CLBR_ANY) |
927 | TRACE_IRQS_IRETQ | 930 | TRACE_IRQS_IRETQ |
928 | SWAPGS | 931 | SWAPGS |
929 | jmp restore_args | 932 | jmp restore_args |
930 | 933 | ||
931 | retint_restore_args: /* return to kernel space */ | 934 | retint_restore_args: /* return to kernel space */ |
932 | DISABLE_INTERRUPTS(CLBR_ANY) | 935 | DISABLE_INTERRUPTS(CLBR_ANY) |
933 | /* | 936 | /* |
934 | * The iretq could re-enable interrupts: | 937 | * The iretq could re-enable interrupts: |
935 | */ | 938 | */ |
936 | TRACE_IRQS_IRETQ | 939 | TRACE_IRQS_IRETQ |
937 | restore_args: | 940 | restore_args: |
938 | RESTORE_ARGS 1,8,1 | 941 | RESTORE_ARGS 1,8,1 |
939 | 942 | ||
940 | irq_return: | 943 | irq_return: |
941 | INTERRUPT_RETURN | 944 | INTERRUPT_RETURN |
942 | _ASM_EXTABLE(irq_return, bad_iret) | 945 | _ASM_EXTABLE(irq_return, bad_iret) |
943 | 946 | ||
944 | #ifdef CONFIG_PARAVIRT | 947 | #ifdef CONFIG_PARAVIRT |
945 | ENTRY(native_iret) | 948 | ENTRY(native_iret) |
946 | iretq | 949 | iretq |
947 | _ASM_EXTABLE(native_iret, bad_iret) | 950 | _ASM_EXTABLE(native_iret, bad_iret) |
948 | #endif | 951 | #endif |
949 | 952 | ||
950 | .section .fixup,"ax" | 953 | .section .fixup,"ax" |
951 | bad_iret: | 954 | bad_iret: |
952 | /* | 955 | /* |
953 | * The iret traps when the %cs or %ss being restored is bogus. | 956 | * The iret traps when the %cs or %ss being restored is bogus. |
954 | * We've lost the original trap vector and error code. | 957 | * We've lost the original trap vector and error code. |
955 | * #GPF is the most likely one to get for an invalid selector. | 958 | * #GPF is the most likely one to get for an invalid selector. |
956 | * So pretend we completed the iret and took the #GPF in user mode. | 959 | * So pretend we completed the iret and took the #GPF in user mode. |
957 | * | 960 | * |
958 | * We are now running with the kernel GS after exception recovery. | 961 | * We are now running with the kernel GS after exception recovery. |
959 | * But error_entry expects us to have user GS to match the user %cs, | 962 | * But error_entry expects us to have user GS to match the user %cs, |
960 | * so swap back. | 963 | * so swap back. |
961 | */ | 964 | */ |
962 | pushq $0 | 965 | pushq $0 |
963 | 966 | ||
964 | SWAPGS | 967 | SWAPGS |
965 | jmp general_protection | 968 | jmp general_protection |
966 | 969 | ||
967 | .previous | 970 | .previous |
968 | 971 | ||
969 | /* edi: workmask, edx: work */ | 972 | /* edi: workmask, edx: work */ |
970 | retint_careful: | 973 | retint_careful: |
971 | CFI_RESTORE_STATE | 974 | CFI_RESTORE_STATE |
972 | bt $TIF_NEED_RESCHED,%edx | 975 | bt $TIF_NEED_RESCHED,%edx |
973 | jnc retint_signal | 976 | jnc retint_signal |
974 | TRACE_IRQS_ON | 977 | TRACE_IRQS_ON |
975 | ENABLE_INTERRUPTS(CLBR_NONE) | 978 | ENABLE_INTERRUPTS(CLBR_NONE) |
976 | pushq_cfi %rdi | 979 | pushq_cfi %rdi |
977 | call schedule | 980 | call schedule |
978 | popq_cfi %rdi | 981 | popq_cfi %rdi |
979 | GET_THREAD_INFO(%rcx) | 982 | GET_THREAD_INFO(%rcx) |
980 | DISABLE_INTERRUPTS(CLBR_NONE) | 983 | DISABLE_INTERRUPTS(CLBR_NONE) |
981 | TRACE_IRQS_OFF | 984 | TRACE_IRQS_OFF |
982 | jmp retint_check | 985 | jmp retint_check |
983 | 986 | ||
984 | retint_signal: | 987 | retint_signal: |
985 | testl $_TIF_DO_NOTIFY_MASK,%edx | 988 | testl $_TIF_DO_NOTIFY_MASK,%edx |
986 | jz retint_swapgs | 989 | jz retint_swapgs |
987 | TRACE_IRQS_ON | 990 | TRACE_IRQS_ON |
988 | ENABLE_INTERRUPTS(CLBR_NONE) | 991 | ENABLE_INTERRUPTS(CLBR_NONE) |
989 | SAVE_REST | 992 | SAVE_REST |
990 | movq $-1,ORIG_RAX(%rsp) | 993 | movq $-1,ORIG_RAX(%rsp) |
991 | xorl %esi,%esi # oldset | 994 | xorl %esi,%esi # oldset |
992 | movq %rsp,%rdi # &pt_regs | 995 | movq %rsp,%rdi # &pt_regs |
993 | call do_notify_resume | 996 | call do_notify_resume |
994 | RESTORE_REST | 997 | RESTORE_REST |
995 | DISABLE_INTERRUPTS(CLBR_NONE) | 998 | DISABLE_INTERRUPTS(CLBR_NONE) |
996 | TRACE_IRQS_OFF | 999 | TRACE_IRQS_OFF |
997 | GET_THREAD_INFO(%rcx) | 1000 | GET_THREAD_INFO(%rcx) |
998 | jmp retint_with_reschedule | 1001 | jmp retint_with_reschedule |
999 | 1002 | ||
1000 | #ifdef CONFIG_PREEMPT | 1003 | #ifdef CONFIG_PREEMPT |
1001 | /* Returning to kernel space. Check if we need preemption */ | 1004 | /* Returning to kernel space. Check if we need preemption */ |
1002 | /* rcx: threadinfo. interrupts off. */ | 1005 | /* rcx: threadinfo. interrupts off. */ |
1003 | ENTRY(retint_kernel) | 1006 | ENTRY(retint_kernel) |
1004 | cmpl $0,TI_preempt_count(%rcx) | 1007 | cmpl $0,TI_preempt_count(%rcx) |
1005 | jnz retint_restore_args | 1008 | jnz retint_restore_args |
1006 | bt $TIF_NEED_RESCHED,TI_flags(%rcx) | 1009 | bt $TIF_NEED_RESCHED,TI_flags(%rcx) |
1007 | jnc retint_restore_args | 1010 | jnc retint_restore_args |
1008 | bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ | 1011 | bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ |
1009 | jnc retint_restore_args | 1012 | jnc retint_restore_args |
1010 | call preempt_schedule_irq | 1013 | call preempt_schedule_irq |
1011 | jmp exit_intr | 1014 | jmp exit_intr |
1012 | #endif | 1015 | #endif |
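
Summarizing the retint_kernel gates in C (a sketch; TIF_NEED_RESCHED's value is taken from x86's thread_info.h and the helper is invented): preemption on the IRQ-return path only happens when preempt_count is zero, a reschedule is pending, and the interrupted kernel context had interrupts enabled.

#include <stdbool.h>

#define TIF_NEED_RESCHED 3          /* value from x86's thread_info.h */
#define X86_EFLAGS_IF    (1UL << 9) /* interrupt enable flag          */

/* The three gates retint_kernel checks before calling preempt_schedule_irq()
 * on return to kernel context. */
static bool should_preempt_on_irq_return(unsigned int preempt_count,
                                         unsigned long ti_flags,
                                         unsigned long saved_eflags)
{
        return preempt_count == 0 &&                      /* preemption allowed   */
               (ti_flags & (1UL << TIF_NEED_RESCHED)) &&  /* reschedule requested */
               (saved_eflags & X86_EFLAGS_IF);            /* IRQs were enabled    */
}
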
1013 | 1016 | ||
1014 | CFI_ENDPROC | 1017 | CFI_ENDPROC |
1015 | END(common_interrupt) | 1018 | END(common_interrupt) |
1016 | /* | 1019 | /* |
1017 | * End of kprobes section | 1020 | * End of kprobes section |
1018 | */ | 1021 | */ |
1019 | .popsection | 1022 | .popsection |
1020 | 1023 | ||
1021 | /* | 1024 | /* |
1022 | * APIC interrupts. | 1025 | * APIC interrupts. |
1023 | */ | 1026 | */ |
1024 | .macro apicinterrupt num sym do_sym | 1027 | .macro apicinterrupt num sym do_sym |
1025 | ENTRY(\sym) | 1028 | ENTRY(\sym) |
1029 | ASM_CLAC | ||
1026 | INTR_FRAME | 1030 | INTR_FRAME |
1027 | pushq_cfi $~(\num) | 1031 | pushq_cfi $~(\num) |
1028 | .Lcommon_\sym: | 1032 | .Lcommon_\sym: |
1029 | interrupt \do_sym | 1033 | interrupt \do_sym |
1030 | jmp ret_from_intr | 1034 | jmp ret_from_intr |
1031 | CFI_ENDPROC | 1035 | CFI_ENDPROC |
1032 | END(\sym) | 1036 | END(\sym) |
1033 | .endm | 1037 | .endm |
1034 | 1038 | ||
1035 | #ifdef CONFIG_SMP | 1039 | #ifdef CONFIG_SMP |
1036 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ | 1040 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ |
1037 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt | 1041 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt |
1038 | apicinterrupt REBOOT_VECTOR \ | 1042 | apicinterrupt REBOOT_VECTOR \ |
1039 | reboot_interrupt smp_reboot_interrupt | 1043 | reboot_interrupt smp_reboot_interrupt |
1040 | #endif | 1044 | #endif |
1041 | 1045 | ||
1042 | #ifdef CONFIG_X86_UV | 1046 | #ifdef CONFIG_X86_UV |
1043 | apicinterrupt UV_BAU_MESSAGE \ | 1047 | apicinterrupt UV_BAU_MESSAGE \ |
1044 | uv_bau_message_intr1 uv_bau_message_interrupt | 1048 | uv_bau_message_intr1 uv_bau_message_interrupt |
1045 | #endif | 1049 | #endif |
1046 | apicinterrupt LOCAL_TIMER_VECTOR \ | 1050 | apicinterrupt LOCAL_TIMER_VECTOR \ |
1047 | apic_timer_interrupt smp_apic_timer_interrupt | 1051 | apic_timer_interrupt smp_apic_timer_interrupt |
1048 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ | 1052 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ |
1049 | x86_platform_ipi smp_x86_platform_ipi | 1053 | x86_platform_ipi smp_x86_platform_ipi |
1050 | 1054 | ||
1051 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 1055 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
1052 | threshold_interrupt smp_threshold_interrupt | 1056 | threshold_interrupt smp_threshold_interrupt |
1053 | apicinterrupt THERMAL_APIC_VECTOR \ | 1057 | apicinterrupt THERMAL_APIC_VECTOR \ |
1054 | thermal_interrupt smp_thermal_interrupt | 1058 | thermal_interrupt smp_thermal_interrupt |
1055 | 1059 | ||
1056 | #ifdef CONFIG_SMP | 1060 | #ifdef CONFIG_SMP |
1057 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ | 1061 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ |
1058 | call_function_single_interrupt smp_call_function_single_interrupt | 1062 | call_function_single_interrupt smp_call_function_single_interrupt |
1059 | apicinterrupt CALL_FUNCTION_VECTOR \ | 1063 | apicinterrupt CALL_FUNCTION_VECTOR \ |
1060 | call_function_interrupt smp_call_function_interrupt | 1064 | call_function_interrupt smp_call_function_interrupt |
1061 | apicinterrupt RESCHEDULE_VECTOR \ | 1065 | apicinterrupt RESCHEDULE_VECTOR \ |
1062 | reschedule_interrupt smp_reschedule_interrupt | 1066 | reschedule_interrupt smp_reschedule_interrupt |
1063 | #endif | 1067 | #endif |
1064 | 1068 | ||
1065 | apicinterrupt ERROR_APIC_VECTOR \ | 1069 | apicinterrupt ERROR_APIC_VECTOR \ |
1066 | error_interrupt smp_error_interrupt | 1070 | error_interrupt smp_error_interrupt |
1067 | apicinterrupt SPURIOUS_APIC_VECTOR \ | 1071 | apicinterrupt SPURIOUS_APIC_VECTOR \ |
1068 | spurious_interrupt smp_spurious_interrupt | 1072 | spurious_interrupt smp_spurious_interrupt |
1069 | 1073 | ||
1070 | #ifdef CONFIG_IRQ_WORK | 1074 | #ifdef CONFIG_IRQ_WORK |
1071 | apicinterrupt IRQ_WORK_VECTOR \ | 1075 | apicinterrupt IRQ_WORK_VECTOR \ |
1072 | irq_work_interrupt smp_irq_work_interrupt | 1076 | irq_work_interrupt smp_irq_work_interrupt |
1073 | #endif | 1077 | #endif |
1074 | 1078 | ||
1075 | /* | 1079 | /* |
1076 | * Exception entry points. | 1080 | * Exception entry points. |
1077 | */ | 1081 | */ |
1078 | .macro zeroentry sym do_sym | 1082 | .macro zeroentry sym do_sym |
1079 | ENTRY(\sym) | 1083 | ENTRY(\sym) |
1084 | ASM_CLAC | ||
1080 | INTR_FRAME | 1085 | INTR_FRAME |
1081 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1086 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1082 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1087 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1083 | subq $ORIG_RAX-R15, %rsp | 1088 | subq $ORIG_RAX-R15, %rsp |
1084 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1089 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1085 | call error_entry | 1090 | call error_entry |
1086 | DEFAULT_FRAME 0 | 1091 | DEFAULT_FRAME 0 |
1087 | movq %rsp,%rdi /* pt_regs pointer */ | 1092 | movq %rsp,%rdi /* pt_regs pointer */ |
1088 | xorl %esi,%esi /* no error code */ | 1093 | xorl %esi,%esi /* no error code */ |
1089 | call \do_sym | 1094 | call \do_sym |
1090 | jmp error_exit /* %ebx: no swapgs flag */ | 1095 | jmp error_exit /* %ebx: no swapgs flag */ |
1091 | CFI_ENDPROC | 1096 | CFI_ENDPROC |
1092 | END(\sym) | 1097 | END(\sym) |
1093 | .endm | 1098 | .endm |
1094 | 1099 | ||
1095 | .macro paranoidzeroentry sym do_sym | 1100 | .macro paranoidzeroentry sym do_sym |
1096 | ENTRY(\sym) | 1101 | ENTRY(\sym) |
1102 | ASM_CLAC | ||
1097 | INTR_FRAME | 1103 | INTR_FRAME |
1098 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1104 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1099 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1105 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1100 | subq $ORIG_RAX-R15, %rsp | 1106 | subq $ORIG_RAX-R15, %rsp |
1101 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1107 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1102 | call save_paranoid | 1108 | call save_paranoid |
1103 | TRACE_IRQS_OFF | 1109 | TRACE_IRQS_OFF |
1104 | movq %rsp,%rdi /* pt_regs pointer */ | 1110 | movq %rsp,%rdi /* pt_regs pointer */ |
1105 | xorl %esi,%esi /* no error code */ | 1111 | xorl %esi,%esi /* no error code */ |
1106 | call \do_sym | 1112 | call \do_sym |
1107 | jmp paranoid_exit /* %ebx: no swapgs flag */ | 1113 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
1108 | CFI_ENDPROC | 1114 | CFI_ENDPROC |
1109 | END(\sym) | 1115 | END(\sym) |
1110 | .endm | 1116 | .endm |
1111 | 1117 | ||
1112 | #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) | 1118 | #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) |
1113 | .macro paranoidzeroentry_ist sym do_sym ist | 1119 | .macro paranoidzeroentry_ist sym do_sym ist |
1114 | ENTRY(\sym) | 1120 | ENTRY(\sym) |
1121 | ASM_CLAC | ||
1115 | INTR_FRAME | 1122 | INTR_FRAME |
1116 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1123 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1117 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1124 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1118 | subq $ORIG_RAX-R15, %rsp | 1125 | subq $ORIG_RAX-R15, %rsp |
1119 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1126 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1120 | call save_paranoid | 1127 | call save_paranoid |
1121 | TRACE_IRQS_OFF_DEBUG | 1128 | TRACE_IRQS_OFF_DEBUG |
1122 | movq %rsp,%rdi /* pt_regs pointer */ | 1129 | movq %rsp,%rdi /* pt_regs pointer */ |
1123 | xorl %esi,%esi /* no error code */ | 1130 | xorl %esi,%esi /* no error code */ |
1124 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) | 1131 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
1125 | call \do_sym | 1132 | call \do_sym |
1126 | addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) | 1133 | addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
1127 | jmp paranoid_exit /* %ebx: no swapgs flag */ | 1134 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
1128 | CFI_ENDPROC | 1135 | CFI_ENDPROC |
1129 | END(\sym) | 1136 | END(\sym) |
1130 | .endm | 1137 | .endm |
1131 | 1138 | ||
1132 | .macro errorentry sym do_sym | 1139 | .macro errorentry sym do_sym |
1133 | ENTRY(\sym) | 1140 | ENTRY(\sym) |
1141 | ASM_CLAC | ||
1134 | XCPT_FRAME | 1142 | XCPT_FRAME |
1135 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1143 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1136 | subq $ORIG_RAX-R15, %rsp | 1144 | subq $ORIG_RAX-R15, %rsp |
1137 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1145 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1138 | call error_entry | 1146 | call error_entry |
1139 | DEFAULT_FRAME 0 | 1147 | DEFAULT_FRAME 0 |
1140 | movq %rsp,%rdi /* pt_regs pointer */ | 1148 | movq %rsp,%rdi /* pt_regs pointer */ |
1141 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | 1149 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
1142 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ | 1150 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ |
1143 | call \do_sym | 1151 | call \do_sym |
1144 | jmp error_exit /* %ebx: no swapgs flag */ | 1152 | jmp error_exit /* %ebx: no swapgs flag */ |
1145 | CFI_ENDPROC | 1153 | CFI_ENDPROC |
1146 | END(\sym) | 1154 | END(\sym) |
1147 | .endm | 1155 | .endm |
1148 | 1156 | ||
1149 | /* error code is on the stack already */ | 1157 | /* error code is on the stack already */ |
1150 | .macro paranoiderrorentry sym do_sym | 1158 | .macro paranoiderrorentry sym do_sym |
1151 | ENTRY(\sym) | 1159 | ENTRY(\sym) |
1160 | ASM_CLAC | ||
1152 | XCPT_FRAME | 1161 | XCPT_FRAME |
1153 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1162 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1154 | subq $ORIG_RAX-R15, %rsp | 1163 | subq $ORIG_RAX-R15, %rsp |
1155 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1164 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1156 | call save_paranoid | 1165 | call save_paranoid |
1157 | DEFAULT_FRAME 0 | 1166 | DEFAULT_FRAME 0 |
1158 | TRACE_IRQS_OFF | 1167 | TRACE_IRQS_OFF |
1159 | movq %rsp,%rdi /* pt_regs pointer */ | 1168 | movq %rsp,%rdi /* pt_regs pointer */ |
1160 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | 1169 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
1161 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ | 1170 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ |
1162 | call \do_sym | 1171 | call \do_sym |
1163 | jmp paranoid_exit /* %ebx: no swapgs flag */ | 1172 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
1164 | CFI_ENDPROC | 1173 | CFI_ENDPROC |
1165 | END(\sym) | 1174 | END(\sym) |
1166 | .endm | 1175 | .endm |
1167 | 1176 | ||
1168 | zeroentry divide_error do_divide_error | 1177 | zeroentry divide_error do_divide_error |
1169 | zeroentry overflow do_overflow | 1178 | zeroentry overflow do_overflow |
1170 | zeroentry bounds do_bounds | 1179 | zeroentry bounds do_bounds |
1171 | zeroentry invalid_op do_invalid_op | 1180 | zeroentry invalid_op do_invalid_op |
1172 | zeroentry device_not_available do_device_not_available | 1181 | zeroentry device_not_available do_device_not_available |
1173 | paranoiderrorentry double_fault do_double_fault | 1182 | paranoiderrorentry double_fault do_double_fault |
1174 | zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun | 1183 | zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun |
1175 | errorentry invalid_TSS do_invalid_TSS | 1184 | errorentry invalid_TSS do_invalid_TSS |
1176 | errorentry segment_not_present do_segment_not_present | 1185 | errorentry segment_not_present do_segment_not_present |
1177 | zeroentry spurious_interrupt_bug do_spurious_interrupt_bug | 1186 | zeroentry spurious_interrupt_bug do_spurious_interrupt_bug |
1178 | zeroentry coprocessor_error do_coprocessor_error | 1187 | zeroentry coprocessor_error do_coprocessor_error |
1179 | errorentry alignment_check do_alignment_check | 1188 | errorentry alignment_check do_alignment_check |
1180 | zeroentry simd_coprocessor_error do_simd_coprocessor_error | 1189 | zeroentry simd_coprocessor_error do_simd_coprocessor_error |
1181 | 1190 | ||
1182 | 1191 | ||
1183 | /* Reload gs selector with exception handling */ | 1192 | /* Reload gs selector with exception handling */ |
1184 | /* edi: new selector */ | 1193 | /* edi: new selector */ |
1185 | ENTRY(native_load_gs_index) | 1194 | ENTRY(native_load_gs_index) |
1186 | CFI_STARTPROC | 1195 | CFI_STARTPROC |
1187 | pushfq_cfi | 1196 | pushfq_cfi |
1188 | DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) | 1197 | DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) |
1189 | SWAPGS | 1198 | SWAPGS |
1190 | gs_change: | 1199 | gs_change: |
1191 | movl %edi,%gs | 1200 | movl %edi,%gs |
1192 | 2: mfence /* workaround */ | 1201 | 2: mfence /* workaround */ |
1193 | SWAPGS | 1202 | SWAPGS |
1194 | popfq_cfi | 1203 | popfq_cfi |
1195 | ret | 1204 | ret |
1196 | CFI_ENDPROC | 1205 | CFI_ENDPROC |
1197 | END(native_load_gs_index) | 1206 | END(native_load_gs_index) |
1198 | 1207 | ||
1199 | _ASM_EXTABLE(gs_change,bad_gs) | 1208 | _ASM_EXTABLE(gs_change,bad_gs) |
1200 | .section .fixup,"ax" | 1209 | .section .fixup,"ax" |
1201 | /* running with kernelgs */ | 1210 | /* running with kernelgs */ |
1202 | bad_gs: | 1211 | bad_gs: |
1203 | SWAPGS /* switch back to user gs */ | 1212 | SWAPGS /* switch back to user gs */ |
1204 | xorl %eax,%eax | 1213 | xorl %eax,%eax |
1205 | movl %eax,%gs | 1214 | movl %eax,%gs |
1206 | jmp 2b | 1215 | jmp 2b |
1207 | .previous | 1216 | .previous |
1208 | 1217 | ||
1209 | ENTRY(kernel_thread_helper) | 1218 | ENTRY(kernel_thread_helper) |
1210 | pushq $0 # fake return address | 1219 | pushq $0 # fake return address |
1211 | CFI_STARTPROC | 1220 | CFI_STARTPROC |
1212 | /* | 1221 | /* |
1213 | * Here we are in the child and the registers are set as they were | 1222 | * Here we are in the child and the registers are set as they were |
1214 | * at kernel_thread() invocation in the parent. | 1223 | * at kernel_thread() invocation in the parent. |
1215 | */ | 1224 | */ |
1216 | call *%rsi | 1225 | call *%rsi |
1217 | # exit | 1226 | # exit |
1218 | mov %eax, %edi | 1227 | mov %eax, %edi |
1219 | call do_exit | 1228 | call do_exit |
1220 | ud2 # padding for call trace | 1229 | ud2 # padding for call trace |
1221 | CFI_ENDPROC | 1230 | CFI_ENDPROC |
1222 | END(kernel_thread_helper) | 1231 | END(kernel_thread_helper) |
1223 | 1232 | ||
1224 | /* | 1233 | /* |
1225 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. | 1234 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. |
1226 | * | 1235 | * |
1227 | * C extern interface: | 1236 | * C extern interface: |
1228 | * extern long execve(const char *name, char **argv, char **envp) | 1237 | * extern long execve(const char *name, char **argv, char **envp) |
1229 | * | 1238 | * |
1230 | * asm input arguments: | 1239 | * asm input arguments: |
1231 | * rdi: name, rsi: argv, rdx: envp | 1240 | * rdi: name, rsi: argv, rdx: envp |
1232 | * | 1241 | * |
1233 | * We want to fall back into: | 1242 | * We want to fall back into: |
1234 | * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs) | 1243 | * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs) |
1235 | * | 1244 | * |
1236 | * do_sys_execve asm fallback arguments: | 1245 | * do_sys_execve asm fallback arguments: |
1237 | * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack | 1246 | * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack |
1238 | */ | 1247 | */ |
1239 | ENTRY(kernel_execve) | 1248 | ENTRY(kernel_execve) |
1240 | CFI_STARTPROC | 1249 | CFI_STARTPROC |
1241 | FAKE_STACK_FRAME $0 | 1250 | FAKE_STACK_FRAME $0 |
1242 | SAVE_ALL | 1251 | SAVE_ALL |
1243 | movq %rsp,%rcx | 1252 | movq %rsp,%rcx |
1244 | call sys_execve | 1253 | call sys_execve |
1245 | movq %rax, RAX(%rsp) | 1254 | movq %rax, RAX(%rsp) |
1246 | RESTORE_REST | 1255 | RESTORE_REST |
1247 | testq %rax,%rax | 1256 | testq %rax,%rax |
1248 | je int_ret_from_sys_call | 1257 | je int_ret_from_sys_call |
1249 | RESTORE_ARGS | 1258 | RESTORE_ARGS |
1250 | UNFAKE_STACK_FRAME | 1259 | UNFAKE_STACK_FRAME |
1251 | ret | 1260 | ret |
1252 | CFI_ENDPROC | 1261 | CFI_ENDPROC |
1253 | END(kernel_execve) | 1262 | END(kernel_execve) |
1254 | 1263 | ||
1255 | /* Call softirq on interrupt stack. Interrupts are off. */ | 1264 | /* Call softirq on interrupt stack. Interrupts are off. */ |
1256 | ENTRY(call_softirq) | 1265 | ENTRY(call_softirq) |
1257 | CFI_STARTPROC | 1266 | CFI_STARTPROC |
1258 | pushq_cfi %rbp | 1267 | pushq_cfi %rbp |
1259 | CFI_REL_OFFSET rbp,0 | 1268 | CFI_REL_OFFSET rbp,0 |
1260 | mov %rsp,%rbp | 1269 | mov %rsp,%rbp |
1261 | CFI_DEF_CFA_REGISTER rbp | 1270 | CFI_DEF_CFA_REGISTER rbp |
1262 | incl PER_CPU_VAR(irq_count) | 1271 | incl PER_CPU_VAR(irq_count) |
1263 | cmove PER_CPU_VAR(irq_stack_ptr),%rsp | 1272 | cmove PER_CPU_VAR(irq_stack_ptr),%rsp |
1264 | push %rbp # backlink for old unwinder | 1273 | push %rbp # backlink for old unwinder |
1265 | call __do_softirq | 1274 | call __do_softirq |
1266 | leaveq | 1275 | leaveq |
1267 | CFI_RESTORE rbp | 1276 | CFI_RESTORE rbp |
1268 | CFI_DEF_CFA_REGISTER rsp | 1277 | CFI_DEF_CFA_REGISTER rsp |
1269 | CFI_ADJUST_CFA_OFFSET -8 | 1278 | CFI_ADJUST_CFA_OFFSET -8 |
1270 | decl PER_CPU_VAR(irq_count) | 1279 | decl PER_CPU_VAR(irq_count) |
1271 | ret | 1280 | ret |
1272 | CFI_ENDPROC | 1281 | CFI_ENDPROC |
1273 | END(call_softirq) | 1282 | END(call_softirq) |
1274 | 1283 | ||
1275 | #ifdef CONFIG_XEN | 1284 | #ifdef CONFIG_XEN |
1276 | zeroentry xen_hypervisor_callback xen_do_hypervisor_callback | 1285 | zeroentry xen_hypervisor_callback xen_do_hypervisor_callback |
1277 | 1286 | ||
1278 | /* | 1287 | /* |
1279 | * A note on the "critical region" in our callback handler. | 1288 | * A note on the "critical region" in our callback handler. |
1280 | * We want to avoid stacking callback handlers due to events occurring | 1289 | * We want to avoid stacking callback handlers due to events occurring |
1281 | * during handling of the last event. To do this, we keep events disabled | 1290 | * during handling of the last event. To do this, we keep events disabled |
1282 | * until we've done all processing. HOWEVER, we must enable events before | 1291 | * until we've done all processing. HOWEVER, we must enable events before |
1283 | * popping the stack frame (can't be done atomically) and so it would still | 1292 | * popping the stack frame (can't be done atomically) and so it would still |
1284 | * be possible to get enough handler activations to overflow the stack. | 1293 | * be possible to get enough handler activations to overflow the stack. |
1285 | * Although unlikely, bugs of that kind are hard to track down, so we'd | 1294 | * Although unlikely, bugs of that kind are hard to track down, so we'd |
1286 | * like to avoid the possibility. | 1295 | * like to avoid the possibility. |
1287 | * So, on entry to the handler we detect whether we interrupted an | 1296 | * So, on entry to the handler we detect whether we interrupted an |
1288 | * existing activation in its critical region -- if so, we pop the current | 1297 | * existing activation in its critical region -- if so, we pop the current |
1289 | * activation and restart the handler using the previous one. | 1298 | * activation and restart the handler using the previous one. |
1290 | */ | 1299 | */ |
1291 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) | 1300 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) |
1292 | CFI_STARTPROC | 1301 | CFI_STARTPROC |
1293 | /* | 1302 | /* |
1294 | * Since we don't modify %rdi, evtchn_do_upcall(struct *pt_regs) will | 1303 | * Since we don't modify %rdi, evtchn_do_upcall(struct *pt_regs) will |
1295 | * see the correct pointer to the pt_regs | 1304 | * see the correct pointer to the pt_regs |
1296 | */ | 1305 | */ |
1297 | movq %rdi, %rsp # we don't return, adjust the stack frame | 1306 | movq %rdi, %rsp # we don't return, adjust the stack frame |
1298 | CFI_ENDPROC | 1307 | CFI_ENDPROC |
1299 | DEFAULT_FRAME | 1308 | DEFAULT_FRAME |
1300 | 11: incl PER_CPU_VAR(irq_count) | 1309 | 11: incl PER_CPU_VAR(irq_count) |
1301 | movq %rsp,%rbp | 1310 | movq %rsp,%rbp |
1302 | CFI_DEF_CFA_REGISTER rbp | 1311 | CFI_DEF_CFA_REGISTER rbp |
1303 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp | 1312 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp |
1304 | pushq %rbp # backlink for old unwinder | 1313 | pushq %rbp # backlink for old unwinder |
1305 | call xen_evtchn_do_upcall | 1314 | call xen_evtchn_do_upcall |
1306 | popq %rsp | 1315 | popq %rsp |
1307 | CFI_DEF_CFA_REGISTER rsp | 1316 | CFI_DEF_CFA_REGISTER rsp |
1308 | decl PER_CPU_VAR(irq_count) | 1317 | decl PER_CPU_VAR(irq_count) |
1309 | jmp error_exit | 1318 | jmp error_exit |
1310 | CFI_ENDPROC | 1319 | CFI_ENDPROC |
1311 | END(xen_do_hypervisor_callback) | 1320 | END(xen_do_hypervisor_callback) |
1312 | 1321 | ||
1313 | /* | 1322 | /* |
1314 | * Hypervisor uses this for application faults while it executes. | 1323 | * Hypervisor uses this for application faults while it executes. |
1315 | * We get here for two reasons: | 1324 | * We get here for two reasons: |
1316 | * 1. Fault while reloading DS, ES, FS or GS | 1325 | * 1. Fault while reloading DS, ES, FS or GS |
1317 | * 2. Fault while executing IRET | 1326 | * 2. Fault while executing IRET |
1318 | * Category 1 we do not need to fix up as Xen has already reloaded all segment | 1327 | * Category 1 we do not need to fix up as Xen has already reloaded all segment |
1319 | * registers that could be reloaded and zeroed the others. | 1328 | * registers that could be reloaded and zeroed the others. |
1320 | * Category 2 we fix up by killing the current process. We cannot use the | 1329 | * Category 2 we fix up by killing the current process. We cannot use the |
1321 | * normal Linux return path in this case because if we use the IRET hypercall | 1330 | * normal Linux return path in this case because if we use the IRET hypercall |
1322 | * to pop the stack frame we end up in an infinite loop of failsafe callbacks. | 1331 | * to pop the stack frame we end up in an infinite loop of failsafe callbacks. |
1323 | * We distinguish between categories by comparing each saved segment register | 1332 | * We distinguish between categories by comparing each saved segment register |
1324 | * with its current contents: any discrepancy means we are in category 1. | 1333 | * with its current contents: any discrepancy means we are in category 1. |
1325 | */ | 1334 | */ |
1326 | ENTRY(xen_failsafe_callback) | 1335 | ENTRY(xen_failsafe_callback) |
1327 | INTR_FRAME 1 (6*8) | 1336 | INTR_FRAME 1 (6*8) |
1328 | /*CFI_REL_OFFSET gs,GS*/ | 1337 | /*CFI_REL_OFFSET gs,GS*/ |
1329 | /*CFI_REL_OFFSET fs,FS*/ | 1338 | /*CFI_REL_OFFSET fs,FS*/ |
1330 | /*CFI_REL_OFFSET es,ES*/ | 1339 | /*CFI_REL_OFFSET es,ES*/ |
1331 | /*CFI_REL_OFFSET ds,DS*/ | 1340 | /*CFI_REL_OFFSET ds,DS*/ |
1332 | CFI_REL_OFFSET r11,8 | 1341 | CFI_REL_OFFSET r11,8 |
1333 | CFI_REL_OFFSET rcx,0 | 1342 | CFI_REL_OFFSET rcx,0 |
1334 | movw %ds,%cx | 1343 | movw %ds,%cx |
1335 | cmpw %cx,0x10(%rsp) | 1344 | cmpw %cx,0x10(%rsp) |
1336 | CFI_REMEMBER_STATE | 1345 | CFI_REMEMBER_STATE |
1337 | jne 1f | 1346 | jne 1f |
1338 | movw %es,%cx | 1347 | movw %es,%cx |
1339 | cmpw %cx,0x18(%rsp) | 1348 | cmpw %cx,0x18(%rsp) |
1340 | jne 1f | 1349 | jne 1f |
1341 | movw %fs,%cx | 1350 | movw %fs,%cx |
1342 | cmpw %cx,0x20(%rsp) | 1351 | cmpw %cx,0x20(%rsp) |
1343 | jne 1f | 1352 | jne 1f |
1344 | movw %gs,%cx | 1353 | movw %gs,%cx |
1345 | cmpw %cx,0x28(%rsp) | 1354 | cmpw %cx,0x28(%rsp) |
1346 | jne 1f | 1355 | jne 1f |
1347 | /* All segments match their saved values => Category 2 (Bad IRET). */ | 1356 | /* All segments match their saved values => Category 2 (Bad IRET). */ |
1348 | movq (%rsp),%rcx | 1357 | movq (%rsp),%rcx |
1349 | CFI_RESTORE rcx | 1358 | CFI_RESTORE rcx |
1350 | movq 8(%rsp),%r11 | 1359 | movq 8(%rsp),%r11 |
1351 | CFI_RESTORE r11 | 1360 | CFI_RESTORE r11 |
1352 | addq $0x30,%rsp | 1361 | addq $0x30,%rsp |
1353 | CFI_ADJUST_CFA_OFFSET -0x30 | 1362 | CFI_ADJUST_CFA_OFFSET -0x30 |
1354 | pushq_cfi $0 /* RIP */ | 1363 | pushq_cfi $0 /* RIP */ |
1355 | pushq_cfi %r11 | 1364 | pushq_cfi %r11 |
1356 | pushq_cfi %rcx | 1365 | pushq_cfi %rcx |
1357 | jmp general_protection | 1366 | jmp general_protection |
1358 | CFI_RESTORE_STATE | 1367 | CFI_RESTORE_STATE |
1359 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ | 1368 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ |
1360 | movq (%rsp),%rcx | 1369 | movq (%rsp),%rcx |
1361 | CFI_RESTORE rcx | 1370 | CFI_RESTORE rcx |
1362 | movq 8(%rsp),%r11 | 1371 | movq 8(%rsp),%r11 |
1363 | CFI_RESTORE r11 | 1372 | CFI_RESTORE r11 |
1364 | addq $0x30,%rsp | 1373 | addq $0x30,%rsp |
1365 | CFI_ADJUST_CFA_OFFSET -0x30 | 1374 | CFI_ADJUST_CFA_OFFSET -0x30 |
1366 | pushq_cfi $0 | 1375 | pushq_cfi $0 |
1367 | SAVE_ALL | 1376 | SAVE_ALL |
1368 | jmp error_exit | 1377 | jmp error_exit |
1369 | CFI_ENDPROC | 1378 | CFI_ENDPROC |
1370 | END(xen_failsafe_callback) | 1379 | END(xen_failsafe_callback) |
1371 | 1380 | ||
1372 | apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ | 1381 | apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ |
1373 | xen_hvm_callback_vector xen_evtchn_do_upcall | 1382 | xen_hvm_callback_vector xen_evtchn_do_upcall |
1374 | 1383 | ||
1375 | #endif /* CONFIG_XEN */ | 1384 | #endif /* CONFIG_XEN */ |
1376 | 1385 | ||
1377 | /* | 1386 | /* |
1378 | * Some functions should be protected against kprobes | 1387 | * Some functions should be protected against kprobes |
1379 | */ | 1388 | */ |
1380 | .pushsection .kprobes.text, "ax" | 1389 | .pushsection .kprobes.text, "ax" |
1381 | 1390 | ||
1382 | paranoidzeroentry_ist debug do_debug DEBUG_STACK | 1391 | paranoidzeroentry_ist debug do_debug DEBUG_STACK |
1383 | paranoidzeroentry_ist int3 do_int3 DEBUG_STACK | 1392 | paranoidzeroentry_ist int3 do_int3 DEBUG_STACK |
1384 | paranoiderrorentry stack_segment do_stack_segment | 1393 | paranoiderrorentry stack_segment do_stack_segment |
1385 | #ifdef CONFIG_XEN | 1394 | #ifdef CONFIG_XEN |
1386 | zeroentry xen_debug do_debug | 1395 | zeroentry xen_debug do_debug |
1387 | zeroentry xen_int3 do_int3 | 1396 | zeroentry xen_int3 do_int3 |
1388 | errorentry xen_stack_segment do_stack_segment | 1397 | errorentry xen_stack_segment do_stack_segment |
1389 | #endif | 1398 | #endif |
1390 | errorentry general_protection do_general_protection | 1399 | errorentry general_protection do_general_protection |
1391 | errorentry page_fault do_page_fault | 1400 | errorentry page_fault do_page_fault |
1392 | #ifdef CONFIG_KVM_GUEST | 1401 | #ifdef CONFIG_KVM_GUEST |
1393 | errorentry async_page_fault do_async_page_fault | 1402 | errorentry async_page_fault do_async_page_fault |
1394 | #endif | 1403 | #endif |
1395 | #ifdef CONFIG_X86_MCE | 1404 | #ifdef CONFIG_X86_MCE |
1396 | paranoidzeroentry machine_check *machine_check_vector(%rip) | 1405 | paranoidzeroentry machine_check *machine_check_vector(%rip) |
1397 | #endif | 1406 | #endif |
1398 | 1407 | ||
1399 | /* | 1408 | /* |
1400 | * "Paranoid" exit path from exception stack. | 1409 | * "Paranoid" exit path from exception stack. |
1401 | * Paranoid because this is used by NMIs and cannot take | 1410 | * Paranoid because this is used by NMIs and cannot take |
1402 | * any kernel state for granted. | 1411 | * any kernel state for granted. |
1403 | * We don't do kernel preemption checks here, because only | 1412 | * We don't do kernel preemption checks here, because only |
1404 | * NMI should be common and it does not enable IRQs and | 1413 | * NMI should be common and it does not enable IRQs and |
1405 | * cannot get reschedule ticks. | 1414 | * cannot get reschedule ticks. |
1406 | * | 1415 | * |
1407 | * "trace" is 0 for the NMI handler only, because irq-tracing | 1416 | * "trace" is 0 for the NMI handler only, because irq-tracing |
1408 | * is fundamentally NMI-unsafe. (we cannot change the soft and | 1417 | * is fundamentally NMI-unsafe. (we cannot change the soft and |
1409 | * hard flags at once, atomically) | 1418 | * hard flags at once, atomically) |
1410 | */ | 1419 | */ |
1411 | 1420 | ||
1412 | /* ebx: no swapgs flag */ | 1421 | /* ebx: no swapgs flag */ |
1413 | ENTRY(paranoid_exit) | 1422 | ENTRY(paranoid_exit) |
1414 | DEFAULT_FRAME | 1423 | DEFAULT_FRAME |
1415 | DISABLE_INTERRUPTS(CLBR_NONE) | 1424 | DISABLE_INTERRUPTS(CLBR_NONE) |
1416 | TRACE_IRQS_OFF_DEBUG | 1425 | TRACE_IRQS_OFF_DEBUG |
1417 | testl %ebx,%ebx /* swapgs needed? */ | 1426 | testl %ebx,%ebx /* swapgs needed? */ |
1418 | jnz paranoid_restore | 1427 | jnz paranoid_restore |
1419 | testl $3,CS(%rsp) | 1428 | testl $3,CS(%rsp) |
1420 | jnz paranoid_userspace | 1429 | jnz paranoid_userspace |
1421 | paranoid_swapgs: | 1430 | paranoid_swapgs: |
1422 | TRACE_IRQS_IRETQ 0 | 1431 | TRACE_IRQS_IRETQ 0 |
1423 | SWAPGS_UNSAFE_STACK | 1432 | SWAPGS_UNSAFE_STACK |
1424 | RESTORE_ALL 8 | 1433 | RESTORE_ALL 8 |
1425 | jmp irq_return | 1434 | jmp irq_return |
1426 | paranoid_restore: | 1435 | paranoid_restore: |
1427 | TRACE_IRQS_IRETQ_DEBUG 0 | 1436 | TRACE_IRQS_IRETQ_DEBUG 0 |
1428 | RESTORE_ALL 8 | 1437 | RESTORE_ALL 8 |
1429 | jmp irq_return | 1438 | jmp irq_return |
1430 | paranoid_userspace: | 1439 | paranoid_userspace: |
1431 | GET_THREAD_INFO(%rcx) | 1440 | GET_THREAD_INFO(%rcx) |
1432 | movl TI_flags(%rcx),%ebx | 1441 | movl TI_flags(%rcx),%ebx |
1433 | andl $_TIF_WORK_MASK,%ebx | 1442 | andl $_TIF_WORK_MASK,%ebx |
1434 | jz paranoid_swapgs | 1443 | jz paranoid_swapgs |
1435 | movq %rsp,%rdi /* &pt_regs */ | 1444 | movq %rsp,%rdi /* &pt_regs */ |
1436 | call sync_regs | 1445 | call sync_regs |
1437 | movq %rax,%rsp /* switch stack for scheduling */ | 1446 | movq %rax,%rsp /* switch stack for scheduling */ |
1438 | testl $_TIF_NEED_RESCHED,%ebx | 1447 | testl $_TIF_NEED_RESCHED,%ebx |
1439 | jnz paranoid_schedule | 1448 | jnz paranoid_schedule |
1440 | movl %ebx,%edx /* arg3: thread flags */ | 1449 | movl %ebx,%edx /* arg3: thread flags */ |
1441 | TRACE_IRQS_ON | 1450 | TRACE_IRQS_ON |
1442 | ENABLE_INTERRUPTS(CLBR_NONE) | 1451 | ENABLE_INTERRUPTS(CLBR_NONE) |
1443 | xorl %esi,%esi /* arg2: oldset */ | 1452 | xorl %esi,%esi /* arg2: oldset */ |
1444 | movq %rsp,%rdi /* arg1: &pt_regs */ | 1453 | movq %rsp,%rdi /* arg1: &pt_regs */ |
1445 | call do_notify_resume | 1454 | call do_notify_resume |
1446 | DISABLE_INTERRUPTS(CLBR_NONE) | 1455 | DISABLE_INTERRUPTS(CLBR_NONE) |
1447 | TRACE_IRQS_OFF | 1456 | TRACE_IRQS_OFF |
1448 | jmp paranoid_userspace | 1457 | jmp paranoid_userspace |
1449 | paranoid_schedule: | 1458 | paranoid_schedule: |
1450 | TRACE_IRQS_ON | 1459 | TRACE_IRQS_ON |
1451 | ENABLE_INTERRUPTS(CLBR_ANY) | 1460 | ENABLE_INTERRUPTS(CLBR_ANY) |
1452 | call schedule | 1461 | call schedule |
1453 | DISABLE_INTERRUPTS(CLBR_ANY) | 1462 | DISABLE_INTERRUPTS(CLBR_ANY) |
1454 | TRACE_IRQS_OFF | 1463 | TRACE_IRQS_OFF |
1455 | jmp paranoid_userspace | 1464 | jmp paranoid_userspace |
1456 | CFI_ENDPROC | 1465 | CFI_ENDPROC |
1457 | END(paranoid_exit) | 1466 | END(paranoid_exit) |
1458 | 1467 | ||
1459 | /* | 1468 | /* |
1460 | * Exception entry point. This expects an error code/orig_rax on the stack. | 1469 | * Exception entry point. This expects an error code/orig_rax on the stack. |
1461 | * returns in "no swapgs flag" in %ebx. | 1470 | * returns in "no swapgs flag" in %ebx. |
1462 | */ | 1471 | */ |
1463 | ENTRY(error_entry) | 1472 | ENTRY(error_entry) |
1464 | XCPT_FRAME | 1473 | XCPT_FRAME |
1465 | CFI_ADJUST_CFA_OFFSET 15*8 | 1474 | CFI_ADJUST_CFA_OFFSET 15*8 |
1466 | /* oldrax contains error code */ | 1475 | /* oldrax contains error code */ |
1467 | cld | 1476 | cld |
1468 | movq_cfi rdi, RDI+8 | 1477 | movq_cfi rdi, RDI+8 |
1469 | movq_cfi rsi, RSI+8 | 1478 | movq_cfi rsi, RSI+8 |
1470 | movq_cfi rdx, RDX+8 | 1479 | movq_cfi rdx, RDX+8 |
1471 | movq_cfi rcx, RCX+8 | 1480 | movq_cfi rcx, RCX+8 |
1472 | movq_cfi rax, RAX+8 | 1481 | movq_cfi rax, RAX+8 |
1473 | movq_cfi r8, R8+8 | 1482 | movq_cfi r8, R8+8 |
1474 | movq_cfi r9, R9+8 | 1483 | movq_cfi r9, R9+8 |
1475 | movq_cfi r10, R10+8 | 1484 | movq_cfi r10, R10+8 |
1476 | movq_cfi r11, R11+8 | 1485 | movq_cfi r11, R11+8 |
1477 | movq_cfi rbx, RBX+8 | 1486 | movq_cfi rbx, RBX+8 |
1478 | movq_cfi rbp, RBP+8 | 1487 | movq_cfi rbp, RBP+8 |
1479 | movq_cfi r12, R12+8 | 1488 | movq_cfi r12, R12+8 |
1480 | movq_cfi r13, R13+8 | 1489 | movq_cfi r13, R13+8 |
1481 | movq_cfi r14, R14+8 | 1490 | movq_cfi r14, R14+8 |
1482 | movq_cfi r15, R15+8 | 1491 | movq_cfi r15, R15+8 |
1483 | xorl %ebx,%ebx | 1492 | xorl %ebx,%ebx |
1484 | testl $3,CS+8(%rsp) | 1493 | testl $3,CS+8(%rsp) |
1485 | je error_kernelspace | 1494 | je error_kernelspace |
1486 | error_swapgs: | 1495 | error_swapgs: |
1487 | SWAPGS | 1496 | SWAPGS |
1488 | error_sti: | 1497 | error_sti: |
1489 | TRACE_IRQS_OFF | 1498 | TRACE_IRQS_OFF |
1490 | ret | 1499 | ret |
1491 | 1500 | ||
1492 | /* | 1501 | /* |
1493 | * There are two places in the kernel that can potentially fault with | 1502 | * There are two places in the kernel that can potentially fault with |
1494 | * usergs. Handle them here. The exception handlers after iret run with | 1503 | * usergs. Handle them here. The exception handlers after iret run with |
1495 | * kernel gs again, so don't set the user space flag. B stepping K8s | 1504 | * kernel gs again, so don't set the user space flag. B stepping K8s |
1496 | * sometimes report a truncated RIP for IRET exceptions returning to | 1505 | * sometimes report a truncated RIP for IRET exceptions returning to |
1497 | * compat mode. Check for these here too. | 1506 | * compat mode. Check for these here too. |
1498 | */ | 1507 | */ |
1499 | error_kernelspace: | 1508 | error_kernelspace: |
1500 | incl %ebx | 1509 | incl %ebx |
1501 | leaq irq_return(%rip),%rcx | 1510 | leaq irq_return(%rip),%rcx |
1502 | cmpq %rcx,RIP+8(%rsp) | 1511 | cmpq %rcx,RIP+8(%rsp) |
1503 | je error_swapgs | 1512 | je error_swapgs |
1504 | movl %ecx,%eax /* zero extend */ | 1513 | movl %ecx,%eax /* zero extend */ |
1505 | cmpq %rax,RIP+8(%rsp) | 1514 | cmpq %rax,RIP+8(%rsp) |
1506 | je bstep_iret | 1515 | je bstep_iret |
1507 | cmpq $gs_change,RIP+8(%rsp) | 1516 | cmpq $gs_change,RIP+8(%rsp) |
1508 | je error_swapgs | 1517 | je error_swapgs |
1509 | jmp error_sti | 1518 | jmp error_sti |
1510 | 1519 | ||
1511 | bstep_iret: | 1520 | bstep_iret: |
1512 | /* Fix truncated RIP */ | 1521 | /* Fix truncated RIP */ |
1513 | movq %rcx,RIP+8(%rsp) | 1522 | movq %rcx,RIP+8(%rsp) |
1514 | jmp error_swapgs | 1523 | jmp error_swapgs |
1515 | CFI_ENDPROC | 1524 | CFI_ENDPROC |
1516 | END(error_entry) | 1525 | END(error_entry) |
1517 | 1526 | ||
1518 | 1527 | ||
1519 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | 1528 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ |
1520 | ENTRY(error_exit) | 1529 | ENTRY(error_exit) |
1521 | DEFAULT_FRAME | 1530 | DEFAULT_FRAME |
1522 | movl %ebx,%eax | 1531 | movl %ebx,%eax |
1523 | RESTORE_REST | 1532 | RESTORE_REST |
1524 | DISABLE_INTERRUPTS(CLBR_NONE) | 1533 | DISABLE_INTERRUPTS(CLBR_NONE) |
1525 | TRACE_IRQS_OFF | 1534 | TRACE_IRQS_OFF |
1526 | GET_THREAD_INFO(%rcx) | 1535 | GET_THREAD_INFO(%rcx) |
1527 | testl %eax,%eax | 1536 | testl %eax,%eax |
1528 | jne retint_kernel | 1537 | jne retint_kernel |
1529 | LOCKDEP_SYS_EXIT_IRQ | 1538 | LOCKDEP_SYS_EXIT_IRQ |
1530 | movl TI_flags(%rcx),%edx | 1539 | movl TI_flags(%rcx),%edx |
1531 | movl $_TIF_WORK_MASK,%edi | 1540 | movl $_TIF_WORK_MASK,%edi |
1532 | andl %edi,%edx | 1541 | andl %edi,%edx |
1533 | jnz retint_careful | 1542 | jnz retint_careful |
1534 | jmp retint_swapgs | 1543 | jmp retint_swapgs |
1535 | CFI_ENDPROC | 1544 | CFI_ENDPROC |
1536 | END(error_exit) | 1545 | END(error_exit) |
1537 | 1546 | ||
1538 | /* | 1547 | /* |
1539 | * Test if a given stack is an NMI stack or not. | 1548 | * Test if a given stack is an NMI stack or not. |
1540 | */ | 1549 | */ |
1541 | .macro test_in_nmi reg stack nmi_ret normal_ret | 1550 | .macro test_in_nmi reg stack nmi_ret normal_ret |
1542 | cmpq %\reg, \stack | 1551 | cmpq %\reg, \stack |
1543 | ja \normal_ret | 1552 | ja \normal_ret |
1544 | subq $EXCEPTION_STKSZ, %\reg | 1553 | subq $EXCEPTION_STKSZ, %\reg |
1545 | cmpq %\reg, \stack | 1554 | cmpq %\reg, \stack |
1546 | jb \normal_ret | 1555 | jb \normal_ret |
1547 | jmp \nmi_ret | 1556 | jmp \nmi_ret |
1548 | .endm | 1557 | .endm |
1549 | 1558 | ||
1550 | /* runs on exception stack */ | 1559 | /* runs on exception stack */ |
1551 | ENTRY(nmi) | 1560 | ENTRY(nmi) |
1552 | INTR_FRAME | 1561 | INTR_FRAME |
1553 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1562 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1554 | /* | 1563 | /* |
1555 | * We allow breakpoints in NMIs. If a breakpoint occurs, then | 1564 | * We allow breakpoints in NMIs. If a breakpoint occurs, then |
1556 | * the iretq it performs will take us out of NMI context. | 1565 | * the iretq it performs will take us out of NMI context. |
1557 | * This means that we can have nested NMIs where the next | 1566 | * This means that we can have nested NMIs where the next |
1558 | * NMI is using the top of the stack of the previous NMI. We | 1567 | * NMI is using the top of the stack of the previous NMI. We |
1559 | * can't let it execute because the nested NMI will corrupt the | 1568 | * can't let it execute because the nested NMI will corrupt the |
1560 | * stack of the previous NMI. NMI handlers are not re-entrant | 1569 | * stack of the previous NMI. NMI handlers are not re-entrant |
1561 | * anyway. | 1570 | * anyway. |
1562 | * | 1571 | * |
1563 | * To handle this case we do the following: | 1572 | * To handle this case we do the following: |
1564 | * Check a special location on the stack that contains | 1573 | * Check a special location on the stack that contains |
1565 | * a variable that is set when NMIs are executing. | 1574 | * a variable that is set when NMIs are executing. |
1566 | * The interrupted task's stack is also checked to see if it | 1575 | * The interrupted task's stack is also checked to see if it |
1567 | * is an NMI stack. | 1576 | * is an NMI stack. |
1568 | * If the variable is not set and the stack is not the NMI | 1577 | * If the variable is not set and the stack is not the NMI |
1569 | * stack then: | 1578 | * stack then: |
1570 | * o Set the special variable on the stack | 1579 | * o Set the special variable on the stack |
1571 | * o Copy the interrupt frame into a "saved" location on the stack | 1580 | * o Copy the interrupt frame into a "saved" location on the stack |
1572 | * o Copy the interrupt frame into a "copy" location on the stack | 1581 | * o Copy the interrupt frame into a "copy" location on the stack |
1573 | * o Continue processing the NMI | 1582 | * o Continue processing the NMI |
1574 | * If the variable is set or the previous stack is the NMI stack: | 1583 | * If the variable is set or the previous stack is the NMI stack: |
1575 | * o Modify the "copy" location to jump to the repeat_nmi | 1584 | * o Modify the "copy" location to jump to the repeat_nmi |
1576 | * o return back to the first NMI | 1585 | * o return back to the first NMI |
1577 | * | 1586 | * |
1578 | * Now on exit of the first NMI, we first clear the stack variable | 1587 | * Now on exit of the first NMI, we first clear the stack variable |
1579 | * The NMI stack will tell any nested NMIs at that point that it is | 1588 | * The NMI stack will tell any nested NMIs at that point that it is |
1580 | * nested. Then we pop the stack normally with iret, and if there was | 1589 | * nested. Then we pop the stack normally with iret, and if there was |
1581 | * a nested NMI that updated the copy interrupt stack frame, a | 1590 | * a nested NMI that updated the copy interrupt stack frame, a |
1582 | * jump will be made to the repeat_nmi code that will handle the second | 1591 | * jump will be made to the repeat_nmi code that will handle the second |
1583 | * NMI. | 1592 | * NMI. |
1584 | */ | 1593 | */ |
1585 | 1594 | ||
1586 | /* Use %rdx as our temp variable throughout */ | 1595 | /* Use %rdx as our temp variable throughout */ |
1587 | pushq_cfi %rdx | 1596 | pushq_cfi %rdx |
1588 | CFI_REL_OFFSET rdx, 0 | 1597 | CFI_REL_OFFSET rdx, 0 |
1589 | 1598 | ||
1590 | /* | 1599 | /* |
1591 | * If %cs was not the kernel segment, then the NMI triggered in user | 1600 | * If %cs was not the kernel segment, then the NMI triggered in user |
1592 | * space, which means it is definitely not nested. | 1601 | * space, which means it is definitely not nested. |
1593 | */ | 1602 | */ |
1594 | cmpl $__KERNEL_CS, 16(%rsp) | 1603 | cmpl $__KERNEL_CS, 16(%rsp) |
1595 | jne first_nmi | 1604 | jne first_nmi |
1596 | 1605 | ||
1597 | /* | 1606 | /* |
1598 | * Check the special variable on the stack to see if NMIs are | 1607 | * Check the special variable on the stack to see if NMIs are |
1599 | * executing. | 1608 | * executing. |
1600 | */ | 1609 | */ |
1601 | cmpl $1, -8(%rsp) | 1610 | cmpl $1, -8(%rsp) |
1602 | je nested_nmi | 1611 | je nested_nmi |
1603 | 1612 | ||
1604 | /* | 1613 | /* |
1605 | * Now test if the previous stack was an NMI stack. | 1614 | * Now test if the previous stack was an NMI stack. |
1606 | * We need the double check. We check the NMI stack to satisfy the | 1615 | * We need the double check. We check the NMI stack to satisfy the |
1607 | * race when the first NMI clears the variable before returning. | 1616 | * race when the first NMI clears the variable before returning. |
1608 | * We check the variable because the first NMI could be in a | 1617 | * We check the variable because the first NMI could be in a |
1609 | * breakpoint routine using a breakpoint stack. | 1618 | * breakpoint routine using a breakpoint stack. |
1610 | */ | 1619 | */ |
1611 | lea 6*8(%rsp), %rdx | 1620 | lea 6*8(%rsp), %rdx |
1612 | test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi | 1621 | test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi |
1613 | CFI_REMEMBER_STATE | 1622 | CFI_REMEMBER_STATE |
1614 | 1623 | ||
1615 | nested_nmi: | 1624 | nested_nmi: |
1616 | /* | 1625 | /* |
1617 | * Do nothing if we interrupted the fixup in repeat_nmi. | 1626 | * Do nothing if we interrupted the fixup in repeat_nmi. |
1618 | * It's about to repeat the NMI handler, so we are fine | 1627 | * It's about to repeat the NMI handler, so we are fine |
1619 | * with ignoring this one. | 1628 | * with ignoring this one. |
1620 | */ | 1629 | */ |
1621 | movq $repeat_nmi, %rdx | 1630 | movq $repeat_nmi, %rdx |
1622 | cmpq 8(%rsp), %rdx | 1631 | cmpq 8(%rsp), %rdx |
1623 | ja 1f | 1632 | ja 1f |
1624 | movq $end_repeat_nmi, %rdx | 1633 | movq $end_repeat_nmi, %rdx |
1625 | cmpq 8(%rsp), %rdx | 1634 | cmpq 8(%rsp), %rdx |
1626 | ja nested_nmi_out | 1635 | ja nested_nmi_out |
1627 | 1636 | ||
1628 | 1: | 1637 | 1: |
1629 | /* Set up the interrupted NMIs stack to jump to repeat_nmi */ | 1638 | /* Set up the interrupted NMIs stack to jump to repeat_nmi */ |
1630 | leaq -6*8(%rsp), %rdx | 1639 | leaq -6*8(%rsp), %rdx |
1631 | movq %rdx, %rsp | 1640 | movq %rdx, %rsp |
1632 | CFI_ADJUST_CFA_OFFSET 6*8 | 1641 | CFI_ADJUST_CFA_OFFSET 6*8 |
1633 | pushq_cfi $__KERNEL_DS | 1642 | pushq_cfi $__KERNEL_DS |
1634 | pushq_cfi %rdx | 1643 | pushq_cfi %rdx |
1635 | pushfq_cfi | 1644 | pushfq_cfi |
1636 | pushq_cfi $__KERNEL_CS | 1645 | pushq_cfi $__KERNEL_CS |
1637 | pushq_cfi $repeat_nmi | 1646 | pushq_cfi $repeat_nmi |
1638 | 1647 | ||
1639 | /* Put stack back */ | 1648 | /* Put stack back */ |
1640 | addq $(11*8), %rsp | 1649 | addq $(11*8), %rsp |
1641 | CFI_ADJUST_CFA_OFFSET -11*8 | 1650 | CFI_ADJUST_CFA_OFFSET -11*8 |
1642 | 1651 | ||
1643 | nested_nmi_out: | 1652 | nested_nmi_out: |
1644 | popq_cfi %rdx | 1653 | popq_cfi %rdx |
1645 | CFI_RESTORE rdx | 1654 | CFI_RESTORE rdx |
1646 | 1655 | ||
1647 | /* No need to check faults here */ | 1656 | /* No need to check faults here */ |
1648 | INTERRUPT_RETURN | 1657 | INTERRUPT_RETURN |
1649 | 1658 | ||
1650 | CFI_RESTORE_STATE | 1659 | CFI_RESTORE_STATE |
1651 | first_nmi: | 1660 | first_nmi: |
1652 | /* | 1661 | /* |
1653 | * Because nested NMIs will use the pushed location that we | 1662 | * Because nested NMIs will use the pushed location that we |
1654 | * stored in rdx, we must keep that space available. | 1663 | * stored in rdx, we must keep that space available. |
1655 | * Here's what our stack frame will look like: | 1664 | * Here's what our stack frame will look like: |
1656 | * +-------------------------+ | 1665 | * +-------------------------+ |
1657 | * | original SS | | 1666 | * | original SS | |
1658 | * | original Return RSP | | 1667 | * | original Return RSP | |
1659 | * | original RFLAGS | | 1668 | * | original RFLAGS | |
1660 | * | original CS | | 1669 | * | original CS | |
1661 | * | original RIP | | 1670 | * | original RIP | |
1662 | * +-------------------------+ | 1671 | * +-------------------------+ |
1663 | * | temp storage for rdx | | 1672 | * | temp storage for rdx | |
1664 | * +-------------------------+ | 1673 | * +-------------------------+ |
1665 | * | NMI executing variable | | 1674 | * | NMI executing variable | |
1666 | * +-------------------------+ | 1675 | * +-------------------------+ |
1667 | * | Saved SS | | 1676 | * | Saved SS | |
1668 | * | Saved Return RSP | | 1677 | * | Saved Return RSP | |
1669 | * | Saved RFLAGS | | 1678 | * | Saved RFLAGS | |
1670 | * | Saved CS | | 1679 | * | Saved CS | |
1671 | * | Saved RIP | | 1680 | * | Saved RIP | |
1672 | * +-------------------------+ | 1681 | * +-------------------------+ |
1673 | * | copied SS | | 1682 | * | copied SS | |
1674 | * | copied Return RSP | | 1683 | * | copied Return RSP | |
1675 | * | copied RFLAGS | | 1684 | * | copied RFLAGS | |
1676 | * | copied CS | | 1685 | * | copied CS | |
1677 | * | copied RIP | | 1686 | * | copied RIP | |
1678 | * +-------------------------+ | 1687 | * +-------------------------+ |
1679 | * | pt_regs | | 1688 | * | pt_regs | |
1680 | * +-------------------------+ | 1689 | * +-------------------------+ |
1681 | * | 1690 | * |
1682 | * The saved stack frame is used to fix up the copied stack frame | 1691 | * The saved stack frame is used to fix up the copied stack frame |
1683 | * that a nested NMI may change to make the interrupted NMI iret jump | 1692 | * that a nested NMI may change to make the interrupted NMI iret jump |
1684 | * to the repeat_nmi. The original stack frame and the temp storage | 1693 | * to the repeat_nmi. The original stack frame and the temp storage |
1685 | * are also used by nested NMIs and cannot be trusted on exit. | 1694 | * are also used by nested NMIs and cannot be trusted on exit. |
1686 | */ | 1695 | */ |
1687 | /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ | 1696 | /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ |
1688 | movq (%rsp), %rdx | 1697 | movq (%rsp), %rdx |
1689 | CFI_RESTORE rdx | 1698 | CFI_RESTORE rdx |
1690 | 1699 | ||
1691 | /* Set the NMI executing variable on the stack. */ | 1700 | /* Set the NMI executing variable on the stack. */ |
1692 | pushq_cfi $1 | 1701 | pushq_cfi $1 |
1693 | 1702 | ||
1694 | /* Copy the stack frame to the Saved frame */ | 1703 | /* Copy the stack frame to the Saved frame */ |
1695 | .rept 5 | 1704 | .rept 5 |
1696 | pushq_cfi 6*8(%rsp) | 1705 | pushq_cfi 6*8(%rsp) |
1697 | .endr | 1706 | .endr |
1698 | CFI_DEF_CFA_OFFSET SS+8-RIP | 1707 | CFI_DEF_CFA_OFFSET SS+8-RIP |
1699 | 1708 | ||
1700 | /* Everything up to here is safe from nested NMIs */ | 1709 | /* Everything up to here is safe from nested NMIs */ |
1701 | 1710 | ||
1702 | /* | 1711 | /* |
1703 | * If there was a nested NMI, the first NMI's iret will return | 1712 | * If there was a nested NMI, the first NMI's iret will return |
1704 | * here. But NMIs are still enabled and we can take another | 1713 | * here. But NMIs are still enabled and we can take another |
1705 | * nested NMI. The nested NMI checks the interrupted RIP to see | 1714 | * nested NMI. The nested NMI checks the interrupted RIP to see |
1706 | * if it is between repeat_nmi and end_repeat_nmi, and if so | 1715 | * if it is between repeat_nmi and end_repeat_nmi, and if so |
1707 | * it will just return, as we are about to repeat an NMI anyway. | 1716 | * it will just return, as we are about to repeat an NMI anyway. |
1708 | * This makes it safe to copy to the stack frame that a nested | 1717 | * This makes it safe to copy to the stack frame that a nested |
1709 | * NMI will update. | 1718 | * NMI will update. |
1710 | */ | 1719 | */ |
1711 | repeat_nmi: | 1720 | repeat_nmi: |
1712 | /* | 1721 | /* |
1713 | * Update the stack variable to say we are still in NMI (the update | 1722 | * Update the stack variable to say we are still in NMI (the update |
1714 | * is benign for the non-repeat case, where 1 was pushed just above | 1723 | * is benign for the non-repeat case, where 1 was pushed just above |
1715 | * to this very stack slot). | 1724 | * to this very stack slot). |
1716 | */ | 1725 | */ |
1717 | movq $1, 5*8(%rsp) | 1726 | movq $1, 5*8(%rsp) |
1718 | 1727 | ||
1719 | /* Make another copy, this one may be modified by nested NMIs */ | 1728 | /* Make another copy, this one may be modified by nested NMIs */ |
1720 | .rept 5 | 1729 | .rept 5 |
1721 | pushq_cfi 4*8(%rsp) | 1730 | pushq_cfi 4*8(%rsp) |
1722 | .endr | 1731 | .endr |
1723 | CFI_DEF_CFA_OFFSET SS+8-RIP | 1732 | CFI_DEF_CFA_OFFSET SS+8-RIP |
1724 | end_repeat_nmi: | 1733 | end_repeat_nmi: |
1725 | 1734 | ||
1726 | /* | 1735 | /* |
1727 | * Everything below this point can be preempted by a nested | 1736 | * Everything below this point can be preempted by a nested |
1728 | * NMI if the first NMI took an exception and reset our iret stack | 1737 | * NMI if the first NMI took an exception and reset our iret stack |
1729 | * so that we repeat another NMI. | 1738 | * so that we repeat another NMI. |
1730 | */ | 1739 | */ |
1731 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1740 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1732 | subq $ORIG_RAX-R15, %rsp | 1741 | subq $ORIG_RAX-R15, %rsp |
1733 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1742 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1734 | /* | 1743 | /* |
1735 | * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit | 1744 | * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit |
1736 | * as we should not be calling schedule in NMI context, | 1745 | * as we should not be calling schedule in NMI context, |
1737 | * even with normal interrupts enabled. An NMI should not be | 1746 | * even with normal interrupts enabled. An NMI should not be |
1738 | * setting NEED_RESCHED or anything that normal interrupts and | 1747 | * setting NEED_RESCHED or anything that normal interrupts and |
1739 | * exceptions might do. | 1748 | * exceptions might do. |
1740 | */ | 1749 | */ |
1741 | call save_paranoid | 1750 | call save_paranoid |
1742 | DEFAULT_FRAME 0 | 1751 | DEFAULT_FRAME 0 |
1743 | 1752 | ||
1744 | /* | 1753 | /* |
1745 | * Save off the CR2 register. If we take a page fault in the NMI then | 1754 | * Save off the CR2 register. If we take a page fault in the NMI then |
1746 | * it could corrupt the CR2 value. If the NMI preempts a page fault | 1755 | * it could corrupt the CR2 value. If the NMI preempts a page fault |
1747 | * handler before it was able to read the CR2 register, and then the | 1756 | * handler before it was able to read the CR2 register, and then the |
1748 | * NMI itself takes a page fault, the page fault that was preempted | 1757 | * NMI itself takes a page fault, the page fault that was preempted |
1749 | * will read the information from the NMI page fault and not the | 1758 | * will read the information from the NMI page fault and not the |
1750 | * original fault. Save it off and restore it if it changes. | 1759 | * original fault. Save it off and restore it if it changes. |
1751 | * Use the r12 callee-saved register. | 1760 | * Use the r12 callee-saved register. |
1752 | */ | 1761 | */ |
1753 | movq %cr2, %r12 | 1762 | movq %cr2, %r12 |
1754 | 1763 | ||
1755 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | 1764 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ |
1756 | movq %rsp,%rdi | 1765 | movq %rsp,%rdi |
1757 | movq $-1,%rsi | 1766 | movq $-1,%rsi |
1758 | call do_nmi | 1767 | call do_nmi |
1759 | 1768 | ||
1760 | /* Did the NMI take a page fault? Restore cr2 if it did */ | 1769 | /* Did the NMI take a page fault? Restore cr2 if it did */ |
1761 | movq %cr2, %rcx | 1770 | movq %cr2, %rcx |
1762 | cmpq %rcx, %r12 | 1771 | cmpq %rcx, %r12 |
1763 | je 1f | 1772 | je 1f |
1764 | movq %r12, %cr2 | 1773 | movq %r12, %cr2 |
1765 | 1: | 1774 | 1: |
1766 | 1775 | ||
1767 | testl %ebx,%ebx /* swapgs needed? */ | 1776 | testl %ebx,%ebx /* swapgs needed? */ |
1768 | jnz nmi_restore | 1777 | jnz nmi_restore |
1769 | nmi_swapgs: | 1778 | nmi_swapgs: |
1770 | SWAPGS_UNSAFE_STACK | 1779 | SWAPGS_UNSAFE_STACK |
1771 | nmi_restore: | 1780 | nmi_restore: |
1772 | RESTORE_ALL 8 | 1781 | RESTORE_ALL 8 |
1773 | /* Clear the NMI executing stack variable */ | 1782 | /* Clear the NMI executing stack variable */ |
1774 | movq $0, 10*8(%rsp) | 1783 | movq $0, 10*8(%rsp) |
1775 | jmp irq_return | 1784 | jmp irq_return |
1776 | CFI_ENDPROC | 1785 | CFI_ENDPROC |
1777 | END(nmi) | 1786 | END(nmi) |
1778 | 1787 | ||
1779 | ENTRY(ignore_sysret) | 1788 | ENTRY(ignore_sysret) |
1780 | CFI_STARTPROC | 1789 | CFI_STARTPROC |
1781 | mov $-ENOSYS,%eax | 1790 | mov $-ENOSYS,%eax |
1782 | sysret | 1791 | sysret |
1783 | CFI_ENDPROC | 1792 | CFI_ENDPROC |
1784 | END(ignore_sysret) | 1793 | END(ignore_sysret) |
1785 | 1794 | ||
1786 | /* | 1795 | /* |
1787 | * End of kprobes section | 1796 | * End of kprobes section |
1788 | */ | 1797 | */ |
1789 | .popsection | 1798 | .popsection |
1790 | 1799 |
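Note on the ASM_CLAC/ASM_STAC markers: every exception entry macro above now begins with ASM_CLAC, and the user-copy routines in the files that follow open their user-space access window with ASM_STAC and close it with ASM_CLAC before returning. Both macros come from the new <asm/smap.h> included by these files; conceptually, each one emits a 3-byte NOP that the alternatives machinery patches into the 3-byte CLAC/STAC opcode when the CPU reports X86_FEATURE_SMAP. The following is only a rough sketch of that pattern, not the exact header contents; the label numbers, the ASM_NOP3 helper (from <asm/nops.h>) and the altinstruction_entry lengths are illustrative assumptions.

	/* Illustrative sketch: CLAC gated on SMAP via the alternatives section */
	.macro ASM_CLAC
	661:	ASM_NOP3			/* default: plain 3-byte nop when SMAP is absent */
	.pushsection .altinstr_replacement, "ax"
	662:	.byte 0x0f, 0x01, 0xca		/* clac: clear EFLAGS.AC */
	.popsection
	.pushsection .altinstructions, "a"
	altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3
	.popsection
	.endm

	/* ASM_STAC would follow the same shape with the stac opcode
	 * 0x0f, 0x01, 0xcb, which sets EFLAGS.AC and re-opens user access. */

With this pattern each ASM_CLAC added to zeroentry, errorentry and the paranoid variants costs a single 3-byte instruction (a NOP on non-SMAP hardware), which is why it can sit unconditionally at the top of every exception stub.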
arch/x86/lib/copy_user_64.S
1 | /* | 1 | /* |
2 | * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com> | 2 | * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com> |
3 | * Copyright 2002 Andi Kleen, SuSE Labs. | 3 | * Copyright 2002 Andi Kleen, SuSE Labs. |
4 | * Subject to the GNU Public License v2. | 4 | * Subject to the GNU Public License v2. |
5 | * | 5 | * |
6 | * Functions to copy from and to user space. | 6 | * Functions to copy from and to user space. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
10 | #include <asm/dwarf2.h> | 10 | #include <asm/dwarf2.h> |
11 | 11 | ||
12 | #define FIX_ALIGNMENT 1 | 12 | #define FIX_ALIGNMENT 1 |
13 | 13 | ||
14 | #include <asm/current.h> | 14 | #include <asm/current.h> |
15 | #include <asm/asm-offsets.h> | 15 | #include <asm/asm-offsets.h> |
16 | #include <asm/thread_info.h> | 16 | #include <asm/thread_info.h> |
17 | #include <asm/cpufeature.h> | 17 | #include <asm/cpufeature.h> |
18 | #include <asm/alternative-asm.h> | 18 | #include <asm/alternative-asm.h> |
19 | #include <asm/asm.h> | 19 | #include <asm/asm.h> |
20 | #include <asm/smap.h> | ||
20 | 21 | ||
21 | /* | 22 | /* |
22 | * By placing feature2 after feature1 in altinstructions section, we logically | 23 | * By placing feature2 after feature1 in altinstructions section, we logically |
23 | * implement: | 24 | * implement: |
24 | * If CPU has feature2, jmp to alt2 is used | 25 | * If CPU has feature2, jmp to alt2 is used |
25 | * else if CPU has feature1, jmp to alt1 is used | 26 | * else if CPU has feature1, jmp to alt1 is used |
26 | * else jmp to orig is used. | 27 | * else jmp to orig is used. |
27 | */ | 28 | */ |
28 | .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2 | 29 | .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2 |
29 | 0: | 30 | 0: |
30 | .byte 0xe9 /* 32bit jump */ | 31 | .byte 0xe9 /* 32bit jump */ |
31 | .long \orig-1f /* by default jump to orig */ | 32 | .long \orig-1f /* by default jump to orig */ |
32 | 1: | 33 | 1: |
33 | .section .altinstr_replacement,"ax" | 34 | .section .altinstr_replacement,"ax" |
34 | 2: .byte 0xe9 /* near jump with 32bit immediate */ | 35 | 2: .byte 0xe9 /* near jump with 32bit immediate */ |
35 | .long \alt1-1b /* offset */ /* or alternatively to alt1 */ | 36 | .long \alt1-1b /* offset */ /* or alternatively to alt1 */ |
36 | 3: .byte 0xe9 /* near jump with 32bit immediate */ | 37 | 3: .byte 0xe9 /* near jump with 32bit immediate */ |
37 | .long \alt2-1b /* offset */ /* or alternatively to alt2 */ | 38 | .long \alt2-1b /* offset */ /* or alternatively to alt2 */ |
38 | .previous | 39 | .previous |
39 | 40 | ||
40 | .section .altinstructions,"a" | 41 | .section .altinstructions,"a" |
41 | altinstruction_entry 0b,2b,\feature1,5,5 | 42 | altinstruction_entry 0b,2b,\feature1,5,5 |
42 | altinstruction_entry 0b,3b,\feature2,5,5 | 43 | altinstruction_entry 0b,3b,\feature2,5,5 |
43 | .previous | 44 | .previous |
44 | .endm | 45 | .endm |
45 | 46 | ||
46 | .macro ALIGN_DESTINATION | 47 | .macro ALIGN_DESTINATION |
47 | #ifdef FIX_ALIGNMENT | 48 | #ifdef FIX_ALIGNMENT |
48 | /* check for bad alignment of destination */ | 49 | /* check for bad alignment of destination */ |
49 | movl %edi,%ecx | 50 | movl %edi,%ecx |
50 | andl $7,%ecx | 51 | andl $7,%ecx |
51 | jz 102f /* already aligned */ | 52 | jz 102f /* already aligned */ |
52 | subl $8,%ecx | 53 | subl $8,%ecx |
53 | negl %ecx | 54 | negl %ecx |
54 | subl %ecx,%edx | 55 | subl %ecx,%edx |
55 | 100: movb (%rsi),%al | 56 | 100: movb (%rsi),%al |
56 | 101: movb %al,(%rdi) | 57 | 101: movb %al,(%rdi) |
57 | incq %rsi | 58 | incq %rsi |
58 | incq %rdi | 59 | incq %rdi |
59 | decl %ecx | 60 | decl %ecx |
60 | jnz 100b | 61 | jnz 100b |
61 | 102: | 62 | 102: |
62 | .section .fixup,"ax" | 63 | .section .fixup,"ax" |
63 | 103: addl %ecx,%edx /* ecx is zerorest also */ | 64 | 103: addl %ecx,%edx /* ecx is zerorest also */ |
64 | jmp copy_user_handle_tail | 65 | jmp copy_user_handle_tail |
65 | .previous | 66 | .previous |
66 | 67 | ||
67 | _ASM_EXTABLE(100b,103b) | 68 | _ASM_EXTABLE(100b,103b) |
68 | _ASM_EXTABLE(101b,103b) | 69 | _ASM_EXTABLE(101b,103b) |
69 | #endif | 70 | #endif |
70 | .endm | 71 | .endm |
71 | 72 | ||
72 | /* Standard copy_to_user with segment limit checking */ | 73 | /* Standard copy_to_user with segment limit checking */ |
73 | ENTRY(_copy_to_user) | 74 | ENTRY(_copy_to_user) |
74 | CFI_STARTPROC | 75 | CFI_STARTPROC |
75 | GET_THREAD_INFO(%rax) | 76 | GET_THREAD_INFO(%rax) |
76 | movq %rdi,%rcx | 77 | movq %rdi,%rcx |
77 | addq %rdx,%rcx | 78 | addq %rdx,%rcx |
78 | jc bad_to_user | 79 | jc bad_to_user |
79 | cmpq TI_addr_limit(%rax),%rcx | 80 | cmpq TI_addr_limit(%rax),%rcx |
80 | ja bad_to_user | 81 | ja bad_to_user |
81 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ | 82 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ |
82 | copy_user_generic_unrolled,copy_user_generic_string, \ | 83 | copy_user_generic_unrolled,copy_user_generic_string, \ |
83 | copy_user_enhanced_fast_string | 84 | copy_user_enhanced_fast_string |
84 | CFI_ENDPROC | 85 | CFI_ENDPROC |
85 | ENDPROC(_copy_to_user) | 86 | ENDPROC(_copy_to_user) |
86 | 87 | ||
87 | /* Standard copy_from_user with segment limit checking */ | 88 | /* Standard copy_from_user with segment limit checking */ |
88 | ENTRY(_copy_from_user) | 89 | ENTRY(_copy_from_user) |
89 | CFI_STARTPROC | 90 | CFI_STARTPROC |
90 | GET_THREAD_INFO(%rax) | 91 | GET_THREAD_INFO(%rax) |
91 | movq %rsi,%rcx | 92 | movq %rsi,%rcx |
92 | addq %rdx,%rcx | 93 | addq %rdx,%rcx |
93 | jc bad_from_user | 94 | jc bad_from_user |
94 | cmpq TI_addr_limit(%rax),%rcx | 95 | cmpq TI_addr_limit(%rax),%rcx |
95 | ja bad_from_user | 96 | ja bad_from_user |
96 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ | 97 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ |
97 | copy_user_generic_unrolled,copy_user_generic_string, \ | 98 | copy_user_generic_unrolled,copy_user_generic_string, \ |
98 | copy_user_enhanced_fast_string | 99 | copy_user_enhanced_fast_string |
99 | CFI_ENDPROC | 100 | CFI_ENDPROC |
100 | ENDPROC(_copy_from_user) | 101 | ENDPROC(_copy_from_user) |
101 | 102 | ||
102 | .section .fixup,"ax" | 103 | .section .fixup,"ax" |
103 | /* must zero dest */ | 104 | /* must zero dest */ |
104 | ENTRY(bad_from_user) | 105 | ENTRY(bad_from_user) |
105 | bad_from_user: | 106 | bad_from_user: |
106 | CFI_STARTPROC | 107 | CFI_STARTPROC |
107 | movl %edx,%ecx | 108 | movl %edx,%ecx |
108 | xorl %eax,%eax | 109 | xorl %eax,%eax |
109 | rep | 110 | rep |
110 | stosb | 111 | stosb |
111 | bad_to_user: | 112 | bad_to_user: |
112 | movl %edx,%eax | 113 | movl %edx,%eax |
113 | ret | 114 | ret |
114 | CFI_ENDPROC | 115 | CFI_ENDPROC |
115 | ENDPROC(bad_from_user) | 116 | ENDPROC(bad_from_user) |
116 | .previous | 117 | .previous |
117 | 118 | ||
118 | /* | 119 | /* |
119 | * copy_user_generic_unrolled - memory copy with exception handling. | 120 | * copy_user_generic_unrolled - memory copy with exception handling. |
120 | * This version is for CPUs like P4 that don't have efficient micro | 121 | * This version is for CPUs like P4 that don't have efficient micro |
121 | * code for rep movsq | 122 | * code for rep movsq |
122 | * | 123 | * |
123 | * Input: | 124 | * Input: |
124 | * rdi destination | 125 | * rdi destination |
125 | * rsi source | 126 | * rsi source |
126 | * rdx count | 127 | * rdx count |
127 | * | 128 | * |
128 | * Output: | 129 | * Output: |
129 | * eax uncopied bytes or 0 if successful. | 130 | * eax uncopied bytes or 0 if successful. |
130 | */ | 131 | */ |
131 | ENTRY(copy_user_generic_unrolled) | 132 | ENTRY(copy_user_generic_unrolled) |
132 | CFI_STARTPROC | 133 | CFI_STARTPROC |
134 | ASM_STAC | ||
133 | cmpl $8,%edx | 135 | cmpl $8,%edx |
134 | jb 20f /* less than 8 bytes, go to byte copy loop */ | 136 | jb 20f /* less than 8 bytes, go to byte copy loop */ |
135 | ALIGN_DESTINATION | 137 | ALIGN_DESTINATION |
136 | movl %edx,%ecx | 138 | movl %edx,%ecx |
137 | andl $63,%edx | 139 | andl $63,%edx |
138 | shrl $6,%ecx | 140 | shrl $6,%ecx |
139 | jz 17f | 141 | jz 17f |
140 | 1: movq (%rsi),%r8 | 142 | 1: movq (%rsi),%r8 |
141 | 2: movq 1*8(%rsi),%r9 | 143 | 2: movq 1*8(%rsi),%r9 |
142 | 3: movq 2*8(%rsi),%r10 | 144 | 3: movq 2*8(%rsi),%r10 |
143 | 4: movq 3*8(%rsi),%r11 | 145 | 4: movq 3*8(%rsi),%r11 |
144 | 5: movq %r8,(%rdi) | 146 | 5: movq %r8,(%rdi) |
145 | 6: movq %r9,1*8(%rdi) | 147 | 6: movq %r9,1*8(%rdi) |
146 | 7: movq %r10,2*8(%rdi) | 148 | 7: movq %r10,2*8(%rdi) |
147 | 8: movq %r11,3*8(%rdi) | 149 | 8: movq %r11,3*8(%rdi) |
148 | 9: movq 4*8(%rsi),%r8 | 150 | 9: movq 4*8(%rsi),%r8 |
149 | 10: movq 5*8(%rsi),%r9 | 151 | 10: movq 5*8(%rsi),%r9 |
150 | 11: movq 6*8(%rsi),%r10 | 152 | 11: movq 6*8(%rsi),%r10 |
151 | 12: movq 7*8(%rsi),%r11 | 153 | 12: movq 7*8(%rsi),%r11 |
152 | 13: movq %r8,4*8(%rdi) | 154 | 13: movq %r8,4*8(%rdi) |
153 | 14: movq %r9,5*8(%rdi) | 155 | 14: movq %r9,5*8(%rdi) |
154 | 15: movq %r10,6*8(%rdi) | 156 | 15: movq %r10,6*8(%rdi) |
155 | 16: movq %r11,7*8(%rdi) | 157 | 16: movq %r11,7*8(%rdi) |
156 | leaq 64(%rsi),%rsi | 158 | leaq 64(%rsi),%rsi |
157 | leaq 64(%rdi),%rdi | 159 | leaq 64(%rdi),%rdi |
158 | decl %ecx | 160 | decl %ecx |
159 | jnz 1b | 161 | jnz 1b |
160 | 17: movl %edx,%ecx | 162 | 17: movl %edx,%ecx |
161 | andl $7,%edx | 163 | andl $7,%edx |
162 | shrl $3,%ecx | 164 | shrl $3,%ecx |
163 | jz 20f | 165 | jz 20f |
164 | 18: movq (%rsi),%r8 | 166 | 18: movq (%rsi),%r8 |
165 | 19: movq %r8,(%rdi) | 167 | 19: movq %r8,(%rdi) |
166 | leaq 8(%rsi),%rsi | 168 | leaq 8(%rsi),%rsi |
167 | leaq 8(%rdi),%rdi | 169 | leaq 8(%rdi),%rdi |
168 | decl %ecx | 170 | decl %ecx |
169 | jnz 18b | 171 | jnz 18b |
170 | 20: andl %edx,%edx | 172 | 20: andl %edx,%edx |
171 | jz 23f | 173 | jz 23f |
172 | movl %edx,%ecx | 174 | movl %edx,%ecx |
173 | 21: movb (%rsi),%al | 175 | 21: movb (%rsi),%al |
174 | 22: movb %al,(%rdi) | 176 | 22: movb %al,(%rdi) |
175 | incq %rsi | 177 | incq %rsi |
176 | incq %rdi | 178 | incq %rdi |
177 | decl %ecx | 179 | decl %ecx |
178 | jnz 21b | 180 | jnz 21b |
179 | 23: xor %eax,%eax | 181 | 23: xor %eax,%eax |
182 | ASM_CLAC | ||
180 | ret | 183 | ret |
181 | 184 | ||
182 | .section .fixup,"ax" | 185 | .section .fixup,"ax" |
183 | 30: shll $6,%ecx | 186 | 30: shll $6,%ecx |
184 | addl %ecx,%edx | 187 | addl %ecx,%edx |
185 | jmp 60f | 188 | jmp 60f |
186 | 40: lea (%rdx,%rcx,8),%rdx | 189 | 40: lea (%rdx,%rcx,8),%rdx |
187 | jmp 60f | 190 | jmp 60f |
188 | 50: movl %ecx,%edx | 191 | 50: movl %ecx,%edx |
189 | 60: jmp copy_user_handle_tail /* ecx is zerorest also */ | 192 | 60: jmp copy_user_handle_tail /* ecx is zerorest also */ |
190 | .previous | 193 | .previous |
191 | 194 | ||
192 | _ASM_EXTABLE(1b,30b) | 195 | _ASM_EXTABLE(1b,30b) |
193 | _ASM_EXTABLE(2b,30b) | 196 | _ASM_EXTABLE(2b,30b) |
194 | _ASM_EXTABLE(3b,30b) | 197 | _ASM_EXTABLE(3b,30b) |
195 | _ASM_EXTABLE(4b,30b) | 198 | _ASM_EXTABLE(4b,30b) |
196 | _ASM_EXTABLE(5b,30b) | 199 | _ASM_EXTABLE(5b,30b) |
197 | _ASM_EXTABLE(6b,30b) | 200 | _ASM_EXTABLE(6b,30b) |
198 | _ASM_EXTABLE(7b,30b) | 201 | _ASM_EXTABLE(7b,30b) |
199 | _ASM_EXTABLE(8b,30b) | 202 | _ASM_EXTABLE(8b,30b) |
200 | _ASM_EXTABLE(9b,30b) | 203 | _ASM_EXTABLE(9b,30b) |
201 | _ASM_EXTABLE(10b,30b) | 204 | _ASM_EXTABLE(10b,30b) |
202 | _ASM_EXTABLE(11b,30b) | 205 | _ASM_EXTABLE(11b,30b) |
203 | _ASM_EXTABLE(12b,30b) | 206 | _ASM_EXTABLE(12b,30b) |
204 | _ASM_EXTABLE(13b,30b) | 207 | _ASM_EXTABLE(13b,30b) |
205 | _ASM_EXTABLE(14b,30b) | 208 | _ASM_EXTABLE(14b,30b) |
206 | _ASM_EXTABLE(15b,30b) | 209 | _ASM_EXTABLE(15b,30b) |
207 | _ASM_EXTABLE(16b,30b) | 210 | _ASM_EXTABLE(16b,30b) |
208 | _ASM_EXTABLE(18b,40b) | 211 | _ASM_EXTABLE(18b,40b) |
209 | _ASM_EXTABLE(19b,40b) | 212 | _ASM_EXTABLE(19b,40b) |
210 | _ASM_EXTABLE(21b,50b) | 213 | _ASM_EXTABLE(21b,50b) |
211 | _ASM_EXTABLE(22b,50b) | 214 | _ASM_EXTABLE(22b,50b) |
212 | CFI_ENDPROC | 215 | CFI_ENDPROC |
213 | ENDPROC(copy_user_generic_unrolled) | 216 | ENDPROC(copy_user_generic_unrolled) |
214 | 217 | ||
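copy_user_generic_unrolled now raises AC with ASM_STAC before touching user memory and drops it with ASM_CLAC on the success path; faults funnel into copy_user_handle_tail via the fixup section. The copy itself is a three-stage loop: 64-byte chunks through eight registers, then 8-byte words, then single bytes. A compilable user-space sketch of that loop structure only (ordinary loads/stores, no alignment fixup or fault handling):

        #include <stdint.h>
        #include <stddef.h>
        #include <string.h>

        /* Same three-stage structure as copy_user_generic_unrolled:
         * 64-byte unrolled chunks, then 8-byte words, then single bytes. */
        static void copy_unrolled_model(void *dst, const void *src, size_t len)
        {
                unsigned char *d = dst;
                const unsigned char *s = src;

                while (len >= 64) {             /* main unrolled loop (labels 1-16) */
                        uint64_t r[8];
                        memcpy(r, s, 64);       /* eight quadword loads  */
                        memcpy(d, r, 64);       /* eight quadword stores */
                        s += 64; d += 64; len -= 64;
                }
                while (len >= 8) {              /* quadword tail (labels 18-19) */
                        uint64_t q;
                        memcpy(&q, s, 8);
                        memcpy(d, &q, 8);
                        s += 8; d += 8; len -= 8;
                }
                while (len--)                   /* byte tail (labels 21-22) */
                        *d++ = *s++;
        }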
215 | /* Some CPUs run faster using the string copy instructions. | 218 | /* Some CPUs run faster using the string copy instructions. |
216 | * This is also a lot simpler. Use them when possible. | 219 | * This is also a lot simpler. Use them when possible. |
217 | * | 220 | * |
218 | * Only 4GB of copy is supported. This shouldn't be a problem | 221 | * Only 4GB of copy is supported. This shouldn't be a problem |
219 | * because the kernel normally only writes from/to page sized chunks | 222 | * because the kernel normally only writes from/to page sized chunks |
220 | * even if user space passed a longer buffer. | 223 | * even if user space passed a longer buffer. |
221 | * And more would be dangerous because both Intel and AMD have | 224 | * And more would be dangerous because both Intel and AMD have |
222 | * errata with rep movsq > 4GB. If someone feels the need to fix | 225 | * errata with rep movsq > 4GB. If someone feels the need to fix |
223 | * this, please take those errata into account. | 226 | * this, please take those errata into account. |
224 | * | 227 | * |
225 | * Input: | 228 | * Input: |
226 | * rdi destination | 229 | * rdi destination |
227 | * rsi source | 230 | * rsi source |
228 | * rdx count | 231 | * rdx count |
229 | * | 232 | * |
230 | * Output: | 233 | * Output: |
231 | * eax uncopied bytes or 0 if successful. | 234 | * eax uncopied bytes or 0 if successful. |
232 | */ | 235 | */ |
233 | ENTRY(copy_user_generic_string) | 236 | ENTRY(copy_user_generic_string) |
234 | CFI_STARTPROC | 237 | CFI_STARTPROC |
238 | ASM_STAC | ||
235 | andl %edx,%edx | 239 | andl %edx,%edx |
236 | jz 4f | 240 | jz 4f |
237 | cmpl $8,%edx | 241 | cmpl $8,%edx |
238 | jb 2f /* less than 8 bytes, go to byte copy loop */ | 242 | jb 2f /* less than 8 bytes, go to byte copy loop */ |
239 | ALIGN_DESTINATION | 243 | ALIGN_DESTINATION |
240 | movl %edx,%ecx | 244 | movl %edx,%ecx |
241 | shrl $3,%ecx | 245 | shrl $3,%ecx |
242 | andl $7,%edx | 246 | andl $7,%edx |
243 | 1: rep | 247 | 1: rep |
244 | movsq | 248 | movsq |
245 | 2: movl %edx,%ecx | 249 | 2: movl %edx,%ecx |
246 | 3: rep | 250 | 3: rep |
247 | movsb | 251 | movsb |
248 | 4: xorl %eax,%eax | 252 | 4: xorl %eax,%eax |
253 | ASM_CLAC | ||
249 | ret | 254 | ret |
250 | 255 | ||
251 | .section .fixup,"ax" | 256 | .section .fixup,"ax" |
252 | 11: lea (%rdx,%rcx,8),%rcx | 257 | 11: lea (%rdx,%rcx,8),%rcx |
253 | 12: movl %ecx,%edx /* ecx is zerorest also */ | 258 | 12: movl %ecx,%edx /* ecx is zerorest also */ |
254 | jmp copy_user_handle_tail | 259 | jmp copy_user_handle_tail |
255 | .previous | 260 | .previous |
256 | 261 | ||
257 | _ASM_EXTABLE(1b,11b) | 262 | _ASM_EXTABLE(1b,11b) |
258 | _ASM_EXTABLE(3b,12b) | 263 | _ASM_EXTABLE(3b,12b) |
259 | CFI_ENDPROC | 264 | CFI_ENDPROC |
260 | ENDPROC(copy_user_generic_string) | 265 | ENDPROC(copy_user_generic_string) |
261 | 266 | ||
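The string variant simply splits the count for the hardware: count/8 quadwords via rep movsq and count%8 bytes via rep movsb, again bracketed by ASM_STAC/ASM_CLAC. Because the count lives in a 32-bit register, copies are limited to 4GB, as the comment above notes. A user-space sketch of that count split (functionally just a memcpy; the _model suffix marks it as illustrative):

        #include <stddef.h>
        #include <string.h>

        /* Model of copy_user_generic_string's count split:
         * len/8 quadwords (rep movsq), then len%8 bytes (rep movsb). */
        static void copy_string_model(void *dst, const void *src, size_t len)
        {
                size_t qwords = len >> 3;   /* shrl $3,%ecx -> rep movsq count */
                size_t bytes  = len & 7;    /* andl $7,%edx -> rep movsb count */

                memcpy(dst, src, qwords * 8);                        /* 1: rep movsq */
                memcpy((char *)dst + qwords * 8,
                       (const char *)src + qwords * 8, bytes);       /* 3: rep movsb */
        }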
262 | /* | 267 | /* |
263 | * Some CPUs are adding enhanced REP MOVSB/STOSB instructions. | 268 | * Some CPUs are adding enhanced REP MOVSB/STOSB instructions. |
264 | * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled. | 269 | * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled. |
265 | * | 270 | * |
266 | * Input: | 271 | * Input: |
267 | * rdi destination | 272 | * rdi destination |
268 | * rsi source | 273 | * rsi source |
269 | * rdx count | 274 | * rdx count |
270 | * | 275 | * |
271 | * Output: | 276 | * Output: |
272 | * eax uncopied bytes or 0 if successful. | 277 | * eax uncopied bytes or 0 if successful. |
273 | */ | 278 | */ |
274 | ENTRY(copy_user_enhanced_fast_string) | 279 | ENTRY(copy_user_enhanced_fast_string) |
275 | CFI_STARTPROC | 280 | CFI_STARTPROC |
281 | ASM_STAC | ||
276 | andl %edx,%edx | 282 | andl %edx,%edx |
277 | jz 2f | 283 | jz 2f |
278 | movl %edx,%ecx | 284 | movl %edx,%ecx |
279 | 1: rep | 285 | 1: rep |
280 | movsb | 286 | movsb |
281 | 2: xorl %eax,%eax | 287 | 2: xorl %eax,%eax |
288 | ASM_CLAC | ||
282 | ret | 289 | ret |
283 | 290 | ||
284 | .section .fixup,"ax" | 291 | .section .fixup,"ax" |
285 | 12: movl %ecx,%edx /* ecx is zerorest also */ | 292 | 12: movl %ecx,%edx /* ecx is zerorest also */ |
286 | jmp copy_user_handle_tail | 293 | jmp copy_user_handle_tail |
287 | .previous | 294 | .previous |
288 | 295 | ||
289 | _ASM_EXTABLE(1b,12b) | 296 | _ASM_EXTABLE(1b,12b) |
290 | CFI_ENDPROC | 297 | CFI_ENDPROC |
291 | ENDPROC(copy_user_enhanced_fast_string) | 298 | ENDPROC(copy_user_enhanced_fast_string) |
292 | 299 |
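All three routines are reached through ALTERNATIVE_JUMP, which patches the jump target once at boot rather than branching at run time. The effective priority is ERMS first, then REP_GOOD, then the unrolled fallback. The sketch below models only that priority order; the feature flags are plain parameters standing in for X86_FEATURE_ERMS and X86_FEATURE_REP_GOOD, not kernel APIs:

        #include <stdio.h>

        /* ALTERNATIVE_JUMP picks one implementation at patch time; this
         * run-time if/else is only a model of the selection priority. */
        static const char *copy_user_variant(int has_erms, int has_rep_good)
        {
                if (has_erms)
                        return "copy_user_enhanced_fast_string"; /* rep movsb only      */
                if (has_rep_good)
                        return "copy_user_generic_string";       /* rep movsq + movsb   */
                return "copy_user_generic_unrolled";             /* register-unrolled   */
        }

        int main(void)
        {
                printf("%s\n", copy_user_variant(1, 1));  /* ERMS wins when both are set */
                return 0;
        }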
arch/x86/lib/copy_user_nocache_64.S
1 | /* | 1 | /* |
2 | * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com> | 2 | * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com> |
3 | * Copyright 2002 Andi Kleen, SuSE Labs. | 3 | * Copyright 2002 Andi Kleen, SuSE Labs. |
4 | * Subject to the GNU Public License v2. | 4 | * Subject to the GNU Public License v2. |
5 | * | 5 | * |
6 | * Functions to copy from and to user space. | 6 | * Functions to copy from and to user space. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
10 | #include <asm/dwarf2.h> | 10 | #include <asm/dwarf2.h> |
11 | 11 | ||
12 | #define FIX_ALIGNMENT 1 | 12 | #define FIX_ALIGNMENT 1 |
13 | 13 | ||
14 | #include <asm/current.h> | 14 | #include <asm/current.h> |
15 | #include <asm/asm-offsets.h> | 15 | #include <asm/asm-offsets.h> |
16 | #include <asm/thread_info.h> | 16 | #include <asm/thread_info.h> |
17 | #include <asm/asm.h> | 17 | #include <asm/asm.h> |
18 | #include <asm/smap.h> | ||
18 | 19 | ||
19 | .macro ALIGN_DESTINATION | 20 | .macro ALIGN_DESTINATION |
20 | #ifdef FIX_ALIGNMENT | 21 | #ifdef FIX_ALIGNMENT |
21 | /* check for bad alignment of destination */ | 22 | /* check for bad alignment of destination */ |
22 | movl %edi,%ecx | 23 | movl %edi,%ecx |
23 | andl $7,%ecx | 24 | andl $7,%ecx |
24 | jz 102f /* already aligned */ | 25 | jz 102f /* already aligned */ |
25 | subl $8,%ecx | 26 | subl $8,%ecx |
26 | negl %ecx | 27 | negl %ecx |
27 | subl %ecx,%edx | 28 | subl %ecx,%edx |
28 | 100: movb (%rsi),%al | 29 | 100: movb (%rsi),%al |
29 | 101: movb %al,(%rdi) | 30 | 101: movb %al,(%rdi) |
30 | incq %rsi | 31 | incq %rsi |
31 | incq %rdi | 32 | incq %rdi |
32 | decl %ecx | 33 | decl %ecx |
33 | jnz 100b | 34 | jnz 100b |
34 | 102: | 35 | 102: |
35 | .section .fixup,"ax" | 36 | .section .fixup,"ax" |
36 | 103: addl %ecx,%edx /* ecx is zerorest also */ | 37 | 103: addl %ecx,%edx /* ecx is zerorest also */ |
37 | jmp copy_user_handle_tail | 38 | jmp copy_user_handle_tail |
38 | .previous | 39 | .previous |
39 | 40 | ||
40 | _ASM_EXTABLE(100b,103b) | 41 | _ASM_EXTABLE(100b,103b) |
41 | _ASM_EXTABLE(101b,103b) | 42 | _ASM_EXTABLE(101b,103b) |
42 | #endif | 43 | #endif |
43 | .endm | 44 | .endm |
44 | 45 | ||
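ALIGN_DESTINATION copies up to seven leading bytes so that the destination becomes 8-byte aligned before the wide loops run; a fault in this prologue also lands in copy_user_handle_tail. A small sketch of the head-length computation it performs:

        #include <stdint.h>
        #include <stddef.h>
        #include <assert.h>

        /* Model of ALIGN_DESTINATION's head computation: number of single bytes
         * to copy so that 'dst' becomes 8-byte aligned (0 if already aligned). */
        static size_t align_head(uintptr_t dst)
        {
                size_t mis = dst & 7;           /* andl $7,%ecx   */
                return mis ? 8 - mis : 0;       /* subl $8 ; negl */
        }

        int main(void)
        {
                assert(align_head(0x1000) == 0);
                assert(align_head(0x1003) == 5);
                return 0;
        }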
45 | /* | 46 | /* |
46 | * copy_user_nocache - Uncached memory copy with exception handling | 47 | * copy_user_nocache - Uncached memory copy with exception handling |
47 | * This will force destination/source out of cache for more performance. | 48 | * This will force destination/source out of cache for more performance. |
48 | */ | 49 | */ |
49 | ENTRY(__copy_user_nocache) | 50 | ENTRY(__copy_user_nocache) |
50 | CFI_STARTPROC | 51 | CFI_STARTPROC |
52 | ASM_STAC | ||
51 | cmpl $8,%edx | 53 | cmpl $8,%edx |
52 | jb 20f /* less than 8 bytes, go to byte copy loop */ | 54 | jb 20f /* less than 8 bytes, go to byte copy loop */ |
53 | ALIGN_DESTINATION | 55 | ALIGN_DESTINATION |
54 | movl %edx,%ecx | 56 | movl %edx,%ecx |
55 | andl $63,%edx | 57 | andl $63,%edx |
56 | shrl $6,%ecx | 58 | shrl $6,%ecx |
57 | jz 17f | 59 | jz 17f |
58 | 1: movq (%rsi),%r8 | 60 | 1: movq (%rsi),%r8 |
59 | 2: movq 1*8(%rsi),%r9 | 61 | 2: movq 1*8(%rsi),%r9 |
60 | 3: movq 2*8(%rsi),%r10 | 62 | 3: movq 2*8(%rsi),%r10 |
61 | 4: movq 3*8(%rsi),%r11 | 63 | 4: movq 3*8(%rsi),%r11 |
62 | 5: movnti %r8,(%rdi) | 64 | 5: movnti %r8,(%rdi) |
63 | 6: movnti %r9,1*8(%rdi) | 65 | 6: movnti %r9,1*8(%rdi) |
64 | 7: movnti %r10,2*8(%rdi) | 66 | 7: movnti %r10,2*8(%rdi) |
65 | 8: movnti %r11,3*8(%rdi) | 67 | 8: movnti %r11,3*8(%rdi) |
66 | 9: movq 4*8(%rsi),%r8 | 68 | 9: movq 4*8(%rsi),%r8 |
67 | 10: movq 5*8(%rsi),%r9 | 69 | 10: movq 5*8(%rsi),%r9 |
68 | 11: movq 6*8(%rsi),%r10 | 70 | 11: movq 6*8(%rsi),%r10 |
69 | 12: movq 7*8(%rsi),%r11 | 71 | 12: movq 7*8(%rsi),%r11 |
70 | 13: movnti %r8,4*8(%rdi) | 72 | 13: movnti %r8,4*8(%rdi) |
71 | 14: movnti %r9,5*8(%rdi) | 73 | 14: movnti %r9,5*8(%rdi) |
72 | 15: movnti %r10,6*8(%rdi) | 74 | 15: movnti %r10,6*8(%rdi) |
73 | 16: movnti %r11,7*8(%rdi) | 75 | 16: movnti %r11,7*8(%rdi) |
74 | leaq 64(%rsi),%rsi | 76 | leaq 64(%rsi),%rsi |
75 | leaq 64(%rdi),%rdi | 77 | leaq 64(%rdi),%rdi |
76 | decl %ecx | 78 | decl %ecx |
77 | jnz 1b | 79 | jnz 1b |
78 | 17: movl %edx,%ecx | 80 | 17: movl %edx,%ecx |
79 | andl $7,%edx | 81 | andl $7,%edx |
80 | shrl $3,%ecx | 82 | shrl $3,%ecx |
81 | jz 20f | 83 | jz 20f |
82 | 18: movq (%rsi),%r8 | 84 | 18: movq (%rsi),%r8 |
83 | 19: movnti %r8,(%rdi) | 85 | 19: movnti %r8,(%rdi) |
84 | leaq 8(%rsi),%rsi | 86 | leaq 8(%rsi),%rsi |
85 | leaq 8(%rdi),%rdi | 87 | leaq 8(%rdi),%rdi |
86 | decl %ecx | 88 | decl %ecx |
87 | jnz 18b | 89 | jnz 18b |
88 | 20: andl %edx,%edx | 90 | 20: andl %edx,%edx |
89 | jz 23f | 91 | jz 23f |
90 | movl %edx,%ecx | 92 | movl %edx,%ecx |
91 | 21: movb (%rsi),%al | 93 | 21: movb (%rsi),%al |
92 | 22: movb %al,(%rdi) | 94 | 22: movb %al,(%rdi) |
93 | incq %rsi | 95 | incq %rsi |
94 | incq %rdi | 96 | incq %rdi |
95 | decl %ecx | 97 | decl %ecx |
96 | jnz 21b | 98 | jnz 21b |
97 | 23: xorl %eax,%eax | 99 | 23: xorl %eax,%eax |
100 | ASM_CLAC | ||
98 | sfence | 101 | sfence |
99 | ret | 102 | ret |
100 | 103 | ||
101 | .section .fixup,"ax" | 104 | .section .fixup,"ax" |
102 | 30: shll $6,%ecx | 105 | 30: shll $6,%ecx |
103 | addl %ecx,%edx | 106 | addl %ecx,%edx |
104 | jmp 60f | 107 | jmp 60f |
105 | 40: lea (%rdx,%rcx,8),%rdx | 108 | 40: lea (%rdx,%rcx,8),%rdx |
106 | jmp 60f | 109 | jmp 60f |
107 | 50: movl %ecx,%edx | 110 | 50: movl %ecx,%edx |
108 | 60: sfence | 111 | 60: sfence |
109 | jmp copy_user_handle_tail | 112 | jmp copy_user_handle_tail |
110 | .previous | 113 | .previous |
111 | 114 | ||
112 | _ASM_EXTABLE(1b,30b) | 115 | _ASM_EXTABLE(1b,30b) |
113 | _ASM_EXTABLE(2b,30b) | 116 | _ASM_EXTABLE(2b,30b) |
114 | _ASM_EXTABLE(3b,30b) | 117 | _ASM_EXTABLE(3b,30b) |
115 | _ASM_EXTABLE(4b,30b) | 118 | _ASM_EXTABLE(4b,30b) |
116 | _ASM_EXTABLE(5b,30b) | 119 | _ASM_EXTABLE(5b,30b) |
117 | _ASM_EXTABLE(6b,30b) | 120 | _ASM_EXTABLE(6b,30b) |
118 | _ASM_EXTABLE(7b,30b) | 121 | _ASM_EXTABLE(7b,30b) |
119 | _ASM_EXTABLE(8b,30b) | 122 | _ASM_EXTABLE(8b,30b) |
120 | _ASM_EXTABLE(9b,30b) | 123 | _ASM_EXTABLE(9b,30b) |
121 | _ASM_EXTABLE(10b,30b) | 124 | _ASM_EXTABLE(10b,30b) |
122 | _ASM_EXTABLE(11b,30b) | 125 | _ASM_EXTABLE(11b,30b) |
123 | _ASM_EXTABLE(12b,30b) | 126 | _ASM_EXTABLE(12b,30b) |
124 | _ASM_EXTABLE(13b,30b) | 127 | _ASM_EXTABLE(13b,30b) |
125 | _ASM_EXTABLE(14b,30b) | 128 | _ASM_EXTABLE(14b,30b) |
126 | _ASM_EXTABLE(15b,30b) | 129 | _ASM_EXTABLE(15b,30b) |
127 | _ASM_EXTABLE(16b,30b) | 130 | _ASM_EXTABLE(16b,30b) |
128 | _ASM_EXTABLE(18b,40b) | 131 | _ASM_EXTABLE(18b,40b) |
129 | _ASM_EXTABLE(19b,40b) | 132 | _ASM_EXTABLE(19b,40b) |
130 | _ASM_EXTABLE(21b,50b) | 133 | _ASM_EXTABLE(21b,50b) |
131 | _ASM_EXTABLE(22b,50b) | 134 | _ASM_EXTABLE(22b,50b) |
132 | CFI_ENDPROC | 135 | CFI_ENDPROC |
133 | ENDPROC(__copy_user_nocache) | 136 | ENDPROC(__copy_user_nocache) |
134 | 137 |
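__copy_user_nocache uses movnti non-temporal stores for the bulk of the copy, so both the success path and the fixup path execute sfence before leaving, and the patched version wraps the whole access in ASM_STAC/ASM_CLAC. A user-space sketch of the movnti + sfence pattern using the SSE2 streaming-store intrinsic (x86-64 only; the destination is assumed 8-byte aligned, which ALIGN_DESTINATION guarantees in the kernel routine):

        #include <emmintrin.h>   /* _mm_stream_si64 (SSE2, x86-64) */
        #include <xmmintrin.h>   /* _mm_sfence */
        #include <stddef.h>
        #include <string.h>

        /* Sketch of the movnti + sfence pattern: bypass the cache for the bulk
         * of the copy, then fence before anyone relies on the data being visible. */
        static void copy_nocache_model(void *dst, const void *src, size_t len)
        {
                long long *d = dst;             /* assumed 8-byte aligned */
                const unsigned char *s = src;

                while (len >= 8) {
                        long long q;
                        memcpy(&q, s, 8);               /* ordinary load (movq)        */
                        _mm_stream_si64(d++, q);        /* non-temporal store (movnti) */
                        s += 8; len -= 8;
                }
                memcpy(d, s, len);                      /* byte tail uses normal stores */
                _mm_sfence();                           /* order the streaming stores   */
        }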
arch/x86/lib/getuser.S
1 | /* | 1 | /* |
2 | * __get_user functions. | 2 | * __get_user functions. |
3 | * | 3 | * |
4 | * (C) Copyright 1998 Linus Torvalds | 4 | * (C) Copyright 1998 Linus Torvalds |
5 | * (C) Copyright 2005 Andi Kleen | 5 | * (C) Copyright 2005 Andi Kleen |
6 | * (C) Copyright 2008 Glauber Costa | 6 | * (C) Copyright 2008 Glauber Costa |
7 | * | 7 | * |
8 | * These functions have a non-standard call interface | 8 | * These functions have a non-standard call interface |
9 | * to make them more efficient, especially as they | 9 | * to make them more efficient, especially as they |
10 | * return an error value in addition to the "real" | 10 | * return an error value in addition to the "real" |
11 | * return value. | 11 | * return value. |
12 | */ | 12 | */ |
13 | 13 | ||
14 | /* | 14 | /* |
15 | * __get_user_X | 15 | * __get_user_X |
16 | * | 16 | * |
17 | * Inputs: %[r|e]ax contains the address. | 17 | * Inputs: %[r|e]ax contains the address. |
18 | * The register is modified, but all changes are undone | 18 | * The register is modified, but all changes are undone |
19 | * before returning because the C code doesn't know about it. | 19 | * before returning because the C code doesn't know about it. |
20 | * | 20 | * |
21 | * Outputs: %[r|e]ax is error code (0 or -EFAULT) | 21 | * Outputs: %[r|e]ax is error code (0 or -EFAULT) |
22 | * %[r|e]dx contains zero-extended value | 22 | * %[r|e]dx contains zero-extended value |
23 | * | 23 | * |
24 | * | 24 | * |
25 | * These functions should not modify any other registers, | 25 | * These functions should not modify any other registers, |
26 | * as they get called from within inline assembly. | 26 | * as they get called from within inline assembly. |
27 | */ | 27 | */ |
28 | 28 | ||
29 | #include <linux/linkage.h> | 29 | #include <linux/linkage.h> |
30 | #include <asm/dwarf2.h> | 30 | #include <asm/dwarf2.h> |
31 | #include <asm/page_types.h> | 31 | #include <asm/page_types.h> |
32 | #include <asm/errno.h> | 32 | #include <asm/errno.h> |
33 | #include <asm/asm-offsets.h> | 33 | #include <asm/asm-offsets.h> |
34 | #include <asm/thread_info.h> | 34 | #include <asm/thread_info.h> |
35 | #include <asm/asm.h> | 35 | #include <asm/asm.h> |
36 | #include <asm/smap.h> | ||
36 | 37 | ||
37 | .text | 38 | .text |
38 | ENTRY(__get_user_1) | 39 | ENTRY(__get_user_1) |
39 | CFI_STARTPROC | 40 | CFI_STARTPROC |
40 | GET_THREAD_INFO(%_ASM_DX) | 41 | GET_THREAD_INFO(%_ASM_DX) |
41 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX | 42 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX |
42 | jae bad_get_user | 43 | jae bad_get_user |
44 | ASM_STAC | ||
43 | 1: movzb (%_ASM_AX),%edx | 45 | 1: movzb (%_ASM_AX),%edx |
44 | xor %eax,%eax | 46 | xor %eax,%eax |
47 | ASM_CLAC | ||
45 | ret | 48 | ret |
46 | CFI_ENDPROC | 49 | CFI_ENDPROC |
47 | ENDPROC(__get_user_1) | 50 | ENDPROC(__get_user_1) |
48 | 51 | ||
49 | ENTRY(__get_user_2) | 52 | ENTRY(__get_user_2) |
50 | CFI_STARTPROC | 53 | CFI_STARTPROC |
51 | add $1,%_ASM_AX | 54 | add $1,%_ASM_AX |
52 | jc bad_get_user | 55 | jc bad_get_user |
53 | GET_THREAD_INFO(%_ASM_DX) | 56 | GET_THREAD_INFO(%_ASM_DX) |
54 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX | 57 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX |
55 | jae bad_get_user | 58 | jae bad_get_user |
59 | ASM_STAC | ||
56 | 2: movzwl -1(%_ASM_AX),%edx | 60 | 2: movzwl -1(%_ASM_AX),%edx |
57 | xor %eax,%eax | 61 | xor %eax,%eax |
62 | ASM_CLAC | ||
58 | ret | 63 | ret |
59 | CFI_ENDPROC | 64 | CFI_ENDPROC |
60 | ENDPROC(__get_user_2) | 65 | ENDPROC(__get_user_2) |
61 | 66 | ||
62 | ENTRY(__get_user_4) | 67 | ENTRY(__get_user_4) |
63 | CFI_STARTPROC | 68 | CFI_STARTPROC |
64 | add $3,%_ASM_AX | 69 | add $3,%_ASM_AX |
65 | jc bad_get_user | 70 | jc bad_get_user |
66 | GET_THREAD_INFO(%_ASM_DX) | 71 | GET_THREAD_INFO(%_ASM_DX) |
67 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX | 72 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX |
68 | jae bad_get_user | 73 | jae bad_get_user |
74 | ASM_STAC | ||
69 | 3: mov -3(%_ASM_AX),%edx | 75 | 3: mov -3(%_ASM_AX),%edx |
70 | xor %eax,%eax | 76 | xor %eax,%eax |
77 | ASM_CLAC | ||
71 | ret | 78 | ret |
72 | CFI_ENDPROC | 79 | CFI_ENDPROC |
73 | ENDPROC(__get_user_4) | 80 | ENDPROC(__get_user_4) |
74 | 81 | ||
75 | #ifdef CONFIG_X86_64 | 82 | #ifdef CONFIG_X86_64 |
76 | ENTRY(__get_user_8) | 83 | ENTRY(__get_user_8) |
77 | CFI_STARTPROC | 84 | CFI_STARTPROC |
78 | add $7,%_ASM_AX | 85 | add $7,%_ASM_AX |
79 | jc bad_get_user | 86 | jc bad_get_user |
80 | GET_THREAD_INFO(%_ASM_DX) | 87 | GET_THREAD_INFO(%_ASM_DX) |
81 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX | 88 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX |
82 | jae bad_get_user | 89 | jae bad_get_user |
90 | ASM_STAC | ||
83 | 4: movq -7(%_ASM_AX),%_ASM_DX | 91 | 4: movq -7(%_ASM_AX),%_ASM_DX |
84 | xor %eax,%eax | 92 | xor %eax,%eax |
93 | ASM_CLAC | ||
85 | ret | 94 | ret |
86 | CFI_ENDPROC | 95 | CFI_ENDPROC |
87 | ENDPROC(__get_user_8) | 96 | ENDPROC(__get_user_8) |
88 | #endif | 97 | #endif |
89 | 98 | ||
90 | bad_get_user: | 99 | bad_get_user: |
91 | CFI_STARTPROC | 100 | CFI_STARTPROC |
92 | xor %edx,%edx | 101 | xor %edx,%edx |
93 | mov $(-EFAULT),%_ASM_AX | 102 | mov $(-EFAULT),%_ASM_AX |
103 | ASM_CLAC | ||
94 | ret | 104 | ret |
95 | CFI_ENDPROC | 105 | CFI_ENDPROC |
96 | END(bad_get_user) | 106 | END(bad_get_user) |
97 | 107 | ||
98 | _ASM_EXTABLE(1b,bad_get_user) | 108 | _ASM_EXTABLE(1b,bad_get_user) |
99 | _ASM_EXTABLE(2b,bad_get_user) | 109 | _ASM_EXTABLE(2b,bad_get_user) |
100 | _ASM_EXTABLE(3b,bad_get_user) | 110 | _ASM_EXTABLE(3b,bad_get_user) |
101 | #ifdef CONFIG_X86_64 | 111 | #ifdef CONFIG_X86_64 |
102 | _ASM_EXTABLE(4b,bad_get_user) | 112 | _ASM_EXTABLE(4b,bad_get_user) |
103 | #endif | 113 | #endif |
104 | 114 |
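Each __get_user_N variant performs the same two checks before ASM_STAC: the address of the last byte must not wrap (the jc after the add $1/$3/$7) and must lie below the thread's addr_limit (the cmp/jae). A sketch of that check as plain C (names are illustrative):

        #include <stdint.h>
        #include <stddef.h>
        #include <stdbool.h>
        #include <assert.h>

        /* Model of the wrap + limit check done before ASM_STAC in __get_user_N. */
        static bool getuser_range_ok(uintptr_t addr, size_t size, uintptr_t addr_limit)
        {
                uintptr_t last = addr + (size - 1);   /* add $(size-1),%_ASM_AX */

                if (last < addr)                      /* carry set -> address wrapped */
                        return false;
                return last < addr_limit;             /* cmp TI_addr_limit ; jae bad  */
        }

        int main(void)
        {
                assert(getuser_range_ok(0x1000, 8, 0x10000));
                assert(!getuser_range_ok(UINTPTR_MAX, 8, 0x10000));  /* wraps */
                return 0;
        }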
arch/x86/lib/putuser.S
1 | /* | 1 | /* |
2 | * __put_user functions. | 2 | * __put_user functions. |
3 | * | 3 | * |
4 | * (C) Copyright 2005 Linus Torvalds | 4 | * (C) Copyright 2005 Linus Torvalds |
5 | * (C) Copyright 2005 Andi Kleen | 5 | * (C) Copyright 2005 Andi Kleen |
6 | * (C) Copyright 2008 Glauber Costa | 6 | * (C) Copyright 2008 Glauber Costa |
7 | * | 7 | * |
8 | * These functions have a non-standard call interface | 8 | * These functions have a non-standard call interface |
9 | * to make them more efficient, especially as they | 9 | * to make them more efficient, especially as they |
10 | * return an error value in addition to the "real" | 10 | * return an error value in addition to the "real" |
11 | * return value. | 11 | * return value. |
12 | */ | 12 | */ |
13 | #include <linux/linkage.h> | 13 | #include <linux/linkage.h> |
14 | #include <asm/dwarf2.h> | 14 | #include <asm/dwarf2.h> |
15 | #include <asm/thread_info.h> | 15 | #include <asm/thread_info.h> |
16 | #include <asm/errno.h> | 16 | #include <asm/errno.h> |
17 | #include <asm/asm.h> | 17 | #include <asm/asm.h> |
18 | #include <asm/smap.h> | ||
18 | 19 | ||
19 | 20 | ||
20 | /* | 21 | /* |
21 | * __put_user_X | 22 | * __put_user_X |
22 | * | 23 | * |
23 | * Inputs: %eax[:%edx] contains the data | 24 | * Inputs: %eax[:%edx] contains the data |
24 | * %ecx contains the address | 25 | * %ecx contains the address |
25 | * | 26 | * |
26 | * Outputs: %eax is error code (0 or -EFAULT) | 27 | * Outputs: %eax is error code (0 or -EFAULT) |
27 | * | 28 | * |
28 | * These functions should not modify any other registers, | 29 | * These functions should not modify any other registers, |
29 | * as they get called from within inline assembly. | 30 | * as they get called from within inline assembly. |
30 | */ | 31 | */ |
31 | 32 | ||
32 | #define ENTER CFI_STARTPROC ; \ | 33 | #define ENTER CFI_STARTPROC ; \ |
33 | GET_THREAD_INFO(%_ASM_BX) | 34 | GET_THREAD_INFO(%_ASM_BX) |
34 | #define EXIT ret ; \ | 35 | #define EXIT ASM_CLAC ; \ |
36 | ret ; \ | ||
35 | CFI_ENDPROC | 37 | CFI_ENDPROC |
36 | 38 | ||
37 | .text | 39 | .text |
38 | ENTRY(__put_user_1) | 40 | ENTRY(__put_user_1) |
39 | ENTER | 41 | ENTER |
40 | cmp TI_addr_limit(%_ASM_BX),%_ASM_CX | 42 | cmp TI_addr_limit(%_ASM_BX),%_ASM_CX |
41 | jae bad_put_user | 43 | jae bad_put_user |
44 | ASM_STAC | ||
42 | 1: movb %al,(%_ASM_CX) | 45 | 1: movb %al,(%_ASM_CX) |
43 | xor %eax,%eax | 46 | xor %eax,%eax |
44 | EXIT | 47 | EXIT |
45 | ENDPROC(__put_user_1) | 48 | ENDPROC(__put_user_1) |
46 | 49 | ||
47 | ENTRY(__put_user_2) | 50 | ENTRY(__put_user_2) |
48 | ENTER | 51 | ENTER |
49 | mov TI_addr_limit(%_ASM_BX),%_ASM_BX | 52 | mov TI_addr_limit(%_ASM_BX),%_ASM_BX |
50 | sub $1,%_ASM_BX | 53 | sub $1,%_ASM_BX |
51 | cmp %_ASM_BX,%_ASM_CX | 54 | cmp %_ASM_BX,%_ASM_CX |
52 | jae bad_put_user | 55 | jae bad_put_user |
56 | ASM_STAC | ||
53 | 2: movw %ax,(%_ASM_CX) | 57 | 2: movw %ax,(%_ASM_CX) |
54 | xor %eax,%eax | 58 | xor %eax,%eax |
55 | EXIT | 59 | EXIT |
56 | ENDPROC(__put_user_2) | 60 | ENDPROC(__put_user_2) |
57 | 61 | ||
58 | ENTRY(__put_user_4) | 62 | ENTRY(__put_user_4) |
59 | ENTER | 63 | ENTER |
60 | mov TI_addr_limit(%_ASM_BX),%_ASM_BX | 64 | mov TI_addr_limit(%_ASM_BX),%_ASM_BX |
61 | sub $3,%_ASM_BX | 65 | sub $3,%_ASM_BX |
62 | cmp %_ASM_BX,%_ASM_CX | 66 | cmp %_ASM_BX,%_ASM_CX |
63 | jae bad_put_user | 67 | jae bad_put_user |
68 | ASM_STAC | ||
64 | 3: movl %eax,(%_ASM_CX) | 69 | 3: movl %eax,(%_ASM_CX) |
65 | xor %eax,%eax | 70 | xor %eax,%eax |
66 | EXIT | 71 | EXIT |
67 | ENDPROC(__put_user_4) | 72 | ENDPROC(__put_user_4) |
68 | 73 | ||
69 | ENTRY(__put_user_8) | 74 | ENTRY(__put_user_8) |
70 | ENTER | 75 | ENTER |
71 | mov TI_addr_limit(%_ASM_BX),%_ASM_BX | 76 | mov TI_addr_limit(%_ASM_BX),%_ASM_BX |
72 | sub $7,%_ASM_BX | 77 | sub $7,%_ASM_BX |
73 | cmp %_ASM_BX,%_ASM_CX | 78 | cmp %_ASM_BX,%_ASM_CX |
74 | jae bad_put_user | 79 | jae bad_put_user |
80 | ASM_STAC | ||
75 | 4: mov %_ASM_AX,(%_ASM_CX) | 81 | 4: mov %_ASM_AX,(%_ASM_CX) |
76 | #ifdef CONFIG_X86_32 | 82 | #ifdef CONFIG_X86_32 |
77 | 5: movl %edx,4(%_ASM_CX) | 83 | 5: movl %edx,4(%_ASM_CX) |
78 | #endif | 84 | #endif |
79 | xor %eax,%eax | 85 | xor %eax,%eax |
80 | EXIT | 86 | EXIT |
81 | ENDPROC(__put_user_8) | 87 | ENDPROC(__put_user_8) |
82 | 88 | ||
83 | bad_put_user: | 89 | bad_put_user: |
84 | CFI_STARTPROC | 90 | CFI_STARTPROC |
85 | movl $-EFAULT,%eax | 91 | movl $-EFAULT,%eax |
86 | EXIT | 92 | EXIT |
87 | END(bad_put_user) | 93 | END(bad_put_user) |
88 | 94 | ||
89 | _ASM_EXTABLE(1b,bad_put_user) | 95 | _ASM_EXTABLE(1b,bad_put_user) |
90 | _ASM_EXTABLE(2b,bad_put_user) | 96 | _ASM_EXTABLE(2b,bad_put_user) |
91 | _ASM_EXTABLE(3b,bad_put_user) | 97 | _ASM_EXTABLE(3b,bad_put_user) |
92 | _ASM_EXTABLE(4b,bad_put_user) | 98 | _ASM_EXTABLE(4b,bad_put_user) |
93 | #ifdef CONFIG_X86_32 | 99 | #ifdef CONFIG_X86_32 |
94 | _ASM_EXTABLE(5b,bad_put_user) | 100 | _ASM_EXTABLE(5b,bad_put_user) |
95 | #endif | 101 | #endif |
96 | 102 |
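Folding ASM_CLAC into the EXIT macro guarantees that AC is dropped on every return path, including bad_put_user, which exits through the same macro. A toy user-space model of that discipline, using a bool in place of EFLAGS.AC (stac_model/clac_model and the hard-coded -14 for -EFAULT are illustrative stand-ins):

        #include <stdbool.h>
        #include <stdio.h>

        static bool ac_flag;                             /* stands in for EFLAGS.AC */

        static void stac_model(void) { ac_flag = true;  } /* ASM_STAC */
        static void clac_model(void) { ac_flag = false; } /* ASM_CLAC */

        /* Model of __put_user_N control flow: CLAC runs on both the success
         * path and the bad_put_user path because both return through EXIT. */
        static int put_user_model(unsigned char *dst, unsigned char val, bool range_ok)
        {
                if (!range_ok) {             /* jae bad_put_user (before STAC) */
                        clac_model();        /* EXIT in bad_put_user           */
                        return -14;          /* -EFAULT                        */
                }
                stac_model();                /* ASM_STAC                       */
                *dst = val;                  /* 1: movb %al,(%_ASM_CX)         */
                clac_model();                /* EXIT                           */
                return 0;
        }

        int main(void)
        {
                unsigned char byte = 0;
                put_user_model(&byte, 0x5a, true);
                printf("byte=0x%x ac=%d\n", (unsigned)byte, (int)ac_flag); /* ac back to 0 */
                return 0;
        }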
arch/x86/lib/usercopy_32.c
1 | /* | 1 | /* |
2 | * User address space access functions. | 2 | * User address space access functions. |
3 | * The non inlined parts of asm-i386/uaccess.h are here. | 3 | * The non inlined parts of asm-i386/uaccess.h are here. |
4 | * | 4 | * |
5 | * Copyright 1997 Andi Kleen <ak@muc.de> | 5 | * Copyright 1997 Andi Kleen <ak@muc.de> |
6 | * Copyright 1997 Linus Torvalds | 6 | * Copyright 1997 Linus Torvalds |
7 | */ | 7 | */ |
8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
9 | #include <linux/highmem.h> | 9 | #include <linux/highmem.h> |
10 | #include <linux/blkdev.h> | 10 | #include <linux/blkdev.h> |
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/backing-dev.h> | 12 | #include <linux/backing-dev.h> |
13 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
14 | #include <asm/uaccess.h> | 14 | #include <asm/uaccess.h> |
15 | #include <asm/mmx.h> | 15 | #include <asm/mmx.h> |
16 | #include <asm/asm.h> | 16 | #include <asm/asm.h> |
17 | 17 | ||
18 | #ifdef CONFIG_X86_INTEL_USERCOPY | 18 | #ifdef CONFIG_X86_INTEL_USERCOPY |
19 | /* | 19 | /* |
20 | * Alignment at which movsl is preferred for bulk memory copies. | 20 | * Alignment at which movsl is preferred for bulk memory copies. |
21 | */ | 21 | */ |
22 | struct movsl_mask movsl_mask __read_mostly; | 22 | struct movsl_mask movsl_mask __read_mostly; |
23 | #endif | 23 | #endif |
24 | 24 | ||
25 | static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) | 25 | static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) |
26 | { | 26 | { |
27 | #ifdef CONFIG_X86_INTEL_USERCOPY | 27 | #ifdef CONFIG_X86_INTEL_USERCOPY |
28 | if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask)) | 28 | if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask)) |
29 | return 0; | 29 | return 0; |
30 | #endif | 30 | #endif |
31 | return 1; | 31 | return 1; |
32 | } | 32 | } |
33 | #define movsl_is_ok(a1, a2, n) \ | 33 | #define movsl_is_ok(a1, a2, n) \ |
34 | __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) | 34 | __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * Zero Userspace | 37 | * Zero Userspace |
38 | */ | 38 | */ |
39 | 39 | ||
40 | #define __do_clear_user(addr,size) \ | 40 | #define __do_clear_user(addr,size) \ |
41 | do { \ | 41 | do { \ |
42 | int __d0; \ | 42 | int __d0; \ |
43 | might_fault(); \ | 43 | might_fault(); \ |
44 | __asm__ __volatile__( \ | 44 | __asm__ __volatile__( \ |
45 | ASM_STAC "\n" \ | ||
45 | "0: rep; stosl\n" \ | 46 | "0: rep; stosl\n" \ |
46 | " movl %2,%0\n" \ | 47 | " movl %2,%0\n" \ |
47 | "1: rep; stosb\n" \ | 48 | "1: rep; stosb\n" \ |
48 | "2:\n" \ | 49 | "2: " ASM_CLAC "\n" \ |
49 | ".section .fixup,\"ax\"\n" \ | 50 | ".section .fixup,\"ax\"\n" \ |
50 | "3: lea 0(%2,%0,4),%0\n" \ | 51 | "3: lea 0(%2,%0,4),%0\n" \ |
51 | " jmp 2b\n" \ | 52 | " jmp 2b\n" \ |
52 | ".previous\n" \ | 53 | ".previous\n" \ |
53 | _ASM_EXTABLE(0b,3b) \ | 54 | _ASM_EXTABLE(0b,3b) \ |
54 | _ASM_EXTABLE(1b,2b) \ | 55 | _ASM_EXTABLE(1b,2b) \ |
55 | : "=&c"(size), "=&D" (__d0) \ | 56 | : "=&c"(size), "=&D" (__d0) \ |
56 | : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ | 57 | : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ |
57 | } while (0) | 58 | } while (0) |
58 | 59 | ||
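__do_clear_user zeroes size/4 dwords with rep stosl and the remaining size%4 bytes with rep stosb; the patched version opens the access window with ASM_STAC and closes it at label 2 with ASM_CLAC, so the fixup path (which jumps back to 2b) also ends with AC cleared. A sketch of the count split, equivalent to memset(addr, 0, size):

        #include <stddef.h>
        #include <string.h>

        /* Model of __do_clear_user's two-stage zeroing: size/4 dwords
         * (rep stosl) followed by size%4 bytes (rep stosb). */
        static void do_clear_user_model(void *addr, size_t size)
        {
                size_t dwords = size / 4;    /* "0"(size / 4) -> rep stosl count */
                size_t bytes  = size & 3;    /* "r"(size & 3) -> rep stosb count */

                memset(addr, 0, dwords * 4);
                memset((char *)addr + dwords * 4, 0, bytes);
        }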
59 | /** | 60 | /** |
60 | * clear_user: - Zero a block of memory in user space. | 61 | * clear_user: - Zero a block of memory in user space. |
61 | * @to: Destination address, in user space. | 62 | * @to: Destination address, in user space. |
62 | * @n: Number of bytes to zero. | 63 | * @n: Number of bytes to zero. |
63 | * | 64 | * |
64 | * Zero a block of memory in user space. | 65 | * Zero a block of memory in user space. |
65 | * | 66 | * |
66 | * Returns number of bytes that could not be cleared. | 67 | * Returns number of bytes that could not be cleared. |
67 | * On success, this will be zero. | 68 | * On success, this will be zero. |
68 | */ | 69 | */ |
69 | unsigned long | 70 | unsigned long |
70 | clear_user(void __user *to, unsigned long n) | 71 | clear_user(void __user *to, unsigned long n) |
71 | { | 72 | { |
72 | might_fault(); | 73 | might_fault(); |
73 | if (access_ok(VERIFY_WRITE, to, n)) | 74 | if (access_ok(VERIFY_WRITE, to, n)) |
74 | __do_clear_user(to, n); | 75 | __do_clear_user(to, n); |
75 | return n; | 76 | return n; |
76 | } | 77 | } |
77 | EXPORT_SYMBOL(clear_user); | 78 | EXPORT_SYMBOL(clear_user); |
78 | 79 | ||
79 | /** | 80 | /** |
80 | * __clear_user: - Zero a block of memory in user space, with less checking. | 81 | * __clear_user: - Zero a block of memory in user space, with less checking. |
81 | * @to: Destination address, in user space. | 82 | * @to: Destination address, in user space. |
82 | * @n: Number of bytes to zero. | 83 | * @n: Number of bytes to zero. |
83 | * | 84 | * |
84 | * Zero a block of memory in user space. Caller must check | 85 | * Zero a block of memory in user space. Caller must check |
85 | * the specified block with access_ok() before calling this function. | 86 | * the specified block with access_ok() before calling this function. |
86 | * | 87 | * |
87 | * Returns number of bytes that could not be cleared. | 88 | * Returns number of bytes that could not be cleared. |
88 | * On success, this will be zero. | 89 | * On success, this will be zero. |
89 | */ | 90 | */ |
90 | unsigned long | 91 | unsigned long |
91 | __clear_user(void __user *to, unsigned long n) | 92 | __clear_user(void __user *to, unsigned long n) |
92 | { | 93 | { |
93 | __do_clear_user(to, n); | 94 | __do_clear_user(to, n); |
94 | return n; | 95 | return n; |
95 | } | 96 | } |
96 | EXPORT_SYMBOL(__clear_user); | 97 | EXPORT_SYMBOL(__clear_user); |
97 | 98 | ||
98 | #ifdef CONFIG_X86_INTEL_USERCOPY | 99 | #ifdef CONFIG_X86_INTEL_USERCOPY |
99 | static unsigned long | 100 | static unsigned long |
100 | __copy_user_intel(void __user *to, const void *from, unsigned long size) | 101 | __copy_user_intel(void __user *to, const void *from, unsigned long size) |
101 | { | 102 | { |
102 | int d0, d1; | 103 | int d0, d1; |
103 | __asm__ __volatile__( | 104 | __asm__ __volatile__( |
104 | " .align 2,0x90\n" | 105 | " .align 2,0x90\n" |
105 | "1: movl 32(%4), %%eax\n" | 106 | "1: movl 32(%4), %%eax\n" |
106 | " cmpl $67, %0\n" | 107 | " cmpl $67, %0\n" |
107 | " jbe 3f\n" | 108 | " jbe 3f\n" |
108 | "2: movl 64(%4), %%eax\n" | 109 | "2: movl 64(%4), %%eax\n" |
109 | " .align 2,0x90\n" | 110 | " .align 2,0x90\n" |
110 | "3: movl 0(%4), %%eax\n" | 111 | "3: movl 0(%4), %%eax\n" |
111 | "4: movl 4(%4), %%edx\n" | 112 | "4: movl 4(%4), %%edx\n" |
112 | "5: movl %%eax, 0(%3)\n" | 113 | "5: movl %%eax, 0(%3)\n" |
113 | "6: movl %%edx, 4(%3)\n" | 114 | "6: movl %%edx, 4(%3)\n" |
114 | "7: movl 8(%4), %%eax\n" | 115 | "7: movl 8(%4), %%eax\n" |
115 | "8: movl 12(%4),%%edx\n" | 116 | "8: movl 12(%4),%%edx\n" |
116 | "9: movl %%eax, 8(%3)\n" | 117 | "9: movl %%eax, 8(%3)\n" |
117 | "10: movl %%edx, 12(%3)\n" | 118 | "10: movl %%edx, 12(%3)\n" |
118 | "11: movl 16(%4), %%eax\n" | 119 | "11: movl 16(%4), %%eax\n" |
119 | "12: movl 20(%4), %%edx\n" | 120 | "12: movl 20(%4), %%edx\n" |
120 | "13: movl %%eax, 16(%3)\n" | 121 | "13: movl %%eax, 16(%3)\n" |
121 | "14: movl %%edx, 20(%3)\n" | 122 | "14: movl %%edx, 20(%3)\n" |
122 | "15: movl 24(%4), %%eax\n" | 123 | "15: movl 24(%4), %%eax\n" |
123 | "16: movl 28(%4), %%edx\n" | 124 | "16: movl 28(%4), %%edx\n" |
124 | "17: movl %%eax, 24(%3)\n" | 125 | "17: movl %%eax, 24(%3)\n" |
125 | "18: movl %%edx, 28(%3)\n" | 126 | "18: movl %%edx, 28(%3)\n" |
126 | "19: movl 32(%4), %%eax\n" | 127 | "19: movl 32(%4), %%eax\n" |
127 | "20: movl 36(%4), %%edx\n" | 128 | "20: movl 36(%4), %%edx\n" |
128 | "21: movl %%eax, 32(%3)\n" | 129 | "21: movl %%eax, 32(%3)\n" |
129 | "22: movl %%edx, 36(%3)\n" | 130 | "22: movl %%edx, 36(%3)\n" |
130 | "23: movl 40(%4), %%eax\n" | 131 | "23: movl 40(%4), %%eax\n" |
131 | "24: movl 44(%4), %%edx\n" | 132 | "24: movl 44(%4), %%edx\n" |
132 | "25: movl %%eax, 40(%3)\n" | 133 | "25: movl %%eax, 40(%3)\n" |
133 | "26: movl %%edx, 44(%3)\n" | 134 | "26: movl %%edx, 44(%3)\n" |
134 | "27: movl 48(%4), %%eax\n" | 135 | "27: movl 48(%4), %%eax\n" |
135 | "28: movl 52(%4), %%edx\n" | 136 | "28: movl 52(%4), %%edx\n" |
136 | "29: movl %%eax, 48(%3)\n" | 137 | "29: movl %%eax, 48(%3)\n" |
137 | "30: movl %%edx, 52(%3)\n" | 138 | "30: movl %%edx, 52(%3)\n" |
138 | "31: movl 56(%4), %%eax\n" | 139 | "31: movl 56(%4), %%eax\n" |
139 | "32: movl 60(%4), %%edx\n" | 140 | "32: movl 60(%4), %%edx\n" |
140 | "33: movl %%eax, 56(%3)\n" | 141 | "33: movl %%eax, 56(%3)\n" |
141 | "34: movl %%edx, 60(%3)\n" | 142 | "34: movl %%edx, 60(%3)\n" |
142 | " addl $-64, %0\n" | 143 | " addl $-64, %0\n" |
143 | " addl $64, %4\n" | 144 | " addl $64, %4\n" |
144 | " addl $64, %3\n" | 145 | " addl $64, %3\n" |
145 | " cmpl $63, %0\n" | 146 | " cmpl $63, %0\n" |
146 | " ja 1b\n" | 147 | " ja 1b\n" |
147 | "35: movl %0, %%eax\n" | 148 | "35: movl %0, %%eax\n" |
148 | " shrl $2, %0\n" | 149 | " shrl $2, %0\n" |
149 | " andl $3, %%eax\n" | 150 | " andl $3, %%eax\n" |
150 | " cld\n" | 151 | " cld\n" |
151 | "99: rep; movsl\n" | 152 | "99: rep; movsl\n" |
152 | "36: movl %%eax, %0\n" | 153 | "36: movl %%eax, %0\n" |
153 | "37: rep; movsb\n" | 154 | "37: rep; movsb\n" |
154 | "100:\n" | 155 | "100:\n" |
155 | ".section .fixup,\"ax\"\n" | 156 | ".section .fixup,\"ax\"\n" |
156 | "101: lea 0(%%eax,%0,4),%0\n" | 157 | "101: lea 0(%%eax,%0,4),%0\n" |
157 | " jmp 100b\n" | 158 | " jmp 100b\n" |
158 | ".previous\n" | 159 | ".previous\n" |
159 | _ASM_EXTABLE(1b,100b) | 160 | _ASM_EXTABLE(1b,100b) |
160 | _ASM_EXTABLE(2b,100b) | 161 | _ASM_EXTABLE(2b,100b) |
161 | _ASM_EXTABLE(3b,100b) | 162 | _ASM_EXTABLE(3b,100b) |
162 | _ASM_EXTABLE(4b,100b) | 163 | _ASM_EXTABLE(4b,100b) |
163 | _ASM_EXTABLE(5b,100b) | 164 | _ASM_EXTABLE(5b,100b) |
164 | _ASM_EXTABLE(6b,100b) | 165 | _ASM_EXTABLE(6b,100b) |
165 | _ASM_EXTABLE(7b,100b) | 166 | _ASM_EXTABLE(7b,100b) |
166 | _ASM_EXTABLE(8b,100b) | 167 | _ASM_EXTABLE(8b,100b) |
167 | _ASM_EXTABLE(9b,100b) | 168 | _ASM_EXTABLE(9b,100b) |
168 | _ASM_EXTABLE(10b,100b) | 169 | _ASM_EXTABLE(10b,100b) |
169 | _ASM_EXTABLE(11b,100b) | 170 | _ASM_EXTABLE(11b,100b) |
170 | _ASM_EXTABLE(12b,100b) | 171 | _ASM_EXTABLE(12b,100b) |
171 | _ASM_EXTABLE(13b,100b) | 172 | _ASM_EXTABLE(13b,100b) |
172 | _ASM_EXTABLE(14b,100b) | 173 | _ASM_EXTABLE(14b,100b) |
173 | _ASM_EXTABLE(15b,100b) | 174 | _ASM_EXTABLE(15b,100b) |
174 | _ASM_EXTABLE(16b,100b) | 175 | _ASM_EXTABLE(16b,100b) |
175 | _ASM_EXTABLE(17b,100b) | 176 | _ASM_EXTABLE(17b,100b) |
176 | _ASM_EXTABLE(18b,100b) | 177 | _ASM_EXTABLE(18b,100b) |
177 | _ASM_EXTABLE(19b,100b) | 178 | _ASM_EXTABLE(19b,100b) |
178 | _ASM_EXTABLE(20b,100b) | 179 | _ASM_EXTABLE(20b,100b) |
179 | _ASM_EXTABLE(21b,100b) | 180 | _ASM_EXTABLE(21b,100b) |
180 | _ASM_EXTABLE(22b,100b) | 181 | _ASM_EXTABLE(22b,100b) |
181 | _ASM_EXTABLE(23b,100b) | 182 | _ASM_EXTABLE(23b,100b) |
182 | _ASM_EXTABLE(24b,100b) | 183 | _ASM_EXTABLE(24b,100b) |
183 | _ASM_EXTABLE(25b,100b) | 184 | _ASM_EXTABLE(25b,100b) |
184 | _ASM_EXTABLE(26b,100b) | 185 | _ASM_EXTABLE(26b,100b) |
185 | _ASM_EXTABLE(27b,100b) | 186 | _ASM_EXTABLE(27b,100b) |
186 | _ASM_EXTABLE(28b,100b) | 187 | _ASM_EXTABLE(28b,100b) |
187 | _ASM_EXTABLE(29b,100b) | 188 | _ASM_EXTABLE(29b,100b) |
188 | _ASM_EXTABLE(30b,100b) | 189 | _ASM_EXTABLE(30b,100b) |
189 | _ASM_EXTABLE(31b,100b) | 190 | _ASM_EXTABLE(31b,100b) |
190 | _ASM_EXTABLE(32b,100b) | 191 | _ASM_EXTABLE(32b,100b) |
191 | _ASM_EXTABLE(33b,100b) | 192 | _ASM_EXTABLE(33b,100b) |
192 | _ASM_EXTABLE(34b,100b) | 193 | _ASM_EXTABLE(34b,100b) |
193 | _ASM_EXTABLE(35b,100b) | 194 | _ASM_EXTABLE(35b,100b) |
194 | _ASM_EXTABLE(36b,100b) | 195 | _ASM_EXTABLE(36b,100b) |
195 | _ASM_EXTABLE(37b,100b) | 196 | _ASM_EXTABLE(37b,100b) |
196 | _ASM_EXTABLE(99b,101b) | 197 | _ASM_EXTABLE(99b,101b) |
197 | : "=&c"(size), "=&D" (d0), "=&S" (d1) | 198 | : "=&c"(size), "=&D" (d0), "=&S" (d1) |
198 | : "1"(to), "2"(from), "0"(size) | 199 | : "1"(to), "2"(from), "0"(size) |
199 | : "eax", "edx", "memory"); | 200 | : "eax", "edx", "memory"); |
200 | return size; | 201 | return size; |
201 | } | 202 | } |
202 | 203 | ||
203 | static unsigned long | 204 | static unsigned long |
204 | __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) | 205 | __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) |
205 | { | 206 | { |
206 | int d0, d1; | 207 | int d0, d1; |
207 | __asm__ __volatile__( | 208 | __asm__ __volatile__( |
208 | " .align 2,0x90\n" | 209 | " .align 2,0x90\n" |
209 | "0: movl 32(%4), %%eax\n" | 210 | "0: movl 32(%4), %%eax\n" |
210 | " cmpl $67, %0\n" | 211 | " cmpl $67, %0\n" |
211 | " jbe 2f\n" | 212 | " jbe 2f\n" |
212 | "1: movl 64(%4), %%eax\n" | 213 | "1: movl 64(%4), %%eax\n" |
213 | " .align 2,0x90\n" | 214 | " .align 2,0x90\n" |
214 | "2: movl 0(%4), %%eax\n" | 215 | "2: movl 0(%4), %%eax\n" |
215 | "21: movl 4(%4), %%edx\n" | 216 | "21: movl 4(%4), %%edx\n" |
216 | " movl %%eax, 0(%3)\n" | 217 | " movl %%eax, 0(%3)\n" |
217 | " movl %%edx, 4(%3)\n" | 218 | " movl %%edx, 4(%3)\n" |
218 | "3: movl 8(%4), %%eax\n" | 219 | "3: movl 8(%4), %%eax\n" |
219 | "31: movl 12(%4),%%edx\n" | 220 | "31: movl 12(%4),%%edx\n" |
220 | " movl %%eax, 8(%3)\n" | 221 | " movl %%eax, 8(%3)\n" |
221 | " movl %%edx, 12(%3)\n" | 222 | " movl %%edx, 12(%3)\n" |
222 | "4: movl 16(%4), %%eax\n" | 223 | "4: movl 16(%4), %%eax\n" |
223 | "41: movl 20(%4), %%edx\n" | 224 | "41: movl 20(%4), %%edx\n" |
224 | " movl %%eax, 16(%3)\n" | 225 | " movl %%eax, 16(%3)\n" |
225 | " movl %%edx, 20(%3)\n" | 226 | " movl %%edx, 20(%3)\n" |
226 | "10: movl 24(%4), %%eax\n" | 227 | "10: movl 24(%4), %%eax\n" |
227 | "51: movl 28(%4), %%edx\n" | 228 | "51: movl 28(%4), %%edx\n" |
228 | " movl %%eax, 24(%3)\n" | 229 | " movl %%eax, 24(%3)\n" |
229 | " movl %%edx, 28(%3)\n" | 230 | " movl %%edx, 28(%3)\n" |
230 | "11: movl 32(%4), %%eax\n" | 231 | "11: movl 32(%4), %%eax\n" |
231 | "61: movl 36(%4), %%edx\n" | 232 | "61: movl 36(%4), %%edx\n" |
232 | " movl %%eax, 32(%3)\n" | 233 | " movl %%eax, 32(%3)\n" |
233 | " movl %%edx, 36(%3)\n" | 234 | " movl %%edx, 36(%3)\n" |
234 | "12: movl 40(%4), %%eax\n" | 235 | "12: movl 40(%4), %%eax\n" |
235 | "71: movl 44(%4), %%edx\n" | 236 | "71: movl 44(%4), %%edx\n" |
236 | " movl %%eax, 40(%3)\n" | 237 | " movl %%eax, 40(%3)\n" |
237 | " movl %%edx, 44(%3)\n" | 238 | " movl %%edx, 44(%3)\n" |
238 | "13: movl 48(%4), %%eax\n" | 239 | "13: movl 48(%4), %%eax\n" |
239 | "81: movl 52(%4), %%edx\n" | 240 | "81: movl 52(%4), %%edx\n" |
240 | " movl %%eax, 48(%3)\n" | 241 | " movl %%eax, 48(%3)\n" |
241 | " movl %%edx, 52(%3)\n" | 242 | " movl %%edx, 52(%3)\n" |
242 | "14: movl 56(%4), %%eax\n" | 243 | "14: movl 56(%4), %%eax\n" |
243 | "91: movl 60(%4), %%edx\n" | 244 | "91: movl 60(%4), %%edx\n" |
244 | " movl %%eax, 56(%3)\n" | 245 | " movl %%eax, 56(%3)\n" |
245 | " movl %%edx, 60(%3)\n" | 246 | " movl %%edx, 60(%3)\n" |
246 | " addl $-64, %0\n" | 247 | " addl $-64, %0\n" |
247 | " addl $64, %4\n" | 248 | " addl $64, %4\n" |
248 | " addl $64, %3\n" | 249 | " addl $64, %3\n" |
249 | " cmpl $63, %0\n" | 250 | " cmpl $63, %0\n" |
250 | " ja 0b\n" | 251 | " ja 0b\n" |
251 | "5: movl %0, %%eax\n" | 252 | "5: movl %0, %%eax\n" |
252 | " shrl $2, %0\n" | 253 | " shrl $2, %0\n" |
253 | " andl $3, %%eax\n" | 254 | " andl $3, %%eax\n" |
254 | " cld\n" | 255 | " cld\n" |
255 | "6: rep; movsl\n" | 256 | "6: rep; movsl\n" |
256 | " movl %%eax,%0\n" | 257 | " movl %%eax,%0\n" |
257 | "7: rep; movsb\n" | 258 | "7: rep; movsb\n" |
258 | "8:\n" | 259 | "8:\n" |
259 | ".section .fixup,\"ax\"\n" | 260 | ".section .fixup,\"ax\"\n" |
260 | "9: lea 0(%%eax,%0,4),%0\n" | 261 | "9: lea 0(%%eax,%0,4),%0\n" |
261 | "16: pushl %0\n" | 262 | "16: pushl %0\n" |
262 | " pushl %%eax\n" | 263 | " pushl %%eax\n" |
263 | " xorl %%eax,%%eax\n" | 264 | " xorl %%eax,%%eax\n" |
264 | " rep; stosb\n" | 265 | " rep; stosb\n" |
265 | " popl %%eax\n" | 266 | " popl %%eax\n" |
266 | " popl %0\n" | 267 | " popl %0\n" |
267 | " jmp 8b\n" | 268 | " jmp 8b\n" |
268 | ".previous\n" | 269 | ".previous\n" |
269 | _ASM_EXTABLE(0b,16b) | 270 | _ASM_EXTABLE(0b,16b) |
270 | _ASM_EXTABLE(1b,16b) | 271 | _ASM_EXTABLE(1b,16b) |
271 | _ASM_EXTABLE(2b,16b) | 272 | _ASM_EXTABLE(2b,16b) |
272 | _ASM_EXTABLE(21b,16b) | 273 | _ASM_EXTABLE(21b,16b) |
273 | _ASM_EXTABLE(3b,16b) | 274 | _ASM_EXTABLE(3b,16b) |
274 | _ASM_EXTABLE(31b,16b) | 275 | _ASM_EXTABLE(31b,16b) |
275 | _ASM_EXTABLE(4b,16b) | 276 | _ASM_EXTABLE(4b,16b) |
276 | _ASM_EXTABLE(41b,16b) | 277 | _ASM_EXTABLE(41b,16b) |
277 | _ASM_EXTABLE(10b,16b) | 278 | _ASM_EXTABLE(10b,16b) |
278 | _ASM_EXTABLE(51b,16b) | 279 | _ASM_EXTABLE(51b,16b) |
279 | _ASM_EXTABLE(11b,16b) | 280 | _ASM_EXTABLE(11b,16b) |
280 | _ASM_EXTABLE(61b,16b) | 281 | _ASM_EXTABLE(61b,16b) |
281 | _ASM_EXTABLE(12b,16b) | 282 | _ASM_EXTABLE(12b,16b) |
282 | _ASM_EXTABLE(71b,16b) | 283 | _ASM_EXTABLE(71b,16b) |
283 | _ASM_EXTABLE(13b,16b) | 284 | _ASM_EXTABLE(13b,16b) |
284 | _ASM_EXTABLE(81b,16b) | 285 | _ASM_EXTABLE(81b,16b) |
285 | _ASM_EXTABLE(14b,16b) | 286 | _ASM_EXTABLE(14b,16b) |
286 | _ASM_EXTABLE(91b,16b) | 287 | _ASM_EXTABLE(91b,16b) |
287 | _ASM_EXTABLE(6b,9b) | 288 | _ASM_EXTABLE(6b,9b) |
288 | _ASM_EXTABLE(7b,16b) | 289 | _ASM_EXTABLE(7b,16b) |
289 | : "=&c"(size), "=&D" (d0), "=&S" (d1) | 290 | : "=&c"(size), "=&D" (d0), "=&S" (d1) |
290 | : "1"(to), "2"(from), "0"(size) | 291 | : "1"(to), "2"(from), "0"(size) |
291 | : "eax", "edx", "memory"); | 292 | : "eax", "edx", "memory"); |
292 | return size; | 293 | return size; |
293 | } | 294 | } |
294 | 295 | ||
295 | /* | 296 | /* |
296 | * Non Temporal Hint version of __copy_user_zeroing_intel. It is cache aware. | 297 | * Non Temporal Hint version of __copy_user_zeroing_intel. It is cache aware. |
297 | * hyoshiok@miraclelinux.com | 298 | * hyoshiok@miraclelinux.com |
298 | */ | 299 | */ |
299 | 300 | ||
300 | static unsigned long __copy_user_zeroing_intel_nocache(void *to, | 301 | static unsigned long __copy_user_zeroing_intel_nocache(void *to, |
301 | const void __user *from, unsigned long size) | 302 | const void __user *from, unsigned long size) |
302 | { | 303 | { |
303 | int d0, d1; | 304 | int d0, d1; |
304 | 305 | ||
305 | __asm__ __volatile__( | 306 | __asm__ __volatile__( |
306 | " .align 2,0x90\n" | 307 | " .align 2,0x90\n" |
307 | "0: movl 32(%4), %%eax\n" | 308 | "0: movl 32(%4), %%eax\n" |
308 | " cmpl $67, %0\n" | 309 | " cmpl $67, %0\n" |
309 | " jbe 2f\n" | 310 | " jbe 2f\n" |
310 | "1: movl 64(%4), %%eax\n" | 311 | "1: movl 64(%4), %%eax\n" |
311 | " .align 2,0x90\n" | 312 | " .align 2,0x90\n" |
312 | "2: movl 0(%4), %%eax\n" | 313 | "2: movl 0(%4), %%eax\n" |
313 | "21: movl 4(%4), %%edx\n" | 314 | "21: movl 4(%4), %%edx\n" |
314 | " movnti %%eax, 0(%3)\n" | 315 | " movnti %%eax, 0(%3)\n" |
315 | " movnti %%edx, 4(%3)\n" | 316 | " movnti %%edx, 4(%3)\n" |
316 | "3: movl 8(%4), %%eax\n" | 317 | "3: movl 8(%4), %%eax\n" |
317 | "31: movl 12(%4),%%edx\n" | 318 | "31: movl 12(%4),%%edx\n" |
318 | " movnti %%eax, 8(%3)\n" | 319 | " movnti %%eax, 8(%3)\n" |
319 | " movnti %%edx, 12(%3)\n" | 320 | " movnti %%edx, 12(%3)\n" |
320 | "4: movl 16(%4), %%eax\n" | 321 | "4: movl 16(%4), %%eax\n" |
321 | "41: movl 20(%4), %%edx\n" | 322 | "41: movl 20(%4), %%edx\n" |
322 | " movnti %%eax, 16(%3)\n" | 323 | " movnti %%eax, 16(%3)\n" |
323 | " movnti %%edx, 20(%3)\n" | 324 | " movnti %%edx, 20(%3)\n" |
324 | "10: movl 24(%4), %%eax\n" | 325 | "10: movl 24(%4), %%eax\n" |
325 | "51: movl 28(%4), %%edx\n" | 326 | "51: movl 28(%4), %%edx\n" |
326 | " movnti %%eax, 24(%3)\n" | 327 | " movnti %%eax, 24(%3)\n" |
327 | " movnti %%edx, 28(%3)\n" | 328 | " movnti %%edx, 28(%3)\n" |
328 | "11: movl 32(%4), %%eax\n" | 329 | "11: movl 32(%4), %%eax\n" |
329 | "61: movl 36(%4), %%edx\n" | 330 | "61: movl 36(%4), %%edx\n" |
330 | " movnti %%eax, 32(%3)\n" | 331 | " movnti %%eax, 32(%3)\n" |
331 | " movnti %%edx, 36(%3)\n" | 332 | " movnti %%edx, 36(%3)\n" |
332 | "12: movl 40(%4), %%eax\n" | 333 | "12: movl 40(%4), %%eax\n" |
333 | "71: movl 44(%4), %%edx\n" | 334 | "71: movl 44(%4), %%edx\n" |
334 | " movnti %%eax, 40(%3)\n" | 335 | " movnti %%eax, 40(%3)\n" |
335 | " movnti %%edx, 44(%3)\n" | 336 | " movnti %%edx, 44(%3)\n" |
336 | "13: movl 48(%4), %%eax\n" | 337 | "13: movl 48(%4), %%eax\n" |
337 | "81: movl 52(%4), %%edx\n" | 338 | "81: movl 52(%4), %%edx\n" |
338 | " movnti %%eax, 48(%3)\n" | 339 | " movnti %%eax, 48(%3)\n" |
339 | " movnti %%edx, 52(%3)\n" | 340 | " movnti %%edx, 52(%3)\n" |
340 | "14: movl 56(%4), %%eax\n" | 341 | "14: movl 56(%4), %%eax\n" |
341 | "91: movl 60(%4), %%edx\n" | 342 | "91: movl 60(%4), %%edx\n" |
342 | " movnti %%eax, 56(%3)\n" | 343 | " movnti %%eax, 56(%3)\n" |
343 | " movnti %%edx, 60(%3)\n" | 344 | " movnti %%edx, 60(%3)\n" |
344 | " addl $-64, %0\n" | 345 | " addl $-64, %0\n" |
345 | " addl $64, %4\n" | 346 | " addl $64, %4\n" |
346 | " addl $64, %3\n" | 347 | " addl $64, %3\n" |
347 | " cmpl $63, %0\n" | 348 | " cmpl $63, %0\n" |
348 | " ja 0b\n" | 349 | " ja 0b\n" |
349 | " sfence \n" | 350 | " sfence \n" |
350 | "5: movl %0, %%eax\n" | 351 | "5: movl %0, %%eax\n" |
351 | " shrl $2, %0\n" | 352 | " shrl $2, %0\n" |
352 | " andl $3, %%eax\n" | 353 | " andl $3, %%eax\n" |
353 | " cld\n" | 354 | " cld\n" |
354 | "6: rep; movsl\n" | 355 | "6: rep; movsl\n" |
355 | " movl %%eax,%0\n" | 356 | " movl %%eax,%0\n" |
356 | "7: rep; movsb\n" | 357 | "7: rep; movsb\n" |
357 | "8:\n" | 358 | "8:\n" |
358 | ".section .fixup,\"ax\"\n" | 359 | ".section .fixup,\"ax\"\n" |
359 | "9: lea 0(%%eax,%0,4),%0\n" | 360 | "9: lea 0(%%eax,%0,4),%0\n" |
360 | "16: pushl %0\n" | 361 | "16: pushl %0\n" |
361 | " pushl %%eax\n" | 362 | " pushl %%eax\n" |
362 | " xorl %%eax,%%eax\n" | 363 | " xorl %%eax,%%eax\n" |
363 | " rep; stosb\n" | 364 | " rep; stosb\n" |
364 | " popl %%eax\n" | 365 | " popl %%eax\n" |
365 | " popl %0\n" | 366 | " popl %0\n" |
366 | " jmp 8b\n" | 367 | " jmp 8b\n" |
367 | ".previous\n" | 368 | ".previous\n" |
368 | _ASM_EXTABLE(0b,16b) | 369 | _ASM_EXTABLE(0b,16b) |
369 | _ASM_EXTABLE(1b,16b) | 370 | _ASM_EXTABLE(1b,16b) |
370 | _ASM_EXTABLE(2b,16b) | 371 | _ASM_EXTABLE(2b,16b) |
371 | _ASM_EXTABLE(21b,16b) | 372 | _ASM_EXTABLE(21b,16b) |
372 | _ASM_EXTABLE(3b,16b) | 373 | _ASM_EXTABLE(3b,16b) |
373 | _ASM_EXTABLE(31b,16b) | 374 | _ASM_EXTABLE(31b,16b) |
374 | _ASM_EXTABLE(4b,16b) | 375 | _ASM_EXTABLE(4b,16b) |
375 | _ASM_EXTABLE(41b,16b) | 376 | _ASM_EXTABLE(41b,16b) |
376 | _ASM_EXTABLE(10b,16b) | 377 | _ASM_EXTABLE(10b,16b) |
377 | _ASM_EXTABLE(51b,16b) | 378 | _ASM_EXTABLE(51b,16b) |
378 | _ASM_EXTABLE(11b,16b) | 379 | _ASM_EXTABLE(11b,16b) |
379 | _ASM_EXTABLE(61b,16b) | 380 | _ASM_EXTABLE(61b,16b) |
380 | _ASM_EXTABLE(12b,16b) | 381 | _ASM_EXTABLE(12b,16b) |
381 | _ASM_EXTABLE(71b,16b) | 382 | _ASM_EXTABLE(71b,16b) |
382 | _ASM_EXTABLE(13b,16b) | 383 | _ASM_EXTABLE(13b,16b) |
383 | _ASM_EXTABLE(81b,16b) | 384 | _ASM_EXTABLE(81b,16b) |
384 | _ASM_EXTABLE(14b,16b) | 385 | _ASM_EXTABLE(14b,16b) |
385 | _ASM_EXTABLE(91b,16b) | 386 | _ASM_EXTABLE(91b,16b) |
386 | _ASM_EXTABLE(6b,9b) | 387 | _ASM_EXTABLE(6b,9b) |
387 | _ASM_EXTABLE(7b,16b) | 388 | _ASM_EXTABLE(7b,16b) |
388 | : "=&c"(size), "=&D" (d0), "=&S" (d1) | 389 | : "=&c"(size), "=&D" (d0), "=&S" (d1) |
389 | : "1"(to), "2"(from), "0"(size) | 390 | : "1"(to), "2"(from), "0"(size) |
390 | : "eax", "edx", "memory"); | 391 | : "eax", "edx", "memory"); |
391 | return size; | 392 | return size; |
392 | } | 393 | } |
393 | 394 | ||
394 | static unsigned long __copy_user_intel_nocache(void *to, | 395 | static unsigned long __copy_user_intel_nocache(void *to, |
395 | const void __user *from, unsigned long size) | 396 | const void __user *from, unsigned long size) |
396 | { | 397 | { |
397 | int d0, d1; | 398 | int d0, d1; |
398 | 399 | ||
399 | __asm__ __volatile__( | 400 | __asm__ __volatile__( |
400 | " .align 2,0x90\n" | 401 | " .align 2,0x90\n" |
401 | "0: movl 32(%4), %%eax\n" | 402 | "0: movl 32(%4), %%eax\n" |
402 | " cmpl $67, %0\n" | 403 | " cmpl $67, %0\n" |
403 | " jbe 2f\n" | 404 | " jbe 2f\n" |
404 | "1: movl 64(%4), %%eax\n" | 405 | "1: movl 64(%4), %%eax\n" |
405 | " .align 2,0x90\n" | 406 | " .align 2,0x90\n" |
406 | "2: movl 0(%4), %%eax\n" | 407 | "2: movl 0(%4), %%eax\n" |
407 | "21: movl 4(%4), %%edx\n" | 408 | "21: movl 4(%4), %%edx\n" |
408 | " movnti %%eax, 0(%3)\n" | 409 | " movnti %%eax, 0(%3)\n" |
409 | " movnti %%edx, 4(%3)\n" | 410 | " movnti %%edx, 4(%3)\n" |
410 | "3: movl 8(%4), %%eax\n" | 411 | "3: movl 8(%4), %%eax\n" |
411 | "31: movl 12(%4),%%edx\n" | 412 | "31: movl 12(%4),%%edx\n" |
412 | " movnti %%eax, 8(%3)\n" | 413 | " movnti %%eax, 8(%3)\n" |
413 | " movnti %%edx, 12(%3)\n" | 414 | " movnti %%edx, 12(%3)\n" |
414 | "4: movl 16(%4), %%eax\n" | 415 | "4: movl 16(%4), %%eax\n" |
415 | "41: movl 20(%4), %%edx\n" | 416 | "41: movl 20(%4), %%edx\n" |
416 | " movnti %%eax, 16(%3)\n" | 417 | " movnti %%eax, 16(%3)\n" |
417 | " movnti %%edx, 20(%3)\n" | 418 | " movnti %%edx, 20(%3)\n" |
418 | "10: movl 24(%4), %%eax\n" | 419 | "10: movl 24(%4), %%eax\n" |
419 | "51: movl 28(%4), %%edx\n" | 420 | "51: movl 28(%4), %%edx\n" |
420 | " movnti %%eax, 24(%3)\n" | 421 | " movnti %%eax, 24(%3)\n" |
421 | " movnti %%edx, 28(%3)\n" | 422 | " movnti %%edx, 28(%3)\n" |
422 | "11: movl 32(%4), %%eax\n" | 423 | "11: movl 32(%4), %%eax\n" |
423 | "61: movl 36(%4), %%edx\n" | 424 | "61: movl 36(%4), %%edx\n" |
424 | " movnti %%eax, 32(%3)\n" | 425 | " movnti %%eax, 32(%3)\n" |
425 | " movnti %%edx, 36(%3)\n" | 426 | " movnti %%edx, 36(%3)\n" |
426 | "12: movl 40(%4), %%eax\n" | 427 | "12: movl 40(%4), %%eax\n" |
427 | "71: movl 44(%4), %%edx\n" | 428 | "71: movl 44(%4), %%edx\n" |
428 | " movnti %%eax, 40(%3)\n" | 429 | " movnti %%eax, 40(%3)\n" |
429 | " movnti %%edx, 44(%3)\n" | 430 | " movnti %%edx, 44(%3)\n" |
430 | "13: movl 48(%4), %%eax\n" | 431 | "13: movl 48(%4), %%eax\n" |
431 | "81: movl 52(%4), %%edx\n" | 432 | "81: movl 52(%4), %%edx\n" |
432 | " movnti %%eax, 48(%3)\n" | 433 | " movnti %%eax, 48(%3)\n" |
433 | " movnti %%edx, 52(%3)\n" | 434 | " movnti %%edx, 52(%3)\n" |
434 | "14: movl 56(%4), %%eax\n" | 435 | "14: movl 56(%4), %%eax\n" |
435 | "91: movl 60(%4), %%edx\n" | 436 | "91: movl 60(%4), %%edx\n" |
436 | " movnti %%eax, 56(%3)\n" | 437 | " movnti %%eax, 56(%3)\n" |
437 | " movnti %%edx, 60(%3)\n" | 438 | " movnti %%edx, 60(%3)\n" |
438 | " addl $-64, %0\n" | 439 | " addl $-64, %0\n" |
439 | " addl $64, %4\n" | 440 | " addl $64, %4\n" |
440 | " addl $64, %3\n" | 441 | " addl $64, %3\n" |
441 | " cmpl $63, %0\n" | 442 | " cmpl $63, %0\n" |
442 | " ja 0b\n" | 443 | " ja 0b\n" |
443 | " sfence \n" | 444 | " sfence \n" |
444 | "5: movl %0, %%eax\n" | 445 | "5: movl %0, %%eax\n" |
445 | " shrl $2, %0\n" | 446 | " shrl $2, %0\n" |
446 | " andl $3, %%eax\n" | 447 | " andl $3, %%eax\n" |
447 | " cld\n" | 448 | " cld\n" |
448 | "6: rep; movsl\n" | 449 | "6: rep; movsl\n" |
449 | " movl %%eax,%0\n" | 450 | " movl %%eax,%0\n" |
450 | "7: rep; movsb\n" | 451 | "7: rep; movsb\n" |
451 | "8:\n" | 452 | "8:\n" |
452 | ".section .fixup,\"ax\"\n" | 453 | ".section .fixup,\"ax\"\n" |
453 | "9: lea 0(%%eax,%0,4),%0\n" | 454 | "9: lea 0(%%eax,%0,4),%0\n" |
454 | "16: jmp 8b\n" | 455 | "16: jmp 8b\n" |
455 | ".previous\n" | 456 | ".previous\n" |
456 | _ASM_EXTABLE(0b,16b) | 457 | _ASM_EXTABLE(0b,16b) |
457 | _ASM_EXTABLE(1b,16b) | 458 | _ASM_EXTABLE(1b,16b) |
458 | _ASM_EXTABLE(2b,16b) | 459 | _ASM_EXTABLE(2b,16b) |
459 | _ASM_EXTABLE(21b,16b) | 460 | _ASM_EXTABLE(21b,16b) |
460 | _ASM_EXTABLE(3b,16b) | 461 | _ASM_EXTABLE(3b,16b) |
461 | _ASM_EXTABLE(31b,16b) | 462 | _ASM_EXTABLE(31b,16b) |
462 | _ASM_EXTABLE(4b,16b) | 463 | _ASM_EXTABLE(4b,16b) |
463 | _ASM_EXTABLE(41b,16b) | 464 | _ASM_EXTABLE(41b,16b) |
464 | _ASM_EXTABLE(10b,16b) | 465 | _ASM_EXTABLE(10b,16b) |
465 | _ASM_EXTABLE(51b,16b) | 466 | _ASM_EXTABLE(51b,16b) |
466 | _ASM_EXTABLE(11b,16b) | 467 | _ASM_EXTABLE(11b,16b) |
467 | _ASM_EXTABLE(61b,16b) | 468 | _ASM_EXTABLE(61b,16b) |
468 | _ASM_EXTABLE(12b,16b) | 469 | _ASM_EXTABLE(12b,16b) |
469 | _ASM_EXTABLE(71b,16b) | 470 | _ASM_EXTABLE(71b,16b) |
470 | _ASM_EXTABLE(13b,16b) | 471 | _ASM_EXTABLE(13b,16b) |
471 | _ASM_EXTABLE(81b,16b) | 472 | _ASM_EXTABLE(81b,16b) |
472 | _ASM_EXTABLE(14b,16b) | 473 | _ASM_EXTABLE(14b,16b) |
473 | _ASM_EXTABLE(91b,16b) | 474 | _ASM_EXTABLE(91b,16b) |
474 | _ASM_EXTABLE(6b,9b) | 475 | _ASM_EXTABLE(6b,9b) |
475 | _ASM_EXTABLE(7b,16b) | 476 | _ASM_EXTABLE(7b,16b) |
476 | : "=&c"(size), "=&D" (d0), "=&S" (d1) | 477 | : "=&c"(size), "=&D" (d0), "=&S" (d1) |
477 | : "1"(to), "2"(from), "0"(size) | 478 | : "1"(to), "2"(from), "0"(size) |
478 | : "eax", "edx", "memory"); | 479 | : "eax", "edx", "memory"); |
479 | return size; | 480 | return size; |
480 | } | 481 | } |
481 | 482 | ||
482 | #else | 483 | #else |
483 | 484 | ||
484 | /* | 485 | /* |
485 | * Leave these declared but undefined. There should not be any references to | 486 | * Leave these declared but undefined. There should not be any references to |
486 | * them | 487 | * them |
487 | */ | 488 | */ |
488 | unsigned long __copy_user_zeroing_intel(void *to, const void __user *from, | 489 | unsigned long __copy_user_zeroing_intel(void *to, const void __user *from, |
489 | unsigned long size); | 490 | unsigned long size); |
490 | unsigned long __copy_user_intel(void __user *to, const void *from, | 491 | unsigned long __copy_user_intel(void __user *to, const void *from, |
491 | unsigned long size); | 492 | unsigned long size); |
492 | unsigned long __copy_user_zeroing_intel_nocache(void *to, | 493 | unsigned long __copy_user_zeroing_intel_nocache(void *to, |
493 | const void __user *from, unsigned long size); | 494 | const void __user *from, unsigned long size); |
494 | #endif /* CONFIG_X86_INTEL_USERCOPY */ | 495 | #endif /* CONFIG_X86_INTEL_USERCOPY */ |
495 | 496 | ||
496 | /* Generic arbitrary sized copy. */ | 497 | /* Generic arbitrary sized copy. */ |
497 | #define __copy_user(to, from, size) \ | 498 | #define __copy_user(to, from, size) \ |
498 | do { \ | 499 | do { \ |
499 | int __d0, __d1, __d2; \ | 500 | int __d0, __d1, __d2; \ |
500 | __asm__ __volatile__( \ | 501 | __asm__ __volatile__( \ |
501 | " cmp $7,%0\n" \ | 502 | " cmp $7,%0\n" \ |
502 | " jbe 1f\n" \ | 503 | " jbe 1f\n" \ |
503 | " movl %1,%0\n" \ | 504 | " movl %1,%0\n" \ |
504 | " negl %0\n" \ | 505 | " negl %0\n" \ |
505 | " andl $7,%0\n" \ | 506 | " andl $7,%0\n" \ |
506 | " subl %0,%3\n" \ | 507 | " subl %0,%3\n" \ |
507 | "4: rep; movsb\n" \ | 508 | "4: rep; movsb\n" \ |
508 | " movl %3,%0\n" \ | 509 | " movl %3,%0\n" \ |
509 | " shrl $2,%0\n" \ | 510 | " shrl $2,%0\n" \ |
510 | " andl $3,%3\n" \ | 511 | " andl $3,%3\n" \ |
511 | " .align 2,0x90\n" \ | 512 | " .align 2,0x90\n" \ |
512 | "0: rep; movsl\n" \ | 513 | "0: rep; movsl\n" \ |
513 | " movl %3,%0\n" \ | 514 | " movl %3,%0\n" \ |
514 | "1: rep; movsb\n" \ | 515 | "1: rep; movsb\n" \ |
515 | "2:\n" \ | 516 | "2:\n" \ |
516 | ".section .fixup,\"ax\"\n" \ | 517 | ".section .fixup,\"ax\"\n" \ |
517 | "5: addl %3,%0\n" \ | 518 | "5: addl %3,%0\n" \ |
518 | " jmp 2b\n" \ | 519 | " jmp 2b\n" \ |
519 | "3: lea 0(%3,%0,4),%0\n" \ | 520 | "3: lea 0(%3,%0,4),%0\n" \ |
520 | " jmp 2b\n" \ | 521 | " jmp 2b\n" \ |
521 | ".previous\n" \ | 522 | ".previous\n" \ |
522 | _ASM_EXTABLE(4b,5b) \ | 523 | _ASM_EXTABLE(4b,5b) \ |
523 | _ASM_EXTABLE(0b,3b) \ | 524 | _ASM_EXTABLE(0b,3b) \ |
524 | _ASM_EXTABLE(1b,2b) \ | 525 | _ASM_EXTABLE(1b,2b) \ |
525 | : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ | 526 | : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ |
526 | : "3"(size), "0"(size), "1"(to), "2"(from) \ | 527 | : "3"(size), "0"(size), "1"(to), "2"(from) \ |
527 | : "memory"); \ | 528 | : "memory"); \ |
528 | } while (0) | 529 | } while (0) |
529 | 530 | ||
530 | #define __copy_user_zeroing(to, from, size) \ | 531 | #define __copy_user_zeroing(to, from, size) \ |
531 | do { \ | 532 | do { \ |
532 | int __d0, __d1, __d2; \ | 533 | int __d0, __d1, __d2; \ |
533 | __asm__ __volatile__( \ | 534 | __asm__ __volatile__( \ |
534 | " cmp $7,%0\n" \ | 535 | " cmp $7,%0\n" \ |
535 | " jbe 1f\n" \ | 536 | " jbe 1f\n" \ |
536 | " movl %1,%0\n" \ | 537 | " movl %1,%0\n" \ |
537 | " negl %0\n" \ | 538 | " negl %0\n" \ |
538 | " andl $7,%0\n" \ | 539 | " andl $7,%0\n" \ |
539 | " subl %0,%3\n" \ | 540 | " subl %0,%3\n" \ |
540 | "4: rep; movsb\n" \ | 541 | "4: rep; movsb\n" \ |
541 | " movl %3,%0\n" \ | 542 | " movl %3,%0\n" \ |
542 | " shrl $2,%0\n" \ | 543 | " shrl $2,%0\n" \ |
543 | " andl $3,%3\n" \ | 544 | " andl $3,%3\n" \ |
544 | " .align 2,0x90\n" \ | 545 | " .align 2,0x90\n" \ |
545 | "0: rep; movsl\n" \ | 546 | "0: rep; movsl\n" \ |
546 | " movl %3,%0\n" \ | 547 | " movl %3,%0\n" \ |
547 | "1: rep; movsb\n" \ | 548 | "1: rep; movsb\n" \ |
548 | "2:\n" \ | 549 | "2:\n" \ |
549 | ".section .fixup,\"ax\"\n" \ | 550 | ".section .fixup,\"ax\"\n" \ |
550 | "5: addl %3,%0\n" \ | 551 | "5: addl %3,%0\n" \ |
551 | " jmp 6f\n" \ | 552 | " jmp 6f\n" \ |
552 | "3: lea 0(%3,%0,4),%0\n" \ | 553 | "3: lea 0(%3,%0,4),%0\n" \ |
553 | "6: pushl %0\n" \ | 554 | "6: pushl %0\n" \ |
554 | " pushl %%eax\n" \ | 555 | " pushl %%eax\n" \ |
555 | " xorl %%eax,%%eax\n" \ | 556 | " xorl %%eax,%%eax\n" \ |
556 | " rep; stosb\n" \ | 557 | " rep; stosb\n" \ |
557 | " popl %%eax\n" \ | 558 | " popl %%eax\n" \ |
558 | " popl %0\n" \ | 559 | " popl %0\n" \ |
559 | " jmp 2b\n" \ | 560 | " jmp 2b\n" \ |
560 | ".previous\n" \ | 561 | ".previous\n" \ |
561 | _ASM_EXTABLE(4b,5b) \ | 562 | _ASM_EXTABLE(4b,5b) \ |
562 | _ASM_EXTABLE(0b,3b) \ | 563 | _ASM_EXTABLE(0b,3b) \ |
563 | _ASM_EXTABLE(1b,6b) \ | 564 | _ASM_EXTABLE(1b,6b) \ |
564 | : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ | 565 | : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ |
565 | : "3"(size), "0"(size), "1"(to), "2"(from) \ | 566 | : "3"(size), "0"(size), "1"(to), "2"(from) \ |
566 | : "memory"); \ | 567 | : "memory"); \ |
567 | } while (0) | 568 | } while (0) |
568 | 569 | ||
569 | unsigned long __copy_to_user_ll(void __user *to, const void *from, | 570 | unsigned long __copy_to_user_ll(void __user *to, const void *from, |
570 | unsigned long n) | 571 | unsigned long n) |
571 | { | 572 | { |
572 | #ifndef CONFIG_X86_WP_WORKS_OK | 573 | #ifndef CONFIG_X86_WP_WORKS_OK |
573 | if (unlikely(boot_cpu_data.wp_works_ok == 0) && | 574 | if (unlikely(boot_cpu_data.wp_works_ok == 0) && |
574 | ((unsigned long)to) < TASK_SIZE) { | 575 | ((unsigned long)to) < TASK_SIZE) { |
575 | /* | 576 | /* |
576 | * When we are in an atomic section (see | 577 | * When we are in an atomic section (see |
577 | * mm/filemap.c:file_read_actor), return the full | 578 | * mm/filemap.c:file_read_actor), return the full |
578 | * length to take the slow path. | 579 | * length to take the slow path. |
579 | */ | 580 | */ |
580 | if (in_atomic()) | 581 | if (in_atomic()) |
581 | return n; | 582 | return n; |
582 | 583 | ||
583 | /* | 584 | /* |
584 | * CPU does not honor the WP bit when writing | 585 | * CPU does not honor the WP bit when writing |
585 | * from supervisory mode, and due to preemption or SMP, | 586 | * from supervisory mode, and due to preemption or SMP, |
586 | * the page tables can change at any time. | 587 | * the page tables can change at any time. |
587 | * Do it manually. Manfred <manfred@colorfullife.com> | 588 | * Do it manually. Manfred <manfred@colorfullife.com> |
588 | */ | 589 | */ |
589 | while (n) { | 590 | while (n) { |
590 | unsigned long offset = ((unsigned long)to)%PAGE_SIZE; | 591 | unsigned long offset = ((unsigned long)to)%PAGE_SIZE; |
591 | unsigned long len = PAGE_SIZE - offset; | 592 | unsigned long len = PAGE_SIZE - offset; |
592 | int retval; | 593 | int retval; |
593 | struct page *pg; | 594 | struct page *pg; |
594 | void *maddr; | 595 | void *maddr; |
595 | 596 | ||
596 | if (len > n) | 597 | if (len > n) |
597 | len = n; | 598 | len = n; |
598 | 599 | ||
599 | survive: | 600 | survive: |
600 | down_read(&current->mm->mmap_sem); | 601 | down_read(&current->mm->mmap_sem); |
601 | retval = get_user_pages(current, current->mm, | 602 | retval = get_user_pages(current, current->mm, |
602 | (unsigned long)to, 1, 1, 0, &pg, NULL); | 603 | (unsigned long)to, 1, 1, 0, &pg, NULL); |
603 | 604 | ||
604 | if (retval == -ENOMEM && is_global_init(current)) { | 605 | if (retval == -ENOMEM && is_global_init(current)) { |
605 | up_read(&current->mm->mmap_sem); | 606 | up_read(&current->mm->mmap_sem); |
606 | congestion_wait(BLK_RW_ASYNC, HZ/50); | 607 | congestion_wait(BLK_RW_ASYNC, HZ/50); |
607 | goto survive; | 608 | goto survive; |
608 | } | 609 | } |
609 | 610 | ||
610 | if (retval != 1) { | 611 | if (retval != 1) { |
611 | up_read(&current->mm->mmap_sem); | 612 | up_read(&current->mm->mmap_sem); |
612 | break; | 613 | break; |
613 | } | 614 | } |
614 | 615 | ||
615 | maddr = kmap_atomic(pg); | 616 | maddr = kmap_atomic(pg); |
616 | memcpy(maddr + offset, from, len); | 617 | memcpy(maddr + offset, from, len); |
617 | kunmap_atomic(maddr); | 618 | kunmap_atomic(maddr); |
618 | set_page_dirty_lock(pg); | 619 | set_page_dirty_lock(pg); |
619 | put_page(pg); | 620 | put_page(pg); |
620 | up_read(&current->mm->mmap_sem); | 621 | up_read(&current->mm->mmap_sem); |
621 | 622 | ||
622 | from += len; | 623 | from += len; |
623 | to += len; | 624 | to += len; |
624 | n -= len; | 625 | n -= len; |
625 | } | 626 | } |
626 | return n; | 627 | return n; |
627 | } | 628 | } |
628 | #endif | 629 | #endif |
630 | stac(); | ||
629 | if (movsl_is_ok(to, from, n)) | 631 | if (movsl_is_ok(to, from, n)) |
630 | __copy_user(to, from, n); | 632 | __copy_user(to, from, n); |
631 | else | 633 | else |
632 | n = __copy_user_intel(to, from, n); | 634 | n = __copy_user_intel(to, from, n); |
635 | clac(); | ||
633 | return n; | 636 | return n; |
634 | } | 637 | } |
635 | EXPORT_SYMBOL(__copy_to_user_ll); | 638 | EXPORT_SYMBOL(__copy_to_user_ll); |
636 | 639 | ||
637 | unsigned long __copy_from_user_ll(void *to, const void __user *from, | 640 | unsigned long __copy_from_user_ll(void *to, const void __user *from, |
638 | unsigned long n) | 641 | unsigned long n) |
639 | { | 642 | { |
643 | stac(); | ||
640 | if (movsl_is_ok(to, from, n)) | 644 | if (movsl_is_ok(to, from, n)) |
641 | __copy_user_zeroing(to, from, n); | 645 | __copy_user_zeroing(to, from, n); |
642 | else | 646 | else |
643 | n = __copy_user_zeroing_intel(to, from, n); | 647 | n = __copy_user_zeroing_intel(to, from, n); |
648 | clac(); | ||
644 | return n; | 649 | return n; |
645 | } | 650 | } |
646 | EXPORT_SYMBOL(__copy_from_user_ll); | 651 | EXPORT_SYMBOL(__copy_from_user_ll); |
647 | 652 | ||
648 | unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, | 653 | unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, |
649 | unsigned long n) | 654 | unsigned long n) |
650 | { | 655 | { |
656 | stac(); | ||
651 | if (movsl_is_ok(to, from, n)) | 657 | if (movsl_is_ok(to, from, n)) |
652 | __copy_user(to, from, n); | 658 | __copy_user(to, from, n); |
653 | else | 659 | else |
654 | n = __copy_user_intel((void __user *)to, | 660 | n = __copy_user_intel((void __user *)to, |
655 | (const void *)from, n); | 661 | (const void *)from, n); |
662 | clac(); | ||
656 | return n; | 663 | return n; |
657 | } | 664 | } |
658 | EXPORT_SYMBOL(__copy_from_user_ll_nozero); | 665 | EXPORT_SYMBOL(__copy_from_user_ll_nozero); |
659 | 666 | ||
660 | unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, | 667 | unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, |
661 | unsigned long n) | 668 | unsigned long n) |
662 | { | 669 | { |
670 | stac(); | ||
663 | #ifdef CONFIG_X86_INTEL_USERCOPY | 671 | #ifdef CONFIG_X86_INTEL_USERCOPY |
664 | if (n > 64 && cpu_has_xmm2) | 672 | if (n > 64 && cpu_has_xmm2) |
665 | n = __copy_user_zeroing_intel_nocache(to, from, n); | 673 | n = __copy_user_zeroing_intel_nocache(to, from, n); |
666 | else | 674 | else |
667 | __copy_user_zeroing(to, from, n); | 675 | __copy_user_zeroing(to, from, n); |
668 | #else | 676 | #else |
669 | __copy_user_zeroing(to, from, n); | 677 | __copy_user_zeroing(to, from, n); |
670 | #endif | 678 | #endif |
679 | clac(); | ||
671 | return n; | 680 | return n; |
672 | } | 681 | } |
673 | EXPORT_SYMBOL(__copy_from_user_ll_nocache); | 682 | EXPORT_SYMBOL(__copy_from_user_ll_nocache); |
674 | 683 | ||
675 | unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, | 684 | unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, |
676 | unsigned long n) | 685 | unsigned long n) |
677 | { | 686 | { |
687 | stac(); | ||
678 | #ifdef CONFIG_X86_INTEL_USERCOPY | 688 | #ifdef CONFIG_X86_INTEL_USERCOPY |
679 | if (n > 64 && cpu_has_xmm2) | 689 | if (n > 64 && cpu_has_xmm2) |
680 | n = __copy_user_intel_nocache(to, from, n); | 690 | n = __copy_user_intel_nocache(to, from, n); |
681 | else | 691 | else |
682 | __copy_user(to, from, n); | 692 | __copy_user(to, from, n); |
683 | #else | 693 | #else |
684 | __copy_user(to, from, n); | 694 | __copy_user(to, from, n); |
685 | #endif | 695 | #endif |
696 | clac(); | ||
686 | return n; | 697 | return n; |
687 | } | 698 | } |
688 | EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); | 699 | EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); |
689 | 700 | ||
690 | /** | 701 | /** |
691 | * copy_to_user: - Copy a block of data into user space. | 702 | * copy_to_user: - Copy a block of data into user space. |
692 | * @to: Destination address, in user space. | 703 | * @to: Destination address, in user space. |
693 | * @from: Source address, in kernel space. | 704 | * @from: Source address, in kernel space. |
694 | * @n: Number of bytes to copy. | 705 | * @n: Number of bytes to copy. |
695 | * | 706 | * |
696 | * Context: User context only. This function may sleep. | 707 | * Context: User context only. This function may sleep. |
697 | * | 708 | * |
698 | * Copy data from kernel space to user space. | 709 | * Copy data from kernel space to user space. |
699 | * | 710 | * |
700 | * Returns number of bytes that could not be copied. | 711 | * Returns number of bytes that could not be copied. |
701 | * On success, this will be zero. | 712 | * On success, this will be zero. |
702 | */ | 713 | */ |
703 | unsigned long | 714 | unsigned long |
704 | copy_to_user(void __user *to, const void *from, unsigned long n) | 715 | copy_to_user(void __user *to, const void *from, unsigned long n) |
705 | { | 716 | { |
706 | if (access_ok(VERIFY_WRITE, to, n)) | 717 | if (access_ok(VERIFY_WRITE, to, n)) |
707 | n = __copy_to_user(to, from, n); | 718 | n = __copy_to_user(to, from, n); |
708 | return n; | 719 | return n; |
709 | } | 720 | } |
710 | EXPORT_SYMBOL(copy_to_user); | 721 | EXPORT_SYMBOL(copy_to_user); |
711 | 722 | ||
712 | /** | 723 | /** |
713 | * copy_from_user: - Copy a block of data from user space. | 724 | * copy_from_user: - Copy a block of data from user space. |
714 | * @to: Destination address, in kernel space. | 725 | * @to: Destination address, in kernel space. |
715 | * @from: Source address, in user space. | 726 | * @from: Source address, in user space. |
716 | * @n: Number of bytes to copy. | 727 | * @n: Number of bytes to copy. |
717 | * | 728 | * |
718 | * Context: User context only. This function may sleep. | 729 | * Context: User context only. This function may sleep. |
719 | * | 730 | * |
720 | * Copy data from user space to kernel space. | 731 | * Copy data from user space to kernel space. |
721 | * | 732 | * |
722 | * Returns number of bytes that could not be copied. | 733 | * Returns number of bytes that could not be copied. |
723 | * On success, this will be zero. | 734 | * On success, this will be zero. |
724 | * | 735 | * |
725 | * If some data could not be copied, this function will pad the copied | 736 | * If some data could not be copied, this function will pad the copied |
726 | * data to the requested size using zero bytes. | 737 | * data to the requested size using zero bytes. |
727 | */ | 738 | */ |
728 | unsigned long | 739 | unsigned long |
729 | _copy_from_user(void *to, const void __user *from, unsigned long n) | 740 | _copy_from_user(void *to, const void __user *from, unsigned long n) |
730 | { | 741 | { |
731 | if (access_ok(VERIFY_READ, from, n)) | 742 | if (access_ok(VERIFY_READ, from, n)) |
732 | n = __copy_from_user(to, from, n); | 743 | n = __copy_from_user(to, from, n); |
733 | else | 744 | else |
734 | memset(to, 0, n); | 745 | memset(to, 0, n); |
735 | return n; | 746 | return n; |
736 | } | 747 | } |
737 | EXPORT_SYMBOL(_copy_from_user); | 748 | EXPORT_SYMBOL(_copy_from_user); |
738 | 749 | ||
739 | void copy_from_user_overflow(void) | 750 | void copy_from_user_overflow(void) |
740 | { | 751 | { |
741 | WARN(1, "Buffer overflow detected!\n"); | 752 | WARN(1, "Buffer overflow detected!\n"); |
742 | } | 753 | } |
743 | EXPORT_SYMBOL(copy_from_user_overflow); | 754 | EXPORT_SYMBOL(copy_from_user_overflow); |
744 | 755 |
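The kerneldoc in the hunk above specifies copy_to_user()/_copy_from_user() purely through their return convention: both return the number of bytes that could not be copied (zero on full success), and _copy_from_user() zero-pads the destination on a partial read. As a reading aid only, here is a minimal, hypothetical caller written against exactly that contract; the function and buffer names (example_read, example_kbuf) are illustrative and are not part of this commit.

/* Illustrative sketch, not part of the patch: a read-style handler
 * relying on the copy_to_user() return convention documented above. */
#include <linux/uaccess.h>	/* copy_to_user() */
#include <linux/errno.h>

static char example_kbuf[64];

static long example_read(char __user *ubuf, unsigned long count)
{
	unsigned long len = count < sizeof(example_kbuf) ?
			    count : sizeof(example_kbuf);

	/* May sleep; user context only, as the kerneldoc states. */
	if (copy_to_user(ubuf, example_kbuf, len))
		return -EFAULT;		/* some bytes were left uncopied */
	return len;
}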
arch/x86/lib/usercopy_64.c
1 | /* | 1 | /* |
2 | * User address space access functions. | 2 | * User address space access functions. |
3 | * | 3 | * |
4 | * Copyright 1997 Andi Kleen <ak@muc.de> | 4 | * Copyright 1997 Andi Kleen <ak@muc.de> |
5 | * Copyright 1997 Linus Torvalds | 5 | * Copyright 1997 Linus Torvalds |
6 | * Copyright 2002 Andi Kleen <ak@suse.de> | 6 | * Copyright 2002 Andi Kleen <ak@suse.de> |
7 | */ | 7 | */ |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <asm/uaccess.h> | 9 | #include <asm/uaccess.h> |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * Zero Userspace | 12 | * Zero Userspace |
13 | */ | 13 | */ |
14 | 14 | ||
15 | unsigned long __clear_user(void __user *addr, unsigned long size) | 15 | unsigned long __clear_user(void __user *addr, unsigned long size) |
16 | { | 16 | { |
17 | long __d0; | 17 | long __d0; |
18 | might_fault(); | 18 | might_fault(); |
19 | /* no memory constraint because it doesn't change any memory gcc knows | 19 | /* no memory constraint because it doesn't change any memory gcc knows |
20 | about */ | 20 | about */ |
21 | stac(); | ||
21 | asm volatile( | 22 | asm volatile( |
22 | " testq %[size8],%[size8]\n" | 23 | " testq %[size8],%[size8]\n" |
23 | " jz 4f\n" | 24 | " jz 4f\n" |
24 | "0: movq %[zero],(%[dst])\n" | 25 | "0: movq %[zero],(%[dst])\n" |
25 | " addq %[eight],%[dst]\n" | 26 | " addq %[eight],%[dst]\n" |
26 | " decl %%ecx ; jnz 0b\n" | 27 | " decl %%ecx ; jnz 0b\n" |
27 | "4: movq %[size1],%%rcx\n" | 28 | "4: movq %[size1],%%rcx\n" |
28 | " testl %%ecx,%%ecx\n" | 29 | " testl %%ecx,%%ecx\n" |
29 | " jz 2f\n" | 30 | " jz 2f\n" |
30 | "1: movb %b[zero],(%[dst])\n" | 31 | "1: movb %b[zero],(%[dst])\n" |
31 | " incq %[dst]\n" | 32 | " incq %[dst]\n" |
32 | " decl %%ecx ; jnz 1b\n" | 33 | " decl %%ecx ; jnz 1b\n" |
33 | "2:\n" | 34 | "2:\n" |
34 | ".section .fixup,\"ax\"\n" | 35 | ".section .fixup,\"ax\"\n" |
35 | "3: lea 0(%[size1],%[size8],8),%[size8]\n" | 36 | "3: lea 0(%[size1],%[size8],8),%[size8]\n" |
36 | " jmp 2b\n" | 37 | " jmp 2b\n" |
37 | ".previous\n" | 38 | ".previous\n" |
38 | _ASM_EXTABLE(0b,3b) | 39 | _ASM_EXTABLE(0b,3b) |
39 | _ASM_EXTABLE(1b,2b) | 40 | _ASM_EXTABLE(1b,2b) |
40 | : [size8] "=&c"(size), [dst] "=&D" (__d0) | 41 | : [size8] "=&c"(size), [dst] "=&D" (__d0) |
41 | : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), | 42 | : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), |
42 | [zero] "r" (0UL), [eight] "r" (8UL)); | 43 | [zero] "r" (0UL), [eight] "r" (8UL)); |
44 | clac(); | ||
43 | return size; | 45 | return size; |
44 | } | 46 | } |
45 | EXPORT_SYMBOL(__clear_user); | 47 | EXPORT_SYMBOL(__clear_user); |
46 | 48 | ||
47 | unsigned long clear_user(void __user *to, unsigned long n) | 49 | unsigned long clear_user(void __user *to, unsigned long n) |
48 | { | 50 | { |
49 | if (access_ok(VERIFY_WRITE, to, n)) | 51 | if (access_ok(VERIFY_WRITE, to, n)) |
50 | return __clear_user(to, n); | 52 | return __clear_user(to, n); |
51 | return n; | 53 | return n; |
52 | } | 54 | } |
53 | EXPORT_SYMBOL(clear_user); | 55 | EXPORT_SYMBOL(clear_user); |
54 | 56 | ||
55 | unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) | 57 | unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) |
56 | { | 58 | { |
57 | if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { | 59 | if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { |
58 | return copy_user_generic((__force void *)to, (__force void *)from, len); | 60 | return copy_user_generic((__force void *)to, (__force void *)from, len); |
59 | } | 61 | } |
60 | return len; | 62 | return len; |
61 | } | 63 | } |
62 | EXPORT_SYMBOL(copy_in_user); | 64 | EXPORT_SYMBOL(copy_in_user); |
63 | 65 | ||
64 | /* | 66 | /* |
65 | * Try to copy last bytes and clear the rest if needed. | 67 | * Try to copy last bytes and clear the rest if needed. |
66 | * Since protection fault in copy_from/to_user is not a normal situation, | 68 | * Since protection fault in copy_from/to_user is not a normal situation, |
67 | * it is not necessary to optimize tail handling. | 69 | * it is not necessary to optimize tail handling. |
68 | */ | 70 | */ |
69 | unsigned long | 71 | unsigned long |
70 | copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) | 72 | copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) |
71 | { | 73 | { |
72 | char c; | 74 | char c; |
73 | unsigned zero_len; | 75 | unsigned zero_len; |
74 | 76 | ||
75 | for (; len; --len) { | 77 | for (; len; --len) { |
76 | if (__get_user_nocheck(c, from++, sizeof(char))) | 78 | if (__get_user_nocheck(c, from++, sizeof(char))) |
77 | break; | 79 | break; |
78 | if (__put_user_nocheck(c, to++, sizeof(char))) | 80 | if (__put_user_nocheck(c, to++, sizeof(char))) |
79 | break; | 81 | break; |
80 | } | 82 | } |
81 | 83 | ||
82 | for (c = 0, zero_len = len; zerorest && zero_len; --zero_len) | 84 | for (c = 0, zero_len = len; zerorest && zero_len; --zero_len) |
83 | if (__put_user_nocheck(c, to++, sizeof(char))) | 85 | if (__put_user_nocheck(c, to++, sizeof(char))) |
84 | break; | 86 | break; |
87 | clac(); | ||
85 | return len; | 88 | return len; |
86 | } | 89 | } |
87 | 90 |
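Taken together, the hunks in both usercopy files follow one pairing rule: each low-level primitive executes stac() immediately before touching user memory and clac() on every exit path, and the clac() appended to copy_user_handle_tail() presumably closes the window for fixup paths that reach it with user access still enabled. Below is a minimal sketch of that bracketing in isolation, reusing the __copy_user() worker macro defined in the 32-bit file above; the function (example_user_write) is hypothetical and only illustrates the pattern, it is not part of this commit.

/* Hypothetical illustration of the stac()/clac() bracketing applied
 * throughout these files; not part of the patch. */
static unsigned long example_user_write(void __user *dst, const void *src,
					unsigned long n)
{
	stac();				/* open the user-access window        */
	__copy_user(dst, src, n);	/* n is updated to the uncopied count */
	clac();				/* close the window on the way out    */

	return n;			/* 0 on full success, like the _ll helpers */
}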