Commit 61c4628b538608c1a85211ed8438136adfeb9a95
Committed by: Ingo Molnar
Parent: fa5c463941
Exists in: master and 4 other branches
x86, fpu: split FPU state from task struct - v5
Split the FPU save area from the task struct. This allows easy migration of FPU context, and it's generally cleaner. It also allows the following two optimizations:

1) only allocate the FPU state when the application actually uses the FPU, i.e. in the first lazy FPU trap. This could save memory for apps that don't use the FPU. The next patch does this lazy allocation.

2) allocate the right size for the actual CPU rather than always 512 bytes. Patches enabling xsave/xrstor support (coming shortly) will take advantage of this.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
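Before the diff itself, a minimal, self-contained C sketch (plain userspace code, not kernel code) of the pattern the commit introduces: the extended state lives behind a pointer in the per-task structure, its size is chosen once at boot for the running CPU, and it is copied into a freshly allocated buffer when a task is duplicated. The names xstate_size, init_thread_xstate and arch_dup_task_struct mirror the diff below; the struct layout and the malloc-based allocation are illustrative stand-ins for union thread_xstate and the kernel's task_xstate slab cache.

    #include <stdlib.h>
    #include <string.h>

    /* stand-in for union thread_xstate (fsave/fxsave/soft in the real code) */
    struct xstate {
        unsigned char bytes[512];
    };

    /* per-task data: the FPU state is now referenced, not embedded */
    struct task {
        struct xstate *xstate;
    };

    static size_t xstate_size;      /* sized once for the running CPU */

    static void init_thread_xstate(void)
    {
        /* the kernel picks sizeof(struct i387_fxsave_struct) or
         * sizeof(struct i387_fsave_struct) here, depending on the CPU */
        xstate_size = sizeof(struct xstate);
    }

    /* fork path: give the child its own buffer and copy the parent's state */
    static int arch_dup_task_struct(struct task *dst, const struct task *src)
    {
        *dst = *src;
        dst->xstate = malloc(xstate_size);   /* kernel: kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL) */
        if (!dst->xstate)
            return -1;                       /* kernel: -ENOMEM */
        memcpy(dst->xstate, src->xstate, xstate_size);
        return 0;
    }

    int main(void)
    {
        struct task parent = { .xstate = NULL }, child;

        init_thread_xstate();
        parent.xstate = calloc(1, xstate_size);   /* parent's own state buffer */
        if (!parent.xstate || arch_dup_task_struct(&child, &parent))
            return 1;
        free(child.xstate);
        free(parent.xstate);
        return 0;
    }

Decoupling the buffer from the task struct is also what lets the commit drop the old compile-time 16-byte alignment check: the separate allocation can simply be made with the required alignment (see the WARN_ON and __alignof__ use in arch/x86/kernel/process.c below).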
Showing 16 changed files with 161 additions and 90 deletions
- arch/x86/kernel/Makefile
- arch/x86/kernel/i387.c
- arch/x86/kernel/process.c
- arch/x86/kernel/process_32.c
- arch/x86/kernel/process_64.c
- arch/x86/kernel/traps_32.c
- arch/x86/kernel/traps_64.c
- arch/x86/math-emu/fpu_entry.c
- arch/x86/math-emu/fpu_system.h
- arch/x86/math-emu/reg_ld_str.c
- include/asm-x86/i387.h
- include/asm-x86/processor.h
- include/asm-x86/thread_info.h
- include/asm-x86/thread_info_32.h
- include/asm-x86/thread_info_64.h
- kernel/fork.c
arch/x86/kernel/Makefile
arch/x86/kernel/i387.c
... | ... | @@ -8,6 +8,7 @@ |
8 | 8 | #include <linux/module.h> |
9 | 9 | #include <linux/regset.h> |
10 | 10 | #include <linux/sched.h> |
11 | +#include <linux/bootmem.h> | |
11 | 12 | |
12 | 13 | #include <asm/sigcontext.h> |
13 | 14 | #include <asm/processor.h> |
14 | 15 | |
15 | 16 | |
... | ... | @@ -35,17 +36,18 @@ |
35 | 36 | #endif |
36 | 37 | |
37 | 38 | static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; |
39 | +unsigned int xstate_size; | |
40 | +static struct i387_fxsave_struct fx_scratch __cpuinitdata; | |
38 | 41 | |
39 | -void mxcsr_feature_mask_init(void) | |
42 | +void __cpuinit mxcsr_feature_mask_init(void) | |
40 | 43 | { |
41 | 44 | unsigned long mask = 0; |
42 | 45 | |
43 | 46 | clts(); |
44 | 47 | if (cpu_has_fxsr) { |
45 | - memset(¤t->thread.i387.fxsave, 0, | |
46 | - sizeof(struct i387_fxsave_struct)); | |
47 | - asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); | |
48 | - mask = current->thread.i387.fxsave.mxcsr_mask; | |
48 | + memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); | |
49 | + asm volatile("fxsave %0" : : "m" (fx_scratch)); | |
50 | + mask = fx_scratch.mxcsr_mask; | |
49 | 51 | if (mask == 0) |
50 | 52 | mask = 0x0000ffbf; |
51 | 53 | } |
... | ... | @@ -53,6 +55,17 @@ |
53 | 55 | stts(); |
54 | 56 | } |
55 | 57 | |
58 | +void __init init_thread_xstate(void) | |
59 | +{ | |
60 | + if (cpu_has_fxsr) | |
61 | + xstate_size = sizeof(struct i387_fxsave_struct); | |
62 | +#ifdef CONFIG_X86_32 | |
63 | + else | |
64 | + xstate_size = sizeof(struct i387_fsave_struct); | |
65 | +#endif | |
66 | + init_task.thread.xstate = alloc_bootmem(xstate_size); | |
67 | +} | |
68 | + | |
56 | 69 | #ifdef CONFIG_X86_64 |
57 | 70 | /* |
58 | 71 | * Called at bootup to set up the initial FPU state that is later cloned |
59 | 72 | |
... | ... | @@ -61,11 +74,7 @@ |
61 | 74 | void __cpuinit fpu_init(void) |
62 | 75 | { |
63 | 76 | unsigned long oldcr0 = read_cr0(); |
64 | - extern void __bad_fxsave_alignment(void); | |
65 | 77 | |
66 | - if (offsetof(struct task_struct, thread.i387.fxsave) & 15) | |
67 | - __bad_fxsave_alignment(); | |
68 | - | |
69 | 78 | set_in_cr4(X86_CR4_OSFXSR); |
70 | 79 | set_in_cr4(X86_CR4_OSXMMEXCPT); |
71 | 80 | |
72 | 81 | |
73 | 82 | |
... | ... | @@ -93,18 +102,19 @@ |
93 | 102 | } |
94 | 103 | |
95 | 104 | if (cpu_has_fxsr) { |
96 | - memset(&tsk->thread.i387.fxsave, 0, | |
97 | - sizeof(struct i387_fxsave_struct)); | |
98 | - tsk->thread.i387.fxsave.cwd = 0x37f; | |
105 | + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | |
106 | + | |
107 | + memset(fx, 0, xstate_size); | |
108 | + fx->cwd = 0x37f; | |
99 | 109 | if (cpu_has_xmm) |
100 | - tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT; | |
110 | + fx->mxcsr = MXCSR_DEFAULT; | |
101 | 111 | } else { |
102 | - memset(&tsk->thread.i387.fsave, 0, | |
103 | - sizeof(struct i387_fsave_struct)); | |
104 | - tsk->thread.i387.fsave.cwd = 0xffff037fu; | |
105 | - tsk->thread.i387.fsave.swd = 0xffff0000u; | |
106 | - tsk->thread.i387.fsave.twd = 0xffffffffu; | |
107 | - tsk->thread.i387.fsave.fos = 0xffff0000u; | |
112 | + struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; | |
113 | + memset(fp, 0, xstate_size); | |
114 | + fp->cwd = 0xffff037fu; | |
115 | + fp->swd = 0xffff0000u; | |
116 | + fp->twd = 0xffffffffu; | |
117 | + fp->fos = 0xffff0000u; | |
108 | 118 | } |
109 | 119 | /* |
110 | 120 | * Only the device not available exception or ptrace can call init_fpu. |
... | ... | @@ -132,7 +142,7 @@ |
132 | 142 | init_fpu(target); |
133 | 143 | |
134 | 144 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
135 | - &target->thread.i387.fxsave, 0, -1); | |
145 | + &target->thread.xstate->fxsave, 0, -1); | |
136 | 146 | } |
137 | 147 | |
138 | 148 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, |
139 | 149 | |
... | ... | @@ -148,12 +158,12 @@ |
148 | 158 | set_stopped_child_used_math(target); |
149 | 159 | |
150 | 160 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
151 | - &target->thread.i387.fxsave, 0, -1); | |
161 | + &target->thread.xstate->fxsave, 0, -1); | |
152 | 162 | |
153 | 163 | /* |
154 | 164 | * mxcsr reserved bits must be masked to zero for security reasons. |
155 | 165 | */ |
156 | - target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; | |
166 | + target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | |
157 | 167 | |
158 | 168 | return ret; |
159 | 169 | } |
... | ... | @@ -233,7 +243,7 @@ |
233 | 243 | static void |
234 | 244 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) |
235 | 245 | { |
236 | - struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; | |
246 | + struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; | |
237 | 247 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; |
238 | 248 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; |
239 | 249 | int i; |
... | ... | @@ -273,7 +283,7 @@ |
273 | 283 | const struct user_i387_ia32_struct *env) |
274 | 284 | |
275 | 285 | { |
276 | - struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; | |
286 | + struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; | |
277 | 287 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; |
278 | 288 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; |
279 | 289 | int i; |
... | ... | @@ -310,7 +320,8 @@ |
310 | 320 | |
311 | 321 | if (!cpu_has_fxsr) { |
312 | 322 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
313 | - &target->thread.i387.fsave, 0, -1); | |
323 | + &target->thread.xstate->fsave, 0, | |
324 | + -1); | |
314 | 325 | } |
315 | 326 | |
316 | 327 | if (kbuf && pos == 0 && count == sizeof(env)) { |
... | ... | @@ -338,7 +349,7 @@ |
338 | 349 | |
339 | 350 | if (!cpu_has_fxsr) { |
340 | 351 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
341 | - &target->thread.i387.fsave, 0, -1); | |
352 | + &target->thread.xstate->fsave, 0, -1); | |
342 | 353 | } |
343 | 354 | |
344 | 355 | if (pos > 0 || count < sizeof(env)) |
345 | 356 | |
... | ... | @@ -358,11 +369,11 @@ |
358 | 369 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) |
359 | 370 | { |
360 | 371 | struct task_struct *tsk = current; |
372 | + struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; | |
361 | 373 | |
362 | 374 | unlazy_fpu(tsk); |
363 | - tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd; | |
364 | - if (__copy_to_user(buf, &tsk->thread.i387.fsave, | |
365 | - sizeof(struct i387_fsave_struct))) | |
375 | + fp->status = fp->swd; | |
376 | + if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) | |
366 | 377 | return -1; |
367 | 378 | return 1; |
368 | 379 | } |
... | ... | @@ -370,6 +381,7 @@ |
370 | 381 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) |
371 | 382 | { |
372 | 383 | struct task_struct *tsk = current; |
384 | + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | |
373 | 385 | struct user_i387_ia32_struct env; |
374 | 386 | int err = 0; |
375 | 387 | |
376 | 388 | |
... | ... | @@ -379,12 +391,12 @@ |
379 | 391 | if (__copy_to_user(buf, &env, sizeof(env))) |
380 | 392 | return -1; |
381 | 393 | |
382 | - err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status); | |
394 | + err |= __put_user(fx->swd, &buf->status); | |
383 | 395 | err |= __put_user(X86_FXSR_MAGIC, &buf->magic); |
384 | 396 | if (err) |
385 | 397 | return -1; |
386 | 398 | |
387 | - if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave, | |
399 | + if (__copy_to_user(&buf->_fxsr_env[0], fx, | |
388 | 400 | sizeof(struct i387_fxsave_struct))) |
389 | 401 | return -1; |
390 | 402 | return 1; |
... | ... | @@ -417,7 +429,7 @@ |
417 | 429 | struct task_struct *tsk = current; |
418 | 430 | |
419 | 431 | clear_fpu(tsk); |
420 | - return __copy_from_user(&tsk->thread.i387.fsave, buf, | |
432 | + return __copy_from_user(&tsk->thread.xstate->fsave, buf, | |
421 | 433 | sizeof(struct i387_fsave_struct)); |
422 | 434 | } |
423 | 435 | |
424 | 436 | |
... | ... | @@ -428,10 +440,10 @@ |
428 | 440 | int err; |
429 | 441 | |
430 | 442 | clear_fpu(tsk); |
431 | - err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0], | |
443 | + err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], | |
432 | 444 | sizeof(struct i387_fxsave_struct)); |
433 | 445 | /* mxcsr reserved bits must be masked to zero for security reasons */ |
434 | - tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; | |
446 | + tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | |
435 | 447 | if (err || __copy_from_user(&env, buf, sizeof(env))) |
436 | 448 | return 1; |
437 | 449 | convert_to_fxsr(tsk, &env); |
arch/x86/kernel/process.c
1 | +#include <linux/errno.h> | |
2 | +#include <linux/kernel.h> | |
3 | +#include <linux/mm.h> | |
4 | +#include <linux/smp.h> | |
5 | +#include <linux/slab.h> | |
6 | +#include <linux/sched.h> | |
7 | + | |
8 | +static struct kmem_cache *task_xstate_cachep; | |
9 | + | |
10 | +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | |
11 | +{ | |
12 | + *dst = *src; | |
13 | + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); | |
14 | + if (!dst->thread.xstate) | |
15 | + return -ENOMEM; | |
16 | + WARN_ON((unsigned long)dst->thread.xstate & 15); | |
17 | + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); | |
18 | + return 0; | |
19 | +} | |
20 | + | |
21 | +void free_thread_info(struct thread_info *ti) | |
22 | +{ | |
23 | + kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate); | |
24 | + ti->task->thread.xstate = NULL; | |
25 | + | |
26 | + free_pages((unsigned long)(ti), get_order(THREAD_SIZE)); | |
27 | +} | |
28 | + | |
29 | +void arch_task_cache_init(void) | |
30 | +{ | |
31 | + task_xstate_cachep = | |
32 | + kmem_cache_create("task_xstate", xstate_size, | |
33 | + __alignof__(union thread_xstate), | |
34 | + SLAB_PANIC, NULL); | |
35 | +} |
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/traps_32.c
... | ... | @@ -1208,11 +1208,6 @@ |
1208 | 1208 | #endif |
1209 | 1209 | set_trap_gate(19, &simd_coprocessor_error); |
1210 | 1210 | |
1211 | - /* | |
1212 | - * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. | |
1213 | - * Generate a build-time error if the alignment is wrong. | |
1214 | - */ | |
1215 | - BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15); | |
1216 | 1211 | if (cpu_has_fxsr) { |
1217 | 1212 | printk(KERN_INFO "Enabling fast FPU save and restore... "); |
1218 | 1213 | set_in_cr4(X86_CR4_OSFXSR); |
... | ... | @@ -1233,6 +1228,7 @@ |
1233 | 1228 | |
1234 | 1229 | set_bit(SYSCALL_VECTOR, used_vectors); |
1235 | 1230 | |
1231 | + init_thread_xstate(); | |
1236 | 1232 | /* |
1237 | 1233 | * Should be a barrier for any external CPU state: |
1238 | 1234 | */ |
arch/x86/kernel/traps_64.c
... | ... | @@ -1128,7 +1128,7 @@ |
1128 | 1128 | |
1129 | 1129 | if (!used_math()) |
1130 | 1130 | init_fpu(me); |
1131 | - restore_fpu_checking(&me->thread.i387.fxsave); | |
1131 | + restore_fpu_checking(&me->thread.xstate->fxsave); | |
1132 | 1132 | task_thread_info(me)->status |= TS_USEDFPU; |
1133 | 1133 | me->fpu_counter++; |
1134 | 1134 | } |
... | ... | @@ -1163,6 +1163,10 @@ |
1163 | 1163 | set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); |
1164 | 1164 | #endif |
1165 | 1165 | |
1166 | + /* | |
1167 | + * initialize the per thread extended state: | |
1168 | + */ | |
1169 | + init_thread_xstate(); | |
1166 | 1170 | /* |
1167 | 1171 | * Should be a barrier for any external CPU state. |
1168 | 1172 | */ |
arch/x86/math-emu/fpu_entry.c
... | ... | @@ -678,7 +678,7 @@ |
678 | 678 | unsigned int pos, unsigned int count, |
679 | 679 | const void *kbuf, const void __user *ubuf) |
680 | 680 | { |
681 | - struct i387_soft_struct *s387 = &target->thread.i387.soft; | |
681 | + struct i387_soft_struct *s387 = &target->thread.xstate->soft; | |
682 | 682 | void *space = s387->st_space; |
683 | 683 | int ret; |
684 | 684 | int offset, other, i, tags, regnr, tag, newtop; |
... | ... | @@ -730,7 +730,7 @@ |
730 | 730 | unsigned int pos, unsigned int count, |
731 | 731 | void *kbuf, void __user *ubuf) |
732 | 732 | { |
733 | - struct i387_soft_struct *s387 = &target->thread.i387.soft; | |
733 | + struct i387_soft_struct *s387 = &target->thread.xstate->soft; | |
734 | 734 | const void *space = s387->st_space; |
735 | 735 | int ret; |
736 | 736 | int offset = (S387->ftop & 7) * 10, other = 80 - offset; |
arch/x86/math-emu/fpu_system.h
... | ... | @@ -35,8 +35,8 @@ |
35 | 35 | #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ |
36 | 36 | == (1 << 10)) |
37 | 37 | |
38 | -#define I387 (current->thread.i387) | |
39 | -#define FPU_info (I387.soft.info) | |
38 | +#define I387 (current->thread.xstate) | |
39 | +#define FPU_info (I387->soft.info) | |
40 | 40 | |
41 | 41 | #define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) |
42 | 42 | #define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) |
43 | 43 | |
44 | 44 | |
45 | 45 | |
46 | 46 | |
... | ... | @@ -46,25 +46,25 @@ |
46 | 46 | #define FPU_EIP (FPU_info->___eip) |
47 | 47 | #define FPU_ORIG_EIP (FPU_info->___orig_eip) |
48 | 48 | |
49 | -#define FPU_lookahead (I387.soft.lookahead) | |
49 | +#define FPU_lookahead (I387->soft.lookahead) | |
50 | 50 | |
51 | 51 | /* nz if ip_offset and cs_selector are not to be set for the current |
52 | 52 | instruction. */ |
53 | -#define no_ip_update (*(u_char *)&(I387.soft.no_update)) | |
54 | -#define FPU_rm (*(u_char *)&(I387.soft.rm)) | |
53 | +#define no_ip_update (*(u_char *)&(I387->soft.no_update)) | |
54 | +#define FPU_rm (*(u_char *)&(I387->soft.rm)) | |
55 | 55 | |
56 | 56 | /* Number of bytes of data which can be legally accessed by the current |
57 | 57 | instruction. This only needs to hold a number <= 108, so a byte will do. */ |
58 | -#define access_limit (*(u_char *)&(I387.soft.alimit)) | |
58 | +#define access_limit (*(u_char *)&(I387->soft.alimit)) | |
59 | 59 | |
60 | -#define partial_status (I387.soft.swd) | |
61 | -#define control_word (I387.soft.cwd) | |
62 | -#define fpu_tag_word (I387.soft.twd) | |
63 | -#define registers (I387.soft.st_space) | |
64 | -#define top (I387.soft.ftop) | |
60 | +#define partial_status (I387->soft.swd) | |
61 | +#define control_word (I387->soft.cwd) | |
62 | +#define fpu_tag_word (I387->soft.twd) | |
63 | +#define registers (I387->soft.st_space) | |
64 | +#define top (I387->soft.ftop) | |
65 | 65 | |
66 | -#define instruction_address (*(struct address *)&I387.soft.fip) | |
67 | -#define operand_address (*(struct address *)&I387.soft.foo) | |
66 | +#define instruction_address (*(struct address *)&I387->soft.fip) | |
67 | +#define operand_address (*(struct address *)&I387->soft.foo) | |
68 | 68 | |
69 | 69 | #define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \ |
70 | 70 | math_abort(FPU_info,SIGSEGV) |
arch/x86/math-emu/reg_ld_str.c
... | ... | @@ -1180,8 +1180,8 @@ |
1180 | 1180 | control_word |= 0xffff0040; |
1181 | 1181 | partial_status = status_word() | 0xffff0000; |
1182 | 1182 | fpu_tag_word |= 0xffff0000; |
1183 | - I387.soft.fcs &= ~0xf8000000; | |
1184 | - I387.soft.fos |= 0xffff0000; | |
1183 | + I387->soft.fcs &= ~0xf8000000; | |
1184 | + I387->soft.fos |= 0xffff0000; | |
1185 | 1185 | #endif /* PECULIAR_486 */ |
1186 | 1186 | if (__copy_to_user(d, &control_word, 7 * 4)) |
1187 | 1187 | FPU_abort; |
include/asm-x86/i387.h
... | ... | @@ -23,6 +23,7 @@ |
23 | 23 | extern void mxcsr_feature_mask_init(void); |
24 | 24 | extern void init_fpu(struct task_struct *child); |
25 | 25 | extern asmlinkage void math_state_restore(void); |
26 | +extern void init_thread_xstate(void); | |
26 | 27 | |
27 | 28 | extern user_regset_active_fn fpregs_active, xfpregs_active; |
28 | 29 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; |
29 | 30 | |
30 | 31 | |
31 | 32 | |
... | ... | @@ -117,24 +118,22 @@ |
117 | 118 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported |
118 | 119 | starting with gas 2.16. */ |
119 | 120 | __asm__ __volatile__("fxsaveq %0" |
120 | - : "=m" (tsk->thread.i387.fxsave)); | |
121 | + : "=m" (tsk->thread.xstate->fxsave)); | |
121 | 122 | #elif 0 |
122 | 123 | /* Using, as a workaround, the properly prefixed form below isn't |
123 | 124 | accepted by any binutils version so far released, complaining that |
124 | 125 | the same type of prefix is used twice if an extended register is |
125 | 126 | needed for addressing (fix submitted to mainline 2005-11-21). */ |
126 | 127 | __asm__ __volatile__("rex64/fxsave %0" |
127 | - : "=m" (tsk->thread.i387.fxsave)); | |
128 | + : "=m" (tsk->thread.xstate->fxsave)); | |
128 | 129 | #else |
129 | 130 | /* This, however, we can work around by forcing the compiler to select |
130 | 131 | an addressing mode that doesn't require extended registers. */ |
131 | - __asm__ __volatile__("rex64/fxsave %P2(%1)" | |
132 | - : "=m" (tsk->thread.i387.fxsave) | |
133 | - : "cdaSDb" (tsk), | |
134 | - "i" (offsetof(__typeof__(*tsk), | |
135 | - thread.i387.fxsave))); | |
132 | + __asm__ __volatile__("rex64/fxsave (%1)" | |
133 | + : "=m" (tsk->thread.xstate->fxsave) | |
134 | + : "cdaSDb" (&tsk->thread.xstate->fxsave)); | |
136 | 135 | #endif |
137 | - clear_fpu_state(&tsk->thread.i387.fxsave); | |
136 | + clear_fpu_state(&tsk->thread.xstate->fxsave); | |
138 | 137 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
139 | 138 | } |
140 | 139 | |
... | ... | @@ -148,7 +147,7 @@ |
148 | 147 | int err = 0; |
149 | 148 | |
150 | 149 | BUILD_BUG_ON(sizeof(struct user_i387_struct) != |
151 | - sizeof(tsk->thread.i387.fxsave)); | |
150 | + sizeof(tsk->thread.xstate->fxsave)); | |
152 | 151 | |
153 | 152 | if ((unsigned long)buf % 16) |
154 | 153 | printk("save_i387: bad fpstate %p\n", buf); |
... | ... | @@ -164,7 +163,7 @@ |
164 | 163 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
165 | 164 | stts(); |
166 | 165 | } else { |
167 | - if (__copy_to_user(buf, &tsk->thread.i387.fxsave, | |
166 | + if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, | |
168 | 167 | sizeof(struct i387_fxsave_struct))) |
169 | 168 | return -1; |
170 | 169 | } |
... | ... | @@ -201,7 +200,7 @@ |
201 | 200 | "nop ; frstor %1", |
202 | 201 | "fxrstor %1", |
203 | 202 | X86_FEATURE_FXSR, |
204 | - "m" ((tsk)->thread.i387.fxsave)); | |
203 | + "m" (tsk->thread.xstate->fxsave)); | |
205 | 204 | } |
206 | 205 | |
207 | 206 | /* We need a safe address that is cheap to find and that is already |
... | ... | @@ -225,8 +224,8 @@ |
225 | 224 | "fxsave %[fx]\n" |
226 | 225 | "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", |
227 | 226 | X86_FEATURE_FXSR, |
228 | - [fx] "m" (tsk->thread.i387.fxsave), | |
229 | - [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory"); | |
227 | + [fx] "m" (tsk->thread.xstate->fxsave), | |
228 | + [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); | |
230 | 229 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception |
231 | 230 | is pending. Clear the x87 state here by setting it to fixed |
232 | 231 | values. safe_address is a random variable that should be in L1 */ |
233 | 232 | |
234 | 233 | |
235 | 234 | |
236 | 235 | |
... | ... | @@ -327,25 +326,25 @@ |
327 | 326 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) |
328 | 327 | { |
329 | 328 | if (cpu_has_fxsr) { |
330 | - return tsk->thread.i387.fxsave.cwd; | |
329 | + return tsk->thread.xstate->fxsave.cwd; | |
331 | 330 | } else { |
332 | - return (unsigned short)tsk->thread.i387.fsave.cwd; | |
331 | + return (unsigned short) tsk->thread.xstate->fsave.cwd; | |
333 | 332 | } |
334 | 333 | } |
335 | 334 | |
336 | 335 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) |
337 | 336 | { |
338 | 337 | if (cpu_has_fxsr) { |
339 | - return tsk->thread.i387.fxsave.swd; | |
338 | + return tsk->thread.xstate->fxsave.swd; | |
340 | 339 | } else { |
341 | - return (unsigned short)tsk->thread.i387.fsave.swd; | |
340 | + return (unsigned short) tsk->thread.xstate->fsave.swd; | |
342 | 341 | } |
343 | 342 | } |
344 | 343 | |
345 | 344 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) |
346 | 345 | { |
347 | 346 | if (cpu_has_xmm) { |
348 | - return tsk->thread.i387.fxsave.mxcsr; | |
347 | + return tsk->thread.xstate->fxsave.mxcsr; | |
349 | 348 | } else { |
350 | 349 | return MXCSR_DEFAULT; |
351 | 350 | } |
include/asm-x86/processor.h
... | ... | @@ -354,7 +354,7 @@ |
354 | 354 | u32 entry_eip; |
355 | 355 | }; |
356 | 356 | |
357 | -union i387_union { | |
357 | +union thread_xstate { | |
358 | 358 | struct i387_fsave_struct fsave; |
359 | 359 | struct i387_fxsave_struct fxsave; |
360 | 360 | struct i387_soft_struct soft; |
... | ... | @@ -365,6 +365,7 @@ |
365 | 365 | #endif |
366 | 366 | |
367 | 367 | extern void print_cpu_info(struct cpuinfo_x86 *); |
368 | +extern unsigned int xstate_size; | |
368 | 369 | extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); |
369 | 370 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); |
370 | 371 | extern unsigned short num_cache_leaves; |
... | ... | @@ -397,8 +398,8 @@ |
397 | 398 | unsigned long cr2; |
398 | 399 | unsigned long trap_no; |
399 | 400 | unsigned long error_code; |
400 | - /* Floating point info: */ | |
401 | - union i387_union i387 __attribute__((aligned(16)));; | |
401 | + /* floating point and extended processor state */ | |
402 | + union thread_xstate *xstate; | |
402 | 403 | #ifdef CONFIG_X86_32 |
403 | 404 | /* Virtual 86 mode info */ |
404 | 405 | struct vm86_struct __user *vm86_info; |
include/asm-x86/thread_info.h
1 | +#ifndef _ASM_X86_THREAD_INFO_H | |
1 | 2 | #ifdef CONFIG_X86_32 |
2 | 3 | # include "thread_info_32.h" |
3 | 4 | #else |
4 | 5 | # include "thread_info_64.h" |
5 | 6 | #endif |
7 | + | |
8 | +#ifndef __ASSEMBLY__ | |
9 | +extern void arch_task_cache_init(void); | |
10 | +extern void free_thread_info(struct thread_info *ti); | |
11 | +extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); | |
12 | +#endif | |
13 | +#endif /* _ASM_X86_THREAD_INFO_H */ |
include/asm-x86/thread_info_32.h
... | ... | @@ -102,8 +102,6 @@ |
102 | 102 | __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE))) |
103 | 103 | #endif |
104 | 104 | |
105 | -#define free_thread_info(info) free_pages((unsigned long)(info), get_order(THREAD_SIZE)) | |
106 | - | |
107 | 105 | #else /* !__ASSEMBLY__ */ |
108 | 106 | |
109 | 107 | /* how to get the thread information struct from ASM */ |
include/asm-x86/thread_info_64.h
... | ... | @@ -85,8 +85,6 @@ |
85 | 85 | #define alloc_thread_info(tsk) \ |
86 | 86 | ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) |
87 | 87 | |
88 | -#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER) | |
89 | - | |
90 | 88 | #else /* !__ASSEMBLY__ */ |
91 | 89 | |
92 | 90 | /* how to get the thread information struct from ASM */ |
kernel/fork.c
... | ... | @@ -132,6 +132,10 @@ |
132 | 132 | free_task(tsk); |
133 | 133 | } |
134 | 134 | |
135 | +void __attribute__((weak)) arch_task_cache_init(void) | |
136 | +{ | |
137 | +} | |
138 | + | |
135 | 139 | void __init fork_init(unsigned long mempages) |
136 | 140 | { |
137 | 141 | #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR |
... | ... | @@ -144,6 +148,9 @@ |
144 | 148 | ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL); |
145 | 149 | #endif |
146 | 150 | |
151 | + /* do the arch specific task caches init */ | |
152 | + arch_task_cache_init(); | |
153 | + | |
147 | 154 | /* |
148 | 155 | * The default maximum number of threads is set to a safe |
149 | 156 | * value: the thread structures can take up at most half |
... | ... | @@ -163,6 +170,13 @@ |
163 | 170 | init_task.signal->rlim[RLIMIT_NPROC]; |
164 | 171 | } |
165 | 172 | |
173 | +int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst, | |
174 | + struct task_struct *src) | |
175 | +{ | |
176 | + *dst = *src; | |
177 | + return 0; | |
178 | +} | |
179 | + | |
166 | 180 | static struct task_struct *dup_task_struct(struct task_struct *orig) |
167 | 181 | { |
168 | 182 | struct task_struct *tsk; |
169 | 183 | |
... | ... | @@ -181,15 +195,15 @@ |
181 | 195 | return NULL; |
182 | 196 | } |
183 | 197 | |
184 | - *tsk = *orig; | |
198 | + err = arch_dup_task_struct(tsk, orig); | |
199 | + if (err) | |
200 | + goto out; | |
201 | + | |
185 | 202 | tsk->stack = ti; |
186 | 203 | |
187 | 204 | err = prop_local_init_single(&tsk->dirties); |
188 | - if (err) { | |
189 | - free_thread_info(ti); | |
190 | - free_task_struct(tsk); | |
191 | - return NULL; | |
192 | - } | |
205 | + if (err) | |
206 | + goto out; | |
193 | 207 | |
194 | 208 | setup_thread_stack(tsk, orig); |
195 | 209 | |
... | ... | @@ -205,6 +219,11 @@ |
205 | 219 | #endif |
206 | 220 | tsk->splice_pipe = NULL; |
207 | 221 | return tsk; |
222 | + | |
223 | +out: | |
224 | + free_thread_info(ti); | |
225 | + free_task_struct(tsk); | |
226 | + return NULL; | |
208 | 227 | } |
209 | 228 | |
210 | 229 | #ifdef CONFIG_MMU |
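A note on the kernel/fork.c hunk above: the generic code supplies arch_task_cache_init() and arch_dup_task_struct() as weak defaults (a no-op and a plain structure copy), and arch/x86/kernel/process.c overrides them with strong definitions at link time. A minimal two-file illustration of that weak-symbol pattern in plain C follows; the file names and messages are hypothetical, only the linkage mechanism matches the diff.

    /* generic.c -- ships a weak default that any architecture may override */
    #include <stdio.h>

    void __attribute__((weak)) arch_task_cache_init(void)
    {
        puts("generic default: nothing to do");
    }

    int main(void)
    {
        arch_task_cache_init();   /* the strong definition wins if one is linked in */
        return 0;
    }

    /* arch.c -- a strong definition; building both files together replaces
     * the weak default:
     *     cc generic.c arch.c -o demo && ./demo   ->  "arch override: set up caches"
     */
    #include <stdio.h>

    void arch_task_cache_init(void)
    {
        puts("arch override: set up caches");
    }

If arch.c is left out of the link, the weak default runs instead, which is how architectures that don't need a per-task state cache behave after this commit.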