Commit 61c4628b538608c1a85211ed8438136adfeb9a95

Author:    Suresh Siddha
Committer: Ingo Molnar
Parent:    fa5c463941

x86, fpu: split FPU state from task struct - v5

Split the FPU save area from the task struct. This allows easy migration
of FPU context, and it's generally cleaner. It also allows the following
two optimizations (sketched below):

1) Allocate the FPU state only when the application actually uses the FPU,
i.e. at the first lazy FPU trap. This can save memory for apps that don't
use the FPU. The next patch implements this lazy allocation.

2) Allocate only the size the actual CPU needs, rather than a fixed
512 bytes. Patches enabling xsave/xrstor support (coming shortly) will
take advantage of this.
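
As a rough, stand-alone illustration of the new layout, here is a
user-space C sketch; the struct contents, dup_thread() and main() are
invented for illustration, and only the xstate pointer, xstate_size and
the copy-on-fork logic mirror the patch:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct i387_fsave_struct  { unsigned char buf[112]; };	/* legacy fnsave image */
struct i387_fxsave_struct { unsigned char buf[512]; };	/* fxsave image */

union thread_xstate {
	struct i387_fsave_struct  fsave;
	struct i387_fxsave_struct fxsave;
};

/*
 * Before the patch, the whole union was embedded in every thread
 * struct; now the thread struct carries only a pointer, and the
 * buffer is allocated separately, sized for the running CPU.
 */
struct thread {
	union thread_xstate *xstate;
};

static size_t xstate_size;	/* decided once, at boot */

static void init_thread_xstate(int cpu_has_fxsr)
{
	xstate_size = cpu_has_fxsr ? sizeof(struct i387_fxsave_struct)
				   : sizeof(struct i387_fsave_struct);
}

/* Mirrors arch_dup_task_struct(): the child gets its own copy. */
static int dup_thread(struct thread *dst, const struct thread *src)
{
	/* fxsave requires a 16-byte-aligned save area */
	dst->xstate = aligned_alloc(16, xstate_size);
	if (!dst->xstate)
		return -1;
	memcpy(dst->xstate, src->xstate, xstate_size);
	return 0;
}

int main(void)
{
	struct thread parent, child;

	init_thread_xstate(1);		/* pretend the CPU has fxsr */
	parent.xstate = aligned_alloc(16, xstate_size);
	if (!parent.xstate)
		return 1;
	memset(parent.xstate, 0, xstate_size);

	if (dup_thread(&child, &parent) == 0) {
		printf("duplicated %zu bytes of FPU state\n", xstate_size);
		free(child.xstate);
	}
	free(parent.xstate);
	return 0;
}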

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Showing 16 changed files with 161 additions and 90 deletions

arch/x86/kernel/Makefile
... ... @@ -29,6 +29,7 @@
29 29 obj-y += tsc_$(BITS).o io_delay.o rtc.o
30 30  
31 31 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
  32 +obj-y += process.o
32 33 obj-y += i387.o
33 34 obj-y += ptrace.o
34 35 obj-y += ds.o
arch/x86/kernel/i387.c
... ... @@ -8,6 +8,7 @@
8 8 #include <linux/module.h>
9 9 #include <linux/regset.h>
10 10 #include <linux/sched.h>
  11 +#include <linux/bootmem.h>
11 12  
12 13 #include <asm/sigcontext.h>
13 14 #include <asm/processor.h>
14 15  
15 16  
... ... @@ -35,17 +36,18 @@
35 36 #endif
36 37  
37 38 static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
  39 +unsigned int xstate_size;
  40 +static struct i387_fxsave_struct fx_scratch __cpuinitdata;
38 41  
39   -void mxcsr_feature_mask_init(void)
  42 +void __cpuinit mxcsr_feature_mask_init(void)
40 43 {
41 44 unsigned long mask = 0;
42 45  
43 46 clts();
44 47 if (cpu_has_fxsr) {
45   - memset(&current->thread.i387.fxsave, 0,
46   - sizeof(struct i387_fxsave_struct));
47   - asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave));
48   - mask = current->thread.i387.fxsave.mxcsr_mask;
  48 + memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
  49 + asm volatile("fxsave %0" : : "m" (fx_scratch));
  50 + mask = fx_scratch.mxcsr_mask;
49 51 if (mask == 0)
50 52 mask = 0x0000ffbf;
51 53 }
... ... @@ -53,6 +55,17 @@
53 55 stts();
54 56 }
55 57  
  58 +void __init init_thread_xstate(void)
  59 +{
  60 + if (cpu_has_fxsr)
  61 + xstate_size = sizeof(struct i387_fxsave_struct);
  62 +#ifdef CONFIG_X86_32
  63 + else
  64 + xstate_size = sizeof(struct i387_fsave_struct);
  65 +#endif
  66 + init_task.thread.xstate = alloc_bootmem(xstate_size);
  67 +}
  68 +
56 69 #ifdef CONFIG_X86_64
57 70 /*
58 71 * Called at bootup to set up the initial FPU state that is later cloned
59 72  
... ... @@ -61,11 +74,7 @@
61 74 void __cpuinit fpu_init(void)
62 75 {
63 76 unsigned long oldcr0 = read_cr0();
64   - extern void __bad_fxsave_alignment(void);
65 77  
66   - if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
67   - __bad_fxsave_alignment();
68   -
69 78 set_in_cr4(X86_CR4_OSFXSR);
70 79 set_in_cr4(X86_CR4_OSXMMEXCPT);
71 80  
72 81  
73 82  
... ... @@ -93,18 +102,19 @@
93 102 }
94 103  
95 104 if (cpu_has_fxsr) {
96   - memset(&tsk->thread.i387.fxsave, 0,
97   - sizeof(struct i387_fxsave_struct));
98   - tsk->thread.i387.fxsave.cwd = 0x37f;
  105 + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
  106 +
  107 + memset(fx, 0, xstate_size);
  108 + fx->cwd = 0x37f;
99 109 if (cpu_has_xmm)
100   - tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT;
  110 + fx->mxcsr = MXCSR_DEFAULT;
101 111 } else {
102   - memset(&tsk->thread.i387.fsave, 0,
103   - sizeof(struct i387_fsave_struct));
104   - tsk->thread.i387.fsave.cwd = 0xffff037fu;
105   - tsk->thread.i387.fsave.swd = 0xffff0000u;
106   - tsk->thread.i387.fsave.twd = 0xffffffffu;
107   - tsk->thread.i387.fsave.fos = 0xffff0000u;
  112 + struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
  113 + memset(fp, 0, xstate_size);
  114 + fp->cwd = 0xffff037fu;
  115 + fp->swd = 0xffff0000u;
  116 + fp->twd = 0xffffffffu;
  117 + fp->fos = 0xffff0000u;
108 118 }
109 119 /*
110 120 * Only the device not available exception or ptrace can call init_fpu.
... ... @@ -132,7 +142,7 @@
132 142 init_fpu(target);
133 143  
134 144 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
135   - &target->thread.i387.fxsave, 0, -1);
  145 + &target->thread.xstate->fxsave, 0, -1);
136 146 }
137 147  
138 148 int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
139 149  
... ... @@ -148,12 +158,12 @@
148 158 set_stopped_child_used_math(target);
149 159  
150 160 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
151   - &target->thread.i387.fxsave, 0, -1);
  161 + &target->thread.xstate->fxsave, 0, -1);
152 162  
153 163 /*
154 164 * mxcsr reserved bits must be masked to zero for security reasons.
155 165 */
156   - target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
  166 + target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
157 167  
158 168 return ret;
159 169 }
... ... @@ -233,7 +243,7 @@
233 243 static void
234 244 convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
235 245 {
236   - struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
  246 + struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
237 247 struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
238 248 struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
239 249 int i;
... ... @@ -273,7 +283,7 @@
273 283 const struct user_i387_ia32_struct *env)
274 284  
275 285 {
276   - struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
  286 + struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave;
277 287 struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
278 288 struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
279 289 int i;
... ... @@ -310,7 +320,8 @@
310 320  
311 321 if (!cpu_has_fxsr) {
312 322 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
313   - &target->thread.i387.fsave, 0, -1);
  323 + &target->thread.xstate->fsave, 0,
  324 + -1);
314 325 }
315 326  
316 327 if (kbuf && pos == 0 && count == sizeof(env)) {
... ... @@ -338,7 +349,7 @@
338 349  
339 350 if (!cpu_has_fxsr) {
340 351 return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
341   - &target->thread.i387.fsave, 0, -1);
  352 + &target->thread.xstate->fsave, 0, -1);
342 353 }
343 354  
344 355 if (pos > 0 || count < sizeof(env))
345 356  
... ... @@ -358,11 +369,11 @@
358 369 static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
359 370 {
360 371 struct task_struct *tsk = current;
  372 + struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
361 373  
362 374 unlazy_fpu(tsk);
363   - tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
364   - if (__copy_to_user(buf, &tsk->thread.i387.fsave,
365   - sizeof(struct i387_fsave_struct)))
  375 + fp->status = fp->swd;
  376 + if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
366 377 return -1;
367 378 return 1;
368 379 }
... ... @@ -370,6 +381,7 @@
370 381 static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
371 382 {
372 383 struct task_struct *tsk = current;
  384 + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
373 385 struct user_i387_ia32_struct env;
374 386 int err = 0;
375 387  
376 388  
... ... @@ -379,12 +391,12 @@
379 391 if (__copy_to_user(buf, &env, sizeof(env)))
380 392 return -1;
381 393  
382   - err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
  394 + err |= __put_user(fx->swd, &buf->status);
383 395 err |= __put_user(X86_FXSR_MAGIC, &buf->magic);
384 396 if (err)
385 397 return -1;
386 398  
387   - if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
  399 + if (__copy_to_user(&buf->_fxsr_env[0], fx,
388 400 sizeof(struct i387_fxsave_struct)))
389 401 return -1;
390 402 return 1;
... ... @@ -417,7 +429,7 @@
417 429 struct task_struct *tsk = current;
418 430  
419 431 clear_fpu(tsk);
420   - return __copy_from_user(&tsk->thread.i387.fsave, buf,
  432 + return __copy_from_user(&tsk->thread.xstate->fsave, buf,
421 433 sizeof(struct i387_fsave_struct));
422 434 }
423 435  
424 436  
... ... @@ -428,10 +440,10 @@
428 440 int err;
429 441  
430 442 clear_fpu(tsk);
431   - err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
  443 + err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
432 444 sizeof(struct i387_fxsave_struct));
433 445 /* mxcsr reserved bits must be masked to zero for security reasons */
434   - tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
  446 + tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
435 447 if (err || __copy_from_user(&env, buf, sizeof(env)))
436 448 return 1;
437 449 convert_to_fxsr(tsk, &env);
arch/x86/kernel/process.c
  1 +#include <linux/errno.h>
  2 +#include <linux/kernel.h>
  3 +#include <linux/mm.h>
  4 +#include <linux/smp.h>
  5 +#include <linux/slab.h>
  6 +#include <linux/sched.h>
  7 +
  8 +static struct kmem_cache *task_xstate_cachep;
  9 +
  10 +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
  11 +{
  12 + *dst = *src;
  13 + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
  14 + if (!dst->thread.xstate)
  15 + return -ENOMEM;
  16 + WARN_ON((unsigned long)dst->thread.xstate & 15);
  17 + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
  18 + return 0;
  19 +}
  20 +
  21 +void free_thread_info(struct thread_info *ti)
  22 +{
  23 + kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate);
  24 + ti->task->thread.xstate = NULL;
  25 +
  26 + free_pages((unsigned long)(ti), get_order(THREAD_SIZE));
  27 +}
  28 +
  29 +void arch_task_cache_init(void)
  30 +{
  31 + task_xstate_cachep =
  32 + kmem_cache_create("task_xstate", xstate_size,
  33 + __alignof__(union thread_xstate),
  34 + SLAB_PANIC, NULL);
  35 +}
arch/x86/kernel/process_32.c
... ... @@ -703,7 +703,7 @@
703 703  
704 704 /* we're going to use this soon, after a few expensive things */
705 705 if (next_p->fpu_counter > 5)
706   - prefetch(&next->i387.fxsave);
  706 + prefetch(next->xstate);
707 707  
708 708 /*
709 709 * Reload esp0.
arch/x86/kernel/process_64.c
... ... @@ -682,7 +682,7 @@
682 682  
683 683 /* we're going to use this soon, after a few expensive things */
684 684 if (next_p->fpu_counter>5)
685   - prefetch(&next->i387.fxsave);
  685 + prefetch(next->xstate);
686 686  
687 687 /*
688 688 * Reload esp0, LDT and the page table pointer:
arch/x86/kernel/traps_32.c
... ... @@ -1208,11 +1208,6 @@
1208 1208 #endif
1209 1209 set_trap_gate(19, &simd_coprocessor_error);
1210 1210  
1211   - /*
1212   - * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
1213   - * Generate a build-time error if the alignment is wrong.
1214   - */
1215   - BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
1216 1211 if (cpu_has_fxsr) {
1217 1212 printk(KERN_INFO "Enabling fast FPU save and restore... ");
1218 1213 set_in_cr4(X86_CR4_OSFXSR);
... ... @@ -1233,6 +1228,7 @@
1233 1228  
1234 1229 set_bit(SYSCALL_VECTOR, used_vectors);
1235 1230  
  1231 + init_thread_xstate();
1236 1232 /*
1237 1233 * Should be a barrier for any external CPU state:
1238 1234 */
arch/x86/kernel/traps_64.c
... ... @@ -1128,7 +1128,7 @@
1128 1128  
1129 1129 if (!used_math())
1130 1130 init_fpu(me);
1131   - restore_fpu_checking(&me->thread.i387.fxsave);
  1131 + restore_fpu_checking(&me->thread.xstate->fxsave);
1132 1132 task_thread_info(me)->status |= TS_USEDFPU;
1133 1133 me->fpu_counter++;
1134 1134 }
... ... @@ -1163,6 +1163,10 @@
1163 1163 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
1164 1164 #endif
1165 1165  
  1166 + /*
  1167 + * initialize the per thread extended state:
  1168 + */
  1169 + init_thread_xstate();
1166 1170 /*
1167 1171 * Should be a barrier for any external CPU state.
1168 1172 */
arch/x86/math-emu/fpu_entry.c
... ... @@ -678,7 +678,7 @@
678 678 unsigned int pos, unsigned int count,
679 679 const void *kbuf, const void __user *ubuf)
680 680 {
681   - struct i387_soft_struct *s387 = &target->thread.i387.soft;
  681 + struct i387_soft_struct *s387 = &target->thread.xstate->soft;
682 682 void *space = s387->st_space;
683 683 int ret;
684 684 int offset, other, i, tags, regnr, tag, newtop;
... ... @@ -730,7 +730,7 @@
730 730 unsigned int pos, unsigned int count,
731 731 void *kbuf, void __user *ubuf)
732 732 {
733   - struct i387_soft_struct *s387 = &target->thread.i387.soft;
  733 + struct i387_soft_struct *s387 = &target->thread.xstate->soft;
734 734 const void *space = s387->st_space;
735 735 int ret;
736 736 int offset = (S387->ftop & 7) * 10, other = 80 - offset;
arch/x86/math-emu/fpu_system.h
... ... @@ -35,8 +35,8 @@
35 35 #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \
36 36 == (1 << 10))
37 37  
38   -#define I387 (current->thread.i387)
39   -#define FPU_info (I387.soft.info)
  38 +#define I387 (current->thread.xstate)
  39 +#define FPU_info (I387->soft.info)
40 40  
41 41 #define FPU_CS (*(unsigned short *) &(FPU_info->___cs))
42 42 #define FPU_SS (*(unsigned short *) &(FPU_info->___ss))
43 43  
44 44  
45 45  
46 46  
... ... @@ -46,25 +46,25 @@
46 46 #define FPU_EIP (FPU_info->___eip)
47 47 #define FPU_ORIG_EIP (FPU_info->___orig_eip)
48 48  
49   -#define FPU_lookahead (I387.soft.lookahead)
  49 +#define FPU_lookahead (I387->soft.lookahead)
50 50  
51 51 /* nz if ip_offset and cs_selector are not to be set for the current
52 52 instruction. */
53   -#define no_ip_update (*(u_char *)&(I387.soft.no_update))
54   -#define FPU_rm (*(u_char *)&(I387.soft.rm))
  53 +#define no_ip_update (*(u_char *)&(I387->soft.no_update))
  54 +#define FPU_rm (*(u_char *)&(I387->soft.rm))
55 55  
56 56 /* Number of bytes of data which can be legally accessed by the current
57 57 instruction. This only needs to hold a number <= 108, so a byte will do. */
58   -#define access_limit (*(u_char *)&(I387.soft.alimit))
  58 +#define access_limit (*(u_char *)&(I387->soft.alimit))
59 59  
60   -#define partial_status (I387.soft.swd)
61   -#define control_word (I387.soft.cwd)
62   -#define fpu_tag_word (I387.soft.twd)
63   -#define registers (I387.soft.st_space)
64   -#define top (I387.soft.ftop)
  60 +#define partial_status (I387->soft.swd)
  61 +#define control_word (I387->soft.cwd)
  62 +#define fpu_tag_word (I387->soft.twd)
  63 +#define registers (I387->soft.st_space)
  64 +#define top (I387->soft.ftop)
65 65  
66   -#define instruction_address (*(struct address *)&I387.soft.fip)
67   -#define operand_address (*(struct address *)&I387.soft.foo)
  66 +#define instruction_address (*(struct address *)&I387->soft.fip)
  67 +#define operand_address (*(struct address *)&I387->soft.foo)
68 68  
69 69 #define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \
70 70 math_abort(FPU_info,SIGSEGV)
arch/x86/math-emu/reg_ld_str.c
... ... @@ -1180,8 +1180,8 @@
1180 1180 control_word |= 0xffff0040;
1181 1181 partial_status = status_word() | 0xffff0000;
1182 1182 fpu_tag_word |= 0xffff0000;
1183   - I387.soft.fcs &= ~0xf8000000;
1184   - I387.soft.fos |= 0xffff0000;
  1183 + I387->soft.fcs &= ~0xf8000000;
  1184 + I387->soft.fos |= 0xffff0000;
1185 1185 #endif /* PECULIAR_486 */
1186 1186 if (__copy_to_user(d, &control_word, 7 * 4))
1187 1187 FPU_abort;
include/asm-x86/i387.h
... ... @@ -23,6 +23,7 @@
23 23 extern void mxcsr_feature_mask_init(void);
24 24 extern void init_fpu(struct task_struct *child);
25 25 extern asmlinkage void math_state_restore(void);
  26 +extern void init_thread_xstate(void);
26 27  
27 28 extern user_regset_active_fn fpregs_active, xfpregs_active;
28 29 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
29 30  
30 31  
31 32  
... ... @@ -117,24 +118,22 @@
117 118 /* Using "fxsaveq %0" would be the ideal choice, but is only supported
118 119 starting with gas 2.16. */
119 120 __asm__ __volatile__("fxsaveq %0"
120   - : "=m" (tsk->thread.i387.fxsave));
  121 + : "=m" (tsk->thread.xstate->fxsave));
121 122 #elif 0
122 123 /* Using, as a workaround, the properly prefixed form below isn't
123 124 accepted by any binutils version so far released, complaining that
124 125 the same type of prefix is used twice if an extended register is
125 126 needed for addressing (fix submitted to mainline 2005-11-21). */
126 127 __asm__ __volatile__("rex64/fxsave %0"
127   - : "=m" (tsk->thread.i387.fxsave));
  128 + : "=m" (tsk->thread.xstate->fxsave));
128 129 #else
129 130 /* This, however, we can work around by forcing the compiler to select
130 131 an addressing mode that doesn't require extended registers. */
131   - __asm__ __volatile__("rex64/fxsave %P2(%1)"
132   - : "=m" (tsk->thread.i387.fxsave)
133   - : "cdaSDb" (tsk),
134   - "i" (offsetof(__typeof__(*tsk),
135   - thread.i387.fxsave)));
  132 + __asm__ __volatile__("rex64/fxsave (%1)"
  133 + : "=m" (tsk->thread.xstate->fxsave)
  134 + : "cdaSDb" (&tsk->thread.xstate->fxsave));
136 135 #endif
137   - clear_fpu_state(&tsk->thread.i387.fxsave);
  136 + clear_fpu_state(&tsk->thread.xstate->fxsave);
138 137 task_thread_info(tsk)->status &= ~TS_USEDFPU;
139 138 }
140 139  
... ... @@ -148,7 +147,7 @@
148 147 int err = 0;
149 148  
150 149 BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
151   - sizeof(tsk->thread.i387.fxsave));
  150 + sizeof(tsk->thread.xstate->fxsave));
152 151  
153 152 if ((unsigned long)buf % 16)
154 153 printk("save_i387: bad fpstate %p\n", buf);
... ... @@ -164,7 +163,7 @@
164 163 task_thread_info(tsk)->status &= ~TS_USEDFPU;
165 164 stts();
166 165 } else {
167   - if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
  166 + if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
168 167 sizeof(struct i387_fxsave_struct)))
169 168 return -1;
170 169 }
... ... @@ -201,7 +200,7 @@
201 200 "nop ; frstor %1",
202 201 "fxrstor %1",
203 202 X86_FEATURE_FXSR,
204   - "m" ((tsk)->thread.i387.fxsave));
  203 + "m" (tsk->thread.xstate->fxsave));
205 204 }
206 205  
207 206 /* We need a safe address that is cheap to find and that is already
... ... @@ -225,8 +224,8 @@
225 224 "fxsave %[fx]\n"
226 225 "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
227 226 X86_FEATURE_FXSR,
228   - [fx] "m" (tsk->thread.i387.fxsave),
229   - [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
  227 + [fx] "m" (tsk->thread.xstate->fxsave),
  228 + [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory");
230 229 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
231 230 is pending. Clear the x87 state here by setting it to fixed
232 231 values. safe_address is a random variable that should be in L1 */
233 232  
234 233  
235 234  
236 235  
... ... @@ -327,25 +326,25 @@
327 326 static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
328 327 {
329 328 if (cpu_has_fxsr) {
330   - return tsk->thread.i387.fxsave.cwd;
  329 + return tsk->thread.xstate->fxsave.cwd;
331 330 } else {
332   - return (unsigned short)tsk->thread.i387.fsave.cwd;
  331 + return (unsigned short) tsk->thread.xstate->fsave.cwd;
333 332 }
334 333 }
335 334  
336 335 static inline unsigned short get_fpu_swd(struct task_struct *tsk)
337 336 {
338 337 if (cpu_has_fxsr) {
339   - return tsk->thread.i387.fxsave.swd;
  338 + return tsk->thread.xstate->fxsave.swd;
340 339 } else {
341   - return (unsigned short)tsk->thread.i387.fsave.swd;
  340 + return (unsigned short) tsk->thread.xstate->fsave.swd;
342 341 }
343 342 }
344 343  
345 344 static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
346 345 {
347 346 if (cpu_has_xmm) {
348   - return tsk->thread.i387.fxsave.mxcsr;
  347 + return tsk->thread.xstate->fxsave.mxcsr;
349 348 } else {
350 349 return MXCSR_DEFAULT;
351 350 }
include/asm-x86/processor.h
... ... @@ -354,7 +354,7 @@
354 354 u32 entry_eip;
355 355 };
356 356  
357   -union i387_union {
  357 +union thread_xstate {
358 358 struct i387_fsave_struct fsave;
359 359 struct i387_fxsave_struct fxsave;
360 360 struct i387_soft_struct soft;
... ... @@ -365,6 +365,7 @@
365 365 #endif
366 366  
367 367 extern void print_cpu_info(struct cpuinfo_x86 *);
  368 +extern unsigned int xstate_size;
368 369 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
369 370 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
370 371 extern unsigned short num_cache_leaves;
... ... @@ -397,8 +398,8 @@
397 398 unsigned long cr2;
398 399 unsigned long trap_no;
399 400 unsigned long error_code;
400   - /* Floating point info: */
401   - union i387_union i387 __attribute__((aligned(16)));;
  401 + /* floating point and extended processor state */
  402 + union thread_xstate *xstate;
402 403 #ifdef CONFIG_X86_32
403 404 /* Virtual 86 mode info */
404 405 struct vm86_struct __user *vm86_info;
include/asm-x86/thread_info.h
  1 +#ifndef _ASM_X86_THREAD_INFO_H
1 2 #ifdef CONFIG_X86_32
2 3 # include "thread_info_32.h"
3 4 #else
4 5 # include "thread_info_64.h"
5 6 #endif
  7 +
  8 +#ifndef __ASSEMBLY__
  9 +extern void arch_task_cache_init(void);
  10 +extern void free_thread_info(struct thread_info *ti);
  11 +extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
  12 +#endif
  13 +#endif /* _ASM_X86_THREAD_INFO_H */
include/asm-x86/thread_info_32.h
... ... @@ -102,8 +102,6 @@
102 102 __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE)))
103 103 #endif
104 104  
105   -#define free_thread_info(info) free_pages((unsigned long)(info), get_order(THREAD_SIZE))
106   -
107 105 #else /* !__ASSEMBLY__ */
108 106  
109 107 /* how to get the thread information struct from ASM */
include/asm-x86/thread_info_64.h
... ... @@ -85,8 +85,6 @@
85 85 #define alloc_thread_info(tsk) \
86 86 ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
87 87  
88   -#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
89   -
90 88 #else /* !__ASSEMBLY__ */
91 89  
92 90 /* how to get the thread information struct from ASM */
kernel/fork.c
... ... @@ -132,6 +132,10 @@
132 132 free_task(tsk);
133 133 }
134 134  
  135 +void __attribute__((weak)) arch_task_cache_init(void)
  136 +{
  137 +}
  138 +
135 139 void __init fork_init(unsigned long mempages)
136 140 {
137 141 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
... ... @@ -144,6 +148,9 @@
144 148 ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
145 149 #endif
146 150  
  151 + /* do the arch specific task caches init */
  152 + arch_task_cache_init();
  153 +
147 154 /*
148 155 * The default maximum number of threads is set to a safe
149 156 * value: the thread structures can take up at most half
... ... @@ -163,6 +170,13 @@
163 170 init_task.signal->rlim[RLIMIT_NPROC];
164 171 }
165 172  
  173 +int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
  174 + struct task_struct *src)
  175 +{
  176 + *dst = *src;
  177 + return 0;
  178 +}
  179 +
166 180 static struct task_struct *dup_task_struct(struct task_struct *orig)
167 181 {
168 182 struct task_struct *tsk;
169 183  
... ... @@ -181,15 +195,15 @@
181 195 return NULL;
182 196 }
183 197  
184   - *tsk = *orig;
  198 + err = arch_dup_task_struct(tsk, orig);
  199 + if (err)
  200 + goto out;
  201 +
185 202 tsk->stack = ti;
186 203  
187 204 err = prop_local_init_single(&tsk->dirties);
188   - if (err) {
189   - free_thread_info(ti);
190   - free_task_struct(tsk);
191   - return NULL;
192   - }
  205 + if (err)
  206 + goto out;
193 207  
194 208 setup_thread_stack(tsk, orig);
195 209  
... ... @@ -205,6 +219,11 @@
205 219 #endif
206 220 tsk->splice_pipe = NULL;
207 221 return tsk;
  222 +
  223 +out:
  224 + free_thread_info(ti);
  225 + free_task_struct(tsk);
  226 + return NULL;
208 227 }
209 228  
210 229 #ifdef CONFIG_MMU
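
The generic kernel/fork.c side of this split relies on weak symbols:
fork.c now ships no-op weak defaults for arch_task_cache_init() and
arch_dup_task_struct(), and an architecture that links in strong
definitions (as x86 does via the new process.c) overrides them
automatically. A minimal two-file sketch of that linkage pattern, with
hypothetical file names:

/* default.c: generic fallback, marked weak */
#include <stdio.h>

void __attribute__((weak)) arch_task_cache_init(void)
{
	puts("generic no-op default");
}

int main(void)
{
	arch_task_cache_init();
	return 0;
}

/* arch.c: optional strong override; if linked in, it wins at link time */
#include <stdio.h>

void arch_task_cache_init(void)
{
	puts("arch-specific override");
}

Building default.c alone prints the fallback; building both files
together ("cc default.c arch.c") prints the override, with no #ifdef
needed in the generic code.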