Commit 7e16838d94b566a17b65231073d179bc04d590c8

Author: Linus Torvalds
Parent: 80ab6f1e8c

i387: support lazy restore of FPU state

This makes us recognize when we try to restore FPU state that matches
what we already have in the FPU on this CPU, and avoids the restore
entirely if so.

To do this, we add two new data fields (see the sketch after this list):

 - a percpu 'fpu_owner_task' variable that gets written any time we
   update the "has_fpu" field, and thus acts as a kind of back-pointer
   to the task whose FPU state this CPU holds.  The exception is when
   we save the FPU state as part of a context switch - if the save can
   keep the FPU state around, we leave the 'fpu_owner_task' variable
   pointing at the task whose FP state still remains on the CPU.

 - a per-thread 'last_cpu' field that indicates which CPU this thread
   last used its FPU on.  We update this on every context switch
   (writing an invalid CPU number if the last context switch didn't
   leave the FPU in a lazily usable state), so we know that *that*
   thread has done nothing else with the FPU since.
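
A condensed sketch of the two fields together (the real declarations are
split across processor.h, i387.h and common.c in the diff below; the
comments here are explanatory, not from the source):

	struct fpu {
		unsigned int last_cpu;	/* CPU this thread last used its
					   FPU on; ~0 if the state was not
					   left in a lazily usable form */
		unsigned int has_fpu;	/* this thread's state is live in
					   the FPU right now */
		union thread_xstate *state;
	};

	/* Back-pointer to the task whose FPU state this CPU holds */
	DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);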

These two fields together can be used when next switching back to the
task to check whether the FPU still holds that task's state: if
'fpu_owner_task' matches the task we are switching to, we know that no
other task (or kernel FPU usage) touched the FPU on this CPU in the
meantime, and if the current CPU number matches the 'last_cpu' field,
we know that this thread did no other FP work on any other CPU, so the
FPU state on the CPU must match what was saved on last context switch.

In that case, we can avoid the 'f[x]rstor' entirely, and just clear the
CR0.TS bit.
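
Condensed from the i387.h hunks below, and assuming the existing
__thread_fpu_begin()/switch_fpu_finish() helpers from the parent
commit, the fast path works out to roughly this (a sketch, not the
verbatim kernel code):

	if (fpu.preload) {
		new->fpu_counter++;
		if (fpu_lazy_restore(new, cpu))
			/* FPU registers already hold 'new's state: skip
			   the f[x]rstor in switch_fpu_finish() */
			fpu.preload = 0;
		else
			prefetch(new->thread.fpu.state);
		__thread_fpu_begin(new);	/* clts(): clear CR0.TS */
	}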

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

5 changed files with 29 additions and 15 deletions

arch/x86/include/asm/i387.h

@@ -32,6 +32,8 @@
 extern void math_state_restore(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
+DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
+
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
 				xstateregs_get;
@@ -276,7 +278,7 @@
 		"emms\n\t"		/* clear stack tags */
 		"fildl %P[addr]",	/* set F?P to defined value */
 		X86_FEATURE_FXSAVE_LEAK,
-		[addr] "m" (tsk->thread.has_fpu));
+		[addr] "m" (tsk->thread.fpu.has_fpu));
 
 	return fpu_restore_checking(&tsk->thread.fpu);
 }
@@ -288,19 +290,21 @@
  */
 static inline int __thread_has_fpu(struct task_struct *tsk)
 {
-	return tsk->thread.has_fpu;
+	return tsk->thread.fpu.has_fpu;
 }
 
 /* Must be paired with an 'stts' after! */
 static inline void __thread_clear_has_fpu(struct task_struct *tsk)
 {
-	tsk->thread.has_fpu = 0;
+	tsk->thread.fpu.has_fpu = 0;
+	percpu_write(fpu_owner_task, NULL);
 }
 
 /* Must be paired with a 'clts' before! */
 static inline void __thread_set_has_fpu(struct task_struct *tsk)
 {
-	tsk->thread.has_fpu = 1;
+	tsk->thread.fpu.has_fpu = 1;
+	percpu_write(fpu_owner_task, tsk);
 }
 
 /*
@@ -345,18 +349,22 @@
  * We don't do that yet, so "fpu_lazy_restore()" always returns
  * false, but some day..
  */
-#define fpu_lazy_restore(tsk) (0)
-#define fpu_lazy_state_intact(tsk) do { } while (0)
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+	return new == percpu_read_stable(fpu_owner_task) &&
+		cpu == new->thread.fpu.last_cpu;
+}
 
-static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new)
+static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
 {
 	fpu_switch_t fpu;
 
 	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
 	if (__thread_has_fpu(old)) {
-		if (__save_init_fpu(old))
-			fpu_lazy_state_intact(old);
-		__thread_clear_has_fpu(old);
+		if (!__save_init_fpu(old))
+			cpu = ~0;
+		old->thread.fpu.last_cpu = cpu;
+		old->thread.fpu.has_fpu = 0;	/* But leave fpu_owner_task! */
 
 		/* Don't change CR0.TS if we just switch! */
 		if (fpu.preload) {
@@ -367,9 +375,10 @@
 			stts();
 	} else {
 		old->fpu_counter = 0;
+		old->thread.fpu.last_cpu = ~0;
 		if (fpu.preload) {
 			new->fpu_counter++;
-			if (fpu_lazy_restore(new))
+			if (fpu_lazy_restore(new, cpu))
 				fpu.preload = 0;
 			else
 				prefetch(new->thread.fpu.state);
@@ -463,8 +472,10 @@
 		__save_init_fpu(me);
 		__thread_clear_has_fpu(me);
 		/* We do 'stts()' in kernel_fpu_end() */
-	} else
+	} else {
+		percpu_write(fpu_owner_task, NULL);
 		clts();
+	}
 }
 
 static inline void kernel_fpu_end(void)
arch/x86/include/asm/processor.h

@@ -374,6 +374,8 @@
 };
 
 struct fpu {
+	unsigned int last_cpu;
+	unsigned int has_fpu;
 	union thread_xstate *state;
 };
 
@@ -454,7 +456,6 @@
 	unsigned long		trap_no;
 	unsigned long		error_code;
 	/* floating point and extended processor state */
-	unsigned long		has_fpu;
 	struct fpu		fpu;
 #ifdef CONFIG_X86_32
 	/* Virtual 86 mode info */
arch/x86/kernel/cpu/common.c

@@ -1044,6 +1044,8 @@
 
 DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
+DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+
 /*
  * Special IST stacks which the CPU switches to when it calls
  * an IST-marked descriptor entry. Up to 7 stacks (hardware
arch/x86/kernel/process_32.c

@@ -304,7 +304,7 @@
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	fpu = switch_fpu_prepare(prev_p, next_p);
+	fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
 	/*
 	 * Reload esp0.
arch/x86/kernel/process_64.c

@@ -389,7 +389,7 @@
 	unsigned fsindex, gsindex;
 	fpu_switch_t fpu;
 
-	fpu = switch_fpu_prepare(prev_p, next_p);
+	fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
 	/*
 	 * Reload esp0, LDT and the page table pointer:
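
For context (not part of this diff): both callers pair this with
switch_fpu_finish() from the parent commit, so __switch_to() roughly
reads as follows - a sketch, with the surrounding switch code elided:

	/* decide on preload, save old state, update the last_cpu and
	   fpu_owner_task bookkeeping for 'prev_p' */
	fpu = switch_fpu_prepare(prev_p, next_p, cpu);

	/* ... switch stacks, segments, TLS, ... */

	/* f[x]rstor the new state, unless fpu_lazy_restore() proved
	   the FPU already holds it */
	switch_fpu_finish(next_p, fpu);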