From aeb997b9f2a2199c72b89b7a304cafc394e4202b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 1 May 2015 09:59:04 +0200 Subject: [PATCH] x86/fpu: Change fpu->fpregs_active from 'int' to 'char', add lazy switching comments Improve the memory layout of 'struct fpu': - change ->fpregs_active from 'int' to 'char' - it's just a single flag and modern x86 CPUs can do efficient byte accesses. - pack related fields closer to each other: often 'fpu->state' will not be touched, while the other fields will - so pack them into a group. Also add comments to each field, describing their purpose, and add some background information about lazy restores. Also fix an obsolete, lazy switching related comment in fpu_copy()'s description. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/types.h | 82 ++++++++++++++++++++++++++++---- arch/x86/kernel/fpu/core.c | 6 +-- arch/x86/kernel/fpu/xstate.c | 9 ++-- 3 files changed, 79 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index fe2ce3276a38..261cfb76065f 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -159,8 +159,44 @@ union fpregs_state { struct fpu { /* + * @state: + * + * In-memory copy of all FPU registers that we save/restore + * over context switches. If the task is using the FPU then + * the registers in the FPU are more recent than this state + * copy. If the task context-switches away then they get + * saved here and represent the FPU state. + * + * After context switches there may be a (short) time period + * during which the in-FPU hardware registers are unchanged + * and still perfectly match this state, if the tasks + * scheduled afterwards are not using the FPU. 
+ * + * This is the 'lazy restore' window of optimization, which + * we track through 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. + * + * We detect whether a subsequent task uses the FPU via setting + * CR0::TS to 1, which causes any FPU use to raise a #NM fault. + * + * During this window, if the task gets scheduled again, we + * might be able to skip having to do a restore from this + * memory buffer to the hardware registers - at the cost of + * incurring the overhead of #NM fault traps. + * + * Note that on modern CPUs that support the XSAVEOPT (or other + * optimized XSAVE instructions), we don't use #NM traps anymore, + * as the hardware can track whether FPU registers need saving + * or not. On such CPUs we activate the non-lazy ('eagerfpu') + * logic, which unconditionally saves/restores all FPU state + * across context switches. (if FPU state exists.) + */ + union fpregs_state state; + + /* + * @last_cpu: + * * Records the last CPU on which this context was loaded into - * FPU registers. (In the lazy-switching case we might be + * FPU registers. (In the lazy-restore case we might be * able to reuse FPU registers across multiple context switches * this way, if no intermediate task used the FPU.) * @@ -170,23 +206,49 @@ struct fpu { */ unsigned int last_cpu; - unsigned int fpregs_active; - union fpregs_state state; /* + * @fpstate_active: + * + * This flag indicates whether this context is active: if the task + * is not running then we can restore from this context, if the task + * is running then we should save into this context. + */ + unsigned char fpstate_active; + + /* + * @fpregs_active: + * + * This flag determines whether a given context is actively + * loaded into the FPU's registers and that those registers + * represent the task's current FPU state. 
+ * + * Note the interaction with fpstate_active: + * + * # task does not use the FPU: + * fpstate_active == 0 + * + * # task uses the FPU and regs are active: + * fpstate_active == 1 && fpregs_active == 1 + * + * # the regs are inactive but still match fpstate: + * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu + * + * The third state is what we use for the lazy restore optimization + * on lazy-switching CPUs. + */ + unsigned char fpregs_active; + + /* + * @counter: + * * This counter contains the number of consecutive context switches * during which the FPU stays used. If this is over a threshold, the - * lazy fpu saving logic becomes unlazy, to save the trap overhead. + * lazy FPU restore logic becomes eager, to save the trap overhead. * This is an unsigned char so that after 256 iterations the counter * wraps and the context switch behavior turns lazy again; this is to * deal with bursty apps that only use the FPU for a short time: */ unsigned char counter; - /* - * This flag indicates whether this context is fpstate_active: if the task is - * not running then we can restore from this context, if the task - * is running then we should save into this context. - */ - unsigned char fpstate_active; }; #endif /* _ASM_X86_FPU_H */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index ac39616cb021..97df457784aa 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -227,10 +227,8 @@ EXPORT_SYMBOL_GPL(fpstate_init); /* * Copy the current task's FPU state to a new task's FPU context. * - * In the 'eager' case we just save to the destination context. - * - * In the 'lazy' case we save to the source context, mark the FPU lazy - * via stts() and copy the source context into the destination context. + * In both the 'eager' and the 'lazy' case we save hardware registers + * directly to the destination buffer. 
*/ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 733a8aec7bd7..cd7f1a6bd933 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -76,10 +76,11 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) EXPORT_SYMBOL_GPL(cpu_has_xfeatures); /* - * When executing XSAVEOPT (optimized XSAVE), if a processor implementation - * detects that an FPU state component is still (or is again) in its - * initialized state, it may clear the corresponding bit in the header.xfeatures - * field, and can skip the writeout of registers to the corresponding memory layout. + * When executing XSAVEOPT (or other optimized XSAVE instructions), if + * a processor implementation detects that an FPU state component is still + * (or is again) in its initialized state, it may clear the corresponding + * bit in the header.xfeatures field, and can skip the writeout of registers + * to the corresponding memory layout. * * This means that when the bit is zero, the state component might still contain * some previous - non-initialized register state.