
sh port of the sLeazy-FPU feature currently implemented for some architectures such as i386.

Right now the SH kernel has 100% lazy FPU behaviour. This is of course great for applications that have very sporadic or no FPU use, but very frequent FPU users take an extra trap on every context switch. The patch below adds a simple heuristic to this code: after 5 consecutive context switches of FPU use, the lazy behaviour is disabled and the context gets restored on every context switch; after 256 switches, this is reset and the 100% lazy behaviour is restored.

Tests with LMbench showed no regression; I saw a small improvement (~2%) due to the prefetching. The figures below also show that with this sLeazy patch the number of FPU exceptions is indeed reduced. To collect them, I hacked LMbench's lat_ctx to use the FPU a little more.

sLeazy implementation
===========================================
switch_to calls            | 79326
sleasy calls               | 42577
do_fpu_state_restore calls | 59232
restore_fpu calls          | 59032

Exceptions: 0x800 (FPU disabled): 16604

100% lazy (default implementation)
===========================================
switch_to calls            | 79690
do_fpu_state_restore calls | 53299
restore_fpu calls          | 53101

Exceptions: 0x800 (FPU disabled): 53273

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
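To make the heuristic concrete, here is a minimal standalone C sketch of the idea. It is illustrative only, not the kernel code: the struct task, fpu_trap() and switch_in() names and the simulation loop are invented for this example, and fpu_counter is assumed to be an 8-bit counter, so the "reset after 256 switches" falls out of natural wrap-around.

#include <stdio.h>

/* Hypothetical stand-in for the per-task state (not task_struct). */
struct task {
	unsigned char fpu_counter;	/* consecutive switches with FPU use */
	int fpu_loaded;			/* is the FPU context currently live? */
};

/* FPU-disabled trap: the task touched the FPU while running lazily. */
static void fpu_trap(struct task *tsk)
{
	tsk->fpu_counter++;		/* wraps to 0 after 256: back to fully lazy */
	tsk->fpu_loaded = 1;		/* the real code would restore_fpu() here */
}

/* Context switch to 'next': restore eagerly once the counter passes 5. */
static void switch_in(struct task *next)
{
	next->fpu_loaded = 0;		/* lazy default: FPU disabled (SR_FD set) */
	if (next->fpu_counter > 5) {
		next->fpu_loaded = 1;	/* eager restore: no trap will be taken */
		next->fpu_counter++;	/* eager restores also count toward the wrap */
	}
}

int main(void)
{
	struct task t = { 0, 0 };
	int traps = 0, i;

	/* Simulate 300 context switches of a task that always uses the FPU. */
	for (i = 0; i < 300; i++) {
		switch_in(&t);
		if (!t.fpu_loaded) {	/* first FPU touch would fault */
			fpu_trap(&t);
			traps++;
		}
	}
	printf("traps taken: %d of 300 context switches\n", traps);
	return 0;
}

Run standalone, this takes a handful of traps while the counter builds up, then switches almost entirely to eager restores until the counter wraps. Note how the scheme dovetails with unlazy_fpu() in the header below: a task switched out without having touched the FPU gets its fpu_counter reset to 0, so only uninterrupted FPU use builds the streak that triggers eager restores.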
#ifndef __ASM_SH_FPU_H
#define __ASM_SH_FPU_H

#ifndef __ASSEMBLY__

#include <linux/preempt.h>
#include <asm/ptrace.h>

#ifdef CONFIG_SH_FPU
static inline void release_fpu(struct pt_regs *regs)
{
	regs->sr |= SR_FD;
}

static inline void grab_fpu(struct pt_regs *regs)
{
	regs->sr &= ~SR_FD;
}

struct task_struct;

extern void save_fpu(struct task_struct *__tsk, struct pt_regs *regs);
void fpu_state_restore(struct pt_regs *regs);
#else

#define release_fpu(regs)	do { } while (0)
#define grab_fpu(regs)		do { } while (0)

static inline void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
{
	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
}
#endif

struct user_regset;

extern int do_fpu_inst(unsigned short, struct pt_regs *);

extern int fpregs_get(struct task_struct *target,
		      const struct user_regset *regset,
		      unsigned int pos, unsigned int count,
		      void *kbuf, void __user *ubuf);

static inline void unlazy_fpu(struct task_struct *tsk, struct pt_regs *regs)
{
	preempt_disable();
	if (test_tsk_thread_flag(tsk, TIF_USEDFPU))
		save_fpu(tsk, regs);
	else
		tsk->fpu_counter = 0;
	preempt_enable();
}

static inline void clear_fpu(struct task_struct *tsk, struct pt_regs *regs)
{
	preempt_disable();
	if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) {
		clear_tsk_thread_flag(tsk, TIF_USEDFPU);
		release_fpu(regs);
	}
	preempt_enable();
}

static inline int init_fpu(struct task_struct *tsk)
{
	if (tsk_used_math(tsk)) {
		if ((boot_cpu_data.flags & CPU_HAS_FPU) && tsk == current)
			unlazy_fpu(tsk, task_pt_regs(tsk));
		return 0;
	}

	set_stopped_child_used_math(tsk);
	return 0;
}

#endif /* __ASSEMBLY__ */

#endif /* __ASM_SH_FPU_H */