mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-18 18:26:44 +07:00
44e92e0364
Currently, the nds32 FPU dose not support the arithmetic of denormalized number. When the nds32 FPU finds the result of the instruction is a denormlized number, the nds32 FPU considers it to be an underflow condition and rounds the result to an appropriate number. It may causes some loss of precision. This commit proposes a solution to re-execute the instruction by the FPU emulator to enhance the precision. To transfer calculations from user space to kernel space, this feature will enable the underflow exception trap by default. Enabling this feature may cause some side effects: 1. Performance loss due to extra FPU exception 2. Need another scheme to control real underflow trap A new parameter, UDF_trap, which is belong to FPU context is used to control underflow trap. User can configure this feature via CONFIG_SUPPORT_DENORMAL_ARITHMETIC Signed-off-by: Vincent Chen <vincentc@andestech.com> Acked-by: Greentime Hu <greentime@andestech.com> Signed-off-by: Greentime Hu <greentime@andestech.com>
270 lines
6.9 KiB
C
270 lines
6.9 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
// Copyright (C) 2005-2018 Andes Technology Corporation
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/signal.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/user.h>
|
|
#include <asm/io.h>
|
|
#include <asm/bitfield.h>
|
|
#include <asm/fpu.h>
|
|
|
|
const struct fpu_struct init_fpuregs = {
|
|
.fd_regs = {[0 ... 31] = sNAN64},
|
|
.fpcsr = FPCSR_INIT,
|
|
#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
|
|
.UDF_trap = 0
|
|
#endif
|
|
};
|
|
|
|
void save_fpu(struct task_struct *tsk)
|
|
{
|
|
unsigned int fpcfg, fpcsr;
|
|
|
|
enable_fpu();
|
|
fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG);
|
|
switch (fpcfg) {
|
|
case SP32_DP32_reg:
|
|
asm volatile ("fsdi $fd31, [%0+0xf8]\n\t"
|
|
"fsdi $fd30, [%0+0xf0]\n\t"
|
|
"fsdi $fd29, [%0+0xe8]\n\t"
|
|
"fsdi $fd28, [%0+0xe0]\n\t"
|
|
"fsdi $fd27, [%0+0xd8]\n\t"
|
|
"fsdi $fd26, [%0+0xd0]\n\t"
|
|
"fsdi $fd25, [%0+0xc8]\n\t"
|
|
"fsdi $fd24, [%0+0xc0]\n\t"
|
|
"fsdi $fd23, [%0+0xb8]\n\t"
|
|
"fsdi $fd22, [%0+0xb0]\n\t"
|
|
"fsdi $fd21, [%0+0xa8]\n\t"
|
|
"fsdi $fd20, [%0+0xa0]\n\t"
|
|
"fsdi $fd19, [%0+0x98]\n\t"
|
|
"fsdi $fd18, [%0+0x90]\n\t"
|
|
"fsdi $fd17, [%0+0x88]\n\t"
|
|
"fsdi $fd16, [%0+0x80]\n\t"
|
|
: /* no output */
|
|
: "r" (&tsk->thread.fpu)
|
|
: "memory");
|
|
/* fall through */
|
|
case SP32_DP16_reg:
|
|
asm volatile ("fsdi $fd15, [%0+0x78]\n\t"
|
|
"fsdi $fd14, [%0+0x70]\n\t"
|
|
"fsdi $fd13, [%0+0x68]\n\t"
|
|
"fsdi $fd12, [%0+0x60]\n\t"
|
|
"fsdi $fd11, [%0+0x58]\n\t"
|
|
"fsdi $fd10, [%0+0x50]\n\t"
|
|
"fsdi $fd9, [%0+0x48]\n\t"
|
|
"fsdi $fd8, [%0+0x40]\n\t"
|
|
: /* no output */
|
|
: "r" (&tsk->thread.fpu)
|
|
: "memory");
|
|
/* fall through */
|
|
case SP16_DP8_reg:
|
|
asm volatile ("fsdi $fd7, [%0+0x38]\n\t"
|
|
"fsdi $fd6, [%0+0x30]\n\t"
|
|
"fsdi $fd5, [%0+0x28]\n\t"
|
|
"fsdi $fd4, [%0+0x20]\n\t"
|
|
: /* no output */
|
|
: "r" (&tsk->thread.fpu)
|
|
: "memory");
|
|
/* fall through */
|
|
case SP8_DP4_reg:
|
|
asm volatile ("fsdi $fd3, [%1+0x18]\n\t"
|
|
"fsdi $fd2, [%1+0x10]\n\t"
|
|
"fsdi $fd1, [%1+0x8]\n\t"
|
|
"fsdi $fd0, [%1+0x0]\n\t"
|
|
"fmfcsr %0\n\t"
|
|
"swi %0, [%1+0x100]\n\t"
|
|
: "=&r" (fpcsr)
|
|
: "r"(&tsk->thread.fpu)
|
|
: "memory");
|
|
}
|
|
disable_fpu();
|
|
}
|
|
|
|
void load_fpu(const struct fpu_struct *fpregs)
|
|
{
|
|
unsigned int fpcfg, fpcsr;
|
|
|
|
enable_fpu();
|
|
fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG);
|
|
switch (fpcfg) {
|
|
case SP32_DP32_reg:
|
|
asm volatile ("fldi $fd31, [%0+0xf8]\n\t"
|
|
"fldi $fd30, [%0+0xf0]\n\t"
|
|
"fldi $fd29, [%0+0xe8]\n\t"
|
|
"fldi $fd28, [%0+0xe0]\n\t"
|
|
"fldi $fd27, [%0+0xd8]\n\t"
|
|
"fldi $fd26, [%0+0xd0]\n\t"
|
|
"fldi $fd25, [%0+0xc8]\n\t"
|
|
"fldi $fd24, [%0+0xc0]\n\t"
|
|
"fldi $fd23, [%0+0xb8]\n\t"
|
|
"fldi $fd22, [%0+0xb0]\n\t"
|
|
"fldi $fd21, [%0+0xa8]\n\t"
|
|
"fldi $fd20, [%0+0xa0]\n\t"
|
|
"fldi $fd19, [%0+0x98]\n\t"
|
|
"fldi $fd18, [%0+0x90]\n\t"
|
|
"fldi $fd17, [%0+0x88]\n\t"
|
|
"fldi $fd16, [%0+0x80]\n\t"
|
|
: /* no output */
|
|
: "r" (fpregs));
|
|
/* fall through */
|
|
case SP32_DP16_reg:
|
|
asm volatile ("fldi $fd15, [%0+0x78]\n\t"
|
|
"fldi $fd14, [%0+0x70]\n\t"
|
|
"fldi $fd13, [%0+0x68]\n\t"
|
|
"fldi $fd12, [%0+0x60]\n\t"
|
|
"fldi $fd11, [%0+0x58]\n\t"
|
|
"fldi $fd10, [%0+0x50]\n\t"
|
|
"fldi $fd9, [%0+0x48]\n\t"
|
|
"fldi $fd8, [%0+0x40]\n\t"
|
|
: /* no output */
|
|
: "r" (fpregs));
|
|
/* fall through */
|
|
case SP16_DP8_reg:
|
|
asm volatile ("fldi $fd7, [%0+0x38]\n\t"
|
|
"fldi $fd6, [%0+0x30]\n\t"
|
|
"fldi $fd5, [%0+0x28]\n\t"
|
|
"fldi $fd4, [%0+0x20]\n\t"
|
|
: /* no output */
|
|
: "r" (fpregs));
|
|
/* fall through */
|
|
case SP8_DP4_reg:
|
|
asm volatile ("fldi $fd3, [%1+0x18]\n\t"
|
|
"fldi $fd2, [%1+0x10]\n\t"
|
|
"fldi $fd1, [%1+0x8]\n\t"
|
|
"fldi $fd0, [%1+0x0]\n\t"
|
|
"lwi %0, [%1+0x100]\n\t"
|
|
"fmtcsr %0\n\t":"=&r" (fpcsr)
|
|
: "r"(fpregs));
|
|
}
|
|
disable_fpu();
|
|
}
|
|
void store_fpu_for_suspend(void)
|
|
{
|
|
#ifdef CONFIG_LAZY_FPU
|
|
if (last_task_used_math != NULL)
|
|
save_fpu(last_task_used_math);
|
|
last_task_used_math = NULL;
|
|
#else
|
|
if (!used_math())
|
|
return;
|
|
unlazy_fpu(current);
|
|
#endif
|
|
clear_fpu(task_pt_regs(current));
|
|
}
|
|
inline void do_fpu_context_switch(struct pt_regs *regs)
|
|
{
|
|
/* Enable to use FPU. */
|
|
|
|
if (!user_mode(regs)) {
|
|
pr_err("BUG: FPU is used in kernel mode.\n");
|
|
BUG();
|
|
return;
|
|
}
|
|
|
|
enable_ptreg_fpu(regs);
|
|
#ifdef CONFIG_LAZY_FPU //Lazy FPU is used
|
|
if (last_task_used_math == current)
|
|
return;
|
|
if (last_task_used_math != NULL)
|
|
/* Other processes fpu state, save away */
|
|
save_fpu(last_task_used_math);
|
|
last_task_used_math = current;
|
|
#endif
|
|
if (used_math()) {
|
|
load_fpu(¤t->thread.fpu);
|
|
} else {
|
|
/* First time FPU user. */
|
|
load_fpu(&init_fpuregs);
|
|
#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
|
|
current->thread.fpu.UDF_trap = init_fpuregs.UDF_trap;
|
|
#endif
|
|
set_used_math();
|
|
}
|
|
|
|
}
|
|
|
|
inline void fill_sigfpe_signo(unsigned int fpcsr, int *signo)
|
|
{
|
|
if (fpcsr & FPCSR_mskOVFT)
|
|
*signo = FPE_FLTOVF;
|
|
#ifndef CONFIG_SUPPORT_DENORMAL_ARITHMETIC
|
|
else if (fpcsr & FPCSR_mskUDFT)
|
|
*signo = FPE_FLTUND;
|
|
#endif
|
|
else if (fpcsr & FPCSR_mskIVOT)
|
|
*signo = FPE_FLTINV;
|
|
else if (fpcsr & FPCSR_mskDBZT)
|
|
*signo = FPE_FLTDIV;
|
|
else if (fpcsr & FPCSR_mskIEXT)
|
|
*signo = FPE_FLTRES;
|
|
}
|
|
|
|
inline void handle_fpu_exception(struct pt_regs *regs)
|
|
{
|
|
unsigned int fpcsr;
|
|
int si_code = 0, si_signo = SIGFPE;
|
|
#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
|
|
unsigned long redo_except = FPCSR_mskDNIT|FPCSR_mskUDFT;
|
|
#else
|
|
unsigned long redo_except = FPCSR_mskDNIT;
|
|
#endif
|
|
|
|
lose_fpu();
|
|
fpcsr = current->thread.fpu.fpcsr;
|
|
|
|
if (fpcsr & redo_except) {
|
|
#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
|
|
if (fpcsr & FPCSR_mskUDFT)
|
|
current->thread.fpu.fpcsr &= ~FPCSR_mskIEX;
|
|
#endif
|
|
si_signo = do_fpuemu(regs, ¤t->thread.fpu);
|
|
fpcsr = current->thread.fpu.fpcsr;
|
|
if (!si_signo)
|
|
goto done;
|
|
} else if (fpcsr & FPCSR_mskRIT) {
|
|
if (!user_mode(regs))
|
|
do_exit(SIGILL);
|
|
si_signo = SIGILL;
|
|
}
|
|
|
|
|
|
switch (si_signo) {
|
|
case SIGFPE:
|
|
fill_sigfpe_signo(fpcsr, &si_code);
|
|
break;
|
|
case SIGILL:
|
|
show_regs(regs);
|
|
si_code = ILL_COPROC;
|
|
break;
|
|
case SIGBUS:
|
|
si_code = BUS_ADRERR;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
force_sig_fault(si_signo, si_code,
|
|
(void __user *)instruction_pointer(regs), current);
|
|
done:
|
|
own_fpu();
|
|
}
|
|
|
|
bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs)
|
|
{
|
|
int done = true;
|
|
/* Coprocessor disabled exception */
|
|
if (subtype == FPU_DISABLE_EXCEPTION) {
|
|
preempt_disable();
|
|
do_fpu_context_switch(regs);
|
|
preempt_enable();
|
|
}
|
|
/* Coprocessor exception such as underflow and overflow */
|
|
else if (subtype == FPU_EXCEPTION)
|
|
handle_fpu_exception(regs);
|
|
else
|
|
done = false;
|
|
return done;
|
|
}
|