mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-25 11:44:22 +07:00
de79f7b9f6
This creates new 'thread_fp_state' and 'thread_vr_state' structures to store FP/VSX state (including FPSCR) and Altivec/VSX state (including VSCR), and uses them in the thread_struct. In the thread_fp_state, the FPRs and VSRs are represented as u64 rather than double, since we rarely perform floating-point computations on the values, and this will enable the structures to be used in KVM code as well. Similarly FPSCR is now a u64 rather than a structure of two 32-bit values. This takes the offsets out of the macros such as SAVE_32FPRS, REST_32FPRS, etc. This enables the same macros to be used for normal and transactional state, enabling us to delete the transactional versions of the macros. This also removes the unused do_load_up_fpu and do_load_up_altivec, which were in fact buggy since they didn't create large enough stack frames to account for the fact that load_up_fpu and load_up_altivec are not designed to be called from C and assume that their caller's stack frame is an interrupt frame. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
346 lines
8.4 KiB
C
346 lines
8.4 KiB
C
/*
|
|
* Routines to emulate some Altivec/VMX instructions, specifically
|
|
* those that can trap when given denormalized operands in Java mode.
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/sched.h>
|
|
#include <asm/ptrace.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/uaccess.h>
|
|
|
|
/* Functions in vector.S */
|
|
extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
|
|
extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
|
|
extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
|
|
extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
|
|
extern void vrefp(vector128 *dst, vector128 *src);
|
|
extern void vrsqrtefp(vector128 *dst, vector128 *src);
|
|
extern void vexptep(vector128 *dst, vector128 *src);
|
|
|
|
/*
 * Mantissas of 2^(k/8) for k = 0..7, scaled by 2^23 (so the implicit
 * leading one of a single-precision mantissa is bit 23).  eexp2() indexes
 * this table with the top three fraction bits of its argument.
 */
static const unsigned int exp2s[8] = {
	0x800000,	/* 2^(0/8) */
	0x8b95c2,	/* 2^(1/8) */
	0x9837f0,	/* 2^(2/8) */
	0xa5fed7,	/* 2^(3/8) */
	0xb504f3,	/* 2^(4/8) */
	0xc5672a,	/* 2^(5/8) */
	0xd744fd,	/* 2^(6/8) */
	0xeac0c7	/* 2^(7/8) */
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x.
 *
 * Returns the single-precision bit pattern of the estimate:
 *   - NaN input yields the same NaN with the quiet bit (0x400000) set;
 *   - large positive x (or +Inf) yields +Inf, large negative x yields 0;
 *   - |x| < 2^-23 yields exactly 1.0;
 *   - results below the normal range are returned as denormals.
 *
 * The arithmetic is pure integer; right shifts of the signed fixed-point
 * value rely on the compiler performing an arithmetic shift.
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = (int)((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;	/* 0 or +Inf */
	}
	if (exp < -23)
		return 0x3f800000;	/* 1.0 */

	/* convert to fixed point integer in 9.23 representation */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * extract integer part, which becomes exponent part of result.
	 * The bias used is 126 rather than 127 because the mantissa added
	 * below still carries its leading one in bit 23, which bumps the
	 * exponent field by one.
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;
	if (exp < -23)
		return 0;

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];

	/*
	 * linear interpolation using remaining 20 bits of fraction.
	 * Each step keeps the high 32 bits of an unsigned 32x32 multiply
	 * (what the PowerPC mulhwu instruction computes).  The shift is
	 * done on an unsigned value so the discarded high bits cannot
	 * overflow a signed int.  0x172b83ff is approximately
	 * (2^(1/8) - 1) * 2^32.
	 */
	frac = (unsigned int)(((unsigned long long)((unsigned int)pwr << 12) *
			       0x172b83ffULL) >> 32);
	frac = (unsigned int)(((unsigned long long)frac * mant) >> 32);
	mant += frac;

	if (exp >= 0)
		return mant + (exp << 23);

	/* denormalized result: shift right with rounding */
	exp = -exp;
	mant += 1 << (exp - 1);
	return mant >> exp;
}
|
|
|
|
/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x.
 *
 * Returns the single-precision bit pattern of the estimate:
 *   - Inf is returned unchanged, NaN is returned with the quiet bit set;
 *   - +0 and -0 yield -Inf;
 *   - denormal inputs are normalized before the logarithm is taken.
 *
 * NOTE(review): the sign bit of a finite non-zero input is ignored, so a
 * negative x produces log_2(|x|), and -Inf is returned as -Inf.  Confirm
 * against the vlogefp definition whether a QNaN is expected instead.
 */
static unsigned int elog2(unsigned int s)
{
	int exp, mant, lz, frac;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			s |= 0x400000;	/* turn NaN into QNaN */
		return s;
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */

	/*
	 * From here on `exp' holds the running result as a signed
	 * fixed-point number with 23 fraction bits, and `mant' holds the
	 * normalized mantissa with its leading one in bit 23.
	 */
	if (exp == 0) {
		/* denormalized; mant is non-zero here so clz is defined */
		lz = __builtin_clz((unsigned int)mant);
		mant <<= lz - 8;
		/* same value as (-118 - lz) << 23 without shifting a negative */
		exp = -((118 + lz) << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * Peel off factors of 2^0.5, 2^0.25 and 2^0.125 from the mantissa,
	 * recording each in the fraction of the result.  Every multiply
	 * keeps the high 32 bits of an unsigned 32x32 product (PowerPC
	 * mulhwu semantics).
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp |= 0x400000;			/* 0.5 * 2^23 */
		mant = (int)(((unsigned long long)(unsigned int)mant *
			      0xb504f334ULL) >> 32);	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp |= 0x200000;			/* 0.25 * 2^23 */
		mant = (int)(((unsigned long long)(unsigned int)mant *
			      0xd744fccbULL) >> 32);	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp |= 0x100000;			/* 0.125 * 2^23 */
		mant = (int)(((unsigned long long)(unsigned int)mant *
			      0xeac0c6e8ULL) >> 32);	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		frac = (int)(((unsigned long long)
			      ((unsigned int)(mant - 0x800000) << 1) *
			      0xb0c7cd3aULL) >> 32);
		exp += frac;
	}

	/* convert the signed fixed-point result back to floating point */
	s = exp & 0x80000000;
	if (exp != 0) {
		if (s)
			exp = -exp;
		/* exp is non-zero here so clz is defined */
		lz = 8 - __builtin_clz((unsigned int)exp);
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		/*
		 * exp now has its leading one in bit 23; adding it to the
		 * exponent field supplies the final +1 of the bias.
		 */
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}
|
|
|
|
/* Saturation flag, OR-ed into the VSCR word when a conversion saturates */
#define VSCR_SAT	1

/*
 * Convert a single-precision value to a signed 32-bit integer,
 * saturating.
 *
 * @x:     single-precision bit pattern to convert
 * @scale: power of two the value is multiplied by before conversion
 * @vscrp: word that receives VSCR_SAT if the result had to saturate
 *
 * NaN converts to 0 and magnitudes below 1 truncate to 0, both without
 * setting VSCR_SAT.  Other values are truncated towards zero.  Values
 * whose scaled magnitude is 2^31 or more return 0x7fffffff or 0x80000000
 * according to sign and set VSCR_SAT, except for exactly -2^31, which is
 * representable and therefore not flagged.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	int exp, mant;

	exp = (x >> 23) & 0xff;
	mant = x & 0x7fffff;
	if (exp == 255 && mant != 0)
		return 0;		/* NaN -> 0 */
	exp = exp - 127 + scale;
	if (exp < 0)
		return 0;		/* round towards zero */
	if (exp >= 31) {
		/* saturate, unless the result would be -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		return (x & 0x80000000) ? 0x80000000 : 0x7fffffff;
	}
	/* put the leading one at bit 30, then shift down to the units place */
	mant |= 0x800000;
	mant = (mant << 7) >> (30 - exp);
	return (x & 0x80000000) ? -mant : mant;
}
|
|
|
|
static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
|
|
{
|
|
int exp;
|
|
unsigned int mant;
|
|
|
|
exp = (x >> 23) & 0xff;
|
|
mant = x & 0x7fffff;
|
|
if (exp == 255 && mant != 0)
|
|
return 0; /* NaN -> 0 */
|
|
exp = exp - 127 + scale;
|
|
if (exp < 0)
|
|
return 0; /* round towards zero */
|
|
if (x & 0x80000000) {
|
|
/* negative => saturate to 0 */
|
|
*vscrp |= VSCR_SAT;
|
|
return 0;
|
|
}
|
|
if (exp >= 32) {
|
|
/* saturate */
|
|
*vscrp |= VSCR_SAT;
|
|
return 0xffffffff;
|
|
}
|
|
mant |= 0x800000;
|
|
mant = (mant << 8) >> (31 - exp);
|
|
return mant;
|
|
}
|
|
|
|
/*
 * Round to floating integer, towards 0.
 *
 * @x: single-precision bit pattern
 *
 * Returns the bit pattern of x with its fractional part discarded.  NaNs
 * come back with the quiet bit set; Inf and values that are already
 * integral (unbiased exponent >= 23) come back unchanged; |x| < 1.0
 * becomes a zero carrying the sign of x.
 */
static unsigned int rfiz(unsigned int x)
{
	int exp;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < 0)
		return x & 0x80000000;	/* |x| < 1.0 rounds to 0 */
	/* clear the mantissa bits that lie below the binary point */
	return x & ~(0x7fffff >> exp);
}
|
|
|
|
/*
 * Round to floating integer, towards +/- Inf (i.e. away from zero).
 *
 * @x: single-precision bit pattern
 *
 * Returns the bit pattern of the integral value of the same sign with
 * the smallest magnitude that is not less than |x|.  NaNs come back with
 * the quiet bit set; Inf, zeros and already-integral values come back
 * unchanged; any non-zero |x| < 1.0 (denormals included) becomes +/-1.0.
 */
static unsigned int rfii(unsigned int x)
{
	int exp, mask;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if ((x & 0x7fffffff) == 0)
		return x;		/* +/-0 -> +/-0 */
	if (exp < 0)
		/* 0 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	/* mask covers the mantissa bits below the binary point */
	mask = 0x7fffff >> exp;
	/* mantissa overflows into exponent - that's OK,
	   it can't overflow into the sign bit */
	return (x + mask) & ~mask;
}
|
|
|
|
/*
 * Round to floating integer, to nearest.
 *
 * @x: single-precision bit pattern
 *
 * Returns the bit pattern of the integral value nearest to x.  NaNs come
 * back with the quiet bit set; Inf and already-integral values come back
 * unchanged; |x| < 0.5 becomes a zero carrying the sign of x.
 *
 * NOTE(review): exact ties are rounded away from zero here (0.5 -> 1.0,
 * 2.5 -> 3.0), not to the even neighbour.  Confirm whether
 * round-to-nearest-even is required for vrfin.
 */
static unsigned int rfin(unsigned int x)
{
	int exp, half;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1)
		/* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	/* mantissa bit whose weight is 0.5 for this exponent */
	half = 0x400000 >> exp;
	/* add 0.5 to the magnitude and chop off the fraction bits */
	return (x + half) & ~(0x7fffff >> exp);
}
|
|
|
|
int emulate_altivec(struct pt_regs *regs)
|
|
{
|
|
unsigned int instr, i;
|
|
unsigned int va, vb, vc, vd;
|
|
vector128 *vrs;
|
|
|
|
if (get_user(instr, (unsigned int __user *) regs->nip))
|
|
return -EFAULT;
|
|
if ((instr >> 26) != 4)
|
|
return -EINVAL; /* not an altivec instruction */
|
|
vd = (instr >> 21) & 0x1f;
|
|
va = (instr >> 16) & 0x1f;
|
|
vb = (instr >> 11) & 0x1f;
|
|
vc = (instr >> 6) & 0x1f;
|
|
|
|
vrs = current->thread.vr_state.vr;
|
|
switch (instr & 0x3f) {
|
|
case 10:
|
|
switch (vc) {
|
|
case 0: /* vaddfp */
|
|
vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
|
|
break;
|
|
case 1: /* vsubfp */
|
|
vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
|
|
break;
|
|
case 4: /* vrefp */
|
|
vrefp(&vrs[vd], &vrs[vb]);
|
|
break;
|
|
case 5: /* vrsqrtefp */
|
|
vrsqrtefp(&vrs[vd], &vrs[vb]);
|
|
break;
|
|
case 6: /* vexptefp */
|
|
for (i = 0; i < 4; ++i)
|
|
vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
|
|
break;
|
|
case 7: /* vlogefp */
|
|
for (i = 0; i < 4; ++i)
|
|
vrs[vd].u[i] = elog2(vrs[vb].u[i]);
|
|
break;
|
|
case 8: /* vrfin */
|
|
for (i = 0; i < 4; ++i)
|
|
vrs[vd].u[i] = rfin(vrs[vb].u[i]);
|
|
break;
|
|
case 9: /* vrfiz */
|
|
for (i = 0; i < 4; ++i)
|
|
vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
|
|
break;
|
|
case 10: /* vrfip */
|
|
for (i = 0; i < 4; ++i) {
|
|
u32 x = vrs[vb].u[i];
|
|
x = (x & 0x80000000)? rfiz(x): rfii(x);
|
|
vrs[vd].u[i] = x;
|
|
}
|
|
break;
|
|
case 11: /* vrfim */
|
|
for (i = 0; i < 4; ++i) {
|
|
u32 x = vrs[vb].u[i];
|
|
x = (x & 0x80000000)? rfii(x): rfiz(x);
|
|
vrs[vd].u[i] = x;
|
|
}
|
|
break;
|
|
case 14: /* vctuxs */
|
|
for (i = 0; i < 4; ++i)
|
|
vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
|
|
¤t->thread.vr_state.vscr.u[3]);
|
|
break;
|
|
case 15: /* vctsxs */
|
|
for (i = 0; i < 4; ++i)
|
|
vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
|
|
¤t->thread.vr_state.vscr.u[3]);
|
|
break;
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
break;
|
|
case 46: /* vmaddfp */
|
|
vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
|
|
break;
|
|
case 47: /* vnmsubfp */
|
|
vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
|
|
break;
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
}
|