linux_dsm_epyc7002/arch/mips/math-emu/sp_rint.c

/* IEEE754 floating point arithmetic
 * single precision
 */
/*
 * MIPS floating point support
 * Copyright (C) 1994-2000 Algorithmics Ltd.
 * Copyright (C) 2017 Imagination Technologies, Ltd.
 * Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
 *
 *  This program is free software; you can distribute it and/or modify it
 *  under the terms of the GNU General Public License (Version 2) as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope it will be useful, but WITHOUT
 *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program.
 */

#include "ieee754sp.h"

union ieee754sp ieee754sp_rint(union ieee754sp x)
{
	union ieee754sp ret;
	u32 residue;
	int sticky;
	int round;
	int odd;

	COMPXDP;		/* <-- DP needed for 64-bit mantissa tmp */

	ieee754_clearcx();

	EXPLODEXSP;
	FLUSHXSP;

	if (xc == IEEE754_CLASS_SNAN)
		return ieee754sp_nanxcpt(x);

	if ((xc == IEEE754_CLASS_QNAN) ||
	    (xc == IEEE754_CLASS_INF) ||
	    (xc == IEEE754_CLASS_ZERO))
		return x;

	if (xe >= SP_FBITS)
		return x;

	if (xe < -1) {
		residue = xm;
		round = 0;
		sticky = residue != 0;
		xm = 0;
	} else {
		residue = xm << (xe + 1);
		residue <<= 31 - SP_FBITS;
		round = (residue >> 31) != 0;
		sticky = (residue << 1) != 0;
		xm >>= SP_FBITS - xe;
	}

	odd = (xm & 0x1) != 0x0;

	switch (ieee754_csr.rm) {
	case FPU_CSR_RN:	/* toward nearest */
		if (round && (sticky || odd))
			xm++;
		break;
	case FPU_CSR_RZ:	/* toward zero */
		break;
	case FPU_CSR_RU:	/* toward +infinity */
		if ((round || sticky) && !xs)
			xm++;
		break;
	case FPU_CSR_RD:	/* toward -infinity */
		if ((round || sticky) && xs)
			xm++;
		break;
	}

	if (round || sticky)
		ieee754_setcx(IEEE754_INEXACT);

	ret = ieee754sp_flong(xm);
	SPSIGN(ret) = xs;

	return ret;
}
MIPS: math-emu: RINT.<D|S>: Fix several problems by reimplementation Reimplement RINT.<D|S> kernel emulation so that all RINT.<D|S> specifications are met. For the sake of simplicity, let's analyze RINT.S only. Prior to this patch, RINT.S emulation was essentially implemented as (in pseudocode) <output> = ieee754sp_flong(ieee754sp_tlong(<input>)), where ieee754sp_tlong() and ieee754sp_flong() are functions providing conversion from single-precision float to integer, and from integer to single-precision float, respectively. On the surface, this implementation looks correct, but it actually fails in many cases. The following problems were detected: 1. NaN and infinity cases will not be handled properly. The function ieee754sp_flong() never returns NaN or infinity. 2. For RINT.S, for all inputs larger than LONG_MAX and smaller than FLT_MAX, the result will be wrong, and the overflow exception will be erroneously set. A similar problem exists for negative inputs as well. 3. For some rounding modes, for some negative inputs close to zero, the return value will be zero when it should be -zero. This is because ieee754sp_flong() never returns -zero. This patch removes the problems above by implementing dedicated functions for RINT.<D|S> emulation. The core of the new functions is an adapted version of the core of the function ieee754sp_tlong(). However, many details are implemented to match the RINT.<D|S> specification. It should be said that the functionality of ieee754sp_tlong() actually closely corresponds to the CVT.L.S instruction, and it is used while emulating CVT.L.S. However, the RINT.S and CVT.L.S instructions differ in many aspects. This patch adds the missing support for RINT.<D|S>. Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtec.com> Signed-off-by: Goran Ferenc <goran.ferenc@imgtec.com> Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
Cc: David S. Miller <davem@davemloft.net> Cc: Douglas Leung <douglas.leung@imgtec.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Hans Verkuil <hans.verkuil@cisco.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Maciej W. Rozycki <macro@imgtec.com> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Mauro Carvalho Chehab <mchehab@kernel.org> Cc: Paul Burton <paul.burton@imgtec.com> Cc: Petar Jovanovic <petar.jovanovic@imgtec.com> Cc: Raghu Gandham <raghu.gandham@imgtec.com> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17141/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> 2017-08-21 19:24:48 +07:00			`/* IEEE754 floating point arithmetic`
			`* single precision`
			`*/`
			`/*`
			`* MIPS floating point support`
			`* Copyright (C) 1994-2000 Algorithmics Ltd.`
			`* Copyright (C) 2017 Imagination Technologies, Ltd.`
			`* Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com>`
			`*`
			`* This program is free software; you can distribute it and/or modify it`
			`* under the terms of the GNU General Public License (Version 2) as`
			`* published by the Free Software Foundation.`
			`*`
			`* This program is distributed in the hope it will be useful, but WITHOUT`
			`* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or`
			`* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`
			`* for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License along`
			`* with this program.`
			`*/`

			`#include "ieee754sp.h"`

			`union ieee754sp ieee754sp_rint(union ieee754sp x)`
			`{`
			`union ieee754sp ret;`
			`u32 residue;`
			`int sticky;`
			`int round;`
			`int odd;`

			`COMPXDP; /* <-- DP needed for 64-bit mantissa tmp */`

			`ieee754_clearcx();`

			`EXPLODEXSP;`
			`FLUSHXSP;`

			`if (xc == IEEE754_CLASS_SNAN)`
			`return ieee754sp_nanxcpt(x);`

			`if ((xc == IEEE754_CLASS_QNAN) ||`
			`(xc == IEEE754_CLASS_INF) ||`
			`(xc == IEEE754_CLASS_ZERO))`
			`return x;`

			`if (xe >= SP_FBITS)`
			`return x;`

			`if (xe < -1) {`
			`residue = xm;`
			`round = 0;`
			`sticky = residue != 0;`
			`xm = 0;`
			`} else {`
			`residue = xm << (xe + 1);`
			`residue <<= 31 - SP_FBITS;`
			`round = (residue >> 31) != 0;`
			`sticky = (residue << 1) != 0;`
			`xm >>= SP_FBITS - xe;`
			`}`

			`odd = (xm & 0x1) != 0x0;`

			`switch (ieee754_csr.rm) {`
			`case FPU_CSR_RN: /* toward nearest */`
			`if (round && (sticky || odd))`
			`xm++;`
			`break;`
			`case FPU_CSR_RZ: /* toward zero */`
			`break;`
			`case FPU_CSR_RU: /* toward +infinity */`
			`if ((round || sticky) && !xs)`
			`xm++;`
			`break;`
			`case FPU_CSR_RD: /* toward -infinity */`
			`if ((round || sticky) && xs)`
			`xm++;`
			`break;`
			`}`

			`if (round || sticky)`
			`ieee754_setcx(IEEE754_INEXACT);`

			`ret = ieee754sp_flong(xm);`
			`SPSIGN(ret) = xs;`

			`return ret;`
			`}`