mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-16 04:07:05 +07:00
619b6e18fc
This complements the generic R4000/R4400 errata workaround code and adds bits for the daddiu problem. In most places it just modifies handwritten assembly code so that the assembler is allowed to use a temporary register as daddiu may now be treated as a macro that expands to a sequence of li and daddu. It is the AT register or, where AT is unavailable or used explicitly for another purpose, an explicitly-named register is selected, using the .set at=<reg> feature added recently to gas. This feature is only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the workaround remains disabled, the required version of binutils stays unchanged. Similarly, daddiu instructions put in branch delay slots in noreorder fragments are now taken out of them and the assembler is allowed to reorder them itself as possible (which it does making the whole idea of scheduling them into delay slots manually questionable). Also in the very few places where such a simple conversion was not possible, a handcoded longer sequence is implemented. Other than that there are changes to code responsible for building the TLB fault and page clear/copy handlers to avoid daddiu as appropriate. These are only effective if the erratum is verified to be present at the run time. Finally there is a trivial update to __delay(), because it uses daddiu in a branch delay slot. Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
113 lines
2.9 KiB
C
113 lines
2.9 KiB
C
/*
|
|
* This file is subject to the terms and conditions of the GNU General Public
|
|
* License. See the file "COPYING" in the main directory of this archive
|
|
* for more details.
|
|
*
|
|
* Copyright (C) 1994 by Waldorf Electronics
|
|
* Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle
|
|
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
|
|
* Copyright (C) 2007 Maciej W. Rozycki
|
|
*/
|
|
#ifndef _ASM_DELAY_H
|
|
#define _ASM_DELAY_H
|
|
|
|
#include <linux/param.h>
|
|
#include <linux/smp.h>
|
|
|
|
#include <asm/compiler.h>
|
|
#include <asm/war.h>
|
|
|
|
static inline void __delay(unsigned long loops)
|
|
{
|
|
if (sizeof(long) == 4)
|
|
__asm__ __volatile__ (
|
|
" .set noreorder \n"
|
|
" .align 3 \n"
|
|
"1: bnez %0, 1b \n"
|
|
" subu %0, 1 \n"
|
|
" .set reorder \n"
|
|
: "=r" (loops)
|
|
: "0" (loops));
|
|
else if (sizeof(long) == 8 && !DADDI_WAR)
|
|
__asm__ __volatile__ (
|
|
" .set noreorder \n"
|
|
" .align 3 \n"
|
|
"1: bnez %0, 1b \n"
|
|
" dsubu %0, 1 \n"
|
|
" .set reorder \n"
|
|
: "=r" (loops)
|
|
: "0" (loops));
|
|
else if (sizeof(long) == 8 && DADDI_WAR)
|
|
__asm__ __volatile__ (
|
|
" .set noreorder \n"
|
|
" .align 3 \n"
|
|
"1: bnez %0, 1b \n"
|
|
" dsubu %0, %2 \n"
|
|
" .set reorder \n"
|
|
: "=r" (loops)
|
|
: "0" (loops), "r" (1));
|
|
}
|
|
|
|
|
|
/*
|
|
* Division by multiplication: you don't have to worry about
|
|
* loss of precision.
|
|
*
|
|
* Use only for very small delays ( < 1 msec). Should probably use a
|
|
* lookup table, really, as the multiplications take much too long with
|
|
* short delays. This is a "reasonable" implementation, though (and the
|
|
* first constant multiplication gets optimized away if the delay is
|
|
* a constant)
|
|
*/
|
|
|
|
static inline void __udelay(unsigned long usecs, unsigned long lpj)
|
|
{
|
|
unsigned long hi, lo;
|
|
|
|
/*
|
|
* The rates of 128 is rounded wrongly by the catchall case
|
|
* for 64-bit. Excessive precission? Probably ...
|
|
*/
|
|
#if defined(CONFIG_64BIT) && (HZ == 128)
|
|
usecs *= 0x0008637bd05af6c7UL; /* 2**64 / (1000000 / HZ) */
|
|
#elif defined(CONFIG_64BIT)
|
|
usecs *= (0x8000000000000000UL / (500000 / HZ));
|
|
#else /* 32-bit junk follows here */
|
|
usecs *= (unsigned long) (((0x8000000000000000ULL / (500000 / HZ)) +
|
|
0x80000000ULL) >> 32);
|
|
#endif
|
|
|
|
if (sizeof(long) == 4)
|
|
__asm__("multu\t%2, %3"
|
|
: "=h" (usecs), "=l" (lo)
|
|
: "r" (usecs), "r" (lpj)
|
|
: GCC_REG_ACCUM);
|
|
else if (sizeof(long) == 8 && !R4000_WAR)
|
|
__asm__("dmultu\t%2, %3"
|
|
: "=h" (usecs), "=l" (lo)
|
|
: "r" (usecs), "r" (lpj)
|
|
: GCC_REG_ACCUM);
|
|
else if (sizeof(long) == 8 && R4000_WAR)
|
|
__asm__("dmultu\t%3, %4\n\tmfhi\t%0"
|
|
: "=r" (usecs), "=h" (hi), "=l" (lo)
|
|
: "r" (usecs), "r" (lpj)
|
|
: GCC_REG_ACCUM);
|
|
|
|
__delay(usecs);
|
|
}
|
|
|
|
/* udelay_val of the CPU this code is running on; passed to __udelay() as lpj. */
#define __udelay_val cpu_data[raw_smp_processor_id()].udelay_val

/* Delay for "usecs" microseconds, scaled by the current CPU's udelay_val. */
#define udelay(usecs) __udelay((usecs), __udelay_val)

/*
 * Largest delay, in milliseconds, for which "usecs *= ..." in udelay
 * does not overflow.
 */
#if HZ <= 200
#define MAX_UDELAY_MS	5
#elif HZ >= 1000
#define MAX_UDELAY_MS	1
#else
#define MAX_UDELAY_MS	(1000 / HZ)
#endif
|
|
|
|
#endif /* _ASM_DELAY_H */
|