linux_dsm_epyc7002/include/asm-xtensa/xtensa/coreasm.h
Chris Zankel e344b63eee [PATCH] xtensa: Architecture support for Tensilica Xtensa Part 7
The attached patches provide part 7 of an architecture implementation for the
Tensilica Xtensa CPU series.

Signed-off-by: Chris Zankel <chris@zankel.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 00:05:22 -07:00

527 lines
16 KiB
C

#ifndef XTENSA_COREASM_H
#define XTENSA_COREASM_H
/*
* THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND
*
* include/asm-xtensa/xtensa/coreasm.h -- assembler-specific
* definitions that depend on CORE configuration.
*
* Source for configuration-independent binaries (which link in a
* configuration-specific HAL library) must NEVER include this file.
* It is perfectly normal, however, for the HAL itself to include this
* file.
*
* This file must NOT include xtensa/config/system.h. Any assembler
* header file that depends on system information should likely go in
* a new systemasm.h (or sysasm.h) header file.
*
* NOTE: macro beqi32 is NOT configuration-dependent, and is placed
* here until there is a configuration-independent header file for it.
*
* This file is subject to the terms and conditions of the GNU General
* Public License. See the file "COPYING" in the main directory of
* this archive for more details.
*
* Copyright (C) 2002 Tensilica Inc.
*/
#include <xtensa/config/core.h>
#include <xtensa/config/specreg.h>
/*
* Assembly-language specific definitions (assembly macros, etc.).
*/
/*----------------------------------------------------------------------
* find_ms_setbit
*
* This macro finds the most significant bit that is set in <as>
* and returns its index + <base> in <ad>, or <base> - 1 if <as> is zero.
* The index counts starting at zero for the lsbit, so the return
* value ranges from <base>-1 (no bit set) to <base>+31 (msbit set).
*
* Parameters:
* <ad> destination address register (any register)
* <as> source address register
* <at> temporary address register (must be different than <as>)
* <base> constant value added to result (usually 0 or 1)
* On entry:
* <ad> = undefined if different than <as>
* <as> = value whose most significant set bit is to be found
* <at> = undefined
* no other registers are used by this macro.
* On exit:
* <ad> = <base> + index of msbit set in original <as>,
* = <base> - 1 if original <as> was zero.
* <as> clobbered (if not <ad>)
* <at> clobbered (if not <ad>)
* Example:
* find_ms_setbit a0, a4, a0, 0 -- return in a0 index of msbit set in a4
*/
/*
 * find_ms_setbit ad, as, at, base
 *
 * Leaves <base> + (index of the highest set bit of <as>) in <ad>, or
 * <base> - 1 when <as> is zero.  When XCHAL_HAVE_NSA is set the answer is
 * computed from a single NSAU; otherwise a branch-based binary search
 * narrows the candidate field 16 -> 8 -> 4 -> 2 -> 1 bits while the
 * running index is accumulated in <at>.
 */
.macro find_ms_setbit ad, as, at, base
#if XCHAL_HAVE_NSA
// NSAU path: result = (31 + base) - nsau(as).
movi \at, 31+\base
nsau \as, \as // get index of \as, numbered from msbit (32 if absent)
sub \ad, \at, \as // get numbering from lsbit (0..31, -1 if absent)
#else /* XCHAL_HAVE_NSA */
// Fallback path: \at starts at base and gains the width of every lower
// half that is skipped; \as is shifted down so the next test always
// looks at the low bits of the remaining field.
movi \at, \base // start with result of 0 (point to lsbit of 32)
beqz \as, 2f // special case for zero argument: return -1
bltui \as, 0x10000, 1f // is it one of the 16 lsbits? (if so, check lower 16 bits)
addi \at, \at, 16 // no, increment result to upper 16 bits (of 32)
//srli \as, \as, 16 // check upper half (shift right 16 bits)
extui \as, \as, 16, 16 // check upper half (shift right 16 bits)
1: bltui \as, 0x100, 1f // is it one of the 8 lsbits? (if so, check lower 8 bits)
addi \at, \at, 8 // no, increment result to upper 8 bits (of 16)
srli \as, \as, 8 // shift right to check upper 8 bits
1: bltui \as, 0x10, 1f // is it one of the 4 lsbits? (if so, check lower 4 bits)
addi \at, \at, 4 // no, increment result to upper 4 bits (of 8)
srli \as, \as, 4 // shift right 4 bits to check upper half
1: bltui \as, 0x4, 1f // is it one of the 2 lsbits? (if so, check lower 2 bits)
addi \at, \at, 2 // no, increment result to upper 2 bits (of 4)
srli \as, \as, 2 // shift right 2 bits to check upper half
// Final step: a remaining value below 2 skips straight to the end with
// \at unchanged.  Otherwise +2 followed by the shared -1 at label 2 nets
// +1.  The zero-input branch (beqz above) enters at label 2 with
// \at == base, so it leaves base - 1.
1: bltui \as, 0x2, 1f // is it the lsbit?
addi \at, \at, 2 // no, increment result to upper bit (of 2)
2: addi \at, \at, -1 // (from just above: add 1; from beqz: return -1)
//srli \as, \as, 1
1: // done! \at contains index of msbit set (or -1 if none set)
// The register names are pasted after 0x (a4 becomes 0xa4), so the
// difference is non-zero exactly when the two names differ.
// NOTE(review): a register alias that is not of the form aN would not
// form a hex literal here; presumably only a0-a15 spellings are
// supported -- confirm before passing any other name.
.if 0x\ad - 0x\at // destination different than \at ? (works because regs are a0-a15)
mov \ad, \at // then move result to \ad
.endif
#endif /* XCHAL_HAVE_NSA */
.endm // find_ms_setbit
/*----------------------------------------------------------------------
* find_ls_setbit
*
* This macro finds the least significant bit that is set in <as>
* and returns its index + <base> in <ad> (<base> - 1 if <as> is zero).
* Usage is the same as for the find_ms_setbit macro.
* Example:
* find_ls_setbit a0, a4, a0, 0 -- return in a0 index of lsbit set in a4
*/
.macro find_ls_setbit ad, as, at, base
	// Isolate the lowest set bit: x AND (-x) keeps only that bit, and a
	// zero input stays zero.  \at holds the negation, so it must not be
	// the same register as \as.
	neg	\at, \as
	and	\as, \as, \at
	// At most one bit is left in \as, so the index of its most
	// significant set bit is also the index of the original lowest one.
	find_ms_setbit \ad, \as, \at, \base
.endm	// find_ls_setbit
/*----------------------------------------------------------------------
* find_ls_one
*
* Same as find_ls_setbit with base zero.
* Source (as) and destination (ad) registers must be different.
* Provided for backward compatibility.
*/
.macro find_ls_one ad, as
	// Same sequence as find_ls_setbit with <at> = \ad and <base> = 0,
	// written out in place: \ad doubles as the temporary, which is why
	// \ad and \as must be different registers.
	neg	\ad, \as		// \ad = -\as
	and	\as, \ad, \as		// keep only the lowest set bit of \as
	find_ms_setbit \ad, \as, \ad, 0	// index of that bit (-1 if none)
.endm	// find_ls_one
/*----------------------------------------------------------------------
* floop, floopnez, floopgtz, floopend
*
* These macros are used for fast inner loops that
* work whether or not the Loops option is configured.
* If the Loops option is configured, they simply use
* the zero-overhead LOOP instructions; otherwise
* they use explicit decrement and branch instructions.
*
* They are used in pairs, with floop, floopnez or floopgtz
* at the beginning of the loop, and floopend at the end.
*
* Each pair of loop macro calls must be given the loop count
* address register and a unique label for that loop.
*
* Example:
*
* movi a3, 16 // loop 16 times
* floop a3, myloop1
* :
* bnez a7, end1 // exit loop if a7 != 0
* :
* floopend a3, myloop1
* end1:
*
* Like the LOOP instructions, these macros cannot be
* nested, must include at least one instruction,
* cannot call functions inside the loop, etc.
* The loop can be exited by jumping to the instruction
* following floopend (or elsewhere outside the loop),
* or continued by jumping to a NOP instruction placed
* immediately before floopend.
*
* Unlike LOOP instructions, the register passed to floop*
* cannot be used inside the loop, because it is used as
* the loop counter if the Loops option is not configured.
* And its value is undefined after exiting the loop.
* And because the loop counter register is active inside
* the loop, you can't easily use this construct to loop
* across a register file using ROTW as you might with LOOP
* instructions, unless you copy the loop register along.
*/
/* Named label version of the macros: */
/*
 * floop: named-label front end for floop_.  Forwards the counter register
 * <ar> together with two local label names built from <endlabel>:
 * .Lfloopstart_<endlabel> (loop start) and .Lfloopend_<endlabel> (loop end).
 */
.macro floop ar, endlabel
floop_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
.endm
/*
 * floopnez: named-label front end for floopnez_.  Forwards the counter
 * register <ar> together with the local label names
 * .Lfloopstart_<endlabel> and .Lfloopend_<endlabel> built from <endlabel>.
 */
.macro floopnez ar, endlabel
floopnez_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
.endm
/*
 * floopgtz: named-label front end for floopgtz_.  Forwards the counter
 * register <ar> together with the local label names
 * .Lfloopstart_<endlabel> and .Lfloopend_<endlabel> built from <endlabel>.
 */
.macro floopgtz ar, endlabel
floopgtz_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
.endm
/*
 * floopend: named-label front end for floopend_.  Must be given the same
 * <endlabel> as the opening floop/floopnez/floopgtz so that the generated
 * names .Lfloopstart_<endlabel> and .Lfloopend_<endlabel> match.
 */
.macro floopend ar, endlabel
floopend_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
.endm
/* Numbered local label version of the macros: */
#if 0 /*UNTESTED*/
/*
 * Disabled variants that use the numeric local labels 8 and 9 instead of
 * names derived from a label argument.  The opening macros define label 8
 * as the loop start and refer forward to the end as 9f; floopend89 refers
 * back to the start as 8b and defines label 9 as the loop end.
 * This whole group is compiled out and is marked untested.
 */
.macro floop89 ar
floop_ \ar, 8, 9f
.endm
.macro floopnez89 ar
floopnez_ \ar, 8, 9f
.endm
.macro floopgtz89 ar
floopgtz_ \ar, 8, 9f
.endm
.macro floopend89 ar
floopend_ \ar, 8b, 9
.endm
#endif /*0*/
/* Underlying version of the macros: */
.macro floop_ ar, startlabel, endlabelref
	// Nesting guard: _infloop_ is non-zero while a floop* is still open.
	.ifdef	_infloop_
	.ifne	_infloop_
	.err	// Error: floop cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if ! XCHAL_HAVE_LOOPS
	// Software loop: the counter is decremented at the top of the body;
	// floopend_ emits the branch back to \startlabel.
\startlabel:
	addi	\ar, \ar, -1
#else /* XCHAL_HAVE_LOOPS */
	// Hardware loop: \startlabel is not needed and is not emitted.
	loop	\ar, \endlabelref
#endif /* XCHAL_HAVE_LOOPS */
.endm	// floop_
.macro floopnez_ ar, startlabel, endlabelref
	// Nesting guard: _infloop_ is non-zero while a floop* is still open.
	.ifdef	_infloop_
	.ifne	_infloop_
	.err	// Error: floopnez cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if ! XCHAL_HAVE_LOOPS
	// Software loop: skip the whole body when the count is zero, then
	// decrement at the top of each pass; floopend_ branches back.
	beqz	\ar, \endlabelref
\startlabel:
	addi	\ar, \ar, -1
#else /* XCHAL_HAVE_LOOPS */
	// Hardware loop: \startlabel is not needed and is not emitted.
	loopnez	\ar, \endlabelref
#endif /* XCHAL_HAVE_LOOPS */
.endm	// floopnez_
.macro floopgtz_ ar, startlabel, endlabelref
	// Nesting guard: _infloop_ is non-zero while a floop* is still open.
	.ifdef	_infloop_
	.ifne	_infloop_
	.err	// Error: floopgtz cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if ! XCHAL_HAVE_LOOPS
	// Software loop: skip the whole body when the count is negative or
	// zero, then decrement at the top of each pass; floopend_ branches back.
	bltz	\ar, \endlabelref
	beqz	\ar, \endlabelref
\startlabel:
	addi	\ar, \ar, -1
#else /* XCHAL_HAVE_LOOPS */
	// Hardware loop: \startlabel is not needed and is not emitted.
	loopgtz	\ar, \endlabelref
#endif /* XCHAL_HAVE_LOOPS */
.endm	// floopgtz_
/*
 * floopend_ ar, startlabelref, endlabel
 *
 * Closes a loop opened by floop_, floopnez_ or floopgtz_.
 *   <ar>             loop counter register given to the opening macro
 *   <startlabelref>  reference to the loop start label
 *   <endlabel>       label to define at the loop end
 *
 * Reports an error if no floop* is currently open, then clears the
 * _infloop_ flag.  Without the Loops option it emits the branch back to
 * the loop start; in both configurations it defines <endlabel>.
 */
.macro floopend_ ar, startlabelref, endlabel
	// The zero test is nested under .else so that it is only evaluated
	// once _infloop_ is known to exist; testing an undefined symbol with
	// .ifeq would add a second, unrelated assembler diagnostic.
	.ifndef	_infloop_
	.err	// Error: floopend without matching floopXXX
	.else
	.ifeq	_infloop_
	.err	// Error: floopend without matching floopXXX
	.endif
	.endif
	.set	_infloop_, 0
#if ! XCHAL_HAVE_LOOPS
	// Software loop: go around again until the counter reaches zero.
	bnez	\ar, \startlabelref
#endif /* ! XCHAL_HAVE_LOOPS */
\endlabel:
.endm	// floopend_
/*----------------------------------------------------------------------
* crsil -- conditional RSIL (read/set interrupt level)
*
* Executes the RSIL instruction if it exists, else just reads PS.
* The RSIL instruction does not exist in the new exception architecture
* if the interrupt option is not selected.
*/
.macro crsil ar, newlevel
#if ! (XCHAL_HAVE_OLD_EXC_ARCH || XCHAL_HAVE_INTERRUPTS)
	// No RSIL in this configuration: just read PS; \newlevel is unused.
	rsr	\ar, PS
#else
	// RSIL is available: read PS into \ar and set the interrupt level.
	rsil	\ar, \newlevel
#endif
.endm	// crsil
/*----------------------------------------------------------------------
* window_spill{4,8,12}
*
* These macros spill callers' register windows to the stack.
* They work for both privileged and non-privileged tasks.
* Must be called from a windowed ABI context, eg. within
* a windowed ABI function (ie. valid stack frame, window
* exceptions enabled, not in exception mode, etc).
*
* This macro requires a single invocation of the window_spill_common
* macro in the same assembly unit and section.
*
* Note that using window_spill{4,8,12} macros is more efficient
* than calling a function implemented using window_spill_function,
* because the latter needs extra code to figure out the size of
* the call to the spilling function.
*
* Example usage:
*
* .text
* .align 4
* .global some_function
* .type some_function,@function
* some_function:
* entry a1, 16
* :
* :
*
* window_spill4 // spill windows of some_function's callers; preserves a0..a3 only;
* // to use window_spill{8,12} in this example function we'd have
* // to increase space allocated by the entry instruction, because
* // 16 bytes only allows call4; 32 or 48 bytes (+locals) are needed
* // for call8/window_spill8 or call12/window_spill12 respectively.
* :
*
* retw
*
* window_spill_common // instantiates code used by window_spill4
*
*
* On entry:
* none (if window_spill4)
* stack frame has enough space allocated for call8 (if window_spill8)
* stack frame has enough space allocated for call12 (if window_spill12)
* On exit:
* a4..a15 clobbered (if window_spill4)
* a8..a15 clobbered (if window_spill8)
* a12..a15 clobbered (if window_spill12)
* no caller windows are in live registers
*/
/*
 * window_spill4 -- spill callers' register windows, preserving a0..a3.
 *
 * Expands to nothing when the windowed option is not configured.
 * For 32- and 64-register files it calls (call4) into the assist chain
 * that window_spill_common instantiates; the assist label number is the
 * register file size minus the 4 registers of the call increment.
 * An unrecognized register file size is rejected at preprocessing time
 * (as window_spill_function does) instead of silently emitting nothing.
 */
.macro window_spill4
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call4	.L__wdwspill_assist28	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call4	.L__wdwspill_assist60	// call deep enough to clear out any live callers
# else
#  error "window_spill4: unrecognized address register file size"
# endif
#endif
.endm	// window_spill4
/*
 * window_spill8 -- spill callers' register windows, preserving a0..a7.
 *
 * Expands to nothing when the windowed option is not configured.
 * For 32- and 64-register files it calls (call8) into the assist chain
 * that window_spill_common instantiates; the assist label number is the
 * register file size minus the 8 registers of the call increment.
 * An unrecognized register file size is rejected at preprocessing time
 * (as window_spill_function does) instead of silently emitting nothing.
 */
.macro window_spill8
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call8	.L__wdwspill_assist24	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call8	.L__wdwspill_assist56	// call deep enough to clear out any live callers
# else
#  error "window_spill8: unrecognized address register file size"
# endif
#endif
.endm	// window_spill8
/*
 * window_spill12 -- spill callers' register windows, preserving a0..a11.
 *
 * Expands to nothing when the windowed option is not configured.
 * For 32- and 64-register files it calls (call12) into the assist chain
 * that window_spill_common instantiates; the assist label number is the
 * register file size minus the 12 registers of the call increment.
 * An unrecognized register file size is rejected at preprocessing time
 * (as window_spill_function does) instead of silently emitting nothing.
 */
.macro window_spill12
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call12	.L__wdwspill_assist20	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call12	.L__wdwspill_assist52	// call deep enough to clear out any live callers
# else
#  error "window_spill12: unrecognized address register file size"
# endif
#endif
.endm	// window_spill12
/*----------------------------------------------------------------------
* window_spill_function
*
* This macro outputs a function that will spill its caller's callers'
* register windows to the stack. Eg. it could be used to implement
* a version of xthal_window_spill() that works in non-privileged tasks.
* This works for both privileged and non-privileged tasks.
*
* Typical usage:
*
* .text
* .align 4
* .global my_spill_function
* .type my_spill_function,@function
* my_spill_function:
* window_spill_function
*
* On entry to resulting function:
* none
* On exit from resulting function:
* none (no caller windows are in live registers)
*/
/*
 * window_spill_function -- body of a function that spills its caller's
 * callers' register windows.
 *
 * The generated code inspects the two top bits of a0 to tell which call
 * size (call4, call8 or call12) was used to reach it, then picks its own
 * call size and assist entry to match.  In the 32-register case the two
 * call sizes always add up to 16 registers before reaching
 * .L__wdwspill_assist16.  In the 64-register case the call4 and call8
 * entries both end up calling .L__wdwspill_assist52, while the call12
 * entry goes straight to .L__wdwspill_assist40.
 *
 * The macro finishes by invoking window_spill_common, so the assist
 * routines are instantiated along with the function.
 *
 * NOTE(review): when XCHAL_HAVE_WINDOWED is 0 this macro emits no
 * instructions at all (window_spill_common is itself conditional), so a
 * label placed before it would have no body -- confirm it is never used
 * in such configurations.
 */
.macro window_spill_function
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 32
entry sp, 48
bbci.l a0, 31, 1f // branch if called with call4
bbsi.l a0, 30, 2f // branch if called with call12
call8 .L__wdwspill_assist16 // called with call8, only need another 8
retw
1: call12 .L__wdwspill_assist16 // called with call4, only need another 12
retw
2: call4 .L__wdwspill_assist16 // called with call12, only need another 4
retw
# elif XCHAL_NUM_AREGS == 64
entry sp, 48
bbci.l a0, 31, 1f // branch if called with call4
bbsi.l a0, 30, 2f // branch if called with call12
call4 .L__wdwspill_assist52 // called with call8, only need a call4
retw
1: call8 .L__wdwspill_assist52 // called with call4, only need a call8
retw
2: call12 .L__wdwspill_assist40 // called with call12, can skip a call12
retw
# elif XCHAL_NUM_AREGS == 16
// 16-register file: no assist calls; a single register write per path
// (a7 after call8, a11 after call4, nothing after call12).
entry sp, 16
bbci.l a0, 31, 1f // branch if called with call4
bbsi.l a0, 30, 2f // branch if called with call12
movi a7, 0 // called with call8
retw
1: movi a11, 0 // called with call4
2: retw // if called with call12, everything already spilled
// movi a15, 0 // trick to spill all but the direct caller
// j 1f
// // The entry instruction is magical in the assembler (gets auto-aligned)
// // so we have to jump to it to avoid falling through the padding.
// // We need entry/retw to know where to return.
//1: entry sp, 16
// retw
# else
# error "unrecognized address register file size"
# endif
#endif /* XCHAL_HAVE_WINDOWED */
window_spill_common
.endm // window_spill_function
/*----------------------------------------------------------------------
* window_spill_common
*
* Common code used by any number of invocations of the window_spill##
* and window_spill_function macros.
*
* Must be instantiated exactly once within a given assembly unit,
* within call/j range of and same section as window_spill##
* macro invocations for that assembly unit.
* (Is automatically instantiated by the window_spill_function macro.)
*/
/*
 * window_spill_common -- the chain of assist routines that the
 * window_spill4/8/12 and window_spill_function macros call into.
 *
 * Only emitted for windowed configurations with 32 or 64 address
 * registers, and only once per assembly unit: the symbol
 * .L__wdwspill_defined is set after the first expansion and later
 * expansions are skipped.
 *
 * Every .L__wdwspill_assistNN routine is entry / one call / retw, and the
 * call size subtracted from NN gives the number of the routine it calls:
 *   60 -call8-> 52,  56 -call4-> 52,  52 -call12-> 40,  40 -call12-> 28,
 *   28 -call12-> 16, 24 -call8-> 16,  20 -call4-> 16.
 * The chain ends at .L__wdwspill_assist16, which writes a15 and returns.
 * The entry frame size is 16, 32 or 48 bytes for a routine that makes a
 * call4, call8 or call12 respectively.
 */
.macro window_spill_common
#if XCHAL_HAVE_WINDOWED && (XCHAL_NUM_AREGS == 32 || XCHAL_NUM_AREGS == 64)
.ifndef .L__wdwspill_defined
# if XCHAL_NUM_AREGS >= 64
// Entries used only with a 64-register file.
.L__wdwspill_assist60:
entry sp, 32
call8 .L__wdwspill_assist52
retw
.L__wdwspill_assist56:
entry sp, 16
call4 .L__wdwspill_assist52
retw
.L__wdwspill_assist52:
entry sp, 48
call12 .L__wdwspill_assist40
retw
.L__wdwspill_assist40:
entry sp, 48
call12 .L__wdwspill_assist28
retw
# endif
// Entries shared by the 32- and 64-register configurations.
.L__wdwspill_assist28:
entry sp, 48
call12 .L__wdwspill_assist16
retw
.L__wdwspill_assist24:
entry sp, 32
call8 .L__wdwspill_assist16
retw
.L__wdwspill_assist20:
entry sp, 16
call4 .L__wdwspill_assist16
retw
// End of the chain: write the last register of the window and return.
.L__wdwspill_assist16:
entry sp, 16
movi a15, 0
retw
// Mark the routines as emitted so a repeated expansion is a no-op.
.set .L__wdwspill_defined, 1
.endif
#endif /* XCHAL_HAVE_WINDOWED with 32 or 64 aregs */
.endm // window_spill_common
/*----------------------------------------------------------------------
* beqi32
*
* This macro implements a version of beqi for an arbitrary 32-bit immediate value.
*
* beqi32 ax, ay, imm32, label
*
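* Compares the value in register ax with imm32 and branches to label if
* they are equal. Clobbers register ay if needed (only when imm32 is not
* one of the values this macro encodes directly with beqi or beqz).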
*
*/
.macro beqi32 ax, ay, imm, label
	// The three special cases below are mutually exclusive, so they are
	// simply tried one after another; anything else takes the general path.
	.ifeq (\imm)			// 0 ?
	beqz	\ax, \label
	.else
	.ifeq (\imm+1)			// -1 ?
	beqi	\ax, \imm, \label
	.else
	.ifeq ((\imm-1) & ~7)		// 1..8 ?
	beqi	\ax, \imm, \label
	.else
	// General case: load the constant into \ay and compare registers.
	// (Immediates 10,12,16,32,64,128,256 could also be handled directly,
	// at the cost of a longer macro.)
	movi	\ay, \imm
	beq	\ax, \ay, \label
	.endif
	.endif
	.endif
.endm	// beqi32
#endif /*XTENSA_COREASM_H*/