mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-15 21:56:26 +07:00
5be8b70af1
The LSE atomics implementation uses runtime patching to patch in calls
to out of line non-LSE atomics implementations on cores that lack hardware
support for LSE. To avoid paying the overhead cost of a function call even
if no call ends up being made, the bl instruction is kept invisible to the
compiler, and the out of line implementations preserve all registers, not
just the ones that they are required to preserve as per the AAPCS64.
However, commit fd045f6cd9 ("arm64: add support for module PLTs") added
support for routing branch instructions via veneers if the branch target
offset exceeds the range of the ordinary relative branch instructions.
Since this deals with jump and call instructions that are exposed to ELF
relocations, the PLT code uses x16 to hold the address of the branch target
when it performs an indirect branch-to-register, something which is
explicitly allowed by the AAPCS64 (and ordinary compiler generated code
does not expect register x16 or x17 to retain their values across a bl
instruction).
Since the lse runtime patched bl instructions don't adhere to the AAPCS64,
they don't deal with this clobbering of registers x16 and x17. So add them
to the clobber list of the asm() statements that perform the call
instructions, and drop x16 and x17 from the list of registers that are
callee saved in the out of line non-LSE implementations.
In addition, since we have given these functions two scratch registers,
they no longer need to stack/unstack temp registers.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[will: factored clobber list into #define, updated Makefile comment]
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
55 lines
1.2 KiB
C
55 lines
1.2 KiB
C
#ifndef __ASM_LSE_H
|
|
#define __ASM_LSE_H
|
|
|
|
#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
|
|
|
|
#include <linux/stringify.h>
|
|
#include <asm/alternative.h>
|
|
#include <asm/cpufeature.h>
|
|
|
|
#ifdef __ASSEMBLER__
|
|
|
|
/*
 * Ask the assembler to accept LSE instructions in assembly sources that
 * include this header.
 */
.arch_extension lse
|
|
|
|
/*
 * alt_lse llsc, lse
 *
 * Hands both instruction sequences to alternative_insn, keyed on the
 * ARM64_HAS_LSE_ATOMICS capability.
 *   llsc - the non-LSE (ll/sc) instruction sequence
 *   lse  - the LSE instruction sequence
 * NOTE(review): alternative_insn is not defined in this file (presumably
 * it comes from <asm/alternative.h>); confirm there which of the two
 * sequences is emitted by default and which one is patched in.
 */
.macro alt_lse, llsc, lse
|
|
alternative_insn "\llsc", "\lse", ARM64_HAS_LSE_ATOMICS
|
|
.endm
|
|
|
|
#else /* __ASSEMBLER__ */
|
|
|
|
/*
 * C counterpart of the assembler-side directive: a file-scope asm statement
 * that places ".arch_extension lse" into the compiler's assembly output.
 */
__asm__(".arch_extension lse");
|
|
|
|
/*
 * Move the ll/sc atomics out-of-line.
 *
 * In this configuration:
 *   __LL_SC_INLINE    - expands to nothing, so no inline qualifier is added
 *                       where it is used.
 *   __LL_SC_PREFIX(x) - pastes the prefix "__ll_sc_" onto x.
 *   __LL_SC_EXPORT(x) - applies EXPORT_SYMBOL() to the prefixed name
 *                       (EXPORT_SYMBOL is defined outside this file).
 */
|
|
#define __LL_SC_INLINE
|
|
#define __LL_SC_PREFIX(x) __ll_sc_##x
|
|
#define __LL_SC_EXPORT(x) EXPORT_SYMBOL(__LL_SC_PREFIX(x))
|
|
|
|
/*
 * Macro for constructing calls to out-of-line ll/sc atomics.
 *
 * __LL_SC_CALL(op) builds the inline-asm string for a "bl" to the
 * __LL_SC_PREFIX()-ed symbol for op, followed by a newline. The symbol
 * name is turned into a string with __stringify().
 *
 * __LL_SC_CLOBBERS is the clobber list for asm() statements that contain
 * such a call. The bl is not visible to the compiler, so every register the
 * call may modify has to be listed: x30 is the link register written by bl,
 * and x16/x17 may be overwritten when the branch is routed through a module
 * PLT veneer.
 */
|
|
#define __LL_SC_CALL(op) "bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"
|
|
#define __LL_SC_CLOBBERS "x16", "x17", "x30"
|
|
|
|
/*
 * In-line patching at runtime.
 *
 * ARM64_LSE_ATOMIC_INSN(llsc, lse) passes both instruction sequences to
 * ALTERNATIVE(), keyed on the ARM64_HAS_LSE_ATOMICS capability.
 * NOTE(review): ALTERNATIVE() is defined outside this file; presumably llsc
 * is the sequence emitted at build time and lse is patched in on CPUs that
 * have the capability - confirm against <asm/alternative.h>.
 */
|
|
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) \
|
|
ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)
|
|
|
|
#endif /* __ASSEMBLER__ */
|
|
#else /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
|
|
|
|
#ifdef __ASSEMBLER__
|
|
|
|
/*
 * alt_lse llsc, lse
 *
 * Fallback used when CONFIG_AS_LSE and CONFIG_ARM64_LSE_ATOMICS are not
 * both defined: the macro expands to the llsc sequence only and the lse
 * argument is ignored, so no alternative is recorded.
 */
.macro alt_lse, llsc, lse
|
|
\llsc
|
|
.endm
|
|
|
|
#else /* __ASSEMBLER__ */
|
|
|
|
/*
 * Fallback definitions used when CONFIG_AS_LSE and CONFIG_ARM64_LSE_ATOMICS
 * are not both defined:
 *   __LL_SC_INLINE    - "static inline", so code using it is declared
 *                       static inline.
 *   __LL_SC_PREFIX(x) - leaves the name x unchanged.
 *   __LL_SC_EXPORT(x) - expands to nothing, so no symbol is exported.
 */
#define __LL_SC_INLINE static inline
|
|
#define __LL_SC_PREFIX(x) x
|
|
#define __LL_SC_EXPORT(x)
|
|
|
|
/* Without LSE support, always select the ll/sc sequence; lse is dropped. */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc
|
|
|
|
#endif /* __ASSEMBLER__ */
|
|
#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
|
|
#endif /* __ASM_LSE_H */
|