335aca5f65
From Nick's cover letter:

Linux powerpc new system call instruction and ABI

System Call Vectored (scv) ABI
==============================

The scv instruction is introduced with POWER9 / ISA3; it comes with an
rfscv counterpart. The benefit of these instructions is performance
(trading slower SRR0/1 with faster LR/CTR registers, and entering the
kernel with MSR[EE] and MSR[RI] left enabled, which can reduce MSR
updates). The scv instruction has 128 levels (not enough to cover the
Linux system call space).

Assignment and advertisement
----------------------------

The proposal is to assign scv levels conservatively, and advertise them
with HWCAP feature bits as we add support for more.

Linux has not enabled FSCR[SCV] yet, so executing the scv instruction
will cause the kernel to log a "SCV facility unavailable" message, and
deliver a SIGILL with ILL_ILLOPC to the process. Linux has defined a
HWCAP2 bit PPC_FEATURE2_SCV for SCV support, but does not set it.

This change allocates the zero level ('scv 0'), advertised with
PPC_FEATURE2_SCV, which will be used to provide normal Linux system
calls (equivalent to 'sc').

Attempting to execute scv with other levels will cause a SIGILL to be
delivered the same as before, but will not log a "SCV facility
unavailable" message (because the processor facility is enabled).

Calling convention
------------------

The proposal is for scv 0 to provide the standard Linux system call ABI
with the following differences from sc convention[1]:

- LR is to be volatile across scv calls. This is necessary because the
  scv instruction clobbers LR. From previous discussion, this should be
  possible to deal with in GCC clobbers and CFI.

- cr1 and cr5-cr7 are volatile. This matches the C ABI and would allow
  the kernel system call exit to avoid restoring the volatile cr
  registers (although we probably still would anyway to avoid
  information leaks).

- Error handling: The consensus among kernel, glibc, and musl is to move
  to using negative return values in r3 rather than CR0[SO]=1 to
  indicate error, which matches most other architectures, and is closer
  to a function call.

Notes
-----

- r0, r4-r8 are documented as volatile in the ABI, but the kernel patch
  as submitted currently preserves them. This is to leave room for
  deciding which way to go with these. Some small benefit was found by
  preserving them[1] but I'm not convinced it's worth deviating from the
  C function call ABI just for this. Release code should follow the ABI.

Previous discussions:
https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/208691.html
https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/209268.html

[1] https://github.com/torvalds/linux/blob/master/Documentation/powerpc/syscall64-abi.rst
[2] https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/209263.html
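To make the proposed convention concrete, here is a minimal user-space
sketch (not part of the patch) that issues 'scv 0' directly from GCC
inline assembly. The helper name scv_getpid() is hypothetical; the
sketch assumes a POWER9 / ISA3 toolchain and a kernel that sets
PPC_FEATURE2_SCV, checks HWCAP2 before using the instruction, and
follows the convention above: LR and CTR clobbered, cr0/cr1/cr5-cr7
volatile, a negative value in r3 meaning -errno, and the C-ABI volatile
GPRs treated as clobbered, as release code should assume.

	#include <sys/auxv.h>		/* getauxval(), AT_HWCAP2 */
	#include <asm/cputable.h>	/* PPC_FEATURE2_SCV */
	#include <asm/unistd.h>		/* __NR_getpid */

	static long scv_getpid(void)
	{
		/* Only use scv 0 once the kernel advertises it. */
		if (!(getauxval(AT_HWCAP2) & PPC_FEATURE2_SCV))
			return -1;	/* real code would fall back to 'sc' */

		register long r0 asm("r0") = __NR_getpid;
		register long r3 asm("r3");

		asm volatile("scv 0"
			     : "=r" (r3)
			     : "r" (r0)
			     : "lr", "ctr", "cr0", "cr1", "cr5", "cr6", "cr7",
			       "r4", "r5", "r6", "r7", "r8", "r9", "r10",
			       "r11", "r12", "memory");

		return r3;	/* < 0 means -errno under the new ABI */
	}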
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2004 Paul Mackerras <paulus@au.ibm.com>, IBM
 */
#include <asm/inst.h>

struct pt_regs;

/*
 * We don't allow single-stepping an mtmsrd that would clear
 * MSR_RI, since that would make the exception unrecoverable.
 * Since we need to single-step to proceed from a breakpoint,
 * we don't allow putting a breakpoint on an mtmsrd instruction.
 * Similarly we don't allow breakpoints on rfid instructions.
 * These macros tell us if an instruction is a mtmsrd or rfid.
 * Note that IS_MTMSRD returns true for both an mtmsr (32-bit)
 * and an mtmsrd (64-bit).
 */
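/*
 * Each mask below keeps the primary opcode (bits 0-5) and the extended
 * opcode (bits 21-30); IS_MTMSRD also masks out the one extended-opcode
 * bit that differs between mtmsr (XO 146) and mtmsrd (XO 178), which is
 * how it matches both forms.
 */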
#define IS_MTMSRD(instr) ((ppc_inst_val(instr) & 0xfc0007be) == 0x7c000124)
#define IS_RFID(instr) ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000024)
#define IS_RFI(instr) ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000064)

enum instruction_type {
	COMPUTE,	/* arith/logical/CR op, etc. */
	LOAD,		/* load and store types need to be contiguous */
	LOAD_MULTI,
	LOAD_FP,
	LOAD_VMX,
	LOAD_VSX,
	STORE,
	STORE_MULTI,
	STORE_FP,
	STORE_VMX,
	STORE_VSX,
	LARX,
	STCX,
	BRANCH,
	MFSPR,
	MTSPR,
	CACHEOP,
	BARRIER,
	SYSCALL,
	SYSCALL_VECTORED_0,
	MFMSR,
	MTMSR,
	RFI,
	INTERRUPT,
	UNKNOWN
};

#define INSTR_TYPE_MASK 0x1f

#define OP_IS_LOAD(type) ((LOAD <= (type) && (type) <= LOAD_VSX) || (type) == LARX)
#define OP_IS_STORE(type) ((STORE <= (type) && (type) <= STORE_VSX) || (type) == STCX)
#define OP_IS_LOAD_STORE(type) (LOAD <= (type) && (type) <= STCX)
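
/*
 * The range tests above rely on the LOAD ... STCX enumerators being
 * contiguous, as noted in enum instruction_type.
 */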

/* Compute flags, ORed in with type */
#define SETREG 0x20
#define SETCC 0x40
#define SETXER 0x80

/* Branch flags, ORed in with type */
#define SETLK 0x20
#define BRTAKEN 0x40
#define DECCTR 0x80

/* Load/store flags, ORed in with type */
#define SIGNEXT 0x20
#define UPDATE 0x40 /* matches bit in opcode 31 instructions */
#define BYTEREV 0x80
#define FPCONV 0x100

/* Barrier type field, ORed in with type */
#define BARRIER_MASK 0xe0
#define BARRIER_SYNC 0x00
#define BARRIER_ISYNC 0x20
#define BARRIER_EIEIO 0x40
#define BARRIER_LWSYNC 0x60
#define BARRIER_PTESYNC 0x80

/* Cacheop values, ORed in with type */
#define CACHEOP_MASK 0x700
#define DCBST 0
#define DCBF 0x100
#define DCBTST 0x200
#define DCBT 0x300
#define ICBI 0x400
#define DCBZ 0x500

/* VSX flags values */
#define VSX_FPCONV 1 /* do floating point SP/DP conversion */
#define VSX_SPLAT 2 /* store loaded value into all elements */
#define VSX_LDLEFT 4 /* load VSX register from left */
#define VSX_CHECK_VEC 8 /* check MSR_VEC not MSR_VSX for reg >= 32 */

/* Prefixed flag, ORed in with type */
#define PREFIXED 0x800

/* Size field in type word */
#define SIZE(n) ((n) << 12)
#define GETSIZE(w) ((w) >> 12)

#define GETTYPE(t) ((t) & INSTR_TYPE_MASK)
#define GETLENGTH(t) (((t) & PREFIXED) ? 8 : 4)

#define MKOP(t, f, s) ((t) | (f) | SIZE(s))
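
/*
 * For example, MKOP(LOAD, SIGNEXT | UPDATE, 4) describes a 4-byte
 * sign-extending load with update; GETTYPE() on that word returns
 * LOAD and GETSIZE() returns 4, since all type and flag bits sit
 * below bit 12.
 */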

/* Prefix instruction operands */
#define GET_PREFIX_RA(i) (((i) >> 16) & 0x1f)
#define GET_PREFIX_R(i) ((i) & (1ul << 20))
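
/*
 * GET_PREFIX_R extracts the R (PC-relative addressing) bit from the
 * prefix word of a prefixed instruction; GET_PREFIX_RA extracts the
 * 5-bit RA field from the suffix word.
 */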

extern s32 patch__exec_instr;

struct instruction_op {
	int type;
	int reg;
	unsigned long val;
	/* For LOAD/STORE/LARX/STCX */
	unsigned long ea;
	int update_reg;
	/* For MFSPR */
	int spr;
	u32 ccval;
	u32 xerval;
	u8 element_size;	/* for VSX/VMX loads/stores */
	u8 vsx_flags;
};
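
/*
 * One 128-bit VSX/VMX register value, viewable at whichever element
 * size the emulated access requires.
 */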
union vsx_reg {
	u8 b[16];
	u16 h[8];
	u32 w[4];
	unsigned long d[2];
	float fp[4];
	double dp[2];
	__vector128 v;
};

/*
 * Decode an instruction, and return information about it in *op
 * without changing *regs.
 *
 * Return value is 1 if the instruction can be emulated just by
 * updating *regs with the information in *op, -1 if we need the
 * GPRs but *regs doesn't contain the full register set, or 0
 * otherwise.
 */
extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
			 struct ppc_inst instr);
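
/*
 * For example, a caller holding a complete pt_regs can pair this with
 * emulate_update_regs() below:
 *
 *	struct instruction_op op;
 *
 *	if (analyse_instr(&op, regs, instr) == 1)
 *		emulate_update_regs(regs, &op);
 *
 * A return of 0 means the instruction needs more than a register
 * update (see emulate_step() and emulate_loadstore() below); -1 means
 * *regs does not hold the full GPR set needed to analyse it.
 */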

/*
 * Emulate an instruction that can be executed just by updating
 * fields in *regs.
 */
void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op);

/*
 * Emulate instructions that cause a transfer of control,
 * arithmetic/logical instructions, loads and stores,
 * cache operations and barriers.
 *
 * Returns 1 if the instruction was emulated successfully,
 * 0 if it could not be emulated, or -1 for an instruction that
 * should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.).
 */
extern int emulate_step(struct pt_regs *regs, struct ppc_inst instr);

/*
 * Emulate a load or store instruction by reading/writing the
 * memory of the current process. FP/VMX/VSX registers are assumed
 * to hold live values if the appropriate enable bit in regs->msr is
 * set; otherwise this will use the saved values in the thread struct
 * for user-mode accesses.
 */
extern int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op);

extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
			     const void *mem, bool cross_endian);
extern void emulate_vsx_store(struct instruction_op *op,
			      const union vsx_reg *reg, void *mem,
			      bool cross_endian);
extern int emulate_dcbz(unsigned long ea, struct pt_regs *regs);
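
For context, a hedged sketch of how these entry points combine in a
single-step or probe path. The wrapper try_emulate_one() and its
surrounding context are hypothetical, not part of this header; the
return-value handling follows the comments above.

	#include <asm/inst.h>
	#include <asm/ptrace.h>
	#include <asm/sstep.h>

	/* Hypothetical caller: try to emulate the instruction at regs->nip. */
	static int try_emulate_one(struct pt_regs *regs, struct ppc_inst instr)
	{
		/* The comments above forbid stepping these at all. */
		if (IS_MTMSRD(instr) || IS_RFID(instr) || IS_RFI(instr))
			return -1;

		/*
		 * 1  = emulated; *regs (including nip) already updated.
		 * 0  = not emulated; caller must run the instruction itself.
		 * -1 = must not be emulated (e.g. mtmsrd clearing MSR_RI).
		 */
		return emulate_step(regs, instr);
	}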