mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-14 15:56:08 +07:00
a7159a87a3
For many sun4v processor types, reading or writing a privileged register has a latency of 40 to 70 cycles. Use a combination of the low-latency allclean, otherw, normalw, and nop instructions in etrap and rtrap to replace 2 rdpr and 5 wrpr instructions and improve etrap/rtrap performance. allclean, otherw, and normalw are available on NG2 and later processors. The average ticks to execute the flush windows trap ("ta 0x3") with and without this patch on select platforms: CPU Not patched Patched % Latency Reduction NG2 1762 1558 -11.58 NG4 3619 3204 -11.47 M7 3015 2624 -12.97 SPARC64-X 829 770 -7.12 Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
259 lines
6.0 KiB
ArmAsm
259 lines
6.0 KiB
ArmAsm
/*
|
|
* etrap.S: Preparing for entry into the kernel on Sparc V9.
|
|
*
|
|
* Copyright (C) 1996, 1997 David S. Miller (davem@caip.rutgers.edu)
|
|
* Copyright (C) 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
|
|
*/
|
|
|
|
|
|
#include <asm/asi.h>
|
|
#include <asm/pstate.h>
|
|
#include <asm/ptrace.h>
|
|
#include <asm/page.h>
|
|
#include <asm/spitfire.h>
|
|
#include <asm/head.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/mmu.h>
|
|
|
|
#define TASK_REGOFF (THREAD_SIZE-TRACEREG_SZ-STACKFRAME_SZ)
|
|
#define ETRAP_PSTATE1 (PSTATE_TSO | PSTATE_PRIV)
|
|
#define ETRAP_PSTATE2 \
|
|
(PSTATE_TSO | PSTATE_PEF | PSTATE_PRIV | PSTATE_IE)
|
|
|
|
/*
|
|
* On entry, %g7 is return address - 0x4.
|
|
* %g4 and %g5 will be preserved %l4 and %l5 respectively.
|
|
*/
|
|
|
|
.text
|
|
.align 64
|
|
.globl etrap_syscall, etrap, etrap_irq, etraptl1
|
|
etrap: rdpr %pil, %g2
|
|
etrap_irq: clr %g3
|
|
etrap_syscall: TRAP_LOAD_THREAD_REG(%g6, %g1)
|
|
rdpr %tstate, %g1
|
|
or %g1, %g3, %g1
|
|
sllx %g2, 20, %g3
|
|
andcc %g1, TSTATE_PRIV, %g0
|
|
or %g1, %g3, %g1
|
|
bne,pn %xcc, 1f
|
|
sub %sp, STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS, %g2
|
|
661: wrpr %g0, 7, %cleanwin
|
|
.section .fast_win_ctrl_1insn_patch, "ax"
|
|
.word 661b
|
|
.word 0x85880000 ! allclean
|
|
.previous
|
|
|
|
sethi %hi(TASK_REGOFF), %g2
|
|
sethi %hi(TSTATE_PEF), %g3
|
|
or %g2, %lo(TASK_REGOFF), %g2
|
|
and %g1, %g3, %g3
|
|
brnz,pn %g3, 1f
|
|
add %g6, %g2, %g2
|
|
wr %g0, 0, %fprs
|
|
1: rdpr %tpc, %g3
|
|
|
|
stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TSTATE]
|
|
rdpr %tnpc, %g1
|
|
stx %g3, [%g2 + STACKFRAME_SZ + PT_V9_TPC]
|
|
rd %y, %g3
|
|
stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC]
|
|
rdpr %tt, %g1
|
|
st %g3, [%g2 + STACKFRAME_SZ + PT_V9_Y]
|
|
sethi %hi(PT_REGS_MAGIC), %g3
|
|
or %g3, %g1, %g1
|
|
st %g1, [%g2 + STACKFRAME_SZ + PT_V9_MAGIC]
|
|
|
|
rdpr %cansave, %g1
|
|
brnz,pt %g1, etrap_save
|
|
nop
|
|
|
|
rdpr %cwp, %g1
|
|
add %g1, 2, %g1
|
|
wrpr %g1, %cwp
|
|
be,pt %xcc, etrap_user_spill
|
|
mov ASI_AIUP, %g3
|
|
|
|
rdpr %otherwin, %g3
|
|
brz %g3, etrap_kernel_spill
|
|
mov ASI_AIUS, %g3
|
|
|
|
etrap_user_spill:
|
|
|
|
wr %g3, 0x0, %asi
|
|
ldx [%g6 + TI_FLAGS], %g3
|
|
and %g3, _TIF_32BIT, %g3
|
|
brnz,pt %g3, etrap_user_spill_32bit
|
|
nop
|
|
ba,a,pt %xcc, etrap_user_spill_64bit
|
|
|
|
etrap_save: save %g2, -STACK_BIAS, %sp
|
|
mov %g6, %l6
|
|
|
|
bne,pn %xcc, 3f
|
|
mov PRIMARY_CONTEXT, %l4
|
|
661: rdpr %canrestore, %g3
|
|
.section .fast_win_ctrl_1insn_patch, "ax"
|
|
.word 661b
|
|
nop
|
|
.previous
|
|
|
|
rdpr %wstate, %g2
|
|
661: wrpr %g0, 0, %canrestore
|
|
.section .fast_win_ctrl_1insn_patch, "ax"
|
|
.word 661b
|
|
nop
|
|
.previous
|
|
sll %g2, 3, %g2
|
|
|
|
/* Set TI_SYS_FPDEPTH to 1 and clear TI_SYS_NOERROR. */
|
|
mov 1, %l5
|
|
sth %l5, [%l6 + TI_SYS_NOERROR]
|
|
|
|
661: wrpr %g3, 0, %otherwin
|
|
.section .fast_win_ctrl_1insn_patch, "ax"
|
|
.word 661b
|
|
.word 0x87880000 ! otherw
|
|
.previous
|
|
|
|
wrpr %g2, 0, %wstate
|
|
sethi %hi(sparc64_kern_pri_context), %g2
|
|
ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3
|
|
|
|
661: stxa %g3, [%l4] ASI_DMMU
|
|
.section .sun4v_1insn_patch, "ax"
|
|
.word 661b
|
|
stxa %g3, [%l4] ASI_MMU
|
|
.previous
|
|
|
|
sethi %hi(KERNBASE), %l4
|
|
flush %l4
|
|
mov ASI_AIUS, %l7
|
|
2: mov %g4, %l4
|
|
mov %g5, %l5
|
|
add %g7, 4, %l2
|
|
|
|
/* Go to trap time globals so we can save them. */
|
|
661: wrpr %g0, ETRAP_PSTATE1, %pstate
|
|
.section .sun4v_1insn_patch, "ax"
|
|
.word 661b
|
|
SET_GL(0)
|
|
.previous
|
|
|
|
stx %g1, [%sp + PTREGS_OFF + PT_V9_G1]
|
|
stx %g2, [%sp + PTREGS_OFF + PT_V9_G2]
|
|
sllx %l7, 24, %l7
|
|
stx %g3, [%sp + PTREGS_OFF + PT_V9_G3]
|
|
rdpr %cwp, %l0
|
|
stx %g4, [%sp + PTREGS_OFF + PT_V9_G4]
|
|
stx %g5, [%sp + PTREGS_OFF + PT_V9_G5]
|
|
stx %g6, [%sp + PTREGS_OFF + PT_V9_G6]
|
|
stx %g7, [%sp + PTREGS_OFF + PT_V9_G7]
|
|
or %l7, %l0, %l7
|
|
sethi %hi(TSTATE_TSO | TSTATE_PEF), %l0
|
|
or %l7, %l0, %l7
|
|
wrpr %l2, %tnpc
|
|
wrpr %l7, (TSTATE_PRIV | TSTATE_IE), %tstate
|
|
stx %i0, [%sp + PTREGS_OFF + PT_V9_I0]
|
|
stx %i1, [%sp + PTREGS_OFF + PT_V9_I1]
|
|
stx %i2, [%sp + PTREGS_OFF + PT_V9_I2]
|
|
stx %i3, [%sp + PTREGS_OFF + PT_V9_I3]
|
|
stx %i4, [%sp + PTREGS_OFF + PT_V9_I4]
|
|
stx %i5, [%sp + PTREGS_OFF + PT_V9_I5]
|
|
stx %i6, [%sp + PTREGS_OFF + PT_V9_I6]
|
|
mov %l6, %g6
|
|
stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
|
|
LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1)
|
|
ldx [%g6 + TI_TASK], %g4
|
|
done
|
|
|
|
3: mov ASI_P, %l7
|
|
ldub [%l6 + TI_FPDEPTH], %l5
|
|
add %l6, TI_FPSAVED + 1, %l4
|
|
srl %l5, 1, %l3
|
|
add %l5, 2, %l5
|
|
|
|
/* Set TI_SYS_FPDEPTH to %l5 and clear TI_SYS_NOERROR. */
|
|
sth %l5, [%l6 + TI_SYS_NOERROR]
|
|
ba,pt %xcc, 2b
|
|
stb %g0, [%l4 + %l3]
|
|
nop
|
|
|
|
etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself.
|
|
* We place this right after pt_regs on the trap stack.
|
|
* The layout is:
|
|
* 0x00 TL1's TSTATE
|
|
* 0x08 TL1's TPC
|
|
* 0x10 TL1's TNPC
|
|
* 0x18 TL1's TT
|
|
* ...
|
|
* 0x58 TL4's TT
|
|
* 0x60 TL
|
|
*/
|
|
TRAP_LOAD_THREAD_REG(%g6, %g1)
|
|
sub %sp, ((4 * 8) * 4) + 8, %g2
|
|
rdpr %tl, %g1
|
|
|
|
wrpr %g0, 1, %tl
|
|
rdpr %tstate, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x00]
|
|
rdpr %tpc, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x08]
|
|
rdpr %tnpc, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x10]
|
|
rdpr %tt, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x18]
|
|
|
|
wrpr %g0, 2, %tl
|
|
rdpr %tstate, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x20]
|
|
rdpr %tpc, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x28]
|
|
rdpr %tnpc, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x30]
|
|
rdpr %tt, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x38]
|
|
|
|
sethi %hi(is_sun4v), %g3
|
|
lduw [%g3 + %lo(is_sun4v)], %g3
|
|
brnz,pn %g3, finish_tl1_capture
|
|
nop
|
|
|
|
wrpr %g0, 3, %tl
|
|
rdpr %tstate, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x40]
|
|
rdpr %tpc, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x48]
|
|
rdpr %tnpc, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x50]
|
|
rdpr %tt, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x58]
|
|
|
|
wrpr %g0, 4, %tl
|
|
rdpr %tstate, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x60]
|
|
rdpr %tpc, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x68]
|
|
rdpr %tnpc, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x70]
|
|
rdpr %tt, %g3
|
|
stx %g3, [%g2 + STACK_BIAS + 0x78]
|
|
|
|
stx %g1, [%g2 + STACK_BIAS + 0x80]
|
|
|
|
finish_tl1_capture:
|
|
wrpr %g0, 1, %tl
|
|
661: nop
|
|
.section .sun4v_1insn_patch, "ax"
|
|
.word 661b
|
|
SET_GL(1)
|
|
.previous
|
|
|
|
rdpr %tstate, %g1
|
|
sub %g2, STACKFRAME_SZ + TRACEREG_SZ - STACK_BIAS, %g2
|
|
ba,pt %xcc, 1b
|
|
andcc %g1, TSTATE_PRIV, %g0
|
|
|
|
#undef TASK_REGOFF
|
|
#undef ETRAP_PSTATE1
|