mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-19 05:56:11 +07:00
crypto: sha512-avx2 - Fix RBP usage
Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Mix things up a little bit to get rid of the RBP usage, without hurting
performance too much. Use RDI instead of RBP for the TBL pointer. That
will clobber CTX, so spill CTX onto the stack and use R12 to read it in
the outer loop. R12 is used as a non-persistent temporary variable
elsewhere, so it's safe to use.

Also remove the unused y4 variable.

Reported-by: Eric Biggers <ebiggers3@gmail.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
539012dcbd
commit
ca04c82376
@ -69,8 +69,9 @@ XFER = YTMP0
|
||||
|
||||
BYTE_FLIP_MASK = %ymm9
|
||||
|
||||
# 1st arg
|
||||
CTX = %rdi
|
||||
# 1st arg is %rdi, which is saved to the stack and accessed later via %r12
|
||||
CTX1 = %rdi
|
||||
CTX2 = %r12
|
||||
# 2nd arg
|
||||
INP = %rsi
|
||||
# 3rd arg
|
||||
@ -81,7 +82,7 @@ d = %r8
|
||||
e = %rdx
|
||||
y3 = %rsi
|
||||
|
||||
TBL = %rbp
|
||||
TBL = %rdi # clobbers CTX1
|
||||
|
||||
a = %rax
|
||||
b = %rbx
|
||||
@ -91,26 +92,26 @@ g = %r10
|
||||
h = %r11
|
||||
old_h = %r11
|
||||
|
||||
T1 = %r12
|
||||
T1 = %r12 # clobbers CTX2
|
||||
y0 = %r13
|
||||
y1 = %r14
|
||||
y2 = %r15
|
||||
|
||||
y4 = %r12
|
||||
|
||||
# Local variables (stack frame)
|
||||
XFER_SIZE = 4*8
|
||||
SRND_SIZE = 1*8
|
||||
INP_SIZE = 1*8
|
||||
INPEND_SIZE = 1*8
|
||||
CTX_SIZE = 1*8
|
||||
RSPSAVE_SIZE = 1*8
|
||||
GPRSAVE_SIZE = 6*8
|
||||
GPRSAVE_SIZE = 5*8
|
||||
|
||||
frame_XFER = 0
|
||||
frame_SRND = frame_XFER + XFER_SIZE
|
||||
frame_INP = frame_SRND + SRND_SIZE
|
||||
frame_INPEND = frame_INP + INP_SIZE
|
||||
frame_RSPSAVE = frame_INPEND + INPEND_SIZE
|
||||
frame_CTX = frame_INPEND + INPEND_SIZE
|
||||
frame_RSPSAVE = frame_CTX + CTX_SIZE
|
||||
frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
|
||||
frame_size = frame_GPRSAVE + GPRSAVE_SIZE
|
||||
|
||||
@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx)
|
||||
mov %rax, frame_RSPSAVE(%rsp)
|
||||
|
||||
# Save GPRs
|
||||
mov %rbp, frame_GPRSAVE(%rsp)
|
||||
mov %rbx, 8*1+frame_GPRSAVE(%rsp)
|
||||
mov %r12, 8*2+frame_GPRSAVE(%rsp)
|
||||
mov %r13, 8*3+frame_GPRSAVE(%rsp)
|
||||
mov %r14, 8*4+frame_GPRSAVE(%rsp)
|
||||
mov %r15, 8*5+frame_GPRSAVE(%rsp)
|
||||
mov %rbx, 8*0+frame_GPRSAVE(%rsp)
|
||||
mov %r12, 8*1+frame_GPRSAVE(%rsp)
|
||||
mov %r13, 8*2+frame_GPRSAVE(%rsp)
|
||||
mov %r14, 8*3+frame_GPRSAVE(%rsp)
|
||||
mov %r15, 8*4+frame_GPRSAVE(%rsp)
|
||||
|
||||
shl $7, NUM_BLKS # convert to bytes
|
||||
jz done_hash
|
||||
@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx)
|
||||
mov NUM_BLKS, frame_INPEND(%rsp)
|
||||
|
||||
## load initial digest
|
||||
mov 8*0(CTX),a
|
||||
mov 8*1(CTX),b
|
||||
mov 8*2(CTX),c
|
||||
mov 8*3(CTX),d
|
||||
mov 8*4(CTX),e
|
||||
mov 8*5(CTX),f
|
||||
mov 8*6(CTX),g
|
||||
mov 8*7(CTX),h
|
||||
mov 8*0(CTX1), a
|
||||
mov 8*1(CTX1), b
|
||||
mov 8*2(CTX1), c
|
||||
mov 8*3(CTX1), d
|
||||
mov 8*4(CTX1), e
|
||||
mov 8*5(CTX1), f
|
||||
mov 8*6(CTX1), g
|
||||
mov 8*7(CTX1), h
|
||||
|
||||
# save %rdi (CTX) before it gets clobbered
|
||||
mov %rdi, frame_CTX(%rsp)
|
||||
|
||||
vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
|
||||
|
||||
@ -652,14 +655,15 @@ loop2:
|
||||
subq $1, frame_SRND(%rsp)
|
||||
jne loop2
|
||||
|
||||
addm 8*0(CTX),a
|
||||
addm 8*1(CTX),b
|
||||
addm 8*2(CTX),c
|
||||
addm 8*3(CTX),d
|
||||
addm 8*4(CTX),e
|
||||
addm 8*5(CTX),f
|
||||
addm 8*6(CTX),g
|
||||
addm 8*7(CTX),h
|
||||
mov frame_CTX(%rsp), CTX2
|
||||
addm 8*0(CTX2), a
|
||||
addm 8*1(CTX2), b
|
||||
addm 8*2(CTX2), c
|
||||
addm 8*3(CTX2), d
|
||||
addm 8*4(CTX2), e
|
||||
addm 8*5(CTX2), f
|
||||
addm 8*6(CTX2), g
|
||||
addm 8*7(CTX2), h
|
||||
|
||||
mov frame_INP(%rsp), INP
|
||||
add $128, INP
|
||||
@ -669,12 +673,11 @@ loop2:
|
||||
done_hash:
|
||||
|
||||
# Restore GPRs
|
||||
mov frame_GPRSAVE(%rsp) ,%rbp
|
||||
mov 8*1+frame_GPRSAVE(%rsp) ,%rbx
|
||||
mov 8*2+frame_GPRSAVE(%rsp) ,%r12
|
||||
mov 8*3+frame_GPRSAVE(%rsp) ,%r13
|
||||
mov 8*4+frame_GPRSAVE(%rsp) ,%r14
|
||||
mov 8*5+frame_GPRSAVE(%rsp) ,%r15
|
||||
mov 8*0+frame_GPRSAVE(%rsp), %rbx
|
||||
mov 8*1+frame_GPRSAVE(%rsp), %r12
|
||||
mov 8*2+frame_GPRSAVE(%rsp), %r13
|
||||
mov 8*3+frame_GPRSAVE(%rsp), %r14
|
||||
mov 8*4+frame_GPRSAVE(%rsp), %r15
|
||||
|
||||
# Restore Stack Pointer
|
||||
mov frame_RSPSAVE(%rsp), %rsp
|
||||
|
Loading…
Reference in New Issue
Block a user