mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
74d8b90a88
Use the newly added SYM_FUNC_START_LOCAL to annotate beginnings of all functions which do not have ".globl" annotation, but their endings are annotated by ENDPROC. This is needed to balance ENDPROC for tools that generate debuginfo. These function names are not prepended with ".L" as they might appear in call traces and they wouldn't be visible after such change. To be symmetric, the functions' ENDPROCs are converted to the new SYM_FUNC_END. Signed-off-by: Jiri Slaby <jslaby@suse.cz> Signed-off-by: Borislav Petkov <bp@suse.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: linux-arch@vger.kernel.org Cc: linux-crypto@vger.kernel.org Cc: Thomas Gleixner <tglx@linutronix.de> Cc: x86-ml <x86@kernel.org> Link: https://lkml.kernel.org/r/20191011115108.12392-7-jslaby@suse.cz
748 lines
14 KiB
ArmAsm
748 lines
14 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* AES-NI + SSE2 implementation of AEGIS-128
|
|
*
|
|
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
|
|
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/frame.h>
|
|
|
|
#define STATE0 %xmm0
|
|
#define STATE1 %xmm1
|
|
#define STATE2 %xmm2
|
|
#define STATE3 %xmm3
|
|
#define STATE4 %xmm4
|
|
#define KEY %xmm5
|
|
#define MSG %xmm5
|
|
#define T0 %xmm6
|
|
#define T1 %xmm7
|
|
|
|
#define STATEP %rdi
|
|
#define LEN %rsi
|
|
#define SRC %rdx
|
|
#define DST %rcx
|
|
|
|
.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
|
|
.align 16
|
|
.Laegis128_const_0:
|
|
.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
|
|
.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
|
|
.Laegis128_const_1:
|
|
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
|
|
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
|
|
|
|
.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
|
|
.align 16
|
|
.Laegis128_counter:
|
|
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
|
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
|
|
|
.text
|
|
|
|
/*
|
|
* aegis128_update
|
|
* input:
|
|
* STATE[0-4] - input state
|
|
* output:
|
|
* STATE[0-4] - output state (shifted positions)
|
|
* changed:
|
|
* T0
|
|
*/
|
|
.macro aegis128_update
|
|
movdqa STATE4, T0
|
|
aesenc STATE0, STATE4
|
|
aesenc STATE1, STATE0
|
|
aesenc STATE2, STATE1
|
|
aesenc STATE3, STATE2
|
|
aesenc T0, STATE3
|
|
.endm
|
|
|
|
/*
|
|
* __load_partial: internal ABI
|
|
* input:
|
|
* LEN - bytes
|
|
* SRC - src
|
|
* output:
|
|
* MSG - message block
|
|
* changed:
|
|
* T0
|
|
* %r8
|
|
* %r9
|
|
*/
|
|
SYM_FUNC_START_LOCAL(__load_partial)
|
|
xor %r9d, %r9d
|
|
pxor MSG, MSG
|
|
|
|
mov LEN, %r8
|
|
and $0x1, %r8
|
|
jz .Lld_partial_1
|
|
|
|
mov LEN, %r8
|
|
and $0x1E, %r8
|
|
add SRC, %r8
|
|
mov (%r8), %r9b
|
|
|
|
.Lld_partial_1:
|
|
mov LEN, %r8
|
|
and $0x2, %r8
|
|
jz .Lld_partial_2
|
|
|
|
mov LEN, %r8
|
|
and $0x1C, %r8
|
|
add SRC, %r8
|
|
shl $0x10, %r9
|
|
mov (%r8), %r9w
|
|
|
|
.Lld_partial_2:
|
|
mov LEN, %r8
|
|
and $0x4, %r8
|
|
jz .Lld_partial_4
|
|
|
|
mov LEN, %r8
|
|
and $0x18, %r8
|
|
add SRC, %r8
|
|
shl $32, %r9
|
|
mov (%r8), %r8d
|
|
xor %r8, %r9
|
|
|
|
.Lld_partial_4:
|
|
movq %r9, MSG
|
|
|
|
mov LEN, %r8
|
|
and $0x8, %r8
|
|
jz .Lld_partial_8
|
|
|
|
mov LEN, %r8
|
|
and $0x10, %r8
|
|
add SRC, %r8
|
|
pslldq $8, MSG
|
|
movq (%r8), T0
|
|
pxor T0, MSG
|
|
|
|
.Lld_partial_8:
|
|
ret
|
|
SYM_FUNC_END(__load_partial)
|
|
|
|
/*
|
|
* __store_partial: internal ABI
|
|
* input:
|
|
* LEN - bytes
|
|
* DST - dst
|
|
* output:
|
|
* T0 - message block
|
|
* changed:
|
|
* %r8
|
|
* %r9
|
|
* %r10
|
|
*/
|
|
SYM_FUNC_START_LOCAL(__store_partial)
|
|
mov LEN, %r8
|
|
mov DST, %r9
|
|
|
|
movq T0, %r10
|
|
|
|
cmp $8, %r8
|
|
jl .Lst_partial_8
|
|
|
|
mov %r10, (%r9)
|
|
psrldq $8, T0
|
|
movq T0, %r10
|
|
|
|
sub $8, %r8
|
|
add $8, %r9
|
|
|
|
.Lst_partial_8:
|
|
cmp $4, %r8
|
|
jl .Lst_partial_4
|
|
|
|
mov %r10d, (%r9)
|
|
shr $32, %r10
|
|
|
|
sub $4, %r8
|
|
add $4, %r9
|
|
|
|
.Lst_partial_4:
|
|
cmp $2, %r8
|
|
jl .Lst_partial_2
|
|
|
|
mov %r10w, (%r9)
|
|
shr $0x10, %r10
|
|
|
|
sub $2, %r8
|
|
add $2, %r9
|
|
|
|
.Lst_partial_2:
|
|
cmp $1, %r8
|
|
jl .Lst_partial_1
|
|
|
|
mov %r10b, (%r9)
|
|
|
|
.Lst_partial_1:
|
|
ret
|
|
SYM_FUNC_END(__store_partial)
|
|
|
|
/*
|
|
* void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
|
|
*/
|
|
ENTRY(crypto_aegis128_aesni_init)
|
|
FRAME_BEGIN
|
|
|
|
/* load IV: */
|
|
movdqu (%rdx), T1
|
|
|
|
/* load key: */
|
|
movdqa (%rsi), KEY
|
|
pxor KEY, T1
|
|
movdqa T1, STATE0
|
|
movdqa KEY, STATE3
|
|
movdqa KEY, STATE4
|
|
|
|
/* load the constants: */
|
|
movdqa .Laegis128_const_0, STATE2
|
|
movdqa .Laegis128_const_1, STATE1
|
|
pxor STATE2, STATE3
|
|
pxor STATE1, STATE4
|
|
|
|
/* update 10 times with KEY / KEY xor IV: */
|
|
aegis128_update; pxor KEY, STATE4
|
|
aegis128_update; pxor T1, STATE3
|
|
aegis128_update; pxor KEY, STATE2
|
|
aegis128_update; pxor T1, STATE1
|
|
aegis128_update; pxor KEY, STATE0
|
|
aegis128_update; pxor T1, STATE4
|
|
aegis128_update; pxor KEY, STATE3
|
|
aegis128_update; pxor T1, STATE2
|
|
aegis128_update; pxor KEY, STATE1
|
|
aegis128_update; pxor T1, STATE0
|
|
|
|
/* store the state: */
|
|
movdqu STATE0, 0x00(STATEP)
|
|
movdqu STATE1, 0x10(STATEP)
|
|
movdqu STATE2, 0x20(STATEP)
|
|
movdqu STATE3, 0x30(STATEP)
|
|
movdqu STATE4, 0x40(STATEP)
|
|
|
|
FRAME_END
|
|
ret
|
|
ENDPROC(crypto_aegis128_aesni_init)
|
|
|
|
/*
|
|
* void crypto_aegis128_aesni_ad(void *state, unsigned int length,
|
|
* const void *data);
|
|
*/
|
|
ENTRY(crypto_aegis128_aesni_ad)
|
|
FRAME_BEGIN
|
|
|
|
cmp $0x10, LEN
|
|
jb .Lad_out
|
|
|
|
/* load the state: */
|
|
movdqu 0x00(STATEP), STATE0
|
|
movdqu 0x10(STATEP), STATE1
|
|
movdqu 0x20(STATEP), STATE2
|
|
movdqu 0x30(STATEP), STATE3
|
|
movdqu 0x40(STATEP), STATE4
|
|
|
|
mov SRC, %r8
|
|
and $0xF, %r8
|
|
jnz .Lad_u_loop
|
|
|
|
.align 8
|
|
.Lad_a_loop:
|
|
movdqa 0x00(SRC), MSG
|
|
aegis128_update
|
|
pxor MSG, STATE4
|
|
sub $0x10, LEN
|
|
cmp $0x10, LEN
|
|
jl .Lad_out_1
|
|
|
|
movdqa 0x10(SRC), MSG
|
|
aegis128_update
|
|
pxor MSG, STATE3
|
|
sub $0x10, LEN
|
|
cmp $0x10, LEN
|
|
jl .Lad_out_2
|
|
|
|
movdqa 0x20(SRC), MSG
|
|
aegis128_update
|
|
pxor MSG, STATE2
|
|
sub $0x10, LEN
|
|
cmp $0x10, LEN
|
|
jl .Lad_out_3
|
|
|
|
movdqa 0x30(SRC), MSG
|
|
aegis128_update
|
|
pxor MSG, STATE1
|
|
sub $0x10, LEN
|
|
cmp $0x10, LEN
|
|
jl .Lad_out_4
|
|
|
|
movdqa 0x40(SRC), MSG
|
|
aegis128_update
|
|
pxor MSG, STATE0
|
|
sub $0x10, LEN
|
|
cmp $0x10, LEN
|
|
jl .Lad_out_0
|
|
|
|
add $0x50, SRC
|
|
jmp .Lad_a_loop
|
|
|
|
.align 8
|
|
.Lad_u_loop:
|
|
movdqu 0x00(SRC), MSG
|
|
aegis128_update
|
|
pxor MSG, STATE4
|
|
sub $0x10, LEN
|
|
cmp $0x10, LEN
|
|
jl .Lad_out_1
|
|
|
|
movdqu 0x10(SRC), MSG
|
|
aegis128_update
|
|
pxor MSG, STATE3
|
|
sub $0x10, LEN
|
|
cmp $0x10, LEN
|
|
jl .Lad_out_2
|
|
|
|
movdqu 0x20(SRC), MSG
|
|
aegis128_update
|
|
pxor MSG, STATE2
|
|
sub $0x10, LEN
|
|
cmp $0x10, LEN
|
|
jl .Lad_out_3
|
|
|
|
movdqu 0x30(SRC), MSG
|
|
aegis128_update
|
|
pxor MSG, STATE1
|
|
sub $0x10, LEN
|
|
cmp $0x10, LEN
|
|
jl .Lad_out_4
|
|
|
|
movdqu 0x40(SRC), MSG
|
|
aegis128_update
|
|
pxor MSG, STATE0
|
|
sub $0x10, LEN
|
|
cmp $0x10, LEN
|
|
jl .Lad_out_0
|
|
|
|
add $0x50, SRC
|
|
jmp .Lad_u_loop
|
|
|
|
/* store the state: */
|
|
.Lad_out_0:
|
|
movdqu STATE0, 0x00(STATEP)
|
|
movdqu STATE1, 0x10(STATEP)
|
|
movdqu STATE2, 0x20(STATEP)
|
|
movdqu STATE3, 0x30(STATEP)
|
|
movdqu STATE4, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Lad_out_1:
|
|
movdqu STATE4, 0x00(STATEP)
|
|
movdqu STATE0, 0x10(STATEP)
|
|
movdqu STATE1, 0x20(STATEP)
|
|
movdqu STATE2, 0x30(STATEP)
|
|
movdqu STATE3, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Lad_out_2:
|
|
movdqu STATE3, 0x00(STATEP)
|
|
movdqu STATE4, 0x10(STATEP)
|
|
movdqu STATE0, 0x20(STATEP)
|
|
movdqu STATE1, 0x30(STATEP)
|
|
movdqu STATE2, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Lad_out_3:
|
|
movdqu STATE2, 0x00(STATEP)
|
|
movdqu STATE3, 0x10(STATEP)
|
|
movdqu STATE4, 0x20(STATEP)
|
|
movdqu STATE0, 0x30(STATEP)
|
|
movdqu STATE1, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Lad_out_4:
|
|
movdqu STATE1, 0x00(STATEP)
|
|
movdqu STATE2, 0x10(STATEP)
|
|
movdqu STATE3, 0x20(STATEP)
|
|
movdqu STATE4, 0x30(STATEP)
|
|
movdqu STATE0, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Lad_out:
|
|
FRAME_END
|
|
ret
|
|
ENDPROC(crypto_aegis128_aesni_ad)
|
|
|
|
.macro encrypt_block a s0 s1 s2 s3 s4 i
|
|
movdq\a (\i * 0x10)(SRC), MSG
|
|
movdqa MSG, T0
|
|
pxor \s1, T0
|
|
pxor \s4, T0
|
|
movdqa \s2, T1
|
|
pand \s3, T1
|
|
pxor T1, T0
|
|
movdq\a T0, (\i * 0x10)(DST)
|
|
|
|
aegis128_update
|
|
pxor MSG, \s4
|
|
|
|
sub $0x10, LEN
|
|
cmp $0x10, LEN
|
|
jl .Lenc_out_\i
|
|
.endm
|
|
|
|
/*
|
|
* void crypto_aegis128_aesni_enc(void *state, unsigned int length,
|
|
* const void *src, void *dst);
|
|
*/
|
|
ENTRY(crypto_aegis128_aesni_enc)
|
|
FRAME_BEGIN
|
|
|
|
cmp $0x10, LEN
|
|
jb .Lenc_out
|
|
|
|
/* load the state: */
|
|
movdqu 0x00(STATEP), STATE0
|
|
movdqu 0x10(STATEP), STATE1
|
|
movdqu 0x20(STATEP), STATE2
|
|
movdqu 0x30(STATEP), STATE3
|
|
movdqu 0x40(STATEP), STATE4
|
|
|
|
mov SRC, %r8
|
|
or DST, %r8
|
|
and $0xF, %r8
|
|
jnz .Lenc_u_loop
|
|
|
|
.align 8
|
|
.Lenc_a_loop:
|
|
encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
|
|
encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
|
|
encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
|
|
encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
|
|
encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
|
|
|
|
add $0x50, SRC
|
|
add $0x50, DST
|
|
jmp .Lenc_a_loop
|
|
|
|
.align 8
|
|
.Lenc_u_loop:
|
|
encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
|
|
encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
|
|
encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
|
|
encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
|
|
encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
|
|
|
|
add $0x50, SRC
|
|
add $0x50, DST
|
|
jmp .Lenc_u_loop
|
|
|
|
/* store the state: */
|
|
.Lenc_out_0:
|
|
movdqu STATE4, 0x00(STATEP)
|
|
movdqu STATE0, 0x10(STATEP)
|
|
movdqu STATE1, 0x20(STATEP)
|
|
movdqu STATE2, 0x30(STATEP)
|
|
movdqu STATE3, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Lenc_out_1:
|
|
movdqu STATE3, 0x00(STATEP)
|
|
movdqu STATE4, 0x10(STATEP)
|
|
movdqu STATE0, 0x20(STATEP)
|
|
movdqu STATE1, 0x30(STATEP)
|
|
movdqu STATE2, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Lenc_out_2:
|
|
movdqu STATE2, 0x00(STATEP)
|
|
movdqu STATE3, 0x10(STATEP)
|
|
movdqu STATE4, 0x20(STATEP)
|
|
movdqu STATE0, 0x30(STATEP)
|
|
movdqu STATE1, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Lenc_out_3:
|
|
movdqu STATE1, 0x00(STATEP)
|
|
movdqu STATE2, 0x10(STATEP)
|
|
movdqu STATE3, 0x20(STATEP)
|
|
movdqu STATE4, 0x30(STATEP)
|
|
movdqu STATE0, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Lenc_out_4:
|
|
movdqu STATE0, 0x00(STATEP)
|
|
movdqu STATE1, 0x10(STATEP)
|
|
movdqu STATE2, 0x20(STATEP)
|
|
movdqu STATE3, 0x30(STATEP)
|
|
movdqu STATE4, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Lenc_out:
|
|
FRAME_END
|
|
ret
|
|
ENDPROC(crypto_aegis128_aesni_enc)
|
|
|
|
/*
|
|
* void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
|
|
* const void *src, void *dst);
|
|
*/
|
|
ENTRY(crypto_aegis128_aesni_enc_tail)
|
|
FRAME_BEGIN
|
|
|
|
/* load the state: */
|
|
movdqu 0x00(STATEP), STATE0
|
|
movdqu 0x10(STATEP), STATE1
|
|
movdqu 0x20(STATEP), STATE2
|
|
movdqu 0x30(STATEP), STATE3
|
|
movdqu 0x40(STATEP), STATE4
|
|
|
|
/* encrypt message: */
|
|
call __load_partial
|
|
|
|
movdqa MSG, T0
|
|
pxor STATE1, T0
|
|
pxor STATE4, T0
|
|
movdqa STATE2, T1
|
|
pand STATE3, T1
|
|
pxor T1, T0
|
|
|
|
call __store_partial
|
|
|
|
aegis128_update
|
|
pxor MSG, STATE4
|
|
|
|
/* store the state: */
|
|
movdqu STATE4, 0x00(STATEP)
|
|
movdqu STATE0, 0x10(STATEP)
|
|
movdqu STATE1, 0x20(STATEP)
|
|
movdqu STATE2, 0x30(STATEP)
|
|
movdqu STATE3, 0x40(STATEP)
|
|
|
|
FRAME_END
|
|
ret
|
|
ENDPROC(crypto_aegis128_aesni_enc_tail)
|
|
|
|
.macro decrypt_block a s0 s1 s2 s3 s4 i
|
|
movdq\a (\i * 0x10)(SRC), MSG
|
|
pxor \s1, MSG
|
|
pxor \s4, MSG
|
|
movdqa \s2, T1
|
|
pand \s3, T1
|
|
pxor T1, MSG
|
|
movdq\a MSG, (\i * 0x10)(DST)
|
|
|
|
aegis128_update
|
|
pxor MSG, \s4
|
|
|
|
sub $0x10, LEN
|
|
cmp $0x10, LEN
|
|
jl .Ldec_out_\i
|
|
.endm
|
|
|
|
/*
|
|
* void crypto_aegis128_aesni_dec(void *state, unsigned int length,
|
|
* const void *src, void *dst);
|
|
*/
|
|
ENTRY(crypto_aegis128_aesni_dec)
|
|
FRAME_BEGIN
|
|
|
|
cmp $0x10, LEN
|
|
jb .Ldec_out
|
|
|
|
/* load the state: */
|
|
movdqu 0x00(STATEP), STATE0
|
|
movdqu 0x10(STATEP), STATE1
|
|
movdqu 0x20(STATEP), STATE2
|
|
movdqu 0x30(STATEP), STATE3
|
|
movdqu 0x40(STATEP), STATE4
|
|
|
|
mov SRC, %r8
|
|
or DST, %r8
|
|
and $0xF, %r8
|
|
jnz .Ldec_u_loop
|
|
|
|
.align 8
|
|
.Ldec_a_loop:
|
|
decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
|
|
decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
|
|
decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
|
|
decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
|
|
decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
|
|
|
|
add $0x50, SRC
|
|
add $0x50, DST
|
|
jmp .Ldec_a_loop
|
|
|
|
.align 8
|
|
.Ldec_u_loop:
|
|
decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
|
|
decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
|
|
decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
|
|
decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
|
|
decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
|
|
|
|
add $0x50, SRC
|
|
add $0x50, DST
|
|
jmp .Ldec_u_loop
|
|
|
|
/* store the state: */
|
|
.Ldec_out_0:
|
|
movdqu STATE4, 0x00(STATEP)
|
|
movdqu STATE0, 0x10(STATEP)
|
|
movdqu STATE1, 0x20(STATEP)
|
|
movdqu STATE2, 0x30(STATEP)
|
|
movdqu STATE3, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Ldec_out_1:
|
|
movdqu STATE3, 0x00(STATEP)
|
|
movdqu STATE4, 0x10(STATEP)
|
|
movdqu STATE0, 0x20(STATEP)
|
|
movdqu STATE1, 0x30(STATEP)
|
|
movdqu STATE2, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Ldec_out_2:
|
|
movdqu STATE2, 0x00(STATEP)
|
|
movdqu STATE3, 0x10(STATEP)
|
|
movdqu STATE4, 0x20(STATEP)
|
|
movdqu STATE0, 0x30(STATEP)
|
|
movdqu STATE1, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Ldec_out_3:
|
|
movdqu STATE1, 0x00(STATEP)
|
|
movdqu STATE2, 0x10(STATEP)
|
|
movdqu STATE3, 0x20(STATEP)
|
|
movdqu STATE4, 0x30(STATEP)
|
|
movdqu STATE0, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Ldec_out_4:
|
|
movdqu STATE0, 0x00(STATEP)
|
|
movdqu STATE1, 0x10(STATEP)
|
|
movdqu STATE2, 0x20(STATEP)
|
|
movdqu STATE3, 0x30(STATEP)
|
|
movdqu STATE4, 0x40(STATEP)
|
|
FRAME_END
|
|
ret
|
|
|
|
.Ldec_out:
|
|
FRAME_END
|
|
ret
|
|
ENDPROC(crypto_aegis128_aesni_dec)
|
|
|
|
/*
|
|
* void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
|
|
* const void *src, void *dst);
|
|
*/
|
|
ENTRY(crypto_aegis128_aesni_dec_tail)
|
|
FRAME_BEGIN
|
|
|
|
/* load the state: */
|
|
movdqu 0x00(STATEP), STATE0
|
|
movdqu 0x10(STATEP), STATE1
|
|
movdqu 0x20(STATEP), STATE2
|
|
movdqu 0x30(STATEP), STATE3
|
|
movdqu 0x40(STATEP), STATE4
|
|
|
|
/* decrypt message: */
|
|
call __load_partial
|
|
|
|
pxor STATE1, MSG
|
|
pxor STATE4, MSG
|
|
movdqa STATE2, T1
|
|
pand STATE3, T1
|
|
pxor T1, MSG
|
|
|
|
movdqa MSG, T0
|
|
call __store_partial
|
|
|
|
/* mask with byte count: */
|
|
movq LEN, T0
|
|
punpcklbw T0, T0
|
|
punpcklbw T0, T0
|
|
punpcklbw T0, T0
|
|
punpcklbw T0, T0
|
|
movdqa .Laegis128_counter, T1
|
|
pcmpgtb T1, T0
|
|
pand T0, MSG
|
|
|
|
aegis128_update
|
|
pxor MSG, STATE4
|
|
|
|
/* store the state: */
|
|
movdqu STATE4, 0x00(STATEP)
|
|
movdqu STATE0, 0x10(STATEP)
|
|
movdqu STATE1, 0x20(STATEP)
|
|
movdqu STATE2, 0x30(STATEP)
|
|
movdqu STATE3, 0x40(STATEP)
|
|
|
|
FRAME_END
|
|
ret
|
|
ENDPROC(crypto_aegis128_aesni_dec_tail)
|
|
|
|
/*
|
|
* void crypto_aegis128_aesni_final(void *state, void *tag_xor,
|
|
* u64 assoclen, u64 cryptlen);
|
|
*/
|
|
ENTRY(crypto_aegis128_aesni_final)
|
|
FRAME_BEGIN
|
|
|
|
/* load the state: */
|
|
movdqu 0x00(STATEP), STATE0
|
|
movdqu 0x10(STATEP), STATE1
|
|
movdqu 0x20(STATEP), STATE2
|
|
movdqu 0x30(STATEP), STATE3
|
|
movdqu 0x40(STATEP), STATE4
|
|
|
|
/* prepare length block: */
|
|
movq %rdx, MSG
|
|
movq %rcx, T0
|
|
pslldq $8, T0
|
|
pxor T0, MSG
|
|
psllq $3, MSG /* multiply by 8 (to get bit count) */
|
|
|
|
pxor STATE3, MSG
|
|
|
|
/* update state: */
|
|
aegis128_update; pxor MSG, STATE4
|
|
aegis128_update; pxor MSG, STATE3
|
|
aegis128_update; pxor MSG, STATE2
|
|
aegis128_update; pxor MSG, STATE1
|
|
aegis128_update; pxor MSG, STATE0
|
|
aegis128_update; pxor MSG, STATE4
|
|
aegis128_update; pxor MSG, STATE3
|
|
|
|
/* xor tag: */
|
|
movdqu (%rsi), MSG
|
|
|
|
pxor STATE0, MSG
|
|
pxor STATE1, MSG
|
|
pxor STATE2, MSG
|
|
pxor STATE3, MSG
|
|
pxor STATE4, MSG
|
|
|
|
movdqu MSG, (%rsi)
|
|
|
|
FRAME_END
|
|
ret
|
|
ENDPROC(crypto_aegis128_aesni_final)
|