mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-15 15:46:31 +07:00
6dcc5627f6
These are all functions which are invoked from elsewhere, so annotate them as global using the new SYM_FUNC_START and their ENDPROC's by SYM_FUNC_END. Make sure ENTRY/ENDPROC is not defined on X86_64, given these were the last users. Signed-off-by: Jiri Slaby <jslaby@suse.cz> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> [hibernate] Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> [xen bits] Acked-by: Herbert Xu <herbert@gondor.apana.org.au> [crypto] Cc: Allison Randal <allison@lohutok.net> Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Andy Shevchenko <andy@infradead.org> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org> Cc: Armijn Hemel <armijn@tjaldur.nl> Cc: Cao jin <caoj.fnst@cn.fujitsu.com> Cc: Darren Hart <dvhart@infradead.org> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Enrico Weigelt <info@metux.net> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jim Mattson <jmattson@google.com> Cc: Joerg Roedel <joro@8bytes.org> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Kate Stewart <kstewart@linuxfoundation.org> Cc: "Kirill A. 
Shutemov" <kirill.shutemov@linux.intel.com> Cc: kvm ML <kvm@vger.kernel.org> Cc: Len Brown <len.brown@intel.com> Cc: linux-arch@vger.kernel.org Cc: linux-crypto@vger.kernel.org Cc: linux-efi <linux-efi@vger.kernel.org> Cc: linux-efi@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: Mark Rutland <mark.rutland@arm.com> Cc: Matt Fleming <matt@codeblueprint.co.uk> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: Peter Zijlstra <peterz@infradead.org> Cc: platform-driver-x86@vger.kernel.org Cc: "Radim Krčmář" <rkrcmar@redhat.com> Cc: Sean Christopherson <sean.j.christopherson@intel.com> Cc: Stefano Stabellini <sstabellini@kernel.org> Cc: "Steven Rostedt (VMware)" <rostedt@goodmis.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vitaly Kuznetsov <vkuznets@redhat.com> Cc: Wanpeng Li <wanpengli@tencent.com> Cc: Wei Huang <wei@redhat.com> Cc: x86-ml <x86@kernel.org> Cc: xen-devel@lists.xenproject.org Cc: Xiaoyao Li <xiaoyao.li@linux.intel.com> Link: https://lkml.kernel.org/r/20191011115108.12392-25-jslaby@suse.cz
500 lines
10 KiB
ArmAsm
500 lines
10 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
/*
|
|
* Camellia Cipher Algorithm (x86_64)
|
|
*
|
|
* Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
|
|
.file "camellia-x86_64-asm_64.S"
|
|
.text
|
|
|
|
/*
 * Lookup tables consumed by xor2ror16().  They are declared external here
 * and are not defined in this file.  Each one gets a short sp* alias so the
 * round macros can name it without the camellia_ prefix.
 */
.extern camellia_sp10011110;
#define sp10011110	camellia_sp10011110

.extern camellia_sp22000222;
#define sp22000222	camellia_sp22000222

.extern camellia_sp03303033;
#define sp03303033	camellia_sp03303033

.extern camellia_sp00444404;
#define sp00444404	camellia_sp00444404

.extern camellia_sp02220222;
#define sp02220222	camellia_sp02220222

.extern camellia_sp30333033;
#define sp30333033	camellia_sp30333033

.extern camellia_sp44044404;
#define sp44044404	camellia_sp44044404

.extern camellia_sp11101110;
#define sp11101110	camellia_sp11101110
|
|
|
|
/* Size in bytes of the key table; it also serves as the key_length offset. */
#define CAMELLIA_TABLE_BYTE_LEN	272

/*
 * struct camellia_ctx, as byte offsets from CTX: the key table sits at the
 * start and the key length follows directly after it.
 */
#define key_table	0
#define key_length	CAMELLIA_TABLE_BYTE_LEN
|
|
|
|
/*
 * Register macros.
 *
 * Suffix convention: no suffix = full 64-bit register, d = low 32 bits,
 * bl = lowest byte, bh = second-lowest byte.
 */

/* Context pointer (first argument) and pointer to the block being accessed. */
#define CTX	%rdi
#define RIO	%rsi
#define RIOd	%esi

/* Cipher state halves for block 0 (used by the 1-way and 2-way code). */
#define RAB0	%rax
#define RAB0d	%eax
#define RAB0bl	%al
#define RAB0bh	%ah

#define RCD0	%rcx
#define RCD0d	%ecx
#define RCD0bl	%cl
#define RCD0bh	%ch

/* Cipher state halves for block 1 (referenced by the 2-way macros only). */
#define RAB1	%rbx
#define RAB1d	%ebx
#define RAB1bl	%bl
#define RAB1bh	%bh

#define RCD1	%rdx
#define RCD1d	%edx
#define RCD1bl	%dl
#define RCD1bh	%dh

/*
 * Scratch registers.  RT0 is the same register as RIO (%rsi), so RIO stops
 * being a valid pointer as soon as a macro writes RT0.  RT1 is %r12, which
 * the functions below preserve by copying it to RR12.
 */
#define RT0	%rsi
#define RT0d	%esi

#define RT1	%r12
#define RT1d	%r12d

#define RT2	%r8
#define RT2d	%r8d
#define RT2bl	%r8b

/*
 * RXOR: copy of the "bool xor" argument in the encrypt functions; the
 *	 decrypt functions reuse it as a temporary (and, in the 2-way
 *	 variant, as the save slot for %rbx).
 * RR12: holds the caller's %r12 for the duration of a function.
 * RDST: holds the dst pointer until the result is written.
 */
#define RXOR	%r9
#define RXORd	%r9d
#define RXORbl	%r9b

#define RR12	%r10
#define RDST	%r11
|
|
|
|
/*
 * xor2ror16(T0, T1, tmp1, tmp2, ab, dst)
 *
 * Takes the two lowest bytes of ab as indices into two tables of 8-byte
 * entries: the lowest byte selects from T0, the next byte from T1.  Both
 * entries are XORed into dst, and ab is rotated right by 16 bits so that the
 * next invocation sees the following two bytes.
 *
 * ab must have bl/bh macro variants, tmp1/tmp2 must have d variants.
 * Clobbers tmp1 and tmp2.
 */
#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
	movzbl ab ## bh,		tmp1 ## d;	/* index into T1 */ \
	movzbl ab ## bl,		tmp2 ## d;	/* index into T0 */ \
	rorq $16,			ab; \
	xorq T0(, tmp2, 8),		dst; \
	xorq T1(, tmp1, 8),		dst;
|
|
|
|
/**********************************************************************
|
|
1-way camellia
|
|
**********************************************************************/
|
|
/*
 * roundsm(ab, subkey, cd)
 *
 * One round on block 0.  The 8-byte subkey with index `subkey` is loaded
 * into RT2; the eight bytes of ab ## 0 are then pushed through the sp*
 * tables two at a time.  Lookups one and three accumulate straight into
 * cd ## 0, lookups two and four accumulate into RT2 (on top of the subkey),
 * and RT2 is folded into cd ## 0 at the end.
 *
 * The four xor2ror16() steps rotate ab ## 0 by 64 bits in total, so it ends
 * up with its original value.  Clobbers RT0 (= RIO), RT1 and RT2.
 */
#define roundsm(ab, subkey, cd) \
	movq (key_table + (subkey) * 8)(CTX),	RT2; \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
	xorq RT2,				cd ## 0;
|
|
|
|
/*
 * fls(l, r, kl, kr)
 *
 * Mixes the 8-byte subkeys with indices kl and kr into the block-0 state
 * registers l ## 0 and r ## 0.  Writing .lo/.hi for the lower/upper 32 bits,
 * the four steps are, in order:
 *
 *	l.hi ^= rol32(kl.lo & l.lo, 1)
 *	r.lo ^= kr.hi | r.hi
 *	l.lo ^= kl.hi | l.hi
 *	r.hi ^= rol32(kr.lo & r.lo, 1)
 *
 * NOTE(review): presumably the FL / FL^-1 layer of Camellia; confirm against
 * the cipher specification.
 *
 * Clobbers RT0 (= RIO), RT1 and RT2.
 */
#define fls(l, r, kl, kr) \
	/* l.hi ^= rol32(kl.lo & l.lo, 1) */ \
	movl (key_table + (kl) * 8)(CTX),	RT0d; \
	andl l ## 0d,				RT0d; \
	roll $1,				RT0d; \
	shlq $32,				RT0; \
	xorq RT0,				l ## 0; \
	/* r.lo ^= kr.hi | r.hi */ \
	movq (key_table + (kr) * 8)(CTX),	RT1; \
	orq r ## 0,				RT1; \
	shrq $32,				RT1; \
	xorq RT1,				r ## 0; \
	\
	/* l.lo ^= kl.hi | l.hi */ \
	movq (key_table + (kl) * 8)(CTX),	RT2; \
	orq l ## 0,				RT2; \
	shrq $32,				RT2; \
	xorq RT2,				l ## 0; \
	/* r.hi ^= rol32(kr.lo & r.lo, 1) */ \
	movl (key_table + (kr) * 8)(CTX),	RT0d; \
	andl r ## 0d,				RT0d; \
	roll $1,				RT0d; \
	shlq $32,				RT0; \
	xorq RT0,				r ## 0;
|
|
|
|
/*
 * enc_rounds(i): six rounds on block 0 using subkeys i+2 .. i+7 in ascending
 * order.  The roles of RAB and RCD alternate from one round to the next.
 */
#define enc_rounds(i) \
	roundsm(RAB, (i) + 2, RCD); \
	roundsm(RCD, (i) + 3, RAB); \
	roundsm(RAB, (i) + 4, RCD); \
	roundsm(RCD, (i) + 5, RAB); \
	roundsm(RAB, (i) + 6, RCD); \
	roundsm(RCD, (i) + 7, RAB);
|
|
|
|
/* enc_fls(i): fls() step for encryption; kl = subkey i, kr = subkey i+1. */
#define enc_fls(i) \
	fls(RAB, RCD, (i) + 0, (i) + 1);
|
|
|
|
/*
 * enc_inpack(): load one 16-byte block from RIO into RAB0/RCD0.
 *
 * Each 8-byte half is byte-swapped and then has its 32-bit halves exchanged
 * (rotate by 32).  Subkey 0 is XORed into RAB0 afterwards.
 */
#define enc_inpack() \
	movq (RIO),			RAB0; \
	bswapq				RAB0; \
	rolq $32,			RAB0; \
	movq 8(RIO),			RCD0; \
	bswapq				RCD0; \
	rorq $32,			RCD0; \
	xorq key_table(CTX),		RAB0;
|
|
|
|
/*
 * enc_outunpack(op, max): write the block-0 result to RIO.
 *
 * op  - instruction stem, pasted with a q suffix: mov stores the result,
 *	 xor combines it with what is already at the destination.
 * max - register holding the index of the final subkey, which is XORed into
 *	 RCD0 first.
 *
 * RCD0 goes to offset 0 and RAB0 to offset 8, each after undoing the
 * rotate/byte-swap applied on input.
 */
#define enc_outunpack(op, max) \
	xorq key_table(CTX, max, 8),	RCD0; \
	rorq $32,			RCD0; \
	bswapq				RCD0; \
	op ## q RCD0,			(RIO); \
	rolq $32,			RAB0; \
	bswapq				RAB0; \
	op ## q RAB0,			8(RIO);
|
|
|
|
/*
 * dec_rounds(i): six rounds on block 0 using subkeys i+7 .. i+2, i.e. the
 * same subkeys as enc_rounds(i) but in descending order.
 */
#define dec_rounds(i) \
	roundsm(RAB, (i) + 7, RCD); \
	roundsm(RCD, (i) + 6, RAB); \
	roundsm(RAB, (i) + 5, RCD); \
	roundsm(RCD, (i) + 4, RAB); \
	roundsm(RAB, (i) + 3, RCD); \
	roundsm(RCD, (i) + 2, RAB);
|
|
|
|
/*
 * dec_fls(i): fls() step for decryption; the two subkeys are swapped with
 * respect to enc_fls(): kl = subkey i+1, kr = subkey i.
 */
#define dec_fls(i) \
	fls(RAB, RCD, (i) + 1, (i) + 0);
|
|
|
|
/*
 * dec_inpack(max): load one 16-byte block from RIO into RAB0/RCD0 in the
 * same way as enc_inpack(), but XOR the subkey whose index is held in
 * register `max` into RAB0 instead of subkey 0.
 */
#define dec_inpack(max) \
	movq (RIO),			RAB0; \
	bswapq				RAB0; \
	rolq $32,			RAB0; \
	movq 8(RIO),			RCD0; \
	bswapq				RCD0; \
	rorq $32,			RCD0; \
	xorq key_table(CTX, max, 8),	RAB0;
|
|
|
|
/*
 * dec_outunpack(): XOR subkey 0 into RCD0, undo the input transformation on
 * both halves and store the block-0 result at RIO (RCD0 at offset 0, RAB0 at
 * offset 8).  Unlike enc_outunpack() this always stores; there is no xor
 * variant.
 */
#define dec_outunpack() \
	xorq key_table(CTX),		RCD0; \
	rorq $32,			RCD0; \
	bswapq				RCD0; \
	movq RCD0,			(RIO); \
	rolq $32,			RAB0; \
	bswapq				RAB0; \
	movq RAB0,			8(RIO);
|
|
|
|
SYM_FUNC_START(__camellia_enc_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor
	 *
	 * Encrypts the 16-byte block at src.  The result is stored to dst
	 * when xor is false and XORed into dst when xor is true.
	 *
	 * A key_length of 16 selects the short schedule (final subkey index
	 * 24); any other value runs one more fls + rounds group (final
	 * subkey index 32).
	 */

	/* RT1 is %r12, which must be preserved: park it in RR12. */
	movq %r12, RR12;

	/*
	 * %rcx and %rsi are about to be reused as RCD0 and RIO/RT0, so move
	 * the xor flag and dst out of the way before loading src into RIO.
	 */
	movq %rcx, RXOR;
	movq %rsi, RDST;
	movq %rdx, RIO;

	enc_inpack();

	enc_rounds(0);
	enc_fls(8);
	enc_rounds(8);
	enc_fls(16);
	enc_rounds(16);
	movl $24, RT1d; /* max */

	/*
	 * Compare the whole 32-bit field, as camellia_dec_blk does, rather
	 * than only its low byte.
	 */
	cmpl $16, key_length(CTX);
	je .L__enc_done;

	enc_fls(24);
	enc_rounds(24);
	movl $32, RT1d; /* max */

.L__enc_done:
	/* Only the low byte of the bool argument is tested. */
	testb RXORbl, RXORbl;
	/* The rounds clobbered RIO (= RT0); point it at dst.  mov keeps flags. */
	movq RDST, RIO;

	jnz .L__enc_xor;

	enc_outunpack(mov, RT1);

	movq RR12, %r12;
	ret;

.L__enc_xor:
	enc_outunpack(xor, RT1);

	movq RR12, %r12;
	ret;
SYM_FUNC_END(__camellia_enc_blk)
|
|
|
|
SYM_FUNC_START(camellia_dec_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *
	 * Decrypts the 16-byte block at src and stores the result at dst.
	 */

	/* Preserve %r12 (used as RT1) and dst, then point RIO at src. */
	movq %r12, RR12;
	movq %rsi, RDST;
	movq %rdx, RIO;

	/* max = (key_length == 16) ? 24 : 32; RXOR is only a temporary here. */
	movl $32, RT2d;
	movl $24, RXORd;
	cmpl $16, key_length(CTX);
	cmovel RXORd, RT2d; /* max */

	dec_inpack(RT2);

	/* With max == 24 the first rounds/fls group is skipped. */
	cmpb $24, RT2bl;
	je .L__dec_rounds16;

	dec_rounds(24);
	dec_fls(24);

.L__dec_rounds16:
	dec_rounds(16);
	dec_fls(16);
	dec_rounds(8);
	dec_fls(8);
	dec_rounds(0);

	/* The rounds clobbered RIO (= RT0); point it at dst for the store. */
	movq RDST, RIO;

	dec_outunpack();

	movq RR12, %r12;
	ret;
SYM_FUNC_END(camellia_dec_blk)
|
|
|
|
/**********************************************************************
|
|
2-way camellia
|
|
**********************************************************************/
|
|
/*
 * roundsm2(ab, subkey, cd)
 *
 * One round on both blocks of the 2-way code.  The 8-byte subkey with index
 * `subkey` is loaded into RT2 and XORed into cd ## 1 up front.
 *
 * Block 0 is handled as in roundsm(): lookups one and three go straight into
 * cd ## 0, lookups two and four accumulate in RT2 on top of the subkey, and
 * RT2 is folded into cd ## 0 afterwards.  For block 1 all four lookups go
 * straight into cd ## 1.  The RT2 fold sits after the first block-1 lookup.
 *
 * ab ## 0 and ab ## 1 are each rotated by 64 bits in total and therefore
 * keep their values.  Clobbers RT0 (= RIO), RT1 and RT2.
 */
#define roundsm2(ab, subkey, cd) \
	movq (key_table + (subkey) * 8)(CTX),	RT2; \
	xorq RT2,				cd ## 1; \
	\
	/* block 0 */ \
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
	/* block 1, with the block-0 fold of RT2 in between */ \
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
	xorq RT2,				cd ## 0; \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
|
|
|
|
/*
 * fls2(l, r, kl, kr)
 *
 * 2-way version of fls(): applies the same four steps to block 0
 * (l ## 0, r ## 0) and block 1 (l ## 1, r ## 1), interleaved.  Writing
 * .lo/.hi for the lower/upper 32 bits, for each block n:
 *
 *	ln.hi ^= rol32(kl.lo & ln.lo, 1)
 *	rn.lo ^= kr.hi | rn.hi
 *	ln.lo ^= kl.hi | ln.hi
 *	rn.hi ^= rol32(kr.lo & rn.lo, 1)
 *
 * Clobbers RT0 (= RIO), RT1 and RT2.
 */
#define fls2(l, r, kl, kr) \
	/* block 0: l.hi, then r.lo */ \
	movl (key_table + (kl) * 8)(CTX),	RT0d; \
	andl l ## 0d,				RT0d; \
	roll $1,				RT0d; \
	shlq $32,				RT0; \
	xorq RT0,				l ## 0; \
	movq (key_table + (kr) * 8)(CTX),	RT1; \
	orq r ## 0,				RT1; \
	shrq $32,				RT1; \
	xorq RT1,				r ## 0; \
	\
	/* block 1: l.hi, then r.lo */ \
	movl (key_table + (kl) * 8)(CTX),	RT2d; \
	andl l ## 1d,				RT2d; \
	roll $1,				RT2d; \
	shlq $32,				RT2; \
	xorq RT2,				l ## 1; \
	movq (key_table + (kr) * 8)(CTX),	RT0; \
	orq r ## 1,				RT0; \
	shrq $32,				RT0; \
	xorq RT0,				r ## 1; \
	\
	/* block 0: l.lo, then r.hi */ \
	movq (key_table + (kl) * 8)(CTX),	RT1; \
	orq l ## 0,				RT1; \
	shrq $32,				RT1; \
	xorq RT1,				l ## 0; \
	movl (key_table + (kr) * 8)(CTX),	RT2d; \
	andl r ## 0d,				RT2d; \
	roll $1,				RT2d; \
	shlq $32,				RT2; \
	xorq RT2,				r ## 0; \
	\
	/* block 1: l.lo, then r.hi */ \
	movq (key_table + (kl) * 8)(CTX),	RT0; \
	orq l ## 1,				RT0; \
	shrq $32,				RT0; \
	xorq RT0,				l ## 1; \
	movl (key_table + (kr) * 8)(CTX),	RT1d; \
	andl r ## 1d,				RT1d; \
	roll $1,				RT1d; \
	shlq $32,				RT1; \
	xorq RT1,				r ## 1;
|
|
|
|
/*
 * enc_rounds2(i): six 2-way rounds using subkeys i+2 .. i+7 in ascending
 * order, alternating the roles of RAB and RCD.
 */
#define enc_rounds2(i) \
	roundsm2(RAB, (i) + 2, RCD); \
	roundsm2(RCD, (i) + 3, RAB); \
	roundsm2(RAB, (i) + 4, RCD); \
	roundsm2(RCD, (i) + 5, RAB); \
	roundsm2(RAB, (i) + 6, RCD); \
	roundsm2(RCD, (i) + 7, RAB);
|
|
|
|
/* enc_fls2(i): 2-way fls step for encryption; kl = subkey i, kr = i+1. */
#define enc_fls2(i) \
	fls2(RAB, RCD, (i) + 0, (i) + 1);
|
|
|
|
/*
 * enc_inpack2(): load two consecutive 16-byte blocks from RIO.
 *
 * Block 0 (offsets 0 and 8) goes to RAB0/RCD0, block 1 (offsets 16 and 24)
 * to RAB1/RCD1.  Every 8-byte half is byte-swapped and rotated by 32 bits;
 * subkey 0 is XORed into both RAB registers.
 */
#define enc_inpack2() \
	/* block 0 */ \
	movq (RIO),			RAB0; \
	bswapq				RAB0; \
	rorq $32,			RAB0; \
	movq 8(RIO),			RCD0; \
	bswapq				RCD0; \
	rolq $32,			RCD0; \
	xorq key_table(CTX),		RAB0; \
	\
	/* block 1 */ \
	movq 16(RIO),			RAB1; \
	bswapq				RAB1; \
	rorq $32,			RAB1; \
	movq 24(RIO),			RCD1; \
	bswapq				RCD1; \
	rolq $32,			RCD1; \
	xorq key_table(CTX),		RAB1;
|
|
|
|
/*
 * enc_outunpack2(op, max): write both results to RIO.
 *
 * op  - instruction stem, pasted with a q suffix: mov stores, xor combines
 *	 with what is already at the destination.
 * max - register holding the index of the final subkey, XORed into RCD0
 *	 and RCD1 first.
 *
 * Block 0 is written at offsets 0 (RCD0) and 8 (RAB0), block 1 at offsets
 * 16 (RCD1) and 24 (RAB1), after undoing the input rotate/byte-swap.
 */
#define enc_outunpack2(op, max) \
	/* block 0 */ \
	xorq key_table(CTX, max, 8),	RCD0; \
	rolq $32,			RCD0; \
	bswapq				RCD0; \
	op ## q RCD0,			(RIO); \
	rorq $32,			RAB0; \
	bswapq				RAB0; \
	op ## q RAB0,			8(RIO); \
	\
	/* block 1 */ \
	xorq key_table(CTX, max, 8),	RCD1; \
	rolq $32,			RCD1; \
	bswapq				RCD1; \
	op ## q RCD1,			16(RIO); \
	rorq $32,			RAB1; \
	bswapq				RAB1; \
	op ## q RAB1,			24(RIO);
|
|
|
|
/*
 * dec_rounds2(i): six 2-way rounds using subkeys i+7 .. i+2, i.e. the same
 * subkeys as enc_rounds2(i) but in descending order.
 */
#define dec_rounds2(i) \
	roundsm2(RAB, (i) + 7, RCD); \
	roundsm2(RCD, (i) + 6, RAB); \
	roundsm2(RAB, (i) + 5, RCD); \
	roundsm2(RCD, (i) + 4, RAB); \
	roundsm2(RAB, (i) + 3, RCD); \
	roundsm2(RCD, (i) + 2, RAB);
|
|
|
|
/*
 * dec_fls2(i): 2-way fls step for decryption; subkeys swapped with respect
 * to enc_fls2(): kl = subkey i+1, kr = subkey i.
 */
#define dec_fls2(i) \
	fls2(RAB, RCD, (i) + 1, (i) + 0);
|
|
|
|
/*
 * dec_inpack2(max): load two consecutive 16-byte blocks from RIO in the
 * same way as enc_inpack2(), but XOR the subkey whose index is held in
 * register `max` into RAB0 and RAB1 instead of subkey 0.
 */
#define dec_inpack2(max) \
	/* block 0 */ \
	movq (RIO),			RAB0; \
	bswapq				RAB0; \
	rorq $32,			RAB0; \
	movq 8(RIO),			RCD0; \
	bswapq				RCD0; \
	rolq $32,			RCD0; \
	xorq key_table(CTX, max, 8),	RAB0; \
	\
	/* block 1 */ \
	movq 16(RIO),			RAB1; \
	bswapq				RAB1; \
	rorq $32,			RAB1; \
	movq 24(RIO),			RCD1; \
	bswapq				RCD1; \
	rolq $32,			RCD1; \
	xorq key_table(CTX, max, 8),	RAB1;
|
|
|
|
/*
 * dec_outunpack2(): XOR subkey 0 into RCD0 and RCD1, undo the input
 * transformation and store both results at RIO: block 0 at offsets 0 (RCD0)
 * and 8 (RAB0), block 1 at offsets 16 (RCD1) and 24 (RAB1).
 */
#define dec_outunpack2() \
	/* block 0 */ \
	xorq key_table(CTX),		RCD0; \
	rolq $32,			RCD0; \
	bswapq				RCD0; \
	movq RCD0,			(RIO); \
	rorq $32,			RAB0; \
	bswapq				RAB0; \
	movq RAB0,			8(RIO); \
	\
	/* block 1 */ \
	xorq key_table(CTX),		RCD1; \
	rolq $32,			RCD1; \
	bswapq				RCD1; \
	movq RCD1,			16(RIO); \
	rorq $32,			RAB1; \
	bswapq				RAB1; \
	movq RAB1,			24(RIO);
|
|
|
|
SYM_FUNC_START(__camellia_enc_blk_2way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor
	 *
	 * Encrypts the two consecutive 16-byte blocks at src.  The results
	 * are stored to dst when xor is false and XORed into dst when xor
	 * is true.
	 *
	 * A key_length of 16 selects the short schedule (final subkey index
	 * 24); any other value runs one more fls + rounds group (final
	 * subkey index 32).
	 */

	/* RAB1 is %rbx, which must be preserved. */
	pushq %rbx;

	/* RT1 is %r12, which must be preserved: park it in RR12. */
	movq %r12, RR12;
	/*
	 * %rcx, %rsi and %rdx are about to be reused as RCD0, RIO/RT0 and
	 * RCD1: move the xor flag and dst away, then load src into RIO.
	 */
	movq %rcx, RXOR;
	movq %rsi, RDST;
	movq %rdx, RIO;

	enc_inpack2();

	enc_rounds2(0);
	enc_fls2(8);
	enc_rounds2(8);
	enc_fls2(16);
	enc_rounds2(16);
	movl $24, RT2d; /* max */

	/*
	 * Compare the whole 32-bit field, as camellia_dec_blk_2way does,
	 * rather than only its low byte.
	 */
	cmpl $16, key_length(CTX);
	je .L__enc2_done;

	enc_fls2(24);
	enc_rounds2(24);
	movl $32, RT2d; /* max */

.L__enc2_done:
	/* Only the low byte of the bool argument is tested. */
	testb RXORbl, RXORbl;
	/* The rounds clobbered RIO (= RT0); point it at dst.  mov keeps flags. */
	movq RDST, RIO;
	jnz .L__enc2_xor;

	enc_outunpack2(mov, RT2);

	movq RR12, %r12;
	popq %rbx;
	ret;

.L__enc2_xor:
	enc_outunpack2(xor, RT2);

	movq RR12, %r12;
	popq %rbx;
	ret;
SYM_FUNC_END(__camellia_enc_blk_2way)
|
|
|
|
SYM_FUNC_START(camellia_dec_blk_2way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *
	 * Decrypts the two consecutive 16-byte blocks at src and stores the
	 * results at dst.
	 */

	/* Preserve %r12 (used as RT1) and dst, then point RIO at src. */
	movq %r12, RR12;
	movq %rsi, RDST;
	movq %rdx, RIO;

	/* max = (key_length == 16) ? 24 : 32; RXOR is only a temporary here. */
	movl $32, RT2d;
	movl $24, RXORd;
	cmpl $16, key_length(CTX);
	cmovel RXORd, RT2d; /* max */

	/*
	 * RXOR is free again: use it to hold the caller's %rbx (RAB1), which
	 * must be preserved.
	 */
	movq %rbx, RXOR;

	dec_inpack2(RT2);

	/* With max == 24 the first rounds/fls group is skipped. */
	cmpb $24, RT2bl;
	je .L__dec2_rounds16;

	dec_rounds2(24);
	dec_fls2(24);

.L__dec2_rounds16:
	dec_rounds2(16);
	dec_fls2(16);
	dec_rounds2(8);
	dec_fls2(8);
	dec_rounds2(0);

	/* The rounds clobbered RIO (= RT0); point it at dst for the store. */
	movq RDST, RIO;

	dec_outunpack2();

	movq RR12, %r12;
	movq RXOR, %rbx;
	ret;
SYM_FUNC_END(camellia_dec_blk_2way)
|