linux_dsm_epyc7002/arch/powerpc/crypto/aes-spe-modes.S
Thomas Gleixner 2874c5fd28 treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 152
Based on 1 normalized pattern(s):

  this program is free software you can redistribute it and or modify
  it under the terms of the gnu general public license as published by
  the free software foundation either version 2 of the license or at
  your option any later version

extracted by the scancode license scanner the SPDX license identifier

  GPL-2.0-or-later

has been chosen to replace the boilerplate/reference in 3029 file(s).

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Allison Randal <allison@lohutok.net>
Cc: linux-spdx@vger.kernel.org
Link: https://lkml.kernel.org/r/20190527070032.746973796@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-05-30 11:26:32 -07:00

626 lines
14 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
*
* Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
*/
#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"
/*
 * Endian dependent access helpers for the data stream (read through rSP,
 * written through rDP) and for the IV buffer (rIP).
 *
 * Big endian: every 32 bit word is accessed with an explicit offset and
 * the pointer itself is left untouched. NEXT_BLOCK therefore advances
 * rSP/rDP by 16 and START_IV has nothing to undo.
 *
 * Little endian: the byte reversing indexed forms (lwbrx/stwbrx) are used
 * and the pointer is bumped by 4 after every word; the "off" argument is
 * ignored. After four accesses the pointer already sits one block further,
 * which is why NEXT_BLOCK is empty, START_IV rewinds rIP by 16 and the
 * CBC_DEC/CTR_DEC step sizes are larger than in the big endian case.
 */
#ifdef __BIG_ENDIAN__ /* Macros for big endian builds */
#define LOAD_DATA(reg, off) \
lwz reg,off(rSP); /* load with offset */
#define SAVE_DATA(reg, off) \
stw reg,off(rDP); /* save with offset */
#define NEXT_BLOCK \
addi rSP,rSP,16; /* increment pointers per block */ \
addi rDP,rDP,16;
#define LOAD_IV(reg, off) \
lwz reg,off(rIP); /* IV loading with offset */
#define SAVE_IV(reg, off) \
stw reg,off(rIP); /* IV saving with offset */
#define START_IV /* nothing to reset */
#define CBC_DEC 16 /* CBC decrement per block */
#define CTR_DEC 1 /* CTR decrement one byte */
#else /* Macros for little endian */
#define LOAD_DATA(reg, off) \
lwbrx reg,0,rSP; /* load reversed */ \
addi rSP,rSP,4; /* and increment pointer */
#define SAVE_DATA(reg, off) \
stwbrx reg,0,rDP; /* save reversed */ \
addi rDP,rDP,4; /* and increment pointer */
#define NEXT_BLOCK /* nothing to do, pointers already advanced */
#define LOAD_IV(reg, off) \
lwbrx reg,0,rIP; /* load reversed */ \
addi rIP,rIP,4; /* and increment pointer */
#define SAVE_IV(reg, off) \
stwbrx reg,0,rIP; /* save reversed */ \
addi rIP,rIP,4; /* and increment pointer */
#define START_IV \
subi rIP,rIP,16; /* must reset pointer */
#define CBC_DEC 32 /* 2 blocks because of incs */
#define CTR_DEC 17 /* 1 block because of incs */
#endif
/*
 * Spill/reload helpers for the extra 32 bit registers a mode needs.
 * INITIALIZE_CRYPT/FINALIZE_CRYPT pick one of them by token pasting
 * SAVE_##n##_REGS / LOAD_##n##_REGS with n = 0, 4 or 8.
 *
 * Stack slots used (relative to r1 after the frame has been created):
 *   96..108   rI0-rI3
 *   112..124  rG0-rG3 (only with the 8 register variant)
 */
#define SAVE_0_REGS
#define LOAD_0_REGS
#define SAVE_4_REGS \
stw rI0,96(r1); /* save 32 bit registers */ \
stw rI1,100(r1); \
stw rI2,104(r1); \
stw rI3,108(r1);
#define LOAD_4_REGS \
lwz rI0,96(r1); /* restore 32 bit registers */ \
lwz rI1,100(r1); \
lwz rI2,104(r1); \
lwz rI3,108(r1);
#define SAVE_8_REGS \
SAVE_4_REGS \
stw rG0,112(r1); /* save 32 bit registers */ \
stw rG1,116(r1); \
stw rG2,120(r1); \
stw rG3,124(r1);
#define LOAD_8_REGS \
LOAD_4_REGS \
lwz rG0,112(r1); /* restore 32 bit registers */ \
lwz rG1,116(r1); \
lwz rG2,120(r1); \
lwz rG3,124(r1);
/*
 * INITIALIZE_CRYPT(tab, nr32bitregs) - common function prologue
 *
 *   tab          symbol of the lookup table whose address is put into rT0
 *   nr32bitregs  0, 4 or 8; selects SAVE_<n>_REGS for the rI and rG registers
 *
 * Creates a 160 byte stack frame, stores the link register at 8(r1),
 * keeps a copy of the key pointer rKP in rKS and stores r14-r23 as full
 * 64 bit values (evstdw) into the slots 16..88(r1). The lis/ori pair that
 * builds the table address is interleaved with the stores.
 */
#define INITIALIZE_CRYPT(tab,nr32bitregs) \
mflr r0; \
stwu r1,-160(r1); /* create stack frame */ \
lis rT0,tab@h; /* en-/decryption table pointer */ \
stw r0,8(r1); /* save link register */ \
ori rT0,rT0,tab@l; \
evstdw r14,16(r1); \
mr rKS,rKP; \
evstdw r15,24(r1); /* We must save non volatile */ \
evstdw r16,32(r1); /* registers. Take the chance */ \
evstdw r17,40(r1); /* and save the SPE part too */ \
evstdw r18,48(r1); \
evstdw r19,56(r1); \
evstdw r20,64(r1); \
evstdw r21,72(r1); \
evstdw r22,80(r1); \
evstdw r23,88(r1); \
SAVE_##nr32bitregs##_REGS
/*
 * FINALIZE_CRYPT(nr32bitregs) - common function epilogue
 *
 *   nr32bitregs  must match the value given to INITIALIZE_CRYPT
 *
 * Reloads the link register value and r14-r23 (64 bit, evldw) from the
 * frame, reloads the rI and rG registers through LOAD_<n>_REGS, then
 * overwrites the first word of each 8 byte save slot (16..88(r1)) with
 * zero before the frame is released. The macro does not return; the
 * caller has to issue the blr itself.
 */
#define FINALIZE_CRYPT(nr32bitregs) \
lwz r0,8(r1); \
evldw r14,16(r1); /* restore SPE registers */ \
evldw r15,24(r1); \
evldw r16,32(r1); \
evldw r17,40(r1); \
evldw r18,48(r1); \
evldw r19,56(r1); \
evldw r20,64(r1); \
evldw r21,72(r1); \
evldw r22,80(r1); \
evldw r23,88(r1); \
LOAD_##nr32bitregs##_REGS \
mtlr r0; /* restore link register */ \
xor r0,r0,r0; \
stw r0,16(r1); /* delete sensitive data */ \
stw r0,24(r1); /* that we might have pushed */ \
stw r0,32(r1); /* from other context that runs */ \
stw r0,40(r1); /* the same code */ \
stw r0,48(r1); \
stw r0,56(r1); \
stw r0,64(r1); \
stw r0,72(r1); \
stw r0,80(r1); \
stw r0,88(r1); \
addi r1,r1,160; /* cleanup stack frame */
/*
 * ENDIAN_SWAP(t0, t1, s0, s1) - byte reverse two 32 bit words
 *
 * t0 = byteswap(s0), t1 = byteswap(s1). With s = B0 B1 B2 B3:
 *   rotrwi 8            -> B3 B0 B1 B2
 *   rlwimi 8, bits 8-15 -> B3 B2 B1 B2
 *   rlwimi 8, bits 24-31-> B3 B2 B1 B0
 * The sources are read again after the targets have been written, so a
 * target register must not be the same register as its source.
 */
#define ENDIAN_SWAP(t0, t1, s0, s1) \
rotrwi t0,s0,8; /* swap endianness for 2 GPRs */ \
rotrwi t1,s1,8; \
rlwimi t0,s0,8,8,15; \
rlwimi t1,s1,8,8,15; \
rlwimi t0,s0,8,24,31; \
rlwimi t1,s1,8,24,31;
/*
 * GF128_MUL(d0, d1, d2, d3, t0) - multiply a 128 bit value by x in GF(2^128)
 *
 * The value is held in d3:d2:d1:d0 with d3 as the most significant word.
 * It is shifted left by one bit; if the bit shifted out of d3 was set,
 * 0x87 is xored into d0.
 *
 *   - cmpwi/iselgt: t0 = 0 when d3 > -1 (top bit clear), else t0 = 0x87
 *   - each rlwimi copies the top bit of the next lower word into the top
 *     bit of the current word; the following rotlwi by 1 then moves that
 *     bit into the lowest position while shifting the rest left
 *   - d0 is shifted with slwi (zero fill) and receives the reduction value
 *
 * Clobbers t0 and cr0.
 */
#define GF128_MUL(d0, d1, d2, d3, t0) \
li t0,0x87; /* multiplication in GF128 */ \
cmpwi d3,-1; \
iselgt t0,0,t0; \
rlwimi d3,d2,0,0,0; /* propagate "carry" bits */ \
rotlwi d3,d3,1; \
rlwimi d2,d1,0,0,0; \
rotlwi d2,d2,1; \
rlwimi d1,d0,0,0,0; \
slwi d0,d0,1; /* shift left 128 bit */ \
rotlwi d1,d1,1; \
xor d0,d0,t0;
/*
 * START_KEY(d0, d1, d2, d3) - initial key addition before a block call
 *
 * Loads the four key words at 0..12(rKP) into rW0-rW3, moves the round
 * count rRR into the count register and writes d0..d3 xor rW0..rW3 into
 * rD0-rD3. d0..d3 may be rD0-rD3 themselves (in place) or other registers
 * such as rI0-rI3.
 */
#define START_KEY(d0, d1, d2, d3) \
lwz rW0,0(rKP); \
mtctr rRR; \
lwz rW1,4(rKP); \
lwz rW2,8(rKP); \
lwz rW3,12(rKP); \
xor rD0,d0,rW0; \
xor rD1,d1,rW1; \
xor rD2,d2,rW2; \
xor rD3,d3,rW3;
/*
* ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
* u32 rounds)
*
* called from glue layer to encrypt a single 16 byte block
* round values are AES128 = 4, AES192 = 5, AES256 = 6
*
* Reads four words through rSP, adds the first round key (START_KEY),
* calls ppc_encrypt_block and writes rD0-rD3 xor rW0-rW3 through rDP.
*
* NOTE(review): ppc_encrypt_block is defined outside this file; the xor
* sequence after the call implies it returns with the final key words in
* rW0-rW3 - confirm against its implementation.
*/
_GLOBAL(ppc_encrypt_aes)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
/* fetch the input block */
LOAD_DATA(rD0, 0)
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
/* last key addition interleaved with the stores */
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
FINALIZE_CRYPT(0)
blr
/*
* ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
* u32 rounds)
*
* called from glue layer to decrypt a single 16 byte block
* round values are AES128 = 4, AES192 = 5, AES256 = 6
*
* Same flow as the single block encryption, but with the decryption
* table in rT0, a second table pointer rT1 = rT0 + 4096 and a call to
* ppc_decrypt_block.
*
* NOTE(review): ppc_decrypt_block is defined outside this file; it
* presumably needs rT1 and leaves the final key words in rW0-rW3 -
* confirm against its implementation.
*/
_GLOBAL(ppc_decrypt_aes)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
LOAD_DATA(rD0, 0)
/* second table pointer, 4 KiB behind the first one */
addi rT1,rT0,4096
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
/* last key addition interleaved with the stores */
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
FINALIZE_CRYPT(0)
blr
/*
* ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
* u32 rounds, u32 bytes);
*
* called from glue layer to encrypt multiple blocks via ECB
* Bytes must be larger or equal 16 and only whole blocks are
* processed. round values are AES128 = 4, AES192 = 5 and
* AES256 = 6
*
* The loop body always runs at least once. rLN is reduced by 16 per
* block and compared with 15 before the block call; the branch at the
* bottom of the loop consumes that compare result.
*
* NOTE(review): the compare result in cr0 has to survive the call to
* ppc_encrypt_block (defined outside this file) - confirm it does not
* modify cr0.
*/
_GLOBAL(ppc_encrypt_ecb)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
ppc_encrypt_ecb_loop:
LOAD_DATA(rD0, 0)
/* rKP was advanced by the previous block, restart from the saved copy */
mr rKP,rKS
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
/* gt <=> at least one more whole block left */
cmpwi rLN,15
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
/* last key addition interleaved with the stores */
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
NEXT_BLOCK
bt gt,ppc_encrypt_ecb_loop
FINALIZE_CRYPT(0)
blr
/*
* ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
* u32 rounds, u32 bytes);
*
* called from glue layer to decrypt multiple blocks via ECB
* Bytes must be larger or equal 16 and only whole blocks are
* processed. round values are AES128 = 4, AES192 = 5 and
* AES256 = 6
*
* Mirror image of the ECB encryption loop, using the decryption table
* (rT0, rT1 = rT0 + 4096) and ppc_decrypt_block. The loop body always
* runs at least once.
*
* NOTE(review): the compare result in cr0 has to survive the call to
* ppc_decrypt_block (defined outside this file) - confirm it does not
* modify cr0.
*/
_GLOBAL(ppc_decrypt_ecb)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
/* second table pointer, set up once outside the loop */
addi rT1,rT0,4096
ppc_decrypt_ecb_loop:
LOAD_DATA(rD0, 0)
/* restart key schedule from the saved key pointer */
mr rKP,rKS
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
/* gt <=> at least one more whole block left */
cmpwi rLN,15
LOAD_DATA(rD3, 12)
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
/* last key addition interleaved with the stores */
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
NEXT_BLOCK
bt gt,ppc_decrypt_ecb_loop
FINALIZE_CRYPT(0)
blr
/*
* ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
* u32 rounds, u32 bytes, u8 *iv);
*
* called from glue layer to encrypt multiple blocks via CBC
* Bytes must be larger or equal 16 and only whole blocks are
* processed. round values are AES128 = 4, AES192 = 5 and
* AES256 = 6
*
* rI0-rI3 hold the chaining value: the IV on entry, afterwards the most
* recent ciphertext block. The final chaining value is written back to
* the IV buffer before returning.
*
* NOTE(review): the compare result in cr0 has to survive the call to
* ppc_encrypt_block (defined outside this file) - confirm it does not
* modify cr0.
*/
_GLOBAL(ppc_encrypt_cbc)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
LOAD_IV(rI0, 0)
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
LOAD_IV(rI3, 12)
ppc_encrypt_cbc_loop:
LOAD_DATA(rD0, 0)
/* restart key schedule from the saved key pointer */
mr rKP,rKS
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
/* gt <=> at least one more whole block left */
cmpwi rLN,15
LOAD_DATA(rD3, 12)
/* chain: plaintext xor previous ciphertext (or IV) */
xor rD0,rD0,rI0
xor rD1,rD1,rI1
xor rD2,rD2,rI2
xor rD3,rD3,rI3
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
/* ciphertext goes to the output and becomes the next chaining value */
xor rI0,rD0,rW0
SAVE_DATA(rI0, 0)
xor rI1,rD1,rW1
SAVE_DATA(rI1, 4)
xor rI2,rD2,rW2
SAVE_DATA(rI2, 8)
xor rI3,rD3,rW3
SAVE_DATA(rI3, 12)
NEXT_BLOCK
bt gt,ppc_encrypt_cbc_loop
/* hand the last ciphertext block back as IV for a follow-up call */
START_IV
SAVE_IV(rI0, 0)
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(4)
blr
/*
* ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
* u32 rounds, u32 bytes, u8 *iv);
*
* called from glue layer to decrypt multiple blocks via CBC
* round values are AES128 = 4, AES192 = 5, AES256 = 6
*
* Blocks are processed from the last one to the first one:
*   - rLN is rounded down to a multiple of 16 and reduced by 16, giving
*     the offset of the last block; rSP/rDP are moved there
*   - the original IV is kept in rI0-rI3, the last ciphertext block is
*     loaded and stored into the IV buffer right away as the next IV
*   - each loop pass decrypts the block held in rD0-rD3, loads the
*     preceding ciphertext block, xors both and stores the plaintext
*   - the first block is handled after the loop and xored with the
*     original IV
* CBC_DEC is the pointer step back to the preceding block; it accounts
* for the pointer increments LOAD_DATA/SAVE_DATA perform on little endian.
*/
_GLOBAL(ppc_decrypt_cbc)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
/* rT1 is used as scratch for the mask first, as table pointer later */
li rT1,15
LOAD_IV(rI0, 0)
/* drop a partial trailing block */
andc rLN,rLN,rT1
LOAD_IV(rI1, 4)
/* rLN = byte offset of the last block */
subi rLN,rLN,16
LOAD_IV(rI2, 8)
add rSP,rSP,rLN /* reverse processing */
LOAD_IV(rI3, 12)
add rDP,rDP,rLN
LOAD_DATA(rD0, 0)
addi rT1,rT0,4096
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
/* last ciphertext block becomes the IV for a follow-up call */
START_IV
SAVE_IV(rD0, 0)
SAVE_IV(rD1, 4)
SAVE_IV(rD2, 8)
/* lt <=> only a single block, skip the loop */
cmpwi rLN,16
SAVE_IV(rD3, 12)
bt lt,ppc_decrypt_cbc_end
ppc_decrypt_cbc_loop:
mr rKP,rKS
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
subi rLN,rLN,16
/* step the source pointer back to the preceding ciphertext block */
subi rSP,rSP,CBC_DEC
/* finish the decryption into rW, reload rD with the preceding block */
xor rW0,rD0,rW0
LOAD_DATA(rD0, 0)
xor rW1,rD1,rW1
LOAD_DATA(rD1, 4)
xor rW2,rD2,rW2
LOAD_DATA(rD2, 8)
xor rW3,rD3,rW3
LOAD_DATA(rD3, 12)
/* unchain with the preceding ciphertext block and store the plaintext */
xor rW0,rW0,rD0
SAVE_DATA(rW0, 0)
xor rW1,rW1,rD1
SAVE_DATA(rW1, 4)
xor rW2,rW2,rD2
SAVE_DATA(rW2, 8)
xor rW3,rW3,rD3
SAVE_DATA(rW3, 12)
/* gt <=> the block now in rD is not the first one yet */
cmpwi rLN,15
subi rDP,rDP,CBC_DEC
bt gt,ppc_decrypt_cbc_loop
ppc_decrypt_cbc_end:
/* first block: rD0-rD3 hold its ciphertext */
mr rKP,rKS
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
xor rW0,rW0,rD0
xor rW1,rW1,rD1
xor rW2,rW2,rD2
xor rW3,rW3,rD3
xor rW0,rW0,rI0 /* decrypt with initial IV */
SAVE_DATA(rW0, 0)
xor rW1,rW1,rI1
SAVE_DATA(rW1, 4)
xor rW2,rW2,rI2
SAVE_DATA(rW2, 8)
xor rW3,rW3,rI3
SAVE_DATA(rW3, 12)
FINALIZE_CRYPT(4)
blr
/*
* ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
* u32 rounds, u32 bytes, u8 *iv);
*
* called from glue layer to encrypt/decrypt multiple blocks
* via CTR. Number of bytes does not need to be a multiple of
* 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
*
* rI0-rI3 hold the 128 bit counter (rI3 is the least significant word).
* Whole blocks: the counter is encrypted, the result is xored with the
* input and the counter is incremented.
* Trailing 1..15 bytes: the encrypted counter is written to the IV buffer
* as scratch space and xored byte by byte with the input; afterwards the
* counter is incremented once more.
* In all cases the current counter is stored to the IV buffer on exit.
*/
_GLOBAL(ppc_crypt_ctr)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
LOAD_IV(rI0, 0)
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
/* lt <=> less than one whole block, go straight to the byte path */
cmpwi rLN,16
LOAD_IV(rI3, 12)
START_IV
bt lt,ppc_crypt_ctr_partial
ppc_crypt_ctr_loop:
mr rKP,rKS
/* encrypt the counter itself, not the data */
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
/* rW0-rW3 = key stream block */
xor rW0,rD0,rW0
xor rW1,rD1,rW1
xor rW2,rD2,rW2
xor rW3,rD3,rW3
LOAD_DATA(rD0, 0)
subi rLN,rLN,16
LOAD_DATA(rD1, 4)
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
xor rD0,rD0,rW0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rW1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rW2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rW3
SAVE_DATA(rD3, 12)
addic rI3,rI3,1 /* increase counter */
/* ripple the carry through the upper three words */
addze rI2,rI2
addze rI1,rI1
addze rI0,rI0
NEXT_BLOCK
cmpwi rLN,15
bt gt,ppc_crypt_ctr_loop
ppc_crypt_ctr_partial:
/* nothing left: just store the counter */
cmpwi rLN,0
bt eq,ppc_crypt_ctr_end
mr rKP,rKS
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
/* park the key stream block in the IV buffer for bytewise access */
xor rW0,rD0,rW0
SAVE_IV(rW0, 0)
xor rW1,rD1,rW1
SAVE_IV(rW1, 4)
xor rW2,rD2,rW2
SAVE_IV(rW2, 8)
xor rW3,rD3,rW3
SAVE_IV(rW3, 12)
/* one loop pass per remaining byte */
mtctr rLN
/* point all three pointers one byte before their data for lbzu/stbu */
subi rIP,rIP,CTR_DEC
subi rSP,rSP,1
subi rDP,rDP,1
ppc_crypt_ctr_xorbyte:
lbzu rW4,1(rIP) /* bytewise xor for partial block */
lbzu rW5,1(rSP)
xor rW4,rW4,rW5
stbu rW4,1(rDP)
bdnz ppc_crypt_ctr_xorbyte
/* rIP back to the start of the IV buffer */
subf rIP,rLN,rIP
addi rIP,rIP,1
/* the partial block consumed a counter value as well */
addic rI3,rI3,1
addze rI2,rI2
addze rI1,rI1
addze rI0,rI0
ppc_crypt_ctr_end:
/* store the counter, overwriting the scratch key stream if any */
SAVE_IV(rI0, 0)
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(4)
blr
/*
* ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
* u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
*
* called from glue layer to encrypt multiple blocks via XTS
* If key_twk is given, the initial IV encryption will be
* processed too. Round values are AES128 = 4, AES192 = 5,
* AES256 = 6
*
* Bytes must be larger or equal 16 and only whole blocks are
* processed; a trailing partial block is ignored.
*
* rI0-rI3 hold the tweak in the form that is xored with the data,
* rG0-rG3 hold a byte reversed copy on which the GF(2^128)
* multiplication is done. The tweak for the next block is written back
* to the IV buffer on exit.
*/
_GLOBAL(ppc_encrypt_xts)
INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
LOAD_IV(rI0, 0)
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
/* a NULL tweak key means the IV already is the tweak */
cmpwi rKT,0
LOAD_IV(rI3, 12)
bt eq,ppc_encrypt_xts_notweak
/* encrypt the IV with the tweak key to get the initial tweak */
mr rKP,rKT
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
xor rI0,rD0,rW0
xor rI1,rD1,rW1
xor rI2,rD2,rW2
xor rI3,rD3,rW3
ppc_encrypt_xts_notweak:
ENDIAN_SWAP(rG0, rG1, rI0, rI1)
ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_encrypt_xts_loop:
LOAD_DATA(rD0, 0)
/* restart key schedule from the saved data key pointer */
mr rKP,rKS
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
/* pre-whitening with the tweak */
xor rD0,rD0,rI0
xor rD1,rD1,rI1
xor rD2,rD2,rI2
xor rD3,rD3,rI3
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_encrypt_block
xor rD0,rD0,rW0
xor rD1,rD1,rW1
xor rD2,rD2,rW2
xor rD3,rD3,rW3
/* post-whitening with the same tweak */
xor rD0,rD0,rI0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rI1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rI2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rI3
SAVE_DATA(rD3, 12)
/* next tweak = tweak * x; GF128_MUL clobbers cr0, so compare afterwards */
GF128_MUL(rG0, rG1, rG2, rG3, rW0)
ENDIAN_SWAP(rI0, rI1, rG0, rG1)
ENDIAN_SWAP(rI2, rI3, rG2, rG3)
/*
 * Continue only while another whole block is left. Comparing against 15
 * (as the ECB/CBC loops do) instead of 0 keeps a byte count that is not
 * a multiple of 16 from triggering one more full 16 byte pass beyond the
 * end of the buffers. For multiples of 16 the result is unchanged.
 */
cmpwi rLN,15
NEXT_BLOCK
bt gt,ppc_encrypt_xts_loop
/* hand the follow-up tweak back through the IV buffer */
START_IV
SAVE_IV(rI0, 0)
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(8)
blr
/*
* ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
* u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
*
* called from glue layer to decrypt multiple blocks via XTS
* If key_twk is given, the initial IV encryption will be
* processed too. Round values are AES128 = 4, AES192 = 5,
* AES256 = 6
*
* Bytes must be larger or equal 16 and only whole blocks are
* processed; a trailing partial block is ignored.
*
* rI0-rI3 hold the tweak in the form that is xored with the data,
* rG0-rG3 hold a byte reversed copy on which the GF(2^128)
* multiplication is done. The tweak for the next block is written back
* to the IV buffer on exit.
*/
_GLOBAL(ppc_decrypt_xts)
INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
LOAD_IV(rI0, 0)
/* second table pointer for ppc_decrypt_block */
addi rT1,rT0,4096
LOAD_IV(rI1, 4)
LOAD_IV(rI2, 8)
/* a NULL tweak key means the IV already is the tweak */
cmpwi rKT,0
LOAD_IV(rI3, 12)
bt eq,ppc_decrypt_xts_notweak
/*
 * The tweak is always produced by encryption. rT0 is moved 4096 bytes
 * down for the ppc_encrypt_block call and restored afterwards.
 * NOTE(review): this presumes the encryption table is located directly
 * in front of PPC_AES_4K_DECTAB - confirm against the table definition.
 */
subi rT0,rT0,4096
mr rKP,rKT
START_KEY(rI0, rI1, rI2, rI3)
bl ppc_encrypt_block
xor rI0,rD0,rW0
xor rI1,rD1,rW1
xor rI2,rD2,rW2
xor rI3,rD3,rW3
addi rT0,rT0,4096
ppc_decrypt_xts_notweak:
ENDIAN_SWAP(rG0, rG1, rI0, rI1)
ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_decrypt_xts_loop:
LOAD_DATA(rD0, 0)
/* restart key schedule from the saved data key pointer */
mr rKP,rKS
LOAD_DATA(rD1, 4)
subi rLN,rLN,16
LOAD_DATA(rD2, 8)
LOAD_DATA(rD3, 12)
/* pre-whitening with the tweak */
xor rD0,rD0,rI0
xor rD1,rD1,rI1
xor rD2,rD2,rI2
xor rD3,rD3,rI3
START_KEY(rD0, rD1, rD2, rD3)
bl ppc_decrypt_block
xor rD0,rD0,rW0
xor rD1,rD1,rW1
xor rD2,rD2,rW2
xor rD3,rD3,rW3
/* post-whitening with the same tweak */
xor rD0,rD0,rI0
SAVE_DATA(rD0, 0)
xor rD1,rD1,rI1
SAVE_DATA(rD1, 4)
xor rD2,rD2,rI2
SAVE_DATA(rD2, 8)
xor rD3,rD3,rI3
SAVE_DATA(rD3, 12)
/* next tweak = tweak * x; GF128_MUL clobbers cr0, so compare afterwards */
GF128_MUL(rG0, rG1, rG2, rG3, rW0)
ENDIAN_SWAP(rI0, rI1, rG0, rG1)
ENDIAN_SWAP(rI2, rI3, rG2, rG3)
/*
 * Continue only while another whole block is left. Comparing against 15
 * (as the ECB/CBC loops do) instead of 0 keeps a byte count that is not
 * a multiple of 16 from triggering one more full 16 byte pass beyond the
 * end of the buffers. For multiples of 16 the result is unchanged.
 */
cmpwi rLN,15
NEXT_BLOCK
bt gt,ppc_decrypt_xts_loop
/* hand the follow-up tweak back through the IV buffer */
START_IV
SAVE_IV(rI0, 0)
SAVE_IV(rI1, 4)
SAVE_IV(rI2, 8)
SAVE_IV(rI3, 12)
FINALIZE_CRYPT(8)
blr