2019-06-04 15:11:33 +07:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2014-03-21 16:19:17 +07:00
|
|
|
/*
 * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
 *                                    Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */
|
|
|
|
|
|
|
|
#include <linux/linkage.h>
|
2016-10-12 01:15:19 +07:00
|
|
|
#include <asm/assembler.h>
|
2014-03-21 16:19:17 +07:00
|
|
|
|
|
|
|
/*
 * Symbol wrappers: both paste a ce_ prefix onto the routine name before
 * handing it to ENTRY()/ENDPROC(), so AES_ENTRY(foo) defines ce_foo.
 * NOTE(review): presumably consumed by the aes-modes.S that is included
 * at the end of this file - confirm there.
 */
#define AES_ENTRY(func)		ENTRY(ce_ ## func)
#define AES_ENDPROC(func)	ENDPROC(ce_ ## func)

/* Let the assembler accept the aes* Crypto Extensions instructions below. */
.arch		armv8-a+crypto
|
|
|
|
|
2018-09-10 21:41:15 +07:00
|
|
|
/*
 * Named views of v16.  All three aliases resolve to the same register.
 *
 * The round keys occupy v17-v31 (see load_round_keys), and the change
 * description for the 5-way interleave of ECB, CBC decryption and CTR
 * notes that v16 is not otherwise used by the CE version of the code,
 * which lets the CBC and CTR paths keep state there instead of spilling
 * it to memory.
 * NOTE(review): the users of these aliases are not in this file; confirm
 * that no two of them need v16 live at the same time.
 */
xtsmask		.req	v16
cbciv		.req	v16
vctr		.req	v16
|
2018-09-10 21:41:15 +07:00
|
|
|
|
|
|
|
/*
 * xts_reload_mask - expands to nothing in this implementation.
 *
 * tmp: accepted but unused.
 * NOTE(review): presumably a hook for implementations whose cipher code
 * overwrites the XTS mask register - confirm against the other users.
 */
.macro		xts_reload_mask, tmp
.endm
|
|
|
|
|
2014-03-21 16:19:17 +07:00
|
|
|
/*
 * preload all round keys
 *
 * rounds: round count; compared against 12 to pick the entry point
 * rk:     address of the first round key.  The loads are post-indexed,
 *         so this register is advanced (by up to 192 bytes) - pass a
 *         scratch copy if the original pointer is still needed.
 *
 * Registers written:
 *   rounds >  12 : v17-v31
 *   rounds == 12 : v19-v31  (192 bits)
 *   rounds <  12 : v21-v31  (128 bits)
 * The last key always ends up in v31.  Also clobbers the condition flags.
 */
.macro		load_round_keys, rounds, rk
	cmp		\rounds, #12
	blo		2222f		/* 128 bits */
	beq		1111f		/* 192 bits */
	ld1		{v17.4s-v18.4s}, [\rk], #32
1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
	ld1		{v25.4s-v28.4s}, [\rk], #64
	ld1		{v29.4s-v31.4s}, [\rk]
.endm
|
|
|
|
|
|
|
|
/*
 * prepare for encryption with key in rk[]
 *
 * rounds: round count, forwarded to load_round_keys
 * rk:     round-key pointer; left unmodified
 * temp:   scratch register; receives a copy of rk and is then advanced
 *         by the post-indexed loads in load_round_keys
 *
 * Clobbers \temp, the condition flags and the round-key registers.
 */
.macro		enc_prepare, rounds, rk, temp
	mov		\temp, \rk
	load_round_keys	\rounds, \temp
.endm
|
|
|
|
|
|
|
|
/*
 * prepare for encryption (again) but with new key in rk[]
 *
 * Same expansion as enc_prepare: all round-key registers are simply
 * reloaded from the new schedule.
 *
 * rounds: round count, forwarded to load_round_keys
 * rk:     pointer to the new round keys; left unmodified
 * temp:   scratch register; holds the working copy of rk that
 *         load_round_keys advances
 */
.macro		enc_switch_key, rounds, rk, temp
	mov		\temp, \rk
	load_round_keys	\rounds, \temp
.endm
|
|
|
|
|
|
|
|
/*
 * prepare for decryption with key in rk[]
 *
 * The expansion is identical to enc_prepare; nothing here transforms
 * the keys.
 * NOTE(review): this implies the caller passes a schedule that is
 * already suitable for aesd/aesimc - confirm at the call sites.
 *
 * rounds: round count, forwarded to load_round_keys
 * rk:     round-key pointer; left unmodified
 * temp:   scratch register; holds the working copy of rk that
 *         load_round_keys advances
 */
.macro		dec_prepare, rounds, rk, temp
	mov		\temp, \rk
	load_round_keys	\rounds, \temp
.endm
|
|
|
|
|
2019-06-25 00:38:30 +07:00
|
|
|
/*
 * do_enc_Nx - one full round on up to 5 interleaved blocks.
 *
 * de:     suffix completing the first mnemonic  (aes\de: e or d)
 * mc:     suffix completing the second mnemonic (aes\mc: mc or imc)
 * k:      bare name of the round-key register (".16b" is appended here)
 * i0:     bare name of the first state register (required)
 * i1-i4:  further state registers (optional)
 *
 * For every block, aes\de with \k is immediately followed by aes\mc on
 * the same register.
 *
 * i2 is emitted under the "\i3 is non-blank" test, so the usable block
 * counts are 1, 2, 4 and 5; a lone third operand would be dropped
 * without a diagnostic.
 */
.macro		do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4
	aes\de		\i0\().16b, \k\().16b
	aes\mc		\i0\().16b, \i0\().16b
	.ifnb		\i1
	aes\de		\i1\().16b, \k\().16b
	aes\mc		\i1\().16b, \i1\().16b
	.ifnb		\i3
	aes\de		\i2\().16b, \k\().16b
	aes\mc		\i2\().16b, \i2\().16b
	aes\de		\i3\().16b, \k\().16b
	aes\mc		\i3\().16b, \i3\().16b
	.ifnb		\i4
	aes\de		\i4\().16b, \k\().16b
	aes\mc		\i4\().16b, \i4\().16b
	.endif
	.endif
	.endif
.endm
|
|
|
|
|
2019-06-25 00:38:30 +07:00
|
|
|
/*
 * up to 5 interleaved encryption rounds with the same round key
 *
 * enc:    literal e selects aese + aesmc; any other value selects
 *         aesd + aesimc
 * k:      bare name of the round-key register
 * i0-i4:  state registers, forwarded unchanged to do_enc_Nx (blank
 *         operands stay blank)
 */
.macro		round_Nx, enc, k, i0, i1, i2, i3, i4
	.ifc		\enc, e
	do_enc_Nx	e, mc, \k, \i0, \i1, \i2, \i3, \i4
	.else
	do_enc_Nx	d, imc, \k, \i0, \i1, \i2, \i3, \i4
	.endif
.endm
|
|
|
|
|
2019-06-25 00:38:30 +07:00
|
|
|
/*
 * up to 5 interleaved final rounds
 *
 * de:     suffix completing the mnemonic (aes\de: e or d)
 * k:      bare name of the register holding the key used by aes\de
 * k2:     bare name of the register XORed into every block afterwards
 * i0:     first state register (required)
 * i1-i4:  further state registers (optional)
 *
 * Unlike a regular round there is no aesmc/aesimc step: each block gets
 * aes\de with \k, and once all blocks are done each one is XORed with
 * \k2.
 *
 * i2 is emitted under the "\i3 is non-blank" test, so the usable block
 * counts are 1, 2, 4 and 5.
 */
.macro		fin_round_Nx, de, k, k2, i0, i1, i2, i3, i4
	aes\de		\i0\().16b, \k\().16b
	.ifnb		\i1
	aes\de		\i1\().16b, \k\().16b
	.ifnb		\i3
	aes\de		\i2\().16b, \k\().16b
	aes\de		\i3\().16b, \k\().16b
	.ifnb		\i4
	aes\de		\i4\().16b, \k\().16b
	.endif
	.endif
	.endif
	eor		\i0\().16b, \i0\().16b, \k2\().16b
	.ifnb		\i1
	eor		\i1\().16b, \i1\().16b, \k2\().16b
	.ifnb		\i3
	eor		\i2\().16b, \i2\().16b, \k2\().16b
	eor		\i3\().16b, \i3\().16b, \k2\().16b
	.ifnb		\i4
	eor		\i4\().16b, \i4\().16b, \k2\().16b
	.endif
	.endif
	.endif
.endm
|
|
|
|
|
2019-06-25 00:38:30 +07:00
|
|
|
/*
 * up to 5 interleaved blocks
 *
 * enc:    e to encrypt, d to decrypt.  The value is also used verbatim
 *         as the mnemonic suffix of the final round, so it must be one
 *         of those two letters.
 * rounds: round count; compared against 12 to pick the entry point
 * i0-i4:  state registers, transformed in place (i1-i4 optional)
 *
 * Expects the round keys in v17-v31 as laid out by load_round_keys:
 *   rounds >  12 : enters at v17 - 13 full rounds + final round
 *   rounds == 12 : enters at v19 - 11 full rounds + final round
 *   rounds <  12 : enters at v21 -  9 full rounds + final round
 * The final round uses v30 for the aes step and v31 for the closing XOR.
 *
 * Clobbers the condition flags.
 */
.macro		do_block_Nx, enc, rounds, i0, i1, i2, i3, i4
	cmp		\rounds, #12
	blo		2222f		/* 128 bits */
	beq		1111f		/* 192 bits */
	round_Nx	\enc, v17, \i0, \i1, \i2, \i3, \i4
	round_Nx	\enc, v18, \i0, \i1, \i2, \i3, \i4
1111:	round_Nx	\enc, v19, \i0, \i1, \i2, \i3, \i4
	round_Nx	\enc, v20, \i0, \i1, \i2, \i3, \i4
2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
	round_Nx	\enc, \key, \i0, \i1, \i2, \i3, \i4
	.endr
	fin_round_Nx	\enc, v30, v31, \i0, \i1, \i2, \i3, \i4
.endm
|
|
|
|
|
|
|
|
/*
 * encrypt_block - encrypt the single block held in \in, in place.
 *
 * in:       bare name of the state register
 * rounds:   round count, forwarded to do_block_Nx
 * t0-t2:    accepted but unused here
 *
 * Round keys must already be loaded; clobbers the condition flags
 * (via the cmp in do_block_Nx).
 */
.macro		encrypt_block, in, rounds, t0, t1, t2
	do_block_Nx	e, \rounds, \in
.endm
|
|
|
|
|
|
|
|
/*
 * encrypt_block4x - encrypt four blocks in place, interleaved.
 *
 * i0-i3:    bare names of the four state registers
 * rounds:   round count, forwarded to do_block_Nx
 * t0-t2:    accepted but unused here
 *
 * Round keys must already be loaded; clobbers the condition flags
 * (via the cmp in do_block_Nx).
 */
.macro		encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
	do_block_Nx	e, \rounds, \i0, \i1, \i2, \i3
.endm
|
|
|
|
|
2019-06-25 00:38:30 +07:00
|
|
|
/*
 * encrypt_block5x - encrypt five blocks in place, interleaved.
 *
 * i0-i4:    bare names of the five state registers
 * rounds:   round count, forwarded to do_block_Nx
 * t0-t2:    accepted but unused here
 *
 * Round keys must already be loaded; clobbers the condition flags
 * (via the cmp in do_block_Nx).
 */
.macro		encrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
	do_block_Nx	e, \rounds, \i0, \i1, \i2, \i3, \i4
.endm
|
|
|
|
|
2019-06-25 00:38:30 +07:00
|
|
|
/*
 * decrypt_block - decrypt the single block held in \in, in place.
 *
 * in:       bare name of the state register
 * rounds:   round count, forwarded to do_block_Nx
 * t0-t2:    accepted but unused here
 *
 * Round keys must already be loaded; clobbers the condition flags
 * (via the cmp in do_block_Nx).
 */
.macro		decrypt_block, in, rounds, t0, t1, t2
	do_block_Nx	d, \rounds, \in
.endm
|
|
|
|
|
|
|
|
/*
 * decrypt_block4x - decrypt four blocks in place, interleaved.
 *
 * i0-i3:    bare names of the four state registers
 * rounds:   round count, forwarded to do_block_Nx
 * t0-t2:    accepted but unused here
 *
 * Round keys must already be loaded; clobbers the condition flags
 * (via the cmp in do_block_Nx).
 */
.macro		decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
	do_block_Nx	d, \rounds, \i0, \i1, \i2, \i3
.endm
|
|
|
|
|
2019-06-25 00:38:30 +07:00
|
|
|
/*
 * decrypt_block5x - decrypt five blocks in place, interleaved.
 *
 * i0-i4:    bare names of the five state registers
 * rounds:   round count, forwarded to do_block_Nx
 * t0-t2:    accepted but unused here
 *
 * Round keys must already be loaded; clobbers the condition flags
 * (via the cmp in do_block_Nx).
 */
.macro		decrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
	do_block_Nx	d, \rounds, \i0, \i1, \i2, \i3, \i4
.endm
|
|
|
|
|
|
|
|
/*
 * Widest interleave offered by the block macros in this file
 * (encrypt_block5x / decrypt_block5x).
 * NOTE(review): presumably read by aes-modes.S to choose between its
 * 4-way and 5-way code paths - confirm there.
 */
#define MAX_STRIDE		5

/*
 * Pull in the mode code; it is included only after every macro and
 * define above is in place.
 */
#include "aes-modes.S"
|