crypto: arm64/aes-blk - add support for CTS-CBC mode
Currently, we rely on the generic CTS chaining mode wrapper to instantiate the cts(cbc(aes)) skcipher. Due to the high performance of the ARMv8 Crypto Extensions AES instructions (~1 cycle per byte), any overhead in the chaining mode layers is amplified, so it pays off considerably to fold the CTS handling into the SIMD routines.

On Cortex-A53, this results in a ~50% speedup for smaller input sizes.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 6e7de6af91
commit dd597fb33f
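For readers unfamiliar with the mode: cts(cbc(aes)) is plain CBC for all but the last two blocks, followed by ciphertext stealing (the CS3 variant used by the kernel's cts template), which zero-pads the final partial block, chains it against the penultimate ciphertext block, and emits the last two blocks in swapped order with the stolen block truncated. The C model below is purely illustrative and not part of the patch; block_encrypt() is a toy stand-in for AES (NOT a real cipher), and the function assumes an input longer than one block.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLK 16

/* toy 16-byte "cipher": keyed byte substitution, stand-in for AES only */
static void block_encrypt(uint8_t out[BLK], const uint8_t in[BLK],
                          const uint8_t key[BLK])
{
        for (int i = 0; i < BLK; i++)
                out[i] = (uint8_t)(in[i] ^ key[i] ^ (i * 0x1d));
}

/*
 * CBC-CS3 encryption of len bytes (len > BLK): plain CBC for all but the
 * last two blocks, then encrypt the zero-padded tail against the
 * second-to-last ciphertext block and emit the final two blocks swapped.
 */
static void cbc_cs3_encrypt(uint8_t *out, const uint8_t *in, size_t len,
                            const uint8_t key[BLK], const uint8_t iv[BLK])
{
        uint8_t cv[BLK], buf[BLK], cx[BLK];
        size_t tail = len % BLK ? len % BLK : BLK;      /* size of final block */
        size_t main_len = len - tail - BLK;             /* plain CBC portion */

        memcpy(cv, iv, BLK);
        for (size_t off = 0; off < main_len; off += BLK) {
                for (int i = 0; i < BLK; i++)
                        buf[i] = in[off + i] ^ cv[i];
                block_encrypt(out + off, buf, key);
                memcpy(cv, out + off, BLK);
        }

        /* cx = E(P_{n-1} ^ cv): would be the next ciphertext block in plain CBC */
        for (int i = 0; i < BLK; i++)
                buf[i] = in[main_len + i] ^ cv[i];
        block_encrypt(cx, buf, key);

        /* zero-pad the final partial block and chain it against cx */
        memset(buf, 0, BLK);
        memcpy(buf, in + main_len + BLK, tail);
        for (int i = 0; i < BLK; i++)
                buf[i] ^= cx[i];

        /* swapped output: full last block first, then the truncated cx */
        block_encrypt(out + main_len, buf, key);
        memcpy(out + main_len + BLK, cx, tail);
}

int main(void)
{
        static const uint8_t key[BLK] = "0123456789abcdef";
        static const uint8_t iv[BLK]  = "IVIVIVIVIVIVIVIV";
        const uint8_t msg[21] = "21 bytes of plaintext";
        uint8_t ct[21];

        cbc_cs3_encrypt(ct, msg, sizeof(msg), key, iv);
        for (size_t i = 0; i < sizeof(ct); i++)
                printf("%02x", ct[i]);
        printf("\n");
        return 0;
}

Note that the output is exactly as long as the input, which is the whole point of ciphertext stealing; the patch below implements the same final-two-block step in a single SIMD call.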
@@ -15,6 +15,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/module.h>
 #include <linux/cpufeature.h>
 #include <crypto/xts.h>
@@ -31,6 +32,8 @@
 #define aes_ecb_decrypt ce_aes_ecb_decrypt
 #define aes_cbc_encrypt ce_aes_cbc_encrypt
 #define aes_cbc_decrypt ce_aes_cbc_decrypt
+#define aes_cbc_cts_encrypt ce_aes_cbc_cts_encrypt
+#define aes_cbc_cts_decrypt ce_aes_cbc_cts_decrypt
 #define aes_ctr_encrypt ce_aes_ctr_encrypt
 #define aes_xts_encrypt ce_aes_xts_encrypt
 #define aes_xts_decrypt ce_aes_xts_decrypt
@@ -45,6 +48,8 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
 #define aes_ecb_decrypt neon_aes_ecb_decrypt
 #define aes_cbc_encrypt neon_aes_cbc_encrypt
 #define aes_cbc_decrypt neon_aes_cbc_decrypt
+#define aes_cbc_cts_encrypt neon_aes_cbc_cts_encrypt
+#define aes_cbc_cts_decrypt neon_aes_cbc_cts_decrypt
 #define aes_ctr_encrypt neon_aes_ctr_encrypt
 #define aes_xts_encrypt neon_aes_xts_encrypt
 #define aes_xts_decrypt neon_aes_xts_decrypt
@@ -73,6 +78,11 @@ asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
 asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
                                 int rounds, int blocks, u8 iv[]);
 
+asmlinkage void aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+                                    int rounds, int bytes, u8 const iv[]);
+asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+                                    int rounds, int bytes, u8 const iv[]);
+
 asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
                                 int rounds, int blocks, u8 ctr[]);
 
@@ -87,6 +97,12 @@ asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
                                int blocks, u8 dg[], int enc_before,
                                int enc_after);
 
+struct cts_cbc_req_ctx {
+        struct scatterlist sg_src[2];
+        struct scatterlist sg_dst[2];
+        struct skcipher_request subreq;
+};
+
 struct crypto_aes_xts_ctx {
         struct crypto_aes_ctx key1;
         struct crypto_aes_ctx __aligned(8) key2;
@@ -209,6 +225,136 @@ static int cbc_decrypt(struct skcipher_request *req)
         return err;
 }
 
+static int cts_cbc_init_tfm(struct crypto_skcipher *tfm)
+{
+        crypto_skcipher_set_reqsize(tfm, sizeof(struct cts_cbc_req_ctx));
+        return 0;
+}
+
+static int cts_cbc_encrypt(struct skcipher_request *req)
+{
+        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+        struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+        struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
+        int err, rounds = 6 + ctx->key_length / 4;
+        int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+        struct scatterlist *src = req->src, *dst = req->dst;
+        struct skcipher_walk walk;
+
+        skcipher_request_set_tfm(&rctx->subreq, tfm);
+
+        if (req->cryptlen == AES_BLOCK_SIZE)
+                cbc_blocks = 1;
+
+        if (cbc_blocks > 0) {
+                unsigned int blocks;
+
+                skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+                                           cbc_blocks * AES_BLOCK_SIZE,
+                                           req->iv);
+
+                err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+
+                while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+                        kernel_neon_begin();
+                        aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+                                        ctx->key_enc, rounds, blocks, walk.iv);
+                        kernel_neon_end();
+                        err = skcipher_walk_done(&walk,
+                                                 walk.nbytes % AES_BLOCK_SIZE);
+                }
+                if (err)
+                        return err;
+
+                if (req->cryptlen == AES_BLOCK_SIZE)
+                        return 0;
+
+                dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
+                                             rctx->subreq.cryptlen);
+                if (req->dst != req->src)
+                        dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
+                                               rctx->subreq.cryptlen);
+        }
+
+        /* handle ciphertext stealing */
+        skcipher_request_set_crypt(&rctx->subreq, src, dst,
+                                   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+                                   req->iv);
+
+        err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+        if (err)
+                return err;
+
+        kernel_neon_begin();
+        aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+                            ctx->key_enc, rounds, walk.nbytes, walk.iv);
+        kernel_neon_end();
+
+        return skcipher_walk_done(&walk, 0);
+}
+
+static int cts_cbc_decrypt(struct skcipher_request *req)
+{
+        struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+        struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+        struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
+        int err, rounds = 6 + ctx->key_length / 4;
+        int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+        struct scatterlist *src = req->src, *dst = req->dst;
+        struct skcipher_walk walk;
+
+        skcipher_request_set_tfm(&rctx->subreq, tfm);
+
+        if (req->cryptlen == AES_BLOCK_SIZE)
+                cbc_blocks = 1;
+
+        if (cbc_blocks > 0) {
+                unsigned int blocks;
+
+                skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+                                           cbc_blocks * AES_BLOCK_SIZE,
+                                           req->iv);
+
+                err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+
+                while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+                        kernel_neon_begin();
+                        aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+                                        ctx->key_dec, rounds, blocks, walk.iv);
+                        kernel_neon_end();
+                        err = skcipher_walk_done(&walk,
+                                                 walk.nbytes % AES_BLOCK_SIZE);
+                }
+                if (err)
+                        return err;
+
+                if (req->cryptlen == AES_BLOCK_SIZE)
+                        return 0;
+
+                dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
+                                             rctx->subreq.cryptlen);
+                if (req->dst != req->src)
+                        dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
+                                               rctx->subreq.cryptlen);
+        }
+
+        /* handle ciphertext stealing */
+        skcipher_request_set_crypt(&rctx->subreq, src, dst,
+                                   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+                                   req->iv);
+
+        err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+        if (err)
+                return err;
+
+        kernel_neon_begin();
+        aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+                            ctx->key_dec, rounds, walk.nbytes, walk.iv);
+        kernel_neon_end();
+
+        return skcipher_walk_done(&walk, 0);
+}
+
 static int ctr_encrypt(struct skcipher_request *req)
 {
         struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
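To see how the glue code above splits a request, consider an illustrative cryptlen of 53 bytes (a made-up value, not taken from the patch): DIV_ROUND_UP(53, 16) - 2 = 2, so 32 bytes go through the ordinary aes_cbc_encrypt() walk and the remaining 21 bytes, one full block plus a 5-byte tail, are handed to aes_cbc_cts_encrypt() in a single call. A standalone check, with the kernel macros redefined locally:

#include <stdio.h>

#define AES_BLOCK_SIZE  16
#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

int main(void)
{
        int cryptlen = 53;      /* arbitrary example length */
        int cbc_blocks = DIV_ROUND_UP(cryptlen, AES_BLOCK_SIZE) - 2;

        printf("CBC walk: %d bytes\n", cbc_blocks * AES_BLOCK_SIZE);           /* 32 */
        printf("CTS tail: %d bytes\n", cryptlen - cbc_blocks * AES_BLOCK_SIZE); /* 21 */
        return 0;
}

The tail handed to the assembly is therefore always between 17 and 32 bytes (or exactly 16, in which case the code returns after the plain CBC pass).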
@@ -334,6 +480,25 @@ static struct skcipher_alg aes_algs[] = { {
         .setkey         = skcipher_aes_setkey,
         .encrypt        = cbc_encrypt,
         .decrypt        = cbc_decrypt,
+}, {
+        .base = {
+                .cra_name               = "__cts(cbc(aes))",
+                .cra_driver_name        = "__cts-cbc-aes-" MODE,
+                .cra_priority           = PRIO,
+                .cra_flags              = CRYPTO_ALG_INTERNAL,
+                .cra_blocksize          = 1,
+                .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
+                .cra_module             = THIS_MODULE,
+        },
+        .min_keysize    = AES_MIN_KEY_SIZE,
+        .max_keysize    = AES_MAX_KEY_SIZE,
+        .ivsize         = AES_BLOCK_SIZE,
+        .chunksize      = AES_BLOCK_SIZE,
+        .walksize       = 2 * AES_BLOCK_SIZE,
+        .setkey         = skcipher_aes_setkey,
+        .encrypt        = cts_cbc_encrypt,
+        .decrypt        = cts_cbc_decrypt,
+        .init           = cts_cbc_init_tfm,
 }, {
         .base = {
                 .cra_name               = "__ctr(aes)",
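The new algorithm is registered under the internal name "__cts(cbc(aes))" (CRYPTO_ALG_INTERNAL); like the other internal algorithms in this driver it is then wrapped so that it can be resolved by the usual "cts(cbc(aes))" name. For context, a typical in-kernel consumer would look roughly like the sketch below. This is not part of the patch: buf is assumed to be a kmalloc'ed buffer of at least one block (scatterlists must not point at the stack), and error handling is kept minimal.

#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/err.h>

static int cts_cbc_demo(u8 *buf, unsigned int len, const u8 *key,
                        unsigned int keylen, u8 iv[16])
{
        struct crypto_skcipher *tfm;
        struct skcipher_request *req;
        struct scatterlist sg;
        DECLARE_CRYPTO_WAIT(wait);
        int err;

        /* resolves to the native driver added by this patch when present */
        tfm = crypto_alloc_skcipher("cts(cbc(aes))", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        err = crypto_skcipher_setkey(tfm, key, keylen);
        if (err)
                goto out_free_tfm;

        req = skcipher_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
                err = -ENOMEM;
                goto out_free_tfm;
        }

        sg_init_one(&sg, buf, len);
        skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
                                      crypto_req_done, &wait);
        skcipher_request_set_crypt(req, &sg, &sg, len, iv);

        /* in-place encryption; output is the same length as the input */
        err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

        skcipher_request_free(req);
out_free_tfm:
        crypto_free_skcipher(tfm);
        return err;
}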
@@ -170,6 +170,84 @@ AES_ENTRY(aes_cbc_decrypt)
 AES_ENDPROC(aes_cbc_decrypt)
 
 
+        /*
+         * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+         *                     int rounds, int bytes, u8 const iv[])
+         * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+         *                     int rounds, int bytes, u8 const iv[])
+         */
+
+AES_ENTRY(aes_cbc_cts_encrypt)
+        adr_l           x8, .Lcts_permute_table
+        sub             x4, x4, #16
+        add             x9, x8, #32
+        add             x8, x8, x4
+        sub             x9, x9, x4
+        ld1             {v3.16b}, [x8]
+        ld1             {v4.16b}, [x9]
+
+        ld1             {v0.16b}, [x1], x4              /* overlapping loads */
+        ld1             {v1.16b}, [x1]
+
+        ld1             {v5.16b}, [x5]                  /* get iv */
+        enc_prepare     w3, x2, x6
+
+        eor             v0.16b, v0.16b, v5.16b          /* xor with iv */
+        tbl             v1.16b, {v1.16b}, v4.16b
+        encrypt_block   v0, w3, x2, x6, w7
+
+        eor             v1.16b, v1.16b, v0.16b
+        tbl             v0.16b, {v0.16b}, v3.16b
+        encrypt_block   v1, w3, x2, x6, w7
+
+        add             x4, x0, x4
+        st1             {v0.16b}, [x4]                  /* overlapping stores */
+        st1             {v1.16b}, [x0]
+        ret
+AES_ENDPROC(aes_cbc_cts_encrypt)
+
+AES_ENTRY(aes_cbc_cts_decrypt)
+        adr_l           x8, .Lcts_permute_table
+        sub             x4, x4, #16
+        add             x9, x8, #32
+        add             x8, x8, x4
+        sub             x9, x9, x4
+        ld1             {v3.16b}, [x8]
+        ld1             {v4.16b}, [x9]
+
+        ld1             {v0.16b}, [x1], x4              /* overlapping loads */
+        ld1             {v1.16b}, [x1]
+
+        ld1             {v5.16b}, [x5]                  /* get iv */
+        dec_prepare     w3, x2, x6
+
+        tbl             v2.16b, {v1.16b}, v4.16b
+        decrypt_block   v0, w3, x2, x6, w7
+        eor             v2.16b, v2.16b, v0.16b
+
+        tbx             v0.16b, {v1.16b}, v4.16b
+        tbl             v2.16b, {v2.16b}, v3.16b
+        decrypt_block   v0, w3, x2, x6, w7
+        eor             v0.16b, v0.16b, v5.16b          /* xor with iv */
+
+        add             x4, x0, x4
+        st1             {v2.16b}, [x4]                  /* overlapping stores */
+        st1             {v0.16b}, [x0]
+        ret
+AES_ENDPROC(aes_cbc_cts_decrypt)
+
+        .section        ".rodata", "a"
+        .align          6
+.Lcts_permute_table:
+        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+        .byte            0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+        .byte            0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
+        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+        .previous
+
+
         /*
          * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
          *                 int blocks, u8 ctr[])
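The only non-obvious part of the assembly is the .Lcts_permute_table / tbl / tbx usage: by loading the index vector from an offset that depends on the tail size, the partial block is shifted within a vector register without any scalar loop, and the 0xff padding on either side of the 0x0-0xf indices produces zero bytes (tbl) or leaves the destination byte unchanged (tbx) for out-of-range lanes. A small C model of the single-register TBL/TBX semantics, purely for illustration and not kernel code:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* TBL: index < 16 selects a source byte, anything else yields zero */
static void tbl16(uint8_t d[16], const uint8_t src[16], const uint8_t idx[16])
{
        for (int i = 0; i < 16; i++)
                d[i] = idx[i] < 16 ? src[idx[i]] : 0;
}

/* TBX: same, but out-of-range indices leave the destination byte alone */
static void tbx16(uint8_t d[16], const uint8_t src[16], const uint8_t idx[16])
{
        for (int i = 0; i < 16; i++)
                if (idx[i] < 16)
                        d[i] = src[idx[i]];
}

static const uint8_t cts_permute_table[48] = {
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};

int main(void)
{
        /* pretend the final partial block is d = 5 bytes, 'A'..'E' */
        int d = 5;
        uint8_t last16[16], out[16];

        memset(last16, '.', sizeof(last16));
        memcpy(last16 + 16 - d, "ABCDE", d);    /* tail sits at the end of the load */

        /* v4 in the asm: table + 32 - d, i.e. move the tail to the front, zero-pad */
        tbl16(out, last16, cts_permute_table + 32 - d);
        for (int i = 0; i < 16; i++)
                printf("%02x ", out[i]);
        printf("\n");

        (void)tbx16;    /* decrypt path uses TBX to merge rather than zero-fill */
        return 0;
}

Running it prints 41 42 43 44 45 00 00 ...: the five-byte tail has been moved to the front of the register and zero-padded, which is exactly the preparation of v1 before the final encrypt_block in aes_cbc_cts_encrypt.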
@@ -253,7 +331,6 @@ AES_ENTRY(aes_ctr_encrypt)
         ins             v4.d[0], x7
         b               .Lctrcarrydone
 AES_ENDPROC(aes_ctr_encrypt)
-        .ltorg
 
 
         /*