2019-06-04 15:11:33 +07:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2017-01-11 23:41:55 +07:00
|
|
|
/*
|
|
|
|
* Bit sliced AES using NEON instructions
|
|
|
|
*
|
2017-07-24 17:28:15 +07:00
|
|
|
* Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
|
2017-01-11 23:41:55 +07:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/aes.h>
#include <crypto/ctr.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <crypto/xts.h>
#include <linux/module.h>
#include <linux/string.h>
|
|
|
|
|
|
|
|
/* Module metadata. */
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");

/* Crypto algorithm aliases declared for this module. */
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
|
|
|
|
|
|
|
|
/*
 * Bit-sliced AES entry points. They are declared asmlinkage and have no C
 * definition in this file, so presumably they live in a companion assembly
 * source - confirm against the build.
 */
asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);

asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks);
asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks);

asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]);

asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[], u8 final[]);

asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]);
asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]);

/* borrowed from aes-neon-blk.ko */
asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
				     int rounds, int blocks);
asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
				     int rounds, int blocks, u8 iv[]);
asmlinkage void neon_aes_xts_encrypt(u8 out[], u8 const in[],
				     u32 const rk1[], int rounds, int bytes,
				     u32 const rk2[], u8 iv[], int first);
asmlinkage void neon_aes_xts_decrypt(u8 out[], u8 const in[],
				     u32 const rk1[], int rounds, int bytes,
				     u32 const rk2[], u8 iv[], int first);
|
2017-01-11 23:41:55 +07:00
|
|
|
|
|
|
|
struct aesbs_ctx {
|
|
|
|
u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32];
|
|
|
|
int rounds;
|
|
|
|
} __aligned(AES_BLOCK_SIZE);
|
|
|
|
|
|
|
|
struct aesbs_cbc_ctx {
|
|
|
|
struct aesbs_ctx key;
|
|
|
|
u32 enc[AES_MAX_KEYLENGTH_U32];
|
|
|
|
};
|
|
|
|
|
2017-07-24 17:28:15 +07:00
|
|
|
struct aesbs_ctr_ctx {
|
|
|
|
struct aesbs_ctx key; /* must be first member */
|
|
|
|
struct crypto_aes_ctx fallback;
|
|
|
|
};
|
|
|
|
|
2017-01-11 23:41:55 +07:00
|
|
|
struct aesbs_xts_ctx {
|
|
|
|
struct aesbs_ctx key;
|
|
|
|
u32 twkey[AES_MAX_KEYLENGTH_U32];
|
2019-09-03 23:43:34 +07:00
|
|
|
struct crypto_aes_ctx cts;
|
2017-01-11 23:41:55 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
|
|
|
unsigned int key_len)
|
|
|
|
{
|
|
|
|
struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
|
|
struct crypto_aes_ctx rk;
|
|
|
|
int err;
|
|
|
|
|
2019-07-03 02:41:31 +07:00
|
|
|
err = aes_expandkey(&rk, in_key, key_len);
|
2017-01-11 23:41:55 +07:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
ctx->rounds = 6 + key_len / 4;
|
|
|
|
|
|
|
|
kernel_neon_begin();
|
|
|
|
aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
|
|
|
|
kernel_neon_end();
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __ecb_crypt(struct skcipher_request *req,
|
|
|
|
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
|
|
|
|
int rounds, int blocks))
|
|
|
|
{
|
|
|
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
|
|
|
struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
|
|
struct skcipher_walk walk;
|
|
|
|
int err;
|
|
|
|
|
2018-03-10 22:21:49 +07:00
|
|
|
err = skcipher_walk_virt(&walk, req, false);
|
2017-01-11 23:41:55 +07:00
|
|
|
|
|
|
|
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
|
|
|
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
|
|
|
|
|
|
|
if (walk.nbytes < walk.total)
|
|
|
|
blocks = round_down(blocks,
|
|
|
|
walk.stride / AES_BLOCK_SIZE);
|
|
|
|
|
2018-03-10 22:21:49 +07:00
|
|
|
kernel_neon_begin();
|
2017-01-11 23:41:55 +07:00
|
|
|
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
|
|
|
|
ctx->rounds, blocks);
|
2018-03-10 22:21:49 +07:00
|
|
|
kernel_neon_end();
|
2017-01-11 23:41:55 +07:00
|
|
|
err = skcipher_walk_done(&walk,
|
|
|
|
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ecb_encrypt(struct skcipher_request *req)
|
|
|
|
{
|
|
|
|
return __ecb_crypt(req, aesbs_ecb_encrypt);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ecb_decrypt(struct skcipher_request *req)
|
|
|
|
{
|
|
|
|
return __ecb_crypt(req, aesbs_ecb_decrypt);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
|
|
|
unsigned int key_len)
|
|
|
|
{
|
|
|
|
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
|
|
struct crypto_aes_ctx rk;
|
|
|
|
int err;
|
|
|
|
|
2019-07-03 02:41:31 +07:00
|
|
|
err = aes_expandkey(&rk, in_key, key_len);
|
2017-01-11 23:41:55 +07:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
ctx->key.rounds = 6 + key_len / 4;
|
|
|
|
|
|
|
|
memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));
|
|
|
|
|
|
|
|
kernel_neon_begin();
|
|
|
|
aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
|
|
|
|
kernel_neon_end();
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-01-29 06:25:39 +07:00
|
|
|
static int cbc_encrypt(struct skcipher_request *req)
|
2017-01-11 23:41:55 +07:00
|
|
|
{
|
2017-01-29 06:25:39 +07:00
|
|
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
2017-01-11 23:41:55 +07:00
|
|
|
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
|
2017-01-29 06:25:39 +07:00
|
|
|
struct skcipher_walk walk;
|
crypto: arm64/aes-blk - move kernel mode neon en/disable into loop
When kernel mode NEON was first introduced on arm64, the preserve and
restore of the userland NEON state was completely unoptimized, and
involved saving all registers on each call to kernel_neon_begin(),
and restoring them on each call to kernel_neon_end(). For this reason,
the NEON crypto code that was introduced at the time keeps the NEON
enabled throughout the execution of the crypto API methods, which may
include calls back into the crypto API that could result in memory
allocation or other actions that we should avoid when running with
preemption disabled.
Since then, we have optimized the kernel mode NEON handling, which now
restores lazily (upon return to userland), and so the preserve action
is only costly the first time it is called after entering the kernel.
So let's put the kernel_neon_begin() and kernel_neon_end() calls around
the actual invocations of the NEON crypto code, and run the remainder of
the code with kernel mode NEON disabled (and preemption enabled)
Note that this requires some reshuffling of the registers in the asm
code, because the XTS routines can no longer rely on the registers to
retain their contents between invocations.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2018-03-10 22:21:48 +07:00
|
|
|
int err;
|
2017-01-11 23:41:55 +07:00
|
|
|
|
2018-03-10 22:21:49 +07:00
|
|
|
err = skcipher_walk_virt(&walk, req, false);
|
2017-01-11 23:41:55 +07:00
|
|
|
|
2017-01-29 06:25:39 +07:00
|
|
|
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
|
|
|
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
|
|
|
|
|
|
|
/* fall back to the non-bitsliced NEON implementation */
|
2018-03-10 22:21:49 +07:00
|
|
|
kernel_neon_begin();
|
2017-01-29 06:25:39 +07:00
|
|
|
neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
crypto: arm64/aes-blk - move kernel mode neon en/disable into loop
When kernel mode NEON was first introduced on arm64, the preserve and
restore of the userland NEON state was completely unoptimized, and
involved saving all registers on each call to kernel_neon_begin(),
and restoring them on each call to kernel_neon_end(). For this reason,
the NEON crypto code that was introduced at the time keeps the NEON
enabled throughout the execution of the crypto API methods, which may
include calls back into the crypto API that could result in memory
allocation or other actions that we should avoid when running with
preemption disabled.
Since then, we have optimized the kernel mode NEON handling, which now
restores lazily (upon return to userland), and so the preserve action
is only costly the first time it is called after entering the kernel.
So let's put the kernel_neon_begin() and kernel_neon_end() calls around
the actual invocations of the NEON crypto code, and run the remainder of
the code with kernel mode NEON disabled (and preemption enabled)
Note that this requires some reshuffling of the registers in the asm
code, because the XTS routines can no longer rely on the registers to
retain their contents between invocations.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2018-03-10 22:21:48 +07:00
|
|
|
ctx->enc, ctx->key.rounds, blocks,
|
|
|
|
walk.iv);
|
2018-03-10 22:21:49 +07:00
|
|
|
kernel_neon_end();
|
2017-01-29 06:25:39 +07:00
|
|
|
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
|
|
|
}
|
|
|
|
return err;
|
2017-01-11 23:41:55 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static int cbc_decrypt(struct skcipher_request *req)
|
|
|
|
{
|
|
|
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
|
|
|
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
|
|
struct skcipher_walk walk;
|
|
|
|
int err;
|
|
|
|
|
2018-03-10 22:21:49 +07:00
|
|
|
err = skcipher_walk_virt(&walk, req, false);
|
2017-01-11 23:41:55 +07:00
|
|
|
|
|
|
|
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
|
|
|
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
|
|
|
|
|
|
|
if (walk.nbytes < walk.total)
|
|
|
|
blocks = round_down(blocks,
|
|
|
|
walk.stride / AES_BLOCK_SIZE);
|
|
|
|
|
2018-03-10 22:21:49 +07:00
|
|
|
kernel_neon_begin();
|
2017-01-11 23:41:55 +07:00
|
|
|
aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
|
|
|
ctx->key.rk, ctx->key.rounds, blocks,
|
|
|
|
walk.iv);
|
2018-03-10 22:21:49 +07:00
|
|
|
kernel_neon_end();
|
2017-01-11 23:41:55 +07:00
|
|
|
err = skcipher_walk_done(&walk,
|
|
|
|
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2017-07-24 17:28:15 +07:00
|
|
|
static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
|
|
|
|
unsigned int key_len)
|
|
|
|
{
|
|
|
|
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
|
|
int err;
|
|
|
|
|
2019-07-03 02:41:31 +07:00
|
|
|
err = aes_expandkey(&ctx->fallback, in_key, key_len);
|
2017-07-24 17:28:15 +07:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
ctx->key.rounds = 6 + key_len / 4;
|
|
|
|
|
|
|
|
kernel_neon_begin();
|
|
|
|
aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds);
|
|
|
|
kernel_neon_end();
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-01-11 23:41:55 +07:00
|
|
|
static int ctr_encrypt(struct skcipher_request *req)
|
|
|
|
{
|
|
|
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
|
|
|
struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
|
|
struct skcipher_walk walk;
|
2017-02-02 18:38:55 +07:00
|
|
|
u8 buf[AES_BLOCK_SIZE];
|
2017-01-11 23:41:55 +07:00
|
|
|
int err;
|
|
|
|
|
2018-03-10 22:21:49 +07:00
|
|
|
err = skcipher_walk_virt(&walk, req, false);
|
2017-01-11 23:41:55 +07:00
|
|
|
|
|
|
|
while (walk.nbytes > 0) {
|
|
|
|
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
2017-02-02 18:38:55 +07:00
|
|
|
u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
|
2017-01-11 23:41:55 +07:00
|
|
|
|
|
|
|
if (walk.nbytes < walk.total) {
|
|
|
|
blocks = round_down(blocks,
|
|
|
|
walk.stride / AES_BLOCK_SIZE);
|
2017-02-02 18:38:55 +07:00
|
|
|
final = NULL;
|
2017-01-11 23:41:55 +07:00
|
|
|
}
|
|
|
|
|
2018-03-10 22:21:49 +07:00
|
|
|
kernel_neon_begin();
|
2017-01-11 23:41:55 +07:00
|
|
|
aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
|
|
|
ctx->rk, ctx->rounds, blocks, walk.iv, final);
|
2018-03-10 22:21:49 +07:00
|
|
|
kernel_neon_end();
|
2017-01-11 23:41:55 +07:00
|
|
|
|
|
|
|
if (final) {
|
|
|
|
u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
|
|
|
|
u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
|
|
|
|
|
crypto: algapi - make crypto_xor() take separate dst and src arguments
There are quite a number of occurrences in the kernel of the pattern
if (dst != src)
memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
or
crypto_xor(keystream, src, nbytes);
memcpy(dst, keystream, nbytes);
where crypto_xor() is preceded or followed by a memcpy() invocation
that is only there because crypto_xor() uses its output parameter as
one of the inputs. To avoid having to add new instances of this pattern
in the arm64 code, which will be refactored to implement non-SIMD
fallbacks, add an alternative implementation called crypto_xor_cpy(),
taking separate input and output arguments. This removes the need for
the separate memcpy().
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2017-07-24 17:28:04 +07:00
|
|
|
crypto_xor_cpy(dst, src, final,
|
|
|
|
walk.total % AES_BLOCK_SIZE);
|
2017-01-11 23:41:55 +07:00
|
|
|
|
|
|
|
err = skcipher_walk_done(&walk, 0);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
err = skcipher_walk_done(&walk,
|
|
|
|
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
|
|
|
}
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
|
|
|
unsigned int key_len)
|
|
|
|
{
|
|
|
|
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
|
|
struct crypto_aes_ctx rk;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = xts_verify_key(tfm, in_key, key_len);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
key_len /= 2;
|
2019-09-03 23:43:34 +07:00
|
|
|
err = aes_expandkey(&ctx->cts, in_key, key_len);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2019-07-03 02:41:31 +07:00
|
|
|
err = aes_expandkey(&rk, in_key + key_len, key_len);
|
2017-01-11 23:41:55 +07:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey));
|
|
|
|
|
|
|
|
return aesbs_setkey(tfm, in_key, key_len);
|
|
|
|
}
|
|
|
|
|
2019-07-03 02:41:35 +07:00
|
|
|
/*
 * ctr_encrypt_one - encrypt one block with the fallback key.
 * @tfm: transform whose context holds the fallback key
 * @src: input block
 * @dst: output block
 *
 * Passed as the per-block callback to crypto_ctr_encrypt_walk() by
 * ctr_encrypt_sync().
 */
static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
{
	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
	unsigned long irqflags;

	/*
	 * Keep interrupts off for the duration of the block operation, to
	 * avoid races where cachelines are evicted while the CPU is
	 * interrupted to do something else.
	 */
	local_irq_save(irqflags);
	aes_encrypt(&ctx->fallback, dst, src);
	local_irq_restore(irqflags);
}
|
2017-07-24 17:28:15 +07:00
|
|
|
|
2019-07-03 02:41:35 +07:00
|
|
|
static int ctr_encrypt_sync(struct skcipher_request *req)
|
|
|
|
{
|
2019-03-13 12:12:50 +07:00
|
|
|
if (!crypto_simd_usable())
|
2019-07-03 02:41:35 +07:00
|
|
|
return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);
|
2017-07-24 17:28:15 +07:00
|
|
|
|
|
|
|
return ctr_encrypt(req);
|
|
|
|
}
|
|
|
|
|
2019-09-03 23:43:34 +07:00
|
|
|
static int __xts_crypt(struct skcipher_request *req, bool encrypt,
|
2017-01-11 23:41:55 +07:00
|
|
|
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
|
|
|
|
int rounds, int blocks, u8 iv[]))
|
|
|
|
{
|
|
|
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
|
|
|
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
2019-09-03 23:43:34 +07:00
|
|
|
int tail = req->cryptlen % (8 * AES_BLOCK_SIZE);
|
|
|
|
struct scatterlist sg_src[2], sg_dst[2];
|
|
|
|
struct skcipher_request subreq;
|
|
|
|
struct scatterlist *src, *dst;
|
2017-01-11 23:41:55 +07:00
|
|
|
struct skcipher_walk walk;
|
2019-09-03 23:43:34 +07:00
|
|
|
int nbytes, err;
|
|
|
|
int first = 1;
|
|
|
|
u8 *out, *in;
|
|
|
|
|
|
|
|
if (req->cryptlen < AES_BLOCK_SIZE)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* ensure that the cts tail is covered by a single step */
|
|
|
|
if (unlikely(tail > 0 && tail < AES_BLOCK_SIZE)) {
|
|
|
|
int xts_blocks = DIV_ROUND_UP(req->cryptlen,
|
|
|
|
AES_BLOCK_SIZE) - 2;
|
|
|
|
|
|
|
|
skcipher_request_set_tfm(&subreq, tfm);
|
|
|
|
skcipher_request_set_callback(&subreq,
|
|
|
|
skcipher_request_flags(req),
|
|
|
|
NULL, NULL);
|
|
|
|
skcipher_request_set_crypt(&subreq, req->src, req->dst,
|
|
|
|
xts_blocks * AES_BLOCK_SIZE,
|
|
|
|
req->iv);
|
|
|
|
req = &subreq;
|
|
|
|
} else {
|
|
|
|
tail = 0;
|
|
|
|
}
|
2017-01-11 23:41:55 +07:00
|
|
|
|
2018-03-10 22:21:49 +07:00
|
|
|
err = skcipher_walk_virt(&walk, req, false);
|
2019-04-10 13:46:32 +07:00
|
|
|
if (err)
|
|
|
|
return err;
|
2017-01-11 23:41:55 +07:00
|
|
|
|
|
|
|
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
|
|
|
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
|
|
|
|
2019-09-03 23:43:34 +07:00
|
|
|
if (walk.nbytes < walk.total || walk.nbytes % AES_BLOCK_SIZE)
|
2017-01-11 23:41:55 +07:00
|
|
|
blocks = round_down(blocks,
|
|
|
|
walk.stride / AES_BLOCK_SIZE);
|
|
|
|
|
2019-09-03 23:43:34 +07:00
|
|
|
out = walk.dst.virt.addr;
|
|
|
|
in = walk.src.virt.addr;
|
|
|
|
nbytes = walk.nbytes;
|
|
|
|
|
2018-03-10 22:21:49 +07:00
|
|
|
kernel_neon_begin();
|
2019-09-03 23:43:34 +07:00
|
|
|
if (likely(blocks > 6)) { /* plain NEON is faster otherwise */
|
|
|
|
if (first)
|
|
|
|
neon_aes_ecb_encrypt(walk.iv, walk.iv,
|
|
|
|
ctx->twkey,
|
|
|
|
ctx->key.rounds, 1);
|
|
|
|
first = 0;
|
|
|
|
|
|
|
|
fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
|
|
|
|
walk.iv);
|
|
|
|
|
|
|
|
out += blocks * AES_BLOCK_SIZE;
|
|
|
|
in += blocks * AES_BLOCK_SIZE;
|
|
|
|
nbytes -= blocks * AES_BLOCK_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (walk.nbytes == walk.total && nbytes > 0)
|
|
|
|
goto xts_tail;
|
|
|
|
|
2018-03-10 22:21:49 +07:00
|
|
|
kernel_neon_end();
|
2019-10-22 15:11:18 +07:00
|
|
|
err = skcipher_walk_done(&walk, nbytes);
|
2017-01-11 23:41:55 +07:00
|
|
|
}
|
2019-09-03 23:43:34 +07:00
|
|
|
|
|
|
|
if (err || likely(!tail))
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* handle ciphertext stealing */
|
|
|
|
dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
|
|
|
|
if (req->dst != req->src)
|
|
|
|
dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
|
|
|
|
|
|
|
|
skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
|
|
|
|
req->iv);
|
|
|
|
|
|
|
|
err = skcipher_walk_virt(&walk, req, false);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
out = walk.dst.virt.addr;
|
|
|
|
in = walk.src.virt.addr;
|
|
|
|
nbytes = walk.nbytes;
|
|
|
|
|
|
|
|
kernel_neon_begin();
|
|
|
|
xts_tail:
|
|
|
|
if (encrypt)
|
|
|
|
neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds,
|
|
|
|
nbytes, ctx->twkey, walk.iv, first ?: 2);
|
|
|
|
else
|
|
|
|
neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds,
|
|
|
|
nbytes, ctx->twkey, walk.iv, first ?: 2);
|
|
|
|
kernel_neon_end();
|
|
|
|
|
|
|
|
return skcipher_walk_done(&walk, 0);
|
2017-01-11 23:41:55 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static int xts_encrypt(struct skcipher_request *req)
|
|
|
|
{
|
2019-09-03 23:43:34 +07:00
|
|
|
return __xts_crypt(req, true, aesbs_xts_encrypt);
|
2017-01-11 23:41:55 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static int xts_decrypt(struct skcipher_request *req)
|
|
|
|
{
|
2019-09-03 23:43:34 +07:00
|
|
|
return __xts_crypt(req, false, aesbs_xts_decrypt);
|
2017-01-11 23:41:55 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct skcipher_alg aes_algs[] = { {
|
|
|
|
.base.cra_name = "__ecb(aes)",
|
|
|
|
.base.cra_driver_name = "__ecb-aes-neonbs",
|
|
|
|
.base.cra_priority = 250,
|
|
|
|
.base.cra_blocksize = AES_BLOCK_SIZE,
|
|
|
|
.base.cra_ctxsize = sizeof(struct aesbs_ctx),
|
|
|
|
.base.cra_module = THIS_MODULE,
|
|
|
|
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
|
|
|
|
|
|
|
.min_keysize = AES_MIN_KEY_SIZE,
|
|
|
|
.max_keysize = AES_MAX_KEY_SIZE,
|
|
|
|
.walksize = 8 * AES_BLOCK_SIZE,
|
|
|
|
.setkey = aesbs_setkey,
|
|
|
|
.encrypt = ecb_encrypt,
|
|
|
|
.decrypt = ecb_decrypt,
|
|
|
|
}, {
|
|
|
|
.base.cra_name = "__cbc(aes)",
|
|
|
|
.base.cra_driver_name = "__cbc-aes-neonbs",
|
|
|
|
.base.cra_priority = 250,
|
|
|
|
.base.cra_blocksize = AES_BLOCK_SIZE,
|
|
|
|
.base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
|
|
|
|
.base.cra_module = THIS_MODULE,
|
|
|
|
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
|
|
|
|
|
|
|
.min_keysize = AES_MIN_KEY_SIZE,
|
|
|
|
.max_keysize = AES_MAX_KEY_SIZE,
|
|
|
|
.walksize = 8 * AES_BLOCK_SIZE,
|
|
|
|
.ivsize = AES_BLOCK_SIZE,
|
|
|
|
.setkey = aesbs_cbc_setkey,
|
|
|
|
.encrypt = cbc_encrypt,
|
|
|
|
.decrypt = cbc_decrypt,
|
|
|
|
}, {
|
|
|
|
.base.cra_name = "__ctr(aes)",
|
|
|
|
.base.cra_driver_name = "__ctr-aes-neonbs",
|
|
|
|
.base.cra_priority = 250,
|
|
|
|
.base.cra_blocksize = 1,
|
|
|
|
.base.cra_ctxsize = sizeof(struct aesbs_ctx),
|
|
|
|
.base.cra_module = THIS_MODULE,
|
|
|
|
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
|
|
|
|
|
|
|
.min_keysize = AES_MIN_KEY_SIZE,
|
|
|
|
.max_keysize = AES_MAX_KEY_SIZE,
|
|
|
|
.chunksize = AES_BLOCK_SIZE,
|
|
|
|
.walksize = 8 * AES_BLOCK_SIZE,
|
|
|
|
.ivsize = AES_BLOCK_SIZE,
|
|
|
|
.setkey = aesbs_setkey,
|
|
|
|
.encrypt = ctr_encrypt,
|
|
|
|
.decrypt = ctr_encrypt,
|
|
|
|
}, {
|
|
|
|
.base.cra_name = "ctr(aes)",
|
|
|
|
.base.cra_driver_name = "ctr-aes-neonbs",
|
|
|
|
.base.cra_priority = 250 - 1,
|
|
|
|
.base.cra_blocksize = 1,
|
2017-07-24 17:28:15 +07:00
|
|
|
.base.cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
|
2017-01-11 23:41:55 +07:00
|
|
|
.base.cra_module = THIS_MODULE,
|
|
|
|
|
|
|
|
.min_keysize = AES_MIN_KEY_SIZE,
|
|
|
|
.max_keysize = AES_MAX_KEY_SIZE,
|
|
|
|
.chunksize = AES_BLOCK_SIZE,
|
|
|
|
.walksize = 8 * AES_BLOCK_SIZE,
|
|
|
|
.ivsize = AES_BLOCK_SIZE,
|
2017-07-24 17:28:15 +07:00
|
|
|
.setkey = aesbs_ctr_setkey_sync,
|
|
|
|
.encrypt = ctr_encrypt_sync,
|
|
|
|
.decrypt = ctr_encrypt_sync,
|
2017-01-11 23:41:55 +07:00
|
|
|
}, {
|
|
|
|
.base.cra_name = "__xts(aes)",
|
|
|
|
.base.cra_driver_name = "__xts-aes-neonbs",
|
|
|
|
.base.cra_priority = 250,
|
|
|
|
.base.cra_blocksize = AES_BLOCK_SIZE,
|
|
|
|
.base.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
|
|
|
|
.base.cra_module = THIS_MODULE,
|
|
|
|
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
|
|
|
|
|
|
|
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
|
|
|
.max_keysize = 2 * AES_MAX_KEY_SIZE,
|
|
|
|
.walksize = 8 * AES_BLOCK_SIZE,
|
|
|
|
.ivsize = AES_BLOCK_SIZE,
|
|
|
|
.setkey = aesbs_xts_setkey,
|
|
|
|
.encrypt = xts_encrypt,
|
|
|
|
.decrypt = xts_decrypt,
|
|
|
|
} };
|
|
|
|
|
|
|
|
/*
 * SIMD wrappers created by aes_init(), indexed like aes_algs[]. Slots of
 * entries that get no wrapper stay NULL.
 */
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
|
|
|
|
|
|
|
|
static void aes_exit(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
|
|
|
|
if (aes_simd_algs[i])
|
|
|
|
simd_skcipher_free(aes_simd_algs[i]);
|
|
|
|
|
|
|
|
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * aes_init - module entry point.
 *
 * Registers aes_algs[] and creates a SIMD wrapper for every entry flagged
 * CRYPTO_ALG_INTERNAL. The wrapper's algorithm and driver names are the
 * internal ones with the leading "__" skipped.
 *
 * Return: 0 on success, -ENODEV if the CPU lacks the ASIMD feature, or the
 * error from registration / wrapper creation.
 */
static int __init aes_init(void)
{
	struct simd_skcipher_alg *simd;
	int err;
	int i;

	if (!cpu_have_named_feature(ASIMD))
		return -ENODEV;

	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
	if (err)
		return err;

	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
		const char *name = aes_algs[i].base.cra_name;
		const char *driver = aes_algs[i].base.cra_driver_name;

		if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
			continue;

		/* "+ 2" skips the "__" prefix of the internal names */
		simd = simd_skcipher_create_compat(name + 2, driver + 2,
						   driver);
		if (IS_ERR(simd)) {
			err = PTR_ERR(simd);
			goto unregister_simds;
		}

		aes_simd_algs[i] = simd;
	}
	return 0;

unregister_simds:
	aes_exit();
	return err;
}
|
|
|
|
|
|
|
|
/* Module load / unload hooks. */
module_init(aes_init);
module_exit(aes_exit);
|