mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
9b17608f15
Now that all block functions support partial lengths, engage the wider block sizes more aggressively. This prevents using smaller block functions multiple times, where the next larger block function would have been faster. Signed-off-by: Martin Willi <martin@strongswan.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
155 lines
4.1 KiB
C
155 lines
4.1 KiB
C
/*
|
|
* ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
|
|
*
|
|
* Copyright (C) 2015 Martin Willi
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*/
|
|
|
|
#include <crypto/algapi.h>
|
|
#include <crypto/chacha20.h>
|
|
#include <crypto/internal/skcipher.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <asm/fpu/api.h>
|
|
#include <asm/simd.h>
|
|
|
|
#define CHACHA20_STATE_ALIGN 16
|
|
|
|
asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
|
|
unsigned int len);
|
|
asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
|
|
unsigned int len);
|
|
#ifdef CONFIG_AS_AVX2
|
|
asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
|
|
unsigned int len);
|
|
static bool chacha20_use_avx2;
|
|
#endif
|
|
|
|
static unsigned int chacha20_advance(unsigned int len, unsigned int maxblocks)
|
|
{
|
|
len = min(len, maxblocks * CHACHA20_BLOCK_SIZE);
|
|
return round_up(len, CHACHA20_BLOCK_SIZE) / CHACHA20_BLOCK_SIZE;
|
|
}
|
|
|
|
/*
 * XOR @bytes bytes from @src into @dst using the widest block routine that
 * fits, advancing state[12] by the number of blocks consumed.
 *
 * Full-width chunks are handled in a loop.  A remainder larger than half of
 * a routine's width is still given to that routine in one call rather than
 * being split across several calls of the next narrower one.
 *
 * NOTE(review): state[12] is presumably the ChaCha20 block counter word;
 * confirm against crypto_chacha20_init().
 */
static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
			    unsigned int bytes)
{
	const unsigned int quad = 4 * CHACHA20_BLOCK_SIZE;
#ifdef CONFIG_AS_AVX2
	const unsigned int octet = 8 * CHACHA20_BLOCK_SIZE;

	if (chacha20_use_avx2) {
		/* Whole 8-block chunks. */
		for (; bytes >= octet; bytes -= octet) {
			chacha20_8block_xor_avx2(state, dst, src, bytes);
			state[12] += 8;
			src += octet;
			dst += octet;
		}

		/* More than 4 blocks left: one partial 8-block call ends it. */
		if (bytes > quad) {
			chacha20_8block_xor_avx2(state, dst, src, bytes);
			state[12] += chacha20_advance(bytes, 8);
			return;
		}
	}
#endif
	/* Whole 4-block chunks. */
	for (; bytes >= quad; bytes -= quad) {
		chacha20_4block_xor_ssse3(state, dst, src, bytes);
		state[12] += 4;
		src += quad;
		dst += quad;
	}

	if (bytes > CHACHA20_BLOCK_SIZE) {
		/* Between one and four blocks: a partial 4-block call. */
		chacha20_4block_xor_ssse3(state, dst, src, bytes);
		state[12] += chacha20_advance(bytes, 4);
	} else if (bytes) {
		/* At most one block left. */
		chacha20_block_xor_ssse3(state, dst, src, bytes);
		state[12]++;
	}
}
|
|
|
|
static int chacha20_simd(struct skcipher_request *req)
|
|
{
|
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
|
struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
u32 *state, state_buf[16 + 2] __aligned(8);
|
|
struct skcipher_walk walk;
|
|
int err;
|
|
|
|
BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
|
|
state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
|
|
|
|
if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
|
|
return crypto_chacha20_crypt(req);
|
|
|
|
err = skcipher_walk_virt(&walk, req, true);
|
|
|
|
crypto_chacha20_init(state, ctx, walk.iv);
|
|
|
|
kernel_fpu_begin();
|
|
|
|
while (walk.nbytes > 0) {
|
|
unsigned int nbytes = walk.nbytes;
|
|
|
|
if (nbytes < walk.total)
|
|
nbytes = round_down(nbytes, walk.stride);
|
|
|
|
chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
|
|
nbytes);
|
|
|
|
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
|
}
|
|
|
|
kernel_fpu_end();
|
|
|
|
return err;
|
|
}
|
|
|
|
static struct skcipher_alg alg = {
|
|
.base.cra_name = "chacha20",
|
|
.base.cra_driver_name = "chacha20-simd",
|
|
.base.cra_priority = 300,
|
|
.base.cra_blocksize = 1,
|
|
.base.cra_ctxsize = sizeof(struct chacha20_ctx),
|
|
.base.cra_module = THIS_MODULE,
|
|
|
|
.min_keysize = CHACHA20_KEY_SIZE,
|
|
.max_keysize = CHACHA20_KEY_SIZE,
|
|
.ivsize = CHACHA20_IV_SIZE,
|
|
.chunksize = CHACHA20_BLOCK_SIZE,
|
|
.setkey = crypto_chacha20_setkey,
|
|
.encrypt = chacha20_simd,
|
|
.decrypt = chacha20_simd,
|
|
};
|
|
|
|
/*
 * Module entry point: refuse to load without SSSE3, record whether the AVX2
 * path may be used, then register the skcipher.
 *
 * Returns -ENODEV when SSSE3 is absent, otherwise the result of
 * crypto_register_skcipher().
 */
static int __init chacha20_simd_mod_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_SSSE3))
		return -ENODEV;

#ifdef CONFIG_AS_AVX2
	/* AVX2 needs both CPU feature bits and the SSE/YMM xfeatures. */
	if (boot_cpu_has(X86_FEATURE_AVX) &&
	    boot_cpu_has(X86_FEATURE_AVX2) &&
	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
		chacha20_use_avx2 = true;
	else
		chacha20_use_avx2 = false;
#endif
	return crypto_register_skcipher(&alg);
}
|
|
|
|
/* Module exit point: unregister the skcipher registered at init. */
static void __exit chacha20_simd_mod_fini(void)
{
	crypto_unregister_skcipher(&alg);
}
|
|
|
|
module_init(chacha20_simd_mod_init);
|
|
module_exit(chacha20_simd_mod_fini);
|
|
|
|
MODULE_LICENSE("GPL");
|
|
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
|
|
MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
|
|
MODULE_ALIAS_CRYPTO("chacha20");
|
|
MODULE_ALIAS_CRYPTO("chacha20-simd");
|