linux_dsm_epyc7002/arch/arm/crypto/sha2-ce-glue.c
Sami Tolvanen f2f770d74a crypto: arm/sha256 - Add optimized SHA-256/224
Add Andy Polyakov's optimized assembly and NEON implementations for
SHA-256/224.

The sha256-armv4.pl script for generating the assembly code is from
OpenSSL commit 51f8d095562f36cdaa6893597b5c609e943b0565.

Compared to sha256-generic these implementations have the following
tcrypt speed improvements on Motorola Nexus 6 (Snapdragon 805):

  bs    b/u      sha256-neon  sha256-asm
  16    16       x1.32        x1.19
  64    16       x1.27        x1.15
  64    64       x1.36        x1.20
  256   16       x1.22        x1.11
  256   64       x1.36        x1.19
  256   256      x1.59        x1.23
  1024  16       x1.21        x1.10
  1024  256      x1.65        x1.23
  1024  1024     x1.76        x1.25
  2048  16       x1.21        x1.10
  2048  256      x1.66        x1.23
  2048  1024     x1.78        x1.25
  2048  2048     x1.79        x1.25
  4096  16       x1.20        x1.09
  4096  256      x1.66        x1.23
  4096  1024     x1.79        x1.26
  4096  4096     x1.82        x1.26
  8192  16       x1.20        x1.09
  8192  256      x1.67        x1.23
  8192  1024     x1.80        x1.26
  8192  4096     x1.85        x1.28
  8192  8192     x1.85        x1.27

Where bs refers to block size and b/u to bytes per update.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Cc: Andy Polyakov <appro@openssl.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-04-03 18:03:40 +08:00

204 lines
4.7 KiB
C

/*
* sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
*
* Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <asm/hwcap.h>
#include <asm/simd.h>
#include <asm/neon.h>
#include <asm/unaligned.h>
MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
asmlinkage void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
u8 *head);
static int sha224_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha256_state){
.state = {
SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
}
};
return 0;
}
static int sha256_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha256_state){
.state = {
SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
}
};
return 0;
}
static int sha2_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
unsigned int partial;
if (!may_use_simd())
return crypto_sha256_update(desc, data, len);
partial = sctx->count % SHA256_BLOCK_SIZE;
sctx->count += len;
if ((partial + len) >= SHA256_BLOCK_SIZE) {
int blocks;
if (partial) {
int p = SHA256_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, p);
data += p;
len -= p;
}
blocks = len / SHA256_BLOCK_SIZE;
len %= SHA256_BLOCK_SIZE;
kernel_neon_begin();
sha2_ce_transform(blocks, data, sctx->state,
partial ? sctx->buf : NULL);
kernel_neon_end();
data += blocks * SHA256_BLOCK_SIZE;
partial = 0;
}
if (len)
memcpy(sctx->buf + partial, data, len);
return 0;
}
static void sha2_final(struct shash_desc *desc)
{
static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
struct sha256_state *sctx = shash_desc_ctx(desc);
__be64 bits = cpu_to_be64(sctx->count << 3);
u32 padlen = SHA256_BLOCK_SIZE
- ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
sha2_update(desc, padding, padlen);
sha2_update(desc, (const u8 *)&bits, sizeof(bits));
}
static int sha224_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_final(desc);
for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static int sha256_final(struct shash_desc *desc, u8 *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
__be32 *dst = (__be32 *)out;
int i;
sha2_final(desc);
for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
put_unaligned_be32(sctx->state[i], dst++);
*sctx = (struct sha256_state){};
return 0;
}
static int sha2_export(struct shash_desc *desc, void *out)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
struct sha256_state *dst = out;
*dst = *sctx;
return 0;
}
static int sha2_import(struct shash_desc *desc, const void *in)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
struct sha256_state const *src = in;
*sctx = *src;
return 0;
}
static struct shash_alg algs[] = { {
.init = sha224_init,
.update = sha2_update,
.final = sha224_final,
.export = sha2_export,
.import = sha2_import,
.descsize = sizeof(struct sha256_state),
.digestsize = SHA224_DIGEST_SIZE,
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.init = sha256_init,
.update = sha2_update,
.final = sha256_final,
.export = sha2_export,
.import = sha2_import,
.descsize = sizeof(struct sha256_state),
.digestsize = SHA256_DIGEST_SIZE,
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-ce",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int __init sha2_ce_mod_init(void)
{
if (!(elf_hwcap2 & HWCAP2_SHA2))
return -ENODEV;
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
static void __exit sha2_ce_mod_fini(void)
{
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_init(sha2_ce_mod_init);
module_exit(sha2_ce_mod_fini);