linux_dsm_epyc7002/arch/x86/crypto/glue_helper.c
Jussi Kivilinna a05248ed2d crypto: x86 - add more optimized XTS-mode for serpent-avx
This patch adds AVX optimized XTS-mode helper functions/macros and converts
serpent-avx to use the new facilities. Benefits are slightly improved speed
and reduced stack usage as use of temporary IV-array is avoided.

tcrypt results, with Intel i5-2450M:
        enc     dec
16B     1.00x   1.00x
64B     1.00x   1.00x
256B    1.04x   1.06x
1024B   1.09x   1.09x
8192B   1.10x   1.09x

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2013-04-25 21:01:51 +08:00

403 lines
10 KiB
C

/*
* Shared glue code for 128bit block ciphers
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/module.h>
#include <crypto/b128ops.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/crypto/glue_helper.h>
#include <crypto/scatterwalk.h>
static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
void *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = 128 / 8;
unsigned int nbytes, i, func_bytes;
bool fpu_enabled = false;
int err;
err = blkcipher_walk_virt(desc, walk);
while ((nbytes = walk->nbytes)) {
u8 *wsrc = walk->src.virt.addr;
u8 *wdst = walk->dst.virt.addr;
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
desc, fpu_enabled, nbytes);
for (i = 0; i < gctx->num_funcs; i++) {
func_bytes = bsize * gctx->funcs[i].num_blocks;
/* Process multi-block batch */
if (nbytes >= func_bytes) {
do {
gctx->funcs[i].fn_u.ecb(ctx, wdst,
wsrc);
wsrc += func_bytes;
wdst += func_bytes;
nbytes -= func_bytes;
} while (nbytes >= func_bytes);
if (nbytes < bsize)
goto done;
}
}
done:
err = blkcipher_walk_done(desc, walk, nbytes);
}
glue_fpu_end(fpu_enabled);
return err;
}
int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return __glue_ecb_crypt_128bit(gctx, desc, &walk);
}
EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit);
static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
void *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = 128 / 8;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 *iv = (u128 *)walk->iv;
do {
u128_xor(dst, src, iv);
fn(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
*(u128 *)walk->iv = *iv;
return nbytes;
}
int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
struct blkcipher_desc *desc,
struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit);
static unsigned int
__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
void *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = 128 / 8;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
u128 last_iv;
unsigned int num_blocks, func_bytes;
unsigned int i;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
last_iv = *src;
for (i = 0; i < gctx->num_funcs; i++) {
num_blocks = gctx->funcs[i].num_blocks;
func_bytes = bsize * num_blocks;
/* Process multi-block batch */
if (nbytes >= func_bytes) {
do {
nbytes -= func_bytes - bsize;
src -= num_blocks - 1;
dst -= num_blocks - 1;
gctx->funcs[i].fn_u.cbc(ctx, dst, src);
nbytes -= bsize;
if (nbytes < bsize)
goto done;
u128_xor(dst, dst, src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= func_bytes);
if (nbytes < bsize)
goto done;
}
}
done:
u128_xor(dst, dst, (u128 *)walk->iv);
*(u128 *)walk->iv = last_iv;
return nbytes;
}
int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
const unsigned int bsize = 128 / 8;
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
desc, fpu_enabled, nbytes);
nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
glue_fpu_end(fpu_enabled);
return err;
}
EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
void *ctx = crypto_blkcipher_ctx(desc->tfm);
u8 *src = (u8 *)walk->src.virt.addr;
u8 *dst = (u8 *)walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
le128 ctrblk;
u128 tmp;
be128_to_le128(&ctrblk, (be128 *)walk->iv);
memcpy(&tmp, src, nbytes);
fn_ctr(ctx, &tmp, &tmp, &ctrblk);
memcpy(dst, &tmp, nbytes);
le128_to_be128((be128 *)walk->iv, &ctrblk);
}
EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
const unsigned int bsize = 128 / 8;
void *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
le128 ctrblk;
unsigned int num_blocks, func_bytes;
unsigned int i;
be128_to_le128(&ctrblk, (be128 *)walk->iv);
/* Process multi-block batch */
for (i = 0; i < gctx->num_funcs; i++) {
num_blocks = gctx->funcs[i].num_blocks;
func_bytes = bsize * num_blocks;
if (nbytes >= func_bytes) {
do {
gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
src += num_blocks;
dst += num_blocks;
nbytes -= func_bytes;
} while (nbytes >= func_bytes);
if (nbytes < bsize)
goto done;
}
}
done:
le128_to_be128((be128 *)walk->iv, &ctrblk);
return nbytes;
}
int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
const unsigned int bsize = 128 / 8;
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, bsize);
while ((nbytes = walk.nbytes) >= bsize) {
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
desc, fpu_enabled, nbytes);
nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
glue_fpu_end(fpu_enabled);
if (walk.nbytes) {
glue_ctr_crypt_final_128bit(
gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
err = blkcipher_walk_done(desc, &walk, 0);
}
return err;
}
EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
void *ctx,
struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
const unsigned int bsize = 128 / 8;
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
unsigned int num_blocks, func_bytes;
unsigned int i;
/* Process multi-block batch */
for (i = 0; i < gctx->num_funcs; i++) {
num_blocks = gctx->funcs[i].num_blocks;
func_bytes = bsize * num_blocks;
if (nbytes >= func_bytes) {
do {
gctx->funcs[i].fn_u.xts(ctx, dst, src,
(le128 *)walk->iv);
src += num_blocks;
dst += num_blocks;
nbytes -= func_bytes;
} while (nbytes >= func_bytes);
if (nbytes < bsize)
goto done;
}
}
done:
return nbytes;
}
/* for implementations implementing faster XTS IV generator */
int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes,
void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src),
void *tweak_ctx, void *crypt_ctx)
{
const unsigned int bsize = 128 / 8;
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
nbytes = walk.nbytes;
if (!nbytes)
return err;
/* set minimum length to bsize, for tweak_fn */
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
desc, fpu_enabled,
nbytes < bsize ? bsize : nbytes);
/* calculate first value of T */
tweak_fn(tweak_ctx, walk.iv, walk.iv);
while (nbytes) {
nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
nbytes = walk.nbytes;
}
glue_fpu_end(fpu_enabled);
return err;
}
EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
common_glue_func_t fn)
{
le128 ivblk = *iv;
/* generate next IV */
le128_gf128mul_x_ble(iv, &ivblk);
/* CC <- T xor C */
u128_xor(dst, src, (u128 *)&ivblk);
/* PP <- D(Key2,CC) */
fn(ctx, (u8 *)dst, (u8 *)dst);
/* P <- T xor PP */
u128_xor(dst, dst, (u128 *)&ivblk);
}
EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one);
MODULE_LICENSE("GPL");