mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-26 02:19:31 +07:00
0f46a79a5a
We are using percpu send window on P9 NX (powerNV) instead of opening / closing per each crypto session. Means txwin is removed from workmem. So we do not need to initialize workmem for each request. Signed-off-by: Haren Myneni <haren@us.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
532 lines
15 KiB
C
532 lines
15 KiB
C
/*
|
|
* Cryptographic API for the NX-842 hardware compression.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* Copyright (C) IBM Corporation, 2011-2015
|
|
*
|
|
* Designer of the Power data compression engine:
|
|
* Bulent Abali <abali@us.ibm.com>
|
|
*
|
|
* Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
|
|
* Seth Jennings <sjenning@linux.vnet.ibm.com>
|
|
*
|
|
* Rewrite: Dan Streetman <ddstreet@ieee.org>
|
|
*
|
|
* This is an interface to the NX-842 compression hardware in PowerPC
|
|
* processors. Most of the complexity of this driver is due to the fact that
|
|
* the NX-842 compression hardware requires the input and output data buffers
|
|
* to be specifically aligned, to be a specific multiple in length, and within
|
|
* specific minimum and maximum lengths. Those restrictions, provided by the
|
|
* nx-842 driver via nx842_constraints, mean this driver must use bounce
|
|
* buffers and headers to correct misaligned in or out buffers, and to split
|
|
* input buffers that are too large.
|
|
*
|
|
* This driver will fall back to software decompression if the hardware
|
|
* decompression fails, so this driver's decompression should never fail as
|
|
* long as the provided compressed buffer is valid. Any compressed buffer
|
|
* created by this driver will have a header (except ones where the input
|
|
* perfectly matches the constraints); so users of this driver cannot simply
|
|
* pass a compressed buffer created by this driver over to the 842 software
|
|
* decompression library. Instead, users must use this driver to decompress;
|
|
* if the hardware fails or is unavailable, the compressed buffer will be
|
|
* parsed and the header removed, and the raw 842 buffer(s) passed to the 842
|
|
* software decompression library.
|
|
*
|
|
* This does not fall back to software compression, however, since the caller
|
|
* of this function is specifically requesting hardware compression; if the
|
|
* hardware compression fails, the caller can fall back to software
|
|
* compression, and the raw 842 compressed buffer that the software compressor
|
|
* creates can be passed to this driver for hardware decompression; any
|
|
* buffer without our specific header magic is assumed to be a raw 842 buffer
|
|
* and passed directly to the hardware. Note that the software compression
|
|
* library will produce a compressed buffer that is incompatible with the
|
|
* hardware decompressor if the original input buffer length is not a multiple
|
|
* of 8; if such a compressed buffer is passed to this driver for
|
|
* decompression, the hardware will reject it and this driver will then pass
|
|
* it over to the software library for decompression.
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/sw842.h>
|
|
#include <linux/spinlock.h>
|
|
|
|
#include "nx-842.h"
|
|
|
|
/*
 * Magic number stored at the start of every header this driver writes.
 * Its first 5 bits are 0x1f, which is an invalid 842 5-bit template (see
 * lib/842/842.h), so this value can never appear at the start of a raw
 * 842 compressed buffer.  That matters because any buffer handed to us
 * without this magic is assumed to be a raw 842 compressed buffer and is
 * passed directly to the hardware to decompress.
 */
#define NX842_CRYPTO_MAGIC	(0xf842)

/* size in bytes of a header that describes (g) groups */
#define NX842_CRYPTO_HEADER_SIZE(g)				\
	(sizeof(struct nx842_crypto_header) +			\
	 sizeof(struct nx842_crypto_header_group) * (g))

/* size in bytes of a header that describes the maximum number of groups */
#define NX842_CRYPTO_HEADER_MAX_SIZE				\
	NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)

/* bounce buffers: page allocation order, and the resulting size in bytes */
#define BOUNCE_BUFFER_ORDER	(2)
#define BOUNCE_BUFFER_SIZE					\
	((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))

/*
 * How long to keep retrying while the hardware reports busy.  We try
 * longer on comp because we can fallback to sw decomp if hw is busy.
 */
#define COMP_BUSY_TIMEOUT	(250) /* ms */
#define DECOMP_BUSY_TIMEOUT	(50) /* ms */
|
|
|
|
struct nx842_crypto_param {
|
|
u8 *in;
|
|
unsigned int iremain;
|
|
u8 *out;
|
|
unsigned int oremain;
|
|
unsigned int ototal;
|
|
};
|
|
|
|
static int update_param(struct nx842_crypto_param *p,
|
|
unsigned int slen, unsigned int dlen)
|
|
{
|
|
if (p->iremain < slen)
|
|
return -EOVERFLOW;
|
|
if (p->oremain < dlen)
|
|
return -ENOSPC;
|
|
|
|
p->in += slen;
|
|
p->iremain -= slen;
|
|
p->out += dlen;
|
|
p->oremain -= dlen;
|
|
p->ototal += dlen;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver)
|
|
{
|
|
struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
|
|
|
|
spin_lock_init(&ctx->lock);
|
|
ctx->driver = driver;
|
|
ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
|
|
ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
|
|
ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
|
|
if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
|
|
kfree(ctx->wmem);
|
|
free_page((unsigned long)ctx->sbounce);
|
|
free_page((unsigned long)ctx->dbounce);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nx842_crypto_init);
|
|
|
|
void nx842_crypto_exit(struct crypto_tfm *tfm)
|
|
{
|
|
struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
|
|
|
|
kfree(ctx->wmem);
|
|
free_page((unsigned long)ctx->sbounce);
|
|
free_page((unsigned long)ctx->dbounce);
|
|
}
|
|
EXPORT_SYMBOL_GPL(nx842_crypto_exit);
|
|
|
|
static void check_constraints(struct nx842_constraints *c)
|
|
{
|
|
/* limit maximum, to always have enough bounce buffer to decompress */
|
|
if (c->maximum > BOUNCE_BUFFER_SIZE)
|
|
c->maximum = BOUNCE_BUFFER_SIZE;
|
|
}
|
|
|
|
/*
 * Copy the finished header @hdr to the start of the output buffer @buf.
 *
 * The first group's padding field records how many bytes precede its
 * compressed data; the header must fit inside that gap.
 *
 * Returns 0 on success, or -EINVAL if the gap is too small.
 */
static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
{
	int hdr_bytes = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
	u16 reserved = be16_to_cpu(hdr->group[0].padding);

	/* compress should have added space for header */
	if (hdr_bytes <= reserved) {
		memcpy(buf, hdr, hdr_bytes);

		print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1,
				     buf, hdr_bytes, 0);

		return 0;
	}

	pr_err("Internal error: no space for header\n");
	return -EINVAL;
}
|
|
|
|
static int compress(struct nx842_crypto_ctx *ctx,
|
|
struct nx842_crypto_param *p,
|
|
struct nx842_crypto_header_group *g,
|
|
struct nx842_constraints *c,
|
|
u16 *ignore,
|
|
unsigned int hdrsize)
|
|
{
|
|
unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
|
|
unsigned int adj_slen = slen;
|
|
u8 *src = p->in, *dst = p->out;
|
|
int ret, dskip = 0;
|
|
ktime_t timeout;
|
|
|
|
if (p->iremain == 0)
|
|
return -EOVERFLOW;
|
|
|
|
if (p->oremain == 0 || hdrsize + c->minimum > dlen)
|
|
return -ENOSPC;
|
|
|
|
if (slen % c->multiple)
|
|
adj_slen = round_up(slen, c->multiple);
|
|
if (slen < c->minimum)
|
|
adj_slen = c->minimum;
|
|
if (slen > c->maximum)
|
|
adj_slen = slen = c->maximum;
|
|
if (adj_slen > slen || (u64)src % c->alignment) {
|
|
adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
|
|
slen = min(slen, BOUNCE_BUFFER_SIZE);
|
|
if (adj_slen > slen)
|
|
memset(ctx->sbounce + slen, 0, adj_slen - slen);
|
|
memcpy(ctx->sbounce, src, slen);
|
|
src = ctx->sbounce;
|
|
slen = adj_slen;
|
|
pr_debug("using comp sbounce buffer, len %x\n", slen);
|
|
}
|
|
|
|
dst += hdrsize;
|
|
dlen -= hdrsize;
|
|
|
|
if ((u64)dst % c->alignment) {
|
|
dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
|
|
dst += dskip;
|
|
dlen -= dskip;
|
|
}
|
|
if (dlen % c->multiple)
|
|
dlen = round_down(dlen, c->multiple);
|
|
if (dlen < c->minimum) {
|
|
nospc:
|
|
dst = ctx->dbounce;
|
|
dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
|
|
dlen = round_down(dlen, c->multiple);
|
|
dskip = 0;
|
|
pr_debug("using comp dbounce buffer, len %x\n", dlen);
|
|
}
|
|
if (dlen > c->maximum)
|
|
dlen = c->maximum;
|
|
|
|
tmplen = dlen;
|
|
timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
|
|
do {
|
|
dlen = tmplen; /* reset dlen, if we're retrying */
|
|
ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem);
|
|
/* possibly we should reduce the slen here, instead of
|
|
* retrying with the dbounce buffer?
|
|
*/
|
|
if (ret == -ENOSPC && dst != ctx->dbounce)
|
|
goto nospc;
|
|
} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
|
|
if (ret)
|
|
return ret;
|
|
|
|
dskip += hdrsize;
|
|
|
|
if (dst == ctx->dbounce)
|
|
memcpy(p->out + dskip, dst, dlen);
|
|
|
|
g->padding = cpu_to_be16(dskip);
|
|
g->compressed_length = cpu_to_be32(dlen);
|
|
g->uncompressed_length = cpu_to_be32(slen);
|
|
|
|
if (p->iremain < slen) {
|
|
*ignore = slen - p->iremain;
|
|
slen = p->iremain;
|
|
}
|
|
|
|
pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
|
|
slen, *ignore, dlen, dskip);
|
|
|
|
return update_param(p, slen, dskip + dlen);
|
|
}
|
|
|
|
int nx842_crypto_compress(struct crypto_tfm *tfm,
|
|
const u8 *src, unsigned int slen,
|
|
u8 *dst, unsigned int *dlen)
|
|
{
|
|
struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
|
|
struct nx842_crypto_header *hdr = &ctx->header;
|
|
struct nx842_crypto_param p;
|
|
struct nx842_constraints c = *ctx->driver->constraints;
|
|
unsigned int groups, hdrsize, h;
|
|
int ret, n;
|
|
bool add_header;
|
|
u16 ignore = 0;
|
|
|
|
check_constraints(&c);
|
|
|
|
p.in = (u8 *)src;
|
|
p.iremain = slen;
|
|
p.out = dst;
|
|
p.oremain = *dlen;
|
|
p.ototal = 0;
|
|
|
|
*dlen = 0;
|
|
|
|
groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
|
|
DIV_ROUND_UP(p.iremain, c.maximum));
|
|
hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);
|
|
|
|
spin_lock_bh(&ctx->lock);
|
|
|
|
/* skip adding header if the buffers meet all constraints */
|
|
add_header = (p.iremain % c.multiple ||
|
|
p.iremain < c.minimum ||
|
|
p.iremain > c.maximum ||
|
|
(u64)p.in % c.alignment ||
|
|
p.oremain % c.multiple ||
|
|
p.oremain < c.minimum ||
|
|
p.oremain > c.maximum ||
|
|
(u64)p.out % c.alignment);
|
|
|
|
hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
|
|
hdr->groups = 0;
|
|
hdr->ignore = 0;
|
|
|
|
while (p.iremain > 0) {
|
|
n = hdr->groups++;
|
|
ret = -ENOSPC;
|
|
if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
|
|
goto unlock;
|
|
|
|
/* header goes before first group */
|
|
h = !n && add_header ? hdrsize : 0;
|
|
|
|
if (ignore)
|
|
pr_warn("internal error, ignore is set %x\n", ignore);
|
|
|
|
ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
|
|
if (ret)
|
|
goto unlock;
|
|
}
|
|
|
|
if (!add_header && hdr->groups > 1) {
|
|
pr_err("Internal error: No header but multiple groups\n");
|
|
ret = -EINVAL;
|
|
goto unlock;
|
|
}
|
|
|
|
/* ignore indicates the input stream needed to be padded */
|
|
hdr->ignore = cpu_to_be16(ignore);
|
|
if (ignore)
|
|
pr_debug("marked %d bytes as ignore\n", ignore);
|
|
|
|
if (add_header)
|
|
ret = nx842_crypto_add_header(hdr, dst);
|
|
if (ret)
|
|
goto unlock;
|
|
|
|
*dlen = p.ototal;
|
|
|
|
pr_debug("compress total slen %x dlen %x\n", slen, *dlen);
|
|
|
|
unlock:
|
|
spin_unlock_bh(&ctx->lock);
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nx842_crypto_compress);
|
|
|
|
static int decompress(struct nx842_crypto_ctx *ctx,
|
|
struct nx842_crypto_param *p,
|
|
struct nx842_crypto_header_group *g,
|
|
struct nx842_constraints *c,
|
|
u16 ignore)
|
|
{
|
|
unsigned int slen = be32_to_cpu(g->compressed_length);
|
|
unsigned int required_len = be32_to_cpu(g->uncompressed_length);
|
|
unsigned int dlen = p->oremain, tmplen;
|
|
unsigned int adj_slen = slen;
|
|
u8 *src = p->in, *dst = p->out;
|
|
u16 padding = be16_to_cpu(g->padding);
|
|
int ret, spadding = 0, dpadding = 0;
|
|
ktime_t timeout;
|
|
|
|
if (!slen || !required_len)
|
|
return -EINVAL;
|
|
|
|
if (p->iremain <= 0 || padding + slen > p->iremain)
|
|
return -EOVERFLOW;
|
|
|
|
if (p->oremain <= 0 || required_len - ignore > p->oremain)
|
|
return -ENOSPC;
|
|
|
|
src += padding;
|
|
|
|
if (slen % c->multiple)
|
|
adj_slen = round_up(slen, c->multiple);
|
|
if (slen < c->minimum)
|
|
adj_slen = c->minimum;
|
|
if (slen > c->maximum)
|
|
goto usesw;
|
|
if (slen < adj_slen || (u64)src % c->alignment) {
|
|
/* we can append padding bytes because the 842 format defines
|
|
* an "end" template (see lib/842/842_decompress.c) and will
|
|
* ignore any bytes following it.
|
|
*/
|
|
if (slen < adj_slen)
|
|
memset(ctx->sbounce + slen, 0, adj_slen - slen);
|
|
memcpy(ctx->sbounce, src, slen);
|
|
src = ctx->sbounce;
|
|
spadding = adj_slen - slen;
|
|
slen = adj_slen;
|
|
pr_debug("using decomp sbounce buffer, len %x\n", slen);
|
|
}
|
|
|
|
if (dlen % c->multiple)
|
|
dlen = round_down(dlen, c->multiple);
|
|
if (dlen < required_len || (u64)dst % c->alignment) {
|
|
dst = ctx->dbounce;
|
|
dlen = min(required_len, BOUNCE_BUFFER_SIZE);
|
|
pr_debug("using decomp dbounce buffer, len %x\n", dlen);
|
|
}
|
|
if (dlen < c->minimum)
|
|
goto usesw;
|
|
if (dlen > c->maximum)
|
|
dlen = c->maximum;
|
|
|
|
tmplen = dlen;
|
|
timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
|
|
do {
|
|
dlen = tmplen; /* reset dlen, if we're retrying */
|
|
ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem);
|
|
} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
|
|
if (ret) {
|
|
usesw:
|
|
/* reset everything, sw doesn't have constraints */
|
|
src = p->in + padding;
|
|
slen = be32_to_cpu(g->compressed_length);
|
|
spadding = 0;
|
|
dst = p->out;
|
|
dlen = p->oremain;
|
|
dpadding = 0;
|
|
if (dlen < required_len) { /* have ignore bytes */
|
|
dst = ctx->dbounce;
|
|
dlen = BOUNCE_BUFFER_SIZE;
|
|
}
|
|
pr_info_ratelimited("using software 842 decompression\n");
|
|
ret = sw842_decompress(src, slen, dst, &dlen);
|
|
}
|
|
if (ret)
|
|
return ret;
|
|
|
|
slen -= spadding;
|
|
|
|
dlen -= ignore;
|
|
if (ignore)
|
|
pr_debug("ignoring last %x bytes\n", ignore);
|
|
|
|
if (dst == ctx->dbounce)
|
|
memcpy(p->out, dst, dlen);
|
|
|
|
pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
|
|
slen, padding, dlen, ignore);
|
|
|
|
return update_param(p, slen + padding, dlen);
|
|
}
|
|
|
|
int nx842_crypto_decompress(struct crypto_tfm *tfm,
|
|
const u8 *src, unsigned int slen,
|
|
u8 *dst, unsigned int *dlen)
|
|
{
|
|
struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
|
|
struct nx842_crypto_header *hdr;
|
|
struct nx842_crypto_param p;
|
|
struct nx842_constraints c = *ctx->driver->constraints;
|
|
int n, ret, hdr_len;
|
|
u16 ignore = 0;
|
|
|
|
check_constraints(&c);
|
|
|
|
p.in = (u8 *)src;
|
|
p.iremain = slen;
|
|
p.out = dst;
|
|
p.oremain = *dlen;
|
|
p.ototal = 0;
|
|
|
|
*dlen = 0;
|
|
|
|
hdr = (struct nx842_crypto_header *)src;
|
|
|
|
spin_lock_bh(&ctx->lock);
|
|
|
|
/* If it doesn't start with our header magic number, assume it's a raw
|
|
* 842 compressed buffer and pass it directly to the hardware driver
|
|
*/
|
|
if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
|
|
struct nx842_crypto_header_group g = {
|
|
.padding = 0,
|
|
.compressed_length = cpu_to_be32(p.iremain),
|
|
.uncompressed_length = cpu_to_be32(p.oremain),
|
|
};
|
|
|
|
ret = decompress(ctx, &p, &g, &c, 0);
|
|
if (ret)
|
|
goto unlock;
|
|
|
|
goto success;
|
|
}
|
|
|
|
if (!hdr->groups) {
|
|
pr_err("header has no groups\n");
|
|
ret = -EINVAL;
|
|
goto unlock;
|
|
}
|
|
if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
|
|
pr_err("header has too many groups %x, max %x\n",
|
|
hdr->groups, NX842_CRYPTO_GROUP_MAX);
|
|
ret = -EINVAL;
|
|
goto unlock;
|
|
}
|
|
|
|
hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
|
|
if (hdr_len > slen) {
|
|
ret = -EOVERFLOW;
|
|
goto unlock;
|
|
}
|
|
|
|
memcpy(&ctx->header, src, hdr_len);
|
|
hdr = &ctx->header;
|
|
|
|
for (n = 0; n < hdr->groups; n++) {
|
|
/* ignore applies to last group */
|
|
if (n + 1 == hdr->groups)
|
|
ignore = be16_to_cpu(hdr->ignore);
|
|
|
|
ret = decompress(ctx, &p, &hdr->group[n], &c, ignore);
|
|
if (ret)
|
|
goto unlock;
|
|
}
|
|
|
|
success:
|
|
*dlen = p.ototal;
|
|
|
|
pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);
|
|
|
|
ret = 0;
|
|
|
|
unlock:
|
|
spin_unlock_bh(&ctx->lock);
|
|
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL_GPL(nx842_crypto_decompress);
|
|
|
|
MODULE_LICENSE("GPL");
|
|
MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
|
|
MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
|