From 8279dd748f9704b811e528b31304e2fab026abc5 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Wed, 6 Jul 2005 13:51:00 -0700 Subject: [PATCH 01/14] [CRYPTO] Don't check for NULL before kfree() Checking a pointer for NULL before calling kfree() on it is redundant. This patch removes such checks from crypto/ Signed-off-by: Jesper Juhl Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/cipher.c | 3 +-- crypto/hmac.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/crypto/cipher.c b/crypto/cipher.c index f434ce7c2d0b..69264497b48c 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -336,6 +336,5 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) void crypto_exit_cipher_ops(struct crypto_tfm *tfm) { - if (tfm->crt_cipher.cit_iv) - kfree(tfm->crt_cipher.cit_iv); + kfree(tfm->crt_cipher.cit_iv); } diff --git a/crypto/hmac.c b/crypto/hmac.c index 847df9263e16..da0456b37109 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -49,8 +49,7 @@ int crypto_alloc_hmac_block(struct crypto_tfm *tfm) void crypto_free_hmac_block(struct crypto_tfm *tfm) { - if (tfm->crt_digest.dit_hmac_block) - kfree(tfm->crt_digest.dit_hmac_block); + kfree(tfm->crt_digest.dit_hmac_block); } void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen) From c774e93e2152d0be2612739418689e6e6400f4eb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:51:31 -0700 Subject: [PATCH 02/14] [CRYPTO] Add plumbing for multi-block operations The VIA Padlock device is able to perform much better when multiple blocks are fed to it at once. As this device offers an exceptional throughput rate it is worthwhile to optimise the infrastructure specifically for it. We shift the existing page-sized fast path down to the CBC/ECB functions. We can then replace the CBC/ECB functions with functions provided by the underlying algorithm that performs the multi-block operations. As a side-effect this improves the performance of large cipher operations for all existing algorithm implementations. I've measured the gain to be around 5% for 3DES and 15% for AES. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/cipher.c | 248 +++++++++++++++++++++++++++---------------- crypto/scatterwalk.c | 4 +- crypto/scatterwalk.h | 6 +- 3 files changed, 162 insertions(+), 96 deletions(-) diff --git a/crypto/cipher.c b/crypto/cipher.c index 69264497b48c..c4243345b154 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -4,6 +4,7 @@ * Cipher operations. * * Copyright (c) 2002 James Morris + * Copyright (c) 2005 Herbert Xu * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -22,9 +23,13 @@ #include "internal.h" #include "scatterwalk.h" -typedef void (cryptfn_t)(void *, u8 *, const u8 *); -typedef void (procfn_t)(struct crypto_tfm *, u8 *, - u8*, cryptfn_t, void *); +struct cipher_desc { + struct crypto_tfm *tfm; + void (*crfn)(void *ctx, u8 *dst, const u8 *src); + unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst, + const u8 *src, unsigned int nbytes); + void *info; +}; static inline void xor_64(u8 *a, const u8 *b) { @@ -39,63 +44,57 @@ static inline void xor_128(u8 *a, const u8 *b) ((u32 *)a)[2] ^= ((u32 *)b)[2]; ((u32 *)a)[3] ^= ((u32 *)b)[3]; } - -static inline void *prepare_src(struct scatter_walk *walk, int bsize, - void *tmp, int in_place) -{ - void *src = walk->data; - int n = bsize; - if (unlikely(scatterwalk_across_pages(walk, bsize))) { - src = tmp; - n = scatterwalk_copychunks(src, walk, bsize, 0); - } - scatterwalk_advance(walk, n); - return src; +static unsigned int crypt_slow(const struct cipher_desc *desc, + struct scatter_walk *in, + struct scatter_walk *out, unsigned int bsize) +{ + u8 src[bsize]; + u8 dst[bsize]; + unsigned int n; + + n = scatterwalk_copychunks(src, in, bsize, 0); + scatterwalk_advance(in, n); + + desc->prfn(desc, dst, src, bsize); + + n = scatterwalk_copychunks(dst, out, bsize, 1); + scatterwalk_advance(out, n); + + return bsize; } -static inline void *prepare_dst(struct scatter_walk *walk, int bsize, - void *tmp, int in_place) +static inline unsigned int crypt_fast(const struct cipher_desc *desc, + struct scatter_walk *in, + struct scatter_walk *out, + unsigned int nbytes) { - void *dst = walk->data; + u8 *src, *dst; - if (unlikely(scatterwalk_across_pages(walk, bsize)) || in_place) - dst = tmp; - return dst; -} + src = in->data; + dst = scatterwalk_samebuf(in, out) ? src : out->data; -static inline void complete_src(struct scatter_walk *walk, int bsize, - void *src, int in_place) -{ -} + nbytes = desc->prfn(desc, dst, src, nbytes); -static inline void complete_dst(struct scatter_walk *walk, int bsize, - void *dst, int in_place) -{ - int n = bsize; + scatterwalk_advance(in, nbytes); + scatterwalk_advance(out, nbytes); - if (unlikely(scatterwalk_across_pages(walk, bsize))) - n = scatterwalk_copychunks(dst, walk, bsize, 1); - else if (in_place) - memcpy(walk->data, dst, bsize); - scatterwalk_advance(walk, n); + return nbytes; } /* * Generic encrypt/decrypt wrapper for ciphers, handles operations across * multiple page boundaries by using temporary blocks. In user context, - * the kernel is given a chance to schedule us once per block. + * the kernel is given a chance to schedule us once per page. */ -static int crypt(struct crypto_tfm *tfm, +static int crypt(const struct cipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes, cryptfn_t crfn, - procfn_t prfn, void *info) + unsigned int nbytes) { struct scatter_walk walk_in, walk_out; + struct crypto_tfm *tfm = desc->tfm; const unsigned int bsize = crypto_tfm_alg_blocksize(tfm); - u8 tmp_src[bsize]; - u8 tmp_dst[bsize]; if (!nbytes) return 0; @@ -109,29 +108,20 @@ static int crypt(struct crypto_tfm *tfm, scatterwalk_start(&walk_out, dst); for(;;) { - u8 *src_p, *dst_p; - int in_place; + unsigned int n; scatterwalk_map(&walk_in, 0); scatterwalk_map(&walk_out, 1); - in_place = scatterwalk_samebuf(&walk_in, &walk_out); + n = scatterwalk_clamp(&walk_in, nbytes); + n = scatterwalk_clamp(&walk_out, n); - do { - src_p = prepare_src(&walk_in, bsize, tmp_src, - in_place); - dst_p = prepare_dst(&walk_out, bsize, tmp_dst, - in_place); + if (likely(n >= bsize)) + n = crypt_fast(desc, &walk_in, &walk_out, n); + else + n = crypt_slow(desc, &walk_in, &walk_out, bsize); - prfn(tfm, dst_p, src_p, crfn, info); - - complete_src(&walk_in, bsize, src_p, in_place); - complete_dst(&walk_out, bsize, dst_p, in_place); - - nbytes -= bsize; - } while (nbytes && - !scatterwalk_across_pages(&walk_in, bsize) && - !scatterwalk_across_pages(&walk_out, bsize)); + nbytes -= n; scatterwalk_done(&walk_in, 0, nbytes); scatterwalk_done(&walk_out, 1, nbytes); @@ -143,30 +133,78 @@ static int crypt(struct crypto_tfm *tfm, } } -static void cbc_process_encrypt(struct crypto_tfm *tfm, u8 *dst, u8 *src, - cryptfn_t fn, void *info) +static unsigned int cbc_process_encrypt(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes) { - u8 *iv = info; + struct crypto_tfm *tfm = desc->tfm; + void (*xor)(u8 *, const u8 *) = tfm->crt_u.cipher.cit_xor_block; + int bsize = crypto_tfm_alg_blocksize(tfm); - tfm->crt_u.cipher.cit_xor_block(iv, src); - fn(crypto_tfm_ctx(tfm), dst, iv); - memcpy(iv, dst, crypto_tfm_alg_blocksize(tfm)); + void (*fn)(void *, u8 *, const u8 *) = desc->crfn; + u8 *iv = desc->info; + unsigned int done = 0; + + do { + xor(iv, src); + fn(crypto_tfm_ctx(tfm), dst, iv); + memcpy(iv, dst, bsize); + + src += bsize; + dst += bsize; + } while ((done += bsize) < nbytes); + + return done; } -static void cbc_process_decrypt(struct crypto_tfm *tfm, u8 *dst, u8 *src, - cryptfn_t fn, void *info) +static unsigned int cbc_process_decrypt(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes) { - u8 *iv = info; + struct crypto_tfm *tfm = desc->tfm; + void (*xor)(u8 *, const u8 *) = tfm->crt_u.cipher.cit_xor_block; + int bsize = crypto_tfm_alg_blocksize(tfm); - fn(crypto_tfm_ctx(tfm), dst, src); - tfm->crt_u.cipher.cit_xor_block(dst, iv); - memcpy(iv, src, crypto_tfm_alg_blocksize(tfm)); + u8 stack[src == dst ? bsize : 0]; + u8 *buf = stack; + u8 **dst_p = src == dst ? &buf : &dst; + + void (*fn)(void *, u8 *, const u8 *) = desc->crfn; + u8 *iv = desc->info; + unsigned int done = 0; + + do { + u8 *tmp_dst = *dst_p; + + fn(crypto_tfm_ctx(tfm), tmp_dst, src); + xor(tmp_dst, iv); + memcpy(iv, src, bsize); + if (tmp_dst != dst) + memcpy(dst, tmp_dst, bsize); + + src += bsize; + dst += bsize; + } while ((done += bsize) < nbytes); + + return done; } -static void ecb_process(struct crypto_tfm *tfm, u8 *dst, u8 *src, - cryptfn_t fn, void *info) +static unsigned int ecb_process(const struct cipher_desc *desc, u8 *dst, + const u8 *src, unsigned int nbytes) { - fn(crypto_tfm_ctx(tfm), dst, src); + struct crypto_tfm *tfm = desc->tfm; + int bsize = crypto_tfm_alg_blocksize(tfm); + void (*fn)(void *, u8 *, const u8 *) = desc->crfn; + unsigned int done = 0; + + do { + fn(crypto_tfm_ctx(tfm), dst, src); + + src += bsize; + dst += bsize; + } while ((done += bsize) < nbytes); + + return done; } static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) @@ -185,9 +223,13 @@ static int ecb_encrypt(struct crypto_tfm *tfm, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - return crypt(tfm, dst, src, nbytes, - tfm->__crt_alg->cra_cipher.cia_encrypt, - ecb_process, NULL); + struct cipher_desc desc; + + desc.tfm = tfm; + desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt; + desc.prfn = ecb_process; + + return crypt(&desc, dst, src, nbytes); } static int ecb_decrypt(struct crypto_tfm *tfm, @@ -195,9 +237,13 @@ static int ecb_decrypt(struct crypto_tfm *tfm, struct scatterlist *src, unsigned int nbytes) { - return crypt(tfm, dst, src, nbytes, - tfm->__crt_alg->cra_cipher.cia_decrypt, - ecb_process, NULL); + struct cipher_desc desc; + + desc.tfm = tfm; + desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt; + desc.prfn = ecb_process; + + return crypt(&desc, dst, src, nbytes); } static int cbc_encrypt(struct crypto_tfm *tfm, @@ -205,9 +251,14 @@ static int cbc_encrypt(struct crypto_tfm *tfm, struct scatterlist *src, unsigned int nbytes) { - return crypt(tfm, dst, src, nbytes, - tfm->__crt_alg->cra_cipher.cia_encrypt, - cbc_process_encrypt, tfm->crt_cipher.cit_iv); + struct cipher_desc desc; + + desc.tfm = tfm; + desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt; + desc.prfn = cbc_process_encrypt; + desc.info = tfm->crt_cipher.cit_iv; + + return crypt(&desc, dst, src, nbytes); } static int cbc_encrypt_iv(struct crypto_tfm *tfm, @@ -215,9 +266,14 @@ static int cbc_encrypt_iv(struct crypto_tfm *tfm, struct scatterlist *src, unsigned int nbytes, u8 *iv) { - return crypt(tfm, dst, src, nbytes, - tfm->__crt_alg->cra_cipher.cia_encrypt, - cbc_process_encrypt, iv); + struct cipher_desc desc; + + desc.tfm = tfm; + desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt; + desc.prfn = cbc_process_encrypt; + desc.info = iv; + + return crypt(&desc, dst, src, nbytes); } static int cbc_decrypt(struct crypto_tfm *tfm, @@ -225,9 +281,14 @@ static int cbc_decrypt(struct crypto_tfm *tfm, struct scatterlist *src, unsigned int nbytes) { - return crypt(tfm, dst, src, nbytes, - tfm->__crt_alg->cra_cipher.cia_decrypt, - cbc_process_decrypt, tfm->crt_cipher.cit_iv); + struct cipher_desc desc; + + desc.tfm = tfm; + desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt; + desc.prfn = cbc_process_decrypt; + desc.info = tfm->crt_cipher.cit_iv; + + return crypt(&desc, dst, src, nbytes); } static int cbc_decrypt_iv(struct crypto_tfm *tfm, @@ -235,9 +296,14 @@ static int cbc_decrypt_iv(struct crypto_tfm *tfm, struct scatterlist *src, unsigned int nbytes, u8 *iv) { - return crypt(tfm, dst, src, nbytes, - tfm->__crt_alg->cra_cipher.cia_decrypt, - cbc_process_decrypt, iv); + struct cipher_desc desc; + + desc.tfm = tfm; + desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt; + desc.prfn = cbc_process_decrypt; + desc.info = iv; + + return crypt(&desc, dst, src, nbytes); } static int nocrypt(struct crypto_tfm *tfm, diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index 50c9461e8cc6..47ac90e615f4 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -100,7 +100,7 @@ void scatterwalk_done(struct scatter_walk *walk, int out, int more) int scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out) { - do { + while (nbytes > walk->len_this_page) { memcpy_dir(buf, walk->data, walk->len_this_page, out); buf += walk->len_this_page; nbytes -= walk->len_this_page; @@ -108,7 +108,7 @@ int scatterwalk_copychunks(void *buf, struct scatter_walk *walk, scatterwalk_unmap(walk, out); scatterwalk_pagedone(walk, out, 1); scatterwalk_map(walk, out); - } while (nbytes > walk->len_this_page); + } memcpy_dir(buf, walk->data, nbytes, out); return nbytes; diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h index 02aa56c649b4..5495bb970816 100644 --- a/crypto/scatterwalk.h +++ b/crypto/scatterwalk.h @@ -40,10 +40,10 @@ static inline int scatterwalk_samebuf(struct scatter_walk *walk_in, walk_in->offset == walk_out->offset; } -static inline int scatterwalk_across_pages(struct scatter_walk *walk, - unsigned int nbytes) +static inline unsigned int scatterwalk_clamp(struct scatter_walk *walk, + unsigned int nbytes) { - return nbytes > walk->len_this_page; + return nbytes > walk->len_this_page ? walk->len_this_page : nbytes; } static inline void scatterwalk_advance(struct scatter_walk *walk, From 40725181b74be6b0e3bdc8c05bd1e0b9873ec5cc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:51:52 -0700 Subject: [PATCH 03/14] [CRYPTO] Add support for low-level multi-block operations This patch adds hooks for cipher algorithms to implement multi-block ECB/CBC operations directly. This is expected to provide significant performance boots to the VIA Padlock. It could also be used for improving software implementations such as AES where operating on multiple blocks at a time may enable certain optimisations. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/cipher.c | 38 ++++++++++++++++++-------------------- crypto/internal.h | 5 ----- include/linux/crypto.h | 28 +++++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 26 deletions(-) diff --git a/crypto/cipher.c b/crypto/cipher.c index c4243345b154..54c4a560070d 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -23,14 +23,6 @@ #include "internal.h" #include "scatterwalk.h" -struct cipher_desc { - struct crypto_tfm *tfm; - void (*crfn)(void *ctx, u8 *dst, const u8 *src); - unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst, - const u8 *src, unsigned int nbytes); - void *info; -}; - static inline void xor_64(u8 *a, const u8 *b) { ((u32 *)a)[0] ^= ((u32 *)b)[0]; @@ -224,10 +216,11 @@ static int ecb_encrypt(struct crypto_tfm *tfm, struct scatterlist *src, unsigned int nbytes) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt; - desc.prfn = ecb_process; + desc.crfn = cipher->cia_encrypt; + desc.prfn = cipher->cia_encrypt_ecb ?: ecb_process; return crypt(&desc, dst, src, nbytes); } @@ -238,10 +231,11 @@ static int ecb_decrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt; - desc.prfn = ecb_process; + desc.crfn = cipher->cia_decrypt; + desc.prfn = cipher->cia_decrypt_ecb ?: ecb_process; return crypt(&desc, dst, src, nbytes); } @@ -252,10 +246,11 @@ static int cbc_encrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt; - desc.prfn = cbc_process_encrypt; + desc.crfn = cipher->cia_encrypt; + desc.prfn = cipher->cia_encrypt_cbc ?: cbc_process_encrypt; desc.info = tfm->crt_cipher.cit_iv; return crypt(&desc, dst, src, nbytes); @@ -267,10 +262,11 @@ static int cbc_encrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_encrypt; - desc.prfn = cbc_process_encrypt; + desc.crfn = cipher->cia_encrypt; + desc.prfn = cipher->cia_encrypt_cbc ?: cbc_process_encrypt; desc.info = iv; return crypt(&desc, dst, src, nbytes); @@ -282,10 +278,11 @@ static int cbc_decrypt(struct crypto_tfm *tfm, unsigned int nbytes) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt; - desc.prfn = cbc_process_decrypt; + desc.crfn = cipher->cia_decrypt; + desc.prfn = cipher->cia_decrypt_cbc ?: cbc_process_decrypt; desc.info = tfm->crt_cipher.cit_iv; return crypt(&desc, dst, src, nbytes); @@ -297,10 +294,11 @@ static int cbc_decrypt_iv(struct crypto_tfm *tfm, unsigned int nbytes, u8 *iv) { struct cipher_desc desc; + struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher; desc.tfm = tfm; - desc.crfn = tfm->__crt_alg->cra_cipher.cia_decrypt; - desc.prfn = cbc_process_decrypt; + desc.crfn = cipher->cia_decrypt; + desc.prfn = cipher->cia_decrypt_cbc ?: cbc_process_decrypt; desc.info = iv; return crypt(&desc, dst, src, nbytes); diff --git a/crypto/internal.h b/crypto/internal.h index 964b9a60ca24..5ed383f7dce6 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -42,11 +42,6 @@ static inline void crypto_yield(struct crypto_tfm *tfm) cond_resched(); } -static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) -{ - return (void *)&tfm[1]; -} - struct crypto_alg *crypto_alg_lookup(const char *name); /* A far more intelligent version of this is planned. For now, just diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 387da6a3e58c..26ce01c25745 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -61,6 +61,15 @@ #define CRYPTO_DIR_DECRYPT 0 struct scatterlist; +struct crypto_tfm; + +struct cipher_desc { + struct crypto_tfm *tfm; + void (*crfn)(void *ctx, u8 *dst, const u8 *src); + unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst, + const u8 *src, unsigned int nbytes); + void *info; +}; /* * Algorithms: modular crypto algorithm implementations, managed @@ -73,6 +82,19 @@ struct cipher_alg { unsigned int keylen, u32 *flags); void (*cia_encrypt)(void *ctx, u8 *dst, const u8 *src); void (*cia_decrypt)(void *ctx, u8 *dst, const u8 *src); + + unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); + unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc, + u8 *dst, const u8 *src, + unsigned int nbytes); }; struct digest_alg { @@ -136,7 +158,6 @@ static inline int crypto_alg_available(const char *name, u32 flags) * and core processing logic. Managed via crypto_alloc_tfm() and * crypto_free_tfm(), as well as the various helpers below. */ -struct crypto_tfm; struct cipher_tfm { void *cit_iv; @@ -266,6 +287,11 @@ static inline unsigned int crypto_tfm_alg_digestsize(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_digest.dia_digestsize; } +static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) +{ + return (void *)&tfm[1]; +} + /* * API wrappers. */ From 95477377995aefa2ec1654a9a3777bd57ea99146 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:52:09 -0700 Subject: [PATCH 04/14] [CRYPTO] Add alignmask for low-level cipher implementations The VIA Padlock device requires the input and output buffers to be aligned on 16-byte boundaries. This patch adds the alignmask attribute for low-level cipher implementations to indicate their alignment requirements. The mid-level crypt() function will copy the input/output buffers if they are not aligned correctly before they are passed to the low-level implementation. Strictly speaking, some of the software implementations require the buffers to be aligned on 4-byte boundaries as they do 32-bit loads. However, it is not clear whether it is better to copy the buffers or pay the penalty for unaligned loads/stores. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/api.c | 6 ++++++ crypto/cipher.c | 43 +++++++++++++++++++++++++++++++++++------- crypto/scatterwalk.h | 6 ++++++ include/linux/crypto.h | 1 + 4 files changed, 49 insertions(+), 7 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 394169a8577d..f55856b21992 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -168,6 +168,12 @@ int crypto_register_alg(struct crypto_alg *alg) { int ret = 0; struct crypto_alg *q; + + if (alg->cra_alignmask & (alg->cra_alignmask + 1)) + return -EINVAL; + + if (alg->cra_alignmask > PAGE_SIZE) + return -EINVAL; down_write(&crypto_alg_sem); diff --git a/crypto/cipher.c b/crypto/cipher.c index 54c4a560070d..85eb12f8e564 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -41,8 +41,10 @@ static unsigned int crypt_slow(const struct cipher_desc *desc, struct scatter_walk *in, struct scatter_walk *out, unsigned int bsize) { - u8 src[bsize]; - u8 dst[bsize]; + unsigned int alignmask = desc->tfm->__crt_alg->cra_alignmask; + u8 buffer[bsize * 2 + alignmask]; + u8 *src = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + u8 *dst = src + bsize; unsigned int n; n = scatterwalk_copychunks(src, in, bsize, 0); @@ -59,15 +61,24 @@ static unsigned int crypt_slow(const struct cipher_desc *desc, static inline unsigned int crypt_fast(const struct cipher_desc *desc, struct scatter_walk *in, struct scatter_walk *out, - unsigned int nbytes) + unsigned int nbytes, u8 *tmp) { u8 *src, *dst; src = in->data; dst = scatterwalk_samebuf(in, out) ? src : out->data; + if (tmp) { + memcpy(tmp, in->data, nbytes); + src = tmp; + dst = tmp; + } + nbytes = desc->prfn(desc, dst, src, nbytes); + if (tmp) + memcpy(out->data, tmp, nbytes); + scatterwalk_advance(in, nbytes); scatterwalk_advance(out, nbytes); @@ -87,6 +98,8 @@ static int crypt(const struct cipher_desc *desc, struct scatter_walk walk_in, walk_out; struct crypto_tfm *tfm = desc->tfm; const unsigned int bsize = crypto_tfm_alg_blocksize(tfm); + unsigned int alignmask = tfm->__crt_alg->cra_alignmask; + unsigned long buffer = 0; if (!nbytes) return 0; @@ -100,16 +113,27 @@ static int crypt(const struct cipher_desc *desc, scatterwalk_start(&walk_out, dst); for(;;) { - unsigned int n; + unsigned int n = nbytes; + u8 *tmp = NULL; + + if (!scatterwalk_aligned(&walk_in, alignmask) || + !scatterwalk_aligned(&walk_out, alignmask)) { + if (!buffer) { + buffer = __get_free_page(GFP_ATOMIC); + if (!buffer) + n = 0; + } + tmp = (u8 *)buffer; + } scatterwalk_map(&walk_in, 0); scatterwalk_map(&walk_out, 1); - n = scatterwalk_clamp(&walk_in, nbytes); + n = scatterwalk_clamp(&walk_in, n); n = scatterwalk_clamp(&walk_out, n); if (likely(n >= bsize)) - n = crypt_fast(desc, &walk_in, &walk_out, n); + n = crypt_fast(desc, &walk_in, &walk_out, n, tmp); else n = crypt_slow(desc, &walk_in, &walk_out, bsize); @@ -119,10 +143,15 @@ static int crypt(const struct cipher_desc *desc, scatterwalk_done(&walk_out, 1, nbytes); if (!nbytes) - return 0; + break; crypto_yield(tfm); } + + if (buffer) + free_page(buffer); + + return 0; } static unsigned int cbc_process_encrypt(const struct cipher_desc *desc, diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h index 5495bb970816..e79925c474a3 100644 --- a/crypto/scatterwalk.h +++ b/crypto/scatterwalk.h @@ -55,6 +55,12 @@ static inline void scatterwalk_advance(struct scatter_walk *walk, walk->len_this_segment -= nbytes; } +static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk, + unsigned int alignmask) +{ + return !(walk->offset & alignmask); +} + void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg); int scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out); void scatterwalk_map(struct scatter_walk *walk, int out); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 26ce01c25745..ac9d49beecd3 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -124,6 +124,7 @@ struct crypto_alg { u32 cra_flags; unsigned int cra_blocksize; unsigned int cra_ctxsize; + unsigned int cra_alignmask; const char cra_name[CRYPTO_MAX_ALG_NAME]; union { From 6789b2dc455b90efc9c88886c9366adc9abb7347 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:52:27 -0700 Subject: [PATCH 05/14] [PADLOCK] Move fast path work into aes_set_key and upper layer Most of the work done aes_padlock can be done in aes_set_key. This means that we only have to do it once when the key changes rather than every time we perform an encryption or decryption. This patch also sets cra_alignmask to let the upper layer ensure that the buffers fed to us are aligned correctly. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/crypto/padlock-aes.c | 102 ++++++++++++++--------------------- drivers/crypto/padlock.h | 22 ++++---- 2 files changed, 52 insertions(+), 72 deletions(-) diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index ed708b4427b0..5f28909d4012 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include "padlock.h" @@ -59,8 +60,12 @@ #define AES_EXTENDED_KEY_SIZE_B (AES_EXTENDED_KEY_SIZE * sizeof(uint32_t)) struct aes_ctx { - uint32_t e_data[AES_EXTENDED_KEY_SIZE+4]; - uint32_t d_data[AES_EXTENDED_KEY_SIZE+4]; + uint32_t e_data[AES_EXTENDED_KEY_SIZE]; + uint32_t d_data[AES_EXTENDED_KEY_SIZE]; + struct { + struct cword encrypt; + struct cword decrypt; + } cword; uint32_t *E; uint32_t *D; int key_length; @@ -280,10 +285,15 @@ aes_hw_extkey_available(uint8_t key_len) return 0; } +static inline struct aes_ctx *aes_ctx(void *ctx) +{ + return (struct aes_ctx *)ALIGN((unsigned long)ctx, PADLOCK_ALIGNMENT); +} + static int aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t *flags) { - struct aes_ctx *ctx = ctx_arg; + struct aes_ctx *ctx = aes_ctx(ctx_arg); uint32_t i, t, u, v, w; uint32_t P[AES_EXTENDED_KEY_SIZE]; uint32_t rounds; @@ -295,25 +305,36 @@ aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t ctx->key_length = key_len; + /* + * If the hardware is capable of generating the extended key + * itself we must supply the plain key for both encryption + * and decryption. + */ ctx->E = ctx->e_data; - ctx->D = ctx->d_data; - - /* Ensure 16-Bytes alignmentation of keys for VIA PadLock. */ - if ((int)(ctx->e_data) & 0x0F) - ctx->E += 4 - (((int)(ctx->e_data) & 0x0F) / sizeof (ctx->e_data[0])); - - if ((int)(ctx->d_data) & 0x0F) - ctx->D += 4 - (((int)(ctx->d_data) & 0x0F) / sizeof (ctx->d_data[0])); + ctx->D = ctx->e_data; E_KEY[0] = uint32_t_in (in_key); E_KEY[1] = uint32_t_in (in_key + 4); E_KEY[2] = uint32_t_in (in_key + 8); E_KEY[3] = uint32_t_in (in_key + 12); + /* Prepare control words. */ + memset(&ctx->cword, 0, sizeof(ctx->cword)); + + ctx->cword.decrypt.encdec = 1; + ctx->cword.encrypt.rounds = 10 + (key_len - 16) / 4; + ctx->cword.decrypt.rounds = ctx->cword.encrypt.rounds; + ctx->cword.encrypt.ksize = (key_len - 16) / 8; + ctx->cword.decrypt.ksize = ctx->cword.encrypt.ksize; + /* Don't generate extended keys if the hardware can do it. */ if (aes_hw_extkey_available(key_len)) return 0; + ctx->D = ctx->d_data; + ctx->cword.encrypt.keygen = 1; + ctx->cword.decrypt.keygen = 1; + switch (key_len) { case 16: t = E_KEY[3]; @@ -370,9 +391,8 @@ aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t /* ====== Encryption/decryption routines ====== */ /* This is the real call to PadLock. */ -static inline void -padlock_xcrypt_ecb(uint8_t *input, uint8_t *output, uint8_t *key, - void *control_word, uint32_t count) +static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, + void *control_word, u32 count) { asm volatile ("pushfl; popfl"); /* enforce key reload. */ asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ @@ -380,67 +400,27 @@ padlock_xcrypt_ecb(uint8_t *input, uint8_t *output, uint8_t *key, : "d"(control_word), "b"(key), "c"(count)); } -static void -aes_padlock(void *ctx_arg, uint8_t *out_arg, const uint8_t *in_arg, int encdec) -{ - /* Don't blindly modify this structure - the items must - fit on 16-Bytes boundaries! */ - struct padlock_xcrypt_data { - uint8_t buf[AES_BLOCK_SIZE]; - union cword cword; - }; - - struct aes_ctx *ctx = ctx_arg; - char bigbuf[sizeof(struct padlock_xcrypt_data) + 16]; - struct padlock_xcrypt_data *data; - void *key; - - /* Place 'data' at the first 16-Bytes aligned address in 'bigbuf'. */ - if (((long)bigbuf) & 0x0F) - data = (void*)(bigbuf + 16 - ((long)bigbuf & 0x0F)); - else - data = (void*)bigbuf; - - /* Prepare Control word. */ - memset (data, 0, sizeof(struct padlock_xcrypt_data)); - data->cword.b.encdec = !encdec; /* in the rest of cryptoapi ENC=1/DEC=0 */ - data->cword.b.rounds = 10 + (ctx->key_length - 16) / 4; - data->cword.b.ksize = (ctx->key_length - 16) / 8; - - /* Is the hardware capable to generate the extended key? */ - if (!aes_hw_extkey_available(ctx->key_length)) - data->cword.b.keygen = 1; - - /* ctx->E starts with a plain key - if the hardware is capable - to generate the extended key itself we must supply - the plain key for both Encryption and Decryption. */ - if (encdec == CRYPTO_DIR_ENCRYPT || data->cword.b.keygen == 0) - key = ctx->E; - else - key = ctx->D; - - memcpy(data->buf, in_arg, AES_BLOCK_SIZE); - padlock_xcrypt_ecb(data->buf, data->buf, key, &data->cword, 1); - memcpy(out_arg, data->buf, AES_BLOCK_SIZE); -} - static void aes_encrypt(void *ctx_arg, uint8_t *out, const uint8_t *in) { - aes_padlock(ctx_arg, out, in, CRYPTO_DIR_ENCRYPT); + struct aes_ctx *ctx = aes_ctx(ctx_arg); + padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt, 1); } static void aes_decrypt(void *ctx_arg, uint8_t *out, const uint8_t *in) { - aes_padlock(ctx_arg, out, in, CRYPTO_DIR_DECRYPT); + struct aes_ctx *ctx = aes_ctx(ctx_arg); + padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, 1); } static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aes_ctx), + .cra_ctxsize = sizeof(struct aes_ctx) + + PADLOCK_ALIGNMENT, + .cra_alignmask = PADLOCK_ALIGNMENT - 1, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), .cra_u = { diff --git a/drivers/crypto/padlock.h b/drivers/crypto/padlock.h index 7a500605e449..3cf2b7a12348 100644 --- a/drivers/crypto/padlock.h +++ b/drivers/crypto/padlock.h @@ -13,18 +13,18 @@ #ifndef _CRYPTO_PADLOCK_H #define _CRYPTO_PADLOCK_H +#define PADLOCK_ALIGNMENT 16 + /* Control word. */ -union cword { - uint32_t cword[4]; - struct { - int rounds:4; - int algo:3; - int keygen:1; - int interm:1; - int encdec:1; - int ksize:2; - } b; -}; +struct cword { + int __attribute__ ((__packed__)) + rounds:4, + algo:3, + keygen:1, + interm:1, + encdec:1, + ksize:2; +} __attribute__ ((__aligned__(PADLOCK_ALIGNMENT))); #define PFX "padlock: " From 28e8c3ad9464de54a632f00ab3df88fa5f4652d1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:52:43 -0700 Subject: [PATCH 06/14] [PADLOCK] Implement multi-block operations By operating on multiple blocks at once, we expect to extract more performance out of the VIA Padlock. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/crypto/padlock-aes.c | 55 ++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 5f28909d4012..d2745ff4699c 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -390,7 +390,7 @@ aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t /* ====== Encryption/decryption routines ====== */ -/* This is the real call to PadLock. */ +/* These are the real call to PadLock. */ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, void *control_word, u32 count) { @@ -400,6 +400,17 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, : "d"(control_word), "b"(key), "c"(count)); } +static inline void padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, + u8 *iv, void *control_word, u32 count) +{ + /* Enforce key reload. */ + asm volatile ("pushfl; popfl"); + /* rep xcryptcbc */ + asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" + : "+S" (input), "+D" (output), "+a" (iv) + : "d" (control_word), "b" (key), "c" (count)); +} + static void aes_encrypt(void *ctx_arg, uint8_t *out, const uint8_t *in) { @@ -414,6 +425,42 @@ aes_decrypt(void *ctx_arg, uint8_t *out, const uint8_t *in) padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, 1); } +static unsigned int aes_encrypt_ecb(const struct cipher_desc *desc, u8 *out, + const u8 *in, unsigned int nbytes) +{ + struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm)); + padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt, + nbytes / AES_BLOCK_SIZE); + return nbytes & ~(AES_BLOCK_SIZE - 1); +} + +static unsigned int aes_decrypt_ecb(const struct cipher_desc *desc, u8 *out, + const u8 *in, unsigned int nbytes) +{ + struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm)); + padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, + nbytes / AES_BLOCK_SIZE); + return nbytes & ~(AES_BLOCK_SIZE - 1); +} + +static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out, + const u8 *in, unsigned int nbytes) +{ + struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm)); + padlock_xcrypt_cbc(in, out, ctx->E, desc->info, &ctx->cword.encrypt, + nbytes / AES_BLOCK_SIZE); + return nbytes & ~(AES_BLOCK_SIZE - 1); +} + +static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out, + const u8 *in, unsigned int nbytes) +{ + struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm)); + padlock_xcrypt_cbc(in, out, ctx->D, desc->info, &ctx->cword.decrypt, + nbytes / AES_BLOCK_SIZE); + return nbytes & ~(AES_BLOCK_SIZE - 1); +} + static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_flags = CRYPTO_ALG_TYPE_CIPHER, @@ -429,7 +476,11 @@ static struct crypto_alg aes_alg = { .cia_max_keysize = AES_MAX_KEY_SIZE, .cia_setkey = aes_set_key, .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt + .cia_decrypt = aes_decrypt, + .cia_encrypt_ecb = aes_encrypt_ecb, + .cia_decrypt_ecb = aes_decrypt_ecb, + .cia_encrypt_cbc = aes_encrypt_cbc, + .cia_decrypt_cbc = aes_decrypt_cbc, } } }; From 176c3652c544b6f8d4bb1984c58c10080f45dbf0 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 6 Jul 2005 13:53:09 -0700 Subject: [PATCH 07/14] [CRYPTO] Make crypto_alg_lookup static This patch makes a needlessly global function static. Signed-off-by: Adrian Bunk Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/api.c | 10 +++++++++- crypto/internal.h | 10 ---------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index f55856b21992..0b583d24f7fa 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -33,7 +34,7 @@ static inline void crypto_alg_put(struct crypto_alg *alg) module_put(alg->cra_module); } -struct crypto_alg *crypto_alg_lookup(const char *name) +static struct crypto_alg *crypto_alg_lookup(const char *name) { struct crypto_alg *q, *alg = NULL; @@ -54,6 +55,13 @@ struct crypto_alg *crypto_alg_lookup(const char *name) return alg; } +/* A far more intelligent version of this is planned. For now, just + * try an exact match on the name of the algorithm. */ +static inline struct crypto_alg *crypto_alg_mod_lookup(const char *name) +{ + return try_then_request_module(crypto_alg_lookup(name), name); +} + static int crypto_init_flags(struct crypto_tfm *tfm, u32 flags) { tfm->crt_flags = 0; diff --git a/crypto/internal.h b/crypto/internal.h index 5ed383f7dce6..83b1b6d6d92b 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -16,7 +16,6 @@ #include #include #include -#include #include extern enum km_type crypto_km_types[]; @@ -42,15 +41,6 @@ static inline void crypto_yield(struct crypto_tfm *tfm) cond_resched(); } -struct crypto_alg *crypto_alg_lookup(const char *name); - -/* A far more intelligent version of this is planned. For now, just - * try an exact match on the name of the algorithm. */ -static inline struct crypto_alg *crypto_alg_mod_lookup(const char *name) -{ - return try_then_request_module(crypto_alg_lookup(name), name); -} - #ifdef CONFIG_CRYPTO_HMAC int crypto_alloc_hmac_block(struct crypto_tfm *tfm); void crypto_free_hmac_block(struct crypto_tfm *tfm); From fbdae9f3e7fb57c07cb0d973f113eb25da2e8ff2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:53:29 -0700 Subject: [PATCH 08/14] [CRYPTO] Ensure cit_iv is aligned correctly This patch ensures that cit_iv is aligned according to cra_alignmask by allocating it as part of the tfm structure. As a side effect the crypto layer will also guarantee that the tfm ctx area has enough space to be aligned by cra_alignmask. This allows us to remove the extra space reservation from the Padlock driver. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/api.c | 32 +++++++++++++++++++++++++++++--- crypto/cipher.c | 15 +++++++++------ crypto/internal.h | 28 ++++++++++++++++++++++++++++ drivers/crypto/padlock-aes.c | 3 +-- include/linux/crypto.h | 5 +++++ 5 files changed, 72 insertions(+), 11 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 0b583d24f7fa..2d8d828c0ca2 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -125,20 +125,46 @@ static void crypto_exit_ops(struct crypto_tfm *tfm) } } +static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags) +{ + unsigned int len; + + switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { + default: + BUG(); + + case CRYPTO_ALG_TYPE_CIPHER: + len = crypto_cipher_ctxsize(alg, flags); + break; + + case CRYPTO_ALG_TYPE_DIGEST: + len = crypto_digest_ctxsize(alg, flags); + break; + + case CRYPTO_ALG_TYPE_COMPRESS: + len = crypto_compress_ctxsize(alg, flags); + break; + } + + return len + alg->cra_alignmask; +} + struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) { struct crypto_tfm *tfm = NULL; struct crypto_alg *alg; + unsigned int tfm_size; alg = crypto_alg_mod_lookup(name); if (alg == NULL) goto out; - - tfm = kmalloc(sizeof(*tfm) + alg->cra_ctxsize, GFP_KERNEL); + + tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, flags); + tfm = kmalloc(tfm_size, GFP_KERNEL); if (tfm == NULL) goto out_put; - memset(tfm, 0, sizeof(*tfm) + alg->cra_ctxsize); + memset(tfm, 0, tfm_size); tfm->__crt_alg = alg; diff --git a/crypto/cipher.c b/crypto/cipher.c index 85eb12f8e564..d3295ce14a57 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -41,7 +41,7 @@ static unsigned int crypt_slow(const struct cipher_desc *desc, struct scatter_walk *in, struct scatter_walk *out, unsigned int bsize) { - unsigned int alignmask = desc->tfm->__crt_alg->cra_alignmask; + unsigned int alignmask = crypto_tfm_alg_alignmask(desc->tfm); u8 buffer[bsize * 2 + alignmask]; u8 *src = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); u8 *dst = src + bsize; @@ -98,7 +98,7 @@ static int crypt(const struct cipher_desc *desc, struct scatter_walk walk_in, walk_out; struct crypto_tfm *tfm = desc->tfm; const unsigned int bsize = crypto_tfm_alg_blocksize(tfm); - unsigned int alignmask = tfm->__crt_alg->cra_alignmask; + unsigned int alignmask = crypto_tfm_alg_alignmask(tfm); unsigned long buffer = 0; if (!nbytes) @@ -399,6 +399,8 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) } if (ops->cit_mode == CRYPTO_TFM_MODE_CBC) { + unsigned int align; + unsigned long addr; switch (crypto_tfm_alg_blocksize(tfm)) { case 8: @@ -418,9 +420,11 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) } ops->cit_ivsize = crypto_tfm_alg_blocksize(tfm); - ops->cit_iv = kmalloc(ops->cit_ivsize, GFP_KERNEL); - if (ops->cit_iv == NULL) - ret = -ENOMEM; + align = crypto_tfm_alg_alignmask(tfm) + 1; + addr = (unsigned long)crypto_tfm_ctx(tfm); + addr = ALIGN(addr, align); + addr += ALIGN(tfm->__crt_alg->cra_ctxsize, align); + ops->cit_iv = (void *)addr; } out: @@ -429,5 +433,4 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) void crypto_exit_cipher_ops(struct crypto_tfm *tfm) { - kfree(tfm->crt_cipher.cit_iv); } diff --git a/crypto/internal.h b/crypto/internal.h index 83b1b6d6d92b..68612874b5fd 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -16,6 +16,7 @@ #include #include #include +#include #include extern enum km_type crypto_km_types[]; @@ -61,6 +62,33 @@ static inline void crypto_init_proc(void) { } #endif +static inline unsigned int crypto_digest_ctxsize(struct crypto_alg *alg, + int flags) +{ + return alg->cra_ctxsize; +} + +static inline unsigned int crypto_cipher_ctxsize(struct crypto_alg *alg, + int flags) +{ + unsigned int len = alg->cra_ctxsize; + + switch (flags & CRYPTO_TFM_MODE_MASK) { + case CRYPTO_TFM_MODE_CBC: + len = ALIGN(len, alg->cra_alignmask + 1); + len += alg->cra_blocksize; + break; + } + + return len; +} + +static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg, + int flags) +{ + return alg->cra_ctxsize; +} + int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags); int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags); int crypto_init_compress_flags(struct crypto_tfm *tfm, u32 flags); diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index d2745ff4699c..c5b58fae95f2 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -465,8 +465,7 @@ static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aes_ctx) + - PADLOCK_ALIGNMENT, + .cra_ctxsize = sizeof(struct aes_ctx), .cra_alignmask = PADLOCK_ALIGNMENT - 1, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ac9d49beecd3..5e2bcc636a02 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -288,6 +288,11 @@ static inline unsigned int crypto_tfm_alg_digestsize(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_digest.dia_digestsize; } +static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_alignmask; +} + static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) { return (void *)&tfm[1]; From 915e8561d559abba1b81934e31e54a3f850fa7bf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:53:47 -0700 Subject: [PATCH 09/14] [CRYPTO] Handle unaligned iv from encrypt_iv/decrypt_iv Even though cit_iv is now always aligned, the user can still supply an unaligned iv through crypto_cipher_encrypt_iv/crypto_cipher_decrypt_iv. This patch will check the alignment of the user-supplied iv and copy it if necessary. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/cipher.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/crypto/cipher.c b/crypto/cipher.c index d3295ce14a57..1c92c6bb138b 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -154,6 +154,31 @@ static int crypt(const struct cipher_desc *desc, return 0; } +static int crypt_iv_unaligned(struct cipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_tfm *tfm = desc->tfm; + unsigned int alignmask = crypto_tfm_alg_alignmask(tfm); + u8 *iv = desc->info; + + if (unlikely(((unsigned long)iv & alignmask))) { + unsigned int ivsize = tfm->crt_cipher.cit_ivsize; + u8 buffer[ivsize + alignmask]; + u8 *tmp = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + int err; + + desc->info = memcpy(tmp, iv, ivsize); + err = crypt(desc, dst, src, nbytes); + memcpy(iv, tmp, ivsize); + + return err; + } + + return crypt(desc, dst, src, nbytes); +} + static unsigned int cbc_process_encrypt(const struct cipher_desc *desc, u8 *dst, const u8 *src, unsigned int nbytes) @@ -298,7 +323,7 @@ static int cbc_encrypt_iv(struct crypto_tfm *tfm, desc.prfn = cipher->cia_encrypt_cbc ?: cbc_process_encrypt; desc.info = iv; - return crypt(&desc, dst, src, nbytes); + return crypt_iv_unaligned(&desc, dst, src, nbytes); } static int cbc_decrypt(struct crypto_tfm *tfm, @@ -330,7 +355,7 @@ static int cbc_decrypt_iv(struct crypto_tfm *tfm, desc.prfn = cipher->cia_decrypt_cbc ?: cbc_process_decrypt; desc.info = iv; - return crypt(&desc, dst, src, nbytes); + return crypt_iv_unaligned(&desc, dst, src, nbytes); } static int nocrypt(struct crypto_tfm *tfm, From 476df259cd577e20379b02a7f7ffd086ea925a83 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:54:09 -0700 Subject: [PATCH 10/14] [CRYPTO] Update IV correctly for Padlock CBC encryption When the Padlock does CBC encryption, the memory pointed to by EAX is not updated at all. Instead, it updates the value of EAX by pointing it to the last block in the output. Therefore to maintain the correct semantics we need to copy the IV. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/crypto/padlock-aes.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index c5b58fae95f2..71407c578afe 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -400,8 +400,8 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, : "d"(control_word), "b"(key), "c"(count)); } -static inline void padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, - u8 *iv, void *control_word, u32 count) +static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, + u8 *iv, void *control_word, u32 count) { /* Enforce key reload. */ asm volatile ("pushfl; popfl"); @@ -409,6 +409,7 @@ static inline void padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" : "+S" (input), "+D" (output), "+a" (iv) : "d" (control_word), "b" (key), "c" (count)); + return iv; } static void @@ -447,8 +448,12 @@ static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out, const u8 *in, unsigned int nbytes) { struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm)); - padlock_xcrypt_cbc(in, out, ctx->E, desc->info, &ctx->cword.encrypt, - nbytes / AES_BLOCK_SIZE); + u8 *iv; + + iv = padlock_xcrypt_cbc(in, out, ctx->E, desc->info, + &ctx->cword.encrypt, nbytes / AES_BLOCK_SIZE); + memcpy(desc->info, iv, AES_BLOCK_SIZE); + return nbytes & ~(AES_BLOCK_SIZE - 1); } From a61cc44812ff94793987bf43b70a3d9bc64a6820 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Wed, 6 Jul 2005 13:54:31 -0700 Subject: [PATCH 11/14] [CRYPTO] Add null short circuit to crypto_free_tfm As far as I'm aware there's a general concensus that functions that are responsible for freeing resources should be able to cope with being passed a NULL pointer. This makes sense as it removes the need for all callers to check for NULL, thus elliminating the bugs that happen when some forget (safer to just check centrally in the freeing function) and it also makes for smaller code all over due to the lack of all those NULL checks. This patch makes it safe to pass the crypto_free_tfm() function a NULL pointer. Once this patch is applied we can start removing the NULL checks from the callers. Signed-off-by: Jesper Juhl Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/api.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 2d8d828c0ca2..b4728811ce3b 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -13,6 +13,8 @@ * any later version. * */ + +#include #include #include #include @@ -189,8 +191,14 @@ struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) void crypto_free_tfm(struct crypto_tfm *tfm) { - struct crypto_alg *alg = tfm->__crt_alg; - int size = sizeof(*tfm) + alg->cra_ctxsize; + struct crypto_alg *alg; + int size; + + if (unlikely(!tfm)) + return; + + alg = tfm->__crt_alg; + size = sizeof(*tfm) + alg->cra_ctxsize; crypto_exit_ops(tfm); crypto_alg_put(alg); From a2a892a236d03a6e985471a7e57d1c863de144c8 Mon Sep 17 00:00:00 2001 From: Andreas Steinmetz Date: Wed, 6 Jul 2005 13:55:00 -0700 Subject: [PATCH 12/14] [CRYPTO] Add x86_64 asm AES Implementation: =============== The encrypt/decrypt code is based on an x86 implementation I did a while ago which I never published. This unpublished implementation does include an assembler based key schedule and precomputed tables. For simplicity and best acceptance, however, I took Gladman's in-kernel code for table generation and key schedule for the kernel port of my assembler code and modified this code to produce the key schedule as required by my assembler implementation. File locations and Kconfig are kept similar to the i586 AES assembler implementation. It may seem a little bit strange to use 32 bit I/O and registers in the assembler implementation but this gives the best code size. My implementation takes one instruction more per round compared to Gladman's x86 assembler but it doesn't require any stack for local variables or saved registers and it is less serialized than Gladman's code. Note that all comparisons to Gladman's code were done after my code was implemented. I did only use FIPS PUB 197 for the implementation so my implementation is independent work. If anybody has a better assembler solution for x86_64 I'll be pleased to have my code replaced with the better solution. Testing: ======== The implementation passes the in-kernel crypto testing module and I'm running it without any problems on my laptop where it is mainly used for dm-crypt. Microbenchmark: =============== The microbenchmark was done in userspace with similar compile flags as used during kernel compile. Encrypt/decrypt is about 35% faster than the generic C implementation. As the generic C as well as my assembler implementation are both table I don't really expect that there is much room for further improvements though I'll be glad to be corrected here. The key schedule is about 5% slower than the generic C implementation. This is due to the fact that some more work has to be done in the key schedule routine to fit the schedule to the assembler implementation. Code Size: ========== Encrypt and decrypt are together about 2.1 Kbytes smaller than the generic C implementation which is important with regard to L1 cache usage. The key schedule routine is about 100 bytes larger than the generic C implementation. Data Size: ========== There's no difference in data size requirements between the assembler implementation and the generic C implementation. License: ======== Gladmans's code is dual BSD/GPL whereas my assembler code is GPLv2 only (I'm not going to change the license for my code). So I had to change the module license for the x86_64 aes module from 'Dual BSD/GPL' to 'GPL' to reflect the most restrictive license within the module. Signed-off-by: Andreas Steinmetz Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- arch/x86_64/Makefile | 4 +- arch/x86_64/crypto/Makefile | 9 + arch/x86_64/crypto/aes-x86_64-asm.S | 186 ++++++++++++++++ arch/x86_64/crypto/aes.c | 324 ++++++++++++++++++++++++++++ crypto/Kconfig | 22 +- 5 files changed, 543 insertions(+), 2 deletions(-) create mode 100644 arch/x86_64/crypto/Makefile create mode 100644 arch/x86_64/crypto/aes-x86_64-asm.S create mode 100644 arch/x86_64/crypto/aes.c diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 8a73794f9b90..428915697675 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -65,7 +65,9 @@ CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o libs-y += arch/x86_64/lib/ -core-y += arch/x86_64/kernel/ arch/x86_64/mm/ +core-y += arch/x86_64/kernel/ \ + arch/x86_64/mm/ \ + arch/x86_64/crypto/ core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/ drivers-$(CONFIG_PCI) += arch/x86_64/pci/ drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/ diff --git a/arch/x86_64/crypto/Makefile b/arch/x86_64/crypto/Makefile new file mode 100644 index 000000000000..426d20f4b72e --- /dev/null +++ b/arch/x86_64/crypto/Makefile @@ -0,0 +1,9 @@ +# +# x86_64/crypto/Makefile +# +# Arch-specific CryptoAPI modules. +# + +obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o + +aes-x86_64-y := aes-x86_64-asm.o aes.o diff --git a/arch/x86_64/crypto/aes-x86_64-asm.S b/arch/x86_64/crypto/aes-x86_64-asm.S new file mode 100644 index 000000000000..483cbb23ab8d --- /dev/null +++ b/arch/x86_64/crypto/aes-x86_64-asm.S @@ -0,0 +1,186 @@ +/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64 + * + * Copyright (C) 2005 Andreas Steinmetz, + * + * License: + * This code can be distributed under the terms of the GNU General Public + * License (GPL) Version 2 provided that the above header down to and + * including this sentence is retained in full. + */ + +.extern aes_ft_tab +.extern aes_it_tab +.extern aes_fl_tab +.extern aes_il_tab + +.text + +#define R1 %rax +#define R1E %eax +#define R1X %ax +#define R1H %ah +#define R1L %al +#define R2 %rbx +#define R2E %ebx +#define R2X %bx +#define R2H %bh +#define R2L %bl +#define R3 %rcx +#define R3E %ecx +#define R3X %cx +#define R3H %ch +#define R3L %cl +#define R4 %rdx +#define R4E %edx +#define R4X %dx +#define R4H %dh +#define R4L %dl +#define R5 %rsi +#define R5E %esi +#define R6 %rdi +#define R6E %edi +#define R7 %rbp +#define R7E %ebp +#define R8 %r8 +#define R9 %r9 +#define R10 %r10 +#define R11 %r11 + +#define prologue(FUNC,BASE,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ + .global FUNC; \ + .type FUNC,@function; \ + .align 8; \ +FUNC: movq r1,r2; \ + movq r3,r4; \ + leaq BASE+52(r8),r9; \ + movq r10,r11; \ + movl (r7),r5 ## E; \ + movl 4(r7),r1 ## E; \ + movl 8(r7),r6 ## E; \ + movl 12(r7),r7 ## E; \ + movl (r8),r10 ## E; \ + xorl -48(r9),r5 ## E; \ + xorl -44(r9),r1 ## E; \ + xorl -40(r9),r6 ## E; \ + xorl -36(r9),r7 ## E; \ + cmpl $24,r10 ## E; \ + jb B128; \ + leaq 32(r9),r9; \ + je B192; \ + leaq 32(r9),r9; + +#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \ + movq r1,r2; \ + movq r3,r4; \ + movl r5 ## E,(r9); \ + movl r6 ## E,4(r9); \ + movl r7 ## E,8(r9); \ + movl r8 ## E,12(r9); \ + ret; + +#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ + movzbl r2 ## H,r5 ## E; \ + movzbl r2 ## L,r6 ## E; \ + movl TAB+1024(,r5,4),r5 ## E;\ + movw r4 ## X,r2 ## X; \ + movl TAB(,r6,4),r6 ## E; \ + roll $16,r2 ## E; \ + shrl $16,r4 ## E; \ + movzbl r4 ## H,r7 ## E; \ + movzbl r4 ## L,r4 ## E; \ + xorl OFFSET(r8),ra ## E; \ + xorl OFFSET+4(r8),rb ## E; \ + xorl TAB+3072(,r7,4),r5 ## E;\ + xorl TAB+2048(,r4,4),r6 ## E;\ + movzbl r1 ## L,r7 ## E; \ + movzbl r1 ## H,r4 ## E; \ + movl TAB+1024(,r4,4),r4 ## E;\ + movw r3 ## X,r1 ## X; \ + roll $16,r1 ## E; \ + shrl $16,r3 ## E; \ + xorl TAB(,r7,4),r5 ## E; \ + movzbl r3 ## H,r7 ## E; \ + movzbl r3 ## L,r3 ## E; \ + xorl TAB+3072(,r7,4),r4 ## E;\ + xorl TAB+2048(,r3,4),r5 ## E;\ + movzbl r1 ## H,r7 ## E; \ + movzbl r1 ## L,r3 ## E; \ + shrl $16,r1 ## E; \ + xorl TAB+3072(,r7,4),r6 ## E;\ + movl TAB+2048(,r3,4),r3 ## E;\ + movzbl r1 ## H,r7 ## E; \ + movzbl r1 ## L,r1 ## E; \ + xorl TAB+1024(,r7,4),r6 ## E;\ + xorl TAB(,r1,4),r3 ## E; \ + movzbl r2 ## H,r1 ## E; \ + movzbl r2 ## L,r7 ## E; \ + shrl $16,r2 ## E; \ + xorl TAB+3072(,r1,4),r3 ## E;\ + xorl TAB+2048(,r7,4),r4 ## E;\ + movzbl r2 ## H,r1 ## E; \ + movzbl r2 ## L,r2 ## E; \ + xorl OFFSET+8(r8),rc ## E; \ + xorl OFFSET+12(r8),rd ## E; \ + xorl TAB+1024(,r1,4),r3 ## E;\ + xorl TAB(,r2,4),r4 ## E; + +#define move_regs(r1,r2,r3,r4) \ + movl r3 ## E,r1 ## E; \ + movl r4 ## E,r2 ## E; + +#define entry(FUNC,BASE,B128,B192) \ + prologue(FUNC,BASE,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11) + +#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11) + +#define encrypt_round(TAB,OFFSET) \ + round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ + move_regs(R1,R2,R5,R6) + +#define encrypt_final(TAB,OFFSET) \ + round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) + +#define decrypt_round(TAB,OFFSET) \ + round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \ + move_regs(R1,R2,R5,R6) + +#define decrypt_final(TAB,OFFSET) \ + round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) + +/* void aes_encrypt(void *ctx, u8 *out, const u8 *in) */ + + entry(aes_encrypt,0,enc128,enc192) + encrypt_round(aes_ft_tab,-96) + encrypt_round(aes_ft_tab,-80) +enc192: encrypt_round(aes_ft_tab,-64) + encrypt_round(aes_ft_tab,-48) +enc128: encrypt_round(aes_ft_tab,-32) + encrypt_round(aes_ft_tab,-16) + encrypt_round(aes_ft_tab, 0) + encrypt_round(aes_ft_tab, 16) + encrypt_round(aes_ft_tab, 32) + encrypt_round(aes_ft_tab, 48) + encrypt_round(aes_ft_tab, 64) + encrypt_round(aes_ft_tab, 80) + encrypt_round(aes_ft_tab, 96) + encrypt_final(aes_fl_tab,112) + return + +/* void aes_decrypt(void *ctx, u8 *out, const u8 *in) */ + + entry(aes_decrypt,240,dec128,dec192) + decrypt_round(aes_it_tab,-96) + decrypt_round(aes_it_tab,-80) +dec192: decrypt_round(aes_it_tab,-64) + decrypt_round(aes_it_tab,-48) +dec128: decrypt_round(aes_it_tab,-32) + decrypt_round(aes_it_tab,-16) + decrypt_round(aes_it_tab, 0) + decrypt_round(aes_it_tab, 16) + decrypt_round(aes_it_tab, 32) + decrypt_round(aes_it_tab, 48) + decrypt_round(aes_it_tab, 64) + decrypt_round(aes_it_tab, 80) + decrypt_round(aes_it_tab, 96) + decrypt_final(aes_il_tab,112) + return diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c new file mode 100644 index 000000000000..2b5c4010ce38 --- /dev/null +++ b/arch/x86_64/crypto/aes.c @@ -0,0 +1,324 @@ +/* + * Cryptographic API. + * + * AES Cipher Algorithm. + * + * Based on Brian Gladman's code. + * + * Linux developers: + * Alexander Kjeldaas + * Herbert Valerio Riedel + * Kyle McMartin + * Adam J. Richter (conversion to 2.5 API). + * Andreas Steinmetz (adapted to x86_64 assembler) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * --------------------------------------------------------------------------- + * Copyright (c) 2002, Dr Brian Gladman , Worcester, UK. + * All rights reserved. + * + * LICENSE TERMS + * + * The free distribution and use of this software in both source and binary + * form is allowed (with or without changes) provided that: + * + * 1. distributions of this source code include the above copyright + * notice, this list of conditions and the following disclaimer; + * + * 2. distributions in binary form include the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other associated materials; + * + * 3. the copyright holder's name is not used to endorse products + * built using this software without specific written permission. + * + * ALTERNATIVELY, provided that this notice is retained in full, this product + * may be distributed under the terms of the GNU General Public License (GPL), + * in which case the provisions of the GPL apply INSTEAD OF those given above. + * + * DISCLAIMER + * + * This software is provided 'as is' with no explicit or implied warranties + * in respect of its properties, including, but not limited to, correctness + * and/or fitness for purpose. + * --------------------------------------------------------------------------- + */ + +/* Some changes from the Gladman version: + s/RIJNDAEL(e_key)/E_KEY/g + s/RIJNDAEL(d_key)/D_KEY/g +*/ + +#include +#include +#include +#include +#include +#include +#include + +#define AES_MIN_KEY_SIZE 16 +#define AES_MAX_KEY_SIZE 32 + +#define AES_BLOCK_SIZE 16 + +/* + * #define byte(x, nr) ((unsigned char)((x) >> (nr*8))) + */ +static inline u8 byte(const u32 x, const unsigned n) +{ + return x >> (n << 3); +} + +#define u32_in(x) le32_to_cpu(*(const __le32 *)(x)) + +struct aes_ctx +{ + u32 key_length; + u32 E[60]; + u32 D[60]; +}; + +#define E_KEY ctx->E +#define D_KEY ctx->D + +static u8 pow_tab[256] __initdata; +static u8 log_tab[256] __initdata; +static u8 sbx_tab[256] __initdata; +static u8 isb_tab[256] __initdata; +static u32 rco_tab[10]; +u32 aes_ft_tab[4][256]; +u32 aes_it_tab[4][256]; + +u32 aes_fl_tab[4][256]; +u32 aes_il_tab[4][256]; + +static inline u8 f_mult(u8 a, u8 b) +{ + u8 aa = log_tab[a], cc = aa + log_tab[b]; + + return pow_tab[cc + (cc < aa ? 1 : 0)]; +} + +#define ff_mult(a, b) (a && b ? f_mult(a, b) : 0) + +#define ls_box(x) \ + (aes_fl_tab[0][byte(x, 0)] ^ \ + aes_fl_tab[1][byte(x, 1)] ^ \ + aes_fl_tab[2][byte(x, 2)] ^ \ + aes_fl_tab[3][byte(x, 3)]) + +static void __init gen_tabs(void) +{ + u32 i, t; + u8 p, q; + + /* log and power tables for GF(2**8) finite field with + 0x011b as modular polynomial - the simplest primitive + root is 0x03, used here to generate the tables */ + + for (i = 0, p = 1; i < 256; ++i) { + pow_tab[i] = (u8)p; + log_tab[p] = (u8)i; + + p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0); + } + + log_tab[1] = 0; + + for (i = 0, p = 1; i < 10; ++i) { + rco_tab[i] = p; + + p = (p << 1) ^ (p & 0x80 ? 0x01b : 0); + } + + for (i = 0; i < 256; ++i) { + p = (i ? pow_tab[255 - log_tab[i]] : 0); + q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2)); + p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2)); + sbx_tab[i] = p; + isb_tab[p] = (u8)i; + } + + for (i = 0; i < 256; ++i) { + p = sbx_tab[i]; + + t = p; + aes_fl_tab[0][i] = t; + aes_fl_tab[1][i] = rol32(t, 8); + aes_fl_tab[2][i] = rol32(t, 16); + aes_fl_tab[3][i] = rol32(t, 24); + + t = ((u32)ff_mult(2, p)) | + ((u32)p << 8) | + ((u32)p << 16) | ((u32)ff_mult(3, p) << 24); + + aes_ft_tab[0][i] = t; + aes_ft_tab[1][i] = rol32(t, 8); + aes_ft_tab[2][i] = rol32(t, 16); + aes_ft_tab[3][i] = rol32(t, 24); + + p = isb_tab[i]; + + t = p; + aes_il_tab[0][i] = t; + aes_il_tab[1][i] = rol32(t, 8); + aes_il_tab[2][i] = rol32(t, 16); + aes_il_tab[3][i] = rol32(t, 24); + + t = ((u32)ff_mult(14, p)) | + ((u32)ff_mult(9, p) << 8) | + ((u32)ff_mult(13, p) << 16) | + ((u32)ff_mult(11, p) << 24); + + aes_it_tab[0][i] = t; + aes_it_tab[1][i] = rol32(t, 8); + aes_it_tab[2][i] = rol32(t, 16); + aes_it_tab[3][i] = rol32(t, 24); + } +} + +#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b) + +#define imix_col(y, x) \ + u = star_x(x); \ + v = star_x(u); \ + w = star_x(v); \ + t = w ^ (x); \ + (y) = u ^ v ^ w; \ + (y) ^= ror32(u ^ t, 8) ^ \ + ror32(v ^ t, 16) ^ \ + ror32(t, 24) + +/* initialise the key schedule from the user supplied key */ + +#define loop4(i) \ +{ \ + t = ror32(t, 8); t = ls_box(t) ^ rco_tab[i]; \ + t ^= E_KEY[4 * i]; E_KEY[4 * i + 4] = t; \ + t ^= E_KEY[4 * i + 1]; E_KEY[4 * i + 5] = t; \ + t ^= E_KEY[4 * i + 2]; E_KEY[4 * i + 6] = t; \ + t ^= E_KEY[4 * i + 3]; E_KEY[4 * i + 7] = t; \ +} + +#define loop6(i) \ +{ \ + t = ror32(t, 8); t = ls_box(t) ^ rco_tab[i]; \ + t ^= E_KEY[6 * i]; E_KEY[6 * i + 6] = t; \ + t ^= E_KEY[6 * i + 1]; E_KEY[6 * i + 7] = t; \ + t ^= E_KEY[6 * i + 2]; E_KEY[6 * i + 8] = t; \ + t ^= E_KEY[6 * i + 3]; E_KEY[6 * i + 9] = t; \ + t ^= E_KEY[6 * i + 4]; E_KEY[6 * i + 10] = t; \ + t ^= E_KEY[6 * i + 5]; E_KEY[6 * i + 11] = t; \ +} + +#define loop8(i) \ +{ \ + t = ror32(t, 8); ; t = ls_box(t) ^ rco_tab[i]; \ + t ^= E_KEY[8 * i]; E_KEY[8 * i + 8] = t; \ + t ^= E_KEY[8 * i + 1]; E_KEY[8 * i + 9] = t; \ + t ^= E_KEY[8 * i + 2]; E_KEY[8 * i + 10] = t; \ + t ^= E_KEY[8 * i + 3]; E_KEY[8 * i + 11] = t; \ + t = E_KEY[8 * i + 4] ^ ls_box(t); \ + E_KEY[8 * i + 12] = t; \ + t ^= E_KEY[8 * i + 5]; E_KEY[8 * i + 13] = t; \ + t ^= E_KEY[8 * i + 6]; E_KEY[8 * i + 14] = t; \ + t ^= E_KEY[8 * i + 7]; E_KEY[8 * i + 15] = t; \ +} + +static int aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, + u32 *flags) +{ + struct aes_ctx *ctx = ctx_arg; + u32 i, j, t, u, v, w; + + if (key_len != 16 && key_len != 24 && key_len != 32) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + ctx->key_length = key_len; + + D_KEY[key_len + 24] = E_KEY[0] = u32_in(in_key); + D_KEY[key_len + 25] = E_KEY[1] = u32_in(in_key + 4); + D_KEY[key_len + 26] = E_KEY[2] = u32_in(in_key + 8); + D_KEY[key_len + 27] = E_KEY[3] = u32_in(in_key + 12); + + switch (key_len) { + case 16: + t = E_KEY[3]; + for (i = 0; i < 10; ++i) + loop4(i); + break; + + case 24: + E_KEY[4] = u32_in(in_key + 16); + t = E_KEY[5] = u32_in(in_key + 20); + for (i = 0; i < 8; ++i) + loop6 (i); + break; + + case 32: + E_KEY[4] = u32_in(in_key + 16); + E_KEY[5] = u32_in(in_key + 20); + E_KEY[6] = u32_in(in_key + 24); + t = E_KEY[7] = u32_in(in_key + 28); + for (i = 0; i < 7; ++i) + loop8(i); + break; + } + + D_KEY[0] = E_KEY[key_len + 24]; + D_KEY[1] = E_KEY[key_len + 25]; + D_KEY[2] = E_KEY[key_len + 26]; + D_KEY[3] = E_KEY[key_len + 27]; + + for (i = 4; i < key_len + 24; ++i) { + j = key_len + 24 - (i & ~3) + (i & 3); + imix_col(D_KEY[j], E_KEY[i]); + } + + return 0; +} + +extern void aes_encrypt(void *ctx_arg, u8 *out, const u8 *in); +extern void aes_decrypt(void *ctx_arg, u8 *out, const u8 *in); + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aes_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt + } + } +}; + +static int __init aes_init(void) +{ + gen_tabs(); + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_fini(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_init); +module_exit(aes_fini); + +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); +MODULE_LICENSE("GPL"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 90d6089d60ed..256c0b1fed10 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -146,7 +146,7 @@ config CRYPTO_SERPENT config CRYPTO_AES tristate "AES cipher algorithms" - depends on CRYPTO && !((X86 || UML_X86) && !64BIT) + depends on CRYPTO && !(X86 || UML_X86) help AES cipher algorithms (FIPS-197). AES uses the Rijndael algorithm. @@ -184,6 +184,26 @@ config CRYPTO_AES_586 See for more information. +config CRYPTO_AES_X86_64 + tristate "AES cipher algorithms (x86_64)" + depends on CRYPTO && ((X86 || UML_X86) && 64BIT) + help + AES cipher algorithms (FIPS-197). AES uses the Rijndael + algorithm. + + Rijndael appears to be consistently a very good performer in + both hardware and software across a wide range of computing + environments regardless of its use in feedback or non-feedback + modes. Its key setup time is excellent, and its key agility is + good. Rijndael's very low memory requirements make it very well + suited for restricted-space environments, in which it also + demonstrates excellent performance. Rijndael's operations are + among the easiest to defend against power and timing attacks. + + The AES specifies three key sizes: 128, 192 and 256 bits + + See for more information. + config CRYPTO_CAST5 tristate "CAST5 (CAST-128) cipher algorithm" depends on CRYPTO From a9df3597fec5472d0840fbfdc2a3fac5268f7d08 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:55:21 -0700 Subject: [PATCH 13/14] [CRYPTO] Remove unused iv field from context structure The iv field in des_ctx/des3_ede_ctx/serpent_ctx has never been used. This was noticed by Dag Arne Osvik. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/des.c | 2 -- crypto/serpent.c | 1 - 2 files changed, 3 deletions(-) diff --git a/crypto/des.c b/crypto/des.c index 1c7e6de9356c..fc5d1b61a0e9 100644 --- a/crypto/des.c +++ b/crypto/des.c @@ -38,12 +38,10 @@ #define ROR(d,c,o) ((d) = (d) >> (c) | (d) << (o)) struct des_ctx { - u8 iv[DES_BLOCK_SIZE]; u32 expkey[DES_EXPKEY_WORDS]; }; struct des3_ede_ctx { - u8 iv[DES_BLOCK_SIZE]; u32 expkey[DES3_EDE_EXPKEY_WORDS]; }; diff --git a/crypto/serpent.c b/crypto/serpent.c index 7d152e89016f..3cf2c5067eea 100644 --- a/crypto/serpent.c +++ b/crypto/serpent.c @@ -210,7 +210,6 @@ x4 ^= x2; struct serpent_ctx { - u8 iv[SERPENT_BLOCK_SIZE]; u32 expkey[SERPENT_EXPKEY_WORDS]; }; From e1d5dea1dfbfe484358c40db7f233ed6b5605646 Mon Sep 17 00:00:00 2001 From: Dag Arne Osvik Date: Wed, 6 Jul 2005 13:55:44 -0700 Subject: [PATCH 14/14] [CRYPTO] Add faster DES code from Dag Arne Osvik I've made a new implementation of DES to replace the old one in the kernel. It provides faster encryption on all tested processors apart from the original Pentium, and key setup is many times faster. Speed relative to old kernel implementation Processor des_setkey des_encrypt des3_ede_setkey des3_ede_encrypt Pentium 120Mhz 6.8 0.82 7.2 0.86 Pentium III 1.266Ghz 5.6 1.19 5.8 1.34 Pentium M 1.3Ghz 5.7 1.15 6.0 1.31 Pentium 4 2.266Ghz 5.8 1.24 6.0 1.40 Pentium 4E 3Ghz 5.4 1.27 5.5 1.48 StrongARM 1110 206Mhz 4.3 1.03 4.4 1.14 Athlon XP 2Ghz 7.8 1.44 8.1 1.61 Athlon 64 2Ghz 7.8 1.34 8.3 1.49 Signed-off-by: Dag Arne Osvik Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/des.c | 2001 +++++++++++++++++++++----------------------------- 1 file changed, 853 insertions(+), 1148 deletions(-) diff --git a/crypto/des.c b/crypto/des.c index fc5d1b61a0e9..a3c863dddded 100644 --- a/crypto/des.c +++ b/crypto/des.c @@ -1,18 +1,9 @@ -/* +/* * Cryptographic API. * * DES & Triple DES EDE Cipher Algorithms. * - * Originally released as descore by Dana L. How . - * Modified by Raimar Falke for the Linux-Kernel. - * Derived from Cryptoapi and Nettle implementations, adapted for in-place - * scatterlist interface. Changed LGPL to GPL per section 3 of the LGPL. - * - * Copyright (c) 1992 Dana L. How. - * Copyright (c) Raimar Falke - * Copyright (c) Gisle Sælensminde - * Copyright (C) 2001 Niels Möller. - * Copyright (c) 2002 James Morris + * Copyright (c) 2005 Dag Arne Osvik * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -20,11 +11,11 @@ * (at your option) any later version. * */ + +#include #include #include -#include #include -#include #include #define DES_KEY_SIZE 8 @@ -35,7 +26,8 @@ #define DES3_EDE_EXPKEY_WORDS (3 * DES_EXPKEY_WORDS) #define DES3_EDE_BLOCK_SIZE DES_BLOCK_SIZE -#define ROR(d,c,o) ((d) = (d) >> (c) | (d) << (o)) +#define ROL(x, r) ((x) = rol32((x), (r))) +#define ROR(x, r) ((x) = ror32((x), (r))) struct des_ctx { u32 expkey[DES_EXPKEY_WORDS]; @@ -45,1145 +37,815 @@ struct des3_ede_ctx { u32 expkey[DES3_EDE_EXPKEY_WORDS]; }; -static const u32 des_keymap[] = { - 0x02080008, 0x02082000, 0x00002008, 0x00000000, - 0x02002000, 0x00080008, 0x02080000, 0x02082008, - 0x00000008, 0x02000000, 0x00082000, 0x00002008, - 0x00082008, 0x02002008, 0x02000008, 0x02080000, - 0x00002000, 0x00082008, 0x00080008, 0x02002000, - 0x02082008, 0x02000008, 0x00000000, 0x00082000, - 0x02000000, 0x00080000, 0x02002008, 0x02080008, - 0x00080000, 0x00002000, 0x02082000, 0x00000008, - 0x00080000, 0x00002000, 0x02000008, 0x02082008, - 0x00002008, 0x02000000, 0x00000000, 0x00082000, - 0x02080008, 0x02002008, 0x02002000, 0x00080008, - 0x02082000, 0x00000008, 0x00080008, 0x02002000, - 0x02082008, 0x00080000, 0x02080000, 0x02000008, - 0x00082000, 0x00002008, 0x02002008, 0x02080000, - 0x00000008, 0x02082000, 0x00082008, 0x00000000, - 0x02000000, 0x02080008, 0x00002000, 0x00082008, +/* Lookup tables for key expansion */ - 0x08000004, 0x00020004, 0x00000000, 0x08020200, - 0x00020004, 0x00000200, 0x08000204, 0x00020000, - 0x00000204, 0x08020204, 0x00020200, 0x08000000, - 0x08000200, 0x08000004, 0x08020000, 0x00020204, - 0x00020000, 0x08000204, 0x08020004, 0x00000000, - 0x00000200, 0x00000004, 0x08020200, 0x08020004, - 0x08020204, 0x08020000, 0x08000000, 0x00000204, - 0x00000004, 0x00020200, 0x00020204, 0x08000200, - 0x00000204, 0x08000000, 0x08000200, 0x00020204, - 0x08020200, 0x00020004, 0x00000000, 0x08000200, - 0x08000000, 0x00000200, 0x08020004, 0x00020000, - 0x00020004, 0x08020204, 0x00020200, 0x00000004, - 0x08020204, 0x00020200, 0x00020000, 0x08000204, - 0x08000004, 0x08020000, 0x00020204, 0x00000000, - 0x00000200, 0x08000004, 0x08000204, 0x08020200, - 0x08020000, 0x00000204, 0x00000004, 0x08020004, - - 0x80040100, 0x01000100, 0x80000000, 0x81040100, - 0x00000000, 0x01040000, 0x81000100, 0x80040000, - 0x01040100, 0x81000000, 0x01000000, 0x80000100, - 0x81000000, 0x80040100, 0x00040000, 0x01000000, - 0x81040000, 0x00040100, 0x00000100, 0x80000000, - 0x00040100, 0x81000100, 0x01040000, 0x00000100, - 0x80000100, 0x00000000, 0x80040000, 0x01040100, - 0x01000100, 0x81040000, 0x81040100, 0x00040000, - 0x81040000, 0x80000100, 0x00040000, 0x81000000, - 0x00040100, 0x01000100, 0x80000000, 0x01040000, - 0x81000100, 0x00000000, 0x00000100, 0x80040000, - 0x00000000, 0x81040000, 0x01040100, 0x00000100, - 0x01000000, 0x81040100, 0x80040100, 0x00040000, - 0x81040100, 0x80000000, 0x01000100, 0x80040100, - 0x80040000, 0x00040100, 0x01040000, 0x81000100, - 0x80000100, 0x01000000, 0x81000000, 0x01040100, - - 0x04010801, 0x00000000, 0x00010800, 0x04010000, - 0x04000001, 0x00000801, 0x04000800, 0x00010800, - 0x00000800, 0x04010001, 0x00000001, 0x04000800, - 0x00010001, 0x04010800, 0x04010000, 0x00000001, - 0x00010000, 0x04000801, 0x04010001, 0x00000800, - 0x00010801, 0x04000000, 0x00000000, 0x00010001, - 0x04000801, 0x00010801, 0x04010800, 0x04000001, - 0x04000000, 0x00010000, 0x00000801, 0x04010801, - 0x00010001, 0x04010800, 0x04000800, 0x00010801, - 0x04010801, 0x00010001, 0x04000001, 0x00000000, - 0x04000000, 0x00000801, 0x00010000, 0x04010001, - 0x00000800, 0x04000000, 0x00010801, 0x04000801, - 0x04010800, 0x00000800, 0x00000000, 0x04000001, - 0x00000001, 0x04010801, 0x00010800, 0x04010000, - 0x04010001, 0x00010000, 0x00000801, 0x04000800, - 0x04000801, 0x00000001, 0x04010000, 0x00010800, - - 0x00000400, 0x00000020, 0x00100020, 0x40100000, - 0x40100420, 0x40000400, 0x00000420, 0x00000000, - 0x00100000, 0x40100020, 0x40000020, 0x00100400, - 0x40000000, 0x00100420, 0x00100400, 0x40000020, - 0x40100020, 0x00000400, 0x40000400, 0x40100420, - 0x00000000, 0x00100020, 0x40100000, 0x00000420, - 0x40100400, 0x40000420, 0x00100420, 0x40000000, - 0x40000420, 0x40100400, 0x00000020, 0x00100000, - 0x40000420, 0x00100400, 0x40100400, 0x40000020, - 0x00000400, 0x00000020, 0x00100000, 0x40100400, - 0x40100020, 0x40000420, 0x00000420, 0x00000000, - 0x00000020, 0x40100000, 0x40000000, 0x00100020, - 0x00000000, 0x40100020, 0x00100020, 0x00000420, - 0x40000020, 0x00000400, 0x40100420, 0x00100000, - 0x00100420, 0x40000000, 0x40000400, 0x40100420, - 0x40100000, 0x00100420, 0x00100400, 0x40000400, - - 0x00800000, 0x00001000, 0x00000040, 0x00801042, - 0x00801002, 0x00800040, 0x00001042, 0x00801000, - 0x00001000, 0x00000002, 0x00800002, 0x00001040, - 0x00800042, 0x00801002, 0x00801040, 0x00000000, - 0x00001040, 0x00800000, 0x00001002, 0x00000042, - 0x00800040, 0x00001042, 0x00000000, 0x00800002, - 0x00000002, 0x00800042, 0x00801042, 0x00001002, - 0x00801000, 0x00000040, 0x00000042, 0x00801040, - 0x00801040, 0x00800042, 0x00001002, 0x00801000, - 0x00001000, 0x00000002, 0x00800002, 0x00800040, - 0x00800000, 0x00001040, 0x00801042, 0x00000000, - 0x00001042, 0x00800000, 0x00000040, 0x00001002, - 0x00800042, 0x00000040, 0x00000000, 0x00801042, - 0x00801002, 0x00801040, 0x00000042, 0x00001000, - 0x00001040, 0x00801002, 0x00800040, 0x00000042, - 0x00000002, 0x00001042, 0x00801000, 0x00800002, - - 0x10400000, 0x00404010, 0x00000010, 0x10400010, - 0x10004000, 0x00400000, 0x10400010, 0x00004010, - 0x00400010, 0x00004000, 0x00404000, 0x10000000, - 0x10404010, 0x10000010, 0x10000000, 0x10404000, - 0x00000000, 0x10004000, 0x00404010, 0x00000010, - 0x10000010, 0x10404010, 0x00004000, 0x10400000, - 0x10404000, 0x00400010, 0x10004010, 0x00404000, - 0x00004010, 0x00000000, 0x00400000, 0x10004010, - 0x00404010, 0x00000010, 0x10000000, 0x00004000, - 0x10000010, 0x10004000, 0x00404000, 0x10400010, - 0x00000000, 0x00404010, 0x00004010, 0x10404000, - 0x10004000, 0x00400000, 0x10404010, 0x10000000, - 0x10004010, 0x10400000, 0x00400000, 0x10404010, - 0x00004000, 0x00400010, 0x10400010, 0x00004010, - 0x00400010, 0x00000000, 0x10404000, 0x10000010, - 0x10400000, 0x10004010, 0x00000010, 0x00404000, - - 0x00208080, 0x00008000, 0x20200000, 0x20208080, - 0x00200000, 0x20008080, 0x20008000, 0x20200000, - 0x20008080, 0x00208080, 0x00208000, 0x20000080, - 0x20200080, 0x00200000, 0x00000000, 0x20008000, - 0x00008000, 0x20000000, 0x00200080, 0x00008080, - 0x20208080, 0x00208000, 0x20000080, 0x00200080, - 0x20000000, 0x00000080, 0x00008080, 0x20208000, - 0x00000080, 0x20200080, 0x20208000, 0x00000000, - 0x00000000, 0x20208080, 0x00200080, 0x20008000, - 0x00208080, 0x00008000, 0x20000080, 0x00200080, - 0x20208000, 0x00000080, 0x00008080, 0x20200000, - 0x20008080, 0x20000000, 0x20200000, 0x00208000, - 0x20208080, 0x00008080, 0x00208000, 0x20200080, - 0x00200000, 0x20000080, 0x20008000, 0x00000000, - 0x00008000, 0x00200000, 0x20200080, 0x00208080, - 0x20000000, 0x20208000, 0x00000080, 0x20008080, +static const u8 pc1[256] = { + 0x00, 0x00, 0x40, 0x04, 0x10, 0x10, 0x50, 0x14, + 0x04, 0x40, 0x44, 0x44, 0x14, 0x50, 0x54, 0x54, + 0x02, 0x02, 0x42, 0x06, 0x12, 0x12, 0x52, 0x16, + 0x06, 0x42, 0x46, 0x46, 0x16, 0x52, 0x56, 0x56, + 0x80, 0x08, 0xc0, 0x0c, 0x90, 0x18, 0xd0, 0x1c, + 0x84, 0x48, 0xc4, 0x4c, 0x94, 0x58, 0xd4, 0x5c, + 0x82, 0x0a, 0xc2, 0x0e, 0x92, 0x1a, 0xd2, 0x1e, + 0x86, 0x4a, 0xc6, 0x4e, 0x96, 0x5a, 0xd6, 0x5e, + 0x20, 0x20, 0x60, 0x24, 0x30, 0x30, 0x70, 0x34, + 0x24, 0x60, 0x64, 0x64, 0x34, 0x70, 0x74, 0x74, + 0x22, 0x22, 0x62, 0x26, 0x32, 0x32, 0x72, 0x36, + 0x26, 0x62, 0x66, 0x66, 0x36, 0x72, 0x76, 0x76, + 0xa0, 0x28, 0xe0, 0x2c, 0xb0, 0x38, 0xf0, 0x3c, + 0xa4, 0x68, 0xe4, 0x6c, 0xb4, 0x78, 0xf4, 0x7c, + 0xa2, 0x2a, 0xe2, 0x2e, 0xb2, 0x3a, 0xf2, 0x3e, + 0xa6, 0x6a, 0xe6, 0x6e, 0xb6, 0x7a, 0xf6, 0x7e, + 0x08, 0x80, 0x48, 0x84, 0x18, 0x90, 0x58, 0x94, + 0x0c, 0xc0, 0x4c, 0xc4, 0x1c, 0xd0, 0x5c, 0xd4, + 0x0a, 0x82, 0x4a, 0x86, 0x1a, 0x92, 0x5a, 0x96, + 0x0e, 0xc2, 0x4e, 0xc6, 0x1e, 0xd2, 0x5e, 0xd6, + 0x88, 0x88, 0xc8, 0x8c, 0x98, 0x98, 0xd8, 0x9c, + 0x8c, 0xc8, 0xcc, 0xcc, 0x9c, 0xd8, 0xdc, 0xdc, + 0x8a, 0x8a, 0xca, 0x8e, 0x9a, 0x9a, 0xda, 0x9e, + 0x8e, 0xca, 0xce, 0xce, 0x9e, 0xda, 0xde, 0xde, + 0x28, 0xa0, 0x68, 0xa4, 0x38, 0xb0, 0x78, 0xb4, + 0x2c, 0xe0, 0x6c, 0xe4, 0x3c, 0xf0, 0x7c, 0xf4, + 0x2a, 0xa2, 0x6a, 0xa6, 0x3a, 0xb2, 0x7a, 0xb6, + 0x2e, 0xe2, 0x6e, 0xe6, 0x3e, 0xf2, 0x7e, 0xf6, + 0xa8, 0xa8, 0xe8, 0xac, 0xb8, 0xb8, 0xf8, 0xbc, + 0xac, 0xe8, 0xec, 0xec, 0xbc, 0xf8, 0xfc, 0xfc, + 0xaa, 0xaa, 0xea, 0xae, 0xba, 0xba, 0xfa, 0xbe, + 0xae, 0xea, 0xee, 0xee, 0xbe, 0xfa, 0xfe, 0xfe }; -static const u8 rotors[] = { - 34, 13, 5, 46, 47, 18, 32, 41, 11, 53, 33, 20, - 14, 36, 30, 24, 49, 2, 15, 37, 42, 50, 0, 21, - 38, 48, 6, 26, 39, 4, 52, 25, 12, 27, 31, 40, - 1, 17, 28, 29, 23, 51, 35, 7, 3, 22, 9, 43, - - 41, 20, 12, 53, 54, 25, 39, 48, 18, 31, 40, 27, - 21, 43, 37, 0, 1, 9, 22, 44, 49, 2, 7, 28, - 45, 55, 13, 33, 46, 11, 6, 32, 19, 34, 38, 47, - 8, 24, 35, 36, 30, 3, 42, 14, 10, 29, 16, 50, - - 55, 34, 26, 38, 11, 39, 53, 5, 32, 45, 54, 41, - 35, 2, 51, 14, 15, 23, 36, 3, 8, 16, 21, 42, - 6, 12, 27, 47, 31, 25, 20, 46, 33, 48, 52, 4, - 22, 7, 49, 50, 44, 17, 1, 28, 24, 43, 30, 9, - - 12, 48, 40, 52, 25, 53, 38, 19, 46, 6, 11, 55, - 49, 16, 10, 28, 29, 37, 50, 17, 22, 30, 35, 1, - 20, 26, 41, 4, 45, 39, 34, 31, 47, 5, 13, 18, - 36, 21, 8, 9, 3, 0, 15, 42, 7, 2, 44, 23, - - 26, 5, 54, 13, 39, 38, 52, 33, 31, 20, 25, 12, - 8, 30, 24, 42, 43, 51, 9, 0, 36, 44, 49, 15, - 34, 40, 55, 18, 6, 53, 48, 45, 4, 19, 27, 32, - 50, 35, 22, 23, 17, 14, 29, 1, 21, 16, 3, 37, - - 40, 19, 11, 27, 53, 52, 13, 47, 45, 34, 39, 26, - 22, 44, 7, 1, 2, 10, 23, 14, 50, 3, 8, 29, - 48, 54, 12, 32, 20, 38, 5, 6, 18, 33, 41, 46, - 9, 49, 36, 37, 0, 28, 43, 15, 35, 30, 17, 51, - - 54, 33, 25, 41, 38, 13, 27, 4, 6, 48, 53, 40, - 36, 3, 21, 15, 16, 24, 37, 28, 9, 17, 22, 43, - 5, 11, 26, 46, 34, 52, 19, 20, 32, 47, 55, 31, - 23, 8, 50, 51, 14, 42, 2, 29, 49, 44, 0, 10, - - 11, 47, 39, 55, 52, 27, 41, 18, 20, 5, 38, 54, - 50, 17, 35, 29, 30, 7, 51, 42, 23, 0, 36, 2, - 19, 25, 40, 31, 48, 13, 33, 34, 46, 4, 12, 45, - 37, 22, 9, 10, 28, 1, 16, 43, 8, 3, 14, 24, - - 18, 54, 46, 5, 6, 34, 48, 25, 27, 12, 45, 4, - 2, 24, 42, 36, 37, 14, 3, 49, 30, 7, 43, 9, - 26, 32, 47, 38, 55, 20, 40, 41, 53, 11, 19, 52, - 44, 29, 16, 17, 35, 8, 23, 50, 15, 10, 21, 0, - - 32, 11, 31, 19, 20, 48, 5, 39, 41, 26, 6, 18, - 16, 7, 1, 50, 51, 28, 17, 8, 44, 21, 2, 23, - 40, 46, 4, 52, 12, 34, 54, 55, 38, 25, 33, 13, - 3, 43, 30, 0, 49, 22, 37, 9, 29, 24, 35, 14, - - 46, 25, 45, 33, 34, 5, 19, 53, 55, 40, 20, 32, - 30, 21, 15, 9, 10, 42, 0, 22, 3, 35, 16, 37, - 54, 31, 18, 13, 26, 48, 11, 12, 52, 39, 47, 27, - 17, 2, 44, 14, 8, 36, 51, 23, 43, 7, 49, 28, - - 31, 39, 6, 47, 48, 19, 33, 38, 12, 54, 34, 46, - 44, 35, 29, 23, 24, 1, 14, 36, 17, 49, 30, 51, - 11, 45, 32, 27, 40, 5, 25, 26, 13, 53, 4, 41, - 0, 16, 3, 28, 22, 50, 10, 37, 2, 21, 8, 42, - - 45, 53, 20, 4, 5, 33, 47, 52, 26, 11, 48, 31, - 3, 49, 43, 37, 7, 15, 28, 50, 0, 8, 44, 10, - 25, 6, 46, 41, 54, 19, 39, 40, 27, 38, 18, 55, - 14, 30, 17, 42, 36, 9, 24, 51, 16, 35, 22, 1, - - 6, 38, 34, 18, 19, 47, 4, 13, 40, 25, 5, 45, - 17, 8, 2, 51, 21, 29, 42, 9, 14, 22, 3, 24, - 39, 20, 31, 55, 11, 33, 53, 54, 41, 52, 32, 12, - 28, 44, 0, 1, 50, 23, 7, 10, 30, 49, 36, 15, - - 20, 52, 48, 32, 33, 4, 18, 27, 54, 39, 19, 6, - 0, 22, 16, 10, 35, 43, 1, 23, 28, 36, 17, 7, - 53, 34, 45, 12, 25, 47, 38, 11, 55, 13, 46, 26, - 42, 3, 14, 15, 9, 37, 21, 24, 44, 8, 50, 29, - - 27, 6, 55, 39, 40, 11, 25, 34, 4, 46, 26, 13, - 7, 29, 23, 17, 42, 50, 8, 30, 35, 43, 24, 14, - 31, 41, 52, 19, 32, 54, 45, 18, 5, 20, 53, 33, - 49, 10, 21, 22, 16, 44, 28, 0, 51, 15, 2, 36, +static const u8 rs[256] = { + 0x00, 0x00, 0x80, 0x80, 0x02, 0x02, 0x82, 0x82, + 0x04, 0x04, 0x84, 0x84, 0x06, 0x06, 0x86, 0x86, + 0x08, 0x08, 0x88, 0x88, 0x0a, 0x0a, 0x8a, 0x8a, + 0x0c, 0x0c, 0x8c, 0x8c, 0x0e, 0x0e, 0x8e, 0x8e, + 0x10, 0x10, 0x90, 0x90, 0x12, 0x12, 0x92, 0x92, + 0x14, 0x14, 0x94, 0x94, 0x16, 0x16, 0x96, 0x96, + 0x18, 0x18, 0x98, 0x98, 0x1a, 0x1a, 0x9a, 0x9a, + 0x1c, 0x1c, 0x9c, 0x9c, 0x1e, 0x1e, 0x9e, 0x9e, + 0x20, 0x20, 0xa0, 0xa0, 0x22, 0x22, 0xa2, 0xa2, + 0x24, 0x24, 0xa4, 0xa4, 0x26, 0x26, 0xa6, 0xa6, + 0x28, 0x28, 0xa8, 0xa8, 0x2a, 0x2a, 0xaa, 0xaa, + 0x2c, 0x2c, 0xac, 0xac, 0x2e, 0x2e, 0xae, 0xae, + 0x30, 0x30, 0xb0, 0xb0, 0x32, 0x32, 0xb2, 0xb2, + 0x34, 0x34, 0xb4, 0xb4, 0x36, 0x36, 0xb6, 0xb6, + 0x38, 0x38, 0xb8, 0xb8, 0x3a, 0x3a, 0xba, 0xba, + 0x3c, 0x3c, 0xbc, 0xbc, 0x3e, 0x3e, 0xbe, 0xbe, + 0x40, 0x40, 0xc0, 0xc0, 0x42, 0x42, 0xc2, 0xc2, + 0x44, 0x44, 0xc4, 0xc4, 0x46, 0x46, 0xc6, 0xc6, + 0x48, 0x48, 0xc8, 0xc8, 0x4a, 0x4a, 0xca, 0xca, + 0x4c, 0x4c, 0xcc, 0xcc, 0x4e, 0x4e, 0xce, 0xce, + 0x50, 0x50, 0xd0, 0xd0, 0x52, 0x52, 0xd2, 0xd2, + 0x54, 0x54, 0xd4, 0xd4, 0x56, 0x56, 0xd6, 0xd6, + 0x58, 0x58, 0xd8, 0xd8, 0x5a, 0x5a, 0xda, 0xda, + 0x5c, 0x5c, 0xdc, 0xdc, 0x5e, 0x5e, 0xde, 0xde, + 0x60, 0x60, 0xe0, 0xe0, 0x62, 0x62, 0xe2, 0xe2, + 0x64, 0x64, 0xe4, 0xe4, 0x66, 0x66, 0xe6, 0xe6, + 0x68, 0x68, 0xe8, 0xe8, 0x6a, 0x6a, 0xea, 0xea, + 0x6c, 0x6c, 0xec, 0xec, 0x6e, 0x6e, 0xee, 0xee, + 0x70, 0x70, 0xf0, 0xf0, 0x72, 0x72, 0xf2, 0xf2, + 0x74, 0x74, 0xf4, 0xf4, 0x76, 0x76, 0xf6, 0xf6, + 0x78, 0x78, 0xf8, 0xf8, 0x7a, 0x7a, 0xfa, 0xfa, + 0x7c, 0x7c, 0xfc, 0xfc, 0x7e, 0x7e, 0xfe, 0xfe }; -static const u8 parity[] = { - 8,1,0,8,0,8,8,0,0,8,8,0,8,0,2,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,3, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 4,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,5,0,8,0,8,8,0,0,8,8,0,8,0,6,8, +static const u32 pc2[1024] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00040000, 0x00000000, 0x04000000, 0x00100000, + 0x00400000, 0x00000008, 0x00000800, 0x40000000, + 0x00440000, 0x00000008, 0x04000800, 0x40100000, + 0x00000400, 0x00000020, 0x08000000, 0x00000100, + 0x00040400, 0x00000020, 0x0c000000, 0x00100100, + 0x00400400, 0x00000028, 0x08000800, 0x40000100, + 0x00440400, 0x00000028, 0x0c000800, 0x40100100, + 0x80000000, 0x00000010, 0x00000000, 0x00800000, + 0x80040000, 0x00000010, 0x04000000, 0x00900000, + 0x80400000, 0x00000018, 0x00000800, 0x40800000, + 0x80440000, 0x00000018, 0x04000800, 0x40900000, + 0x80000400, 0x00000030, 0x08000000, 0x00800100, + 0x80040400, 0x00000030, 0x0c000000, 0x00900100, + 0x80400400, 0x00000038, 0x08000800, 0x40800100, + 0x80440400, 0x00000038, 0x0c000800, 0x40900100, + 0x10000000, 0x00000000, 0x00200000, 0x00001000, + 0x10040000, 0x00000000, 0x04200000, 0x00101000, + 0x10400000, 0x00000008, 0x00200800, 0x40001000, + 0x10440000, 0x00000008, 0x04200800, 0x40101000, + 0x10000400, 0x00000020, 0x08200000, 0x00001100, + 0x10040400, 0x00000020, 0x0c200000, 0x00101100, + 0x10400400, 0x00000028, 0x08200800, 0x40001100, + 0x10440400, 0x00000028, 0x0c200800, 0x40101100, + 0x90000000, 0x00000010, 0x00200000, 0x00801000, + 0x90040000, 0x00000010, 0x04200000, 0x00901000, + 0x90400000, 0x00000018, 0x00200800, 0x40801000, + 0x90440000, 0x00000018, 0x04200800, 0x40901000, + 0x90000400, 0x00000030, 0x08200000, 0x00801100, + 0x90040400, 0x00000030, 0x0c200000, 0x00901100, + 0x90400400, 0x00000038, 0x08200800, 0x40801100, + 0x90440400, 0x00000038, 0x0c200800, 0x40901100, + 0x00000200, 0x00080000, 0x00000000, 0x00000004, + 0x00040200, 0x00080000, 0x04000000, 0x00100004, + 0x00400200, 0x00080008, 0x00000800, 0x40000004, + 0x00440200, 0x00080008, 0x04000800, 0x40100004, + 0x00000600, 0x00080020, 0x08000000, 0x00000104, + 0x00040600, 0x00080020, 0x0c000000, 0x00100104, + 0x00400600, 0x00080028, 0x08000800, 0x40000104, + 0x00440600, 0x00080028, 0x0c000800, 0x40100104, + 0x80000200, 0x00080010, 0x00000000, 0x00800004, + 0x80040200, 0x00080010, 0x04000000, 0x00900004, + 0x80400200, 0x00080018, 0x00000800, 0x40800004, + 0x80440200, 0x00080018, 0x04000800, 0x40900004, + 0x80000600, 0x00080030, 0x08000000, 0x00800104, + 0x80040600, 0x00080030, 0x0c000000, 0x00900104, + 0x80400600, 0x00080038, 0x08000800, 0x40800104, + 0x80440600, 0x00080038, 0x0c000800, 0x40900104, + 0x10000200, 0x00080000, 0x00200000, 0x00001004, + 0x10040200, 0x00080000, 0x04200000, 0x00101004, + 0x10400200, 0x00080008, 0x00200800, 0x40001004, + 0x10440200, 0x00080008, 0x04200800, 0x40101004, + 0x10000600, 0x00080020, 0x08200000, 0x00001104, + 0x10040600, 0x00080020, 0x0c200000, 0x00101104, + 0x10400600, 0x00080028, 0x08200800, 0x40001104, + 0x10440600, 0x00080028, 0x0c200800, 0x40101104, + 0x90000200, 0x00080010, 0x00200000, 0x00801004, + 0x90040200, 0x00080010, 0x04200000, 0x00901004, + 0x90400200, 0x00080018, 0x00200800, 0x40801004, + 0x90440200, 0x00080018, 0x04200800, 0x40901004, + 0x90000600, 0x00080030, 0x08200000, 0x00801104, + 0x90040600, 0x00080030, 0x0c200000, 0x00901104, + 0x90400600, 0x00080038, 0x08200800, 0x40801104, + 0x90440600, 0x00080038, 0x0c200800, 0x40901104, + 0x00000002, 0x00002000, 0x20000000, 0x00000001, + 0x00040002, 0x00002000, 0x24000000, 0x00100001, + 0x00400002, 0x00002008, 0x20000800, 0x40000001, + 0x00440002, 0x00002008, 0x24000800, 0x40100001, + 0x00000402, 0x00002020, 0x28000000, 0x00000101, + 0x00040402, 0x00002020, 0x2c000000, 0x00100101, + 0x00400402, 0x00002028, 0x28000800, 0x40000101, + 0x00440402, 0x00002028, 0x2c000800, 0x40100101, + 0x80000002, 0x00002010, 0x20000000, 0x00800001, + 0x80040002, 0x00002010, 0x24000000, 0x00900001, + 0x80400002, 0x00002018, 0x20000800, 0x40800001, + 0x80440002, 0x00002018, 0x24000800, 0x40900001, + 0x80000402, 0x00002030, 0x28000000, 0x00800101, + 0x80040402, 0x00002030, 0x2c000000, 0x00900101, + 0x80400402, 0x00002038, 0x28000800, 0x40800101, + 0x80440402, 0x00002038, 0x2c000800, 0x40900101, + 0x10000002, 0x00002000, 0x20200000, 0x00001001, + 0x10040002, 0x00002000, 0x24200000, 0x00101001, + 0x10400002, 0x00002008, 0x20200800, 0x40001001, + 0x10440002, 0x00002008, 0x24200800, 0x40101001, + 0x10000402, 0x00002020, 0x28200000, 0x00001101, + 0x10040402, 0x00002020, 0x2c200000, 0x00101101, + 0x10400402, 0x00002028, 0x28200800, 0x40001101, + 0x10440402, 0x00002028, 0x2c200800, 0x40101101, + 0x90000002, 0x00002010, 0x20200000, 0x00801001, + 0x90040002, 0x00002010, 0x24200000, 0x00901001, + 0x90400002, 0x00002018, 0x20200800, 0x40801001, + 0x90440002, 0x00002018, 0x24200800, 0x40901001, + 0x90000402, 0x00002030, 0x28200000, 0x00801101, + 0x90040402, 0x00002030, 0x2c200000, 0x00901101, + 0x90400402, 0x00002038, 0x28200800, 0x40801101, + 0x90440402, 0x00002038, 0x2c200800, 0x40901101, + 0x00000202, 0x00082000, 0x20000000, 0x00000005, + 0x00040202, 0x00082000, 0x24000000, 0x00100005, + 0x00400202, 0x00082008, 0x20000800, 0x40000005, + 0x00440202, 0x00082008, 0x24000800, 0x40100005, + 0x00000602, 0x00082020, 0x28000000, 0x00000105, + 0x00040602, 0x00082020, 0x2c000000, 0x00100105, + 0x00400602, 0x00082028, 0x28000800, 0x40000105, + 0x00440602, 0x00082028, 0x2c000800, 0x40100105, + 0x80000202, 0x00082010, 0x20000000, 0x00800005, + 0x80040202, 0x00082010, 0x24000000, 0x00900005, + 0x80400202, 0x00082018, 0x20000800, 0x40800005, + 0x80440202, 0x00082018, 0x24000800, 0x40900005, + 0x80000602, 0x00082030, 0x28000000, 0x00800105, + 0x80040602, 0x00082030, 0x2c000000, 0x00900105, + 0x80400602, 0x00082038, 0x28000800, 0x40800105, + 0x80440602, 0x00082038, 0x2c000800, 0x40900105, + 0x10000202, 0x00082000, 0x20200000, 0x00001005, + 0x10040202, 0x00082000, 0x24200000, 0x00101005, + 0x10400202, 0x00082008, 0x20200800, 0x40001005, + 0x10440202, 0x00082008, 0x24200800, 0x40101005, + 0x10000602, 0x00082020, 0x28200000, 0x00001105, + 0x10040602, 0x00082020, 0x2c200000, 0x00101105, + 0x10400602, 0x00082028, 0x28200800, 0x40001105, + 0x10440602, 0x00082028, 0x2c200800, 0x40101105, + 0x90000202, 0x00082010, 0x20200000, 0x00801005, + 0x90040202, 0x00082010, 0x24200000, 0x00901005, + 0x90400202, 0x00082018, 0x20200800, 0x40801005, + 0x90440202, 0x00082018, 0x24200800, 0x40901005, + 0x90000602, 0x00082030, 0x28200000, 0x00801105, + 0x90040602, 0x00082030, 0x2c200000, 0x00901105, + 0x90400602, 0x00082038, 0x28200800, 0x40801105, + 0x90440602, 0x00082038, 0x2c200800, 0x40901105, + + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000008, 0x00080000, 0x10000000, + 0x02000000, 0x00000000, 0x00000080, 0x00001000, + 0x02000000, 0x00000008, 0x00080080, 0x10001000, + 0x00004000, 0x00000000, 0x00000040, 0x00040000, + 0x00004000, 0x00000008, 0x00080040, 0x10040000, + 0x02004000, 0x00000000, 0x000000c0, 0x00041000, + 0x02004000, 0x00000008, 0x000800c0, 0x10041000, + 0x00020000, 0x00008000, 0x08000000, 0x00200000, + 0x00020000, 0x00008008, 0x08080000, 0x10200000, + 0x02020000, 0x00008000, 0x08000080, 0x00201000, + 0x02020000, 0x00008008, 0x08080080, 0x10201000, + 0x00024000, 0x00008000, 0x08000040, 0x00240000, + 0x00024000, 0x00008008, 0x08080040, 0x10240000, + 0x02024000, 0x00008000, 0x080000c0, 0x00241000, + 0x02024000, 0x00008008, 0x080800c0, 0x10241000, + 0x00000000, 0x01000000, 0x00002000, 0x00000020, + 0x00000000, 0x01000008, 0x00082000, 0x10000020, + 0x02000000, 0x01000000, 0x00002080, 0x00001020, + 0x02000000, 0x01000008, 0x00082080, 0x10001020, + 0x00004000, 0x01000000, 0x00002040, 0x00040020, + 0x00004000, 0x01000008, 0x00082040, 0x10040020, + 0x02004000, 0x01000000, 0x000020c0, 0x00041020, + 0x02004000, 0x01000008, 0x000820c0, 0x10041020, + 0x00020000, 0x01008000, 0x08002000, 0x00200020, + 0x00020000, 0x01008008, 0x08082000, 0x10200020, + 0x02020000, 0x01008000, 0x08002080, 0x00201020, + 0x02020000, 0x01008008, 0x08082080, 0x10201020, + 0x00024000, 0x01008000, 0x08002040, 0x00240020, + 0x00024000, 0x01008008, 0x08082040, 0x10240020, + 0x02024000, 0x01008000, 0x080020c0, 0x00241020, + 0x02024000, 0x01008008, 0x080820c0, 0x10241020, + 0x00000400, 0x04000000, 0x00100000, 0x00000004, + 0x00000400, 0x04000008, 0x00180000, 0x10000004, + 0x02000400, 0x04000000, 0x00100080, 0x00001004, + 0x02000400, 0x04000008, 0x00180080, 0x10001004, + 0x00004400, 0x04000000, 0x00100040, 0x00040004, + 0x00004400, 0x04000008, 0x00180040, 0x10040004, + 0x02004400, 0x04000000, 0x001000c0, 0x00041004, + 0x02004400, 0x04000008, 0x001800c0, 0x10041004, + 0x00020400, 0x04008000, 0x08100000, 0x00200004, + 0x00020400, 0x04008008, 0x08180000, 0x10200004, + 0x02020400, 0x04008000, 0x08100080, 0x00201004, + 0x02020400, 0x04008008, 0x08180080, 0x10201004, + 0x00024400, 0x04008000, 0x08100040, 0x00240004, + 0x00024400, 0x04008008, 0x08180040, 0x10240004, + 0x02024400, 0x04008000, 0x081000c0, 0x00241004, + 0x02024400, 0x04008008, 0x081800c0, 0x10241004, + 0x00000400, 0x05000000, 0x00102000, 0x00000024, + 0x00000400, 0x05000008, 0x00182000, 0x10000024, + 0x02000400, 0x05000000, 0x00102080, 0x00001024, + 0x02000400, 0x05000008, 0x00182080, 0x10001024, + 0x00004400, 0x05000000, 0x00102040, 0x00040024, + 0x00004400, 0x05000008, 0x00182040, 0x10040024, + 0x02004400, 0x05000000, 0x001020c0, 0x00041024, + 0x02004400, 0x05000008, 0x001820c0, 0x10041024, + 0x00020400, 0x05008000, 0x08102000, 0x00200024, + 0x00020400, 0x05008008, 0x08182000, 0x10200024, + 0x02020400, 0x05008000, 0x08102080, 0x00201024, + 0x02020400, 0x05008008, 0x08182080, 0x10201024, + 0x00024400, 0x05008000, 0x08102040, 0x00240024, + 0x00024400, 0x05008008, 0x08182040, 0x10240024, + 0x02024400, 0x05008000, 0x081020c0, 0x00241024, + 0x02024400, 0x05008008, 0x081820c0, 0x10241024, + 0x00000800, 0x00010000, 0x20000000, 0x00000010, + 0x00000800, 0x00010008, 0x20080000, 0x10000010, + 0x02000800, 0x00010000, 0x20000080, 0x00001010, + 0x02000800, 0x00010008, 0x20080080, 0x10001010, + 0x00004800, 0x00010000, 0x20000040, 0x00040010, + 0x00004800, 0x00010008, 0x20080040, 0x10040010, + 0x02004800, 0x00010000, 0x200000c0, 0x00041010, + 0x02004800, 0x00010008, 0x200800c0, 0x10041010, + 0x00020800, 0x00018000, 0x28000000, 0x00200010, + 0x00020800, 0x00018008, 0x28080000, 0x10200010, + 0x02020800, 0x00018000, 0x28000080, 0x00201010, + 0x02020800, 0x00018008, 0x28080080, 0x10201010, + 0x00024800, 0x00018000, 0x28000040, 0x00240010, + 0x00024800, 0x00018008, 0x28080040, 0x10240010, + 0x02024800, 0x00018000, 0x280000c0, 0x00241010, + 0x02024800, 0x00018008, 0x280800c0, 0x10241010, + 0x00000800, 0x01010000, 0x20002000, 0x00000030, + 0x00000800, 0x01010008, 0x20082000, 0x10000030, + 0x02000800, 0x01010000, 0x20002080, 0x00001030, + 0x02000800, 0x01010008, 0x20082080, 0x10001030, + 0x00004800, 0x01010000, 0x20002040, 0x00040030, + 0x00004800, 0x01010008, 0x20082040, 0x10040030, + 0x02004800, 0x01010000, 0x200020c0, 0x00041030, + 0x02004800, 0x01010008, 0x200820c0, 0x10041030, + 0x00020800, 0x01018000, 0x28002000, 0x00200030, + 0x00020800, 0x01018008, 0x28082000, 0x10200030, + 0x02020800, 0x01018000, 0x28002080, 0x00201030, + 0x02020800, 0x01018008, 0x28082080, 0x10201030, + 0x00024800, 0x01018000, 0x28002040, 0x00240030, + 0x00024800, 0x01018008, 0x28082040, 0x10240030, + 0x02024800, 0x01018000, 0x280020c0, 0x00241030, + 0x02024800, 0x01018008, 0x280820c0, 0x10241030, + 0x00000c00, 0x04010000, 0x20100000, 0x00000014, + 0x00000c00, 0x04010008, 0x20180000, 0x10000014, + 0x02000c00, 0x04010000, 0x20100080, 0x00001014, + 0x02000c00, 0x04010008, 0x20180080, 0x10001014, + 0x00004c00, 0x04010000, 0x20100040, 0x00040014, + 0x00004c00, 0x04010008, 0x20180040, 0x10040014, + 0x02004c00, 0x04010000, 0x201000c0, 0x00041014, + 0x02004c00, 0x04010008, 0x201800c0, 0x10041014, + 0x00020c00, 0x04018000, 0x28100000, 0x00200014, + 0x00020c00, 0x04018008, 0x28180000, 0x10200014, + 0x02020c00, 0x04018000, 0x28100080, 0x00201014, + 0x02020c00, 0x04018008, 0x28180080, 0x10201014, + 0x00024c00, 0x04018000, 0x28100040, 0x00240014, + 0x00024c00, 0x04018008, 0x28180040, 0x10240014, + 0x02024c00, 0x04018000, 0x281000c0, 0x00241014, + 0x02024c00, 0x04018008, 0x281800c0, 0x10241014, + 0x00000c00, 0x05010000, 0x20102000, 0x00000034, + 0x00000c00, 0x05010008, 0x20182000, 0x10000034, + 0x02000c00, 0x05010000, 0x20102080, 0x00001034, + 0x02000c00, 0x05010008, 0x20182080, 0x10001034, + 0x00004c00, 0x05010000, 0x20102040, 0x00040034, + 0x00004c00, 0x05010008, 0x20182040, 0x10040034, + 0x02004c00, 0x05010000, 0x201020c0, 0x00041034, + 0x02004c00, 0x05010008, 0x201820c0, 0x10041034, + 0x00020c00, 0x05018000, 0x28102000, 0x00200034, + 0x00020c00, 0x05018008, 0x28182000, 0x10200034, + 0x02020c00, 0x05018000, 0x28102080, 0x00201034, + 0x02020c00, 0x05018008, 0x28182080, 0x10201034, + 0x00024c00, 0x05018000, 0x28102040, 0x00240034, + 0x00024c00, 0x05018008, 0x28182040, 0x10240034, + 0x02024c00, 0x05018000, 0x281020c0, 0x00241034, + 0x02024c00, 0x05018008, 0x281820c0, 0x10241034 }; +/* S-box lookup tables */ -static void des_small_fips_encrypt(u32 *expkey, u8 *dst, const u8 *src) +static const u32 S1[64] = { + 0x01010400, 0x00000000, 0x00010000, 0x01010404, + 0x01010004, 0x00010404, 0x00000004, 0x00010000, + 0x00000400, 0x01010400, 0x01010404, 0x00000400, + 0x01000404, 0x01010004, 0x01000000, 0x00000004, + 0x00000404, 0x01000400, 0x01000400, 0x00010400, + 0x00010400, 0x01010000, 0x01010000, 0x01000404, + 0x00010004, 0x01000004, 0x01000004, 0x00010004, + 0x00000000, 0x00000404, 0x00010404, 0x01000000, + 0x00010000, 0x01010404, 0x00000004, 0x01010000, + 0x01010400, 0x01000000, 0x01000000, 0x00000400, + 0x01010004, 0x00010000, 0x00010400, 0x01000004, + 0x00000400, 0x00000004, 0x01000404, 0x00010404, + 0x01010404, 0x00010004, 0x01010000, 0x01000404, + 0x01000004, 0x00000404, 0x00010404, 0x01010400, + 0x00000404, 0x01000400, 0x01000400, 0x00000000, + 0x00010004, 0x00010400, 0x00000000, 0x01010004 +}; + +static const u32 S2[64] = { + 0x80108020, 0x80008000, 0x00008000, 0x00108020, + 0x00100000, 0x00000020, 0x80100020, 0x80008020, + 0x80000020, 0x80108020, 0x80108000, 0x80000000, + 0x80008000, 0x00100000, 0x00000020, 0x80100020, + 0x00108000, 0x00100020, 0x80008020, 0x00000000, + 0x80000000, 0x00008000, 0x00108020, 0x80100000, + 0x00100020, 0x80000020, 0x00000000, 0x00108000, + 0x00008020, 0x80108000, 0x80100000, 0x00008020, + 0x00000000, 0x00108020, 0x80100020, 0x00100000, + 0x80008020, 0x80100000, 0x80108000, 0x00008000, + 0x80100000, 0x80008000, 0x00000020, 0x80108020, + 0x00108020, 0x00000020, 0x00008000, 0x80000000, + 0x00008020, 0x80108000, 0x00100000, 0x80000020, + 0x00100020, 0x80008020, 0x80000020, 0x00100020, + 0x00108000, 0x00000000, 0x80008000, 0x00008020, + 0x80000000, 0x80100020, 0x80108020, 0x00108000 +}; + +static const u32 S3[64] = { + 0x00000208, 0x08020200, 0x00000000, 0x08020008, + 0x08000200, 0x00000000, 0x00020208, 0x08000200, + 0x00020008, 0x08000008, 0x08000008, 0x00020000, + 0x08020208, 0x00020008, 0x08020000, 0x00000208, + 0x08000000, 0x00000008, 0x08020200, 0x00000200, + 0x00020200, 0x08020000, 0x08020008, 0x00020208, + 0x08000208, 0x00020200, 0x00020000, 0x08000208, + 0x00000008, 0x08020208, 0x00000200, 0x08000000, + 0x08020200, 0x08000000, 0x00020008, 0x00000208, + 0x00020000, 0x08020200, 0x08000200, 0x00000000, + 0x00000200, 0x00020008, 0x08020208, 0x08000200, + 0x08000008, 0x00000200, 0x00000000, 0x08020008, + 0x08000208, 0x00020000, 0x08000000, 0x08020208, + 0x00000008, 0x00020208, 0x00020200, 0x08000008, + 0x08020000, 0x08000208, 0x00000208, 0x08020000, + 0x00020208, 0x00000008, 0x08020008, 0x00020200 +}; + +static const u32 S4[64] = { + 0x00802001, 0x00002081, 0x00002081, 0x00000080, + 0x00802080, 0x00800081, 0x00800001, 0x00002001, + 0x00000000, 0x00802000, 0x00802000, 0x00802081, + 0x00000081, 0x00000000, 0x00800080, 0x00800001, + 0x00000001, 0x00002000, 0x00800000, 0x00802001, + 0x00000080, 0x00800000, 0x00002001, 0x00002080, + 0x00800081, 0x00000001, 0x00002080, 0x00800080, + 0x00002000, 0x00802080, 0x00802081, 0x00000081, + 0x00800080, 0x00800001, 0x00802000, 0x00802081, + 0x00000081, 0x00000000, 0x00000000, 0x00802000, + 0x00002080, 0x00800080, 0x00800081, 0x00000001, + 0x00802001, 0x00002081, 0x00002081, 0x00000080, + 0x00802081, 0x00000081, 0x00000001, 0x00002000, + 0x00800001, 0x00002001, 0x00802080, 0x00800081, + 0x00002001, 0x00002080, 0x00800000, 0x00802001, + 0x00000080, 0x00800000, 0x00002000, 0x00802080 +}; + +static const u32 S5[64] = { + 0x00000100, 0x02080100, 0x02080000, 0x42000100, + 0x00080000, 0x00000100, 0x40000000, 0x02080000, + 0x40080100, 0x00080000, 0x02000100, 0x40080100, + 0x42000100, 0x42080000, 0x00080100, 0x40000000, + 0x02000000, 0x40080000, 0x40080000, 0x00000000, + 0x40000100, 0x42080100, 0x42080100, 0x02000100, + 0x42080000, 0x40000100, 0x00000000, 0x42000000, + 0x02080100, 0x02000000, 0x42000000, 0x00080100, + 0x00080000, 0x42000100, 0x00000100, 0x02000000, + 0x40000000, 0x02080000, 0x42000100, 0x40080100, + 0x02000100, 0x40000000, 0x42080000, 0x02080100, + 0x40080100, 0x00000100, 0x02000000, 0x42080000, + 0x42080100, 0x00080100, 0x42000000, 0x42080100, + 0x02080000, 0x00000000, 0x40080000, 0x42000000, + 0x00080100, 0x02000100, 0x40000100, 0x00080000, + 0x00000000, 0x40080000, 0x02080100, 0x40000100 +}; + +static const u32 S6[64] = { + 0x20000010, 0x20400000, 0x00004000, 0x20404010, + 0x20400000, 0x00000010, 0x20404010, 0x00400000, + 0x20004000, 0x00404010, 0x00400000, 0x20000010, + 0x00400010, 0x20004000, 0x20000000, 0x00004010, + 0x00000000, 0x00400010, 0x20004010, 0x00004000, + 0x00404000, 0x20004010, 0x00000010, 0x20400010, + 0x20400010, 0x00000000, 0x00404010, 0x20404000, + 0x00004010, 0x00404000, 0x20404000, 0x20000000, + 0x20004000, 0x00000010, 0x20400010, 0x00404000, + 0x20404010, 0x00400000, 0x00004010, 0x20000010, + 0x00400000, 0x20004000, 0x20000000, 0x00004010, + 0x20000010, 0x20404010, 0x00404000, 0x20400000, + 0x00404010, 0x20404000, 0x00000000, 0x20400010, + 0x00000010, 0x00004000, 0x20400000, 0x00404010, + 0x00004000, 0x00400010, 0x20004010, 0x00000000, + 0x20404000, 0x20000000, 0x00400010, 0x20004010 +}; + +static const u32 S7[64] = { + 0x00200000, 0x04200002, 0x04000802, 0x00000000, + 0x00000800, 0x04000802, 0x00200802, 0x04200800, + 0x04200802, 0x00200000, 0x00000000, 0x04000002, + 0x00000002, 0x04000000, 0x04200002, 0x00000802, + 0x04000800, 0x00200802, 0x00200002, 0x04000800, + 0x04000002, 0x04200000, 0x04200800, 0x00200002, + 0x04200000, 0x00000800, 0x00000802, 0x04200802, + 0x00200800, 0x00000002, 0x04000000, 0x00200800, + 0x04000000, 0x00200800, 0x00200000, 0x04000802, + 0x04000802, 0x04200002, 0x04200002, 0x00000002, + 0x00200002, 0x04000000, 0x04000800, 0x00200000, + 0x04200800, 0x00000802, 0x00200802, 0x04200800, + 0x00000802, 0x04000002, 0x04200802, 0x04200000, + 0x00200800, 0x00000000, 0x00000002, 0x04200802, + 0x00000000, 0x00200802, 0x04200000, 0x00000800, + 0x04000002, 0x04000800, 0x00000800, 0x00200002 +}; + +static const u32 S8[64] = { + 0x10001040, 0x00001000, 0x00040000, 0x10041040, + 0x10000000, 0x10001040, 0x00000040, 0x10000000, + 0x00040040, 0x10040000, 0x10041040, 0x00041000, + 0x10041000, 0x00041040, 0x00001000, 0x00000040, + 0x10040000, 0x10000040, 0x10001000, 0x00001040, + 0x00041000, 0x00040040, 0x10040040, 0x10041000, + 0x00001040, 0x00000000, 0x00000000, 0x10040040, + 0x10000040, 0x10001000, 0x00041040, 0x00040000, + 0x00041040, 0x00040000, 0x10041000, 0x00001000, + 0x00000040, 0x10040040, 0x00001000, 0x00041040, + 0x10001000, 0x00000040, 0x10000040, 0x10040000, + 0x10040040, 0x10000000, 0x00040000, 0x10001040, + 0x00000000, 0x10041040, 0x00040040, 0x10000040, + 0x10040000, 0x10001000, 0x10001040, 0x00000000, + 0x10041040, 0x00041000, 0x00041000, 0x00001040, + 0x00001040, 0x00040040, 0x10000000, 0x10041000 +}; + +/* Encryption components: IP, FP, and round function */ + +#define IP(L, R, T) \ + ROL(R, 4); \ + T = L; \ + L ^= R; \ + L &= 0xf0f0f0f0; \ + R ^= L; \ + L ^= T; \ + ROL(R, 12); \ + T = L; \ + L ^= R; \ + L &= 0xffff0000; \ + R ^= L; \ + L ^= T; \ + ROR(R, 14); \ + T = L; \ + L ^= R; \ + L &= 0xcccccccc; \ + R ^= L; \ + L ^= T; \ + ROL(R, 6); \ + T = L; \ + L ^= R; \ + L &= 0xff00ff00; \ + R ^= L; \ + L ^= T; \ + ROR(R, 7); \ + T = L; \ + L ^= R; \ + L &= 0xaaaaaaaa; \ + R ^= L; \ + L ^= T; \ + ROL(L, 1); + +#define FP(L, R, T) \ + ROR(L, 1); \ + T = L; \ + L ^= R; \ + L &= 0xaaaaaaaa; \ + R ^= L; \ + L ^= T; \ + ROL(R, 7); \ + T = L; \ + L ^= R; \ + L &= 0xff00ff00; \ + R ^= L; \ + L ^= T; \ + ROR(R, 6); \ + T = L; \ + L ^= R; \ + L &= 0xcccccccc; \ + R ^= L; \ + L ^= T; \ + ROL(R, 14); \ + T = L; \ + L ^= R; \ + L &= 0xffff0000; \ + R ^= L; \ + L ^= T; \ + ROR(R, 12); \ + T = L; \ + L ^= R; \ + L &= 0xf0f0f0f0; \ + R ^= L; \ + L ^= T; \ + ROR(R, 4); + +#define ROUND(L, R, A, B, K, d) \ + B = K[0]; A = K[1]; K += d; \ + B ^= R; A ^= R; \ + B &= 0x3f3f3f3f; ROR(A, 4); \ + L ^= S8[0xff & B]; A &= 0x3f3f3f3f; \ + L ^= S6[0xff & (B >> 8)]; B >>= 16; \ + L ^= S7[0xff & A]; \ + L ^= S5[0xff & (A >> 8)]; A >>= 16; \ + L ^= S4[0xff & B]; \ + L ^= S2[0xff & (B >> 8)]; \ + L ^= S3[0xff & A]; \ + L ^= S1[0xff & (A >> 8)]; + +/* + * PC2 lookup tables are organized as 2 consecutive sets of 4 interleaved + * tables of 128 elements. One set is for C_i and the other for D_i, while + * the 4 interleaved tables correspond to four 7-bit subsets of C_i or D_i. + * + * After PC1 each of the variables a,b,c,d contains a 7 bit subset of C_i + * or D_i in bits 7-1 (bit 0 being the least significant). + */ + +#define T1(x) pt[2 * (x) + 0] +#define T2(x) pt[2 * (x) + 1] +#define T3(x) pt[2 * (x) + 2] +#define T4(x) pt[2 * (x) + 3] + +#define PC2(a, b, c, d) (T4(d) | T3(c) | T2(b) | T1(a)) + +/* + * Encryption key expansion + * + * RFC2451: Weak key checks SHOULD be performed. + * + * FIPS 74: + * + * Keys having duals are keys which produce all zeros, all ones, or + * alternating zero-one patterns in the C and D registers after Permuted + * Choice 1 has operated on the key. + * + */ +static unsigned long ekey(u32 *pe, const u8 *k) { - u32 x, y, z; - - x = src[7]; - x <<= 8; - x |= src[6]; - x <<= 8; - x |= src[5]; - x <<= 8; - x |= src[4]; - y = src[3]; - y <<= 8; - y |= src[2]; - y <<= 8; - y |= src[1]; - y <<= 8; - y |= src[0]; - z = ((x >> 004) ^ y) & 0x0F0F0F0FL; - x ^= z << 004; - y ^= z; - z = ((y >> 020) ^ x) & 0x0000FFFFL; - y ^= z << 020; - x ^= z; - z = ((x >> 002) ^ y) & 0x33333333L; - x ^= z << 002; - y ^= z; - z = ((y >> 010) ^ x) & 0x00FF00FFL; - y ^= z << 010; - x ^= z; - x = x >> 1 | x << 31; - z = (x ^ y) & 0x55555555L; - y ^= z; - x ^= z; - y = y >> 1 | y << 31; - z = expkey[0]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[1]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[2]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[3]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[4]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[5]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[6]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[7]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[8]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[9]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[10]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[11]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[12]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[13]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[14]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[15]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[16]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[17]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[18]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[19]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[20]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[21]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[22]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[23]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[24]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[25]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[26]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[27]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[28]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[29]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[30]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[31]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - x = x << 1 | x >> 31; - z = (x ^ y) & 0x55555555L; - y ^= z; - x ^= z; - y = y << 1 | y >> 31; - z = ((x >> 010) ^ y) & 0x00FF00FFL; - x ^= z << 010; - y ^= z; - z = ((y >> 002) ^ x) & 0x33333333L; - y ^= z << 002; - x ^= z; - z = ((x >> 020) ^ y) & 0x0000FFFFL; - x ^= z << 020; - y ^= z; - z = ((y >> 004) ^ x) & 0x0F0F0F0FL; - y ^= z << 004; - x ^= z; - dst[0] = x; - x >>= 8; - dst[1] = x; - x >>= 8; - dst[2] = x; - x >>= 8; - dst[3] = x; - dst[4] = y; - y >>= 8; - dst[5] = y; - y >>= 8; - dst[6] = y; - y >>= 8; - dst[7] = y; -} + /* K&R: long is at least 32 bits */ + unsigned long a, b, c, d, w; + const u32 *pt = pc2; -static void des_small_fips_decrypt(u32 *expkey, u8 *dst, const u8 *src) -{ - u32 x, y, z; - - x = src[7]; - x <<= 8; - x |= src[6]; - x <<= 8; - x |= src[5]; - x <<= 8; - x |= src[4]; - y = src[3]; - y <<= 8; - y |= src[2]; - y <<= 8; - y |= src[1]; - y <<= 8; - y |= src[0]; - z = ((x >> 004) ^ y) & 0x0F0F0F0FL; - x ^= z << 004; - y ^= z; - z = ((y >> 020) ^ x) & 0x0000FFFFL; - y ^= z << 020; - x ^= z; - z = ((x >> 002) ^ y) & 0x33333333L; - x ^= z << 002; - y ^= z; - z = ((y >> 010) ^ x) & 0x00FF00FFL; - y ^= z << 010; - x ^= z; - x = x >> 1 | x << 31; - z = (x ^ y) & 0x55555555L; - y ^= z; - x ^= z; - y = y >> 1 | y << 31; - z = expkey[31]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[30]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[29]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[28]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[27]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[26]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[25]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[24]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[23]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[22]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[21]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[20]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[19]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[18]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[17]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[16]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[15]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[14]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[13]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[12]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[11]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[10]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[9]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[8]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[7]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[6]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[5]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[4]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[3]; - z ^= y; - z = z << 4 | z >> 28; - x ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[2]; - z ^= y; - x ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - x ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - z = expkey[1]; - z ^= x; - z = z << 4 | z >> 28; - y ^= * (u32 *) ((u8 *) (des_keymap + 448) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 384) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 320) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 256) + (0xFC & z)); - z = expkey[0]; - z ^= x; - y ^= * (u32 *) ((u8 *) (des_keymap + 192) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 128) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) (des_keymap + 64) + (0xFC & z)); - z >>= 8; - y ^= * (u32 *) ((u8 *) des_keymap + (0xFC & z)); - x = x << 1 | x >> 31; - z = (x ^ y) & 0x55555555L; - y ^= z; - x ^= z; - y = y << 1 | y >> 31; - z = ((x >> 010) ^ y) & 0x00FF00FFL; - x ^= z << 010; - y ^= z; - z = ((y >> 002) ^ x) & 0x33333333L; - y ^= z << 002; - x ^= z; - z = ((x >> 020) ^ y) & 0x0000FFFFL; - x ^= z << 020; - y ^= z; - z = ((y >> 004) ^ x) & 0x0F0F0F0FL; - y ^= z << 004; - x ^= z; - dst[0] = x; - x >>= 8; - dst[1] = x; - x >>= 8; - dst[2] = x; - x >>= 8; - dst[3] = x; - dst[4] = y; - y >>= 8; - dst[5] = y; - y >>= 8; - dst[6] = y; - y >>= 8; - dst[7] = y; + d = k[4]; d &= 0x0e; d <<= 4; d |= k[0] & 0x1e; d = pc1[d]; + c = k[5]; c &= 0x0e; c <<= 4; c |= k[1] & 0x1e; c = pc1[c]; + b = k[6]; b &= 0x0e; b <<= 4; b |= k[2] & 0x1e; b = pc1[b]; + a = k[7]; a &= 0x0e; a <<= 4; a |= k[3] & 0x1e; a = pc1[a]; + + pe[15 * 2 + 0] = PC2(a, b, c, d); d = rs[d]; + pe[14 * 2 + 0] = PC2(d, a, b, c); c = rs[c]; b = rs[b]; + pe[13 * 2 + 0] = PC2(b, c, d, a); a = rs[a]; d = rs[d]; + pe[12 * 2 + 0] = PC2(d, a, b, c); c = rs[c]; b = rs[b]; + pe[11 * 2 + 0] = PC2(b, c, d, a); a = rs[a]; d = rs[d]; + pe[10 * 2 + 0] = PC2(d, a, b, c); c = rs[c]; b = rs[b]; + pe[ 9 * 2 + 0] = PC2(b, c, d, a); a = rs[a]; d = rs[d]; + pe[ 8 * 2 + 0] = PC2(d, a, b, c); c = rs[c]; + pe[ 7 * 2 + 0] = PC2(c, d, a, b); b = rs[b]; a = rs[a]; + pe[ 6 * 2 + 0] = PC2(a, b, c, d); d = rs[d]; c = rs[c]; + pe[ 5 * 2 + 0] = PC2(c, d, a, b); b = rs[b]; a = rs[a]; + pe[ 4 * 2 + 0] = PC2(a, b, c, d); d = rs[d]; c = rs[c]; + pe[ 3 * 2 + 0] = PC2(c, d, a, b); b = rs[b]; a = rs[a]; + pe[ 2 * 2 + 0] = PC2(a, b, c, d); d = rs[d]; c = rs[c]; + pe[ 1 * 2 + 0] = PC2(c, d, a, b); b = rs[b]; + pe[ 0 * 2 + 0] = PC2(b, c, d, a); + + /* Check if first half is weak */ + w = (a ^ c) | (b ^ d) | (rs[a] ^ c) | (b ^ rs[d]); + + /* Skip to next table set */ + pt += 512; + + d = k[0]; d &= 0xe0; d >>= 4; d |= k[4] & 0xf0; d = pc1[d + 1]; + c = k[1]; c &= 0xe0; c >>= 4; c |= k[5] & 0xf0; c = pc1[c + 1]; + b = k[2]; b &= 0xe0; b >>= 4; b |= k[6] & 0xf0; b = pc1[b + 1]; + a = k[3]; a &= 0xe0; a >>= 4; a |= k[7] & 0xf0; a = pc1[a + 1]; + + /* Check if second half is weak */ + w |= (a ^ c) | (b ^ d) | (rs[a] ^ c) | (b ^ rs[d]); + + pe[15 * 2 + 1] = PC2(a, b, c, d); d = rs[d]; + pe[14 * 2 + 1] = PC2(d, a, b, c); c = rs[c]; b = rs[b]; + pe[13 * 2 + 1] = PC2(b, c, d, a); a = rs[a]; d = rs[d]; + pe[12 * 2 + 1] = PC2(d, a, b, c); c = rs[c]; b = rs[b]; + pe[11 * 2 + 1] = PC2(b, c, d, a); a = rs[a]; d = rs[d]; + pe[10 * 2 + 1] = PC2(d, a, b, c); c = rs[c]; b = rs[b]; + pe[ 9 * 2 + 1] = PC2(b, c, d, a); a = rs[a]; d = rs[d]; + pe[ 8 * 2 + 1] = PC2(d, a, b, c); c = rs[c]; + pe[ 7 * 2 + 1] = PC2(c, d, a, b); b = rs[b]; a = rs[a]; + pe[ 6 * 2 + 1] = PC2(a, b, c, d); d = rs[d]; c = rs[c]; + pe[ 5 * 2 + 1] = PC2(c, d, a, b); b = rs[b]; a = rs[a]; + pe[ 4 * 2 + 1] = PC2(a, b, c, d); d = rs[d]; c = rs[c]; + pe[ 3 * 2 + 1] = PC2(c, d, a, b); b = rs[b]; a = rs[a]; + pe[ 2 * 2 + 1] = PC2(a, b, c, d); d = rs[d]; c = rs[c]; + pe[ 1 * 2 + 1] = PC2(c, d, a, b); b = rs[b]; + pe[ 0 * 2 + 1] = PC2(b, c, d, a); + + /* Fixup: 2413 5768 -> 1357 2468 */ + for (d = 0; d < 16; ++d) { + a = pe[2 * d]; + b = pe[2 * d + 1]; + c = a ^ b; + c &= 0xffff0000; + a ^= c; + b ^= c; + ROL(b, 18); + pe[2 * d] = a; + pe[2 * d + 1] = b; + } + + /* Zero if weak key */ + return w; } /* - * RFC2451: Weak key checks SHOULD be performed. + * Decryption key expansion + * + * No weak key checking is performed, as this is only used by triple DES + * */ -static int setkey(u32 *expkey, const u8 *key, unsigned int keylen, u32 *flags) +static void dkey(u32 *pe, const u8 *k) { - const u8 *k; - u8 *b0, *b1; - u32 n, w; - u8 bits0[56], bits1[56]; + /* K&R: long is at least 32 bits */ + unsigned long a, b, c, d; + const u32 *pt = pc2; - n = parity[key[0]]; n <<= 4; - n |= parity[key[1]]; n <<= 4; - n |= parity[key[2]]; n <<= 4; - n |= parity[key[3]]; n <<= 4; - n |= parity[key[4]]; n <<= 4; - n |= parity[key[5]]; n <<= 4; - n |= parity[key[6]]; n <<= 4; - n |= parity[key[7]]; - w = 0x88888888L; - - if ((*flags & CRYPTO_TFM_REQ_WEAK_KEY) - && !((n - (w >> 3)) & w)) { /* 1 in 10^10 keys passes this test */ - if (n < 0x41415151) { - if (n < 0x31312121) { - if (n < 0x14141515) { - /* 01 01 01 01 01 01 01 01 */ - if (n == 0x11111111) goto weak; - /* 01 1F 01 1F 01 0E 01 0E */ - if (n == 0x13131212) goto weak; - } else { - /* 01 E0 01 E0 01 F1 01 F1 */ - if (n == 0x14141515) goto weak; - /* 01 FE 01 FE 01 FE 01 FE */ - if (n == 0x16161616) goto weak; - } - } else { - if (n < 0x34342525) { - /* 1F 01 1F 01 0E 01 0E 01 */ - if (n == 0x31312121) goto weak; - /* 1F 1F 1F 1F 0E 0E 0E 0E (?) */ - if (n == 0x33332222) goto weak; - } else { - /* 1F E0 1F E0 0E F1 0E F1 */ - if (n == 0x34342525) goto weak; - /* 1F FE 1F FE 0E FE 0E FE */ - if (n == 0x36362626) goto weak; - } - } - } else { - if (n < 0x61616161) { - if (n < 0x44445555) { - /* E0 01 E0 01 F1 01 F1 01 */ - if (n == 0x41415151) goto weak; - /* E0 1F E0 1F F1 0E F1 0E */ - if (n == 0x43435252) goto weak; - } else { - /* E0 E0 E0 E0 F1 F1 F1 F1 (?) */ - if (n == 0x44445555) goto weak; - /* E0 FE E0 FE F1 FE F1 FE */ - if (n == 0x46465656) goto weak; - } - } else { - if (n < 0x64646565) { - /* FE 01 FE 01 FE 01 FE 01 */ - if (n == 0x61616161) goto weak; - /* FE 1F FE 1F FE 0E FE 0E */ - if (n == 0x63636262) goto weak; - } else { - /* FE E0 FE E0 FE F1 FE F1 */ - if (n == 0x64646565) goto weak; - /* FE FE FE FE FE FE FE FE */ - if (n == 0x66666666) goto weak; - } - } - } - - goto not_weak; -weak: - *flags |= CRYPTO_TFM_RES_WEAK_KEY; - return -EINVAL; + d = k[4]; d &= 0x0e; d <<= 4; d |= k[0] & 0x1e; d = pc1[d]; + c = k[5]; c &= 0x0e; c <<= 4; c |= k[1] & 0x1e; c = pc1[c]; + b = k[6]; b &= 0x0e; b <<= 4; b |= k[2] & 0x1e; b = pc1[b]; + a = k[7]; a &= 0x0e; a <<= 4; a |= k[3] & 0x1e; a = pc1[a]; + + pe[ 0 * 2] = PC2(a, b, c, d); d = rs[d]; + pe[ 1 * 2] = PC2(d, a, b, c); c = rs[c]; b = rs[b]; + pe[ 2 * 2] = PC2(b, c, d, a); a = rs[a]; d = rs[d]; + pe[ 3 * 2] = PC2(d, a, b, c); c = rs[c]; b = rs[b]; + pe[ 4 * 2] = PC2(b, c, d, a); a = rs[a]; d = rs[d]; + pe[ 5 * 2] = PC2(d, a, b, c); c = rs[c]; b = rs[b]; + pe[ 6 * 2] = PC2(b, c, d, a); a = rs[a]; d = rs[d]; + pe[ 7 * 2] = PC2(d, a, b, c); c = rs[c]; + pe[ 8 * 2] = PC2(c, d, a, b); b = rs[b]; a = rs[a]; + pe[ 9 * 2] = PC2(a, b, c, d); d = rs[d]; c = rs[c]; + pe[10 * 2] = PC2(c, d, a, b); b = rs[b]; a = rs[a]; + pe[11 * 2] = PC2(a, b, c, d); d = rs[d]; c = rs[c]; + pe[12 * 2] = PC2(c, d, a, b); b = rs[b]; a = rs[a]; + pe[13 * 2] = PC2(a, b, c, d); d = rs[d]; c = rs[c]; + pe[14 * 2] = PC2(c, d, a, b); b = rs[b]; + pe[15 * 2] = PC2(b, c, d, a); + + /* Skip to next table set */ + pt += 512; + + d = k[0]; d &= 0xe0; d >>= 4; d |= k[4] & 0xf0; d = pc1[d + 1]; + c = k[1]; c &= 0xe0; c >>= 4; c |= k[5] & 0xf0; c = pc1[c + 1]; + b = k[2]; b &= 0xe0; b >>= 4; b |= k[6] & 0xf0; b = pc1[b + 1]; + a = k[3]; a &= 0xe0; a >>= 4; a |= k[7] & 0xf0; a = pc1[a + 1]; + + pe[ 0 * 2 + 1] = PC2(a, b, c, d); d = rs[d]; + pe[ 1 * 2 + 1] = PC2(d, a, b, c); c = rs[c]; b = rs[b]; + pe[ 2 * 2 + 1] = PC2(b, c, d, a); a = rs[a]; d = rs[d]; + pe[ 3 * 2 + 1] = PC2(d, a, b, c); c = rs[c]; b = rs[b]; + pe[ 4 * 2 + 1] = PC2(b, c, d, a); a = rs[a]; d = rs[d]; + pe[ 5 * 2 + 1] = PC2(d, a, b, c); c = rs[c]; b = rs[b]; + pe[ 6 * 2 + 1] = PC2(b, c, d, a); a = rs[a]; d = rs[d]; + pe[ 7 * 2 + 1] = PC2(d, a, b, c); c = rs[c]; + pe[ 8 * 2 + 1] = PC2(c, d, a, b); b = rs[b]; a = rs[a]; + pe[ 9 * 2 + 1] = PC2(a, b, c, d); d = rs[d]; c = rs[c]; + pe[10 * 2 + 1] = PC2(c, d, a, b); b = rs[b]; a = rs[a]; + pe[11 * 2 + 1] = PC2(a, b, c, d); d = rs[d]; c = rs[c]; + pe[12 * 2 + 1] = PC2(c, d, a, b); b = rs[b]; a = rs[a]; + pe[13 * 2 + 1] = PC2(a, b, c, d); d = rs[d]; c = rs[c]; + pe[14 * 2 + 1] = PC2(c, d, a, b); b = rs[b]; + pe[15 * 2 + 1] = PC2(b, c, d, a); + + /* Fixup: 2413 5768 -> 1357 2468 */ + for (d = 0; d < 16; ++d) { + a = pe[2 * d]; + b = pe[2 * d + 1]; + c = a ^ b; + c &= 0xffff0000; + a ^= c; + b ^= c; + ROL(b, 18); + pe[2 * d] = a; + pe[2 * d + 1] = b; } - -not_weak: - - /* explode the bits */ - n = 56; - b0 = bits0; - b1 = bits1; - - do { - w = (256 | *key++) << 2; - do { - --n; - b1[n] = 8 & w; - w >>= 1; - b0[n] = 4 & w; - } while ( w >= 16 ); - } while ( n ); - - /* put the bits in the correct places */ - n = 16; - k = rotors; - - do { - w = (b1[k[ 0 ]] | b0[k[ 1 ]]) << 4; - w |= (b1[k[ 2 ]] | b0[k[ 3 ]]) << 2; - w |= b1[k[ 4 ]] | b0[k[ 5 ]]; - w <<= 8; - w |= (b1[k[ 6 ]] | b0[k[ 7 ]]) << 4; - w |= (b1[k[ 8 ]] | b0[k[ 9 ]]) << 2; - w |= b1[k[10 ]] | b0[k[11 ]]; - w <<= 8; - w |= (b1[k[12 ]] | b0[k[13 ]]) << 4; - w |= (b1[k[14 ]] | b0[k[15 ]]) << 2; - w |= b1[k[16 ]] | b0[k[17 ]]; - w <<= 8; - w |= (b1[k[18 ]] | b0[k[19 ]]) << 4; - w |= (b1[k[20 ]] | b0[k[21 ]]) << 2; - w |= b1[k[22 ]] | b0[k[23 ]]; - expkey[0] = w; - - w = (b1[k[ 0+24]] | b0[k[ 1+24]]) << 4; - w |= (b1[k[ 2+24]] | b0[k[ 3+24]]) << 2; - w |= b1[k[ 4+24]] | b0[k[ 5+24]]; - w <<= 8; - w |= (b1[k[ 6+24]] | b0[k[ 7+24]]) << 4; - w |= (b1[k[ 8+24]] | b0[k[ 9+24]]) << 2; - w |= b1[k[10+24]] | b0[k[11+24]]; - w <<= 8; - w |= (b1[k[12+24]] | b0[k[13+24]]) << 4; - w |= (b1[k[14+24]] | b0[k[15+24]]) << 2; - w |= b1[k[16+24]] | b0[k[17+24]]; - w <<= 8; - w |= (b1[k[18+24]] | b0[k[19+24]]) << 4; - w |= (b1[k[20+24]] | b0[k[21+24]]) << 2; - w |= b1[k[22+24]] | b0[k[23+24]]; - - ROR(w, 4, 28); /* could be eliminated */ - expkey[1] = w; - - k += 48; - expkey += 2; - } while (--n); - - return 0; } static int des_setkey(void *ctx, const u8 *key, unsigned int keylen, u32 *flags) { - return setkey(((struct des_ctx *)ctx)->expkey, key, keylen, flags); + struct des_ctx *dctx = ctx; + u32 tmp[DES_EXPKEY_WORDS]; + int ret; + + /* Expand to tmp */ + ret = ekey(tmp, key); + + if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; + } + + /* Copy to output */ + memcpy(dctx->expkey, tmp, sizeof(dctx->expkey)); + + return 0; } static void des_encrypt(void *ctx, u8 *dst, const u8 *src) { - des_small_fips_encrypt(((struct des_ctx *)ctx)->expkey, dst, src); + const u32 *K = ((struct des_ctx *)ctx)->expkey; + const __le32 *s = (const __le32 *)src; + __le32 *d = (__le32 *)dst; + u32 L, R, A, B; + int i; + + L = le32_to_cpu(s[0]); + R = le32_to_cpu(s[1]); + + IP(L, R, A); + for (i = 0; i < 8; i++) { + ROUND(L, R, A, B, K, 2); + ROUND(R, L, A, B, K, 2); + } + FP(R, L, A); + + d[0] = cpu_to_le32(R); + d[1] = cpu_to_le32(L); } static void des_decrypt(void *ctx, u8 *dst, const u8 *src) { - des_small_fips_decrypt(((struct des_ctx *)ctx)->expkey, dst, src); + const u32 *K = ((struct des_ctx *)ctx)->expkey + DES_EXPKEY_WORDS - 2; + const __le32 *s = (const __le32 *)src; + __le32 *d = (__le32 *)dst; + u32 L, R, A, B; + int i; + + L = le32_to_cpu(s[0]); + R = le32_to_cpu(s[1]); + + IP(L, R, A); + for (i = 0; i < 8; i++) { + ROUND(L, R, A, B, K, -2); + ROUND(R, L, A, B, K, -2); + } + FP(R, L, A); + + d[0] = cpu_to_le32(R); + d[1] = cpu_to_le32(L); } -/* +/* * RFC2451: * * For DES-EDE3, there is no known need to reject weak or @@ -1197,44 +859,86 @@ static void des_decrypt(void *ctx, u8 *dst, const u8 *src) * */ static int des3_ede_setkey(void *ctx, const u8 *key, - unsigned int keylen, u32 *flags) + unsigned int keylen, u32 *flags) { - unsigned int i, off; + const u32 *K = (const u32 *)key; struct des3_ede_ctx *dctx = ctx; + u32 *expkey = dctx->expkey; - if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && - memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], - DES_KEY_SIZE))) { - + if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) || + !((K[2] ^ K[4]) | (K[3] ^ K[5])))) + { *flags |= CRYPTO_TFM_RES_BAD_KEY_SCHED; return -EINVAL; } - - for (i = 0, off = 0; i < 3; i++, off += DES_EXPKEY_WORDS, - key += DES_KEY_SIZE) { - int ret = setkey(&dctx->expkey[off], key, DES_KEY_SIZE, flags); - if (ret < 0) - return ret; - } + + ekey(expkey, key); expkey += DES_EXPKEY_WORDS; key += DES_KEY_SIZE; + dkey(expkey, key); expkey += DES_EXPKEY_WORDS; key += DES_KEY_SIZE; + ekey(expkey, key); + return 0; } static void des3_ede_encrypt(void *ctx, u8 *dst, const u8 *src) { struct des3_ede_ctx *dctx = ctx; - - des_small_fips_encrypt(dctx->expkey, dst, src); - des_small_fips_decrypt(&dctx->expkey[DES_EXPKEY_WORDS], dst, dst); - des_small_fips_encrypt(&dctx->expkey[DES_EXPKEY_WORDS * 2], dst, dst); + const u32 *K = dctx->expkey; + const __le32 *s = (const __le32 *)src; + __le32 *d = (__le32 *)dst; + u32 L, R, A, B; + int i; + + L = le32_to_cpu(s[0]); + R = le32_to_cpu(s[1]); + + IP(L, R, A); + for (i = 0; i < 8; i++) { + ROUND(L, R, A, B, K, 2); + ROUND(R, L, A, B, K, 2); + } + for (i = 0; i < 8; i++) { + ROUND(R, L, A, B, K, 2); + ROUND(L, R, A, B, K, 2); + } + for (i = 0; i < 8; i++) { + ROUND(L, R, A, B, K, 2); + ROUND(R, L, A, B, K, 2); + } + FP(R, L, A); + + d[0] = cpu_to_le32(R); + d[1] = cpu_to_le32(L); } static void des3_ede_decrypt(void *ctx, u8 *dst, const u8 *src) { struct des3_ede_ctx *dctx = ctx; + const u32 *K = dctx->expkey + DES3_EDE_EXPKEY_WORDS - 2; + const __le32 *s = (const __le32 *)src; + __le32 *d = (__le32 *)dst; + u32 L, R, A, B; + int i; - des_small_fips_decrypt(&dctx->expkey[DES_EXPKEY_WORDS * 2], dst, src); - des_small_fips_encrypt(&dctx->expkey[DES_EXPKEY_WORDS], dst, dst); - des_small_fips_decrypt(dctx->expkey, dst, dst); + L = le32_to_cpu(s[0]); + R = le32_to_cpu(s[1]); + + IP(L, R, A); + for (i = 0; i < 8; i++) { + ROUND(L, R, A, B, K, -2); + ROUND(R, L, A, B, K, -2); + } + for (i = 0; i < 8; i++) { + ROUND(R, L, A, B, K, -2); + ROUND(L, R, A, B, K, -2); + } + for (i = 0; i < 8; i++) { + ROUND(L, R, A, B, K, -2); + ROUND(R, L, A, B, K, -2); + } + FP(R, L, A); + + d[0] = cpu_to_le32(R); + d[1] = cpu_to_le32(L); } static struct crypto_alg des_alg = { @@ -1247,7 +951,7 @@ static struct crypto_alg des_alg = { .cra_u = { .cipher = { .cia_min_keysize = DES_KEY_SIZE, .cia_max_keysize = DES_KEY_SIZE, - .cia_setkey = des_setkey, + .cia_setkey = des_setkey, .cia_encrypt = des_encrypt, .cia_decrypt = des_decrypt } } }; @@ -1262,9 +966,9 @@ static struct crypto_alg des3_ede_alg = { .cra_u = { .cipher = { .cia_min_keysize = DES3_EDE_KEY_SIZE, .cia_max_keysize = DES3_EDE_KEY_SIZE, - .cia_setkey = des3_ede_setkey, - .cia_encrypt = des3_ede_encrypt, - .cia_decrypt = des3_ede_decrypt } } + .cia_setkey = des3_ede_setkey, + .cia_encrypt = des3_ede_encrypt, + .cia_decrypt = des3_ede_decrypt } } }; MODULE_ALIAS("des3_ede"); @@ -1272,7 +976,7 @@ MODULE_ALIAS("des3_ede"); static int __init init(void) { int ret = 0; - + ret = crypto_register_alg(&des_alg); if (ret < 0) goto out; @@ -1280,7 +984,7 @@ static int __init init(void) ret = crypto_register_alg(&des3_ede_alg); if (ret < 0) crypto_unregister_alg(&des_alg); -out: +out: return ret; } @@ -1295,3 +999,4 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms"); +MODULE_AUTHOR("Dag Arne Osvik ");