/*
 * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher
 *
 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <linux/linkage.h>

.file "des3_ede-asm_64.S"
.text

#define s1 .L_s1
#define s2 ((s1) + (64*8))
#define s3 ((s2) + (64*8))
#define s4 ((s3) + (64*8))
#define s5 ((s4) + (64*8))
#define s6 ((s5) + (64*8))
#define s7 ((s6) + (64*8))
#define s8 ((s7) + (64*8))
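
/*
 * Note (added for clarity): the eight 64-entry S-box tables live back to
 * back starting at .L_s1, 64*8 bytes each, so s1..s8 are constant offsets
 * from one base symbol and each lookup fits a single disp(, index, 8)
 * addressing mode.
 */
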
/* register macros */
#define CTX %rdi

#define RL0 %r8
#define RL1 %r9
#define RL2 %r10

#define RL0d %r8d
#define RL1d %r9d
#define RL2d %r10d

#define RR0 %r11
#define RR1 %r12
#define RR2 %r13

#define RR0d %r11d
#define RR1d %r12d
#define RR2d %r13d

#define RW0 %rax
#define RW1 %rbx
#define RW2 %rcx

#define RW0d %eax
#define RW1d %ebx
#define RW2d %ecx

#define RW0bl %al
#define RW1bl %bl
#define RW2bl %cl

#define RW0bh %ah
#define RW1bh %bh
#define RW2bh %ch

#define RT0 %r15
#define RT1 %rbp
#define RT2 %r14
#define RT3 %rdx

#define RT0d %r15d
#define RT1d %ebp
#define RT2d %r14d
#define RT3d %edx

/***********************************************************************
 * 1-way 3DES
 ***********************************************************************/
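/*
 * Descriptive note (added): do_permutation() is the classic delta-swap.
 * It exchanges the bit groups of 'a' and 'b' selected by 'mask' (mask
 * applied at b's bit positions), 'offset' bits apart.  C equivalent:
 *
 *	t  = (a >> offset) ^ b;
 *	t &= mask;
 *	b ^= t;
 *	a ^= t << offset;
 */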
#define do_permutation(a, b, offset, mask) \
	movl a, RT0d; \
	shrl $(offset), RT0d; \
	xorl b, RT0d; \
	andl $(mask), RT0d; \
	xorl RT0d, b; \
	shll $(offset), RT0d; \
	xorl RT0d, a;

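/*
 * Descriptive note (added): expand_to_64bits() spreads a 32-bit half
 * across a 64-bit register: the low half keeps the value, the high half
 * gets the value rotated right by 4, and the 0x3f3f3f3f3f3f3f3f mask
 * keeps six bits per byte.  Each byte then holds one 6-bit S-box index,
 * ready for the movzbl extraction in the round macros.
 * compress_to_64bits() undoes this.
 */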
#define expand_to_64bits(val, mask) \
	movl val##d, RT0d; \
	rorl $4, RT0d; \
	shlq $32, RT0; \
	orq RT0, val; \
	andq mask, val;

#define compress_to_64bits(val) \
	movq val, RT0; \
	shrq $32, RT0; \
	roll $4, RT0d; \
	orl RT0d, val##d;

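/*
 * Descriptive note (added): initial_permutation()/final_permutation()
 * implement the DES initial permutation and its inverse as a sequence
 * of delta-swaps plus the rotate-by-one / 0xaaaaaaaa exchange, then
 * expand/compress the halves into the S-box indexing layout above.
 */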
#define initial_permutation(left, right) \
	do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(right##d, left##d, 2, 0x33333333); \
	do_permutation(right##d, left##d, 8, 0x00ff00ff); \
	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
	movl left##d, RW0d; \
	roll $1, right##d; \
	xorl right##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, left##d; \
	xorl RW0d, right##d; \
	roll $1, left##d; \
	expand_to_64bits(right, RT3); \
	expand_to_64bits(left, RT3);

#define final_permutation(left, right) \
	compress_to_64bits(right); \
	compress_to_64bits(left); \
	movl right##d, RW0d; \
	rorl $1, left##d; \
	xorl left##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, right##d; \
	xorl RW0d, left##d; \
	rorl $1, right##d; \
	do_permutation(right##d, left##d, 8, 0x00ff00ff); \
	do_permutation(right##d, left##d, 2, 0x33333333); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(left##d, right##d, 4, 0x0f0f0f0f);

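/*
 * Descriptive note (added): one DES round on a single block.  RW0 holds
 * the current round key XORed with the 'from' half; the eight byte
 * indices extracted from it select one quadword from each S-box, and
 * the lookups are XORed into the 'to' half (RT0 accumulates half of
 * them to shorten the dependency chain).  load_next_key() fetches the
 * next round key while the table loads are in flight.
 */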
#define round1(n, from, to, load_next_key) \
	xorq from, RW0; \
	\
	movzbl RW0bl, RT0d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	shrq $16, RW0; \
	movq s8(, RT0, 8), RT0; \
	xorq s6(, RT1, 8), to; \
	movzbl RW0bl, RL1d; \
	movzbl RW0bh, RT1d; \
	shrl $16, RW0d; \
	xorq s4(, RT2, 8), RT0; \
	xorq s2(, RT3, 8), to; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	xorq s7(, RL1, 8), RT0; \
	xorq s5(, RT1, 8), to; \
	xorq s3(, RT2, 8), RT0; \
	load_next_key(n, RW0); \
	xorq RT0, to; \
	xorq s1(, RT3, 8), to; \

#define load_next_key(n, RWx) \
	movq (((n) + 1) * 8)(CTX), RWx;

#define dummy2(a, b) /*_*/

#define read_block(io, left, right) \
	movl (io), left##d; \
	movl 4(io), right##d; \
	bswapl left##d; \
	bswapl right##d;

#define write_block(io, left, right) \
	bswapl left##d; \
	bswapl right##d; \
	movl left##d, (io); \
	movl right##d, 4(io);

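/*
 * Added note, assuming the usual glue declaration (see
 * arch/x86/crypto/des3_ede_glue.c); C prototype:
 *
 *	asmlinkage void des3_ede_x86_64_crypt_blk(const u32 *expkey,
 *						  u8 *dst, const u8 *src);
 *
 * One 8-byte block; the 48 quadword round keys at 'expkey' determine
 * whether this encrypts or decrypts.
 */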
ENTRY(des3_ede_x86_64_crypt_blk)
	/* input:
	 *	%rdi: round keys, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	pushq %rbp;
	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	read_block(%rdx, RL0, RR0);
	initial_permutation(RL0, RR0);

	movq (CTX), RW0;

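	/*
	 * Descriptive note (added): 3DES EDE = three 16-round DES passes
	 * over the same block.  The from/to halves alternate each round
	 * but do not flip across the 16-round boundaries (no swap
	 * between the single-DES passes).
	 */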
	round1(0, RR0, RL0, load_next_key);
	round1(1, RL0, RR0, load_next_key);
	round1(2, RR0, RL0, load_next_key);
	round1(3, RL0, RR0, load_next_key);
	round1(4, RR0, RL0, load_next_key);
	round1(5, RL0, RR0, load_next_key);
	round1(6, RR0, RL0, load_next_key);
	round1(7, RL0, RR0, load_next_key);
	round1(8, RR0, RL0, load_next_key);
	round1(9, RL0, RR0, load_next_key);
	round1(10, RR0, RL0, load_next_key);
	round1(11, RL0, RR0, load_next_key);
	round1(12, RR0, RL0, load_next_key);
	round1(13, RL0, RR0, load_next_key);
	round1(14, RR0, RL0, load_next_key);
	round1(15, RL0, RR0, load_next_key);

	round1(16+0, RL0, RR0, load_next_key);
	round1(16+1, RR0, RL0, load_next_key);
	round1(16+2, RL0, RR0, load_next_key);
	round1(16+3, RR0, RL0, load_next_key);
	round1(16+4, RL0, RR0, load_next_key);
	round1(16+5, RR0, RL0, load_next_key);
	round1(16+6, RL0, RR0, load_next_key);
	round1(16+7, RR0, RL0, load_next_key);
	round1(16+8, RL0, RR0, load_next_key);
	round1(16+9, RR0, RL0, load_next_key);
	round1(16+10, RL0, RR0, load_next_key);
	round1(16+11, RR0, RL0, load_next_key);
	round1(16+12, RL0, RR0, load_next_key);
	round1(16+13, RR0, RL0, load_next_key);
	round1(16+14, RL0, RR0, load_next_key);
	round1(16+15, RR0, RL0, load_next_key);

	round1(32+0, RR0, RL0, load_next_key);
	round1(32+1, RL0, RR0, load_next_key);
	round1(32+2, RR0, RL0, load_next_key);
	round1(32+3, RL0, RR0, load_next_key);
	round1(32+4, RR0, RL0, load_next_key);
	round1(32+5, RL0, RR0, load_next_key);
	round1(32+6, RR0, RL0, load_next_key);
	round1(32+7, RL0, RR0, load_next_key);
	round1(32+8, RR0, RL0, load_next_key);
	round1(32+9, RL0, RR0, load_next_key);
	round1(32+10, RR0, RL0, load_next_key);
	round1(32+11, RL0, RR0, load_next_key);
	round1(32+12, RR0, RL0, load_next_key);
	round1(32+13, RL0, RR0, load_next_key);
	round1(32+14, RR0, RL0, load_next_key);
	round1(32+15, RL0, RR0, dummy2);

	final_permutation(RR0, RL0);
	write_block(%rsi, RR0, RL0);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;
	popq %rbp;

	ret;
ENDPROC(des3_ede_x86_64_crypt_blk)

/***********************************************************************
 * 3-way 3DES
 ***********************************************************************/
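/*
 * Descriptive note (added): the 3-way variant processes three
 * independent blocks with their rounds interleaved, hiding S-box load
 * latency.  All three blocks use the same round keys, so round3() has
 * load_next_key() fetch into RW0 once and __movq copy it to RW1/RW2.
 */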
#define expand_to_64bits(val, mask) \
	movl val##d, RT0d; \
	rorl $4, RT0d; \
	shlq $32, RT0; \
	orq RT0, val; \
	andq mask, val;

#define compress_to_64bits(val) \
	movq val, RT0; \
	shrq $32, RT0; \
	roll $4, RT0d; \
	orl RT0d, val##d;

#define initial_permutation3(left, right) \
	do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
	do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
	do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
	do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \
	do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
	\
	do_permutation(right##0d, left##0d, 2, 0x33333333); \
	do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
	do_permutation(right##1d, left##1d, 2, 0x33333333); \
	do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
	do_permutation(right##2d, left##2d, 2, 0x33333333); \
	do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
	\
	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
	\
	movl left##0d, RW0d; \
	roll $1, right##0d; \
	xorl right##0d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, left##0d; \
	xorl RW0d, right##0d; \
	roll $1, left##0d; \
	expand_to_64bits(right##0, RT3); \
	expand_to_64bits(left##0, RT3); \
	movl left##1d, RW1d; \
	roll $1, right##1d; \
	xorl right##1d, RW1d; \
	andl $0xaaaaaaaa, RW1d; \
	xorl RW1d, left##1d; \
	xorl RW1d, right##1d; \
	roll $1, left##1d; \
	expand_to_64bits(right##1, RT3); \
	expand_to_64bits(left##1, RT3); \
	movl left##2d, RW2d; \
	roll $1, right##2d; \
	xorl right##2d, RW2d; \
	andl $0xaaaaaaaa, RW2d; \
	xorl RW2d, left##2d; \
	xorl RW2d, right##2d; \
	roll $1, left##2d; \
	expand_to_64bits(right##2, RT3); \
	expand_to_64bits(left##2, RT3);

#define final_permutation3(left, right) \
	compress_to_64bits(right##0); \
	compress_to_64bits(left##0); \
	movl right##0d, RW0d; \
	rorl $1, left##0d; \
	xorl left##0d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, right##0d; \
	xorl RW0d, left##0d; \
	rorl $1, right##0d; \
	compress_to_64bits(right##1); \
	compress_to_64bits(left##1); \
	movl right##1d, RW1d; \
	rorl $1, left##1d; \
	xorl left##1d, RW1d; \
	andl $0xaaaaaaaa, RW1d; \
	xorl RW1d, right##1d; \
	xorl RW1d, left##1d; \
	rorl $1, right##1d; \
	compress_to_64bits(right##2); \
	compress_to_64bits(left##2); \
	movl right##2d, RW2d; \
	rorl $1, left##2d; \
	xorl left##2d, RW2d; \
	andl $0xaaaaaaaa, RW2d; \
	xorl RW2d, right##2d; \
	xorl RW2d, left##2d; \
	rorl $1, right##2d; \
	\
	do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
	do_permutation(right##0d, left##0d, 2, 0x33333333); \
	do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
	do_permutation(right##1d, left##1d, 2, 0x33333333); \
	do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
	do_permutation(right##2d, left##2d, 2, 0x33333333); \
	\
	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
	do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
	do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
	do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
	do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
	do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f);

#define round3(n, from, to, load_next_key, do_movq) \
	xorq from##0, RW0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	xorq s8(, RT3, 8), to##0; \
	xorq s6(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	xorq s4(, RT3, 8), to##0; \
	xorq s2(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrl $16, RW0d; \
	xorq s7(, RT3, 8), to##0; \
	xorq s5(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	load_next_key(n, RW0); \
	xorq s3(, RT3, 8), to##0; \
	xorq s1(, RT1, 8), to##0; \
	xorq from##1, RW1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	shrq $16, RW1; \
	xorq s8(, RT3, 8), to##1; \
	xorq s6(, RT1, 8), to##1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	shrq $16, RW1; \
	xorq s4(, RT3, 8), to##1; \
	xorq s2(, RT1, 8), to##1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	shrl $16, RW1d; \
	xorq s7(, RT3, 8), to##1; \
	xorq s5(, RT1, 8), to##1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	do_movq(RW0, RW1); \
	xorq s3(, RT3, 8), to##1; \
	xorq s1(, RT1, 8), to##1; \
	xorq from##2, RW2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	shrq $16, RW2; \
	xorq s8(, RT3, 8), to##2; \
	xorq s6(, RT1, 8), to##2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	shrq $16, RW2; \
	xorq s4(, RT3, 8), to##2; \
	xorq s2(, RT1, 8), to##2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	shrl $16, RW2d; \
	xorq s7(, RT3, 8), to##2; \
	xorq s5(, RT1, 8), to##2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	do_movq(RW0, RW2); \
	xorq s3(, RT3, 8), to##2; \
	xorq s1(, RT1, 8), to##2;

#define __movq(src, dst) \
	movq src, dst;

ENTRY(des3_ede_x86_64_crypt_blk_3way)
	/* input:
	 *	%rdi: ctx, round keys
	 *	%rsi: dst (3 blocks)
	 *	%rdx: src (3 blocks)
	 */
	pushq %rbp;
	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	/* load input */
	movl 0 * 4(%rdx), RL0d;
	movl 1 * 4(%rdx), RR0d;
	movl 2 * 4(%rdx), RL1d;
	movl 3 * 4(%rdx), RR1d;
	movl 4 * 4(%rdx), RL2d;
	movl 5 * 4(%rdx), RR2d;

	bswapl RL0d;
	bswapl RR0d;
	bswapl RL1d;
	bswapl RR1d;
	bswapl RL2d;
	bswapl RR2d;

	initial_permutation3(RL, RR);

	movq 0(CTX), RW0;
	movq RW0, RW1;
	movq RW0, RW2;

	round3(0, RR, RL, load_next_key, __movq);
	round3(1, RL, RR, load_next_key, __movq);
	round3(2, RR, RL, load_next_key, __movq);
	round3(3, RL, RR, load_next_key, __movq);
	round3(4, RR, RL, load_next_key, __movq);
	round3(5, RL, RR, load_next_key, __movq);
	round3(6, RR, RL, load_next_key, __movq);
	round3(7, RL, RR, load_next_key, __movq);
	round3(8, RR, RL, load_next_key, __movq);
	round3(9, RL, RR, load_next_key, __movq);
	round3(10, RR, RL, load_next_key, __movq);
	round3(11, RL, RR, load_next_key, __movq);
	round3(12, RR, RL, load_next_key, __movq);
	round3(13, RL, RR, load_next_key, __movq);
	round3(14, RR, RL, load_next_key, __movq);
	round3(15, RL, RR, load_next_key, __movq);

	round3(16+0, RL, RR, load_next_key, __movq);
	round3(16+1, RR, RL, load_next_key, __movq);
	round3(16+2, RL, RR, load_next_key, __movq);
	round3(16+3, RR, RL, load_next_key, __movq);
	round3(16+4, RL, RR, load_next_key, __movq);
	round3(16+5, RR, RL, load_next_key, __movq);
	round3(16+6, RL, RR, load_next_key, __movq);
	round3(16+7, RR, RL, load_next_key, __movq);
	round3(16+8, RL, RR, load_next_key, __movq);
	round3(16+9, RR, RL, load_next_key, __movq);
	round3(16+10, RL, RR, load_next_key, __movq);
	round3(16+11, RR, RL, load_next_key, __movq);
	round3(16+12, RL, RR, load_next_key, __movq);
	round3(16+13, RR, RL, load_next_key, __movq);
	round3(16+14, RL, RR, load_next_key, __movq);
	round3(16+15, RR, RL, load_next_key, __movq);

	round3(32+0, RR, RL, load_next_key, __movq);
	round3(32+1, RL, RR, load_next_key, __movq);
	round3(32+2, RR, RL, load_next_key, __movq);
	round3(32+3, RL, RR, load_next_key, __movq);
	round3(32+4, RR, RL, load_next_key, __movq);
	round3(32+5, RL, RR, load_next_key, __movq);
	round3(32+6, RR, RL, load_next_key, __movq);
	round3(32+7, RL, RR, load_next_key, __movq);
	round3(32+8, RR, RL, load_next_key, __movq);
	round3(32+9, RL, RR, load_next_key, __movq);
	round3(32+10, RR, RL, load_next_key, __movq);
	round3(32+11, RL, RR, load_next_key, __movq);
	round3(32+12, RR, RL, load_next_key, __movq);
	round3(32+13, RL, RR, load_next_key, __movq);
	round3(32+14, RR, RL, load_next_key, __movq);
	round3(32+15, RL, RR, dummy2, dummy2);

	final_permutation3(RR, RL);

	bswapl RR0d;
	bswapl RL0d;
	bswapl RR1d;
	bswapl RL1d;
	bswapl RR2d;
	bswapl RL2d;

	movl RR0d, 0 * 4(%rsi);
	movl RL0d, 1 * 4(%rsi);
	movl RR1d, 2 * 4(%rsi);
	movl RL1d, 3 * 4(%rsi);
	movl RR2d, 4 * 4(%rsi);
	movl RL2d, 5 * 4(%rsi);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;
	popq %rbp;

	ret;
ENDPROC(des3_ede_x86_64_crypt_blk_3way)

.data
.align 16
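/*
 * Added note (inferred from the round macros): each table entry appears
 * to premerge the DES S-box output with the P permutation, with the
 * result bits arranged for the rotated upper/lower-half register layout
 * produced by expand_to_64bits(); 64 quadword entries per box.
 */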
.L_s1:
	.quad 0x0010100001010400, 0x0000000000000000
	.quad 0x0000100000010000, 0x0010100001010404
	.quad 0x0010100001010004, 0x0000100000010404
	.quad 0x0000000000000004, 0x0000100000010000
	.quad 0x0000000000000400, 0x0010100001010400
	.quad 0x0010100001010404, 0x0000000000000400
	.quad 0x0010000001000404, 0x0010100001010004
	.quad 0x0010000001000000, 0x0000000000000004
	.quad 0x0000000000000404, 0x0010000001000400
	.quad 0x0010000001000400, 0x0000100000010400
	.quad 0x0000100000010400, 0x0010100001010000
	.quad 0x0010100001010000, 0x0010000001000404
	.quad 0x0000100000010004, 0x0010000001000004
	.quad 0x0010000001000004, 0x0000100000010004
	.quad 0x0000000000000000, 0x0000000000000404
	.quad 0x0000100000010404, 0x0010000001000000
	.quad 0x0000100000010000, 0x0010100001010404
	.quad 0x0000000000000004, 0x0010100001010000
	.quad 0x0010100001010400, 0x0010000001000000
	.quad 0x0010000001000000, 0x0000000000000400
	.quad 0x0010100001010004, 0x0000100000010000
	.quad 0x0000100000010400, 0x0010000001000004
	.quad 0x0000000000000400, 0x0000000000000004
	.quad 0x0010000001000404, 0x0000100000010404
	.quad 0x0010100001010404, 0x0000100000010004
	.quad 0x0010100001010000, 0x0010000001000404
	.quad 0x0010000001000004, 0x0000000000000404
	.quad 0x0000100000010404, 0x0010100001010400
	.quad 0x0000000000000404, 0x0010000001000400
	.quad 0x0010000001000400, 0x0000000000000000
	.quad 0x0000100000010004, 0x0000100000010400
	.quad 0x0000000000000000, 0x0010100001010004
.L_s2:
	.quad 0x0801080200100020, 0x0800080000000000
	.quad 0x0000080000000000, 0x0001080200100020
	.quad 0x0001000000100000, 0x0000000200000020
	.quad 0x0801000200100020, 0x0800080200000020
	.quad 0x0800000200000020, 0x0801080200100020
	.quad 0x0801080000100000, 0x0800000000000000
	.quad 0x0800080000000000, 0x0001000000100000
	.quad 0x0000000200000020, 0x0801000200100020
	.quad 0x0001080000100000, 0x0001000200100020
	.quad 0x0800080200000020, 0x0000000000000000
	.quad 0x0800000000000000, 0x0000080000000000
	.quad 0x0001080200100020, 0x0801000000100000
	.quad 0x0001000200100020, 0x0800000200000020
	.quad 0x0000000000000000, 0x0001080000100000
	.quad 0x0000080200000020, 0x0801080000100000
	.quad 0x0801000000100000, 0x0000080200000020
	.quad 0x0000000000000000, 0x0001080200100020
	.quad 0x0801000200100020, 0x0001000000100000
	.quad 0x0800080200000020, 0x0801000000100000
	.quad 0x0801080000100000, 0x0000080000000000
	.quad 0x0801000000100000, 0x0800080000000000
	.quad 0x0000000200000020, 0x0801080200100020
	.quad 0x0001080200100020, 0x0000000200000020
	.quad 0x0000080000000000, 0x0800000000000000
	.quad 0x0000080200000020, 0x0801080000100000
	.quad 0x0001000000100000, 0x0800000200000020
	.quad 0x0001000200100020, 0x0800080200000020
	.quad 0x0800000200000020, 0x0001000200100020
	.quad 0x0001080000100000, 0x0000000000000000
	.quad 0x0800080000000000, 0x0000080200000020
	.quad 0x0800000000000000, 0x0801000200100020
	.quad 0x0801080200100020, 0x0001080000100000
.L_s3:
	.quad 0x0000002000000208, 0x0000202008020200
	.quad 0x0000000000000000, 0x0000200008020008
	.quad 0x0000002008000200, 0x0000000000000000
	.quad 0x0000202000020208, 0x0000002008000200
	.quad 0x0000200000020008, 0x0000000008000008
	.quad 0x0000000008000008, 0x0000200000020000
	.quad 0x0000202008020208, 0x0000200000020008
	.quad 0x0000200008020000, 0x0000002000000208
	.quad 0x0000000008000000, 0x0000000000000008
	.quad 0x0000202008020200, 0x0000002000000200
	.quad 0x0000202000020200, 0x0000200008020000
	.quad 0x0000200008020008, 0x0000202000020208
	.quad 0x0000002008000208, 0x0000202000020200
	.quad 0x0000200000020000, 0x0000002008000208
	.quad 0x0000000000000008, 0x0000202008020208
	.quad 0x0000002000000200, 0x0000000008000000
	.quad 0x0000202008020200, 0x0000000008000000
	.quad 0x0000200000020008, 0x0000002000000208
	.quad 0x0000200000020000, 0x0000202008020200
	.quad 0x0000002008000200, 0x0000000000000000
	.quad 0x0000002000000200, 0x0000200000020008
	.quad 0x0000202008020208, 0x0000002008000200
	.quad 0x0000000008000008, 0x0000002000000200
	.quad 0x0000000000000000, 0x0000200008020008
	.quad 0x0000002008000208, 0x0000200000020000
	.quad 0x0000000008000000, 0x0000202008020208
	.quad 0x0000000000000008, 0x0000202000020208
	.quad 0x0000202000020200, 0x0000000008000008
	.quad 0x0000200008020000, 0x0000002008000208
	.quad 0x0000002000000208, 0x0000200008020000
	.quad 0x0000202000020208, 0x0000000000000008
	.quad 0x0000200008020008, 0x0000202000020200
.L_s4:
	.quad 0x1008020000002001, 0x1000020800002001
	.quad 0x1000020800002001, 0x0000000800000000
	.quad 0x0008020800002000, 0x1008000800000001
	.quad 0x1008000000000001, 0x1000020000002001
	.quad 0x0000000000000000, 0x0008020000002000
	.quad 0x0008020000002000, 0x1008020800002001
	.quad 0x1000000800000001, 0x0000000000000000
	.quad 0x0008000800000000, 0x1008000000000001
	.quad 0x1000000000000001, 0x0000020000002000
	.quad 0x0008000000000000, 0x1008020000002001
	.quad 0x0000000800000000, 0x0008000000000000
	.quad 0x1000020000002001, 0x0000020800002000
	.quad 0x1008000800000001, 0x1000000000000001
	.quad 0x0000020800002000, 0x0008000800000000
	.quad 0x0000020000002000, 0x0008020800002000
	.quad 0x1008020800002001, 0x1000000800000001
	.quad 0x0008000800000000, 0x1008000000000001
	.quad 0x0008020000002000, 0x1008020800002001
	.quad 0x1000000800000001, 0x0000000000000000
	.quad 0x0000000000000000, 0x0008020000002000
	.quad 0x0000020800002000, 0x0008000800000000
	.quad 0x1008000800000001, 0x1000000000000001
	.quad 0x1008020000002001, 0x1000020800002001
	.quad 0x1000020800002001, 0x0000000800000000
	.quad 0x1008020800002001, 0x1000000800000001
	.quad 0x1000000000000001, 0x0000020000002000
	.quad 0x1008000000000001, 0x1000020000002001
	.quad 0x0008020800002000, 0x1008000800000001
	.quad 0x1000020000002001, 0x0000020800002000
	.quad 0x0008000000000000, 0x1008020000002001
	.quad 0x0000000800000000, 0x0008000000000000
	.quad 0x0000020000002000, 0x0008020800002000
.L_s5:
	.quad 0x0000001000000100, 0x0020001002080100
	.quad 0x0020000002080000, 0x0420001002000100
	.quad 0x0000000000080000, 0x0000001000000100
	.quad 0x0400000000000000, 0x0020000002080000
	.quad 0x0400001000080100, 0x0000000000080000
	.quad 0x0020001002000100, 0x0400001000080100
	.quad 0x0420001002000100, 0x0420000002080000
	.quad 0x0000001000080100, 0x0400000000000000
	.quad 0x0020000002000000, 0x0400000000080000
	.quad 0x0400000000080000, 0x0000000000000000
	.quad 0x0400001000000100, 0x0420001002080100
	.quad 0x0420001002080100, 0x0020001002000100
	.quad 0x0420000002080000, 0x0400001000000100
	.quad 0x0000000000000000, 0x0420000002000000
	.quad 0x0020001002080100, 0x0020000002000000
	.quad 0x0420000002000000, 0x0000001000080100
	.quad 0x0000000000080000, 0x0420001002000100
	.quad 0x0000001000000100, 0x0020000002000000
	.quad 0x0400000000000000, 0x0020000002080000
	.quad 0x0420001002000100, 0x0400001000080100
	.quad 0x0020001002000100, 0x0400000000000000
	.quad 0x0420000002080000, 0x0020001002080100
	.quad 0x0400001000080100, 0x0000001000000100
	.quad 0x0020000002000000, 0x0420000002080000
	.quad 0x0420001002080100, 0x0000001000080100
	.quad 0x0420000002000000, 0x0420001002080100
	.quad 0x0020000002080000, 0x0000000000000000
	.quad 0x0400000000080000, 0x0420000002000000
	.quad 0x0000001000080100, 0x0020001002000100
	.quad 0x0400001000000100, 0x0000000000080000
	.quad 0x0000000000000000, 0x0400000000080000
	.quad 0x0020001002080100, 0x0400001000000100
.L_s6:
	.quad 0x0200000120000010, 0x0204000020000000
	.quad 0x0000040000000000, 0x0204040120000010
	.quad 0x0204000020000000, 0x0000000100000010
	.quad 0x0204040120000010, 0x0004000000000000
	.quad 0x0200040020000000, 0x0004040100000010
	.quad 0x0004000000000000, 0x0200000120000010
	.quad 0x0004000100000010, 0x0200040020000000
	.quad 0x0200000020000000, 0x0000040100000010
	.quad 0x0000000000000000, 0x0004000100000010
	.quad 0x0200040120000010, 0x0000040000000000
	.quad 0x0004040000000000, 0x0200040120000010
	.quad 0x0000000100000010, 0x0204000120000010
	.quad 0x0204000120000010, 0x0000000000000000
	.quad 0x0004040100000010, 0x0204040020000000
	.quad 0x0000040100000010, 0x0004040000000000
	.quad 0x0204040020000000, 0x0200000020000000
	.quad 0x0200040020000000, 0x0000000100000010
	.quad 0x0204000120000010, 0x0004040000000000
	.quad 0x0204040120000010, 0x0004000000000000
	.quad 0x0000040100000010, 0x0200000120000010
	.quad 0x0004000000000000, 0x0200040020000000
	.quad 0x0200000020000000, 0x0000040100000010
	.quad 0x0200000120000010, 0x0204040120000010
	.quad 0x0004040000000000, 0x0204000020000000
	.quad 0x0004040100000010, 0x0204040020000000
	.quad 0x0000000000000000, 0x0204000120000010
	.quad 0x0000000100000010, 0x0000040000000000
	.quad 0x0204000020000000, 0x0004040100000010
	.quad 0x0000040000000000, 0x0004000100000010
	.quad 0x0200040120000010, 0x0000000000000000
	.quad 0x0204040020000000, 0x0200000020000000
	.quad 0x0004000100000010, 0x0200040120000010
.L_s7:
	.quad 0x0002000000200000, 0x2002000004200002
	.quad 0x2000000004000802, 0x0000000000000000
	.quad 0x0000000000000800, 0x2000000004000802
	.quad 0x2002000000200802, 0x0002000004200800
	.quad 0x2002000004200802, 0x0002000000200000
	.quad 0x0000000000000000, 0x2000000004000002
	.quad 0x2000000000000002, 0x0000000004000000
	.quad 0x2002000004200002, 0x2000000000000802
	.quad 0x0000000004000800, 0x2002000000200802
	.quad 0x2002000000200002, 0x0000000004000800
	.quad 0x2000000004000002, 0x0002000004200000
	.quad 0x0002000004200800, 0x2002000000200002
	.quad 0x0002000004200000, 0x0000000000000800
	.quad 0x2000000000000802, 0x2002000004200802
	.quad 0x0002000000200800, 0x2000000000000002
	.quad 0x0000000004000000, 0x0002000000200800
	.quad 0x0000000004000000, 0x0002000000200800
	.quad 0x0002000000200000, 0x2000000004000802
	.quad 0x2000000004000802, 0x2002000004200002
	.quad 0x2002000004200002, 0x2000000000000002
	.quad 0x2002000000200002, 0x0000000004000000
	.quad 0x0000000004000800, 0x0002000000200000
	.quad 0x0002000004200800, 0x2000000000000802
	.quad 0x2002000000200802, 0x0002000004200800
	.quad 0x2000000000000802, 0x2000000004000002
	.quad 0x2002000004200802, 0x0002000004200000
	.quad 0x0002000000200800, 0x0000000000000000
	.quad 0x2000000000000002, 0x2002000004200802
	.quad 0x0000000000000000, 0x2002000000200802
	.quad 0x0002000004200000, 0x0000000000000800
	.quad 0x2000000004000002, 0x0000000004000800
	.quad 0x0000000000000800, 0x2002000000200002
.L_s8:
	.quad 0x0100010410001000, 0x0000010000001000
	.quad 0x0000000000040000, 0x0100010410041000
	.quad 0x0100000010000000, 0x0100010410001000
	.quad 0x0000000400000000, 0x0100000010000000
	.quad 0x0000000400040000, 0x0100000010040000
	.quad 0x0100010410041000, 0x0000010000041000
	.quad 0x0100010010041000, 0x0000010400041000
	.quad 0x0000010000001000, 0x0000000400000000
	.quad 0x0100000010040000, 0x0100000410000000
	.quad 0x0100010010001000, 0x0000010400001000
	.quad 0x0000010000041000, 0x0000000400040000
	.quad 0x0100000410040000, 0x0100010010041000
	.quad 0x0000010400001000, 0x0000000000000000
	.quad 0x0000000000000000, 0x0100000410040000
	.quad 0x0100000410000000, 0x0100010010001000
	.quad 0x0000010400041000, 0x0000000000040000
	.quad 0x0000010400041000, 0x0000000000040000
	.quad 0x0100010010041000, 0x0000010000001000
	.quad 0x0000000400000000, 0x0100000410040000
	.quad 0x0000010000001000, 0x0000010400041000
	.quad 0x0100010010001000, 0x0000000400000000
	.quad 0x0100000410000000, 0x0100000010040000
	.quad 0x0100000410040000, 0x0100000010000000
	.quad 0x0000000000040000, 0x0100010410001000
	.quad 0x0000000000000000, 0x0100010410041000
	.quad 0x0000000400040000, 0x0100000410000000
	.quad 0x0100000010040000, 0x0100010010001000
	.quad 0x0100010410001000, 0x0000000000000000
	.quad 0x0100010410041000, 0x0000010000041000
	.quad 0x0000010000041000, 0x0000010400001000
	.quad 0x0000010400001000, 0x0000000400040000
	.quad 0x0100000010000000, 0x0100010010041000