mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-13 05:16:15 +07:00
c9a3ff8f22
Convert salsa20-asm from the deprecated "blkcipher" API to the "skcipher" API, in the process fixing it up to use the generic helpers. This allows removing the salsa20_keysetup() and salsa20_ivsetup() assembly functions, which aren't performance critical; the C versions do just fine. This also fixes the same bug that salsa20-generic had, where the state array was being maintained directly in the transform context rather than on the stack or in the request context. Thus, if multiple threads used the same Salsa20 transform concurrently they produced the wrong results. Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
939 lines
16 KiB
ArmAsm
939 lines
16 KiB
ArmAsm
# Derived from:
# salsa20_pm.s version 20051229
# D. J. Bernstein
# Public domain.

#include <linux/linkage.h>
|
|
|
|
.text
|
|
|
|
/*
 * salsa20_encrypt_bytes(x, m, out, bytes)
 *
 * i386, AT&T syntax.  All four arguments are read from the stack, just
 * above the return address:
 *
 *   arg1  x      pointer to sixteen 32-bit state words (read at x+0..x+60;
 *                words at x+32 and x+36 are written back on exit)
 *   arg2  m      input bytes
 *   arg3  out    output bytes
 *   arg4  bytes  number of bytes to process (0 is a no-op)
 *
 * NOTE(review): the C prototype is not visible in this file; presumably
 * (u32 state[16], const u8 *src, u8 *dst, u32 bytes) - confirm against
 * the glue code.
 *
 * For every 64-byte block the state words are copied to a working set,
 * run through 20 rounds (the loop counter starts at 20 and drops by 4 per
 * pass, each pass holding four rounds), added back to the saved input
 * words, XORed with 64 bytes of m and stored to out.  Words 8 and 9 form
 * a 64-bit block counter (add with carry) that is bumped after every
 * block, including a short final one.  A final block shorter than 64
 * bytes is staged through the 64-byte tmp buffer so that all word loads
 * and stores stay inside the frame; only `bytes` bytes are copied out.
 *
 * %ebx, %esi, %edi and %ebp are saved in the frame and restored before
 * returning.  %eax holds the frame size on return, %ecx/%edx are
 * clobbered.
 *
 * Frame layout, offsets from the realigned %esp (32-byte aligned, at
 * least 256 bytes):
 *
 *     0.. 63  tmp            bounce buffer for a short final block
 *    64       x_backup       saved state pointer
 *    68       m_backup       input pointer for the current block
 *    72       out_backup     output pointer for the current block
 *    76       bytes_backup   bytes still to do, including current block
 *    80       eax_stack      frame size, used to unwind %esp
 *    84.. 96  ebx/esi/edi/ebp save slots
 *   100..160  x0..x15        working state, slot = 100 + 4*n
 *   164..224  j0..j15        input words for this block, slot = 164 + 4*n
 *   228       ctarget        real destination while out points at tmp
 *
 * Register roles inside the round loop: %eax=p, %ecx=s, %edx=t, %ebx=w
 * carry one word of each of four interleaved quarter-rounds; %esi=r and
 * %edi=v are the second running values of the s and w lanes; %ebp=i is
 * the round counter.
 */
ENTRY(salsa20_encrypt_bytes)
	/* Build the frame: eax = (esp mod 32) + 256, esp -= eax. */
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp

	movl	%eax,80(%esp)		# eax_stack = frame size
	movl	%ebx,84(%esp)		# ebx_stack = ebx
	movl	%esi,88(%esp)		# esi_stack = esi
	movl	%edi,92(%esp)		# edi_stack = edi
	movl	%ebp,96(%esp)		# ebp_stack = ebp

	/* esp + eax is the entry esp, so the arguments start 4 bytes above it. */
	movl	4(%esp,%eax),%edx	# x = arg1
	movl	8(%esp,%eax),%esi	# m = arg2
	movl	12(%esp,%eax),%edi	# out = arg3
	movl	16(%esp,%eax),%ebx	# bytes = arg4

	sub	$0,%ebx			# sets ZF when bytes is 0
	jbe	._done			# nothing to do for an empty request

._start:
	/*
	 * Copy the sixteen state words into j0..j15.  Loads and stores are
	 * software-pipelined across eax/ecx/ebp: each register is stored to
	 * its j slot and immediately reloaded with the word three ahead.
	 */
	movl	0(%edx),%eax		# in0
	movl	4(%edx),%ecx		# in1
	movl	8(%edx),%ebp		# in2
	movl	%eax,164(%esp)		# j0 = in0
	movl	12(%edx),%eax		# in3
	movl	%ecx,168(%esp)		# j1 = in1
	movl	16(%edx),%ecx		# in4
	movl	%ebp,172(%esp)		# j2 = in2
	movl	20(%edx),%ebp		# in5
	movl	%eax,176(%esp)		# j3 = in3
	movl	24(%edx),%eax		# in6
	movl	%ecx,180(%esp)		# j4 = in4
	movl	28(%edx),%ecx		# in7
	movl	%ebp,184(%esp)		# j5 = in5
	movl	32(%edx),%ebp		# in8
	movl	%eax,188(%esp)		# j6 = in6
	movl	36(%edx),%eax		# in9
	movl	%ecx,192(%esp)		# j7 = in7
	movl	40(%edx),%ecx		# in10
	movl	%ebp,196(%esp)		# j8 = in8
	movl	44(%edx),%ebp		# in11
	movl	%eax,200(%esp)		# j9 = in9
	movl	48(%edx),%eax		# in12
	movl	%ecx,204(%esp)		# j10 = in10
	movl	52(%edx),%ecx		# in13
	movl	%ebp,208(%esp)		# j11 = in11
	movl	56(%edx),%ebp		# in14
	movl	%eax,212(%esp)		# j12 = in12
	movl	60(%edx),%eax		# in15
	movl	%ecx,216(%esp)		# j13 = in13
	movl	%ebp,220(%esp)		# j14 = in14
	movl	%eax,224(%esp)		# j15 = in15

	movl	%edx,64(%esp)		# x_backup = x

._bytesatleast1:
	/* Top of the per-block loop; ebx = bytes still to process (> 0). */
	cmp	$64,%ebx
	jae	._nocopy		# a full block is processed in place

	/*
	 * Short final block: remember the real destination, copy the
	 * remaining input into tmp and let both m and out point at tmp.
	 */
	movl	%edi,228(%esp)		# ctarget = out
	leal	0(%esp),%edi		# out = &tmp
	mov	%ebx,%ecx		# i = bytes
	rep movsb			# while (i) { *out++ = *m++; --i }
	leal	0(%esp),%edi		# out = &tmp
	leal	0(%esp),%esi		# m = &tmp

._nocopy:
	/* edi, esi and ebx are all reused as round temporaries below. */
	movl	%edi,72(%esp)		# out_backup = out
	movl	%esi,68(%esp)		# m_backup = m
	movl	%ebx,76(%esp)		# bytes_backup = bytes

	/* x0..x15 = j0..j15, four words per pass. */
	.irp	off, 0, 16, 32, 48
	movl	164+\off(%esp),%eax
	movl	168+\off(%esp),%ecx
	movl	172+\off(%esp),%edx
	movl	176+\off(%esp),%ebx
	movl	%eax,100+\off(%esp)
	movl	%ecx,104+\off(%esp)
	movl	%edx,108+\off(%esp)
	movl	%ebx,112+\off(%esp)
	.endr

	mov	$20,%ebp		# i = 20
	movl	100(%esp),%eax		# p = x0
	movl	120(%esp),%ecx		# s = x5
	movl	140(%esp),%edx		# t = x10
	movl	160(%esp),%ebx		# w = x15

._mainloop:
	/*
	 * One pass = two identical double-rounds (four rounds).  On entry
	 * and exit of each double-round p, s, t, w hold x0, x5, x10, x15.
	 */
	.rept	2

	/*
	 * First round of the pair (Salsa20 column round): lanes
	 * p:(x0,x4,x8,x12)  t:(x10,x14,x2,x6)
	 * s/r:(x5,x9,x13,x1)  w/v:(x15,x3,x7,x11)
	 */
	movl	%eax,100(%esp)		# x0 = p
	movl	%edx,140(%esp)		# x10 = t
	addl	148(%esp),%eax		# p += x12
	movl	%ecx,120(%esp)		# x5 = s
	addl	124(%esp),%edx		# t += x6
	movl	%ebx,160(%esp)		# x15 = w
	movl	104(%esp),%esi		# r = x1
	add	%ecx,%esi		# r += s
	movl	144(%esp),%edi		# v = x11
	add	%ebx,%edi		# v += w

	rol	$7,%eax			# p <<<= 7
	xorl	116(%esp),%eax		# p ^= x4
	rol	$7,%edx			# t <<<= 7
	xorl	156(%esp),%edx		# t ^= x14
	rol	$7,%esi			# r <<<= 7
	xorl	136(%esp),%esi		# r ^= x9
	rol	$7,%edi			# v <<<= 7
	xorl	112(%esp),%edi		# v ^= x3

	movl	%eax,116(%esp)		# x4 = p
	movl	%edx,156(%esp)		# x14 = t
	addl	100(%esp),%eax		# p += x0
	movl	%esi,136(%esp)		# x9 = r
	addl	140(%esp),%edx		# t += x10
	movl	%edi,112(%esp)		# x3 = v

	rol	$9,%eax			# p <<<= 9
	xorl	132(%esp),%eax		# p ^= x8
	rol	$9,%edx			# t <<<= 9
	xorl	108(%esp),%edx		# t ^= x2
	add	%esi,%ecx		# s += r
	rol	$9,%ecx			# s <<<= 9
	xorl	152(%esp),%ecx		# s ^= x13
	add	%edi,%ebx		# w += v
	rol	$9,%ebx			# w <<<= 9
	xorl	128(%esp),%ebx		# w ^= x7

	movl	%eax,132(%esp)		# x8 = p
	movl	%edx,108(%esp)		# x2 = t
	addl	116(%esp),%eax		# p += x4
	movl	%ecx,152(%esp)		# x13 = s
	addl	156(%esp),%edx		# t += x14
	movl	%ebx,128(%esp)		# x7 = w

	rol	$13,%eax		# p <<<= 13
	xorl	148(%esp),%eax		# p ^= x12
	rol	$13,%edx		# t <<<= 13
	xorl	124(%esp),%edx		# t ^= x6
	add	%ecx,%esi		# r += s
	rol	$13,%esi		# r <<<= 13
	xorl	104(%esp),%esi		# r ^= x1
	add	%ebx,%edi		# v += w
	rol	$13,%edi		# v <<<= 13
	xorl	144(%esp),%edi		# v ^= x11

	movl	%eax,148(%esp)		# x12 = p
	movl	%edx,124(%esp)		# x6 = t
	addl	132(%esp),%eax		# p += x8
	movl	%esi,104(%esp)		# x1 = r
	addl	108(%esp),%edx		# t += x2
	movl	%edi,144(%esp)		# x11 = v

	rol	$18,%eax		# p <<<= 18
	xorl	100(%esp),%eax		# p ^= x0
	rol	$18,%edx		# t <<<= 18
	xorl	140(%esp),%edx		# t ^= x10
	add	%esi,%ecx		# s += r
	rol	$18,%ecx		# s <<<= 18
	xorl	120(%esp),%ecx		# s ^= x5
	add	%edi,%ebx		# w += v
	rol	$18,%ebx		# w <<<= 18
	xorl	160(%esp),%ebx		# w ^= x15

	/*
	 * Second round of the pair (Salsa20 row round): lanes
	 * p:(x0,x1,x2,x3)  t:(x10,x11,x8,x9)
	 * s/r:(x5,x6,x7,x4)  w/v:(x15,x12,x13,x14)
	 */
	movl	%eax,100(%esp)		# x0 = p
	movl	%edx,140(%esp)		# x10 = t
	addl	112(%esp),%eax		# p += x3
	rol	$7,%eax			# p <<<= 7
	movl	%ecx,120(%esp)		# x5 = s
	addl	136(%esp),%edx		# t += x9
	movl	%ebx,160(%esp)		# x15 = w
	movl	116(%esp),%esi		# r = x4
	add	%ecx,%esi		# r += s
	movl	156(%esp),%edi		# v = x14
	add	%ebx,%edi		# v += w

	xorl	104(%esp),%eax		# p ^= x1
	rol	$7,%edx			# t <<<= 7
	xorl	144(%esp),%edx		# t ^= x11
	rol	$7,%esi			# r <<<= 7
	xorl	124(%esp),%esi		# r ^= x6
	rol	$7,%edi			# v <<<= 7
	xorl	148(%esp),%edi		# v ^= x12

	movl	%eax,104(%esp)		# x1 = p
	movl	%edx,144(%esp)		# x11 = t
	addl	100(%esp),%eax		# p += x0
	movl	%esi,124(%esp)		# x6 = r
	addl	140(%esp),%edx		# t += x10
	movl	%edi,148(%esp)		# x12 = v

	rol	$9,%eax			# p <<<= 9
	xorl	108(%esp),%eax		# p ^= x2
	rol	$9,%edx			# t <<<= 9
	xorl	132(%esp),%edx		# t ^= x8
	add	%esi,%ecx		# s += r
	rol	$9,%ecx			# s <<<= 9
	xorl	128(%esp),%ecx		# s ^= x7
	add	%edi,%ebx		# w += v
	rol	$9,%ebx			# w <<<= 9
	xorl	152(%esp),%ebx		# w ^= x13

	movl	%eax,108(%esp)		# x2 = p
	movl	%edx,132(%esp)		# x8 = t
	addl	104(%esp),%eax		# p += x1
	movl	%ecx,128(%esp)		# x7 = s
	addl	144(%esp),%edx		# t += x11
	movl	%ebx,152(%esp)		# x13 = w

	rol	$13,%eax		# p <<<= 13
	xorl	112(%esp),%eax		# p ^= x3
	rol	$13,%edx		# t <<<= 13
	xorl	136(%esp),%edx		# t ^= x9
	add	%ecx,%esi		# r += s
	rol	$13,%esi		# r <<<= 13
	xorl	116(%esp),%esi		# r ^= x4
	add	%ebx,%edi		# v += w
	rol	$13,%edi		# v <<<= 13
	xorl	156(%esp),%edi		# v ^= x14

	movl	%eax,112(%esp)		# x3 = p
	movl	%edx,136(%esp)		# x9 = t
	addl	108(%esp),%eax		# p += x2
	movl	%esi,116(%esp)		# x4 = r
	addl	132(%esp),%edx		# t += x8
	movl	%edi,156(%esp)		# x14 = v

	rol	$18,%eax		# p <<<= 18
	xorl	100(%esp),%eax		# p ^= x0
	rol	$18,%edx		# t <<<= 18
	xorl	140(%esp),%edx		# t ^= x10
	add	%esi,%ecx		# s += r
	rol	$18,%ecx		# s <<<= 18
	xorl	120(%esp),%ecx		# s ^= x5
	add	%edi,%ebx		# w += v
	rol	$18,%ebx		# w <<<= 18
	xorl	160(%esp),%ebx		# w ^= x15

	.endr

	sub	$4,%ebp			# i -= 4
	ja	._mainloop		# loop while i is still above zero

	/* Spill the four words that lived in registers during the rounds. */
	movl	%eax,100(%esp)		# x0 = p
	movl	%ecx,120(%esp)		# x5 = s
	movl	%edx,140(%esp)		# x10 = t
	movl	%ebx,160(%esp)		# x15 = w

	movl	72(%esp),%edi		# out = out_backup
	movl	68(%esp),%esi		# m = m_backup

	/*
	 * out[k] = (x[k] + j[k]) ^ m[k] for all sixteen words, two words
	 * (8 bytes) per pass.
	 */
	.irp	off, 0, 8, 16, 24, 32, 40, 48, 56
	movl	100+\off(%esp),%eax
	movl	104+\off(%esp),%ecx
	addl	164+\off(%esp),%eax
	addl	168+\off(%esp),%ecx
	xorl	0+\off(%esi),%eax
	xorl	4+\off(%esi),%ecx
	movl	%eax,0+\off(%edi)
	movl	%ecx,4+\off(%edi)
	.endr

	movl	76(%esp),%ebx		# bytes = bytes_backup

	/* Advance the 64-bit block counter held in j8 (low) and j9 (high). */
	movl	196(%esp),%eax		# in8 = j8
	movl	200(%esp),%ecx		# in9 = j9
	add	$1,%eax			# in8 += 1
	adc	$0,%ecx			# in9 += carry
	movl	%eax,196(%esp)		# j8 = in8
	movl	%ecx,200(%esp)		# j9 = in9

	cmp	$64,%ebx
	ja	._bytesatleast65	# more blocks follow
	jae	._bytesatleast64	# exactly one full block: output is in place

	/* Short block: copy the first `bytes` bytes from tmp to ctarget. */
	mov	%edi,%esi		# m = out
	movl	228(%esp),%edi		# out = ctarget
	mov	%ebx,%ecx		# i = bytes
	rep movsb			# while (i) { *out++ = *m++; --i }

._bytesatleast64:
	/* Last block done: publish the advanced counter to the caller state. */
	movl	64(%esp),%eax		# x = x_backup
	movl	196(%esp),%ecx		# in8 = j8
	movl	200(%esp),%edx		# in9 = j9
	movl	%ecx,32(%eax)		# *(uint32 *) (x + 32) = in8
	movl	%edx,36(%eax)		# *(uint32 *) (x + 36) = in9

._done:
	/* Restore the saved registers and drop the frame. */
	movl	80(%esp),%eax		# eax = frame size
	movl	84(%esp),%ebx
	movl	88(%esp),%esi
	movl	92(%esp),%edi
	movl	96(%esp),%ebp
	add	%eax,%esp		# back to the entry esp
	ret

._bytesatleast65:
	/* Step past the block just written and go around again. */
	sub	$64,%ebx		# bytes -= 64
	add	$64,%edi		# out += 64
	add	$64,%esi		# m += 64
	jmp	._bytesatleast1
ENDPROC(salsa20_encrypt_bytes)
|