mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-25 19:39:56 +07:00
d7d7b85356
These x86_64 vectorized implementations support AVX, AVX-2, and AVX512F.
The AVX-512F implementation is disabled on Skylake, due to throttling, but
it is quite fast on >= Cannonlake.

On the left are cycle counts on a Core i7 6700HQ using the AVX-2 codepath,
comparing this implementation ("new") to the implementation in the current
crypto api ("old"). On the right are benchmarks on a Xeon Gold 5120 using
the AVX-512 codepath. The new implementation is faster on all benchmarks.

       AVX-2                      AVX-512
     ---------                  -----------

size    old     new         size    old     new
----    ----    ----        ----    ----    ----
0       70      68          0       74      70
16      92      90          16      96      92
32      134     104         32      136     106
48      172     120         48      184     124
64      218     136         64      218     138
80      254     158         80      260     160
96      298     174         96      300     176
112     342     192         112     342     194
128     388     212         128     384     212
144     428     228         144     420     226
160     466     246         160     464     248
176     510     264         176     504     264
192     550     282         192     544     282
208     594     302         208     582     300
224     628     316         224     624     318
240     676     334         240     662     338
256     716     354         256     708     358
272     764     374         272     748     372
288     802     352         288     788     358
304     420     366         304     422     370
320     428     360         320     432     364
336     484     378         336     486     380
352     426     384         352     434     390
368     478     400         368     480     408
384     488     394         384     490     398
400     542     408         400     542     412
416     486     416         416     492     426
432     534     430         432     538     436
448     544     422         448     546     432
464     600     438         464     600     448
480     540     448         480     548     456
496     594     464         496     594     476
512     602     456         512     606     470
528     656     476         528     656     480
544     600     480         544     606     498
560     650     494         560     652     512
576     664     490         576     662     508
592     714     508         592     716     522
608     656     514         608     664     538
624     708     532         624     710     552
640     716     524         640     720     516
656     770     536         656     772     526
672     716     548         672     722     544
688     770     562         688     768     556
704     774     552         704     778     556
720     826     568         720     832     568
736     768     574         736     780     584
752     822     592         752     826     600
768     830     584         768     836     560
784     884     602         784     888     572
800     828     610         800     838     588
816     884     628         816     884     604
832     888     618         832     894     598
848     942     632         848     946     612
864     884     644         864     896     628
880     936     660         880     942     644
896     948     652         896     952     608
912     1000    664         912     1004    616
928     942     676         928     954     634
944     994     690         944     1000    646
960     1002    680         960     1008    646
976     1054    694         976     1062    658
992     1002    706         992     1012    674
1008    1052    720         1008    1058    690

This commit wires in the prior implementation from Andy, and makes the
following changes to be suitable for kernel land.

  - Some cosmetic and structural changes, like renaming labels to
    .Lname, constants, and other Linux conventions, as well as making
    the code easy for us to maintain moving forward.

  - CPU feature checking is done in C by the glue code.

  - We avoid jumping into the middle of functions, to appease objtool,
    and instead parameterize shared code.

  - We maintain frame pointers so that stack traces make sense.

  - We remove the dependency on the perl xlate code, which transforms
    the output into things that assemblers we don't care about use.

Importantly, none of our changes affect the arithmetic or core code, but
just involve the differing environment of kernel space.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
Co-developed-by: Samuel Neves <sneves@dei.uc.pt>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
131 lines
4.4 KiB
Plaintext
131 lines
4.4 KiB
Plaintext
# SPDX-License-Identifier: GPL-2.0

# Heading shown to the user during configuration, above the entries below.
comment "Crypto library routines"

# Promptless symbol: it is not offered in the configuration menus, so it can
# only be switched on by a `select` from another Kconfig entry.
config CRYPTO_LIB_AES
	tristate

# No prompt string, hence not user-selectable; users of this symbol are
# expected to enable it with `select`.
config CRYPTO_LIB_ARC4
	tristate

# Capability flag without a prompt: it has no value of its own and is meant
# to be set (=y or =m) by whichever entry provides the arch implementation.
# CRYPTO_LIB_BLAKE2S keys its dependency and its generic fallback off it.
config CRYPTO_ARCH_HAVE_LIB_BLAKE2S
	tristate
	help
	  Declares whether the architecture provides an arch-specific
	  accelerated implementation of the Blake2s library interface,
	  either builtin or as a module.

# Promptless symbol for the generic Blake2s code. Within this file it is
# selected by CRYPTO_LIB_BLAKE2S when no arch implementation is enabled.
config CRYPTO_LIB_BLAKE2S_GENERIC
	tristate
	help
	  This symbol can be depended upon by arch implementations of the
	  Blake2s library interface that require the generic code as a
	  fallback, e.g., for SIMD implementations. If no arch specific
	  implementation is enabled, this implementation serves the users
	  of CRYPTO_LIB_BLAKE2S.

# User-visible entry for the Blake2s library.
#
# `depends on X || !X` evaluates to m when X=m and to y otherwise, so this
# library is capped at =m whenever the arch implementation is a module and
# is otherwise unrestricted.
#
# The generic code is only pulled in when no arch implementation is enabled
# (CRYPTO_ARCH_HAVE_LIB_BLAKE2S=n).
config CRYPTO_LIB_BLAKE2S
	tristate "BLAKE2s hash function library"
	depends on CRYPTO_ARCH_HAVE_LIB_BLAKE2S || !CRYPTO_ARCH_HAVE_LIB_BLAKE2S
	select CRYPTO_LIB_BLAKE2S_GENERIC if CRYPTO_ARCH_HAVE_LIB_BLAKE2S=n
	help
	  Enable the Blake2s library interface. This interface may be fulfilled
	  by either the generic implementation or an arch-specific one, if one
	  is available and enabled.

# Capability flag without a prompt, meant to be set (=y or =m) by the entry
# that provides an arch implementation. CRYPTO_LIB_CHACHA and
# CRYPTO_LIB_CHACHA20POLY1305 both depend on its value.
config CRYPTO_ARCH_HAVE_LIB_CHACHA
	tristate
	help
	  Declares whether the architecture provides an arch-specific
	  accelerated implementation of the ChaCha library interface,
	  either builtin or as a module.

# Promptless symbol for the generic ChaCha code. Within this file it is
# selected by CRYPTO_LIB_CHACHA when no arch implementation is enabled.
#
# NOTE(review): the reason the generic code needs CRYPTO_ALGAPI is not
# visible in this file (presumably shared helper routines) - confirm before
# changing the select.
config CRYPTO_LIB_CHACHA_GENERIC
	tristate
	select CRYPTO_ALGAPI
	help
	  This symbol can be depended upon by arch implementations of the
	  ChaCha library interface that require the generic code as a
	  fallback, e.g., for SIMD implementations. If no arch specific
	  implementation is enabled, this implementation serves the users
	  of CRYPTO_LIB_CHACHA.

# User-visible entry for the ChaCha library.
#
# The `X || !X` dependency is m when the arch implementation is modular and
# y otherwise, which prevents a built-in library on top of a modular arch
# implementation. The generic code is selected only when no arch
# implementation is enabled.
config CRYPTO_LIB_CHACHA
	tristate "ChaCha library interface"
	depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
	select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n
	help
	  Enable the ChaCha library interface. This interface may be fulfilled
	  by either the generic implementation or an arch-specific one, if one
	  is available and enabled.

# Capability flag without a prompt, meant to be set (=y or =m) by the entry
# that provides an arch implementation. CRYPTO_LIB_CURVE25519 keys its
# dependency and its generic fallback off it.
config CRYPTO_ARCH_HAVE_LIB_CURVE25519
	tristate
	help
	  Declares whether the architecture provides an arch-specific
	  accelerated implementation of the Curve25519 library interface,
	  either builtin or as a module.

# Promptless symbol for the generic Curve25519 code. Within this file it is
# selected by CRYPTO_LIB_CURVE25519 when no arch implementation is enabled.
config CRYPTO_LIB_CURVE25519_GENERIC
	tristate
	help
	  This symbol can be depended upon by arch implementations of the
	  Curve25519 library interface that require the generic code as a
	  fallback, e.g., for SIMD implementations. If no arch specific
	  implementation is enabled, this implementation serves the users
	  of CRYPTO_LIB_CURVE25519.

# User-visible entry for the Curve25519 library.
#
# The `X || !X` dependency caps this symbol at =m while the arch
# implementation is a module; with the arch symbol at y or n there is no
# restriction. The generic code is selected only when the arch symbol is n.
config CRYPTO_LIB_CURVE25519
	tristate "Curve25519 scalar multiplication library"
	depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519
	select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n
	help
	  Enable the Curve25519 library interface. This interface may be
	  fulfilled by either the generic implementation or an arch-specific
	  one, if one is available and enabled.

# Promptless symbol: hidden from the configuration menus and enabled only
# through `select` by the entries that need it.
config CRYPTO_LIB_DES
	tristate

# Integer symbol without a prompt, so its value always comes from the
# defaults below. Kconfig uses the first `default` whose condition holds,
# which makes the order significant: the unconditional `default 1` must stay
# last as the fallback for every other architecture.
# NOTE(review): what the value sizes is not visible in this file - confirm
# against the Poly1305 headers before changing any number.
config CRYPTO_LIB_POLY1305_RSIZE
	int
	default 2 if MIPS
	default 11 if X86_64
	default 9 if ARM || ARM64
	default 1

# Capability flag without a prompt, meant to be set (=y or =m) by the entry
# that provides an arch implementation. CRYPTO_LIB_POLY1305 and
# CRYPTO_LIB_CHACHA20POLY1305 both depend on its value.
config CRYPTO_ARCH_HAVE_LIB_POLY1305
	tristate
	help
	  Declares whether the architecture provides an arch-specific
	  accelerated implementation of the Poly1305 library interface,
	  either builtin or as a module.

# Promptless symbol for the generic Poly1305 code. Within this file it is
# selected by CRYPTO_LIB_POLY1305 when no arch implementation is enabled.
config CRYPTO_LIB_POLY1305_GENERIC
	tristate
	help
	  This symbol can be depended upon by arch implementations of the
	  Poly1305 library interface that require the generic code as a
	  fallback, e.g., for SIMD implementations. If no arch specific
	  implementation is enabled, this implementation serves the users
	  of CRYPTO_LIB_POLY1305.

# User-visible entry for the Poly1305 library.
#
# The `X || !X` dependency is m when the arch implementation is modular and
# y otherwise, so the library cannot be built in on top of a modular arch
# implementation. The generic code is selected only when no arch
# implementation is enabled.
config CRYPTO_LIB_POLY1305
	tristate "Poly1305 library interface"
	depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305
	select CRYPTO_LIB_POLY1305_GENERIC if CRYPTO_ARCH_HAVE_LIB_POLY1305=n
	help
	  Enable the Poly1305 library interface. This interface may be fulfilled
	  by either the generic implementation or an arch-specific one, if one
	  is available and enabled.

# User-visible entry for the combined ChaCha20-Poly1305 library.
#
# `select` forces a symbol on without evaluating that symbol's own
# dependencies, so the two `depends on` lines repeat the constraints of
# CRYPTO_LIB_CHACHA and CRYPTO_LIB_POLY1305. Keep them in sync with those
# entries.
config CRYPTO_LIB_CHACHA20POLY1305
	tristate "ChaCha20-Poly1305 AEAD support (8-byte nonce library version)"
	depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
	depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305
	select CRYPTO_LIB_CHACHA
	select CRYPTO_LIB_POLY1305

config CRYPTO_LIB_SHA256
|
|
tristate
|