mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-25 00:51:28 +07:00
0eee0fbd41
This patch increases the interleave factor for parallel AES modes to 4x. This improves performance on Cortex-A57 by ~35%. This is due to the 3-cycle latency of AES instructions on the A57's relatively deep pipeline (compared to Cortex-A53 where the AES instruction latency is only 2 cycles). At the same time, disable inline expansion of the core AES functions, as the performance benefit of this feature is negligible. Measured on AMD Seattle (using tcrypt.ko mode=500 sec=1): Baseline (2x interleave, inline expansion) ------------------------------------------ testing speed of async cbc(aes) (cbc-aes-ce) decryption test 4 (128 bit key, 8192 byte blocks): 95545 operations in 1 seconds test 14 (256 bit key, 8192 byte blocks): 68496 operations in 1 seconds This patch (4x interleave, no inline expansion) ----------------------------------------------- testing speed of async cbc(aes) (cbc-aes-ce) decryption test 4 (128 bit key, 8192 byte blocks): 124735 operations in 1 seconds test 14 (256 bit key, 8192 byte blocks): 92328 operations in 1 seconds Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
43 lines
1.2 KiB
Makefile
43 lines
1.2 KiB
Makefile
#
|
|
# linux/arch/arm64/crypto/Makefile
|
|
#
|
|
# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
# published by the Free Software Foundation.
|
|
#
|
|
|
|
obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
|
|
sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
|
|
|
|
obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
|
|
sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
|
|
|
|
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
|
|
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
|
|
|
|
obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
|
|
CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
|
|
|
|
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
|
|
aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
|
|
|
|
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o
|
|
aes-ce-blk-y := aes-glue-ce.o aes-ce.o
|
|
|
|
obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
|
|
aes-neon-blk-y := aes-glue-neon.o aes-neon.o
|
|
|
|
AFLAGS_aes-ce.o := -DINTERLEAVE=4
|
|
AFLAGS_aes-neon.o := -DINTERLEAVE=4
|
|
|
|
CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
|
|
|
|
obj-$(CONFIG_CRYPTO_CRC32_ARM64) += crc32-arm64.o
|
|
|
|
CFLAGS_crc32-arm64.o := -mcpu=generic+crc
|
|
|
|
$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
|
|
$(call if_changed_rule,cc_o_c)
|