mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-24 21:28:03 +07:00
ff5fdafc9e
The __memzero assembly code is almost identical to memset's except for two orr instructions. The runtime performance of __memzero(p, n) and memset(p, 0, n) is accordingly almost identical. However, the memset() macro used to guard against a zero length and to call __memzero at compile time when the fill value is a constant zero interferes with compiler optimizations. Arnd found that the test against a zero length brings up some new warnings with gcc v8: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82103 And successively removing the test against a zero length and the call to __memzero optimization produces the following kernel sizes for defconfig with gcc 6: text data bss dec hex filename 12248142 6278960 413588 18940690 1210312 vmlinux.orig 12244474 6278960 413588 18937022 120f4be vmlinux.no_zero_test 12239160 6278960 413588 18931708 120dffc vmlinux.no_memzero So it is probably not worth keeping __memzero around given that the compiler can do a better job at inlining trivial memset(p,0,n) on its own. And the memset code already handles a zero length just fine. Suggested-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Nicolas Pitre <nico@linaro.org> Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
46 lines
1.4 KiB
Makefile
46 lines
1.4 KiB
Makefile
# SPDX-License-Identifier: GPL-2.0
|
|
#
|
|
# linux/arch/arm/lib/Makefile
|
|
#
|
|
# Copyright (C) 1995-2000 Russell King
|
|
#
|
|
|
|
# Objects added to lib-y unconditionally. The list is built up one
# group per line; word order matches the original single assignment.
lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o
lib-y += csumpartialcopy.o csumpartialcopyuser.o clearbit.o
lib-y += delay.o delay-loop.o findbit.o memchr.o memcpy.o
lib-y += memmove.o memset.o setbit.o
lib-y += strchr.o strrchr.o
lib-y += testchangebit.o testclearbit.o testsetbit.o
lib-y += ashldi3.o ashrdi3.o lshrdi3.o muldi3.o
lib-y += ucmpdi2.o lib1funcs.o div64.o
lib-y += io-readsb.o io-writesb.o io-readsl.o io-writesl.o
lib-y += call_with_stack.o bswapsdi2.o
|
|
|
|
# Object list kept in its own variable; it is appended to
# lib-$(CONFIG_MMU) further down in this file.
mmu-y := clear_user.o copy_page.o getuser.o putuser.o
mmu-y += copy_from_user.o copy_to_user.o
|
|
|
|
# using lib- here would not override already available weak symbols, so obj- is used
|
|
# Added via obj- (not lib-); expands to obj-y when CONFIG_UACCESS_WITH_MEMCPY=y.
obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
|
|
|
|
# Append the mmu-y object list; this lands in lib-y only when CONFIG_MMU=y.
lib-$(CONFIG_MMU) += $(mmu-y)
|
|
|
|
# Pick the io-readsw/io-writesw object pair: the armv3 variants when
# CONFIG_CPU_32v3=y, the armv4 variants in every other case.
ifneq ($(CONFIG_CPU_32v3),y)
lib-y += io-readsw-armv4.o
lib-y += io-writesw-armv4.o
else
lib-y += io-readsw-armv3.o
lib-y += io-writesw-armv3.o
endif
|
|
|
|
# Applied only when CONFIG_ARCH_RPC=y: three extra lib-y objects, plus an
# additional assembler flag for delay-loop.o.
ifeq (y,$(CONFIG_ARCH_RPC))
lib-y += ecard.o
lib-y += io-acorn.o
lib-y += floppydma.o
AFLAGS_delay-loop.o += -march=armv4
endif
|
|
|
|
# Both objects carry an extra prerequisite on csumpartialcopygeneric.S
# (presumably because their sources include it -- confirm in the .S files).
$(obj)/csumpartialcopy.o $(obj)/csumpartialcopyuser.o: \
    $(obj)/csumpartialcopygeneric.S
|
|
|
|
# Only when CONFIG_KERNEL_MODE_NEON=y: compile xor-neon.o with the NEON
# flags below and add it to obj-$(CONFIG_XOR_BLOCKS).
ifeq (y,$(CONFIG_KERNEL_MODE_NEON))
NEON_FLAGS := -mfloat-abi=softfp
NEON_FLAGS += -mfpu=neon
CFLAGS_xor-neon.o += $(NEON_FLAGS)
obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
endif
|