mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-14 11:26:43 +07:00
f2db633d30
Similar to x86/sparc/powerpc implementations except: 1) we implement an extremely efficient has_zero()/find_zero() sequence with both prep_zero_mask() and create_zero_mask() being no-operations. 2) Our output from prep_zero_mask() differs in that only the lowest eight bits are used to represent the zero bytes; nevertheless, it can be safely ORed with other similar masks from prep_zero_mask() and forms input to create_zero_mask(), the two fundamental properties prep_zero_mask() must satisfy. Tests on EV67 and EV68 CPUs revealed that the generic code is essentially as fast as (to within 0.5% of CPU cycles of) the old Alpha specific code for large quadword-aligned strings, despite the 30% extra CPU instructions executed. In contrast, the generic code for unaligned strings is substantially slower (by more than a factor of 3) than the old Alpha specific code. Signed-off-by: Michael Cree <mcree@orcon.net.nz> Acked-by: Matt Turner <mattst88@gmail.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
57 lines
1.3 KiB
Makefile
57 lines
1.3 KiB
Makefile
#
# Makefile for alpha-specific library files.
#
|
|
|
|
# Per-directory build flags:
#   ccflags-y  C sources here are compiled with -Werror.
#   asflags-y  assembler sources are given the same flags as $(KBUILD_CFLAGS).
ccflags-y := -Werror
asflags-y := $(KBUILD_CFLAGS)
|
|
|
|
# Object-name prefixes used in the lists below.  The variable actually
# assigned is named after the config value, so $(ev6-y) / $(ev67-y) only
# carry a prefix when the corresponding option expands to "y"; otherwise
# they expand to nothing and the unprefixed object is selected.
#
# ev6:  many routines have implementations tuned for ev6; pick those iff
#       we are targeting ev6 specifically.
ev6-$(CONFIG_ALPHA_EV6)   := ev6-
# ev67: several routines make use of the cttz instruction introduced in ev67.
ev67-$(CONFIG_ALPHA_EV67) := ev67-
|
|
|
|
# Objects that are always part of the library.  An entry written as
# $(ev6-y)name.o or $(ev67-y)name.o resolves to the prefixed object when that
# prefix variable is set, and to plain name.o otherwise.  The list is built
# up in small groups; the resulting order is unchanged.
lib-y  = __divqu.o __remqu.o __divlu.o __remlu.o
lib-y += udelay.o

lib-y += $(ev6-y)memset.o
lib-y += $(ev6-y)memcpy.o
lib-y += memmove.o

lib-y += checksum.o
lib-y += csum_partial_copy.o

lib-y += $(ev67-y)strlen.o
lib-y += $(ev67-y)strcat.o
lib-y += strcpy.o
lib-y += $(ev67-y)strncat.o
lib-y += strncpy.o
lib-y += $(ev6-y)stxcpy.o
lib-y += $(ev6-y)stxncpy.o
lib-y += $(ev67-y)strchr.o
lib-y += $(ev67-y)strrchr.o
lib-y += $(ev6-y)memchr.o

lib-y += $(ev6-y)copy_user.o
lib-y += $(ev6-y)clear_user.o
lib-y += $(ev6-y)csum_ipv6_magic.o
lib-y += $(ev6-y)clear_page.o
lib-y += $(ev6-y)copy_page.o

lib-y += fpreg.o
lib-y += callback_srm.o srm_puts.o srm_printk.o
lib-y += fls.o

# Added to lib-y only when CONFIG_SMP expands to "y".
lib-$(CONFIG_SMP) += dec_and_lock.o
|
|
|
|
# The division routines are built from a single source file; the
# per-object assembler defines below choose which variant each object is:
#   -DDIV or -DREM   exactly one per object
#   -DINTSIZE        additionally set for the __divlu / __remlu objects
AFLAGS___divqu.o := -DDIV
AFLAGS___divlu.o := -DDIV -DINTSIZE
AFLAGS___remqu.o := -DREM
AFLAGS___remlu.o := -DREM -DINTSIZE
|
|
|
|
# All four division/remainder objects come from the same $(ev6-y)divide.S
# source.  Naming several targets on one rule gives each target its own
# independent rule with this prerequisite and recipe, so every object is
# still assembled separately (with its own AFLAGS_<object> defines).
$(addprefix $(obj)/,__divqu.o __remqu.o __divlu.o __remlu.o): $(obj)/$(ev6-y)divide.S
	$(cmd_as_o_S)
|