mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-21 19:18:45 +07:00
x86/lib/memcpy_64.S: Convert memcpy to ALTERNATIVE_2 macro
Make REP_GOOD variant the default after alternatives have run. Signed-off-by: Borislav Petkov <bp@suse.de>
This commit is contained in:
parent
a77600cd03
commit
e0bc8d179e
@ -1,11 +1,19 @@
|
||||
/* Copyright 2002 Andi Kleen */
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/dwarf2.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
|
||||
/*
|
||||
* We build a jump to memcpy_orig by default which gets NOPped out on
|
||||
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
|
||||
* have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
|
||||
* to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
|
||||
*/
|
||||
|
||||
.weak memcpy
|
||||
|
||||
/*
|
||||
* memcpy - Copy a memory block.
|
||||
*
|
||||
@ -17,15 +25,11 @@
|
||||
* Output:
|
||||
* rax original destination
|
||||
*/
|
||||
ENTRY(__memcpy)
|
||||
ENTRY(memcpy)
|
||||
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
|
||||
"jmp memcpy_erms", X86_FEATURE_ERMS
|
||||
|
||||
/*
|
||||
* memcpy_c() - fast string ops (REP MOVSQ) based variant.
|
||||
*
|
||||
* This gets patched over the unrolled variant (below) via the
|
||||
* alternative instructions framework:
|
||||
*/
|
||||
.section .altinstr_replacement, "ax", @progbits
|
||||
.Lmemcpy_c:
|
||||
movq %rdi, %rax
|
||||
movq %rdx, %rcx
|
||||
shrq $3, %rcx
|
||||
@ -34,29 +38,21 @@
|
||||
movl %edx, %ecx
|
||||
rep movsb
|
||||
ret
|
||||
.Lmemcpy_e:
|
||||
.previous
|
||||
ENDPROC(memcpy)
|
||||
ENDPROC(__memcpy)
|
||||
|
||||
/*
|
||||
* memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
|
||||
* memcpy_c. Use memcpy_c_e when possible.
|
||||
*
|
||||
* This gets patched over the unrolled variant (below) via the
|
||||
* alternative instructions framework:
|
||||
* memcpy_erms() - enhanced fast string memcpy. This is faster and
|
||||
* simpler than memcpy. Use memcpy_erms when possible.
|
||||
*/
|
||||
.section .altinstr_replacement, "ax", @progbits
|
||||
.Lmemcpy_c_e:
|
||||
ENTRY(memcpy_erms)
|
||||
movq %rdi, %rax
|
||||
movq %rdx, %rcx
|
||||
rep movsb
|
||||
ret
|
||||
.Lmemcpy_e_e:
|
||||
.previous
|
||||
ENDPROC(memcpy_erms)
|
||||
|
||||
.weak memcpy
|
||||
|
||||
ENTRY(__memcpy)
|
||||
ENTRY(memcpy)
|
||||
ENTRY(memcpy_orig)
|
||||
CFI_STARTPROC
|
||||
movq %rdi, %rax
|
||||
|
||||
@ -183,26 +179,4 @@ ENTRY(memcpy)
|
||||
.Lend:
|
||||
retq
|
||||
CFI_ENDPROC
|
||||
ENDPROC(memcpy)
|
||||
ENDPROC(__memcpy)
|
||||
|
||||
/*
|
||||
* Some CPUs are adding enhanced REP MOVSB/STOSB feature
|
||||
* If the feature is supported, memcpy_c_e() is the first choice.
|
||||
* If enhanced rep movsb copy is not available, use fast string copy
|
||||
* memcpy_c() when possible. This is faster and code is simpler than
|
||||
* original memcpy().
|
||||
* Otherwise, original memcpy() is used.
|
||||
* In .altinstructions section, ERMS feature is placed after REG_GOOD
|
||||
* feature to implement the right patch order.
|
||||
*
|
||||
* Replace only beginning, memcpy is used to apply alternatives,
|
||||
* so it is silly to overwrite itself with nops - reboot is the
|
||||
* only outcome...
|
||||
*/
|
||||
.section .altinstructions, "a"
|
||||
altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
|
||||
.Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c,0
|
||||
altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
|
||||
.Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e,0
|
||||
.previous
|
||||
ENDPROC(memcpy_orig)
|
||||
|
Loading…
Reference in New Issue
Block a user