mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-19 15:17:02 +07:00
x86/lib/copy_page_64.S: Use generic ALTERNATIVE macro
... instead of the semi-version with the spelled out sections. What is more, make the REP_GOOD version be the default copy_page() version as the majority of the relevant x86 CPUs do set X86_FEATURE_REP_GOOD. Thus, copy_page gets compiled to: ffffffff8130af80 <copy_page>: ffffffff8130af80: e9 0b 00 00 00 jmpq ffffffff8130af90 <copy_page_regs> ffffffff8130af85: b9 00 02 00 00 mov $0x200,%ecx ffffffff8130af8a: f3 48 a5 rep movsq %ds:(%rsi),%es:(%rdi) ffffffff8130af8d: c3 retq ffffffff8130af8e: 66 90 xchg %ax,%ax ffffffff8130af90 <copy_page_regs>: ... and after the alternatives have run, the JMP to the old, unrolled version gets NOPed out: ffffffff8130af80 <copy_page>: ffffffff8130af80: 66 66 90 xchg %ax,%ax ffffffff8130af83: 66 90 xchg %ax,%ax ffffffff8130af85: b9 00 02 00 00 mov $0x200,%ecx ffffffff8130af8a: f3 48 a5 rep movsq %ds:(%rsi),%es:(%rdi) ffffffff8130af8d: c3 retq On modern uarches, those NOPs are cheaper than the unconditional JMP previously. Signed-off-by: Borislav Petkov <bp@suse.de>
This commit is contained in:
parent
4fd4b6e553
commit
090a3f6155
@ -2,23 +2,26 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/dwarf2.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
|
||||
/*
|
||||
* Some CPUs run faster using the string copy instructions (sane microcode).
|
||||
* It is also a lot simpler. Use this when possible. But, don't use streaming
|
||||
* copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
|
||||
* prefetch distance based on SMP/UP.
|
||||
*/
|
||||
ALIGN
|
||||
copy_page_rep:
|
||||
ENTRY(copy_page)
|
||||
CFI_STARTPROC
|
||||
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
|
||||
movl $4096/8, %ecx
|
||||
rep movsq
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(copy_page_rep)
|
||||
ENDPROC(copy_page)
|
||||
|
||||
/*
|
||||
* Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
|
||||
* Could vary the prefetch distance based on SMP/UP.
|
||||
*/
|
||||
|
||||
ENTRY(copy_page)
|
||||
ENTRY(copy_page_regs)
|
||||
CFI_STARTPROC
|
||||
subq $2*8, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET 2*8
|
||||
@ -90,21 +93,5 @@ ENTRY(copy_page)
|
||||
addq $2*8, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET -2*8
|
||||
ret
|
||||
.Lcopy_page_end:
|
||||
CFI_ENDPROC
|
||||
ENDPROC(copy_page)
|
||||
|
||||
/* Some CPUs run faster using the string copy instructions.
|
||||
It is also a lot simpler. Use this when possible */
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
.section .altinstr_replacement,"ax"
|
||||
1: .byte 0xeb /* jmp <disp8> */
|
||||
.byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
|
||||
2:
|
||||
.previous
|
||||
.section .altinstructions,"a"
|
||||
altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \
|
||||
.Lcopy_page_end-copy_page, 2b-1b, 0
|
||||
.previous
|
||||
ENDPROC(copy_page_regs)
|
||||
|
Loading…
Reference in New Issue
Block a user