mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-14 07:18:04 +07:00
4332195c56
Up until now we have always paid attention to make sure the length of the new instruction replacing the old one is at least less or equal to the length of the old instruction. If the new instruction is longer, at the time it replaces the old instruction it will overwrite the beginning of the next instruction in the kernel image and cause your pants to catch fire. So instead of having to pay attention, teach the alternatives framework to pad shorter old instructions with NOPs at buildtime - but only in the case when len(old instruction(s)) < len(new instruction(s)) and add nothing in the >= case. (In that case we do add_nops() when patching). This way the alternatives user shouldn't have to care about instruction sizes and simply use the macros. Add asm ALTERNATIVE* flavor macros too, while at it. Also, we need to save the pad length in a separate struct alt_instr member for NOP optimization and the way to do that reliably is to carry the pad length instead of trying to detect whether we're looking at single-byte NOPs or at pathological instruction offsets like e9 90 90 90 90, for example, which is a valid instruction. Thanks to Michael Matz for the great help with toolchain questions. Signed-off-by: Borislav Petkov <bp@suse.de>
228 lines
3.8 KiB
ArmAsm
228 lines
3.8 KiB
ArmAsm
/*
 * Normally compiler builtins are used, but sometimes the compiler calls out
 * of line code. Based on asm-i386/string.h.
 *
 * This assembly file is re-written from memmove_64.c file.
 *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>

#undef memmove

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 *
 * Registers written besides rax: rcx, rdx, rsi, rdi, r8, r9, r10, r11
 * and the flags. The direction flag is set only around the backward
 * "rep movsq" and is cleared again before returning.
 */
.weak memmove

ENTRY(memmove)
ENTRY(__memmove)
	CFI_STARTPROC

	/*
	 * The return value is always dest. Counts below 32 bytes skip the
	 * copy loops entirely and are handled by the tail code at 1:.
	 */
	mov %rdi, %rax
	cmp $0x20, %rdx
	jb 1f

	/*
	 * Decide forward/backward copy mode.
	 *
	 * src >= dest: a forward copy never overwrites unread source.
	 * src <  dest: copy backward only if src + count reaches past dest,
	 *              i.e. the regions really overlap.
	 * Addresses are unsigned quantities, so the unsigned condition
	 * codes (jae/ja) are used.
	 */
	cmp %rdi, %rsi
	jae .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	ja 2f

	/*
	 * Everything from .Lmemmove_begin_forward up to .Lmemmove_end_forward
	 * is the range named in the .altinstructions entry at the end of
	 * this file.
	 */
.Lmemmove_begin_forward:
	/*
	 * movsq has a high startup latency, so smaller sizes are copied
	 * with general purpose registers instead.
	 */
	cmp $680, %rdx
	jb 3f
	/*
	 * movsq is only good for the aligned case: use it only when the
	 * low address bytes of src and dest are equal.
	 */

	cmpb %dil, %sil
	je 4f
3:
	/*
	 * Bias the count by -32 so that the borrow out of the "sub" at 5:
	 * marks the final iteration.
	 */
	sub $0x20, %rdx
	/*
	 * We gobble 32 bytes forward in each loop.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	/* mov and lea leave the flags alone: this tests the "sub" above. */
	jae 5b
	/* Undo the bias: rdx = bytes still to copy (0..31). */
	addq $0x20, %rdx
	jmp 1f
	/*
	 * Handle data forward by movsq.
	 *
	 * "rep movsq" moves count / 8 quadwords. The last 8 source bytes
	 * are loaded into r11 before the copy and stored to r10 (the
	 * address of the last 8 destination bytes) afterwards, which covers
	 * any count % 8 remainder.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11
	lea -8(%rdi, %rdx), %r10
	shrq $3, %rcx
	rep movsq
	movq %r11, (%r10)
	jmp 13f
.Lmemmove_end_forward:

	/*
	 * Handle data backward by movsq.
	 *
	 * rsi/rdi are moved to the last quadword of each buffer and the
	 * direction flag is set so that "rep movsq" walks downward. The
	 * first 8 source bytes are loaded into r11 before the copy and
	 * stored to the start of dest (r10) afterwards, which covers any
	 * count % 8 remainder.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11
	movq %rdi, %r10
	leaq -8(%rsi, %rdx), %rsi
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std
	rep movsq
	cld
	movq %r11, (%r10)
	jmp 13f

	/*
	 * Start to prepare for backward copy.
	 * Same size and low-address-byte checks as the forward path.
	 */
	.p2align 4
2:
	cmp $680, %rdx
	jb 6f
	cmpb %dil, %sil
	je 7b
6:
	/*
	 * Calculate copy position to tail.
	 * The count is biased by -32, as in the forward loop.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * We gobble 32 bytes backward in each loop.
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	/* mov and lea leave the flags alone: this tests the "subq" above. */
	jae 8b
	/*
	 * Calculate copy position to head.
	 * rdx becomes the number of bytes still to copy (0..31); rsi/rdi
	 * are moved back to the start of that remainder.
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
	/*
	 * Tail code: rdx is below 32 here. Each size class loads everything
	 * it needs from the source before its first store, using accesses
	 * anchored at the head and at the end of the range.
	 */
1:
	cmpq $16, %rdx
	jb 9f
	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Move data from 2 bytes to 3 bytes.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/*
	 * Move data for 1 byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	retq
	CFI_ENDPROC

	/*
	 * Replacement code for the forward-copy range: a plain
	 * "rep movsb" of count bytes followed by the return.
	 */
	.section .altinstr_replacement,"ax"
.Lmemmove_begin_forward_efs:
	/* Forward moving data. */
	movq %rdx, %rcx
	rep movsb
	retq
.Lmemmove_end_forward_efs:
	.previous

	/*
	 * Alternatives entry: original start, replacement start, CPU
	 * feature (X86_FEATURE_ERMS), original length, replacement length.
	 * NOTE(review): the trailing 0 is presumably the pad length -
	 * confirm against the altinstruction_entry macro definition.
	 */
	.section .altinstructions,"a"
	altinstruction_entry .Lmemmove_begin_forward,		\
		.Lmemmove_begin_forward_efs,X86_FEATURE_ERMS,	\
		.Lmemmove_end_forward-.Lmemmove_begin_forward,	\
		.Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs,0
	.previous
ENDPROC(__memmove)
ENDPROC(memmove)