mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-02 21:36:41 +07:00
4332195c56
Up until now we have always paid attention to make sure the length of the new instruction replacing the old one is less than or equal to the length of the old instruction. If the new instruction is longer, at the time it replaces the old instruction it will overwrite the beginning of the next instruction in the kernel image and cause your pants to catch fire. So instead of having to pay attention, teach the alternatives framework to pad shorter old instructions with NOPs at build time - but only in the case when len(old instruction(s)) < len(new instruction(s)) and add nothing in the >= case. (In that case we do add_nops() when patching). This way the alternatives user shouldn't have to care about instruction sizes and simply use the macros. Add asm ALTERNATIVE* flavor macros too, while at it. Also, we need to save the pad length in a separate struct alt_instr member for NOP optimization and the way to do that reliably is to carry the pad length instead of trying to detect whether we're looking at single-byte NOPs or at pathological instruction offsets like e9 90 90 90 90, for example, which is a valid instruction. Thanks to Michael Matz for the great help with toolchain questions. Signed-off-by: Borislav Petkov <bp@suse.de>
290 lines
6.1 KiB
ArmAsm
290 lines
6.1 KiB
ArmAsm
/*
|
|
* Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
|
|
* Copyright 2002 Andi Kleen, SuSE Labs.
|
|
* Subject to the GNU Public License v2.
|
|
*
|
|
* Functions to copy from and to user space.
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/dwarf2.h>
|
|
#include <asm/current.h>
|
|
#include <asm/asm-offsets.h>
|
|
#include <asm/thread_info.h>
|
|
#include <asm/cpufeature.h>
|
|
#include <asm/alternative-asm.h>
|
|
#include <asm/asm.h>
|
|
#include <asm/smap.h>
|
|
|
|
/*
 * ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
 *
 * Emits a 5-byte jump (one opcode byte 0xe9 plus a 32-bit displacement) to
 * orig, and registers two replacement jumps for it in .altinstructions.
 *
 * Because the feature2 entry is placed after the feature1 entry in the
 * altinstructions section, the logical result is:
 *
 *	CPU has feature2	-> the jump to alt2 is used
 *	else CPU has feature1	-> the jump to alt1 is used
 *	otherwise		-> the jump to orig is used
 */
	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
	.byte	0xe9			/* near jump, 32-bit displacement */
	.long	\orig-1f		/* default: jump to orig */
1:
	/*
	 * Both replacement displacements are computed against label 1 (the
	 * end of the jump emitted above), not against their own location.
	 */
	.section .altinstr_replacement,"ax"
2:
	.byte	0xe9			/* near jump, 32-bit displacement */
	.long	\alt1-1b		/* alternatively: jump to alt1 */
3:
	.byte	0xe9			/* near jump, 32-bit displacement */
	.long	\alt2-1b		/* alternatively: jump to alt2 */
	.previous

	/*
	 * NOTE(review): the trailing 5,5,0 presumably mean original length,
	 * replacement length and pad length - confirm against the
	 * altinstruction_entry macro.
	 */
	.section .altinstructions,"a"
	altinstruction_entry 0b,2b,\feature1,5,5,0
	altinstruction_entry 0b,3b,\feature2,5,5,0
	.previous
	.endm
|
|
|
|
/*
 * ALIGN_DESTINATION
 *
 * Copies single bytes from (%rsi) to (%rdi) until the low three bits of the
 * destination are zero, i.e. until it sits on an 8-byte boundary.
 *
 * In:		rdi destination, rsi source, edx count
 * Out:		rdi and rsi advanced, edx reduced by the bytes copied here
 * Clobbers:	ecx, al, flags
 *
 * Both users in this file expand it only after checking that edx >= 8.
 * Labels 100-103 are used so that the numeric labels of the surrounding
 * function can still be referenced across the expansion.
 */
	.macro ALIGN_DESTINATION
	movl	%edi,%ecx
	andl	$7,%ecx			/* ecx = destination & 7 */
	jz	102f			/* destination is already aligned */
	subl	$8,%ecx
	negl	%ecx			/* ecx = 8 - (destination & 7) */
	subl	%ecx,%edx		/* the head bytes leave the count */
100:
	movb	(%rsi),%al
101:
	movb	%al,(%rdi)
	incq	%rsi
	incq	%rdi
	decl	%ecx
	jnz	100b
102:
	/* A fault at 100 or 101 puts the uncopied head bytes back in edx. */
	.section .fixup,"ax"
103:
	addl	%ecx,%edx		/* ecx is zerorest also */
	jmp	copy_user_handle_tail
	.previous

	_ASM_EXTABLE(100b,103b)
	_ASM_EXTABLE(101b,103b)
	.endm
|
|
|
|
/*
 * _copy_to_user - standard copy_to_user with segment limit checking.
 *
 * The end of the destination range (rdi + rdx) is computed in rcx.  If the
 * addition carries, or the end lies above TI_addr_limit of the structure
 * GET_THREAD_INFO put in rax, control goes to bad_to_user.  Otherwise the
 * copy routine selected by ALTERNATIVE_JUMP is entered with rdi, rsi and rdx
 * untouched.
 */
ENTRY(_copy_to_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq	%rdi,%rcx
	addq	%rdx,%rcx		/* rcx = destination + count */
	jc	bad_to_user		/* the range wrapped around */
	cmpq	TI_addr_limit(%rax),%rcx
	ja	bad_to_user		/* the range ends above the limit */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,		\
		copy_user_generic_unrolled,copy_user_generic_string,	\
		copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_to_user)
|
|
|
|
/*
 * _copy_from_user - standard copy_from_user with segment limit checking.
 *
 * The end of the source range (rsi + rdx) is computed in rcx.  If the
 * addition carries, or the end lies above TI_addr_limit of the structure
 * GET_THREAD_INFO put in rax, control goes to bad_from_user.  Otherwise the
 * copy routine selected by ALTERNATIVE_JUMP is entered with rdi, rsi and rdx
 * untouched.
 */
ENTRY(_copy_from_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq	%rsi,%rcx
	addq	%rdx,%rcx		/* rcx = source + count */
	jc	bad_from_user		/* the range wrapped around */
	cmpq	TI_addr_limit(%rax),%rcx
	ja	bad_from_user		/* the range ends above the limit */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,		\
		copy_user_generic_unrolled,copy_user_generic_string,	\
		copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_from_user)
|
|
|
|
/*
 * bad_from_user / bad_to_user - targets of the failed limit checks in
 * _copy_from_user and _copy_to_user.  Placed in the .fixup section.
 *
 * bad_from_user stores edx zero bytes starting at (%rdi) and then falls
 * through into bad_to_user, which returns the count from edx in eax.
 */
	.section .fixup,"ax"
ENTRY(bad_from_user)
	/*
	 * NOTE(review): ENTRY() presumably defines this label already, which
	 * would make the explicit definition below redundant - confirm.
	 */
bad_from_user:
	CFI_STARTPROC
	movl	%edx,%ecx		/* ecx = number of bytes to clear */
	xorl	%eax,%eax		/* al = 0, the fill byte */
	rep stosb			/* must zero dest */
bad_to_user:
	movl	%edx,%eax		/* return value: the count in edx */
	ret
	CFI_ENDPROC
ENDPROC(bad_from_user)
	.previous
|
|
|
|
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Layout: counts below 8 go straight to the byte loop.  Otherwise the
 * destination is aligned first, then the copy runs in 64-byte blocks,
 * then in 8-byte words, then in single bytes.  Every load and store has
 * an exception table entry pointing at the fixup for its stage.
 */
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	ASM_STAC
	cmpl	$8,%edx
	jb	20f			/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl	%edx,%ecx
	andl	$63,%edx		/* edx = bytes left over after the blocks */
	shrl	$6,%ecx			/* ecx = number of 64-byte blocks */
	jz	17f

	/* Stage 1: one 64-byte block per iteration, in two halves of 32. */
1:	movq	(%rsi),%r8
2:	movq	1*8(%rsi),%r9
3:	movq	2*8(%rsi),%r10
4:	movq	3*8(%rsi),%r11
5:	movq	%r8,(%rdi)
6:	movq	%r9,1*8(%rdi)
7:	movq	%r10,2*8(%rdi)
8:	movq	%r11,3*8(%rdi)
9:	movq	4*8(%rsi),%r8
10:	movq	5*8(%rsi),%r9
11:	movq	6*8(%rsi),%r10
12:	movq	7*8(%rsi),%r11
13:	movq	%r8,4*8(%rdi)
14:	movq	%r9,5*8(%rdi)
15:	movq	%r10,6*8(%rdi)
16:	movq	%r11,7*8(%rdi)
	leaq	64(%rsi),%rsi
	leaq	64(%rdi),%rdi
	decl	%ecx
	jnz	1b

	/* Stage 2: one 8-byte word per iteration. */
17:	movl	%edx,%ecx
	andl	$7,%edx			/* edx = bytes left over after the words */
	shrl	$3,%ecx			/* ecx = number of 8-byte words */
	jz	20f
18:	movq	(%rsi),%r8
19:	movq	%r8,(%rdi)
	leaq	8(%rsi),%rsi
	leaq	8(%rdi),%rdi
	decl	%ecx
	jnz	18b

	/* Stage 3: one byte per iteration. */
20:	andl	%edx,%edx
	jz	23f			/* nothing left to copy */
	movl	%edx,%ecx
21:	movb	(%rsi),%al
22:	movb	%al,(%rdi)
	incq	%rsi
	incq	%rdi
	decl	%ecx
	jnz	21b

23:	xorl	%eax,%eax		/* success: 0 uncopied bytes */
	ASM_CLAC
	ret

	/*
	 * Fixups: rebuild the outstanding byte count in edx for the stage
	 * that faulted, then hand over to copy_user_handle_tail.
	 */
	.section .fixup,"ax"
30:	shll	$6,%ecx			/* blocks left -> bytes */
	addl	%ecx,%edx		/* plus the bytes after the blocks */
	jmp	60f
40:	leal	(%rdx,%rcx,8),%edx	/* 8 * words left + bytes after them */
	jmp	60f
50:	movl	%ecx,%edx		/* bytes left in the byte loop */
60:	jmp	copy_user_handle_tail	/* ecx is zerorest also */
	.previous

	/* Stage 1 accesses */
	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	/* Stage 2 accesses */
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	/* Stage 3 accesses */
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
	CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)
|
|
|
|
/*
 * copy_user_generic_string - copy using the string instructions.
 *
 * Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this please consider this.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	ASM_STAC
	cmpl	$8,%edx
	jb	2f			/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl	%edx,%ecx
	shrl	$3,%ecx			/* ecx = number of 8-byte words */
	andl	$7,%edx			/* edx = bytes left over after the words */
1:	rep movsq
2:	movl	%edx,%ecx
3:	rep movsb
	xorl	%eax,%eax		/* success: 0 uncopied bytes */
	ASM_CLAC
	ret

	/*
	 * Fixups: a fault in the word copy enters at 11 and converts the
	 * outstanding words plus the tail into bytes; a fault in the byte
	 * copy enters at 12 with the outstanding bytes already in ecx.
	 */
	.section .fixup,"ax"
11:	leal	(%rdx,%rcx,8),%ecx
12:	movl	%ecx,%edx		/* ecx is zerorest also */
	jmp	copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
	CFI_ENDPROC
ENDPROC(copy_user_generic_string)
|
|
|
|
/*
 * copy_user_enhanced_fast_string - copy with a single rep movsb.
 *
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	CFI_STARTPROC
	ASM_STAC
	movl	%edx,%ecx		/* ecx = byte count for rep */
1:	rep movsb
	xorl	%eax,%eax		/* success: 0 uncopied bytes */
	ASM_CLAC
	ret

	/* A fault at 1 leaves the outstanding byte count in ecx. */
	.section .fixup,"ax"
12:	movl	%ecx,%edx		/* ecx is zerorest also */
	jmp	copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
	CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)
|