mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-26 19:05:23 +07:00
556f47ac60
Running "stress-ng --enosys 4 -t 20 -v" showed a large number of kernel oops
with "Unable to handle kernel paging request at virtual address" message. This
happens when enosys stressor starts testing random non-valid syscalls.
I forgot to redirect any syscall below -1 to sys_ni_syscall.
With the patch kernel oops messages are gone while running stress-ng enosys
stressor.
Signed-off-by: David Abdurachmanov <david.abdurachmanov@sifive.com>
Fixes: 5340627e3f
("riscv: add support for SECCOMP and SECCOMP_FILTER")
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
457 lines
11 KiB
ArmAsm
457 lines
11 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* Copyright (C) 2012 Regents of the University of California
|
|
* Copyright (C) 2017 SiFive
|
|
*/
|
|
|
|
#include <linux/init.h>
|
|
#include <linux/linkage.h>
|
|
|
|
#include <asm/asm.h>
|
|
#include <asm/csr.h>
|
|
#include <asm/unistd.h>
|
|
#include <asm/thread_info.h>
|
|
#include <asm/asm-offsets.h>
|
|
|
|
.text
|
|
.altmacro
|
|
|
|
/*
|
|
* Prepares to enter a system call or exception by saving all registers to the
|
|
* stack.
|
|
*/
|
|
.macro SAVE_ALL
|
|
LOCAL _restore_kernel_tpsp
|
|
LOCAL _save_context
|
|
|
|
/*
|
|
* If coming from userspace, preserve the user thread pointer and load
|
|
* the kernel thread pointer. If we came from the kernel, the scratch
|
|
* register will contain 0, and we should continue on the current TP.
|
|
*/
|
|
csrrw tp, CSR_SCRATCH, tp
|
|
bnez tp, _save_context
|
|
|
|
_restore_kernel_tpsp:
|
|
csrr tp, CSR_SCRATCH
|
|
REG_S sp, TASK_TI_KERNEL_SP(tp)
|
|
_save_context:
|
|
REG_S sp, TASK_TI_USER_SP(tp)
|
|
REG_L sp, TASK_TI_KERNEL_SP(tp)
|
|
addi sp, sp, -(PT_SIZE_ON_STACK)
|
|
REG_S x1, PT_RA(sp)
|
|
REG_S x3, PT_GP(sp)
|
|
REG_S x5, PT_T0(sp)
|
|
REG_S x6, PT_T1(sp)
|
|
REG_S x7, PT_T2(sp)
|
|
REG_S x8, PT_S0(sp)
|
|
REG_S x9, PT_S1(sp)
|
|
REG_S x10, PT_A0(sp)
|
|
REG_S x11, PT_A1(sp)
|
|
REG_S x12, PT_A2(sp)
|
|
REG_S x13, PT_A3(sp)
|
|
REG_S x14, PT_A4(sp)
|
|
REG_S x15, PT_A5(sp)
|
|
REG_S x16, PT_A6(sp)
|
|
REG_S x17, PT_A7(sp)
|
|
REG_S x18, PT_S2(sp)
|
|
REG_S x19, PT_S3(sp)
|
|
REG_S x20, PT_S4(sp)
|
|
REG_S x21, PT_S5(sp)
|
|
REG_S x22, PT_S6(sp)
|
|
REG_S x23, PT_S7(sp)
|
|
REG_S x24, PT_S8(sp)
|
|
REG_S x25, PT_S9(sp)
|
|
REG_S x26, PT_S10(sp)
|
|
REG_S x27, PT_S11(sp)
|
|
REG_S x28, PT_T3(sp)
|
|
REG_S x29, PT_T4(sp)
|
|
REG_S x30, PT_T5(sp)
|
|
REG_S x31, PT_T6(sp)
|
|
|
|
/*
|
|
* Disable user-mode memory access as it should only be set in the
|
|
* actual user copy routines.
|
|
*
|
|
* Disable the FPU to detect illegal usage of floating point in kernel
|
|
* space.
|
|
*/
|
|
li t0, SR_SUM | SR_FS
|
|
|
|
REG_L s0, TASK_TI_USER_SP(tp)
|
|
csrrc s1, CSR_STATUS, t0
|
|
csrr s2, CSR_EPC
|
|
csrr s3, CSR_TVAL
|
|
csrr s4, CSR_CAUSE
|
|
csrr s5, CSR_SCRATCH
|
|
REG_S s0, PT_SP(sp)
|
|
REG_S s1, PT_STATUS(sp)
|
|
REG_S s2, PT_EPC(sp)
|
|
REG_S s3, PT_BADADDR(sp)
|
|
REG_S s4, PT_CAUSE(sp)
|
|
REG_S s5, PT_TP(sp)
|
|
.endm
|
|
|
|
/*
|
|
* Prepares to return from a system call or exception by restoring all
|
|
* registers from the stack.
|
|
*/
|
|
.macro RESTORE_ALL
|
|
REG_L a0, PT_STATUS(sp)
|
|
/*
|
|
* The current load reservation is effectively part of the processor's
|
|
* state, in the sense that load reservations cannot be shared between
|
|
* different hart contexts. We can't actually save and restore a load
|
|
* reservation, so instead here we clear any existing reservation --
|
|
* it's always legal for implementations to clear load reservations at
|
|
* any point (as long as the forward progress guarantee is kept, but
|
|
* we'll ignore that here).
|
|
*
|
|
* Dangling load reservations can be the result of taking a trap in the
|
|
* middle of an LR/SC sequence, but can also be the result of a taken
|
|
* forward branch around an SC -- which is how we implement CAS. As a
|
|
* result we need to clear reservations between the last CAS and the
|
|
* jump back to the new context. While it is unlikely the store
|
|
* completes, implementations are allowed to expand reservations to be
|
|
* arbitrarily large.
|
|
*/
|
|
REG_L a2, PT_EPC(sp)
|
|
REG_SC x0, a2, PT_EPC(sp)
|
|
|
|
csrw CSR_STATUS, a0
|
|
csrw CSR_EPC, a2
|
|
|
|
REG_L x1, PT_RA(sp)
|
|
REG_L x3, PT_GP(sp)
|
|
REG_L x4, PT_TP(sp)
|
|
REG_L x5, PT_T0(sp)
|
|
REG_L x6, PT_T1(sp)
|
|
REG_L x7, PT_T2(sp)
|
|
REG_L x8, PT_S0(sp)
|
|
REG_L x9, PT_S1(sp)
|
|
REG_L x10, PT_A0(sp)
|
|
REG_L x11, PT_A1(sp)
|
|
REG_L x12, PT_A2(sp)
|
|
REG_L x13, PT_A3(sp)
|
|
REG_L x14, PT_A4(sp)
|
|
REG_L x15, PT_A5(sp)
|
|
REG_L x16, PT_A6(sp)
|
|
REG_L x17, PT_A7(sp)
|
|
REG_L x18, PT_S2(sp)
|
|
REG_L x19, PT_S3(sp)
|
|
REG_L x20, PT_S4(sp)
|
|
REG_L x21, PT_S5(sp)
|
|
REG_L x22, PT_S6(sp)
|
|
REG_L x23, PT_S7(sp)
|
|
REG_L x24, PT_S8(sp)
|
|
REG_L x25, PT_S9(sp)
|
|
REG_L x26, PT_S10(sp)
|
|
REG_L x27, PT_S11(sp)
|
|
REG_L x28, PT_T3(sp)
|
|
REG_L x29, PT_T4(sp)
|
|
REG_L x30, PT_T5(sp)
|
|
REG_L x31, PT_T6(sp)
|
|
|
|
REG_L x2, PT_SP(sp)
|
|
.endm
|
|
|
|
#if !IS_ENABLED(CONFIG_PREEMPT)
|
|
.set resume_kernel, restore_all
|
|
#endif
|
|
|
|
ENTRY(handle_exception)
|
|
SAVE_ALL
|
|
|
|
/*
|
|
* Set the scratch register to 0, so that if a recursive exception
|
|
* occurs, the exception vector knows it came from the kernel
|
|
*/
|
|
csrw CSR_SCRATCH, x0
|
|
|
|
/* Load the global pointer */
|
|
.option push
|
|
.option norelax
|
|
la gp, __global_pointer$
|
|
.option pop
|
|
|
|
la ra, ret_from_exception
|
|
/*
|
|
* MSB of cause differentiates between
|
|
* interrupts and exceptions
|
|
*/
|
|
bge s4, zero, 1f
|
|
|
|
/* Handle interrupts */
|
|
move a0, sp /* pt_regs */
|
|
tail do_IRQ
|
|
1:
|
|
/*
|
|
* Exceptions run with interrupts enabled or disabled depending on the
|
|
* state of SR_PIE in m/sstatus.
|
|
*/
|
|
andi t0, s1, SR_PIE
|
|
beqz t0, 1f
|
|
csrs CSR_STATUS, SR_IE
|
|
|
|
1:
|
|
/* Handle syscalls */
|
|
li t0, EXC_SYSCALL
|
|
beq s4, t0, handle_syscall
|
|
|
|
/* Handle other exceptions */
|
|
slli t0, s4, RISCV_LGPTR
|
|
la t1, excp_vect_table
|
|
la t2, excp_vect_table_end
|
|
move a0, sp /* pt_regs */
|
|
add t0, t1, t0
|
|
/* Check if exception code lies within bounds */
|
|
bgeu t0, t2, 1f
|
|
REG_L t0, 0(t0)
|
|
jr t0
|
|
1:
|
|
tail do_trap_unknown
|
|
|
|
handle_syscall:
|
|
/* save the initial A0 value (needed in signal handlers) */
|
|
REG_S a0, PT_ORIG_A0(sp)
|
|
/*
|
|
* Advance SEPC to avoid executing the original
|
|
* scall instruction on sret
|
|
*/
|
|
addi s2, s2, 0x4
|
|
REG_S s2, PT_EPC(sp)
|
|
/* Trace syscalls, but only if requested by the user. */
|
|
REG_L t0, TASK_TI_FLAGS(tp)
|
|
andi t0, t0, _TIF_SYSCALL_WORK
|
|
bnez t0, handle_syscall_trace_enter
|
|
check_syscall_nr:
|
|
/* Check to make sure we don't jump to a bogus syscall number. */
|
|
li t0, __NR_syscalls
|
|
la s0, sys_ni_syscall
|
|
/*
|
|
* The tracer can change syscall number to valid/invalid value.
|
|
* We use syscall_set_nr helper in syscall_trace_enter thus we
|
|
* cannot trust the current value in a7 and have to reload from
|
|
* the current task pt_regs.
|
|
*/
|
|
REG_L a7, PT_A7(sp)
|
|
/*
|
|
* Syscall number held in a7.
|
|
* If syscall number is above allowed value, redirect to ni_syscall.
|
|
*/
|
|
bge a7, t0, 1f
|
|
/*
|
|
* Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1.
|
|
* If yes, we pretend it was executed.
|
|
*/
|
|
li t1, -1
|
|
beq a7, t1, ret_from_syscall_rejected
|
|
blt a7, t1, 1f
|
|
/* Call syscall */
|
|
la s0, sys_call_table
|
|
slli t0, a7, RISCV_LGPTR
|
|
add s0, s0, t0
|
|
REG_L s0, 0(s0)
|
|
1:
|
|
jalr s0
|
|
|
|
ret_from_syscall:
|
|
/* Set user a0 to kernel a0 */
|
|
REG_S a0, PT_A0(sp)
|
|
/*
|
|
* We didn't execute the actual syscall.
|
|
* Seccomp already set return value for the current task pt_regs.
|
|
* (If it was configured with SECCOMP_RET_ERRNO/TRACE)
|
|
*/
|
|
ret_from_syscall_rejected:
|
|
/* Trace syscalls, but only if requested by the user. */
|
|
REG_L t0, TASK_TI_FLAGS(tp)
|
|
andi t0, t0, _TIF_SYSCALL_WORK
|
|
bnez t0, handle_syscall_trace_exit
|
|
|
|
ret_from_exception:
|
|
REG_L s0, PT_STATUS(sp)
|
|
csrc CSR_STATUS, SR_IE
|
|
#ifdef CONFIG_RISCV_M_MODE
|
|
/* the MPP value is too large to be used as an immediate arg for addi */
|
|
li t0, SR_MPP
|
|
and s0, s0, t0
|
|
#else
|
|
andi s0, s0, SR_SPP
|
|
#endif
|
|
bnez s0, resume_kernel
|
|
|
|
resume_userspace:
|
|
/* Interrupts must be disabled here so flags are checked atomically */
|
|
REG_L s0, TASK_TI_FLAGS(tp) /* current_thread_info->flags */
|
|
andi s1, s0, _TIF_WORK_MASK
|
|
bnez s1, work_pending
|
|
|
|
/* Save unwound kernel stack pointer in thread_info */
|
|
addi s0, sp, PT_SIZE_ON_STACK
|
|
REG_S s0, TASK_TI_KERNEL_SP(tp)
|
|
|
|
/*
|
|
* Save TP into the scratch register , so we can find the kernel data
|
|
* structures again.
|
|
*/
|
|
csrw CSR_SCRATCH, tp
|
|
|
|
restore_all:
|
|
RESTORE_ALL
|
|
#ifdef CONFIG_RISCV_M_MODE
|
|
mret
|
|
#else
|
|
sret
|
|
#endif
|
|
|
|
#if IS_ENABLED(CONFIG_PREEMPT)
|
|
resume_kernel:
|
|
REG_L s0, TASK_TI_PREEMPT_COUNT(tp)
|
|
bnez s0, restore_all
|
|
REG_L s0, TASK_TI_FLAGS(tp)
|
|
andi s0, s0, _TIF_NEED_RESCHED
|
|
beqz s0, restore_all
|
|
call preempt_schedule_irq
|
|
j restore_all
|
|
#endif
|
|
|
|
work_pending:
|
|
/* Enter slow path for supplementary processing */
|
|
la ra, ret_from_exception
|
|
andi s1, s0, _TIF_NEED_RESCHED
|
|
bnez s1, work_resched
|
|
work_notifysig:
|
|
/* Handle pending signals and notify-resume requests */
|
|
csrs CSR_STATUS, SR_IE /* Enable interrupts for do_notify_resume() */
|
|
move a0, sp /* pt_regs */
|
|
move a1, s0 /* current_thread_info->flags */
|
|
tail do_notify_resume
|
|
work_resched:
|
|
tail schedule
|
|
|
|
/* Slow paths for ptrace. */
|
|
handle_syscall_trace_enter:
|
|
move a0, sp
|
|
call do_syscall_trace_enter
|
|
REG_L a0, PT_A0(sp)
|
|
REG_L a1, PT_A1(sp)
|
|
REG_L a2, PT_A2(sp)
|
|
REG_L a3, PT_A3(sp)
|
|
REG_L a4, PT_A4(sp)
|
|
REG_L a5, PT_A5(sp)
|
|
REG_L a6, PT_A6(sp)
|
|
REG_L a7, PT_A7(sp)
|
|
j check_syscall_nr
|
|
handle_syscall_trace_exit:
|
|
move a0, sp
|
|
call do_syscall_trace_exit
|
|
j ret_from_exception
|
|
|
|
END(handle_exception)
|
|
|
|
ENTRY(ret_from_fork)
|
|
la ra, ret_from_exception
|
|
tail schedule_tail
|
|
ENDPROC(ret_from_fork)
|
|
|
|
ENTRY(ret_from_kernel_thread)
|
|
call schedule_tail
|
|
/* Call fn(arg) */
|
|
la ra, ret_from_exception
|
|
move a0, s1
|
|
jr s0
|
|
ENDPROC(ret_from_kernel_thread)
|
|
|
|
|
|
/*
|
|
* Integer register context switch
|
|
* The callee-saved registers must be saved and restored.
|
|
*
|
|
* a0: previous task_struct (must be preserved across the switch)
|
|
* a1: next task_struct
|
|
*
|
|
* The value of a0 and a1 must be preserved by this function, as that's how
|
|
* arguments are passed to schedule_tail.
|
|
*/
|
|
ENTRY(__switch_to)
|
|
/* Save context into prev->thread */
|
|
li a4, TASK_THREAD_RA
|
|
add a3, a0, a4
|
|
add a4, a1, a4
|
|
REG_S ra, TASK_THREAD_RA_RA(a3)
|
|
REG_S sp, TASK_THREAD_SP_RA(a3)
|
|
REG_S s0, TASK_THREAD_S0_RA(a3)
|
|
REG_S s1, TASK_THREAD_S1_RA(a3)
|
|
REG_S s2, TASK_THREAD_S2_RA(a3)
|
|
REG_S s3, TASK_THREAD_S3_RA(a3)
|
|
REG_S s4, TASK_THREAD_S4_RA(a3)
|
|
REG_S s5, TASK_THREAD_S5_RA(a3)
|
|
REG_S s6, TASK_THREAD_S6_RA(a3)
|
|
REG_S s7, TASK_THREAD_S7_RA(a3)
|
|
REG_S s8, TASK_THREAD_S8_RA(a3)
|
|
REG_S s9, TASK_THREAD_S9_RA(a3)
|
|
REG_S s10, TASK_THREAD_S10_RA(a3)
|
|
REG_S s11, TASK_THREAD_S11_RA(a3)
|
|
/* Restore context from next->thread */
|
|
REG_L ra, TASK_THREAD_RA_RA(a4)
|
|
REG_L sp, TASK_THREAD_SP_RA(a4)
|
|
REG_L s0, TASK_THREAD_S0_RA(a4)
|
|
REG_L s1, TASK_THREAD_S1_RA(a4)
|
|
REG_L s2, TASK_THREAD_S2_RA(a4)
|
|
REG_L s3, TASK_THREAD_S3_RA(a4)
|
|
REG_L s4, TASK_THREAD_S4_RA(a4)
|
|
REG_L s5, TASK_THREAD_S5_RA(a4)
|
|
REG_L s6, TASK_THREAD_S6_RA(a4)
|
|
REG_L s7, TASK_THREAD_S7_RA(a4)
|
|
REG_L s8, TASK_THREAD_S8_RA(a4)
|
|
REG_L s9, TASK_THREAD_S9_RA(a4)
|
|
REG_L s10, TASK_THREAD_S10_RA(a4)
|
|
REG_L s11, TASK_THREAD_S11_RA(a4)
|
|
/* Swap the CPU entry around. */
|
|
lw a3, TASK_TI_CPU(a0)
|
|
lw a4, TASK_TI_CPU(a1)
|
|
sw a3, TASK_TI_CPU(a1)
|
|
sw a4, TASK_TI_CPU(a0)
|
|
#if TASK_TI != 0
|
|
#error "TASK_TI != 0: tp will contain a 'struct thread_info', not a 'struct task_struct' so get_current() won't work."
|
|
addi tp, a1, TASK_TI
|
|
#else
|
|
move tp, a1
|
|
#endif
|
|
ret
|
|
ENDPROC(__switch_to)
|
|
|
|
#ifndef CONFIG_MMU
|
|
#define do_page_fault do_trap_unknown
|
|
#endif
|
|
|
|
.section ".rodata"
|
|
/* Exception vector table */
|
|
ENTRY(excp_vect_table)
|
|
RISCV_PTR do_trap_insn_misaligned
|
|
RISCV_PTR do_trap_insn_fault
|
|
RISCV_PTR do_trap_insn_illegal
|
|
RISCV_PTR do_trap_break
|
|
RISCV_PTR do_trap_load_misaligned
|
|
RISCV_PTR do_trap_load_fault
|
|
RISCV_PTR do_trap_store_misaligned
|
|
RISCV_PTR do_trap_store_fault
|
|
RISCV_PTR do_trap_ecall_u /* system call, gets intercepted */
|
|
RISCV_PTR do_trap_ecall_s
|
|
RISCV_PTR do_trap_unknown
|
|
RISCV_PTR do_trap_ecall_m
|
|
RISCV_PTR do_page_fault /* instruction page fault */
|
|
RISCV_PTR do_page_fault /* load page fault */
|
|
RISCV_PTR do_trap_unknown
|
|
RISCV_PTR do_page_fault /* store page fault */
|
|
excp_vect_table_end:
|
|
END(excp_vect_table)
|
|
|
|
#ifndef CONFIG_MMU
|
|
ENTRY(__user_rt_sigreturn)
|
|
li a7, __NR_rt_sigreturn
|
|
scall
|
|
END(__user_rt_sigreturn)
|
|
#endif
|