mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-23 14:05:25 +07:00
e33f8d3267
Disable the generic address limit check in favor of an architecture-specific optimized implementation. The generic implementation using pending work flags did not work well with ARM and alignment faults. The address limit is checked on each syscall return path to user mode, as well as in the IRQ user-mode return function. If the address limit was changed, a function is called to report data corruption (stopping the kernel or process based on configuration). The address limit check has to be done before any pending work is handled, because pending work can reset the address limit and the process is killed using a SIGKILL signal. For example, the lkdtm address limit check does not work because the signal to kill the process will reset the user-mode address limit. Signed-off-by: Thomas Garnier <thgarnie@google.com> Signed-off-by: Kees Cook <keescook@chromium.org> Tested-by: Kees Cook <keescook@chromium.org> Tested-by: Leonard Crestez <leonard.crestez@nxp.com> Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Pratyush Anand <panand@redhat.com> Cc: Dave Martin <Dave.Martin@arm.com> Cc: Will Drewry <wad@chromium.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Russell King <linux@armlinux.org.uk> Cc: Andy Lutomirski <luto@amacapital.net> Cc: David Howells <dhowells@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: linux-api@vger.kernel.org Cc: Yonghong Song <yhs@fb.com> Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1504798247-48833-4-git-send-email-keescook@chromium.org
462 lines
11 KiB
ArmAsm
462 lines
11 KiB
ArmAsm
/*
|
|
* linux/arch/arm/kernel/entry-common.S
|
|
*
|
|
* Copyright (C) 2000 Russell King
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*/
|
|
|
|
#include <asm/assembler.h>
|
|
#include <asm/unistd.h>
|
|
#include <asm/ftrace.h>
|
|
#include <asm/unwind.h>
|
|
#include <asm/memory.h>
|
|
#ifdef CONFIG_AEABI
|
|
#include <asm/unistd-oabi.h>
|
|
#endif
|
|
|
|
/* Assembler-visible copy of __NR_syscalls, used for syscall range checks. */
	.equ	NR_syscalls, __NR_syscalls

/*
 * arch_ret_to_user tmp1, tmp2 - hook run just before returning to user space.
 *
 * With CONFIG_NEED_RET_TO_USER the platform header <mach/entry-macro.S> is
 * pulled in (presumably it supplies the macro - confirm against the
 * platform).  Otherwise an empty macro is defined so that every use in
 * this file assembles to nothing.
 */
#ifdef CONFIG_NEED_RET_TO_USER
#include <mach/entry-macro.S>
#else
	.macro	arch_ret_to_user, tmp1, tmp2
	.endm
#endif
|
|
|
|
#include "entry-header.S"
|
|
|
|
/*
 * Register aliases for the state captured on SWI entry.
 *
 * saved_psr is always r8.  When IRQ-flag tracing or context tracking is
 * configured, saved_pc is r9 and TRACE(x) emits x; in every other
 * configuration saved_pc is just lr and TRACE(x) expands to nothing.
 */
saved_psr	.req	r8
#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING)
saved_pc	.req	r9
#define TRACE(x...)		x
#else
saved_pc	.req	lr
#define TRACE(x...)
#endif
|
|
|
|
/* Align the syscall return path to a 2^5 = 32 byte boundary. */
	.align	5
#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING))
/*
 * This is the fast syscall return path.  We do as little as possible here,
 * such as avoiding writing r0 to the stack.  We only use this path if we
 * have tracing and context tracking disabled - the overheads from those
 * features make this path too inefficient.
 *
 * On entry r0 holds the syscall return value and tsk the thread_info
 * pointer.  The first thing done (with IRQs off) is the address limit
 * check: if the thread's TI_ADDR_LIMIT is not TASK_SIZE,
 * addr_limit_check_failed is called before any pending work is examined.
 *
 * TASK_SIZE is loaded with "ldr r1, =..." rather than being used as a
 * cmp immediate, so that the check assembles for any TASK_SIZE value and
 * not only for those encodable as a data-processing immediate.  r1 is
 * free at this point: it is reloaded with TI_FLAGS straight afterwards.
 * Any literal that is needed is placed in the .ltorg pool of this file.
 */
ret_fast_syscall:
 UNWIND(.fnstart	)
 UNWIND(.cantunwind	)
	disable_irq_notrace			@ disable interrupts
	ldr	r2, [tsk, #TI_ADDR_LIMIT]
	ldr	r1, =TASK_SIZE
	cmp	r2, r1				@ address limit unchanged?
	blne	addr_limit_check_failed
	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
	bne	fast_work_pending

	/* perform architecture specific actions before user return */
	arch_ret_to_user r1, lr

	restore_user_regs fast = 1, offset = S_OFF
 UNWIND(.fnend		)
ENDPROC(ret_fast_syscall)

	/* Ok, we need to do extra processing, enter the slow path. */
fast_work_pending:
	str	r0, [sp, #S_R0+S_OFF]!		@ returned r0
	/* fall through to work_pending */
#else
/*
 * The "replacement" ret_fast_syscall for when tracing or context tracking
 * is enabled.  As we will need to call out to some C functions, we save
 * r0 first to avoid needing to save registers around each C function call.
 *
 * The address limit check is the same as in the fast variant: TASK_SIZE
 * goes through r1 (dead until TI_FLAGS is loaded) so that no immediate
 * encoding restriction applies.
 */
ret_fast_syscall:
 UNWIND(.fnstart	)
 UNWIND(.cantunwind	)
	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
	disable_irq_notrace			@ disable interrupts
	ldr	r2, [tsk, #TI_ADDR_LIMIT]
	ldr	r1, =TASK_SIZE
	cmp	r2, r1				@ address limit unchanged?
	blne	addr_limit_check_failed
	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
	beq	no_work_pending
 UNWIND(.fnend		)
ENDPROC(ret_fast_syscall)

	/* Slower path - fall through to work_pending */
#endif

	/*
	 * Work is pending.  r1 = thread flags, sp = saved register frame
	 * with the returned r0 already stored.  Syscall exit tracing is
	 * handled first; everything else goes to do_work_pending().
	 */
	tst	r1, #_TIF_SYSCALL_WORK
	bne	__sys_trace_return_nosave
slow_work_pending:
	mov	r0, sp				@ 'regs'
	mov	r2, why				@ 'syscall'
	bl	do_work_pending
	cmp	r0, #0
	beq	no_work_pending			@ 0: nothing left, return to user
	/* negative result: restart through restart_syscall; else re-issue */
	movlt	scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
	ldmia	sp, {r0 - r6}			@ have to reload r0 - r6
	b	local_restart			@ ... and off we go
ENDPROC(ret_fast_syscall)
|
|
|
|
/*
 * "slow" syscall return path.  "why" tells us if this was a real syscall.
 * IRQs may be enabled here, so always disable them.  Note that we use the
 * "notrace" version to avoid calling into the tracing code unnecessarily.
 * do_work_pending() will update this state if necessary.
 *
 * ret_to_user_from_irq is the entry used when IRQs are already disabled.
 * Both entries first verify that the thread's address limit is still
 * TASK_SIZE and call addr_limit_check_failed if it is not; this happens
 * before the pending-work flags are looked at.
 *
 * TASK_SIZE is loaded with "ldr r1, =..." instead of being used as a cmp
 * immediate, so the check assembles for any TASK_SIZE value and not only
 * for those encodable as a data-processing immediate.  r1 is free here:
 * it is reloaded with TI_FLAGS immediately afterwards.  Any literal that
 * is needed is placed in the .ltorg pool of this file.
 */
ENTRY(ret_to_user)
ret_slow_syscall:
	disable_irq_notrace			@ disable interrupts
ENTRY(ret_to_user_from_irq)
	ldr	r2, [tsk, #TI_ADDR_LIMIT]
	ldr	r1, =TASK_SIZE
	cmp	r2, r1				@ address limit unchanged?
	blne	addr_limit_check_failed
	ldr	r1, [tsk, #TI_FLAGS]
	tst	r1, #_TIF_WORK_MASK
	bne	slow_work_pending
no_work_pending:
	asm_trace_hardirqs_on save = 0

	/* perform architecture specific actions before user return */
	arch_ret_to_user r1, lr
	ct_user_enter save = 0

	restore_user_regs fast = 0, offset = 0
ENDPROC(ret_to_user_from_irq)
ENDPROC(ret_to_user)
|
|
|
|
/*
 * First code executed by a freshly forked task.
 *
 * schedule_tail() is called first.  If r5 is non-zero it is then called
 * as a function with r4 as its only argument and local label 1 as its
 * return address (presumably the kernel-thread case - confirm against
 * copy_thread).  In either case the task finally leaves through
 * ret_slow_syscall with tsk reloaded.
 */
ENTRY(ret_from_fork)
	bl	schedule_tail
	cmp	r5, #0				@ function to run?
	movne	r0, r4				@ its argument
	badrne	lr, 1f				@ come back to 1: afterwards
	retne	r5
1:	get_thread_info tsk
	b	ret_slow_syscall
ENDPROC(ret_from_fork)
|
|
|
|
/*=============================================================================
 * SWI handler
 *-----------------------------------------------------------------------------
 * Entry point for system calls.  Builds the saved register frame on the
 * stack, works out the syscall number (scno) and table (tbl) for the
 * configured ABI(s), then dispatches through the table with
 * ret_fast_syscall as the return address.
 */

	.align	5
ENTRY(vector_swi)
#ifdef CONFIG_CPU_V7M
	v7m_exception_entry
#else
	sub	sp, sp, #PT_REGS_SIZE		@ room for the register frame
	stmia	sp, {r0 - r12}			@ caller r0 - r12
 ARM(	add	r8, sp, #S_PC		)
 ARM(	stmdb	r8, {sp, lr}^		)	@ caller sp, lr
 THUMB(	mov	r8, sp			)
 THUMB(	store_user_sp_lr r8, r10, S_SP	)	@ caller sp, lr
	mrs	saved_psr, spsr			@ not FIQ mode, so this is fine
 TRACE(	mov	saved_pc, lr		)
	str	saved_pc, [sp, #S_PC]		@ caller PC
	str	saved_psr, [sp, #S_PSR]		@ caller CPSR
	str	r0, [sp, #S_OLD_R0]		@ original r0
#endif
	zero_fp
	alignment_trap r10, ip, __cr_alignment
	asm_trace_hardirqs_on save=0
	enable_irq_notrace
	ct_user_exit save=0

	/*
	 * Work out the system call number.
	 */

#if defined(CONFIG_OABI_COMPAT)

	/*
	 * Both ABIs are supported: fetch the SWI instruction itself so
	 * that its immediate field can tell an EABI call from an old ABI
	 * call.  Thumb callers never get OABI emulation (r10 = 0).
	 */
#ifdef CONFIG_ARM_THUMB
	tst	saved_psr, #PSR_T_BIT
	movne	r10, #0				@ Thumb: no OABI emulation
 USER(	ldreq	r10, [saved_pc, #-4]	)	@ fetch the SWI instruction
#else
 USER(	ldr	r10, [saved_pc, #-4]	)	@ fetch the SWI instruction
#endif
 ARM_BE8(rev	r10, r10)			@ instruction is little endian

#elif defined(CONFIG_AEABI)

	/*
	 * EABI-only user space always passes the syscall number in
	 * scno (r7), so there is nothing to fetch.
	 */
#elif defined(CONFIG_ARM_THUMB)
	/* Old ABI only, caller may be in Thumb state. */
	tst	saved_psr, #PSR_T_BIT		@ SPSR captured on entry
	addne	scno, r7, #__NR_SYSCALL_BASE	@ Thumb: number in r7, add base
 USER(	ldreq	scno, [saved_pc, #-4]	)

#else
	/* Old ABI only. */
 USER(	ldr	scno, [saved_pc, #-4]	)	@ fetch the SWI instruction
#endif

	/* From here on saved_psr and saved_pc are no longer needed. */

	uaccess_disable tbl

	adr	tbl, sys_call_table		@ native syscall table

#if defined(CONFIG_OABI_COMPAT)
	/*
	 * A zero SWI immediate means an EABI call: scno and tbl stay as
	 * they are.
	 *
	 * Anything else is an old ABI call: derive scno from the
	 * immediate and switch to the old ABI syscall table.
	 */
	bics	r10, r10, #0xff000000
	eorne	scno, r10, #__NR_OABI_SYSCALL_BASE
	ldrne	tbl, =sys_oabi_call_table
#elif !defined(CONFIG_AEABI)
	bic	scno, scno, #0xff000000		@ strip the SWI opcode bits
	eor	scno, scno, #__NR_SYSCALL_BASE	@ remove the OS number
#endif
	get_thread_info tsk
	/*
	 * The tracing / context tracking calls made above may have
	 * clobbered the argument registers, so fetch them again from
	 * the saved frame.
	 */
 TRACE(	ldmia	sp, {r0 - r3}		)

local_restart:
	ldr	r10, [tsk, #TI_FLAGS]		@ thread flags for trace check
	stmdb	sp!, {r4, r5}			@ args five and six go on the stack

	tst	r10, #_TIF_SYSCALL_WORK		@ syscall tracing active?
	bne	__sys_trace

	cmp	scno, #NR_syscalls		@ within the table?
	badr	lr, ret_fast_syscall		@ where the handler returns to
	ldrcc	pc, [tbl, scno, lsl #2]		@ jump to the sys_* routine

	/* Out of table range: ARM private call or not implemented. */
	add	r1, sp, #S_OFF
2:	cmp	scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
	eor	r0, scno, #__NR_SYSCALL_BASE	@ restore the OS number
	bcs	arm_syscall
	mov	why, #0				@ not a real syscall any more
	b	sys_ni_syscall			@ not an ARM private call

#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
	/*
	 * Reading the SWI instruction faulted and the fault could not be
	 * handled.  Returning -EFAULT is not really an option from here,
	 * so point the saved PC back at the SWI instruction and return:
	 * user space re-executes it and goes through the normal user
	 * fault path, which will most likely deliver SEGV to the task.
	 *
	 * NOTE(review): this is reached before local_restart has pushed
	 * the two stack argument slots, yet ret_fast_syscall restores
	 * with offset = S_OFF - confirm the stack offset is as intended.
	 */
9001:
	sub	lr, saved_pc, #4
	str	lr, [sp, #S_PC]
	get_thread_info tsk
	b	ret_fast_syscall
#endif
ENDPROC(vector_swi)
|
|
|
|
/*
 * Traced syscall entry - the really slow path.  Expect context switches
 * and waiting for the parent to respond.
 *
 * syscall_trace_enter(regs, scno) returns the syscall number to run,
 * which may differ from the one requested.  If it is inside the table,
 * the arguments are reloaded from the saved frame and the handler is
 * called with __sys_trace_return as return address.  -1 means the
 * syscall is skipped; any other out-of-range value is handed to local
 * label 2 in vector_swi.
 */
__sys_trace:
	mov	r1, scno
	add	r0, sp, #S_OFF
	bl	syscall_trace_enter

	badr	lr, __sys_trace_return		@ where the handler returns to
	mov	scno, r0			@ number to run (maybe changed)
	add	r1, sp, #S_R0 + S_OFF		@ saved register frame
	cmp	scno, #NR_syscalls		@ within the table?
	ldmccia	r1, {r0 - r6}			@ reload r0 - r6 from the frame
	stmccia	sp, {r4, r5}			@ refresh the stack arguments
	ldrcc	pc, [tbl, scno, lsl #2]		@ jump to the sys_* routine
	cmp	scno, #-1			@ asked to skip the syscall?
	bne	2b
	add	sp, sp, #S_OFF			@ drop the stack argument slots
	b	ret_slow_syscall
|
|
|
|
/*
 * Return address used for traced syscalls (installed by __sys_trace).
 * Stores the result into the saved frame, moving sp up to the frame in
 * the same step, then reports the exit via syscall_trace_exit(regs).
 */
__sys_trace_return:
	str	r0, [sp, #S_R0 + S_OFF]!	@ result into frame, sp -> frame
	mov	r0, sp
	bl	syscall_trace_exit
	b	ret_slow_syscall
|
|
|
|
/*
 * Syscall exit tracing when the result is already in the saved frame.
 * Reached from the ret_fast_syscall work path, which has disabled IRQs
 * and stored r0; IRQs are turned back on before syscall_trace_exit(regs)
 * is called.
 */
__sys_trace_return_nosave:
	enable_irq_notrace
	mov	r0, sp
	bl	syscall_trace_exit
	b	ret_slow_syscall
|
|
|
|
/*
 * Data referenced from the entry code above: with CONFIG_ALIGNMENT_TRAP,
 * a word holding the address of cr_alignment (passed to alignment_trap
 * in vector_swi), followed by the literal pool for this part of the file.
 */
	.align	5
#ifdef CONFIG_ALIGNMENT_TRAP
	.type	__cr_alignment, #object
__cr_alignment:
	.word	cr_alignment
#endif
	.ltorg
|
|
|
|
/*
 * syscall_table_start sym - open a syscall table named \sym.
 * Resets the running slot counter __sys_nr to zero and declares \sym as
 * an object symbol at the current location.
 */
	.macro	syscall_table_start, sym
	.equ	__sys_nr, 0
	.type	\sym, #object
	ENTRY(\sym)
	.endm
|
|
|
|
/*
 * syscall nr, func - place \func in slot \nr of the table being built.
 * Slots must be given in increasing order: a \nr below the running
 * counter __sys_nr is a build error.  Slots skipped between __sys_nr and
 * \nr are filled with sys_ni_syscall, and __sys_nr moves on to \nr + 1.
 */
	.macro	syscall, nr, func
	.ifgt	__sys_nr - \nr
	.error	"Duplicated/unorded system call entry"
	.endif
	.rept	\nr - __sys_nr
	.long	sys_ni_syscall
	.endr
	.long	\func
	.equ	__sys_nr, \nr + 1
	.endm
|
|
|
|
/*
 * syscall_table_end sym - close the syscall table named \sym.
 * More than __NR_syscalls slots is a build error.  Remaining slots up to
 * __NR_syscalls are filled with sys_ni_syscall and the size of \sym is
 * recorded.
 */
	.macro	syscall_table_end, sym
	.ifgt	__sys_nr - __NR_syscalls
	.error	"System call table too big"
	.endif
	.rept	__NR_syscalls - __sys_nr
	.long	sys_ni_syscall
	.endr
	.size	\sym, . - \sym
	.endm
|
|
|
|
/* NATIVE(nr, func): table entry that uses the same handler for every ABI. */
#define NATIVE(nr, func) syscall nr, func

/*
 * Syscall table for the native ABI.  COMPAT() entries select their
 * "native" handler here.  The entry list comes from calls-eabi.S when
 * CONFIG_AEABI is set and from calls-oabi.S otherwise; with EABI a
 * couple of syscalls are obsolete and defined as sys_ni_syscall.
 */
	syscall_table_start sys_call_table
#define COMPAT(nr, native, compat) syscall nr, native
#ifdef CONFIG_AEABI
#include <calls-eabi.S>
#else
#include <calls-oabi.S>
#endif
#undef COMPAT
	syscall_table_end sys_call_table
|
|
|
|
/*============================================================================
 * Special system call wrappers
 */

/*
 * sys_syscall - indirect system call.
 *   r0 = syscall number to invoke
 *   r8 = syscall table
 *
 * The number is taken from r0 with the __NR_OABI_SYSCALL_BASE bits
 * cleared.  If it is sys_syscall itself or lies outside the table, the
 * call ends in sys_ni_syscall.  Otherwise every argument moves down one
 * position (r1-r4 into r0-r3, r5/r6 into the two stack slots) and the
 * real handler is entered through the table.
 */
sys_syscall:
	bic	scno, r0, #__NR_OABI_SYSCALL_BASE
	cmp	scno, #__NR_syscall - __NR_SYSCALL_BASE
	cmpne	scno, #NR_syscalls		@ range check
	stmloia	sp, {r5, r6}			@ move arguments down by one
	movlo	r0, r1
	movlo	r1, r2
	movlo	r2, r3
	movlo	r3, r4
	ldrlo	pc, [tbl, scno, lsl #2]
	b	sys_ni_syscall
ENDPROC(sys_syscall)
|
|
|
|
/*
 * Tail-calls sys_sigreturn with r0 = sp + S_OFF (the saved register
 * frame) and clears "why" so that no syscall restart handling is done
 * on the way out.
 */
sys_sigreturn_wrapper:
	add	r0, sp, #S_OFF
	mov	why, #0				@ suppress syscall restart handling
	b	sys_sigreturn
ENDPROC(sys_sigreturn_wrapper)
|
|
|
|
/*
 * Tail-calls sys_rt_sigreturn with r0 = sp + S_OFF (the saved register
 * frame) and clears "why" so that no syscall restart handling is done
 * on the way out.
 */
sys_rt_sigreturn_wrapper:
	add	r0, sp, #S_OFF
	mov	why, #0				@ suppress syscall restart handling
	b	sys_rt_sigreturn
ENDPROC(sys_rt_sigreturn_wrapper)
|
|
|
|
/*
 * Tail-calls sys_statfs64, replacing a second argument (r1) of 88 by 84.
 * NOTE(review): presumably r1 is the buffer size and the two values are
 * the padded and unpadded sizes of struct statfs64 - confirm.
 */
sys_statfs64_wrapper:
	teq	r1, #88
	moveq	r1, #84
	b	sys_statfs64
ENDPROC(sys_statfs64_wrapper)
|
|
|
|
/*
 * Tail-calls sys_fstatfs64, replacing a second argument (r1) of 88 by 84.
 * NOTE(review): presumably r1 is the buffer size and the two values are
 * the padded and unpadded sizes of struct statfs64 - confirm.
 */
sys_fstatfs64_wrapper:
	teq	r1, #88
	moveq	r1, #84
	b	sys_fstatfs64
ENDPROC(sys_fstatfs64_wrapper)
|
|
|
|
/*
 * sys_mmap2 - mmap with the file offset (off_4k, r5) given in 4K units.
 *
 * With pages larger than 4K the offset must be a whole number of pages:
 * if any PGOFF_MASK bit is set the call returns -EINVAL, otherwise the
 * offset is converted to page units.  The resulting value is written to
 * the sixth-argument stack slot and sys_mmap_pgoff is tail-called.
 */
sys_mmap2:
#if PAGE_SHIFT > 12
	tst	r5, #PGOFF_MASK			@ offset page aligned?
	moveq	r5, r5, lsr #PAGE_SHIFT - 12	@ 4K units -> page units
	streq	r5, [sp, #4]
	beq	sys_mmap_pgoff
	mov	r0, #-EINVAL
	ret	lr
#else
	str	r5, [sp, #4]
	b	sys_mmap_pgoff
#endif
ENDPROC(sys_mmap2)
|
|
|
|
#ifdef CONFIG_OABI_COMPAT
|
|
|
|
/*
|
|
* These are syscalls with argument register differences
|
|
*/
|
|
|
|
/*
 * Old ABI pread64: write r3 and r4 to the two stack argument slots at
 * sp, then tail-call sys_pread64 (presumably these carry the 64-bit
 * offset - confirm).
 */
sys_oabi_pread64:
	stmia	sp, {r3, r4}
	b	sys_pread64
ENDPROC(sys_oabi_pread64)
|
|
|
|
/*
 * Old ABI pwrite64: write r3 and r4 to the two stack argument slots at
 * sp, then tail-call sys_pwrite64 (presumably these carry the 64-bit
 * offset - confirm).
 */
sys_oabi_pwrite64:
	stmia	sp, {r3, r4}
	b	sys_pwrite64
ENDPROC(sys_oabi_pwrite64)
|
|
|
|
/*
 * Old ABI truncate64: move the values in r1/r2 up into r2/r3, then
 * tail-call sys_truncate64 (presumably the 64-bit length - confirm).
 */
sys_oabi_truncate64:
	mov	r3, r2				@ r2 first, before it is overwritten
	mov	r2, r1
	b	sys_truncate64
ENDPROC(sys_oabi_truncate64)
|
|
|
|
/*
 * Old ABI ftruncate64: move the values in r1/r2 up into r2/r3, then
 * tail-call sys_ftruncate64 (presumably the 64-bit length - confirm).
 */
sys_oabi_ftruncate64:
	mov	r3, r2				@ r2 first, before it is overwritten
	mov	r2, r1
	b	sys_ftruncate64
ENDPROC(sys_oabi_ftruncate64)
|
|
|
|
/*
 * Old ABI readahead: r3 goes to the first stack argument slot, the
 * values in r1/r2 move up into r2/r3, then sys_readahead is tail-called.
 */
sys_oabi_readahead:
	str	r3, [sp]			@ save r3 before it is overwritten
	mov	r3, r2
	mov	r2, r1
	b	sys_readahead
ENDPROC(sys_oabi_readahead)
|
|
|
|
/*
 * Second syscall table, used for old ABI binaries.  It is built from the
 * same calls-oabi.S list, but COMPAT() entries pick their "compat"
 * handler instead of the native one.
 */
	syscall_table_start sys_oabi_call_table
#define COMPAT(nr, native, compat) syscall nr, compat
#include <calls-oabi.S>
	syscall_table_end sys_oabi_call_table
|
|
|
|
#endif
|
|
|