mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-04 02:06:43 +07:00
Misc fixes:
- Fix more generic entry code ABI fallout - Fix debug register handling bugs - Fix vmalloc mappings on 32-bit kernels - Fix kprobes instrumentation output on 32-bit kernels - Fix over-eager WARN_ON_ONCE() on !SMAP hardware - Fix NUMA debugging - Fix Clang related crash on !RETPOLINE kernels Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAl9UljIRHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1g1yA//VecoyJOw4jb43LdkeKDGtUjCsPVZlt4w fw55nT4taqqbgl9mQjrJQlh8thtk7LvAqcsrEGk/SH+1fp/hDvBG0i3etyI1mPJ2 t97MCVtD1bz2zyLpOtGN48tgiRxSazr4S9nZPCLTec+c75I3pmJssj44m/eJi/Z2 hoj/syiO4J0BPa7a1ou++Jeyag6J+PgXdJTOMyjuqi99vqai1aTVKo8GdWMInext +fJNYd0ZQRj1FxVdMusDfzxOk7N7b8nAzvd30iJN67R6QwoEazO12K1F4IYQmHSq 0rhHrwe0lTLtjmYdp/ef14kfzD7DRFN6Nv2gk/zyZsH+tjGflxTZConkFPnfoJEc 33cNHfigh0V9TSVNDDhHnkRyy6dzCHkYHEf33KFuX3amC236TgrCEL7+oWE2rcNp 9PJbPGlXCqNb2feNy2de4cY+KiZ2a1N/T4VcdMK6DEdENFh5T03EZgIChQEd0S99 LNBYHqTWJdQEKfkzfAXlR4Bd2hX1LWLMM6rNcXxInrH7rWDXUCS0X9m3gLZR9DIs 7/nXoK4OkaJdgH/D2CToDgwMNT5hlIiTGtVtB3H6Qz8eQQ4+fwTyboQDqpeG4Upy LfOH2h5Fo33FCgqnrua8IsgUKLwW2yJGdghJpcd9d0qfVUDEJuXGo6xe6SEHdSu/ VEiQtFUf50U= =EhRy -----END PGP SIGNATURE----- Merge tag 'x86-urgent-2020-09-06' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 fixes from Ingo Molnar: - more generic entry code ABI fallout - debug register handling bugfixes - fix vmalloc mappings on 32-bit kernels - kprobes instrumentation output fix on 32-bit kernels - fix over-eager WARN_ON_ONCE() on !SMAP hardware - NUMA debugging fix - fix Clang related crash on !RETPOLINE kernels * tag 'x86-urgent-2020-09-06' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/entry: Unbreak 32bit fast syscall x86/debug: Allow a single level of #DB recursion x86/entry: Fix AC assertion tracing/kprobes, x86/ptrace: Fix regs argument order for i386 x86, fakenuma: Fix invalid starting node ID x86/mm/32: Bring back vmalloc faulting on x86_32 x86/cmdline: Disable jump tables for cmdline.c
This commit is contained in:
commit
015b3155c4
@ -60,16 +60,10 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
|
||||
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
|
||||
static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
|
||||
{
|
||||
unsigned int nr = (unsigned int)regs->orig_ax;
|
||||
|
||||
if (IS_ENABLED(CONFIG_IA32_EMULATION))
|
||||
current_thread_info()->status |= TS_COMPAT;
|
||||
/*
|
||||
* Subtlety here: if ptrace pokes something larger than 2^32-1 into
|
||||
* orig_ax, the unsigned int return value truncates it. This may
|
||||
* or may not be necessary, but it matches the old asm behavior.
|
||||
*/
|
||||
return (unsigned int)syscall_enter_from_user_mode(regs, nr);
|
||||
|
||||
return (unsigned int)regs->orig_ax;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -91,6 +85,13 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
|
||||
{
|
||||
unsigned int nr = syscall_32_enter(regs);
|
||||
|
||||
/*
|
||||
* Subtlety here: if ptrace pokes something larger than 2^32-1 into
|
||||
* orig_ax, the unsigned int return value truncates it. This may
|
||||
* or may not be necessary, but it matches the old asm behavior.
|
||||
*/
|
||||
nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
|
||||
|
||||
do_syscall_32_irqs_on(regs, nr);
|
||||
syscall_exit_to_user_mode(regs);
|
||||
}
|
||||
@ -100,6 +101,13 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
|
||||
unsigned int nr = syscall_32_enter(regs);
|
||||
int res;
|
||||
|
||||
/*
|
||||
* This cannot use syscall_enter_from_user_mode() as it has to
|
||||
* fetch EBP before invoking any of the syscall entry work
|
||||
* functions.
|
||||
*/
|
||||
syscall_enter_from_user_mode_prepare(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
/* Fetch EBP from where the vDSO stashed it. */
|
||||
if (IS_ENABLED(CONFIG_X86_64)) {
|
||||
@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* The case truncates any ptrace induced syscall nr > 2^32 -1 */
|
||||
nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);
|
||||
|
||||
/* Now this is just like a normal syscall. */
|
||||
do_syscall_32_irqs_on(regs, nr);
|
||||
syscall_exit_to_user_mode(regs);
|
||||
|
@ -18,8 +18,16 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs)
|
||||
* state, not the interrupt state as imagined by Xen.
|
||||
*/
|
||||
unsigned long flags = native_save_fl();
|
||||
WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
|
||||
X86_EFLAGS_NT));
|
||||
unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT;
|
||||
|
||||
/*
|
||||
* For !SMAP hardware we patch out CLAC on entry.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_SMAP) ||
|
||||
(IS_ENABLED(CONFIG_64_BIT) && boot_cpu_has(X86_FEATURE_XENPV)))
|
||||
mask |= X86_EFLAGS_AC;
|
||||
|
||||
WARN_ON_ONCE(flags & mask);
|
||||
|
||||
/* We think we came from user mode. Make sure pt_regs agrees. */
|
||||
WARN_ON_ONCE(!user_mode(regs));
|
||||
|
@ -327,8 +327,8 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
|
||||
static const unsigned int argument_offs[] = {
|
||||
#ifdef __i386__
|
||||
offsetof(struct pt_regs, ax),
|
||||
offsetof(struct pt_regs, cx),
|
||||
offsetof(struct pt_regs, dx),
|
||||
offsetof(struct pt_regs, cx),
|
||||
#define NR_REG_ARGUMENTS 3
|
||||
#else
|
||||
offsetof(struct pt_regs, di),
|
||||
|
@ -729,20 +729,9 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
|
||||
#endif
|
||||
}
|
||||
|
||||
static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
|
||||
static __always_inline unsigned long debug_read_clear_dr6(void)
|
||||
{
|
||||
/*
|
||||
* Disable breakpoints during exception handling; recursive exceptions
|
||||
* are exceedingly 'fun'.
|
||||
*
|
||||
* Since this function is NOKPROBE, and that also applies to
|
||||
* HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
|
||||
* HW_BREAKPOINT_W on our stack)
|
||||
*
|
||||
* Entry text is excluded for HW_BP_X and cpu_entry_area, which
|
||||
* includes the entry stack is excluded for everything.
|
||||
*/
|
||||
*dr7 = local_db_save();
|
||||
unsigned long dr6;
|
||||
|
||||
/*
|
||||
* The Intel SDM says:
|
||||
@ -755,15 +744,12 @@ static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
|
||||
*
|
||||
* Keep it simple: clear DR6 immediately.
|
||||
*/
|
||||
get_debugreg(*dr6, 6);
|
||||
get_debugreg(dr6, 6);
|
||||
set_debugreg(0, 6);
|
||||
/* Filter out all the reserved bits which are preset to 1 */
|
||||
*dr6 &= ~DR6_RESERVED;
|
||||
}
|
||||
dr6 &= ~DR6_RESERVED;
|
||||
|
||||
static __always_inline void debug_exit(unsigned long dr7)
|
||||
{
|
||||
local_db_restore(dr7);
|
||||
return dr6;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -863,6 +849,18 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user)
|
||||
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
|
||||
unsigned long dr6)
|
||||
{
|
||||
/*
|
||||
* Disable breakpoints during exception handling; recursive exceptions
|
||||
* are exceedingly 'fun'.
|
||||
*
|
||||
* Since this function is NOKPROBE, and that also applies to
|
||||
* HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a
|
||||
* HW_BREAKPOINT_W on our stack)
|
||||
*
|
||||
* Entry text is excluded for HW_BP_X and cpu_entry_area, which
|
||||
* includes the entry stack is excluded for everything.
|
||||
*/
|
||||
unsigned long dr7 = local_db_save();
|
||||
bool irq_state = idtentry_enter_nmi(regs);
|
||||
instrumentation_begin();
|
||||
|
||||
@ -883,6 +881,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
|
||||
|
||||
instrumentation_end();
|
||||
idtentry_exit_nmi(regs, irq_state);
|
||||
|
||||
local_db_restore(dr7);
|
||||
}
|
||||
|
||||
static __always_inline void exc_debug_user(struct pt_regs *regs,
|
||||
@ -894,6 +894,15 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
|
||||
*/
|
||||
WARN_ON_ONCE(!user_mode(regs));
|
||||
|
||||
/*
|
||||
* NB: We can't easily clear DR7 here because
|
||||
* idtentry_exit_to_usermode() can invoke ptrace, schedule, access
|
||||
* user memory, etc. This means that a recursive #DB is possible. If
|
||||
* this happens, that #DB will hit exc_debug_kernel() and clear DR7.
|
||||
* Since we're not on the IST stack right now, everything will be
|
||||
* fine.
|
||||
*/
|
||||
|
||||
irqentry_enter_from_user_mode(regs);
|
||||
instrumentation_begin();
|
||||
|
||||
@ -907,36 +916,24 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
|
||||
/* IST stack entry */
|
||||
DEFINE_IDTENTRY_DEBUG(exc_debug)
|
||||
{
|
||||
unsigned long dr6, dr7;
|
||||
|
||||
debug_enter(&dr6, &dr7);
|
||||
exc_debug_kernel(regs, dr6);
|
||||
debug_exit(dr7);
|
||||
exc_debug_kernel(regs, debug_read_clear_dr6());
|
||||
}
|
||||
|
||||
/* User entry, runs on regular task stack */
|
||||
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
|
||||
{
|
||||
unsigned long dr6, dr7;
|
||||
|
||||
debug_enter(&dr6, &dr7);
|
||||
exc_debug_user(regs, dr6);
|
||||
debug_exit(dr7);
|
||||
exc_debug_user(regs, debug_read_clear_dr6());
|
||||
}
|
||||
#else
|
||||
/* 32 bit does not have separate entry points. */
|
||||
DEFINE_IDTENTRY_RAW(exc_debug)
|
||||
{
|
||||
unsigned long dr6, dr7;
|
||||
|
||||
debug_enter(&dr6, &dr7);
|
||||
unsigned long dr6 = debug_read_clear_dr6();
|
||||
|
||||
if (user_mode(regs))
|
||||
exc_debug_user(regs, dr6);
|
||||
else
|
||||
exc_debug_kernel(regs, dr6);
|
||||
|
||||
debug_exit(dr7);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -24,7 +24,7 @@ ifdef CONFIG_FUNCTION_TRACER
|
||||
CFLAGS_REMOVE_cmdline.o = -pg
|
||||
endif
|
||||
|
||||
CFLAGS_cmdline.o := -fno-stack-protector
|
||||
CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables
|
||||
endif
|
||||
|
||||
inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
|
||||
|
@ -190,6 +190,53 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
|
||||
return pmd_k;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle a fault on the vmalloc or module mapping area
|
||||
*
|
||||
* This is needed because there is a race condition between the time
|
||||
* when the vmalloc mapping code updates the PMD to the point in time
|
||||
* where it synchronizes this update with the other page-tables in the
|
||||
* system.
|
||||
*
|
||||
* In this race window another thread/CPU can map an area on the same
|
||||
* PMD, finds it already present and does not synchronize it with the
|
||||
* rest of the system yet. As a result v[mz]alloc might return areas
|
||||
* which are not mapped in every page-table in the system, causing an
|
||||
* unhandled page-fault when they are accessed.
|
||||
*/
|
||||
static noinline int vmalloc_fault(unsigned long address)
|
||||
{
|
||||
unsigned long pgd_paddr;
|
||||
pmd_t *pmd_k;
|
||||
pte_t *pte_k;
|
||||
|
||||
/* Make sure we are in vmalloc area: */
|
||||
if (!(address >= VMALLOC_START && address < VMALLOC_END))
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Synchronize this task's top level page-table
|
||||
* with the 'reference' page table.
|
||||
*
|
||||
* Do _not_ use "current" here. We might be inside
|
||||
* an interrupt in the middle of a task switch..
|
||||
*/
|
||||
pgd_paddr = read_cr3_pa();
|
||||
pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
|
||||
if (!pmd_k)
|
||||
return -1;
|
||||
|
||||
if (pmd_large(*pmd_k))
|
||||
return 0;
|
||||
|
||||
pte_k = pte_offset_kernel(pmd_k, address);
|
||||
if (!pte_present(*pte_k))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
NOKPROBE_SYMBOL(vmalloc_fault);
|
||||
|
||||
void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long addr;
|
||||
@ -1110,6 +1157,37 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
|
||||
*/
|
||||
WARN_ON_ONCE(hw_error_code & X86_PF_PK);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* We can fault-in kernel-space virtual memory on-demand. The
|
||||
* 'reference' page table is init_mm.pgd.
|
||||
*
|
||||
* NOTE! We MUST NOT take any locks for this case. We may
|
||||
* be in an interrupt or a critical region, and should
|
||||
* only copy the information from the master page table,
|
||||
* nothing more.
|
||||
*
|
||||
* Before doing this on-demand faulting, ensure that the
|
||||
* fault is not any of the following:
|
||||
* 1. A fault on a PTE with a reserved bit set.
|
||||
* 2. A fault caused by a user-mode access. (Do not demand-
|
||||
* fault kernel memory due to user-mode accesses).
|
||||
* 3. A fault caused by a page-level protection violation.
|
||||
* (A demand fault would be on a non-present page which
|
||||
* would have X86_PF_PROT==0).
|
||||
*
|
||||
* This is only needed to close a race condition on x86-32 in
|
||||
* the vmalloc mapping/unmapping code. See the comment above
|
||||
* vmalloc_fault() for details. On x86-64 the race does not
|
||||
* exist as the vmalloc mappings don't need to be synchronized
|
||||
* there.
|
||||
*/
|
||||
if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
|
||||
if (vmalloc_fault(address) >= 0)
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Was the fault spurious, caused by lazy TLB invalidation? */
|
||||
if (spurious_kernel_fault(hw_error_code, address))
|
||||
return;
|
||||
|
@ -321,7 +321,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
|
||||
u64 addr, u64 max_addr, u64 size)
|
||||
{
|
||||
return split_nodes_size_interleave_uniform(ei, pi, addr, max_addr, size,
|
||||
0, NULL, NUMA_NO_NODE);
|
||||
0, NULL, 0);
|
||||
}
|
||||
|
||||
static int __init setup_emu2phys_nid(int *dfl_phys_nid)
|
||||
|
@ -110,15 +110,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
|
||||
#endif
|
||||
|
||||
/**
|
||||
* syscall_enter_from_user_mode - Check and handle work before invoking
|
||||
* syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
|
||||
* @regs: Pointer to currents pt_regs
|
||||
*
|
||||
* Invoked from architecture specific syscall entry code with interrupts
|
||||
* disabled. The calling code has to be non-instrumentable. When the
|
||||
* function returns all state is correct, interrupts are enabled and the
|
||||
* subsequent functions can be instrumented.
|
||||
*
|
||||
* This handles lockdep, RCU (context tracking) and tracing state.
|
||||
*
|
||||
* This is invoked when there is extra architecture specific functionality
|
||||
* to be done between establishing state and handling user mode entry work.
|
||||
*/
|
||||
void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
|
||||
|
||||
/**
|
||||
* syscall_enter_from_user_mode_work - Check and handle work before invoking
|
||||
* a syscall
|
||||
* @regs: Pointer to currents pt_regs
|
||||
* @syscall: The syscall number
|
||||
*
|
||||
* Invoked from architecture specific syscall entry code with interrupts
|
||||
* disabled. The calling code has to be non-instrumentable. When the
|
||||
* function returns all state is correct and the subsequent functions can be
|
||||
* instrumented.
|
||||
* enabled after invoking syscall_enter_from_user_mode_prepare() and extra
|
||||
* architecture specific work.
|
||||
*
|
||||
* Returns: The original or a modified syscall number
|
||||
*
|
||||
@ -127,12 +142,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
|
||||
* syscall_set_return_value() first. If neither of those are called and -1
|
||||
* is returned, then the syscall will fail with ENOSYS.
|
||||
*
|
||||
* The following functionality is handled here:
|
||||
* It handles the following work items:
|
||||
*
|
||||
* 1) Establish state (lockdep, RCU (context tracking), tracing)
|
||||
* 2) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
|
||||
* 1) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
|
||||
* __secure_computing(), trace_sys_enter()
|
||||
* 3) Invocation of audit_syscall_entry()
|
||||
* 2) Invocation of audit_syscall_entry()
|
||||
*/
|
||||
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
|
||||
|
||||
/**
|
||||
* syscall_enter_from_user_mode - Establish state and check and handle work
|
||||
* before invoking a syscall
|
||||
* @regs: Pointer to currents pt_regs
|
||||
* @syscall: The syscall number
|
||||
*
|
||||
* Invoked from architecture specific syscall entry code with interrupts
|
||||
* disabled. The calling code has to be non-instrumentable. When the
|
||||
* function returns all state is correct, interrupts are enabled and the
|
||||
* subsequent functions can be instrumented.
|
||||
*
|
||||
* This is combination of syscall_enter_from_user_mode_prepare() and
|
||||
* syscall_enter_from_user_mode_work().
|
||||
*
|
||||
* Returns: The original or a modified syscall number. See
|
||||
* syscall_enter_from_user_mode_work() for further explanation.
|
||||
*/
|
||||
long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
|
||||
|
||||
|
@ -69,22 +69,45 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
|
||||
return ret ? : syscall_get_nr(current, regs);
|
||||
}
|
||||
|
||||
noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
|
||||
static __always_inline long
|
||||
__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
|
||||
{
|
||||
unsigned long ti_work;
|
||||
|
||||
enter_from_user_mode(regs);
|
||||
instrumentation_begin();
|
||||
|
||||
local_irq_enable();
|
||||
ti_work = READ_ONCE(current_thread_info()->flags);
|
||||
if (ti_work & SYSCALL_ENTER_WORK)
|
||||
syscall = syscall_trace_enter(regs, syscall, ti_work);
|
||||
instrumentation_end();
|
||||
|
||||
return syscall;
|
||||
}
|
||||
|
||||
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
|
||||
{
|
||||
return __syscall_enter_from_user_work(regs, syscall);
|
||||
}
|
||||
|
||||
noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
|
||||
{
|
||||
long ret;
|
||||
|
||||
enter_from_user_mode(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
local_irq_enable();
|
||||
ret = __syscall_enter_from_user_work(regs, syscall);
|
||||
instrumentation_end();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
|
||||
{
|
||||
enter_from_user_mode(regs);
|
||||
instrumentation_begin();
|
||||
local_irq_enable();
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
/**
|
||||
* exit_to_user_mode - Fixup state when exiting to user mode
|
||||
*
|
||||
|
Loading…
Reference in New Issue
Block a user