2019-05-29 21:12:31 +07:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2015-07-04 02:44:23 +07:00
|
|
|
/*
|
|
|
|
* common.c - C code for kernel entry and exit
|
|
|
|
* Copyright (c) 2015 Andrew Lutomirski
|
|
|
|
*
|
|
|
|
* Based on asm and ptrace code by many authors. The code here originated
|
|
|
|
* in ptrace.c and signal.c.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/sched.h>
|
2017-02-09 00:51:37 +07:00
|
|
|
#include <linux/sched/task_stack.h>
|
2020-07-23 05:00:04 +07:00
|
|
|
#include <linux/entry-common.h>
|
2015-07-04 02:44:23 +07:00
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/smp.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/ptrace.h>
|
|
|
|
#include <linux/export.h>
|
2018-01-30 08:02:59 +07:00
|
|
|
#include <linux/nospec.h>
|
2017-06-15 08:12:01 +07:00
|
|
|
#include <linux/syscalls.h>
|
2019-04-03 23:41:52 +07:00
|
|
|
#include <linux/uaccess.h>
|
2015-07-04 02:44:23 +07:00
|
|
|
|
2020-05-22 03:05:26 +07:00
|
|
|
#ifdef CONFIG_XEN_PV
|
|
|
|
#include <xen/xen-ops.h>
|
|
|
|
#include <xen/events.h>
|
|
|
|
#endif
|
|
|
|
|
2015-07-04 02:44:23 +07:00
|
|
|
#include <asm/desc.h>
|
|
|
|
#include <asm/traps.h>
|
2015-10-06 07:48:10 +07:00
|
|
|
#include <asm/vdso.h>
|
2016-01-27 04:12:04 +07:00
|
|
|
#include <asm/cpufeature.h>
|
2019-04-03 23:41:52 +07:00
|
|
|
#include <asm/fpu/api.h>
|
2019-02-19 05:42:51 +07:00
|
|
|
#include <asm/nospec-branch.h>
|
2019-11-12 05:03:23 +07:00
|
|
|
#include <asm/io_bitmap.h>
|
2020-01-23 22:27:54 +07:00
|
|
|
#include <asm/syscall.h>
|
2020-05-22 03:05:26 +07:00
|
|
|
#include <asm/irq_stack.h>
|
2015-07-04 02:44:23 +07:00
|
|
|
|
2020-07-23 05:00:01 +07:00
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
|
|
|
|
{
|
2020-07-23 05:00:04 +07:00
|
|
|
nr = syscall_enter_from_user_mode(regs, nr);
|
2020-07-23 05:00:01 +07:00
|
|
|
|
|
|
|
instrumentation_begin();
|
2018-04-05 16:53:00 +07:00
|
|
|
if (likely(nr < NR_syscalls)) {
|
|
|
|
nr = array_index_nospec(nr, NR_syscalls);
|
syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls
Let's make use of ARCH_HAS_SYSCALL_WRAPPER=y on pure 64-bit x86-64 systems:
Each syscall defines a stub which takes struct pt_regs as its only
argument. It decodes just those parameters it needs, e.g:
asmlinkage long sys_xyzzy(const struct pt_regs *regs)
{
return SyS_xyzzy(regs->di, regs->si, regs->dx);
}
This approach avoids leaking random user-provided register content down
the call chain.
For example, for sys_recv() which is a 4-parameter syscall, the assembly
now is (in slightly reordered fashion):
<sys_recv>:
callq <__fentry__>
/* decode regs->di, ->si, ->dx and ->r10 */
mov 0x70(%rdi),%rdi
mov 0x68(%rdi),%rsi
mov 0x60(%rdi),%rdx
mov 0x38(%rdi),%rcx
[ SyS_recv() is automatically inlined by the compiler,
as it is not [yet] used anywhere else ]
/* clear %r9 and %r8, the 5th and 6th args */
xor %r9d,%r9d
xor %r8d,%r8d
/* do the actual work */
callq __sys_recvfrom
/* cleanup and return */
cltq
retq
The only valid place in an x86-64 kernel which rightfully calls
a syscall function on its own -- vsyscall -- needs to be modified
to pass struct pt_regs onwards as well.
To keep the syscall table generation working independent of
SYSCALL_PTREGS being enabled, the stubs are named the same as the
"original" syscall stubs, i.e. sys_*().
This patch is based on an original proof-of-concept
| From: Linus Torvalds <torvalds@linux-foundation.org>
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
and was split up and heavily modified by me, in particular to base it on
ARCH_HAS_SYSCALL_WRAPPER, to limit it to 64-bit-only for the time being,
and to update the vsyscall to the new calling convention.
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180405095307.3730-4-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-04-05 16:53:02 +07:00
|
|
|
regs->ax = sys_call_table[nr](regs);
|
x86/syscalls: Split the x32 syscalls into their own table
For unfortunate historical reasons, the x32 syscalls and the x86_64
syscalls are not all numbered the same. As an example, ioctl() is nr 16 on
x86_64 but 514 on x32.
This has potentially nasty consequences, since it means that there are two
valid RAX values to do ioctl(2) and two invalid RAX values. The valid
values are 16 (i.e. ioctl(2) using the x86_64 ABI) and (514 | 0x40000000)
(i.e. ioctl(2) using the x32 ABI).
The invalid values are 514 and (16 | 0x40000000). 514 will enter the
"COMPAT_SYSCALL_DEFINE3(ioctl, ...)" entry point with in_compat_syscall()
and in_x32_syscall() returning false, whereas (16 | 0x40000000) will enter
the native entry point with in_compat_syscall() and in_x32_syscall()
returning true. Both are bogus, and both will exercise code paths in the
kernel and in any running seccomp filters that really ought to be
unreachable.
Splitting out the x32 syscalls into their own tables, allows both bogus
invocations to return -ENOSYS. I've checked glibc, musl, and Bionic, and
all of them appear to call syscalls with their correct numbers, so this
change should have no effect on them.
There is an added benefit going forward: new syscalls that need special
handling on x32 can share the same number on x32 and x86_64. This means
that the special syscall range 512-547 can be treated as a legacy wart
instead of something that may need to be extended in the future.
Also add a selftest to verify the new behavior.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/208024256b764312598f014ebfb0a42472c19354.1562185330.git.luto@kernel.org
2019-07-04 03:34:04 +07:00
|
|
|
#ifdef CONFIG_X86_X32_ABI
|
|
|
|
} else if (likely((nr & __X32_SYSCALL_BIT) &&
|
|
|
|
(nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
|
|
|
|
nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
|
|
|
|
X32_NR_syscalls);
|
|
|
|
regs->ax = x32_sys_call_table[nr](regs);
|
|
|
|
#endif
|
2016-01-29 06:11:28 +07:00
|
|
|
}
|
2020-03-10 20:46:27 +07:00
|
|
|
instrumentation_end();
|
2020-07-23 05:00:05 +07:00
|
|
|
syscall_exit_to_user_mode(regs);
|
2016-01-29 06:11:28 +07:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2015-10-06 07:48:08 +07:00
|
|
|
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
|
2020-07-23 05:00:01 +07:00
|
|
|
static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
if (IS_ENABLED(CONFIG_IA32_EMULATION))
|
|
|
|
current_thread_info()->status |= TS_COMPAT;
|
2020-09-02 06:50:54 +07:00
|
|
|
|
|
|
|
return (unsigned int)regs->orig_ax;
|
2020-07-23 05:00:01 +07:00
|
|
|
}
|
|
|
|
|
2015-10-06 07:48:08 +07:00
|
|
|
/*
|
2020-07-23 05:00:01 +07:00
|
|
|
* Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL.
|
2015-10-06 07:48:08 +07:00
|
|
|
*/
|
2020-07-23 05:00:01 +07:00
|
|
|
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
|
|
|
|
unsigned int nr)
|
2015-10-06 07:48:08 +07:00
|
|
|
{
|
2015-10-06 07:48:19 +07:00
|
|
|
if (likely(nr < IA32_NR_syscalls)) {
|
2020-07-23 05:00:01 +07:00
|
|
|
instrumentation_begin();
|
2018-01-30 08:02:59 +07:00
|
|
|
nr = array_index_nospec(nr, IA32_NR_syscalls);
|
syscalls/x86: Use 'struct pt_regs' based syscall calling for IA32_EMULATION and x32
Extend ARCH_HAS_SYSCALL_WRAPPER for i386 emulation and for x32 on 64-bit
x86.
For x32, all we need to do is to create an additional stub for each
compat syscall which decodes the parameters in x86-64 ordering, e.g.:
asmlinkage long __compat_sys_x32_xyzzy(struct pt_regs *regs)
{
return c_SyS_xyzzy(regs->di, regs->si, regs->dx);
}
For i386 emulation, we need to teach compat_sys_*() to take struct
pt_regs as its only argument, e.g.:
asmlinkage long __compat_sys_ia32_xyzzy(struct pt_regs *regs)
{
return c_SyS_xyzzy(regs->bx, regs->cx, regs->dx);
}
In addition, we need to create additional stubs for common syscalls
(that is, for syscalls which have the same parameters on 32-bit and
64-bit), e.g.:
asmlinkage long __sys_ia32_xyzzy(struct pt_regs *regs)
{
return c_sys_xyzzy(regs->bx, regs->cx, regs->dx);
}
This approach avoids leaking random user-provided register content down
the call chain.
This patch is based on an original proof-of-concept
| From: Linus Torvalds <torvalds@linux-foundation.org>
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
and was split up and heavily modified by me, in particular to base it on
ARCH_HAS_SYSCALL_WRAPPER.
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180405095307.3730-6-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-04-05 16:53:04 +07:00
|
|
|
regs->ax = ia32_sys_call_table[nr](regs);
|
2020-07-23 05:00:01 +07:00
|
|
|
instrumentation_end();
|
2015-10-06 07:48:08 +07:00
|
|
|
}
|
|
|
|
}
|
2015-10-06 07:48:10 +07:00
|
|
|
|
2016-03-10 04:24:32 +07:00
|
|
|
/* Handles int $0x80 */
|
2020-03-10 20:46:27 +07:00
|
|
|
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
|
2015-10-06 07:48:17 +07:00
|
|
|
{
|
2020-07-23 05:00:01 +07:00
|
|
|
unsigned int nr = syscall_32_enter(regs);
|
2020-03-10 20:46:27 +07:00
|
|
|
|
2020-09-02 06:50:54 +07:00
|
|
|
/*
|
|
|
|
* Subtlety here: if ptrace pokes something larger than 2^32-1 into
|
|
|
|
* orig_ax, the unsigned int return value truncates it. This may
|
|
|
|
* or may not be necessary, but it matches the old asm behavior.
|
|
|
|
*/
|
|
|
|
nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
|
|
|
|
|
2020-07-23 05:00:01 +07:00
|
|
|
do_syscall_32_irqs_on(regs, nr);
|
2020-07-23 05:00:05 +07:00
|
|
|
syscall_exit_to_user_mode(regs);
|
2020-03-10 20:46:27 +07:00
|
|
|
}
|
|
|
|
|
2020-07-23 05:00:01 +07:00
|
|
|
/*
 * Body of the fast (SYSENTER/SYSCALL32) 32-bit syscall path.
 * @regs: saved user register frame
 *
 * Loads the user's EBP value from the user stack slot addressed by regs->sp
 * into regs->bp, then runs the syscall like the int $0x80 path does.
 *
 * Returns true when the syscall was dispatched, false when reading EBP from
 * user memory faulted (regs->ax is then set to -EFAULT). In both cases the
 * exit-to-user-mode work has already been run on return.
 */
static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
{
	unsigned int nr = syscall_32_enter(regs);
	int res;

	/*
	 * This cannot use syscall_enter_from_user_mode() as it has to
	 * fetch EBP before invoking any of the syscall entry work
	 * functions.
	 */
	syscall_enter_from_user_mode_prepare(regs);

	instrumentation_begin();
	/* Fetch EBP from where the vDSO stashed it. */
	if (IS_ENABLED(CONFIG_X86_64)) {
		/*
		 * Micro-optimization: the pointer we're following is
		 * explicitly 32 bits, so it can't be out of range.
		 */
		res = __get_user(*(u32 *)&regs->bp,
			 (u32 __user __force *)(unsigned long)(u32)regs->sp);
	} else {
		res = get_user(*(u32 *)&regs->bp,
		       (u32 __user __force *)(unsigned long)(u32)regs->sp);
	}
	instrumentation_end();

	if (res) {
		/* User code screwed up. */
		regs->ax = -EFAULT;
		syscall_exit_to_user_mode(regs);
		return false;
	}

	/* The cast truncates any ptrace induced syscall nr > 2^32 -1 */
	nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);

	/* Now this is just like a normal syscall. */
	do_syscall_32_irqs_on(regs, nr);
	syscall_exit_to_user_mode(regs);
	return true;
}
|
|
|
|
|
2015-10-06 07:48:15 +07:00
|
|
|
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
|
2020-03-10 20:46:27 +07:00
|
|
|
__visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
|
2015-10-06 07:48:10 +07:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Called using the internal vDSO SYSENTER/SYSCALL32 calling
|
|
|
|
* convention. Adjust regs so it looks like we entered using int80.
|
|
|
|
*/
|
|
|
|
unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
|
2020-03-10 20:46:27 +07:00
|
|
|
vdso_image_32.sym_int80_landing_pad;
|
2015-10-06 07:48:10 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
|
|
|
|
* so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
|
|
|
|
* Fix it up.
|
|
|
|
*/
|
|
|
|
regs->ip = landing_pad;
|
|
|
|
|
2020-07-23 05:00:01 +07:00
|
|
|
/* Invoke the syscall. If it failed, keep it simple: use IRET. */
|
|
|
|
if (!__do_fast_syscall_32(regs))
|
2020-03-10 20:46:27 +07:00
|
|
|
return 0;
|
2015-10-06 07:48:12 +07:00
|
|
|
|
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
/*
|
|
|
|
* Opportunistic SYSRETL: if possible, try to return using SYSRETL.
|
|
|
|
* SYSRETL is available on all 64-bit CPUs, so we don't need to
|
|
|
|
* bother with SYSEXIT.
|
|
|
|
*
|
|
|
|
* Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
|
|
|
|
* because the ECX fixup above will ensure that this is essentially
|
|
|
|
* never the case.
|
|
|
|
*/
|
|
|
|
return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
|
|
|
|
regs->ip == landing_pad &&
|
|
|
|
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
|
|
|
|
#else
|
2015-10-06 07:48:15 +07:00
|
|
|
/*
|
|
|
|
* Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
|
|
|
|
*
|
|
|
|
* Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
|
|
|
|
* because the ECX fixup above will ensure that this is essentially
|
|
|
|
* never the case.
|
|
|
|
*
|
|
|
|
* We don't allow syscalls at all from VM86 mode, but we still
|
|
|
|
* need to check VM, because we might be returning from sys_vm86.
|
|
|
|
*/
|
|
|
|
return static_cpu_has(X86_FEATURE_SEP) &&
|
|
|
|
regs->cs == __USER_CS && regs->ss == __USER_DS &&
|
|
|
|
regs->ip == landing_pad &&
|
|
|
|
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
|
2015-10-06 07:48:12 +07:00
|
|
|
#endif
|
2015-10-06 07:48:10 +07:00
|
|
|
}
|
2020-06-27 00:21:12 +07:00
|
|
|
|
|
|
|
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
|
|
|
|
__visible noinstr long do_SYSENTER_32(struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
/* SYSENTER loses RSP, but the vDSO saved it in RBP. */
|
|
|
|
regs->sp = regs->bp;
|
|
|
|
|
|
|
|
/* SYSENTER clobbers EFLAGS.IF. Assume it was set in usermode. */
|
|
|
|
regs->flags |= X86_EFLAGS_IF;
|
|
|
|
|
|
|
|
return do_fast_syscall_32(regs);
|
|
|
|
}
|
2015-10-06 07:48:08 +07:00
|
|
|
#endif
|
2020-03-14 02:51:32 +07:00
|
|
|
|
|
|
|
SYSCALL_DEFINE0(ni_syscall)
|
|
|
|
{
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
2020-03-26 22:28:52 +07:00
|
|
|
|
2020-05-27 20:50:29 +07:00
|
|
|
/*
 * Entry bookkeeping for NMI-type exceptions.
 * @regs: saved register frame (not used by this function)
 *
 * Returns lockdep's view of whether hard interrupts were enabled at the
 * time of the call, sampled before any state is changed.
 */
noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
{
	bool hardirqs_were_on = lockdep_hardirqs_enabled();

	/* Non-instrumentable part: NMI, lockdep and RCU state. */
	__nmi_enter();
	lockdep_hardirqs_off(CALLER_ADDR0);
	lockdep_hardirq_enter();
	rcu_nmi_enter();

	/* The tracing hooks are only called inside the instrumentable region. */
	instrumentation_begin();
	trace_hardirqs_off_finish();
	ftrace_nmi_enter();
	instrumentation_end();

	return hardirqs_were_on;
}
|
|
|
|
|
|
|
|
/*
 * Exit bookkeeping for NMI-type exceptions; counterpart of
 * idtentry_enter_nmi().
 * @regs:    saved register frame (not used by this function)
 * @restore: when true, the hardirqs-on tracing and lockdep state is
 *           re-established (presumably the value idtentry_enter_nmi()
 *           returned - confirm against callers)
 */
noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore)
{
	/* The tracing hooks are only called inside the instrumentable region. */
	instrumentation_begin();
	ftrace_nmi_exit();
	if (restore) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
	}
	instrumentation_end();

	/* Non-instrumentable part: RCU, lockdep and NMI state. */
	rcu_nmi_exit();
	lockdep_hardirq_exit();
	if (restore) {
		lockdep_hardirqs_on(CALLER_ADDR0);
	}
	__nmi_exit();
}
|
|
|
|
|
2020-05-22 03:05:26 +07:00
|
|
|
#ifdef CONFIG_XEN_PV
|
|
|
|
#ifndef CONFIG_PREEMPTION
|
|
|
|
/*
 * Hypercalls issued by the toolstack may run for many tens of seconds.
 * Tasks executing such hypercalls through the privcmd driver are allowed
 * to be preempted voluntarily even when full kernel preemption is
 * disabled.
 *
 * A preemptible hypercall is bracketed by xen_preemptible_hcall_begin()
 * and xen_preemptible_hcall_end() calls.
 */
DEFINE_PER_CPU(bool, xen_in_preemptible_hcall);
EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In case of scheduling the flag must be cleared and restored after
|
|
|
|
* returning from schedule as the task might move to a different CPU.
|
|
|
|
*/
|
|
|
|
static __always_inline bool get_and_clear_inhcall(void)
|
|
|
|
{
|
|
|
|
bool inhcall = __this_cpu_read(xen_in_preemptible_hcall);
|
|
|
|
|
|
|
|
__this_cpu_write(xen_in_preemptible_hcall, false);
|
|
|
|
return inhcall;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Store @inhcall into this CPU's xen_in_preemptible_hcall flag. */
static __always_inline void restore_inhcall(bool inhcall)
{
	__this_cpu_write(xen_in_preemptible_hcall, inhcall);
}
|
|
|
|
#else
|
|
|
|
static __always_inline bool get_and_clear_inhcall(void) { return false; }
|
|
|
|
static __always_inline void restore_inhcall(bool inhcall) { }
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static void __xen_pv_evtchn_do_upcall(void)
|
|
|
|
{
|
|
|
|
irq_enter_rcu();
|
|
|
|
inc_irq_stat(irq_hv_callback_count);
|
|
|
|
|
|
|
|
xen_hvm_evtchn_do_upcall();
|
|
|
|
|
|
|
|
irq_exit_rcu();
|
|
|
|
}
|
|
|
|
|
|
|
|
__visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
struct pt_regs *old_regs;
|
2020-07-04 00:02:58 +07:00
|
|
|
bool inhcall;
|
2020-07-23 05:00:08 +07:00
|
|
|
irqentry_state_t state;
|
2020-05-22 03:05:26 +07:00
|
|
|
|
2020-07-23 05:00:08 +07:00
|
|
|
state = irqentry_enter(regs);
|
2020-05-22 03:05:26 +07:00
|
|
|
old_regs = set_irq_regs(regs);
|
|
|
|
|
|
|
|
instrumentation_begin();
|
|
|
|
run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, NULL, regs);
|
|
|
|
instrumentation_begin();
|
|
|
|
|
|
|
|
set_irq_regs(old_regs);
|
|
|
|
|
|
|
|
inhcall = get_and_clear_inhcall();
|
2020-07-04 00:02:58 +07:00
|
|
|
if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
|
2020-05-22 03:05:26 +07:00
|
|
|
instrumentation_begin();
|
2020-07-23 05:00:07 +07:00
|
|
|
irqentry_exit_cond_resched();
|
2020-05-22 03:05:26 +07:00
|
|
|
instrumentation_end();
|
|
|
|
restore_inhcall(inhcall);
|
|
|
|
} else {
|
2020-07-23 05:00:08 +07:00
|
|
|
irqentry_exit(regs, state);
|
2020-05-22 03:05:26 +07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_XEN_PV */
|