2019-05-28 23:57:20 +07:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2008-04-19 07:08:44 +07:00
|
|
|
/*
|
|
|
|
* Access to user system call parameters and results
|
|
|
|
*
|
2009-09-23 09:57:51 +07:00
|
|
|
* Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
|
2008-04-19 07:08:44 +07:00
|
|
|
*
|
|
|
|
* See asm-generic/syscall.h for descriptions of what we must do here.
|
|
|
|
*/
|
|
|
|
|
2008-10-23 14:20:33 +07:00
|
|
|
#ifndef _ASM_X86_SYSCALL_H
|
|
|
|
#define _ASM_X86_SYSCALL_H
|
2008-04-19 07:08:44 +07:00
|
|
|
|
2014-03-11 23:55:42 +07:00
|
|
|
#include <uapi/linux/audit.h>
|
2008-04-19 07:08:44 +07:00
|
|
|
#include <linux/sched.h>
|
2008-09-03 18:31:42 +07:00
|
|
|
#include <linux/err.h>
|
2012-01-08 05:10:18 +07:00
|
|
|
#include <asm/asm-offsets.h> /* For NR_syscalls */
|
2012-04-13 04:47:56 +07:00
|
|
|
#include <asm/thread_info.h> /* for TS_COMPAT */
|
2012-02-19 22:56:26 +07:00
|
|
|
#include <asm/unistd.h>
|
2008-04-19 07:08:44 +07:00
|
|
|
|
2018-04-05 16:53:05 +07:00
|
|
|
#ifdef CONFIG_X86_64
|
syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls
Let's make use of ARCH_HAS_SYSCALL_WRAPPER=y on pure 64-bit x86-64 systems:
Each syscall defines a stub which takes struct pt_regs as its only
argument. It decodes just those parameters it needs, e.g:
asmlinkage long sys_xyzzy(const struct pt_regs *regs)
{
return SyS_xyzzy(regs->di, regs->si, regs->dx);
}
This approach avoids leaking random user-provided register content down
the call chain.
For example, for sys_recv() which is a 4-parameter syscall, the assembly
now is (in slightly reordered fashion):
<sys_recv>:
callq <__fentry__>
/* decode regs->di, ->si, ->dx and ->r10 */
mov 0x70(%rdi),%rdi
mov 0x68(%rdi),%rsi
mov 0x60(%rdi),%rdx
mov 0x38(%rdi),%rcx
[ SyS_recv() is automatically inlined by the compiler,
as it is not [yet] used anywhere else ]
/* clear %r9 and %r8, the 5th and 6th args */
xor %r9d,%r9d
xor %r8d,%r8d
/* do the actual work */
callq __sys_recvfrom
/* cleanup and return */
cltq
retq
The only valid place in an x86-64 kernel which rightfully calls
a syscall function on its own -- vsyscall -- needs to be modified
to pass struct pt_regs onwards as well.
To keep the syscall table generation working independent of
SYSCALL_PTREGS being enabled, the stubs are named the same as the
"original" syscall stubs, i.e. sys_*().
This patch is based on an original proof-of-concept
| From: Linus Torvalds <torvalds@linux-foundation.org>
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
and was split up and heavily modified by me, in particular to base it on
ARCH_HAS_SYSCALL_WRAPPER, to limit it to 64-bit-only for the time being,
and to update the vsyscall to the new calling convention.
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180405095307.3730-4-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-04-05 16:53:02 +07:00
|
|
|
/* 64-bit: a syscall handler receives only a pointer to the saved registers. */
typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *regs);
|
|
|
|
#else
|
2015-10-06 07:48:07 +07:00
|
|
|
/* 32-bit: a syscall handler receives its six arguments as plain words. */
typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
					  unsigned long, unsigned long,
					  unsigned long, unsigned long);
|
2018-04-05 16:53:05 +07:00
|
|
|
#endif /* CONFIG_X86_64 */
|
2013-08-06 05:02:35 +07:00
|
|
|
/* Table of system call handlers. */
extern const sys_call_ptr_t sys_call_table[];

/*
 * On 32-bit kernels the ia32 names are plain aliases for the native
 * table and its limits.
 */
#ifdef CONFIG_X86_32
#define ia32_sys_call_table	sys_call_table
#define __NR_syscall_compat_max	__NR_syscall_max
#define IA32_NR_syscalls	NR_syscalls
#endif

/* With IA32 emulation enabled, the ia32 table is a separate array. */
#ifdef CONFIG_IA32_EMULATION
extern const sys_call_ptr_t ia32_sys_call_table[];
#endif
|
|
|
|
|
x86/syscalls: Split the x32 syscalls into their own table
For unfortunate historical reasons, the x32 syscalls and the x86_64
syscalls are not all numbered the same. As an example, ioctl() is nr 16 on
x86_64 but 514 on x32.
This has potentially nasty consequences, since it means that there are two
valid RAX values to do ioctl(2) and two invalid RAX values. The valid
values are 16 (i.e. ioctl(2) using the x86_64 ABI) and (514 | 0x40000000)
(i.e. ioctl(2) using the x32 ABI).
The invalid values are 514 and (16 | 0x40000000). 514 will enter the
"COMPAT_SYSCALL_DEFINE3(ioctl, ...)" entry point with in_compat_syscall()
and in_x32_syscall() returning false, whereas (16 | 0x40000000) will enter
the native entry point with in_compat_syscall() and in_x32_syscall()
returning true. Both are bogus, and both will exercise code paths in the
kernel and in any running seccomp filters that really ought to be
unreachable.
Splitting out the x32 syscalls into their own tables allows both bogus
invocations to return -ENOSYS. I've checked glibc, musl, and Bionic, and
all of them appear to call syscalls with their correct numbers, so this
change should have no effect on them.
There is an added benefit going forward: new syscalls that need special
handling on x32 can share the same number on x32 and x86_64. This means
that the special syscall range 512-547 can be treated as a legacy wart
instead of something that may need to be extended in the future.
Also add a selftest to verify the new behavior.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/208024256b764312598f014ebfb0a42472c19354.1562185330.git.luto@kernel.org
2019-07-04 03:34:04 +07:00
|
|
|
/* Handler table for the x32 ABI, declared only when that ABI is configured. */
#if defined(CONFIG_X86_X32_ABI)
extern const sys_call_ptr_t x32_sys_call_table[];
#endif
|
|
|
|
|
2009-09-23 09:57:51 +07:00
|
|
|
/*
|
|
|
|
* Only the low 32 bits of orig_ax are meaningful, so we return int.
|
|
|
|
* This importantly ignores the high bits on 64-bit, so comparisons
|
|
|
|
* sign-extend the low 32 bits.
|
|
|
|
*/
|
|
|
|
static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
|
2008-04-19 07:08:44 +07:00
|
|
|
{
|
2013-02-16 00:21:43 +07:00
|
|
|
return regs->orig_ax;
|
2008-04-19 07:08:44 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void syscall_rollback(struct task_struct *task,
|
|
|
|
struct pt_regs *regs)
|
|
|
|
{
|
2013-02-16 00:21:43 +07:00
|
|
|
regs->ax = regs->orig_ax;
|
2008-04-19 07:08:44 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline long syscall_get_error(struct task_struct *task,
|
|
|
|
struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
unsigned long error = regs->ax;
|
|
|
|
#ifdef CONFIG_IA32_EMULATION
|
|
|
|
/*
|
|
|
|
* TS_COMPAT is set for 32-bit syscall entries and then
|
|
|
|
* remains set until we return to user mode.
|
|
|
|
*/
|
2018-01-29 01:38:50 +07:00
|
|
|
if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
|
2008-04-19 07:08:44 +07:00
|
|
|
/*
|
|
|
|
* Sign-extend the value so (int)-EFOO becomes (long)-EFOO
|
|
|
|
* and will match correctly in comparisons.
|
|
|
|
*/
|
|
|
|
error = (long) (int) error;
|
|
|
|
#endif
|
2008-09-03 18:31:42 +07:00
|
|
|
return IS_ERR_VALUE(error) ? error : 0;
|
2008-04-19 07:08:44 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline long syscall_get_return_value(struct task_struct *task,
|
|
|
|
struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
return regs->ax;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void syscall_set_return_value(struct task_struct *task,
|
|
|
|
struct pt_regs *regs,
|
|
|
|
int error, long val)
|
|
|
|
{
|
|
|
|
regs->ax = (long) error ?: val;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_X86_32
|
|
|
|
|
|
|
|
static inline void syscall_get_arguments(struct task_struct *task,
|
|
|
|
struct pt_regs *regs,
|
|
|
|
unsigned long *args)
|
|
|
|
{
|
2016-11-08 04:26:37 +07:00
|
|
|
memcpy(args, ®s->bx, 6 * sizeof(args[0]));
|
2008-04-19 07:08:44 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void syscall_set_arguments(struct task_struct *task,
|
|
|
|
struct pt_regs *regs,
|
|
|
|
unsigned int i, unsigned int n,
|
|
|
|
const unsigned long *args)
|
|
|
|
{
|
|
|
|
BUG_ON(i + n > 6);
|
|
|
|
memcpy(®s->bx + i, args, n * sizeof(args[0]));
|
|
|
|
}
|
|
|
|
|
2019-03-18 06:30:18 +07:00
|
|
|
static inline int syscall_get_arch(struct task_struct *task)
|
2012-04-13 04:47:56 +07:00
|
|
|
{
|
|
|
|
return AUDIT_ARCH_I386;
|
|
|
|
}
|
|
|
|
|
2008-04-19 07:08:44 +07:00
|
|
|
#else /* CONFIG_X86_64 */
|
|
|
|
|
|
|
|
static inline void syscall_get_arguments(struct task_struct *task,
|
|
|
|
struct pt_regs *regs,
|
|
|
|
unsigned long *args)
|
|
|
|
{
|
|
|
|
# ifdef CONFIG_IA32_EMULATION
|
2016-11-08 04:26:37 +07:00
|
|
|
if (task->thread_info.status & TS_COMPAT) {
|
|
|
|
*args++ = regs->bx;
|
|
|
|
*args++ = regs->cx;
|
|
|
|
*args++ = regs->dx;
|
|
|
|
*args++ = regs->si;
|
|
|
|
*args++ = regs->di;
|
|
|
|
*args = regs->bp;
|
|
|
|
} else
|
2008-04-19 07:08:44 +07:00
|
|
|
# endif
|
2016-11-08 04:26:37 +07:00
|
|
|
{
|
|
|
|
*args++ = regs->di;
|
|
|
|
*args++ = regs->si;
|
|
|
|
*args++ = regs->dx;
|
|
|
|
*args++ = regs->r10;
|
|
|
|
*args++ = regs->r8;
|
|
|
|
*args = regs->r9;
|
|
|
|
}
|
2008-04-19 07:08:44 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void syscall_set_arguments(struct task_struct *task,
|
|
|
|
struct pt_regs *regs,
|
|
|
|
const unsigned long *args)
|
|
|
|
{
|
|
|
|
# ifdef CONFIG_IA32_EMULATION
|
2019-03-28 07:07:31 +07:00
|
|
|
if (task->thread_info.status & TS_COMPAT) {
|
|
|
|
regs->bx = *args++;
|
|
|
|
regs->cx = *args++;
|
|
|
|
regs->dx = *args++;
|
|
|
|
regs->si = *args++;
|
|
|
|
regs->di = *args++;
|
|
|
|
regs->bp = *args;
|
|
|
|
} else
|
2008-04-19 07:08:44 +07:00
|
|
|
# endif
|
2019-03-28 07:07:31 +07:00
|
|
|
{
|
|
|
|
regs->di = *args++;
|
|
|
|
regs->si = *args++;
|
|
|
|
regs->dx = *args++;
|
|
|
|
regs->r10 = *args++;
|
|
|
|
regs->r8 = *args++;
|
|
|
|
regs->r9 = *args;
|
|
|
|
}
|
2008-04-19 07:08:44 +07:00
|
|
|
}
|
|
|
|
|
2019-03-18 06:30:18 +07:00
|
|
|
static inline int syscall_get_arch(struct task_struct *task)
|
2012-04-13 04:47:56 +07:00
|
|
|
{
|
2016-09-14 04:29:21 +07:00
|
|
|
/* x32 tasks should be considered AUDIT_ARCH_X86_64. */
|
2019-03-18 06:30:18 +07:00
|
|
|
return (IS_ENABLED(CONFIG_IA32_EMULATION) &&
|
|
|
|
task->thread_info.status & TS_COMPAT)
|
|
|
|
? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
|
2012-04-13 04:47:56 +07:00
|
|
|
}
|
2008-04-19 07:08:44 +07:00
|
|
|
#endif /* CONFIG_X86_32 */
|
|
|
|
|
2008-10-23 14:20:33 +07:00
|
|
|
#endif /* _ASM_X86_SYSCALL_H */
|