linux_dsm_epyc7002/arch/x86/include/asm/syscall.h
Dominik Brodowski fa697140f9 syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls
Let's make use of ARCH_HAS_SYSCALL_WRAPPER=y on pure 64-bit x86-64 systems:

Each syscall defines a stub which takes struct pt_regs as its only
argument. It decodes just those parameters it needs, e.g:

	asmlinkage long sys_xyzzy(const struct pt_regs *regs)
	{
		return SyS_xyzzy(regs->di, regs->si, regs->dx);
	}

This approach avoids leaking random user-provided register content down
the call chain.

For example, for sys_recv() which is a 4-parameter syscall, the assembly
now is (in slightly reordered fashion):

	<sys_recv>:
		callq	<__fentry__>

		/* decode regs->di, ->si, ->dx and ->r10 */
		mov	0x70(%rdi),%rdi
		mov	0x68(%rdi),%rsi
		mov	0x60(%rdi),%rdx
		mov	0x38(%rdi),%rcx

		[ SyS_recv() is automatically inlined by the compiler,
		  as it is not [yet] used anywhere else ]
		/* clear %r9 and %r8, the 5th and 6th args */
		xor	%r9d,%r9d
		xor	%r8d,%r8d

		/* do the actual work */
		callq	__sys_recvfrom

		/* cleanup and return */
		cltq
		retq

The only valid place in an x86-64 kernel which rightfully calls
a syscall function on its own -- vsyscall -- needs to be modified
to pass struct pt_regs onwards as well.

To keep the syscall table generation working independent of
SYSCALL_PTREGS being enabled, the stubs are named the same as the
"original" syscall stubs, i.e. sys_*().

This patch is based on an original proof-of-concept

 | From: Linus Torvalds <torvalds@linux-foundation.org>
 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

and was split up and heavily modified by me, in particular to base it on
ARCH_HAS_SYSCALL_WRAPPER, to limit it to 64-bit-only for the time being,
and to update the vsyscall to the new calling convention.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180405095307.3730-4-linux@dominikbrodowski.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-04-05 16:59:26 +02:00

247 lines
5.3 KiB
C

/*
* Access to user system call parameters and results
*
* Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*
* See asm-generic/syscall.h for descriptions of what we must do here.
*/
#ifndef _ASM_X86_SYSCALL_H
#define _ASM_X86_SYSCALL_H
#include <uapi/linux/audit.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <asm/asm-offsets.h> /* For NR_syscalls */
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
#ifdef CONFIG_SYSCALL_PTREGS
typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *);
#else
typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
unsigned long, unsigned long,
unsigned long, unsigned long);
#endif /* CONFIG_SYSCALL_PTREGS */
extern const sys_call_ptr_t sys_call_table[];
#if defined(CONFIG_X86_32)
#define ia32_sys_call_table sys_call_table
#define __NR_syscall_compat_max __NR_syscall_max
#define IA32_NR_syscalls NR_syscalls
#endif
#if defined(CONFIG_IA32_EMULATION)
extern const sys_call_ptr_t ia32_sys_call_table[];
#endif
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
* This importantly ignores the high bits on 64-bit, so comparisons
* sign-extend the low 32 bits.
*/
static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
return regs->orig_ax;
}
static inline void syscall_rollback(struct task_struct *task,
struct pt_regs *regs)
{
regs->ax = regs->orig_ax;
}
static inline long syscall_get_error(struct task_struct *task,
struct pt_regs *regs)
{
unsigned long error = regs->ax;
#ifdef CONFIG_IA32_EMULATION
/*
* TS_COMPAT is set for 32-bit syscall entries and then
* remains set until we return to user mode.
*/
if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
/*
* Sign-extend the value so (int)-EFOO becomes (long)-EFOO
* and will match correctly in comparisons.
*/
error = (long) (int) error;
#endif
return IS_ERR_VALUE(error) ? error : 0;
}
static inline long syscall_get_return_value(struct task_struct *task,
struct pt_regs *regs)
{
return regs->ax;
}
static inline void syscall_set_return_value(struct task_struct *task,
struct pt_regs *regs,
int error, long val)
{
regs->ax = (long) error ?: val;
}
#ifdef CONFIG_X86_32
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
unsigned int i, unsigned int n,
unsigned long *args)
{
BUG_ON(i + n > 6);
memcpy(args, &regs->bx + i, n * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
unsigned int i, unsigned int n,
const unsigned long *args)
{
BUG_ON(i + n > 6);
memcpy(&regs->bx + i, args, n * sizeof(args[0]));
}
static inline int syscall_get_arch(void)
{
return AUDIT_ARCH_I386;
}
#else /* CONFIG_X86_64 */
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
unsigned int i, unsigned int n,
unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
if (task->thread_info.status & TS_COMPAT)
switch (i) {
case 0:
if (!n--) break;
*args++ = regs->bx;
case 1:
if (!n--) break;
*args++ = regs->cx;
case 2:
if (!n--) break;
*args++ = regs->dx;
case 3:
if (!n--) break;
*args++ = regs->si;
case 4:
if (!n--) break;
*args++ = regs->di;
case 5:
if (!n--) break;
*args++ = regs->bp;
case 6:
if (!n--) break;
default:
BUG();
break;
}
else
# endif
switch (i) {
case 0:
if (!n--) break;
*args++ = regs->di;
case 1:
if (!n--) break;
*args++ = regs->si;
case 2:
if (!n--) break;
*args++ = regs->dx;
case 3:
if (!n--) break;
*args++ = regs->r10;
case 4:
if (!n--) break;
*args++ = regs->r8;
case 5:
if (!n--) break;
*args++ = regs->r9;
case 6:
if (!n--) break;
default:
BUG();
break;
}
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
unsigned int i, unsigned int n,
const unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
if (task->thread_info.status & TS_COMPAT)
switch (i) {
case 0:
if (!n--) break;
regs->bx = *args++;
case 1:
if (!n--) break;
regs->cx = *args++;
case 2:
if (!n--) break;
regs->dx = *args++;
case 3:
if (!n--) break;
regs->si = *args++;
case 4:
if (!n--) break;
regs->di = *args++;
case 5:
if (!n--) break;
regs->bp = *args++;
case 6:
if (!n--) break;
default:
BUG();
break;
}
else
# endif
switch (i) {
case 0:
if (!n--) break;
regs->di = *args++;
case 1:
if (!n--) break;
regs->si = *args++;
case 2:
if (!n--) break;
regs->dx = *args++;
case 3:
if (!n--) break;
regs->r10 = *args++;
case 4:
if (!n--) break;
regs->r8 = *args++;
case 5:
if (!n--) break;
regs->r9 = *args++;
case 6:
if (!n--) break;
default:
BUG();
break;
}
}
static inline int syscall_get_arch(void)
{
/* x32 tasks should be considered AUDIT_ARCH_X86_64. */
return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
}
#endif /* CONFIG_X86_32 */
#endif /* _ASM_X86_SYSCALL_H */