mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-25 01:10:04 +07:00
2aae950b21
This implements a new vDSO for x86-64. The concept is similar to the existing vDSOs on i386 and PPC. x86-64 has had static vsyscalls before, but these are not flexible enough anymore. A vDSO is an ELF shared library supplied by the kernel that is mapped into user address space. The vDSO mapping is randomized for each process for security reasons. Doing this was needed for clock_gettime, because clock_gettime always needs a syscall fallback and having one at a fixed address would have made buffer overflow exploits too easy to write. The vdso can be disabled with vdso=0. It currently includes a new gettimeofday implementation and an optimized clock_gettime(). The gettimeofday implementation is slightly faster than the one in the old vsyscall. clock_gettime is significantly faster than the syscall for CLOCK_MONOTONIC and CLOCK_REALTIME. The new calls are generally faster than the old vsyscall. Advantages over the old x86-64 vsyscalls: - Extensible - Randomized - Cleaner - Easier to virtualize (the old static address range previously caused overhead e.g. for Xen because it has to create special page tables for it) Weak points: - glibc support still to be written The VM interface is partly based on Ingo Molnar's i386 version. Includes compile fix from Joachim Deguara Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
140 lines
3.2 KiB
C
140 lines
3.2 KiB
C
/*
|
|
* Set up the VMAs to tell the VM about the vDSO.
|
|
* Copyright 2007 Andi Kleen, SUSE Labs.
|
|
* Subject to the GPL, v.2
|
|
*/
|
|
#include <linux/mm.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/init.h>
|
|
#include <linux/random.h>
|
|
#include <asm/vsyscall.h>
|
|
#include <asm/vgtod.h>
|
|
#include <asm/proto.h>
|
|
#include "voffset.h"
|
|
|
|
/*
 * Non-zero while the vDSO may be mapped into new processes.  Overridden by
 * the "vdso=" boot option and cleared when vDSO initialisation fails.
 */
int vdso_enabled = 1;

/*
 * For every VEXTERN(x) entry that vextern.h provides, declare a pointer
 * vdso_x whose pointee has the type of the kernel symbol __x.
 */
#define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x;
#include "vextern.h"
#undef VEXTERN

/*
 * Symbols delimiting the vDSO image (defined outside this file;
 * presumably by the vDSO linker script / wrapper object -- confirm).
 */
extern char vdso_kernel_start[];
extern char vdso_start[];
extern char vdso_end[];
extern unsigned short vdso_sync_cpuid;

/* Array of the pages holding the boot-time copy of the vDSO image. */
struct page **vdso_pages;
|
|
|
|
static inline void *var_ref(void *vbase, char *var, char *name)
|
|
{
|
|
unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET;
|
|
void *p = vbase + offset;
|
|
if (*(void **)p != (void *)VMAGIC) {
|
|
printk("VDSO: variable %s broken\n", name);
|
|
vdso_enabled = 0;
|
|
}
|
|
return p;
|
|
}
|
|
|
|
/*
 * Boot-time vDSO setup.
 *
 * Copies the vDSO image (vdso_start..vdso_end) into freshly allocated
 * pages recorded in vdso_pages, maps those pages temporarily into kernel
 * virtual address space, checks the ELF magic and then stores the address
 * of each kernel symbol __x into the matching vdso_x slot of the copy
 * (one per VEXTERN() entry in vextern.h).
 *
 * Returns 0 on success (the vDSO may still have been disabled by a failed
 * sanity check) or -ENOMEM when an allocation or the mapping fails.  On
 * failure every page allocated so far and the page array are released,
 * vdso_pages is reset to NULL and the vDSO is disabled.
 */
static int __init init_vdso_vars(void)
{
	int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
	int nalloc = 0;		/* pages successfully allocated so far */
	int i;
	char *vbase;

	vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
	if (!vdso_pages)
		goto oom;
	for (i = 0; i < npages; i++) {
		struct page *p;

		p = alloc_page(GFP_KERNEL);
		if (!p)
			goto oom;
		vdso_pages[i] = p;
		nalloc++;
		copy_page(page_address(p), vdso_start + i*PAGE_SIZE);
	}

	vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL);
	if (!vbase)
		goto oom;

	if (memcmp(vbase, "\177ELF", 4)) {
		printk("VDSO: I'm broken; not ELF\n");
		vdso_enabled = 0;
	}

#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x)
#define VEXTERN(x) \
	V(vdso_ ## x) = &__ ## x;
#include "vextern.h"
#undef VEXTERN

	/*
	 * The kernel mapping was only needed to patch the variables; the
	 * pages themselves stay referenced through vdso_pages.
	 */
	vunmap(vbase);
	return 0;

 oom:
	printk("Cannot allocate vdso\n");
	vdso_enabled = 0;
	/* Undo the partial setup so nothing is leaked or left dangling. */
	while (nalloc > 0)
		__free_page(vdso_pages[--nalloc]);
	kfree(vdso_pages);	/* kfree(NULL) is a no-op */
	vdso_pages = NULL;
	return -ENOMEM;
}
__initcall(init_vdso_vars);
|
|
|
|
struct linux_binprm;
|
|
|
|
/* Put the vdso above the (randomized) stack with another randomized offset.
|
|
This way there is no hole in the middle of address space.
|
|
To save memory make sure it is still in the same PTE as the stack top.
|
|
This doesn't give that many random bits */
|
|
static unsigned long vdso_addr(unsigned long start, unsigned len)
|
|
{
|
|
unsigned long addr, end;
|
|
unsigned offset;
|
|
end = (start + PMD_SIZE - 1) & PMD_MASK;
|
|
if (end >= TASK_SIZE64)
|
|
end = TASK_SIZE64;
|
|
end -= len;
|
|
/* This loses some more bits than a modulo, but is cheaper */
|
|
offset = get_random_int() & (PTRS_PER_PTE - 1);
|
|
addr = start + (offset << PAGE_SHIFT);
|
|
if (addr >= end)
|
|
addr = end;
|
|
return addr;
|
|
}
|
|
|
|
/* Setup a VMA at program startup for the vsyscall page.
|
|
Not called for compat tasks */
|
|
int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
|
|
{
|
|
struct mm_struct *mm = current->mm;
|
|
unsigned long addr;
|
|
int ret;
|
|
unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE);
|
|
|
|
if (!vdso_enabled)
|
|
return 0;
|
|
|
|
down_write(&mm->mmap_sem);
|
|
addr = vdso_addr(mm->start_stack, len);
|
|
addr = get_unmapped_area(NULL, addr, len, 0, 0);
|
|
if (IS_ERR_VALUE(addr)) {
|
|
ret = addr;
|
|
goto up_fail;
|
|
}
|
|
|
|
ret = install_special_mapping(mm, addr, len,
|
|
VM_READ|VM_EXEC|
|
|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
|
|
VM_ALWAYSDUMP,
|
|
vdso_pages);
|
|
if (ret)
|
|
goto up_fail;
|
|
|
|
current->mm->context.vdso = (void *)addr;
|
|
up_fail:
|
|
up_write(&mm->mmap_sem);
|
|
return ret;
|
|
}
|
|
|
|
static __init int vdso_setup(char *s)
|
|
{
|
|
vdso_enabled = simple_strtoul(s, NULL, 0);
|
|
return 0;
|
|
}
|
|
__setup("vdso=", vdso_setup);
|