linux_dsm_epyc7002/arch/arm64/kernel/vdso.c
Michal Hocko 6904817607 vdso: make arch_setup_additional_pages wait for mmap_sem for write killable
most architectures are relying on mmap_sem for write in their
arch_setup_additional_pages.  If the waiting task gets killed by the oom
killer it would block oom_reaper from asynchronous address space reclaim
and reduce the chances of timely OOM resolving.  Wait for the lock in
the killable mode and return with EINTR if the task got killed while
waiting.

Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Andy Lutomirski <luto@amacapital.net>	[x86 vdso]
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-05-23 17:04:14 -07:00

233 lines
6.0 KiB
C

/*
* VDSO implementation for AArch64 and vector page setup for AArch32.
*
* Copyright (C) 2012 ARM Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Will Deacon <will.deacon@arm.com>
*/
#include <linux/kernel.h>
#include <linux/clocksource.h>
#include <linux/elf.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <asm/signal32.h>
#include <asm/vdso.h>
#include <asm/vdso_datapage.h>
extern char vdso_start, vdso_end;
static unsigned long vdso_pages;
static struct page **vdso_pagelist;
/*
* The vDSO data page.
*/
static union {
struct vdso_data data;
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = &vdso_data_store.data;
#ifdef CONFIG_COMPAT
/*
* Create and map the vectors page for AArch32 tasks.
*/
static struct page *vectors_page[1];
static int alloc_vectors_page(void)
{
extern char __kuser_helper_start[], __kuser_helper_end[];
extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
int kuser_sz = __kuser_helper_end - __kuser_helper_start;
int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
unsigned long vpage;
vpage = get_zeroed_page(GFP_ATOMIC);
if (!vpage)
return -ENOMEM;
/* kuser helpers */
memcpy((void *)vpage + 0x1000 - kuser_sz, __kuser_helper_start,
kuser_sz);
/* sigreturn code */
memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET,
__aarch32_sigret_code_start, sigret_sz);
flush_icache_range(vpage, vpage + PAGE_SIZE);
vectors_page[0] = virt_to_page(vpage);
return 0;
}
arch_initcall(alloc_vectors_page);
int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
unsigned long addr = AARCH32_VECTORS_BASE;
static struct vm_special_mapping spec = {
.name = "[vectors]",
.pages = vectors_page,
};
void *ret;
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
current->mm->context.vdso = (void *)addr;
/* Map vectors page at the high address. */
ret = _install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC,
&spec);
up_write(&mm->mmap_sem);
return PTR_ERR_OR_ZERO(ret);
}
#endif /* CONFIG_COMPAT */
static struct vm_special_mapping vdso_spec[2];
static int __init vdso_init(void)
{
int i;
if (memcmp(&vdso_start, "\177ELF", 4)) {
pr_err("vDSO is not a valid ELF object!\n");
return -EINVAL;
}
vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n",
vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data);
/* Allocate the vDSO pagelist, plus a page for the data. */
vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
GFP_KERNEL);
if (vdso_pagelist == NULL)
return -ENOMEM;
/* Grab the vDSO data page. */
vdso_pagelist[0] = pfn_to_page(PHYS_PFN(__pa(vdso_data)));
/* Grab the vDSO code pages. */
for (i = 0; i < vdso_pages; i++)
vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i);
/* Populate the special mapping structures */
vdso_spec[0] = (struct vm_special_mapping) {
.name = "[vvar]",
.pages = vdso_pagelist,
};
vdso_spec[1] = (struct vm_special_mapping) {
.name = "[vdso]",
.pages = &vdso_pagelist[1],
};
return 0;
}
arch_initcall(vdso_init);
int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp)
{
struct mm_struct *mm = current->mm;
unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
void *ret;
vdso_text_len = vdso_pages << PAGE_SHIFT;
/* Be sure to map the data page */
vdso_mapping_len = vdso_text_len + PAGE_SIZE;
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
if (IS_ERR_VALUE(vdso_base)) {
ret = ERR_PTR(vdso_base);
goto up_fail;
}
ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
VM_READ|VM_MAYREAD,
&vdso_spec[0]);
if (IS_ERR(ret))
goto up_fail;
vdso_base += PAGE_SIZE;
mm->context.vdso = (void *)vdso_base;
ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
&vdso_spec[1]);
if (IS_ERR(ret))
goto up_fail;
up_write(&mm->mmap_sem);
return 0;
up_fail:
mm->context.vdso = NULL;
up_write(&mm->mmap_sem);
return PTR_ERR(ret);
}
/*
* Update the vDSO data page to keep in sync with kernel timekeeping.
*/
void update_vsyscall(struct timekeeper *tk)
{
u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
++vdso_data->tb_seq_count;
smp_wmb();
vdso_data->use_syscall = use_syscall;
vdso_data->xtime_coarse_sec = tk->xtime_sec;
vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >>
tk->tkr_mono.shift;
vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec;
vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
if (!use_syscall) {
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
vdso_data->cs_mult = tk->tkr_mono.mult;
vdso_data->cs_shift = tk->tkr_mono.shift;
}
smp_wmb();
++vdso_data->tb_seq_count;
}
void update_vsyscall_tz(void)
{
vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
vdso_data->tz_dsttime = sys_tz.tz_dsttime;
}