mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
f4ea6dcb08
Not all user space applications are ready to handle wide addresses. It's known that at least some JIT compilers use the higher bits in pointers to encode their own information. This collides with valid pointers once 512TB addresses are in use and leads to crashes.

To mitigate this, we are not going to allocate virtual address space above 128TB by default.

But userspace can ask for allocation from the full address space by specifying a hint address (with or without MAP_FIXED) above 128TB.

If the hint address is set above 128TB, but MAP_FIXED is not specified, we try to look for an unmapped area at the specified address. If it's already occupied, we look for an unmapped area in the *full* address space, rather than in the 128TB window.

This approach makes it easy to make an application's memory allocator aware of the large address space without manually tracking allocated virtual address space.

This is going to be a per-mmap decision, i.e., we can have some mmaps with larger addresses and others that do not.

A sample memory layout looks like:

10000000-10010000 r-xp 00000000 fc:00 9057045 /home/max_addr_512TB
10010000-10020000 r--p 00000000 fc:00 9057045 /home/max_addr_512TB
10020000-10030000 rw-p 00010000 fc:00 9057045 /home/max_addr_512TB
10029630000-10029660000 rw-p 00000000 00:00 0 [heap]
7fff834a0000-7fff834b0000 rw-p 00000000 00:00 0
7fff834b0000-7fff83670000 r-xp 00000000 fc:00 9177190 /lib/powerpc64le-linux-gnu/libc-2.23.so
7fff83670000-7fff83680000 r--p 001b0000 fc:00 9177190 /lib/powerpc64le-linux-gnu/libc-2.23.so
7fff83680000-7fff83690000 rw-p 001c0000 fc:00 9177190 /lib/powerpc64le-linux-gnu/libc-2.23.so
7fff83690000-7fff836a0000 rw-p 00000000 00:00 0
7fff836a0000-7fff836c0000 r-xp 00000000 00:00 0 [vdso]
7fff836c0000-7fff83700000 r-xp 00000000 fc:00 9177193 /lib/powerpc64le-linux-gnu/ld-2.23.so
7fff83700000-7fff83710000 r--p 00030000 fc:00 9177193 /lib/powerpc64le-linux-gnu/ld-2.23.so
7fff83710000-7fff83720000 rw-p 00040000 fc:00 9177193 /lib/powerpc64le-linux-gnu/ld-2.23.so
7fffdccf0000-7fffdcd20000 rw-p 00000000 00:00 0 [stack]
1000000000000-1000000010000 rw-p 00000000 00:00 0
1ffff83710000-1ffff83720000 rw-p 00000000 00:00 0

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
225 lines
6.1 KiB
C
225 lines
6.1 KiB
C
/*
|
|
* flexible mmap layout support
|
|
*
|
|
* Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
|
|
* All Rights Reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*
|
|
*
|
|
* Started by Ingo Molnar <mingo@elte.hu>
|
|
*/
|
|
|
|
#include <linux/personality.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/random.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <linux/sched/mm.h>
|
|
#include <linux/elf-randomize.h>
|
|
#include <linux/security.h>
|
|
#include <linux/mman.h>
|
|
|
|
/*
 * Top of mmap area (just below the process stack).
 *
 * Leave at least a ~128 MB hole on 32bit applications.
 *
 * On 64bit applications we randomise the stack by 1GB so we need to
 * space our mmap start address by a further 1GB, otherwise there is a
 * chance the mmap area will end up closer to the stack than our ulimit
 * requires.
 *
 * The gap is subtracted from DEFAULT_MAP_WINDOW (see mmap_base()), not
 * from TASK_SIZE, so the upper bound must be a fraction of
 * DEFAULT_MAP_WINDOW as well. Bounding it by TASK_SIZE would permit a
 * gap larger than the window whenever TASK_SIZE exceeds
 * DEFAULT_MAP_WINDOW, and the subtraction in mmap_base() would wrap
 * around for a large (but finite) stack rlimit.
 */
#define MIN_GAP32 (128*1024*1024)
#define MIN_GAP64 ((128 + 1024)*1024*1024UL)
#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64)
#define MAX_GAP ((DEFAULT_MAP_WINDOW)/6*5)
|
|
|
|
static inline int mmap_is_legacy(void)
|
|
{
|
|
if (current->personality & ADDR_COMPAT_LAYOUT)
|
|
return 1;
|
|
|
|
if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
|
|
return 1;
|
|
|
|
return sysctl_legacy_va_layout;
|
|
}
|
|
|
|
unsigned long arch_mmap_rnd(void)
|
|
{
|
|
unsigned long rnd;
|
|
|
|
/* 8MB for 32bit, 1GB for 64bit */
|
|
if (is_32bit_task())
|
|
rnd = get_random_long() % (1<<(23-PAGE_SHIFT));
|
|
else
|
|
rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT));
|
|
|
|
return rnd << PAGE_SHIFT;
|
|
}
|
|
|
|
static inline unsigned long mmap_base(unsigned long rnd)
|
|
{
|
|
unsigned long gap = rlimit(RLIMIT_STACK);
|
|
|
|
if (gap < MIN_GAP)
|
|
gap = MIN_GAP;
|
|
else if (gap > MAX_GAP)
|
|
gap = MAX_GAP;
|
|
|
|
return PAGE_ALIGN(DEFAULT_MAP_WINDOW - gap - rnd);
|
|
}
|
|
|
|
#ifdef CONFIG_PPC_RADIX_MMU
|
|
/*
|
|
* Same function as generic code used only for radix, because we don't need to overload
|
|
* the generic one. But we will have to duplicate, because hash select
|
|
* HAVE_ARCH_UNMAPPED_AREA
|
|
*/
|
|
static unsigned long
|
|
radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
|
|
unsigned long len, unsigned long pgoff,
|
|
unsigned long flags)
|
|
{
|
|
struct mm_struct *mm = current->mm;
|
|
struct vm_area_struct *vma;
|
|
struct vm_unmapped_area_info info;
|
|
|
|
if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
|
|
mm->context.addr_limit = TASK_SIZE;
|
|
|
|
if (len > mm->context.addr_limit - mmap_min_addr)
|
|
return -ENOMEM;
|
|
|
|
if (flags & MAP_FIXED)
|
|
return addr;
|
|
|
|
if (addr) {
|
|
addr = PAGE_ALIGN(addr);
|
|
vma = find_vma(mm, addr);
|
|
if (mm->context.addr_limit - len >= addr && addr >= mmap_min_addr &&
|
|
(!vma || addr + len <= vma->vm_start))
|
|
return addr;
|
|
}
|
|
|
|
info.flags = 0;
|
|
info.length = len;
|
|
info.low_limit = mm->mmap_base;
|
|
info.align_mask = 0;
|
|
|
|
if (unlikely(addr > DEFAULT_MAP_WINDOW))
|
|
info.high_limit = mm->context.addr_limit;
|
|
else
|
|
info.high_limit = DEFAULT_MAP_WINDOW;
|
|
|
|
return vm_unmapped_area(&info);
|
|
}
|
|
|
|
static unsigned long
|
|
radix__arch_get_unmapped_area_topdown(struct file *filp,
|
|
const unsigned long addr0,
|
|
const unsigned long len,
|
|
const unsigned long pgoff,
|
|
const unsigned long flags)
|
|
{
|
|
struct vm_area_struct *vma;
|
|
struct mm_struct *mm = current->mm;
|
|
unsigned long addr = addr0;
|
|
struct vm_unmapped_area_info info;
|
|
|
|
if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
|
|
mm->context.addr_limit = TASK_SIZE;
|
|
|
|
/* requested length too big for entire address space */
|
|
if (len > mm->context.addr_limit - mmap_min_addr)
|
|
return -ENOMEM;
|
|
|
|
if (flags & MAP_FIXED)
|
|
return addr;
|
|
|
|
/* requesting a specific address */
|
|
if (addr) {
|
|
addr = PAGE_ALIGN(addr);
|
|
vma = find_vma(mm, addr);
|
|
if (mm->context.addr_limit - len >= addr && addr >= mmap_min_addr &&
|
|
(!vma || addr + len <= vma->vm_start))
|
|
return addr;
|
|
}
|
|
|
|
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
|
|
info.length = len;
|
|
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
|
|
info.high_limit = mm->mmap_base;
|
|
info.align_mask = 0;
|
|
|
|
if (addr > DEFAULT_MAP_WINDOW)
|
|
info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
|
|
|
|
addr = vm_unmapped_area(&info);
|
|
if (!(addr & ~PAGE_MASK))
|
|
return addr;
|
|
VM_BUG_ON(addr != -ENOMEM);
|
|
|
|
/*
|
|
* A failed mmap() very likely causes application failure,
|
|
* so fall back to the bottom-up function here. This scenario
|
|
* can happen with large stack limits and large mmap()
|
|
* allocations.
|
|
*/
|
|
return radix__arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
|
|
}
|
|
|
|
static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
|
|
unsigned long random_factor)
|
|
{
|
|
if (mmap_is_legacy()) {
|
|
mm->mmap_base = TASK_UNMAPPED_BASE;
|
|
mm->get_unmapped_area = radix__arch_get_unmapped_area;
|
|
} else {
|
|
mm->mmap_base = mmap_base(random_factor);
|
|
mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown;
|
|
}
|
|
}
|
|
#else
|
|
/*
 * Dummy prototype for builds without CONFIG_PPC_RADIX_MMU: it only keeps
 * the call in arch_pick_mmap_layout() compiling. No definition is provided
 * in this file for that configuration.
 * NOTE(review): presumably radix_enabled() is constant false there, so the
 * call is dropped before link time - confirm.
 */
void radix__arch_pick_mmap_layout(struct mm_struct *mm,
				  unsigned long random_factor);
|
|
#endif
|
|
/*
|
|
* This function, called very early during the creation of a new
|
|
* process VM image, sets up which VM layout function to use:
|
|
*/
|
|
void arch_pick_mmap_layout(struct mm_struct *mm)
|
|
{
|
|
unsigned long random_factor = 0UL;
|
|
|
|
if (current->flags & PF_RANDOMIZE)
|
|
random_factor = arch_mmap_rnd();
|
|
|
|
if (radix_enabled())
|
|
return radix__arch_pick_mmap_layout(mm, random_factor);
|
|
/*
|
|
* Fall back to the standard layout if the personality
|
|
* bit is set, or if the expected stack growth is unlimited:
|
|
*/
|
|
if (mmap_is_legacy()) {
|
|
mm->mmap_base = TASK_UNMAPPED_BASE;
|
|
mm->get_unmapped_area = arch_get_unmapped_area;
|
|
} else {
|
|
mm->mmap_base = mmap_base(random_factor);
|
|
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
|
|
}
|
|
}
|