linux_dsm_epyc7002/arch/s390/mm/mmap.c
Kees Cook 8f2af155b5 exec: pass stack rlimit into mm layout functions
Patch series "exec: Pin stack limit during exec".

Attempts to solve problems with the stack limit changing during exec
continue to be frustrated[1][2].  In addition to the specific issues
around the Stack Clash family of flaws, Andy Lutomirski pointed out[3]
other places during exec where the stack limit is used and is assumed to
be unchanging.  Given the many places it gets used and the fact that it
can be manipulated/raced via setrlimit() and prlimit(), I think the only
way to handle this is to move away from the "current" view of the stack
limit and instead attach it to the bprm, and plumb this down into the
functions that need to know the stack limits.  This series implements
the approach.

[1] 04e35f4495 ("exec: avoid RLIMIT_STACK races with prlimit()")
[2] 779f4e1c6c ("Revert "exec: avoid RLIMIT_STACK races with prlimit()"")
[3] to security@kernel.org, "Subject: existing rlimit races?"

This patch (of 3):

Since it is possible that the stack rlimit can change externally during
exec (either via another thread calling setrlimit() or another process
calling prlimit()), provide a way to pass the rlimit down into the
per-architecture mm layout functions so that the rlimit can stay in the
bprm structure instead of sitting in the signal structure until exec is
finalized.

Link: http://lkml.kernel.org/r/1518638796-20819-2-git-send-email-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Hugh Dickins <hughd@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Greg KH <greg@kroah.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
Cc: Brad Spengler <spender@grsecurity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-04-11 10:28:37 -07:00

207 lines
5.0 KiB
C

// SPDX-License-Identifier: GPL-2.0+
/*
* flexible mmap layout support
*
* Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
* All Rights Reserved.
*
* Started by Ingo Molnar <mingo@elte.hu>
*/
#include <linux/elf-randomize.h>
#include <linux/personality.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/random.h>
#include <linux/compat.h>
#include <linux/security.h>
#include <asm/pgalloc.h>
#include <asm/elf.h>
static unsigned long stack_maxrandom_size(void)
{
if (!(current->flags & PF_RANDOMIZE))
return 0;
if (current->personality & ADDR_NO_RANDOMIZE)
return 0;
return STACK_RND_MASK << PAGE_SHIFT;
}
/*
* Top of mmap area (just below the process stack).
*
* Leave at least a ~32 MB hole.
*/
#define MIN_GAP (32*1024*1024)
#define MAX_GAP (STACK_TOP/6*5)
static inline int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
}
unsigned long arch_mmap_rnd(void)
{
return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT;
}
static unsigned long mmap_base_legacy(unsigned long rnd)
{
return TASK_UNMAPPED_BASE + rnd;
}
static inline unsigned long mmap_base(unsigned long rnd,
struct rlimit *rlim_stack)
{
unsigned long gap = rlim_stack->rlim_cur;
if (gap < MIN_GAP)
gap = MIN_GAP;
else if (gap > MAX_GAP)
gap = MAX_GAP;
gap &= PAGE_MASK;
return STACK_TOP - stack_maxrandom_size() - rnd - gap;
}
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct vm_unmapped_area_info info;
int rc;
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED)
goto check_asce_limit;
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vm_start_gap(vma)))
goto check_asce_limit;
}
info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
if (filp || (flags & MAP_SHARED))
info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
else
info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
if (addr & ~PAGE_MASK)
return addr;
check_asce_limit:
if (addr + len > current->mm->context.asce_limit &&
addr + len <= TASK_SIZE) {
rc = crst_table_upgrade(mm, addr + len);
if (rc)
return (unsigned long) rc;
}
return addr;
}
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
const unsigned long len, const unsigned long pgoff,
const unsigned long flags)
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
int rc;
/* requested length too big for entire address space */
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED)
goto check_asce_limit;
/* requesting a specific address */
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vm_start_gap(vma)))
goto check_asce_limit;
}
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
info.high_limit = mm->mmap_base;
if (filp || (flags & MAP_SHARED))
info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
else
info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
/*
* A failed mmap() very likely causes application failure,
* so fall back to the bottom-up function here. This scenario
* can happen with large stack limits and large mmap()
* allocations.
*/
if (addr & ~PAGE_MASK) {
VM_BUG_ON(addr != -ENOMEM);
info.flags = 0;
info.low_limit = TASK_UNMAPPED_BASE;
info.high_limit = TASK_SIZE;
addr = vm_unmapped_area(&info);
if (addr & ~PAGE_MASK)
return addr;
}
check_asce_limit:
if (addr + len > current->mm->context.asce_limit &&
addr + len <= TASK_SIZE) {
rc = crst_table_upgrade(mm, addr + len);
if (rc)
return (unsigned long) rc;
}
return addr;
}
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
if (current->flags & PF_RANDOMIZE)
random_factor = arch_mmap_rnd();
/*
* Fall back to the standard layout if the personality
* bit is set, or if the expected stack growth is unlimited:
*/
if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = mmap_base_legacy(random_factor);
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}