linux_dsm_epyc7002/arch/riscv/kernel/sys_riscv.c
Yash Shah e0d17c842c
RISC-V: Don't allow write+exec only page mapping request in mmap
As per the table 4.4 of version "20190608-Priv-MSU-Ratified" of the
RISC-V instruction set manual[0], the PTE permission bit combination of
"write+exec only" is reserved for future use. Hence, don't allow such
mapping request in mmap call.

An issue is been reported by David Abdurachmanov, that while running
stress-ng with "sysbadaddr" argument, RCU stalls are observed on RISC-V
specific kernel.

This issue arises when the stress-sysbadaddr request for pages with
"write+exec only" permission bits and then passes the address obtain
from this mmap call to various system call. For the riscv kernel, the
mmap call should fail for this particular combination of permission bits
since it's not valid.

[0]: http://dabbelt.com/~palmer/keep/riscv-isa-manual/riscv-privileged-20190608-1.pdf

Signed-off-by: Yash Shah <yash.shah@sifive.com>
Reported-by: David Abdurachmanov <david.abdurachmanov@gmail.com>
[Palmer: Refer to the latest ISA specification at the only link I could
find, and update the terminology.]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
2020-06-18 17:28:53 -07:00

74 lines
2.4 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2014 Darius Rad <darius@bluespec.com>
* Copyright (C) 2017 SiFive
*/
#include <linux/syscalls.h>
#include <asm/unistd.h>
#include <asm/cacheflush.h>
#include <asm-generic/mman-common.h>
static long riscv_sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, off_t offset,
unsigned long page_shift_offset)
{
if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
return -EINVAL;
if ((prot & PROT_WRITE) && (prot & PROT_EXEC))
if (unlikely(!(prot & PROT_READ)))
return -EINVAL;
return ksys_mmap_pgoff(addr, len, prot, flags, fd,
offset >> (PAGE_SHIFT - page_shift_offset));
}
#ifdef CONFIG_64BIT
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
unsigned long, fd, off_t, offset)
{
return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0);
}
#else
SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
unsigned long, fd, off_t, offset)
{
/*
* Note that the shift for mmap2 is constant (12),
* regardless of PAGE_SIZE
*/
return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12);
}
#endif /* !CONFIG_64BIT */
/*
* Allows the instruction cache to be flushed from userspace. Despite RISC-V
* having a direct 'fence.i' instruction available to userspace (which we
* can't trap!), that's not actually viable when running on Linux because the
* kernel might schedule a process on another hart. There is no way for
* userspace to handle this without invoking the kernel (as it doesn't know the
* thread->hart mappings), so we've defined a RISC-V specific system call to
* flush the instruction cache.
*
* sys_riscv_flush_icache() is defined to flush the instruction cache over an
* address range, with the flush applying to either all threads or just the
* caller. We don't currently do anything with the address range, that's just
* in there for forwards compatibility.
*/
SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
uintptr_t, flags)
{
/* Check the reserved flags. */
if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL))
return -EINVAL;
flush_icache_mm(current->mm, flags & SYS_RISCV_FLUSH_ICACHE_LOCAL);
return 0;
}