mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-17 00:16:40 +07:00
e8c24d3a23
This patch adds two new system calls: int pkey_alloc(unsigned long flags, unsigned long init_access_rights) int pkey_free(int pkey); These implement an "allocator" for the protection keys themselves, which can be thought of as analogous to the allocator that the kernel has for file descriptors. The kernel tracks which numbers are in use, and only allows operations on keys that are valid. A key which was not obtained by pkey_alloc() may not, for instance, be passed to pkey_mprotect(). These system calls are also very important given the kernel's use of pkeys to implement execute-only support. These help ensure that userspace can never assume that it has control of a key unless it first asks the kernel. The kernel does not promise to preserve PKRU (right register) contents except for allocated pkeys. The 'init_access_rights' argument to pkey_alloc() specifies the rights that will be established for the returned pkey. For instance: pkey = pkey_alloc(flags, PKEY_DENY_WRITE); will allocate 'pkey', but also sets the bits in PKRU[1] such that writing to 'pkey' is already denied. The kernel does not prevent pkey_free() from successfully freeing in-use pkeys (those still assigned to a memory range by pkey_mprotect()). It would be expensive to implement the checks for this, so we instead say, "Just don't do it" since sane software will never do it anyway. Any piece of userspace calling pkey_alloc() needs to be prepared for it to fail. Why? pkey_alloc() returns the same error code (ENOSPC) when there are no pkeys and when pkeys are unsupported. They can be unsupported for a whole host of reasons, so apps must be prepared for this. Also, libraries or LD_PRELOADs might steal keys before an application gets access to them. This allocation mechanism could be implemented in userspace. Even if we did it in userspace, we would still need additional user/kernel interfaces to tell userspace which keys are being used by the kernel internally (such as for execute-only mappings). Having the kernel provide this facility completely removes the need for these additional interfaces, or having an implementation of this in userspace at all. Note that we have to make changes to all of the architectures that do not use mman-common.h because we use the new PKEY_DENY_ACCESS/WRITE macros in arch-independent code. 1. PKRU is the Protection Key Rights User register. It is a usermode-accessible register that controls whether writes and/or access to each individual pkey is allowed or denied. Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Acked-by: Mel Gorman <mgorman@techsingularity.net> Cc: linux-arch@vger.kernel.org Cc: Dave Hansen <dave@sr71.net> Cc: arnd@arndb.de Cc: linux-api@vger.kernel.org Cc: linux-mm@kvack.org Cc: luto@kernel.org Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20160729163015.444FE75F@viggo.jf.intel.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
124 lines
3.6 KiB
C
124 lines
3.6 KiB
C
/*
|
|
* Intel Memory Protection Keys management
|
|
* Copyright (c) 2015, Intel Corporation.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms and conditions of the GNU General Public License,
|
|
* version 2, as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
* more details.
|
|
*/
|
|
#include <linux/mm_types.h> /* mm_struct, vma, etc... */
|
|
#include <linux/pkeys.h> /* PKEY_* */
|
|
#include <uapi/asm-generic/mman-common.h>
|
|
|
|
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
|
|
#include <asm/mmu_context.h> /* vma_pkey() */
|
|
#include <asm/fpu/internal.h> /* fpregs_active() */
|
|
|
|
int __execute_only_pkey(struct mm_struct *mm)
|
|
{
|
|
bool need_to_set_mm_pkey = false;
|
|
int execute_only_pkey = mm->context.execute_only_pkey;
|
|
int ret;
|
|
|
|
/* Do we need to assign a pkey for mm's execute-only maps? */
|
|
if (execute_only_pkey == -1) {
|
|
/* Go allocate one to use, which might fail */
|
|
execute_only_pkey = mm_pkey_alloc(mm);
|
|
if (execute_only_pkey < 0)
|
|
return -1;
|
|
need_to_set_mm_pkey = true;
|
|
}
|
|
|
|
/*
|
|
* We do not want to go through the relatively costly
|
|
* dance to set PKRU if we do not need to. Check it
|
|
* first and assume that if the execute-only pkey is
|
|
* write-disabled that we do not have to set it
|
|
* ourselves. We need preempt off so that nobody
|
|
* can make fpregs inactive.
|
|
*/
|
|
preempt_disable();
|
|
if (!need_to_set_mm_pkey &&
|
|
fpregs_active() &&
|
|
!__pkru_allows_read(read_pkru(), execute_only_pkey)) {
|
|
preempt_enable();
|
|
return execute_only_pkey;
|
|
}
|
|
preempt_enable();
|
|
|
|
/*
|
|
* Set up PKRU so that it denies access for everything
|
|
* other than execution.
|
|
*/
|
|
ret = arch_set_user_pkey_access(current, execute_only_pkey,
|
|
PKEY_DISABLE_ACCESS);
|
|
/*
|
|
* If the PKRU-set operation failed somehow, just return
|
|
* 0 and effectively disable execute-only support.
|
|
*/
|
|
if (ret) {
|
|
mm_set_pkey_free(mm, execute_only_pkey);
|
|
return -1;
|
|
}
|
|
|
|
/* We got one, store it and use it from here on out */
|
|
if (need_to_set_mm_pkey)
|
|
mm->context.execute_only_pkey = execute_only_pkey;
|
|
return execute_only_pkey;
|
|
}
|
|
|
|
static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
|
|
{
|
|
/* Do this check first since the vm_flags should be hot */
|
|
if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC)
|
|
return false;
|
|
if (vma_pkey(vma) != vma->vm_mm->context.execute_only_pkey)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* This is only called for *plain* mprotect calls.
|
|
*/
|
|
int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey)
|
|
{
|
|
/*
|
|
* Is this an mprotect_pkey() call? If so, never
|
|
* override the value that came from the user.
|
|
*/
|
|
if (pkey != -1)
|
|
return pkey;
|
|
/*
|
|
* Look for a protection-key-drive execute-only mapping
|
|
* which is now being given permissions that are not
|
|
* execute-only. Move it back to the default pkey.
|
|
*/
|
|
if (vma_is_pkey_exec_only(vma) &&
|
|
(prot & (PROT_READ|PROT_WRITE))) {
|
|
return 0;
|
|
}
|
|
/*
|
|
* The mapping is execute-only. Go try to get the
|
|
* execute-only protection key. If we fail to do that,
|
|
* fall through as if we do not have execute-only
|
|
* support.
|
|
*/
|
|
if (prot == PROT_EXEC) {
|
|
pkey = execute_only_pkey(vma->vm_mm);
|
|
if (pkey > 0)
|
|
return pkey;
|
|
}
|
|
/*
|
|
* This is a vanilla, non-pkey mprotect (or we failed to
|
|
* setup execute-only), inherit the pkey from the VMA we
|
|
* are working on.
|
|
*/
|
|
return vma_pkey(vma);
|
|
}
|