mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-18 22:16:10 +07:00
acd547b298
PKRU is the register that lets you disallow writes or all access to a given protection key. The XSAVE hardware defines an "init state" of 0 for PKRU: its most permissive state, allowing access/writes to everything. Since we start off all new processes with the init state, we start all processes off with the most permissive possible PKRU. This is unfortunate. If a thread is clone()'d [1] before a program has time to set PKRU to a restrictive value, that thread will be able to write to all data, no matter what pkey is set on it. This weakens any integrity guarantees that we want pkeys to provide. To fix this, we define a very restrictive PKRU to override the XSAVE-provided value when we create a new FPU context. We choose a value that only allows access to pkey 0, which is as restrictive as we can practically make it. This does not cause any practical problems with applications using protection keys because we require them to specify initial permissions for each key when it is allocated, which override the restrictive default. In the end, this ensures that threads which do not know how to manage their own pkey rights cannot do damage to data which is pkey-protected. [1] I would have thought this was a pretty contrived scenario, except that I heard a bug report from an MPX user who was creating threads in some very early code before main(). It may be crazy, but folks evidently _do_ it. Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Cc: linux-arch@vger.kernel.org Cc: Dave Hansen <dave@sr71.net> Cc: mgorman@techsingularity.net Cc: arnd@arndb.de Cc: linux-api@vger.kernel.org Cc: linux-mm@kvack.org Cc: luto@kernel.org Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20160729163021.F3C25D4A@viggo.jf.intel.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
106 lines
2.6 KiB
C
106 lines
2.6 KiB
C
#ifndef _ASM_X86_PKEYS_H
|
|
#define _ASM_X86_PKEYS_H
|
|
|
|
/*
 * Number of protection keys available: 16 when the boot CPU reports
 * X86_FEATURE_OSPKE, otherwise 1.
 */
#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
|
|
|
|
/*
 * Prototype only; the definition lives outside this header.
 * NOTE(review): presumably applies init_val as the access rights of
 * pkey for tsk -- confirm against the definition.
 */
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
			      unsigned long init_val);
|
|
|
|
/*
|
|
* Try to dedicate one of the protection keys to be used as an
|
|
* execute-only protection key.
|
|
*/
|
|
extern int __execute_only_pkey(struct mm_struct *mm);
|
|
static inline int execute_only_pkey(struct mm_struct *mm)
|
|
{
|
|
if (!boot_cpu_has(X86_FEATURE_OSPKE))
|
|
return 0;
|
|
|
|
return __execute_only_pkey(mm);
|
|
}
|
|
|
|
extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma,
|
|
int prot, int pkey);
|
|
static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
|
|
int prot, int pkey)
|
|
{
|
|
if (!boot_cpu_has(X86_FEATURE_OSPKE))
|
|
return 0;
|
|
|
|
return __arch_override_mprotect_pkey(vma, prot, pkey);
|
|
}
|
|
|
|
/* Prototype only; the definition lives outside this header. */
int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
				unsigned long init_val);
|
|
|
|
/* Mask made of the four VM_PKEY_BIT0..VM_PKEY_BIT3 flags OR-ed together. */
#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3)
|
|
|
|
/*
 * Accessors for the per-mm pkey allocation bitmap, stored in
 * mm->context.pkey_allocation_map.  Bit N set means pkey N is allocated.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. "base + i" for mm, or "key & mask" for pkey) expand with the
 * intended precedence.  Each argument is still evaluated exactly once.
 */

/* Lvalue naming the allocation bitmap of @mm. */
#define mm_pkey_allocation_map(mm)	((mm)->context.pkey_allocation_map)

/* Mark @pkey as allocated in @mm's bitmap. */
#define mm_set_pkey_allocated(mm, pkey) do {		\
	mm_pkey_allocation_map(mm) |= (1U << (pkey));	\
} while (0)

/* Mark @pkey as free in @mm's bitmap. */
#define mm_set_pkey_free(mm, pkey) do {			\
	mm_pkey_allocation_map(mm) &= ~(1U << (pkey));	\
} while (0)
|
|
|
|
/*
 * Report whether @pkey is marked as allocated in @mm's allocation map.
 *
 * @pkey is used as a shift count, and callers such as mm_pkey_free()
 * forward it without range validation.  A negative value, or one at or
 * beyond arch_max_pkey(), can never name an allocated key, and shifting
 * by a negative or too-large amount is undefined behavior, so such
 * values are rejected up front.
 *
 * Returns true if the key's bit is set in the map, false otherwise
 * (including for out-of-range keys).
 */
static inline
bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
{
	if (pkey < 0 || pkey >= arch_max_pkey())
		return false;

	return mm_pkey_allocation_map(mm) & (1U << pkey);
}
|
|
|
|
/*
|
|
* Returns a positive, 4-bit key on success, or -1 on failure.
|
|
*/
|
|
static inline
|
|
int mm_pkey_alloc(struct mm_struct *mm)
|
|
{
|
|
/*
|
|
* Note: this is the one and only place we make sure
|
|
* that the pkey is valid as far as the hardware is
|
|
* concerned. The rest of the kernel trusts that
|
|
* only good, valid pkeys come out of here.
|
|
*/
|
|
u16 all_pkeys_mask = ((1U << arch_max_pkey()) - 1);
|
|
int ret;
|
|
|
|
/*
|
|
* Are we out of pkeys? We must handle this specially
|
|
* because ffz() behavior is undefined if there are no
|
|
* zeros.
|
|
*/
|
|
if (mm_pkey_allocation_map(mm) == all_pkeys_mask)
|
|
return -1;
|
|
|
|
ret = ffz(mm_pkey_allocation_map(mm));
|
|
|
|
mm_set_pkey_allocated(mm, ret);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static inline
|
|
int mm_pkey_free(struct mm_struct *mm, int pkey)
|
|
{
|
|
/*
|
|
* pkey 0 is special, always allocated and can never
|
|
* be freed.
|
|
*/
|
|
if (!pkey)
|
|
return -EINVAL;
|
|
if (!mm_pkey_is_allocated(mm, pkey))
|
|
return -EINVAL;
|
|
|
|
mm_set_pkey_free(mm, pkey);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Prototypes only; the definitions live outside this header. */
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
			      unsigned long init_val);
int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
				unsigned long init_val);
void copy_init_pkru_to_fpregs(void);
|
|
|
|
#endif /*_ASM_X86_PKEYS_H */
|