mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
1b2ee1266e
We try to enforce protection keys in software the same way that we do in hardware. (See long example below). But, we only want to do this when accessing our *own* process's memory. If GDB set PKRU[6].AD=1 (disable access to PKEY 6), then tried to PTRACE_POKE a target process which just happened to have some mprotect_pkey(pkey=6) memory, we do *not* want to deny the debugger access to that memory. PKRU is fundamentally a thread-local structure and we do not want to enforce it on access to _another_ thread's data. This gets especially tricky when we have workqueues or other delayed-work mechanisms that might run in a random process's context. We can check that we only enforce pkeys when operating on our *own* mm, but delayed work gets performed when a random user context is active. We might end up with a situation where a delayed-work gup fails when running randomly under its "own" task but succeeds when running under another process. We want to avoid that. To avoid that, we use the new GUP flag: FOLL_REMOTE and add a fault flag: FAULT_FLAG_REMOTE. They indicate that we are walking an mm which is not guranteed to be the same as current->mm and should not be subject to protection key enforcement. Thanks to Jerome Glisse for pointing out this scenario. Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Cc: Alexey Kardashevskiy <aik@ozlabs.ru> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Boaz Harrosh <boaz@plexistor.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Chinner <dchinner@redhat.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Gibson <david@gibson.dropbear.id.au> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Dominik Dingel <dingel@linux.vnet.ibm.com> Cc: Dominik Vogt <vogt@linux.vnet.ibm.com> Cc: Eric B Munson <emunson@akamai.com> Cc: Geliang Tang <geliangtang@163.com> Cc: Guan Xuetao <gxt@mprc.pku.edu.cn> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jan Kara <jack@suse.cz> Cc: Jason Low <jason.low2@hp.com> Cc: Jerome Marchand <jmarchan@redhat.com> Cc: Joerg Roedel <joro@8bytes.org> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Laurent Dufour <ldufour@linux.vnet.ibm.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Matthew Wilcox <willy@linux.intel.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Hocko <mhocko@suse.com> Cc: Mikulas Patocka <mpatocka@redhat.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Rik van Riel <riel@redhat.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Shachar Raindel <raindel@mellanox.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Xie XiuQi <xiexiuqi@huawei.com> Cc: iommu@lists.linux-foundation.org Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-s390@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
165 lines
4.7 KiB
C
165 lines
4.7 KiB
C
#ifndef __ASM_POWERPC_MMU_CONTEXT_H
|
|
#define __ASM_POWERPC_MMU_CONTEXT_H
|
|
#ifdef __KERNEL__
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/spinlock.h>
|
|
#include <asm/mmu.h>
|
|
#include <asm/cputable.h>
|
|
#include <asm/cputhreads.h>
|
|
|
|
/*
|
|
* Most if the context management is out of line
|
|
*/
|
|
extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
|
|
extern void destroy_context(struct mm_struct *mm);
|
|
#ifdef CONFIG_SPAPR_TCE_IOMMU
|
|
struct mm_iommu_table_group_mem_t;
|
|
|
|
extern bool mm_iommu_preregistered(void);
|
|
extern long mm_iommu_get(unsigned long ua, unsigned long entries,
|
|
struct mm_iommu_table_group_mem_t **pmem);
|
|
extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem);
|
|
extern void mm_iommu_init(mm_context_t *ctx);
|
|
extern void mm_iommu_cleanup(mm_context_t *ctx);
|
|
extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua,
|
|
unsigned long size);
|
|
extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua,
|
|
unsigned long entries);
|
|
extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
|
|
unsigned long ua, unsigned long *hpa);
|
|
extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
|
|
extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
|
|
#endif
|
|
|
|
extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
|
|
extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
|
|
extern void set_context(unsigned long id, pgd_t *pgd);
|
|
|
|
#ifdef CONFIG_PPC_BOOK3S_64
|
|
extern int __init_new_context(void);
|
|
extern void __destroy_context(int context_id);
|
|
static inline void mmu_context_init(void) { }
|
|
#else
|
|
extern unsigned long __init_new_context(void);
|
|
extern void __destroy_context(unsigned long context_id);
|
|
extern void mmu_context_init(void);
|
|
#endif
|
|
|
|
extern void switch_cop(struct mm_struct *next);
|
|
extern int use_cop(unsigned long acop, struct mm_struct *mm);
|
|
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
|
|
|
|
/*
|
|
* switch_mm is the entry point called from the architecture independent
|
|
* code in kernel/sched/core.c
|
|
*/
|
|
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
|
struct task_struct *tsk)
|
|
{
|
|
/* Mark this context has been used on the new CPU */
|
|
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
|
|
|
|
/* 32-bit keeps track of the current PGDIR in the thread struct */
|
|
#ifdef CONFIG_PPC32
|
|
tsk->thread.pgdir = next->pgd;
|
|
#endif /* CONFIG_PPC32 */
|
|
|
|
/* 64-bit Book3E keeps track of current PGD in the PACA */
|
|
#ifdef CONFIG_PPC_BOOK3E_64
|
|
get_paca()->pgd = next->pgd;
|
|
#endif
|
|
/* Nothing else to do if we aren't actually switching */
|
|
if (prev == next)
|
|
return;
|
|
|
|
#ifdef CONFIG_PPC_ICSWX
|
|
/* Switch coprocessor context only if prev or next uses a coprocessor */
|
|
if (prev->context.acop || next->context.acop)
|
|
switch_cop(next);
|
|
#endif /* CONFIG_PPC_ICSWX */
|
|
|
|
/* We must stop all altivec streams before changing the HW
|
|
* context
|
|
*/
|
|
#ifdef CONFIG_ALTIVEC
|
|
if (cpu_has_feature(CPU_FTR_ALTIVEC))
|
|
asm volatile ("dssall");
|
|
#endif /* CONFIG_ALTIVEC */
|
|
|
|
/* The actual HW switching method differs between the various
|
|
* sub architectures.
|
|
*/
|
|
#ifdef CONFIG_PPC_STD_MMU_64
|
|
switch_slb(tsk, next);
|
|
#else
|
|
/* Out of line for now */
|
|
switch_mmu_context(prev, next);
|
|
#endif
|
|
|
|
}
|
|
|
|
#define deactivate_mm(tsk,mm) do { } while (0)
|
|
|
|
/*
|
|
* After we have set current->mm to a new value, this activates
|
|
* the context for the new mm so we see the new mappings.
|
|
*/
|
|
static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
|
|
{
|
|
unsigned long flags;
|
|
|
|
local_irq_save(flags);
|
|
switch_mm(prev, next, current);
|
|
local_irq_restore(flags);
|
|
}
|
|
|
|
/* We don't currently use enter_lazy_tlb() for anything */
|
|
static inline void enter_lazy_tlb(struct mm_struct *mm,
|
|
struct task_struct *tsk)
|
|
{
|
|
/* 64-bit Book3E keeps track of current PGD in the PACA */
|
|
#ifdef CONFIG_PPC_BOOK3E_64
|
|
get_paca()->pgd = NULL;
|
|
#endif
|
|
}
|
|
|
|
static inline void arch_dup_mmap(struct mm_struct *oldmm,
|
|
struct mm_struct *mm)
|
|
{
|
|
}
|
|
|
|
static inline void arch_exit_mmap(struct mm_struct *mm)
|
|
{
|
|
}
|
|
|
|
static inline void arch_unmap(struct mm_struct *mm,
|
|
struct vm_area_struct *vma,
|
|
unsigned long start, unsigned long end)
|
|
{
|
|
if (start <= mm->context.vdso_base && mm->context.vdso_base < end)
|
|
mm->context.vdso_base = 0;
|
|
}
|
|
|
|
static inline void arch_bprm_mm_init(struct mm_struct *mm,
|
|
struct vm_area_struct *vma)
|
|
{
|
|
}
|
|
|
|
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
|
bool write, bool foreign)
|
|
{
|
|
/* by default, allow everything */
|
|
return true;
|
|
}
|
|
|
|
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
|
|
{
|
|
/* by default, allow everything */
|
|
return true;
|
|
}
|
|
#endif /* __KERNEL__ */
|
|
#endif /* __ASM_POWERPC_MMU_CONTEXT_H */
|