mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-14 23:16:25 +07:00
85bcfaf69c
Implementing pte_update with pte_xchg (which uses cmpxchg) is inefficient. A single larx/stcx. works fine, no need for the less efficient cmpxchg sequence. Then remove the memory barriers from the operation. There is a requirement for TLB flushing to load mm_cpumask after the store that reduces pte permissions, which is moved into the TLB flush code. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
100 lines
2.7 KiB
C
100 lines
2.7 KiB
C
/*
|
|
* Common implementation of switch_mm_irqs_off
|
|
*
|
|
* Copyright IBM Corp. 2017
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*
|
|
*/
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/sched/mm.h>
|
|
|
|
#include <asm/mmu_context.h>
|
|
|
|
#if defined(CONFIG_PPC32)
|
|
static inline void switch_mm_pgdir(struct task_struct *tsk,
|
|
struct mm_struct *mm)
|
|
{
|
|
/* 32-bit keeps track of the current PGDIR in the thread struct */
|
|
tsk->thread.pgdir = mm->pgd;
|
|
}
|
|
#elif defined(CONFIG_PPC_BOOK3E_64)
|
|
static inline void switch_mm_pgdir(struct task_struct *tsk,
|
|
struct mm_struct *mm)
|
|
{
|
|
/* 64-bit Book3E keeps track of current PGD in the PACA */
|
|
get_paca()->pgd = mm->pgd;
|
|
}
|
|
#else
|
|
static inline void switch_mm_pgdir(struct task_struct *tsk,
|
|
struct mm_struct *mm) { }
|
|
#endif
|
|
|
|
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
|
struct task_struct *tsk)
|
|
{
|
|
bool new_on_cpu = false;
|
|
|
|
/* Mark this context has been used on the new CPU */
|
|
if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
|
|
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
|
|
inc_mm_active_cpus(next);
|
|
|
|
/*
|
|
* This full barrier orders the store to the cpumask above vs
|
|
* a subsequent operation which allows this CPU to begin loading
|
|
* translations for next.
|
|
*
|
|
* When using the radix MMU that operation is the load of the
|
|
* MMU context id, which is then moved to SPRN_PID.
|
|
*
|
|
* For the hash MMU it is either the first load from slb_cache
|
|
* in switch_slb(), and/or the store of paca->mm_ctx_id in
|
|
* copy_mm_to_paca().
|
|
*
|
|
* On the other side, the barrier is in mm/tlb-radix.c for
|
|
* radix which orders earlier stores to clear the PTEs vs
|
|
* the load of mm_cpumask. And pte_xchg which does the same
|
|
* thing for hash.
|
|
*
|
|
* This full barrier is needed by membarrier when switching
|
|
* between processes after store to rq->curr, before user-space
|
|
* memory accesses.
|
|
*/
|
|
smp_mb();
|
|
|
|
new_on_cpu = true;
|
|
}
|
|
|
|
/* Some subarchs need to track the PGD elsewhere */
|
|
switch_mm_pgdir(tsk, next);
|
|
|
|
/* Nothing else to do if we aren't actually switching */
|
|
if (prev == next)
|
|
return;
|
|
|
|
/*
|
|
* We must stop all altivec streams before changing the HW
|
|
* context
|
|
*/
|
|
if (cpu_has_feature(CPU_FTR_ALTIVEC))
|
|
asm volatile ("dssall");
|
|
|
|
if (new_on_cpu)
|
|
radix_kvm_prefetch_workaround(next);
|
|
else
|
|
membarrier_arch_switch_mm(prev, next, tsk);
|
|
|
|
/*
|
|
* The actual HW switching method differs between the various
|
|
* sub architectures. Out of line for now
|
|
*/
|
|
switch_mmu_context(prev, next, tsk);
|
|
}
|
|
|