mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-19 06:08:27 +07:00
3ccfebedd8
Allow PowerPC to skip the full memory barrier in switch_mm(), and only issue the barrier when scheduling into a task belonging to a process that has registered to use expedited private. Threads targeting the same VM but which belong to different thread groups is a tricky case. It has a few consequences: It turns out that we cannot rely on get_nr_threads(p) to count the number of threads using a VM. We can use (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) instead to skip the synchronize_sched() for cases where the VM only has a single user, and that user only has a single thread. It also turns out that we cannot use for_each_thread() to set thread flags in all threads using a VM, as it only iterates on the thread group. Therefore, test the membarrier state variable directly rather than relying on thread flags. This means membarrier_register_private_expedited() needs to set the MEMBARRIER_STATE_PRIVATE_EXPEDITED flag, issue synchronize_sched(), and only then set MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY which allows private expedited membarrier commands to succeed. membarrier_arch_switch_mm() now tests for the MEMBARRIER_STATE_PRIVATE_EXPEDITED flag. Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Acked-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Andrea Parri <parri.andrea@gmail.com> Cc: Andrew Hunter <ahh@google.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Avi Kivity <avi@scylladb.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Dave Watson <davejwatson@fb.com> Cc: David Sehr <sehr@google.com> Cc: Greg Hackmann <ghackmann@google.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Maged Michael <maged.michael@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Will Deacon <will.deacon@arm.com> Cc: linux-api@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20180129202020.8515-3-mathieu.desnoyers@efficios.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
98 lines
2.6 KiB
C
98 lines
2.6 KiB
C
/*
|
|
* Common implementation of switch_mm_irqs_off
|
|
*
|
|
* Copyright IBM Corp. 2017
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*
|
|
*/
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/sched/mm.h>
|
|
|
|
#include <asm/mmu_context.h>
|
|
|
|
#if defined(CONFIG_PPC32)
|
|
static inline void switch_mm_pgdir(struct task_struct *tsk,
|
|
struct mm_struct *mm)
|
|
{
|
|
/* 32-bit keeps track of the current PGDIR in the thread struct */
|
|
tsk->thread.pgdir = mm->pgd;
|
|
}
|
|
#elif defined(CONFIG_PPC_BOOK3E_64)
|
|
static inline void switch_mm_pgdir(struct task_struct *tsk,
|
|
struct mm_struct *mm)
|
|
{
|
|
/* 64-bit Book3E keeps track of current PGD in the PACA */
|
|
get_paca()->pgd = mm->pgd;
|
|
}
|
|
#else
|
|
static inline void switch_mm_pgdir(struct task_struct *tsk,
|
|
struct mm_struct *mm) { }
|
|
#endif
|
|
|
|
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
|
struct task_struct *tsk)
|
|
{
|
|
bool new_on_cpu = false;
|
|
|
|
/* Mark this context has been used on the new CPU */
|
|
if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
|
|
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
|
|
inc_mm_active_cpus(next);
|
|
|
|
/*
|
|
* This full barrier orders the store to the cpumask above vs
|
|
* a subsequent operation which allows this CPU to begin loading
|
|
* translations for next.
|
|
*
|
|
* When using the radix MMU that operation is the load of the
|
|
* MMU context id, which is then moved to SPRN_PID.
|
|
*
|
|
* For the hash MMU it is either the first load from slb_cache
|
|
* in switch_slb(), and/or the store of paca->mm_ctx_id in
|
|
* copy_mm_to_paca().
|
|
*
|
|
* On the read side the barrier is in pte_xchg(), which orders
|
|
* the store to the PTE vs the load of mm_cpumask.
|
|
*
|
|
* This full barrier is needed by membarrier when switching
|
|
* between processes after store to rq->curr, before user-space
|
|
* memory accesses.
|
|
*/
|
|
smp_mb();
|
|
|
|
new_on_cpu = true;
|
|
}
|
|
|
|
/* Some subarchs need to track the PGD elsewhere */
|
|
switch_mm_pgdir(tsk, next);
|
|
|
|
/* Nothing else to do if we aren't actually switching */
|
|
if (prev == next)
|
|
return;
|
|
|
|
/*
|
|
* We must stop all altivec streams before changing the HW
|
|
* context
|
|
*/
|
|
if (cpu_has_feature(CPU_FTR_ALTIVEC))
|
|
asm volatile ("dssall");
|
|
|
|
if (new_on_cpu)
|
|
radix_kvm_prefetch_workaround(next);
|
|
else
|
|
membarrier_arch_switch_mm(prev, next, tsk);
|
|
|
|
/*
|
|
* The actual HW switching method differs between the various
|
|
* sub architectures. Out of line for now
|
|
*/
|
|
switch_mmu_context(prev, next, tsk);
|
|
}
|
|
|