mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-17 02:46:47 +07:00
9145effd62
The sync (a.k.a. hwsync, a.k.a. heavyweight sync) in the context switch code is there to prevent MMIO accesses from being reordered, from the point of view of a single process, if that process gets migrated to a different CPU. It is not required, because an hwsync is already performed earlier in the context switch path. Comment this so it is clear enough if anything changes on the scheduler or the powerpc side. Remove the hwsync from _switch. This improves context switch performance by 2-3% on POWER8. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
87 lines
2.9 KiB
C
87 lines
2.9 KiB
C
/*
 * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
 */
#ifndef _ASM_POWERPC_BARRIER_H
#define _ASM_POWERPC_BARRIER_H

/*
 * Memory barrier.
 * The sync instruction guarantees that all memory accesses initiated
 * by this processor have been performed (with respect to all other
 * mechanisms that access memory).  The eieio instruction is a barrier
 * providing an ordering (separately) for (a) cacheable stores and (b)
 * loads and stores to non-cacheable memory (e.g. I/O devices).
 *
 * mb() prevents loads and stores being reordered across this point.
 * rmb() prevents loads being reordered across this point.
 * wmb() prevents stores being reordered across this point.
 * read_barrier_depends() prevents data-dependent loads being reordered
 *	across this point (nop on PPC).
 *
 * *mb() variants without smp_ prefix must order all types of memory
 * operations with one another. sync is the only instruction sufficient
 * to do this.
 *
 * For the smp_ barriers, ordering is for cacheable memory operations
 * only. We have to use the sync instruction for smp_mb(), since lwsync
 * doesn't order loads with respect to previous stores.  Lwsync can be
 * used for smp_rmb() and smp_wmb().
 *
 * However, on CPUs that don't support lwsync, lwsync actually maps to a
 * heavy-weight sync, so smp_wmb() can be a lighter-weight eieio.
 */
/*
 * Barriers without the smp_ prefix. All three expand to the same inline
 * asm: a single "sync" instruction. The "memory" clobber tells the
 * compiler that memory may have changed, so it will not cache values in
 * registers or move memory accesses across the statement.
 */
#define mb()   __asm__ __volatile__ ("sync" : : : "memory")
#define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
#define wmb()  __asm__ __volatile__ ("sync" : : : "memory")

/*
 * SMPWMB names the instruction emitted by dma_wmb() and __smp_wmb():
 * LWSYNC when __SUBARCH_HAS_LWSYNC is defined, eieio otherwise.
 */
#ifdef __SUBARCH_HAS_LWSYNC
#    define SMPWMB      LWSYNC
#else
#    define SMPWMB      eieio
#endif

/*
 * __lwsync() emits whatever instruction the LWSYNC macro names
 * (stringified via stringify_in_c) and acts as a compiler barrier
 * through the "memory" clobber.
 */
#define __lwsync()	__asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")

/*
 * dma_rmb() is simply __lwsync(); dma_wmb() emits the instruction
 * selected by SMPWMB, again with a "memory" clobber.
 */
#define dma_rmb()	__lwsync()
#define dma_wmb()	__asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")

/* __smp_lwsync() is an alias for __lwsync(). */
#define __smp_lwsync()	__lwsync()

/*
 * SMP barrier primitives:
 *   __smp_mb()  - expands to mb()
 *   __smp_rmb() - expands to __lwsync()
 *   __smp_wmb() - emits the instruction selected by SMPWMB, with a
 *                 "memory" clobber so it is also a compiler barrier
 */
#define __smp_mb()	mb()
#define __smp_rmb()	__lwsync()
#define __smp_wmb()	__asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")

/*
 * data_barrier(x) - wait for the value of x before going on.
 *
 * This is a barrier which prevents following instructions from being
 * started until the value of the argument x is known.  For example, if
 * x is a variable loaded from memory, this prevents following
 * instructions from being executed until the load has been performed.
 *
 * x is passed to the asm as a register input ("r"); the sequence is
 * "twi 0,%0,0; isync", and the "memory" clobber makes the statement a
 * compiler barrier as well.
 *
 * NOTE: the expansion already ends with ';'.  Writing data_barrier(x);
 * therefore yields an extra empty statement, so brace the macro when it
 * is the sole body of an if/else.
 */
#define data_barrier(x)	\
	__asm__ __volatile__("twi 0,%0,0; isync" : : "r" (x) : "memory");

/*
 * __smp_store_release(p, v) - store v to *p after an lwsync barrier.
 *
 * @p: pointer to the location to store to; may be any pointer
 *     expression (it is parenthesized before being dereferenced)
 * @v: value to store
 *
 * compiletime_assert_atomic_type() checks the pointed-to type at build
 * time.  __smp_lwsync() is issued before the WRITE_ONCE(), so the
 * barrier sits between earlier accesses and this store.
 */
#define __smp_store_release(p, v)					\
do {									\
	compiletime_assert_atomic_type(*(p));				\
	__smp_lwsync();							\
	WRITE_ONCE(*(p), v);						\
} while (0)

/*
 * __smp_load_acquire(p) - load *p, then issue an lwsync barrier.
 *
 * @p: pointer to the location to load from; may be any pointer
 *     expression (it is parenthesized before being dereferenced)
 *
 * The value is read with READ_ONCE() first and __smp_lwsync() is issued
 * afterwards, so the barrier sits between this load and later accesses.
 * compiletime_assert_atomic_type() checks the pointed-to type at build
 * time.  Evaluates to the loaded value.
 */
#define __smp_load_acquire(p)						\
({									\
	typeof(*(p)) ___p1 = READ_ONCE(*(p));				\
	compiletime_assert_atomic_type(*(p));				\
	__smp_lwsync();							\
	___p1;								\
})

/*
 * This must resolve to hwsync on SMP for the context switch path.
 * See _switch, and core scheduler context switch memory ordering
 * comments.
 *
 * Implemented as a plain alias for smp_mb().
 */
#define smp_mb__before_spinlock()	smp_mb()

#include <asm-generic/barrier.h>

#endif /* _ASM_POWERPC_BARRIER_H */