mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
c32ffce0f6
After a bunch of benchmarking on the interaction between dmb and pldw, it turns out that issuing the pldw *after* the dmb instruction can give modest performance gains (~3% atomic_add_return improvement on a dual A15). This patch adds prefetchw invocations to our barriered atomic operations including cmpxchg, test_and_xxx and futexes. Signed-off-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
289 lines
6.3 KiB
C
289 lines
6.3 KiB
C
#ifndef __ASM_ARM_CMPXCHG_H
|
|
#define __ASM_ARM_CMPXCHG_H
|
|
|
|
#include <linux/irqflags.h>
|
|
#include <linux/prefetch.h>
|
|
#include <asm/barrier.h>
|
|
|
|
#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
|
|
/*
|
|
* On the StrongARM, "swp" is terminally broken since it bypasses the
|
|
* cache totally. This means that the cache becomes inconsistent, and,
|
|
* since we use normal loads/stores as well, this is really bad.
|
|
* Typically, this causes oopsen in filp_close, but could have other,
|
|
* more disastrous effects. There are two work-arounds:
|
|
* 1. Disable interrupts and emulate the atomic swap
|
|
* 2. Clean the cache, perform atomic swap, flush the cache
|
|
*
|
|
* We choose (1) since its the "easiest" to achieve here and is not
|
|
* dependent on the processor type.
|
|
*
|
|
* NOTE that this solution won't work on an SMP system, so explcitly
|
|
* forbid it here.
|
|
*/
|
|
#define swp_is_buggy
|
|
#endif
|
|
|
|
static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
|
|
{
|
|
extern void __bad_xchg(volatile void *, int);
|
|
unsigned long ret;
|
|
#ifdef swp_is_buggy
|
|
unsigned long flags;
|
|
#endif
|
|
#if __LINUX_ARM_ARCH__ >= 6
|
|
unsigned int tmp;
|
|
#endif
|
|
|
|
smp_mb();
|
|
prefetchw((const void *)ptr);
|
|
|
|
switch (size) {
|
|
#if __LINUX_ARM_ARCH__ >= 6
|
|
case 1:
|
|
asm volatile("@ __xchg1\n"
|
|
"1: ldrexb %0, [%3]\n"
|
|
" strexb %1, %2, [%3]\n"
|
|
" teq %1, #0\n"
|
|
" bne 1b"
|
|
: "=&r" (ret), "=&r" (tmp)
|
|
: "r" (x), "r" (ptr)
|
|
: "memory", "cc");
|
|
break;
|
|
case 4:
|
|
asm volatile("@ __xchg4\n"
|
|
"1: ldrex %0, [%3]\n"
|
|
" strex %1, %2, [%3]\n"
|
|
" teq %1, #0\n"
|
|
" bne 1b"
|
|
: "=&r" (ret), "=&r" (tmp)
|
|
: "r" (x), "r" (ptr)
|
|
: "memory", "cc");
|
|
break;
|
|
#elif defined(swp_is_buggy)
|
|
#ifdef CONFIG_SMP
|
|
#error SMP is not supported on this platform
|
|
#endif
|
|
case 1:
|
|
raw_local_irq_save(flags);
|
|
ret = *(volatile unsigned char *)ptr;
|
|
*(volatile unsigned char *)ptr = x;
|
|
raw_local_irq_restore(flags);
|
|
break;
|
|
|
|
case 4:
|
|
raw_local_irq_save(flags);
|
|
ret = *(volatile unsigned long *)ptr;
|
|
*(volatile unsigned long *)ptr = x;
|
|
raw_local_irq_restore(flags);
|
|
break;
|
|
#else
|
|
case 1:
|
|
asm volatile("@ __xchg1\n"
|
|
" swpb %0, %1, [%2]"
|
|
: "=&r" (ret)
|
|
: "r" (x), "r" (ptr)
|
|
: "memory", "cc");
|
|
break;
|
|
case 4:
|
|
asm volatile("@ __xchg4\n"
|
|
" swp %0, %1, [%2]"
|
|
: "=&r" (ret)
|
|
: "r" (x), "r" (ptr)
|
|
: "memory", "cc");
|
|
break;
|
|
#endif
|
|
default:
|
|
__bad_xchg(ptr, size), ret = 0;
|
|
break;
|
|
}
|
|
smp_mb();
|
|
|
|
return ret;
|
|
}
|
|
|
|
#define xchg(ptr,x) \
|
|
((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
|
|
|
|
#include <asm-generic/cmpxchg-local.h>
|
|
|
|
#if __LINUX_ARM_ARCH__ < 6
|
|
/* min ARCH < ARMv6 */
|
|
|
|
#ifdef CONFIG_SMP
|
|
#error "SMP is not supported on this platform"
|
|
#endif
|
|
|
|
/*
|
|
* cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
|
|
* them available.
|
|
*/
|
|
#define cmpxchg_local(ptr, o, n) \
|
|
((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
|
|
(unsigned long)(n), sizeof(*(ptr))))
|
|
#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
|
|
|
|
#ifndef CONFIG_SMP
|
|
#include <asm-generic/cmpxchg.h>
|
|
#endif
|
|
|
|
#else /* min ARCH >= ARMv6 */
|
|
|
|
extern void __bad_cmpxchg(volatile void *ptr, int size);
|
|
|
|
/*
|
|
* cmpxchg only support 32-bits operands on ARMv6.
|
|
*/
|
|
|
|
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
|
|
unsigned long new, int size)
|
|
{
|
|
unsigned long oldval, res;
|
|
|
|
prefetchw((const void *)ptr);
|
|
|
|
switch (size) {
|
|
#ifndef CONFIG_CPU_V6 /* min ARCH >= ARMv6K */
|
|
case 1:
|
|
do {
|
|
asm volatile("@ __cmpxchg1\n"
|
|
" ldrexb %1, [%2]\n"
|
|
" mov %0, #0\n"
|
|
" teq %1, %3\n"
|
|
" strexbeq %0, %4, [%2]\n"
|
|
: "=&r" (res), "=&r" (oldval)
|
|
: "r" (ptr), "Ir" (old), "r" (new)
|
|
: "memory", "cc");
|
|
} while (res);
|
|
break;
|
|
case 2:
|
|
do {
|
|
asm volatile("@ __cmpxchg1\n"
|
|
" ldrexh %1, [%2]\n"
|
|
" mov %0, #0\n"
|
|
" teq %1, %3\n"
|
|
" strexheq %0, %4, [%2]\n"
|
|
: "=&r" (res), "=&r" (oldval)
|
|
: "r" (ptr), "Ir" (old), "r" (new)
|
|
: "memory", "cc");
|
|
} while (res);
|
|
break;
|
|
#endif
|
|
case 4:
|
|
do {
|
|
asm volatile("@ __cmpxchg4\n"
|
|
" ldrex %1, [%2]\n"
|
|
" mov %0, #0\n"
|
|
" teq %1, %3\n"
|
|
" strexeq %0, %4, [%2]\n"
|
|
: "=&r" (res), "=&r" (oldval)
|
|
: "r" (ptr), "Ir" (old), "r" (new)
|
|
: "memory", "cc");
|
|
} while (res);
|
|
break;
|
|
default:
|
|
__bad_cmpxchg(ptr, size);
|
|
oldval = 0;
|
|
}
|
|
|
|
return oldval;
|
|
}
|
|
|
|
static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
|
|
unsigned long new, int size)
|
|
{
|
|
unsigned long ret;
|
|
|
|
smp_mb();
|
|
ret = __cmpxchg(ptr, old, new, size);
|
|
smp_mb();
|
|
|
|
return ret;
|
|
}
|
|
|
|
#define cmpxchg(ptr,o,n) \
|
|
((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \
|
|
(unsigned long)(o), \
|
|
(unsigned long)(n), \
|
|
sizeof(*(ptr))))
|
|
|
|
static inline unsigned long __cmpxchg_local(volatile void *ptr,
|
|
unsigned long old,
|
|
unsigned long new, int size)
|
|
{
|
|
unsigned long ret;
|
|
|
|
switch (size) {
|
|
#ifdef CONFIG_CPU_V6 /* min ARCH == ARMv6 */
|
|
case 1:
|
|
case 2:
|
|
ret = __cmpxchg_local_generic(ptr, old, new, size);
|
|
break;
|
|
#endif
|
|
default:
|
|
ret = __cmpxchg(ptr, old, new, size);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static inline unsigned long long __cmpxchg64(unsigned long long *ptr,
|
|
unsigned long long old,
|
|
unsigned long long new)
|
|
{
|
|
unsigned long long oldval;
|
|
unsigned long res;
|
|
|
|
prefetchw(ptr);
|
|
|
|
__asm__ __volatile__(
|
|
"1: ldrexd %1, %H1, [%3]\n"
|
|
" teq %1, %4\n"
|
|
" teqeq %H1, %H4\n"
|
|
" bne 2f\n"
|
|
" strexd %0, %5, %H5, [%3]\n"
|
|
" teq %0, #0\n"
|
|
" bne 1b\n"
|
|
"2:"
|
|
: "=&r" (res), "=&r" (oldval), "+Qo" (*ptr)
|
|
: "r" (ptr), "r" (old), "r" (new)
|
|
: "cc");
|
|
|
|
return oldval;
|
|
}
|
|
|
|
static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr,
|
|
unsigned long long old,
|
|
unsigned long long new)
|
|
{
|
|
unsigned long long ret;
|
|
|
|
smp_mb();
|
|
ret = __cmpxchg64(ptr, old, new);
|
|
smp_mb();
|
|
|
|
return ret;
|
|
}
|
|
|
|
#define cmpxchg_local(ptr,o,n) \
|
|
((__typeof__(*(ptr)))__cmpxchg_local((ptr), \
|
|
(unsigned long)(o), \
|
|
(unsigned long)(n), \
|
|
sizeof(*(ptr))))
|
|
|
|
#define cmpxchg64(ptr, o, n) \
|
|
((__typeof__(*(ptr)))__cmpxchg64_mb((ptr), \
|
|
(unsigned long long)(o), \
|
|
(unsigned long long)(n)))
|
|
|
|
#define cmpxchg64_relaxed(ptr, o, n) \
|
|
((__typeof__(*(ptr)))__cmpxchg64((ptr), \
|
|
(unsigned long long)(o), \
|
|
(unsigned long long)(n)))
|
|
|
|
#define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n))
|
|
|
|
#endif /* __LINUX_ARM_ARCH__ >= 6 */
|
|
|
|
#endif /* __ASM_ARM_CMPXCHG_H */
|