mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
e192832869
Pull locking updates from Ingo Molnar: "The main changes in this cycle are: - rwsem scalability improvements, phase #2, by Waiman Long, which are rather impressive: "On a 2-socket 40-core 80-thread Skylake system with 40 reader and writer locking threads, the min/mean/max locking operations done in a 5-second testing window before the patchset were: 40 readers, Iterations Min/Mean/Max = 1,807/1,808/1,810 40 writers, Iterations Min/Mean/Max = 1,807/50,344/151,255 After the patchset, they became: 40 readers, Iterations Min/Mean/Max = 30,057/31,359/32,741 40 writers, Iterations Min/Mean/Max = 94,466/95,845/97,098" There's a lot of changes to the locking implementation that makes it similar to qrwlock, including owner handoff for more fair locking. Another microbenchmark shows how across the spectrum the improvements are: "With a locking microbenchmark running on 5.1 based kernel, the total locking rates (in kops/s) on a 2-socket Skylake system with equal numbers of readers and writers (mixed) before and after this patchset were: # of Threads Before Patch After Patch ------------ ------------ ----------- 2 2,618 4,193 4 1,202 3,726 8 802 3,622 16 729 3,359 32 319 2,826 64 102 2,744" The changes are extensive and the patch-set has been through several iterations addressing various locking workloads. There might be more regressions, but unless they are pathological I believe we want to use this new implementation as the baseline going forward. - jump-label optimizations by Daniel Bristot de Oliveira: the primary motivation was to remove IPI disturbance of isolated RT-workload CPUs, which resulted in the implementation of batched jump-label updates. Beyond the improvement of the real-time characteristics kernel, in one test this patchset improved static key update overhead from 57 msecs to just 1.4 msecs - which is a nice speedup as well. 
- atomic64_t cross-arch type cleanups by Mark Rutland: over the last ~10 years of atomic64_t existence the various types used by the APIs only had to be self-consistent within each architecture - which means they became wildly inconsistent across architectures. Mark puts an end to this by reworking all the atomic64 implementations to use 's64' as the base type for atomic64_t, and to ensure that this type is consistently used for parameters and return values in the API, avoiding further problems in this area. - A large set of small improvements to lockdep by Yuyang Du: type cleanups, output cleanups, function return type and other cleanups all around the place. - A set of percpu ops cleanups and fixes by Peter Zijlstra. - Misc other changes - please see the Git log for more details" * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (82 commits) locking/lockdep: increase size of counters for lockdep statistics locking/atomics: Use sed(1) instead of non-standard head(1) option locking/lockdep: Move mark_lock() inside CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING x86/jump_label: Make tp_vec_nr static x86/percpu: Optimize raw_cpu_xchg() x86/percpu, sched/fair: Avoid local_clock() x86/percpu, x86/irq: Relax {set,get}_irq_regs() x86/percpu: Relax smp_processor_id() x86/percpu: Differentiate this_cpu_{}() and __this_cpu_{}() locking/rwsem: Guard against making count negative locking/rwsem: Adaptive disabling of reader optimistic spinning locking/rwsem: Enable time-based spinning on reader-owned rwsem locking/rwsem: Make rwsem->owner an atomic_long_t locking/rwsem: Enable readers spinning on writer locking/rwsem: Clarify usage of owner's nonspinaable bit locking/rwsem: Wake up almost all readers in wait queue locking/rwsem: More optimal RT task handling of null owner locking/rwsem: Always release wait_lock before waking up tasks locking/rwsem: Implement lock handoff to prevent lock starvation locking/rwsem: Make rwsem_spin_on_owner() 
return owner state ...
524 lines
14 KiB
C
524 lines
14 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on arch/arm/include/asm/atomic.h
 *
 * Copyright (C) 1996 Russell King.
 * Copyright (C) 2002 Deep Blue Solutions Ltd.
 * Copyright (C) 2012 ARM Ltd.
 */
|
|
|
|
#ifndef __ASM_ATOMIC_LSE_H
|
|
#define __ASM_ATOMIC_LSE_H
|
|
|
|
/*
 * Refuse direct inclusion: the including file must define
 * __ARM64_IN_ATOMIC_IMPL before pulling this header in, otherwise the build
 * stops with the #error below.
 */
#ifndef __ARM64_IN_ATOMIC_IMPL
#error "please don't include this file directly"
#endif
|
|
|
|
#define __LL_SC_ATOMIC(op) __LL_SC_CALL(arch_atomic_##op)
|
|
#define ATOMIC_OP(op, asm_op) \
|
|
static inline void arch_atomic_##op(int i, atomic_t *v) \
|
|
{ \
|
|
register int w0 asm ("w0") = i; \
|
|
register atomic_t *x1 asm ("x1") = v; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op), \
|
|
" " #asm_op " %w[i], %[v]\n") \
|
|
: [i] "+r" (w0), [v] "+Q" (v->counter) \
|
|
: "r" (x1) \
|
|
: __LL_SC_CLOBBERS); \
|
|
}
|
|
|
|
ATOMIC_OP(andnot, stclr)
|
|
ATOMIC_OP(or, stset)
|
|
ATOMIC_OP(xor, steor)
|
|
ATOMIC_OP(add, stadd)
|
|
|
|
#undef ATOMIC_OP
|
|
|
|
#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \
|
|
static inline int arch_atomic_fetch_##op##name(int i, atomic_t *v) \
|
|
{ \
|
|
register int w0 asm ("w0") = i; \
|
|
register atomic_t *x1 asm ("x1") = v; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN( \
|
|
/* LL/SC */ \
|
|
__LL_SC_ATOMIC(fetch_##op##name), \
|
|
/* LSE atomics */ \
|
|
" " #asm_op #mb " %w[i], %w[i], %[v]") \
|
|
: [i] "+r" (w0), [v] "+Q" (v->counter) \
|
|
: "r" (x1) \
|
|
: __LL_SC_CLOBBERS, ##cl); \
|
|
\
|
|
return w0; \
|
|
}
|
|
|
|
#define ATOMIC_FETCH_OPS(op, asm_op) \
|
|
ATOMIC_FETCH_OP(_relaxed, , op, asm_op) \
|
|
ATOMIC_FETCH_OP(_acquire, a, op, asm_op, "memory") \
|
|
ATOMIC_FETCH_OP(_release, l, op, asm_op, "memory") \
|
|
ATOMIC_FETCH_OP( , al, op, asm_op, "memory")
|
|
|
|
ATOMIC_FETCH_OPS(andnot, ldclr)
|
|
ATOMIC_FETCH_OPS(or, ldset)
|
|
ATOMIC_FETCH_OPS(xor, ldeor)
|
|
ATOMIC_FETCH_OPS(add, ldadd)
|
|
|
|
#undef ATOMIC_FETCH_OP
|
|
#undef ATOMIC_FETCH_OPS
|
|
|
|
#define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
|
|
static inline int arch_atomic_add_return##name(int i, atomic_t *v) \
|
|
{ \
|
|
register int w0 asm ("w0") = i; \
|
|
register atomic_t *x1 asm ("x1") = v; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN( \
|
|
/* LL/SC */ \
|
|
__LL_SC_ATOMIC(add_return##name) \
|
|
__nops(1), \
|
|
/* LSE atomics */ \
|
|
" ldadd" #mb " %w[i], w30, %[v]\n" \
|
|
" add %w[i], %w[i], w30") \
|
|
: [i] "+r" (w0), [v] "+Q" (v->counter) \
|
|
: "r" (x1) \
|
|
: __LL_SC_CLOBBERS, ##cl); \
|
|
\
|
|
return w0; \
|
|
}
|
|
|
|
ATOMIC_OP_ADD_RETURN(_relaxed, )
|
|
ATOMIC_OP_ADD_RETURN(_acquire, a, "memory")
|
|
ATOMIC_OP_ADD_RETURN(_release, l, "memory")
|
|
ATOMIC_OP_ADD_RETURN( , al, "memory")
|
|
|
|
#undef ATOMIC_OP_ADD_RETURN
|
|
|
|
/*
 * arch_atomic_and - atomically AND @i into v->counter, no return value.
 *
 * The LSE path inverts @i with mvn and then uses stclr, which clears the bits
 * set in its operand: clearing the bits of ~i is the same as ANDing with i.
 *
 * [i] is marked early-clobber ("+&r") because mvn overwrites it before the
 * remaining operands are consumed. The LL/SC call is followed by __nops(1),
 * presumably to match the two-instruction LSE sequence in size.
 */
static inline void arch_atomic_and(int i, atomic_t *v)
{
	register int w0 asm ("w0") = i;
	register atomic_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	__LL_SC_ATOMIC(and)
	__nops(1),
	/* LSE atomics */
	" mvn %w[i], %w[i]\n"
	" stclr %w[i], %[v]")
	: [i] "+&r" (w0), [v] "+Q" (v->counter)
	: "r" (x1)
	: __LL_SC_CLOBBERS);
}
|
|
|
|
#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \
|
|
static inline int arch_atomic_fetch_and##name(int i, atomic_t *v) \
|
|
{ \
|
|
register int w0 asm ("w0") = i; \
|
|
register atomic_t *x1 asm ("x1") = v; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN( \
|
|
/* LL/SC */ \
|
|
__LL_SC_ATOMIC(fetch_and##name) \
|
|
__nops(1), \
|
|
/* LSE atomics */ \
|
|
" mvn %w[i], %w[i]\n" \
|
|
" ldclr" #mb " %w[i], %w[i], %[v]") \
|
|
: [i] "+&r" (w0), [v] "+Q" (v->counter) \
|
|
: "r" (x1) \
|
|
: __LL_SC_CLOBBERS, ##cl); \
|
|
\
|
|
return w0; \
|
|
}
|
|
|
|
ATOMIC_FETCH_OP_AND(_relaxed, )
|
|
ATOMIC_FETCH_OP_AND(_acquire, a, "memory")
|
|
ATOMIC_FETCH_OP_AND(_release, l, "memory")
|
|
ATOMIC_FETCH_OP_AND( , al, "memory")
|
|
|
|
#undef ATOMIC_FETCH_OP_AND
|
|
|
|
/*
 * arch_atomic_sub - atomically subtract @i from v->counter, no return value.
 *
 * The LSE path negates @i and then performs an atomic add (stadd) of the
 * negated value.
 *
 * [i] is early-clobber ("+&r") because neg overwrites it before the other
 * operands are consumed. The LL/SC call is followed by __nops(1), presumably
 * to match the two-instruction LSE sequence in size.
 */
static inline void arch_atomic_sub(int i, atomic_t *v)
{
	register int w0 asm ("w0") = i;
	register atomic_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	__LL_SC_ATOMIC(sub)
	__nops(1),
	/* LSE atomics */
	" neg %w[i], %w[i]\n"
	" stadd %w[i], %[v]")
	: [i] "+&r" (w0), [v] "+Q" (v->counter)
	: "r" (x1)
	: __LL_SC_CLOBBERS);
}
|
|
|
|
#define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \
|
|
static inline int arch_atomic_sub_return##name(int i, atomic_t *v) \
|
|
{ \
|
|
register int w0 asm ("w0") = i; \
|
|
register atomic_t *x1 asm ("x1") = v; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN( \
|
|
/* LL/SC */ \
|
|
__LL_SC_ATOMIC(sub_return##name) \
|
|
__nops(2), \
|
|
/* LSE atomics */ \
|
|
" neg %w[i], %w[i]\n" \
|
|
" ldadd" #mb " %w[i], w30, %[v]\n" \
|
|
" add %w[i], %w[i], w30") \
|
|
: [i] "+&r" (w0), [v] "+Q" (v->counter) \
|
|
: "r" (x1) \
|
|
: __LL_SC_CLOBBERS , ##cl); \
|
|
\
|
|
return w0; \
|
|
}
|
|
|
|
ATOMIC_OP_SUB_RETURN(_relaxed, )
|
|
ATOMIC_OP_SUB_RETURN(_acquire, a, "memory")
|
|
ATOMIC_OP_SUB_RETURN(_release, l, "memory")
|
|
ATOMIC_OP_SUB_RETURN( , al, "memory")
|
|
|
|
#undef ATOMIC_OP_SUB_RETURN
|
|
|
|
#define ATOMIC_FETCH_OP_SUB(name, mb, cl...) \
|
|
static inline int arch_atomic_fetch_sub##name(int i, atomic_t *v) \
|
|
{ \
|
|
register int w0 asm ("w0") = i; \
|
|
register atomic_t *x1 asm ("x1") = v; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN( \
|
|
/* LL/SC */ \
|
|
__LL_SC_ATOMIC(fetch_sub##name) \
|
|
__nops(1), \
|
|
/* LSE atomics */ \
|
|
" neg %w[i], %w[i]\n" \
|
|
" ldadd" #mb " %w[i], %w[i], %[v]") \
|
|
: [i] "+&r" (w0), [v] "+Q" (v->counter) \
|
|
: "r" (x1) \
|
|
: __LL_SC_CLOBBERS, ##cl); \
|
|
\
|
|
return w0; \
|
|
}
|
|
|
|
ATOMIC_FETCH_OP_SUB(_relaxed, )
|
|
ATOMIC_FETCH_OP_SUB(_acquire, a, "memory")
|
|
ATOMIC_FETCH_OP_SUB(_release, l, "memory")
|
|
ATOMIC_FETCH_OP_SUB( , al, "memory")
|
|
|
|
#undef ATOMIC_FETCH_OP_SUB
|
|
#undef __LL_SC_ATOMIC
|
|
|
|
#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(arch_atomic64_##op)
|
|
#define ATOMIC64_OP(op, asm_op) \
|
|
static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
|
|
{ \
|
|
register s64 x0 asm ("x0") = i; \
|
|
register atomic64_t *x1 asm ("x1") = v; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \
|
|
" " #asm_op " %[i], %[v]\n") \
|
|
: [i] "+r" (x0), [v] "+Q" (v->counter) \
|
|
: "r" (x1) \
|
|
: __LL_SC_CLOBBERS); \
|
|
}
|
|
|
|
ATOMIC64_OP(andnot, stclr)
|
|
ATOMIC64_OP(or, stset)
|
|
ATOMIC64_OP(xor, steor)
|
|
ATOMIC64_OP(add, stadd)
|
|
|
|
#undef ATOMIC64_OP
|
|
|
|
#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \
|
|
static inline s64 arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
|
|
{ \
|
|
register s64 x0 asm ("x0") = i; \
|
|
register atomic64_t *x1 asm ("x1") = v; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN( \
|
|
/* LL/SC */ \
|
|
__LL_SC_ATOMIC64(fetch_##op##name), \
|
|
/* LSE atomics */ \
|
|
" " #asm_op #mb " %[i], %[i], %[v]") \
|
|
: [i] "+r" (x0), [v] "+Q" (v->counter) \
|
|
: "r" (x1) \
|
|
: __LL_SC_CLOBBERS, ##cl); \
|
|
\
|
|
return x0; \
|
|
}
|
|
|
|
#define ATOMIC64_FETCH_OPS(op, asm_op) \
|
|
ATOMIC64_FETCH_OP(_relaxed, , op, asm_op) \
|
|
ATOMIC64_FETCH_OP(_acquire, a, op, asm_op, "memory") \
|
|
ATOMIC64_FETCH_OP(_release, l, op, asm_op, "memory") \
|
|
ATOMIC64_FETCH_OP( , al, op, asm_op, "memory")
|
|
|
|
ATOMIC64_FETCH_OPS(andnot, ldclr)
|
|
ATOMIC64_FETCH_OPS(or, ldset)
|
|
ATOMIC64_FETCH_OPS(xor, ldeor)
|
|
ATOMIC64_FETCH_OPS(add, ldadd)
|
|
|
|
#undef ATOMIC64_FETCH_OP
|
|
#undef ATOMIC64_FETCH_OPS
|
|
|
|
#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
|
|
static inline s64 arch_atomic64_add_return##name(s64 i, atomic64_t *v) \
|
|
{ \
|
|
register s64 x0 asm ("x0") = i; \
|
|
register atomic64_t *x1 asm ("x1") = v; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN( \
|
|
/* LL/SC */ \
|
|
__LL_SC_ATOMIC64(add_return##name) \
|
|
__nops(1), \
|
|
/* LSE atomics */ \
|
|
" ldadd" #mb " %[i], x30, %[v]\n" \
|
|
" add %[i], %[i], x30") \
|
|
: [i] "+r" (x0), [v] "+Q" (v->counter) \
|
|
: "r" (x1) \
|
|
: __LL_SC_CLOBBERS, ##cl); \
|
|
\
|
|
return x0; \
|
|
}
|
|
|
|
ATOMIC64_OP_ADD_RETURN(_relaxed, )
|
|
ATOMIC64_OP_ADD_RETURN(_acquire, a, "memory")
|
|
ATOMIC64_OP_ADD_RETURN(_release, l, "memory")
|
|
ATOMIC64_OP_ADD_RETURN( , al, "memory")
|
|
|
|
#undef ATOMIC64_OP_ADD_RETURN
|
|
|
|
/*
 * arch_atomic64_and - atomically AND @i into v->counter, no return value.
 *
 * The LSE path inverts @i with mvn and then uses stclr, which clears the bits
 * set in its operand: clearing the bits of ~i is the same as ANDing with i.
 *
 * [i] is early-clobber ("+&r") because mvn overwrites it before the other
 * operands are consumed. The LL/SC call is followed by __nops(1), presumably
 * to match the two-instruction LSE sequence in size.
 */
static inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
	register s64 x0 asm ("x0") = i;
	register atomic64_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	__LL_SC_ATOMIC64(and)
	__nops(1),
	/* LSE atomics */
	" mvn %[i], %[i]\n"
	" stclr %[i], %[v]")
	: [i] "+&r" (x0), [v] "+Q" (v->counter)
	: "r" (x1)
	: __LL_SC_CLOBBERS);
}
|
|
|
|
#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \
|
|
static inline s64 arch_atomic64_fetch_and##name(s64 i, atomic64_t *v) \
|
|
{ \
|
|
register s64 x0 asm ("x0") = i; \
|
|
register atomic64_t *x1 asm ("x1") = v; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN( \
|
|
/* LL/SC */ \
|
|
__LL_SC_ATOMIC64(fetch_and##name) \
|
|
__nops(1), \
|
|
/* LSE atomics */ \
|
|
" mvn %[i], %[i]\n" \
|
|
" ldclr" #mb " %[i], %[i], %[v]") \
|
|
: [i] "+&r" (x0), [v] "+Q" (v->counter) \
|
|
: "r" (x1) \
|
|
: __LL_SC_CLOBBERS, ##cl); \
|
|
\
|
|
return x0; \
|
|
}
|
|
|
|
ATOMIC64_FETCH_OP_AND(_relaxed, )
|
|
ATOMIC64_FETCH_OP_AND(_acquire, a, "memory")
|
|
ATOMIC64_FETCH_OP_AND(_release, l, "memory")
|
|
ATOMIC64_FETCH_OP_AND( , al, "memory")
|
|
|
|
#undef ATOMIC64_FETCH_OP_AND
|
|
|
|
/*
 * arch_atomic64_sub - atomically subtract @i from v->counter, no return
 * value.
 *
 * The LSE path negates @i and then performs an atomic add (stadd) of the
 * negated value.
 *
 * [i] is early-clobber ("+&r") because neg overwrites it before the other
 * operands are consumed. The LL/SC call is followed by __nops(1), presumably
 * to match the two-instruction LSE sequence in size.
 */
static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
	register s64 x0 asm ("x0") = i;
	register atomic64_t *x1 asm ("x1") = v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	__LL_SC_ATOMIC64(sub)
	__nops(1),
	/* LSE atomics */
	" neg %[i], %[i]\n"
	" stadd %[i], %[v]")
	: [i] "+&r" (x0), [v] "+Q" (v->counter)
	: "r" (x1)
	: __LL_SC_CLOBBERS);
}
|
|
|
|
#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \
|
|
static inline s64 arch_atomic64_sub_return##name(s64 i, atomic64_t *v) \
|
|
{ \
|
|
register s64 x0 asm ("x0") = i; \
|
|
register atomic64_t *x1 asm ("x1") = v; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN( \
|
|
/* LL/SC */ \
|
|
__LL_SC_ATOMIC64(sub_return##name) \
|
|
__nops(2), \
|
|
/* LSE atomics */ \
|
|
" neg %[i], %[i]\n" \
|
|
" ldadd" #mb " %[i], x30, %[v]\n" \
|
|
" add %[i], %[i], x30") \
|
|
: [i] "+&r" (x0), [v] "+Q" (v->counter) \
|
|
: "r" (x1) \
|
|
: __LL_SC_CLOBBERS, ##cl); \
|
|
\
|
|
return x0; \
|
|
}
|
|
|
|
ATOMIC64_OP_SUB_RETURN(_relaxed, )
|
|
ATOMIC64_OP_SUB_RETURN(_acquire, a, "memory")
|
|
ATOMIC64_OP_SUB_RETURN(_release, l, "memory")
|
|
ATOMIC64_OP_SUB_RETURN( , al, "memory")
|
|
|
|
#undef ATOMIC64_OP_SUB_RETURN
|
|
|
|
#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \
|
|
static inline s64 arch_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \
|
|
{ \
|
|
register s64 x0 asm ("x0") = i; \
|
|
register atomic64_t *x1 asm ("x1") = v; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN( \
|
|
/* LL/SC */ \
|
|
__LL_SC_ATOMIC64(fetch_sub##name) \
|
|
__nops(1), \
|
|
/* LSE atomics */ \
|
|
" neg %[i], %[i]\n" \
|
|
" ldadd" #mb " %[i], %[i], %[v]") \
|
|
: [i] "+&r" (x0), [v] "+Q" (v->counter) \
|
|
: "r" (x1) \
|
|
: __LL_SC_CLOBBERS, ##cl); \
|
|
\
|
|
return x0; \
|
|
}
|
|
|
|
ATOMIC64_FETCH_OP_SUB(_relaxed, )
|
|
ATOMIC64_FETCH_OP_SUB(_acquire, a, "memory")
|
|
ATOMIC64_FETCH_OP_SUB(_release, l, "memory")
|
|
ATOMIC64_FETCH_OP_SUB( , al, "memory")
|
|
|
|
#undef ATOMIC64_FETCH_OP_SUB
|
|
|
|
/*
 * arch_atomic64_dec_if_positive - decrement v->counter unless the result
 * would be negative.
 *
 * Returns the old counter value minus one. A negative return therefore means
 * the counter was left untouched (the b.lt branch skips the store).
 *
 * x0 carries the atomic64_t pointer into the asm and the result out of it;
 * the LSE path addresses the counter through the [v] memory operand instead.
 * [ret] is early-clobber because subs writes it before the sequence is done
 * with its other operands.
 *
 * LSE sequence, using x30 as scratch:
 *   1: load the counter into x30
 *      ret = x30 - 1, setting flags; bail out to 2: if the result is negative
 *      casal: store ret if the counter still equals x30; x30 receives the
 *             value casal actually observed
 *      x30 = observed - 1 - ret, which is zero only if the observed value
 *             equals the one loaded at 1:, i.e. the swap succeeded
 *      retry from 1: if x30 is non-zero
 *   2:
 *
 * The flags and memory are clobbered explicitly ("cc", "memory"). The LL/SC
 * call carries __nops(6), presumably to match the seven LSE instructions.
 * NOTE(review): x30 scratch use is presumably covered by __LL_SC_CLOBBERS.
 */
static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
	register long x0 asm ("x0") = (long)v;

	asm volatile(ARM64_LSE_ATOMIC_INSN(
	/* LL/SC */
	__LL_SC_ATOMIC64(dec_if_positive)
	__nops(6),
	/* LSE atomics */
	"1: ldr x30, %[v]\n"
	" subs %[ret], x30, #1\n"
	" b.lt 2f\n"
	" casal x30, %[ret], %[v]\n"
	" sub x30, x30, #1\n"
	" sub x30, x30, %[ret]\n"
	" cbnz x30, 1b\n"
	"2:")
	: [ret] "+&r" (x0), [v] "+Q" (v->counter)
	:
	: __LL_SC_CLOBBERS, "cc", "memory");

	return x0;
}

#undef __LL_SC_ATOMIC64
|
|
|
|
#define __LL_SC_CMPXCHG(op) __LL_SC_CALL(__cmpxchg_case_##op)
|
|
|
|
#define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...) \
|
|
static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr, \
|
|
u##sz old, \
|
|
u##sz new) \
|
|
{ \
|
|
register unsigned long x0 asm ("x0") = (unsigned long)ptr; \
|
|
register u##sz x1 asm ("x1") = old; \
|
|
register u##sz x2 asm ("x2") = new; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN( \
|
|
/* LL/SC */ \
|
|
__LL_SC_CMPXCHG(name##sz) \
|
|
__nops(2), \
|
|
/* LSE atomics */ \
|
|
" mov " #w "30, %" #w "[old]\n" \
|
|
" cas" #mb #sfx "\t" #w "30, %" #w "[new], %[v]\n" \
|
|
" mov %" #w "[ret], " #w "30") \
|
|
: [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr) \
|
|
: [old] "r" (x1), [new] "r" (x2) \
|
|
: __LL_SC_CLOBBERS, ##cl); \
|
|
\
|
|
return x0; \
|
|
}
|
|
|
|
__CMPXCHG_CASE(w, b, , 8, )
|
|
__CMPXCHG_CASE(w, h, , 16, )
|
|
__CMPXCHG_CASE(w, , , 32, )
|
|
__CMPXCHG_CASE(x, , , 64, )
|
|
__CMPXCHG_CASE(w, b, acq_, 8, a, "memory")
|
|
__CMPXCHG_CASE(w, h, acq_, 16, a, "memory")
|
|
__CMPXCHG_CASE(w, , acq_, 32, a, "memory")
|
|
__CMPXCHG_CASE(x, , acq_, 64, a, "memory")
|
|
__CMPXCHG_CASE(w, b, rel_, 8, l, "memory")
|
|
__CMPXCHG_CASE(w, h, rel_, 16, l, "memory")
|
|
__CMPXCHG_CASE(w, , rel_, 32, l, "memory")
|
|
__CMPXCHG_CASE(x, , rel_, 64, l, "memory")
|
|
__CMPXCHG_CASE(w, b, mb_, 8, al, "memory")
|
|
__CMPXCHG_CASE(w, h, mb_, 16, al, "memory")
|
|
__CMPXCHG_CASE(w, , mb_, 32, al, "memory")
|
|
__CMPXCHG_CASE(x, , mb_, 64, al, "memory")
|
|
|
|
#undef __LL_SC_CMPXCHG
|
|
#undef __CMPXCHG_CASE
|
|
|
|
#define __LL_SC_CMPXCHG_DBL(op) __LL_SC_CALL(__cmpxchg_double##op)
|
|
|
|
#define __CMPXCHG_DBL(name, mb, cl...) \
|
|
static inline long __cmpxchg_double##name(unsigned long old1, \
|
|
unsigned long old2, \
|
|
unsigned long new1, \
|
|
unsigned long new2, \
|
|
volatile void *ptr) \
|
|
{ \
|
|
unsigned long oldval1 = old1; \
|
|
unsigned long oldval2 = old2; \
|
|
register unsigned long x0 asm ("x0") = old1; \
|
|
register unsigned long x1 asm ("x1") = old2; \
|
|
register unsigned long x2 asm ("x2") = new1; \
|
|
register unsigned long x3 asm ("x3") = new2; \
|
|
register unsigned long x4 asm ("x4") = (unsigned long)ptr; \
|
|
\
|
|
asm volatile(ARM64_LSE_ATOMIC_INSN( \
|
|
/* LL/SC */ \
|
|
__LL_SC_CMPXCHG_DBL(name) \
|
|
__nops(3), \
|
|
/* LSE atomics */ \
|
|
" casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
|
|
" eor %[old1], %[old1], %[oldval1]\n" \
|
|
" eor %[old2], %[old2], %[oldval2]\n" \
|
|
" orr %[old1], %[old1], %[old2]") \
|
|
: [old1] "+&r" (x0), [old2] "+&r" (x1), \
|
|
[v] "+Q" (*(unsigned long *)ptr) \
|
|
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
|
|
[oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
|
|
: __LL_SC_CLOBBERS, ##cl); \
|
|
\
|
|
return x0; \
|
|
}
|
|
|
|
__CMPXCHG_DBL( , )
|
|
__CMPXCHG_DBL(_mb, al, "memory")
|
|
|
|
#undef __LL_SC_CMPXCHG_DBL
|
|
#undef __CMPXCHG_DBL
|
|
|
|
#endif /* __ASM_ATOMIC_LSE_H */
|