mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-11 21:46:46 +07:00
105ff3cbf2
This seems to be a mis-reading of how alpha memory ordering works, and is not backed up by the alpha architecture manual. The helper functions don't do anything special on any other architectures, and the arguments that support them being safe on other architectures also argue that they are safe on alpha.

Basically, the "control dependency" is between a previous read and a subsequent write that is dependent on the value read. Even if the subsequent write is actually done speculatively, there is no way that such a speculative write could be made visible to other CPUs until it has been committed, which requires validating the speculation.

Note that most weakly ordered architectures (very much including alpha) do not guarantee any ordering relationship between two loads that depend on each other on a control dependency:

    read A
    if (val == 1)
        read B

because the conditional may be predicted, and the "read B" may be speculatively moved up to before reading the value A. So we require the user to insert a smp_rmb() between the two accesses to be correct:

    read A;
    if (A == 1)
        smp_rmb()
    read B

Alpha is further special in that it can break that ordering even if the *address* of B depends on the read of A, because the cacheline that is read later may be stale unless you have a memory barrier in between the pointer read and the read of the value behind a pointer:

    read ptr
    read offset(ptr)

whereas all other weakly ordered architectures guarantee that the data dependency (as opposed to just a control dependency) will order the two accesses. As a result, alpha needs a "smp_read_barrier_depends()" in between those two reads for them to be ordered.

The control dependency that "READ_ONCE_CTRL()" and "atomic_read_ctrl()" had was a control dependency to a subsequent *write*, however, and nobody can finalize such a subsequent write without having actually done the read.
And were you to write such a value to a "stale" cacheline (the way the unordered reads came to be), that would seem to lose the write entirely. So the things that make alpha able to re-order reads even more aggressively than other weak architectures do not seem to be relevant for a subsequent write. Alpha memory ordering may be strange, but there's no real indication that it is *that* strange.

Also, the alpha architecture reference manual very explicitly talks about the definition of "Dependence Constraints" in section 5.6.1.7, where a preceding read dominates a subsequent write. Such a dependence constraint admittedly does not impose a BEFORE (alpha architecture term for globally visible ordering), but it does guarantee that there can be no "causal loop". I don't see how you could avoid such a loop if another cpu could see the stored value and then impact the value of the first read. Put another way: the read and the write could not be seen as being out of order wrt other cpus.

So I do not see how these "x_ctrl()" functions can currently be necessary. I may have to eat my words at some point, but in the absence of clear proof that alpha actually needs this, or indeed even an explanation of how alpha could _possibly_ need it, I do not believe these functions are called for.

And if it turns out that alpha really _does_ need a barrier for this case, that barrier still should not be "smp_read_barrier_depends()". We'd have to make up some new speciality barrier just for alpha, along with the documentation for why it really is necessary.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul E McKenney <paulmck@us.ibm.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
191 lines
5.1 KiB
C
191 lines
5.1 KiB
C
#ifndef _ASM_GENERIC_ATOMIC_LONG_H
|
|
#define _ASM_GENERIC_ATOMIC_LONG_H
|
|
/*
|
|
* Copyright (C) 2005 Silicon Graphics, Inc.
|
|
* Christoph Lameter
|
|
*
|
|
* Provides arch independent atomic definitions without the need to
|
|
* edit all arch specific atomic.h files.
|
|
*/
|
|
|
|
#include <asm/types.h>
|
|
|
|
/*
|
|
* Support for atomic_long_t
|
|
*
|
|
* Casts for parameters are avoided for existing atomic functions in order to
|
|
* avoid issues with cast-as-lval under gcc 4.x and other limitations that the
|
|
* macros of a platform may have.
|
|
*/
|
|
|
|
#if BITS_PER_LONG == 64
|
|
|
|
typedef atomic64_t atomic_long_t;
|
|
|
|
#define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i)
|
|
#define ATOMIC_LONG_PFX(x) atomic64 ## x
|
|
|
|
#else
|
|
|
|
typedef atomic_t atomic_long_t;
|
|
|
|
#define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i)
|
|
#define ATOMIC_LONG_PFX(x) atomic ## x
|
|
|
|
#endif
|
|
|
|
#define ATOMIC_LONG_READ_OP(mo) \
|
|
static inline long atomic_long_read##mo(const atomic_long_t *l) \
|
|
{ \
|
|
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
|
|
\
|
|
return (long)ATOMIC_LONG_PFX(_read##mo)(v); \
|
|
}
|
|
ATOMIC_LONG_READ_OP()
|
|
ATOMIC_LONG_READ_OP(_acquire)
|
|
|
|
#undef ATOMIC_LONG_READ_OP
|
|
|
|
#define ATOMIC_LONG_SET_OP(mo) \
|
|
static inline void atomic_long_set##mo(atomic_long_t *l, long i) \
|
|
{ \
|
|
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
|
|
\
|
|
ATOMIC_LONG_PFX(_set##mo)(v, i); \
|
|
}
|
|
ATOMIC_LONG_SET_OP()
|
|
ATOMIC_LONG_SET_OP(_release)
|
|
|
|
#undef ATOMIC_LONG_SET_OP
|
|
|
|
#define ATOMIC_LONG_ADD_SUB_OP(op, mo) \
|
|
static inline long \
|
|
atomic_long_##op##_return##mo(long i, atomic_long_t *l) \
|
|
{ \
|
|
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
|
|
\
|
|
return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(i, v); \
|
|
}
|
|
ATOMIC_LONG_ADD_SUB_OP(add,)
|
|
ATOMIC_LONG_ADD_SUB_OP(add, _relaxed)
|
|
ATOMIC_LONG_ADD_SUB_OP(add, _acquire)
|
|
ATOMIC_LONG_ADD_SUB_OP(add, _release)
|
|
ATOMIC_LONG_ADD_SUB_OP(sub,)
|
|
ATOMIC_LONG_ADD_SUB_OP(sub, _relaxed)
|
|
ATOMIC_LONG_ADD_SUB_OP(sub, _acquire)
|
|
ATOMIC_LONG_ADD_SUB_OP(sub, _release)
|
|
|
|
#undef ATOMIC_LONG_ADD_SUB_OP
|
|
|
|
/*
 * atomic_long_cmpxchg{,_relaxed,_acquire,_release}(l, old, new)
 *
 * Each macro casts l to a pointer to the underlying ATOMIC_LONG_PFX(_t)
 * type and forwards (pointer, old, new) to the ATOMIC_LONG_PFX(_cmpxchg*)
 * primitive carrying the same suffix. The expansion is that call itself,
 * so its value and type are whatever the primitive yields.
 */
#define atomic_long_cmpxchg_relaxed(l, old, new)			\
	(ATOMIC_LONG_PFX(_cmpxchg_relaxed)(				\
		(ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
#define atomic_long_cmpxchg_acquire(l, old, new)			\
	(ATOMIC_LONG_PFX(_cmpxchg_acquire)(				\
		(ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
#define atomic_long_cmpxchg_release(l, old, new)			\
	(ATOMIC_LONG_PFX(_cmpxchg_release)(				\
		(ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
#define atomic_long_cmpxchg(l, old, new)				\
	(ATOMIC_LONG_PFX(_cmpxchg)(					\
		(ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
|
|
|
|
/*
 * atomic_long_xchg{,_relaxed,_acquire,_release}(v, new)
 *
 * Each macro casts v to a pointer to the underlying ATOMIC_LONG_PFX(_t)
 * type and forwards (pointer, new) to the ATOMIC_LONG_PFX(_xchg*) primitive
 * carrying the same suffix. The expansion is that call itself, so its value
 * and type are whatever the primitive yields.
 */
#define atomic_long_xchg_relaxed(v, new)				\
	(ATOMIC_LONG_PFX(_xchg_relaxed)(				\
		(ATOMIC_LONG_PFX(_t) *)(v), (new)))
#define atomic_long_xchg_acquire(v, new)				\
	(ATOMIC_LONG_PFX(_xchg_acquire)(				\
		(ATOMIC_LONG_PFX(_t) *)(v), (new)))
#define atomic_long_xchg_release(v, new)				\
	(ATOMIC_LONG_PFX(_xchg_release)(				\
		(ATOMIC_LONG_PFX(_t) *)(v), (new)))
#define atomic_long_xchg(v, new)					\
	(ATOMIC_LONG_PFX(_xchg)(					\
		(ATOMIC_LONG_PFX(_t) *)(v), (new)))
|
|
|
|
/*
 * atomic_long_inc - forward to the underlying ATOMIC_LONG_PFX(_inc) primitive
 * @l: pointer to the atomic_long_t to operate on
 *
 * The pointer is cast to the underlying ATOMIC_LONG_PFX(_t) type before
 * being handed to the primitive.
 */
static inline void atomic_long_inc(atomic_long_t *l)
{
	ATOMIC_LONG_PFX(_inc)(((ATOMIC_LONG_PFX(_t) *)l));
}
|
|
|
|
/*
 * atomic_long_dec - forward to the underlying ATOMIC_LONG_PFX(_dec) primitive
 * @l: pointer to the atomic_long_t to operate on
 *
 * The pointer is cast to the underlying ATOMIC_LONG_PFX(_t) type before
 * being handed to the primitive.
 */
static inline void atomic_long_dec(atomic_long_t *l)
{
	ATOMIC_LONG_PFX(_dec)(((ATOMIC_LONG_PFX(_t) *)l));
}
|
|
|
|
#define ATOMIC_LONG_OP(op) \
|
|
static inline void \
|
|
atomic_long_##op(long i, atomic_long_t *l) \
|
|
{ \
|
|
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
|
|
\
|
|
ATOMIC_LONG_PFX(_##op)(i, v); \
|
|
}
|
|
|
|
ATOMIC_LONG_OP(add)
|
|
ATOMIC_LONG_OP(sub)
|
|
ATOMIC_LONG_OP(and)
|
|
ATOMIC_LONG_OP(or)
|
|
ATOMIC_LONG_OP(xor)
|
|
ATOMIC_LONG_OP(andnot)
|
|
|
|
#undef ATOMIC_LONG_OP
|
|
|
|
/*
 * atomic_long_sub_and_test - forward to ATOMIC_LONG_PFX(_sub_and_test)
 * @i: value passed as the primitive's first argument
 * @l: pointer to the atomic_long_t to operate on
 *
 * Casts @l to the underlying ATOMIC_LONG_PFX(_t) pointer type and returns
 * the primitive's result unchanged.
 */
static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
{
	return ATOMIC_LONG_PFX(_sub_and_test)(i, ((ATOMIC_LONG_PFX(_t) *)l));
}
|
|
|
|
/*
 * atomic_long_dec_and_test - forward to ATOMIC_LONG_PFX(_dec_and_test)
 * @l: pointer to the atomic_long_t to operate on
 *
 * Casts @l to the underlying ATOMIC_LONG_PFX(_t) pointer type and returns
 * the primitive's result unchanged.
 */
static inline int atomic_long_dec_and_test(atomic_long_t *l)
{
	return ATOMIC_LONG_PFX(_dec_and_test)(((ATOMIC_LONG_PFX(_t) *)l));
}
|
|
|
|
/*
 * atomic_long_inc_and_test - forward to ATOMIC_LONG_PFX(_inc_and_test)
 * @l: pointer to the atomic_long_t to operate on
 *
 * Casts @l to the underlying ATOMIC_LONG_PFX(_t) pointer type and returns
 * the primitive's result unchanged.
 */
static inline int atomic_long_inc_and_test(atomic_long_t *l)
{
	return ATOMIC_LONG_PFX(_inc_and_test)(((ATOMIC_LONG_PFX(_t) *)l));
}
|
|
|
|
/*
 * atomic_long_add_negative - forward to ATOMIC_LONG_PFX(_add_negative)
 * @i: value passed as the primitive's first argument
 * @l: pointer to the atomic_long_t to operate on
 *
 * Casts @l to the underlying ATOMIC_LONG_PFX(_t) pointer type and returns
 * the primitive's result unchanged.
 */
static inline int atomic_long_add_negative(long i, atomic_long_t *l)
{
	return ATOMIC_LONG_PFX(_add_negative)(i, ((ATOMIC_LONG_PFX(_t) *)l));
}
|
|
|
|
#define ATOMIC_LONG_INC_DEC_OP(op, mo) \
|
|
static inline long \
|
|
atomic_long_##op##_return##mo(atomic_long_t *l) \
|
|
{ \
|
|
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
|
|
\
|
|
return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(v); \
|
|
}
|
|
ATOMIC_LONG_INC_DEC_OP(inc,)
|
|
ATOMIC_LONG_INC_DEC_OP(inc, _relaxed)
|
|
ATOMIC_LONG_INC_DEC_OP(inc, _acquire)
|
|
ATOMIC_LONG_INC_DEC_OP(inc, _release)
|
|
ATOMIC_LONG_INC_DEC_OP(dec,)
|
|
ATOMIC_LONG_INC_DEC_OP(dec, _relaxed)
|
|
ATOMIC_LONG_INC_DEC_OP(dec, _acquire)
|
|
ATOMIC_LONG_INC_DEC_OP(dec, _release)
|
|
|
|
#undef ATOMIC_LONG_INC_DEC_OP
|
|
|
|
/*
 * atomic_long_add_unless - forward to ATOMIC_LONG_PFX(_add_unless)
 * @l: pointer to the atomic_long_t to operate on
 * @a: passed as the primitive's second argument
 * @u: passed as the primitive's third argument
 *
 * Casts @l to the underlying ATOMIC_LONG_PFX(_t) pointer type and returns
 * the primitive's result cast to long.
 */
static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
{
	return (long)ATOMIC_LONG_PFX(_add_unless)(
			((ATOMIC_LONG_PFX(_t) *)l), a, u);
}
|
|
|
|
/*
 * atomic_long_inc_not_zero(l): cast l to a pointer to the underlying
 * ATOMIC_LONG_PFX(_t) type and hand it to the ATOMIC_LONG_PFX(_inc_not_zero)
 * primitive; the expansion evaluates to whatever that primitive yields.
 */
#define atomic_long_inc_not_zero(l)					\
	ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
|
|
|
|
#endif /* _ASM_GENERIC_ATOMIC_LONG_H */
|