linux_dsm_epyc7002/include/asm-generic/atomic-long.h
Linus Torvalds 105ff3cbf2 atomic: remove all traces of READ_ONCE_CTRL() and atomic*_read_ctrl()
This seems to be a mis-reading of how alpha memory ordering works, and
is not backed up by the alpha architecture manual.  The helper functions
don't do anything special on any other architectures, and the arguments
that support them being safe on other architectures also argue that they
are safe on alpha.

Basically, the "control dependency" is between a previous read and a
subsequent write that is dependent on the value read.  Even if the
subsequent write is actually done speculatively, there is no way that
such a speculative write could be made visible to other cpu's until it
has been committed, which requires validating the speculation.

Note that most weakely ordered architectures (very much including alpha)
do not guarantee any ordering relationship between two loads that depend
on each other on a control dependency:

    read A
    if (val == 1)
        read B

because the conditional may be predicted, and the "read B" may be
speculatively moved up to before reading the value A.  So we require the
user to insert a smp_rmb() between the two accesses to be correct:

    read A;
    if (A == 1)
        smp_rmb()
        read B

Alpha is further special in that it can break that ordering even if the
*address* of B depends on the read of A, because the cacheline that is
read later may be stale unless you have a memory barrier in between the
pointer read and the read of the value behind a pointer:

    read ptr
    read offset(ptr)

whereas all other weakly ordered architectures guarantee that the data
dependency (as opposed to just a control dependency) will order the two
accesses.  As a result, alpha needs a "smp_read_barrier_depends()" in
between those two reads for them to be ordered.

The coontrol dependency that "READ_ONCE_CTRL()" and "atomic_read_ctrl()"
had was a control dependency to a subsequent *write*, however, and
nobody can finalize such a subsequent write without having actually done
the read.  And were you to write such a value to a "stale" cacheline
(the way the unordered reads came to be), that would seem to lose the
write entirely.

So the things that make alpha able to re-order reads even more
aggressively than other weak architectures do not seem to be relevant
for a subsequent write.  Alpha memory ordering may be strange, but
there's no real indication that it is *that* strange.

Also, the alpha architecture reference manual very explicitly talks
about the definition of "Dependence Constraints" in section 5.6.1.7,
where a preceding read dominates a subsequent write.

Such a dependence constraint admittedly does not impose a BEFORE (alpha
architecture term for globally visible ordering), but it does guarantee
that there can be no "causal loop".  I don't see how you could avoid
such a loop if another cpu could see the stored value and then impact
the value of the first read.  Put another way: the read and the write
could not be seen as being out of order wrt other cpus.

So I do not see how these "x_ctrl()" functions can currently be necessary.

I may have to eat my words at some point, but in the absense of clear
proof that alpha actually needs this, or indeed even an explanation of
how alpha could _possibly_ need it, I do not believe these functions are
called for.

And if it turns out that alpha really _does_ need a barrier for this
case, that barrier still should not be "smp_read_barrier_depends()".
We'd have to make up some new speciality barrier just for alpha, along
with the documentation for why it really is necessary.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul E McKenney <paulmck@us.ibm.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-11-03 17:22:17 -08:00

191 lines
5.1 KiB
C

#ifndef _ASM_GENERIC_ATOMIC_LONG_H
#define _ASM_GENERIC_ATOMIC_LONG_H
/*
* Copyright (C) 2005 Silicon Graphics, Inc.
* Christoph Lameter
*
* Allows to provide arch independent atomic definitions without the need to
* edit all arch specific atomic.h files.
*/
#include <asm/types.h>
/*
* Suppport for atomic_long_t
*
* Casts for parameters are avoided for existing atomic functions in order to
* avoid issues with cast-as-lval under gcc 4.x and other limitations that the
* macros of a platform may have.
*/
#if BITS_PER_LONG == 64
typedef atomic64_t atomic_long_t;
#define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i)
#define ATOMIC_LONG_PFX(x) atomic64 ## x
#else
typedef atomic_t atomic_long_t;
#define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i)
#define ATOMIC_LONG_PFX(x) atomic ## x
#endif
#define ATOMIC_LONG_READ_OP(mo) \
static inline long atomic_long_read##mo(const atomic_long_t *l) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
return (long)ATOMIC_LONG_PFX(_read##mo)(v); \
}
ATOMIC_LONG_READ_OP()
ATOMIC_LONG_READ_OP(_acquire)
#undef ATOMIC_LONG_READ_OP
#define ATOMIC_LONG_SET_OP(mo) \
static inline void atomic_long_set##mo(atomic_long_t *l, long i) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
ATOMIC_LONG_PFX(_set##mo)(v, i); \
}
ATOMIC_LONG_SET_OP()
ATOMIC_LONG_SET_OP(_release)
#undef ATOMIC_LONG_SET_OP
#define ATOMIC_LONG_ADD_SUB_OP(op, mo) \
static inline long \
atomic_long_##op##_return##mo(long i, atomic_long_t *l) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(i, v); \
}
ATOMIC_LONG_ADD_SUB_OP(add,)
ATOMIC_LONG_ADD_SUB_OP(add, _relaxed)
ATOMIC_LONG_ADD_SUB_OP(add, _acquire)
ATOMIC_LONG_ADD_SUB_OP(add, _release)
ATOMIC_LONG_ADD_SUB_OP(sub,)
ATOMIC_LONG_ADD_SUB_OP(sub, _relaxed)
ATOMIC_LONG_ADD_SUB_OP(sub, _acquire)
ATOMIC_LONG_ADD_SUB_OP(sub, _release)
#undef ATOMIC_LONG_ADD_SUB_OP
#define atomic_long_cmpxchg_relaxed(l, old, new) \
(ATOMIC_LONG_PFX(_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
(old), (new)))
#define atomic_long_cmpxchg_acquire(l, old, new) \
(ATOMIC_LONG_PFX(_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
(old), (new)))
#define atomic_long_cmpxchg_release(l, old, new) \
(ATOMIC_LONG_PFX(_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
(old), (new)))
#define atomic_long_cmpxchg(l, old, new) \
(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
#define atomic_long_xchg_relaxed(v, new) \
(ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
#define atomic_long_xchg_acquire(v, new) \
(ATOMIC_LONG_PFX(_xchg_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
#define atomic_long_xchg_release(v, new) \
(ATOMIC_LONG_PFX(_xchg_release)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
#define atomic_long_xchg(v, new) \
(ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
static inline void atomic_long_inc(atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
ATOMIC_LONG_PFX(_inc)(v);
}
static inline void atomic_long_dec(atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
ATOMIC_LONG_PFX(_dec)(v);
}
#define ATOMIC_LONG_OP(op) \
static inline void \
atomic_long_##op(long i, atomic_long_t *l) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
ATOMIC_LONG_PFX(_##op)(i, v); \
}
ATOMIC_LONG_OP(add)
ATOMIC_LONG_OP(sub)
ATOMIC_LONG_OP(and)
ATOMIC_LONG_OP(or)
ATOMIC_LONG_OP(xor)
ATOMIC_LONG_OP(andnot)
#undef ATOMIC_LONG_OP
static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
return ATOMIC_LONG_PFX(_sub_and_test)(i, v);
}
static inline int atomic_long_dec_and_test(atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
return ATOMIC_LONG_PFX(_dec_and_test)(v);
}
static inline int atomic_long_inc_and_test(atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
return ATOMIC_LONG_PFX(_inc_and_test)(v);
}
static inline int atomic_long_add_negative(long i, atomic_long_t *l)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
return ATOMIC_LONG_PFX(_add_negative)(i, v);
}
#define ATOMIC_LONG_INC_DEC_OP(op, mo) \
static inline long \
atomic_long_##op##_return##mo(atomic_long_t *l) \
{ \
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
\
return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(v); \
}
ATOMIC_LONG_INC_DEC_OP(inc,)
ATOMIC_LONG_INC_DEC_OP(inc, _relaxed)
ATOMIC_LONG_INC_DEC_OP(inc, _acquire)
ATOMIC_LONG_INC_DEC_OP(inc, _release)
ATOMIC_LONG_INC_DEC_OP(dec,)
ATOMIC_LONG_INC_DEC_OP(dec, _relaxed)
ATOMIC_LONG_INC_DEC_OP(dec, _acquire)
ATOMIC_LONG_INC_DEC_OP(dec, _release)
#undef ATOMIC_LONG_INC_DEC_OP
static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
{
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
return (long)ATOMIC_LONG_PFX(_add_unless)(v, a, u);
}
#define atomic_long_inc_not_zero(l) \
ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
#endif /* _ASM_GENERIC_ATOMIC_LONG_H */