mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-24 02:18:18 +07:00
c427f69564
Use try_cmpxchg to avoid the pointless TEST instruction.. And add the (missing) atomic_long_try_cmpxchg*() wrappery. On x86_64 this gives: 0000000000000710 <mutex_lock>: 0000000000000710 <mutex_lock>: 710: 65 48 8b 14 25 00 00 mov %gs:0x0,%rdx 710: 65 48 8b 14 25 00 00 mov %gs:0x0,%rdx 717: 00 00 717: 00 00 715: R_X86_64_32S current_task 715: R_X86_64_32S current_task 719: 31 c0 xor %eax,%eax 719: 31 c0 xor %eax,%eax 71b: f0 48 0f b1 17 lock cmpxchg %rdx,(%rdi) 71b: f0 48 0f b1 17 lock cmpxchg %rdx,(%rdi) 720: 48 85 c0 test %rax,%rax 720: 75 02 jne 724 <mutex_lock+0x14> 723: 75 02 jne 727 <mutex_lock+0x17> 722: f3 c3 repz retq 725: f3 c3 repz retq 724: eb da jmp 700 <__mutex_lock_slowpath> 727: eb d7 jmp 700 <__mutex_lock_slowpath> 726: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 729: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 72d: 00 00 00 On ARM64 this gives: 000000000000638 <mutex_lock>: 0000000000000638 <mutex_lock>: 638: d5384101 mrs x1, sp_el0 638: d5384101 mrs x1, sp_el0 63c: d2800002 mov x2, #0x0 63c: d2800002 mov x2, #0x0 640: f9800011 prfm pstl1strm, [x0] 640: f9800011 prfm pstl1strm, [x0] 644: c85ffc03 ldaxr x3, [x0] 644: c85ffc03 ldaxr x3, [x0] 648: ca020064 eor x4, x3, x2 648: ca020064 eor x4, x3, x2 64c: b5000064 cbnz x4, 658 <mutex_lock+0x20> 64c: b5000064 cbnz x4, 658 <mutex_lock+0x20> 650: c8047c01 stxr w4, x1, [x0] 650: c8047c01 stxr w4, x1, [x0] 654: 35ffff84 cbnz w4, 644 <mutex_lock+0xc> 654: 35ffff84 cbnz w4, 644 <mutex_lock+0xc> 658: b40000c3 cbz x3, 670 <mutex_lock+0x38> 658: b5000043 cbnz x3, 660 <mutex_lock+0x28> 65c: a9bf7bfd stp x29, x30, [sp,#-16]! 65c: d65f03c0 ret 660: 910003fd mov x29, sp 660: a9bf7bfd stp x29, x30, [sp,#-16]! 
664: 97ffffef bl 620 <__mutex_lock_slowpath> 664: 910003fd mov x29, sp 668: a8c17bfd ldp x29, x30, [sp],#16 668: 97ffffee bl 620 <__mutex_lock_slowpath> 66c: d65f03c0 ret 66c: a8c17bfd ldp x29, x30, [sp],#16 670: d65f03c0 ret 670: d65f03c0 ret Reported-by: Matthew Wilcox <mawilcox@microsoft.com> Acked-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@kernel.org>
270 lines
7.9 KiB
C
270 lines
7.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_GENERIC_ATOMIC_LONG_H
|
|
#define _ASM_GENERIC_ATOMIC_LONG_H
|
|
/*
|
|
* Copyright (C) 2005 Silicon Graphics, Inc.
|
|
* Christoph Lameter
|
|
*
|
|
* Allows providing arch-independent atomic definitions without the need to
|
|
* edit all arch specific atomic.h files.
|
|
*/
|
|
|
|
#include <asm/types.h>
|
|
|
|
/*
|
|
* Support for atomic_long_t
|
|
*
|
|
* Casts for parameters are avoided for existing atomic functions in order to
|
|
* avoid issues with cast-as-lval under gcc 4.x and other limitations that the
|
|
* macros of a platform may have.
|
|
*/
|
|
|
|
#if BITS_PER_LONG == 64
|
|
|
|
typedef atomic64_t atomic_long_t;
|
|
|
|
#define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i)
|
|
#define ATOMIC_LONG_PFX(x) atomic64 ## x
|
|
#define ATOMIC_LONG_TYPE s64
|
|
|
|
#else
|
|
|
|
typedef atomic_t atomic_long_t;
|
|
|
|
#define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i)
|
|
#define ATOMIC_LONG_PFX(x) atomic ## x
|
|
#define ATOMIC_LONG_TYPE int
|
|
|
|
#endif
|
|
|
|
#define ATOMIC_LONG_READ_OP(mo) \
|
|
static inline long atomic_long_read##mo(const atomic_long_t *l) \
|
|
{ \
|
|
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
|
|
\
|
|
return (long)ATOMIC_LONG_PFX(_read##mo)(v); \
|
|
}
|
|
ATOMIC_LONG_READ_OP()
|
|
ATOMIC_LONG_READ_OP(_acquire)
|
|
|
|
#undef ATOMIC_LONG_READ_OP
|
|
|
|
#define ATOMIC_LONG_SET_OP(mo) \
|
|
static inline void atomic_long_set##mo(atomic_long_t *l, long i) \
|
|
{ \
|
|
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
|
|
\
|
|
ATOMIC_LONG_PFX(_set##mo)(v, i); \
|
|
}
|
|
ATOMIC_LONG_SET_OP()
|
|
ATOMIC_LONG_SET_OP(_release)
|
|
|
|
#undef ATOMIC_LONG_SET_OP
|
|
|
|
#define ATOMIC_LONG_ADD_SUB_OP(op, mo) \
|
|
static inline long \
|
|
atomic_long_##op##_return##mo(long i, atomic_long_t *l) \
|
|
{ \
|
|
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
|
|
\
|
|
return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(i, v); \
|
|
}
|
|
ATOMIC_LONG_ADD_SUB_OP(add,)
|
|
ATOMIC_LONG_ADD_SUB_OP(add, _relaxed)
|
|
ATOMIC_LONG_ADD_SUB_OP(add, _acquire)
|
|
ATOMIC_LONG_ADD_SUB_OP(add, _release)
|
|
ATOMIC_LONG_ADD_SUB_OP(sub,)
|
|
ATOMIC_LONG_ADD_SUB_OP(sub, _relaxed)
|
|
ATOMIC_LONG_ADD_SUB_OP(sub, _acquire)
|
|
ATOMIC_LONG_ADD_SUB_OP(sub, _release)
|
|
|
|
#undef ATOMIC_LONG_ADD_SUB_OP
|
|
|
|
/*
 * atomic_long_cmpxchg*() - forward to the cmpxchg operation of the backing
 * atomic type. The pointer l is cast to the backing type; old and new are
 * passed through unchanged.
 */
#define atomic_long_cmpxchg(l, old, new)				\
	(ATOMIC_LONG_PFX(_cmpxchg)(					\
		(ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))

#define atomic_long_cmpxchg_acquire(l, old, new)			\
	(ATOMIC_LONG_PFX(_cmpxchg_acquire)(				\
		(ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))

#define atomic_long_cmpxchg_release(l, old, new)			\
	(ATOMIC_LONG_PFX(_cmpxchg_release)(				\
		(ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))

#define atomic_long_cmpxchg_relaxed(l, old, new)			\
	(ATOMIC_LONG_PFX(_cmpxchg_relaxed)(				\
		(ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
|
|
|
|
|
|
/*
 * atomic_long_try_cmpxchg*() - forward to the try_cmpxchg operation of the
 * backing atomic type. l is cast to the backing type, old is cast to a
 * pointer to ATOMIC_LONG_TYPE and new is cast to ATOMIC_LONG_TYPE.
 */
#define atomic_long_try_cmpxchg(l, old, new)				\
	(ATOMIC_LONG_PFX(_try_cmpxchg)(					\
		(ATOMIC_LONG_PFX(_t) *)(l),				\
		(ATOMIC_LONG_TYPE *)(old),				\
		(ATOMIC_LONG_TYPE)(new)))

#define atomic_long_try_cmpxchg_acquire(l, old, new)			\
	(ATOMIC_LONG_PFX(_try_cmpxchg_acquire)(				\
		(ATOMIC_LONG_PFX(_t) *)(l),				\
		(ATOMIC_LONG_TYPE *)(old),				\
		(ATOMIC_LONG_TYPE)(new)))

#define atomic_long_try_cmpxchg_release(l, old, new)			\
	(ATOMIC_LONG_PFX(_try_cmpxchg_release)(				\
		(ATOMIC_LONG_PFX(_t) *)(l),				\
		(ATOMIC_LONG_TYPE *)(old),				\
		(ATOMIC_LONG_TYPE)(new)))

#define atomic_long_try_cmpxchg_relaxed(l, old, new)			\
	(ATOMIC_LONG_PFX(_try_cmpxchg_relaxed)(				\
		(ATOMIC_LONG_PFX(_t) *)(l),				\
		(ATOMIC_LONG_TYPE *)(old),				\
		(ATOMIC_LONG_TYPE)(new)))
|
|
|
|
|
|
/*
 * atomic_long_xchg*() - forward to the xchg operation of the backing atomic
 * type. The pointer v is cast to the backing type; new is passed through
 * unchanged.
 */
#define atomic_long_xchg(v, new)					\
	(ATOMIC_LONG_PFX(_xchg)(					\
		(ATOMIC_LONG_PFX(_t) *)(v), (new)))

#define atomic_long_xchg_acquire(v, new)				\
	(ATOMIC_LONG_PFX(_xchg_acquire)(				\
		(ATOMIC_LONG_PFX(_t) *)(v), (new)))

#define atomic_long_xchg_release(v, new)				\
	(ATOMIC_LONG_PFX(_xchg_release)(				\
		(ATOMIC_LONG_PFX(_t) *)(v), (new)))

#define atomic_long_xchg_relaxed(v, new)				\
	(ATOMIC_LONG_PFX(_xchg_relaxed)(				\
		(ATOMIC_LONG_PFX(_t) *)(v), (new)))
|
|
|
|
/*
 * atomic_long_inc() - apply the _inc operation of the backing atomic type
 * to l.
 */
static __always_inline void atomic_long_inc(atomic_long_t *l)
{
	ATOMIC_LONG_PFX(_inc)((ATOMIC_LONG_PFX(_t) *)l);
}
|
|
|
|
/*
 * atomic_long_dec() - apply the _dec operation of the backing atomic type
 * to l.
 */
static __always_inline void atomic_long_dec(atomic_long_t *l)
{
	ATOMIC_LONG_PFX(_dec)((ATOMIC_LONG_PFX(_t) *)l);
}
|
|
|
|
#define ATOMIC_LONG_FETCH_OP(op, mo) \
|
|
static inline long \
|
|
atomic_long_fetch_##op##mo(long i, atomic_long_t *l) \
|
|
{ \
|
|
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
|
|
\
|
|
return (long)ATOMIC_LONG_PFX(_fetch_##op##mo)(i, v); \
|
|
}
|
|
|
|
ATOMIC_LONG_FETCH_OP(add, )
|
|
ATOMIC_LONG_FETCH_OP(add, _relaxed)
|
|
ATOMIC_LONG_FETCH_OP(add, _acquire)
|
|
ATOMIC_LONG_FETCH_OP(add, _release)
|
|
ATOMIC_LONG_FETCH_OP(sub, )
|
|
ATOMIC_LONG_FETCH_OP(sub, _relaxed)
|
|
ATOMIC_LONG_FETCH_OP(sub, _acquire)
|
|
ATOMIC_LONG_FETCH_OP(sub, _release)
|
|
ATOMIC_LONG_FETCH_OP(and, )
|
|
ATOMIC_LONG_FETCH_OP(and, _relaxed)
|
|
ATOMIC_LONG_FETCH_OP(and, _acquire)
|
|
ATOMIC_LONG_FETCH_OP(and, _release)
|
|
ATOMIC_LONG_FETCH_OP(andnot, )
|
|
ATOMIC_LONG_FETCH_OP(andnot, _relaxed)
|
|
ATOMIC_LONG_FETCH_OP(andnot, _acquire)
|
|
ATOMIC_LONG_FETCH_OP(andnot, _release)
|
|
ATOMIC_LONG_FETCH_OP(or, )
|
|
ATOMIC_LONG_FETCH_OP(or, _relaxed)
|
|
ATOMIC_LONG_FETCH_OP(or, _acquire)
|
|
ATOMIC_LONG_FETCH_OP(or, _release)
|
|
ATOMIC_LONG_FETCH_OP(xor, )
|
|
ATOMIC_LONG_FETCH_OP(xor, _relaxed)
|
|
ATOMIC_LONG_FETCH_OP(xor, _acquire)
|
|
ATOMIC_LONG_FETCH_OP(xor, _release)
|
|
|
|
#undef ATOMIC_LONG_FETCH_OP
|
|
|
|
#define ATOMIC_LONG_FETCH_INC_DEC_OP(op, mo) \
|
|
static inline long \
|
|
atomic_long_fetch_##op##mo(atomic_long_t *l) \
|
|
{ \
|
|
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
|
|
\
|
|
return (long)ATOMIC_LONG_PFX(_fetch_##op##mo)(v); \
|
|
}
|
|
|
|
ATOMIC_LONG_FETCH_INC_DEC_OP(inc,)
|
|
ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _relaxed)
|
|
ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _acquire)
|
|
ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _release)
|
|
ATOMIC_LONG_FETCH_INC_DEC_OP(dec,)
|
|
ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _relaxed)
|
|
ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _acquire)
|
|
ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _release)
|
|
|
|
#undef ATOMIC_LONG_FETCH_INC_DEC_OP
|
|
|
|
#define ATOMIC_LONG_OP(op) \
|
|
static __always_inline void \
|
|
atomic_long_##op(long i, atomic_long_t *l) \
|
|
{ \
|
|
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
|
|
\
|
|
ATOMIC_LONG_PFX(_##op)(i, v); \
|
|
}
|
|
|
|
ATOMIC_LONG_OP(add)
|
|
ATOMIC_LONG_OP(sub)
|
|
ATOMIC_LONG_OP(and)
|
|
ATOMIC_LONG_OP(andnot)
|
|
ATOMIC_LONG_OP(or)
|
|
ATOMIC_LONG_OP(xor)
|
|
|
|
#undef ATOMIC_LONG_OP
|
|
|
|
/*
 * atomic_long_sub_and_test() - forward (i, l) to the _sub_and_test operation
 * of the backing atomic type and return its result.
 */
static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
{
	return ATOMIC_LONG_PFX(_sub_and_test)(i, (ATOMIC_LONG_PFX(_t) *)l);
}
|
|
|
|
/*
 * atomic_long_dec_and_test() - forward l to the _dec_and_test operation of
 * the backing atomic type and return its result.
 */
static inline int atomic_long_dec_and_test(atomic_long_t *l)
{
	return ATOMIC_LONG_PFX(_dec_and_test)((ATOMIC_LONG_PFX(_t) *)l);
}
|
|
|
|
/*
 * atomic_long_inc_and_test() - forward l to the _inc_and_test operation of
 * the backing atomic type and return its result.
 */
static inline int atomic_long_inc_and_test(atomic_long_t *l)
{
	return ATOMIC_LONG_PFX(_inc_and_test)((ATOMIC_LONG_PFX(_t) *)l);
}
|
|
|
|
/*
 * atomic_long_add_negative() - forward (i, l) to the _add_negative operation
 * of the backing atomic type and return its result.
 */
static inline int atomic_long_add_negative(long i, atomic_long_t *l)
{
	return ATOMIC_LONG_PFX(_add_negative)(i, (ATOMIC_LONG_PFX(_t) *)l);
}
|
|
|
|
#define ATOMIC_LONG_INC_DEC_OP(op, mo) \
|
|
static inline long \
|
|
atomic_long_##op##_return##mo(atomic_long_t *l) \
|
|
{ \
|
|
ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \
|
|
\
|
|
return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(v); \
|
|
}
|
|
ATOMIC_LONG_INC_DEC_OP(inc,)
|
|
ATOMIC_LONG_INC_DEC_OP(inc, _relaxed)
|
|
ATOMIC_LONG_INC_DEC_OP(inc, _acquire)
|
|
ATOMIC_LONG_INC_DEC_OP(inc, _release)
|
|
ATOMIC_LONG_INC_DEC_OP(dec,)
|
|
ATOMIC_LONG_INC_DEC_OP(dec, _relaxed)
|
|
ATOMIC_LONG_INC_DEC_OP(dec, _acquire)
|
|
ATOMIC_LONG_INC_DEC_OP(dec, _release)
|
|
|
|
#undef ATOMIC_LONG_INC_DEC_OP
|
|
|
|
/*
 * atomic_long_add_unless() - forward (l, a, u) to the _add_unless operation
 * of the backing atomic type and return its result converted to long.
 */
static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
{
	return (long)ATOMIC_LONG_PFX(_add_unless)((ATOMIC_LONG_PFX(_t) *)l,
						  a, u);
}
|
|
|
|
/*
 * atomic_long_inc_not_zero() - forward to the _inc_not_zero operation of the
 * backing atomic type, with l cast to that type.
 *
 * The whole expansion is parenthesized, like the cmpxchg/xchg wrappers in
 * this file, so it stays a single operand wherever it is used, even if the
 * backing operation is itself a macro.
 */
#define atomic_long_inc_not_zero(l)					\
	(ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l)))
|
|
|
|
/*
 * atomic_long_cond_read_*() - forward to the cond_read operation of the
 * backing atomic type. The pointer v is cast to the backing type; the
 * condition c is passed through unchanged.
 *
 * The whole expansion is parenthesized, like the cmpxchg/xchg wrappers in
 * this file, so it stays a single operand wherever it is used, even if the
 * backing operation is itself a macro.
 */
#define atomic_long_cond_read_relaxed(v, c)				\
	(ATOMIC_LONG_PFX(_cond_read_relaxed)(				\
		(ATOMIC_LONG_PFX(_t) *)(v), (c)))

#define atomic_long_cond_read_acquire(v, c)				\
	(ATOMIC_LONG_PFX(_cond_read_acquire)(				\
		(ATOMIC_LONG_PFX(_t) *)(v), (c)))
|
|
|
|
#endif /* _ASM_GENERIC_ATOMIC_LONG_H */
|