linux_dsm_epyc7002/arch/arm/include/asm/spinlock.h
Will Deacon 15e7e5c1eb ARM: 7749/1: spinlock: retry trylock operation if strex fails on free lock
An exclusive store instruction may fail for reasons other than lock
contention (e.g. a cache eviction during the critical section) so, in
line with other architectures using similar exclusive instructions
(alpha, mips, powerpc), retry the trylock operation if the lock appears
to be free but the strex reported failure.

Reported-by: Tony Thompson <anthony.thompson@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2013-06-17 09:27:04 +01:00

282 lines
5.9 KiB
C

#ifndef __ASM_SPINLOCK_H
#define __ASM_SPINLOCK_H
#if __LINUX_ARM_ARCH__ < 6
#error SMP not supported on pre-ARMv6 CPUs
#endif
#include <asm/processor.h>
/*
* sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K
* extensions, so when running on UP, we have to patch these instructions away.
*/
#define ALT_SMP(smp, up) \
"9998: " smp "\n" \
" .pushsection \".alt.smp.init\", \"a\"\n" \
" .long 9998b\n" \
" " up "\n" \
" .popsection\n"
#ifdef CONFIG_THUMB2_KERNEL
#define SEV ALT_SMP("sev.w", "nop.w")
/*
* For Thumb-2, special care is needed to ensure that the conditional WFE
* instruction really does assemble to exactly 4 bytes (as required by
* the SMP_ON_UP fixup code). By itself "wfene" might cause the
* assembler to insert a extra (16-bit) IT instruction, depending on the
* presence or absence of neighbouring conditional instructions.
*
* To avoid this unpredictableness, an approprite IT is inserted explicitly:
* the assembler won't change IT instructions which are explicitly present
* in the input.
*/
#define WFE(cond) ALT_SMP( \
"it " cond "\n\t" \
"wfe" cond ".n", \
\
"nop.w" \
)
#else
#define SEV ALT_SMP("sev", "nop")
#define WFE(cond) ALT_SMP("wfe" cond, "nop")
#endif
static inline void dsb_sev(void)
{
#if __LINUX_ARM_ARCH__ >= 7
__asm__ __volatile__ (
"dsb\n"
SEV
);
#else
__asm__ __volatile__ (
"mcr p15, 0, %0, c7, c10, 4\n"
SEV
: : "r" (0)
);
#endif
}
/*
* ARMv6 ticket-based spin-locking.
*
* A memory barrier is required after we get a lock, and before we
* release it, because V6 CPUs are assumed to have weakly ordered
* memory.
*/
#define arch_spin_unlock_wait(lock) \
do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned long tmp;
u32 newval;
arch_spinlock_t lockval;
__asm__ __volatile__(
"1: ldrex %0, [%3]\n"
" add %1, %0, %4\n"
" strex %2, %1, [%3]\n"
" teq %2, #0\n"
" bne 1b"
: "=&r" (lockval), "=&r" (newval), "=&r" (tmp)
: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
: "cc");
while (lockval.tickets.next != lockval.tickets.owner) {
wfe();
lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner);
}
smp_mb();
}
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned long contended, res;
u32 slock;
do {
__asm__ __volatile__(
" ldrex %0, [%3]\n"
" mov %2, #0\n"
" subs %1, %0, %0, ror #16\n"
" addeq %0, %0, %4\n"
" strexeq %2, %0, [%3]"
: "=&r" (slock), "=&r" (contended), "=r" (res)
: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
: "cc");
} while (res);
if (!contended) {
smp_mb();
return 1;
} else {
return 0;
}
}
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
smp_mb();
lock->tickets.owner++;
dsb_sev();
}
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
return tickets.owner != tickets.next;
}
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
return (tickets.next - tickets.owner) > 1;
}
#define arch_spin_is_contended arch_spin_is_contended
/*
* RWLOCKS
*
*
* Write locks are easy - we just set bit 31. When unlocking, we can
* just write zero since the lock is exclusively held.
*/
static inline void arch_write_lock(arch_rwlock_t *rw)
{
unsigned long tmp;
__asm__ __volatile__(
"1: ldrex %0, [%1]\n"
" teq %0, #0\n"
WFE("ne")
" strexeq %0, %2, [%1]\n"
" teq %0, #0\n"
" bne 1b"
: "=&r" (tmp)
: "r" (&rw->lock), "r" (0x80000000)
: "cc");
smp_mb();
}
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
unsigned long tmp;
__asm__ __volatile__(
" ldrex %0, [%1]\n"
" teq %0, #0\n"
" strexeq %0, %2, [%1]"
: "=&r" (tmp)
: "r" (&rw->lock), "r" (0x80000000)
: "cc");
if (tmp == 0) {
smp_mb();
return 1;
} else {
return 0;
}
}
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
smp_mb();
__asm__ __volatile__(
"str %1, [%0]\n"
:
: "r" (&rw->lock), "r" (0)
: "cc");
dsb_sev();
}
/* write_can_lock - would write_trylock() succeed? */
#define arch_write_can_lock(x) ((x)->lock == 0)
/*
* Read locks are a bit more hairy:
* - Exclusively load the lock value.
* - Increment it.
* - Store new lock value if positive, and we still own this location.
* If the value is negative, we've already failed.
* - If we failed to store the value, we want a negative result.
* - If we failed, try again.
* Unlocking is similarly hairy. We may have multiple read locks
* currently active. However, we know we won't have any write
* locks.
*/
static inline void arch_read_lock(arch_rwlock_t *rw)
{
unsigned long tmp, tmp2;
__asm__ __volatile__(
"1: ldrex %0, [%2]\n"
" adds %0, %0, #1\n"
" strexpl %1, %0, [%2]\n"
WFE("mi")
" rsbpls %0, %1, #0\n"
" bmi 1b"
: "=&r" (tmp), "=&r" (tmp2)
: "r" (&rw->lock)
: "cc");
smp_mb();
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
unsigned long tmp, tmp2;
smp_mb();
__asm__ __volatile__(
"1: ldrex %0, [%2]\n"
" sub %0, %0, #1\n"
" strex %1, %0, [%2]\n"
" teq %1, #0\n"
" bne 1b"
: "=&r" (tmp), "=&r" (tmp2)
: "r" (&rw->lock)
: "cc");
if (tmp == 0)
dsb_sev();
}
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
unsigned long tmp, tmp2 = 1;
__asm__ __volatile__(
" ldrex %0, [%2]\n"
" adds %0, %0, #1\n"
" strexpl %1, %0, [%2]\n"
: "=&r" (tmp), "+r" (tmp2)
: "r" (&rw->lock)
: "cc");
smp_mb();
return tmp2 == 0;
}
/* read_can_lock - would read_trylock() succeed? */
#define arch_read_can_lock(x) ((x)->lock < 0x80000000)
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
#define arch_spin_relax(lock) cpu_relax()
#define arch_read_relax(lock) cpu_relax()
#define arch_write_relax(lock) cpu_relax()
#endif /* __ASM_SPINLOCK_H */