linux_dsm_epyc7002/include/asm-arm/spinlock.h
Martin Schwidefsky ef6edc9746 [PATCH] Directed yield: cpu_relax variants for spinlocks and rw-locks
On systems running with virtual cpus there is optimization potential in
regard to spinlocks and rw-locks.  If the virtual cpu that has taken a lock
is known to a cpu that wants to acquire the same lock it is beneficial to
yield the timeslice of the virtual cpu in favour of the cpu that has the
lock (directed yield).

With CONFIG_PREEMPT="n" this can be implemented by the architecture without
common code changes.  Powerpc already does this.

With CONFIG_PREEMPT="y" the lock loops are coded with _raw_spin_trylock,
_raw_read_trylock and _raw_write_trylock in kernel/spinlock.c.  If the lock
could not be taken cpu_relax is called.  A directed yield is not possible
because cpu_relax doesn't know anything about the lock.  To be able to
yield the lock in favour of the current lock holder variants of cpu_relax
for spinlocks and rw-locks are needed.  The new _raw_spin_relax,
_raw_read_relax and _raw_write_relax primitives differ from cpu_relax
insofar that they have an argument: a pointer to the lock structure.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-01 00:39:21 -07:00

226 lines
4.3 KiB
C

#ifndef __ASM_SPINLOCK_H
#define __ASM_SPINLOCK_H
#if __LINUX_ARM_ARCH__ < 6
#error SMP not supported on pre-ARMv6 CPUs
#endif
/*
* ARMv6 Spin-locking.
*
* We exclusively read the old value. If it is zero, we may have
* won the lock, so we try exclusively storing it. A memory barrier
* is required after we get a lock, and before we release it, because
* V6 CPUs are assumed to have weakly ordered memory.
*
* Unlocked value: 0
* Locked value: 1
*/
#define __raw_spin_is_locked(x) ((x)->lock != 0)
#define __raw_spin_unlock_wait(lock) \
do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
static inline void __raw_spin_lock(raw_spinlock_t *lock)
{
unsigned long tmp;
__asm__ __volatile__(
"1: ldrex %0, [%1]\n"
" teq %0, #0\n"
#ifdef CONFIG_CPU_32v6K
" wfene\n"
#endif
" strexeq %0, %2, [%1]\n"
" teqeq %0, #0\n"
" bne 1b"
: "=&r" (tmp)
: "r" (&lock->lock), "r" (1)
: "cc");
smp_mb();
}
static inline int __raw_spin_trylock(raw_spinlock_t *lock)
{
unsigned long tmp;
__asm__ __volatile__(
" ldrex %0, [%1]\n"
" teq %0, #0\n"
" strexeq %0, %2, [%1]"
: "=&r" (tmp)
: "r" (&lock->lock), "r" (1)
: "cc");
if (tmp == 0) {
smp_mb();
return 1;
} else {
return 0;
}
}
static inline void __raw_spin_unlock(raw_spinlock_t *lock)
{
smp_mb();
__asm__ __volatile__(
" str %1, [%0]\n"
#ifdef CONFIG_CPU_32v6K
" mcr p15, 0, %1, c7, c10, 4\n" /* DSB */
" sev"
#endif
:
: "r" (&lock->lock), "r" (0)
: "cc");
}
/*
* RWLOCKS
*
*
* Write locks are easy - we just set bit 31. When unlocking, we can
* just write zero since the lock is exclusively held.
*/
#define rwlock_is_locked(x) (*((volatile unsigned int *)(x)) != 0)
static inline void __raw_write_lock(raw_rwlock_t *rw)
{
unsigned long tmp;
__asm__ __volatile__(
"1: ldrex %0, [%1]\n"
" teq %0, #0\n"
#ifdef CONFIG_CPU_32v6K
" wfene\n"
#endif
" strexeq %0, %2, [%1]\n"
" teq %0, #0\n"
" bne 1b"
: "=&r" (tmp)
: "r" (&rw->lock), "r" (0x80000000)
: "cc");
smp_mb();
}
static inline int __raw_write_trylock(raw_rwlock_t *rw)
{
unsigned long tmp;
__asm__ __volatile__(
"1: ldrex %0, [%1]\n"
" teq %0, #0\n"
" strexeq %0, %2, [%1]"
: "=&r" (tmp)
: "r" (&rw->lock), "r" (0x80000000)
: "cc");
if (tmp == 0) {
smp_mb();
return 1;
} else {
return 0;
}
}
static inline void __raw_write_unlock(raw_rwlock_t *rw)
{
smp_mb();
__asm__ __volatile__(
"str %1, [%0]\n"
#ifdef CONFIG_CPU_32v6K
" mcr p15, 0, %1, c7, c10, 4\n" /* DSB */
" sev\n"
#endif
:
: "r" (&rw->lock), "r" (0)
: "cc");
}
/* write_can_lock - would write_trylock() succeed? */
#define __raw_write_can_lock(x) ((x)->lock == 0x80000000)
/*
* Read locks are a bit more hairy:
* - Exclusively load the lock value.
* - Increment it.
* - Store new lock value if positive, and we still own this location.
* If the value is negative, we've already failed.
* - If we failed to store the value, we want a negative result.
* - If we failed, try again.
* Unlocking is similarly hairy. We may have multiple read locks
* currently active. However, we know we won't have any write
* locks.
*/
static inline void __raw_read_lock(raw_rwlock_t *rw)
{
unsigned long tmp, tmp2;
__asm__ __volatile__(
"1: ldrex %0, [%2]\n"
" adds %0, %0, #1\n"
" strexpl %1, %0, [%2]\n"
#ifdef CONFIG_CPU_32v6K
" wfemi\n"
#endif
" rsbpls %0, %1, #0\n"
" bmi 1b"
: "=&r" (tmp), "=&r" (tmp2)
: "r" (&rw->lock)
: "cc");
smp_mb();
}
static inline void __raw_read_unlock(raw_rwlock_t *rw)
{
unsigned long tmp, tmp2;
smp_mb();
__asm__ __volatile__(
"1: ldrex %0, [%2]\n"
" sub %0, %0, #1\n"
" strex %1, %0, [%2]\n"
" teq %1, #0\n"
" bne 1b"
#ifdef CONFIG_CPU_32v6K
"\n cmp %0, #0\n"
" mcreq p15, 0, %0, c7, c10, 4\n"
" seveq"
#endif
: "=&r" (tmp), "=&r" (tmp2)
: "r" (&rw->lock)
: "cc");
}
static inline int __raw_read_trylock(raw_rwlock_t *rw)
{
unsigned long tmp, tmp2 = 1;
__asm__ __volatile__(
"1: ldrex %0, [%2]\n"
" adds %0, %0, #1\n"
" strexpl %1, %0, [%2]\n"
: "=&r" (tmp), "+r" (tmp2)
: "r" (&rw->lock)
: "cc");
smp_mb();
return tmp2 == 0;
}
/* read_can_lock - would read_trylock() succeed? */
#define __raw_read_can_lock(x) ((x)->lock < 0x80000000)
#define _raw_spin_relax(lock) cpu_relax()
#define _raw_read_relax(lock) cpu_relax()
#define _raw_write_relax(lock) cpu_relax()
#endif /* __ASM_SPINLOCK_H */