linux_dsm_epyc7002/arch/s390/include/asm/spinlock.h

274 lines
6.0 KiB
C
Raw Normal View History

/*
* S390 version
* Copyright IBM Corp. 1999
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
*
* Derived from "include/asm-i386/spinlock.h"
*/
#ifndef __ASM_SPINLOCK_H
#define __ASM_SPINLOCK_H
#include <linux/smp.h>
#include <asm/atomic_ops.h>
locking/spinlock, arch: Update and fix spin_unlock_wait() implementations This patch updates/fixes all spin_unlock_wait() implementations. The update is in semantics; where it previously was only a control dependency, we now upgrade to a full load-acquire to match the store-release from the spin_unlock() we waited on. This ensures that when spin_unlock_wait() returns, we're guaranteed to observe the full critical section we waited on. This fixes a number of spin_unlock_wait() users that (not unreasonably) rely on this. I also fixed a number of ticket lock versions to only wait on the current lock holder, instead of for a full unlock, as this is sufficient. Furthermore; again for ticket locks; I added an smp_rmb() in between the initial ticket load and the spin loop testing the current value because I could not convince myself the address dependency is sufficient, esp. if the loads are of different sizes. I'm more than happy to remove this smp_rmb() again if people are certain the address dependency does indeed work as expected. Note: PPC32 will be fixed independently Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: chris@zankel.net Cc: cmetcalf@mellanox.com Cc: davem@davemloft.net Cc: dhowells@redhat.com Cc: james.hogan@imgtec.com Cc: jejb@parisc-linux.org Cc: linux@armlinux.org.uk Cc: mpe@ellerman.id.au Cc: ralf@linux-mips.org Cc: realmz6@gmail.com Cc: rkuo@codeaurora.org Cc: rth@twiddle.net Cc: schwidefsky@de.ibm.com Cc: tony.luck@intel.com Cc: vgupta@synopsys.com Cc: ysato@users.sourceforge.jp Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-05-26 15:35:03 +07:00
#include <asm/barrier.h>
#include <asm/processor.h>
#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
extern int spin_retry;
#ifndef CONFIG_SMP
static inline bool arch_vcpu_is_preempted(int cpu) { return false; }
#else
bool arch_vcpu_is_preempted(int cpu);
#endif
#define vcpu_is_preempted arch_vcpu_is_preempted
/*
* Simple spin lock operations. There are two variants, one clears IRQ's
* on the local processor, one does not.
*
* We make no fairness assumptions. They have a cost.
[PATCH] spinlock consolidation This patch (written by me and also containing many suggestions of Arjan van de Ven) does a major cleanup of the spinlock code. It does the following things: - consolidates and enhances the spinlock/rwlock debugging code - simplifies the asm/spinlock.h files - encapsulates the raw spinlock type and moves generic spinlock features (such as ->break_lock) into the generic code. - cleans up the spinlock code hierarchy to get rid of the spaghetti. Most notably there's now only a single variant of the debugging code, located in lib/spinlock_debug.c. (previously we had one SMP debugging variant per architecture, plus a separate generic one for UP builds) Also, i've enhanced the rwlock debugging facility, it will now track write-owners. There is new spinlock-owner/CPU-tracking on SMP builds too. All locks have lockup detection now, which will work for both soft and hard spin/rwlock lockups. The arch-level include files now only contain the minimally necessary subset of the spinlock code - all the rest that can be generalized now lives in the generic headers: include/asm-i386/spinlock_types.h | 16 include/asm-x86_64/spinlock_types.h | 16 I have also split up the various spinlock variants into separate files, making it easier to see which does what. The new layout is: SMP | UP ----------------------------|----------------------------------- asm/spinlock_types_smp.h | linux/spinlock_types_up.h linux/spinlock_types.h | linux/spinlock_types.h asm/spinlock_smp.h | linux/spinlock_up.h linux/spinlock_api_smp.h | linux/spinlock_api_up.h linux/spinlock.h | linux/spinlock.h /* * here's the role of the various spinlock/rwlock related include files: * * on SMP builds: * * asm/spinlock_types.h: contains the raw_spinlock_t/raw_rwlock_t and the * initializers * * linux/spinlock_types.h: * defines the generic type and initializers * * asm/spinlock.h: contains the __raw_spin_*()/etc. lowlevel * implementations, mostly inline assembly code * * (also included on UP-debug builds:) * * linux/spinlock_api_smp.h: * contains the prototypes for the _spin_*() APIs. * * linux/spinlock.h: builds the final spin_*() APIs. * * on UP builds: * * linux/spinlock_type_up.h: * contains the generic, simplified UP spinlock type. * (which is an empty structure on non-debug builds) * * linux/spinlock_types.h: * defines the generic type and initializers * * linux/spinlock_up.h: * contains the __raw_spin_*()/etc. version of UP * builds. (which are NOPs on non-debug, non-preempt * builds) * * (included on UP-non-debug builds:) * * linux/spinlock_api_up.h: * builds the _spin_*() APIs. * * linux/spinlock.h: builds the final spin_*() APIs. */ All SMP and UP architectures are converted by this patch. arm, i386, ia64, ppc, ppc64, s390/s390x, x64 was build-tested via crosscompilers. m32r, mips, sh, sparc, have not been tested yet, but should be mostly fine. From: Grant Grundler <grundler@parisc-linux.org> Booted and lightly tested on a500-44 (64-bit, SMP kernel, dual CPU). Builds 32-bit SMP kernel (not booted or tested). I did not try to build non-SMP kernels. That should be trivial to fix up later if necessary. I converted bit ops atomic_hash lock to raw_spinlock_t. Doing so avoids some ugly nesting of linux/*.h and asm/*.h files. Those particular locks are well tested and contained entirely inside arch specific code. I do NOT expect any new issues to arise with them. If someone does ever need to use debug/metrics with them, then they will need to unravel this hairball between spinlocks, atomic ops, and bit ops that exist only because parisc has exactly one atomic instruction: LDCW (load and clear word). From: "Luck, Tony" <tony.luck@intel.com> ia64 fix Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Arjan van de Ven <arjanv@infradead.org> Signed-off-by: Grant Grundler <grundler@parisc-linux.org> Cc: Matthew Wilcox <willy@debian.org> Signed-off-by: Hirokazu Takata <takata@linux-m32r.org> Signed-off-by: Mikael Pettersson <mikpe@csd.uu.se> Signed-off-by: Benoit Boissinot <benoit.boissinot@ens-lyon.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-10 14:25:56 +07:00
*
* (the type definitions are in asm/spinlock_types.h)
*/
void arch_lock_relax(int cpu);
s390/spinlock: introduce spinlock wait queueing The queued spinlock code for s390 follows the principles of the common code qspinlock implementation but with a few notable differences. The format of the spinlock_t locking word differs, s390 needs to store the logical CPU number of the lock holder in the spinlock_t to be able to use the diagnose 9c directed yield hypervisor call. The inline code sequences for spin_lock and spin_unlock are nice and short. The inline portion of a spin_lock now typically looks like this: lhi %r0,0 # 0 indicates an empty lock l %r1,0x3a0 # CPU number + 1 from lowcore cs %r0,%r1,<some_lock> # lock operation jnz call_wait # on failure call wait function locked: ... call_wait: la %r2,<some_lock> brasl %r14,arch_spin_lock_wait j locked A spin_unlock is as simple as before: lhi %r0,0 sth %r0,2(%r2) # unlock operation After a CPU has queued itself it may not enable interrupts again for the arch_spin_lock_flags() variant. The arch_spin_lock_wait_flags wait function is removed. To improve performance the code implements opportunistic lock stealing. If the wait function finds a spinlock_t that indicates that the lock is free but there are queued waiters, the CPU may steal the lock up to three times without queueing itself. The lock stealing update the steal counter in the lock word to prevent more than 3 steals. The counter is reset at the time the CPU next in the queue successfully takes the lock. While the queued spinlocks improve performance in a system with dedicated CPUs, in a virtualized environment with continuously overcommitted CPUs the queued spinlocks can have a negative effect on performance. This is due to the fact that a queued CPU that is preempted by the hypervisor will block the queue at some point even without holding the lock. With the classic spinlock it does not matter if a CPU is preempted that waits for the lock. Therefore use the queued spinlock code only if the system runs with dedicated CPUs and fall back to classic spinlocks when running with shared CPUs. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2017-03-24 23:25:02 +07:00
void arch_spin_relax(arch_spinlock_t *lock);
void arch_spin_lock_wait(arch_spinlock_t *);
int arch_spin_trylock_retry(arch_spinlock_t *);
s390/spinlock: introduce spinlock wait queueing The queued spinlock code for s390 follows the principles of the common code qspinlock implementation but with a few notable differences. The format of the spinlock_t locking word differs, s390 needs to store the logical CPU number of the lock holder in the spinlock_t to be able to use the diagnose 9c directed yield hypervisor call. The inline code sequences for spin_lock and spin_unlock are nice and short. The inline portion of a spin_lock now typically looks like this: lhi %r0,0 # 0 indicates an empty lock l %r1,0x3a0 # CPU number + 1 from lowcore cs %r0,%r1,<some_lock> # lock operation jnz call_wait # on failure call wait function locked: ... call_wait: la %r2,<some_lock> brasl %r14,arch_spin_lock_wait j locked A spin_unlock is as simple as before: lhi %r0,0 sth %r0,2(%r2) # unlock operation After a CPU has queued itself it may not enable interrupts again for the arch_spin_lock_flags() variant. The arch_spin_lock_wait_flags wait function is removed. To improve performance the code implements opportunistic lock stealing. If the wait function finds a spinlock_t that indicates that the lock is free but there are queued waiters, the CPU may steal the lock up to three times without queueing itself. The lock stealing update the steal counter in the lock word to prevent more than 3 steals. The counter is reset at the time the CPU next in the queue successfully takes the lock. While the queued spinlocks improve performance in a system with dedicated CPUs, in a virtualized environment with continuously overcommitted CPUs the queued spinlocks can have a negative effect on performance. This is due to the fact that a queued CPU that is preempted by the hypervisor will block the queue at some point even without holding the lock. With the classic spinlock it does not matter if a CPU is preempted that waits for the lock. Therefore use the queued spinlock code only if the system runs with dedicated CPUs and fall back to classic spinlocks when running with shared CPUs. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2017-03-24 23:25:02 +07:00
void arch_spin_lock_setup(int cpu);
static inline u32 arch_spin_lockval(int cpu)
{
return cpu + 1;
}
static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
return lock.lock == 0;
}
static inline int arch_spin_is_locked(arch_spinlock_t *lp)
{
return READ_ONCE(lp->lock) != 0;
}
static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
{
barrier();
s390/spinlock: introduce spinlock wait queueing The queued spinlock code for s390 follows the principles of the common code qspinlock implementation but with a few notable differences. The format of the spinlock_t locking word differs, s390 needs to store the logical CPU number of the lock holder in the spinlock_t to be able to use the diagnose 9c directed yield hypervisor call. The inline code sequences for spin_lock and spin_unlock are nice and short. The inline portion of a spin_lock now typically looks like this: lhi %r0,0 # 0 indicates an empty lock l %r1,0x3a0 # CPU number + 1 from lowcore cs %r0,%r1,<some_lock> # lock operation jnz call_wait # on failure call wait function locked: ... call_wait: la %r2,<some_lock> brasl %r14,arch_spin_lock_wait j locked A spin_unlock is as simple as before: lhi %r0,0 sth %r0,2(%r2) # unlock operation After a CPU has queued itself it may not enable interrupts again for the arch_spin_lock_flags() variant. The arch_spin_lock_wait_flags wait function is removed. To improve performance the code implements opportunistic lock stealing. If the wait function finds a spinlock_t that indicates that the lock is free but there are queued waiters, the CPU may steal the lock up to three times without queueing itself. The lock stealing update the steal counter in the lock word to prevent more than 3 steals. The counter is reset at the time the CPU next in the queue successfully takes the lock. While the queued spinlocks improve performance in a system with dedicated CPUs, in a virtualized environment with continuously overcommitted CPUs the queued spinlocks can have a negative effect on performance. This is due to the fact that a queued CPU that is preempted by the hypervisor will block the queue at some point even without holding the lock. With the classic spinlock it does not matter if a CPU is preempted that waits for the lock. Therefore use the queued spinlock code only if the system runs with dedicated CPUs and fall back to classic spinlocks when running with shared CPUs. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2017-03-24 23:25:02 +07:00
return likely(__atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL));
}
static inline void arch_spin_lock(arch_spinlock_t *lp)
{
if (!arch_spin_trylock_once(lp))
arch_spin_lock_wait(lp);
}
static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
unsigned long flags)
{
if (!arch_spin_trylock_once(lp))
s390/spinlock: introduce spinlock wait queueing The queued spinlock code for s390 follows the principles of the common code qspinlock implementation but with a few notable differences. The format of the spinlock_t locking word differs, s390 needs to store the logical CPU number of the lock holder in the spinlock_t to be able to use the diagnose 9c directed yield hypervisor call. The inline code sequences for spin_lock and spin_unlock are nice and short. The inline portion of a spin_lock now typically looks like this: lhi %r0,0 # 0 indicates an empty lock l %r1,0x3a0 # CPU number + 1 from lowcore cs %r0,%r1,<some_lock> # lock operation jnz call_wait # on failure call wait function locked: ... call_wait: la %r2,<some_lock> brasl %r14,arch_spin_lock_wait j locked A spin_unlock is as simple as before: lhi %r0,0 sth %r0,2(%r2) # unlock operation After a CPU has queued itself it may not enable interrupts again for the arch_spin_lock_flags() variant. The arch_spin_lock_wait_flags wait function is removed. To improve performance the code implements opportunistic lock stealing. If the wait function finds a spinlock_t that indicates that the lock is free but there are queued waiters, the CPU may steal the lock up to three times without queueing itself. The lock stealing update the steal counter in the lock word to prevent more than 3 steals. The counter is reset at the time the CPU next in the queue successfully takes the lock. While the queued spinlocks improve performance in a system with dedicated CPUs, in a virtualized environment with continuously overcommitted CPUs the queued spinlocks can have a negative effect on performance. This is due to the fact that a queued CPU that is preempted by the hypervisor will block the queue at some point even without holding the lock. With the classic spinlock it does not matter if a CPU is preempted that waits for the lock. Therefore use the queued spinlock code only if the system runs with dedicated CPUs and fall back to classic spinlocks when running with shared CPUs. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2017-03-24 23:25:02 +07:00
arch_spin_lock_wait(lp);
}
static inline int arch_spin_trylock(arch_spinlock_t *lp)
{
if (!arch_spin_trylock_once(lp))
return arch_spin_trylock_retry(lp);
return 1;
}
static inline void arch_spin_unlock(arch_spinlock_t *lp)
{
typecheck(int, lp->lock);
asm volatile(
#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
" .long 0xb2fa0070\n" /* NIAI 7 */
#endif
s390/spinlock: introduce spinlock wait queueing The queued spinlock code for s390 follows the principles of the common code qspinlock implementation but with a few notable differences. The format of the spinlock_t locking word differs, s390 needs to store the logical CPU number of the lock holder in the spinlock_t to be able to use the diagnose 9c directed yield hypervisor call. The inline code sequences for spin_lock and spin_unlock are nice and short. The inline portion of a spin_lock now typically looks like this: lhi %r0,0 # 0 indicates an empty lock l %r1,0x3a0 # CPU number + 1 from lowcore cs %r0,%r1,<some_lock> # lock operation jnz call_wait # on failure call wait function locked: ... call_wait: la %r2,<some_lock> brasl %r14,arch_spin_lock_wait j locked A spin_unlock is as simple as before: lhi %r0,0 sth %r0,2(%r2) # unlock operation After a CPU has queued itself it may not enable interrupts again for the arch_spin_lock_flags() variant. The arch_spin_lock_wait_flags wait function is removed. To improve performance the code implements opportunistic lock stealing. If the wait function finds a spinlock_t that indicates that the lock is free but there are queued waiters, the CPU may steal the lock up to three times without queueing itself. The lock stealing update the steal counter in the lock word to prevent more than 3 steals. The counter is reset at the time the CPU next in the queue successfully takes the lock. While the queued spinlocks improve performance in a system with dedicated CPUs, in a virtualized environment with continuously overcommitted CPUs the queued spinlocks can have a negative effect on performance. This is due to the fact that a queued CPU that is preempted by the hypervisor will block the queue at some point even without holding the lock. With the classic spinlock it does not matter if a CPU is preempted that waits for the lock. Therefore use the queued spinlock code only if the system runs with dedicated CPUs and fall back to classic spinlocks when running with shared CPUs. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2017-03-24 23:25:02 +07:00
" sth %1,%0\n"
: "=Q" (((unsigned short *) &lp->lock)[1])
: "d" (0) : "cc", "memory");
}
/*
* Read-write spinlocks, allowing multiple readers
* but only one writer.
*
* NOTE! it is quite common to have readers in interrupts
* but no interrupt writers. For those circumstances we
* can "mix" irq-safe locks - any writer needs to get a
* irq-safe write-lock, but readers can get non-irqsafe
* read-locks.
*/
/**
* read_can_lock - would read_trylock() succeed?
* @lock: the rwlock in question.
*/
#define arch_read_can_lock(x) ((int)(x)->lock >= 0)
/**
* write_can_lock - would write_trylock() succeed?
* @lock: the rwlock in question.
*/
#define arch_write_can_lock(x) ((x)->lock == 0)
extern int _raw_read_trylock_retry(arch_rwlock_t *lp);
extern int _raw_write_trylock_retry(arch_rwlock_t *lp);
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
static inline int arch_read_trylock_once(arch_rwlock_t *rw)
{
int old = ACCESS_ONCE(rw->lock);
return likely(old >= 0 &&
__atomic_cmpxchg_bool(&rw->lock, old, old + 1));
}
static inline int arch_write_trylock_once(arch_rwlock_t *rw)
{
int old = ACCESS_ONCE(rw->lock);
return likely(old == 0 &&
__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000));
}
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
#define __RAW_OP_OR "lao"
#define __RAW_OP_AND "lan"
#define __RAW_OP_ADD "laa"
#define __RAW_LOCK(ptr, op_val, op_string) \
({ \
int old_val; \
\
typecheck(int *, ptr); \
asm volatile( \
op_string " %0,%2,%1\n" \
"bcr 14,0\n" \
: "=d" (old_val), "+Q" (*ptr) \
: "d" (op_val) \
: "cc", "memory"); \
old_val; \
})
#define __RAW_UNLOCK(ptr, op_val, op_string) \
({ \
int old_val; \
\
typecheck(int *, ptr); \
asm volatile( \
op_string " %0,%2,%1\n" \
: "=d" (old_val), "+Q" (*ptr) \
: "d" (op_val) \
: "cc", "memory"); \
old_val; \
})
extern void _raw_read_lock_wait(arch_rwlock_t *lp);
extern void _raw_write_lock_wait(arch_rwlock_t *lp, int prev);
static inline void arch_read_lock(arch_rwlock_t *rw)
{
int old;
old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD);
if (old < 0)
_raw_read_lock_wait(rw);
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
__RAW_UNLOCK(&rw->lock, -1, __RAW_OP_ADD);
}
static inline void arch_write_lock(arch_rwlock_t *rw)
{
int old;
old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
if (old != 0)
_raw_write_lock_wait(rw, old);
rw->owner = SPINLOCK_LOCKVAL;
}
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
rw->owner = 0;
__RAW_UNLOCK(&rw->lock, 0x7fffffff, __RAW_OP_AND);
}
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
extern void _raw_read_lock_wait(arch_rwlock_t *lp);
extern void _raw_write_lock_wait(arch_rwlock_t *lp);
static inline void arch_read_lock(arch_rwlock_t *rw)
{
if (!arch_read_trylock_once(rw))
_raw_read_lock_wait(rw);
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
int old;
do {
old = ACCESS_ONCE(rw->lock);
} while (!__atomic_cmpxchg_bool(&rw->lock, old, old - 1));
}
static inline void arch_write_lock(arch_rwlock_t *rw)
{
if (!arch_write_trylock_once(rw))
_raw_write_lock_wait(rw);
rw->owner = SPINLOCK_LOCKVAL;
}
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
typecheck(int, rw->lock);
rw->owner = 0;
asm volatile(
"st %1,%0\n"
: "+Q" (rw->lock)
: "d" (0)
: "cc", "memory");
}
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
if (!arch_read_trylock_once(rw))
return _raw_read_trylock_retry(rw);
return 1;
}
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
if (!arch_write_trylock_once(rw) && !_raw_write_trylock_retry(rw))
return 0;
rw->owner = SPINLOCK_LOCKVAL;
return 1;
}
static inline void arch_read_relax(arch_rwlock_t *rw)
{
arch_lock_relax(rw->owner);
}
static inline void arch_write_relax(arch_rwlock_t *rw)
{
arch_lock_relax(rw->owner);
}
#endif /* __ASM_SPINLOCK_H */