mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-18 10:56:12 +07:00
e87fc0ec07
Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Steven Miao <realmz6@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: adi-buildroot-devel@lists.sourceforge.net Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
946 lines
16 KiB
ArmAsm
946 lines
16 KiB
ArmAsm
/*
|
|
* Copyright 2007-2008 Analog Devices Inc.
|
|
* Philippe Gerum <rpm@xenomai.org>
|
|
*
|
|
* Licensed under the GPL-2 or later.
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/blackfin.h>
|
|
#include <asm/cache.h>
|
|
#include <asm/asm-offsets.h>
|
|
#include <asm/rwlock.h>
|
|
#include <asm/cplb.h>
|
|
|
|
.text
|
|
|
|
/*
 * coreslot_loadaddr reg
 *
 * Build the full 32-bit address of the inter-core lock word (_corelock)
 * in \reg, one 16-bit half at a time.
 */
.macro coreslot_loadaddr reg:req
	\reg\().h = _corelock;
	\reg\().l = _corelock;
.endm
|
|
|
|
/*
 * safe_testset addr, scratch
 *
 * Issue "testset (\addr)"; callers branch on CC afterwards.
 * When ANOMALY_05000477 applies, the testset is bracketed by cli/sti with
 * the interrupt mask parked in \scratch, so \scratch is clobbered in that
 * configuration.
 */
.macro safe_testset addr:req, scratch:req
#if ANOMALY_05000477
	cli \scratch;
#endif
	testset (\addr);
#if ANOMALY_05000477
	sti \scratch;
#endif
.endm
|
|
|
|
/*
 * _get_core_lock
 *
 * In:  r0 = address of the 32-bit atomic data to flush and invalidate.
 * Out: r0 = interrupt mask that was in effect before interrupts were
 *           disabled here (to be handed back to _put_core_lock).
 *
 * Masks interrupts, spins on the inter-core lock word until testset
 * succeeds, then flush-invalidates the cache line containing the data.
 * The address is rounded down to an L1 cache line, so the atomic data is
 * assumed never to span two cache lines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
	coreslot_loadaddr p0;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;			/* r1 = cache line base of the data */
	cli r0;				/* r0 = previous interrupt mask */
.Lcorelock_spin:
	safe_testset p0, r2;
	if cc jump .Lcorelock_owned;
	SSYNC(r2);
	jump .Lcorelock_spin;
.Lcorelock_owned:
	p0 = r1;
	/* Drain the core-internal write buffer before invalidating the dcache line. */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	rts;
ENDPROC(_get_core_lock)
|
|
|
|
/*
 * _get_core_lock_noflush
 *
 * In:  r0 = address of atomic data in an uncacheable memory region (32bit);
 *           the value is not used because no cache line needs flushing.
 * Out: r0 = interrupt mask that was in effect before interrupts were
 *           disabled here.
 *
 * Same as _get_core_lock, minus the flush/invalidate of the data line.
 *
 * Clobbers: r0, r2 (scratch for testset/SSYNC/CSYNC), p0
 */
ENTRY(_get_core_lock_noflush)
	coreslot_loadaddr p0;
	cli r0;				/* r0 = previous interrupt mask */
.Lcorelock_noflush_spin:
	safe_testset p0, r2;
	if cc jump .Lcorelock_noflush_owned;
	SSYNC(r2);
	jump .Lcorelock_noflush_spin;
.Lcorelock_noflush_owned:
	/*
	 * Keep this NOP: without it SMP kgdb ends up in a dead loop when one
	 * core single-steps over get_core_lock_noflush while the other core
	 * executes get_core_lock as a slave node.
	 */
	nop;
	CSYNC(r2);
	rts;
ENDPROC(_get_core_lock_noflush)
|
|
|
|
/*
 * _put_core_lock
 *
 * In:  r0 = interrupt mask to restore (as returned by _get_core_lock).
 *      r1 = address of the atomic data (32bit); not read here, r1 is
 *           overwritten with the value stored to the lock word.
 *
 * Releases the inter-core lock by storing zero to it, then restores the
 * interrupt mask.  Interrupts are masked on entry (see _get_core_lock).
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
	/* A write-through cache is assumed, so the data needs no flush here. */
	r1 = 0;
	coreslot_loadaddr p0;
	[p0] = r1;			/* drop the core lock */
	SSYNC(r2);
	sti r0;
	rts;
ENDPROC(_put_core_lock)
|
|
|
|
#ifdef __ARCH_SYNC_CORE_DCACHE
|
|
|
|
/*
 * ___raw_smp_mark_barrier_asm
 *
 * Under the core lock, rewrite _barrier_mask so that the bits of all
 * other cores are set while the bit of the calling core keeps its
 * previous state.
 *
 * rets, r7:5, r0, p1 and p0 are saved on the stack and restored before
 * returning; the callees additionally use r1/r2 as scratch.
 */
ENTRY(___raw_smp_mark_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;	/* r0 = saved interrupt mask */

	/* r6 = mask with only the current core's bit (GET_CPUID leaves the id in r7, presumably). */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/* Raise every other core's bit; leave the current core's bit untouched. */
	p1.h = _barrier_mask;
	p1.l = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;			/* r5 != 0 iff our bit is currently set */
	r7 = ~r6;			/* all cores but us */
	cc = r5 == 0;
	if cc jump .Lmark_barrier_store;
	r7 = r7 | r6;			/* our bit was set: keep it set */
.Lmark_barrier_store:
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_mark_barrier_asm)
|
|
|
|
/*
 * ___raw_smp_check_barrier_asm
 *
 * Under the core lock, test the calling core's bit in _barrier_mask.
 * If it is set, clear it, release the lock and call _resync_core_dcache;
 * otherwise just release the lock.
 *
 * rets, r7:5, r0, p1 and p0 are saved on the stack and restored before
 * returning; the callees additionally use r1/r2 as scratch.
 */
ENTRY(___raw_smp_check_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;	/* r0 = saved interrupt mask */

	/* r6 = mask with only the current core's bit (GET_CPUID leaves the id in r7, presumably). */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/* Is the current core flagged in the barrier mask? */
	p1.h = _barrier_mask;
	p1.l = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	cc = r5 == 0;
	if cc jump .Lcheck_barrier_clean;

	/* Flagged: clear our bit and publish the new mask. */
	r6 = ~r6;
	r7 = r7 & r6;
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;

	/*
	 * Invalidate the entire D-cache of the current core.  12 bytes of
	 * stack are reserved around the call (presumably the outgoing
	 * argument area expected by the C callee).
	 */
	sp += -12;
	call _resync_core_dcache;
	sp += 12;
	jump .Lcheck_barrier_exit;

.Lcheck_barrier_clean:
	call _put_core_lock;

.Lcheck_barrier_exit:
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_check_barrier_asm)
|
|
|
|
/*
 * _start_lock_coherent
 *
 * In:  r0 = irqflags (interrupt mask saved by _get_core_lock)
 *      r1 = address of atomic data
 *
 * Entered with the core lock held.  Strips the CPU fingerprints from the
 * lock word, releases the core lock, resynchronises the local D-cache if
 * another CPU left its fingerprint, and finally restores the interrupt
 * mask.
 *
 * Clobbers: r2:0, p1:0
 */
_start_lock_coherent:

	[--sp] = rets;
	[--sp] = ( r7:6 );
	p1 = r1;
	r7 = r0;			/* r7 = irqflags, kept across the call below */

	/*
	 * Work out whether the atomic data was last owned by another
	 * CPU: r6 = fingerprints present, excluding our own.
	 */
	GET_CPUID(p0, r2);
	r1 = 1;
	r1 <<= r2;
	r2 = ~r1;			/* every CPU bit except ours */

	r1 = [p1];
	r1 >>= 28;			/* fingerprints live in the high nibble */
	r6 = r1 & r2;
	r1 = [p1];
	r1 <<= 4;			/* shift the fingerprint nibble out ... */
	r1 >>= 4;			/* ... and store the word without it */
	[p1] = r1;

	/*
	 * Drop the core lock now; IRQs stay disabled while the remaining
	 * housekeeping for the current CPU is carried out.
	 */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;

	/*
	 * If a different CPU owned this atomic section before us, our
	 * D-cached copy of the data protected by the current
	 * spin/write_lock may be stale.
	 */
	cc = r6 == 0;
	if cc jump .Ldcache_coherent;

	/* Invalidate the entire D-cache of the current core. */
	sp += -12;
	call _resync_core_dcache;
	sp += 12;

.Ldcache_coherent:
	SSYNC(r2);
	sti r7;
	( r7:6 ) = [sp++];
	rets = [sp++];
	rts;
|
|
|
|
/*
 * _end_lock_coherent
 *
 * In:  r0 = irqflags
 *      r1 = address of atomic data
 *
 * Records the current CPU's fingerprint (bit 28 + cpu id) in the lock
 * word, then tail-jumps to _put_core_lock, which returns straight to
 * our caller.
 *
 * Clobbers: r2:0, p1:0
 */
_end_lock_coherent:

	p1 = r1;
	GET_CPUID(p0, r2);
	r1 = 1;
	r2 += 28;			/* fingerprint bit index for this CPU */
	r1 <<= r2;
	r2 = [p1];
	r2 = r1 | r2;
	[p1] = r2;
	r1 = p1;
	jump _put_core_lock;
|
|
|
|
#endif /* __ARCH_SYNC_CORE_DCACHE */
|
|
|
|
/*
 * ___raw_spin_is_locked_asm
 *
 * In:  r0 = &spinlock->lock
 * Out: r0 = 1 if bit 0 (the lock bit) of the lock word is set, else 0
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
	[--sp] = rets;
	p1 = r0;
	call _get_core_lock;
	r3 = [p1];
	cc = bittst( r3, 0 );
	r3 = cc;			/* r3 survives _put_core_lock */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_is_locked_asm)
|
|
|
|
/*
 * ___raw_spin_lock_asm
 *
 * In:  r0 = &spinlock->lock
 *
 * Spins until bit 0 of the lock word is found clear under the core lock,
 * then sets it.  The core lock is released between attempts.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
	[--sp] = rets;
	p1 = r0;
.Lspinlock_attempt:
	call _get_core_lock;
	r2 = [p1];
	r1 = p1;
	cc = bittst( r2, 0 );
	if cc jump .Lspinlock_contended;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r3 = p1;
	bitset ( r2, 0 );		/* raise the lock bit, keep the rest */
	[p1] = r2;
	call _start_lock_coherent;
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lspinlock_contended:
	/*
	 * The atomic area is left untouched while the lock is busy, so the
	 * flush behaves like a nop in _put_core_lock.
	 */
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;			/* reload the argument for _get_core_lock */
	jump .Lspinlock_attempt;
ENDPROC(___raw_spin_lock_asm)
|
|
|
|
/*
 * ___raw_spin_trylock_asm
 *
 * In:  r0 = &spinlock->lock
 * Out: r0 = 1 if the lock was taken, 0 if it was already held
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
	[--sp] = rets;
	p1 = r0;
	call _get_core_lock;
	r3 = [p1];
	r1 = p1;
	cc = bittst( r3, 0 );
	if cc jump .Lspin_trylock_busy;
#ifdef __ARCH_SYNC_CORE_DCACHE
	bitset ( r3, 0 );		/* raise the lock bit, keep the rest */
	[p1] = r3;
	call _start_lock_coherent;
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;
.Lspin_trylock_busy:
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_spin_trylock_asm)
|
|
|
|
/*
 * ___raw_spin_unlock_asm
 *
 * In:  r0 = &spinlock->lock
 *
 * Clears bit 0 of the lock word under the core lock.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
	[--sp] = rets;
	p1 = r0;
	call _get_core_lock;
	r2 = [p1];
	bitclr ( r2, 0 );		/* drop the lock bit */
	[p1] = r2;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent;
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_unlock_asm)
|
|
|
|
/*
 * ___raw_read_lock_asm
 *
 * In:  r0 = &rwlock->lock
 *
 * Takes a reader slot by decrementing the lock word.  If the result goes
 * negative the decrement is undone and the routine polls, dropping the
 * core lock between polls, until the word is at least 2 before retrying.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
	[--sp] = rets;
	p1 = r0;
	call _get_core_lock;
.Lread_lock_take:
	r1 = [p1];
	r1 += -1;
	[p1] = r1;
	cc = r1 < 0;
	if cc jump .Lread_lock_undo;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent;
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lread_lock_undo:
	r1 += 1;			/* give the slot back */
	[p1] = r1;
.Lread_lock_poll:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 < 2;
	if cc jump .Lread_lock_poll;
	jump .Lread_lock_take;
ENDPROC(___raw_read_lock_asm)
|
|
|
|
/*
 * ___raw_read_trylock_asm
 *
 * In:  r0 = &rwlock->lock
 * Out: r0 = 1 if a reader slot was taken, 0 if the lock word was <= 0
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
	[--sp] = rets;
	p1 = r0;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 <= 0;
	if cc jump .Lread_trylock_busy;
	r1 += -1;			/* take one reader slot */
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent;
#else
	call _put_core_lock;
#endif
	r0 = 1;
	rets = [sp++];
	rts;
.Lread_trylock_busy:
	r1 = p1;
	call _put_core_lock;
	r0 = 0;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_trylock_asm)
|
|
|
|
/*
 * ___raw_read_unlock_asm
 *
 * In:  r0 = &rwlock->lock
 *
 * Returns a reader slot by incrementing the lock word.
 *
 * Note: work done under a reader lock should have no cache side effects
 * for the other core, so the core lock is simply released on exit
 * (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
	[--sp] = rets;
	p1 = r0;
	call _get_core_lock;
	r1 = [p1];
	r1 += 1;
	[p1] = r1;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_unlock_asm)
|
|
|
|
/*
 * ___raw_write_lock_asm
 *
 * In:  r0 = &rwlock->lock
 *
 * Takes the lock for writing by subtracting RW_LOCK_BIAS from the lock
 * word once the result is zero (ignoring the CPU fingerprint nibble when
 * __ARCH_SYNC_CORE_DCACHE is set).  Otherwise polls, dropping the core
 * lock between polls, until the word reads RW_LOCK_BIAS again.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
	r3.h = hi(RW_LOCK_BIAS);
	r3.l = lo(RW_LOCK_BIAS);
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
.Lwrite_lock_take:
	r1 = [p1];
	r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r2 = r1;
	r2 <<= 4;			/* compare without the fingerprint nibble */
	r2 >>= 4;
	cc = r2 == 0;
#else
	cc = r1 == 0;
#endif
	if !cc jump .Lwrite_lock_poll;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent;
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lwrite_lock_poll:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 <<= 4;			/* ignore the fingerprint nibble */
	r1 >>= 4;
#endif
	cc = r1 == r3;
	if !cc jump .Lwrite_lock_poll;
	jump .Lwrite_lock_take;
ENDPROC(___raw_write_lock_asm)
|
|
|
|
/*
 * ___raw_write_trylock_asm
 *
 * In:  r0 = &rwlock->lock
 * Out: r0 = 1 if the write lock was taken, 0 otherwise
 *
 * The lock is free when the lock word equals RW_LOCK_BIAS.  With
 * __ARCH_SYNC_CORE_DCACHE the high nibble of the word carries CPU
 * fingerprints (set by _end_lock_coherent on unlock), so the comparison
 * must ignore that nibble, exactly as ___raw_write_lock_asm does;
 * comparing the raw word would make the trylock fail on a free lock that
 * still carries a fingerprint.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r2.l = lo(RW_LOCK_BIAS);
	r2.h = hi(RW_LOCK_BIAS);
#ifdef __ARCH_SYNC_CORE_DCACHE
	r3 = r1;
	r3 <<= 4;			/* strip the fingerprint nibble ... */
	r3 >>= 4;			/* ... before testing for "free" */
	cc = r3 == r2;
#else
	cc = r1 == r2;
#endif
	if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 >>= 28;			/* new word: fingerprints only, count 0 */
	r1 <<= 28;
#else
	r1 = 0;
#endif
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent;
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_trywrlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_write_trylock_asm)
|
|
|
|
/*
 * ___raw_write_unlock_asm
 *
 * In:  r0 = &rwlock->lock
 *
 * Releases a write lock by adding RW_LOCK_BIAS back to the lock word.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
	r3.h = hi(RW_LOCK_BIAS);
	r3.l = lo(RW_LOCK_BIAS);
	[--sp] = rets;
	p1 = r0;
	call _get_core_lock;
	r1 = [p1];
	r1 = r1 + r3;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent;
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_write_unlock_asm)
|
|
|
|
/*
 * ___raw_atomic_add_asm
 *
 * In:  r0 = ptr
 *      r1 = value
 * Out: r0 = new value of *ptr
 *
 * Atomically ADD a signed value to a 32bit word and return the result.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_add_asm)
	[--sp] = rets;
	p1 = r0;
	r3 = r1;			/* addend must survive _get_core_lock */
	call _get_core_lock;
	r2 = [p1];
	r3 = r3 + r2;			/* r3 = new value, survives _put_core_lock */
	[p1] = r3;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_atomic_add_asm)
|
|
|
|
/*
 * ___raw_atomic_xadd_asm
 *
 * In:  r0 = ptr
 *      r1 = value
 * Out: r0 = value of *ptr before the addition
 *
 * Atomically ADD a signed value to a 32bit word and return the old value.
 *
 * The addend is parked in r3 because _get_core_lock clobbers r2:0; the
 * old value is moved into r3 afterwards because _put_core_lock clobbers
 * r2:0 as well.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xadd_asm)
	p1 = r0;
	r3 = r1;			/* r3 = addend */
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];			/* r2 = old value */
	r3 = r3 + r2;			/* r3 = old + addend */
	[p1] = r3;
	r3 = r2;			/* keep the old value across _put_core_lock */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xadd_asm)
|
|
|
|
/*
 * ___raw_atomic_and_asm
 *
 * In:  r0 = ptr
 *      r1 = mask
 * Out: r0 = value of *ptr before the operation
 *
 * Atomically AND the mask into a 32bit word and return the old 32bit
 * value.
 *
 * The mask is parked in r3 because _get_core_lock clobbers r2:0; the old
 * value is moved into r3 afterwards because _put_core_lock clobbers r2:0
 * as well.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_and_asm)
	p1 = r0;
	r3 = r1;			/* r3 = mask */
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];			/* r2 = old value */
	r3 = r2 & r3;			/* r3 = old & mask */
	[p1] = r3;
	r3 = r2;			/* keep the old value across _put_core_lock */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_and_asm)
|
|
|
|
/*
 * ___raw_atomic_or_asm
 *
 * In:  r0 = ptr
 *      r1 = mask
 * Out: r0 = value of *ptr before the operation
 *
 * Atomically OR the mask bits into a 32bit word and return the old 32bit
 * value.
 *
 * The mask is parked in r3 because _get_core_lock clobbers r2:0; the old
 * value is moved into r3 afterwards because _put_core_lock clobbers r2:0
 * as well.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_or_asm)
	p1 = r0;
	r3 = r1;			/* r3 = mask */
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];			/* r2 = old value */
	r3 = r2 | r3;			/* r3 = old | mask */
	[p1] = r3;
	r3 = r2;			/* keep the old value across _put_core_lock */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_or_asm)
|
|
|
|
/*
 * ___raw_atomic_xor_asm
 *
 * In:  r0 = ptr
 *      r1 = mask
 * Out: r0 = value of *ptr before the operation
 *
 * Atomically XOR the mask bits with a 32bit word and return the old
 * 32bit value.
 *
 * The mask is parked in r3 because _get_core_lock clobbers r2:0; the old
 * value is moved into r3 afterwards because _put_core_lock clobbers r2:0
 * as well.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
	p1 = r0;
	r3 = r1;			/* r3 = mask */
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];			/* r2 = old value */
	r3 = r2 ^ r3;			/* r3 = old ^ mask */
	[p1] = r3;
	r3 = r2;			/* keep the old value across _put_core_lock */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xor_asm)
|
|
|
|
/*
 * ___raw_atomic_test_asm
 *
 * In:  r0 = ptr
 *      r1 = mask
 * Out: r0 = *ptr & mask
 *
 * Logical AND between the mask bits and a 32bit word, returning the
 * masked value.  A dedicated routine is needed on this architecture so
 * the local cache line can be invalidated before the word is read.
 * The core lock is not taken.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
	r3 = r1;			/* r3 = mask */
	p1 = r0;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;			/* r1 = cache line base of the word */
	p0 = r1;
	/* Drain the core-internal write buffer before invalidating the dcache line. */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	r0 = r0 & r3;
	rts;
ENDPROC(___raw_atomic_test_asm)
|
|
|
|
/*
 * ___raw_xchg_{1,2,4}_asm
 *
 * In:  r0 = ptr
 *      r1 = value
 * Out: r0 = previous contents of *ptr
 *
 * Atomically swap *ptr with value.  __do_xchg(src, dst) expands to the
 * whole routine body; src is the load operand and dst the store operand
 * for the access width (the 1- and 2-byte loads zero-extend).
 *
 * Clobbers: r3:0, p1:0
 */
#define __do_xchg(src, dst) 		\
	[--sp] = rets;			\
	p1 = r0;			\
	r3 = r1;			\
	call _get_core_lock;		\
	r2 = src;			\
	dst = r3;			\
	r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	rets = [sp++];			\
	r0 = r3;			\
	rts;

/* 8-bit exchange. */
ENTRY(___raw_xchg_1_asm)
	__do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

/* 16-bit exchange. */
ENTRY(___raw_xchg_2_asm)
	__do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

/* 32-bit exchange. */
ENTRY(___raw_xchg_4_asm)
	__do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)
|
|
|
|
/*
 * ___raw_cmpxchg_{1,2,4}_asm
 *
 * In:  r0 = ptr
 *      r1 = new
 *      r2 = old
 * Out: r0 = previous contents of *ptr
 *
 * Atomically store new to *ptr if *ptr == old, and return the previous
 * *ptr value either way.  __do_cmpxchg(src, dst) expands to the whole
 * routine body; src is the load operand and dst the store operand for
 * the access width (the 1- and 2-byte loads zero-extend).  r4 is saved
 * and restored because it holds "old" across _get_core_lock.
 *
 * Clobbers: r3:0, p1:0
 */
#define __do_cmpxchg(src, dst) 		\
	p1 = r0;			\
	r3 = r1;			\
	[--sp] = rets;			\
	[--sp] = r4;			\
	r4 = r2;			\
	call _get_core_lock;		\
	r2 = src;			\
	cc = r2 == r4;			\
	if !cc jump 1f;			\
	dst = r3;			\
     1: r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	r4 = [sp++];			\
	rets = [sp++];			\
	rts;

/* 8-bit compare-and-exchange. */
ENTRY(___raw_cmpxchg_1_asm)
	__do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

/* 16-bit compare-and-exchange. */
ENTRY(___raw_cmpxchg_2_asm)
	__do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

/* 32-bit compare-and-exchange. */
ENTRY(___raw_cmpxchg_4_asm)
	__do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)
|
|
|
|
/*
 * ___raw_bit_set_asm
 *
 * In:  r0 = ptr
 *      r1 = bitnr
 *
 * Atomically set a bit in a 32bit word and return the old 32bit value.
 * Builds the mask (1 << bitnr) in r1 and tail-jumps to
 * ___raw_atomic_or_asm, which produces the return value.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
	r2 = r1;			/* r2 = bitnr */
	r1 = 1;
	r1 <<= r2;			/* r1 = 1 << bitnr */
	jump ___raw_atomic_or_asm;
ENDPROC(___raw_bit_set_asm)
|
|
|
|
/*
 * ___raw_bit_clear_asm
 *
 * In:  r0 = ptr
 *      r1 = bitnr
 *
 * Atomically clear a bit in a 32bit word and return the old 32bit value.
 * Builds the mask ~(1 << bitnr) in r1 and tail-jumps to
 * ___raw_atomic_and_asm, which produces the return value.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
	r2 = 1;
	r2 <<= r1;			/* r2 = 1 << bitnr */
	r1 = ~r2;			/* r1 = every bit except bitnr */
	jump ___raw_atomic_and_asm;
ENDPROC(___raw_bit_clear_asm)
|
|
|
|
/*
 * ___raw_bit_toggle_asm
 *
 * In:  r0 = ptr
 *      r1 = bitnr
 *
 * Atomically toggle a bit in a 32bit word and return the old 32bit value.
 * Builds the mask (1 << bitnr) in r1 and tail-jumps to
 * ___raw_atomic_xor_asm, which produces the return value.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
	r2 = r1;			/* r2 = bitnr */
	r1 = 1;
	r1 <<= r2;			/* r1 = 1 << bitnr */
	jump ___raw_atomic_xor_asm;
ENDPROC(___raw_bit_toggle_asm)
|
|
|
|
/*
 * ___raw_bit_test_set_asm
 *
 * In:  r0 = ptr
 *      r1 = bitnr
 * Out: r0 = 1 if the bit was set before the operation, else 0
 *
 * Atomically test-and-set a bit in a 32bit word.  bitnr is kept on the
 * stack across the call so the old bit can be isolated from the old
 * word returned by ___raw_bit_set_asm.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_set_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_set_asm;	/* r0 = old word */
	r1 = [sp++];			/* r1 = bitnr */
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;			/* isolate the old bit */
	cc = r0 == 0;
	if cc jump .Lbit_test_set_out;
	r0 = 1;				/* normalise non-zero to 1 */
.Lbit_test_set_out:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_set_asm)
|
|
|
|
/*
 * ___raw_bit_test_clear_asm
 *
 * In:  r0 = ptr
 *      r1 = bitnr
 * Out: r0 = 1 if the bit was set before the operation, else 0
 *
 * Atomically test-and-clear a bit in a 32bit word.  bitnr is kept on the
 * stack across the call so the old bit can be isolated from the old
 * word returned by ___raw_bit_clear_asm.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_clear_asm;	/* r0 = old word */
	r1 = [sp++];			/* r1 = bitnr */
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;			/* isolate the old bit */
	cc = r0 == 0;
	if cc jump .Lbit_test_clear_out;
	r0 = 1;				/* normalise non-zero to 1 */
.Lbit_test_clear_out:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_clear_asm)
|
|
|
|
/*
 * ___raw_bit_test_toggle_asm
 *
 * In:  r0 = ptr
 *      r1 = bitnr
 * Out: r0 = 1 if the bit was set before the operation, else 0
 *
 * Atomically test-and-toggle a bit in a 32bit word.  bitnr is kept on
 * the stack across the call so the old bit can be isolated from the old
 * word returned by ___raw_bit_toggle_asm.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_toggle_asm;	/* r0 = old word */
	r1 = [sp++];			/* r1 = bitnr */
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;			/* isolate the old bit */
	cc = r0 == 0;
	if cc jump .Lbit_test_toggle_out;
	r0 = 1;				/* normalise non-zero to 1 */
.Lbit_test_toggle_out:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_toggle_asm)
|
|
|
|
/*
 * ___raw_bit_test_asm
 *
 * In:  r0 = ptr
 *      r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.  A dedicated routine
 * is needed on this architecture so the local cache can be invalidated
 * before testing.  Builds the mask (1 << bitnr) in r1 and tail-jumps to
 * ___raw_atomic_test_asm, which returns the word ANDed with that mask.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
	r2 = r1;			/* r2 = bitnr */
	r1 = 1;
	r1 <<= r2;			/* r1 = 1 << bitnr */
	jump ___raw_atomic_test_asm;
ENDPROC(___raw_bit_test_asm)
|
|
|
|
/*
 * ___raw_uncached_fetch_asm
 *
 * In:  r0 = ptr
 * Out: r0 = 32bit value read from *ptr
 *
 * Fetch a 32bit value after flush-invalidating the cache line that
 * contains it, so the read is not served from a stale local copy.
 * The core lock is not taken.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
	r1 = -L1_CACHE_BYTES;
	p1 = r0;
	r1 = r0 & r1;			/* r1 = cache line base of the word */
	p0 = r1;
	/* Drain the core-internal write buffer before invalidating the dcache line. */
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	rts;
ENDPROC(___raw_uncached_fetch_asm)
|