linux_dsm_epyc7002/arch/arm/lib/bitops.h
Will Deacon d779c07dd7 ARM: bitops: prefetch the destination word for write prior to strex
The cost of changing a cacheline from shared to exclusive state can be
significant, especially when this is triggered by an exclusive store,
since it may result in having to retry the transaction.

This patch prefixes our atomic bitops implementation with prefetchw,
to try and grab the line in exclusive state from the start. The testop
macro is left alone, since the barrier semantics limit the usefulness
of prefetching data.

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2013-09-30 16:42:56 +01:00

101 lines
1.9 KiB
C

#include <asm/unwind.h>
#if __LINUX_ARM_ARCH__ >= 6
.macro bitop, name, instr
ENTRY( \name )
UNWIND( .fnstart )
ands ip, r1, #3
strneb r1, [ip] @ assert word-aligned
mov r2, #1
and r3, r0, #31 @ Get bit offset
mov r0, r0, lsr #5
add r1, r1, r0, lsl #2 @ Get word offset
#if __LINUX_ARM_ARCH__ >= 7
.arch_extension mp
ALT_SMP(W(pldw) [r1])
ALT_UP(W(nop))
#endif
mov r3, r2, lsl r3
1: ldrex r2, [r1]
\instr r2, r2, r3
strex r0, r2, [r1]
cmp r0, #0
bne 1b
bx lr
UNWIND( .fnend )
ENDPROC(\name )
.endm
.macro testop, name, instr, store
ENTRY( \name )
UNWIND( .fnstart )
ands ip, r1, #3
strneb r1, [ip] @ assert word-aligned
mov r2, #1
and r3, r0, #31 @ Get bit offset
mov r0, r0, lsr #5
add r1, r1, r0, lsl #2 @ Get word offset
mov r3, r2, lsl r3 @ create mask
smp_dmb
1: ldrex r2, [r1]
ands r0, r2, r3 @ save old value of bit
\instr r2, r2, r3 @ toggle bit
strex ip, r2, [r1]
cmp ip, #0
bne 1b
smp_dmb
cmp r0, #0
movne r0, #1
2: bx lr
UNWIND( .fnend )
ENDPROC(\name )
.endm
#else
.macro bitop, name, instr
ENTRY( \name )
UNWIND( .fnstart )
ands ip, r1, #3
strneb r1, [ip] @ assert word-aligned
and r2, r0, #31
mov r0, r0, lsr #5
mov r3, #1
mov r3, r3, lsl r2
save_and_disable_irqs ip
ldr r2, [r1, r0, lsl #2]
\instr r2, r2, r3
str r2, [r1, r0, lsl #2]
restore_irqs ip
mov pc, lr
UNWIND( .fnend )
ENDPROC(\name )
.endm
/**
* testop - implement a test_and_xxx_bit operation.
* @instr: operational instruction
* @store: store instruction
*
* Note: we can trivially conditionalise the store instruction
* to avoid dirtying the data cache.
*/
.macro testop, name, instr, store
ENTRY( \name )
UNWIND( .fnstart )
ands ip, r1, #3
strneb r1, [ip] @ assert word-aligned
and r3, r0, #31
mov r0, r0, lsr #5
save_and_disable_irqs ip
ldr r2, [r1, r0, lsl #2]!
mov r0, #1
tst r2, r0, lsl r3
\instr r2, r2, r0, lsl r3
\store r2, [r1]
moveq r0, #0
restore_irqs ip
mov pc, lr
UNWIND( .fnend )
ENDPROC(\name )
.endm
#endif