mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-30 12:26:41 +07:00
percpu, x86: Add arch-specific this_cpu_cmpxchg_double() support
Support this_cpu_cmpxchg_double() using the cmpxchg16b and cmpxchg8b instructions. -tj: s/percpu_cmpxchg16b/percpu_cmpxchg16b_double/ for consistency and other cosmetic changes. Signed-off-by: Christoph Lameter <cl@linux.com> Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
parent
7c33433921
commit
b9ec40af0e
@ -451,6 +451,26 @@ do { \
|
||||
#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
||||
#endif /* !CONFIG_M386 */
|
||||
|
||||
#ifdef CONFIG_X86_CMPXCHG64
|
||||
/*
 * percpu_cmpxchg8b_double - compare and exchange two adjacent per-cpu
 * words with a single cmpxchg8b instruction.
 *
 * pcp1:   the first (low) per-cpu word; cmpxchg8b operates on the 8 bytes
 *         starting at this location, so the second word is the one that
 *         directly follows it
 * o1, o2: expected old values (low word in %eax, high word in %edx)
 * n1, n2: replacement values (low word in %ebx, high word in %ecx)
 *
 * Evaluates to non-zero when the exchange was performed and to zero when
 * the current contents did not match o1/o2.
 *
 * Constraint notes:
 *  - cmpxchg8b reads and writes its memory operand, so pcp1 is "+m".
 *  - the second word is not passed to this macro and therefore cannot be
 *    named as an operand; the "memory" clobber tells the compiler that it
 *    is modified as well.
 *  - on failure the instruction loads the current value into %edx:%eax,
 *    so the high old value is an in/out operand ("+d").
 */
#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)			\
({									\
	char __ret;							\
	typeof(o1) __o1 = (o1);						\
	typeof(o1) __n1 = (n1);						\
	typeof(o2) __o2 = (o2);						\
	typeof(o2) __n2 = (n2);						\
	asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t"	\
		    : "=a" (__ret), "+m" (pcp1), "+d" (__o2)		\
		    : "b" (__n1), "c" (__n2), "a" (__o1)		\
		    : "memory");					\
	__ret;								\
})
|
||||
|
||||
/*
 * All three flavours of the 2 x 4 byte double cmpxchg map onto the same
 * cmpxchg8b based helper.  pcp2 is accepted to keep the generic interface
 * but is not forwarded: the helper only takes the first word.
 */
#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)		\
	percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)		\
	percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)	\
	percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
|
||||
#endif /* CONFIG_X86_CMPXCHG64 */
|
||||
|
||||
/*
|
||||
* Per cpu atomic 64 bit operations are only available under 64 bit.
|
||||
* 32 bit must fall back to generic operations.
|
||||
@ -480,6 +500,34 @@ do { \
|
||||
#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
|
||||
#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
|
||||
#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
|
||||
|
||||
/*
 * Pretty complex macro to generate the cmpxchg16b instruction.  The
 * instruction is not supported on early AMD64 processors, so the default
 * code is a call to the software emulation (this_cpu_cmpxchg16b_emu) which
 * is replaced by the real instruction when X86_FEATURE_CX16 is present.
 * The address used in the cmpxchg16b instruction must be aligned to a
 * 16 byte boundary.
 *
 * pcp1:   the first (low) per-cpu word; its address is passed in %rsi and
 *         the operation covers the 16 bytes starting there
 * o1, o2: expected old values (low in %rax, high in %rdx)
 * n1, n2: replacement values (low in %rbx, high in %rcx)
 *
 * Evaluates to non-zero when the exchange was performed, zero otherwise.
 *
 * The per-cpu memory is only reachable through the pointer in %rsi, so it
 * cannot be expressed as an "m" operand here; the "memory" clobber makes
 * the compiler aware that both words may have been modified.  cmpxchg16b
 * rewrites %rdx:%rax on failure, hence the "+d" in/out operand.
 */
#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)			\
({									\
	char __ret;							\
	typeof(o1) __o1 = (o1);						\
	typeof(o1) __n1 = (n1);						\
	typeof(o2) __o2 = (o2);						\
	typeof(o2) __n2 = (n2);						\
	alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4,	\
		       "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t",	\
		       X86_FEATURE_CX16,				\
		       ASM_OUTPUT2("=a" (__ret), "+d" (__o2)),		\
		       "S" (&(pcp1)), "b" (__n1), "c" (__n2),		\
		       "a" (__o1) : "memory");				\
	__ret;								\
})
|
||||
|
||||
/*
 * All three flavours of the 2 x 8 byte double cmpxchg map onto the same
 * cmpxchg16b based helper.  pcp2 is accepted to keep the generic interface
 * but is not forwarded: the helper only takes the first word.
 */
#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)		\
	percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)		\
	percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)	\
	percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
|
||||
|
||||
#endif
|
||||
|
||||
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
|
||||
|
@ -42,4 +42,5 @@ else
|
||||
lib-y += memmove_64.o memset_64.o
|
||||
lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
|
||||
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
|
||||
lib-y += cmpxchg16b_emu.o
|
||||
endif
|
||||
|
59
arch/x86/lib/cmpxchg16b_emu.S
Normal file
59
arch/x86/lib/cmpxchg16b_emu.S
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
#include <asm/frame.h>
|
||||
#include <asm/dwarf2.h>
|
||||
|
||||
.text
|
||||
|
||||
/*
 * this_cpu_cmpxchg16b_emu - emulate 'cmpxchg16b %gs:(%rsi)' in software
 *
 * Syntax: AT&T (GAS), x86-64.
 *
 * Inputs:
 * %rsi : memory location to compare (addressed relative to %gs)
 * %rax : low 64 bits of old value
 * %rdx : high 64 bits of old value
 * %rbx : low 64 bits of new value
 * %rcx : high 64 bits of new value
 *
 * Output:
 * %al  : 1 if the new value was stored, 0 if the old value did not match
 *
 * Unlike the real instruction, the result is returned in %al and not via
 * ZF, and %rdx:%rax are not reloaded with the memory contents on a
 * mismatch.  The caller reads %al to get the result.
 *
 * Note that this is only useful for a cpuops operation.  Meaning that we
 * do *not* have a fully atomic operation but just an operation that is
 * *atomic* on a single cpu (as provided by the this_cpu_xx class of
 * macros): interrupts are disabled around the compare and the two stores,
 * and the caller's flags are restored before returning.
 */
ENTRY(this_cpu_cmpxchg16b_emu)
CFI_STARTPROC

	pushfq				/* save caller's IF ... */
	CFI_ADJUST_CFA_OFFSET 8
	cli				/* ... and keep interrupts out */

	cmpq %gs:(%rsi), %rax		/* low half matches? */
	jne .Lnot_same
	cmpq %gs:8(%rsi), %rdx		/* high half matches? */
	jne .Lnot_same

	movq %rbx, %gs:(%rsi)
	movq %rcx, %gs:8(%rsi)

	CFI_REMEMBER_STATE
	popfq
	CFI_ADJUST_CFA_OFFSET -8
	mov $1, %al			/* mov leaves the restored flags alone */
	ret

	CFI_RESTORE_STATE
.Lnot_same:
	popfq
	CFI_ADJUST_CFA_OFFSET -8
	xor %al,%al
	ret

CFI_ENDPROC

ENDPROC(this_cpu_cmpxchg16b_emu)
|
Loading…
Reference in New Issue
Block a user