mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-20 22:59:12 +07:00
69d927bba3
Recent probing at the Linux Kernel Memory Model uncovered a 'surprise'. Strongly ordered architectures where the atomic RmW primitive implies full memory ordering and smp_mb__{before,after}_atomic() are a simple barrier() (such as x86) fail for:

	*x = 1;
	atomic_inc(u);
	smp_mb__after_atomic();
	r0 = *y;

Because, while the atomic_inc() implies memory order, it (surprisingly) does not provide a compiler barrier. This then allows the compiler to re-order like so:

	atomic_inc(u);
	*x = 1;
	smp_mb__after_atomic();
	r0 = *y;

Which the CPU is then allowed to re-order (under TSO rules) like:

	atomic_inc(u);
	r0 = *y;
	*x = 1;

And this very much was not intended. Therefore strengthen the atomic RmW ops to include a compiler barrier.

NOTE: atomic_{or,and,xor} and the bitops already had the compiler barrier.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
89 lines
2.5 KiB
C
89 lines
2.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_X86_BARRIER_H
|
|
#define _ASM_X86_BARRIER_H
|
|
|
|
#include <asm/alternative.h>
|
|
#include <asm/nops.h>
|
|
|
|
/*
 * Force strict CPU ordering.
 * And yes, this might be required on UP too when we're talking
 * to devices.
 *
 * With CONFIG_X86_32 each barrier is built through ALTERNATIVE(), which is
 * given a locked add of zero to the stack as its first instruction and the
 * matching fence (mfence/lfence/sfence) as its second, keyed on
 * X86_FEATURE_XMM2.  Without CONFIG_X86_32 the fence instruction is
 * emitted directly.
 *
 * Every variant lists "memory" as a clobber, so each one is also a
 * compiler barrier.
 */
#ifdef CONFIG_X86_32
#define mb()	asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "mfence", \
					 X86_FEATURE_XMM2) ::: "memory", "cc")
#define rmb()	asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "lfence", \
					 X86_FEATURE_XMM2) ::: "memory", "cc")
#define wmb()	asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \
					 X86_FEATURE_XMM2) ::: "memory", "cc")
#else
#define mb()	asm volatile("mfence" ::: "memory")
#define rmb()	asm volatile("lfence" ::: "memory")
#define wmb()	asm volatile("sfence" ::: "memory")
#endif
|
|
|
|
/**
 * array_index_mask_nospec() - generate a mask that is ~0UL when the
 *	bounds check succeeds and 0 otherwise
 * @index: array element index
 * @size: number of elements in array
 *
 * The mask is computed without a conditional branch: "cmp" sets the carry
 * flag exactly when @index is below @size (unsigned compare), and "sbb" of
 * a register with itself then leaves 0 - carry in that register.
 *
 * Returns:
 *     0 - (index < size)
 */
static inline unsigned long array_index_mask_nospec(unsigned long index,
						    unsigned long size)
{
	unsigned long mask;

	/* "cc" is clobbered because both instructions rewrite the flags. */
	__asm__ __volatile__ ("cmp %1,%2; sbb %0,%0;"
			      : "=r" (mask)
			      : "g" (size), "r" (index)
			      : "cc");
	return mask;
}

/* Override the default implementation from linux/nospec.h. */
#define array_index_mask_nospec array_index_mask_nospec
|
|
|
|
/*
 * Prevent speculative execution past this barrier.
 *
 * alternative_2() is given an empty default instruction, "mfence" paired
 * with X86_FEATURE_MFENCE_RDTSC and "lfence" paired with
 * X86_FEATURE_LFENCE_RDTSC.
 */
#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
					   "lfence", X86_FEATURE_LFENCE_RDTSC)
|
|
|
|
/* DMA read/write barriers: both expand to a plain barrier(). */
#define dma_rmb()	barrier()
#define dma_wmb()	barrier()

/*
 * SMP barriers.
 *
 * __smp_mb() is a locked add of zero to the word just below the stack
 * pointer (%esp with CONFIG_X86_32, %rsp otherwise), with "memory" and
 * "cc" clobbered.  __smp_rmb() goes through dma_rmb() and __smp_wmb()
 * expands to barrier().
 */
#ifdef CONFIG_X86_32
#define __smp_mb()	asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc")
#else
#define __smp_mb()	asm volatile("lock; addl $0,-4(%%rsp)" ::: "memory", "cc")
#endif
#define __smp_rmb()	dma_rmb()
#define __smp_wmb()	barrier()
/* Store @value into @var via xchg(); the value xchg() returns is discarded. */
#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
|
|
|
|
/*
 * __smp_store_release() - x86 implementation of the store-release primitive
 * @p: pointer to the location being written
 * @v: value to store
 *
 * compiletime_assert_atomic_type() is applied to the pointee, then a
 * barrier() is issued before the WRITE_ONCE() so the compiler cannot move
 * earlier accesses past the store.
 *
 * @p is parenthesised at every use so that pointer expressions such as
 * "base + 1" dereference as intended.
 */
#define __smp_store_release(p, v)					\
do {									\
	compiletime_assert_atomic_type(*(p));				\
	barrier();							\
	WRITE_ONCE(*(p), v);						\
} while (0)
|
|
|
|
/*
 * __smp_load_acquire() - x86 implementation of the load-acquire primitive
 * @p: pointer to the location being read
 *
 * The value is fetched with READ_ONCE() into a local, the pointee type is
 * checked with compiletime_assert_atomic_type(), and a barrier() follows
 * the load so the compiler cannot hoist later accesses above it.  The
 * statement expression evaluates to the loaded value.
 *
 * @p is parenthesised at every use so that pointer expressions such as
 * "base + 1" dereference as intended.
 */
#define __smp_load_acquire(p)						\
({									\
	typeof(*(p)) ___p1 = READ_ONCE(*(p));				\
	compiletime_assert_atomic_type(*(p));				\
	barrier();							\
	___p1;								\
})
|
|
|
|
/*
 * Atomic operations are already serializing on x86, so the before/after
 * hooks expand to an empty statement.  The do { } while (0) form keeps
 * each one usable as a single statement, e.g. as an unbraced if body.
 */
#define __smp_mb__before_atomic()	do { } while (0)
#define __smp_mb__after_atomic()	do { } while (0)
|
|
|
|
#include <asm-generic/barrier.h>
|
|
|
|
#endif /* _ASM_X86_BARRIER_H */
|