mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-15 10:06:10 +07:00
eab6870fee
Mark Rutland noticed that GCC optimization passes have the potential to elide
necessary invocations of the array_index_mask_nospec() instruction sequence,
so mark the asm() volatile.
Mark explains:
"The volatile will inhibit *some* cases where the compiler could lift the
array_index_nospec() call out of a branch, e.g. where there are multiple
invocations of array_index_nospec() with the same arguments:
if (idx < foo) {
idx1 = array_idx_nospec(idx, foo)
do_something(idx1);
}
< some other code >
if (idx < foo) {
idx2 = array_idx_nospec(idx, foo);
do_something_else(idx2);
}
... since the compiler can determine that the two invocations yield the same
result, and reuse the first result (likely the same register as idx was in
originally) for the second branch, effectively re-writing the above as:
if (idx < foo) {
idx = array_idx_nospec(idx, foo);
do_something(idx);
}
< some other code >
if (idx < foo) {
do_something_else(idx);
}
... if we don't take the first branch, then speculatively take the second, we
lose the nospec protection.
There's more info on volatile asm in the GCC docs:
https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Volatile
"
Reported-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: <stable@vger.kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: babdde2698
("x86: Implement array_index_mask_nospec")
Link: https://lkml.kernel.org/lkml/152838798950.14521.4893346294059739135.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
89 lines
2.4 KiB
C
89 lines
2.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_X86_BARRIER_H
|
|
#define _ASM_X86_BARRIER_H
|
|
|
|
#include <asm/alternative.h>
|
|
#include <asm/nops.h>
|
|
|
|
/*
|
|
* Force strict CPU ordering.
|
|
* And yes, this might be required on UP too when we're talking
|
|
* to devices.
|
|
*/
|
|
|
|
#ifdef CONFIG_X86_32
|
|
#define mb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "mfence", \
|
|
X86_FEATURE_XMM2) ::: "memory", "cc")
|
|
#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "lfence", \
|
|
X86_FEATURE_XMM2) ::: "memory", "cc")
|
|
#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \
|
|
X86_FEATURE_XMM2) ::: "memory", "cc")
|
|
#else
|
|
#define mb() asm volatile("mfence":::"memory")
|
|
#define rmb() asm volatile("lfence":::"memory")
|
|
#define wmb() asm volatile("sfence" ::: "memory")
|
|
#endif
|
|
|
|
/**
|
|
* array_index_mask_nospec() - generate a mask that is ~0UL when the
|
|
* bounds check succeeds and 0 otherwise
|
|
* @index: array element index
|
|
* @size: number of elements in array
|
|
*
|
|
* Returns:
|
|
* 0 - (index < size)
|
|
*/
|
|
static inline unsigned long array_index_mask_nospec(unsigned long index,
|
|
unsigned long size)
|
|
{
|
|
unsigned long mask;
|
|
|
|
asm volatile ("cmp %1,%2; sbb %0,%0;"
|
|
:"=r" (mask)
|
|
:"g"(size),"r" (index)
|
|
:"cc");
|
|
return mask;
|
|
}
|
|
|
|
/* Override the default implementation from linux/nospec.h. */
|
|
#define array_index_mask_nospec array_index_mask_nospec
|
|
|
|
/* Prevent speculative execution past this barrier. */
|
|
#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
|
|
"lfence", X86_FEATURE_LFENCE_RDTSC)
|
|
|
|
#define dma_rmb() barrier()
|
|
#define dma_wmb() barrier()
|
|
|
|
#ifdef CONFIG_X86_32
|
|
#define __smp_mb() asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc")
|
|
#else
|
|
#define __smp_mb() asm volatile("lock; addl $0,-4(%%rsp)" ::: "memory", "cc")
|
|
#endif
|
|
#define __smp_rmb() dma_rmb()
|
|
#define __smp_wmb() barrier()
|
|
#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
|
|
|
|
#define __smp_store_release(p, v) \
|
|
do { \
|
|
compiletime_assert_atomic_type(*p); \
|
|
barrier(); \
|
|
WRITE_ONCE(*p, v); \
|
|
} while (0)
|
|
|
|
#define __smp_load_acquire(p) \
|
|
({ \
|
|
typeof(*p) ___p1 = READ_ONCE(*p); \
|
|
compiletime_assert_atomic_type(*p); \
|
|
barrier(); \
|
|
___p1; \
|
|
})
|
|
|
|
/* Atomic operations are already serializing on x86 */
|
|
#define __smp_mb__before_atomic() barrier()
|
|
#define __smp_mb__after_atomic() barrier()
|
|
|
|
#include <asm-generic/barrier.h>
|
|
|
|
#endif /* _ASM_X86_BARRIER_H */
|