mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-26 16:49:32 +07:00
a15fd609ad
Peter Zijlstra noticed that with CONFIG_PROFILE_ALL_BRANCHES, the "if" macro converts the conditional to an array index. This can cause GCC to create horrible code. When there are nested ifs, the generated code uses register values to encode branching decisions. Josh Poimboeuf found that replacing the define "if" macro from using the condition as an array index and incrementing the branch statics with an if statement itself, reduced the asm complexity and shrinks the generated code quite a bit. But this can be simplified even further by replacing the internal if statement with a ternary operator. Link: https://lkml.kernel.org/r/20190307174802.46fmpysxyo35hh43@treble Link: http://lkml.kernel.org/r/CAHk-=wiALN3jRuzARpwThN62iKd476Xj-uom+YnLZ4=eqcz7xQ@mail.gmail.com Reported-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reported-by: Josh Poimboeuf <jpoimboe@redhat.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
355 lines
11 KiB
C
355 lines
11 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __LINUX_COMPILER_H
|
|
#define __LINUX_COMPILER_H
|
|
|
|
#include <linux/compiler_types.h>
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
/*
 * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
 * to disable branch tracing on a per file basis.
 */
#if defined(CONFIG_TRACE_BRANCH_PROFILING) \
    && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__)
/*
 * Defined elsewhere.  __branch_check__() calls it with the per-site record,
 * the observed value of the condition, the expected value and a flag telling
 * whether the condition was a compile-time constant.
 */
void ftrace_likely_update(struct ftrace_likely_data *f, int val,
			  int expect, int is_constant);

/* Branch hints that expand straight to __builtin_expect(), with no tracing. */
#define likely_notrace(x)	__builtin_expect(!!(x), 1)
#define unlikely_notrace(x)	__builtin_expect(!!(x), 0)
|
|
|
|
/*
 * Evaluate @x once as a boolean, hinting the compiler that @expect is the
 * likely outcome, and pass the result to ftrace_likely_update().
 *
 * Every expansion owns one static struct ftrace_likely_data, initialised with
 * the enclosing function, file and line, and placed in the
 * "_ftrace_annotated_branch" section.  The statement expression yields the
 * normalised (0 or 1) value of @x.
 */
#define __branch_check__(x, expect, is_constant) ({			\
			long ______r;					\
			static struct ftrace_likely_data		\
				__aligned(4)				\
				__section("_ftrace_annotated_branch")	\
				______f = {				\
				.data.func = __func__,			\
				.data.file = __FILE__,			\
				.data.line = __LINE__,			\
			};						\
			______r = __builtin_expect(!!(x), expect);	\
			ftrace_likely_update(&______f, ______r,		\
					     expect, is_constant);	\
			______r;					\
		})
|
|
|
|
/*
 * Using __builtin_constant_p(x) to ignore cases where the return
 * value is always the same.  This idea is taken from a similar patch
 * written by Daniel Walker.
 *
 * Both macros are only defined when not already provided, and both route the
 * condition through __branch_check__() so that the outcome is recorded.
 */
# ifndef likely
#  define likely(x)	(__branch_check__(x, 1, __builtin_constant_p(x)))
# endif
# ifndef unlikely
#  define unlikely(x)	(__branch_check__(x, 0, __builtin_constant_p(x)))
# endif
|
|
|
|
#ifdef CONFIG_PROFILE_ALL_BRANCHES
/*
 * "Define 'is'", Bill Clinton
 * "Define 'if'", Steven Rostedt
 *
 * Every 'if' is rewritten so that its condition passes through
 * __trace_if_var().  The variadic form keeps conditions that contain
 * top-level commas intact.
 */
#define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )

/* Compile-time constant conditions are used as-is and are not counted. */
#define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))

/*
 * Count the outcome of @cond in a per-site static struct ftrace_branch_data
 * stored in the "_ftrace_branch" section: miss_hit[1] is incremented when the
 * condition is true, miss_hit[0] when it is false.  Yields 1 or 0 accordingly.
 */
#define __trace_if_value(cond) ({			\
	static struct ftrace_branch_data		\
		__aligned(4)				\
		__section("_ftrace_branch")		\
		__if_trace = {				\
			.func = __func__,		\
			.file = __FILE__,		\
			.line = __LINE__,		\
		};					\
	(cond) ?					\
		(__if_trace.miss_hit[1]++,1) :		\
		(__if_trace.miss_hit[0]++,0);		\
})

#endif /* CONFIG_PROFILE_ALL_BRANCHES */
|
|
|
|
#else
|
|
/*
 * Untraced branch hints: the condition is normalised to 0/1 with '!!' and
 * handed to __builtin_expect() together with the expected outcome.
 */
# define likely(x)	__builtin_expect(!!(x), 1)
# define unlikely(x)	__builtin_expect(!!(x), 0)
|
|
#endif
|
|
|
|
/* Optimization barrier */
#ifndef barrier
# define barrier() __memory_barrier()
#endif

/* Without a compiler-specific definition, barrier_data() is a plain barrier(). */
#ifndef barrier_data
# define barrier_data(ptr) barrier()
#endif

/* workaround for GCC PR82365 if needed */
#ifndef barrier_before_unreachable
# define barrier_before_unreachable() do { } while (0)
#endif
|
|
|
|
/* Unreachable code */
#ifdef CONFIG_STACK_VALIDATION
/*
 * These macros help objtool understand GCC code flow for unreachable code.
 * The __COUNTER__ based labels are a hack to make each instance of the macros
 * unique, to convince GCC not to merge duplicate inline asm statements.
 */
#define annotate_reachable() ({						\
	asm volatile("%c0:\n\t"						\
		     ".pushsection .discard.reachable\n\t"		\
		     ".long %c0b - .\n\t"				\
		     ".popsection\n\t" : : "i" (__COUNTER__));		\
})
#define annotate_unreachable() ({					\
	asm volatile("%c0:\n\t"						\
		     ".pushsection .discard.unreachable\n\t"		\
		     ".long %c0b - .\n\t"				\
		     ".popsection\n\t" : : "i" (__COUNTER__));		\
})
/* String form of the unreachable annotation, for use inside asm bodies. */
#define ASM_UNREACHABLE							\
	"999:\n\t"							\
	".pushsection .discard.unreachable\n\t"				\
	".long 999b - .\n\t"						\
	".popsection\n\t"
#else
#define annotate_reachable()
#define annotate_unreachable()
#endif

/* Without stack validation the asm annotation is empty. */
#ifndef ASM_UNREACHABLE
# define ASM_UNREACHABLE
#endif
/* Emit the annotation (if any) and then tell the compiler the spot is dead. */
#ifndef unreachable
# define unreachable() do {		\
	annotate_unreachable();		\
	__builtin_unreachable();	\
} while (0)
#endif
|
|
|
|
/*
 * KENTRY - kernel entry point
 * This can be used to annotate symbols (functions or data) that are used
 * without their linker symbol being referenced explicitly. For example,
 * interrupt vector handlers, or functions in the kernel image that are found
 * programmatically.
 *
 * Not required for symbols exported with EXPORT_SYMBOL, or initcalls. Those
 * are handled in their own way (with KEEP() in linker scripts).
 *
 * KENTRY can be avoided if the symbols in question are marked as KEEP() in the
 * linker script. For example an architecture could KEEP() its entire
 * boot/exception vector code rather than annotate each function and data.
 *
 * The macro stores the address of @sym in a __used static constant placed in
 * a section named "___kentry+<sym>".
 */
#ifndef KENTRY
# define KENTRY(sym)						\
	extern typeof(sym) sym;					\
	static const unsigned long __kentry_##sym		\
	__used							\
	__section("___kentry" "+" #sym )			\
	= (unsigned long)&sym;
#endif
|
|
|
|
/*
 * Add byte offset @off to pointer @ptr by going through an unsigned long, and
 * yield the result with the same type as @ptr.
 */
#ifndef RELOC_HIDE
# define RELOC_HIDE(ptr, off)					\
  ({ unsigned long __ptr;					\
     __ptr = (unsigned long) (ptr);				\
    (typeof(ptr)) (__ptr + (off)); })
#endif
|
|
|
|
#ifndef OPTIMIZER_HIDE_VAR
/*
 * Make the optimizer believe the variable can be manipulated arbitrarily.
 * The empty asm takes @var as input and output in the same register, so the
 * value itself is unchanged.
 */
#define OPTIMIZER_HIDE_VAR(var)						\
	__asm__ ("" : "=r" (var) : "0" (var))
#endif
|
|
|
|
/*
 * Not-quite-unique ID: the name is built from @prefix and __LINE__, so two
 * uses with the same prefix on one source line produce the same identifier.
 */
#ifndef __UNIQUE_ID
# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
#endif
|
|
|
|
#include <uapi/linux/types.h>
|
|
|
|
/*
 * Shared body of __read_once_size() and __read_once_size_nocheck().  It relies
 * on the expanding function providing 'p' (source), 'res' (destination) and
 * 'size' (byte count).  Sizes 1, 2, 4 and 8 are read with a single volatile
 * load of the matching width; any other size is copied with
 * __builtin_memcpy() bracketed by barrier().
 */
#define __READ_ONCE_SIZE						\
({									\
	switch (size) {							\
	case 1: *(__u8 *)res = *(volatile __u8 *)p; break;		\
	case 2: *(__u16 *)res = *(volatile __u16 *)p; break;		\
	case 4: *(__u32 *)res = *(volatile __u32 *)p; break;		\
	case 8: *(__u64 *)res = *(volatile __u64 *)p; break;		\
	default:							\
		barrier();						\
		__builtin_memcpy((void *)res, (const void *)p, size);	\
		barrier();						\
	}								\
})
|
|
|
|
static __always_inline
|
|
void __read_once_size(const volatile void *p, void *res, int size)
|
|
{
|
|
__READ_ONCE_SIZE;
|
|
}
|
|
|
|
#ifdef CONFIG_KASAN
/*
 * We can't declare function 'inline' because __no_sanitize_address conflicts
 * with inlining. Attempt to inline it may cause a build failure.
 * 	https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
 * '__maybe_unused' allows us to avoid defined-but-not-used warnings.
 */
# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused
#else
# define __no_kasan_or_inline __always_inline
#endif
|
|
|
|
static __no_kasan_or_inline
|
|
void __read_once_size_nocheck(const volatile void *p, void *res, int size)
|
|
{
|
|
__READ_ONCE_SIZE;
|
|
}
|
|
|
|
static __always_inline void __write_once_size(volatile void *p, void *res, int size)
|
|
{
|
|
switch (size) {
|
|
case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
|
|
case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
|
|
case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
|
|
case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
|
|
default:
|
|
barrier();
|
|
__builtin_memcpy((void *)p, (const void *)res, size);
|
|
barrier();
|
|
}
|
|
}
|
|
|
|
/*
 * Prevent the compiler from merging or refetching reads or writes. The
 * compiler is also forbidden from reordering successive instances of
 * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
 * particular ordering. One way to make the compiler aware of ordering is to
 * put the two invocations of READ_ONCE or WRITE_ONCE in different C
 * statements.
 *
 * These two macros will also work on aggregate data types like structs or
 * unions. If the size of the accessed data type exceeds the word size of
 * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will
 * fall back to memcpy(). There are at least two memcpy()s: one for the
 * __builtin_memcpy() and then one for the macro doing the copy of variable
 * - '__u' allocated on the stack.
 *
 * Their two major use cases are: (1) Mediating communication between
 * process-level code and irq/NMI handlers, all running on the same CPU,
 * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
 * mutilate accesses that either do not require ordering or that interact
 * with an explicit memory barrier or atomic instruction that provides the
 * required ordering.
 */
|
|
#include <asm/barrier.h>
|
|
#include <linux/kasan-checks.h>
|
|
|
|
/*
 * Read @x into a union temporary and yield its value.  A non-zero @check
 * selects __read_once_size(); zero selects __read_once_size_nocheck().
 */
#define __READ_ONCE(x, check)						\
({									\
	union { typeof(x) __val; char __c[1]; } __u;			\
	if (check)							\
		__read_once_size(&(x), __u.__c, sizeof(x));		\
	else								\
		__read_once_size_nocheck(&(x), __u.__c, sizeof(x));	\
	smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
	__u.__val;							\
})
#define READ_ONCE(x) __READ_ONCE(x, 1)
|
|
|
|
/*
 * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need
 * to hide memory access from KASAN.
 */
#define READ_ONCE_NOCHECK(x) __READ_ONCE(x, 0)
|
|
|
|
static __no_kasan_or_inline
|
|
unsigned long read_word_at_a_time(const void *addr)
|
|
{
|
|
kasan_check_read(addr, 1);
|
|
return *(unsigned long *)addr;
|
|
}
|
|
|
|
/*
 * Convert @val to the type of @x in a union temporary, store it into @x with
 * __write_once_size(), and yield the converted value.
 */
#define WRITE_ONCE(x, val) \
({							\
	union { typeof(x) __val; char __c[1]; } __u =	\
		{ .__val = (__force typeof(x)) (val) }; \
	__write_once_size(&(x), __u.__c, sizeof(x));	\
	__u.__val;					\
})
|
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
/*
 * Force the compiler to emit 'sym' as a symbol, so that we can reference
 * it from inline assembler. Necessary in case 'sym' could be inlined
 * otherwise, or eliminated entirely due to lack of references that are
 * visible to the compiler.
 *
 * This is done by storing &sym in a __used static pointer, named after the
 * symbol and __LINE__, in the ".discard.addressable" section.
 */
#define __ADDRESSABLE(sym) \
	static void * __section(".discard.addressable") __used \
		__PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
|
|
|
|
/**
 * offset_to_ptr - convert a relative memory offset to an absolute pointer
 * @off:	the address of the 32-bit offset value
 *
 * Return: the address of @off itself plus the signed value stored at @off.
 */
static inline void *offset_to_ptr(const int *off)
{
	return (void *)((unsigned long)off + *off);
}
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
/* Compile time object size, -1 for unknown */
#ifndef __compiletime_object_size
# define __compiletime_object_size(obj) -1
#endif
/* Fallbacks: the warning/error attributes expand to nothing when not provided. */
#ifndef __compiletime_warning
# define __compiletime_warning(message)
#endif
#ifndef __compiletime_error
# define __compiletime_error(message)
#endif
|
|
|
|
/*
 * With __OPTIMIZE__ defined, declare an extern function named prefix##suffix
 * carrying __compiletime_error(msg) and call it when @condition is false.
 * Without __OPTIMIZE__ the check expands to an empty statement.
 */
#ifdef __OPTIMIZE__
# define __compiletime_assert(condition, msg, prefix, suffix)		\
	do {								\
		extern void prefix ## suffix(void) __compiletime_error(msg); \
		if (!(condition))					\
			prefix ## suffix();				\
	} while (0)
#else
# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0)
#endif

/*
 * Extra expansion level so that macro arguments such as __LINE__ are expanded
 * before __compiletime_assert() pastes them together.
 */
#define _compiletime_assert(condition, msg, prefix, suffix) \
	__compiletime_assert(condition, msg, prefix, suffix)

/**
 * compiletime_assert - break build and emit msg if condition is false
 * @condition: a compile-time constant condition to check
 * @msg:       a message to emit if condition is false
 *
 * In tradition of POSIX assert, this macro will break the build if the
 * supplied condition is *false*, emitting the supplied error message if the
 * compiler has support to do so.
 */
#define compiletime_assert(condition, msg) \
	_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)

/* Require that @t satisfies __native_word(). */
#define compiletime_assert_atomic_type(t)				\
	compiletime_assert(__native_word(t),				\
		"Need native word sized stores/loads for atomicity.")
|
|
|
|
/* &a[0] degrades to a pointer: a different type from an array */
#define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
|
|
|
|
#endif /* __LINUX_COMPILER_H */
|