mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-12 21:16:19 +07:00
bc27fb68aa
Sometimes gcc mysteriously doesn't inline very small functions we expect to be inlined. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122 With this .config: http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os, the following functions get deinlined many times. Examples of disassembly: <get_unaligned_be16> (12 copies, 51 calls): 66 8b 07 mov (%rdi),%ax 55 push %rbp 48 89 e5 mov %rsp,%rbp 86 e0 xchg %ah,%al 5d pop %rbp c3 retq <get_unaligned_be32> (12 copies, 135 calls): 8b 07 mov (%rdi),%eax 55 push %rbp 48 89 e5 mov %rsp,%rbp 0f c8 bswap %eax 5d pop %rbp c3 retq <get_unaligned_be64> (2 copies, 20 calls): 48 8b 07 mov (%rdi),%rax 55 push %rbp 48 89 e5 mov %rsp,%rbp 48 0f c8 bswap %rax 5d pop %rbp c3 retq <__swab16p> (16 copies, 146 calls): 55 push %rbp 89 f8 mov %edi,%eax 86 e0 xchg %ah,%al 48 89 e5 mov %rsp,%rbp 5d pop %rbp c3 retq <__swab32p> (43 copies, ~560 calls): 55 push %rbp 89 f8 mov %edi,%eax 0f c8 bswap %eax 48 89 e5 mov %rsp,%rbp 5d pop %rbp c3 retq <__swab64p> (21 copies, 119 calls): 55 push %rbp 48 89 f8 mov %rdi,%rax 48 0f c8 bswap %rax 48 89 e5 mov %rsp,%rbp 5d pop %rbp c3 retq <__swab32s> (6 copies, 47 calls): 8b 07 mov (%rdi),%eax 55 push %rbp 48 89 e5 mov %rsp,%rbp 0f c8 bswap %eax 89 07 mov %eax,(%rdi) 5d pop %rbp c3 retq This patch fixes this via s/inline/__always_inline/. Code size decrease after the patch is ~4.5k: text data bss dec hex filename 92202377 20826112 36417536 149446025 8e85d89 vmlinux 92197848 20826112 36417536 149441496 8e84bd8 vmlinux5_swap_after Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com> Acked-by: Ingo Molnar <mingo@kernel.org> Cc: Thomas Graf <tgraf@suug.ch> Cc: Peter Zijlstra <peterz@infradead.org> Cc: David Rientjes <rientjes@google.com> Cc: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
289 lines
6.5 KiB
C
289 lines
6.5 KiB
C
#ifndef _UAPI_LINUX_SWAB_H
|
|
#define _UAPI_LINUX_SWAB_H
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/compiler.h>
|
|
#include <asm/swab.h>
|
|
|
|
/*
|
|
* casts are necessary for constants, because we never know for sure
|
|
* how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
|
|
*/
|
|
/*
 * Exchange the two bytes of a 16-bit value.  Built only from casts, shifts,
 * masks and ORs; @x appears twice in the expansion.
 */
#define ___constant_swab16(x) ((__u16)(				\
	(((__u16)(x) >> 8) & (__u16)0x00ffU) |			\
	(((__u16)(x) << 8) & (__u16)0xff00U)))
|
|
|
|
/*
 * Reverse the four bytes of a 32-bit value.  Built only from casts, shifts,
 * masks and ORs; @x appears four times in the expansion.
 */
#define ___constant_swab32(x) ((__u32)(				\
	(((__u32)(x) << 24) & (__u32)0xff000000UL) |		\
	(((__u32)(x) <<  8) & (__u32)0x00ff0000UL) |		\
	(((__u32)(x) >>  8) & (__u32)0x0000ff00UL) |		\
	(((__u32)(x) >> 24) & (__u32)0x000000ffUL)))
|
|
|
|
/*
 * Reverse the eight bytes of a 64-bit value.  Built only from casts, shifts,
 * masks and ORs; @x appears eight times in the expansion.
 */
#define ___constant_swab64(x) ((__u64)(					\
	(((__u64)(x) << 56) & (__u64)0xff00000000000000ULL) |		\
	(((__u64)(x) << 40) & (__u64)0x00ff000000000000ULL) |		\
	(((__u64)(x) << 24) & (__u64)0x0000ff0000000000ULL) |		\
	(((__u64)(x) <<  8) & (__u64)0x000000ff00000000ULL) |		\
	(((__u64)(x) >>  8) & (__u64)0x00000000ff000000ULL) |		\
	(((__u64)(x) >> 24) & (__u64)0x0000000000ff0000ULL) |		\
	(((__u64)(x) >> 40) & (__u64)0x000000000000ff00ULL) |		\
	(((__u64)(x) >> 56) & (__u64)0x00000000000000ffULL)))
|
|
|
|
/*
 * Exchange the upper and lower 16-bit halves of a 32-bit value.  @x appears
 * twice in the expansion.
 */
#define ___constant_swahw32(x) ((__u32)(			\
	(((__u32)(x) >> 16) & (__u32)0x0000ffffUL) |		\
	(((__u32)(x) << 16) & (__u32)0xffff0000UL)))
|
|
|
|
/*
 * Exchange the two bytes inside each 16-bit half of a 32-bit value; the
 * halves themselves stay in place.  @x appears twice in the expansion.
 */
#define ___constant_swahb32(x) ((__u32)(			\
	(((__u32)(x) >> 8) & (__u32)0x00ff00ffUL) |		\
	(((__u32)(x) << 8) & (__u32)0xff00ff00UL)))
|
|
|
|
/*
|
|
* Implement the following as inlines, but define the interface using
|
|
* macros to allow constant folding when possible:
|
|
* __swab16, __swab32, __swab64, __swahw32, __swahb32
|
|
*/
|
|
|
|
/*
 * Function form of the 16-bit byteswap.  Picks, at preprocessing time, the
 * compiler builtin if __HAVE_BUILTIN_BSWAP16__ is defined, else the
 * architecture's __arch_swab16() if that is defined, else the generic
 * ___constant_swab16() expression.
 */
static inline __attribute_const__ __u16 __fswab16(__u16 val)
{
	__u16 swapped;

#if defined(__HAVE_BUILTIN_BSWAP16__)
	swapped = __builtin_bswap16(val);
#elif defined(__arch_swab16)
	swapped = __arch_swab16(val);
#else
	swapped = ___constant_swab16(val);
#endif
	return swapped;
}
|
|
|
|
/*
 * Function form of the 32-bit byteswap.  Picks, at preprocessing time, the
 * compiler builtin if __HAVE_BUILTIN_BSWAP32__ is defined, else the
 * architecture's __arch_swab32() if that is defined, else the generic
 * ___constant_swab32() expression.
 */
static inline __attribute_const__ __u32 __fswab32(__u32 val)
{
	__u32 swapped;

#if defined(__HAVE_BUILTIN_BSWAP32__)
	swapped = __builtin_bswap32(val);
#elif defined(__arch_swab32)
	swapped = __arch_swab32(val);
#else
	swapped = ___constant_swab32(val);
#endif
	return swapped;
}
|
|
|
|
/*
 * Function form of the 64-bit byteswap.  Picks, at preprocessing time, in
 * this order: the compiler builtin (__HAVE_BUILTIN_BSWAP64__), the
 * architecture's __arch_swab64(), a composition of two __fswab32() calls
 * (__SWAB_64_THRU_32__), or the generic ___constant_swab64() expression.
 */
static inline __attribute_const__ __u64 __fswab64(__u64 val)
{
#if defined(__HAVE_BUILTIN_BSWAP64__)
	return __builtin_bswap64(val);
#elif defined(__arch_swab64)
	return __arch_swab64(val);
#elif defined(__SWAB_64_THRU_32__)
	/* Byteswap each 32-bit half on its own, then exchange the halves. */
	__u32 upper = val >> 32;
	__u32 lower = val & ((1ULL << 32) - 1);
	__u64 new_upper = __fswab32(lower);
	__u64 new_lower = __fswab32(upper);

	return (new_upper << 32) | new_lower;
#else
	return ___constant_swab64(val);
#endif
}
|
|
|
|
/*
 * Function form of the 32-bit halfword swap.  Uses the architecture's
 * __arch_swahw32() if it is defined, otherwise the generic
 * ___constant_swahw32() expression.
 */
static inline __attribute_const__ __u32 __fswahw32(__u32 val)
{
	__u32 swapped;

#if defined(__arch_swahw32)
	swapped = __arch_swahw32(val);
#else
	swapped = ___constant_swahw32(val);
#endif
	return swapped;
}
|
|
|
|
/*
 * Function form of the 32-bit high/low byte swap.  Uses the architecture's
 * __arch_swahb32() if it is defined, otherwise the generic
 * ___constant_swahb32() expression.
 */
static inline __attribute_const__ __u32 __fswahb32(__u32 val)
{
	__u32 swapped;

#if defined(__arch_swahb32)
	swapped = __arch_swahb32(val);
#else
	swapped = ___constant_swahb32(val);
#endif
	return swapped;
}
|
|
|
|
/**
 * __swab16 - return a byteswapped 16-bit value
 * @x: value to byteswap
 *
 * Expands to ___constant_swab16(@x) when __builtin_constant_p() reports the
 * argument as a compile-time constant, and to a call to __fswab16(@x)
 * otherwise.  @x appears more than once in the expansion.
 *
 * The cast is applied to the whole conditional expression: the ?: operator
 * promotes its __u16 operands to int, so without the outer cast the macro
 * would evaluate to an int rather than a __u16.
 */
#define __swab16(x)				\
	((__u16)(__builtin_constant_p((__u16)(x)) ?	\
	___constant_swab16(x) :			\
	__fswab16(x)))
|
|
|
|
/**
 * __swab32 - return a byteswapped 32-bit value
 * @x: value to byteswap
 *
 * Expands to ___constant_swab32(@x) when __builtin_constant_p() reports the
 * argument as a compile-time constant, and to a call to __fswab32(@x)
 * otherwise.  @x appears more than once in the expansion.
 */
#define __swab32(x)				\
	(__builtin_constant_p((__u32)(x)) ?	\
	___constant_swab32(x) :			\
	__fswab32(x))
|
|
|
|
/**
 * __swab64 - return a byteswapped 64-bit value
 * @x: value to byteswap
 *
 * Expands to ___constant_swab64(@x) when __builtin_constant_p() reports the
 * argument as a compile-time constant, and to a call to __fswab64(@x)
 * otherwise.  @x appears more than once in the expansion.
 */
#define __swab64(x)				\
	(__builtin_constant_p((__u64)(x)) ?	\
	___constant_swab64(x) :			\
	__fswab64(x))
|
|
|
|
/**
 * __swahw32 - return a word-swapped 32-bit value
 * @x: value to wordswap
 *
 * __swahw32(0x12340000) is 0x00001234
 *
 * Expands to ___constant_swahw32(@x) when __builtin_constant_p() reports the
 * argument as a compile-time constant, and to a call to __fswahw32(@x)
 * otherwise.  @x appears more than once in the expansion.
 */
#define __swahw32(x)				\
	(__builtin_constant_p((__u32)(x)) ?	\
	___constant_swahw32(x) :		\
	__fswahw32(x))
|
|
|
|
/**
 * __swahb32 - return a high and low byte-swapped 32-bit value
 * @x: value to byteswap
 *
 * __swahb32(0x12345678) is 0x34127856
 *
 * Expands to ___constant_swahb32(@x) when __builtin_constant_p() reports the
 * argument as a compile-time constant, and to a call to __fswahb32(@x)
 * otherwise.  @x appears more than once in the expansion.
 */
#define __swahb32(x)				\
	(__builtin_constant_p((__u32)(x)) ?	\
	___constant_swahb32(x) :		\
	__fswahb32(x))
|
|
|
|
/**
|
|
* __swab16p - return a byteswapped 16-bit value from a pointer
|
|
* @p: pointer to a naturally-aligned 16-bit value
|
|
*/
|
|
static __always_inline __u16 __swab16p(const __u16 *p)
|
|
{
|
|
#ifdef __arch_swab16p
|
|
return __arch_swab16p(p);
|
|
#else
|
|
return __swab16(*p);
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* __swab32p - return a byteswapped 32-bit value from a pointer
|
|
* @p: pointer to a naturally-aligned 32-bit value
|
|
*/
|
|
static __always_inline __u32 __swab32p(const __u32 *p)
|
|
{
|
|
#ifdef __arch_swab32p
|
|
return __arch_swab32p(p);
|
|
#else
|
|
return __swab32(*p);
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* __swab64p - return a byteswapped 64-bit value from a pointer
|
|
* @p: pointer to a naturally-aligned 64-bit value
|
|
*/
|
|
static __always_inline __u64 __swab64p(const __u64 *p)
|
|
{
|
|
#ifdef __arch_swab64p
|
|
return __arch_swab64p(p);
|
|
#else
|
|
return __swab64(*p);
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* __swahw32p - return a wordswapped 32-bit value from a pointer
|
|
* @p: pointer to a naturally-aligned 32-bit value
|
|
*
|
|
* See __swahw32() for details of wordswapping.
|
|
*/
|
|
static inline __u32 __swahw32p(const __u32 *p)
|
|
{
|
|
#ifdef __arch_swahw32p
|
|
return __arch_swahw32p(p);
|
|
#else
|
|
return __swahw32(*p);
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* __swahb32p - return a high and low byteswapped 32-bit value from a pointer
|
|
* @p: pointer to a naturally-aligned 32-bit value
|
|
*
|
|
* See __swahb32() for details of high/low byteswapping.
|
|
*/
|
|
static inline __u32 __swahb32p(const __u32 *p)
|
|
{
|
|
#ifdef __arch_swahb32p
|
|
return __arch_swahb32p(p);
|
|
#else
|
|
return __swahb32(*p);
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* __swab16s - byteswap a 16-bit value in-place
|
|
* @p: pointer to a naturally-aligned 16-bit value
|
|
*/
|
|
static inline void __swab16s(__u16 *p)
|
|
{
|
|
#ifdef __arch_swab16s
|
|
__arch_swab16s(p);
|
|
#else
|
|
*p = __swab16p(p);
|
|
#endif
|
|
}
|
|
/**
|
|
* __swab32s - byteswap a 32-bit value in-place
|
|
* @p: pointer to a naturally-aligned 32-bit value
|
|
*/
|
|
static __always_inline void __swab32s(__u32 *p)
|
|
{
|
|
#ifdef __arch_swab32s
|
|
__arch_swab32s(p);
|
|
#else
|
|
*p = __swab32p(p);
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* __swab64s - byteswap a 64-bit value in-place
|
|
* @p: pointer to a naturally-aligned 64-bit value
|
|
*/
|
|
static __always_inline void __swab64s(__u64 *p)
|
|
{
|
|
#ifdef __arch_swab64s
|
|
__arch_swab64s(p);
|
|
#else
|
|
*p = __swab64p(p);
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* __swahw32s - wordswap a 32-bit value in-place
|
|
* @p: pointer to a naturally-aligned 32-bit value
|
|
*
|
|
* See __swahw32() for details of wordswapping
|
|
*/
|
|
static inline void __swahw32s(__u32 *p)
|
|
{
|
|
#ifdef __arch_swahw32s
|
|
__arch_swahw32s(p);
|
|
#else
|
|
*p = __swahw32p(p);
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* __swahb32s - high and low byteswap a 32-bit value in-place
|
|
* @p: pointer to a naturally-aligned 32-bit value
|
|
*
|
|
* See __swahb32() for details of high and low byte swapping
|
|
*/
|
|
static inline void __swahb32s(__u32 *p)
|
|
{
|
|
#ifdef __arch_swahb32s
|
|
__arch_swahb32s(p);
|
|
#else
|
|
*p = __swahb32p(p);
|
|
#endif
|
|
}
|
|
|
|
|
|
#endif /* _UAPI_LINUX_SWAB_H */
|