linux_dsm_epyc7002/include/uapi/linux/swab.h
Denys Vlasenko bc27fb68aa include/uapi/linux/byteorder, swab: force inlining of some byteswap operations
Sometimes gcc mysteriously doesn't inline
very small functions we expect to be inlined. See

    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122

With this .config:
http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os,
the following functions get deinlined many times.
Examples of disassembly:

<get_unaligned_be16> (12 copies, 51 calls):
       66 8b 07                mov    (%rdi),%ax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       86 e0                   xchg   %ah,%al
       5d                      pop    %rbp
       c3                      retq

<get_unaligned_be32> (12 copies, 135 calls):
       8b 07                   mov    (%rdi),%eax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       0f c8                   bswap  %eax
       5d                      pop    %rbp
       c3                      retq

<get_unaligned_be64> (2 copies, 20 calls):
       48 8b 07                mov    (%rdi),%rax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       48 0f c8                bswap  %rax
       5d                      pop    %rbp
       c3                      retq

<__swab16p> (16 copies, 146 calls):
       55                      push   %rbp
       89 f8                   mov    %edi,%eax
       86 e0                   xchg   %ah,%al
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

<__swab32p> (43 copies, ~560 calls):
       55                      push   %rbp
       89 f8                   mov    %edi,%eax
       0f c8                   bswap  %eax
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

<__swab64p> (21 copies, 119 calls):
       55                      push   %rbp
       48 89 f8                mov    %rdi,%rax
       48 0f c8                bswap  %rax
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

<__swab32s> (6 copies, 47 calls):
       8b 07                   mov    (%rdi),%eax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       0f c8                   bswap  %eax
       89 07                   mov    %eax,(%rdi)
       5d                      pop    %rbp
       c3                      retq

This patch fixes this via s/inline/__always_inline/.
Code size decrease after the patch is ~4.5k:

    text     data      bss       dec     hex filename
92202377 20826112 36417536 149446025 8e85d89 vmlinux
92197848 20826112 36417536 149441496 8e84bd8 vmlinux5_swap_after

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-03-17 15:09:34 -07:00

289 lines
6.5 KiB
C

#ifndef _UAPI_LINUX_SWAB_H
#define _UAPI_LINUX_SWAB_H
#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/swab.h>
/*
* casts are necessary for constants, because we never know how for sure
* how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
*/
#define ___constant_swab16(x) ((__u16)( \
(((__u16)(x) & (__u16)0x00ffU) << 8) | \
(((__u16)(x) & (__u16)0xff00U) >> 8)))
#define ___constant_swab32(x) ((__u32)( \
(((__u32)(x) & (__u32)0x000000ffUL) << 24) | \
(((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \
(((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \
(((__u32)(x) & (__u32)0xff000000UL) >> 24)))
#define ___constant_swab64(x) ((__u64)( \
(((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \
(((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \
(((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \
(((__u64)(x) & (__u64)0x00000000ff000000ULL) << 8) | \
(((__u64)(x) & (__u64)0x000000ff00000000ULL) >> 8) | \
(((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
(((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) | \
(((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56)))
#define ___constant_swahw32(x) ((__u32)( \
(((__u32)(x) & (__u32)0x0000ffffUL) << 16) | \
(((__u32)(x) & (__u32)0xffff0000UL) >> 16)))
#define ___constant_swahb32(x) ((__u32)( \
(((__u32)(x) & (__u32)0x00ff00ffUL) << 8) | \
(((__u32)(x) & (__u32)0xff00ff00UL) >> 8)))
/*
* Implement the following as inlines, but define the interface using
* macros to allow constant folding when possible:
* ___swab16, ___swab32, ___swab64, ___swahw32, ___swahb32
*/
static inline __attribute_const__ __u16 __fswab16(__u16 val)
{
#ifdef __HAVE_BUILTIN_BSWAP16__
return __builtin_bswap16(val);
#elif defined (__arch_swab16)
return __arch_swab16(val);
#else
return ___constant_swab16(val);
#endif
}
static inline __attribute_const__ __u32 __fswab32(__u32 val)
{
#ifdef __HAVE_BUILTIN_BSWAP32__
return __builtin_bswap32(val);
#elif defined(__arch_swab32)
return __arch_swab32(val);
#else
return ___constant_swab32(val);
#endif
}
static inline __attribute_const__ __u64 __fswab64(__u64 val)
{
#ifdef __HAVE_BUILTIN_BSWAP64__
return __builtin_bswap64(val);
#elif defined (__arch_swab64)
return __arch_swab64(val);
#elif defined(__SWAB_64_THRU_32__)
__u32 h = val >> 32;
__u32 l = val & ((1ULL << 32) - 1);
return (((__u64)__fswab32(l)) << 32) | ((__u64)(__fswab32(h)));
#else
return ___constant_swab64(val);
#endif
}
static inline __attribute_const__ __u32 __fswahw32(__u32 val)
{
#ifdef __arch_swahw32
return __arch_swahw32(val);
#else
return ___constant_swahw32(val);
#endif
}
static inline __attribute_const__ __u32 __fswahb32(__u32 val)
{
#ifdef __arch_swahb32
return __arch_swahb32(val);
#else
return ___constant_swahb32(val);
#endif
}
/**
* __swab16 - return a byteswapped 16-bit value
* @x: value to byteswap
*/
#define __swab16(x) \
(__builtin_constant_p((__u16)(x)) ? \
___constant_swab16(x) : \
__fswab16(x))
/**
* __swab32 - return a byteswapped 32-bit value
* @x: value to byteswap
*/
#define __swab32(x) \
(__builtin_constant_p((__u32)(x)) ? \
___constant_swab32(x) : \
__fswab32(x))
/**
* __swab64 - return a byteswapped 64-bit value
* @x: value to byteswap
*/
#define __swab64(x) \
(__builtin_constant_p((__u64)(x)) ? \
___constant_swab64(x) : \
__fswab64(x))
/**
* __swahw32 - return a word-swapped 32-bit value
* @x: value to wordswap
*
* __swahw32(0x12340000) is 0x00001234
*/
#define __swahw32(x) \
(__builtin_constant_p((__u32)(x)) ? \
___constant_swahw32(x) : \
__fswahw32(x))
/**
* __swahb32 - return a high and low byte-swapped 32-bit value
* @x: value to byteswap
*
* __swahb32(0x12345678) is 0x34127856
*/
#define __swahb32(x) \
(__builtin_constant_p((__u32)(x)) ? \
___constant_swahb32(x) : \
__fswahb32(x))
/**
* __swab16p - return a byteswapped 16-bit value from a pointer
* @p: pointer to a naturally-aligned 16-bit value
*/
static __always_inline __u16 __swab16p(const __u16 *p)
{
#ifdef __arch_swab16p
return __arch_swab16p(p);
#else
return __swab16(*p);
#endif
}
/**
* __swab32p - return a byteswapped 32-bit value from a pointer
* @p: pointer to a naturally-aligned 32-bit value
*/
static __always_inline __u32 __swab32p(const __u32 *p)
{
#ifdef __arch_swab32p
return __arch_swab32p(p);
#else
return __swab32(*p);
#endif
}
/**
* __swab64p - return a byteswapped 64-bit value from a pointer
* @p: pointer to a naturally-aligned 64-bit value
*/
static __always_inline __u64 __swab64p(const __u64 *p)
{
#ifdef __arch_swab64p
return __arch_swab64p(p);
#else
return __swab64(*p);
#endif
}
/**
* __swahw32p - return a wordswapped 32-bit value from a pointer
* @p: pointer to a naturally-aligned 32-bit value
*
* See __swahw32() for details of wordswapping.
*/
static inline __u32 __swahw32p(const __u32 *p)
{
#ifdef __arch_swahw32p
return __arch_swahw32p(p);
#else
return __swahw32(*p);
#endif
}
/**
* __swahb32p - return a high and low byteswapped 32-bit value from a pointer
* @p: pointer to a naturally-aligned 32-bit value
*
* See __swahb32() for details of high/low byteswapping.
*/
static inline __u32 __swahb32p(const __u32 *p)
{
#ifdef __arch_swahb32p
return __arch_swahb32p(p);
#else
return __swahb32(*p);
#endif
}
/**
* __swab16s - byteswap a 16-bit value in-place
* @p: pointer to a naturally-aligned 16-bit value
*/
static inline void __swab16s(__u16 *p)
{
#ifdef __arch_swab16s
__arch_swab16s(p);
#else
*p = __swab16p(p);
#endif
}
/**
* __swab32s - byteswap a 32-bit value in-place
* @p: pointer to a naturally-aligned 32-bit value
*/
static __always_inline void __swab32s(__u32 *p)
{
#ifdef __arch_swab32s
__arch_swab32s(p);
#else
*p = __swab32p(p);
#endif
}
/**
* __swab64s - byteswap a 64-bit value in-place
* @p: pointer to a naturally-aligned 64-bit value
*/
static __always_inline void __swab64s(__u64 *p)
{
#ifdef __arch_swab64s
__arch_swab64s(p);
#else
*p = __swab64p(p);
#endif
}
/**
* __swahw32s - wordswap a 32-bit value in-place
* @p: pointer to a naturally-aligned 32-bit value
*
* See __swahw32() for details of wordswapping
*/
static inline void __swahw32s(__u32 *p)
{
#ifdef __arch_swahw32s
__arch_swahw32s(p);
#else
*p = __swahw32p(p);
#endif
}
/**
* __swahb32s - high and low byteswap a 32-bit value in-place
* @p: pointer to a naturally-aligned 32-bit value
*
* See __swahb32() for details of high and low byte swapping
*/
static inline void __swahb32s(__u32 *p)
{
#ifdef __arch_swahb32s
__arch_swahb32s(p);
#else
*p = __swahb32p(p);
#endif
}
#endif /* _UAPI_LINUX_SWAB_H */