mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-16 13:56:56 +07:00
bc27fb68aa
Sometimes gcc mysteriously doesn't inline very small functions we expect
to be inlined. See

    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122

With this .config:
http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os,
the following functions get deinlined many times. Examples of disassembly:

<get_unaligned_be16> (12 copies, 51 calls):
       66 8b 07                mov    (%rdi),%ax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       86 e0                   xchg   %ah,%al
       5d                      pop    %rbp
       c3                      retq

<get_unaligned_be32> (12 copies, 135 calls):
       8b 07                   mov    (%rdi),%eax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       0f c8                   bswap  %eax
       5d                      pop    %rbp
       c3                      retq

<get_unaligned_be64> (2 copies, 20 calls):
       48 8b 07                mov    (%rdi),%rax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       48 0f c8                bswap  %rax
       5d                      pop    %rbp
       c3                      retq

<__swab16p> (16 copies, 146 calls):
       55                      push   %rbp
       89 f8                   mov    %edi,%eax
       86 e0                   xchg   %ah,%al
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

<__swab32p> (43 copies, ~560 calls):
       55                      push   %rbp
       89 f8                   mov    %edi,%eax
       0f c8                   bswap  %eax
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

<__swab64p> (21 copies, 119 calls):
       55                      push   %rbp
       48 89 f8                mov    %rdi,%rax
       48 0f c8                bswap  %rax
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

<__swab32s> (6 copies, 47 calls):
       8b 07                   mov    (%rdi),%eax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       0f c8                   bswap  %eax
       89 07                   mov    %eax,(%rdi)
       5d                      pop    %rbp
       c3                      retq

This patch fixes this via s/inline/__always_inline/.

Code size decrease after the patch is ~4.5k:

    text     data      bss       dec     hex filename
92202377 20826112 36417536 149446025 8e85d89 vmlinux
92197848 20826112 36417536 149441496 8e84bd8 vmlinux5_swap_after

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
106 lines
3.8 KiB
C
106 lines
3.8 KiB
C
#ifndef _UAPI_LINUX_BYTEORDER_LITTLE_ENDIAN_H
|
|
#define _UAPI_LINUX_BYTEORDER_LITTLE_ENDIAN_H
|
|
|
|
/* Byte-order tag for this header; a definition made earlier is left untouched. */
#if !defined(__LITTLE_ENDIAN)
#define __LITTLE_ENDIAN 1234
#endif
/* Presence-only flag (expands to nothing); a definition made earlier is kept. */
#if !defined(__LITTLE_ENDIAN_BITFIELD)
#define __LITTLE_ENDIAN_BITFIELD
#endif

#include <linux/types.h>
|
|
#include <linux/swab.h>
|
|
|
|
/*
 * Network-order <-> host-order conversions, "__constant_" flavour.
 *
 * This is the little-endian header, so both directions go through a
 * ___constant_swab{16,32}() helper.  The __force casts only retype the
 * value to or from the __be16/__be32 annotations.
 */
#define __constant_htonl(x)	((__force __be32)___constant_swab32((x)))
#define __constant_ntohl(x)	___constant_swab32((__force __be32)(x))
#define __constant_htons(x)	((__force __be16)___constant_swab16((x)))
#define __constant_ntohs(x)	___constant_swab16((__force __be16)(x))
/*
 * "__constant_" CPU <-> fixed-endian conversions.
 *
 * Little-endian targets: the value is only retyped (cast chain, no helper
 * call), since CPU order already is little-endian in this header.
 */
#define __constant_cpu_to_le64(x)	((__force __le64)(__u64)(x))
#define __constant_le64_to_cpu(x)	((__force __u64)(__le64)(x))
#define __constant_cpu_to_le32(x)	((__force __le32)(__u32)(x))
#define __constant_le32_to_cpu(x)	((__force __u32)(__le32)(x))
#define __constant_cpu_to_le16(x)	((__force __le16)(__u16)(x))
#define __constant_le16_to_cpu(x)	((__force __u16)(__le16)(x))

/*
 * Big-endian targets: the value is passed through the matching
 * ___constant_swab{16,32,64}() helper and retyped with a __force cast.
 */
#define __constant_cpu_to_be64(x)	((__force __be64)___constant_swab64((x)))
#define __constant_be64_to_cpu(x)	___constant_swab64((__force __u64)(__be64)(x))
#define __constant_cpu_to_be32(x)	((__force __be32)___constant_swab32((x)))
#define __constant_be32_to_cpu(x)	___constant_swab32((__force __u32)(__be32)(x))
#define __constant_cpu_to_be16(x)	((__force __be16)___constant_swab16((x)))
#define __constant_be16_to_cpu(x)	___constant_swab16((__force __u16)(__be16)(x))
/*
 * By-value CPU <-> fixed-endian conversions.
 *
 * Little-endian targets: pure retyping casts, no helper is called.
 */
#define __cpu_to_le64(x)	((__force __le64)(__u64)(x))
#define __le64_to_cpu(x)	((__force __u64)(__le64)(x))
#define __cpu_to_le32(x)	((__force __le32)(__u32)(x))
#define __le32_to_cpu(x)	((__force __u32)(__le32)(x))
#define __cpu_to_le16(x)	((__force __le16)(__u16)(x))
#define __le16_to_cpu(x)	((__force __u16)(__le16)(x))

/*
 * Big-endian targets: the value goes through __swab{16,32,64}() and is
 * retyped with a __force cast on the big-endian side.
 */
#define __cpu_to_be64(x)	((__force __be64)__swab64((x)))
#define __be64_to_cpu(x)	__swab64((__force __u64)(__be64)(x))
#define __cpu_to_be32(x)	((__force __be32)__swab32((x)))
#define __be32_to_cpu(x)	__swab32((__force __u32)(__be32)(x))
#define __cpu_to_be16(x)	((__force __be16)__swab16((x)))
#define __be16_to_cpu(x)	__swab16((__force __u16)(__be16)(x))

/*
 * Read the 64-bit CPU-order value at @p and hand it back typed as __le64.
 * Only the type annotation changes; the loaded bits are returned as-is.
 */
static __always_inline __le64 __cpu_to_le64p(const __u64 *p)
{
	const __u64 native = *p;

	return (__force __le64)native;
}
/*
 * Read the 64-bit little-endian value at @p and hand it back as a plain
 * __u64.  Only the type annotation changes; the loaded bits are returned
 * as-is.
 */
static __always_inline __u64 __le64_to_cpup(const __le64 *p)
{
	const __le64 stored = *p;

	return (__force __u64)stored;
}
/*
 * Read the 32-bit CPU-order value at @p and hand it back typed as __le32.
 * Only the type annotation changes; the loaded bits are returned as-is.
 */
static __always_inline __le32 __cpu_to_le32p(const __u32 *p)
{
	const __u32 native = *p;

	return (__force __le32)native;
}
/*
 * Read the 32-bit little-endian value at @p and hand it back as a plain
 * __u32.  Only the type annotation changes; the loaded bits are returned
 * as-is.
 */
static __always_inline __u32 __le32_to_cpup(const __le32 *p)
{
	const __le32 stored = *p;

	return (__force __u32)stored;
}
/*
 * Read the 16-bit CPU-order value at @p and hand it back typed as __le16.
 * Only the type annotation changes; the loaded bits are returned as-is.
 */
static __always_inline __le16 __cpu_to_le16p(const __u16 *p)
{
	const __u16 native = *p;

	return (__force __le16)native;
}
/*
 * Read the 16-bit little-endian value at @p and hand it back as a plain
 * __u16.  Only the type annotation changes; the loaded bits are returned
 * as-is.
 */
static __always_inline __u16 __le16_to_cpup(const __le16 *p)
{
	const __le16 stored = *p;

	return (__force __u16)stored;
}
/*
 * Convert the 64-bit CPU-order value at @p to big-endian.
 * The pointer is handed to __swab64p() (the swab helper, presumably the
 * byte-swapping load from <linux/swab.h>) and its result is retyped as
 * __be64.
 */
static __always_inline __be64 __cpu_to_be64p(const __u64 *p)
{
	const __u64 swapped = __swab64p(p);

	return (__force __be64)swapped;
}
/*
 * Convert the 64-bit big-endian value at @p to CPU order.
 * The pointer is retyped and handed to __swab64p(), whose result is
 * returned unchanged.
 *
 * The cast keeps the const qualifier: __swab64p() is already called with a
 * const __u64 pointer by __cpu_to_be64p(), so there is no reason to cast
 * constness away here (doing so trips -Wcast-qual).
 */
static __always_inline __u64 __be64_to_cpup(const __be64 *p)
{
	return __swab64p((const __u64 *)p);
}
/*
 * Convert the 32-bit CPU-order value at @p to big-endian.
 * The pointer is handed to __swab32p() (the swab helper, presumably the
 * byte-swapping load from <linux/swab.h>) and its result is retyped as
 * __be32.
 */
static __always_inline __be32 __cpu_to_be32p(const __u32 *p)
{
	const __u32 swapped = __swab32p(p);

	return (__force __be32)swapped;
}
/*
 * Convert the 32-bit big-endian value at @p to CPU order.
 * The pointer is retyped and handed to __swab32p(), whose result is
 * returned unchanged.
 *
 * The cast keeps the const qualifier: __swab32p() is already called with a
 * const __u32 pointer by __cpu_to_be32p(), so there is no reason to cast
 * constness away here (doing so trips -Wcast-qual).
 */
static __always_inline __u32 __be32_to_cpup(const __be32 *p)
{
	return __swab32p((const __u32 *)p);
}
/*
 * Convert the 16-bit CPU-order value at @p to big-endian.
 * The pointer is handed to __swab16p() (the swab helper, presumably the
 * byte-swapping load from <linux/swab.h>) and its result is retyped as
 * __be16.
 */
static __always_inline __be16 __cpu_to_be16p(const __u16 *p)
{
	const __u16 swapped = __swab16p(p);

	return (__force __be16)swapped;
}
/*
 * Convert the 16-bit big-endian value at @p to CPU order.
 * The pointer is retyped and handed to __swab16p(), whose result is
 * returned unchanged.
 *
 * The cast keeps the const qualifier: __swab16p() is already called with a
 * const __u16 pointer by __cpu_to_be16p(), so there is no reason to cast
 * constness away here (doing so trips -Wcast-qual).
 */
static __always_inline __u16 __be16_to_cpup(const __be16 *p)
{
	return __swab16p((const __u16 *)p);
}
/*
 * In-place CPU <-> little-endian conversions.
 *
 * Nothing is written: each macro evaluates its argument once, discards the
 * result, and is wrapped in do { } while (0) so it behaves as a single
 * statement.
 */
#define __cpu_to_le64s(x)	do { (void)(x); } while (0)
#define __le64_to_cpus(x)	do { (void)(x); } while (0)
#define __cpu_to_le32s(x)	do { (void)(x); } while (0)
#define __le32_to_cpus(x)	do { (void)(x); } while (0)
#define __cpu_to_le16s(x)	do { (void)(x); } while (0)
#define __le16_to_cpus(x)	do { (void)(x); } while (0)
/*
 * In-place CPU <-> big-endian conversions.
 *
 * Both directions of a given width expand to the same __swab{16,32,64}s()
 * call on the argument.
 */
#define __cpu_to_be64s(x)	__swab64s((x))
#define __be64_to_cpus(x)	__swab64s((x))
#define __cpu_to_be32s(x)	__swab32s((x))
#define __be32_to_cpus(x)	__swab32s((x))
#define __cpu_to_be16s(x)	__swab16s((x))
#define __be16_to_cpus(x)	__swab16s((x))

#endif /* _UAPI_LINUX_BYTEORDER_LITTLE_ENDIAN_H */
|