#ifndef _UAPI_LINUX_SWAB_H
#define _UAPI_LINUX_SWAB_H

#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/swab.h>

/*
 * casts are necessary for constants, because we never know for sure
 * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
 */
#define ___constant_swab16(x) ((__u16)(			\
	(((__u16)(x) & (__u16)0x00ffU) << 8) |			\
	(((__u16)(x) & (__u16)0xff00U) >> 8)))

#define ___constant_swab32(x) ((__u32)(			\
	(((__u32)(x) & (__u32)0x000000ffUL) << 24) |		\
	(((__u32)(x) & (__u32)0x0000ff00UL) << 8) |		\
	(((__u32)(x) & (__u32)0x00ff0000UL) >> 8) |		\
	(((__u32)(x) & (__u32)0xff000000UL) >> 24)))

#define ___constant_swab64(x) ((__u64)(			\
	(((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) |	\
	(((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) |	\
	(((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) |	\
	(((__u64)(x) & (__u64)0x00000000ff000000ULL) << 8) |	\
	(((__u64)(x) & (__u64)0x000000ff00000000ULL) >> 8) |	\
	(((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) |	\
	(((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) |	\
	(((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56)))

#define ___constant_swahw32(x) ((__u32)(		\
	(((__u32)(x) & (__u32)0x0000ffffUL) << 16) |	\
	(((__u32)(x) & (__u32)0xffff0000UL) >> 16)))

#define ___constant_swahb32(x) ((__u32)(		\
	(((__u32)(x) & (__u32)0x00ff00ffUL) << 8) |	\
	(((__u32)(x) & (__u32)0xff00ff00UL) >> 8)))
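
/*
 * Worked examples for the constant forms (illustrative values only):
 *
 *	___constant_swab16(0x1234)      == 0x3412
 *	___constant_swab32(0x12345678)  == 0x78563412
 *	___constant_swahw32(0x12345678) == 0x56781234
 *	___constant_swahb32(0x12345678) == 0x34127856
 */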

/*
 * Implement the following as inlines, but define the interface using
 * macros to allow constant folding when possible:
 * ___swab16, ___swab32, ___swab64, ___swahw32, ___swahb32
 */

static inline __attribute_const__ __u16 __fswab16(__u16 val)
{
#if defined(__arch_swab16)
	return __arch_swab16(val);
#else
	return ___constant_swab16(val);
#endif
}

static inline __attribute_const__ __u32 __fswab32(__u32 val)
{
#if defined(__arch_swab32)
	return __arch_swab32(val);
#else
	return ___constant_swab32(val);
#endif
}

static inline __attribute_const__ __u64 __fswab64(__u64 val)
{
#if defined(__arch_swab64)
	return __arch_swab64(val);
#elif defined(__SWAB_64_THRU_32__)
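	/* Byteswap each 32-bit half, then exchange the halves. */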
	__u32 h = val >> 32;
	__u32 l = val & ((1ULL << 32) - 1);

	return (((__u64)__fswab32(l)) << 32) | ((__u64)(__fswab32(h)));
#else
	return ___constant_swab64(val);
#endif
}

static inline __attribute_const__ __u32 __fswahw32(__u32 val)
{
#ifdef __arch_swahw32
	return __arch_swahw32(val);
#else
	return ___constant_swahw32(val);
#endif
}

static inline __attribute_const__ __u32 __fswahb32(__u32 val)
{
#ifdef __arch_swahb32
	return __arch_swahb32(val);
#else
	return ___constant_swahb32(val);
#endif
}

/**
 * __swab16 - return a byteswapped 16-bit value
 * @x: value to byteswap
 */
#ifdef __HAVE_BUILTIN_BSWAP16__
#define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
#else
#define __swab16(x)				\
	(__builtin_constant_p((__u16)(x)) ?	\
	___constant_swab16(x) :			\
	__fswab16(x))
#endif
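
/*
 * Constant-folding sketch (illustrative only; "flags" is a hypothetical
 * runtime variable, not part of this header):
 *
 *	__swab16(0x1234)  - __builtin_constant_p() is true, so the macro
 *	                    folds to the constant 0x3412 at compile time.
 *	__swab16(flags)   - not a compile-time constant, so the macro
 *	                    expands to the __fswab16() inline (or to the
 *	                    compiler's bswap builtin when available).
 */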

/**
 * __swab32 - return a byteswapped 32-bit value
 * @x: value to byteswap
 */
#ifdef __HAVE_BUILTIN_BSWAP32__
#define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
#else
#define __swab32(x)				\
	(__builtin_constant_p((__u32)(x)) ?	\
	___constant_swab32(x) :			\
	__fswab32(x))
#endif

/**
 * __swab64 - return a byteswapped 64-bit value
 * @x: value to byteswap
 */
#ifdef __HAVE_BUILTIN_BSWAP64__
#define __swab64(x) (__u64)__builtin_bswap64((__u64)(x))
#else
#define __swab64(x)				\
	(__builtin_constant_p((__u64)(x)) ?	\
	___constant_swab64(x) :			\
	__fswab64(x))
#endif

/**
 * __swahw32 - return a word-swapped 32-bit value
 * @x: value to wordswap
 *
 * __swahw32(0x12340000) is 0x00001234
 */
#define __swahw32(x)				\
	(__builtin_constant_p((__u32)(x)) ?	\
	___constant_swahw32(x) :		\
	__fswahw32(x))

/**
 * __swahb32 - return a high and low byte-swapped 32-bit value
 * @x: value to byteswap
 *
 * __swahb32(0x12345678) is 0x34127856
 */
#define __swahb32(x)				\
	(__builtin_constant_p((__u32)(x)) ?	\
	___constant_swahb32(x) :		\
	__fswahb32(x))

/**
 * __swab16p - return a byteswapped 16-bit value from a pointer
 * @p: pointer to a naturally-aligned 16-bit value
 */
static __always_inline __u16 __swab16p(const __u16 *p)
{
#ifdef __arch_swab16p
	return __arch_swab16p(p);
#else
	return __swab16(*p);
#endif
}

/**
 * __swab32p - return a byteswapped 32-bit value from a pointer
 * @p: pointer to a naturally-aligned 32-bit value
 */
static __always_inline __u32 __swab32p(const __u32 *p)
{
#ifdef __arch_swab32p
	return __arch_swab32p(p);
#else
	return __swab32(*p);
#endif
}

/**
 * __swab64p - return a byteswapped 64-bit value from a pointer
 * @p: pointer to a naturally-aligned 64-bit value
 */
static __always_inline __u64 __swab64p(const __u64 *p)
{
#ifdef __arch_swab64p
	return __arch_swab64p(p);
#else
	return __swab64(*p);
#endif
}
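
/*
 * Usage sketch (illustrative only; "raw" is a hypothetical variable):
 *
 *	__u32 raw = 0x12345678;
 *	__u32 swapped = __swab32p(&raw);	// swapped == 0x78563412
 *
 * The pointer variants read a naturally-aligned value and return its
 * byteswapped form without modifying the original.
 */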

/**
 * __swahw32p - return a wordswapped 32-bit value from a pointer
 * @p: pointer to a naturally-aligned 32-bit value
 *
 * See __swahw32() for details of wordswapping.
 */
static inline __u32 __swahw32p(const __u32 *p)
{
#ifdef __arch_swahw32p
	return __arch_swahw32p(p);
#else
	return __swahw32(*p);
#endif
}

/**
 * __swahb32p - return a high and low byteswapped 32-bit value from a pointer
 * @p: pointer to a naturally-aligned 32-bit value
 *
 * See __swahb32() for details of high/low byteswapping.
 */
static inline __u32 __swahb32p(const __u32 *p)
{
#ifdef __arch_swahb32p
	return __arch_swahb32p(p);
#else
	return __swahb32(*p);
#endif
}

/**
 * __swab16s - byteswap a 16-bit value in-place
 * @p: pointer to a naturally-aligned 16-bit value
 */
static inline void __swab16s(__u16 *p)
{
#ifdef __arch_swab16s
	__arch_swab16s(p);
#else
	*p = __swab16p(p);
#endif
}

/**
 * __swab32s - byteswap a 32-bit value in-place
 * @p: pointer to a naturally-aligned 32-bit value
 */
static __always_inline void __swab32s(__u32 *p)
{
#ifdef __arch_swab32s
	__arch_swab32s(p);
#else
	*p = __swab32p(p);
#endif
}

/**
 * __swab64s - byteswap a 64-bit value in-place
 * @p: pointer to a naturally-aligned 64-bit value
 */
static __always_inline void __swab64s(__u64 *p)
{
#ifdef __arch_swab64s
	__arch_swab64s(p);
#else
	*p = __swab64p(p);
#endif
}
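
/*
 * In-place sketch (illustrative only; "v" is a hypothetical variable):
 *
 *	__u64 v = 0x0123456789abcdefULL;
 *	__swab64s(&v);		// v is now 0xefcdab8967452301
 *
 * The *s variants overwrite the pointed-to value with its byteswapped
 * form instead of returning it.
 */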

/**
 * __swahw32s - wordswap a 32-bit value in-place
 * @p: pointer to a naturally-aligned 32-bit value
 *
 * See __swahw32() for details of wordswapping.
 */
static inline void __swahw32s(__u32 *p)
{
#ifdef __arch_swahw32s
	__arch_swahw32s(p);
#else
	*p = __swahw32p(p);
#endif
}

/**
 * __swahb32s - high and low byteswap a 32-bit value in-place
 * @p: pointer to a naturally-aligned 32-bit value
 *
 * See __swahb32() for details of high and low byte swapping.
 */
static inline void __swahb32s(__u32 *p)
{
#ifdef __arch_swahb32s
	__arch_swahb32s(p);
#else
	*p = __swahb32p(p);
#endif
}

#endif /* _UAPI_LINUX_SWAB_H */