Commit cf66bb93e0f75e0a4ba1ec070692618fa028e994
1 parent
27d7c2a006
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
byteorder: allow arch to opt to use GCC intrinsics for byteswapping
Since GCC 4.4, there have been __builtin_bswap32() and __builtin_bswap64() intrinsics. A __builtin_bswap16() came a little later (4.6 for PowerPC, 4.8 for other platforms). By using these instead of the inline assembler that most architectures have in their __arch_swabXX() macros, we let the compiler see what's actually happening. The resulting code should be at least as good, and much *better* in the cases where it can be combined with a nearby load or store, using a load-and-byteswap or store-and-byteswap instruction (e.g. lwbrx/stwbrx on PowerPC, movbe on Atom). When GCC is sufficiently recent *and* the architecture opts in to using the intrinsics by setting CONFIG_ARCH_USE_BUILTIN_BSWAP, they will be used in preference to the __arch_swabXX() macros. An architecture which does not set ARCH_USE_BUILTIN_BSWAP will continue to use its own hand-crafted macros. Signed-off-by: David Woodhouse <David.Woodhouse@intel.com> Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Showing 4 changed files with 45 additions and 3 deletions Side-by-side Diff
arch/Kconfig
... | ... | @@ -112,6 +112,25 @@ |
112 | 112 | See Documentation/unaligned-memory-access.txt for more |
113 | 113 | information on the topic of unaligned memory accesses. |
114 | 114 | |
115 | +config ARCH_USE_BUILTIN_BSWAP | |
116 | + bool | |
117 | + help | |
118 | + Modern versions of GCC (since 4.4) have builtin functions | |
119 | + for handling byte-swapping. Using these, instead of the old | |
120 | + inline assembler that the architecture code provides in the | |
121 | + __arch_bswapXX() macros, allows the compiler to see what's | |
122 | + happening and offers more opportunity for optimisation. In | |
123 | + particular, the compiler will be able to combine the byteswap | |
124 | + with a nearby load or store and use load-and-swap or | |
125 | + store-and-swap instructions if the architecture has them. It | |
126 | + should almost *never* result in code which is worse than the | |
127 | + hand-coded assembler in <asm/swab.h>. But just in case it | |
128 | + does, the use of the builtins is optional. | |
129 | + | |
130 | + Any architecture with load-and-swap or store-and-swap | |
131 | + instructions should set this. And it shouldn't hurt to set it | |
132 | + on architectures that don't have such instructions. | |
133 | + | |
115 | 134 | config HAVE_SYSCALL_WRAPPERS |
116 | 135 | bool |
117 | 136 |
include/linux/compiler-gcc4.h
... | ... | @@ -63,4 +63,14 @@ |
63 | 63 | #define __compiletime_warning(message) __attribute__((warning(message))) |
64 | 64 | #define __compiletime_error(message) __attribute__((error(message))) |
65 | 65 | #endif |
66 | + | |
67 | +#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP | |
68 | +#if __GNUC_MINOR__ >= 4 | |
69 | +#define __HAVE_BUILTIN_BSWAP32__ | |
70 | +#define __HAVE_BUILTIN_BSWAP64__ | |
71 | +#endif | |
72 | +#if __GNUC_MINOR__ >= 8 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6) | |
73 | +#define __HAVE_BUILTIN_BSWAP16__ | |
74 | +#endif | |
75 | +#endif |
include/linux/compiler-intel.h
include/uapi/linux/swab.h
... | ... | @@ -45,7 +45,9 @@ |
45 | 45 | |
46 | 46 | static inline __attribute_const__ __u16 __fswab16(__u16 val) |
47 | 47 | { |
48 | -#ifdef __arch_swab16 | |
48 | +#ifdef __HAVE_BUILTIN_BSWAP16__ | |
49 | + return __builtin_bswap16(val); | |
50 | +#elif defined (__arch_swab16) | |
49 | 51 | return __arch_swab16(val); |
50 | 52 | #else |
51 | 53 | return ___constant_swab16(val); |
... | ... | @@ -54,7 +56,9 @@ |
54 | 56 | |
55 | 57 | static inline __attribute_const__ __u32 __fswab32(__u32 val) |
56 | 58 | { |
57 | -#ifdef __arch_swab32 | |
59 | +#ifdef __HAVE_BUILTIN_BSWAP32__ | |
60 | + return __builtin_bswap32(val); | |
61 | +#elif defined(__arch_swab32) | |
58 | 62 | return __arch_swab32(val); |
59 | 63 | #else |
60 | 64 | return ___constant_swab32(val); |
... | ... | @@ -63,7 +67,9 @@ |
63 | 67 | |
64 | 68 | static inline __attribute_const__ __u64 __fswab64(__u64 val) |
65 | 69 | { |
66 | -#ifdef __arch_swab64 | |
70 | +#ifdef __HAVE_BUILTIN_BSWAP64__ | |
71 | + return __builtin_bswap64(val); | |
72 | +#elif defined (__arch_swab64) | |
67 | 73 | return __arch_swab64(val); |
68 | 74 | #elif defined(__SWAB_64_THRU_32__) |
69 | 75 | __u32 h = val >> 32; |