Commit 0d74c42f788caf3cad727c61c490d9459bc8918b
Committed by
David S. Miller
1 parent
5cc208becb
Exists in
master
and in
16 other branches
ether_addr_equal: Optimize implementation, remove unused compare_ether_addr
Add a new check for CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS to reduce the number of or's used in the ether_addr_equal comparison to very slightly improve function performance. Simplify the ether_addr_equal_64bits implementation. Integrate and remove the zap_last_2bytes helper as it's now used only once. Remove the now unused compare_ether_addr function. Update the unaligned-memory-access documentation to remove the compare_ether_addr description and show how unaligned accesses could occur with ether_addr_equal. Signed-off-by: Joe Perches <joe@perches.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 2 changed files with 37 additions and 42 deletions Side-by-side Diff
Documentation/unaligned-memory-access.txt
... | ... | @@ -137,24 +137,34 @@ |
137 | 137 | ================================= |
138 | 138 | |
139 | 139 | With the above in mind, let's move onto a real life example of a function |
140 | -that can cause an unaligned memory access. The following function adapted | |
140 | +that can cause an unaligned memory access. The following function taken | |
141 | 141 | from include/linux/etherdevice.h is an optimized routine to compare two |
142 | 142 | ethernet MAC addresses for equality. |
143 | 143 | |
144 | -unsigned int compare_ether_addr(const u8 *addr1, const u8 *addr2) | |
144 | +bool ether_addr_equal(const u8 *addr1, const u8 *addr2) | |
145 | 145 | { |
146 | - const u16 *a = (const u16 *) addr1; | |
147 | - const u16 *b = (const u16 *) addr2; | |
146 | +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS | |
147 | + u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) | | |
148 | + ((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4))); | |
149 | + | |
150 | + return fold == 0; | |
151 | +#else | |
152 | + const u16 *a = (const u16 *)addr1; | |
153 | + const u16 *b = (const u16 *)addr2; | |
148 | 154 | return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; |
155 | +#endif | |
149 | 156 | } |
150 | 157 | |
151 | -In the above function, the reference to a[0] causes 2 bytes (16 bits) to | |
152 | -be read from memory starting at address addr1. Think about what would happen | |
153 | -if addr1 was an odd address such as 0x10003. (Hint: it'd be an unaligned | |
154 | -access.) | |
158 | +In the above function, when the hardware has efficient unaligned access | |
159 | +capability, there is no issue with this code. But when the hardware isn't | |
160 | +able to access memory on arbitrary boundaries, the reference to a[0] causes | |
161 | +2 bytes (16 bits) to be read from memory starting at address addr1. | |
155 | 162 | |
163 | +Think about what would happen if addr1 was an odd address such as 0x10003. | |
164 | +(Hint: it'd be an unaligned access.) | |
165 | + | |
156 | 166 | Despite the potential unaligned access problems with the above function, it |
157 | -is included in the kernel anyway but is understood to only work on | |
167 | +is included in the kernel anyway but is understood to only work normally on | |
158 | 168 | 16-bit-aligned addresses. It is up to the caller to ensure this alignment or |
159 | 169 | not use this function at all. This alignment-unsafe function is still useful |
160 | 170 | as it is a decent optimization for the cases when you can ensure alignment, |
include/linux/etherdevice.h
... | ... | @@ -26,6 +26,7 @@ |
26 | 26 | #include <linux/netdevice.h> |
27 | 27 | #include <linux/random.h> |
28 | 28 | #include <asm/unaligned.h> |
29 | +#include <asm/bitsperlong.h> | |
29 | 30 | |
30 | 31 | #ifdef __KERNEL__ |
31 | 32 | __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); |
32 | 33 | |
33 | 34 | |
34 | 35 | |
35 | 36 | |
... | ... | @@ -211,40 +212,26 @@ |
211 | 212 | } |
212 | 213 | |
213 | 214 | /** |
214 | - * compare_ether_addr - Compare two Ethernet addresses | |
215 | - * @addr1: Pointer to a six-byte array containing the Ethernet address | |
216 | - * @addr2: Pointer other six-byte array containing the Ethernet address | |
217 | - * | |
218 | - * Compare two Ethernet addresses, returns 0 if equal, non-zero otherwise. | |
219 | - * Unlike memcmp(), it doesn't return a value suitable for sorting. | |
220 | - */ | |
221 | -static inline unsigned compare_ether_addr(const u8 *addr1, const u8 *addr2) | |
222 | -{ | |
223 | - const u16 *a = (const u16 *) addr1; | |
224 | - const u16 *b = (const u16 *) addr2; | |
225 | - | |
226 | - BUILD_BUG_ON(ETH_ALEN != 6); | |
227 | - return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; | |
228 | -} | |
229 | - | |
230 | -/** | |
231 | 215 | * ether_addr_equal - Compare two Ethernet addresses |
232 | 216 | * @addr1: Pointer to a six-byte array containing the Ethernet address |
233 | 217 | * @addr2: Pointer other six-byte array containing the Ethernet address |
234 | 218 | * |
235 | 219 | * Compare two Ethernet addresses, returns true if equal |
220 | + * | |
221 | + * Please note: addr1 & addr2 must both be aligned to u16. | |
236 | 222 | */ |
237 | 223 | static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2) |
238 | 224 | { |
239 | - return !compare_ether_addr(addr1, addr2); | |
240 | -} | |
225 | +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) | |
226 | + u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) | | |
227 | + ((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4))); | |
241 | 228 | |
242 | -static inline unsigned long zap_last_2bytes(unsigned long value) | |
243 | -{ | |
244 | -#ifdef __BIG_ENDIAN | |
245 | - return value >> 16; | |
229 | + return fold == 0; | |
246 | 230 | #else |
247 | - return value << 16; | |
231 | + const u16 *a = (const u16 *)addr1; | |
232 | + const u16 *b = (const u16 *)addr2; | |
233 | + | |
234 | + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0; | |
248 | 235 | #endif |
249 | 236 | } |
250 | 237 | |
251 | 238 | |
... | ... | @@ -265,16 +252,14 @@ |
265 | 252 | static inline bool ether_addr_equal_64bits(const u8 addr1[6+2], |
266 | 253 | const u8 addr2[6+2]) |
267 | 254 | { |
268 | -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS | |
269 | - unsigned long fold = ((*(unsigned long *)addr1) ^ | |
270 | - (*(unsigned long *)addr2)); | |
255 | +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 | |
256 | + u64 fold = (*(const u64 *)addr1) ^ (*(const u64 *)addr2); | |
271 | 257 | |
272 | - if (sizeof(fold) == 8) | |
273 | - return zap_last_2bytes(fold) == 0; | |
274 | - | |
275 | - fold |= zap_last_2bytes((*(unsigned long *)(addr1 + 4)) ^ | |
276 | - (*(unsigned long *)(addr2 + 4))); | |
277 | - return fold == 0; | |
258 | +#ifdef __BIG_ENDIAN | |
259 | + return (fold >> 16) == 0; | |
260 | +#else | |
261 | + return (fold << 16) == 0; | |
262 | +#endif | |
278 | 263 | #else |
279 | 264 | return ether_addr_equal(addr1, addr2); |
280 | 265 | #endif |