Commit 1f87e235e6fb92c2968b52b9191de04f1aff8e77

Authored by Eric Dumazet
Committed by David S. Miller
1 parent 70eb1bfd52

eth: Declare an optimized compare_ether_addr_64bits() function

Linus mentioned we could try to perform long word operations, even
on potentially unaligned addresses, on x86 at least. David mentioned
the HAVE_EFFICIENT_UNALIGNED_ACCESS test to handle this on all
arches that have efficient unaligned accesses.

I tried this idea and got nice assembly on 32 bits:

158:   33 82 38 01 00 00       xor    0x138(%edx),%eax
15e:   33 8a 34 01 00 00       xor    0x134(%edx),%ecx
164:   c1 e0 10                shl    $0x10,%eax
167:   09 c1                   or     %eax,%ecx
169:   74 0b                   je     176 <eth_type_trans+0x87>

And very nice assembly on 64 bits of course (one xor, one shl)

Nice oprofile improvement in eth_type_trans(), 0.17 % instead of 0.41 %,
expected since we remove 8 instructions on a fast path.

This patch implements a compare_ether_addr_64bits() function, that
uses the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS ifdef to efficiently
perform the 6 bytes comparison on all capable arches.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 2 changed files with 45 additions and 3 deletions Side-by-side Diff

include/linux/etherdevice.h
... ... @@ -27,6 +27,7 @@
27 27 #include <linux/if_ether.h>
28 28 #include <linux/netdevice.h>
29 29 #include <linux/random.h>
  30 +#include <asm/unaligned.h>
30 31  
31 32 #ifdef __KERNEL__
32 33 extern __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
... ... @@ -139,6 +140,47 @@
139 140  
140 141 BUILD_BUG_ON(ETH_ALEN != 6);
141 142 return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
  143 +}
  144 +
  145 +static inline unsigned long zap_last_2bytes(unsigned long value)
  146 +{ /* Shift out the two bytes that sit at the highest memory addresses of a loaded word. */
  147 +#ifdef __BIG_ENDIAN
  148 + return value >> 16; /* big-endian: the last two bytes in memory are the low-order 16 bits */
  149 +#else
  150 + return value << 16; /* little-endian: the last two bytes in memory are the high-order 16 bits */
  151 +#endif
  152 +}
  153 +
  154 +/**
  155 + * compare_ether_addr_64bits - Compare two Ethernet addresses
  156 + * @addr1: Pointer to an array of 8 bytes
  157 + * @addr2: Pointer to another array of 8 bytes
  158 + *
  159 + * Compare two Ethernet addresses, returns 0 if equal, non-zero otherwise.
  160 + * Same equality result as "memcmp(addr1, addr2, ETH_ALEN)" but without conditional
  161 + * branches, and possibly long word memory accesses on CPUs allowing cheap
  162 + * unaligned memory reads. All 8 bytes may be read; the 2 pad bytes are ignored.
  163 + * arrays = { byte1, byte2, byte3, byte4, byte5, byte6, pad1, pad2 }
  164 + *
  165 + * Please note that alignment of addr1 & addr2 is only guaranteed to be 16 bits.
  166 + */
  167 +
  168 +static inline unsigned compare_ether_addr_64bits(const u8 addr1[6+2],
  169 + const u8 addr2[6+2])
  170 +{
  171 +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  172 + unsigned long fold = ((*(unsigned long *)addr1) ^
  173 + (*(unsigned long *)addr2));
  174 +
  175 + if (sizeof(fold) == 8) /* 64-bit long: the single load already covers all 6 address bytes plus the pad */
  176 + return zap_last_2bytes(fold) != 0;
  177 +
  178 + fold |= zap_last_2bytes((*(unsigned long *)(addr1 + 4)) ^
  179 + (*(unsigned long *)(addr2 + 4))); /* 32-bit long: fold in bytes 4-5, pad bytes shifted out */
  180 + return fold != 0;
  181 +#else
  182 + return compare_ether_addr(addr1, addr2); /* no efficient unaligned access: use the generic compare */
  183 +#endif
142 184 }
143 185 #endif /* __KERNEL__ */
144 186  
... ... @@ -165,8 +165,8 @@
165 165 skb_pull(skb, ETH_HLEN);
166 166 eth = eth_hdr(skb);
167 167  
168   - if (is_multicast_ether_addr(eth->h_dest)) {
169   - if (!compare_ether_addr(eth->h_dest, dev->broadcast))
  168 + if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
  169 + if (!compare_ether_addr_64bits(eth->h_dest, dev->broadcast))
170 170 skb->pkt_type = PACKET_BROADCAST;
171 171 else
172 172 skb->pkt_type = PACKET_MULTICAST;
... ... @@ -181,7 +181,7 @@
181 181 */
182 182  
183 183 else if (1 /*dev->flags&IFF_PROMISC */ ) {
184   - if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr)))
  184 + if (unlikely(compare_ether_addr_64bits(eth->h_dest, dev->dev_addr)))
185 185 skb->pkt_type = PACKET_OTHERHOST;
186 186 }
187 187