Commit 4b9b1cdf83c4facba89e0646aeac8ead679851b8

Authored by Nikolay Aleksandrov
Committed by David S. Miller
1 parent 6ce995c6f4

net: fix wrong mac_len calculation for vlans

After 1e785f48d29a ("net: Start with correct mac_len in
skb_network_protocol") skb->mac_len is used as a start of the
calculation in skb_network_protocol() but that is not always correct. If
skb->protocol == 8021Q/AD, usually the vlan header is already inserted
in the skb (i.e. vlan reorder hdr == 0). Usually when the packet enters
dev_hard_xmit it has mac_len == 0 so we take 2 bytes from the
destination mac address (skb->data + VLAN_HLEN) as a type in
skb_network_protocol() and return vlan_depth == 4. In the case where TSO is
off, then the mac_len is set but it's == 18 (ETH_HLEN + VLAN_HLEN), so
skb_network_protocol() returns a type from inside the packet and
offset == 22. Also make vlan_depth unsigned as suggested before.
As suggested by Eric Dumazet, move the while() loop in the if() so we
can avoid additional testing in fast path.

Here are few netperf tests + debug printk's to illustrate:
cat netperf.tso-on.reorder-on.bugged
- Vlan -> device (reorder on, default, this case is okay)
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.3.1 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    7111.54
[   81.605435] skb->len 65226 skb->gso_size 1448 skb->proto 0x800
skb->mac_len 0 vlan_depth 0 type 0x800

- Vlan -> device (reorder off, bad)
cat netperf.tso-on.reorder-off.bugged
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.3.1 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00     241.35
[  204.578332] skb->len 1518 skb->gso_size 0 skb->proto 0x8100
skb->mac_len 0 vlan_depth 4 type 0x5301
0x5301 are the last two bytes of the destination mac.

And if we stop TSO, we may get even the following:
[   83.343156] skb->len 2966 skb->gso_size 1448 skb->proto 0x8100
skb->mac_len 18 vlan_depth 22 type 0xb84
Because mac_len already accounts for VLAN_HLEN.

After the fix:
cat netperf.tso-on.reorder-off.fixed
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.3.1 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.01    5001.46
[   81.888489] skb->len 65230 skb->gso_size 1448 skb->proto 0x8100
skb->mac_len 0 vlan_depth 18 type 0x800

CC: Vlad Yasevich <vyasevic@redhat.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: Daniel Borkman <dborkman@redhat.com>
CC: David S. Miller <davem@davemloft.net>

Fixes:1e785f48d29a ("net: Start with correct mac_len in
skb_network_protocol")
Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 1 changed file with 25 additions and 10 deletions Side-by-side Diff

... ... @@ -2283,8 +2283,8 @@
2283 2283  
2284 2284 __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
2285 2285 {
  2286 + unsigned int vlan_depth = skb->mac_len;
2286 2287 __be16 type = skb->protocol;
2287   - int vlan_depth = skb->mac_len;
2288 2288  
2289 2289 /* Tunnel gso handlers can set protocol to ethernet. */
2290 2290 if (type == htons(ETH_P_TEB)) {
2291 2291  
2292 2292  
... ... @@ -2297,15 +2297,30 @@
2297 2297 type = eth->h_proto;
2298 2298 }
2299 2299  
2300   - while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
2301   - struct vlan_hdr *vh;
  2300 + /* if skb->protocol is 802.1Q/AD then the header should already be
  2301 + * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
  2302 + * ETH_HLEN otherwise
  2303 + */
  2304 + if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
  2305 + if (vlan_depth) {
  2306 + if (unlikely(WARN_ON(vlan_depth < VLAN_HLEN)))
  2307 + return 0;
  2308 + vlan_depth -= VLAN_HLEN;
  2309 + } else {
  2310 + vlan_depth = ETH_HLEN;
  2311 + }
  2312 + do {
  2313 + struct vlan_hdr *vh;
2302 2314  
2303   - if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
2304   - return 0;
  2315 + if (unlikely(!pskb_may_pull(skb,
  2316 + vlan_depth + VLAN_HLEN)))
  2317 + return 0;
2305 2318  
2306   - vh = (struct vlan_hdr *)(skb->data + vlan_depth);
2307   - type = vh->h_vlan_encapsulated_proto;
2308   - vlan_depth += VLAN_HLEN;
  2319 + vh = (struct vlan_hdr *)(skb->data + vlan_depth);
  2320 + type = vh->h_vlan_encapsulated_proto;
  2321 + vlan_depth += VLAN_HLEN;
  2322 + } while (type == htons(ETH_P_8021Q) ||
  2323 + type == htons(ETH_P_8021AD));
2309 2324 }
2310 2325  
2311 2326 *depth = vlan_depth;