Commit 971f10eca186cab238c49daa91f703c5a001b0b1
tcp: better TCP_SKB_CB layout to reduce cache line misses
TCP maintains lists of skbs in the write queue and in the receive queues (in-order and out-of-order queues). Scanning these lists in both the input and output paths usually requires access to skb->next, TCP_SKB_CB(skb)->seq, and TCP_SKB_CB(skb)->end_seq. These fields are currently in two different cache lines, meaning we waste a lot of memory bandwidth when these queues are big and flows have either packet drops or packet reorders. We can move TCP_SKB_CB(skb)->header to the end of TCP_SKB_CB, because this header is not used in the fast path. This allows TCP to search much faster in the skb lists. Even with regular flows, we save one cache line miss in the fast path. Thanks to Christoph Paasch for noticing we need to clean up skb->cb[] (IPCB/IP6CB) before entering the IP stack in the tx path, and that I forgot the IPCB use in tcp_v4_hnd_req() and tcp_v4_save_options(). Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 4 changed files with 30 additions and 13 deletions Side-by-side Diff
... | ... | @@ -696,12 +696,6 @@ |
696 | 696 | * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately. |
697 | 697 | */ |
698 | 698 | struct tcp_skb_cb { |
699 | - union { | |
700 | - struct inet_skb_parm h4; | |
701 | -#if IS_ENABLED(CONFIG_IPV6) | |
702 | - struct inet6_skb_parm h6; | |
703 | -#endif | |
704 | - } header; /* For incoming frames */ | |
705 | 699 | __u32 seq; /* Starting sequence number */ |
706 | 700 | __u32 end_seq; /* SEQ + FIN + SYN + datalen */ |
707 | 701 | __u32 tcp_tw_isn; /* isn chosen by tcp_timewait_state_process() */ |
... | ... | @@ -720,6 +714,12 @@ |
720 | 714 | __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ |
721 | 715 | /* 1 byte hole */ |
722 | 716 | __u32 ack_seq; /* Sequence number ACK'd */ |
717 | + union { | |
718 | + struct inet_skb_parm h4; | |
719 | +#if IS_ENABLED(CONFIG_IPV6) | |
720 | + struct inet6_skb_parm h6; | |
721 | +#endif | |
722 | + } header; /* For incoming frames */ | |
723 | 723 | }; |
724 | 724 | |
725 | 725 | #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) |
... | ... | @@ -886,18 +886,16 @@ |
886 | 886 | */ |
887 | 887 | static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) |
888 | 888 | { |
889 | - const struct ip_options *opt = &(IPCB(skb)->opt); | |
889 | + const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; | |
890 | 890 | struct ip_options_rcu *dopt = NULL; |
891 | 891 | |
892 | 892 | if (opt && opt->optlen) { |
893 | 893 | int opt_size = sizeof(*dopt) + opt->optlen; |
894 | 894 | |
895 | 895 | dopt = kmalloc(opt_size, GFP_ATOMIC); |
896 | - if (dopt) { | |
897 | - if (ip_options_echo(&dopt->opt, skb)) { | |
898 | - kfree(dopt); | |
899 | - dopt = NULL; | |
900 | - } | |
896 | + if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) { | |
897 | + kfree(dopt); | |
898 | + dopt = NULL; | |
901 | 899 | } |
902 | 900 | } |
903 | 901 | return dopt; |
... | ... | @@ -1431,7 +1429,7 @@ |
1431 | 1429 | |
1432 | 1430 | #ifdef CONFIG_SYN_COOKIES |
1433 | 1431 | if (!th->syn) |
1434 | - sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); | |
1432 | + sk = cookie_v4_check(sk, skb, &TCP_SKB_CB(skb)->header.h4.opt); | |
1435 | 1433 | #endif |
1436 | 1434 | return sk; |
1437 | 1435 | } |
... | ... | @@ -1636,6 +1634,13 @@ |
1636 | 1634 | |
1637 | 1635 | th = tcp_hdr(skb); |
1638 | 1636 | iph = ip_hdr(skb); |
1637 | + /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() | |
1638 | + * barrier() makes sure compiler wont play fool^Waliasing games. | |
1639 | + */ | |
1640 | + memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), | |
1641 | + sizeof(struct inet_skb_parm)); | |
1642 | + barrier(); | |
1643 | + | |
1639 | 1644 | TCP_SKB_CB(skb)->seq = ntohl(th->seq); |
1640 | 1645 | TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + |
1641 | 1646 | skb->len - th->doff * 4); |
... | ... | @@ -974,6 +974,11 @@ |
974 | 974 | |
975 | 975 | /* Our usage of tstamp should remain private */ |
976 | 976 | skb->tstamp.tv64 = 0; |
977 | + | |
978 | + /* Cleanup our debris for IP stacks */ | |
979 | + memset(skb->cb, 0, max(sizeof(struct inet_skb_parm), | |
980 | + sizeof(struct inet6_skb_parm))); | |
981 | + | |
977 | 982 | err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); |
978 | 983 | |
979 | 984 | if (likely(err <= 0)) |
... | ... | @@ -1412,6 +1412,13 @@ |
1412 | 1412 | |
1413 | 1413 | th = tcp_hdr(skb); |
1414 | 1414 | hdr = ipv6_hdr(skb); |
1415 | + /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() | |
1416 | + * barrier() makes sure compiler wont play fool^Waliasing games. | |
1417 | + */ | |
1418 | + memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), | |
1419 | + sizeof(struct inet6_skb_parm)); | |
1420 | + barrier(); | |
1421 | + | |
1415 | 1422 | TCP_SKB_CB(skb)->seq = ntohl(th->seq); |
1416 | 1423 | TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + |
1417 | 1424 | skb->len - th->doff*4); |
-
mentioned in commit 2077ee
-
mentioned in commit 2077ee
-
mentioned in commit 870c31
-
mentioned in commit 870c31
-
mentioned in commit 2077ee
-
mentioned in commit 2077ee
-
mentioned in commit 870c31
-
mentioned in commit 870c31
-
mentioned in commit 2dc49d
-
mentioned in commit 2dc49d
-
mentioned in commit 04f81f
-
mentioned in commit 04f81f
-
mentioned in commit 2077ee
-
mentioned in commit 2077ee
-
mentioned in commit 870c31
-
mentioned in commit 870c31
-
mentioned in commit 2dc49d
-
mentioned in commit 04f81f
-
mentioned in commit 2077ee
-
mentioned in commit 2077ee
-
mentioned in commit 870c31
-
mentioned in commit 870c31
-
mentioned in commit 2dc49d
-
mentioned in commit 04f81f
-
mentioned in commit 2077ee
-
mentioned in commit 2077ee
-
mentioned in commit 870c31
-
mentioned in commit 870c31
-
mentioned in commit 2dc49d
-
mentioned in commit 04f81f
-
mentioned in commit 2077ee
-
mentioned in commit 2077ee
-
mentioned in commit 870c31
-
mentioned in commit 870c31
-
mentioned in commit 2dc49d
-
mentioned in commit 04f81f
-
mentioned in commit 2077ee
-
mentioned in commit 2077ee
-
mentioned in commit 870c31
-
mentioned in commit 870c31
-
mentioned in commit 2dc49d
-
mentioned in commit 04f81f
-
mentioned in commit 2077ee
-
mentioned in commit 2077ee
-
mentioned in commit 870c31
-
mentioned in commit 870c31
-
mentioned in commit 2dc49d
-
mentioned in commit 04f81f
-
mentioned in commit 2077ee
-
mentioned in commit 2077ee
-
mentioned in commit 870c31
-
mentioned in commit 870c31
-
mentioned in commit 2dc49d
-
mentioned in commit 04f81f
-
mentioned in commit 2077ee
-
mentioned in commit 2077ee
-
mentioned in commit 870c31
-
mentioned in commit 870c31
-
mentioned in commit 2dc49d
-
mentioned in commit 04f81f
-
mentioned in commit 2077ee
-
mentioned in commit 2077ee
-
mentioned in commit 870c31
-
mentioned in commit 870c31
-
mentioned in commit 2dc49d
-
mentioned in commit 04f81f
-
mentioned in commit 2077ee
-
mentioned in commit 2077ee
-
mentioned in commit 870c31
-
mentioned in commit 870c31
-
mentioned in commit 2dc49d
-
mentioned in commit 2077ee
-
mentioned in commit 2077ee
-
mentioned in commit 870c31
-
mentioned in commit 870c31
-
mentioned in commit 04f81f
-
mentioned in commit 2dc49d
-
mentioned in commit 04f81f
-
mentioned in commit 8ce486
-
mentioned in commit a04a48
-
mentioned in commit 1e340b
-
mentioned in commit 8ce486
-
mentioned in commit a04a48
-
mentioned in commit 1e340b