Commit 971f10eca186cab238c49daa91f703c5a001b0b1

Authored by Eric Dumazet
Committed by David S. Miller
1 parent a224772db8

tcp: better TCP_SKB_CB layout to reduce cache line misses

TCP maintains lists of skbs in the write queue and in the receive queues
(both the in-order and out-of-order queues).

Scanning these lists, in both the input and output paths, usually requires
access to skb->next, TCP_SKB_CB(skb)->seq, and TCP_SKB_CB(skb)->end_seq.

These fields are currently in two different cache lines, meaning we
waste a lot of memory bandwidth when these queues are big and flows
have either packet drops or packet reorders.

We can move TCP_SKB_CB(skb)->header to the end of TCP_SKB_CB, because
this header is not used in the fast path. This allows TCP to search much faster
in the skb lists.

Even with regular flows, we save one cache line miss in the fast path.

Thanks to Christoph Paasch for noticing that we need to clean up
skb->cb[] (IPCB/IP6CB) before entering the IP stack in the tx path,
and that I forgot the IPCB uses in tcp_v4_hnd_req() and tcp_v4_save_options().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 4 changed files with 30 additions and 13 deletions Side-by-side Diff

... ... @@ -696,12 +696,6 @@
696 696 * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
697 697 */
698 698 struct tcp_skb_cb {
699   - union {
700   - struct inet_skb_parm h4;
701   -#if IS_ENABLED(CONFIG_IPV6)
702   - struct inet6_skb_parm h6;
703   -#endif
704   - } header; /* For incoming frames */
705 699 __u32 seq; /* Starting sequence number */
706 700 __u32 end_seq; /* SEQ + FIN + SYN + datalen */
707 701 __u32 tcp_tw_isn; /* isn chosen by tcp_timewait_state_process() */
... ... @@ -720,6 +714,12 @@
720 714 __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
721 715 /* 1 byte hole */
722 716 __u32 ack_seq; /* Sequence number ACK'd */
  717 + union {
  718 + struct inet_skb_parm h4;
  719 +#if IS_ENABLED(CONFIG_IPV6)
  720 + struct inet6_skb_parm h6;
  721 +#endif
  722 + } header; /* For incoming frames */
723 723 };
724 724  
725 725 #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
... ... @@ -886,18 +886,16 @@
886 886 */
887 887 static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
888 888 {
889   - const struct ip_options *opt = &(IPCB(skb)->opt);
  889 + const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
890 890 struct ip_options_rcu *dopt = NULL;
891 891  
892 892 if (opt && opt->optlen) {
893 893 int opt_size = sizeof(*dopt) + opt->optlen;
894 894  
895 895 dopt = kmalloc(opt_size, GFP_ATOMIC);
896   - if (dopt) {
897   - if (ip_options_echo(&dopt->opt, skb)) {
898   - kfree(dopt);
899   - dopt = NULL;
900   - }
  896 + if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) {
  897 + kfree(dopt);
  898 + dopt = NULL;
901 899 }
902 900 }
903 901 return dopt;
... ... @@ -1431,7 +1429,7 @@
1431 1429  
1432 1430 #ifdef CONFIG_SYN_COOKIES
1433 1431 if (!th->syn)
1434   - sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
  1432 + sk = cookie_v4_check(sk, skb, &TCP_SKB_CB(skb)->header.h4.opt);
1435 1433 #endif
1436 1434 return sk;
1437 1435 }
... ... @@ -1636,6 +1634,13 @@
1636 1634  
1637 1635 th = tcp_hdr(skb);
1638 1636 iph = ip_hdr(skb);
  1637 + /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
  1638 + * barrier() makes sure compiler wont play fool^Waliasing games.
  1639 + */
  1640 + memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
  1641 + sizeof(struct inet_skb_parm));
  1642 + barrier();
  1643 +
1639 1644 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1640 1645 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1641 1646 skb->len - th->doff * 4);
net/ipv4/tcp_output.c
... ... @@ -974,6 +974,11 @@
974 974  
975 975 /* Our usage of tstamp should remain private */
976 976 skb->tstamp.tv64 = 0;
  977 +
  978 + /* Cleanup our debris for IP stacks */
  979 + memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
  980 + sizeof(struct inet6_skb_parm)));
  981 +
977 982 err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
978 983  
979 984 if (likely(err <= 0))
... ... @@ -1412,6 +1412,13 @@
1412 1412  
1413 1413 th = tcp_hdr(skb);
1414 1414 hdr = ipv6_hdr(skb);
  1415 + /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
  1416 + * barrier() makes sure compiler wont play fool^Waliasing games.
  1417 + */
  1418 + memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
  1419 + sizeof(struct inet6_skb_parm));
  1420 + barrier();
  1421 +
1415 1422 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1416 1423 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1417 1424 skb->len - th->doff*4);