Commit b73c3d0e4f0e1961e15bec18720e48aabebe2109

Authored by Tom Herbert
Committed by David S. Miller
1 parent 5ed20a68cd

net: Save TX flow hash in sock and set in skbuf on xmit

For a connected socket we can precompute the flow hash for setting
in skb->hash on output. This is a performance advantage over
calculating the skb->hash for every packet on the connection. The
computation is done using the common hash algorithm to be consistent
with computations done for packets of the connection in other states
where there is no socket (e.g. time-wait, syn-recv, syn-cookies).

This patch adds sk_txhash to the sock structure. inet_set_txhash and
ip6_set_txhash functions are added which are called from points in
TCP and UDP where the socket moves to the established state.

skb_set_hash_from_sk is a function which sets skb->hash from the
sock txhash value. It is called in the UDP and TCP transmit paths when
transmitting within the context of a socket.

Tested: ran super_netperf with 200 TCP_RR streams over a vxlan
interface (in this case skb_get_hash is called on every TX packet to
create a UDP source port).

Before fix:

  95.02% CPU utilization
  154/256/505 90/95/99% latencies
  1.13042e+06 tps

  Time in functions:
    0.28% skb_flow_dissect
    0.21% __skb_get_hash

After fix:

  94.95% CPU utilization
  156/254/485 90/95/99% latencies
  1.15447e+06 tps

  Neither __skb_get_hash nor skb_flow_dissect appear in perf

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 8 changed files with 50 additions and 0 deletions Side-by-side Diff

... ... @@ -31,6 +31,7 @@
31 31 #include <net/route.h>
32 32 #include <net/snmp.h>
33 33 #include <net/flow.h>
  34 +#include <net/flow_keys.h>
34 35  
35 36 struct sock;
36 37  
... ... @@ -351,6 +352,19 @@
351 352 {
352 353 return csum_tcpudp_nofold(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
353 354 skb->len, proto, 0);
  355 +}
  356 +
  357 +static inline void inet_set_txhash(struct sock *sk)
  358 +{
  359 + struct inet_sock *inet = inet_sk(sk);
  360 + struct flow_keys keys;
  361 +
  362 + keys.src = inet->inet_saddr;
  363 + keys.dst = inet->inet_daddr;
  364 + keys.port16[0] = inet->inet_sport;
  365 + keys.port16[1] = inet->inet_dport;
  366 +
  367 + sk->sk_txhash = flow_hash_from_keys(&keys);
354 368 }
355 369  
356 370 /*
... ... @@ -19,6 +19,7 @@
19 19 #include <net/if_inet6.h>
20 20 #include <net/ndisc.h>
21 21 #include <net/flow.h>
  22 +#include <net/flow_keys.h>
22 23 #include <net/snmp.h>
23 24  
24 25 #define SIN6_LEN_RFC2133 24
... ... @@ -682,6 +683,20 @@
682 683 if (hlimit < 0)
683 684 hlimit = ip6_dst_hoplimit(dst);
684 685 return hlimit;
  686 +}
  687 +
  688 +static inline void ip6_set_txhash(struct sock *sk)
  689 +{
  690 + struct inet_sock *inet = inet_sk(sk);
  691 + struct ipv6_pinfo *np = inet6_sk(sk);
  692 + struct flow_keys keys;
  693 +
  694 + keys.src = (__force __be32)ipv6_addr_hash(&np->saddr);
  695 + keys.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr);
  696 + keys.port16[0] = inet->inet_sport;
  697 + keys.port16[1] = inet->inet_dport;
  698 +
  699 + sk->sk_txhash = flow_hash_from_keys(&keys);
685 700 }
686 701  
687 702 /*
... ... @@ -273,6 +273,7 @@
273 273 * @sk_rcvtimeo: %SO_RCVTIMEO setting
274 274 * @sk_sndtimeo: %SO_SNDTIMEO setting
275 275 * @sk_rxhash: flow hash received from netif layer
  276 + * @sk_txhash: computed flow hash for use on transmit
276 277 * @sk_filter: socket filtering instructions
277 278 * @sk_protinfo: private area, net family specific, when not using slab
278 279 * @sk_timer: sock cleanup timer
... ... @@ -347,6 +348,7 @@
347 348 #ifdef CONFIG_RPS
348 349 __u32 sk_rxhash;
349 350 #endif
  351 + __u32 sk_txhash;
350 352 #ifdef CONFIG_NET_RX_BUSY_POLL
351 353 unsigned int sk_napi_id;
352 354 unsigned int sk_ll_usec;
... ... @@ -1980,6 +1982,14 @@
1980 1982 }
1981 1983 }
1982 1984  
  1985 +static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
  1986 +{
  1987 + if (sk->sk_txhash) {
  1988 + skb->l4_hash = 1;
  1989 + skb->hash = sk->sk_txhash;
  1990 + }
  1991 +}
  1992 +
1983 1993 /*
1984 1994 * Queue a received datagram if it will fit. Stream and sequenced
1985 1995 * protocols can't normally use this as they need to fit buffers in
... ... @@ -1994,6 +2004,7 @@
1994 2004 skb_orphan(skb);
1995 2005 skb->sk = sk;
1996 2006 skb->destructor = sock_wfree;
  2007 + skb_set_hash_from_sk(skb, sk);
1997 2008 /*
1998 2009 * We used to take a refcount on sk, but following operation
1999 2010 * is enough to guarantee sk_free() wont free this sock until
... ... @@ -76,6 +76,7 @@
76 76 inet->inet_daddr = fl4->daddr;
77 77 inet->inet_dport = usin->sin_port;
78 78 sk->sk_state = TCP_ESTABLISHED;
  79 + inet_set_txhash(sk);
79 80 inet->inet_id = jiffies;
80 81  
81 82 sk_dst_set(sk, &rt->dst);
... ... @@ -208,6 +208,8 @@
208 208 inet->inet_dport = usin->sin_port;
209 209 inet->inet_daddr = daddr;
210 210  
  211 + inet_set_txhash(sk);
  212 +
211 213 inet_csk(sk)->icsk_ext_hdr_len = 0;
212 214 if (inet_opt)
213 215 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
... ... @@ -1334,6 +1336,7 @@
1334 1336 newinet->mc_ttl = ip_hdr(skb)->ttl;
1335 1337 newinet->rcv_tos = ip_hdr(skb)->tos;
1336 1338 inet_csk(newsk)->icsk_ext_hdr_len = 0;
  1339 + inet_set_txhash(newsk);
1337 1340 if (inet_opt)
1338 1341 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1339 1342 newinet->inet_id = newtp->write_seq ^ jiffies;
net/ipv4/tcp_output.c
... ... @@ -916,6 +916,7 @@
916 916 skb_orphan(skb);
917 917 skb->sk = sk;
918 918 skb->destructor = tcp_wfree;
  919 + skb_set_hash_from_sk(skb, sk);
919 920 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
920 921  
921 922 /* Build TCP header and checksum it. */
... ... @@ -199,6 +199,7 @@
199 199 NULL);
200 200  
201 201 sk->sk_state = TCP_ESTABLISHED;
  202 + ip6_set_txhash(sk);
202 203 out:
203 204 fl6_sock_release(flowlabel);
204 205 return err;
... ... @@ -198,6 +198,8 @@
198 198 sk->sk_v6_daddr = usin->sin6_addr;
199 199 np->flow_label = fl6.flowlabel;
200 200  
  201 + ip6_set_txhash(sk);
  202 +
201 203 /*
202 204 * TCP over IPv4
203 205 */
... ... @@ -1131,6 +1133,8 @@
1131 1133 newnp->saddr = ireq->ir_v6_loc_addr;
1132 1134 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1133 1135 newsk->sk_bound_dev_if = ireq->ir_iif;
  1136 +
  1137 + ip6_set_txhash(newsk);
1134 1138  
1135 1139 /* Now IPv6 options...
1136 1140