Commit 5051ebd275de672b807c28d93002c2fb0514a3c9

Authored by Eric Dumazet
Committed by David S. Miller
1 parent 512615b6b8

ipv4: udp: optimize unicast RX path

We first locate the (local port) hash chain head.
If only a few sockets are in this chain, we proceed with the previous lookup algorithm.

If too many sockets are listed, we take a look at the secondary
(port, address) hash chain we added in previous patch.

We choose the shortest chain and proceed with a RCU lookup on the elected chain.

But if we chose the (port, address) chain and fail to find a socket on the given address,
we must try another lookup on the (port, INADDR_ANY) chain to find a socket not bound
to a particular IP.

-> No extra cost for typical setups, where the first lookup will probably
be performed.

RCU lookups are used everywhere; we don't acquire a spinlock.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 1 changed file with 112 additions and 3 deletions Side-by-side Diff

... ... @@ -298,6 +298,91 @@
298 298 return score;
299 299 }
300 300  
  301 +/*
  302 + * In this second variant, we check that (daddr, hnum) matches the socket's (inet_rcv_saddr, inet_num) exactly. Returns -1 if the socket is in another netns, is IPv6-only, or any compared field mismatches; otherwise 1 for a PF_INET socket (0 for other families) plus 2 for each of remote address, remote port and bound device that is set on the socket and equals saddr, sport and dif respectively.
  303 + */
  304 +#define SCORE2_MAX (1 + 2 + 2 + 2) /* PF_INET + remote address + remote port + bound device */
  305 +static inline int compute_score2(struct sock *sk, struct net *net,
  306 + __be32 saddr, __be16 sport,
  307 + __be32 daddr, unsigned int hnum, int dif)
  308 +{
  309 + int score = -1;
  310 +
  311 + if (net_eq(sock_net(sk), net) && !ipv6_only_sock(sk)) {
  312 + struct inet_sock *inet = inet_sk(sk);
  313 +
  314 + if (inet->inet_rcv_saddr != daddr) /* local address must match exactly; no wildcard handling here */
  315 + return -1;
  316 + if (inet->inet_num != hnum)
  317 + return -1;
  318 +
  319 + score = (sk->sk_family == PF_INET ? 1 : 0);
  320 + if (inet->inet_daddr) { /* remote address set on the socket: packet source must equal it */
  321 + if (inet->inet_daddr != saddr)
  322 + return -1;
  323 + score += 2;
  324 + }
  325 + if (inet->inet_dport) { /* remote port set on the socket: packet source port must equal it */
  326 + if (inet->inet_dport != sport)
  327 + return -1;
  328 + score += 2;
  329 + }
  330 + if (sk->sk_bound_dev_if) { /* socket bound to a device: dif must equal it */
  331 + if (sk->sk_bound_dev_if != dif)
  332 + return -1;
  333 + score += 2;
  334 + }
  335 + }
  336 + return score;
  337 +}
  338 +
  339 +#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
  340 + hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node) /* iterate sockets of a nulls list linked through skc_portaddr_node */
  341 +
  342 +/* called with rcu_read_lock() held; scans the hslot2 chain and returns the socket with the highest compute_score2() with its sk_refcnt incremented, or NULL if none is usable */
  343 +static struct sock *udp4_lib_lookup2(struct net *net,
  344 + __be32 saddr, __be16 sport,
  345 + __be32 daddr, unsigned int hnum, int dif,
  346 + struct udp_hslot *hslot2, unsigned int slot2)
  347 +{
  348 + struct sock *sk, *result;
  349 + struct hlist_nulls_node *node;
  350 + int score, badness;
  351 +
  352 +begin:
  353 + result = NULL;
  354 + badness = -1;
  355 + udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
  356 + score = compute_score2(sk, net, saddr, sport,
  357 + daddr, hnum, dif);
  358 + if (score > badness) {
  359 + result = sk;
  360 + badness = score;
  361 + if (score == SCORE2_MAX) /* highest possible score: no need to scan further */
  362 + goto exact_match;
  363 + }
  364 + }
  365 + /*
  366 + * if the nulls value we got at the end of this lookup is
  367 + * not the expected one (slot2), we must restart lookup.
  368 + * We probably met an item that was moved to another chain.
  369 + */
  370 + if (get_nulls_value(node) != slot2)
  371 + goto begin;
  372 +
  373 + if (result) {
  374 +exact_match:
  375 + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) /* refcount was already zero: do not return this socket */
  376 + result = NULL;
  377 + else if (unlikely(compute_score2(result, net, saddr, sport,
  378 + daddr, hnum, dif) < badness)) { /* re-scored lower after taking the reference: drop it and rescan */
  379 + sock_put(result);
  380 + goto begin;
  381 + }
  382 + }
  383 + return result;
  384 +}
  385 +
301 386 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
302 387 * harder than this. -DaveM
303 388 */
304 389  
... ... @@ -308,11 +393,35 @@
308 393 struct sock *sk, *result;
309 394 struct hlist_nulls_node *node;
310 395 unsigned short hnum = ntohs(dport);
311   - unsigned int hash = udp_hashfn(net, hnum, udptable->mask);
312   - struct udp_hslot *hslot = &udptable->hash[hash];
  396 + unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
  397 + struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
313 398 int score, badness;
314 399  
315 400 rcu_read_lock();
  401 + if (hslot->count > 10) {
  402 + hash2 = udp4_portaddr_hash(net, daddr, hnum);
  403 + slot2 = hash2 & udptable->mask;
  404 + hslot2 = &udptable->hash2[slot2];
  405 + if (hslot->count < hslot2->count)
  406 + goto begin;
  407 +
  408 + result = udp4_lib_lookup2(net, saddr, sport,
  409 + daddr, hnum, dif,
  410 + hslot2, slot2);
  411 + if (!result) {
  412 + hash2 = udp4_portaddr_hash(net, INADDR_ANY, hnum);
  413 + slot2 = hash2 & udptable->mask;
  414 + hslot2 = &udptable->hash2[slot2];
  415 + if (hslot->count < hslot2->count)
  416 + goto begin;
  417 +
  418 + result = udp4_lib_lookup2(net, INADDR_ANY, sport,
  419 + daddr, hnum, dif,
  420 + hslot2, slot2);
  421 + }
  422 + rcu_read_unlock();
  423 + return result;
  424 + }
316 425 begin:
317 426 result = NULL;
318 427 badness = -1;
... ... @@ -329,7 +438,7 @@
329 438 * not the expected one, we must restart lookup.
330 439 * We probably met an item that was moved to another chain.
331 440 */
332   - if (get_nulls_value(node) != hash)
  441 + if (get_nulls_value(node) != slot)
333 442 goto begin;
334 443  
335 444 if (result) {