Commit adc9300e78e6091a7eaa1821213836379d4dbaa8

Authored by Eric Dumazet
Committed by David S. Miller
1 parent d6f144830b

net: use jump_label to shortcut RPS if not setup

Most machines don't use RPS/RFS, yet pay a fair number of instructions in
netif_receive_skb() / netif_rx() / get_rps_cpu() just to discover that
RPS/RFS is not set up.

Add a jump_label named rps_needed.

If no device rps_map or global rps_sock_flow_table is set up,
netif_receive_skb() / netif_rx() execute a single instruction instead of
many, including conditional jumps:

jmp +0    (if CONFIG_JUMP_LABEL=y)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 4 changed files with 26 additions and 16 deletions Side-by-side Diff

include/linux/netdevice.h
... ... @@ -214,6 +214,11 @@
214 214 #include <linux/cache.h>
215 215 #include <linux/skbuff.h>
216 216  
  217 +#ifdef CONFIG_RPS
  218 +#include <linux/jump_label.h>
  219 +extern struct jump_label_key rps_needed;
  220 +#endif
  221 +
217 222 struct neighbour;
218 223 struct neigh_parms;
219 224 struct sk_buff;
net/core/dev.c
... ... @@ -2711,6 +2711,8 @@
2711 2711 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2712 2712 EXPORT_SYMBOL(rps_sock_flow_table);
2713 2713  
  2714 +struct jump_label_key rps_needed __read_mostly;
  2715 +
2714 2716 static struct rps_dev_flow *
2715 2717 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2716 2718 struct rps_dev_flow *rflow, u16 next_cpu)
... ... @@ -2994,7 +2996,7 @@
2994 2996  
2995 2997 trace_netif_rx(skb);
2996 2998 #ifdef CONFIG_RPS
2997   - {
  2999 + if (static_branch(&rps_needed)) {
2998 3000 struct rps_dev_flow voidflow, *rflow = &voidflow;
2999 3001 int cpu;
3000 3002  
3001 3003  
... ... @@ -3009,14 +3011,13 @@
3009 3011  
3010 3012 rcu_read_unlock();
3011 3013 preempt_enable();
3012   - }
3013   -#else
  3014 + } else
  3015 +#endif
3014 3016 {
3015 3017 unsigned int qtail;
3016 3018 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3017 3019 put_cpu();
3018 3020 }
3019   -#endif
3020 3021 return ret;
3021 3022 }
3022 3023 EXPORT_SYMBOL(netif_rx);
... ... @@ -3359,7 +3360,7 @@
3359 3360 return NET_RX_SUCCESS;
3360 3361  
3361 3362 #ifdef CONFIG_RPS
3362   - {
  3363 + if (static_branch(&rps_needed)) {
3363 3364 struct rps_dev_flow voidflow, *rflow = &voidflow;
3364 3365 int cpu, ret;
3365 3366  
3366 3367  
3367 3368  
3368 3369  
... ... @@ -3370,16 +3371,12 @@
3370 3371 if (cpu >= 0) {
3371 3372 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3372 3373 rcu_read_unlock();
3373   - } else {
3374   - rcu_read_unlock();
3375   - ret = __netif_receive_skb(skb);
  3374 + return ret;
3376 3375 }
3377   -
3378   - return ret;
  3376 + rcu_read_unlock();
3379 3377 }
3380   -#else
3381   - return __netif_receive_skb(skb);
3382 3378 #endif
  3379 + return __netif_receive_skb(skb);
3383 3380 }
3384 3381 EXPORT_SYMBOL(netif_receive_skb);
3385 3382  
net/core/net-sysfs.c
... ... @@ -606,9 +606,12 @@
606 606 rcu_assign_pointer(queue->rps_map, map);
607 607 spin_unlock(&rps_map_lock);
608 608  
609   - if (old_map)
  609 + if (map)
  610 + jump_label_inc(&rps_needed);
  611 + if (old_map) {
610 612 kfree_rcu(old_map, rcu);
611   -
  613 + jump_label_dec(&rps_needed);
  614 + }
612 615 free_cpumask_var(mask);
613 616 return len;
614 617 }
net/core/sysctl_net_core.c
... ... @@ -68,8 +68,13 @@
68 68  
69 69 if (sock_table != orig_sock_table) {
70 70 rcu_assign_pointer(rps_sock_flow_table, sock_table);
71   - synchronize_rcu();
72   - vfree(orig_sock_table);
  71 + if (sock_table)
  72 + jump_label_inc(&rps_needed);
  73 + if (orig_sock_table) {
  74 + jump_label_dec(&rps_needed);
  75 + synchronize_rcu();
  76 + vfree(orig_sock_table);
  77 + }
73 78 }
74 79 }
75 80