Commit a6cc0cfa72e0b6d9f2c8fd858aacc32313c4f272

Authored by John Fastabend
Committed by David S. Miller
1 parent 1ec4864b10

net: Add layer 2 hardware acceleration operations for macvlan devices

Add an operations structure that allows a network interface to export
the fact that it supports packet forwarding in hardware between
physical interfaces and other mac layer devices assigned to it (such
as macvlans). This operations structure can be used by virtual mac
devices to bypass software switching so that forwarding can be done
in hardware more efficiently.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: "David S. Miller" <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 8 changed files with 89 additions and 8 deletions Side-by-side Diff

drivers/net/macvlan.c
... ... @@ -297,7 +297,13 @@
297 297 int ret;
298 298 const struct macvlan_dev *vlan = netdev_priv(dev);
299 299  
300   - ret = macvlan_queue_xmit(skb, dev);
  300 + if (vlan->fwd_priv) {
  301 + skb->dev = vlan->lowerdev;
  302 + ret = dev_hard_start_xmit(skb, skb->dev, NULL, vlan->fwd_priv);
  303 + } else {
  304 + ret = macvlan_queue_xmit(skb, dev);
  305 + }
  306 +
301 307 if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
302 308 struct macvlan_pcpu_stats *pcpu_stats;
303 309  
... ... @@ -347,6 +353,21 @@
347 353 goto hash_add;
348 354 }
349 355  
  356 + if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) {
  357 + vlan->fwd_priv =
  358 + lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev);
  359 +
  360 + /* If we get a NULL pointer back, or if we get an error
  361 + * then we should just fall through to the non accelerated path
  362 + */
  363 + if (IS_ERR_OR_NULL(vlan->fwd_priv)) {
  364 + vlan->fwd_priv = NULL;
  365 + } else {
  366 + dev->features &= ~NETIF_F_LLTX;
  367 + return 0;
  368 + }
  369 + }
  370 +
350 371 err = -EBUSY;
351 372 if (macvlan_addr_busy(vlan->port, dev->dev_addr))
352 373 goto out;
... ... @@ -367,6 +388,11 @@
367 388 del_unicast:
368 389 dev_uc_del(lowerdev, dev->dev_addr);
369 390 out:
  391 + if (vlan->fwd_priv) {
  392 + lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
  393 + vlan->fwd_priv);
  394 + vlan->fwd_priv = NULL;
  395 + }
370 396 return err;
371 397 }
372 398  
... ... @@ -375,6 +401,13 @@
375 401 struct macvlan_dev *vlan = netdev_priv(dev);
376 402 struct net_device *lowerdev = vlan->lowerdev;
377 403  
  404 + if (vlan->fwd_priv) {
  405 + lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
  406 + vlan->fwd_priv);
  407 + vlan->fwd_priv = NULL;
  408 + return 0;
  409 + }
  410 +
378 411 dev_uc_unsync(lowerdev, dev);
379 412 dev_mc_unsync(lowerdev, dev);
380 413  
... ... @@ -833,6 +866,7 @@
833 866 if (err < 0)
834 867 goto destroy_port;
835 868  
  869 + dev->priv_flags |= IFF_MACVLAN;
836 870 err = netdev_upper_dev_link(lowerdev, dev);
837 871 if (err)
838 872 goto destroy_port;
include/linux/if_macvlan.h
... ... @@ -61,6 +61,7 @@
61 61 struct hlist_node hlist;
62 62 struct macvlan_port *port;
63 63 struct net_device *lowerdev;
  64 + void *fwd_priv;
64 65 struct macvlan_pcpu_stats __percpu *pcpu_stats;
65 66  
66 67 DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
include/linux/netdev_features.h
... ... @@ -62,6 +62,7 @@
62 62 NETIF_F_HW_VLAN_STAG_TX_BIT, /* Transmit VLAN STAG HW acceleration */
63 63 NETIF_F_HW_VLAN_STAG_RX_BIT, /* Receive VLAN STAG HW acceleration */
64 64 NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */
  65 + NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */
65 66  
66 67 /*
67 68 * Add your fresh new feature above and remember to update
... ... @@ -116,6 +117,7 @@
116 117 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
117 118 #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX)
118 119 #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX)
  120 +#define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD)
119 121  
120 122 /* Features valid for ethtool to change */
121 123 /* = all defined minus driver/device-class-related */
include/linux/netdevice.h
... ... @@ -962,6 +962,25 @@
962 962 * Called by vxlan to notify the driver about a UDP port and socket
963 963 * address family that vxlan is not listening to anymore. The operation
964 964 * is protected by the vxlan_net->sock_lock.
  965 + *
  966 + * void* (*ndo_dfwd_add_station)(struct net_device *pdev,
  967 + * struct net_device *dev)
  968 + * Called by upper layer devices to accelerate switching or other
  969 + * station functionality into hardware. 'pdev' is the lowerdev
  970 + * to use for the offload and 'dev' is the net device that will
  971 + * back the offload. Returns a pointer to the private structure
  972 + * the upper layer will maintain.
  973 + * void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv)
  974 + * Called by upper layer device to delete the station created
  975 + * by 'ndo_dfwd_add_station'. 'pdev' is the net device backing
  976 + * the station and priv is the structure returned by the add
  977 + * operation.
  978 + * netdev_tx_t (*ndo_dfwd_start_xmit)(struct sk_buff *skb,
  979 + * struct net_device *dev,
  980 + * void *priv);
  981 + * Callback to use for xmit over the accelerated station. This
  982 + * is used in place of ndo_start_xmit on accelerated net
  983 + * devices.
965 984 */
966 985 struct net_device_ops {
967 986 int (*ndo_init)(struct net_device *dev);
... ... @@ -1098,6 +1117,15 @@
1098 1117 void (*ndo_del_vxlan_port)(struct net_device *dev,
1099 1118 sa_family_t sa_family,
1100 1119 __be16 port);
  1120 +
  1121 + void* (*ndo_dfwd_add_station)(struct net_device *pdev,
  1122 + struct net_device *dev);
  1123 + void (*ndo_dfwd_del_station)(struct net_device *pdev,
  1124 + void *priv);
  1125 +
  1126 + netdev_tx_t (*ndo_dfwd_start_xmit) (struct sk_buff *skb,
  1127 + struct net_device *dev,
  1128 + void *priv);
1101 1129 };
1102 1130  
1103 1131 /*
... ... @@ -1195,6 +1223,7 @@
1195 1223 /* Management operations */
1196 1224 const struct net_device_ops *netdev_ops;
1197 1225 const struct ethtool_ops *ethtool_ops;
  1226 + const struct forwarding_accel_ops *fwd_ops;
1198 1227  
1199 1228 /* Hardware header description */
1200 1229 const struct header_ops *header_ops;
... ... @@ -2388,7 +2417,7 @@
2388 2417 int dev_get_phys_port_id(struct net_device *dev,
2389 2418 struct netdev_phys_port_id *ppid);
2390 2419 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2391   - struct netdev_queue *txq);
  2420 + struct netdev_queue *txq, void *accel_priv);
2392 2421 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
2393 2422  
2394 2423 extern int netdev_budget;
... ... @@ -2965,6 +2994,11 @@
2965 2994 unsigned int size)
2966 2995 {
2967 2996 dev->gso_max_size = size;
  2997 +}
  2998 +
  2999 +static inline bool netif_is_macvlan(struct net_device *dev)
  3000 +{
  3001 + return dev->priv_flags & IFF_MACVLAN;
2968 3002 }
2969 3003  
2970 3004 static inline bool netif_is_bond_master(struct net_device *dev)
include/uapi/linux/if.h
... ... @@ -83,6 +83,7 @@
83 83 #define IFF_SUPP_NOFCS 0x80000 /* device supports sending custom FCS */
84 84 #define IFF_LIVE_ADDR_CHANGE 0x100000 /* device supports hardware address
85 85 * change when it's running */
  86 +#define IFF_MACVLAN 0x200000 /* Macvlan device */
86 87  
87 88  
88 89 #define IF_GET_IFACE 0x0001 /* for querying only */
... ... @@ -2538,7 +2538,7 @@
2538 2538 }
2539 2539  
2540 2540 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2541   - struct netdev_queue *txq)
  2541 + struct netdev_queue *txq, void *accel_priv)
2542 2542 {
2543 2543 const struct net_device_ops *ops = dev->netdev_ops;
2544 2544 int rc = NETDEV_TX_OK;
2545 2545  
... ... @@ -2604,9 +2604,13 @@
2604 2604 dev_queue_xmit_nit(skb, dev);
2605 2605  
2606 2606 skb_len = skb->len;
2607   - rc = ops->ndo_start_xmit(skb, dev);
  2607 + if (accel_priv)
  2608 + rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv);
  2609 + else
  2610 + rc = ops->ndo_start_xmit(skb, dev);
  2611 +
2608 2612 trace_net_dev_xmit(skb, rc, dev, skb_len);
2609   - if (rc == NETDEV_TX_OK)
  2613 + if (rc == NETDEV_TX_OK && txq)
2610 2614 txq_trans_update(txq);
2611 2615 return rc;
2612 2616 }
... ... @@ -2622,7 +2626,10 @@
2622 2626 dev_queue_xmit_nit(nskb, dev);
2623 2627  
2624 2628 skb_len = nskb->len;
2625   - rc = ops->ndo_start_xmit(nskb, dev);
  2629 + if (accel_priv)
  2630 + rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv);
  2631 + else
  2632 + rc = ops->ndo_start_xmit(nskb, dev);
2626 2633 trace_net_dev_xmit(nskb, rc, dev, skb_len);
2627 2634 if (unlikely(rc != NETDEV_TX_OK)) {
2628 2635 if (rc & ~NETDEV_TX_MASK)
... ... @@ -2647,6 +2654,7 @@
2647 2654 out:
2648 2655 return rc;
2649 2656 }
  2657 +EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
2650 2658  
2651 2659 static void qdisc_pkt_len_init(struct sk_buff *skb)
2652 2660 {
... ... @@ -2854,7 +2862,7 @@
2854 2862  
2855 2863 if (!netif_xmit_stopped(txq)) {
2856 2864 __this_cpu_inc(xmit_recursion);
2857   - rc = dev_hard_start_xmit(skb, dev, txq);
  2865 + rc = dev_hard_start_xmit(skb, dev, txq, NULL);
2858 2866 __this_cpu_dec(xmit_recursion);
2859 2867 if (dev_xmit_complete(rc)) {
2860 2868 HARD_TX_UNLOCK(dev, txq);
... ... @@ -96,6 +96,7 @@
96 96 [NETIF_F_LOOPBACK_BIT] = "loopback",
97 97 [NETIF_F_RXFCS_BIT] = "rx-fcs",
98 98 [NETIF_F_RXALL_BIT] = "rx-all",
  99 + [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
99 100 };
100 101  
101 102 static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
net/sched/sch_generic.c
... ... @@ -126,7 +126,7 @@
126 126  
127 127 HARD_TX_LOCK(dev, txq, smp_processor_id());
128 128 if (!netif_xmit_frozen_or_stopped(txq))
129   - ret = dev_hard_start_xmit(skb, dev, txq);
  129 + ret = dev_hard_start_xmit(skb, dev, txq, NULL);
130 130  
131 131 HARD_TX_UNLOCK(dev, txq);
132 132