Commit a46dc748caea185d4d0978280a1af0112bf6a8f8

Authored by David S. Miller

Merge branch 'macvtap_capture'

Vlad Yasevich says:

====================
Add packet capture support on macvtap device

Change from RFC:
  - moved to the rx_handler approach.

This series adds support for packet capturing on macvtap device.
The initial approach was to simply export the capturing code as
a function from the core network.  While simple, it was not
a very architecturally clean approach.

The new approach is to provide macvtap with its own rx_handler, which
is attached to the macvtap device itself.   Macvlan will simply requeue
the packet with an updated skb->dev.  BTW, macvlan layer already does this
for macvlan devices.  So, now macvtap and macvlan have almost the
same exact input path.

I've toyed with short-circuiting the input path for macvtap by returning
RX_HANDLER_ANOTHER, but that just made the code more complicated and
didn't provide any kind of measurable gain (at least according to
netperf and perf runs on the host).

To see if there was a performance regression, I ran 1, 2 and 4 netperf
STREAM and MAERTS tests against the VM from both remote host and another
guest on the same system.   The command run was
    netperf -H $host -t $test -l 20 -i 10 -I 95 -c -C

The numbers I was getting with the new code were consistently very
slightly (1-2%) better than the old code.  I don't consider this
an improvement, but it's not a regression! :)

Running 'perf record' on the host didn't show any new hot spots
and cpu utilization stayed about the same.  This was better
than I expected from simply looking at the code.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 3 changed files Side-by-side Diff

drivers/net/macvlan.c
... ... @@ -120,7 +120,7 @@
120 120 struct net_device *dev = vlan->dev;
121 121  
122 122 if (local)
123   - return vlan->forward(dev, skb);
  123 + return dev_forward_skb(dev, skb);
124 124  
125 125 skb->dev = dev;
126 126 if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
... ... @@ -128,7 +128,7 @@
128 128 else
129 129 skb->pkt_type = PACKET_MULTICAST;
130 130  
131   - return vlan->receive(skb);
  131 + return netif_rx(skb);
132 132 }
133 133  
134 134 static u32 macvlan_hash_mix(const struct macvlan_dev *vlan)
... ... @@ -251,7 +251,7 @@
251 251 skb->dev = dev;
252 252 skb->pkt_type = PACKET_HOST;
253 253  
254   - ret = vlan->receive(skb);
  254 + ret = netif_rx(skb);
255 255  
256 256 out:
257 257 macvlan_count_rx(vlan, len, ret == NET_RX_SUCCESS, 0);
... ... @@ -803,10 +803,7 @@
803 803 }
804 804  
805 805 int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
806   - struct nlattr *tb[], struct nlattr *data[],
807   - int (*receive)(struct sk_buff *skb),
808   - int (*forward)(struct net_device *dev,
809   - struct sk_buff *skb))
  806 + struct nlattr *tb[], struct nlattr *data[])
810 807 {
811 808 struct macvlan_dev *vlan = netdev_priv(dev);
812 809 struct macvlan_port *port;
... ... @@ -848,8 +845,6 @@
848 845 vlan->lowerdev = lowerdev;
849 846 vlan->dev = dev;
850 847 vlan->port = port;
851   - vlan->receive = receive;
852   - vlan->forward = forward;
853 848 vlan->set_features = MACVLAN_FEATURES;
854 849  
855 850 vlan->mode = MACVLAN_MODE_VEPA;
... ... @@ -894,9 +889,7 @@
894 889 static int macvlan_newlink(struct net *src_net, struct net_device *dev,
895 890 struct nlattr *tb[], struct nlattr *data[])
896 891 {
897   - return macvlan_common_newlink(src_net, dev, tb, data,
898   - netif_rx,
899   - dev_forward_skb);
  892 + return macvlan_common_newlink(src_net, dev, tb, data);
900 893 }
901 894  
902 895 void macvlan_dellink(struct net_device *dev, struct list_head *head)
drivers/net/macvtap.c
... ... @@ -70,6 +70,11 @@
70 70 #define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO)
71 71 #define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG)
72 72  
  73 +static struct macvlan_dev *macvtap_get_vlan_rcu(const struct net_device *dev)
  74 +{
  75 + return rcu_dereference(dev->rx_handler_data);
  76 +}
  77 +
73 78 /*
74 79 * RCU usage:
75 80 * The macvtap_queue and the macvlan_dev are loosely coupled, the
76 81  
77 82  
78 83  
79 84  
... ... @@ -271,24 +276,27 @@
271 276 sock_put(&qlist[j]->sk);
272 277 }
273 278  
274   -/*
275   - * Forward happens for data that gets sent from one macvlan
276   - * endpoint to another one in bridge mode. We just take
277   - * the skb and put it into the receive queue.
278   - */
279   -static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
  279 +static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
280 280 {
281   - struct macvlan_dev *vlan = netdev_priv(dev);
282   - struct macvtap_queue *q = macvtap_get_queue(dev, skb);
  281 + struct sk_buff *skb = *pskb;
  282 + struct net_device *dev = skb->dev;
  283 + struct macvlan_dev *vlan;
  284 + struct macvtap_queue *q;
283 285 netdev_features_t features = TAP_FEATURES;
284 286  
  287 + vlan = macvtap_get_vlan_rcu(dev);
  288 + if (!vlan)
  289 + return RX_HANDLER_PASS;
  290 +
  291 + q = macvtap_get_queue(dev, skb);
285 292 if (!q)
286   - goto drop;
  293 + return RX_HANDLER_PASS;
287 294  
288 295 if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len)
289 296 goto drop;
290 297  
291   - skb->dev = dev;
  298 + skb_push(skb, ETH_HLEN);
  299 +
292 300 /* Apply the forward feature mask so that we perform segmentation
293 301 * according to users wishes. This only works if VNET_HDR is
294 302 * enabled.
295 303  
296 304  
297 305  
... ... @@ -320,24 +328,15 @@
320 328  
321 329 wake_up:
322 330 wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
323   - return NET_RX_SUCCESS;
  331 + return RX_HANDLER_CONSUMED;
324 332  
325 333 drop:
  334 + /* Count errors/drops only here, thus don't care about args. */
  335 + macvlan_count_rx(vlan, 0, 0, 0);
326 336 kfree_skb(skb);
327   - return NET_RX_DROP;
  337 + return RX_HANDLER_CONSUMED;
328 338 }
329 339  
330   -/*
331   - * Receive is for data from the external interface (lowerdev),
332   - * in case of macvtap, we can treat that the same way as
333   - * forward, which macvlan cannot.
334   - */
335   -static int macvtap_receive(struct sk_buff *skb)
336   -{
337   - skb_push(skb, ETH_HLEN);
338   - return macvtap_forward(skb->dev, skb);
339   -}
340   -
341 340 static int macvtap_get_minor(struct macvlan_dev *vlan)
342 341 {
343 342 int retval = -ENOMEM;
... ... @@ -385,6 +384,8 @@
385 384 struct nlattr *data[])
386 385 {
387 386 struct macvlan_dev *vlan = netdev_priv(dev);
  387 + int err;
  388 +
388 389 INIT_LIST_HEAD(&vlan->queue_list);
389 390  
390 391 /* Since macvlan supports all offloads by default, make
391 392  
392 393  
... ... @@ -392,16 +393,20 @@
392 393 */
393 394 vlan->tap_features = TUN_OFFLOADS;
394 395  
  396 + err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan);
  397 + if (err)
  398 + return err;
  399 +
395 400 /* Don't put anything that may fail after macvlan_common_newlink
396 401 * because we can't undo what it does.
397 402 */
398   - return macvlan_common_newlink(src_net, dev, tb, data,
399   - macvtap_receive, macvtap_forward);
  403 + return macvlan_common_newlink(src_net, dev, tb, data);
400 404 }
401 405  
402 406 static void macvtap_dellink(struct net_device *dev,
403 407 struct list_head *head)
404 408 {
  409 + netdev_rx_handler_unregister(dev);
405 410 macvtap_del_queues(dev);
406 411 macvlan_dellink(dev, head);
407 412 }
... ... @@ -725,9 +730,8 @@
725 730 skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
726 731 }
727 732 if (vlan) {
728   - local_bh_disable();
729   - macvlan_start_xmit(skb, vlan->dev);
730   - local_bh_enable();
  733 + skb->dev = vlan->dev;
  734 + dev_queue_xmit(skb);
731 735 } else {
732 736 kfree_skb(skb);
733 737 }
include/linux/if_macvlan.h
... ... @@ -69,8 +69,6 @@
69 69 netdev_features_t set_features;
70 70 enum macvlan_mode mode;
71 71 u16 flags;
72   - int (*receive)(struct sk_buff *skb);
73   - int (*forward)(struct net_device *dev, struct sk_buff *skb);
74 72 /* This array tracks active taps. */
75 73 struct macvtap_queue __rcu *taps[MAX_MACVTAP_QUEUES];
76 74 /* This list tracks all taps (both enabled and disabled) */
... ... @@ -103,10 +101,7 @@
103 101 extern void macvlan_common_setup(struct net_device *dev);
104 102  
105 103 extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
106   - struct nlattr *tb[], struct nlattr *data[],
107   - int (*receive)(struct sk_buff *skb),
108   - int (*forward)(struct net_device *dev,
109   - struct sk_buff *skb));
  104 + struct nlattr *tb[], struct nlattr *data[]);
110 105  
111 106 extern void macvlan_count_rx(const struct macvlan_dev *vlan,
112 107 unsigned int len, bool success,