Commit 501c774cb13c3ef8fb7fc5f08fa19473f7d9a0db

Authored by Arnd Bergmann
Committed by David S. Miller
1 parent 02df55d28c

net/macvtap: add vhost support

This adds support for passing a macvtap file descriptor into
vhost-net, much like we already do for tun/tap.

Most of the new code is taken from the respective patch
in the tun driver and may get consolidated in the future.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 4 changed files with 97 additions and 24 deletions Side-by-side Diff

drivers/net/macvtap.c
... ... @@ -58,6 +58,8 @@
58 58 static struct class *macvtap_class;
59 59 static struct cdev macvtap_cdev;
60 60  
  61 +static const struct proto_ops macvtap_socket_ops;
  62 +
61 63 /*
62 64 * RCU usage:
63 65 * The macvtap_queue and the macvlan_dev are loosely coupled, the
... ... @@ -176,7 +178,7 @@
176 178 return -ENOLINK;
177 179  
178 180 skb_queue_tail(&q->sk.sk_receive_queue, skb);
179   - wake_up(q->sk.sk_sleep);
  181 + wake_up_interruptible_poll(q->sk.sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND);
180 182 return 0;
181 183 }
182 184  
... ... @@ -242,7 +244,7 @@
242 244 return;
243 245  
244 246 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
245   - wake_up_interruptible_sync(sk->sk_sleep);
  247 + wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND);
246 248 }
247 249  
248 250 static int macvtap_open(struct inode *inode, struct file *file)
... ... @@ -270,6 +272,8 @@
270 272 init_waitqueue_head(&q->sock.wait);
271 273 q->sock.type = SOCK_RAW;
272 274 q->sock.state = SS_CONNECTED;
  275 + q->sock.file = file;
  276 + q->sock.ops = &macvtap_socket_ops;
273 277 sock_init_data(&q->sock, &q->sk);
274 278 q->sk.sk_write_space = macvtap_sock_write_space;
275 279  
276 280  
277 281  
278 282  
279 283  
... ... @@ -387,33 +391,21 @@
387 391  
388 392 rcu_read_lock_bh();
389 393 vlan = rcu_dereference(q->vlan);
390   - macvlan_count_rx(vlan, len, ret == 0, 0);
  394 + if (vlan)
  395 + macvlan_count_rx(vlan, len, ret == 0, 0);
391 396 rcu_read_unlock_bh();
392 397  
393 398 return ret ? ret : len;
394 399 }
395 400  
396   -static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
397   - unsigned long count, loff_t pos)
  401 +static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb,
  402 + const struct iovec *iv, unsigned long len,
  403 + int noblock)
398 404 {
399   - struct file *file = iocb->ki_filp;
400   - struct macvtap_queue *q = file->private_data;
401   -
402 405 DECLARE_WAITQUEUE(wait, current);
403 406 struct sk_buff *skb;
404   - ssize_t len, ret = 0;
  407 + ssize_t ret = 0;
405 408  
406   - if (!q) {
407   - ret = -ENOLINK;
408   - goto out;
409   - }
410   -
411   - len = iov_length(iv, count);
412   - if (len < 0) {
413   - ret = -EINVAL;
414   - goto out;
415   - }
416   -
417 409 add_wait_queue(q->sk.sk_sleep, &wait);
418 410 while (len) {
419 411 current->state = TASK_INTERRUPTIBLE;
... ... @@ -421,7 +413,7 @@
421 413 /* Read frames from the queue */
422 414 skb = skb_dequeue(&q->sk.sk_receive_queue);
423 415 if (!skb) {
424   - if (file->f_flags & O_NONBLOCK) {
  416 + if (noblock) {
425 417 ret = -EAGAIN;
426 418 break;
427 419 }
428 420  
... ... @@ -440,7 +432,24 @@
440 432  
441 433 current->state = TASK_RUNNING;
442 434 remove_wait_queue(q->sk.sk_sleep, &wait);
  435 + return ret;
  436 +}
443 437  
  438 +static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
  439 + unsigned long count, loff_t pos)
  440 +{
  441 + struct file *file = iocb->ki_filp;
  442 + struct macvtap_queue *q = file->private_data;
  443 + ssize_t len, ret = 0;
  444 +
  445 + len = iov_length(iv, count);
  446 + if (len < 0) {
  447 + ret = -EINVAL;
  448 + goto out;
  449 + }
  450 +
  451 + ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK);
  452 + ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */
444 453 out:
445 454 return ret;
446 455 }
... ... @@ -537,6 +546,53 @@
537 546 .compat_ioctl = macvtap_compat_ioctl,
538 547 #endif
539 548 };
  549 +
  550 +static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock,
  551 + struct msghdr *m, size_t total_len)
  552 +{
  553 + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
  554 + return macvtap_get_user(q, m->msg_iov, total_len,
  555 + m->msg_flags & MSG_DONTWAIT);
  556 +}
  557 +
  558 +static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock,
  559 + struct msghdr *m, size_t total_len,
  560 + int flags)
  561 +{
  562 + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
  563 + int ret;
  564 + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
  565 + return -EINVAL;
  566 + ret = macvtap_do_read(q, iocb, m->msg_iov, total_len,
  567 + flags & MSG_DONTWAIT);
  568 + if (ret > total_len) {
  569 + m->msg_flags |= MSG_TRUNC;
  570 + ret = flags & MSG_TRUNC ? ret : total_len;
  571 + }
  572 + return ret;
  573 +}
  574 +
  575 +/* Ops structure to mimic raw sockets with tun */
  576 +static const struct proto_ops macvtap_socket_ops = {
  577 + .sendmsg = macvtap_sendmsg,
  578 + .recvmsg = macvtap_recvmsg,
  579 +};
  580 +
  581 +/* Get an underlying socket object from tun file. Returns error unless file is
  582 + * attached to a device. The returned object works like a packet socket, it
  583 + * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
  584 + * holding a reference to the file for as long as the socket is in use. */
  585 +struct socket *macvtap_get_socket(struct file *file)
  586 +{
  587 + struct macvtap_queue *q;
  588 + if (file->f_op != &macvtap_fops)
  589 + return ERR_PTR(-EINVAL);
  590 + q = file->private_data;
  591 + if (!q)
  592 + return ERR_PTR(-EBADFD);
  593 + return &q->sock;
  594 +}
  595 +EXPORT_SYMBOL_GPL(macvtap_get_socket);
540 596  
541 597 static int macvtap_init(void)
542 598 {
drivers/vhost/Kconfig
1 1 config VHOST_NET
2 2 tristate "Host kernel accelerator for virtio net (EXPERIMENTAL)"
3   - depends on NET && EVENTFD && (TUN || !TUN) && EXPERIMENTAL
  3 + depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) && EXPERIMENTAL
4 4 ---help---
5 5 This kernel module can be loaded in host kernel to accelerate
6 6 guest networking with virtio_net. Not to be confused with virtio_net
drivers/vhost/net.c
... ... @@ -22,6 +22,7 @@
22 22 #include <linux/if_packet.h>
23 23 #include <linux/if_arp.h>
24 24 #include <linux/if_tun.h>
  25 +#include <linux/if_macvlan.h>
25 26  
26 27 #include <net/sock.h>
27 28  
28 29  
... ... @@ -452,13 +453,16 @@
452 453 return ERR_PTR(r);
453 454 }
454 455  
455   -static struct socket *get_tun_socket(int fd)
  456 +static struct socket *get_tap_socket(int fd)
456 457 {
457 458 struct file *file = fget(fd);
458 459 struct socket *sock;
459 460 if (!file)
460 461 return ERR_PTR(-EBADF);
461 462 sock = tun_get_socket(file);
  463 + if (!IS_ERR(sock))
  464 + return sock;
  465 + sock = macvtap_get_socket(file);
462 466 if (IS_ERR(sock))
463 467 fput(file);
464 468 return sock;
... ... @@ -473,7 +477,7 @@
473 477 sock = get_raw_socket(fd);
474 478 if (!IS_ERR(sock))
475 479 return sock;
476   - sock = get_tun_socket(fd);
  480 + sock = get_tap_socket(fd);
477 481 if (!IS_ERR(sock))
478 482 return sock;
479 483 return ERR_PTR(-ENOTSOCK);
include/linux/if_macvlan.h
... ... @@ -7,6 +7,19 @@
7 7 #include <linux/netlink.h>
8 8 #include <net/netlink.h>
9 9  
  10 +#if defined(CONFIG_MACVTAP) || defined(CONFIG_MACVTAP_MODULE)
  11 +struct socket *macvtap_get_socket(struct file *);
  12 +#else
  13 +#include <linux/err.h>
  14 +#include <linux/errno.h>
  15 +struct file;
  16 +struct socket;
  17 +static inline struct socket *macvtap_get_socket(struct file *f)
  18 +{
  19 + return ERR_PTR(-EINVAL);
  20 +}
  21 +#endif /* CONFIG_MACVTAP */
  22 +
10 23 struct macvlan_port;
11 24 struct macvtap_queue;
12 25