Commit 501c774cb13c3ef8fb7fc5f08fa19473f7d9a0db
Committed by
David S. Miller
1 parent
02df55d28c
Exists in
master
and in
20 other branches
net/macvtap: add vhost support
This adds support for passing a macvtap file descriptor into vhost-net, much like we already do for tun/tap. Most of the new code is taken from the respective patch in the tun driver and may get consolidated in the future.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 4 changed files with 97 additions and 24 deletions Side-by-side Diff
drivers/net/macvtap.c
... | ... | @@ -58,6 +58,8 @@ |
58 | 58 | static struct class *macvtap_class; |
59 | 59 | static struct cdev macvtap_cdev; |
60 | 60 | |
61 | +static const struct proto_ops macvtap_socket_ops; | |
62 | + | |
61 | 63 | /* |
62 | 64 | * RCU usage: |
63 | 65 | * The macvtap_queue and the macvlan_dev are loosely coupled, the |
... | ... | @@ -176,7 +178,7 @@ |
176 | 178 | return -ENOLINK; |
177 | 179 | |
178 | 180 | skb_queue_tail(&q->sk.sk_receive_queue, skb); |
179 | - wake_up(q->sk.sk_sleep); | |
181 | + wake_up_interruptible_poll(q->sk.sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); | |
180 | 182 | return 0; |
181 | 183 | } |
182 | 184 | |
... | ... | @@ -242,7 +244,7 @@ |
242 | 244 | return; |
243 | 245 | |
244 | 246 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
245 | - wake_up_interruptible_sync(sk->sk_sleep); | |
247 | + wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); | |
246 | 248 | } |
247 | 249 | |
248 | 250 | static int macvtap_open(struct inode *inode, struct file *file) |
... | ... | @@ -270,6 +272,8 @@ |
270 | 272 | init_waitqueue_head(&q->sock.wait); |
271 | 273 | q->sock.type = SOCK_RAW; |
272 | 274 | q->sock.state = SS_CONNECTED; |
275 | + q->sock.file = file; | |
276 | + q->sock.ops = &macvtap_socket_ops; | |
273 | 277 | sock_init_data(&q->sock, &q->sk); |
274 | 278 | q->sk.sk_write_space = macvtap_sock_write_space; |
275 | 279 | |
276 | 280 | |
277 | 281 | |
278 | 282 | |
279 | 283 | |
... | ... | @@ -387,33 +391,21 @@ |
387 | 391 | |
388 | 392 | rcu_read_lock_bh(); |
389 | 393 | vlan = rcu_dereference(q->vlan); |
390 | - macvlan_count_rx(vlan, len, ret == 0, 0); | |
394 | + if (vlan) | |
395 | + macvlan_count_rx(vlan, len, ret == 0, 0); | |
391 | 396 | rcu_read_unlock_bh(); |
392 | 397 | |
393 | 398 | return ret ? ret : len; |
394 | 399 | } |
395 | 400 | |
396 | -static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, | |
397 | - unsigned long count, loff_t pos) | |
401 | +static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, | |
402 | + const struct iovec *iv, unsigned long len, | |
403 | + int noblock) | |
398 | 404 | { |
399 | - struct file *file = iocb->ki_filp; | |
400 | - struct macvtap_queue *q = file->private_data; | |
401 | - | |
402 | 405 | DECLARE_WAITQUEUE(wait, current); |
403 | 406 | struct sk_buff *skb; |
404 | - ssize_t len, ret = 0; | |
407 | + ssize_t ret = 0; | |
405 | 408 | |
406 | - if (!q) { | |
407 | - ret = -ENOLINK; | |
408 | - goto out; | |
409 | - } | |
410 | - | |
411 | - len = iov_length(iv, count); | |
412 | - if (len < 0) { | |
413 | - ret = -EINVAL; | |
414 | - goto out; | |
415 | - } | |
416 | - | |
417 | 409 | add_wait_queue(q->sk.sk_sleep, &wait); |
418 | 410 | while (len) { |
419 | 411 | current->state = TASK_INTERRUPTIBLE; |
... | ... | @@ -421,7 +413,7 @@ |
421 | 413 | /* Read frames from the queue */ |
422 | 414 | skb = skb_dequeue(&q->sk.sk_receive_queue); |
423 | 415 | if (!skb) { |
424 | - if (file->f_flags & O_NONBLOCK) { | |
416 | + if (noblock) { | |
425 | 417 | ret = -EAGAIN; |
426 | 418 | break; |
427 | 419 | } |
428 | 420 | |
... | ... | @@ -440,7 +432,24 @@ |
440 | 432 | |
441 | 433 | current->state = TASK_RUNNING; |
442 | 434 | remove_wait_queue(q->sk.sk_sleep, &wait); |
435 | + return ret; | |
436 | +} | |
443 | 437 | |
438 | +static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, | |
439 | + unsigned long count, loff_t pos) | |
440 | +{ | |
441 | + struct file *file = iocb->ki_filp; | |
442 | + struct macvtap_queue *q = file->private_data; | |
443 | + ssize_t len, ret = 0; | |
444 | + | |
445 | + len = iov_length(iv, count); | |
446 | + if (len < 0) { | |
447 | + ret = -EINVAL; | |
448 | + goto out; | |
449 | + } | |
450 | + | |
451 | + ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK); | |
452 | + ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */ | |
444 | 453 | out: |
445 | 454 | return ret; |
446 | 455 | } |
... | ... | @@ -537,6 +546,53 @@ |
537 | 546 | .compat_ioctl = macvtap_compat_ioctl, |
538 | 547 | #endif |
539 | 548 | }; |
549 | + | |
550 | +static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock, | |
551 | + struct msghdr *m, size_t total_len) | |
552 | +{ | |
553 | + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); | |
554 | + return macvtap_get_user(q, m->msg_iov, total_len, | |
555 | + m->msg_flags & MSG_DONTWAIT); | |
556 | +} | |
557 | + | |
558 | +static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock, | |
559 | + struct msghdr *m, size_t total_len, | |
560 | + int flags) | |
561 | +{ | |
562 | + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); | |
563 | + int ret; | |
564 | + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) | |
565 | + return -EINVAL; | |
566 | + ret = macvtap_do_read(q, iocb, m->msg_iov, total_len, | |
567 | + flags & MSG_DONTWAIT); | |
568 | + if (ret > total_len) { | |
569 | + m->msg_flags |= MSG_TRUNC; | |
570 | + ret = flags & MSG_TRUNC ? ret : total_len; | |
571 | + } | |
572 | + return ret; | |
573 | +} | |
574 | + | |
575 | +/* Ops structure to mimic raw sockets with tun */ | |
576 | +static const struct proto_ops macvtap_socket_ops = { | |
577 | + .sendmsg = macvtap_sendmsg, | |
578 | + .recvmsg = macvtap_recvmsg, | |
579 | +}; | |
580 | + | |
581 | +/* Get an underlying socket object from tun file. Returns error unless file is | |
582 | + * attached to a device. The returned object works like a packet socket, it | |
583 | + * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for | |
584 | + * holding a reference to the file for as long as the socket is in use. */ | |
585 | +struct socket *macvtap_get_socket(struct file *file) | |
586 | +{ | |
587 | + struct macvtap_queue *q; | |
588 | + if (file->f_op != &macvtap_fops) | |
589 | + return ERR_PTR(-EINVAL); | |
590 | + q = file->private_data; | |
591 | + if (!q) | |
592 | + return ERR_PTR(-EBADFD); | |
593 | + return &q->sock; | |
594 | +} | |
595 | +EXPORT_SYMBOL_GPL(macvtap_get_socket); | |
540 | 596 | |
541 | 597 | static int macvtap_init(void) |
542 | 598 | { |
drivers/vhost/Kconfig
1 | 1 | config VHOST_NET |
2 | 2 | tristate "Host kernel accelerator for virtio net (EXPERIMENTAL)" |
3 | - depends on NET && EVENTFD && (TUN || !TUN) && EXPERIMENTAL | |
3 | + depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) && EXPERIMENTAL | |
4 | 4 | ---help--- |
5 | 5 | This kernel module can be loaded in host kernel to accelerate |
6 | 6 | guest networking with virtio_net. Not to be confused with virtio_net |
drivers/vhost/net.c
... | ... | @@ -22,6 +22,7 @@ |
22 | 22 | #include <linux/if_packet.h> |
23 | 23 | #include <linux/if_arp.h> |
24 | 24 | #include <linux/if_tun.h> |
25 | +#include <linux/if_macvlan.h> | |
25 | 26 | |
26 | 27 | #include <net/sock.h> |
27 | 28 | |
28 | 29 | |
... | ... | @@ -452,13 +453,16 @@ |
452 | 453 | return ERR_PTR(r); |
453 | 454 | } |
454 | 455 | |
455 | -static struct socket *get_tun_socket(int fd) | |
456 | +static struct socket *get_tap_socket(int fd) | |
456 | 457 | { |
457 | 458 | struct file *file = fget(fd); |
458 | 459 | struct socket *sock; |
459 | 460 | if (!file) |
460 | 461 | return ERR_PTR(-EBADF); |
461 | 462 | sock = tun_get_socket(file); |
463 | + if (!IS_ERR(sock)) | |
464 | + return sock; | |
465 | + sock = macvtap_get_socket(file); | |
462 | 466 | if (IS_ERR(sock)) |
463 | 467 | fput(file); |
464 | 468 | return sock; |
... | ... | @@ -473,7 +477,7 @@ |
473 | 477 | sock = get_raw_socket(fd); |
474 | 478 | if (!IS_ERR(sock)) |
475 | 479 | return sock; |
476 | - sock = get_tun_socket(fd); | |
480 | + sock = get_tap_socket(fd); | |
477 | 481 | if (!IS_ERR(sock)) |
478 | 482 | return sock; |
479 | 483 | return ERR_PTR(-ENOTSOCK); |
include/linux/if_macvlan.h
... | ... | @@ -7,6 +7,19 @@ |
7 | 7 | #include <linux/netlink.h> |
8 | 8 | #include <net/netlink.h> |
9 | 9 | |
10 | +#if defined(CONFIG_MACVTAP) || defined(CONFIG_MACVTAP_MODULE) | |
11 | +struct socket *macvtap_get_socket(struct file *); | |
12 | +#else | |
13 | +#include <linux/err.h> | |
14 | +#include <linux/errno.h> | |
15 | +struct file; | |
16 | +struct socket; | |
17 | +static inline struct socket *macvtap_get_socket(struct file *f) | |
18 | +{ | |
19 | + return ERR_PTR(-EINVAL); | |
20 | +} | |
21 | +#endif /* CONFIG_MACVTAP */ | |
22 | + | |
10 | 23 | struct macvlan_port; |
11 | 24 | struct macvtap_queue; |
12 | 25 |