Commit d9d52b5178af586d679c1052fb161ee05ea2e83f

Authored by Michael S. Tsirkin
1 parent 7ef527377b

tun: add ioctl to modify vnet header size

virtio added a mergeable buffers mode where 2 bytes of extra info are put
after the vnet header but before the actual data (tun does not need this
data). In hindsight, it would have been better to add the new info *before*
the packet: as it is, users need a lot of tricky code to skip the extra 2
bytes in the middle of the iovec, and in fact applications seem to get
it wrong and only work with a specific iovec layout.  The fact that we
might need to split the iovec also means we might, in theory, overflow the
maximum iovec size.

This patch adds a simpler way for applications to handle this,
and future-proofs the interface against further extensions,
by making the size of the virtio net header configurable
from userspace. As a result, the tun driver will simply
skip the extra 2 bytes on both input and output.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: David S. Miller <davem@davemloft.net>

Showing 2 changed files with 30 additions and 4 deletions Side-by-side Diff

... ... @@ -110,6 +110,9 @@
110 110 struct tap_filter txflt;
111 111 struct socket socket;
112 112 struct socket_wq wq;
  113 +
  114 + int vnet_hdr_sz;
  115 +
113 116 #ifdef TUN_DEBUG
114 117 int debug;
115 118 #endif
... ... @@ -563,7 +566,7 @@
563 566 }
564 567  
565 568 if (tun->flags & TUN_VNET_HDR) {
566   - if ((len -= sizeof(gso)) > count)
  569 + if ((len -= tun->vnet_hdr_sz) > count)
567 570 return -EINVAL;
568 571  
569 572 if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso)))
... ... @@ -575,7 +578,7 @@
575 578  
576 579 if (gso.hdr_len > len)
577 580 return -EINVAL;
578   - offset += sizeof(gso);
  581 + offset += tun->vnet_hdr_sz;
579 582 }
580 583  
581 584 if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
... ... @@ -718,7 +721,7 @@
718 721  
719 722 if (tun->flags & TUN_VNET_HDR) {
720 723 struct virtio_net_hdr gso = { 0 }; /* no info leak */
721   - if ((len -= sizeof(gso)) < 0)
  724 + if ((len -= tun->vnet_hdr_sz) < 0)
722 725 return -EINVAL;
723 726  
724 727 if (skb_is_gso(skb)) {
... ... @@ -749,7 +752,7 @@
749 752 if (unlikely(memcpy_toiovecend(iv, (void *)&gso, total,
750 753 sizeof(gso))))
751 754 return -EFAULT;
752   - total += sizeof(gso);
  755 + total += tun->vnet_hdr_sz;
753 756 }
754 757  
755 758 len = min_t(int, skb->len, len);
... ... @@ -1035,6 +1038,7 @@
1035 1038 tun->dev = dev;
1036 1039 tun->flags = flags;
1037 1040 tun->txflt.count = 0;
  1041 + tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
1038 1042  
1039 1043 err = -ENOMEM;
1040 1044 sk = sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
... ... @@ -1177,6 +1181,7 @@
1177 1181 struct sock_fprog fprog;
1178 1182 struct ifreq ifr;
1179 1183 int sndbuf;
  1184 + int vnet_hdr_sz;
1180 1185 int ret;
1181 1186  
1182 1187 if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
... ... @@ -1320,6 +1325,25 @@
1320 1325 }
1321 1326  
1322 1327 tun->socket.sk->sk_sndbuf = sndbuf;
  1328 + break;
  1329 +
  1330 + case TUNGETVNETHDRSZ:
  1331 + vnet_hdr_sz = tun->vnet_hdr_sz;
  1332 + if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz)))
  1333 + ret = -EFAULT;
  1334 + break;
  1335 +
  1336 + case TUNSETVNETHDRSZ:
  1337 + if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) {
  1338 + ret = -EFAULT;
  1339 + break;
  1340 + }
  1341 + if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) {
  1342 + ret = -EINVAL;
  1343 + break;
  1344 + }
  1345 +
  1346 + tun->vnet_hdr_sz = vnet_hdr_sz;
1323 1347 break;
1324 1348  
1325 1349 case TUNATTACHFILTER:
include/linux/if_tun.h
... ... @@ -51,6 +51,8 @@
51 51 #define TUNSETSNDBUF _IOW('T', 212, int)
52 52 #define TUNATTACHFILTER _IOW('T', 213, struct sock_fprog)
53 53 #define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog)
  54 +#define TUNGETVNETHDRSZ _IOR('T', 215, int)
  55 +#define TUNSETVNETHDRSZ _IOW('T', 216, int)
54 56  
55 57 /* TUNSETIFF ifr flags */
56 58 #define IFF_TUN 0x0001