Commit 885291761dba2bfe04df4c0f7bb75e4c920ab82e

Authored by Jason Wang
Committed by David S. Miller
1 parent 87f40dd6ce

tuntap: do not zerocopy if iov needs more pages than MAX_SKB_FRAGS

We try to linearize part of the skb when the number of iov segments is greater
than MAX_SKB_FRAGS. This is not enough, since a single vector may occupy more
than one page, so zerocopy_sg_from_iovec() may still fail and break the guest
network.
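To see why, here is a minimal userspace sketch (assuming 4 KiB pages; the
buffer and sizes are made up purely for illustration) of how many pages a
single vector can span:

#include <stdio.h>
#include <sys/uio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
	static char buf[64 * 1024];
	struct iovec iv = { .iov_base = buf, .iov_len = sizeof(buf) };

	/* pages touched by one vector: offset into its first page plus the
	 * length, rounded up to whole pages */
	unsigned long off = (unsigned long)iv.iov_base & (PAGE_SIZE - 1);
	unsigned long pages = (off + iv.iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;

	/* one 64 KiB vector already needs 16 or 17 pages, close to
	 * MAX_SKB_FRAGS (typically 17 with 4 KiB pages), so a few large
	 * vectors can blow past the frag limit even when the vector count
	 * is small */
	printf("pages for one 64 KiB vector: %lu\n", pages);
	return 0;
}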

Solve this problem by calculating the pages needed for the iov before trying to
do zerocopy, and switch to copying instead of zerocopy if it needs more than
MAX_SKB_FRAGS pages.
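As an illustration of that decision, a self-contained userspace sketch
(assuming 4 KiB pages and MAX_SKB_FRAGS of 17; count_iov_pages() is an
illustrative stand-in that mirrors the iov_pages() helper added in the diff
below):

#include <stdio.h>
#include <sys/uio.h>

#define PAGE_SHIFT    12
#define PAGE_SIZE     (1UL << PAGE_SHIFT)
#define MAX_SKB_FRAGS 17

/* Count the pages covered by the iovec array past 'offset' bytes. */
static unsigned long count_iov_pages(const struct iovec *iv, size_t offset,
				     unsigned long nr_segs)
{
	unsigned long seg, pages = 0;

	/* skip the segments (or part of one) already consumed by 'offset' */
	while (nr_segs && offset >= iv->iov_len) {
		offset -= iv->iov_len;
		++iv;
		--nr_segs;
	}

	for (seg = 0; seg < nr_segs; seg++) {
		unsigned long base = (unsigned long)iv[seg].iov_base + offset;
		unsigned long len = iv[seg].iov_len - offset;

		/* offset into the first page plus length, rounded up */
		pages += ((base & (PAGE_SIZE - 1)) + len + PAGE_SIZE - 1)
			 >> PAGE_SHIFT;
		offset = 0;
	}

	return pages;
}

int main(void)
{
	static char data[256 * 1024];
	struct iovec iv[1] = { { .iov_base = data, .iov_len = sizeof(data) } };
	size_t copylen = 128;	/* bytes that would be copied linearly anyway */

	/* a single 256 KiB vector needs ~64 pages, far more than the frag
	 * slots an skb offers, so the copy path must be taken */
	if (count_iov_pages(iv, copylen, 1) <= MAX_SKB_FRAGS)
		printf("zerocopy\n");
	else
		printf("fall back to copying\n");
	return 0;
}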

This is done by introducing a new helper to count the pages for an iov, and by
calling uarg->callback() manually when switching from zerocopy to copy in order
to notify vhost.
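The notification side, sketched with illustrative names rather than the
kernel's exact ubuf_info definition: when tun falls back to copying, it still
has to invoke the callback so vhost can complete and reclaim the buffers it
handed over for zerocopy.

#include <stdbool.h>
#include <stdio.h>

/* stand-in for the kernel's ubuf_info: the zerocopy producer's
 * completion hook */
struct ubuf_info_sketch {
	void (*callback)(struct ubuf_info_sketch *uarg, bool zerocopy_success);
};

static void vhost_zerocopy_done(struct ubuf_info_sketch *uarg,
				bool zerocopy_success)
{
	/* vhost would mark the buffer as used here so the guest can reclaim
	 * it; 'false' means the data was copied rather than mapped */
	printf("buffer completed, zerocopy=%s\n",
	       zerocopy_success ? "yes" : "no");
}

int main(void)
{
	struct ubuf_info_sketch uarg = { .callback = vhost_zerocopy_done };
	bool zerocopy = false;	/* tun decided to copy instead */

	/* ... after the copy from the iovec succeeds ... */
	if (!zerocopy)
		uarg.callback(&uarg, false);	/* notify vhost immediately */
	return 0;
}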

We can do further optimization on top.

The bug was introduced by commit 0690899b4d4501b3505be069b9a687e68ccbe15b
("tun: experimental zero copy tx support").

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 1 changed file with 38 additions and 24 deletions

... ... @@ -1035,6 +1035,29 @@
1035 1035 return 0;
1036 1036 }
1037 1037  
  1038 +static unsigned long iov_pages(const struct iovec *iv, int offset,
  1039 + unsigned long nr_segs)
  1040 +{
  1041 + unsigned long seg, base;
  1042 + int pages = 0, len, size;
  1043 +
  1044 + while (nr_segs && (offset >= iv->iov_len)) {
  1045 + offset -= iv->iov_len;
  1046 + ++iv;
  1047 + --nr_segs;
  1048 + }
  1049 +
  1050 + for (seg = 0; seg < nr_segs; seg++) {
  1051 + base = (unsigned long)iv[seg].iov_base + offset;
  1052 + len = iv[seg].iov_len - offset;
  1053 + size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
  1054 + pages += size;
  1055 + offset = 0;
  1056 + }
  1057 +
  1058 + return pages;
  1059 +}
  1060 +
1038 1061 /* Get packet from user space buffer */
1039 1062 static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
1040 1063 void *msg_control, const struct iovec *iv,
1041 1064  
1042 1065  
... ... @@ -1082,32 +1105,18 @@
1082 1105 return -EINVAL;
1083 1106 }
1084 1107  
1085   - if (msg_control)
1086   - zerocopy = true;
1087   -
1088   - if (zerocopy) {
1089   - /* Userspace may produce vectors with count greater than
1090   - * MAX_SKB_FRAGS, so we need to linearize parts of the skb
1091   - * to let the rest of data to be fit in the frags.
1092   - */
1093   - if (count > MAX_SKB_FRAGS) {
1094   - copylen = iov_length(iv, count - MAX_SKB_FRAGS);
1095   - if (copylen < offset)
1096   - copylen = 0;
1097   - else
1098   - copylen -= offset;
1099   - } else
1100   - copylen = 0;
1101   - /* There are 256 bytes to be copied in skb, so there is enough
1102   - * room for skb expand head in case it is used.
  1108 + if (msg_control) {
  1109 + /* There are 256 bytes to be copied in skb, so there is
  1110 + * enough room for skb expand head in case it is used.
1103 1111 * The rest of the buffer is mapped from userspace.
1104 1112 */
1105   - if (copylen < gso.hdr_len)
1106   - copylen = gso.hdr_len;
1107   - if (!copylen)
1108   - copylen = GOODCOPY_LEN;
  1113 + copylen = gso.hdr_len ? gso.hdr_len : GOODCOPY_LEN;
1109 1114 linear = copylen;
1110   - } else {
  1115 + if (iov_pages(iv, offset + copylen, count) <= MAX_SKB_FRAGS)
  1116 + zerocopy = true;
  1117 + }
  1118 +
  1119 + if (!zerocopy) {
1111 1120 copylen = len;
1112 1121 linear = gso.hdr_len;
1113 1122 }
1114 1123  
... ... @@ -1121,8 +1130,13 @@
1121 1130  
1122 1131 if (zerocopy)
1123 1132 err = zerocopy_sg_from_iovec(skb, iv, offset, count);
1124   - else
  1133 + else {
1125 1134 err = skb_copy_datagram_from_iovec(skb, 0, iv, offset, len);
  1135 + if (!err && msg_control) {
  1136 + struct ubuf_info *uarg = msg_control;
  1137 + uarg->callback(uarg, false);
  1138 + }
  1139 + }
1126 1140  
1127 1141 if (err) {
1128 1142 tun->dev->stats.rx_dropped++;