Commit 70181d51209cbcdf9ce2171eac3f3458281d2947

Authored by Jason Wang
Committed by David S. Miller
1 parent 16e3d9648a

vhost_net: remove tx polling state

After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
errors when setting backend), we in fact track the polling state through
poll->wqh, so there is no need to duplicate that work with an extra
vhost_net_poll_state enum. This patch removes it and makes the code simpler.
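
(For reference, the stop side already uses poll->wqh as the flag; a simplified
sketch of vhost_poll_stop() as it looked at the time, abridged from
drivers/vhost/vhost.c:)

	/* Sketch, abridged: stopping clears poll->wqh, so a non-NULL wqh
	 * means "currently polling". */
	void vhost_poll_stop(struct vhost_poll *poll)
	{
		if (poll->wqh) {
			remove_wait_queue(poll->wqh, &poll->wait);
			poll->wqh = NULL;
		}
	}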

This patch also removes all the tx polling start/stop code in the tx path,
following Michael's suggestion.

Netperf shows almost the same results in stream tests, but improvements in
TCP_RR tests (both zerocopy and copy), especially in low-load cases.

Tested between a multiqueue kvm guest and an external host with two directly
connected 82599s.

zerocopy disabled:

sessions | transaction rate (before/after/+improvement) | normalized (before/after/+improvement)
1        | 9510.24/11727.29/+23.3%                      | 693.54/887.68/+28.0%
25       | 192931.50/241729.87/+25.3%                   | 2376.80/2771.70/+16.6%
50       | 277634.64/291905.76/+5%                      | 3118.36/3230.11/+3.6%

zerocopy enabled:

sessions | transaction rate (before/after/+improvement) | normalized (before/after/+improvement)
1        | 7318.33/11929.76/+63.0%                      | 521.86/843.30/+61.6%
25       | 167264.88/242422.15/+44.9%                   | 2181.60/2788.16/+27.8%
50       | 272181.02/294347.04/+8.1%                    | 3071.56/3257.85/+6.1%

Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 2 changed files with 9 additions and 68 deletions

drivers/vhost/net.c

... ... @@ -64,20 +64,10 @@
64 64 VHOST_NET_VQ_MAX = 2,
65 65 };
66 66  
67   -enum vhost_net_poll_state {
68   - VHOST_NET_POLL_DISABLED = 0,
69   - VHOST_NET_POLL_STARTED = 1,
70   - VHOST_NET_POLL_STOPPED = 2,
71   -};
72   -
73 67 struct vhost_net {
74 68 struct vhost_dev dev;
75 69 struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
76 70 struct vhost_poll poll[VHOST_NET_VQ_MAX];
77   - /* Tells us whether we are polling a socket for TX.
78   - * We only do this when socket buffer fills up.
79   - * Protected by tx vq lock. */
80   - enum vhost_net_poll_state tx_poll_state;
81 71 /* Number of TX recently submitted.
82 72 * Protected by tx vq lock. */
83 73 unsigned tx_packets;
... ... @@ -155,28 +145,6 @@
155 145 }
156 146 }
157 147  
158   -/* Caller must have TX VQ lock */
159   -static void tx_poll_stop(struct vhost_net *net)
160   -{
161   - if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
162   - return;
163   - vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
164   - net->tx_poll_state = VHOST_NET_POLL_STOPPED;
165   -}
166   -
167   -/* Caller must have TX VQ lock */
168   -static int tx_poll_start(struct vhost_net *net, struct socket *sock)
169   -{
170   - int ret;
171   -
172   - if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
173   - return 0;
174   - ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
175   - if (!ret)
176   - net->tx_poll_state = VHOST_NET_POLL_STARTED;
177   - return ret;
178   -}
179   -
180 148 /* In case of DMA done not in order in lower device driver for some reason.
181 149 * upend_idx is used to track end of used idx, done_idx is used to track head
182 150 * of used idx. Once lower device DMA done contiguously, we will signal KVM
... ... @@ -242,7 +210,7 @@
242 210 .msg_flags = MSG_DONTWAIT,
243 211 };
244 212 size_t len, total_len = 0;
245   - int err, wmem;
  213 + int err;
246 214 size_t hdr_size;
247 215 struct socket *sock;
248 216 struct vhost_ubuf_ref *uninitialized_var(ubufs);
249 217  
... ... @@ -253,19 +221,9 @@
253 221 if (!sock)
254 222 return;
255 223  
256   - wmem = atomic_read(&sock->sk->sk_wmem_alloc);
257   - if (wmem >= sock->sk->sk_sndbuf) {
258   - mutex_lock(&vq->mutex);
259   - tx_poll_start(net, sock);
260   - mutex_unlock(&vq->mutex);
261   - return;
262   - }
263   -
264 224 mutex_lock(&vq->mutex);
265 225 vhost_disable_notify(&net->dev, vq);
266 226  
267   - if (wmem < sock->sk->sk_sndbuf / 2)
268   - tx_poll_stop(net);
269 227 hdr_size = vq->vhost_hlen;
270 228 zcopy = vq->ubufs;
271 229  
272 230  
273 231  
... ... @@ -285,23 +243,14 @@
285 243 if (head == vq->num) {
286 244 int num_pends;
287 245  
288   - wmem = atomic_read(&sock->sk->sk_wmem_alloc);
289   - if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
290   - tx_poll_start(net, sock);
291   - set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
292   - break;
293   - }
294 246 /* If more outstanding DMAs, queue the work.
295 247 * Handle upend_idx wrap around
296 248 */
297 249 num_pends = likely(vq->upend_idx >= vq->done_idx) ?
298 250 (vq->upend_idx - vq->done_idx) :
299 251 (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
300   - if (unlikely(num_pends > VHOST_MAX_PEND)) {
301   - tx_poll_start(net, sock);
302   - set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
  252 + if (unlikely(num_pends > VHOST_MAX_PEND))
303 253 break;
304   - }
305 254 if (unlikely(vhost_enable_notify(&net->dev, vq))) {
306 255 vhost_disable_notify(&net->dev, vq);
307 256 continue;
... ... @@ -364,8 +313,6 @@
364 313 UIO_MAXIOV;
365 314 }
366 315 vhost_discard_vq_desc(vq, 1);
367   - if (err == -EAGAIN || err == -ENOBUFS)
368   - tx_poll_start(net, sock);
369 316 break;
370 317 }
371 318 if (err != len)
... ... @@ -628,7 +575,6 @@
628 575  
629 576 vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
630 577 vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
631   - n->tx_poll_state = VHOST_NET_POLL_DISABLED;
632 578  
633 579 f->private_data = n;
634 580  
635 581  
636 582  
637 583  
638 584  
639 585  
... ... @@ -638,32 +584,24 @@
638 584 static void vhost_net_disable_vq(struct vhost_net *n,
639 585 struct vhost_virtqueue *vq)
640 586 {
  587 + struct vhost_poll *poll = n->poll + (vq - n->vqs);
641 588 if (!vq->private_data)
642 589 return;
643   - if (vq == n->vqs + VHOST_NET_VQ_TX) {
644   - tx_poll_stop(n);
645   - n->tx_poll_state = VHOST_NET_POLL_DISABLED;
646   - } else
647   - vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
  590 + vhost_poll_stop(poll);
648 591 }
649 592  
650 593 static int vhost_net_enable_vq(struct vhost_net *n,
651 594 struct vhost_virtqueue *vq)
652 595 {
  596 + struct vhost_poll *poll = n->poll + (vq - n->vqs);
653 597 struct socket *sock;
654   - int ret;
655 598  
656 599 sock = rcu_dereference_protected(vq->private_data,
657 600 lockdep_is_held(&vq->mutex));
658 601 if (!sock)
659 602 return 0;
660   - if (vq == n->vqs + VHOST_NET_VQ_TX) {
661   - n->tx_poll_state = VHOST_NET_POLL_STOPPED;
662   - ret = tx_poll_start(n, sock);
663   - } else
664   - ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
665 603  
666   - return ret;
  604 + return vhost_poll_start(poll, sock->file);
667 605 }
668 606  
669 607 static struct socket *vhost_net_stop_vq(struct vhost_net *n,
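
(Aside, not part of the patch: the rewritten helpers above pick the poll entry
by index instead of special-casing TX and RX. The pointer arithmetic works as
follows:)

	/* vq - n->vqs is the index of vq inside n->vqs[], so the matching
	 * entry of the parallel n->poll[] array is: */
	struct vhost_poll *poll = n->poll + (vq - n->vqs); /* == &n->poll[vq - n->vqs] */
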
drivers/vhost/vhost.c
... ... @@ -89,6 +89,9 @@
89 89 unsigned long mask;
90 90 int ret = 0;
91 91  
  92 + if (poll->wqh)
  93 + return 0;
  94 +
92 95 mask = file->f_op->poll(file, &poll->table);
93 96 if (mask)
94 97 vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
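
(Background on why checking poll->wqh is sufficient: wqh is filled in by the
poll-table callback that file->f_op->poll() invokes, so it is non-NULL exactly
while the wait entry is queued. A simplified sketch of that callback, following
drivers/vhost/vhost.c of the time:)

	/* Called back from file->f_op->poll() via poll_wait(): record the wait
	 * queue head and queue our wait entry on it, i.e. polling is active. */
	static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
				    poll_table *pt)
	{
		struct vhost_poll *poll = container_of(pt, struct vhost_poll, table);

		poll->wqh = wqh;
		add_wait_queue(wqh, &poll->wait);
	}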