Commit 2839400f8fe28ce216eeeba3fb97bdf90977f7ad

Authored by Asias He
Committed by Michael S. Tsirkin
1 parent f2f0173d6a

vhost: move vhost-net zerocopy fields to net.c

On top of 'vhost: Allow device specific fields per vq', we can move the
device-specific fields out of the generic vhost virtqueue and into the
device's own virtqueue structure.

Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
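
The mechanics: struct vhost_net_virtqueue embeds the generic struct
vhost_virtqueue as a member, and code that only has the generic vq pointer
(e.g. vhost_zerocopy_signal_used() below) recovers the net wrapper with
container_of(). A minimal user-space sketch of that pattern, assuming a
local container_of() fallback and a hypothetical to_nvq() helper, neither
of which is in the patch:

	#include <stddef.h>

	#ifndef container_of
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))
	#endif

	struct vhost_virtqueue { int num; /* generic fields ... */ };

	/* device-private wrapper: generic part plus net-only zerocopy state */
	struct vhost_net_virtqueue {
		struct vhost_virtqueue vq;
		int upend_idx;	/* last used idx of outstanding zerocopy DMAs */
		int done_idx;	/* first used idx of completed zerocopy DMAs */
	};

	/* hypothetical helper: map a generic vq pointer back to its wrapper */
	static struct vhost_net_virtqueue *to_nvq(struct vhost_virtqueue *vq)
	{
		return container_of(vq, struct vhost_net_virtqueue, vq);
	}

	int main(void)
	{
		struct vhost_net_virtqueue nvq = { .done_idx = 0 };
		struct vhost_virtqueue *vq = &nvq.vq;	/* what generic code sees */
		return to_nvq(vq) == &nvq ? 0 : 1;	/* round-trip check */
	}
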

Showing 3 changed files with 142 additions and 101 deletions

drivers/vhost/net.c
... ... @@ -64,8 +64,24 @@
64 64 VHOST_NET_VQ_MAX = 2,
65 65 };
66 66  
  67 +struct vhost_ubuf_ref {
  68 + struct kref kref;
  69 + wait_queue_head_t wait;
  70 + struct vhost_virtqueue *vq;
  71 +};
  72 +
67 73 struct vhost_net_virtqueue {
68 74 struct vhost_virtqueue vq;
  75 + /* vhost zerocopy support fields below: */
  76 + /* last used idx for outstanding DMA zerocopy buffers */
  77 + int upend_idx;
  78 + /* first used idx for DMA done zerocopy buffers */
  79 + int done_idx;
  80 + /* an array of userspace buffers info */
  81 + struct ubuf_info *ubuf_info;
  82 + /* Reference counting for outstanding ubufs.
  83 + * Protected by vq mutex. Writers must also take device mutex. */
  84 + struct vhost_ubuf_ref *ubufs;
69 85 };
70 86  
71 87 struct vhost_net {
... ... @@ -82,6 +98,88 @@
82 98 bool tx_flush;
83 99 };
84 100  
  101 +static unsigned vhost_zcopy_mask __read_mostly;
  102 +
  103 +void vhost_enable_zcopy(int vq)
  104 +{
  105 + vhost_zcopy_mask |= 0x1 << vq;
  106 +}
  107 +
  108 +static void vhost_zerocopy_done_signal(struct kref *kref)
  109 +{
  110 + struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
  111 + kref);
  112 + wake_up(&ubufs->wait);
  113 +}
  114 +
  115 +struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
  116 + bool zcopy)
  117 +{
  118 + struct vhost_ubuf_ref *ubufs;
  119 + /* No zero copy backend? Nothing to count. */
  120 + if (!zcopy)
  121 + return NULL;
  122 + ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL);
  123 + if (!ubufs)
  124 + return ERR_PTR(-ENOMEM);
  125 + kref_init(&ubufs->kref);
  126 + init_waitqueue_head(&ubufs->wait);
  127 + ubufs->vq = vq;
  128 + return ubufs;
  129 +}
  130 +
  131 +void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
  132 +{
  133 + kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
  134 +}
  135 +
  136 +void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
  137 +{
  138 + kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
  139 + wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
  140 + kfree(ubufs);
  141 +}
  142 +
  143 +int vhost_net_set_ubuf_info(struct vhost_net *n)
  144 +{
  145 + bool zcopy;
  146 + int i;
  147 +
  148 + for (i = 0; i < n->dev.nvqs; ++i) {
  149 + zcopy = vhost_zcopy_mask & (0x1 << i);
  150 + if (!zcopy)
  151 + continue;
  152 + n->vqs[i].ubuf_info = kmalloc(sizeof(*n->vqs[i].ubuf_info) *
  153 + UIO_MAXIOV, GFP_KERNEL);
  154 + if (!n->vqs[i].ubuf_info)
  155 + goto err;
  156 + }
  157 + return 0;
  158 +
  159 +err:
  160 + while (i--) {
  161 + zcopy = vhost_zcopy_mask & (0x1 << i);
  162 + if (!zcopy)
  163 + continue;
  164 + kfree(n->vqs[i].ubuf_info);
  165 + }
  166 + return -ENOMEM;
  167 +}
  168 +
  169 +void vhost_net_reset_ubuf_info(struct vhost_net *n)
  170 +{
  171 + int i;
  172 +
  173 + for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
  174 + n->vqs[i].done_idx = 0;
  175 + n->vqs[i].upend_idx = 0;
  176 + n->vqs[i].ubufs = NULL;
  177 + kfree(n->vqs[i].ubuf_info);
  178 + n->vqs[i].ubuf_info = NULL;
  179 + }
  180 +
  181 +}
  182 +
85 183 static void vhost_net_tx_packet(struct vhost_net *net)
86 184 {
87 185 ++net->tx_packets;
88 186  
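
The vhost_ubuf_ref moved above is the lifetime anchor for zerocopy
transmits: vhost_ubuf_alloc() starts the kref at 1 (the vq's own reference),
handle_tx() takes an extra reference per in-flight buffer, the zerocopy
completion path drops it, and vhost_ubuf_put_and_wait() drops the vq's
reference and sleeps until the count reaches zero before freeing. A hedged
user-space model of that lifetime, with C11 atomics and a busy-wait standing
in for kref and the wait queue (all names below are stand-ins, not vhost API):

	#include <stdatomic.h>
	#include <stdlib.h>

	struct ubuf_ref_model {
		atomic_int refs;	/* starts at 1: the vq's own reference */
	};

	static struct ubuf_ref_model *ref_alloc(void)	/* ~ vhost_ubuf_alloc() */
	{
		struct ubuf_ref_model *u = malloc(sizeof(*u));
		if (u)
			atomic_init(&u->refs, 1);	/* ~ kref_init() */
		return u;
	}

	static void ref_get(struct ubuf_ref_model *u)	/* per in-flight zerocopy buf */
	{
		atomic_fetch_add(&u->refs, 1);		/* ~ kref_get() */
	}

	static void ref_put(struct ubuf_ref_model *u)	/* completion path */
	{
		atomic_fetch_sub(&u->refs, 1);		/* ~ vhost_ubuf_put(); real code wakes waiters at 0 */
	}

	static void ref_put_and_wait(struct ubuf_ref_model *u)	/* ~ vhost_net_flush() */
	{
		atomic_fetch_sub(&u->refs, 1);
		while (atomic_load(&u->refs) != 0)
			;	/* real code sleeps on ubufs->wait via wait_event() */
		free(u);
	}

	int main(void)
	{
		struct ubuf_ref_model *u = ref_alloc();
		if (!u)
			return 1;
		ref_get(u);		/* one buffer handed to the lower device */
		ref_put(u);		/* ...and its DMA completed */
		ref_put_and_wait(u);	/* flush: nothing outstanding, frees immediately */
		return 0;
	}
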
... ... @@ -157,10 +255,12 @@
157 255 static int vhost_zerocopy_signal_used(struct vhost_net *net,
158 256 struct vhost_virtqueue *vq)
159 257 {
  258 + struct vhost_net_virtqueue *nvq =
  259 + container_of(vq, struct vhost_net_virtqueue, vq);
160 260 int i;
161 261 int j = 0;
162 262  
163   - for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
  263 + for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
164 264 if (vq->heads[i].len == VHOST_DMA_FAILED_LEN)
165 265 vhost_net_tx_err(net);
166 266 if (VHOST_DMA_IS_DONE(vq->heads[i].len)) {
... ... @@ -172,7 +272,7 @@
172 272 break;
173 273 }
174 274 if (j)
175   - vq->done_idx = i;
  275 + nvq->done_idx = i;
176 276 return j;
177 277 }
178 278  
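
done_idx and upend_idx, now per vhost_net_virtqueue, index the same
UIO_MAXIOV-entry heads[] ring: entries from done_idx up to (but not
including) upend_idx are zerocopy buffers still in flight, and both indices
advance modulo UIO_MAXIOV. A small sketch of the wrap-around arithmetic,
matching the num_pends computation in handle_tx() further down
(pending_count() is a hypothetical name):

	#include <stdio.h>

	#define UIO_MAXIOV 1024

	static int pending_count(int upend_idx, int done_idx)
	{
		return upend_idx >= done_idx ?
			upend_idx - done_idx :
			upend_idx + UIO_MAXIOV - done_idx;	/* wrapped past the end */
	}

	int main(void)
	{
		printf("%d\n", pending_count(10, 3));	/* 7 buffers outstanding */
		printf("%d\n", pending_count(2, 1020));	/* 6: upend_idx wrapped */
		return 0;
	}
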
... ... @@ -203,6 +303,7 @@
203 303 static void handle_tx(struct vhost_net *net)
204 304 {
205 305 struct vhost_virtqueue *vq = &net->vqs[VHOST_NET_VQ_TX].vq;
  306 + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
206 307 unsigned out, in, s;
207 308 int head;
208 309 struct msghdr msg = {
... ... @@ -229,7 +330,7 @@
229 330 vhost_disable_notify(&net->dev, vq);
230 331  
231 332 hdr_size = vq->vhost_hlen;
232   - zcopy = vq->ubufs;
  333 + zcopy = nvq->ubufs;
233 334  
234 335 for (;;) {
235 336 /* Release DMAs done buffers first */
... ... @@ -250,9 +351,10 @@
250 351 /* If more outstanding DMAs, queue the work.
251 352 * Handle upend_idx wrap around
252 353 */
253   - num_pends = likely(vq->upend_idx >= vq->done_idx) ?
254   - (vq->upend_idx - vq->done_idx) :
255   - (vq->upend_idx + UIO_MAXIOV - vq->done_idx);
  354 + num_pends = likely(nvq->upend_idx >= nvq->done_idx) ?
  355 + (nvq->upend_idx - nvq->done_idx) :
  356 + (nvq->upend_idx + UIO_MAXIOV -
  357 + nvq->done_idx);
256 358 if (unlikely(num_pends > VHOST_MAX_PEND))
257 359 break;
258 360 if (unlikely(vhost_enable_notify(&net->dev, vq))) {
... ... @@ -278,34 +380,34 @@
278 380 break;
279 381 }
280 382 zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN ||
281   - vq->upend_idx != vq->done_idx);
  383 + nvq->upend_idx != nvq->done_idx);
282 384  
283 385 /* use msg_control to pass vhost zerocopy ubuf info to skb */
284 386 if (zcopy_used) {
285   - vq->heads[vq->upend_idx].id = head;
  387 + vq->heads[nvq->upend_idx].id = head;
286 388 if (!vhost_net_tx_select_zcopy(net) ||
287 389 len < VHOST_GOODCOPY_LEN) {
288 390 /* copy don't need to wait for DMA done */
289   - vq->heads[vq->upend_idx].len =
  391 + vq->heads[nvq->upend_idx].len =
290 392 VHOST_DMA_DONE_LEN;
291 393 msg.msg_control = NULL;
292 394 msg.msg_controllen = 0;
293 395 ubufs = NULL;
294 396 } else {
295 397 struct ubuf_info *ubuf;
296   - ubuf = vq->ubuf_info + vq->upend_idx;
  398 + ubuf = nvq->ubuf_info + nvq->upend_idx;
297 399  
298   - vq->heads[vq->upend_idx].len =
  400 + vq->heads[nvq->upend_idx].len =
299 401 VHOST_DMA_IN_PROGRESS;
300 402 ubuf->callback = vhost_zerocopy_callback;
301   - ubuf->ctx = vq->ubufs;
302   - ubuf->desc = vq->upend_idx;
  403 + ubuf->ctx = nvq->ubufs;
  404 + ubuf->desc = nvq->upend_idx;
303 405 msg.msg_control = ubuf;
304 406 msg.msg_controllen = sizeof(ubuf);
305   - ubufs = vq->ubufs;
  407 + ubufs = nvq->ubufs;
306 408 kref_get(&ubufs->kref);
307 409 }
308   - vq->upend_idx = (vq->upend_idx + 1) % UIO_MAXIOV;
  410 + nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
309 411 }
310 412 /* TODO: Check specific error and bomb out unless ENOBUFS? */
311 413 err = sock->ops->sendmsg(NULL, sock, &msg, len);
... ... @@ -313,8 +415,8 @@
313 415 if (zcopy_used) {
314 416 if (ubufs)
315 417 vhost_ubuf_put(ubufs);
316   - vq->upend_idx = ((unsigned)vq->upend_idx - 1) %
317   - UIO_MAXIOV;
  418 + nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
  419 + % UIO_MAXIOV;
318 420 }
319 421 vhost_discard_vq_desc(vq, 1);
320 422 break;
... ... @@ -564,7 +666,7 @@
564 666 struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
565 667 struct vhost_dev *dev;
566 668 struct vhost_virtqueue **vqs;
567   - int r;
  669 + int r, i;
568 670  
569 671 if (!n)
570 672 return -ENOMEM;
... ... @@ -579,6 +681,12 @@
579 681 vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq;
580 682 n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick;
581 683 n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick;
  684 + for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
  685 + n->vqs[i].ubufs = NULL;
  686 + n->vqs[i].ubuf_info = NULL;
  687 + n->vqs[i].upend_idx = 0;
  688 + n->vqs[i].done_idx = 0;
  689 + }
582 690 r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
583 691 if (r < 0) {
584 692 kfree(n);
... ... @@ -652,15 +760,15 @@
652 760 {
653 761 vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
654 762 vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
655   - if (n->vqs[VHOST_NET_VQ_TX].vq.ubufs) {
  763 + if (n->vqs[VHOST_NET_VQ_TX].ubufs) {
656 764 mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
657 765 n->tx_flush = true;
658 766 mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
659 767 /* Wait for all lower device DMAs done. */
660   - vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].vq.ubufs);
  768 + vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs);
661 769 mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
662 770 n->tx_flush = false;
663   - kref_init(&n->vqs[VHOST_NET_VQ_TX].vq.ubufs->kref);
  771 + kref_init(&n->vqs[VHOST_NET_VQ_TX].ubufs->kref);
664 772 mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
665 773 }
666 774 }
... ... @@ -675,6 +783,7 @@
675 783 vhost_net_flush(n);
676 784 vhost_dev_stop(&n->dev);
677 785 vhost_dev_cleanup(&n->dev, false);
  786 + vhost_net_reset_ubuf_info(n);
678 787 if (tx_sock)
679 788 fput(tx_sock->file);
680 789 if (rx_sock)
... ... @@ -756,6 +865,7 @@
756 865 {
757 866 struct socket *sock, *oldsock;
758 867 struct vhost_virtqueue *vq;
  868 + struct vhost_net_virtqueue *nvq;
759 869 struct vhost_ubuf_ref *ubufs, *oldubufs = NULL;
760 870 int r;
761 871  
... ... @@ -769,6 +879,7 @@
769 879 goto err;
770 880 }
771 881 vq = &n->vqs[index].vq;
  882 + nvq = &n->vqs[index];
772 883 mutex_lock(&vq->mutex);
773 884  
774 885 /* Verify that ring has been setup correctly. */
... ... @@ -801,8 +912,8 @@
801 912 if (r)
802 913 goto err_used;
803 914  
804   - oldubufs = vq->ubufs;
805   - vq->ubufs = ubufs;
  915 + oldubufs = nvq->ubufs;
  916 + nvq->ubufs = ubufs;
806 917  
807 918 n->tx_packets = 0;
808 919 n->tx_zcopy_err = 0;
... ... @@ -853,6 +964,7 @@
853 964 vhost_net_stop(n, &tx_sock, &rx_sock);
854 965 vhost_net_flush(n);
855 966 err = vhost_dev_reset_owner(&n->dev);
  967 + vhost_net_reset_ubuf_info(n);
856 968 done:
857 969 mutex_unlock(&n->dev.mutex);
858 970 if (tx_sock)
859 971  
... ... @@ -928,11 +1040,17 @@
928 1040 return vhost_net_reset_owner(n);
929 1041 default:
930 1042 mutex_lock(&n->dev.mutex);
  1043 + if (ioctl == VHOST_SET_OWNER) {
  1044 + r = vhost_net_set_ubuf_info(n);
  1045 + if (r)
  1046 + goto out;
  1047 + }
931 1048 r = vhost_dev_ioctl(&n->dev, ioctl, argp);
932 1049 if (r == -ENOIOCTLCMD)
933 1050 r = vhost_vring_ioctl(&n->dev, ioctl, argp);
934 1051 else
935 1052 vhost_net_flush(n);
  1053 +out:
936 1054 mutex_unlock(&n->dev.mutex);
937 1055 return r;
938 1056 }
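
Since vhost_dev_alloc_iovecs() (below) no longer allocates ubuf_info, net.c
now does it itself when it sees VHOST_SET_OWNER, before handing the ioctl to
the generic layer. A hedged model of that ordering, with stub helpers
standing in for the real vhost plumbing (VHOST_SET_OWNER_MODEL,
set_ubuf_info() and generic_ioctl() are stand-ins, not vhost API):

	#define VHOST_SET_OWNER_MODEL 0x01	/* stand-in for the real ioctl number */

	static int set_ubuf_info(void)  { return 0; }		/* models vhost_net_set_ubuf_info() */
	static int generic_ioctl(int c) { (void)c; return 0; }	/* models vhost_dev_ioctl() */

	int net_ioctl(int cmd)
	{
		int r;

		/* allocate the zerocopy arrays before ownership is taken */
		if (cmd == VHOST_SET_OWNER_MODEL) {
			r = set_ubuf_info();
			if (r)
				return r;	/* mirrors the new goto out path */
		}
		return generic_ioctl(cmd);
	}

	int main(void) { return net_ioctl(VHOST_SET_OWNER_MODEL); }
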
drivers/vhost/vhost.c
... ... @@ -33,8 +33,6 @@
33 33 VHOST_MEMORY_F_LOG = 0x1,
34 34 };
35 35  
36   -static unsigned vhost_zcopy_mask __read_mostly;
37   -
38 36 #define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
39 37 #define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
40 38  
... ... @@ -191,9 +189,6 @@
191 189 vq->call_ctx = NULL;
192 190 vq->call = NULL;
193 191 vq->log_ctx = NULL;
194   - vq->upend_idx = 0;
195   - vq->done_idx = 0;
196   - vq->ubufs = NULL;
197 192 }
198 193  
199 194 static int vhost_worker(void *data)
... ... @@ -253,20 +248,12 @@
253 248 vq->log = NULL;
254 249 kfree(vq->heads);
255 250 vq->heads = NULL;
256   - kfree(vq->ubuf_info);
257   - vq->ubuf_info = NULL;
258 251 }
259 252  
260   -void vhost_enable_zcopy(int vq)
261   -{
262   - vhost_zcopy_mask |= 0x1 << vq;
263   -}
264   -
265 253 /* Helper to allocate iovec buffers for all vqs. */
266 254 static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
267 255 {
268 256 int i;
269   - bool zcopy;
270 257  
271 258 for (i = 0; i < dev->nvqs; ++i) {
272 259 dev->vqs[i]->indirect = kmalloc(sizeof *dev->vqs[i]->indirect *
273 260  
... ... @@ -275,14 +262,8 @@
275 262 GFP_KERNEL);
276 263 dev->vqs[i]->heads = kmalloc(sizeof *dev->vqs[i]->heads *
277 264 UIO_MAXIOV, GFP_KERNEL);
278   - zcopy = vhost_zcopy_mask & (0x1 << i);
279   - if (zcopy)
280   - dev->vqs[i]->ubuf_info =
281   - kmalloc(sizeof *dev->vqs[i]->ubuf_info *
282   - UIO_MAXIOV, GFP_KERNEL);
283 265 if (!dev->vqs[i]->indirect || !dev->vqs[i]->log ||
284   - !dev->vqs[i]->heads ||
285   - (zcopy && !dev->vqs[i]->ubuf_info))
  266 + !dev->vqs[i]->heads)
286 267 goto err_nomem;
287 268 }
288 269 return 0;
... ... @@ -321,7 +302,6 @@
321 302 dev->vqs[i]->log = NULL;
322 303 dev->vqs[i]->indirect = NULL;
323 304 dev->vqs[i]->heads = NULL;
324   - dev->vqs[i]->ubuf_info = NULL;
325 305 dev->vqs[i]->dev = dev;
326 306 mutex_init(&dev->vqs[i]->mutex);
327 307 vhost_vq_reset(dev, dev->vqs[i]);
... ... @@ -1550,40 +1530,5 @@
1550 1530 vq_err(vq, "Failed to enable notification at %p: %d\n",
1551 1531 &vq->used->flags, r);
1552 1532 }
1553   -}
1554   -
1555   -static void vhost_zerocopy_done_signal(struct kref *kref)
1556   -{
1557   - struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
1558   - kref);
1559   - wake_up(&ubufs->wait);
1560   -}
1561   -
1562   -struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
1563   - bool zcopy)
1564   -{
1565   - struct vhost_ubuf_ref *ubufs;
1566   - /* No zero copy backend? Nothing to count. */
1567   - if (!zcopy)
1568   - return NULL;
1569   - ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL);
1570   - if (!ubufs)
1571   - return ERR_PTR(-ENOMEM);
1572   - kref_init(&ubufs->kref);
1573   - init_waitqueue_head(&ubufs->wait);
1574   - ubufs->vq = vq;
1575   - return ubufs;
1576   -}
1577   -
1578   -void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
1579   -{
1580   - kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
1581   -}
1582   -
1583   -void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
1584   -{
1585   - kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
1586   - wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
1587   - kfree(ubufs);
1588 1533 }
drivers/vhost/vhost.h
... ... @@ -54,18 +54,6 @@
54 54  
55 55 struct vhost_virtqueue;
56 56  
57   -struct vhost_ubuf_ref {
58   - struct kref kref;
59   - wait_queue_head_t wait;
60   - struct vhost_virtqueue *vq;
61   -};
62   -
63   -struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *, bool zcopy);
64   -void vhost_ubuf_put(struct vhost_ubuf_ref *);
65   -void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *);
66   -
67   -struct ubuf_info;
68   -
69 57 /* The virtqueue structure describes a queue attached to a device. */
70 58 struct vhost_virtqueue {
71 59 struct vhost_dev *dev;
... ... @@ -130,16 +118,6 @@
130 118 /* Log write descriptors */
131 119 void __user *log_base;
132 120 struct vhost_log *log;
133   - /* vhost zerocopy support fields below: */
134   - /* last used idx for outstanding DMA zerocopy buffers */
135   - int upend_idx;
136   - /* first used idx for DMA done zerocopy buffers */
137   - int done_idx;
138   - /* an array of userspace buffers info */
139   - struct ubuf_info *ubuf_info;
140   - /* Reference counting for outstanding ubufs.
141   - * Protected by vq mutex. Writers must also take device mutex. */
142   - struct vhost_ubuf_ref *ubufs;
143 121 };
144 122  
145 123 struct vhost_dev {