drivers/vhost/net.c
/* Copyright (C) 2009 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * virtio-net server in host kernel.
 */

#include <linux/compat.h>
#include <linux/eventfd.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include <linux/net.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/if_tun.h>
#include <linux/if_macvlan.h>
#include <linux/if_vlan.h>

#include <net/sock.h>

#include "vhost.h"

static int experimental_zcopytx = 1;
module_param(experimental_zcopytx, int, 0444);
MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
		 " 1 -Enable; 0 - Disable");
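
/* Usage note: since the parameter mode is 0444, zerocopy TX can only be
 * chosen at module load time, e.g. "modprobe vhost_net
 * experimental_zcopytx=0", and read back afterwards via
 * /sys/module/vhost_net/parameters/experimental_zcopytx.
 */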

/* Max number of bytes transferred before requeueing the job.
 * Using this limit prevents one virtqueue from starving others. */
#define VHOST_NET_WEIGHT 0x80000
/* MAX number of TX used buffers for outstanding zerocopy */
#define VHOST_MAX_PEND 128
#define VHOST_GOODCOPY_LEN 256

/*
 * For transmit, used buffer len is unused; we override it to track buffer
 * status internally; used for zerocopy tx only.
 */
/* Lower device DMA failed */
#define VHOST_DMA_FAILED_LEN	3
/* Lower device DMA done */
#define VHOST_DMA_DONE_LEN	2
/* Lower device DMA in progress */
#define VHOST_DMA_IN_PROGRESS	1
/* Buffer unused */
#define VHOST_DMA_CLEAR_LEN	0

#define VHOST_DMA_IS_DONE(len) ((len) >= VHOST_DMA_DONE_LEN)
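
/* Rough lifecycle of a zerocopy buffer's len field: it starts at
 * VHOST_DMA_CLEAR_LEN, is set to VHOST_DMA_IN_PROGRESS when the buffer is
 * handed to the lower device in handle_tx(), moves to VHOST_DMA_DONE_LEN
 * (or VHOST_DMA_FAILED_LEN) in vhost_zerocopy_callback(), and is reset to
 * VHOST_DMA_CLEAR_LEN once vhost_zerocopy_signal_used() reports the buffer
 * back to the guest.
 */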

enum {
	VHOST_NET_FEATURES = VHOST_FEATURES |
			 (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
			 (1ULL << VIRTIO_NET_F_MRG_RXBUF),
};

enum {
	VHOST_NET_VQ_RX = 0,
	VHOST_NET_VQ_TX = 1,
	VHOST_NET_VQ_MAX = 2,
};

struct vhost_net_ubuf_ref {
	/* refcount follows semantics similar to kref:
	 *  0: object is released
	 *  1: no outstanding ubufs
	 * >1: outstanding ubufs
	 */
	atomic_t refcount;
	wait_queue_head_t wait;
	struct vhost_virtqueue *vq;
};

struct vhost_net_virtqueue {
	struct vhost_virtqueue vq;
	/* hdr is used to store the virtio header.
	 * Since each iovec has >= 1 byte length, we never need more than
	 * header length entries to store the header. */
	struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)];
	size_t vhost_hlen;
	size_t sock_hlen;
	/* vhost zerocopy support fields below: */
	/* last used idx for outstanding DMA zerocopy buffers */
	int upend_idx;
	/* first used idx for DMA done zerocopy buffers */
	int done_idx;
	/* an array of userspace buffers info */
	struct ubuf_info *ubuf_info;
	/* Reference counting for outstanding ubufs.
	 * Protected by vq mutex. Writers must also take device mutex. */
	struct vhost_net_ubuf_ref *ubufs;
};

struct vhost_net {
	struct vhost_dev dev;
	struct vhost_net_virtqueue vqs[VHOST_NET_VQ_MAX];
	struct vhost_poll poll[VHOST_NET_VQ_MAX];
	/* Number of TX recently submitted.
	 * Protected by tx vq lock. */
	unsigned tx_packets;
	/* Number of times zerocopy TX recently failed.
	 * Protected by tx vq lock. */
	unsigned tx_zcopy_err;
	/* Flush in progress. Protected by tx vq lock. */
	bool tx_flush;
};

static unsigned vhost_net_zcopy_mask __read_mostly;

static void vhost_net_enable_zcopy(int vq)
{
	vhost_net_zcopy_mask |= 0x1 << vq;
}

static struct vhost_net_ubuf_ref *
vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy)
{
	struct vhost_net_ubuf_ref *ubufs;
	/* No zero copy backend? Nothing to count. */
	if (!zcopy)
		return NULL;
	ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL);
	if (!ubufs)
		return ERR_PTR(-ENOMEM);
	atomic_set(&ubufs->refcount, 1);
	init_waitqueue_head(&ubufs->wait);
	ubufs->vq = vq;
	return ubufs;
}

static int vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs)
{
	int r = atomic_sub_return(1, &ubufs->refcount);
	if (unlikely(!r))
		wake_up(&ubufs->wait);
	return r;
}

static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs)
{
	vhost_net_ubuf_put(ubufs);
	wait_event(ubufs->wait, !atomic_read(&ubufs->refcount));
}

static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs)
{
	vhost_net_ubuf_put_and_wait(ubufs);
	kfree(ubufs);
}
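
/* Sketch of the refcount protocol implemented above: vhost_net_ubuf_alloc()
 * starts the count at 1 (the "no outstanding ubufs" state), handle_tx()
 * takes an extra reference for every zerocopy buffer it submits, and
 * vhost_zerocopy_callback() drops one per completion.  A flush therefore
 * drops the initial reference and sleeps until the count hits 0, i.e. until
 * every lower-device DMA has completed.
 */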

static void vhost_net_clear_ubuf_info(struct vhost_net *n)
{
	int i;

	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
		kfree(n->vqs[i].ubuf_info);
		n->vqs[i].ubuf_info = NULL;
	}
}

static int vhost_net_set_ubuf_info(struct vhost_net *n)
{
	bool zcopy;
	int i;

	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
		zcopy = vhost_net_zcopy_mask & (0x1 << i);
		if (!zcopy)
			continue;
		n->vqs[i].ubuf_info = kmalloc(sizeof(*n->vqs[i].ubuf_info) *
					      UIO_MAXIOV, GFP_KERNEL);
		if (!n->vqs[i].ubuf_info)
			goto err;
	}
	return 0;

err:
	vhost_net_clear_ubuf_info(n);
	return -ENOMEM;
}

static void vhost_net_vq_reset(struct vhost_net *n)
{
	int i;

	vhost_net_clear_ubuf_info(n);
	for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
		n->vqs[i].done_idx = 0;
		n->vqs[i].upend_idx = 0;
		n->vqs[i].ubufs = NULL;
		n->vqs[i].vhost_hlen = 0;
		n->vqs[i].sock_hlen = 0;
	}
}

static void vhost_net_tx_packet(struct vhost_net *net)
{
	++net->tx_packets;
	if (net->tx_packets < 1024)
		return;
	net->tx_packets = 0;
	net->tx_zcopy_err = 0;
}

static void vhost_net_tx_err(struct vhost_net *net)
{
	++net->tx_zcopy_err;
}

static bool vhost_net_tx_select_zcopy(struct vhost_net *net)
{
	/* TX flush waits for outstanding DMAs to be done.
	 * Don't start new DMAs.
	 */
	return !net->tx_flush &&
		net->tx_packets / 64 >= net->tx_zcopy_err;
}
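
/* Worked example of the heuristic above: the counters reset every 1024
 * packets, so with tx_packets == 640 zerocopy stays selected while
 * tx_zcopy_err <= 10 (640 / 64); roughly, more than ~1.5% of recent
 * failures disables zerocopy until the next counter reset.
 */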

static bool vhost_sock_zcopy(struct socket *sock)
{
	return unlikely(experimental_zcopytx) &&
		sock_flag(sock->sk, SOCK_ZEROCOPY);
}

/* Pop first len bytes from iovec. Return number of segments used. */
static int move_iovec_hdr(struct iovec *from, struct iovec *to,
			  size_t len, int iov_count)
{
	int seg = 0;
	size_t size;

	while (len && seg < iov_count) {
		size = min(from->iov_len, len);
		to->iov_base = from->iov_base;
		to->iov_len = size;
		from->iov_len -= size;
		from->iov_base += size;
		len -= size;
		++from;
		++to;
		++seg;
	}
	return seg;
}

/* Copy iovec entries for len bytes from iovec. */
static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
			   size_t len, int iovcount)
{
	int seg = 0;
	size_t size;

	while (len && seg < iovcount) {
		size = min(from->iov_len, len);
		to->iov_base = from->iov_base;
		to->iov_len = size;
		len -= size;
		++from;
		++to;
		++seg;
	}
}

/* In case of DMA done not in order in lower device driver for some reason.
 * upend_idx is used to track end of used idx, done_idx is used to track head
 * of used idx. Once lower device DMA done contiguously, we will signal KVM
 * guest used idx.
 */
static void vhost_zerocopy_signal_used(struct vhost_net *net,
				       struct vhost_virtqueue *vq)
{
	struct vhost_net_virtqueue *nvq =
		container_of(vq, struct vhost_net_virtqueue, vq);
	int i, add;
	int j = 0;

	for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
		if (vq->heads[i].len == VHOST_DMA_FAILED_LEN)
			vhost_net_tx_err(net);
		if (VHOST_DMA_IS_DONE(vq->heads[i].len)) {
			vq->heads[i].len = VHOST_DMA_CLEAR_LEN;
			++j;
		} else
			break;
	}
	while (j) {
		add = min(UIO_MAXIOV - nvq->done_idx, j);
		vhost_add_used_and_signal_n(vq->dev, vq,
					    &vq->heads[nvq->done_idx], add);
		nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
		j -= add;
	}
}
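
/* Example of the wrap-around batching above, assuming UIO_MAXIOV == 1024:
 * with done_idx == 1022 and j == 5 completed entries, the first iteration
 * signals 2 heads (1022 and 1023) and wraps done_idx to 0, and the second
 * signals the remaining 3, leaving done_idx == 3.
 */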

static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
{
	struct vhost_net_ubuf_ref *ubufs = ubuf->ctx;
	struct vhost_virtqueue *vq = ubufs->vq;
	int cnt;

	rcu_read_lock_bh();

	/* set len to mark this desc buffers done DMA */
	vq->heads[ubuf->desc].len = success ?
		VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN;
	cnt = vhost_net_ubuf_put(ubufs);

	/*
	 * Trigger polling thread if guest stopped submitting new buffers:
	 * in this case, the refcount after decrement will eventually reach 1.
	 * We also trigger polling periodically after each 16 packets
	 * (the value 16 here is more or less arbitrary, it's tuned to trigger
	 * less than 10% of times).
	 */
	if (cnt <= 1 || !(cnt % 16))
		vhost_poll_queue(&vq->poll);

	rcu_read_unlock_bh();
}
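
/* The rcu_read_lock_bh() section above pairs with the synchronize_rcu_bh()
 * in vhost_net_release(): once that grace period has elapsed, no callback
 * can still be touching vq->heads or the ubufs structure about to be freed.
 */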

/* Expects to be always run from workqueue - which acts as
 * read-size critical section for our kind of RCU. */
static void handle_tx(struct vhost_net *net)
{
	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
	struct vhost_virtqueue *vq = &nvq->vq;
	unsigned out, in, s;
	int head;
	struct msghdr msg = {
		.msg_name = NULL,
		.msg_namelen = 0,
		.msg_control = NULL,
		.msg_controllen = 0,
		.msg_iov = vq->iov,
		.msg_flags = MSG_DONTWAIT,
	};
	size_t len, total_len = 0;
	int err;
	size_t hdr_size;
	struct socket *sock;
	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
	bool zcopy, zcopy_used;

	mutex_lock(&vq->mutex);
	sock = vq->private_data;
	if (!sock)
		goto out;

	vhost_disable_notify(&net->dev, vq);

	hdr_size = nvq->vhost_hlen;
	zcopy = nvq->ubufs;

	for (;;) {
		/* Release DMAs done buffers first */
		if (zcopy)
			vhost_zerocopy_signal_used(net, vq);

		/* If more outstanding DMAs, queue the work.
		 * Handle upend_idx wrap around
		 */
		if (unlikely((nvq->upend_idx + vq->num - VHOST_MAX_PEND)
			      % UIO_MAXIOV == nvq->done_idx))
			break;

		head = vhost_get_vq_desc(vq, vq->iov,
					 ARRAY_SIZE(vq->iov),
					 &out, &in,
					 NULL, NULL);
		/* On error, stop handling until the next kick. */
		if (unlikely(head < 0))
			break;
		/* Nothing new?  Wait for eventfd to tell us they refilled. */
		if (head == vq->num) {
			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
				vhost_disable_notify(&net->dev, vq);
				continue;
			}
			break;
		}
		if (in) {
			vq_err(vq, "Unexpected descriptor format for TX: "
			       "out %d, int %d\n", out, in);
			break;
		}
		/* Skip header. TODO: support TSO. */
		s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out);
		msg.msg_iovlen = out;
		len = iov_length(vq->iov, out);
		/* Sanity check */
		if (!len) {
			vq_err(vq, "Unexpected header len for TX: "
			       "%zd expected %zd\n",
			       iov_length(nvq->hdr, s), hdr_size);
			break;
		}

		zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
				   && (nvq->upend_idx + 1) % UIO_MAXIOV !=
				      nvq->done_idx
				   && vhost_net_tx_select_zcopy(net);

		/* use msg_control to pass vhost zerocopy ubuf info to skb */
		if (zcopy_used) {
			struct ubuf_info *ubuf;
			ubuf = nvq->ubuf_info + nvq->upend_idx;

			vq->heads[nvq->upend_idx].id = head;
			vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
			ubuf->callback = vhost_zerocopy_callback;
			ubuf->ctx = nvq->ubufs;
			ubuf->desc = nvq->upend_idx;
			msg.msg_control = ubuf;
			msg.msg_controllen = sizeof(ubuf);
			ubufs = nvq->ubufs;
			atomic_inc(&ubufs->refcount);
			nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
		} else {
			msg.msg_control = NULL;
			ubufs = NULL;
		}
		/* TODO: Check specific error and bomb out unless ENOBUFS? */
		err = sock->ops->sendmsg(NULL, sock, &msg, len);
		if (unlikely(err < 0)) {
			if (zcopy_used) {
				vhost_net_ubuf_put(ubufs);
				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
					% UIO_MAXIOV;
			}
			vhost_discard_vq_desc(vq, 1);
			break;
		}
		if (err != len)
			pr_debug("Truncated TX packet: "
				 " len %d != %zd\n", err, len);
		if (!zcopy_used)
			vhost_add_used_and_signal(&net->dev, vq, head, 0);
		else
			vhost_zerocopy_signal_used(net, vq);
		total_len += len;
		vhost_net_tx_packet(net);
		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
			vhost_poll_queue(&vq->poll);
			break;
		}
	}
out:
	mutex_unlock(&vq->mutex);
}
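
/* To summarize the zerocopy decision in handle_tx() above: a packet goes
 * zerocopy only if the backend supports it, the payload is at least
 * VHOST_GOODCOPY_LEN bytes, advancing upend_idx would not collide with
 * done_idx, and vhost_net_tx_select_zcopy() judges the recent error rate
 * acceptable; anything else falls back to an ordinary copying sendmsg().
 */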

static int peek_head_len(struct sock *sk)
{
	struct sk_buff *head;
	int len = 0;
	unsigned long flags;

	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
	head = skb_peek(&sk->sk_receive_queue);
	if (likely(head)) {
		len = head->len;
		if (vlan_tx_tag_present(head))
			len += VLAN_HLEN;
	}
	spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
	return len;
}

/* This is a multi-buffer version of vhost_get_desc, that works if
 *	vq has read descriptors only.
 * @vq		- the relevant virtqueue
 * @datalen	- data length we'll be reading
 * @iovcount	- returned count of io vectors we fill
 * @log		- vhost log
 * @log_num	- log offset
 * @quota	- headcount quota, 1 for big buffer
 *	returns number of buffer heads allocated, negative on error
 */
static int get_rx_bufs(struct vhost_virtqueue *vq,
		       struct vring_used_elem *heads,
		       int datalen,
		       unsigned *iovcount,
		       struct vhost_log *log,
		       unsigned *log_num,
		       unsigned int quota)
{
	unsigned int out, in;
	int seg = 0;
	int headcount = 0;
	unsigned d;
	int r, nlogs = 0;

	while (datalen > 0 && headcount < quota) {
		if (unlikely(seg >= UIO_MAXIOV)) {
			r = -ENOBUFS;
			goto err;
		}
		r = vhost_get_vq_desc(vq, vq->iov + seg,
				      ARRAY_SIZE(vq->iov) - seg, &out,
				      &in, log, log_num);
		if (unlikely(r < 0))
			goto err;

		d = r;
		if (d == vq->num) {
			r = 0;
			goto err;
		}
		if (unlikely(out || in <= 0)) {
			vq_err(vq, "unexpected descriptor format for RX: "
				"out %d, in %d\n", out, in);
			r = -EINVAL;
			goto err;
		}
		if (unlikely(log)) {
			nlogs += *log_num;
			log += *log_num;
		}
		heads[headcount].id = d;
		heads[headcount].len = iov_length(vq->iov + seg, in);
		datalen -= heads[headcount].len;
		++headcount;
		seg += in;
	}
	heads[headcount - 1].len += datalen;
	*iovcount = seg;
	if (unlikely(log))
		*log_num = nlogs;

	/* Detect overrun */
	if (unlikely(datalen > 0)) {
		r = UIO_MAXIOV + 1;
		goto err;
	}
	return headcount;
err:
	vhost_discard_vq_desc(vq, headcount);
	return r;
}
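
/* Worked example for get_rx_bufs(): with datalen == 1500 and three
 * descriptors of 512 bytes each, the loop records heads of len 512/512/512
 * and exits with datalen == -36, so the final adjustment trims the last
 * head to 476 bytes; if the quota is hit before enough buffer space is
 * found, datalen stays positive and the overrun path returns
 * UIO_MAXIOV + 1.
 */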

/* Expects to be always run from workqueue - which acts as
 * read-size critical section for our kind of RCU. */
static void handle_rx(struct vhost_net *net)
{
	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
	struct vhost_virtqueue *vq = &nvq->vq;
	unsigned uninitialized_var(in), log;
	struct vhost_log *vq_log;
	struct msghdr msg = {
		.msg_name = NULL,
		.msg_namelen = 0,
		.msg_control = NULL, /* FIXME: get and handle RX aux data. */
		.msg_controllen = 0,
		.msg_iov = vq->iov,
		.msg_flags = MSG_DONTWAIT,
	};
	struct virtio_net_hdr_mrg_rxbuf hdr = {
		.hdr.flags = 0,
		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
	};
	size_t total_len = 0;
	int err, mergeable;
	s16 headcount;
	size_t vhost_hlen, sock_hlen;
	size_t vhost_len, sock_len;
	struct socket *sock;

	mutex_lock(&vq->mutex);
	sock = vq->private_data;
	if (!sock)
		goto out;
	vhost_disable_notify(&net->dev, vq);

	vhost_hlen = nvq->vhost_hlen;
	sock_hlen = nvq->sock_hlen;

	vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ?
		vq->log : NULL;
	mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);

	while ((sock_len = peek_head_len(sock->sk))) {
		sock_len += sock_hlen;
		vhost_len = sock_len + vhost_hlen;
		headcount = get_rx_bufs(vq, vq->heads, vhost_len,
					&in, vq_log, &log,
					likely(mergeable) ? UIO_MAXIOV : 1);
		/* On error, stop handling until the next kick. */
		if (unlikely(headcount < 0))
			break;
		/* On overrun, truncate and discard */
		if (unlikely(headcount > UIO_MAXIOV)) {
			msg.msg_iovlen = 1;
			err = sock->ops->recvmsg(NULL, sock, &msg,
						 1, MSG_DONTWAIT | MSG_TRUNC);
			pr_debug("Discarded rx packet: len %zd\n", sock_len);
			continue;
		}
		/* OK, now we need to know about added descriptors. */
		if (!headcount) {
			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
				/* They have slipped one in as we were
				 * doing that: check again. */
				vhost_disable_notify(&net->dev, vq);
				continue;
			}
			/* Nothing new?  Wait for eventfd to tell us
			 * they refilled. */
			break;
		}
		/* We don't need to be notified again. */
		if (unlikely((vhost_hlen)))
			/* Skip header. TODO: support TSO. */
			move_iovec_hdr(vq->iov, nvq->hdr, vhost_hlen, in);
		else
			/* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF:
			 * needed because recvmsg can modify msg_iov. */
			copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in);
		msg.msg_iovlen = in;
		err = sock->ops->recvmsg(NULL, sock, &msg,
					 sock_len, MSG_DONTWAIT | MSG_TRUNC);
		/* Userspace might have consumed the packet meanwhile:
		 * it's not supposed to do this usually, but might be hard
		 * to prevent. Discard data we got (if any) and keep going. */
		if (unlikely(err != sock_len)) {
			pr_debug("Discarded rx packet: "
				 " len %d, expected %zd\n", err, sock_len);
			vhost_discard_vq_desc(vq, headcount);
			continue;
		}
		if (unlikely(vhost_hlen) &&
		    memcpy_toiovecend(nvq->hdr, (unsigned char *)&hdr, 0,
				      vhost_hlen)) {
			vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
			       vq->iov->iov_base);
			break;
		}
		/* TODO: Should check and handle checksum. */
		if (likely(mergeable) &&
		    memcpy_toiovecend(nvq->hdr, (unsigned char *)&headcount,
				      offsetof(typeof(hdr), num_buffers),
				      sizeof hdr.num_buffers)) {
			vq_err(vq, "Failed num_buffers write");
			vhost_discard_vq_desc(vq, headcount);
			break;
		}
		vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
					    headcount);
		if (unlikely(vq_log))
			vhost_log_write(vq, vq_log, log, vhost_len);
		total_len += vhost_len;
		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
			vhost_poll_queue(&vq->poll);
			break;
		}
	}
out:
	mutex_unlock(&vq->mutex);
}
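
/* Note on the mergeable path in handle_rx() above: the vnet header iovec is
 * copied out up front because recvmsg() may modify msg_iov, and the actual
 * number of merged buffers is only known afterwards, when headcount is
 * patched into the num_buffers field of the header already placed in guest
 * memory.
 */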

static void handle_tx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);

	handle_tx(net);
}

static void handle_rx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);

	handle_rx(net);
}

static void handle_tx_net(struct vhost_work *work)
{
	struct vhost_net *net = container_of(work, struct vhost_net,
					     poll[VHOST_NET_VQ_TX].work);
	handle_tx(net);
}

static void handle_rx_net(struct vhost_work *work)
{
	struct vhost_net *net = container_of(work, struct vhost_net,
					     poll[VHOST_NET_VQ_RX].work);
	handle_rx(net);
}

static int vhost_net_open(struct inode *inode, struct file *f)
{
	struct vhost_net *n;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int i;

	n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
	if (!n) {
		n = vmalloc(sizeof *n);
		if (!n)
			return -ENOMEM;
	}
	vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		kvfree(n);
		return -ENOMEM;
	}

	dev = &n->dev;
	vqs[VHOST_NET_VQ_TX] = &n->vqs[VHOST_NET_VQ_TX].vq;
	vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq;
	n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick;
	n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick;
	for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
		n->vqs[i].ubufs = NULL;
		n->vqs[i].ubuf_info = NULL;
		n->vqs[i].upend_idx = 0;
		n->vqs[i].done_idx = 0;
		n->vqs[i].vhost_hlen = 0;
		n->vqs[i].sock_hlen = 0;
	}
	vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);

	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);

	f->private_data = n;

	return 0;
}

static void vhost_net_disable_vq(struct vhost_net *n,
				 struct vhost_virtqueue *vq)
{
	struct vhost_net_virtqueue *nvq =
		container_of(vq, struct vhost_net_virtqueue, vq);
	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
	if (!vq->private_data)
		return;
	vhost_poll_stop(poll);
}

static int vhost_net_enable_vq(struct vhost_net *n,
				struct vhost_virtqueue *vq)
{
	struct vhost_net_virtqueue *nvq =
		container_of(vq, struct vhost_net_virtqueue, vq);
	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
	struct socket *sock;

	sock = vq->private_data;
	if (!sock)
		return 0;

	return vhost_poll_start(poll, sock->file);
}

static struct socket *vhost_net_stop_vq(struct vhost_net *n,
					struct vhost_virtqueue *vq)
{
	struct socket *sock;

	mutex_lock(&vq->mutex);
	sock = vq->private_data;
	vhost_net_disable_vq(n, vq);
	vq->private_data = NULL;
	mutex_unlock(&vq->mutex);
	return sock;
}

static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock,
			   struct socket **rx_sock)
{
	*tx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_TX].vq);
	*rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq);
}

static void vhost_net_flush_vq(struct vhost_net *n, int index)
{
	vhost_poll_flush(n->poll + index);
	vhost_poll_flush(&n->vqs[index].vq.poll);
}

static void vhost_net_flush(struct vhost_net *n)
{
	vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
	vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
	if (n->vqs[VHOST_NET_VQ_TX].ubufs) {
		mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
		n->tx_flush = true;
		mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
		/* Wait for all lower device DMAs done. */
		vhost_net_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs);
		mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
		n->tx_flush = false;
		atomic_set(&n->vqs[VHOST_NET_VQ_TX].ubufs->refcount, 1);
		mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
	}
}

static int vhost_net_release(struct inode *inode, struct file *f)
{
	struct vhost_net *n = f->private_data;
	struct socket *tx_sock;
	struct socket *rx_sock;

	vhost_net_stop(n, &tx_sock, &rx_sock);
	vhost_net_flush(n);
	vhost_dev_stop(&n->dev);
	vhost_dev_cleanup(&n->dev, false);
	vhost_net_vq_reset(n);
	if (tx_sock)
		sockfd_put(tx_sock);
	if (rx_sock)
		sockfd_put(rx_sock);
	/* Make sure no callbacks are outstanding */
	synchronize_rcu_bh();
	/* We do an extra flush before freeing memory,
	 * since jobs can re-queue themselves. */
	vhost_net_flush(n);
	kfree(n->dev.vqs);
	kvfree(n);
	return 0;
}

static struct socket *get_raw_socket(int fd)
{
	struct {
		struct sockaddr_ll sa;
		char  buf[MAX_ADDR_LEN];
	} uaddr;
	int uaddr_len = sizeof uaddr, r;
	struct socket *sock = sockfd_lookup(fd, &r);

	if (!sock)
		return ERR_PTR(-ENOTSOCK);

	/* Parameter checking */
	if (sock->sk->sk_type != SOCK_RAW) {
		r = -ESOCKTNOSUPPORT;
		goto err;
	}

	r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa,
			       &uaddr_len, 0);
	if (r)
		goto err;

	if (uaddr.sa.sll_family != AF_PACKET) {
		r = -EPFNOSUPPORT;
		goto err;
	}
	return sock;
err:
	sockfd_put(sock);
	return ERR_PTR(r);
}

static struct socket *get_tap_socket(int fd)
{
	struct file *file = fget(fd);
	struct socket *sock;

	if (!file)
		return ERR_PTR(-EBADF);
	sock = tun_get_socket(file);
	if (!IS_ERR(sock))
		return sock;
	sock = macvtap_get_socket(file);
	if (IS_ERR(sock))
		fput(file);
	return sock;
}

static struct socket *get_socket(int fd)
{
	struct socket *sock;

	/* special case to disable backend */
	if (fd == -1)
		return NULL;
	sock = get_raw_socket(fd);
	if (!IS_ERR(sock))
		return sock;
	sock = get_tap_socket(fd);
	if (!IS_ERR(sock))
		return sock;
	return ERR_PTR(-ENOTSOCK);
}

static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
{
	struct socket *sock, *oldsock;
	struct vhost_virtqueue *vq;
	struct vhost_net_virtqueue *nvq;
	struct vhost_net_ubuf_ref *ubufs, *oldubufs = NULL;
	int r;

	mutex_lock(&n->dev.mutex);
	r = vhost_dev_check_owner(&n->dev);
	if (r)
		goto err;

	if (index >= VHOST_NET_VQ_MAX) {
		r = -ENOBUFS;
		goto err;
	}
	vq = &n->vqs[index].vq;
	nvq = &n->vqs[index];
	mutex_lock(&vq->mutex);

	/* Verify that ring has been setup correctly. */
	if (!vhost_vq_access_ok(vq)) {
		r = -EFAULT;
		goto err_vq;
	}
	sock = get_socket(fd);
	if (IS_ERR(sock)) {
		r = PTR_ERR(sock);
		goto err_vq;
	}

	/* start polling new socket */
	oldsock = vq->private_data;
	if (sock != oldsock) {
		ubufs = vhost_net_ubuf_alloc(vq,
					     sock && vhost_sock_zcopy(sock));
		if (IS_ERR(ubufs)) {
			r = PTR_ERR(ubufs);
			goto err_ubufs;
		}

		vhost_net_disable_vq(n, vq);
		vq->private_data = sock;
		r = vhost_init_used(vq);
		if (r)
			goto err_used;
		r = vhost_net_enable_vq(n, vq);
		if (r)
			goto err_used;

		oldubufs = nvq->ubufs;
		nvq->ubufs = ubufs;

		n->tx_packets = 0;
		n->tx_zcopy_err = 0;
		n->tx_flush = false;
	}

	mutex_unlock(&vq->mutex);

	if (oldubufs) {
		vhost_net_ubuf_put_wait_and_free(oldubufs);
		mutex_lock(&vq->mutex);
		vhost_zerocopy_signal_used(n, vq);
		mutex_unlock(&vq->mutex);
	}

	if (oldsock) {
		vhost_net_flush_vq(n, index);
		sockfd_put(oldsock);
	}

	mutex_unlock(&n->dev.mutex);
	return 0;

err_used:
	vq->private_data = oldsock;
	vhost_net_enable_vq(n, vq);
	if (ubufs)
		vhost_net_ubuf_put_wait_and_free(ubufs);
err_ubufs:
	sockfd_put(sock);
err_vq:
	mutex_unlock(&vq->mutex);
err:
	mutex_unlock(&n->dev.mutex);
	return r;
}

static long vhost_net_reset_owner(struct vhost_net *n)
{
	struct socket *tx_sock = NULL;
	struct socket *rx_sock = NULL;
	long err;
	struct vhost_memory *memory;

	mutex_lock(&n->dev.mutex);
	err = vhost_dev_check_owner(&n->dev);
	if (err)
		goto done;
	memory = vhost_dev_reset_owner_prepare();
	if (!memory) {
		err = -ENOMEM;
		goto done;
	}
	vhost_net_stop(n, &tx_sock, &rx_sock);
	vhost_net_flush(n);
	vhost_dev_reset_owner(&n->dev, memory);
	vhost_net_vq_reset(n);
done:
	mutex_unlock(&n->dev.mutex);
	if (tx_sock)
		sockfd_put(tx_sock);
	if (rx_sock)
		sockfd_put(rx_sock);
	return err;
}

static int vhost_net_set_features(struct vhost_net *n, u64 features)
{
	size_t vhost_hlen, sock_hlen, hdr_len;
	int i;

	hdr_len = (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ?
			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
			sizeof(struct virtio_net_hdr);
	if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) {
		/* vhost provides vnet_hdr */
		vhost_hlen = hdr_len;
		sock_hlen = 0;
	} else {
		/* socket provides vnet_hdr */
		vhost_hlen = 0;
		sock_hlen = hdr_len;
	}
	mutex_lock(&n->dev.mutex);
	if ((features & (1 << VHOST_F_LOG_ALL)) &&
	    !vhost_log_access_ok(&n->dev)) {
		mutex_unlock(&n->dev.mutex);
		return -EFAULT;
	}
	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
		mutex_lock(&n->vqs[i].vq.mutex);
		n->vqs[i].vq.acked_features = features;
		n->vqs[i].vhost_hlen = vhost_hlen;
		n->vqs[i].sock_hlen = sock_hlen;
		mutex_unlock(&n->vqs[i].vq.mutex);
	}
	mutex_unlock(&n->dev.mutex);
	return 0;
}

static long vhost_net_set_owner(struct vhost_net *n)
{
	int r;

	mutex_lock(&n->dev.mutex);
	if (vhost_dev_has_owner(&n->dev)) {
		r = -EBUSY;
		goto out;
	}
	r = vhost_net_set_ubuf_info(n);
	if (r)
		goto out;
	r = vhost_dev_set_owner(&n->dev);
	if (r)
		vhost_net_clear_ubuf_info(n);
	vhost_net_flush(n);
out:
	mutex_unlock(&n->dev.mutex);
	return r;
}

static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
			    unsigned long arg)
{
	struct vhost_net *n = f->private_data;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	struct vhost_vring_file backend;
	u64 features;
	int r;

	switch (ioctl) {
	case VHOST_NET_SET_BACKEND:
		if (copy_from_user(&backend, argp, sizeof backend))
			return -EFAULT;
		return vhost_net_set_backend(n, backend.index, backend.fd);
	case VHOST_GET_FEATURES:
		features = VHOST_NET_FEATURES;
		if (copy_to_user(featurep, &features, sizeof features))
			return -EFAULT;
		return 0;
	case VHOST_SET_FEATURES:
		if (copy_from_user(&features, featurep, sizeof features))
			return -EFAULT;
		if (features & ~VHOST_NET_FEATURES)
			return -EOPNOTSUPP;
		return vhost_net_set_features(n, features);
	case VHOST_RESET_OWNER:
		return vhost_net_reset_owner(n);
	case VHOST_SET_OWNER:
		return vhost_net_set_owner(n);
	default:
		mutex_lock(&n->dev.mutex);
		r = vhost_dev_ioctl(&n->dev, ioctl, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vring_ioctl(&n->dev, ioctl, argp);
		else
			vhost_net_flush(n);
		mutex_unlock(&n->dev.mutex);
		return r;
	}
}

#ifdef CONFIG_COMPAT
static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
				   unsigned long arg)
{
	return vhost_net_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
}
#endif

static const struct file_operations vhost_net_fops = {
	.owner          = THIS_MODULE,
	.release        = vhost_net_release,
	.unlocked_ioctl = vhost_net_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl   = vhost_net_compat_ioctl,
#endif
	.open           = vhost_net_open,
	.llseek		= noop_llseek,
};

static struct miscdevice vhost_net_misc = {
	.minor = VHOST_NET_MINOR,
	.name = "vhost-net",
	.fops = &vhost_net_fops,
};

static int vhost_net_init(void)
{
	if (experimental_zcopytx)
		vhost_net_enable_zcopy(VHOST_NET_VQ_TX);
	return misc_register(&vhost_net_misc);
}
module_init(vhost_net_init);

static void vhost_net_exit(void)
{
	misc_deregister(&vhost_net_misc);
}
module_exit(vhost_net_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Michael S. Tsirkin");
MODULE_DESCRIPTION("Host kernel accelerator for virtio net");
MODULE_ALIAS_MISCDEV(VHOST_NET_MINOR);
MODULE_ALIAS("devname:vhost-net");