vhost-user: wire MODE_VU into option parsing, the epoll loop and the tap paths

Summary of what the hunks below do:
 - conf.c: add --vhost-user (selects MODE_VU, rejected in pasta mode),
   --print-capabilities, and --socket-path as an alias of -s/--socket.
 - passt.c: call vu_init() after tap_sock_init() and dispatch
   EPOLL_TYPE_VHOST_CMD / EPOLL_TYPE_VHOST_KICK events.
 - passt.h: add MODE_VU to enum passt_modes.
 - tap.c: route tap_send() and tap_send_frames() per mode, register the
   connected socket as EPOLL_TYPE_VHOST_CMD without EPOLLET in MODE_VU, and
   print a vhost-user specific qemu command line hint.
 - tcp.c, udp.c: compute TCP and UDP/IPv6 checksums only when the mode is not
   MODE_VU or when *c->pcap is set; otherwise the UDP/IPv6 field is 0xffff.

In tap_listen_handler() the epoll reference is zero-initialised through its
u64 member, so that ev.data.u64 never carries indeterminate bits: only .fd
and .type are assigned afterwards.

NOTE(review): this text was recovered from a copy in which line breaks and
whitespace runs had been collapsed. Blank context lines were placed to match
the hunk headers; spacing inside string literals and tables is as found and
may differ from the tree, so the patch may need regenerating to apply.

diff --git a/conf.c b/conf.c
index 0a845cd..27fa44a 100644
--- a/conf.c
+++ b/conf.c
@@ -44,6 +44,7 @@
 #include "lineread.h"
 #include "isolation.h"
 #include "log.h"
+#include "vhost_user.h"
 
 /**
  * next_chunk - Return the next piece of a string delimited by a character
@@ -721,9 +722,12 @@ static void print_usage(const char *name, int status)
 		info( " -I, --ns-ifname NAME namespace interface name");
 		info( " default: same interface name as external one");
 	} else {
-		info( " -s, --socket PATH UNIX domain socket path");
+		info( " -s, --socket, --socket-path PATH UNIX domain socket path");
 		info( " default: probe free path starting from "
 		     UNIX_SOCK_PATH, 1);
+		info( " --vhost-user Enable vhost-user mode");
+		info( " UNIX domain socket is provided by -s option");
+		info( " --print-capabilities print back-end capabilities in JSON format");
 	}
 
 	info( " -F, --fd FD Use FD as pre-opened connected socket");
@@ -1109,6 +1113,7 @@ void conf(struct ctx *c, int argc, char **argv)
 		{"help", no_argument, NULL, 'h' },
 		{"socket", required_argument, NULL, 's' },
 		{"fd", required_argument, NULL, 'F' },
+		{"socket-path", required_argument, NULL, 's' }, /* vhost-user mandatory */
 		{"ns-ifname", required_argument, NULL, 'I' },
 		{"pcap", required_argument, NULL, 'p' },
 		{"pid", required_argument, NULL, 'P' },
@@ -1155,6 +1160,8 @@ void conf(struct ctx *c, int argc, char **argv)
 		{"config-net", no_argument, NULL, 17 },
 		{"no-copy-routes", no_argument, NULL, 18 },
 		{"no-copy-addrs", no_argument, NULL, 19 },
+		{"vhost-user", no_argument, NULL, 20 },
+		{"print-capabilities", no_argument, NULL, 21 }, /* vhost-user mandatory */
 		{ 0 },
 	};
 	char userns[PATH_MAX] = { 0 }, netns[PATH_MAX] = { 0 };
@@ -1314,7 +1321,6 @@ void conf(struct ctx *c, int argc, char **argv)
 				       sizeof(c->ip6.ifname_out), "%s", optarg);
 			if (ret <= 0 || ret >= (int)sizeof(c->ip6.ifname_out))
 				die("Invalid interface name: %s", optarg);
-
 			break;
 		case 17:
 			if (c->mode != MODE_PASTA)
@@ -1336,6 +1342,16 @@ void conf(struct ctx *c, int argc, char **argv)
 			warn("--no-copy-addrs will be dropped soon");
 			c->no_copy_addrs = copy_addrs_opt = true;
 			break;
+		case 20:
+			if (c->mode == MODE_PASTA) {
+				err("--vhost-user is for passt mode only");
+				usage(argv[0]);
+			}
+			c->mode = MODE_VU;
+			break;
+		case 21:
+			vu_print_capabilities();
+			break;
 		case 'd':
 			if (c->debug)
 				die("Multiple --debug options given");
diff --git a/passt.c b/passt.c
index 7da05c7..2dc4f16 100644
--- a/passt.c
+++ b/passt.c
@@ -280,6 +280,7 @@ int main(int argc, char **argv)
 	pasta_netns_quit_init(&c);
 
 	tap_sock_init(&c);
+	vu_init(&c);
 
 	secret_init(&c);
 
@@ -390,6 +391,12 @@ loop:
 		case EPOLL_TYPE_ICMPV6:
 			icmp_sock_handler(&c, AF_INET6, ref);
 			break;
+		case EPOLL_TYPE_VHOST_CMD:
+			tap_handler_vu(&c, eventmask);
+			break;
+		case EPOLL_TYPE_VHOST_KICK:
+			vu_kick_cb(&c, ref);
+			break;
 		default:
 			/* Can't happen */
 			ASSERT(0);
diff --git a/passt.h b/passt.h
index 521fb1b..46c7a62 100644
--- a/passt.h
+++ b/passt.h
@@ -145,6 +145,7 @@ struct fqdn {
 enum passt_modes {
 	MODE_PASST,
 	MODE_PASTA,
+	MODE_VU,
 };
 
 /**
diff --git a/tap.c b/tap.c
index d7d1c3e..2f0d88c 100644
--- a/tap.c
+++ b/tap.c
@@ -58,6 +58,7 @@
 #include "packet.h"
 #include "tap.h"
 #include "log.h"
+#include "vhost_user.h"
 
 /* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
 static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
@@ -76,19 +77,22 @@ static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
  */
 int tap_send(const struct ctx *c, const void *data, size_t len)
 {
+	int flags = MSG_NOSIGNAL | MSG_DONTWAIT;
+	uint32_t vnet_len = htonl(len);
+
 	pcap(data, len);
 
-	if (c->mode == MODE_PASST) {
-		int flags = MSG_NOSIGNAL | MSG_DONTWAIT;
-		uint32_t vnet_len = htonl(len);
-
+	switch (c->mode) {
+	case MODE_PASST:
 		if (send(c->fd_tap, &vnet_len, 4, flags) < 0)
 			return -1;
-
 		return send(c->fd_tap, data, len, flags);
+	case MODE_PASTA:
+		return write(c->fd_tap, (char *)data, len);
+	case MODE_VU:
+		return vu_send(c, data, len);
 	}
-
-	return write(c->fd_tap, (char *)data, len);
+	return 0;
 }
 
 /**
@@ -465,10 +469,20 @@ size_t tap_send_frames(const struct ctx *c, const struct iovec *iov, size_t n)
 	if (!n)
 		return 0;
 
-	if (c->mode == MODE_PASTA)
+	switch (c->mode) {
+	case MODE_PASTA:
 		m = tap_send_frames_pasta(c, iov, n);
-	else
+		break;
+	case MODE_PASST:
 		m = tap_send_frames_passt(c, iov, n);
+		break;
+	case MODE_VU:
+		m = tap_send_frames_vu(c, iov, n);
+		break;
+	default:
+		m = 0;
+		break;
+	}
 
 	if (m < n)
 		debug("tap: failed to send %zu frames of %zu", n - m, n);
@@ -1249,11 +1263,17 @@ static void tap_sock_unix_init(struct ctx *c)
 	ev.data.u64 = ref.u64;
 	epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap_listen, &ev);
 
-	info("You can now start qemu (>= 7.2, with commit 13c6be96618c):");
-	info(" kvm ... -device virtio-net-pci,netdev=s -netdev stream,id=s,server=off,addr.type=unix,addr.path=%s",
-	     addr.sun_path);
-	info("or qrap, for earlier qemu versions:");
-	info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio");
+	if (c->mode == MODE_VU) {
+		info("You can start qemu with:");
+		info(" kvm ... -chardev socket,id=chr0,path=%s -netdev vhost-user,id=netdev0,chardev=chr0 -device virtio-net,netdev=netdev0 -object memory-backend-memfd,id=memfd0,share=on,size=$RAMSIZE -numa node,memdev=memfd0\n",
+		     addr.sun_path);
+	} else {
+		info("You can now start qemu (>= 7.2, with commit 13c6be96618c):");
+		info(" kvm ... -device virtio-net-pci,netdev=s -netdev stream,id=s,server=off,addr.type=unix,addr.path=%s",
+		     addr.sun_path);
+		info("or qrap, for earlier qemu versions:");
+		info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio");
+	}
 }
 
 /**
@@ -1263,7 +1283,7 @@ static void tap_sock_unix_init(struct ctx *c)
  */
 void tap_listen_handler(struct ctx *c, uint32_t events)
 {
-	union epoll_ref ref = { .type = EPOLL_TYPE_TAP_PASST };
+	union epoll_ref ref = { .u64 = 0 };
 	struct epoll_event ev = { 0 };
 	int v = INT_MAX / 2;
 	struct ucred ucred;
@@ -1304,7 +1324,13 @@ void tap_listen_handler(struct ctx *c, uint32_t events)
 		trace("tap: failed to set SO_SNDBUF to %i", v);
 
 	ref.fd = c->fd_tap;
-	ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+	if (c->mode == MODE_VU) {
+		ref.type = EPOLL_TYPE_VHOST_CMD;
+		ev.events = EPOLLIN | EPOLLRDHUP;
+	} else {
+		ref.type = EPOLL_TYPE_TAP_PASST;
+		ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET;
+	}
 	ev.data.u64 = ref.u64;
 	epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
 }
@@ -1388,12 +1414,21 @@ void tap_sock_init(struct ctx *c)
 
 		ASSERT(c->one_off);
 		ref.fd = c->fd_tap;
-		if (c->mode == MODE_PASST)
+		switch (c->mode) {
+		case MODE_PASST:
 			ref.type = EPOLL_TYPE_TAP_PASST;
-		else
+			ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+			break;
+		case MODE_PASTA:
 			ref.type = EPOLL_TYPE_TAP_PASTA;
+			ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+			break;
+		case MODE_VU:
+			ref.type = EPOLL_TYPE_VHOST_CMD;
+			ev.events = EPOLLIN | EPOLLRDHUP;
+			break;
+		}
 
-		ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
 		ev.data.u64 = ref.u64;
 		epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
 		return;
diff --git a/tcp.c b/tcp.c
index 1e37714..b21da94 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1049,7 +1049,8 @@ size_t tcp_fill_headers4(const struct ctx *c,
 
 	tcp_fill_header(th, conn, seq);
 
-	tcp_update_check_tcp4(iph, th);
+	if (c->mode != MODE_VU || *c->pcap)
+		tcp_update_check_tcp4(iph, th);
 
 	return ip_len;
 }
@@ -1090,7 +1091,8 @@ size_t tcp_fill_headers6(const struct ctx *c,
 
 	tcp_fill_header(th, conn, seq);
 
-	tcp_update_check_tcp6(ip6h, th);
+	if (c->mode != MODE_VU || *c->pcap)
+		tcp_update_check_tcp6(ip6h, th);
 
 	return ip_len;
 }
diff --git a/udp.c b/udp.c
index de5313d..8c8f7c3 100644
--- a/udp.c
+++ b/udp.c
@@ -681,9 +681,12 @@ static size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h,
 	uh->dest = htons(dstport);
 	uh->len = ip6h->payload_len;
 	uh->check = 0;
-	uh->check = csum(uh, payload_len,
-			 proto_ipv6_header_psum(payload_len, IPPROTO_UDP,
-						src, dst));
+	if (c->mode != MODE_VU || *c->pcap)
+		uh->check = csum(uh, payload_len,
+				 proto_ipv6_header_psum(payload_len, IPPROTO_UDP,
+							src, dst));
+	else
+		uh->check = 0xffff; /* zero checksum is invalid with IPv6 */
 
 	return ip_len;
 }