udp: vhost-user RX nocopy
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
This commit is contained in:
parent
2d5528c9be
commit
95aebad0a4
7 changed files with 266 additions and 16 deletions
6
Makefile
6
Makefile
|
@ -47,7 +47,7 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
|
||||||
PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \
|
PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \
|
||||||
icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \
|
icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \
|
||||||
ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \
|
ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \
|
||||||
tcp_buf.c tcp_splice.c tcp_vu.c udp.c util.c vhost_user.c virtio.c
|
tcp_buf.c tcp_splice.c tcp_vu.c udp.c udp_vu.c util.c vhost_user.c virtio.c
|
||||||
QRAP_SRCS = qrap.c
|
QRAP_SRCS = qrap.c
|
||||||
SRCS = $(PASST_SRCS) $(QRAP_SRCS)
|
SRCS = $(PASST_SRCS) $(QRAP_SRCS)
|
||||||
|
|
||||||
|
@ -56,8 +56,8 @@ MANPAGES = passt.1 pasta.1 qrap.1
|
||||||
PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
|
PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
|
||||||
flow_table.h icmp.h inany.h iov.h ip.h isolation.h lineread.h log.h \
|
flow_table.h icmp.h inany.h iov.h ip.h isolation.h lineread.h log.h \
|
||||||
ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h tap.h \
|
ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h siphash.h tap.h \
|
||||||
tcp.h tcp_buf.h tcp_conn.h tcp_splice.h tcp_vu.h udp.h util.h \
|
tcp.h tcp_buf.h tcp_conn.h tcp_splice.h tcp_vu.h udp.h udp_internal.h \
|
||||||
vhost_user.h virtio.h
|
udp_vu.h util.h vhost_user.h virtio.h
|
||||||
HEADERS = $(PASST_HEADERS) seccomp.h
|
HEADERS = $(PASST_HEADERS) seccomp.h
|
||||||
|
|
||||||
C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
|
C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
|
||||||
|
|
3
passt.c
3
passt.c
|
@ -383,6 +383,9 @@ loop:
|
||||||
tcp_timer_handler(&c, ref);
|
tcp_timer_handler(&c, ref);
|
||||||
break;
|
break;
|
||||||
case EPOLL_TYPE_UDP:
|
case EPOLL_TYPE_UDP:
|
||||||
|
if (c.mode == MODE_VU)
|
||||||
|
udp_vu_sock_handler(&c, ref, eventmask, &now);
|
||||||
|
else
|
||||||
udp_buf_sock_handler(&c, ref, eventmask, &now);
|
udp_buf_sock_handler(&c, ref, eventmask, &now);
|
||||||
break;
|
break;
|
||||||
case EPOLL_TYPE_ICMP:
|
case EPOLL_TYPE_ICMP:
|
||||||
|
|
1
passt.h
1
passt.h
|
@ -42,6 +42,7 @@ union epoll_ref;
|
||||||
#include "fwd.h"
|
#include "fwd.h"
|
||||||
#include "tcp.h"
|
#include "tcp.h"
|
||||||
#include "udp.h"
|
#include "udp.h"
|
||||||
|
#include "udp_vu.h"
|
||||||
#include "vhost_user.h"
|
#include "vhost_user.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
15
udp.c
15
udp.c
|
@ -120,9 +120,7 @@
|
||||||
#include "tap.h"
|
#include "tap.h"
|
||||||
#include "pcap.h"
|
#include "pcap.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
|
#include "udp_internal.h"
|
||||||
#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */
|
|
||||||
#define UDP_MAX_FRAMES 32 /* max # of frames to receive at once */
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct udp_tap_port - Port tracking based on tap-facing source port
|
* struct udp_tap_port - Port tracking based on tap-facing source port
|
||||||
|
@ -230,11 +228,11 @@ static struct mmsghdr udp6_l2_mh_sock [UDP_MAX_FRAMES];
|
||||||
static struct iovec udp4_iov_splice [UDP_MAX_FRAMES];
|
static struct iovec udp4_iov_splice [UDP_MAX_FRAMES];
|
||||||
static struct iovec udp6_iov_splice [UDP_MAX_FRAMES];
|
static struct iovec udp6_iov_splice [UDP_MAX_FRAMES];
|
||||||
|
|
||||||
static struct sockaddr_in udp4_localname = {
|
struct sockaddr_in udp4_localname = {
|
||||||
.sin_family = AF_INET,
|
.sin_family = AF_INET,
|
||||||
.sin_addr = IN4ADDR_LOOPBACK_INIT,
|
.sin_addr = IN4ADDR_LOOPBACK_INIT,
|
||||||
};
|
};
|
||||||
static struct sockaddr_in6 udp6_localname = {
|
struct sockaddr_in6 udp6_localname = {
|
||||||
.sin6_family = AF_INET6,
|
.sin6_family = AF_INET6,
|
||||||
.sin6_addr = IN6ADDR_LOOPBACK_INIT,
|
.sin6_addr = IN6ADDR_LOOPBACK_INIT,
|
||||||
};
|
};
|
||||||
|
@ -567,7 +565,7 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n,
|
||||||
*
|
*
|
||||||
* Return: size of tap frame with headers
|
* Return: size of tap frame with headers
|
||||||
*/
|
*/
|
||||||
static size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph,
|
size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph,
|
||||||
size_t data_len, struct sockaddr_in *s_in,
|
size_t data_len, struct sockaddr_in *s_in,
|
||||||
in_port_t dstport, const struct timespec *now)
|
in_port_t dstport, const struct timespec *now)
|
||||||
{
|
{
|
||||||
|
@ -608,6 +606,7 @@ static size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph,
|
||||||
uh->source = s_in->sin_port;
|
uh->source = s_in->sin_port;
|
||||||
uh->dest = htons(dstport);
|
uh->dest = htons(dstport);
|
||||||
uh->len = htons(data_len + sizeof(struct udphdr));
|
uh->len = htons(data_len + sizeof(struct udphdr));
|
||||||
|
uh->check = 0;
|
||||||
|
|
||||||
return ip_len;
|
return ip_len;
|
||||||
}
|
}
|
||||||
|
@ -621,7 +620,7 @@ static size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph,
|
||||||
*
|
*
|
||||||
* Return: size of tap frame with headers
|
* Return: size of tap frame with headers
|
||||||
*/
|
*/
|
||||||
static size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h,
|
size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h,
|
||||||
size_t data_len, struct sockaddr_in6 *s_in6,
|
size_t data_len, struct sockaddr_in6 *s_in6,
|
||||||
in_port_t dstport, const struct timespec *now)
|
in_port_t dstport, const struct timespec *now)
|
||||||
{
|
{
|
||||||
|
@ -681,7 +680,7 @@ static size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h,
|
||||||
uh->dest = htons(dstport);
|
uh->dest = htons(dstport);
|
||||||
uh->len = ip6h->payload_len;
|
uh->len = ip6h->payload_len;
|
||||||
uh->check = 0;
|
uh->check = 0;
|
||||||
if (c->mode != MODE_VU || *c->pcap)
|
if (c->mode != MODE_VU)
|
||||||
uh->check = csum(uh, payload_len,
|
uh->check = csum(uh, payload_len,
|
||||||
proto_ipv6_header_psum(payload_len, IPPROTO_UDP,
|
proto_ipv6_header_psum(payload_len, IPPROTO_UDP,
|
||||||
src, dst));
|
src, dst));
|
||||||
|
|
21
udp_internal.h
Normal file
21
udp_internal.h
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright (c) 2021 Red Hat GmbH
 * Author: Stefano Brivio <sbrivio@redhat.com>
 */

#ifndef UDP_INTERNAL_H
#define UDP_INTERNAL_H

/* size_t, in_port_t and the sockaddr types are used below: pull them in here
 * so that this header doesn't depend on the include order of its users
 */
#include <stddef.h>
#include <netinet/in.h>

/* Only pointers to these types appear in the prototypes below */
struct ctx;
struct iphdr;
struct ipv6hdr;
struct timespec;

#define UDP_CONN_TIMEOUT	180 /* s, timeout for ephemeral or local bind */
#define UDP_MAX_FRAMES		32  /* max # of frames to receive at once */

/* Socket address storage defined in udp.c, shared with the other UDP
 * socket handlers
 */
extern struct sockaddr_in udp4_localname;
extern struct sockaddr_in6 udp6_localname;

/* Both functions below are implemented in udp.c.
 *
 * Return: size of tap frame with headers
 */
size_t udp_update_hdr4(const struct ctx *c, struct iphdr *iph,
		       size_t data_len, struct sockaddr_in *s_in,
		       in_port_t dstport, const struct timespec *now);
size_t udp_update_hdr6(const struct ctx *c, struct ipv6hdr *ip6h,
		       size_t data_len, struct sockaddr_in6 *s_in6,
		       in_port_t dstport, const struct timespec *now);

#endif /* UDP_INTERNAL_H */
|
218
udp_vu.c
Normal file
218
udp_vu.c
Normal file
|
@ -0,0 +1,218 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <net/ethernet.h>
|
||||||
|
#include <net/if.h>
|
||||||
|
#include <netinet/in.h>
|
||||||
|
#include <netinet/ip.h>
|
||||||
|
#include <netinet/udp.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <sys/uio.h>
|
||||||
|
#include <linux/virtio_net.h>
|
||||||
|
|
||||||
|
#include "checksum.h"
|
||||||
|
#include "util.h"
|
||||||
|
#include "ip.h"
|
||||||
|
#include "passt.h"
|
||||||
|
#include "pcap.h"
|
||||||
|
#include "log.h"
|
||||||
|
#include "vhost_user.h"
|
||||||
|
#include "udp_internal.h"
|
||||||
|
#include "udp_vu.h"
|
||||||
|
|
||||||
|
/* vhost-user */
|
||||||
|
static const struct virtio_net_hdr vu_header = {
|
||||||
|
.flags = VIRTIO_NET_HDR_F_DATA_VALID,
|
||||||
|
.gso_type = VIRTIO_NET_HDR_GSO_NONE,
|
||||||
|
};
|
||||||
|
|
||||||
|
static unsigned char buffer[65536];
|
||||||
|
static struct iovec iov_vu [VIRTQUEUE_MAX_SIZE];
|
||||||
|
static unsigned int indexes [VIRTQUEUE_MAX_SIZE];
|
||||||
|
|
||||||
|
void udp_vu_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events,
|
||||||
|
const struct timespec *now)
|
||||||
|
{
|
||||||
|
VuDev *vdev = (VuDev *)&c->vdev;
|
||||||
|
VuVirtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
|
||||||
|
size_t l2_hdrlen, vnet_hdrlen, fillsize;
|
||||||
|
ssize_t data_len;
|
||||||
|
in_port_t dstport = ref.udp.port;
|
||||||
|
bool has_mrg_rxbuf, v6 = ref.udp.v6;
|
||||||
|
struct msghdr msg;
|
||||||
|
int i, iov_count, iov_used, virtqueue_max;
|
||||||
|
|
||||||
|
if (c->no_udp || !(events & EPOLLIN))
|
||||||
|
return;
|
||||||
|
|
||||||
|
has_mrg_rxbuf = vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF);
|
||||||
|
if (has_mrg_rxbuf) {
|
||||||
|
vnet_hdrlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
|
||||||
|
virtqueue_max = VIRTQUEUE_MAX_SIZE;
|
||||||
|
} else {
|
||||||
|
vnet_hdrlen = sizeof(struct virtio_net_hdr);
|
||||||
|
virtqueue_max = 1;
|
||||||
|
}
|
||||||
|
l2_hdrlen = vnet_hdrlen + sizeof(struct ethhdr) + sizeof(struct udphdr);
|
||||||
|
|
||||||
|
if (v6) {
|
||||||
|
l2_hdrlen += sizeof(struct ipv6hdr);
|
||||||
|
|
||||||
|
udp6_localname.sin6_port = htons(dstport);
|
||||||
|
msg.msg_name = &udp6_localname;
|
||||||
|
msg.msg_namelen = sizeof(udp6_localname);
|
||||||
|
} else {
|
||||||
|
l2_hdrlen += sizeof(struct iphdr);
|
||||||
|
|
||||||
|
udp4_localname.sin_port = htons(dstport);
|
||||||
|
msg.msg_name = &udp4_localname;
|
||||||
|
msg.msg_namelen = sizeof(udp4_localname);
|
||||||
|
}
|
||||||
|
|
||||||
|
msg.msg_control = NULL;
|
||||||
|
msg.msg_controllen = 0;
|
||||||
|
msg.msg_flags = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < UDP_MAX_FRAMES; i++) {
|
||||||
|
struct virtio_net_hdr_mrg_rxbuf *vh;
|
||||||
|
struct ethhdr *eh;
|
||||||
|
char *base;
|
||||||
|
size_t size;
|
||||||
|
|
||||||
|
fillsize = USHRT_MAX;
|
||||||
|
iov_count = 0;
|
||||||
|
while (fillsize && iov_count < virtqueue_max) {
|
||||||
|
VuVirtqElement *elem;
|
||||||
|
|
||||||
|
elem = vu_queue_pop(vdev, vq, sizeof(VuVirtqElement), buffer);
|
||||||
|
if (!elem)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (elem->in_num < 1) {
|
||||||
|
err("virtio-net receive queue contains no in buffers");
|
||||||
|
vu_queue_rewind(vdev, vq, iov_count);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ASSERT(elem->in_num == 1);
|
||||||
|
ASSERT(elem->in_sg[0].iov_len >= l2_hdrlen);
|
||||||
|
|
||||||
|
indexes[iov_count] = elem->index;
|
||||||
|
if (iov_count == 0) {
|
||||||
|
iov_vu[0].iov_base = (char *)elem->in_sg[0].iov_base + l2_hdrlen;
|
||||||
|
iov_vu[0].iov_len = elem->in_sg[0].iov_len - l2_hdrlen;
|
||||||
|
} else {
|
||||||
|
iov_vu[iov_count].iov_base = elem->in_sg[0].iov_base;
|
||||||
|
iov_vu[iov_count].iov_len = elem->in_sg[0].iov_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (iov_vu[iov_count].iov_len > fillsize)
|
||||||
|
iov_vu[iov_count].iov_len = fillsize;
|
||||||
|
|
||||||
|
fillsize -= iov_vu[iov_count].iov_len;
|
||||||
|
|
||||||
|
iov_count++;
|
||||||
|
}
|
||||||
|
if (iov_count == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
msg.msg_iov = iov_vu;
|
||||||
|
msg.msg_iovlen = iov_count;
|
||||||
|
|
||||||
|
data_len = recvmsg(ref.fd, &msg, 0);
|
||||||
|
if (data_len < 0) {
|
||||||
|
vu_queue_rewind(vdev, vq, iov_count);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
iov_used = 0;
|
||||||
|
size = data_len;
|
||||||
|
while (size) {
|
||||||
|
if (iov_vu[iov_used].iov_len > size)
|
||||||
|
iov_vu[iov_used].iov_len = size;
|
||||||
|
|
||||||
|
size -= iov_vu[iov_used].iov_len;
|
||||||
|
iov_used++;
|
||||||
|
}
|
||||||
|
|
||||||
|
base = (char *)iov_vu[0].iov_base - l2_hdrlen;
|
||||||
|
size = iov_vu[0].iov_len + l2_hdrlen;
|
||||||
|
|
||||||
|
/* release unused buffers */
|
||||||
|
vu_queue_rewind(vdev, vq, iov_count - iov_used);
|
||||||
|
|
||||||
|
/* vnet_header */
|
||||||
|
vh = (struct virtio_net_hdr_mrg_rxbuf *)base;
|
||||||
|
vh->hdr = vu_header;
|
||||||
|
if (has_mrg_rxbuf)
|
||||||
|
vh->num_buffers = htole16(iov_used);
|
||||||
|
|
||||||
|
/* ethernet header */
|
||||||
|
eh = (struct ethhdr *)(base + vnet_hdrlen);
|
||||||
|
|
||||||
|
memcpy(eh->h_dest, c->mac_guest, sizeof(eh->h_dest));
|
||||||
|
memcpy(eh->h_source, c->mac, sizeof(eh->h_source));
|
||||||
|
|
||||||
|
/* initialize header */
|
||||||
|
if (v6) {
|
||||||
|
struct ipv6hdr *ip6h = (struct ipv6hdr *)(eh + 1);
|
||||||
|
struct udphdr *uh = (struct udphdr *)(ip6h + 1);
|
||||||
|
uint32_t sum;
|
||||||
|
|
||||||
|
eh->h_proto = htons(ETH_P_IPV6);
|
||||||
|
|
||||||
|
*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
|
||||||
|
|
||||||
|
udp_update_hdr6(c, ip6h, data_len, &udp6_localname,
|
||||||
|
dstport, now);
|
||||||
|
if (*c->pcap) {
|
||||||
|
sum = proto_ipv6_header_psum(ip6h->payload_len,
|
||||||
|
IPPROTO_UDP,
|
||||||
|
&ip6h->saddr,
|
||||||
|
&ip6h->daddr);
|
||||||
|
|
||||||
|
iov_vu[0].iov_base = uh;
|
||||||
|
iov_vu[0].iov_len = size - l2_hdrlen + sizeof(*uh);
|
||||||
|
uh->check = csum_iov(iov_vu, iov_used, sum);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
struct iphdr *iph = (struct iphdr *)(eh + 1);
|
||||||
|
struct udphdr *uh = (struct udphdr *)(iph + 1);
|
||||||
|
uint32_t sum;
|
||||||
|
|
||||||
|
eh->h_proto = htons(ETH_P_IP);
|
||||||
|
|
||||||
|
*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
|
||||||
|
|
||||||
|
udp_update_hdr4(c, iph, data_len, &udp4_localname,
|
||||||
|
dstport, now);
|
||||||
|
if (*c->pcap) {
|
||||||
|
sum = proto_ipv4_header_psum(iph->tot_len,
|
||||||
|
IPPROTO_UDP,
|
||||||
|
(struct in_addr){ .s_addr = iph->saddr },
|
||||||
|
(struct in_addr){ .s_addr = iph->daddr });
|
||||||
|
|
||||||
|
iov_vu[0].iov_base = uh;
|
||||||
|
iov_vu[0].iov_len = size - l2_hdrlen + sizeof(*uh);
|
||||||
|
uh->check = csum_iov(iov_vu, iov_used, sum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* set iov for pcap logging */
|
||||||
|
iov_vu[0].iov_base = base + vnet_hdrlen;
|
||||||
|
iov_vu[0].iov_len = size - vnet_hdrlen;
|
||||||
|
pcap_iov(iov_vu, iov_used);
|
||||||
|
|
||||||
|
/* set iov_len for vu_queue_fill_by_index(); */
|
||||||
|
iov_vu[0].iov_base = base;
|
||||||
|
iov_vu[0].iov_len = size;
|
||||||
|
|
||||||
|
/* send packets */
|
||||||
|
for (i = 0; i < iov_used; i++)
|
||||||
|
vu_queue_fill_by_index(vdev, vq, indexes[i],
|
||||||
|
iov_vu[i].iov_len, i);
|
||||||
|
|
||||||
|
vu_queue_flush(vdev, vq, iov_used);
|
||||||
|
vu_queue_notify(vdev, vq);
|
||||||
|
}
|
||||||
|
}
|
8
udp_vu.h
Normal file
8
udp_vu.h
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0-or-later */

#ifndef UDP_VU_H
#define UDP_VU_H

/* uint32_t is used in the prototype below */
#include <stdint.h>

/* Declared here so that this header doesn't depend on include order: the
 * prototype below only needs the type names
 */
struct ctx;
struct timespec;
union epoll_ref;

/* Socket event handler for UDP in vhost-user mode, implemented in udp_vu.c */
void udp_vu_sock_handler(const struct ctx *c, union epoll_ref ref,
			 uint32_t events, const struct timespec *now);

#endif /* UDP_VU_H */
|
Loading…
Reference in a new issue