passt: Introduce packet batching mechanism
Receive packets in batches from AF_UNIX, check whether they can be sent with a single syscall, and, if so, batch them up with sendmmsg(). A bit rudimentary, currently only implemented for UDP, but it seems to work. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
5b0c88d4ef
commit
1f7cf04d34
8 changed files with 337 additions and 122 deletions
31
icmp.c
31
icmp.c
|
@ -77,45 +77,54 @@ void icmp_sock_handler(struct ctx *c, int s, uint32_t events)
|
||||||
* icmp_tap_handler() - Handle packets from tap
|
* icmp_tap_handler() - Handle packets from tap
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
* @af: Address family, AF_INET or AF_INET6
|
* @af: Address family, AF_INET or AF_INET6
|
||||||
* @in: Input buffer
|
* @msg: Input message
|
||||||
* @len: Length, including UDP header
|
* @count: Message count (always 1 for ICMP)
|
||||||
|
*
|
||||||
|
* Return: count of consumed packets (always 1, even if malformed)
|
||||||
*/
|
*/
|
||||||
void icmp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len)
|
int icmp_tap_handler(struct ctx *c, int af, void *addr,
|
||||||
|
struct tap_msg *msg, int count)
|
||||||
{
|
{
|
||||||
|
(void)count;
|
||||||
|
|
||||||
if (af == AF_INET) {
|
if (af == AF_INET) {
|
||||||
struct icmphdr *ih = (struct icmphdr *)in;
|
struct icmphdr *ih = (struct icmphdr *)msg[0].l4h;
|
||||||
struct sockaddr_in sa = {
|
struct sockaddr_in sa = {
|
||||||
.sin_family = AF_INET,
|
.sin_family = AF_INET,
|
||||||
.sin_addr.s_addr = htonl(INADDR_ANY),
|
.sin_addr.s_addr = htonl(INADDR_ANY),
|
||||||
};
|
};
|
||||||
|
|
||||||
if (len < sizeof(*ih) || ih->type != ICMP_ECHO)
|
if (msg[0].l4_len < sizeof(*ih) || ih->type != ICMP_ECHO)
|
||||||
return;
|
return 1;
|
||||||
|
|
||||||
sa.sin_port = ih->un.echo.id;
|
sa.sin_port = ih->un.echo.id;
|
||||||
bind(c->icmp.s4, (struct sockaddr *)&sa, sizeof(sa));
|
bind(c->icmp.s4, (struct sockaddr *)&sa, sizeof(sa));
|
||||||
|
|
||||||
sa.sin_addr = *(struct in_addr *)addr;
|
sa.sin_addr = *(struct in_addr *)addr;
|
||||||
sendto(c->icmp.s4, in, len, MSG_DONTWAIT,
|
sendto(c->icmp.s4, msg[0].l4h, msg[0].l4_len,
|
||||||
|
MSG_DONTWAIT | MSG_NOSIGNAL,
|
||||||
(struct sockaddr *)&sa, sizeof(sa));
|
(struct sockaddr *)&sa, sizeof(sa));
|
||||||
} else if (af == AF_INET6) {
|
} else if (af == AF_INET6) {
|
||||||
struct sockaddr_in6 sa = {
|
struct sockaddr_in6 sa = {
|
||||||
.sin6_family = AF_INET6,
|
.sin6_family = AF_INET6,
|
||||||
.sin6_addr = IN6ADDR_ANY_INIT,
|
.sin6_addr = IN6ADDR_ANY_INIT,
|
||||||
};
|
};
|
||||||
struct icmp6hdr *ih = (struct icmp6hdr *)in;
|
struct icmp6hdr *ih = (struct icmp6hdr *)msg[0].l4h;
|
||||||
|
|
||||||
if (len < sizeof(*ih) ||
|
if (msg[0].l4_len < sizeof(*ih) ||
|
||||||
(ih->icmp6_type != 128 && ih->icmp6_type != 129))
|
(ih->icmp6_type != 128 && ih->icmp6_type != 129))
|
||||||
return;
|
return 1;
|
||||||
|
|
||||||
sa.sin6_port = ih->icmp6_identifier;
|
sa.sin6_port = ih->icmp6_identifier;
|
||||||
bind(c->icmp.s6, (struct sockaddr *)&sa, sizeof(sa));
|
bind(c->icmp.s6, (struct sockaddr *)&sa, sizeof(sa));
|
||||||
|
|
||||||
sa.sin6_addr = *(struct in6_addr *)addr;
|
sa.sin6_addr = *(struct in6_addr *)addr;
|
||||||
sendto(c->icmp.s6, in, len, MSG_DONTWAIT | MSG_NOSIGNAL,
|
sendto(c->icmp.s6, msg[0].l4h, msg[0].l4_len,
|
||||||
|
MSG_DONTWAIT | MSG_NOSIGNAL,
|
||||||
(struct sockaddr *)&sa, sizeof(sa));
|
(struct sockaddr *)&sa, sizeof(sa));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
3
icmp.h
3
icmp.h
|
@ -4,7 +4,8 @@
|
||||||
struct ctx;
|
struct ctx;
|
||||||
|
|
||||||
void icmp_sock_handler(struct ctx *c, int s, uint32_t events);
|
void icmp_sock_handler(struct ctx *c, int s, uint32_t events);
|
||||||
void icmp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len);
|
int icmp_tap_handler(struct ctx *c, int af, void *addr,
|
||||||
|
struct tap_msg *msg, int count);
|
||||||
int icmp_sock_init(struct ctx *c);
|
int icmp_sock_init(struct ctx *c);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
290
passt.c
290
passt.c
|
@ -19,6 +19,7 @@
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
|
#include <sys/uio.h>
|
||||||
#include <sys/un.h>
|
#include <sys/un.h>
|
||||||
#include <ifaddrs.h>
|
#include <ifaddrs.h>
|
||||||
#include <linux/if_ether.h>
|
#include <linux/if_ether.h>
|
||||||
|
@ -56,7 +57,9 @@
|
||||||
|
|
||||||
#define EPOLL_EVENTS 10
|
#define EPOLL_EVENTS 10
|
||||||
|
|
||||||
#define TIMER_INTERVAL 20 /* ms, for protocol periodic handlers */
|
#define TAP_NMSG 32 /* maximum messages to buffer from tap */
|
||||||
|
|
||||||
|
#define TIMER_INTERVAL 100 /* ms, for protocol periodic handlers */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* sock_unix() - Create and bind AF_UNIX socket, add to epoll list
|
* sock_unix() - Create and bind AF_UNIX socket, add to epoll list
|
||||||
|
@ -303,87 +306,182 @@ static void get_dns(struct ctx *c)
|
||||||
/**
|
/**
|
||||||
* tap4_handler() - IPv4 and ARP packet handler for tap file descriptor
|
* tap4_handler() - IPv4 and ARP packet handler for tap file descriptor
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
* @len: Total L2 packet length
|
* @msg: Array of messages with the same L3 protocol
|
||||||
* @in: Packet buffer, L2 headers
|
* @count: Count of messages with the same L3 protocol
|
||||||
|
*
|
||||||
|
* Return: count of packets consumed by handlers
|
||||||
*/
|
*/
|
||||||
static void tap4_handler(struct ctx *c, char *in, size_t len)
|
static int tap4_handler(struct ctx *c, struct tap_msg *msg, size_t count)
|
||||||
{
|
{
|
||||||
char buf_s[INET_ADDRSTRLEN] __attribute((__unused__));
|
char buf_s[INET_ADDRSTRLEN] __attribute((__unused__));
|
||||||
char buf_d[INET_ADDRSTRLEN] __attribute((__unused__));
|
char buf_d[INET_ADDRSTRLEN] __attribute((__unused__));
|
||||||
struct ethhdr *eh = (struct ethhdr *)in;
|
struct ethhdr *eh = (struct ethhdr *)msg[0].start;
|
||||||
struct iphdr *iph = (struct iphdr *)(eh + 1);
|
struct iphdr *iph, *prev_iph = NULL;
|
||||||
|
struct udphdr *uh, *prev_uh = NULL;
|
||||||
|
size_t len = msg[0].len;
|
||||||
|
unsigned int i;
|
||||||
char *l4h;
|
char *l4h;
|
||||||
|
|
||||||
if (!c->v4)
|
if (!c->v4)
|
||||||
return;
|
return count;
|
||||||
|
|
||||||
if (arp(c, eh, len) || dhcp(c, eh, len))
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (len < sizeof(*eh) + sizeof(*iph))
|
if (len < sizeof(*eh) + sizeof(*iph))
|
||||||
return;
|
return 1;
|
||||||
|
|
||||||
l4h = (char *)iph + iph->ihl * 4;
|
if (arp(c, eh, len) || dhcp(c, eh, len))
|
||||||
len -= (intptr_t)l4h - (intptr_t)eh;
|
return 1;
|
||||||
|
|
||||||
if (iph->protocol == IPPROTO_ICMP) {
|
for (i = 0; i < count; i++) {
|
||||||
|
len = msg[i].len;
|
||||||
|
if (len < sizeof(*eh) + sizeof(*iph))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
eh = (struct ethhdr *)msg[i].start;
|
||||||
|
iph = (struct iphdr *)(eh + 1);
|
||||||
|
l4h = (char *)iph + iph->ihl * 4;
|
||||||
|
|
||||||
|
msg[i].l4h = l4h;
|
||||||
|
msg[i].l4_len = len - ((intptr_t)l4h - (intptr_t)eh);
|
||||||
|
|
||||||
|
if (iph->protocol != IPPROTO_TCP &&
|
||||||
|
iph->protocol != IPPROTO_UDP)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (len < sizeof(*uh))
|
||||||
|
break;
|
||||||
|
|
||||||
|
uh = (struct udphdr *)l4h;
|
||||||
|
|
||||||
|
if (!i) {
|
||||||
|
prev_iph = iph;
|
||||||
|
prev_uh = uh;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (iph->tos != prev_iph->tos ||
|
||||||
|
iph->frag_off != prev_iph->frag_off ||
|
||||||
|
iph->protocol != prev_iph->protocol ||
|
||||||
|
iph->saddr != prev_iph->saddr ||
|
||||||
|
iph->daddr != prev_iph->daddr ||
|
||||||
|
uh->source != prev_uh->source ||
|
||||||
|
uh->dest != prev_uh->dest)
|
||||||
|
break;
|
||||||
|
|
||||||
|
prev_iph = iph;
|
||||||
|
prev_uh = uh;
|
||||||
|
}
|
||||||
|
|
||||||
|
eh = (struct ethhdr *)msg[0].start;
|
||||||
|
iph = (struct iphdr *)(eh + 1);
|
||||||
|
|
||||||
|
if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP ||
|
||||||
|
iph->protocol == IPPROTO_SCTP) {
|
||||||
|
uh = (struct udphdr *)msg[0].l4h;
|
||||||
|
|
||||||
|
if (msg[0].len < sizeof(*uh))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
debug("%s from tap: %s:%i -> %s:%i (%i packet%s)",
|
||||||
|
getprotobynumber(iph->protocol)->p_name,
|
||||||
|
inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)),
|
||||||
|
ntohs(uh->source),
|
||||||
|
inet_ntop(AF_INET, &iph->daddr, buf_d, sizeof(buf_d)),
|
||||||
|
ntohs(uh->dest),
|
||||||
|
i, i > 1 ? "s" : "");
|
||||||
|
} else if (iph->protocol == IPPROTO_ICMP) {
|
||||||
debug("icmp from tap: %s -> %s",
|
debug("icmp from tap: %s -> %s",
|
||||||
inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)),
|
inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)),
|
||||||
inet_ntop(AF_INET, &iph->daddr, buf_d, sizeof(buf_d)));
|
inet_ntop(AF_INET, &iph->daddr, buf_d, sizeof(buf_d)));
|
||||||
} else if (iph->protocol == IPPROTO_TCP ||
|
|
||||||
iph->protocol == IPPROTO_UDP ||
|
|
||||||
iph->protocol == IPPROTO_SCTP) {
|
|
||||||
struct tcphdr *th = (struct tcphdr *)l4h;
|
|
||||||
|
|
||||||
if (len < sizeof(*th) && len < sizeof(struct udphdr))
|
|
||||||
return;
|
|
||||||
|
|
||||||
debug("%s from tap: %s:%i -> %s:%i",
|
|
||||||
getprotobynumber(iph->protocol)->p_name,
|
|
||||||
inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)),
|
|
||||||
ntohs(th->source),
|
|
||||||
inet_ntop(AF_INET, &iph->daddr, buf_d, sizeof(buf_d)),
|
|
||||||
ntohs(th->dest));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (iph->protocol == IPPROTO_TCP)
|
if (iph->protocol == IPPROTO_TCP)
|
||||||
tcp_tap_handler(c, AF_INET, &iph->daddr, l4h, len);
|
return tcp_tap_handler(c, AF_INET, &iph->daddr, msg, i);
|
||||||
else if (iph->protocol == IPPROTO_UDP)
|
|
||||||
udp_tap_handler(c, AF_INET, &iph->daddr, l4h, len);
|
if (iph->protocol == IPPROTO_UDP)
|
||||||
else if (iph->protocol == IPPROTO_ICMP)
|
return udp_tap_handler(c, AF_INET, &iph->daddr, msg, i);
|
||||||
icmp_tap_handler(c, AF_INET, &iph->daddr, l4h, len);
|
|
||||||
|
if (iph->protocol == IPPROTO_ICMP)
|
||||||
|
icmp_tap_handler(c, AF_INET, &iph->daddr, msg, 1);
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tap6_handler() - IPv6 packet handler for tap file descriptor
|
* tap6_handler() - IPv6 packet handler for tap file descriptor
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
* @len: Total L2 packet length
|
* @msg: Array of messages with the same L3 protocol
|
||||||
* @in: Packet buffer, L2 headers
|
* @count: Count of messages with the same L3 protocol
|
||||||
*/
|
*/
|
||||||
static void tap6_handler(struct ctx *c, char *in, size_t len)
|
static int tap6_handler(struct ctx *c, struct tap_msg *msg, size_t count)
|
||||||
{
|
{
|
||||||
char buf_s[INET6_ADDRSTRLEN] __attribute((__unused__));
|
char buf_s[INET6_ADDRSTRLEN] __attribute((__unused__));
|
||||||
char buf_d[INET6_ADDRSTRLEN] __attribute((__unused__));
|
char buf_d[INET6_ADDRSTRLEN] __attribute((__unused__));
|
||||||
struct ethhdr *eh = (struct ethhdr *)in;
|
struct ethhdr *eh = (struct ethhdr *)msg[0].start;
|
||||||
struct ipv6hdr *ip6h = (struct ipv6hdr *)(eh + 1);
|
struct udphdr *uh, *prev_uh = NULL;
|
||||||
uint8_t proto;
|
uint8_t proto = 0, prev_proto = 0;
|
||||||
|
size_t len = msg[0].len;
|
||||||
|
struct ipv6hdr *ip6h;
|
||||||
|
unsigned int i;
|
||||||
char *l4h;
|
char *l4h;
|
||||||
|
|
||||||
if (!c->v6)
|
if (!c->v6)
|
||||||
return;
|
return count;
|
||||||
|
|
||||||
if (len < sizeof(*eh) + sizeof(*ip6h))
|
if (len < sizeof(*eh) + sizeof(*ip6h))
|
||||||
return;
|
return 1;
|
||||||
|
|
||||||
if (ndp(c, eh, len) || dhcpv6(c, eh, len))
|
if (ndp(c, eh, len) || dhcpv6(c, eh, len))
|
||||||
return;
|
return 1;
|
||||||
|
|
||||||
l4h = ipv6_l4hdr(ip6h, &proto);
|
for (i = 0; i < count; i++) {
|
||||||
|
struct ipv6hdr *p_ip6h;
|
||||||
|
|
||||||
c->addr6_guest = ip6h->saddr;
|
len = msg[i].len;
|
||||||
ip6h->saddr = c->addr6;
|
if (len < sizeof(*eh) + sizeof(*ip6h))
|
||||||
|
return 1;
|
||||||
|
|
||||||
len -= (intptr_t)l4h - (intptr_t)eh;
|
eh = (struct ethhdr *)msg[i].start;
|
||||||
|
ip6h = (struct ipv6hdr *)(eh + 1);
|
||||||
|
l4h = ipv6_l4hdr(ip6h, &proto);
|
||||||
|
|
||||||
|
msg[i].l4h = l4h;
|
||||||
|
msg[i].l4_len = len - ((intptr_t)l4h - (intptr_t)eh);
|
||||||
|
|
||||||
|
c->addr6_guest = ip6h->saddr;
|
||||||
|
ip6h->saddr = c->addr6;
|
||||||
|
|
||||||
|
if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (len < sizeof(*uh))
|
||||||
|
break;
|
||||||
|
|
||||||
|
uh = (struct udphdr *)l4h;
|
||||||
|
|
||||||
|
if (!i) {
|
||||||
|
p_ip6h = ip6h;
|
||||||
|
prev_proto = proto;
|
||||||
|
prev_uh = uh;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (proto != prev_proto ||
|
||||||
|
memcmp(&ip6h->saddr, &p_ip6h->saddr, sizeof(ip6h->saddr)) ||
|
||||||
|
memcmp(&ip6h->daddr, &p_ip6h->daddr, sizeof(ip6h->daddr)) ||
|
||||||
|
uh->source != prev_uh->source ||
|
||||||
|
uh->dest != prev_uh->dest)
|
||||||
|
break;
|
||||||
|
|
||||||
|
p_ip6h = ip6h;
|
||||||
|
prev_proto = proto;
|
||||||
|
prev_uh = uh;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (prev_proto)
|
||||||
|
proto = prev_proto;
|
||||||
|
|
||||||
|
eh = (struct ethhdr *)msg[0].start;
|
||||||
|
ip6h = (struct ipv6hdr *)(eh + 1);
|
||||||
|
|
||||||
if (proto == IPPROTO_ICMPV6) {
|
if (proto == IPPROTO_ICMPV6) {
|
||||||
debug("icmpv6 from tap: %s ->\n\t%s",
|
debug("icmpv6 from tap: %s ->\n\t%s",
|
||||||
|
@ -391,27 +489,34 @@ static void tap6_handler(struct ctx *c, char *in, size_t len)
|
||||||
inet_ntop(AF_INET6, &ip6h->daddr, buf_d, sizeof(buf_d)));
|
inet_ntop(AF_INET6, &ip6h->daddr, buf_d, sizeof(buf_d)));
|
||||||
} else if (proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
|
} else if (proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
|
||||||
proto == IPPROTO_SCTP) {
|
proto == IPPROTO_SCTP) {
|
||||||
struct tcphdr *th = (struct tcphdr *)l4h;
|
uh = (struct udphdr *)msg[0].l4h;
|
||||||
|
|
||||||
if (len < sizeof(*th) && len < sizeof(struct udphdr))
|
if (msg[0].len < sizeof(*uh))
|
||||||
return;
|
return 1;
|
||||||
|
|
||||||
debug("%s from tap: [%s]:%i\n\t-> [%s]:%i",
|
debug("%s from tap: [%s]:%i\n\t-> [%s]:%i (%i packet%s)",
|
||||||
getprotobynumber(proto)->p_name,
|
getprotobynumber(proto)->p_name,
|
||||||
inet_ntop(AF_INET6, &ip6h->saddr, buf_s, sizeof(buf_s)),
|
inet_ntop(AF_INET6, &ip6h->saddr, buf_s, sizeof(buf_s)),
|
||||||
ntohs(th->source),
|
ntohs(uh->source),
|
||||||
inet_ntop(AF_INET6, &ip6h->daddr, buf_d, sizeof(buf_d)),
|
inet_ntop(AF_INET6, &ip6h->daddr, buf_d, sizeof(buf_d)),
|
||||||
ntohs(th->dest));
|
ntohs(uh->dest),
|
||||||
|
i, i > 1 ? "s" : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (proto == IPPROTO_TCP)
|
if (proto == IPPROTO_TCP)
|
||||||
tcp_tap_handler(c, AF_INET6, &ip6h->daddr, l4h, len);
|
return tcp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, i);
|
||||||
else if (proto == IPPROTO_UDP)
|
|
||||||
udp_tap_handler(c, AF_INET6, &ip6h->daddr, l4h, len);
|
if (proto == IPPROTO_UDP)
|
||||||
else if (proto == IPPROTO_ICMPV6)
|
return udp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, i);
|
||||||
icmp_tap_handler(c, AF_INET6, &ip6h->daddr, l4h, len);
|
|
||||||
|
if (proto == IPPROTO_ICMPV6)
|
||||||
|
icmp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, 1);
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static char tap_buf[ETH_MAX_MTU * TAP_NMSG];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tap_handler() - Packet handler for tap file descriptor
|
* tap_handler() - Packet handler for tap file descriptor
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
|
@ -420,33 +525,74 @@ static void tap6_handler(struct ctx *c, char *in, size_t len)
|
||||||
*/
|
*/
|
||||||
static int tap_handler(struct ctx *c)
|
static int tap_handler(struct ctx *c)
|
||||||
{
|
{
|
||||||
char buf[ETH_MAX_MTU];
|
int msg_count = 0, same, rcv = 0, i = 0;
|
||||||
|
struct tap_msg msg[UIO_MAXIOV];
|
||||||
|
ssize_t n, rem, fill;
|
||||||
struct ethhdr *eh;
|
struct ethhdr *eh;
|
||||||
uint32_t vnet_len;
|
char *p = tap_buf;
|
||||||
ssize_t n;
|
|
||||||
|
|
||||||
eh = (struct ethhdr *)buf;
|
fill = ETH_MAX_MTU * (TAP_NMSG - 1);
|
||||||
|
|
||||||
while ((n = recv(c->fd_unix, &vnet_len, 4, MSG_DONTWAIT)) == 4) {
|
while ((n = recv(c->fd_unix, p, fill, MSG_DONTWAIT)) > 0) {
|
||||||
n = recv(c->fd_unix, buf, ntohl(vnet_len), MSG_DONTWAIT);
|
fill -= n;
|
||||||
|
while (n > 0) {
|
||||||
|
ssize_t len = ntohl(*(uint32_t *)p);
|
||||||
|
|
||||||
if (n < (ssize_t)sizeof(*eh))
|
p += sizeof(uint32_t);
|
||||||
break;
|
n -= sizeof(uint32_t);
|
||||||
|
|
||||||
switch (ntohs(eh->h_proto)) {
|
if (len < (ssize_t)sizeof(*eh))
|
||||||
case ETH_P_IP:
|
break;
|
||||||
|
|
||||||
|
/* At most one packet might not fit in a single read */
|
||||||
|
if (len > n) {
|
||||||
|
rem = recv(c->fd_unix, p + n, fill,
|
||||||
|
MSG_DONTWAIT);
|
||||||
|
rcv = errno;
|
||||||
|
if (rem <= 0 || rem + n != len)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
msg[msg_count].start = p;
|
||||||
|
msg[msg_count++].len = len;
|
||||||
|
|
||||||
|
n -= len;
|
||||||
|
p += len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rcv = errno;
|
||||||
|
|
||||||
|
while (i < msg_count) {
|
||||||
|
eh = (struct ethhdr *)msg[i].start;
|
||||||
|
switch (ntohs(eh->h_proto)) {
|
||||||
case ETH_P_ARP:
|
case ETH_P_ARP:
|
||||||
tap4_handler(c, buf, n);
|
tap4_handler(c, msg + i, 1);
|
||||||
|
i++;
|
||||||
|
break;
|
||||||
|
case ETH_P_IP:
|
||||||
|
for (same = 1; i + same < msg_count; same++) {
|
||||||
|
eh = (struct ethhdr *)msg[i + same].start;
|
||||||
|
if (ntohs(eh->h_proto) != ETH_P_IP)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
i += tap4_handler(c, msg + i, same);
|
||||||
break;
|
break;
|
||||||
case ETH_P_IPV6:
|
case ETH_P_IPV6:
|
||||||
tap6_handler(c, buf, n);
|
for (same = 1; i + same < msg_count; same++) {
|
||||||
|
eh = (struct ethhdr *)msg[i + same].start;
|
||||||
|
if (ntohs(eh->h_proto) != ETH_P_IPV6)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
i += tap6_handler(c, msg + i, same);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
i++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (n >= 0 || errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
|
if (n >= 0 || rcv == EINTR || rcv == EAGAIN || rcv == EWOULDBLOCK)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
epoll_ctl(c->epollfd, EPOLL_CTL_DEL, c->fd_unix, NULL);
|
epoll_ctl(c->epollfd, EPOLL_CTL_DEL, c->fd_unix, NULL);
|
||||||
|
@ -552,7 +698,7 @@ int main(int argc, char **argv)
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if DEBUG
|
#if DEBUG || 1
|
||||||
openlog("passt", LOG_PERROR, LOG_DAEMON);
|
openlog("passt", LOG_PERROR, LOG_DAEMON);
|
||||||
#else
|
#else
|
||||||
openlog("passt", 0, LOG_DAEMON);
|
openlog("passt", 0, LOG_DAEMON);
|
||||||
|
@ -610,7 +756,7 @@ listen:
|
||||||
UNIX_SOCK_PATH " -net nic,model=virtio");
|
UNIX_SOCK_PATH " -net nic,model=virtio");
|
||||||
|
|
||||||
c.fd_unix = accept(fd_unix, NULL, NULL);
|
c.fd_unix = accept(fd_unix, NULL, NULL);
|
||||||
ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP | EPOLLERR | EPOLLHUP;
|
ev.events = EPOLLIN | EPOLLRDHUP | EPOLLERR | EPOLLHUP;
|
||||||
ev.data.fd = c.fd_unix;
|
ev.data.fd = c.fd_unix;
|
||||||
epoll_ctl(c.epollfd, EPOLL_CTL_ADD, c.fd_unix, &ev);
|
epoll_ctl(c.epollfd, EPOLL_CTL_ADD, c.fd_unix, &ev);
|
||||||
|
|
||||||
|
|
14
passt.h
14
passt.h
|
@ -1,5 +1,19 @@
|
||||||
#define UNIX_SOCK_PATH "/tmp/passt.socket"
|
#define UNIX_SOCK_PATH "/tmp/passt.socket"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct tap_msg - Generic message descriptor for arrays of messages
|
||||||
|
* @start: Pointer to message start
|
||||||
|
* @l4h: Pointer to L4 header
|
||||||
|
* @len: Message length, with L2 headers
|
||||||
|
* @l4_len: Message length, with L4 headers
|
||||||
|
*/
|
||||||
|
struct tap_msg {
|
||||||
|
char *start;
|
||||||
|
char *l4h;
|
||||||
|
size_t len;
|
||||||
|
size_t l4_len;
|
||||||
|
};
|
||||||
|
|
||||||
#include "icmp.h"
|
#include "icmp.h"
|
||||||
#include "tcp.h"
|
#include "tcp.h"
|
||||||
|
|
||||||
|
|
44
tcp.c
44
tcp.c
|
@ -583,7 +583,7 @@ static int tcp_sock_hash_match(struct tcp_conn *conn, int af, void *addr,
|
||||||
static unsigned int tcp_sock_hash(struct ctx *c, int af, void *addr,
|
static unsigned int tcp_sock_hash(struct ctx *c, int af, void *addr,
|
||||||
in_port_t tap_port, in_port_t sock_port)
|
in_port_t tap_port, in_port_t sock_port)
|
||||||
{
|
{
|
||||||
uint64_t b;
|
uint64_t b = 0;
|
||||||
|
|
||||||
if (af == AF_INET) {
|
if (af == AF_INET) {
|
||||||
struct {
|
struct {
|
||||||
|
@ -853,7 +853,7 @@ static uint32_t tcp_seq_init(struct ctx *c, int af, void *addr,
|
||||||
in_port_t dstport, in_port_t srcport)
|
in_port_t dstport, in_port_t srcport)
|
||||||
{
|
{
|
||||||
struct timespec ts = { 0 };
|
struct timespec ts = { 0 };
|
||||||
uint32_t ns, seq;
|
uint32_t ns, seq = 0;
|
||||||
|
|
||||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
|
|
||||||
|
@ -1186,31 +1186,39 @@ out:
|
||||||
* tcp_tap_handler() - Handle packets from tap and state transitions
|
* tcp_tap_handler() - Handle packets from tap and state transitions
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
* @af: Address family, AF_INET or AF_INET6
|
* @af: Address family, AF_INET or AF_INET6
|
||||||
* @in: Input buffer
|
* @msg: Input messages
|
||||||
* @len: Length, including TCP header
|
* @count: Message count
|
||||||
|
*
|
||||||
|
* Return: count of consumed packets
|
||||||
*/
|
*/
|
||||||
void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len)
|
int tcp_tap_handler(struct ctx *c, int af, void *addr,
|
||||||
|
struct tap_msg *msg, int count)
|
||||||
{
|
{
|
||||||
struct tcphdr *th = (struct tcphdr *)in;
|
/* TODO: Implement message batching for TCP */
|
||||||
|
struct tcphdr *th = (struct tcphdr *)msg[0].l4h;
|
||||||
|
size_t len = msg[0].l4_len;
|
||||||
|
|
||||||
size_t off, skip = 0;
|
size_t off, skip = 0;
|
||||||
int s, ws;
|
int s, ws;
|
||||||
|
|
||||||
|
(void)count;
|
||||||
|
|
||||||
if (len < sizeof(*th))
|
if (len < sizeof(*th))
|
||||||
return;
|
return 1;
|
||||||
|
|
||||||
off = th->doff * 4;
|
off = th->doff * 4;
|
||||||
if (off < sizeof(*th) || off > len)
|
if (off < sizeof(*th) || off > len)
|
||||||
return;
|
return 1;
|
||||||
|
|
||||||
if ((s = tcp_sock_hash_lookup(c, af, addr, th->source, th->dest)) < 0) {
|
if ((s = tcp_sock_hash_lookup(c, af, addr, th->source, th->dest)) < 0) {
|
||||||
if (th->syn)
|
if (th->syn)
|
||||||
tcp_conn_from_tap(c, af, addr, th, len);
|
tcp_conn_from_tap(c, af, addr, th, len);
|
||||||
return;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (th->rst) {
|
if (th->rst) {
|
||||||
tcp_close_and_epoll_del(c, s);
|
tcp_close_and_epoll_del(c, s);
|
||||||
return;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
tcp_clamp_window(s, th, len, th->syn && th->ack);
|
tcp_clamp_window(s, th, len, th->syn && th->ack);
|
||||||
|
@ -1224,7 +1232,7 @@ void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len)
|
||||||
case SOCK_SYN_SENT:
|
case SOCK_SYN_SENT:
|
||||||
if (!th->syn || !th->ack) {
|
if (!th->syn || !th->ack) {
|
||||||
tcp_rst(c, s);
|
tcp_rst(c, s);
|
||||||
return;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
tc[s].mss_guest = tcp_opt_get(th, len, OPT_MSS, NULL, NULL);
|
tc[s].mss_guest = tcp_opt_get(th, len, OPT_MSS, NULL, NULL);
|
||||||
|
@ -1234,12 +1242,12 @@ void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len)
|
||||||
ws = tcp_opt_get(th, len, OPT_WS, NULL, NULL);
|
ws = tcp_opt_get(th, len, OPT_WS, NULL, NULL);
|
||||||
if (ws > MAX_WS) {
|
if (ws > MAX_WS) {
|
||||||
if (tcp_send_to_tap(c, s, RST, NULL, 0))
|
if (tcp_send_to_tap(c, s, RST, NULL, 0))
|
||||||
return;
|
return 1;
|
||||||
|
|
||||||
tc[s].seq_to_tap = 0;
|
tc[s].seq_to_tap = 0;
|
||||||
tc[s].ws_allowed = 0;
|
tc[s].ws_allowed = 0;
|
||||||
tcp_send_to_tap(c, s, SYN, NULL, 0);
|
tcp_send_to_tap(c, s, SYN, NULL, 0);
|
||||||
return;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* info.tcpi_bytes_acked already includes one byte for SYN, but
|
/* info.tcpi_bytes_acked already includes one byte for SYN, but
|
||||||
|
@ -1261,7 +1269,7 @@ void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len)
|
||||||
|
|
||||||
if (!th->ack) {
|
if (!th->ack) {
|
||||||
tcp_rst(c, s);
|
tcp_rst(c, s);
|
||||||
return;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
tcp_set_state(s, ESTABLISHED);
|
tcp_set_state(s, ESTABLISHED);
|
||||||
|
@ -1294,7 +1302,8 @@ void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (skip < len - off &&
|
if (skip < len - off &&
|
||||||
tcp_send_to_sock(c, s, in + off + skip, len - off - skip,
|
tcp_send_to_sock(c, s,
|
||||||
|
msg[0].l4h + off + skip, len - off - skip,
|
||||||
th->psh ? 0 : MSG_MORE))
|
th->psh ? 0 : MSG_MORE))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1311,7 +1320,8 @@ void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len)
|
||||||
tcp_sock_consume(s, ntohl(th->ack_seq));
|
tcp_sock_consume(s, ntohl(th->ack_seq));
|
||||||
|
|
||||||
if (skip < len - off &&
|
if (skip < len - off &&
|
||||||
tcp_send_to_sock(c, s, in + off + skip, len - off - skip,
|
tcp_send_to_sock(c, s,
|
||||||
|
msg[0].l4h + off + skip, len - off - skip,
|
||||||
th->psh ? 0 : MSG_MORE))
|
th->psh ? 0 : MSG_MORE))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1331,6 +1341,8 @@ void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len)
|
||||||
case CLOSED: /* ;) */
|
case CLOSED: /* ;) */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
3
tcp.h
3
tcp.h
|
@ -4,7 +4,8 @@
|
||||||
struct ctx;
|
struct ctx;
|
||||||
|
|
||||||
void tcp_sock_handler(struct ctx *c, int s, uint32_t events);
|
void tcp_sock_handler(struct ctx *c, int s, uint32_t events);
|
||||||
void tcp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len);
|
int tcp_tap_handler(struct ctx *c, int af, void *addr,
|
||||||
|
struct tap_msg *msg, int count);
|
||||||
int tcp_sock_init(struct ctx *c);
|
int tcp_sock_init(struct ctx *c);
|
||||||
void tcp_timer(struct ctx *c, struct timespec *ts);
|
void tcp_timer(struct ctx *c, struct timespec *ts);
|
||||||
|
|
||||||
|
|
71
udp.c
71
udp.c
|
@ -23,6 +23,7 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#define _GNU_SOURCE
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
@ -35,6 +36,7 @@
|
||||||
#include <sys/epoll.h>
|
#include <sys/epoll.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
|
#include <sys/uio.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <linux/ip.h>
|
#include <linux/ip.h>
|
||||||
#include <linux/ipv6.h>
|
#include <linux/ipv6.h>
|
||||||
|
@ -105,43 +107,72 @@ void udp_sock_handler(struct ctx *c, int s, uint32_t events)
|
||||||
* udp_tap_handler() - Handle packets from tap
|
* udp_tap_handler() - Handle packets from tap
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
* @af: Address family, AF_INET or AF_INET6
|
* @af: Address family, AF_INET or AF_INET6
|
||||||
* @in: Input buffer
|
* @msg: Input messages
|
||||||
* @len: Length, including UDP header
|
* @count: Message count
|
||||||
|
*
|
||||||
|
* Return: count of consumed packets
|
||||||
*/
|
*/
|
||||||
void udp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len)
|
int udp_tap_handler(struct ctx *c, int af, void *addr,
|
||||||
|
struct tap_msg *msg, int count)
|
||||||
{
|
{
|
||||||
struct udphdr *uh = (struct udphdr *)in;
|
/* The caller already checks that all the messages have the same source
|
||||||
int s;
|
* and destination, so we can just take those from the first message.
|
||||||
|
*/
|
||||||
|
struct udphdr *uh = (struct udphdr *)msg[0].l4h;
|
||||||
|
struct mmsghdr mm[UIO_MAXIOV] = { 0 };
|
||||||
|
struct iovec m[UIO_MAXIOV];
|
||||||
|
struct sockaddr_in6 s_in6;
|
||||||
|
struct sockaddr_in s_in;
|
||||||
|
struct sockaddr *sa;
|
||||||
|
socklen_t sl;
|
||||||
|
int i, s;
|
||||||
|
|
||||||
(void)c;
|
(void)c;
|
||||||
|
|
||||||
if (af == AF_INET) {
|
if (af == AF_INET) {
|
||||||
struct sockaddr_in sa = {
|
s_in = (struct sockaddr_in) {
|
||||||
.sin_family = AF_INET,
|
.sin_family = AF_INET,
|
||||||
.sin_port = uh->dest,
|
.sin_port = uh->dest,
|
||||||
|
.sin_addr = *(struct in_addr *)addr,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!(s = udp4_sock_port[ntohs(uh->source)]))
|
sa = (struct sockaddr *)&s_in;
|
||||||
return;
|
sl = sizeof(s_in);
|
||||||
|
|
||||||
sa.sin_addr = *(struct in_addr *)addr;
|
|
||||||
|
|
||||||
sendto(s, in + sizeof(*uh), len - sizeof(*uh), MSG_DONTWAIT,
|
|
||||||
(struct sockaddr *)&sa, sizeof(sa));
|
|
||||||
} else if (af == AF_INET6) {
|
} else if (af == AF_INET6) {
|
||||||
struct sockaddr_in6 sa = {
|
s_in6 = (struct sockaddr_in6) {
|
||||||
.sin6_family = AF_INET6,
|
.sin6_family = AF_INET6,
|
||||||
.sin6_port = uh->dest,
|
.sin6_port = uh->dest,
|
||||||
.sin6_addr = *(struct in6_addr *)addr,
|
.sin6_addr = *(struct in6_addr *)addr,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!(s = udp6_sock_port[ntohs(uh->source)]))
|
sa = (struct sockaddr *)&s_in6;
|
||||||
return;
|
sl = sizeof(s_in6);
|
||||||
|
} else {
|
||||||
sendto(s, in + sizeof(*uh), len - sizeof(*uh),
|
return count;
|
||||||
MSG_DONTWAIT | MSG_NOSIGNAL,
|
|
||||||
(struct sockaddr *)&sa, sizeof(sa));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < count; i++) {
|
||||||
|
m[i].iov_base = (char *)((struct udphdr *)msg[i].l4h + 1);
|
||||||
|
m[i].iov_len = msg[i].l4_len - sizeof(*uh);
|
||||||
|
|
||||||
|
mm[i].msg_hdr.msg_name = sa;
|
||||||
|
mm[i].msg_hdr.msg_namelen = sl;
|
||||||
|
|
||||||
|
mm[i].msg_hdr.msg_iov = m + i;
|
||||||
|
mm[i].msg_hdr.msg_iovlen = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (af == AF_INET) {
|
||||||
|
if (!(s = udp4_sock_port[ntohs(uh->source)]))
|
||||||
|
return count;
|
||||||
|
} else if (af == AF_INET6) {
|
||||||
|
if (!(s = udp6_sock_port[ntohs(uh->source)]))
|
||||||
|
return count;
|
||||||
|
} else {
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
return sendmmsg(s, mm, count, MSG_DONTWAIT | MSG_NOSIGNAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
3
udp.h
3
udp.h
|
@ -1,3 +1,4 @@
|
||||||
void udp_sock_handler(struct ctx *c, int s, uint32_t events);
|
void udp_sock_handler(struct ctx *c, int s, uint32_t events);
|
||||||
void udp_tap_handler(struct ctx *c, int af, void *addr, char *in, size_t len);
|
int udp_tap_handler(struct ctx *c, int af, void *addr,
|
||||||
|
struct tap_msg *msg, int count);
|
||||||
int udp_sock_init(struct ctx *c);
|
int udp_sock_init(struct ctx *c);
|
||||||
|
|
Loading…
Reference in a new issue