tap: Completely de-serialise input message batches
Until now, messages would be passed to protocol handlers in a single batch only if they happened to be dequeued in a row. Packets interleaved between different connections would result in multiple calls to the same protocol handler for a single connection. Instead, keep track of incoming packet descriptors, arrange them in sequences, and call protocol handlers only once we have completely sorted input messages into batches. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
dfc4513190
commit
dd581730e5
7 changed files with 444 additions and 275 deletions
20
icmp.c
20
icmp.c
|
@ -141,23 +141,26 @@ void icmp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
|
|||
* Return: count of consumed packets (always 1, even if malformed)
|
||||
*/
|
||||
int icmp_tap_handler(struct ctx *c, int af, void *addr,
|
||||
struct tap_msg *msg, int count, struct timespec *now)
|
||||
struct tap_l4_msg *msg, int count, struct timespec *now)
|
||||
{
|
||||
(void)count;
|
||||
|
||||
if (af == AF_INET) {
|
||||
struct icmphdr *ih = (struct icmphdr *)msg[0].l4h;
|
||||
union icmp_epoll_ref iref = { .v6 = 0 };
|
||||
struct sockaddr_in sa = {
|
||||
.sin_family = AF_INET,
|
||||
.sin_addr = { .s_addr = INADDR_ANY },
|
||||
.sin_port = ih->un.echo.id,
|
||||
};
|
||||
struct icmphdr *ih;
|
||||
int id, s;
|
||||
|
||||
ih = (struct icmphdr *)(pkt_buf + msg[0].pkt_buf_offset);
|
||||
|
||||
if (msg[0].l4_len < sizeof(*ih) || ih->type != ICMP_ECHO)
|
||||
return 1;
|
||||
|
||||
sa.sin_port = ih->un.echo.id;
|
||||
|
||||
iref.id = id = ntohs(ih->un.echo.id);
|
||||
|
||||
if ((s = icmp_id_map[V4][id].sock) <= 0) {
|
||||
|
@ -171,22 +174,25 @@ int icmp_tap_handler(struct ctx *c, int af, void *addr,
|
|||
bitmap_set(icmp_act[V4], id);
|
||||
|
||||
sa.sin_addr = *(struct in_addr *)addr;
|
||||
sendto(s, msg[0].l4h, msg[0].l4_len, MSG_NOSIGNAL,
|
||||
sendto(s, ih, msg[0].l4_len, MSG_NOSIGNAL,
|
||||
(struct sockaddr *)&sa, sizeof(sa));
|
||||
} else if (af == AF_INET6) {
|
||||
struct icmp6hdr *ih = (struct icmp6hdr *)msg[0].l4h;
|
||||
union icmp_epoll_ref iref = { .v6 = 1 };
|
||||
struct sockaddr_in6 sa = {
|
||||
.sin6_family = AF_INET6,
|
||||
.sin6_addr = IN6ADDR_ANY_INIT,
|
||||
.sin6_port = ih->icmp6_identifier,
|
||||
};
|
||||
struct icmp6hdr *ih;
|
||||
int id, s;
|
||||
|
||||
ih = (struct icmp6hdr *)(pkt_buf + msg[0].pkt_buf_offset);
|
||||
|
||||
if (msg[0].l4_len < sizeof(*ih) ||
|
||||
(ih->icmp6_type != 128 && ih->icmp6_type != 129))
|
||||
return 1;
|
||||
|
||||
sa.sin6_port = ih->icmp6_identifier;
|
||||
|
||||
iref.id = id = ntohs(ih->icmp6_identifier);
|
||||
if ((s = icmp_id_map[V6][id].sock) <= 0) {
|
||||
s = sock_l4(c, AF_INET6, IPPROTO_ICMPV6, id, 0,
|
||||
|
@ -200,7 +206,7 @@ int icmp_tap_handler(struct ctx *c, int af, void *addr,
|
|||
bitmap_set(icmp_act[V6], id);
|
||||
|
||||
sa.sin6_addr = *(struct in6_addr *)addr;
|
||||
sendto(s, msg[0].l4h, msg[0].l4_len, MSG_NOSIGNAL,
|
||||
sendto(s, ih, msg[0].l4_len, MSG_NOSIGNAL,
|
||||
(struct sockaddr *)&sa, sizeof(sa));
|
||||
}
|
||||
|
||||
|
|
2
icmp.h
2
icmp.h
|
@ -8,7 +8,7 @@ struct ctx;
|
|||
void icmp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
|
||||
struct timespec *now);
|
||||
int icmp_tap_handler(struct ctx *c, int af, void *addr,
|
||||
struct tap_msg *msg, int count, struct timespec *now);
|
||||
struct tap_l4_msg *msg, int count, struct timespec *now);
|
||||
void icmp_timer(struct ctx *c, struct timespec *ts);
|
||||
|
||||
/**
|
||||
|
|
16
passt.h
16
passt.h
|
@ -3,15 +3,21 @@
|
|||
|
||||
/**
|
||||
* struct tap_msg - Generic message descriptor for arrays of messages
|
||||
* @start: Pointer to message start
|
||||
* @l4_start: Pointer to L4 header
|
||||
* @pkt_buf_offset: Offset from @pkt_buf
|
||||
* @len: Message length, with L2 headers
|
||||
* @l4_len: Message length, with L4 headers
|
||||
*/
|
||||
struct tap_msg {
|
||||
char *start;
|
||||
char *l4h;
|
||||
uint32_t pkt_buf_offset;
|
||||
uint16_t len;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct tap_l4_msg - Layer-4 message descriptor for protocol handlers
|
||||
* @pkt_buf_offset: Offset of message from @pkt_buf
|
||||
* @l4_len: Length of Layer-4 payload, host order
|
||||
*/
|
||||
struct tap_l4_msg {
|
||||
uint32_t pkt_buf_offset;
|
||||
uint16_t l4_len;
|
||||
};
|
||||
|
||||
|
|
630
tap.c
630
tap.c
|
@ -50,7 +50,9 @@
|
|||
#include "dhcpv6.h"
|
||||
#include "pcap.h"
|
||||
|
||||
static struct tap_msg tap_msgs[TAP_MSGS];
|
||||
/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
|
||||
static struct tap_msg seq4[TAP_MSGS];
|
||||
static struct tap_msg seq6[TAP_MSGS];
|
||||
|
||||
/**
|
||||
* tap_send() - Send frame, with qemu socket header if needed
|
||||
|
@ -198,172 +200,288 @@ void tap_ip_send(struct ctx *c, struct in6_addr *src, uint8_t proto,
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* struct tap_l4_seq4 - Message sequence for one protocol handler call, IPv4
|
||||
* @msgs: Count of messages in sequence
|
||||
* @protocol: Protocol number
|
||||
* @source: Source port
|
||||
* @dest: Destination port
|
||||
* @saddr: Source address
|
||||
* @daddr: Destination address
|
||||
* @msg: Array of messages that can be handled in a single call
|
||||
*/
|
||||
static struct tap_l4_seq4 {
|
||||
uint16_t msgs;
|
||||
uint8_t protocol;
|
||||
|
||||
uint16_t source;
|
||||
uint16_t dest;
|
||||
|
||||
uint32_t saddr;
|
||||
uint32_t daddr;
|
||||
|
||||
struct tap_l4_msg msg[UIO_MAXIOV];
|
||||
} l4_seq4[UIO_MAXIOV /* Arbitrary: TAP_MSGS in theory, so limit in users */];
|
||||
|
||||
/**
|
||||
* struct tap_l4_seq6 - Message sequence for one protocol handler call, IPv6
|
||||
* @msgs: Count of messages in sequence
|
||||
* @protocol: Protocol number
|
||||
* @source: Source port
|
||||
* @dest: Destination port
|
||||
* @saddr: Source address
|
||||
* @daddr: Destination address
|
||||
* @msg: Array of messages that can be handled in a single call
|
||||
*/
|
||||
static struct tap_l4_seq6 {
|
||||
uint16_t msgs;
|
||||
uint8_t protocol;
|
||||
|
||||
uint16_t source;
|
||||
uint16_t dest;
|
||||
|
||||
struct in6_addr saddr;
|
||||
struct in6_addr daddr;
|
||||
|
||||
struct tap_l4_msg msg[UIO_MAXIOV];
|
||||
} l4_seq6[UIO_MAXIOV /* Arbitrary: TAP_MSGS in theory, so limit in users */];
|
||||
|
||||
/**
|
||||
* tap_packet_debug() - Print debug message for packet(s) from guest/tap
|
||||
* @iph: IPv4 header, can be NULL
|
||||
* @ip6h: IPv6 header, can be NULL
|
||||
* @seq4: Pointer to @struct tap_l4_seq4, can be NULL
|
||||
* @proto6: IPv6 protocol, for IPv6
|
||||
* @seq6: Pointer to @struct tap_l4_seq6, can be NULL
|
||||
* @count: Count of packets in this sequence
|
||||
*/
|
||||
static void tap_packet_debug(struct iphdr *iph, struct ipv6hdr *ip6h,
|
||||
struct tap_l4_seq4 *seq4, uint8_t proto6,
|
||||
struct tap_l4_seq6 *seq6, int count)
|
||||
{
|
||||
char buf6s[INET6_ADDRSTRLEN], buf6d[INET6_ADDRSTRLEN];
|
||||
char buf4s[INET_ADDRSTRLEN], buf4d[INET_ADDRSTRLEN];
|
||||
uint8_t proto;
|
||||
|
||||
if (iph || seq4) {
|
||||
inet_ntop(AF_INET, iph ? &iph->saddr : &seq4->saddr,
|
||||
buf4s, sizeof(buf4s)),
|
||||
inet_ntop(AF_INET, iph ? &iph->daddr : &seq4->daddr,
|
||||
buf4d, sizeof(buf4d)),
|
||||
proto = iph ? iph->protocol : seq4->protocol;
|
||||
} else {
|
||||
inet_ntop(AF_INET6, ip6h ? &ip6h->saddr : &seq6->saddr,
|
||||
buf6s, sizeof(buf6s)),
|
||||
inet_ntop(AF_INET6, ip6h ? &ip6h->daddr : &seq6->daddr,
|
||||
buf6d, sizeof(buf6d)),
|
||||
proto = proto6;
|
||||
}
|
||||
|
||||
if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
|
||||
debug("protocol %i from tap: %s:%i -> %s:%i (%i packet%s)",
|
||||
proto, seq4 ? buf4s : buf6s,
|
||||
ntohs(seq4 ? seq4->source : seq6->source),
|
||||
seq4 ? buf4d : buf6d,
|
||||
ntohs(seq4 ? seq4->dest : seq6->dest),
|
||||
count, count == 1 ? "" : "s");
|
||||
} else {
|
||||
debug("protocol %i from tap: %s -> %s (%i packet%s)",
|
||||
proto, iph ? buf4s : buf6s, iph ? buf4d : buf6d,
|
||||
count, count == 1 ? "" : "s");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* tap4_handler() - IPv4 and ARP packet handler for tap file descriptor
|
||||
* @c: Execution context
|
||||
* @msg: Array of messages with the same L3 protocol
|
||||
* @count: Count of messages with the same L3 protocol
|
||||
* @msg: Array of messages with IPv4 or ARP protocol
|
||||
* @count: Count of messages
|
||||
* @now: Current timestamp
|
||||
* @first: First call for an IPv4 packet in this batch
|
||||
*
|
||||
* Return: count of packets consumed by handlers
|
||||
*/
|
||||
static int tap4_handler(struct ctx *c, struct tap_msg *msg, size_t count,
|
||||
struct timespec *now, int first)
|
||||
struct timespec *now)
|
||||
{
|
||||
char buf_s[INET_ADDRSTRLEN] __attribute((__unused__));
|
||||
char buf_d[INET_ADDRSTRLEN] __attribute((__unused__));
|
||||
struct ethhdr *eh = (struct ethhdr *)msg[0].start;
|
||||
struct iphdr *iph, *prev_iph = NULL;
|
||||
struct udphdr *uh, *prev_uh = NULL;
|
||||
size_t len = msg[0].len;
|
||||
unsigned int i;
|
||||
unsigned int i, j, seq_count;
|
||||
struct tap_l4_msg *l4_msg;
|
||||
struct tap_l4_seq4 *seq;
|
||||
size_t len, l4_len;
|
||||
struct ethhdr *eh;
|
||||
struct iphdr *iph;
|
||||
struct udphdr *uh;
|
||||
char *l4h;
|
||||
|
||||
if (!c->v4)
|
||||
return count;
|
||||
|
||||
if (len < sizeof(*eh) + sizeof(*iph))
|
||||
return 1;
|
||||
|
||||
if (arp(c, eh, len) || dhcp(c, eh, len))
|
||||
return 1;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
i = 0;
|
||||
resume:
|
||||
for (seq_count = 0, seq = NULL; i < count; i++) {
|
||||
eh = (struct ethhdr *)(pkt_buf + msg[i].pkt_buf_offset);
|
||||
len = msg[i].len;
|
||||
|
||||
if (len < sizeof(*eh))
|
||||
continue;
|
||||
|
||||
if (ntohs(eh->h_proto) == ETH_P_ARP && arp(c, eh, len))
|
||||
continue;
|
||||
|
||||
if (len < sizeof(*eh) + sizeof(*iph))
|
||||
return 1;
|
||||
continue;
|
||||
|
||||
eh = (struct ethhdr *)msg[i].start;
|
||||
iph = (struct iphdr *)(eh + 1);
|
||||
l4h = (char *)iph + iph->ihl * 4;
|
||||
if ((iph->ihl * 4) + sizeof(*eh) > len)
|
||||
continue;
|
||||
if (iph->ihl * 4 < sizeof(*iph))
|
||||
continue;
|
||||
|
||||
if (first && c->addr4_seen != iph->saddr) {
|
||||
if (iph->saddr && c->addr4_seen != iph->saddr) {
|
||||
c->addr4_seen = iph->saddr;
|
||||
proto_update_l2_buf(NULL, NULL, &c->addr4_seen);
|
||||
}
|
||||
|
||||
msg[i].l4h = l4h;
|
||||
msg[i].l4_len = len - ((intptr_t)l4h - (intptr_t)eh);
|
||||
l4h = (char *)iph + iph->ihl * 4;
|
||||
l4_len = len - ((intptr_t)l4h - (intptr_t)eh);
|
||||
|
||||
if (iph->protocol != IPPROTO_TCP &&
|
||||
iph->protocol != IPPROTO_UDP)
|
||||
break;
|
||||
if (iph->protocol == IPPROTO_ICMP) {
|
||||
struct tap_l4_msg icmp_msg = { l4h - pkt_buf,
|
||||
l4_len };
|
||||
|
||||
if (len < sizeof(*uh))
|
||||
break;
|
||||
if (l4_len < sizeof(struct icmphdr))
|
||||
continue;
|
||||
|
||||
uh = (struct udphdr *)l4h;
|
||||
|
||||
if (!i) {
|
||||
prev_iph = iph;
|
||||
prev_uh = uh;
|
||||
tap_packet_debug(iph, NULL, NULL, 0, NULL, 1);
|
||||
if (!c->no_icmp) {
|
||||
icmp_tap_handler(c, AF_INET, &iph->daddr,
|
||||
&icmp_msg, 1, now);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (iph->tos != prev_iph->tos ||
|
||||
iph->frag_off != prev_iph->frag_off ||
|
||||
iph->protocol != prev_iph->protocol ||
|
||||
iph->saddr != prev_iph->saddr ||
|
||||
iph->daddr != prev_iph->daddr ||
|
||||
uh->source != prev_uh->source ||
|
||||
uh->dest != prev_uh->dest)
|
||||
if (l4_len < sizeof(*uh))
|
||||
continue;
|
||||
|
||||
uh = (struct udphdr *)l4h;
|
||||
|
||||
if (iph->protocol == IPPROTO_UDP && dhcp(c, eh, len))
|
||||
continue;
|
||||
|
||||
if (iph->protocol != IPPROTO_TCP &&
|
||||
iph->protocol != IPPROTO_UDP) {
|
||||
tap_packet_debug(iph, NULL, NULL, 0, NULL, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
#define L4_MATCH(iph, uh, seq) \
|
||||
(seq->protocol == iph->protocol && \
|
||||
seq->source == uh->source && seq->dest == uh->dest && \
|
||||
seq->saddr == iph->saddr && seq->daddr == iph->daddr)
|
||||
|
||||
#define L4_SET(iph, uh, seq) \
|
||||
do { \
|
||||
seq->protocol = iph->protocol; \
|
||||
seq->source = uh->source; \
|
||||
seq->dest = uh->dest; \
|
||||
seq->saddr = iph->saddr; \
|
||||
seq->daddr = iph->daddr; \
|
||||
} while (0)
|
||||
|
||||
if (seq && L4_MATCH(iph, uh, seq) && seq->msgs < UIO_MAXIOV)
|
||||
goto append;
|
||||
|
||||
for (seq = l4_seq4 + seq_count - 1; seq >= l4_seq4; seq--) {
|
||||
if (L4_MATCH(iph, uh, seq)) {
|
||||
if (seq->msgs >= UIO_MAXIOV)
|
||||
seq = l4_seq4 - 1;
|
||||
break;
|
||||
|
||||
prev_iph = iph;
|
||||
prev_uh = uh;
|
||||
}
|
||||
}
|
||||
|
||||
eh = (struct ethhdr *)msg[0].start;
|
||||
iph = (struct iphdr *)(eh + 1);
|
||||
|
||||
if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP ||
|
||||
iph->protocol == IPPROTO_SCTP) {
|
||||
uh = (struct udphdr *)msg[0].l4h;
|
||||
|
||||
if (msg[0].len < sizeof(*uh))
|
||||
return 1;
|
||||
|
||||
debug("%s (%i) from tap: %s:%i -> %s:%i (%i packet%s)",
|
||||
IP_PROTO_STR(iph->protocol), iph->protocol,
|
||||
inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)),
|
||||
ntohs(uh->source),
|
||||
inet_ntop(AF_INET, &iph->daddr, buf_d, sizeof(buf_d)),
|
||||
ntohs(uh->dest),
|
||||
i, i > 1 ? "s" : "");
|
||||
} else if (iph->protocol == IPPROTO_ICMP) {
|
||||
debug("icmp from tap: %s -> %s",
|
||||
inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)),
|
||||
inet_ntop(AF_INET, &iph->daddr, buf_d, sizeof(buf_d)));
|
||||
if (seq < l4_seq4) {
|
||||
seq = l4_seq4 + seq_count++;
|
||||
L4_SET(iph, uh, seq);
|
||||
seq->msgs = 0;
|
||||
}
|
||||
|
||||
if (iph->protocol == IPPROTO_TCP) {
|
||||
#undef L4_MATCH
|
||||
#undef L4_SET
|
||||
|
||||
append:
|
||||
l4_msg = &seq->msg[seq->msgs++];
|
||||
|
||||
l4_msg->pkt_buf_offset = l4h - pkt_buf;
|
||||
l4_msg->l4_len = l4_len;
|
||||
|
||||
if (seq_count == UIO_MAXIOV)
|
||||
break; /* Resume after flushing if i < count */
|
||||
}
|
||||
|
||||
for (j = 0, seq = l4_seq4; j < seq_count; j++, seq++) {
|
||||
int n = seq->msgs;
|
||||
|
||||
l4_msg = seq->msg;
|
||||
|
||||
tap_packet_debug(NULL, NULL, seq, 0, NULL, n);
|
||||
|
||||
if (seq->protocol == IPPROTO_TCP) {
|
||||
if (c->no_tcp)
|
||||
return i;
|
||||
return tcp_tap_handler(c, AF_INET, &iph->daddr, msg, i, now);
|
||||
}
|
||||
|
||||
if (iph->protocol == IPPROTO_UDP) {
|
||||
continue;
|
||||
while ((n -= tcp_tap_handler(c, AF_INET, &seq->daddr,
|
||||
l4_msg, n, now)));
|
||||
} else if (seq->protocol == IPPROTO_UDP) {
|
||||
if (c->no_udp)
|
||||
return i;
|
||||
return udp_tap_handler(c, AF_INET, &iph->daddr, msg, i, now);
|
||||
continue;
|
||||
while ((n -= udp_tap_handler(c, AF_INET, &seq->daddr,
|
||||
l4_msg, n, now)));
|
||||
}
|
||||
}
|
||||
|
||||
if (iph->protocol == IPPROTO_ICMP) {
|
||||
if (c->no_icmp)
|
||||
return 1;
|
||||
icmp_tap_handler(c, AF_INET, &iph->daddr, msg, 1, now);
|
||||
}
|
||||
if (i < count)
|
||||
goto resume;
|
||||
|
||||
return 1;
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* tap6_handler() - IPv6 packet handler for tap file descriptor
|
||||
* @c: Execution context
|
||||
* @msg: Array of messages with the same L3 protocol
|
||||
* @count: Count of messages with the same L3 protocol
|
||||
* @msg: Array of messages with IPv6 protocol
|
||||
* @count: Count of messages
|
||||
* @now: Current timestamp
|
||||
* @first: First call for an IPv6 packet in this batch
|
||||
*
|
||||
* Return: count of packets consumed by handlers
|
||||
*/
|
||||
static int tap6_handler(struct ctx *c, struct tap_msg *msg, size_t count,
|
||||
struct timespec *now, int first)
|
||||
struct timespec *now)
|
||||
{
|
||||
char buf_s[INET6_ADDRSTRLEN], buf_d[INET6_ADDRSTRLEN];
|
||||
struct ethhdr *eh = (struct ethhdr *)msg[0].start;
|
||||
struct udphdr *uh, *prev_uh = NULL;
|
||||
uint8_t proto = 0, prev_proto = 0;
|
||||
size_t len = msg[0].len;
|
||||
unsigned int i, j, seq_count = 0;
|
||||
struct tap_l4_msg *l4_msg;
|
||||
struct tap_l4_seq6 *seq;
|
||||
struct ipv6hdr *ip6h;
|
||||
unsigned int i;
|
||||
size_t len, l4_len;
|
||||
struct ethhdr *eh;
|
||||
struct udphdr *uh;
|
||||
uint8_t proto;
|
||||
char *l4h;
|
||||
|
||||
if (!c->v6)
|
||||
return count;
|
||||
|
||||
if (len < sizeof(*eh) + sizeof(*ip6h))
|
||||
return 1;
|
||||
|
||||
if (ndp(c, eh, len) || dhcpv6(c, eh, len))
|
||||
return 1;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
struct ipv6hdr *p_ip6h;
|
||||
|
||||
i = 0;
|
||||
resume:
|
||||
for (seq_count = 0, seq = NULL; i < count; i++) {
|
||||
eh = (struct ethhdr *)(pkt_buf + msg[i].pkt_buf_offset);
|
||||
len = msg[i].len;
|
||||
|
||||
if (len < sizeof(*eh))
|
||||
continue;
|
||||
|
||||
if (len < sizeof(*eh) + sizeof(*ip6h))
|
||||
return 1;
|
||||
|
||||
eh = (struct ethhdr *)msg[i].start;
|
||||
ip6h = (struct ipv6hdr *)(eh + 1);
|
||||
l4h = ipv6_l4hdr(ip6h, &proto);
|
||||
|
||||
msg[i].l4h = l4h;
|
||||
msg[i].l4_len = len - ((intptr_t)l4h - (intptr_t)eh);
|
||||
|
||||
if (first) {
|
||||
if (IN6_IS_ADDR_LINKLOCAL(&ip6h->saddr)) {
|
||||
c->addr6_ll_seen = ip6h->saddr;
|
||||
|
||||
|
@ -373,82 +491,119 @@ static int tap6_handler(struct ctx *c, struct tap_msg *msg, size_t count,
|
|||
} else {
|
||||
c->addr6_seen = ip6h->saddr;
|
||||
}
|
||||
|
||||
if (ntohs(ip6h->payload_len) >
|
||||
len - sizeof(*eh) - sizeof(*ip6h))
|
||||
continue;
|
||||
|
||||
if (!(l4h = ipv6_l4hdr(ip6h, &proto)))
|
||||
continue;
|
||||
|
||||
l4_len = len - ((intptr_t)l4h - (intptr_t)eh);
|
||||
|
||||
if (proto == IPPROTO_ICMPV6) {
|
||||
struct tap_l4_msg icmpv6_msg = { l4h - pkt_buf,
|
||||
l4_len };
|
||||
|
||||
if (l4_len < sizeof(struct icmp6hdr))
|
||||
continue;
|
||||
|
||||
if (ndp(c, eh, len))
|
||||
continue;
|
||||
|
||||
tap_packet_debug(NULL, ip6h, NULL, proto, NULL, 1);
|
||||
if (!c->no_icmp) {
|
||||
icmp_tap_handler(c, AF_INET6, &ip6h->daddr,
|
||||
&icmpv6_msg, 1, now);
|
||||
}
|
||||
|
||||
ip6h->saddr = c->addr6;
|
||||
|
||||
if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
|
||||
break;
|
||||
|
||||
if (len < sizeof(*uh))
|
||||
break;
|
||||
|
||||
uh = (struct udphdr *)l4h;
|
||||
|
||||
if (!i) {
|
||||
p_ip6h = ip6h;
|
||||
prev_proto = proto;
|
||||
prev_uh = uh;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (proto != prev_proto ||
|
||||
memcmp(&ip6h->saddr, &p_ip6h->saddr, sizeof(ip6h->saddr)) ||
|
||||
memcmp(&ip6h->daddr, &p_ip6h->daddr, sizeof(ip6h->daddr)) ||
|
||||
uh->source != prev_uh->source ||
|
||||
uh->dest != prev_uh->dest)
|
||||
if (l4_len < sizeof(*uh))
|
||||
continue;
|
||||
|
||||
uh = (struct udphdr *)l4h;
|
||||
|
||||
if (proto == IPPROTO_UDP && dhcpv6(c, eh, len))
|
||||
continue;
|
||||
|
||||
ip6h->saddr = c->addr6;
|
||||
|
||||
if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
|
||||
tap_packet_debug(NULL, ip6h, NULL, proto, NULL, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
#define L4_MATCH(ip6h, proto, uh, seq) \
|
||||
(seq->protocol == proto && \
|
||||
seq->source == uh->source && seq->dest == uh->dest && \
|
||||
!memcmp(&seq->saddr, &ip6h->saddr, sizeof(seq->saddr)) && \
|
||||
!memcmp(&seq->daddr, &ip6h->daddr, sizeof(seq->daddr)))
|
||||
|
||||
#define L4_SET(ip6h, proto, uh, seq) \
|
||||
do { \
|
||||
seq->protocol = proto; \
|
||||
seq->source = uh->source; \
|
||||
seq->dest = uh->dest; \
|
||||
seq->saddr = ip6h->saddr; \
|
||||
seq->daddr = ip6h->daddr; \
|
||||
} while (0)
|
||||
|
||||
if (seq && L4_MATCH(ip6h, proto, uh, seq) &&
|
||||
seq->msgs < UIO_MAXIOV)
|
||||
goto append;
|
||||
|
||||
for (seq = l4_seq6 + seq_count - 1; seq >= l4_seq6; seq--) {
|
||||
if (L4_MATCH(ip6h, proto, uh, seq)) {
|
||||
if (seq->msgs >= UIO_MAXIOV)
|
||||
seq = l4_seq6 - 1;
|
||||
break;
|
||||
|
||||
p_ip6h = ip6h;
|
||||
prev_proto = proto;
|
||||
prev_uh = uh;
|
||||
}
|
||||
}
|
||||
|
||||
if (prev_proto)
|
||||
proto = prev_proto;
|
||||
|
||||
eh = (struct ethhdr *)msg[0].start;
|
||||
ip6h = (struct ipv6hdr *)(eh + 1);
|
||||
|
||||
if (proto == IPPROTO_ICMPV6) {
|
||||
debug("icmpv6 from tap: %s ->\n\t%s",
|
||||
inet_ntop(AF_INET6, &ip6h->saddr, buf_s, sizeof(buf_s)),
|
||||
inet_ntop(AF_INET6, &ip6h->daddr, buf_d, sizeof(buf_d)));
|
||||
} else if (proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
|
||||
proto == IPPROTO_SCTP) {
|
||||
uh = (struct udphdr *)msg[0].l4h;
|
||||
|
||||
if (msg[0].len < sizeof(*uh))
|
||||
return 1;
|
||||
|
||||
debug("%s (%i) from tap: [%s]:%i\n\t-> [%s]:%i (%i packet%s)",
|
||||
IP_PROTO_STR(proto), proto,
|
||||
inet_ntop(AF_INET6, &ip6h->saddr, buf_s, sizeof(buf_s)),
|
||||
ntohs(uh->source),
|
||||
inet_ntop(AF_INET6, &ip6h->daddr, buf_d, sizeof(buf_d)),
|
||||
ntohs(uh->dest),
|
||||
i, i > 1 ? "s" : "");
|
||||
if (seq < l4_seq6) {
|
||||
seq = l4_seq6 + seq_count++;
|
||||
L4_SET(ip6h, proto, uh, seq);
|
||||
seq->msgs = 0;
|
||||
}
|
||||
|
||||
if (proto == IPPROTO_TCP) {
|
||||
#undef L4_MATCH
|
||||
#undef L4_SET
|
||||
|
||||
append:
|
||||
l4_msg = &seq->msg[seq->msgs++];
|
||||
|
||||
l4_msg->pkt_buf_offset = l4h - pkt_buf;
|
||||
l4_msg->l4_len = l4_len;
|
||||
|
||||
if (seq_count == UIO_MAXIOV)
|
||||
break; /* Resume after flushing if i < count */
|
||||
}
|
||||
|
||||
for (j = 0, seq = l4_seq6; j < seq_count; j++, seq++) {
|
||||
int n = seq->msgs;
|
||||
|
||||
l4_msg = seq->msg;
|
||||
|
||||
tap_packet_debug(NULL, NULL, NULL, seq->protocol, seq, n);
|
||||
|
||||
if (seq->protocol == IPPROTO_TCP) {
|
||||
if (c->no_tcp)
|
||||
return i;
|
||||
return tcp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, i, now);
|
||||
}
|
||||
|
||||
if (proto == IPPROTO_UDP) {
|
||||
continue;
|
||||
while ((n -= tcp_tap_handler(c, AF_INET6, &seq->daddr,
|
||||
l4_msg, n, now)));
|
||||
} else if (seq->protocol == IPPROTO_UDP) {
|
||||
if (c->no_udp)
|
||||
return i;
|
||||
return udp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, i, now);
|
||||
continue;
|
||||
while ((n -= udp_tap_handler(c, AF_INET6, &seq->daddr,
|
||||
l4_msg, n, now)));
|
||||
}
|
||||
}
|
||||
|
||||
if (proto == IPPROTO_ICMPV6) {
|
||||
if (c->no_icmp)
|
||||
return 1;
|
||||
icmp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, 1, now);
|
||||
}
|
||||
if (i < count)
|
||||
goto resume;
|
||||
|
||||
return 1;
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -460,10 +615,14 @@ static int tap6_handler(struct ctx *c, struct tap_msg *msg, size_t count,
|
|||
*/
|
||||
static int tap_handler_passt(struct ctx *c, struct timespec *now)
|
||||
{
|
||||
int msg_count = 0, same, i = 0, first_v4 = 1, first_v6 = 1;
|
||||
int seq4_i, seq6_i;
|
||||
struct ethhdr *eh;
|
||||
char *p = pkt_buf;
|
||||
ssize_t n, rem;
|
||||
char *p;
|
||||
|
||||
redo:
|
||||
p = pkt_buf;
|
||||
seq4_i = seq6_i = rem = 0;
|
||||
|
||||
n = recv(c->fd_tap, p, TAP_BUF_FILL, MSG_DONTWAIT);
|
||||
if (n < 0) {
|
||||
|
@ -479,30 +638,27 @@ static int tap_handler_passt(struct ctx *c, struct timespec *now)
|
|||
while (n > (ssize_t)sizeof(uint32_t)) {
|
||||
ssize_t len = ntohl(*(uint32_t *)p);
|
||||
|
||||
if (len < (ssize_t)sizeof(*eh) || len > ETH_MAX_MTU)
|
||||
return 0;
|
||||
|
||||
p += sizeof(uint32_t);
|
||||
n -= sizeof(uint32_t);
|
||||
|
||||
/* At most one packet might not fit in a single read */
|
||||
/* At most one packet might not fit in a single read, and this
|
||||
* needs to be blocking.
|
||||
*/
|
||||
if (len > n) {
|
||||
rem = recv(c->fd_tap, p + n, len - n, MSG_DONTWAIT);
|
||||
rem = recv(c->fd_tap, p + n, len - n, 0);
|
||||
if ((n += rem) != len)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Complete the partial read above before discarding a malformed
|
||||
* frame, otherwise the stream will be inconsistent.
|
||||
*/
|
||||
if (len < (ssize_t)sizeof(*eh) || len > ETH_MAX_MTU)
|
||||
goto next;
|
||||
|
||||
pcap(p, len);
|
||||
|
||||
tap_msgs[msg_count].start = p;
|
||||
tap_msgs[msg_count++].len = len;
|
||||
|
||||
n -= len;
|
||||
p += len;
|
||||
}
|
||||
|
||||
while (i < msg_count) {
|
||||
eh = (struct ethhdr *)tap_msgs[i].start;
|
||||
eh = (struct ethhdr *)p;
|
||||
|
||||
if (memcmp(c->mac_guest, eh->h_source, ETH_ALEN)) {
|
||||
memcpy(c->mac_guest, eh->h_source, ETH_ALEN);
|
||||
|
@ -511,52 +667,33 @@ static int tap_handler_passt(struct ctx *c, struct timespec *now)
|
|||
|
||||
switch (ntohs(eh->h_proto)) {
|
||||
case ETH_P_ARP:
|
||||
if (c->v4)
|
||||
tap4_handler(c, tap_msgs + i, 1, now, 1);
|
||||
i++;
|
||||
break;
|
||||
case ETH_P_IP:
|
||||
for (same = 1; i + same < msg_count &&
|
||||
same < UIO_MAXIOV; same++) {
|
||||
struct tap_msg *next = &tap_msgs[i + same];
|
||||
|
||||
eh = (struct ethhdr *)next->start;
|
||||
if (ntohs(eh->h_proto) != ETH_P_IP)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!c->v4) {
|
||||
i += same;
|
||||
break;
|
||||
}
|
||||
|
||||
i += tap4_handler(c, tap_msgs + i, same, now, first_v4);
|
||||
first_v4 = 0;
|
||||
seq4[seq4_i].pkt_buf_offset = p - pkt_buf;
|
||||
seq4[seq4_i++].len = len;
|
||||
break;
|
||||
case ETH_P_IPV6:
|
||||
for (same = 1; i + same < msg_count &&
|
||||
same < UIO_MAXIOV; same++) {
|
||||
struct tap_msg *next = &tap_msgs[i + same];
|
||||
|
||||
eh = (struct ethhdr *)next->start;
|
||||
if (ntohs(eh->h_proto) != ETH_P_IPV6)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!c->v6) {
|
||||
i += same;
|
||||
break;
|
||||
}
|
||||
|
||||
i += tap6_handler(c, tap_msgs + i, same, now, first_v6);
|
||||
first_v6 = 0;
|
||||
seq6[seq6_i].pkt_buf_offset = p - pkt_buf;
|
||||
seq6[seq6_i++].len = len;
|
||||
break;
|
||||
default:
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
|
||||
next:
|
||||
p += len;
|
||||
n -= len;
|
||||
}
|
||||
|
||||
if (seq4_i)
|
||||
tap4_handler(c, seq4, seq4_i, now);
|
||||
|
||||
if (seq6_i)
|
||||
tap6_handler(c, seq6, seq6_i, now);
|
||||
|
||||
/* We can't use EPOLLET otherwise. */
|
||||
if (rem)
|
||||
goto redo;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -569,14 +706,19 @@ static int tap_handler_passt(struct ctx *c, struct timespec *now)
|
|||
*/
|
||||
static int tap_handler_pasta(struct ctx *c, struct timespec *now)
|
||||
{
|
||||
struct tap_msg msg = { .start = pkt_buf };
|
||||
ssize_t n;
|
||||
ssize_t n = 0, len;
|
||||
int err, seq4_i = 0, seq6_i = 0;
|
||||
|
||||
while ((n = read(c->fd_tap, pkt_buf, TAP_BUF_BYTES)) > 0) {
|
||||
struct ethhdr *eh = (struct ethhdr *)pkt_buf;
|
||||
msg.len = n;
|
||||
restart:
|
||||
while ((len = read(c->fd_tap, pkt_buf + n, TAP_BUF_BYTES - n)) > 0) {
|
||||
struct ethhdr *eh = (struct ethhdr *)(pkt_buf + n);
|
||||
|
||||
pcap(msg.start, msg.len);
|
||||
if (len < (ssize_t)sizeof(*eh) || len > ETH_MAX_MTU) {
|
||||
n += len;
|
||||
continue;
|
||||
}
|
||||
|
||||
pcap(pkt_buf + n, len);
|
||||
|
||||
if (memcmp(c->mac_guest, eh->h_source, ETH_ALEN)) {
|
||||
memcpy(c->mac_guest, eh->h_source, ETH_ALEN);
|
||||
|
@ -585,21 +727,33 @@ static int tap_handler_pasta(struct ctx *c, struct timespec *now)
|
|||
|
||||
switch (ntohs(eh->h_proto)) {
|
||||
case ETH_P_ARP:
|
||||
if (c->v4)
|
||||
tap4_handler(c, &msg, 1, now, 1);
|
||||
break;
|
||||
case ETH_P_IP:
|
||||
if (c->v4)
|
||||
tap4_handler(c, &msg, 1, now, 1);
|
||||
seq4[seq4_i].pkt_buf_offset = n;
|
||||
seq4[seq4_i++].len = len;
|
||||
break;
|
||||
case ETH_P_IPV6:
|
||||
if (c->v6)
|
||||
tap6_handler(c, &msg, 1, now, 1);
|
||||
seq6[seq6_i].pkt_buf_offset = n;
|
||||
seq6[seq6_i++].len = len;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
n += len;
|
||||
}
|
||||
|
||||
if (!n || errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
|
||||
if (len < 0 && errno == EINTR)
|
||||
goto restart;
|
||||
|
||||
err = errno;
|
||||
|
||||
if (seq4_i)
|
||||
tap4_handler(c, seq4, seq4_i, now);
|
||||
|
||||
if (seq6_i)
|
||||
tap6_handler(c, seq6, seq6_i, now);
|
||||
|
||||
if (len > 0 || err == EAGAIN)
|
||||
return 0;
|
||||
|
||||
epoll_ctl(c->epollfd, EPOLL_CTL_DEL, c->fd_tap, NULL);
|
||||
|
@ -753,12 +907,14 @@ void tap_sock_init(struct ctx *c)
|
|||
close(c->fd_tap);
|
||||
}
|
||||
|
||||
if (c->mode == MODE_PASST)
|
||||
if (c->mode == MODE_PASST) {
|
||||
tap_sock_init_unix(c);
|
||||
else
|
||||
ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
|
||||
} else {
|
||||
tap_sock_init_tun(c);
|
||||
|
||||
ev.events = EPOLLIN | EPOLLRDHUP;
|
||||
}
|
||||
|
||||
ev.data.fd = c->fd_tap;
|
||||
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
|
||||
}
|
||||
|
|
26
tcp.c
26
tcp.c
|
@ -333,6 +333,7 @@
|
|||
#include <sys/random.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/uio.h>
|
||||
#include <unistd.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
|
@ -645,7 +646,7 @@ static struct mmsghdr tcp_l2_mh_tap [TCP_TAP_FRAMES] = {
|
|||
};
|
||||
|
||||
/* sendmsg() to socket */
|
||||
static struct iovec tcp_tap_iov [TAP_MSGS];
|
||||
static struct iovec tcp_tap_iov [UIO_MAXIOV];
|
||||
|
||||
/* Bitmap, activity monitoring needed for connection via tap */
|
||||
static uint8_t tcp_act[MAX_TAP_CONNS / 8] = { 0 };
|
||||
|
@ -1968,7 +1969,7 @@ out_restore_iov:
|
|||
* @now: Current timestamp
|
||||
*/
|
||||
static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
|
||||
struct tap_msg *msg, int count,
|
||||
struct tap_l4_msg *msg, int count,
|
||||
struct timespec *now)
|
||||
{
|
||||
int i, iov_i, ack = 0, fin = 0, retr = 0, keep = -1;
|
||||
|
@ -1979,10 +1980,13 @@ static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
|
|||
ssize_t len;
|
||||
|
||||
for (i = 0, iov_i = 0; i < count; i++) {
|
||||
struct tcphdr *th = (struct tcphdr *)msg[i].l4h;
|
||||
uint32_t seq, seq_offset, ack_seq;
|
||||
size_t len = msg[i].l4_len, off;
|
||||
struct tcphdr *th;
|
||||
char *data;
|
||||
size_t off;
|
||||
|
||||
th = (struct tcphdr *)(pkt_buf + msg[i].pkt_buf_offset);
|
||||
len = msg[i].l4_len;
|
||||
|
||||
if (len < sizeof(*th)) {
|
||||
tcp_rst(c, conn);
|
||||
|
@ -2152,19 +2156,11 @@ out:
|
|||
* Return: count of consumed packets
|
||||
*/
|
||||
int tcp_tap_handler(struct ctx *c, int af, void *addr,
|
||||
struct tap_msg *msg, int count, struct timespec *now)
|
||||
struct tap_l4_msg *msg, int count, struct timespec *now)
|
||||
{
|
||||
struct tcphdr *th = (struct tcphdr *)msg[0].l4h;
|
||||
size_t len = msg[0].l4_len, off;
|
||||
struct tcphdr *th = (struct tcphdr *)(pkt_buf + msg[0].pkt_buf_offset);
|
||||
uint16_t len = msg[0].l4_len;
|
||||
struct tcp_tap_conn *conn;
|
||||
int ws;
|
||||
|
||||
if (len < sizeof(*th))
|
||||
return 1;
|
||||
|
||||
off = th->doff * 4;
|
||||
if (off < sizeof(*th) || off > len)
|
||||
return 1;
|
||||
|
||||
conn = tcp_hash_lookup(c, af, addr, htons(th->source), htons(th->dest));
|
||||
if (!conn) {
|
||||
|
|
13
udp.c
13
udp.c
|
@ -879,12 +879,12 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
|
|||
* Return: count of consumed packets
|
||||
*/
|
||||
int udp_tap_handler(struct ctx *c, int af, void *addr,
|
||||
struct tap_msg *msg, int count, struct timespec *now)
|
||||
struct tap_l4_msg *msg, int count, struct timespec *now)
|
||||
{
|
||||
/* The caller already checks that all the messages have the same source
|
||||
* and destination, so we can just take those from the first message.
|
||||
*/
|
||||
struct udphdr *uh = (struct udphdr *)msg[0].l4h;
|
||||
struct udphdr *uh = (struct udphdr *)(pkt_buf + msg[0].pkt_buf_offset);
|
||||
struct mmsghdr mm[UIO_MAXIOV] = { 0 };
|
||||
struct iovec m[UIO_MAXIOV];
|
||||
struct sockaddr_in6 s_in6;
|
||||
|
@ -972,7 +972,10 @@ int udp_tap_handler(struct ctx *c, int af, void *addr,
|
|||
}
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
m[i].iov_base = (char *)((struct udphdr *)msg[i].l4h + 1);
|
||||
struct udphdr *uh;
|
||||
|
||||
uh = (struct udphdr *)(msg[i].pkt_buf_offset + pkt_buf);
|
||||
m[i].iov_base = (char *)(uh + 1);
|
||||
m[i].iov_len = msg[i].l4_len - sizeof(*uh);
|
||||
|
||||
mm[i].msg_hdr.msg_name = sa;
|
||||
|
@ -1084,12 +1087,14 @@ static void udp_splice_iov_init(void)
|
|||
*
|
||||
* Return: 0 on success, -1 on failure
|
||||
*/
|
||||
int udp_sock_init(struct ctx *c)
|
||||
int udp_sock_init(struct ctx *c, struct timespec *now)
|
||||
{
|
||||
union udp_epoll_ref uref = { .bound = 1 };
|
||||
in_port_t dst;
|
||||
int s;
|
||||
|
||||
(void)now;
|
||||
|
||||
for (dst = 0; dst < USHRT_MAX; dst++) {
|
||||
if (!bitmap_isset(c->udp.port_to_tap, dst))
|
||||
continue;
|
||||
|
|
4
udp.h
4
udp.h
|
@ -6,8 +6,8 @@
|
|||
void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
|
||||
struct timespec *now);
|
||||
int udp_tap_handler(struct ctx *c, int af, void *addr,
|
||||
struct tap_msg *msg, int count, struct timespec *now);
|
||||
int udp_sock_init(struct ctx *c);
|
||||
struct tap_l4_msg *msg, int count, struct timespec *now);
|
||||
int udp_sock_init(struct ctx *c, struct timespec *now);
|
||||
void udp_timer(struct ctx *c, struct timespec *ts);
|
||||
void udp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
|
||||
uint32_t *ip_da);
|
||||
|
|
Loading…
Reference in a new issue