7291b70ba7
For some reason, this is reported only with musl, and older glibc versions (2.31, at least). Signed-off-by: Stefano Brivio <sbrivio@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
275 lines
7.3 KiB
C
275 lines
7.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later
|
|
* Copyright Red Hat
|
|
* Author: David Gibson <david@gibson.dropbear.id.au>
|
|
*
|
|
* UDP flow tracking functions
|
|
*/
|
|
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <sys/uio.h>
|
|
#include <unistd.h>
|
|
|
|
#include "util.h"
|
|
#include "passt.h"
|
|
#include "flow_table.h"
|
|
|
|
#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */
|
|
|
|
/**
|
|
* udp_at_sidx() - Get UDP specific flow at given sidx
|
|
* @sidx: Flow and side to retrieve
|
|
*
|
|
* Return: UDP specific flow at @sidx, or NULL of @sidx is invalid. Asserts if
|
|
* the flow at @sidx is not FLOW_UDP.
|
|
*/
|
|
struct udp_flow *udp_at_sidx(flow_sidx_t sidx)
|
|
{
|
|
union flow *flow = flow_at_sidx(sidx);
|
|
|
|
if (!flow)
|
|
return NULL;
|
|
|
|
ASSERT(flow->f.type == FLOW_UDP);
|
|
return &flow->udp;
|
|
}
|
|
|
|
/*
|
|
* udp_flow_close() - Close and clean up UDP flow
|
|
* @c: Execution context
|
|
* @uflow: UDP flow
|
|
*/
|
|
static void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
|
|
{
|
|
if (uflow->s[INISIDE] >= 0) {
|
|
/* The listening socket needs to stay in epoll */
|
|
close(uflow->s[INISIDE]);
|
|
uflow->s[INISIDE] = -1;
|
|
}
|
|
|
|
if (uflow->s[TGTSIDE] >= 0) {
|
|
/* But the flow specific one needs to be removed */
|
|
epoll_ctl(c->epollfd, EPOLL_CTL_DEL, uflow->s[TGTSIDE], NULL);
|
|
close(uflow->s[TGTSIDE]);
|
|
uflow->s[TGTSIDE] = -1;
|
|
}
|
|
flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE));
|
|
if (!pif_is_socket(uflow->f.pif[TGTSIDE]))
|
|
flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE));
|
|
}
|
|
|
|
/**
|
|
* udp_flow_new() - Common setup for a new UDP flow
|
|
* @c: Execution context
|
|
* @flow: Initiated flow
|
|
* @s_ini: Initiating socket (or -1)
|
|
* @now: Timestamp
|
|
*
|
|
* Return: UDP specific flow, if successful, NULL on failure
|
|
*/
|
|
static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
|
|
int s_ini, const struct timespec *now)
|
|
{
|
|
const struct flowside *ini = &flow->f.side[INISIDE];
|
|
struct udp_flow *uflow = NULL;
|
|
const struct flowside *tgt;
|
|
uint8_t tgtpif;
|
|
|
|
if (!inany_is_unicast(&ini->eaddr) || ini->eport == 0) {
|
|
flow_trace(flow, "Invalid endpoint to initiate UDP flow");
|
|
goto cancel;
|
|
}
|
|
|
|
if (!(tgt = flow_target(c, flow, IPPROTO_UDP)))
|
|
goto cancel;
|
|
tgtpif = flow->f.pif[TGTSIDE];
|
|
|
|
uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp);
|
|
uflow->ts = now->tv_sec;
|
|
uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1;
|
|
|
|
if (s_ini >= 0) {
|
|
/* When using auto port-scanning the listening port could go
|
|
* away, so we need to duplicate the socket
|
|
*/
|
|
uflow->s[INISIDE] = fcntl(s_ini, F_DUPFD_CLOEXEC, 0);
|
|
if (uflow->s[INISIDE] < 0) {
|
|
flow_err(uflow,
|
|
"Couldn't duplicate listening socket: %s",
|
|
strerror(errno));
|
|
goto cancel;
|
|
}
|
|
}
|
|
|
|
if (pif_is_socket(tgtpif)) {
|
|
struct mmsghdr discard[UIO_MAXIOV] = { 0 };
|
|
union {
|
|
flow_sidx_t sidx;
|
|
uint32_t data;
|
|
} fref = {
|
|
.sidx = FLOW_SIDX(flow, TGTSIDE),
|
|
};
|
|
int rc;
|
|
|
|
uflow->s[TGTSIDE] = flowside_sock_l4(c, EPOLL_TYPE_UDP_REPLY,
|
|
tgtpif, tgt, fref.data);
|
|
if (uflow->s[TGTSIDE] < 0) {
|
|
flow_dbg(uflow,
|
|
"Couldn't open socket for spliced flow: %s",
|
|
strerror(errno));
|
|
goto cancel;
|
|
}
|
|
|
|
if (flowside_connect(c, uflow->s[TGTSIDE], tgtpif, tgt) < 0) {
|
|
flow_dbg(uflow,
|
|
"Couldn't connect flow socket: %s",
|
|
strerror(errno));
|
|
goto cancel;
|
|
}
|
|
|
|
/* It's possible, if unlikely, that we could receive some
|
|
* unrelated packets in between the bind() and connect() of this
|
|
* socket. For now we just discard these. We could consider
|
|
* trying to redirect these to an appropriate handler, if we
|
|
* need to.
|
|
*/
|
|
rc = recvmmsg(uflow->s[TGTSIDE], discard, ARRAY_SIZE(discard),
|
|
MSG_DONTWAIT, NULL);
|
|
if (rc >= ARRAY_SIZE(discard)) {
|
|
flow_dbg(uflow,
|
|
"Too many (%d) spurious reply datagrams", rc);
|
|
goto cancel;
|
|
} else if (rc > 0) {
|
|
flow_trace(uflow,
|
|
"Discarded %d spurious reply datagrams", rc);
|
|
} else if (errno != EAGAIN) {
|
|
flow_err(uflow,
|
|
"Unexpected error discarding datagrams: %s",
|
|
strerror(errno));
|
|
}
|
|
}
|
|
|
|
flow_hash_insert(c, FLOW_SIDX(uflow, INISIDE));
|
|
|
|
/* If the target side is a socket, it will be a reply socket that knows
|
|
* its own flowside. But if it's tap, then we need to look it up by
|
|
* hash.
|
|
*/
|
|
if (!pif_is_socket(tgtpif))
|
|
flow_hash_insert(c, FLOW_SIDX(uflow, TGTSIDE));
|
|
FLOW_ACTIVATE(uflow);
|
|
|
|
return FLOW_SIDX(uflow, TGTSIDE);
|
|
|
|
cancel:
|
|
if (uflow)
|
|
udp_flow_close(c, uflow);
|
|
flow_alloc_cancel(flow);
|
|
return FLOW_SIDX_NONE;
|
|
}
|
|
|
|
/**
|
|
* udp_flow_from_sock() - Find or create UDP flow for "listening" socket
|
|
* @c: Execution context
|
|
* @ref: epoll reference of the receiving socket
|
|
* @s_in: Source socket address, filled in by recvmmsg()
|
|
* @now: Timestamp
|
|
*
|
|
* #syscalls fcntl arm:fcntl64 ppc64:fcntl64 i686:fcntl64
|
|
*
|
|
* Return: sidx for the destination side of the flow for this packet, or
|
|
* FLOW_SIDX_NONE if we couldn't find or create a flow.
|
|
*/
|
|
flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
|
|
const union sockaddr_inany *s_in,
|
|
const struct timespec *now)
|
|
{
|
|
struct udp_flow *uflow;
|
|
union flow *flow;
|
|
flow_sidx_t sidx;
|
|
|
|
ASSERT(ref.type == EPOLL_TYPE_UDP_LISTEN);
|
|
|
|
sidx = flow_lookup_sa(c, IPPROTO_UDP, ref.udp.pif, s_in, ref.udp.port);
|
|
if ((uflow = udp_at_sidx(sidx))) {
|
|
uflow->ts = now->tv_sec;
|
|
return flow_sidx_opposite(sidx);
|
|
}
|
|
|
|
if (!(flow = flow_alloc())) {
|
|
char sastr[SOCKADDR_STRLEN];
|
|
|
|
debug("Couldn't allocate flow for UDP datagram from %s %s",
|
|
pif_name(ref.udp.pif),
|
|
sockaddr_ntop(s_in, sastr, sizeof(sastr)));
|
|
return FLOW_SIDX_NONE;
|
|
}
|
|
|
|
flow_initiate_sa(flow, ref.udp.pif, s_in, ref.udp.port);
|
|
return udp_flow_new(c, flow, ref.fd, now);
|
|
}
|
|
|
|
/**
|
|
* udp_flow_from_tap() - Find or create UDP flow for tap packets
|
|
* @c: Execution context
|
|
* @pif: pif on which the packet is arriving
|
|
* @af: Address family, AF_INET or AF_INET6
|
|
* @saddr: Source address on guest side
|
|
* @daddr: Destination address guest side
|
|
* @srcport: Source port on guest side
|
|
* @dstport: Destination port on guest side
|
|
*
|
|
* Return: sidx for the destination side of the flow for this packet, or
|
|
* FLOW_SIDX_NONE if we couldn't find or create a flow.
|
|
*/
|
|
flow_sidx_t udp_flow_from_tap(const struct ctx *c,
|
|
uint8_t pif, sa_family_t af,
|
|
const void *saddr, const void *daddr,
|
|
in_port_t srcport, in_port_t dstport,
|
|
const struct timespec *now)
|
|
{
|
|
struct udp_flow *uflow;
|
|
union flow *flow;
|
|
flow_sidx_t sidx;
|
|
|
|
ASSERT(pif == PIF_TAP);
|
|
|
|
sidx = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr,
|
|
srcport, dstport);
|
|
if ((uflow = udp_at_sidx(sidx))) {
|
|
uflow->ts = now->tv_sec;
|
|
return flow_sidx_opposite(sidx);
|
|
}
|
|
|
|
if (!(flow = flow_alloc())) {
|
|
char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];
|
|
|
|
debug("Couldn't allocate flow for UDP datagram from %s %s:%hu -> %s:%hu",
|
|
pif_name(pif),
|
|
inet_ntop(af, saddr, sstr, sizeof(sstr)), srcport,
|
|
inet_ntop(af, daddr, dstr, sizeof(dstr)), dstport);
|
|
return FLOW_SIDX_NONE;
|
|
}
|
|
|
|
flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, daddr, dstport);
|
|
|
|
return udp_flow_new(c, flow, -1, now);
|
|
}
|
|
|
|
/**
|
|
* udp_flow_timer() - Handler for timed events related to a given flow
|
|
* @c: Execution context
|
|
* @uflow: UDP flow
|
|
* @now: Current timestamp
|
|
*
|
|
* Return: true if the flow is ready to free, false otherwise
|
|
*/
|
|
bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
|
|
const struct timespec *now)
|
|
{
|
|
if (now->tv_sec - uflow->ts <= UDP_CONN_TIMEOUT)
|
|
return false;
|
|
|
|
udp_flow_close(c, uflow);
|
|
return true;
|
|
}
|