udp, udp_flow: Add instrumentation, handle EPOLLERR without queued errors

Link: https://github.com/containers/podman/issues/23686#issuecomment-2324945010
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
Stefano Brivio 2024-09-04 18:28:49 +02:00
parent d098e0527a
commit 0c6c20dee5
3 changed files with 68 additions and 14 deletions

72
udp.c
View file

@ -387,11 +387,11 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx,
* udp_sock_recverr() - Receive and clear an error from a socket * udp_sock_recverr() - Receive and clear an error from a socket
* @s: Socket to receive from * @s: Socket to receive from
* *
* Return: true if errors received and processed, false if no more errors * Return: ee_errno, 0 on empty queue
* *
* #syscalls recvmsg * #syscalls recvmsg
*/ */
static bool udp_sock_recverr(int s) static int udp_sock_recverr(int s)
{ {
const struct sock_extended_err *ee; const struct sock_extended_err *ee;
const struct cmsghdr *hdr; const struct cmsghdr *hdr;
@ -410,12 +410,13 @@ static bool udp_sock_recverr(int s)
if (rc < 0) { if (rc < 0) {
if (errno != EAGAIN && errno != EWOULDBLOCK) if (errno != EAGAIN && errno != EWOULDBLOCK)
err_perror("Failed to read error queue"); err_perror("Failed to read error queue");
return false;
return 0;
} }
if (!(mh.msg_flags & MSG_ERRQUEUE)) { if (!(mh.msg_flags & MSG_ERRQUEUE)) {
err("Missing MSG_ERRQUEUE flag reading error queue"); err("Missing MSG_ERRQUEUE flag reading error queue");
return false; return 0;
} }
hdr = CMSG_FIRSTHDR(&mh); hdr = CMSG_FIRSTHDR(&mh);
@ -424,7 +425,7 @@ static bool udp_sock_recverr(int s)
(hdr->cmsg_level == IPPROTO_IPV6 && (hdr->cmsg_level == IPPROTO_IPV6 &&
hdr->cmsg_type == IPV6_RECVERR))) { hdr->cmsg_type == IPV6_RECVERR))) {
err("Unexpected cmsg reading error queue"); err("Unexpected cmsg reading error queue");
return false; return 0;
} }
ee = (const struct sock_extended_err *)CMSG_DATA(hdr); ee = (const struct sock_extended_err *)CMSG_DATA(hdr);
@ -433,7 +434,7 @@ static bool udp_sock_recverr(int s)
debug("%s error on UDP socket %i: %s", debug("%s error on UDP socket %i: %s",
str_ee_origin(ee), s, strerror(ee->ee_errno)); str_ee_origin(ee), s, strerror(ee->ee_errno));
return true; return ee->ee_errno;
} }
/** /**
@ -441,12 +442,15 @@ static bool udp_sock_recverr(int s)
* @c: Execution context * @c: Execution context
* @s: Socket to receive from * @s: Socket to receive from
* @events: epoll events bitmap * @events: epoll events bitmap
* @mmh mmsghdr array to receive into * @mmh: mmsghdr array to receive into
* @recv_err: Set to last error in queue. If none: -1 on EPOLLERR, 0 otherwise
*
* Return: count of datagrams received
* *
* #syscalls recvmmsg arm:recvmmsg_time64 i686:recvmmsg_time64 * #syscalls recvmmsg arm:recvmmsg_time64 i686:recvmmsg_time64
*/ */
static int udp_sock_recv(const struct ctx *c, int s, uint32_t events, static int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
struct mmsghdr *mmh) struct mmsghdr *mmh, int *recv_err)
{ {
/* For not entirely clear reasons (data locality?) pasta gets better /* For not entirely clear reasons (data locality?) pasta gets better
* throughput if we receive tap datagrams one at a atime. For small * throughput if we receive tap datagrams one at a atime. For small
@ -461,8 +465,13 @@ static int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
/* Clear any errors first */ /* Clear any errors first */
if (events & EPOLLERR) { if (events & EPOLLERR) {
while (udp_sock_recverr(s)) bool found = false;
; int ret;
while ((ret = udp_sock_recverr(s)))
found = true;
*recv_err = found ? ret : -1;
} }
if (!(events & EPOLLIN)) if (!(events & EPOLLIN))
@ -490,9 +499,10 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
uint32_t events, const struct timespec *now) uint32_t events, const struct timespec *now)
{ {
const socklen_t sasize = sizeof(udp_meta[0].s_in); const socklen_t sasize = sizeof(udp_meta[0].s_in);
int recv_err = 0;
int n, i; int n, i;
if ((n = udp_sock_recv(c, ref.fd, events, udp_mh_recv)) <= 0) if ((n = udp_sock_recv(c, ref.fd, events, udp_mh_recv, &recv_err)) <= 0)
return; return;
/* We divide datagrams into batches based on how we need to send them, /* We divide datagrams into batches based on how we need to send them,
@ -562,11 +572,49 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
struct udp_flow *uflow = udp_at_sidx(ref.flowside); struct udp_flow *uflow = udp_at_sidx(ref.flowside);
int from_s = uflow->s[ref.flowside.sidei]; int from_s = uflow->s[ref.flowside.sidei];
uint8_t topif = pif_at_sidx(tosidx); uint8_t topif = pif_at_sidx(tosidx);
int recv_err = 0;
int n, i; int n, i;
ASSERT(!c->no_udp && uflow); ASSERT(!c->no_udp && uflow);
if ((n = udp_sock_recv(c, from_s, events, udp_mh_recv)) <= 0) n = udp_sock_recv(c, from_s, events, udp_mh_recv, &recv_err);
if (recv_err == -1) {
struct flow_common *f = &uflow->f;
char estr0[INANY_ADDRSTRLEN], fstr0[INANY_ADDRSTRLEN];
char estr1[INANY_ADDRSTRLEN], fstr1[INANY_ADDRSTRLEN];
const struct flowside *ini = &f->side[INISIDE];
const struct flowside *tgt = &f->side[TGTSIDE];
flow_err(uflow, "EPOLLERR without error queue, closing flow");
err("Last recorded errno was: %i (%s)", uflow->last_errno,
strerror(uflow->last_errno));
flow_log_(f, LOG_ERR,
"%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu",
pif_name(f->pif[INISIDE]),
inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
ini->eport,
inany_ntop(&ini->oaddr, fstr0, sizeof(fstr0)),
ini->oport,
pif_name(f->pif[TGTSIDE]),
inany_ntop(&tgt->oaddr, fstr1, sizeof(fstr1)),
tgt->oport,
inany_ntop(&tgt->eaddr, estr1, sizeof(estr1)),
tgt->eport);
udp_flow_close(c, uflow);
return;
}
if (recv_err) {
struct udp_flow *uflow = udp_at_sidx(udp_meta[0].tosidx);
uflow->last_errno = recv_err;
flow_err(uflow, "Recorded errno %i (%s)", recv_err,
strerror(recv_err));
}
if (n <= 0)
return; return;
flow_trace(uflow, "Received %d datagrams on reply socket", n); flow_trace(uflow, "Received %d datagrams on reply socket", n);

View file

@ -34,12 +34,12 @@ struct udp_flow *udp_at_sidx(flow_sidx_t sidx)
return &flow->udp; return &flow->udp;
} }
/* /**
* udp_flow_close() - Close and clean up UDP flow * udp_flow_close() - Close and clean up UDP flow
* @c: Execution context * @c: Execution context
* @uflow: UDP flow * @uflow: UDP flow
*/ */
static void udp_flow_close(const struct ctx *c, struct udp_flow *uflow) void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
{ {
if (uflow->s[INISIDE] >= 0) { if (uflow->s[INISIDE] >= 0) {
/* The listening socket needs to stay in epoll */ /* The listening socket needs to stay in epoll */
@ -53,6 +53,9 @@ static void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
close(uflow->s[TGTSIDE]); close(uflow->s[TGTSIDE]);
uflow->s[TGTSIDE] = -1; uflow->s[TGTSIDE] = -1;
} }
uflow->last_errno = 0;
flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE)); flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE));
if (!pif_is_socket(uflow->f.pif[TGTSIDE])) if (!pif_is_socket(uflow->f.pif[TGTSIDE]))
flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE)); flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE));

View file

@ -19,6 +19,8 @@ struct udp_flow {
time_t ts; time_t ts;
int s[SIDES]; int s[SIDES];
int last_errno;
}; };
struct udp_flow *udp_at_sidx(flow_sidx_t sidx); struct udp_flow *udp_at_sidx(flow_sidx_t sidx);
@ -30,6 +32,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
const void *saddr, const void *daddr, const void *saddr, const void *daddr,
in_port_t srcport, in_port_t dstport, in_port_t srcport, in_port_t dstport,
const struct timespec *now); const struct timespec *now);
void udp_flow_close(const struct ctx *c, struct udp_flow *uflow);
bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow, bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
const struct timespec *now); const struct timespec *now);