udp, udp_flow: Add instrumentation, handle EPOLLERR without queued errors
Link: https://github.com/containers/podman/issues/23686#issuecomment-2324945010 Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
d098e0527a
commit
0c6c20dee5
3 changed files with 68 additions and 14 deletions
72
udp.c
72
udp.c
|
@ -387,11 +387,11 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx,
|
|||
* udp_sock_recverr() - Receive and clear an error from a socket
|
||||
* @s: Socket to receive from
|
||||
*
|
||||
* Return: true if errors received and processed, false if no more errors
|
||||
* Return: ee_errno, 0 on empty queue
|
||||
*
|
||||
* #syscalls recvmsg
|
||||
*/
|
||||
static bool udp_sock_recverr(int s)
|
||||
static int udp_sock_recverr(int s)
|
||||
{
|
||||
const struct sock_extended_err *ee;
|
||||
const struct cmsghdr *hdr;
|
||||
|
@ -410,12 +410,13 @@ static bool udp_sock_recverr(int s)
|
|||
if (rc < 0) {
|
||||
if (errno != EAGAIN && errno != EWOULDBLOCK)
|
||||
err_perror("Failed to read error queue");
|
||||
return false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!(mh.msg_flags & MSG_ERRQUEUE)) {
|
||||
err("Missing MSG_ERRQUEUE flag reading error queue");
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
hdr = CMSG_FIRSTHDR(&mh);
|
||||
|
@ -424,7 +425,7 @@ static bool udp_sock_recverr(int s)
|
|||
(hdr->cmsg_level == IPPROTO_IPV6 &&
|
||||
hdr->cmsg_type == IPV6_RECVERR))) {
|
||||
err("Unexpected cmsg reading error queue");
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ee = (const struct sock_extended_err *)CMSG_DATA(hdr);
|
||||
|
@ -433,7 +434,7 @@ static bool udp_sock_recverr(int s)
|
|||
debug("%s error on UDP socket %i: %s",
|
||||
str_ee_origin(ee), s, strerror(ee->ee_errno));
|
||||
|
||||
return true;
|
||||
return ee->ee_errno;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -441,12 +442,15 @@ static bool udp_sock_recverr(int s)
|
|||
* @c: Execution context
|
||||
* @s: Socket to receive from
|
||||
* @events: epoll events bitmap
|
||||
* @mmh mmsghdr array to receive into
|
||||
* @mmh: mmsghdr array to receive into
|
||||
* @recv_err: Set to last error in queue. If none: -1 on EPOLLERR, 0 otherwise
|
||||
*
|
||||
* Return: count of datagrams received
|
||||
*
|
||||
* #syscalls recvmmsg arm:recvmmsg_time64 i686:recvmmsg_time64
|
||||
*/
|
||||
static int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
|
||||
struct mmsghdr *mmh)
|
||||
struct mmsghdr *mmh, int *recv_err)
|
||||
{
|
||||
/* For not entirely clear reasons (data locality?) pasta gets better
|
||||
* throughput if we receive tap datagrams one at a atime. For small
|
||||
|
@ -461,8 +465,13 @@ static int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
|
|||
|
||||
/* Clear any errors first */
|
||||
if (events & EPOLLERR) {
|
||||
while (udp_sock_recverr(s))
|
||||
;
|
||||
bool found = false;
|
||||
int ret;
|
||||
|
||||
while ((ret = udp_sock_recverr(s)))
|
||||
found = true;
|
||||
|
||||
*recv_err = found ? ret : -1;
|
||||
}
|
||||
|
||||
if (!(events & EPOLLIN))
|
||||
|
@ -490,9 +499,10 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
|
|||
uint32_t events, const struct timespec *now)
|
||||
{
|
||||
const socklen_t sasize = sizeof(udp_meta[0].s_in);
|
||||
int recv_err = 0;
|
||||
int n, i;
|
||||
|
||||
if ((n = udp_sock_recv(c, ref.fd, events, udp_mh_recv)) <= 0)
|
||||
if ((n = udp_sock_recv(c, ref.fd, events, udp_mh_recv, &recv_err)) <= 0)
|
||||
return;
|
||||
|
||||
/* We divide datagrams into batches based on how we need to send them,
|
||||
|
@ -562,11 +572,49 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
|
|||
struct udp_flow *uflow = udp_at_sidx(ref.flowside);
|
||||
int from_s = uflow->s[ref.flowside.sidei];
|
||||
uint8_t topif = pif_at_sidx(tosidx);
|
||||
int recv_err = 0;
|
||||
int n, i;
|
||||
|
||||
ASSERT(!c->no_udp && uflow);
|
||||
|
||||
if ((n = udp_sock_recv(c, from_s, events, udp_mh_recv)) <= 0)
|
||||
n = udp_sock_recv(c, from_s, events, udp_mh_recv, &recv_err);
|
||||
if (recv_err == -1) {
|
||||
struct flow_common *f = &uflow->f;
|
||||
char estr0[INANY_ADDRSTRLEN], fstr0[INANY_ADDRSTRLEN];
|
||||
char estr1[INANY_ADDRSTRLEN], fstr1[INANY_ADDRSTRLEN];
|
||||
const struct flowside *ini = &f->side[INISIDE];
|
||||
const struct flowside *tgt = &f->side[TGTSIDE];
|
||||
|
||||
flow_err(uflow, "EPOLLERR without error queue, closing flow");
|
||||
err("Last recorded errno was: %i (%s)", uflow->last_errno,
|
||||
strerror(uflow->last_errno));
|
||||
|
||||
flow_log_(f, LOG_ERR,
|
||||
"%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu",
|
||||
pif_name(f->pif[INISIDE]),
|
||||
inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
|
||||
ini->eport,
|
||||
inany_ntop(&ini->oaddr, fstr0, sizeof(fstr0)),
|
||||
ini->oport,
|
||||
pif_name(f->pif[TGTSIDE]),
|
||||
inany_ntop(&tgt->oaddr, fstr1, sizeof(fstr1)),
|
||||
tgt->oport,
|
||||
inany_ntop(&tgt->eaddr, estr1, sizeof(estr1)),
|
||||
tgt->eport);
|
||||
|
||||
udp_flow_close(c, uflow);
|
||||
return;
|
||||
}
|
||||
|
||||
if (recv_err) {
|
||||
struct udp_flow *uflow = udp_at_sidx(udp_meta[0].tosidx);
|
||||
|
||||
uflow->last_errno = recv_err;
|
||||
flow_err(uflow, "Recorded errno %i (%s)", recv_err,
|
||||
strerror(recv_err));
|
||||
}
|
||||
|
||||
if (n <= 0)
|
||||
return;
|
||||
|
||||
flow_trace(uflow, "Received %d datagrams on reply socket", n);
|
||||
|
|
|
@ -34,12 +34,12 @@ struct udp_flow *udp_at_sidx(flow_sidx_t sidx)
|
|||
return &flow->udp;
|
||||
}
|
||||
|
||||
/*
|
||||
/**
|
||||
* udp_flow_close() - Close and clean up UDP flow
|
||||
* @c: Execution context
|
||||
* @uflow: UDP flow
|
||||
*/
|
||||
static void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
|
||||
void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
|
||||
{
|
||||
if (uflow->s[INISIDE] >= 0) {
|
||||
/* The listening socket needs to stay in epoll */
|
||||
|
@ -53,6 +53,9 @@ static void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
|
|||
close(uflow->s[TGTSIDE]);
|
||||
uflow->s[TGTSIDE] = -1;
|
||||
}
|
||||
|
||||
uflow->last_errno = 0;
|
||||
|
||||
flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE));
|
||||
if (!pif_is_socket(uflow->f.pif[TGTSIDE]))
|
||||
flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE));
|
||||
|
|
|
@ -19,6 +19,8 @@ struct udp_flow {
|
|||
|
||||
time_t ts;
|
||||
int s[SIDES];
|
||||
|
||||
int last_errno;
|
||||
};
|
||||
|
||||
struct udp_flow *udp_at_sidx(flow_sidx_t sidx);
|
||||
|
@ -30,6 +32,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
|
|||
const void *saddr, const void *daddr,
|
||||
in_port_t srcport, in_port_t dstport,
|
||||
const struct timespec *now);
|
||||
void udp_flow_close(const struct ctx *c, struct udp_flow *uflow);
|
||||
bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
|
||||
const struct timespec *now);
|
||||
|
||||
|
|
Loading…
Reference in a new issue