udp, udp_flow: Add instrumentation, handle EPOLLERR without queued errors
Link: https://github.com/containers/podman/issues/23686#issuecomment-2324945010 Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
d098e0527a
commit
0c6c20dee5
3 changed files with 68 additions and 14 deletions
72
udp.c
72
udp.c
|
@ -387,11 +387,11 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx,
|
||||||
* udp_sock_recverr() - Receive and clear an error from a socket
|
* udp_sock_recverr() - Receive and clear an error from a socket
|
||||||
* @s: Socket to receive from
|
* @s: Socket to receive from
|
||||||
*
|
*
|
||||||
* Return: true if errors received and processed, false if no more errors
|
* Return: ee_errno, 0 on empty queue
|
||||||
*
|
*
|
||||||
* #syscalls recvmsg
|
* #syscalls recvmsg
|
||||||
*/
|
*/
|
||||||
static bool udp_sock_recverr(int s)
|
static int udp_sock_recverr(int s)
|
||||||
{
|
{
|
||||||
const struct sock_extended_err *ee;
|
const struct sock_extended_err *ee;
|
||||||
const struct cmsghdr *hdr;
|
const struct cmsghdr *hdr;
|
||||||
|
@ -410,12 +410,13 @@ static bool udp_sock_recverr(int s)
|
||||||
if (rc < 0) {
|
if (rc < 0) {
|
||||||
if (errno != EAGAIN && errno != EWOULDBLOCK)
|
if (errno != EAGAIN && errno != EWOULDBLOCK)
|
||||||
err_perror("Failed to read error queue");
|
err_perror("Failed to read error queue");
|
||||||
return false;
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(mh.msg_flags & MSG_ERRQUEUE)) {
|
if (!(mh.msg_flags & MSG_ERRQUEUE)) {
|
||||||
err("Missing MSG_ERRQUEUE flag reading error queue");
|
err("Missing MSG_ERRQUEUE flag reading error queue");
|
||||||
return false;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
hdr = CMSG_FIRSTHDR(&mh);
|
hdr = CMSG_FIRSTHDR(&mh);
|
||||||
|
@ -424,7 +425,7 @@ static bool udp_sock_recverr(int s)
|
||||||
(hdr->cmsg_level == IPPROTO_IPV6 &&
|
(hdr->cmsg_level == IPPROTO_IPV6 &&
|
||||||
hdr->cmsg_type == IPV6_RECVERR))) {
|
hdr->cmsg_type == IPV6_RECVERR))) {
|
||||||
err("Unexpected cmsg reading error queue");
|
err("Unexpected cmsg reading error queue");
|
||||||
return false;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
ee = (const struct sock_extended_err *)CMSG_DATA(hdr);
|
ee = (const struct sock_extended_err *)CMSG_DATA(hdr);
|
||||||
|
@ -433,7 +434,7 @@ static bool udp_sock_recverr(int s)
|
||||||
debug("%s error on UDP socket %i: %s",
|
debug("%s error on UDP socket %i: %s",
|
||||||
str_ee_origin(ee), s, strerror(ee->ee_errno));
|
str_ee_origin(ee), s, strerror(ee->ee_errno));
|
||||||
|
|
||||||
return true;
|
return ee->ee_errno;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -441,12 +442,15 @@ static bool udp_sock_recverr(int s)
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
* @s: Socket to receive from
|
* @s: Socket to receive from
|
||||||
* @events: epoll events bitmap
|
* @events: epoll events bitmap
|
||||||
* @mmh mmsghdr array to receive into
|
* @mmh: mmsghdr array to receive into
|
||||||
|
* @recv_err: Set to last error in queue. If none: -1 on EPOLLERR, 0 otherwise
|
||||||
|
*
|
||||||
|
* Return: count of datagrams received
|
||||||
*
|
*
|
||||||
* #syscalls recvmmsg arm:recvmmsg_time64 i686:recvmmsg_time64
|
* #syscalls recvmmsg arm:recvmmsg_time64 i686:recvmmsg_time64
|
||||||
*/
|
*/
|
||||||
static int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
|
static int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
|
||||||
struct mmsghdr *mmh)
|
struct mmsghdr *mmh, int *recv_err)
|
||||||
{
|
{
|
||||||
/* For not entirely clear reasons (data locality?) pasta gets better
|
/* For not entirely clear reasons (data locality?) pasta gets better
|
||||||
* throughput if we receive tap datagrams one at a atime. For small
|
* throughput if we receive tap datagrams one at a atime. For small
|
||||||
|
@ -461,8 +465,13 @@ static int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
|
||||||
|
|
||||||
/* Clear any errors first */
|
/* Clear any errors first */
|
||||||
if (events & EPOLLERR) {
|
if (events & EPOLLERR) {
|
||||||
while (udp_sock_recverr(s))
|
bool found = false;
|
||||||
;
|
int ret;
|
||||||
|
|
||||||
|
while ((ret = udp_sock_recverr(s)))
|
||||||
|
found = true;
|
||||||
|
|
||||||
|
*recv_err = found ? ret : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(events & EPOLLIN))
|
if (!(events & EPOLLIN))
|
||||||
|
@ -490,9 +499,10 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
|
||||||
uint32_t events, const struct timespec *now)
|
uint32_t events, const struct timespec *now)
|
||||||
{
|
{
|
||||||
const socklen_t sasize = sizeof(udp_meta[0].s_in);
|
const socklen_t sasize = sizeof(udp_meta[0].s_in);
|
||||||
|
int recv_err = 0;
|
||||||
int n, i;
|
int n, i;
|
||||||
|
|
||||||
if ((n = udp_sock_recv(c, ref.fd, events, udp_mh_recv)) <= 0)
|
if ((n = udp_sock_recv(c, ref.fd, events, udp_mh_recv, &recv_err)) <= 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* We divide datagrams into batches based on how we need to send them,
|
/* We divide datagrams into batches based on how we need to send them,
|
||||||
|
@ -562,11 +572,49 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
|
||||||
struct udp_flow *uflow = udp_at_sidx(ref.flowside);
|
struct udp_flow *uflow = udp_at_sidx(ref.flowside);
|
||||||
int from_s = uflow->s[ref.flowside.sidei];
|
int from_s = uflow->s[ref.flowside.sidei];
|
||||||
uint8_t topif = pif_at_sidx(tosidx);
|
uint8_t topif = pif_at_sidx(tosidx);
|
||||||
|
int recv_err = 0;
|
||||||
int n, i;
|
int n, i;
|
||||||
|
|
||||||
ASSERT(!c->no_udp && uflow);
|
ASSERT(!c->no_udp && uflow);
|
||||||
|
|
||||||
if ((n = udp_sock_recv(c, from_s, events, udp_mh_recv)) <= 0)
|
n = udp_sock_recv(c, from_s, events, udp_mh_recv, &recv_err);
|
||||||
|
if (recv_err == -1) {
|
||||||
|
struct flow_common *f = &uflow->f;
|
||||||
|
char estr0[INANY_ADDRSTRLEN], fstr0[INANY_ADDRSTRLEN];
|
||||||
|
char estr1[INANY_ADDRSTRLEN], fstr1[INANY_ADDRSTRLEN];
|
||||||
|
const struct flowside *ini = &f->side[INISIDE];
|
||||||
|
const struct flowside *tgt = &f->side[TGTSIDE];
|
||||||
|
|
||||||
|
flow_err(uflow, "EPOLLERR without error queue, closing flow");
|
||||||
|
err("Last recorded errno was: %i (%s)", uflow->last_errno,
|
||||||
|
strerror(uflow->last_errno));
|
||||||
|
|
||||||
|
flow_log_(f, LOG_ERR,
|
||||||
|
"%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu",
|
||||||
|
pif_name(f->pif[INISIDE]),
|
||||||
|
inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
|
||||||
|
ini->eport,
|
||||||
|
inany_ntop(&ini->oaddr, fstr0, sizeof(fstr0)),
|
||||||
|
ini->oport,
|
||||||
|
pif_name(f->pif[TGTSIDE]),
|
||||||
|
inany_ntop(&tgt->oaddr, fstr1, sizeof(fstr1)),
|
||||||
|
tgt->oport,
|
||||||
|
inany_ntop(&tgt->eaddr, estr1, sizeof(estr1)),
|
||||||
|
tgt->eport);
|
||||||
|
|
||||||
|
udp_flow_close(c, uflow);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (recv_err) {
|
||||||
|
struct udp_flow *uflow = udp_at_sidx(udp_meta[0].tosidx);
|
||||||
|
|
||||||
|
uflow->last_errno = recv_err;
|
||||||
|
flow_err(uflow, "Recorded errno %i (%s)", recv_err,
|
||||||
|
strerror(recv_err));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n <= 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
flow_trace(uflow, "Received %d datagrams on reply socket", n);
|
flow_trace(uflow, "Received %d datagrams on reply socket", n);
|
||||||
|
|
|
@ -34,12 +34,12 @@ struct udp_flow *udp_at_sidx(flow_sidx_t sidx)
|
||||||
return &flow->udp;
|
return &flow->udp;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/**
|
||||||
* udp_flow_close() - Close and clean up UDP flow
|
* udp_flow_close() - Close and clean up UDP flow
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
* @uflow: UDP flow
|
* @uflow: UDP flow
|
||||||
*/
|
*/
|
||||||
static void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
|
void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
|
||||||
{
|
{
|
||||||
if (uflow->s[INISIDE] >= 0) {
|
if (uflow->s[INISIDE] >= 0) {
|
||||||
/* The listening socket needs to stay in epoll */
|
/* The listening socket needs to stay in epoll */
|
||||||
|
@ -53,6 +53,9 @@ static void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
|
||||||
close(uflow->s[TGTSIDE]);
|
close(uflow->s[TGTSIDE]);
|
||||||
uflow->s[TGTSIDE] = -1;
|
uflow->s[TGTSIDE] = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uflow->last_errno = 0;
|
||||||
|
|
||||||
flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE));
|
flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE));
|
||||||
if (!pif_is_socket(uflow->f.pif[TGTSIDE]))
|
if (!pif_is_socket(uflow->f.pif[TGTSIDE]))
|
||||||
flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE));
|
flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE));
|
||||||
|
|
|
@ -19,6 +19,8 @@ struct udp_flow {
|
||||||
|
|
||||||
time_t ts;
|
time_t ts;
|
||||||
int s[SIDES];
|
int s[SIDES];
|
||||||
|
|
||||||
|
int last_errno;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct udp_flow *udp_at_sidx(flow_sidx_t sidx);
|
struct udp_flow *udp_at_sidx(flow_sidx_t sidx);
|
||||||
|
@ -30,6 +32,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
|
||||||
const void *saddr, const void *daddr,
|
const void *saddr, const void *daddr,
|
||||||
in_port_t srcport, in_port_t dstport,
|
in_port_t srcport, in_port_t dstport,
|
||||||
const struct timespec *now);
|
const struct timespec *now);
|
||||||
|
void udp_flow_close(const struct ctx *c, struct udp_flow *uflow);
|
||||||
bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
|
bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
|
||||||
const struct timespec *now);
|
const struct timespec *now);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue