tcp: Use runtime tests for TCP_INFO fields

In order to use particular fields from the TCP_INFO getsockopt() we
need them to be in the structure returned by the runtime kernel.  We
attempt to determine that with the HAS_BYTES_ACKED and HAS_MIN_RTT defines,
probed in the Makefile.

However, that's not correct, because the kernel headers we compile against
may not match the runtime kernel.  We should instead check against the
size of the structure returned from the TCP_INFO getsockopt(), as we
already do for tcpi_snd_wnd.  Switch from the compile time flags to a
runtime test.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
David Gibson 2024-10-24 15:59:22 +11:00 committed by Stefano Brivio
parent 81143813a6
commit e7fcd0c348
2 changed files with 26 additions and 36 deletions

View file

@ -67,16 +67,6 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
udp.h udp_flow.h util.h udp.h udp_flow.h util.h
HEADERS = $(PASST_HEADERS) seccomp.h HEADERS = $(PASST_HEADERS) seccomp.h
C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_bytes_acked = 0 };
ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
FLAGS += -DHAS_BYTES_ACKED
endif
C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_min_rtt = 0 };
ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
FLAGS += -DHAS_MIN_RTT
endif
C := \#include <sys/random.h>\nint main(){int a=getrandom(0, 0, 0);} C := \#include <sys/random.h>\nint main(){int a=getrandom(0, 0, 0);}
ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0) ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
FLAGS += -DHAS_GETRANDOM FLAGS += -DHAS_GETRANDOM

26
tcp.c
View file

@ -370,6 +370,10 @@ socklen_t tcp_info_size;
/* Kernel reports sending window in TCP_INFO (kernel commit 8f7baad7f035) */ /* Kernel reports sending window in TCP_INFO (kernel commit 8f7baad7f035) */
#define snd_wnd_cap tcp_info_cap(snd_wnd) #define snd_wnd_cap tcp_info_cap(snd_wnd)
/* Kernel reports bytes acked in TCP_INFO (kernel commit 0df48c26d84) */
#define bytes_acked_cap tcp_info_cap(bytes_acked)
/* Kernel reports minimum RTT in TCP_INFO (kernel commit cd9b266095f4) */
#define min_rtt_cap tcp_info_cap(min_rtt)
/* sendmsg() to socket */ /* sendmsg() to socket */
static struct iovec tcp_iov [UIO_MAXIOV]; static struct iovec tcp_iov [UIO_MAXIOV];
@ -677,11 +681,10 @@ static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn)
static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn, static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
const struct tcp_info_linux *tinfo) const struct tcp_info_linux *tinfo)
{ {
#ifdef HAS_MIN_RTT
const struct flowside *tapside = TAPFLOW(conn); const struct flowside *tapside = TAPFLOW(conn);
int i, hole = -1; int i, hole = -1;
if (!tinfo->tcpi_min_rtt || if (!min_rtt_cap ||
(int)tinfo->tcpi_min_rtt > LOW_RTT_THRESHOLD) (int)tinfo->tcpi_min_rtt > LOW_RTT_THRESHOLD)
return; return;
@ -702,10 +705,6 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
if (hole == LOW_RTT_TABLE_SIZE) if (hole == LOW_RTT_TABLE_SIZE)
hole = 0; hole = 0;
inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any); inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any);
#else
(void)conn;
(void)tinfo;
#endif /* HAS_MIN_RTT */
} }
/** /**
@ -1121,15 +1120,14 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
uint32_t new_wnd_to_tap = prev_wnd_to_tap; uint32_t new_wnd_to_tap = prev_wnd_to_tap;
int s = conn->sock; int s = conn->sock;
#ifndef HAS_BYTES_ACKED if (!bytes_acked_cap) {
(void)force_seq;
conn->seq_ack_to_tap = conn->seq_from_tap; conn->seq_ack_to_tap = conn->seq_from_tap;
if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap)) if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
conn->seq_ack_to_tap = prev_ack_to_tap; conn->seq_ack_to_tap = prev_ack_to_tap;
#else } else {
if ((unsigned)SNDBUF_GET(conn) < SNDBUF_SMALL || tcp_rtt_dst_low(conn) if ((unsigned)SNDBUF_GET(conn) < SNDBUF_SMALL ||
|| CONN_IS_CLOSING(conn) || (conn->flags & LOCAL) || force_seq) { tcp_rtt_dst_low(conn) || CONN_IS_CLOSING(conn) ||
(conn->flags & LOCAL) || force_seq) {
conn->seq_ack_to_tap = conn->seq_from_tap; conn->seq_ack_to_tap = conn->seq_from_tap;
} else if (conn->seq_ack_to_tap != conn->seq_from_tap) { } else if (conn->seq_ack_to_tap != conn->seq_from_tap) {
if (!tinfo) { if (!tinfo) {
@ -1144,7 +1142,7 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap)) if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
conn->seq_ack_to_tap = prev_ack_to_tap; conn->seq_ack_to_tap = prev_ack_to_tap;
} }
#endif /* !HAS_BYTES_ACKED */ }
if (!snd_wnd_cap) { if (!snd_wnd_cap) {
tcp_get_sndbuf(conn); tcp_get_sndbuf(conn);
@ -2641,6 +2639,8 @@ int tcp_init(struct ctx *c)
#define dbg_tcpi(f_) debug("TCP_INFO tcpi_%s field%s supported", \ #define dbg_tcpi(f_) debug("TCP_INFO tcpi_%s field%s supported", \
STRINGIFY(f_), tcp_info_cap(f_) ? " " : " not ") STRINGIFY(f_), tcp_info_cap(f_) ? " " : " not ")
dbg_tcpi(snd_wnd); dbg_tcpi(snd_wnd);
dbg_tcpi(bytes_acked);
dbg_tcpi(min_rtt);
#undef dbg_tcpi #undef dbg_tcpi
return 0; return 0;