From 16ae03260800b8044efa541edcf43d4fb83b740d Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 30 Nov 2023 13:02:08 +1100 Subject: [PATCH] flow, tcp: Generalise connection types Currently TCP connections use a 1-bit selector, 'spliced', to determine the rest of the contents of the structure. We want to generalise the TCP connection table to other types of flows in other protocols. Make a start on this by replacing the tcp_conn_common structure with a new flow_common structure with an enum rather than a simple boolean indicating the type of flow. Signed-off-by: David Gibson Signed-off-by: Stefano Brivio --- Makefile | 8 +++---- flow.c | 18 +++++++++++++++ flow.h | 36 ++++++++++++++++++++++++++++++ tcp.c | 63 +++++++++++++++++++++++++++++++++++++--------------- tcp_conn.h | 24 ++++++-------------- tcp_splice.c | 3 ++- 6 files changed, 112 insertions(+), 40 deletions(-) create mode 100644 flow.c create mode 100644 flow.h diff --git a/Makefile b/Makefile index 6c53695..e2970e3 100644 --- a/Makefile +++ b/Makefile @@ -44,15 +44,15 @@ FLAGS += -DARCH=\"$(TARGET_ARCH)\" FLAGS += -DVERSION=\"$(VERSION)\" FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS) -PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c icmp.c igmp.c \ - isolation.c lineread.c log.c mld.c ndp.c netlink.c packet.c passt.c \ - pasta.c pcap.c port_fwd.c tap.c tcp.c tcp_splice.c udp.c util.c +PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c icmp.c \ + igmp.c isolation.c lineread.c log.c mld.c ndp.c netlink.c packet.c \ + passt.c pasta.c pcap.c port_fwd.c tap.c tcp.c tcp_splice.c udp.c util.c QRAP_SRCS = qrap.c SRCS = $(PASST_SRCS) $(QRAP_SRCS) MANPAGES = passt.1 pasta.1 qrap.1 -PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h icmp.h \ +PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h icmp.h \ inany.h isolation.h lineread.h log.h ndp.h netlink.h packet.h passt.h \ pasta.h pcap.h pif.h port_fwd.h siphash.h tap.h tcp.h tcp_conn.h \ tcp_splice.h udp.h util.h diff --git a/flow.c b/flow.c new file mode 100644 index 0000000..b71358a --- /dev/null +++ b/flow.c @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright Red Hat + * Author: David Gibson + * + * Tracking for logical "flows" of packets. + */ + +#include + +#include "flow.h" + +const char *flow_type_str[] = { + [FLOW_TYPE_NONE] = "", + [FLOW_TCP] = "TCP connection", + [FLOW_TCP_SPLICE] = "TCP connection (spliced)", +}; +static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES, + "flow_type_str[] doesn't match enum flow_type"); diff --git a/flow.h b/flow.h new file mode 100644 index 0000000..351837d --- /dev/null +++ b/flow.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright Red Hat + * Author: David Gibson + * + * Tracking for logical "flows" of packets. + */ +#ifndef FLOW_H +#define FLOW_H + +/** + * enum flow_type - Different types of packet flows we track + */ +enum flow_type { + /* Represents an invalid or unused flow */ + FLOW_TYPE_NONE = 0, + /* A TCP connection between a socket and tap interface */ + FLOW_TCP, + /* A TCP connection between a host socket and ns socket */ + FLOW_TCP_SPLICE, + + FLOW_NUM_TYPES, +}; + +extern const char *flow_type_str[]; +#define FLOW_TYPE(f) \ + ((f)->type < FLOW_NUM_TYPES ? flow_type_str[(f)->type] : "?") + +/** + * struct flow_common - Common fields for packet flows + * @type: Type of packet flow + */ +struct flow_common { + uint8_t type; +}; + +#endif /* FLOW_H */ diff --git a/tcp.c b/tcp.c index e683dd9..8139dac 100644 --- a/tcp.c +++ b/tcp.c @@ -299,6 +299,7 @@ #include "tcp_splice.h" #include "log.h" #include "inany.h" +#include "flow.h" #include "tcp_conn.h" @@ -584,7 +585,7 @@ static inline struct tcp_tap_conn *conn_at_idx(int idx) { if ((idx < 0) || (idx >= TCP_MAX_CONNS)) return NULL; - ASSERT(!(CONN(idx)->c.spliced)); + ASSERT(CONN(idx)->f.type == FLOW_TCP); return CONN(idx); } @@ -1319,14 +1320,21 @@ void tcp_table_compact(struct ctx *c, union tcp_conn *hole) from = tc + c->tcp.conn_count; memcpy(hole, from, sizeof(*hole)); - if (from->c.spliced) - tcp_splice_conn_update(c, &hole->splice); - else + switch (from->f.type) { + case FLOW_TCP: tcp_tap_conn_update(c, &from->tap, &hole->tap); + break; + case FLOW_TCP_SPLICE: + tcp_splice_conn_update(c, &hole->splice); + break; + default: + die("Unexpected %s in tcp_table_compact()", + FLOW_TYPE(&from->f)); + } - debug("TCP: table compaction (spliced=%d): old index %li, new index %li, " + debug("TCP: table compaction (%s): old index %li, new index %li, " "from: %p, to: %p", - from->c.spliced, CONN_IDX(from), CONN_IDX(hole), + FLOW_TYPE(&from->f), CONN_IDX(from), CONN_IDX(hole), (void *)from, (void *)hole); memset(from, 0, sizeof(*from)); @@ -1402,12 +1410,18 @@ void tcp_defer_handler(struct ctx *c) tcp_l2_data_buf_flush(c); for (conn = tc + c->tcp.conn_count - 1; conn >= tc; conn--) { - if (conn->c.spliced) { - if (conn->splice.flags & CLOSING) - tcp_splice_destroy(c, conn); - } else { + switch (conn->f.type) { + case FLOW_TCP: if (conn->tap.events == CLOSED) tcp_conn_destroy(c, conn); + break; + case FLOW_TCP_SPLICE: + if (conn->splice.flags & CLOSING) + tcp_splice_destroy(c, conn); + break; + default: + die("Unexpected %s in tcp_defer_handler()", + FLOW_TYPE(&conn->f)); } } } @@ -2016,7 +2030,7 @@ static void tcp_conn_from_tap(struct ctx *c, } conn = CONN(c->tcp.conn_count++); - conn->c.spliced = false; + conn->f.type = FLOW_TCP; conn->sock = s; conn->timer = -1; conn_event(c, conn, TAP_SYN_RCVD); @@ -2726,7 +2740,7 @@ static void tcp_tap_conn_from_sock(struct ctx *c, const struct sockaddr *sa, const struct timespec *now) { - conn->c.spliced = false; + conn->f.type = FLOW_TCP; conn->sock = s; conn->timer = -1; conn->ws_to_tap = conn->ws_from_tap = 0; @@ -2909,10 +2923,17 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events) { union tcp_conn *conn = tc + ref.tcp.index; - if (conn->c.spliced) - tcp_splice_sock_handler(c, &conn->splice, ref.fd, events); - else + switch (conn->f.type) { + case FLOW_TCP: tcp_tap_sock_handler(c, &conn->tap, events); + break; + case FLOW_TCP_SPLICE: + tcp_splice_sock_handler(c, &conn->splice, ref.fd, events); + break; + default: + die("Unexpected %s in tcp_sock_handler_compact()", + FLOW_TYPE(&conn->f)); + } } /** @@ -3244,11 +3265,17 @@ void tcp_timer(struct ctx *c, const struct timespec *ts) } for (conn = tc + c->tcp.conn_count - 1; conn >= tc; conn--) { - if (conn->c.spliced) { - tcp_splice_timer(c, conn); - } else { + switch (conn->f.type) { + case FLOW_TCP: if (conn->tap.events == CLOSED) tcp_conn_destroy(c, conn); + break; + case FLOW_TCP_SPLICE: + tcp_splice_timer(c, conn); + break; + default: + die("Unexpected %s in tcp_timer()", + FLOW_TYPE(&conn->f)); } } diff --git a/tcp_conn.h b/tcp_conn.h index 0c6a35b..136f8da 100644 --- a/tcp_conn.h +++ b/tcp_conn.h @@ -9,19 +9,9 @@ #ifndef TCP_CONN_H #define TCP_CONN_H -/** - * struct tcp_conn_common - Common fields for spliced and non-spliced - * @spliced: Is this a spliced connection? - */ -struct tcp_conn_common { - bool spliced :1; -}; - -extern const char *tcp_common_flag_str[]; - /** * struct tcp_tap_conn - Descriptor for a TCP connection (not spliced) - * @c: Fields common with tcp_splice_conn + * @f: Generic flow information * @in_epoll: Is the connection in the epoll set? * @next_index: Connection index of next item in hash chain, -1 for none * @tap_mss: MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS @@ -46,8 +36,8 @@ extern const char *tcp_common_flag_str[]; * @seq_init_from_tap: Initial sequence number from tap */ struct tcp_tap_conn { - /* Must be first element to match tcp_splice_conn */ - struct tcp_conn_common c; + /* Must be first element */ + struct flow_common f; bool in_epoll :1; int next_index :TCP_CONN_INDEX_BITS + 2; @@ -121,7 +111,7 @@ struct tcp_tap_conn { #define SIDES 2 /** * struct tcp_splice_conn - Descriptor for a spliced TCP connection - * @c: Fields common with tcp_tap_conn + * @f: Generic flow information * @in_epoll: Is the connection in the epoll set? * @s: File descriptor for sockets * @pipe: File descriptors for pipes @@ -131,8 +121,8 @@ struct tcp_tap_conn { * @written: Bytes written (not fully written from one other side read) */ struct tcp_splice_conn { - /* Must be first element to match tcp_tap_conn */ - struct tcp_conn_common c; + /* Must be first element */ + struct flow_common f; bool in_epoll :1; int s[SIDES]; @@ -168,7 +158,7 @@ struct tcp_splice_conn { * @splice: Fields specific to spliced connections */ union tcp_conn { - struct tcp_conn_common c; + struct flow_common f; struct tcp_tap_conn tap; struct tcp_splice_conn splice; }; diff --git a/tcp_splice.c b/tcp_splice.c index 0a23584..c55c491 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -54,6 +54,7 @@ #include "tcp_splice.h" #include "siphash.h" #include "inany.h" +#include "flow.h" #include "tcp_conn.h" @@ -476,7 +477,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, if (setsockopt(s, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int))) trace("TCP (spliced): failed to set TCP_QUICKACK on %i", s); - conn->c.spliced = true; + conn->f.type = FLOW_TCP_SPLICE; conn->s[0] = s; if (tcp_splice_new(c, conn, ref.port, ref.pif))