flow, tcp: Move TCP connection table to unified flow table

We want to generalise "connection" tracking to things other than true TCP
connections.  Continue implementing this by renaming the TCP connection
table to the "flow table" and moving it to flow.c.  The definitions are
split between flow.h and flow_table.h - we need this separation to avoid
circular dependencies: the definitions in flow.h will be needed by many
headers using the flow mechanism, but flow_table.h needs all those protocol
specific headers in order to define the full flow table entry.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
David Gibson 2023-11-30 13:02:09 +11:00 committed by Stefano Brivio
parent 16ae032608
commit f08ce92a13
9 changed files with 107 additions and 82 deletions

View file

@ -52,10 +52,10 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS)
MANPAGES = passt.1 pasta.1 qrap.1 MANPAGES = passt.1 pasta.1 qrap.1
PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h icmp.h \ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h \
inany.h isolation.h lineread.h log.h ndp.h netlink.h packet.h passt.h \ flow_table.h icmp.h inany.h isolation.h lineread.h log.h ndp.h \
pasta.h pcap.h pif.h port_fwd.h siphash.h tap.h tcp.h tcp_conn.h \ netlink.h packet.h passt.h pasta.h pcap.h pif.h port_fwd.h siphash.h \
tcp_splice.h udp.h util.h tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h
HEADERS = $(PASST_HEADERS) seccomp.h HEADERS = $(PASST_HEADERS) seccomp.h
C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 }; C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };

11
flow.c
View file

@ -6,8 +6,16 @@
*/ */
#include <stdint.h> #include <stdint.h>
#include <unistd.h>
#include <string.h>
#include "util.h"
#include "passt.h"
#include "siphash.h"
#include "inany.h"
#include "flow.h" #include "flow.h"
#include "tcp_conn.h"
#include "flow_table.h"
const char *flow_type_str[] = { const char *flow_type_str[] = {
[FLOW_TYPE_NONE] = "<none>", [FLOW_TYPE_NONE] = "<none>",
@ -16,3 +24,6 @@ const char *flow_type_str[] = {
}; };
static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES, static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
"flow_type_str[] doesn't match enum flow_type"); "flow_type_str[] doesn't match enum flow_type");
/* Global Flow Table */
union flow flowtab[FLOW_MAX];

8
flow.h
View file

@ -33,4 +33,12 @@ struct flow_common {
uint8_t type; uint8_t type;
}; };
#define FLOW_INDEX_BITS 17 /* 128k - 1 */
#define FLOW_MAX MAX_FROM_BITS(FLOW_INDEX_BITS)
#define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */
#define FLOW_FILE_PRESSURE 30 /* % of c->nofile */
union flow;
#endif /* FLOW_H */ #endif /* FLOW_H */

25
flow_table.h Normal file
View file

@ -0,0 +1,25 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright Red Hat
* Author: David Gibson <david@gibson.dropbear.id.au>
*
* Definitions for the global table of packet flows.
*/
#ifndef FLOW_TABLE_H
#define FLOW_TABLE_H
/**
* union flow - Descriptor for a logical packet flow (e.g. connection)
* @f: Fields common between all variants
* @tcp: Fields for non-spliced TCP connections
* @tcp_splice: Fields for spliced TCP connections
*
* One entry of the global flow table, flowtab[].  Users pick the variant to
* access from f.type: FLOW_TCP entries are accessed through @tcp, and
* FLOW_TCP_SPLICE entries through @tcp_splice.
*
* NOTE(review): reading f.type regardless of which variant is in use
* presumably relies on every variant structure starting with a
* struct flow_common member -- confirm when adding new variants.
*/
union flow {
struct flow_common f;
struct tcp_tap_conn tcp;
struct tcp_splice_conn tcp_splice;
};
/* Global Flow Table */
extern union flow flowtab[];
#endif /* FLOW_TABLE_H */

View file

@ -219,6 +219,7 @@ struct ip6_ctx {
* @pasta_conf_ns: Configure namespace after creating it * @pasta_conf_ns: Configure namespace after creating it
* @no_copy_routes: Don't copy all routes when configuring target namespace * @no_copy_routes: Don't copy all routes when configuring target namespace
* @no_copy_addrs: Don't copy all addresses when configuring namespace * @no_copy_addrs: Don't copy all addresses when configuring namespace
* @flow_count: Number of tracked packet flows (connections etc.)
* @no_tcp: Disable TCP operation * @no_tcp: Disable TCP operation
* @tcp: Context for TCP protocol handler * @tcp: Context for TCP protocol handler
* @no_udp: Disable UDP operation * @no_udp: Disable UDP operation
@ -277,6 +278,8 @@ struct ctx {
int no_copy_routes; int no_copy_routes;
int no_copy_addrs; int no_copy_addrs;
unsigned flow_count;
int no_tcp; int no_tcp;
struct tcp_ctx tcp; struct tcp_ctx tcp;
int no_udp; int no_udp;

87
tcp.c
View file

@ -302,14 +302,14 @@
#include "flow.h" #include "flow.h"
#include "tcp_conn.h" #include "tcp_conn.h"
#include "flow_table.h"
#define TCP_FRAMES_MEM 128 #define TCP_FRAMES_MEM 128
#define TCP_FRAMES \ #define TCP_FRAMES \
(c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1) (c->mode == MODE_PASST ? TCP_FRAMES_MEM : 1)
#define TCP_HASH_TABLE_LOAD 70 /* % */ #define TCP_HASH_TABLE_LOAD 70 /* % */
#define TCP_HASH_TABLE_SIZE (TCP_MAX_CONNS * 100 / \ #define TCP_HASH_TABLE_SIZE (FLOW_MAX * 100 / TCP_HASH_TABLE_LOAD)
TCP_HASH_TABLE_LOAD)
#define MAX_WS 8 #define MAX_WS 8
#define MAX_WINDOW (1 << (16 + (MAX_WS))) #define MAX_WINDOW (1 << (16 + (MAX_WS)))
@ -570,11 +570,8 @@ tcp6_l2_flags_buf[TCP_FRAMES_MEM];
static unsigned int tcp6_l2_flags_buf_used; static unsigned int tcp6_l2_flags_buf_used;
/* TCP connections */ #define CONN(idx) (&flowtab[(idx)].tcp)
union tcp_conn tc[TCP_MAX_CONNS]; #define CONN_IDX(conn) ((union flow *)(conn) - flowtab)
#define CONN(idx) (&tc[(idx)].tap)
#define CONN_IDX(conn) ((union tcp_conn *)(conn) - tc)
/** conn_at_idx() - Find a connection by index, if present /** conn_at_idx() - Find a connection by index, if present
* @idx: Index of connection to lookup * @idx: Index of connection to lookup
@ -583,7 +580,7 @@ union tcp_conn tc[TCP_MAX_CONNS];
*/ */
static inline struct tcp_tap_conn *conn_at_idx(int idx) static inline struct tcp_tap_conn *conn_at_idx(int idx)
{ {
if ((idx < 0) || (idx >= TCP_MAX_CONNS)) if ((idx < 0) || (idx >= FLOW_MAX))
return NULL; return NULL;
ASSERT(CONN(idx)->f.type == FLOW_TCP); ASSERT(CONN(idx)->f.type == FLOW_TCP);
return CONN(idx); return CONN(idx);
@ -1306,26 +1303,26 @@ static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c,
* @c: Execution context * @c: Execution context
* @hole: Pointer to recently closed connection * @hole: Pointer to recently closed connection
*/ */
void tcp_table_compact(struct ctx *c, union tcp_conn *hole) void tcp_table_compact(struct ctx *c, union flow *hole)
{ {
union tcp_conn *from; union flow *from;
if (CONN_IDX(hole) == --c->tcp.conn_count) { if (CONN_IDX(hole) == --c->flow_count) {
debug("TCP: table compaction: maximum index was %li (%p)", debug("TCP: table compaction: maximum index was %li (%p)",
CONN_IDX(hole), (void *)hole); CONN_IDX(hole), (void *)hole);
memset(hole, 0, sizeof(*hole)); memset(hole, 0, sizeof(*hole));
return; return;
} }
from = tc + c->tcp.conn_count; from = flowtab + c->flow_count;
memcpy(hole, from, sizeof(*hole)); memcpy(hole, from, sizeof(*hole));
switch (from->f.type) { switch (from->f.type) {
case FLOW_TCP: case FLOW_TCP:
tcp_tap_conn_update(c, &from->tap, &hole->tap); tcp_tap_conn_update(c, &from->tcp, &hole->tcp);
break; break;
case FLOW_TCP_SPLICE: case FLOW_TCP_SPLICE:
tcp_splice_conn_update(c, &hole->splice); tcp_splice_conn_update(c, &hole->tcp_splice);
break; break;
default: default:
die("Unexpected %s in tcp_table_compact()", die("Unexpected %s in tcp_table_compact()",
@ -1343,18 +1340,18 @@ void tcp_table_compact(struct ctx *c, union tcp_conn *hole)
/** /**
* tcp_conn_destroy() - Close sockets, trigger hash table removal and compaction * tcp_conn_destroy() - Close sockets, trigger hash table removal and compaction
* @c: Execution context * @c: Execution context
* @conn_union: Connection pointer (container union) * @flow: Flow table entry for this connection
*/ */
static void tcp_conn_destroy(struct ctx *c, union tcp_conn *conn_union) static void tcp_conn_destroy(struct ctx *c, union flow *flow)
{ {
const struct tcp_tap_conn *conn = &conn_union->tap; const struct tcp_tap_conn *conn = &flow->tcp;
close(conn->sock); close(conn->sock);
if (conn->timer != -1) if (conn->timer != -1)
close(conn->timer); close(conn->timer);
tcp_hash_remove(c, conn); tcp_hash_remove(c, conn);
tcp_table_compact(c, conn_union); tcp_table_compact(c, flow);
} }
static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn); static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn);
@ -1404,24 +1401,24 @@ static void tcp_l2_data_buf_flush(const struct ctx *c)
*/ */
void tcp_defer_handler(struct ctx *c) void tcp_defer_handler(struct ctx *c)
{ {
union tcp_conn *conn; union flow *flow;
tcp_l2_flags_buf_flush(c); tcp_l2_flags_buf_flush(c);
tcp_l2_data_buf_flush(c); tcp_l2_data_buf_flush(c);
for (conn = tc + c->tcp.conn_count - 1; conn >= tc; conn--) { for (flow = flowtab + c->flow_count - 1; flow >= flowtab; flow--) {
switch (conn->f.type) { switch (flow->f.type) {
case FLOW_TCP: case FLOW_TCP:
if (conn->tap.events == CLOSED) if (flow->tcp.events == CLOSED)
tcp_conn_destroy(c, conn); tcp_conn_destroy(c, flow);
break; break;
case FLOW_TCP_SPLICE: case FLOW_TCP_SPLICE:
if (conn->splice.flags & CLOSING) if (flow->tcp_splice.flags & CLOSING)
tcp_splice_destroy(c, conn); tcp_splice_destroy(c, flow);
break; break;
default: default:
die("Unexpected %s in tcp_defer_handler()", die("Unexpected %s in tcp_defer_handler()",
FLOW_TYPE(&conn->f)); FLOW_TYPE(&flow->f));
} }
} }
} }
@ -2003,7 +2000,7 @@ static void tcp_conn_from_tap(struct ctx *c,
(void)saddr; (void)saddr;
if (c->tcp.conn_count >= TCP_MAX_CONNS) if (c->flow_count >= FLOW_MAX)
return; return;
if ((s = tcp_conn_pool_sock(pool)) < 0) if ((s = tcp_conn_pool_sock(pool)) < 0)
@ -2029,7 +2026,7 @@ static void tcp_conn_from_tap(struct ctx *c,
} }
} }
conn = CONN(c->tcp.conn_count++); conn = CONN(c->flow_count++);
conn->f.type = FLOW_TCP; conn->f.type = FLOW_TCP;
conn->sock = s; conn->sock = s;
conn->timer = -1; conn->timer = -1;
@ -2775,24 +2772,24 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
{ {
struct sockaddr_storage sa; struct sockaddr_storage sa;
socklen_t sl = sizeof(sa); socklen_t sl = sizeof(sa);
union tcp_conn *conn; union flow *flow;
int s; int s;
if (c->no_tcp || c->tcp.conn_count >= TCP_MAX_CONNS) if (c->no_tcp || c->flow_count >= FLOW_MAX)
return; return;
s = accept4(ref.fd, (struct sockaddr *)&sa, &sl, SOCK_NONBLOCK); s = accept4(ref.fd, (struct sockaddr *)&sa, &sl, SOCK_NONBLOCK);
if (s < 0) if (s < 0)
return; return;
conn = tc + c->tcp.conn_count++; flow = flowtab + c->flow_count++;
if (c->mode == MODE_PASTA && if (c->mode == MODE_PASTA &&
tcp_splice_conn_from_sock(c, ref.tcp_listen, &conn->splice, tcp_splice_conn_from_sock(c, ref.tcp_listen, &flow->tcp_splice,
s, (struct sockaddr *)&sa)) s, (struct sockaddr *)&sa))
return; return;
tcp_tap_conn_from_sock(c, ref.tcp_listen, &conn->tap, s, tcp_tap_conn_from_sock(c, ref.tcp_listen, &flow->tcp, s,
(struct sockaddr *)&sa, now); (struct sockaddr *)&sa, now);
} }
@ -2921,18 +2918,18 @@ static void tcp_tap_sock_handler(struct ctx *c, struct tcp_tap_conn *conn,
*/ */
void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events) void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events)
{ {
union tcp_conn *conn = tc + ref.tcp.index; union flow *flow = flowtab + ref.tcp.index;
switch (conn->f.type) { switch (flow->f.type) {
case FLOW_TCP: case FLOW_TCP:
tcp_tap_sock_handler(c, &conn->tap, events); tcp_tap_sock_handler(c, &flow->tcp, events);
break; break;
case FLOW_TCP_SPLICE: case FLOW_TCP_SPLICE:
tcp_splice_sock_handler(c, &conn->splice, ref.fd, events); tcp_splice_sock_handler(c, &flow->tcp_splice, ref.fd, events);
break; break;
default: default:
die("Unexpected %s in tcp_sock_handler_compact()", die("Unexpected %s in tcp_sock_handler_compact()",
FLOW_TYPE(&conn->f)); FLOW_TYPE(&flow->f));
} }
} }
@ -3248,7 +3245,7 @@ static int tcp_port_rebind_outbound(void *arg)
*/ */
void tcp_timer(struct ctx *c, const struct timespec *ts) void tcp_timer(struct ctx *c, const struct timespec *ts)
{ {
union tcp_conn *conn; union flow *flow;
(void)ts; (void)ts;
@ -3264,18 +3261,18 @@ void tcp_timer(struct ctx *c, const struct timespec *ts)
} }
} }
for (conn = tc + c->tcp.conn_count - 1; conn >= tc; conn--) { for (flow = flowtab + c->flow_count - 1; flow >= flowtab; flow--) {
switch (conn->f.type) { switch (flow->f.type) {
case FLOW_TCP: case FLOW_TCP:
if (conn->tap.events == CLOSED) if (flow->tcp.events == CLOSED)
tcp_conn_destroy(c, conn); tcp_conn_destroy(c, flow);
break; break;
case FLOW_TCP_SPLICE: case FLOW_TCP_SPLICE:
tcp_splice_timer(c, conn); tcp_splice_timer(c, flow);
break; break;
default: default:
die("Unexpected %s in tcp_timer()", die("Unexpected %s in tcp_timer()",
FLOW_TYPE(&conn->f)); FLOW_TYPE(&flow->f));
} }
} }

5
tcp.h
View file

@ -8,9 +8,6 @@
#define TCP_TIMER_INTERVAL 1000 /* ms */ #define TCP_TIMER_INTERVAL 1000 /* ms */
#define TCP_CONN_INDEX_BITS 17 /* 128k - 1 */
#define TCP_MAX_CONNS MAX_FROM_BITS(TCP_CONN_INDEX_BITS)
struct ctx; struct ctx;
void tcp_timer_handler(struct ctx *c, union epoll_ref ref); void tcp_timer_handler(struct ctx *c, union epoll_ref ref);
@ -56,7 +53,6 @@ union tcp_listen_epoll_ref {
/** /**
* struct tcp_ctx - Execution context for TCP routines * struct tcp_ctx - Execution context for TCP routines
* @hash_secret: 128-bit secret for hash functions, ISN and hash table * @hash_secret: 128-bit secret for hash functions, ISN and hash table
* @conn_count: Count of total connections in connection table
* @port_to_tap: Ports bound host-side, packets to tap or spliced * @port_to_tap: Ports bound host-side, packets to tap or spliced
* @fwd_in: Port forwarding configuration for inbound packets * @fwd_in: Port forwarding configuration for inbound packets
* @fwd_out: Port forwarding configuration for outbound packets * @fwd_out: Port forwarding configuration for outbound packets
@ -66,7 +62,6 @@ union tcp_listen_epoll_ref {
*/ */
struct tcp_ctx { struct tcp_ctx {
uint64_t hash_secret[2]; uint64_t hash_secret[2];
int conn_count;
struct port_fwd fwd_in; struct port_fwd fwd_in;
struct port_fwd fwd_out; struct port_fwd fwd_out;
struct timespec timer_run; struct timespec timer_run;

View file

@ -40,7 +40,7 @@ struct tcp_tap_conn {
struct flow_common f; struct flow_common f;
bool in_epoll :1; bool in_epoll :1;
int next_index :TCP_CONN_INDEX_BITS + 2; int next_index :FLOW_INDEX_BITS + 2;
#define TCP_RETRANS_BITS 3 #define TCP_RETRANS_BITS 3
unsigned int retrans :TCP_RETRANS_BITS; unsigned int retrans :TCP_RETRANS_BITS;
@ -151,21 +151,6 @@ struct tcp_splice_conn {
uint32_t written[SIDES]; uint32_t written[SIDES];
}; };
/**
* union tcp_conn - Descriptor for a TCP connection (spliced or non-spliced)
* @c: Fields common between all variants
* @tap: Fields specific to non-spliced connections
* @splice: Fields specific to spliced connections
*/
union tcp_conn {
struct flow_common f;
struct tcp_tap_conn tap;
struct tcp_splice_conn splice;
};
/* TCP connections */
extern union tcp_conn tc[];
/* Socket pools */ /* Socket pools */
#define TCP_SOCK_POOL_SIZE 32 #define TCP_SOCK_POOL_SIZE 32
@ -173,9 +158,9 @@ extern int init_sock_pool4 [TCP_SOCK_POOL_SIZE];
extern int init_sock_pool6 [TCP_SOCK_POOL_SIZE]; extern int init_sock_pool6 [TCP_SOCK_POOL_SIZE];
void tcp_splice_conn_update(const struct ctx *c, struct tcp_splice_conn *new); void tcp_splice_conn_update(const struct ctx *c, struct tcp_splice_conn *new);
void tcp_table_compact(struct ctx *c, union tcp_conn *hole); void tcp_table_compact(struct ctx *c, union flow *hole);
void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union); void tcp_splice_destroy(struct ctx *c, union flow *flow);
void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union); void tcp_splice_timer(struct ctx *c, union flow *flow);
int tcp_conn_pool_sock(int pool[]); int tcp_conn_pool_sock(int pool[]);
int tcp_conn_new_sock(const struct ctx *c, sa_family_t af); int tcp_conn_new_sock(const struct ctx *c, sa_family_t af);
void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af); void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af);

View file

@ -57,6 +57,7 @@
#include "flow.h" #include "flow.h"
#include "tcp_conn.h" #include "tcp_conn.h"
#include "flow_table.h"
#define MAX_PIPE_SIZE (8UL * 1024 * 1024) #define MAX_PIPE_SIZE (8UL * 1024 * 1024)
#define TCP_SPLICE_PIPE_POOL_SIZE 32 #define TCP_SPLICE_PIPE_POOL_SIZE 32
@ -76,7 +77,7 @@ static int splice_pipe_pool [TCP_SPLICE_PIPE_POOL_SIZE][2];
#define CONN_V4(x) (!CONN_V6(x)) #define CONN_V4(x) (!CONN_V6(x))
#define CONN_HAS(conn, set) ((conn->events & (set)) == (set)) #define CONN_HAS(conn, set) ((conn->events & (set)) == (set))
#define CONN(idx) (&tc[(idx)].splice) #define CONN(idx) (&tc[(idx)].splice)
#define CONN_IDX(conn) ((union tcp_conn *)(conn) - tc) #define CONN_IDX(conn) ((union flow *)(conn) - flowtab)
/* Display strings for connection events */ /* Display strings for connection events */
static const char *tcp_splice_event_str[] __attribute((__unused__)) = { static const char *tcp_splice_event_str[] __attribute((__unused__)) = {
@ -254,11 +255,11 @@ void tcp_splice_conn_update(const struct ctx *c, struct tcp_splice_conn *new)
/** /**
* tcp_splice_destroy() - Close spliced connection and pipes, clear * tcp_splice_destroy() - Close spliced connection and pipes, clear
* @c: Execution context * @c: Execution context
* @conn_union: Spliced connection (container union) * @flow: Flow table entry
*/ */
void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union) void tcp_splice_destroy(struct ctx *c, union flow *flow)
{ {
struct tcp_splice_conn *conn = &conn_union->splice; struct tcp_splice_conn *conn = &flow->tcp_splice;
int side; int side;
for (side = 0; side < SIDES; side++) { for (side = 0; side < SIDES; side++) {
@ -283,7 +284,7 @@ void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union)
conn->flags = 0; conn->flags = 0;
debug("TCP (spliced): index %li, CLOSED", CONN_IDX(conn)); debug("TCP (spliced): index %li, CLOSED", CONN_IDX(conn));
tcp_table_compact(c, conn_union); tcp_table_compact(c, flow);
} }
/** /**
@ -775,15 +776,15 @@ void tcp_splice_init(struct ctx *c)
/** /**
* tcp_splice_timer() - Timer for spliced connections * tcp_splice_timer() - Timer for spliced connections
* @c: Execution context * @c: Execution context
* @conn_union: Spliced connection (container union) * @flow: Flow table entry
*/ */
void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union) void tcp_splice_timer(struct ctx *c, union flow *flow)
{ {
struct tcp_splice_conn *conn = &conn_union->splice; struct tcp_splice_conn *conn = &flow->tcp_splice;
int side; int side;
if (conn->flags & CLOSING) { if (conn->flags & CLOSING) {
tcp_splice_destroy(c, conn_union); tcp_splice_destroy(c, flow);
return; return;
} }