diff --git a/tcp.c b/tcp.c index 90eb840..61700d6 100644 --- a/tcp.c +++ b/tcp.c @@ -98,11 +98,11 @@ * Connection tracking and storage * ------------------------------- * - * Connections are tracked by the @tc array of struct tcp_tap_conn, containing - * addresses, ports, TCP states and parameters. This is statically allocated and - * indexed by an arbitrary connection number. The array is compacted whenever a - * connection is closed, by remapping the highest connection index in use to the - * one freed up. + * Connections are tracked by struct tcp_tap_conn entries in the @tc + * array, containing addresses, ports, TCP states and parameters. This + * is statically allocated and indexed by an arbitrary connection + * number. The array is compacted whenever a connection is closed, by + * remapping the highest connection index in use to the one freed up. * * References used for the epoll interface report the connection index used for * the @tc array. @@ -588,10 +588,10 @@ static unsigned int tcp6_l2_flags_buf_used; static size_t tcp6_l2_flags_buf_bytes; /* TCP connections */ -static struct tcp_tap_conn tc[TCP_MAX_CONNS]; +union tcp_conn tc[TCP_MAX_CONNS]; -#define CONN(index) (tc + (index)) -#define CONN_IDX(conn) ((conn) - tc) +#define CONN(index) (&tc[(index)].tap) +#define CONN_IDX(conn) ((union tcp_conn *)(conn) - tc) /** conn_at_idx() - Find a connection by index, if present * @index: Index of connection to lookup @@ -1351,26 +1351,28 @@ static struct tcp_tap_conn *tcp_hash_lookup(const struct ctx *c, * @c: Execution context * @hole: Pointer to recently closed connection */ -static void tcp_table_compact(struct ctx *c, struct tcp_tap_conn *hole) +void tcp_table_compact(struct ctx *c, union tcp_conn *hole) { - struct tcp_tap_conn *from, *to; + union tcp_conn *from; if (CONN_IDX(hole) == --c->tcp.conn_count) { - debug("TCP: hash table compaction: maximum index was %li (%p)", + debug("TCP: table compaction: maximum index was %li (%p)", CONN_IDX(hole), hole); memset(hole, 0, sizeof(*hole)); return; } - from = CONN(c->tcp.conn_count); + from = tc + c->tcp.conn_count; memcpy(hole, from, sizeof(*hole)); - to = hole; - tcp_tap_conn_update(c, from, to); + if (from->c.spliced) + tcp_splice_conn_update(c, &hole->splice); + else + tcp_tap_conn_update(c, &from->tap, &hole->tap); - debug("TCP: hash table compaction: old index %li, new index %li, " - "sock %i, from: %p, to: %p", - CONN_IDX(from), CONN_IDX(to), from->sock, from, to); + debug("TCP: table compaction (spliced=%d): old index %li, new index %li, " + "from: %p, to: %p", + from->c.spliced, CONN_IDX(from), CONN_IDX(hole), from, hole); memset(from, 0, sizeof(*from)); } @@ -1387,7 +1389,7 @@ static void tcp_conn_destroy(struct ctx *c, struct tcp_tap_conn *conn) close(conn->timer); tcp_hash_remove(conn); - tcp_table_compact(c, conn); + tcp_table_compact(c, (union tcp_conn *)conn); } static void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn); @@ -1535,7 +1537,9 @@ void tcp_defer_handler(struct ctx *c) if (c->tcp.conn_count < MIN(max_files, max_conns)) return; - for (conn = CONN(c->tcp.conn_count - 1); conn >= tc; conn--) { + for (conn = CONN(c->tcp.conn_count - 1); conn >= CONN(0); conn--) { + if (conn->c.spliced) + continue; if (conn->events == CLOSED) tcp_conn_destroy(c, conn); } @@ -3433,7 +3437,9 @@ void tcp_timer(struct ctx *c, const struct timespec *ts) } } - for (conn = CONN(c->tcp.conn_count - 1); conn >= tc; conn--) { + for (conn = CONN(c->tcp.conn_count - 1); conn >= CONN(0); conn--) { + if (conn->c.spliced) + continue; if (conn->events == CLOSED) tcp_conn_destroy(c, conn); } diff --git a/tcp.h b/tcp.h index bba0f38..49738ef 100644 --- a/tcp.h +++ b/tcp.h @@ -54,7 +54,7 @@ union tcp_epoll_ref { /** * struct tcp_ctx - Execution context for TCP routines * @hash_secret: 128-bit secret for hash functions, ISN and hash table - * @conn_count: Count of connections (not spliced) in connection table + * @conn_count: Count of total connections in connection table * @splice_conn_count: Count of spliced connections in connection table * @port_to_tap: Ports bound host-side, packets to tap or spliced * @fwd_in: Port forwarding configuration for inbound packets diff --git a/tcp_conn.h b/tcp_conn.h index 39d104a..4295f7d 100644 --- a/tcp_conn.h +++ b/tcp_conn.h @@ -195,4 +195,10 @@ union tcp_conn { struct tcp_splice_conn splice; }; +/* TCP connections */ +extern union tcp_conn tc[]; + +void tcp_splice_conn_update(struct ctx *c, struct tcp_splice_conn *new); +void tcp_table_compact(struct ctx *c, union tcp_conn *hole); + #endif /* TCP_CONN_H */ diff --git a/tcp_splice.c b/tcp_splice.c index 7dcd1cb..c986a9c 100644 --- a/tcp_splice.c +++ b/tcp_splice.c @@ -16,7 +16,7 @@ * For local traffic directed to TCP ports configured for direct * mapping between namespaces, packets are directly translated between * L4 sockets using a pair of splice() syscalls. These connections are - * tracked in the @tc_splice array of struct tcp_splice_conn, using + * tracked by struct tcp_splice_conn entries in the @tc array, using * these events: * * - SPLICE_CONNECT: connection accepted, connecting to target @@ -57,7 +57,7 @@ #define MAX_PIPE_SIZE (8UL * 1024 * 1024) #define TCP_SPLICE_PIPE_POOL_SIZE 16 -#define TCP_SPLICE_CONN_PRESSURE 30 /* % of splice_conn_count */ +#define TCP_SPLICE_CONN_PRESSURE 30 /* % of conn_count */ #define TCP_SPLICE_FILE_PRESSURE 30 /* % of c->nofile */ /* From tcp.c */ @@ -72,11 +72,8 @@ static int splice_pipe_pool [TCP_SPLICE_PIPE_POOL_SIZE][2][2]; #define CONN_V6(x) (x->flags & SPLICE_V6) #define CONN_V4(x) (!CONN_V6(x)) #define CONN_HAS(conn, set) ((conn->events & (set)) == (set)) -#define CONN(index) (tc_splice + (index)) -#define CONN_IDX(conn) ((conn) - tc_splice) - -/* Spliced connections */ -static struct tcp_splice_conn tc_splice[TCP_SPLICE_MAX_CONNS]; +#define CONN(index) (&tc[(index)].splice) +#define CONN_IDX(conn) ((union tcp_conn *)(conn) - tc) /* Display strings for connection events */ static const char *tcp_splice_event_str[] __attribute((__unused__)) = { @@ -248,43 +245,13 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn, * @c: Execution context * @new: New location of tcp_splice_conn */ -static void tcp_splice_conn_update(struct ctx *c, struct tcp_splice_conn *new) +void tcp_splice_conn_update(struct ctx *c, struct tcp_splice_conn *new) { tcp_splice_epoll_ctl(c, new); if (tcp_splice_epoll_ctl(c, new)) conn_flag(c, new, CLOSING); } -/** - * tcp_table_splice_compact - Compact spliced connection table - * @c: Execution context - * @hole: Pointer to recently closed connection - */ -static void tcp_table_splice_compact(struct ctx *c, - struct tcp_splice_conn *hole) -{ - struct tcp_splice_conn *move; - - if (CONN_IDX(hole) == --c->tcp.splice_conn_count) { - debug("TCP (spliced): index %li (max) removed", CONN_IDX(hole)); - return; - } - - move = CONN(c->tcp.splice_conn_count); - - memcpy(hole, move, sizeof(*hole)); - - move->a = move->b = -1; - move->a_read = move->a_written = move->b_read = move->b_written = 0; - move->pipe_a_b[0] = move->pipe_a_b[1] = -1; - move->pipe_b_a[0] = move->pipe_b_a[1] = -1; - move->flags = move->events = 0; - - debug("TCP (spliced): index %li moved to %li", - CONN_IDX(move), CONN_IDX(hole)); - tcp_splice_conn_update(c, hole); -} - /** * tcp_splice_destroy() - Close spliced connection and pipes, clear * @c: Execution context @@ -319,7 +286,8 @@ static void tcp_splice_destroy(struct ctx *c, struct tcp_splice_conn *conn) conn->flags = 0; debug("TCP (spliced): index %li, CLOSED", CONN_IDX(conn)); - tcp_table_splice_compact(c, conn); + c->tcp.splice_conn_count--; + tcp_table_compact(c, (union tcp_conn *)conn); } /** @@ -553,7 +521,7 @@ void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref, if (ref.r.p.tcp.tcp.listen) { int s; - if (c->tcp.splice_conn_count >= TCP_SPLICE_MAX_CONNS) + if (c->tcp.conn_count >= TCP_MAX_CONNS) return; if ((s = accept4(ref.r.s, NULL, NULL, SOCK_NONBLOCK)) < 0) @@ -565,8 +533,9 @@ void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref, s); } - conn = CONN(c->tcp.splice_conn_count++); + conn = CONN(c->tcp.conn_count++); conn->c.spliced = true; + c->tcp.splice_conn_count++; conn->a = s; conn->flags = ref.r.p.tcp.tcp.v6 ? SPLICE_V6 : 0; @@ -845,9 +814,10 @@ void tcp_splice_timer(struct ctx *c) { struct tcp_splice_conn *conn; - for (conn = CONN(c->tcp.splice_conn_count - 1); - conn >= tc_splice; - conn--) { + for (conn = CONN(c->tcp.conn_count - 1); conn >= CONN(0); conn--) { + if (!conn->c.spliced) + continue; + if (conn->flags & CLOSING) { tcp_splice_destroy(c, conn); return; @@ -890,12 +860,12 @@ void tcp_splice_defer_handler(struct ctx *c) int max_files = c->nofile / 100 * TCP_SPLICE_FILE_PRESSURE; struct tcp_splice_conn *conn; - if (c->tcp.splice_conn_count < MIN(max_files / 6, max_conns)) + if (c->tcp.conn_count < MIN(max_files / 6, max_conns)) return; - for (conn = CONN(c->tcp.splice_conn_count - 1); - conn >= tc_splice; - conn--) { + for (conn = CONN(c->tcp.conn_count - 1); conn >= CONN(0); conn--) { + if (!conn->c.spliced) + continue; if (conn->flags & CLOSING) tcp_splice_destroy(c, conn); } diff --git a/tcp_splice.h b/tcp_splice.h index 2c4bff3..e8c70e9 100644 --- a/tcp_splice.h +++ b/tcp_splice.h @@ -6,8 +6,6 @@ #ifndef TCP_SPLICE_H #define TCP_SPLICE_H -#define TCP_SPLICE_MAX_CONNS (128 * 1024) - void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref, uint32_t events); void tcp_splice_init(struct ctx *c);