udp: Separate tracking of inbound and outbound packet flows

Each entry udp_splice_map[v6][N] keeps information about two essentially
unrelated packet flows. @ns_conn_sock, @ns_conn_ts and @init_bound_sock
track a packet flow from port N in the host init namespace to some other
port in the pasta namespace (the one @ns_conn_sock is connected to).
@init_conn_sock, @init_conn_ts and @ns_bound_sock track a packet flow from
port N in the pasta namespace to some other port in the host init namespace
(the one @init_conn_sock is connected to).

Split udp_splice_map[][] into two separate tables for the two directions.
Each entry in each table is a 'struct udp_splice_flow' with @orig_sock
(previously the bound socket), @target_sock (previously the connected
socket) and @ts (the timeout for the target socket).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
David Gibson 2022-11-30 15:13:02 +11:00 committed by Stefano Brivio
parent 4ebb4905e9
commit 729edc241d

114
udp.c
View file

@ -41,50 +41,50 @@
* pair of recvmmsg() and sendmmsg() deals with this case. * pair of recvmmsg() and sendmmsg() deals with this case.
* *
* The connection tracking for PASTA mode is slightly complicated by the absence * The connection tracking for PASTA mode is slightly complicated by the absence
* of actual connections, see struct udp_splice_port, and these examples: * of actual connections, see struct udp_splice_flow, and these examples:
* *
* - from init to namespace: * - from init to namespace:
* *
* - forward direction: 127.0.0.1:5000 -> 127.0.0.1:80 in init from bound * - forward direction: 127.0.0.1:5000 -> 127.0.0.1:80 in init from bound
* socket s, with epoll reference: index = 80, splice = UDP_TO_NS * socket s, with epoll reference: index = 80, splice = UDP_TO_NS
* - if udp_splice_map[V4][5000].ns_conn_sock: * - if udp_splice_to_ns[V4][5000].target_sock:
* - send packet to udp4_splice_map[5000].ns_conn_sock * - send packet to udp_splice_to_ns[V4][5000].target_sock
* - otherwise: * - otherwise:
* - create new socket udp_splice_map[V4][5000].ns_conn_sock * - create new socket udp_splice_to_ns[V4][5000].target_sock
* - bind in namespace to 127.0.0.1:5000 * - bind in namespace to 127.0.0.1:5000
* - connect in namespace to 127.0.0.1:80 (note: this destination port * - connect in namespace to 127.0.0.1:80 (note: this destination port
* might be remapped to another port instead) * might be remapped to another port instead)
* - add to epoll with reference: index = 5000, splice: UDP_BACK_TO_INIT * - add to epoll with reference: index = 5000, splice: UDP_BACK_TO_INIT
* - set udp_splice_map[V4][5000].init_bound_sock to s * - set udp_splice_to_ns[V4][5000].orig_sock to s
* - update udp_splice_map[V4][5000].ns_conn_ts with current time * - update udp_splice_to_ns[V4][5000].ts with current time
* *
* - reverse direction: 127.0.0.1:80 -> 127.0.0.1:5000 in namespace from * - reverse direction: 127.0.0.1:80 -> 127.0.0.1:5000 in namespace from
* connected socket s, having epoll reference: index = 5000, * connected socket s, having epoll reference: index = 5000,
* splice = UDP_BACK_TO_INIT * splice = UDP_BACK_TO_INIT
* - if udp_splice_map[V4][5000].init_bound_sock: * - if udp_splice_to_ns[V4][5000].orig_sock:
* - send to udp_splice_map[V4][5000].init_bound_sock, with destination * - send to udp_splice_to_ns[V4][5000].orig_sock, with destination port
* port 5000 * 5000
* - otherwise, discard * - otherwise, discard
* *
* - from namespace to init: * - from namespace to init:
* *
* - forward direction: 127.0.0.1:2000 -> 127.0.0.1:22 in namespace from bound * - forward direction: 127.0.0.1:2000 -> 127.0.0.1:22 in namespace from bound
* socket s, with epoll reference: index = 22, splice = UDP_TO_INIT * socket s, with epoll reference: index = 22, splice = UDP_TO_INIT
* - if udp4_splice_map[V4][2000].init_conn_sock: * - if udp_splice_to_init[V4][2000].target_sock:
* - send packet to udp4_splice_map[2000].init_conn_sock * - send packet to udp_splice_to_init[V4][2000].target_sock
* - otherwise: * - otherwise:
* - create new socket udp_splice_map[V4][2000].init_conn_sock * - create new socket udp_splice_to_init[V4][2000].target_sock
* - bind in init to 127.0.0.1:2000 * - bind in init to 127.0.0.1:2000
* - connect in init to 127.0.0.1:22 (note: this destination port * - connect in init to 127.0.0.1:22 (note: this destination port
* might be remapped to another port instead) * might be remapped to another port instead)
* - add to epoll with reference: index = 2000, splice = UDP_BACK_TO_NS * - add to epoll with reference: index = 2000, splice = UDP_BACK_TO_NS
* - set udp_splice_map[V4][2000].ns_bound_sock to s * - set udp_splice_to_init[V4][2000].orig_sock to s
* - update udp_splice_map[V4][2000].init_conn_ts with current time * - update udp_splice_to_init[V4][2000].ts with current time
* *
* - reverse direction: 127.0.0.1:22 -> 127.0.0.1:2000 in init from connected * - reverse direction: 127.0.0.1:22 -> 127.0.0.1:2000 in init from connected
* socket s, having epoll reference: index = 2000, splice = UDP_BACK_TO_NS * socket s, having epoll reference: index = 2000, splice = UDP_BACK_TO_NS
* - if udp_splice_map[V4][2000].ns_bound_sock: * - if udp_splice_to_init[V4][2000].orig_sock:
* - send to udp_splice_map[V4][2000].ns_bound_sock, with destination port * - send to udp_splice_to_init[V4][2000].orig_sock, with destination port
* 2000 * 2000
* - otherwise, discard * - otherwise, discard
*/ */
@ -138,28 +138,26 @@ struct udp_tap_port {
}; };
/** /**
* struct udp_splice_port - Source port tracking for traffic between namespaces * struct udp_splice_flow - Spliced "connection"
* @ns_conn_sock: Socket connected in namespace for init source port * @orig_sock: Originating socket, bound to dest port in source ns of
* @init_conn_sock: Socket connected in init for namespace source port * originating datagram
* @ns_conn_ts: Timestamp of activity for socket connected in namespace * @target_sock: Target socket, bound to source port of originating
* @init_conn_ts: Timestamp of activity for socket connceted in init * datagram in dest ns, connected to dest port of
* @ns_bound_sock: Bound socket in namespace for this source port in init * originating datagram in dest ns
* @init_bound_sock: Bound socket in init for this source port in namespace * @ts: Activity timestamp
*/ */
struct udp_splice_port { struct udp_splice_flow {
int ns_conn_sock; int orig_sock;
int init_conn_sock; int target_sock;
time_t ts;
time_t ns_conn_ts;
time_t init_conn_ts;
int ns_bound_sock;
int init_bound_sock;
}; };
/* Port tracking, arrays indexed by packet source port (host order) */ /* Port tracking, arrays indexed by packet source port (host order) */
static struct udp_tap_port udp_tap_map [IP_VERSIONS][NUM_PORTS]; static struct udp_tap_port udp_tap_map [IP_VERSIONS][NUM_PORTS];
static struct udp_splice_port udp_splice_map [IP_VERSIONS][NUM_PORTS];
/* Spliced "connections" indexed by originating source port (host order) */
static struct udp_splice_flow udp_splice_to_ns [IP_VERSIONS][NUM_PORTS];
static struct udp_splice_flow udp_splice_to_init[IP_VERSIONS][NUM_PORTS];
enum udp_act_type { enum udp_act_type {
UDP_ACT_TAP, UDP_ACT_TAP,
@ -421,8 +419,16 @@ int udp_splice_connect(const struct ctx *c, int v6, int bound_sock,
.r.p.udp.udp = { .splice = splice, .v6 = v6, .r.p.udp.udp = { .splice = splice, .v6 = v6,
.port = src } .port = src }
}; };
struct udp_splice_port *sp = &udp_splice_map[v6 ? V6 : V4][src]; struct udp_splice_flow *flow;
int s; int act, s;
if (splice == UDP_BACK_TO_INIT) {
flow = &udp_splice_to_ns[v6 ? V6 : V4][src];
act = UDP_ACT_NS_CONN;
} else {
flow = &udp_splice_to_init[v6 ? V6 : V4][src];
act = UDP_ACT_INIT_CONN;
}
s = socket(v6 ? AF_INET6 : AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, s = socket(v6 ? AF_INET6 : AF_INET, SOCK_DGRAM | SOCK_NONBLOCK,
IPPROTO_UDP); IPPROTO_UDP);
@ -461,15 +467,9 @@ int udp_splice_connect(const struct ctx *c, int v6, int bound_sock,
goto fail; goto fail;
} }
if (splice == UDP_BACK_TO_INIT) { flow->orig_sock = bound_sock;
sp->init_bound_sock = bound_sock; flow->target_sock = s;
sp->ns_conn_sock = s; bitmap_set(udp_act[v6 ? V6 : V4][act], src);
bitmap_set(udp_act[v6 ? V6 : V4][UDP_ACT_NS_CONN], src);
} else if (splice == UDP_BACK_TO_NS) {
sp->ns_bound_sock = bound_sock;
sp->init_conn_sock = s;
bitmap_set(udp_act[v6 ? V6 : V4][UDP_ACT_INIT_CONN], src);
}
ev.data.u64 = ref.u64; ev.data.u64 = ref.u64;
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, s, &ev); epoll_ctl(c->epollfd, EPOLL_CTL_ADD, s, &ev);
@ -556,7 +556,7 @@ static void udp_sock_handler_splice(const struct ctx *c, union epoll_ref ref,
case UDP_TO_NS: case UDP_TO_NS:
src += c->udp.fwd_out.rdelta[src]; src += c->udp.fwd_out.rdelta[src];
if (!(s = udp_splice_map[v6][src].ns_conn_sock)) { if (!(s = udp_splice_to_ns[v6][src].target_sock)) {
struct udp_splice_connect_ns_arg arg = { struct udp_splice_connect_ns_arg arg = {
c, v6, ref.r.s, src, dst, -1, c, v6, ref.r.s, src, dst, -1,
}; };
@ -565,10 +565,10 @@ static void udp_sock_handler_splice(const struct ctx *c, union epoll_ref ref,
if ((s = arg.s) < 0) if ((s = arg.s) < 0)
return; return;
} }
udp_splice_map[v6][src].ns_conn_ts = now->tv_sec; udp_splice_to_ns[v6][src].ts = now->tv_sec;
break; break;
case UDP_BACK_TO_INIT: case UDP_BACK_TO_INIT:
if (!(s = udp_splice_map[v6][dst].init_bound_sock)) if (!(s = udp_splice_to_ns[v6][dst].orig_sock))
return; return;
send_dst = dst; send_dst = dst;
@ -576,16 +576,16 @@ static void udp_sock_handler_splice(const struct ctx *c, union epoll_ref ref,
case UDP_TO_INIT: case UDP_TO_INIT:
src += c->udp.fwd_in.rdelta[src]; src += c->udp.fwd_in.rdelta[src];
if (!(s = udp_splice_map[v6][src].init_conn_sock)) { if (!(s = udp_splice_to_init[v6][src].target_sock)) {
s = udp_splice_connect(c, v6, ref.r.s, src, dst, s = udp_splice_connect(c, v6, ref.r.s, src, dst,
UDP_BACK_TO_NS); UDP_BACK_TO_NS);
if (s < 0) if (s < 0)
return; return;
} }
udp_splice_map[v6][src].init_conn_ts = now->tv_sec; udp_splice_to_init[v6][src].ts = now->tv_sec;
break; break;
case UDP_BACK_TO_NS: case UDP_BACK_TO_NS:
if (!(s = udp_splice_map[v6][dst].ns_bound_sock)) if (!(s = udp_splice_to_init[v6][dst].orig_sock))
return; return;
send_dst = dst; send_dst = dst;
@ -1286,7 +1286,7 @@ int udp_init(struct ctx *c)
static void udp_timer_one(struct ctx *c, int v6, enum udp_act_type type, static void udp_timer_one(struct ctx *c, int v6, enum udp_act_type type,
in_port_t port, const struct timespec *ts) in_port_t port, const struct timespec *ts)
{ {
struct udp_splice_port *sp; struct udp_splice_flow *flow;
struct udp_tap_port *tp; struct udp_tap_port *tp;
int s = -1; int s = -1;
@ -1301,17 +1301,17 @@ static void udp_timer_one(struct ctx *c, int v6, enum udp_act_type type,
break; break;
case UDP_ACT_INIT_CONN: case UDP_ACT_INIT_CONN:
sp = &udp_splice_map[v6 ? V6 : V4][port]; flow = &udp_splice_to_init[v6 ? V6 : V4][port];
if (ts->tv_sec - sp->init_conn_ts > UDP_CONN_TIMEOUT) if (ts->tv_sec - flow->ts > UDP_CONN_TIMEOUT)
s = sp->init_conn_sock; s = flow->target_sock;
break; break;
case UDP_ACT_NS_CONN: case UDP_ACT_NS_CONN:
sp = &udp_splice_map[v6 ? V6 : V4][port]; flow = &udp_splice_to_ns[v6 ? V6 : V4][port];
if (ts->tv_sec - sp->ns_conn_ts > UDP_CONN_TIMEOUT) if (ts->tv_sec - flow->ts > UDP_CONN_TIMEOUT)
s = sp->ns_conn_sock; s = flow->target_sock;
break; break;
default: default: