udp: Split splice field in udp_epoll_ref into (mostly) independent bits

The @splice field in union udp_epoll_ref can have a number of values for
different types of "spliced" packet flows.  Split it into several single
bit fields with more or less independent meanings.  The new @splice field
is just a boolean indicating whether the socket is associated with a
spliced flow, making it identical to the @splice field in tcp_epoll_ref.

The new bit @orig indicates whether this is a socket which can originate
new UDP packet flows (created with -u or -U) or a socket created on the
fly to handle reply packets.  @ns indicates whether the socket lives in the
init namespace or the pasta namespace.

Making these bits more orthogonal to each other will simplify some future
cleanups.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
David Gibson 2022-11-30 15:13:06 +11:00 committed by Stefano Brivio
parent 8517239243
commit d9394eb9b7
3 changed files with 35 additions and 35 deletions

View file

@ -31,6 +31,8 @@ struct tap_l4_msg {
union epoll_ref; union epoll_ref;
#include <stdbool.h>
#include "packet.h" #include "packet.h"
#include "icmp.h" #include "icmp.h"
#include "port_fwd.h" #include "port_fwd.h"

53
udp.c
View file

@ -46,19 +46,20 @@
* - from init to namespace: * - from init to namespace:
* *
* - forward direction: 127.0.0.1:5000 -> 127.0.0.1:80 in init from socket s, * - forward direction: 127.0.0.1:5000 -> 127.0.0.1:80 in init from socket s,
* with epoll reference: index = 80, splice = UDP_TO_NS * with epoll reference: index = 80, splice = 1, orig = 1, ns = 0
* - if udp_splice_to_ns[V4][5000].target_sock: * - if udp_splice_to_ns[V4][5000].target_sock:
* - send packet to udp_splice_to_ns[V4][5000].target_sock, with * - send packet to udp_splice_to_ns[V4][5000].target_sock, with
* destination port 80 * destination port 80
* - otherwise: * - otherwise:
* - create new socket udp_splice_to_ns[V4][5000].target_sock * - create new socket udp_splice_to_ns[V4][5000].target_sock
* - bind in namespace to 127.0.0.1:5000 * - bind in namespace to 127.0.0.1:5000
* - add to epoll with reference: index = 5000, splice: UDP_BACK_TO_INIT * - add to epoll with reference: index = 5000, splice = 1, orig = 0,
* ns = 1
* - set udp_splice_to_ns[V4][5000].orig_sock to s * - set udp_splice_to_ns[V4][5000].orig_sock to s
* - update udp_splice_to_ns[V4][5000].ts with current time * - update udp_splice_to_ns[V4][5000].ts with current time
* *
* - reverse direction: 127.0.0.1:80 -> 127.0.0.1:5000 in namespace socket s, * - reverse direction: 127.0.0.1:80 -> 127.0.0.1:5000 in namespace socket s,
* having epoll reference: index = 5000, splice = UDP_BACK_TO_INIT * having epoll reference: index = 5000, splice = 1, orig = 0, ns = 1
* - if udp_splice_to_ns[V4][5000].orig_sock: * - if udp_splice_to_ns[V4][5000].orig_sock:
* - send to udp_splice_to_ns[V4][5000].orig_sock, with destination port * - send to udp_splice_to_ns[V4][5000].orig_sock, with destination port
* 5000 * 5000
@ -67,19 +68,20 @@
* - from namespace to init: * - from namespace to init:
* *
* - forward direction: 127.0.0.1:2000 -> 127.0.0.1:22 in namespace from * - forward direction: 127.0.0.1:2000 -> 127.0.0.1:22 in namespace from
* socket s, with epoll reference: index = 22, splice = UDP_TO_INIT * socket s, with epoll reference: index = 22, splice = 1, orig = 1, ns = 1
* - if udp4_splice_to_init[V4][2000].target_sock: * - if udp4_splice_to_init[V4][2000].target_sock:
* - send packet to udp_splice_to_init[V4][2000].target_sock, with * - send packet to udp_splice_to_init[V4][2000].target_sock, with
* destination port 22 * destination port 22
* - otherwise: * - otherwise:
* - create new socket udp_splice_to_init[V4][2000].target_sock * - create new socket udp_splice_to_init[V4][2000].target_sock
* - bind in init to 127.0.0.1:2000 * - bind in init to 127.0.0.1:2000
* - add to epoll with reference: index = 2000, splice = UDP_BACK_TO_NS * - add to epoll with reference: index = 2000, splice = 1, orig = 0,
* ns = 0
* - set udp_splice_to_init[V4][2000].orig_sock to s * - set udp_splice_to_init[V4][2000].orig_sock to s
* - update udp_splice_to_init[V4][2000].ts with current time * - update udp_splice_to_init[V4][2000].ts with current time
* *
* - reverse direction: 127.0.0.1:22 -> 127.0.0.1:2000 in init from socket s, * - reverse direction: 127.0.0.1:22 -> 127.0.0.1:2000 in init from socket s,
* having epoll reference: index = 2000, splice = UDP_BACK_TO_NS * having epoll reference: index = 2000, splice = 1, orig = 0, ns = 0
* - if udp_splice_to_init[V4][2000].orig_sock: * - if udp_splice_to_init[V4][2000].orig_sock:
* - send to udp_splice_to_init[V4][2000].orig_sock, with destination port * - send to udp_splice_to_init[V4][2000].orig_sock, with destination port
* 2000 * 2000
@ -404,17 +406,17 @@ static void udp_sock6_iov_init(void)
* #syscalls:pasta getsockname * #syscalls:pasta getsockname
*/ */
int udp_splice_new(const struct ctx *c, int v6, int bound_sock, in_port_t src, int udp_splice_new(const struct ctx *c, int v6, int bound_sock, in_port_t src,
int splice) bool ns)
{ {
struct epoll_event ev = { .events = EPOLLIN | EPOLLRDHUP | EPOLLHUP }; struct epoll_event ev = { .events = EPOLLIN | EPOLLRDHUP | EPOLLHUP };
union epoll_ref ref = { .r.proto = IPPROTO_UDP, union epoll_ref ref = { .r.proto = IPPROTO_UDP,
.r.p.udp.udp = { .splice = splice, .v6 = v6, .r.p.udp.udp = { .splice = true, .ns = ns,
.port = src } .v6 = v6, .port = src }
}; };
struct udp_splice_flow *flow; struct udp_splice_flow *flow;
int act, s; int act, s;
if (splice == UDP_BACK_TO_INIT) { if (ns) {
flow = &udp_splice_to_ns[v6 ? V6 : V4][src]; flow = &udp_splice_to_ns[v6 ? V6 : V4][src];
act = UDP_ACT_SPLICE_NS; act = UDP_ACT_SPLICE_NS;
} else { } else {
@ -498,8 +500,7 @@ static int udp_splice_new_ns(void *arg)
if (ns_enter(a->c)) if (ns_enter(a->c))
return 0; return 0;
a->s = udp_splice_new(a->c, a->v6, a->bound_sock, a->src, a->s = udp_splice_new(a->c, a->v6, a->bound_sock, a->src, true);
UDP_BACK_TO_INIT);
return 0; return 0;
} }
@ -537,8 +538,8 @@ static void udp_sock_handler_splice(const struct ctx *c, union epoll_ref ref,
src = ntohs(sa->sin_port); src = ntohs(sa->sin_port);
} }
switch (ref.r.p.udp.udp.splice) {
case UDP_TO_NS: if (ref.r.p.udp.udp.orig && !ref.r.p.udp.udp.ns) {
src += c->udp.fwd_out.rdelta[src]; src += c->udp.fwd_out.rdelta[src];
if (!(s = udp_splice_to_ns[v6][src].target_sock)) { if (!(s = udp_splice_to_ns[v6][src].target_sock)) {
@ -550,27 +551,24 @@ static void udp_sock_handler_splice(const struct ctx *c, union epoll_ref ref,
if ((s = arg.s) < 0) if ((s = arg.s) < 0)
return; return;
} }
udp_splice_to_ns[v6][src].ts = now->tv_sec; udp_splice_to_ns[v6][src].ts = now->tv_sec;
break; } else if (!ref.r.p.udp.udp.orig && ref.r.p.udp.udp.ns) {
case UDP_BACK_TO_INIT:
if (!(s = udp_splice_to_ns[v6][dst].orig_sock)) if (!(s = udp_splice_to_ns[v6][dst].orig_sock))
return; return;
break; } else if (ref.r.p.udp.udp.orig && ref.r.p.udp.udp.ns) {
case UDP_TO_INIT:
src += c->udp.fwd_in.rdelta[src]; src += c->udp.fwd_in.rdelta[src];
if (!(s = udp_splice_to_init[v6][src].target_sock)) { if (!(s = udp_splice_to_init[v6][src].target_sock)) {
s = udp_splice_new(c, v6, ref.r.s, src, UDP_BACK_TO_NS); s = udp_splice_new(c, v6, ref.r.s, src, false);
if (s < 0) if (s < 0)
return; return;
} }
udp_splice_to_init[v6][src].ts = now->tv_sec; udp_splice_to_init[v6][src].ts = now->tv_sec;
break; } else if (!ref.r.p.udp.udp.orig && !ref.r.p.udp.udp.ns) {
case UDP_BACK_TO_NS:
if (!(s = udp_splice_to_init[v6][dst].orig_sock)) if (!(s = udp_splice_to_init[v6][dst].orig_sock))
return; return;
break; } else {
default:
return; return;
} }
@ -1096,15 +1094,16 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
if (c->mode == MODE_PASTA) { if (c->mode == MODE_PASTA) {
bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) }; bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) };
uref.udp.splice = UDP_TO_NS; uref.udp.splice = uref.udp.orig = true;
sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, ifname, sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, ifname,
port, uref.u32); port, uref.u32);
} }
if (ns) { if (ns) {
uref.udp.splice = uref.udp.orig = uref.udp.ns = true;
bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) }; bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) };
uref.udp.splice = UDP_TO_INIT;
sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, ifname, sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, ifname,
port, uref.u32); port, uref.u32);
@ -1129,7 +1128,7 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
if (c->mode == MODE_PASTA) { if (c->mode == MODE_PASTA) {
bind_addr = &in6addr_loopback; bind_addr = &in6addr_loopback;
uref.udp.splice = UDP_TO_NS; uref.udp.splice = uref.udp.orig = true;
sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, ifname, sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, ifname,
port, uref.u32); port, uref.u32);
@ -1137,7 +1136,7 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
if (ns) { if (ns) {
bind_addr = &in6addr_loopback; bind_addr = &in6addr_loopback;
uref.udp.splice = UDP_TO_INIT; uref.udp.splice = uref.udp.orig = uref.udp.ns = true;
sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, ifname, sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, ifname,
port, uref.u32); port, uref.u32);

15
udp.h
View file

@ -23,20 +23,19 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
* union udp_epoll_ref - epoll reference portion for TCP connections * union udp_epoll_ref - epoll reference portion for TCP connections
* @bound: Set if this file descriptor is a bound socket * @bound: Set if this file descriptor is a bound socket
* @splice: Set if descriptor is associated to "spliced" connection * @splice: Set if descriptor is associated to "spliced" connection
* @orig: Set if a spliced socket which can originate "connections"
* @ns: Set if this is a socket in the pasta network namespace
* @v6: Set for IPv6 sockets or connections * @v6: Set for IPv6 sockets or connections
* @port: Source port for connected sockets, bound port otherwise * @port: Source port for connected sockets, bound port otherwise
* @u32: Opaque u32 value of reference * @u32: Opaque u32 value of reference
*/ */
union udp_epoll_ref { union udp_epoll_ref {
struct { struct {
uint32_t splice:3, bool splice:1,
#define UDP_TO_NS 1 orig:1,
#define UDP_TO_INIT 2 ns:1,
#define UDP_BACK_TO_NS 3 v6:1;
#define UDP_BACK_TO_INIT 4 uint32_t port:16;
v6:1,
port:16;
} udp; } udp;
uint32_t u32; uint32_t u32;
}; };