udp: Don't explicitly track originating socket for spliced "connections"
When we look up udp_splice_to_ns[][].orig_sock in udp_sock_handler_splice() we're finding the socket on which the originating packet for the "connection" was received. However, we don't specifically need this socket to be the originating one - we just need one that's bound to the source port of this reply packet in the init namespace. We can look this up in udp_splice_to_init[v6][src].target_sock, whose defining characteristic is exactly that. The same applies with init and ns swapped. In practice, of course, the port we locate this way will always be the originating port, since we couldn't have started this "connection" if it wasn't. Change this, and we no longer need the @orig_sock field at all. That leaves just @target_sock which we rename to simply @sock. The whole udp_splice_flow structure now represents a single bound port more than a "flow" per se, so rename and recomment it accordingly. Likewise the udp_splice_to_{ns,init} names are now misleading, since the ports in those maps are used in both directions. Rename them to udp_splice_{ns,init} indicating the location where the described socket is bound. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
27bfebb061
commit
7610034fef
1 changed files with 52 additions and 61 deletions
111
udp.c
111
udp.c
|
@ -41,50 +41,46 @@
|
||||||
* pair of recvmmsg() and sendmmsg() deals with this case.
|
* pair of recvmmsg() and sendmmsg() deals with this case.
|
||||||
*
|
*
|
||||||
* The connection tracking for PASTA mode is slightly complicated by the absence
|
* The connection tracking for PASTA mode is slightly complicated by the absence
|
||||||
* of actual connections, see struct udp_splice_flow, and these examples:
|
* of actual connections, see struct udp_splice_port, and these examples:
|
||||||
*
|
*
|
||||||
* - from init to namespace:
|
* - from init to namespace:
|
||||||
*
|
*
|
||||||
* - forward direction: 127.0.0.1:5000 -> 127.0.0.1:80 in init from socket s,
|
* - forward direction: 127.0.0.1:5000 -> 127.0.0.1:80 in init from socket s,
|
||||||
* with epoll reference: index = 80, splice = 1, orig = 1, ns = 0
|
* with epoll reference: index = 80, splice = 1, orig = 1, ns = 0
|
||||||
* - if udp_splice_to_ns[V4][5000].target_sock:
|
* - if udp_splice_ns[V4][5000].sock:
|
||||||
* - send packet to udp_splice_to_ns[V4][5000].target_sock, with
|
* - send packet to udp_splice_ns[V4][5000].sock, with destination port
|
||||||
* destination port 80
|
* 80
|
||||||
* - otherwise:
|
* - otherwise:
|
||||||
* - create new socket udp_splice_to_ns[V4][5000].target_sock
|
* - create new socket udp_splice_ns[V4][5000].sock
|
||||||
* - bind in namespace to 127.0.0.1:5000
|
* - bind in namespace to 127.0.0.1:5000
|
||||||
* - add to epoll with reference: index = 5000, splice = 1, orig = 0,
|
* - add to epoll with reference: index = 5000, splice = 1, orig = 0,
|
||||||
* ns = 1
|
* ns = 1
|
||||||
* - set udp_splice_to_ns[V4][5000].orig_sock to s
|
* - update udp_splice_ns[V4][5000].ts with current time
|
||||||
* - update udp_splice_to_ns[V4][5000].ts with current time
|
|
||||||
*
|
*
|
||||||
* - reverse direction: 127.0.0.1:80 -> 127.0.0.1:5000 in namespace socket s,
|
* - reverse direction: 127.0.0.1:80 -> 127.0.0.1:5000 in namespace socket s,
|
||||||
* having epoll reference: index = 5000, splice = 1, orig = 0, ns = 1
|
* having epoll reference: index = 5000, splice = 1, orig = 0, ns = 1
|
||||||
* - if udp_splice_to_ns[V4][5000].orig_sock:
|
* - if udp_splice_init[V4][80].sock:
|
||||||
* - send to udp_splice_to_ns[V4][5000].orig_sock, with destination port
|
* - send to udp_splice_init[V4][80].sock, with destination port 5000
|
||||||
* 5000
|
|
||||||
* - otherwise, discard
|
* - otherwise, discard
|
||||||
*
|
*
|
||||||
* - from namespace to init:
|
* - from namespace to init:
|
||||||
*
|
*
|
||||||
* - forward direction: 127.0.0.1:2000 -> 127.0.0.1:22 in namespace from
|
* - forward direction: 127.0.0.1:2000 -> 127.0.0.1:22 in namespace from
|
||||||
* socket s, with epoll reference: index = 22, splice = 1, orig = 1, ns = 1
|
* socket s, with epoll reference: index = 22, splice = 1, orig = 1, ns = 1
|
||||||
* - if udp4_splice_to_init[V4][2000].target_sock:
|
* - if udp_splice_init[V4][2000].sock:
|
||||||
* - send packet to udp_splice_to_init[V4][2000].target_sock, with
|
* - send packet to udp_splice_init[V4][2000].sock, with destination
|
||||||
* destination port 22
|
* port 22
|
||||||
* - otherwise:
|
* - otherwise:
|
||||||
* - create new socket udp_splice_to_init[V4][2000].target_sock
|
* - create new socket udp_splice_init[V4][2000].sock
|
||||||
* - bind in init to 127.0.0.1:2000
|
* - bind in init to 127.0.0.1:2000
|
||||||
* - add to epoll with reference: index = 2000, splice = 1, orig = 0,
|
* - add to epoll with reference: index = 2000, splice = 1, orig = 0,
|
||||||
* ns = 0
|
* ns = 0
|
||||||
* - set udp_splice_to_init[V4][2000].orig_sock to s
|
* - update udp_splice_init[V4][2000].ts with current time
|
||||||
* - update udp_splice_to_init[V4][2000].ts with current time
|
|
||||||
*
|
*
|
||||||
* - reverse direction: 127.0.0.1:22 -> 127.0.0.1:2000 in init from socket s,
|
* - reverse direction: 127.0.0.1:22 -> 127.0.0.1:2000 in init from socket s,
|
||||||
* having epoll reference: index = 2000, splice = 1, orig = 0, ns = 0
|
* having epoll reference: index = 2000, splice = 1, orig = 0, ns = 0
|
||||||
* - if udp_splice_to_init[V4][2000].orig_sock:
|
* - if udp_splice_ns[V4][22].sock:
|
||||||
* - send to udp_splice_to_init[V4][2000].orig_sock, with destination port
|
* - send to udp_splice_ns[V4][22].sock, with destination port 2000
|
||||||
* 2000
|
|
||||||
* - otherwise, discard
|
* - otherwise, discard
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -137,25 +133,21 @@ struct udp_tap_port {
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct udp_splice_flow - Spliced "connection"
|
* struct udp_splice_port - Bound socket for spliced communication
|
||||||
* @orig_sock: Originating socket, bound to dest port in source ns of
|
* @sock: Socket bound to index port
|
||||||
* originating datagram
|
|
||||||
* @target_sock: Target socket, bound to source port of originating
|
|
||||||
* datagram in dest ns
|
|
||||||
* @ts: Activity timestamp
|
* @ts: Activity timestamp
|
||||||
*/
|
*/
|
||||||
struct udp_splice_flow {
|
struct udp_splice_port {
|
||||||
int orig_sock;
|
int sock;
|
||||||
int target_sock;
|
|
||||||
time_t ts;
|
time_t ts;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Port tracking, arrays indexed by packet source port (host order) */
|
/* Port tracking, arrays indexed by packet source port (host order) */
|
||||||
static struct udp_tap_port udp_tap_map [IP_VERSIONS][NUM_PORTS];
|
static struct udp_tap_port udp_tap_map [IP_VERSIONS][NUM_PORTS];
|
||||||
|
|
||||||
/* Spliced "connections" indexed by bound port of target_sock (host order) */
|
/* "Spliced" sockets indexed by bound port (host order) */
|
||||||
static struct udp_splice_flow udp_splice_to_ns [IP_VERSIONS][NUM_PORTS];
|
static struct udp_splice_port udp_splice_ns [IP_VERSIONS][NUM_PORTS];
|
||||||
static struct udp_splice_flow udp_splice_to_init[IP_VERSIONS][NUM_PORTS];
|
static struct udp_splice_port udp_splice_init[IP_VERSIONS][NUM_PORTS];
|
||||||
|
|
||||||
enum udp_act_type {
|
enum udp_act_type {
|
||||||
UDP_ACT_TAP,
|
UDP_ACT_TAP,
|
||||||
|
@ -397,7 +389,6 @@ static void udp_sock6_iov_init(void)
|
||||||
* udp_splice_new() - Create and prepare socket for "spliced" binding
|
* udp_splice_new() - Create and prepare socket for "spliced" binding
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
* @v6: Set for IPv6 sockets
|
* @v6: Set for IPv6 sockets
|
||||||
* @bound_sock: Originating bound socket
|
|
||||||
* @src: Source port of original connection, host order
|
* @src: Source port of original connection, host order
|
||||||
* @splice: UDP_BACK_TO_INIT from init, UDP_BACK_TO_NS from namespace
|
* @splice: UDP_BACK_TO_INIT from init, UDP_BACK_TO_NS from namespace
|
||||||
*
|
*
|
||||||
|
@ -405,22 +396,21 @@ static void udp_sock6_iov_init(void)
|
||||||
*
|
*
|
||||||
* #syscalls:pasta getsockname
|
* #syscalls:pasta getsockname
|
||||||
*/
|
*/
|
||||||
int udp_splice_new(const struct ctx *c, int v6, int bound_sock, in_port_t src,
|
int udp_splice_new(const struct ctx *c, int v6, in_port_t src, bool ns)
|
||||||
bool ns)
|
|
||||||
{
|
{
|
||||||
struct epoll_event ev = { .events = EPOLLIN | EPOLLRDHUP | EPOLLHUP };
|
struct epoll_event ev = { .events = EPOLLIN | EPOLLRDHUP | EPOLLHUP };
|
||||||
union epoll_ref ref = { .r.proto = IPPROTO_UDP,
|
union epoll_ref ref = { .r.proto = IPPROTO_UDP,
|
||||||
.r.p.udp.udp = { .splice = true, .ns = ns,
|
.r.p.udp.udp = { .splice = true, .ns = ns,
|
||||||
.v6 = v6, .port = src }
|
.v6 = v6, .port = src }
|
||||||
};
|
};
|
||||||
struct udp_splice_flow *flow;
|
struct udp_splice_port *sp;
|
||||||
int act, s;
|
int act, s;
|
||||||
|
|
||||||
if (ns) {
|
if (ns) {
|
||||||
flow = &udp_splice_to_ns[v6 ? V6 : V4][src];
|
sp = &udp_splice_ns[v6 ? V6 : V4][src];
|
||||||
act = UDP_ACT_SPLICE_NS;
|
act = UDP_ACT_SPLICE_NS;
|
||||||
} else {
|
} else {
|
||||||
flow = &udp_splice_to_init[v6 ? V6 : V4][src];
|
sp = &udp_splice_init[v6 ? V6 : V4][src];
|
||||||
act = UDP_ACT_SPLICE_INIT;
|
act = UDP_ACT_SPLICE_INIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -455,8 +445,7 @@ int udp_splice_new(const struct ctx *c, int v6, int bound_sock, in_port_t src,
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
flow->orig_sock = bound_sock;
|
sp->sock = s;
|
||||||
flow->target_sock = s;
|
|
||||||
bitmap_set(udp_act[v6 ? V6 : V4][act], src);
|
bitmap_set(udp_act[v6 ? V6 : V4][act], src);
|
||||||
|
|
||||||
ev.data.u64 = ref.u64;
|
ev.data.u64 = ref.u64;
|
||||||
|
@ -472,7 +461,6 @@ fail:
|
||||||
* struct udp_splice_new_ns_arg - Arguments for udp_splice_new_ns()
|
* struct udp_splice_new_ns_arg - Arguments for udp_splice_new_ns()
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
* @v6: Set for IPv6
|
* @v6: Set for IPv6
|
||||||
* @bound_sock: Originating bound socket
|
|
||||||
* @src: Source port of originating datagram, host order
|
* @src: Source port of originating datagram, host order
|
||||||
* @dst: Destination port of originating datagram, host order
|
* @dst: Destination port of originating datagram, host order
|
||||||
* @s: Newly created socket or negative error code
|
* @s: Newly created socket or negative error code
|
||||||
|
@ -480,7 +468,6 @@ fail:
|
||||||
struct udp_splice_new_ns_arg {
|
struct udp_splice_new_ns_arg {
|
||||||
const struct ctx *c;
|
const struct ctx *c;
|
||||||
int v6;
|
int v6;
|
||||||
int bound_sock;
|
|
||||||
in_port_t src;
|
in_port_t src;
|
||||||
int s;
|
int s;
|
||||||
};
|
};
|
||||||
|
@ -500,7 +487,7 @@ static int udp_splice_new_ns(void *arg)
|
||||||
if (ns_enter(a->c))
|
if (ns_enter(a->c))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
a->s = udp_splice_new(a->c, a->v6, a->bound_sock, a->src, true);
|
a->s = udp_splice_new(a->c, a->v6, a->src, true);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -542,9 +529,9 @@ static void udp_sock_handler_splice(const struct ctx *c, union epoll_ref ref,
|
||||||
if (ref.r.p.udp.udp.orig && !ref.r.p.udp.udp.ns) {
|
if (ref.r.p.udp.udp.orig && !ref.r.p.udp.udp.ns) {
|
||||||
src += c->udp.fwd_out.rdelta[src];
|
src += c->udp.fwd_out.rdelta[src];
|
||||||
|
|
||||||
if (!(s = udp_splice_to_ns[v6][src].target_sock)) {
|
if (!(s = udp_splice_ns[v6][src].sock)) {
|
||||||
struct udp_splice_new_ns_arg arg = {
|
struct udp_splice_new_ns_arg arg = {
|
||||||
c, v6, ref.r.s, src, -1,
|
c, v6, src, -1,
|
||||||
};
|
};
|
||||||
|
|
||||||
NS_CALL(udp_splice_new_ns, &arg);
|
NS_CALL(udp_splice_new_ns, &arg);
|
||||||
|
@ -552,21 +539,25 @@ static void udp_sock_handler_splice(const struct ctx *c, union epoll_ref ref,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
udp_splice_to_ns[v6][src].ts = now->tv_sec;
|
udp_splice_ns[v6][src].ts = now->tv_sec;
|
||||||
} else if (!ref.r.p.udp.udp.orig && ref.r.p.udp.udp.ns) {
|
} else if (!ref.r.p.udp.udp.orig && ref.r.p.udp.udp.ns) {
|
||||||
if (!(s = udp_splice_to_ns[v6][dst].orig_sock))
|
src += c->udp.fwd_in.rdelta[src];
|
||||||
|
|
||||||
|
if (!(s = udp_splice_init[v6][src].sock))
|
||||||
return;
|
return;
|
||||||
} else if (ref.r.p.udp.udp.orig && ref.r.p.udp.udp.ns) {
|
} else if (ref.r.p.udp.udp.orig && ref.r.p.udp.udp.ns) {
|
||||||
src += c->udp.fwd_in.rdelta[src];
|
src += c->udp.fwd_in.rdelta[src];
|
||||||
|
|
||||||
if (!(s = udp_splice_to_init[v6][src].target_sock)) {
|
if (!(s = udp_splice_init[v6][src].sock)) {
|
||||||
s = udp_splice_new(c, v6, ref.r.s, src, false);
|
s = udp_splice_new(c, v6, src, false);
|
||||||
if (s < 0)
|
if (s < 0)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
udp_splice_to_init[v6][src].ts = now->tv_sec;
|
udp_splice_init[v6][src].ts = now->tv_sec;
|
||||||
} else if (!ref.r.p.udp.udp.orig && !ref.r.p.udp.udp.ns) {
|
} else if (!ref.r.p.udp.udp.orig && !ref.r.p.udp.udp.ns) {
|
||||||
if (!(s = udp_splice_to_init[v6][dst].orig_sock))
|
src += c->udp.fwd_out.rdelta[src];
|
||||||
|
|
||||||
|
if (!(s = udp_splice_ns[v6][src].sock))
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
return;
|
return;
|
||||||
|
@ -1097,7 +1088,7 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
|
||||||
|
|
||||||
s = sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr,
|
s = sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr,
|
||||||
ifname, port, uref.u32);
|
ifname, port, uref.u32);
|
||||||
udp_splice_to_init[V4][port].target_sock = s;
|
udp_splice_init[V4][port].sock = s;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
uref.udp.splice = uref.udp.orig = uref.udp.ns = true;
|
uref.udp.splice = uref.udp.orig = uref.udp.ns = true;
|
||||||
|
@ -1106,7 +1097,7 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
|
||||||
|
|
||||||
s = sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr,
|
s = sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr,
|
||||||
ifname, port, uref.u32);
|
ifname, port, uref.u32);
|
||||||
udp_splice_to_ns[V4][port].target_sock = s;
|
udp_splice_ns[V4][port].sock = s;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1131,7 +1122,7 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
|
||||||
|
|
||||||
s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr,
|
s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr,
|
||||||
ifname, port, uref.u32);
|
ifname, port, uref.u32);
|
||||||
udp_splice_to_init[V6][port].target_sock = s;
|
udp_splice_init[V6][port].sock = s;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
bind_addr = &in6addr_loopback;
|
bind_addr = &in6addr_loopback;
|
||||||
|
@ -1139,7 +1130,7 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
|
||||||
|
|
||||||
s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr,
|
s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr,
|
||||||
ifname, port, uref.u32);
|
ifname, port, uref.u32);
|
||||||
udp_splice_to_ns[V6][port].target_sock = s;
|
udp_splice_ns[V6][port].sock = s;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1242,7 +1233,7 @@ int udp_init(struct ctx *c)
|
||||||
static void udp_timer_one(struct ctx *c, int v6, enum udp_act_type type,
|
static void udp_timer_one(struct ctx *c, int v6, enum udp_act_type type,
|
||||||
in_port_t port, const struct timespec *ts)
|
in_port_t port, const struct timespec *ts)
|
||||||
{
|
{
|
||||||
struct udp_splice_flow *flow;
|
struct udp_splice_port *sp;
|
||||||
struct udp_tap_port *tp;
|
struct udp_tap_port *tp;
|
||||||
int s = -1;
|
int s = -1;
|
||||||
|
|
||||||
|
@ -1257,17 +1248,17 @@ static void udp_timer_one(struct ctx *c, int v6, enum udp_act_type type,
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case UDP_ACT_SPLICE_INIT:
|
case UDP_ACT_SPLICE_INIT:
|
||||||
flow = &udp_splice_to_init[v6 ? V6 : V4][port];
|
sp = &udp_splice_init[v6 ? V6 : V4][port];
|
||||||
|
|
||||||
if (ts->tv_sec - flow->ts > UDP_CONN_TIMEOUT)
|
if (ts->tv_sec - sp->ts > UDP_CONN_TIMEOUT)
|
||||||
s = flow->target_sock;
|
s = sp->sock;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case UDP_ACT_SPLICE_NS:
|
case UDP_ACT_SPLICE_NS:
|
||||||
flow = &udp_splice_to_ns[v6 ? V6 : V4][port];
|
sp = &udp_splice_ns[v6 ? V6 : V4][port];
|
||||||
|
|
||||||
if (ts->tv_sec - flow->ts > UDP_CONN_TIMEOUT)
|
if (ts->tv_sec - sp->ts > UDP_CONN_TIMEOUT)
|
||||||
s = flow->target_sock;
|
s = sp->sock;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
Loading…
Reference in a new issue