flow: Common address information for target side

Require the address and port information for the target
(non-initiating) side to be populated when a flow enters TGT state.
Implement that for TCP and ICMP.  For now this leaves some information
redundantly recorded in both generic and type-specific fields.  We'll
fix that in later patches.

For TCP we now use the information from the flow to construct the
destination socket address in both tcp_conn_from_tap() and
tcp_splice_connect().

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
David Gibson 2024-07-18 15:26:28 +10:00 committed by Stefano Brivio
parent 8012f5ff55
commit 4e2d36e83f
8 changed files with 153 additions and 78 deletions

38
flow.c
View file

@ -165,8 +165,10 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
*/
static void flow_set_state(struct flow_common *f, enum flow_state state)
{
char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];
char estr0[INANY_ADDRSTRLEN], fstr0[INANY_ADDRSTRLEN];
char estr1[INANY_ADDRSTRLEN], fstr1[INANY_ADDRSTRLEN];
const struct flowside *ini = &f->side[INISIDE];
const struct flowside *tgt = &f->side[TGTSIDE];
uint8_t oldstate = f->state;
ASSERT(state < FLOW_NUM_STATES);
@ -177,19 +179,24 @@ static void flow_set_state(struct flow_common *f, enum flow_state state)
FLOW_STATE(f));
if (MAX(state, oldstate) >= FLOW_STATE_TGT)
flow_log_(f, LOG_DEBUG, "%s [%s]:%hu -> [%s]:%hu => %s",
flow_log_(f, LOG_DEBUG,
"%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu",
pif_name(f->pif[INISIDE]),
inany_ntop(&ini->eaddr, estr, sizeof(estr)),
inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
ini->eport,
inany_ntop(&ini->faddr, fstr, sizeof(fstr)),
inany_ntop(&ini->faddr, fstr0, sizeof(fstr0)),
ini->fport,
pif_name(f->pif[TGTSIDE]));
pif_name(f->pif[TGTSIDE]),
inany_ntop(&tgt->faddr, fstr1, sizeof(fstr1)),
tgt->fport,
inany_ntop(&tgt->eaddr, estr1, sizeof(estr1)),
tgt->eport);
else if (MAX(state, oldstate) >= FLOW_STATE_INI)
flow_log_(f, LOG_DEBUG, "%s [%s]:%hu -> [%s]:%hu => ?",
pif_name(f->pif[INISIDE]),
inany_ntop(&ini->eaddr, estr, sizeof(estr)),
inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
ini->eport,
inany_ntop(&ini->faddr, fstr, sizeof(fstr)),
inany_ntop(&ini->faddr, fstr0, sizeof(fstr0)),
ini->fport);
}
@ -261,21 +268,34 @@ const struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif,
}
/**
* flow_target() - Move flow to TGT, setting TGTSIDE details
* flow_target_af() - Move flow to TGT, setting TGTSIDE details
* @flow: Flow to change state
* @pif: pif of the target side
* @af: Address family for @saddr and @daddr
* @saddr: Source address (pointer to in_addr or in6_addr)
* @sport: Source port
* @daddr: Destination address (pointer to in_addr or in6_addr)
* @dport: Destination port
*
* Return: pointer to the target flowside information
*/
void flow_target(union flow *flow, uint8_t pif)
const struct flowside *flow_target_af(union flow *flow, uint8_t pif,
sa_family_t af,
const void *saddr, in_port_t sport,
const void *daddr, in_port_t dport)
{
struct flow_common *f = &flow->f;
struct flowside *tgt = &f->side[TGTSIDE];
ASSERT(pif != PIF_NONE);
ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_INI);
ASSERT(f->type == FLOW_TYPE_NONE);
ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[TGTSIDE] == PIF_NONE);
flowside_from_af(tgt, af, daddr, dport, saddr, sport);
f->pif[TGTSIDE] = pif;
flow_set_state(f, FLOW_STATE_TGT);
return tgt;
}
/**

View file

@ -134,7 +134,10 @@ const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif,
const struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif,
const union sockaddr_inany *ssa,
in_port_t dport);
void flow_target(union flow *flow, uint8_t pif);
const struct flowside *flow_target_af(union flow *flow, uint8_t pif,
sa_family_t af,
const void *saddr, in_port_t sport,
const void *daddr, in_port_t dport);
union flow *flow_set_type(union flow *flow, enum flow_type type);
#define FLOW_SET_TYPE(flow_, t_, var_) (&flow_set_type((flow_), (t_))->var_)

3
icmp.c
View file

@ -183,7 +183,8 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
return NULL;
flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, id);
flow_target(flow, PIF_HOST);
/* FIXME: Record outbound source address when known */
flow_target_af(flow, PIF_HOST, af, NULL, 0, daddr, 0);
pingf = FLOW_SET_TYPE(flow, flowtype, ping);
pingf->seq = -1;

View file

@ -187,7 +187,6 @@ static inline bool inany_is_unspecified(const union inany_addr *a)
*
* Return: true if @a is in fe80::/10 (IPv6 link local unicast)
*/
/* cppcheck-suppress unusedFunction */
static inline bool inany_is_linklocal6(const union inany_addr *a)
{
return IN6_IS_ADDR_LINKLOCAL(&a->a6);

40
pif.c
View file

@ -7,9 +7,14 @@
#include <stdint.h>
#include <assert.h>
#include <netinet/in.h>
#include "util.h"
#include "pif.h"
#include "siphash.h"
#include "ip.h"
#include "inany.h"
#include "passt.h"
const char *pif_type_str[] = {
[PIF_NONE] = "<none>",
@ -19,3 +24,38 @@ const char *pif_type_str[] = {
};
static_assert(ARRAY_SIZE(pif_type_str) == PIF_NUM_TYPES,
"pif_type_str[] doesn't match enum pif_type");
/**
 * pif_sockaddr() - Construct a socket address suitable for an interface
 * @c:		Execution context
 * @sa:		Pointer to sockaddr to fill in
 * @sl:		Updated to relevant length of initialised @sa
 * @pif:	Interface to create the socket address
 * @addr:	IPv[46] address
 * @port:	Port (host byte order)
 *
 * @pif must be a socket based interface (see pif_is_socket()).  When
 * inany_v4() yields an address for @addr, an AF_INET socket address is
 * written to @sa, otherwise an AF_INET6 one.
 */
void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl,
		  uint8_t pif, const union inany_addr *addr, in_port_t port)
{
	const struct in_addr *a4 = inany_v4(addr);

	ASSERT(pif_is_socket(pif));

	if (a4) {
		/* IPv4: fill in every field of the sockaddr_in, padding too */
		memset(&sa->sa4.sin_zero, 0, sizeof(sa->sa4.sin_zero));
		sa->sa_family = AF_INET;
		sa->sa4.sin_port = htons(port);
		sa->sa4.sin_addr = *a4;
		*sl = sizeof(sa->sa4);
		return;
	}

	/* IPv6 */
	sa->sa_family = AF_INET6;
	sa->sa6.sin6_port = htons(port);
	sa->sa6.sin6_flowinfo = 0;
	sa->sa6.sin6_addr = addr->a6;

	/* Only link-local addresses on PIF_HOST get a scope: the interface
	 * index from the context.  Everything else is left unscoped.
	 */
	sa->sa6.sin6_scope_id =
		(pif == PIF_HOST && IN6_IS_ADDR_LINKLOCAL(&addr->a6)) ?
		c->ifi6 : 0;

	*sl = sizeof(sa->sa6);
}

17
pif.h
View file

@ -7,6 +7,9 @@
#ifndef PIF_H
#define PIF_H
union inany_addr;
union sockaddr_inany;
/**
* enum pif_type - Type of passt/pasta interface ("pif")
*
@ -43,4 +46,18 @@ static inline const char *pif_name(uint8_t pif)
return pif_type(pif);
}
/**
 * pif_is_socket() - Is interface implemented via L4 sockets?
 * @pif:	pif to check
 *
 * Return: true if @pif is an L4 socket based interface (PIF_HOST or
 *         PIF_SPLICE), otherwise false
 */
static inline bool pif_is_socket(uint8_t pif)
{
	if (pif == PIF_HOST)
		return true;

	return pif == PIF_SPLICE;
}
void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl,
uint8_t pif, const union inany_addr *addr, in_port_t port);
#endif /* PIF_H */

82
tcp.c
View file

@ -1647,18 +1647,10 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
{
in_port_t srcport = ntohs(th->source);
in_port_t dstport = ntohs(th->dest);
struct sockaddr_in addr4 = {
.sin_family = AF_INET,
.sin_port = htons(dstport),
.sin_addr = *(struct in_addr *)daddr,
};
struct sockaddr_in6 addr6 = {
.sin6_family = AF_INET6,
.sin6_port = htons(dstport),
.sin6_addr = *(struct in6_addr *)daddr,
};
const struct sockaddr *sa;
const struct flowside *ini, *tgt;
struct tcp_tap_conn *conn;
union inany_addr dstaddr; /* FIXME: Avoid bulky temporary */
union sockaddr_inany sa;
union flow *flow;
int s = -1, mss;
socklen_t sl;
@ -1666,9 +1658,22 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
if (!(flow = flow_alloc()))
return;
flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, daddr, dstport);
ini = flow_initiate_af(flow, PIF_TAP,
af, saddr, srcport, daddr, dstport);
flow_target(flow, PIF_HOST);
dstaddr = ini->faddr;
if (!c->no_map_gw) {
if (inany_equals4(&dstaddr, &c->ip4.gw))
dstaddr = inany_loopback4;
else if (inany_equals6(&dstaddr, &c->ip6.gw))
dstaddr = inany_loopback6;
}
/* FIXME: Record outbound source address when known */
tgt = flow_target_af(flow, PIF_HOST, AF_INET6,
NULL, 0, /* Kernel decides source address */
&dstaddr, dstport);
conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
if (af == AF_INET) {
@ -1687,9 +1692,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
dstport);
goto cancel;
}
sa = (struct sockaddr *)&addr4;
sl = sizeof(addr4);
} else if (af == AF_INET6) {
if (IN6_IS_ADDR_UNSPECIFIED(saddr) ||
IN6_IS_ADDR_MULTICAST(saddr) || srcport == 0 ||
@ -1704,9 +1706,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
dstport);
goto cancel;
}
sa = (struct sockaddr *)&addr6;
sl = sizeof(addr6);
} else {
ASSERT(0);
}
@ -1714,12 +1713,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
if ((s = tcp_conn_sock(c, af)) < 0)
goto cancel;
if (!c->no_map_gw) {
if (af == AF_INET && IN4_ARE_ADDR_EQUAL(daddr, &c->ip4.gw))
addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
if (af == AF_INET6 && IN6_ARE_ADDR_EQUAL(daddr, &c->ip6.gw))
addr6.sin6_addr = in6addr_loopback;
}
pif_sockaddr(c, &sa, &sl, PIF_HOST, &tgt->eaddr, tgt->eport);
/* Use bind() to check if the target address is local (EADDRINUSE or
* similar) and already bound, and set the LOCAL flag in that case.
@ -1731,7 +1725,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
*
* So, if bind() succeeds, close the socket, get a new one, and proceed.
*/
if (bind(s, sa, sl)) {
if (bind(s, &sa.sa, sl)) {
if (errno != EADDRNOTAVAIL && errno != EACCES)
conn_flag(c, conn, LOCAL);
} else {
@ -1741,7 +1735,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
goto cancel;
}
if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr)) {
if (inany_is_linklocal6(&tgt->eaddr)) {
struct sockaddr_in6 addr6_ll = {
.sin6_family = AF_INET6,
.sin6_addr = c->ip6.addr_ll,
@ -1749,6 +1743,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
};
if (bind(s, (struct sockaddr *)&addr6_ll, sizeof(addr6_ll)))
goto cancel;
} else if (!inany_is_loopback(&tgt->eaddr)) {
tcp_bind_outbound(c, s, af);
}
conn->sock = s;
@ -1784,12 +1780,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
tcp_hash_insert(c, conn);
if ((af == AF_INET && !IN4_IS_ADDR_LOOPBACK(&addr4.sin_addr)) ||
(af == AF_INET6 && !IN6_IS_ADDR_LOOPBACK(&addr6.sin6_addr) &&
!IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr)))
tcp_bind_outbound(c, s, af);
if (connect(s, sa, sl)) {
if (connect(s, &sa.sa, sl)) {
if (errno != EINPROGRESS) {
tcp_rst(c, conn);
goto cancel;
@ -2297,9 +2288,25 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport,
const union sockaddr_inany *sa,
const struct timespec *now)
{
union inany_addr saddr, daddr; /* FIXME: avoid bulky temporaries */
struct tcp_tap_conn *conn;
in_port_t srcport;
flow_target(flow, PIF_TAP);
inany_from_sockaddr(&saddr, &srcport, sa);
tcp_snat_inbound(c, &saddr);
if (inany_v4(&saddr)) {
daddr = inany_from_v4(c->ip4.addr_seen);
} else {
if (inany_is_linklocal6(&saddr))
daddr.a6 = c->ip6.addr_ll_seen;
else
daddr.a6 = c->ip6.addr_seen;
}
dstport += c->tcp.fwd_in.delta[dstport];
flow_target_af(flow, PIF_TAP, AF_INET6,
&saddr, srcport, &daddr, dstport);
conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
conn->sock = s;
@ -2307,10 +2314,9 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport,
conn->ws_to_tap = conn->ws_from_tap = 0;
conn_event(c, conn, SOCK_ACCEPTED);
inany_from_sockaddr(&conn->faddr, &conn->fport, sa);
conn->eport = dstport + c->tcp.fwd_in.delta[dstport];
tcp_snat_inbound(c, &conn->faddr);
conn->faddr = saddr;
conn->fport = srcport;
conn->eport = dstport;
tcp_seq_init(c, conn, now);
tcp_hash_insert(c, conn);

View file

@ -340,31 +340,20 @@ static int tcp_splice_connect_finish(const struct ctx *c,
* tcp_splice_connect() - Create and connect socket for new spliced connection
* @c: Execution context
* @conn: Connection pointer
* @af: Address family
* @pif: pif on which to create socket
* @port: Destination port, host order
*
* Return: 0 for connect() succeeded or in progress, negative value on error
*/
static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
sa_family_t af, uint8_t pif, in_port_t port)
static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
{
struct sockaddr_in6 addr6 = {
.sin6_family = AF_INET6,
.sin6_port = htons(port),
.sin6_addr = IN6ADDR_LOOPBACK_INIT,
};
struct sockaddr_in addr4 = {
.sin_family = AF_INET,
.sin_port = htons(port),
.sin_addr = IN4ADDR_LOOPBACK_INIT,
};
const struct sockaddr *sa;
const struct flowside *tgt = &conn->f.side[TGTSIDE];
sa_family_t af = inany_v4(&tgt->eaddr) ? AF_INET : AF_INET6;
uint8_t tgtpif = conn->f.pif[TGTSIDE];
union sockaddr_inany sa;
socklen_t sl;
if (pif == PIF_HOST)
if (tgtpif == PIF_HOST)
conn->s[1] = tcp_conn_sock(c, af);
else if (pif == PIF_SPLICE)
else if (tgtpif == PIF_SPLICE)
conn->s[1] = tcp_conn_sock_ns(c, af);
else
ASSERT(0);
@ -378,15 +367,9 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
conn->s[1]);
}
if (CONN_V6(conn)) {
sa = (struct sockaddr *)&addr6;
sl = sizeof(addr6);
} else {
sa = (struct sockaddr *)&addr4;
sl = sizeof(addr4);
}
pif_sockaddr(c, &sa, &sl, tgtpif, &tgt->eaddr, tgt->eport);
if (connect(conn->s[1], sa, sl)) {
if (connect(conn->s[1], &sa.sa, sl)) {
if (errno != EINPROGRESS) {
flow_trace(conn, "Couldn't connect socket for splice: %s",
strerror(errno));
@ -491,7 +474,13 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
return false;
}
flow_target(flow, tgtpif);
/* FIXME: Record outbound source address when known */
if (af == AF_INET)
flow_target_af(flow, tgtpif, AF_INET,
NULL, 0, &in4addr_loopback, dstport);
else
flow_target_af(flow, tgtpif, AF_INET6,
NULL, 0, &in6addr_loopback, dstport);
conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice);
conn->flags = af == AF_INET ? 0 : SPLICE_V6;
@ -503,7 +492,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
if (setsockopt(s0, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int)))
flow_trace(conn, "failed to set TCP_QUICKACK on %i", s0);
if (tcp_splice_connect(c, conn, af, tgtpif, dstport))
if (tcp_splice_connect(c, conn))
conn_flag(c, conn, CLOSING);
FLOW_ACTIVATE(conn);