flow: Clarify flow entry life cycle, introduce uniform logging

Our allocation scheme for flow entries means there are some
non-obvious constraints on what can be done with an entry, and when.
Add a big doc comment explaining the life cycle.

In addition, make a FLOW_START() macro to mark one of the important
transitions.  This encourages correct usage, by making it natural to
only access the flow type specific structure after calling it.  It
also logs that a new flow has been created, which is useful for
debugging.

We also add logging when a flow's life cycle ends.  This doesn't need a
new helper, because it can only happen either from flow_alloc_cancel()
or from the flow deferred handler.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
David Gibson 2024-02-28 22:25:10 +11:00 committed by Stefano Brivio
parent d0550f97cd
commit 0f938c3b9a
5 changed files with 95 additions and 18 deletions

77
flow.c
View file

@ -34,6 +34,46 @@ static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
/* Global Flow Table */ /* Global Flow Table */
/**
* DOC: Theory of Operation - flow entry life cycle
*
* An individual flow table entry moves through these logical states, usually in
* this order.
*
* FREE - Part of the general pool of free flow table entries
* Operations:
* - flow_alloc() finds an entry and moves it to ALLOC state
*
* ALLOC - A tentatively allocated entry
* Operations:
* - flow_alloc_cancel() returns the entry to FREE state
* - FLOW_START() sets the entry's type and moves it to START state
* Caveats:
* - It's not safe to write fields in the flow entry
* - It's not safe to allocate further entries with flow_alloc()
* - It's not safe to return to the main epoll loop (use FLOW_START()
* to move to START state before doing so)
* - It's not safe to use flow_*() logging functions
*
* START - An entry being prepared by flow type specific code
* Operations:
* - Flow type specific fields may be accessed
* - flow_*() logging functions
* - flow_alloc_cancel() returns the entry to FREE state
* Caveats:
* - Returning to the main epoll loop or allocating another entry
* with flow_alloc() implicitly moves the entry to ACTIVE state.
*
* ACTIVE - An active flow entry managed by flow type specific code
* Operations:
* - Flow type specific fields may be accessed
* - flow_*() logging functions
* - Flow may be expired by returning 'true' from flow type specific
* deferred or timer handler. This will return it to FREE state.
* Caveats:
* - It's not safe to call flow_alloc_cancel()
*/
/** /**
* DOC: Theory of Operation - allocating and freeing flow entries * DOC: Theory of Operation - allocating and freeing flow entries
* *
@ -109,6 +149,39 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
logmsg(pri, "Flow %u (%s): %s", flow_idx(f), FLOW_TYPE(f), msg); logmsg(pri, "Flow %u (%s): %s", flow_idx(f), FLOW_TYPE(f), msg);
} }
/**
 * flow_start() - Set flow type for new flow and log
 * @flow:	Flow table entry whose type is being set
 * @type:	Flow type to store in the entry
 * @iniside:	Which side initiated the new flow (not used by this function)
 *
 * Return: @flow, unchanged, so the call can be used inside an expression
 *
 * Call this before setting any of the flow type specific fields in the flow
 * table entry.
 */
union flow *flow_start(union flow *flow, enum flow_type type,
		       unsigned iniside)
{
	const char *type_name;

	(void)iniside;	/* Explicitly discarded: nothing here depends on it */

	/* Store the type first: the name is looked up from the stored value */
	flow->f.type = type;
	type_name = flow_type_str[flow->f.type];

	flow_dbg(flow, "START %s", type_name);

	return flow;
}
/**
 * flow_end() - Clear flow type for finished flow and log
 * @flow:	Flow table entry to clear
 *
 * An entry whose type is already FLOW_TYPE_NONE is left untouched and nothing
 * is logged for it.
 */
static void flow_end(union flow *flow)
{
	if (flow->f.type != FLOW_TYPE_NONE) {
		/* Log before clearing, while the type name is still available */
		flow_dbg(flow, "END %s", flow_type_str[flow->f.type]);
		flow->f.type = FLOW_TYPE_NONE;
	}
	/* Otherwise: nothing to do */
}
/** /**
* flow_alloc() - Allocate a new flow * flow_alloc() - Allocate a new flow
* *
@ -157,7 +230,7 @@ void flow_alloc_cancel(union flow *flow)
{ {
ASSERT(flow_first_free > FLOW_IDX(flow)); ASSERT(flow_first_free > FLOW_IDX(flow));
flow->f.type = FLOW_TYPE_NONE; flow_end(flow);
/* Put it back in a length 1 free cluster, don't attempt to fully /* Put it back in a length 1 free cluster, don't attempt to fully
* reverse flow_alloc()'s steps. This will get folded together the next * reverse flow_alloc()'s steps. This will get folded together the next
* time flow_defer_handler() runs anyway */ * time flow_defer_handler() runs anyway */
@ -227,7 +300,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
} }
if (closed) { if (closed) {
flow->f.type = FLOW_TYPE_NONE; flow_end(flow);
if (free_head) { if (free_head) {
/* Add slot to current free cluster */ /* Add slot to current free cluster */

5
flow.h
View file

@ -45,6 +45,11 @@ struct flow_common {
#define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */ #define FLOW_TABLE_PRESSURE 30 /* % of FLOW_MAX */
#define FLOW_FILE_PRESSURE 30 /* % of c->nofile */ #define FLOW_FILE_PRESSURE 30 /* % of c->nofile */
/* flow_start() sets the type of a new flow, logs it, and returns @flow */
union flow *flow_start(union flow *flow, enum flow_type type,
unsigned iniside);
/**
 * FLOW_START() - Start a flow and get its flow type specific structure
 * @flow_:	Flow table entry, passed through to flow_start()
 * @t_:		Flow type, passed through to flow_start()
 * @var_:	Name of the union flow member to take the address of
 * @i_:		Initiating side, passed through to flow_start()
 *
 * Return: address of member @var_ within the entry returned by flow_start()
 */
#define FLOW_START(flow_, t_, var_, i_) \
(&flow_start((flow_), (t_), (i_))->var_)
/** /**
* struct flow_sidx - ID for one side of a specific flow * struct flow_sidx - ID for one side of a specific flow
* @side: Side referenced (0 or 1) * @side: Side referenced (0 or 1)

15
tcp.c
View file

@ -1976,8 +1976,7 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
goto cancel; goto cancel;
} }
conn = &flow->tcp; conn = FLOW_START(flow, FLOW_TCP, tcp, TAPSIDE);
conn->f.type = FLOW_TCP;
conn->sock = s; conn->sock = s;
conn->timer = -1; conn->timer = -1;
conn_event(c, conn, TAP_SYN_RCVD); conn_event(c, conn, TAP_SYN_RCVD);
@ -2681,18 +2680,19 @@ static void tcp_snat_inbound(const struct ctx *c, union inany_addr *addr)
* tcp_tap_conn_from_sock() - Initialize state for non-spliced connection * tcp_tap_conn_from_sock() - Initialize state for non-spliced connection
* @c: Execution context * @c: Execution context
* @ref: epoll reference of listening socket * @ref: epoll reference of listening socket
* @conn: connection structure to initialize * @flow: flow to initialise
* @s: Accepted socket * @s: Accepted socket
* @sa: Peer socket address (from accept()) * @sa: Peer socket address (from accept())
* @now: Current timestamp * @now: Current timestamp
*/ */
static void tcp_tap_conn_from_sock(struct ctx *c, static void tcp_tap_conn_from_sock(struct ctx *c,
union tcp_listen_epoll_ref ref, union tcp_listen_epoll_ref ref,
struct tcp_tap_conn *conn, int s, union flow *flow, int s,
const union sockaddr_inany *sa, const union sockaddr_inany *sa,
const struct timespec *now) const struct timespec *now)
{ {
conn->f.type = FLOW_TCP; struct tcp_tap_conn *conn = FLOW_START(flow, FLOW_TCP, tcp, SOCKSIDE);
conn->sock = s; conn->sock = s;
conn->timer = -1; conn->timer = -1;
conn->ws_to_tap = conn->ws_from_tap = 0; conn->ws_to_tap = conn->ws_from_tap = 0;
@ -2738,11 +2738,10 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
goto cancel; goto cancel;
if (c->mode == MODE_PASTA && if (c->mode == MODE_PASTA &&
tcp_splice_conn_from_sock(c, ref.tcp_listen, &flow->tcp_splice, tcp_splice_conn_from_sock(c, ref.tcp_listen, flow, s, &sa))
s, &sa))
return; return;
tcp_tap_conn_from_sock(c, ref.tcp_listen, &flow->tcp, s, &sa, now); tcp_tap_conn_from_sock(c, ref.tcp_listen, flow, s, &sa, now);
return; return;
cancel: cancel:

View file

@ -432,7 +432,7 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
* tcp_splice_conn_from_sock() - Attempt to init state for a spliced connection * tcp_splice_conn_from_sock() - Attempt to init state for a spliced connection
* @c: Execution context * @c: Execution context
* @ref: epoll reference of listening socket * @ref: epoll reference of listening socket
* @conn: connection structure to initialize * @flow: flow to initialise
* @s: Accepted socket * @s: Accepted socket
* @sa: Peer address of connection * @sa: Peer address of connection
* *
@ -440,10 +440,10 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
* #syscalls:pasta setsockopt * #syscalls:pasta setsockopt
*/ */
bool tcp_splice_conn_from_sock(const struct ctx *c, bool tcp_splice_conn_from_sock(const struct ctx *c,
union tcp_listen_epoll_ref ref, union tcp_listen_epoll_ref ref, union flow *flow,
struct tcp_splice_conn *conn, int s, int s, const union sockaddr_inany *sa)
const union sockaddr_inany *sa)
{ {
struct tcp_splice_conn *conn;
union inany_addr aany; union inany_addr aany;
in_port_t port; in_port_t port;
@ -453,7 +453,8 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
if (!inany_is_loopback(&aany)) if (!inany_is_loopback(&aany))
return false; return false;
conn->f.type = FLOW_TCP_SPLICE; conn = FLOW_START(flow, FLOW_TCP_SPLICE, tcp_splice, 0);
conn->flags = inany_v4(&aany) ? 0 : SPLICE_V6; conn->flags = inany_v4(&aany) ? 0 : SPLICE_V6;
conn->s[0] = s; conn->s[0] = s;
conn->s[1] = -1; conn->s[1] = -1;

View file

@ -12,9 +12,8 @@ union sockaddr_inany;
void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref, void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
uint32_t events); uint32_t events);
bool tcp_splice_conn_from_sock(const struct ctx *c, bool tcp_splice_conn_from_sock(const struct ctx *c,
union tcp_listen_epoll_ref ref, union tcp_listen_epoll_ref ref, union flow *flow,
struct tcp_splice_conn *conn, int s, int s, const union sockaddr_inany *sa);
const union sockaddr_inany *sa);
void tcp_splice_init(struct ctx *c); void tcp_splice_init(struct ctx *c);
#endif /* TCP_SPLICE_H */ #endif /* TCP_SPLICE_H */