tcp: unify payload and flags l2 frames array

In order to reduce static memory and code footprint, we merge
the array for l2 flag frames into the one for payload frames.

This change also ensures that no flag message will be sent out
over the l2 media bypassing already queued payload messages.

Performance measurements with iperf3, where we force all
traffic via the tap queue, show no significant difference:

Dual traffic both directions sinmultaneously, with patch:
========================================================
host->ns:
--------
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-100.00 sec  36.3 GBytes  3.12 Gbits/sec  4759       sender
[  5]   0.00-100.04 sec  36.3 GBytes  3.11 Gbits/sec             receiver

ns->host:
---------
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-100.00 sec   321 GBytes  27.6 Gbits/sec            receiver

Dual traffic both directions sinmultaneously, without patch:
============================================================
host->ns:
--------
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-100.00 sec  35.0 GBytes  3.01 Gbits/sec  6001       sender
[  5]   0.00-100.04 sec  34.8 GBytes  2.99 Gbits/sec            receiver

ns->host
--------
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-100.00 sec   345 GBytes  29.6 Gbits/sec            receiver

Single connection, with patch:
==============================
host->ns:
---------
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-100.00 sec   138 GBytes  11.8 Gbits/sec  922       sender
[  5]   0.00-100.04 sec   138 GBytes  11.8 Gbits/sec            receiver

ns->host:
-----------
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-100.00 sec   430 GBytes  36.9 Gbits/sec            receiver

Single connection, without patch:
=================================
host->ns:
------------
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-100.00 sec   139 GBytes  11.9 Gbits/sec  900       sender
[  5]   0.00-100.04 sec   139 GBytes  11.9 Gbits/sec            receiver

ns->host:
---------
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-100.00 sec   440 GBytes  37.8 Gbits/sec            receiver

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
Jon Maloy 2024-11-05 20:07:44 -05:00 committed by Stefano Brivio
parent 9a0e544f05
commit 78da088f7b
4 changed files with 17 additions and 70 deletions

1
tcp.c
View file

@ -937,7 +937,6 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn)
/* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */ /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */
void tcp_defer_handler(struct ctx *c) void tcp_defer_handler(struct ctx *c)
{ {
tcp_flags_flush(c);
tcp_payload_flush(c); tcp_payload_flush(c);
} }

View file

@ -20,7 +20,7 @@
#include <netinet/ip.h> #include <netinet/ip.h>
#include <linux/tcp.h> #include <netinet/tcp.h>
#include "util.h" #include "util.h"
#include "ip.h" #include "ip.h"
@ -59,22 +59,10 @@ static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516")
static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM]; static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
static unsigned int tcp_payload_used; static unsigned int tcp_payload_used;
static struct tap_hdr tcp_flags_tap_hdr[TCP_FRAMES_MEM];
/* IPv4 headers for TCP segment without payload */
static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM];
/* TCP segments without payload for IPv4 frames */
static struct tcp_flags_t tcp_flags[TCP_FRAMES_MEM];
static unsigned int tcp_flags_used;
/* IPv6 headers for TCP segment without payload */
static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM];
/* recvmsg()/sendmsg() data for tap */ /* recvmsg()/sendmsg() data for tap */
static struct iovec iov_sock [TCP_FRAMES_MEM + 1]; static struct iovec iov_sock [TCP_FRAMES_MEM + 1];
static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS]; static struct iovec tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
static struct iovec tcp_l2_flags_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
/** /**
* tcp_update_l2_buf() - Update Ethernet header buffers with addresses * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
@ -103,15 +91,6 @@ void tcp_sock_iov_init(const struct ctx *c)
for (i = 0; i < ARRAY_SIZE(tcp_payload); i++) { for (i = 0; i < ARRAY_SIZE(tcp_payload); i++) {
tcp6_payload_ip[i] = ip6; tcp6_payload_ip[i] = ip6;
tcp4_payload_ip[i] = iph; tcp4_payload_ip[i] = iph;
tcp_payload[i].th.doff = sizeof(struct tcphdr) / 4;
tcp_payload[i].th.ack = 1;
}
for (i = 0; i < ARRAY_SIZE(tcp_flags); i++) {
tcp6_flags_ip[i] = ip6;
tcp4_flags_ip[i] = iph;
tcp_flags[i].th.doff = sizeof(struct tcphdr) / 4;
tcp_flags[i].th.ack = 1;
} }
for (i = 0; i < TCP_FRAMES_MEM; i++) { for (i = 0; i < TCP_FRAMES_MEM; i++) {
@ -121,25 +100,6 @@ void tcp_sock_iov_init(const struct ctx *c)
iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr); iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i]; iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i];
} }
for (i = 0; i < TCP_FRAMES_MEM; i++) {
struct iovec *iov = tcp_l2_flags_iov[i];
iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_flags_tap_hdr[i]);
iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
iov[TCP_IOV_PAYLOAD].iov_base = &tcp_flags[i];
}
}
/**
* tcp_flags_flush() - Send out buffers for segments with no data (flags)
* @c: Execution context
*/
void tcp_flags_flush(const struct ctx *c)
{
tap_send_frames(c, &tcp_l2_flags_iov[0][0], TCP_NUM_IOVS,
tcp_flags_used);
tcp_flags_used = 0;
} }
/** /**
@ -171,7 +131,7 @@ static void tcp_revert_seq(const struct ctx *c, struct tcp_tap_conn **conns,
} }
/** /**
* tcp_payload_flush() - Send out buffers for segments with data * tcp_payload_flush() - Send out buffers for segments with data or flags
* @c: Execution context * @c: Execution context
*/ */
void tcp_payload_flush(const struct ctx *c) void tcp_payload_flush(const struct ctx *c)
@ -197,37 +157,35 @@ void tcp_payload_flush(const struct ctx *c)
*/ */
int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
{ {
struct tcp_flags_t *payload; struct tcp_payload_t *payload;
struct iovec *iov; struct iovec *iov;
size_t optlen; size_t optlen;
size_t l4len; size_t l4len;
uint32_t seq; uint32_t seq;
int ret; int ret;
iov = tcp_l2_flags_iov[tcp_flags_used]; iov = tcp_l2_iov[tcp_payload_used];
if (CONN_V4(conn)) { if (CONN_V4(conn)) {
iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[tcp_flags_used]); iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src; iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
} else { } else {
iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[tcp_flags_used]); iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src; iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
} }
payload = iov[TCP_IOV_PAYLOAD].iov_base; payload = iov[TCP_IOV_PAYLOAD].iov_base;
seq = conn->seq_to_tap; seq = conn->seq_to_tap;
ret = tcp_prepare_flags(c, conn, flags, &payload->th, ret = tcp_prepare_flags(c, conn, flags, &payload->th,
&payload->opts, &optlen); (struct tcp_syn_opts *)&payload->data, &optlen);
if (ret <= 0) if (ret <= 0)
return ret; return ret;
tcp_flags_used++; tcp_payload_used++;
l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false); l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false);
iov[TCP_IOV_PAYLOAD].iov_len = l4len; iov[TCP_IOV_PAYLOAD].iov_len = l4len;
if (flags & DUP_ACK) { if (flags & DUP_ACK) {
struct iovec *dup_iov; struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++];
dup_iov = tcp_l2_flags_iov[tcp_flags_used++];
memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base, memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base,
iov[TCP_IOV_TAP].iov_len); iov[TCP_IOV_TAP].iov_len);
dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base; dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base;
@ -237,8 +195,8 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len; dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len;
} }
if (tcp_flags_used > TCP_FRAMES_MEM - 2) if (tcp_payload_used > TCP_FRAMES_MEM - 2)
tcp_flags_flush(c); tcp_payload_flush(c);
return 0; return 0;
} }
@ -254,6 +212,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
ssize_t dlen, int no_csum, uint32_t seq) ssize_t dlen, int no_csum, uint32_t seq)
{ {
struct tcp_payload_t *payload;
const uint16_t *check = NULL; const uint16_t *check = NULL;
struct iovec *iov; struct iovec *iov;
size_t l4len; size_t l4len;
@ -274,6 +233,11 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]); iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src; iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
} }
payload = iov[TCP_IOV_PAYLOAD].iov_base;
payload->th.th_off = sizeof(struct tcphdr) / 4;
payload->th.th_x2 = 0;
payload->th.th_flags = 0;
payload->th.ack = 1;
l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false); l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false);
iov[TCP_IOV_PAYLOAD].iov_len = l4len; iov[TCP_IOV_PAYLOAD].iov_len = l4len;
if (++tcp_payload_used > TCP_FRAMES_MEM - 1) if (++tcp_payload_used > TCP_FRAMES_MEM - 1)

View file

@ -7,7 +7,6 @@
#define TCP_BUF_H #define TCP_BUF_H
void tcp_sock_iov_init(const struct ctx *c); void tcp_sock_iov_init(const struct ctx *c);
void tcp_flags_flush(const struct ctx *c);
void tcp_payload_flush(const struct ctx *c); void tcp_payload_flush(const struct ctx *c);
int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn); int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn);
int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags); int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags);

View file

@ -134,21 +134,6 @@ struct tcp_syn_opts {
.ws = TCP_OPT_WS(ws_), \ .ws = TCP_OPT_WS(ws_), \
}) })
/**
* struct tcp_flags_t - TCP header and data to send zero-length
* segments (flags)
* @th: TCP header
* @opts TCP options
*/
struct tcp_flags_t {
struct tcphdr th;
struct tcp_syn_opts opts;
#ifdef __AVX2__
} __attribute__ ((packed, aligned(32)));
#else
} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
#endif
extern char tcp_buf_discard [MAX_WINDOW]; extern char tcp_buf_discard [MAX_WINDOW];
void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn, void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,