tcp: Check if connection is local or low RTT was seen before using large MSS
If the connection is local or the RTT was comparable to the time it takes to queue a batch of messages, we can safely use a large MSS regardless of the sending buffer, but otherwise not. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
f6bff339a9
commit
e1a2e2780c
1 changed files with 59 additions and 0 deletions
59
tcp.c
59
tcp.c
|
@ -369,6 +369,9 @@
|
||||||
|
|
||||||
#define PORT_DETECT_INTERVAL 1000
|
#define PORT_DETECT_INTERVAL 1000
|
||||||
|
|
||||||
|
#define LOW_RTT_TABLE_SIZE 8
|
||||||
|
#define LOW_RTT_THRESHOLD 5 /* us */
|
||||||
|
|
||||||
/* We need to include <linux/tcp.h> for tcpi_bytes_acked, instead of
|
/* We need to include <linux/tcp.h> for tcpi_bytes_acked, instead of
|
||||||
* <netinet/tcp.h>, but that doesn't include a definition for SOL_TCP
|
* <netinet/tcp.h>, but that doesn't include a definition for SOL_TCP
|
||||||
*/
|
*/
|
||||||
|
@ -440,6 +443,7 @@ struct tcp_tap_conn;
|
||||||
* @a.a4.a: IPv4 address
|
* @a.a4.a: IPv4 address
|
||||||
* @tap_port: Guest-facing tap port
|
* @tap_port: Guest-facing tap port
|
||||||
* @sock_port: Remote, socket-facing port
|
* @sock_port: Remote, socket-facing port
|
||||||
|
* @local: Destination is local
|
||||||
* @state: TCP connection state
|
* @state: TCP connection state
|
||||||
* @seq_to_tap: Next sequence for packets to tap
|
* @seq_to_tap: Next sequence for packets to tap
|
||||||
* @seq_ack_from_tap: Last ACK number received from tap
|
* @seq_ack_from_tap: Last ACK number received from tap
|
||||||
|
@ -476,6 +480,7 @@ struct tcp_tap_conn {
|
||||||
} a;
|
} a;
|
||||||
in_port_t tap_port;
|
in_port_t tap_port;
|
||||||
in_port_t sock_port;
|
in_port_t sock_port;
|
||||||
|
int local;
|
||||||
enum tcp_state state;
|
enum tcp_state state;
|
||||||
|
|
||||||
uint32_t seq_to_tap;
|
uint32_t seq_to_tap;
|
||||||
|
@ -536,6 +541,9 @@ static int tcp_sock_init_lo [USHRT_MAX][IP_VERSIONS];
|
||||||
static int tcp_sock_init_ext [USHRT_MAX][IP_VERSIONS];
|
static int tcp_sock_init_ext [USHRT_MAX][IP_VERSIONS];
|
||||||
static int tcp_sock_ns [USHRT_MAX][IP_VERSIONS];
|
static int tcp_sock_ns [USHRT_MAX][IP_VERSIONS];
|
||||||
|
|
||||||
|
/* Table of destinations with very low RTT (assumed to be local), LRU */
|
||||||
|
static struct in6_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tcp_remap_to_tap() - Set delta for port translation toward guest/tap
|
* tcp_remap_to_tap() - Set delta for port translation toward guest/tap
|
||||||
* @port: Original destination port, host order
|
* @port: Original destination port, host order
|
||||||
|
@ -680,6 +688,47 @@ static int init_sock_pool6 [TCP_SOCK_POOL_SIZE];
|
||||||
static int ns_sock_pool4 [TCP_SOCK_POOL_SIZE];
|
static int ns_sock_pool4 [TCP_SOCK_POOL_SIZE];
|
||||||
static int ns_sock_pool6 [TCP_SOCK_POOL_SIZE];
|
static int ns_sock_pool6 [TCP_SOCK_POOL_SIZE];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* tcp_rtt_dst_low() - Check if low RTT was seen for connection endpoint
|
||||||
|
* @conn: Connection pointer
|
||||||
|
* Return: 1 if destination is in low RTT table, 0 otherwise
|
||||||
|
*/
|
||||||
|
static int tcp_rtt_dst_low(struct tcp_tap_conn *conn)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < LOW_RTT_TABLE_SIZE; i++)
|
||||||
|
if (!memcmp(&conn->a.a6, low_rtt_dst + i, sizeof(conn->a.a6)))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* tcp_rtt_dst_check() - Check tcpi_min_rtt, insert endpoint in table if low
|
||||||
|
* @conn: Connection pointer
|
||||||
|
* @info: Pointer to struct tcp_info for socket
|
||||||
|
*/
|
||||||
|
static void tcp_rtt_dst_check(struct tcp_tap_conn *conn, struct tcp_info *info)
|
||||||
|
{
|
||||||
|
int i, hole = -1;
|
||||||
|
|
||||||
|
if (!info->tcpi_min_rtt || (int)info->tcpi_min_rtt > LOW_RTT_THRESHOLD)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (i = 0; i < LOW_RTT_TABLE_SIZE; i++) {
|
||||||
|
if (!memcmp(&conn->a.a6, low_rtt_dst + i, sizeof(conn->a.a6)))
|
||||||
|
return;
|
||||||
|
if (hole == -1 && IN6_IS_ADDR_UNSPECIFIED(low_rtt_dst + i))
|
||||||
|
hole = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(low_rtt_dst + hole++, &conn->a.a6, sizeof(conn->a.a6));
|
||||||
|
if (hole == LOW_RTT_TABLE_SIZE)
|
||||||
|
hole = 0;
|
||||||
|
memcpy(low_rtt_dst + hole, &in6addr_any, sizeof(conn->a.a6));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tcp_tap_state() - Set given TCP state for tap connection, report to stderr
|
* tcp_tap_state() - Set given TCP state for tap connection, report to stderr
|
||||||
* @conn: Connection pointer
|
* @conn: Connection pointer
|
||||||
|
@ -1258,6 +1307,11 @@ static int tcp_send_to_tap(struct ctx *c, struct tcp_tap_conn *conn, int flags,
|
||||||
mss -= sizeof(struct iphdr);
|
mss -= sizeof(struct iphdr);
|
||||||
else
|
else
|
||||||
mss -= sizeof(struct ipv6hdr);
|
mss -= sizeof(struct ipv6hdr);
|
||||||
|
|
||||||
|
if (!conn->local && !tcp_rtt_dst_low(conn))
|
||||||
|
mss = MIN(mss, PAGE_SIZE);
|
||||||
|
else
|
||||||
|
mss = ROUND_DOWN(mss, PAGE_SIZE);
|
||||||
}
|
}
|
||||||
*(uint16_t *)data = htons(mss);
|
*(uint16_t *)data = htons(mss);
|
||||||
|
|
||||||
|
@ -1588,6 +1642,11 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr,
|
||||||
|
|
||||||
tcp_hash_insert(c, conn, af, addr);
|
tcp_hash_insert(c, conn, af, addr);
|
||||||
|
|
||||||
|
if (!bind(s, sa, sl))
|
||||||
|
tcp_rst(c, conn); /* Nobody is listening then */
|
||||||
|
if (errno != EADDRNOTAVAIL)
|
||||||
|
conn->local = 1;
|
||||||
|
|
||||||
if (connect(s, sa, sl)) {
|
if (connect(s, sa, sl)) {
|
||||||
tcp_tap_state(conn, TAP_SYN_SENT);
|
tcp_tap_state(conn, TAP_SYN_SENT);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue