904b86ade7
This introduces a number of fundamental changes that would be quite messy to split. Summary: - advertised window scaling can be as big as we want, we just need to clamp window sizes to avoid exceeding the size of our "discard" buffer for unacknowledged data from socket - add macros to compare sequence numbers - force sending ACK to guest/tap on PSH segments, always in pasta mode, whenever we see an overlapping segment, or when we reach a given threshold compared to our window - we don't actually use recvmmsg() here, fix comments and label - introduce pools for pre-opened sockets and pipes, to decrease latency on new connections - set receiving and sending buffer sizes to the maximum allowed, kernel will clamp and round appropriately - defer clean-up of spliced and non-spliced connection to timer - in tcp_send_to_tap(), there's no need anymore to keep a large buffer, shrink it down to what we actually need - introduce SO_RCVLOWAT setting and activity tracking for spliced connections, to coalesce data moved by splice() calls as much as possible - as we now have a compacted connection table, there's no need to keep sparse bitmaps tracking connection activity -- simply go through active connections with a loop in the timer handler - always clamp the advertised window to half our sending buffer, too, to minimise retransmissions from the guest/tap - set TCP_QUICKACK for originating socket in spliced connections, there's no need to delay them - fix up timeout for unacknowledged data from socket Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
64 lines
2.1 KiB
C
64 lines
2.1 KiB
C
#ifndef TCP_H
|
|
#define TCP_H
|
|
|
|
#define TCP_TIMER_INTERVAL 20 /* ms */
|
|
|
|
#define TCP_MAX_CONNS (128 * 1024)
|
|
#define TCP_MAX_SOCKS (TCP_MAX_CONNS + USHRT_MAX * 2)
|
|
|
|
struct ctx;
|
|
|
|
void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
|
|
struct timespec *now);
|
|
int tcp_tap_handler(struct ctx *c, int af, void *addr,
|
|
struct tap_l4_msg *msg, int count, struct timespec *now);
|
|
int tcp_sock_init(struct ctx *c, struct timespec *now);
|
|
void tcp_timer(struct ctx *c, struct timespec *ts);
|
|
void tcp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
|
|
uint32_t *ip_da);
|
|
void tcp_remap_to_tap(in_port_t port, in_port_t delta);
|
|
void tcp_remap_to_init(in_port_t port, in_port_t delta);
|
|
|
|
/**
|
|
* union tcp_epoll_ref - epoll reference portion for TCP connections
|
|
* @listen: Set if this file descriptor is a listening socket
|
|
* @splice: Set if descriptor is associated to a spliced connection
|
|
* @v6: Set for IPv6 sockets or connections
|
|
* @index: Index of connection in table, or port for bound sockets
|
|
* @u32: Opaque u32 value of reference
|
|
*/
|
|
union tcp_epoll_ref {
|
|
struct {
|
|
uint32_t listen:1,
|
|
splice:1,
|
|
v6:1,
|
|
index:20;
|
|
};
|
|
uint32_t u32;
|
|
};
|
|
|
|
/**
|
|
* struct tcp_ctx - Execution context for TCP routines
|
|
* @hash_secret: 128-bit secret for hash functions, ISN and hash table
|
|
* @tap_conn_count: Count of tap connections in connection table
|
|
* @splice_conn_count: Count of spliced connections in connection table
|
|
* @port_to_tap: Ports bound host-side, packets to tap or spliced
|
|
* @port_to_init: Ports bound namespace-side, spliced to init
|
|
* @timer_run: Timestamp of most recent timer run
|
|
* @kernel_snd_wnd: Kernel reports sending window (with commit 8f7baad7f035)
|
|
* @pipe_size: Size of pipes for spliced connections
|
|
* @refill_ts: Time of last refill operation for pools of sockets/pipes
|
|
*/
|
|
struct tcp_ctx {
|
|
uint64_t hash_secret[2];
|
|
int tap_conn_count;
|
|
int splice_conn_count;
|
|
uint8_t port_to_tap [USHRT_MAX / 8];
|
|
uint8_t port_to_init [USHRT_MAX / 8];
|
|
struct timespec timer_run;
|
|
int kernel_snd_wnd;
|
|
size_t pipe_size;
|
|
struct timespec refill_ts;
|
|
};
|
|
|
|
#endif /* TCP_H */
|