tcp, tap: Turn tcp_probe_mem() into sock_probe_mem(), use for AF_UNIX socket too

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
Stefano Brivio 2021-10-05 19:27:04 +02:00
parent eef4e82903
commit d4d61480b6
7 changed files with 49 additions and 40 deletions

View file

@ -374,6 +374,7 @@ int main(int argc, char **argv)
perror("setrlimit"); perror("setrlimit");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
sock_probe_mem(&c);
proto_update_l2_buf(c.mac_guest, c.mac, &c.addr4); proto_update_l2_buf(c.mac_guest, c.mac, &c.addr4);

View file

@ -126,6 +126,8 @@ enum passt_modes {
* @no_dhcpv6: Disable DHCPv6 server * @no_dhcpv6: Disable DHCPv6 server
* @no_ndp: Disable NDP handler altogether * @no_ndp: Disable NDP handler altogether
* @no_ra: Disable router advertisements * @no_ra: Disable router advertisements
* @low_wmem: Low probed net.core.wmem_max
* @low_rmem: Low probed net.core.rmem_max
*/ */
struct ctx { struct ctx {
enum passt_modes mode; enum passt_modes mode;
@ -177,6 +179,9 @@ struct ctx {
int no_dhcpv6; int no_dhcpv6;
int no_ndp; int no_ndp;
int no_ra; int no_ra;
int low_wmem;
int low_rmem;
}; };
void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s, void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,

8
tap.c
View file

@ -773,7 +773,7 @@ static void tap_sock_init_unix(struct ctx *c)
struct sockaddr_un addr = { struct sockaddr_un addr = {
.sun_family = AF_UNIX, .sun_family = AF_UNIX,
}; };
int i, ret; int i, ret, v = INT_MAX / 2;
if (c->fd_tap_listen) if (c->fd_tap_listen)
close(c->fd_tap_listen); close(c->fd_tap_listen);
@ -833,6 +833,12 @@ static void tap_sock_init_unix(struct ctx *c)
addr.sun_path); addr.sun_path);
c->fd_tap = accept(fd, NULL, NULL); c->fd_tap = accept(fd, NULL, NULL);
if (!c->low_rmem)
setsockopt(c->fd_tap, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v));
if (!c->low_wmem)
setsockopt(c->fd_tap, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v));
} }
static int tun_ns_fd = -1; static int tun_ns_fd = -1;

38
tcp.c
View file

@ -341,9 +341,6 @@
#define TCP_TAP_FRAMES 32 #define TCP_TAP_FRAMES 32
#define RCVBUF_BIG (2 * 1024 * 1024)
#define SNDBUF_BIG (2 * 1024 * 1024)
#define SNDBUF_SMALL (128 * 1024)
#define MAX_PIPE_SIZE (2 * 1024 * 1024) #define MAX_PIPE_SIZE (2 * 1024 * 1024)
#define TCP_HASH_TABLE_LOAD 70 /* % */ #define TCP_HASH_TABLE_LOAD 70 /* % */
@ -753,33 +750,6 @@ static void tcp_splice_state(struct tcp_splice_conn *conn, enum tcp_state state)
conn->state = state; conn->state = state;
} }
/**
 * tcp_probe_mem() - Check if setting high SO_SNDBUF and SO_RCVBUF is allowed
 * @c:		Execution context
 *
 * Request very large send and receive buffers on a temporary TCP socket and
 * read back the resulting sizes. If a request fails, or the size read back is
 * below SNDBUF_BIG or RCVBUF_BIG, set the matching low_wmem or low_rmem flag
 * in the TCP context. If the socket can't be created, set both flags. Flags
 * are only ever set here, never cleared.
 */
static void tcp_probe_mem(struct ctx *c)
{
	int v = INT_MAX / 2, s;
	socklen_t sl;

	if ((s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) {
		/* Can't probe at all: treat both limits as low */
		c->tcp.low_wmem = c->tcp.low_rmem = 1;
		return;
	}

	sl = sizeof(v);
	if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)) ||
	    getsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, &sl) || v < SNDBUF_BIG)
		c->tcp.low_wmem = 1;

	/* getsockopt() writes to both v and sl: reset them before reuse */
	v = INT_MAX / 2;
	sl = sizeof(v);
	if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v)) ||
	    getsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, &sl) || v < RCVBUF_BIG)
		c->tcp.low_rmem = 1;

	close(s);
}
/** /**
* tcp_get_sndbuf() - Get, scale SO_SNDBUF between thresholds (1 to 0.5 usage) * tcp_get_sndbuf() - Get, scale SO_SNDBUF between thresholds (1 to 0.5 usage)
* @conn: Connection pointer * @conn: Connection pointer
@ -814,10 +784,10 @@ static void tcp_sock_set_bufsize(struct ctx *c, int s)
if (s == -1) if (s == -1)
return; return;
if (!c->tcp.low_rmem) if (!c->low_rmem)
setsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v)); setsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v));
if (!c->tcp.low_wmem) if (!c->low_wmem)
setsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)); setsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v));
} }
@ -1325,7 +1295,7 @@ static int tcp_send_to_tap(struct ctx *c, struct tcp_tap_conn *conn, int flags,
else else
mss -= sizeof(struct ipv6hdr); mss -= sizeof(struct ipv6hdr);
if (c->tcp.low_wmem && if (c->low_wmem &&
!conn->local && !tcp_rtt_dst_low(conn)) !conn->local && !tcp_rtt_dst_low(conn))
mss = MIN(mss, PAGE_SIZE); mss = MIN(mss, PAGE_SIZE);
else else
@ -3342,8 +3312,6 @@ int tcp_sock_init(struct ctx *c, struct timespec *now)
getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM); getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM);
tcp_probe_mem(c);
for (port = 0; port < USHRT_MAX; port++) { for (port = 0; port < USHRT_MAX; port++) {
if (!bitmap_isset(c->tcp.port_to_tap, port)) if (!bitmap_isset(c->tcp.port_to_tap, port))
continue; continue;

4
tcp.h
View file

@ -51,8 +51,6 @@ union tcp_epoll_ref {
* @pipe_size: Size of pipes for spliced connections * @pipe_size: Size of pipes for spliced connections
* @refill_ts: Time of last refill operation for pools of sockets/pipes * @refill_ts: Time of last refill operation for pools of sockets/pipes
* @port_detect_ts: Time of last TCP port detection/rebind, if enabled * @port_detect_ts: Time of last TCP port detection/rebind, if enabled
* @low_wmem: Low probed net.core.wmem_max
* @low_rmem: Low probed net.core.rmem_max
*/ */
struct tcp_ctx { struct tcp_ctx {
uint64_t hash_secret[2]; uint64_t hash_secret[2];
@ -67,8 +65,6 @@ struct tcp_ctx {
size_t pipe_size; size_t pipe_size;
struct timespec refill_ts; struct timespec refill_ts;
struct timespec port_detect_ts; struct timespec port_detect_ts;
int low_wmem;
int low_rmem;
}; };
#endif /* TCP_H */ #endif /* TCP_H */

28
util.c
View file

@ -212,6 +212,34 @@ int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port,
return fd; return fd;
} }
/**
 * sock_probe_mem() - Check if setting high SO_SNDBUF and SO_RCVBUF is allowed
 * @c:		Execution context
 *
 * Request very large send and receive buffers on a temporary TCP socket and
 * read back the resulting sizes. If a request fails, or the size read back is
 * below SNDBUF_BIG or RCVBUF_BIG, set @c->low_wmem or @c->low_rmem
 * respectively. If the socket can't be created, set both flags. Flags are
 * only ever set here, never cleared.
 */
void sock_probe_mem(struct ctx *c)
{
	int v = INT_MAX / 2, s;
	socklen_t sl;

	if ((s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) {
		/* Can't probe at all: treat both limits as low */
		c->low_wmem = c->low_rmem = 1;
		return;
	}

	sl = sizeof(v);
	if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)) ||
	    getsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, &sl) || v < SNDBUF_BIG)
		c->low_wmem = 1;

	/* getsockopt() writes to both v and sl: reset them before reuse */
	v = INT_MAX / 2;
	sl = sizeof(v);
	if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v)) ||
	    getsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, &sl) || v < RCVBUF_BIG)
		c->low_rmem = 1;

	close(s);
}
/** /**
* timespec_diff_ms() - Report difference in milliseconds between two timestamps * timespec_diff_ms() - Report difference in milliseconds between two timestamps
* @a: Minuend timestamp * @a: Minuend timestamp

5
util.h
View file

@ -116,6 +116,10 @@ void debug(const char *format, ...);
.daddr = IN6ADDR_ANY_INIT, \ .daddr = IN6ADDR_ANY_INIT, \
} }
#define RCVBUF_BIG (2 * 1024 * 1024)
#define SNDBUF_BIG (4 * 1024 * 1024)
#define SNDBUF_SMALL (128 * 1024)
#include <linux/ipv6.h> #include <linux/ipv6.h>
#include <net/if.h> #include <net/if.h>
#include <linux/ip.h> #include <linux/ip.h>
@ -133,6 +137,7 @@ struct ctx;
char *ipv6_l4hdr(struct ipv6hdr *ip6h, uint8_t *proto); char *ipv6_l4hdr(struct ipv6hdr *ip6h, uint8_t *proto);
int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port, int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port,
enum bind_type bind_addr, uint32_t data); enum bind_type bind_addr, uint32_t data);
void sock_probe_mem(struct ctx *c);
int timespec_diff_ms(struct timespec *a, struct timespec *b); int timespec_diff_ms(struct timespec *a, struct timespec *b);
void bitmap_set(uint8_t *map, int bit); void bitmap_set(uint8_t *map, int bit);
void bitmap_clear(uint8_t *map, int bit); void bitmap_clear(uint8_t *map, int bit);