tcp: Probe net.core.{r,w}mem_max, don't set SO_{RCV,SND}BUF if low
If net.core.rmem_max and net.core.wmem_max sysctls have low values, we can get bigger buffers by not trying to set them high -- the kernel would lock their values to what we get. Try, instead, to get bigger buffers by queueing as much as possible, and if maximum values in tcp_wmem and tcp_rmem are bigger than this, that will work. While at it, drop QUICKACK option for non-spliced sockets, I set that earlier by mistake. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
e1a2e2780c
commit
683043e200
2 changed files with 26 additions and 16 deletions
34
tcp.c
34
tcp.c
|
@ -807,14 +807,17 @@ static void tcp_get_sndbuf(struct tcp_tap_conn *conn)
|
||||||
* tcp_sock_set_bufsize() - Set SO_RCVBUF and SO_SNDBUF to maximum values
|
* tcp_sock_set_bufsize() - Set SO_RCVBUF and SO_SNDBUF to maximum values
|
||||||
* @s: Socket, can be -1 to avoid check in the caller
|
* @s: Socket, can be -1 to avoid check in the caller
|
||||||
*/
|
*/
|
||||||
static void tcp_sock_set_bufsize(int s)
|
static void tcp_sock_set_bufsize(struct ctx *c, int s)
|
||||||
{
|
{
|
||||||
int v = INT_MAX / 2; /* Kernel clamps and rounds, no need to check */
|
int v = INT_MAX / 2; /* Kernel clamps and rounds, no need to check */
|
||||||
|
|
||||||
if (s == -1)
|
if (s == -1)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (!c->tcp.low_rmem)
|
||||||
setsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v));
|
setsockopt(s, SOL_SOCKET, SO_RCVBUF, &v, sizeof(v));
|
||||||
|
|
||||||
|
if (!c->tcp.low_wmem)
|
||||||
setsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v));
|
setsockopt(s, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1308,7 +1311,8 @@ static int tcp_send_to_tap(struct ctx *c, struct tcp_tap_conn *conn, int flags,
|
||||||
else
|
else
|
||||||
mss -= sizeof(struct ipv6hdr);
|
mss -= sizeof(struct ipv6hdr);
|
||||||
|
|
||||||
if (!conn->local && !tcp_rtt_dst_low(conn))
|
if (c->tcp.low_wmem &&
|
||||||
|
!conn->local && !tcp_rtt_dst_low(conn))
|
||||||
mss = MIN(mss, PAGE_SIZE);
|
mss = MIN(mss, PAGE_SIZE);
|
||||||
else
|
else
|
||||||
mss = ROUND_DOWN(mss, PAGE_SIZE);
|
mss = ROUND_DOWN(mss, PAGE_SIZE);
|
||||||
|
@ -1571,7 +1575,7 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr,
|
||||||
if (s < 0)
|
if (s < 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
tcp_sock_set_bufsize(s);
|
tcp_sock_set_bufsize(c, s);
|
||||||
|
|
||||||
if (af == AF_INET && addr4.sin_addr.s_addr == c->gw4)
|
if (af == AF_INET && addr4.sin_addr.s_addr == c->gw4)
|
||||||
addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
|
addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
|
||||||
|
@ -2560,13 +2564,15 @@ static int tcp_splice_connect(struct ctx *c, struct tcp_splice_conn *conn,
|
||||||
.sin_addr = { .s_addr = htonl(INADDR_LOOPBACK) },
|
.sin_addr = { .s_addr = htonl(INADDR_LOOPBACK) },
|
||||||
};
|
};
|
||||||
const struct sockaddr *sa;
|
const struct sockaddr *sa;
|
||||||
|
int ret, one = 1;
|
||||||
socklen_t sl;
|
socklen_t sl;
|
||||||
int ret;
|
|
||||||
|
|
||||||
conn->to = sock_conn;
|
conn->to = sock_conn;
|
||||||
|
|
||||||
if (s <= 0)
|
if (s <= 0)
|
||||||
tcp_sock_set_bufsize(sock_conn);
|
tcp_sock_set_bufsize(c, sock_conn);
|
||||||
|
|
||||||
|
setsockopt(s, SOL_TCP, TCP_QUICKACK, &one, sizeof(one));
|
||||||
|
|
||||||
if (v6) {
|
if (v6) {
|
||||||
sa = (struct sockaddr *)&addr6;
|
sa = (struct sockaddr *)&addr6;
|
||||||
|
@ -3157,7 +3163,7 @@ static void tcp_sock_init_one(struct ctx *c, int ns, in_port_t port)
|
||||||
c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
|
c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
|
||||||
tref.u32);
|
tref.u32);
|
||||||
if (s > 0)
|
if (s > 0)
|
||||||
tcp_sock_set_bufsize(s);
|
tcp_sock_set_bufsize(c, s);
|
||||||
else
|
else
|
||||||
s = -1;
|
s = -1;
|
||||||
|
|
||||||
|
@ -3170,7 +3176,7 @@ static void tcp_sock_init_one(struct ctx *c, int ns, in_port_t port)
|
||||||
s = sock_l4(c, AF_INET, IPPROTO_TCP, port,
|
s = sock_l4(c, AF_INET, IPPROTO_TCP, port,
|
||||||
BIND_LOOPBACK, tref.u32);
|
BIND_LOOPBACK, tref.u32);
|
||||||
if (s > 0)
|
if (s > 0)
|
||||||
tcp_sock_set_bufsize(s);
|
tcp_sock_set_bufsize(c, s);
|
||||||
else
|
else
|
||||||
s = -1;
|
s = -1;
|
||||||
|
|
||||||
|
@ -3192,7 +3198,7 @@ static void tcp_sock_init_one(struct ctx *c, int ns, in_port_t port)
|
||||||
c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
|
c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
|
||||||
tref.u32);
|
tref.u32);
|
||||||
if (s > 0)
|
if (s > 0)
|
||||||
tcp_sock_set_bufsize(s);
|
tcp_sock_set_bufsize(c, s);
|
||||||
else
|
else
|
||||||
s = -1;
|
s = -1;
|
||||||
|
|
||||||
|
@ -3205,7 +3211,7 @@ static void tcp_sock_init_one(struct ctx *c, int ns, in_port_t port)
|
||||||
s = sock_l4(c, AF_INET6, IPPROTO_TCP, port,
|
s = sock_l4(c, AF_INET6, IPPROTO_TCP, port,
|
||||||
BIND_LOOPBACK, tref.u32);
|
BIND_LOOPBACK, tref.u32);
|
||||||
if (s > 0)
|
if (s > 0)
|
||||||
tcp_sock_set_bufsize(s);
|
tcp_sock_set_bufsize(c, s);
|
||||||
else
|
else
|
||||||
s = -1;
|
s = -1;
|
||||||
|
|
||||||
|
@ -3287,7 +3293,7 @@ struct tcp_sock_refill_arg {
|
||||||
static int tcp_sock_refill(void *arg)
|
static int tcp_sock_refill(void *arg)
|
||||||
{
|
{
|
||||||
struct tcp_sock_refill_arg *a = (struct tcp_sock_refill_arg *)arg;
|
struct tcp_sock_refill_arg *a = (struct tcp_sock_refill_arg *)arg;
|
||||||
int i, *p4, *p6, one = 1;
|
int i, *p4, *p6;
|
||||||
|
|
||||||
if (a->ns) {
|
if (a->ns) {
|
||||||
if (ns_enter(a->c->pasta_pid))
|
if (ns_enter(a->c->pasta_pid))
|
||||||
|
@ -3304,8 +3310,7 @@ static int tcp_sock_refill(void *arg)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
*p4 = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP);
|
*p4 = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP);
|
||||||
setsockopt(*p4, SOL_TCP, TCP_QUICKACK, &one, sizeof(one));
|
tcp_sock_set_bufsize(a->c, *p4);
|
||||||
tcp_sock_set_bufsize(*p4);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; a->c->v6 && i < TCP_SOCK_POOL_SIZE; i++, p6++) {
|
for (i = 0; a->c->v6 && i < TCP_SOCK_POOL_SIZE; i++, p6++) {
|
||||||
|
@ -3314,8 +3319,7 @@ static int tcp_sock_refill(void *arg)
|
||||||
}
|
}
|
||||||
*p6 = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK,
|
*p6 = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK,
|
||||||
IPPROTO_TCP);
|
IPPROTO_TCP);
|
||||||
setsockopt(*p6, SOL_TCP, TCP_QUICKACK, &one, sizeof(one));
|
tcp_sock_set_bufsize(a->c, *p6);
|
||||||
tcp_sock_set_bufsize(*p6);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -3334,6 +3338,8 @@ int tcp_sock_init(struct ctx *c, struct timespec *now)
|
||||||
|
|
||||||
getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM);
|
getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM);
|
||||||
|
|
||||||
|
tcp_probe_mem(c);
|
||||||
|
|
||||||
for (port = 0; port < USHRT_MAX; port++) {
|
for (port = 0; port < USHRT_MAX; port++) {
|
||||||
if (!bitmap_isset(c->tcp.port_to_tap, port))
|
if (!bitmap_isset(c->tcp.port_to_tap, port))
|
||||||
continue;
|
continue;
|
||||||
|
|
4
tcp.h
4
tcp.h
|
@ -51,6 +51,8 @@ union tcp_epoll_ref {
|
||||||
* @pipe_size: Size of pipes for spliced connections
|
* @pipe_size: Size of pipes for spliced connections
|
||||||
* @refill_ts: Time of last refill operation for pools of sockets/pipes
|
* @refill_ts: Time of last refill operation for pools of sockets/pipes
|
||||||
* @port_detect_ts: Time of last TCP port detection/rebind, if enabled
|
* @port_detect_ts: Time of last TCP port detection/rebind, if enabled
|
||||||
|
* @low_wmem: Low probed net.core.wmem_max
|
||||||
|
* @low_rmem: Low probed net.core.rmem_max
|
||||||
*/
|
*/
|
||||||
struct tcp_ctx {
|
struct tcp_ctx {
|
||||||
uint64_t hash_secret[2];
|
uint64_t hash_secret[2];
|
||||||
|
@ -65,6 +67,8 @@ struct tcp_ctx {
|
||||||
size_t pipe_size;
|
size_t pipe_size;
|
||||||
struct timespec refill_ts;
|
struct timespec refill_ts;
|
||||||
struct timespec port_detect_ts;
|
struct timespec port_detect_ts;
|
||||||
|
int low_wmem;
|
||||||
|
int low_rmem;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* TCP_H */
|
#endif /* TCP_H */
|
||||||
|
|
Loading…
Reference in a new issue