tcp_splice: Don't pool pipes in pairs

To reduce latencies, the tcp splice code maintains a pool of pre-opened
pipes to use for new connections.  This is structured as an array of pairs
of pipes, with each pipe, of course, being a pair of fds.  Thus when we
use the pool, a single pool "slot" provides both the a->b and b->a pipes.

There's no strong reason to store the pool in pairs, though - without much
difficulty, we can instead take the a->b and b->a pipes for a new
connection independently from separate slots in the pool, or even take one
from the pool and create the other as we need it, if there's only one
pipe left in the pool.

This marginally increases the length of code, but simplifies the structure
of the pipe pool.  We should be able to re-shrink the code with later
changes, too.

In the process we also fix some minor bugs:
- If we both failed to find a pipe in the pool and to create a new one, we
  didn't log an error and would silently drop the connection.  That could
  make debugging such a situation difficult.  Add an error message for
  that case.
- When refilling the pool, if we were only able to open a single pipe in
  the pair, we attempted to roll back, but instead of closing the opened
  pipe, we closed the pipe we failed to open (probably leading to
  some ignored EBADF errors).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
David Gibson 2023-11-07 13:42:45 +11:00 committed by Stefano Brivio
parent 6357010cab
commit 0e8e534850

View file

@ -58,7 +58,7 @@
#include "tcp_conn.h" #include "tcp_conn.h"
#define MAX_PIPE_SIZE (8UL * 1024 * 1024) #define MAX_PIPE_SIZE (8UL * 1024 * 1024)
#define TCP_SPLICE_PIPE_POOL_SIZE 16 #define TCP_SPLICE_PIPE_POOL_SIZE 32
#define TCP_SPLICE_CONN_PRESSURE 30 /* % of conn_count */ #define TCP_SPLICE_CONN_PRESSURE 30 /* % of conn_count */
#define TCP_SPLICE_FILE_PRESSURE 30 /* % of c->nofile */ #define TCP_SPLICE_FILE_PRESSURE 30 /* % of c->nofile */
@ -69,7 +69,7 @@ static int ns_sock_pool4 [TCP_SOCK_POOL_SIZE];
static int ns_sock_pool6 [TCP_SOCK_POOL_SIZE]; static int ns_sock_pool6 [TCP_SOCK_POOL_SIZE];
/* Pool of pre-opened pipes */ /* Pool of pre-opened pipes */
static int splice_pipe_pool [TCP_SPLICE_PIPE_POOL_SIZE][2][2]; static int splice_pipe_pool [TCP_SPLICE_PIPE_POOL_SIZE][2];
#define CONN_V6(x) (x->flags & SPLICE_V6) #define CONN_V6(x) (x->flags & SPLICE_V6)
#define CONN_V4(x) (!CONN_V6(x)) #define CONN_V4(x) (!CONN_V6(x))
@ -307,19 +307,16 @@ static int tcp_splice_connect_finish(const struct ctx *c,
conn->pipe_a_b[1] = conn->pipe_b_a[1] = -1; conn->pipe_a_b[1] = conn->pipe_b_a[1] = -1;
for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) { for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) {
if (splice_pipe_pool[i][0][0] >= 0) { if (splice_pipe_pool[i][0] >= 0) {
SWAP(conn->pipe_a_b[0], splice_pipe_pool[i][0][0]); SWAP(conn->pipe_a_b[0], splice_pipe_pool[i][0]);
SWAP(conn->pipe_a_b[1], splice_pipe_pool[i][0][1]); SWAP(conn->pipe_a_b[1], splice_pipe_pool[i][1]);
SWAP(conn->pipe_b_a[0], splice_pipe_pool[i][1][0]);
SWAP(conn->pipe_b_a[1], splice_pipe_pool[i][1][1]);
break; break;
} }
} }
if (conn->pipe_a_b[0] < 0) { if (conn->pipe_a_b[0] < 0) {
if (pipe2(conn->pipe_a_b, O_NONBLOCK | O_CLOEXEC) || if (pipe2(conn->pipe_a_b, O_NONBLOCK | O_CLOEXEC)) {
pipe2(conn->pipe_b_a, O_NONBLOCK | O_CLOEXEC)) { err("TCP (spliced): cannot create a->b pipe: %s",
strerror(errno));
conn_flag(c, conn, CLOSING); conn_flag(c, conn, CLOSING);
return -EIO; return -EIO;
} }
@ -328,6 +325,22 @@ static int tcp_splice_connect_finish(const struct ctx *c,
trace("TCP (spliced): cannot set a->b pipe size to %lu", trace("TCP (spliced): cannot set a->b pipe size to %lu",
c->tcp.pipe_size); c->tcp.pipe_size);
} }
}
for (; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) {
if (splice_pipe_pool[i][0] >= 0) {
SWAP(conn->pipe_b_a[0], splice_pipe_pool[i][0]);
SWAP(conn->pipe_b_a[1], splice_pipe_pool[i][1]);
break;
}
}
if (conn->pipe_b_a[0] < 0) {
if (pipe2(conn->pipe_b_a, O_NONBLOCK | O_CLOEXEC)) {
err("TCP (spliced): cannot create b->a pipe: %s",
strerror(errno));
conn_flag(c, conn, CLOSING);
return -EIO;
}
if (fcntl(conn->pipe_b_a[0], F_SETPIPE_SZ, c->tcp.pipe_size)) { if (fcntl(conn->pipe_b_a[0], F_SETPIPE_SZ, c->tcp.pipe_size)) {
trace("TCP (spliced): cannot set b->a pipe size to %lu", trace("TCP (spliced): cannot set b->a pipe size to %lu",
@ -716,12 +729,12 @@ close:
*/ */
static void tcp_set_pipe_size(struct ctx *c) static void tcp_set_pipe_size(struct ctx *c)
{ {
int probe_pipe[TCP_SPLICE_PIPE_POOL_SIZE * 2][2], i, j; int probe_pipe[TCP_SPLICE_PIPE_POOL_SIZE][2], i, j;
c->tcp.pipe_size = MAX_PIPE_SIZE; c->tcp.pipe_size = MAX_PIPE_SIZE;
smaller: smaller:
for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE * 2; i++) { for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) {
if (pipe2(probe_pipe[i], O_CLOEXEC)) { if (pipe2(probe_pipe[i], O_CLOEXEC)) {
i++; i++;
break; break;
@ -736,7 +749,7 @@ smaller:
close(probe_pipe[j][1]); close(probe_pipe[j][1]);
} }
if (i == TCP_SPLICE_PIPE_POOL_SIZE * 2) if (i == TCP_SPLICE_PIPE_POOL_SIZE)
return; return;
if (!(c->tcp.pipe_size /= 2)) { if (!(c->tcp.pipe_size /= 2)) {
@ -756,25 +769,14 @@ static void tcp_splice_pipe_refill(const struct ctx *c)
int i; int i;
for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) { for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) {
if (splice_pipe_pool[i][0][0] >= 0) if (splice_pipe_pool[i][0] >= 0)
break; break;
if (pipe2(splice_pipe_pool[i][0], O_NONBLOCK | O_CLOEXEC)) if (pipe2(splice_pipe_pool[i], O_NONBLOCK | O_CLOEXEC))
continue; continue;
if (pipe2(splice_pipe_pool[i][1], O_NONBLOCK | O_CLOEXEC)) {
close(splice_pipe_pool[i][1][0]);
close(splice_pipe_pool[i][1][1]);
continue;
}
if (fcntl(splice_pipe_pool[i][0][0], F_SETPIPE_SZ, if (fcntl(splice_pipe_pool[i][0], F_SETPIPE_SZ,
c->tcp.pipe_size)) { c->tcp.pipe_size)) {
trace("TCP (spliced): cannot set a->b pipe size to %lu", trace("TCP (spliced): cannot set pool pipe size to %lu",
c->tcp.pipe_size);
}
if (fcntl(splice_pipe_pool[i][1][0], F_SETPIPE_SZ,
c->tcp.pipe_size)) {
trace("TCP (spliced): cannot set b->a pipe size to %lu",
c->tcp.pipe_size); c->tcp.pipe_size);
} }
} }