tcp: Improve handling of fallback if socket pool is empty on new splice
When creating a new spliced connection, we need to get a socket in the other ns from the originating one. To avoid excessive ns switches we usually get these from a pool refilled on a timer. However, if the pool runs out we need a fallback. Currently that's done by passing -1 as the socket to tcp_splice_connect() and running it in the target ns. This means that tcp_splice_connect() itself needs to have different cases depending on whether it's given an existing socket or not, which is a separate concern from what it's mostly doing. We change it to require a suitable open socket to be passed in, and ensure in the caller that we have one. This requires adding the fallback paths to the caller, tcp_splice_new(). We use slightly different approaches for a socket in the init ns versus the guest ns. This also means that we no longer need to run tcp_splice_connect() itself in the guest ns, which allows us to remove a bunch of boilerplate code. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
dc467d526f
commit
6ccab72d9b
3 changed files with 31 additions and 59 deletions
2
tcp.c
2
tcp.c
|
@ -1882,7 +1882,7 @@ int tcp_conn_pool_sock(int pool[])
|
||||||
*
|
*
|
||||||
* Return: socket number on success, negative code if socket creation failed
|
* Return: socket number on success, negative code if socket creation failed
|
||||||
*/
|
*/
|
||||||
static int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
|
int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
|
||||||
{
|
{
|
||||||
int s;
|
int s;
|
||||||
|
|
||||||
|
|
|
@ -193,6 +193,7 @@ void tcp_table_compact(struct ctx *c, union tcp_conn *hole);
|
||||||
void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union);
|
void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union);
|
||||||
void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union);
|
void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union);
|
||||||
int tcp_conn_pool_sock(int pool[]);
|
int tcp_conn_pool_sock(int pool[]);
|
||||||
|
int tcp_conn_new_sock(const struct ctx *c, sa_family_t af);
|
||||||
void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af);
|
void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af);
|
||||||
void tcp_splice_refill(const struct ctx *c);
|
void tcp_splice_refill(const struct ctx *c);
|
||||||
|
|
||||||
|
|
87
tcp_splice.c
87
tcp_splice.c
|
@ -87,6 +87,9 @@ static const char *tcp_splice_flag_str[] __attribute((__unused__)) = {
|
||||||
"RCVLOWAT_ACT_B", "CLOSING",
|
"RCVLOWAT_ACT_B", "CLOSING",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Forward declaration */
|
||||||
|
static int tcp_sock_refill_ns(void *arg);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tcp_splice_conn_epoll_events() - epoll events masks for given state
|
* tcp_splice_conn_epoll_events() - epoll events masks for given state
|
||||||
* @events: Connection event flags
|
* @events: Connection event flags
|
||||||
|
@ -347,12 +350,8 @@ static int tcp_splice_connect_finish(const struct ctx *c,
|
||||||
* Return: 0 for connect() succeeded or in progress, negative value on error
|
* Return: 0 for connect() succeeded or in progress, negative value on error
|
||||||
*/
|
*/
|
||||||
static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
|
static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
|
||||||
int s, in_port_t port)
|
int sock_conn, in_port_t port)
|
||||||
{
|
{
|
||||||
int sock_conn = (s >= 0) ? s : socket(CONN_V6(conn) ? AF_INET6 :
|
|
||||||
AF_INET,
|
|
||||||
SOCK_STREAM | SOCK_NONBLOCK,
|
|
||||||
IPPROTO_TCP);
|
|
||||||
struct sockaddr_in6 addr6 = {
|
struct sockaddr_in6 addr6 = {
|
||||||
.sin6_family = AF_INET6,
|
.sin6_family = AF_INET6,
|
||||||
.sin6_port = htons(port),
|
.sin6_port = htons(port),
|
||||||
|
@ -366,19 +365,8 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
|
||||||
const struct sockaddr *sa;
|
const struct sockaddr *sa;
|
||||||
socklen_t sl;
|
socklen_t sl;
|
||||||
|
|
||||||
if (sock_conn < 0)
|
|
||||||
return -errno;
|
|
||||||
|
|
||||||
if (sock_conn > SOCKET_MAX) {
|
|
||||||
close(sock_conn);
|
|
||||||
return -EIO;
|
|
||||||
}
|
|
||||||
|
|
||||||
conn->b = sock_conn;
|
conn->b = sock_conn;
|
||||||
|
|
||||||
if (s < 0)
|
|
||||||
tcp_sock_set_bufsize(c, conn->b);
|
|
||||||
|
|
||||||
if (setsockopt(conn->b, SOL_TCP, TCP_QUICKACK,
|
if (setsockopt(conn->b, SOL_TCP, TCP_QUICKACK,
|
||||||
&((int){ 1 }), sizeof(int))) {
|
&((int){ 1 }), sizeof(int))) {
|
||||||
trace("TCP (spliced): failed to set TCP_QUICKACK on socket %i",
|
trace("TCP (spliced): failed to set TCP_QUICKACK on socket %i",
|
||||||
|
@ -409,36 +397,6 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* struct tcp_splice_connect_ns_arg - Arguments for tcp_splice_connect_ns()
|
|
||||||
* @c: Execution context
|
|
||||||
* @conn: Accepted inbound connection
|
|
||||||
* @port: Destination port, host order
|
|
||||||
* @ret: Return value of tcp_splice_connect_ns()
|
|
||||||
*/
|
|
||||||
struct tcp_splice_connect_ns_arg {
|
|
||||||
const struct ctx *c;
|
|
||||||
struct tcp_splice_conn *conn;
|
|
||||||
in_port_t port;
|
|
||||||
int ret;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* tcp_splice_connect_ns() - Enter namespace and call tcp_splice_connect()
|
|
||||||
* @arg: See struct tcp_splice_connect_ns_arg
|
|
||||||
*
|
|
||||||
* Return: 0
|
|
||||||
*/
|
|
||||||
static int tcp_splice_connect_ns(void *arg)
|
|
||||||
{
|
|
||||||
struct tcp_splice_connect_ns_arg *a;
|
|
||||||
|
|
||||||
a = (struct tcp_splice_connect_ns_arg *)arg;
|
|
||||||
ns_enter(a->c);
|
|
||||||
a->ret = tcp_splice_connect(a->c, a->conn, -1, a->port);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tcp_splice_new() - Handle new spliced connection
|
* tcp_splice_new() - Handle new spliced connection
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
|
@ -451,24 +409,37 @@ static int tcp_splice_connect_ns(void *arg)
|
||||||
static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
|
static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
|
||||||
in_port_t port, int outbound)
|
in_port_t port, int outbound)
|
||||||
{
|
{
|
||||||
int *p, s = -1;
|
int s = -1;
|
||||||
|
|
||||||
if (outbound)
|
/* If the pool is empty we take slightly different approaches
|
||||||
p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
|
* for init or ns sockets. For init sockets we just open a
|
||||||
else
|
* new one without refilling the pool to keep latency down.
|
||||||
p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
|
* For ns sockets, we're going to incur the latency of
|
||||||
|
* entering the ns anyway, so we might as well refill the
|
||||||
|
* pool.
|
||||||
|
*/
|
||||||
|
if (outbound) {
|
||||||
|
int *p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
|
||||||
|
int af = CONN_V6(conn) ? AF_INET6 : AF_INET;
|
||||||
|
|
||||||
s = tcp_conn_pool_sock(p);
|
s = tcp_conn_pool_sock(p);
|
||||||
|
if (s < 0)
|
||||||
|
s = tcp_conn_new_sock(c, af);
|
||||||
|
} else {
|
||||||
|
int *p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
|
||||||
|
|
||||||
/* No socket available in namespace: create a new one for connect() */
|
/* If pool is empty, refill it first */
|
||||||
if (s < 0 && !outbound) {
|
if (p[TCP_SOCK_POOL_SIZE-1] < 0)
|
||||||
struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
|
NS_CALL(tcp_sock_refill_ns, c);
|
||||||
|
|
||||||
NS_CALL(tcp_splice_connect_ns, &ns_arg);
|
s = tcp_conn_pool_sock(p);
|
||||||
return ns_arg.ret;
|
}
|
||||||
|
|
||||||
|
if (s < 0) {
|
||||||
|
warn("Couldn't open connectable socket for splice (%d)", s);
|
||||||
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Otherwise, the socket will connect on the side it was created on */
|
|
||||||
return tcp_splice_connect(c, conn, s, port);
|
return tcp_splice_connect(c, conn, s, port);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue