tcp: Remove v6 flag from tcp_epoll_ref
This bit in the TCP-specific epoll reference indicates whether the connection is IPv6 or IPv4. However, the sites which refer to it are already calling accept(), which (optionally) returns an address for the remote end of the connection. We can use the sa_family field in that address to determine the connection type independent of the epoll reference. This does have a cost: for the spliced case, it means we now need to get that address from accept(), which introduces an extra copy_to_user(). However, in the future we want to allow handling IPv4 connections through IPv6 sockets, which means we won't be able to determine the IP version at the time we create the listening socket and epoll reference. So, at some point we'll have to pay this cost anyway. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
7f1f2f3f51
commit
034fa8a58d
3 changed files with 7 additions and 13 deletions
10
tcp.c
10
tcp.c
|
@ -662,8 +662,7 @@ static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
|
||||||
{
|
{
|
||||||
int m = conn->c.in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
|
int m = conn->c.in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
|
||||||
union epoll_ref ref = { .r.proto = IPPROTO_TCP, .r.s = conn->sock,
|
union epoll_ref ref = { .r.proto = IPPROTO_TCP, .r.s = conn->sock,
|
||||||
.r.p.tcp.tcp.index = CONN_IDX(conn),
|
.r.p.tcp.tcp.index = CONN_IDX(conn) };
|
||||||
.r.p.tcp.tcp.v6 = CONN_V6(conn) };
|
|
||||||
struct epoll_event ev = { .data.u64 = ref.u64 };
|
struct epoll_event ev = { .data.u64 = ref.u64 };
|
||||||
|
|
||||||
if (conn->events == CLOSED) {
|
if (conn->events == CLOSED) {
|
||||||
|
@ -2745,7 +2744,7 @@ static void tcp_tap_conn_from_sock(struct ctx *c, union epoll_ref ref,
|
||||||
conn->ws_to_tap = conn->ws_from_tap = 0;
|
conn->ws_to_tap = conn->ws_from_tap = 0;
|
||||||
conn_event(c, conn, SOCK_ACCEPTED);
|
conn_event(c, conn, SOCK_ACCEPTED);
|
||||||
|
|
||||||
if (ref.r.p.tcp.tcp.v6) {
|
if (sa->sa_family == AF_INET6) {
|
||||||
struct sockaddr_in6 sa6;
|
struct sockaddr_in6 sa6;
|
||||||
|
|
||||||
memcpy(&sa6, sa, sizeof(sa6));
|
memcpy(&sa6, sa, sizeof(sa6));
|
||||||
|
@ -3019,8 +3018,7 @@ static void tcp_sock_init6(const struct ctx *c,
|
||||||
in_port_t port)
|
in_port_t port)
|
||||||
{
|
{
|
||||||
in_port_t idx = port + c->tcp.fwd_in.delta[port];
|
in_port_t idx = port + c->tcp.fwd_in.delta[port];
|
||||||
union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.v6 = 1,
|
union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.index = idx };
|
||||||
.tcp.index = idx };
|
|
||||||
int s;
|
int s;
|
||||||
|
|
||||||
s = sock_l4(c, AF_INET6, IPPROTO_TCP, addr, ifname, port, tref.u32);
|
s = sock_l4(c, AF_INET6, IPPROTO_TCP, addr, ifname, port, tref.u32);
|
||||||
|
@ -3084,7 +3082,7 @@ static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port)
|
||||||
{
|
{
|
||||||
in_port_t idx = port + c->tcp.fwd_out.delta[port];
|
in_port_t idx = port + c->tcp.fwd_out.delta[port];
|
||||||
union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = 1,
|
union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = 1,
|
||||||
.tcp.v6 = 1, .tcp.index = idx };
|
.tcp.index = idx };
|
||||||
int s;
|
int s;
|
||||||
|
|
||||||
assert(c->mode == MODE_PASTA);
|
assert(c->mode == MODE_PASTA);
|
||||||
|
|
2
tcp.h
2
tcp.h
|
@ -33,7 +33,6 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
|
||||||
* union tcp_epoll_ref - epoll reference portion for TCP connections
|
* union tcp_epoll_ref - epoll reference portion for TCP connections
|
||||||
* @listen: Set if this file descriptor is a listening socket
|
* @listen: Set if this file descriptor is a listening socket
|
||||||
* @outbound: Listening socket maps to outbound, spliced connection
|
* @outbound: Listening socket maps to outbound, spliced connection
|
||||||
* @v6: Set for IPv6 sockets or connections
|
|
||||||
* @timer: Reference is a timerfd descriptor for connection
|
* @timer: Reference is a timerfd descriptor for connection
|
||||||
* @index: Index of connection in table, or port for bound sockets
|
* @index: Index of connection in table, or port for bound sockets
|
||||||
* @u32: Opaque u32 value of reference
|
* @u32: Opaque u32 value of reference
|
||||||
|
@ -42,7 +41,6 @@ union tcp_epoll_ref {
|
||||||
struct {
|
struct {
|
||||||
uint32_t listen:1,
|
uint32_t listen:1,
|
||||||
outbound:1,
|
outbound:1,
|
||||||
v6:1,
|
|
||||||
timer:1,
|
timer:1,
|
||||||
index:20;
|
index:20;
|
||||||
} tcp;
|
} tcp;
|
||||||
|
|
|
@ -167,11 +167,9 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
|
||||||
{
|
{
|
||||||
int m = conn->c.in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
|
int m = conn->c.in_epoll ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
|
||||||
union epoll_ref ref_a = { .r.proto = IPPROTO_TCP, .r.s = conn->a,
|
union epoll_ref ref_a = { .r.proto = IPPROTO_TCP, .r.s = conn->a,
|
||||||
.r.p.tcp.tcp.index = CONN_IDX(conn),
|
.r.p.tcp.tcp.index = CONN_IDX(conn) };
|
||||||
.r.p.tcp.tcp.v6 = CONN_V6(conn) };
|
|
||||||
union epoll_ref ref_b = { .r.proto = IPPROTO_TCP, .r.s = conn->b,
|
union epoll_ref ref_b = { .r.proto = IPPROTO_TCP, .r.s = conn->b,
|
||||||
.r.p.tcp.tcp.index = CONN_IDX(conn),
|
.r.p.tcp.tcp.index = CONN_IDX(conn) };
|
||||||
.r.p.tcp.tcp.v6 = CONN_V6(conn) };
|
|
||||||
struct epoll_event ev_a = { .data.u64 = ref_a.u64 };
|
struct epoll_event ev_a = { .data.u64 = ref_a.u64 };
|
||||||
struct epoll_event ev_b = { .data.u64 = ref_b.u64 };
|
struct epoll_event ev_b = { .data.u64 = ref_b.u64 };
|
||||||
uint32_t events_a, events_b;
|
uint32_t events_a, events_b;
|
||||||
|
@ -517,7 +515,7 @@ bool tcp_splice_conn_from_sock(struct ctx *c, union epoll_ref ref,
|
||||||
{
|
{
|
||||||
assert(c->mode == MODE_PASTA);
|
assert(c->mode == MODE_PASTA);
|
||||||
|
|
||||||
if (ref.r.p.tcp.tcp.v6) {
|
if (sa->sa_family == AF_INET6) {
|
||||||
const struct sockaddr_in6 *sa6;
|
const struct sockaddr_in6 *sa6;
|
||||||
|
|
||||||
sa6 = (const struct sockaddr_in6 *)sa;
|
sa6 = (const struct sockaddr_in6 *)sa;
|
||||||
|
|
Loading…
Reference in a new issue