tcp: Avoid SO_ACCEPTCONN getsockopt() by noting listening/data socket numbers

...the rest is reshuffling existing macros to use the bits we need in
TCP code.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
Stefano Brivio 2021-04-25 13:34:04 +02:00
parent 48afbe321e
commit db1fe773a3
5 changed files with 83 additions and 52 deletions

45
passt.c
View file

@ -613,40 +613,39 @@ static int tap_handler(struct ctx *c)
/** /**
* sock_handler() - Event handler for L4 sockets * sock_handler() - Event handler for L4 sockets
* @c: Execution context * @c: Execution context
* @fd: File descriptor associated to event * @s: Socket associated to event
* @events epoll events * @events epoll events
*/ */
static void sock_handler(struct ctx *c, int fd, uint32_t events) static void sock_handler(struct ctx *c, int s, uint32_t events)
{ {
socklen_t sl; socklen_t sl;
int so; int proto;
sl = sizeof(so); sl = sizeof(proto);
#define IN(x, proto) (x >= c->proto.fd_min && x <= c->proto.fd_max) if ( FD_PROTO(s, udp) && !FD_PROTO(s, icmp) && !FD_PROTO(s, tcp))
proto = IPPROTO_UDP;
else if (FD_PROTO(s, tcp) && !FD_PROTO(s, icmp) && !FD_PROTO(s, udp))
proto = IPPROTO_TCP;
else if (FD_PROTO(s, icmp) && !FD_PROTO(s, udp) && !FD_PROTO(s, tcp))
proto = IPPROTO_ICMP; /* Fits ICMPv6 below, too */
else if (getsockopt(s, SOL_SOCKET, SO_PROTOCOL, &proto, &sl))
proto = -1;
if (IN(fd, udp) && !IN(fd, icmp) && !IN(fd, tcp)) if (proto == -1) {
so = IPPROTO_UDP; epoll_ctl(c->epollfd, EPOLL_CTL_DEL, s, NULL);
else if (IN(fd, tcp) && !IN(fd, icmp) && !IN(fd, udp)) close(s);
so = IPPROTO_TCP;
else if (IN(fd, icmp) && !IN(fd, udp) && !IN(fd, tcp))
so = IPPROTO_ICMP; /* Fits ICMPv6 below, too */
else if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &so, &sl)) {
epoll_ctl(c->epollfd, EPOLL_CTL_DEL, fd, NULL);
close(fd);
return; return;
} }
#undef IN debug("%s: packet from socket %i", getprotobynumber(proto)->p_name, s);
debug("%s: packet from socket %i", getprotobynumber(so)->p_name, fd); if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)
icmp_sock_handler(c, s, events);
if (so == IPPROTO_ICMP || so == IPPROTO_ICMPV6) else if (proto == IPPROTO_TCP)
icmp_sock_handler(c, fd, events); tcp_sock_handler(c, s, events);
else if (so == IPPROTO_TCP) else if (proto == IPPROTO_UDP)
tcp_sock_handler(c, fd, events); udp_sock_handler(c, s, events);
else if (so == IPPROTO_UDP)
udp_sock_handler(c, fd, events);
} }
/** /**

40
tcp.c
View file

@ -1003,10 +1003,8 @@ static void tcp_conn_from_sock(struct ctx *c, int fd)
if (s == -1) if (s == -1)
return; return;
if (s < c->tcp.fd_min) CHECK_SET_MIN_MAX(c->tcp.fd_, s);
c->tcp.fd_min = s; CHECK_SET_MIN_MAX(c->tcp.fd_conn_, s);
if (s > c->tcp.fd_max)
c->tcp.fd_max = s;
if (sa_l.ss_family == AF_INET) { if (sa_l.ss_family == AF_INET) {
struct sockaddr_in *sa4 = (struct sockaddr_in *)&sa_r; struct sockaddr_in *sa4 = (struct sockaddr_in *)&sa_r;
@ -1406,7 +1404,7 @@ static void tcp_connect_finish(struct ctx *c, int s)
void tcp_sock_handler(struct ctx *c, int s, uint32_t events) void tcp_sock_handler(struct ctx *c, int s, uint32_t events)
{ {
socklen_t sl; socklen_t sl;
int so; int accept;
if (tc[s].s == LAST_ACK) { if (tc[s].s == LAST_ACK) {
tcp_send_to_tap(c, s, ACK, NULL, 0); tcp_send_to_tap(c, s, ACK, NULL, 0);
@ -1414,21 +1412,28 @@ void tcp_sock_handler(struct ctx *c, int s, uint32_t events)
return; return;
} }
if (tc[s].s == SOCK_SYN_SENT) { if (tc[s].s == SOCK_SYN_SENT) {
/* This can only be a socket error or a shutdown from remote */ /* This can only be a socket error or a shutdown from remote */
tcp_rst(c, s); tcp_rst(c, s);
return; return;
} }
if (IN_INTERVAL(c->tcp.fd_listen_min, c->tcp.fd_listen_max, s) &&
!IN_INTERVAL(c->tcp.fd_conn_min, c->tcp.fd_conn_max, s))
accept = 1;
else if (IN_INTERVAL(c->tcp.fd_conn_min, c->tcp.fd_conn_max, s) &&
!IN_INTERVAL(c->tcp.fd_listen_min, c->tcp.fd_listen_max, s))
accept = 0;
else if (getsockopt(s, SOL_SOCKET, SO_ACCEPTCONN, &accept, &sl))
accept = -1;
sl = sizeof(so); if ((events & EPOLLERR) || accept == -1) {
if ((events & EPOLLERR) ||
getsockopt(s, SOL_SOCKET, SO_ACCEPTCONN, &so, &sl)) {
if (tc[s].s != CLOSED) if (tc[s].s != CLOSED)
tcp_rst(c, s); tcp_rst(c, s);
return; return;
} }
if (so) { if (accept) {
tcp_conn_from_sock(c, s); tcp_conn_from_sock(c, s);
return; return;
} }
@ -1466,15 +1471,24 @@ void tcp_sock_handler(struct ctx *c, int s, uint32_t events)
int tcp_sock_init(struct ctx *c) int tcp_sock_init(struct ctx *c)
{ {
in_port_t port; in_port_t port;
int s = 0;
c->tcp.fd_min = INT_MAX; c->tcp.fd_min = c->tcp.fd_listen_min = c->tcp.fd_conn_min = INT_MAX;
c->tcp.fd_max = 0; c->tcp.fd_max = c->tcp.fd_listen_max = c->tcp.fd_conn_max = 0;
CHECK_SET_MIN_MAX(c->tcp.fd_listen_, s);
for (port = 0; port < (1 << 15) + (1 << 14); port++) { for (port = 0; port < (1 << 15) + (1 << 14); port++) {
if (c->v4 && sock_l4_add(c, 4, IPPROTO_TCP, port) < 0) if (c->v4) {
if ((s = sock_l4_add(c, 4, IPPROTO_TCP, port)) < 0)
return -1; return -1;
if (c->v6 && sock_l4_add(c, 6, IPPROTO_TCP, port) < 0) CHECK_SET_MIN_MAX(c->tcp.fd_listen_, s);
}
if (c->v6) {
if ((s = sock_l4_add(c, 6, IPPROTO_TCP, port)) < 0)
return -1; return -1;
CHECK_SET_MIN_MAX(c->tcp.fd_listen_, s);
}
} }
getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM); getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM);

8
tcp.h
View file

@ -14,11 +14,19 @@ void tcp_timer(struct ctx *c, struct timespec *ts);
* @hash_secret: 128-bit secret for hash functions, ISN and hash table * @hash_secret: 128-bit secret for hash functions, ISN and hash table
* @fd_min: Lowest file descriptor number for TCP ever used * @fd_min: Lowest file descriptor number for TCP ever used
* @fd_max: Highest file descriptor number for TCP ever used * @fd_max: Highest file descriptor number for TCP ever used
* @fd_listen_min: Lowest file descriptor number for listening sockets
* @fd_listen_max: Highest file descriptor number for listening sockets
* @fd_conn_min: Lowest file descriptor number for connected sockets
* @fd_conn_max: Highest file descriptor number for connected sockets
*/ */
struct tcp_ctx { struct tcp_ctx {
uint64_t hash_secret[2]; uint64_t hash_secret[2];
int fd_min; int fd_min;
int fd_max; int fd_max;
int fd_listen_min;
int fd_listen_max;
int fd_conn_min;
int fd_conn_max;
}; };
#endif /* TCP_H */ #endif /* TCP_H */

19
util.c
View file

@ -25,6 +25,7 @@
#include <stdarg.h> #include <stdarg.h>
#include "passt.h" #include "passt.h"
#include "util.h"
#define logfn(name, level) \ #define logfn(name, level) \
void name(const char *format, ...) { \ void name(const char *format, ...) { \
@ -189,20 +190,10 @@ int sock_l4_add(struct ctx *c, int v, uint16_t proto, uint16_t port)
return -1; return -1;
} }
#define CHECK_SET_MIN_MAX(ipproto, proto_ctx, fd) \ CHECK_SET_MIN_MAX_PROTO_FD(proto, IPPROTO_ICMP, icmp, fd);
if (proto == (ipproto)) { \ CHECK_SET_MIN_MAX_PROTO_FD(proto, IPPROTO_ICMPV6, icmp, fd);
if (fd < c->proto_ctx.fd_min) \ CHECK_SET_MIN_MAX_PROTO_FD(proto, IPPROTO_TCP, tcp, fd);
c->proto_ctx.fd_min = (fd); \ CHECK_SET_MIN_MAX_PROTO_FD(proto, IPPROTO_UDP, udp, fd);
if (fd > c->proto_ctx.fd_max) \
c->proto_ctx.fd_max = (fd); \
}
CHECK_SET_MIN_MAX(IPPROTO_ICMP, icmp, fd);
CHECK_SET_MIN_MAX(IPPROTO_ICMPV6, icmp, fd);
CHECK_SET_MIN_MAX(IPPROTO_TCP, tcp, fd);
CHECK_SET_MIN_MAX(IPPROTO_UDP, udp, fd);
#undef CHECK_SET_MIN_MAX
if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)
goto epoll_add; goto epoll_add;

19
util.h
View file

@ -8,6 +8,25 @@ void debug(const char *format, ...);
#define debug(...) { } #define debug(...) { }
#endif #endif
/**
 * CHECK_SET_MIN_MAX() - Widen a min/max pair so that it includes a descriptor
 * @basename:	Lvalue prefix: "min" and "max" are pasted onto its last token,
 *		e.g. c->tcp.fd_ updates c->tcp.fd_min and c->tcp.fd_max
 * @fd:		File descriptor number (int) to include
 *
 * @fd is evaluated exactly once, so arguments with side effects are safe.
 */
#define CHECK_SET_MIN_MAX(basename, fd) \
	do { \
		const int check_set_min_max_fd_ = (fd); \
 \
		if (check_set_min_max_fd_ < basename##min) \
			basename##min = check_set_min_max_fd_; \
		if (check_set_min_max_fd_ > basename##max) \
			basename##max = check_set_min_max_fd_; \
	} while (0)

/**
 * CHECK_SET_MIN_MAX_PROTO_FD() - Update per-protocol fd range on protocol match
 * @proto:	Protocol number to check
 * @ipproto:	Protocol number the range in @proto_ctx belongs to
 * @proto_ctx:	Member of the context pointed to by 'c' holding fd_min, fd_max
 * @fd:		File descriptor number, evaluated only if @proto matches
 *
 * Relies on a pointer named 'c' being in scope at the point of expansion.
 */
#define CHECK_SET_MIN_MAX_PROTO_FD(proto, ipproto, proto_ctx, fd) \
	do { \
		if ((proto) == (ipproto)) \
			CHECK_SET_MIN_MAX(c->proto_ctx.fd_, (fd)); \
	} while (0)

/**
 * IN_INTERVAL() - Check whether a value lies within a closed interval
 * @a:		Lower bound, inclusive
 * @b:		Upper bound, inclusive
 * @x:		Value to check; may be evaluated twice, avoid side effects
 */
#define IN_INTERVAL(a, b, x)	((x) >= (a) && (x) <= (b))

/**
 * FD_PROTO() - Check whether a descriptor falls in a protocol's fd range
 * @x:		File descriptor number
 * @proto:	Member of the context pointed to by 'c' holding fd_min, fd_max
 *
 * Relies on a pointer named 'c' being in scope at the point of expansion.
 */
#define FD_PROTO(x, proto) \
	(IN_INTERVAL(c->proto.fd_min, c->proto.fd_max, (x)))
uint16_t csum_fold(uint32_t sum); uint16_t csum_fold(uint32_t sum);
uint16_t csum_ip4(void *buf, size_t len); uint16_t csum_ip4(void *buf, size_t len);
void csum_tcp4(struct iphdr *iph); void csum_tcp4(struct iphdr *iph);