util: Make sock_l4() determine the protocol from the epoll type rather than the reverse

sock_l4() creates a socket of the given IP protocol number, and adds it to
the epoll state.  Currently it determines the correct tag for the epoll
data based on the protocol.  However, we have some future cases where we
might want different semantics, and therefore epoll types, for sockets of
the same protocol.  So, change sock_l4() to take the epoll type as an
explicit parameter, and determine the protocol from that.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
David Gibson 2024-07-05 20:43:59 +10:00 committed by Stefano Brivio
parent b625ed5fee
commit 74c1c5efcf
7 changed files with 81 additions and 67 deletions

41
epoll_type.h Normal file
View file

@ -0,0 +1,41 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright Red Hat
* Author: David Gibson <david@gibson.dropbear.id.au>
*/
#ifndef EPOLL_TYPE_H
#define EPOLL_TYPE_H
/**
 * enum epoll_type - Different types of fds we poll over
 * @EPOLL_TYPE_NONE:		Special value to indicate an invalid type
 * @EPOLL_TYPE_TCP:		Connected TCP sockets
 * @EPOLL_TYPE_TCP_SPLICE:	Connected TCP sockets (spliced)
 * @EPOLL_TYPE_TCP_LISTEN:	Listening TCP sockets
 * @EPOLL_TYPE_TCP_TIMER:	timerfds used for TCP timers
 * @EPOLL_TYPE_UDP:		UDP sockets
 * @EPOLL_TYPE_PING:		ICMP/ICMPv6 ping sockets
 * @EPOLL_TYPE_NSQUIT_INOTIFY:	inotify fd watching for end of netns (pasta)
 * @EPOLL_TYPE_NSQUIT_TIMER:	timer fd watching for end of netns, fallback
 *				for inotify (pasta)
 * @EPOLL_TYPE_TAP_PASTA:	tuntap character device
 * @EPOLL_TYPE_TAP_PASST:	socket connected to qemu
 * @EPOLL_TYPE_TAP_LISTEN:	socket listening for qemu socket connections
 * @EPOLL_NUM_TYPES:		Count of the enumerators above (one past the
 *				last type); keep it as the final entry
 */
enum epoll_type {
	EPOLL_TYPE_NONE = 0,

	EPOLL_TYPE_TCP,
	EPOLL_TYPE_TCP_SPLICE,
	EPOLL_TYPE_TCP_LISTEN,
	EPOLL_TYPE_TCP_TIMER,

	EPOLL_TYPE_UDP,
	EPOLL_TYPE_PING,

	EPOLL_TYPE_NSQUIT_INOTIFY,
	EPOLL_TYPE_NSQUIT_TIMER,

	EPOLL_TYPE_TAP_PASTA,
	EPOLL_TYPE_TAP_PASST,
	EPOLL_TYPE_TAP_LISTEN,

	EPOLL_NUM_TYPES,
};
#endif /* EPOLL_TYPE_H */

2
icmp.c
View file

@ -179,7 +179,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
}
ref.flowside = FLOW_SIDX(flow, TGTSIDE);
pingf->sock = sock_l4(c, af, flow_proto[flowtype], bind_addr, bind_if,
pingf->sock = sock_l4(c, af, EPOLL_TYPE_PING, bind_addr, bind_if,
0, ref.data);
if (pingf->sock < 0) {

32
passt.h
View file

@ -23,38 +23,6 @@ union epoll_ref;
#include "tcp.h"
#include "udp.h"
/**
 * enum epoll_type - Different types of fds we poll over
 * @EPOLL_TYPE_NONE:		Special value to indicate an invalid type
 * @EPOLL_TYPE_TCP:		Connected TCP sockets
 * @EPOLL_TYPE_TCP_SPLICE:	Connected TCP sockets (spliced)
 * @EPOLL_TYPE_TCP_LISTEN:	Listening TCP sockets
 * @EPOLL_TYPE_TCP_TIMER:	timerfds used for TCP timers
 * @EPOLL_TYPE_UDP:		UDP sockets
 * @EPOLL_TYPE_PING:		ICMP/ICMPv6 ping sockets
 * @EPOLL_TYPE_NSQUIT_INOTIFY:	inotify fd watching for end of netns (pasta)
 * @EPOLL_TYPE_NSQUIT_TIMER:	timer fd watching for end of netns, fallback
 *				for inotify (pasta)
 * @EPOLL_TYPE_TAP_PASTA:	tuntap character device
 * @EPOLL_TYPE_TAP_PASST:	socket connected to qemu
 * @EPOLL_TYPE_TAP_LISTEN:	socket listening for qemu socket connections
 * @EPOLL_NUM_TYPES:		Count of the enumerators above (one past the
 *				last type); keep it as the final entry
 */
enum epoll_type {
	EPOLL_TYPE_NONE = 0,

	EPOLL_TYPE_TCP,
	EPOLL_TYPE_TCP_SPLICE,
	EPOLL_TYPE_TCP_LISTEN,
	EPOLL_TYPE_TCP_TIMER,

	EPOLL_TYPE_UDP,
	EPOLL_TYPE_PING,

	EPOLL_TYPE_NSQUIT_INOTIFY,
	EPOLL_TYPE_NSQUIT_TIMER,

	EPOLL_TYPE_TAP_PASTA,
	EPOLL_TYPE_TAP_PASST,
	EPOLL_TYPE_TAP_LISTEN,

	EPOLL_NUM_TYPES,
};
/**
* union epoll_ref - Breakdown of reference for epoll fd bookkeeping
* @type: Type of fd (tells us what to do with events)

10
tcp.c
View file

@ -2467,7 +2467,7 @@ static int tcp_sock_init_af(const struct ctx *c, sa_family_t af, in_port_t port,
};
int s;
s = sock_l4(c, af, IPPROTO_TCP, addr, ifname, port, tref.u32);
s = sock_l4(c, af, EPOLL_TYPE_TCP_LISTEN, addr, ifname, port, tref.u32);
if (c->tcp.fwd_in.mode == FWD_AUTO) {
if (af == AF_INET || af == AF_UNSPEC)
@ -2531,8 +2531,8 @@ static void tcp_ns_sock_init4(const struct ctx *c, in_port_t port)
ASSERT(c->mode == MODE_PASTA);
s = sock_l4(c, AF_INET, IPPROTO_TCP, &in4addr_loopback, NULL, port,
tref.u32);
s = sock_l4(c, AF_INET, EPOLL_TYPE_TCP_LISTEN, &in4addr_loopback,
NULL, port, tref.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else
@ -2557,8 +2557,8 @@ static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port)
ASSERT(c->mode == MODE_PASTA);
s = sock_l4(c, AF_INET6, IPPROTO_TCP, &in6addr_loopback, NULL, port,
tref.u32);
s = sock_l4(c, AF_INET6, EPOLL_TYPE_TCP_LISTEN, &in6addr_loopback,
NULL, port, tref.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else

12
udp.c
View file

@ -917,7 +917,7 @@ int udp_tap_handler(struct ctx *c, uint8_t pif,
if (!IN4_IS_ADDR_LOOPBACK(&s_in.sin_addr))
bind_addr = c->ip4.addr_out;
s = sock_l4(c, AF_INET, IPPROTO_UDP, &bind_addr,
s = sock_l4(c, AF_INET, EPOLL_TYPE_UDP, &bind_addr,
bind_if, src, uref.u32);
if (s < 0)
return p->count - idx;
@ -972,7 +972,7 @@ int udp_tap_handler(struct ctx *c, uint8_t pif,
!IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr))
bind_addr = &c->ip6.addr_out;
s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr,
s = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP, bind_addr,
bind_if, src, uref.u32);
if (s < 0)
return p->count - idx;
@ -1047,13 +1047,13 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
uref.v6 = 0;
if (!ns) {
r4 = s = sock_l4(c, AF_INET, IPPROTO_UDP, addr,
r4 = s = sock_l4(c, AF_INET, EPOLL_TYPE_UDP, addr,
ifname, port, uref.u32);
udp_tap_map[V4][port].sock = s < 0 ? -1 : s;
udp_splice_init[V4][port].sock = s < 0 ? -1 : s;
} else {
r4 = s = sock_l4(c, AF_INET, IPPROTO_UDP,
r4 = s = sock_l4(c, AF_INET, EPOLL_TYPE_UDP,
&in4addr_loopback,
ifname, port, uref.u32);
udp_splice_ns[V4][port].sock = s < 0 ? -1 : s;
@ -1064,13 +1064,13 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
uref.v6 = 1;
if (!ns) {
r6 = s = sock_l4(c, AF_INET6, IPPROTO_UDP, addr,
r6 = s = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP, addr,
ifname, port, uref.u32);
udp_tap_map[V6][port].sock = s < 0 ? -1 : s;
udp_splice_init[V6][port].sock = s < 0 ? -1 : s;
} else {
r6 = s = sock_l4(c, AF_INET6, IPPROTO_UDP,
r6 = s = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP,
&in6addr_loopback,
ifname, port, uref.u32);
udp_splice_ns[V6][port].sock = s < 0 ? -1 : s;

48
util.c
View file

@ -35,7 +35,7 @@
/**
* sock_l4_sa() - Create and bind socket to socket address, add to epoll list
* @c: Execution context
* @proto: Protocol number
* @type: epoll type
* @sa: Socket address to bind to
* @sl: Length of @sa
* @ifname: Interface for binding, NULL for any
@ -44,34 +44,38 @@
*
* Return: newly created socket, negative error code on failure
*/
static int sock_l4_sa(const struct ctx *c, uint8_t proto,
static int sock_l4_sa(const struct ctx *c, enum epoll_type type,
const void *sa, socklen_t sl,
const char *ifname, bool v6only, uint32_t data)
{
sa_family_t af = ((const struct sockaddr *)sa)->sa_family;
union epoll_ref ref = { .data = data };
union epoll_ref ref = { .type = type, .data = data };
struct epoll_event ev;
int fd, y = 1, ret;
uint8_t proto;
int socktype;
switch (proto) {
case IPPROTO_TCP:
ref.type = EPOLL_TYPE_TCP_LISTEN;
switch (type) {
case EPOLL_TYPE_TCP_LISTEN:
proto = IPPROTO_TCP;
socktype = SOCK_STREAM | SOCK_NONBLOCK;
break;
case IPPROTO_UDP:
ref.type = EPOLL_TYPE_UDP;
case EPOLL_TYPE_UDP:
proto = IPPROTO_UDP;
socktype = SOCK_DGRAM | SOCK_NONBLOCK;
break;
case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
ref.type = EPOLL_TYPE_PING;
case EPOLL_TYPE_PING:
if (af == AF_INET)
proto = IPPROTO_ICMP;
else
proto = IPPROTO_ICMPV6;
socktype = SOCK_DGRAM | SOCK_NONBLOCK;
break;
default:
return -EPFNOSUPPORT; /* Not implemented. */
ASSERT(0);
}
if (proto == IPPROTO_TCP)
fd = socket(af, SOCK_STREAM | SOCK_NONBLOCK, proto);
else
fd = socket(af, SOCK_DGRAM | SOCK_NONBLOCK, proto);
fd = socket(af, socktype, proto);
ret = -errno;
if (fd < 0) {
@ -118,14 +122,14 @@ static int sock_l4_sa(const struct ctx *c, uint8_t proto,
* this is fine. This might also fail for ICMP because of a
* broken SELinux policy, see icmp_tap_handler().
*/
if (proto != IPPROTO_ICMP && proto != IPPROTO_ICMPV6) {
if (type != EPOLL_TYPE_PING) {
ret = -errno;
close(fd);
return ret;
}
}
if (proto == IPPROTO_TCP && listen(fd, 128) < 0) {
if (type == EPOLL_TYPE_TCP_LISTEN && listen(fd, 128) < 0) {
ret = -errno;
warn("TCP socket listen: %s", strerror(-ret));
close(fd);
@ -146,7 +150,7 @@ static int sock_l4_sa(const struct ctx *c, uint8_t proto,
* sock_l4() - Create and bind socket for given L4, add to epoll list
* @c: Execution context
* @af: Address family, AF_INET or AF_INET6
* @proto: Protocol number
* @type: epoll type
* @bind_addr: Address for binding, NULL for any
* @ifname: Interface for binding, NULL for any
* @port: Port, host order
@ -154,7 +158,7 @@ static int sock_l4_sa(const struct ctx *c, uint8_t proto,
*
* Return: newly created socket, negative error code on failure
*/
int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type,
const void *bind_addr, const char *ifname, uint16_t port,
uint32_t data)
{
@ -167,7 +171,7 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
};
if (bind_addr)
addr4.sin_addr = *(struct in_addr *)bind_addr;
return sock_l4_sa(c, proto, &addr4, sizeof(addr4), ifname,
return sock_l4_sa(c, type, &addr4, sizeof(addr4), ifname,
false, data);
}
@ -188,7 +192,7 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
sizeof(c->ip6.addr_ll)))
addr6.sin6_scope_id = c->ifi6;
}
return sock_l4_sa(c, proto, &addr6, sizeof(addr6), ifname,
return sock_l4_sa(c, type, &addr6, sizeof(addr6), ifname,
af == AF_INET6, data);
}
default:

3
util.h
View file

@ -137,13 +137,14 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
#include <limits.h>
#include <stdint.h>
#include "epoll_type.h"
#include "packet.h"
struct ctx;
/* cppcheck-suppress funcArgNamesDifferent */
__attribute__ ((weak)) int ffsl(long int i) { return __builtin_ffsl(i); }
int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type,
const void *bind_addr, const char *ifname, uint16_t port,
uint32_t data);
void sock_probe_mem(struct ctx *c);