epoll: Always use epoll_ref for the epoll data variable
epoll_ref contains a variety of information useful when handling epoll events on our sockets, and we place it in the epoll_event data field returned by epoll. However, for a few other things we use the 'fd' field in the standard union of types for that data field.

This actually introduces a bug which is vanishingly unlikely to hit in practice, but very nasty if it ever did: theoretically, if we had a very large file descriptor number for fd_tap or fd_tap_listen, it could overflow into bits that overlap with the 'proto' field in epoll_ref. With some very bad luck this could mean that we mistakenly think an event on a regular socket is an event on fd_tap or fd_tap_listen.

More practically, using different (but overlapping) fields of the epoll_data means we can't unify dispatch for the various different objects in the epoll. Therefore use the same epoll_ref as the data for the tap fds and the netns quit fd, adding new fd type values to describe them.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
3401644453
commit
6a6735ece4
4 changed files with 29 additions and 12 deletions
11
passt.c
11
passt.c
|
@ -60,6 +60,8 @@ char *epoll_type_str[EPOLL_TYPE_MAX + 1] = {
|
||||||
[EPOLL_TYPE_UDP] = "UDP socket",
|
[EPOLL_TYPE_UDP] = "UDP socket",
|
||||||
[EPOLL_TYPE_ICMP] = "ICMP socket",
|
[EPOLL_TYPE_ICMP] = "ICMP socket",
|
||||||
[EPOLL_TYPE_ICMPV6] = "ICMPv6 socket",
|
[EPOLL_TYPE_ICMPV6] = "ICMPv6 socket",
|
||||||
|
[EPOLL_TYPE_NSQUIT] = "namespace inotify",
|
||||||
|
[EPOLL_TYPE_TAP] = "tap device",
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -328,12 +330,11 @@ loop:
|
||||||
|
|
||||||
for (i = 0; i < nfds; i++) {
|
for (i = 0; i < nfds; i++) {
|
||||||
union epoll_ref ref = *((union epoll_ref *)&events[i].data.u64);
|
union epoll_ref ref = *((union epoll_ref *)&events[i].data.u64);
|
||||||
int fd = events[i].data.fd;
|
|
||||||
|
|
||||||
if (fd == c.fd_tap || fd == c.fd_tap_listen)
|
if (ref.type == EPOLL_TYPE_TAP)
|
||||||
tap_handler(&c, fd, events[i].events, &now);
|
tap_handler(&c, ref.fd, events[i].events, &now);
|
||||||
else if (fd == quit_fd)
|
else if (ref.type == EPOLL_TYPE_NSQUIT)
|
||||||
pasta_netns_quit_handler(&c, fd);
|
pasta_netns_quit_handler(&c, quit_fd);
|
||||||
else
|
else
|
||||||
sock_handler(&c, ref, events[i].events, &now);
|
sock_handler(&c, ref, events[i].events, &now);
|
||||||
}
|
}
|
||||||
|
|
6
passt.h
6
passt.h
|
@ -55,8 +55,12 @@ enum epoll_type {
|
||||||
EPOLL_TYPE_ICMP,
|
EPOLL_TYPE_ICMP,
|
||||||
/* ICMPv6 sockets */
|
/* ICMPv6 sockets */
|
||||||
EPOLL_TYPE_ICMPV6,
|
EPOLL_TYPE_ICMPV6,
|
||||||
|
/* inotify fd watching for end of netns (pasta) */
|
||||||
|
EPOLL_TYPE_NSQUIT,
|
||||||
|
/* tap char device, or qemu socket fd */
|
||||||
|
EPOLL_TYPE_TAP,
|
||||||
|
|
||||||
EPOLL_TYPE_MAX = EPOLL_TYPE_ICMPV6,
|
EPOLL_TYPE_MAX = EPOLL_TYPE_TAP,
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
8
pasta.c
8
pasta.c
|
@ -365,7 +365,10 @@ void pasta_ns_conf(struct ctx *c)
|
||||||
int pasta_netns_quit_init(struct ctx *c)
|
int pasta_netns_quit_init(struct ctx *c)
|
||||||
{
|
{
|
||||||
int flags = O_NONBLOCK | O_CLOEXEC;
|
int flags = O_NONBLOCK | O_CLOEXEC;
|
||||||
struct epoll_event ev = { .events = EPOLLIN };
|
union epoll_ref ref = { .type = EPOLL_TYPE_NSQUIT };
|
||||||
|
struct epoll_event ev = {
|
||||||
|
.events = EPOLLIN
|
||||||
|
};
|
||||||
int inotify_fd;
|
int inotify_fd;
|
||||||
|
|
||||||
if (c->mode != MODE_PASTA || c->no_netns_quit || !*c->netns_base)
|
if (c->mode != MODE_PASTA || c->no_netns_quit || !*c->netns_base)
|
||||||
|
@ -381,7 +384,8 @@ int pasta_netns_quit_init(struct ctx *c)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ev.data.fd = inotify_fd;
|
ref.fd = inotify_fd;
|
||||||
|
ev.data.u64 = ref.u64;
|
||||||
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, inotify_fd, &ev);
|
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, inotify_fd, &ev);
|
||||||
|
|
||||||
return inotify_fd;
|
return inotify_fd;
|
||||||
|
|
16
tap.c
16
tap.c
|
@ -1071,6 +1071,7 @@ restart:
|
||||||
static void tap_sock_unix_init(struct ctx *c)
|
static void tap_sock_unix_init(struct ctx *c)
|
||||||
{
|
{
|
||||||
int fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
int fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
||||||
|
union epoll_ref ref = { .type = EPOLL_TYPE_TAP };
|
||||||
struct epoll_event ev = { 0 };
|
struct epoll_event ev = { 0 };
|
||||||
struct sockaddr_un addr = {
|
struct sockaddr_un addr = {
|
||||||
.sun_family = AF_UNIX,
|
.sun_family = AF_UNIX,
|
||||||
|
@ -1123,8 +1124,9 @@ static void tap_sock_unix_init(struct ctx *c)
|
||||||
|
|
||||||
listen(fd, 0);
|
listen(fd, 0);
|
||||||
|
|
||||||
ev.data.fd = c->fd_tap_listen = fd;
|
ref.fd = c->fd_tap_listen = fd;
|
||||||
ev.events = EPOLLIN | EPOLLET;
|
ev.events = EPOLLIN | EPOLLET;
|
||||||
|
ev.data.u64 = ref.u64;
|
||||||
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap_listen, &ev);
|
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap_listen, &ev);
|
||||||
|
|
||||||
info("You can now start qemu (>= 7.2, with commit 13c6be96618c):");
|
info("You can now start qemu (>= 7.2, with commit 13c6be96618c):");
|
||||||
|
@ -1141,6 +1143,7 @@ static void tap_sock_unix_init(struct ctx *c)
|
||||||
*/
|
*/
|
||||||
static void tap_sock_unix_new(struct ctx *c, uint32_t events)
|
static void tap_sock_unix_new(struct ctx *c, uint32_t events)
|
||||||
{
|
{
|
||||||
|
union epoll_ref ref = { .type = EPOLL_TYPE_TAP };
|
||||||
struct epoll_event ev = { 0 };
|
struct epoll_event ev = { 0 };
|
||||||
int v = INT_MAX / 2;
|
int v = INT_MAX / 2;
|
||||||
struct ucred ucred;
|
struct ucred ucred;
|
||||||
|
@ -1180,8 +1183,9 @@ static void tap_sock_unix_new(struct ctx *c, uint32_t events)
|
||||||
setsockopt(c->fd_tap, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)))
|
setsockopt(c->fd_tap, SOL_SOCKET, SO_SNDBUF, &v, sizeof(v)))
|
||||||
trace("tap: failed to set SO_SNDBUF to %i", v);
|
trace("tap: failed to set SO_SNDBUF to %i", v);
|
||||||
|
|
||||||
ev.data.fd = c->fd_tap;
|
ref.fd = c->fd_tap;
|
||||||
ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
|
ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
|
||||||
|
ev.data.u64 = ref.u64;
|
||||||
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
|
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1226,6 +1230,7 @@ static int tap_ns_tun(void *arg)
|
||||||
*/
|
*/
|
||||||
static void tap_sock_tun_init(struct ctx *c)
|
static void tap_sock_tun_init(struct ctx *c)
|
||||||
{
|
{
|
||||||
|
union epoll_ref ref = { .type = EPOLL_TYPE_TAP };
|
||||||
struct epoll_event ev = { 0 };
|
struct epoll_event ev = { 0 };
|
||||||
|
|
||||||
NS_CALL(tap_ns_tun, c);
|
NS_CALL(tap_ns_tun, c);
|
||||||
|
@ -1234,8 +1239,9 @@ static void tap_sock_tun_init(struct ctx *c)
|
||||||
|
|
||||||
pasta_ns_conf(c);
|
pasta_ns_conf(c);
|
||||||
|
|
||||||
ev.data.fd = c->fd_tap;
|
ref.fd = c->fd_tap;
|
||||||
ev.events = EPOLLIN | EPOLLRDHUP;
|
ev.events = EPOLLIN | EPOLLRDHUP;
|
||||||
|
ev.data.u64 = ref.u64;
|
||||||
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
|
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1257,11 +1263,13 @@ void tap_sock_init(struct ctx *c)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c->fd_tap != -1) { /* Passed as --fd */
|
if (c->fd_tap != -1) { /* Passed as --fd */
|
||||||
|
union epoll_ref ref = { .type = EPOLL_TYPE_TAP };
|
||||||
struct epoll_event ev = { 0 };
|
struct epoll_event ev = { 0 };
|
||||||
ASSERT(c->one_off);
|
ASSERT(c->one_off);
|
||||||
|
|
||||||
ev.data.fd = c->fd_tap;
|
ref.fd = c->fd_tap;
|
||||||
ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
|
ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
|
||||||
|
ev.data.u64 = ref.u64;
|
||||||
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
|
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, c->fd_tap, &ev);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue