pasta: Add fallback timer mechanism to check if namespace is gone
We don't know how frequently this happens, but hitting fs.inotify.max_user_watches or similar sysctl limits is definitely not out of the question, and Paul mentioned that, for example, Podman's CI environments hit similar issues in the past. Introduce a fallback mechanism based on a timer file descriptor: we grab the directory handle at startup, and we can then use openat(), triggered periodically, to check if the (network) namespace directory still exists. If openat() fails at some point, exit. Link: https://github.com/containers/podman/pull/21563#issuecomment-1943505707 Reported-by: Paul Holzinger <pholzing@redhat.com> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
f57a2fb4d5
commit
8f3f8e190c
4 changed files with 112 additions and 44 deletions
34
passt.c
34
passt.c
|
@ -61,17 +61,18 @@
|
||||||
char pkt_buf[PKT_BUF_BYTES] __attribute__ ((aligned(PAGE_SIZE)));
|
char pkt_buf[PKT_BUF_BYTES] __attribute__ ((aligned(PAGE_SIZE)));
|
||||||
|
|
||||||
char *epoll_type_str[] = { /* description strings, indexed by enum epoll_type constants */
|
char *epoll_type_str[] = { /* description strings, indexed by enum epoll_type constants */
|
||||||
[EPOLL_TYPE_TCP] = "connected TCP socket",
|
[EPOLL_TYPE_TCP] = "connected TCP socket",
|
||||||
[EPOLL_TYPE_TCP_SPLICE] = "connected spliced TCP socket",
|
[EPOLL_TYPE_TCP_SPLICE] = "connected spliced TCP socket",
|
||||||
[EPOLL_TYPE_TCP_LISTEN] = "listening TCP socket",
|
[EPOLL_TYPE_TCP_LISTEN] = "listening TCP socket",
|
||||||
[EPOLL_TYPE_TCP_TIMER] = "TCP timer",
|
[EPOLL_TYPE_TCP_TIMER] = "TCP timer",
|
||||||
[EPOLL_TYPE_UDP] = "UDP socket",
|
[EPOLL_TYPE_UDP] = "UDP socket",
|
||||||
[EPOLL_TYPE_ICMP] = "ICMP socket",
|
[EPOLL_TYPE_ICMP] = "ICMP socket",
|
||||||
[EPOLL_TYPE_ICMPV6] = "ICMPv6 socket",
|
[EPOLL_TYPE_ICMPV6] = "ICMPv6 socket",
|
||||||
[EPOLL_TYPE_NSQUIT] = "namespace inotify",
|
[EPOLL_TYPE_NSQUIT_INOTIFY] = "namespace inotify watch",
|
||||||
[EPOLL_TYPE_TAP_PASTA] = "/dev/net/tun device",
|
[EPOLL_TYPE_NSQUIT_TIMER] = "namespace timer watch", /* timer fd used as fallback for the inotify watch */
|
||||||
[EPOLL_TYPE_TAP_PASST] = "connected qemu socket",
|
[EPOLL_TYPE_TAP_PASTA] = "/dev/net/tun device",
|
||||||
[EPOLL_TYPE_TAP_LISTEN] = "listening qemu socket",
|
[EPOLL_TYPE_TAP_PASST] = "connected qemu socket",
|
||||||
|
[EPOLL_TYPE_TAP_LISTEN] = "listening qemu socket",
|
||||||
};
|
};
|
||||||
static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES,
|
static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES,
|
||||||
"epoll_type_str[] doesn't match enum epoll_type");
|
"epoll_type_str[] doesn't match enum epoll_type");
|
||||||
|
@ -201,7 +202,7 @@ void exit_handler(int signal)
|
||||||
*/
|
*/
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
int nfds, i, devnull_fd = -1, pidfile_fd = -1, quit_fd;
|
int nfds, i, devnull_fd = -1, pidfile_fd = -1;
|
||||||
struct epoll_event events[EPOLL_EVENTS];
|
struct epoll_event events[EPOLL_EVENTS];
|
||||||
char *log_name, argv0[PATH_MAX], *name;
|
char *log_name, argv0[PATH_MAX], *name;
|
||||||
struct ctx c = { 0 };
|
struct ctx c = { 0 };
|
||||||
|
@ -274,7 +275,7 @@ int main(int argc, char **argv)
|
||||||
if (c.force_stderr || isatty(fileno(stdout)))
|
if (c.force_stderr || isatty(fileno(stdout)))
|
||||||
__openlog(log_name, LOG_PERROR, LOG_DAEMON);
|
__openlog(log_name, LOG_PERROR, LOG_DAEMON);
|
||||||
|
|
||||||
quit_fd = pasta_netns_quit_init(&c);
|
pasta_netns_quit_init(&c);
|
||||||
|
|
||||||
tap_sock_init(&c);
|
tap_sock_init(&c);
|
||||||
|
|
||||||
|
@ -370,8 +371,11 @@ loop:
|
||||||
case EPOLL_TYPE_TAP_LISTEN:
|
case EPOLL_TYPE_TAP_LISTEN:
|
||||||
tap_listen_handler(&c, eventmask);
|
tap_listen_handler(&c, eventmask);
|
||||||
break;
|
break;
|
||||||
case EPOLL_TYPE_NSQUIT:
|
case EPOLL_TYPE_NSQUIT_INOTIFY:
|
||||||
pasta_netns_quit_handler(&c, quit_fd);
|
pasta_netns_quit_inotify_handler(&c, ref.fd);
|
||||||
|
break;
|
||||||
|
case EPOLL_TYPE_NSQUIT_TIMER:
|
||||||
|
pasta_netns_quit_timer_handler(&c, ref);
|
||||||
break;
|
break;
|
||||||
case EPOLL_TYPE_TCP:
|
case EPOLL_TYPE_TCP:
|
||||||
tcp_sock_handler(&c, ref, eventmask);
|
tcp_sock_handler(&c, ref, eventmask);
|
||||||
|
|
6
passt.h
6
passt.h
|
@ -64,7 +64,9 @@ enum epoll_type {
|
||||||
/* ICMPv6 sockets */
|
/* ICMPv6 sockets */
|
||||||
EPOLL_TYPE_ICMPV6,
|
EPOLL_TYPE_ICMPV6,
|
||||||
/* inotify fd watching for end of netns (pasta) */
|
/* inotify fd watching for end of netns (pasta) */
|
||||||
EPOLL_TYPE_NSQUIT,
|
EPOLL_TYPE_NSQUIT_INOTIFY,
|
||||||
|
/* timer fd watching for end of netns, fallback for inotify (pasta) */
|
||||||
|
EPOLL_TYPE_NSQUIT_TIMER,
|
||||||
/* tuntap character device */
|
/* tuntap character device */
|
||||||
EPOLL_TYPE_TAP_PASTA,
|
EPOLL_TYPE_TAP_PASTA,
|
||||||
/* socket connected to qemu */
|
/* socket connected to qemu */
|
||||||
|
@ -84,6 +86,7 @@ enum epoll_type {
|
||||||
* @udp: UDP-specific reference part
|
* @udp: UDP-specific reference part
|
||||||
* @icmp: ICMP-specific reference part
|
* @icmp: ICMP-specific reference part
|
||||||
* @data: Data handled by protocol handlers
|
* @data: Data handled by protocol handlers
|
||||||
|
* @nsdir_fd: netns dirfd for fallback timer checking if namespace is gone
|
||||||
* @u64: Opaque reference for epoll_ctl() and epoll_wait()
|
* @u64: Opaque reference for epoll_ctl() and epoll_wait()
|
||||||
*/
|
*/
|
||||||
union epoll_ref {
|
union epoll_ref {
|
||||||
|
@ -99,6 +102,7 @@ union epoll_ref {
|
||||||
union udp_epoll_ref udp;
|
union udp_epoll_ref udp;
|
||||||
union icmp_epoll_ref icmp;
|
union icmp_epoll_ref icmp;
|
||||||
uint32_t data;
|
uint32_t data;
|
||||||
|
int nsdir_fd;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
uint64_t u64;
|
uint64_t u64;
|
||||||
|
|
111
pasta.c
111
pasta.c
|
@ -30,6 +30,7 @@
|
||||||
#include <sys/epoll.h>
|
#include <sys/epoll.h>
|
||||||
#include <sys/inotify.h>
|
#include <sys/inotify.h>
|
||||||
#include <sys/mount.h>
|
#include <sys/mount.h>
|
||||||
|
#include <sys/timerfd.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
@ -357,46 +358,78 @@ void pasta_ns_conf(struct ctx *c)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* pasta_netns_quit_init() - Watch network namespace to quit once it's gone
|
* pasta_netns_quit_timer() - Set up fallback timer to monitor namespace
|
||||||
* @c: Execution context
|
|
||||||
*
|
*
|
||||||
* Return: inotify file descriptor, -1 on failure or if not needed/applicable
|
* Return: timerfd file descriptor, negative error code on failure (the descriptor is closed again if arming it fails)
|
||||||
*/
|
*/
|
||||||
int pasta_netns_quit_init(const struct ctx *c)
|
static int pasta_netns_quit_timer(void)
|
||||||
{
|
{
|
||||||
int flags = O_NONBLOCK | O_CLOEXEC;
|
int fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC); /* close-on-exec timer on the monotonic clock */
|
||||||
union epoll_ref ref = { .type = EPOLL_TYPE_NSQUIT };
|
struct itimerspec it = { { 1, 0 }, { 1, 0 } }; /* one-second interval, first expiration after one second */
|
||||||
struct epoll_event ev = {
|
|
||||||
.events = EPOLLIN
|
|
||||||
};
|
|
||||||
int inotify_fd;
|
|
||||||
|
|
||||||
if (c->mode != MODE_PASTA || c->no_netns_quit || !*c->netns_base)
|
if (fd == -1) {
|
||||||
return -1;
|
err("timerfd_create(): %s", strerror(errno));
|
||||||
|
return -errno; /* NOTE(review): errno is read after err(), which might overwrite it -- consider saving it right after the failing call */
|
||||||
if ((inotify_fd = inotify_init1(flags)) < 0) {
|
|
||||||
perror("inotify_init(): won't quit once netns is gone");
|
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inotify_add_watch(inotify_fd, c->netns_dir, IN_DELETE) < 0) {
|
if (timerfd_settime(fd, 0, &it, NULL) < 0) { /* flags 0: relative timer */
|
||||||
perror("inotify_add_watch(): won't quit once netns is gone");
|
err("timerfd_settime(): %s", strerror(errno));
|
||||||
return -1;
|
close(fd); /* don't leak the timer descriptor on failure */
|
||||||
|
return -errno; /* NOTE(review): err() and close() above might have changed errno by now -- confirm, or save it earlier */
|
||||||
}
|
}
|
||||||
|
|
||||||
ref.fd = inotify_fd;
|
return fd;
|
||||||
ev.data.u64 = ref.u64;
|
|
||||||
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, inotify_fd, &ev);
|
|
||||||
|
|
||||||
return inotify_fd;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* pasta_netns_quit_handler() - Handle ns directory events, exit if ns is gone
|
* pasta_netns_quit_init() - Watch network namespace to quit once it's gone
|
||||||
|
* @c: Execution context (mode, no_netns_quit, netns_base, netns_dir and epollfd are read)
|
||||||
|
*/
|
||||||
|
void pasta_netns_quit_init(const struct ctx *c)
|
||||||
|
{
|
||||||
|
union epoll_ref ref = { .type = EPOLL_TYPE_NSQUIT_INOTIFY }; /* default to inotify, switched to the timer type on fallback */
|
||||||
|
struct epoll_event ev = { .events = EPOLLIN };
|
||||||
|
int flags = O_NONBLOCK | O_CLOEXEC;
|
||||||
|
int fd;
|
||||||
|
|
||||||
|
if (c->mode != MODE_PASTA || c->no_netns_quit || !*c->netns_base) /* nothing to watch: not pasta mode, quitting disabled, or empty netns name */
|
||||||
|
return;
|
||||||
|
|
||||||
|
if ((fd = inotify_init1(flags)) < 0) /* on failure fd stays negative, which selects the timer fallback below */
|
||||||
|
warn("inotify_init1(): %s, use a timer", strerror(errno));
|
||||||
|
|
||||||
|
if (fd >= 0 && inotify_add_watch(fd, c->netns_dir, IN_DELETE) < 0) { /* watch for deletions in the netns directory */
|
||||||
|
warn("inotify_add_watch(): %s, use a timer",
|
||||||
|
strerror(errno));
|
||||||
|
close(fd);
|
||||||
|
fd = -1; /* mark inotify as unusable so the fallback path runs */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fd < 0) { /* inotify could not be set up: fall back to a periodic timer */
|
||||||
|
if ((fd = pasta_netns_quit_timer()) < 0)
|
||||||
|
die("Failed to set up fallback netns timer, exiting");
|
||||||
|
|
||||||
|
ref.nsdir_fd = open(c->netns_dir, O_CLOEXEC | O_RDONLY); /* directory handle stored in the epoll reference, later passed to openat() by the timer handler */
|
||||||
|
if (ref.nsdir_fd < 0)
|
||||||
|
die("netns dir open: %s, exiting", strerror(errno));
|
||||||
|
|
||||||
|
ref.type = EPOLL_TYPE_NSQUIT_TIMER;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fd > FD_REF_MAX) /* presumably the largest descriptor number ref.fd can hold -- FD_REF_MAX is defined elsewhere, confirm */
|
||||||
|
die("netns monitor file number %i too big, exiting", fd);
|
||||||
|
|
||||||
|
ref.fd = fd;
|
||||||
|
ev.data.u64 = ref.u64; /* the whole reference travels as the opaque epoll payload */
|
||||||
|
epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev); /* NOTE(review): return value is not checked */
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* pasta_netns_quit_inotify_handler() - Handle inotify watch, exit if ns is gone
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
* @inotify_fd: inotify file descriptor with watch on namespace directory
|
* @inotify_fd: inotify file descriptor with watch on namespace directory
|
||||||
*/
|
*/
|
||||||
void pasta_netns_quit_handler(struct ctx *c, int inotify_fd)
|
void pasta_netns_quit_inotify_handler(struct ctx *c, int inotify_fd)
|
||||||
{
|
{
|
||||||
char buf[sizeof(struct inotify_event) + NAME_MAX + 1];
|
char buf[sizeof(struct inotify_event) + NAME_MAX + 1];
|
||||||
const struct inotify_event *in_ev = (struct inotify_event *)buf;
|
const struct inotify_event *in_ev = (struct inotify_event *)buf;
|
||||||
|
@ -410,3 +443,29 @@ void pasta_netns_quit_handler(struct ctx *c, int inotify_fd)
|
||||||
info("Namespace %s is gone, exiting", c->netns_base);
|
info("Namespace %s is gone, exiting", c->netns_base);
|
||||||
exit(EXIT_SUCCESS);
|
exit(EXIT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* pasta_netns_quit_timer_handler() - Handle timer, exit if ns is gone
|
||||||
|
* @c: Execution context (netns_base is the name looked up on every expiration)
|
||||||
|
* @ref: epoll reference for timer descriptor (fd is the timer, nsdir_fd the netns directory handle)
|
||||||
|
*/
|
||||||
|
void pasta_netns_quit_timer_handler(struct ctx *c, union epoll_ref ref)
|
||||||
|
{
|
||||||
|
uint64_t expirations;
|
||||||
|
ssize_t n;
|
||||||
|
int fd;
|
||||||
|
|
||||||
|
n = read(ref.fd, &expirations, sizeof(expirations)); /* fetch the 8-byte expiration counter from the timer descriptor; its value is not used further */
|
||||||
|
if (n < 0)
|
||||||
|
die("Namespace watch timer read() error: %s", strerror(errno));
|
||||||
|
if ((size_t)n < sizeof(expirations)) /* short read is only reported, the check below still runs */
|
||||||
|
warn("Namespace watch timer: short read(): %zi", n);
|
||||||
|
|
||||||
|
fd = openat(ref.nsdir_fd, c->netns_base, O_PATH | O_CLOEXEC); /* existence probe only: the handle is closed right away below */
|
||||||
|
if (fd < 0) { /* any openat() failure, whatever errno says, is treated as "namespace gone" */
|
||||||
|
info("Namespace %s is gone, exiting", c->netns_base);
|
||||||
|
exit(EXIT_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
close(fd);
|
||||||
|
}
|
||||||
|
|
5
pasta.h
5
pasta.h
|
@ -13,7 +13,8 @@ void pasta_start_ns(struct ctx *c, uid_t uid, gid_t gid,
|
||||||
int argc, char *argv[]);
|
int argc, char *argv[]);
|
||||||
void pasta_ns_conf(struct ctx *c);
|
void pasta_ns_conf(struct ctx *c);
|
||||||
void pasta_child_handler(int signal);
|
void pasta_child_handler(int signal);
|
||||||
int pasta_netns_quit_init(const struct ctx *c);
|
void pasta_netns_quit_init(const struct ctx *c);
|
||||||
void pasta_netns_quit_handler(struct ctx *c, int inotify_fd);
|
void pasta_netns_quit_inotify_handler(struct ctx *c, int inotify_fd);
|
||||||
|
void pasta_netns_quit_timer_handler(struct ctx *c, union epoll_ref ref);
|
||||||
|
|
||||||
#endif /* PASTA_H */
|
#endif /* PASTA_H */
|
||||||
|
|
Loading…
Reference in a new issue