passt, pasta: Completely avoid dynamic memory allocation

Replace libc functions that might dynamically allocate memory with our own
implementations or wrappers.

Drop brk(2) from list of allowed syscalls in seccomp profile.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
Stefano Brivio 2021-10-14 01:21:29 +02:00
parent 66d5930ec7
commit 32d07f5e59
6 changed files with 389 additions and 28 deletions

10
conf.c
View file

@ -275,12 +275,11 @@ overlap:
*/ */
static void get_dns(struct ctx *c) static void get_dns(struct ctx *c)
{ {
int dns4_set, dns6_set, dnss_set, dns_set; int dns4_set, dns6_set, dnss_set, dns_set, fd;
struct in6_addr *dns6 = &c->dns6[0]; struct in6_addr *dns6 = &c->dns6[0];
struct fqdn *s = c->dns_search; struct fqdn *s = c->dns_search;
uint32_t *dns4 = &c->dns4[0]; uint32_t *dns4 = &c->dns4[0];
char buf[BUFSIZ], *p, *end; char buf[BUFSIZ], *p, *end;
FILE *r;
dns4_set = !c->v4 || !!*dns4; dns4_set = !c->v4 || !!*dns4;
dns6_set = !c->v6 || !IN6_IS_ADDR_UNSPECIFIED(dns6); dns6_set = !c->v6 || !IN6_IS_ADDR_UNSPECIFIED(dns6);
@ -290,11 +289,10 @@ static void get_dns(struct ctx *c)
if (dns_set && dnss_set) if (dns_set && dnss_set)
return; return;
r = fopen("/etc/resolv.conf", "r"); if ((fd = open("/etc/resolv.conf", O_RDONLY)) < 0)
if (!r)
goto out; goto out;
while (fgets(buf, BUFSIZ, r)) { while (line_read(buf, BUFSIZ, fd)) {
if (!dns_set && strstr(buf, "nameserver ") == buf) { if (!dns_set && strstr(buf, "nameserver ") == buf) {
p = strrchr(buf, ' '); p = strrchr(buf, ' ');
if (!p) if (!p)
@ -333,7 +331,7 @@ static void get_dns(struct ctx *c)
} }
} }
fclose(r); close(fd);
out: out:
if (!dns_set && dns4 == c->dns4 && dns6 == c->dns6) if (!dns_set && dns4 == c->dns4 && dns6 == c->dns6)

View file

@ -588,10 +588,10 @@ int dhcpv6(struct ctx *c, struct ethhdr *eh, size_t len)
*/ */
void dhcpv6_init(struct ctx *c) void dhcpv6_init(struct ctx *c)
{ {
struct tm y2k = { 0, 0, 0, 1, 0, 100, 0, 0, 0, 0, NULL }; time_t y2k = 946684800; /* Epoch to 2000-01-01T00:00:00Z, no mktime() */
uint32_t duid_time; uint32_t duid_time;
duid_time = htonl(difftime(time(NULL), mktime(&y2k))); duid_time = htonl(difftime(time(NULL), y2k));
resp.server_id.duid_time = duid_time; resp.server_id.duid_time = duid_time;
resp_not_on_link.server_id.duid_time = duid_time; resp_not_on_link.server_id.duid_time = duid_time;

16
passt.c
View file

@ -192,10 +192,10 @@ static void seccomp(struct ctx *c)
* *
* Return: 0 once interrupted, non-zero on failure * Return: 0 once interrupted, non-zero on failure
* *
* #syscalls read write open close fork dup2 exit chdir brk ioctl writev syslog * #syscalls read write open close fork dup2 exit chdir ioctl writev syslog
* #syscalls prlimit64 epoll_ctl epoll_create1 epoll_wait accept4 accept listen * #syscalls prlimit64 epoll_ctl epoll_create1 epoll_wait accept4 accept listen
* #syscalls socket bind connect getsockopt setsockopt recvfrom sendto shutdown * #syscalls socket bind connect getsockopt setsockopt recvfrom sendto shutdown
* #syscalls openat fstat fcntl lseek * #syscalls openat fstat fcntl lseek clone setsid exit_group
* #syscalls:pasta rt_sigreturn * #syscalls:pasta rt_sigreturn
*/ */
int main(int argc, char **argv) int main(int argc, char **argv)
@ -226,16 +226,16 @@ int main(int argc, char **argv)
if (madvise(pkt_buf, TAP_BUF_BYTES, MADV_HUGEPAGE)) if (madvise(pkt_buf, TAP_BUF_BYTES, MADV_HUGEPAGE))
perror("madvise"); perror("madvise");
openlog(log_name, 0, LOG_DAEMON); __openlog(log_name, 0, LOG_DAEMON);
setlogmask(LOG_MASK(LOG_EMERG)); __setlogmask(LOG_MASK(LOG_EMERG));
conf(&c, argc, argv); conf(&c, argc, argv);
seccomp(&c); seccomp(&c);
if (!c.debug && (c.stderr || isatty(fileno(stdout)))) if (!c.debug && (c.stderr || isatty(fileno(stdout))))
openlog(log_name, LOG_PERROR, LOG_DAEMON); __openlog(log_name, LOG_PERROR, LOG_DAEMON);
c.epollfd = epoll_create1(0); c.epollfd = epoll_create1(0);
if (c.epollfd == -1) { if (c.epollfd == -1) {
@ -271,11 +271,11 @@ int main(int argc, char **argv)
dhcpv6_init(&c); dhcpv6_init(&c);
if (c.debug) if (c.debug)
setlogmask(LOG_UPTO(LOG_DEBUG)); __setlogmask(LOG_UPTO(LOG_DEBUG));
else if (c.quiet) else if (c.quiet)
setlogmask(LOG_UPTO(LOG_ERR)); __setlogmask(LOG_UPTO(LOG_ERR));
else else
setlogmask(LOG_UPTO(LOG_INFO)); __setlogmask(LOG_UPTO(LOG_INFO));
if (isatty(fileno(stdout)) && !c.foreground) if (isatty(fileno(stdout)) && !c.foreground)
daemon(0, 0); daemon(0, 0);

253
pasta.c Normal file
View file

@ -0,0 +1,253 @@
// SPDX-License-Identifier: AGPL-3.0-or-later
/* PASST - Plug A Simple Socket Transport
* for qemu/UNIX domain socket mode
*
* PASTA - Pack A Subtle Tap Abstraction
* for network namespace/tap device mode
*
* pasta.c - pasta (namespace) specific implementations
*
* Copyright (c) 2020-2021 Red Hat GmbH
* Author: Stefano Brivio <sbrivio@redhat.com>
*
* #syscalls:pasta clone unshare waitid kill execve exit_group rt_sigprocmask
* #syscalls:pasta geteuid getdents64 readlink setsid
*/
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <syslog.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/wait.h>
#include <signal.h>
#include <dirent.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <net/ethernet.h>
#include <sys/syscall.h>
#include "util.h"
#include "passt.h"
#include "netlink.h"
/* PID of child, in case we created a namespace, and its procfs link */
/* Set from the fork() return value in pasta_start_ns(); zero if never set */
static int pasta_child_pid;
/* Target of /proc/<pasta_child_pid>/ns/net as read by pasta_start_ns() in the
 * parent; pasta_ns_cleanup() compares other processes' links against it and
 * does nothing while it is empty
 */
static char pasta_child_ns[PATH_MAX];
/**
 * pasta_ns_cleanup() - Look for processes in namespace, terminate them
 *
 * Scan /proc for numeric entries whose ns/net link matches pasta_child_ns and
 * signal them: SIGQUIT on the first scan, then, after sleeping one second,
 * SIGKILL on later scans. If a scan finds no matching process, return. If
 * processes are still found on the scan following the first SIGKILL round,
 * report it and give up.
 *
 * Does nothing if pasta_child_ns is empty or if /proc can't be opened.
 *
 * NOTE(review): this is reached from pasta_child_handler(), that is, from
 * signal context, but uses snprintf() and info(); confirm this is acceptable.
 */
static void pasta_ns_cleanup(void)
{
	char proc_path[PATH_MAX], ns_link[PATH_MAX], buf[BUFSIZ];
	int recheck = 0, found, waited = 0;
	int dir_fd, n;

	if (!*pasta_child_ns)
		return;

loop:
	/* Each scan reports its own result: don't carry over a stale flag */
	found = 0;

	if ((dir_fd = open("/proc", O_RDONLY | O_DIRECTORY)) < 0)
		return;

	/* NOTE(review): records are parsed as struct dirent; this presumably
	 * matches the getdents64 record layout on the supported builds, confirm
	 */
	while ((n = syscall(SYS_getdents64, dir_fd, buf, BUFSIZ)) > 0) {
		struct dirent *dp;
		int pos;

		for (pos = 0; pos < n; pos += dp->d_reclen) {
			ssize_t link_len;
			pid_t pid;

			dp = (struct dirent *)(buf + pos);

			/* Entries that don't parse as a non-zero number are
			 * not processes, skip them
			 */
			errno = 0;
			pid = strtol(dp->d_name, NULL, 0);
			if (!pid || errno)
				continue;

			snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net", pid);

			/* readlink() doesn't terminate the string: leave room
			 * for the terminator and add it, otherwise the
			 * comparison below would run into uninitialised bytes
			 */
			link_len = readlink(proc_path, ns_link, PATH_MAX - 1);
			if (link_len < 0)
				continue;
			ns_link[link_len] = 0;

			if (strncmp(ns_link, pasta_child_ns, PATH_MAX))
				continue;

			found = 1;
			kill(pid, waited ? SIGKILL : SIGQUIT);
		}
	}

	close(dir_fd);

	if (!found)
		return;

	if (!waited) {
		info("Waiting for all processes in namespace to terminate");
		sleep(1);
		waited = 1;
		goto loop;
	}

	if (!recheck) {
		/* SIGKILL was just sent: scan once more to see what's left */
		recheck = 1;
		goto loop;
	}

	info("Some processes in namespace didn't quit");
}
/**
 * pasta_child_handler() - Exit once shell exits (if we started it), reap clones
 * @signal:	Unused, handler deals with SIGCHLD only
 *
 * If pasta_child_pid is set and waitid() reports that this child exited, run
 * pasta_ns_cleanup() and terminate with EXIT_SUCCESS. Otherwise, reap up to
 * two other exited children without blocking.
 *
 * errno is saved on entry and restored on return, so that the waitid() calls
 * can't alter the value seen by the code this handler interrupted.
 */
void pasta_child_handler(int signal)
{
	int saved_errno = errno;
	siginfo_t infop;

	(void)signal;

	/* With WNOHANG, waitid() also returns 0 if no child changed state:
	 * make sure si_pid can't hold a stale value in that case
	 */
	infop.si_pid = 0;

	if (pasta_child_pid &&
	    !waitid(P_PID, pasta_child_pid, &infop, WEXITED | WNOHANG)) {
		if (infop.si_pid == pasta_child_pid) {
			pasta_ns_cleanup();
			exit(EXIT_SUCCESS);
		}
	}

	/* A NULL infop is a Linux-specific extension: pass a real buffer */
	waitid(P_ALL, 0, &infop, WEXITED | WNOHANG);
	waitid(P_ALL, 0, &infop, WEXITED | WNOHANG);

	errno = saved_errno;
}
/**
 * pasta_wait_for_ns() - Busy loop until we can enter the target namespace
 * @arg:	Execution context
 *
 * Unless @netns_only is set, first keep trying to open and join the user
 * namespace of the child, then do the same for its network namespace. The
 * descriptors are stored in the context. A descriptor that couldn't be joined
 * is closed before retrying; the loop also ends if that close() fails.
 *
 * Return: 0
 */
static int pasta_wait_for_ns(void *arg)
{
	struct ctx *c = (struct ctx *)arg;
	char ns[PATH_MAX];

	if (!c->netns_only) {
		snprintf(ns, PATH_MAX, "/proc/%i/ns/user", pasta_child_pid);

		for (;;) {
			/* Spin until the procfs entry can be opened */
			do
				c->pasta_userns_fd = open(ns, O_RDONLY);
			while (c->pasta_userns_fd < 0);

			if (!setns(c->pasta_userns_fd, 0) ||
			    close(c->pasta_userns_fd))
				break;
		}
	}

	snprintf(ns, PATH_MAX, "/proc/%i/ns/net", pasta_child_pid);

	for (;;) {
		/* Spin until the procfs entry can be opened */
		do
			c->pasta_netns_fd = open(ns, O_RDONLY);
		while (c->pasta_netns_fd < 0);

		if (!setns(c->pasta_netns_fd, 0) || close(c->pasta_netns_fd))
			break;
	}

	return 0;
}
/**
 * pasta_write_file() - Write a short string to an existing file, warn on error
 * @path:	Path of file, opened write-only
 * @data:	Bytes to write
 * @len:	Number of bytes to write
 *
 * Failures are reported with perror() but are otherwise not fatal.
 */
static void pasta_write_file(const char *path, const char *data, size_t len)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return;
	}

	if (write(fd, data, len) < 0)
		perror(path);

	close(fd);
}

/**
 * pasta_start_ns() - Fork shell in new namespace if target ns is not given
 * @c:		Execution context
 *
 * Forces foreground mode, and quiet mode unless debugging, then forks.
 *
 * The parent waits until it can join the namespaces of the child, records the
 * child's network namespace link in pasta_child_ns and returns.
 *
 * The child detaches a new network namespace (and a new user namespace unless
 * @netns_only is set), writes the ID maps and the ping group range, then
 * executes $SHELL (or /bin/sh if unset), as a login shell if its path contains
 * "/bash". The child never returns: it exits with failure if unshare() or
 * execve() fail.
 */
void pasta_start_ns(struct ctx *c)
{
	char buf[BUFSIZ], *shell, proc_path[PATH_MAX];
	uid_t euid = geteuid();

	c->foreground = 1;
	if (!c->debug)
		c->quiet = 1;

	if ((pasta_child_pid = fork()) == -1) {
		perror("fork");
		exit(EXIT_FAILURE);
	}

	if (pasta_child_pid) {
		ssize_t len;

		NS_CALL(pasta_wait_for_ns, c);

		snprintf(proc_path, PATH_MAX, "/proc/%i/ns/net",
			 pasta_child_pid);

		/* readlink() doesn't terminate the string. On failure, leave
		 * the link empty: pasta_ns_cleanup() will then do nothing
		 */
		len = readlink(proc_path, pasta_child_ns, PATH_MAX - 1);
		pasta_child_ns[len < 0 ? 0 : len] = 0;

		return;
	}

	if (unshare(CLONE_NEWNET | (c->netns_only ? 0 : CLONE_NEWUSER))) {
		perror("unshare");
		exit(EXIT_FAILURE);
	}

	if (!c->netns_only) {
		snprintf(buf, BUFSIZ, "%u %u %u", 0U, (unsigned)euid, 1U);

		pasta_write_file("/proc/self/uid_map", buf, strlen(buf));

		/* The terminating NUL byte is written as well */
		pasta_write_file("/proc/self/setgroups", "deny",
				 sizeof("deny"));

		/* NOTE(review): the same "0 <euid> 1" mapping is used for
		 * group IDs; this presumably only works if the effective GID
		 * equals the effective UID, confirm whether the effective GID
		 * should be used here instead
		 */
		pasta_write_file("/proc/self/gid_map", buf, strlen(buf));
	}

	pasta_write_file("/proc/sys/net/ipv4/ping_group_range", "0 0",
			 strlen("0 0"));

	shell = getenv("SHELL");
	if (!shell)
		shell = "/bin/sh";

	if (strstr(shell, "/bash"))
		execve(shell, ((char *[]) { shell, "-l", NULL }), environ);
	else
		execve(shell, ((char *[]) { shell, NULL }), environ);

	perror("execve");
	exit(EXIT_FAILURE);
}
/**
 * pasta_ns_conf() - Set up loopback and tap interfaces in namespace as needed
 * @c:		Execution context
 *
 * The loopback link (index 1) is always configured. If @pasta_conf_ns is set,
 * the tap link is configured with the guest MAC address and a final argument
 * of 1, then addresses and routes are added for each enabled IP version
 * (prefix length from the population count of @mask4 for IPv4, fixed to 64
 * for IPv6). Otherwise, the tap link is configured with the MAC address only
 * and a final argument of 0. Finally, the guest MAC address is propagated to
 * the pre-built L2 buffers.
 */
void pasta_ns_conf(struct ctx *c)
{
	int configure = c->pasta_conf_ns ? 1 : 0;

	nl_link(1, 1 /* lo */, MAC_ZERO, 1);

	/* NOTE(review): last argument presumably means "bring link up" */
	nl_link(1, c->pasta_ifi, c->mac_guest, configure);

	if (configure && c->v4) {
		nl_addr(1, c->pasta_ifi, AF_INET, &c->addr4,
			__builtin_popcount(c->mask4), NULL);
		nl_route(1, c->pasta_ifi, AF_INET, &c->gw4);
	}

	if (configure && c->v6) {
		nl_addr(1, c->pasta_ifi, AF_INET6, &c->addr6, 64, NULL);
		nl_route(1, c->pasta_ifi, AF_INET6, &c->gw6);
	}

	proto_update_l2_buf(c->mac_guest, NULL, NULL);
}

129
util.c
View file

@ -37,24 +37,27 @@
#include "util.h" #include "util.h"
#include "passt.h" #include "passt.h"
/* For __openlog() and __setlogmask() wrappers, and __vsyslog() (replacement) */
/* Priority mask: set by __setlogmask(), OR-ed with the facility by
 * __openlog(), tested by __vsyslog() to filter messages
 */
static int log_mask;
/* Datagram socket connected to _PATH_LOG by __openlog(), -1 if not open */
static int log_sock = -1;
/* Copy of the identity string passed to __openlog() */
static char log_ident[BUFSIZ];
/* Options passed to __openlog(), __vsyslog() checks them for LOG_PERROR */
static int log_opt;
/* Seconds of CLOCK_REALTIME at the latest __openlog() call, subtracted from
 * timestamps printed in debug mode
 */
static time_t log_debug_start;
#define logfn(name, level) \ #define logfn(name, level) \
void name(const char *format, ...) { \ void name(const char *format, ...) { \
char ts[sizeof("Mmm dd hh:mm:ss.")]; \
struct timespec tp; \ struct timespec tp; \
struct tm *tm; \
va_list args; \ va_list args; \
\ \
if (setlogmask(0) & LOG_MASK(LOG_DEBUG)) { \ if (setlogmask(0) & LOG_MASK(LOG_DEBUG)) { \
clock_gettime(CLOCK_REALTIME, &tp); \ clock_gettime(CLOCK_REALTIME, &tp); \
tm = gmtime(&tp.tv_sec); \ fprintf(stderr, "%lu.%04lu: ", \
strftime(ts, sizeof(ts), "%b %d %T.", tm); \ tp.tv_sec - log_debug_start, \
\
fprintf(stderr, "%s%04lu: ", ts, \
tp.tv_nsec / (100 * 1000)); \ tp.tv_nsec / (100 * 1000)); \
} \ } \
\ \
va_start(args, format); \ va_start(args, format); \
vsyslog(level, format, args); \ __vsyslog(level, format, args); \
va_end(args); \ va_end(args); \
\ \
if (setlogmask(0) & LOG_MASK(LOG_DEBUG) || \ if (setlogmask(0) & LOG_MASK(LOG_DEBUG) || \
@ -72,6 +75,79 @@ logfn(warn, LOG_WARNING)
logfn(info, LOG_INFO) logfn(info, LOG_INFO)
logfn(debug, LOG_DEBUG) logfn(debug, LOG_DEBUG)
/**
 * __openlog() - Non-optional openlog() wrapper, to allow custom vsyslog()
 * @ident:	openlog() identity (program name)
 * @option:	openlog() options
 * @facility:	openlog() facility (LOG_DAEMON)
 *
 * Records the current time as start for debug timestamps, connects the
 * logging socket to _PATH_LOG unless it's already open, stores identity and
 * options for __vsyslog(), then calls openlog() with the same arguments.
 *
 * If the socket can't be created or connected, return right away: in that
 * case identity and options are not stored and openlog() is not called.
 *
 * NOTE(review): @facility is OR-ed into log_mask, which __vsyslog() tests
 * against priority bits; confirm this is intended.
 */
void __openlog(const char *ident, int option, int facility)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX, };
	struct timespec now;

	clock_gettime(CLOCK_REALTIME, &now);
	log_debug_start = now.tv_sec;

	if (log_sock < 0) {
		log_sock = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
		if (log_sock < 0)
			return;

		strncpy(addr.sun_path, _PATH_LOG, sizeof(addr.sun_path));

		if (connect(log_sock, (const struct sockaddr *)&addr,
			    sizeof(addr)) != 0) {
			close(log_sock);
			log_sock = -1;
			return;
		}
	}

	log_opt = option;
	strncpy(log_ident, ident, sizeof(log_ident) - 1);
	log_mask |= facility;

	openlog(ident, option, facility);
}
/**
 * __setlogmask() - setlogmask() wrapper, to allow custom vsyslog()
 * @mask:	Same as setlogmask() mask
 *
 * Applies @mask to the libc logger and replaces the local copy that
 * __vsyslog() filters on.
 */
void __setlogmask(int mask)
{
	setlogmask(mask);
	log_mask = mask;
}
/**
 * __vsyslog() - vsyslog() implementation not using heap memory
 * @pri:	Facility and level map, same as priority for vsyslog()
 * @format:	Same as vsyslog() format
 * @ap:		Same as vsyslog() ap
 *
 * Messages whose priority is not enabled in log_mask are dropped. The message
 * is formatted into a stack buffer of BUFSIZ bytes, truncated if needed, and
 * always ends with exactly one trailing newline added here if missing.
 *
 * The message is sent to the logging socket if it's open. It's printed to
 * standard error if LOG_PERROR was requested with __openlog(), and also if
 * the logging socket is not open, so that it's not silently lost.
 */
void __vsyslog(int pri, const char *format, va_list ap)
{
	char buf[BUFSIZ];
	int prefix_len, n;

	if (!(LOG_MASK(LOG_PRI(pri)) & log_mask))
		return;

	/* Send without name and timestamp, the system logger should add them */
	prefix_len = snprintf(buf, BUFSIZ, "<%i> ", pri);
	if (prefix_len < 0 || prefix_len >= BUFSIZ)
		return;

	n = vsnprintf(buf + prefix_len, BUFSIZ - prefix_len, format, ap);
	if (n < 0)
		return;

	/* vsnprintf() returns the length the full message would have had:
	 * clamp it to what actually fits in the buffer
	 */
	n += prefix_len;
	if (n > BUFSIZ - 1)
		n = BUFSIZ - 1;

	if (buf[n - 1] != '\n') {
		/* If the buffer is full, sacrifice the last character */
		if (n == BUFSIZ - 1)
			n--;

		buf[n++] = '\n';
		buf[n] = 0;
	}

	/* Never pass the message itself as format string */
	if ((log_opt & LOG_PERROR) || log_sock < 0)
		fputs(buf + prefix_len, stderr);

	if (log_sock >= 0)
		send(log_sock, buf, n, 0);
}
/** /**
* ipv6_l4hdr() - Find pointer to L4 header in IPv6 packet and extract protocol * ipv6_l4hdr() - Find pointer to L4 header in IPv6 packet and extract protocol
* @ip6h: IPv6 header * @ip6h: IPv6 header
@ -291,6 +367,35 @@ int bitmap_isset(uint8_t *map, int bit)
return map[bit / 8] & (1 << bit % 8); return map[bit / 8] & (1 << bit % 8);
} }
/**
 * line_read() - Same as fgets(), without using heap, a file instead of a stream
 * @buf:	Read buffer
 * @len:	Size of @buf: at most @len - 1 bytes are read
 * @fd:		File descriptor for reading
 *
 * A chunk of up to @len - 1 bytes is read at once. If it contains a newline,
 * the string is terminated there (the newline is not returned) and the file
 * offset is moved back to the byte following it, so that the next call starts
 * from the next line. Empty lines are returned as empty strings.
 *
 * If the chunk contains no newline (line longer than the buffer, or last line
 * without a trailing newline), the whole chunk is returned.
 *
 * The descriptor needs to be seekable: the result of lseek() is not checked,
 * so on other descriptors the data following the first newline of each chunk
 * is lost.
 *
 * Return: @buf if a line is found, NULL on EOF or error
 */
char *line_read(char *buf, size_t len, int fd)
{
	ssize_t n;
	char *p;

	/* Room is needed for at least one character and the terminator */
	if (len < 2)
		return NULL;

	n = read(fd, buf, len - 1);
	if (n <= 0)
		return NULL;

	/* Terminate right after the data just read: bytes beyond that are
	 * left over from previous calls
	 */
	buf[n] = 0;

	p = strchr(buf, '\n');
	if (!p)
		return buf;

	*p = 0;

	/* Give back whatever follows the newline, also for empty lines */
	if (p + 1 < buf + n)
		lseek(fd, (off_t)(p + 1 - buf) - n, SEEK_CUR);

	return buf;
}
/** /**
* procfs_scan_listen() - Set bits for listening TCP or UDP sockets from procfs * procfs_scan_listen() - Set bits for listening TCP or UDP sockets from procfs
* @name: Corresponding name of file under /proc/net/ * @name: Corresponding name of file under /proc/net/
@ -302,14 +407,14 @@ void procfs_scan_listen(char *name, uint8_t *map, uint8_t *exclude)
char line[200], path[PATH_MAX]; char line[200], path[PATH_MAX];
unsigned long port; unsigned long port;
unsigned int state; unsigned int state;
FILE *fp; int fd;
snprintf(path, PATH_MAX, "/proc/net/%s", name); snprintf(path, PATH_MAX, "/proc/net/%s", name);
if (!(fp = fopen(path, "r"))) if ((fd = open(path, O_RDONLY)) < 0)
return; return;
fgets(line, sizeof(line), fp); line_read(line, sizeof(line), fd);
while (fgets(line, sizeof(line), fp)) { while (line_read(line, sizeof(line), fd)) {
if (sscanf(line, "%*u: %*x:%lx %*x:%*x %x", &port, &state) != 2) if (sscanf(line, "%*u: %*x:%lx %*x:%*x %x", &port, &state) != 2)
continue; continue;
@ -324,7 +429,7 @@ void procfs_scan_listen(char *name, uint8_t *map, uint8_t *exclude)
bitmap_set(map, port); bitmap_set(map, port);
} }
fclose(fp); close(fd);
} }
/** /**

5
util.h
View file

@ -133,6 +133,7 @@ enum {
#include <net/if.h> #include <net/if.h>
#include <linux/ip.h> #include <linux/ip.h>
#include <limits.h> #include <limits.h>
#include <stdarg.h>
enum bind_type { enum bind_type {
BIND_ANY = 0, BIND_ANY = 0,
@ -143,6 +144,9 @@ enum bind_type {
struct ctx; struct ctx;
void __openlog(const char *ident, int option, int facility);
void __vsyslog(int pri, const char *fmt, va_list ap);
void __setlogmask(int mask);
char *ipv6_l4hdr(struct ipv6hdr *ip6h, uint8_t *proto); char *ipv6_l4hdr(struct ipv6hdr *ip6h, uint8_t *proto);
int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port, int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port,
enum bind_type bind_addr, uint32_t data); enum bind_type bind_addr, uint32_t data);
@ -151,5 +155,6 @@ int timespec_diff_ms(struct timespec *a, struct timespec *b);
void bitmap_set(uint8_t *map, int bit); void bitmap_set(uint8_t *map, int bit);
void bitmap_clear(uint8_t *map, int bit); void bitmap_clear(uint8_t *map, int bit);
int bitmap_isset(uint8_t *map, int bit); int bitmap_isset(uint8_t *map, int bit);
char *line_read(char *buf, size_t len, int fd);
void procfs_scan_listen(char *name, uint8_t *map, uint8_t *exclude); void procfs_scan_listen(char *name, uint8_t *map, uint8_t *exclude);
int ns_enter(struct ctx *c); int ns_enter(struct ctx *c);