From 33b1bdd079f1b40dffb040e40579d7434c28d10a Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 26 Jan 2022 06:55:28 +0100 Subject: [PATCH] seccomp: Add a number of alternate and per-arch syscalls Depending on the C library, but not necessarily in all the functions we use, statx() might be used instead of stat(), getdents() instead of getdents64(), readlinkat() instead of readlink(), openat() instead of open(). On aarch64, it's clone() and not fork(), and dup3() instead of dup2() -- just allow the existing alternative instead of dealing with per-arch selections. Since glibc commit 9a7565403758 ("posix: Consolidate fork implementation"), we need to allow set_robust_list() for fork()/clone(), even in a single-threaded context. On some architectures, epoll_pwait() is provided instead of epoll_wait(), but never both. Same with newfstat() and fstat(), sigreturn() and rt_sigreturn(), getdents64() and getdents(), readlink() and readlinkat(), unlink() and unlinkat(), whereas pipe() might not be available, but pipe2() always is, exclusively or not. Seen on Fedora 34: newfstatat() is used on top of fstat(). syslog() is an actual system call on some glibc/arch combinations, instead of a connect()/send() implementation. On ppc64 and ppc64le, _llseek(), recv(), send() and getuid() are used. For ppc64 only: ugetrlimit() for the getrlimit() implementation, plus sigreturn() and fcntl64(). On s390x, additionally, we need to allow socketcall() (on top of socket()), and sigreturn() also for passt (not just for pasta). Signed-off-by: Stefano Brivio --- README.md | 2 +- conf.c | 2 +- passt.c | 14 +++++++++----- pasta.c | 3 ++- tap.c | 2 +- tcp.c | 2 +- 6 files changed, 15 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 8345656..ee689f5 100644 --- a/README.md +++ b/README.md @@ -233,7 +233,7 @@ speeding up local connections, and usually requiring NAT. _pasta_: * ✅ root operation not allowed outside user namespaces * ✅ all capabilities dropped, other than `CAP_NET_BIND_SERVICE` (if granted) * ✅ no external dependencies (other than a standard C library) -* ✅ restrictive seccomp profiles (46 syscalls allowed for _passt_, 58 for +* ✅ restrictive seccomp profiles (50 syscalls allowed for _passt_, 62 for _pasta_) * ✅ static checkers in continuous integration (clang-tidy, cppcheck) * 🛠️ rework of TCP state machine (flags instead of states), TCP timers, and code diff --git a/conf.c b/conf.c index 6810144..7859f25 100644 --- a/conf.c +++ b/conf.c @@ -11,7 +11,7 @@ * Copyright (c) 2020-2021 Red Hat GmbH * Author: Stefano Brivio * - * #syscalls stat + * #syscalls stat|statx */ #include diff --git a/passt.c b/passt.c index 4f2b896..3c9fb90 100644 --- a/passt.c +++ b/passt.c @@ -273,12 +273,16 @@ static void pid_file(struct ctx *c) { * * Return: non-zero on failure * - * #syscalls read write open close fork dup2 exit chdir ioctl writev syslog - * #syscalls prlimit64 epoll_ctl epoll_create1 epoll_wait accept4 accept listen + * #syscalls read write open|openat close fork|clone dup2|dup3 ioctl writev * #syscalls socket bind connect getsockopt setsockopt recvfrom sendto shutdown - * #syscalls openat fstat fcntl lseek clone setsid exit_group getpid - * #syscalls clock_gettime newfstatat - * #syscalls:pasta rt_sigreturn + * #syscalls accept4 accept listen set_robust_list getrlimit setrlimit + * #syscalls openat fcntl lseek clone setsid exit exit_group getpid chdir + * #syscalls epoll_ctl epoll_create1 epoll_wait|epoll_pwait epoll_pwait + * #syscalls prlimit64 clock_gettime fstat|newfstat newfstatat syslog + * #syscalls ppc64le:_llseek ppc64le:recv ppc64le:send ppc64le:getuid + * #syscalls ppc64:_llseek ppc64:recv ppc64:send ppc64:getuid ppc64:ugetrlimit + * #syscalls s390x:socketcall s390x:sigreturn + * #syscalls:pasta rt_sigreturn|sigreturn ppc64:sigreturn ppc64:fcntl64 */ int main(int argc, char **argv) { diff --git a/pasta.c b/pasta.c index 3928ad0..bce30d4 100644 --- a/pasta.c +++ b/pasta.c @@ -12,7 +12,8 @@ * Author: Stefano Brivio * * #syscalls:pasta clone unshare waitid kill execve exit_group rt_sigprocmask - * #syscalls:pasta geteuid getdents64 readlink setsid nanosleep clock_nanosleep + * #syscalls:pasta geteuid getdents64|getdents readlink|readlinkat setsid + * #syscalls:pasta nanosleep clock_nanosleep */ #include diff --git a/tap.c b/tap.c index d2f234d..2bf6f71 100644 --- a/tap.c +++ b/tap.c @@ -772,7 +772,7 @@ restart: * tap_sock_init_unix() - Create and bind AF_UNIX socket, wait for connection * @c: Execution context * - * #syscalls:passt unlink + * #syscalls:passt unlink|unlinkat */ static void tap_sock_init_unix(struct ctx *c) { diff --git a/tcp.c b/tcp.c index 36c2bb5..01f09e9 100644 --- a/tcp.c +++ b/tcp.c @@ -304,7 +304,7 @@ * - SPLICE_FIN_TO: FIN (EPOLLRDHUP) seen from connected socket * - SPLICE_FIN_BOTH: FIN (EPOLLRDHUP) seen from both sides * - * #syscalls pipe pipe2 + * #syscalls pipe|pipe2 pipe2 */ #include