passt, pasta: Add seccomp support

List of allowed syscalls comes from comments in the form:
	#syscalls <list>

for syscalls needed both in passt and pasta mode, and:
	#syscalls:pasta <list>
	#syscalls:passt <list>

for syscalls specifically needed in pasta or passt mode only.

seccomp.sh builds a list of BPF statements from those comments,
prefixed by a binary search tree to keep lookup fast.

While at it, clean up the Makefile a bit by using wildcards.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
Stefano Brivio 2021-10-13 22:25:03 +02:00
parent f318174a93
commit 66d5930ec7
10 changed files with 259 additions and 9 deletions

View file

@ -2,6 +2,7 @@ CFLAGS += -Wall -Wextra -pedantic
# Compile-time definitions; the $(shell ...) values are sampled on the build host
CFLAGS += -DRLIMIT_STACK_VAL=$(shell ulimit -s)
CFLAGS += -DPAGE_SIZE=$(shell getconf PAGE_SIZE)
CFLAGS += -DNETNS_RUN_DIR=\"/run/netns\"
# Upper-case the machine name to form the AUDIT_ARCH_* constant name. The tr
# sets are quoted: unquoted, [a-z] and [A-Z] are shell patterns and would be
# replaced by any matching single-letter file name in the build directory.
CFLAGS += -DPASST_AUDIT_ARCH=AUDIT_ARCH_$(shell uname -m | tr '[a-z]' '[A-Z]')
prefix ?= /usr/local
@ -13,14 +14,12 @@ avx2: clean all
static: CFLAGS += -static
static: clean all
passt: passt.c passt.h arp.c arp.h checksum.c checksum.h conf.c conf.h \
dhcp.c dhcp.h dhcpv6.c dhcpv6.h pcap.c pcap.h ndp.c ndp.h \
netlink.c netlink.h pasta.c pasta.h siphash.c siphash.h tap.c tap.h \
icmp.c icmp.h tcp.c tcp.h udp.c udp.h util.c util.h
$(CC) $(CFLAGS) \
passt.c arp.c checksum.c conf.c dhcp.c dhcpv6.c pasta.c pcap.c \
ndp.c netlink.c siphash.c tap.c icmp.c tcp.c udp.c util.c \
-o passt
# seccomp.h is generated by seccomp.sh: regenerate it whenever any C source or
# header changes; seccomp.h itself is filtered out of its own prerequisites
seccomp.h: *.c $(filter-out seccomp.h,$(wildcard *.h))
@ ./seccomp.sh
# passt is built from every .c file in this directory except qrap.c, and
# depends on every header except qrap.h, plus the generated seccomp.h
passt: $(filter-out qrap.c,$(wildcard *.c)) \
$(filter-out qrap.h,$(wildcard *.h)) seccomp.h
$(CC) $(CFLAGS) $(filter-out qrap.c,$(wildcard *.c)) -o passt
pasta: passt
ln -s passt pasta
@ -35,7 +34,7 @@ qrap: qrap.c passt.h
.PHONY: clean
clean:
-${RM} passt *.o qrap pasta pasta.1 passt4netns \
-${RM} passt *.o seccomp.h qrap pasta pasta.1 passt4netns \
passt.tar passt.tar.gz *.deb *.rpm
install: passt pasta qrap

2
conf.c
View file

@ -10,6 +10,8 @@
*
* Copyright (c) 2020-2021 Red Hat GmbH
* Author: Stefano Brivio <sbrivio@redhat.com>
*
* #syscalls stat
*/
#define _GNU_SOURCE

1
igmp.c
View file

@ -1 +1,2 @@
/* TO BE IMPLEMENTED */
/* NOTE(review): presumably a placeholder so that this file is not an empty
 * translation unit now that it is picked up by the *.c wildcard -- confirm
 */
__attribute__((__unused__)) static void __(void) { }

1
mld.c
View file

@ -1 +1,2 @@
/* TO BE IMPLEMENTED */
/* NOTE(review): presumably a placeholder so that this file is not an empty
 * translation unit now that it is picked up by the *.c wildcard -- confirm
 */
__attribute__((__unused__)) static void __(void) { }

36
passt.c
View file

@ -51,7 +51,12 @@
#include <time.h>
#include <syslog.h>
#include <sys/stat.h>
#include <seccomp.h>
#include <sys/prctl.h>
#include <linux/filter.h>
#include <stddef.h>
#include "seccomp.h"
#include "util.h"
#include "passt.h"
#include "dhcp.h"
@ -157,12 +162,41 @@ void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
udp_update_l2_buf(eth_d, eth_s, ip_da);
}
/**
 * seccomp() - Install the seccomp filter matching the operating mode
 * @c:		Execution context, only the mode is read
 *
 * Doesn't return on failure: reports the prctl() error and exits.
 */
static void seccomp(struct ctx *c)
{
	int passt_mode = (c->mode == MODE_PASST);
	struct sock_fprog prog;

	/* Pick the generated filter program for this mode */
	prog.filter = passt_mode ? filter_passt : filter_pasta;
	prog.len = passt_mode ? (unsigned short)ARRAY_SIZE(filter_passt)
			      : (unsigned short)ARRAY_SIZE(filter_pasta);

	/* Both calls need to succeed, the filter is loaded only if the first
	 * one did
	 */
	if (!prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) &&
	    !prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
		return;

	perror("prctl");
	exit(EXIT_FAILURE);
}
/**
* main() - Entry point and main loop
* @argc: Argument count
* @argv: Options, plus optional target PID for pasta mode
*
* Return: 0 once interrupted, non-zero on failure
*
* #syscalls read write open close fork dup2 exit chdir brk ioctl writev syslog
* #syscalls prlimit64 epoll_ctl epoll_create1 epoll_wait accept4 accept listen
* #syscalls socket bind connect getsockopt setsockopt recvfrom sendto shutdown
* #syscalls openat fstat fcntl lseek
* #syscalls:pasta rt_sigreturn
*/
int main(int argc, char **argv)
{
@ -198,6 +232,8 @@ int main(int argc, char **argv)
conf(&c, argc, argv);
seccomp(&c);
if (!c.debug && (c.stderr || isatty(fileno(stdout))))
openlog(log_name, LOG_PERROR, LOG_DAEMON);

180
seccomp.sh Executable file
View file

@ -0,0 +1,180 @@
#!/bin/sh -eu
#
# SPDX-License-Identifier: AGPL-3.0-or-later
#
# PASST - Plug A Simple Socket Transport
# for qemu/UNIX domain socket mode
#
# PASTA - Pack A Subtle Tap Abstraction
# for network namespace/tap device mode
#
# seccomp.sh - Build seccomp profiles from "#syscalls[:PROFILE]" comments in code
#
# Copyright (c) 2021 Red Hat GmbH
# Author: Stefano Brivio <sbrivio@redhat.com>
# Scratch file: one line per BPF statement of the profile currently being built
TMP="$(mktemp)"
# Generated header, all profiles are appended to it
OUT="seccomp.h"
HEADER="/* This file was automatically generated by $(basename ${0}) */"
# Prefix for each profile: open the filter_@PROFILE@ array, load 'arch' from
# seccomp_data and jump @KILL@ statements ahead unless it equals
# PASST_AUDIT_ARCH, then load the syscall number 'nr' for the comparisons that
# follow
PRE='
struct sock_filter filter_@PROFILE@[] = {
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
(offsetof(struct seccomp_data, arch))),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, PASST_AUDIT_ARCH, 0, @KILL@),
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
(offsetof(struct seccomp_data, nr))),
'
# Suffix for each profile: return actions, kill first, then allow, followed by
# the end of the array
POST=' BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
};
'
# Syscall, @NR@: number, @ALLOW@: offset to RET_ALLOW, @NAME@: syscall name
# On mismatch the jump offset is 0, that is, the next statement is evaluated
CALL=' BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, @NR@, @ALLOW@, 0), /* @NAME@ */'
# Binary search tree node or leaf, @NR@: value, @R@: right jump, @L@: left jump
# The right jump is taken if the syscall number is >= @NR@, the left one if not
BST=' BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, @NR@, @R@, @L@),'
# sub() - Substitute in-place file line with processed template line
# $1: Line number in ${TMP} to be replaced
# $2: Name of the variable holding the template (e.g. BST, CALL)
# $@: Replacement for @KEY@ in the form KEY:value
sub() {
# Empty IFS: unquoted expansions below are not subject to field splitting
IFS=
__line_no="${1}"
# Indirect expansion: fetch the content of the variable named by $2
__template="$(eval printf '%s' "\${${2}}")"
shift; shift
# First overwrite the whole line with the raw template ('#' as delimiter)...
sed -i "${__line_no}s#.*#${__template}#" "${TMP}"
# ...then fill in the @KEY@ placeholders, one replacement at a time
for __def in ${@}; do
__key="@${__def%%:*}@"
__value="${__def#*:}"
sed -i "${__line_no}s/${__key}/${__value}/" "${TMP}"
done
unset IFS
}
# finish() - Finalise header file from temporary files with prefix and suffix
# $1: Variable name of prefix
# $@: Replacements for prefix variable, in the form KEY:value
#
# Appends to ${OUT}, in this order: processed prefix, content of ${TMP}, and
# ${POST}. ${TMP} is removed in between.
finish() {
# Empty IFS: unquoted expansions below are not subject to field splitting
IFS=
# Indirect expansion: fetch the content of the variable named by $1
__out="$(eval printf '%s' "\${${1}}")"
shift
# Replace each @KEY@ placeholder in the prefix, first match on each line only
for __def in ${@}; do
__key="@${__def%%:*}@"
__value="${__def#*:}"
__out="$(printf '%s' "${__out}" | sed "s#${__key}#${__value}#")"
done
printf '%s\n' "${__out}" >> "${OUT}"
cat "${TMP}" >> "${OUT}"
# The file is created again by the first append of the next gen_profile() run
rm "${TMP}"
printf '%s' "${POST}" >> "${OUT}"
unset IFS
}
# log2() - Integer binary logarithm, rounded down
# $1:	Operand
#
# Prints the number of right shifts needed to bring the operand to zero, minus
# one: that is, -1 for an operand of 0
log2() {
	__y=${1}
	__x=-1
	while [ ${__y} -gt 0 ]; do
		__y=$(( __y >> 1 ))
		__x=$(( __x + 1 ))
	done
	echo ${__x}
}
# gen_profile() - Build struct sock_filter for a single profile
# $1: Profile name
# $@: Names of allowed system calls, amount padded to next power of two
#
# Layout of ${TMP}, one statement per line: first the search tree statements
# (a quarter of the number of calls, minus one), then one comparison per call,
# sorted by syscall number. Tree statements only select the call comparison to
# start from: from there, comparisons are evaluated one after the other.
gen_profile() {
__profile="${1}"
shift
__statements_calls=${#}
__bst_levels=$(log2 $(( __statements_calls / 4 )) )
__statements_bst=$(( __statements_calls / 4 - 1 ))
__statements=$((__statements_calls + __statements_bst))
# Reserve one line for each tree statement: -1 placeholders are meant to sort
# before any syscall number
for __i in $(seq 1 ${__statements_bst} ); do
echo -1 >> "${TMP}"
done
# Resolve each syscall name passed as argument to its number
for __i in $(seq 1 ${__statements_calls} ); do
ausyscall $(eval echo \${${__i}}) --exact >> "${TMP}"
done
sort -go "${TMP}" "${TMP}"
# __distance: position, among sorted calls, of the first comparison value for
# the current level, halved at each level
# __ll, __lr: left and right jump offsets, relative to the next statement
# __line: line in ${TMP} of the tree statement being written
__distance=$(( __statements_calls / 2 ))
__level_nodes=1
__ll=0
__line=1
for __level in $(seq 1 $(( __bst_levels - 1 )) ); do
# Nodes
__cmp_pos=${__distance}
for __node in $(seq 1 ${__level_nodes}); do
# Value to compare against: syscall number at position __cmp_pos among calls
__cmp_line=$(( __statements_bst + __cmp_pos ))
__lr=$(( __ll + 1 ))
__nr="$(sed -n ${__cmp_line}p "${TMP}")"
sub ${__line} BST "NR:${__nr}" "L:${__ll}" "R:${__lr}"
# Next node is one line further, and so is its left child: same offset as the
# right child of this node
__ll=${__lr}
__line=$(( __line + 1 ))
__cmp_pos=$(( __cmp_pos + __distance * 2 ))
done
__distance=$(( __distance / 2 ))
__level_nodes=$(( __level_nodes * 2 ))
done
# Leaves
# Jumps from leaves land on call statements: the left jump of the first leaf
# skips the remaining leaves, the right one skips __distance calls more
__ll=$(( __level_nodes - 1 ))
__lr=$(( __ll + __distance - 1 ))
__cmp_pos=${__distance}
for __leaf in $(seq 1 ${__level_nodes}); do
__cmp_line=$(( __statements_bst + __cmp_pos ))
__nr="$(sed -n ${__cmp_line}p "${TMP}")"
sub ${__line} BST "NR:${__nr}" "L:${__ll}" "R:${__lr}"
__ll=$(( __lr + __distance - 1 ))
__lr=$(( __ll + __distance))
__line=$(( __line + 1 ))
__cmp_pos=$(( __cmp_pos + __distance * 2 ))
done
# Calls
# Replace each bare syscall number with a comparison: on match, jump over the
# remaining statements and the kill action of the suffix, to the allow action
for __i in $(seq $(( __statements_bst + 1 )) ${__statements}); do
__nr="$(sed -n ${__i}p "${TMP}")"
__name=$(ausyscall ${__nr})
__allow=$(( __statements - __i + 1 ))
sub ${__i} CALL "NR:${__nr}" "NAME:${__name}" "ALLOW:${__allow}"
done
# On architecture mismatch, the prefix jumps over the load of the syscall
# number and all the statements above, to the kill action of the suffix
finish PRE "PROFILE:${__profile}" "KILL:$(( __statements + 1))"
}
# Start from a fresh header: profiles are appended by finish()
printf '%s\n' "${HEADER}" > "${OUT}"
# Profile names come from "#syscalls:PROFILE" comment lines in C sources and
# headers in the current directory
__profiles="$(sed -n 's/[\t ]*\*[\t ]*#syscalls:\([^ ]*\).*/\1/p' *.[ch] | sort -u)"
for __p in ${__profiles}; do
# Calls for this profile: from "#syscalls" lines without a profile name, and
# from "#syscalls:PROFILE" lines matching it, one per line, without duplicates
__calls="$(sed -n 's/[\t ]*\*[\t ]*#syscalls\(:'"${__p}"'\|\)[\t ]\{1,\}\(.*\)/\2/p' *.[ch] | tr ' ' '\n' | sort -u)"
echo "seccomp profile ${__p} allows: ${__calls}" | tr '\n' ' ' | fmt -t
# Pad here to keep gen_profile() "simple"
__count=0
for __c in ${__calls}; do __count=$(( __count + 1 )); done
# Smallest power of two strictly greater than the number of calls
__padded=$(( 1 << (( $(log2 ${__count}) + 1 )) ))
# NOTE(review): tuxcall is presumably used as filler because it's not an
# implemented system call -- confirm
for __i in $( seq ${__count} $(( __padded - 1 )) ); do
__calls="${__calls} tuxcall"
done
gen_profile "${__p}" ${__calls}
done

11
tap.c
View file

@ -10,6 +10,8 @@
*
* Copyright (c) 2020-2021 Red Hat GmbH
* Author: Stefano Brivio <sbrivio@redhat.com>
*
* #syscalls recvfrom sendto
*/
#define _GNU_SOURCE
@ -768,6 +770,8 @@ restart:
/**
* tap_sock_init_unix() - Create and bind AF_UNIX socket, wait for connection
* @c: Execution context
*
* #syscalls:passt unlink
*/
static void tap_sock_init_unix(struct ctx *c)
{
@ -819,8 +823,13 @@ static void tap_sock_init_unix(struct ctx *c)
}
info("UNIX domain socket bound at %s\n", addr.sun_path);
#ifdef PASST_LEGACY_NO_OPTIONS
/*
* syscalls:passt chmod
*/
chmod(addr.sun_path,
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
#endif
pcap_init(c, i);
@ -850,6 +859,8 @@ static int tun_ns_fd = -1;
* @c: Execution context
*
* Return: 0
*
* #syscalls:pasta ioctl
*/
static int tap_ns_tun(void *arg)
{

11
tcp.c
View file

@ -303,6 +303,8 @@
* - SPLICE_FIN_FROM: FIN (EPOLLRDHUP) seen from originating socket
* - SPLICE_FIN_TO: FIN (EPOLLRDHUP) seen from connected socket
* - SPLICE_FIN_BOTH: FIN (EPOLLRDHUP) seen from both sides
*
* #syscalls pipe pipe2
*/
#define _GNU_SOURCE
@ -2078,6 +2080,9 @@ static void tcp_sock_consume(struct tcp_tap_conn *conn, uint32_t ack_seq)
* @now: Current timestamp
*
* Return: negative on connection reset, 0 otherwise
*
* #syscalls recvmsg
* #syscalls:passt sendmmsg sendmsg
*/
static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn,
struct timespec *now)
@ -2320,6 +2325,8 @@ out:
* @msg: Array of messages from tap
* @count: Count of messages
* @now: Current timestamp
*
* #syscalls sendmsg
*/
static void tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
struct tap_l4_msg *msg, int count,
@ -2965,6 +2972,8 @@ static void tcp_conn_from_sock(struct ctx *c, union epoll_ref ref,
* @c: Execution context
* @ref: epoll reference
* @events: epoll events bitmap
*
* #syscalls splice
*/
void tcp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
uint32_t events)
@ -3525,6 +3534,8 @@ static int tcp_sock_refill(void *arg)
* @c: Execution context
*
* Return: 0 on success, -1 on failure
*
* #syscalls getrandom
*/
int tcp_sock_init(struct ctx *c, struct timespec *now)
{

7
udp.c
View file

@ -419,6 +419,8 @@ static void udp_sock6_iov_init(void)
* @splice: UDP_BACK_TO_INIT from init, UDP_BACK_TO_NS from namespace
*
* Return: connected socket, negative error code on failure
*
* #syscalls:pasta getsockname
*/
int udp_splice_connect(struct ctx *c, int v6, int bound_sock,
in_port_t src, in_port_t dst, int splice)
@ -640,6 +642,9 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
* @ref: epoll reference
* @events: epoll events bitmap
* @now: Current timestamp
*
* #syscalls recvmmsg
* #syscalls:passt sendmmsg sendmsg
*/
void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
struct timespec *now)
@ -877,6 +882,8 @@ void udp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
* @now: Current timestamp
*
* Return: count of consumed packets
*
* #syscalls sendmmsg
*/
int udp_tap_handler(struct ctx *c, int af, void *addr,
struct tap_l4_msg *msg, int count, struct timespec *now)

2
util.c
View file

@ -332,6 +332,8 @@ void procfs_scan_listen(char *name, uint8_t *map, uint8_t *exclude)
* @c: Execution context
*
* Return: 0 on success, -1 on failure
*
* #syscalls:pasta setns
*/
int ns_enter(struct ctx *c)
{