passt, pasta: Introduce command-line options and port re-mapping

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
Stefano Brivio 2021-08-12 15:42:43 +02:00
parent 1b1b27c06a
commit 1e49d194d0
17 changed files with 1657 additions and 704 deletions

View file

@ -6,20 +6,15 @@ all: passt pasta passt4netns qrap
avx2: CFLAGS += -Ofast -mavx2 -ftree-vectorize -funroll-loops
avx2: clean all
avx2_debug: CFLAGS += -Ofast -mavx2 -ftree-vectorize -funroll-loops -DDEBUG -g
avx2_debug: clean all
static: CFLAGS += -static
static: clean all
debug: CFLAGS += -static -DDEBUG -g
debug: clean all
passt: passt.c passt.h arp.c arp.h checksum.c checksum.h dhcp.c dhcp.h \
dhcpv6.c dhcpv6.h pcap.c pcap.h ndp.c ndp.h siphash.c siphash.h \
tap.c tap.h icmp.c icmp.h tcp.c tcp.h udp.c udp.h util.c util.h
$(CC) $(CFLAGS) passt.c arp.c checksum.c dhcp.c dhcpv6.c pcap.c ndp.c \
siphash.c tap.c icmp.c tcp.c udp.c util.c -o passt
passt: passt.c passt.h arp.c arp.h checksum.c checksum.h conf.c conf.h \
dhcp.c dhcp.h dhcpv6.c dhcpv6.h pcap.c pcap.h ndp.c ndp.h \
siphash.c siphash.h tap.c tap.h icmp.c icmp.h tcp.c tcp.h \
udp.c udp.h util.c util.h
$(CC) $(CFLAGS) passt.c arp.c checksum.c conf.c dhcp.c dhcpv6.c \
pcap.c ndp.c siphash.c tap.c icmp.c tcp.c udp.c util.c -o passt
pasta: passt
ln -s passt pasta

1163
conf.c Normal file

File diff suppressed because it is too large Load diff

1
conf.h Normal file
View file

@ -0,0 +1 @@
void conf(struct ctx *c, int argc, char **argv);

9
dhcp.c
View file

@ -272,6 +272,9 @@ int dhcp(struct ctx *c, struct ethhdr *eh, size_t len)
if (uh->dest != htons(67))
return 0;
if (c->no_dhcp)
return 1;
mlen = len - sizeof(*eh) - iph->ihl * 4 - sizeof(*uh);
if (mlen != ntohs(uh->len) - sizeof(*uh) ||
mlen < offsetof(struct msg, o) ||
@ -305,6 +308,12 @@ int dhcp(struct ctx *c, struct ethhdr *eh, size_t len)
*(unsigned long *)opts[3].s = c->gw4;
*(unsigned long *)opts[54].s = c->gw4;
if (c->mtu) {
opts[26].slen = 2;
opts[26].s[0] = c->mtu / 256;
opts[26].s[1] = c->mtu % 256;
}
for (i = 0, opts[6].slen = 0; c->dns4[i]; i++) {
((uint32_t *)opts[6].s)[i] = c->dns4[i];
opts[6].slen += sizeof(uint32_t);

View file

@ -461,6 +461,9 @@ int dhcpv6(struct ctx *c, struct ethhdr *eh, size_t len)
if (!uh || proto != IPPROTO_UDP || uh->dest != htons(547))
return 0;
if (c->no_dhcpv6)
return 1;
if (!IN6_IS_ADDR_MULTICAST(&ip6h->daddr))
return -1;

14
ndp.c
View file

@ -64,6 +64,9 @@ int ndp(struct ctx *c, struct ethhdr *eh, size_t len)
ih->icmp6_type < RS || ih->icmp6_type > NA)
return 0;
if (c->no_ndp)
return 1;
ehr = (struct ethhdr *)buf;
ip6hr = (struct ipv6hdr *)(ehr + 1);
ihr = (struct icmp6hdr *)(ip6hr + 1);
@ -91,6 +94,9 @@ int ndp(struct ctx *c, struct ethhdr *eh, size_t len)
size_t len = 0;
int i, n;
if (c->no_ra)
return 1;
info("NDP: received RS, sending RA");
ihr->icmp6_type = RA;
ihr->icmp6_code = 0;
@ -110,6 +116,14 @@ int ndp(struct ctx *c, struct ethhdr *eh, size_t len)
memcpy(p, &c->addr6, 8); /* prefix */
p += 16;
if (c->mtu) {
*p++ = 5; /* type */
*p++ = 1; /* length */
p += 2; /* reserved */
*(uint32_t *)p = htonl(c->mtu); /* MTU */
p += 4;
}
for (n = 0; !IN6_IS_ADDR_UNSPECIFIED(&c->dns6[n]); n++);
if (n) {
*p++ = 25; /* RDNSS */

532
passt.c
View file

@ -25,13 +25,13 @@
#include <sys/epoll.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/resource.h>
#include <sys/uio.h>
#include <ifaddrs.h>
#include <sys/wait.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <arpa/inet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
@ -46,8 +46,6 @@
#include <netdb.h>
#include <string.h>
#include <errno.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <time.h>
#include <syslog.h>
#include <sys/stat.h>
@ -60,6 +58,7 @@
#include "udp.h"
#include "pcap.h"
#include "tap.h"
#include "conf.h"
#define EPOLL_EVENTS 10
@ -68,7 +67,6 @@
char pkt_buf [PKT_BUF_BYTES];
#ifdef DEBUG
char *ip_proto_str[IPPROTO_SCTP + 1] = {
[IPPROTO_ICMP] = "ICMP",
[IPPROTO_TCP] = "TCP",
@ -76,318 +74,6 @@ char *ip_proto_str[IPPROTO_SCTP + 1] = {
[IPPROTO_ICMPV6] = "ICMPV6",
[IPPROTO_SCTP] = "SCTP",
};
#endif
/**
 * struct nl_request - Netlink request filled and sent by get_routes()
 * @nlh: Netlink message header
 * @rtm: Routing Netlink message
 *
 * get_routes() sends this structure as one buffer, with nlh.nlmsg_len set to
 * sizeof(struct nl_request), so the routing message acts as the payload that
 * directly follows the Netlink header.
 */
struct nl_request {
struct nlmsghdr nlh;
struct rtmsg rtm;
};
/**
 * get_routes() - Get default route and fill in routable interface name
 * @c:		Execution context
 *
 * Requests a dump of the main routing table over a NETLINK_ROUTE socket, first
 * for AF_INET and then, in a second pass, for AF_INET6. Only routes with a zero
 * destination prefix length (default routes) are considered. The first gateway
 * found for each family is stored in @c->gw4 or @c->gw6 and the family is
 * flagged in @c->v4 or @c->v6; the first output interface attribute seen gives
 * @c->ifn.
 *
 * Exits with failure if no gateway at all, or no interface name, was found.
 */
static void get_routes(struct ctx *c)
{
	struct nl_request req = {
		.nlh.nlmsg_type = RTM_GETROUTE,
		.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_EXCL,
		.nlh.nlmsg_len = sizeof(struct nl_request),
		.nlh.nlmsg_seq = 1,

		.rtm.rtm_family = AF_INET,
		.rtm.rtm_table = RT_TABLE_MAIN,
		.rtm.rtm_scope = RT_SCOPE_UNIVERSE,
		.rtm.rtm_type = RTN_UNICAST,
	};
	struct sockaddr_nl addr = {
		.nl_family = AF_NETLINK,
	};
	struct nlmsghdr *nlh;
	struct rtattr *rta;
	struct rtmsg *rtm;
	char buf[BUFSIZ];
	int s, n, na;

	/* -1 marks "IPv6 pass not started yet": IPv6 gateways are only accepted
	 * once this is reset to 0 below, right before the second request.
	 */
	c->v6 = -1;

	s = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (s < 0) {
		perror("netlink socket");
		goto out;
	}

	if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("netlink bind");
		goto out;
	}

v6:
	if (send(s, &req, sizeof(req), 0) < 0) {
		perror("netlink send");
		goto out;
	}

	n = recv(s, &buf, sizeof(buf), 0);
	if (n < 0) {
		perror("netlink recv");
		goto out;
	}

	nlh = (struct nlmsghdr *)buf;
	for ( ; NLMSG_OK(nlh, n); nlh = NLMSG_NEXT(nlh, n)) {
		rtm = (struct rtmsg *)NLMSG_DATA(nlh);

		/* Default routes only, and only for the two IP families */
		if (rtm->rtm_dst_len ||
		    (rtm->rtm_family != AF_INET && rtm->rtm_family != AF_INET6))
			continue;

		rta = (struct rtattr *)RTM_RTA(rtm);
		na = RTM_PAYLOAD(nlh);
		for ( ; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
			if (rta->rta_type == RTA_GATEWAY &&
			    rtm->rtm_family == AF_INET && !c->v4) {
				memcpy(&c->gw4, RTA_DATA(rta), sizeof(c->gw4));
				c->v4 = 1;
			}

			if (rta->rta_type == RTA_GATEWAY &&
			    rtm->rtm_family == AF_INET6 && !c->v6) {
				memcpy(&c->gw6, RTA_DATA(rta), sizeof(c->gw6));
				c->v6 = 1;
			}

			if (rta->rta_type == RTA_OIF && !*c->ifn) {
				unsigned int ifi;

				/* Copy the index out of the attribute payload
				 * instead of reading it through a cast pointer
				 */
				memcpy(&ifi, RTA_DATA(rta), sizeof(ifi));
				if_indextoname(ifi, c->ifn);
			}
		}

		if (nlh->nlmsg_type == NLMSG_DONE)
			break;
	}

	if (c->v6 == -1) {
		c->v6 = 0;
		req.rtm.rtm_family = AF_INET6;
		req.nlh.nlmsg_seq++;
		/* NOTE(review): presumably discards what is left of the IPv4
		 * dump before the IPv6 request is sent; result is deliberately
		 * not checked, as in the original code -- confirm
		 */
		recv(s, &buf, sizeof(buf), 0);
		goto v6;
	}

out:
	/* socket() itself may have failed: only close a valid descriptor */
	if (s >= 0)
		close(s);

	if (!(c->v4 || c->v6) || !*c->ifn) {
		err("No routing information");
		exit(EXIT_FAILURE);
	}
}
/**
 * get_addrs() - Fetch MAC, IP addresses, masks of external routable interface
 * @c:		Execution context
 *
 * Walks the getifaddrs() list for entries matching @c->ifn and takes the first
 * IPv4 address (with its netmask) and the first IPv6 address found. The IPv4
 * address is stored in @c->addr4 and @c->addr4_seen, the IPv6 address in
 * @c->addr6, @c->addr6_seen and @c->addr6_ll_seen. The hardware address is
 * then read with a SIOCGIFHWADDR ioctl into @c->mac.
 *
 * Exits with failure if the set of address families found doesn't match
 * @c->v4 and @c->v6, or if any system call fails.
 */
static void get_addrs(struct ctx *c)
{
	struct ifreq ifr = {
		.ifr_addr.sa_family = AF_INET,
	};
	struct ifaddrs *ifaddr, *ifa;
	int s, v4 = 0, v6 = 0;

	if (getifaddrs(&ifaddr) == -1) {
		perror("getifaddrs");
		goto out;
	}

	for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) {
		struct sockaddr_in *in_addr;
		struct sockaddr_in6 *in6_addr;

		if (strcmp(ifa->ifa_name, c->ifn))
			continue;

		if (!ifa->ifa_addr)
			continue;

		if (ifa->ifa_addr->sa_family == AF_INET && !v4) {
			/* ifa_netmask may be a null pointer: an address we
			 * can't get a mask for is of no use, try the next one
			 */
			if (!ifa->ifa_netmask)
				continue;

			in_addr = (struct sockaddr_in *)ifa->ifa_addr;
			c->addr4_seen = c->addr4 = in_addr->sin_addr.s_addr;
			in_addr = (struct sockaddr_in *)ifa->ifa_netmask;
			c->mask4 = in_addr->sin_addr.s_addr;
			v4 = 1;
		} else if (ifa->ifa_addr->sa_family == AF_INET6 && !v6) {
			in6_addr = (struct sockaddr_in6 *)ifa->ifa_addr;
			memcpy(&c->addr6, &in6_addr->sin6_addr,
			       sizeof(c->addr6));
			memcpy(&c->addr6_seen, &in6_addr->sin6_addr,
			       sizeof(c->addr6_seen));
			memcpy(&c->addr6_ll_seen, &in6_addr->sin6_addr,
			       sizeof(c->addr6_ll_seen));
			v6 = 1;
		}

		/* Stop as soon as every enabled family has an address */
		if (v4 == c->v4 && v6 == c->v6)
			break;
	}

	freeifaddrs(ifaddr);

	if (v4 != c->v4 || v6 != c->v6)
		goto out;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s < 0) {
		perror("socket SIOCGIFHWADDR");
		goto out;
	}

	/* ifr is zero-initialised: copying at most size - 1 bytes keeps the
	 * interface name NUL-terminated whatever the length of c->ifn
	 */
	strncpy(ifr.ifr_name, c->ifn, sizeof(ifr.ifr_name) - 1);
	if (ioctl(s, SIOCGIFHWADDR, &ifr) < 0) {
		perror("SIOCGIFHWADDR");
		close(s);
		goto out;
	}

	close(s);
	memcpy(c->mac, ifr.ifr_hwaddr.sa_data, ETH_ALEN);

	return;
out:
	err("Couldn't get addresses for routable interface");
	exit(EXIT_FAILURE);
}
/**
 * get_dns() - Get nameserver addresses from local /etc/resolv.conf
 * @c:		Execution context
 *
 * Parses "nameserver" lines into @c->dns4 and @c->dns6, depending on which
 * address family the last field parses as, and the first "search" line into
 * @c->dns_search. One entry of each array is always left untouched, as users
 * of these arrays iterate until they find an empty entry.
 *
 * Warns if the file can't be opened or if no nameserver address was found.
 */
static void get_dns(struct ctx *c)
{
	struct in6_addr *dns6 = &c->dns6[0];
	struct fqdn *s = c->dns_search;
	uint32_t *dns4 = &c->dns4[0];
	char buf[BUFSIZ], *p, *end;
	FILE *r;

	r = fopen("/etc/resolv.conf", "r");
	if (!r) {
		/* No file, no nameservers: same outcome as an empty file */
		warn("Couldn't get any nameserver address");
		return;
	}

	while (fgets(buf, BUFSIZ, r)) {
		if (strstr(buf, "nameserver ") == buf) {
			/* Address is whatever follows the last space */
			p = strrchr(buf, ' ');
			if (!p)
				continue;

			/* Cut at zone index ('%') or end of line */
			end = strpbrk(buf, "%\n");
			if (end)
				*end = 0;

			/* Keep the last slot of each array as terminator */
			if (dns4 - &c->dns4[0] < ARRAY_SIZE(c->dns4) - 1 &&
			    inet_pton(AF_INET, p + 1, dns4))
				dns4++;

			if (dns6 - &c->dns6[0] < ARRAY_SIZE(c->dns6) - 1 &&
			    inet_pton(AF_INET6, p + 1, dns6))
				dns6++;
		} else if (strstr(buf, "search ") == buf &&
			   s == c->dns_search) {
			/* Only the first "search" line is taken into account */
			end = strpbrk(buf, "\n");
			if (end)
				*end = 0;

			/* First token is the "search" keyword itself: skip */
			p = strtok(buf, " \t");
			while ((p = strtok(NULL, " \t")) &&
			       s - c->dns_search <
			       ARRAY_SIZE(c->dns_search) - 1) {
				/* Copy one byte less than the entry size, so
				 * that the final byte of the entry is never
				 * overwritten (main() zero-initialises the
				 * context, which leaves a terminator there)
				 */
				strncpy(s->n, p, sizeof(c->dns_search[0]) - 1);
				s++;
			}
		}
	}

	fclose(r);

	if (dns4 == c->dns4 && dns6 == c->dns6)
		warn("Couldn't get any nameserver address");
}
/**
 * get_bound_ports_ns() - Get TCP and UDP ports bound in namespace
 * @arg: Execution context
 *
 * Calls ns_enter() with the target PID, then calls procfs_scan_listen() for
 * the to_tap and to_ns port maps of each enabled IP version. Started from
 * get_bound_ports() by means of clone().
 *
 * NOTE(review): procfs_scan_listen() is defined elsewhere; presumably it marks,
 * in the given map, ports found listening in the procfs table named by its
 * first argument -- confirm against its definition.
 *
 * Return: 0
 */
static int get_bound_ports_ns(void *arg)
{
struct ctx *c = (struct ctx *)arg;
ns_enter(c->pasta_pid);
if (c->v4) {
/* TCP listeners are scanned into both the TCP and the UDP map, UDP
 * listeners into the UDP map only. NOTE(review): this pattern repeats
 * throughout, so it looks intentional -- confirm.
 */
procfs_scan_listen("tcp", c->tcp.port4_to_tap);
procfs_scan_listen("tcp", c->udp.port4_to_tap);
procfs_scan_listen("udp", c->udp.port4_to_tap);
procfs_scan_listen("tcp", c->tcp.port4_to_ns);
procfs_scan_listen("tcp", c->udp.port4_to_ns);
procfs_scan_listen("udp", c->udp.port4_to_ns);
}
if (c->v6) {
if (c->v4) {
/* With both versions enabled, IPv6 listeners are scanned into the IPv4
 * maps as well
 */
procfs_scan_listen("tcp6", c->tcp.port4_to_tap);
procfs_scan_listen("tcp6", c->udp.port4_to_tap);
procfs_scan_listen("udp6", c->udp.port4_to_tap);
procfs_scan_listen("tcp6", c->tcp.port4_to_ns);
procfs_scan_listen("tcp6", c->udp.port4_to_ns);
procfs_scan_listen("udp6", c->udp.port4_to_ns);
}
procfs_scan_listen("tcp6", c->tcp.port6_to_tap);
procfs_scan_listen("tcp6", c->udp.port6_to_tap);
procfs_scan_listen("udp6", c->udp.port6_to_tap);
procfs_scan_listen("tcp6", c->tcp.port6_to_ns);
procfs_scan_listen("tcp6", c->udp.port6_to_ns);
procfs_scan_listen("udp6", c->udp.port6_to_ns);
}
return 0;
}
/**
 * get_bound_ports() - Get maps of ports that should have bound sockets
 * @c: Execution context
 *
 * Starts get_bound_ports_ns() with clone() to fill the to_tap and to_ns maps,
 * then calls procfs_scan_listen() from the calling context for the to_init
 * maps of each enabled IP version.
 */
static void get_bound_ports(struct ctx *c)
{
char ns_fn_stack[NS_FN_STACK_SIZE];
/* The child gets a stack pointer in the middle of the local buffer and, with
 * CLONE_VM, shares memory with the caller, so it writes to the maps in @c
 * directly. CLONE_VFORK suspends the caller until the child is done.
 * NOTE(review): the return value of clone() is not checked -- on failure, the
 * namespace maps are silently left as they were.
 */
clone(get_bound_ports_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2,
CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD, (void *)c);
if (c->v4) {
/* As in get_bound_ports_ns(): TCP listeners go to TCP and UDP maps */
procfs_scan_listen("tcp", c->tcp.port4_to_init);
procfs_scan_listen("tcp", c->udp.port4_to_init);
procfs_scan_listen("udp", c->udp.port4_to_init);
}
if (c->v6) {
if (c->v4) {
/* With both versions enabled, IPv6 listeners also go to the IPv4 maps */
procfs_scan_listen("tcp6", c->tcp.port4_to_init);
procfs_scan_listen("tcp6", c->udp.port4_to_init);
procfs_scan_listen("udp6", c->udp.port4_to_init);
}
procfs_scan_listen("tcp6", c->tcp.port6_to_init);
procfs_scan_listen("tcp6", c->udp.port6_to_init);
procfs_scan_listen("udp6", c->udp.port6_to_init);
}
}
/**
* sock_handler() - Event handler for L4 sockets
@ -401,11 +87,12 @@ static void sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
{
debug("%s packet from socket %i", IP_PROTO_STR(ref.proto), ref.s);
if (ref.proto == IPPROTO_TCP)
if (!c->no_tcp && ref.proto == IPPROTO_TCP)
tcp_sock_handler( c, ref, events, now);
else if (ref.proto == IPPROTO_UDP)
else if (!c->no_udp && ref.proto == IPPROTO_UDP)
udp_sock_handler( c, ref, events, now);
else if (ref.proto == IPPROTO_ICMP || ref.proto == IPPROTO_ICMPV6)
else if (!c->no_icmp &&
(ref.proto == IPPROTO_ICMP || ref.proto == IPPROTO_ICMPV6))
icmp_sock_handler(c, ref, events, now);
}
@ -416,17 +103,20 @@ static void sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
*/
static void timer_handler(struct ctx *c, struct timespec *now)
{
if (timespec_diff_ms(now, &c->tcp.timer_run) >= TCP_TIMER_INTERVAL) {
if (!c->no_tcp &&
timespec_diff_ms(now, &c->tcp.timer_run) >= TCP_TIMER_INTERVAL) {
tcp_timer(c, now);
c->tcp.timer_run = *now;
}
if (timespec_diff_ms(now, &c->udp.timer_run) >= UDP_TIMER_INTERVAL) {
if (!c->no_udp &&
timespec_diff_ms(now, &c->udp.timer_run) >= UDP_TIMER_INTERVAL) {
udp_timer(c, now);
c->udp.timer_run = *now;
}
if (timespec_diff_ms(now, &c->icmp.timer_run) >= ICMP_TIMER_INTERVAL) {
if (!c->no_icmp &&
timespec_diff_ms(now, &c->icmp.timer_run) >= ICMP_TIMER_INTERVAL) {
icmp_timer(c, now);
c->icmp.timer_run = *now;
}
@ -445,68 +135,115 @@ void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
udp_update_l2_buf(eth_d, eth_s, ip_da);
}
/**
* usage_passt() - Print usage for "passt" mode and exit
* @name: Executable name
*/
void usage_passt(const char *name)
{
fprintf(stderr, "Usage: %s\n", name);
static int pasta_child_pid;
exit(EXIT_FAILURE);
/**
 * pasta_child_handler() - Exit once shell spawned by pasta_start_ns() exits
 * @signal:	Unused, handler deals with SIGCHLD only
 *
 * Polls, without blocking, for termination of the process whose PID is stored
 * in pasta_child_pid, and terminates the calling process with success status
 * if that process has exited.
 */
static void pasta_child_handler(int signal)
{
	siginfo_t infop;

	(void)signal;

	/* With WNOHANG, waitid() also returns 0 if the child hasn't changed
	 * state, and might leave the structure untouched in that case: reset
	 * si_pid so that stale stack contents are never taken for a PID.
	 */
	infop.si_pid = 0;

	if (!waitid(P_PID, pasta_child_pid, &infop, WEXITED | WNOHANG)) {
		/* NOTE(review): exit() is not async-signal-safe; kept as it
		 * is, since switching to _exit() would skip exit handlers
		 */
		if (infop.si_pid && infop.si_pid == pasta_child_pid)
			exit(EXIT_SUCCESS);
	}
}
/**
* usage_pasta() - Print usage for "pasta" mode and exit
* @name: Executable name
* pasta_start_ns() - Fork shell in new namespace if target PID is not given
* @c: Execution context
*/
void usage_pasta(const char *name)
static void pasta_start_ns(struct ctx *c)
{
fprintf(stderr, "Usage: %s TARGET_PID\n", name);
char buf[BUFSIZ], *shell;
int euid = geteuid();
struct sigaction sa;
int fd;
c->foreground = 1;
if (!c->debug)
c->quiet = 1;
sigemptyset(&sa.sa_mask);
sa.sa_flags = 0;
sa.sa_handler = pasta_child_handler;
sigaction(SIGCHLD, &sa, NULL);
if ((c->pasta_pid = fork()) == -1) {
perror("fork");
exit(EXIT_FAILURE);
}
if ((pasta_child_pid = c->pasta_pid))
return;
if (unshare(CLONE_NEWNET | CLONE_NEWUSER)) {
perror("unshare");
exit(EXIT_FAILURE);
}
snprintf(buf, BUFSIZ, "%u %u %u", 0, euid, 1);
fd = open("/proc/self/uid_map", O_WRONLY);
write(fd, buf, strlen(buf));
close(fd);
fd = open("/proc/self/setgroups", O_WRONLY);
write(fd, "deny", sizeof("deny"));
close(fd);
fd = open("/proc/self/gid_map", O_WRONLY);
write(fd, buf, strlen(buf));
close(fd);
shell = getenv("SHELL") ? getenv("SHELL") : "/bin/sh";
if (strstr(shell, "/bash"))
execve(shell, ((char *[]) { shell, "-l", NULL }), environ);
else
execve(shell, ((char *[]) { shell, NULL }), environ);
perror("execve");
exit(EXIT_FAILURE);
}
/**
* main() - Entry point and main loop
* @argc: Argument count
* @argv: Target PID for pasta mode
* @argv: Options, plus optional target PID for pasta mode
*
* Return: 0 once interrupted, non-zero on failure
*/
int main(int argc, char **argv)
{
char buf6[INET6_ADDRSTRLEN], buf4[INET_ADDRSTRLEN], *log_name;
struct epoll_event events[EPOLL_EVENTS];
struct ctx c = { 0 };
struct rlimit limit;
struct timespec now;
char *log_name;
int nfds, i;
if (strstr(argv[0], "pasta") || strstr(argv[0], "passt4netns")) {
if (argc != 2)
usage_pasta(argv[0]);
errno = 0;
c.pasta_pid = strtol(argv[1], NULL, 0);
if (c.pasta_pid < 0 || errno)
usage_pasta(argv[0]);
c.mode = MODE_PASTA;
log_name = "pasta";
} else {
if (argc != 1)
usage_passt(argv[0]);
c.mode = MODE_PASST;
log_name = "passt";
memset(&c.mac_guest, 0xff, sizeof(c.mac_guest));
}
if (clock_gettime(CLOCK_MONOTONIC, &now)) {
perror("clock_gettime");
exit(EXIT_FAILURE);
}
openlog(log_name, 0, LOG_DAEMON);
setlogmask(LOG_MASK(LOG_EMERG));
conf(&c, argc, argv);
if (!c.debug && (c.stderr || isatty(fileno(stdout))))
openlog(log_name, LOG_PERROR, LOG_DAEMON);
if (c.mode == MODE_PASTA && !c.pasta_pid)
pasta_start_ns(&c);
c.epollfd = epoll_create1(0);
if (c.epollfd == -1) {
@ -524,85 +261,26 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}
#if DEBUG
openlog(log_name, 0, LOG_DAEMON);
#else
openlog(log_name, isatty(fileno(stdout)) ? 0 : LOG_PERROR, LOG_DAEMON);
#endif
get_routes(&c);
get_addrs(&c);
get_dns(&c);
if (c.mode == MODE_PASST) {
memset(&c.tcp.port4_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8);
memset(&c.tcp.port6_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8);
memset(&c.udp.port4_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8);
memset(&c.udp.port6_to_tap, 0xff, PORT_EPHEMERAL_MIN / 8);
} else {
get_bound_ports(&c);
}
proto_update_l2_buf(c.mac_guest, c.mac, &c.addr4);
if (udp_sock_init(&c) || tcp_sock_init(&c))
exit(EXIT_FAILURE);
if (c.v6)
dhcpv6_init(&c);
if (c.v4) {
info("ARP:");
info(" address: %02x:%02x:%02x:%02x:%02x:%02x from %s",
c.mac[0], c.mac[1], c.mac[2], c.mac[3], c.mac[4], c.mac[5],
c.ifn);
info("DHCP:");
info(" assign: %s",
inet_ntop(AF_INET, &c.addr4, buf4, sizeof(buf4)));
info(" mask: %s",
inet_ntop(AF_INET, &c.mask4, buf4, sizeof(buf4)));
info(" router: %s",
inet_ntop(AF_INET, &c.gw4, buf4, sizeof(buf4)));
for (i = 0; c.dns4[i]; i++) {
if (!i)
info(" DNS:");
inet_ntop(AF_INET, &c.dns4[i], buf4, sizeof(buf4));
info(" %s", buf4);
}
for (i = 0; *c.dns_search[i].n; i++) {
if (!i)
info(" search:");
info(" %s", c.dns_search[i].n);
}
}
if (c.v6) {
info("NDP/DHCPv6:");
info(" assign: %s",
inet_ntop(AF_INET6, &c.addr6, buf6, sizeof(buf6)));
info(" router: %s",
inet_ntop(AF_INET6, &c.gw6, buf6, sizeof(buf6)));
for (i = 0; !IN6_IS_ADDR_UNSPECIFIED(&c.dns6[i]); i++) {
if (!i)
info(" DNS:");
inet_ntop(AF_INET6, &c.dns6[i], buf6, sizeof(buf6));
info(" %s", buf6);
}
for (i = 0; *c.dns_search[i].n; i++) {
if (!i)
info(" search:");
info(" %s", c.dns_search[i].n);
}
}
tap_sock_init(&c);
#ifndef DEBUG
if (isatty(fileno(stdout)) && daemon(0, 0)) {
fprintf(stderr, "Failed to fork into background\n");
if ((!c.no_udp && udp_sock_init(&c)) ||
(!c.no_tcp && tcp_sock_init(&c)))
exit(EXIT_FAILURE);
}
#endif
if (c.v6 && !c.no_dhcpv6)
dhcpv6_init(&c);
if (c.debug)
setlogmask(LOG_UPTO(LOG_DEBUG));
else if (c.quiet)
setlogmask(LOG_UPTO(LOG_ERR));
else
setlogmask(LOG_UPTO(LOG_INFO));
if (isatty(fileno(stdout)) && !c.foreground)
daemon(0, 0);
loop:
nfds = epoll_wait(c.epollfd, events, EPOLL_EVENTS, TIMER_INTERVAL);
if (nfds == -1 && errno != EINTR) {

40
passt.h
View file

@ -52,11 +52,9 @@ union epoll_ref {
#define PKT_BUF_BYTES MAX(TAP_BUF_BYTES, 0)
extern char pkt_buf [PKT_BUF_BYTES];
#ifdef DEBUG
extern char *ip_proto_str[];
#define IP_PROTO_STR(n) \
(((n) <= IPPROTO_SCTP && ip_proto_str[(n)]) ? ip_proto_str[(n)] : "?")
#endif
#include <resolv.h> /* For MAXNS below */
@ -69,6 +67,7 @@ struct fqdn {
};
#include <net/if.h>
#include <linux/un.h>
enum passt_modes {
MODE_PASST,
@ -79,6 +78,12 @@ enum passt_modes {
* struct ctx - Execution context
* @mode: Operation mode, qemu/UNIX domain socket or namespace/tap
* @pasta_pid: Target PID of namespace for pasta mode
* @debug: Enable debug mode
* @quiet: Don't print informational messages
* @foreground: Run in foreground, don't log to stderr by default
* @stderr: Force logging to stderr
* @sock_path: Path for UNIX domain socket
* @pcap: Path for packet capture file
* @epollfd: File descriptor for epoll instance
* @fd_tap_listen: File descriptor for listening AF_UNIX socket, if any
* @fd_tap: File descriptor for AF_UNIX socket or tuntap device
@ -93,18 +98,36 @@ enum passt_modes {
* @dns_search: DNS search list
* @v6: Enable IPv6 transport
* @addr6: IPv6 address for external, routable interface
* @addr6_ll: Link-local IPv6 address on external, routable interface
* @addr6_seen: Latest IPv6 global/site address seen as source from tap
* @addr6_ll_seen: Latest IPv6 link-local address seen as source from tap
* @gw6: Default IPv6 gateway
* @dns6: IPv6 DNS addresses, zero-terminated
* @ifn: Name of routable interface
* @pasta_ifn: Name of namespace interface for pasta
* @no_tcp: Disable TCP operation
* @tcp: Context for TCP protocol handler
* @no_udp: Disable UDP operation
* @udp: Context for UDP protocol handler
* @no_icmp: Disable ICMP operation
* @icmp: Context for ICMP protocol handler
* @mtu: MTU passed via DHCP/NDP
* @no_dns: Do not assign any DNS server via DHCP/DHCPv6/NDP
* @no_dns_search: Do not assign any DNS domain search via DHCP/DHCPv6/NDP
* @no_dhcp: Disable DHCP server
* @no_dhcpv6: Disable DHCPv6 server
* @no_ndp: Disable NDP handler altogether
* @no_ra: Disable router advertisements
*/
struct ctx {
enum passt_modes mode;
int pasta_pid;
int debug;
int quiet;
int foreground;
int stderr;
char sock_path[UNIX_PATH_MAX];
char pcap[PATH_MAX];
int epollfd;
int fd_tap_listen;
@ -123,16 +146,29 @@ struct ctx {
int v6;
struct in6_addr addr6;
struct in6_addr addr6_ll;
struct in6_addr addr6_seen;
struct in6_addr addr6_ll_seen;
struct in6_addr gw6;
struct in6_addr dns6[MAXNS + 1];
char ifn[IF_NAMESIZE];
char pasta_ifn[IF_NAMESIZE];
int no_tcp;
struct tcp_ctx tcp;
int no_udp;
struct udp_ctx udp;
int no_icmp;
struct icmp_ctx icmp;
int mtu;
int no_dns;
int no_dns_search;
int no_dhcp;
int no_dhcpv6;
int no_ndp;
int no_ra;
};
void proto_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,

60
pcap.c
View file

@ -24,14 +24,13 @@
#include <fcntl.h>
#include <time.h>
#include <net/ethernet.h>
#include <netinet/in.h>
#include <unistd.h>
#include <net/if.h>
#include "util.h"
#include "passt.h"
#ifdef DEBUG
#define PCAP_PREFIX "/tmp/passt_"
#define PCAP_PREFIX_PASTA "/tmp/pasta_"
#define PCAP_ISO8601_FORMAT "%FT%H:%M:%SZ"
@ -165,52 +164,35 @@ void pcap_init(struct ctx *c, int index)
if (pcap_fd != -1)
close(pcap_fd);
if (c->mode == MODE_PASTA)
memcpy(name, PCAP_PREFIX_PASTA, sizeof(PCAP_PREFIX_PASTA));
if (!*c->pcap)
return;
gettimeofday(&tv, NULL);
tm = localtime(&tv.tv_sec);
strftime(name + strlen(PCAP_PREFIX), sizeof(PCAP_ISO8601_STR) - 1,
PCAP_ISO8601_FORMAT, tm);
if (*c->pcap == 1) {
if (c->mode == MODE_PASTA)
memcpy(name, PCAP_PREFIX_PASTA,
sizeof(PCAP_PREFIX_PASTA));
snprintf(name + strlen(PCAP_PREFIX) + strlen(PCAP_ISO8601_STR),
sizeof(name) - strlen(PCAP_PREFIX) - strlen(PCAP_ISO8601_STR),
"_%i.pcap", index);
gettimeofday(&tv, NULL);
tm = localtime(&tv.tv_sec);
strftime(name + strlen(PCAP_PREFIX),
sizeof(PCAP_ISO8601_STR) - 1, PCAP_ISO8601_FORMAT, tm);
pcap_fd = open(name, O_WRONLY | O_CREAT | O_APPEND | O_DSYNC,
snprintf(name + strlen(PCAP_PREFIX) + strlen(PCAP_ISO8601_STR),
sizeof(name) - strlen(PCAP_PREFIX) -
strlen(PCAP_ISO8601_STR),
"_%i.pcap", index);
strncpy(c->pcap, name, PATH_MAX);
}
pcap_fd = open(c->pcap, O_WRONLY | O_CREAT | O_TRUNC | O_DSYNC,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (pcap_fd == -1) {
perror("open");
return;
}
info("Saving packet capture at %s", name);
info("Saving packet capture at %s", c->pcap);
write(pcap_fd, &pcap_hdr, sizeof(pcap_hdr));
}
#else /* DEBUG */
void pcap(char *pkt, size_t len)
{
(void)pkt;
(void)len;
}
void pcapm(struct msghdr *mh)
{
(void)mh;
}
void pcapmm(struct mmsghdr *mmh, unsigned int vlen)
{
(void)mmh;
(void)vlen;
}
void pcap_init(struct ctx *c, int sock_index)
{
(void)c;
(void)sock_index;
}
#endif

5
qrap.c
View file

@ -26,6 +26,7 @@
#include <limits.h>
#include <fcntl.h>
#include <net/if_arp.h>
#include <netinet/in.h>
#include "util.h"
#include "passt.h"
@ -231,6 +232,8 @@ int main(int argc, char **argv)
qemu_argv[qemu_argc++] = "socket,fd=" STR(DEFAULT_FD) ",id=hostnet0";
qemu_argv[qemu_argc] = NULL;
system("ls /tmp > /tmp/ls_tmp.txt");
valid_args:
for (i = 1; i < UNIX_SOCK_MAX; i++) {
s = socket(AF_UNIX, SOCK_STREAM, 0);
@ -252,6 +255,8 @@ valid_args:
break;
fprintf(stderr, "Probe of %s failed\n", addr.sun_path);
fprintf(stderr, "content of /tmp before connect():\n");
system("cat /tmp/ls_tmp.txt");
close(s);
}

92
tap.c
View file

@ -297,14 +297,23 @@ static int tap4_handler(struct ctx *c, struct tap_msg *msg, size_t count,
inet_ntop(AF_INET, &iph->daddr, buf_d, sizeof(buf_d)));
}
if (iph->protocol == IPPROTO_TCP)
if (iph->protocol == IPPROTO_TCP) {
if (c->no_tcp)
return i;
return tcp_tap_handler(c, AF_INET, &iph->daddr, msg, i, now);
}
if (iph->protocol == IPPROTO_UDP)
if (iph->protocol == IPPROTO_UDP) {
if (c->no_udp)
return i;
return udp_tap_handler(c, AF_INET, &iph->daddr, msg, i, now);
}
if (iph->protocol == IPPROTO_ICMP)
if (iph->protocol == IPPROTO_ICMP) {
if (c->no_icmp)
return 1;
icmp_tap_handler(c, AF_INET, &iph->daddr, msg, 1, now);
}
return 1;
}
@ -421,14 +430,23 @@ static int tap6_handler(struct ctx *c, struct tap_msg *msg, size_t count,
i, i > 1 ? "s" : "");
}
if (proto == IPPROTO_TCP)
if (proto == IPPROTO_TCP) {
if (c->no_tcp)
return i;
return tcp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, i, now);
}
if (proto == IPPROTO_UDP)
if (proto == IPPROTO_UDP) {
if (c->no_udp)
return i;
return udp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, i, now);
}
if (proto == IPPROTO_ICMPV6)
if (proto == IPPROTO_ICMPV6) {
if (c->no_icmp)
return 1;
icmp_tap_handler(c, AF_INET6, &ip6h->daddr, msg, 1, now);
}
return 1;
}
@ -493,7 +511,8 @@ static int tap_handler_passt(struct ctx *c, struct timespec *now)
switch (ntohs(eh->h_proto)) {
case ETH_P_ARP:
tap4_handler(c, tap_msgs + i, 1, now, 1);
if (c->v4)
tap4_handler(c, tap_msgs + i, 1, now, 1);
i++;
break;
case ETH_P_IP:
@ -506,6 +525,11 @@ static int tap_handler_passt(struct ctx *c, struct timespec *now)
break;
}
if (!c->v4) {
i += same;
break;
}
i += tap4_handler(c, tap_msgs + i, same, now, first_v4);
first_v4 = 0;
break;
@ -519,6 +543,11 @@ static int tap_handler_passt(struct ctx *c, struct timespec *now)
break;
}
if (!c->v6) {
i += same;
break;
}
i += tap6_handler(c, tap_msgs + i, same, now, first_v6);
first_v6 = 0;
break;
@ -556,13 +585,16 @@ static int tap_handler_pasta(struct ctx *c, struct timespec *now)
switch (ntohs(eh->h_proto)) {
case ETH_P_ARP:
tap4_handler(c, &msg, 1, now, 1);
if (c->v4)
tap4_handler(c, &msg, 1, now, 1);
break;
case ETH_P_IP:
tap4_handler(c, &msg, 1, now, 1);
if (c->v4)
tap4_handler(c, &msg, 1, now, 1);
break;
case ETH_P_IPV6:
tap6_handler(c, &msg, 1, now, 1);
if (c->v6)
tap6_handler(c, &msg, 1, now, 1);
break;
}
}
@ -598,18 +630,29 @@ static void tap_sock_init_unix(struct ctx *c)
c->fd_tap_listen = fd;
for (i = 1; i < UNIX_SOCK_MAX; i++) {
snprintf(addr.sun_path, UNIX_PATH_MAX, UNIX_SOCK_PATH, i);
char *path = addr.sun_path;
if (*c->sock_path)
strncpy(path, c->sock_path, UNIX_PATH_MAX);
else
snprintf(path, UNIX_PATH_MAX, UNIX_SOCK_PATH, i);
ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0);
ret = connect(ex, (const struct sockaddr *)&addr, sizeof(addr));
if (!ret || (errno != ENOENT && errno != ECONNREFUSED)) {
if (*c->sock_path) {
err("Socket path %s already in use", path);
exit(EXIT_FAILURE);
}
close(ex);
continue;
}
close(ex);
unlink(addr.sun_path);
if (!bind(fd, (const struct sockaddr *)&addr, sizeof(addr)))
unlink(path);
if (!bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) ||
*c->sock_path)
break;
}
@ -631,8 +674,8 @@ static void tap_sock_init_unix(struct ctx *c)
info("or directly qemu, patched with:");
info(" qemu/0001-net-Allow-also-UNIX-domain-sockets-to-be-used-as-net.patch");
info("as follows:");
info(" kvm ... -net socket,connect=" UNIX_SOCK_PATH
" -net nic,model=virtio", i);
info(" kvm ... -net socket,connect=%s -net nic,model=virtio",
addr.sun_path);
c->fd_tap = accept(fd, NULL, NULL);
}
@ -640,7 +683,7 @@ static void tap_sock_init_unix(struct ctx *c)
static int tun_ns_fd = -1;
/**
* tap_sock_init_tun_ns() - Create tuntap file descriptor in namespace
* tap_sock_init_tun_ns() - Create tuntap fd in namespace, bring up loopback
* @target_pid: Pointer to target PID of namespace (callers pass &c->pasta_pid)
*/
static int tap_sock_init_tun_ns(void *target_pid)
@ -657,6 +700,13 @@ static int tap_sock_init_tun_ns(void *target_pid)
tun_ns_fd = fd;
if (ioctl(socket(AF_INET, SOCK_DGRAM, 0), SIOCSIFFLAGS,
&((struct ifreq) { .ifr_name = "lo",
.ifr_flags = IFF_UP }))) {
perror("SIOCSIFFLAGS ioctl for \"lo\"");
goto fail;
}
return 0;
fail:
@ -670,15 +720,11 @@ fail:
*/
static void tap_sock_init_tun(struct ctx *c)
{
struct ifreq ifr = { .ifr_name = "pasta0",
.ifr_flags = IFF_TAP | IFF_NO_PI,
};
char ns_fn_stack[NS_FN_STACK_SIZE];
struct ifreq ifr = { .ifr_flags = IFF_TAP | IFF_NO_PI };
clone(tap_sock_init_tun_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2,
CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD,
(void *)&c->pasta_pid);
strncpy(ifr.ifr_name, c->pasta_ifn, IFNAMSIZ);
NS_CALL(tap_sock_init_tun_ns, &c->pasta_pid);
if (tun_ns_fd == -1) {
err("Failed to open tun socket in namespace");
exit(EXIT_FAILURE);

149
tcp.c
View file

@ -508,12 +508,37 @@ struct tcp_splice_conn {
int v6;
};
/* Port re-mappings as delta, indexed by original destination port.
 *
 * in_port_t covers ports 0 to USHRT_MAX inclusive, so tables indexed by port
 * need USHRT_MAX + 1 entries: with one entry less, port 65535 would be written
 * past the end of the table.
 */
static in_port_t tcp_port_delta_to_tap	[USHRT_MAX + 1];
static in_port_t tcp_port_delta_to_init	[USHRT_MAX + 1];

/**
 * tcp_remap_to_tap() - Set delta for port translation toward guest/tap
 * @port:	Original destination port, host order
 * @delta:	Delta to be added to original destination port
 */
void tcp_remap_to_tap(in_port_t port, in_port_t delta)
{
	tcp_port_delta_to_tap[port] = delta;
}

/**
 * tcp_remap_to_init() - Set delta for port translation toward init namespace
 * @port:	Original destination port, host order
 * @delta:	Delta to be added to original destination port
 */
void tcp_remap_to_init(in_port_t port, in_port_t delta)
{
	tcp_port_delta_to_init[port] = delta;
}
/* Static buffers */
/**
* tcp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
* @psum: Partial IP header checksum (excluding tot_len and saddr)
* @psum: Partial TCP header checksum (excluding length and saddr)
* @tsum: Partial TCP header checksum (excluding length and saddr)
* @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only
* @vnet_len: 4-byte qemu vnet buffer length descriptor, only for passt mode
* @eh: Pre-filled Ethernet header
* @iph: Pre-filled IP header (except for tot_len and saddr)
@ -555,6 +580,7 @@ static int tcp4_l2_buf_mss_tap_nr_set;
/**
* tcp6_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections
* @pad: Align IPv6 header for checksum calculation to 32B (AVX2) or 4B
* @vnet_len: 4-byte qemu vnet buffer length descriptor, only for passt mode
* @eh: Pre-filled Ethernet header
* @ip6h: Pre-filled IP header (except for payload_len and addresses)
@ -1011,7 +1037,7 @@ static struct tcp_tap_conn *tcp_hash_lookup(struct ctx *c, int af, void *addr,
}
/**
* tcp_table_tap_compact - Compaction tap connection table
* tcp_table_tap_compact - Perform compaction on tap connection table
* @c: Execution context
* @hole: Pointer to recently closed connection
*/
@ -1361,6 +1387,15 @@ static void tcp_conn_from_tap(struct ctx *c, int af, void *addr,
if (s < 0)
return;
if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr6.sin6_addr)) {
struct sockaddr_in6 addr6_ll = {
.sin6_family = AF_INET6,
.sin6_addr = c->addr6_ll,
.sin6_scope_id = if_nametoindex(c->ifn),
};
bind(s, (struct sockaddr *)&addr6_ll, sizeof(addr6_ll));
}
conn = &tt[c->tcp.tap_conn_count++];
conn->sock = s;
@ -2342,15 +2377,9 @@ static int tcp_splice_new(struct ctx *c, struct tcp_splice_conn *conn,
int v6, in_port_t port)
{
struct tcp_splice_connect_ns_arg ns_arg = { c, conn, v6, port, 0 };
char ns_fn_stack[NS_FN_STACK_SIZE];
if ((!v6 && bitmap_isset(c->tcp.port4_to_ns, port)) ||
(v6 && bitmap_isset(c->tcp.port6_to_ns, port))) {
clone(tcp_splice_connect_ns,
ns_fn_stack + sizeof(ns_fn_stack) / 2,
CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD,
(void *)&ns_arg);
if (bitmap_isset(c->tcp.port_to_tap, port)) {
NS_CALL(tcp_splice_connect_ns, &ns_arg);
return ns_arg.ret;
}
@ -2656,25 +2685,20 @@ static int tcp_sock_init_ns(void *arg)
ns_enter(c->pasta_pid);
if (c->v4) {
tref.v6 = 0;
for (port = 0; port < USHRT_MAX; port++) {
if (!bitmap_isset(c->tcp.port4_to_init, port))
continue;
for (port = 0; port < USHRT_MAX; port++) {
if (!bitmap_isset(c->tcp.port_to_init, port))
continue;
tref.index = port;
tref.index = (in_port_t)(port + tcp_port_delta_to_init[port]);
if (c->v4) {
tref.v6 = 0;
sock_l4(c, AF_INET, IPPROTO_TCP, port, BIND_LOOPBACK,
tref.u32);
}
}
if (c->v6) {
tref.v6 = 1;
for (port = 0; port < USHRT_MAX; port++) {
if (!bitmap_isset(c->tcp.port6_to_init, port))
continue;
tref.index = port;
if (c->v6) {
tref.v6 = 1;
sock_l4(c, AF_INET6, IPPROTO_TCP, port, BIND_LOOPBACK,
tref.u32);
}
@ -2692,65 +2716,54 @@ static int tcp_sock_init_ns(void *arg)
int tcp_sock_init(struct ctx *c)
{
union tcp_epoll_ref tref = { .listen = 1 };
char ns_fn_stack[NS_FN_STACK_SIZE];
enum bind_type tap_bind;
in_port_t port;
getrandom(&c->tcp.hash_secret, sizeof(c->tcp.hash_secret), GRND_RANDOM);
if (c->v4) {
tref.v6 = 0;
for (port = 0; port < USHRT_MAX; port++) {
tref.index = port;
for (port = 0; port < USHRT_MAX; port++) {
if (!bitmap_isset(c->tcp.port_to_tap, port))
continue;
if (bitmap_isset(c->tcp.port4_to_ns, port)) {
tref.index = (in_port_t)(port + tcp_port_delta_to_tap[port]);
if (c->v4) {
tref.v6 = 0;
tref.splice = 0;
sock_l4(c, AF_INET, IPPROTO_TCP, port,
c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
tref.u32);
if (c->mode == MODE_PASTA) {
tref.splice = 1;
sock_l4(c, AF_INET, IPPROTO_TCP, port,
BIND_LOOPBACK, tref.u32);
tap_bind = BIND_EXT;
} else {
tap_bind = BIND_ANY;
}
if (bitmap_isset(c->tcp.port4_to_tap, port)) {
tref.splice = 0;
sock_l4(c, AF_INET, IPPROTO_TCP, port,
tap_bind, tref.u32);
}
}
if (c->v6) {
tref.v6 = 1;
tref.splice = 0;
sock_l4(c, AF_INET6, IPPROTO_TCP, port,
c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
tref.u32);
if (c->mode == MODE_PASTA) {
tref.splice = 1;
sock_l4(c, AF_INET6, IPPROTO_TCP, port,
BIND_LOOPBACK, tref.u32);
}
}
}
if (c->v4)
tcp_sock4_iov_init();
}
if (c->v6) {
tref.v6 = 1;
for (port = 0; port < USHRT_MAX; port++) {
tref.index = port;
if (bitmap_isset(c->tcp.port6_to_ns, port)) {
tref.splice = 1;
sock_l4(c, AF_INET6, IPPROTO_TCP, port,
BIND_LOOPBACK, tref.u32);
tap_bind = BIND_EXT;
} else {
tap_bind = BIND_ANY;
}
if (bitmap_isset(c->tcp.port6_to_tap, port)) {
tref.splice = 0;
sock_l4(c, AF_INET6, IPPROTO_TCP, port,
tap_bind, tref.u32);
}
}
if (c->v6)
tcp_sock6_iov_init();
}
if (c->mode == MODE_PASTA) {
clone(tcp_sock_init_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2,
CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD,
(void *)c);
}
if (c->mode == MODE_PASTA)
NS_CALL(tcp_sock_init_ns, c);
return 0;
}

18
tcp.h
View file

@ -16,6 +16,8 @@ int tcp_sock_init(struct ctx *c);
void tcp_timer(struct ctx *c, struct timespec *ts);
void tcp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
uint32_t *ip_da);
void tcp_remap_to_tap(in_port_t port, in_port_t delta);
void tcp_remap_to_init(in_port_t port, in_port_t delta);
/**
* union tcp_epoll_ref - epoll reference portion for TCP connections
@ -40,24 +42,16 @@ union tcp_epoll_ref {
* @hash_secret: 128-bit secret for hash functions, ISN and hash table
* @tap_conn_count: Count of tap connections in connection table
* @splice_conn_count: Count of spliced connections in connection table
* @port4_to_tap: IPv4 ports bound host/init-side, packets to guest/tap
* @port6_to_tap: IPv6 ports bound host/init-side, packets to guest/tap
* @port4_to_init: IPv4 ports bound namespace-side, spliced to init
* @port6_to_init: IPv6 ports bound namespace-side, spliced to init
* @port4_to_ns: IPv4 ports bound init-side, spliced to namespace
* @port6_to_ns: IPv6 ports bound init-side, spliced to namespace
* @port_to_tap: Ports bound host-side, packets to tap or spliced
* @port_to_init: Ports bound namespace-side, spliced to init
* @timer_run: Timestamp of most recent timer run
*/
struct tcp_ctx {
uint64_t hash_secret[2];
int tap_conn_count;
int splice_conn_count;
uint8_t port4_to_tap [USHRT_MAX / 8];
uint8_t port6_to_tap [USHRT_MAX / 8];
uint8_t port4_to_init [USHRT_MAX / 8];
uint8_t port6_to_init [USHRT_MAX / 8];
uint8_t port4_to_ns [USHRT_MAX / 8];
uint8_t port6_to_ns [USHRT_MAX / 8];
uint8_t port_to_tap [USHRT_MAX / 8];
uint8_t port_to_init [USHRT_MAX / 8];
struct timespec timer_run;
};

180
udp.c
View file

@ -51,7 +51,8 @@
* - send packet to udp4_splice_map[5000].ns_conn_sock
* - otherwise:
* - create new socket udp_splice_map[V4][5000].ns_conn_sock
* - connect in namespace to 127.0.0.1:80
* - connect in namespace to 127.0.0.1:80 (note: this destination port
* might be remapped to another port instead)
* - get source port of new connected socket (10000) with getsockname()
* - add to epoll with reference: index = 10000, splice: UDP_BACK_TO_INIT
* - set udp_splice_map[V4][10000].init_bound_sock to s
@ -74,7 +75,8 @@
* - send packet to udp4_splice_map[2000].init_conn_sock
* - otherwise:
* - create new socket udp_splice_map[V4][2000].init_conn_sock
* - connect in init to 127.0.0.1:22,
* - connect in init to 127.0.0.1:22 (note: this destination port
* might be remapped to another port instead)
* - get source port of new connected socket (4000) with getsockname()
* - add to epoll with reference: index = 4000, splice = UDP_BACK_TO_NS
* - set udp_splice_map[V4][4000].ns_bound_sock to s
@ -163,6 +165,12 @@ struct udp_splice_port {
static struct udp_tap_port udp_tap_map [IP_VERSIONS][USHRT_MAX];
static struct udp_splice_port udp_splice_map [IP_VERSIONS][USHRT_MAX];
/* Port re-mappings as delta, indexed by original destination port.
 *
 * Deltas are applied in in_port_t (16-bit, wrapping) arithmetic. The tables
 * hold one slot for every possible port value, 0 to USHRT_MAX inclusive, so
 * that a lookup for port 65535 stays within bounds.
 */
static in_port_t udp_port_delta_to_tap   [USHRT_MAX + 1];
static in_port_t udp_port_delta_from_tap [USHRT_MAX + 1];
static in_port_t udp_port_delta_to_init  [USHRT_MAX + 1];
static in_port_t udp_port_delta_from_init[USHRT_MAX + 1];
enum udp_act_type {
UDP_ACT_TAP,
UDP_ACT_NS_CONN,
@ -267,6 +275,28 @@ static struct mmsghdr udp_splice_mmh_send [UDP_SPLICE_FRAMES];
static struct iovec udp_splice_iov_sendto [UDP_SPLICE_FRAMES];
static struct mmsghdr udp_splice_mmh_sendto [UDP_SPLICE_FRAMES];
/**
 * udp_remap_to_tap() - Set delta for port translation to/from guest/tap
 * @port:	Original destination port, host order
 * @delta:	Delta to be added to original destination port
 *
 * Records the forward delta for @port and the matching reverse delta for the
 * translated port. Deltas are applied by adding them to an in_port_t, so all
 * arithmetic is modulo 2^16: the reverse delta is the additive inverse of
 * @delta in that ring (65536 - @delta), and the translated port wraps around
 * instead of indexing past the 16-bit port range.
 */
void udp_remap_to_tap(in_port_t port, in_port_t delta)
{
	/* port + delta is evaluated as int: truncate back to 16 bits */
	in_port_t mapped = (in_port_t)(port + delta);

	udp_port_delta_to_tap[port] = delta;

	/* (mapped + -delta) mod 2^16 == port, undoing the translation */
	udp_port_delta_from_tap[mapped] = (in_port_t)-delta;
}
/**
 * udp_remap_to_init() - Set delta for port translation to/from init namespace
 * @port:	Original destination port, host order
 * @delta:	Delta to be added to original destination port
 *
 * Records the forward delta for @port and the matching reverse delta for the
 * translated port. Deltas are applied by adding them to an in_port_t, so all
 * arithmetic is modulo 2^16: the reverse delta is the additive inverse of
 * @delta in that ring (65536 - @delta), and the translated port wraps around
 * instead of indexing past the 16-bit port range.
 */
void udp_remap_to_init(in_port_t port, in_port_t delta)
{
	/* port + delta is evaluated as int: truncate back to 16 bits */
	in_port_t mapped = (in_port_t)(port + delta);

	udp_port_delta_to_init[port] = delta;

	/* (mapped + -delta) mod 2^16 == port, undoing the translation */
	udp_port_delta_from_init[mapped] = (in_port_t)-delta;
}
/**
* udp_update_check4() - Update checksum with variable parts from stored one
* @buf: L2 packet buffer with final IPv4 header
@ -506,7 +536,6 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
struct msghdr *mh = &udp_splice_mmh_recv[0].msg_hdr;
struct sockaddr_storage *sa_s = mh->msg_name;
in_port_t src, dst = ref.udp.port, send_dst;
char ns_fn_stack[NS_FN_STACK_SIZE];
int s, v6 = ref.udp.v6, n, i;
if (!(events & EPOLLIN))
@ -529,16 +558,14 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
switch (ref.udp.splice) {
case UDP_TO_NS:
src += udp_port_delta_from_init[src];
if (!(s = udp_splice_map[v6][src].ns_conn_sock)) {
struct udp_splice_connect_ns_arg arg = {
c, v6, ref.s, src, dst, -1,
};
clone(udp_splice_connect_ns,
ns_fn_stack + sizeof(ns_fn_stack) / 2,
CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD,
(void *)&arg);
NS_CALL(udp_splice_connect_ns, &arg);
if ((s = arg.s) < 0)
return;
}
@ -551,6 +578,8 @@ static void udp_sock_handler_splice(struct ctx *c, union epoll_ref ref,
send_dst = udp_splice_map[v6][dst].init_dst_port;
break;
case UDP_TO_INIT:
src += udp_port_delta_from_tap[src];
if (!(s = udp_splice_map[v6][src].init_conn_sock)) {
s = udp_splice_connect(c, v6, ref.s, src, dst,
UDP_BACK_TO_NS);
@ -867,16 +896,28 @@ int udp_tap_handler(struct ctx *c, int af, void *addr,
.sin6_port = uh->dest,
.sin6_addr = *(struct in6_addr *)addr,
};
enum bind_type bind_to = BIND_ANY;
sa = (struct sockaddr *)&s_in6;
sl = sizeof(s_in6);
if (!memcmp(addr, &c->gw6, sizeof(c->gw6)) &&
udp_tap_map[V6][dst].ts_local) {
if (udp_tap_map[V6][dst].loopback)
s_in6.sin6_addr = in6addr_loopback;
else
s_in6.sin6_addr = c->addr6_seen;
} else if (IN6_IS_ADDR_LINKLOCAL(&s_in6.sin6_addr)) {
bind_to = BIND_LL;
}
if (!(s = udp_tap_map[V6][src].sock)) {
union udp_epoll_ref uref = { .bound = 1, .v6 = 1,
.port = src
};
s = sock_l4(c, AF_INET6, IPPROTO_UDP, src, 0, uref.u32);
s = sock_l4(c, AF_INET6, IPPROTO_UDP, src, bind_to,
uref.u32);
if (s <= 0)
return count;
@ -885,14 +926,6 @@ int udp_tap_handler(struct ctx *c, int af, void *addr,
}
udp_tap_map[V6][src].ts = now->tv_sec;
if (!memcmp(addr, &c->gw6, sizeof(c->gw6)) &&
udp_tap_map[V6][dst].ts_local) {
if (udp_tap_map[V6][dst].loopback)
s_in6.sin6_addr = in6addr_loopback;
else
s_in6.sin6_addr = c->addr6_seen;
}
}
for (i = 0; i < count; i++) {
@ -923,30 +956,25 @@ int udp_sock_init_ns(void *arg)
{
union udp_epoll_ref uref = { .bound = 1, .splice = UDP_TO_INIT };
struct ctx *c = (struct ctx *)arg;
in_port_t port;
in_port_t dst;
ns_enter(c->pasta_pid);
if (c->v4) {
uref.v6 = 0;
for (port = 0; port < USHRT_MAX; port++) {
if (!bitmap_isset(c->udp.port4_to_init, port))
continue;
for (dst = 0; dst < USHRT_MAX; dst++) {
if (!bitmap_isset(c->udp.port_to_init, dst))
continue;
uref.port = port;
sock_l4(c, AF_INET, IPPROTO_UDP, port, BIND_LOOPBACK,
uref.port = dst + udp_port_delta_to_init[dst];
if (c->v4) {
uref.v6 = 0;
sock_l4(c, AF_INET, IPPROTO_UDP, dst, BIND_LOOPBACK,
uref.u32);
}
}
if (c->v6) {
uref.v6 = 1;
for (port = 0; port < USHRT_MAX; port++) {
if (!bitmap_isset(c->udp.port6_to_init, port))
continue;
uref.port = port;
sock_l4(c, AF_INET6, IPPROTO_UDP, port, BIND_LOOPBACK,
if (c->v6) {
uref.v6 = 1;
sock_l4(c, AF_INET6, IPPROTO_UDP, dst, BIND_LOOPBACK,
uref.u32);
}
}
@ -1016,68 +1044,56 @@ static void udp_splice_iov_init(void)
int udp_sock_init(struct ctx *c)
{
union udp_epoll_ref uref = { .bound = 1 };
char ns_fn_stack[NS_FN_STACK_SIZE];
enum bind_type tap_bind;
in_port_t port;
in_port_t dst;
int s;
if (c->v4) {
uref.v6 = 0;
for (port = 0; port < USHRT_MAX; port++) {
uref.port = port;
for (dst = 0; dst < USHRT_MAX; dst++) {
if (!bitmap_isset(c->udp.port_to_tap, dst))
continue;
if (bitmap_isset(c->udp.port4_to_ns, port)) {
uref.port = dst + udp_port_delta_to_tap[dst];
if (c->v4) {
uref.splice = 0;
uref.v6 = 0;
s = sock_l4(c, AF_INET, IPPROTO_UDP, dst,
c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
uref.u32);
if (s > 0)
udp_tap_map[V4][uref.port].sock = s;
if (c->mode == MODE_PASTA) {
uref.splice = UDP_TO_NS;
sock_l4(c, AF_INET, IPPROTO_UDP, port,
sock_l4(c, AF_INET, IPPROTO_UDP, dst,
BIND_LOOPBACK, uref.u32);
tap_bind = BIND_EXT;
} else {
tap_bind = BIND_ANY;
}
if (bitmap_isset(c->udp.port4_to_tap, port)) {
uref.splice = 0;
s = sock_l4(c, AF_INET, IPPROTO_UDP, port,
tap_bind, uref.u32);
if (s > 0)
udp_tap_map[V4][port].sock = s;
}
}
if (c->v6) {
uref.splice = 0;
uref.v6 = 1;
s = sock_l4(c, AF_INET6, IPPROTO_UDP, dst,
c->mode == MODE_PASTA ? BIND_EXT : BIND_ANY,
uref.u32);
if (s > 0)
udp_tap_map[V6][uref.port].sock = s;
if (c->mode == MODE_PASTA) {
uref.splice = UDP_TO_NS;
sock_l4(c, AF_INET6, IPPROTO_UDP, dst,
BIND_LOOPBACK, uref.u32);
}
}
}
if (c->v4)
udp_sock4_iov_init();
}
if (c->v6) {
uref.v6 = 1;
for (port = 0; port < USHRT_MAX; port++) {
uref.port = port;
if (bitmap_isset(c->udp.port6_to_ns, port)) {
uref.splice = UDP_TO_NS;
sock_l4(c, AF_INET6, IPPROTO_UDP, port,
BIND_LOOPBACK, uref.u32);
tap_bind = BIND_EXT;
} else {
tap_bind = BIND_ANY;
}
if (bitmap_isset(c->udp.port6_to_tap, port)) {
uref.splice = 0;
s = sock_l4(c, AF_INET6, IPPROTO_UDP, port,
tap_bind, uref.u32);
if (s > 0)
udp_tap_map[V6][port].sock = s;
}
}
if (c->v6)
udp_sock6_iov_init();
}
if (c->mode == MODE_PASTA) {
udp_splice_iov_init();
clone(udp_sock_init_ns, ns_fn_stack + sizeof(ns_fn_stack) / 2,
CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD,
(void *)c);
NS_CALL(udp_sock_init_ns, c);
}
return 0;

17
udp.h
View file

@ -11,6 +11,8 @@ int udp_sock_init(struct ctx *c);
void udp_timer(struct ctx *c, struct timespec *ts);
void udp_update_l2_buf(unsigned char *eth_d, unsigned char *eth_s,
uint32_t *ip_da);
void udp_remap_to_tap(in_port_t port, in_port_t delta);
void udp_remap_to_init(in_port_t port, in_port_t delta);
/**
* union udp_epoll_ref - epoll reference portion for UDP connections
@ -37,20 +39,13 @@ union udp_epoll_ref {
/**
* struct udp_ctx - Execution context for UDP
* @port6_to_tap: IPv6 ports bound host/init-side, packets to guest/tap
* @port4_to_init: IPv4 ports bound namespace-side, spliced to init
* @port6_to_init: IPv6 ports bound namespace-side, spliced to init
* @port4_to_ns: IPv4 ports bound init-side, spliced to namespace
* @port6_to_ns: IPv6 ports bound init-side, spliced to namespace
* @port_to_tap: Ports bound host-side, data to tap or ns L4 socket
* @port_to_init: Ports bound namespace-side, data to init L4 socket
* @timer_run: Timestamp of most recent timer run
*/
struct udp_ctx {
uint8_t port4_to_tap [USHRT_MAX / 8];
uint8_t port6_to_tap [USHRT_MAX / 8];
uint8_t port4_to_init [USHRT_MAX / 8];
uint8_t port6_to_init [USHRT_MAX / 8];
uint8_t port4_to_ns [USHRT_MAX / 8];
uint8_t port6_to_ns [USHRT_MAX / 8];
uint8_t port_to_tap [USHRT_MAX / 8];
uint8_t port_to_init [USHRT_MAX / 8];
struct timespec timer_run;
};

47
util.c
View file

@ -36,7 +36,6 @@
#include "util.h"
#include "passt.h"
#ifdef DEBUG
#define logfn(name, level) \
void name(const char *format, ...) { \
char ts[sizeof("Mmm dd hh:mm:ss.")]; \
@ -44,37 +43,33 @@ void name(const char *format, ...) { \
struct tm *tm; \
va_list args; \
\
clock_gettime(CLOCK_REALTIME, &tp); \
tm = gmtime(&tp.tv_sec); \
strftime(ts, sizeof(ts), "%b %d %T.", tm); \
if (setlogmask(0) & LOG_MASK(LOG_DEBUG)) { \
clock_gettime(CLOCK_REALTIME, &tp); \
tm = gmtime(&tp.tv_sec); \
strftime(ts, sizeof(ts), "%b %d %T.", tm); \
\
fprintf(stderr, "%s%04lu: ", ts, tp.tv_nsec / (100 * 1000)); \
va_start(args, format); \
vsyslog(level, format, args); \
va_end(args); \
va_start(args, format); \
vfprintf(stderr, format, args); \
va_end(args); \
if (format[strlen(format)] != '\n') \
fprintf(stderr, "\n"); \
}
#else
#define logfn(name, level) \
void name(const char *format, ...) { \
va_list args; \
fprintf(stderr, "%s%04lu: ", ts, \
tp.tv_nsec / (100 * 1000)); \
} \
\
va_start(args, format); \
vsyslog(level, format, args); \
va_end(args); \
\
if (setlogmask(0) & LOG_MASK(LOG_DEBUG) || \
setlogmask(0) == LOG_MASK(LOG_EMERG)) { \
va_start(args, format); \
vfprintf(stderr, format, args); \
va_end(args); \
if (format[strlen(format)] != '\n') \
fprintf(stderr, "\n"); \
} \
}
#endif
logfn(err, LOG_ERR)
logfn(warn, LOG_WARNING)
logfn(info, LOG_INFO)
#ifdef DEBUG
logfn(debug, LOG_DEBUG)
#endif
/**
* ipv6_l4hdr() - Find pointer to L4 header in IPv6 packet and extract protocol
@ -171,12 +166,16 @@ int sock_l4(struct ctx *c, int af, uint8_t proto, uint16_t port,
sa = (const struct sockaddr *)&addr4;
sl = sizeof(addr4);
} else {
if (bind_addr == BIND_LOOPBACK)
if (bind_addr == BIND_LOOPBACK) {
addr6.sin6_addr = in6addr_loopback;
else if (bind_addr == BIND_EXT)
} else if (bind_addr == BIND_EXT) {
addr6.sin6_addr = c->addr6;
else
} else if (bind_addr == BIND_LL) {
addr6.sin6_addr = c->addr6_ll;
addr6.sin6_scope_id = if_nametoindex(c->ifn);
} else {
addr6.sin6_addr = in6addr_any;
}
sa = (const struct sockaddr *)&addr6;
sl = sizeof(addr6);

14
util.h
View file

@ -1,12 +1,7 @@
void err(const char *format, ...);
void warn(const char *format, ...);
void info(const char *format, ...);
#ifdef DEBUG
void debug(const char *format, ...);
#else
#define debug(...) { }
#endif
#define CHECK_SET_MIN_MAX(basename, fd) \
do { \
@ -53,6 +48,14 @@ void debug(const char *format, ...);
#define PORT_IS_EPHEMERAL(port) ((port) >= PORT_EPHEMERAL_MIN)
#define NS_FN_STACK_SIZE (RLIMIT_STACK_VAL * 1024 / 4)
/**
 * NS_CALL() - Run a function in a child created with clone()
 * @fn:		Function to run, with the int (*)(void *) signature clone() expects
 * @arg:	Argument passed to @fn, cast to void *
 *
 * A buffer of NS_FN_STACK_SIZE bytes is declared in the expanding scope, so it
 * lives on the caller's stack, and its midpoint is handed to clone() as the
 * child stack pointer (presumably so that the child has room whichever way
 * the stack grows -- confirm).
 *
 * Per clone(2), CLONE_VM and CLONE_FILES make the child share memory and the
 * file descriptor table with the caller, and CLONE_VFORK suspends the caller
 * until the child exits or calls execve(). Results are therefore passed back
 * through the object @arg points to.
 *
 * The return value of clone() is discarded: a failure to create the child is
 * not reported to the caller.
 */
#define NS_CALL(fn, arg) \
do { \
char ns_fn_stack[NS_FN_STACK_SIZE]; \
\
clone((fn), ns_fn_stack + sizeof(ns_fn_stack) / 2, \
CLONE_VM | CLONE_VFORK | CLONE_FILES | SIGCHLD, \
(void *)(arg)); \
} while (0)
#if __BYTE_ORDER == __BIG_ENDIAN
#define L2_BUF_ETH_IP4_INIT \
@ -120,6 +123,7 @@ void debug(const char *format, ...);
/* Address selection for sockets created by sock_l4(); the notes below describe
 * the IPv6 case.
 */
enum bind_type {
BIND_ANY = 0, /* wildcard address (in6addr_any); also the fallback case */
BIND_LOOPBACK, /* loopback address (in6addr_loopback) */
BIND_LL, /* link-local address c->addr6_ll, scope set from c->ifn */
BIND_EXT, /* address stored in c->addr6 */
};