passt: Add support for multiple instances in different network namespaces

This allows several instances to run at the same time while sharing
the same filesystem. Instead of a fixed path for the UNIX domain
socket, passt now uses a path with a counter, probing for existing
instances, and picking the first free one.

The demo script is updated accordingly -- it can now be started several
times to create multiple namespaces with an instance of passt each,
with addressing reflecting separate subnets, and NDP proxying between
them.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
Stefano Brivio 2021-05-21 11:14:51 +02:00
parent 8ce188ecb0
commit 19d254bbbb
6 changed files with 114 additions and 44 deletions

View file

@ -30,7 +30,7 @@ ipv6_mangle() {
if [ ${__c} -lt 7 ]; then if [ ${__c} -lt 7 ]; then
printf "${__16b}:" printf "${__16b}:"
else else
printf "abcd\n" && break printf "%04x\n" $((0xabc0 + ${2})) && break
fi fi
__c=$((__c + 1)) __c=$((__c + 1))
done done
@ -40,43 +40,66 @@ ipv6_mangle() {
ndp_setup() { ndp_setup() {
sysctl -w net.ipv6.conf.all.proxy_ndp=1 sysctl -w net.ipv6.conf.all.proxy_ndp=1
ip -6 neigh add proxy "${1}" dev "$(ipv6_dev)" ip -6 neigh add proxy "${1}" dev "$(ipv6_dev)"
for i in `seq 1 63`; do
__neigh="$(ipv6_mangle ${1} ${i})"
if [ "${__neigh}" != "${1}" ]; then
ip -6 neigh add proxy "${__neigh}" dev "${2}"
fi
done
} }
ip netns del passt 2>/dev/null || : ns_idx=0
ip link del veth_passt 2>/dev/null || : for i in `seq 1 63`; do
ip netns add passt ns="passt_${i}"
ip link add veth_passt up netns passt type veth peer name veth_passt ns_idx=${i}
ip link set dev veth_passt up
ip -n passt link set dev lo up
busy=0
for p in $(pidof passt); do
[ "$(ip netns identify ${p})" = "${ns}" ] && busy=1 && break
done
[ ${busy} -eq 0 ] && break
done
ip -n passt addr add 192.0.2.2/24 dev veth_passt [ ${busy} -ne 0 ] && echo "Couldn't create namespace" && exit 1
ip addr add 192.0.2.1/24 dev veth_passt
ip -n passt route add default via 192.0.2.1 ip netns del "${ns}" 2>/dev/null || :
ip netns add "${ns}"
ip link del "veth_${ns}" 2>/dev/null || :
ip link add "veth_${ns}" up netns "${ns}" type veth peer name "veth_${ns}"
ip link set dev "veth_${ns}" up
ip -n "${ns}" link set dev lo up
ipv4_main="192.0.2.$(((ns_idx - 1) * 4 + 1))"
ipv4_ns="192.0.2.$(((ns_idx - 1) * 4 + 2))"
ip -n "${ns}" addr add "${ipv4_ns}/30" dev "veth_${ns}"
ip addr add "${ipv4_main}/30" dev "veth_${ns}"
ip -n "${ns}" route add default via "${ipv4_main}"
sysctl -w net.ipv4.ip_forward=1 sysctl -w net.ipv4.ip_forward=1
nft delete table passt_nat 2>/dev/null || : nft delete table "${ns}_nat" 2>/dev/null || :
nft add table passt_nat nft add table "${ns}_nat"
nft 'add chain passt_nat postrouting { type nat hook postrouting priority -100 ; }' nft add chain "${ns}_nat" postrouting '{ type nat hook postrouting priority -100 ; }'
nft add rule passt_nat postrouting ip saddr 192.0.2.2 masquerade nft add rule "${ns}_nat" postrouting ip saddr "${ipv4_ns}" masquerade
ipv6_addr="$(ipv6_devaddr "$(ipv6_dev)")" ipv6_addr="$(ipv6_devaddr "$(ipv6_dev)")"
ipv6_passt="$(ipv6_mangle "${ipv6_addr}")" ipv6_passt="$(ipv6_mangle "${ipv6_addr}" ${ns_idx})"
ndp_setup "${ipv6_passt}" ndp_setup "${ipv6_passt}" "veth_${ns}"
ip -n passt addr add "${ipv6_passt}/$(ipv6_mask "${ipv6_addr}")" dev veth_passt ip -n "${ns}" addr add "${ipv6_passt}/$(ipv6_mask "${ipv6_addr}")" dev "veth_${ns}"
ip addr add "${ipv6_addr}" dev veth_passt ip addr add "${ipv6_addr}" dev "veth_${ns}"
ip route add "${ipv6_passt}" dev veth_passt ip route add "${ipv6_passt}" dev "veth_${ns}"
passt_ll="$(ipv6_ll_addr "veth_passt")" passt_ll="$(ipv6_ll_addr "veth_${ns}")"
main_ll="$(get_token "link/ether" $(ip -o li sh veth_passt))" main_ll="$(get_token "link/ether" $(ip -o li sh "veth_${ns}"))"
ip neigh add "${passt_ll%%/*}" dev veth_passt lladdr "${main_ll}" ip neigh add "${passt_ll%%/*}" dev "veth_${ns}" lladdr "${main_ll}"
ip -n passt route add default via "${passt_ll%%/*}" dev veth_passt ip -n "${ns}" route add default via "${passt_ll%%/*}" dev "veth_${ns}"
sysctl -w net.ipv6.conf.all.forwarding=1 sysctl -w net.ipv6.conf.all.forwarding=1
ethtool -K veth_passt tx off ethtool -K "veth_${ns}" tx off
ip netns exec passt ethtool -K veth_passt tx off ip netns exec "${ns}" ethtool -K "veth_${ns}" tx off
ip netns exec passt sysctl -w net.ipv4.ping_group_range="0 2147483647" ip netns exec "${ns}" sysctl -w net.ipv4.ping_group_range="0 2147483647"
sysctl -w net.core.rmem_max=16777216 sysctl -w net.core.rmem_max=16777216
@ -84,5 +107,12 @@ sysctl -w net.core.wmem_max=16777216
sysctl -w net.core.rmem_default=16777216 sysctl -w net.core.rmem_default=16777216
sysctl -w net.core.wmem_default=16777216 sysctl -w net.core.wmem_default=16777216
echo
echo "Namespace ${ns} set up, addresses:"
echo " ${ipv4_ns}"
echo " ${ipv6_passt}"
echo
echo "Starting passt..."
echo
ip netns exec passt ./passt ip netns exec "${ns}" ./passt

46
passt.c
View file

@ -20,7 +20,6 @@
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <sys/resource.h> #include <sys/resource.h>
#include <sys/uio.h> #include <sys/uio.h>
#include <sys/un.h>
#include <ifaddrs.h> #include <ifaddrs.h>
#include <linux/if_ether.h> #include <linux/if_ether.h>
#include <linux/if_packet.h> #include <linux/if_packet.h>
@ -30,6 +29,7 @@
#include <linux/tcp.h> #include <linux/tcp.h>
#include <linux/udp.h> #include <linux/udp.h>
#include <linux/icmpv6.h> #include <linux/icmpv6.h>
#include <linux/un.h>
#include <linux/if_link.h> #include <linux/if_link.h>
#include <net/ethernet.h> #include <net/ethernet.h>
#include <stdlib.h> #include <stdlib.h>
@ -82,31 +82,50 @@ static char *ip_proto_str[IPPROTO_SCTP + 1] = {
/** /**
* sock_unix() - Create and bind AF_UNIX socket, add to epoll list * sock_unix() - Create and bind AF_UNIX socket, add to epoll list
* @index: Index used in socket path, filled on success
* *
* Return: newly created socket, doesn't return on error * Return: newly created socket, doesn't return on error
*/ */
static int sock_unix(void) static int sock_unix(int *index)
{ {
int fd = socket(AF_UNIX, SOCK_STREAM, 0); int fd = socket(AF_UNIX, SOCK_STREAM, 0), ex;
struct sockaddr_un addr = { struct sockaddr_un addr = {
.sun_family = AF_UNIX, .sun_family = AF_UNIX,
.sun_path = UNIX_SOCK_PATH,
}; };
int i, ret;
if (fd < 0) { if (fd < 0) {
perror("UNIX socket"); perror("UNIX socket");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
unlink(UNIX_SOCK_PATH); for (i = 1; i < UNIX_SOCK_MAX; i++) {
if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) { snprintf(addr.sun_path, UNIX_PATH_MAX, UNIX_SOCK_PATH, i);
ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0);
ret = connect(ex, (const struct sockaddr *)&addr, sizeof(addr));
if (!ret || errno != ECONNREFUSED) {
close(ex);
continue;
}
close(ex);
unlink(addr.sun_path);
if (!bind(fd, (const struct sockaddr *)&addr, sizeof(addr)))
break;
}
if (i == UNIX_SOCK_MAX) {
perror("UNIX socket bind"); perror("UNIX socket bind");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
chmod(UNIX_SOCK_PATH, info("UNIX domain socket bound at %s\n", addr.sun_path);
chmod(addr.sun_path,
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
*index = i;
return fd; return fd;
} }
@ -743,11 +762,11 @@ void usage(const char *name)
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
struct epoll_event events[EPOLL_EVENTS]; struct epoll_event events[EPOLL_EVENTS];
int nfds, i, fd_unix, sock_index;
char buf6[INET6_ADDRSTRLEN]; char buf6[INET6_ADDRSTRLEN];
char buf4[INET_ADDRSTRLEN]; char buf4[INET_ADDRSTRLEN];
struct epoll_event ev = { 0 }; struct epoll_event ev = { 0 };
struct ctx c = { 0 }; struct ctx c = { 0 };
int nfds, i, fd_unix;
struct rlimit limit; struct rlimit limit;
struct timespec now; struct timespec now;
@ -785,7 +804,7 @@ int main(int argc, char **argv)
get_addrs(&c); get_addrs(&c);
get_dns(&c); get_dns(&c);
fd_unix = sock_unix(); fd_unix = sock_unix(&sock_index);
if (icmp_sock_init(&c) || udp_sock_init(&c) || tcp_sock_init(&c)) if (icmp_sock_init(&c) || udp_sock_init(&c) || tcp_sock_init(&c))
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
@ -795,7 +814,7 @@ int main(int argc, char **argv)
memset(&c.mac_guest, 0xff, sizeof(c.mac_guest)); memset(&c.mac_guest, 0xff, sizeof(c.mac_guest));
pcap_init(); pcap_init(sock_index);
if (c.v4) { if (c.v4) {
info("ARP:"); info("ARP:");
@ -841,14 +860,14 @@ int main(int argc, char **argv)
} }
listen: listen:
listen(fd_unix, 1); listen(fd_unix, 0);
info("You can now start qrap:"); info("You can now start qrap:");
info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio"); info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio");
info("or directly qemu, patched with:"); info("or directly qemu, patched with:");
info(" qemu/0001-net-Allow-also-UNIX-domain-sockets-to-be-used-as-net.patch"); info(" qemu/0001-net-Allow-also-UNIX-domain-sockets-to-be-used-as-net.patch");
info("as follows:"); info("as follows:");
info(" kvm ... -net socket,connect=" info(" kvm ... -net socket,connect=" UNIX_SOCK_PATH
UNIX_SOCK_PATH " -net nic,model=virtio"); " -net nic,model=virtio", sock_index);
#ifndef DEBUG #ifndef DEBUG
if (daemon(0, 0)) { if (daemon(0, 0)) {
@ -858,6 +877,7 @@ listen:
#endif #endif
c.fd_unix = accept(fd_unix, NULL, NULL); c.fd_unix = accept(fd_unix, NULL, NULL);
ev.events = EPOLLIN | EPOLLRDHUP | EPOLLERR | EPOLLHUP; ev.events = EPOLLIN | EPOLLRDHUP | EPOLLERR | EPOLLHUP;
ev.data.fd = c.fd_unix; ev.data.fd = c.fd_unix;
epoll_ctl(c.epollfd, EPOLL_CTL_ADD, c.fd_unix, &ev); epoll_ctl(c.epollfd, EPOLL_CTL_ADD, c.fd_unix, &ev);

View file

@ -1,4 +1,5 @@
#define UNIX_SOCK_PATH "/tmp/passt.socket" #define UNIX_SOCK_MAX 100
#define UNIX_SOCK_PATH "/tmp/passt_%i.socket"
/** /**
* struct tap_msg - Generic message descriptor for arrays of messages * struct tap_msg - Generic message descriptor for arrays of messages
@ -26,6 +27,8 @@ struct fqdn {
char n[NS_MAXDNAME]; char n[NS_MAXDNAME];
}; };
#include <net/if.h>
/** /**
* struct ctx - Execution context * struct ctx - Execution context
* @epollfd: file descriptor for epoll instance * @epollfd: file descriptor for epoll instance

14
pcap.c
View file

@ -20,6 +20,10 @@
#include <time.h> #include <time.h>
#include <net/ethernet.h> #include <net/ethernet.h>
#include <unistd.h> #include <unistd.h>
#include <net/if.h>
#include "passt.h"
#include "util.h"
#ifdef DEBUG #ifdef DEBUG
@ -77,9 +81,9 @@ void pcap(char *pkt, size_t len)
write(pcap_fd, pkt, len); write(pcap_fd, pkt, len);
} }
void pcap_init(void) void pcap_init(int sock_index)
{ {
char name[] = PCAP_PREFIX PCAP_ISO8601_STR ".pcap"; char name[] = PCAP_PREFIX PCAP_ISO8601_STR STR(UNIX_SOCK_MAX) ".pcap";
struct timeval tv; struct timeval tv;
struct tm *tm; struct tm *tm;
@ -88,6 +92,10 @@ void pcap_init(void)
strftime(name + strlen(PCAP_PREFIX), sizeof(PCAP_ISO8601_STR) - 1, strftime(name + strlen(PCAP_PREFIX), sizeof(PCAP_ISO8601_STR) - 1,
PCAP_ISO8601_FORMAT, tm); PCAP_ISO8601_FORMAT, tm);
snprintf(name + strlen(PCAP_PREFIX) + strlen(PCAP_ISO8601_STR),
sizeof(name) - strlen(PCAP_PREFIX) - strlen(PCAP_ISO8601_STR),
"_%i.pcap", sock_index);
pcap_fd = open(name, O_WRONLY | O_CREAT | O_APPEND | O_DSYNC, pcap_fd = open(name, O_WRONLY | O_CREAT | O_APPEND | O_DSYNC,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (pcap_fd == -1) { if (pcap_fd == -1) {
@ -95,6 +103,8 @@ void pcap_init(void)
return; return;
} }
info("Saving packet capture at %s", name);
write(pcap_fd, &pcap_hdr, sizeof(pcap_hdr)); write(pcap_fd, &pcap_hdr, sizeof(pcap_hdr));
} }

2
pcap.h
View file

@ -1,2 +1,2 @@
void pcap(char *pkt, size_t len); void pcap(char *pkt, size_t len);
void pcap_init(void); void pcap_init(int sock_index);

7
util.h
View file

@ -29,6 +29,9 @@ void debug(const char *format, ...);
#define MAX(x, y) (((x) > (y)) ? (x) : (y)) #define MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif #endif
#define STRINGIFY(x) #x
#define STR(x) STRINGIFY(x)
#define ARRAY_SIZE(a) ((int)(sizeof(a) / sizeof((a)[0]))) #define ARRAY_SIZE(a) ((int)(sizeof(a) / sizeof((a)[0])))
#define IN_INTERVAL(a, b, x) ((x) >= (a) && (x) <= (b)) #define IN_INTERVAL(a, b, x) ((x) >= (a) && (x) <= (b))
@ -37,6 +40,10 @@ void debug(const char *format, ...);
#define PORT_IS_EPHEMERAL(port) ((port) >= (1 << 15) + (1 << 14)) /* RFC 6335 */ #define PORT_IS_EPHEMERAL(port) ((port) >= (1 << 15) + (1 << 14)) /* RFC 6335 */
#include <linux/ipv6.h>
#include <net/if.h>
#include <linux/ip.h>
uint16_t csum_fold(uint32_t sum); uint16_t csum_fold(uint32_t sum);
uint16_t csum_ip4(void *buf, size_t len); uint16_t csum_ip4(void *buf, size_t len);
void csum_tcp4(struct iphdr *iph); void csum_tcp4(struct iphdr *iph);