passt: Add support for multiple instances in different network namespaces sharing the same filesystem

Instead of a fixed path for the UNIX domain socket, passt now uses a path with a counter, probing for existing instances, and picking the first free one.

The demo script is updated accordingly -- it can now be started several times to create multiple namespaces with an instance of passt each, with addressing reflecting separate subnets, and NDP proxying between them.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
8ce188ecb0
commit
19d254bbbb
6 changed files with 114 additions and 44 deletions
84
doc/demo.sh
84
doc/demo.sh
|
@ -30,7 +30,7 @@ ipv6_mangle() {
|
||||||
if [ ${__c} -lt 7 ]; then
|
if [ ${__c} -lt 7 ]; then
|
||||||
printf "${__16b}:"
|
printf "${__16b}:"
|
||||||
else
|
else
|
||||||
printf "abcd\n" && break
|
printf "%04x\n" $((0xabc0 + ${2})) && break
|
||||||
fi
|
fi
|
||||||
__c=$((__c + 1))
|
__c=$((__c + 1))
|
||||||
done
|
done
|
||||||
|
@ -40,43 +40,66 @@ ipv6_mangle() {
|
||||||
ndp_setup() {
|
ndp_setup() {
|
||||||
sysctl -w net.ipv6.conf.all.proxy_ndp=1
|
sysctl -w net.ipv6.conf.all.proxy_ndp=1
|
||||||
ip -6 neigh add proxy "${1}" dev "$(ipv6_dev)"
|
ip -6 neigh add proxy "${1}" dev "$(ipv6_dev)"
|
||||||
|
|
||||||
|
for i in `seq 1 63`; do
|
||||||
|
__neigh="$(ipv6_mangle ${1} ${i})"
|
||||||
|
if [ "${__neigh}" != "${1}" ]; then
|
||||||
|
ip -6 neigh add proxy "${__neigh}" dev "${2}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
ip netns del passt 2>/dev/null || :
|
ns_idx=0
|
||||||
ip link del veth_passt 2>/dev/null || :
|
for i in `seq 1 63`; do
|
||||||
ip netns add passt
|
ns="passt_${i}"
|
||||||
ip link add veth_passt up netns passt type veth peer name veth_passt
|
ns_idx=${i}
|
||||||
ip link set dev veth_passt up
|
|
||||||
ip -n passt link set dev lo up
|
|
||||||
|
|
||||||
|
busy=0
|
||||||
|
for p in $(pidof passt); do
|
||||||
|
[ "$(ip netns identify ${p})" = "${ns}" ] && busy=1 && break
|
||||||
|
done
|
||||||
|
[ ${busy} -eq 0 ] && break
|
||||||
|
done
|
||||||
|
|
||||||
ip -n passt addr add 192.0.2.2/24 dev veth_passt
|
[ ${busy} -ne 0 ] && echo "Couldn't create namespace" && exit 1
|
||||||
ip addr add 192.0.2.1/24 dev veth_passt
|
|
||||||
ip -n passt route add default via 192.0.2.1
|
ip netns del "${ns}" 2>/dev/null || :
|
||||||
|
ip netns add "${ns}"
|
||||||
|
ip link del "veth_${ns}" 2>/dev/null || :
|
||||||
|
ip link add "veth_${ns}" up netns "${ns}" type veth peer name "veth_${ns}"
|
||||||
|
ip link set dev "veth_${ns}" up
|
||||||
|
ip -n "${ns}" link set dev lo up
|
||||||
|
|
||||||
|
ipv4_main="192.0.2.$(((ns_idx - 1) * 4 + 1))"
|
||||||
|
ipv4_ns="192.0.2.$(((ns_idx - 1) * 4 + 2))"
|
||||||
|
|
||||||
|
ip -n "${ns}" addr add "${ipv4_ns}/30" dev "veth_${ns}"
|
||||||
|
ip addr add "${ipv4_main}/30" dev "veth_${ns}"
|
||||||
|
ip -n "${ns}" route add default via "${ipv4_main}"
|
||||||
|
|
||||||
sysctl -w net.ipv4.ip_forward=1
|
sysctl -w net.ipv4.ip_forward=1
|
||||||
nft delete table passt_nat 2>/dev/null || :
|
nft delete table "${ns}_nat" 2>/dev/null || :
|
||||||
nft add table passt_nat
|
nft add table "${ns}_nat"
|
||||||
nft 'add chain passt_nat postrouting { type nat hook postrouting priority -100 ; }'
|
nft add chain "${ns}_nat" postrouting '{ type nat hook postrouting priority -100 ; }'
|
||||||
nft add rule passt_nat postrouting ip saddr 192.0.2.2 masquerade
|
nft add rule "${ns}_nat" postrouting ip saddr "${ipv4_ns}" masquerade
|
||||||
|
|
||||||
ipv6_addr="$(ipv6_devaddr "$(ipv6_dev)")"
|
ipv6_addr="$(ipv6_devaddr "$(ipv6_dev)")"
|
||||||
ipv6_passt="$(ipv6_mangle "${ipv6_addr}")"
|
ipv6_passt="$(ipv6_mangle "${ipv6_addr}" ${ns_idx})"
|
||||||
ndp_setup "${ipv6_passt}"
|
ndp_setup "${ipv6_passt}" "veth_${ns}"
|
||||||
ip -n passt addr add "${ipv6_passt}/$(ipv6_mask "${ipv6_addr}")" dev veth_passt
|
ip -n "${ns}" addr add "${ipv6_passt}/$(ipv6_mask "${ipv6_addr}")" dev "veth_${ns}"
|
||||||
ip addr add "${ipv6_addr}" dev veth_passt
|
ip addr add "${ipv6_addr}" dev "veth_${ns}"
|
||||||
ip route add "${ipv6_passt}" dev veth_passt
|
ip route add "${ipv6_passt}" dev "veth_${ns}"
|
||||||
passt_ll="$(ipv6_ll_addr "veth_passt")"
|
passt_ll="$(ipv6_ll_addr "veth_${ns}")"
|
||||||
main_ll="$(get_token "link/ether" $(ip -o li sh veth_passt))"
|
main_ll="$(get_token "link/ether" $(ip -o li sh "veth_${ns}"))"
|
||||||
ip neigh add "${passt_ll%%/*}" dev veth_passt lladdr "${main_ll}"
|
ip neigh add "${passt_ll%%/*}" dev "veth_${ns}" lladdr "${main_ll}"
|
||||||
ip -n passt route add default via "${passt_ll%%/*}" dev veth_passt
|
ip -n "${ns}" route add default via "${passt_ll%%/*}" dev "veth_${ns}"
|
||||||
|
|
||||||
sysctl -w net.ipv6.conf.all.forwarding=1
|
sysctl -w net.ipv6.conf.all.forwarding=1
|
||||||
|
|
||||||
|
|
||||||
ethtool -K veth_passt tx off
|
ethtool -K "veth_${ns}" tx off
|
||||||
ip netns exec passt ethtool -K veth_passt tx off
|
ip netns exec "${ns}" ethtool -K "veth_${ns}" tx off
|
||||||
ip netns exec passt sysctl -w net.ipv4.ping_group_range="0 2147483647"
|
ip netns exec "${ns}" sysctl -w net.ipv4.ping_group_range="0 2147483647"
|
||||||
|
|
||||||
|
|
||||||
sysctl -w net.core.rmem_max=16777216
|
sysctl -w net.core.rmem_max=16777216
|
||||||
|
@ -84,5 +107,12 @@ sysctl -w net.core.wmem_max=16777216
|
||||||
sysctl -w net.core.rmem_default=16777216
|
sysctl -w net.core.rmem_default=16777216
|
||||||
sysctl -w net.core.wmem_default=16777216
|
sysctl -w net.core.wmem_default=16777216
|
||||||
|
|
||||||
|
echo
|
||||||
|
echo "Namespace ${ns} set up, addresses:"
|
||||||
|
echo " ${ipv4_ns}"
|
||||||
|
echo " ${ipv6_passt}"
|
||||||
|
echo
|
||||||
|
echo "Starting passt..."
|
||||||
|
echo
|
||||||
|
|
||||||
ip netns exec passt ./passt
|
ip netns exec "${ns}" ./passt
|
||||||
|
|
46
passt.c
46
passt.c
|
@ -20,7 +20,6 @@
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
#include <sys/uio.h>
|
#include <sys/uio.h>
|
||||||
#include <sys/un.h>
|
|
||||||
#include <ifaddrs.h>
|
#include <ifaddrs.h>
|
||||||
#include <linux/if_ether.h>
|
#include <linux/if_ether.h>
|
||||||
#include <linux/if_packet.h>
|
#include <linux/if_packet.h>
|
||||||
|
@ -30,6 +29,7 @@
|
||||||
#include <linux/tcp.h>
|
#include <linux/tcp.h>
|
||||||
#include <linux/udp.h>
|
#include <linux/udp.h>
|
||||||
#include <linux/icmpv6.h>
|
#include <linux/icmpv6.h>
|
||||||
|
#include <linux/un.h>
|
||||||
#include <linux/if_link.h>
|
#include <linux/if_link.h>
|
||||||
#include <net/ethernet.h>
|
#include <net/ethernet.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
@ -82,31 +82,50 @@ static char *ip_proto_str[IPPROTO_SCTP + 1] = {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* sock_unix() - Create and bind AF_UNIX socket, add to epoll list
|
* sock_unix() - Create and bind AF_UNIX socket, add to epoll list
|
||||||
|
* @index: Index used in socket path, filled on success
|
||||||
*
|
*
|
||||||
* Return: newly created socket, doesn't return on error
|
* Return: newly created socket, doesn't return on error
|
||||||
*/
|
*/
|
||||||
static int sock_unix(void)
|
static int sock_unix(int *index)
|
||||||
{
|
{
|
||||||
int fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
int fd = socket(AF_UNIX, SOCK_STREAM, 0), ex;
|
||||||
struct sockaddr_un addr = {
|
struct sockaddr_un addr = {
|
||||||
.sun_family = AF_UNIX,
|
.sun_family = AF_UNIX,
|
||||||
.sun_path = UNIX_SOCK_PATH,
|
|
||||||
};
|
};
|
||||||
|
int i, ret;
|
||||||
|
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
perror("UNIX socket");
|
perror("UNIX socket");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
unlink(UNIX_SOCK_PATH);
|
for (i = 1; i < UNIX_SOCK_MAX; i++) {
|
||||||
if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
|
snprintf(addr.sun_path, UNIX_PATH_MAX, UNIX_SOCK_PATH, i);
|
||||||
|
|
||||||
|
ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0);
|
||||||
|
ret = connect(ex, (const struct sockaddr *)&addr, sizeof(addr));
|
||||||
|
if (!ret || errno != ECONNREFUSED) {
|
||||||
|
close(ex);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
close(ex);
|
||||||
|
|
||||||
|
unlink(addr.sun_path);
|
||||||
|
if (!bind(fd, (const struct sockaddr *)&addr, sizeof(addr)))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == UNIX_SOCK_MAX) {
|
||||||
perror("UNIX socket bind");
|
perror("UNIX socket bind");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
chmod(UNIX_SOCK_PATH,
|
info("UNIX domain socket bound at %s\n", addr.sun_path);
|
||||||
|
chmod(addr.sun_path,
|
||||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
|
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
|
||||||
|
|
||||||
|
*index = i;
|
||||||
|
|
||||||
return fd;
|
return fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -743,11 +762,11 @@ void usage(const char *name)
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
struct epoll_event events[EPOLL_EVENTS];
|
struct epoll_event events[EPOLL_EVENTS];
|
||||||
|
int nfds, i, fd_unix, sock_index;
|
||||||
char buf6[INET6_ADDRSTRLEN];
|
char buf6[INET6_ADDRSTRLEN];
|
||||||
char buf4[INET_ADDRSTRLEN];
|
char buf4[INET_ADDRSTRLEN];
|
||||||
struct epoll_event ev = { 0 };
|
struct epoll_event ev = { 0 };
|
||||||
struct ctx c = { 0 };
|
struct ctx c = { 0 };
|
||||||
int nfds, i, fd_unix;
|
|
||||||
struct rlimit limit;
|
struct rlimit limit;
|
||||||
struct timespec now;
|
struct timespec now;
|
||||||
|
|
||||||
|
@ -785,7 +804,7 @@ int main(int argc, char **argv)
|
||||||
get_addrs(&c);
|
get_addrs(&c);
|
||||||
get_dns(&c);
|
get_dns(&c);
|
||||||
|
|
||||||
fd_unix = sock_unix();
|
fd_unix = sock_unix(&sock_index);
|
||||||
|
|
||||||
if (icmp_sock_init(&c) || udp_sock_init(&c) || tcp_sock_init(&c))
|
if (icmp_sock_init(&c) || udp_sock_init(&c) || tcp_sock_init(&c))
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
|
@ -795,7 +814,7 @@ int main(int argc, char **argv)
|
||||||
|
|
||||||
memset(&c.mac_guest, 0xff, sizeof(c.mac_guest));
|
memset(&c.mac_guest, 0xff, sizeof(c.mac_guest));
|
||||||
|
|
||||||
pcap_init();
|
pcap_init(sock_index);
|
||||||
|
|
||||||
if (c.v4) {
|
if (c.v4) {
|
||||||
info("ARP:");
|
info("ARP:");
|
||||||
|
@ -841,14 +860,14 @@ int main(int argc, char **argv)
|
||||||
}
|
}
|
||||||
|
|
||||||
listen:
|
listen:
|
||||||
listen(fd_unix, 1);
|
listen(fd_unix, 0);
|
||||||
info("You can now start qrap:");
|
info("You can now start qrap:");
|
||||||
info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio");
|
info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio");
|
||||||
info("or directly qemu, patched with:");
|
info("or directly qemu, patched with:");
|
||||||
info(" qemu/0001-net-Allow-also-UNIX-domain-sockets-to-be-used-as-net.patch");
|
info(" qemu/0001-net-Allow-also-UNIX-domain-sockets-to-be-used-as-net.patch");
|
||||||
info("as follows:");
|
info("as follows:");
|
||||||
info(" kvm ... -net socket,connect="
|
info(" kvm ... -net socket,connect=" UNIX_SOCK_PATH
|
||||||
UNIX_SOCK_PATH " -net nic,model=virtio");
|
" -net nic,model=virtio", sock_index);
|
||||||
|
|
||||||
#ifndef DEBUG
|
#ifndef DEBUG
|
||||||
if (daemon(0, 0)) {
|
if (daemon(0, 0)) {
|
||||||
|
@ -858,6 +877,7 @@ listen:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
c.fd_unix = accept(fd_unix, NULL, NULL);
|
c.fd_unix = accept(fd_unix, NULL, NULL);
|
||||||
|
|
||||||
ev.events = EPOLLIN | EPOLLRDHUP | EPOLLERR | EPOLLHUP;
|
ev.events = EPOLLIN | EPOLLRDHUP | EPOLLERR | EPOLLHUP;
|
||||||
ev.data.fd = c.fd_unix;
|
ev.data.fd = c.fd_unix;
|
||||||
epoll_ctl(c.epollfd, EPOLL_CTL_ADD, c.fd_unix, &ev);
|
epoll_ctl(c.epollfd, EPOLL_CTL_ADD, c.fd_unix, &ev);
|
||||||
|
|
5
passt.h
5
passt.h
|
@ -1,4 +1,5 @@
|
||||||
#define UNIX_SOCK_PATH "/tmp/passt.socket"
|
#define UNIX_SOCK_MAX 100
|
||||||
|
#define UNIX_SOCK_PATH "/tmp/passt_%i.socket"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct tap_msg - Generic message descriptor for arrays of messages
|
* struct tap_msg - Generic message descriptor for arrays of messages
|
||||||
|
@ -26,6 +27,8 @@ struct fqdn {
|
||||||
char n[NS_MAXDNAME];
|
char n[NS_MAXDNAME];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#include <net/if.h>
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct ctx - Execution context
|
* struct ctx - Execution context
|
||||||
* @epollfd: file descriptor for epoll instance
|
* @epollfd: file descriptor for epoll instance
|
||||||
|
|
14
pcap.c
14
pcap.c
|
@ -20,6 +20,10 @@
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <net/ethernet.h>
|
#include <net/ethernet.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <net/if.h>
|
||||||
|
|
||||||
|
#include "passt.h"
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
|
|
||||||
|
@ -77,9 +81,9 @@ void pcap(char *pkt, size_t len)
|
||||||
write(pcap_fd, pkt, len);
|
write(pcap_fd, pkt, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
void pcap_init(void)
|
void pcap_init(int sock_index)
|
||||||
{
|
{
|
||||||
char name[] = PCAP_PREFIX PCAP_ISO8601_STR ".pcap";
|
char name[] = PCAP_PREFIX PCAP_ISO8601_STR STR(UNIX_SOCK_MAX) ".pcap";
|
||||||
struct timeval tv;
|
struct timeval tv;
|
||||||
struct tm *tm;
|
struct tm *tm;
|
||||||
|
|
||||||
|
@ -88,6 +92,10 @@ void pcap_init(void)
|
||||||
strftime(name + strlen(PCAP_PREFIX), sizeof(PCAP_ISO8601_STR) - 1,
|
strftime(name + strlen(PCAP_PREFIX), sizeof(PCAP_ISO8601_STR) - 1,
|
||||||
PCAP_ISO8601_FORMAT, tm);
|
PCAP_ISO8601_FORMAT, tm);
|
||||||
|
|
||||||
|
snprintf(name + strlen(PCAP_PREFIX) + strlen(PCAP_ISO8601_STR),
|
||||||
|
sizeof(name) - strlen(PCAP_PREFIX) - strlen(PCAP_ISO8601_STR),
|
||||||
|
"_%i.pcap", sock_index);
|
||||||
|
|
||||||
pcap_fd = open(name, O_WRONLY | O_CREAT | O_APPEND | O_DSYNC,
|
pcap_fd = open(name, O_WRONLY | O_CREAT | O_APPEND | O_DSYNC,
|
||||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
|
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
|
||||||
if (pcap_fd == -1) {
|
if (pcap_fd == -1) {
|
||||||
|
@ -95,6 +103,8 @@ void pcap_init(void)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
info("Saving packet capture at %s", name);
|
||||||
|
|
||||||
write(pcap_fd, &pcap_hdr, sizeof(pcap_hdr));
|
write(pcap_fd, &pcap_hdr, sizeof(pcap_hdr));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
2
pcap.h
2
pcap.h
|
@ -1,2 +1,2 @@
|
||||||
void pcap(char *pkt, size_t len);
|
void pcap(char *pkt, size_t len);
|
||||||
void pcap_init(void);
|
void pcap_init(int sock_index);
|
||||||
|
|
7
util.h
7
util.h
|
@ -29,6 +29,9 @@ void debug(const char *format, ...);
|
||||||
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define STRINGIFY(x) #x
|
||||||
|
#define STR(x) STRINGIFY(x)
|
||||||
|
|
||||||
#define ARRAY_SIZE(a) ((int)(sizeof(a) / sizeof((a)[0])))
|
#define ARRAY_SIZE(a) ((int)(sizeof(a) / sizeof((a)[0])))
|
||||||
|
|
||||||
#define IN_INTERVAL(a, b, x) ((x) >= (a) && (x) <= (b))
|
#define IN_INTERVAL(a, b, x) ((x) >= (a) && (x) <= (b))
|
||||||
|
@ -37,6 +40,10 @@ void debug(const char *format, ...);
|
||||||
|
|
||||||
#define PORT_IS_EPHEMERAL(port) ((port) >= (1 << 15) + (1 << 14)) /* RFC 6335 */
|
#define PORT_IS_EPHEMERAL(port) ((port) >= (1 << 15) + (1 << 14)) /* RFC 6335 */
|
||||||
|
|
||||||
|
#include <linux/ipv6.h>
|
||||||
|
#include <net/if.h>
|
||||||
|
#include <linux/ip.h>
|
||||||
|
|
||||||
uint16_t csum_fold(uint32_t sum);
|
uint16_t csum_fold(uint32_t sum);
|
||||||
uint16_t csum_ip4(void *buf, size_t len);
|
uint16_t csum_ip4(void *buf, size_t len);
|
||||||
void csum_tcp4(struct iphdr *iph);
|
void csum_tcp4(struct iphdr *iph);
|
||||||
|
|
Loading…
Reference in a new issue