passt: Add support for multiple instances in different network namespaces sharing the same filesystem

Instead of a fixed path for the UNIX domain socket, passt now uses a path that contains a counter: it probes for existing instances and picks the first free index. The demo script is updated accordingly -- it can now be started several times to create multiple namespaces, each with its own instance of passt, with addressing reflecting separate subnets, and NDP proxying between them.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent 8ce188ecb0
commit 19d254bbbb
6 changed files with 114 additions and 44 deletions
84
doc/demo.sh
84
doc/demo.sh
|
@ -30,7 +30,7 @@ ipv6_mangle() {
|
|||
if [ ${__c} -lt 7 ]; then
|
||||
printf "${__16b}:"
|
||||
else
|
||||
printf "abcd\n" && break
|
||||
printf "%04x\n" $((0xabc0 + ${2})) && break
|
||||
fi
|
||||
__c=$((__c + 1))
|
||||
done
|
||||
|
@ -40,43 +40,66 @@ ipv6_mangle() {
|
|||
ndp_setup() {
|
||||
sysctl -w net.ipv6.conf.all.proxy_ndp=1
|
||||
ip -6 neigh add proxy "${1}" dev "$(ipv6_dev)"
|
||||
|
||||
for i in `seq 1 63`; do
|
||||
__neigh="$(ipv6_mangle ${1} ${i})"
|
||||
if [ "${__neigh}" != "${1}" ]; then
|
||||
ip -6 neigh add proxy "${__neigh}" dev "${2}"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
ip netns del passt 2>/dev/null || :
|
||||
ip link del veth_passt 2>/dev/null || :
|
||||
ip netns add passt
|
||||
ip link add veth_passt up netns passt type veth peer name veth_passt
|
||||
ip link set dev veth_passt up
|
||||
ip -n passt link set dev lo up
|
||||
ns_idx=0
|
||||
for i in `seq 1 63`; do
|
||||
ns="passt_${i}"
|
||||
ns_idx=${i}
|
||||
|
||||
busy=0
|
||||
for p in $(pidof passt); do
|
||||
[ "$(ip netns identify ${p})" = "${ns}" ] && busy=1 && break
|
||||
done
|
||||
[ ${busy} -eq 0 ] && break
|
||||
done
|
||||
|
||||
ip -n passt addr add 192.0.2.2/24 dev veth_passt
|
||||
ip addr add 192.0.2.1/24 dev veth_passt
|
||||
ip -n passt route add default via 192.0.2.1
|
||||
[ ${busy} -ne 0 ] && echo "Couldn't create namespace" && exit 1
|
||||
|
||||
ip netns del "${ns}" 2>/dev/null || :
|
||||
ip netns add "${ns}"
|
||||
ip link del "veth_${ns}" 2>/dev/null || :
|
||||
ip link add "veth_${ns}" up netns "${ns}" type veth peer name "veth_${ns}"
|
||||
ip link set dev "veth_${ns}" up
|
||||
ip -n "${ns}" link set dev lo up
|
||||
|
||||
ipv4_main="192.0.2.$(((ns_idx - 1) * 4 + 1))"
|
||||
ipv4_ns="192.0.2.$(((ns_idx - 1) * 4 + 2))"
|
||||
|
||||
ip -n "${ns}" addr add "${ipv4_ns}/30" dev "veth_${ns}"
|
||||
ip addr add "${ipv4_main}/30" dev "veth_${ns}"
|
||||
ip -n "${ns}" route add default via "${ipv4_main}"
|
||||
|
||||
sysctl -w net.ipv4.ip_forward=1
|
||||
nft delete table passt_nat 2>/dev/null || :
|
||||
nft add table passt_nat
|
||||
nft 'add chain passt_nat postrouting { type nat hook postrouting priority -100 ; }'
|
||||
nft add rule passt_nat postrouting ip saddr 192.0.2.2 masquerade
|
||||
nft delete table "${ns}_nat" 2>/dev/null || :
|
||||
nft add table "${ns}_nat"
|
||||
nft add chain "${ns}_nat" postrouting '{ type nat hook postrouting priority -100 ; }'
|
||||
nft add rule "${ns}_nat" postrouting ip saddr "${ipv4_ns}" masquerade
|
||||
|
||||
ipv6_addr="$(ipv6_devaddr "$(ipv6_dev)")"
|
||||
ipv6_passt="$(ipv6_mangle "${ipv6_addr}")"
|
||||
ndp_setup "${ipv6_passt}"
|
||||
ip -n passt addr add "${ipv6_passt}/$(ipv6_mask "${ipv6_addr}")" dev veth_passt
|
||||
ip addr add "${ipv6_addr}" dev veth_passt
|
||||
ip route add "${ipv6_passt}" dev veth_passt
|
||||
passt_ll="$(ipv6_ll_addr "veth_passt")"
|
||||
main_ll="$(get_token "link/ether" $(ip -o li sh veth_passt))"
|
||||
ip neigh add "${passt_ll%%/*}" dev veth_passt lladdr "${main_ll}"
|
||||
ip -n passt route add default via "${passt_ll%%/*}" dev veth_passt
|
||||
ipv6_passt="$(ipv6_mangle "${ipv6_addr}" ${ns_idx})"
|
||||
ndp_setup "${ipv6_passt}" "veth_${ns}"
|
||||
ip -n "${ns}" addr add "${ipv6_passt}/$(ipv6_mask "${ipv6_addr}")" dev "veth_${ns}"
|
||||
ip addr add "${ipv6_addr}" dev "veth_${ns}"
|
||||
ip route add "${ipv6_passt}" dev "veth_${ns}"
|
||||
passt_ll="$(ipv6_ll_addr "veth_${ns}")"
|
||||
main_ll="$(get_token "link/ether" $(ip -o li sh "veth_${ns}"))"
|
||||
ip neigh add "${passt_ll%%/*}" dev "veth_${ns}" lladdr "${main_ll}"
|
||||
ip -n "${ns}" route add default via "${passt_ll%%/*}" dev "veth_${ns}"
|
||||
|
||||
sysctl -w net.ipv6.conf.all.forwarding=1
|
||||
|
||||
|
||||
ethtool -K veth_passt tx off
|
||||
ip netns exec passt ethtool -K veth_passt tx off
|
||||
ip netns exec passt sysctl -w net.ipv4.ping_group_range="0 2147483647"
|
||||
ethtool -K "veth_${ns}" tx off
|
||||
ip netns exec "${ns}" ethtool -K "veth_${ns}" tx off
|
||||
ip netns exec "${ns}" sysctl -w net.ipv4.ping_group_range="0 2147483647"
|
||||
|
||||
|
||||
sysctl -w net.core.rmem_max=16777216
|
||||
|
@ -84,5 +107,12 @@ sysctl -w net.core.wmem_max=16777216
|
|||
sysctl -w net.core.rmem_default=16777216
|
||||
sysctl -w net.core.wmem_default=16777216
|
||||
|
||||
echo
|
||||
echo "Namespace ${ns} set up, addresses:"
|
||||
echo " ${ipv4_ns}"
|
||||
echo " ${ipv6_passt}"
|
||||
echo
|
||||
echo "Starting passt..."
|
||||
echo
|
||||
|
||||
ip netns exec passt ./passt
|
||||
ip netns exec "${ns}" ./passt
|
||||
|
|
46
passt.c
46
passt.c
|
@ -20,7 +20,6 @@
|
|||
#include <sys/ioctl.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/un.h>
|
||||
#include <ifaddrs.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/if_packet.h>
|
||||
|
@ -30,6 +29,7 @@
|
|||
#include <linux/tcp.h>
|
||||
#include <linux/udp.h>
|
||||
#include <linux/icmpv6.h>
|
||||
#include <linux/un.h>
|
||||
#include <linux/if_link.h>
|
||||
#include <net/ethernet.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -82,31 +82,50 @@ static char *ip_proto_str[IPPROTO_SCTP + 1] = {
|
|||
|
||||
/**
|
||||
* sock_unix() - Create and bind AF_UNIX socket, add to epoll list
|
||||
* @index: Index used in socket path, filled on success
|
||||
*
|
||||
* Return: newly created socket, doesn't return on error
|
||||
*/
|
||||
static int sock_unix(void)
|
||||
static int sock_unix(int *index)
|
||||
{
|
||||
int fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
||||
int fd = socket(AF_UNIX, SOCK_STREAM, 0), ex;
|
||||
struct sockaddr_un addr = {
|
||||
.sun_family = AF_UNIX,
|
||||
.sun_path = UNIX_SOCK_PATH,
|
||||
};
|
||||
int i, ret;
|
||||
|
||||
if (fd < 0) {
|
||||
perror("UNIX socket");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
unlink(UNIX_SOCK_PATH);
|
||||
if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
|
||||
for (i = 1; i < UNIX_SOCK_MAX; i++) {
|
||||
snprintf(addr.sun_path, UNIX_PATH_MAX, UNIX_SOCK_PATH, i);
|
||||
|
||||
ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0);
|
||||
ret = connect(ex, (const struct sockaddr *)&addr, sizeof(addr));
|
||||
if (!ret || errno != ECONNREFUSED) {
|
||||
close(ex);
|
||||
continue;
|
||||
}
|
||||
close(ex);
|
||||
|
||||
unlink(addr.sun_path);
|
||||
if (!bind(fd, (const struct sockaddr *)&addr, sizeof(addr)))
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == UNIX_SOCK_MAX) {
|
||||
perror("UNIX socket bind");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
chmod(UNIX_SOCK_PATH,
|
||||
info("UNIX domain socket bound at %s\n", addr.sun_path);
|
||||
chmod(addr.sun_path,
|
||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
|
||||
|
||||
*index = i;
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
|
@ -743,11 +762,11 @@ void usage(const char *name)
|
|||
int main(int argc, char **argv)
|
||||
{
|
||||
struct epoll_event events[EPOLL_EVENTS];
|
||||
int nfds, i, fd_unix, sock_index;
|
||||
char buf6[INET6_ADDRSTRLEN];
|
||||
char buf4[INET_ADDRSTRLEN];
|
||||
struct epoll_event ev = { 0 };
|
||||
struct ctx c = { 0 };
|
||||
int nfds, i, fd_unix;
|
||||
struct rlimit limit;
|
||||
struct timespec now;
|
||||
|
||||
|
@ -785,7 +804,7 @@ int main(int argc, char **argv)
|
|||
get_addrs(&c);
|
||||
get_dns(&c);
|
||||
|
||||
fd_unix = sock_unix();
|
||||
fd_unix = sock_unix(&sock_index);
|
||||
|
||||
if (icmp_sock_init(&c) || udp_sock_init(&c) || tcp_sock_init(&c))
|
||||
exit(EXIT_FAILURE);
|
||||
|
@ -795,7 +814,7 @@ int main(int argc, char **argv)
|
|||
|
||||
memset(&c.mac_guest, 0xff, sizeof(c.mac_guest));
|
||||
|
||||
pcap_init();
|
||||
pcap_init(sock_index);
|
||||
|
||||
if (c.v4) {
|
||||
info("ARP:");
|
||||
|
@ -841,14 +860,14 @@ int main(int argc, char **argv)
|
|||
}
|
||||
|
||||
listen:
|
||||
listen(fd_unix, 1);
|
||||
listen(fd_unix, 0);
|
||||
info("You can now start qrap:");
|
||||
info(" ./qrap 5 kvm ... -net socket,fd=5 -net nic,model=virtio");
|
||||
info("or directly qemu, patched with:");
|
||||
info(" qemu/0001-net-Allow-also-UNIX-domain-sockets-to-be-used-as-net.patch");
|
||||
info("as follows:");
|
||||
info(" kvm ... -net socket,connect="
|
||||
UNIX_SOCK_PATH " -net nic,model=virtio");
|
||||
info(" kvm ... -net socket,connect=" UNIX_SOCK_PATH
|
||||
" -net nic,model=virtio", sock_index);
|
||||
|
||||
#ifndef DEBUG
|
||||
if (daemon(0, 0)) {
|
||||
|
@ -858,6 +877,7 @@ listen:
|
|||
#endif
|
||||
|
||||
c.fd_unix = accept(fd_unix, NULL, NULL);
|
||||
|
||||
ev.events = EPOLLIN | EPOLLRDHUP | EPOLLERR | EPOLLHUP;
|
||||
ev.data.fd = c.fd_unix;
|
||||
epoll_ctl(c.epollfd, EPOLL_CTL_ADD, c.fd_unix, &ev);
|
||||
|
|
5
passt.h
5
passt.h
|
@ -1,4 +1,5 @@
|
|||
#define UNIX_SOCK_PATH "/tmp/passt.socket"
|
||||
#define UNIX_SOCK_MAX 100
|
||||
#define UNIX_SOCK_PATH "/tmp/passt_%i.socket"
|
||||
|
||||
/**
|
||||
* struct tap_msg - Generic message descriptor for arrays of messages
|
||||
|
@ -26,6 +27,8 @@ struct fqdn {
|
|||
char n[NS_MAXDNAME];
|
||||
};
|
||||
|
||||
#include <net/if.h>
|
||||
|
||||
/**
|
||||
* struct ctx - Execution context
|
||||
* @epollfd: file descriptor for epoll instance
|
||||
|
|
14
pcap.c
14
pcap.c
|
@ -20,6 +20,10 @@
|
|||
#include <time.h>
|
||||
#include <net/ethernet.h>
|
||||
#include <unistd.h>
|
||||
#include <net/if.h>
|
||||
|
||||
#include "passt.h"
|
||||
#include "util.h"
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
|
@ -77,9 +81,9 @@ void pcap(char *pkt, size_t len)
|
|||
write(pcap_fd, pkt, len);
|
||||
}
|
||||
|
||||
void pcap_init(void)
|
||||
void pcap_init(int sock_index)
|
||||
{
|
||||
char name[] = PCAP_PREFIX PCAP_ISO8601_STR ".pcap";
|
||||
char name[] = PCAP_PREFIX PCAP_ISO8601_STR STR(UNIX_SOCK_MAX) ".pcap";
|
||||
struct timeval tv;
|
||||
struct tm *tm;
|
||||
|
||||
|
@ -88,6 +92,10 @@ void pcap_init(void)
|
|||
strftime(name + strlen(PCAP_PREFIX), sizeof(PCAP_ISO8601_STR) - 1,
|
||||
PCAP_ISO8601_FORMAT, tm);
|
||||
|
||||
snprintf(name + strlen(PCAP_PREFIX) + strlen(PCAP_ISO8601_STR),
|
||||
sizeof(name) - strlen(PCAP_PREFIX) - strlen(PCAP_ISO8601_STR),
|
||||
"_%i.pcap", sock_index);
|
||||
|
||||
pcap_fd = open(name, O_WRONLY | O_CREAT | O_APPEND | O_DSYNC,
|
||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
|
||||
if (pcap_fd == -1) {
|
||||
|
@ -95,6 +103,8 @@ void pcap_init(void)
|
|||
return;
|
||||
}
|
||||
|
||||
info("Saving packet capture at %s", name);
|
||||
|
||||
write(pcap_fd, &pcap_hdr, sizeof(pcap_hdr));
|
||||
}
|
||||
|
||||
|
|
2
pcap.h
2
pcap.h
|
@ -1,2 +1,2 @@
|
|||
void pcap(char *pkt, size_t len);
|
||||
void pcap_init(void);
|
||||
void pcap_init(int sock_index);
|
||||
|
|
7
util.h
7
util.h
|
@ -29,6 +29,9 @@ void debug(const char *format, ...);
|
|||
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
#endif
|
||||
|
||||
#define STRINGIFY(x) #x
|
||||
#define STR(x) STRINGIFY(x)
|
||||
|
||||
#define ARRAY_SIZE(a) ((int)(sizeof(a) / sizeof((a)[0])))
|
||||
|
||||
#define IN_INTERVAL(a, b, x) ((x) >= (a) && (x) <= (b))
|
||||
|
@ -37,6 +40,10 @@ void debug(const char *format, ...);
|
|||
|
||||
#define PORT_IS_EPHEMERAL(port) ((port) >= (1 << 15) + (1 << 14)) /* RFC 6335 */
|
||||
|
||||
#include <linux/ipv6.h>
|
||||
#include <net/if.h>
|
||||
#include <linux/ip.h>
|
||||
|
||||
uint16_t csum_fold(uint32_t sum);
|
||||
uint16_t csum_ip4(void *buf, size_t len);
|
||||
void csum_tcp4(struct iphdr *iph);
|
||||
|
|
Loading…
Reference in a new issue