From fa2d20908d061fc7a4c56e793487da861af58aca Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Sat, 18 Jul 2020 01:02:39 +0200 Subject: [PATCH] merd: Switch to AF_UNIX for qemu tap, provide wrapper We can bypass a full-fledged network interface between qemu and merd by connecting the qemu tap file descriptor to a provided UNIX domain socket: this could be implemented in qemu eventually, qrap covers this meanwhile. This also avoids the need for the AF_PACKET socket towards the guest. Signed-off-by: Stefano Brivio --- Makefile | 9 ++-- merd.c | 129 +++++++++++++++++++++++++++---------------------------- qrap.c | 79 ++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 68 deletions(-) create mode 100644 qrap.c diff --git a/Makefile b/Makefile index 6d96f47..e5942dc 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,13 @@ CFLAGS += -Wall -Wextra -pedantic -all: merd +all: merd qrap -merd: merd.c +merd: merd.c merd.h $(CC) $(CFLAGS) merd.c -o merd +qrap: qrap.c merd.h + $(CC) $(CFLAGS) qrap.o -o qrap + .PHONY: clean clean: - -${RM} merd + -${RM} merd qrap diff --git a/merd.c b/merd.c index df2e511..b046e7e 100644 --- a/merd.c +++ b/merd.c @@ -1,13 +1,14 @@ /* MERD - MacVTap Egress and Routing Daemon + * + * merd.c - Daemon implementation * * Author: Stefano Brivio * License: GPLv2 * - * Grab packets from Ethernet interface via AF_PACKET, build AF_INET sockets for - * each 5-tuple from ICMP, TCP, UDP packets, perform connection tracking and - * forward them with destination address NAT. Forward packets received on - * sockets back to the AF_PACKET interface (typically, a macvtap, tap or veth - * interface towards a network namespace or a VM). + * Grab Ethernet frames via AF_UNIX socket, build AF_INET sockets for each + * 5-tuple from ICMP, TCP, UDP packets, perform connection tracking and forward + * them with destination address NAT. Forward packets received on sockets back + * to the UNIX domain socket (typically, a tap file descriptor from qemu). * * TODO: * - steal packets from AF_INET sockets (using eBPF/XDP, or a new socket @@ -27,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +46,8 @@ #include #include +#include "merd.h" + #define EPOLL_EVENTS 10 #define CT_SIZE 4096 @@ -73,54 +77,40 @@ struct ct4 { * struct ctx - Execution context * @epollfd: file descriptor for epoll instance * @ext_addr4: IPv4 address for external, routable interface - * @tap_idx: Interface index for tap interface - * @fd_tap4: IPv4 AF_PACKET socket for tap interface + * @fd_unix: AF_UNIX socket for tap file descriptor * @map4: Connection tracking table */ struct ctx { int epollfd; unsigned long ext_addr4; - int tap_idx; - int fd_tap4; + int fd_unix; struct ct4 map4[CT_SIZE]; }; /** - * sock4_l3() - Create and bind AF_PACKET socket for IPv4, add to epoll list - * @c: Execution context - * @ifn: Name of tap interface - * @type: AF_PACKET protocol type + * sock_unix() - Create and bind AF_UNIX socket, add to epoll list * * Return: newly created socket, doesn't return on error */ -static int sock4_l3(struct ctx *c, const char *ifn, int type) +static int sock_unix(void) { - struct sockaddr_ll addr = { - .sll_family = AF_PACKET, - .sll_protocol = htons(ETH_P_IP), - .sll_ifindex = if_nametoindex(ifn), + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + .sun_path = UNIX_SOCK_PATH, }; - struct epoll_event ev = { 0 }; int fd; - fd = socket(AF_PACKET, type, htons(ETH_P_IP)); + fd = socket(AF_UNIX, SOCK_STREAM, 0); if (fd < 0) { - perror("L3 socket"); + perror("UNIX socket"); exit(EXIT_FAILURE); } + unlink(UNIX_SOCK_PATH); if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) { - perror("L3 bind"); + perror("UNIX socket bind"); exit(EXIT_FAILURE); } - - ev.events = EPOLLIN; - ev.data.fd = fd; - if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) { - perror("epoll_ctl"); - exit(EXIT_FAILURE); - } - return fd; } @@ -207,7 +197,7 @@ static int sock4_l4(struct ctx *c, uint16_t proto, uint16_t port) */ void usage(const char *name) { - fprintf(stderr, "Usage: %s IF_TAP IF_EXT\n", name); + fprintf(stderr, "Usage: %s IF_EXT\n", name); exit(EXIT_FAILURE); } @@ -411,7 +401,7 @@ static void csum_tcp4(uint16_t *in) } /** - * tap4_handler() - Packet handler for tap interface + * tap4_handler() - Packet handler for tap file descriptor * @c: Execution context * @len: Total L2 packet length * @in: Packet buffer, L2 headers @@ -433,21 +423,6 @@ static void tap4_handler(struct ctx *c, int len, char *in) if (fd == -1) return; - nat4_out(c->ext_addr4, in + ETH_HLEN); - - switch (iph->protocol) { - case IPPROTO_TCP: - csum_tcp4((uint16_t *)(in + ETH_HLEN)); - break; - case IPPROTO_UDP: - uh->check = 0; - break; - case IPPROTO_ICMP: - break; - default: - return; - } - if (iph->protocol == IPPROTO_ICMP) { fprintf(stderr, "icmp from tap: %s -> %s (socket %i)\n", inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)), @@ -463,6 +438,21 @@ static void tap4_handler(struct ctx *c, int len, char *in) fd); } + nat4_out(c->ext_addr4, in + ETH_HLEN); + + switch (iph->protocol) { + case IPPROTO_TCP: + csum_tcp4((uint16_t *)(in + ETH_HLEN)); + break; + case IPPROTO_UDP: + uh->check = 0; + break; + case IPPROTO_ICMP: + break; + default: + return; + } + if (sendto(fd, in + sizeof(struct ethhdr) + sizeof(struct iphdr), len - sizeof(struct ethhdr) - 4 * iph->ihl, 0, (struct sockaddr *)&addr, sizeof(addr)) < 0) @@ -478,12 +468,6 @@ static void tap4_handler(struct ctx *c, int len, char *in) */ static void ext4_handler(struct ctx *c, int len, char *in) { - struct sockaddr_ll addr = { - .sll_family = AF_PACKET, - .sll_protocol = ntohs(ETH_P_IP), - .sll_ifindex = c->tap_idx, - .sll_halen = ETHER_ADDR_LEN, - }; struct iphdr *iph = (struct iphdr *)in; struct tcphdr *th = (struct tcphdr *)(iph + 1); char buf_s[BUFSIZ], buf_d[BUFSIZ]; @@ -507,8 +491,6 @@ static void ext4_handler(struct ctx *c, int len, char *in) uh->check = 0; } - memcpy(&addr.sll_addr, entry->hs, ETH_ALEN); - eh = (struct ethhdr *)buf; memcpy(eh->h_dest, entry->hs, ETH_ALEN); memcpy(eh->h_source, entry->hd, ETH_ALEN); @@ -531,9 +513,8 @@ static void ext4_handler(struct ctx *c, int len, char *in) ntohs(th->dest)); } - if (sendto(c->fd_tap4, buf, len + sizeof(struct ethhdr), 0, - (struct sockaddr *)&addr, sizeof(addr)) < 0) - perror("sendto"); + if (send(c->fd_unix, buf, len + sizeof(struct ethhdr), 0) < 0) + perror("send"); } /** @@ -546,18 +527,18 @@ static void ext4_handler(struct ctx *c, int len, char *in) int main(int argc, char **argv) { struct epoll_event events[EPOLL_EVENTS]; - const char *if_tap, *if_ext; + struct epoll_event ev = { 0 }; struct ctx c = { 0 }; + const char *if_ext; char buf[1 << 16]; int nfds, i, len; + int fd_unix; - if (argc != 3) + if (argc != 2) usage(argv[0]); - if_tap = argv[1]; - if_ext = argv[2]; + if_ext = argv[1]; getaddrs_ext(&c, if_ext); - c.tap_idx = if_nametoindex(if_tap); c.epollfd = epoll_create1(0); if (c.epollfd == -1) { @@ -565,7 +546,17 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - c.fd_tap4 = sock4_l3(&c, if_tap, SOCK_RAW); + fd_unix = sock_unix(); +listen: + listen(fd_unix, 1); + fprintf(stderr, + "You can now start qrap:\n\t" + "./qrap 42 kvm ... -net tap,fd=42 -net nic,model=virtio ...\n"); + + c.fd_unix = accept(fd_unix, NULL, NULL); + ev.events = EPOLLIN; + ev.data.fd = c.fd_unix; + epoll_ctl(c.epollfd, EPOLL_CTL_ADD, c.fd_unix, &ev); loop: nfds = epoll_wait(c.epollfd, events, EPOLL_EVENTS, -1); @@ -576,15 +567,23 @@ loop: for (i = 0; i < nfds; i++) { len = recv(events[i].data.fd, buf, sizeof(buf), MSG_DONTWAIT); + + if (events[i].data.fd == c.fd_unix && len <= 0) { + epoll_ctl(c.epollfd, EPOLL_CTL_DEL, c.fd_unix, &ev); + close(c.fd_unix); + goto listen; + } + if (len == 0) continue; + if (len < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK) break; goto out; } - if (events[i].data.fd == c.fd_tap4) + if (events[i].data.fd == c.fd_unix) tap4_handler(&c, len, buf); else ext4_handler(&c, len, buf); diff --git a/qrap.c b/qrap.c new file mode 100644 index 0000000..b246a7e --- /dev/null +++ b/qrap.c @@ -0,0 +1,79 @@ +/* MERD - MacVTap Egress and Routing Daemon + * + * qrap.c - qemu wrapper connecting UNIX domain socket to tap file descriptor + * + * Author: Stefano Brivio + * License: GPLv2 + * + * TODO: Implement this functionality directly in qemu: we have TCP and UDP + * socket back-ends already. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "merd.h" + +/** + * usage() - Print usage and exit + * @name: Executable name + */ +void usage(const char *name) +{ + fprintf(stderr, "Usage: %s FDNUM QEMU_CMD ...\n", name); + + exit(EXIT_FAILURE); +} + +/** + * main() - Entry point and main loop + * @argc: Argument count + * @argv: File descriptor number, then qemu with arguments + * + * Return: 0 once interrupted, non-zero on failure + */ +int main(int argc, char **argv) +{ + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + .sun_path = UNIX_SOCK_PATH, + }; + long fd; + int s; + + if (argc < 3) + usage(argv[0]); + + fd = strtol(argv[1], NULL, 0); + if (fd < 3 || fd > INT_MAX || errno) + usage(argv[0]); + + s = socket(AF_UNIX, SOCK_STREAM, 0); + if (s < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + if (connect(s, (const struct sockaddr *)&addr, sizeof(addr)) < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (dup2(s, (int)fd) < 0) { + perror("dup"); + exit(EXIT_FAILURE); + } + + close(s); + + execvp(argv[2], argv + 2); + perror("execvp"); + + return EXIT_FAILURE; +}