merd: Switch to AF_UNIX for qemu tap, provide wrapper
We can bypass a full-fledged network interface between qemu and merd by connecting the qemu tap file descriptor to a provided UNIX domain socket: this could be implemented in qemu eventually, qrap covers this meanwhile. This also avoids the need for the AF_PACKET socket towards the guest. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
cefcf0bc2c
commit
fa2d20908d
3 changed files with 149 additions and 68 deletions
9
Makefile
9
Makefile
|
@ -1,10 +1,13 @@
|
||||||
CFLAGS += -Wall -Wextra -pedantic
|
CFLAGS += -Wall -Wextra -pedantic
|
||||||
|
|
||||||
all: merd
|
all: merd qrap
|
||||||
|
|
||||||
merd: merd.c
|
merd: merd.c merd.h
|
||||||
$(CC) $(CFLAGS) merd.c -o merd
|
$(CC) $(CFLAGS) merd.c -o merd
|
||||||
|
|
||||||
|
# qemu wrapper: compile and link straight from the listed source, like merd
qrap: qrap.c merd.h
	$(CC) $(CFLAGS) qrap.c -o qrap
|
||||||
|
|
||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean:
|
clean:
|
||||||
-${RM} merd
|
-${RM} merd qrap
|
||||||
|
|
129
merd.c
129
merd.c
|
@ -1,13 +1,14 @@
|
||||||
/* MERD - MacVTap Egress and Routing Daemon
|
/* MERD - MacVTap Egress and Routing Daemon
|
||||||
|
*
|
||||||
|
* merd.c - Daemon implementation
|
||||||
*
|
*
|
||||||
* Author: Stefano Brivio <sbrivio@redhat.com>
|
* Author: Stefano Brivio <sbrivio@redhat.com>
|
||||||
* License: GPLv2
|
* License: GPLv2
|
||||||
*
|
*
|
||||||
* Grab packets from Ethernet interface via AF_PACKET, build AF_INET sockets for
|
* Grab Ethernet frames via AF_UNIX socket, build AF_INET sockets for each
|
||||||
* each 5-tuple from ICMP, TCP, UDP packets, perform connection tracking and
|
* 5-tuple from ICMP, TCP, UDP packets, perform connection tracking and forward
|
||||||
* forward them with destination address NAT. Forward packets received on
|
* them with destination address NAT. Forward packets received on sockets back
|
||||||
* sockets back to the AF_PACKET interface (typically, a macvtap, tap or veth
|
* to the UNIX domain socket (typically, a tap file descriptor from qemu).
|
||||||
* interface towards a network namespace or a VM).
|
|
||||||
*
|
*
|
||||||
* TODO:
|
* TODO:
|
||||||
* - steal packets from AF_INET sockets (using eBPF/XDP, or a new socket
|
* - steal packets from AF_INET sockets (using eBPF/XDP, or a new socket
|
||||||
|
@ -27,6 +28,7 @@
|
||||||
#include <sys/epoll.h>
|
#include <sys/epoll.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
#include <sys/un.h>
|
||||||
#include <ifaddrs.h>
|
#include <ifaddrs.h>
|
||||||
#include <linux/if_ether.h>
|
#include <linux/if_ether.h>
|
||||||
#include <linux/if_packet.h>
|
#include <linux/if_packet.h>
|
||||||
|
@ -44,6 +46,8 @@
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <linux/ip.h>
|
#include <linux/ip.h>
|
||||||
|
|
||||||
|
#include "merd.h"
|
||||||
|
|
||||||
#define EPOLL_EVENTS 10
|
#define EPOLL_EVENTS 10
|
||||||
#define CT_SIZE 4096
|
#define CT_SIZE 4096
|
||||||
|
|
||||||
|
@ -73,54 +77,40 @@ struct ct4 {
|
||||||
* struct ctx - Execution context
|
* struct ctx - Execution context
|
||||||
* @epollfd: file descriptor for epoll instance
|
* @epollfd: file descriptor for epoll instance
|
||||||
* @ext_addr4: IPv4 address for external, routable interface
|
* @ext_addr4: IPv4 address for external, routable interface
|
||||||
* @tap_idx: Interface index for tap interface
|
* @fd_unix: AF_UNIX socket for tap file descriptor
|
||||||
* @fd_tap4: IPv4 AF_PACKET socket for tap interface
|
|
||||||
* @map4: Connection tracking table
|
* @map4: Connection tracking table
|
||||||
*/
|
*/
|
||||||
struct ctx {
|
struct ctx {
|
||||||
int epollfd;
|
int epollfd;
|
||||||
unsigned long ext_addr4;
|
unsigned long ext_addr4;
|
||||||
int tap_idx;
|
int fd_unix;
|
||||||
int fd_tap4;
|
|
||||||
struct ct4 map4[CT_SIZE];
|
struct ct4 map4[CT_SIZE];
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* sock4_l3() - Create and bind AF_PACKET socket for IPv4, add to epoll list
|
* sock_unix() - Create and bind AF_UNIX socket
|
||||||
* @c: Execution context
|
|
||||||
* @ifn: Name of tap interface
|
|
||||||
* @type: AF_PACKET protocol type
|
|
||||||
*
|
*
|
||||||
* Return: newly created socket, doesn't return on error
|
* Return: newly created socket, doesn't return on error
|
||||||
*/
|
*/
|
||||||
static int sock4_l3(struct ctx *c, const char *ifn, int type)
|
static int sock_unix(void)
|
||||||
{
|
{
|
||||||
struct sockaddr_ll addr = {
|
struct sockaddr_un addr = {
|
||||||
.sll_family = AF_PACKET,
|
.sun_family = AF_UNIX,
|
||||||
.sll_protocol = htons(ETH_P_IP),
|
.sun_path = UNIX_SOCK_PATH,
|
||||||
.sll_ifindex = if_nametoindex(ifn),
|
|
||||||
};
|
};
|
||||||
struct epoll_event ev = { 0 };
|
|
||||||
int fd;
|
int fd;
|
||||||
|
|
||||||
fd = socket(AF_PACKET, type, htons(ETH_P_IP));
|
fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
perror("L3 socket");
|
perror("UNIX socket");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unlink(UNIX_SOCK_PATH);
|
||||||
if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
|
if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
|
||||||
perror("L3 bind");
|
perror("UNIX socket bind");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
ev.events = EPOLLIN;
|
|
||||||
ev.data.fd = fd;
|
|
||||||
if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) {
|
|
||||||
perror("epoll_ctl");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
return fd;
|
return fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -207,7 +197,7 @@ static int sock4_l4(struct ctx *c, uint16_t proto, uint16_t port)
|
||||||
*/
|
*/
|
||||||
void usage(const char *name)
|
void usage(const char *name)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Usage: %s IF_TAP IF_EXT\n", name);
|
fprintf(stderr, "Usage: %s IF_EXT\n", name);
|
||||||
|
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
@ -411,7 +401,7 @@ static void csum_tcp4(uint16_t *in)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tap4_handler() - Packet handler for tap interface
|
* tap4_handler() - Packet handler for tap file descriptor
|
||||||
* @c: Execution context
|
* @c: Execution context
|
||||||
* @len: Total L2 packet length
|
* @len: Total L2 packet length
|
||||||
* @in: Packet buffer, L2 headers
|
* @in: Packet buffer, L2 headers
|
||||||
|
@ -433,21 +423,6 @@ static void tap4_handler(struct ctx *c, int len, char *in)
|
||||||
if (fd == -1)
|
if (fd == -1)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
nat4_out(c->ext_addr4, in + ETH_HLEN);
|
|
||||||
|
|
||||||
switch (iph->protocol) {
|
|
||||||
case IPPROTO_TCP:
|
|
||||||
csum_tcp4((uint16_t *)(in + ETH_HLEN));
|
|
||||||
break;
|
|
||||||
case IPPROTO_UDP:
|
|
||||||
uh->check = 0;
|
|
||||||
break;
|
|
||||||
case IPPROTO_ICMP:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (iph->protocol == IPPROTO_ICMP) {
|
if (iph->protocol == IPPROTO_ICMP) {
|
||||||
fprintf(stderr, "icmp from tap: %s -> %s (socket %i)\n",
|
fprintf(stderr, "icmp from tap: %s -> %s (socket %i)\n",
|
||||||
inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)),
|
inet_ntop(AF_INET, &iph->saddr, buf_s, sizeof(buf_s)),
|
||||||
|
@ -463,6 +438,21 @@ static void tap4_handler(struct ctx *c, int len, char *in)
|
||||||
fd);
|
fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nat4_out(c->ext_addr4, in + ETH_HLEN);
|
||||||
|
|
||||||
|
switch (iph->protocol) {
|
||||||
|
case IPPROTO_TCP:
|
||||||
|
csum_tcp4((uint16_t *)(in + ETH_HLEN));
|
||||||
|
break;
|
||||||
|
case IPPROTO_UDP:
|
||||||
|
uh->check = 0;
|
||||||
|
break;
|
||||||
|
case IPPROTO_ICMP:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (sendto(fd, in + sizeof(struct ethhdr) + sizeof(struct iphdr),
|
if (sendto(fd, in + sizeof(struct ethhdr) + sizeof(struct iphdr),
|
||||||
len - sizeof(struct ethhdr) - 4 * iph->ihl, 0,
|
len - sizeof(struct ethhdr) - 4 * iph->ihl, 0,
|
||||||
(struct sockaddr *)&addr, sizeof(addr)) < 0)
|
(struct sockaddr *)&addr, sizeof(addr)) < 0)
|
||||||
|
@ -478,12 +468,6 @@ static void tap4_handler(struct ctx *c, int len, char *in)
|
||||||
*/
|
*/
|
||||||
static void ext4_handler(struct ctx *c, int len, char *in)
|
static void ext4_handler(struct ctx *c, int len, char *in)
|
||||||
{
|
{
|
||||||
struct sockaddr_ll addr = {
|
|
||||||
.sll_family = AF_PACKET,
|
|
||||||
.sll_protocol = ntohs(ETH_P_IP),
|
|
||||||
.sll_ifindex = c->tap_idx,
|
|
||||||
.sll_halen = ETHER_ADDR_LEN,
|
|
||||||
};
|
|
||||||
struct iphdr *iph = (struct iphdr *)in;
|
struct iphdr *iph = (struct iphdr *)in;
|
||||||
struct tcphdr *th = (struct tcphdr *)(iph + 1);
|
struct tcphdr *th = (struct tcphdr *)(iph + 1);
|
||||||
char buf_s[BUFSIZ], buf_d[BUFSIZ];
|
char buf_s[BUFSIZ], buf_d[BUFSIZ];
|
||||||
|
@ -507,8 +491,6 @@ static void ext4_handler(struct ctx *c, int len, char *in)
|
||||||
uh->check = 0;
|
uh->check = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(&addr.sll_addr, entry->hs, ETH_ALEN);
|
|
||||||
|
|
||||||
eh = (struct ethhdr *)buf;
|
eh = (struct ethhdr *)buf;
|
||||||
memcpy(eh->h_dest, entry->hs, ETH_ALEN);
|
memcpy(eh->h_dest, entry->hs, ETH_ALEN);
|
||||||
memcpy(eh->h_source, entry->hd, ETH_ALEN);
|
memcpy(eh->h_source, entry->hd, ETH_ALEN);
|
||||||
|
@ -531,9 +513,8 @@ static void ext4_handler(struct ctx *c, int len, char *in)
|
||||||
ntohs(th->dest));
|
ntohs(th->dest));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sendto(c->fd_tap4, buf, len + sizeof(struct ethhdr), 0,
|
if (send(c->fd_unix, buf, len + sizeof(struct ethhdr), 0) < 0)
|
||||||
(struct sockaddr *)&addr, sizeof(addr)) < 0)
|
perror("send");
|
||||||
perror("sendto");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -546,18 +527,18 @@ static void ext4_handler(struct ctx *c, int len, char *in)
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
struct epoll_event events[EPOLL_EVENTS];
|
struct epoll_event events[EPOLL_EVENTS];
|
||||||
const char *if_tap, *if_ext;
|
struct epoll_event ev = { 0 };
|
||||||
struct ctx c = { 0 };
|
struct ctx c = { 0 };
|
||||||
|
const char *if_ext;
|
||||||
char buf[1 << 16];
|
char buf[1 << 16];
|
||||||
int nfds, i, len;
|
int nfds, i, len;
|
||||||
|
int fd_unix;
|
||||||
|
|
||||||
if (argc != 3)
|
if (argc != 2)
|
||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
if_tap = argv[1];
|
|
||||||
if_ext = argv[2];
|
|
||||||
|
|
||||||
|
if_ext = argv[1];
|
||||||
getaddrs_ext(&c, if_ext);
|
getaddrs_ext(&c, if_ext);
|
||||||
c.tap_idx = if_nametoindex(if_tap);
|
|
||||||
|
|
||||||
c.epollfd = epoll_create1(0);
|
c.epollfd = epoll_create1(0);
|
||||||
if (c.epollfd == -1) {
|
if (c.epollfd == -1) {
|
||||||
|
@ -565,7 +546,17 @@ int main(int argc, char **argv)
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
c.fd_tap4 = sock4_l3(&c, if_tap, SOCK_RAW);
|
fd_unix = sock_unix();
|
||||||
|
listen:
|
||||||
|
listen(fd_unix, 1);
|
||||||
|
fprintf(stderr,
|
||||||
|
"You can now start qrap:\n\t"
|
||||||
|
"./qrap 42 kvm ... -net tap,fd=42 -net nic,model=virtio ...\n");
|
||||||
|
|
||||||
|
c.fd_unix = accept(fd_unix, NULL, NULL);
|
||||||
|
ev.events = EPOLLIN;
|
||||||
|
ev.data.fd = c.fd_unix;
|
||||||
|
epoll_ctl(c.epollfd, EPOLL_CTL_ADD, c.fd_unix, &ev);
|
||||||
|
|
||||||
loop:
|
loop:
|
||||||
nfds = epoll_wait(c.epollfd, events, EPOLL_EVENTS, -1);
|
nfds = epoll_wait(c.epollfd, events, EPOLL_EVENTS, -1);
|
||||||
|
@ -576,15 +567,23 @@ loop:
|
||||||
|
|
||||||
for (i = 0; i < nfds; i++) {
|
for (i = 0; i < nfds; i++) {
|
||||||
len = recv(events[i].data.fd, buf, sizeof(buf), MSG_DONTWAIT);
|
len = recv(events[i].data.fd, buf, sizeof(buf), MSG_DONTWAIT);
|
||||||
|
|
||||||
|
if (events[i].data.fd == c.fd_unix && len <= 0) {
|
||||||
|
epoll_ctl(c.epollfd, EPOLL_CTL_DEL, c.fd_unix, &ev);
|
||||||
|
close(c.fd_unix);
|
||||||
|
goto listen;
|
||||||
|
}
|
||||||
|
|
||||||
if (len == 0)
|
if (len == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (len < 0) {
|
if (len < 0) {
|
||||||
if (errno == EAGAIN || errno == EWOULDBLOCK)
|
if (errno == EAGAIN || errno == EWOULDBLOCK)
|
||||||
break;
|
break;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (events[i].data.fd == c.fd_tap4)
|
if (events[i].data.fd == c.fd_unix)
|
||||||
tap4_handler(&c, len, buf);
|
tap4_handler(&c, len, buf);
|
||||||
else
|
else
|
||||||
ext4_handler(&c, len, buf);
|
ext4_handler(&c, len, buf);
|
||||||
|
|
79
qrap.c
Normal file
79
qrap.c
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
/* MERD - MacVTap Egress and Routing Daemon
|
||||||
|
*
|
||||||
|
* qrap.c - qemu wrapper connecting UNIX domain socket to tap file descriptor
|
||||||
|
*
|
||||||
|
* Author: Stefano Brivio <sbrivio@redhat.com>
|
||||||
|
* License: GPLv2
|
||||||
|
*
|
||||||
|
* TODO: Implement this functionality directly in qemu: we have TCP and UDP
|
||||||
|
* socket back-ends already.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <sys/un.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
|
#include "merd.h"
|
||||||
|
|
||||||
|
/**
 * usage() - Show the expected command line on standard error, then terminate
 * @name:	Name the program was invoked with
 *
 * Doesn't return: the process exits with a failure status.
 */
void usage(const char *name)
{
	fprintf(stderr, "Usage: %s FDNUM QEMU_CMD ...\n", name);

	exit(EXIT_FAILURE);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* main() - Entry point and main loop
|
||||||
|
* @argc: Argument count
|
||||||
|
* @argv: File descriptor number, then qemu with arguments
|
||||||
|
*
|
||||||
|
* Return: 0 once interrupted, non-zero on failure
|
||||||
|
*/
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
struct sockaddr_un addr = {
|
||||||
|
.sun_family = AF_UNIX,
|
||||||
|
.sun_path = UNIX_SOCK_PATH,
|
||||||
|
};
|
||||||
|
long fd;
|
||||||
|
int s;
|
||||||
|
|
||||||
|
if (argc < 3)
|
||||||
|
usage(argv[0]);
|
||||||
|
|
||||||
|
fd = strtol(argv[1], NULL, 0);
|
||||||
|
if (fd < 3 || fd > INT_MAX || errno)
|
||||||
|
usage(argv[0]);
|
||||||
|
|
||||||
|
s = socket(AF_UNIX, SOCK_STREAM, 0);
|
||||||
|
if (s < 0) {
|
||||||
|
perror("socket");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (connect(s, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
|
||||||
|
perror("connect");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dup2(s, (int)fd) < 0) {
|
||||||
|
perror("dup");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
close(s);
|
||||||
|
|
||||||
|
execvp(argv[2], argv + 2);
|
||||||
|
perror("execvp");
|
||||||
|
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
Loading…
Reference in a new issue