passt/netlink.c
Stefano Brivio 675174d4ba conf, tap: Split netlink and pasta functions, allow interface configuration
Move netlink routines to their own file, and use netlink to configure
or fetch all the information we need, except for the TUNSETIFF ioctl.

Move pasta-specific functions to their own file as well, add
parameters and calls to configure the tap interface in the namespace.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2021-10-14 13:15:12 +02:00

514 lines
12 KiB
C

// SPDX-License-Identifier: AGPL-3.0-or-later
/* PASST - Plug A Simple Socket Transport
* for qemu/UNIX domain socket mode
*
* PASTA - Pack A Subtle Tap Abstraction
* for network namespace/tap device mode
*
* netlink.c - rtnetlink routines: interfaces, addresses, routes
*
* Copyright (c) 2020-2021 Red Hat GmbH
* Author: Stefano Brivio <sbrivio@redhat.com>
*/
#define _GNU_SOURCE
#include <sched.h>
#include <string.h>
#include <stddef.h>
#include <errno.h>
#include <sys/types.h>
#include <limits.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <linux/if_ether.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include "util.h"
#include "passt.h"
#include "netlink.h"
/* Netlink state shared by the routines in this file:
 * - nl_sock:    socket opened in the init namespace, -1 if not set up
 * - nl_sock_ns: socket opened in the target namespace, -1 if not set up
 * - nl_seq:     sequence number for requests (just needs to be monotonic)
 */
static int nl_sock = -1;
static int nl_sock_ns = -1;
static int nl_seq;
/**
 * __nl_sock_init() - Set up netlink sockets in init and target namespace
 * @arg:	Execution context, NULL to set up the init namespace socket only
 *
 * The socket for the init namespace (nl_sock) is created, bound and switched
 * to strict checking first. If that worked and a context was passed, the
 * target namespace is entered and the same is done for nl_sock_ns. A socket
 * that can't be completely set up is closed and its descriptor left as -1.
 *
 * Return: 0
 */
static int __nl_sock_init(void *arg)
{
	struct sockaddr_nl addr = { .nl_family = AF_NETLINK, };
	struct ctx *c = (struct ctx *)arg;
	int *s = &nl_sock, v = 1;

ns:
	*s = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (*s < 0) {
		*s = -1;
	} else if (bind(*s, (struct sockaddr *)&addr, sizeof(addr)) ||
		   setsockopt(*s, SOL_NETLINK, NETLINK_GET_STRICT_CHK,
			      &v, sizeof(v))) {
		/* Don't leak the descriptor if the socket can't be configured */
		close(*s);
		*s = -1;
	}

	/* Stop on failure, if no namespace is requested, or once both are done */
	if (*s == -1 || !c || s == &nl_sock_ns)
		return 0;

	ns_enter((struct ctx *)arg);
	s = &nl_sock_ns;
	goto ns;
}
/**
 * nl_sock_init() - Call __nl_sock_init() and check for failures
 * @c:		Execution context
 *
 * In pasta mode, both the init and the namespace socket are needed, and
 * the setup routine is run via NS_CALL(). In any other mode, only the init
 * namespace socket is set up.
 *
 * Return: -EIO if sockets couldn't be set up, 0 otherwise
 */
int nl_sock_init(struct ctx *c)
{
	if (c->mode != MODE_PASTA) {
		__nl_sock_init(NULL);

		return nl_sock == -1 ? -EIO : 0;
	}

	NS_CALL(__nl_sock_init, c);

	if (nl_sock_ns == -1 || nl_sock == -1)
		return -EIO;

	return 0;
}
/**
 * nl_req() - Send netlink request and read response
 * @ns:		Use netlink socket in namespace
 * @buf:	Buffer for response (at least BUFSIZ long)
 * @req:	Request with netlink header
 * @len:	Request length
 *
 * Before sending, anything still queued on the socket is read without
 * blocking and discarded, stopping after the first datagram that carries an
 * NLMSG_DONE or NLMSG_ERROR message, or once nothing is left to read.
 *
 * Return: received length on success, negative error code on failure
 */
static int nl_req(int ns, char *buf, void *req, ssize_t len)
{
	int s = ns ? nl_sock_ns : nl_sock;
	char flush[BUFSIZ];

	for (;;) {
		struct nlmsghdr *nh = (struct nlmsghdr *)flush;
		int pending, end = 0;

		pending = recv(s, flush, sizeof(flush), MSG_DONTWAIT);
		if (pending <= 0)
			break;

		while (NLMSG_OK(nh, pending)) {
			if (nh->nlmsg_type == NLMSG_DONE ||
			    nh->nlmsg_type == NLMSG_ERROR) {
				end = 1;
				break;
			}

			nh = NLMSG_NEXT(nh, pending);
		}

		if (end)
			break;
	}

	if (send(s, req, len, 0) < len)
		return -errno;

	len = recv(s, buf, BUFSIZ, 0);
	if (len < 0)
		return -errno;

	return len;
}
/**
 * nl_get_ext_if() - Get interface index supporting IP versions being probed
 * @v4:		Probe IPv4 support, set to ENABLED or DISABLED on return
 * @v6:		Probe IPv6 support, set to ENABLED or DISABLED on return
 *
 * Routes from the main table are dumped for each IP version marked as
 * IP_VERSION_PROBE, and only those with a zero-length destination prefix are
 * considered. If one version is disabled, the output interface of the first
 * such route found for the other version is returned right away. Otherwise,
 * an interface with such routes for both versions is preferred, then the first
 * one found for IPv4 only, then the first one found for IPv6 only.
 *
 * Return: interface index, 0 if not found
 */
unsigned int nl_get_ext_if(int *v4, int *v6)
{
	struct { struct nlmsghdr nlh; struct rtmsg rtm; } req = {
		.nlh.nlmsg_type	 = RTM_GETROUTE,
		.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP | NLM_F_EXCL,
		.nlh.nlmsg_len	 = NLMSG_LENGTH(sizeof(struct rtmsg)),
		.nlh.nlmsg_seq	 = nl_seq++,

		.rtm.rtm_table	 = RT_TABLE_MAIN,
		.rtm.rtm_scope	 = RT_SCOPE_UNIVERSE,
		.rtm.rtm_type	 = RTN_UNICAST,
	};
	unsigned int i, first_v4 = 0, first_v6 = 0;
	uint8_t has_v4[PAGE_SIZE * 8 / 8] = { 0 }; /* See __dev_alloc_name() */
	uint8_t has_v6[PAGE_SIZE * 8 / 8] = { 0 }; /* in kernel */
	struct nlmsghdr *nh;
	struct rtattr *rta;
	struct rtmsg *rtm;
	char buf[BUFSIZ];
	long *word, tmp;
	int n, na, *v;
	uint8_t *vmap;

	if (*v4 == IP_VERSION_PROBE) {
		v = v4;
		req.rtm.rtm_family = AF_INET;
		vmap = has_v4;
	} else if (*v6 == IP_VERSION_PROBE) {
v6:
		/* Also reached by goto after the IPv4 pass, if IPv6 is probed */
		v = v6;
		req.rtm.rtm_family = AF_INET6;
		vmap = has_v6;
	} else {
		return 0;
	}

	n = nl_req(0, buf, &req, sizeof(req));
	nh = (struct nlmsghdr *)buf;

	for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
		rtm = (struct rtmsg *)NLMSG_DATA(nh);

		/* Only routes without destination prefix, for this family */
		if (rtm->rtm_dst_len || rtm->rtm_family != req.rtm.rtm_family)
			continue;

		for (rta = (struct rtattr *)RTM_RTA(rtm), na = RTM_PAYLOAD(nh);
		     RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
			unsigned int ifi;

			if (rta->rta_type != RTA_OIF)
				continue;

			ifi = *(unsigned int *)RTA_DATA(rta);

			/* Single version requested: first match is enough */
			if (*v4 == IP_VERSION_DISABLED ||
			    *v6 == IP_VERSION_DISABLED) {
				*v = IP_VERSION_ENABLED;
				return ifi;
			}

			if (v == v4 && !first_v4)
				first_v4 = ifi;

			if (v == v6 && !first_v6)
				first_v6 = ifi;

			/* Index comes from the kernel and isn't bounded by the
			 * bitmap size: don't write past the end of the map.
			 */
			if (ifi < sizeof(has_v4) * 8)
				bitmap_set(vmap, ifi);
		}
	}

	if (v == v4 && *v6 == IP_VERSION_PROBE) {
		req.nlh.nlmsg_seq = nl_seq++;
		goto v6;
	}

	/* Look for an interface seen for both IPv4 and IPv6 */
	word = (long *)has_v4;
	for (i = 0; i < ARRAY_SIZE(has_v4) / sizeof(long); i++, word++) {
		int ifi;

		tmp = *word;
		while ((n = ffsl(tmp))) {
			ifi = i * sizeof(long) * 8 + n - 1;
			if (!first_v4)
				first_v4 = ifi;

			tmp &= ~(1UL << (n - 1));
			if (bitmap_isset(has_v6, ifi)) {
				*v4 = *v6 = IP_VERSION_ENABLED;
				return ifi;
			}
		}
	}

	if (first_v4) {
		*v4 = IP_VERSION_ENABLED;
		*v6 = IP_VERSION_DISABLED;
		return first_v4;
	}

	if (first_v6) {
		/* IPv6 only: IPv4 is the version that gets disabled */
		*v4 = IP_VERSION_DISABLED;
		*v6 = IP_VERSION_ENABLED;
		return first_v6;
	}

	err("No external routable interface for any IP protocol");
	return 0;
}
/**
 * nl_route() - Get/set default gateway for given interface and address family
 * @ns:		Use netlink socket in namespace
 * @ifi:	Interface index
 * @af:		Address family
 * @gw:		Default gateway to fill if zero, to set if not
 *
 * If @gw is non-zero on entry, a new route with an all-zero destination and
 * @gw as gateway is requested for @ifi. Otherwise, routes are dumped and the
 * gateway of the first route without destination prefix is copied to @gw.
 */
void nl_route(int ns, unsigned int ifi, sa_family_t af, void *gw)
{
	int set = af == AF_INET6 ? !IN6_IS_ADDR_UNSPECIFIED(gw) :
		  af == AF_INET  ? !!*(uint32_t *)gw : 0;
	struct req_t {
		struct nlmsghdr nlh;
		struct rtmsg rtm;

		struct rtattr rta;
		unsigned int ifi;

		union {
			struct {
				struct rtattr rta_dst;
				struct in6_addr d;
				struct rtattr rta_gw;
				struct in6_addr a;
			} r6;
			struct {
				struct rtattr rta_dst;
				uint32_t d;
				struct rtattr rta_gw;
				uint32_t a;
				uint8_t end;
			} r4;
		};
	} req = {
		.nlh.nlmsg_type	  = set ? RTM_NEWROUTE : RTM_GETROUTE,
		.nlh.nlmsg_flags  = NLM_F_REQUEST,
		.nlh.nlmsg_seq	  = nl_seq++,

		.rtm.rtm_family	  = af,
		.rtm.rtm_table	  = RT_TABLE_MAIN,
		.rtm.rtm_scope	  = RT_SCOPE_UNIVERSE,
		.rtm.rtm_type	  = RTN_UNICAST,

		.rta.rta_type	  = RTA_OIF,
		.rta.rta_len	  = RTA_LENGTH(sizeof(unsigned int)),
		.ifi		  = ifi,
	};
	struct nlmsghdr *msg;
	struct rtattr *attr;
	struct rtmsg *route;
	char buf[BUFSIZ];
	int len, attr_len;

	if (!set) {
		/* Dump request: header, route message and interface only */
		req.nlh.nlmsg_len = offsetof(struct req_t, r6);
		req.nlh.nlmsg_flags |= NLM_F_DUMP;
	} else {
		if (af == AF_INET6) {
			req.nlh.nlmsg_len = sizeof(req);

			req.r6.rta_dst.rta_len = RTA_LENGTH(sizeof(req.r6.d));
			req.r6.rta_dst.rta_type = RTA_DST;

			req.r6.rta_gw.rta_len = RTA_LENGTH(sizeof(req.r6.a));
			req.r6.rta_gw.rta_type = RTA_GATEWAY;
			memcpy(&req.r6.a, gw, sizeof(req.r6.a));
		} else {
			/* Length stops right before the 'end' marker */
			req.nlh.nlmsg_len = offsetof(struct req_t, r4.end);

			req.r4.rta_dst.rta_len = RTA_LENGTH(sizeof(req.r4.d));
			req.r4.rta_dst.rta_type = RTA_DST;

			req.r4.rta_gw.rta_len = RTA_LENGTH(sizeof(req.r4.a));
			req.r4.rta_gw.rta_type = RTA_GATEWAY;
			req.r4.a = *(uint32_t *)gw;
		}

		req.rtm.rtm_protocol = RTPROT_BOOT;
		req.nlh.nlmsg_flags |= NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
	}

	len = nl_req(ns, buf, &req, req.nlh.nlmsg_len);
	if (set)
		return;

	for (msg = (struct nlmsghdr *)buf; NLMSG_OK(msg, len);
	     msg = NLMSG_NEXT(msg, len)) {
		if (msg->nlmsg_type == NLMSG_DONE)
			break;

		if (msg->nlmsg_type != RTM_NEWROUTE)
			continue;

		route = (struct rtmsg *)NLMSG_DATA(msg);
		if (route->rtm_dst_len)
			continue;

		attr = (struct rtattr *)RTM_RTA(route);
		attr_len = RTM_PAYLOAD(msg);
		while (RTA_OK(attr, attr_len)) {
			if (attr->rta_type == RTA_GATEWAY) {
				memcpy(gw, RTA_DATA(attr), RTA_PAYLOAD(attr));
				return;
			}

			attr = RTA_NEXT(attr, attr_len);
		}
	}
}
/**
 * nl_addr() - Get/set IP addresses
 * @ns:		Use netlink socket in namespace
 * @ifi:	Interface index
 * @af:		Address family
 * @addr:	Global address to fill if zero, to set if not, ignored if NULL
 * @prefix_len:	Prefix length, copied into the request header
 * @addr_l:	Link-scoped address to fill, NULL if not requested
 *
 * If @addr is given and non-zero, it's configured on the interface as both
 * local and peer address. Otherwise, addresses are dumped: @addr, if given, is
 * filled with the first address found (for IPv6, the first one with universe
 * scope), and @addr_l, if given and unspecified, with the first link-scoped
 * IPv6 address.
 */
void nl_addr(int ns, unsigned int ifi, sa_family_t af,
	     void *addr, int prefix_len, void *addr_l)
{
	int set = addr && ((af == AF_INET6 && !IN6_IS_ADDR_UNSPECIFIED(addr)) ||
			   (af == AF_INET && *(uint32_t *)addr));
	struct req_t {
		struct nlmsghdr nlh;
		struct ifaddrmsg ifa;
		union {
			struct {
				struct rtattr rta_l;
				uint32_t l;
				struct rtattr rta_a;
				uint32_t a;
				uint8_t end;
			} a4;
			struct {
				struct rtattr rta_l;
				struct in6_addr l;
				struct rtattr rta_a;
				struct in6_addr a;
			} a6;
		};
	} req = {
		.nlh.nlmsg_type    = set ? RTM_NEWADDR : RTM_GETADDR,
		.nlh.nlmsg_flags   = NLM_F_REQUEST,
		.nlh.nlmsg_len     = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
		.nlh.nlmsg_seq     = nl_seq++,

		.ifa.ifa_family    = af,
		.ifa.ifa_index     = ifi,
		.ifa.ifa_prefixlen = prefix_len,
	};
	struct ifaddrmsg *ifa;
	struct nlmsghdr *nh;
	struct rtattr *rta;
	char buf[BUFSIZ];
	int n, na;

	if (set) {
		if (af == AF_INET6) {
			req.nlh.nlmsg_len = sizeof(req);

			memcpy(&req.a6.l, addr, sizeof(req.a6.l));
			req.a6.rta_l.rta_len = RTA_LENGTH(sizeof(req.a6.l));
			req.a6.rta_l.rta_type = IFA_LOCAL;
			memcpy(&req.a6.a, addr, sizeof(req.a6.a));
			req.a6.rta_a.rta_len = RTA_LENGTH(sizeof(req.a6.a));
			req.a6.rta_a.rta_type = IFA_ADDRESS;
		} else {
			/* Length stops right before the 'end' marker */
			req.nlh.nlmsg_len = offsetof(struct req_t, a4.end);

			req.a4.l = req.a4.a = *(uint32_t *)addr;
			req.a4.rta_l.rta_len = RTA_LENGTH(sizeof(req.a4.l));
			req.a4.rta_l.rta_type = IFA_LOCAL;
			req.a4.rta_a.rta_len = RTA_LENGTH(sizeof(req.a4.a));
			req.a4.rta_a.rta_type = IFA_ADDRESS;
		}

		req.ifa.ifa_scope = RT_SCOPE_UNIVERSE;
		req.nlh.nlmsg_flags |= NLM_F_CREATE | NLM_F_ACK | NLM_F_EXCL;
	} else {
		req.nlh.nlmsg_flags |= NLM_F_DUMP;
	}

	n = nl_req(ns, buf, &req, req.nlh.nlmsg_len);
	if (set)
		return;

	nh = (struct nlmsghdr *)buf;
	for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
		if (nh->nlmsg_type != RTM_NEWADDR)
			goto next;

		ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);

		/* Attributes follow a struct ifaddrmsg here, not a struct
		 * rtmsg: the payload length has to be computed accordingly.
		 */
		for (rta = (struct rtattr *)IFA_RTA(ifa), na = IFA_PAYLOAD(nh);
		     RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
			if (rta->rta_type != IFA_ADDRESS)
				continue;

			if (af == AF_INET && addr && !*(uint32_t *)addr)
				memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
			else if (af == AF_INET6 && addr &&
				 ifa->ifa_scope == RT_SCOPE_UNIVERSE &&
				 IN6_IS_ADDR_UNSPECIFIED(addr))
				memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));

			if (addr_l &&
			    af == AF_INET6 && ifa->ifa_scope == RT_SCOPE_LINK &&
			    IN6_IS_ADDR_UNSPECIFIED(addr_l))
				memcpy(addr_l, RTA_DATA(rta), RTA_PAYLOAD(rta));
		}
next:
		if (nh->nlmsg_type == NLMSG_DONE)
			break;
	}
}
/**
 * nl_link() - Get/set link attributes
 * @ns:		Use netlink socket in namespace
 * @ifi:	Interface index
 * @mac:	MAC address to fill, if passed as zero, to set otherwise
 * @up:		If set, bring up the link
 *
 * If @mac is non-zero or @up is set, the link is changed accordingly and
 * nothing is read back. Otherwise, the link is queried and its address is
 * copied to @mac.
 */
void nl_link(int ns, unsigned int ifi, void *mac, int up)
{
	int change = !MAC_IS_ZERO(mac) || up;
	struct {
		struct nlmsghdr nlh;
		struct ifinfomsg ifm;
		struct rtattr rta;
		unsigned char mac[ETH_ALEN];
	} req = {
		.nlh.nlmsg_type	 = change ? RTM_NEWLINK : RTM_GETLINK,
		.nlh.nlmsg_len	 = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
		.nlh.nlmsg_flags = NLM_F_REQUEST | (change ? NLM_F_ACK : 0),
		.nlh.nlmsg_seq	 = nl_seq++,
		.ifm.ifi_family	 = AF_UNSPEC,
		.ifm.ifi_index	 = ifi,
		.ifm.ifi_flags	 = up ? IFF_UP : 0,
		.ifm.ifi_change	 = up ? IFF_UP : 0,

		.rta.rta_type	 = IFLA_ADDRESS,
		.rta.rta_len	 = RTA_LENGTH(ETH_ALEN),
	};
	struct ifinfomsg *ifm;
	struct nlmsghdr *nh;
	struct rtattr *rta;
	char buf[BUFSIZ];
	int n, na;

	/* The address attribute is part of the request only if it's to be set */
	if (!MAC_IS_ZERO(mac)) {
		req.nlh.nlmsg_len = sizeof(req);
		memcpy(req.mac, mac, ETH_ALEN);
	}

	n = nl_req(ns, buf, &req, req.nlh.nlmsg_len);

	if (change)
		return;

	nh = (struct nlmsghdr *)buf;
	for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
		if (nh->nlmsg_type != RTM_NEWLINK)
			goto next;

		ifm = (struct ifinfomsg *)NLMSG_DATA(nh);

		/* Attributes follow a struct ifinfomsg here, not a struct
		 * rtmsg: the payload length has to be computed accordingly,
		 * or the walk would run past the end of the message.
		 */
		for (rta = (struct rtattr *)IFLA_RTA(ifm),
		     na = IFLA_PAYLOAD(nh);
		     RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
			if (rta->rta_type != IFLA_ADDRESS)
				continue;

			memcpy(mac, RTA_DATA(rta), ETH_ALEN);
			break;
		}
next:
		if (nh->nlmsg_type == NLMSG_DONE)
			break;
	}
}