passt: Relicense to GPL 2.0, or any later version
In practical terms, passt doesn't benefit from the additional
protection offered by the AGPL over the GPL, because it's not
suitable to be executed over a computer network.
Further, restricting the distribution under the version 3 of the GPL
wouldn't provide any practical advantage either, as far as the passt
codebase is concerned, and might cause unnecessary compatibility
dilemmas.
Change licensing terms to the GNU General Public License Version 2,
or any later version, with written permission from all current and
past contributors, namely: myself, David Gibson, Laine Stump, Andrea
Bolognani, Paul Holzinger, Richard W.M. Jones, Chris Kuhn, Florian
Weimer, Giuseppe Scrivano, Stefan Hajnoczi, and Vasiliy Ulyanov.
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2023-04-05 20:11:44 +02:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2021-10-11 12:01:31 +02:00
|
|
|
|
|
|
|
/* PASST - Plug A Simple Socket Transport
|
|
|
|
* for qemu/UNIX domain socket mode
|
|
|
|
*
|
|
|
|
* PASTA - Pack A Subtle Tap Abstraction
|
|
|
|
* for network namespace/tap device mode
|
|
|
|
*
|
|
|
|
* netlink.c - rtnetlink routines: interfaces, addresses, routes
|
|
|
|
*
|
|
|
|
* Copyright (c) 2020-2021 Red Hat GmbH
|
|
|
|
* Author: Stefano Brivio <sbrivio@redhat.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sched.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <limits.h>
|
2023-03-21 04:54:59 +01:00
|
|
|
#include <unistd.h>
|
2023-03-08 04:00:22 +01:00
|
|
|
#include <signal.h>
|
2021-10-11 12:01:31 +02:00
|
|
|
#include <stdlib.h>
|
conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.
Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.
To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.
As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.
While at it:
- move the v4_only && v6_only options check just after the first
option processing loop, as this is more strictly related to
option parsing proper
- update the man page, explaining that CAP_NET_BIND_SERVICE is
*not* the preferred way to bind ports, because passt and pasta
can be abused to allow other processes to make effective usage
of it. Add a note about the recommended sysctl instead
- simplify nl_sock_init_do() now that it's called once for each
case
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2022-10-13 18:21:27 +02:00
|
|
|
#include <stdbool.h>
|
2021-10-11 12:01:31 +02:00
|
|
|
#include <stdint.h>
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
#include <netinet/in.h>
|
2021-10-21 04:26:08 +02:00
|
|
|
#include <netinet/if_ether.h>
|
|
|
|
|
2021-10-11 12:01:31 +02:00
|
|
|
#include <linux/netlink.h>
|
|
|
|
#include <linux/rtnetlink.h>
|
|
|
|
|
|
|
|
#include "util.h"
|
|
|
|
#include "passt.h"
|
2022-09-24 09:53:15 +02:00
|
|
|
#include "log.h"
|
2021-10-11 12:01:31 +02:00
|
|
|
#include "netlink.h"
|
|
|
|
|
2023-03-08 03:43:25 +01:00
|
|
|
/* Size, in bytes, of the buffers used to receive netlink messages */
#define NLBUFSIZ (8192 * sizeof(struct nlmsghdr)) /* See netlink(7) */
|
|
|
|
|
2021-10-11 12:01:31 +02:00
|
|
|
/* Netlink state shared by the functions in this file:
 * - nl_sock:    socket in the init namespace, -1 while not open
 * - nl_sock_ns: socket in the target namespace, -1 while not open
 * - nl_seq:     sequence number for requests (just needs to be monotonic)
 */
|
|
|
|
static int nl_sock = -1;
|
|
|
|
static int nl_sock_ns = -1;
|
|
|
|
static int nl_seq;
|
|
|
|
|
|
|
|
/**
|
conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.
Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.
To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.
As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.
While at it:
- move the v4_only && v6_only options check just after the first
option processing loop, as this is more strictly related to
option parsing proper
- update the man page, explaining that CAP_NET_BIND_SERVICE is
*not* the preferred way to bind ports, because passt and pasta
can be abused to allow other processes to make effective usage
of it. Add a note about the recommended sysctl instead
- simplify nl_sock_init_do() now that it's called once for each
case
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2022-10-13 18:21:27 +02:00
|
|
|
* nl_sock_init_do() - Set up netlink sockets in init or target namespace
|
|
|
|
* @arg: Execution context, if running from namespace, NULL otherwise
|
2021-10-11 12:01:31 +02:00
|
|
|
*
|
|
|
|
* Return: 0
|
|
|
|
*/
|
2021-10-21 04:26:08 +02:00
|
|
|
static int nl_sock_init_do(void *arg)
|
2021-10-11 12:01:31 +02:00
|
|
|
{
|
|
|
|
struct sockaddr_nl addr = { .nl_family = AF_NETLINK, };
|
conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.
Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.
To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.
As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.
While at it:
- move the v4_only && v6_only options check just after the first
option processing loop, as this is more strictly related to
option parsing proper
- update the man page, explaining that CAP_NET_BIND_SERVICE is
*not* the preferred way to bind ports, because passt and pasta
can be abused to allow other processes to make effective usage
of it. Add a note about the recommended sysctl instead
- simplify nl_sock_init_do() now that it's called once for each
case
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2022-10-13 18:21:27 +02:00
|
|
|
int *s = arg ? &nl_sock_ns : &nl_sock;
|
2022-01-25 19:55:54 +01:00
|
|
|
#ifdef NETLINK_GET_STRICT_CHK
|
|
|
|
int y = 1;
|
|
|
|
#endif
|
2021-10-11 12:01:31 +02:00
|
|
|
|
conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.
Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.
To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.
As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.
While at it:
- move the v4_only && v6_only options check just after the first
option processing loop, as this is more strictly related to
option parsing proper
- update the man page, explaining that CAP_NET_BIND_SERVICE is
*not* the preferred way to bind ports, because passt and pasta
can be abused to allow other processes to make effective usage
of it. Add a note about the recommended sysctl instead
- simplify nl_sock_init_do() now that it's called once for each
case
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2022-10-13 18:21:27 +02:00
|
|
|
if (arg)
|
|
|
|
ns_enter((struct ctx *)arg);
|
|
|
|
|
2023-02-07 16:10:46 +01:00
|
|
|
*s = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
|
|
|
|
if (*s < 0 || bind(*s, (struct sockaddr *)&addr, sizeof(addr))) {
|
2021-10-11 12:01:31 +02:00
|
|
|
*s = -1;
|
|
|
|
return 0;
|
conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.
Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.
To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.
As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.
While at it:
- move the v4_only && v6_only options check just after the first
option processing loop, as this is more strictly related to
option parsing proper
- update the man page, explaining that CAP_NET_BIND_SERVICE is
*not* the preferred way to bind ports, because passt and pasta
can be abused to allow other processes to make effective usage
of it. Add a note about the recommended sysctl instead
- simplify nl_sock_init_do() now that it's called once for each
case
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2022-10-13 18:21:27 +02:00
|
|
|
}
|
2021-10-11 12:01:31 +02:00
|
|
|
|
2022-01-25 19:55:54 +01:00
|
|
|
#ifdef NETLINK_GET_STRICT_CHK
|
2022-04-05 07:10:30 +02:00
|
|
|
if (setsockopt(*s, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &y, sizeof(y)))
|
|
|
|
debug("netlink: cannot set NETLINK_GET_STRICT_CHK on %i", *s);
|
2022-01-25 19:55:54 +01:00
|
|
|
#endif
|
conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.
Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.
To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.
As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.
While at it:
- move the v4_only && v6_only options check just after the first
option processing loop, as this is more strictly related to
option parsing proper
- update the man page, explaining that CAP_NET_BIND_SERVICE is
*not* the preferred way to bind ports, because passt and pasta
can be abused to allow other processes to make effective usage
of it. Add a note about the recommended sysctl instead
- simplify nl_sock_init_do() now that it's called once for each
case
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2022-10-13 18:21:27 +02:00
|
|
|
return 0;
|
2021-10-11 12:01:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.
Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.
To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.
As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.
While at it:
- move the v4_only && v6_only options check just after the first
option processing loop, as this is more strictly related to
option parsing proper
- update the man page, explaining that CAP_NET_BIND_SERVICE is
*not* the preferred way to bind ports, because passt and pasta
can be abused to allow other processes to make effective usage
of it. Add a note about the recommended sysctl instead
- simplify nl_sock_init_do() now that it's called once for each
case
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2022-10-13 18:21:27 +02:00
|
|
|
* nl_sock_init() - Call nl_sock_init_do(), won't return on failure
|
2021-10-11 12:01:31 +02:00
|
|
|
* @c: Execution context
|
conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.
Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.
To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.
As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.
While at it:
- move the v4_only && v6_only options check just after the first
option processing loop, as this is more strictly related to
option parsing proper
- update the man page, explaining that CAP_NET_BIND_SERVICE is
*not* the preferred way to bind ports, because passt and pasta
can be abused to allow other processes to make effective usage
of it. Add a note about the recommended sysctl instead
- simplify nl_sock_init_do() now that it's called once for each
case
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2022-10-13 18:21:27 +02:00
|
|
|
* @ns: Get socket in namespace, not in init
|
2021-10-11 12:01:31 +02:00
|
|
|
*/
|
conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.
Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.
To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.
As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.
While at it:
- move the v4_only && v6_only options check just after the first
option processing loop, as this is more strictly related to
option parsing proper
- update the man page, explaining that CAP_NET_BIND_SERVICE is
*not* the preferred way to bind ports, because passt and pasta
can be abused to allow other processes to make effective usage
of it. Add a note about the recommended sysctl instead
- simplify nl_sock_init_do() now that it's called once for each
case
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2022-10-13 18:21:27 +02:00
|
|
|
void nl_sock_init(const struct ctx *c, bool ns)
|
2021-10-11 12:01:31 +02:00
|
|
|
{
|
conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.
Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.
To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.
As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.
While at it:
- move the v4_only && v6_only options check just after the first
option processing loop, as this is more strictly related to
option parsing proper
- update the man page, explaining that CAP_NET_BIND_SERVICE is
*not* the preferred way to bind ports, because passt and pasta
can be abused to allow other processes to make effective usage
of it. Add a note about the recommended sysctl instead
- simplify nl_sock_init_do() now that it's called once for each
case
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2022-10-13 18:21:27 +02:00
|
|
|
if (ns) {
|
2021-10-21 04:26:08 +02:00
|
|
|
NS_CALL(nl_sock_init_do, c);
|
2021-10-11 12:01:31 +02:00
|
|
|
if (nl_sock_ns == -1)
|
conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.
Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.
To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.
As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.
While at it:
- move the v4_only && v6_only options check just after the first
option processing loop, as this is more strictly related to
option parsing proper
- update the man page, explaining that CAP_NET_BIND_SERVICE is
*not* the preferred way to bind ports, because passt and pasta
can be abused to allow other processes to make effective usage
of it. Add a note about the recommended sysctl instead
- simplify nl_sock_init_do() now that it's called once for each
case
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2022-10-13 18:21:27 +02:00
|
|
|
goto fail;
|
2021-10-11 12:01:31 +02:00
|
|
|
} else {
|
2021-10-21 04:26:08 +02:00
|
|
|
nl_sock_init_do(NULL);
|
2021-10-11 12:01:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (nl_sock == -1)
|
conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.
Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.
To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.
As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.
While at it:
- move the v4_only && v6_only options check just after the first
option processing loop, as this is more strictly related to
option parsing proper
- update the man page, explaining that CAP_NET_BIND_SERVICE is
*not* the preferred way to bind ports, because passt and pasta
can be abused to allow other processes to make effective usage
of it. Add a note about the recommended sysctl instead
- simplify nl_sock_init_do() now that it's called once for each
case
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2022-10-13 18:21:27 +02:00
|
|
|
goto fail;
|
2021-10-11 12:01:31 +02:00
|
|
|
|
conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()
Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.
Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.
To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.
As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.
While at it:
- move the v4_only && v6_only options check just after the first
option processing loop, as this is more strictly related to
option parsing proper
- update the man page, explaining that CAP_NET_BIND_SERVICE is
*not* the preferred way to bind ports, because passt and pasta
can be abused to allow other processes to make effective usage
of it. Add a note about the recommended sysctl instead
- simplify nl_sock_init_do() now that it's called once for each
case
Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2022-10-13 18:21:27 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
fail:
|
2023-02-15 09:24:37 +01:00
|
|
|
die("Failed to get netlink socket");
|
2021-10-11 12:01:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* nl_req() - Send netlink request and read response
|
|
|
|
* @ns: Use netlink socket in namespace
|
2023-05-14 13:30:18 +02:00
|
|
|
* @buf: Buffer for response (at least NLBUFSIZ long)
|
2021-10-11 12:01:31 +02:00
|
|
|
* @req: Request with netlink header
|
|
|
|
* @len: Request length
|
|
|
|
*
|
|
|
|
* Return: received length on success, negative error code on failure
|
|
|
|
*/
|
2022-03-26 07:23:21 +01:00
|
|
|
static int nl_req(int ns, char *buf, const void *req, ssize_t len)
|
2021-10-11 12:01:31 +02:00
|
|
|
{
|
2021-10-20 00:05:11 +02:00
|
|
|
int s = ns ? nl_sock_ns : nl_sock, done = 0;
|
2023-03-08 03:43:25 +01:00
|
|
|
char flush[NLBUFSIZ];
|
2021-10-20 00:05:11 +02:00
|
|
|
ssize_t n;
|
2021-10-11 12:01:31 +02:00
|
|
|
|
|
|
|
while (!done && (n = recv(s, flush, sizeof(flush), MSG_DONTWAIT)) > 0) {
|
|
|
|
struct nlmsghdr *nh = (struct nlmsghdr *)flush;
|
2021-10-20 00:05:11 +02:00
|
|
|
size_t nm = n;
|
2021-10-11 12:01:31 +02:00
|
|
|
|
2021-10-20 00:05:11 +02:00
|
|
|
for ( ; NLMSG_OK(nh, nm); nh = NLMSG_NEXT(nh, nm)) {
|
2021-10-11 12:01:31 +02:00
|
|
|
if (nh->nlmsg_type == NLMSG_DONE ||
|
|
|
|
nh->nlmsg_type == NLMSG_ERROR) {
|
|
|
|
done = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-08 03:43:25 +01:00
|
|
|
if ((send(s, req, len, 0) < len) ||
|
|
|
|
(len = recv(s, buf, NLBUFSIZ, 0)) < 0)
|
2021-10-11 12:01:31 +02:00
|
|
|
return -errno;
|
|
|
|
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2022-07-22 07:31:13 +02:00
|
|
|
* nl_get_ext_if() - Get interface index supporting IP version being probed
|
|
|
|
* @af: Address family (AF_INET or AF_INET6) to look for connectivity
|
|
|
|
* for.
|
2021-10-11 12:01:31 +02:00
|
|
|
*
|
|
|
|
* Return: interface index, 0 if not found
|
|
|
|
*/
|
2022-07-22 07:31:13 +02:00
|
|
|
unsigned int nl_get_ext_if(sa_family_t af)
|
2021-10-11 12:01:31 +02:00
|
|
|
{
|
|
|
|
struct { struct nlmsghdr nlh; struct rtmsg rtm; } req = {
|
|
|
|
.nlh.nlmsg_type = RTM_GETROUTE,
|
2021-10-20 00:05:11 +02:00
|
|
|
.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
|
2021-10-11 12:01:31 +02:00
|
|
|
.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
|
|
|
|
.nlh.nlmsg_seq = nl_seq++,
|
|
|
|
|
|
|
|
.rtm.rtm_table = RT_TABLE_MAIN,
|
|
|
|
.rtm.rtm_scope = RT_SCOPE_UNIVERSE,
|
|
|
|
.rtm.rtm_type = RTN_UNICAST,
|
2022-07-22 07:31:13 +02:00
|
|
|
.rtm.rtm_family = af,
|
2021-10-11 12:01:31 +02:00
|
|
|
};
|
|
|
|
struct nlmsghdr *nh;
|
|
|
|
struct rtattr *rta;
|
2023-03-08 03:43:25 +01:00
|
|
|
char buf[NLBUFSIZ];
|
2022-04-05 07:10:30 +02:00
|
|
|
ssize_t n;
|
|
|
|
size_t na;
|
2021-10-11 12:01:31 +02:00
|
|
|
|
2022-04-05 07:10:30 +02:00
|
|
|
if ((n = nl_req(0, buf, &req, sizeof(req))) < 0)
|
|
|
|
return 0;
|
|
|
|
|
2021-10-11 12:01:31 +02:00
|
|
|
nh = (struct nlmsghdr *)buf;
|
|
|
|
|
|
|
|
for ( ; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n)) {
|
2022-09-28 06:33:19 +02:00
|
|
|
struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
|
2021-10-11 12:01:31 +02:00
|
|
|
|
2022-07-22 07:31:13 +02:00
|
|
|
if (rtm->rtm_dst_len || rtm->rtm_family != af)
|
2021-10-11 12:01:31 +02:00
|
|
|
continue;
|
|
|
|
|
2021-10-20 00:05:11 +02:00
|
|
|
for (rta = RTM_RTA(rtm), na = RTM_PAYLOAD(nh); RTA_OK(rta, na);
|
|
|
|
rta = RTA_NEXT(rta, na)) {
|
2021-10-11 12:01:31 +02:00
|
|
|
unsigned int ifi;
|
|
|
|
|
|
|
|
if (rta->rta_type != RTA_OIF)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ifi = *(unsigned int *)RTA_DATA(rta);
|
|
|
|
|
2022-07-22 07:31:13 +02:00
|
|
|
return ifi;
|
2021-10-11 12:01:31 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2023-08-03 09:19:42 +02:00
|
|
|
* nl_route_get_def() - Get default route for given interface and address family
|
|
|
|
* @ifi: Interface index
|
2021-10-11 12:01:31 +02:00
|
|
|
* @af: Address family
|
2023-08-03 09:19:42 +02:00
|
|
|
* @gw: Default gateway to fill on NL_GET
|
2021-10-11 12:01:31 +02:00
|
|
|
*/
|
2023-08-03 09:19:42 +02:00
|
|
|
void nl_route_get_def(unsigned int ifi, sa_family_t af, void *gw)
|
|
|
|
{
|
|
|
|
struct req_t {
|
|
|
|
struct nlmsghdr nlh;
|
|
|
|
struct rtmsg rtm;
|
|
|
|
struct rtattr rta;
|
|
|
|
unsigned int ifi;
|
|
|
|
} req = {
|
|
|
|
.nlh.nlmsg_type = RTM_GETROUTE,
|
|
|
|
.nlh.nlmsg_len = sizeof(req),
|
|
|
|
.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
|
|
|
|
.nlh.nlmsg_seq = nl_seq++,
|
|
|
|
|
|
|
|
.rtm.rtm_family = af,
|
|
|
|
.rtm.rtm_table = RT_TABLE_MAIN,
|
|
|
|
.rtm.rtm_scope = RT_SCOPE_UNIVERSE,
|
|
|
|
.rtm.rtm_type = RTN_UNICAST,
|
|
|
|
|
|
|
|
.rta.rta_type = RTA_OIF,
|
|
|
|
.rta.rta_len = RTA_LENGTH(sizeof(unsigned int)),
|
|
|
|
.ifi = ifi,
|
|
|
|
};
|
|
|
|
struct nlmsghdr *nh;
|
|
|
|
char buf[NLBUFSIZ];
|
|
|
|
ssize_t n;
|
|
|
|
|
|
|
|
if ((n = nl_req(0, buf, &req, req.nlh.nlmsg_len)) < 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (nh = (struct nlmsghdr *)buf;
|
|
|
|
NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
|
|
|
|
nh = NLMSG_NEXT(nh, n)) {
|
|
|
|
struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
|
|
|
|
struct rtattr *rta;
|
|
|
|
size_t na;
|
|
|
|
|
|
|
|
if (nh->nlmsg_type != RTM_NEWROUTE)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (rtm->rtm_dst_len)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
for (rta = RTM_RTA(rtm), na = RTM_PAYLOAD(nh); RTA_OK(rta, na);
|
|
|
|
rta = RTA_NEXT(rta, na)) {
|
|
|
|
if (rta->rta_type != RTA_GATEWAY)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
memcpy(gw, RTA_DATA(rta), RTA_PAYLOAD(rta));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* nl_route_set_def() - Set default route for given interface and address family
|
|
|
|
* @ifi: Interface index in target namespace
|
|
|
|
* @af: Address family
|
|
|
|
* @gw: Default gateway to set
|
|
|
|
*/
|
|
|
|
void nl_route_set_def(unsigned int ifi, sa_family_t af, void *gw)
|
2021-10-11 12:01:31 +02:00
|
|
|
{
|
|
|
|
struct req_t {
|
|
|
|
struct nlmsghdr nlh;
|
|
|
|
struct rtmsg rtm;
|
|
|
|
struct rtattr rta;
|
|
|
|
unsigned int ifi;
|
|
|
|
union {
|
|
|
|
struct {
|
|
|
|
struct rtattr rta_dst;
|
|
|
|
struct in6_addr d;
|
|
|
|
struct rtattr rta_gw;
|
|
|
|
struct in6_addr a;
|
|
|
|
} r6;
|
|
|
|
struct {
|
|
|
|
struct rtattr rta_dst;
|
|
|
|
uint32_t d;
|
|
|
|
struct rtattr rta_gw;
|
|
|
|
uint32_t a;
|
|
|
|
} r4;
|
2021-10-21 04:26:08 +02:00
|
|
|
} set;
|
2021-10-11 12:01:31 +02:00
|
|
|
} req = {
|
2023-08-03 09:19:42 +02:00
|
|
|
.nlh.nlmsg_type = RTM_NEWROUTE,
|
|
|
|
.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK |
|
|
|
|
NLM_F_CREATE | NLM_F_EXCL,
|
2021-10-11 12:01:31 +02:00
|
|
|
.nlh.nlmsg_seq = nl_seq++,
|
|
|
|
|
|
|
|
.rtm.rtm_family = af,
|
|
|
|
.rtm.rtm_table = RT_TABLE_MAIN,
|
|
|
|
.rtm.rtm_scope = RT_SCOPE_UNIVERSE,
|
|
|
|
.rtm.rtm_type = RTN_UNICAST,
|
2023-08-03 09:19:42 +02:00
|
|
|
.rtm.rtm_protocol = RTPROT_BOOT,
|
2021-10-11 12:01:31 +02:00
|
|
|
|
|
|
|
.rta.rta_type = RTA_OIF,
|
|
|
|
.rta.rta_len = RTA_LENGTH(sizeof(unsigned int)),
|
2023-08-03 09:19:42 +02:00
|
|
|
.ifi = ifi,
|
2021-10-11 12:01:31 +02:00
|
|
|
};
|
2023-03-08 03:43:25 +01:00
|
|
|
char buf[NLBUFSIZ];
|
2021-10-21 04:26:08 +02:00
|
|
|
|
2023-08-03 09:19:42 +02:00
|
|
|
if (af == AF_INET6) {
|
|
|
|
size_t rta_len = RTA_LENGTH(sizeof(req.set.r6.d));
|
2021-10-11 12:01:31 +02:00
|
|
|
|
2023-08-03 09:19:42 +02:00
|
|
|
req.nlh.nlmsg_len = offsetof(struct req_t, set.r6)
|
|
|
|
+ sizeof(req.set.r6);
|
2021-10-11 12:01:31 +02:00
|
|
|
|
2023-08-03 09:19:42 +02:00
|
|
|
req.set.r6.rta_dst.rta_type = RTA_DST;
|
|
|
|
req.set.r6.rta_dst.rta_len = rta_len;
|
2021-10-21 04:26:08 +02:00
|
|
|
|
2023-08-03 09:19:42 +02:00
|
|
|
memcpy(&req.set.r6.a, gw, sizeof(req.set.r6.a));
|
|
|
|
req.set.r6.rta_gw.rta_type = RTA_GATEWAY;
|
|
|
|
req.set.r6.rta_gw.rta_len = rta_len;
|
|
|
|
} else {
|
|
|
|
size_t rta_len = RTA_LENGTH(sizeof(req.set.r4.d));
|
2021-10-11 12:01:31 +02:00
|
|
|
|
2023-08-03 09:19:42 +02:00
|
|
|
req.nlh.nlmsg_len = offsetof(struct req_t, set.r4)
|
|
|
|
+ sizeof(req.set.r4);
|
2021-10-11 12:01:31 +02:00
|
|
|
|
2023-08-03 09:19:42 +02:00
|
|
|
req.set.r4.rta_dst.rta_type = RTA_DST;
|
|
|
|
req.set.r4.rta_dst.rta_len = rta_len;
|
2021-10-11 12:01:31 +02:00
|
|
|
|
2023-08-03 09:19:42 +02:00
|
|
|
req.set.r4.a = *(uint32_t *)gw;
|
|
|
|
req.set.r4.rta_gw.rta_type = RTA_GATEWAY;
|
|
|
|
req.set.r4.rta_gw.rta_len = rta_len;
|
2021-10-11 12:01:31 +02:00
|
|
|
}
|
|
|
|
|
2023-08-03 09:19:42 +02:00
|
|
|
nl_req(1, buf, &req, req.nlh.nlmsg_len);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* nl_route_dup() - Copy routes for given interface and address family
|
|
|
|
* @ifi: Interface index in outer network namespace
|
|
|
|
* @ifi_ns: Interface index in target namespace for NL_SET, NL_DUP
|
|
|
|
* @af: Address family
|
|
|
|
*/
|
|
|
|
void nl_route_dup(unsigned int ifi, unsigned int ifi_ns, sa_family_t af)
|
|
|
|
{
|
|
|
|
struct req_t {
|
|
|
|
struct nlmsghdr nlh;
|
|
|
|
struct rtmsg rtm;
|
|
|
|
struct rtattr rta;
|
|
|
|
unsigned int ifi;
|
|
|
|
} req = {
|
|
|
|
.nlh.nlmsg_type = RTM_GETROUTE,
|
|
|
|
.nlh.nlmsg_len = sizeof(req),
|
|
|
|
.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
|
|
|
|
.nlh.nlmsg_seq = nl_seq++,
|
|
|
|
|
|
|
|
.rtm.rtm_family = af,
|
|
|
|
.rtm.rtm_table = RT_TABLE_MAIN,
|
|
|
|
.rtm.rtm_scope = RT_SCOPE_UNIVERSE,
|
|
|
|
.rtm.rtm_type = RTN_UNICAST,
|
|
|
|
|
|
|
|
.rta.rta_type = RTA_OIF,
|
|
|
|
.rta.rta_len = RTA_LENGTH(sizeof(unsigned int)),
|
|
|
|
.ifi = ifi,
|
|
|
|
};
|
|
|
|
char buf[NLBUFSIZ], resp[NLBUFSIZ];
|
|
|
|
unsigned dup_routes = 0;
|
|
|
|
ssize_t n, nlmsgs_size;
|
|
|
|
struct nlmsghdr *nh;
|
|
|
|
unsigned i;
|
netlink: Add functionality to copy routes from outer namespace
Instead of just fetching the default gateway and configuring a single
equivalent route in the target namespace, on 'pasta --config-net', it
might be desirable in some cases to copy the whole set of routes
corresponding to a given output interface.
For instance, in:
https://github.com/containers/podman/issues/18539
IPv4 Default Route Does Not Propagate to Pasta Containers on Hetzner VPSes
configuring the default gateway won't work without a gateway-less
route (specifying the output interface only), because the default
gateway is, somewhat dubiously, not on the same subnet as the
container.
This is a similar case to the one covered by commit 7656a6f88882
("conf: Adjust netmask on mismatch between IPv4 address/netmask and
gateway"), and I'm not exactly proud of that workaround.
We also have:
https://bugs.passt.top/show_bug.cgi?id=49
pasta does not work with tap-style interface
for which, eventually, we should be able to configure a gateway-less
route in the target namespace.
Introduce different operation modes for nl_route(), including a new
NL_DUP one, not exposed yet, which simply parrots back to the kernel
the route dump for a given interface from the outer namespace, fixing
up flags and interface indices on the way, and requesting to add the
same routes in the target namespace, on the interface we manage.
For n routes we want to duplicate, send n identical netlink requests
including the full dump: routes might depend on each other and the
kernel processes RTM_NEWROUTE messages sequentially, not atomically,
and repeating the full dump naturally resolves dependencies without
the need to actually calculate them.
I'm not kidding, it actually works pretty well.
Link: https://github.com/containers/podman/issues/18539
Link: https://bugs.passt.top/show_bug.cgi?id=49
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
2023-05-14 13:49:43 +02:00
|
|
|
|
2023-08-03 09:19:42 +02:00
|
|
|
if ((n = nl_req(0, buf, &req, req.nlh.nlmsg_len)) < 0)
|
2021-10-11 12:01:31 +02:00
|
|
|
return;
|
|
|
|
|
netlink: Add functionality to copy routes from outer namespace
Instead of just fetching the default gateway and configuring a single
equivalent route in the target namespace, on 'pasta --config-net', it
might be desirable in some cases to copy the whole set of routes
corresponding to a given output interface.
For instance, in:
https://github.com/containers/podman/issues/18539
IPv4 Default Route Does Not Propagate to Pasta Containers on Hetzner VPSes
configuring the default gateway won't work without a gateway-less
route (specifying the output interface only), because the default
gateway is, somewhat dubiously, not on the same subnet as the
container.
This is a similar case to the one covered by commit 7656a6f88882
("conf: Adjust netmask on mismatch between IPv4 address/netmask and
gateway"), and I'm not exactly proud of that workaround.
We also have:
https://bugs.passt.top/show_bug.cgi?id=49
pasta does not work with tap-style interface
for which, eventually, we should be able to configure a gateway-less
route in the target namespace.
Introduce different operation modes for nl_route(), including a new
NL_DUP one, not exposed yet, which simply parrots back to the kernel
the route dump for a given interface from the outer namespace, fixing
up flags and interface indices on the way, and requesting to add the
same routes in the target namespace, on the interface we manage.
For n routes we want to duplicate, send n identical netlink requests
including the full dump: routes might depend on each other and the
kernel processes RTM_NEWROUTE messages sequentially, not atomically,
and repeating the full dump naturally resolves dependencies without
the need to actually calculate them.
I'm not kidding, it actually works pretty well.
Link: https://github.com/containers/podman/issues/18539
Link: https://bugs.passt.top/show_bug.cgi?id=49
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
2023-05-14 13:49:43 +02:00
|
|
|
nlmsgs_size = n;
|
|
|
|
|
2023-08-03 09:19:42 +02:00
|
|
|
for (nh = (struct nlmsghdr *)buf;
|
|
|
|
NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
|
|
|
|
nh = NLMSG_NEXT(nh, n)) {
|
|
|
|
struct rtmsg *rtm = (struct rtmsg *)NLMSG_DATA(nh);
|
|
|
|
struct rtattr *rta;
|
|
|
|
size_t na;
|
netlink: Add functionality to copy routes from outer namespace
Instead of just fetching the default gateway and configuring a single
equivalent route in the target namespace, on 'pasta --config-net', it
might be desirable in some cases to copy the whole set of routes
corresponding to a given output interface.
For instance, in:
https://github.com/containers/podman/issues/18539
IPv4 Default Route Does Not Propagate to Pasta Containers on Hetzner VPSes
configuring the default gateway won't work without a gateway-less
route (specifying the output interface only), because the default
gateway is, somewhat dubiously, not on the same subnet as the
container.
This is a similar case to the one covered by commit 7656a6f88882
("conf: Adjust netmask on mismatch between IPv4 address/netmask and
gateway"), and I'm not exactly proud of that workaround.
We also have:
https://bugs.passt.top/show_bug.cgi?id=49
pasta does not work with tap-style interface
for which, eventually, we should be able to configure a gateway-less
route in the target namespace.
Introduce different operation modes for nl_route(), including a new
NL_DUP one, not exposed yet, which simply parrots back to the kernel
the route dump for a given interface from the outer namespace, fixing
up flags and interface indices on the way, and requesting to add the
same routes in the target namespace, on the interface we manage.
For n routes we want to duplicate, send n identical netlink requests
including the full dump: routes might depend on each other and the
kernel processes RTM_NEWROUTE messages sequentially, not atomically,
and repeating the full dump naturally resolves dependencies without
the need to actually calculate them.
I'm not kidding, it actually works pretty well.
Link: https://github.com/containers/podman/issues/18539
Link: https://bugs.passt.top/show_bug.cgi?id=49
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
2023-05-14 13:49:43 +02:00
|
|
|
|
2023-08-03 09:19:42 +02:00
|
|
|
if (nh->nlmsg_type != RTM_NEWROUTE)
|
2021-10-11 12:01:31 +02:00
|
|
|
continue;
|
|
|
|
|
2023-08-03 09:19:42 +02:00
|
|
|
nh->nlmsg_seq = nl_seq++;
|
|
|
|
nh->nlmsg_pid = 0;
|
|
|
|
nh->nlmsg_flags &= ~NLM_F_DUMP_FILTERED;
|
|
|
|
nh->nlmsg_flags |= NLM_F_REQUEST | NLM_F_ACK |
|
|
|
|
NLM_F_CREATE;
|
|
|
|
dup_routes++;
|
|
|
|
|
2021-10-20 00:05:11 +02:00
|
|
|
for (rta = RTM_RTA(rtm), na = RTM_PAYLOAD(nh); RTA_OK(rta, na);
|
|
|
|
rta = RTA_NEXT(rta, na)) {
|
2023-08-03 09:19:42 +02:00
|
|
|
if (rta->rta_type == RTA_OIF)
|
netlink: Add functionality to copy routes from outer namespace
Instead of just fetching the default gateway and configuring a single
equivalent route in the target namespace, on 'pasta --config-net', it
might be desirable in some cases to copy the whole set of routes
corresponding to a given output interface.
For instance, in:
https://github.com/containers/podman/issues/18539
IPv4 Default Route Does Not Propagate to Pasta Containers on Hetzner VPSes
configuring the default gateway won't work without a gateway-less
route (specifying the output interface only), because the default
gateway is, somewhat dubiously, not on the same subnet as the
container.
This is a similar case to the one covered by commit 7656a6f88882
("conf: Adjust netmask on mismatch between IPv4 address/netmask and
gateway"), and I'm not exactly proud of that workaround.
We also have:
https://bugs.passt.top/show_bug.cgi?id=49
pasta does not work with tap-style interface
for which, eventually, we should be able to configure a gateway-less
route in the target namespace.
Introduce different operation modes for nl_route(), including a new
NL_DUP one, not exposed yet, which simply parrots back to the kernel
the route dump for a given interface from the outer namespace, fixing
up flags and interface indices on the way, and requesting to add the
same routes in the target namespace, on the interface we manage.
For n routes we want to duplicate, send n identical netlink requests
including the full dump: routes might depend on each other and the
kernel processes RTM_NEWROUTE messages sequentially, not atomically,
and repeating the full dump naturally resolves dependencies without
the need to actually calculate them.
I'm not kidding, it actually works pretty well.
Link: https://github.com/containers/podman/issues/18539
Link: https://bugs.passt.top/show_bug.cgi?id=49
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
2023-05-14 13:49:43 +02:00
|
|
|
*(unsigned int *)RTA_DATA(rta) = ifi_ns;
|
2021-10-11 12:01:31 +02:00
|
|
|
}
|
|
|
|
}
|
netlink: Add functionality to copy routes from outer namespace
Instead of just fetching the default gateway and configuring a single
equivalent route in the target namespace, on 'pasta --config-net', it
might be desirable in some cases to copy the whole set of routes
corresponding to a given output interface.
For instance, in:
https://github.com/containers/podman/issues/18539
IPv4 Default Route Does Not Propagate to Pasta Containers on Hetzner VPSes
configuring the default gateway won't work without a gateway-less
route (specifying the output interface only), because the default
gateway is, somewhat dubiously, not on the same subnet as the
container.
This is a similar case to the one covered by commit 7656a6f88882
("conf: Adjust netmask on mismatch between IPv4 address/netmask and
gateway"), and I'm not exactly proud of that workaround.
We also have:
https://bugs.passt.top/show_bug.cgi?id=49
pasta does not work with tap-style interface
for which, eventually, we should be able to configure a gateway-less
route in the target namespace.
Introduce different operation modes for nl_route(), including a new
NL_DUP one, not exposed yet, which simply parrots back to the kernel
the route dump for a given interface from the outer namespace, fixing
up flags and interface indices on the way, and requesting to add the
same routes in the target namespace, on the interface we manage.
For n routes we want to duplicate, send n identical netlink requests
including the full dump: routes might depend on each other and the
kernel processes RTM_NEWROUTE messages sequentially, not atomically,
and repeating the full dump naturally resolves dependencies without
the need to actually calculate them.
I'm not kidding, it actually works pretty well.
Link: https://github.com/containers/podman/issues/18539
Link: https://bugs.passt.top/show_bug.cgi?id=49
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
2023-05-14 13:49:43 +02:00
|
|
|
|
2023-08-03 09:19:42 +02:00
|
|
|
nh = (struct nlmsghdr *)buf;
|
|
|
|
/* Routes might have dependencies between each other, and the kernel
|
|
|
|
* processes RTM_NEWROUTE messages sequentially. For n valid routes, we
|
|
|
|
* might need to send up to n requests to get all of them inserted.
|
|
|
|
* Routes that have been already inserted won't cause the whole request
|
|
|
|
* to fail, so we can simply repeat the whole request. This approach
|
|
|
|
* avoids the need to calculate dependencies: let the kernel do that.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < dup_routes; i++)
|
|
|
|
nl_req(1, resp, nh, nlmsgs_size);
|
2021-10-11 12:01:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2023-08-03 09:19:41 +02:00
|
|
|
* nl_addr_get() - Get IP address for given interface and address family
|
2023-05-14 18:44:53 +02:00
|
|
|
* @ifi: Interface index in outer network namespace
|
2021-10-11 12:01:31 +02:00
|
|
|
* @af: Address family
|
2023-08-03 09:19:41 +02:00
|
|
|
* @addr: Global address to fill
|
|
|
|
* @prefix_len: Mask or prefix length, to fill (for IPv4)
|
|
|
|
* @addr_l: Link-scoped address to fill (for IPv6)
|
|
|
|
*/
|
|
|
|
void nl_addr_get(unsigned int ifi, sa_family_t af, void *addr,
|
|
|
|
int *prefix_len, void *addr_l)
|
|
|
|
{
|
|
|
|
struct req_t {
|
|
|
|
struct nlmsghdr nlh;
|
|
|
|
struct ifaddrmsg ifa;
|
|
|
|
} req = {
|
|
|
|
.nlh.nlmsg_type = RTM_GETADDR,
|
|
|
|
.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
|
|
|
|
.nlh.nlmsg_len = sizeof(req),
|
|
|
|
.nlh.nlmsg_seq = nl_seq++,
|
|
|
|
|
|
|
|
.ifa.ifa_family = af,
|
|
|
|
.ifa.ifa_index = ifi,
|
|
|
|
};
|
|
|
|
struct nlmsghdr *nh;
|
|
|
|
char buf[NLBUFSIZ];
|
|
|
|
ssize_t n;
|
|
|
|
|
|
|
|
if ((n = nl_req(0, buf, &req, req.nlh.nlmsg_len)) < 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (nh = (struct nlmsghdr *)buf;
|
|
|
|
NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
|
|
|
|
nh = NLMSG_NEXT(nh, n)) {
|
|
|
|
struct ifaddrmsg *ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
|
|
|
|
struct rtattr *rta;
|
|
|
|
size_t na;
|
|
|
|
|
|
|
|
if (nh->nlmsg_type != RTM_NEWADDR)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (ifa->ifa_index != ifi)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
for (rta = IFA_RTA(ifa), na = RTM_PAYLOAD(nh); RTA_OK(rta, na);
|
|
|
|
rta = RTA_NEXT(rta, na)) {
|
|
|
|
if (rta->rta_type != IFA_ADDRESS)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (af == AF_INET) {
|
|
|
|
memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
|
|
|
|
*prefix_len = ifa->ifa_prefixlen;
|
|
|
|
} else if (af == AF_INET6 && addr &&
|
|
|
|
ifa->ifa_scope == RT_SCOPE_UNIVERSE) {
|
|
|
|
memcpy(addr, RTA_DATA(rta), RTA_PAYLOAD(rta));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (addr_l &&
|
|
|
|
af == AF_INET6 && ifa->ifa_scope == RT_SCOPE_LINK)
|
|
|
|
memcpy(addr_l, RTA_DATA(rta), RTA_PAYLOAD(rta));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* nl_add_set() - Set IP addresses for given interface and address family
|
|
|
|
* @ifi: Interface index
|
|
|
|
* @af: Address family
|
|
|
|
* @addr: Global address to set
|
|
|
|
* @prefix_len: Mask or prefix length to set
|
2021-10-11 12:01:31 +02:00
|
|
|
*/
|
2023-08-03 09:19:41 +02:00
|
|
|
void nl_addr_set(unsigned int ifi, sa_family_t af, void *addr, int prefix_len)
|
2021-10-11 12:01:31 +02:00
|
|
|
{
|
|
|
|
struct req_t {
|
|
|
|
struct nlmsghdr nlh;
|
|
|
|
struct ifaddrmsg ifa;
|
|
|
|
union {
|
|
|
|
struct {
|
|
|
|
struct rtattr rta_l;
|
|
|
|
uint32_t l;
|
|
|
|
struct rtattr rta_a;
|
|
|
|
uint32_t a;
|
|
|
|
} a4;
|
|
|
|
struct {
|
|
|
|
struct rtattr rta_l;
|
|
|
|
struct in6_addr l;
|
|
|
|
struct rtattr rta_a;
|
|
|
|
struct in6_addr a;
|
|
|
|
} a6;
|
2021-10-21 04:26:08 +02:00
|
|
|
} set;
|
2021-10-11 12:01:31 +02:00
|
|
|
} req = {
|
2023-08-03 09:19:41 +02:00
|
|
|
.nlh.nlmsg_type = RTM_NEWADDR,
|
|
|
|
.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK |
|
|
|
|
NLM_F_CREATE | NLM_F_EXCL,
|
2021-10-11 12:01:31 +02:00
|
|
|
.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
|
|
|
|
.nlh.nlmsg_seq = nl_seq++,
|
|
|
|
|
|
|
|
.ifa.ifa_family = af,
|
2023-08-03 09:19:41 +02:00
|
|
|
.ifa.ifa_index = ifi,
|
|
|
|
.ifa.ifa_prefixlen = prefix_len,
|
|
|
|
.ifa.ifa_scope = RT_SCOPE_UNIVERSE,
|
2021-10-11 12:01:31 +02:00
|
|
|
};
|
2023-03-08 03:43:25 +01:00
|
|
|
char buf[NLBUFSIZ];
|
2021-10-21 04:26:08 +02:00
|
|
|
|
2023-08-03 09:19:41 +02:00
|
|
|
if (af == AF_INET6) {
|
|
|
|
size_t rta_len = RTA_LENGTH(sizeof(req.set.a6.l));
|
2022-10-11 00:36:30 +02:00
|
|
|
|
2023-08-03 09:19:41 +02:00
|
|
|
/* By default, strictly speaking, it's duplicated */
|
|
|
|
req.ifa.ifa_flags = IFA_F_NODAD;
|
2021-10-11 12:01:31 +02:00
|
|
|
|
2023-08-03 09:19:41 +02:00
|
|
|
req.nlh.nlmsg_len = offsetof(struct req_t, set.a6)
|
|
|
|
+ sizeof(req.set.a6);
|
2021-10-21 04:26:08 +02:00
|
|
|
|
2023-08-03 09:19:41 +02:00
|
|
|
memcpy(&req.set.a6.l, addr, sizeof(req.set.a6.l));
|
|
|
|
req.set.a6.rta_l.rta_len = rta_len;
|
|
|
|
req.set.a4.rta_l.rta_type = IFA_LOCAL;
|
|
|
|
memcpy(&req.set.a6.a, addr, sizeof(req.set.a6.a));
|
|
|
|
req.set.a6.rta_a.rta_len = rta_len;
|
|
|
|
req.set.a6.rta_a.rta_type = IFA_ADDRESS;
|
|
|
|
} else {
|
|
|
|
size_t rta_len = RTA_LENGTH(sizeof(req.set.a4.l));
|
2021-10-11 12:01:31 +02:00
|
|
|
|
2023-08-03 09:19:41 +02:00
|
|
|
req.nlh.nlmsg_len = offsetof(struct req_t, set.a4)
|
|
|
|
+ sizeof(req.set.a4);
|
2021-10-11 12:01:31 +02:00
|
|
|
|
2023-08-03 09:19:41 +02:00
|
|
|
req.set.a4.l = req.set.a4.a = *(uint32_t *)addr;
|
|
|
|
req.set.a4.rta_l.rta_len = rta_len;
|
|
|
|
req.set.a4.rta_l.rta_type = IFA_LOCAL;
|
|
|
|
req.set.a4.rta_a.rta_len = rta_len;
|
|
|
|
req.set.a4.rta_a.rta_type = IFA_ADDRESS;
|
2021-10-11 12:01:31 +02:00
|
|
|
}
|
|
|
|
|
2023-08-03 09:19:41 +02:00
|
|
|
nl_req(1, buf, &req, req.nlh.nlmsg_len);
|
|
|
|
}
|
2023-05-14 18:44:53 +02:00
|
|
|
|
2023-08-03 09:19:41 +02:00
|
|
|
/**
|
|
|
|
* nl_addr_dup() - Copy IP addresses for given interface and address family
|
|
|
|
* @ifi: Interface index in outer network namespace
|
|
|
|
* @ifi_ns: Interface index in target namespace
|
|
|
|
* @af: Address family
|
|
|
|
*/
|
|
|
|
void nl_addr_dup(unsigned int ifi, unsigned int ifi_ns, sa_family_t af)
|
|
|
|
{
|
|
|
|
struct req_t {
|
|
|
|
struct nlmsghdr nlh;
|
|
|
|
struct ifaddrmsg ifa;
|
|
|
|
} req = {
|
|
|
|
.nlh.nlmsg_type = RTM_GETADDR,
|
|
|
|
.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
|
|
|
|
.nlh.nlmsg_len = sizeof(req),
|
|
|
|
.nlh.nlmsg_seq = nl_seq++,
|
|
|
|
|
|
|
|
.ifa.ifa_family = af,
|
|
|
|
.ifa.ifa_index = ifi,
|
|
|
|
.ifa.ifa_prefixlen = 0,
|
|
|
|
};
|
|
|
|
char buf[NLBUFSIZ], resp[NLBUFSIZ];
|
|
|
|
ssize_t n, nlmsgs_size;
|
|
|
|
struct nlmsghdr *nh;
|
|
|
|
|
|
|
|
if ((n = nl_req(0, buf, &req, sizeof(req))) < 0)
|
2021-10-11 12:01:31 +02:00
|
|
|
return;
|
|
|
|
|
2023-05-14 18:44:53 +02:00
|
|
|
nlmsgs_size = n;
|
|
|
|
|
2023-08-03 09:19:41 +02:00
|
|
|
for (nh = (struct nlmsghdr *)buf;
|
|
|
|
NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
|
|
|
|
nh = NLMSG_NEXT(nh, n)) {
|
|
|
|
struct ifaddrmsg *ifa;
|
|
|
|
struct rtattr *rta;
|
|
|
|
size_t na;
|
|
|
|
|
2021-10-11 12:01:31 +02:00
|
|
|
if (nh->nlmsg_type != RTM_NEWADDR)
|
2023-08-03 09:19:41 +02:00
|
|
|
continue;
|
2021-10-11 12:01:31 +02:00
|
|
|
|
2023-08-03 09:19:41 +02:00
|
|
|
nh->nlmsg_seq = nl_seq++;
|
|
|
|
nh->nlmsg_pid = 0;
|
|
|
|
nh->nlmsg_flags &= ~NLM_F_DUMP_FILTERED;
|
|
|
|
nh->nlmsg_flags |= NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
|
2023-05-14 18:44:53 +02:00
|
|
|
|
2021-10-11 12:01:31 +02:00
|
|
|
ifa = (struct ifaddrmsg *)NLMSG_DATA(nh);
|
2023-05-14 18:44:53 +02:00
|
|
|
|
2023-08-03 09:19:41 +02:00
|
|
|
if (ifa->ifa_scope == RT_SCOPE_LINK || ifa->ifa_index != ifi) {
|
2023-05-14 18:44:53 +02:00
|
|
|
ifa->ifa_family = AF_UNSPEC;
|
2023-08-03 09:19:41 +02:00
|
|
|
continue;
|
2023-05-14 18:44:53 +02:00
|
|
|
}
|
|
|
|
|
2023-08-03 09:19:41 +02:00
|
|
|
ifa->ifa_index = ifi_ns;
|
2023-05-14 18:44:53 +02:00
|
|
|
|
2021-10-20 00:05:11 +02:00
|
|
|
for (rta = IFA_RTA(ifa), na = RTM_PAYLOAD(nh); RTA_OK(rta, na);
|
|
|
|
rta = RTA_NEXT(rta, na)) {
|
2023-08-03 09:19:41 +02:00
|
|
|
if (rta->rta_type == IFA_LABEL)
|
2023-05-14 18:44:53 +02:00
|
|
|
rta->rta_type = IFA_UNSPEC;
|
2021-10-11 12:01:31 +02:00
|
|
|
}
|
|
|
|
}
|
2023-05-14 18:44:53 +02:00
|
|
|
|
2023-08-03 09:19:41 +02:00
|
|
|
nl_req(1, resp, buf, nlmsgs_size);
|
2021-10-11 12:01:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
netlink: Split up functionality of nl_link()
nl_link() performs a number of functions: it can bring links up, set MAC
address and MTU and also retrieve the existing MAC. This makes for a small
number of lines of code, but high conceptual complexity: it's quite hard
to follow what's going on both in nl_link() itself and it's also not very
obvious which function its callers are intending to use.
Clarify this, by splitting nl_link() into nl_link_up(), nl_link_set_mac(),
and nl_link_get_mac(). The first brings up a link, optionally setting the
MTU, the others get or set the MAC address.
This fixes an arguable bug in pasta_ns_conf(): it looks as though that was
intended to retrieve the guest MAC whether or not c->pasta_conf_ns is set.
However, it only actually does so in the !c->pasta_conf_ns case: the fact
that we set up==1 means we would only ever set, never get, the MAC in the
nl_link() call in the other path. We get away with this because the MAC
will quickly be discovered once we receive packets on the tap interface.
Still, it's neater to always get the MAC address here.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2023-08-03 09:19:40 +02:00
|
|
|
* nl_link_get_mac() - Get link MAC address
|
2021-10-11 12:01:31 +02:00
|
|
|
* @ns: Use netlink socket in namespace
|
|
|
|
* @ifi: Interface index
|
netlink: Split up functionality of nl_link()
nl_link() performs a number of functions: it can bring links up, set MAC
address and MTU and also retrieve the existing MAC. This makes for a small
number of lines of code, but high conceptual complexity: it's quite hard
to follow what's going on both in nl_link() itself and it's also not very
obvious which function its callers are intending to use.
Clarify this, by splitting nl_link() into nl_link_up(), nl_link_set_mac(),
and nl_link_get_mac(). The first brings up a link, optionally setting the
MTU, the others get or set the MAC address.
This fixes an arguable bug in pasta_ns_conf(): it looks as though that was
intended to retrieve the guest MAC whether or not c->pasta_conf_ns is set.
However, it only actually does so in the !c->pasta_conf_ns case: the fact
that we set up==1 means we would only ever set, never get, the MAC in the
nl_link() call in the other path. We get away with this because the MAC
will quickly be discovered once we receive packets on the tap interface.
Still, it's neater to always get the MAC address here.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2023-08-03 09:19:40 +02:00
|
|
|
* @mac: Fill with current MAC address
|
2021-10-11 12:01:31 +02:00
|
|
|
*/
|
netlink: Split up functionality of nl_link()
nl_link() performs a number of functions: it can bring links up, set MAC
address and MTU and also retrieve the existing MAC. This makes for a small
number of lines of code, but high conceptual complexity: it's quite hard
to follow what's going on both in nl_link() itself and it's also not very
obvious which function its callers are intending to use.
Clarify this, by splitting nl_link() into nl_link_up(), nl_link_set_mac(),
and nl_link_get_mac(). The first brings up a link, optionally setting the
MTU, the others get or set the MAC address.
This fixes an arguable bug in pasta_ns_conf(): it looks as though that was
intended to retrieve the guest MAC whether or not c->pasta_conf_ns is set.
However, it only actually does so in the !c->pasta_conf_ns case: the fact
that we set up==1 means we would only ever set, never get, the MAC in the
nl_link() call in the other path. We get away with this because the MAC
will quickly be discovered once we receive packets on the tap interface.
Still, it's neater to always get the MAC address here.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2023-08-03 09:19:40 +02:00
|
|
|
void nl_link_get_mac(int ns, unsigned int ifi, void *mac)
|
2021-10-11 12:01:31 +02:00
|
|
|
{
|
2022-02-23 10:50:09 +01:00
|
|
|
struct req_t {
|
2021-10-11 12:01:31 +02:00
|
|
|
struct nlmsghdr nlh;
|
|
|
|
struct ifinfomsg ifm;
|
|
|
|
} req = {
|
netlink: Split up functionality of nl_link()
nl_link() performs a number of functions: it can bring links up, set MAC
address and MTU and also retrieve the existing MAC. This makes for a small
number of lines of code, but high conceptual complexity: it's quite hard
to follow what's going on both in nl_link() itself and it's also not very
obvious which function its callers are intending to use.
Clarify this, by splitting nl_link() into nl_link_up(), nl_link_set_mac(),
and nl_link_get_mac(). The first brings up a link, optionally setting the
MTU, the others get or set the MAC address.
This fixes an arguable bug in pasta_ns_conf(): it looks as though that was
intended to retrieve the guest MAC whether or not c->pasta_conf_ns is set.
However, it only actually does so in the !c->pasta_conf_ns case: the fact
that we set up==1 means we would only ever set, never get, the MAC in the
nl_link() call in the other path. We get away with this because the MAC
will quickly be discovered once we receive packets on the tap interface.
Still, it's neater to always get the MAC address here.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2023-08-03 09:19:40 +02:00
|
|
|
.nlh.nlmsg_type = RTM_GETLINK,
|
|
|
|
.nlh.nlmsg_len = sizeof(req),
|
|
|
|
.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
|
2021-10-14 13:05:56 +02:00
|
|
|
.nlh.nlmsg_seq = nl_seq++,
|
|
|
|
.ifm.ifi_family = AF_UNSPEC,
|
|
|
|
.ifm.ifi_index = ifi,
|
2021-10-11 12:01:31 +02:00
|
|
|
};
|
|
|
|
struct nlmsghdr *nh;
|
2023-03-08 03:43:25 +01:00
|
|
|
char buf[NLBUFSIZ];
|
2022-04-05 07:10:30 +02:00
|
|
|
ssize_t n;
|
|
|
|
|
netlink: Split up functionality of nl_link()
nl_link() performs a number of functions: it can bring links up, set MAC
address and MTU and also retrieve the existing MAC. This makes for a small
number of lines of code, but high conceptual complexity: it's quite hard
to follow what's going on both in nl_link() itself and it's also not very
obvious which function its callers are intending to use.
Clarify this, by splitting nl_link() into nl_link_up(), nl_link_set_mac(),
and nl_link_get_mac(). The first brings up a link, optionally setting the
MTU, the others get or set the MAC address.
This fixes an arguable bug in pasta_ns_conf(): it looks as though that was
intended to retrieve the guest MAC whether or not c->pasta_conf_ns is set.
However, it only actually does so in the !c->pasta_conf_ns case: the fact
that we set up==1 means we would only ever set, never get, the MAC in the
nl_link() call in the other path. We get away with this because the MAC
will quickly be discovered once we receive packets on the tap interface.
Still, it's neater to always get the MAC address here.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2023-08-03 09:19:40 +02:00
|
|
|
n = nl_req(ns, buf, &req, sizeof(req));
|
|
|
|
if (n < 0)
|
2021-10-11 12:01:31 +02:00
|
|
|
return;
|
|
|
|
|
netlink: Split up functionality of nl_link()
nl_link() performs a number of functions: it can bring links up, set MAC
address and MTU and also retrieve the existing MAC. This makes for a small
number of lines of code, but high conceptual complexity: it's quite hard
to follow what's going on both in nl_link() itself and it's also not very
obvious which function its callers are intending to use.
Clarify this, by splitting nl_link() into nl_link_up(), nl_link_set_mac(),
and nl_link_get_mac(). The first brings up a link, optionally setting the
MTU, the others get or set the MAC address.
This fixes an arguable bug in pasta_ns_conf(): it looks as though that was
intended to retrieve the guest MAC whether or not c->pasta_conf_ns is set.
However, it only actually does so in the !c->pasta_conf_ns case: the fact
that we set up==1 means we would only ever set, never get, the MAC in the
nl_link() call in the other path. We get away with this because the MAC
will quickly be discovered once we receive packets on the tap interface.
Still, it's neater to always get the MAC address here.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2023-08-03 09:19:40 +02:00
|
|
|
for (nh = (struct nlmsghdr *)buf;
|
|
|
|
NLMSG_OK(nh, n) && nh->nlmsg_type != NLMSG_DONE;
|
|
|
|
nh = NLMSG_NEXT(nh, n)) {
|
|
|
|
struct ifinfomsg *ifm = (struct ifinfomsg *)NLMSG_DATA(nh);
|
|
|
|
struct rtattr *rta;
|
|
|
|
size_t na;
|
2021-10-14 13:05:56 +02:00
|
|
|
|
2021-10-11 12:01:31 +02:00
|
|
|
if (nh->nlmsg_type != RTM_NEWLINK)
|
netlink: Split up functionality of nl_link()
nl_link() performs a number of functions: it can bring links up, set MAC
address and MTU and also retrieve the existing MAC. This makes for a small
number of lines of code, but high conceptual complexity: it's quite hard
to follow what's going on both in nl_link() itself and it's also not very
obvious which function its callers are intending to use.
Clarify this, by splitting nl_link() into nl_link_up(), nl_link_set_mac(),
and nl_link_get_mac(). The first brings up a link, optionally setting the
MTU, the others get or set the MAC address.
This fixes an arguable bug in pasta_ns_conf(): it looks as though that was
intended to retrieve the guest MAC whether or not c->pasta_conf_ns is set.
However, it only actually does so in the !c->pasta_conf_ns case: the fact
that we set up==1 means we would only ever set, never get, the MAC in the
nl_link() call in the other path. We get away with this because the MAC
will quickly be discovered once we receive packets on the tap interface.
Still, it's neater to always get the MAC address here.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2023-08-03 09:19:40 +02:00
|
|
|
continue;
|
2021-10-11 12:01:31 +02:00
|
|
|
|
netlink: Split up functionality of nl_link()
nl_link() performs a number of functions: it can bring links up, set MAC
address and MTU and also retrieve the existing MAC. This makes for a small
number of lines of code, but high conceptual complexity: it's quite hard
to follow what's going on both in nl_link() itself and it's also not very
obvious which function its callers are intending to use.
Clarify this, by splitting nl_link() into nl_link_up(), nl_link_set_mac(),
and nl_link_get_mac(). The first brings up a link, optionally setting the
MTU, the others get or set the MAC address.
This fixes an arguable bug in pasta_ns_conf(): it looks as though that was
intended to retrieve the guest MAC whether or not c->pasta_conf_ns is set.
However, it only actually does so in the !c->pasta_conf_ns case: the fact
that we set up==1 means we would only ever set, never get, the MAC in the
nl_link() call in the other path. We get away with this because the MAC
will quickly be discovered once we receive packets on the tap interface.
Still, it's neater to always get the MAC address here.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2023-08-03 09:19:40 +02:00
|
|
|
for (rta = IFLA_RTA(ifm), na = RTM_PAYLOAD(nh);
|
|
|
|
RTA_OK(rta, na);
|
2021-10-20 00:05:11 +02:00
|
|
|
rta = RTA_NEXT(rta, na)) {
|
2021-10-11 12:01:31 +02:00
|
|
|
if (rta->rta_type != IFLA_ADDRESS)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
memcpy(mac, RTA_DATA(rta), ETH_ALEN);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
netlink: Split up functionality of nl_link()
nl_link() performs a number of functions: it can bring links up, set MAC
address and MTU and also retrieve the existing MAC. This makes for a small
number of lines of code, but high conceptual complexity: it's quite hard
to follow what's going on both in nl_link() itself and it's also not very
obvious which function its callers are intending to use.
Clarify this, by splitting nl_link() into nl_link_up(), nl_link_set_mac(),
and nl_link_get_mac(). The first brings up a link, optionally setting the
MTU, the others get or set the MAC address.
This fixes an arguable bug in pasta_ns_conf(): it looks as though that was
intended to retrieve the guest MAC whether or not c->pasta_conf_ns is set.
However, it only actually does so in the !c->pasta_conf_ns case: the fact
that we set up==1 means we would only ever set, never get, the MAC in the
nl_link() call in the other path. We get away with this because the MAC
will quickly be discovered once we receive packets on the tap interface.
Still, it's neater to always get the MAC address here.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2023-08-03 09:19:40 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* nl_link_set_mac() - Set link MAC address
|
|
|
|
* @ns: Use netlink socket in namespace
|
|
|
|
* @ifi: Interface index
|
|
|
|
* @mac: MAC address to set
|
|
|
|
*/
|
|
|
|
void nl_link_set_mac(int ns, unsigned int ifi, void *mac)
|
|
|
|
{
|
|
|
|
struct req_t {
|
|
|
|
struct nlmsghdr nlh;
|
|
|
|
struct ifinfomsg ifm;
|
|
|
|
struct rtattr rta;
|
|
|
|
unsigned char mac[ETH_ALEN];
|
|
|
|
} req = {
|
|
|
|
.nlh.nlmsg_type = RTM_NEWLINK,
|
|
|
|
.nlh.nlmsg_len = sizeof(req),
|
|
|
|
.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
|
|
|
|
.nlh.nlmsg_seq = nl_seq++,
|
|
|
|
.ifm.ifi_family = AF_UNSPEC,
|
|
|
|
.ifm.ifi_index = ifi,
|
|
|
|
.rta.rta_type = IFLA_ADDRESS,
|
|
|
|
.rta.rta_len = RTA_LENGTH(ETH_ALEN),
|
|
|
|
};
|
|
|
|
char buf[NLBUFSIZ];
|
|
|
|
|
|
|
|
memcpy(req.mac, mac, ETH_ALEN);
|
|
|
|
|
|
|
|
nl_req(ns, buf, &req, sizeof(req));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* nl_link_up() - Bring link up
|
|
|
|
* @ns: Use netlink socket in namespace
|
|
|
|
* @ifi: Interface index
|
|
|
|
* @mtu: If non-zero, set interface MTU
|
|
|
|
*/
|
|
|
|
void nl_link_up(int ns, unsigned int ifi, int mtu)
|
|
|
|
{
|
|
|
|
struct req_t {
|
|
|
|
struct nlmsghdr nlh;
|
|
|
|
struct ifinfomsg ifm;
|
|
|
|
struct rtattr rta;
|
|
|
|
unsigned int mtu;
|
|
|
|
} req = {
|
|
|
|
.nlh.nlmsg_type = RTM_NEWLINK,
|
|
|
|
.nlh.nlmsg_len = sizeof(req),
|
|
|
|
.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
|
|
|
|
.nlh.nlmsg_seq = nl_seq++,
|
|
|
|
.ifm.ifi_family = AF_UNSPEC,
|
|
|
|
.ifm.ifi_index = ifi,
|
|
|
|
.ifm.ifi_flags = IFF_UP,
|
|
|
|
.ifm.ifi_change = IFF_UP,
|
|
|
|
.rta.rta_type = IFLA_MTU,
|
|
|
|
.rta.rta_len = RTA_LENGTH(sizeof(unsigned int)),
|
|
|
|
.mtu = mtu,
|
|
|
|
};
|
|
|
|
char buf[NLBUFSIZ];
|
|
|
|
|
|
|
|
if (!mtu)
|
|
|
|
/* Shorten request to drop MTU attribute */
|
|
|
|
req.nlh.nlmsg_len = offsetof(struct req_t, rta);
|
|
|
|
|
|
|
|
nl_req(ns, buf, &req, req.nlh.nlmsg_len);
|
|
|
|
}
|