conf: Bind inbound ports with CAP_NET_BIND_SERVICE before isolate_user()

Even if CAP_NET_BIND_SERVICE is granted, we'll lose the capability in
the target user namespace as we isolate the process, which means
we're unable to bind to low ports at that point.

Bind inbound ports, and only those, before isolate_user(). Keep the
handling of outbound ports (for pasta mode only) after the setup of
the namespace, because that's where we'll bind them.

To this end, initialise the netlink socket for the init namespace
before isolate_user() as well, as we actually need to know the
addresses of the upstream interface before binding ports, in case
they're not explicitly passed by the user.

As we now call nl_sock_init() twice, checking its return code from
conf() twice looks a bit heavy: make it exit(), instead, as we
can't do much if we don't have netlink sockets.

While at it:

- move the v4_only && v6_only options check just after the first
  option processing loop, as this is more strictly related to
  option parsing proper

- update the man page, explaining that CAP_NET_BIND_SERVICE is
  *not* the preferred way to bind ports, because passt and pasta
  can be abused to allow other processes to make effective usage
  of it. Add a note about the recommended sysctl instead

- simplify nl_sock_init_do() now that it's called once for each
  case

Reported-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
Stefano Brivio 2022-10-13 18:21:27 +02:00
parent 40abd447c8
commit 3e2eb4337b
5 changed files with 99 additions and 64 deletions

71
conf.c
View file

@ -1530,6 +1530,11 @@ void conf(struct ctx *c, int argc, char **argv)
}
} while (name != -1);
if (v4_only && v6_only) {
err("Options ipv4-only and ipv6-only are mutually exclusive");
usage(argv[0]);
}
ret = conf_ugid(runas, &uid, &gid);
if (ret)
usage(argv[0]);
@ -1539,6 +1544,30 @@ void conf(struct ctx *c, int argc, char **argv)
logfile, logsize);
}
nl_sock_init(c, false);
if (!v6_only)
c->ifi4 = conf_ip4(ifi, &c->ip4, c->mac);
if (!v4_only)
c->ifi6 = conf_ip6(ifi, &c->ip6, c->mac);
if (!c->ifi4 && !c->ifi6) {
err("External interface not usable");
exit(EXIT_FAILURE);
}
/* Inbound port options can be parsed now (after IPv4/IPv6 settings) */
optind = 1;
do {
struct port_fwd *fwd;
name = getopt_long(argc, argv, optstring, options, NULL);
if ((name == 't' && (fwd = &c->tcp.fwd_in)) ||
(name == 'u' && (fwd = &c->udp.fwd_in.f))) {
if (!optarg || conf_ports(c, name, optarg, fwd))
usage(argv[0]);
}
} while (name != -1);
if (c->mode == MODE_PASTA) {
if (conf_pasta_ns(&netns_only, userns, netns,
optind, argc, argv) < 0)
@ -1561,50 +1590,20 @@ void conf(struct ctx *c, int argc, char **argv)
}
}
if (nl_sock_init(c)) {
err("Failed to get netlink socket");
exit(EXIT_FAILURE);
}
if (c->mode == MODE_PASTA)
nl_sock_init(c, true);
if (v4_only && v6_only) {
err("Options ipv4-only and ipv6-only are mutually exclusive");
usage(argv[0]);
}
if (!v6_only)
c->ifi4 = conf_ip4(ifi, &c->ip4, c->mac);
if (!v4_only)
c->ifi6 = conf_ip6(ifi, &c->ip6, c->mac);
if (!c->ifi4 && !c->ifi6) {
err("External interface not usable");
exit(EXIT_FAILURE);
}
/* Now we can process port configuration options */
/* ...and outbound port options now that namespaces are set up. */
optind = 1;
do {
struct port_fwd *fwd = NULL;
struct port_fwd *fwd;
name = getopt_long(argc, argv, optstring, options, NULL);
switch (name) {
case 't':
case 'u':
case 'T':
case 'U':
if (name == 't')
fwd = &c->tcp.fwd_in;
else if (name == 'T')
fwd = &c->tcp.fwd_out;
else if (name == 'u')
fwd = &c->udp.fwd_in.f;
else if (name == 'U')
fwd = &c->udp.fwd_out.f;
if ((name == 'T' && (fwd = &c->tcp.fwd_out)) ||
(name == 'U' && (fwd = &c->udp.fwd_out.f))) {
if (!optarg || conf_ports(c, name, optarg, fwd))
usage(argv[0]);
break;
default:
break;
}
} while (name != -1);

View file

@ -19,6 +19,7 @@
#include <sys/types.h>
#include <limits.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include <arpa/inet.h>
@ -39,57 +40,58 @@ static int nl_sock_ns = -1;
static int nl_seq;
/**
* nl_sock_init_do() - Set up netlink sockets in init and target namespace
* @arg: Execution context
* nl_sock_init_do() - Set up netlink sockets in init or target namespace
* @arg: Execution context, if running from namespace, NULL otherwise
*
* Return: 0
*/
static int nl_sock_init_do(void *arg)
{
struct sockaddr_nl addr = { .nl_family = AF_NETLINK, };
int *s = &nl_sock;
int *s = arg ? &nl_sock_ns : &nl_sock;
#ifdef NETLINK_GET_STRICT_CHK
int y = 1;
#endif
ns:
if (((*s) = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) < 0 ||
bind(*s, (struct sockaddr *)&addr, sizeof(addr)))
*s = -1;
if (arg)
ns_enter((struct ctx *)arg);
if (*s == -1 || !arg || s == &nl_sock_ns)
if (((*s) = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) < 0 ||
bind(*s, (struct sockaddr *)&addr, sizeof(addr))) {
*s = -1;
return 0;
}
#ifdef NETLINK_GET_STRICT_CHK
if (setsockopt(*s, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &y, sizeof(y)))
debug("netlink: cannot set NETLINK_GET_STRICT_CHK on %i", *s);
#endif
ns_enter((struct ctx *)arg);
s = &nl_sock_ns;
goto ns;
return 0;
}
/**
* nl_sock_init() - Call nl_sock_init_do() and check for failures
* nl_sock_init() - Call nl_sock_init_do(), won't return on failure
* @c: Execution context
*
* Return: -EIO if sockets couldn't be set up, 0 otherwise
* @ns: Get socket in namespace, not in init
*/
int nl_sock_init(const struct ctx *c)
void nl_sock_init(const struct ctx *c, bool ns)
{
if (c->mode == MODE_PASTA) {
if (ns) {
NS_CALL(nl_sock_init_do, c);
if (nl_sock_ns == -1)
return -EIO;
goto fail;
} else {
nl_sock_init_do(NULL);
}
if (nl_sock == -1)
return -EIO;
goto fail;
return 0;
return;
fail:
err("Failed to get netlink socket");
exit(EXIT_FAILURE);
}
/**

View file

@ -6,7 +6,7 @@
#ifndef NETLINK_H
#define NETLINK_H
int nl_sock_init(const struct ctx *c);
void nl_sock_init(const struct ctx *c, bool ns);
unsigned int nl_get_ext_if(sa_family_t af);
void nl_route(int ns, unsigned int ifi, sa_family_t af, void *gw);
void nl_addr(int ns, unsigned int ifi, sa_family_t af,

47
passt.1
View file

@ -771,14 +771,47 @@ possible to bind sockets to foreign addresses.
.SS Binding to low numbered ports (well-known or system ports, up to 1023)
If the port forwarding configuration requires binding to port numbers lower than
1024, \fBpasst\fR and \fBpasta\fR will try to bind to them, but will fail if not
running as root, or without the \fICAP_NET_BIND_SERVICE\fR Linux capability, see
\fBservices\fR(5) and \fBcapabilities\fR(7). To grant the
\fICAP_NET_BIND_SERVICE\fR capability to passt, you can issue, as root:
If the port forwarding configuration requires binding to ports with numbers
lower than 1024, \fBpasst\fR and \fBpasta\fR will try to bind to them, but will
fail, unless, either:
.IP \(bu 2
the \fIsys.net.ipv4.ip_unprivileged_port_start\fR sysctl is set to the number
of the lowest port \fBpasst\fR and \fBpasta\fR need. For example, as root:
.nf
sysctl -w net.ipv4.ip_unprivileged_port_start=443
.fi
\fBNote\fR: this is the recommended way of enabling \fBpasst\fR and \fBpasta\fR
to bind to ports with numbers below 1024.
.IP \(bu
or the \fICAP_NET_BIND_SERVICE\fR Linux capability is granted, see
\fBservices\fR(5) and \fBcapabilities\fR(7).
This is, in general, \fBnot the recommended way\fR, because \fBpasst\fR and
\fBpasta\fR might be used as vector to effectively use this capability from
another process.
However, if your environment is sufficiently controlled by an LSM (Linux
Security Module) such as \fIAppArmor\fR, \fISELinux\fR, \fISmack\fR or
\fITOMOYO\fR, and no other processes can interact in such a way in virtue of
this, granting this capability to \fBpasst\fR and \fBpasta\fR only can
effectively prevent other processes from utilising it.
Note that this will not work for automatic detection and forwarding of ports
with \fBpasta\fR, because \fBpasta\fR will relinquish this capability at
runtime.
To grant this capability, you can issue, as root:
.nf
for p in $(which passt passt.avx2); do
setcap 'cap_net_bind_service=+ep' "${p}"
done
.fi
.RS
setcap 'cap_net_bind_service=+ep' $(which passt)
.RE
.SS ICMP/ICMPv6 Echo sockets

1
tap.c
View file

@ -30,6 +30,7 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/uio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
#include <netinet/ip.h>