conf, tcp, udp: Allow specification of interface to bind to

Since kernel version 5.7, commit c427bfec18f2 ("net: core: enable
SO_BINDTODEVICE for non-root users"), we can bind sockets to
interfaces, if they haven't been bound yet (as in bind()).

Introduce an optional interface specification for forwarded ports,
prefixed by %, that can be passed together with an address.

Reported use case: running local services that use ports we want
to have externally forwarded:
  https://github.com/containers/podman/issues/14425

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
This commit is contained in:
Stefano Brivio 2022-10-07 04:53:40 +02:00
parent a62ed181db
commit c1eff9a3c6
9 changed files with 88 additions and 47 deletions

31
conf.c
View file

@ -180,8 +180,8 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg,
struct port_fwd *fwd)
{
char addr_buf[sizeof(struct in6_addr)] = { 0 }, *addr = addr_buf;
char buf[BUFSIZ], *spec, *ifname = NULL, *p;
uint8_t exclude[PORT_BITMAP_SIZE] = { 0 };
char buf[BUFSIZ], *spec, *p;
sa_family_t af = AF_UNSPEC;
bool exclude_only = true;
@ -209,9 +209,9 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg,
for (i = 0; i < PORT_EPHEMERAL_MIN; i++) {
if (optname == 't')
tcp_sock_init(c, 0, AF_UNSPEC, NULL, i);
tcp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, i);
else if (optname == 'u')
udp_sock_init(c, 0, AF_UNSPEC, NULL, i);
udp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, i);
}
return 0;
@ -231,6 +231,14 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg,
if (optname != 't' && optname != 'u')
goto bad;
if ((ifname = strchr(buf, '%'))) {
if (spec - ifname >= IFNAMSIZ - 1)
goto bad;
*ifname = 0;
ifname++;
}
if (inet_pton(AF_INET, buf, addr))
af = AF_INET;
else if (inet_pton(AF_INET6, buf, addr))
@ -278,9 +286,9 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg,
bitmap_set(fwd->map, i);
if (optname == 't')
tcp_sock_init(c, 0, af, addr, i);
tcp_sock_init(c, 0, af, addr, ifname, i);
else if (optname == 'u')
udp_sock_init(c, 0, af, addr, i);
udp_sock_init(c, 0, af, addr, ifname, i);
}
return 0;
@ -324,9 +332,9 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg,
fwd->delta[i] = mapped_range.first - orig_range.first;
if (optname == 't')
tcp_sock_init(c, 0, af, addr, i);
tcp_sock_init(c, 0, af, addr, ifname, i);
else if (optname == 'u')
udp_sock_init(c, 0, af, addr, i);
udp_sock_init(c, 0, af, addr, ifname, i);
}
} while ((p = next_chunk(p, ',')));
@ -720,8 +728,9 @@ static void usage(const char *name)
info( " 'all': forward all unbound, non-ephemeral ports");
info( " a comma-separated list, optionally ranged with '-'");
info( " and optional target ports after ':', with optional");
info( " address specification suffixed by '/'. Ranges can be");
info( " reduced by excluding ports or ranges prefixed by '~'");
info( " address specification suffixed by '/' and optional");
info( " interface prefixed by '%%'. Ranges can be reduced by");
info( " excluding ports or ranges prefixed by '~'");
info( " Examples:");
info( " -t 22 Forward local port 22 to 22 on guest");
info( " -t 22:23 Forward local port 22 to 23 on guest");
@ -740,6 +749,7 @@ static void usage(const char *name)
exit(EXIT_FAILURE);
pasta_opts:
info( " -t, --tcp-ports SPEC TCP port forwarding to namespace");
info( " can be specified multiple times");
info( " SPEC can be:");
@ -747,7 +757,8 @@ pasta_opts:
info( " 'auto': forward all ports currently bound in namespace");
info( " a comma-separated list, optionally ranged with '-'");
info( " and optional target ports after ':', with optional");
info( " address specification suffixed by '/'. Examples:");
info( " address specification suffixed by '/' and optional");
info( " interface prefixed by '%%'. Examples:");
info( " -t 22 Forward local port 22 to port 22 in netns");
info( " -t 22:23 Forward local port 22 to port 23");
info( " -t 22,25 Forward ports 22, 25 to ports 22, 25");

4
icmp.c
View file

@ -169,7 +169,7 @@ int icmp_tap_handler(const struct ctx *c, int af, const void *addr,
iref.icmp.id = id = ntohs(ih->un.echo.id);
if ((s = icmp_id_map[V4][id].sock) <= 0) {
s = sock_l4(c, AF_INET, IPPROTO_ICMP, NULL, id,
s = sock_l4(c, AF_INET, IPPROTO_ICMP, NULL, NULL, id,
iref.u32);
if (s < 0)
goto fail_sock;
@ -207,7 +207,7 @@ int icmp_tap_handler(const struct ctx *c, int af, const void *addr,
iref.icmp.id = id = ntohs(ih->icmp6_identifier);
if ((s = icmp_id_map[V6][id].sock) <= 0) {
s = sock_l4(c, AF_INET6, IPPROTO_ICMPV6, NULL, id,
s = sock_l4(c, AF_INET6, IPPROTO_ICMPV6, NULL, NULL, id,
iref.u32);
if (s < 0)
goto fail_sock;

12
passt.1
View file

@ -325,7 +325,8 @@ For low (< 1024) ports, see \fBNOTES\fR.
.BR ports
A comma-separated list of ports, optionally ranged with \fI-\fR, and,
optionally, with target ports after \fI:\fR, if they differ. Specific addresses
can be bound as well, separated by \fI/\fR. Within given ranges, selected ports
can be bound as well, separated by \fI/\fR, and also, since Linux 5.7, limited
to specific interfaces, prefixed by \fI%\fR. Within given ranges, selected ports
and ranges can be excluded by an additional specification prefixed by \fI~\fR.
Specifying excluded ranges only implies that all other ports are forwarded.
Examples:
@ -349,6 +350,9 @@ Forward local ports 22 to 80 to corresponding ports on the guest plus 10
-t 192.0.2.1/22
Forward local port 22, bound to 192.0.2.1, to port 22 on the guest
.TP
-t 192.0.2.1%eth0/22
Forward local port 22, bound to 192.0.2.1 and interface eth0, to port 22 on the guest
.TP
-t 2000-5000,~3000-3010
Forward local ports 2000 to 5000, but not 3000 to 3010
.TP
@ -399,7 +403,8 @@ periodically derived (every second) from listening sockets reported by
.BR ports
A comma-separated list of ports, optionally ranged with \fI-\fR, and,
optionally, with target ports after \fI:\fR, if they differ. Specific addresses
can be bound as well, separated by \fI/\fR. Within given ranges, selected ports
can be bound as well, separated by \fI/\fR, and also, since Linux 5.7, limited
to specific interfaces, prefixed by \fI%\fR. Within given ranges, selected ports
and ranges can be excluded by an additional specification prefixed by \fI~\fR.
Specifying excluded ranges only implies that all other ports are forwarded.
Examples:
@ -424,6 +429,9 @@ namespace
-t 192.0.2.1/22
Forward local port 22, bound to 192.0.2.1, to port 22 in the target namespace
.TP
-t 192.0.2.1%eth0/22
Forward local port 22, bound to 192.0.2.1 and interface eth0, to port 22 in the target namespace
.TP
-t 2000-5000,~3000-3010
Forward local ports 2000 to 5000, but not 3000 to 3010
.TP

27
tcp.c
View file

@ -3078,10 +3078,11 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
* @ns: In pasta mode, if set, bind with loopback address in namespace
* @af: Address family to select a specific IP version, or AF_UNSPEC
* @addr: Pointer to address for binding, NULL if not configured
* @ifname: Name of interface to bind to, NULL if not configured
* @port: Port, host order
*/
void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
const void *addr, in_port_t port)
const void *addr, const char *ifname, in_port_t port)
{
union tcp_epoll_ref tref = { .tcp.listen = 1 };
const void *bind_addr;
@ -3103,8 +3104,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
tref.tcp.splice = 0;
if (!ns) {
s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, port,
tref.u32);
s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname,
port, tref.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else
@ -3118,8 +3119,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) };
tref.tcp.splice = 1;
s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, port,
tref.u32);
s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname,
port, tref.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else
@ -3144,8 +3145,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
tref.tcp.splice = 0;
if (!ns) {
s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, port,
tref.u32);
s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname,
port, tref.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else
@ -3159,8 +3160,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
bind_addr = &in6addr_loopback;
tref.tcp.splice = 1;
s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, port,
tref.u32);
s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname,
port, tref.u32);
if (s >= 0)
tcp_sock_set_bufsize(c, s);
else
@ -3193,7 +3194,7 @@ static int tcp_sock_init_ns(void *arg)
if (!bitmap_isset(c->tcp.fwd_out.map, port))
continue;
tcp_sock_init(c, 1, AF_UNSPEC, NULL, port);
tcp_sock_init(c, 1, AF_UNSPEC, NULL, NULL, port);
}
return 0;
@ -3410,7 +3411,8 @@ static int tcp_port_rebind(void *arg)
if ((a->c->ifi4 && tcp_sock_ns[port][V4] == -1) ||
(a->c->ifi6 && tcp_sock_ns[port][V6] == -1))
tcp_sock_init(a->c, 1, AF_UNSPEC, NULL, port);
tcp_sock_init(a->c, 1, AF_UNSPEC, NULL, NULL,
port);
}
} else {
for (port = 0; port < NUM_PORTS; port++) {
@ -3443,7 +3445,8 @@ static int tcp_port_rebind(void *arg)
if ((a->c->ifi4 && tcp_sock_init_ext[port][V4] == -1) ||
(a->c->ifi6 && tcp_sock_init_ext[port][V6] == -1))
tcp_sock_init(a->c, 0, AF_UNSPEC, NULL, port);
tcp_sock_init(a->c, 0, AF_UNSPEC, NULL, NULL,
port);
}
}

2
tcp.h
View file

@ -21,7 +21,7 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
int tcp_tap_handler(struct ctx *c, int af, const void *addr,
const struct pool *p, const struct timespec *now);
void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
const void *addr, in_port_t port);
const void *addr, const char *ifname, in_port_t port);
int tcp_init(struct ctx *c);
void tcp_timer(struct ctx *c, const struct timespec *ts);
void tcp_defer_handler(struct ctx *c);

35
udp.c
View file

@ -1005,7 +1005,7 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr,
union udp_epoll_ref uref = { .udp.bound = 1,
.udp.port = src };
s = sock_l4(c, AF_INET, IPPROTO_UDP, NULL, src,
s = sock_l4(c, AF_INET, IPPROTO_UDP, NULL, NULL, src,
uref.u32);
if (s < 0)
return p->count;
@ -1057,8 +1057,8 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr,
.udp.v6 = 1,
.udp.port = src };
s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, src,
uref.u32);
s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, NULL,
src, uref.u32);
if (s < 0)
return p->count;
@ -1111,10 +1111,11 @@ int udp_tap_handler(struct ctx *c, int af, const void *addr,
* @ns: In pasta mode, if set, bind with loopback address in namespace
* @af: Address family to select a specific IP version, or AF_UNSPEC
* @addr: Pointer to address for binding, NULL if not configured
* @ifname: Name of interface to bind to, NULL if not configured
* @port: Port, host order
*/
void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
const void *addr, in_port_t port)
const void *addr, const char *ifname, in_port_t port)
{
union udp_epoll_ref uref = { .udp.bound = 1 };
const void *bind_addr;
@ -1138,8 +1139,8 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
if (!ns) {
uref.udp.splice = 0;
s = sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, port,
uref.u32);
s = sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, ifname,
port, uref.u32);
udp_tap_map[V4][uref.udp.port].sock = s;
}
@ -1148,16 +1149,16 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) };
uref.udp.splice = UDP_TO_NS;
sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, port,
uref.u32);
sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, ifname,
port, uref.u32);
}
if (ns) {
bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) };
uref.udp.splice = UDP_TO_INIT;
sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, port,
uref.u32);
sock_l4(c, AF_INET, IPPROTO_UDP, bind_addr, ifname,
port, uref.u32);
}
}
@ -1171,8 +1172,8 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
if (!ns) {
uref.udp.splice = 0;
s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, port,
uref.u32);
s = sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, ifname,
port, uref.u32);
udp_tap_map[V6][uref.udp.port].sock = s;
}
@ -1181,16 +1182,16 @@ void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
bind_addr = &in6addr_loopback;
uref.udp.splice = UDP_TO_NS;
sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, port,
uref.u32);
sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, ifname,
port, uref.u32);
}
if (ns) {
bind_addr = &in6addr_loopback;
uref.udp.splice = UDP_TO_INIT;
sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, port,
uref.u32);
sock_l4(c, AF_INET6, IPPROTO_UDP, bind_addr, ifname,
port, uref.u32);
}
}
}
@ -1213,7 +1214,7 @@ int udp_sock_init_ns(void *arg)
if (!bitmap_isset(c->udp.fwd_out.f.map, dst))
continue;
udp_sock_init(c, 1, AF_UNSPEC, NULL, dst);
udp_sock_init(c, 1, AF_UNSPEC, NULL, NULL, dst);
}
return 0;

2
udp.h
View file

@ -13,7 +13,7 @@ void udp_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events,
int udp_tap_handler(struct ctx *c, int af, const void *addr,
const struct pool *p, const struct timespec *now);
void udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
const void *addr, in_port_t port);
const void *addr, const char *ifname, in_port_t port);
int udp_init(struct ctx *c);
void udp_timer(struct ctx *c, const struct timespec *ts);
void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,

19
util.c
View file

@ -90,13 +90,15 @@ found:
* @af: Address family, AF_INET or AF_INET6
* @proto: Protocol number
* @bind_addr: Address for binding, NULL for any
* @ifname: Interface for binding, NULL for any
* @port: Port, host order
* @data: epoll reference portion for protocol handlers
*
* Return: newly created socket, -1 on error
*/
int sock_l4(const struct ctx *c, int af, uint8_t proto,
const void *bind_addr, uint16_t port, uint32_t data)
const void *bind_addr, const char *ifname, uint16_t port,
uint32_t data)
{
union epoll_ref ref = { .r.proto = proto, .r.p.data = data };
struct sockaddr_in addr4 = {
@ -163,6 +165,21 @@ int sock_l4(const struct ctx *c, int af, uint8_t proto,
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)))
debug("Failed to set SO_REUSEADDR on socket %i", fd);
if (ifname) {
/* Supported since kernel version 5.7, commit c427bfec18f2
* ("net: core: enable SO_BINDTODEVICE for non-root users"). If
* it's unsupported, don't bind the socket at all, because the
* user might rely on this to filter incoming connections.
*/
if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
ifname, strlen(ifname))) {
warn("Can't bind socket for %s port %u to %s, closing",
ip_proto_str[proto], port, ifname);
close(fd);
return -1;
}
}
if (bind(fd, sa, sl) < 0) {
/* We'll fail to bind to low ports if we don't have enough
* capabilities, and we'll fail to bind on already bound ports,

3
util.h
View file

@ -194,7 +194,8 @@ __attribute__ ((weak)) int ffsl(long int i) { return __builtin_ffsl(i); }
char *ipv6_l4hdr(const struct pool *p, int index, size_t offset, uint8_t *proto,
size_t *dlen);
int sock_l4(const struct ctx *c, int af, uint8_t proto,
const void *bind_addr, uint16_t port, uint32_t data);
const void *bind_addr, const char *ifname, uint16_t port,
uint32_t data);
void sock_probe_mem(struct ctx *c);
int timespec_diff_ms(const struct timespec *a, const struct timespec *b);
void bitmap_set(uint8_t *map, int bit);