fwd: Direct inbound spliced forwards to the guest's external address
In pasta mode, where addressing permits, we "splice" connections, forwarding directly from host socket to guest/container socket without any L2 or L3 processing. This gives us a very large performance improvement when it's possible. Since the traffic is from a local socket within the guest, it will go over the guest's 'lo' interface, and accordingly we set the guest-side address to be the loopback address. However, this has a surprising side effect: sometimes guests will run services that are only supposed to be used within the guest and are therefore bound to only 127.0.0.1 and/or ::1. pasta's forwarding exposes those services to the host, which isn't generally what we want. Correct this by instead forwarding inbound "splice" flows to the guest's external address. Link: https://github.com/containers/podman/issues/24045 Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
This commit is contained in:
parent
58e6d68599
commit
b4dace8f46
4 changed files with 53 additions and 12 deletions
9
conf.c
9
conf.c
|
@ -912,6 +912,9 @@ pasta_opts:
|
||||||
" -U, --udp-ns SPEC UDP port forwarding to init namespace\n"
|
" -U, --udp-ns SPEC UDP port forwarding to init namespace\n"
|
||||||
" SPEC is as described above\n"
|
" SPEC is as described above\n"
|
||||||
" default: auto\n"
|
" default: auto\n"
|
||||||
|
" --host-lo-to-ns-lo DEPRECATED:\n"
|
||||||
|
" Translate host-loopback forwards to\n"
|
||||||
|
" namespace loopback\n"
|
||||||
" --userns NSPATH Target user namespace to join\n"
|
" --userns NSPATH Target user namespace to join\n"
|
||||||
" --netns PATH|NAME Target network namespace to join\n"
|
" --netns PATH|NAME Target network namespace to join\n"
|
||||||
" --netns-only Don't join existing user namespace\n"
|
" --netns-only Don't join existing user namespace\n"
|
||||||
|
@ -1289,6 +1292,7 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||||
{"netns-only", no_argument, NULL, 20 },
|
{"netns-only", no_argument, NULL, 20 },
|
||||||
{"map-host-loopback", required_argument, NULL, 21 },
|
{"map-host-loopback", required_argument, NULL, 21 },
|
||||||
{"map-guest-addr", required_argument, NULL, 22 },
|
{"map-guest-addr", required_argument, NULL, 22 },
|
||||||
|
{"host-lo-to-ns-lo", no_argument, NULL, 23 },
|
||||||
{"dns-host", required_argument, NULL, 24 },
|
{"dns-host", required_argument, NULL, 24 },
|
||||||
{ 0 },
|
{ 0 },
|
||||||
};
|
};
|
||||||
|
@ -1467,6 +1471,11 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||||
conf_nat(optarg, &c->ip4.map_guest_addr,
|
conf_nat(optarg, &c->ip4.map_guest_addr,
|
||||||
&c->ip6.map_guest_addr, NULL);
|
&c->ip6.map_guest_addr, NULL);
|
||||||
break;
|
break;
|
||||||
|
case 23:
|
||||||
|
if (c->mode != MODE_PASTA)
|
||||||
|
die("--host-lo-to-ns-lo is for pasta mode only");
|
||||||
|
c->host_lo_to_ns_lo = 1;
|
||||||
|
break;
|
||||||
case 24:
|
case 24:
|
||||||
if (inet_pton(AF_INET6, optarg, &c->ip6.dns_host) &&
|
if (inet_pton(AF_INET6, optarg, &c->ip6.dns_host) &&
|
||||||
!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_host))
|
!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_host))
|
||||||
|
|
31
fwd.c
31
fwd.c
|
@ -447,20 +447,35 @@ uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
|
||||||
(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
|
(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
|
||||||
/* spliceable */
|
/* spliceable */
|
||||||
|
|
||||||
/* Preserve the specific loopback address used, but let the
|
/* The traffic will go over the guest's 'lo' interface, but by
|
||||||
* kernel pick a source port on the target side
|
* default use its external address, so we don't inadvertently
|
||||||
|
* expose services that listen only on the guest's loopback
|
||||||
|
* address. That can be overridden by --host-lo-to-ns-lo which
|
||||||
|
* will instead forward to the loopback address in the guest.
|
||||||
|
*
|
||||||
|
* In either case, let the kernel pick the source address to
|
||||||
|
* match.
|
||||||
*/
|
*/
|
||||||
tgt->oaddr = ini->eaddr;
|
if (inany_v4(&ini->eaddr)) {
|
||||||
|
if (c->host_lo_to_ns_lo)
|
||||||
|
tgt->eaddr = inany_loopback4;
|
||||||
|
else
|
||||||
|
tgt->eaddr = inany_from_v4(c->ip4.addr_seen);
|
||||||
|
tgt->oaddr = inany_any4;
|
||||||
|
} else {
|
||||||
|
if (c->host_lo_to_ns_lo)
|
||||||
|
tgt->eaddr = inany_loopback6;
|
||||||
|
else
|
||||||
|
tgt->eaddr.a6 = c->ip6.addr_seen;
|
||||||
|
tgt->oaddr = inany_any6;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Let the kernel pick source port */
|
||||||
tgt->oport = 0;
|
tgt->oport = 0;
|
||||||
if (proto == IPPROTO_UDP)
|
if (proto == IPPROTO_UDP)
|
||||||
/* But for UDP preserve the source port */
|
/* But for UDP preserve the source port */
|
||||||
tgt->oport = ini->eport;
|
tgt->oport = ini->eport;
|
||||||
|
|
||||||
if (inany_v4(&ini->eaddr))
|
|
||||||
tgt->eaddr = inany_loopback4;
|
|
||||||
else
|
|
||||||
tgt->eaddr = inany_loopback6;
|
|
||||||
|
|
||||||
return PIF_SPLICE;
|
return PIF_SPLICE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
23
passt.1
23
passt.1
|
@ -605,6 +605,13 @@ Configure UDP port forwarding from target namespace to init namespace.
|
||||||
|
|
||||||
Default is \fBauto\fR.
|
Default is \fBauto\fR.
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.BR \-\-host-lo-to-ns-lo " " (DEPRECATED)
|
||||||
|
If specified, connections forwarded with \fB\-t\fR and \fB\-u\fR from
|
||||||
|
the host's loopback address will appear on the loopback address in the
|
||||||
|
guest as well. Without this option such forwarded packets will appear
|
||||||
|
to come from the guest's public address.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.BR \-\-userns " " \fIspec
|
.BR \-\-userns " " \fIspec
|
||||||
Target user namespace to join, as a path. If PID is given, without this option,
|
Target user namespace to join, as a path. If PID is given, without this option,
|
||||||
|
@ -893,8 +900,9 @@ interfaces, and it would also be impossible for guest or target
|
||||||
namespace to route answers back.
|
namespace to route answers back.
|
||||||
|
|
||||||
For convenience, the source address on these packets is translated to
|
For convenience, the source address on these packets is translated to
|
||||||
the address specified by the \fB\-\-map-host-loopback\fR option. If
|
the address specified by the \fB\-\-map-host-loopback\fR option (with
|
||||||
not specified this defaults, somewhat arbitrarily, to the address of
|
some exceptions in pasta mode, see next section below). If not
|
||||||
|
specified this defaults, somewhat arbitrarily, to the address of
|
||||||
default IPv4 or IPv6 gateway (if any) -- this is known to be an
|
default IPv4 or IPv6 gateway (if any) -- this is known to be an
|
||||||
existing, valid address on the same subnet. If \fB\-\-no-map-gw\fR or
|
existing, valid address on the same subnet. If \fB\-\-no-map-gw\fR or
|
||||||
\fB\-\-map-host-loopback none\fR are specified this translation is
|
\fB\-\-map-host-loopback none\fR are specified this translation is
|
||||||
|
@ -931,8 +939,15 @@ and the new socket using the \fBsplice\fR(2) system call, and for UDP, a pair
|
||||||
of \fBrecvmmsg\fR(2) and \fBsendmmsg\fR(2) system calls deals with packet
|
of \fBrecvmmsg\fR(2) and \fBsendmmsg\fR(2) system calls deals with packet
|
||||||
transfers.
|
transfers.
|
||||||
|
|
||||||
This bypass only applies to local connections and traffic, because it's not
|
Because it's not possible to bind sockets to foreign addresses, this
|
||||||
possible to bind sockets to foreign addresses.
|
bypass only applies to local connections and traffic. It also means
|
||||||
|
that the address translation differs slightly from passt mode.
|
||||||
|
Connections from loopback to loopback on the host will appear to come
|
||||||
|
from the target namespace's public address within the guest, unless
|
||||||
|
\fB\-\-host-lo-to-ns-lo\fR is specified, in which case they will
|
||||||
|
appear to come from loopback in the namespace as well. The latter
|
||||||
|
behaviour used to be the default, but is usually undesirable, since it
|
||||||
|
can unintentionally expose namespace local services to the host.
|
||||||
|
|
||||||
.SS Binding to low numbered ports (well-known or system ports, up to 1023)
|
.SS Binding to low numbered ports (well-known or system ports, up to 1023)
|
||||||
|
|
||||||
|
|
2
passt.h
2
passt.h
|
@ -225,6 +225,7 @@ struct ip6_ctx {
|
||||||
* @no_dhcpv6: Disable DHCPv6 server
|
* @no_dhcpv6: Disable DHCPv6 server
|
||||||
* @no_ndp: Disable NDP handler altogether
|
* @no_ndp: Disable NDP handler altogether
|
||||||
* @no_ra: Disable router advertisements
|
* @no_ra: Disable router advertisements
|
||||||
|
* @host_lo_to_ns_lo: Map host loopback addresses to ns loopback addresses
|
||||||
* @freebind: Allow binding of non-local addresses for forwarding
|
* @freebind: Allow binding of non-local addresses for forwarding
|
||||||
* @low_wmem: Low probed net.core.wmem_max
|
* @low_wmem: Low probed net.core.wmem_max
|
||||||
* @low_rmem: Low probed net.core.rmem_max
|
* @low_rmem: Low probed net.core.rmem_max
|
||||||
|
@ -285,6 +286,7 @@ struct ctx {
|
||||||
int no_dhcpv6;
|
int no_dhcpv6;
|
||||||
int no_ndp;
|
int no_ndp;
|
||||||
int no_ra;
|
int no_ra;
|
||||||
|
int host_lo_to_ns_lo;
|
||||||
int freebind;
|
int freebind;
|
||||||
|
|
||||||
int low_wmem;
|
int low_wmem;
|
||||||
|
|
Loading…
Reference in a new issue