33482d5bf2
PASTA (Pack A Subtle Tap Abstraction) provides quasi-native host connectivity to an otherwise disconnected, unprivileged network and user namespace, similarly to slirp4netns. Given that the implementation is largely overlapping with PASST, no separate binary is built: 'pasta' (and 'passt4netns' for clarity) both link to 'passt', and the mode of operation is selected depending on how the binary is invoked. Usage example: $ unshare -rUn # echo $$ 1871759 $ ./pasta 1871759 # From another terminal # udhcpc -i pasta0 2>/dev/null # ping -c1 pasta.pizza PING pasta.pizza (64.190.62.111) 56(84) bytes of data. 64 bytes from 64.190.62.111 (64.190.62.111): icmp_seq=1 ttl=255 time=34.6 ms --- pasta.pizza ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 34.575/34.575/34.575/0.000 ms # ping -c1 spaghetti.pizza PING spaghetti.pizza(2606:4700:3034::6815:147a (2606:4700:3034::6815:147a)) 56 data bytes 64 bytes from 2606:4700:3034::6815:147a (2606:4700:3034::6815:147a): icmp_seq=1 ttl=255 time=29.0 ms --- spaghetti.pizza ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 28.967/28.967/28.967/0.000 ms This entails a major rework, especially with regard to the storage of tracked connections and to the semantics of epoll(7) references. Indexing TCP and UDP bindings merely by socket proved to be inflexible and unsuitable to handle different connection flows: pasta also provides Layer-2 to Layer-2 socket mapping between init and a separate namespace for local connections, using a pair of splice() system calls for TCP, and a recvmmsg()/sendmmsg() pair for UDP local bindings. For instance, building on the previous example: # ip link set dev lo up # iperf3 -s $ iperf3 -c ::1 -Z -w 32M -l 1024k -P2 | tail -n4 [SUM] 0.00-10.00 sec 52.3 GBytes 44.9 Gbits/sec 283 sender [SUM] 0.00-10.43 sec 52.3 GBytes 43.1 Gbits/sec receiver iperf Done. 
epoll(7) references now include a generic part in order to demultiplex data to the relevant protocol handler, using 24 bits for the socket number, and an opaque portion reserved for usage by the single protocol handlers, in order to track sockets back to corresponding connections and bindings. A number of fixes pertaining to TCP state machine and congestion window handling are also included here. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
186 lines
4.3 KiB
C
186 lines
4.3 KiB
C
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
/* PASST - Plug A Simple Socket Transport
|
|
* for qemu/UNIX domain socket mode
|
|
*
|
|
* PASTA - Pack A Subtle Tap Abstraction
|
|
* for network namespace/tap device mode
|
|
*
|
|
* ndp.c - NDP support for PASST
|
|
*
|
|
* Copyright (c) 2020-2021 Red Hat GmbH
|
|
* Author: Stefano Brivio <sbrivio@redhat.com>
|
|
*
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
#include <unistd.h>
|
|
#include <string.h>
|
|
#include <arpa/inet.h>
|
|
#include <linux/if_ether.h>
|
|
#include <linux/ip.h>
|
|
#include <linux/ipv6.h>
|
|
#include <linux/icmpv6.h>
|
|
#include <linux/udp.h>
|
|
#include <net/if.h>
|
|
#include <net/if_arp.h>
|
|
|
|
#include "util.h"
|
|
#include "passt.h"
|
|
#include "tap.h"
|
|
|
|
/* ICMPv6 message type values matched against icmp6_type in ndp(): this one and
 * the three that follow are router solicitation (RS), router advertisement
 * (RA), neighbour solicitation (NS) and neighbour advertisement (NA).
 */
#define RS 133
|
|
#define RA 134
|
|
#define NS 135
|
|
#define NA 136
|
|
|
|
/**
|
|
* ndp() - Check for NDP solicitations, reply as needed
|
|
* @c: Execution context
|
|
* @len: Total L2 packet length
|
|
* @eh: Packet buffer, Ethernet header
|
|
*
|
|
* Return: 0 if not handled here, 1 if handled, -1 on failure
|
|
*/
|
|
int ndp(struct ctx *c, struct ethhdr *eh, size_t len)
|
|
{
|
|
struct ethhdr *ehr;
|
|
struct ipv6hdr *ip6h = (struct ipv6hdr *)(eh + 1), *ip6hr;
|
|
struct icmp6hdr *ih, *ihr;
|
|
char buf[BUFSIZ] = { 0 };
|
|
uint8_t proto, *p;
|
|
|
|
if (len < sizeof(*ehr) + sizeof(*ip6h) + sizeof(ih))
|
|
return 0;
|
|
|
|
ih = (struct icmp6hdr *)ipv6_l4hdr(ip6h, &proto);
|
|
if (!ih)
|
|
return -1;
|
|
|
|
if (proto != IPPROTO_ICMPV6 ||
|
|
ih->icmp6_type < RS || ih->icmp6_type > NA)
|
|
return 0;
|
|
|
|
ehr = (struct ethhdr *)buf;
|
|
ip6hr = (struct ipv6hdr *)(ehr + 1);
|
|
ihr = (struct icmp6hdr *)(ip6hr + 1);
|
|
|
|
if (ih->icmp6_type == NS) {
|
|
if (len < sizeof(*ehr) + sizeof(*ip6h) + sizeof(ih) +
|
|
sizeof(struct in6_addr))
|
|
return -1;
|
|
|
|
info("NDP: received NS, sending NA");
|
|
ihr->icmp6_type = NA;
|
|
ihr->icmp6_code = 0;
|
|
ihr->icmp6_router = 1;
|
|
ihr->icmp6_solicited = 1;
|
|
ihr->icmp6_override = 1;
|
|
|
|
p = (unsigned char *)(ihr + 1);
|
|
memcpy(p, ih + 1, sizeof(struct in6_addr)); /* target address */
|
|
p += 16;
|
|
*p++ = 2; /* target ll */
|
|
*p++ = 1; /* length */
|
|
memcpy(p, c->mac, ETH_ALEN);
|
|
p += 6;
|
|
} else if (ih->icmp6_type == RS) {
|
|
size_t len = 0;
|
|
int i, n;
|
|
|
|
info("NDP: received RS, sending RA");
|
|
ihr->icmp6_type = RA;
|
|
ihr->icmp6_code = 0;
|
|
ihr->icmp6_rt_lifetime = htons(3600);
|
|
ihr->icmp6_addrconf_managed = 1;
|
|
|
|
p = (unsigned char *)(ihr + 1);
|
|
p += 8; /* reachable, retrans time */
|
|
*p++ = 3; /* prefix */
|
|
*p++ = 4; /* length */
|
|
*p++ = 64; /* prefix length */
|
|
*p++ = 0xc0; /* prefix flags: L, A */
|
|
*(uint32_t *)p = htonl(3600); /* lifetime */
|
|
p += 4;
|
|
*(uint32_t *)p = htonl(3600); /* preferred lifetime */
|
|
p += 8;
|
|
memcpy(p, &c->addr6, 8); /* prefix */
|
|
p += 16;
|
|
|
|
for (n = 0; !IN6_IS_ADDR_UNSPECIFIED(&c->dns6[n]); n++);
|
|
if (n) {
|
|
*p++ = 25; /* RDNSS */
|
|
*p++ = 1 + 2 * n; /* length */
|
|
p += 2; /* reserved */
|
|
*(uint32_t *)p = htonl(60); /* lifetime */
|
|
p += 4;
|
|
|
|
for (i = 0; i < n; i++) {
|
|
memcpy(p, &c->dns6[i], 16); /* address */
|
|
p += 16;
|
|
}
|
|
}
|
|
|
|
for (n = 0; *c->dns_search[n].n; n++)
|
|
len += strlen(c->dns_search[n].n) + 2;
|
|
if (len) {
|
|
*p++ = 31; /* DNSSL */
|
|
*p++ = 2 + (len + 8 - 1) / 8; /* length */
|
|
p += 2; /* reserved */
|
|
*(uint32_t *)p = htonl(60); /* lifetime */
|
|
p += 4;
|
|
|
|
for (i = 0; i < n; i++) {
|
|
char *dot;
|
|
|
|
*(p++) = '.';
|
|
|
|
strncpy((char *)p, c->dns_search[i].n,
|
|
sizeof(buf) -
|
|
((intptr_t)p - (intptr_t)buf));
|
|
for (dot = (char *)p - 1; *dot; dot++) {
|
|
if (*dot == '.')
|
|
*dot = strcspn(dot + 1, ".");
|
|
}
|
|
p += strlen(c->dns_search[i].n);
|
|
*(p++) = 0;
|
|
}
|
|
|
|
memset(p, 0, len % 8); /* padding */
|
|
p += len % 8;
|
|
}
|
|
|
|
*p++ = 1; /* source ll */
|
|
*p++ = 1; /* length */
|
|
memcpy(p, c->mac, ETH_ALEN);
|
|
p += 6;
|
|
} else {
|
|
return 1;
|
|
}
|
|
|
|
len = (uintptr_t)p - (uintptr_t)ihr - sizeof(*ihr);
|
|
|
|
ip6hr->daddr = ip6h->saddr;
|
|
ip6hr->saddr = c->gw6;
|
|
ip6hr->payload_len = htons(sizeof(*ihr) + len);
|
|
ip6hr->hop_limit = IPPROTO_ICMPV6;
|
|
ihr->icmp6_cksum = 0;
|
|
ihr->icmp6_cksum = csum_ip4(ip6hr, sizeof(*ip6hr) +
|
|
sizeof(*ihr) + len);
|
|
|
|
ip6hr->version = 6;
|
|
ip6hr->nexthdr = IPPROTO_ICMPV6;
|
|
ip6hr->hop_limit = 255;
|
|
|
|
len += sizeof(*ehr) + sizeof(*ip6hr) + sizeof(*ihr);
|
|
memcpy(ehr->h_dest, eh->h_source, ETH_ALEN);
|
|
memcpy(ehr->h_source, c->mac, ETH_ALEN);
|
|
ehr->h_proto = htons(ETH_P_IPV6);
|
|
|
|
if (tap_send(c, ehr, len, 0) < 0)
|
|
perror("NDP: send");
|
|
|
|
return 1;
|
|
}
|