passt/tap.h

114 lines
3.7 KiB
C
Raw Permalink Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright (c) 2021 Red Hat GmbH
* Author: Stefano Brivio <sbrivio@redhat.com>
*/
#ifndef TAP_H
#define TAP_H
/*
* TCP frame iovec array:
* TCP_IOV_VNET vnet length
* TCP_IOV_ETH ethernet header
* TCP_IOV_IP IP (v4/v6) header
* TCP_IOV_PAYLOAD IP payload (TCP header + data)
* TCP_IOV_NUM is the number of entries in the iovec array
*/
#define TCP_IOV_VNET 0
#define TCP_IOV_ETH 1
#define TCP_IOV_IP 2
#define TCP_IOV_PAYLOAD 3
#define TCP_IOV_NUM 4
/**
* struct tap_hdr - L2 and tap specific headers
* @vnet_len: Frame length (for qemu socket transport)
* @eh: Ethernet header
*/
struct tap_hdr {
uint32_t vnet_len;
struct ethhdr eh;
} __attribute__((packed));
#define TAP_HDR_INIT(proto) { .eh.h_proto = htons_constant(proto) }
static inline size_t tap_hdr_len_(const struct ctx *c)
{
if (c->mode == MODE_PASST)
return sizeof(struct tap_hdr);
else
return sizeof(struct ethhdr);
}
/**
* tap_iov_base() - Find start of tap frame
* @c: Execution context
* @taph: Pointer to L2 header buffer
*
* Returns: pointer to the start of tap frame - suitable for an
* iov_base to be passed to tap_send_frames())
*/
static inline void *tap_iov_base(const struct ctx *c, struct tap_hdr *taph)
{
return (char *)(taph + 1) - tap_hdr_len_(c);
}
/**
* tap_iov_len() - Finalize tap frame and return total length
* @c: Execution context
* @taph: Tap header to finalize
* @plen: L2 payload length (excludes L2 and tap specific headers)
*
* Returns: length of the tap frame including L2 and tap specific
* headers - suitable for an iov_len to be passed to
* tap_send_frames()
*/
static inline size_t tap_iov_len(const struct ctx *c, struct tap_hdr *taph,
size_t plen)
{
if (c->mode == MODE_PASST)
taph->vnet_len = htonl(plen + sizeof(taph->eh));
return plen + tap_hdr_len_(c);
}
struct in_addr tap_ip4_daddr(const struct ctx *c);
void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
struct in_addr dst, in_port_t dport,
const void *in, size_t len);
void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
const void *in, size_t len);
const struct in6_addr *tap_ip6_daddr(const struct ctx *c,
const struct in6_addr *src);
void tap_udp6_send(const struct ctx *c,
const struct in6_addr *src, in_port_t sport,
const struct in6_addr *dst, in_port_t dport,
uint32_t flow, const void *in, size_t len);
void tap_icmp6_send(const struct ctx *c,
const struct in6_addr *src, const struct in6_addr *dst,
const void *in, size_t len);
int tap_send(const struct ctx *c, const void *data, size_t len);
size_t tap_send_frames(const struct ctx *c, const struct iovec *iov, size_t n);
size_t tap_send_iov(const struct ctx *c, struct iovec iov[][TCP_IOV_NUM],
size_t n);
void eth_update_mac(struct ethhdr *eh,
const unsigned char *eth_d, const unsigned char *eth_s);
void tap_listen_handler(struct ctx *c, uint32_t events);
void tap_handler_pasta(struct ctx *c, uint32_t events,
const struct timespec *now);
void tap_handler_passt(struct ctx *c, uint32_t events,
const struct timespec *now);
void tap_sock_reset(struct ctx *c);
void tap_sock_update_buf(void *base, size_t size);
passt: Add PASTA mode, major rework PASTA (Pack A Subtle Tap Abstraction) provides quasi-native host connectivity to an otherwise disconnected, unprivileged network and user namespace, similarly to slirp4netns. Given that the implementation is largely overlapping with PASST, no separate binary is built: 'pasta' (and 'passt4netns' for clarity) both link to 'passt', and the mode of operation is selected depending on how the binary is invoked. Usage example: $ unshare -rUn # echo $$ 1871759 $ ./pasta 1871759 # From another terminal # udhcpc -i pasta0 2>/dev/null # ping -c1 pasta.pizza PING pasta.pizza (64.190.62.111) 56(84) bytes of data. 64 bytes from 64.190.62.111 (64.190.62.111): icmp_seq=1 ttl=255 time=34.6 ms --- pasta.pizza ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 34.575/34.575/34.575/0.000 ms # ping -c1 spaghetti.pizza PING spaghetti.pizza(2606:4700:3034::6815:147a (2606:4700:3034::6815:147a)) 56 data bytes 64 bytes from 2606:4700:3034::6815:147a (2606:4700:3034::6815:147a): icmp_seq=1 ttl=255 time=29.0 ms --- spaghetti.pizza ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 28.967/28.967/28.967/0.000 ms This entails a major rework, especially with regard to the storage of tracked connections and to the semantics of epoll(7) references. Indexing TCP and UDP bindings merely by socket proved to be inflexible and unsuitable to handle different connection flows: pasta also provides Layer-2 to Layer-2 socket mapping between init and a separate namespace for local connections, using a pair of splice() system calls for TCP, and a recvmmsg()/sendmmsg() pair for UDP local bindings. For instance, building on the previous example: # ip link set dev lo up # iperf3 -s $ iperf3 -c ::1 -Z -w 32M -l 1024k -P2 | tail -n4 [SUM] 0.00-10.00 sec 52.3 GBytes 44.9 Gbits/sec 283 sender [SUM] 0.00-10.43 sec 52.3 GBytes 43.1 Gbits/sec receiver iperf Done. epoll(7) references now include a generic part in order to demultiplex data to the relevant protocol handler, using 24 bits for the socket number, and an opaque portion reserved for usage by the single protocol handlers, in order to track sockets back to corresponding connections and bindings. A number of fixes pertaining to TCP state machine and congestion window handling are also included here. Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2021-07-17 08:34:53 +02:00
void tap_sock_init(struct ctx *c);
void pool_flush_all(void);
void tap_handler_all(struct ctx *c, const struct timespec *now);
void packet_add_do(struct pool *p, size_t len, const char *start,
const char *func, int line);
void packet_add_all_do(struct ctx *c, ssize_t len, char *p,
const char *func, int line);
#define packet_add_all(p, len, start) \
packet_add_all_do(p, len, start, __func__, __LINE__)
#endif /* TAP_H */