passt/pcap.c
Stefano Brivio 33482d5bf2 passt: Add PASTA mode, major rework
PASTA (Pack A Subtle Tap Abstraction) provides quasi-native host
connectivity to an otherwise disconnected, unprivileged network
and user namespace, similarly to slirp4netns. Given that the
implementation is largely overlapping with PASST, no separate binary
is built: 'pasta' (and 'passt4netns' for clarity) both link to
'passt', and the mode of operation is selected depending on how the
binary is invoked. Usage example:

	$ unshare -rUn
	# echo $$
	1871759

	$ ./pasta 1871759	# From another terminal

	# udhcpc -i pasta0 2>/dev/null
	# ping -c1 pasta.pizza
	PING pasta.pizza (64.190.62.111) 56(84) bytes of data.
	64 bytes from 64.190.62.111 (64.190.62.111): icmp_seq=1 ttl=255 time=34.6 ms

	--- pasta.pizza ping statistics ---
	1 packets transmitted, 1 received, 0% packet loss, time 0ms
	rtt min/avg/max/mdev = 34.575/34.575/34.575/0.000 ms
	# ping -c1 spaghetti.pizza
	PING spaghetti.pizza(2606:4700:3034::6815:147a (2606:4700:3034::6815:147a)) 56 data bytes
	64 bytes from 2606:4700:3034::6815:147a (2606:4700:3034::6815:147a): icmp_seq=1 ttl=255 time=29.0 ms

	--- spaghetti.pizza ping statistics ---
	1 packets transmitted, 1 received, 0% packet loss, time 0ms
	rtt min/avg/max/mdev = 28.967/28.967/28.967/0.000 ms

This entails a major rework, especially with regard to the storage of
tracked connections and to the semantics of epoll(7) references.

Indexing TCP and UDP bindings merely by socket proved to be
inflexible and unsuitable to handle different connection flows: pasta
also provides Layer-2 to Layer-2 socket mapping between init and a
separate namespace for local connections, using a pair of splice()
system calls for TCP, and a recvmmsg()/sendmmsg() pair for UDP local
bindings. For instance, building on the previous example:

	# ip link set dev lo up
	# iperf3 -s

	$ iperf3 -c ::1 -Z -w 32M -l 1024k -P2 | tail -n4
	[SUM]   0.00-10.00  sec  52.3 GBytes  44.9 Gbits/sec  283             sender
	[SUM]   0.00-10.43  sec  52.3 GBytes  43.1 Gbits/sec                  receiver

	iperf Done.

epoll(7) references now include a generic part in order to
demultiplex data to the relevant protocol handler, using 24
bits for the socket number, and an opaque portion reserved for
usage by the single protocol handlers, in order to track sockets
back to corresponding connections and bindings.

A number of fixes pertaining to TCP state machine and congestion
window handling are also included here.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
2021-07-17 11:04:22 +02:00

138 lines
2.8 KiB
C

// SPDX-License-Identifier: AGPL-3.0-or-later
/* PASST - Plug A Simple Socket Transport
* for qemu/UNIX domain socket mode
*
* PASTA - Pack A Subtle Tap Abstraction
* for network namespace/tap device mode
*
* pcap.c - Packet capture for PASST/PASTA
*
* Copyright (c) 2021 Red Hat GmbH
* Author: Stefano Brivio <sbrivio@redhat.com>
*/
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <net/ethernet.h>
#include <unistd.h>
#include <net/if.h>
#include "util.h"
#include "passt.h"
#ifdef DEBUG
#define PCAP_PREFIX "/tmp/passt_"
#define PCAP_PREFIX_PASTA "/tmp/pasta_"
#define PCAP_ISO8601_FORMAT "%FT%H:%M:%SZ"
#define PCAP_ISO8601_STR "YYYY-MM-ddTHH:mm:ssZ"
#define PCAP_VERSION_MINOR 4
static int pcap_fd = -1;
/* See pcap.h from libpcap, or pcap-savefile(5) */
static struct {
uint32_t magic;
#define PCAP_MAGIC 0xa1b2c3d4
uint16_t major;
#define PCAP_VERSION_MAJOR 2
uint16_t minor;
#define PCAP_VERSION_MINOR 4
int32_t thiszone;
uint32_t sigfigs;
uint32_t snaplen;
uint32_t linktype;
#define PCAP_LINKTYPE_ETHERNET 1
} pcap_hdr = {
PCAP_MAGIC, PCAP_VERSION_MAJOR, PCAP_VERSION_MINOR, 0, 0, ETH_MAX_MTU,
PCAP_LINKTYPE_ETHERNET
};
struct pcap_pkthdr {
uint32_t tv_sec;
uint32_t tv_usec;
uint32_t caplen;
uint32_t len;
};
/**
* pcap() - Capture a single frame to pcap file
* @pkt: Pointer to data buffer, including L2 headers
* @len: L2 packet length
*/
void pcap(char *pkt, size_t len)
{
struct pcap_pkthdr h;
struct timeval tv;
if (pcap_fd == -1)
return;
gettimeofday(&tv, NULL);
h.tv_sec = tv.tv_sec;
h.tv_usec = tv.tv_usec;
h.caplen = h.len = len;
write(pcap_fd, &h, sizeof(h));
write(pcap_fd, pkt, len);
}
/**
* pcap_init() - Initialise pcap file
* @c: Execution context
* @index: pcap name index: passt instance number or pasta target pid
*/
void pcap_init(struct ctx *c, int index)
{
char name[] = PCAP_PREFIX PCAP_ISO8601_STR STR(UINT_MAX) ".pcap";
struct timeval tv;
struct tm *tm;
if (pcap_fd != -1)
close(pcap_fd);
if (c->mode == MODE_PASTA)
memcpy(name, PCAP_PREFIX_PASTA, sizeof(PCAP_PREFIX_PASTA));
gettimeofday(&tv, NULL);
tm = localtime(&tv.tv_sec);
strftime(name + strlen(PCAP_PREFIX), sizeof(PCAP_ISO8601_STR) - 1,
PCAP_ISO8601_FORMAT, tm);
snprintf(name + strlen(PCAP_PREFIX) + strlen(PCAP_ISO8601_STR),
sizeof(name) - strlen(PCAP_PREFIX) - strlen(PCAP_ISO8601_STR),
"_%i.pcap", index);
pcap_fd = open(name, O_WRONLY | O_CREAT | O_APPEND | O_DSYNC,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (pcap_fd == -1) {
perror("open");
return;
}
info("Saving packet capture at %s", name);
write(pcap_fd, &pcap_hdr, sizeof(pcap_hdr));
}
#else /* DEBUG */
void pcap(char *pkt, size_t len) {
(void)pkt;
(void)len;
}
void pcap_init(void) { }
#endif