diff --git a/Makefile b/Makefile
index d4e1096..3328f83 100644
--- a/Makefile
+++ b/Makefile
@@ -20,6 +20,7 @@ $(if $(TARGET),,$(error Failed to get target architecture))
 # Get 'uname -m'-like architecture description for target
 TARGET_ARCH := $(firstword $(subst -, ,$(TARGET)))
 TARGET_ARCH := $(patsubst [:upper:],[:lower:],$(TARGET_ARCH))
+TARGET_ARCH := $(patsubst arm%,arm,$(TARGET_ARCH))
 TARGET_ARCH := $(subst powerpc,ppc,$(TARGET_ARCH))
 
 # On some systems enabling optimization also enables source fortification,
@@ -29,7 +30,7 @@ ifeq ($(shell $(CC) -O2 -dM -E - < /dev/null 2>&1 | grep ' _FORTIFY_SOURCE ' > /
 FORTIFY_FLAG := -D_FORTIFY_SOURCE=2
 endif
 
-FLAGS := -Wall -Wextra -Wno-format-zero-length
+FLAGS := -Wall -Wextra -Wno-format-zero-length -Wformat-security
 FLAGS += -pedantic -std=c11 -D_XOPEN_SOURCE=700 -D_GNU_SOURCE
 FLAGS +=  $(FORTIFY_FLAG) -O2 -pie -fPIE
 FLAGS += -DPAGE_SIZE=$(shell getconf PAGE_SIZE)
@@ -109,9 +110,9 @@ passt-repair: $(PASST_REPAIR_SRCS) seccomp_repair.h
 	$(CC) $(FLAGS) $(CFLAGS) $(CPPFLAGS) $(PASST_REPAIR_SRCS) -o passt-repair $(LDFLAGS)
 
 valgrind: EXTRA_SYSCALLS += rt_sigprocmask rt_sigtimedwait rt_sigaction	\
-			    rt_sigreturn getpid gettid kill clock_gettime mmap \
-			    mmap2 munmap open unlink gettimeofday futex statx \
-			    readlink
+			    rt_sigreturn getpid gettid kill clock_gettime \
+			    mmap|mmap2 munmap open unlink gettimeofday futex \
+			    statx readlink
 valgrind: FLAGS += -g -DVALGRIND
 valgrind: all
 
diff --git a/checksum.c b/checksum.c
index b01e0fe..0894eca 100644
--- a/checksum.c
+++ b/checksum.c
@@ -85,7 +85,7 @@
  */
 /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
 __attribute__((optimize("-fno-strict-aliasing")))
-uint32_t sum_16b(const void *buf, size_t len)
+static uint32_t sum_16b(const void *buf, size_t len)
 {
 	const uint16_t *p = buf;
 	uint32_t sum = 0;
@@ -107,7 +107,7 @@ uint32_t sum_16b(const void *buf, size_t len)
  *
  * Return: 16-bit folded sum
  */
-uint16_t csum_fold(uint32_t sum)
+static uint16_t csum_fold(uint32_t sum)
 {
 	while (sum >> 16)
 		sum = (sum & 0xffff) + (sum >> 16);
@@ -161,6 +161,21 @@ uint32_t proto_ipv4_header_psum(uint16_t l4len, uint8_t protocol,
 	return psum;
 }
 
+/**
+ * csum() - Compute TCP/IP-style checksum
+ * @buf:	Input buffer
+ * @len:	Input length
+ * @init:	Initial 32-bit checksum, 0 for no pre-computed checksum
+ *
+ * Return: 16-bit folded, complemented checksum
+ */
+/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
+__attribute__((optimize("-fno-strict-aliasing")))	/* See sum_16b() */
+static uint16_t csum(const void *buf, size_t len, uint32_t init)
+{
+	return (uint16_t)~csum_fold(csum_unfolded(buf, len, init));
+}
+
 /**
  * csum_udp4() - Calculate and set checksum for a UDP over IPv4 packet
  * @udp4hr:	UDP header, initialised apart from checksum
@@ -482,21 +497,6 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init)
 }
 #endif /* !__AVX2__ */
 
-/**
- * csum() - Compute TCP/IP-style checksum
- * @buf:	Input buffer
- * @len:	Input length
- * @init:	Initial 32-bit checksum, 0 for no pre-computed checksum
- *
- * Return: 16-bit folded, complemented checksum
- */
-/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
-__attribute__((optimize("-fno-strict-aliasing")))	/* See csum_16b() */
-uint16_t csum(const void *buf, size_t len, uint32_t init)
-{
-	return (uint16_t)~csum_fold(csum_unfolded(buf, len, init));
-}
-
 /**
  * csum_iov_tail() - Calculate unfolded checksum for the tail of an IO vector
  * @tail:	IO vector tail to checksum
diff --git a/checksum.h b/checksum.h
index e243c97..683a09b 100644
--- a/checksum.h
+++ b/checksum.h
@@ -11,8 +11,6 @@ struct icmphdr;
 struct icmp6hdr;
 struct iov_tail;
 
-uint32_t sum_16b(const void *buf, size_t len);
-uint16_t csum_fold(uint32_t sum);
 uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init);
 uint16_t csum_ip4_header(uint16_t l3len, uint8_t protocol,
 			 struct in_addr saddr, struct in_addr daddr);
@@ -32,7 +30,6 @@ void csum_icmp6(struct icmp6hdr *icmp6hr,
 		const struct in6_addr *saddr, const struct in6_addr *daddr,
 		const void *payload, size_t dlen);
 uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init);
-uint16_t csum(const void *buf, size_t len, uint32_t init);
 uint16_t csum_iov_tail(struct iov_tail *tail, uint32_t init);
 
 #endif /* CHECKSUM_H */
diff --git a/conf.c b/conf.c
index 18017f5..a6d7e22 100644
--- a/conf.c
+++ b/conf.c
@@ -16,6 +16,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <getopt.h>
+#include <libgen.h>
 #include <string.h>
 #include <sched.h>
 #include <sys/types.h>
@@ -123,6 +124,75 @@ static int parse_port_range(const char *s, char **endptr,
 	return 0;
 }
 
+/**
+ * conf_ports_range_except() - Set up forwarding for a range of ports minus a
+ *                             bitmap of exclusions
+ * @c:		Execution context
+ * @optname:	Short option name, t, T, u, or U
+ * @optarg:	Option argument (port specification)
+ * @fwd:	Pointer to @fwd_ports to be updated
+ * @addr:	Listening address
+ * @ifname:	Listening interface
+ * @first:	First port to forward
+ * @last:	Last port to forward
+ * @exclude:	Bitmap of ports to exclude
+ * @to:		Port to translate @first to when forwarding
+ * @weak:	Ignore errors, as long as at least one port is mapped
+ */
+static void conf_ports_range_except(const struct ctx *c, char optname,
+				    const char *optarg, struct fwd_ports *fwd,
+				    const union inany_addr *addr,
+				    const char *ifname,
+				    uint16_t first, uint16_t last,
+				    const uint8_t *exclude, uint16_t to,
+				    bool weak)
+{
+	bool bound_one = false;
+	unsigned i;
+	int ret;
+
+	if (first == 0) {
+		die("Can't forward port 0 for option '-%c %s'",
+		    optname, optarg);
+	}
+
+	for (i = first; i <= last; i++) {
+		if (bitmap_isset(exclude, i))
+			continue;
+
+		if (bitmap_isset(fwd->map, i)) {
+			warn(
+"Altering mapping of already mapped port number: %s", optarg);
+		}
+
+		bitmap_set(fwd->map, i);
+		fwd->delta[i] = to - first;
+
+		if (optname == 't')
+			ret = tcp_sock_init(c, addr, ifname, i);
+		else if (optname == 'u')
+			ret = udp_sock_init(c, 0, addr, ifname, i);
+		else
+			/* No way to check in advance for -T and -U */
+			ret = 0;
+
+		if (ret == -ENFILE || ret == -EMFILE) {
+			die("Can't open enough sockets for port specifier: %s",
+			    optarg);
+		}
+
+		if (!ret) {
+			bound_one = true;
+		} else if (!weak) {
+			die("Failed to bind port %u (%s) for option '-%c %s'",
+			    i, strerror_(-ret), optname, optarg);
+		}
+	}
+
+	if (!bound_one)
+		die("Failed to bind any port for '-%c %s'", optname, optarg);
+}
+
 /**
  * conf_ports() - Parse port configuration options, initialise UDP/TCP sockets
  * @c:		Execution context
@@ -135,10 +205,9 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg,
 {
 	union inany_addr addr_buf = inany_any6, *addr = &addr_buf;
 	char buf[BUFSIZ], *spec, *ifname = NULL, *p;
-	bool exclude_only = true, bound_one = false;
 	uint8_t exclude[PORT_BITMAP_SIZE] = { 0 };
+	bool exclude_only = true;
 	unsigned i;
-	int ret;
 
 	if (!strcmp(optarg, "none")) {
 		if (fwd->mode)
@@ -173,32 +242,15 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg,
 
 		fwd->mode = FWD_ALL;
 
-		/* Skip port 0.  It has special meaning for many socket APIs, so
-		 * trying to bind it is not really safe.
-		 */
-		for (i = 1; i < NUM_PORTS; i++) {
+		/* Exclude ephemeral ports */
+		for (i = 0; i < NUM_PORTS; i++)
 			if (fwd_port_is_ephemeral(i))
-				continue;
-
-			bitmap_set(fwd->map, i);
-			if (optname == 't') {
-				ret = tcp_sock_init(c, NULL, NULL, i);
-				if (ret == -ENFILE || ret == -EMFILE)
-					goto enfile;
-				if (!ret)
-					bound_one = true;
-			} else if (optname == 'u') {
-				ret = udp_sock_init(c, 0, NULL, NULL, i);
-				if (ret == -ENFILE || ret == -EMFILE)
-					goto enfile;
-				if (!ret)
-					bound_one = true;
-			}
-		}
-
-		if (!bound_one)
-			goto bind_all_fail;
+				bitmap_set(exclude, i);
 
+		conf_ports_range_except(c, optname, optarg, fwd,
+					NULL, NULL,
+					1, NUM_PORTS - 1, exclude,
+					1, true);
 		return;
 	}
 
@@ -275,37 +327,15 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg,
 	} while ((p = next_chunk(p, ',')));
 
 	if (exclude_only) {
-		/* Skip port 0.  It has special meaning for many socket APIs, so
-		 * trying to bind it is not really safe.
-		 */
-		for (i = 1; i < NUM_PORTS; i++) {
-			if (fwd_port_is_ephemeral(i) ||
-			    bitmap_isset(exclude, i))
-				continue;
-
-			bitmap_set(fwd->map, i);
-
-			if (optname == 't') {
-				ret = tcp_sock_init(c, addr, ifname, i);
-				if (ret == -ENFILE || ret == -EMFILE)
-					goto enfile;
-				if (!ret)
-					bound_one = true;
-			} else if (optname == 'u') {
-				ret = udp_sock_init(c, 0, addr, ifname, i);
-				if (ret == -ENFILE || ret == -EMFILE)
-					goto enfile;
-				if (!ret)
-					bound_one = true;
-			} else {
-				/* No way to check in advance for -T and -U */
-				bound_one = true;
-			}
-		}
-
-		if (!bound_one)
-			goto bind_all_fail;
+		/* Exclude ephemeral ports */
+		for (i = 0; i < NUM_PORTS; i++)
+			if (fwd_port_is_ephemeral(i))
+				bitmap_set(exclude, i);
 
+		conf_ports_range_except(c, optname, optarg, fwd,
+					addr, ifname,
+					1, NUM_PORTS - 1, exclude,
+					1, true);
 		return;
 	}
 
@@ -334,40 +364,18 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg,
 		if ((*p != '\0')  && (*p != ',')) /* Garbage after the ranges */
 			goto bad;
 
-		for (i = orig_range.first; i <= orig_range.last; i++) {
-			if (bitmap_isset(fwd->map, i))
-				warn(
-"Altering mapping of already mapped port number: %s", optarg);
-
-			if (bitmap_isset(exclude, i))
-				continue;
-
-			bitmap_set(fwd->map, i);
-
-			fwd->delta[i] = mapped_range.first - orig_range.first;
-
-			ret = 0;
-			if (optname == 't')
-				ret = tcp_sock_init(c, addr, ifname, i);
-			else if (optname == 'u')
-				ret = udp_sock_init(c, 0, addr, ifname, i);
-			if (ret)
-				goto bind_fail;
-		}
+		conf_ports_range_except(c, optname, optarg, fwd,
+					addr, ifname,
+					orig_range.first, orig_range.last,
+					exclude,
+					mapped_range.first, false);
 	} while ((p = next_chunk(p, ',')));
 
 	return;
-enfile:
-	die("Can't open enough sockets for port specifier: %s", optarg);
 bad:
 	die("Invalid port specifier %s", optarg);
 mode_conflict:
 	die("Port forwarding mode '%s' conflicts with previous mode", optarg);
-bind_fail:
-	die("Failed to bind port %u (%s) for option '-%c %s', exiting",
-	    i, strerror_(-ret), optname, optarg);
-bind_all_fail:
-	die("Failed to bind any port for '-%c %s', exiting", optname, optarg);
 }
 
 /**
@@ -406,6 +414,76 @@ static unsigned add_dns6(struct ctx *c, const struct in6_addr *addr,
 	return 1;
 }
 
+/**
+ * add_dns_resolv4() - Possibly add one IPv4 nameserver from host's resolv.conf
+ * @c:		Execution context
+ * @ns:		Nameserver address
+ * @idx:	Pointer to index of current IPv4 resolver entry, set on return
+ */
+static void add_dns_resolv4(struct ctx *c, struct in_addr *ns, unsigned *idx)
+{
+	if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_host))
+		c->ip4.dns_host = *ns;
+
+	/* Special handling if guest or container can only access local
+	 * addresses via redirect, or if the host gateway is also a resolver and
+	 * we shadow its address
+	 */
+	if (IN4_IS_ADDR_LOOPBACK(ns) ||
+	    IN4_ARE_ADDR_EQUAL(ns, &c->ip4.map_host_loopback)) {
+		if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match)) {
+			if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_host_loopback))
+				return;		/* Address unreachable */
+
+			*ns = c->ip4.map_host_loopback;
+			c->ip4.dns_match = c->ip4.map_host_loopback;
+		} else {
+			/* No general host mapping, but requested for DNS
+			 * (--dns-forward and --no-map-gw): advertise resolver
+			 * address from --dns-forward, and map that to loopback
+			 */
+			*ns = c->ip4.dns_match;
+		}
+	}
+
+	*idx += add_dns4(c, ns, *idx);
+}
+
+/**
+ * add_dns_resolv6() - Possibly add one IPv6 nameserver from host's resolv.conf
+ * @c:		Execution context
+ * @ns:		Nameserver address
+ * @idx:	Pointer to index of current IPv6 resolver entry, set on return
+ */
+static void add_dns_resolv6(struct ctx *c, struct in6_addr *ns, unsigned *idx)
+{
+	if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_host))
+		c->ip6.dns_host = *ns;
+
+	/* Special handling if guest or container can only access local
+	 * addresses via redirect, or if the host gateway is also a resolver and
+	 * we shadow its address
+	 */
+	if (IN6_IS_ADDR_LOOPBACK(ns) ||
+	    IN6_ARE_ADDR_EQUAL(ns, &c->ip6.map_host_loopback)) {
+		if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_match)) {
+			if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_host_loopback))
+				return;		/* Address unreachable */
+
+			*ns = c->ip6.map_host_loopback;
+			c->ip6.dns_match = c->ip6.map_host_loopback;
+		} else {
+			/* No general host mapping, but requested for DNS
+			 * (--dns-forward and --no-map-gw): advertise resolver
+			 * address from --dns-forward, and map that to loopback
+			 */
+			*ns = c->ip6.dns_match;
+		}
+	}
+
+	*idx += add_dns6(c, ns, *idx);
+}
+
 /**
  * add_dns_resolv() - Possibly add ns from host resolv.conf to configuration
  * @c:		Execution context
@@ -422,48 +500,11 @@ static void add_dns_resolv(struct ctx *c, const char *nameserver,
 	struct in6_addr ns6;
 	struct in_addr ns4;
 
-	if (idx4 && inet_pton(AF_INET, nameserver, &ns4)) {
-		if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_host))
-			c->ip4.dns_host = ns4;
+	if (idx4 && inet_pton(AF_INET, nameserver, &ns4))
+		add_dns_resolv4(c, &ns4, idx4);
 
-		/* Special handling if guest or container can only access local
-		 * addresses via redirect, or if the host gateway is also a
-		 * resolver and we shadow its address
-		 */
-		if (IN4_IS_ADDR_LOOPBACK(&ns4) ||
-		    IN4_ARE_ADDR_EQUAL(&ns4, &c->ip4.map_host_loopback)) {
-			if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_host_loopback))
-				return;
-
-			ns4 = c->ip4.map_host_loopback;
-			if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match))
-				c->ip4.dns_match = c->ip4.map_host_loopback;
-		}
-
-		*idx4 += add_dns4(c, &ns4, *idx4);
-	}
-
-	if (idx6 && inet_pton(AF_INET6, nameserver, &ns6)) {
-		if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_host))
-			c->ip6.dns_host = ns6;
-
-		/* Special handling if guest or container can only access local
-		 * addresses via redirect, or if the host gateway is also a
-		 * resolver and we shadow its address
-		 */
-		if (IN6_IS_ADDR_LOOPBACK(&ns6) ||
-		    IN6_ARE_ADDR_EQUAL(&ns6, &c->ip6.map_host_loopback)) {
-			if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_host_loopback))
-				return;
-
-			ns6 = c->ip6.map_host_loopback;
-
-			if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_match))
-				c->ip6.dns_match = c->ip6.map_host_loopback;
-		}
-
-		*idx6 += add_dns6(c, &ns6, *idx6);
-	}
+	if (idx6 && inet_pton(AF_INET6, nameserver, &ns6))
+		add_dns_resolv6(c, &ns6, idx6);
 }
 
 /**
@@ -991,6 +1032,45 @@ pasta_opts:
 	_exit(status);
 }
 
+/**
+ * conf_mode() - Determine passt/pasta's operating mode from command line
+ * @argc:	Argument count
+ * @argv:	Command line arguments
+ *
+ * Return: mode to operate in: MODE_VU, MODE_PASTA or MODE_PASST
+ */
+enum passt_modes conf_mode(int argc, char *argv[])
+{
+	int vhost_user = 0;
+	const struct option optvu[] = {
+		{"vhost-user",	no_argument,		&vhost_user,	1 },
+		{ 0 },
+	};
+	char argv0[PATH_MAX] = { 0 }, *basearg0;	/* Stays NUL-terminated */
+	int name;
+
+	optind = 0;
+	do {
+		name = getopt_long(argc, argv, "-:", optvu, NULL);
+	} while (name != -1);
+
+	if (vhost_user)
+		return MODE_VU;
+
+	if (argc < 1)
+		die("Cannot determine argv[0]");
+
+	strncpy(argv0, argv[0], PATH_MAX - 1);
+	basearg0 = basename(argv0);
+	if (strstr(basearg0, "pasta"))
+		return MODE_PASTA;
+
+	if (strstr(basearg0, "passt"))
+		return MODE_PASST;
+
+	die("Cannot determine mode, invoke as \"passt\" or \"pasta\"");
+}
+
 /**
  * conf_print() - Print fundamental configuration parameters
  * @c:		Execution context
@@ -1225,6 +1305,8 @@ static void conf_nat(const char *arg, struct in_addr *addr4,
 		*addr6 = in6addr_any;
 		if (no_map_gw)
 			*no_map_gw = 1;
+
+		return;
 	}
 
 	if (inet_pton(AF_INET6, arg, addr6)	&&
@@ -1388,16 +1470,17 @@ void conf(struct ctx *c, int argc, char **argv)
 		{"repair-path",	required_argument,	NULL,		28 },
 		{ 0 },
 	};
+	const char *optstring = "+dqfel:hs:F:I:p:P:m:a:n:M:g:i:o:D:S:H:461t:u:T:U:";
 	const char *logname = (c->mode == MODE_PASTA) ? "pasta" : "passt";
 	char userns[PATH_MAX] = { 0 }, netns[PATH_MAX] = { 0 };
 	bool copy_addrs_opt = false, copy_routes_opt = false;
 	enum fwd_ports_mode fwd_default = FWD_NONE;
 	bool v4_only = false, v6_only = false;
 	unsigned dns4_idx = 0, dns6_idx = 0;
+	unsigned long max_mtu = IP_MAX_MTU;
 	struct fqdn *dnss = c->dns_search;
 	unsigned int ifi4 = 0, ifi6 = 0;
 	const char *logfile = NULL;
-	const char *optstring;
 	size_t logsize = 0;
 	char *runas = NULL;
 	long fd_tap_opt;
@@ -1408,11 +1491,11 @@ void conf(struct ctx *c, int argc, char **argv)
 	if (c->mode == MODE_PASTA) {
 		c->no_dhcp_dns = c->no_dhcp_dns_search = 1;
 		fwd_default = FWD_AUTO;
-		optstring = "+dqfel:hF:I:p:P:m:a:n:M:g:i:o:D:S:H:46t:u:T:U:";
-	} else {
-		optstring = "+dqfel:hs:F:p:P:m:a:n:M:g:i:o:D:S:H:461t:u:";
 	}
 
+	if (tap_l2_max_len(c) - ETH_HLEN < max_mtu)
+		max_mtu = tap_l2_max_len(c) - ETH_HLEN;
+	c->mtu = ROUND_DOWN(max_mtu, sizeof(uint32_t));
 	c->tcp.fwd_in.mode = c->tcp.fwd_out.mode = FWD_UNSET;
 	c->udp.fwd_in.mode = c->udp.fwd_out.mode = FWD_UNSET;
 	memcpy(c->our_tap_mac, MAC_OUR_LAA, ETH_ALEN);
@@ -1580,9 +1663,8 @@ void conf(struct ctx *c, int argc, char **argv)
 
 			die("Invalid host nameserver address: %s", optarg);
 		case 25:
-			if (c->mode == MODE_PASTA)
-				die("--vhost-user is for passt mode only");
-			c->mode = MODE_VU;
+			/* Already handled in conf_mode() */
+			ASSERT(c->mode == MODE_VU);
 			break;
 		case 26:
 			vu_print_capabilities();
@@ -1593,7 +1675,14 @@ void conf(struct ctx *c, int argc, char **argv)
 				die("Invalid FQDN: %s", optarg);
 			break;
 		case 28:
-			/* Handle this once we checked --vhost-user */
+			if (c->mode != MODE_VU && strcmp(optarg, "none"))
+				die("--repair-path is for vhost-user mode only");
+
+			if (snprintf_check(c->repair_path,
+					   sizeof(c->repair_path), "%s",
+					   optarg))
+				die("Invalid passt-repair path: %s", optarg);
+
 			break;
 		case 'd':
 			c->debug = 1;
@@ -1613,6 +1702,9 @@ void conf(struct ctx *c, int argc, char **argv)
 			c->foreground = 1;
 			break;
 		case 's':
+			if (c->mode == MODE_PASTA)
+				die("-s is for passt / vhost-user mode only");
+
 			ret = snprintf(c->sock_path, sizeof(c->sock_path), "%s",
 				       optarg);
 			if (ret <= 0 || ret >= (int)sizeof(c->sock_path))
@@ -1625,7 +1717,8 @@ void conf(struct ctx *c, int argc, char **argv)
 			fd_tap_opt = strtol(optarg, NULL, 0);
 
 			if (errno ||
-			    fd_tap_opt <= STDERR_FILENO || fd_tap_opt > INT_MAX)
+			    (fd_tap_opt != STDIN_FILENO && fd_tap_opt <= STDERR_FILENO) ||
+			    fd_tap_opt > INT_MAX)
 				die("Invalid --fd: %s", optarg);
 
 			c->fd_tap = fd_tap_opt;
@@ -1633,6 +1726,9 @@ void conf(struct ctx *c, int argc, char **argv)
 			*c->sock_path = 0;
 			break;
 		case 'I':
+			if (c->mode != MODE_PASTA)
+				die("-I is for pasta mode only");
+
 			ret = snprintf(c->pasta_ifn, IFNAMSIZ, "%s",
 				       optarg);
 			if (ret <= 0 || ret >= IFNAMSIZ)
@@ -1652,20 +1748,24 @@ void conf(struct ctx *c, int argc, char **argv)
 				die("Invalid PID file: %s", optarg);
 
 			break;
-		case 'm':
+		case 'm': {
+			unsigned long mtu;
+			char *e;
+
 			errno = 0;
-			c->mtu = strtol(optarg, NULL, 0);
+			mtu = strtoul(optarg, &e, 0);
 
-			if (!c->mtu) {
-				c->mtu = -1;
-				break;
-			}
-
-			if (c->mtu < ETH_MIN_MTU || c->mtu > (int)ETH_MAX_MTU ||
-			    errno)
+			if (errno || *e)
 				die("Invalid MTU: %s", optarg);
 
+			if (mtu > max_mtu) {
+				die("MTU %lu too large (max %lu)",
+				    mtu, max_mtu);
+			}
+
+			c->mtu = mtu;
 			break;
+		}
 		case 'a':
 			if (inet_pton(AF_INET6, optarg, &c->ip6.addr)	&&
 			    !IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr)	&&
@@ -1785,11 +1885,16 @@ void conf(struct ctx *c, int argc, char **argv)
 			break;
 		case 't':
 		case 'u':
-		case 'T':
-		case 'U':
 		case 'D':
 			/* Handle these later, once addresses are configured */
 			break;
+		case 'T':
+		case 'U':
+			if (c->mode != MODE_PASTA)
+				die("-%c is for pasta mode only", name);
+
+			/* Handle properly later, once addresses are configured */
+			break;
 		case 'h':
 			usage(argv[0], stdout, EXIT_SUCCESS);
 			break;
@@ -1837,9 +1942,21 @@ void conf(struct ctx *c, int argc, char **argv)
 		c->ifi4 = conf_ip4(ifi4, &c->ip4);
 	if (!v4_only)
 		c->ifi6 = conf_ip6(ifi6, &c->ip6);
+
+	if (c->ifi4 && c->mtu < IPV4_MIN_MTU) {
+		warn("MTU %"PRIu16" is too small for IPv4 (minimum %u)",
+		     c->mtu, IPV4_MIN_MTU);
+	}
+	if (c->ifi6 && c->mtu < IPV6_MIN_MTU) {
+		warn("MTU %"PRIu16" is too small for IPv6 (minimum %u)",
+			     c->mtu, IPV6_MIN_MTU);
+	}
+
 	if ((*c->ip4.ifname_out && !c->ifi4) ||
 	    (*c->ip6.ifname_out && !c->ifi6))
 		die("External interface not usable");
+
+
 	if (!c->ifi4 && !c->ifi6) {
 		info("No external interface as template, switch to local mode");
 
@@ -1866,8 +1983,8 @@ void conf(struct ctx *c, int argc, char **argv)
 	if (c->ifi4 && IN4_IS_ADDR_UNSPECIFIED(&c->ip4.guest_gw))
 		c->no_dhcp = 1;
 
-	/* Inbound port options, DNS, and --repair-path can be parsed now, after
-	 * IPv4/IPv6 settings and --vhost-user.
+	/* Inbound port options and DNS can be parsed now, after IPv4/IPv6
+	 * settings
 	 */
 	fwd_probe_ephemeral();
 	udp_portmap_clear();
@@ -1913,16 +2030,6 @@ void conf(struct ctx *c, int argc, char **argv)
 			}
 
 			die("Cannot use DNS address %s", optarg);
-		} else if (name == 28) {
-			if (c->mode != MODE_VU && strcmp(optarg, "none"))
-				die("--repair-path is for vhost-user mode only");
-
-			if (snprintf_check(c->repair_path,
-					   sizeof(c->repair_path), "%s",
-					   optarg))
-				die("Invalid passt-repair path: %s", optarg);
-
-			break;
 		}
 	} while (name != -1);
 
@@ -1971,9 +2078,6 @@ void conf(struct ctx *c, int argc, char **argv)
 		c->no_dhcpv6 = 1;
 	}
 
-	if (!c->mtu)
-		c->mtu = ROUND_DOWN(ETH_MAX_MTU - ETH_HLEN, sizeof(uint32_t));
-
 	get_dns(c);
 
 	if (!*c->pasta_ifn) {
diff --git a/conf.h b/conf.h
index 9d2143d..b45ad74 100644
--- a/conf.h
+++ b/conf.h
@@ -6,6 +6,7 @@
 #ifndef CONF_H
 #define CONF_H
 
+enum passt_modes conf_mode(int argc, char *argv[]);
 void conf(struct ctx *c, int argc, char **argv);
 
 #endif /* CONF_H */
diff --git a/contrib/fedora/passt.spec b/contrib/fedora/passt.spec
index 6a83f8b..745cf01 100644
--- a/contrib/fedora/passt.spec
+++ b/contrib/fedora/passt.spec
@@ -44,7 +44,7 @@ Requires(preun): %{name}
 Requires(preun): policycoreutils
 
 %description selinux
-This package adds SELinux enforcement to passt(1) and pasta(1).
+This package adds SELinux enforcement to passt(1), pasta(1), and passt-repair(1).
 
 %prep
 %setup -q -n passt-%{git_hash}
@@ -82,6 +82,7 @@ make -f %{_datadir}/selinux/devel/Makefile
 install -p -m 644 -D passt.pp %{buildroot}%{_datadir}/selinux/packages/%{selinuxtype}/passt.pp
 install -p -m 644 -D passt.if %{buildroot}%{_datadir}/selinux/devel/include/distributed/passt.if
 install -p -m 644 -D pasta.pp %{buildroot}%{_datadir}/selinux/packages/%{selinuxtype}/pasta.pp
+install -p -m 644 -D passt-repair.pp %{buildroot}%{_datadir}/selinux/packages/%{selinuxtype}/passt-repair.pp
 popd
 
 %pre selinux
@@ -90,11 +91,13 @@ popd
 %post selinux
 %selinux_modules_install -s %{selinuxtype} %{_datadir}/selinux/packages/%{selinuxtype}/passt.pp
 %selinux_modules_install -s %{selinuxtype} %{_datadir}/selinux/packages/%{selinuxtype}/pasta.pp
+%selinux_modules_install -s %{selinuxtype} %{_datadir}/selinux/packages/%{selinuxtype}/passt-repair.pp
 
 %postun selinux
 if [ $1 -eq 0 ]; then
 	%selinux_modules_uninstall -s %{selinuxtype} passt
 	%selinux_modules_uninstall -s %{selinuxtype} pasta
+	%selinux_modules_uninstall -s %{selinuxtype} passt-repair
 fi
 
 %posttrans selinux
@@ -124,6 +127,7 @@ fi
 %{_datadir}/selinux/packages/%{selinuxtype}/passt.pp
 %{_datadir}/selinux/devel/include/distributed/passt.if
 %{_datadir}/selinux/packages/%{selinuxtype}/pasta.pp
+%{_datadir}/selinux/packages/%{selinuxtype}/passt-repair.pp
 
 %changelog
 {{{ passt_git_changelog }}}
diff --git a/contrib/selinux/passt-repair.te b/contrib/selinux/passt-repair.te
index e3ffbcd..7157dfb 100644
--- a/contrib/selinux/passt-repair.te
+++ b/contrib/selinux/passt-repair.te
@@ -28,12 +28,22 @@ require {
 	type console_device_t;
 	type user_devpts_t;
 	type user_tmp_t;
+
+	# Workaround: passt-repair needs to access socket files
+	# that passt, started by libvirt, might create under different
+	# labels, depending on whether passt is started as root or not.
+	#
+	# However, libvirt doesn't maintain its own policy, which makes
+	# updates particularly complicated. To avoid breakage in the short
+	# term, deal with that in passt's own policy.
+	type qemu_var_run_t;
+	type virt_var_run_t;
 }
 
 type passt_repair_t;
 domain_type(passt_repair_t);
 type passt_repair_exec_t;
-files_type(passt_repair_exec_t);
+corecmd_executable_file(passt_repair_exec_t);
 
 role unconfined_r types passt_repair_t;
 
@@ -41,7 +51,8 @@ allow passt_repair_t passt_repair_exec_t:file { read execute execute_no_trans en
 type_transition unconfined_t passt_repair_exec_t:process passt_repair_t;
 allow unconfined_t passt_repair_t:process transition;
 
-allow passt_repair_t self:capability { dac_override net_admin net_raw };
+allow passt_repair_t self:capability { dac_override dac_read_search net_admin net_raw };
+allow passt_repair_t self:capability2 bpf;
 
 allow passt_repair_t console_device_t:chr_file { append open getattr read write ioctl };
 allow passt_repair_t user_devpts_t:chr_file { append open getattr read write ioctl };
@@ -50,9 +61,27 @@ allow passt_repair_t unconfined_t:unix_stream_socket { connectto read write };
 allow passt_repair_t passt_t:unix_stream_socket { connectto read write };
 allow passt_repair_t user_tmp_t:unix_stream_socket { connectto read write };
 
-allow passt_repair_t unconfined_t:sock_file { read write };
-allow passt_repair_t passt_t:sock_file { read write };
-allow passt_repair_t user_tmp_t:sock_file { read write };
+allow passt_repair_t user_tmp_t:dir { getattr read search watch };
+
+allow passt_repair_t unconfined_t:sock_file { getattr read write };
+allow passt_repair_t passt_t:sock_file { getattr read write };
+allow passt_repair_t user_tmp_t:sock_file { getattr read write };
 
 allow passt_repair_t unconfined_t:tcp_socket { read setopt write };
 allow passt_repair_t passt_t:tcp_socket { read setopt write };
+
+# Workaround: passt-repair needs to access socket files
+# that passt, started by libvirt, might create under different
+# labels, depending on whether passt is started as root or not.
+#
+# However, libvirt doesn't maintain its own policy, which makes
+# updates particularly complicated. To avoid breakage in the short
+# term, deal with that in passt's own policy.
+allow passt_repair_t qemu_var_run_t:unix_stream_socket { connectto read write };
+allow passt_repair_t virt_var_run_t:unix_stream_socket { connectto read write };
+
+allow passt_repair_t qemu_var_run_t:dir { getattr read search watch };
+allow passt_repair_t virt_var_run_t:dir { getattr read search watch };
+
+allow passt_repair_t qemu_var_run_t:sock_file { getattr read write };
+allow passt_repair_t virt_var_run_t:sock_file { getattr read write };
diff --git a/contrib/selinux/passt.te b/contrib/selinux/passt.te
index f595079..eb9ce72 100644
--- a/contrib/selinux/passt.te
+++ b/contrib/selinux/passt.te
@@ -29,6 +29,9 @@ require {
 	# particularly complicated. To avoid breakage in the short term,
 	# deal with it in passt's own policy.
 	type svirt_image_t;
+	type svirt_tmpfs_t;
+	type svirt_t;
+	type null_device_t;
 
 	class file { ioctl getattr setattr create read write unlink open relabelto execute execute_no_trans map };
 	class dir { search write add_name remove_name mounton };
@@ -45,8 +48,8 @@ require {
 	type net_conf_t;
 	type proc_net_t;
 	type node_t;
-	class tcp_socket { create accept listen name_bind name_connect getattr };
-	class udp_socket { create accept listen };
+	class tcp_socket { create accept listen name_bind name_connect getattr ioctl };
+	class udp_socket { create accept listen getattr };
 	class icmp_socket { bind create name_bind node_bind setopt read write };
 	class sock_file { create unlink write };
 
@@ -129,8 +132,8 @@ corenet_udp_sendrecv_all_ports(passt_t)
 allow passt_t node_t:icmp_socket { name_bind node_bind };
 allow passt_t port_t:icmp_socket name_bind;
 
-allow passt_t self:tcp_socket { create getopt setopt connect bind listen accept shutdown read write getattr };
-allow passt_t self:udp_socket { create getopt setopt connect bind read write };
+allow passt_t self:tcp_socket { create getopt setopt connect bind listen accept shutdown read write getattr ioctl };
+allow passt_t self:udp_socket { create getopt setopt connect bind read write getattr };
 allow passt_t self:icmp_socket { bind create setopt read write };
 
 allow passt_t user_tmp_t:dir { add_name write };
@@ -143,3 +146,5 @@ allow passt_t unconfined_t:unix_stream_socket { read write };
 # particularly complicated. To avoid breakage in the short term,
 # deal with it in passt's own policy.
 allow passt_t svirt_image_t:file { read write map };
+allow passt_t svirt_tmpfs_t:file { read write map };
+allow passt_t null_device_t:chr_file map;
diff --git a/dhcp.c b/dhcp.c
index 401cb5b..b0de04b 100644
--- a/dhcp.c
+++ b/dhcp.c
@@ -64,9 +64,9 @@ static struct opt opts[255];
 #define OPT_MIN		60 /* RFC 951 */
 
 /* Total option size (excluding end option) is 576 (RFC 2131), minus
- * offset of options (268), minus end option and its length (2).
+ * offset of options (268), minus end option (1).
  */
-#define OPT_MAX		306
+#define OPT_MAX		307
 
 /**
  * dhcp_init() - Initialise DHCP options
@@ -127,7 +127,7 @@ struct msg {
 	uint8_t sname[64];
 	uint8_t file[128];
 	uint32_t magic;
-	uint8_t o[OPT_MAX + 2 /* End option and its length */ ];
+	uint8_t o[OPT_MAX + 1 /* End option */ ];
 } __attribute__((__packed__));
 
 /**
@@ -143,7 +143,7 @@ static bool fill_one(struct msg *m, int o, int *offset)
 	size_t slen = opts[o].slen;
 
 	/* If we don't have space to write the option, then just skip */
-	if (*offset + 1 /* length of option */ + slen > OPT_MAX)
+	if (*offset + 2 /* code and length of option */ + slen > OPT_MAX)
 		return true;
 
 	m->o[*offset] = o;
@@ -194,7 +194,6 @@ static int fill(struct msg *m)
 	}
 
 	m->o[offset++] = 255;
-	m->o[offset++] = 0;
 
 	if (offset < OPT_MIN) {
 		memset(&m->o[offset], 0, OPT_MIN - offset);
@@ -418,7 +417,7 @@ int dhcp(const struct ctx *c, const struct pool *p)
 		       &c->ip4.guest_gw, sizeof(c->ip4.guest_gw));
 	}
 
-	if (c->mtu != -1) {
+	if (c->mtu) {
 		opts[26].slen = 2;
 		opts[26].s[0] = c->mtu / 256;
 		opts[26].s[1] = c->mtu % 256;
diff --git a/dhcpv6.c b/dhcpv6.c
index 373a988..ba16c66 100644
--- a/dhcpv6.c
+++ b/dhcpv6.c
@@ -144,7 +144,9 @@ struct opt_ia_addr {
 struct opt_status_code {
 	struct opt_hdr hdr;
 	uint16_t code;
-	char status_msg[sizeof(STR_NOTONLINK) - 1];
+	/* "nonstring" is only supported since clang 23 */
+	/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
+	__attribute__((nonstring)) char status_msg[sizeof(STR_NOTONLINK) - 1];
 } __attribute__((packed));
 
 /**
diff --git a/doc/platform-requirements/.gitignore b/doc/platform-requirements/.gitignore
index 3b5a10a..f6272cf 100644
--- a/doc/platform-requirements/.gitignore
+++ b/doc/platform-requirements/.gitignore
@@ -1,3 +1,4 @@
+/listen-vs-repair
 /reuseaddr-priority
 /recv-zero
 /udp-close-dup
diff --git a/doc/platform-requirements/Makefile b/doc/platform-requirements/Makefile
index 6a7d374..83930ef 100644
--- a/doc/platform-requirements/Makefile
+++ b/doc/platform-requirements/Makefile
@@ -3,8 +3,8 @@
 # Copyright Red Hat
 # Author: David Gibson <david@gibson.dropbear.id.au>
 
-TARGETS = reuseaddr-priority recv-zero udp-close-dup
-SRCS = reuseaddr-priority.c recv-zero.c udp-close-dup.c
+TARGETS = reuseaddr-priority recv-zero udp-close-dup listen-vs-repair
+SRCS = reuseaddr-priority.c recv-zero.c udp-close-dup.c listen-vs-repair.c
 CFLAGS = -Wall
 
 all: cppcheck clang-tidy $(TARGETS:%=check-%)
diff --git a/doc/platform-requirements/common.h b/doc/platform-requirements/common.h
index 8844b1e..e85fc2b 100644
--- a/doc/platform-requirements/common.h
+++ b/doc/platform-requirements/common.h
@@ -15,6 +15,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+__attribute__((format(printf, 1, 2), noreturn))
 static inline void die(const char *fmt, ...)
 {
 	va_list ap;
diff --git a/doc/platform-requirements/listen-vs-repair.c b/doc/platform-requirements/listen-vs-repair.c
new file mode 100644
index 0000000..d31fe3f
--- /dev/null
+++ b/doc/platform-requirements/listen-vs-repair.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* listen-vs-repair.c
+ *
+ * Do listening sockets have address conflicts with sockets under repair
+ * ====================================================================
+ *
+ * When we accept() an incoming connection the accept()ed socket will have the
+ * same local address as the listening socket.  This can be a complication on
+ * migration.  On the migration target we've already set up listening sockets
+ * according to the command line.  However to restore connections that we're
+ * migrating in we need to bind the new sockets to the same address, which would
+ * be an address conflict on the face of it.  This test program verifies that
+ * enabling repair mode before bind() correctly suppresses that conflict.
+ *
+ * Copyright Red Hat
+ * Author: David Gibson <david@gibson.dropbear.id.au>
+ */
+
+/* NOLINTNEXTLINE(bugprone-reserved-identifier,cert-dcl37-c,cert-dcl51-cpp) */
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "common.h"
+
+#define PORT	13256U
+#define CPORT	13257U
+
+/* 127.0.0.1:PORT */
+static const struct sockaddr_in addr = SOCKADDR_INIT(INADDR_LOOPBACK, PORT);
+
+/* 127.0.0.1:CPORT */
+static const struct sockaddr_in caddr = SOCKADDR_INIT(INADDR_LOOPBACK, CPORT);
+
+/* Put ourselves into a network sandbox */
+static void net_sandbox(void)
+{
+	/* NOLINTNEXTLINE(altera-struct-pack-align) */
+	const struct req_t {
+		struct nlmsghdr nlh;
+		struct ifinfomsg ifm;
+	} __attribute__((packed)) req = {
+		.nlh.nlmsg_type		= RTM_NEWLINK,
+		.nlh.nlmsg_flags	= NLM_F_REQUEST,
+		.nlh.nlmsg_len		= sizeof(req),
+		.nlh.nlmsg_seq		= 1,
+		.ifm.ifi_family		= AF_UNSPEC,
+                .ifm.ifi_index		= 1,
+                .ifm.ifi_flags		= IFF_UP,
+                .ifm.ifi_change		= IFF_UP,
+	};
+	int nl;
+
+	if (unshare(CLONE_NEWUSER | CLONE_NEWNET))
+		die("unshare(): %s\n", strerror(errno));
+
+	/* Bring up lo in the new netns */
+	nl = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
+	if (nl < 0)
+		die("Can't create netlink socket: %s\n", strerror(errno));
+
+	if (send(nl, &req, sizeof(req), 0) < 0)
+		die("Netlink send(): %s\n", strerror(errno));
+	close(nl);
+}
+
+static void check(void)
+{
+	int s1, s2, op;
+
+	s1 = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+	if (s1 < 0)
+		die("socket() 1: %s\n", strerror(errno));
+
+	if (bind(s1, (struct sockaddr *)&addr, sizeof(addr)))
+		die("bind() 1: %s\n", strerror(errno));
+
+	if (listen(s1, 0))
+		die("listen(): %s\n", strerror(errno));
+
+	s2 = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+	if (s2 < 0)
+		die("socket() 2: %s\n", strerror(errno));
+
+	op = TCP_REPAIR_ON;
+	if (setsockopt(s2, SOL_TCP, TCP_REPAIR, &op, sizeof(op)))
+		die("TCP_REPAIR: %s\n", strerror(errno));
+
+	if (bind(s2, (struct sockaddr *)&addr, sizeof(addr)))
+		die("bind() 2: %s\n", strerror(errno));
+
+	if (connect(s2, (struct sockaddr *)&caddr, sizeof(caddr)))
+		die("connect(): %s\n", strerror(errno));
+
+	op = TCP_REPAIR_OFF_NO_WP;
+	if (setsockopt(s2, SOL_TCP, TCP_REPAIR, &op, sizeof(op)))
+		die("TCP_REPAIR: %s\n", strerror(errno));
+
+	close(s1);
+	close(s2);
+}
+
+int main(int argc, char *argv[])
+{
+	(void)argc;
+	(void)argv;
+
+	net_sandbox();
+
+	check();
+
+	printf("Repair mode appears to properly suppress conflicts with listening sockets\n");
+
+	exit(0);
+}
diff --git a/doc/platform-requirements/reuseaddr-priority.c b/doc/platform-requirements/reuseaddr-priority.c
index 701b6ff..af39a39 100644
--- a/doc/platform-requirements/reuseaddr-priority.c
+++ b/doc/platform-requirements/reuseaddr-priority.c
@@ -46,13 +46,13 @@
 /* Different cases for receiving socket configuration */
 enum sock_type {
 	/* Socket is bound to 0.0.0.0:DSTPORT and not connected */
-	SOCK_BOUND_ANY = 0,
+	SOCK_BOUND_ANY,
 
 	/* Socket is bound to 127.0.0.1:DSTPORT and not connected */
-	SOCK_BOUND_LO = 1,
+	SOCK_BOUND_LO,
 
 	/* Socket is bound to 0.0.0.0:DSTPORT and connected to 127.0.0.1:SRCPORT */
-	SOCK_CONNECTED = 2,
+	SOCK_CONNECTED,
 
 	NUM_SOCK_TYPES,
 };
diff --git a/epoll_type.h b/epoll_type.h
index 7f2a121..12ac59b 100644
--- a/epoll_type.h
+++ b/epoll_type.h
@@ -22,8 +22,8 @@ enum epoll_type {
 	EPOLL_TYPE_TCP_TIMER,
 	/* UDP "listening" sockets */
 	EPOLL_TYPE_UDP_LISTEN,
-	/* UDP socket for replies on a specific flow */
-	EPOLL_TYPE_UDP_REPLY,
+	/* UDP socket for a specific flow */
+	EPOLL_TYPE_UDP,
 	/* ICMP/ICMPv6 ping sockets */
 	EPOLL_TYPE_PING,
 	/* inotify fd watching for end of netns (pasta) */
diff --git a/flow.c b/flow.c
index cc881e8..6a5c8aa 100644
--- a/flow.c
+++ b/flow.c
@@ -53,30 +53,8 @@ const uint8_t flow_proto[] = {
 static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
 	      "flow_proto[] doesn't match enum flow_type");
 
-#define foreach_flow(i, flow, bound)					\
-	for ((i) = 0, (flow) = &flowtab[(i)];				\
-	     (i) < (bound);						\
-	     (i)++, (flow) = &flowtab[(i)])				\
-		if ((flow)->f.state == FLOW_STATE_FREE)			\
-			(i) += (flow)->free.n - 1;			\
-		else
-
-#define foreach_active_flow(i, flow, bound)				\
-	foreach_flow((i), (flow), (bound))				\
-		if ((flow)->f.state != FLOW_STATE_ACTIVE)		\
-			/* NOLINTNEXTLINE(bugprone-branch-clone) */	\
-			continue;					\
-		else
-
-#define foreach_tcp_flow(i, flow, bound)				\
-	foreach_active_flow((i), (flow), (bound))			\
-		if ((flow)->f.type != FLOW_TCP)				\
-			/* NOLINTNEXTLINE(bugprone-branch-clone) */	\
-			continue;					\
-		else
-
-#define foreach_established_tcp_flow(i, flow, bound)			\
-	foreach_tcp_flow((i), (flow), (bound))				\
+#define foreach_established_tcp_flow(flow)				\
+	flow_foreach_of_type((flow), FLOW_TCP)				\
 		if (!tcp_flow_is_established(&(flow)->tcp))		\
 			/* NOLINTNEXTLINE(bugprone-branch-clone) */	\
 			continue;					\
@@ -103,7 +81,7 @@ static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
  *
  * Free cluster list
  *    flow_first_free gives the index of the first (lowest index) free cluster.
- *    Each free cluster has the index of the next free cluster, or MAX_FLOW if
+ *    Each free cluster has the index of the next free cluster, or FLOW_MAX if
  *    it is the last free cluster.  Together these form a linked list of free
  *    clusters, in strictly increasing order of index.
  *
@@ -289,11 +267,13 @@ int flowside_connect(const struct ctx *c, int s,
 
 /** flow_log_ - Log flow-related message
  * @f:		flow the message is related to
+ * @newline:	Append newline at the end of the message, if missing
  * @pri:	Log priority
  * @fmt:	Format string
  * @...:	printf-arguments
  */
-void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
+void flow_log_(const struct flow_common *f, bool newline, int pri,
+	       const char *fmt, ...)
 {
 	const char *type_or_state;
 	char msg[BUFSIZ];
@@ -309,7 +289,7 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
 	else
 		type_or_state = FLOW_TYPE(f);
 
-	logmsg(true, false, pri,
+	logmsg(newline, false, pri,
 	       "Flow %u (%s): %s", flow_idx(f), type_or_state, msg);
 }
 
@@ -329,7 +309,7 @@ void flow_log_details_(const struct flow_common *f, int pri,
 	const struct flowside *tgt = &f->side[TGTSIDE];
 
 	if (state >= FLOW_STATE_TGT)
-		flow_log_(f, pri,
+		flow_log_(f, true, pri,
 			  "%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu",
 			  pif_name(f->pif[INISIDE]),
 			  inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
@@ -342,7 +322,7 @@ void flow_log_details_(const struct flow_common *f, int pri,
 			  inany_ntop(&tgt->eaddr, estr1, sizeof(estr1)),
 			  tgt->eport);
 	else if (state >= FLOW_STATE_INI)
-		flow_log_(f, pri, "%s [%s]:%hu -> [%s]:%hu => ?",
+		flow_log_(f, true, pri, "%s [%s]:%hu -> [%s]:%hu => ?",
 			  pif_name(f->pif[INISIDE]),
 			  inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
 			  ini->eport,
@@ -363,7 +343,7 @@ static void flow_set_state(struct flow_common *f, enum flow_state state)
 	ASSERT(oldstate < FLOW_NUM_STATES);
 
 	f->state = state;
-	flow_log_(f, LOG_DEBUG, "%s -> %s", flow_state_str[oldstate],
+	flow_log_(f, true, LOG_DEBUG, "%s -> %s", flow_state_str[oldstate],
 		  FLOW_STATE(f));
 
 	flow_log_details_(f, LOG_DEBUG, MAX(state, oldstate));
@@ -416,18 +396,27 @@ const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif,
  * @flow:	Flow to change state
  * @pif:	pif of the initiating side
  * @ssa:	Source socket address
+ * @daddr:	Destination address (may be NULL)
  * @dport:	Destination port
  *
  * Return: pointer to the initiating flowside information
  */
 struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif,
 				  const union sockaddr_inany *ssa,
+				  const union inany_addr *daddr,
 				  in_port_t dport)
 {
 	struct flowside *ini = &flow->f.side[INISIDE];
 
-	inany_from_sockaddr(&ini->eaddr, &ini->eport, ssa);
-	if (inany_v4(&ini->eaddr))
+	if (inany_from_sockaddr(&ini->eaddr, &ini->eport, ssa) < 0) {
+		char str[SOCKADDR_STRLEN];
+
+		ASSERT_WITH_MSG(0, "Bad socket address %s",
+				sockaddr_ntop(ssa, str, sizeof(str)));
+	}
+	if (daddr)
+		ini->oaddr = *daddr;
+	else if (inany_v4(&ini->eaddr))
 		ini->oaddr = inany_any4;
 	else
 		ini->oaddr = inany_any6;
@@ -771,19 +760,30 @@ flow_sidx_t flow_lookup_af(const struct ctx *c,
  * @proto:	Protocol of the flow (IP L4 protocol number)
  * @pif:	Interface of the flow
  * @esa:	Socket address of the endpoint
+ * @oaddr:	Our address (may be NULL)
  * @oport:	Our port number
  *
  * Return: sidx of the matching flow & side, FLOW_SIDX_NONE if not found
  */
 flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif,
-			   const void *esa, in_port_t oport)
+			   const void *esa,
+			   const union inany_addr *oaddr, in_port_t oport)
 {
 	struct flowside side = {
 		.oport = oport,
 	};
 
-	inany_from_sockaddr(&side.eaddr, &side.eport, esa);
-	if (inany_v4(&side.eaddr))
+	if (inany_from_sockaddr(&side.eaddr, &side.eport, esa) < 0) {
+		char str[SOCKADDR_STRLEN];
+
+		warn("Flow lookup on bad socket address %s",
+		     sockaddr_ntop(esa, str, sizeof(str)));
+		return FLOW_SIDX_NONE;
+	}
+
+	if (oaddr)
+		side.oaddr = *oaddr;
+	else if (inany_v4(&side.eaddr))
 		side.oaddr = inany_any4;
 	else
 		side.oaddr = inany_any6;
@@ -800,8 +800,9 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 {
 	struct flow_free_cluster *free_head = NULL;
 	unsigned *last_next = &flow_first_free;
+	bool to_free[FLOW_MAX] = { 0 };
 	bool timer = false;
-	unsigned idx;
+	union flow *flow;
 
 	if (timespec_diff_ms(now, &flow_timer_run) >= FLOW_TIMER_INTERVAL) {
 		timer = true;
@@ -810,49 +811,12 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 
 	ASSERT(!flow_new_entry); /* Incomplete flow at end of cycle */
 
-	for (idx = 0; idx < FLOW_MAX; idx++) {
-		union flow *flow = &flowtab[idx];
+	/* Check which flows we might need to close first, but don't free them
+	 * yet as it's not safe to do that in the middle of flow_foreach().
+	 */
+	flow_foreach(flow) {
 		bool closed = false;
 
-		switch (flow->f.state) {
-		case FLOW_STATE_FREE: {
-			unsigned skip = flow->free.n;
-
-			/* First entry of a free cluster must have n >= 1 */
-			ASSERT(skip);
-
-			if (free_head) {
-				/* Merge into preceding free cluster */
-				free_head->n += flow->free.n;
-				flow->free.n = flow->free.next = 0;
-			} else {
-				/* New free cluster, add to chain */
-				free_head = &flow->free;
-				*last_next = idx;
-				last_next = &free_head->next;
-			}
-
-			/* Skip remaining empty entries */
-			idx += skip - 1;
-			continue;
-		}
-
-		case FLOW_STATE_NEW:
-		case FLOW_STATE_INI:
-		case FLOW_STATE_TGT:
-		case FLOW_STATE_TYPED:
-			/* Incomplete flow at end of cycle */
-			ASSERT(false);
-			break;
-
-		case FLOW_STATE_ACTIVE:
-			/* Nothing to do */
-			break;
-
-		default:
-			ASSERT(false);
-		}
-
 		switch (flow->f.type) {
 		case FLOW_TYPE_NONE:
 			ASSERT(false);
@@ -871,7 +835,7 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 				closed = icmp_ping_timer(c, &flow->ping, now);
 			break;
 		case FLOW_UDP:
-			closed = udp_flow_defer(&flow->udp);
+			closed = udp_flow_defer(c, &flow->udp, now);
 			if (!closed && timer)
 				closed = udp_flow_timer(c, &flow->udp, now);
 			break;
@@ -880,24 +844,67 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 			;
 		}
 
-		if (closed) {
-			flow_set_state(&flow->f, FLOW_STATE_FREE);
-			memset(flow, 0, sizeof(*flow));
+		to_free[FLOW_IDX(flow)] = closed;
+	}
+
+	/* Second step: actually free the flows */
+	flow_foreach_slot(flow) {
+		switch (flow->f.state) {
+		case FLOW_STATE_FREE: {
+			unsigned skip = flow->free.n;
+
+			/* First entry of a free cluster must have n >= 1 */
+			ASSERT(skip);
 
 			if (free_head) {
-				/* Add slot to current free cluster */
-				ASSERT(idx == FLOW_IDX(free_head) + free_head->n);
-				free_head->n++;
+				/* Merge into preceding free cluster */
+				free_head->n += flow->free.n;
 				flow->free.n = flow->free.next = 0;
 			} else {
-				/* Create new free cluster */
+				/* New free cluster, add to chain */
 				free_head = &flow->free;
-				free_head->n = 1;
-				*last_next = idx;
+				*last_next = FLOW_IDX(flow);
 				last_next = &free_head->next;
 			}
-		} else {
-			free_head = NULL;
+
+			/* Skip remaining empty entries */
+			flow += skip - 1;
+			continue;
+		}
+
+		case FLOW_STATE_NEW:
+		case FLOW_STATE_INI:
+		case FLOW_STATE_TGT:
+		case FLOW_STATE_TYPED:
+			/* Incomplete flow at end of cycle */
+			ASSERT(false);
+			break;
+
+		case FLOW_STATE_ACTIVE:
+			if (to_free[FLOW_IDX(flow)]) {
+				flow_set_state(&flow->f, FLOW_STATE_FREE);
+				memset(flow, 0, sizeof(*flow));
+
+				if (free_head) {
+					/* Add slot to current free cluster */
+					ASSERT(FLOW_IDX(flow) ==
+					    FLOW_IDX(free_head) + free_head->n);
+					free_head->n++;
+					flow->free.n = flow->free.next = 0;
+				} else {
+					/* Create new free cluster */
+					free_head = &flow->free;
+					free_head->n = 1;
+					*last_next = FLOW_IDX(flow);
+					last_next = &free_head->next;
+				}
+			} else {
+				free_head = NULL;
+			}
+			break;
+
+		default:
+			ASSERT(false);
 		}
 	}
 
@@ -907,22 +914,23 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 /**
  * flow_migrate_source_rollback() - Disable repair mode, return failure
  * @c:		Execution context
- * @max_flow:	Maximum index of affected flows
+ * @bound:	No need to roll back flow indices >= @bound
  * @ret:	Negative error code
  *
  * Return: @ret
  */
-static int flow_migrate_source_rollback(struct ctx *c, unsigned max_flow,
-					int ret)
+static int flow_migrate_source_rollback(struct ctx *c, unsigned bound, int ret)
 {
 	union flow *flow;
-	unsigned i;
 
 	debug("...roll back migration");
 
-	foreach_established_tcp_flow(i, flow, max_flow)
+	foreach_established_tcp_flow(flow) {
+		if (FLOW_IDX(flow) >= bound)
+			break;
 		if (tcp_flow_repair_off(c, &flow->tcp))
 			die("Failed to roll back TCP_REPAIR mode");
+	}
 
 	if (repair_flush(c))
 		die("Failed to roll back TCP_REPAIR mode");
@@ -930,6 +938,21 @@ static int flow_migrate_source_rollback(struct ctx *c, unsigned max_flow,
 	return ret;
 }
 
+/**
+ * flow_migrate_need_repair() - Do we need to set repair mode for any flow?
+ *
+ * Return: true if repair mode is needed, false otherwise
+ */
+static bool flow_migrate_need_repair(void)
+{
+	union flow *flow;
+
+	foreach_established_tcp_flow(flow)
+		return true;
+
+	return false;
+}
+
 /**
  * flow_migrate_repair_all() - Turn repair mode on or off for all flows
  * @c:		Execution context
@@ -940,10 +963,13 @@ static int flow_migrate_source_rollback(struct ctx *c, unsigned max_flow,
 static int flow_migrate_repair_all(struct ctx *c, bool enable)
 {
 	union flow *flow;
-	unsigned i;
 	int rc;
 
-	foreach_established_tcp_flow(i, flow, FLOW_MAX) {
+	/* If we don't have a repair helper, there's nothing we can do */
+	if (c->fd_repair < 0)
+		return 0;
+
+	foreach_established_tcp_flow(flow) {
 		if (enable)
 			rc = tcp_flow_repair_on(c, &flow->tcp);
 		else
@@ -952,14 +978,15 @@ static int flow_migrate_repair_all(struct ctx *c, bool enable)
 		if (rc) {
 			debug("Can't %s repair mode: %s",
 			      enable ? "enable" : "disable", strerror_(-rc));
-			return flow_migrate_source_rollback(c, i, rc);
+			return flow_migrate_source_rollback(c, FLOW_IDX(flow),
+							    rc);
 		}
 	}
 
 	if ((rc = repair_flush(c))) {
 		debug("Can't %s repair mode: %s",
 		      enable ? "enable" : "disable", strerror_(-rc));
-		return flow_migrate_source_rollback(c, i, rc);
+		return flow_migrate_source_rollback(c, FLOW_IDX(flow), rc);
 	}
 
 	return 0;
@@ -981,6 +1008,9 @@ int flow_migrate_source_pre(struct ctx *c, const struct migrate_stage *stage,
 	(void)stage;
 	(void)fd;
 
+	if (flow_migrate_need_repair())
+		repair_wait(c);
+
 	if ((rc = flow_migrate_repair_all(c, true)))
 		return -rc;
 
@@ -1001,14 +1031,16 @@ int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage,
 	uint32_t count = 0;
 	bool first = true;
 	union flow *flow;
-	unsigned i;
 	int rc;
 
 	(void)c;
 	(void)stage;
 
-	foreach_established_tcp_flow(i, flow, FLOW_MAX)
-		count++;
+	/* If we don't have a repair helper, we can't migrate TCP flows */
+	if (c->fd_repair >= 0) {
+		foreach_established_tcp_flow(flow)
+			count++;
+	}
 
 	count = htonl(count);
 	if (write_all_buf(fd, &count, sizeof(count))) {
@@ -1019,6 +1051,9 @@ int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage,
 
 	debug("Sending %u flows", ntohl(count));
 
+	if (!count)
+		return 0;
+
 	/* Dump and send information that can be stored in the flow table.
 	 *
 	 * Limited rollback options here: if we fail to transfer any data (that
@@ -1026,10 +1061,11 @@ int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage,
 	 * stream might now be inconsistent, and we might have closed listening
 	 * TCP sockets, so just terminate.
 	 */
-	foreach_established_tcp_flow(i, flow, FLOW_MAX) {
+	foreach_established_tcp_flow(flow) {
 		rc = tcp_flow_migrate_source(fd, &flow->tcp);
 		if (rc) {
-			err("Can't send data, flow %u: %s", i, strerror_(-rc));
+			flow_err(flow, "Can't send data: %s",
+				 strerror_(-rc));
 			if (!first)
 				die("Inconsistent migration state, exiting");
 
@@ -1052,10 +1088,11 @@ int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage,
 	 * failures but not if the stream might be inconsistent (reported here
 	 * as EIO).
 	 */
-	foreach_established_tcp_flow(i, flow, FLOW_MAX) {
-		rc = tcp_flow_migrate_source_ext(fd, i, &flow->tcp);
+	foreach_established_tcp_flow(flow) {
+		rc = tcp_flow_migrate_source_ext(fd, &flow->tcp);
 		if (rc) {
-			err("Extended data for flow %u: %s", i, strerror_(-rc));
+			flow_err(flow, "Can't send extended data: %s",
+				 strerror_(-rc));
 
 			if (rc == -EIO)
 				die("Inconsistent migration state, exiting");
@@ -1088,6 +1125,11 @@ int flow_migrate_target(struct ctx *c, const struct migrate_stage *stage,
 	count = ntohl(count);
 	debug("Receiving %u flows", count);
 
+	if (!count)
+		return 0;
+
+	repair_wait(c);
+
 	if ((rc = flow_migrate_repair_all(c, true)))
 		return -rc;
 
@@ -1097,8 +1139,8 @@ int flow_migrate_target(struct ctx *c, const struct migrate_stage *stage,
 	for (i = 0; i < count; i++) {
 		rc = tcp_flow_migrate_target(c, fd);
 		if (rc) {
-			debug("Migration data failure at flow %u: %s, abort",
-			      i, strerror_(-rc));
+			flow_dbg(FLOW(i), "Migration data failure, abort: %s",
+				 strerror_(-rc));
 			return -rc;
 		}
 	}
@@ -1106,10 +1148,10 @@ int flow_migrate_target(struct ctx *c, const struct migrate_stage *stage,
 	repair_flush(c);
 
 	for (i = 0; i < count; i++) {
-		rc = tcp_flow_migrate_target_ext(c, flowtab + i, fd);
+		rc = tcp_flow_migrate_target_ext(c, &flowtab[i].tcp, fd);
 		if (rc) {
-			debug("Migration data failure at flow %u: %s, abort",
-			      i, strerror_(-rc));
+			flow_dbg(FLOW(i), "Migration data failure, abort: %s",
+				 strerror_(-rc));
 			return -rc;
 		}
 	}
diff --git a/flow.h b/flow.h
index 675726e..cac618a 100644
--- a/flow.h
+++ b/flow.h
@@ -243,7 +243,8 @@ flow_sidx_t flow_lookup_af(const struct ctx *c,
 			   const void *eaddr, const void *oaddr,
 			   in_port_t eport, in_port_t oport);
 flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif,
-			   const void *esa, in_port_t oport);
+			   const void *esa,
+			   const union inany_addr *oaddr, in_port_t oport);
 
 union flow;
 
@@ -258,11 +259,11 @@ int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage,
 int flow_migrate_target(struct ctx *c, const struct migrate_stage *stage,
 			int fd);
 
-void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
-	__attribute__((format(printf, 3, 4)));
-
-#define flow_log(f_, pri, ...)	flow_log_(&(f_)->f, (pri), __VA_ARGS__)
+void flow_log_(const struct flow_common *f, bool newline, int pri,
+	       const char *fmt, ...)
+	__attribute__((format(printf, 4, 5)));
 
+#define flow_log(f_, pri, ...)	flow_log_(&(f_)->f, true, (pri), __VA_ARGS__)
 #define flow_dbg(f, ...)	flow_log((f), LOG_DEBUG, __VA_ARGS__)
 #define flow_err(f, ...)	flow_log((f), LOG_ERR, __VA_ARGS__)
 
@@ -272,6 +273,16 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
 			flow_dbg((f), __VA_ARGS__);			\
 	} while (0)
 
+#define flow_log_perror_(f, pri, ...)					\
+	do {								\
+		int errno_ = errno;					\
+		flow_log_((f), false, (pri), __VA_ARGS__);		\
+		logmsg(true, true, (pri), ": %s", strerror_(errno_));	\
+	} while (0)
+
+#define flow_dbg_perror(f_, ...) flow_log_perror_(&(f_)->f, LOG_DEBUG, __VA_ARGS__)
+#define flow_perror(f_, ...)	flow_log_perror_(&(f_)->f, LOG_ERR, __VA_ARGS__)
+
 void flow_log_details_(const struct flow_common *f, int pri,
 		       enum flow_state state);
 #define flow_log_details(f_, pri) \
diff --git a/flow_table.h b/flow_table.h
index 9a2ff24..3f3f4b7 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -50,6 +50,42 @@ extern union flow flowtab[];
 #define flow_foreach_sidei(sidei_) \
 	for ((sidei_) = INISIDE; (sidei_) < SIDES; (sidei_)++)
 
+
+/**
+ * flow_foreach_slot() - Step through each flow table entry
+ * @flow:	Takes values of pointer to each flow table entry
+ *
+ * Includes FREE slots.
+ */
+#define flow_foreach_slot(flow)						\
+	for ((flow) = flowtab; FLOW_IDX(flow) < FLOW_MAX; (flow)++)
+
+/**
+ * flow_foreach() - Step through each active flow
+ * @flow:	Takes values of pointer to each active flow
+ */
+#define flow_foreach(flow)						\
+	flow_foreach_slot((flow))					\
+		if ((flow)->f.state == FLOW_STATE_FREE)			\
+			(flow) += (flow)->free.n - 1;			\
+		else if ((flow)->f.state != FLOW_STATE_ACTIVE) {	\
+			flow_err((flow), "Bad flow state during traversal"); \
+			continue;					\
+		} else
+
+/**
+ * flow_foreach_of_type() - Step through each active flow of given type
+ * @flow:	Takes values of pointer to each flow
+ * @type_:	Type of flow to traverse
+ */
+#define flow_foreach_of_type(flow, type_)				\
+	flow_foreach((flow))						\
+	if ((flow)->f.type != (type_))					\
+			/* NOLINTNEXTLINE(bugprone-branch-clone) */	\
+			continue;					\
+		else
+
+
 /** flow_idx() - Index of flow from common structure
  * @f:	Common flow fields pointer
  *
@@ -57,6 +93,7 @@ extern union flow flowtab[];
  */
 static inline unsigned flow_idx(const struct flow_common *f)
 {
+	/* NOLINTNEXTLINE(clang-analyzer-security.PointerSub) */
 	return (union flow *)f - flowtab;
 }
 
@@ -163,6 +200,7 @@ const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif,
 					const void *daddr, in_port_t dport);
 struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif,
 				  const union sockaddr_inany *ssa,
+				  const union inany_addr *daddr,
 				  in_port_t dport);
 const struct flowside *flow_target_af(union flow *flow, uint8_t pif,
 				      sa_family_t af,
diff --git a/fwd.c b/fwd.c
index 2829cd2..250cf56 100644
--- a/fwd.c
+++ b/fwd.c
@@ -323,6 +323,30 @@ static bool fwd_guest_accessible(const struct ctx *c,
 	return fwd_guest_accessible6(c, &addr->a6);
 }
 
+/**
+ * nat_outbound() - Apply address translation for outbound (TAP to HOST)
+ * @c:		Execution context
+ * @addr:	Input address (as seen on TAP interface)
+ * @translated:	Output address (as seen on HOST interface)
+ *
+ * Only handles translations that depend *only* on the address.  Anything
+ * related to specific ports or flows is handled elsewhere.
+ */
+static void nat_outbound(const struct ctx *c, const union inany_addr *addr,
+			 union inany_addr *translated)
+{
+	if (inany_equals4(addr, &c->ip4.map_host_loopback))
+		*translated = inany_loopback4;
+	else if (inany_equals6(addr, &c->ip6.map_host_loopback))
+		*translated = inany_loopback6;
+	else if (inany_equals4(addr, &c->ip4.map_guest_addr))
+		*translated = inany_from_v4(c->ip4.addr);
+	else if (inany_equals6(addr, &c->ip6.map_guest_addr))
+		translated->a6 = c->ip6.addr;
+	else
+		*translated = *addr;
+}
+
 /**
  * fwd_nat_from_tap() - Determine to forward a flow from the tap interface
  * @c:		Execution context
@@ -342,16 +366,8 @@ uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto,
 	else if (is_dns_flow(proto, ini) &&
 		   inany_equals6(&ini->oaddr, &c->ip6.dns_match))
 		tgt->eaddr.a6 = c->ip6.dns_host;
-	else if (inany_equals4(&ini->oaddr, &c->ip4.map_host_loopback))
-		tgt->eaddr = inany_loopback4;
-	else if (inany_equals6(&ini->oaddr, &c->ip6.map_host_loopback))
-		tgt->eaddr = inany_loopback6;
-	else if (inany_equals4(&ini->oaddr, &c->ip4.map_guest_addr))
-		tgt->eaddr = inany_from_v4(c->ip4.addr);
-	else if (inany_equals6(&ini->oaddr, &c->ip6.map_guest_addr))
-		tgt->eaddr.a6 = c->ip6.addr;
 	else
-		tgt->eaddr = ini->oaddr;
+		nat_outbound(c, &ini->oaddr, &tgt->eaddr);
 
 	tgt->eport = ini->oport;
 
@@ -402,7 +418,7 @@ uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
 	else
 		tgt->eaddr = inany_loopback6;
 
-	/* Preserve the specific loopback adddress used, but let the kernel pick
+	/* Preserve the specific loopback address used, but let the kernel pick
 	 * a source port on the target side
 	 */
 	tgt->oaddr = ini->eaddr;
@@ -423,6 +439,42 @@ uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
 	return PIF_HOST;
 }
 
+/**
+ * nat_inbound() - Apply address translation for inbound (HOST to TAP)
+ * @c:		Execution context
+ * @addr:	Input address (as seen on HOST interface)
+ * @translated:	Output address (as seen on TAP interface)
+ *
+ * Return: true on success, false if it couldn't translate the address
+ *
+ * Only handles translations that depend *only* on the address.  Anything
+ * related to specific ports or flows is handled elsewhere.
+ */
+bool nat_inbound(const struct ctx *c, const union inany_addr *addr,
+		 union inany_addr *translated)
+{
+	if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_host_loopback) &&
+	    inany_equals4(addr, &in4addr_loopback)) {
+		/* Specifically 127.0.0.1, not 127.0.0.0/8 */
+		*translated = inany_from_v4(c->ip4.map_host_loopback);
+	} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_host_loopback) &&
+		   inany_equals6(addr, &in6addr_loopback)) {
+		translated->a6 = c->ip6.map_host_loopback;
+	} else if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_guest_addr) &&
+		   inany_equals4(addr, &c->ip4.addr)) {
+		*translated = inany_from_v4(c->ip4.map_guest_addr);
+	} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_guest_addr) &&
+		   inany_equals6(addr, &c->ip6.addr)) {
+		translated->a6 = c->ip6.map_guest_addr;
+	} else if (fwd_guest_accessible(c, addr)) {
+		*translated = *addr;
+	} else {
+		return false;
+	}
+
+	return true;
+}
+
 /**
  * fwd_nat_from_host() - Determine to forward a flow from the host interface
  * @c:		Execution context
@@ -479,20 +531,7 @@ uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
 		return PIF_SPLICE;
 	}
 
-	if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_host_loopback) &&
-	    inany_equals4(&ini->eaddr, &in4addr_loopback)) {
-		/* Specifically 127.0.0.1, not 127.0.0.0/8 */
-		tgt->oaddr = inany_from_v4(c->ip4.map_host_loopback);
-	} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_host_loopback) &&
-		   inany_equals6(&ini->eaddr, &in6addr_loopback)) {
-		tgt->oaddr.a6 = c->ip6.map_host_loopback;
-	} else if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.map_guest_addr) &&
-		   inany_equals4(&ini->eaddr, &c->ip4.addr)) {
-		tgt->oaddr = inany_from_v4(c->ip4.map_guest_addr);
-	} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.map_guest_addr) &&
-		   inany_equals6(&ini->eaddr, &c->ip6.addr)) {
-		tgt->oaddr.a6 = c->ip6.map_guest_addr;
-	} else if (!fwd_guest_accessible(c, &ini->eaddr)) {
+	if (!nat_inbound(c, &ini->eaddr, &tgt->oaddr)) {
 		if (inany_v4(&ini->eaddr)) {
 			if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.our_tap_addr))
 				/* No source address we can use */
@@ -501,8 +540,6 @@ uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
 		} else {
 			tgt->oaddr.a6 = c->ip6.our_tap_ll;
 		}
-	} else {
-		tgt->oaddr = ini->eaddr;
 	}
 	tgt->oport = ini->eport;
 
diff --git a/fwd.h b/fwd.h
index 3562f3c..0458a3c 100644
--- a/fwd.h
+++ b/fwd.h
@@ -7,6 +7,7 @@
 #ifndef FWD_H
 #define FWD_H
 
+union inany_addr;
 struct flowside;
 
 /* Number of ports for both TCP and UDP */
@@ -47,6 +48,8 @@ void fwd_scan_ports_udp(struct fwd_ports *fwd, const struct fwd_ports *rev,
 			const struct fwd_ports *tcp_rev);
 void fwd_scan_ports_init(struct ctx *c);
 
+bool nat_inbound(const struct ctx *c, const union inany_addr *addr,
+		 union inany_addr *translated);
 uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto,
 			 const struct flowside *ini, struct flowside *tgt);
 uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
diff --git a/icmp.c b/icmp.c
index bcf498d..7e2b342 100644
--- a/icmp.c
+++ b/icmp.c
@@ -85,7 +85,7 @@ void icmp_sock_handler(const struct ctx *c, union epoll_ref ref)
 
 	n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
 	if (n < 0) {
-		flow_err(pingf, "recvfrom() error: %s", strerror_(errno));
+		flow_perror(pingf, "recvfrom() error");
 		return;
 	}
 
@@ -300,8 +300,7 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 
 	pif_sockaddr(c, &sa, &sl, PIF_HOST, &tgt->eaddr, 0);
 	if (sendto(pingf->sock, pkt, l4len, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
-		flow_dbg(pingf, "failed to relay request to socket: %s",
-			 strerror_(errno));
+		flow_dbg_perror(pingf, "failed to relay request to socket");
 	} else {
 		flow_dbg(pingf,
 			 "echo request to socket, ID: %"PRIu16", seq: %"PRIu16,
diff --git a/inany.h b/inany.h
index 6a12c29..7ca5cbd 100644
--- a/inany.h
+++ b/inany.h
@@ -237,23 +237,30 @@ static inline void inany_from_af(union inany_addr *aa,
 }
 
 /** inany_from_sockaddr - Extract IPv[46] address and port number from sockaddr
- * @aa:		Pointer to store IPv[46] address
+ * @dst:	Pointer to store IPv[46] address (output)
  * @port:	Pointer to store port number, host order
- * @addr:	AF_INET or AF_INET6 socket address
+ * @addr:	Socket address
+ *
+ * Return: 0 on success, -1 on error (bad address family)
  */
-static inline void inany_from_sockaddr(union inany_addr *aa, in_port_t *port,
-				       const union sockaddr_inany *sa)
+static inline int inany_from_sockaddr(union inany_addr *dst, in_port_t *port,
+				      const void *addr)
 {
+	const union sockaddr_inany *sa = (const union sockaddr_inany *)addr;
+
 	if (sa->sa_family == AF_INET6) {
-		inany_from_af(aa, AF_INET6, &sa->sa6.sin6_addr);
+		inany_from_af(dst, AF_INET6, &sa->sa6.sin6_addr);
 		*port = ntohs(sa->sa6.sin6_port);
-	} else if (sa->sa_family == AF_INET) {
-		inany_from_af(aa, AF_INET, &sa->sa4.sin_addr);
-		*port = ntohs(sa->sa4.sin_port);
-	} else {
-		/* Not valid to call with other address families */
-		ASSERT(0);
+		return 0;
 	}
+
+	if (sa->sa_family == AF_INET) {
+		inany_from_af(dst, AF_INET, &sa->sa4.sin_addr);
+		*port = ntohs(sa->sa4.sin_port);
+		return 0;
+	}
+
+	return -1;
 }
 
 /** inany_siphash_feed- Fold IPv[46] address into an in-progress siphash
diff --git a/iov.c b/iov.c
index 3b12272..91e87a7 100644
--- a/iov.c
+++ b/iov.c
@@ -26,7 +26,8 @@
 #include "iov.h"
 
 
-/* iov_skip_bytes() - Skip leading bytes of an IO vector
+/**
+ * iov_skip_bytes() - Skip leading bytes of an IO vector
  * @iov:	IO vector
  * @n:		Number of entries in @iov
  * @skip:	Number of leading bytes of @iov to skip
@@ -56,8 +57,8 @@ size_t iov_skip_bytes(const struct iovec *iov, size_t n,
 }
 
 /**
- * iov_from_buf - Copy data from a buffer to an I/O vector (struct iovec)
- *                efficiently.
+ * iov_from_buf() - Copy data from a buffer to an I/O vector (struct iovec)
+ *                  efficiently.
  *
  * @iov:       Pointer to the array of struct iovec describing the
  *             scatter/gather I/O vector.
@@ -96,8 +97,8 @@ size_t iov_from_buf(const struct iovec *iov, size_t iov_cnt,
 }
 
 /**
- * iov_to_buf - Copy data from a scatter/gather I/O vector (struct iovec) to
- *		a buffer efficiently.
+ * iov_to_buf() - Copy data from a scatter/gather I/O vector (struct iovec) to
+ *		  a buffer efficiently.
  *
  * @iov:       Pointer to the array of struct iovec describing the scatter/gather
  *             I/O vector.
@@ -136,8 +137,8 @@ size_t iov_to_buf(const struct iovec *iov, size_t iov_cnt,
 }
 
 /**
- * iov_size - Calculate the total size of a scatter/gather I/O vector
- *            (struct iovec).
+ * iov_size() - Calculate the total size of a scatter/gather I/O vector
+ *              (struct iovec).
  *
  * @iov:       Pointer to the array of struct iovec describing the
  *             scatter/gather I/O vector.
@@ -203,6 +204,7 @@ size_t iov_tail_size(struct iov_tail *tail)
  *	    overruns the IO vector, is not contiguous or doesn't have the
  *	    requested alignment.
  */
+/* cppcheck-suppress [staticFunction,unmatchedSuppression] */
 void *iov_peek_header_(struct iov_tail *tail, size_t len, size_t align)
 {
 	char *p;
diff --git a/ip.h b/ip.h
index 1544dbf..24509d9 100644
--- a/ip.h
+++ b/ip.h
@@ -36,13 +36,14 @@
 		.tos		= 0,					\
 		.tot_len	= 0,					\
 		.id		= 0,					\
-		.frag_off	= 0,					\
+		.frag_off	= htons(IP_DF), 			\
 		.ttl		= 0xff,					\
 		.protocol	= (proto),				\
 		.saddr		= 0,					\
 		.daddr		= 0,					\
 	}
 #define L2_BUF_IP4_PSUM(proto)	((uint32_t)htons_constant(0x4500) +	\
+				 (uint32_t)htons_constant(IP_DF) +	\
 				 (uint32_t)htons(0xff00 | (proto)))
 
 
@@ -90,10 +91,34 @@ struct ipv6_opt_hdr {
 	 */
 } __attribute__((packed));	/* required for some archs */
 
+/**
+ * ip6_set_flow_lbl() - Store a flow label into an IPv6 header
+ * @ip6h:	IPv6 header to update, bits 4 and up of flow_lbl[0] get cleared
+ * @flow:	New flow label: only the low 20 bits of this value are used
+ */
+static inline void ip6_set_flow_lbl(struct ipv6hdr *ip6h, uint32_t flow)
+{
+	ip6h->flow_lbl[2] = flow & 0xff;		/* bits 0..7 */
+	ip6h->flow_lbl[1] = (flow >> 8) & 0xff;		/* bits 8..15 */
+	ip6h->flow_lbl[0] = (flow >> 16) & 0xf;		/* bits 16..19 */
+}
+
+/** ip6_get_flow_lbl() - Read the flow label out of an IPv6 header
+ * @ip6h:	IPv6 header to read from, not modified
+ *
+ * Return: flow label of @ip6h, in the low 20 bits of the returned integer
+ */
+static inline uint32_t ip6_get_flow_lbl(const struct ipv6hdr *ip6h)
+{
+	uint32_t hi = ip6h->flow_lbl[0] & 0xf;	/* bits 16..19 of the label */
+
+	return hi << 16 | ip6h->flow_lbl[1] << 8 | ip6h->flow_lbl[2];
+}
+
 char *ipv6_l4hdr(const struct pool *p, int idx, size_t offset, uint8_t *proto,
 		 size_t *dlen);
 
-/* IPv6 link-local all-nodes multicast adddress, ff02::1 */
+/* IPv6 link-local all-nodes multicast address, ff02::1 */
 static const struct in6_addr in6addr_ll_all_nodes = {
 	.s6_addr = {
 		0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -104,4 +129,11 @@ static const struct in6_addr in6addr_ll_all_nodes = {
 /* IPv4 Limited Broadcast (RFC 919, Section 7), 255.255.255.255 */
 static const struct in_addr in4addr_broadcast = { 0xffffffff };
 
+#ifndef IPV4_MIN_MTU
+#define IPV4_MIN_MTU		68
+#endif
+#ifndef IPV6_MIN_MTU
+#define IPV6_MIN_MTU		1280
+#endif
+
 #endif /* IP_H */
diff --git a/isolation.c b/isolation.c
index c944fb3..bbcd23b 100644
--- a/isolation.c
+++ b/isolation.c
@@ -129,7 +129,7 @@ static void drop_caps_ep_except(uint64_t keep)
  * additional layer of protection.  Executing this requires
  * CAP_SETPCAP, which we will have within our userns.
  *
- * Note that dropping capabilites from the bounding set limits
+ * Note that dropping capabilities from the bounding set limits
  * exec()ed processes, but does not remove them from the effective or
  * permitted sets, so it doesn't reduce our own capabilities.
  */
@@ -174,8 +174,8 @@ static void clamp_caps(void)
  * Should:
  *  - drop unneeded capabilities
  *  - close all open files except for standard streams and the one from --fd
- * Musn't:
- *  - remove filesytem access (we need to access files during setup)
+ * Mustn't:
+ *  - remove filesystem access (we need to access files during setup)
  */
 void isolate_initial(int argc, char **argv)
 {
@@ -194,7 +194,7 @@ void isolate_initial(int argc, char **argv)
 	 *
 	 * It's debatable whether it's useful to drop caps when we
 	 * retain SETUID and SYS_ADMIN, but we might as well.  We drop
-	 * further capabilites in isolate_user() and
+	 * further capabilities in isolate_user() and
 	 * isolate_prefork().
 	 */
 	keep = BIT(CAP_NET_BIND_SERVICE) | BIT(CAP_SETUID) | BIT(CAP_SETGID) |
diff --git a/log.c b/log.c
index 95e4576..5d7d76f 100644
--- a/log.c
+++ b/log.c
@@ -56,7 +56,7 @@ bool		log_stderr = true;	/* Not daemonised, no shell spawned */
  *
  * Return: pointer to @now, or NULL if there was an error retrieving the time
  */
-const struct timespec *logtime(struct timespec *ts)
+static const struct timespec *logtime(struct timespec *ts)
 {
 	if (clock_gettime(CLOCK_MONOTONIC, ts))
 		return NULL;
@@ -249,6 +249,30 @@ static void logfile_write(bool newline, bool cont, int pri,
 		log_written += n;
 }
 
+/**
+ * passt_vsyslog() - vsyslog() implementation not using heap memory
+ * @newline:	Append newline at the end of the message, if missing
+ * @pri:	Facility and level map, same as priority for vsyslog()
+ * @format:	Same as vsyslog() format, output truncated to BUFSIZ - 1 bytes
+ * @ap:		Same as vsyslog() ap
+ */
+static void passt_vsyslog(bool newline, int pri, const char *format, va_list ap)
+{
+	char buf[BUFSIZ];
+	int n;
+
+	/* Send without timestamp, the system logger should add it */
+	n = snprintf(buf, BUFSIZ, "<%i> %s: ", pri, log_ident);
+	if (n >= 0 && n < BUFSIZ)	/* Prefix fit, append message */
+		n += vsnprintf(buf + n, BUFSIZ - n, format, ap);
+	n = n < 0 ? 0 : (n < BUFSIZ ? n : BUFSIZ - 1); /* Clamp if truncated */
+	if (newline && n < BUFSIZ - 1 && (!n || buf[n - 1] != '\n'))
+		buf[n++] = '\n';	/* Not sent as string: no '\0' needed */
+
+	if (log_sock >= 0 && send(log_sock, buf, n, 0) != n && log_stderr)
+		FPRINTF(stderr, "Failed to send %i bytes to syslog\n", n);
+}
+
 /**
  * vlogmsg() - Print or send messages to log or output files as configured
  * @newline:	Append newline at the end of the message, if missing
@@ -257,6 +281,7 @@ static void logfile_write(bool newline, bool cont, int pri,
  * @format:	Message
  * @ap:		Variable argument list
  */
+/* cppcheck-suppress [staticFunction,unmatchedSuppression] */
 void vlogmsg(bool newline, bool cont, int pri, const char *format, va_list ap)
 {
 	bool debug_print = (log_mask & LOG_MASK(LOG_DEBUG)) && log_file == -1;
@@ -373,35 +398,11 @@ void __setlogmask(int mask)
 	setlogmask(mask);
 }
 
-/**
- * passt_vsyslog() - vsyslog() implementation not using heap memory
- * @newline:	Append newline at the end of the message, if missing
- * @pri:	Facility and level map, same as priority for vsyslog()
- * @format:	Same as vsyslog() format
- * @ap:		Same as vsyslog() ap
- */
-void passt_vsyslog(bool newline, int pri, const char *format, va_list ap)
-{
-	char buf[BUFSIZ];
-	int n;
-
-	/* Send without timestamp, the system logger should add it */
-	n = snprintf(buf, BUFSIZ, "<%i> %s: ", pri, log_ident);
-
-	n += vsnprintf(buf + n, BUFSIZ - n, format, ap);
-
-	if (newline && format[strlen(format)] != '\n')
-		n += snprintf(buf + n, BUFSIZ - n, "\n");
-
-	if (log_sock >= 0 && send(log_sock, buf, n, 0) != n && log_stderr)
-		FPRINTF(stderr, "Failed to send %i bytes to syslog\n", n);
-}
-
 /**
  * logfile_init() - Open log file and write header with PID, version, path
  * @name:	Identifier for header: passt or pasta
  * @path:	Path to log file
- * @size:	Maximum size of log file: log_cut_size is calculatd here
+ * @size:	Maximum size of log file: log_cut_size is calculated here
  */
 void logfile_init(const char *name, const char *path, size_t size)
 {
diff --git a/log.h b/log.h
index 22c7b9a..08aa88c 100644
--- a/log.h
+++ b/log.h
@@ -55,7 +55,6 @@ void trace_init(int enable);
 
 void __openlog(const char *ident, int option, int facility);
 void logfile_init(const char *name, const char *path, size_t size);
-void passt_vsyslog(bool newline, int pri, const char *format, va_list ap);
 void __setlogmask(int mask);
 
 #endif /* LOG_H */
diff --git a/migrate.c b/migrate.c
index 0fca77b..48d63a0 100644
--- a/migrate.c
+++ b/migrate.c
@@ -96,8 +96,8 @@ static int seen_addrs_target_v1(struct ctx *c,
 	return 0;
 }
 
-/* Stages for version 1 */
-static const struct migrate_stage stages_v1[] = {
+/* Stages for version 2 */
+static const struct migrate_stage stages_v2[] = {
 	{
 		.name = "observed addresses",
 		.source = seen_addrs_source_v1,
@@ -118,7 +118,11 @@ static const struct migrate_stage stages_v1[] = {
 
 /* Supported encoding versions, from latest (most preferred) to oldest */
 static const struct migrate_version versions[] = {
-	{ 1,	stages_v1, },
+	{ 2,	stages_v2, },
+	/* v1 was released, but not widely used.  It had bad endianness for the
+	 * MSS and omitted timestamps, which meant it usually wouldn't work.
+	 * Therefore we don't attempt to support compatibility with it.
+	 */
 	{ 0 },
 };
 
diff --git a/ndp.c b/ndp.c
index 37bf7a3..b664034 100644
--- a/ndp.c
+++ b/ndp.c
@@ -256,7 +256,7 @@ static void ndp_ra(const struct ctx *c, const struct in6_addr *dst)
 
 	ptr = &ra.var[0];
 
-	if (c->mtu != -1) {
+	if (c->mtu) {
 		struct opt_mtu *mtu = (struct opt_mtu *)ptr;
 		*mtu = (struct opt_mtu) {
 			.header = {
@@ -328,6 +328,7 @@ static void ndp_ra(const struct ctx *c, const struct in6_addr *dst)
 
 	memcpy(&ra.source_ll.mac, c->our_tap_mac, ETH_ALEN);
 
+	/* NOLINTNEXTLINE(clang-analyzer-security.PointerSub) */
 	ndp_send(c, dst, &ra, ptr - (unsigned char *)&ra);
 }
 
diff --git a/netlink.c b/netlink.c
index 37d8b5b..a052504 100644
--- a/netlink.c
+++ b/netlink.c
@@ -355,7 +355,7 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
  *
  * Return: true if a gateway was found, false otherwise
  */
-bool nl_route_get_def_multipath(struct rtattr *rta, void *gw)
+static bool nl_route_get_def_multipath(struct rtattr *rta, void *gw)
 {
 	int nh_len = RTA_PAYLOAD(rta);
 	struct rtnexthop *rtnh;
diff --git a/packet.c b/packet.c
index 03a11e6..72c6158 100644
--- a/packet.c
+++ b/packet.c
@@ -23,51 +23,73 @@
 #include "log.h"
 
 /**
- * packet_check_range() - Check if a packet memory range is valid
+ * packet_check_range() - Check if a memory range is valid for a pool
  * @p:		Packet pool
- * @offset:	Offset of data range in packet descriptor
+ * @ptr:	Start of desired data range
  * @len:	Length of desired data range
- * @start:	Start of the packet descriptor
  * @func:	For tracing: name of calling function
  * @line:	For tracing: caller line of function call
  *
  * Return: 0 if the range is valid, -1 otherwise
  */
-static int packet_check_range(const struct pool *p, size_t offset, size_t len,
-			      const char *start, const char *func, int line)
+static int packet_check_range(const struct pool *p, const char *ptr, size_t len,
+			      const char *func, int line)
 {
+	if (len > PACKET_MAX_LEN) {
+		debug("packet range length %zu (max %zu), %s:%i",
+		      len, PACKET_MAX_LEN, func, line);
+		return -1;
+	}
+
 	if (p->buf_size == 0) {
 		int ret;
 
-		ret = vu_packet_check_range((void *)p->buf, offset, len, start);
+		ret = vu_packet_check_range((void *)p->buf, ptr, len);
 
 		if (ret == -1)
-			trace("cannot find region, %s:%i", func, line);
+			debug("cannot find region, %s:%i", func, line);
 
 		return ret;
 	}
 
-	if (start < p->buf) {
-		trace("packet start %p before buffer start %p, "
-		      "%s:%i", (void *)start, (void *)p->buf, func, line);
+	if (ptr < p->buf) {
+		debug("packet range start %p before buffer start %p, %s:%i",
+		      (void *)ptr, (void *)p->buf, func, line);
 		return -1;
 	}
 
-	if (start + len + offset > p->buf + p->buf_size) {
-		trace("packet offset plus length %zu from size %zu, "
-		      "%s:%i", start - p->buf + len + offset,
-		      p->buf_size, func, line);
+	if (len > p->buf_size) {
+		debug("packet range length %zu larger than buffer %zu, %s:%i",
+		      len, p->buf_size, func, line);
+		return -1;
+	}
+
+	if ((size_t)(ptr - p->buf) > p->buf_size - len) {
+		debug("packet range %p, len %zu after buffer end %p, %s:%i",
+		      (void *)ptr, len, (void *)(p->buf + p->buf_size),
+		      func, line);
 		return -1;
 	}
 
 	return 0;
 }
+/**
+ * pool_full() - Check whether a packet pool has no free descriptor left
+ * @p:		Packet pool to check, not modified
+ *
+ * Return: false while at least one more packet fits, true otherwise
+ */
+bool pool_full(const struct pool *p)
+{
+	return !(p->count < p->size);
+}
+
 /**
  * packet_add_do() - Add data as packet descriptor to given pool
  * @p:		Existing pool
  * @len:	Length of new descriptor
  * @start:	Start of data
- * @func:	For tracing: name of calling function, NULL means no trace()
+ * @func:	For tracing: name of calling function
  * @line:	For tracing: caller line of function call
  */
 void packet_add_do(struct pool *p, size_t len, const char *start,
@@ -75,26 +97,63 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
 {
 	size_t idx = p->count;
 
-	if (idx >= p->size) {
-		trace("add packet index %zu to pool with size %zu, %s:%i",
+	if (pool_full(p)) {
+		debug("add packet index %zu to pool with size %zu, %s:%i",
 		      idx, p->size, func, line);
 		return;
 	}
 
-	if (packet_check_range(p, 0, len, start, func, line))
+	if (packet_check_range(p, start, len, func, line))
 		return;
 
-	if (len > UINT16_MAX) {
-		trace("add packet length %zu, %s:%i", len, func, line);
-		return;
-	}
-
 	p->pkt[idx].iov_base = (void *)start;
 	p->pkt[idx].iov_len = len;
 
 	p->count++;
 }
 
+/**
+ * packet_get_try_do() - Look up a data range inside a packet of a pool
+ * @p:		Packet pool
+ * @idx:	Index of the packet descriptor in @p
+ * @offset:	Start of the wanted range, relative to the packet start
+ * @len:	Length of the wanted range
+ * @left:	If not NULL, set on success to the bytes following the range
+ * @func:	For tracing: name of calling function
+ * @line:	For tracing: caller line of function call
+ *
+ * Return: pointer to start of data range, NULL on invalid range or descriptor
+ */
+void *packet_get_try_do(const struct pool *p, size_t idx, size_t offset,
+			size_t len, size_t *left, const char *func, int line)
+{
+	const struct iovec *pkt;
+	char *start;
+
+	ASSERT_WITH_MSG(p->count <= p->size,
+			"Corrupt pool count: %zu, size: %zu, %s:%i",
+			p->count, p->size, func, line);
+
+	if (idx >= p->count) {
+		debug("packet %zu from pool count: %zu, %s:%i",
+		      idx, p->count, func, line);
+		return NULL;
+	}
+
+	pkt = &p->pkt[idx];
+	if (offset > pkt->iov_len || len > pkt->iov_len - offset)
+		return NULL;	/* Checked in this order: no unsigned wrap */
+
+	start = (char *)pkt->iov_base + offset;
+	ASSERT_WITH_MSG(!packet_check_range(p, start, len, func, line),
+			"Corrupt packet pool, %s:%i", func, line);
+
+	if (left)
+		*left = pkt->iov_len - offset - len;
+
+	return start;
+}
+
 /**
  * packet_get_do() - Get data range from packet descriptor from given pool
  * @p:		Packet pool
@@ -102,47 +161,24 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
  * @offset:	Offset of data range in packet descriptor
  * @len:	Length of desired data range
  * @left:	Length of available data after range, set on return, can be NULL
- * @func:	For tracing: name of calling function, NULL means no trace()
+ * @func:	For tracing: name of calling function
  * @line:	For tracing: caller line of function call
  *
- * Return: pointer to start of data range, NULL on invalid range or descriptor
+ * Return: as packet_get_try_do() but log a trace message when returning NULL
  */
-void *packet_get_do(const struct pool *p, size_t idx, size_t offset,
-		    size_t len, size_t *left, const char *func, int line)
+void *packet_get_do(const struct pool *p, const size_t idx,
+		    size_t offset, size_t len, size_t *left,
+		    const char *func, int line)
 {
-	if (idx >= p->size || idx >= p->count) {
-		if (func) {
-			trace("packet %zu from pool size: %zu, count: %zu, "
-			      "%s:%i", idx, p->size, p->count, func, line);
-		}
-		return NULL;
+	void *r = packet_get_try_do(p, idx, offset, len, left, func, line);
+
+	if (!r) {	/* @idx may be out of range here: don't index blindly */
+		trace("missing packet data length %zu, offset %zu from "
+		      "length %zu, %s:%i", len, offset,
+		      idx < p->count ? p->pkt[idx].iov_len : 0, func, line);
 	}
 
-	if (len > UINT16_MAX) {
-		if (func) {
-			trace("packet data length %zu, %s:%i",
-			      len, func, line);
-		}
-		return NULL;
-	}
-
-	if (len + offset > p->pkt[idx].iov_len) {
-		if (func) {
-			trace("data length %zu, offset %zu from length %zu, "
-			      "%s:%i", len, offset, p->pkt[idx].iov_len,
-			      func, line);
-		}
-		return NULL;
-	}
-
-	if (packet_check_range(p, offset, len, p->pkt[idx].iov_base,
-			       func, line))
-		return NULL;
-
-	if (left)
-		*left = p->pkt[idx].iov_len - offset - len;
-
-	return (char *)p->pkt[idx].iov_base + offset;
+	return r;
 }
 
 /**
diff --git a/packet.h b/packet.h
index 3f70e94..c94780a 100644
--- a/packet.h
+++ b/packet.h
@@ -6,6 +6,11 @@
 #ifndef PACKET_H
 #define PACKET_H
 
+#include <stdbool.h>
+
+/* Maximum size of a single packet stored in pool, including headers */
+#define PACKET_MAX_LEN	((size_t)UINT16_MAX)
+
 /**
  * struct pool - Generic pool of packets stored in a buffer
  * @buf:	Buffer storing packet descriptors,
@@ -21,27 +26,29 @@ struct pool {
 	size_t buf_size;
 	size_t size;
 	size_t count;
-	struct iovec pkt[1];
+	struct iovec pkt[];
 };
 
-int vu_packet_check_range(void *buf, size_t offset, size_t len,
-			  const char *start);
+int vu_packet_check_range(void *buf, const char *ptr, size_t len);
 void packet_add_do(struct pool *p, size_t len, const char *start,
 		   const char *func, int line);
+void *packet_get_try_do(const struct pool *p, const size_t idx,
+			size_t offset, size_t len, size_t *left,
+			const char *func, int line);
 void *packet_get_do(const struct pool *p, const size_t idx,
 		    size_t offset, size_t len, size_t *left,
 		    const char *func, int line);
+bool pool_full(const struct pool *p);
 void pool_flush(struct pool *p);
 
 #define packet_add(p, len, start)					\
 	packet_add_do(p, len, start, __func__, __LINE__)
 
+#define packet_get_try(p, idx, offset, len, left)			\
+	packet_get_try_do(p, idx, offset, len, left, __func__, __LINE__)
 #define packet_get(p, idx, offset, len, left)				\
 	packet_get_do(p, idx, offset, len, left, __func__, __LINE__)
 
-#define packet_get_try(p, idx, offset, len, left)			\
-	packet_get_do(p, idx, offset, len, left, NULL, 0)
-
 #define PACKET_POOL_DECL(_name, _size, _buf)				\
 struct _name ## _t {							\
 	char *buf;							\
diff --git a/passt-repair.1 b/passt-repair.1
index 7c1b140..e65aadd 100644
--- a/passt-repair.1
+++ b/passt-repair.1
@@ -16,13 +16,17 @@
 .B passt-repair
 is a privileged helper setting and clearing repair mode on TCP sockets on behalf
 of \fBpasst\fR(1), as instructed via single-byte commands over a UNIX domain
-socket, specified by \fIPATH\fR.
+socket.
 
 It can be used to migrate TCP connections between guests without granting
 additional capabilities to \fBpasst\fR(1) itself: to migrate TCP connections,
 \fBpasst\fR(1) leverages repair mode, which needs the \fBCAP_NET_ADMIN\fR
 capability (see \fBcapabilities\fR(7)) to be set or cleared.
 
+If \fIPATH\fR represents a UNIX domain socket, \fBpasst-repair\fR(1) attempts to
+connect to it. If it is a directory, \fBpasst-repair\fR(1) waits until a file
+ending with \fI.repair\fR appears in it, and then attempts to connect to it.
+
 .SH PROTOCOL
 
 \fBpasst-repair\fR(1) connects to \fBpasst\fR(1) using the socket specified via
diff --git a/passt-repair.c b/passt-repair.c
index e0c366e..ff1c44f 100644
--- a/passt-repair.c
+++ b/passt-repair.c
@@ -16,11 +16,14 @@
  * off. Reply by echoing the command. Exit on EOF.
  */
 
+#include <sys/inotify.h>
 #include <sys/prctl.h>
 #include <sys/types.h>
 #include <sys/socket.h>
+#include <sys/stat.h>
 #include <sys/un.h>
 #include <errno.h>
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -39,6 +42,8 @@
 #include "seccomp_repair.h"
 
 #define SCM_MAX_FD 253 /* From Linux kernel (include/net/scm.h), not in UAPI */
+#define REPAIR_EXT		".repair"
+#define REPAIR_EXT_LEN		strlen(REPAIR_EXT)
 
 /**
  * main() - Entry point and whole program with loop
@@ -51,6 +56,9 @@
  * #syscalls:repair socket s390x:socketcall i686:socketcall
  * #syscalls:repair recvfrom recvmsg arm:recv ppc64le:recv
  * #syscalls:repair sendto sendmsg arm:send ppc64le:send
+ * #syscalls:repair stat|statx stat64|statx statx
+ * #syscalls:repair fstat|fstat64 newfstatat|fstatat64
+ * #syscalls:repair inotify_init1 inotify_add_watch
  */
 int main(int argc, char **argv)
 {
@@ -58,12 +66,14 @@ int main(int argc, char **argv)
 	     __attribute__ ((aligned(__alignof__(struct cmsghdr))));
 	struct sockaddr_un a = { AF_UNIX, "" };
 	int fds[SCM_MAX_FD], s, ret, i, n = 0;
+	bool inotify_dir = false;
 	struct sock_fprog prog;
 	int8_t cmd = INT8_MAX;
 	struct cmsghdr *cmsg;
 	struct msghdr msg;
 	struct iovec iov;
 	size_t cmsg_len;
+	struct stat sb;
 	int op;
 
 	prctl(PR_SET_DUMPABLE, 0);
@@ -90,19 +100,96 @@ int main(int argc, char **argv)
 		_exit(2);
 	}
 
-	ret = snprintf(a.sun_path, sizeof(a.sun_path), "%s", argv[1]);
-	if (ret <= 0 || ret >= (int)sizeof(a.sun_path)) {
-		fprintf(stderr, "Invalid socket path: %s\n", argv[1]);
-		_exit(2);
-	}
-
 	if ((s = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
 		fprintf(stderr, "Failed to create AF_UNIX socket: %i\n", errno);
 		_exit(1);
 	}
 
-	if (connect(s, (struct sockaddr *)&a, sizeof(a))) {
-		fprintf(stderr, "Failed to connect to %s: %s\n", argv[1],
+	if ((stat(argv[1], &sb))) {
+		fprintf(stderr, "Can't stat() %s: %i\n", argv[1], errno);
+		_exit(1);
+	}
+
+	if ((sb.st_mode & S_IFMT) == S_IFDIR) {
+		char buf[sizeof(struct inotify_event) + NAME_MAX + 1]
+		   __attribute__ ((aligned(__alignof__(struct inotify_event))));
+		const struct inotify_event *ev = NULL;
+		char path[PATH_MAX + 1];
+		bool found = false;
+		ssize_t n;
+		int fd;
+
+		if ((fd = inotify_init1(IN_CLOEXEC)) < 0) {
+			fprintf(stderr, "inotify_init1: %i\n", errno);
+			_exit(1);
+		}
+
+		if (inotify_add_watch(fd, argv[1], IN_CREATE) < 0) {
+			fprintf(stderr, "inotify_add_watch: %i\n", errno);
+			_exit(1);
+		}
+
+		do {
+			char *p;
+
+			n = read(fd, buf, sizeof(buf));
+			if (n < 0) {
+				fprintf(stderr, "inotify read: %i\n", errno);
+				_exit(1);
+			}
+
+			if (n < (ssize_t)sizeof(*ev)) {
+				fprintf(stderr, "Short inotify read: %zi\n", n);
+				continue;
+			}
+			buf[n - 1] = '\0';	/* n > 0 here: can't underflow */
+
+			for (p = buf; p < buf + n; p += sizeof(*ev) + ev->len) {
+				size_t len;	/* Name length, without padding */
+
+				ev = (const struct inotify_event *)p;
+				len = strnlen(ev->name, ev->len);
+				if (len >= REPAIR_EXT_LEN &&
+				    !memcmp(ev->name + len - REPAIR_EXT_LEN,
+					    REPAIR_EXT, REPAIR_EXT_LEN)) {
+					found = true;
+					break;
+				}
+			}
+		} while (!found);
+
+		if (ev->len > NAME_MAX + 1 || ev->name[ev->len - 1] != '\0') {
+			fprintf(stderr, "Invalid filename from inotify\n");
+			_exit(1);
+		}
+
+		snprintf(path, sizeof(path), "%s/%s", argv[1], ev->name);
+		if ((stat(path, &sb))) {
+			fprintf(stderr, "Can't stat() %s: %i\n", path, errno);
+			_exit(1);
+		}
+
+		ret = snprintf(a.sun_path, sizeof(a.sun_path), "%s", path);
+		inotify_dir = true;
+	} else {
+		ret = snprintf(a.sun_path, sizeof(a.sun_path), "%s", argv[1]);
+	}
+
+	if (ret <= 0 || ret >= (int)sizeof(a.sun_path)) {
+		fprintf(stderr, "Invalid socket path");
+		_exit(2);
+	}
+
+	if ((sb.st_mode & S_IFMT) != S_IFSOCK) {
+		fprintf(stderr, "%s is not a socket\n", a.sun_path);
+		_exit(2);
+	}
+
+	while (connect(s, (struct sockaddr *)&a, sizeof(a))) {
+		if (inotify_dir && errno == ECONNREFUSED)
+			continue;
+
+		fprintf(stderr, "Failed to connect to %s: %s\n", a.sun_path,
 			strerror(errno));
 		_exit(1);
 	}
diff --git a/passt.c b/passt.c
index 68d1a28..388d10f 100644
--- a/passt.c
+++ b/passt.c
@@ -68,7 +68,7 @@ char *epoll_type_str[] = {
 	[EPOLL_TYPE_TCP_LISTEN]		= "listening TCP socket",
 	[EPOLL_TYPE_TCP_TIMER]		= "TCP timer",
 	[EPOLL_TYPE_UDP_LISTEN]		= "listening UDP socket",
-	[EPOLL_TYPE_UDP_REPLY]		= "UDP reply socket",
+	[EPOLL_TYPE_UDP]		= "UDP flow socket",
 	[EPOLL_TYPE_PING]	= "ICMP/ICMPv6 ping socket",
 	[EPOLL_TYPE_NSQUIT_INOTIFY]	= "namespace inotify watch",
 	[EPOLL_TYPE_NSQUIT_TIMER]	= "namespace timer watch",
@@ -166,7 +166,7 @@ void proto_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
  *
  * #syscalls exit_group
  */
-void exit_handler(int signal)
+static void exit_handler(int signal)
 {
 	(void)signal;
 
@@ -191,7 +191,6 @@ int main(int argc, char **argv)
 {
 	struct epoll_event events[EPOLL_EVENTS];
 	int nfds, i, devnull_fd = -1;
-	char argv0[PATH_MAX], *name;
 	struct ctx c = { 0 };
 	struct rlimit limit;
 	struct timespec now;
@@ -213,27 +212,18 @@ int main(int argc, char **argv)
 	sigaction(SIGTERM, &sa, NULL);
 	sigaction(SIGQUIT, &sa, NULL);
 
-	if (argc < 1)
-		_exit(EXIT_FAILURE);
+	c.mode = conf_mode(argc, argv);
 
-	strncpy(argv0, argv[0], PATH_MAX - 1);
-	name = basename(argv0);
-	if (strstr(name, "pasta")) {
+	if (c.mode == MODE_PASTA) {
 		sa.sa_handler = pasta_child_handler;
 		if (sigaction(SIGCHLD, &sa, NULL))
 			die_perror("Couldn't install signal handlers");
-
-		c.mode = MODE_PASTA;
-	} else if (strstr(name, "passt")) {
-		c.mode = MODE_PASST;
-	} else {
-		_exit(EXIT_FAILURE);
 	}
 
 	if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
 		die_perror("Couldn't set disposition for SIGPIPE");
 
-	madvise(pkt_buf, TAP_BUF_BYTES, MADV_HUGEPAGE);
+	madvise(pkt_buf, sizeof(pkt_buf), MADV_HUGEPAGE);
 
 	c.epollfd = epoll_create1(EPOLL_CLOEXEC);
 	if (c.epollfd == -1)
@@ -349,8 +339,8 @@ loop:
 		case EPOLL_TYPE_UDP_LISTEN:
 			udp_listen_sock_handler(&c, ref, eventmask, &now);
 			break;
-		case EPOLL_TYPE_UDP_REPLY:
-			udp_reply_sock_handler(&c, ref, eventmask, &now);
+		case EPOLL_TYPE_UDP:
+			udp_sock_handler(&c, ref, eventmask, &now);
 			break;
 		case EPOLL_TYPE_PING:
 			icmp_sock_handler(&c, ref);
diff --git a/passt.h b/passt.h
index 1f0dab5..8693794 100644
--- a/passt.h
+++ b/passt.h
@@ -69,12 +69,9 @@ union epoll_ref {
 static_assert(sizeof(union epoll_ref) <= sizeof(union epoll_data),
 	      "epoll_ref must have same size as epoll_data");
 
-#define TAP_BUF_BYTES							\
-	ROUND_DOWN(((ETH_MAX_MTU + sizeof(uint32_t)) * 128), PAGE_SIZE)
-#define TAP_MSGS							\
-	DIV_ROUND_UP(TAP_BUF_BYTES, ETH_ZLEN - 2 * ETH_ALEN + sizeof(uint32_t))
+/* Large enough for ~128 maximum size frames */
+#define PKT_BUF_BYTES		(8UL << 20)
 
-#define PKT_BUF_BYTES		MAX(TAP_BUF_BYTES, 0)
 extern char pkt_buf		[PKT_BUF_BYTES];
 
 extern char *epoll_type_str[];
@@ -274,6 +271,8 @@ struct ctx {
 	int fd_repair;
 	unsigned char our_tap_mac[ETH_ALEN];
 	unsigned char guest_mac[ETH_ALEN];
+	uint16_t mtu;
+
 	uint64_t hash_secret[2];
 
 	int ifi4;
@@ -298,7 +297,6 @@ struct ctx {
 	int no_icmp;
 	struct icmp_ctx icmp;
 
-	int mtu;
 	int no_dns;
 	int no_dns_search;
 	int no_dhcp_dns;
diff --git a/pasta.c b/pasta.c
index 585a51c..017fa32 100644
--- a/pasta.c
+++ b/pasta.c
@@ -319,7 +319,7 @@ void pasta_ns_conf(struct ctx *c)
 	if (c->pasta_conf_ns) {
 		unsigned int flags = IFF_UP;
 
-		if (c->mtu != -1)
+		if (c->mtu)
 			nl_link_set_mtu(nl_sock_ns, c->pasta_ifi, c->mtu);
 
 		if (c->ifi6) /* Avoid duplicate address detection on link up */
@@ -498,17 +498,23 @@ void pasta_netns_quit_init(const struct ctx *c)
  */
 void pasta_netns_quit_inotify_handler(struct ctx *c, int inotify_fd)
 {
-	char buf[sizeof(struct inotify_event) + NAME_MAX + 1];
-	const struct inotify_event *in_ev = (struct inotify_event *)buf;
+	char buf[sizeof(struct inotify_event) + NAME_MAX + 1]
+		__attribute__ ((aligned(__alignof__(struct inotify_event))));
+	const struct inotify_event *ev;
+	ssize_t n;
+	char *p;
 
-	if (read(inotify_fd, buf, sizeof(buf)) < (ssize_t)sizeof(*in_ev))
+	if ((n = read(inotify_fd, buf, sizeof(buf))) < (ssize_t)sizeof(*ev))
 		return;
 
-	if (strncmp(in_ev->name, c->netns_base, sizeof(c->netns_base)))
-		return;
+	for (p = buf; p < buf + n; p += sizeof(*ev) + ev->len) {
+		ev = (const struct inotify_event *)p;
 
-	info("Namespace %s is gone, exiting", c->netns_base);
-	_exit(EXIT_SUCCESS);
+		if (!strncmp(ev->name, c->netns_base, sizeof(c->netns_base))) {
+			info("Namespace %s is gone, exiting", c->netns_base);
+			_exit(EXIT_SUCCESS);
+		}
+	}
 }
 
 /**
diff --git a/pcap.c b/pcap.c
index 3d623cf..e95aa6f 100644
--- a/pcap.c
+++ b/pcap.c
@@ -33,33 +33,12 @@
 #include "log.h"
 #include "pcap.h"
 #include "iov.h"
+#include "tap.h"
 
 #define PCAP_VERSION_MINOR 4
 
 static int pcap_fd = -1;
 
-/* See pcap.h from libpcap, or pcap-savefile(5) */
-static const struct {
-	uint32_t magic;
-#define PCAP_MAGIC		0xa1b2c3d4
-
-	uint16_t major;
-#define PCAP_VERSION_MAJOR	2
-
-	uint16_t minor;
-#define PCAP_VERSION_MINOR	4
-
-	int32_t thiszone;
-	uint32_t sigfigs;
-	uint32_t snaplen;
-
-	uint32_t linktype;
-#define PCAP_LINKTYPE_ETHERNET	1
-} pcap_hdr = {
-	PCAP_MAGIC, PCAP_VERSION_MAJOR, PCAP_VERSION_MINOR, 0, 0, ETH_MAX_MTU,
-	PCAP_LINKTYPE_ETHERNET
-};
-
 struct pcap_pkthdr {
 	uint32_t tv_sec;
 	uint32_t tv_usec;
@@ -162,6 +141,29 @@ void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset)
  */
 void pcap_init(struct ctx *c)
 {
+	/* See pcap.h from libpcap, or pcap-savefile(5) */
+#define PCAP_MAGIC		0xa1b2c3d4
+#define PCAP_VERSION_MAJOR	2
+#define PCAP_VERSION_MINOR	4
+#define PCAP_LINKTYPE_ETHERNET	1
+	const struct {
+		uint32_t magic;
+		uint16_t major;
+		uint16_t minor;
+
+		int32_t thiszone;
+		uint32_t sigfigs;
+		uint32_t snaplen;
+
+		uint32_t linktype;
+	} pcap_hdr = {
+		.magic = PCAP_MAGIC,
+		.major = PCAP_VERSION_MAJOR,
+		.minor = PCAP_VERSION_MINOR,
+		.snaplen = tap_l2_max_len(c),
+		.linktype = PCAP_LINKTYPE_ETHERNET
+	};
+
 	if (pcap_fd != -1)
 		return;
 
diff --git a/repair.c b/repair.c
index 3ee089f..149fe51 100644
--- a/repair.c
+++ b/repair.c
@@ -27,6 +27,10 @@
 
 #define SCM_MAX_FD 253 /* From Linux kernel (include/net/scm.h), not in UAPI */
 
+/* Wait for a while for TCP_REPAIR helper to connect if it's not there yet */
+#define REPAIR_ACCEPT_TIMEOUT_MS	10
+#define REPAIR_ACCEPT_TIMEOUT_US	(REPAIR_ACCEPT_TIMEOUT_MS * 1000)
+
 /* Pending file descriptors for next repair_flush() call, or command change */
 static int repair_fds[SCM_MAX_FD];
 
@@ -138,6 +142,34 @@ void repair_handler(struct ctx *c, uint32_t events)
 	repair_close(c);
 }
 
+/**
+ * repair_wait() - Wait (with timeout) for TCP_REPAIR helper to connect
+ * @c:		Execution context
+ */
+void repair_wait(struct ctx *c)
+{
+	struct timeval tv = { .tv_sec = 0,
+			      .tv_usec = (long)(REPAIR_ACCEPT_TIMEOUT_US) };
+	static_assert(REPAIR_ACCEPT_TIMEOUT_US < 1000 * 1000,
+		      ".tv_usec is greater than 1000 * 1000");
+
+	if (c->fd_repair >= 0 || c->fd_repair_listen == -1)
+		return;
+
+	if (setsockopt(c->fd_repair_listen, SOL_SOCKET, SO_RCVTIMEO,
+		       &tv, sizeof(tv))) {
+		err_perror("Set timeout on TCP_REPAIR listening socket");
+		return;
+	}
+
+	repair_listen_handler(c, EPOLLIN);
+
+	tv.tv_usec = 0;
+	if (setsockopt(c->fd_repair_listen, SOL_SOCKET, SO_RCVTIMEO,
+		       &tv, sizeof(tv)))
+		err_perror("Clear timeout on TCP_REPAIR listening socket");
+}
+
 /**
  * repair_flush() - Flush current set of sockets to helper, with current command
  * @c:		Execution context
diff --git a/repair.h b/repair.h
index de279d6..1d37922 100644
--- a/repair.h
+++ b/repair.h
@@ -10,6 +10,7 @@ void repair_sock_init(const struct ctx *c);
 void repair_listen_handler(struct ctx *c, uint32_t events);
 void repair_handler(struct ctx *c, uint32_t events);
 void repair_close(struct ctx *c);
+void repair_wait(struct ctx *c);
 int repair_flush(struct ctx *c);
 int repair_set(struct ctx *c, int s, int cmd);
 
diff --git a/seccomp.sh b/seccomp.sh
index 4c521ae..a7bc417 100755
--- a/seccomp.sh
+++ b/seccomp.sh
@@ -255,7 +255,7 @@ for __p in ${__profiles}; do
 	__calls="${__calls} ${EXTRA_SYSCALLS:-}"
 	__calls="$(filter ${__calls})"
 
-	cols="$(stty -a | sed -n 's/.*columns \([0-9]*\).*/\1/p' || :)" 2>/dev/null
+	cols="$(stty -a 2>/dev/null | sed -n 's/.*columns \([0-9]*\).*/\1/p' || :)" 2>/dev/null
 	case $cols in [0-9]*) col_args="-w ${cols}";; *) col_args="";; esac
 	echo "seccomp profile ${__p} allows: ${__calls}" | tr '\n' ' ' | fmt -t ${col_args}
 
diff --git a/tap.c b/tap.c
index d0673e5..d630f6d 100644
--- a/tap.c
+++ b/tap.c
@@ -62,13 +62,64 @@
 #include "vhost_user.h"
 #include "vu_common.h"
 
+/* Maximum allowed frame lengths (including L2 header) */
+
+/* Verify that an L2 frame length limit is large enough to contain the header,
+ * but small enough to fit in the packet pool
+ */
+#define CHECK_FRAME_LEN(len) \
+	static_assert((len) >= ETH_HLEN && (len) <= PACKET_MAX_LEN,	\
+		      #len " has bad value")
+
+CHECK_FRAME_LEN(L2_MAX_LEN_PASTA);
+CHECK_FRAME_LEN(L2_MAX_LEN_PASST);
+CHECK_FRAME_LEN(L2_MAX_LEN_VU);
+
+/* We try to size the packet pools so that we can use a single batch for the
+ * entire packet buffer.  This might be exceeded for vhost-user, though, which
+ * uses its own buffers rather than pkt_buf.
+ *
+ * This is just a tuning parameter, the code will work with slightly more
+ * overhead if it's incorrect.  So, we estimate based on the minimum practical
+ * frame size - an empty UDP datagram - rather than the minimum theoretical
+ * frame size.
+ *
+ * FIXME: Profile to work out how big this actually needs to be to amortise
+ *        per-batch syscall overheads
+ */
+#define TAP_MSGS_IP4							\
+	DIV_ROUND_UP(sizeof(pkt_buf),					\
+		     ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr))
+#define TAP_MSGS_IP6							\
+	DIV_ROUND_UP(sizeof(pkt_buf),					\
+		     ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
+
 /* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */
-static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS, pkt_buf);
-static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS, pkt_buf);
+static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS_IP4, pkt_buf);
+static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS_IP6, pkt_buf);
 
 #define TAP_SEQS		128 /* Different L4 tuples in one batch */
 #define FRAGMENT_MSG_RATE	10  /* # seconds between fragment warnings */
 
+/**
+ * tap_l2_max_len() - Maximum frame size (including L2 header) for current mode
+ * @c:		Execution context
+ */
+unsigned long tap_l2_max_len(const struct ctx *c)
+{
+	/* NOLINTBEGIN(bugprone-branch-clone): values can be the same */
+	switch (c->mode) {
+	case MODE_PASST:
+		return L2_MAX_LEN_PASST;
+	case MODE_PASTA:
+		return L2_MAX_LEN_PASTA;
+	case MODE_VU:
+		return L2_MAX_LEN_VU;
+	}
+	/* NOLINTEND(bugprone-branch-clone) */
+	ASSERT(0);
+}
+
 /**
  * tap_send_single() - Send a single frame
  * @c:		Execution context
@@ -122,7 +173,7 @@ const struct in6_addr *tap_ip6_daddr(const struct ctx *c,
  *
  * Return: pointer at which to write the packet's payload
  */
-static void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto)
+void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto)
 {
 	struct ethhdr *eh = (struct ethhdr *)buf;
 
@@ -143,8 +194,8 @@ static void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto)
  *
  * Return: pointer at which to write the packet's payload
  */
-static void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
-			   struct in_addr dst, size_t l4len, uint8_t proto)
+void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
+		    struct in_addr dst, size_t l4len, uint8_t proto)
 {
 	uint16_t l3len = l4len + sizeof(*ip4h);
 
@@ -153,17 +204,17 @@ static void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
 	ip4h->tos = 0;
 	ip4h->tot_len = htons(l3len);
 	ip4h->id = 0;
-	ip4h->frag_off = 0;
+	ip4h->frag_off = htons(IP_DF);
 	ip4h->ttl = 255;
 	ip4h->protocol = proto;
 	ip4h->saddr = src.s_addr;
 	ip4h->daddr = dst.s_addr;
 	ip4h->check = csum_ip4_header(l3len, proto, src, dst);
-	return ip4h + 1;
+	return (char *)ip4h + sizeof(*ip4h);
 }
 
 /**
- * tap_udp4_send() - Send UDP over IPv4 packet
- * @c:		Execution context
+ * tap_push_uh4() - Build UDPv4 header with checksum
+ * @uh:		UDP header (updated)
  * @src:	IPv4 source address
  * @sport:	UDP source port
@@ -171,16 +222,14 @@ static void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
  * @dport:	UDP destination port
  * @in:		UDP payload contents (not including UDP header)
  * @dlen:	UDP payload length (not including UDP header)
+ *
+ * Return: pointer at which to write the packet's payload
  */
-void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
+void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
 		   struct in_addr dst, in_port_t dport,
 		   const void *in, size_t dlen)
 {
 	size_t l4len = dlen + sizeof(struct udphdr);
-	char buf[USHRT_MAX];
-	struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
-	struct udphdr *uh = tap_push_ip4h(ip4h, src, dst, l4len, IPPROTO_UDP);
-	char *data = (char *)(uh + 1);
 	const struct iovec iov = {
 		.iov_base = (void *)in,
 		.iov_len = dlen
@@ -191,8 +240,30 @@ void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
 	uh->dest = htons(dport);
 	uh->len = htons(l4len);
 	csum_udp4(uh, src, dst, &payload);
-	memcpy(data, in, dlen);
+	return (char *)uh + sizeof(*uh);
+}
 
+/**
+ * tap_udp4_send() - Send UDP over IPv4 packet
+ * @c:		Execution context
+ * @src:	IPv4 source address
+ * @sport:	UDP source port
+ * @dst:	IPv4 destination address
+ * @dport:	UDP destination port
+ * @in:	UDP payload contents (not including UDP header)
+ * @dlen:	UDP payload length (not including UDP header)
+ */
+void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
+		   struct in_addr dst, in_port_t dport,
+		   const void *in, size_t dlen)
+{
+	size_t l4len = dlen + sizeof(struct udphdr);
+	char buf[USHRT_MAX];
+	struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
+	struct udphdr *uh = tap_push_ip4h(ip4h, src, dst, l4len, IPPROTO_UDP);
+	char *data = tap_push_uh4(uh, src, sport, dst, dport, in, dlen);
+
+	memcpy(data, in, dlen);
 	tap_send_single(c, buf, dlen + (data - buf));
 }
 
@@ -229,10 +300,9 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
  *
  * Return: pointer at which to write the packet's payload
  */
-static void *tap_push_ip6h(struct ipv6hdr *ip6h,
-			   const struct in6_addr *src,
-			   const struct in6_addr *dst,
-			   size_t l4len, uint8_t proto, uint32_t flow)
+void *tap_push_ip6h(struct ipv6hdr *ip6h,
+		    const struct in6_addr *src, const struct in6_addr *dst,
+		    size_t l4len, uint8_t proto, uint32_t flow)
 {
 	ip6h->payload_len = htons(l4len);
 	ip6h->priority = 0;
@@ -241,10 +311,40 @@ static void *tap_push_ip6h(struct ipv6hdr *ip6h,
 	ip6h->hop_limit = 255;
 	ip6h->saddr = *src;
 	ip6h->daddr = *dst;
-	ip6h->flow_lbl[0] = (flow >> 16) & 0xf;
-	ip6h->flow_lbl[1] = (flow >> 8) & 0xff;
-	ip6h->flow_lbl[2] = (flow >> 0) & 0xff;
-	return ip6h + 1;
+	ip6_set_flow_lbl(ip6h, flow);
+	return (char *)ip6h + sizeof(*ip6h);
+}
+
+/**
+ * tap_push_uh6() - Build UDPv6 header with checksum
+ * @uh:		UDP header (updated)
+ * @src:	IPv6 source address
+ * @sport:	UDP source port
+ * @dst:	IPv6 destination address
+ * @dport:	UDP destination port
+ * @in:		UDP payload contents (not including UDP header),
+ *		only checksummed here, not copied
+ * @dlen:	UDP payload length (not including UDP header)
+ *
+ * Return: pointer at which to write the packet's payload
+ */
+void *tap_push_uh6(struct udphdr *uh,
+		   const struct in6_addr *src, in_port_t sport,
+		   const struct in6_addr *dst, in_port_t dport,
+		   void *in, size_t dlen)
+{
+	size_t l4len = dlen + sizeof(struct udphdr);
+	const struct iovec iov = {
+		.iov_base = in,
+		.iov_len = dlen
+	};
+	struct iov_tail payload = IOV_TAIL(&iov, 1, 0);
+
+	uh->source = htons(sport);
+	uh->dest = htons(dport);
+	uh->len = htons(l4len);
+	csum_udp6(uh, src, dst, &payload);
+	return (char *)uh + sizeof(*uh);
 }
 
 /**
@@ -255,7 +355,7 @@ static void *tap_push_ip6h(struct ipv6hdr *ip6h,
  * @dst:	IPv6 destination address
  * @dport:	UDP destination port
  * @flow:	Flow label
- * @in:		UDP payload contents (not including UDP header)
+ * @in:	UDP payload contents (not including UDP header)
  * @dlen:	UDP payload length (not including UDP header)
  */
 void tap_udp6_send(const struct ctx *c,
@@ -268,19 +368,9 @@ void tap_udp6_send(const struct ctx *c,
 	struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
 	struct udphdr *uh = tap_push_ip6h(ip6h, src, dst,
 					  l4len, IPPROTO_UDP, flow);
-	char *data = (char *)(uh + 1);
-	const struct iovec iov = {
-		.iov_base = in,
-		.iov_len = dlen
-	};
-	struct iov_tail payload = IOV_TAIL(&iov, 1, 0);
+	char *data = tap_push_uh6(uh, src, sport, dst, dport, in, dlen);
 
-	uh->source = htons(sport);
-	uh->dest = htons(dport);
-	uh->len = htons(l4len);
-	csum_udp6(uh, src, dst, &payload);
 	memcpy(data, in, dlen);
-
 	tap_send_single(c, buf, dlen + (data - buf));
 }
 
@@ -469,6 +559,7 @@ PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf);
  * struct l4_seq4_t - Message sequence for one protocol handler call, IPv4
  * @msgs:	Count of messages in sequence
  * @protocol:	Protocol number
+ * @ttl:	Time to live
  * @source:	Source port
  * @dest:	Destination port
  * @saddr:	Source address
@@ -477,6 +568,7 @@ PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf);
  */
 static struct tap4_l4_t {
 	uint8_t protocol;
+	uint8_t ttl;
 
 	uint16_t source;
 	uint16_t dest;
@@ -491,14 +583,17 @@ static struct tap4_l4_t {
  * struct l4_seq6_t - Message sequence for one protocol handler call, IPv6
  * @msgs:	Count of messages in sequence
  * @protocol:	Protocol number
+ * @flow_lbl:	IPv6 flow label
  * @source:	Source port
  * @dest:	Destination port
  * @saddr:	Source address
  * @daddr:	Destination address
+ * @hop_limit:	Hop limit
  * @msg:	Array of messages that can be handled in a single call
  */
 static struct tap6_l4_t {
 	uint8_t protocol;
+	uint32_t flow_lbl :20;
 
 	uint16_t source;
 	uint16_t dest;
@@ -506,6 +601,8 @@ static struct tap6_l4_t {
 	struct in6_addr saddr;
 	struct in6_addr daddr;
 
+	uint8_t hop_limit;
+
 	struct pool_l4_t p;
 } tap6_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */];
 
@@ -694,7 +791,8 @@ resume:
 #define L4_MATCH(iph, uh, seq)							\
 	((seq)->protocol == (iph)->protocol &&					\
 	 (seq)->source   == (uh)->source    && (seq)->dest  == (uh)->dest &&	\
-	 (seq)->saddr.s_addr == (iph)->saddr && (seq)->daddr.s_addr == (iph)->daddr)
+	 (seq)->saddr.s_addr == (iph)->saddr &&				\
+	 (seq)->daddr.s_addr == (iph)->daddr && (seq)->ttl == (iph)->ttl)
 
 #define L4_SET(iph, uh, seq)						\
 	do {								\
@@ -703,6 +801,7 @@ resume:
 		(seq)->dest		= (uh)->dest;			\
 		(seq)->saddr.s_addr	= (iph)->saddr;			\
 		(seq)->daddr.s_addr	= (iph)->daddr;			\
+		(seq)->ttl		= (iph)->ttl;			\
 	} while (0)
 
 		if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < UIO_MAXIOV)
@@ -744,14 +843,14 @@ append:
 			for (k = 0; k < p->count; )
 				k += tcp_tap_handler(c, PIF_TAP, AF_INET,
 						     &seq->saddr, &seq->daddr,
-						     p, k, now);
+						     0, p, k, now);
 		} else if (seq->protocol == IPPROTO_UDP) {
 			if (c->no_udp)
 				continue;
 			for (k = 0; k < p->count; )
 				k += udp_tap_handler(c, PIF_TAP, AF_INET,
 						     &seq->saddr, &seq->daddr,
-						     p, k, now);
+						     seq->ttl, p, k, now);
 		}
 	}
 
@@ -872,16 +971,20 @@ resume:
 		((seq)->protocol == (proto)                &&		\
 		 (seq)->source   == (uh)->source           &&		\
 		 (seq)->dest == (uh)->dest                 &&		\
+		 (seq)->flow_lbl == ip6_get_flow_lbl(ip6h) &&		\
 		 IN6_ARE_ADDR_EQUAL(&(seq)->saddr, saddr)  &&		\
-		 IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr))
+		 IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr)  &&		\
+		 (seq)->hop_limit == (ip6h)->hop_limit)
 
 #define L4_SET(ip6h, proto, uh, seq)					\
 	do {								\
 		(seq)->protocol	= (proto);				\
 		(seq)->source	= (uh)->source;				\
 		(seq)->dest	= (uh)->dest;				\
+		(seq)->flow_lbl	= ip6_get_flow_lbl(ip6h);		\
 		(seq)->saddr	= *saddr;				\
 		(seq)->daddr	= *daddr;				\
+		(seq)->hop_limit = (ip6h)->hop_limit;			\
 	} while (0)
 
 		if (seq && L4_MATCH(ip6h, proto, uh, seq) &&
@@ -925,14 +1028,14 @@ append:
 			for (k = 0; k < p->count; )
 				k += tcp_tap_handler(c, PIF_TAP, AF_INET6,
 						     &seq->saddr, &seq->daddr,
-						     p, k, now);
+						     seq->flow_lbl, p, k, now);
 		} else if (seq->protocol == IPPROTO_UDP) {
 			if (c->no_udp)
 				continue;
 			for (k = 0; k < p->count; )
 				k += udp_tap_handler(c, PIF_TAP, AF_INET6,
 						     &seq->saddr, &seq->daddr,
-						     p, k, now);
+						     seq->hop_limit, p, k, now);
 		}
 	}
 
@@ -967,8 +1070,10 @@ void tap_handler(struct ctx *c, const struct timespec *now)
  * @c:		Execution context
  * @l2len:	Total L2 packet length
  * @p:		Packet buffer
+ * @now:	Current timestamp
  */
-void tap_add_packet(struct ctx *c, ssize_t l2len, char *p)
+void tap_add_packet(struct ctx *c, ssize_t l2len, char *p,
+		    const struct timespec *now)
 {
 	const struct ethhdr *eh;
 
@@ -984,9 +1089,17 @@ void tap_add_packet(struct ctx *c, ssize_t l2len, char *p)
 	switch (ntohs(eh->h_proto)) {
 	case ETH_P_ARP:
 	case ETH_P_IP:
+		if (pool_full(pool_tap4)) {
+			tap4_handler(c, pool_tap4, now);
+			pool_flush(pool_tap4);
+		}
 		packet_add(pool_tap4, l2len, p);
 		break;
 	case ETH_P_IPV6:
+		if (pool_full(pool_tap6)) {
+			tap6_handler(c, pool_tap6, now);
+			pool_flush(pool_tap6);
+		}
 		packet_add(pool_tap6, l2len, p);
 		break;
 	default:
@@ -1037,7 +1150,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
 
 	do {
 		n = recv(c->fd_tap, pkt_buf + partial_len,
-			 TAP_BUF_BYTES - partial_len, MSG_DONTWAIT);
+			 sizeof(pkt_buf) - partial_len, MSG_DONTWAIT);
 	} while ((n < 0) && errno == EINTR);
 
 	if (n < 0) {
@@ -1054,7 +1167,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
 	while (n >= (ssize_t)sizeof(uint32_t)) {
 		uint32_t l2len = ntohl_unaligned(p);
 
-		if (l2len < sizeof(struct ethhdr) || l2len > ETH_MAX_MTU) {
+		if (l2len < sizeof(struct ethhdr) || l2len > L2_MAX_LEN_PASST) {
 			err("Bad frame size from guest, resetting connection");
 			tap_sock_reset(c);
 			return;
@@ -1067,7 +1180,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now)
 		p += sizeof(uint32_t);
 		n -= sizeof(uint32_t);
 
-		tap_add_packet(c, l2len, p);
+		tap_add_packet(c, l2len, p, now);
 
 		p += l2len;
 		n -= l2len;
@@ -1108,8 +1221,10 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
 
 	tap_flush_pools();
 
-	for (n = 0; n <= (ssize_t)(TAP_BUF_BYTES - ETH_MAX_MTU); n += len) {
-		len = read(c->fd_tap, pkt_buf + n, ETH_MAX_MTU);
+	for (n = 0;
+	     n <= (ssize_t)(sizeof(pkt_buf) - L2_MAX_LEN_PASTA);
+	     n += len) {
+		len = read(c->fd_tap, pkt_buf + n, L2_MAX_LEN_PASTA);
 
 		if (len == 0) {
 			die("EOF on tap device, exiting");
@@ -1127,10 +1242,10 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now)
 
 		/* Ignore frames of bad length */
 		if (len < (ssize_t)sizeof(struct ethhdr) ||
-		    len > (ssize_t)ETH_MAX_MTU)
+		    len > (ssize_t)L2_MAX_LEN_PASTA)
 			continue;
 
-		tap_add_packet(c, len, pkt_buf + n);
+		tap_add_packet(c, len, pkt_buf + n, now);
 	}
 
 	tap_handler(c, now);
@@ -1328,8 +1443,8 @@ void tap_sock_update_pool(void *base, size_t size)
 {
 	int i;
 
-	pool_tap4_storage = PACKET_INIT(pool_tap4, TAP_MSGS, base, size);
-	pool_tap6_storage = PACKET_INIT(pool_tap6, TAP_MSGS, base, size);
+	pool_tap4_storage = PACKET_INIT(pool_tap4, TAP_MSGS_IP4, base, size);
+	pool_tap6_storage = PACKET_INIT(pool_tap6, TAP_MSGS_IP6, base, size);
 
 	for (i = 0; i < TAP_SEQS; i++) {
 		tap4_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV, base, size);
diff --git a/tap.h b/tap.h
index dfbd8b9..6fe3d15 100644
--- a/tap.h
+++ b/tap.h
@@ -6,7 +6,32 @@
 #ifndef TAP_H
 #define TAP_H
 
-#define ETH_HDR_INIT(proto) { .h_proto = htons_constant(proto) }
+/** L2_MAX_LEN_PASTA - Maximum frame length for pasta mode (with L2 header)
+ *
+ * The kernel tuntap device imposes a maximum frame size of 65535 including
+ * 'hard_header_len' (14 bytes for L2 Ethernet in the case of "tap" mode).
+ */
+#define L2_MAX_LEN_PASTA	USHRT_MAX
+
+/** L2_MAX_LEN_PASST - Maximum frame length for passt mode (with L2 header)
+ *
+ * The only structural limit the QEMU socket protocol imposes on frames is
+ * (2^32-1) bytes, but that would be ludicrously long in practice.  For now,
+ * limit it somewhat arbitrarily to 65535 bytes.  FIXME: Work out an appropriate
+ * limit with more precision.
+ */
+#define L2_MAX_LEN_PASST	USHRT_MAX
+
+/** L2_MAX_LEN_VU - Maximum frame length for vhost-user mode (with L2 header)
+ *
+ * vhost-user allows multiple buffers per frame, each of which can be quite
+ * large, so the inherent frame size limit is rather large.  Much larger than is
+ * actually useful for IP.  For now limit arbitrarily to 65535 bytes. FIXME:
+ * Work out an appropriate limit with more precision.
+ */
+#define L2_MAX_LEN_VU		USHRT_MAX
+
+struct udphdr;
 
 /**
  * struct tap_hdr - tap backend specific headers
@@ -44,6 +69,17 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
 		thdr->vnet_len = htonl(l2len);
 }
 
+unsigned long tap_l2_max_len(const struct ctx *c);
+void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto);
+void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
+		    struct in_addr dst, size_t l4len, uint8_t proto);
+void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
+		   struct in_addr dst, in_port_t dport,
+		   const void *in, size_t dlen);
+void *tap_push_uh6(struct udphdr *uh,
+		   const struct in6_addr *src, in_port_t sport,
+		   const struct in6_addr *dst, in_port_t dport,
+		   void *in, size_t dlen);
 void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
 		   struct in_addr dst, in_port_t dport,
 		   const void *in, size_t dlen);
@@ -51,6 +87,9 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
 		    const void *in, size_t l4len);
 const struct in6_addr *tap_ip6_daddr(const struct ctx *c,
 				     const struct in6_addr *src);
+void *tap_push_ip6h(struct ipv6hdr *ip6h,
+		    const struct in6_addr *src, const struct in6_addr *dst,
+		    size_t l4len, uint8_t proto, uint32_t flow);
 void tap_udp6_send(const struct ctx *c,
 		   const struct in6_addr *src, in_port_t sport,
 		   const struct in6_addr *dst, in_port_t dport,
@@ -74,6 +113,7 @@ void tap_sock_update_pool(void *base, size_t size);
 void tap_backend_init(struct ctx *c);
 void tap_flush_pools(void);
 void tap_handler(struct ctx *c, const struct timespec *now);
-void tap_add_packet(struct ctx *c, ssize_t l2len, char *p);
+void tap_add_packet(struct ctx *c, ssize_t l2len, char *p,
+		    const struct timespec *now);
 
 #endif /* TAP_H */
diff --git a/tcp.c b/tcp.c
index 98e1c6a..0ac298a 100644
--- a/tcp.c
+++ b/tcp.c
@@ -434,19 +434,20 @@ static struct tcp_tap_conn *conn_at_sidx(flow_sidx_t sidx)
 }
 
 /**
- * tcp_set_peek_offset() - Set SO_PEEK_OFF offset on a socket if supported
- * @s:          Socket to update
+ * tcp_set_peek_offset() - Set SO_PEEK_OFF offset on connection if supported
+ * @conn:	Pointer to the TCP connection structure
  * @offset:     Offset in bytes
  *
  * Return:      -1 when it fails, 0 otherwise.
  */
-int tcp_set_peek_offset(int s, int offset)
+int tcp_set_peek_offset(const struct tcp_tap_conn *conn, int offset)
 {
 	if (!peek_offset_cap)
 		return 0;
 
-	if (setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, sizeof(offset))) {
-		err("Failed to set SO_PEEK_OFF to %i in socket %i", offset, s);
+	if (setsockopt(conn->sock, SOL_SOCKET, SO_PEEK_OFF,
+		       &offset, sizeof(offset))) {
+		flow_perror(conn, "Failed to set SO_PEEK_OFF to %i", offset);
 		return -1;
 	}
 	return 0;
@@ -551,8 +552,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
 
 		fd = timerfd_create(CLOCK_MONOTONIC, 0);
 		if (fd == -1 || fd > FD_REF_MAX) {
-			flow_dbg(conn, "failed to get timer: %s",
-				 strerror_(errno));
+			flow_dbg_perror(conn, "failed to get timer");
 			if (fd > -1)
 				close(fd);
 			conn->timer = -1;
@@ -561,8 +561,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
 		conn->timer = fd;
 
 		if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, conn->timer, &ev)) {
-			flow_dbg(conn, "failed to add timer: %s",
-				 strerror_(errno));
+			flow_dbg_perror(conn, "failed to add timer");
 			close(conn->timer);
 			conn->timer = -1;
 			return;
@@ -587,7 +586,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
 		 (unsigned long long)it.it_value.tv_nsec / 1000 / 1000);
 
 	if (timerfd_settime(conn->timer, 0, &it, NULL))
-		flow_err(conn, "failed to set timer: %s", strerror_(errno));
+		flow_perror(conn, "failed to set timer");
 }
 
 /**
@@ -789,7 +788,8 @@ static void tcp_sock_set_nodelay(int s)
  * @th:		TCP header (updated)
  * @payload:	TCP payload
  */
-void tcp_update_csum(uint32_t psum, struct tcphdr *th, struct iov_tail *payload)
+static void tcp_update_csum(uint32_t psum, struct tcphdr *th,
+			    struct iov_tail *payload)
 {
 	th->check = 0;
 	psum = csum_unfolded(th, sizeof(*th), psum);
@@ -965,9 +965,7 @@ void tcp_fill_headers(const struct tcp_tap_conn *conn,
 		ip6h->version = 6;
 		ip6h->nexthdr = IPPROTO_TCP;
 
-		ip6h->flow_lbl[0] = (conn->sock >> 16) & 0xf;
-		ip6h->flow_lbl[1] = (conn->sock >> 8) & 0xff;
-		ip6h->flow_lbl[2] = (conn->sock >> 0) & 0xff;
+		ip6_set_flow_lbl(ip6h, conn->sock);
 
 		if (!no_tcp_csum) {
 			psum = proto_ipv6_header_psum(l4len, IPPROTO_TCP,
@@ -1141,7 +1139,7 @@ int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
 	if (flags & SYN) {
 		int mss;
 
-		if (c->mtu == -1) {
+		if (!c->mtu) {
 			mss = tinfo.tcpi_snd_mss;
 		} else {
 			mss = c->mtu - sizeof(struct tcphdr);
@@ -1216,8 +1214,8 @@ void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn)
 	if (conn->events == CLOSED)
 		return;
 
-	if (!tcp_send_flag(c, conn, RST))
-		conn_event(c, conn, CLOSED);
+	tcp_send_flag(c, conn, RST);
+	conn_event(c, conn, CLOSED);
 }
 
 /**
@@ -1386,10 +1384,10 @@ static void tcp_bind_outbound(const struct ctx *c,
 		if (bind(s, &bind_sa.sa, sl)) {
 			char sstr[INANY_ADDRSTRLEN];
 
-			flow_dbg(conn,
-				 "Can't bind TCP outbound socket to %s:%hu: %s",
-				 inany_ntop(&tgt->oaddr, sstr, sizeof(sstr)),
-				 tgt->oport, strerror_(errno));
+			flow_dbg_perror(conn,
+					"Can't bind TCP outbound socket to %s:%hu",
+					inany_ntop(&tgt->oaddr, sstr, sizeof(sstr)),
+					tgt->oport);
 		}
 	}
 
@@ -1398,9 +1396,9 @@ static void tcp_bind_outbound(const struct ctx *c,
 			if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE,
 				       c->ip4.ifname_out,
 				       strlen(c->ip4.ifname_out))) {
-				flow_dbg(conn, "Can't bind IPv4 TCP socket to"
-					 " interface %s: %s", c->ip4.ifname_out,
-					 strerror_(errno));
+				flow_dbg_perror(conn,
+						"Can't bind IPv4 TCP socket to interface %s",
+						c->ip4.ifname_out);
 			}
 		}
 	} else if (bind_sa.sa_family == AF_INET6) {
@@ -1408,9 +1406,9 @@ static void tcp_bind_outbound(const struct ctx *c,
 			if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE,
 				       c->ip6.ifname_out,
 				       strlen(c->ip6.ifname_out))) {
-				flow_dbg(conn, "Can't bind IPv6 TCP socket to"
-					 " interface %s: %s", c->ip6.ifname_out,
-					 strerror_(errno));
+				flow_dbg_perror(conn,
+						"Can't bind IPv6 TCP socket to interface %s",
+						c->ip6.ifname_out);
 			}
 		}
 	}
@@ -1548,9 +1546,8 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
 
 	if (c->mode == MODE_VU) { /* To rebind to same oport after migration */
 		sl = sizeof(sa);
-		if (!getsockname(s, &sa.sa, &sl))
-			inany_from_sockaddr(&tgt->oaddr, &tgt->oport, &sa);
-		else
+		if (getsockname(s, &sa.sa, &sl) ||
+		    inany_from_sockaddr(&tgt->oaddr, &tgt->oport, &sa) < 0)
 			err_perror("Can't get local address for socket %i", s);
 	}
 
@@ -1760,7 +1757,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 			   "fast re-transmit, ACK: %u, previous sequence: %u",
 			   max_ack_seq, conn->seq_to_tap);
 		conn->seq_to_tap = max_ack_seq;
-		if (tcp_set_peek_offset(conn->sock, 0)) {
+		if (tcp_set_peek_offset(conn, 0)) {
 			tcp_rst(c, conn);
 			return -1;
 		}
@@ -1857,7 +1854,7 @@ static void tcp_conn_from_sock_finish(const struct ctx *c,
 	conn->seq_ack_to_tap = conn->seq_from_tap;
 
 	conn_event(c, conn, ESTABLISHED);
-	if (tcp_set_peek_offset(conn->sock, 0)) {
+	if (tcp_set_peek_offset(conn, 0)) {
 		tcp_rst(c, conn);
 		return;
 	}
@@ -1870,6 +1867,75 @@ static void tcp_conn_from_sock_finish(const struct ctx *c,
 	tcp_data_from_sock(c, conn);
 }
 
+/**
+ * tcp_rst_no_conn() - Send RST in response to a packet with no connection
+ * @c:		Execution context
+ * @af:		Address family, AF_INET or AF_INET6
+ * @saddr:	Source address of the packet we're responding to
+ * @daddr:	Destination address of the packet we're responding to
+ * @flow_lbl:	IPv6 flow label (ignored for IPv4)
+ * @th:		TCP header of the packet we're responding to
+ * @l4len:	Packet length, including TCP header
+ */
+static void tcp_rst_no_conn(const struct ctx *c, int af,
+			    const void *saddr, const void *daddr,
+			    uint32_t flow_lbl,
+			    const struct tcphdr *th, size_t l4len)
+{
+	struct iov_tail payload = IOV_TAIL(NULL, 0, 0);
+	struct tcphdr *rsth;
+	char buf[USHRT_MAX];
+	uint32_t psum = 0;
+	size_t rst_l2len;
+
+	/* Don't respond to RSTs without a connection */
+	if (th->rst)
+		return;
+
+	if (af == AF_INET) {
+		struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
+		const struct in_addr *rst_src = daddr;
+		const struct in_addr *rst_dst = saddr;
+
+		rsth = tap_push_ip4h(ip4h, *rst_src, *rst_dst,
+				     sizeof(*rsth), IPPROTO_TCP);
+		psum = proto_ipv4_header_psum(sizeof(*rsth), IPPROTO_TCP,
+					      *rst_src, *rst_dst);
+
+	} else {
+		struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
+		const struct in6_addr *rst_src = daddr;
+		const struct in6_addr *rst_dst = saddr;
+
+		rsth = tap_push_ip6h(ip6h, rst_src, rst_dst,
+				     sizeof(*rsth), IPPROTO_TCP, flow_lbl);
+		psum = proto_ipv6_header_psum(sizeof(*rsth), IPPROTO_TCP,
+					      rst_src, rst_dst);
+	}
+
+	memset(rsth, 0, sizeof(*rsth));
+
+	rsth->source = th->dest;
+	rsth->dest = th->source;
+	rsth->rst = 1;
+	rsth->doff = sizeof(*rsth) / 4UL;
+
+	/* RFC 9293, 3.10.7.1: SEG.LEN counts SYN and FIN as well as data */
+	if (th->ack) {
+		rsth->seq = th->ack_seq;
+	} else {
+		size_t seglen = l4len - th->doff * 4UL + th->syn + th->fin;
+		uint32_t ack = ntohl(th->seq) + seglen;
+
+		rsth->ack_seq = htonl(ack);
+		rsth->ack = 1;
+	}
+
+	tcp_update_csum(psum, rsth, &payload);
+	rst_l2len = ((char *)rsth - buf) + sizeof(*rsth);
+	tap_send_single(c, buf, rst_l2len);
+}
+
 /**
  * tcp_tap_handler() - Handle packets from tap and state transitions
  * @c:		Execution context
@@ -1877,6 +1943,7 @@ static void tcp_conn_from_sock_finish(const struct ctx *c,
  * @af:		Address family, AF_INET or AF_INET6
  * @saddr:	Source address
  * @daddr:	Destination address
+ * @flow_lbl:	IPv6 flow label (ignored for IPv4)
  * @p:		Pool of TCP packets, with TCP headers
  * @idx:	Index of first packet in pool to process
  * @now:	Current timestamp
@@ -1884,7 +1951,7 @@ static void tcp_conn_from_sock_finish(const struct ctx *c,
  * Return: count of consumed packets
  */
 int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
-		    const void *saddr, const void *daddr,
+		    const void *saddr, const void *daddr, uint32_t flow_lbl,
 		    const struct pool *p, int idx, const struct timespec *now)
 {
 	struct tcp_tap_conn *conn;
@@ -1917,6 +1984,8 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 		if (opts && th->syn && !th->ack)
 			tcp_conn_from_tap(c, af, saddr, daddr, th,
 					  opts, optlen, now);
+		else
+			tcp_rst_no_conn(c, af, saddr, daddr, flow_lbl, th, len);
 		return 1;
 	}
 
@@ -1953,7 +2022,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 			goto reset;
 
 		conn_event(c, conn, ESTABLISHED);
-		if (tcp_set_peek_offset(conn->sock, 0))
+		if (tcp_set_peek_offset(conn, 0))
 			goto reset;
 
 		if (th->fin) {
@@ -1979,6 +2048,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 
 	/* Established connections not accepting data from tap */
 	if (conn->events & TAP_FIN_RCVD) {
+		tcp_sock_consume(conn, ntohl(th->ack_seq));
 		tcp_update_seqack_from_tap(c, conn, ntohl(th->ack_seq));
 		tcp_tap_window_update(conn, ntohs(th->window));
 		tcp_data_from_sock(c, conn);
@@ -2130,12 +2200,11 @@ void tcp_listen_handler(const struct ctx *c, union epoll_ref ref,
 	 * mode only, below.
 	 */
 	ini = flow_initiate_sa(flow, ref.tcp_listen.pif, &sa,
-			       ref.tcp_listen.port);
+			       NULL, ref.tcp_listen.port);
 
 	if (c->mode == MODE_VU) { /* Rebind to same address after migration */
-		if (!getsockname(s, &sa.sa, &sl))
-			inany_from_sockaddr(&ini->oaddr, &ini->oport, &sa);
-		else
+		if (getsockname(s, &sa.sa, &sl) ||
+		    inany_from_sockaddr(&ini->oaddr, &ini->oport, &sa) < 0)
 			err_perror("Can't get local address for socket %i", s);
 	}
 
@@ -2193,7 +2262,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
 	 * and we just set the timer to a new point in the future: discard it.
 	 */
 	if (timerfd_gettime(conn->timer, &check_armed))
-		flow_err(conn, "failed to read timer: %s", strerror_(errno));
+		flow_perror(conn, "failed to read timer");
 
 	if (check_armed.it_value.tv_sec || check_armed.it_value.tv_nsec)
 		return;
@@ -2217,7 +2286,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
 			conn->seq_to_tap = conn->seq_ack_from_tap;
 			if (!conn->wnd_from_tap)
 				conn->wnd_from_tap = 1; /* Zero-window probe */
-			if (tcp_set_peek_offset(conn->sock, 0)) {
+			if (tcp_set_peek_offset(conn, 0)) {
 				tcp_rst(c, conn);
 			} else {
 				tcp_data_from_sock(c, conn);
@@ -2235,8 +2304,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
 		 * ~ACK_TO_TAP_DUE or ~ACK_FROM_TAP_DUE.
 		 */
 		if (timerfd_settime(conn->timer, 0, &new, &old))
-			flow_err(conn, "failed to set timer: %s",
-				 strerror_(errno));
+			flow_perror(conn, "failed to set timer");
 
 		if (old.it_value.tv_sec == ACT_TIMEOUT) {
 			flow_dbg(conn, "activity timeout");
@@ -2430,7 +2498,7 @@ static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port)
  * @c:		Execution context
  * @port:	Port, host order
  */
-void tcp_ns_sock_init(const struct ctx *c, in_port_t port)
+static void tcp_ns_sock_init(const struct ctx *c, in_port_t port)
 {
 	ASSERT(!c->no_tcp);
 
@@ -2711,6 +2779,9 @@ int tcp_flow_repair_on(struct ctx *c, const struct tcp_tap_conn *conn)
 {
 	int rc = 0;
 
+	if (conn->sock < 0)
+		return 0;
+
 	if ((rc = repair_set(c, conn->sock, TCP_REPAIR_ON)))
 		err("Failed to set TCP_REPAIR");
 
@@ -2728,6 +2799,9 @@ int tcp_flow_repair_off(struct ctx *c, const struct tcp_tap_conn *conn)
 {
 	int rc = 0;
 
+	if (conn->sock < 0)
+		return 0;
+
 	if ((rc = repair_set(c, conn->sock, TCP_REPAIR_OFF)))
 		err("Failed to clear TCP_REPAIR");
 
@@ -2736,20 +2810,21 @@ int tcp_flow_repair_off(struct ctx *c, const struct tcp_tap_conn *conn)
 
 /**
  * tcp_flow_dump_tinfo() - Dump window scale, tcpi_state, tcpi_options
- * @c:		Execution context
+ * @conn:	Pointer to the TCP connection structure
  * @t:		Extended migration data
  *
  * Return: 0 on success, negative error code on failure
  */
-static int tcp_flow_dump_tinfo(int s, struct tcp_tap_transfer_ext *t)
+static int tcp_flow_dump_tinfo(const struct tcp_tap_conn *conn,
+			       struct tcp_tap_transfer_ext *t)
 {
 	struct tcp_info tinfo;
 	socklen_t sl;
 
 	sl = sizeof(tinfo);
-	if (getsockopt(s, SOL_TCP, TCP_INFO, &tinfo, &sl)) {
+	if (getsockopt(conn->sock, SOL_TCP, TCP_INFO, &tinfo, &sl)) {
 		int rc = -errno;
-		err_perror("Querying TCP_INFO, socket %i", s);
+		flow_perror(conn, "Querying TCP_INFO");
 		return rc;
 	}
 
@@ -2763,39 +2838,95 @@ static int tcp_flow_dump_tinfo(int s, struct tcp_tap_transfer_ext *t)
 
 /**
  * tcp_flow_dump_mss() - Dump MSS clamp (not current MSS) via TCP_MAXSEG
- * @c:		Execution context
+ * @conn:	Pointer to the TCP connection structure
  * @t:		Extended migration data
  *
  * Return: 0 on success, negative error code on failure
  */
-static int tcp_flow_dump_mss(int s, struct tcp_tap_transfer_ext *t)
+static int tcp_flow_dump_mss(const struct tcp_tap_conn *conn,
+			     struct tcp_tap_transfer_ext *t)
 {
 	socklen_t sl = sizeof(t->mss);
+	int val;
 
-	if (getsockopt(s, SOL_TCP, TCP_MAXSEG, &t->mss, &sl)) {
+	if (getsockopt(conn->sock, SOL_TCP, TCP_MAXSEG, &val, &sl)) {
 		int rc = -errno;
-		err_perror("Getting MSS, socket %i", s);
+		flow_perror(conn, "Getting MSS");
 		return rc;
 	}
 
+	t->mss = (uint32_t)val;
+
+	return 0;
+}
+
+
+/**
+ * tcp_flow_dump_timestamp() - Dump RFC 7323 timestamp via TCP_TIMESTAMP
+ * @conn:	Pointer to the TCP connection structure, conn->sock is queried
+ * @t:		Extended migration data (tcpi_options must be populated)
+ *
+ * Return: 0 on success with t->timestamp set, negative error code on failure
+ */
+static int tcp_flow_dump_timestamp(const struct tcp_tap_conn *conn,
+				   struct tcp_tap_transfer_ext *t)
+{
+	int val = 0;	/* Stored as is if TCPI_OPT_TIMESTAMPS isn't set */
+
+	if (t->tcpi_options & TCPI_OPT_TIMESTAMPS) {
+		socklen_t sl = sizeof(val);
+
+		if (getsockopt(conn->sock, SOL_TCP, TCP_TIMESTAMP, &val, &sl)) {
+			int rc = -errno; /* Save errno before logging */
+			flow_perror(conn, "Getting RFC 7323 timestamp");
+			return rc;
+		}
+	}
+
+	t->timestamp = (uint32_t)val;	/* Host byte order here */
+	return 0;
+}
+
+/**
+ * tcp_flow_repair_timestamp() - Restore RFC 7323 timestamp via TCP_TIMESTAMP
+ * @conn:	Pointer to the TCP connection structure, conn->sock is updated
+ * @t:		Extended migration data, with timestamp in host byte order
+ *
+ * Return: 0 on success or if timestamps aren't enabled, negative on failure
+ */
+static int tcp_flow_repair_timestamp(const struct tcp_tap_conn *conn,
+				     const struct tcp_tap_transfer_ext *t)
+{
+	int val = (int)t->timestamp;
+
+	if (t->tcpi_options & TCPI_OPT_TIMESTAMPS) { /* Otherwise: no-op */
+		if (setsockopt(conn->sock, SOL_TCP, TCP_TIMESTAMP,
+			       &val, sizeof(val))) {
+			int rc = -errno; /* Save errno before logging */
+			flow_perror(conn, "Setting RFC 7323 timestamp");
+			return rc;
+		}
+	}
+
 	return 0;
 }
 
 /**
  * tcp_flow_dump_wnd() - Dump current tcp_repair_window parameters
- * @c:		Execution context
+ * @conn:	Pointer to the TCP connection structure
  * @t:		Extended migration data
  *
  * Return: 0 on success, negative error code on failure
  */
-static int tcp_flow_dump_wnd(int s, struct tcp_tap_transfer_ext *t)
+static int tcp_flow_dump_wnd(const struct tcp_tap_conn *conn,
+			     struct tcp_tap_transfer_ext *t)
 {
 	struct tcp_repair_window wnd;
 	socklen_t sl = sizeof(wnd);
 
-	if (getsockopt(s, IPPROTO_TCP, TCP_REPAIR_WINDOW, &wnd, &sl)) {
+	if (getsockopt(conn->sock, IPPROTO_TCP, TCP_REPAIR_WINDOW, &wnd, &sl)) {
 		int rc = -errno;
-		err_perror("Getting window repair data, socket %i", s);
+		flow_perror(conn, "Getting window repair data");
 		return rc;
 	}
 
@@ -2819,12 +2950,13 @@ static int tcp_flow_dump_wnd(int s, struct tcp_tap_transfer_ext *t)
 
 /**
  * tcp_flow_repair_wnd() - Restore window parameters from extended data
- * @c:		Execution context
+ * @conn:	Pointer to the TCP connection structure
  * @t:		Extended migration data
  *
  * Return: 0 on success, negative error code on failure
  */
-static int tcp_flow_repair_wnd(int s, const struct tcp_tap_transfer_ext *t)
+static int tcp_flow_repair_wnd(const struct tcp_tap_conn *conn,
+			       const struct tcp_tap_transfer_ext *t)
 {
 	struct tcp_repair_window wnd;
 
@@ -2834,9 +2966,10 @@ static int tcp_flow_repair_wnd(int s, const struct tcp_tap_transfer_ext *t)
 	wnd.rcv_wnd	= t->rcv_wnd;
 	wnd.rcv_wup	= t->rcv_wup;
 
-	if (setsockopt(s, IPPROTO_TCP, TCP_REPAIR_WINDOW, &wnd, sizeof(wnd))) {
+	if (setsockopt(conn->sock, IPPROTO_TCP, TCP_REPAIR_WINDOW,
+		       &wnd, sizeof(wnd))) {
 		int rc = -errno;
-		err_perror("Setting window data, socket %i", s);
+		flow_perror(conn, "Setting window data");
 		return rc;
 	}
 
@@ -2845,16 +2978,17 @@ static int tcp_flow_repair_wnd(int s, const struct tcp_tap_transfer_ext *t)
 
 /**
  * tcp_flow_select_queue() - Select queue (receive or send) for next operation
- * @s:		Socket
+ * @conn:	Connection to select queue for
  * @queue:	TCP_RECV_QUEUE or TCP_SEND_QUEUE
  *
  * Return: 0 on success, negative error code on failure
  */
-static int tcp_flow_select_queue(int s, int queue)
+static int tcp_flow_select_queue(const struct tcp_tap_conn *conn, int queue)
 {
-	if (setsockopt(s, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue))) {
+	if (setsockopt(conn->sock, SOL_TCP, TCP_REPAIR_QUEUE,
+		       &queue, sizeof(queue))) {
 		int rc = -errno;
-		err_perror("Selecting TCP_SEND_QUEUE, socket %i", s);
+		flow_perror(conn, "Selecting TCP_SEND_QUEUE");
 		return rc;
 	}
 
@@ -2863,26 +2997,28 @@ static int tcp_flow_select_queue(int s, int queue)
 
 /**
  * tcp_flow_dump_sndqueue() - Dump send queue, length of sent and not sent data
- * @s:		Socket
+ * @conn:	Connection to dump queue for
  * @t:		Extended migration data
  *
  * Return: 0 on success, negative error code on failure
  *
  * #syscalls:vu ioctl
  */
-static int tcp_flow_dump_sndqueue(int s, struct tcp_tap_transfer_ext *t)
+static int tcp_flow_dump_sndqueue(const struct tcp_tap_conn *conn,
+				  struct tcp_tap_transfer_ext *t)
 {
+	int s = conn->sock;
 	ssize_t rc;
 
 	if (ioctl(s, SIOCOUTQ, &t->sndq) < 0) {
 		rc = -errno;
-		err_perror("Getting send queue size, socket %i", s);
+		flow_perror(conn, "Getting send queue size");
 		return rc;
 	}
 
 	if (ioctl(s, SIOCOUTQNSD, &t->notsent) < 0) {
 		rc = -errno;
-		err_perror("Getting not sent count, socket %i", s);
+		flow_perror(conn, "Getting not sent count");
 		return rc;
 	}
 
@@ -2901,14 +3037,16 @@ static int tcp_flow_dump_sndqueue(int s, struct tcp_tap_transfer_ext *t)
 	}
 
 	if (t->notsent > t->sndq) {
-		err("Invalid notsent count socket %i, send: %u, not sent: %u",
-		    s, t->sndq, t->notsent);
+		flow_err(conn,
+			 "Invalid notsent count socket %i, send: %u, not sent: %u",
+			 s, t->sndq, t->notsent);
 		return -EINVAL;
 	}
 
 	if (t->sndq > TCP_MIGRATE_SND_QUEUE_MAX) {
-		err("Send queue too large to migrate socket %i: %u bytes",
-		    s, t->sndq);
+		flow_err(conn,
+			 "Send queue too large to migrate socket %i: %u bytes",
+			 s, t->sndq);
 		return -ENOBUFS;
 	}
 
@@ -2919,13 +3057,13 @@ static int tcp_flow_dump_sndqueue(int s, struct tcp_tap_transfer_ext *t)
 			rc = 0;
 		} else {
 			rc = -errno;
-			err_perror("Can't read send queue, socket %i", s);
+			flow_perror(conn, "Can't read send queue");
 			return rc;
 		}
 	}
 
 	if ((uint32_t)rc < t->sndq) {
-		err("Short read migrating send queue");
+		flow_err(conn, "Short read migrating send queue");
 		return -ENXIO;
 	}
 
@@ -2936,19 +3074,20 @@ static int tcp_flow_dump_sndqueue(int s, struct tcp_tap_transfer_ext *t)
 
 /**
  * tcp_flow_repair_queue() - Restore contents of a given (pre-selected) queue
- * @s:		Socket
+ * @conn:	Connection to repair queue for
  * @len:	Length of data to be restored
  * @buf:	Buffer with content of pending data queue
  *
  * Return: 0 on success, negative error code on failure
  */
-static int tcp_flow_repair_queue(int s, size_t len, uint8_t *buf)
+static int tcp_flow_repair_queue(const struct tcp_tap_conn *conn,
+				 size_t len, uint8_t *buf)
 {
 	size_t chunk = len;
 	uint8_t *p = buf;
 
 	while (len > 0) {
-		ssize_t rc = send(s, p, MIN(len, chunk), 0);
+		ssize_t rc = send(conn->sock, p, MIN(len, chunk), 0);
 
 		if (rc < 0) {
 			if ((errno == ENOBUFS || errno == ENOMEM) &&
@@ -2958,7 +3097,7 @@ static int tcp_flow_repair_queue(int s, size_t len, uint8_t *buf)
 			}
 
 			rc = -errno;
-			err_perror("Can't write queue, socket %i", s);
+			flow_perror(conn, "Can't write queue");
 			return rc;
 		}
 
@@ -2971,18 +3110,18 @@ static int tcp_flow_repair_queue(int s, size_t len, uint8_t *buf)
 
 /**
  * tcp_flow_dump_seq() - Dump current sequence of pre-selected queue
- * @s:		Socket
+ * @conn:	Pointer to the TCP connection structure
  * @v:		Sequence value, set on return
  *
  * Return: 0 on success, negative error code on failure
  */
-static int tcp_flow_dump_seq(int s, uint32_t *v)
+static int tcp_flow_dump_seq(const struct tcp_tap_conn *conn, uint32_t *v)
 {
 	socklen_t sl = sizeof(*v);
 
-	if (getsockopt(s, SOL_TCP, TCP_QUEUE_SEQ, v, &sl)) {
+	if (getsockopt(conn->sock, SOL_TCP, TCP_QUEUE_SEQ, v, &sl)) {
 		int rc = -errno;
-		err_perror("Dumping sequence, socket %i", s);
+		flow_perror(conn, "Dumping sequence");
 		return rc;
 	}
 
@@ -2991,16 +3130,17 @@ static int tcp_flow_dump_seq(int s, uint32_t *v)
 
 /**
  * tcp_flow_repair_seq() - Restore sequence for pre-selected queue
- * @s:		Socket
+ * @conn:	Connection to repair sequences for
  * @v:		Sequence value to be set
  *
  * Return: 0 on success, negative error code on failure
  */
-static int tcp_flow_repair_seq(int s, const uint32_t *v)
+static int tcp_flow_repair_seq(const struct tcp_tap_conn *conn,
+			       const uint32_t *v)
 {
-	if (setsockopt(s, SOL_TCP, TCP_QUEUE_SEQ, v, sizeof(*v))) {
+	if (setsockopt(conn->sock, SOL_TCP, TCP_QUEUE_SEQ, v, sizeof(*v))) {
 		int rc = -errno;
-		err_perror("Setting sequence, socket %i", s);
+		flow_perror(conn, "Setting sequence");
 		return rc;
 	}
 
@@ -3009,15 +3149,17 @@ static int tcp_flow_repair_seq(int s, const uint32_t *v)
 
 /**
  * tcp_flow_dump_rcvqueue() - Dump receive queue and its length, seal/block it
- * @s:		Socket
+ * @conn:	Pointer to the TCP connection structure
  * @t:		Extended migration data
  *
  * Return: 0 on success, negative error code on failure
  *
  * #syscalls:vu ioctl
  */
-static int tcp_flow_dump_rcvqueue(int s, struct tcp_tap_transfer_ext *t)
+static int tcp_flow_dump_rcvqueue(const struct tcp_tap_conn *conn,
+				  struct tcp_tap_transfer_ext *t)
 {
+	int s = conn->sock;
 	ssize_t rc;
 
 	if (ioctl(s, SIOCINQ, &t->rcvq) < 0) {
@@ -3037,8 +3179,9 @@ static int tcp_flow_dump_rcvqueue(int s, struct tcp_tap_transfer_ext *t)
 		t->rcvq--;
 
 	if (t->rcvq > TCP_MIGRATE_RCV_QUEUE_MAX) {
-		err("Receive queue too large to migrate socket %i: %u bytes",
-		    s, t->rcvq);
+		flow_err(conn,
+			 "Receive queue too large to migrate socket: %u bytes",
+			 t->rcvq);
 		return -ENOBUFS;
 	}
 
@@ -3048,13 +3191,13 @@ static int tcp_flow_dump_rcvqueue(int s, struct tcp_tap_transfer_ext *t)
 			rc = 0;
 		} else {
 			rc = -errno;
-			err_perror("Can't read receive queue for socket %i", s);
+			flow_perror(conn, "Can't read receive queue");
 			return rc;
 		}
 	}
 
 	if ((uint32_t)rc < t->rcvq) {
-		err("Short read migrating receive queue");
+		flow_err(conn, "Short read migrating receive queue");
 		return -ENXIO;
 	}
 
@@ -3063,12 +3206,13 @@ static int tcp_flow_dump_rcvqueue(int s, struct tcp_tap_transfer_ext *t)
 
 /**
  * tcp_flow_repair_opt() - Set repair "options" (MSS, scale, SACK, timestamps)
- * @s:		Socket
+ * @conn:	Pointer to the TCP connection structure
  * @t:		Extended migration data
  *
  * Return: 0 on success, negative error code on failure
  */
-int tcp_flow_repair_opt(int s, const struct tcp_tap_transfer_ext *t)
+static int tcp_flow_repair_opt(const struct tcp_tap_conn *conn,
+			       const struct tcp_tap_transfer_ext *t)
 {
 	const struct tcp_repair_opt opts[] = {
 		{ TCPOPT_WINDOW,		t->snd_ws + (t->rcv_ws << 16) },
@@ -3082,9 +3226,9 @@ int tcp_flow_repair_opt(int s, const struct tcp_tap_transfer_ext *t)
 				!!(t->tcpi_options & TCPI_OPT_SACK) +
 				!!(t->tcpi_options & TCPI_OPT_TIMESTAMPS));
 
-	if (setsockopt(s, SOL_TCP, TCP_REPAIR_OPTIONS, opts, sl)) {
+	if (setsockopt(conn->sock, SOL_TCP, TCP_REPAIR_OPTIONS, opts, sl)) {
 		int rc = -errno;
-		err_perror("Setting repair options, socket %i", s);
+		flow_perror(conn, "Setting repair options");
 		return rc;
 	}
 
@@ -3141,52 +3285,53 @@ int tcp_flow_migrate_source(int fd, struct tcp_tap_conn *conn)
 /**
  * tcp_flow_migrate_source_ext() - Dump queues, close sockets, send final data
  * @fd:		Descriptor for state migration
- * @fidx:	Flow index
  * @conn:	Pointer to the TCP connection structure
  *
  * Return: 0 on success, negative (not -EIO) on failure, -EIO on sending failure
  */
-int tcp_flow_migrate_source_ext(int fd, int fidx,
-				const struct tcp_tap_conn *conn)
+int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn)
 {
 	uint32_t peek_offset = conn->seq_to_tap - conn->seq_ack_from_tap;
-	struct tcp_tap_transfer_ext *t = &migrate_ext[fidx];
+	struct tcp_tap_transfer_ext *t = &migrate_ext[FLOW_IDX(conn)];
 	int s = conn->sock;
 	int rc;
 
 	/* Disable SO_PEEK_OFF, it will make accessing the queues in repair mode
 	 * weird.
 	 */
-	if (tcp_set_peek_offset(s, -1)) {
+	if (tcp_set_peek_offset(conn, -1)) {
 		rc = -errno;
 		goto fail;
 	}
 
-	if ((rc = tcp_flow_dump_tinfo(s, t)))
+	if ((rc = tcp_flow_dump_tinfo(conn, t)))
 		goto fail;
 
-	if ((rc = tcp_flow_dump_mss(s, t)))
+	if ((rc = tcp_flow_dump_mss(conn, t)))
 		goto fail;
 
-	if ((rc = tcp_flow_dump_wnd(s, t)))
+	if ((rc = tcp_flow_dump_timestamp(conn, t)))
 		goto fail;
 
-	if ((rc = tcp_flow_select_queue(s, TCP_SEND_QUEUE)))
+	if ((rc = tcp_flow_dump_wnd(conn, t)))
 		goto fail;
 
-	if ((rc = tcp_flow_dump_sndqueue(s, t)))
+	if ((rc = tcp_flow_select_queue(conn, TCP_SEND_QUEUE)))
 		goto fail;
 
-	if ((rc = tcp_flow_dump_seq(s, &t->seq_snd)))
+	if ((rc = tcp_flow_dump_sndqueue(conn, t)))
 		goto fail;
 
-	if ((rc = tcp_flow_select_queue(s, TCP_RECV_QUEUE)))
+	if ((rc = tcp_flow_dump_seq(conn, &t->seq_snd)))
 		goto fail;
 
-	if ((rc = tcp_flow_dump_rcvqueue(s, t)))
+	if ((rc = tcp_flow_select_queue(conn, TCP_RECV_QUEUE)))
 		goto fail;
 
-	if ((rc = tcp_flow_dump_seq(s, &t->seq_rcv)))
+	if ((rc = tcp_flow_dump_rcvqueue(conn, t)))
+		goto fail;
+
+	if ((rc = tcp_flow_dump_seq(conn, &t->seq_rcv)))
 		goto fail;
 
 	close(s);
@@ -3197,14 +3342,14 @@ int tcp_flow_migrate_source_ext(int fd, int fidx,
 	t->seq_rcv	-= t->rcvq;
 	t->seq_snd	-= t->sndq;
 
-	debug("Extended migration data, socket %i sequences send %u receive %u",
-	      s, t->seq_snd, t->seq_rcv);
-	debug("  pending queues: send %u not sent %u receive %u",
-	      t->sndq, t->notsent, t->rcvq);
-	debug("  window: snd_wl1 %u snd_wnd %u max %u rcv_wnd %u rcv_wup %u",
-	      t->snd_wl1, t->snd_wnd, t->max_window, t->rcv_wnd, t->rcv_wup);
-	debug("  SO_PEEK_OFF %s  offset=%"PRIu32,
-	      peek_offset_cap ? "enabled" : "disabled", peek_offset);
+	flow_dbg(conn, "Extended migration data, socket %i sequences send %u receive %u",
+		 s, t->seq_snd, t->seq_rcv);
+	flow_dbg(conn, "  pending queues: send %u not sent %u receive %u",
+		 t->sndq, t->notsent, t->rcvq);
+	flow_dbg(conn, "  window: snd_wl1 %u snd_wnd %u max %u rcv_wnd %u rcv_wup %u",
+		 t->snd_wl1, t->snd_wnd, t->max_window, t->rcv_wnd, t->rcv_wup);
+	flow_dbg(conn, "  SO_PEEK_OFF %s  offset=%"PRIu32,
+		 peek_offset_cap ? "enabled" : "disabled", peek_offset);
 
 	/* Endianness fix-ups */
 	t->seq_snd	= htonl(t->seq_snd);
@@ -3212,6 +3357,8 @@ int tcp_flow_migrate_source_ext(int fd, int fidx,
 	t->sndq		= htonl(t->sndq);
 	t->notsent	= htonl(t->notsent);
 	t->rcvq		= htonl(t->rcvq);
+	t->mss		= htonl(t->mss);
+	t->timestamp	= htonl(t->timestamp);
 
 	t->snd_wl1	= htonl(t->snd_wl1);
 	t->snd_wnd	= htonl(t->snd_wnd);
@@ -3220,17 +3367,17 @@ int tcp_flow_migrate_source_ext(int fd, int fidx,
 	t->rcv_wup	= htonl(t->rcv_wup);
 
 	if (write_all_buf(fd, t, sizeof(*t))) {
-		err_perror("Failed to write extended data, socket %i", s);
+		flow_perror(conn, "Failed to write extended data");
 		return -EIO;
 	}
 
 	if (write_all_buf(fd, tcp_migrate_snd_queue, ntohl(t->sndq))) {
-		err_perror("Failed to write send queue data, socket %i", s);
+		flow_perror(conn, "Failed to write send queue data");
 		return -EIO;
 	}
 
 	if (write_all_buf(fd, tcp_migrate_rcv_queue, ntohl(t->rcvq))) {
-		err_perror("Failed to write receive queue data, socket %i", s);
+		flow_perror(conn, "Failed to write receive queue data");
 		return -EIO;
 	}
 
@@ -3245,7 +3392,7 @@ fail:
 	t->tcpi_state = 0; /* Not defined: tell the target to skip this flow */
 
 	if (write_all_buf(fd, t, sizeof(*t))) {
-		err_perror("Failed to write extended data, socket %i", s);
+		flow_perror(conn, "Failed to write extended data");
 		return -EIO;
 	}
 
@@ -3262,34 +3409,25 @@ fail:
  *
  * Return: 0 on success, negative error code on failure
  */
-int tcp_flow_repair_socket(struct ctx *c, struct tcp_tap_conn *conn)
+static int tcp_flow_repair_socket(struct ctx *c, struct tcp_tap_conn *conn)
 {
 	sa_family_t af = CONN_V4(conn) ? AF_INET : AF_INET6;
-	const struct flowside *sockside = HOSTFLOW(conn);
-	union sockaddr_inany a;
-	socklen_t sl;
 	int s, rc;
 
-	pif_sockaddr(c, &a, &sl, PIF_HOST, &sockside->oaddr, sockside->oport);
-
 	if ((conn->sock = socket(af, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC,
 				 IPPROTO_TCP)) < 0) {
 		rc = -errno;
-		err_perror("Failed to create socket for migrated flow");
+		flow_perror(conn, "Failed to create socket for migrated flow");
 		return rc;
 	}
 	s = conn->sock;
 
 	if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &(int){ 1 }, sizeof(int)))
-		debug_perror("Setting SO_REUSEADDR on socket %i", s);
+		flow_dbg_perror(conn, "Failed to set SO_REUSEADDR on socket %i",
+				s);
 
 	tcp_sock_set_nodelay(s);
 
-	if ((rc = bind(s, &a.sa, sizeof(a)))) {
-		err_perror("Failed to bind socket for migrated flow");
-		goto err;
-	}
-
 	if ((rc = tcp_flow_repair_on(c, conn)))
 		goto err;
 
@@ -3301,6 +3439,30 @@ err:
 	return rc;
 }
 
+/**
+ * tcp_flow_repair_bind() - Bind socket in repair mode
+ * @c:		Execution context
+ * @conn:	Pointer to the TCP connection structure, conn->sock is bound
+ *
+ * Return: 0 on success, negative error code (-errno from bind()) on failure
+ */
+static int tcp_flow_repair_bind(const struct ctx *c, struct tcp_tap_conn *conn)
+{
+	const struct flowside *sockside = HOSTFLOW(conn);
+	union sockaddr_inany a;
+	socklen_t sl;	/* NOTE(review): passed below, bind() uses sizeof(a) */
+
+	pif_sockaddr(c, &a, &sl, PIF_HOST, &sockside->oaddr, sockside->oport);
+
+	if (bind(conn->sock, &a.sa, sizeof(a))) {
+		int rc = -errno; /* Save errno before logging */
+		flow_perror(conn, "Failed to bind socket for migrated flow");
+		return rc;
+	}
+
+	return 0;
+}
+
 /**
  * tcp_flow_repair_connect() - Connect socket in repair mode, then turn it off
  * @c:		Execution context
@@ -3317,7 +3479,7 @@ static int tcp_flow_repair_connect(const struct ctx *c,
 	rc = flowside_connect(c, conn->sock, PIF_HOST, tgt);
 	if (rc) {
 		rc = -errno;
-		err_perror("Failed to connect migrated socket %i", conn->sock);
+		flow_perror(conn, "Failed to connect migrated socket");
 		return rc;
 	}
 
@@ -3348,8 +3510,8 @@ int tcp_flow_migrate_target(struct ctx *c, int fd)
 	}
 
 	if (read_all_buf(fd, &t, sizeof(t))) {
+		flow_perror(flow, "Failed to receive migration data");
 		flow_alloc_cancel(flow);
-		err_perror("Failed to receive migration data");
 		return -errno;
 	}
 
@@ -3380,8 +3542,9 @@ int tcp_flow_migrate_target(struct ctx *c, int fd)
 	conn->seq_init_from_tap		= ntohl(t.seq_init_from_tap);
 
 	if ((rc = tcp_flow_repair_socket(c, conn))) {
-		flow_err(flow, "Can't set up socket: %s, drop", strerror_(rc));
-		flow_alloc_cancel(flow);
+		flow_err(flow, "Can't set up socket: %s, drop", strerror_(-rc));
+		/* Can't leave the flow in an incomplete state */
+		FLOW_ACTIVATE(conn);
 		return 0;
 	}
 
@@ -3394,26 +3557,25 @@ int tcp_flow_migrate_target(struct ctx *c, int fd)
 /**
  * tcp_flow_migrate_target_ext() - Receive extended data for flow, set, connect
  * @c:		Execution context
- * @flow:	Existing flow for this connection data
+ * @conn:	Connection entry to complete with extra data
  * @fd:		Descriptor for state migration
  *
  * Return: 0 on success, negative on fatal failure, but 0 on single flow failure
  */
-int tcp_flow_migrate_target_ext(struct ctx *c, union flow *flow, int fd)
+int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd)
 {
-	struct tcp_tap_conn *conn = &flow->tcp;
 	uint32_t peek_offset = conn->seq_to_tap - conn->seq_ack_from_tap;
 	struct tcp_tap_transfer_ext t;
 	int s = conn->sock, rc;
 
 	if (read_all_buf(fd, &t, sizeof(t))) {
 		rc = -errno;
-		err_perror("Failed to read extended data for socket %i", s);
+		flow_perror(conn, "Failed to read extended data");
 		return rc;
 	}
 
 	if (!t.tcpi_state) { /* Source wants us to skip this flow */
-		flow_err(flow, "Dropping as requested by source");
+		flow_err(conn, "Dropping as requested by source");
 		goto fail;
 	}
 
@@ -3423,6 +3585,8 @@ int tcp_flow_migrate_target_ext(struct ctx *c, union flow *flow, int fd)
 	t.sndq		= ntohl(t.sndq);
 	t.notsent	= ntohl(t.notsent);
 	t.rcvq		= ntohl(t.rcvq);
+	t.mss		= ntohl(t.mss);
+	t.timestamp	= ntohl(t.timestamp);
 
 	t.snd_wl1	= ntohl(t.snd_wl1);
 	t.snd_wnd	= ntohl(t.snd_wnd);
@@ -3430,60 +3594,73 @@ int tcp_flow_migrate_target_ext(struct ctx *c, union flow *flow, int fd)
 	t.rcv_wnd	= ntohl(t.rcv_wnd);
 	t.rcv_wup	= ntohl(t.rcv_wup);
 
-	debug("Extended migration data, socket %i sequences send %u receive %u",
-	      s, t.seq_snd, t.seq_rcv);
-	debug("  pending queues: send %u not sent %u receive %u",
-	      t.sndq, t.notsent, t.rcvq);
-	debug("  window: snd_wl1 %u snd_wnd %u max %u rcv_wnd %u rcv_wup %u",
-	      t.snd_wl1, t.snd_wnd, t.max_window, t.rcv_wnd, t.rcv_wup);
-	debug("  SO_PEEK_OFF %s  offset=%"PRIu32,
-	      peek_offset_cap ? "enabled" : "disabled", peek_offset);
+	flow_dbg(conn,
+		 "Extended migration data, socket %i sequences send %u receive %u",
+		 s, t.seq_snd, t.seq_rcv);
+	flow_dbg(conn, "  pending queues: send %u not sent %u receive %u",
+		 t.sndq, t.notsent, t.rcvq);
+	flow_dbg(conn,
+		 "  window: snd_wl1 %u snd_wnd %u max %u rcv_wnd %u rcv_wup %u",
+		 t.snd_wl1, t.snd_wnd, t.max_window, t.rcv_wnd, t.rcv_wup);
+	flow_dbg(conn, "  SO_PEEK_OFF %s  offset=%"PRIu32,
+		 peek_offset_cap ? "enabled" : "disabled", peek_offset);
 
 	if (t.sndq > TCP_MIGRATE_SND_QUEUE_MAX || t.notsent > t.sndq ||
 	    t.rcvq > TCP_MIGRATE_RCV_QUEUE_MAX) {
-		err("Bad queues socket %i, send: %u, not sent: %u, receive: %u",
-		    s, t.sndq, t.notsent, t.rcvq);
+		flow_err(conn,
+			 "Bad queues socket %i, send: %u, not sent: %u, receive: %u",
+			 s, t.sndq, t.notsent, t.rcvq);
 		return -EINVAL;
 	}
 
 	if (read_all_buf(fd, tcp_migrate_snd_queue, t.sndq)) {
 		rc = -errno;
-		err_perror("Failed to read send queue data, socket %i", s);
+		flow_perror(conn, "Failed to read send queue data");
 		return rc;
 	}
 
 	if (read_all_buf(fd, tcp_migrate_rcv_queue, t.rcvq)) {
 		rc = -errno;
-		err_perror("Failed to read receive queue data, socket %i", s);
+		flow_perror(conn, "Failed to read receive queue data");
 		return rc;
 	}
 
-	if (tcp_flow_select_queue(s, TCP_SEND_QUEUE))
+	if (conn->sock < 0)
+		/* We weren't able to create the socket, discard flow */
 		goto fail;
 
-	if (tcp_flow_repair_seq(s, &t.seq_snd))
+	if (tcp_flow_repair_bind(c, conn))
 		goto fail;
 
-	if (tcp_flow_select_queue(s, TCP_RECV_QUEUE))
+	if (tcp_flow_repair_timestamp(conn, &t))
 		goto fail;
 
-	if (tcp_flow_repair_seq(s, &t.seq_rcv))
+	if (tcp_flow_select_queue(conn, TCP_SEND_QUEUE))
+		goto fail;
+
+	if (tcp_flow_repair_seq(conn, &t.seq_snd))
+		goto fail;
+
+	if (tcp_flow_select_queue(conn, TCP_RECV_QUEUE))
+		goto fail;
+
+	if (tcp_flow_repair_seq(conn, &t.seq_rcv))
 		goto fail;
 
 	if (tcp_flow_repair_connect(c, conn))
 		goto fail;
 
-	if (tcp_flow_repair_queue(s, t.rcvq, tcp_migrate_rcv_queue))
+	if (tcp_flow_repair_queue(conn, t.rcvq, tcp_migrate_rcv_queue))
 		goto fail;
 
-	if (tcp_flow_select_queue(s, TCP_SEND_QUEUE))
+	if (tcp_flow_select_queue(conn, TCP_SEND_QUEUE))
 		goto fail;
 
-	if (tcp_flow_repair_queue(s, t.sndq - t.notsent,
+	if (tcp_flow_repair_queue(conn, t.sndq - t.notsent,
 				  tcp_migrate_snd_queue))
 		goto fail;
 
-	if (tcp_flow_repair_opt(s, &t))
+	if (tcp_flow_repair_opt(conn, &t))
 		goto fail;
 
 	/* If we sent a FIN sent and it was acknowledged (TCP_FIN_WAIT2), don't
@@ -3498,19 +3675,19 @@ int tcp_flow_migrate_target_ext(struct ctx *c, union flow *flow, int fd)
 
 		v = TCP_SEND_QUEUE;
 		if (setsockopt(s, SOL_TCP, TCP_REPAIR_QUEUE, &v, sizeof(v)))
-			debug_perror("Selecting repair queue, socket %i", s);
+			flow_perror(conn, "Selecting repair queue");
 		else
 			shutdown(s, SHUT_WR);
 	}
 
-	if (tcp_flow_repair_wnd(s, &t))
+	if (tcp_flow_repair_wnd(conn, &t))
 		goto fail;
 
 	tcp_flow_repair_off(c, conn);
 	repair_flush(c);
 
 	if (t.notsent) {
-		if (tcp_flow_repair_queue(s, t.notsent,
+		if (tcp_flow_repair_queue(conn, t.notsent,
 					  tcp_migrate_snd_queue +
 					  (t.sndq - t.notsent))) {
 			/* This sometimes seems to fail for unclear reasons.
@@ -3530,23 +3707,26 @@ int tcp_flow_migrate_target_ext(struct ctx *c, union flow *flow, int fd)
 	if (t.tcpi_state == TCP_FIN_WAIT1)
 		shutdown(s, SHUT_WR);
 
-	if (tcp_set_peek_offset(conn->sock, peek_offset))
+	if (tcp_set_peek_offset(conn, peek_offset))
 		goto fail;
 
 	tcp_send_flag(c, conn, ACK);
 	tcp_data_from_sock(c, conn);
 
 	if ((rc = tcp_epoll_ctl(c, conn))) {
-		debug("Failed to subscribe to epoll for migrated socket %i: %s",
-		      conn->sock, strerror_(-rc));
+		flow_dbg(conn,
+			 "Failed to subscribe to epoll for migrated socket: %s",
+			 strerror_(-rc));
 		goto fail;
 	}
 
 	return 0;
 
 fail:
-	tcp_flow_repair_off(c, conn);
-	repair_flush(c);
+	if (conn->sock >= 0) {
+		tcp_flow_repair_off(c, conn);
+		repair_flush(c);
+	}
 
 	conn->flags = 0; /* Not waiting for ACK, don't schedule timer */
 	tcp_rst(c, conn);
diff --git a/tcp.h b/tcp.h
index cf30744..234a803 100644
--- a/tcp.h
+++ b/tcp.h
@@ -16,7 +16,7 @@ void tcp_listen_handler(const struct ctx *c, union epoll_ref ref,
 void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
 		      uint32_t events);
 int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
-		    const void *saddr, const void *daddr,
+		    const void *saddr, const void *daddr, uint32_t flow_lbl,
 		    const struct pool *p, int idx, const struct timespec *now);
 int tcp_sock_init(const struct ctx *c, const union inany_addr *addr,
 		  const char *ifname, in_port_t port);
@@ -25,7 +25,6 @@ void tcp_timer(struct ctx *c, const struct timespec *now);
 void tcp_defer_handler(struct ctx *c);
 
 void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s);
-int tcp_set_peek_offset(int s, int offset);
 
 extern bool peek_offset_cap;
 
diff --git a/tcp_buf.c b/tcp_buf.c
index 72d99c5..0530563 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -125,7 +125,7 @@ static void tcp_revert_seq(const struct ctx *c, struct tcp_tap_conn **conns,
 
 		conn->seq_to_tap = seq;
 		peek_offset = conn->seq_to_tap - conn->seq_ack_from_tap;
-		if (tcp_set_peek_offset(conn->sock, peek_offset))
+		if (tcp_set_peek_offset(conn, peek_offset))
 			tcp_rst(c, conn);
 	}
 }
@@ -304,7 +304,7 @@ int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 			   conn->seq_ack_from_tap, conn->seq_to_tap);
 		conn->seq_to_tap = conn->seq_ack_from_tap;
 		already_sent = 0;
-		if (tcp_set_peek_offset(s, 0)) {
+		if (tcp_set_peek_offset(conn, 0)) {
 			tcp_rst(c, conn);
 			return -1;
 		}
diff --git a/tcp_conn.h b/tcp_conn.h
index 42dff48..35d813d 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -152,6 +152,7 @@ struct tcp_tap_transfer {
  * @notsent:		Part of pending send queue that wasn't sent out yet
  * @rcvq:		Length of pending receive queue
  * @mss:		Socket-side MSS clamp
+ * @timestamp:		RFC 7323 timestamp
  * @snd_wl1:		Next sequence used in window probe (next sequence - 1)
  * @snd_wnd:		Socket-side sending window
  * @max_window:		Window clamp
@@ -171,6 +172,7 @@ struct tcp_tap_transfer_ext {
 	uint32_t	rcvq;
 
 	uint32_t	mss;
+	uint32_t	timestamp;
 
 	/* We can't just use struct tcp_repair_window: we need network order */
 	uint32_t	snd_wl1;
@@ -233,13 +235,11 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn);
 int tcp_flow_repair_on(struct ctx *c, const struct tcp_tap_conn *conn);
 int tcp_flow_repair_off(struct ctx *c, const struct tcp_tap_conn *conn);
 
-int tcp_flow_migrate_shrink_window(int fidx, const struct tcp_tap_conn *conn);
 int tcp_flow_migrate_source(int fd, struct tcp_tap_conn *conn);
-int tcp_flow_migrate_source_ext(int fd, int fidx,
-				const struct tcp_tap_conn *conn);
+int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn);
 
 int tcp_flow_migrate_target(struct ctx *c, int fd);
-int tcp_flow_migrate_target_ext(struct ctx *c, union flow *flow, int fd);
+int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd);
 
 bool tcp_flow_is_established(const struct tcp_tap_conn *conn);
 
diff --git a/tcp_internal.h b/tcp_internal.h
index 9cf31f5..36c6533 100644
--- a/tcp_internal.h
+++ b/tcp_internal.h
@@ -166,8 +166,6 @@ void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn);
 
 struct tcp_info_linux;
 
-void tcp_update_csum(uint32_t psum, struct tcphdr *th,
-		     struct iov_tail *payload);
 void tcp_fill_headers(const struct tcp_tap_conn *conn,
 		      struct tap_hdr *taph,
 		      struct iphdr *ip4h, struct ipv6hdr *ip6h,
@@ -179,5 +177,6 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
 int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
 		      int flags, struct tcphdr *th, struct tcp_syn_opts *opts,
 		      size_t *optlen);
+int tcp_set_peek_offset(const struct tcp_tap_conn *conn, int offset);
 
 #endif /* TCP_INTERNAL_H */
diff --git a/tcp_splice.c b/tcp_splice.c
index 5d845c9..60455d6 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -164,7 +164,7 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
 	if (epoll_ctl(c->epollfd, m, conn->s[0], &ev[0]) ||
 	    epoll_ctl(c->epollfd, m, conn->s[1], &ev[1])) {
 		int ret = -errno;
-		flow_err(conn, "ERROR on epoll_ctl(): %s", strerror_(errno));
+		flow_perror(conn, "ERROR on epoll_ctl()");
 		return ret;
 	}
 
@@ -317,8 +317,8 @@ static int tcp_splice_connect_finish(const struct ctx *c,
 
 		if (conn->pipe[sidei][0] < 0) {
 			if (pipe2(conn->pipe[sidei], O_NONBLOCK | O_CLOEXEC)) {
-				flow_err(conn, "cannot create %d->%d pipe: %s",
-					 sidei, !sidei, strerror_(errno));
+				flow_perror(conn, "cannot create %d->%d pipe",
+					    sidei, !sidei);
 				conn_flag(c, conn, CLOSING);
 				return -EIO;
 			}
@@ -482,8 +482,7 @@ void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
 
 		rc = getsockopt(ref.fd, SOL_SOCKET, SO_ERROR, &err, &sl);
 		if (rc)
-			flow_err(conn, "Error retrieving SO_ERROR: %s",
-				 strerror_(errno));
+			flow_perror(conn, "Error retrieving SO_ERROR");
 		else
 			flow_trace(conn, "Error event on socket: %s",
 				   strerror_(err));
@@ -521,20 +520,21 @@ swap:
 		int more = 0;
 
 retry:
-		readlen = splice(conn->s[fromsidei], NULL,
-				 conn->pipe[fromsidei][1], NULL,
-				 c->tcp.pipe_size,
-				 SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
-		flow_trace(conn, "%zi from read-side call", readlen);
-		if (readlen < 0) {
-			if (errno == EINTR)
-				goto retry;
+		do
+			readlen = splice(conn->s[fromsidei], NULL,
+					 conn->pipe[fromsidei][1], NULL,
+					 c->tcp.pipe_size,
+					 SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
+		while (readlen < 0 && errno == EINTR);
 
-			if (errno != EAGAIN)
-				goto close;
-		} else if (!readlen) {
+		if (readlen < 0 && errno != EAGAIN)
+			goto close;
+
+		flow_trace(conn, "%zi from read-side call", readlen);
+
+		if (!readlen) {
 			eof = 1;
-		} else {
+		} else if (readlen > 0) {
 			never_read = 0;
 
 			if (readlen >= (long)c->tcp.pipe_size * 90 / 100)
@@ -544,10 +544,16 @@ retry:
 				conn_flag(c, conn, lowat_act_flag);
 		}
 
-eintr:
-		written = splice(conn->pipe[fromsidei][0], NULL,
-				 conn->s[!fromsidei], NULL, c->tcp.pipe_size,
-				 SPLICE_F_MOVE | more | SPLICE_F_NONBLOCK);
+		do
+			written = splice(conn->pipe[fromsidei][0], NULL,
+					 conn->s[!fromsidei], NULL,
+					 c->tcp.pipe_size,
+					 SPLICE_F_MOVE | more | SPLICE_F_NONBLOCK);
+		while (written < 0 && errno == EINTR);
+
+		if (written < 0 && errno != EAGAIN)
+			goto close;
+
 		flow_trace(conn, "%zi from write-side call (passed %zi)",
 			   written, c->tcp.pipe_size);
 
@@ -579,12 +585,6 @@ eintr:
 		conn->written[fromsidei] += written > 0 ? written : 0;
 
 		if (written < 0) {
-			if (errno == EINTR)
-				goto eintr;
-
-			if (errno != EAGAIN)
-				goto close;
-
 			if (conn->read[fromsidei] == conn->written[fromsidei])
 				break;
 
diff --git a/tcp_vu.c b/tcp_vu.c
index 0622f17..57587cc 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -38,7 +38,6 @@
 static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE + 1];
 static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
 static int head[VIRTQUEUE_MAX_SIZE + 1];
-static int head_cnt;
 
 /**
  * tcp_vu_hdrlen() - return the size of the header in level 2 frame (TCP)
@@ -183,7 +182,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 static ssize_t tcp_vu_sock_recv(const struct ctx *c,
 				const struct tcp_tap_conn *conn, bool v6,
 				uint32_t already_sent, size_t fillsize,
-				int *iov_cnt)
+				int *iov_cnt, int *head_cnt)
 {
 	struct vu_dev *vdev = c->vdev;
 	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
@@ -202,7 +201,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c,
 	vu_init_elem(elem, &iov_vu[1], VIRTQUEUE_MAX_SIZE);
 
 	elem_cnt = 0;
-	head_cnt = 0;
+	*head_cnt = 0;
 	while (fillsize > 0 && elem_cnt < VIRTQUEUE_MAX_SIZE) {
 		struct iovec *iov;
 		size_t frame_size, dlen;
@@ -221,7 +220,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c,
 		ASSERT(iov->iov_len >= hdrlen);
 		iov->iov_base = (char *)iov->iov_base + hdrlen;
 		iov->iov_len -= hdrlen;
-		head[head_cnt++] = elem_cnt;
+		head[(*head_cnt)++] = elem_cnt;
 
 		fillsize -= dlen;
 		elem_cnt += cnt;
@@ -261,17 +260,18 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c,
 		len -= iov->iov_len;
 	}
 	/* adjust head count */
-	while (head_cnt > 0 && head[head_cnt - 1] >= i)
-		head_cnt--;
+	while (*head_cnt > 0 && head[*head_cnt - 1] >= i)
+		(*head_cnt)--;
+
 	/* mark end of array */
-	head[head_cnt] = i;
+	head[*head_cnt] = i;
 	*iov_cnt = i;
 
 	/* release unused buffers */
 	vu_queue_rewind(vq, elem_cnt - i);
 
 	/* restore space for headers in iov */
-	for (i = 0; i < head_cnt; i++) {
+	for (i = 0; i < *head_cnt; i++) {
 		struct iovec *iov = &elem[head[i]].in_sg[0];
 
 		iov->iov_base = (char *)iov->iov_base - hdrlen;
@@ -357,11 +357,11 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 	struct vu_dev *vdev = c->vdev;
 	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
 	ssize_t len, previous_dlen;
+	int i, iov_cnt, head_cnt;
 	size_t hdrlen, fillsize;
 	int v6 = CONN_V6(conn);
 	uint32_t already_sent;
 	const uint16_t *check;
-	int i, iov_cnt;
 
 	if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
 		debug("Got packet, but RX virtqueue not usable yet");
@@ -376,7 +376,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 			   conn->seq_ack_from_tap, conn->seq_to_tap);
 		conn->seq_to_tap = conn->seq_ack_from_tap;
 		already_sent = 0;
-		if (tcp_set_peek_offset(conn->sock, 0)) {
+		if (tcp_set_peek_offset(conn, 0)) {
 			tcp_rst(c, conn);
 			return -1;
 		}
@@ -396,7 +396,8 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 	/* collect the buffers from vhost-user and fill them with the
 	 * data from the socket
 	 */
-	len = tcp_vu_sock_recv(c, conn, v6, already_sent, fillsize, &iov_cnt);
+	len = tcp_vu_sock_recv(c, conn, v6, already_sent, fillsize,
+			       &iov_cnt, &head_cnt);
 	if (len < 0) {
 		if (len != -EAGAIN && len != -EWOULDBLOCK) {
 			tcp_rst(c, conn);
diff --git a/test/lib/term b/test/lib/term
index ed690de..089364c 100755
--- a/test/lib/term
+++ b/test/lib/term
@@ -19,6 +19,7 @@ STATUS_FILE_INDEX=0
 STATUS_COLS=
 STATUS_PASS=0
 STATUS_FAIL=0
+STATUS_SKIPPED=0
 
 PR_RED='\033[1;31m'
 PR_GREEN='\033[1;32m'
@@ -439,19 +440,21 @@ info_layout() {
 # status_test_ok() - Update counter of passed tests, log and display message
 status_test_ok() {
 	STATUS_PASS=$((STATUS_PASS + 1))
-	tmux set status-right "PASS: ${STATUS_PASS} | FAIL: ${STATUS_FAIL} | #(TZ="UTC" date -Iseconds)"
+	tmux set status-right "PASS: ${STATUS_PASS} | FAIL: ${STATUS_FAIL} | SKIPPED: ${STATUS_SKIPPED} | #(TZ="UTC" date -Iseconds)"
 	info_passed
 }
 
 # status_test_fail() - Update counter of failed tests, log and display message
 status_test_fail() {
 	STATUS_FAIL=$((STATUS_FAIL + 1))
-	tmux set status-right "PASS: ${STATUS_PASS} | FAIL: ${STATUS_FAIL} | #(TZ="UTC" date -Iseconds)"
+	tmux set status-right "PASS: ${STATUS_PASS} | FAIL: ${STATUS_FAIL} | SKIPPED: ${STATUS_SKIPPED} | #(TZ="UTC" date -Iseconds)"
 	info_failed
 }
 
 # status_test_fail() - Update counter of failed tests, log and display message
 status_test_skip() {
+	STATUS_SKIPPED=$((STATUS_SKIPPED + 1))
+	tmux set status-right "PASS: ${STATUS_PASS} | FAIL: ${STATUS_FAIL} | SKIPPED: ${STATUS_SKIPPED} | #(TZ="UTC" date -Iseconds)"
 	info_skipped
 }
 
diff --git a/test/lib/test b/test/lib/test
index 758250a..7349674 100755
--- a/test/lib/test
+++ b/test/lib/test
@@ -20,10 +20,7 @@ test_iperf3s() {
 	__sctx="${1}"
 	__port="${2}"
 
-	pane_or_context_run_bg "${__sctx}" 				\
-		 'iperf3 -s -p'${__port}' & echo $! > s.pid'		\
-
-	sleep 1		# Wait for server to be ready
+	pane_or_context_run "${__sctx}" 'iperf3 -s -p'${__port}' -D -I s.pid'
 }
 
 # test_iperf3k() - Kill iperf3 server
@@ -31,7 +28,7 @@ test_iperf3s() {
 test_iperf3k() {
 	__sctx="${1}"
 
-	pane_or_context_run "${__sctx}" 'kill -INT $(cat s.pid); rm s.pid'
+	pane_or_context_run "${__sctx}" 'kill -INT $(cat s.pid)'
 
 	sleep 1		# Wait for kernel to free up ports
 }
diff --git a/test/run b/test/run
index 4e86f30..f73c311 100755
--- a/test/run
+++ b/test/run
@@ -202,7 +202,7 @@ skip_distro() {
 	perf_finish
 	[ ${CI} -eq 1 ] && video_stop
 
-	log "PASS: ${STATUS_PASS}, FAIL: ${STATUS_FAIL}"
+	log "PASS: ${STATUS_PASS}, FAIL: ${STATUS_FAIL}, SKIPPED: ${STATUS_SKIPPED}"
 
 	pause_continue \
 		"Press any key to keep test session open"	\
@@ -236,7 +236,7 @@ run_selected() {
 	done
 	teardown "${__setup}"
 
-	log "PASS: ${STATUS_PASS}, FAIL: ${STATUS_FAIL}"
+	log "PASS: ${STATUS_PASS}, FAIL: ${STATUS_FAIL}, SKIPPED: ${STATUS_SKIPPED}"
 
 	pause_continue \
 		"Press any key to keep test session open"	\
@@ -307,4 +307,4 @@ fi
 
 tail -n1 ${LOGFILE}
 echo "Log at ${LOGFILE}"
-exit $(tail -n1 ${LOGFILE} | sed -n 's/.*FAIL: \(.*\)$/\1/p')
+exit $(tail -n1 ${LOGFILE} | sed -n 's/.*FAIL: \(.*\),.*$/\1/p')
diff --git a/udp.c b/udp.c
index 923cc38..ca28b37 100644
--- a/udp.c
+++ b/udp.c
@@ -39,27 +39,30 @@
  * could receive packets from multiple flows, so we use a hash table match to
  * find the specific flow for a datagram.
  *
- * When a UDP flow is initiated from a listening socket we take a duplicate of
- * the socket and store it in uflow->s[INISIDE].  This will last for the
+ * Flow sockets
+ * ============
+ *
+ * When a UDP flow targets a socket, we create a "flow" socket in
+ * uflow->s[TGTSIDE] both to deliver datagrams to the target side and receive
+ * replies on the target side.  This socket is both bound and connected and has
+ * EPOLL_TYPE_UDP.  The connect() means it will only receive datagrams
+ * associated with this flow, so the epoll reference directly points to the flow
+ * and we don't need a hash lookup.
+ *
+ * When a flow is initiated from a listening socket, we create a "flow" socket
+ * with the same bound address as the listening socket, but also connect()ed to
+ * the flow's peer.  This is stored in uflow->s[INISIDE] and will last for the
  * lifetime of the flow, even if the original listening socket is closed due to
  * port auto-probing.  The duplicate is used to deliver replies back to the
  * originating side.
  *
- * Reply sockets
- * =============
- *
- * When a UDP flow targets a socket, we create a "reply" socket in
- * uflow->s[TGTSIDE] both to deliver datagrams to the target side and receive
- * replies on the target side.  This socket is both bound and connected and has
- * EPOLL_TYPE_UDP_REPLY.  The connect() means it will only receive datagrams
- * associated with this flow, so the epoll reference directly points to the flow
- * and we don't need a hash lookup.
- *
- * NOTE: it's possible that the reply socket could have a bound address
- * overlapping with an unrelated listening socket.  We assume datagrams for the
- * flow will come to the reply socket in preference to a listening socket.  The
- * sample program doc/platform-requirements/reuseaddr-priority.c documents and
- * tests that assumption.
+ * NOTE: A flow socket can have a bound address overlapping with a listening
+ * socket.  That will happen naturally for flows initiated from a socket, but is
+ * also possible (though unlikely) for tap initiated flows, depending on the
+ * source port.  We assume datagrams for the flow will come to a connect()ed
+ * socket in preference to a listening socket.  The sample program
+ * doc/platform-requirements/reuseaddr-priority.c documents and tests that
+ * assumption.
  *
  * "Spliced" flows
  * ===============
@@ -71,8 +74,7 @@
  * actually used; it doesn't make sense for datagrams and instead a pair of
  * recvmmsg() and sendmmsg() is used to forward the datagrams.
  *
- * Note that a spliced flow will have *both* a duplicated listening socket and a
- * reply socket (see above).
+ * Note that a spliced flow will have two flow sockets (see above).
  */
 
 #include <sched.h>
@@ -87,6 +89,8 @@
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/icmp6.h>
 #include <stdint.h>
 #include <stddef.h>
 #include <string.h>
@@ -112,6 +116,14 @@
 #include "udp_internal.h"
 #include "udp_vu.h"
 
+#define UDP_MAX_FRAMES		32  /* max # of frames to receive at once */
+
+/* Maximum UDP data to be returned in ICMP messages */
+#define ICMP4_MAX_DLEN 8
+#define ICMP6_MAX_DLEN (IPV6_MIN_MTU			\
+			- sizeof(struct udphdr)	\
+			- sizeof(struct ipv6hdr))
+
 /* "Spliced" sockets indexed by bound port (host order) */
 static int udp_splice_ns  [IP_VERSIONS][NUM_PORTS];
 static int udp_splice_init[IP_VERSIONS][NUM_PORTS];
@@ -128,26 +140,31 @@ static struct ethhdr udp4_eth_hdr;
 static struct ethhdr udp6_eth_hdr;
 
 /**
- * struct udp_meta_t - Pre-cooked headers and metadata for UDP packets
+ * struct udp_meta_t - Pre-cooked headers for UDP packets
  * @ip6h:	Pre-filled IPv6 header (except for payload_len and addresses)
  * @ip4h:	Pre-filled IPv4 header (except for tot_len and saddr)
  * @taph:	Tap backend specific header
- * @s_in:	Source socket address, filled in by recvmmsg()
- * @tosidx:	sidx for the destination side of this datagram's flow
  */
 static struct udp_meta_t {
 	struct ipv6hdr ip6h;
 	struct iphdr ip4h;
 	struct tap_hdr taph;
-
-	union sockaddr_inany s_in;
-	flow_sidx_t tosidx;
 }
 #ifdef __AVX2__
 __attribute__ ((aligned(32)))
 #endif
 udp_meta[UDP_MAX_FRAMES];
 
+#define PKTINFO_SPACE					\
+	MAX(CMSG_SPACE(sizeof(struct in_pktinfo)),	\
+	    CMSG_SPACE(sizeof(struct in6_pktinfo)))
+
+#define RECVERR_SPACE							\
+	MAX(CMSG_SPACE(sizeof(struct sock_extended_err) +		\
+		       sizeof(struct sockaddr_in)),			\
+	    CMSG_SPACE(sizeof(struct sock_extended_err) +		\
+		       sizeof(struct sockaddr_in6)))
+
 /**
  * enum udp_iov_idx - Indices for the buffers making up a single UDP frame
  * @UDP_IOV_TAP         tap specific header
@@ -224,8 +241,6 @@ static void udp_iov_init_one(const struct ctx *c, size_t i)
 	tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &meta->taph);
 	tiov[UDP_IOV_PAYLOAD].iov_base = payload;
 
-	mh->msg_name	= &meta->s_in;
-	mh->msg_namelen	= sizeof(meta->s_in);
 	mh->msg_iov	= siov;
 	mh->msg_iovlen	= 1;
 }
@@ -245,41 +260,6 @@ static void udp_iov_init(const struct ctx *c)
 		udp_iov_init_one(c, i);
 }
 
-/**
- * udp_splice_prepare() - Prepare one datagram for splicing
- * @mmh:	Receiving mmsghdr array
- * @idx:	Index of the datagram to prepare
- */
-static void udp_splice_prepare(struct mmsghdr *mmh, unsigned idx)
-{
-	udp_mh_splice[idx].msg_hdr.msg_iov->iov_len = mmh[idx].msg_len;
-}
-
-/**
- * udp_splice_send() - Send a batch of datagrams from socket to socket
- * @c:		Execution context
- * @start:	Index of batch's first datagram in udp[46]_l2_buf
- * @n:		Number of datagrams in batch
- * @src:	Source port for datagram (target side)
- * @dst:	Destination port for datagrams (target side)
- * @ref:	epoll reference for origin socket
- * @now:	Timestamp
- */
-static void udp_splice_send(const struct ctx *c, size_t start, size_t n,
-			    flow_sidx_t tosidx)
-{
-	const struct flowside *toside = flowside_at_sidx(tosidx);
-	const struct udp_flow *uflow = udp_at_sidx(tosidx);
-	uint8_t topif = pif_at_sidx(tosidx);
-	int s = uflow->s[tosidx.sidei];
-	socklen_t sl;
-
-	pif_sockaddr(c, &udp_splice_to, &sl, topif,
-		     &toside->eaddr, toside->eport);
-
-	sendmmsg(s, udp_mh_splice + start, n, MSG_NOSIGNAL);
-}
-
 /**
  * udp_update_hdr4() - Update headers for one IPv4 datagram
  * @ip4h:		Pre-filled IPv4 header (except for tot_len and saddr)
@@ -402,28 +382,172 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
 	(*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
 }
 
+/**
+ * udp_send_tap_icmp4() - Construct and send ICMPv4 to local peer
+ * @c:		Execution context
+ * @ee:	Extended error descriptor
+ * @toside:	Destination side of flow
+ * @saddr:	Address of ICMP generating node
+ * @in:	First bytes (max 8) of original UDP message body
+ * @dlen:	Length of the read part of original UDP message body
+ */
+static void udp_send_tap_icmp4(const struct ctx *c,
+			       const struct sock_extended_err *ee,
+			       const struct flowside *toside,
+			       struct in_addr saddr,
+			       const void *in, size_t dlen)
+{
+	struct in_addr oaddr = toside->oaddr.v4mapped.a4;
+	struct in_addr eaddr = toside->eaddr.v4mapped.a4;
+	in_port_t eport = toside->eport;
+	in_port_t oport = toside->oport;
+	struct {
+		struct icmphdr icmp4h;
+		struct iphdr ip4h;
+		struct udphdr uh;
+		char data[ICMP4_MAX_DLEN];
+	} __attribute__((packed, aligned(__alignof__(max_align_t)))) msg;
+	size_t msglen = sizeof(msg) - sizeof(msg.data) + dlen;
+	size_t l4len = dlen + sizeof(struct udphdr);
+
+	ASSERT(dlen <= ICMP4_MAX_DLEN);
+	memset(&msg, 0, sizeof(msg));
+	msg.icmp4h.type = ee->ee_type;
+	msg.icmp4h.code = ee->ee_code;
+	if (ee->ee_type == ICMP_DEST_UNREACH && ee->ee_code == ICMP_FRAG_NEEDED)
+		msg.icmp4h.un.frag.mtu = htons((uint16_t) ee->ee_info);
+
+	/* Reconstruct the original headers as returned in the ICMP message */
+	tap_push_ip4h(&msg.ip4h, eaddr, oaddr, l4len, IPPROTO_UDP);
+	tap_push_uh4(&msg.uh, eaddr, eport, oaddr, oport, in, dlen);
+	memcpy(&msg.data, in, dlen);
+
+	tap_icmp4_send(c, saddr, eaddr, &msg, msglen);
+}
+
+
+/**
+ * udp_send_tap_icmp6() - Construct and send ICMPv6 to local peer
+ * @c:		Execution context
+ * @ee:	Extended error descriptor
+ * @toside:	Destination side of flow
+ * @saddr:	Address of ICMP generating node
+ * @in:	First bytes (max 1232) of original UDP message body
+ * @dlen:	Length of the read part of original UDP message body
+ * @flow:	IPv6 flow identifier
+ */
+static void udp_send_tap_icmp6(const struct ctx *c,
+			       const struct sock_extended_err *ee,
+			       const struct flowside *toside,
+			       const struct in6_addr *saddr,
+			       void *in, size_t dlen, uint32_t flow)
+{
+	const struct in6_addr *oaddr = &toside->oaddr.a6;
+	const struct in6_addr *eaddr = &toside->eaddr.a6;
+	in_port_t eport = toside->eport;
+	in_port_t oport = toside->oport;
+	struct {
+		struct icmp6_hdr icmp6h;
+		struct ipv6hdr ip6h;
+		struct udphdr uh;
+		char data[ICMP6_MAX_DLEN];
+	} __attribute__((packed, aligned(__alignof__(max_align_t)))) msg;
+	size_t msglen = sizeof(msg) - sizeof(msg.data) + dlen;
+	size_t l4len = dlen + sizeof(struct udphdr);
+
+	ASSERT(dlen <= ICMP6_MAX_DLEN);
+	memset(&msg, 0, sizeof(msg));
+	msg.icmp6h.icmp6_type = ee->ee_type;
+	msg.icmp6h.icmp6_code = ee->ee_code;
+	if (ee->ee_type == ICMP6_PACKET_TOO_BIG)
+		msg.icmp6h.icmp6_dataun.icmp6_un_data32[0] = htonl(ee->ee_info);
+
+	/* Reconstruct the original headers as returned in the ICMP message */
+	tap_push_ip6h(&msg.ip6h, eaddr, oaddr, l4len, IPPROTO_UDP, flow);
+	tap_push_uh6(&msg.uh, eaddr, eport, oaddr, oport, in, dlen);
+	memcpy(&msg.data, in, dlen);
+
+	tap_icmp6_send(c, saddr, eaddr, &msg, msglen);
+}
+
+/**
+ * udp_pktinfo() - Retrieve packet destination address from cmsg
+ * @msg:	msghdr into which message has been received
+ * @dst:	(Local) destination address of message in @msg (output)
+ *
+ * Return: 0 on success, -1 if the information was missing (@dst is set to
+ *         inany_any6).
+ */
+static int udp_pktinfo(struct msghdr *msg, union inany_addr *dst)
+{
+	struct cmsghdr *hdr;
+
+	for (hdr = CMSG_FIRSTHDR(msg); hdr; hdr = CMSG_NXTHDR(msg, hdr)) {
+		if (hdr->cmsg_level == IPPROTO_IP &&
+		    hdr->cmsg_type == IP_PKTINFO) {
+			const struct in_pktinfo *i4 = (void *)CMSG_DATA(hdr);
+
+			*dst = inany_from_v4(i4->ipi_addr);
+			return 0;
+		}
+
+		if (hdr->cmsg_level == IPPROTO_IPV6 &&
+			   hdr->cmsg_type == IPV6_PKTINFO) {
+			const struct in6_pktinfo *i6 = (void *)CMSG_DATA(hdr);
+
+			dst->a6 = i6->ipi6_addr;
+			return 0;
+		}
+	}
+
+	debug("Missing PKTINFO cmsg on datagram");
+	*dst = inany_any6;
+	return -1;
+}
+
 /**
  * udp_sock_recverr() - Receive and clear an error from a socket
- * @s:		Socket to receive from
+ * @c:		Execution context
+ * @s:		Socket to receive errors from
+ * @sidx:	Flow and side of @s, or FLOW_SIDX_NONE if unknown
+ * @pif:	Interface on which the error occurred
+ *              (only used if @sidx == FLOW_SIDX_NONE)
+ * @port:	Local port number of @s (only used if @sidx == FLOW_SIDX_NONE)
  *
  * Return: 1 if error received and processed, 0 if no more errors in queue, < 0
  *         if there was an error reading the queue
  *
  * #syscalls recvmsg
  */
-static int udp_sock_recverr(int s)
+static int udp_sock_recverr(const struct ctx *c, int s, flow_sidx_t sidx,
+			    uint8_t pif, in_port_t port)
 {
+	char buf[PKTINFO_SPACE + RECVERR_SPACE];
 	const struct sock_extended_err *ee;
-	const struct cmsghdr *hdr;
-	char buf[CMSG_SPACE(sizeof(*ee))];
+	char data[ICMP6_MAX_DLEN];
+	struct cmsghdr *hdr;
+	struct iovec iov = {
+		.iov_base = data,
+		.iov_len = sizeof(data)
+	};
+	union sockaddr_inany src;
 	struct msghdr mh = {
-		.msg_name = NULL,
-		.msg_namelen = 0,
-		.msg_iov = NULL,
-		.msg_iovlen = 0,
+		.msg_name = &src,
+		.msg_namelen = sizeof(src),
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
 		.msg_control = buf,
 		.msg_controllen = sizeof(buf),
 	};
+	const struct flowside *fromside, *toside;
+	union inany_addr offender, otap;
+	char astr[INANY_ADDRSTRLEN];
+	char sastr[SOCKADDR_STRLEN];
+	const struct in_addr *o4;
+	in_port_t offender_port;
+	struct udp_flow *uflow;
+	uint8_t topif;
+	size_t dlen;
 	ssize_t rc;
 
 	rc = recvmsg(s, &mh, MSG_ERRQUEUE);
@@ -440,33 +564,102 @@ static int udp_sock_recverr(int s)
 		return -1;
 	}
 
-	hdr = CMSG_FIRSTHDR(&mh);
-	if (!((hdr->cmsg_level == IPPROTO_IP &&
-	       hdr->cmsg_type == IP_RECVERR) ||
-	      (hdr->cmsg_level == IPPROTO_IPV6 &&
-	       hdr->cmsg_type == IPV6_RECVERR))) {
-		err("Unexpected cmsg reading error queue");
+	for (hdr = CMSG_FIRSTHDR(&mh); hdr; hdr = CMSG_NXTHDR(&mh, hdr)) {
+		if ((hdr->cmsg_level == IPPROTO_IP &&
+		      hdr->cmsg_type == IP_RECVERR) ||
+		     (hdr->cmsg_level == IPPROTO_IPV6 &&
+		      hdr->cmsg_type == IPV6_RECVERR))
+			break;
+	}
+
+	if (!hdr) {
+		err("Missing RECVERR cmsg in error queue");
 		return -1;
 	}
 
 	ee = (const struct sock_extended_err *)CMSG_DATA(hdr);
 
-	/* TODO: When possible propagate and otherwise handle errors */
 	debug("%s error on UDP socket %i: %s",
 	      str_ee_origin(ee), s, strerror_(ee->ee_errno));
 
+	if (!flow_sidx_valid(sidx)) {
+		/* No hint from the socket, determine flow from addresses */
+		union inany_addr dst;
+
+		if (udp_pktinfo(&mh, &dst) < 0) {
+			debug("Missing PKTINFO on UDP error");
+			return 1;
+		}
+
+		sidx = flow_lookup_sa(c, IPPROTO_UDP, pif, &src, &dst, port);
+		if (!flow_sidx_valid(sidx)) {
+			debug("Ignoring UDP error without flow");
+			return 1;
+		}
+	} else {
+		pif = pif_at_sidx(sidx);
+	}
+
+	uflow = udp_at_sidx(sidx);
+	ASSERT(uflow);
+	fromside = &uflow->f.side[sidx.sidei];
+	toside = &uflow->f.side[!sidx.sidei];
+	topif = uflow->f.pif[!sidx.sidei];
+	dlen = rc;
+
+	if (inany_from_sockaddr(&offender, &offender_port,
+				SO_EE_OFFENDER(ee)) < 0)
+		goto fail;
+
+	if (pif != PIF_HOST || topif != PIF_TAP)
+		/* XXX Can we support any other cases? */
+		goto fail;
+
+	/* If the offender *is* the endpoint, make sure our translation is
+	 * consistent with the flow's translation.  This matters if the flow
+	 * endpoint has a port specific translation (like --dns-match).
+	 */
+	if (inany_equals(&offender, &fromside->eaddr))
+		otap = toside->oaddr;
+	else if (!nat_inbound(c, &offender, &otap))
+		goto fail;
+
+	if (hdr->cmsg_level == IPPROTO_IP &&
+	    (o4 = inany_v4(&otap)) && inany_v4(&toside->eaddr)) {
+		dlen = MIN(dlen, ICMP4_MAX_DLEN);
+		udp_send_tap_icmp4(c, ee, toside, *o4, data, dlen);
+		return 1;
+	}
+
+	if (hdr->cmsg_level == IPPROTO_IPV6 && !inany_v4(&toside->eaddr)) {
+		udp_send_tap_icmp6(c, ee, toside, &otap.a6, data, dlen,
+				   FLOW_IDX(uflow));
+		return 1;
+	}
+
+fail:
+	flow_dbg(uflow, "Can't propagate %s error from %s %s to %s %s",
+		 str_ee_origin(ee),
+		 pif_name(pif),
+		 sockaddr_ntop(SO_EE_OFFENDER(ee), sastr, sizeof(sastr)),
+		 pif_name(topif),
+		 inany_ntop(&toside->eaddr, astr, sizeof(astr)));
 	return 1;
 }
 
 /**
  * udp_sock_errs() - Process errors on a socket
  * @c:		Execution context
- * @s:		Socket to receive from
- * @events:	epoll events bitmap
+ * @s:		Socket to receive errors from
+ * @sidx:	Flow and side of @s, or FLOW_SIDX_NONE if unknown
+ * @pif:	Interface on which the error occurred
+ *              (only used if @sidx == FLOW_SIDX_NONE)
+ * @port:	Local port number of @s (only used if @sidx == FLOW_SIDX_NONE)
  *
  * Return: Number of errors handled, or < 0 if we have an unrecoverable error
  */
-int udp_sock_errs(const struct ctx *c, int s, uint32_t events)
+static int udp_sock_errs(const struct ctx *c, int s, flow_sidx_t sidx,
+			 uint8_t pif, in_port_t port)
 {
 	unsigned n_err = 0;
 	socklen_t errlen;
@@ -474,11 +667,8 @@ int udp_sock_errs(const struct ctx *c, int s, uint32_t events)
 
 	ASSERT(!c->no_udp);
 
-	if (!(events & EPOLLERR))
-		return 0; /* Nothing to do */
-
 	/* Empty the error queue */
-	while ((rc = udp_sock_recverr(s)) > 0)
+	while ((rc = udp_sock_recverr(c, s, sidx, pif, port)) > 0)
 		n_err += rc;
 
 	if (rc < 0)
@@ -505,37 +695,62 @@ int udp_sock_errs(const struct ctx *c, int s, uint32_t events)
 	return n_err;
 }
 
+/**
+ * udp_peek_addr() - Get source and destination addresses for next packet
+ * @s:		Socket to get information from
+ * @src:	Source socket address of the packet (output)
+ * @dst:	(Local) destination address (output)
+ *
+ * Return: 0 if no more packets, 1 on success, -ve error code on error
+ */
+static int udp_peek_addr(int s, union sockaddr_inany *src,
+			 union inany_addr *dst)
+{
+	char sastr[SOCKADDR_STRLEN], dstr[INANY_ADDRSTRLEN];
+	char cmsg[PKTINFO_SPACE];
+	struct msghdr msg = {
+		.msg_name = src,
+		.msg_namelen = sizeof(*src),
+		.msg_control = cmsg,
+		.msg_controllen = sizeof(cmsg),
+	};
+	int rc;
+
+	rc = recvmsg(s, &msg, MSG_PEEK | MSG_DONTWAIT);
+	if (rc < 0) {
+		if (errno == EAGAIN || errno == EWOULDBLOCK)
+			return 0;
+		return -errno;
+	}
+
+	udp_pktinfo(&msg, dst);
+
+	trace("Peeked UDP datagram: %s -> %s",
+	      sockaddr_ntop(src, sastr, sizeof(sastr)),
+	      inany_ntop(dst, dstr, sizeof(dstr)));
+
+	return 1;
+}
+
 /**
  * udp_sock_recv() - Receive datagrams from a socket
  * @c:		Execution context
  * @s:		Socket to receive from
- * @events:	epoll events bitmap
  * @mmh		mmsghdr array to receive into
+ * @n:		Maximum number of datagrams to receive
  *
  * Return: Number of datagrams received
  *
  * #syscalls recvmmsg arm:recvmmsg_time64 i686:recvmmsg_time64
  */
-static int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
-			 struct mmsghdr *mmh)
+static int udp_sock_recv(const struct ctx *c, int s, struct mmsghdr *mmh, int n)
 {
-	/* For not entirely clear reasons (data locality?) pasta gets better
-	 * throughput if we receive tap datagrams one at a atime.  For small
-	 * splice datagrams throughput is slightly better if we do batch, but
-	 * it's slightly worse for large splice datagrams.  Since we don't know
-	 * before we receive whether we'll use tap or splice, always go one at a
-	 * time for pasta mode.
-	 */
-	int n = (c->mode == MODE_PASTA ? 1 : UDP_MAX_FRAMES);
-
 	ASSERT(!c->no_udp);
 
-	if (!(events & EPOLLIN))
-		return 0;
-
 	n = recvmmsg(s, mmh, n, 0, NULL);
 	if (n < 0) {
-		err_perror("Error receiving datagrams");
+		trace("Error receiving datagrams: %s", strerror_(errno));
+		/* Bail out and let the EPOLLERR handler deal with it */
 		return 0;
 	}
 
@@ -543,78 +758,121 @@ static int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
 }
 
 /**
- * udp_buf_listen_sock_handler() - Handle new data from socket
+ * udp_sock_to_sock() - Forward datagrams from socket to socket
  * @c:		Execution context
- * @ref:	epoll reference
- * @events:	epoll events bitmap
- * @now:	Current timestamp
+ * @from_s:	Socket to receive datagrams from
+ * @n:		Maximum number of datagrams to forward
+ * @tosidx:	Flow & side to forward datagrams to
  *
- * #syscalls recvmmsg
+ * #syscalls sendmmsg
  */
-static void udp_buf_listen_sock_handler(const struct ctx *c,
-					union epoll_ref ref, uint32_t events,
-					const struct timespec *now)
+static void udp_sock_to_sock(const struct ctx *c, int from_s, int n,
+			     flow_sidx_t tosidx)
 {
-	const socklen_t sasize = sizeof(udp_meta[0].s_in);
-	int n, i;
+	const struct flowside *toside = flowside_at_sidx(tosidx);
+	const struct udp_flow *uflow = udp_at_sidx(tosidx);
+	uint8_t topif = pif_at_sidx(tosidx);
+	int to_s = uflow->s[tosidx.sidei];
+	socklen_t sl;
+	int i;
 
-	if (udp_sock_errs(c, ref.fd, events) < 0) {
-		err("UDP: Unrecoverable error on listening socket:"
-		    " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port);
-		/* FIXME: what now?  close/re-open socket? */
+	if ((n = udp_sock_recv(c, from_s, udp_mh_recv, n)) <= 0)
 		return;
+
+	for (i = 0; i < n; i++) {
+		udp_mh_splice[i].msg_hdr.msg_iov->iov_len
+			= udp_mh_recv[i].msg_len;
 	}
 
-	if ((n = udp_sock_recv(c, ref.fd, events, udp_mh_recv)) <= 0)
+	pif_sockaddr(c, &udp_splice_to, &sl, topif,
+		     &toside->eaddr, toside->eport);
+
+	sendmmsg(to_s, udp_mh_splice, n, MSG_NOSIGNAL);
+}
+
+/**
+ * udp_buf_sock_to_tap() - Forward datagrams from socket to tap
+ * @c:		Execution context
+ * @s:		Socket to read data from
+ * @n:		Maximum number of datagrams to forward
+ * @tosidx:	Flow & side to forward data from @s to
+ */
+static void udp_buf_sock_to_tap(const struct ctx *c, int s, int n,
+				flow_sidx_t tosidx)
+{
+	const struct flowside *toside = flowside_at_sidx(tosidx);
+	int i;
+
+	if ((n = udp_sock_recv(c, s, udp_mh_recv, n)) <= 0)
 		return;
 
-	/* We divide datagrams into batches based on how we need to send them,
-	 * determined by udp_meta[i].tosidx.  To avoid either two passes through
-	 * the array, or recalculating tosidx for a single entry, we have to
-	 * populate it one entry *ahead* of the loop counter.
-	 */
-	udp_meta[0].tosidx = udp_flow_from_sock(c, ref, &udp_meta[0].s_in, now);
-	udp_mh_recv[0].msg_hdr.msg_namelen = sasize;
-	for (i = 0; i < n; ) {
-		flow_sidx_t batchsidx = udp_meta[i].tosidx;
-		uint8_t batchpif = pif_at_sidx(batchsidx);
-		int batchstart = i;
+	for (i = 0; i < n; i++)
+		udp_tap_prepare(udp_mh_recv, i, toside, false);
 
-		do {
-			if (pif_is_socket(batchpif)) {
-				udp_splice_prepare(udp_mh_recv, i);
-			} else if (batchpif == PIF_TAP) {
-				udp_tap_prepare(udp_mh_recv, i,
-						flowside_at_sidx(batchsidx),
-						false);
+	tap_send_frames(c, &udp_l2_iov[0][0], UDP_NUM_IOVS, n);
+}
+
+/**
+ * udp_sock_fwd() - Forward datagrams from a possibly unconnected socket
+ * @c:		Execution context
+ * @s:		Socket to forward from
+ * @frompif:	Interface to which @s belongs
+ * @port:	Our (local) port number of @s
+ * @now:	Current timestamp
+ */
+void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif,
+		  in_port_t port, const struct timespec *now)
+{
+	union sockaddr_inany src;
+	union inany_addr dst;
+	int rc;
+
+	while ((rc = udp_peek_addr(s, &src, &dst)) != 0) {
+		bool discard = false;
+		flow_sidx_t tosidx;
+		uint8_t topif;
+
+		if (rc < 0) {
+			trace("Error peeking at socket address: %s",
+			      strerror_(-rc));
+			/* Clear errors & carry on */
+			if (udp_sock_errs(c, s, FLOW_SIDX_NONE,
+					  frompif, port) < 0) {
+				err(
+"UDP: Unrecoverable error on listening socket: (%s port %hu)",
+				    pif_name(frompif), port);
+				/* FIXME: what now?  close/re-open socket? */
 			}
+			continue;
+		}
 
-			if (++i >= n)
-				break;
+		tosidx = udp_flow_from_sock(c, frompif, &dst, port, &src, now);
+		topif = pif_at_sidx(tosidx);
 
-			udp_meta[i].tosidx = udp_flow_from_sock(c, ref,
-								&udp_meta[i].s_in,
-								now);
-			udp_mh_recv[i].msg_hdr.msg_namelen = sasize;
-		} while (flow_sidx_eq(udp_meta[i].tosidx, batchsidx));
-
-		if (pif_is_socket(batchpif)) {
-			udp_splice_send(c, batchstart, i - batchstart,
-					batchsidx);
-		} else if (batchpif == PIF_TAP) {
-			tap_send_frames(c, &udp_l2_iov[batchstart][0],
-					UDP_NUM_IOVS, i - batchstart);
-		} else if (flow_sidx_valid(batchsidx)) {
-			flow_sidx_t fromsidx = flow_sidx_opposite(batchsidx);
-			struct udp_flow *uflow = udp_at_sidx(batchsidx);
+		if (pif_is_socket(topif)) {
+			udp_sock_to_sock(c, s, 1, tosidx);
+		} else if (topif == PIF_TAP) {
+			if (c->mode == MODE_VU)
+				udp_vu_sock_to_tap(c, s, 1, tosidx);
+			else
+				udp_buf_sock_to_tap(c, s, 1, tosidx);
+		} else if (flow_sidx_valid(tosidx)) {
+			struct udp_flow *uflow = udp_at_sidx(tosidx);
 
 			flow_err(uflow,
 				 "No support for forwarding UDP from %s to %s",
-				 pif_name(pif_at_sidx(fromsidx)),
-				 pif_name(batchpif));
+				 pif_name(frompif), pif_name(topif));
+			discard = true;
 		} else {
-			debug("Discarding %d datagrams without flow",
-			      i - batchstart);
+			debug("Discarding datagram without flow");
+			discard = true;
+		}
+
+		if (discard) {
+			struct msghdr msg = { 0 };
+
+			if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)
+				debug_perror("Failed to discard datagram");
 		}
 	}
 }
@@ -630,87 +888,69 @@ void udp_listen_sock_handler(const struct ctx *c,
 			     union epoll_ref ref, uint32_t events,
 			     const struct timespec *now)
 {
-	if (c->mode == MODE_VU) {
-		udp_vu_listen_sock_handler(c, ref, events, now);
-		return;
-	}
-
-	udp_buf_listen_sock_handler(c, ref, events, now);
+	if (events & (EPOLLERR | EPOLLIN))
+		udp_sock_fwd(c, ref.fd, ref.udp.pif, ref.udp.port, now);
 }
 
 /**
- * udp_buf_reply_sock_handler() - Handle new data from flow specific socket
+ * udp_sock_handler() - Handle new data from flow specific socket
  * @c:		Execution context
  * @ref:	epoll reference
  * @events:	epoll events bitmap
  * @now:	Current timestamp
- *
- * #syscalls recvmmsg
  */
-static void udp_buf_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
-				       uint32_t events,
-				       const struct timespec *now)
+void udp_sock_handler(const struct ctx *c, union epoll_ref ref,
+		      uint32_t events, const struct timespec *now)
 {
-	flow_sidx_t tosidx = flow_sidx_opposite(ref.flowside);
-	const struct flowside *toside = flowside_at_sidx(tosidx);
 	struct udp_flow *uflow = udp_at_sidx(ref.flowside);
-	uint8_t topif = pif_at_sidx(tosidx);
-	int n, i, from_s;
 
 	ASSERT(!c->no_udp && uflow);
 
-	from_s = uflow->s[ref.flowside.sidei];
-
-	if (udp_sock_errs(c, from_s, events) < 0) {
-		flow_err(uflow, "Unrecoverable error on reply socket");
-		flow_err_details(uflow);
-		udp_flow_close(c, uflow);
-		return;
+	if (events & EPOLLERR) {
+		if (udp_sock_errs(c, ref.fd, ref.flowside, PIF_NONE, 0) < 0) {
+			flow_err(uflow, "Unrecoverable error on flow socket");
+			goto fail;
+		}
 	}
 
-	if ((n = udp_sock_recv(c, from_s, events, udp_mh_recv)) <= 0)
-		return;
+	if (events & EPOLLIN) {
+		/* For not entirely clear reasons (data locality?) pasta gets
+		 * better throughput if we receive tap datagrams one at a
+		 * time.  For small splice datagrams throughput is slightly
+		 * better if we do batch, but it's slightly worse for large
+		 * splice datagrams.  Since we don't know the size before we
+		 * receive, always go one at a time for pasta mode.
+		 */
+		size_t n = (c->mode == MODE_PASTA ? 1 : UDP_MAX_FRAMES);
+		flow_sidx_t tosidx = flow_sidx_opposite(ref.flowside);
+		uint8_t topif = pif_at_sidx(tosidx);
+		int s = ref.fd;
 
-	flow_trace(uflow, "Received %d datagrams on reply socket", n);
-	uflow->ts = now->tv_sec;
+		flow_trace(uflow, "Received data on reply socket");
+		uflow->ts = now->tv_sec;
 
-	for (i = 0; i < n; i++) {
-		if (pif_is_socket(topif))
-			udp_splice_prepare(udp_mh_recv, i);
-		else if (topif == PIF_TAP)
-			udp_tap_prepare(udp_mh_recv, i, toside, false);
-		/* Restore sockaddr length clobbered by recvmsg() */
-		udp_mh_recv[i].msg_hdr.msg_namelen = sizeof(udp_meta[i].s_in);
+		if (pif_is_socket(topif)) {
+			udp_sock_to_sock(c, ref.fd, n, tosidx);
+		} else if (topif == PIF_TAP) {
+			if (c->mode == MODE_VU) {
+				udp_vu_sock_to_tap(c, s, UDP_MAX_FRAMES,
+						   tosidx);
+			} else {
+				udp_buf_sock_to_tap(c, s, n, tosidx);
+			}
+		} else {
+			flow_err(uflow,
+				 "No support for forwarding UDP from %s to %s",
+				 pif_name(pif_at_sidx(ref.flowside)),
+				 pif_name(topif));
+			goto fail;
+		}
 	}
+	return;
 
-	if (pif_is_socket(topif)) {
-		udp_splice_send(c, 0, n, tosidx);
-	} else if (topif == PIF_TAP) {
-		tap_send_frames(c, &udp_l2_iov[0][0], UDP_NUM_IOVS, n);
-	} else {
-		uint8_t frompif = pif_at_sidx(ref.flowside);
-
-		flow_err(uflow, "No support for forwarding UDP from %s to %s",
-			 pif_name(frompif), pif_name(topif));
-	}
-}
-
-/**
- * udp_reply_sock_handler() - Handle new data from flow specific socket
- * @c:		Execution context
- * @ref:	epoll reference
- * @events:	epoll events bitmap
- * @now:	Current timestamp
- */
-void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
-			    uint32_t events, const struct timespec *now)
-{
-	if (c->mode == MODE_VU) {
-		udp_vu_reply_sock_handler(c, ref, events, now);
-		return;
-	}
-
-	udp_buf_reply_sock_handler(c, ref, events, now);
+fail:
+	flow_err_details(uflow);
+	udp_flow_close(c, uflow);
 }
 
 /**
@@ -720,6 +960,7 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
  * @af:		Address family, AF_INET or AF_INET6
  * @saddr:	Source address
  * @daddr:	Destination address
+ * @ttl:	TTL or hop limit for packets to be sent in this call
  * @p:		Pool of UDP packets, with UDP headers
  * @idx:	Index of first packet to process
  * @now:	Current timestamp
@@ -730,7 +971,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
  */
 int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		    sa_family_t af, const void *saddr, const void *daddr,
-		    const struct pool *p, int idx, const struct timespec *now)
+		    uint8_t ttl, const struct pool *p, int idx,
+		    const struct timespec *now)
 {
 	const struct flowside *toside;
 	struct mmsghdr mm[UIO_MAXIOV];
@@ -778,7 +1020,7 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 	}
 	toside = flowside_at_sidx(tosidx);
 
-	s = udp_at_sidx(tosidx)->s[tosidx.sidei];
+	s = uflow->s[tosidx.sidei];
 	ASSERT(s >= 0);
 
 	pif_sockaddr(c, &to_sa, &sl, topif, &toside->eaddr, toside->eport);
@@ -809,6 +1051,24 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		mm[i].msg_hdr.msg_controllen = 0;
 		mm[i].msg_hdr.msg_flags = 0;
 
+		if (ttl != uflow->ttl[tosidx.sidei]) {
+			uflow->ttl[tosidx.sidei] = ttl;
+			if (af == AF_INET) {
+				if (setsockopt(s, IPPROTO_IP, IP_TTL,
+					       &ttl, sizeof(ttl)) < 0)
+					flow_perror(uflow,
+						    "setsockopt IP_TTL");
+			} else {
+				/* IPv6 hop_limit cannot be only 1 byte */
+				int hop_limit = ttl;
+
+				if (setsockopt(s, SOL_IPV6, IPV6_UNICAST_HOPS,
+					       &hop_limit, sizeof(hop_limit)) < 0)
+					flow_perror(uflow,
+						    "setsockopt IPV6_UNICAST_HOPS");
+			}
+		}
+
 		count++;
 	}
 
diff --git a/udp.h b/udp.h
index de2df6d..8f8531a 100644
--- a/udp.h
+++ b/udp.h
@@ -11,11 +11,12 @@
 void udp_portmap_clear(void);
 void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
 			     uint32_t events, const struct timespec *now);
-void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
-			    uint32_t events, const struct timespec *now);
+void udp_sock_handler(const struct ctx *c, union epoll_ref ref,
+		      uint32_t events, const struct timespec *now);
 int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		    sa_family_t af, const void *saddr, const void *daddr,
-		    const struct pool *p, int idx, const struct timespec *now);
+		    uint8_t ttl, const struct pool *p, int idx,
+		    const struct timespec *now);
 int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr,
 		  const char *ifname, in_port_t port);
 int udp_init(struct ctx *c);
diff --git a/udp_flow.c b/udp_flow.c
index 83c2568..4c6b3c2 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -9,10 +9,12 @@
 #include <fcntl.h>
 #include <sys/uio.h>
 #include <unistd.h>
+#include <netinet/udp.h>
 
 #include "util.h"
 #include "passt.h"
 #include "flow_table.h"
+#include "udp_internal.h"
 
 #define UDP_CONN_TIMEOUT	180 /* s, timeout for ephemeral or local bind */
 
@@ -41,121 +43,145 @@ struct udp_flow *udp_at_sidx(flow_sidx_t sidx)
  */
 void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
 {
+	unsigned sidei;
+
 	if (uflow->closed)
 		return; /* Nothing to do */
 
-	if (uflow->s[INISIDE] >= 0) {
-		/* The listening socket needs to stay in epoll */
-		close(uflow->s[INISIDE]);
-		uflow->s[INISIDE] = -1;
+	flow_foreach_sidei(sidei) {
+		flow_hash_remove(c, FLOW_SIDX(uflow, sidei));
+		if (uflow->s[sidei] >= 0) {
+			epoll_del(c, uflow->s[sidei]);
+			close(uflow->s[sidei]);
+			uflow->s[sidei] = -1;
+		}
 	}
 
-	if (uflow->s[TGTSIDE] >= 0) {
-		/* But the flow specific one needs to be removed */
-		epoll_del(c, uflow->s[TGTSIDE]);
-		close(uflow->s[TGTSIDE]);
-		uflow->s[TGTSIDE] = -1;
-	}
-	flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE));
-	if (!pif_is_socket(uflow->f.pif[TGTSIDE]))
-		flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE));
-
 	uflow->closed = true;
 }
 
+/**
+ * udp_flow_sock() - Create, bind and connect a flow specific UDP socket
+ * @c:		Execution context
+ * @uflow:	UDP flow to open socket for
+ * @sidei:	Side of @uflow to open socket for
+ *
+ * Return: fd of new socket on success, -ve error code on failure
+ */
+static int udp_flow_sock(const struct ctx *c,
+			 struct udp_flow *uflow, unsigned sidei)
+{
+	const struct flowside *side = &uflow->f.side[sidei];
+	uint8_t pif = uflow->f.pif[sidei];
+	union {
+		flow_sidx_t sidx;
+		uint32_t data;
+	} fref = { .sidx = FLOW_SIDX(uflow, sidei) };
+	int s;
+
+	s = flowside_sock_l4(c, EPOLL_TYPE_UDP, pif, side, fref.data);
+	if (s < 0) {
+		flow_dbg_perror(uflow, "Couldn't open flow specific socket");
+		return s;
+	}
+
+	if (flowside_connect(c, s, pif, side) < 0) {
+		int rc = -errno;
+
+		/* Log first: epoll_del() and close() may overwrite errno */
+		flow_dbg_perror(uflow, "Couldn't connect flow socket");
+		epoll_del(c, s);
+		close(s);
+		return rc;
+	}
+
+	/* It's possible, if unlikely, that we could receive some packets in
+	 * between the bind() and connect() which may or may not be for this
+	 * flow.  Being UDP we could just discard them, but it's not ideal.
+	 *
+	 * There's also a tricky case if a bunch of datagrams for a new flow
+	 * arrive in rapid succession, the first going to the original listening
+	 * socket and later ones going to this new socket.  If we forwarded the
+	 * datagrams from the new socket immediately here they would go before
+	 * the datagram which established the flow.  Again, not strictly wrong
+	 * for UDP, but not ideal.
+	 *
+	 * So, we flag that the new socket is in a transient state where it
+	 * might have datagrams for a different flow queued.  Before the next
+	 * epoll cycle, udp_flow_defer() will flush out any such datagrams, and
+	 * thereafter everything on the new socket should be strictly for this
+	 * flow.
+	 */
+	if (sidei)
+		uflow->flush1 = true;
+	else
+		uflow->flush0 = true;
+
+	return s;
+}
+
 /**
  * udp_flow_new() - Common setup for a new UDP flow
  * @c:		Execution context
  * @flow:	Initiated flow
- * @s_ini:	Initiating socket (or -1)
  * @now:	Timestamp
  *
  * Return: UDP specific flow, if successful, NULL on failure
+ *
+ * #syscalls getsockname
  */
 static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
-				int s_ini, const struct timespec *now)
+				const struct timespec *now)
 {
 	struct udp_flow *uflow = NULL;
 	const struct flowside *tgt;
-	uint8_t tgtpif;
+	unsigned sidei;
 
 	if (!(tgt = flow_target(c, flow, IPPROTO_UDP)))
 		goto cancel;
-	tgtpif = flow->f.pif[TGTSIDE];
 
 	uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp);
 	uflow->ts = now->tv_sec;
 	uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1;
+	uflow->ttl[INISIDE] = uflow->ttl[TGTSIDE] = 0;
 
-	if (s_ini >= 0) {
-		/* When using auto port-scanning the listening port could go
-		 * away, so we need to duplicate the socket
-		 */
-		uflow->s[INISIDE] = fcntl(s_ini, F_DUPFD_CLOEXEC, 0);
-		if (uflow->s[INISIDE] < 0) {
-			flow_err(uflow,
-				 "Couldn't duplicate listening socket: %s",
-				 strerror_(errno));
+	flow_foreach_sidei(sidei) {
+		if (pif_is_socket(uflow->f.pif[sidei]))
+			if ((uflow->s[sidei] = udp_flow_sock(c, uflow, sidei)) < 0)
+				goto cancel;
+	}
+
+	if (uflow->s[TGTSIDE] >= 0 && inany_is_unspecified(&tgt->oaddr)) {
+		/* When we target a socket, we connect() it, but might not
+		 * always bind(), leaving the kernel to pick our address.  In
+		 * that case connect() will implicitly bind() the socket, but we
+		 * need to determine its local address so that we can match
+		 * reply packets back to the correct flow.  Update the flow with
+		 * the information from getsockname() */
+		union sockaddr_inany sa;
+		socklen_t sl = sizeof(sa);
+		in_port_t port;
+
+		if (getsockname(uflow->s[TGTSIDE], &sa.sa, &sl) < 0 ||
+		    inany_from_sockaddr(&uflow->f.side[TGTSIDE].oaddr,
+					&port, &sa) < 0) {
+			flow_perror(uflow, "Unable to determine local address");
+			goto cancel;
+		}
+		if (port != tgt->oport) {
+			flow_err(uflow, "Unexpected local port");
 			goto cancel;
 		}
 	}
 
-	if (pif_is_socket(tgtpif)) {
-		struct mmsghdr discard[UIO_MAXIOV] = { 0 };
-		union {
-			flow_sidx_t sidx;
-			uint32_t data;
-		} fref = {
-			.sidx = FLOW_SIDX(flow, TGTSIDE),
-		};
-		int rc;
-
-		uflow->s[TGTSIDE] = flowside_sock_l4(c, EPOLL_TYPE_UDP_REPLY,
-						     tgtpif, tgt, fref.data);
-		if (uflow->s[TGTSIDE] < 0) {
-			flow_dbg(uflow,
-				 "Couldn't open socket for spliced flow: %s",
-				 strerror_(errno));
-			goto cancel;
-		}
-
-		if (flowside_connect(c, uflow->s[TGTSIDE], tgtpif, tgt) < 0) {
-			flow_dbg(uflow,
-				 "Couldn't connect flow socket: %s",
-				 strerror_(errno));
-			goto cancel;
-		}
-
-		/* It's possible, if unlikely, that we could receive some
-		 * unrelated packets in between the bind() and connect() of this
-		 * socket.  For now we just discard these.  We could consider
-		 * trying to redirect these to an appropriate handler, if we
-		 * need to.
-		 */
-		rc = recvmmsg(uflow->s[TGTSIDE], discard, ARRAY_SIZE(discard),
-			      MSG_DONTWAIT, NULL);
-		if (rc >= ARRAY_SIZE(discard)) {
-			flow_dbg(uflow,
-				 "Too many (%d) spurious reply datagrams", rc);
-			goto cancel;
-		} else if (rc > 0) {
-			flow_trace(uflow,
-				   "Discarded %d spurious reply datagrams", rc);
-		} else if (errno != EAGAIN) {
-			flow_err(uflow,
-				 "Unexpected error discarding datagrams: %s",
-				 strerror_(errno));
-		}
-	}
-
-	flow_hash_insert(c, FLOW_SIDX(uflow, INISIDE));
-
-	/* If the target side is a socket, it will be a reply socket that knows
-	 * its own flowside.  But if it's tap, then we need to look it up by
-	 * hash.
+	/* Tap sides always need to be looked up by hash.  Socket sides don't
+	 * always, but sometimes do (receiving packets on a socket not specific
+	 * to one flow).  Unconditionally hash both sides so all our bases are
+	 * covered
 	 */
-	if (!pif_is_socket(tgtpif))
-		flow_hash_insert(c, FLOW_SIDX(uflow, TGTSIDE));
+	flow_foreach_sidei(sidei)
+		flow_hash_insert(c, FLOW_SIDX(uflow, sidei));
+
 	FLOW_ACTIVATE(uflow);
 
 	return FLOW_SIDX(uflow, TGTSIDE);
@@ -168,9 +194,11 @@ cancel:
 }
 
 /**
- * udp_flow_from_sock() - Find or create UDP flow for "listening" socket
+ * udp_flow_from_sock() - Find or create UDP flow for incoming datagram
  * @c:		Execution context
- * @ref:	epoll reference of the receiving socket
+ * @pif:	Interface the datagram is arriving from
+ * @dst:	Our (local) address to which the datagram is arriving
+ * @port:	Our (local) port number to which the datagram is arriving
  * @s_in:	Source socket address, filled in by recvmmsg()
  * @now:	Timestamp
  *
@@ -179,7 +207,8 @@ cancel:
  * Return: sidx for the destination side of the flow for this packet, or
  *         FLOW_SIDX_NONE if we couldn't find or create a flow.
  */
-flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
+flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif,
+			       const union inany_addr *dst, in_port_t port,
 			       const union sockaddr_inany *s_in,
 			       const struct timespec *now)
 {
@@ -188,9 +217,7 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
 	union flow *flow;
 	flow_sidx_t sidx;
 
-	ASSERT(ref.type == EPOLL_TYPE_UDP_LISTEN);
-
-	sidx = flow_lookup_sa(c, IPPROTO_UDP, ref.udp.pif, s_in, ref.udp.port);
+	sidx = flow_lookup_sa(c, IPPROTO_UDP, pif, s_in, dst, port);
 	if ((uflow = udp_at_sidx(sidx))) {
 		uflow->ts = now->tv_sec;
 		return flow_sidx_opposite(sidx);
@@ -200,12 +227,11 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
 		char sastr[SOCKADDR_STRLEN];
 
 		debug("Couldn't allocate flow for UDP datagram from %s %s",
-		      pif_name(ref.udp.pif),
-		      sockaddr_ntop(s_in, sastr, sizeof(sastr)));
+		      pif_name(pif), sockaddr_ntop(s_in, sastr, sizeof(sastr)));
 		return FLOW_SIDX_NONE;
 	}
 
-	ini = flow_initiate_sa(flow, ref.udp.pif, s_in, ref.udp.port);
+	ini = flow_initiate_sa(flow, pif, s_in, dst, port);
 
 	if (!inany_is_unicast(&ini->eaddr) ||
 	    ini->eport == 0 || ini->oport == 0) {
@@ -218,7 +244,7 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
 		return FLOW_SIDX_NONE;
 	}
 
-	return udp_flow_new(c, flow, ref.fd, now);
+	return udp_flow_new(c, flow, now);
 }
 
 /**
@@ -274,17 +300,45 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
 		return FLOW_SIDX_NONE;
 	}
 
-	return udp_flow_new(c, flow, -1, now);
+	return udp_flow_new(c, flow, now);
+}
+
+/**
+ * udp_flush_flow() - Forward queued datagrams that may belong to other flows
+ * @c:		Execution context
+ * @uflow:	Flow whose socket is to be flushed
+ * @sidei:	Which side's socket to flush
+ * @now:	Current timestamp
+ */
+static void udp_flush_flow(const struct ctx *c,
+			   const struct udp_flow *uflow, unsigned sidei,
+			   const struct timespec *now)
+{
+	const struct flowside *side = &uflow->f.side[sidei];
+
+	/* Senders are unknown, but interface and oport will match this flow */
+	udp_sock_fwd(c, uflow->s[sidei], uflow->f.pif[sidei], side->oport, now);
 }
 
 /**
  * udp_flow_defer() - Deferred per-flow handling (clean up aborted flows)
+ * @c:		Execution context
  * @uflow:	Flow to handle
+ * @now:	Current timestamp
  *
  * Return: true if the connection is ready to free, false otherwise
  */
-bool udp_flow_defer(const struct udp_flow *uflow)
+bool udp_flow_defer(const struct ctx *c, struct udp_flow *uflow,
+		    const struct timespec *now)
 {
+	if (uflow->flush0) {
+		udp_flush_flow(c, uflow, INISIDE, now);
+		uflow->flush0 = false;
+	}
+	if (uflow->flush1) {
+		udp_flush_flow(c, uflow, TGTSIDE, now);
+		uflow->flush1 = false;
+	}
 	return uflow->closed;
 }
 
diff --git a/udp_flow.h b/udp_flow.h
index 9a1b059..4c528e9 100644
--- a/udp_flow.h
+++ b/udp_flow.h
@@ -8,9 +8,12 @@
 #define UDP_FLOW_H
 
 /**
- * struct udp - Descriptor for a flow of UDP packets
+ * struct udp_flow - Descriptor for a flow of UDP packets
  * @f:		Generic flow information
+ * @ttl:	TTL or hop_limit for both sides
  * @closed:	Flow is already closed
+ * @flush0:	@s[0] may have datagrams queued for other flows
+ * @flush1:	@s[1] may have datagrams queued for other flows
  * @ts:		Activity timestamp
  * @s:		Socket fd (or -1) for each side of the flow
  */
@@ -18,13 +21,19 @@ struct udp_flow {
 	/* Must be first element */
 	struct flow_common f;
 
-	bool closed :1;
+	uint8_t ttl[SIDES];
+
+	bool	closed	:1,
+		flush0	:1,
+		flush1	:1;
+
 	time_t ts;
 	int s[SIDES];
 };
 
 struct udp_flow *udp_at_sidx(flow_sidx_t sidx);
-flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
+flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif,
+			       const union inany_addr *dst, in_port_t port,
 			       const union sockaddr_inany *s_in,
 			       const struct timespec *now);
 flow_sidx_t udp_flow_from_tap(const struct ctx *c,
@@ -33,7 +42,8 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
 			      in_port_t srcport, in_port_t dstport,
 			      const struct timespec *now);
 void udp_flow_close(const struct ctx *c, struct udp_flow *uflow);
-bool udp_flow_defer(const struct udp_flow *uflow);
+bool udp_flow_defer(const struct ctx *c, struct udp_flow *uflow,
+		    const struct timespec *now);
 bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
 		    const struct timespec *now);
 
diff --git a/udp_internal.h b/udp_internal.h
index cc80e30..96d11cf 100644
--- a/udp_internal.h
+++ b/udp_internal.h
@@ -8,8 +8,6 @@
 
 #include "tap.h" /* needed by udp_meta_t */
 
-#define UDP_MAX_FRAMES		32  /* max # of frames to receive at once */
-
 /**
  * struct udp_payload_t - UDP header and data for inbound messages
  * @uh:		UDP header
@@ -30,5 +28,7 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
 size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
                        const struct flowside *toside, size_t dlen,
 		       bool no_udp_csum);
-int udp_sock_errs(const struct ctx *c, int s, uint32_t events);
+void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif,
+		  in_port_t port, const struct timespec *now);
+
 #endif /* UDP_INTERNAL_H */
diff --git a/udp_vu.c b/udp_vu.c
index 4123510..1f89509 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -57,35 +57,16 @@ static size_t udp_vu_hdrlen(bool v6)
 	return hdrlen;
 }
 
-/**
- * udp_vu_sock_info() - get socket information
- * @s:		Socket to get information from
- * @s_in:	Socket address (output)
- *
- * Return: 0 if socket address can be read, -1 otherwise
- */
-static int udp_vu_sock_info(int s, union sockaddr_inany *s_in)
-{
-	struct msghdr msg = {
-		.msg_name = s_in,
-		.msg_namelen = sizeof(union sockaddr_inany),
-	};
-
-	return recvmsg(s, &msg, MSG_PEEK | MSG_DONTWAIT);
-}
-
 /**
  * udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers
  * @c:		Execution context
  * @s:		Socket to receive from
- * @events:	epoll events bitmap
  * @v6:		Set for IPv6 connections
  * @dlen:	Size of received data (output)
  *
  * Return: Number of iov entries used to store the datagram
  */
-static int udp_vu_sock_recv(const struct ctx *c, int s, uint32_t events,
-			    bool v6, ssize_t *dlen)
+static int udp_vu_sock_recv(const struct ctx *c, int s, bool v6, ssize_t *dlen)
 {
 	struct vu_dev *vdev = c->vdev;
 	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
@@ -95,9 +76,6 @@ static int udp_vu_sock_recv(const struct ctx *c, int s, uint32_t events,
 
 	ASSERT(!c->no_udp);
 
-	if (!(events & EPOLLIN))
-		return 0;
-
 	/* compute L2 header length */
 	hdrlen = udp_vu_hdrlen(v6);
 
@@ -214,125 +192,27 @@ static void udp_vu_csum(const struct flowside *toside, int iov_used)
 }
 
 /**
- * udp_vu_listen_sock_handler() - Handle new data from socket
+ * udp_vu_sock_to_tap() - Forward datagrams from socket to tap
  * @c:		Execution context
- * @ref:	epoll reference
- * @events:	epoll events bitmap
- * @now:	Current timestamp
+ * @s:		Socket to read data from
+ * @n:		Maximum number of datagrams to forward
+ * @tosidx:	Flow & side to forward data from @s to
  */
-void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
-				uint32_t events, const struct timespec *now)
+void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
 {
-	struct vu_dev *vdev = c->vdev;
-	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
-	int i;
-
-	if (udp_sock_errs(c, ref.fd, events) < 0) {
-		err("UDP: Unrecoverable error on listening socket:"
-		    " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port);
-		return;
-	}
-
-	for (i = 0; i < UDP_MAX_FRAMES; i++) {
-		const struct flowside *toside;
-		union sockaddr_inany s_in;
-		flow_sidx_t sidx;
-		uint8_t pif;
-		ssize_t dlen;
-		int iov_used;
-		bool v6;
-
-		if (udp_vu_sock_info(ref.fd, &s_in) < 0)
-			break;
-
-		sidx = udp_flow_from_sock(c, ref, &s_in, now);
-		pif = pif_at_sidx(sidx);
-
-		if (pif != PIF_TAP) {
-			if (flow_sidx_valid(sidx)) {
-				flow_sidx_t fromsidx = flow_sidx_opposite(sidx);
-				struct udp_flow *uflow = udp_at_sidx(sidx);
-
-				flow_err(uflow,
-					"No support for forwarding UDP from %s to %s",
-					pif_name(pif_at_sidx(fromsidx)),
-					pif_name(pif));
-			} else {
-				debug("Discarding 1 datagram without flow");
-			}
-
-			continue;
-		}
-
-		toside = flowside_at_sidx(sidx);
-
-		v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
-
-		iov_used = udp_vu_sock_recv(c, ref.fd, events, v6, &dlen);
-		if (iov_used <= 0)
-			break;
-
-		udp_vu_prepare(c, toside, dlen);
-		if (*c->pcap) {
-			udp_vu_csum(toside, iov_used);
-			pcap_iov(iov_vu, iov_used,
-				 sizeof(struct virtio_net_hdr_mrg_rxbuf));
-		}
-		vu_flush(vdev, vq, elem, iov_used);
-	}
-}
-
-/**
- * udp_vu_reply_sock_handler() - Handle new data from flow specific socket
- * @c:		Execution context
- * @ref:	epoll reference
- * @events:	epoll events bitmap
- * @now:	Current timestamp
- */
-void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
-			        uint32_t events, const struct timespec *now)
-{
-	flow_sidx_t tosidx = flow_sidx_opposite(ref.flowside);
 	const struct flowside *toside = flowside_at_sidx(tosidx);
-	struct udp_flow *uflow = udp_at_sidx(ref.flowside);
-	int from_s = uflow->s[ref.flowside.sidei];
+	bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
 	struct vu_dev *vdev = c->vdev;
 	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
 	int i;
 
-	ASSERT(!c->no_udp);
-
-	if (udp_sock_errs(c, from_s, events) < 0) {
-		flow_err(uflow, "Unrecoverable error on reply socket");
-		flow_err_details(uflow);
-		udp_flow_close(c, uflow);
-		return;
-	}
-
-	for (i = 0; i < UDP_MAX_FRAMES; i++) {
-		uint8_t topif = pif_at_sidx(tosidx);
+	for (i = 0; i < n; i++) {
 		ssize_t dlen;
 		int iov_used;
-		bool v6;
 
-		ASSERT(uflow);
-
-		if (topif != PIF_TAP) {
-			uint8_t frompif = pif_at_sidx(ref.flowside);
-
-			flow_err(uflow,
-				 "No support for forwarding UDP from %s to %s",
-				 pif_name(frompif), pif_name(topif));
-			continue;
-		}
-
-		v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
-
-		iov_used = udp_vu_sock_recv(c, from_s, events, v6, &dlen);
+		iov_used = udp_vu_sock_recv(c, s, v6, &dlen);
 		if (iov_used <= 0)
 			break;
-		flow_trace(uflow, "Received 1 datagram on reply socket");
-		uflow->ts = now->tv_sec;
 
 		udp_vu_prepare(c, toside, dlen);
 		if (*c->pcap) {
diff --git a/udp_vu.h b/udp_vu.h
index ba7018d..576b0e7 100644
--- a/udp_vu.h
+++ b/udp_vu.h
@@ -6,8 +6,8 @@
 #ifndef UDP_VU_H
 #define UDP_VU_H
 
-void udp_vu_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
-				uint32_t events, const struct timespec *now);
-void udp_vu_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
-			       uint32_t events, const struct timespec *now);
+void udp_vu_listen_sock_data(const struct ctx *c, union epoll_ref ref,
+			     const struct timespec *now);
+void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx);
+
 #endif /* UDP_VU_H */
diff --git a/util.c b/util.c
index 656e86a..f5497d4 100644
--- a/util.c
+++ b/util.c
@@ -71,7 +71,7 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
 	case EPOLL_TYPE_UDP_LISTEN:
 		freebind = c->freebind;
 		/* fallthrough */
-	case EPOLL_TYPE_UDP_REPLY:
+	case EPOLL_TYPE_UDP:
 		proto = IPPROTO_UDP;
 		socktype = SOCK_DGRAM | SOCK_NONBLOCK;
 		break;
@@ -109,11 +109,15 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
 		debug("Failed to set SO_REUSEADDR on socket %i", fd);
 
 	if (proto == IPPROTO_UDP) {
+		int pktinfo = af == AF_INET ? IP_PKTINFO : IPV6_RECVPKTINFO;
+		int recverr = af == AF_INET ? IP_RECVERR : IPV6_RECVERR;
 		int level = af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6;
-		int opt = af == AF_INET ? IP_RECVERR : IPV6_RECVERR;
 
-		if (setsockopt(fd, level, opt, &y, sizeof(y)))
+		if (setsockopt(fd, level, recverr, &y, sizeof(y)))
 			die_perror("Failed to set RECVERR on socket %i", fd);
+
+		if (setsockopt(fd, level, pktinfo, &y, sizeof(y)))
+			die_perror("Failed to set PKTINFO on socket %i", fd);
 	}
 
 	if (ifname && *ifname) {
@@ -871,7 +875,9 @@ void close_open_files(int argc, char **argv)
 			errno = 0;
 			fd = strtol(optarg, NULL, 0);
 
-			if (errno || fd <= STDERR_FILENO || fd > INT_MAX)
+			if (errno ||
+			    (fd != STDIN_FILENO && fd <= STDERR_FILENO) ||
+			    fd > INT_MAX)
 				die("Invalid --fd: %s", optarg);
 		}
 	} while (name != -1);
@@ -1017,3 +1023,22 @@ void encode_domain_name(char *buf, const char *domain_name)
 	}
 	p[i] = 0L;
 }
+
+/**
+ * abort_with_msg() - Log a critical message, then abort the process
+ * @fmt:	printf()-style format string
+ * @...:	Arguments consumed by @fmt
+ */
+void abort_with_msg(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vlogmsg(true, false, LOG_CRIT, fmt, args);
+	va_end(args);
+
+	/* With seccomp in place, abort() may end in SIGSYS rather than
+	 * SIGABRT; the process is terminated either way.
+	 */
+	abort();
+}
diff --git a/util.h b/util.h
index 50e96d3..5947337 100644
--- a/util.h
+++ b/util.h
@@ -31,18 +31,9 @@
 #ifndef SECCOMP_RET_KILL_PROCESS
 #define SECCOMP_RET_KILL_PROCESS	SECCOMP_RET_KILL
 #endif
-#ifndef ETH_MAX_MTU
-#define ETH_MAX_MTU			USHRT_MAX
-#endif
-#ifndef ETH_MIN_MTU
-#define ETH_MIN_MTU			68
-#endif
 #ifndef IP_MAX_MTU
 #define IP_MAX_MTU			USHRT_MAX
 #endif
-#ifndef IPV6_MIN_MTU
-#define IPV6_MIN_MTU			1280
-#endif
 
 #ifndef MIN
 #define MIN(x, y)		(((x) < (y)) ? (x) : (y))
@@ -70,27 +61,22 @@
 #define STRINGIFY(x)	#x
 #define STR(x)		STRINGIFY(x)
 
-#ifdef CPPCHECK_6936
+void abort_with_msg(const char *fmt, ...)
+	__attribute__((format(printf, 1, 2), noreturn));
+
 /* Some cppcheck versions get confused by aborts inside a loop, causing
  * it to give false positive uninitialised variable warnings later in
  * the function, because it doesn't realise the non-initialising path
  * already exited.  See https://trac.cppcheck.net/ticket/13227
+ *
+ * Therefore, avoid using the usual do while wrapper we use to force the macro
+ * to act like a single statement requiring a ';'.
  */
-#define ASSERT(expr)		\
-	((expr) ? (void)0 : abort())
-#else
+#define ASSERT_WITH_MSG(expr, ...)					\
+	((expr) ? (void)0 : abort_with_msg(__VA_ARGS__))
 #define ASSERT(expr)							\
-	do {								\
-		if (!(expr)) {						\
-			err("ASSERTION FAILED in %s (%s:%d): %s",	\
-			    __func__, __FILE__, __LINE__, STRINGIFY(expr)); \
-			/* This may actually SIGSYS, due to seccomp,	\
-			 * but that will still get the job done		\
-			 */						\
-			abort();					\
-		}							\
-	} while (0)
-#endif
+	ASSERT_WITH_MSG((expr), "ASSERTION FAILED in %s (%s:%d): %s",	\
+			__func__, __FILE__, __LINE__, STRINGIFY(expr))
 
 #ifdef P_tmpdir
 #define TMPDIR		P_tmpdir
@@ -385,6 +371,16 @@ static inline int wrap_accept4(int sockfd, struct sockaddr *addr,
 #define accept4(s, addr, addrlen, flags) \
 	wrap_accept4((s), (addr), (addrlen), (flags))
 
+static inline int wrap_getsockname(int sockfd, struct sockaddr *addr,
+/* cppcheck-suppress constParameterPointer */
+				   socklen_t *addrlen)
+{
+	sa_init(addr, addrlen);
+	return getsockname(sockfd, addr, addrlen);
+}
+#define getsockname(s, addr, addrlen) \
+	wrap_getsockname((s), (addr), (addrlen))
+
 #define PASST_MAXDNAME 254 /* 253 (RFC 1035) + 1 (the terminator) */
 void encode_domain_name(char *buf, const char *domain_name);
 
diff --git a/vhost_user.c b/vhost_user.c
index be1aa94..e8377bb 100644
--- a/vhost_user.c
+++ b/vhost_user.c
@@ -302,13 +302,13 @@ static void vu_message_write(int conn_fd, struct vhost_user_msg *vmsg)
  * @conn_fd:	vhost-user command socket
  * @vmsg:	vhost-user message
  */
-static void vu_send_reply(int conn_fd, struct vhost_user_msg *msg)
+static void vu_send_reply(int conn_fd, struct vhost_user_msg *vmsg)
 {
-	msg->hdr.flags &= ~VHOST_USER_VERSION_MASK;
-	msg->hdr.flags |= VHOST_USER_VERSION;
-	msg->hdr.flags |= VHOST_USER_REPLY_MASK;
+	vmsg->hdr.flags &= ~VHOST_USER_VERSION_MASK;
+	vmsg->hdr.flags |= VHOST_USER_VERSION;
+	vmsg->hdr.flags |= VHOST_USER_REPLY_MASK;
 
-	vu_message_write(conn_fd, msg);
+	vu_message_write(conn_fd, vmsg);
 }
 
 /**
@@ -319,7 +319,7 @@ static void vu_send_reply(int conn_fd, struct vhost_user_msg *msg)
  * Return: True as a reply is requested
  */
 static bool vu_get_features_exec(struct vu_dev *vdev,
-				 struct vhost_user_msg *msg)
+				 struct vhost_user_msg *vmsg)
 {
 	uint64_t features =
 		1ULL << VIRTIO_F_VERSION_1 |
@@ -329,9 +329,9 @@ static bool vu_get_features_exec(struct vu_dev *vdev,
 
 	(void)vdev;
 
-	vmsg_set_reply_u64(msg, features);
+	vmsg_set_reply_u64(vmsg, features);
 
-	debug("Sending back to guest u64: 0x%016"PRIx64, msg->payload.u64);
+	debug("Sending back to guest u64: 0x%016"PRIx64, vmsg->payload.u64);
 
 	return true;
 }
@@ -357,11 +357,11 @@ static void vu_set_enable_all_rings(struct vu_dev *vdev, bool enable)
  * Return: False as no reply is requested
  */
 static bool vu_set_features_exec(struct vu_dev *vdev,
-				 struct vhost_user_msg *msg)
+				 struct vhost_user_msg *vmsg)
 {
-	debug("u64: 0x%016"PRIx64, msg->payload.u64);
+	debug("u64: 0x%016"PRIx64, vmsg->payload.u64);
 
-	vdev->features = msg->payload.u64;
+	vdev->features = vmsg->payload.u64;
 	/* We only support devices conforming to VIRTIO 1.0 or
 	 * later
 	 */
@@ -382,10 +382,10 @@ static bool vu_set_features_exec(struct vu_dev *vdev,
  * Return: False as no reply is requested
  */
 static bool vu_set_owner_exec(struct vu_dev *vdev,
-			      struct vhost_user_msg *msg)
+			      struct vhost_user_msg *vmsg)
 {
 	(void)vdev;
-	(void)msg;
+	(void)vmsg;
 
 	return false;
 }
@@ -423,9 +423,9 @@ static bool map_ring(struct vu_dev *vdev, struct vu_virtq *vq)
  * #syscalls:vu mmap|mmap2 munmap
  */
 static bool vu_set_mem_table_exec(struct vu_dev *vdev,
-				  struct vhost_user_msg *msg)
+				  struct vhost_user_msg *vmsg)
 {
-	struct vhost_user_memory m = msg->payload.memory, *memory = &m;
+	struct vhost_user_memory m = vmsg->payload.memory, *memory = &m;
 	unsigned int i;
 
 	for (i = 0; i < vdev->nregions; i++) {
@@ -465,7 +465,7 @@ static bool vu_set_mem_table_exec(struct vu_dev *vdev,
 		 */
 		mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
 				 PROT_READ | PROT_WRITE, MAP_SHARED |
-				 MAP_NORESERVE, msg->fds[i], 0);
+				 MAP_NORESERVE, vmsg->fds[i], 0);
 
 		if (mmap_addr == MAP_FAILED)
 			die_perror("vhost-user region mmap error");
@@ -474,7 +474,7 @@ static bool vu_set_mem_table_exec(struct vu_dev *vdev,
 		debug("    mmap_addr:       0x%016"PRIx64,
 		      dev_region->mmap_addr);
 
-		close(msg->fds[i]);
+		close(vmsg->fds[i]);
 	}
 
 	for (i = 0; i < VHOST_USER_MAX_QUEUES; i++) {
@@ -517,7 +517,7 @@ static void vu_close_log(struct vu_dev *vdev)
  * vu_log_kick() - Inform the front-end that the log has been modified
  * @vdev:	vhost-user device
  */
-void vu_log_kick(const struct vu_dev *vdev)
+static void vu_log_kick(const struct vu_dev *vdev)
 {
 	if (vdev->log_call_fd != -1) {
 		int rc;
@@ -541,7 +541,7 @@ static void vu_log_page(uint8_t *log_table, uint64_t page)
 
 /**
  * vu_log_write() - Log memory write
- * @dev:	vhost-user device
+ * @vdev:	vhost-user device
  * @address:	Memory address
  * @length:	Memory size
  */
@@ -566,23 +566,23 @@ void vu_log_write(const struct vu_dev *vdev, uint64_t address, uint64_t length)
  * @vdev:	vhost-user device
  * @vmsg:	vhost-user message
  *
- * Return: False as no reply is requested
+ * Return: True as a reply is requested
  *
  * #syscalls:vu mmap|mmap2 munmap
  */
 static bool vu_set_log_base_exec(struct vu_dev *vdev,
-				 struct vhost_user_msg *msg)
+				 struct vhost_user_msg *vmsg)
 {
 	uint64_t log_mmap_size, log_mmap_offset;
 	void *base;
 	int fd;
 
-	if (msg->fd_num != 1 || msg->hdr.size != sizeof(msg->payload.log))
+	if (vmsg->fd_num != 1 || vmsg->hdr.size != sizeof(vmsg->payload.log))
 		die("vhost-user: Invalid log_base message");
 
-	fd = msg->fds[0];
-	log_mmap_offset = msg->payload.log.mmap_offset;
-	log_mmap_size = msg->payload.log.mmap_size;
+	fd = vmsg->fds[0];
+	log_mmap_offset = vmsg->payload.log.mmap_offset;
+	log_mmap_size = vmsg->payload.log.mmap_size;
 
 	debug("vhost-user log mmap_offset: %"PRId64, log_mmap_offset);
 	debug("vhost-user log mmap_size:   %"PRId64, log_mmap_size);
@@ -599,8 +599,8 @@ static bool vu_set_log_base_exec(struct vu_dev *vdev,
 	vdev->log_table = base;
 	vdev->log_size = log_mmap_size;
 
-	msg->hdr.size = sizeof(msg->payload.u64);
-	msg->fd_num = 0;
+	vmsg->hdr.size = sizeof(vmsg->payload.u64);
+	vmsg->fd_num = 0;
 
 	return true;
 }
@@ -613,15 +613,15 @@ static bool vu_set_log_base_exec(struct vu_dev *vdev,
  * Return: False as no reply is requested
  */
 static bool vu_set_log_fd_exec(struct vu_dev *vdev,
-			       struct vhost_user_msg *msg)
+			       struct vhost_user_msg *vmsg)
 {
-	if (msg->fd_num != 1)
+	if (vmsg->fd_num != 1)
 		die("Invalid log_fd message");
 
 	if (vdev->log_call_fd != -1)
 		close(vdev->log_call_fd);
 
-	vdev->log_call_fd = msg->fds[0];
+	vdev->log_call_fd = vmsg->fds[0];
 
 	debug("Got log_call_fd: %d", vdev->log_call_fd);
 
@@ -636,10 +636,10 @@ static bool vu_set_log_fd_exec(struct vu_dev *vdev,
  * Return: False as no reply is requested
  */
 static bool vu_set_vring_num_exec(struct vu_dev *vdev,
-				  struct vhost_user_msg *msg)
+				  struct vhost_user_msg *vmsg)
 {
-	unsigned int idx = msg->payload.state.index;
-	unsigned int num = msg->payload.state.num;
+	unsigned int idx = vmsg->payload.state.index;
+	unsigned int num = vmsg->payload.state.num;
 
 	trace("State.index: %u", idx);
 	trace("State.num:   %u", num);
@@ -656,13 +656,13 @@ static bool vu_set_vring_num_exec(struct vu_dev *vdev,
  * Return: False as no reply is requested
  */
 static bool vu_set_vring_addr_exec(struct vu_dev *vdev,
-				   struct vhost_user_msg *msg)
+				   struct vhost_user_msg *vmsg)
 {
 	/* We need to copy the payload to vhost_vring_addr structure
-         * to access index because address of msg->payload.addr
+         * to access index because address of vmsg->payload.addr
          * can be unaligned as it is packed.
          */
-	struct vhost_vring_addr addr = msg->payload.addr;
+	struct vhost_vring_addr addr = vmsg->payload.addr;
 	struct vu_virtq *vq = &vdev->vq[addr.index];
 
 	debug("vhost_vring_addr:");
@@ -677,7 +677,7 @@ static bool vu_set_vring_addr_exec(struct vu_dev *vdev,
 	debug("    log_guest_addr:   0x%016" PRIx64,
 	      (uint64_t)addr.log_guest_addr);
 
-	vq->vra = msg->payload.addr;
+	vq->vra = vmsg->payload.addr;
 	vq->vring.flags = addr.flags;
 	vq->vring.log_guest_addr = addr.log_guest_addr;
 
@@ -702,10 +702,10 @@ static bool vu_set_vring_addr_exec(struct vu_dev *vdev,
  * Return: False as no reply is requested
  */
 static bool vu_set_vring_base_exec(struct vu_dev *vdev,
-				   struct vhost_user_msg *msg)
+				   struct vhost_user_msg *vmsg)
 {
-	unsigned int idx = msg->payload.state.index;
-	unsigned int num = msg->payload.state.num;
+	unsigned int idx = vmsg->payload.state.index;
+	unsigned int num = vmsg->payload.state.num;
 
 	debug("State.index: %u", idx);
 	debug("State.num:   %u", num);
@@ -723,13 +723,13 @@ static bool vu_set_vring_base_exec(struct vu_dev *vdev,
  * Return: True as a reply is requested
  */
 static bool vu_get_vring_base_exec(struct vu_dev *vdev,
-				   struct vhost_user_msg *msg)
+				   struct vhost_user_msg *vmsg)
 {
-	unsigned int idx = msg->payload.state.index;
+	unsigned int idx = vmsg->payload.state.index;
 
 	debug("State.index: %u", idx);
-	msg->payload.state.num = vdev->vq[idx].last_avail_idx;
-	msg->hdr.size = sizeof(msg->payload.state);
+	vmsg->payload.state.num = vdev->vq[idx].last_avail_idx;
+	vmsg->hdr.size = sizeof(vmsg->payload.state);
 
 	vdev->vq[idx].started = false;
 	vdev->vq[idx].vring.avail = 0;
@@ -771,21 +771,21 @@ static void vu_set_watch(const struct vu_dev *vdev, int idx)
  * 			       close fds if NOFD bit is set
  * @vmsg:	vhost-user message
  */
-static void vu_check_queue_msg_file(struct vhost_user_msg *msg)
+static void vu_check_queue_msg_file(struct vhost_user_msg *vmsg)
 {
-	bool nofd = msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
-	int idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
+	int idx = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 
 	if (idx >= VHOST_USER_MAX_QUEUES)
 		die("Invalid vhost-user queue index: %u", idx);
 
 	if (nofd) {
-		vmsg_close_fds(msg);
+		vmsg_close_fds(vmsg);
 		return;
 	}
 
-	if (msg->fd_num != 1)
-		die("Invalid fds in vhost-user request: %d", msg->hdr.request);
+	if (vmsg->fd_num != 1)
+		die("Invalid fds in vhost-user request: %d", vmsg->hdr.request);
 }
 
 /**
@@ -797,14 +797,14 @@ static void vu_check_queue_msg_file(struct vhost_user_msg *msg)
  * Return: False as no reply is requested
  */
 static bool vu_set_vring_kick_exec(struct vu_dev *vdev,
-				   struct vhost_user_msg *msg)
+				   struct vhost_user_msg *vmsg)
 {
-	bool nofd = msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
-	int idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
+	int idx = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 
-	debug("u64: 0x%016"PRIx64, msg->payload.u64);
+	debug("u64: 0x%016"PRIx64, vmsg->payload.u64);
 
-	vu_check_queue_msg_file(msg);
+	vu_check_queue_msg_file(vmsg);
 
 	if (vdev->vq[idx].kick_fd != -1) {
 		epoll_del(vdev->context, vdev->vq[idx].kick_fd);
@@ -813,7 +813,7 @@ static bool vu_set_vring_kick_exec(struct vu_dev *vdev,
 	}
 
 	if (!nofd)
-		vdev->vq[idx].kick_fd = msg->fds[0];
+		vdev->vq[idx].kick_fd = vmsg->fds[0];
 
 	debug("Got kick_fd: %d for vq: %d", vdev->vq[idx].kick_fd, idx);
 
@@ -837,14 +837,14 @@ static bool vu_set_vring_kick_exec(struct vu_dev *vdev,
  * Return: False as no reply is requested
  */
 static bool vu_set_vring_call_exec(struct vu_dev *vdev,
-				   struct vhost_user_msg *msg)
+				   struct vhost_user_msg *vmsg)
 {
-	bool nofd = msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
-	int idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
+	int idx = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 
-	debug("u64: 0x%016"PRIx64, msg->payload.u64);
+	debug("u64: 0x%016"PRIx64, vmsg->payload.u64);
 
-	vu_check_queue_msg_file(msg);
+	vu_check_queue_msg_file(vmsg);
 
 	if (vdev->vq[idx].call_fd != -1) {
 		close(vdev->vq[idx].call_fd);
@@ -852,11 +852,11 @@ static bool vu_set_vring_call_exec(struct vu_dev *vdev,
 	}
 
 	if (!nofd)
-		vdev->vq[idx].call_fd = msg->fds[0];
+		vdev->vq[idx].call_fd = vmsg->fds[0];
 
 	/* in case of I/O hang after reconnecting */
 	if (vdev->vq[idx].call_fd != -1)
-		eventfd_write(msg->fds[0], 1);
+		eventfd_write(vmsg->fds[0], 1);
 
 	debug("Got call_fd: %d for vq: %d", vdev->vq[idx].call_fd, idx);
 
@@ -872,14 +872,14 @@ static bool vu_set_vring_call_exec(struct vu_dev *vdev,
  * Return: False as no reply is requested
  */
 static bool vu_set_vring_err_exec(struct vu_dev *vdev,
-				  struct vhost_user_msg *msg)
+				  struct vhost_user_msg *vmsg)
 {
-	bool nofd = msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
-	int idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
+	int idx = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 
-	debug("u64: 0x%016"PRIx64, msg->payload.u64);
+	debug("u64: 0x%016"PRIx64, vmsg->payload.u64);
 
-	vu_check_queue_msg_file(msg);
+	vu_check_queue_msg_file(vmsg);
 
 	if (vdev->vq[idx].err_fd != -1) {
 		close(vdev->vq[idx].err_fd);
@@ -887,7 +887,7 @@ static bool vu_set_vring_err_exec(struct vu_dev *vdev,
 	}
 
 	if (!nofd)
-		vdev->vq[idx].err_fd = msg->fds[0];
+		vdev->vq[idx].err_fd = vmsg->fds[0];
 
 	return false;
 }
@@ -901,7 +901,7 @@ static bool vu_set_vring_err_exec(struct vu_dev *vdev,
  * Return: True as a reply is requested
  */
 static bool vu_get_protocol_features_exec(struct vu_dev *vdev,
-					  struct vhost_user_msg *msg)
+					  struct vhost_user_msg *vmsg)
 {
 	uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK |
 			    1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
@@ -909,7 +909,7 @@ static bool vu_get_protocol_features_exec(struct vu_dev *vdev,
 			    1ULL << VHOST_USER_PROTOCOL_F_RARP;
 
 	(void)vdev;
-	vmsg_set_reply_u64(msg, features);
+	vmsg_set_reply_u64(vmsg, features);
 
 	return true;
 }
@@ -922,13 +922,13 @@ static bool vu_get_protocol_features_exec(struct vu_dev *vdev,
  * Return: False as no reply is requested
  */
 static bool vu_set_protocol_features_exec(struct vu_dev *vdev,
-					  struct vhost_user_msg *msg)
+					  struct vhost_user_msg *vmsg)
 {
-	uint64_t features = msg->payload.u64;
+	uint64_t features = vmsg->payload.u64;
 
 	debug("u64: 0x%016"PRIx64, features);
 
-	vdev->protocol_features = msg->payload.u64;
+	vdev->protocol_features = vmsg->payload.u64;
 
 	return false;
 }
@@ -941,11 +941,11 @@ static bool vu_set_protocol_features_exec(struct vu_dev *vdev,
  * Return: True as a reply is requested
  */
 static bool vu_get_queue_num_exec(struct vu_dev *vdev,
-				  struct vhost_user_msg *msg)
+				  struct vhost_user_msg *vmsg)
 {
 	(void)vdev;
 
-	vmsg_set_reply_u64(msg, VHOST_USER_MAX_QUEUES);
+	vmsg_set_reply_u64(vmsg, VHOST_USER_MAX_QUEUES);
 
 	return true;
 }
@@ -958,10 +958,10 @@ static bool vu_get_queue_num_exec(struct vu_dev *vdev,
  * Return: False as no reply is requested
  */
 static bool vu_set_vring_enable_exec(struct vu_dev *vdev,
-				     struct vhost_user_msg *msg)
+				     struct vhost_user_msg *vmsg)
 {
-	unsigned int enable = msg->payload.state.num;
-	unsigned int idx = msg->payload.state.index;
+	unsigned int enable = vmsg->payload.state.num;
+	unsigned int idx = vmsg->payload.state.index;
 
 	debug("State.index:  %u", idx);
 	debug("State.enable: %u", enable);
@@ -974,17 +974,17 @@ static bool vu_set_vring_enable_exec(struct vu_dev *vdev,
 }
 
 /**
- * vu_set_send_rarp_exec() - vhost-user specification says: "Broadcast a fake
- * 			     RARP to notify the migration is terminated",
- * 			     but passt doesn't need to update any ARP table,
- * 			     so do nothing to silence QEMU bogus error message
+ * vu_send_rarp_exec() - vhost-user specification says: "Broadcast a fake
+ * 			 RARP to notify the migration is terminated",
+ * 			 but passt doesn't need to update any ARP table,
+ * 			 so do nothing to silence QEMU bogus error message
  * @vdev:	vhost-user device
  * @vmsg:	vhost-user message
  *
  * Return: False as no reply is requested
  */
 static bool vu_send_rarp_exec(struct vu_dev *vdev,
-			      struct vhost_user_msg *msg)
+			      struct vhost_user_msg *vmsg)
 {
 	char macstr[ETH_ADDRSTRLEN];
 
@@ -993,7 +993,7 @@ static bool vu_send_rarp_exec(struct vu_dev *vdev,
 	/* ignore the command */
 
 	debug("Ignore command VHOST_USER_SEND_RARP for %s",
-	      eth_ntop((unsigned char *)&msg->payload.u64, macstr,
+	      eth_ntop((unsigned char *)&vmsg->payload.u64, macstr,
 		       sizeof(macstr)));
 
 	return false;
@@ -1008,12 +1008,12 @@ static bool vu_send_rarp_exec(struct vu_dev *vdev,
  *         and set bit 8 as we don't provide our own fd.
  */
 static bool vu_set_device_state_fd_exec(struct vu_dev *vdev,
-					struct vhost_user_msg *msg)
+					struct vhost_user_msg *vmsg)
 {
-	unsigned int direction = msg->payload.transfer_state.direction;
-	unsigned int phase = msg->payload.transfer_state.phase;
+	unsigned int direction = vmsg->payload.transfer_state.direction;
+	unsigned int phase = vmsg->payload.transfer_state.phase;
 
-	if (msg->fd_num != 1)
+	if (vmsg->fd_num != 1)
 		die("Invalid device_state_fd message");
 
 	if (phase != VHOST_USER_TRANSFER_STATE_PHASE_STOPPED)
@@ -1021,13 +1021,13 @@ static bool vu_set_device_state_fd_exec(struct vu_dev *vdev,
 
 	if (direction != VHOST_USER_TRANSFER_STATE_DIRECTION_SAVE &&
 	    direction != VHOST_USER_TRANSFER_STATE_DIRECTION_LOAD)
-		die("Invalide device_state_fd direction: %d", direction);
+		die("Invalid device_state_fd direction: %d", direction);
 
-	migrate_request(vdev->context, msg->fds[0],
+	migrate_request(vdev->context, vmsg->fds[0],
 			direction == VHOST_USER_TRANSFER_STATE_DIRECTION_LOAD);
 
 	/* We don't provide a new fd for the data transfer */
-	vmsg_set_reply_u64(msg, VHOST_USER_VRING_NOFD_MASK);
+	vmsg_set_reply_u64(vmsg, VHOST_USER_VRING_NOFD_MASK);
 
 	return true;
 }
@@ -1041,9 +1041,9 @@ static bool vu_set_device_state_fd_exec(struct vu_dev *vdev,
  */
 /* cppcheck-suppress constParameterCallback */
 static bool vu_check_device_state_exec(struct vu_dev *vdev,
-				       struct vhost_user_msg *msg)
+				       struct vhost_user_msg *vmsg)
 {
-	vmsg_set_reply_u64(msg, vdev->context->device_state_result);
+	vmsg_set_reply_u64(vmsg, vdev->context->device_state_result);
 
 	return true;
 }
@@ -1051,7 +1051,6 @@ static bool vu_check_device_state_exec(struct vu_dev *vdev,
 /**
  * vu_init() - Initialize vhost-user device structure
  * @c:		execution context
- * @vdev:	vhost-user device
  */
 void vu_init(struct ctx *c)
 {
@@ -1134,7 +1133,7 @@ static void vu_sock_reset(struct vu_dev *vdev)
 }
 
 static bool (*vu_handle[VHOST_USER_MAX])(struct vu_dev *vdev,
-					struct vhost_user_msg *msg) = {
+					struct vhost_user_msg *vmsg) = {
 	[VHOST_USER_GET_FEATURES]	   = vu_get_features_exec,
 	[VHOST_USER_SET_FEATURES]	   = vu_set_features_exec,
 	[VHOST_USER_GET_PROTOCOL_FEATURES] = vu_get_protocol_features_exec,
@@ -1165,7 +1164,7 @@ static bool (*vu_handle[VHOST_USER_MAX])(struct vu_dev *vdev,
  */
 void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events)
 {
-	struct vhost_user_msg msg = { 0 };
+	struct vhost_user_msg vmsg = { 0 };
 	bool need_reply, reply_requested;
 	int ret;
 
@@ -1174,38 +1173,38 @@ void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events)
 		return;
 	}
 
-	ret = vu_message_read_default(fd, &msg);
+	ret = vu_message_read_default(fd, &vmsg);
 	if (ret == 0) {
 		vu_sock_reset(vdev);
 		return;
 	}
 	debug("================ Vhost user message ================");
-	debug("Request: %s (%d)", vu_request_to_string(msg.hdr.request),
-		msg.hdr.request);
-	debug("Flags:   0x%x", msg.hdr.flags);
-	debug("Size:    %u", msg.hdr.size);
+	debug("Request: %s (%d)", vu_request_to_string(vmsg.hdr.request),
+		vmsg.hdr.request);
+	debug("Flags:   0x%x", vmsg.hdr.flags);
+	debug("Size:    %u", vmsg.hdr.size);
 
-	need_reply = msg.hdr.flags & VHOST_USER_NEED_REPLY_MASK;
+	need_reply = vmsg.hdr.flags & VHOST_USER_NEED_REPLY_MASK;
 
-	if (msg.hdr.request >= 0 && msg.hdr.request < VHOST_USER_MAX &&
-	    vu_handle[msg.hdr.request])
-		reply_requested = vu_handle[msg.hdr.request](vdev, &msg);
+	if (vmsg.hdr.request >= 0 && vmsg.hdr.request < VHOST_USER_MAX &&
+	    vu_handle[vmsg.hdr.request])
+		reply_requested = vu_handle[vmsg.hdr.request](vdev, &vmsg);
 	else
-		die("Unhandled request: %d", msg.hdr.request);
+		die("Unhandled request: %d", vmsg.hdr.request);
 
 	/* cppcheck-suppress legacyUninitvar */
 	if (!reply_requested && need_reply) {
-		msg.payload.u64 = 0;
-		msg.hdr.flags = 0;
-		msg.hdr.size = sizeof(msg.payload.u64);
-		msg.fd_num = 0;
+		vmsg.payload.u64 = 0;
+		vmsg.hdr.flags = 0;
+		vmsg.hdr.size = sizeof(vmsg.payload.u64);
+		vmsg.fd_num = 0;
 		reply_requested = true;
 	}
 
 	if (reply_requested)
-		vu_send_reply(fd, &msg);
+		vu_send_reply(fd, &vmsg);
 
-	if (msg.hdr.request == VHOST_USER_CHECK_DEVICE_STATE &&
+	if (vmsg.hdr.request == VHOST_USER_CHECK_DEVICE_STATE &&
 	    vdev->context->device_state_result == 0 &&
 	    !vdev->context->migrate_target) {
 		info("Migration complete, exiting");
diff --git a/vhost_user.h b/vhost_user.h
index e769cb1..f2ae2da 100644
--- a/vhost_user.h
+++ b/vhost_user.h
@@ -184,7 +184,7 @@ union vhost_user_payload {
 };
 
 /**
- * struct vhost_user_msg - vhost-use message
+ * struct vhost_user_msg - vhost-user message
  * @hdr:		Message header
  * @payload:		Message payload
  * @fds:		File descriptors associated with the message
@@ -241,7 +241,6 @@ static inline bool vu_queue_started(const struct vu_virtq *vq)
 void vu_print_capabilities(void);
 void vu_init(struct ctx *c);
 void vu_cleanup(struct vu_dev *vdev);
-void vu_log_kick(const struct vu_dev *vdev);
 void vu_log_write(const struct vu_dev *vdev, uint64_t address,
 		  uint64_t length);
 void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events);
diff --git a/virtio.c b/virtio.c
index 2b58e4d..83906aa 100644
--- a/virtio.c
+++ b/virtio.c
@@ -156,9 +156,9 @@ static inline uint16_t vring_avail_ring(const struct vu_virtq *vq, int i)
 }
 
 /**
- * virtq_used_event - Get location of used event indices
+ * virtq_used_event() - Get location of used event indices
  *		      (only with VIRTIO_F_EVENT_IDX)
- * @vq		Virtqueue
+ * @vq:		Virtqueue
  *
  * Return: return the location of the used event index
  */
@@ -170,7 +170,7 @@ static inline uint16_t *virtq_used_event(const struct vu_virtq *vq)
 
 /**
  * vring_get_used_event() - Get the used event from the available ring
- * @vq		Virtqueue
+ * @vq:		Virtqueue
  *
  * Return: the used event (available only if VIRTIO_RING_F_EVENT_IDX is set)
  *         used_event is a performant alternative where the driver
@@ -235,6 +235,7 @@ static int virtqueue_read_indirect_desc(const struct vu_dev *dev,
 		memcpy(desc, orig_desc, read_len);
 		len -= read_len;
 		addr += read_len;
+		/* NOLINTNEXTLINE(bugprone-sizeof-expression,cert-arr39-c) */
 		desc += read_len / sizeof(struct vring_desc);
 	}
 
@@ -243,9 +244,9 @@ static int virtqueue_read_indirect_desc(const struct vu_dev *dev,
 
 /**
  * enum virtqueue_read_desc_state - State in the descriptor chain
- * @VIRTQUEUE_READ_DESC_ERROR	Found an invalid descriptor
- * @VIRTQUEUE_READ_DESC_DONE	No more descriptors in the chain
- * @VIRTQUEUE_READ_DESC_MORE	there are more descriptors in the chain
+ * @VIRTQUEUE_READ_DESC_ERROR:	Found an invalid descriptor
+ * @VIRTQUEUE_READ_DESC_DONE:	No more descriptors in the chain
+ * @VIRTQUEUE_READ_DESC_MORE:	There are more descriptors in the chain
  */
 enum virtqueue_read_desc_state {
 	VIRTQUEUE_READ_DESC_ERROR = -1,
@@ -286,7 +287,7 @@ static int virtqueue_read_next_desc(const struct vring_desc *desc,
  *
  * Return: true if the virtqueue is empty, false otherwise
  */
-bool vu_queue_empty(struct vu_virtq *vq)
+static bool vu_queue_empty(struct vu_virtq *vq)
 {
 	if (!vq->vring.avail)
 		return true;
@@ -346,8 +347,9 @@ void vu_queue_notify(const struct vu_dev *dev, struct vu_virtq *vq)
 		die_perror("Error writing vhost-user queue eventfd");
 }
 
-/* virtq_avail_event() -  Get location of available event indices
- *			      (only with VIRTIO_F_EVENT_IDX)
+/**
+ * virtq_avail_event() -  Get location of available event indices
+ *			  (only with VIRTIO_F_EVENT_IDX)
  * @vq:		Virtqueue
  *
  * Return: return the location of the available event index
@@ -420,8 +422,8 @@ static bool virtqueue_map_desc(const struct vu_dev *dev,
 }
 
 /**
- * vu_queue_map_desc - Map the virtqueue descriptor ring into our virtual
- * 		       address space
+ * vu_queue_map_desc() - Map the virtqueue descriptor ring into our virtual
+ * 			 address space
  * @dev:	Vhost-user device
  * @vq:		Virtqueue
  * @idx:	First descriptor ring entry to map
@@ -504,7 +506,7 @@ static int vu_queue_map_desc(const struct vu_dev *dev,
  * vu_queue_pop() - Pop an entry from the virtqueue
  * @dev:	Vhost-user device
  * @vq:		Virtqueue
- * @elem:	Virtqueue element to file with the entry information
+ * @elem:	Virtqueue element to fill with the entry information
  *
  * Return: -1 if there is an error, 0 otherwise
  */
@@ -544,7 +546,7 @@ int vu_queue_pop(const struct vu_dev *dev, struct vu_virtq *vq,
 }
 
 /**
- * vu_queue_detach_element() - Detach an element from the virqueue
+ * vu_queue_detach_element() - Detach an element from the virtqueue
  * @vq:		Virtqueue
  */
 void vu_queue_detach_element(struct vu_virtq *vq)
@@ -554,7 +556,7 @@ void vu_queue_detach_element(struct vu_virtq *vq)
 }
 
 /**
- * vu_queue_unpop() - Push back the previously popped element from the virqueue
+ * vu_queue_unpop() - Push back the previously popped element from the virtqueue
  * @vq:		Virtqueue
  */
 /* cppcheck-suppress unusedFunction */
@@ -568,6 +570,8 @@ void vu_queue_unpop(struct vu_virtq *vq)
  * vu_queue_rewind() - Push back a given number of popped elements
  * @vq:		Virtqueue
  * @num:	Number of element to unpop
+ *
+ * Return: true on success, false otherwise
  */
 bool vu_queue_rewind(struct vu_virtq *vq, unsigned int num)
 {
@@ -671,9 +675,10 @@ static void vu_log_queue_fill(const struct vu_dev *vdev, struct vu_virtq *vq,
  * @len:	Size of the element
  * @idx:	Used ring entry index
  */
-void vu_queue_fill_by_index(const struct vu_dev *vdev, struct vu_virtq *vq,
-			    unsigned int index, unsigned int len,
-			    unsigned int idx)
+static void vu_queue_fill_by_index(const struct vu_dev *vdev,
+				   struct vu_virtq *vq,
+				   unsigned int index, unsigned int len,
+				   unsigned int idx)
 {
 	struct vring_used_elem uelem;
 
diff --git a/virtio.h b/virtio.h
index 0a59441..d8beb88 100644
--- a/virtio.h
+++ b/virtio.h
@@ -150,7 +150,7 @@ static inline bool has_feature(uint64_t features, unsigned int fbit)
 /**
  * vu_has_feature() - Check if a virtio-net feature is available
  * @vdev:	Vhost-user device
- * @bit:	Feature to check
+ * @fbit:	Feature to check
  *
  * Return:	True if the feature is available
  */
@@ -163,7 +163,7 @@ static inline bool vu_has_feature(const struct vu_dev *vdev,
 /**
  * vu_has_protocol_feature() - Check if a vhost-user feature is available
  * @vdev:	Vhost-user device
- * @bit:	Feature to check
+ * @fbit:	Feature to check
  *
  * Return:	True if the feature is available
  */
@@ -174,16 +174,12 @@ static inline bool vu_has_protocol_feature(const struct vu_dev *vdev,
 	return has_feature(vdev->protocol_features, fbit);
 }
 
-bool vu_queue_empty(struct vu_virtq *vq);
 void vu_queue_notify(const struct vu_dev *dev, struct vu_virtq *vq);
 int vu_queue_pop(const struct vu_dev *dev, struct vu_virtq *vq,
 		 struct vu_virtq_element *elem);
 void vu_queue_detach_element(struct vu_virtq *vq);
 void vu_queue_unpop(struct vu_virtq *vq);
 bool vu_queue_rewind(struct vu_virtq *vq, unsigned int num);
-void vu_queue_fill_by_index(const struct vu_dev *vdev, struct vu_virtq *vq,
-			    unsigned int index, unsigned int len,
-			    unsigned int idx);
 void vu_queue_fill(const struct vu_dev *vdev, struct vu_virtq *vq,
 		   const struct vu_virtq_element *elem, unsigned int len,
 		   unsigned int idx);
diff --git a/vu_common.c b/vu_common.c
index 48826b1..5e6fd4a 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -26,24 +26,25 @@
  * vu_packet_check_range() - Check if a given memory zone is contained in
  * 			     a mapped guest memory region
  * @buf:	Array of the available memory regions
- * @offset:	Offset of data range in packet descriptor
+ * @ptr:	Start of desired data range
- * @size:	Length of desired data range
+ * @len:	Length of desired data range
- * @start:	Start of the packet descriptor
  *
  * Return: 0 if the zone is in a mapped memory region, -1 otherwise
  */
-int vu_packet_check_range(void *buf, size_t offset, size_t len,
-			  const char *start)
+int vu_packet_check_range(void *buf, const char *ptr, size_t len)
 {
 	struct vu_dev_region *dev_region;
 
 	for (dev_region = buf; dev_region->mmap_addr; dev_region++) {
+		uintptr_t base_addr = dev_region->mmap_addr +
+			dev_region->mmap_offset;
 		/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
-		char *m = (char *)(uintptr_t)dev_region->mmap_addr;
+		const char *base = (const char *)base_addr;
 
-		if (m <= start &&
-		    start + offset + len <= m + dev_region->mmap_offset +
-					       dev_region->size)
+		ASSERT(base_addr >= dev_region->mmap_addr);
+
+		if (len <= dev_region->size && base <= ptr &&
+		    (size_t)(ptr - base) <= dev_region->size - len)
 			return 0;
 	}
 
@@ -194,7 +195,7 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
 			tap_add_packet(vdev->context,
 				       elem[count].out_sg[0].iov_len - hdrlen,
 				       (char *)elem[count].out_sg[0].iov_base +
-				        hdrlen);
+				       hdrlen, now);
 		} else {
 			/* vnet header can be in a separate iovec */
 			if (elem[count].out_num != 2) {
@@ -206,7 +207,8 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
 			} else {
 				tap_add_packet(vdev->context,
 					       elem[count].out_sg[1].iov_len,
-					       (char *)elem[count].out_sg[1].iov_base);
+					       (char *)elem[count].out_sg[1].iov_base,
+					       now);
 			}
 		}