cppcheck: Don't check the system headers

We pass -I options to cppcheck so that it will find the system headers. Then we need to pass a bunch more options to suppress the zillions of cppcheck errors found in those headers. It turns out, however, that it's not recommended to give the system headers to cppcheck anyway. Instead it has built-in knowledge of the ANSI libc and uses that as the basis of its checks. We do need to suppress missingIncludeSystem warnings instead though. Not bothering with the system headers makes the cppcheck runtime go from ~37s to ~14s on my machine, which is a pretty nice win. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
linux_dep: Fix CLOSE_RANGE_UNSHARE availability handling
2024-11-08 08:26:21 +01:00 · 2024-11-08 08:26:17 +01:00 · 2024-11-08 08:26:15 +01:00 · 2024-11-08 08:25:58 +01:00 · 2024-11-08 08:24:58 +01:00 · 2024-11-08 08:24:52 +01:00
64 changed files with 1756 additions and 1180 deletions
--- a/.clang-format
+++ b/.clang-format
@ -0,0 +1,126 @@
 # SPDX-License-Identifier: GPL-2.0
 #
 # clang-format configuration file. Intended for clang-format >= 11.
 #
 # For more information, see:
 #
 #   Documentation/dev-tools/clang-format.rst
 #   https://clang.llvm.org/docs/ClangFormat.html
 #   https://clang.llvm.org/docs/ClangFormatStyleOptions.html
 #
 ---
 AccessModifierOffset: -4
 AlignAfterOpenBracket: Align
 AlignConsecutiveAssignments: false
 AlignConsecutiveDeclarations: false
 AlignEscapedNewlines: Left
 AlignOperands: true
 AlignTrailingComments: false
 AllowAllParametersOfDeclarationOnNextLine: false
 AllowShortBlocksOnASingleLine: false
 AllowShortCaseLabelsOnASingleLine: false
 AllowShortFunctionsOnASingleLine: None
 AllowShortIfStatementsOnASingleLine: false
 AllowShortLoopsOnASingleLine: false
 AlwaysBreakAfterDefinitionReturnType: None
 AlwaysBreakAfterReturnType: None
 AlwaysBreakBeforeMultilineStrings: false
 AlwaysBreakTemplateDeclarations: false
 BinPackArguments: true
 BinPackParameters: true
 BraceWrapping:
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: true
  AfterNamespace: true
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  IndentBraces: false
  SplitEmptyFunction: true
  SplitEmptyRecord: true
  SplitEmptyNamespace: true
 BreakBeforeBinaryOperators: None
 BreakBeforeBraces: Custom
 BreakBeforeInheritanceComma: false
 BreakBeforeTernaryOperators: false
 BreakConstructorInitializersBeforeComma: false
 BreakConstructorInitializers: BeforeComma
 BreakAfterJavaFieldAnnotations: false
 BreakStringLiterals: false
 ColumnLimit: 80
 CommentPragmas: '^ IWYU pragma:'
 CompactNamespaces: false
 ConstructorInitializerAllOnOneLineOrOnePerLine: false
 ConstructorInitializerIndentWidth: 8
 ContinuationIndentWidth: 8
 Cpp11BracedListStyle: false
 DerivePointerAlignment: false
 DisableFormat: false
 ExperimentalAutoDetectBinPacking: false
 FixNamespaceComments: false
 # Taken from:
 #   git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ tools/ \
 #   | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$,  - '\1'," \
 #   | LC_ALL=C sort -u
 ForEachMacros:
  - 'for_each_nst'
 IncludeBlocks: Preserve
 IncludeCategories:
  - Regex: '.*'
    Priority: 1
 IncludeIsMainRegex: '(Test)?$'
 IndentCaseLabels: false
 IndentGotoLabels: false
 IndentPPDirectives: None
 IndentWidth: 8
 IndentWrappedFunctionNames: false
 JavaScriptQuotes: Leave
 JavaScriptWrapImports: true
 KeepEmptyLinesAtTheStartOfBlocks: false
 MacroBlockBegin: ''
 MacroBlockEnd: ''
 MaxEmptyLinesToKeep: 1
 NamespaceIndentation: None
 ObjCBinPackProtocolList: Auto
 ObjCBlockIndentWidth: 8
 ObjCSpaceAfterProperty: true
 ObjCSpaceBeforeProtocolList: true
 # Taken from git's rules
 PenaltyBreakAssignment: 10
 PenaltyBreakBeforeFirstCallParameter: 30
 PenaltyBreakComment: 10
 PenaltyBreakFirstLessLess: 0
 PenaltyBreakString: 10
 PenaltyExcessCharacter: 100
 PenaltyReturnTypeOnItsOwnLine: 60
 PointerAlignment: Right
 ReflowComments: false
 SortIncludes: false
 SortUsingDeclarations: false
 SpaceAfterCStyleCast: false
 SpaceAfterTemplateKeyword: true
 SpaceBeforeAssignmentOperators: true
 SpaceBeforeCtorInitializerColon: true
 SpaceBeforeInheritanceColon: true
 SpaceBeforeParens: ControlStatementsExceptForEachMacros
 SpaceBeforeRangeBasedForLoopColon: true
 SpaceInEmptyParentheses: false
 SpacesBeforeTrailingComments: 1
 SpacesInAngles: false
 SpacesInContainerLiterals: false
 SpacesInCStyleCastParentheses: false
 SpacesInParentheses: false
 SpacesInSquareBrackets: false
 Standard: Cpp03
 TabWidth: 8
 UseTab: Always
 ...
--- a/.clang-tidy
+++ b/.clang-tidy
@ -0,0 +1,93 @@
 ---
 Checks:
    - "clang-diagnostic-*,clang-analyzer-*,*,-modernize-*"
    #	TODO: enable once https://bugs.llvm.org/show_bug.cgi?id=41311 is fixed
    - "-clang-analyzer-valist.Uninitialized"
    #	Dubious value, would kill readability
    - "-cppcoreguidelines-init-variables"
    #	Dubious value over the compiler's built-in warning.  Would
    #	increase verbosity.
    - "-bugprone-assignment-in-if-condition"
    #	Debatable whether these improve readability, right now it would look
    #	like a mess
    - "-google-readability-braces-around-statements"
    - "-hicpp-braces-around-statements"
    - "-readability-braces-around-statements"
    #	TODO: in most cases they are justified, but probably not everywhere
    #
    - "-readability-magic-numbers"
    - "-cppcoreguidelines-avoid-magic-numbers"
    #	TODO: this is Linux-only for the moment, nice to fix eventually
    - "-llvmlibc-restrict-system-libc-headers"
    #	Those are needed for syscalls, epoll_wait flags, etc.
    - "-hicpp-signed-bitwise"
    #	Probably not doable to implement this without plain memcpy(), memset()
    - "-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling"
    #	TODO: not really important, but nice to fix eventually
    - "-llvm-include-order"
    #	Dubious value, would kill readability
    - "-readability-isolate-declaration"
    #	TODO: nice to fix eventually
    - "-bugprone-narrowing-conversions"
    - "-cppcoreguidelines-narrowing-conversions"
    #	TODO: check, fix, and more in general constify wherever possible
    - "-cppcoreguidelines-avoid-non-const-global-variables"
    #	TODO: check paths where it might make sense to improve performance
    - "-altera-unroll-loops"
    - "-altera-id-dependent-backward-branch"
    #	Not much can be done about them other than being careful
    - "-bugprone-easily-swappable-parameters"
    #	TODO: split reported functions
    - "-readability-function-cognitive-complexity"
    #	"Poor" alignment needed for structs reflecting message formats/headers
    - "-altera-struct-pack-align"
    #	TODO: check again if multithreading is implemented
    - "-concurrency-mt-unsafe"
    #	Complains about any identifier <3 characters, reasonable for
    #	globals, pointlessly verbose for locals and parameters.
    - "-readability-identifier-length"
    #	Wants to include headers which *directly* provide the things
    #	we use.  That sounds nice, but means it will often want an OS
    #	specific header instead of a mostly standard one, such as
    #	<linux/limits.h> instead of <limits.h>.
    - "-misc-include-cleaner"
    #	Wants to replace all #defines of integers with enums.  Kind of
    #	makes sense when those defines form an enum-like set, but
    #	weird for cases like standalone constants, and causes other
    #	awkwardness for a bunch of cases we use
    - "-cppcoreguidelines-macro-to-enum"
    #	It's been a couple of centuries since multiplication has been granted
    #	precedence over addition in modern mathematical notation. Adding
    #	parentheses to reinforce that certainly won't improve readability.
    - "-readability-math-missing-parentheses"
 WarningsAsErrors: "*"
 HeaderFileExtensions:
    - h
 ImplementationFileExtensions:
    - c
 HeaderFilterRegex: ""
 FormatStyle: none
 CheckOptions:
    bugprone-suspicious-string-compare.WarnOnImplicitComparison: "false"
 SystemHeaders: false
--- a/.clangd
+++ b/.clangd
@ -0,0 +1,3 @@
 CompileFlags:
    # Don't try to interpret our headers as C++
    Add: [-xc, -Wall]
--- a/161
+++ b/161
@ -15,24 +15,11 @@ VERSION ?= $(shell git describe --tags HEAD 2>/dev/null || echo "unknown\ versio
 # the IPv6 socket API? (Linux does)
 DUAL_STACK_SOCKETS := 1
 RLIMIT_STACK_VAL := $(shell /bin/sh -c 'ulimit -s')
 ifeq ($(RLIMIT_STACK_VAL),unlimited)
 RLIMIT_STACK_VAL := 1024
 endif
 TARGET ?= $(shell $(CC) -dumpmachine)
 # Get 'uname -m'-like architecture description for target
 TARGET_ARCH := $(shell echo $(TARGET) | cut -f1 -d- | tr [A-Z] [a-z])
 TARGET_ARCH := $(shell echo $(TARGET_ARCH) | sed 's/powerpc/ppc/')
 AUDIT_ARCH := $(shell echo $(TARGET_ARCH) | tr [a-z] [A-Z] | sed 's/^ARM.*/ARM/')
 AUDIT_ARCH := $(shell echo $(AUDIT_ARCH) | sed 's/I[456]86/I386/')
 AUDIT_ARCH := $(shell echo $(AUDIT_ARCH) | sed 's/PPC64/PPC/')
 AUDIT_ARCH := $(shell echo $(AUDIT_ARCH) | sed 's/PPCLE/PPC64LE/')
 AUDIT_ARCH := $(shell echo $(AUDIT_ARCH) | sed 's/MIPS64EL/MIPSEL64/')
 AUDIT_ARCH := $(shell echo $(AUDIT_ARCH) | sed 's/HPPA/PARISC/')
 AUDIT_ARCH := $(shell echo $(AUDIT_ARCH) | sed 's/SH4/SH/')
 # On some systems enabling optimization also enables source fortification,
 # automagically. Do not override it.
 FORTIFY_FLAG :=
@ -44,10 +31,6 @@ FLAGS := -Wall -Wextra -Wno-format-zero-length
 FLAGS += -pedantic -std=c11 -D_XOPEN_SOURCE=700 -D_GNU_SOURCE
 FLAGS +=  $(FORTIFY_FLAG) -O2 -pie -fPIE
 FLAGS += -DPAGE_SIZE=$(shell getconf PAGE_SIZE)
 FLAGS += -DNETNS_RUN_DIR=\"/run/netns\"
 FLAGS += -DPASST_AUDIT_ARCH=AUDIT_ARCH_$(AUDIT_ARCH)
 FLAGS += -DRLIMIT_STACK_VAL=$(RLIMIT_STACK_VAL)
 FLAGS += -DARCH=\"$(TARGET_ARCH)\"
 FLAGS += -DVERSION=\"$(VERSION)\"
 FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
@ -67,21 +50,6 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
 	udp.h udp_flow.h util.h
 HEADERS = $(PASST_HEADERS) seccomp.h
 C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
 ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
 	FLAGS += -DHAS_SND_WND
 endif
 C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_bytes_acked = 0 };
 ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
 	FLAGS += -DHAS_BYTES_ACKED
 endif
 C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_min_rtt = 0 };
 ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
 	FLAGS += -DHAS_MIN_RTT
 endif
 C := \#include <sys/random.h>\nint main(){int a=getrandom(0, 0, 0);}
 ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
 	FLAGS += -DHAS_GETRANDOM
@ -91,11 +59,6 @@ ifeq ($(shell :|$(CC) -fstack-protector-strong -S -xc - -o - >/dev/null 2>&1; ec
 	FLAGS += -fstack-protector-strong
 endif
 C := \#define _GNU_SOURCE\n\#include <fcntl.h>\nint x = FALLOC_FL_COLLAPSE_RANGE;
 ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
 	EXTRA_SYSCALLS += fallocate
 endif
 prefix		?= /usr/local
 exec_prefix	?= $(prefix)
 bindir		?= $(exec_prefix)/bin
@ -132,7 +95,7 @@ pasta.avx2 pasta.1 pasta: pasta%: passt%
 	ln -sf $< $@
 qrap: $(QRAP_SRCS) passt.h
-	$(CC) $(FLAGS) $(CFLAGS) $(CPPFLAGS) $(QRAP_SRCS) -o qrap $(LDFLAGS)
+	$(CC) $(FLAGS) $(CFLAGS) $(CPPFLAGS) -DARCH=\"$(TARGET_ARCH)\" $(QRAP_SRCS) -o qrap $(LDFLAGS)
 valgrind: EXTRA_SYSCALLS += rt_sigprocmask rt_sigtimedwait rt_sigaction	\
 			    rt_sigreturn getpid gettid kill clock_gettime mmap \
@ -196,116 +159,11 @@ docs: README.md
 		done < README.md;					\
 	) > README.plain.md
-# Checkers currently disabled for clang-tidy:
+clang-tidy: $(PASST_SRCS) $(HEADERS)
-# - llvmlibc-restrict-system-libc-headers
+	clang-tidy $(PASST_SRCS) -- $(filter-out -pie,$(FLAGS) $(CFLAGS) $(CPPFLAGS)) \
-#	TODO: this is Linux-only for the moment, nice to fix eventually
+	           -DCLANG_TIDY_58992
 #
 # - google-readability-braces-around-statements
 # - hicpp-braces-around-statements
 # - readability-braces-around-statements
 #	Debatable whether that improves readability, right now it would look
 #	like a mess
 #
 # - readability-magic-numbers
 # - cppcoreguidelines-avoid-magic-numbers
 #	TODO: in most cases they are justified, but probably not everywhere
 #
 # - clang-analyzer-valist.Uninitialized
 #	TODO: enable once https://bugs.llvm.org/show_bug.cgi?id=41311 is fixed
 #
 # - clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling
 #	Probably not doable to impement this without plain memcpy(), memset()
 #
 # - cppcoreguidelines-init-variables
 #	Dubious value, would kill readability
 #
 # - hicpp-signed-bitwise
 #	Those are needed for syscalls, epoll_wait flags, etc.
 #
 # - llvm-include-order
 #	TODO: not really important, but nice to fix eventually
 #
 # - readability-isolate-declaration
 #	Dubious value, would kill readability
 #
 # - bugprone-narrowing-conversions
 # - cppcoreguidelines-narrowing-conversions
 #	TODO: nice to fix eventually
 #
 # - cppcoreguidelines-avoid-non-const-global-variables
 #	TODO: check, fix, and more in general constify wherever possible
 #
 # - altera-unroll-loops
 # - altera-id-dependent-backward-branch
 #	TODO: check paths where it might make sense to improve performance
 #
 # - bugprone-easily-swappable-parameters
 #	Not much can be done about them other than being careful
 #
 # - readability-function-cognitive-complexity
 #	TODO: split reported functions
 #
 # - altera-struct-pack-align
 #	"Poor" alignment needed for structs reflecting message formats/headers
 #
 # - concurrency-mt-unsafe
 #	TODO: check again if multithreading is implemented
 #
 # - readability-identifier-length
 #	Complains about any identifier <3 characters, reasonable for
 #	globals, pointlessly verbose for locals and parameters.
 #
 # - bugprone-assignment-in-if-condition
 #	Dubious value over the compiler's built-in warning.  Would
 #	increase verbosity.
 #
 # - misc-include-cleaner
 #	Wants to include headers which *directly* provide the things
 #	we use.  That sounds nice, but means it will often want a OS
 #	specific header instead of a mostly standard one, such as
 #	<linux/limits.h> instead of <limits.h>.
 #
 # - cppcoreguidelines-macro-to-enum
 #	Want to replace all #defines of integers with enums.  Kind of
 #	makes sense when those defines form an enum-like set, but
 #	weird for cases like standalone constants, and causes other
 #	awkwardness for a bunch of cases we use
-clang-tidy: $(SRCS) $(HEADERS)
+cppcheck: $(PASST_SRCS) $(HEADERS)
 	clang-tidy -checks=*,-modernize-*,\
 	-clang-analyzer-valist.Uninitialized,\
 	-cppcoreguidelines-init-variables,\
 	-bugprone-assignment-in-if-condition,\
 	-google-readability-braces-around-statements,\
 	-hicpp-braces-around-statements,\
 	-readability-braces-around-statements,\
 	-readability-magic-numbers,\
 	-llvmlibc-restrict-system-libc-headers,\
 	-hicpp-signed-bitwise,\
 	-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,\
 	-llvm-include-order,\
 	-cppcoreguidelines-avoid-magic-numbers,\
 	-readability-isolate-declaration,\
 	-bugprone-narrowing-conversions,\
 	-cppcoreguidelines-narrowing-conversions,\
 	-cppcoreguidelines-avoid-non-const-global-variables,\
 	-altera-unroll-loops,-altera-id-dependent-backward-branch,\
 	-bugprone-easily-swappable-parameters,\
 	-readability-function-cognitive-complexity,\
 	-altera-struct-pack-align,\
 	-concurrency-mt-unsafe,\
 	-readability-identifier-length,\
 	-misc-include-cleaner,\
 	-cppcoreguidelines-macro-to-enum \
 	-config='{CheckOptions: [{key: bugprone-suspicious-string-compare.WarnOnImplicitComparison, value: "false"}]}' \
 	--warnings-as-errors=* $(SRCS) -- $(filter-out -pie,$(FLAGS) $(CFLAGS) $(CPPFLAGS)) -DCLANG_TIDY_58992
 SYSTEM_INCLUDES := /usr/include $(wildcard /usr/include/$(TARGET))
 ifeq ($(shell $(CC) -v 2>&1 | grep -c "gcc version"),1)
 VER := $(shell $(CC) -dumpversion)
 SYSTEM_INCLUDES += /usr/lib/gcc/$(TARGET)/$(VER)/include
 endif
 cppcheck: $(SRCS) $(HEADERS)
 	if cppcheck --check-level=exhaustive /dev/null > /dev/null 2>&1; then \
 		CPPCHECK_EXHAUSTIVE="--check-level=exhaustive";		\
 	else								\
@ -314,11 +172,8 @@ cppcheck: $(SRCS) $(HEADERS)
 	cppcheck --std=c11 --error-exitcode=1 --enable=all --force	\
 	--inconclusive --library=posix --quiet				\
 	$${CPPCHECK_EXHAUSTIVE}						\
 	$(SYSTEM_INCLUDES:%=-I%)					\
 	$(SYSTEM_INCLUDES:%=--config-exclude=%)				\
 	$(SYSTEM_INCLUDES:%=--suppress=*:%/*)				\
 	$(SYSTEM_INCLUDES:%=--suppress=unmatchedSuppression:%/*)	\
 	--inline-suppr							\
 	--suppress=missingIncludeSystem \
 	--suppress=unusedStructMember					\
-	$(filter -D%,$(FLAGS) $(CFLAGS) $(CPPFLAGS))			\
+	$(filter -D%,$(FLAGS) $(CFLAGS) $(CPPFLAGS)) -D CPPCHECK_6936  \
-	$(SRCS) $(HEADERS)
+	$(PASST_SRCS) $(HEADERS)
--- a/arch.c
+++ b/arch.c
@ -19,6 +19,7 @@
 #include <unistd.h>
 #include "log.h"
 #include "util.h"
 /**
 * arch_avx2_exec() - Switch to AVX2 build if supported
@ -40,8 +41,11 @@ void arch_avx2_exec(char **argv)
 	if (__builtin_cpu_supports("avx2")) {
 		char new_path[PATH_MAX + sizeof(".avx2")];
-		snprintf(new_path, PATH_MAX + sizeof(".avx2"), "%s.avx2", exe);
+		if (snprintf_check(new_path, PATH_MAX + sizeof(".avx2"),
-		execve(new_path, argv, environ);
+				   "%s.avx2", exe))
 			die_perror("Can't build AVX2 executable path");
 		execv(new_path, argv);
 		warn_perror("Can't run AVX2 build, using non-AVX2 version");
 	}
 }
--- a/arp.c
+++ b/arp.c
@ -59,14 +59,12 @@ int arp(const struct ctx *c, const struct pool *p)
 	    ah->ar_op  != htons(ARPOP_REQUEST))
 		return 1;
-	/* Discard announcements (but not 0.0.0.0 "probes"): we might have the
+	/* Discard announcements, but not 0.0.0.0 "probes" */
-	 * same IP address, hide that.
+	if (memcmp(am->sip, &in4addr_any, sizeof(am->sip)) &&
 	 */
 	if (memcmp(am->sip, (unsigned char[4]){ 0 }, sizeof(am->tip)) &&
 	    !memcmp(am->sip, am->tip, sizeof(am->sip)))
 		return 1;
-	/* Don't resolve our own address, either. */
+	/* Don't resolve the guest's assigned address, either. */
 	if (!memcmp(am->tip, &c->ip4.addr, sizeof(am->tip)))
 		return 1;
--- a/checksum.c
+++ b/checksum.c
@ -59,6 +59,7 @@
 #include "util.h"
 #include "ip.h"
 #include "checksum.h"
 #include "iov.h"
 /* Checksums are optional for UDP over IPv4, so we usually just set
 * them to 0.  Change this to 1 to calculate real UDP over IPv4
@ -165,22 +166,24 @@ uint32_t proto_ipv4_header_psum(uint16_t l4len, uint8_t protocol,
 * @udp4hr:	UDP header, initialised apart from checksum
 * @saddr:	IPv4 source address
 * @daddr:	IPv4 destination address
- * @payload:	UDP packet payload
+ * @iov:	Pointer to the array of IO vectors
- * @dlen:	Length of @payload (not including UDP header)
+ * @iov_cnt:	Length of the array
 * @offset:	UDP payload offset in the iovec array
 */
 void csum_udp4(struct udphdr *udp4hr,
 	       struct in_addr saddr, struct in_addr daddr,
-	       const void *payload, size_t dlen)
+	       const struct iovec *iov, int iov_cnt, size_t offset)
 {
 	/* UDP checksums are optional, so don't bother */
 	udp4hr->check = 0;
 	if (UDP4_REAL_CHECKSUMS) {
-		uint16_t l4len = dlen + sizeof(struct udphdr);
+		uint16_t l4len = iov_size(iov, iov_cnt) - offset +
 				 sizeof(struct udphdr);
 		uint32_t psum = proto_ipv4_header_psum(l4len, IPPROTO_UDP,
 						       saddr, daddr);
 		psum = csum_unfolded(udp4hr, sizeof(struct udphdr), psum);
-		udp4hr->check = csum(payload, dlen, psum);
+		udp4hr->check = csum_iov(iov, iov_cnt, offset, psum);
 	}
 }
@ -226,19 +229,24 @@ uint32_t proto_ipv6_header_psum(uint16_t payload_len, uint8_t protocol,
 /**
 * csum_udp6() - Calculate and set checksum for a UDP over IPv6 packet
 * @udp6hr:	UDP header, initialised apart from checksum
- * @payload:	UDP packet payload
+ * @saddr:	Source address
- * @dlen:	Length of @payload (not including UDP header)
+ * @daddr:	Destination address
 * @iov:	Pointer to the array of IO vectors
 * @iov_cnt:	Length of the array
 * @offset:	UDP payload offset in the iovec array
 */
 void csum_udp6(struct udphdr *udp6hr,
 	       const struct in6_addr *saddr, const struct in6_addr *daddr,
-	       const void *payload, size_t dlen)
+	       const struct iovec *iov, int iov_cnt, size_t offset)
 {
-	uint32_t psum = proto_ipv6_header_psum(dlen + sizeof(struct udphdr),
+	uint16_t l4len = iov_size(iov, iov_cnt) - offset +
-					       IPPROTO_UDP, saddr, daddr);
+			 sizeof(struct udphdr);
 	uint32_t psum = proto_ipv6_header_psum(l4len, IPPROTO_UDP,
 					       saddr, daddr);
 	udp6hr->check = 0;
 	psum = csum_unfolded(udp6hr, sizeof(struct udphdr), psum);
-	udp6hr->check = csum(payload, dlen, psum);
+	udp6hr->check = csum_iov(iov, iov_cnt, offset, psum);
 }
 /**
@ -497,16 +505,26 @@ uint16_t csum(const void *buf, size_t len, uint32_t init)
 *
 * @iov		Pointer to the array of IO vectors
 * @n		Length of the array
 * @offset:	Offset of the data to checksum within the full data length
 * @init	Initial 32-bit checksum, 0 for no pre-computed checksum
 *
 * Return: 16-bit folded, complemented checksum
 */
-/* cppcheck-suppress unusedFunction */
+uint16_t csum_iov(const struct iovec *iov, size_t n, size_t offset,
-uint16_t csum_iov(const struct iovec *iov, size_t n, uint32_t init)
+		  uint32_t init)
 {
 	unsigned int i;
 	size_t first;
-	for (i = 0; i < n; i++)
+	i = iov_skip_bytes(iov, n, offset, &first);
 	if (i >= n)
 		return (uint16_t)~csum_fold(init);
 	init = csum_unfolded((char *)iov[i].iov_base + first,
 			     iov[i].iov_len - first, init);
 	i++;
 	for (; i < n; i++)
 		init = csum_unfolded(iov[i].iov_base, iov[i].iov_len, init);
 	return (uint16_t)~csum_fold(init);
--- a/checksum.h
+++ b/checksum.h
@ -19,19 +19,20 @@ uint32_t proto_ipv4_header_psum(uint16_t l4len, uint8_t protocol,
 				struct in_addr saddr, struct in_addr daddr);
 void csum_udp4(struct udphdr *udp4hr,
 	       struct in_addr saddr, struct in_addr daddr,
-	       const void *payload, size_t dlen);
+	       const struct iovec *iov, int iov_cnt, size_t offset);
 void csum_icmp4(struct icmphdr *icmp4hr, const void *payload, size_t dlen);
 uint32_t proto_ipv6_header_psum(uint16_t payload_len, uint8_t protocol,
 				const struct in6_addr *saddr,
 				const struct in6_addr *daddr);
 void csum_udp6(struct udphdr *udp6hr,
 	       const struct in6_addr *saddr, const struct in6_addr *daddr,
-	       const void *payload, size_t dlen);
+	       const struct iovec *iov, int iov_cnt, size_t offset);
 void csum_icmp6(struct icmp6hdr *icmp6hr,
 		const struct in6_addr *saddr, const struct in6_addr *daddr,
 		const void *payload, size_t dlen);
 uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init);
 uint16_t csum(const void *buf, size_t len, uint32_t init);
-uint16_t csum_iov(const struct iovec *iov, size_t n, uint32_t init);
+uint16_t csum_iov(const struct iovec *iov, size_t n, size_t offset,
 		  uint32_t init);
 #endif /* CHECKSUM_H */
--- a/conf.c
+++ b/conf.c
@ -46,6 +46,8 @@
 #include "isolation.h"
 #include "log.h"
 #define NETNS_RUN_DIR	"/run/netns"
 /**
 * next_chunk - Return the next piece of a string delimited by a character
 * @s:		String to search
@ -116,11 +118,10 @@ static int parse_port_range(const char *s, char **endptr,
 static void conf_ports(const struct ctx *c, char optname, const char *optarg,
 		       struct fwd_ports *fwd)
 {
-	char addr_buf[sizeof(struct in6_addr)] = { 0 }, *addr = addr_buf;
+	union inany_addr addr_buf = inany_any6, *addr = &addr_buf;
 	char buf[BUFSIZ], *spec, *ifname = NULL, *p;
 	bool exclude_only = true, bound_one = false;
 	uint8_t exclude[PORT_BITMAP_SIZE] = { 0 };
 	sa_family_t af = AF_UNSPEC;
 	unsigned i;
 	int ret;
@ -166,15 +167,13 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg,
 			bitmap_set(fwd->map, i);
 			if (optname == 't') {
-				ret = tcp_sock_init(c, AF_UNSPEC, NULL, NULL,
+				ret = tcp_sock_init(c, NULL, NULL, i);
 						    i);
 				if (ret == -ENFILE || ret == -EMFILE)
 					goto enfile;
 				if (!ret)
 					bound_one = true;
 			} else if (optname == 'u') {
-				ret = udp_sock_init(c, 0, AF_UNSPEC, NULL, NULL,
+				ret = udp_sock_init(c, 0, NULL, NULL, i);
 						    i);
 				if (ret == -ENFILE || ret == -EMFILE)
 					goto enfile;
 				if (!ret)
@ -226,11 +225,7 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg,
 				p++;
 			}
-			if (inet_pton(AF_INET, p, addr))
+			if (!inany_pton(p, addr))
 				af = AF_INET;
 			else if (inet_pton(AF_INET6, p, addr))
 				af = AF_INET6;
 			else
 				goto bad;
 		}
 	} else {
@ -276,13 +271,13 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg,
 			bitmap_set(fwd->map, i);
 			if (optname == 't') {
-				ret = tcp_sock_init(c, af, addr, ifname, i);
+				ret = tcp_sock_init(c, addr, ifname, i);
 				if (ret == -ENFILE || ret == -EMFILE)
 					goto enfile;
 				if (!ret)
 					bound_one = true;
 			} else if (optname == 'u') {
-				ret = udp_sock_init(c, 0, af, addr, ifname, i);
+				ret = udp_sock_init(c, 0, addr, ifname, i);
 				if (ret == -ENFILE || ret == -EMFILE)
 					goto enfile;
 				if (!ret)
@ -338,9 +333,9 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg,
 			ret = 0;
 			if (optname == 't')
-				ret = tcp_sock_init(c, af, addr, ifname, i);
+				ret = tcp_sock_init(c, addr, ifname, i);
 			else if (optname == 'u')
-				ret = udp_sock_init(c, 0, af, addr, ifname, i);
+				ret = udp_sock_init(c, 0, addr, ifname, i);
 			if (ret)
 				goto bind_fail;
 		}
@ -581,10 +576,15 @@ static void conf_pasta_ns(int *netns_only, char *userns, char *netns,
 			if (pidval < 0 || pidval > INT_MAX)
 				die("Invalid PID %s", argv[optind]);
-			snprintf(netns, PATH_MAX, "/proc/%ld/ns/net", pidval);
+			if (snprintf_check(netns, PATH_MAX,
-			if (!*userns)
+					   "/proc/%ld/ns/net", pidval))
-				snprintf(userns, PATH_MAX, "/proc/%ld/ns/user",
+				die_perror("Can't build netns path");
-					 pidval);
+
 			if (!*userns) {
 				if (snprintf_check(userns, PATH_MAX,
 						   "/proc/%ld/ns/user", pidval))
 					die_perror("Can't build userns path");
 			}
 		}
 	}
@ -735,19 +735,19 @@ static unsigned int conf_ip6(unsigned int ifi, struct ip6_ctx *ip6)
 static void usage(const char *name, FILE *f, int status)
 {
 	if (strstr(name, "pasta")) {
-		fprintf(f, "Usage: %s [OPTION]... [COMMAND] [ARGS]...\n", name);
+		FPRINTF(f, "Usage: %s [OPTION]... [COMMAND] [ARGS]...\n", name);
-		fprintf(f, "       %s [OPTION]... PID\n", name);
+		FPRINTF(f, "       %s [OPTION]... PID\n", name);
-		fprintf(f, "       %s [OPTION]... --netns [PATH|NAME]\n", name);
+		FPRINTF(f, "       %s [OPTION]... --netns [PATH|NAME]\n", name);
-		fprintf(f,
+		FPRINTF(f,
 			"\n"
 			"Without PID or --netns, run the given command or a\n"
 			"default shell in a new network and user namespace, and\n"
 			"connect it via pasta.\n");
 	} else {
-		fprintf(f, "Usage: %s [OPTION]...\n", name);
+		FPRINTF(f, "Usage: %s [OPTION]...\n", name);
 	}
-	fprintf(f,
+	FPRINTF(f,
 		"\n"
 		"  -d, --debug		Be verbose\n"
 		"      --trace		Be extra verbose, implies --debug\n"
@ -764,17 +764,17 @@ static void usage(const char *name, FILE *f, int status)
 		"  --version		Show version and exit\n");
 	if (strstr(name, "pasta")) {
-		fprintf(f,
+		FPRINTF(f,
 			"  -I, --ns-ifname NAME	namespace interface name\n"
 			"    default: same interface name as external one\n");
 	} else {
-		fprintf(f,
+		FPRINTF(f,
 			"  -s, --socket PATH	UNIX domain socket path\n"
 			"    default: probe free path starting from "
 			UNIX_SOCK_PATH "\n", 1);
 	}
-	fprintf(f,
+	FPRINTF(f,
 		"  -F, --fd FD		Use FD as pre-opened connected socket\n"
 		"  -p, --pcap FILE	Log tap-facing traffic to pcap file\n"
 		"  -P, --pid FILE	Write own PID to the given file\n"
@ -805,28 +805,28 @@ static void usage(const char *name, FILE *f, int status)
 		"    can be specified multiple times\n"
 		"    a single, empty option disables DNS information\n");
 	if (strstr(name, "pasta"))
-		fprintf(f, "    default: don't use any addresses\n");
+		FPRINTF(f, "    default: don't use any addresses\n");
 	else
-		fprintf(f, "    default: use addresses from /etc/resolv.conf\n");
+		FPRINTF(f, "    default: use addresses from /etc/resolv.conf\n");
-	fprintf(f,
+	FPRINTF(f,
 		"  -S, --search LIST	Space-separated list, search domains\n"
 		"    a single, empty option disables the DNS search list\n");
 	if (strstr(name, "pasta"))
-		fprintf(f, "    default: don't use any search list\n");
+		FPRINTF(f, "    default: don't use any search list\n");
 	else
-		fprintf(f, "    default: use search list from /etc/resolv.conf\n");
+		FPRINTF(f, "    default: use search list from /etc/resolv.conf\n");
 	if (strstr(name, "pasta"))
-		fprintf(f, "  --dhcp-dns	\tPass DNS list via DHCP/DHCPv6/NDP\n");
+		FPRINTF(f, "  --dhcp-dns	\tPass DNS list via DHCP/DHCPv6/NDP\n");
 	else
-		fprintf(f, "  --no-dhcp-dns	No DNS list in DHCP/DHCPv6/NDP\n");
+		FPRINTF(f, "  --no-dhcp-dns	No DNS list in DHCP/DHCPv6/NDP\n");
 	if (strstr(name, "pasta"))
-		fprintf(f, "  --dhcp-search	Pass list via DHCP/DHCPv6/NDP\n");
+		FPRINTF(f, "  --dhcp-search	Pass list via DHCP/DHCPv6/NDP\n");
 	else
-		fprintf(f, "  --no-dhcp-search	No list in DHCP/DHCPv6/NDP\n");
+		FPRINTF(f, "  --no-dhcp-search	No list in DHCP/DHCPv6/NDP\n");
-	fprintf(f,
+	FPRINTF(f,
 		"  --map-host-loopback ADDR	Translate ADDR to refer to host\n"
 	        "    can be specified zero to two times (for IPv4 and IPv6)\n"
 		"    default: gateway address\n"
@ -836,6 +836,9 @@ static void usage(const char *name, FILE *f, int status)
 		"  --dns-forward ADDR	Forward DNS queries sent to ADDR\n"
 		"    can be specified zero to two times (for IPv4 and IPv6)\n"
 		"    default: don't forward DNS queries\n"
 		"  --dns-host ADDR	Host nameserver to direct queries to\n"
 		"    can be specified zero to two times (for IPv4 and IPv6)\n"
 		"    default: first nameserver from host's /etc/resolv.conf\n"
 		"  --no-tcp		Disable TCP protocol handler\n"
 		"  --no-udp		Disable UDP protocol handler\n"
 		"  --no-icmp		Disable ICMP/ICMPv6 protocol handler\n"
@ -843,6 +846,7 @@ static void usage(const char *name, FILE *f, int status)
 		"  --no-ndp		Disable NDP responses\n"
 		"  --no-dhcpv6		Disable DHCPv6 server\n"
 		"  --no-ra		Disable router advertisements\n"
 		"  --freebind		Bind to any address for forwarding\n"
 		"  --no-map-gw		Don't map gateway address to host\n"
 		"  -4, --ipv4-only	Enable IPv4 operation only\n"
 		"  -6, --ipv6-only	Enable IPv6 operation only\n");
@ -850,7 +854,7 @@ static void usage(const char *name, FILE *f, int status)
 	if (strstr(name, "pasta"))
 		goto pasta_opts;
-	fprintf(f,
+	FPRINTF(f,
 		"  -1, --one-off	Quit after handling one single client\n"
 		"  -t, --tcp-ports SPEC	TCP port forwarding to guest\n"
 		"    can be specified multiple times\n"
@ -881,7 +885,7 @@ static void usage(const char *name, FILE *f, int status)
 pasta_opts:
-	fprintf(f,
+	FPRINTF(f,
 		"  -t, --tcp-ports SPEC	TCP port forwarding to namespace\n"
 		"    can be specified multiple times\n"
 		"    SPEC can be:\n"
@ -915,6 +919,9 @@ pasta_opts:
 		"  -U, --udp-ns SPEC	UDP port forwarding to init namespace\n"
 		"    SPEC is as described above\n"
 		"    default: auto\n"
 		"  --host-lo-to-ns-lo	DEPRECATED:\n"
 		"			Translate host-loopback forwards to\n"
 		"			namespace loopback\n"
 		"  --userns NSPATH 	Target user namespace to join\n"
 		"  --netns PATH|NAME	Target network namespace to join\n"
 		"  --netns-only		Don't join existing user namespace\n"
@ -1189,7 +1196,11 @@ static void conf_open_files(struct ctx *c)
 	if (c->mode != MODE_PASTA && c->fd_tap == -1)
 		c->fd_tap_listen = tap_sock_unix_open(c->sock_path);
-	c->pidfile_fd = pidfile_open(c->pidfile);
+	if (*c->pidfile) {
 		c->pidfile_fd = output_file_open(c->pidfile, O_WRONLY);
 		if (c->pidfile_fd < 0)
 			die_perror("Couldn't open PID file %s", c->pidfile);
 	}
 }
 /**
@ -1262,6 +1273,7 @@ void conf(struct ctx *c, int argc, char **argv)
 		{"no-dhcpv6",	no_argument,		&c->no_dhcpv6,	1 },
 		{"no-ndp",	no_argument,		&c->no_ndp,	1 },
 		{"no-ra",	no_argument,		&c->no_ra,	1 },
 		{"freebind",	no_argument,		&c->freebind,	1 },
 		{"no-map-gw",	no_argument,		&no_map_gw,	1 },
 		{"ipv4-only",	no_argument,		NULL,		'4' },
 		{"ipv6-only",	no_argument,		NULL,		'6' },
@ -1291,6 +1303,8 @@ void conf(struct ctx *c, int argc, char **argv)
 		{"netns-only",	no_argument,		NULL,		20 },
 		{"map-host-loopback", required_argument, NULL,		21 },
 		{"map-guest-addr", required_argument,	NULL,		22 },
 		{"host-lo-to-ns-lo", no_argument, 	NULL,		23 },
 		{"dns-host",	required_argument,	NULL,		24 },
 		{ 0 },
 	};
 	const char *logname = (c->mode == MODE_PASTA) ? "pasta" : "passt";
@ -1413,9 +1427,9 @@ void conf(struct ctx *c, int argc, char **argv)
 			break;
 		case 14:
-			fprintf(stdout,
+			FPRINTF(stdout,
 				c->mode == MODE_PASTA ? "pasta " : "passt ");
-			fprintf(stdout, VERSION_BLOB);
+			FPRINTF(stdout, VERSION_BLOB);
 			exit(EXIT_SUCCESS);
 		case 15:
 			ret = snprintf(c->ip4.ifname_out,
@ -1468,6 +1482,23 @@ void conf(struct ctx *c, int argc, char **argv)
 			conf_nat(optarg, &c->ip4.map_guest_addr,
 				 &c->ip6.map_guest_addr, NULL);
 			break;
 		case 23:
 			if (c->mode != MODE_PASTA)
 				die("--host-lo-to-ns-lo is for pasta mode only");
 			c->host_lo_to_ns_lo = 1;
 			break;
 		case 24:
 			if (inet_pton(AF_INET6, optarg, &c->ip6.dns_host) &&
 			    !IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_host))
 				break;
 			if (inet_pton(AF_INET, optarg, &c->ip4.dns_host) &&
 			    !IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_host)   &&
 			    !IN4_IS_ADDR_BROADCAST(&c->ip4.dns_host))
 				break;
 			die("Invalid host nameserver address: %s", optarg);
 			break;
 		case 'd':
 			c->debug = 1;
 			c->quiet = 0;
--- a/contrib/apparmor/abstractions/passt
+++ b/contrib/apparmor/abstractions/passt
@ -34,6 +34,8 @@
  owner @{PROC}/@{pid}/uid_map		r,	# conf_ugid()
  @{PROC}/sys/net/ipv4/ip_local_port_range r,	# fwd_probe_ephemeral()
  network netlink raw,				# nl_sock_init_do(), netlink.c
  network inet stream,				# tcp.c
--- a/contrib/selinux/passt.te
+++ b/contrib/selinux/passt.te
@ -50,6 +50,7 @@ require {
 	type passwd_file_t;
 	class netlink_route_socket { bind create nlmsg_read };
 	type sysctl_net_t;
 	class capability { sys_tty_config setuid setgid };
 	class cap_userns { setpcap sys_admin sys_ptrace };
@ -104,6 +105,8 @@ allow passt_t net_conf_t:lnk_file read;
 allow passt_t tmp_t:sock_file { create unlink write };
 allow passt_t self:netlink_route_socket { bind create nlmsg_read read write setopt };
 kernel_search_network_sysctl(passt_t)
 allow passt_t sysctl_net_t:dir search;
 allow passt_t sysctl_net_t:file { open read };
 corenet_tcp_bind_all_nodes(passt_t)
 corenet_udp_bind_all_nodes(passt_t)
--- a/contrib/selinux/pasta.te
+++ b/contrib/selinux/pasta.te
@ -196,7 +196,7 @@ allow pasta_t ifconfig_var_run_t:dir { read search watch };
 allow pasta_t self:tun_socket create;
 allow pasta_t tun_tap_device_t:chr_file { ioctl open read write };
 allow pasta_t sysctl_net_t:dir search;
-allow pasta_t sysctl_net_t:file { open write };
+allow pasta_t sysctl_net_t:file { open read write };
 allow pasta_t kernel_t:system module_request;
 allow pasta_t nsfs_t:file read;
--- a/dhcpv6.c
+++ b/dhcpv6.c
@ -296,47 +296,42 @@ static struct opt_hdr *dhcpv6_opt(const struct pool *p, size_t *offset,
 static struct opt_hdr *dhcpv6_ia_notonlink(const struct pool *p,
 					   struct in6_addr *la)
 {
 	int ia_types[2] = { OPT_IA_NA, OPT_IA_TA }, *ia_type;
 	const struct opt_ia_addr *opt_addr;
 	char buf[INET6_ADDRSTRLEN];
 	struct in6_addr req_addr;
 	const struct opt_hdr *h;
 	struct opt_hdr *ia;
 	size_t offset;
 	int ia_type;
-	ia_type = OPT_IA_NA;
+	foreach(ia_type, ia_types) {
 ia_ta:
 		offset = 0;
-	while ((ia = dhcpv6_opt(p, &offset, ia_type))) {
+		while ((ia = dhcpv6_opt(p, &offset, *ia_type))) {
 			if (ntohs(ia->l) < OPT_VSIZE(ia_na))
 				return NULL;
 			offset += sizeof(struct opt_ia_na);
 			while ((h = dhcpv6_opt(p, &offset, OPT_IAAADR))) {
 			const struct opt_ia_addr *opt_addr;
 				if (ntohs(h->l) != OPT_VSIZE(ia_addr))
 					return NULL;
 				opt_addr = (const struct opt_ia_addr *)h;
 				req_addr = opt_addr->addr;
-			if (!IN6_ARE_ADDR_EQUAL(la, &req_addr)) {
+				if (!IN6_ARE_ADDR_EQUAL(la, &req_addr))
-				info("DHCPv6: requested address %s not on link",
+					goto err;
 				     inet_ntop(AF_INET6, &req_addr,
 					       buf, sizeof(buf)));
 				return ia;
 			}
 				offset += sizeof(struct opt_ia_addr);
 			}
 		}
 	if (ia_type == OPT_IA_NA) {
 		ia_type = OPT_IA_TA;
 		goto ia_ta;
 	}
 	return NULL;
 err:
 	info("DHCPv6: requested address %s not on link",
 	     inet_ntop(AF_INET6, &req_addr, buf, sizeof(buf)));
 	return ia;
 }
 /**
@ -428,11 +423,11 @@ search:
 int dhcpv6(struct ctx *c, const struct pool *p,
 	   const struct in6_addr *saddr, const struct in6_addr *daddr)
 {
-	struct opt_hdr *ia, *bad_ia, *client_id;
+	const struct opt_hdr *client_id, *server_id, *ia;
 	const struct opt_hdr *server_id;
 	const struct in6_addr *src;
 	const struct msg_hdr *mh;
 	const struct udphdr *uh;
 	struct opt_hdr *bad_ia;
 	size_t mlen, n;
 	uh = packet_get(p, 0, 0, sizeof(*uh), &mlen);
--- a/flow.c
+++ b/flow.c
@ -283,28 +283,23 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
 	       "Flow %u (%s): %s", flow_idx(f), type_or_state, msg);
 }
-/**
+/** flow_log_details_() - Log the details of a flow
- * flow_set_state() - Change flow's state
+ * @f:		flow to log
- * @f:		Flow changing state
+ * @pri:	Log priority
- * @state:	New state
+ * @state:	State to log details according to
 *
 * Logs the details of the flow: endpoints, interfaces, type etc.
 */
-static void flow_set_state(struct flow_common *f, enum flow_state state)
+void flow_log_details_(const struct flow_common *f, int pri,
 		       enum flow_state state)
 {
 	char estr0[INANY_ADDRSTRLEN], fstr0[INANY_ADDRSTRLEN];
 	char estr1[INANY_ADDRSTRLEN], fstr1[INANY_ADDRSTRLEN];
 	const struct flowside *ini = &f->side[INISIDE];
 	const struct flowside *tgt = &f->side[TGTSIDE];
 	uint8_t oldstate = f->state;
-	ASSERT(state < FLOW_NUM_STATES);
+	if (state >= FLOW_STATE_TGT)
-	ASSERT(oldstate < FLOW_NUM_STATES);
+		flow_log_(f, pri,
 	f->state = state;
 	flow_log_(f, LOG_DEBUG, "%s -> %s", flow_state_str[oldstate],
 		  FLOW_STATE(f));
 	if (MAX(state, oldstate) >= FLOW_STATE_TGT)
 		flow_log_(f, LOG_DEBUG,
 			  "%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu",
 			  pif_name(f->pif[INISIDE]),
 			  inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
@ -316,8 +311,8 @@ static void flow_set_state(struct flow_common *f, enum flow_state state)
 			  tgt->oport,
 			  inany_ntop(&tgt->eaddr, estr1, sizeof(estr1)),
 			  tgt->eport);
-	else if (MAX(state, oldstate) >= FLOW_STATE_INI)
+	else if (state >= FLOW_STATE_INI)
-		flow_log_(f, LOG_DEBUG, "%s [%s]:%hu -> [%s]:%hu => ?",
+		flow_log_(f, pri, "%s [%s]:%hu -> [%s]:%hu => ?",
 			  pif_name(f->pif[INISIDE]),
 			  inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
 			  ini->eport,
@ -325,6 +320,25 @@ static void flow_set_state(struct flow_common *f, enum flow_state state)
 			  ini->oport);
 }
 /**
 * flow_set_state() - Move a flow to a new state, logging the transition
 * @f:		Flow whose state is updated
 * @state:	State the flow is moved to
 *
 * Both the previous and the new state are asserted to be below
 * FLOW_NUM_STATES.  The transition is logged at debug priority, followed by
 * the flow details for the higher of the two states.
 */
 static void flow_set_state(struct flow_common *f, enum flow_state state)
 {
 	uint8_t prev = f->state;
 	ASSERT(state < FLOW_NUM_STATES);
 	ASSERT(prev < FLOW_NUM_STATES);
 	/* Update first: FLOW_STATE(f) below is evaluated after the change */
 	f->state = state;
 	flow_log_(f, LOG_DEBUG,
 		  "%s -> %s", flow_state_str[prev], FLOW_STATE(f));
 	flow_log_details_(f, LOG_DEBUG, MAX(state, prev));
 }
 /**
 * flow_initiate_() - Move flow to INI, setting pif[INISIDE]
 * @flow:	Flow to change state
@ -697,7 +711,7 @@ static flow_sidx_t flowside_lookup(const struct ctx *c, uint8_t proto,
 	       !(FLOW_PROTO(&flow->f) == proto &&
 		 flow->f.pif[sidx.sidei] == pif &&
 		 flowside_eq(&flow->f.side[sidx.sidei], side)))
-		b = (b + 1) % FLOW_HASH_SIZE;
+		b = mod_sub(b, 1, FLOW_HASH_SIZE);
 	return flow_hashtab[b];
 }
@ -832,7 +846,8 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
 				closed = icmp_ping_timer(c, &flow->ping, now);
 			break;
 		case FLOW_UDP:
-			if (timer)
+			closed = udp_flow_defer(&flow->udp);
 			if (!closed && timer)
 				closed = udp_flow_timer(c, &flow->udp, now);
 			break;
 		default:
--- a/flow.h
+++ b/flow.h
@ -264,4 +264,11 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
 			flow_dbg((f), __VA_ARGS__);			\
 	} while (0)
 /* flow_log_details_() - Log the details of flow @f at priority @pri; @state is
 * the state the details are logged according to
 */
 void flow_log_details_(const struct flow_common *f, int pri,
 		       enum flow_state state);
 /* flow_log_details() - Log details of @f_ at @pri, passing the address of its
 * ->f member and the state currently stored in it
 */
 #define flow_log_details(f_, pri) \
 	flow_log_details_(&((f_)->f), (pri), (f_)->f.state)
 /* Shorthands for flow_log_details() at LOG_DEBUG and LOG_ERR priority */
 #define flow_dbg_details(f_)	flow_log_details((f_), LOG_DEBUG)
 #define flow_err_details(f_)	flow_log_details((f_), LOG_ERR)
 #endif /* FLOW_H */
--- a/flow_table.h
+++ b/flow_table.h
@ -110,7 +110,7 @@ static inline const struct flowside *flowside_at_sidx(flow_sidx_t sidx)
 	const union flow *flow = flow_at_sidx(sidx);
 	if (!flow)
-		return PIF_NONE;
+		return NULL;
 	return &flow->f.side[sidx.sidei];
 }
--- a/fwd.c
+++ b/fwd.c
@ -75,8 +75,8 @@ void fwd_probe_ephemeral(void)
 	if (*end || errno)
 		goto parse_err;
-	if (min < 0 || min >= NUM_PORTS ||
+	if (min < 0 || min >= (long)NUM_PORTS ||
-	    max < 0 || max >= NUM_PORTS)
+	    max < 0 || max >= (long)NUM_PORTS)
 		goto parse_err;
 	fwd_ephemeral_min = min;
@ -447,20 +447,35 @@ uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
 	    (proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
 		/* spliceable */
-		/* Preserve the specific loopback adddress used, but let the
+		/* The traffic will go over the guest's 'lo' interface, but by
-		 * kernel pick a source port on the target side
+		 * default use its external address, so we don't inadvertently
 		 * expose services that listen only on the guest's loopback
 		 * address.  That can be overridden by --host-lo-to-ns-lo which
 		 * will instead forward to the loopback address in the guest.
 		 *
 		 * In either case, let the kernel pick the source address to
 		 * match.
 		 */
-		tgt->oaddr = ini->eaddr;
+		if (inany_v4(&ini->eaddr)) {
 			if (c->host_lo_to_ns_lo)
 				tgt->eaddr = inany_loopback4;
 			else
 				tgt->eaddr = inany_from_v4(c->ip4.addr_seen);
 			tgt->oaddr = inany_any4;
 		} else {
 			if (c->host_lo_to_ns_lo)
 				tgt->eaddr = inany_loopback6;
 			else
 				tgt->eaddr.a6 = c->ip6.addr_seen;
 			tgt->oaddr = inany_any6;
 		}
 		/* Let the kernel pick source port */
 		tgt->oport = 0;
 		if (proto == IPPROTO_UDP)
 			/* But for UDP preserve the source port */
 			tgt->oport = ini->eport;
 		if (inany_v4(&ini->eaddr))
 			tgt->eaddr = inany_loopback4;
 		else
 			tgt->eaddr = inany_loopback6;
 		return PIF_SPLICE;
 	}
--- a/inany.c
+++ b/inany.c
@ -36,3 +36,23 @@ const char *inany_ntop(const union inany_addr *src, char *dst, socklen_t size)
 	return inet_ntop(AF_INET6, &src->a6, dst, size);
 }
 /**
 * inany_pton() - Convert a textual IPv4 or IPv6 address into inany form
 * @src:	String holding the address to parse
 * @dst:	Destination, filled with the parsed address
 *
 * IPv4 is attempted first: on a match the zero field of the v4-mapped layout
 * is cleared and the one field is set to 0xff bytes.  Otherwise @src is tried
 * as an IPv6 address, stored directly.
 *
 * Return: 1 if @src was parsed as either family, 0 otherwise
 */
 int inany_pton(const char *src, union inany_addr *dst)
 {
 	/* Not IPv4: the outcome depends on the IPv6 attempt alone */
 	if (!inet_pton(AF_INET, src, &dst->v4mapped.a4))
 		return inet_pton(AF_INET6, src, &dst->a6) ? 1 : 0;
 	memset(&dst->v4mapped.zero, 0, sizeof(dst->v4mapped.zero));
 	memset(&dst->v4mapped.one, 0xff, sizeof(dst->v4mapped.one));
 	return 1;
 }
--- a/inany.h
+++ b/inany.h
@ -270,5 +270,6 @@ static inline void inany_siphash_feed(struct siphash_state *state,
 #define INANY_ADDRSTRLEN	MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)
 const char *inany_ntop(const union inany_addr *src, char *dst, socklen_t size);
 int inany_pton(const char *src, union inany_addr *dst);
 #endif /* INANY_H */
--- a/linux_dep.h
+++ b/linux_dep.h
@ -0,0 +1,144 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright Red Hat
 *
 * Declarations for Linux specific dependencies
 */
 #ifndef LINUX_DEP_H
 #define LINUX_DEP_H
 /* struct tcp_info_linux - Information from Linux TCP_INFO getsockopt()
 *
 * Largely derived from include/linux/tcp.h in the Linux kernel
 *
 * Some fields returned by TCP_INFO have been there for ages and are shared with
 * BSD.  struct tcp_info from netinet/tcp.h has only those fields.  There are
 * also many Linux-specific extensions to the structure, which are only found
 * in the linux/tcp.h version of struct tcp_info.
 *
 * We want to use some of those extension fields, when available.  We can test
 * for availability in the runtime kernel using the length returned from
 * getsockopt(). However, we won't necessarily be compiled against the same
 * kernel headers as we'll run with, so compiling directly against linux/tcp.h
 * means wrapping every field access in an #ifdef whose #else does the same
 * thing as when the field is missing at runtime.  This rapidly gets messy.
 *
 * Instead we define here struct tcp_info_linux which includes all the Linux
 * extensions that we want to use.  This is taken from v6.11 of the kernel.
 *
 * NOTE(review): presumably the member order and widths must keep matching the
 * kernel's struct tcp_info, as the structure is filled in by getsockopt() --
 * confirm before reordering or resizing anything.
 */
 struct tcp_info_linux {
 	uint8_t		tcpi_state;
 	uint8_t		tcpi_ca_state;
 	uint8_t		tcpi_retransmits;
 	uint8_t		tcpi_probes;
 	uint8_t		tcpi_backoff;
 	uint8_t		tcpi_options;
 	uint8_t		tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4;
 	uint8_t		tcpi_delivery_rate_app_limited:1, tcpi_fastopen_client_fail:2;
 	uint32_t	tcpi_rto;
 	uint32_t	tcpi_ato;
 	uint32_t	tcpi_snd_mss;
 	uint32_t	tcpi_rcv_mss;
 	uint32_t	tcpi_unacked;
 	uint32_t	tcpi_sacked;
 	uint32_t	tcpi_lost;
 	uint32_t	tcpi_retrans;
 	uint32_t	tcpi_fackets;
 	/* Times. */
 	uint32_t	tcpi_last_data_sent;
 	uint32_t	tcpi_last_ack_sent;
 	uint32_t	tcpi_last_data_recv;
 	uint32_t	tcpi_last_ack_recv;
 	/* Metrics. */
 	uint32_t	tcpi_pmtu;
 	uint32_t	tcpi_rcv_ssthresh;
 	uint32_t	tcpi_rtt;
 	uint32_t	tcpi_rttvar;
 	uint32_t	tcpi_snd_ssthresh;
 	uint32_t	tcpi_snd_cwnd;
 	uint32_t	tcpi_advmss;
 	uint32_t	tcpi_reordering;
 	uint32_t	tcpi_rcv_rtt;
 	uint32_t	tcpi_rcv_space;
 	uint32_t	tcpi_total_retrans;
 	/* Linux extensions */
 	uint64_t	tcpi_pacing_rate;
 	uint64_t	tcpi_max_pacing_rate;
 	uint64_t	tcpi_bytes_acked;    /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
 	uint64_t	tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
 	uint32_t	tcpi_segs_out;	     /* RFC4898 tcpEStatsPerfSegsOut */
 	uint32_t	tcpi_segs_in;	     /* RFC4898 tcpEStatsPerfSegsIn */
 	uint32_t	tcpi_notsent_bytes;
 	uint32_t	tcpi_min_rtt;
 	uint32_t	tcpi_data_segs_in;	/* RFC4898 tcpEStatsDataSegsIn */
 	uint32_t	tcpi_data_segs_out;	/* RFC4898 tcpEStatsDataSegsOut */
 	uint64_t	tcpi_delivery_rate;
 	uint64_t	tcpi_busy_time;      /* Time (usec) busy sending data */
 	uint64_t	tcpi_rwnd_limited;   /* Time (usec) limited by receive window */
 	uint64_t	tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */
 	uint32_t	tcpi_delivered;
 	uint32_t	tcpi_delivered_ce;
 	uint64_t	tcpi_bytes_sent;     /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
 	uint64_t	tcpi_bytes_retrans;  /* RFC4898 tcpEStatsPerfOctetsRetrans */
 	uint32_t	tcpi_dsack_dups;     /* RFC4898 tcpEStatsStackDSACKDups */
 	uint32_t	tcpi_reord_seen;     /* reordering events seen */
 	uint32_t	tcpi_rcv_ooopack;    /* Out-of-order packets received */
 	uint32_t	tcpi_snd_wnd;	     /* peer's advertised receive window after
 					      * scaling (bytes)
 					      */
 	uint32_t	tcpi_rcv_wnd;	     /* local advertised receive window after
 					      * scaling (bytes)
 					      */
 	uint32_t 	tcpi_rehash;         /* PLB or timeout triggered rehash attempts */
 	uint16_t	tcpi_total_rto;	/* Total number of RTO timeouts, including
 					 * SYN/SYN-ACK and recurring timeouts.
 					 */
 	uint16_t	tcpi_total_rto_recoveries;	/* Total number of RTO
 							 * recoveries, including any
 							 * unfinished recovery.
 							 */
 	uint32_t	tcpi_total_rto_time;	/* Total time spent in RTO recoveries
 						 * in milliseconds, including any
 						 * unfinished recovery.
 						 */
 };
 #include <linux/falloc.h>
 #ifndef FALLOC_FL_COLLAPSE_RANGE
 #define FALLOC_FL_COLLAPSE_RANGE	0x08
 #endif
 #include <linux/close_range.h>
 /* glibc < 2.34 and musl as of 1.2.5 need these */
 #ifndef SYS_close_range
 #define SYS_close_range		436
 #endif
 #ifndef CLOSE_RANGE_UNSHARE	/* Linux kernel < 5.9 */
 #define CLOSE_RANGE_UNSHARE	(1U << 1)
 #endif
 /* close_range() - Fallback issuing the close_range system call directly
 * @first:	First argument handed to the system call
 * @last:	Second argument handed to the system call
 * @flags:	Third argument handed to the system call
 *
 * Defined weak; presumably so that a close_range() supplied by the C library
 * is used instead whenever there is one -- confirm against linker behaviour.
 *
 * Return: result of syscall(), converted to int
 */
 __attribute__ ((weak))
 /* cppcheck-suppress funcArgNamesDifferent */
 int close_range(unsigned int first, unsigned int last, int flags)
 {
 	long rc = syscall(SYS_close_range, first, last, flags);
 	return rc;
 }
 #endif /* LINUX_DEP_H */
--- a/log.c
+++ b/log.c
@ -26,6 +26,7 @@
 #include <stdarg.h>
 #include <sys/socket.h>
 #include "linux_dep.h"
 #include "log.h"
 #include "util.h"
 #include "passt.h"
@ -92,7 +93,6 @@ const char *logfile_prefix[] = {
 	"         ",		/* LOG_DEBUG */
 };
 #ifdef FALLOC_FL_COLLAPSE_RANGE
 /**
 * logfile_rotate_fallocate() - Write header, set log_written after fallocate()
 * @fd:		Log file descriptor
@ -126,7 +126,6 @@ static void logfile_rotate_fallocate(int fd, const struct timespec *now)
 	log_written -= log_cut_size;
 }
 #endif /* FALLOC_FL_COLLAPSE_RANGE */
 /**
 * logfile_rotate_move() - Fallback: move recent entries toward start, then cut
@ -198,21 +197,17 @@ out:
 *
 * Return: 0 on success, negative error code on failure
 *
- * #syscalls fcntl
+ * #syscalls fcntl fallocate
 *
 * fallocate() passed as EXTRA_SYSCALL only if FALLOC_FL_COLLAPSE_RANGE is there
 */
 static int logfile_rotate(int fd, const struct timespec *now)
 {
 	if (fcntl(fd, F_SETFL, O_RDWR /* Drop O_APPEND: explicit lseek() */))
 		return -errno;
 #ifdef FALLOC_FL_COLLAPSE_RANGE
 	/* Only for Linux >= 3.15, extent-based ext4 or XFS, glibc >= 2.18 */
 	if (!fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 0, log_cut_size))
 		logfile_rotate_fallocate(fd, now);
 	else
 #endif
 		logfile_rotate_move(fd, now);
 	if (fcntl(fd, F_SETFL, O_RDWR | O_APPEND))
@ -274,7 +269,7 @@ void vlogmsg(bool newline, bool cont, int pri, const char *format, va_list ap)
 		char timestr[LOGTIME_STRLEN];
 		logtime_fmt(timestr, sizeof(timestr), now);
-		fprintf(stderr, "%s: ", timestr);
+		FPRINTF(stderr, "%s: ", timestr);
 	}
 	if ((log_mask & LOG_MASK(LOG_PRI(pri))) || !log_conf_parsed) {
@ -293,7 +288,7 @@ void vlogmsg(bool newline, bool cont, int pri, const char *format, va_list ap)
 	    (log_stderr && (log_mask & LOG_MASK(LOG_PRI(pri))))) {
 		(void)vfprintf(stderr, format, ap);
 		if (newline && format[strlen(format)] != '\n')
-			fprintf(stderr, "\n");
+			FPRINTF(stderr, "\n");
 	}
 }
@ -399,7 +394,7 @@ void passt_vsyslog(bool newline, int pri, const char *format, va_list ap)
 		n += snprintf(buf + n, BUFSIZ - n, "\n");
 	if (log_sock >= 0 && send(log_sock, buf, n, 0) != n && log_stderr)
-		fprintf(stderr, "Failed to send %i bytes to syslog\n", n);
+		FPRINTF(stderr, "Failed to send %i bytes to syslog\n", n);
 }
 /**
@ -416,8 +411,7 @@ void logfile_init(const char *name, const char *path, size_t size)
 	if (readlink("/proc/self/exe", exe, PATH_MAX - 1) < 0)
 		die_perror("Failed to read own /proc/self/exe link");
-	log_file = open(path, O_CREAT | O_TRUNC | O_APPEND | O_RDWR | O_CLOEXEC,
+	log_file = output_file_open(path, O_APPEND | O_RDWR);
 			S_IRUSR | S_IWUSR);
 	if (log_file == -1)
 		die_perror("Couldn't open log file %s", path);
@ -433,4 +427,3 @@ void logfile_init(const char *name, const char *path, size_t size)
 	/* For FALLOC_FL_COLLAPSE_RANGE: VFS block size can be up to one page */
 	log_cut_size = ROUND_UP(log_size * LOGFILE_CUT_RATIO / 100, PAGE_SIZE);
 }
--- a/ndp.c
+++ b/ndp.c
@ -234,8 +234,8 @@ int ndp(struct ctx *c, const struct icmp6hdr *ih, const struct in6_addr *saddr,
 		return 1;
 	if (ih->icmp6_type == NS) {
-		struct ndp_ns *ns = packet_get(p, 0, 0, sizeof(struct ndp_ns),
+		const struct ndp_ns *ns =
-					       NULL);
+			packet_get(p, 0, 0, sizeof(struct ndp_ns), NULL);
 		if (!ns)
 			return -1;
--- a/netlink.c
+++ b/netlink.c
@ -353,7 +353,7 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
 */
 bool nl_route_get_def_multipath(struct rtattr *rta, void *gw)
 {
-	size_t nh_len = RTA_PAYLOAD(rta);
+	int nh_len = RTA_PAYLOAD(rta);
 	struct rtnexthop *rtnh;
 	bool found = false;
 	int hops = -1;
@ -582,7 +582,7 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
 				*(unsigned int *)RTA_DATA(rta) = ifi_dst;
 			} else if (rta->rta_type == RTA_MULTIPATH) {
-				size_t nh_len = RTA_PAYLOAD(rta);
+				int nh_len = RTA_PAYLOAD(rta);
 				struct rtnexthop *rtnh;
 				for (rtnh = (struct rtnexthop *)RTA_DATA(rta);
--- a/passt.1
+++ b/passt.1
@ -95,7 +95,7 @@ detached PID namespace after starting, because the PID itself cannot change.
 Default is to fork into background.
 .TP
-.BR \-e ", " \-\-stderr
+.BR \-e ", " \-\-stderr " " (DEPRECATED)
 This option has no effect, and is maintained for compatibility purposes only.
 Note that this configuration option is \fBdeprecated\fR and will be removed in a
@ -249,10 +249,19 @@ the host.
 .TP
 .BR \-\-dns-forward " " \fIaddr
 Map \fIaddr\fR (IPv4 or IPv6) as seen from guest or namespace to the
-first configured DNS resolver (with corresponding IP version). Maps
+nameserver (with corresponding IP version) specified by the
-only UDP and TCP traffic to port 53 or port 853.  Replies are
+\fB\-\-dns-host\fR option. Maps only UDP and TCP traffic to port 53 or
-translated back with a reverse mapping.  This option can be specified
+port 853.  Replies are translated back with a reverse mapping.  This
-zero to two times (once for IPv4, once for IPv6).
+option can be specified zero to two times (once for IPv4, once for
 IPv6).
 .TP
 .BR \-\-dns-host " " \fIaddr
 Configure the host nameserver which guest or namespace queries to the
 \fB\-\-dns-forward\fR address will be redirected to. This option can
 be specified zero to two times (once for IPv4, once for IPv6).
 By default, the first nameserver from the host's
 \fI/etc/resolv.conf\fR is used.
 .TP
 .BR \-S ", " \-\-search " " \fIlist
@ -327,6 +336,16 @@ namespace will be silently dropped.
 Disable Router Advertisements. Router Solicitations coming from guest or target
 namespace will be ignored.
 .TP
 .BR \-\-freebind
 Allow any binding address to be specified for \fB-t\fR and \fB-u\fR
 options.  Usually binding addresses must be addresses currently
 configured on the host.  With \fB\-\-freebind\fR, the
 \fBIP_FREEBIND\fR or \fBIPV6_FREEBIND\fR socket option is enabled
 allowing any address to be used.  This is typically used to bind
 addresses which might be configured on the host in future, at which
 point the forwarding will immediately start operating.
 .TP
 .BR \-\-map-host-loopback " " \fIaddr
 Translate \fIaddr\fR to refer to the host. Packets from the guest to
@ -586,6 +605,13 @@ Configure UDP port forwarding from target namespace to init namespace.
 Default is \fBauto\fR.
 .TP
 .BR \-\-host-lo-to-ns-lo " " (DEPRECATED)
 If specified, connections forwarded with \fB\-t\fR and \fB\-u\fR from
 the host's loopback address will appear on the loopback address in the
 guest as well.  Without this option such forwarded packets will appear
 to come from the guest's public address.
 .TP
 .BR \-\-userns " " \fIspec
 Target user namespace to join, as a path. If PID is given, without this option,
@ -863,38 +889,41 @@ root@localhost's password:
 .SH NOTES
-.SS Handling of traffic with local destination and source addresses
+.SS Handling of traffic with loopback destination and source addresses
-Both \fBpasst\fR and \fBpasta\fR can bind on ports with a local address,
+Both \fBpasst\fR and \fBpasta\fR can bind on ports with a loopback
-depending on the configuration. Local destination or source addresses need to be
+address (127.0.0.0/8 or ::1), depending on the configuration. Loopback
-changed before packets are delivered to the guest or target namespace: most
+destination or source addresses need to be changed before packets are
-operating systems would drop packets received from non-loopback interfaces with
+delivered to the guest or target namespace: most operating systems
-local addresses, and it would also be impossible for guest or target namespace
+would drop packets received with loopback addresses on non-loopback
-to route answers back.
+interfaces, and it would also be impossible for guest or target
 namespace to route answers back.
-For convenience, and somewhat arbitrarily, the source address on these packets
+For convenience, the source address on these packets is translated to
-is translated to the address of the default IPv4 or IPv6 gateway (if any) --
+the address specified by the \fB\-\-map-host-loopback\fR option (with
-this is known to be an existing, valid address on the same subnet.
+some exceptions in pasta mode, see next section below).  If not
 specified this defaults, somewhat arbitrarily, to the address of
 the default IPv4 or IPv6 gateway (if any) -- this is known to be an
 existing, valid address on the same subnet.  If \fB\-\-no-map-gw\fR or
 \fB\-\-map-host-loopback none\fR are specified this translation is
 disabled and packets with loopback addresses are simply dropped.
-Loopback destination addresses are instead translated to the observed external
+Loopback destination addresses are translated to the observed external
-address of the guest or target namespace. For IPv6 packets, if usage of a
+address of the guest or target namespace. For IPv6, the observed
-link-local address by guest or namespace has ever been observed, and the
+link-local address is used if the translated source address is
-original destination address is also a link-local address, the observed
+link-local, otherwise the observed global address is used. For both
-link-local address is used. Otherwise, the observed global address is used. For
+IPv4 and IPv6, if no addresses have been seen yet, the configured
-both IPv4 and IPv6, if no addresses have been seen yet, the configured addresses
+addresses will be used instead.
 will be used instead.
 For example, if \fBpasst\fR or \fBpasta\fR receive a connection from 127.0.0.1,
 with destination 127.0.0.10, and the default IPv4 gateway is 192.0.2.1, while
 the last observed source address from guest or namespace is 192.0.2.2, this will
 be translated to a connection from 192.0.2.1 to 192.0.2.2.
-Similarly, for traffic coming from guest or namespace, packets with destination
+Similarly, for traffic coming from guest or namespace, packets with
-address corresponding to the default gateway will have their destination address
+destination address corresponding to the \fB\-\-map-host-loopback\fR
-translated to a loopback address, if and only if a packet, in the opposite
+address will have their destination address translated to a loopback
-direction, with a loopback destination or source address, port-wise matching for
+address.
 UDP, or connection-wise for TCP, has been recently forwarded to guest or
 namespace. This behaviour can be disabled with \-\-no\-map\-gw.
 .SS Handling of local traffic in pasta
@ -910,8 +939,15 @@ and the new socket using the \fBsplice\fR(2) system call, and for UDP, a pair
 of \fBrecvmmsg\fR(2) and \fBsendmmsg\fR(2) system calls deals with packet
 transfers.
-This bypass only applies to local connections and traffic, because it's not
+Because it's not possible to bind sockets to foreign addresses, this
-possible to bind sockets to foreign addresses.
+bypass only applies to local connections and traffic.  It also means
 that the address translation differs slightly from passt mode.
 Connections from loopback to loopback on the host will appear to come
 from the target namespace's public address within the guest, unless
 \fB\-\-host-lo-to-ns-lo\fR is specified, in which case they will
 appear to come from loopback in the namespace as well.  The latter
 behaviour used to be the default, but is usually undesirable, since it
 can unintentionally expose namespace local services to the host.
 .SS Binding to low numbered ports (well-known or system ports, up to 1023)
--- a/passt.c
+++ b/passt.c
@ -207,7 +207,8 @@ int main(int argc, char **argv)
 	struct timespec now;
 	struct sigaction sa;
-	clock_gettime(CLOCK_MONOTONIC, &log_start);
+	if (clock_gettime(CLOCK_MONOTONIC, &log_start))
 		die_perror("Failed to get CLOCK_MONOTONIC time");
 	arch_avx2_exec(argv);
@ -265,7 +266,8 @@ int main(int argc, char **argv)
 	secret_init(&c);
-	clock_gettime(CLOCK_MONOTONIC, &now);
+	if (clock_gettime(CLOCK_MONOTONIC, &now))
 		die_perror("Failed to get CLOCK_MONOTONIC time");
 	flow_init();
@ -307,13 +309,15 @@ int main(int argc, char **argv)
 	timer_init(&c, &now);
 loop:
-	/* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */
+	/* NOLINTBEGIN(bugprone-branch-clone): intervals can be the same */
 	/* cppcheck-suppress [duplicateValueTernary, unmatchedSuppression] */
 	nfds = epoll_wait(c.epollfd, events, EPOLL_EVENTS, TIMER_INTERVAL);
 	/* NOLINTEND(bugprone-branch-clone) */
 	if (nfds == -1 && errno != EINTR)
 		die_perror("epoll_wait() failed in main loop");
-	clock_gettime(CLOCK_MONOTONIC, &now);
+	if (clock_gettime(CLOCK_MONOTONIC, &now))
 		err_perror("Failed to get CLOCK_MONOTONIC time");
 	for (i = 0; i < nfds; i++) {
 		union epoll_ref ref = *((union epoll_ref *)&events[i].data.u64);
--- a/passt.h
+++ b/passt.h
@ -225,6 +225,8 @@ struct ip6_ctx {
 * @no_dhcpv6:		Disable DHCPv6 server
 * @no_ndp:		Disable NDP handler altogether
 * @no_ra:		Disable router advertisements
 * @host_lo_to_ns_lo:	Map host loopback addresses to ns loopback addresses
 * @freebind:		Allow binding of non-local addresses for forwarding
 * @low_wmem:		Low probed net.core.wmem_max
 * @low_rmem:		Low probed net.core.rmem_max
 */
@ -284,6 +286,8 @@ struct ctx {
 	int no_dhcpv6;
 	int no_ndp;
 	int no_ra;
 	int host_lo_to_ns_lo;
 	int freebind;
 	int low_wmem;
 	int low_rmem;
--- a/pasta.c
+++ b/pasta.c
@ -102,7 +102,9 @@ static int pasta_wait_for_ns(void *arg)
 	int flags = O_RDONLY | O_CLOEXEC;
 	char ns[PATH_MAX];
-	snprintf(ns, PATH_MAX, "/proc/%i/ns/net", pasta_child_pid);
+	if (snprintf_check(ns, PATH_MAX, "/proc/%i/ns/net", pasta_child_pid))
 		die_perror("Can't build netns path");
 	do {
 		while ((c->pasta_netns_fd = open(ns, flags)) < 0) {
 			if (errno != ENOENT)
@ -239,8 +241,11 @@ void pasta_start_ns(struct ctx *c, uid_t uid, gid_t gid,
 		c->quiet = 1;
 	/* Configure user and group mappings */
-	snprintf(uidmap, BUFSIZ, "0 %u 1", uid);
+	if (snprintf_check(uidmap, BUFSIZ, "0 %u 1", uid))
-	snprintf(gidmap, BUFSIZ, "0 %u 1", gid);
+		die_perror("Can't build uidmap");
 	if (snprintf_check(gidmap, BUFSIZ, "0 %u 1", gid))
 		die_perror("Can't build gidmap");
 	if (write_file("/proc/self/uid_map", uidmap) ||
 	    write_file("/proc/self/setgroups", "deny") ||
@ -427,12 +432,12 @@ static int pasta_netns_quit_timer(void)
 */
 void pasta_netns_quit_init(const struct ctx *c)
 {
 	union epoll_ref ref = { .type = EPOLL_TYPE_NSQUIT_INOTIFY };
 	struct epoll_event ev = { .events = EPOLLIN };
 	int flags = O_NONBLOCK | O_CLOEXEC;
 	struct statfs s = { 0 };
 	bool try_inotify = true;
 	int fd = -1, dir_fd;
 	union epoll_ref ref;
 	if (c->mode != MODE_PASTA || c->no_netns_quit || !*c->netns_base)
 		return;
@ -463,6 +468,7 @@ void pasta_netns_quit_init(const struct ctx *c)
 		ref.type = EPOLL_TYPE_NSQUIT_TIMER;
 	} else {
 		close(dir_fd);
 		ref.type = EPOLL_TYPE_NSQUIT_INOTIFY;
 	}
 	if (fd > FD_REF_MAX)
--- a/pcap.c
+++ b/pcap.c
@ -86,9 +86,8 @@ static void pcap_frame(const struct iovec *iov, size_t iovcnt,
 		.caplen = l2len,
 		.len = l2len
 	};
 	struct iovec hiov = { &h, sizeof(h) };
-	if (write_remainder(pcap_fd, &hiov, 1, 0) < 0 ||
+	if (write_all_buf(pcap_fd, &h, sizeof(h)) < 0 ||
 	    write_remainder(pcap_fd, iov, iovcnt, offset) < 0)
 		debug_perror("Cannot log packet, length %zu", l2len);
 }
@ -101,12 +100,14 @@ static void pcap_frame(const struct iovec *iov, size_t iovcnt,
 void pcap(const char *pkt, size_t l2len)
 {
 	struct iovec iov = { (char *)pkt, l2len };
-	struct timespec now;
+	struct timespec now = { 0 };
 	if (pcap_fd == -1)
 		return;
-	clock_gettime(CLOCK_REALTIME, &now);
+	if (clock_gettime(CLOCK_REALTIME, &now))
 		err_perror("Failed to get CLOCK_REALTIME time");
 	pcap_frame(&iov, 1, 0, &now);
 }
@ -120,13 +121,14 @@ void pcap(const char *pkt, size_t l2len)
 void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n,
 		   size_t offset)
 {
-	struct timespec now;
+	struct timespec now = { 0 };
 	unsigned int i;
 	if (pcap_fd == -1)
 		return;
-	clock_gettime(CLOCK_REALTIME, &now);
+	if (clock_gettime(CLOCK_REALTIME, &now))
 		err_perror("Failed to get CLOCK_REALTIME time");
 	for (i = 0; i < n; i++)
 		pcap_frame(iov + i * frame_parts, frame_parts, offset, &now);
@ -139,17 +141,20 @@ void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n,
 * @iov:	Pointer to the array of struct iovec describing the I/O vector
 *		containing packet data to write, including L2 header
 * @iovcnt:	Number of buffers (@iov entries)
 * @offset:	Offset of the L2 frame within the full data length
 */
 /* cppcheck-suppress unusedFunction */
-void pcap_iov(const struct iovec *iov, size_t iovcnt)
+void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset)
 {
-	struct timespec now;
+	struct timespec now = { 0 };
 	if (pcap_fd == -1)
 		return;
-	clock_gettime(CLOCK_REALTIME, &now);
+	if (clock_gettime(CLOCK_REALTIME, &now))
-	pcap_frame(iov, iovcnt, 0, &now);
+		err_perror("Failed to get CLOCK_REALTIME time");
 	pcap_frame(iov, iovcnt, offset, &now);
 }
 /**
@ -158,18 +163,15 @@ void pcap_iov(const struct iovec *iov, size_t iovcnt)
 */
 void pcap_init(struct ctx *c)
 {
 	int flags = O_WRONLY | O_CREAT | O_TRUNC;
 	if (pcap_fd != -1)
 		return;
 	if (!*c->pcap)
 		return;
-	flags |= c->foreground ? O_CLOEXEC : 0;
+	pcap_fd = output_file_open(c->pcap, O_WRONLY);
 	pcap_fd = open(c->pcap, flags, S_IRUSR | S_IWUSR);
 	if (pcap_fd == -1) {
-		perror("open");
+		err_perror("Couldn't open pcap file %s", c->pcap);
 		return;
 	}
--- a/pcap.h
+++ b/pcap.h
@ -9,7 +9,7 @@
 void pcap(const char *pkt, size_t l2len);
 void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n,
 		   size_t offset);
-void pcap_iov(const struct iovec *iov, size_t iovcnt);
+void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset);
 void pcap_init(struct ctx *c);
 #endif /* PCAP_H */
--- a/pif.c
+++ b/pif.c
@ -59,3 +59,45 @@ void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl,
 		*sl = sizeof(sa->sa6);
 	}
 }
 /** pif_sock_l4() - Create a socket on a given interface, bound to an address
 * @c:		Execution context
 * @type:	Socket epoll type
 * @pif:	Interface the socket belongs to, must satisfy pif_is_socket()
 * @addr:	Address to bind to, NULL binds to the IPv6 any address
 * @ifname:	Interface for binding, NULL for any
 * @port:	Port number to bind to (host byte order)
 * @data:	epoll reference portion for protocol handlers
 *
 * NOTE: For namespace pifs, this must be called having already entered the
 * relevant namespace.
 *
 * Return: newly created socket, negative error code on failure
 */
 int pif_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
 		const union inany_addr *addr, const char *ifname,
 		in_port_t port, uint32_t data)
 {
 	/* Default socket address, used as is when no @addr is given */
 	union sockaddr_inany sa = {
 		.sa6.sin6_family = AF_INET6,
 		.sa6.sin6_addr = in6addr_any,
 		.sa6.sin6_port = htons(port),
 	};
 	socklen_t sl;
 	ASSERT(pif_is_socket(pif));
 	/* Sanity checks: spliced sockets are loopback, with no interface */
 	if (pif == PIF_SPLICE) {
 		ASSERT(!ifname);
 		ASSERT(addr && inany_is_loopback(addr));
 	}
 	if (addr) {
 		/* Specific address: let pif_sockaddr() fill in @sa and @sl */
 		pif_sockaddr(c, &sa, &sl, pif, addr, port);
 		return sock_l4_sa(c, type, &sa, sl,
 				  ifname, sa.sa_family == AF_INET6, data);
 	}
 	return sock_l4_sa(c, type, &sa, sizeof(sa.sa6),
 			  ifname, false, data);
 }
--- a/pif.h
+++ b/pif.h
@ -59,5 +59,8 @@ static inline bool pif_is_socket(uint8_t pif)
 void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl,
 		  uint8_t pif, const union inany_addr *addr, in_port_t port);
 int pif_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
 		const union inany_addr *addr, const char *ifname,
 		in_port_t port, uint32_t data);
 #endif /* PIF_H */
--- a/seccomp.sh
+++ b/seccomp.sh
@ -20,6 +20,15 @@ OUT="$(mktemp)"
 [ -z "${ARCH}" ] && ARCH="$(uname -m)"
 [ -z "${CC}" ] && CC="cc"
 # Derive the AUDIT_ARCH_* constant name from ${ARCH}: upper-case it, then
 # rewrite the machine names below. Operands are quoted so that file names in
 # the working directory can't glob-expand into the tr sets.
 AUDIT_ARCH="AUDIT_ARCH_$(echo "${ARCH}" | tr '[a-z]' '[A-Z]'       \
                                       | sed 's/^ARM.*/ARM/'        \
                                       | sed 's/I[456]86/I386/'     \
                                       | sed 's/PPC64/PPC/'         \
                                       | sed 's/PPCLE/PPC64LE/'     \
                                       | sed 's/MIPS64EL/MIPSEL64/' \
                                       | sed 's/HPPA/PARISC/'       \
                                       | sed 's/SH4/SH/')"
 HEADER="/* This file was automatically generated by $(basename ${0}) */
 #ifndef AUDIT_ARCH_PPC64LE
@ -32,7 +41,7 @@ struct sock_filter filter_@PROFILE@[] = {
 	/* cppcheck-suppress [badBitmaskCheck, unmatchedSuppression] */
 	BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
 		 (offsetof(struct seccomp_data, arch))),
-	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, PASST_AUDIT_ARCH, 0, @KILL@),
+	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, @AUDIT_ARCH@, 0, @KILL@),
 	/* cppcheck-suppress [badBitmaskCheck, unmatchedSuppression] */
 	BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
 		 (offsetof(struct seccomp_data, nr))),
@ -233,7 +242,8 @@ gen_profile() {
 		sub ${__i} CALL "NR:${__nr}" "NAME:${__name}" "ALLOW:${__allow}"
 	done
-	finish PRE "PROFILE:${__profile}" "KILL:$(( __statements + 1))"
+	finish PRE "PROFILE:${__profile}" "KILL:$(( __statements + 1))" \
 	       "AUDIT_ARCH:${AUDIT_ARCH}"
 }
 printf '%s\n' "${HEADER}" > "${OUT}"
--- a/tap.c
+++ b/tap.c
@ -172,11 +172,15 @@ void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
 	struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
 	struct udphdr *uh = tap_push_ip4h(ip4h, src, dst, l4len, IPPROTO_UDP);
 	char *data = (char *)(uh + 1);
 	const struct iovec iov = {
 		.iov_base = (void *)in,
 		.iov_len = dlen
 	};
 	uh->source = htons(sport);
 	uh->dest = htons(dport);
 	uh->len = htons(l4len);
-	csum_udp4(uh, src, dst, in, dlen);
+	csum_udp4(uh, src, dst, &iov, 1, 0);
 	memcpy(data, in, dlen);
 	tap_send_single(c, buf, dlen + (data - buf));
@ -247,7 +251,7 @@ static void *tap_push_ip6h(struct ipv6hdr *ip6h,
 void tap_udp6_send(const struct ctx *c,
 		   const struct in6_addr *src, in_port_t sport,
 		   const struct in6_addr *dst, in_port_t dport,
-		   uint32_t flow, const void *in, size_t dlen)
+		   uint32_t flow, void *in, size_t dlen)
 {
 	size_t l4len = dlen + sizeof(struct udphdr);
 	char buf[USHRT_MAX];
@ -255,11 +259,15 @@ void tap_udp6_send(const struct ctx *c,
 	struct udphdr *uh = tap_push_ip6h(ip6h, src, dst,
 					  l4len, IPPROTO_UDP, flow);
 	char *data = (char *)(uh + 1);
 	const struct iovec iov = {
 		.iov_base = in,
 		.iov_len = dlen
 	};
 	uh->source = htons(sport);
 	uh->dest = htons(dport);
 	uh->len = htons(l4len);
-	csum_udp6(uh, src, dst, in, dlen);
+	csum_udp6(uh, src, dst, &iov, 1, 0);
 	memcpy(data, in, dlen);
 	tap_send_single(c, buf, dlen + (data - buf));
@ -982,24 +990,17 @@ static void tap_sock_reset(struct ctx *c)
 }
 /**
- * tap_handler_passt() - Packet handler for AF_UNIX file descriptor
+ * tap_passt_input() - Handler for new data on the socket to qemu
 * @c:		Execution context
 * @events:	epoll events
 * @now:	Current timestamp
 */
-void tap_handler_passt(struct ctx *c, uint32_t events,
+static void tap_passt_input(struct ctx *c, const struct timespec *now)
 		       const struct timespec *now)
 {
 	static const char *partial_frame;
 	static ssize_t partial_len = 0;
 	ssize_t n;
 	char *p;
 	if (events & (EPOLLRDHUP | EPOLLHUP | EPOLLERR)) {
 		tap_sock_reset(c);
 		return;
 	}
 	tap_flush_pools();
 	if (partial_len) {
@ -1010,10 +1011,13 @@ void tap_handler_passt(struct ctx *c, uint32_t events,
 		memmove(pkt_buf, partial_frame, partial_len);
 	}
-	n = recv(c->fd_tap, pkt_buf + partial_len, TAP_BUF_BYTES - partial_len,
+	do {
-		 MSG_DONTWAIT);
+		n = recv(c->fd_tap, pkt_buf + partial_len,
 			 TAP_BUF_BYTES - partial_len, MSG_DONTWAIT);
 	} while ((n < 0) && errno == EINTR);
 	if (n < 0) {
-		if (errno != EINTR && errno != EAGAIN && errno != EWOULDBLOCK) {
+		if (errno != EAGAIN && errno != EWOULDBLOCK) {
 			err_perror("Receive error on guest connection, reset");
 			tap_sock_reset(c);
 		}
@ -1051,6 +1055,63 @@ void tap_handler_passt(struct ctx *c, uint32_t events,
 	tap_handler(c, now);
 }
 /**
 * tap_handler_passt() - Dispatch epoll events for AF_UNIX file descriptor
 * @c:		Execution context
 * @events:	epoll events
 * @now:	Current timestamp
 */
 void tap_handler_passt(struct ctx *c, uint32_t events,
 		       const struct timespec *now)
 {
 	const uint32_t hangup = EPOLLRDHUP | EPOLLHUP | EPOLLERR;
 	/* A hangup or error resets the socket and takes priority over input */
 	if (events & hangup)
 		tap_sock_reset(c);
 	else if (events & EPOLLIN)
 		tap_passt_input(c, now);
 }
 /**
 * tap_pasta_input() - Handler for new data on the tap device
 * @c:		Execution context
 * @now:	Current timestamp
 *
 * Read frames from @c->fd_tap into pkt_buf, one read() of at most ETH_MAX_MTU
 * bytes at a time, add the ones with a valid length to the packet pools, then
 * call tap_handler(). EOF and unexpected read errors terminate the process.
 */
 static void tap_pasta_input(struct ctx *c, const struct timespec *now)
 {
 	ssize_t n, len;
 	tap_flush_pools();
 	/* Go on as long as offset n leaves ETH_MAX_MTU bytes for the next read */
 	for (n = 0; n <= (ssize_t)(TAP_BUF_BYTES - ETH_MAX_MTU); n += len) {
 		len = read(c->fd_tap, pkt_buf + n, ETH_MAX_MTU);
 		if (len == 0) {
 			die("EOF on tap device, exiting");
 		} else if (len < 0) {
 			if (errno == EINTR) {
 				/* Interrupted: retry at the same offset */
 				len = 0;
 				continue;
 			}
 			/* Either value means the read would block: check them
 			 * separately, errno can't match both if they differ
 			 */
 			if (errno == EAGAIN || errno == EWOULDBLOCK)
 				break; /* all done for now */
 			die("Error on tap device, exiting");
 		}
 		/* Ignore frames of bad length, n still advances past them */
 		if (len < (ssize_t)sizeof(struct ethhdr) ||
 		    len > (ssize_t)ETH_MAX_MTU)
 			continue;
 		tap_add_packet(c, len, pkt_buf + n);
 	}
 	tap_handler(c, now);
 }
 /**
 * tap_handler_pasta() - Packet handler for /dev/net/tun file descriptor
 * @c:		Execution context
@ -1060,46 +1121,11 @@ void tap_handler_passt(struct ctx *c, uint32_t events,
 void tap_handler_pasta(struct ctx *c, uint32_t events,
 		       const struct timespec *now)
 {
 	ssize_t n, len;
 	int ret;
 	if (events & (EPOLLRDHUP | EPOLLHUP | EPOLLERR))
 		die("Disconnect event on /dev/net/tun device, exiting");
-redo:
+	if (events & EPOLLIN)
-	n = 0;
+		tap_pasta_input(c, now);
 	tap_flush_pools();
 restart:
 	while ((len = read(c->fd_tap, pkt_buf + n, TAP_BUF_BYTES - n)) > 0) {
 		if (len < (ssize_t)sizeof(struct ethhdr) ||
 		    len > (ssize_t)ETH_MAX_MTU) {
 			n += len;
 			continue;
 		}
 		tap_add_packet(c, len, pkt_buf + n);
 		if ((n += len) == TAP_BUF_BYTES)
 			break;
 	}
 	if (len < 0 && errno == EINTR)
 		goto restart;
 	ret = errno;
 	tap_handler(c, now);
 	if (len > 0 || ret == EAGAIN)
 		return;
 	if (n == TAP_BUF_BYTES)
 		goto redo;
 	die("Error on tap device, exiting");
 }
 /**
@ -1110,7 +1136,7 @@ restart:
 */
 int tap_sock_unix_open(char *sock_path)
 {
-	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	int fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
 	struct sockaddr_un addr = {
 		.sun_family = AF_UNIX,
 	};
@ -1125,10 +1151,12 @@ int tap_sock_unix_open(char *sock_path)
 		if (*sock_path)
 			memcpy(path, sock_path, UNIX_PATH_MAX);
-		else
+		else if (snprintf_check(path, UNIX_PATH_MAX - 1,
-			snprintf(path, UNIX_PATH_MAX - 1, UNIX_SOCK_PATH, i);
+					UNIX_SOCK_PATH, i))
 			die_perror("Can't build UNIX domain socket path");
-		ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0);
+		ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC,
 			    0);
 		if (ex < 0)
 			die_perror("Failed to check for UNIX domain conflicts");
@ -1261,7 +1289,7 @@ static int tap_ns_tun(void *arg)
 	if (fd < 0)
 		die_perror("Failed to open() /dev/net/tun");
-	rc = ioctl(fd, TUNSETIFF, &ifr);
+	rc = ioctl(fd, (int)TUNSETIFF, &ifr);
 	if (rc < 0)
 		die_perror("TUNSETIFF ioctl on /dev/net/tun failed");
--- a/tap.h
+++ b/tap.h
@ -53,7 +53,7 @@ const struct in6_addr *tap_ip6_daddr(const struct ctx *c,
 void tap_udp6_send(const struct ctx *c,
 		   const struct in6_addr *src, in_port_t sport,
 		   const struct in6_addr *dst, in_port_t dport,
-		   uint32_t flow, const void *in, size_t dlen);
+		   uint32_t flow, void *in, size_t dlen);
 void tap_icmp6_send(const struct ctx *c,
 		    const struct in6_addr *src, const struct in6_addr *dst,
 		    const void *in, size_t l4len);
--- a/tcp.c
+++ b/tcp.c
@ -274,6 +274,7 @@
 #include <net/if.h>
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/tcp.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include <stddef.h>
@ -286,8 +287,6 @@
 #include <time.h>
 #include <arpa/inet.h>
 #include <linux/tcp.h> /* For struct tcp_info */
 #include "checksum.h"
 #include "util.h"
 #include "iov.h"
@ -300,6 +299,7 @@
 #include "log.h"
 #include "inany.h"
 #include "flow.h"
 #include "linux_dep.h"
 #include "flow_table.h"
 #include "tcp_internal.h"
@ -308,11 +308,6 @@
 /* MSS rounding: see SET_MSS() */
 #define MSS_DEFAULT			536
 #define WINDOW_DEFAULT			14600		/* RFC 6928 */
 #ifdef HAS_SND_WND
 # define KERNEL_REPORTS_SND_WND(c)	((c)->tcp.kernel_snd_wnd)
 #else
 # define KERNEL_REPORTS_SND_WND(c)	(0 && (c))
 #endif
 #define ACK_INTERVAL			10		/* ms */
 #define SYN_TIMEOUT			10		/* s */
@ -323,11 +318,6 @@
 #define LOW_RTT_TABLE_SIZE		8
 #define LOW_RTT_THRESHOLD		10 /* us */
 /* We need to include <linux/tcp.h> for tcpi_bytes_acked, instead of
 * <netinet/tcp.h>, but that doesn't include a definition for SOL_TCP
 */
 #define SOL_TCP				IPPROTO_TCP
 #define ACK_IF_NEEDED	0		/* See tcp_send_flag() */
 #define CONN_IS_CLOSING(conn)						\
@ -371,6 +361,20 @@ char		tcp_buf_discard		[MAX_WINDOW];
 /* Does the kernel support TCP_PEEK_OFF? */
 bool peek_offset_cap;
 /* Size of data returned by TCP_INFO getsockopt() */
 socklen_t tcp_info_size;
 #define tcp_info_cap(f_)						\
 	((offsetof(struct tcp_info_linux, tcpi_##f_) +			\
 	  sizeof(((struct tcp_info_linux *)NULL)->tcpi_##f_)) <= tcp_info_size)
 /* Kernel reports sending window in TCP_INFO (kernel commit 8f7baad7f035) */
 #define snd_wnd_cap	tcp_info_cap(snd_wnd)
 /* Kernel reports bytes acked in TCP_INFO (kernel commit 0df48c26d84) */
 #define bytes_acked_cap	tcp_info_cap(bytes_acked)
 /* Kernel reports minimum RTT in TCP_INFO (kernel commit cd9b266095f4) */
 #define min_rtt_cap	tcp_info_cap(min_rtt)
 /* sendmsg() to socket */
 static struct iovec	tcp_iov			[UIO_MAXIOV];
@ -424,27 +428,23 @@ int tcp_set_peek_offset(int s, int offset)
 */
 static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags)
 {
 	uint32_t rdhup;
 	if (!events)
 		return 0;
 	rdhup = (events & SOCK_FIN_RCVD) ? 0 : EPOLLRDHUP;
 	if (events & ESTABLISHED) {
 		if (events & TAP_FIN_SENT)
 			return EPOLLET;
 		if (conn_flags & STALLED)
-			return EPOLLIN | EPOLLOUT | rdhup | EPOLLET;
+			return EPOLLIN | EPOLLOUT | EPOLLRDHUP | EPOLLET;
-		return EPOLLIN | rdhup;
+		return EPOLLIN | EPOLLRDHUP;
 	}
 	if (events == TAP_SYN_RCVD)
-		return EPOLLOUT | EPOLLET | rdhup;
+		return EPOLLOUT | EPOLLET | EPOLLRDHUP;
-	return rdhup;
+	return EPOLLET | EPOLLRDHUP;
 }
 /**
@ -549,7 +549,8 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
 		 (unsigned long long)it.it_value.tv_sec,
 		 (unsigned long long)it.it_value.tv_nsec / 1000 / 1000);
-	timerfd_settime(conn->timer, 0, &it, NULL);
+	if (timerfd_settime(conn->timer, 0, &it, NULL))
 		flow_err(conn, "failed to set timer: %s", strerror(errno));
 }
 /**
@ -679,13 +680,12 @@ static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn)
 * @tinfo:	Pointer to struct tcp_info for socket
 */
 static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
-			      const struct tcp_info *tinfo)
+			      const struct tcp_info_linux *tinfo)
 {
 #ifdef HAS_MIN_RTT
 	const struct flowside *tapside = TAPFLOW(conn);
 	int i, hole = -1;
-	if (!tinfo->tcpi_min_rtt ||
+	if (!min_rtt_cap ||
 	    (int)tinfo->tcpi_min_rtt > LOW_RTT_THRESHOLD)
 		return;
@ -706,10 +706,6 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
 	if (hole == LOW_RTT_TABLE_SIZE)
 		hole = 0;
 	inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any);
 #else
 	(void)conn;
 	(void)tinfo;
 #endif /* HAS_MIN_RTT */
 }
 /**
@ -756,34 +752,106 @@ static void tcp_sock_set_bufsize(const struct ctx *c, int s)
 }
 /**
- * tcp_update_check_tcp4() - Update TCP checksum from stored one
+ * tcp_update_check_tcp4() - Calculate TCP checksum for IPv4
 * @iph:	IPv4 header
- * @th:		TCP header followed by TCP payload
+ * @iov:	Pointer to the array of IO vectors
 * @iov_cnt:	Length of the array
 * @l4offset:	IPv4 payload offset in the iovec array
 */
-static void tcp_update_check_tcp4(const struct iphdr *iph, struct tcphdr *th)
+static void tcp_update_check_tcp4(const struct iphdr *iph,
 				  const struct iovec *iov, int iov_cnt,
 				  size_t l4offset)
 {
 	uint16_t l4len = ntohs(iph->tot_len) - sizeof(struct iphdr);
 	struct in_addr saddr = { .s_addr = iph->saddr };
 	struct in_addr daddr = { .s_addr = iph->daddr };
-	uint32_t sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr);
+	size_t check_ofs;
 	uint16_t *check;
 	int check_idx;
 	uint32_t sum;
 	char *ptr;
-	th->check = 0;
+	sum = proto_ipv4_header_psum(l4len, IPPROTO_TCP, saddr, daddr);
-	th->check = csum(th, l4len, sum);
+
 	check_idx = iov_skip_bytes(iov, iov_cnt,
 				   l4offset + offsetof(struct tcphdr, check),
 				   &check_ofs);
 	if (check_idx >= iov_cnt) {
 		err("TCP4 buffer is too small, iov size %zd, check offset %zd",
 		    iov_size(iov, iov_cnt),
 		    l4offset + offsetof(struct tcphdr, check));
 		return;
 	}
 	if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) {
 		err("TCP4 checksum field memory is not contiguous "
 		    "check_ofs %zd check_idx %d iov_len %zd",
 		    check_ofs, check_idx, iov[check_idx].iov_len);
 		return;
 	}
 	ptr = (char *)iov[check_idx].iov_base + check_ofs;
 	if ((uintptr_t)ptr & (__alignof__(*check) - 1)) {
 		err("TCP4 checksum field is not correctly aligned in memory");
 		return;
 	}
 	check = (uint16_t *)ptr;
 	*check = 0;
 	*check = csum_iov(iov, iov_cnt, l4offset, sum);
 }
 /**
 * tcp_update_check_tcp6() - Calculate TCP checksum for IPv6
 * @ip6h:	IPv6 header
- * @th:		TCP header followed by TCP payload
+ * @iov:	Pointer to the array of IO vectors
 * @iov_cnt:	Length of the array
 * @l4offset:	IPv6 payload offset in the iovec array
 */
-static void tcp_update_check_tcp6(struct ipv6hdr *ip6h, struct tcphdr *th)
+static void tcp_update_check_tcp6(const struct ipv6hdr *ip6h,
 				  const struct iovec *iov, int iov_cnt,
 				  size_t l4offset)
 {
 	uint16_t l4len = ntohs(ip6h->payload_len);
-	uint32_t sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP,
+	size_t check_ofs;
-					      &ip6h->saddr, &ip6h->daddr);
+	uint16_t *check;
 	int check_idx;
 	uint32_t sum;
 	char *ptr;
-	th->check = 0;
+	sum = proto_ipv6_header_psum(l4len, IPPROTO_TCP, &ip6h->saddr,
-	th->check = csum(th, l4len, sum);
+				     &ip6h->daddr);
 	check_idx = iov_skip_bytes(iov, iov_cnt,
 				   l4offset + offsetof(struct tcphdr, check),
 				   &check_ofs);
 	if (check_idx >= iov_cnt) {
 		err("TCP6 buffer is too small, iov size %zd, check offset %zd",
 		    iov_size(iov, iov_cnt),
 		    l4offset + offsetof(struct tcphdr, check));
 		return;
 	}
 	if (check_ofs + sizeof(*check) > iov[check_idx].iov_len) {
 		err("TCP6 checksum field memory is not contiguous "
 		    "check_ofs %zd check_idx %d iov_len %zd",
 		    check_ofs, check_idx, iov[check_idx].iov_len);
 		return;
 	}
 	ptr = (char *)iov[check_idx].iov_base + check_ofs;
 	if ((uintptr_t)ptr & (__alignof__(*check) - 1)) {
 		err("TCP6 checksum field is not correctly aligned in memory");
 		return;
 	}
 	check = (uint16_t *)ptr;
 	*check = 0;
 	*check = csum_iov(iov, iov_cnt, l4offset, sum);
 }
 /**
@ -869,7 +937,6 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn)
 /* cppcheck-suppress [constParameterPointer, unmatchedSuppression] */
 void tcp_defer_handler(struct ctx *c)
 {
 	tcp_flags_flush(c);
 	tcp_payload_flush(c);
 }
@ -903,23 +970,24 @@ static void tcp_fill_header(struct tcphdr *th,
 * @conn:		Connection pointer
 * @taph:		tap backend specific header
 * @iph:		Pointer to IPv4 header
- * @th:		Pointer to TCP header
+ * @bp:			Pointer to TCP header followed by TCP payload
 * @dlen:		TCP payload length
 * @check:		Checksum, if already known
 * @seq:		Sequence number for this segment
 * @no_tcp_csum:	Do not set TCP checksum
 *
 * Return: The IPv4 payload length, host order
 */
 static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn,
 				struct tap_hdr *taph,
-				struct iphdr *iph, struct tcphdr *th,
+				struct iphdr *iph, struct tcp_payload_t *bp,
 				size_t dlen, const uint16_t *check,
-				uint32_t seq)
+				uint32_t seq, bool no_tcp_csum)
 {
 	const struct flowside *tapside = TAPFLOW(conn);
 	const struct in_addr *src4 = inany_v4(&tapside->oaddr);
 	const struct in_addr *dst4 = inany_v4(&tapside->eaddr);
-	size_t l4len = dlen + sizeof(*th);
+	size_t l4len = dlen + sizeof(bp->th);
 	size_t l3len = l4len + sizeof(*iph);
 	ASSERT(src4 && dst4);
@ -931,9 +999,18 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn,
 	iph->check = check ? *check :
 			     csum_ip4_header(l3len, IPPROTO_TCP, *src4, *dst4);
-	tcp_fill_header(th, conn, seq);
+	tcp_fill_header(&bp->th, conn, seq);
-	tcp_update_check_tcp4(iph, th);
+	if (no_tcp_csum) {
 		bp->th.check = 0;
 	} else {
 		const struct iovec iov = {
 			.iov_base = bp,
 			.iov_len = ntohs(iph->tot_len) - sizeof(struct iphdr),
 		};
 		tcp_update_check_tcp4(iph, &iov, 1, 0);
 	}
 	tap_hdr_update(taph, l3len + sizeof(struct ethhdr));
@ -945,20 +1022,21 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn,
 * @conn:		Connection pointer
 * @taph:		tap backend specific header
 * @ip6h:		Pointer to IPv6 header
- * @th:		Pointer to TCP header
+ * @bp:			Pointer to TCP header followed by TCP payload
 * @dlen:		TCP payload length
 * @check:		Checksum, if already known
 * @seq:		Sequence number for this segment
 * @no_tcp_csum:	Do not set TCP checksum
 *
 * Return: The IPv6 payload length, host order
 */
 static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn,
 				struct tap_hdr *taph,
-				struct ipv6hdr *ip6h, struct tcphdr *th,
+				struct ipv6hdr *ip6h, struct tcp_payload_t *bp,
-				size_t dlen, uint32_t seq)
+				size_t dlen, uint32_t seq, bool no_tcp_csum)
 {
 	const struct flowside *tapside = TAPFLOW(conn);
-	size_t l4len = dlen + sizeof(*th);
+	size_t l4len = dlen + sizeof(bp->th);
 	ip6h->payload_len = htons(l4len);
 	ip6h->saddr = tapside->oaddr.a6;
@ -972,9 +1050,18 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn,
 	ip6h->flow_lbl[1] = (conn->sock >> 8) & 0xff;
 	ip6h->flow_lbl[2] = (conn->sock >> 0) & 0xff;
-	tcp_fill_header(th, conn, seq);
+	tcp_fill_header(&bp->th, conn, seq);
-	tcp_update_check_tcp6(ip6h, th);
+	if (no_tcp_csum) {
 		bp->th.check = 0;
 	} else {
 		const struct iovec iov = {
 			.iov_base = bp,
 			.iov_len = ntohs(ip6h->payload_len)
 		};
 		tcp_update_check_tcp6(ip6h, &iov, 1, 0);
 	}
 	tap_hdr_update(taph, l4len + sizeof(*ip6h) + sizeof(struct ethhdr));
@ -988,12 +1075,14 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn,
 * @dlen:	TCP payload length
 * @check:	Checksum, if already known
 * @seq:	Sequence number for this segment
 * @no_tcp_csum: Do not set TCP checksum
 *
 * Return: IP payload length, host order
 */
 size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
 			       struct iovec *iov, size_t dlen,
-			       const uint16_t *check, uint32_t seq)
+			       const uint16_t *check, uint32_t seq,
 			       bool no_tcp_csum)
 {
 	const struct flowside *tapside = TAPFLOW(conn);
 	const struct in_addr *a4 = inany_v4(&tapside->oaddr);
@ -1002,13 +1091,13 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
 		return tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base,
 					 iov[TCP_IOV_IP].iov_base,
 					 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
-					 check, seq);
+					 check, seq, no_tcp_csum);
 	}
 	return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base,
 				 iov[TCP_IOV_IP].iov_base,
 				 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
-				 seq);
+				 seq, no_tcp_csum);
 }
 /**
@ -1021,25 +1110,24 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
 * Return: 1 if sequence or window were updated, 0 otherwise
 */
 int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
-			  int force_seq, struct tcp_info *tinfo)
+			  bool force_seq, struct tcp_info_linux *tinfo)
 {
 	uint32_t prev_wnd_to_tap = conn->wnd_to_tap << conn->ws_to_tap;
 	uint32_t prev_ack_to_tap = conn->seq_ack_to_tap;
 	/* cppcheck-suppress [ctunullpointer, unmatchedSuppression] */
 	socklen_t sl = sizeof(*tinfo);
-	struct tcp_info tinfo_new;
+	struct tcp_info_linux tinfo_new;
 	uint32_t new_wnd_to_tap = prev_wnd_to_tap;
 	int s = conn->sock;
-#ifndef HAS_BYTES_ACKED
+	if (!bytes_acked_cap) {
 	(void)force_seq;
 		conn->seq_ack_to_tap = conn->seq_from_tap;
 		if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
 			conn->seq_ack_to_tap = prev_ack_to_tap;
-#else
+	} else {
-	if ((unsigned)SNDBUF_GET(conn) < SNDBUF_SMALL || tcp_rtt_dst_low(conn)
+		if ((unsigned)SNDBUF_GET(conn) < SNDBUF_SMALL ||
-	    || CONN_IS_CLOSING(conn) || (conn->flags & LOCAL) || force_seq) {
+		    tcp_rtt_dst_low(conn) || CONN_IS_CLOSING(conn) ||
 		    (conn->flags & LOCAL) || force_seq) {
 			conn->seq_ack_to_tap = conn->seq_from_tap;
 		} else if (conn->seq_ack_to_tap != conn->seq_from_tap) {
 			if (!tinfo) {
@ -1054,9 +1142,9 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
 			if (SEQ_LT(conn->seq_ack_to_tap, prev_ack_to_tap))
 				conn->seq_ack_to_tap = prev_ack_to_tap;
 		}
-#endif /* !HAS_BYTES_ACKED */
+	}
-	if (!KERNEL_REPORTS_SND_WND(c)) {
+	if (!snd_wnd_cap) {
 		tcp_get_sndbuf(conn);
 		new_wnd_to_tap = MIN(SNDBUF_GET(conn), MAX_WINDOW);
 		conn->wnd_to_tap = MIN(new_wnd_to_tap >> conn->ws_to_tap,
@ -1074,7 +1162,6 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
 		}
 	}
 #ifdef HAS_SND_WND
 	if ((conn->flags & LOCAL) || tcp_rtt_dst_low(conn)) {
 		new_wnd_to_tap = tinfo->tcpi_snd_wnd;
 	} else {
@ -1082,7 +1169,6 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
 		new_wnd_to_tap = MIN((int)tinfo->tcpi_snd_wnd,
 				     SNDBUF_GET(conn));
 	}
 #endif
 	new_wnd_to_tap = MIN(new_wnd_to_tap, MAX_WINDOW);
 	if (!(conn->events & ESTABLISHED))
@ -1140,11 +1226,11 @@ static void tcp_update_seqack_from_tap(const struct ctx *c,
 *	     0 if there is no flag to send
 *	     1 otherwise
 */
-int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn,
+int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
-		      int flags, struct tcphdr *th, char *data,
+		      int flags, struct tcphdr *th, struct tcp_syn_opts *opts,
 		      size_t *optlen)
 {
-	struct tcp_info tinfo = { 0 };
+	struct tcp_info_linux tinfo = { 0 };
 	socklen_t sl = sizeof(tinfo);
 	int s = conn->sock;
@ -1157,27 +1243,16 @@ int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn,
 		return -ECONNRESET;
 	}
 #ifdef HAS_SND_WND
 	if (!c->tcp.kernel_snd_wnd && tinfo.tcpi_snd_wnd)
 		c->tcp.kernel_snd_wnd = 1;
 #endif
 	if (!(conn->flags & LOCAL))
 		tcp_rtt_dst_check(conn, &tinfo);
-	if (!tcp_update_seqack_wnd(c, conn, flags, &tinfo) && !flags)
+	if (!tcp_update_seqack_wnd(c, conn, !!flags, &tinfo) && !flags)
 		return 0;
 	*optlen = 0;
 	if (flags & SYN) {
 		int mss;
 		/* Options: MSS, NOP and window scale (8 bytes) */
 		*optlen = OPT_MSS_LEN + 1 + OPT_WS_LEN;
 		*data++ = OPT_MSS;
 		*data++ = OPT_MSS_LEN;
 		if (c->mtu == -1) {
 			mss = tinfo.tcpi_snd_mss;
 		} else {
@ -1193,16 +1268,11 @@ int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn,
 			else if (mss > PAGE_SIZE)
 				mss = ROUND_DOWN(mss, PAGE_SIZE);
 		}
 		*(uint16_t *)data = htons(MIN(USHRT_MAX, mss));
 		data += OPT_MSS_LEN - 2;
 		conn->ws_to_tap = MIN(MAX_WS, tinfo.tcpi_snd_wscale);
-		*data++ = OPT_NOP;
+		*opts = TCP_SYN_OPTS(mss, conn->ws_to_tap);
-		*data++ = OPT_WS;
+		*optlen = sizeof(*opts);
 		*data++ = OPT_WS_LEN;
 		*data++ = conn->ws_to_tap;
 	} else if (!(flags & RST)) {
 		flags |= ACK;
 	}
@ -1239,7 +1309,8 @@ int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn,
 *
 * Return: negative error code on connection reset, 0 otherwise
 */
-int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
+static int tcp_send_flag(const struct ctx *c, struct tcp_tap_conn *conn,
 			 int flags)
 {
 	return tcp_buf_send_flag(c, conn, flags);
 }
@ -1249,7 +1320,7 @@ int tcp_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 * @c:		Execution context
 * @conn:	Connection pointer
 */
-void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn)
+void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn)
 {
 	if (conn->events == CLOSED)
 		return;
@ -1339,7 +1410,7 @@ static int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
 {
 	int s;
-	s = socket(af, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP);
+	s = socket(af, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, IPPROTO_TCP);
 	if (s > FD_REF_MAX) {
 		close(s);
@ -1467,7 +1538,7 @@ static void tcp_bind_outbound(const struct ctx *c,
 * @optlen:	Bytes in options: caller MUST ensure available length
 * @now:	Current timestamp
 */
-static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
+static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
 			      const void *saddr, const void *daddr,
 			      const struct tcphdr *th, const char *opts,
 			      size_t optlen, const struct timespec *now)
@ -1632,7 +1703,7 @@ static int tcp_sock_consume(const struct tcp_tap_conn *conn, uint32_t ack_seq)
 *
 * #syscalls recvmsg
 */
-static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
+static int tcp_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 {
 	return tcp_buf_data_from_sock(c, conn);
 }
@ -1648,7 +1719,7 @@ static int tcp_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 *
 * Return: count of consumed packets
 */
-static int tcp_data_from_tap(struct ctx *c, struct tcp_tap_conn *conn,
+static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 			     const struct pool *p, int idx)
 {
 	int i, iov_i, ack = 0, fin = 0, retr = 0, keep = -1, partial_send = 0;
@ -1846,7 +1917,8 @@ out:
 * @opts:	Pointer to start of options
 * @optlen:	Bytes in options: caller MUST ensure available length
 */
-static void tcp_conn_from_sock_finish(struct ctx *c, struct tcp_tap_conn *conn,
+static void tcp_conn_from_sock_finish(const struct ctx *c,
 				      struct tcp_tap_conn *conn,
 				      const struct tcphdr *th,
 				      const char *opts, size_t optlen)
 {
@ -1869,11 +1941,12 @@ static void tcp_conn_from_sock_finish(struct ctx *c, struct tcp_tap_conn *conn,
 		return;
 	}
 	tcp_send_flag(c, conn, ACK);
 	/* The client might have sent data already, which we didn't
 	 * dequeue waiting for SYN,ACK from tap -- check now.
 	 */
 	tcp_data_from_sock(c, conn);
 	tcp_send_flag(c, conn, ACK);
 }
 /**
@ -1889,7 +1962,7 @@ static void tcp_conn_from_sock_finish(struct ctx *c, struct tcp_tap_conn *conn,
 *
 * Return: count of consumed packets
 */
-int tcp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af,
+int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 		    const void *saddr, const void *daddr,
 		    const struct pool *p, int idx, const struct timespec *now)
 {
@ -2027,7 +2100,7 @@ reset:
 * @c:		Execution context
 * @conn:	Connection pointer
 */
-static void tcp_connect_finish(struct ctx *c, struct tcp_tap_conn *conn)
+static void tcp_connect_finish(const struct ctx *c, struct tcp_tap_conn *conn)
 {
 	socklen_t sl;
 	int so;
@ -2053,8 +2126,8 @@ static void tcp_connect_finish(struct ctx *c, struct tcp_tap_conn *conn)
 * @sa:		Peer socket address (from accept())
 * @now:	Current timestamp
 */
-static void tcp_tap_conn_from_sock(struct ctx *c, union flow *flow, int s,
+static void tcp_tap_conn_from_sock(const struct ctx *c, union flow *flow,
-				   const struct timespec *now)
+				   int s, const struct timespec *now)
 {
 	struct tcp_tap_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
 	uint64_t hash;
@ -2085,7 +2158,7 @@ static void tcp_tap_conn_from_sock(struct ctx *c, union flow *flow, int s,
 * @ref:	epoll reference of listening socket
 * @now:	Current timestamp
 */
-void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
+void tcp_listen_handler(const struct ctx *c, union epoll_ref ref,
 			const struct timespec *now)
 {
 	const struct flowside *ini;
@ -2150,7 +2223,7 @@ cancel:
 *
 * #syscalls timerfd_gettime arm:timerfd_gettime64 i686:timerfd_gettime64
 */
-void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
+void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
 {
 	struct itimerspec check_armed = { { 0 }, { 0 } };
 	struct tcp_tap_conn *conn = &FLOW(ref.flow)->tcp;
@ -2162,7 +2235,9 @@ void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
 	 * timer is currently armed, this event came from a previous setting,
 	 * and we just set the timer to a new point in the future: discard it.
 	 */
-	timerfd_gettime(conn->timer, &check_armed);
+	if (timerfd_gettime(conn->timer, &check_armed))
 		flow_err(conn, "failed to read timer: %s", strerror(errno));
 	if (check_armed.it_value.tv_sec || check_armed.it_value.tv_nsec)
 		return;
@ -2200,7 +2275,10 @@ void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
 		 * case. This avoids having to preemptively reset the timer on
 		 * ~ACK_TO_TAP_DUE or ~ACK_FROM_TAP_DUE.
 		 */
-		timerfd_settime(conn->timer, 0, &new, &old);
+		if (timerfd_settime(conn->timer, 0, &new, &old))
 			flow_err(conn, "failed to set timer: %s",
 				 strerror(errno));
 		if (old.it_value.tv_sec == ACT_TIMEOUT) {
 			flow_dbg(conn, "activity timeout");
 			tcp_rst(c, conn);
@ -2214,19 +2292,14 @@ void tcp_timer_handler(struct ctx *c, union epoll_ref ref)
 * @ref:	epoll reference
 * @events:	epoll events bitmap
 */
-void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events)
+void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
 		      uint32_t events)
 {
 	struct tcp_tap_conn *conn = conn_at_sidx(ref.flowside);
 	ASSERT(!c->no_tcp);
 	ASSERT(pif_at_sidx(ref.flowside) != PIF_TAP);
 	if (events & EPOLLRDHUP) {
 		flow_err(conn, "EPOLLRDHUP: events=0x%x  conn->events=0x%x "
 			 "conn->flags=0x%x\n", events, conn->events,
 			 conn->flags);
 	}
 	if (conn->events == CLOSED)
 		return;
@ -2251,7 +2324,7 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events)
 			tcp_data_from_sock(c, conn);
 		if (events & EPOLLOUT)
-			tcp_update_seqack_wnd(c, conn, 0, NULL);
+			tcp_update_seqack_wnd(c, conn, false, NULL);
 		return;
 	}
@ -2274,17 +2347,16 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events)
 }
 /**
- * tcp_sock_init_af() - Initialise listening socket for a given af and port
+ * tcp_sock_init_one() - Initialise listening socket for address and port
 * @c:		Execution context
- * @af:		Address family to listen on
+ * @addr:	Pointer to address for binding, NULL for dual stack any
 * @port:	Port, host order
 * @addr:	Pointer to address for binding, NULL if not configured
 * @ifname:	Name of interface to bind to, NULL if not configured
 * @port:	Port, host order
 *
 * Return: fd for the new listening socket, negative error code on failure
 */
-static int tcp_sock_init_af(const struct ctx *c, sa_family_t af, in_port_t port,
+static int tcp_sock_init_one(const struct ctx *c, const union inany_addr *addr,
-			    const void *addr, const char *ifname)
+			     const char *ifname, in_port_t port)
 {
 	union tcp_listen_epoll_ref tref = {
 		.port = port,
@ -2292,12 +2364,13 @@ static int tcp_sock_init_af(const struct ctx *c, sa_family_t af, in_port_t port,
 	};
 	int s;
-	s = sock_l4(c, af, EPOLL_TYPE_TCP_LISTEN, addr, ifname, port, tref.u32);
+	s = pif_sock_l4(c, EPOLL_TYPE_TCP_LISTEN, PIF_HOST, addr,
 				ifname, port, tref.u32);
 	if (c->tcp.fwd_in.mode == FWD_AUTO) {
-		if (af == AF_INET  || af == AF_UNSPEC)
+		if (!addr || inany_v4(addr))
 			tcp_sock_init_ext[port][V4] = s < 0 ? -1 : s;
-		if (af == AF_INET6 || af == AF_UNSPEC)
+		if (!addr || !inany_v4(addr))
 			tcp_sock_init_ext[port][V6] = s < 0 ? -1 : s;
 	}
@ -2311,31 +2384,32 @@ static int tcp_sock_init_af(const struct ctx *c, sa_family_t af, in_port_t port,
 /**
 * tcp_sock_init() - Create listening sockets for a given host ("inbound") port
 * @c:		Execution context
 * @af:		Address family to select a specific IP version, or AF_UNSPEC
 * @addr:	Pointer to address for binding, NULL if not configured
 * @ifname:	Name of interface to bind to, NULL if not configured
 * @port:	Port, host order
 *
 * Return: 0 on (partial) success, negative error code on (complete) failure
 */
-int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr,
+int tcp_sock_init(const struct ctx *c, const union inany_addr *addr,
 		  const char *ifname, in_port_t port)
 {
 	int r4 = FD_REF_MAX + 1, r6 = FD_REF_MAX + 1;
 	ASSERT(!c->no_tcp);
-	if (af == AF_UNSPEC && c->ifi4 && c->ifi6)
+	if (!addr && c->ifi4 && c->ifi6)
 		/* Attempt to get a dual stack socket */
-		if (tcp_sock_init_af(c, AF_UNSPEC, port, addr, ifname) >= 0)
+		if (tcp_sock_init_one(c, NULL, ifname, port) >= 0)
 			return 0;
 	/* Otherwise create a socket per IP version */
-	if ((af == AF_INET  || af == AF_UNSPEC) && c->ifi4)
+	if ((!addr || inany_v4(addr)) && c->ifi4)
-		r4 = tcp_sock_init_af(c, AF_INET, port, addr, ifname);
+		r4 = tcp_sock_init_one(c, addr ? addr : &inany_any4,
 				       ifname, port);
-	if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6)
+	if ((!addr || !inany_v4(addr)) && c->ifi6)
-		r6 = tcp_sock_init_af(c, AF_INET6, port, addr, ifname);
+		r6 = tcp_sock_init_one(c, addr ? addr : &inany_any6,
 				       ifname, port);
 	if (IN_INTERVAL(0, FD_REF_MAX, r4) || IN_INTERVAL(0, FD_REF_MAX, r6))
 		return 0;
@ -2358,7 +2432,7 @@ static void tcp_ns_sock_init4(const struct ctx *c, in_port_t port)
 	ASSERT(c->mode == MODE_PASTA);
-	s = sock_l4(c, AF_INET, EPOLL_TYPE_TCP_LISTEN, &in4addr_loopback,
+	s = pif_sock_l4(c, EPOLL_TYPE_TCP_LISTEN, PIF_SPLICE, &inany_loopback4,
 			NULL, port, tref.u32);
 	if (s >= 0)
 		tcp_sock_set_bufsize(c, s);
@ -2384,7 +2458,7 @@ static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port)
 	ASSERT(c->mode == MODE_PASTA);
-	s = sock_l4(c, AF_INET6, EPOLL_TYPE_TCP_LISTEN, &in6addr_loopback,
+	s = pif_sock_l4(c, EPOLL_TYPE_TCP_LISTEN, PIF_SPLICE, &inany_loopback6,
 			NULL, port, tref.u32);
 	if (s >= 0)
 		tcp_sock_set_bufsize(c, s);
@ -2487,7 +2561,7 @@ static void tcp_sock_refill_init(const struct ctx *c)
 *
 * Return: true if supported, false otherwise
 */
-bool tcp_probe_peek_offset_cap(sa_family_t af)
+static bool tcp_probe_peek_offset_cap(sa_family_t af)
 {
 	bool ret = false;
 	int s, optv = 0;
@ -2504,6 +2578,34 @@ bool tcp_probe_peek_offset_cap(sa_family_t af)
 	return ret;
 }
 /**
 * tcp_probe_tcp_info() - Check what data TCP_INFO reports
 *
 * Create a temporary TCP socket and query TCP_INFO on it: getsockopt() updates
 * the length argument to the number of bytes it actually filled in.
 *
 * Return: number of bytes returned by TCP_INFO getsockopt(), 0 on failure
 */
 static socklen_t tcp_probe_tcp_info(void)
 {
 	struct tcp_info_linux tinfo;
 	socklen_t sl = sizeof(tinfo);
 	int s;
 	s = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP);
 	if (s < 0) {
 		warn_perror("Temporary TCP socket creation failed");
 		/* The result is a byte count, not a boolean: report 0 bytes */
 		return 0;
 	}
 	if (getsockopt(s, SOL_TCP, TCP_INFO, &tinfo, &sl)) {
 		warn_perror("Failed to get TCP_INFO on temporary socket");
 		close(s);
 		return 0;
 	}
 	close(s);
 	return sl;
 }
 /**
 * tcp_init() - Get initial sequence, hash secret, initialise per-socket data
 * @c:		Execution context
@ -2514,11 +2616,7 @@ int tcp_init(struct ctx *c)
 {
 	ASSERT(!c->no_tcp);
-	if (c->ifi4)
+	tcp_sock_iov_init(c);
 		tcp_sock4_iov_init(c);
 	if (c->ifi6)
 		tcp_sock6_iov_init(c);
 	memset(init_sock_pool4,		0xff,	sizeof(init_sock_pool4));
 	memset(init_sock_pool6,		0xff,	sizeof(init_sock_pool6));
@ -2537,6 +2635,15 @@ int tcp_init(struct ctx *c)
 			  (!c->ifi6 || tcp_probe_peek_offset_cap(AF_INET6));
 	debug("SO_PEEK_OFF%ssupported", peek_offset_cap ? " " : " not ");
 	tcp_info_size = tcp_probe_tcp_info();
 #define dbg_tcpi(f_)	debug("TCP_INFO tcpi_%s field%s supported",	\
 			      STRINGIFY(f_), tcp_info_cap(f_) ? " " : " not ")
 	dbg_tcpi(snd_wnd);
 	dbg_tcpi(bytes_acked);
 	dbg_tcpi(min_rtt);
 #undef dbg_tcpi
 	return 0;
 }
@ -2578,7 +2685,7 @@ static void tcp_port_rebind(struct ctx *c, bool outbound)
 			if (outbound)
 				tcp_ns_sock_init(c, port);
 			else
-				tcp_sock_init(c, AF_UNSPEC, NULL, NULL, port);
+				tcp_sock_init(c, NULL, NULL, port);
 		}
 	}
 }
--- a/tcp.h
+++ b/tcp.h
@ -10,14 +10,15 @@
 struct ctx;
-void tcp_timer_handler(struct ctx *c, union epoll_ref ref);
+void tcp_timer_handler(const struct ctx *c, union epoll_ref ref);
-void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
+void tcp_listen_handler(const struct ctx *c, union epoll_ref ref,
 			const struct timespec *now);
-void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events);
+void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
-int tcp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af,
+		      uint32_t events);
 int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 		    const void *saddr, const void *daddr,
 		    const struct pool *p, int idx, const struct timespec *now);
-int tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr,
+int tcp_sock_init(const struct ctx *c, const union inany_addr *addr,
 		  const char *ifname, in_port_t port);
 int tcp_init(struct ctx *c);
 void tcp_timer(struct ctx *c, const struct timespec *now);
@ -58,16 +59,12 @@ union tcp_listen_epoll_ref {
 * @fwd_in:		Port forwarding configuration for inbound packets
 * @fwd_out:		Port forwarding configuration for outbound packets
 * @timer_run:		Timestamp of most recent timer run
 * @kernel_snd_wnd:	Kernel reports sending window (with commit 8f7baad7f035)
 * @pipe_size:		Size of pipes for spliced connections
 */
 struct tcp_ctx {
 	struct fwd_ports fwd_in;
 	struct fwd_ports fwd_out;
 	struct timespec timer_run;
 #ifdef HAS_SND_WND
 	int kernel_snd_wnd;
 #endif
 	size_t pipe_size;
 };
--- a/tcp_buf.c
+++ b/tcp_buf.c
@ -20,7 +20,7 @@
 #include <netinet/ip.h>
-#include <linux/tcp.h>
+#include <netinet/tcp.h>
 #include "util.h"
 #include "ip.h"
@ -38,88 +38,32 @@
 	(c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM)
 /* Static buffers */
 /**
 * struct tcp_payload_t - TCP header and data to send segments with payload
 * @th:		TCP header
 * @data:	TCP data
 */
 struct tcp_payload_t {
 	struct tcphdr th;
 	uint8_t data[IP_MAX_MTU - sizeof(struct tcphdr)];
 #ifdef __AVX2__
 } __attribute__ ((packed, aligned(32)));    /* For AVX2 checksum routines */
 #else
 } __attribute__ ((packed, aligned(__alignof__(unsigned int))));
 #endif
-/**
+/* Ethernet header for IPv4 and IPv6 frames */
 * struct tcp_flags_t - TCP header and data to send zero-length
 *                      segments (flags)
 * @th:		TCP header
 * @opts	TCP options
 */
 struct tcp_flags_t {
 	struct tcphdr th;
 	char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
 #ifdef __AVX2__
 } __attribute__ ((packed, aligned(32)));
 #else
 } __attribute__ ((packed, aligned(__alignof__(unsigned int))));
 #endif
 /* Ethernet header for IPv4 frames */
 static struct ethhdr		tcp4_eth_src;
 static struct tap_hdr		tcp4_payload_tap_hdr[TCP_FRAMES_MEM];
 /* IPv4 headers */
 static struct iphdr		tcp4_payload_ip[TCP_FRAMES_MEM];
 /* TCP segments with payload for IPv4 frames */
 static struct tcp_payload_t	tcp4_payload[TCP_FRAMES_MEM];
 static_assert(MSS4 <= sizeof(tcp4_payload[0].data), "MSS4 is greater than 65516");
 /* References tracking the owner connection of frames in the tap outqueue */
 static struct tcp_tap_conn *tcp4_frame_conns[TCP_FRAMES_MEM];
 static unsigned int tcp4_payload_used;
 static struct tap_hdr		tcp4_flags_tap_hdr[TCP_FRAMES_MEM];
 /* IPv4 headers for TCP segment without payload */
 static struct iphdr		tcp4_flags_ip[TCP_FRAMES_MEM];
 /* TCP segments without payload for IPv4 frames */
 static struct tcp_flags_t	tcp4_flags[TCP_FRAMES_MEM];
 static unsigned int tcp4_flags_used;
 /* Ethernet header for IPv6 frames */
 static struct ethhdr		tcp6_eth_src;
-static struct tap_hdr		tcp6_payload_tap_hdr[TCP_FRAMES_MEM];
+static struct tap_hdr		tcp_payload_tap_hdr[TCP_FRAMES_MEM];
 /* IPv6 headers */
 static struct ipv6hdr		tcp6_payload_ip[TCP_FRAMES_MEM];
 /* TCP headers and data for IPv6 frames */
 static struct tcp_payload_t	tcp6_payload[TCP_FRAMES_MEM];
-static_assert(MSS6 <= sizeof(tcp6_payload[0].data), "MSS6 is greater than 65516");
+/* IP headers for IPv4 and IPv6 */
 struct iphdr		tcp4_payload_ip[TCP_FRAMES_MEM];
 struct ipv6hdr		tcp6_payload_ip[TCP_FRAMES_MEM];
 /* TCP segments with payload for IPv4 and IPv6 frames */
 static struct tcp_payload_t	tcp_payload[TCP_FRAMES_MEM];
 static_assert(MSS4 <= sizeof(tcp_payload[0].data), "MSS4 is greater than 65516");
 static_assert(MSS6 <= sizeof(tcp_payload[0].data), "MSS6 is greater than 65516");
 /* References tracking the owner connection of frames in the tap outqueue */
-static struct tcp_tap_conn *tcp6_frame_conns[TCP_FRAMES_MEM];
+static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
-static unsigned int tcp6_payload_used;
+static unsigned int tcp_payload_used;
 static struct tap_hdr		tcp6_flags_tap_hdr[TCP_FRAMES_MEM];
 /* IPv6 headers for TCP segment without payload */
 static struct ipv6hdr		tcp6_flags_ip[TCP_FRAMES_MEM];
 /* TCP segment without payload for IPv6 frames */
 static struct tcp_flags_t	tcp6_flags[TCP_FRAMES_MEM];
 static unsigned int tcp6_flags_used;
 /* recvmsg()/sendmsg() data for tap */
 static struct iovec	iov_sock		[TCP_FRAMES_MEM + 1];
-static struct iovec	tcp4_l2_iov		[TCP_FRAMES_MEM][TCP_NUM_IOVS];
+static struct iovec	tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
-static struct iovec	tcp6_l2_iov		[TCP_FRAMES_MEM][TCP_NUM_IOVS];
+
 static struct iovec	tcp4_l2_flags_iov	[TCP_FRAMES_MEM][TCP_NUM_IOVS];
 static struct iovec	tcp6_l2_flags_iov	[TCP_FRAMES_MEM][TCP_NUM_IOVS];
 /**
 * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
 * @eth_d:	Ethernet destination address, NULL if unchanged
@ -132,105 +76,30 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
 }
 /**
- * tcp_sock4_iov_init() - Initialise scatter-gather L2 buffers for IPv4 sockets
+ * tcp_sock_iov_init() - Initialise scatter-gather L2 buffers for IPv4 and IPv6 sockets
 * @c:		Execution context
 */
-void tcp_sock4_iov_init(const struct ctx *c)
+void tcp_sock_iov_init(const struct ctx *c)
 {
 	struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP);
 	struct iovec *iov;
 	int i;
 	tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
 	for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) {
 		tcp4_payload_ip[i] = iph;
 		tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4;
 		tcp4_payload[i].th.ack = 1;
 	}
 	for (i = 0; i < ARRAY_SIZE(tcp4_flags); i++) {
 		tcp4_flags_ip[i] = iph;
 		tcp4_flags[i].th.doff = sizeof(struct tcphdr) / 4;
 		tcp4_flags[i].th.ack = 1;
 	}
 	for (i = 0; i < TCP_FRAMES_MEM; i++) {
 		iov = tcp4_l2_iov[i];
 		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_payload_tap_hdr[i]);
 		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
 		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[i]);
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i];
 	}
 	for (i = 0; i < TCP_FRAMES_MEM; i++) {
 		iov = tcp4_l2_flags_iov[i];
 		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_flags_tap_hdr[i]);
 		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
 		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
 		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[i]);
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_flags[i];
 	}
 }
 /**
 * tcp_sock6_iov_init() - Initialise scatter-gather L2 buffers for IPv6 sockets
 * @c:		Execution context
 */
 void tcp_sock6_iov_init(const struct ctx *c)
 {
 	struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP);
-	struct iovec *iov;
+	struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP);
 	int i;
 	tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
 	tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
-	for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) {
+	for (i = 0; i < ARRAY_SIZE(tcp_payload); i++) {
 		tcp6_payload_ip[i] = ip6;
-		tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4;
+		tcp4_payload_ip[i] = iph;
 		tcp6_payload[i].th.ack = 1;
 	}
 	for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) {
 		tcp6_flags_ip[i] = ip6;
 		tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4;
 		tcp6_flags[i].th .ack = 1;
 	}
 	for (i = 0; i < TCP_FRAMES_MEM; i++) {
-		iov = tcp6_l2_iov[i];
+		struct iovec *iov = tcp_l2_iov[i];
-		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]);
+		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_payload_tap_hdr[i]);
-		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
+		iov[TCP_IOV_ETH].iov_len = sizeof(struct ethhdr);
-		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[i]);
+		iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i];
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i];
 	}
 	for (i = 0; i < TCP_FRAMES_MEM; i++) {
 		iov = tcp6_l2_flags_iov[i];
 		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]);
 		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
 		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[i]);
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i];
 	}
 }
 /**
 * tcp_flags_flush() - Send out buffers for segments with no data (flags)
 * @c:		Execution context
 */
 void tcp_flags_flush(const struct ctx *c)
 {
 	tap_send_frames(c, &tcp6_l2_flags_iov[0][0], TCP_NUM_IOVS,
 			tcp6_flags_used);
 	tcp6_flags_used = 0;
 	tap_send_frames(c, &tcp4_l2_flags_iov[0][0], TCP_NUM_IOVS,
 			tcp4_flags_used);
 	tcp4_flags_used = 0;
 }
 /**
@ -240,7 +109,7 @@ void tcp_flags_flush(const struct ctx *c)
 * @frames:	Two-dimensional array containing queued frames with sub-iovs
 * @num_frames:	Number of entries in the two arrays to be compared
 */
-static void tcp_revert_seq(struct ctx *c, struct tcp_tap_conn **conns,
+static void tcp_revert_seq(const struct ctx *c, struct tcp_tap_conn **conns,
 			   struct iovec (*frames)[TCP_NUM_IOVS], int num_frames)
 {
 	int i;
@ -262,28 +131,20 @@ static void tcp_revert_seq(struct ctx *c, struct tcp_tap_conn **conns,
 }
 /**
- * tcp_payload_flush() - Send out buffers for segments with data
+ * tcp_payload_flush() - Send out buffers for segments with data or flags
 * @c:		Execution context
 */
-void tcp_payload_flush(struct ctx *c)
+void tcp_payload_flush(const struct ctx *c)
 {
 	size_t m;
-	m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS,
+	m = tap_send_frames(c, &tcp_l2_iov[0][0], TCP_NUM_IOVS,
-			    tcp6_payload_used);
+			    tcp_payload_used);
-	if (m != tcp6_payload_used) {
+	if (m != tcp_payload_used) {
-		tcp_revert_seq(c, &tcp6_frame_conns[m], &tcp6_l2_iov[m],
+		tcp_revert_seq(c, &tcp_frame_conns[m], &tcp_l2_iov[m],
-			       tcp6_payload_used - m);
+			       tcp_payload_used - m);
 	}
-	tcp6_payload_used = 0;
+	tcp_payload_used = 0;
 	m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS,
 			    tcp4_payload_used);
 	if (m != tcp4_payload_used) {
 		tcp_revert_seq(c, &tcp4_frame_conns[m], &tcp4_l2_iov[m],
 			       tcp4_payload_used - m);
 	}
 	tcp4_payload_used = 0;
 }
 /**
@ -294,58 +155,48 @@ void tcp_payload_flush(struct ctx *c)
 *
 * Return: negative error code on connection reset, 0 otherwise
 */
-int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
+int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 {
-	struct tcp_flags_t *payload;
+	struct tcp_payload_t *payload;
 	struct iovec *iov;
 	size_t optlen;
 	size_t l4len;
 	uint32_t seq;
 	int ret;
-	if (CONN_V4(conn))
+	iov = tcp_l2_iov[tcp_payload_used];
-		iov = tcp4_l2_flags_iov[tcp4_flags_used++];
+	if (CONN_V4(conn)) {
-	else
+		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
-		iov = tcp6_l2_flags_iov[tcp6_flags_used++];
+		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
 	} else {
 		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
 		iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
 	}
 	payload = iov[TCP_IOV_PAYLOAD].iov_base;
 	seq = conn->seq_to_tap;
 	ret = tcp_prepare_flags(c, conn, flags, &payload->th,
-				payload->opts, &optlen);
+				(struct tcp_syn_opts *)&payload->data, &optlen);
-	if (ret <= 0) {
+	if (ret <= 0)
 		if (CONN_V4(conn))
 			tcp4_flags_used--;
 		else
 			tcp6_flags_used--;
 		return ret;
 	}
-	l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq);
+	tcp_payload_used++;
 	l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false);
 	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
 	if (flags & DUP_ACK) {
-		struct iovec *dup_iov;
+		struct iovec *dup_iov = tcp_l2_iov[tcp_payload_used++];
 		int i;
-		if (CONN_V4(conn))
+		memcpy(dup_iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_TAP].iov_base,
-			dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++];
+		       iov[TCP_IOV_TAP].iov_len);
-		else
+		dup_iov[TCP_IOV_ETH].iov_base = iov[TCP_IOV_ETH].iov_base;
-			dup_iov = tcp6_l2_flags_iov[tcp6_flags_used++];
+		dup_iov[TCP_IOV_IP] = iov[TCP_IOV_IP];
-
+		memcpy(dup_iov[TCP_IOV_PAYLOAD].iov_base,
-		for (i = 0; i < TCP_NUM_IOVS; i++)
+		       iov[TCP_IOV_PAYLOAD].iov_base, l4len);
-			memcpy(dup_iov[i].iov_base, iov[i].iov_base,
+		dup_iov[TCP_IOV_PAYLOAD].iov_len = l4len;
 			       iov[i].iov_len);
 		dup_iov[TCP_IOV_PAYLOAD].iov_len = iov[TCP_IOV_PAYLOAD].iov_len;
 	}
-	if (CONN_V4(conn)) {
+	if (tcp_payload_used > TCP_FRAMES_MEM - 2)
-		if (tcp4_flags_used > TCP_FRAMES_MEM - 2)
+		tcp_payload_flush(c);
 			tcp_flags_flush(c);
 	} else {
 		if (tcp6_flags_used > TCP_FRAMES_MEM - 2)
 			tcp_flags_flush(c);
 	}
 	return 0;
 }
@ -358,39 +209,39 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 * @no_csum:	Don't compute IPv4 checksum, use the one from previous buffer
 * @seq:	Sequence number to be sent
 */
-static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn,
+static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 			    ssize_t dlen, int no_csum, uint32_t seq)
 {
 	struct tcp_payload_t *payload;
 	const uint16_t *check = NULL;
 	struct iovec *iov;
 	size_t l4len;
 	conn->seq_to_tap = seq + dlen;
-
+	tcp_frame_conns[tcp_payload_used] = conn;
 	iov = tcp_l2_iov[tcp_payload_used];
 	if (CONN_V4(conn)) {
 		struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1];
 		const uint16_t *check = NULL;
 		if (no_csum) {
 			struct iovec *iov_prev = tcp_l2_iov[tcp_payload_used - 1];
 			struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base;
 			check = &iph->check;
 		}
-
+		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
-		tcp4_frame_conns[tcp4_payload_used] = conn;
+		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
 		iov = tcp4_l2_iov[tcp4_payload_used++];
 		l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq);
 		iov[TCP_IOV_PAYLOAD].iov_len = l4len;
 		if (tcp4_payload_used > TCP_FRAMES_MEM - 1)
 			tcp_payload_flush(c);
 	} else if (CONN_V6(conn)) {
-		tcp6_frame_conns[tcp6_payload_used] = conn;
+		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[tcp_payload_used]);
-
+		iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src;
 		iov = tcp6_l2_iov[tcp6_payload_used++];
 		l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, NULL, seq);
 		iov[TCP_IOV_PAYLOAD].iov_len = l4len;
 		if (tcp6_payload_used > TCP_FRAMES_MEM - 1)
 			tcp_payload_flush(c);
 	}
 	payload = iov[TCP_IOV_PAYLOAD].iov_base;
 	payload->th.th_off = sizeof(struct tcphdr) / 4;
 	payload->th.th_x2 = 0;
 	payload->th.th_flags = 0;
 	payload->th.ack = 1;
 	l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false);
 	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
 	if (++tcp_payload_used > TCP_FRAMES_MEM - 1)
 		tcp_payload_flush(c);
 }
 /**
@ -402,12 +253,11 @@ static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn,
 *
 * #syscalls recvmsg
 */
-int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
+int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 {
 	uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
 	int fill_bufs, send_bufs = 0, last_len, iov_rem = 0;
-	int sendlen, len, dlen, v4 = CONN_V4(conn);
+	int len, dlen, i, s = conn->sock;
 	int s = conn->sock, i, ret = 0;
 	struct msghdr mh_sock = { 0 };
 	uint16_t mss = MSS_GET(conn);
 	uint32_t already_sent, seq;
@ -454,19 +304,15 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 		mh_sock.msg_iovlen = fill_bufs;
 	}
-	if (( v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM) ||
+	if (tcp_payload_used + fill_bufs > TCP_FRAMES_MEM) {
 	    (!v4 && tcp6_payload_used + fill_bufs > TCP_FRAMES_MEM)) {
 		tcp_payload_flush(c);
 		/* Silence Coverity CWE-125 false positive */
-		tcp4_payload_used = tcp6_payload_used = 0;
+		tcp_payload_used = 0;
 	}
 	for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) {
-		if (v4)
+		iov->iov_base = &tcp_payload[tcp_payload_used + i].data;
 			iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data;
 		else
 			iov->iov_base = &tcp6_payload[tcp6_payload_used + i].data;
 		iov->iov_len = mss;
 	}
 	if (iov_rem)
@ -477,12 +323,19 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 		len = recvmsg(s, &mh_sock, MSG_PEEK);
 	while (len < 0 && errno == EINTR);
-	if (len < 0)
+	if (len < 0) {
-		goto err;
+		if (errno != EAGAIN && errno != EWOULDBLOCK) {
 			tcp_rst(c, conn);
 			return -errno;
 		}
 		return 0;
 	}
 	if (!len) {
 		if ((conn->events & (SOCK_FIN_RCVD | TAP_FIN_SENT)) == SOCK_FIN_RCVD) {
-			if ((ret = tcp_buf_send_flag(c, conn, FIN | ACK))) {
+			int ret = tcp_buf_send_flag(c, conn, FIN | ACK);
 			if (ret) {
 				tcp_rst(c, conn);
 				return ret;
 			}
@ -493,28 +346,27 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 		return 0;
 	}
 	sendlen = len;
 	if (!peek_offset_cap)
-		sendlen -= already_sent;
+		len -= already_sent;
-	if (sendlen <= 0) {
+	if (len <= 0) {
 		conn_flag(c, conn, STALLED);
 		return 0;
 	}
 	conn_flag(c, conn, ~STALLED);
-	send_bufs = DIV_ROUND_UP(sendlen, mss);
+	send_bufs = DIV_ROUND_UP(len, mss);
-	last_len = sendlen - (send_bufs - 1) * mss;
+	last_len = len - (send_bufs - 1) * mss;
 	/* Likely, some new data was acked too. */
-	tcp_update_seqack_wnd(c, conn, 0, NULL);
+	tcp_update_seqack_wnd(c, conn, false, NULL);
 	/* Finally, queue to tap */
 	dlen = mss;
 	seq = conn->seq_to_tap;
 	for (i = 0; i < send_bufs; i++) {
-		int no_csum = i && i != send_bufs - 1 && tcp4_payload_used;
+		int no_csum = i && i != send_bufs - 1 && tcp_payload_used;
 		if (i == send_bufs - 1)
 			dlen = last_len;
@ -526,12 +378,4 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 	conn_flag(c, conn, ACK_FROM_TAP_DUE);
 	return 0;
 err:
 	if (errno != EAGAIN && errno != EWOULDBLOCK) {
 		ret = -errno;
 		tcp_rst(c, conn);
 	}
 	return ret;
 }
--- a/tcp_buf.h
+++ b/tcp_buf.h
@ -6,11 +6,9 @@
 #ifndef TCP_BUF_H
 #define TCP_BUF_H
-void tcp_sock4_iov_init(const struct ctx *c);
+void tcp_sock_iov_init(const struct ctx *c);
-void tcp_sock6_iov_init(const struct ctx *c);
+void tcp_payload_flush(const struct ctx *c);
-void tcp_flags_flush(const struct ctx *c);
+int tcp_buf_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn);
-void tcp_payload_flush(struct ctx *c);
+int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags);
 int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn);
 int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags);
 #endif  /*TCP_BUF_H */
--- a/tcp_internal.h
+++ b/tcp_internal.h
@ -33,9 +33,7 @@
 #define OPT_EOL		0
 #define OPT_NOP		1
 #define OPT_MSS		2
 #define OPT_MSS_LEN	4
 #define OPT_WS		3
 #define OPT_WS_LEN	3
 #define OPT_SACKP	4
 #define OPT_SACK	5
 #define OPT_TS		8
@ -63,6 +61,79 @@ enum tcp_iov_parts {
 	TCP_NUM_IOVS
 };
 /**
 * struct tcp_payload_t - TCP header and data to send segments with payload
 * @th:		TCP header
 * @data:	TCP data, sized so that header plus data add up to IP_MAX_MTU
 *
 * The structure is packed; its alignment is 32 bytes when built with AVX2
 * support, and the alignment of unsigned int otherwise.
 */
 struct tcp_payload_t {
 	struct tcphdr th;
 	uint8_t data[IP_MAX_MTU - sizeof(struct tcphdr)];
 #ifdef __AVX2__
 } __attribute__ ((packed, aligned(32)));    /* For AVX2 checksum routines */
 #else
 } __attribute__ ((packed, aligned(__alignof__(unsigned int))));
 #endif
 /**
 * struct tcp_opt_nop - TCP NOP option
 * @kind:	Option kind (OPT_NOP == 1)
 */
 struct tcp_opt_nop {
 	uint8_t kind;
 } __attribute__ ((packed));
 /* TCP_OPT_NOP - Compound literal for a NOP option, @kind set to OPT_NOP */
 #define TCP_OPT_NOP		((struct tcp_opt_nop){ .kind = OPT_NOP, })
 /**
 * struct tcp_opt_mss - TCP MSS option
 * @kind:	Option kind (OPT_MSS == 2)
 * @len:	Option length (4)
 * @mss:	Maximum Segment Size, stored in network byte order by TCP_OPT_MSS()
 */
 struct tcp_opt_mss {
 	uint8_t kind;
 	uint8_t len;
 	uint16_t mss;
 } __attribute__ ((packed));
 /* TCP_OPT_MSS() - Compound literal for an MSS option; @mss_ is in host order
 * and is converted with htons(), @len is the size of the packed structure
 */
 #define TCP_OPT_MSS(mss_)				\
 	((struct tcp_opt_mss) {				\
 		.kind = OPT_MSS,			\
 		.len = sizeof(struct tcp_opt_mss),	\
 		.mss = htons(mss_),			\
 	})
 /**
 * struct tcp_opt_ws - TCP Window Scaling option
 * @kind:	Option kind (OPT_WS == 3)
 * @len:	Option length (3)
 * @shift:	Window scaling shift
 */
 struct tcp_opt_ws {
 	uint8_t kind;
 	uint8_t len;
 	uint8_t shift;
 } __attribute__ ((packed));
 /* TCP_OPT_WS() - Compound literal for a Window Scaling option with shift
 * @shift_; @len is the size of the packed structure
 */
 #define TCP_OPT_WS(shift_)				\
 	((struct tcp_opt_ws) {				\
 		.kind = OPT_WS,				\
 		.len = sizeof(struct tcp_opt_ws),	\
 		.shift = (shift_),			\
 	})
 /**
 * struct tcp_syn_opts - TCP options we apply to SYN packets
 * @mss:	Maximum Segment Size (MSS) option
 * @nop:	NOP opt (for alignment)
 * @ws:		Window Scaling (WS) option
 *
 * Packed layout: 4 bytes of MSS option, 1 byte of NOP, 3 bytes of WS option.
 */
 struct tcp_syn_opts {
 	struct tcp_opt_mss mss;
 	struct tcp_opt_nop nop;
 	struct tcp_opt_ws ws;
 } __attribute__ ((packed));
 /* TCP_SYN_OPTS() - Compound literal for the SYN options block, built from
 * TCP_OPT_MSS(@mss_), TCP_OPT_NOP and TCP_OPT_WS(@ws_)
 */
 #define TCP_SYN_OPTS(mss_, ws_)				\
 	((struct tcp_syn_opts){				\
 		.mss = TCP_OPT_MSS(mss_),		\
 		.nop = TCP_OPT_NOP,			\
 		.ws = TCP_OPT_WS(ws_),			\
 	})
 extern char tcp_buf_discard [MAX_WINDOW];
 void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
@ -82,19 +153,23 @@ void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
 		conn_event_do(c, conn, event);				\
 	} while (0)
-void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn);
+void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn);
 #define tcp_rst(c, conn)						\
 	do {								\
 		flow_dbg((conn), "TCP reset at %s:%i", __func__, __LINE__); \
 		tcp_rst_do(c, conn);					\
 	} while (0)
 struct tcp_info_linux;
 size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
 			       struct iovec *iov, size_t dlen,
-			       const uint16_t *check, uint32_t seq);
+			       const uint16_t *check, uint32_t seq,
 			       bool no_tcp_csum);
 int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
-			  int force_seq, struct tcp_info *tinfo);
+			  bool force_seq, struct tcp_info_linux *tinfo);
-int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn, int flags,
+int tcp_prepare_flags(const struct ctx *c, struct tcp_tap_conn *conn,
-		      struct tcphdr *th, char *data, size_t *optlen);
+		      int flags, struct tcphdr *th, struct tcp_syn_opts *opts,
 		      size_t *optlen);
 #endif /* TCP_INTERNAL_H */
--- a/tcp_splice.c
+++ b/tcp_splice.c
@ -320,7 +320,7 @@ static int tcp_splice_connect_finish(const struct ctx *c,
 			}
 			if (fcntl(conn->pipe[sidei][0], F_SETPIPE_SZ,
-				  c->tcp.pipe_size)) {
+				  c->tcp.pipe_size) != (int)c->tcp.pipe_size) {
 				flow_trace(conn,
 					   "cannot set %d->%d pipe size to %zu",
 					   sidei, !sidei, c->tcp.pipe_size);
@ -503,7 +503,7 @@ swap:
 	lowat_act_flag = RCVLOWAT_ACT(fromsidei);
 	while (1) {
-		ssize_t readlen, to_write = 0, written;
+		ssize_t readlen, written, pending;
 		int more = 0;
 retry:
@ -518,14 +518,11 @@ retry:
 			if (errno != EAGAIN)
 				goto close;
 			to_write = c->tcp.pipe_size;
 		} else if (!readlen) {
 			eof = 1;
 			to_write = c->tcp.pipe_size;
 		} else {
 			never_read = 0;
-			to_write += readlen;
+
 			if (readlen >= (long)c->tcp.pipe_size * 90 / 100)
 				more = SPLICE_F_MORE;
@ -535,10 +532,10 @@ retry:
 eintr:
 		written = splice(conn->pipe[fromsidei][0], NULL,
-				 conn->s[!fromsidei], NULL, to_write,
+				 conn->s[!fromsidei], NULL, c->tcp.pipe_size,
 				 SPLICE_F_MOVE | more | SPLICE_F_NONBLOCK);
 		flow_trace(conn, "%zi from write-side call (passed %zi)",
-			   written, to_write);
+			   written, c->tcp.pipe_size);
 		/* Most common case: skip updating counters. */
 		if (readlen > 0 && readlen == written) {
@ -584,10 +581,9 @@ eintr:
 		if (never_read && written == (long)(c->tcp.pipe_size))
 			goto retry;
-		if (!never_read && written < to_write) {
+		pending = conn->read[fromsidei] - conn->written[fromsidei];
-			to_write -= written;
+		if (!never_read && written > 0 && written < pending)
 			goto retry;
 		}
 		if (eof)
 			break;
@ -676,7 +672,7 @@ static void tcp_splice_pipe_refill(const struct ctx *c)
 			continue;
 		if (fcntl(splice_pipe_pool[i][0], F_SETPIPE_SZ,
-			  c->tcp.pipe_size)) {
+			  c->tcp.pipe_size) != (int)c->tcp.pipe_size) {
 			trace("TCP (spliced): cannot set pool pipe size to %zu",
 			      c->tcp.pipe_size);
 		}
--- a/test/Makefile
+++ b/test/Makefile
@ -8,7 +8,6 @@
 WGET = wget -c
 DEBIAN_IMGS = debian-8.11.0-openstack-amd64.qcow2 \
 	debian-9-nocloud-amd64-daily-20200210-166.qcow2 \
 	debian-10-nocloud-amd64.qcow2 \
 	debian-10-generic-arm64.qcow2 \
 	debian-10-generic-ppc64el-20220911-1135.qcow2 \
@ -42,8 +41,7 @@ OPENSUSE_IMGS = openSUSE-Leap-15.1-JeOS.x86_64-kvm-and-xen.qcow2 \
 	openSUSE-Leap-15.2-JeOS.x86_64-kvm-and-xen.qcow2 \
 	openSUSE-Leap-15.3-JeOS.x86_64-kvm-and-xen.qcow2 \
 	openSUSE-Tumbleweed-ARM-JeOS-efi.aarch64.raw.xz \
-	openSUSE-Tumbleweed-ARM-JeOS-efi.armv7l.raw.xz \
+	openSUSE-Tumbleweed-ARM-JeOS-efi.armv7l.raw.xz
 	openSUSE-Tumbleweed-JeOS.x86_64-kvm-and-xen.qcow2
 UBUNTU_OLD_IMGS = trusty-server-cloudimg-amd64-disk1.img \
 	trusty-server-cloudimg-i386-disk1.img \
@ -135,9 +133,6 @@ realclean: clean
 debian-8.11.0-openstack-%.qcow2:
 	$(WGET) -O $@ https://cloud.debian.org/images/cloud/OpenStack/archive/8.11.0/debian-8.11.0-openstack-$*.qcow2
 debian-9-nocloud-%-daily-20200210-166.qcow2:
 	$(WGET) -O $@ https://cloud.debian.org/images/cloud/stretch/daily/20200210-166/debian-9-nocloud-$*-daily-20200210-166.qcow2
 debian-10-nocloud-%.qcow2:
 	$(WGET) -O $@ https://cloud.debian.org/images/cloud/buster/latest/debian-10-nocloud-$*.qcow2
@ -203,9 +198,6 @@ openSUSE-Tumbleweed-ARM-JeOS-efi.aarch64.raw.xz:
 openSUSE-Tumbleweed-ARM-JeOS-efi.armv7l.raw.xz:
 	$(WGET) -O $@ http://download.opensuse.org/ports/armv7hl/tumbleweed/appliances/openSUSE-Tumbleweed-ARM-JeOS-efi.armv7l.raw.xz
 openSUSE-Tumbleweed-JeOS.x86_64-kvm-and-xen.qcow2:
 	$(WGET) -O $@ https://download.opensuse.org/tumbleweed/appliances/openSUSE-Tumbleweed-JeOS.x86_64-kvm-and-xen.qcow2
 # Ubuntu downloads
 trusty-server-cloudimg-%-disk1.img:
 	$(WGET) -O $@ https://cloud-images.ubuntu.com/trusty/current/trusty-server-cloudimg-$*-disk1.img
--- a/test/lib/setup
+++ b/test/lib/setup
@ -58,7 +58,7 @@ setup_passt() {
 	context_run_bg qemu 'qemu-system-'"${QEMU_ARCH}"		   \
 		' -machine accel=kvm'                                      \
 		' -m '${VMEM}' -cpu host -smp '${VCPUS}                    \
-		' -kernel ' "/boot/vmlinuz-$(uname -r)"			   \
+		' -kernel '"${KERNEL}"					   \
 		' -initrd '${INITRAMFS}' -nographic -serial stdio'	   \
 		' -nodefaults'						   \
 		' -append "console=ttyS0 mitigations=off apparmor=0" '	   \
@ -159,7 +159,7 @@ setup_passt_in_ns() {
 		' -machine accel=kvm'                                      \
 		' -M accel=kvm:tcg'                                        \
 		' -m '${VMEM}' -cpu host -smp '${VCPUS}                    \
-		' -kernel ' "/boot/vmlinuz-$(uname -r)"			   \
+		' -kernel '"${KERNEL}"					   \
 		' -initrd '${INITRAMFS}' -nographic -serial stdio'	   \
 		' -nodefaults'						   \
 		' -append "console=ttyS0 mitigations=off apparmor=0" '	   \
@ -230,7 +230,7 @@ setup_two_guests() {
 	context_run_bg qemu_1 'qemu-system-'"${QEMU_ARCH}"		     \
 		' -M accel=kvm:tcg'                                          \
 		' -m '${VMEM}' -cpu host -smp '${VCPUS}                      \
-		' -kernel ' "/boot/vmlinuz-$(uname -r)"			     \
+		' -kernel '"${KERNEL}"					     \
 		' -initrd '${INITRAMFS}' -nographic -serial stdio'	     \
 		' -nodefaults'						     \
 		' -append "console=ttyS0 mitigations=off apparmor=0" '	     \
@ -243,7 +243,7 @@ setup_two_guests() {
 	context_run_bg qemu_2 'qemu-system-'"${QEMU_ARCH}"		     \
 		' -M accel=kvm:tcg'                                          \
 		' -m '${VMEM}' -cpu host -smp '${VCPUS}                      \
-		' -kernel ' "/boot/vmlinuz-$(uname -r)"			     \
+		' -kernel '"${KERNEL}"					     \
 		' -initrd '${INITRAMFS}' -nographic -serial stdio'	     \
 		' -nodefaults'						     \
 		' -append "console=ttyS0 mitigations=off apparmor=0" '	     \
--- a/test/lib/term
+++ b/test/lib/term
@ -31,8 +31,8 @@ PR_DELAY_INIT=100 # ms
 # $@:	Message to print
 info() {
 	tmux select-pane -t ${PANE_INFO}
-	echo "${@}" >> $STATEBASE/log_pipe
+	printf "${@}\n" >> $STATEBASE/log_pipe
-	echo "${@}" >> "${LOGFILE}"
+	printf "${@}\n" >> "${LOGFILE}"
 }
 # info_n() - Highlight, print message to pane and to log file without newline
@ -47,13 +47,13 @@ info_n() {
 # $@:	Message to print
 info_nolog() {
 	tmux select-pane -t ${PANE_INFO}
-	echo "${@}" >> $STATEBASE/log_pipe
+	printf "${@}\n" >> $STATEBASE/log_pipe
 }
 # log() - Print message to log file
 # $@:	Message to print
 log() {
-	echo "${@}" >> "${LOGFILE}"
+	printf "${@}\n" >> "${LOGFILE}"
 }
 # info_nolog_n() - Send message to pane without highlighting it, without newline
@ -664,7 +664,7 @@ pause_continue() {
 # run_term() - Start tmux session, running entry point, with recording if needed
 run_term() {
-	TMUX="tmux new-session -s passt_test -eSTATEBASE=$STATEBASE -ePCAP=$PCAP -eDEBUG=$DEBUG"
+	TMUX="tmux new-session -s passt_test -eSTATEBASE=$STATEBASE -ePCAP=$PCAP -eDEBUG=$DEBUG -eTRACE=$TRACE -eKERNEL=$KERNEL"
 	if [ ${CI} -eq 1 ]; then
 		printf '\e[8;50;240t'
--- a/test/nstool.c
+++ b/test/nstool.c
@ -33,10 +33,15 @@
 #define die(...)						\
 	do {							\
-		fprintf(stderr, __VA_ARGS__);	\
+		fprintf(stderr, "nstool: " __VA_ARGS__);	\
 		exit(1);					\
 	} while (0)
 #define err(...)						\
 	do {							\
 		fprintf(stderr, "nstool: " __VA_ARGS__);	\
 	} while (0)
 struct ns_type {
 	int flag;
 	const char *name;
@ -156,6 +161,9 @@ static int connect_ctl(const char *sockpath, bool wait,
 static void cmd_hold(int argc, char *argv[])
 {
 	struct sigaction sa = {
 		.sa_handler = SIG_IGN,
 	};
 	int fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, PF_UNIX);
 	struct sockaddr_un addr;
 	const char *sockpath = argv[1];
@ -185,6 +193,10 @@ static void cmd_hold(int argc, char *argv[])
 	if (!getcwd(info.cwd, sizeof(info.cwd)))
 		die("getcwd(): %s\n", strerror(errno));
 	rc = sigaction(SIGPIPE, &sa, NULL);
 	if (rc)
 		die("sigaction(SIGPIPE): %s\n", strerror(errno));
 	do {
 		int afd = accept(fd, NULL, NULL);
 		char buf;
@ -193,17 +205,21 @@ static void cmd_hold(int argc, char *argv[])
 			die("accept(): %s\n", strerror(errno));
 		rc = write(afd, &info, sizeof(info));
-		if (rc < 0)
+		if (rc < 0) {
-			die("write(): %s\n", strerror(errno));
+			err("holder write() to control socket: %s\n",
 			    strerror(errno));
 		}
 		if ((size_t)rc < sizeof(info))
-			die("short write() on control socket\n");
+			err("holder short write() on control socket\n");
 		rc = read(afd, &buf, sizeof(buf));
-		if (rc < 0)
+		if (rc < 0) {
-			die("read(): %s\n", strerror(errno));
+			err("holder read() on control socket: %s\n",
 			    strerror(errno));
 		}
 		close(afd);
-	} while (rc == 0);
+	} while (rc <= 0);
 	unlink(sockpath);
 }
@ -346,7 +362,7 @@ static int openns(const char *fmt, ...)
 }
 static pid_t sig_pid;
-static void sig_handler(int signum)
+static void sig_propagate(int signum)
 {
 	int err;
@ -358,7 +374,7 @@ static void sig_handler(int signum)
 static void wait_for_child(pid_t pid)
 {
 	struct sigaction sa = {
-		.sa_handler = sig_handler,
+		.sa_handler = sig_propagate,
 		.sa_flags = SA_RESETHAND,
 	};
 	int status, err;
--- a/test/passt/dhcp
+++ b/test/passt/dhcp
@ -49,6 +49,8 @@ check	[ "__SEARCH__" = "__HOST_SEARCH__" ]
 test	DHCPv6: address
 guest	/sbin/dhclient -6 __IFNAME__
 # Wait for DAD to complete
 guest	while ip -j -6 addr show tentative | jq -e '.[].addr_info'; do sleep 0.1; done
 gout	ADDR6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__IFNAME__").addr_info[] | select(.prefixlen == 128).local] | .[0]'
 hout	HOST_ADDR6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__HOST_IFNAME6__").addr_info[] | select(.scope == "global" and .deprecated != true).local] | .[0]'
 check	[ "__ADDR6__" = "__HOST_ADDR6__" ]
--- a/test/passt/ndp
+++ b/test/passt/ndp
@ -16,13 +16,15 @@ htools	ip jq sipcalc grep cut
 test	Interface name
 gout	IFNAME ip -j link show | jq -rM '.[] | select(.link_type == "ether").ifname'
-guest	ip link set dev __IFNAME__ up && sleep 2
+guest	ip link set dev __IFNAME__ up
 # Wait for DAD to complete
 guest	while ip -j -6 addr show tentative | jq -e '.[].addr_info'; do sleep 0.1; done
 hout	HOST_IFNAME6 ip -j -6 route show|jq -rM '[.[] | select(.dst == "default").dev] | .[0]'
 check	[ -n "__IFNAME__" ]
 test	SLAAC: prefix
-gout	ADDR6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__IFNAME__").addr_info[] | select(.scope == "global" and .prefixlen == 64).local] | .[0]'
+gout	ADDR6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__IFNAME__").addr_info[] | select(.scope == "global" and .protocol == "kernel_ra") | .local + "/" + (.prefixlen | tostring)] | .[0]'
-gout	PREFIX6 sipcalc __ADDR6__/64 | grep prefix | cut -d' ' -f4
+gout	PREFIX6 sipcalc __ADDR6__ | grep prefix | cut -d' ' -f4
 hout	HOST_ADDR6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__HOST_IFNAME6__").addr_info[] | select(.scope == "global" and .deprecated != true).local] | .[0]'
 hout	HOST_PREFIX6 sipcalc __HOST_ADDR6__/64 | grep prefix | cut -d' ' -f4
 check	[ "__PREFIX6__" = "__HOST_PREFIX6__" ]
--- a/test/passt_in_ns/dhcp
+++ b/test/passt_in_ns/dhcp
@ -52,6 +52,8 @@ check	[ "__SEARCH__" = "__HOST_SEARCH__" ]
 test	DHCPv6: address
 guest	/sbin/dhclient -6 __IFNAME__
 # Wait for DAD to complete
 guest	while ip -j -6 addr show tentative | jq -e '.[].addr_info'; do sleep 0.1; done
 gout	ADDR6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__IFNAME__").addr_info[] | select(.prefixlen == 128).local] | .[0]'
 hout	HOST_ADDR6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__HOST_IFNAME6__").addr_info[] | select(.scope == "global" and .deprecated != true).local] | .[0]'
 check	[ "__ADDR6__" = "__HOST_ADDR6__" ]
--- a/test/passt_in_ns/tcp
+++ b/test/passt_in_ns/tcp
@ -32,7 +32,7 @@ host	socat -u OPEN:__BASEPATH__/big.bin TCP4:127.0.0.1:10001
 guestw
 guest	cmp test_big.bin /root/big.bin
-test	TCP/IPv4: host to ns: big transfer
+test	TCP/IPv4: host to ns (spliced): big transfer
 nsb	socat -u TCP4-LISTEN:10002 OPEN:__TEMP_NS_BIG__,create,trunc
 sleep	1
 host	socat -u OPEN:__BASEPATH__/big.bin TCP4:127.0.0.1:10002
@ -90,7 +90,7 @@ host	socat -u OPEN:__BASEPATH__/small.bin TCP4:127.0.0.1:10001
 guestw
 guest	cmp test_small.bin /root/small.bin
-test	TCP/IPv4: host to ns: small transfer
+test	TCP/IPv4: host to ns (spliced): small transfer
 nsb	socat -u TCP4-LISTEN:10002 OPEN:__TEMP_NS_SMALL__,create,trunc
 sleep	1
 host	socat -u OPEN:__BASEPATH__/small.bin TCP4:127.0.0.1:10002
@ -146,7 +146,7 @@ host	socat -u OPEN:__BASEPATH__/big.bin TCP6:[::1]:10001
 guestw
 guest	cmp test_big.bin /root/big.bin
-test	TCP/IPv6: host to ns: big transfer
+test	TCP/IPv6: host to ns (spliced): big transfer
 nsb	socat -u TCP6-LISTEN:10002 OPEN:__TEMP_NS_BIG__,create,trunc
 sleep	1
 host	socat -u OPEN:__BASEPATH__/big.bin TCP6:[::1]:10002
@ -204,7 +204,7 @@ host	socat -u OPEN:__BASEPATH__/small.bin TCP6:[::1]:10001
 guestw
 guest	cmp test_small.bin /root/small.bin
-test	TCP/IPv6: host to ns: small transfer
+test	TCP/IPv6: host to ns (spliced): small transfer
 nsb	socat -u TCP6-LISTEN:10002 OPEN:__TEMP_NS_SMALL__,create,trunc
 sleep	1
 host	socat -u OPEN:__BASEPATH__/small.bin TCP6:[::1]:10002
--- a/test/passt_in_ns/udp
+++ b/test/passt_in_ns/udp
@ -30,7 +30,7 @@ host	socat -u OPEN:__BASEPATH__/medium.bin UDP4:127.0.0.1:10001,shut-null
 guestw
 guest	cmp test.bin /root/medium.bin
-test	UDP/IPv4: host to ns
+test	UDP/IPv4: host to ns (recvmmsg/sendmmsg)
 nsb	socat -u UDP4-LISTEN:10002,null-eof OPEN:__TEMP_NS__,create,trunc
 sleep	1
 host	socat -u OPEN:__BASEPATH__/medium.bin UDP4:127.0.0.1:10002,shut-null
@ -88,7 +88,7 @@ host	socat -u OPEN:__BASEPATH__/medium.bin UDP6:[::1]:10001,shut-null
 guestw
 guest	cmp test.bin /root/medium.bin
-test	UDP/IPv6: host to ns
+test	UDP/IPv6: host to ns (recvmmsg/sendmmsg)
 nsb	socat -u UDP6-LISTEN:10002,null-eof OPEN:__TEMP_NS__,create,trunc
 sleep	1
 host	socat -u OPEN:__BASEPATH__/medium.bin UDP6:[::1]:10002,shut-null
--- a/test/pasta/dhcp
+++ b/test/pasta/dhcp
@ -35,6 +35,8 @@ check	[ __MTU__ = 65520 ]
 test	DHCPv6: address
 ns	/sbin/dhclient -6 --no-pid __IFNAME__
 # Wait for DAD to complete
 ns	while ip -j -6 addr show tentative | jq -e '.[].addr_info'; do sleep 0.1; done
 hout	HOST_IFNAME6 ip -j -6 route show|jq -rM '[.[] | select(.dst == "default").dev] | .[0]'
 nsout	ADDR6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__IFNAME__").addr_info[] | select(.prefixlen == 128).local] | .[0]'
 hout	HOST_ADDR6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__HOST_IFNAME6__").addr_info[] | select(.scope == "global" and .deprecated != true).local] | .[0]'
--- a/test/pasta/ndp
+++ b/test/pasta/ndp
@ -18,11 +18,12 @@ test	Interface name
 nsout	IFNAME ip -j link show | jq -rM '.[] | select(.link_type == "ether").ifname'
 check	[ -n "__IFNAME__" ]
 ns	ip link set dev __IFNAME__ up
-sleep	2
+# Wait for DAD to complete
 ns	while ip -j -6 addr show tentative | jq -e '.[].addr_info'; do sleep 0.1; done
 test	SLAAC: prefix
-nsout	ADDR6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__IFNAME__").addr_info[] | select(.scope == "global" and .prefixlen == 64).local] | .[0]'
+nsout	ADDR6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__IFNAME__").addr_info[] | select(.scope == "global" and .protocol == "kernel_ra") | .local + "/" + (.prefixlen | tostring)] | .[0]'
-nsout	PREFIX6 sipcalc __ADDR6__/64 | grep prefix | cut -d' ' -f4
+nsout	PREFIX6 sipcalc __ADDR6__ | grep prefix | cut -d' ' -f4
 hout	HOST_ADDR6 ip -j -6 addr show|jq -rM ['.[] | select(.ifname == "__IFNAME__").addr_info[] | select(.scope == "global" and .deprecated != true).local] | .[0]'
 hout	HOST_PREFIX6 sipcalc __HOST_ADDR6__/64 | grep prefix | cut -d' ' -f4
 check	[ "__PREFIX6__" = "__HOST_PREFIX6__" ]
--- a/test/pasta/tcp
+++ b/test/pasta/tcp
@ -19,8 +19,8 @@ set	TEMP_NS_BIG __STATEDIR__/test_ns_big.bin
 set	TEMP_SMALL __STATEDIR__/test_small.bin
 set	TEMP_NS_SMALL __STATEDIR__/test_ns_small.bin
-test	TCP/IPv4: host to ns: big transfer
+test	TCP/IPv4: host to ns (spliced): big transfer
-nsb	socat -u TCP4-LISTEN:10002,bind=127.0.0.1 OPEN:__TEMP_NS_BIG__,create,trunc
+nsb	socat -u TCP4-LISTEN:10002 OPEN:__TEMP_NS_BIG__,create,trunc
 host	socat -u OPEN:__BASEPATH__/big.bin TCP4:127.0.0.1:10002
 nsw
 check	cmp __BASEPATH__/big.bin __TEMP_NS_BIG__
@ -38,8 +38,8 @@ ns	socat -u OPEN:__BASEPATH__/big.bin TCP4:__GW__:10003
 hostw
 check	cmp __BASEPATH__/big.bin __TEMP_BIG__
-test	TCP/IPv4: host to ns: small transfer
+test	TCP/IPv4: host to ns (spliced): small transfer
-nsb	socat -u TCP4-LISTEN:10002,bind=127.0.0.1 OPEN:__TEMP_NS_SMALL__,create,trunc
+nsb	socat -u TCP4-LISTEN:10002 OPEN:__TEMP_NS_SMALL__,create,trunc
 host	socat OPEN:__BASEPATH__/small.bin TCP4:127.0.0.1:10002
 nsw
 check	cmp __BASEPATH__/small.bin __TEMP_NS_SMALL__
@ -57,8 +57,8 @@ ns	socat -u OPEN:__BASEPATH__/small.bin TCP4:__GW__:10003
 hostw
 check	cmp __BASEPATH__/small.bin __TEMP_SMALL__
-test	TCP/IPv6: host to ns: big transfer
+test	TCP/IPv6: host to ns (spliced): big transfer
-nsb	socat -u TCP6-LISTEN:10002,bind=[::1] OPEN:__TEMP_NS_BIG__,create,trunc
+nsb	socat -u TCP6-LISTEN:10002 OPEN:__TEMP_NS_BIG__,create,trunc
 host	socat -u OPEN:__BASEPATH__/big.bin TCP6:[::1]:10002
 nsw
 check	cmp __BASEPATH__/big.bin __TEMP_NS_BIG__
@ -77,8 +77,8 @@ ns	socat -u OPEN:__BASEPATH__/big.bin TCP6:[__GW6__%__IFNAME__]:10003
 hostw
 check	cmp __BASEPATH__/big.bin __TEMP_BIG__
-test	TCP/IPv6: host to ns: small transfer
+test	TCP/IPv6: host to ns (spliced): small transfer
-nsb	socat -u TCP6-LISTEN:10002,bind=[::1] OPEN:__TEMP_NS_SMALL__,create,trunc
+nsb	socat -u TCP6-LISTEN:10002 OPEN:__TEMP_NS_SMALL__,create,trunc
 host	socat -u OPEN:__BASEPATH__/small.bin TCP6:[::1]:10002
 nsw
 check	cmp __BASEPATH__/small.bin __TEMP_NS_SMALL__
--- a/test/pasta/udp
+++ b/test/pasta/udp
@ -17,8 +17,8 @@ htools	dd socat ip jq
 set	TEMP __STATEDIR__/test.bin
 set	TEMP_NS __STATEDIR__/test_ns.bin
-test	UDP/IPv4: host to ns
+test	UDP/IPv4: host to ns (recvmmsg/sendmmsg)
-nsb	socat -u UDP4-LISTEN:10002,bind=127.0.0.1,null-eof OPEN:__TEMP_NS__,create,trunc
+nsb	socat -u UDP4-LISTEN:10002,null-eof OPEN:__TEMP_NS__,create,trunc
 host	socat OPEN:__BASEPATH__/medium.bin UDP4:127.0.0.1:10002,shut-null
 nsw
 check	cmp __BASEPATH__/medium.bin __TEMP_NS__
@ -37,8 +37,8 @@ ns	socat -u OPEN:__BASEPATH__/medium.bin UDP4:__GW__:10003,shut-null
 hostw
 check	cmp __BASEPATH__/medium.bin __TEMP__
-test	UDP/IPv6: host to ns
+test	UDP/IPv6: host to ns (recvmmsg/sendmmsg)
-nsb	socat -u UDP6-LISTEN:10002,bind=[::1],null-eof OPEN:__TEMP_NS__,create,trunc
+nsb	socat -u UDP6-LISTEN:10002,null-eof OPEN:__TEMP_NS__,create,trunc
 host	socat -u OPEN:__BASEPATH__/medium.bin UDP6:[::1]:10002,shut-null
 nsw
 check	cmp __BASEPATH__/medium.bin __TEMP_NS__
--- a/test/perf/passt_tcp
+++ b/test/perf/passt_tcp
@ -116,6 +116,8 @@ iperf3k	ns
 # Reducing MTU below 1280 deconfigures IPv6, get our address back
 guest	dhclient -6 -x
 guest	dhclient -6 __IFNAME__
 # Wait for DAD to complete
 guest	while ip -j -6 addr show tentative | jq -e '.[].addr_info'; do sleep 0.1; done
 tl	TCP RR latency over IPv4: guest to host
 lat	-
--- a/test/perf/pasta_tcp
+++ b/test/perf/pasta_tcp
@ -211,7 +211,7 @@ tr	TCP throughput over IPv6: host to ns
 iperf3s	ns 10002
 nsout	IFNAME ip -j link show | jq -rM '.[] | select(.link_type == "ether").ifname'
-nsout	ADDR6 ip -j -6 addr show|jq -rM '.[] | select(.ifname == "__IFNAME__").addr_info[] | select(.scope == "global" and .prefixlen == 64).local'
+nsout	ADDR6 ip -j -6 addr show|jq -rM '.[] | select(.ifname == "__IFNAME__").addr_info[] | select(.scope == "global").local'
 bw	-
 bw	-
 bw	-
--- a/test/perf/pasta_udp
+++ b/test/perf/pasta_udp
@ -196,7 +196,7 @@ tr	UDP throughput over IPv6: host to ns
 iperf3s	ns 10002
 nsout	IFNAME ip -j link show | jq -rM '.[] | select(.link_type == "ether").ifname'
-nsout	ADDR6 ip -j -6 addr show|jq -rM '.[] | select(.ifname == "__IFNAME__").addr_info[] | select(.scope == "global" and .prefixlen == 64).local'
+nsout	ADDR6 ip -j -6 addr show|jq -rM '.[] | select(.ifname == "__IFNAME__").addr_info[] | select(.scope == "global").local'
 iperf3	BW host __ADDR6__ 10002 __TIME__ __OPTS__ -b 8G -l 1472
 bw	__BW__ 0.3 0.5
 iperf3	BW host __ADDR6__ 10002 __TIME__ __OPTS__ -b 12G -l 3972
--- a/test/run
+++ b/test/run
@ -38,6 +38,9 @@ TRACE=${TRACE:-0}
 # If set, tell passt and pasta to take packet captures
 PCAP=${PCAP:-0}
 # Custom kernel to boot guests with, if given
 KERNEL=${KERNEL:-"/boot/vmlinuz-$(uname -r)"}
 COMMIT="$(git log --oneline --no-decorate -1)"
 . lib/util
--- a/test/two_guests/basic
+++ b/test/two_guests/basic
@ -36,9 +36,13 @@ check	[ "__ADDR2__" = "__HOST_ADDR__" ]
 test	DHCPv6: addresses
 # Link is up now, wait for DAD to complete
-sleep	2
+guest1	while ip -j -6 addr show tentative | jq -e '.[].addr_info'; do sleep 0.1; done
 guest2	while ip -j -6 addr show tentative | jq -e '.[].addr_info'; do sleep 0.1; done
 guest1	/sbin/dhclient -6 __IFNAME1__
 guest2	/sbin/dhclient -6 __IFNAME2__
 # Wait for DAD to complete on the DHCP address
 guest1	while ip -j -6 addr show tentative | jq -e '.[].addr_info'; do sleep 0.1; done
 guest2	while ip -j -6 addr show tentative | jq -e '.[].addr_info'; do sleep 0.1; done
 g1out	ADDR1_6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__IFNAME1__").addr_info[] | select(.prefixlen == 128).local] | .[0]'
 g2out	ADDR2_6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__IFNAME2__").addr_info[] | select(.prefixlen == 128).local] | .[0]'
 hout	HOST_ADDR6 ip -j -6 addr show|jq -rM '[.[] | select(.ifname == "__HOST_IFNAME6__").addr_info[] | select(.scope == "global" and .deprecated != true).local] | .[0]'
@ -48,33 +52,33 @@ check	[ "__ADDR2_6__" = "__HOST_ADDR6__" ]
 test	TCP/IPv4: guest 1 > guest 2
 g1out	GW1 ip -j -4 route show|jq -rM '.[] | select(.dst == "default").gateway'
 guest2b	socat -u TCP4-LISTEN:10004 OPEN:msg,create,trunc
 sleep	1
 guest1	echo "Hello_from_guest_1" | socat -u STDIN TCP4:__GW1__:10004
 guest2w
 sleep	1
 g2out	MSG2 cat msg
 check	[ "__MSG2__" = "Hello_from_guest_1" ]
 test	TCP/IPv6: guest 2 > guest 1
 g2out	GW2_6 ip -j -6 route show|jq -rM '.[] | select(.dst == "default").gateway'
 guest1b	socat -u TCP6-LISTEN:10001 OPEN:msg,create,trunc
 sleep	1
 guest2	echo "Hello_from_guest_2" | socat -u STDIN TCP6:[__GW2_6__%__IFNAME2__]:10001
 guest1w
 sleep	1
 g1out	MSG1 cat msg
 check	[ "__MSG1__" = "Hello_from_guest_2" ]
 test	UDP/IPv4: guest 1 > guest 2
 guest2b	socat -u TCP4-LISTEN:10004 OPEN:msg,create,trunc
 sleep	1
 guest1	echo "Hello_from_guest_1" | socat -u STDIN TCP4:__GW1__:10004
 guest2w
 sleep	1
 g2out	MSG2 cat msg
 check	[ "__MSG2__" = "Hello_from_guest_1" ]
 test	UDP/IPv6: guest 2 > guest 1
 guest1b	socat -u TCP6-LISTEN:10001 OPEN:msg,create,trunc
 sleep	1
 guest2	echo "Hello_from_guest_2" | socat -u STDIN TCP6:[__GW2_6__%__IFNAME2__]:10001
 guest1w
 sleep	1
 g1out	MSG1 cat msg
 check	[ "__MSG1__" = "Hello_from_guest_2" ]
--- a/udp.c
+++ b/udp.c
@ -169,11 +169,11 @@ udp_meta[UDP_MAX_FRAMES];
 * @UDP_NUM_IOVS        the number of entries in the iovec array
 */
 enum udp_iov_idx {
-	UDP_IOV_TAP	= 0,
+	UDP_IOV_TAP,
-	UDP_IOV_ETH	= 1,
+	UDP_IOV_ETH,
-	UDP_IOV_IP	= 2,
+	UDP_IOV_IP,
-	UDP_IOV_PAYLOAD	= 3,
+	UDP_IOV_PAYLOAD,
-	UDP_NUM_IOVS
+	UDP_NUM_IOVS,
 };
 /* IOVs and msghdr arrays for receiving datagrams from sockets */
@ -298,11 +298,13 @@ static void udp_splice_send(const struct ctx *c, size_t start, size_t n,
 * @bp:			Pointer to udp_payload_t to update
 * @toside:		Flowside for destination side
 * @dlen:		Length of UDP payload
 * @no_udp_csum:	Do not set UDP checksum
 *
 * Return: size of IPv4 payload (UDP header + data)
 */
 static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
-			      const struct flowside *toside, size_t dlen)
+			      const struct flowside *toside, size_t dlen,
 			      bool no_udp_csum)
 {
 	const struct in_addr *src = inany_v4(&toside->oaddr);
 	const struct in_addr *dst = inany_v4(&toside->eaddr);
@ -319,22 +321,33 @@ static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
 	bp->uh.source = htons(toside->oport);
 	bp->uh.dest = htons(toside->eport);
 	bp->uh.len = htons(l4len);
-	csum_udp4(&bp->uh, *src, *dst, bp->data, dlen);
+	if (no_udp_csum) {
 		bp->uh.check = 0;
 	} else {
 		const struct iovec iov = {
 			.iov_base = bp->data,
 			.iov_len = dlen
 		};
 		csum_udp4(&bp->uh, *src, *dst, &iov, 1, 0);
 	}
 	return l4len;
 }
 /**
 * udp_update_hdr6() - Update headers for one IPv6 datagram
- * @ip6h:	Pre-filled IPv6 header (except for payload_len and addresses)
+ * @ip6h:		Pre-filled IPv6 header (except for payload_len and
 * 			addresses)
 * @bp:			Pointer to udp_payload_t to update
 * @toside:		Flowside for destination side
 * @dlen:		Length of UDP payload
 * @no_udp_csum:	Do not set UDP checksum
 *
 * Return: size of IPv6 payload (UDP header + data)
 */
 static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
-			      const struct flowside *toside, size_t dlen)
+			      const struct flowside *toside, size_t dlen,
 			      bool no_udp_csum)
 {
 	uint16_t l4len = dlen + sizeof(bp->uh);
@ -348,7 +361,20 @@ static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
 	bp->uh.source = htons(toside->oport);
 	bp->uh.dest = htons(toside->eport);
 	bp->uh.len = ip6h->payload_len;
-	csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, bp->data, dlen);
+	if (no_udp_csum) {
 		/* 0 is an invalid checksum for UDP IPv6 and dropped by
 		 * the kernel stack, even if the checksum is disabled by virtio
 		 * flags. We need to put any non-zero value here.
 		 */
 		bp->uh.check = 0xffff;
 	} else {
 		const struct iovec iov = {
 			.iov_base = bp->data,
 			.iov_len = dlen
 		};
 		csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6,
 			  &iov, 1, 0);
 	}
 	return l4len;
 }
@ -358,9 +384,11 @@ static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
 * @mmh:	Receiving mmsghdr array
 * @idx:	Index of the datagram to prepare
 * @toside:	Flowside for destination side
 * @no_udp_csum: Do not set UDP checksum
 */
-static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx,
+static void udp_tap_prepare(const struct mmsghdr *mmh,
-			    const struct flowside *toside)
+			    unsigned idx, const struct flowside *toside,
 			    bool no_udp_csum)
 {
 	struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[idx];
 	struct udp_payload_t *bp = &udp_payload[idx];
@ -368,13 +396,15 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx,
 	size_t l4len;
 	if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) {
-		l4len = udp_update_hdr6(&bm->ip6h, bp, toside, mmh[idx].msg_len);
+		l4len = udp_update_hdr6(&bm->ip6h, bp, toside,
 					mmh[idx].msg_len, no_udp_csum);
 		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) +
 			       sizeof(udp6_eth_hdr));
 		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr);
 		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
 	} else {
-		l4len = udp_update_hdr4(&bm->ip4h, bp, toside, mmh[idx].msg_len);
+		l4len = udp_update_hdr4(&bm->ip4h, bp, toside,
 					mmh[idx].msg_len, no_udp_csum);
 		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) +
 			       sizeof(udp4_eth_hdr));
 		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr);
@ -387,7 +417,8 @@ static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx,
 * udp_sock_recverr() - Receive and clear an error from a socket
 * @s:		Socket to receive from
 *
- * Return: ee_errno, 0 on empty queue
+ * Return: 1 if error received and processed, 0 if no more errors in queue, < 0
 *         if there was an error reading the queue
 *
 * #syscalls recvmsg
 */
@ -408,15 +439,16 @@ static int udp_sock_recverr(int s)
 	rc = recvmsg(s, &mh, MSG_ERRQUEUE);
 	if (rc < 0) {
-		if (errno != EAGAIN && errno != EWOULDBLOCK)
+		if (errno == EAGAIN || errno == EWOULDBLOCK)
 			err_perror("Failed to read error queue");
 			return 0;
 		err_perror("UDP: Failed to read error queue");
 		return -1;
 	}
 	if (!(mh.msg_flags & MSG_ERRQUEUE)) {
 		err("Missing MSG_ERRQUEUE flag reading error queue");
-		return 0;
+		return -1;
 	}
 	hdr = CMSG_FIRSTHDR(&mh);
@ -425,7 +457,7 @@ static int udp_sock_recverr(int s)
 	      (hdr->cmsg_level == IPPROTO_IPV6 &&
 	       hdr->cmsg_type == IPV6_RECVERR))) {
 		err("Unexpected cmsg reading error queue");
-		return 0;
+		return -1;
 	}
 	ee = (const struct sock_extended_err *)CMSG_DATA(hdr);
@ -434,7 +466,54 @@ static int udp_sock_recverr(int s)
 	debug("%s error on UDP socket %i: %s",
 	      str_ee_origin(ee), s, strerror(ee->ee_errno));
-	return ee->ee_errno;
+	return 1;
 }
 /**
 * udp_sock_errs() - Process errors on a socket
 * @c:		Execution context
 * @s:		Socket to receive from
 * @events:	epoll events bitmap
 *
 * Return: Number of errors handled, or < 0 if we have an unrecoverable error
 */
 static int udp_sock_errs(const struct ctx *c, int s, uint32_t events)
 {
 	unsigned n_err = 0;
 	socklen_t errlen;
 	int rc, err;
 	ASSERT(!c->no_udp);
 	if (!(events & EPOLLERR))
 		return 0; /* Nothing to do */
 	/* Empty the error queue */
 	while ((rc = udp_sock_recverr(s)) > 0)
 		n_err += rc;
 	if (rc < 0)
 		return -1; /* error reading error, unrecoverable */
 	errlen = sizeof(err);
 	if (getsockopt(s, SOL_SOCKET, SO_ERROR, &err, &errlen) < 0 ||
 	    errlen != sizeof(err)) {
 		err_perror("Error reading SO_ERROR");
 		return -1;  /* error reading error, unrecoverable */
 	}
 	if (err) {
 		debug("Unqueued error on UDP socket %i: %s", s, strerror(err));
 		n_err++;
 	}
 	if (!n_err) {
 		/* EPOLLERR, but no errors to clear !? */
 		err("EPOLLERR event without reported errors on socket %i", s);
 		return -1; /* no way to clear, unrecoverable */
 	}
 	return n_err;
 }
 /**
@ -442,15 +521,14 @@ static int udp_sock_recverr(int s)
 * @c:		Execution context
 * @s:		Socket to receive from
 * @events:	epoll events bitmap
- * @mmh:	mmsghdr array to receive into
+ * @mmh		mmsghdr array to receive into
 * @recv_err:	Set to last error in queue. If none: -1 on EPOLLERR, 0 otherwise
 *
- * Return: count of datagrams received
+ * Return: Number of datagrams received
 *
 * #syscalls recvmmsg arm:recvmmsg_time64 i686:recvmmsg_time64
 */
 static int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
-			 struct mmsghdr *mmh, int *recv_err)
+			 struct mmsghdr *mmh)
 {
 	/* For not entirely clear reasons (data locality?) pasta gets better
 	 * throughput if we receive tap datagrams one at a time.  For small
@ -463,17 +541,6 @@ static int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
 	ASSERT(!c->no_udp);
 	/* Clear any errors first */
 	if (events & EPOLLERR) {
 		bool found = false;
 		int ret;
 		while ((ret = udp_sock_recverr(s)))
 			found = true;
 		*recv_err = found ? ret : -1;
 	}
 	if (!(events & EPOLLIN))
 		return 0;
@ -499,10 +566,16 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
 			     uint32_t events, const struct timespec *now)
 {
 	const socklen_t sasize = sizeof(udp_meta[0].s_in);
 	int recv_err = 0;
 	int n, i;
-	if ((n = udp_sock_recv(c, ref.fd, events, udp_mh_recv, &recv_err)) <= 0)
+	if (udp_sock_errs(c, ref.fd, events) < 0) {
 		err("UDP: Unrecoverable error on listening socket:"
 		    " (%s port %hu)", pif_name(ref.udp.pif), ref.udp.port);
 		/* FIXME: what now?  close/re-open socket? */
 		return;
 	}
 	if ((n = udp_sock_recv(c, ref.fd, events, udp_mh_recv)) <= 0)
 		return;
 	/* We divide datagrams into batches based on how we need to send them,
@ -522,7 +595,8 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
 				udp_splice_prepare(udp_mh_recv, i);
 			} else if (batchpif == PIF_TAP) {
 				udp_tap_prepare(udp_mh_recv, i,
-						flowside_at_sidx(batchsidx));
+						flowside_at_sidx(batchsidx),
 						false);
 			}
 			if (++i >= n)
@ -570,51 +644,21 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 	flow_sidx_t tosidx = flow_sidx_opposite(ref.flowside);
 	const struct flowside *toside = flowside_at_sidx(tosidx);
 	struct udp_flow *uflow = udp_at_sidx(ref.flowside);
 	int from_s = uflow->s[ref.flowside.sidei];
 	uint8_t topif = pif_at_sidx(tosidx);
-	int recv_err = 0;
+	int n, i, from_s;
 	int n, i;
 	ASSERT(!c->no_udp && uflow);
-	n = udp_sock_recv(c, from_s, events, udp_mh_recv, &recv_err);
+	from_s = uflow->s[ref.flowside.sidei];
 	if (recv_err == -1) {
 		struct flow_common *f = &uflow->f;
 		char estr0[INANY_ADDRSTRLEN], fstr0[INANY_ADDRSTRLEN];
 		char estr1[INANY_ADDRSTRLEN], fstr1[INANY_ADDRSTRLEN];
 		const struct flowside *ini = &f->side[INISIDE];
 		const struct flowside *tgt = &f->side[TGTSIDE];
 		flow_err(uflow, "EPOLLERR without error queue, closing flow");
 		err("Last recorded errno was: %i (%s)", uflow->last_errno,
 		    strerror(uflow->last_errno));
 		flow_log_(f, LOG_ERR,
 			  "%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu",
 			  pif_name(f->pif[INISIDE]),
 			  inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
 			  ini->eport,
 			  inany_ntop(&ini->oaddr, fstr0, sizeof(fstr0)),
 			  ini->oport,
 			  pif_name(f->pif[TGTSIDE]),
 			  inany_ntop(&tgt->oaddr, fstr1, sizeof(fstr1)),
 			  tgt->oport,
 			  inany_ntop(&tgt->eaddr, estr1, sizeof(estr1)),
 			  tgt->eport);
 	if (udp_sock_errs(c, from_s, events) < 0) {
 		flow_err(uflow, "Unrecoverable error on reply socket");
 		flow_err_details(uflow);
 		udp_flow_close(c, uflow);
 		return;
 	}
-	if (recv_err) {
+	if ((n = udp_sock_recv(c, from_s, events, udp_mh_recv)) <= 0)
 		struct udp_flow *uflow = udp_at_sidx(udp_meta[0].tosidx);
 		uflow->last_errno = recv_err;
 		flow_err(uflow, "Recorded errno %i (%s)", recv_err,
 			 strerror(recv_err));
 	}
 	if (n <= 0)
 		return;
 	flow_trace(uflow, "Received %d datagrams on reply socket", n);
@ -624,7 +668,7 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 		if (pif_is_socket(topif))
 			udp_splice_prepare(udp_mh_recv, i);
 		else if (topif == PIF_TAP)
-			udp_tap_prepare(udp_mh_recv, i, toside);
+			udp_tap_prepare(udp_mh_recv, i, toside, false);
 		/* Restore sockaddr length clobbered by recvmsg() */
 		udp_mh_recv[i].msg_hdr.msg_namelen = sizeof(udp_meta[i].s_in);
 	}
@ -751,69 +795,61 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 * udp_sock_init() - Initialise listening sockets for a given port
 * @c:		Execution context
 * @ns:		In pasta mode, if set, bind with loopback address in namespace
 * @af:		Address family to select a specific IP version, or AF_UNSPEC
 * @addr:	Pointer to address for binding, NULL if not configured
 * @ifname:	Name of interface to bind to, NULL if not configured
 * @port:	Port, host order
 *
 * Return: 0 on (partial) success, negative error code on (complete) failure
 */
-int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
+int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr,
-		  const void *addr, const char *ifname, in_port_t port)
+		  const char *ifname, in_port_t port)
 {
-	union udp_listen_epoll_ref uref = { .port = port };
+	union udp_listen_epoll_ref uref = {
 		.pif = ns ? PIF_SPLICE : PIF_HOST,
 		.port = port,
 	};
 	int r4 = FD_REF_MAX + 1, r6 = FD_REF_MAX + 1;
 	ASSERT(!c->no_udp);
-	if (ns)
+	if (!addr && c->ifi4 && c->ifi6 && !ns) {
 		uref.pif = PIF_SPLICE;
 	else
 		uref.pif = PIF_HOST;
 	if (af == AF_UNSPEC && c->ifi4 && c->ifi6) {
 		int s;
 		/* Attempt to get a dual stack socket */
-		if (!ns) {
+		s = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_HOST,
-			s = sock_l4(c, AF_UNSPEC, EPOLL_TYPE_UDP_LISTEN,
+				NULL, ifname, port, uref.u32);
 				    addr, ifname, port, uref.u32);
 		udp_splice_init[V4][port] = s < 0 ? -1 : s;
 		udp_splice_init[V6][port] = s < 0 ? -1 : s;
 		} else {
 			s = sock_l4(c, AF_UNSPEC, EPOLL_TYPE_UDP_LISTEN,
 				    &in4addr_loopback, ifname, port, uref.u32);
 			udp_splice_ns[V4][port] = s < 0 ? -1 : s;
 			udp_splice_ns[V6][port] = s < 0 ? -1 : s;
 		}
 		if (IN_INTERVAL(0, FD_REF_MAX, s))
 			return 0;
 	}
-	if ((af == AF_INET || af == AF_UNSPEC) && c->ifi4) {
+	if ((!addr || inany_v4(addr)) && c->ifi4) {
 		if (!ns) {
-			r4 = sock_l4(c, AF_INET, EPOLL_TYPE_UDP_LISTEN,
+			r4 = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_HOST,
-				     addr, ifname, port, uref.u32);
+					 addr ? addr : &inany_any4, ifname,
 					 port, uref.u32);
 			udp_splice_init[V4][port] = r4 < 0 ? -1 : r4;
 		} else {
-			r4  = sock_l4(c, AF_INET, EPOLL_TYPE_UDP_LISTEN,
+			r4  = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_SPLICE,
-				      &in4addr_loopback,
+					  &inany_loopback4, ifname,
-				      ifname, port, uref.u32);
+					  port, uref.u32);
 			udp_splice_ns[V4][port] = r4 < 0 ? -1 : r4;
 		}
 	}
-	if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) {
+	if ((!addr || !inany_v4(addr)) && c->ifi6) {
 		if (!ns) {
-			r6 = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP_LISTEN,
+			r6 = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_HOST,
-				     addr, ifname, port, uref.u32);
+					 addr ? addr : &inany_any6, ifname,
 					 port, uref.u32);
 			udp_splice_init[V6][port] = r6 < 0 ? -1 : r6;
 		} else {
-			r6 = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP_LISTEN,
+			r6 = pif_sock_l4(c, EPOLL_TYPE_UDP_LISTEN, PIF_SPLICE,
-				     &in6addr_loopback,
+					 &inany_loopback6, ifname,
-				     ifname, port, uref.u32);
+					 port, uref.u32);
 			udp_splice_ns[V6][port] = r6 < 0 ? -1 : r6;
 		}
 	}
@ -881,7 +917,7 @@ static void udp_port_rebind(struct ctx *c, bool outbound)
 		if ((c->ifi4 && socks[V4][port] == -1) ||
 		    (c->ifi6 && socks[V6][port] == -1))
-			udp_sock_init(c, outbound, AF_UNSPEC, NULL, NULL, port);
+			udp_sock_init(c, outbound, NULL, NULL, port);
 	}
 }
--- a/udp.h
+++ b/udp.h
@ -16,8 +16,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		    sa_family_t af, const void *saddr, const void *daddr,
 		    const struct pool *p, int idx, const struct timespec *now);
-int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
+int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr,
-		  const void *addr, const char *ifname, in_port_t port);
+		  const char *ifname, in_port_t port);
 int udp_init(struct ctx *c);
 void udp_timer(struct ctx *c, const struct timespec *now);
 void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s);
--- a/udp_flow.c
+++ b/udp_flow.c
@ -34,13 +34,16 @@ struct udp_flow *udp_at_sidx(flow_sidx_t sidx)
 	return &flow->udp;
 }
-/**
+/*
 * udp_flow_close() - Close and clean up UDP flow
 * @c:		Execution context
 * @uflow:	UDP flow
 */
 void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
 {
 	if (uflow->closed)
 		return; /* Nothing to do */
 	if (uflow->s[INISIDE] >= 0) {
 		/* The listening socket needs to stay in epoll */
 		close(uflow->s[INISIDE]);
@ -53,12 +56,11 @@ void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
 		close(uflow->s[TGTSIDE]);
 		uflow->s[TGTSIDE] = -1;
 	}
 	uflow->last_errno = 0;
 	flow_hash_remove(c, FLOW_SIDX(uflow, INISIDE));
 	if (!pif_is_socket(uflow->f.pif[TGTSIDE]))
 		flow_hash_remove(c, FLOW_SIDX(uflow, TGTSIDE));
 	uflow->closed = true;
 }
 /**
@ -259,6 +261,17 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
 	return udp_flow_new(c, flow, -1, now);
 }
 /**
 * udp_flow_defer() - Deferred handling for a single UDP flow
 * @uflow:	UDP flow to check
 *
 * Only reports the flow's closed flag: nothing is modified here.
 *
 * Return: true if @uflow is marked closed (ready to free), false otherwise
 */
 bool udp_flow_defer(const struct udp_flow *uflow)
 {
 	const bool ready_to_free = uflow->closed;
 	return ready_to_free;
 }
 /**
 * udp_flow_timer() - Handler for timed events related to a given flow
 * @c:		Execution context
--- a/udp_flow.h
+++ b/udp_flow.h
@ -10,6 +10,7 @@
 /**
 * struct udp - Descriptor for a flow of UDP packets
 * @f:		Generic flow information
 * @closed:	Flow is already closed
 * @ts:		Activity timestamp
 * @s:		Socket fd (or -1) for each side of the flow
 */
@ -17,10 +18,9 @@ struct udp_flow {
 	/* Must be first element */
 	struct flow_common f;
 	bool closed :1;
 	time_t ts;
 	int s[SIDES];
 	int last_errno;
 };
 struct udp_flow *udp_at_sidx(flow_sidx_t sidx);
@ -33,6 +33,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
 			      in_port_t srcport, in_port_t dstport,
 			      const struct timespec *now);
 void udp_flow_close(const struct ctx *c, struct udp_flow *uflow);
 bool udp_flow_defer(const struct udp_flow *uflow);
 bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
 		    const struct timespec *now);
--- a/util.c
+++ b/util.c
@ -28,6 +28,7 @@
 #include <linux/errqueue.h>
 #include <getopt.h>
 #include "linux_dep.h"
 #include "util.h"
 #include "iov.h"
 #include "passt.h"
@ -52,6 +53,7 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
 {
 	sa_family_t af = ((const struct sockaddr *)sa)->sa_family;
 	union epoll_ref ref = { .type = type, .data = data };
 	bool freebind = false;
 	struct epoll_event ev;
 	int fd, y = 1, ret;
 	uint8_t proto;
@ -61,8 +63,11 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
 	case EPOLL_TYPE_TCP_LISTEN:
 		proto = IPPROTO_TCP;
 		socktype = SOCK_STREAM | SOCK_NONBLOCK;
 		freebind = c->freebind;
 		break;
 	case EPOLL_TYPE_UDP_LISTEN:
 		freebind = c->freebind;
 		/* fallthrough */
 	case EPOLL_TYPE_UDP_REPLY:
 		proto = IPPROTO_UDP;
 		socktype = SOCK_DGRAM | SOCK_NONBLOCK;
@ -127,6 +132,18 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
 		}
 	}
 	if (freebind) {
 		int level = af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6;
 		int opt = af == AF_INET ? IP_FREEBIND : IPV6_FREEBIND;
 		if (setsockopt(fd, level, opt, &y, sizeof(y))) {
 			err_perror("Failed to set %s on socket %i",
 				   af == AF_INET ? "IP_FREEBIND"
 				                 : "IPV6_FREEBIND",
 				   fd);
 		}
 	}
 	if (bind(fd, sa, sl) < 0) {
 		/* We'll fail to bind to low ports if we don't have enough
 		 * capabilities, and we'll fail to bind on already bound ports,
@ -157,58 +174,6 @@ int sock_l4_sa(const struct ctx *c, enum epoll_type type,
 	return fd;
 }
 /**
 * sock_l4() - Create and bind socket for given L4, add to epoll list
 * @c:		Execution context
 * @af:		Address family: AF_INET, AF_INET6, or AF_UNSPEC to request a
 *		socket that is not restricted to IPv6 only (AF_UNSPEC is
 *		rejected if DUAL_STACK_SOCKETS is false or @bind_addr is set)
 * @type:	epoll type
 * @bind_addr:	Address for binding, NULL for any; read as struct in_addr
 *		for AF_INET, as struct in6_addr otherwise
 * @ifname:	Interface for binding, NULL for any
 * @port:	Port, host order
 * @data:	epoll reference portion for protocol handlers
 *
 * Builds the socket address matching @af and hands it to sock_l4_sa(), which
 * does the actual work.
 *
 * Return: newly created socket, negative error code on failure (-EINVAL for
 *	   an unsupported @af or an invalid AF_UNSPEC request)
 */
 int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type,
 	    const void *bind_addr, const char *ifname, uint16_t port,
 	    uint32_t data)
 {
 	switch (af) {
 	case AF_INET: {
 		/* NOTE(review): the trailing positional initialisers presumably
 		 * zero sin_addr and the padding, i.e. bind to "any" -- this
 		 * relies on the member order of struct sockaddr_in, confirm
 		 */
 		struct sockaddr_in addr4 = {
 			.sin_family = AF_INET,
 			.sin_port = htons(port),
 			{ 0 }, { 0 },
 		};
 		if (bind_addr)
 			addr4.sin_addr = *(struct in_addr *)bind_addr;
 		/* The v6only argument is meaningless for IPv4: pass false */
 		return sock_l4_sa(c, type, &addr4, sizeof(addr4), ifname,
 				  false, data);
 	}
 	case AF_UNSPEC:
 		/* Dual stack request: only valid without a specific address */
 		if (!DUAL_STACK_SOCKETS || bind_addr)
 			 return -EINVAL;
 		/* fallthrough */
 	case AF_INET6: {
 		/* NOTE(review): positional initialisers, relying on the member
 		 * order of struct sockaddr_in6 (flowinfo, address, scope ID
 		 * presumably) -- confirm
 		 */
 		struct sockaddr_in6 addr6 = {
 			.sin6_family = AF_INET6,
 			.sin6_port = htons(port),
 			0, IN6ADDR_ANY_INIT, 0,
 		};
 		if (bind_addr) {
 			addr6.sin6_addr = *(struct in6_addr *)bind_addr;
 			/* Link-local addresses need a scope: use c->ifi6 */
 			if (IN6_IS_ADDR_LINKLOCAL(bind_addr))
 				addr6.sin6_scope_id = c->ifi6;
 		}
 		/* v6only is set for AF_INET6 proper, not for AF_UNSPEC */
 		return sock_l4_sa(c, type, &addr6, sizeof(addr6), ifname,
 				  af == AF_INET6, data);
 	}
 	default:
 		return -EINVAL;
 	}
 }
 /**
 * sock_probe_mem() - Check if setting high SO_SNDBUF and SO_RCVBUF is allowed
@ -219,7 +184,8 @@ void sock_probe_mem(struct ctx *c)
 	int v = INT_MAX / 2, s;
 	socklen_t sl;
-	if ((s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) {
+	s = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP);
 	if (s < 0) {
 		c->low_wmem = c->low_rmem = 1;
 		return;
 	}
@ -249,7 +215,7 @@ void sock_probe_mem(struct ctx *c)
 int64_t timespec_diff_us(const struct timespec *a, const struct timespec *b)
 {
 	if (a->tv_nsec < b->tv_nsec) {
-		return (b->tv_nsec - a->tv_nsec) / 1000 +
+		return (a->tv_nsec + 1000000000 - b->tv_nsec) / 1000 +
 		       (a->tv_sec - b->tv_sec - 1) * 1000000;
 	}
@ -443,25 +409,20 @@ void pidfile_write(int fd, pid_t pid)
 }
 /**
- * pidfile_open() - Open PID file if needed
+ * output_file_open() - Open file for output, if needed
- * @path:	Path for PID file, empty string if no PID file is requested
+ * @path:	Path for output file
 * @flags:	Flags for open() other than O_CREAT, O_TRUNC, O_CLOEXEC
 *
- * Return: descriptor for PID file, -1 if path is NULL, won't return on failure
+ * Return: file descriptor on success, -1 on failure with errno set by open()
 */
-int pidfile_open(const char *path)
+int output_file_open(const char *path, int flags)
 {
-	int fd;
+	/* We use O_CLOEXEC here, but clang-tidy as of LLVM 16 to 19 looks for
-
+	 * it in the 'mode' argument if we have one
-	if (!*path)
+	 */
-		return -1;
+	return open(path, O_CREAT | O_TRUNC | O_CLOEXEC | flags,
-
+		    /* NOLINTNEXTLINE(android-cloexec-open) */
-	if ((fd = open(path, O_CREAT | O_TRUNC | O_WRONLY | O_CLOEXEC,
+		    S_IRUSR | S_IWUSR);
 			     S_IRUSR | S_IWUSR)) < 0) {
 		perror("PID file open");
 		exit(EXIT_FAILURE);
 	}
 	return fd;
 }
 /**
@ -485,16 +446,11 @@ int __daemon(int pidfile_fd, int devnull_fd)
 		exit(EXIT_SUCCESS);
 	}
-	errno = 0;
+	if (setsid()				< 0 ||
-
+	    dup2(devnull_fd, STDIN_FILENO)	< 0 ||
-	setsid();
+	    dup2(devnull_fd, STDOUT_FILENO)	< 0 ||
-
+	    dup2(devnull_fd, STDERR_FILENO)	< 0 ||
-	dup2(devnull_fd, STDIN_FILENO);
+	    close(devnull_fd))
 	dup2(devnull_fd, STDOUT_FILENO);
 	dup2(devnull_fd, STDERR_FILENO);
 	close(devnull_fd);
 	if (errno)
 		exit(EXIT_FAILURE);
 	return 0;
@ -582,6 +538,36 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
 #endif
 }
 /**
 * write_all_buf() - Write a complete buffer to a file descriptor
 * @fd:		File descriptor to write to
 * @buf:	Start of the data to write
 * @len:	Number of bytes to write
 *
 * Short writes are continued from the point they stopped at, and writes
 * interrupted by a signal (EINTR) are retried.
 *
 * Return: 0 on success, -1 on error (with errno set)
 *
 * #syscalls write
 */
 int write_all_buf(int fd, const void *buf, size_t len)
 {
 	const char *cur = buf;
 	size_t remaining;
 	for (remaining = len; remaining > 0; ) {
 		ssize_t written = write(fd, cur, remaining);
 		if (written < 0) {
 			if (errno == EINTR)
 				continue;	/* Interrupted: try again */
 			return -1;
 		}
 		cur += written;
 		remaining -= written;
 	}
 	return 0;
 }
 /* write_remainder() - write the tail of an IO vector to an fd
 * @fd:		File descriptor
 * @iov:	IO vector
@ -590,28 +576,30 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
 *
 * Return: 0 on success, -1 on error (with errno set)
 *
- * #syscalls write writev
+ * #syscalls writev
 */
 int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip)
 {
-	size_t offset, i;
+	size_t i = 0, offset;
-	while ((i = iov_skip_bytes(iov, iovcnt, skip, &offset)) < iovcnt) {
+	while ((i += iov_skip_bytes(iov + i, iovcnt - i, skip, &offset)) < iovcnt) {
 		ssize_t rc;
 		if (offset) {
-			rc = write(fd, (char *)iov[i].iov_base + offset,
+			/* Write the remainder of the partially written buffer */
-				   iov[i].iov_len - offset);
+			if (write_all_buf(fd, (char *)iov[i].iov_base + offset,
-		} else {
+					  iov[i].iov_len - offset) < 0)
-			rc = writev(fd, &iov[i], iovcnt - i);
+				return -1;
 			i++;
 		}
 		/* Write as much of the remaining whole buffers as we can */
 		rc = writev(fd, &iov[i], iovcnt - i);
 		if (rc < 0)
 			return -1;
-		skip += rc;
+		skip = rc;
 	}
 	return 0;
 }
@ -750,6 +738,48 @@ void close_open_files(int argc, char **argv)
 			rc = close_range(fd + 1, ~0U, CLOSE_RANGE_UNSHARE);
 	}
-	if (rc)
+	if (rc) {
 		if (errno == ENOSYS || errno == EINVAL) {
 			/* This probably means close_range() or the
 			 * CLOSE_RANGE_UNSHARE flag is not supported by the
 			 * kernel.  Not much we can do here except carry on and
 			 * hope for the best.
 			 */
 			warn(
 "Can't use close_range() to ensure no files leaked by parent");
 		} else {
 			die_perror("Failed to close files leaked by parent");
 		}
 	}
 }
 /**
 * snprintf_check() - Format into a buffer, reporting truncation and errors
 * @str:	Destination buffer
 * @size:	Size of @str, including room for the terminating NUL
 * @format:	printf()-style format string, followed by its arguments
 *
 * Return: false if the whole output fit in @str; true otherwise, with errno
 *	   set to EIO if formatting failed or ENOBUFS if output was truncated
 */
 bool snprintf_check(char *str, size_t size, const char *format, ...)
 {
 	va_list args;
 	int len;
 	va_start(args, format);
 	len = vsnprintf(str, size, format, args);
 	va_end(args);
 	/* Success: no formatting error, and output plus NUL fit in @size */
 	if (len >= 0 && (size_t)len < size)
 		return false;
 	errno = len < 0 ? EIO : ENOBUFS;
 	return true;
 }
--- a/util.h
+++ b/util.h
@ -11,12 +11,12 @@
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 #include <signal.h>
 #include <arpa/inet.h>
 #include <unistd.h>
 #include <sys/syscall.h>
 #include <linux/close_range.h>
 #include "log.h"
@ -67,6 +67,15 @@
 #define STRINGIFY(x)	#x
 #define STR(x)		STRINGIFY(x)
 #ifdef CPPCHECK_6936
 /* Some cppcheck versions get confused by aborts inside a loop, causing
 * it to give false positive uninitialised variable warnings later in
 * the function, because it doesn't realise the non-initialising path
 * already exited.  See https://trac.cppcheck.net/ticket/13227
 */
 #define ASSERT(expr)		\
 	((expr) ? (void)0 : abort())
 #else
 #define ASSERT(expr)							\
 	do {								\
 		if (!(expr)) {						\
@ -78,6 +87,7 @@
 			abort();					\
 		}							\
 	} while (0)
 #endif
 #ifdef P_tmpdir
 #define TMPDIR		P_tmpdir
@ -91,6 +101,9 @@
 #define ARRAY_SIZE(a)		((int)(sizeof(a) / sizeof((a)[0])))
 #define foreach(item, array)						\
 	for ((item) = (array); (item) - (array) < ARRAY_SIZE(array); (item)++)
 #define IN_INTERVAL(a, b, x)	((x) >= (a) && (x) <= (b))
 #define FD_PROTO(x, proto)						\
 	(IN_INTERVAL(c->proto.fd_min, c->proto.fd_max, (x)))
@ -131,7 +144,7 @@ static inline uint32_t ntohl_unaligned(const void *p)
 	return ntohl(val);
 }
-#define NS_FN_STACK_SIZE	(RLIMIT_STACK_VAL * 1024 / 8)
+#define NS_FN_STACK_SIZE	(1024 * 1024) /* 1MiB */
 int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
 	     void *arg);
 #define NS_CALL(fn, arg)						\
@ -144,9 +157,9 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
 			 (void *)(arg));				\
 	} while (0)
-#define RCVBUF_BIG		(2UL * 1024 * 1024)
+#define RCVBUF_BIG		(2ULL * 1024 * 1024)
-#define SNDBUF_BIG		(4UL * 1024 * 1024)
+#define SNDBUF_BIG		(4ULL * 1024 * 1024)
-#define SNDBUF_SMALL		(128UL * 1024)
+#define SNDBUF_SMALL		(128ULL * 1024)
 #include <net/if.h>
 #include <limits.h>
@ -157,33 +170,9 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
 struct ctx;
 /* ffsl() - Weak definition implemented with the compiler builtin
 * NOTE(review): presumably a fallback for C libraries that don't provide
 * ffsl(), with the weak attribute letting a libc definition take precedence
 * -- confirm
 */
 /* cppcheck-suppress funcArgNamesDifferent */
 __attribute__ ((weak)) int ffsl(long int i) { return __builtin_ffsl(i); }
 /* close_range() compatibility definitions
 *
 * If the headers we build against define CLOSE_RANGE_UNSHARE, provide a weak
 * close_range() that issues the system call directly, defining the system call
 * number ourselves if the C library headers don't. Otherwise, provide a stub
 * that closes nothing and reports success.
 */
 #ifdef CLOSE_RANGE_UNSHARE	/* Linux kernel >= 5.9 */
 /* glibc < 2.34 and musl as of 1.2.5 need these */
 #ifndef SYS_close_range
 /* NOTE(review): assumes 436 is the close_range number on every supported
 * architecture -- confirm
 */
 #define SYS_close_range		436
 #endif
 /* NOTE(review): weak, presumably so that a close_range() from the C library
 * is used instead when available -- confirm
 */
 __attribute__ ((weak))
 /* cppcheck-suppress funcArgNamesDifferent */
 int close_range(unsigned int first, unsigned int last, int flags) {
 	return syscall(SYS_close_range, first, last, flags);
 }
 #else
 /* No reasonable fallback option: the arguments are ignored, no descriptor is
 * closed, and 0 (success) is returned, so callers can't tell that nothing
 * happened
 */
 /* cppcheck-suppress funcArgNamesDifferent */
 int close_range(unsigned int first, unsigned int last, int flags) {
 	return 0;
 }
 #endif
 int sock_l4_sa(const struct ctx *c, enum epoll_type type,
 	       const void *sa, socklen_t sl,
 	       const char *ifname, bool v6only, uint32_t data);
 int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type,
 	    const void *bind_addr, const char *ifname, uint16_t port,
 	    uint32_t data);
 void sock_probe_mem(struct ctx *c);
 long timespec_diff_ms(const struct timespec *a, const struct timespec *b);
 int64_t timespec_diff_us(const struct timespec *a, const struct timespec *b);
@ -195,13 +184,15 @@ char *line_read(char *buf, size_t len, int fd);
 void ns_enter(const struct ctx *c);
 bool ns_is_init(void);
 int open_in_ns(const struct ctx *c, const char *path, int flags);
-int pidfile_open(const char *path);
+int output_file_open(const char *path, int flags);
 void pidfile_write(int fd, pid_t pid);
 int __daemon(int pidfile_fd, int devnull_fd);
 int fls(unsigned long x);
 int write_file(const char *path, const char *buf);
 int write_all_buf(int fd, const void *buf, size_t len);
 int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip);
 void close_open_files(int argc, char **argv);
 bool snprintf_check(char *str, size_t size, const char *format, ...);
 /**
 * af_name() - Return name of an address family
@ -269,6 +260,9 @@ static inline bool mod_between(unsigned x, unsigned i, unsigned j, unsigned m)
 	return mod_sub(x, i, m) < mod_sub(j, i, m);
 }
 /* FPRINTF() intentionally silences cert-err33-c clang-tidy warnings */
 #define FPRINTF(f, ...)	(void)fprintf(f, __VA_ARGS__)
 /*
 * Workarounds for https://github.com/llvm/llvm-project/issues/58992
 *