diff --git a/Makefile b/Makefile index 080c748..667ddfb 100644 --- a/Makefile +++ b/Makefile @@ -56,27 +56,6 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h icmp.h \ tcp_splice.h udp.h util.h HEADERS = $(PASST_HEADERS) seccomp.h -# On gcc 11 and 12, with -O2 and -flto, tcp_hash() and siphash_20b(), if -# inlined, seem to be hitting something similar to: -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78993 -# from the pointer arithmetic used from the tcp_tap_handler() path to get the -# remote connection address. -# -# TODO: With the same combination, in ndp(), gcc optimises away the store of -# hop_limit in the IPv6 header (temporarily set to the protocol number for -# convenience, to mimic the ICMPv6 checksum pseudo-header) before the call to -# csum_unaligned(). Mark csum_unaligned() as "noipa" as a quick work-around, -# while we figure out if a corresponding gcc issue has already been reported. -ifeq (,$(filter-out 11 12, $(shell $(CC) -dumpversion))) -ifneq (,$(filter -flto%,$(FLAGS) $(CFLAGS) $(CPPFLAGS))) -ifneq (,$(filter -O2,$(FLAGS) $(CFLAGS) $(CPPFLAGS))) - FLAGS += -DTCP_HASH_NOINLINE - FLAGS += -DSIPHASH_20B_NOINLINE - FLAGS += -DCSUM_UNALIGNED_NO_IPA -endif -endif -endif - C := \#include \nstruct tcp_info x = { .tcpi_snd_wnd = 0 }; ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0) FLAGS += -DHAS_SND_WND diff --git a/checksum.c b/checksum.c index 29769d9..9631f91 100644 --- a/checksum.c +++ b/checksum.c @@ -69,6 +69,8 @@ * * Return: 32-bit sum of 16-bit words */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */ uint32_t sum_16b(const void *buf, size_t len) { const uint16_t *p = buf; @@ -107,9 +109,8 @@ uint16_t csum_fold(uint32_t sum) * * Return: 16-bit IPv4-style checksum */ -#if CSUM_UNALIGNED_NO_IPA -__attribute__((__noipa__)) /* See comment in Makefile */ -#endif +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */ uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init) { return (uint16_t)~csum_fold(sum_16b(buf, len) + init); @@ -245,6 +246,8 @@ void csum_icmp6(struct icmp6hdr *icmp6hr, * - sum_a/sum_b unpacking is interleaved and not sequential to reduce stalls * - coding style adaptation */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */ static uint32_t csum_avx2(const void *buf, size_t len, uint32_t init) { __m256i a, b, sum256, sum_a_hi, sum_a_lo, sum_b_hi, sum_b_lo, c, d; @@ -391,6 +394,8 @@ less_than_128_bytes: * * Return: 16-bit folded, complemented checksum sum */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */ uint16_t csum(const void *buf, size_t len, uint32_t init) { return (uint16_t)~csum_fold(csum_avx2(buf, len, init)); @@ -406,6 +411,8 @@ uint16_t csum(const void *buf, size_t len, uint32_t init) * * Return: 16-bit folded, complemented checksum */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */ uint16_t csum(const void *buf, size_t len, uint32_t init) { return csum_unaligned(buf, len, init); diff --git a/siphash.c b/siphash.c index 811918b..e8b144d 100644 --- a/siphash.c +++ b/siphash.c @@ -104,6 +104,17 @@ * * Return: the 64-bit hash output */ +/* Type-Based Alias Analysis (TBAA) optimisation in gcc 11 and 12 (-flto -O2) + * makes these functions essentially useless by allowing reordering of stores of + * input data across function calls. Not even declaring @in as char pointer is + * enough: disable gcc's interpretation of strict aliasing altogether. See also: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106706 + * https://stackoverflow.com/questions/2958633/gcc-strict-aliasing-and-horror-stories + * https://lore.kernel.org/all/alpine.LFD.2.00.0901121128080.6528__33422.5328093909$1232291247$gmane$org@localhost.localdomain/ + */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* cppcheck-suppress unusedFunction */ uint64_t siphash_8b(const uint8_t *in, const uint64_t *k) { @@ -123,6 +134,8 @@ uint64_t siphash_8b(const uint8_t *in, const uint64_t *k) * * Return: 32 bits obtained by XORing the two halves of the 64-bit hash output */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */ /* cppcheck-suppress unusedFunction */ uint32_t siphash_12b(const uint8_t *in, const uint64_t *k) { @@ -148,9 +161,8 @@ uint32_t siphash_12b(const uint8_t *in, const uint64_t *k) * * Return: the 64-bit hash output */ -#if SIPHASH_20B_NOINLINE -__attribute__((__noinline__)) /* See comment in Makefile */ -#endif +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */ uint64_t siphash_20b(const uint8_t *in, const uint64_t *k) { uint32_t *in32 = (uint32_t *)in; @@ -179,6 +191,8 @@ uint64_t siphash_20b(const uint8_t *in, const uint64_t *k) * * Return: the 64-bit hash output */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */ /* cppcheck-suppress unusedFunction */ uint32_t siphash_32b(const uint8_t *in, const uint64_t *k) { @@ -205,6 +219,8 @@ uint32_t siphash_32b(const uint8_t *in, const uint64_t *k) * * Return: 32 bits obtained by XORing the two halves of the 64-bit hash output */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */ uint32_t siphash_36b(const uint8_t *in, const uint64_t *k) { uint32_t *in32 = (uint32_t *)in; diff --git a/tcp.c b/tcp.c index 21c319d..cbd537e 100644 --- a/tcp.c +++ b/tcp.c @@ -1182,12 +1182,6 @@ static int tcp_hash_match(const struct tcp_tap_conn *conn, * * Return: hash value, already modulo size of the hash table */ -#if TCP_HASH_NOINLINE -__attribute__((__noinline__)) /* See comment in Makefile */ -#endif -__attribute__((optimize("O0"))) /* TODO: with -O2 and -flto on gcc 12.2, - * siphash_20b() doesn't see 'addr', why? - */ static unsigned int tcp_hash(const struct ctx *c, const union inany_addr *addr, in_port_t tap_port, in_port_t sock_port) {