treewide: Disable gcc strict aliasing rules as needed, drop workarounds

Recently, commit 4ddbcb9c0c ("tcp: Disable optimisations for tcp_hash()") worked around yet another issue we hit with gcc 12 and '-flto -O2': some stores affecting the input data to siphash_20b() were omitted altogether, and tcp_hash() wouldn't get the correct hash for incoming connections. Digging further into this revealed that, at least according to gcc's interpretation of C99 aliasing rules, passing pointers to functions with different types compared to the effective type of the object (for example, a uint8_t pointer to an anonymous struct, as it happens in tcp_hash()), doesn't guarantee that stores are not reordered across the function call. This means that, in general, our checksum and hash functions might not see parts of input data that was intended to be provided by callers. Not even switching from uint8_t to character types, which should be appropriate here, according to C99 (ISO/IEC 9899, TC3, draft N1256), section 6.5, "Expressions", paragraph 7: An object shall have its stored value accessed only by an lvalue expression that has one of the following types: [...] — a character type. does the trick. I guess this is also subject to interpretation: casting passed pointers to character types, and then using those as different types, might still violate (dubious) aliasing rules. Disable gcc strict aliasing rules for potentially affected functions, which, in turn, disables gcc's Type-Based Alias Analysis (TBAA) optimisations based on those function arguments. Drop the existing workarounds. Also the (seemingly?) bogus 'maybe-uninitialized' warning on the tcp_tap_handler() > tcp_hash() > siphash_20b() path goes away with -fno-strict-aliasing on siphash_20b(). Signed-off-by: Stefano Brivio <sbrivio@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
2025-04-29 14:48:50 +02:00 · 2023-02-27 01:57:36 +01:00 · 2023-02-27 01:57:36 +01:00 · a48c5c2abf
commit a48c5c2abf
parent da46fdac36
4 changed files with 29 additions and 33 deletions
--- a/21
+++ b/21
@ -56,27 +56,6 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h icmp.h \
 	tcp_splice.h udp.h util.h
 HEADERS = $(PASST_HEADERS) seccomp.h
 # On gcc 11 and 12, with -O2 and -flto, tcp_hash() and siphash_20b(), if
 # inlined, seem to be hitting something similar to:
 #	https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78993
 # from the pointer arithmetic used from the tcp_tap_handler() path to get the
 # remote connection address.
 #
 # TODO: With the same combination, in ndp(), gcc optimises away the store of
 # hop_limit in the IPv6 header (temporarily set to the protocol number for
 # convenience, to mimic the ICMPv6 checksum pseudo-header) before the call to
 # csum_unaligned(). Mark csum_unaligned() as "noipa" as a quick work-around,
 # while we figure out if a corresponding gcc issue has already been reported.
 ifeq (,$(filter-out 11 12, $(shell $(CC) -dumpversion)))
 ifneq (,$(filter -flto%,$(FLAGS) $(CFLAGS) $(CPPFLAGS)))
 ifneq (,$(filter -O2,$(FLAGS) $(CFLAGS) $(CPPFLAGS)))
 	FLAGS += -DTCP_HASH_NOINLINE
 	FLAGS += -DSIPHASH_20B_NOINLINE
 	FLAGS += -DCSUM_UNALIGNED_NO_IPA
 endif
 endif
 endif
 C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
 ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
 	FLAGS += -DHAS_SND_WND
--- a/checksum.c
+++ b/checksum.c
@ -69,6 +69,8 @@
 *
 * Return: 32-bit sum of 16-bit words
 */
 /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
 __attribute__((optimize("-fno-strict-aliasing")))	/* See siphash_8b() */
 uint32_t sum_16b(const void *buf, size_t len)
 {
 	const uint16_t *p = buf;
@ -107,9 +109,8 @@ uint16_t csum_fold(uint32_t sum)
 *
 * Return: 16-bit IPv4-style checksum
 */
-#if CSUM_UNALIGNED_NO_IPA
+/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
-__attribute__((__noipa__))	/* See comment in Makefile */
+__attribute__((optimize("-fno-strict-aliasing")))	/* See siphash_8b() */
 #endif
 uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init)
 {
 	return (uint16_t)~csum_fold(sum_16b(buf, len) + init);
@ -245,6 +246,8 @@ void csum_icmp6(struct icmp6hdr *icmp6hr,
 * - sum_a/sum_b unpacking is interleaved and not sequential to reduce stalls
 * - coding style adaptation
 */
 /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
 __attribute__((optimize("-fno-strict-aliasing")))	/* See siphash_8b() */
 static uint32_t csum_avx2(const void *buf, size_t len, uint32_t init)
 {
 	__m256i a, b, sum256, sum_a_hi, sum_a_lo, sum_b_hi, sum_b_lo, c, d;
@ -391,6 +394,8 @@ less_than_128_bytes:
 *
 * Return: 16-bit folded, complemented checksum sum
 */
 /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
 __attribute__((optimize("-fno-strict-aliasing")))	/* See siphash_8b() */
 uint16_t csum(const void *buf, size_t len, uint32_t init)
 {
 	return (uint16_t)~csum_fold(csum_avx2(buf, len, init));
@ -406,6 +411,8 @@ uint16_t csum(const void *buf, size_t len, uint32_t init)
 *
 * Return: 16-bit folded, complemented checksum
 */
 /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
 __attribute__((optimize("-fno-strict-aliasing")))	/* See siphash_8b() */
 uint16_t csum(const void *buf, size_t len, uint32_t init)
 {
 	return csum_unaligned(buf, len, init);
--- a/siphash.c
+++ b/siphash.c
@ -104,6 +104,17 @@
 *
 * Return: the 64-bit hash output
 */
 /* Type-Based Alias Analysis (TBAA) optimisation in gcc 11 and 12 (-flto -O2)
 * makes these functions essentially useless by allowing reordering of stores of
 * input data across function calls. Not even declaring @in as char pointer is
 * enough: disable gcc's interpretation of strict aliasing altogether. See also:
 *
 *	https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106706
 *	https://stackoverflow.com/questions/2958633/gcc-strict-aliasing-and-horror-stories
 *	https://lore.kernel.org/all/alpine.LFD.2.00.0901121128080.6528__33422.5328093909$1232291247$gmane$org@localhost.localdomain/
 */
 /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
 __attribute__((optimize("-fno-strict-aliasing")))
 /* cppcheck-suppress unusedFunction */
 uint64_t siphash_8b(const uint8_t *in, const uint64_t *k)
 {
@ -123,6 +134,8 @@ uint64_t siphash_8b(const uint8_t *in, const uint64_t *k)
 *
 * Return: 32 bits obtained by XORing the two halves of the 64-bit hash output
 */
 /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
 __attribute__((optimize("-fno-strict-aliasing")))	/* See siphash_8b() */
 /* cppcheck-suppress unusedFunction */
 uint32_t siphash_12b(const uint8_t *in, const uint64_t *k)
 {
@ -148,9 +161,8 @@ uint32_t siphash_12b(const uint8_t *in, const uint64_t *k)
 *
 * Return: the 64-bit hash output
 */
-#if SIPHASH_20B_NOINLINE
+/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
-__attribute__((__noinline__))	/* See comment in Makefile */
+__attribute__((optimize("-fno-strict-aliasing")))	/* See siphash_8b() */
 #endif
 uint64_t siphash_20b(const uint8_t *in, const uint64_t *k)
 {
 	uint32_t *in32 = (uint32_t *)in;
@ -179,6 +191,8 @@ uint64_t siphash_20b(const uint8_t *in, const uint64_t *k)
 *
 * Return: the 64-bit hash output
 */
 /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
 __attribute__((optimize("-fno-strict-aliasing")))	/* See siphash_8b() */
 /* cppcheck-suppress unusedFunction */
 uint32_t siphash_32b(const uint8_t *in, const uint64_t *k)
 {
@ -205,6 +219,8 @@ uint32_t siphash_32b(const uint8_t *in, const uint64_t *k)
 *
 * Return: 32 bits obtained by XORing the two halves of the 64-bit hash output
 */
 /* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
 __attribute__((optimize("-fno-strict-aliasing")))	/* See siphash_8b() */
 uint32_t siphash_36b(const uint8_t *in, const uint64_t *k)
 {
 	uint32_t *in32 = (uint32_t *)in;
--- a/tcp.c
+++ b/tcp.c
@ -1182,12 +1182,6 @@ static int tcp_hash_match(const struct tcp_tap_conn *conn,
 *
 * Return: hash value, already modulo size of the hash table
 */
 #if TCP_HASH_NOINLINE
 __attribute__((__noinline__))	/* See comment in Makefile */
 #endif
 __attribute__((optimize("O0")))	/* TODO: with -O2 and -flto on gcc 12.2,
 				 * siphash_20b() doesn't see 'addr', why?
 				 */
 static unsigned int tcp_hash(const struct ctx *c, const union inany_addr *addr,
 			     in_port_t tap_port, in_port_t sock_port)
 {