treewide: Disable gcc strict aliasing rules as needed, drop workarounds
Recently, commit 4ddbcb9c0c
("tcp: Disable optimisations
for tcp_hash()") worked around yet another issue we hit with gcc 12
and '-flto -O2': some stores affecting the input data to siphash_20b()
were omitted altogether, and tcp_hash() wouldn't get the correct hash
for incoming connections.
Digging further into this revealed that, at least according to gcc's
interpretation of C99 aliasing rules, passing pointers to functions
with different types compared to the effective type of the object
(for example, a uint8_t pointer to an anonymous struct, as it happens
in tcp_hash()), doesn't guarantee that stores are not reordered
across the function call.
This means that, in general, our checksum and hash functions might
not see parts of input data that was intended to be provided by
callers.
Not even switching from uint8_t to character types, which should be
appropriate here, according to C99 (ISO/IEC 9899, TC3, draft N1256),
section 6.5, "Expressions", paragraph 7:
An object shall have its stored value accessed only by an lvalue
expression that has one of the following types:
[...]
— a character type.
does the trick. I guess this is also subject to interpretation:
casting passed pointers to character types, and then using those as
different types, might still violate (dubious) aliasing rules.
Disable gcc strict aliasing rules for potentially affected functions,
which, in turn, disables gcc's Type-Based Alias Analysis (TBAA)
optimisations based on those function arguments.
Drop the existing workarounds. Also the (seemingly?) bogus
'maybe-uninitialized' warning on the tcp_tap_handler() > tcp_hash() >
siphash_20b() path goes away with -fno-strict-aliasing on
siphash_20b().
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
This commit is contained in:
parent
da46fdac36
commit
a48c5c2abf
4 changed files with 29 additions and 33 deletions
21
Makefile
21
Makefile
|
@ -56,27 +56,6 @@ PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h icmp.h \
|
||||||
tcp_splice.h udp.h util.h
|
tcp_splice.h udp.h util.h
|
||||||
HEADERS = $(PASST_HEADERS) seccomp.h
|
HEADERS = $(PASST_HEADERS) seccomp.h
|
||||||
|
|
||||||
# On gcc 11 and 12, with -O2 and -flto, tcp_hash() and siphash_20b(), if
|
|
||||||
# inlined, seem to be hitting something similar to:
|
|
||||||
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78993
|
|
||||||
# from the pointer arithmetic used from the tcp_tap_handler() path to get the
|
|
||||||
# remote connection address.
|
|
||||||
#
|
|
||||||
# TODO: With the same combination, in ndp(), gcc optimises away the store of
|
|
||||||
# hop_limit in the IPv6 header (temporarily set to the protocol number for
|
|
||||||
# convenience, to mimic the ICMPv6 checksum pseudo-header) before the call to
|
|
||||||
# csum_unaligned(). Mark csum_unaligned() as "noipa" as a quick work-around,
|
|
||||||
# while we figure out if a corresponding gcc issue has already been reported.
|
|
||||||
ifeq (,$(filter-out 11 12, $(shell $(CC) -dumpversion)))
|
|
||||||
ifneq (,$(filter -flto%,$(FLAGS) $(CFLAGS) $(CPPFLAGS)))
|
|
||||||
ifneq (,$(filter -O2,$(FLAGS) $(CFLAGS) $(CPPFLAGS)))
|
|
||||||
FLAGS += -DTCP_HASH_NOINLINE
|
|
||||||
FLAGS += -DSIPHASH_20B_NOINLINE
|
|
||||||
FLAGS += -DCSUM_UNALIGNED_NO_IPA
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
|
C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
|
||||||
ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
|
ifeq ($(shell printf "$(C)" | $(CC) -S -xc - -o - >/dev/null 2>&1; echo $$?),0)
|
||||||
FLAGS += -DHAS_SND_WND
|
FLAGS += -DHAS_SND_WND
|
||||||
|
|
13
checksum.c
13
checksum.c
|
@ -69,6 +69,8 @@
|
||||||
*
|
*
|
||||||
* Return: 32-bit sum of 16-bit words
|
* Return: 32-bit sum of 16-bit words
|
||||||
*/
|
*/
|
||||||
|
/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
|
||||||
|
__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */
|
||||||
uint32_t sum_16b(const void *buf, size_t len)
|
uint32_t sum_16b(const void *buf, size_t len)
|
||||||
{
|
{
|
||||||
const uint16_t *p = buf;
|
const uint16_t *p = buf;
|
||||||
|
@ -107,9 +109,8 @@ uint16_t csum_fold(uint32_t sum)
|
||||||
*
|
*
|
||||||
* Return: 16-bit IPv4-style checksum
|
* Return: 16-bit IPv4-style checksum
|
||||||
*/
|
*/
|
||||||
#if CSUM_UNALIGNED_NO_IPA
|
/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
|
||||||
__attribute__((__noipa__)) /* See comment in Makefile */
|
__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */
|
||||||
#endif
|
|
||||||
uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init)
|
uint16_t csum_unaligned(const void *buf, size_t len, uint32_t init)
|
||||||
{
|
{
|
||||||
return (uint16_t)~csum_fold(sum_16b(buf, len) + init);
|
return (uint16_t)~csum_fold(sum_16b(buf, len) + init);
|
||||||
|
@ -245,6 +246,8 @@ void csum_icmp6(struct icmp6hdr *icmp6hr,
|
||||||
* - sum_a/sum_b unpacking is interleaved and not sequential to reduce stalls
|
* - sum_a/sum_b unpacking is interleaved and not sequential to reduce stalls
|
||||||
* - coding style adaptation
|
* - coding style adaptation
|
||||||
*/
|
*/
|
||||||
|
/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
|
||||||
|
__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */
|
||||||
static uint32_t csum_avx2(const void *buf, size_t len, uint32_t init)
|
static uint32_t csum_avx2(const void *buf, size_t len, uint32_t init)
|
||||||
{
|
{
|
||||||
__m256i a, b, sum256, sum_a_hi, sum_a_lo, sum_b_hi, sum_b_lo, c, d;
|
__m256i a, b, sum256, sum_a_hi, sum_a_lo, sum_b_hi, sum_b_lo, c, d;
|
||||||
|
@ -391,6 +394,8 @@ less_than_128_bytes:
|
||||||
*
|
*
|
||||||
* Return: 16-bit folded, complemented checksum sum
|
* Return: 16-bit folded, complemented checksum sum
|
||||||
*/
|
*/
|
||||||
|
/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
|
||||||
|
__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */
|
||||||
uint16_t csum(const void *buf, size_t len, uint32_t init)
|
uint16_t csum(const void *buf, size_t len, uint32_t init)
|
||||||
{
|
{
|
||||||
return (uint16_t)~csum_fold(csum_avx2(buf, len, init));
|
return (uint16_t)~csum_fold(csum_avx2(buf, len, init));
|
||||||
|
@ -406,6 +411,8 @@ uint16_t csum(const void *buf, size_t len, uint32_t init)
|
||||||
*
|
*
|
||||||
* Return: 16-bit folded, complemented checksum
|
* Return: 16-bit folded, complemented checksum
|
||||||
*/
|
*/
|
||||||
|
/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
|
||||||
|
__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */
|
||||||
uint16_t csum(const void *buf, size_t len, uint32_t init)
|
uint16_t csum(const void *buf, size_t len, uint32_t init)
|
||||||
{
|
{
|
||||||
return csum_unaligned(buf, len, init);
|
return csum_unaligned(buf, len, init);
|
||||||
|
|
22
siphash.c
22
siphash.c
|
@ -104,6 +104,17 @@
|
||||||
*
|
*
|
||||||
* Return: the 64-bit hash output
|
* Return: the 64-bit hash output
|
||||||
*/
|
*/
|
||||||
|
/* Type-Based Alias Analysis (TBAA) optimisation in gcc 11 and 12 (-flto -O2)
|
||||||
|
* makes these functions essentially useless by allowing reordering of stores of
|
||||||
|
* input data across function calls. Not even declaring @in as char pointer is
|
||||||
|
* enough: disable gcc's interpretation of strict aliasing altogether. See also:
|
||||||
|
*
|
||||||
|
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106706
|
||||||
|
* https://stackoverflow.com/questions/2958633/gcc-strict-aliasing-and-horror-stories
|
||||||
|
* https://lore.kernel.org/all/alpine.LFD.2.00.0901121128080.6528__33422.5328093909$1232291247$gmane$org@localhost.localdomain/
|
||||||
|
*/
|
||||||
|
/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
|
||||||
|
__attribute__((optimize("-fno-strict-aliasing")))
|
||||||
/* cppcheck-suppress unusedFunction */
|
/* cppcheck-suppress unusedFunction */
|
||||||
uint64_t siphash_8b(const uint8_t *in, const uint64_t *k)
|
uint64_t siphash_8b(const uint8_t *in, const uint64_t *k)
|
||||||
{
|
{
|
||||||
|
@ -123,6 +134,8 @@ uint64_t siphash_8b(const uint8_t *in, const uint64_t *k)
|
||||||
*
|
*
|
||||||
* Return: 32 bits obtained by XORing the two halves of the 64-bit hash output
|
* Return: 32 bits obtained by XORing the two halves of the 64-bit hash output
|
||||||
*/
|
*/
|
||||||
|
/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
|
||||||
|
__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */
|
||||||
/* cppcheck-suppress unusedFunction */
|
/* cppcheck-suppress unusedFunction */
|
||||||
uint32_t siphash_12b(const uint8_t *in, const uint64_t *k)
|
uint32_t siphash_12b(const uint8_t *in, const uint64_t *k)
|
||||||
{
|
{
|
||||||
|
@ -148,9 +161,8 @@ uint32_t siphash_12b(const uint8_t *in, const uint64_t *k)
|
||||||
*
|
*
|
||||||
* Return: the 64-bit hash output
|
* Return: the 64-bit hash output
|
||||||
*/
|
*/
|
||||||
#if SIPHASH_20B_NOINLINE
|
/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
|
||||||
__attribute__((__noinline__)) /* See comment in Makefile */
|
__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */
|
||||||
#endif
|
|
||||||
uint64_t siphash_20b(const uint8_t *in, const uint64_t *k)
|
uint64_t siphash_20b(const uint8_t *in, const uint64_t *k)
|
||||||
{
|
{
|
||||||
uint32_t *in32 = (uint32_t *)in;
|
uint32_t *in32 = (uint32_t *)in;
|
||||||
|
@ -179,6 +191,8 @@ uint64_t siphash_20b(const uint8_t *in, const uint64_t *k)
|
||||||
*
|
*
|
||||||
* Return: the 64-bit hash output
|
* Return: the 64-bit hash output
|
||||||
*/
|
*/
|
||||||
|
/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
|
||||||
|
__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */
|
||||||
/* cppcheck-suppress unusedFunction */
|
/* cppcheck-suppress unusedFunction */
|
||||||
uint32_t siphash_32b(const uint8_t *in, const uint64_t *k)
|
uint32_t siphash_32b(const uint8_t *in, const uint64_t *k)
|
||||||
{
|
{
|
||||||
|
@ -205,6 +219,8 @@ uint32_t siphash_32b(const uint8_t *in, const uint64_t *k)
|
||||||
*
|
*
|
||||||
* Return: 32 bits obtained by XORing the two halves of the 64-bit hash output
|
* Return: 32 bits obtained by XORing the two halves of the 64-bit hash output
|
||||||
*/
|
*/
|
||||||
|
/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */
|
||||||
|
__attribute__((optimize("-fno-strict-aliasing"))) /* See siphash_8b() */
|
||||||
uint32_t siphash_36b(const uint8_t *in, const uint64_t *k)
|
uint32_t siphash_36b(const uint8_t *in, const uint64_t *k)
|
||||||
{
|
{
|
||||||
uint32_t *in32 = (uint32_t *)in;
|
uint32_t *in32 = (uint32_t *)in;
|
||||||
|
|
6
tcp.c
6
tcp.c
|
@ -1182,12 +1182,6 @@ static int tcp_hash_match(const struct tcp_tap_conn *conn,
|
||||||
*
|
*
|
||||||
* Return: hash value, already modulo size of the hash table
|
* Return: hash value, already modulo size of the hash table
|
||||||
*/
|
*/
|
||||||
#if TCP_HASH_NOINLINE
|
|
||||||
__attribute__((__noinline__)) /* See comment in Makefile */
|
|
||||||
#endif
|
|
||||||
__attribute__((optimize("O0"))) /* TODO: with -O2 and -flto on gcc 12.2,
|
|
||||||
* siphash_20b() doesn't see 'addr', why?
|
|
||||||
*/
|
|
||||||
static unsigned int tcp_hash(const struct ctx *c, const union inany_addr *addr,
|
static unsigned int tcp_hash(const struct ctx *c, const union inany_addr *addr,
|
||||||
in_port_t tap_port, in_port_t sock_port)
|
in_port_t tap_port, in_port_t sock_port)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue