diff options
| author | Adam Stylinski <kungfujesus06@gmail.com> | 2024-12-21 11:04:47 -0500 |
|---|---|---|
| committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2024-12-23 14:06:35 +0100 |
| commit | 06bba674706a238e4b7bee49cf76911851bf8f0c (patch) | |
| tree | 5fa2c7284b232aa97ee0999f2bf50a696cb59bc0 | |
| parent | 87d8e9540844ff7afd7d466d4e218544578b248b (diff) | |
| download | Project-Tick-06bba674706a238e4b7bee49cf76911851bf8f0c.tar.gz Project-Tick-06bba674706a238e4b7bee49cf76911851bf8f0c.zip | |
Fix unaligned access in ACLE based crc32
This fixes a rightful complaint from the alignment sanitizer that we
alias memory in an unaligned fashion. A nice added bonus is that this
improves performance a tiny bit on the larger buffers, perhaps due to
loops that idiomatically decrement a count and increment a single buffer
pointer rather than the maze of conditional pointer reassignments.
While here, let's write a unit test just for this. Since this is the only
variant that accesses memory in a potentially unaligned fashion that doesn't
explicitly go byte by byte or use intrinsics that don't require alignment,
we'll enable it only for this function for now. Adding more tests later if
need be should be possible. For everything else not crc, we're relying on
ubsan to hopefully catch things by chance.
| -rw-r--r-- | arch/arm/crc32_acle.c | 42 | ||||
| -rw-r--r-- | test/test_crc32.cc | 34 |
2 files changed, 54 insertions, 22 deletions
diff --git a/arch/arm/crc32_acle.c b/arch/arm/crc32_acle.c index 116bcab1c2..a3a1fefc03 100644 --- a/arch/arm/crc32_acle.c +++ b/arch/arm/crc32_acle.c @@ -11,9 +11,9 @@ Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len) { Z_REGISTER uint32_t c; - Z_REGISTER const uint16_t *buf2; - Z_REGISTER const uint32_t *buf4; - Z_REGISTER const uint64_t *buf8; + Z_REGISTER uint16_t buf2; + Z_REGISTER uint32_t buf4; + Z_REGISTER uint64_t buf8; c = ~crc; @@ -29,45 +29,43 @@ Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, si len--; } - if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) { - buf2 = (const uint16_t *) buf; - c = __crc32h(c, *buf2++); + if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & (sizeof(uint32_t) - 1))) { + buf2 = *((uint16_t*)buf); + c = __crc32h(c, buf2); + buf += sizeof(uint16_t); len -= sizeof(uint16_t); - buf4 = (const uint32_t *) buf2; - } else { - buf4 = (const uint32_t *) buf; } - if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) { - c = __crc32w(c, *buf4++); + if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & (sizeof(uint64_t) - 1))) { + buf4 = *((uint32_t*)buf); + c = __crc32w(c, buf4); len -= sizeof(uint32_t); + buf += sizeof(uint32_t); } - buf8 = (const uint64_t *) buf4; - } else { - buf8 = (const uint64_t *) buf; } while (len >= sizeof(uint64_t)) { - c = __crc32d(c, *buf8++); + buf8 = *((uint64_t*)buf); + c = __crc32d(c, buf8); len -= sizeof(uint64_t); + buf += sizeof(uint64_t); } if (len >= sizeof(uint32_t)) { - buf4 = (const uint32_t *) buf8; - c = __crc32w(c, *buf4++); + buf4 = *((uint32_t*)buf); + c = __crc32w(c, buf4); len -= sizeof(uint32_t); - buf2 = (const uint16_t *) buf4; - } else { - buf2 = (const uint16_t *) buf8; + buf += sizeof(uint32_t); } if (len >= sizeof(uint16_t)) { - c = __crc32h(c, *buf2++); + buf2 = *((uint16_t*)buf); + c = __crc32h(c, buf2); len -= sizeof(uint16_t); + buf += sizeof(uint16_t); } - buf = (const unsigned char *) buf2; if (len) { c = __crc32b(c, *buf); } diff --git a/test/test_crc32.cc b/test/test_crc32.cc index 245a92fb94..ea839383df 100644 --- a/test/test_crc32.cc +++ b/test/test_crc32.cc @@ -8,6 +8,8 @@ #include <stdio.h> #include <string.h> #include <stdlib.h> +#include "zutil.h" +#include "zutil_p.h" extern "C" { # include "zbuild.h" @@ -195,6 +197,22 @@ public: } }; +/* Specifically to test where we had dodgy alignment in the acle CRC32 + * function. All others are either byte level access or use intrinsics + * that work with unaligned access */ +class crc32_align : public ::testing::TestWithParam<int> { +public: + void hash(int param, crc32_func crc32) { + uint8_t *buf = (uint8_t*)zng_alloc(sizeof(uint8_t) * (128 + param)); + if (buf != NULL) { + (void)crc32(0, buf + param, 128); + } else { + FAIL(); + } + zng_free(buf); + } +}; + INSTANTIATE_TEST_SUITE_P(crc32, crc32_variant, testing::ValuesIn(tests)); #define TEST_CRC32(name, func, support_flag) \ @@ -210,10 +228,26 @@ TEST_CRC32(braid, PREFIX(crc32_braid), 1) #ifdef DISABLE_RUNTIME_CPU_DETECTION TEST_CRC32(native, native_crc32, 1) + #else #ifdef ARM_ACLE +static const int align_offsets[] = { + 1, 2, 3, 4, 5, 6, 7 +}; + +#define TEST_CRC32_ALIGN(name, func, support_flag) \ + TEST_P(crc32_align, name) { \ + if (!(support_flag)) { \ + GTEST_SKIP(); \ + return; \ + } \ + hash(GetParam(), func); \ + } + +INSTANTIATE_TEST_SUITE_P(crc32_alignment, crc32_align, testing::ValuesIn(align_offsets)); TEST_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32) +TEST_CRC32_ALIGN(acle_align, crc32_acle, test_cpu_features.arm.has_crc32) #endif #ifdef POWER8_VSX_CRC32 TEST_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07) |
