summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdam Stylinski <kungfujesus06@gmail.com>2024-12-21 11:04:47 -0500
committerHans Kristian Rosbach <hk-github@circlestorm.org>2024-12-23 14:06:35 +0100
commit06bba674706a238e4b7bee49cf76911851bf8f0c (patch)
tree5fa2c7284b232aa97ee0999f2bf50a696cb59bc0
parent87d8e9540844ff7afd7d466d4e218544578b248b (diff)
downloadProject-Tick-06bba674706a238e4b7bee49cf76911851bf8f0c.tar.gz
Project-Tick-06bba674706a238e4b7bee49cf76911851bf8f0c.zip
Fix unaligned access in ACLE based crc32
This fixes a rightful complaint from the alignment sanitizer that we alias memory in an unaligned fashion. A nice added bonus is that this improves performance a tiny bit on the larger buffers, perhaps due to loops that idiomatically decrement a count and increment a single buffer pointer rather than the maze of conditional pointer reassignments. While here, let's write a unit test just for this. Since this is the only variant that accesses memory in a potentially unaligned fashion that doesn't explicitly go byte by byte or use intrinsics that don't require alignment, we'll enable it only for this function for now. Adding more tests later if need be should be possible. For everything else not crc, we're relying on ubsan to hopefully catch things by chance.
-rw-r--r--arch/arm/crc32_acle.c42
-rw-r--r--test/test_crc32.cc34
2 files changed, 54 insertions, 22 deletions
diff --git a/arch/arm/crc32_acle.c b/arch/arm/crc32_acle.c
index 116bcab1c2..a3a1fefc03 100644
--- a/arch/arm/crc32_acle.c
+++ b/arch/arm/crc32_acle.c
@@ -11,9 +11,9 @@
Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len) {
Z_REGISTER uint32_t c;
- Z_REGISTER const uint16_t *buf2;
- Z_REGISTER const uint32_t *buf4;
- Z_REGISTER const uint64_t *buf8;
+ Z_REGISTER uint16_t buf2;
+ Z_REGISTER uint32_t buf4;
+ Z_REGISTER uint64_t buf8;
c = ~crc;
@@ -29,45 +29,43 @@ Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, si
len--;
}
- if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) {
- buf2 = (const uint16_t *) buf;
- c = __crc32h(c, *buf2++);
+ if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & (sizeof(uint32_t) - 1))) {
+ buf2 = *((uint16_t*)buf);
+ c = __crc32h(c, buf2);
+ buf += sizeof(uint16_t);
len -= sizeof(uint16_t);
- buf4 = (const uint32_t *) buf2;
- } else {
- buf4 = (const uint32_t *) buf;
}
- if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) {
- c = __crc32w(c, *buf4++);
+ if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & (sizeof(uint64_t) - 1))) {
+ buf4 = *((uint32_t*)buf);
+ c = __crc32w(c, buf4);
len -= sizeof(uint32_t);
+ buf += sizeof(uint32_t);
}
- buf8 = (const uint64_t *) buf4;
- } else {
- buf8 = (const uint64_t *) buf;
}
while (len >= sizeof(uint64_t)) {
- c = __crc32d(c, *buf8++);
+ buf8 = *((uint64_t*)buf);
+ c = __crc32d(c, buf8);
len -= sizeof(uint64_t);
+ buf += sizeof(uint64_t);
}
if (len >= sizeof(uint32_t)) {
- buf4 = (const uint32_t *) buf8;
- c = __crc32w(c, *buf4++);
+ buf4 = *((uint32_t*)buf);
+ c = __crc32w(c, buf4);
len -= sizeof(uint32_t);
- buf2 = (const uint16_t *) buf4;
- } else {
- buf2 = (const uint16_t *) buf8;
+ buf += sizeof(uint32_t);
}
if (len >= sizeof(uint16_t)) {
- c = __crc32h(c, *buf2++);
+ buf2 = *((uint16_t*)buf);
+ c = __crc32h(c, buf2);
len -= sizeof(uint16_t);
+ buf += sizeof(uint16_t);
}
- buf = (const unsigned char *) buf2;
if (len) {
c = __crc32b(c, *buf);
}
diff --git a/test/test_crc32.cc b/test/test_crc32.cc
index 245a92fb94..ea839383df 100644
--- a/test/test_crc32.cc
+++ b/test/test_crc32.cc
@@ -8,6 +8,8 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
+#include "zutil.h"
+#include "zutil_p.h"
extern "C" {
# include "zbuild.h"
@@ -195,6 +197,22 @@ public:
}
};
+/* Specifically to test where we had dodgy alignment in the acle CRC32
+ * function. All others are either byte level access or use intrinsics
+ * that work with unaligned access */
+class crc32_align : public ::testing::TestWithParam<int> {
+public:
+ void hash(int param, crc32_func crc32) {
+ uint8_t *buf = (uint8_t*)zng_alloc(sizeof(uint8_t) * (128 + param));
+ if (buf != NULL) {
+ (void)crc32(0, buf + param, 128);
+ } else {
+ FAIL();
+ }
+ zng_free(buf);
+ }
+};
+
INSTANTIATE_TEST_SUITE_P(crc32, crc32_variant, testing::ValuesIn(tests));
#define TEST_CRC32(name, func, support_flag) \
@@ -210,10 +228,26 @@ TEST_CRC32(braid, PREFIX(crc32_braid), 1)
#ifdef DISABLE_RUNTIME_CPU_DETECTION
TEST_CRC32(native, native_crc32, 1)
+
#else
#ifdef ARM_ACLE
+static const int align_offsets[] = {
+ 1, 2, 3, 4, 5, 6, 7
+};
+
+#define TEST_CRC32_ALIGN(name, func, support_flag) \
+ TEST_P(crc32_align, name) { \
+ if (!(support_flag)) { \
+ GTEST_SKIP(); \
+ return; \
+ } \
+ hash(GetParam(), func); \
+ }
+
+INSTANTIATE_TEST_SUITE_P(crc32_alignment, crc32_align, testing::ValuesIn(align_offsets));
TEST_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32)
+TEST_CRC32_ALIGN(acle_align, crc32_acle, test_cpu_features.arm.has_crc32)
#endif
#ifdef POWER8_VSX_CRC32
TEST_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07)