diff options
| -rw-r--r-- | arch/arm/crc32_acle.c | 42 | ||||
| -rw-r--r-- | test/test_crc32.cc | 34 |
2 files changed, 54 insertions, 22 deletions
diff --git a/arch/arm/crc32_acle.c b/arch/arm/crc32_acle.c index 116bcab1c2..a3a1fefc03 100644 --- a/arch/arm/crc32_acle.c +++ b/arch/arm/crc32_acle.c @@ -11,9 +11,9 @@ Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len) { Z_REGISTER uint32_t c; - Z_REGISTER const uint16_t *buf2; - Z_REGISTER const uint32_t *buf4; - Z_REGISTER const uint64_t *buf8; + Z_REGISTER uint16_t buf2; + Z_REGISTER uint32_t buf4; + Z_REGISTER uint64_t buf8; c = ~crc; @@ -29,45 +29,43 @@ Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, si len--; } - if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) { - buf2 = (const uint16_t *) buf; - c = __crc32h(c, *buf2++); + if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & (sizeof(uint32_t) - 1))) { + buf2 = *((uint16_t*)buf); + c = __crc32h(c, buf2); + buf += sizeof(uint16_t); len -= sizeof(uint16_t); - buf4 = (const uint32_t *) buf2; - } else { - buf4 = (const uint32_t *) buf; } - if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) { - c = __crc32w(c, *buf4++); + if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf & (sizeof(uint64_t) - 1))) { + buf4 = *((uint32_t*)buf); + c = __crc32w(c, buf4); len -= sizeof(uint32_t); + buf += sizeof(uint32_t); } - buf8 = (const uint64_t *) buf4; - } else { - buf8 = (const uint64_t *) buf; } while (len >= sizeof(uint64_t)) { - c = __crc32d(c, *buf8++); + buf8 = *((uint64_t*)buf); + c = __crc32d(c, buf8); len -= sizeof(uint64_t); + buf += sizeof(uint64_t); } if (len >= sizeof(uint32_t)) { - buf4 = (const uint32_t *) buf8; - c = __crc32w(c, *buf4++); + buf4 = *((uint32_t*)buf); + c = __crc32w(c, buf4); len -= sizeof(uint32_t); - buf2 = (const uint16_t *) buf4; - } else { - buf2 = (const uint16_t *) buf8; + buf += sizeof(uint32_t); } if (len >= sizeof(uint16_t)) { - c = __crc32h(c, *buf2++); + buf2 = *((uint16_t*)buf); + c = __crc32h(c, buf2); len -= sizeof(uint16_t); + buf += sizeof(uint16_t); } - buf = (const unsigned char *) buf2; if (len) { c = __crc32b(c, *buf); } diff --git a/test/test_crc32.cc b/test/test_crc32.cc index 245a92fb94..ea839383df 100644 --- a/test/test_crc32.cc +++ b/test/test_crc32.cc @@ -8,6 +8,8 @@ #include <stdio.h> #include <string.h> #include <stdlib.h> +#include "zutil.h" +#include "zutil_p.h" extern "C" { # include "zbuild.h" @@ -195,6 +197,22 @@ public: } }; +/* Specifically to test where we had dodgy alignment in the acle CRC32 + * function. All others are either byte level access or use intrinsics + * that work with unaligned access */ +class crc32_align : public ::testing::TestWithParam<int> { +public: + void hash(int param, crc32_func crc32) { + uint8_t *buf = (uint8_t*)zng_alloc(sizeof(uint8_t) * (128 + param)); + if (buf != NULL) { + (void)crc32(0, buf + param, 128); + } else { + FAIL(); + } + zng_free(buf); + } +}; + INSTANTIATE_TEST_SUITE_P(crc32, crc32_variant, testing::ValuesIn(tests)); #define TEST_CRC32(name, func, support_flag) \ @@ -210,10 +228,26 @@ TEST_CRC32(braid, PREFIX(crc32_braid), 1) #ifdef DISABLE_RUNTIME_CPU_DETECTION TEST_CRC32(native, native_crc32, 1) + #else #ifdef ARM_ACLE +static const int align_offsets[] = { + 1, 2, 3, 4, 5, 6, 7 +}; + +#define TEST_CRC32_ALIGN(name, func, support_flag) \ + TEST_P(crc32_align, name) { \ + if (!(support_flag)) { \ + GTEST_SKIP(); \ + return; \ + } \ + hash(GetParam(), func); \ + } + +INSTANTIATE_TEST_SUITE_P(crc32_alignment, crc32_align, testing::ValuesIn(align_offsets)); TEST_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32) +TEST_CRC32_ALIGN(acle_align, crc32_acle, test_cpu_features.arm.has_crc32) #endif #ifdef POWER8_VSX_CRC32 TEST_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07) |
