From c861f8bb1ff0908207372a90f240c297f32af345 Mon Sep 17 00:00:00 2001
From: Nathan Moinvaziri
Date: Thu, 5 Mar 2026 19:00:54 -0800
Subject: Add shared align/tail helpers for CRC32 ARMv8.

---
 arch/arm/crc32_armv8.c            | 48 ++++----------------------
 arch/arm/crc32_armv8_p.h          | 72 +++++++++++++++++++++++++++++++++++++++
 arch/arm/crc32_armv8_pmull_eor3.c | 49 +++-----------------------
 3 files changed, 82 insertions(+), 87 deletions(-)
 create mode 100644 arch/arm/crc32_armv8_p.h

diff --git a/arch/arm/crc32_armv8.c b/arch/arm/crc32_armv8.c
index 55dac2a564..08043f7b02 100644
--- a/arch/arm/crc32_armv8.c
+++ b/arch/arm/crc32_armv8.c
@@ -8,58 +8,22 @@
 
 #include "zbuild.h"
 #include "acle_intrins.h"
+#include "crc32_armv8_p.h"
 
 Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) {
     uint32_t c = ~crc;
 
     if (UNLIKELY(len == 1)) {
         c = __crc32b(c, *buf);
-        c = ~c;
-        return c;
+        return ~c;
     }
 
+    /* Align to 8-byte boundary for tail processing */
     uintptr_t align_diff = ALIGN_DIFF(buf, 8);
-    if (align_diff) {
-        if (len && (align_diff & 1)) {
-            c = __crc32b(c, *buf++);
-            len--;
-        }
+    if (align_diff)
+        c = crc32_armv8_align(c, &buf, &len, align_diff);
 
-        if (len >= 2 && (align_diff & 2)) {
-            c = __crc32h(c, *((uint16_t*)buf));
-            buf += 2;
-            len -= 2;
-        }
-
-        if (len >= 4 && (align_diff & 4)) {
-            c = __crc32w(c, *((uint32_t*)buf));
-            len -= 4;
-            buf += 4;
-        }
-    }
-
-    while (len >= 8) {
-        c = __crc32d(c, *((uint64_t*)buf));
-        len -= 8;
-        buf += 8;
-    }
-
-    if (len & 4) {
-        c = __crc32w(c, *((uint32_t*)buf));
-        buf += 4;
-    }
-
-    if (len & 2) {
-        c = __crc32h(c, *((uint16_t*)buf));
-        buf += 2;
-    }
-
-    if (len & 1) {
-        c = __crc32b(c, *buf);
-    }
-
-    c = ~c;
-    return c;
+    return crc32_armv8_tail(c, buf, len);
 }
 
 Z_INTERNAL Z_TARGET_CRC uint32_t crc32_copy_armv8(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {
diff --git a/arch/arm/crc32_armv8_p.h b/arch/arm/crc32_armv8_p.h
new file mode 100644
index 0000000000..83543d6a38
--- /dev/null
+++ b/arch/arm/crc32_armv8_p.h
@@ -0,0 +1,72 @@
+/* crc32_armv8_p.h -- Private shared inline ARMv8 CRC32 functions
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef CRC32_ARMV8_P_H
+#define CRC32_ARMV8_P_H
+
+#include "zbuild.h"
+#include "acle_intrins.h"
+
+Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const uint8_t **buf,
+    size_t *len, uintptr_t align_diff) {
+    if (*len && (align_diff & 1)) {
+        uint8_t val = **buf;
+        crc = __crc32b(crc, val);
+        *buf += 1;
+        *len -= 1;
+    }
+
+    if (*len >= 2 && (align_diff & 2)) {
+        uint16_t val = *((uint16_t*)*buf);
+        crc = __crc32h(crc, val);
+        *buf += 2;
+        *len -= 2;
+    }
+
+    if (*len >= 4 && (align_diff & 4)) {
+        uint32_t val = *((uint32_t*)*buf);
+        crc = __crc32w(crc, val);
+        *buf += 4;
+        *len -= 4;
+    }
+
+    if (*len >= 8 && (align_diff & 8)) {
+        uint64_t val = *((uint64_t*)*buf);
+        crc = __crc32d(crc, val);
+        *buf += 8;
+        *len -= 8;
+    }
+
+    return crc;
+}
+
+Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_tail(uint32_t crc, const uint8_t *buf, size_t len) {
+    while (len >= 8) {
+        uint64_t val = *((uint64_t*)buf);
+        crc = __crc32d(crc, val);
+        buf += 8;
+        len -= 8;
+    }
+
+    if (len & 4) {
+        uint32_t val = *((uint32_t*)buf);
+        crc = __crc32w(crc, val);
+        buf += 4;
+    }
+
+    if (len & 2) {
+        uint16_t val = *((uint16_t*)buf);
+        crc = __crc32h(crc, val);
+        buf += 2;
+    }
+
+    if (len & 1) {
+        uint8_t val = *buf;
+        crc = __crc32b(crc, val);
+    }
+
+    return ~crc;
+}
+
+#endif /* CRC32_ARMV8_P_H */
diff --git a/arch/arm/crc32_armv8_pmull_eor3.c b/arch/arm/crc32_armv8_pmull_eor3.c
index 5b491be4ab..40260533ea 100644
--- a/arch/arm/crc32_armv8_pmull_eor3.c
+++ b/arch/arm/crc32_armv8_pmull_eor3.c
@@ -13,6 +13,7 @@
 #include "zutil.h"
 #include "acle_intrins.h"
 #include "neon_intrins.h"
+#include "crc32_armv8_p.h"
 
 /* Carryless multiply low 64 bits: a[0] * b[0] */
 static inline uint64x2_t clmul_lo(uint64x2_t a, uint64x2_t b) {
@@ -77,30 +78,8 @@ Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, con
 
     /* Align to 16-byte boundary for vector path */
     uintptr_t align_diff = ALIGN_DIFF(buf, 16);
-    if (align_diff) {
-        if (len && (align_diff & 1)) {
-            crc0 = __crc32b(crc0, *buf++);
-            len--;
-        }
-
-        if (len >= 2 && (align_diff & 2)) {
-            crc0 = __crc32h(crc0, *((uint16_t*)buf));
-            buf += 2;
-            len -= 2;
-        }
-
-        if (len >= 4 && (align_diff & 4)) {
-            crc0 = __crc32w(crc0, *((uint32_t*)buf));
-            len -= 4;
-            buf += 4;
-        }
-
-        if (len >= 8 && (align_diff & 8)) {
-            crc0 = __crc32d(crc0, *((uint64_t*)buf));
-            buf += 8;
-            len -= 8;
-        }
-    }
+    if (align_diff)
+        crc0 = crc32_armv8_align(crc0, &buf, &len, align_diff);
 
     /* 3-way scalar CRC + 9-way PMULL folding (192 bytes/iter) */
     if (len >= 192) {
@@ -246,27 +225,7 @@ Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, con
     }
 
     /* Process remaining bytes */
-    while (len >= 8) {
-        crc0 = __crc32d(crc0, *((uint64_t*)buf));
-        len -= 8;
-        buf += 8;
-    }
-
-    if (len & 4) {
-        crc0 = __crc32w(crc0, *((uint32_t*)buf));
-        buf += 4;
-    }
-
-    if (len & 2) {
-        crc0 = __crc32h(crc0, *((uint16_t*)buf));
-        buf += 2;
-    }
-
-    if (len & 1) {
-        crc0 = __crc32b(crc0, *buf);
-    }
-
-    return ~crc0;
+    return crc32_armv8_tail(crc0, buf, len);
 }
 
 Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_copy_armv8_pmull_eor3(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {
-- 
cgit 0.0.5-2-1-g0f52