diff options
| author | Nathan Moinvaziri <nathan@nathanm.com> | 2026-03-05 19:00:54 -0800 |
|---|---|---|
| committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2026-03-14 02:14:21 +0100 |
| commit | c861f8bb1ff0908207372a90f240c297f32af345 (patch) | |
| tree | 955a1e02659484e84387e23f37c4f36885ed9f1d | |
| parent | 376911b1724839e76363a40bc9191240f55ac57a (diff) | |
| download | Project-Tick-c861f8bb1ff0908207372a90f240c297f32af345.tar.gz Project-Tick-c861f8bb1ff0908207372a90f240c297f32af345.zip | |
Add shared align/tail helpers for CRC32 ARMv8.
| -rw-r--r-- | arch/arm/crc32_armv8.c | 48 | ||||
| -rw-r--r-- | arch/arm/crc32_armv8_p.h | 72 | ||||
| -rw-r--r-- | arch/arm/crc32_armv8_pmull_eor3.c | 49 |
3 files changed, 82 insertions, 87 deletions
diff --git a/arch/arm/crc32_armv8.c b/arch/arm/crc32_armv8.c index 55dac2a564..08043f7b02 100644 --- a/arch/arm/crc32_armv8.c +++ b/arch/arm/crc32_armv8.c @@ -8,58 +8,22 @@ #include "zbuild.h" #include "acle_intrins.h" +#include "crc32_armv8_p.h" Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) { uint32_t c = ~crc; if (UNLIKELY(len == 1)) { c = __crc32b(c, *buf); - c = ~c; - return c; + return ~c; } + /* Align to 8-byte boundary for tail processing */ uintptr_t align_diff = ALIGN_DIFF(buf, 8); - if (align_diff) { - if (len && (align_diff & 1)) { - c = __crc32b(c, *buf++); - len--; - } + if (align_diff) + c = crc32_armv8_align(c, &buf, &len, align_diff); - if (len >= 2 && (align_diff & 2)) { - c = __crc32h(c, *((uint16_t*)buf)); - buf += 2; - len -= 2; - } - - if (len >= 4 && (align_diff & 4)) { - c = __crc32w(c, *((uint32_t*)buf)); - len -= 4; - buf += 4; - } - } - - while (len >= 8) { - c = __crc32d(c, *((uint64_t*)buf)); - len -= 8; - buf += 8; - } - - if (len & 4) { - c = __crc32w(c, *((uint32_t*)buf)); - buf += 4; - } - - if (len & 2) { - c = __crc32h(c, *((uint16_t*)buf)); - buf += 2; - } - - if (len & 1) { - c = __crc32b(c, *buf); - } - - c = ~c; - return c; + return crc32_armv8_tail(c, buf, len); } Z_INTERNAL Z_TARGET_CRC uint32_t crc32_copy_armv8(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) { diff --git a/arch/arm/crc32_armv8_p.h b/arch/arm/crc32_armv8_p.h new file mode 100644 index 0000000000..83543d6a38 --- /dev/null +++ b/arch/arm/crc32_armv8_p.h @@ -0,0 +1,72 @@ +/* crc32_armv8_p.h -- Private shared inline ARMv8 CRC32 functions + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef CRC32_ARMV8_P_H +#define CRC32_ARMV8_P_H + +#include "zbuild.h" +#include "acle_intrins.h" + +Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const uint8_t **buf, + size_t *len, uintptr_t align_diff) { + if (*len && (align_diff & 1)) { + uint8_t val = **buf; + crc = __crc32b(crc, val); + *buf += 1; + *len -= 1; + } + + if (*len >= 2 && (align_diff & 2)) { + uint16_t val = *((uint16_t*)*buf); + crc = __crc32h(crc, val); + *buf += 2; + *len -= 2; + } + + if (*len >= 4 && (align_diff & 4)) { + uint32_t val = *((uint32_t*)*buf); + crc = __crc32w(crc, val); + *buf += 4; + *len -= 4; + } + + if (*len >= 8 && (align_diff & 8)) { + uint64_t val = *((uint64_t*)*buf); + crc = __crc32d(crc, val); + *buf += 8; + *len -= 8; + } + + return crc; +} + +Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_tail(uint32_t crc, const uint8_t *buf, size_t len) { + while (len >= 8) { + uint64_t val = *((uint64_t*)buf); + crc = __crc32d(crc, val); + buf += 8; + len -= 8; + } + + if (len & 4) { + uint32_t val = *((uint32_t*)buf); + crc = __crc32w(crc, val); + buf += 4; + } + + if (len & 2) { + uint16_t val = *((uint16_t*)buf); + crc = __crc32h(crc, val); + buf += 2; + } + + if (len & 1) { + uint8_t val = *buf; + crc = __crc32b(crc, val); + } + + return ~crc; +} + +#endif /* CRC32_ARMV8_P_H */ diff --git a/arch/arm/crc32_armv8_pmull_eor3.c b/arch/arm/crc32_armv8_pmull_eor3.c index 5b491be4ab..40260533ea 100644 --- a/arch/arm/crc32_armv8_pmull_eor3.c +++ b/arch/arm/crc32_armv8_pmull_eor3.c @@ -13,6 +13,7 @@ #include "zutil.h" #include "acle_intrins.h" #include "neon_intrins.h" +#include "crc32_armv8_p.h" /* Carryless multiply low 64 bits: a[0] * b[0] */ static inline uint64x2_t clmul_lo(uint64x2_t a, uint64x2_t b) { @@ -77,30 +78,8 @@ Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, con /* Align to 16-byte boundary for vector path */ uintptr_t align_diff = ALIGN_DIFF(buf, 16); - if (align_diff) { - if (len && (align_diff & 1)) { - crc0 = __crc32b(crc0, *buf++); - len--; - } - - if (len >= 2 && (align_diff & 2)) { - crc0 = __crc32h(crc0, *((uint16_t*)buf)); - buf += 2; - len -= 2; - } - - if (len >= 4 && (align_diff & 4)) { - crc0 = __crc32w(crc0, *((uint32_t*)buf)); - len -= 4; - buf += 4; - } - - if (len >= 8 && (align_diff & 8)) { - crc0 = __crc32d(crc0, *((uint64_t*)buf)); - buf += 8; - len -= 8; - } - } + if (align_diff) + crc0 = crc32_armv8_align(crc0, &buf, &len, align_diff); /* 3-way scalar CRC + 9-way PMULL folding (192 bytes/iter) */ if (len >= 192) { @@ -246,27 +225,7 @@ Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, con } /* Process remaining bytes */ - while (len >= 8) { - crc0 = __crc32d(crc0, *((uint64_t*)buf)); - len -= 8; - buf += 8; - } - - if (len & 4) { - crc0 = __crc32w(crc0, *((uint32_t*)buf)); - buf += 4; - } - - if (len & 2) { - crc0 = __crc32h(crc0, *((uint16_t*)buf)); - buf += 2; - } - - if (len & 1) { - crc0 = __crc32b(crc0, *buf); - } - - return ~crc0; + return crc32_armv8_tail(crc0, buf, len); } Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_copy_armv8_pmull_eor3(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) { |
