diff options
| author | Nathan Moinvaziri <nathan@nathanm.com> | 2026-03-05 19:01:45 -0800 |
|---|---|---|
| committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2026-03-14 02:14:21 +0100 |
| commit | 110f7c753d11b8a3037ab8c5fd4bcfac589a85f4 (patch) | |
| tree | 0a67ccd94879e67eaa930ef42da8f14fa0b5da3f | |
| parent | c861f8bb1ff0908207372a90f240c297f32af345 (diff) | |
| download | Project-Tick-110f7c753d11b8a3037ab8c5fd4bcfac589a85f4.tar.gz Project-Tick-110f7c753d11b8a3037ab8c5fd4bcfac589a85f4.zip | |
Implement interleaved copying for CRC32 ARMv8.
| -rw-r--r-- | arch/arm/crc32_armv8.c | 21 | ||||
| -rw-r--r-- | arch/arm/crc32_armv8_p.h | 37 | ||||
| -rw-r--r-- | arch/arm/crc32_armv8_pmull_eor3.c | 5 |
3 files changed, 50 insertions, 13 deletions
diff --git a/arch/arm/crc32_armv8.c b/arch/arm/crc32_armv8.c index 08043f7b02..b222a832e9 100644 --- a/arch/arm/crc32_armv8.c +++ b/arch/arm/crc32_armv8.c @@ -10,25 +10,30 @@ #include "acle_intrins.h" #include "crc32_armv8_p.h" -Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) { +Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_copy_impl(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len, + const int COPY) { uint32_t c = ~crc; if (UNLIKELY(len == 1)) { - c = __crc32b(c, *buf); + if (COPY) + *dst = *src; + c = __crc32b(c, *src); return ~c; } /* Align to 8-byte boundary for tail processing */ - uintptr_t align_diff = ALIGN_DIFF(buf, 8); + uintptr_t align_diff = ALIGN_DIFF(src, 8); if (align_diff) - c = crc32_armv8_align(c, &buf, &len, align_diff); + c = crc32_armv8_align(c, &dst, &src, &len, align_diff, COPY); + + return crc32_armv8_tail(c, dst, src, len, COPY); +} - return crc32_armv8_tail(c, buf, len); +Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) { + return crc32_copy_impl(crc, NULL, buf, len, 0); } Z_INTERNAL Z_TARGET_CRC uint32_t crc32_copy_armv8(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) { - crc = crc32_armv8(crc, src, len); - memcpy(dst, src, len); - return crc; + return crc32_copy_impl(crc, dst, src, len, 1); } #endif diff --git a/arch/arm/crc32_armv8_p.h b/arch/arm/crc32_armv8_p.h index 83543d6a38..e72c4c0ad1 100644 --- a/arch/arm/crc32_armv8_p.h +++ b/arch/arm/crc32_armv8_p.h @@ -8,10 +8,14 @@ #include "zbuild.h" #include "acle_intrins.h" -Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const uint8_t **buf, - size_t *len, uintptr_t align_diff) { +Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, uint8_t **dst, const uint8_t **buf, + size_t *len, uintptr_t align_diff, const int COPY) { if (*len && (align_diff & 1)) { uint8_t val = **buf; + if (COPY) { + **dst = val; + *dst += 1; + } crc = __crc32b(crc, val); *buf += 1; *len -= 1; @@ -19,6 +23,10 @@ Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const if (*len >= 2 && (align_diff & 2)) { uint16_t val = *((uint16_t*)*buf); + if (COPY) { + memcpy(*dst, &val, 2); + *dst += 2; + } crc = __crc32h(crc, val); *buf += 2; *len -= 2; @@ -26,6 +34,10 @@ Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const if (*len >= 4 && (align_diff & 4)) { uint32_t val = *((uint32_t*)*buf); + if (COPY) { + memcpy(*dst, &val, 4); + *dst += 4; + } crc = __crc32w(crc, val); *buf += 4; *len -= 4; @@ -33,6 +45,10 @@ Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const if (*len >= 8 && (align_diff & 8)) { uint64_t val = *((uint64_t*)*buf); + if (COPY) { + memcpy(*dst, &val, 8); + *dst += 8; + } crc = __crc32d(crc, val); *buf += 8; *len -= 8; @@ -41,9 +57,14 @@ Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const return crc; } -Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_tail(uint32_t crc, const uint8_t *buf, size_t len) { +Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_tail(uint32_t crc, uint8_t *dst, const uint8_t *buf, + size_t len, const int COPY) { while (len >= 8) { uint64_t val = *((uint64_t*)buf); + if (COPY) { + memcpy(dst, &val, 8); + dst += 8; + } crc = __crc32d(crc, val); buf += 8; len -= 8; @@ -51,18 +72,28 @@ Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_tail(uint32_t crc, const if (len & 4) { uint32_t val = *((uint32_t*)buf); + if (COPY) { + memcpy(dst, &val, 4); + dst += 4; + } crc = __crc32w(crc, val); buf += 4; } if (len & 2) { uint16_t val = *((uint16_t*)buf); + if (COPY) { + memcpy(dst, &val, 2); + dst += 2; + } crc = __crc32h(crc, val); buf += 2; } if (len & 1) { uint8_t val = *buf; + if (COPY) + *dst = val; crc = __crc32b(crc, val); } diff --git a/arch/arm/crc32_armv8_pmull_eor3.c b/arch/arm/crc32_armv8_pmull_eor3.c index 40260533ea..27e7006b3f 100644 --- a/arch/arm/crc32_armv8_pmull_eor3.c +++ b/arch/arm/crc32_armv8_pmull_eor3.c @@ -70,6 +70,7 @@ static inline uint64x2_t crc_shift(uint32_t crc, size_t nbytes) { Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, const uint8_t *buf, size_t len) { uint32_t crc0 = ~crc; + uint8_t *dst = NULL; if (UNLIKELY(len == 1)) { crc0 = __crc32b(crc0, *buf); @@ -79,7 +80,7 @@ Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, con /* Align to 16-byte boundary for vector path */ uintptr_t align_diff = ALIGN_DIFF(buf, 16); if (align_diff) - crc0 = crc32_armv8_align(crc0, &buf, &len, align_diff); + crc0 = crc32_armv8_align(crc0, &dst, &buf, &len, align_diff, 0); /* 3-way scalar CRC + 9-way PMULL folding (192 bytes/iter) */ if (len >= 192) { @@ -225,7 +226,7 @@ Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, con } /* Process remaining bytes */ - return crc32_armv8_tail(crc0, buf, len); + return crc32_armv8_tail(crc0, NULL, buf, len, 0); } Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_copy_armv8_pmull_eor3(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) { |
