summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nathan Moinvaziri <nathan@nathanm.com>, 2026-03-05 19:01:45 -0800
committer: Hans Kristian Rosbach <hk-github@circlestorm.org>, 2026-03-14 02:14:21 +0100
commit: 110f7c753d11b8a3037ab8c5fd4bcfac589a85f4 (patch)
tree: 0a67ccd94879e67eaa930ef42da8f14fa0b5da3f
parent: c861f8bb1ff0908207372a90f240c297f32af345 (diff)
download: Project-Tick-110f7c753d11b8a3037ab8c5fd4bcfac589a85f4.tar.gz
download: Project-Tick-110f7c753d11b8a3037ab8c5fd4bcfac589a85f4.zip
Implement interleaved copying for CRC32 ARMv8.
-rw-r--r-- arch/arm/crc32_armv8.c 21
-rw-r--r-- arch/arm/crc32_armv8_p.h 37
-rw-r--r-- arch/arm/crc32_armv8_pmull_eor3.c 5
3 files changed, 50 insertions, 13 deletions
diff --git a/arch/arm/crc32_armv8.c b/arch/arm/crc32_armv8.c
index 08043f7b02..b222a832e9 100644
--- a/arch/arm/crc32_armv8.c
+++ b/arch/arm/crc32_armv8.c
@@ -10,25 +10,30 @@
#include "acle_intrins.h"
#include "crc32_armv8_p.h"
-Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) {
+Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_copy_impl(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len,
+ const int COPY) {
uint32_t c = ~crc;
if (UNLIKELY(len == 1)) {
- c = __crc32b(c, *buf);
+ if (COPY)
+ *dst = *src;
+ c = __crc32b(c, *src);
return ~c;
}
/* Align to 8-byte boundary for tail processing */
- uintptr_t align_diff = ALIGN_DIFF(buf, 8);
+ uintptr_t align_diff = ALIGN_DIFF(src, 8);
if (align_diff)
- c = crc32_armv8_align(c, &buf, &len, align_diff);
+ c = crc32_armv8_align(c, &dst, &src, &len, align_diff, COPY);
+
+ return crc32_armv8_tail(c, dst, src, len, COPY);
+}
- return crc32_armv8_tail(c, buf, len);
+Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) {
+ return crc32_copy_impl(crc, NULL, buf, len, 0);
}
Z_INTERNAL Z_TARGET_CRC uint32_t crc32_copy_armv8(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {
- crc = crc32_armv8(crc, src, len);
- memcpy(dst, src, len);
- return crc;
+ return crc32_copy_impl(crc, dst, src, len, 1);
}
#endif
diff --git a/arch/arm/crc32_armv8_p.h b/arch/arm/crc32_armv8_p.h
index 83543d6a38..e72c4c0ad1 100644
--- a/arch/arm/crc32_armv8_p.h
+++ b/arch/arm/crc32_armv8_p.h
@@ -8,10 +8,14 @@
#include "zbuild.h"
#include "acle_intrins.h"
-Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const uint8_t **buf,
- size_t *len, uintptr_t align_diff) {
+Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, uint8_t **dst, const uint8_t **buf,
+ size_t *len, uintptr_t align_diff, const int COPY) {
if (*len && (align_diff & 1)) {
uint8_t val = **buf;
+ if (COPY) {
+ **dst = val;
+ *dst += 1;
+ }
crc = __crc32b(crc, val);
*buf += 1;
*len -= 1;
@@ -19,6 +23,10 @@ Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const
if (*len >= 2 && (align_diff & 2)) {
uint16_t val = *((uint16_t*)*buf);
+ if (COPY) {
+ memcpy(*dst, &val, 2);
+ *dst += 2;
+ }
crc = __crc32h(crc, val);
*buf += 2;
*len -= 2;
@@ -26,6 +34,10 @@ Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const
if (*len >= 4 && (align_diff & 4)) {
uint32_t val = *((uint32_t*)*buf);
+ if (COPY) {
+ memcpy(*dst, &val, 4);
+ *dst += 4;
+ }
crc = __crc32w(crc, val);
*buf += 4;
*len -= 4;
@@ -33,6 +45,10 @@ Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const
if (*len >= 8 && (align_diff & 8)) {
uint64_t val = *((uint64_t*)*buf);
+ if (COPY) {
+ memcpy(*dst, &val, 8);
+ *dst += 8;
+ }
crc = __crc32d(crc, val);
*buf += 8;
*len -= 8;
@@ -41,9 +57,14 @@ Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_align(uint32_t crc, const
return crc;
}
-Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_tail(uint32_t crc, const uint8_t *buf, size_t len) {
+Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_tail(uint32_t crc, uint8_t *dst, const uint8_t *buf,
+ size_t len, const int COPY) {
while (len >= 8) {
uint64_t val = *((uint64_t*)buf);
+ if (COPY) {
+ memcpy(dst, &val, 8);
+ dst += 8;
+ }
crc = __crc32d(crc, val);
buf += 8;
len -= 8;
@@ -51,18 +72,28 @@ Z_FORCEINLINE static Z_TARGET_CRC uint32_t crc32_armv8_tail(uint32_t crc, const
if (len & 4) {
uint32_t val = *((uint32_t*)buf);
+ if (COPY) {
+ memcpy(dst, &val, 4);
+ dst += 4;
+ }
crc = __crc32w(crc, val);
buf += 4;
}
if (len & 2) {
uint16_t val = *((uint16_t*)buf);
+ if (COPY) {
+ memcpy(dst, &val, 2);
+ dst += 2;
+ }
crc = __crc32h(crc, val);
buf += 2;
}
if (len & 1) {
uint8_t val = *buf;
+ if (COPY)
+ *dst = val;
crc = __crc32b(crc, val);
}
diff --git a/arch/arm/crc32_armv8_pmull_eor3.c b/arch/arm/crc32_armv8_pmull_eor3.c
index 40260533ea..27e7006b3f 100644
--- a/arch/arm/crc32_armv8_pmull_eor3.c
+++ b/arch/arm/crc32_armv8_pmull_eor3.c
@@ -70,6 +70,7 @@ static inline uint64x2_t crc_shift(uint32_t crc, size_t nbytes) {
Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, const uint8_t *buf, size_t len) {
uint32_t crc0 = ~crc;
+ uint8_t *dst = NULL;
if (UNLIKELY(len == 1)) {
crc0 = __crc32b(crc0, *buf);
@@ -79,7 +80,7 @@ Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, con
/* Align to 16-byte boundary for vector path */
uintptr_t align_diff = ALIGN_DIFF(buf, 16);
if (align_diff)
- crc0 = crc32_armv8_align(crc0, &buf, &len, align_diff);
+ crc0 = crc32_armv8_align(crc0, &dst, &buf, &len, align_diff, 0);
/* 3-way scalar CRC + 9-way PMULL folding (192 bytes/iter) */
if (len >= 192) {
@@ -225,7 +226,7 @@ Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_armv8_pmull_eor3(uint32_t crc, con
}
/* Process remaining bytes */
- return crc32_armv8_tail(crc0, buf, len);
+ return crc32_armv8_tail(crc0, NULL, buf, len, 0);
}
Z_INTERNAL Z_TARGET_PMULL_EOR3 uint32_t crc32_copy_armv8_pmull_eor3(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {