summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHans Kristian Rosbach <hk-git@circlestorm.org>2025-11-11 22:47:52 +0100
committerHans Kristian Rosbach <hk-github@circlestorm.org>2025-11-15 00:46:38 +0100
commita72d5f249b31d80990a0b687bd7a822301205f0c (patch)
tree2f3036c61c67f78a2fca3c362a05acdb8e88dcc0
parent8003f57828f7310aaa035519bfa17c93b5621977 (diff)
downloadProject-Tick-a72d5f249b31d80990a0b687bd7a822301205f0c.tar.gz
Project-Tick-a72d5f249b31d80990a0b687bd7a822301205f0c.zip
- Unify crc32_chorba, chorba_sse2 and chorba_sse41 dispatch functions.
- Fixed alignment diff calculation in crc32_chorba.
- Fixed length check to happen early, avoiding extra branches for too short lengths. This also allows removing one function call to crc32_braid_internal to handle those. Gbench shows ~0.15-0.25ns saved per call for lengths shorter than CHORBA_SMALL_THRESHOLD.
- Avoid calculating aligned len if buffer is already aligned.
-rw-r--r--arch/generic/crc32_chorba_c.c33
-rw-r--r--arch/x86/chorba_sse2.c24
-rw-r--r--arch/x86/chorba_sse41.c27
-rw-r--r--crc32.h6
4 files changed, 42 insertions, 48 deletions
diff --git a/arch/generic/crc32_chorba_c.c b/arch/generic/crc32_chorba_c.c
index 4041abd46e..6f90d3c09f 100644
--- a/arch/generic/crc32_chorba_c.c
+++ b/arch/generic/crc32_chorba_c.c
@@ -1448,32 +1448,31 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit (uint32_t crc, const
#endif // OPTIMAL_CMP == 64
Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) {
+ uint64_t* aligned_buf;
uint32_t c = (~crc) & 0xffffffff;
+ uintptr_t algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 7)) & 7;
- uint64_t* aligned_buf;
- size_t aligned_len;
- unsigned long algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 0xF)) & 0xF;
- if (algn_diff < len) {
+ if (len > algn_diff + CHORBA_SMALL_THRESHOLD) {
if (algn_diff) {
c = crc32_braid_internal(c, buf, algn_diff);
+ len -= algn_diff;
}
aligned_buf = (uint64_t*) (buf + algn_diff);
- aligned_len = len - algn_diff;
- if(aligned_len > CHORBA_LARGE_THRESHOLD)
- c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
+ if(len > CHORBA_LARGE_THRESHOLD) {
+ c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
+# if OPTIMAL_CMP == 64
+ } else if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
+ c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, len);
+# endif
+ } else {
# if OPTIMAL_CMP == 64
- else if (aligned_len > CHORBA_MEDIUM_LOWER_THRESHOLD && aligned_len <= CHORBA_MEDIUM_UPPER_THRESHOLD)
- c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, aligned_len);
- else if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT)
- c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, aligned_len);
+ c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, len);
# else
- else if (aligned_len > CHORBA_SMALL_THRESHOLD_32BIT)
- c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, aligned_len);
+ c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, len);
# endif
- else
- c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
- }
- else {
+ }
+ } else {
+ // Process too short lengths using crc32_braid
c = crc32_braid_internal(c, buf, len);
}
diff --git a/arch/x86/chorba_sse2.c b/arch/x86/chorba_sse2.c
index 3e25d7586b..f79a5ac00d 100644
--- a/arch/x86/chorba_sse2.c
+++ b/arch/x86/chorba_sse2.c
@@ -847,30 +847,26 @@ Z_INTERNAL uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint64_
}
Z_INTERNAL uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len) {
- uint32_t c;
uint64_t* aligned_buf;
- size_t aligned_len;
+ uint32_t c = (~crc) & 0xffffffff;
+ uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
- c = (~crc) & 0xffffffff;
- unsigned long algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
- if (algn_diff < len) {
+ if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
if (algn_diff) {
c = crc32_braid_internal(c, buf, algn_diff);
+ len -= algn_diff;
}
aligned_buf = (uint64_t*) (buf + algn_diff);
- aligned_len = len - algn_diff;
#if !defined(WITHOUT_CHORBA)
- if(aligned_len > CHORBA_LARGE_THRESHOLD) {
- c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
+ if(len > CHORBA_LARGE_THRESHOLD) {
+ c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
} else
#endif
- if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT) {
- c = chorba_small_nondestructive_sse2(c, aligned_buf, aligned_len);
- } else {
- c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
+ {
+ c = chorba_small_nondestructive_sse2(c, aligned_buf, len);
}
- }
- else {
+ } else {
+ // Process too short lengths using crc32_braid
c = crc32_braid_internal(c, buf, len);
}
diff --git a/arch/x86/chorba_sse41.c b/arch/x86/chorba_sse41.c
index aebede45e2..a7568a2800 100644
--- a/arch/x86/chorba_sse41.c
+++ b/arch/x86/chorba_sse41.c
@@ -305,33 +305,28 @@ static Z_FORCEINLINE uint32_t crc32_chorba_32768_nondestructive_sse41(uint32_t c
}
Z_INTERNAL uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len) {
- uint32_t c;
uint64_t* aligned_buf;
- size_t aligned_len;
-
- c = (~crc) & 0xffffffff;
+ uint32_t c = (~crc) & 0xffffffff;
uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
- if (algn_diff < len) {
+
+ if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
if (algn_diff) {
c = crc32_braid_internal(c, buf, algn_diff);
+ len -= algn_diff;
}
aligned_buf = (uint64_t*) (buf + algn_diff);
- aligned_len = len - algn_diff;
#if !defined(WITHOUT_CHORBA)
- if(aligned_len > CHORBA_LARGE_THRESHOLD) {
- c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
+ if(len > CHORBA_LARGE_THRESHOLD) {
+ c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
} else
#endif
- if (aligned_len > CHORBA_MEDIUM_LOWER_THRESHOLD &&
- aligned_len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
- c = crc32_chorba_32768_nondestructive_sse41(c, aligned_buf, aligned_len);
- } else if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT) {
- c = chorba_small_nondestructive_sse2(c, aligned_buf, aligned_len);
+ if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
+ c = crc32_chorba_32768_nondestructive_sse41(c, aligned_buf, len);
} else {
- c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
+ c = chorba_small_nondestructive_sse2(c, aligned_buf, len);
}
- }
- else {
+ } else {
+ // Process too short lengths using crc32_braid
c = crc32_braid_internal(c, buf, len);
}
diff --git a/crc32.h b/crc32.h
index e26b59e520..d41af8f01f 100644
--- a/crc32.h
+++ b/crc32.h
@@ -13,7 +13,11 @@
#define CHORBA_MEDIUM_UPPER_THRESHOLD 32768
#define CHORBA_MEDIUM_LOWER_THRESHOLD 8192
#define CHORBA_SMALL_THRESHOLD_64BIT 72
-#define CHORBA_SMALL_THRESHOLD_32BIT 80
+#if OPTIMAL_CMP == 64
+# define CHORBA_SMALL_THRESHOLD 72
+#else
+# define CHORBA_SMALL_THRESHOLD 80
+#endif
typedef struct crc32_fold_s {
uint8_t fold[CRC32_FOLD_BUFFER_SIZE];