Split CRC32 Braid and Chorba word types

author: Cameron Cawley <ccawley2011@gmail.com> 2025-10-02 22:08:05 +0100
committer: Hans Kristian Rosbach <hk-github@circlestorm.org> 2026-02-17 23:23:12 +0100
commit: 3e391c13074083eee416c424cccf1d87a32fd5bf (patch)
tree: f06e03edcd84e29336ebe6caada5ff8c7db7605e
parent: 4844fe1a0bac37cacb54e4ec678e6a25544244ee (diff)
download: Project-Tick-3e391c13074083eee416c424cccf1d87a32fd5bf.tar.gz
Project-Tick-3e391c13074083eee416c424cccf1d87a32fd5bf.zip
16 files changed, 227 insertions, 221 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2b0481b9ca..84b64e9a3b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1216,10 +1216,10 @@ set(ZLIB_PRIVATE_HDRS
     arch_functions.h
     chunkset_tpl.h
     compare256_rle.h
-    crc32.h
     crc32_braid_p.h
     crc32_braid_comb_p.h
     crc32_braid_tbl.h
+    crc32_chorba_p.h
     deflate.h
     deflate_p.h
     fallback_builtins.h
diff --git a/arch/arm/crc32_armv8.c b/arch/arm/crc32_armv8.c
index d42f5cc130..55dac2a564 100644
--- a/arch/arm/crc32_armv8.c
+++ b/arch/arm/crc32_armv8.c
@@ -8,7 +8,6 @@
 
 #include "zbuild.h"
 #include "acle_intrins.h"
-#include "crc32.h"
 
 Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) {
     uint32_t c = ~crc;
diff --git a/arch/arm/crc32_armv8_pmull_eor3.c b/arch/arm/crc32_armv8_pmull_eor3.c
index d9912907ee..38f2a854b1 100644
--- a/arch/arm/crc32_armv8_pmull_eor3.c
+++ b/arch/arm/crc32_armv8_pmull_eor3.c
@@ -13,7 +13,6 @@
 #include "zutil.h"
 #include "acle_intrins.h"
 #include "neon_intrins.h"
-#include "crc32.h"
 
 /* Carryless multiply low 64 bits: a[0] * b[0] */
 static inline uint64x2_t clmul_lo(uint64x2_t a, uint64x2_t b) {
diff --git a/arch/generic/crc32_chorba_c.c b/arch/generic/crc32_chorba_c.c
index 1759d4757d..87e4fdf674 100644
--- a/arch/generic/crc32_chorba_c.c
+++ b/arch/generic/crc32_chorba_c.c
@@ -1,18 +1,20 @@
 #include "zbuild.h"
+#include "zendian.h"
 #if defined(__EMSCRIPTEN__)
 #  include "zutil_p.h"
 #endif
 #include "zmemory.h"
+#include "crc32_chorba_p.h"
 #include "crc32_braid_p.h"
 #include "crc32_braid_tbl.h"
 #include "generic_functions.h"
 
 /* Implement Chorba algorithm from https://arxiv.org/abs/2412.16398 */
-#define bitbuffer_size_bytes (16 * 1024 * sizeof(z_word_t))
-#define bitbuffer_size_zwords (bitbuffer_size_bytes / sizeof(z_word_t))
+#define bitbuffer_size_bytes (16 * 1024 * sizeof(chorba_word_t))
+#define bitbuffer_size_zwords (bitbuffer_size_bytes / sizeof(chorba_word_t))
 #define bitbuffer_size_qwords (bitbuffer_size_bytes / sizeof(uint64_t))
 
-#if defined(HAVE_MAY_ALIAS) && BRAID_W != 8
+#if defined(HAVE_MAY_ALIAS) && CHORBA_W != 8
     typedef uint64_t __attribute__ ((__may_alias__)) uint64a_t;
 #else
     typedef uint64_t uint64a_t;
@@ -34,11 +36,11 @@
  * @note Requires minimum input size of 118960 + 512 bytes
  * @note Uses 128KB temporary buffer
  */
-Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const z_word_t *input, size_t len) {
+Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const chorba_word_t *input, size_t len) {
 #if defined(__EMSCRIPTEN__)
-    z_word_t *bitbuffer = (z_word_t*)zng_alloc(bitbuffer_size_bytes);
+    chorba_word_t *bitbuffer = (chorba_word_t*)zng_alloc(bitbuffer_size_bytes);
 #else
-    ALIGNED_(16) z_word_t bitbuffer[bitbuffer_size_zwords];
+    ALIGNED_(16) chorba_word_t bitbuffer[bitbuffer_size_zwords];
 #endif
     const uint8_t *bitbuffer_bytes = (const uint8_t*)bitbuffer;
     uint64a_t *bitbuffer_qwords = (uint64a_t*)bitbuffer;
@@ -46,72 +48,72 @@ Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const z_wor
 
     size_t i = 0;
 
-    z_word_t next1 = Z_WORD_FROM_LE(~crc);
-
-    z_word_t next2 = 0;
-    z_word_t next3 = 0;
-    z_word_t next4 = 0;
-    z_word_t next5 = 0;
-    z_word_t next6 = 0;
-    z_word_t next7 = 0;
-    z_word_t next8 = 0;
-    z_word_t next9 = 0;
-    z_word_t next10 = 0;
-    z_word_t next11 = 0;
-    z_word_t next12 = 0;
-    z_word_t next13 = 0;
-    z_word_t next14 = 0;
-    z_word_t next15 = 0;
-    z_word_t next16 = 0;
-    z_word_t next17 = 0;
-    z_word_t next18 = 0;
-    z_word_t next19 = 0;
-    z_word_t next20 = 0;
-    z_word_t next21 = 0;
-    z_word_t next22 = 0;
+    chorba_word_t next1 = CHORBA_WORD_FROM_LE(~crc);
+
+    chorba_word_t next2 = 0;
+    chorba_word_t next3 = 0;
+    chorba_word_t next4 = 0;
+    chorba_word_t next5 = 0;
+    chorba_word_t next6 = 0;
+    chorba_word_t next7 = 0;
+    chorba_word_t next8 = 0;
+    chorba_word_t next9 = 0;
+    chorba_word_t next10 = 0;
+    chorba_word_t next11 = 0;
+    chorba_word_t next12 = 0;
+    chorba_word_t next13 = 0;
+    chorba_word_t next14 = 0;
+    chorba_word_t next15 = 0;
+    chorba_word_t next16 = 0;
+    chorba_word_t next17 = 0;
+    chorba_word_t next18 = 0;
+    chorba_word_t next19 = 0;
+    chorba_word_t next20 = 0;
+    chorba_word_t next21 = 0;
+    chorba_word_t next22 = 0;
     crc = 0;
 
     // do a first pass to zero out bitbuffer
-    for (; i < (14848 * sizeof(z_word_t)); i += (32 * sizeof(z_word_t))) {
-        z_word_t in1, in2, in3, in4, in5, in6, in7, in8;
-        z_word_t in9, in10, in11, in12, in13, in14, in15, in16;
-        z_word_t in17, in18, in19, in20, in21, in22, in23, in24;
-        z_word_t in25, in26, in27, in28, in29, in30, in31, in32;
-        int out_offset1 = ((i / sizeof(z_word_t)) + 14848) % bitbuffer_size_zwords;
-        int out_offset2 = ((i / sizeof(z_word_t)) + 14880) % bitbuffer_size_zwords;
-
-        in1 = input[i / sizeof(z_word_t) + 0] ^ next1;
-        in2 = input[i / sizeof(z_word_t) + 1] ^ next2;
-        in3 = input[i / sizeof(z_word_t) + 2] ^ next3;
-        in4 = input[i / sizeof(z_word_t) + 3] ^ next4;
-        in5 = input[i / sizeof(z_word_t) + 4] ^ next5;
-        in6 = input[i / sizeof(z_word_t) + 5] ^ next6;
-        in7 = input[i / sizeof(z_word_t) + 6] ^ next7;
-        in8 = input[i / sizeof(z_word_t) + 7] ^ next8 ^ in1;
-        in9 = input[i / sizeof(z_word_t) + 8] ^ next9 ^ in2;
-        in10 = input[i / sizeof(z_word_t) + 9] ^ next10 ^ in3;
-        in11 = input[i / sizeof(z_word_t) + 10] ^ next11 ^ in4;
-        in12 = input[i / sizeof(z_word_t) + 11] ^ next12 ^ in1 ^ in5;
-        in13 = input[i / sizeof(z_word_t) + 12] ^ next13 ^ in2 ^ in6;
-        in14 = input[i / sizeof(z_word_t) + 13] ^ next14 ^ in3 ^ in7;
-        in15 = input[i / sizeof(z_word_t) + 14] ^ next15 ^ in4 ^ in8;
-        in16 = input[i / sizeof(z_word_t) + 15] ^ next16 ^ in5 ^ in9;
-        in17 = input[i / sizeof(z_word_t) + 16] ^ next17 ^ in6 ^ in10;
-        in18 = input[i / sizeof(z_word_t) + 17] ^ next18 ^ in7 ^ in11;
-        in19 = input[i / sizeof(z_word_t) + 18] ^ next19 ^ in8 ^ in12;
-        in20 = input[i / sizeof(z_word_t) + 19] ^ next20 ^ in9 ^ in13;
-        in21 = input[i / sizeof(z_word_t) + 20] ^ next21 ^ in10 ^ in14;
-        in22 = input[i / sizeof(z_word_t) + 21] ^ next22 ^ in11 ^ in15;
-        in23 = input[i / sizeof(z_word_t) + 22] ^ in1 ^ in12 ^ in16;
-        in24 = input[i / sizeof(z_word_t) + 23] ^ in2 ^ in13 ^ in17;
-        in25 = input[i / sizeof(z_word_t) + 24] ^ in3 ^ in14 ^ in18;
-        in26 = input[i / sizeof(z_word_t) + 25] ^ in4 ^ in15 ^ in19;
-        in27 = input[i / sizeof(z_word_t) + 26] ^ in5 ^ in16 ^ in20;
-        in28 = input[i / sizeof(z_word_t) + 27] ^ in6 ^ in17 ^ in21;
-        in29 = input[i / sizeof(z_word_t) + 28] ^ in7 ^ in18 ^ in22;
-        in30 = input[i / sizeof(z_word_t) + 29] ^ in8 ^ in19 ^ in23;
-        in31 = input[i / sizeof(z_word_t) + 30] ^ in9 ^ in20 ^ in24;
-        in32 = input[i / sizeof(z_word_t) + 31] ^ in10 ^ in21 ^ in25;
+    for (; i < (14848 * sizeof(chorba_word_t)); i += (32 * sizeof(chorba_word_t))) {
+        chorba_word_t in1, in2, in3, in4, in5, in6, in7, in8;
+        chorba_word_t in9, in10, in11, in12, in13, in14, in15, in16;
+        chorba_word_t in17, in18, in19, in20, in21, in22, in23, in24;
+        chorba_word_t in25, in26, in27, in28, in29, in30, in31, in32;
+        int out_offset1 = ((i / sizeof(chorba_word_t)) + 14848) % bitbuffer_size_zwords;
+        int out_offset2 = ((i / sizeof(chorba_word_t)) + 14880) % bitbuffer_size_zwords;
+
+        in1 = input[i / sizeof(chorba_word_t) + 0] ^ next1;
+        in2 = input[i / sizeof(chorba_word_t) + 1] ^ next2;
+        in3 = input[i / sizeof(chorba_word_t) + 2] ^ next3;
+        in4 = input[i / sizeof(chorba_word_t) + 3] ^ next4;
+        in5 = input[i / sizeof(chorba_word_t) + 4] ^ next5;
+        in6 = input[i / sizeof(chorba_word_t) + 5] ^ next6;
+        in7 = input[i / sizeof(chorba_word_t) + 6] ^ next7;
+        in8 = input[i / sizeof(chorba_word_t) + 7] ^ next8 ^ in1;
+        in9 = input[i / sizeof(chorba_word_t) + 8] ^ next9 ^ in2;
+        in10 = input[i / sizeof(chorba_word_t) + 9] ^ next10 ^ in3;
+        in11 = input[i / sizeof(chorba_word_t) + 10] ^ next11 ^ in4;
+        in12 = input[i / sizeof(chorba_word_t) + 11] ^ next12 ^ in1 ^ in5;
+        in13 = input[i / sizeof(chorba_word_t) + 12] ^ next13 ^ in2 ^ in6;
+        in14 = input[i / sizeof(chorba_word_t) + 13] ^ next14 ^ in3 ^ in7;
+        in15 = input[i / sizeof(chorba_word_t) + 14] ^ next15 ^ in4 ^ in8;
+        in16 = input[i / sizeof(chorba_word_t) + 15] ^ next16 ^ in5 ^ in9;
+        in17 = input[i / sizeof(chorba_word_t) + 16] ^ next17 ^ in6 ^ in10;
+        in18 = input[i / sizeof(chorba_word_t) + 17] ^ next18 ^ in7 ^ in11;
+        in19 = input[i / sizeof(chorba_word_t) + 18] ^ next19 ^ in8 ^ in12;
+        in20 = input[i / sizeof(chorba_word_t) + 19] ^ next20 ^ in9 ^ in13;
+        in21 = input[i / sizeof(chorba_word_t) + 20] ^ next21 ^ in10 ^ in14;
+        in22 = input[i / sizeof(chorba_word_t) + 21] ^ next22 ^ in11 ^ in15;
+        in23 = input[i / sizeof(chorba_word_t) + 22] ^ in1 ^ in12 ^ in16;
+        in24 = input[i / sizeof(chorba_word_t) + 23] ^ in2 ^ in13 ^ in17;
+        in25 = input[i / sizeof(chorba_word_t) + 24] ^ in3 ^ in14 ^ in18;
+        in26 = input[i / sizeof(chorba_word_t) + 25] ^ in4 ^ in15 ^ in19;
+        in27 = input[i / sizeof(chorba_word_t) + 26] ^ in5 ^ in16 ^ in20;
+        in28 = input[i / sizeof(chorba_word_t) + 27] ^ in6 ^ in17 ^ in21;
+        in29 = input[i / sizeof(chorba_word_t) + 28] ^ in7 ^ in18 ^ in22;
+        in30 = input[i / sizeof(chorba_word_t) + 29] ^ in8 ^ in19 ^ in23;
+        in31 = input[i / sizeof(chorba_word_t) + 30] ^ in9 ^ in20 ^ in24;
+        in32 = input[i / sizeof(chorba_word_t) + 31] ^ in10 ^ in21 ^ in25;
 
         next1 = in11 ^ in22 ^ in26;
         next2 = in12 ^ in23 ^ in27;
@@ -171,47 +173,47 @@ Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const z_wor
     }
 
     // one intermediate pass where we pull half the values
-    for (; i < (14880 * sizeof(z_word_t)); i += (32 * sizeof(z_word_t))) {
-        z_word_t in1, in2, in3, in4, in5, in6, in7, in8;
-        z_word_t in9, in10, in11, in12, in13, in14, in15, in16;
-        z_word_t in17, in18, in19, in20, in21, in22, in23, in24;
-        z_word_t in25, in26, in27, in28, in29, in30, in31, in32;
-        int in_offset = (i / sizeof(z_word_t)) % bitbuffer_size_zwords;
-        int out_offset1 = ((i / sizeof(z_word_t)) + 14848) % bitbuffer_size_zwords;
-        int out_offset2 = ((i / sizeof(z_word_t)) + 14880) % bitbuffer_size_zwords;
-
-        in1 = input[i / sizeof(z_word_t) + 0] ^ next1;
-        in2 = input[i / sizeof(z_word_t) + 1] ^ next2;
-        in3 = input[i / sizeof(z_word_t) + 2] ^ next3;
-        in4 = input[i / sizeof(z_word_t) + 3] ^ next4;
-        in5 = input[i / sizeof(z_word_t) + 4] ^ next5;
-        in6 = input[i / sizeof(z_word_t) + 5] ^ next6;
-        in7 = input[i / sizeof(z_word_t) + 6] ^ next7;
-        in8 = input[i / sizeof(z_word_t) + 7] ^ next8 ^ in1;
-        in9 = input[i / sizeof(z_word_t) + 8] ^ next9 ^ in2;
-        in10 = input[i / sizeof(z_word_t) + 9] ^ next10 ^ in3;
-        in11 = input[i / sizeof(z_word_t) + 10] ^ next11 ^ in4;
-        in12 = input[i / sizeof(z_word_t) + 11] ^ next12 ^ in1 ^ in5;
-        in13 = input[i / sizeof(z_word_t) + 12] ^ next13 ^ in2 ^ in6;
-        in14 = input[i / sizeof(z_word_t) + 13] ^ next14 ^ in3 ^ in7;
-        in15 = input[i / sizeof(z_word_t) + 14] ^ next15 ^ in4 ^ in8;
-        in16 = input[i / sizeof(z_word_t) + 15] ^ next16 ^ in5 ^ in9;
-        in17 = input[i / sizeof(z_word_t) + 16] ^ next17 ^ in6 ^ in10;
-        in18 = input[i / sizeof(z_word_t) + 17] ^ next18 ^ in7 ^ in11;
-        in19 = input[i / sizeof(z_word_t) + 18] ^ next19 ^ in8 ^ in12;
-        in20 = input[i / sizeof(z_word_t) + 19] ^ next20 ^ in9 ^ in13;
-        in21 = input[i / sizeof(z_word_t) + 20] ^ next21 ^ in10 ^ in14;
-        in22 = input[i / sizeof(z_word_t) + 21] ^ next22 ^ in11 ^ in15;
-        in23 = input[i / sizeof(z_word_t) + 22] ^ in1 ^ in12 ^ in16 ^ bitbuffer[in_offset + 22];
-        in24 = input[i / sizeof(z_word_t) + 23] ^ in2 ^ in13 ^ in17 ^ bitbuffer[in_offset + 23];
-        in25 = input[i / sizeof(z_word_t) + 24] ^ in3 ^ in14 ^ in18 ^ bitbuffer[in_offset + 24];
-        in26 = input[i / sizeof(z_word_t) + 25] ^ in4 ^ in15 ^ in19 ^ bitbuffer[in_offset + 25];
-        in27 = input[i / sizeof(z_word_t) + 26] ^ in5 ^ in16 ^ in20 ^ bitbuffer[in_offset + 26];
-        in28 = input[i / sizeof(z_word_t) + 27] ^ in6 ^ in17 ^ in21 ^ bitbuffer[in_offset + 27];
-        in29 = input[i / sizeof(z_word_t) + 28] ^ in7 ^ in18 ^ in22 ^ bitbuffer[in_offset + 28];
-        in30 = input[i / sizeof(z_word_t) + 29] ^ in8 ^ in19 ^ in23 ^ bitbuffer[in_offset + 29];
-        in31 = input[i / sizeof(z_word_t) + 30] ^ in9 ^ in20 ^ in24 ^ bitbuffer[in_offset + 30];
-        in32 = input[i / sizeof(z_word_t) + 31] ^ in10 ^ in21 ^ in25 ^ bitbuffer[in_offset + 31];
+    for (; i < (14880 * sizeof(chorba_word_t)); i += (32 * sizeof(chorba_word_t))) {
+        chorba_word_t in1, in2, in3, in4, in5, in6, in7, in8;
+        chorba_word_t in9, in10, in11, in12, in13, in14, in15, in16;
+        chorba_word_t in17, in18, in19, in20, in21, in22, in23, in24;
+        chorba_word_t in25, in26, in27, in28, in29, in30, in31, in32;
+        int in_offset = (i / sizeof(chorba_word_t)) % bitbuffer_size_zwords;
+        int out_offset1 = ((i / sizeof(chorba_word_t)) + 14848) % bitbuffer_size_zwords;
+        int out_offset2 = ((i / sizeof(chorba_word_t)) + 14880) % bitbuffer_size_zwords;
+
+        in1 = input[i / sizeof(chorba_word_t) + 0] ^ next1;
+        in2 = input[i / sizeof(chorba_word_t) + 1] ^ next2;
+        in3 = input[i / sizeof(chorba_word_t) + 2] ^ next3;
+        in4 = input[i / sizeof(chorba_word_t) + 3] ^ next4;
+        in5 = input[i / sizeof(chorba_word_t) + 4] ^ next5;
+        in6 = input[i / sizeof(chorba_word_t) + 5] ^ next6;
+        in7 = input[i / sizeof(chorba_word_t) + 6] ^ next7;
+        in8 = input[i / sizeof(chorba_word_t) + 7] ^ next8 ^ in1;
+        in9 = input[i / sizeof(chorba_word_t) + 8] ^ next9 ^ in2;
+        in10 = input[i / sizeof(chorba_word_t) + 9] ^ next10 ^ in3;
+        in11 = input[i / sizeof(chorba_word_t) + 10] ^ next11 ^ in4;
+        in12 = input[i / sizeof(chorba_word_t) + 11] ^ next12 ^ in1 ^ in5;
+        in13 = input[i / sizeof(chorba_word_t) + 12] ^ next13 ^ in2 ^ in6;
+        in14 = input[i / sizeof(chorba_word_t) + 13] ^ next14 ^ in3 ^ in7;
+        in15 = input[i / sizeof(chorba_word_t) + 14] ^ next15 ^ in4 ^ in8;
+        in16 = input[i / sizeof(chorba_word_t) + 15] ^ next16 ^ in5 ^ in9;
+        in17 = input[i / sizeof(chorba_word_t) + 16] ^ next17 ^ in6 ^ in10;
+        in18 = input[i / sizeof(chorba_word_t) + 17] ^ next18 ^ in7 ^ in11;
+        in19 = input[i / sizeof(chorba_word_t) + 18] ^ next19 ^ in8 ^ in12;
+        in20 = input[i / sizeof(chorba_word_t) + 19] ^ next20 ^ in9 ^ in13;
+        in21 = input[i / sizeof(chorba_word_t) + 20] ^ next21 ^ in10 ^ in14;
+        in22 = input[i / sizeof(chorba_word_t) + 21] ^ next22 ^ in11 ^ in15;
+        in23 = input[i / sizeof(chorba_word_t) + 22] ^ in1 ^ in12 ^ in16 ^ bitbuffer[in_offset + 22];
+        in24 = input[i / sizeof(chorba_word_t) + 23] ^ in2 ^ in13 ^ in17 ^ bitbuffer[in_offset + 23];
+        in25 = input[i / sizeof(chorba_word_t) + 24] ^ in3 ^ in14 ^ in18 ^ bitbuffer[in_offset + 24];
+        in26 = input[i / sizeof(chorba_word_t) + 25] ^ in4 ^ in15 ^ in19 ^ bitbuffer[in_offset + 25];
+        in27 = input[i / sizeof(chorba_word_t) + 26] ^ in5 ^ in16 ^ in20 ^ bitbuffer[in_offset + 26];
+        in28 = input[i / sizeof(chorba_word_t) + 27] ^ in6 ^ in17 ^ in21 ^ bitbuffer[in_offset + 27];
+        in29 = input[i / sizeof(chorba_word_t) + 28] ^ in7 ^ in18 ^ in22 ^ bitbuffer[in_offset + 28];
+        in30 = input[i / sizeof(chorba_word_t) + 29] ^ in8 ^ in19 ^ in23 ^ bitbuffer[in_offset + 29];
+        in31 = input[i / sizeof(chorba_word_t) + 30] ^ in9 ^ in20 ^ in24 ^ bitbuffer[in_offset + 30];
+        in32 = input[i / sizeof(chorba_word_t) + 31] ^ in10 ^ in21 ^ in25 ^ bitbuffer[in_offset + 31];
 
         next1 = in11 ^ in22 ^ in26;
         next2 = in12 ^ in23 ^ in27;
@@ -270,47 +272,47 @@ Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const z_wor
         bitbuffer[out_offset2 + 21] = in32;
     }
 
-    for (; (i + (14870 + 64) * sizeof(z_word_t)) < len; i += (32 * sizeof(z_word_t))) {
-        z_word_t in1, in2, in3, in4, in5, in6, in7, in8;
-        z_word_t in9, in10, in11, in12, in13, in14, in15, in16;
-        z_word_t in17, in18, in19, in20, in21, in22, in23, in24;
-        z_word_t in25, in26, in27, in28, in29, in30, in31, in32;
-        int in_offset = (i / sizeof(z_word_t)) % bitbuffer_size_zwords;
-        int out_offset1 = ((i / sizeof(z_word_t)) + 14848) % bitbuffer_size_zwords;
-        int out_offset2 = ((i / sizeof(z_word_t)) + 14880) % bitbuffer_size_zwords;
-
-        in1 = input[i / sizeof(z_word_t) + 0] ^ next1 ^ bitbuffer[in_offset + 0];
-        in2 = input[i / sizeof(z_word_t) + 1] ^ next2 ^ bitbuffer[in_offset + 1];
-        in3 = input[i / sizeof(z_word_t) + 2] ^ next3 ^ bitbuffer[in_offset + 2];
-        in4 = input[i / sizeof(z_word_t) + 3] ^ next4 ^ bitbuffer[in_offset + 3];
-        in5 = input[i / sizeof(z_word_t) + 4] ^ next5 ^ bitbuffer[in_offset + 4];
-        in6 = input[i / sizeof(z_word_t) + 5] ^ next6 ^ bitbuffer[in_offset + 5];
-        in7 = input[i / sizeof(z_word_t) + 6] ^ next7 ^ bitbuffer[in_offset + 6];
-        in8 = input[i / sizeof(z_word_t) + 7] ^ next8 ^ in1 ^ bitbuffer[in_offset + 7];
-        in9 = input[i / sizeof(z_word_t) + 8] ^ next9 ^ in2 ^ bitbuffer[in_offset + 8];
-        in10 = input[i / sizeof(z_word_t) + 9] ^ next10 ^ in3 ^ bitbuffer[in_offset + 9];
-        in11 = input[i / sizeof(z_word_t) + 10] ^ next11 ^ in4 ^ bitbuffer[in_offset + 10];
-        in12 = input[i / sizeof(z_word_t) + 11] ^ next12 ^ in1 ^ in5 ^ bitbuffer[in_offset + 11];
-        in13 = input[i / sizeof(z_word_t) + 12] ^ next13 ^ in2 ^ in6 ^ bitbuffer[in_offset + 12];
-        in14 = input[i / sizeof(z_word_t) + 13] ^ next14 ^ in3 ^ in7 ^ bitbuffer[in_offset + 13];
-        in15 = input[i / sizeof(z_word_t) + 14] ^ next15 ^ in4 ^ in8 ^ bitbuffer[in_offset + 14];
-        in16 = input[i / sizeof(z_word_t) + 15] ^ next16 ^ in5 ^ in9 ^ bitbuffer[in_offset + 15];
-        in17 = input[i / sizeof(z_word_t) + 16] ^ next17 ^ in6 ^ in10 ^ bitbuffer[in_offset + 16];
-        in18 = input[i / sizeof(z_word_t) + 17] ^ next18 ^ in7 ^ in11 ^ bitbuffer[in_offset + 17];
-        in19 = input[i / sizeof(z_word_t) + 18] ^ next19 ^ in8 ^ in12 ^ bitbuffer[in_offset + 18];
-        in20 = input[i / sizeof(z_word_t) + 19] ^ next20 ^ in9 ^ in13 ^ bitbuffer[in_offset + 19];
-        in21 = input[i / sizeof(z_word_t) + 20] ^ next21 ^ in10 ^ in14 ^ bitbuffer[in_offset + 20];
-        in22 = input[i / sizeof(z_word_t) + 21] ^ next22 ^ in11 ^ in15 ^ bitbuffer[in_offset + 21];
-        in23 = input[i / sizeof(z_word_t) + 22] ^ in1 ^ in12 ^ in16 ^ bitbuffer[in_offset + 22];
-        in24 = input[i / sizeof(z_word_t) + 23] ^ in2 ^ in13 ^ in17 ^ bitbuffer[in_offset + 23];
-        in25 = input[i / sizeof(z_word_t) + 24] ^ in3 ^ in14 ^ in18 ^ bitbuffer[in_offset + 24];
-        in26 = input[i / sizeof(z_word_t) + 25] ^ in4 ^ in15 ^ in19 ^ bitbuffer[in_offset + 25];
-        in27 = input[i / sizeof(z_word_t) + 26] ^ in5 ^ in16 ^ in20 ^ bitbuffer[in_offset + 26];
-        in28 = input[i / sizeof(z_word_t) + 27] ^ in6 ^ in17 ^ in21 ^ bitbuffer[in_offset + 27];
-        in29 = input[i / sizeof(z_word_t) + 28] ^ in7 ^ in18 ^ in22 ^ bitbuffer[in_offset + 28];
-        in30 = input[i / sizeof(z_word_t) + 29] ^ in8 ^ in19 ^ in23 ^ bitbuffer[in_offset + 29];
-        in31 = input[i / sizeof(z_word_t) + 30] ^ in9 ^ in20 ^ in24 ^ bitbuffer[in_offset + 30];
-        in32 = input[i / sizeof(z_word_t) + 31] ^ in10 ^ in21 ^ in25 ^ bitbuffer[in_offset + 31];
+    for (; (i + (14870 + 64) * sizeof(chorba_word_t)) < len; i += (32 * sizeof(chorba_word_t))) {
+        chorba_word_t in1, in2, in3, in4, in5, in6, in7, in8;
+        chorba_word_t in9, in10, in11, in12, in13, in14, in15, in16;
+        chorba_word_t in17, in18, in19, in20, in21, in22, in23, in24;
+        chorba_word_t in25, in26, in27, in28, in29, in30, in31, in32;
+        int in_offset = (i / sizeof(chorba_word_t)) % bitbuffer_size_zwords;
+        int out_offset1 = ((i / sizeof(chorba_word_t)) + 14848) % bitbuffer_size_zwords;
+        int out_offset2 = ((i / sizeof(chorba_word_t)) + 14880) % bitbuffer_size_zwords;
+
+        in1 = input[i / sizeof(chorba_word_t) + 0] ^ next1 ^ bitbuffer[in_offset + 0];
+        in2 = input[i / sizeof(chorba_word_t) + 1] ^ next2 ^ bitbuffer[in_offset + 1];
+        in3 = input[i / sizeof(chorba_word_t) + 2] ^ next3 ^ bitbuffer[in_offset + 2];
+        in4 = input[i / sizeof(chorba_word_t) + 3] ^ next4 ^ bitbuffer[in_offset + 3];
+        in5 = input[i / sizeof(chorba_word_t) + 4] ^ next5 ^ bitbuffer[in_offset + 4];
+        in6 = input[i / sizeof(chorba_word_t) + 5] ^ next6 ^ bitbuffer[in_offset + 5];
+        in7 = input[i / sizeof(chorba_word_t) + 6] ^ next7 ^ bitbuffer[in_offset + 6];
+        in8 = input[i / sizeof(chorba_word_t) + 7] ^ next8 ^ in1 ^ bitbuffer[in_offset + 7];
+        in9 = input[i / sizeof(chorba_word_t) + 8] ^ next9 ^ in2 ^ bitbuffer[in_offset + 8];
+        in10 = input[i / sizeof(chorba_word_t) + 9] ^ next10 ^ in3 ^ bitbuffer[in_offset + 9];
+        in11 = input[i / sizeof(chorba_word_t) + 10] ^ next11 ^ in4 ^ bitbuffer[in_offset + 10];
+        in12 = input[i / sizeof(chorba_word_t) + 11] ^ next12 ^ in1 ^ in5 ^ bitbuffer[in_offset + 11];
+        in13 = input[i / sizeof(chorba_word_t) + 12] ^ next13 ^ in2 ^ in6 ^ bitbuffer[in_offset + 12];
+        in14 = input[i / sizeof(chorba_word_t) + 13] ^ next14 ^ in3 ^ in7 ^ bitbuffer[in_offset + 13];
+        in15 = input[i / sizeof(chorba_word_t) + 14] ^ next15 ^ in4 ^ in8 ^ bitbuffer[in_offset + 14];
+        in16 = input[i / sizeof(chorba_word_t) + 15] ^ next16 ^ in5 ^ in9 ^ bitbuffer[in_offset + 15];
+        in17 = input[i / sizeof(chorba_word_t) + 16] ^ next17 ^ in6 ^ in10 ^ bitbuffer[in_offset + 16];
+        in18 = input[i / sizeof(chorba_word_t) + 17] ^ next18 ^ in7 ^ in11 ^ bitbuffer[in_offset + 17];
+        in19 = input[i / sizeof(chorba_word_t) + 18] ^ next19 ^ in8 ^ in12 ^ bitbuffer[in_offset + 18];
+        in20 = input[i / sizeof(chorba_word_t) + 19] ^ next20 ^ in9 ^ in13 ^ bitbuffer[in_offset + 19];
+        in21 = input[i / sizeof(chorba_word_t) + 20] ^ next21 ^ in10 ^ in14 ^ bitbuffer[in_offset + 20];
+        in22 = input[i / sizeof(chorba_word_t) + 21] ^ next22 ^ in11 ^ in15 ^ bitbuffer[in_offset + 21];
+        in23 = input[i / sizeof(chorba_word_t) + 22] ^ in1 ^ in12 ^ in16 ^ bitbuffer[in_offset + 22];
+        in24 = input[i / sizeof(chorba_word_t) + 23] ^ in2 ^ in13 ^ in17 ^ bitbuffer[in_offset + 23];
+        in25 = input[i / sizeof(chorba_word_t) + 24] ^ in3 ^ in14 ^ in18 ^ bitbuffer[in_offset + 24];
+        in26 = input[i / sizeof(chorba_word_t) + 25] ^ in4 ^ in15 ^ in19 ^ bitbuffer[in_offset + 25];
+        in27 = input[i / sizeof(chorba_word_t) + 26] ^ in5 ^ in16 ^ in20 ^ bitbuffer[in_offset + 26];
+        in28 = input[i / sizeof(chorba_word_t) + 27] ^ in6 ^ in17 ^ in21 ^ bitbuffer[in_offset + 27];
+        in29 = input[i / sizeof(chorba_word_t) + 28] ^ in7 ^ in18 ^ in22 ^ bitbuffer[in_offset + 28];
+        in30 = input[i / sizeof(chorba_word_t) + 29] ^ in8 ^ in19 ^ in23 ^ bitbuffer[in_offset + 29];
+        in31 = input[i / sizeof(chorba_word_t) + 30] ^ in9 ^ in20 ^ in24 ^ bitbuffer[in_offset + 30];
+        in32 = input[i / sizeof(chorba_word_t) + 31] ^ in10 ^ in21 ^ in25 ^ bitbuffer[in_offset + 31];
 
         next1 = in11 ^ in22 ^ in26;
         next2 = in12 ^ in23 ^ in27;
@@ -369,31 +371,31 @@ Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const z_wor
         bitbuffer[out_offset2 + 21] = in32;
     }
 
-    bitbuffer[(i / sizeof(z_word_t) + 0) % bitbuffer_size_zwords] ^= next1;
-    bitbuffer[(i / sizeof(z_word_t) + 1) % bitbuffer_size_zwords] ^= next2;
-    bitbuffer[(i / sizeof(z_word_t) + 2) % bitbuffer_size_zwords] ^= next3;
-    bitbuffer[(i / sizeof(z_word_t) + 3) % bitbuffer_size_zwords] ^= next4;
-    bitbuffer[(i / sizeof(z_word_t) + 4) % bitbuffer_size_zwords] ^= next5;
-    bitbuffer[(i / sizeof(z_word_t) + 5) % bitbuffer_size_zwords] ^= next6;
-    bitbuffer[(i / sizeof(z_word_t) + 6) % bitbuffer_size_zwords] ^= next7;
-    bitbuffer[(i / sizeof(z_word_t) + 7) % bitbuffer_size_zwords] ^= next8;
-    bitbuffer[(i / sizeof(z_word_t) + 8) % bitbuffer_size_zwords] ^= next9;
-    bitbuffer[(i / sizeof(z_word_t) + 9) % bitbuffer_size_zwords] ^= next10;
-    bitbuffer[(i / sizeof(z_word_t) + 10) % bitbuffer_size_zwords] ^= next11;
-    bitbuffer[(i / sizeof(z_word_t) + 11) % bitbuffer_size_zwords] ^= next12;
-    bitbuffer[(i / sizeof(z_word_t) + 12) % bitbuffer_size_zwords] ^= next13;
-    bitbuffer[(i / sizeof(z_word_t) + 13) % bitbuffer_size_zwords] ^= next14;
-    bitbuffer[(i / sizeof(z_word_t) + 14) % bitbuffer_size_zwords] ^= next15;
-    bitbuffer[(i / sizeof(z_word_t) + 15) % bitbuffer_size_zwords] ^= next16;
-    bitbuffer[(i / sizeof(z_word_t) + 16) % bitbuffer_size_zwords] ^= next17;
-    bitbuffer[(i / sizeof(z_word_t) + 17) % bitbuffer_size_zwords] ^= next18;
-    bitbuffer[(i / sizeof(z_word_t) + 18) % bitbuffer_size_zwords] ^= next19;
-    bitbuffer[(i / sizeof(z_word_t) + 19) % bitbuffer_size_zwords] ^= next20;
-    bitbuffer[(i / sizeof(z_word_t) + 20) % bitbuffer_size_zwords] ^= next21;
-    bitbuffer[(i / sizeof(z_word_t) + 21) % bitbuffer_size_zwords] ^= next22;
+    bitbuffer[(i / sizeof(chorba_word_t) + 0) % bitbuffer_size_zwords] ^= next1;
+    bitbuffer[(i / sizeof(chorba_word_t) + 1) % bitbuffer_size_zwords] ^= next2;
+    bitbuffer[(i / sizeof(chorba_word_t) + 2) % bitbuffer_size_zwords] ^= next3;
+    bitbuffer[(i / sizeof(chorba_word_t) + 3) % bitbuffer_size_zwords] ^= next4;
+    bitbuffer[(i / sizeof(chorba_word_t) + 4) % bitbuffer_size_zwords] ^= next5;
+    bitbuffer[(i / sizeof(chorba_word_t) + 5) % bitbuffer_size_zwords] ^= next6;
+    bitbuffer[(i / sizeof(chorba_word_t) + 6) % bitbuffer_size_zwords] ^= next7;
+    bitbuffer[(i / sizeof(chorba_word_t) + 7) % bitbuffer_size_zwords] ^= next8;
+    bitbuffer[(i / sizeof(chorba_word_t) + 8) % bitbuffer_size_zwords] ^= next9;
+    bitbuffer[(i / sizeof(chorba_word_t) + 9) % bitbuffer_size_zwords] ^= next10;
+    bitbuffer[(i / sizeof(chorba_word_t) + 10) % bitbuffer_size_zwords] ^= next11;
+    bitbuffer[(i / sizeof(chorba_word_t) + 11) % bitbuffer_size_zwords] ^= next12;
+    bitbuffer[(i / sizeof(chorba_word_t) + 12) % bitbuffer_size_zwords] ^= next13;
+    bitbuffer[(i / sizeof(chorba_word_t) + 13) % bitbuffer_size_zwords] ^= next14;
+    bitbuffer[(i / sizeof(chorba_word_t) + 14) % bitbuffer_size_zwords] ^= next15;
+    bitbuffer[(i / sizeof(chorba_word_t) + 15) % bitbuffer_size_zwords] ^= next16;
+    bitbuffer[(i / sizeof(chorba_word_t) + 16) % bitbuffer_size_zwords] ^= next17;
+    bitbuffer[(i / sizeof(chorba_word_t) + 17) % bitbuffer_size_zwords] ^= next18;
+    bitbuffer[(i / sizeof(chorba_word_t) + 18) % bitbuffer_size_zwords] ^= next19;
+    bitbuffer[(i / sizeof(chorba_word_t) + 19) % bitbuffer_size_zwords] ^= next20;
+    bitbuffer[(i / sizeof(chorba_word_t) + 20) % bitbuffer_size_zwords] ^= next21;
+    bitbuffer[(i / sizeof(chorba_word_t) + 21) % bitbuffer_size_zwords] ^= next22;
 
     for (int j = 14870; j < 14870 + 64; j++) {
-        bitbuffer[(j + (i / sizeof(z_word_t))) % bitbuffer_size_zwords] = 0;
+        bitbuffer[(j + (i / sizeof(chorba_word_t))) % bitbuffer_size_zwords] = 0;
     }
 
     uint64_t next1_64 = 0;
@@ -482,7 +484,7 @@ Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const z_wor
     return ~crc;
 }
 
-#  if OPTIMAL_CMP == 64
+#  if CHORBA_W == 8
 /* Implement Chorba algorithm from https://arxiv.org/abs/2412.16398 */
 Z_INTERNAL uint32_t crc32_chorba_32768_nondestructive(uint32_t crc, const uint64_t* input, size_t len) {
     uint64_t bitbuffer[32768 / sizeof(uint64_t)];
@@ -570,8 +572,8 @@ Z_INTERNAL uint32_t crc32_chorba_32768_nondestructive(uint32_t crc, const uint64
         uint64_t out4;
         uint64_t out5;
 
-        in1 = input[i / sizeof(z_word_t)] ^ bitbuffer[(i / sizeof(uint64_t))];
-        in2 = input[(i + 8) / sizeof(z_word_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 1)];
+        in1 = input[i / sizeof(chorba_word_t)] ^ bitbuffer[(i / sizeof(uint64_t))];
+        in2 = input[(i + 8) / sizeof(chorba_word_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 1)];
         in1 = Z_U64_FROM_LE(in1) ^ next1_64;
         in2 = Z_U64_FROM_LE(in2) ^ next2_64;
 
@@ -585,8 +587,8 @@ Z_INTERNAL uint32_t crc32_chorba_32768_nondestructive(uint32_t crc, const uint64
         b3 = (in2 >> 45) ^ (in2 << 44);
         b4 = (in2 >> 20);
 
-        in3 = input[(i + 16) / sizeof(z_word_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 2)];
-        in4 = input[(i + 24) / sizeof(z_word_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 3)];
+        in3 = input[(i + 16) / sizeof(chorba_word_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 2)];
+        in4 = input[(i + 24) / sizeof(chorba_word_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 3)];
         in3 = Z_U64_FROM_LE(in3) ^ next3_64 ^ a1;
         in4 = Z_U64_FROM_LE(in4) ^ next4_64 ^ a2 ^ b1;
 
@@ -1062,7 +1064,7 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint64
     return crc32_braid(~crc, (uint8_t*)final, len-i);
 }
 
-#else // OPTIMAL_CMP == 64
+#else // CHORBA_W == 8
 
 Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit(uint32_t crc, const uint32_t *input, size_t len) {
     uint32_t final[20] = {0};
@@ -1235,7 +1237,7 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit(uint32_t crc, const
 
     return crc32_braid(~crc, (uint8_t*)final, len-i);
 }
-#endif // OPTIMAL_CMP == 64
+#endif // CHORBA_W == 8
 
 Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) {
     uintptr_t align_diff = ALIGN_DIFF(buf, 8);
@@ -1248,8 +1250,8 @@ Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) {
         buf += align_diff;
     }
     if (len > CHORBA_LARGE_THRESHOLD)
-        return crc32_chorba_118960_nondestructive(crc, (const z_word_t*)buf, len);
-#if OPTIMAL_CMP == 64
+        return crc32_chorba_118960_nondestructive(crc, (const chorba_word_t*)buf, len);
+#if CHORBA_W == 8
     if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD)
         return crc32_chorba_32768_nondestructive(crc, (const uint64_t*)buf, len);
     return crc32_chorba_small_nondestructive(crc, (const uint64_t*)buf, len);
diff --git a/arch/generic/generic_functions.h b/arch/generic/generic_functions.h
index 1b296b8f92..c150a2f010 100644
--- a/arch/generic/generic_functions.h
+++ b/arch/generic/generic_functions.h
@@ -7,7 +7,6 @@
 
 #include "zendian.h"
 #include "deflate.h"
-#include "crc32_braid_p.h"
 
 typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
 typedef uint32_t (*adler32_copy_func)(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
@@ -33,10 +32,6 @@ uint32_t crc32_copy_braid(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t
 
 #ifndef WITHOUT_CHORBA
   uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len);
-  uint32_t crc32_chorba_118960_nondestructive (uint32_t crc, const z_word_t* input, size_t len);
-  uint32_t crc32_chorba_32768_nondestructive (uint32_t crc, const uint64_t* input, size_t len);
-  uint32_t crc32_chorba_small_nondestructive (uint32_t crc, const uint64_t* input, size_t len);
-  uint32_t crc32_chorba_small_nondestructive_32bit (uint32_t crc, const uint32_t* input, size_t len);
   uint32_t crc32_copy_chorba(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
 
diff --git a/arch/loongarch/crc32_la.c b/arch/loongarch/crc32_la.c
index 55a694bae6..f1bd314e65 100644
--- a/arch/loongarch/crc32_la.c
+++ b/arch/loongarch/crc32_la.c
@@ -6,7 +6,6 @@
 #ifdef LOONGARCH_CRC
 
 #include "zbuild.h"
-#include "crc32.h"
 
 #include <larchintrin.h>
 
diff --git a/arch/x86/crc32_chorba_sse2.c b/arch/x86/crc32_chorba_sse2.c
index 4e94ca151a..8ecd74443e 100644
--- a/arch/x86/crc32_chorba_sse2.c
+++ b/arch/x86/crc32_chorba_sse2.c
@@ -1,9 +1,9 @@
 #if defined(X86_SSE2) && !defined(WITHOUT_CHORBA_SSE)
 
 #include "zbuild.h"
+#include "crc32_chorba_p.h"
 #include "crc32_braid_p.h"
 #include "crc32_braid_tbl.h"
-#include "crc32.h"
 #include <emmintrin.h>
 #include "arch/x86/x86_intrins.h"
 #include "arch_functions.h"
@@ -857,7 +857,7 @@ Z_INTERNAL uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t l
     }
 #if !defined(WITHOUT_CHORBA)
     if (len > CHORBA_LARGE_THRESHOLD)
-        return crc32_chorba_118960_nondestructive(crc, (const z_word_t*)buf, len);
+        return crc32_chorba_118960_nondestructive(crc, (const chorba_word_t*)buf, len);
 #endif
     return chorba_small_nondestructive_sse2(crc, (const uint64_t*)buf, len);
 }
diff --git a/arch/x86/crc32_chorba_sse41.c b/arch/x86/crc32_chorba_sse41.c
index 96ba00ff3b..4e750cbd8d 100644
--- a/arch/x86/crc32_chorba_sse41.c
+++ b/arch/x86/crc32_chorba_sse41.c
@@ -1,9 +1,9 @@
 #if defined(X86_SSE41) && !defined(WITHOUT_CHORBA_SSE)
 
 #include "zbuild.h"
+#include "crc32_chorba_p.h"
 #include "crc32_braid_p.h"
 #include "crc32_braid_tbl.h"
-#include "crc32.h"
 #include <emmintrin.h>
 #include <smmintrin.h>
 #include "arch/x86/x86_intrins.h"
@@ -315,7 +315,7 @@ Z_INTERNAL uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t
     }
 #if !defined(WITHOUT_CHORBA)
     if (len > CHORBA_LARGE_THRESHOLD)
-        return crc32_chorba_118960_nondestructive(crc, (z_word_t*)buf, len);
+        return crc32_chorba_118960_nondestructive(crc, (const chorba_word_t*)buf, len);
 #endif
     if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD)
         return crc32_chorba_32768_nondestructive_sse41(crc, (const uint64_t*)buf, len);
diff --git a/arch/x86/crc32_pclmulqdq_tpl.h b/arch/x86/crc32_pclmulqdq_tpl.h
index b7ed17f809..8677f1e872 100644
--- a/arch/x86/crc32_pclmulqdq_tpl.h
+++ b/arch/x86/crc32_pclmulqdq_tpl.h
@@ -23,7 +23,6 @@
 #include <wmmintrin.h>
 #include <smmintrin.h> // _mm_extract_epi32
 
-#include "crc32.h"
 #include "crc32_braid_p.h"
 #include "crc32_braid_tbl.h"
 #include "crc32_p.h"
diff --git a/arch_functions.h b/arch_functions.h
index a53b2f7b43..979c968624 100644
--- a/arch_functions.h
+++ b/arch_functions.h
@@ -8,7 +8,6 @@
 
 #include "zbuild.h"
 #include "zutil.h"
-#include "crc32.h"
 #include "deflate.h"
 #include "fallback_builtins.h"
 
diff --git a/crc32.h b/crc32.h
deleted file mode 100644
index e205777598..0000000000
--- a/crc32.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* crc32.h -- crc32 folding interface
- * Copyright (C) 2021 Nathan Moinvaziri
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-#ifndef CRC32_H_
-#define CRC32_H_
-
-/* Size thresholds for Chorba algorithm variants */
-#define CHORBA_LARGE_THRESHOLD (sizeof(z_word_t) * 64 * 1024)
-#define CHORBA_MEDIUM_UPPER_THRESHOLD 32768
-#define CHORBA_MEDIUM_LOWER_THRESHOLD 8192
-#define CHORBA_SMALL_THRESHOLD_64BIT 72
-#if OPTIMAL_CMP == 64
-#  define CHORBA_SMALL_THRESHOLD 72
-#else
-#  define CHORBA_SMALL_THRESHOLD 80
-#endif
-
-#endif
diff --git a/crc32_braid_p.h b/crc32_braid_p.h
index af26ebedda..624e22ecd9 100644
--- a/crc32_braid_p.h
+++ b/crc32_braid_p.h
@@ -8,6 +8,9 @@
 
 /* Define BRAID_W and the associated z_word_t type. If BRAID_W is not defined, then a braided
    calculation is not used, and the associated tables and code are not compiled.
+
+   TODO: According to crc32_braid_c.c, BRAID_N=5, BRAID_W=4 is fastest with Sparc64-VII,
+   PowerPC POWER9, and MIPS64 Octeon II processors.
  */
 #ifdef ARCH_64BIT
 #  define BRAID_W 8
diff --git a/crc32_chorba_p.h b/crc32_chorba_p.h
new file mode 100644
index 0000000000..f599e707b0
--- /dev/null
+++ b/crc32_chorba_p.h
@@ -0,0 +1,34 @@
+/* crc32_chorba_p.h -- crc32 chorba interface
+ * Copyright (C) 2021 Nathan Moinvaziri
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef CRC32_CHORBA_P_H_
+#define CRC32_CHORBA_P_H_
+
+#include "zendian.h"
+
+/* Size thresholds for Chorba algorithm variants */
+
+#define CHORBA_LARGE_THRESHOLD (sizeof(chorba_word_t) * 64 * 1024)
+#define CHORBA_MEDIUM_UPPER_THRESHOLD 32768
+#define CHORBA_MEDIUM_LOWER_THRESHOLD 8192
+#define CHORBA_SMALL_THRESHOLD_64BIT 72
+#ifdef ARCH_64BIT
+#  define CHORBA_SMALL_THRESHOLD 72
+#  define CHORBA_W 8
+#  define CHORBA_WORD_FROM_LE(word) Z_U64_FROM_LE(word)
+    typedef uint64_t chorba_word_t;
+#else
+#  define CHORBA_SMALL_THRESHOLD 80
+#  define CHORBA_W 4
+#  define CHORBA_WORD_FROM_LE(word) Z_U32_FROM_LE(word)
+    typedef uint32_t chorba_word_t;
+#endif
+
+Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive (uint32_t crc, const chorba_word_t* input, size_t len);
+Z_INTERNAL uint32_t crc32_chorba_32768_nondestructive (uint32_t crc, const uint64_t* input, size_t len);
+Z_INTERNAL uint32_t crc32_chorba_small_nondestructive (uint32_t crc, const uint64_t* input, size_t len);
+Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit (uint32_t crc, const uint32_t* input, size_t len);
+
+#endif /* CRC32_CHORBA_P_H_ */
diff --git a/deflate.h b/deflate.h
index cf88436590..85435636d4 100644
--- a/deflate.h
+++ b/deflate.h
@@ -13,7 +13,6 @@
 #include "zutil.h"
 #include "zendian.h"
 #include "zmemory.h"
-#include "crc32.h"
 
 #ifdef S390_DFLTCC_DEFLATE
 #  include "arch/s390/dfltcc_common.h"
diff --git a/functable.h b/functable.h
index bb33fdb869..95e05d5ed7 100644
--- a/functable.h
+++ b/functable.h
@@ -7,7 +7,6 @@
 #define FUNCTABLE_H_
 
 #include "deflate.h"
-#include "crc32.h"
 
 #ifdef DISABLE_RUNTIME_CPU_DETECTION
 
diff --git a/inflate.h b/inflate.h
index c2ecb78080..224d688c5e 100644
--- a/inflate.h
+++ b/inflate.h
@@ -11,8 +11,6 @@
 #ifndef INFLATE_H_
 #define INFLATE_H_
 
-#include "crc32.h"
-
 #ifdef S390_DFLTCC_INFLATE
 #  include "arch/s390/dfltcc_common.h"
 #  define HAVE_ARCH_INFLATE_STATE
author	Cameron Cawley <ccawley2011@gmail.com>	2025-10-02 22:08:05 +0100
committer	Hans Kristian Rosbach <hk-github@circlestorm.org>	2026-02-17 23:23:12 +0100
commit	3e391c13074083eee416c424cccf1d87a32fd5bf (patch)
tree	f06e03edcd84e29336ebe6caada5ff8c7db7605e
parent	4844fe1a0bac37cacb54e4ec678e6a25544244ee (diff)
download	Project-Tick-3e391c13074083eee416c424cccf1d87a32fd5bf.tar.gz Project-Tick-3e391c13074083eee416c424cccf1d87a32fd5bf.zip