diff options
| field | value | date |
|---|---|---|
| author | Cameron Cawley <ccawley2011@gmail.com> | 2025-10-02 22:08:05 +0100 |
| committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2026-02-17 23:23:12 +0100 |
| commit | 3e391c13074083eee416c424cccf1d87a32fd5bf (patch) | |
| tree | f06e03edcd84e29336ebe6caada5ff8c7db7605e | |
| parent | 4844fe1a0bac37cacb54e4ec678e6a25544244ee (diff) | |
| download | Project-Tick-3e391c13074083eee416c424cccf1d87a32fd5bf.tar.gz Project-Tick-3e391c13074083eee416c424cccf1d87a32fd5bf.zip | |
Split CRC32 Braid and Chorba word types
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | CMakeLists.txt | 2 |
| -rw-r--r-- | arch/arm/crc32_armv8.c | 1 |
| -rw-r--r-- | arch/arm/crc32_armv8_pmull_eor3.c | 1 |
| -rw-r--r-- | arch/generic/crc32_chorba_c.c | 368 |
| -rw-r--r-- | arch/generic/generic_functions.h | 5 |
| -rw-r--r-- | arch/loongarch/crc32_la.c | 1 |
| -rw-r--r-- | arch/x86/crc32_chorba_sse2.c | 4 |
| -rw-r--r-- | arch/x86/crc32_chorba_sse41.c | 4 |
| -rw-r--r-- | arch/x86/crc32_pclmulqdq_tpl.h | 1 |
| -rw-r--r-- | arch_functions.h | 1 |
| -rw-r--r-- | crc32.h | 19 |
| -rw-r--r-- | crc32_braid_p.h | 3 |
| -rw-r--r-- | crc32_chorba_p.h | 34 |
| -rw-r--r-- | deflate.h | 1 |
| -rw-r--r-- | functable.h | 1 |
| -rw-r--r-- | inflate.h | 2 |
16 files changed, 227 insertions, 221 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b0481b9ca..84b64e9a3b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1216,10 +1216,10 @@ set(ZLIB_PRIVATE_HDRS arch_functions.h chunkset_tpl.h compare256_rle.h - crc32.h crc32_braid_p.h crc32_braid_comb_p.h crc32_braid_tbl.h + crc32_chorba_p.h deflate.h deflate_p.h fallback_builtins.h diff --git a/arch/arm/crc32_armv8.c b/arch/arm/crc32_armv8.c index d42f5cc130..55dac2a564 100644 --- a/arch/arm/crc32_armv8.c +++ b/arch/arm/crc32_armv8.c @@ -8,7 +8,6 @@ #include "zbuild.h" #include "acle_intrins.h" -#include "crc32.h" Z_INTERNAL Z_TARGET_CRC uint32_t crc32_armv8(uint32_t crc, const uint8_t *buf, size_t len) { uint32_t c = ~crc; diff --git a/arch/arm/crc32_armv8_pmull_eor3.c b/arch/arm/crc32_armv8_pmull_eor3.c index d9912907ee..38f2a854b1 100644 --- a/arch/arm/crc32_armv8_pmull_eor3.c +++ b/arch/arm/crc32_armv8_pmull_eor3.c @@ -13,7 +13,6 @@ #include "zutil.h" #include "acle_intrins.h" #include "neon_intrins.h" -#include "crc32.h" /* Carryless multiply low 64 bits: a[0] * b[0] */ static inline uint64x2_t clmul_lo(uint64x2_t a, uint64x2_t b) { diff --git a/arch/generic/crc32_chorba_c.c b/arch/generic/crc32_chorba_c.c index 1759d4757d..87e4fdf674 100644 --- a/arch/generic/crc32_chorba_c.c +++ b/arch/generic/crc32_chorba_c.c @@ -1,18 +1,20 @@ #include "zbuild.h" +#include "zendian.h" #if defined(__EMSCRIPTEN__) # include "zutil_p.h" #endif #include "zmemory.h" +#include "crc32_chorba_p.h" #include "crc32_braid_p.h" #include "crc32_braid_tbl.h" #include "generic_functions.h" /* Implement Chorba algorithm from https://arxiv.org/abs/2412.16398 */ -#define bitbuffer_size_bytes (16 * 1024 * sizeof(z_word_t)) -#define bitbuffer_size_zwords (bitbuffer_size_bytes / sizeof(z_word_t)) +#define bitbuffer_size_bytes (16 * 1024 * sizeof(chorba_word_t)) +#define bitbuffer_size_zwords (bitbuffer_size_bytes / sizeof(chorba_word_t)) #define bitbuffer_size_qwords (bitbuffer_size_bytes / sizeof(uint64_t)) -#if 
defined(HAVE_MAY_ALIAS) && BRAID_W != 8 +#if defined(HAVE_MAY_ALIAS) && CHORBA_W != 8 typedef uint64_t __attribute__ ((__may_alias__)) uint64a_t; #else typedef uint64_t uint64a_t; @@ -34,11 +36,11 @@ * @note Requires minimum input size of 118960 + 512 bytes * @note Uses 128KB temporary buffer */ -Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const z_word_t *input, size_t len) { +Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const chorba_word_t *input, size_t len) { #if defined(__EMSCRIPTEN__) - z_word_t *bitbuffer = (z_word_t*)zng_alloc(bitbuffer_size_bytes); + chorba_word_t *bitbuffer = (chorba_word_t*)zng_alloc(bitbuffer_size_bytes); #else - ALIGNED_(16) z_word_t bitbuffer[bitbuffer_size_zwords]; + ALIGNED_(16) chorba_word_t bitbuffer[bitbuffer_size_zwords]; #endif const uint8_t *bitbuffer_bytes = (const uint8_t*)bitbuffer; uint64a_t *bitbuffer_qwords = (uint64a_t*)bitbuffer; @@ -46,72 +48,72 @@ Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const z_wor size_t i = 0; - z_word_t next1 = Z_WORD_FROM_LE(~crc); - - z_word_t next2 = 0; - z_word_t next3 = 0; - z_word_t next4 = 0; - z_word_t next5 = 0; - z_word_t next6 = 0; - z_word_t next7 = 0; - z_word_t next8 = 0; - z_word_t next9 = 0; - z_word_t next10 = 0; - z_word_t next11 = 0; - z_word_t next12 = 0; - z_word_t next13 = 0; - z_word_t next14 = 0; - z_word_t next15 = 0; - z_word_t next16 = 0; - z_word_t next17 = 0; - z_word_t next18 = 0; - z_word_t next19 = 0; - z_word_t next20 = 0; - z_word_t next21 = 0; - z_word_t next22 = 0; + chorba_word_t next1 = CHORBA_WORD_FROM_LE(~crc); + + chorba_word_t next2 = 0; + chorba_word_t next3 = 0; + chorba_word_t next4 = 0; + chorba_word_t next5 = 0; + chorba_word_t next6 = 0; + chorba_word_t next7 = 0; + chorba_word_t next8 = 0; + chorba_word_t next9 = 0; + chorba_word_t next10 = 0; + chorba_word_t next11 = 0; + chorba_word_t next12 = 0; + chorba_word_t next13 = 0; + chorba_word_t next14 = 0; + chorba_word_t 
next15 = 0; + chorba_word_t next16 = 0; + chorba_word_t next17 = 0; + chorba_word_t next18 = 0; + chorba_word_t next19 = 0; + chorba_word_t next20 = 0; + chorba_word_t next21 = 0; + chorba_word_t next22 = 0; crc = 0; // do a first pass to zero out bitbuffer - for (; i < (14848 * sizeof(z_word_t)); i += (32 * sizeof(z_word_t))) { - z_word_t in1, in2, in3, in4, in5, in6, in7, in8; - z_word_t in9, in10, in11, in12, in13, in14, in15, in16; - z_word_t in17, in18, in19, in20, in21, in22, in23, in24; - z_word_t in25, in26, in27, in28, in29, in30, in31, in32; - int out_offset1 = ((i / sizeof(z_word_t)) + 14848) % bitbuffer_size_zwords; - int out_offset2 = ((i / sizeof(z_word_t)) + 14880) % bitbuffer_size_zwords; - - in1 = input[i / sizeof(z_word_t) + 0] ^ next1; - in2 = input[i / sizeof(z_word_t) + 1] ^ next2; - in3 = input[i / sizeof(z_word_t) + 2] ^ next3; - in4 = input[i / sizeof(z_word_t) + 3] ^ next4; - in5 = input[i / sizeof(z_word_t) + 4] ^ next5; - in6 = input[i / sizeof(z_word_t) + 5] ^ next6; - in7 = input[i / sizeof(z_word_t) + 6] ^ next7; - in8 = input[i / sizeof(z_word_t) + 7] ^ next8 ^ in1; - in9 = input[i / sizeof(z_word_t) + 8] ^ next9 ^ in2; - in10 = input[i / sizeof(z_word_t) + 9] ^ next10 ^ in3; - in11 = input[i / sizeof(z_word_t) + 10] ^ next11 ^ in4; - in12 = input[i / sizeof(z_word_t) + 11] ^ next12 ^ in1 ^ in5; - in13 = input[i / sizeof(z_word_t) + 12] ^ next13 ^ in2 ^ in6; - in14 = input[i / sizeof(z_word_t) + 13] ^ next14 ^ in3 ^ in7; - in15 = input[i / sizeof(z_word_t) + 14] ^ next15 ^ in4 ^ in8; - in16 = input[i / sizeof(z_word_t) + 15] ^ next16 ^ in5 ^ in9; - in17 = input[i / sizeof(z_word_t) + 16] ^ next17 ^ in6 ^ in10; - in18 = input[i / sizeof(z_word_t) + 17] ^ next18 ^ in7 ^ in11; - in19 = input[i / sizeof(z_word_t) + 18] ^ next19 ^ in8 ^ in12; - in20 = input[i / sizeof(z_word_t) + 19] ^ next20 ^ in9 ^ in13; - in21 = input[i / sizeof(z_word_t) + 20] ^ next21 ^ in10 ^ in14; - in22 = input[i / sizeof(z_word_t) + 21] ^ next22 ^ in11 ^ in15; - 
in23 = input[i / sizeof(z_word_t) + 22] ^ in1 ^ in12 ^ in16; - in24 = input[i / sizeof(z_word_t) + 23] ^ in2 ^ in13 ^ in17; - in25 = input[i / sizeof(z_word_t) + 24] ^ in3 ^ in14 ^ in18; - in26 = input[i / sizeof(z_word_t) + 25] ^ in4 ^ in15 ^ in19; - in27 = input[i / sizeof(z_word_t) + 26] ^ in5 ^ in16 ^ in20; - in28 = input[i / sizeof(z_word_t) + 27] ^ in6 ^ in17 ^ in21; - in29 = input[i / sizeof(z_word_t) + 28] ^ in7 ^ in18 ^ in22; - in30 = input[i / sizeof(z_word_t) + 29] ^ in8 ^ in19 ^ in23; - in31 = input[i / sizeof(z_word_t) + 30] ^ in9 ^ in20 ^ in24; - in32 = input[i / sizeof(z_word_t) + 31] ^ in10 ^ in21 ^ in25; + for (; i < (14848 * sizeof(chorba_word_t)); i += (32 * sizeof(chorba_word_t))) { + chorba_word_t in1, in2, in3, in4, in5, in6, in7, in8; + chorba_word_t in9, in10, in11, in12, in13, in14, in15, in16; + chorba_word_t in17, in18, in19, in20, in21, in22, in23, in24; + chorba_word_t in25, in26, in27, in28, in29, in30, in31, in32; + int out_offset1 = ((i / sizeof(chorba_word_t)) + 14848) % bitbuffer_size_zwords; + int out_offset2 = ((i / sizeof(chorba_word_t)) + 14880) % bitbuffer_size_zwords; + + in1 = input[i / sizeof(chorba_word_t) + 0] ^ next1; + in2 = input[i / sizeof(chorba_word_t) + 1] ^ next2; + in3 = input[i / sizeof(chorba_word_t) + 2] ^ next3; + in4 = input[i / sizeof(chorba_word_t) + 3] ^ next4; + in5 = input[i / sizeof(chorba_word_t) + 4] ^ next5; + in6 = input[i / sizeof(chorba_word_t) + 5] ^ next6; + in7 = input[i / sizeof(chorba_word_t) + 6] ^ next7; + in8 = input[i / sizeof(chorba_word_t) + 7] ^ next8 ^ in1; + in9 = input[i / sizeof(chorba_word_t) + 8] ^ next9 ^ in2; + in10 = input[i / sizeof(chorba_word_t) + 9] ^ next10 ^ in3; + in11 = input[i / sizeof(chorba_word_t) + 10] ^ next11 ^ in4; + in12 = input[i / sizeof(chorba_word_t) + 11] ^ next12 ^ in1 ^ in5; + in13 = input[i / sizeof(chorba_word_t) + 12] ^ next13 ^ in2 ^ in6; + in14 = input[i / sizeof(chorba_word_t) + 13] ^ next14 ^ in3 ^ in7; + in15 = input[i / sizeof(chorba_word_t) + 
14] ^ next15 ^ in4 ^ in8; + in16 = input[i / sizeof(chorba_word_t) + 15] ^ next16 ^ in5 ^ in9; + in17 = input[i / sizeof(chorba_word_t) + 16] ^ next17 ^ in6 ^ in10; + in18 = input[i / sizeof(chorba_word_t) + 17] ^ next18 ^ in7 ^ in11; + in19 = input[i / sizeof(chorba_word_t) + 18] ^ next19 ^ in8 ^ in12; + in20 = input[i / sizeof(chorba_word_t) + 19] ^ next20 ^ in9 ^ in13; + in21 = input[i / sizeof(chorba_word_t) + 20] ^ next21 ^ in10 ^ in14; + in22 = input[i / sizeof(chorba_word_t) + 21] ^ next22 ^ in11 ^ in15; + in23 = input[i / sizeof(chorba_word_t) + 22] ^ in1 ^ in12 ^ in16; + in24 = input[i / sizeof(chorba_word_t) + 23] ^ in2 ^ in13 ^ in17; + in25 = input[i / sizeof(chorba_word_t) + 24] ^ in3 ^ in14 ^ in18; + in26 = input[i / sizeof(chorba_word_t) + 25] ^ in4 ^ in15 ^ in19; + in27 = input[i / sizeof(chorba_word_t) + 26] ^ in5 ^ in16 ^ in20; + in28 = input[i / sizeof(chorba_word_t) + 27] ^ in6 ^ in17 ^ in21; + in29 = input[i / sizeof(chorba_word_t) + 28] ^ in7 ^ in18 ^ in22; + in30 = input[i / sizeof(chorba_word_t) + 29] ^ in8 ^ in19 ^ in23; + in31 = input[i / sizeof(chorba_word_t) + 30] ^ in9 ^ in20 ^ in24; + in32 = input[i / sizeof(chorba_word_t) + 31] ^ in10 ^ in21 ^ in25; next1 = in11 ^ in22 ^ in26; next2 = in12 ^ in23 ^ in27; @@ -171,47 +173,47 @@ Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const z_wor } // one intermediate pass where we pull half the values - for (; i < (14880 * sizeof(z_word_t)); i += (32 * sizeof(z_word_t))) { - z_word_t in1, in2, in3, in4, in5, in6, in7, in8; - z_word_t in9, in10, in11, in12, in13, in14, in15, in16; - z_word_t in17, in18, in19, in20, in21, in22, in23, in24; - z_word_t in25, in26, in27, in28, in29, in30, in31, in32; - int in_offset = (i / sizeof(z_word_t)) % bitbuffer_size_zwords; - int out_offset1 = ((i / sizeof(z_word_t)) + 14848) % bitbuffer_size_zwords; - int out_offset2 = ((i / sizeof(z_word_t)) + 14880) % bitbuffer_size_zwords; - - in1 = input[i / sizeof(z_word_t) + 0] ^ next1; - in2 = 
input[i / sizeof(z_word_t) + 1] ^ next2; - in3 = input[i / sizeof(z_word_t) + 2] ^ next3; - in4 = input[i / sizeof(z_word_t) + 3] ^ next4; - in5 = input[i / sizeof(z_word_t) + 4] ^ next5; - in6 = input[i / sizeof(z_word_t) + 5] ^ next6; - in7 = input[i / sizeof(z_word_t) + 6] ^ next7; - in8 = input[i / sizeof(z_word_t) + 7] ^ next8 ^ in1; - in9 = input[i / sizeof(z_word_t) + 8] ^ next9 ^ in2; - in10 = input[i / sizeof(z_word_t) + 9] ^ next10 ^ in3; - in11 = input[i / sizeof(z_word_t) + 10] ^ next11 ^ in4; - in12 = input[i / sizeof(z_word_t) + 11] ^ next12 ^ in1 ^ in5; - in13 = input[i / sizeof(z_word_t) + 12] ^ next13 ^ in2 ^ in6; - in14 = input[i / sizeof(z_word_t) + 13] ^ next14 ^ in3 ^ in7; - in15 = input[i / sizeof(z_word_t) + 14] ^ next15 ^ in4 ^ in8; - in16 = input[i / sizeof(z_word_t) + 15] ^ next16 ^ in5 ^ in9; - in17 = input[i / sizeof(z_word_t) + 16] ^ next17 ^ in6 ^ in10; - in18 = input[i / sizeof(z_word_t) + 17] ^ next18 ^ in7 ^ in11; - in19 = input[i / sizeof(z_word_t) + 18] ^ next19 ^ in8 ^ in12; - in20 = input[i / sizeof(z_word_t) + 19] ^ next20 ^ in9 ^ in13; - in21 = input[i / sizeof(z_word_t) + 20] ^ next21 ^ in10 ^ in14; - in22 = input[i / sizeof(z_word_t) + 21] ^ next22 ^ in11 ^ in15; - in23 = input[i / sizeof(z_word_t) + 22] ^ in1 ^ in12 ^ in16 ^ bitbuffer[in_offset + 22]; - in24 = input[i / sizeof(z_word_t) + 23] ^ in2 ^ in13 ^ in17 ^ bitbuffer[in_offset + 23]; - in25 = input[i / sizeof(z_word_t) + 24] ^ in3 ^ in14 ^ in18 ^ bitbuffer[in_offset + 24]; - in26 = input[i / sizeof(z_word_t) + 25] ^ in4 ^ in15 ^ in19 ^ bitbuffer[in_offset + 25]; - in27 = input[i / sizeof(z_word_t) + 26] ^ in5 ^ in16 ^ in20 ^ bitbuffer[in_offset + 26]; - in28 = input[i / sizeof(z_word_t) + 27] ^ in6 ^ in17 ^ in21 ^ bitbuffer[in_offset + 27]; - in29 = input[i / sizeof(z_word_t) + 28] ^ in7 ^ in18 ^ in22 ^ bitbuffer[in_offset + 28]; - in30 = input[i / sizeof(z_word_t) + 29] ^ in8 ^ in19 ^ in23 ^ bitbuffer[in_offset + 29]; - in31 = input[i / sizeof(z_word_t) + 30] ^ in9 
^ in20 ^ in24 ^ bitbuffer[in_offset + 30]; - in32 = input[i / sizeof(z_word_t) + 31] ^ in10 ^ in21 ^ in25 ^ bitbuffer[in_offset + 31]; + for (; i < (14880 * sizeof(chorba_word_t)); i += (32 * sizeof(chorba_word_t))) { + chorba_word_t in1, in2, in3, in4, in5, in6, in7, in8; + chorba_word_t in9, in10, in11, in12, in13, in14, in15, in16; + chorba_word_t in17, in18, in19, in20, in21, in22, in23, in24; + chorba_word_t in25, in26, in27, in28, in29, in30, in31, in32; + int in_offset = (i / sizeof(chorba_word_t)) % bitbuffer_size_zwords; + int out_offset1 = ((i / sizeof(chorba_word_t)) + 14848) % bitbuffer_size_zwords; + int out_offset2 = ((i / sizeof(chorba_word_t)) + 14880) % bitbuffer_size_zwords; + + in1 = input[i / sizeof(chorba_word_t) + 0] ^ next1; + in2 = input[i / sizeof(chorba_word_t) + 1] ^ next2; + in3 = input[i / sizeof(chorba_word_t) + 2] ^ next3; + in4 = input[i / sizeof(chorba_word_t) + 3] ^ next4; + in5 = input[i / sizeof(chorba_word_t) + 4] ^ next5; + in6 = input[i / sizeof(chorba_word_t) + 5] ^ next6; + in7 = input[i / sizeof(chorba_word_t) + 6] ^ next7; + in8 = input[i / sizeof(chorba_word_t) + 7] ^ next8 ^ in1; + in9 = input[i / sizeof(chorba_word_t) + 8] ^ next9 ^ in2; + in10 = input[i / sizeof(chorba_word_t) + 9] ^ next10 ^ in3; + in11 = input[i / sizeof(chorba_word_t) + 10] ^ next11 ^ in4; + in12 = input[i / sizeof(chorba_word_t) + 11] ^ next12 ^ in1 ^ in5; + in13 = input[i / sizeof(chorba_word_t) + 12] ^ next13 ^ in2 ^ in6; + in14 = input[i / sizeof(chorba_word_t) + 13] ^ next14 ^ in3 ^ in7; + in15 = input[i / sizeof(chorba_word_t) + 14] ^ next15 ^ in4 ^ in8; + in16 = input[i / sizeof(chorba_word_t) + 15] ^ next16 ^ in5 ^ in9; + in17 = input[i / sizeof(chorba_word_t) + 16] ^ next17 ^ in6 ^ in10; + in18 = input[i / sizeof(chorba_word_t) + 17] ^ next18 ^ in7 ^ in11; + in19 = input[i / sizeof(chorba_word_t) + 18] ^ next19 ^ in8 ^ in12; + in20 = input[i / sizeof(chorba_word_t) + 19] ^ next20 ^ in9 ^ in13; + in21 = input[i / sizeof(chorba_word_t) + 20] 
^ next21 ^ in10 ^ in14; + in22 = input[i / sizeof(chorba_word_t) + 21] ^ next22 ^ in11 ^ in15; + in23 = input[i / sizeof(chorba_word_t) + 22] ^ in1 ^ in12 ^ in16 ^ bitbuffer[in_offset + 22]; + in24 = input[i / sizeof(chorba_word_t) + 23] ^ in2 ^ in13 ^ in17 ^ bitbuffer[in_offset + 23]; + in25 = input[i / sizeof(chorba_word_t) + 24] ^ in3 ^ in14 ^ in18 ^ bitbuffer[in_offset + 24]; + in26 = input[i / sizeof(chorba_word_t) + 25] ^ in4 ^ in15 ^ in19 ^ bitbuffer[in_offset + 25]; + in27 = input[i / sizeof(chorba_word_t) + 26] ^ in5 ^ in16 ^ in20 ^ bitbuffer[in_offset + 26]; + in28 = input[i / sizeof(chorba_word_t) + 27] ^ in6 ^ in17 ^ in21 ^ bitbuffer[in_offset + 27]; + in29 = input[i / sizeof(chorba_word_t) + 28] ^ in7 ^ in18 ^ in22 ^ bitbuffer[in_offset + 28]; + in30 = input[i / sizeof(chorba_word_t) + 29] ^ in8 ^ in19 ^ in23 ^ bitbuffer[in_offset + 29]; + in31 = input[i / sizeof(chorba_word_t) + 30] ^ in9 ^ in20 ^ in24 ^ bitbuffer[in_offset + 30]; + in32 = input[i / sizeof(chorba_word_t) + 31] ^ in10 ^ in21 ^ in25 ^ bitbuffer[in_offset + 31]; next1 = in11 ^ in22 ^ in26; next2 = in12 ^ in23 ^ in27; @@ -270,47 +272,47 @@ Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const z_wor bitbuffer[out_offset2 + 21] = in32; } - for (; (i + (14870 + 64) * sizeof(z_word_t)) < len; i += (32 * sizeof(z_word_t))) { - z_word_t in1, in2, in3, in4, in5, in6, in7, in8; - z_word_t in9, in10, in11, in12, in13, in14, in15, in16; - z_word_t in17, in18, in19, in20, in21, in22, in23, in24; - z_word_t in25, in26, in27, in28, in29, in30, in31, in32; - int in_offset = (i / sizeof(z_word_t)) % bitbuffer_size_zwords; - int out_offset1 = ((i / sizeof(z_word_t)) + 14848) % bitbuffer_size_zwords; - int out_offset2 = ((i / sizeof(z_word_t)) + 14880) % bitbuffer_size_zwords; - - in1 = input[i / sizeof(z_word_t) + 0] ^ next1 ^ bitbuffer[in_offset + 0]; - in2 = input[i / sizeof(z_word_t) + 1] ^ next2 ^ bitbuffer[in_offset + 1]; - in3 = input[i / sizeof(z_word_t) + 2] ^ next3 ^ 
bitbuffer[in_offset + 2]; - in4 = input[i / sizeof(z_word_t) + 3] ^ next4 ^ bitbuffer[in_offset + 3]; - in5 = input[i / sizeof(z_word_t) + 4] ^ next5 ^ bitbuffer[in_offset + 4]; - in6 = input[i / sizeof(z_word_t) + 5] ^ next6 ^ bitbuffer[in_offset + 5]; - in7 = input[i / sizeof(z_word_t) + 6] ^ next7 ^ bitbuffer[in_offset + 6]; - in8 = input[i / sizeof(z_word_t) + 7] ^ next8 ^ in1 ^ bitbuffer[in_offset + 7]; - in9 = input[i / sizeof(z_word_t) + 8] ^ next9 ^ in2 ^ bitbuffer[in_offset + 8]; - in10 = input[i / sizeof(z_word_t) + 9] ^ next10 ^ in3 ^ bitbuffer[in_offset + 9]; - in11 = input[i / sizeof(z_word_t) + 10] ^ next11 ^ in4 ^ bitbuffer[in_offset + 10]; - in12 = input[i / sizeof(z_word_t) + 11] ^ next12 ^ in1 ^ in5 ^ bitbuffer[in_offset + 11]; - in13 = input[i / sizeof(z_word_t) + 12] ^ next13 ^ in2 ^ in6 ^ bitbuffer[in_offset + 12]; - in14 = input[i / sizeof(z_word_t) + 13] ^ next14 ^ in3 ^ in7 ^ bitbuffer[in_offset + 13]; - in15 = input[i / sizeof(z_word_t) + 14] ^ next15 ^ in4 ^ in8 ^ bitbuffer[in_offset + 14]; - in16 = input[i / sizeof(z_word_t) + 15] ^ next16 ^ in5 ^ in9 ^ bitbuffer[in_offset + 15]; - in17 = input[i / sizeof(z_word_t) + 16] ^ next17 ^ in6 ^ in10 ^ bitbuffer[in_offset + 16]; - in18 = input[i / sizeof(z_word_t) + 17] ^ next18 ^ in7 ^ in11 ^ bitbuffer[in_offset + 17]; - in19 = input[i / sizeof(z_word_t) + 18] ^ next19 ^ in8 ^ in12 ^ bitbuffer[in_offset + 18]; - in20 = input[i / sizeof(z_word_t) + 19] ^ next20 ^ in9 ^ in13 ^ bitbuffer[in_offset + 19]; - in21 = input[i / sizeof(z_word_t) + 20] ^ next21 ^ in10 ^ in14 ^ bitbuffer[in_offset + 20]; - in22 = input[i / sizeof(z_word_t) + 21] ^ next22 ^ in11 ^ in15 ^ bitbuffer[in_offset + 21]; - in23 = input[i / sizeof(z_word_t) + 22] ^ in1 ^ in12 ^ in16 ^ bitbuffer[in_offset + 22]; - in24 = input[i / sizeof(z_word_t) + 23] ^ in2 ^ in13 ^ in17 ^ bitbuffer[in_offset + 23]; - in25 = input[i / sizeof(z_word_t) + 24] ^ in3 ^ in14 ^ in18 ^ bitbuffer[in_offset + 24]; - in26 = input[i / sizeof(z_word_t) + 25] 
^ in4 ^ in15 ^ in19 ^ bitbuffer[in_offset + 25]; - in27 = input[i / sizeof(z_word_t) + 26] ^ in5 ^ in16 ^ in20 ^ bitbuffer[in_offset + 26]; - in28 = input[i / sizeof(z_word_t) + 27] ^ in6 ^ in17 ^ in21 ^ bitbuffer[in_offset + 27]; - in29 = input[i / sizeof(z_word_t) + 28] ^ in7 ^ in18 ^ in22 ^ bitbuffer[in_offset + 28]; - in30 = input[i / sizeof(z_word_t) + 29] ^ in8 ^ in19 ^ in23 ^ bitbuffer[in_offset + 29]; - in31 = input[i / sizeof(z_word_t) + 30] ^ in9 ^ in20 ^ in24 ^ bitbuffer[in_offset + 30]; - in32 = input[i / sizeof(z_word_t) + 31] ^ in10 ^ in21 ^ in25 ^ bitbuffer[in_offset + 31]; + for (; (i + (14870 + 64) * sizeof(chorba_word_t)) < len; i += (32 * sizeof(chorba_word_t))) { + chorba_word_t in1, in2, in3, in4, in5, in6, in7, in8; + chorba_word_t in9, in10, in11, in12, in13, in14, in15, in16; + chorba_word_t in17, in18, in19, in20, in21, in22, in23, in24; + chorba_word_t in25, in26, in27, in28, in29, in30, in31, in32; + int in_offset = (i / sizeof(chorba_word_t)) % bitbuffer_size_zwords; + int out_offset1 = ((i / sizeof(chorba_word_t)) + 14848) % bitbuffer_size_zwords; + int out_offset2 = ((i / sizeof(chorba_word_t)) + 14880) % bitbuffer_size_zwords; + + in1 = input[i / sizeof(chorba_word_t) + 0] ^ next1 ^ bitbuffer[in_offset + 0]; + in2 = input[i / sizeof(chorba_word_t) + 1] ^ next2 ^ bitbuffer[in_offset + 1]; + in3 = input[i / sizeof(chorba_word_t) + 2] ^ next3 ^ bitbuffer[in_offset + 2]; + in4 = input[i / sizeof(chorba_word_t) + 3] ^ next4 ^ bitbuffer[in_offset + 3]; + in5 = input[i / sizeof(chorba_word_t) + 4] ^ next5 ^ bitbuffer[in_offset + 4]; + in6 = input[i / sizeof(chorba_word_t) + 5] ^ next6 ^ bitbuffer[in_offset + 5]; + in7 = input[i / sizeof(chorba_word_t) + 6] ^ next7 ^ bitbuffer[in_offset + 6]; + in8 = input[i / sizeof(chorba_word_t) + 7] ^ next8 ^ in1 ^ bitbuffer[in_offset + 7]; + in9 = input[i / sizeof(chorba_word_t) + 8] ^ next9 ^ in2 ^ bitbuffer[in_offset + 8]; + in10 = input[i / sizeof(chorba_word_t) + 9] ^ next10 ^ in3 ^ 
bitbuffer[in_offset + 9]; + in11 = input[i / sizeof(chorba_word_t) + 10] ^ next11 ^ in4 ^ bitbuffer[in_offset + 10]; + in12 = input[i / sizeof(chorba_word_t) + 11] ^ next12 ^ in1 ^ in5 ^ bitbuffer[in_offset + 11]; + in13 = input[i / sizeof(chorba_word_t) + 12] ^ next13 ^ in2 ^ in6 ^ bitbuffer[in_offset + 12]; + in14 = input[i / sizeof(chorba_word_t) + 13] ^ next14 ^ in3 ^ in7 ^ bitbuffer[in_offset + 13]; + in15 = input[i / sizeof(chorba_word_t) + 14] ^ next15 ^ in4 ^ in8 ^ bitbuffer[in_offset + 14]; + in16 = input[i / sizeof(chorba_word_t) + 15] ^ next16 ^ in5 ^ in9 ^ bitbuffer[in_offset + 15]; + in17 = input[i / sizeof(chorba_word_t) + 16] ^ next17 ^ in6 ^ in10 ^ bitbuffer[in_offset + 16]; + in18 = input[i / sizeof(chorba_word_t) + 17] ^ next18 ^ in7 ^ in11 ^ bitbuffer[in_offset + 17]; + in19 = input[i / sizeof(chorba_word_t) + 18] ^ next19 ^ in8 ^ in12 ^ bitbuffer[in_offset + 18]; + in20 = input[i / sizeof(chorba_word_t) + 19] ^ next20 ^ in9 ^ in13 ^ bitbuffer[in_offset + 19]; + in21 = input[i / sizeof(chorba_word_t) + 20] ^ next21 ^ in10 ^ in14 ^ bitbuffer[in_offset + 20]; + in22 = input[i / sizeof(chorba_word_t) + 21] ^ next22 ^ in11 ^ in15 ^ bitbuffer[in_offset + 21]; + in23 = input[i / sizeof(chorba_word_t) + 22] ^ in1 ^ in12 ^ in16 ^ bitbuffer[in_offset + 22]; + in24 = input[i / sizeof(chorba_word_t) + 23] ^ in2 ^ in13 ^ in17 ^ bitbuffer[in_offset + 23]; + in25 = input[i / sizeof(chorba_word_t) + 24] ^ in3 ^ in14 ^ in18 ^ bitbuffer[in_offset + 24]; + in26 = input[i / sizeof(chorba_word_t) + 25] ^ in4 ^ in15 ^ in19 ^ bitbuffer[in_offset + 25]; + in27 = input[i / sizeof(chorba_word_t) + 26] ^ in5 ^ in16 ^ in20 ^ bitbuffer[in_offset + 26]; + in28 = input[i / sizeof(chorba_word_t) + 27] ^ in6 ^ in17 ^ in21 ^ bitbuffer[in_offset + 27]; + in29 = input[i / sizeof(chorba_word_t) + 28] ^ in7 ^ in18 ^ in22 ^ bitbuffer[in_offset + 28]; + in30 = input[i / sizeof(chorba_word_t) + 29] ^ in8 ^ in19 ^ in23 ^ bitbuffer[in_offset + 29]; + in31 = input[i / 
sizeof(chorba_word_t) + 30] ^ in9 ^ in20 ^ in24 ^ bitbuffer[in_offset + 30]; + in32 = input[i / sizeof(chorba_word_t) + 31] ^ in10 ^ in21 ^ in25 ^ bitbuffer[in_offset + 31]; next1 = in11 ^ in22 ^ in26; next2 = in12 ^ in23 ^ in27; @@ -369,31 +371,31 @@ Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const z_wor bitbuffer[out_offset2 + 21] = in32; } - bitbuffer[(i / sizeof(z_word_t) + 0) % bitbuffer_size_zwords] ^= next1; - bitbuffer[(i / sizeof(z_word_t) + 1) % bitbuffer_size_zwords] ^= next2; - bitbuffer[(i / sizeof(z_word_t) + 2) % bitbuffer_size_zwords] ^= next3; - bitbuffer[(i / sizeof(z_word_t) + 3) % bitbuffer_size_zwords] ^= next4; - bitbuffer[(i / sizeof(z_word_t) + 4) % bitbuffer_size_zwords] ^= next5; - bitbuffer[(i / sizeof(z_word_t) + 5) % bitbuffer_size_zwords] ^= next6; - bitbuffer[(i / sizeof(z_word_t) + 6) % bitbuffer_size_zwords] ^= next7; - bitbuffer[(i / sizeof(z_word_t) + 7) % bitbuffer_size_zwords] ^= next8; - bitbuffer[(i / sizeof(z_word_t) + 8) % bitbuffer_size_zwords] ^= next9; - bitbuffer[(i / sizeof(z_word_t) + 9) % bitbuffer_size_zwords] ^= next10; - bitbuffer[(i / sizeof(z_word_t) + 10) % bitbuffer_size_zwords] ^= next11; - bitbuffer[(i / sizeof(z_word_t) + 11) % bitbuffer_size_zwords] ^= next12; - bitbuffer[(i / sizeof(z_word_t) + 12) % bitbuffer_size_zwords] ^= next13; - bitbuffer[(i / sizeof(z_word_t) + 13) % bitbuffer_size_zwords] ^= next14; - bitbuffer[(i / sizeof(z_word_t) + 14) % bitbuffer_size_zwords] ^= next15; - bitbuffer[(i / sizeof(z_word_t) + 15) % bitbuffer_size_zwords] ^= next16; - bitbuffer[(i / sizeof(z_word_t) + 16) % bitbuffer_size_zwords] ^= next17; - bitbuffer[(i / sizeof(z_word_t) + 17) % bitbuffer_size_zwords] ^= next18; - bitbuffer[(i / sizeof(z_word_t) + 18) % bitbuffer_size_zwords] ^= next19; - bitbuffer[(i / sizeof(z_word_t) + 19) % bitbuffer_size_zwords] ^= next20; - bitbuffer[(i / sizeof(z_word_t) + 20) % bitbuffer_size_zwords] ^= next21; - bitbuffer[(i / sizeof(z_word_t) + 21) % 
bitbuffer_size_zwords] ^= next22; + bitbuffer[(i / sizeof(chorba_word_t) + 0) % bitbuffer_size_zwords] ^= next1; + bitbuffer[(i / sizeof(chorba_word_t) + 1) % bitbuffer_size_zwords] ^= next2; + bitbuffer[(i / sizeof(chorba_word_t) + 2) % bitbuffer_size_zwords] ^= next3; + bitbuffer[(i / sizeof(chorba_word_t) + 3) % bitbuffer_size_zwords] ^= next4; + bitbuffer[(i / sizeof(chorba_word_t) + 4) % bitbuffer_size_zwords] ^= next5; + bitbuffer[(i / sizeof(chorba_word_t) + 5) % bitbuffer_size_zwords] ^= next6; + bitbuffer[(i / sizeof(chorba_word_t) + 6) % bitbuffer_size_zwords] ^= next7; + bitbuffer[(i / sizeof(chorba_word_t) + 7) % bitbuffer_size_zwords] ^= next8; + bitbuffer[(i / sizeof(chorba_word_t) + 8) % bitbuffer_size_zwords] ^= next9; + bitbuffer[(i / sizeof(chorba_word_t) + 9) % bitbuffer_size_zwords] ^= next10; + bitbuffer[(i / sizeof(chorba_word_t) + 10) % bitbuffer_size_zwords] ^= next11; + bitbuffer[(i / sizeof(chorba_word_t) + 11) % bitbuffer_size_zwords] ^= next12; + bitbuffer[(i / sizeof(chorba_word_t) + 12) % bitbuffer_size_zwords] ^= next13; + bitbuffer[(i / sizeof(chorba_word_t) + 13) % bitbuffer_size_zwords] ^= next14; + bitbuffer[(i / sizeof(chorba_word_t) + 14) % bitbuffer_size_zwords] ^= next15; + bitbuffer[(i / sizeof(chorba_word_t) + 15) % bitbuffer_size_zwords] ^= next16; + bitbuffer[(i / sizeof(chorba_word_t) + 16) % bitbuffer_size_zwords] ^= next17; + bitbuffer[(i / sizeof(chorba_word_t) + 17) % bitbuffer_size_zwords] ^= next18; + bitbuffer[(i / sizeof(chorba_word_t) + 18) % bitbuffer_size_zwords] ^= next19; + bitbuffer[(i / sizeof(chorba_word_t) + 19) % bitbuffer_size_zwords] ^= next20; + bitbuffer[(i / sizeof(chorba_word_t) + 20) % bitbuffer_size_zwords] ^= next21; + bitbuffer[(i / sizeof(chorba_word_t) + 21) % bitbuffer_size_zwords] ^= next22; for (int j = 14870; j < 14870 + 64; j++) { - bitbuffer[(j + (i / sizeof(z_word_t))) % bitbuffer_size_zwords] = 0; + bitbuffer[(j + (i / sizeof(chorba_word_t))) % bitbuffer_size_zwords] = 0; } uint64_t 
next1_64 = 0; @@ -482,7 +484,7 @@ Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive(uint32_t crc, const z_wor return ~crc; } -# if OPTIMAL_CMP == 64 +# if CHORBA_W == 8 /* Implement Chorba algorithm from https://arxiv.org/abs/2412.16398 */ Z_INTERNAL uint32_t crc32_chorba_32768_nondestructive(uint32_t crc, const uint64_t* input, size_t len) { uint64_t bitbuffer[32768 / sizeof(uint64_t)]; @@ -570,8 +572,8 @@ Z_INTERNAL uint32_t crc32_chorba_32768_nondestructive(uint32_t crc, const uint64 uint64_t out4; uint64_t out5; - in1 = input[i / sizeof(z_word_t)] ^ bitbuffer[(i / sizeof(uint64_t))]; - in2 = input[(i + 8) / sizeof(z_word_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 1)]; + in1 = input[i / sizeof(chorba_word_t)] ^ bitbuffer[(i / sizeof(uint64_t))]; + in2 = input[(i + 8) / sizeof(chorba_word_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 1)]; in1 = Z_U64_FROM_LE(in1) ^ next1_64; in2 = Z_U64_FROM_LE(in2) ^ next2_64; @@ -585,8 +587,8 @@ Z_INTERNAL uint32_t crc32_chorba_32768_nondestructive(uint32_t crc, const uint64 b3 = (in2 >> 45) ^ (in2 << 44); b4 = (in2 >> 20); - in3 = input[(i + 16) / sizeof(z_word_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 2)]; - in4 = input[(i + 24) / sizeof(z_word_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 3)]; + in3 = input[(i + 16) / sizeof(chorba_word_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 2)]; + in4 = input[(i + 24) / sizeof(chorba_word_t)] ^ bitbuffer[(i / sizeof(uint64_t) + 3)]; in3 = Z_U64_FROM_LE(in3) ^ next3_64 ^ a1; in4 = Z_U64_FROM_LE(in4) ^ next4_64 ^ a2 ^ b1; @@ -1062,7 +1064,7 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive(uint32_t crc, const uint64 return crc32_braid(~crc, (uint8_t*)final, len-i); } -#else // OPTIMAL_CMP == 64 +#else // CHORBA_W == 8 Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit(uint32_t crc, const uint32_t *input, size_t len) { uint32_t final[20] = {0}; @@ -1235,7 +1237,7 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit(uint32_t crc, const return crc32_braid(~crc, (uint8_t*)final, 
len-i); } -#endif // OPTIMAL_CMP == 64 +#endif // CHORBA_W == 8 Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) { uintptr_t align_diff = ALIGN_DIFF(buf, 8); @@ -1248,8 +1250,8 @@ Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) { buf += align_diff; } if (len > CHORBA_LARGE_THRESHOLD) - return crc32_chorba_118960_nondestructive(crc, (const z_word_t*)buf, len); -#if OPTIMAL_CMP == 64 + return crc32_chorba_118960_nondestructive(crc, (const chorba_word_t*)buf, len); +#if CHORBA_W == 8 if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) return crc32_chorba_32768_nondestructive(crc, (const uint64_t*)buf, len); return crc32_chorba_small_nondestructive(crc, (const uint64_t*)buf, len); diff --git a/arch/generic/generic_functions.h b/arch/generic/generic_functions.h index 1b296b8f92..c150a2f010 100644 --- a/arch/generic/generic_functions.h +++ b/arch/generic/generic_functions.h @@ -7,7 +7,6 @@ #include "zendian.h" #include "deflate.h" -#include "crc32_braid_p.h" typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len); typedef uint32_t (*adler32_copy_func)(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); @@ -33,10 +32,6 @@ uint32_t crc32_copy_braid(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t #ifndef WITHOUT_CHORBA uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len); - uint32_t crc32_chorba_118960_nondestructive (uint32_t crc, const z_word_t* input, size_t len); - uint32_t crc32_chorba_32768_nondestructive (uint32_t crc, const uint64_t* input, size_t len); - uint32_t crc32_chorba_small_nondestructive (uint32_t crc, const uint64_t* input, size_t len); - uint32_t crc32_chorba_small_nondestructive_32bit (uint32_t crc, const uint32_t* input, size_t len); uint32_t crc32_copy_chorba(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len); #endif diff --git a/arch/loongarch/crc32_la.c b/arch/loongarch/crc32_la.c index 
55a694bae6..f1bd314e65 100644 --- a/arch/loongarch/crc32_la.c +++ b/arch/loongarch/crc32_la.c @@ -6,7 +6,6 @@ #ifdef LOONGARCH_CRC #include "zbuild.h" -#include "crc32.h" #include <larchintrin.h> diff --git a/arch/x86/crc32_chorba_sse2.c b/arch/x86/crc32_chorba_sse2.c index 4e94ca151a..8ecd74443e 100644 --- a/arch/x86/crc32_chorba_sse2.c +++ b/arch/x86/crc32_chorba_sse2.c @@ -1,9 +1,9 @@ #if defined(X86_SSE2) && !defined(WITHOUT_CHORBA_SSE) #include "zbuild.h" +#include "crc32_chorba_p.h" #include "crc32_braid_p.h" #include "crc32_braid_tbl.h" -#include "crc32.h" #include <emmintrin.h> #include "arch/x86/x86_intrins.h" #include "arch_functions.h" @@ -857,7 +857,7 @@ Z_INTERNAL uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t l } #if !defined(WITHOUT_CHORBA) if (len > CHORBA_LARGE_THRESHOLD) - return crc32_chorba_118960_nondestructive(crc, (const z_word_t*)buf, len); + return crc32_chorba_118960_nondestructive(crc, (const chorba_word_t*)buf, len); #endif return chorba_small_nondestructive_sse2(crc, (const uint64_t*)buf, len); } diff --git a/arch/x86/crc32_chorba_sse41.c b/arch/x86/crc32_chorba_sse41.c index 96ba00ff3b..4e750cbd8d 100644 --- a/arch/x86/crc32_chorba_sse41.c +++ b/arch/x86/crc32_chorba_sse41.c @@ -1,9 +1,9 @@ #if defined(X86_SSE41) && !defined(WITHOUT_CHORBA_SSE) #include "zbuild.h" +#include "crc32_chorba_p.h" #include "crc32_braid_p.h" #include "crc32_braid_tbl.h" -#include "crc32.h" #include <emmintrin.h> #include <smmintrin.h> #include "arch/x86/x86_intrins.h" @@ -315,7 +315,7 @@ Z_INTERNAL uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t } #if !defined(WITHOUT_CHORBA) if (len > CHORBA_LARGE_THRESHOLD) - return crc32_chorba_118960_nondestructive(crc, (z_word_t*)buf, len); + return crc32_chorba_118960_nondestructive(crc, (chorba_word_t*)buf, len); #endif if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) return crc32_chorba_32768_nondestructive_sse41(crc, (const uint64_t*)buf, 
len); diff --git a/arch/x86/crc32_pclmulqdq_tpl.h b/arch/x86/crc32_pclmulqdq_tpl.h index b7ed17f809..8677f1e872 100644 --- a/arch/x86/crc32_pclmulqdq_tpl.h +++ b/arch/x86/crc32_pclmulqdq_tpl.h @@ -23,7 +23,6 @@ #include <wmmintrin.h> #include <smmintrin.h> // _mm_extract_epi32 -#include "crc32.h" #include "crc32_braid_p.h" #include "crc32_braid_tbl.h" #include "crc32_p.h" diff --git a/arch_functions.h b/arch_functions.h index a53b2f7b43..979c968624 100644 --- a/arch_functions.h +++ b/arch_functions.h @@ -8,7 +8,6 @@ #include "zbuild.h" #include "zutil.h" -#include "crc32.h" #include "deflate.h" #include "fallback_builtins.h" diff --git a/crc32.h b/crc32.h deleted file mode 100644 index e205777598..0000000000 --- a/crc32.h +++ /dev/null @@ -1,19 +0,0 @@ -/* crc32.h -- crc32 folding interface - * Copyright (C) 2021 Nathan Moinvaziri - * For conditions of distribution and use, see copyright notice in zlib.h - */ -#ifndef CRC32_H_ -#define CRC32_H_ - -/* Size thresholds for Chorba algorithm variants */ -#define CHORBA_LARGE_THRESHOLD (sizeof(z_word_t) * 64 * 1024) -#define CHORBA_MEDIUM_UPPER_THRESHOLD 32768 -#define CHORBA_MEDIUM_LOWER_THRESHOLD 8192 -#define CHORBA_SMALL_THRESHOLD_64BIT 72 -#if OPTIMAL_CMP == 64 -# define CHORBA_SMALL_THRESHOLD 72 -#else -# define CHORBA_SMALL_THRESHOLD 80 -#endif - -#endif diff --git a/crc32_braid_p.h b/crc32_braid_p.h index af26ebedda..624e22ecd9 100644 --- a/crc32_braid_p.h +++ b/crc32_braid_p.h @@ -8,6 +8,9 @@ /* Define BRAID_W and the associated z_word_t type. If BRAID_W is not defined, then a braided calculation is not used, and the associated tables and code are not compiled. + + TODO: According to crc32_braid_c.c, BRAID_N=5, BRAID_W=4 is fastest with Sparc64-VII, + PowerPC POWER9, and MIPS64 Octeon II processors. 
*/ #ifdef ARCH_64BIT # define BRAID_W 8 diff --git a/crc32_chorba_p.h b/crc32_chorba_p.h new file mode 100644 index 0000000000..f599e707b0 --- /dev/null +++ b/crc32_chorba_p.h @@ -0,0 +1,34 @@ +/* crc32_chorba_p.h -- crc32 chorba interface + * Copyright (C) 2021 Nathan Moinvaziri + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef CRC32_CHORBA_P_H_ +#define CRC32_CHORBA_P_H_ + +#include "zendian.h" + +/* Size thresholds for Chorba algorithm variants */ + +#define CHORBA_LARGE_THRESHOLD (sizeof(chorba_word_t) * 64 * 1024) +#define CHORBA_MEDIUM_UPPER_THRESHOLD 32768 +#define CHORBA_MEDIUM_LOWER_THRESHOLD 8192 +#define CHORBA_SMALL_THRESHOLD_64BIT 72 +#ifdef ARCH_64BIT +# define CHORBA_SMALL_THRESHOLD 72 +# define CHORBA_W 8 +# define CHORBA_WORD_FROM_LE(word) Z_U64_FROM_LE(word) + typedef uint64_t chorba_word_t; +#else +# define CHORBA_SMALL_THRESHOLD 80 +# define CHORBA_W 4 +# define CHORBA_WORD_FROM_LE(word) Z_U32_FROM_LE(word) + typedef uint32_t chorba_word_t; +#endif + +Z_INTERNAL uint32_t crc32_chorba_118960_nondestructive (uint32_t crc, const chorba_word_t* input, size_t len); +Z_INTERNAL uint32_t crc32_chorba_32768_nondestructive (uint32_t crc, const uint64_t* input, size_t len); +Z_INTERNAL uint32_t crc32_chorba_small_nondestructive (uint32_t crc, const uint64_t* input, size_t len); +Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit (uint32_t crc, const uint32_t* input, size_t len); + +#endif /* CRC32_CHORBA_P_H_ */ @@ -13,7 +13,6 @@ #include "zutil.h" #include "zendian.h" #include "zmemory.h" -#include "crc32.h" #ifdef S390_DFLTCC_DEFLATE # include "arch/s390/dfltcc_common.h" diff --git a/functable.h b/functable.h index bb33fdb869..95e05d5ed7 100644 --- a/functable.h +++ b/functable.h @@ -7,7 +7,6 @@ #define FUNCTABLE_H_ #include "deflate.h" -#include "crc32.h" #ifdef DISABLE_RUNTIME_CPU_DETECTION @@ -11,8 +11,6 @@ #ifndef INFLATE_H_ #define INFLATE_H_ -#include "crc32.h" - #ifdef S390_DFLTCC_INFLATE # 
include "arch/s390/dfltcc_common.h" # define HAVE_ARCH_INFLATE_STATE |
