diff options
| author | Hans Kristian Rosbach <hk-git@circlestorm.org> | 2025-11-11 22:47:52 +0100 |
|---|---|---|
| committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2025-11-15 00:46:38 +0100 |
| commit | a72d5f249b31d80990a0b687bd7a822301205f0c (patch) | |
| tree | 2f3036c61c67f78a2fca3c362a05acdb8e88dcc0 | |
| parent | 8003f57828f7310aaa035519bfa17c93b5621977 (diff) | |
| download | Project-Tick-a72d5f249b31d80990a0b687bd7a822301205f0c.tar.gz Project-Tick-a72d5f249b31d80990a0b687bd7a822301205f0c.zip | |
- Unify crc32_chorba, chorba_sse2 and chorba_sse41 dispatch functions.
- Fixed alignment diff calculation in crc32_chorba.
- Fixed length check to happen early, avoiding extra branches for lengths that are too short.
This also allows removing one function call to crc32_braid_internal that was used to handle those.
Gbench shows ~0.15-0.25 ns saved per call for lengths shorter than CHORBA_SMALL_THRESHOLD.
- Avoid calculating the aligned length if the buffer is already aligned.
| -rw-r--r-- | arch/generic/crc32_chorba_c.c | 33 | ||||
| -rw-r--r-- | arch/x86/chorba_sse2.c | 24 | ||||
| -rw-r--r-- | arch/x86/chorba_sse41.c | 27 | ||||
| -rw-r--r-- | crc32.h | 6 |
4 files changed, 42 insertions, 48 deletions
diff --git a/arch/generic/crc32_chorba_c.c b/arch/generic/crc32_chorba_c.c index 4041abd46e..6f90d3c09f 100644 --- a/arch/generic/crc32_chorba_c.c +++ b/arch/generic/crc32_chorba_c.c @@ -1448,32 +1448,31 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit (uint32_t crc, const #endif // OPTIMAL_CMP == 64 Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) { + uint64_t* aligned_buf; uint32_t c = (~crc) & 0xffffffff; + uintptr_t algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 7)) & 7; - uint64_t* aligned_buf; - size_t aligned_len; - unsigned long algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 0xF)) & 0xF; - if (algn_diff < len) { + if (len > algn_diff + CHORBA_SMALL_THRESHOLD) { if (algn_diff) { c = crc32_braid_internal(c, buf, algn_diff); + len -= algn_diff; } aligned_buf = (uint64_t*) (buf + algn_diff); - aligned_len = len - algn_diff; - if(aligned_len > CHORBA_LARGE_THRESHOLD) - c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len); + if(len > CHORBA_LARGE_THRESHOLD) { + c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len); +# if OPTIMAL_CMP == 64 + } else if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) { + c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, len); +# endif + } else { # if OPTIMAL_CMP == 64 - else if (aligned_len > CHORBA_MEDIUM_LOWER_THRESHOLD && aligned_len <= CHORBA_MEDIUM_UPPER_THRESHOLD) - c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, aligned_len); - else if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT) - c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, aligned_len); + c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, len); # else - else if (aligned_len > CHORBA_SMALL_THRESHOLD_32BIT) - c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, aligned_len); + c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, len); # endif - else - c 
= crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len); - } - else { + } + } else { + // Process too short lengths using crc32_braid c = crc32_braid_internal(c, buf, len); } diff --git a/arch/x86/chorba_sse2.c b/arch/x86/chorba_sse2.c index 3e25d7586b..f79a5ac00d 100644 --- a/arch/x86/chorba_sse2.c +++ b/arch/x86/chorba_sse2.c @@ -847,30 +847,26 @@ Z_INTERNAL uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint64_ } Z_INTERNAL uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len) { - uint32_t c; uint64_t* aligned_buf; - size_t aligned_len; + uint32_t c = (~crc) & 0xffffffff; + uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15; - c = (~crc) & 0xffffffff; - unsigned long algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15; - if (algn_diff < len) { + if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) { if (algn_diff) { c = crc32_braid_internal(c, buf, algn_diff); + len -= algn_diff; } aligned_buf = (uint64_t*) (buf + algn_diff); - aligned_len = len - algn_diff; #if !defined(WITHOUT_CHORBA) - if(aligned_len > CHORBA_LARGE_THRESHOLD) { - c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len); + if(len > CHORBA_LARGE_THRESHOLD) { + c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len); } else #endif - if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT) { - c = chorba_small_nondestructive_sse2(c, aligned_buf, aligned_len); - } else { - c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len); + { + c = chorba_small_nondestructive_sse2(c, aligned_buf, len); } - } - else { + } else { + // Process too short lengths using crc32_braid c = crc32_braid_internal(c, buf, len); } diff --git a/arch/x86/chorba_sse41.c b/arch/x86/chorba_sse41.c index aebede45e2..a7568a2800 100644 --- a/arch/x86/chorba_sse41.c +++ b/arch/x86/chorba_sse41.c @@ -305,33 +305,28 @@ static Z_FORCEINLINE uint32_t crc32_chorba_32768_nondestructive_sse41(uint32_t c } Z_INTERNAL uint32_t 
crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len) { - uint32_t c; uint64_t* aligned_buf; - size_t aligned_len; - - c = (~crc) & 0xffffffff; + uint32_t c = (~crc) & 0xffffffff; uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15; - if (algn_diff < len) { + + if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) { if (algn_diff) { c = crc32_braid_internal(c, buf, algn_diff); + len -= algn_diff; } aligned_buf = (uint64_t*) (buf + algn_diff); - aligned_len = len - algn_diff; #if !defined(WITHOUT_CHORBA) - if(aligned_len > CHORBA_LARGE_THRESHOLD) { - c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len); + if(len > CHORBA_LARGE_THRESHOLD) { + c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len); } else #endif - if (aligned_len > CHORBA_MEDIUM_LOWER_THRESHOLD && - aligned_len <= CHORBA_MEDIUM_UPPER_THRESHOLD) { - c = crc32_chorba_32768_nondestructive_sse41(c, aligned_buf, aligned_len); - } else if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT) { - c = chorba_small_nondestructive_sse2(c, aligned_buf, aligned_len); + if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) { + c = crc32_chorba_32768_nondestructive_sse41(c, aligned_buf, len); } else { - c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len); + c = chorba_small_nondestructive_sse2(c, aligned_buf, len); } - } - else { + } else { + // Process too short lengths using crc32_braid c = crc32_braid_internal(c, buf, len); } @@ -13,7 +13,11 @@ #define CHORBA_MEDIUM_UPPER_THRESHOLD 32768 #define CHORBA_MEDIUM_LOWER_THRESHOLD 8192 #define CHORBA_SMALL_THRESHOLD_64BIT 72 -#define CHORBA_SMALL_THRESHOLD_32BIT 80 +#if OPTIMAL_CMP == 64 +# define CHORBA_SMALL_THRESHOLD 72 +#else +# define CHORBA_SMALL_THRESHOLD 80 +#endif typedef struct crc32_fold_s { uint8_t fold[CRC32_FOLD_BUFFER_SIZE]; |
