diff options
| author | Adam Stylinski <kungfujesus06@gmail.com> | 2025-02-16 12:13:00 -0500 |
|---|---|---|
| committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2025-03-28 20:43:59 +0100 |
| commit | 724dc0cfb4805dfd57983080ec4d2b3c53262e87 (patch) | |
| tree | b9bd4347f3059cb5976ec37c9ad25535bd56b9e8 /test | |
| parent | 2bba7e8468e808b7a7d5c1045d339eb5ffd12591 (diff) | |
| download | Project-Tick-724dc0cfb4805dfd57983080ec4d2b3c53262e87.tar.gz Project-Tick-724dc0cfb4805dfd57983080ec4d2b3c53262e87.zip | |
Explicit SSE2 vectorization of Chorba CRC method
The version that's currently in the generic implementation for 32768-byte
buffers leverages the stack. It manages to autovectorize, but
unfortunately the trips to the stack hurt its performance on the CPUs that
need this the most. This version is explicitly SIMD vectorized and
avoids trips to the stack. In my testing it's ~10% faster than the
"small" variant, and about 42% faster than the "32768" variant.
Diffstat (limited to 'test')
| -rw-r--r-- | test/benchmarks/benchmark_crc32.cc | 6 | ||||
| -rw-r--r-- | test/test_crc32.cc | 3 |
2 files changed, 9 insertions, 0 deletions
diff --git a/test/benchmarks/benchmark_crc32.cc b/test/benchmarks/benchmark_crc32.cc index 5c5751afc7..e51cff7bba 100644 --- a/test/benchmarks/benchmark_crc32.cc +++ b/test/benchmarks/benchmark_crc32.cc @@ -68,6 +68,12 @@ BENCHMARK_CRC32(braid, crc32_braid, 1); BENCHMARK_CRC32(native, native_crc32, 1); #else +#ifndef WITHOUT_CHORBA +# if defined(X86_SSE2) && !defined(NO_CHORBA_SSE2) + BENCHMARK_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2); +# endif +#endif + #ifdef ARM_CRC32 BENCHMARK_CRC32(armv8, crc32_armv8, test_cpu_features.arm.has_crc32); #endif diff --git a/test/test_crc32.cc b/test/test_crc32.cc index ee301ef602..f6aac12a97 100644 --- a/test/test_crc32.cc +++ b/test/test_crc32.cc @@ -281,5 +281,8 @@ TEST_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq) #ifdef X86_VPCLMULQDQ_CRC TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq)) #endif +#if !defined(WITHOUT_CHORBA) && defined(X86_SSE2) && !defined(NO_CHORBA_SSE2) +TEST_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2) +#endif #endif |
