summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author: Adam Stylinski <kungfujesus06@gmail.com> 2025-02-16 12:13:00 -0500
committer: Hans Kristian Rosbach <hk-github@circlestorm.org> 2025-03-28 20:43:59 +0100
commit: 724dc0cfb4805dfd57983080ec4d2b3c53262e87 (patch)
tree: b9bd4347f3059cb5976ec37c9ad25535bd56b9e8 /test
parent: 2bba7e8468e808b7a7d5c1045d339eb5ffd12591 (diff)
download: Project-Tick-724dc0cfb4805dfd57983080ec4d2b3c53262e87.tar.gz
          Project-Tick-724dc0cfb4805dfd57983080ec4d2b3c53262e87.zip
Explicit SSE2 vectorization of Chorba CRC method
The version that's currently in the generic implementation for 32768-byte buffers makes use of the stack. It manages to autovectorize, but unfortunately the trips to the stack hurt its performance on the CPUs that need this the most. This version is explicitly SIMD-vectorized and avoids trips to the stack. In my testing it's ~10% faster than the "small" variant and about 42% faster than the "32768" variant.
Diffstat (limited to 'test')
-rw-r--r-- test/benchmarks/benchmark_crc32.cc | 6
-rw-r--r-- test/test_crc32.cc | 3
2 files changed, 9 insertions, 0 deletions
diff --git a/test/benchmarks/benchmark_crc32.cc b/test/benchmarks/benchmark_crc32.cc
index 5c5751afc7..e51cff7bba 100644
--- a/test/benchmarks/benchmark_crc32.cc
+++ b/test/benchmarks/benchmark_crc32.cc
@@ -68,6 +68,12 @@ BENCHMARK_CRC32(braid, crc32_braid, 1);
BENCHMARK_CRC32(native, native_crc32, 1);
#else
+#ifndef WITHOUT_CHORBA
+# if defined(X86_SSE2) && !defined(NO_CHORBA_SSE2)
+ BENCHMARK_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2);
+# endif
+#endif
+
#ifdef ARM_CRC32
BENCHMARK_CRC32(armv8, crc32_armv8, test_cpu_features.arm.has_crc32);
#endif
diff --git a/test/test_crc32.cc b/test/test_crc32.cc
index ee301ef602..f6aac12a97 100644
--- a/test/test_crc32.cc
+++ b/test/test_crc32.cc
@@ -281,5 +281,8 @@ TEST_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
#ifdef X86_VPCLMULQDQ_CRC
TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512_common && test_cpu_features.x86.has_vpclmulqdq))
#endif
+#if !defined(WITHOUT_CHORBA) && defined(X86_SSE2) && !defined(NO_CHORBA_SSE2)
+TEST_CRC32(chorba_sse2, crc32_chorba_sse2, test_cpu_features.x86.has_sse2)
+#endif
#endif