summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author: Hans Kristian Rosbach <hk-git@circlestorm.org> 2025-04-11 00:46:06 +0200
committer: Hans Kristian Rosbach <hk-github@circlestorm.org> 2025-04-14 23:28:38 +0200
commit: 00a3168d5dd2e93ae65f83b91228f00f0bf507be (patch)
tree: 91756787eaf42d219dc504255188bb0e0a341563 /test
parent: cfd90c7e1ace237b271ad000826051a4571af170 (diff)
download: Project-Tick-00a3168d5dd2e93ae65f83b91228f00f0bf507be.tar.gz
Project-Tick-00a3168d5dd2e93ae65f83b91228f00f0bf507be.zip
Add AVX512 version of compare256
Improve the speed of sub-16-byte matches by first using a 128-bit intrinsic and, after that, only 512-bit intrinsics. This requires overlapping on the last run, but that is cheaper than processing the tail with a 256-bit run followed by a 128-bit run. Also change the benchmark steps so that they hit the chunk boundaries of one function or the other less often; this gives fairer benchmarks.
Diffstat (limited to 'test')
-rw-r--r-- test/benchmarks/benchmark_compare256.cc 5
-rw-r--r-- test/test_compare256.cc 3
2 files changed, 7 insertions, 1 deletion
diff --git a/test/benchmarks/benchmark_compare256.cc b/test/benchmarks/benchmark_compare256.cc
index c27bff1360..8ed2d0eb3d 100644
--- a/test/benchmarks/benchmark_compare256.cc
+++ b/test/benchmarks/benchmark_compare256.cc
@@ -59,7 +59,7 @@ public:
} \
Bench(state, fptr); \
} \
- BENCHMARK_REGISTER_F(compare256, name)->Range(1, MAX_COMPARE_SIZE);
+ BENCHMARK_REGISTER_F(compare256, name)->Arg(1)->Arg(10)->Arg(40)->Arg(80)->Arg(100)->Arg(175)->Arg(256);
#ifdef DISABLE_RUNTIME_CPU_DETECTION
BENCHMARK_COMPARE256(native, native_compare256, 1);
@@ -80,6 +80,9 @@ BENCHMARK_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2);
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
BENCHMARK_COMPARE256(avx2, compare256_avx2, test_cpu_features.x86.has_avx2);
#endif
+#if defined(X86_AVX512) && defined(HAVE_BUILTIN_CTZLL)
+BENCHMARK_COMPARE256(avx512, compare256_avx512, test_cpu_features.x86.has_avx512_common);
+#endif
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
BENCHMARK_COMPARE256(neon, compare256_neon, test_cpu_features.arm.has_neon);
#endif
diff --git a/test/test_compare256.cc b/test/test_compare256.cc
index 035e63c966..f367cd0f4e 100644
--- a/test/test_compare256.cc
+++ b/test/test_compare256.cc
@@ -79,6 +79,9 @@ TEST_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2)
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
TEST_COMPARE256(avx2, compare256_avx2, test_cpu_features.x86.has_avx2)
#endif
+#if defined(X86_AVX512) && defined(HAVE_BUILTIN_CTZLL)
+TEST_COMPARE256(avx512, compare256_avx512, test_cpu_features.x86.has_avx512_common)
+#endif
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
TEST_COMPARE256(neon, compare256_neon, test_cpu_features.arm.has_neon)
#endif