diff options
| author | Hans Kristian Rosbach <hk-git@circlestorm.org> | 2025-04-11 00:46:06 +0200 |
|---|---|---|
| committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2025-04-14 23:28:38 +0200 |
| commit | 00a3168d5dd2e93ae65f83b91228f00f0bf507be (patch) | |
| tree | 91756787eaf42d219dc504255188bb0e0a341563 /test | |
| parent | cfd90c7e1ace237b271ad000826051a4571af170 (diff) | |
| download | Project-Tick-00a3168d5dd2e93ae65f83b91228f00f0bf507be.tar.gz Project-Tick-00a3168d5dd2e93ae65f83b91228f00f0bf507be.zip | |
Add AVX512 version of compare256
Improve the speed of sub-16-byte matches by first using a
128-bit intrinsic; after that, use only 512-bit intrinsics.
This requires us to overlap on the last run, but this is cheaper than
processing the tail using a 256-bit and then a 128-bit run.
Change the benchmark steps so that they hit the chunk boundaries
of one function or the other less often; this gives fairer benchmarks.
Diffstat (limited to 'test')
| -rw-r--r-- | test/benchmarks/benchmark_compare256.cc | 5 | ||||
| -rw-r--r-- | test/test_compare256.cc | 3 |
2 files changed, 7 insertions, 1 deletions
diff --git a/test/benchmarks/benchmark_compare256.cc b/test/benchmarks/benchmark_compare256.cc index c27bff1360..8ed2d0eb3d 100644 --- a/test/benchmarks/benchmark_compare256.cc +++ b/test/benchmarks/benchmark_compare256.cc @@ -59,7 +59,7 @@ public: } \ Bench(state, fptr); \ } \ - BENCHMARK_REGISTER_F(compare256, name)->Range(1, MAX_COMPARE_SIZE); + BENCHMARK_REGISTER_F(compare256, name)->Arg(1)->Arg(10)->Arg(40)->Arg(80)->Arg(100)->Arg(175)->Arg(256); #ifdef DISABLE_RUNTIME_CPU_DETECTION BENCHMARK_COMPARE256(native, native_compare256, 1); @@ -80,6 +80,9 @@ BENCHMARK_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2); #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) BENCHMARK_COMPARE256(avx2, compare256_avx2, test_cpu_features.x86.has_avx2); #endif +#if defined(X86_AVX512) && defined(HAVE_BUILTIN_CTZLL) +BENCHMARK_COMPARE256(avx512, compare256_avx512, test_cpu_features.x86.has_avx512_common); +#endif #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) BENCHMARK_COMPARE256(neon, compare256_neon, test_cpu_features.arm.has_neon); #endif diff --git a/test/test_compare256.cc b/test/test_compare256.cc index 035e63c966..f367cd0f4e 100644 --- a/test/test_compare256.cc +++ b/test/test_compare256.cc @@ -79,6 +79,9 @@ TEST_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2) #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) TEST_COMPARE256(avx2, compare256_avx2, test_cpu_features.x86.has_avx2) #endif +#if defined(X86_AVX512) && defined(HAVE_BUILTIN_CTZLL) +TEST_COMPARE256(avx512, compare256_avx512, test_cpu_features.x86.has_avx512_common) +#endif #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) TEST_COMPARE256(neon, compare256_neon, test_cpu_features.arm.has_neon) #endif |
