diff options
| author | Nathan Moinvaziri <nathan@nathanm.com> | 2025-12-26 08:56:41 -0800 |
|---|---|---|
| committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2025-12-28 14:07:36 +0100 |
| commit | 1a32c9977f6d220e178d9bf0856d713ceb0837dc (patch) | |
| tree | ae4a4fc919592df0a86772f0214b16daf2cca781 | |
| parent | a8a12f465758accea6a9dd37d6979c014a42a7c6 (diff) | |
| download | Project-Tick-1a32c9977f6d220e178d9bf0856d713ceb0837dc.tar.gz Project-Tick-1a32c9977f6d220e178d9bf0856d713ceb0837dc.zip | |
Add missing adler32_copy_vmx implementation
| -rw-r--r-- | arch/power/adler32_vmx.c | 13 | ||||
| -rw-r--r-- | arch/power/power_functions.h | 3 | ||||
| -rw-r--r-- | functable.c | 1 | ||||
| -rw-r--r-- | test/benchmarks/benchmark_adler32_copy.cc | 3 |
4 files changed, 17 insertions, 3 deletions
```diff
diff --git a/arch/power/adler32_vmx.c b/arch/power/adler32_vmx.c
index 9ab53e1fa0..004d3fce68 100644
--- a/arch/power/adler32_vmx.c
+++ b/arch/power/adler32_vmx.c
@@ -118,7 +118,7 @@ static void vmx_accum32(uint32_t *s, const uint8_t *buf, size_t len) {
     vec_ste(s2acc, 0, s+1);
 }
 
-Z_INTERNAL uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len) {
+static inline uint32_t adler32_impl(uint32_t adler, const uint8_t *buf, size_t len) {
     uint32_t sum2;
     uint32_t pair[16] ALIGNED_(16);
     memset(&pair[2], 0, 14);
@@ -183,4 +183,15 @@ Z_INTERNAL uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len)
     /* D = B * 65536 + A, see: https://en.wikipedia.org/wiki/Adler-32. */
     return (pair[1] << 16) | pair[0];
 }
+
+Z_INTERNAL uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len) {
+    return adler32_impl(adler, buf, len);
+}
+
+/* VMX stores can have higher latency than optimized memcpy */
+Z_INTERNAL uint32_t adler32_copy_vmx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+    adler = adler32_impl(adler, src, len);
+    memcpy(dst, src, len);
+    return adler;
+}
 #endif
diff --git a/arch/power/power_functions.h b/arch/power/power_functions.h
index fe3a64821a..7697073afb 100644
--- a/arch/power/power_functions.h
+++ b/arch/power/power_functions.h
@@ -9,6 +9,7 @@
 
 #ifdef PPC_VMX
 uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
+uint32_t adler32_copy_vmx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 void slide_hash_vmx(deflate_state *s);
 #endif
 
@@ -33,6 +34,8 @@ uint32_t longest_match_slow_power9(deflate_state *const s, uint32_t cur_match);
 # if defined(PPC_VMX) && defined(__ALTIVEC__)
 # undef native_adler32
 # define native_adler32 adler32_vmx
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_vmx
 # undef native_slide_hash
 # define native_slide_hash slide_hash_vmx
 # endif
diff --git a/functable.c b/functable.c
index e33d8ef3aa..1085751916 100644
--- a/functable.c
+++ b/functable.c
@@ -249,6 +249,7 @@ static int init_functable(void) {
 #ifdef PPC_VMX
     if (cf.power.has_altivec) {
         ft.adler32 = &adler32_vmx;
+        ft.adler32_copy = &adler32_copy_vmx;
         ft.slide_hash = &slide_hash_vmx;
     }
 #endif
diff --git a/test/benchmarks/benchmark_adler32_copy.cc b/test/benchmarks/benchmark_adler32_copy.cc
index 58c3b4a01e..05b1f0fac4 100644
--- a/test/benchmarks/benchmark_adler32_copy.cc
+++ b/test/benchmarks/benchmark_adler32_copy.cc
@@ -97,8 +97,7 @@ BENCHMARK_ADLER32_BASELINE_COPY(neon_copy_baseline, adler32_neon, test_cpu_featu
 #endif
 
 #ifdef PPC_VMX
-//BENCHMARK_ADLER32_COPY(vmx_inline_copy, adler32_copy_vmx, test_cpu_features.power.has_altivec);
-BENCHMARK_ADLER32_BASELINE_COPY(vmx_copy_baseline, adler32_vmx, test_cpu_features.power.has_altivec);
+BENCHMARK_ADLER32_COPY(vmx, adler32_copy_vmx, test_cpu_features.power.has_altivec);
 #endif
 #ifdef POWER8_VSX
 //BENCHMARK_ADLER32_COPY(power8_inline_copy, adler32_copy_power8, test_cpu_features.power.has_arch_2_07);
```
