summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathan Moinvaziri <nathan@nathanm.com>2025-12-02 15:25:56 -0800
committerGitHub <noreply@github.com>2025-12-03 00:25:56 +0100
commitceef7bfce81e62306b4ad14f1f175413397735c6 (patch)
tree9dc2591682bbc84c77d031edbf5625accaf0e121
parent368a926fdb0fee977719ff927178b8eff0de4a62 (diff)
downloadProject-Tick-ceef7bfce81e62306b4ad14f1f175413397735c6.tar.gz
Project-Tick-ceef7bfce81e62306b4ad14f1f175413397735c6.zip
Rename adler32_fold_copy to adler32_copy (#2026)
There are no folding techniques in the adler32 implementations; it is simply hashing while copying.
- Rename adler32_fold_copy to adler32_copy.
- Remove unnecessary adler32_fold.c file.
- Reorder adler32_copy functions last in source file for consistency.
- Rename adler32_rvv_impl to adler32_copy_impl for consistency.
- Replace dst != NULL with 1 in adler32_copy_neon to remove branching.
-rw-r--r--CMakeLists.txt2
-rw-r--r--Makefile.in2
-rw-r--r--arch/arm/adler32_neon.c8
-rw-r--r--arch/arm/arm_functions.h6
-rw-r--r--arch/generic/Makefile.in7
-rw-r--r--arch/generic/adler32_c.c7
-rw-r--r--arch/generic/adler32_fold_c.c15
-rw-r--r--arch/generic/generic_functions.h4
-rw-r--r--arch/loongarch/adler32_lasx.c12
-rw-r--r--arch/loongarch/adler32_lsx.c8
-rw-r--r--arch/loongarch/loongarch_functions.h12
-rw-r--r--arch/riscv/adler32_rvv.c10
-rw-r--r--arch/riscv/riscv_functions.h6
-rw-r--r--arch/x86/adler32_avx2.c14
-rw-r--r--arch/x86/adler32_avx512.c11
-rw-r--r--arch/x86/adler32_avx512_vnni.c2
-rw-r--r--arch/x86/adler32_sse42.c2
-rw-r--r--arch/x86/x86_functions.h24
-rw-r--r--deflate_p.h2
-rw-r--r--functable.c28
-rw-r--r--functable.h2
-rw-r--r--inflate.c2
-rw-r--r--test/benchmarks/benchmark_adler32_copy.cc18
23 files changed, 91 insertions, 113 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f4d184bb5d..f59bd9e68d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1366,7 +1366,6 @@ set(ZLIB_SRCS
set(ZLIB_ALL_FALLBACK_SRCS
arch/generic/adler32_c.c
- arch/generic/adler32_fold_c.c
arch/generic/chunkset_c.c
arch/generic/compare256_c.c
arch/generic/crc32_braid_c.c
@@ -1381,7 +1380,6 @@ elseif(${ARCH} STREQUAL "x86_64" AND WITH_SSE2)
# x86_64 always has SSE2, so let the SSE2 functions act as fallbacks.
list(APPEND ZLIB_GENERIC_SRCS
arch/generic/adler32_c.c
- arch/generic/adler32_fold_c.c
arch/generic/crc32_braid_c.c
arch/generic/crc32_fold_c.c
)
diff --git a/Makefile.in b/Makefile.in
index 1ef4f85d75..1e248a8935 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -76,7 +76,6 @@ pkgconfigdir = ${libdir}/pkgconfig
OBJZ = \
arch/generic/adler32_c.o \
- arch/generic/adler32_fold_c.o \
arch/generic/chunkset_c.o \
arch/generic/compare256_c.o \
arch/generic/crc32_braid_c.o \
@@ -117,7 +116,6 @@ OBJC = $(OBJZ) $(OBJG)
PIC_OBJZ = \
arch/generic/adler32_c.lo \
- arch/generic/adler32_fold_c.lo \
arch/generic/chunkset_c.lo \
arch/generic/compare256_c.lo \
arch/generic/crc32_braid_c.lo \
diff --git a/arch/arm/adler32_neon.c b/arch/arm/adler32_neon.c
index 33c84228a7..53cf48253f 100644
--- a/arch/arm/adler32_neon.c
+++ b/arch/arm/adler32_neon.c
@@ -265,7 +265,7 @@ static void NEON_handle_tail(uint32_t *pair, const uint8_t *buf, size_t len) {
}
}
-static Z_FORCEINLINE uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) {
+static Z_FORCEINLINE uint32_t adler32_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) {
/* split Adler-32 into component sums */
uint32_t sum2 = (adler >> 16) & 0xffff;
adler &= 0xffff;
@@ -376,11 +376,11 @@ static Z_FORCEINLINE uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *ds
}
Z_INTERNAL uint32_t adler32_neon(uint32_t adler, const uint8_t *src, size_t len) {
- return adler32_fold_copy_impl(adler, NULL, src, len, 0);
+ return adler32_copy_impl(adler, NULL, src, len, 0);
}
-Z_INTERNAL uint32_t adler32_fold_copy_neon(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
- return adler32_fold_copy_impl(adler, dst, src, len, dst != NULL);
+Z_INTERNAL uint32_t adler32_copy_neon(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+ return adler32_copy_impl(adler, dst, src, len, 1);
}
#endif
diff --git a/arch/arm/arm_functions.h b/arch/arm/arm_functions.h
index f313655e79..2175c94d59 100644
--- a/arch/arm/arm_functions.h
+++ b/arch/arm/arm_functions.h
@@ -7,7 +7,7 @@
#ifdef ARM_NEON
uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t adler32_fold_copy_neon(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint32_t adler32_copy_neon(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint8_t* chunkmemset_safe_neon(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
# ifdef HAVE_BUILTIN_CTZLL
@@ -40,8 +40,8 @@ void slide_hash_armv6(deflate_state *s);
# if (defined(ARM_NEON) && (defined(__ARM_NEON__) || defined(__ARM_NEON))) || ARM_NOCHECK_NEON
# undef native_adler32
# define native_adler32 adler32_neon
-# undef native_adler32_fold_copy
-# define native_adler32_fold_copy adler32_fold_copy_neon
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_neon
# undef native_chunkmemset_safe
# define native_chunkmemset_safe chunkmemset_safe_neon
# undef native_inflate_fast
diff --git a/arch/generic/Makefile.in b/arch/generic/Makefile.in
index ba20e9e5fb..46e59894d9 100644
--- a/arch/generic/Makefile.in
+++ b/arch/generic/Makefile.in
@@ -14,7 +14,6 @@ TOPDIR=$(SRCTOP)
all: \
adler32_c.o adler32_c.lo \
- adler32_fold_c.o adler32_fold_c.lo \
chunkset_c.o chunkset_c.lo \
compare256_c.o compare256_c.lo \
crc32_braid_c.o crc32_braid_c.lo \
@@ -29,12 +28,6 @@ adler32_c.o: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h
adler32_c.lo: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c
-adler32_fold_c.o: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c
-
-adler32_fold_c.lo: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
- $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c
-
chunkset_c.o: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c
diff --git a/arch/generic/adler32_c.c b/arch/generic/adler32_c.c
index da32c95a3d..99aeb6767d 100644
--- a/arch/generic/adler32_c.c
+++ b/arch/generic/adler32_c.c
@@ -7,7 +7,6 @@
#include "functable.h"
#include "adler32_p.h"
-/* ========================================================================= */
Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) {
uint32_t sum2;
unsigned n;
@@ -52,3 +51,9 @@ Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) {
/* do remaining bytes (less than NMAX, still just one modulo) */
return adler32_len_64(adler, buf, len, sum2);
}
+
+Z_INTERNAL uint32_t adler32_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+ adler = FUNCTABLE_CALL(adler32)(adler, src, len);
+ memcpy(dst, src, len);
+ return adler;
+}
diff --git a/arch/generic/adler32_fold_c.c b/arch/generic/adler32_fold_c.c
deleted file mode 100644
index 397dd10400..0000000000
--- a/arch/generic/adler32_fold_c.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/* adler32_fold.c -- adler32 folding interface
- * Copyright (C) 2022 Adam Stylinski
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-#include "zbuild.h"
-#include "functable.h"
-
-#include <limits.h>
-
-Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
- adler = FUNCTABLE_CALL(adler32)(adler, src, len);
- memcpy(dst, src, len);
- return adler;
-}
diff --git a/arch/generic/generic_functions.h b/arch/generic/generic_functions.h
index 6e18e34045..a04aca3825 100644
--- a/arch/generic/generic_functions.h
+++ b/arch/generic/generic_functions.h
@@ -19,7 +19,7 @@ typedef void (*slide_hash_func)(deflate_state *s);
uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint32_t adler32_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint8_t* chunkmemset_safe_c(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
@@ -51,7 +51,7 @@ void slide_hash_c(deflate_state *s);
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// Generic code
# define native_adler32 adler32_c
-# define native_adler32_fold_copy adler32_fold_copy_c
+# define native_adler32_copy adler32_copy_c
# define native_chunkmemset_safe chunkmemset_safe_c
#ifndef WITHOUT_CHORBA
# define native_crc32 crc32_chorba
diff --git a/arch/loongarch/adler32_lasx.c b/arch/loongarch/adler32_lasx.c
index 2cef16c0cf..ab416f9b27 100644
--- a/arch/loongarch/adler32_lasx.c
+++ b/arch/loongarch/adler32_lasx.c
@@ -31,10 +31,10 @@ static inline uint32_t partial_hsum256(__m256i x) {
return (uint32_t)__lasx_xvpickve2gr_wu(sum2, 0);
}
-extern uint32_t adler32_fold_copy_lsx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+extern uint32_t adler32_copy_lsx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
extern uint32_t adler32_lsx(uint32_t adler, const uint8_t *src, size_t len);
-static inline uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) {
+static inline uint32_t adler32_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) {
if (src == NULL) return 1L;
if (len == 0) return adler;
@@ -51,7 +51,7 @@ rem_peel:
}
} else if (len < 32) {
if (COPY) {
- return adler32_fold_copy_lsx(adler, dst, src, len);
+ return adler32_copy_lsx(adler, dst, src, len);
} else {
return adler32_lsx(adler, src, len);
}
@@ -117,11 +117,11 @@ rem_peel:
}
Z_INTERNAL uint32_t adler32_lasx(uint32_t adler, const uint8_t *src, size_t len) {
- return adler32_fold_copy_impl(adler, NULL, src, len, 0);
+ return adler32_copy_impl(adler, NULL, src, len, 0);
}
-Z_INTERNAL uint32_t adler32_fold_copy_lasx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
- return adler32_fold_copy_impl(adler, dst, src, len, 1);
+Z_INTERNAL uint32_t adler32_copy_lasx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+ return adler32_copy_impl(adler, dst, src, len, 1);
}
#endif
diff --git a/arch/loongarch/adler32_lsx.c b/arch/loongarch/adler32_lsx.c
index 7f43262ec0..8c997f3ac0 100644
--- a/arch/loongarch/adler32_lsx.c
+++ b/arch/loongarch/adler32_lsx.c
@@ -29,7 +29,7 @@ static inline uint32_t hsum(__m128i x) {
return __lsx_vpickve2gr_w(sum4, 0);
}
-static inline uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) {
+static inline uint32_t adler32_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) {
if (src == NULL) return 1L;
if (len == 0) return adler;
@@ -146,11 +146,11 @@ rem_peel:
}
Z_INTERNAL uint32_t adler32_lsx(uint32_t adler, const uint8_t *src, size_t len) {
- return adler32_fold_copy_impl(adler, NULL, src, len, 0);
+ return adler32_copy_impl(adler, NULL, src, len, 0);
}
-Z_INTERNAL uint32_t adler32_fold_copy_lsx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
- return adler32_fold_copy_impl(adler, dst, src, len, 1);
+Z_INTERNAL uint32_t adler32_copy_lsx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+ return adler32_copy_impl(adler, dst, src, len, 1);
}
#endif
diff --git a/arch/loongarch/loongarch_functions.h b/arch/loongarch/loongarch_functions.h
index 798c1484c2..54950629cb 100644
--- a/arch/loongarch/loongarch_functions.h
+++ b/arch/loongarch/loongarch_functions.h
@@ -16,7 +16,7 @@ void crc32_fold_loongarch64(crc32_fold *crc, const uint8_t *src, size_t len,
#ifdef LOONGARCH_LSX
uint32_t adler32_lsx(uint32_t adler, const uint8_t *src, size_t len);
-uint32_t adler32_fold_copy_lsx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint32_t adler32_copy_lsx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
void slide_hash_lsx(deflate_state *s);
# ifdef HAVE_BUILTIN_CTZ
uint32_t compare256_lsx(const uint8_t *src0, const uint8_t *src1);
@@ -29,7 +29,7 @@ void inflate_fast_lsx(PREFIX3(stream) *strm, uint32_t start);
#ifdef LOONGARCH_LASX
uint32_t adler32_lasx(uint32_t adler, const uint8_t *src, size_t len);
-uint32_t adler32_fold_copy_lasx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint32_t adler32_copy_lasx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
void slide_hash_lasx(deflate_state *s);
# ifdef HAVE_BUILTIN_CTZ
uint32_t compare256_lasx(const uint8_t *src0, const uint8_t *src1);
@@ -53,8 +53,8 @@ void inflate_fast_lasx(PREFIX3(stream) *strm, uint32_t start);
# if defined(LOONGARCH_LSX) && defined(__loongarch_sx)
# undef native_adler32
# define native_adler32 adler32_lsx
-# undef native_adler32_fold_copy
-# define native_adler32_fold_copy adler32_fold_copy_lsx
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_lsx
# undef native_slide_hash
# define native_slide_hash slide_hash_lsx
# undef native_chunkmemset_safe
@@ -73,8 +73,8 @@ void inflate_fast_lasx(PREFIX3(stream) *strm, uint32_t start);
# if defined(LOONGARCH_LASX) && defined(__loongarch_asx)
# undef native_adler32
# define native_adler32 adler32_lasx
-# undef native_adler32_fold_copy
-# define native_adler32_fold_copy adler32_fold_copy_lasx
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_lasx
# undef native_slide_hash
# define native_slide_hash slide_hash_lasx
# undef native_chunkmemset_safe
diff --git a/arch/riscv/adler32_rvv.c b/arch/riscv/adler32_rvv.c
index d822d75af6..586f99a22c 100644
--- a/arch/riscv/adler32_rvv.c
+++ b/arch/riscv/adler32_rvv.c
@@ -12,7 +12,7 @@
#include "zbuild.h"
#include "adler32_p.h"
-static inline uint32_t adler32_rvv_impl(uint32_t adler, uint8_t* restrict dst, const uint8_t *src, size_t len, int COPY) {
+static inline uint32_t adler32_copy_impl(uint32_t adler, uint8_t* restrict dst, const uint8_t *src, size_t len, int COPY) {
/* split Adler-32 into component sums */
uint32_t sum2 = (adler >> 16) & 0xffff;
adler &= 0xffff;
@@ -125,12 +125,12 @@ static inline uint32_t adler32_rvv_impl(uint32_t adler, uint8_t* restrict dst, c
return adler | (sum2 << 16);
}
-Z_INTERNAL uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
- return adler32_rvv_impl(adler, dst, src, len, 1);
+Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len) {
+ return adler32_copy_impl(adler, NULL, buf, len, 0);
}
-Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len) {
- return adler32_rvv_impl(adler, NULL, buf, len, 0);
+Z_INTERNAL uint32_t adler32_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+ return adler32_copy_impl(adler, dst, src, len, 1);
}
#endif // RISCV_RVV
diff --git a/arch/riscv/riscv_functions.h b/arch/riscv/riscv_functions.h
index d68dded92c..7334eb64fd 100644
--- a/arch/riscv/riscv_functions.h
+++ b/arch/riscv/riscv_functions.h
@@ -11,7 +11,7 @@
#ifdef RISCV_RVV
uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint32_t adler32_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint8_t* chunkmemset_safe_rvv(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1);
@@ -30,8 +30,8 @@ uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf, size_t len);
# if defined(RISCV_RVV) && defined(__riscv_v) && defined(__linux__)
# undef native_adler32
# define native_adler32 adler32_rvv
-# undef native_adler32_fold_copy
-# define native_adler32_fold_copy adler32_fold_copy_rvv
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_rvv
# undef native_chunkmemset_safe
# define native_chunkmemset_safe chunkmemset_safe_rvv
# undef native_compare256
diff --git a/arch/x86/adler32_avx2.c b/arch/x86/adler32_avx2.c
index df502fd383..90c0605581 100644
--- a/arch/x86/adler32_avx2.c
+++ b/arch/x86/adler32_avx2.c
@@ -15,10 +15,10 @@
#include "adler32_avx2_p.h"
#include "x86_intrins.h"
-extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+extern uint32_t adler32_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, size_t len);
-static inline uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) {
+static inline uint32_t adler32_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) {
if (src == NULL) return 1L;
if (len == 0) return adler;
@@ -35,7 +35,7 @@ rem_peel:
}
} else if (len < 32) {
if (COPY) {
- return adler32_fold_copy_sse42(adler, dst, src, len);
+ return adler32_copy_sse42(adler, dst, src, len);
} else {
return adler32_ssse3(adler, src, len);
}
@@ -108,7 +108,7 @@ rem_peel:
_mm256_storeu_si256((__m256i*)dst, vbuf);
dst += 32;
}
-
+
vs1 = _mm256_add_epi32(vs1, vs1_sad);
vs3 = _mm256_add_epi32(vs3, vs1_0);
__m256i v_short_sum2 = _mm256_maddubs_epi16(vbuf, dot2v_0); // sum 32 uint8s to 16 shorts
@@ -170,11 +170,11 @@ rem_peel:
}
Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const uint8_t *src, size_t len) {
- return adler32_fold_copy_impl(adler, NULL, src, len, 0);
+ return adler32_copy_impl(adler, NULL, src, len, 0);
}
-Z_INTERNAL uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
- return adler32_fold_copy_impl(adler, dst, src, len, 1);
+Z_INTERNAL uint32_t adler32_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+ return adler32_copy_impl(adler, dst, src, len, 1);
}
#endif
diff --git a/arch/x86/adler32_avx512.c b/arch/x86/adler32_avx512.c
index 626c4807f8..88d3a80b6e 100644
--- a/arch/x86/adler32_avx512.c
+++ b/arch/x86/adler32_avx512.c
@@ -15,7 +15,7 @@
#include "x86_intrins.h"
#include "adler32_avx512_p.h"
-static inline uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) {
+static inline uint32_t adler32_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) {
if (src == NULL) return 1L;
if (len == 0) return adler;
@@ -96,13 +96,12 @@ rem_peel:
return adler;
}
-Z_INTERNAL uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
- return adler32_fold_copy_impl(adler, dst, src, len, 1);
+Z_INTERNAL uint32_t adler32_avx512(uint32_t adler, const uint8_t *src, size_t len) {
+ return adler32_copy_impl(adler, NULL, src, len, 0);
}
-Z_INTERNAL uint32_t adler32_avx512(uint32_t adler, const uint8_t *src, size_t len) {
- return adler32_fold_copy_impl(adler, NULL, src, len, 0);
+Z_INTERNAL uint32_t adler32_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+ return adler32_copy_impl(adler, dst, src, len, 1);
}
#endif
-
diff --git a/arch/x86/adler32_avx512_vnni.c b/arch/x86/adler32_avx512_vnni.c
index 4c5cfc1cad..2ab73bc3ca 100644
--- a/arch/x86/adler32_avx512_vnni.c
+++ b/arch/x86/adler32_avx512_vnni.c
@@ -109,7 +109,7 @@ rem_peel:
return adler;
}
-Z_INTERNAL uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+Z_INTERNAL uint32_t adler32_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
if (src == NULL) return 1L;
if (len == 0) return adler;
diff --git a/arch/x86/adler32_sse42.c b/arch/x86/adler32_sse42.c
index df0739d165..86ff9fe49d 100644
--- a/arch/x86/adler32_sse42.c
+++ b/arch/x86/adler32_sse42.c
@@ -13,7 +13,7 @@
#ifdef X86_SSE42
-Z_INTERNAL uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
+Z_INTERNAL uint32_t adler32_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
uint32_t adler0, adler1;
adler1 = (adler >> 16) & 0xffff;
adler0 = adler & 0xffff;
diff --git a/arch/x86/x86_functions.h b/arch/x86/x86_functions.h
index 5d9065e1b3..8cace60f2f 100644
--- a/arch/x86/x86_functions.h
+++ b/arch/x86/x86_functions.h
@@ -41,12 +41,12 @@ void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
#endif
#ifdef X86_SSE42
-uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint32_t adler32_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif
#ifdef X86_AVX2
uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint32_t adler32_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint8_t* chunkmemset_safe_avx2(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
# ifdef HAVE_BUILTIN_CTZ
@@ -59,7 +59,7 @@ uint8_t* chunkmemset_safe_avx2(uint8_t *out, uint8_t *from, unsigned len, unsign
#endif
#ifdef X86_AVX512
uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint32_t adler32_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint8_t* chunkmemset_safe_avx512(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
void inflate_fast_avx512(PREFIX3(stream)* strm, uint32_t start);
# ifdef HAVE_BUILTIN_CTZLL
@@ -70,7 +70,7 @@ void inflate_fast_avx512(PREFIX3(stream)* strm, uint32_t start);
#endif
#ifdef X86_AVX512VNNI
uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
-uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint32_t adler32_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif
#ifdef X86_PCLMULQDQ_CRC
@@ -126,8 +126,8 @@ uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
# endif
// X86 - SSE4.2
# if defined(X86_SSE42) && defined(__SSE4_2__)
-# undef native_adler32_fold_copy
-# define native_adler32_fold_copy adler32_fold_copy_sse42
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_sse42
# endif
// X86 - PCLMUL
# if defined(X86_PCLMULQDQ_CRC) && defined(__PCLMUL__)
@@ -146,8 +146,8 @@ uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
# if defined(X86_AVX2) && defined(__AVX2__)
# undef native_adler32
# define native_adler32 adler32_avx2
-# undef native_adler32_fold_copy
-# define native_adler32_fold_copy adler32_fold_copy_avx2
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_avx2
# undef native_chunkmemset_safe
# define native_chunkmemset_safe chunkmemset_safe_avx2
# undef native_inflate_fast
@@ -167,8 +167,8 @@ uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
# if defined(X86_AVX512) && defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__)
# undef native_adler32
# define native_adler32 adler32_avx512
-# undef native_adler32_fold_copy
-# define native_adler32_fold_copy adler32_fold_copy_avx512
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_avx512
# undef native_chunkmemset_safe
# define native_chunkmemset_safe chunkmemset_safe_avx512
# undef native_inflate_fast
@@ -185,8 +185,8 @@ uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
# if defined(X86_AVX512VNNI) && defined(__AVX512VNNI__)
# undef native_adler32
# define native_adler32 adler32_avx512_vnni
-# undef native_adler32_fold_copy
-# define native_adler32_fold_copy adler32_fold_copy_avx512_vnni
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_avx512_vnni
# endif
// X86 - VPCLMULQDQ
# if defined(__PCLMUL__) && defined(__AVX512F__) && defined(__VPCLMULQDQ__)
diff --git a/deflate_p.h b/deflate_p.h
index e803300e9a..ecaad5c554 100644
--- a/deflate_p.h
+++ b/deflate_p.h
@@ -156,7 +156,7 @@ Z_FORCEINLINE static unsigned read_buf(PREFIX3(stream) *strm, unsigned char *buf
FUNCTABLE_CALL(crc32_fold_copy)(&s->crc_fold, buf, strm->next_in, len);
#endif
} else if (s->wrap == 1) {
- strm->adler = FUNCTABLE_CALL(adler32_fold_copy)(strm->adler, buf, strm->next_in, len);
+ strm->adler = FUNCTABLE_CALL(adler32_copy)(strm->adler, buf, strm->next_in, len);
} else {
memcpy(buf, strm->next_in, len);
}
diff --git a/functable.c b/functable.c
index 8924f7351f..fcfb2f36d0 100644
--- a/functable.c
+++ b/functable.c
@@ -79,7 +79,7 @@ static int init_functable(void) {
# if (defined(__x86_64__) || defined(_M_X64)) && defined(X86_SSE2)
// x86_64 always has SSE2, so we can use SSE2 functions as fallbacks where available.
ft.adler32 = &adler32_c;
- ft.adler32_fold_copy = &adler32_fold_copy_c;
+ ft.adler32_copy = &adler32_copy_c;
ft.crc32 = &crc32_braid;
ft.crc32_fold = &crc32_fold_c;
ft.crc32_fold_copy = &crc32_fold_copy_c;
@@ -93,7 +93,7 @@ static int init_functable(void) {
# endif
#else // WITH_ALL_FALLBACKS
ft.adler32 = &adler32_c;
- ft.adler32_fold_copy = &adler32_fold_copy_c;
+ ft.adler32_copy = &adler32_copy_c;
ft.chunkmemset_safe = &chunkmemset_safe_c;
ft.crc32 = &crc32_braid;
ft.crc32_fold = &crc32_fold_c;
@@ -153,7 +153,7 @@ static int init_functable(void) {
// X86 - SSE4.2
#ifdef X86_SSE42
if (cf.x86.has_sse42) {
- ft.adler32_fold_copy = &adler32_fold_copy_sse42;
+ ft.adler32_copy = &adler32_copy_sse42;
}
#endif
// X86 - PCLMUL
@@ -174,7 +174,7 @@ static int init_functable(void) {
* to remain intact. They also allow for a count operand that isn't the CL register, avoiding contention there */
if (cf.x86.has_avx2 && cf.x86.has_bmi2) {
ft.adler32 = &adler32_avx2;
- ft.adler32_fold_copy = &adler32_fold_copy_avx2;
+ ft.adler32_copy = &adler32_copy_avx2;
ft.chunkmemset_safe = &chunkmemset_safe_avx2;
ft.inflate_fast = &inflate_fast_avx2;
ft.slide_hash = &slide_hash_avx2;
@@ -189,7 +189,7 @@ static int init_functable(void) {
#ifdef X86_AVX512
if (cf.x86.has_avx512_common) {
ft.adler32 = &adler32_avx512;
- ft.adler32_fold_copy = &adler32_fold_copy_avx512;
+ ft.adler32_copy = &adler32_copy_avx512;
ft.chunkmemset_safe = &chunkmemset_safe_avx512;
ft.inflate_fast = &inflate_fast_avx512;
# ifdef HAVE_BUILTIN_CTZLL
@@ -202,7 +202,7 @@ static int init_functable(void) {
#ifdef X86_AVX512VNNI
if (cf.x86.has_avx512vnni) {
ft.adler32 = &adler32_avx512_vnni;
- ft.adler32_fold_copy = &adler32_fold_copy_avx512_vnni;
+ ft.adler32_copy = &adler32_copy_avx512_vnni;
}
#endif
// X86 - VPCLMULQDQ
@@ -233,7 +233,7 @@ static int init_functable(void) {
# endif
{
ft.adler32 = &adler32_neon;
- ft.adler32_fold_copy = &adler32_fold_copy_neon;
+ ft.adler32_copy = &adler32_copy_neon;
ft.chunkmemset_safe = &chunkmemset_safe_neon;
ft.inflate_fast = &inflate_fast_neon;
ft.slide_hash = &slide_hash_neon;
@@ -288,7 +288,7 @@ static int init_functable(void) {
#ifdef RISCV_RVV
if (cf.riscv.has_rvv) {
ft.adler32 = &adler32_rvv;
- ft.adler32_fold_copy = &adler32_fold_copy_rvv;
+ ft.adler32_copy = &adler32_copy_rvv;
ft.chunkmemset_safe = &chunkmemset_safe_rvv;
ft.compare256 = &compare256_rvv;
ft.inflate_fast = &inflate_fast_rvv;
@@ -322,7 +322,7 @@ static int init_functable(void) {
#ifdef LOONGARCH_LSX
if (cf.loongarch.has_lsx) {
ft.adler32 = &adler32_lsx;
- ft.adler32_fold_copy = &adler32_fold_copy_lsx;
+ ft.adler32_copy = &adler32_copy_lsx;
ft.slide_hash = slide_hash_lsx;
# ifdef HAVE_BUILTIN_CTZ
ft.compare256 = &compare256_lsx;
@@ -336,7 +336,7 @@ static int init_functable(void) {
#ifdef LOONGARCH_LASX
if (cf.loongarch.has_lasx) {
ft.adler32 = &adler32_lasx;
- ft.adler32_fold_copy = &adler32_fold_copy_lasx;
+ ft.adler32_copy = &adler32_copy_lasx;
ft.slide_hash = slide_hash_lasx;
# ifdef HAVE_BUILTIN_CTZ
ft.compare256 = &compare256_lasx;
@@ -353,7 +353,7 @@ static int init_functable(void) {
// Assign function pointers individually for atomic operation
FUNCTABLE_ASSIGN(ft, force_init);
FUNCTABLE_VERIFY_ASSIGN(ft, adler32);
- FUNCTABLE_VERIFY_ASSIGN(ft, adler32_fold_copy);
+ FUNCTABLE_VERIFY_ASSIGN(ft, adler32_copy);
FUNCTABLE_VERIFY_ASSIGN(ft, chunkmemset_safe);
FUNCTABLE_VERIFY_ASSIGN(ft, compare256);
FUNCTABLE_VERIFY_ASSIGN(ft, crc32);
@@ -382,9 +382,9 @@ static uint32_t adler32_stub(uint32_t adler, const uint8_t* buf, size_t len) {
return functable.adler32(adler, buf, len);
}
-static uint32_t adler32_fold_copy_stub(uint32_t adler, uint8_t* dst, const uint8_t* src, size_t len) {
+static uint32_t adler32_copy_stub(uint32_t adler, uint8_t* dst, const uint8_t* src, size_t len) {
FUNCTABLE_INIT_ABORT;
- return functable.adler32_fold_copy(adler, dst, src, len);
+ return functable.adler32_copy(adler, dst, src, len);
}
static uint8_t* chunkmemset_safe_stub(uint8_t* out, uint8_t *from, unsigned len, unsigned left) {
@@ -446,7 +446,7 @@ static void slide_hash_stub(deflate_state* s) {
Z_INTERNAL struct functable_s functable = {
force_init_stub,
adler32_stub,
- adler32_fold_copy_stub,
+ adler32_copy_stub,
chunkmemset_safe_stub,
compare256_stub,
crc32_stub,
diff --git a/functable.h b/functable.h
index 91308e5686..209db4a67d 100644
--- a/functable.h
+++ b/functable.h
@@ -26,7 +26,7 @@
struct functable_s {
int (* force_init) (void);
uint32_t (* adler32) (uint32_t adler, const uint8_t *buf, size_t len);
- uint32_t (* adler32_fold_copy) (uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+ uint32_t (* adler32_copy) (uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint8_t* (* chunkmemset_safe) (uint8_t *out, uint8_t *from, unsigned len, unsigned left);
uint32_t (* compare256) (const uint8_t *src0, const uint8_t *src1);
uint32_t (* crc32) (uint32_t crc, const uint8_t *buf, size_t len);
diff --git a/inflate.c b/inflate.c
index 8baa725c0e..edba74cb9d 100644
--- a/inflate.c
+++ b/inflate.c
@@ -32,7 +32,7 @@ static inline void inf_chksum_cpy(PREFIX3(stream) *strm, uint8_t *dst,
} else
#endif
{
- strm->adler = state->check = FUNCTABLE_CALL(adler32_fold_copy)(state->check, dst, src, copy);
+ strm->adler = state->check = FUNCTABLE_CALL(adler32_copy)(state->check, dst, src, copy);
}
}
diff --git a/test/benchmarks/benchmark_adler32_copy.cc b/test/benchmarks/benchmark_adler32_copy.cc
index 2027904af5..2be1d39fd0 100644
--- a/test/benchmarks/benchmark_adler32_copy.cc
+++ b/test/benchmarks/benchmark_adler32_copy.cc
@@ -92,16 +92,16 @@ BENCHMARK_ADLER32_BASELINE_COPY(native, native_adler32, 1);
#ifdef ARM_NEON
/* If we inline this copy for neon, the function would go here */
-BENCHMARK_ADLER32_COPY(neon, adler32_fold_copy_neon, test_cpu_features.arm.has_neon);
+BENCHMARK_ADLER32_COPY(neon, adler32_copy_neon, test_cpu_features.arm.has_neon);
BENCHMARK_ADLER32_BASELINE_COPY(neon_copy_baseline, adler32_neon, test_cpu_features.arm.has_neon);
#endif
#ifdef PPC_VMX
-//BENCHMARK_ADLER32_COPY(vmx_inline_copy, adler32_fold_copy_vmx, test_cpu_features.power.has_altivec);
+//BENCHMARK_ADLER32_COPY(vmx_inline_copy, adler32_copy_vmx, test_cpu_features.power.has_altivec);
BENCHMARK_ADLER32_BASELINE_COPY(vmx_copy_baseline, adler32_vmx, test_cpu_features.power.has_altivec);
#endif
#ifdef POWER8_VSX
-//BENCHMARK_ADLER32_COPY(power8_inline_copy, adler32_fold_copy_power8, test_cpu_features.power.has_arch_2_07);
+//BENCHMARK_ADLER32_COPY(power8_inline_copy, adler32_copy_power8, test_cpu_features.power.has_arch_2_07);
BENCHMARK_ADLER32_BASELINE_COPY(power8, adler32_power8, test_cpu_features.power.has_arch_2_07);
#endif
@@ -112,28 +112,28 @@ BENCHMARK_ADLER32_BASELINE_COPY(rvv, adler32_rvv, test_cpu_features.riscv.has_rv
#ifdef X86_SSE42
BENCHMARK_ADLER32_BASELINE_COPY(sse42_baseline, adler32_ssse3, test_cpu_features.x86.has_ssse3);
-BENCHMARK_ADLER32_COPY(sse42, adler32_fold_copy_sse42, test_cpu_features.x86.has_sse42);
+BENCHMARK_ADLER32_COPY(sse42, adler32_copy_sse42, test_cpu_features.x86.has_sse42);
#endif
#ifdef X86_AVX2
BENCHMARK_ADLER32_BASELINE_COPY(avx2_baseline, adler32_avx2, test_cpu_features.x86.has_avx2);
-BENCHMARK_ADLER32_COPY(avx2, adler32_fold_copy_avx2, test_cpu_features.x86.has_avx2);
+BENCHMARK_ADLER32_COPY(avx2, adler32_copy_avx2, test_cpu_features.x86.has_avx2);
#endif
#ifdef X86_AVX512
BENCHMARK_ADLER32_BASELINE_COPY(avx512_baseline, adler32_avx512, test_cpu_features.x86.has_avx512_common);
-BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, test_cpu_features.x86.has_avx512_common);
+BENCHMARK_ADLER32_COPY(avx512, adler32_copy_avx512, test_cpu_features.x86.has_avx512_common);
#endif
#ifdef X86_AVX512VNNI
BENCHMARK_ADLER32_BASELINE_COPY(avx512_vnni_baseline, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
-BENCHMARK_ADLER32_COPY(avx512_vnni, adler32_fold_copy_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
+BENCHMARK_ADLER32_COPY(avx512_vnni, adler32_copy_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
#endif
#ifdef LOONGARCH_LSX
BENCHMARK_ADLER32_BASELINE_COPY(lsx_baseline, adler32_lsx, test_cpu_features.loongarch.has_lsx);
-BENCHMARK_ADLER32_COPY(lsx, adler32_fold_copy_lsx, test_cpu_features.loongarch.has_lsx);
+BENCHMARK_ADLER32_COPY(lsx, adler32_copy_lsx, test_cpu_features.loongarch.has_lsx);
#endif
#ifdef LOONGARCH_LASX
BENCHMARK_ADLER32_BASELINE_COPY(lasx_baseline, adler32_lasx, test_cpu_features.loongarch.has_lasx);
-BENCHMARK_ADLER32_COPY(lasx, adler32_fold_copy_lasx, test_cpu_features.loongarch.has_lasx);
+BENCHMARK_ADLER32_COPY(lasx, adler32_copy_lasx, test_cpu_features.loongarch.has_lasx);
#endif
#endif