summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- .github/workflows/pkgcheck.yml | 4
-rw-r--r-- CMakeLists.txt | 50
-rw-r--r-- README.md | 3
-rwxr-xr-x configure | 10
-rw-r--r-- functable.c | 29
-rw-r--r-- test/benchmarks/benchmark_slidehash.cc | 2
6 files changed, 82 insertions, 16 deletions
diff --git a/.github/workflows/pkgcheck.yml b/.github/workflows/pkgcheck.yml
index 58af3a51cb..3ec5461f84 100644
--- a/.github/workflows/pkgcheck.yml
+++ b/.github/workflows/pkgcheck.yml
@@ -137,7 +137,7 @@ jobs:
CFLAGS: ${{ matrix.cflags }}
CXXFLAGS: ${{ matrix.cxxflags }}
CHOST: ${{ matrix.chost }}
- CMAKE_ARGS: ${{ matrix.cmake-args }}
+ CMAKE_ARGS: ${{ matrix.cmake-args }} -DWITH_ALL_FALLBACKS=ON
CONFIGURE_ARGS: ${{ matrix.configure-args }}
LDFLAGS: ${{ matrix.ldflags }}
@@ -147,7 +147,7 @@ jobs:
CC: ${{ matrix.compiler }}
CFLAGS: ${{ matrix.cflags }}
CHOST: ${{ matrix.chost }}
- CMAKE_ARGS: ${{ matrix.cmake-args }}
+ CMAKE_ARGS: ${{ matrix.cmake-args }} -DWITH_ALL_FALLBACKS=ON
CONFIGURE_ARGS: ${{ matrix.configure-args }}
LDFLAGS: ${{ matrix.ldflags }}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7a30ed2fea..31e8c69152 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -90,6 +90,7 @@ endif()
option(WITH_GZFILEOP "Compile with support for gzFile related functions" ON)
option(ZLIB_COMPAT "Compile with zlib compatible API" OFF)
option(WITH_OPTIM "Build with optimisation" ON)
+option(WITH_ALL_FALLBACKS "Build all generic fallback functions (Useful for Gbench)" OFF)
option(WITH_REDUCED_MEM "Reduced memory usage for special cases (reduces performance)" OFF)
option(WITH_NEW_STRATEGIES "Use new strategies" ON)
option(WITH_CRC32_CHORBA "Enable optimized CRC32 algorithm Chorba" ON)
@@ -151,6 +152,7 @@ mark_as_advanced(FORCE
ZLIB_SYMBOL_PREFIX
WITH_REDUCED_MEM
WITH_CRC32_CHORBA
+ WITH_ALL_FALLBACKS
WITH_ARMV8 WITH_NEON
WITH_ARMV6
WITH_DFLTCC_DEFLATE
@@ -713,6 +715,7 @@ else()
endif()
if(WITH_OPTIM)
+ add_definitions(-DWITH_OPTIM)
if(BASEARCH_ARM_FOUND)
add_definitions(-DARM_FEATURES)
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
@@ -1160,6 +1163,9 @@ if(WITH_OPTIM)
endif()
endif()
endif()
+else()
+ # If WITH_OPTIM is disabled, we need all the fallbacks.
+ set(WITH_ALL_FALLBACKS ON)
endif()
message(STATUS "Architecture-specific source files: ${ZLIB_ARCH_SRCS}")
@@ -1267,14 +1273,6 @@ set(ZLIB_PRIVATE_HDRS
zutil_p.h
)
set(ZLIB_SRCS
- arch/generic/adler32_c.c
- arch/generic/adler32_fold_c.c
- arch/generic/chunkset_c.c
- arch/generic/compare256_c.c
- arch/generic/crc32_braid_c.c
- arch/generic/crc32_c.c
- arch/generic/crc32_fold_c.c
- arch/generic/slide_hash_c.c
adler32.c
compress.c
crc32.c
@@ -1298,6 +1296,39 @@ set(ZLIB_SRCS
zutil.c
)
+# Complete set of generic C implementations that can back the function table.
+set(ZLIB_ALL_FALLBACK_SRCS
+ arch/generic/adler32_c.c
+ arch/generic/adler32_fold_c.c
+ arch/generic/chunkset_c.c
+ arch/generic/compare256_c.c
+ arch/generic/crc32_braid_c.c
+ arch/generic/crc32_c.c
+ arch/generic/crc32_fold_c.c
+ arch/generic/slide_hash_c.c
+)
+
+# Choose which generic sources are compiled into ZLIB_GENERIC_SRCS.
+# -DWITH_ALL_FALLBACKS is passed to the compiler whenever the complete set is built.
+if(WITH_ALL_FALLBACKS)
+ list(APPEND ZLIB_GENERIC_SRCS ${ZLIB_ALL_FALLBACK_SRCS})
+ add_definitions(-DWITH_ALL_FALLBACKS)
+# ARCH is quoted so an empty or undefined value cannot produce a malformed
+# condition, and so the expanded value is never re-dereferenced as a variable name.
+elseif("${ARCH}" STREQUAL "x86_64" AND WITH_SSE2)
+ # x86_64 always has SSE2, so let the SSE2 functions act as fallbacks.
+ # chunkset_c.c and slide_hash_c.c are intentionally left out of this reduced list.
+ list(APPEND ZLIB_GENERIC_SRCS
+ arch/generic/adler32_c.c
+ arch/generic/adler32_fold_c.c
+ arch/generic/crc32_braid_c.c
+ arch/generic/crc32_c.c
+ arch/generic/crc32_fold_c.c
+ )
+
+ # x86_64 does not need compare256 fallback if we have BUILTIN_CTZ
+ if(NOT HAVE_BUILTIN_CTZ)
+ list(APPEND ZLIB_GENERIC_SRCS arch/generic/compare256_c.c)
+ endif()
+else()
+ # Every other architecture/configuration builds the complete fallback set.
+ list(APPEND ZLIB_GENERIC_SRCS ${ZLIB_ALL_FALLBACK_SRCS})
+ add_definitions(-DWITH_ALL_FALLBACKS)
+endif()
+
if(WITH_CRC32_CHORBA)
list(APPEND ZLIB_SRCS arch/generic/crc32_chorba_c.c)
endif()
@@ -1316,7 +1347,7 @@ set(ZLIB_GZFILE_SRCS
gzwrite.c
)
-set(ZLIB_ALL_SRCS ${ZLIB_SRCS} ${ZLIB_ARCH_HDRS} ${ZLIB_ARCH_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS})
+set(ZLIB_ALL_SRCS ${ZLIB_GENERIC_SRCS} ${ZLIB_SRCS} ${ZLIB_ARCH_HDRS} ${ZLIB_ARCH_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS})
if(WITH_GZFILEOP)
list(APPEND ZLIB_ALL_SRCS ${ZLIB_GZFILE_PRIVATE_HDRS} ${ZLIB_GZFILE_SRCS})
endif()
@@ -1542,6 +1573,7 @@ add_feature_info(WITH_GTEST WITH_GTEST "Build gtest_zlib")
add_feature_info(WITH_FUZZERS WITH_FUZZERS "Build test/fuzz")
add_feature_info(WITH_BENCHMARKS WITH_BENCHMARKS "Build test/benchmarks")
add_feature_info(WITH_BENCHMARK_APPS WITH_BENCHMARK_APPS "Build application benchmarks")
+add_feature_info(WITH_ALL_FALLBACKS WITH_ALL_FALLBACKS "Build all generic fallback functions")
add_feature_info(WITH_OPTIM WITH_OPTIM "Build with optimisation")
add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies")
add_feature_info(WITH_CRC32_CHORBA WITH_CRC32_CHORBA "Use optimized CRC32 algorithm Chorba")
diff --git a/README.md b/README.md
index c0ddc62268..81d967fb03 100644
--- a/README.md
+++ b/README.md
@@ -203,7 +203,7 @@ Advanced Build Options
| WITH_SSE42 | | Build with SSE42 intrinsics | ON |
| WITH_PCLMULQDQ | | Build with PCLMULQDQ intrinsics | ON |
| WITH_VPCLMULQDQ | --without-vpclmulqdq | Build with VPCLMULQDQ intrinsics | ON |
-| WITH_ARMV8 | --without-armv8 | Build with ARMv8 intrinsics | ON |
+| WITH_ARMV8 | --without-armv8 | Build with ARMv8 intrinsics | ON |
| WITH_NEON | --without-neon | Build with NEON intrinsics | ON |
| WITH_ARMV6 | --without-armv6 | Build with ARMv6 intrinsics | ON |
| WITH_ALTIVEC | --without-altivec | Build with AltiVec (VMX) intrinsics | ON |
@@ -216,6 +216,7 @@ Advanced Build Options
| WITH_INFLATE_ALLOW_INVALID_DIST | | Build with zero fill for inflate invalid distances | OFF |
| INSTALL_UTILS | | Copy minigzip and minideflate during install | OFF |
| ZLIBNG_ENABLE_TESTS | | Test zlib-ng specific API | ON |
+| WITH_ALL_FALLBACKS | | Build all generic C fallback functions (useful for Gbench comparisons) | OFF |
Related Projects
diff --git a/configure b/configure
index fcfc795d0b..9cec2d0374 100755
--- a/configure
+++ b/configure
@@ -1774,6 +1774,16 @@ if test $without_new_strategies -eq 1; then
SFLAGS="${SFLAGS} -DNO_QUICK_STRATEGY -DNO_MEDIUM_STRATEGY"
fi
+# CMake can exclude building some of the generic fallback functions,
+# configure does not have the detection code to do so.
+CFLAGS="${CFLAGS} -DWITH_ALL_FALLBACKS"
+SFLAGS="${SFLAGS} -DWITH_ALL_FALLBACKS"
+
+if test $without_optimizations -eq 0; then
+ CFLAGS="${CFLAGS} -DWITH_OPTIM"
+ SFLAGS="${SFLAGS} -DWITH_OPTIM"
+fi
+
ARCHDIR='arch/generic'
ARCH_STATIC_OBJS=''
ARCH_SHARED_OBJS=''
diff --git a/functable.c b/functable.c
index 1f8f52fd7c..4481fdb9df 100644
--- a/functable.c
+++ b/functable.c
@@ -47,9 +47,26 @@ static void init_functable(void) {
struct cpu_features cf;
cpu_check_features(&cf);
-
- // Generic code
ft.force_init = &force_init_empty;
+
+ // Set up generic C code fallbacks
+#ifndef WITH_ALL_FALLBACKS
+# if (defined(__x86_64__) || defined(_M_X64)) && defined(X86_SSE2)
+ // x86_64 always has SSE2, so we can use SSE2 functions as fallbacks where available.
+ ft.adler32 = &adler32_c;
+ ft.adler32_fold_copy = &adler32_fold_copy_c;
+ ft.crc32 = &crc32_c;
+ ft.crc32_fold = &crc32_fold_c;
+ ft.crc32_fold_copy = &crc32_fold_copy_c;
+ ft.crc32_fold_final = &crc32_fold_final_c;
+ ft.crc32_fold_reset = &crc32_fold_reset_c;
+# ifndef HAVE_BUILTIN_CTZ
+ ft.longest_match = &longest_match_c;
+ ft.longest_match_slow = &longest_match_slow_c;
+ ft.compare256 = &compare256_c;
+# endif
+# endif
+#else // WITH_ALL_FALLBACKS
ft.adler32 = &adler32_c;
ft.adler32_fold_copy = &adler32_fold_copy_c;
ft.chunkmemset_safe = &chunkmemset_safe_c;
@@ -63,8 +80,10 @@ static void init_functable(void) {
ft.longest_match = &longest_match_c;
ft.longest_match_slow = &longest_match_slow_c;
ft.compare256 = &compare256_c;
+#endif
// Select arch-optimized functions
+#ifdef WITH_OPTIM
// X86 - SSE2
#ifdef X86_SSE2
@@ -73,9 +92,9 @@ static void init_functable(void) {
# endif
{
ft.chunkmemset_safe = &chunkmemset_safe_sse2;
-#if !defined(WITHOUT_CHORBA) && !defined(NO_CHORBA_SSE)
+# if !defined(WITHOUT_CHORBA) && !defined(NO_CHORBA_SSE)
ft.crc32 = &crc32_chorba_sse2;
-#endif
+# endif
ft.inflate_fast = &inflate_fast_sse2;
ft.slide_hash = &slide_hash_sse2;
# ifdef HAVE_BUILTIN_CTZ
@@ -301,6 +320,8 @@ static void init_functable(void) {
}
#endif
+#endif // WITH_OPTIM
+
// Assign function pointers individually for atomic operation
FUNCTABLE_ASSIGN(ft, force_init);
FUNCTABLE_ASSIGN(ft, adler32);
diff --git a/test/benchmarks/benchmark_slidehash.cc b/test/benchmarks/benchmark_slidehash.cc
index 6f3b1221fc..4479a935b2 100644
--- a/test/benchmarks/benchmark_slidehash.cc
+++ b/test/benchmarks/benchmark_slidehash.cc
@@ -77,7 +77,9 @@ public:
} \
BENCHMARK_REGISTER_F(slide_hash, name)->RangeMultiplier(2)->Range(512, MAX_RANDOM_INTS);
+// slide_hash_c is only built with WITH_ALL_FALLBACKS or on non-x86_64 targets; check _M_X64 too, since some compilers define it instead of __x86_64__.
+#if defined(WITH_ALL_FALLBACKS) || !(defined(__x86_64__) || defined(_M_X64))
BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);
+#endif
#ifdef DISABLE_RUNTIME_CPU_DETECTION
BENCHMARK_SLIDEHASH(native, native_slide_hash, 1);