summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathan Moinvaziri <nathan@nathanm.com>2026-02-07 00:00:44 -0800
committerHans Kristian Rosbach <hk-github@circlestorm.org>2026-03-08 23:33:57 +0100
commit30206c1cac40a8bed335405ed2e64559fb7b40bc (patch)
tree80867b5eed2ee5f9ba4cce677b9a8071dd494cdb
parentb3bcd2104f483b47f4483be9d17be0d00b2a384a (diff)
downloadProject-Tick-30206c1cac40a8bed335405ed2e64559fb7b40bc.tar.gz
Project-Tick-30206c1cac40a8bed335405ed2e64559fb7b40bc.zip
Add compile-time native feature detection macros
Creates [ARCH]_[FEAT]_NATIVE preprocessor defines that can be re-used in functable to bypass CPU checks. They are derived from the existing DISABLE_RUNTIME_CPU_DETECTION preprocessor logic.
-rw-r--r--arch/arm/arm_functions.h11
-rw-r--r--arch/arm/arm_natives.h31
-rw-r--r--arch/loongarch/loongarch_functions.h10
-rw-r--r--arch/loongarch/loongarch_natives.h25
-rw-r--r--arch/power/power_functions.h11
-rw-r--r--arch/power/power_natives.h27
-rw-r--r--arch/riscv/riscv_functions.h7
-rw-r--r--arch/riscv/riscv_natives.h19
-rw-r--r--arch/s390/s390_functions.h5
-rw-r--r--arch/s390/s390_natives.h14
-rw-r--r--arch/x86/x86_functions.h28
-rw-r--r--arch/x86/x86_natives.h52
-rw-r--r--arch_natives.h24
-rw-r--r--functable.c177
14 files changed, 374 insertions, 67 deletions
diff --git a/arch/arm/arm_functions.h b/arch/arm/arm_functions.h
index 34ba87b067..bc77adb977 100644
--- a/arch/arm/arm_functions.h
+++ b/arch/arm/arm_functions.h
@@ -5,6 +5,8 @@
#ifndef ARM_FUNCTIONS_H_
#define ARM_FUNCTIONS_H_
+#include "arm_natives.h"
+
#ifdef ARM_NEON
uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t adler32_copy_neon(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
@@ -29,15 +31,14 @@ uint32_t crc32_copy_armv8_pmull_eor3(uint32_t crc, uint8_t *dst, const uint8_t *
void slide_hash_armv6(deflate_state *s);
#endif
-
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// ARM - SIMD
-# if (defined(ARM_SIMD) && defined(__ARM_FEATURE_SIMD32)) || defined(ARM_NOCHECK_SIMD)
+# ifdef ARM_SIMD_NATIVE
# undef native_slide_hash
# define native_slide_hash slide_hash_armv6
# endif
// ARM - NEON
-# if (defined(ARM_NEON) && (defined(__ARM_NEON__) || defined(__ARM_NEON))) || ARM_NOCHECK_NEON
+# ifdef ARM_NEON_NATIVE
# undef native_adler32
# define native_adler32 adler32_neon
# undef native_adler32_copy
@@ -56,14 +57,14 @@ void slide_hash_armv6(deflate_state *s);
# define native_slide_hash slide_hash_neon
# endif
// ARM - CRC32
-# if (defined(ARM_CRC32) && defined(__ARM_FEATURE_CRC32))
+# ifdef ARM_CRC32_NATIVE
# undef native_crc32
# define native_crc32 crc32_armv8
# undef native_crc32_copy
# define native_crc32_copy crc32_copy_armv8
# endif
// ARM - PMULL EOR3
-# if (defined(ARM_PMULL_EOR3) && defined(__ARM_FEATURE_CRC32) && defined(__ARM_FEATURE_CRYPTO) && defined(__ARM_FEATURE_SHA3))
+# ifdef ARM_PMULL_EOR3_NATIVE
# undef native_crc32
# define native_crc32 crc32_armv8_pmull_eor3
# undef native_crc32_copy
diff --git a/arch/arm/arm_natives.h b/arch/arm/arm_natives.h
new file mode 100644
index 0000000000..311e33e958
--- /dev/null
+++ b/arch/arm/arm_natives.h
@@ -0,0 +1,31 @@
+/* arm_natives.h -- ARM compile-time feature detection macros.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef ARM_NATIVES_H_
+#define ARM_NATIVES_H_
+
+#if defined(__ARM_FEATURE_SIMD32)
+# ifdef ARM_SIMD
+# define ARM_SIMD_NATIVE
+# endif
+#endif
+/* NEON is guaranteed on ARM64 (like SSE2 on x86-64) */
+#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(ARCH_64BIT)
+# ifdef ARM_NEON
+# define ARM_NEON_NATIVE
+# endif
+#endif
+/* CRC32 is optional in ARMv8.0, mandatory in ARMv8.1+ */
+#if defined(__ARM_FEATURE_CRC32) || (defined(__ARM_ARCH) && __ARM_ARCH >= 801)
+# ifdef ARM_CRC32
+# define ARM_CRC32_NATIVE
+# endif
+#endif
+#if defined(__ARM_FEATURE_CRC32) && defined(__ARM_FEATURE_CRYPTO) && defined(__ARM_FEATURE_SHA3)
+# ifdef ARM_PMULL_EOR3
+# define ARM_PMULL_EOR3_NATIVE
+# endif
+#endif
+
+#endif /* ARM_NATIVES_H_ */
diff --git a/arch/loongarch/loongarch_functions.h b/arch/loongarch/loongarch_functions.h
index 922c6c4165..0ec8bd66d7 100644
--- a/arch/loongarch/loongarch_functions.h
+++ b/arch/loongarch/loongarch_functions.h
@@ -8,6 +8,8 @@
#ifndef LOONGARCH_FUNCTIONS_H_
#define LOONGARCH_FUNCTIONS_H_
+#include "loongarch_natives.h"
+
#ifdef LOONGARCH_CRC
uint32_t crc32_loongarch64(uint32_t crc, const uint8_t *buf, size_t len);
uint32_t crc32_copy_loongarch64(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
@@ -36,14 +38,14 @@ void slide_hash_lasx(deflate_state *s);
#endif
#ifdef DISABLE_RUNTIME_CPU_DETECTION
-// LOONGARCH - CRC32 - All known CPUs has crc instructions
-# if defined(LOONGARCH_CRC)
+// LOONGARCH - CRC32
+# ifdef LOONGARCH_CRC_NATIVE
# undef native_crc32
# define native_crc32 crc32_loongarch64
# undef native_crc32_copy
# define native_crc32_copy crc32_copy_loongarch64
# endif
-# if defined(LOONGARCH_LSX) && defined(__loongarch_sx)
+# ifdef LOONGARCH_LSX_NATIVE
# undef native_adler32
# define native_adler32 adler32_lsx
# undef native_adler32_copy
@@ -61,7 +63,7 @@ void slide_hash_lasx(deflate_state *s);
# undef native_slide_hash
# define native_slide_hash slide_hash_lsx
# endif
-# if defined(LOONGARCH_LASX) && defined(__loongarch_asx)
+# ifdef LOONGARCH_LASX_NATIVE
# undef native_adler32
# define native_adler32 adler32_lasx
# undef native_adler32_copy
diff --git a/arch/loongarch/loongarch_natives.h b/arch/loongarch/loongarch_natives.h
new file mode 100644
index 0000000000..35f6d3c7bd
--- /dev/null
+++ b/arch/loongarch/loongarch_natives.h
@@ -0,0 +1,25 @@
+/* loongarch_natives.h -- LoongArch compile-time feature detection macros.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef LOONGARCH_NATIVES_H_
+#define LOONGARCH_NATIVES_H_
+
+#if defined(__loongarch__)
+// All known CPUs have crc instructions
+# ifdef LOONGARCH_CRC
+# define LOONGARCH_CRC_NATIVE
+# endif
+#endif
+#if defined(__loongarch_sx)
+# ifdef LOONGARCH_LSX
+# define LOONGARCH_LSX_NATIVE
+# endif
+#endif
+#if defined(__loongarch_asx)
+# ifdef LOONGARCH_LASX
+# define LOONGARCH_LASX_NATIVE
+# endif
+#endif
+
+#endif /* LOONGARCH_NATIVES_H_ */
diff --git a/arch/power/power_functions.h b/arch/power/power_functions.h
index 49ea89e819..ccc7754a4c 100644
--- a/arch/power/power_functions.h
+++ b/arch/power/power_functions.h
@@ -7,6 +7,8 @@
#ifndef POWER_FUNCTIONS_H_
#define POWER_FUNCTIONS_H_
+#include "power_natives.h"
+
#ifdef PPC_VMX
uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t adler32_copy_vmx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
@@ -29,10 +31,9 @@ uint32_t longest_match_power9(deflate_state *const s, uint32_t cur_match);
uint32_t longest_match_slow_power9(deflate_state *const s, uint32_t cur_match);
#endif
-
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// Power - VMX
-# if defined(PPC_VMX) && defined(__ALTIVEC__)
+# ifdef PPC_VMX_NATIVE
# undef native_adler32
# define native_adler32 adler32_vmx
# undef native_adler32_copy
@@ -41,7 +42,7 @@ uint32_t longest_match_slow_power9(deflate_state *const s, uint32_t cur_match);
# define native_slide_hash slide_hash_vmx
# endif
// Power8 - VSX
-# if defined(POWER8_VSX) && defined(_ARCH_PWR8) && defined(__VSX__)
+# ifdef POWER8_VSX_NATIVE
# undef native_adler32
# define native_adler32 adler32_power8
# undef native_adler32_copy
@@ -53,14 +54,14 @@ uint32_t longest_match_slow_power9(deflate_state *const s, uint32_t cur_match);
# undef native_slide_hash
# define native_slide_hash slide_hash_power8
# endif
-# if defined(POWER8_VSX_CRC32) && defined(_ARCH_PWR8) && defined(__VSX__)
+# ifdef POWER8_VSX_CRC32_NATIVE
# undef native_crc32
# define native_crc32 crc32_power8
# undef native_crc32_copy
# define native_crc32_copy crc32_copy_power8
# endif
// Power9
-# if defined(POWER9) && defined(_ARCH_PWR9)
+# ifdef POWER9_NATIVE
# undef native_compare256
# define native_compare256 compare256_power9
# undef native_longest_match
diff --git a/arch/power/power_natives.h b/arch/power/power_natives.h
new file mode 100644
index 0000000000..59ec8a8aed
--- /dev/null
+++ b/arch/power/power_natives.h
@@ -0,0 +1,27 @@
+/* power_natives.h -- POWER compile-time feature detection macros.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef POWER_NATIVES_H_
+#define POWER_NATIVES_H_
+
+#if defined(__ALTIVEC__)
+# ifdef PPC_VMX
+# define PPC_VMX_NATIVE
+# endif
+#endif
+#if defined(_ARCH_PWR8) && defined(__VSX__)
+# ifdef POWER8_VSX
+# define POWER8_VSX_NATIVE
+# endif
+# ifdef POWER8_VSX_CRC32
+# define POWER8_VSX_CRC32_NATIVE
+# endif
+#endif
+#if defined(_ARCH_PWR9)
+# ifdef POWER9
+# define POWER9_NATIVE
+# endif
+#endif
+
+#endif /* POWER_NATIVES_H_ */
diff --git a/arch/riscv/riscv_functions.h b/arch/riscv/riscv_functions.h
index 9e641966a0..89120ffabf 100644
--- a/arch/riscv/riscv_functions.h
+++ b/arch/riscv/riscv_functions.h
@@ -9,6 +9,8 @@
#ifndef RISCV_FUNCTIONS_H_
#define RISCV_FUNCTIONS_H_
+#include "riscv_natives.h"
+
#ifdef RISCV_RVV
uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t adler32_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
@@ -28,7 +30,7 @@ uint32_t crc32_copy_riscv64_zbc(uint32_t crc, uint8_t *dst, const uint8_t *src,
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// RISCV - RVV
-# if defined(RISCV_RVV) && defined(__riscv_v) && defined(__linux__)
+# ifdef RISCV_RVV_NATIVE
# undef native_adler32
# define native_adler32 adler32_rvv
# undef native_adler32_copy
@@ -46,9 +48,8 @@ uint32_t crc32_copy_riscv64_zbc(uint32_t crc, uint8_t *dst, const uint8_t *src,
# undef native_slide_hash
# define native_slide_hash slide_hash_rvv
# endif
-
// RISCV - CRC32
-# if (defined(RISCV_CRC32_ZBC) && defined (__riscv_zbc))
+# ifdef RISCV_ZBC_NATIVE
# undef native_crc32
# define native_crc32 crc32_riscv64_zbc
# undef native_crc32_copy
diff --git a/arch/riscv/riscv_natives.h b/arch/riscv/riscv_natives.h
new file mode 100644
index 0000000000..38d7aba648
--- /dev/null
+++ b/arch/riscv/riscv_natives.h
@@ -0,0 +1,19 @@
+/* riscv_natives.h -- RISCV compile-time feature detection macros.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef RISCV_NATIVES_H_
+#define RISCV_NATIVES_H_
+
+#if defined(__riscv_v) && defined(__linux__)
+# ifdef RISCV_RVV
+# define RISCV_RVV_NATIVE
+# endif
+#endif
+#if defined(__riscv_zbc)
+# ifdef RISCV_CRC32_ZBC
+# define RISCV_ZBC_NATIVE
+# endif
+#endif
+
+#endif /* RISCV_NATIVES_H_ */
diff --git a/arch/s390/s390_functions.h b/arch/s390/s390_functions.h
index 7de83abb6e..30647051f4 100644
--- a/arch/s390/s390_functions.h
+++ b/arch/s390/s390_functions.h
@@ -5,6 +5,8 @@
#ifndef S390_FUNCTIONS_H_
#define S390_FUNCTIONS_H_
+#include "s390_natives.h"
+
#ifdef S390_CRC32_VX
uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
uint32_t crc32_copy_s390_vx(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
@@ -19,7 +21,8 @@ uint32_t crc32_copy_s390_vx(uint32_t crc, uint8_t *dst, const uint8_t *src, size
#endif
#ifdef DISABLE_RUNTIME_CPU_DETECTION
-# if defined(S390_CRC32_VX) && defined(__zarch__) && __ARCH__ >= 11 && defined(__VX__)
+// S390 - CRC32 VX
+# ifdef S390_CRC32_VX_NATIVE
# undef native_crc32
# define native_crc32 crc32_s390_vx
# undef native_crc32_copy
diff --git a/arch/s390/s390_natives.h b/arch/s390/s390_natives.h
new file mode 100644
index 0000000000..5da913daf5
--- /dev/null
+++ b/arch/s390/s390_natives.h
@@ -0,0 +1,14 @@
+/* s390_natives.h -- s390 compile-time feature detection macros.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef S390_NATIVES_H_
+#define S390_NATIVES_H_
+
+#if defined(__zarch__) && __ARCH__ >= 11 && defined(__VX__)
+# ifdef S390_CRC32_VX
+# define S390_CRC32_VX_NATIVE
+# endif
+#endif
+
+#endif /* S390_NATIVES_H_ */
diff --git a/arch/x86/x86_functions.h b/arch/x86/x86_functions.h
index f6ec9a137c..7b628a851a 100644
--- a/arch/x86/x86_functions.h
+++ b/arch/x86/x86_functions.h
@@ -6,6 +6,8 @@
#ifndef X86_FUNCTIONS_H_
#define X86_FUNCTIONS_H_
+#include "x86_natives.h"
+
/* So great news, your compiler is broken and causes stack smashing. Rather than
* notching out its compilation we'll just remove the assignment in the functable.
* Further context:
@@ -80,7 +82,7 @@ uint32_t crc32_copy_vpclmulqdq(uint32_t crc, uint8_t *dst, const uint8_t *src, s
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// X86 - SSE2
-# if (defined(X86_SSE2) && defined(__SSE2__)) || (defined(ARCH_X86) && defined(ARCH_64BIT))
+# ifdef X86_SSE2_NATIVE
# undef native_chunkmemset_safe
# define native_chunkmemset_safe chunkmemset_safe_sse2
# undef native_compare256
@@ -101,7 +103,7 @@ uint32_t crc32_copy_vpclmulqdq(uint32_t crc, uint8_t *dst, const uint8_t *src, s
# define native_slide_hash slide_hash_sse2
# endif
// X86 - SSSE3
-# if defined(X86_SSSE3) && defined(__SSSE3__)
+# ifdef X86_SSSE3_NATIVE
# undef native_adler32
# define native_adler32 adler32_ssse3
# undef native_adler32_copy
@@ -112,26 +114,26 @@ uint32_t crc32_copy_vpclmulqdq(uint32_t crc, uint8_t *dst, const uint8_t *src, s
# define native_inflate_fast inflate_fast_ssse3
# endif
// X86 - SSE4.1
-# if defined(X86_SSE41) && defined(__SSE4_1__) && !defined(WITHOUT_CHORBA_SSE)
-# undef native_crc32
-# define native_crc32 crc32_chorba_sse41
-# undef native_crc32_copy
-# define native_crc32_copy crc32_copy_chorba_sse41
+# if defined(X86_SSE41_NATIVE) && !defined(WITHOUT_CHORBA_SSE)
+# undef native_crc32
+# define native_crc32 crc32_chorba_sse41
+# undef native_crc32_copy
+# define native_crc32_copy crc32_copy_chorba_sse41
# endif
// X86 - SSE4.2
-# if defined(X86_SSE42) && defined(__SSE4_2__)
+# ifdef X86_SSE42_NATIVE
# undef native_adler32_copy
# define native_adler32_copy adler32_copy_sse42
# endif
// X86 - PCLMUL
-# if defined(X86_PCLMULQDQ_CRC) && defined(__PCLMUL__)
+# ifdef X86_PCLMULQDQ_NATIVE
# undef native_crc32
# define native_crc32 crc32_pclmulqdq
# undef native_crc32_copy
# define native_crc32_copy crc32_copy_pclmulqdq
# endif
// X86 - AVX2
-# if defined(X86_AVX2) && defined(__AVX2__)
+# ifdef X86_AVX2_NATIVE
# undef native_adler32
# define native_adler32 adler32_avx2
# undef native_adler32_copy
@@ -150,7 +152,7 @@ uint32_t crc32_copy_vpclmulqdq(uint32_t crc, uint8_t *dst, const uint8_t *src, s
# define native_slide_hash slide_hash_avx2
# endif
// X86 - AVX512 (F,DQ,BW,Vl)
-# if defined(X86_AVX512) && defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__)
+# ifdef X86_AVX512_NATIVE
# undef native_adler32
# define native_adler32 adler32_avx512
# undef native_adler32_copy
@@ -166,14 +168,14 @@ uint32_t crc32_copy_vpclmulqdq(uint32_t crc, uint8_t *dst, const uint8_t *src, s
# undef native_longest_match_slow
# define native_longest_match_slow longest_match_slow_avx512
// X86 - AVX512 (VNNI)
-# if defined(X86_AVX512VNNI) && defined(__AVX512VNNI__)
+# ifdef X86_AVX512VNNI_NATIVE
# undef native_adler32
# define native_adler32 adler32_avx512_vnni
# undef native_adler32_copy
# define native_adler32_copy adler32_copy_avx512_vnni
# endif
// X86 - VPCLMULQDQ
-# if defined(__PCLMUL__) && defined(__AVX512F__) && defined(__VPCLMULQDQ__)
+# ifdef X86_VPCLMULQDQ_NATIVE
# undef native_crc32
# define native_crc32 crc32_vpclmulqdq
# undef native_crc32_copy
diff --git a/arch/x86/x86_natives.h b/arch/x86/x86_natives.h
new file mode 100644
index 0000000000..75f249d909
--- /dev/null
+++ b/arch/x86/x86_natives.h
@@ -0,0 +1,52 @@
+/* x86_natives.h -- x86 compile-time feature detection macros.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef X86_NATIVES_H_
+#define X86_NATIVES_H_
+
+#if defined(__SSE2__) || (defined(ARCH_X86) && defined(ARCH_64BIT))
+# ifdef X86_SSE2
+# define X86_SSE2_NATIVE
+# endif
+#endif
+#if defined(__SSSE3__)
+# ifdef X86_SSSE3
+# define X86_SSSE3_NATIVE
+# endif
+#endif
+#if defined(__SSE4_1__)
+# ifdef X86_SSE41
+# define X86_SSE41_NATIVE
+# endif
+#endif
+#if defined(__SSE4_2__)
+# ifdef X86_SSE42
+# define X86_SSE42_NATIVE
+# endif
+#endif
+#if defined(__PCLMUL__)
+# ifdef X86_PCLMULQDQ_CRC
+# define X86_PCLMULQDQ_NATIVE
+# endif
+# if defined(__AVX512F__) && defined(__VPCLMULQDQ__)
+# define X86_VPCLMULQDQ_NATIVE
+# endif
+#endif
+#if defined(__AVX2__)
+# ifdef X86_AVX2
+# define X86_AVX2_NATIVE
+# endif
+#endif
+#if defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__)
+# ifdef X86_AVX512
+# define X86_AVX512_NATIVE
+# endif
+#endif
+#if defined(__AVX512VNNI__)
+# ifdef X86_AVX512VNNI
+# define X86_AVX512VNNI_NATIVE
+# endif
+#endif
+
+#endif /* X86_NATIVES_H_ */
diff --git a/arch_natives.h b/arch_natives.h
new file mode 100644
index 0000000000..5fe44516d4
--- /dev/null
+++ b/arch_natives.h
@@ -0,0 +1,24 @@
+/* arch_natives.h -- Compile-time feature detection macros for all architectures.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef ARCH_NATIVES_H_
+#define ARCH_NATIVES_H_
+
+#include "zbuild.h"
+
+#if defined(X86_FEATURES)
+# include "arch/x86/x86_natives.h"
+#elif defined(ARM_FEATURES)
+# include "arch/arm/arm_natives.h"
+#elif defined(PPC_FEATURES) || defined(POWER_FEATURES)
+# include "arch/power/power_natives.h"
+#elif defined(S390_FEATURES)
+# include "arch/s390/s390_natives.h"
+#elif defined(RISCV_FEATURES)
+# include "arch/riscv/riscv_natives.h"
+#elif defined(LOONGARCH_FEATURES)
+# include "arch/loongarch/loongarch_natives.h"
+#endif
+
+#endif /* ARCH_NATIVES_H_ */
diff --git a/functable.c b/functable.c
index b976e72509..478051f40d 100644
--- a/functable.c
+++ b/functable.c
@@ -77,12 +77,47 @@ static int init_functable(void) {
// Set up generic C code fallbacks
#ifndef WITH_ALL_FALLBACKS
-# if defined(ARCH_X86) && defined(ARCH_64BIT) && defined(X86_SSE2)
- // x86_64 always has SSE2, so we can use SSE2 functions as fallbacks where available.
+ // Only use necessary generic functions when no suitable simd versions are available.
+# ifdef X86_SSE2_NATIVE
+ // x86_64 always has SSE2
ft.adler32 = &adler32_c;
ft.adler32_copy = &adler32_copy_c;
ft.crc32 = &crc32_braid;
ft.crc32_copy = &crc32_copy_braid;
+# elif defined(ARM_NEON_NATIVE)
+# ifndef ARM_CRC32_NATIVE
+ ft.crc32 = &crc32_braid;
+ ft.crc32_copy = &crc32_copy_braid;
+# endif
+# elif defined(POWER8_VSX_NATIVE)
+# ifndef POWER9_NATIVE
+ ft.compare256 = &compare256_c;
+ ft.longest_match = &longest_match_c;
+ ft.longest_match_slow = &longest_match_slow_c;
+# endif
+# ifndef POWER8_VSX_CRC32_NATIVE
+ ft.crc32 = &crc32_braid;
+ ft.crc32_copy = &crc32_copy_braid;
+# endif
+# elif defined(LOONGARCH_LSX_NATIVE)
+# ifndef LOONGARCH_CRC
+ ft.crc32 = &crc32_braid;
+ ft.crc32_copy = &crc32_copy_braid;
+# endif
+# elif defined(RISCV_RVV_NATIVE)
+# ifndef RISCV_ZBC_NATIVE
+ ft.crc32 = &crc32_braid;
+ ft.crc32_copy = &crc32_copy_braid;
+# endif
+# elif defined(S390_CRC32_VX_NATIVE)
+ ft.adler32 = &adler32_c;
+ ft.adler32_copy = &adler32_copy_c;
+ ft.chunkmemset_safe = &chunkmemset_safe_c;
+ ft.compare256 = &compare256_c;
+ ft.inflate_fast = &inflate_fast_c;
+ ft.longest_match = &longest_match_c;
+ ft.longest_match_slow = &longest_match_slow_c;
+ ft.slide_hash = &slide_hash_c;
# endif
#else // WITH_ALL_FALLBACKS
ft.adler32 = &adler32_c;
@@ -108,60 +143,82 @@ static int init_functable(void) {
// X86 - SSE2
#ifdef X86_SSE2
-# ifdef ARCH_32BIT
+# ifndef X86_SSE2_NATIVE
if (cf.x86.has_sse2)
# endif
{
+# ifndef X86_AVX2_NATIVE
ft.chunkmemset_safe = &chunkmemset_safe_sse2;
ft.compare256 = &compare256_sse2;
-# if !defined(WITHOUT_CHORBA_SSE)
- ft.crc32 = &crc32_chorba_sse2;
- ft.crc32_copy = &crc32_copy_chorba_sse2;
-# endif
ft.inflate_fast = &inflate_fast_sse2;
ft.longest_match = &longest_match_sse2;
ft.longest_match_slow = &longest_match_slow_sse2;
ft.slide_hash = &slide_hash_sse2;
+# endif
+# if !defined(WITHOUT_CHORBA_SSE) && !defined(X86_PCLMULQDQ_NATIVE)
+ ft.crc32 = &crc32_chorba_sse2;
+ ft.crc32_copy = &crc32_copy_chorba_sse2;
+# endif
}
#endif
// X86 - SSSE3
#ifdef X86_SSSE3
- if (cf.x86.has_ssse3) {
+# ifndef X86_SSSE3_NATIVE
+ if (cf.x86.has_ssse3)
+# endif
+ {
ft.adler32 = &adler32_ssse3;
ft.adler32_copy = &adler32_copy_ssse3;
+# ifndef X86_AVX2_NATIVE
ft.chunkmemset_safe = &chunkmemset_safe_ssse3;
ft.inflate_fast = &inflate_fast_ssse3;
+# endif
}
#endif
// X86 - SSE4.1
-#if defined(X86_SSE41) && !defined(WITHOUT_CHORBA_SSE)
- if (cf.x86.has_sse41) {
+#if defined(X86_SSE41) && !defined(X86_PCLMULQDQ_NATIVE)
+# ifndef X86_SSE41_NATIVE
+ if (cf.x86.has_sse41)
+# endif
+ {
+# ifndef WITHOUT_CHORBA_SSE
ft.crc32 = &crc32_chorba_sse41;
ft.crc32_copy = &crc32_copy_chorba_sse41;
+# endif
}
#endif
// X86 - SSE4.2
-#ifdef X86_SSE42
- if (cf.x86.has_sse42) {
+#if defined(X86_SSE42) && !defined(X86_AVX512_NATIVE)
+# ifndef X86_SSE42_NATIVE
+ if (cf.x86.has_sse42)
+# endif
+ {
ft.adler32_copy = &adler32_copy_sse42;
}
#endif
// X86 - PCLMUL
-#ifdef X86_PCLMULQDQ_CRC
- if (cf.x86.has_pclmulqdq) {
+#if defined(X86_PCLMULQDQ_CRC) && !defined(X86_VPCLMULQDQ_NATIVE)
+# ifndef X86_PCLMULQDQ_NATIVE
+ if (cf.x86.has_pclmulqdq)
+# endif
+ {
ft.crc32 = &crc32_pclmulqdq;
ft.crc32_copy = &crc32_copy_pclmulqdq;
}
#endif
- // X86 - AVX
+ // X86 - AVX2
#ifdef X86_AVX2
/* BMI2 support is all but implicit with AVX2 but let's sanity check this just in case. Enabling BMI2 allows for
* flagless shifts, resulting in fewer flag stalls for the pipeline, and allows us to set destination registers
* for the shift results as an operand, eliminating several register-register moves when the original value needs
* to remain intact. They also allow for a count operand that isn't the CL register, avoiding contention there */
- if (cf.x86.has_avx2 && cf.x86.has_bmi2) {
+# ifndef X86_AVX2_NATIVE
+ if (cf.x86.has_avx2 && cf.x86.has_bmi2)
+# endif
+ {
+# ifndef X86_AVX512_NATIVE
ft.adler32 = &adler32_avx2;
ft.adler32_copy = &adler32_copy_avx2;
ft.chunkmemset_safe = &chunkmemset_safe_avx2;
@@ -169,14 +226,20 @@ static int init_functable(void) {
ft.inflate_fast = &inflate_fast_avx2;
ft.longest_match = &longest_match_avx2;
ft.longest_match_slow = &longest_match_slow_avx2;
+# endif
ft.slide_hash = &slide_hash_avx2;
}
#endif
// X86 - AVX512 (F,DQ,BW,Vl)
#ifdef X86_AVX512
- if (cf.x86.has_avx512_common) {
+# ifndef X86_AVX512_NATIVE
+ if (cf.x86.has_avx512_common)
+# endif
+ {
+# ifndef X86_AVX512VNNI_NATIVE
ft.adler32 = &adler32_avx512;
ft.adler32_copy = &adler32_copy_avx512;
+# endif
ft.chunkmemset_safe = &chunkmemset_safe_avx512;
ft.compare256 = &compare256_avx512;
ft.inflate_fast = &inflate_fast_avx512;
@@ -185,14 +248,20 @@ static int init_functable(void) {
}
#endif
#ifdef X86_AVX512VNNI
- if (cf.x86.has_avx512vnni) {
+# ifndef X86_AVX512VNNI_NATIVE
+ if (cf.x86.has_avx512vnni)
+# endif
+ {
ft.adler32 = &adler32_avx512_vnni;
ft.adler32_copy = &adler32_copy_avx512_vnni;
}
#endif
// X86 - VPCLMULQDQ
#ifdef X86_VPCLMULQDQ_CRC
- if (cf.x86.has_pclmulqdq && cf.x86.has_avx512_common && cf.x86.has_vpclmulqdq) {
+# ifndef X86_VPCLMULQDQ_NATIVE
+ if (cf.x86.has_pclmulqdq && cf.x86.has_avx512_common && cf.x86.has_vpclmulqdq)
+# endif
+ {
ft.crc32 = &crc32_vpclmulqdq;
ft.crc32_copy = &crc32_copy_vpclmulqdq;
}
@@ -200,8 +269,8 @@ static int init_functable(void) {
// ARM - SIMD
-#ifdef ARM_SIMD
-# ifndef ARM_NOCHECK_SIMD
+#if defined(ARM_SIMD) && !defined(ARM_NEON_NATIVE)
+# ifndef ARM_SIMD_NATIVE
if (cf.arm.has_simd)
# endif
{
@@ -210,7 +279,7 @@ static int init_functable(void) {
#endif
// ARM - NEON
#ifdef ARM_NEON
-# ifndef ARM_NOCHECK_NEON
+# ifndef ARM_NEON_NATIVE
if (cf.arm.has_neon)
# endif
{
@@ -225,15 +294,21 @@ static int init_functable(void) {
}
#endif
// ARM - CRC32
-#ifdef ARM_CRC32
- if (cf.arm.has_crc32) {
+#if defined(ARM_CRC32) && !defined(ARM_PMULL_EOR3_NATIVE)
+# ifndef ARM_CRC32_NATIVE
+ if (cf.arm.has_crc32)
+# endif
+ {
ft.crc32 = &crc32_armv8;
ft.crc32_copy = &crc32_copy_armv8;
}
#endif
// ARM - PMULL EOR3
#ifdef ARM_PMULL_EOR3
- if (cf.arm.has_crc32 && cf.arm.has_pmull && cf.arm.has_eor3 && cf.arm.has_fast_pmull) {
+# ifndef ARM_PMULL_EOR3_NATIVE
+ if (cf.arm.has_crc32 && cf.arm.has_pmull && cf.arm.has_eor3 && cf.arm.has_fast_pmull)
+# endif
+ {
ft.crc32 = &crc32_armv8_pmull_eor3;
ft.crc32_copy = &crc32_copy_armv8_pmull_eor3;
}
@@ -241,7 +316,10 @@ static int init_functable(void) {
// Power - VMX
#ifdef PPC_VMX
- if (cf.power.has_altivec) {
+# ifndef PPC_VMX_NATIVE
+ if (cf.power.has_altivec)
+# endif
+ {
ft.adler32 = &adler32_vmx;
ft.adler32_copy = &adler32_copy_vmx;
ft.slide_hash = &slide_hash_vmx;
@@ -249,7 +327,10 @@ static int init_functable(void) {
#endif
// Power8 - VSX
#ifdef POWER8_VSX
- if (cf.power.has_arch_2_07) {
+# ifndef POWER8_VSX_NATIVE
+ if (cf.power.has_arch_2_07)
+# endif
+ {
ft.adler32 = &adler32_power8;
ft.adler32_copy = &adler32_copy_power8;
ft.chunkmemset_safe = &chunkmemset_safe_power8;
@@ -258,14 +339,20 @@ static int init_functable(void) {
}
#endif
#ifdef POWER8_VSX_CRC32
- if (cf.power.has_arch_2_07) {
+# ifndef POWER8_VSX_CRC32_NATIVE
+ if (cf.power.has_arch_2_07)
+# endif
+ {
ft.crc32 = &crc32_power8;
ft.crc32_copy = &crc32_copy_power8;
}
#endif
// Power9
#ifdef POWER9
- if (cf.power.has_arch_3_00) {
+# ifndef POWER9_NATIVE
+ if (cf.power.has_arch_3_00)
+# endif
+ {
ft.compare256 = &compare256_power9;
ft.longest_match = &longest_match_power9;
ft.longest_match_slow = &longest_match_slow_power9;
@@ -275,7 +362,10 @@ static int init_functable(void) {
// RISCV - RVV
#ifdef RISCV_RVV
- if (cf.riscv.has_rvv) {
+# ifndef RISCV_RVV_NATIVE
+ if (cf.riscv.has_rvv)
+# endif
+ {
ft.adler32 = &adler32_rvv;
ft.adler32_copy = &adler32_copy_rvv;
ft.chunkmemset_safe = &chunkmemset_safe_rvv;
@@ -289,7 +379,10 @@ static int init_functable(void) {
// RISCV - ZBC
#ifdef RISCV_CRC32_ZBC
- if (cf.riscv.has_zbc) {
+# ifndef RISCV_ZBC_NATIVE
+ if (cf.riscv.has_zbc)
+# endif
+ {
ft.crc32 = &crc32_riscv64_zbc;
ft.crc32_copy = &crc32_copy_riscv64_zbc;
}
@@ -297,7 +390,10 @@ static int init_functable(void) {
// S390
#ifdef S390_CRC32_VX
- if (cf.s390.has_vx) {
+# ifndef S390_CRC32_VX_NATIVE
+ if (cf.s390.has_vx)
+# endif
+ {
ft.crc32 = &crc32_s390_vx;
ft.crc32_copy = &crc32_copy_s390_vx;
}
@@ -305,13 +401,19 @@ static int init_functable(void) {
// LOONGARCH
#ifdef LOONGARCH_CRC
- if (cf.loongarch.has_crc) {
+# ifndef LOONGARCH_CRC_NATIVE
+ if (cf.loongarch.has_crc)
+# endif
+ {
ft.crc32 = &crc32_loongarch64;
ft.crc32_copy = &crc32_copy_loongarch64;
}
#endif
-#ifdef LOONGARCH_LSX
- if (cf.loongarch.has_lsx) {
+#if defined(LOONGARCH_LSX) && !defined(LOONGARCH_LASX_NATIVE)
+# ifndef LOONGARCH_LSX_NATIVE
+ if (cf.loongarch.has_lsx)
+# endif
+ {
ft.adler32 = &adler32_lsx;
ft.adler32_copy = &adler32_copy_lsx;
ft.chunkmemset_safe = &chunkmemset_safe_lsx;
@@ -323,7 +425,10 @@ static int init_functable(void) {
}
#endif
#ifdef LOONGARCH_LASX
- if (cf.loongarch.has_lasx) {
+# ifndef LOONGARCH_LASX_NATIVE
+ if (cf.loongarch.has_lasx)
+# endif
+ {
ft.adler32 = &adler32_lasx;
ft.adler32_copy = &adler32_copy_lasx;
ft.chunkmemset_safe = &chunkmemset_safe_lasx;