summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/arm/arm_features.c12
-rw-r--r--arch/arm/arm_features.h8
-rw-r--r--arch/power/power_features.c12
-rw-r--r--arch/power/power_features.h10
-rw-r--r--arch/s390/crc32-vx.c2
-rw-r--r--arch/s390/s390_features.c8
-rw-r--r--arch/s390/s390_features.h6
-rw-r--r--arch/x86/x86_features.c59
-rw-r--r--arch/x86/x86_features.h26
-rw-r--r--cpu_features.c17
-rw-r--r--cpu_features.h16
-rw-r--r--deflate.c3
-rw-r--r--functable.c37
-rw-r--r--inflate.c3
-rw-r--r--test/benchmarks/benchmark_adler32.cc16
-rw-r--r--test/benchmarks/benchmark_adler32_copy.cc30
-rw-r--r--test/benchmarks/benchmark_compare256.cc10
-rw-r--r--test/benchmarks/benchmark_crc32.cc10
-rw-r--r--test/benchmarks/benchmark_main.cc6
-rw-r--r--test/benchmarks/benchmark_slidehash.cc12
-rw-r--r--test/test_adler32.cc16
-rw-r--r--test/test_compare256.cc10
-rw-r--r--test/test_cpu_features.h8
-rw-r--r--test/test_crc32.cc12
-rw-r--r--test/test_main.cc8
25 files changed, 180 insertions, 177 deletions
diff --git a/arch/arm/arm_features.c b/arch/arm/arm_features.c
index d41c13acb0..7394351fa1 100644
--- a/arch/arm/arm_features.c
+++ b/arch/arm/arm_features.c
@@ -1,4 +1,5 @@
#include "../../zbuild.h"
+#include "arm_features.h"
#if defined(__linux__) && defined(HAVE_SYS_AUXV_H)
# include <sys/auxv.h>
@@ -71,14 +72,11 @@ static inline int arm_has_neon() {
}
#endif
-Z_INTERNAL int arm_cpu_has_neon;
-Z_INTERNAL int arm_cpu_has_crc32;
-
-void Z_INTERNAL arm_check_features(void) {
+void Z_INTERNAL arm_check_features(struct arm_cpu_features *features) {
#if defined(__aarch64__) || defined(_M_ARM64)
- arm_cpu_has_neon = 1; /* always available */
+ features->has_neon = 1; /* always available */
#else
- arm_cpu_has_neon = arm_has_neon();
+ features->has_neon = arm_has_neon();
#endif
- arm_cpu_has_crc32 = arm_has_crc32();
+ features->has_crc32 = arm_has_crc32();
}
diff --git a/arch/arm/arm_features.h b/arch/arm/arm_features.h
index 7998e79422..6fcd8d3eb5 100644
--- a/arch/arm/arm_features.h
+++ b/arch/arm/arm_features.h
@@ -5,9 +5,11 @@
#ifndef ARM_H_
#define ARM_H_
-extern int arm_cpu_has_neon;
-extern int arm_cpu_has_crc32;
+struct arm_cpu_features {
+ int has_neon;
+ int has_crc32;
+};
-void Z_INTERNAL arm_check_features(void);
+void Z_INTERNAL arm_check_features(struct arm_cpu_features *features);
#endif /* ARM_H_ */
diff --git a/arch/power/power_features.c b/arch/power/power_features.c
index 0614ff0f25..003a4c6e3c 100644
--- a/arch/power/power_features.c
+++ b/arch/power/power_features.c
@@ -13,11 +13,7 @@
#include "../../zbuild.h"
#include "power_features.h"
-Z_INTERNAL int power_cpu_has_altivec = 0;
-Z_INTERNAL int power_cpu_has_arch_2_07 = 0;
-Z_INTERNAL int power_cpu_has_arch_3_00 = 0;
-
-void Z_INTERNAL power_check_features(void) {
+void Z_INTERNAL power_check_features(struct power_cpu_features *features) {
#ifdef PPC_FEATURES
unsigned long hwcap;
#ifdef __FreeBSD__
@@ -27,7 +23,7 @@ void Z_INTERNAL power_check_features(void) {
#endif
if (hwcap & PPC_FEATURE_HAS_ALTIVEC)
- power_cpu_has_altivec = 1;
+ features->has_altivec = 1;
#endif
#ifdef POWER_FEATURES
@@ -39,8 +35,8 @@ void Z_INTERNAL power_check_features(void) {
#endif
if (hwcap2 & PPC_FEATURE2_ARCH_2_07)
- power_cpu_has_arch_2_07 = 1;
+ features->has_arch_2_07 = 1;
if (hwcap2 & PPC_FEATURE2_ARCH_3_00)
- power_cpu_has_arch_3_00 = 1;
+ features->has_arch_3_00 = 1;
#endif
}
diff --git a/arch/power/power_features.h b/arch/power/power_features.h
index 8df9f9e958..9252364cc4 100644
--- a/arch/power/power_features.h
+++ b/arch/power/power_features.h
@@ -7,10 +7,12 @@
#ifndef POWER_H_
#define POWER_H_
-extern int power_cpu_has_altivec;
-extern int power_cpu_has_arch_2_07;
-extern int power_cpu_has_arch_3_00;
+struct power_cpu_features {
+ int has_altivec;
+ int has_arch_2_07;
+ int has_arch_3_00;
+};
-void Z_INTERNAL power_check_features(void);
+void Z_INTERNAL power_check_features(struct power_cpu_features *features);
#endif /* POWER_H_ */
diff --git a/arch/s390/crc32-vx.c b/arch/s390/crc32-vx.c
index 78c0be5cee..acfa21887e 100644
--- a/arch/s390/crc32-vx.c
+++ b/arch/s390/crc32-vx.c
@@ -198,7 +198,7 @@ static uint32_t crc32_le_vgfm_16(uint32_t crc, const uint8_t *buf, size_t len) {
#define VX_ALIGNMENT 16L
#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
-uint32_t Z_INTERNAL PREFIX(s390_crc32_vx)(uint32_t crc, const unsigned char *buf, size_t len) {
+uint32_t Z_INTERNAL crc32_s390_vx(uint32_t crc, const unsigned char *buf, size_t len) {
size_t prealign, aligned, remaining;
if (len < VX_MIN_LEN + VX_ALIGN_MASK)
diff --git a/arch/s390/s390_features.c b/arch/s390/s390_features.c
index 0658e4bbeb..711b7dd460 100644
--- a/arch/s390/s390_features.c
+++ b/arch/s390/s390_features.c
@@ -5,10 +5,6 @@
# include <sys/auxv.h>
#endif
-Z_INTERNAL int PREFIX(s390_cpu_has_vx) = 0;
-
-void Z_INTERNAL PREFIX(s390_check_features)(void) {
-#ifdef S390_FEATURES
- PREFIX(s390_cpu_has_vx) = getauxval(AT_HWCAP) & HWCAP_S390_VX;
-#endif
+void Z_INTERNAL s390_check_features(struct s390_cpu_features *features) {
+ features->has_vx = getauxval(AT_HWCAP) & HWCAP_S390_VX;
}
diff --git a/arch/s390/s390_features.h b/arch/s390/s390_features.h
index 9e2608fa9e..b8ffef74d8 100644
--- a/arch/s390/s390_features.h
+++ b/arch/s390/s390_features.h
@@ -1,8 +1,10 @@
#ifndef S390_FEATURES_H_
#define S390_FEATURES_H_
-extern int PREFIX(s390_cpu_has_vx);
+struct s390_cpu_features {
+ int has_vx;
+};
-void Z_INTERNAL PREFIX(s390_check_features)(void);
+void Z_INTERNAL s390_check_features(struct s390_cpu_features *features);
#endif
diff --git a/arch/x86/x86_features.c b/arch/x86/x86_features.c
index 2c5cb54c65..4ff7f63ee8 100644
--- a/arch/x86/x86_features.c
+++ b/arch/x86/x86_features.c
@@ -8,6 +8,7 @@
*/
#include "../../zbuild.h"
+#include "x86_features.h"
#ifdef _WIN32
# include <intrin.h>
@@ -18,18 +19,6 @@
#include <string.h>
-Z_INTERNAL int x86_cpu_has_avx2;
-Z_INTERNAL int x86_cpu_has_avx512;
-Z_INTERNAL int x86_cpu_has_avx512vnni;
-Z_INTERNAL int x86_cpu_has_sse2;
-Z_INTERNAL int x86_cpu_has_ssse3;
-Z_INTERNAL int x86_cpu_has_sse41;
-Z_INTERNAL int x86_cpu_has_sse42;
-Z_INTERNAL int x86_cpu_has_pclmulqdq;
-Z_INTERNAL int x86_cpu_has_vpclmulqdq;
-Z_INTERNAL int x86_cpu_has_os_save_ymm;
-Z_INTERNAL int x86_cpu_has_os_save_zmm;
-
static inline void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
#ifdef _WIN32
unsigned int registers[4];
@@ -68,27 +57,27 @@ static inline uint64_t xgetbv(unsigned int xcr) {
#endif
}
-void Z_INTERNAL x86_check_features(void) {
+void Z_INTERNAL x86_check_features(struct x86_cpu_features *features) {
unsigned eax, ebx, ecx, edx;
unsigned maxbasic;
cpuid(0, &maxbasic, &ebx, &ecx, &edx);
cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx);
- x86_cpu_has_sse2 = edx & 0x4000000;
- x86_cpu_has_ssse3 = ecx & 0x200;
- x86_cpu_has_sse41 = ecx & 0x80000;
- x86_cpu_has_sse42 = ecx & 0x100000;
- x86_cpu_has_pclmulqdq = ecx & 0x2;
+ features->has_sse2 = edx & 0x4000000;
+ features->has_ssse3 = ecx & 0x200;
+ features->has_sse41 = ecx & 0x80000;
+ features->has_sse42 = ecx & 0x100000;
+ features->has_pclmulqdq = ecx & 0x2;
if (ecx & 0x08000000) {
uint64_t xfeature = xgetbv(0);
- x86_cpu_has_os_save_ymm = ((xfeature & 0x06) == 0x06);
- x86_cpu_has_os_save_zmm = ((xfeature & 0xe6) == 0xe6);
+ features->has_os_save_ymm = ((xfeature & 0x06) == 0x06);
+ features->has_os_save_zmm = ((xfeature & 0xe6) == 0xe6);
} else {
- x86_cpu_has_os_save_ymm = 0;
- x86_cpu_has_os_save_zmm = 0;
+ features->has_os_save_ymm = 0;
+ features->has_os_save_zmm = 0;
}
if (maxbasic >= 7) {
@@ -96,27 +85,27 @@ void Z_INTERNAL x86_check_features(void) {
// check BMI1 bit
// Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf
- x86_cpu_has_vpclmulqdq = ecx & 0x400;
+ features->has_vpclmulqdq = ecx & 0x400;
// check AVX2 bit if the OS supports saving YMM registers
- if (x86_cpu_has_os_save_ymm) {
- x86_cpu_has_avx2 = ebx & 0x20;
+ if (features->has_os_save_ymm) {
+ features->has_avx2 = ebx & 0x20;
} else {
- x86_cpu_has_avx2 = 0;
+ features->has_avx2 = 0;
}
// check AVX512 bits if the OS supports saving ZMM registers
- if (x86_cpu_has_os_save_zmm) {
- x86_cpu_has_avx512 = ebx & 0x00010000;
- x86_cpu_has_avx512vnni = ecx & 0x800;
+ if (features->has_os_save_zmm) {
+ features->has_avx512 = ebx & 0x00010000;
+ features->has_avx512vnni = ecx & 0x800;
} else {
- x86_cpu_has_avx512 = 0;
- x86_cpu_has_avx512vnni = 0;
+ features->has_avx512 = 0;
+ features->has_avx512vnni = 0;
}
} else {
- x86_cpu_has_avx2 = 0;
- x86_cpu_has_avx512 = 0;
- x86_cpu_has_avx512vnni = 0;
- x86_cpu_has_vpclmulqdq = 0;
+ features->has_avx2 = 0;
+ features->has_avx512 = 0;
+ features->has_avx512vnni = 0;
+ features->has_vpclmulqdq = 0;
}
}
diff --git a/arch/x86/x86_features.h b/arch/x86/x86_features.h
index 06677b2e12..00b510ffc1 100644
--- a/arch/x86/x86_features.h
+++ b/arch/x86/x86_features.h
@@ -6,18 +6,20 @@
#ifndef X86_FEATURES_H_
#define X86_FEATURES_H_
-extern int x86_cpu_has_avx2;
-extern int x86_cpu_has_avx512;
-extern int x86_cpu_has_avx512vnni;
-extern int x86_cpu_has_sse2;
-extern int x86_cpu_has_ssse3;
-extern int x86_cpu_has_sse41;
-extern int x86_cpu_has_sse42;
-extern int x86_cpu_has_pclmulqdq;
-extern int x86_cpu_has_vpclmulqdq;
-extern int x86_cpu_has_os_save_ymm;
-extern int x86_cpu_has_os_save_zmm;
+struct x86_cpu_features {
+ int has_avx2;
+ int has_avx512;
+ int has_avx512vnni;
+ int has_sse2;
+ int has_ssse3;
+ int has_sse41;
+ int has_sse42;
+ int has_pclmulqdq;
+ int has_vpclmulqdq;
+ int has_os_save_ymm;
+ int has_os_save_zmm;
+};
-void Z_INTERNAL x86_check_features(void);
+void Z_INTERNAL x86_check_features(struct x86_cpu_features *features);
#endif /* CPU_H_ */
diff --git a/cpu_features.c b/cpu_features.c
index b5e7257696..b69a01304a 100644
--- a/cpu_features.c
+++ b/cpu_features.c
@@ -4,21 +4,18 @@
*/
#include "zbuild.h"
-
#include "cpu_features.h"
+#include <string.h>
-Z_INTERNAL void cpu_check_features(void) {
- static int features_checked = 0;
- if (features_checked)
- return;
+Z_INTERNAL void cpu_check_features(struct cpu_features *features) {
+ memset(features, 0, sizeof(struct cpu_features));
#if defined(X86_FEATURES)
- x86_check_features();
+ x86_check_features(&features->x86);
#elif defined(ARM_FEATURES)
- arm_check_features();
+ arm_check_features(&features->arm);
#elif defined(PPC_FEATURES) || defined(POWER_FEATURES)
- power_check_features();
+ power_check_features(&features->power);
#elif defined(S390_FEATURES)
- PREFIX(s390_check_features)();
+ s390_check_features(&features->s390);
#endif
- features_checked = 1;
}
diff --git a/cpu_features.h b/cpu_features.h
index 22d70da3d9..14eb19a751 100644
--- a/cpu_features.h
+++ b/cpu_features.h
@@ -20,7 +20,19 @@
# include "arch/s390/s390_features.h"
#endif
-extern void cpu_check_features(void);
+struct cpu_features {
+#if defined(X86_FEATURES)
+ struct x86_cpu_features x86;
+#elif defined(ARM_FEATURES)
+ struct arm_cpu_features arm;
+#elif defined(PPC_FEATURES) || defined(POWER_FEATURES)
+ struct power_cpu_features power;
+#elif defined(S390_FEATURES)
+ struct s390_cpu_features s390;
+#endif
+};
+
+extern void cpu_check_features(struct cpu_features *features);
/* adler32 */
typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
@@ -134,7 +146,7 @@ extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
#elif defined(POWER8_VSX)
extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
#elif defined(S390_CRC32_VX)
-extern uint32_t PREFIX(s390_crc32_vx)(uint32_t crc, const uint8_t *buf, size_t len);
+extern uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
#endif
/* compare256 */
diff --git a/deflate.c b/deflate.c
index 273967b426..3ea92a82dd 100644
--- a/deflate.c
+++ b/deflate.c
@@ -48,7 +48,6 @@
*/
#include "zbuild.h"
-#include "cpu_features.h"
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
@@ -195,8 +194,6 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level
deflate_state *s;
int wrap = 1;
- cpu_check_features();
-
if (strm == NULL)
return Z_STREAM_ERROR;
diff --git a/functable.c b/functable.c
index da9d10ec5b..c7d477c7f0 100644
--- a/functable.c
+++ b/functable.c
@@ -13,8 +13,9 @@
static void init_functable(void) {
struct functable_s ft;
+ struct cpu_features cf;
- cpu_check_features();
+ cpu_check_features(&cf);
// Generic code
ft.adler32 = &adler32_c;
@@ -58,7 +59,7 @@ static void init_functable(void) {
// X86 - SSE2
#ifdef X86_SSE2
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
- if (x86_cpu_has_sse2)
+ if (cf.x86.has_sse2)
# endif
{
ft.chunkmemset_safe = &chunkmemset_safe_sse2;
@@ -74,18 +75,18 @@ static void init_functable(void) {
#endif
// X86 - SSSE3
#ifdef X86_SSSE3
- if (x86_cpu_has_ssse3)
+ if (cf.x86.has_ssse3)
ft.adler32 = &adler32_ssse3;
#endif
// X86 - SSE4
#if defined(X86_SSE41) && defined(X86_SSE2)
- if (x86_cpu_has_sse41) {
+ if (cf.x86.has_sse41) {
ft.chunkmemset_safe = &chunkmemset_safe_sse41;
ft.inflate_fast = &inflate_fast_sse41;
}
#endif
#ifdef X86_SSE42
- if (x86_cpu_has_sse42) {
+ if (cf.x86.has_sse42) {
ft.adler32_fold_copy = &adler32_fold_copy_sse42;
ft.insert_string = &insert_string_sse4;
ft.quick_insert_string = &quick_insert_string_sse4;
@@ -94,7 +95,7 @@ static void init_functable(void) {
#endif
// X86 - PCLMUL
#ifdef X86_PCLMULQDQ_CRC
- if (x86_cpu_has_pclmulqdq) {
+ if (cf.x86.has_pclmulqdq) {
ft.crc32 = &crc32_pclmulqdq;
ft.crc32_fold = &crc32_fold_pclmulqdq;
ft.crc32_fold_copy = &crc32_fold_pclmulqdq_copy;
@@ -104,7 +105,7 @@ static void init_functable(void) {
#endif
// X86 - AVX
#ifdef X86_AVX2
- if (x86_cpu_has_avx2) {
+ if (cf.x86.has_avx2) {
ft.adler32 = &adler32_avx2;
ft.adler32_fold_copy = &adler32_fold_copy_avx2;
ft.chunkmemset_safe = &chunkmemset_safe_avx;
@@ -119,20 +120,20 @@ static void init_functable(void) {
}
#endif
#ifdef X86_AVX512
- if (x86_cpu_has_avx512) {
+ if (cf.x86.has_avx512) {
ft.adler32 = &adler32_avx512;
ft.adler32_fold_copy = &adler32_fold_copy_avx512;
}
#endif
#ifdef X86_AVX512VNNI
- if (x86_cpu_has_avx512vnni) {
+ if (cf.x86.has_avx512vnni) {
ft.adler32 = &adler32_avx512_vnni;
ft.adler32_fold_copy = &adler32_fold_copy_avx512_vnni;
}
#endif
// X86 - VPCLMULQDQ
#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC)
- if (x86_cpu_has_pclmulqdq && x86_cpu_has_avx512 && x86_cpu_has_vpclmulqdq) {
+ if (cf.x86.has_pclmulqdq && cf.x86.has_avx512 && cf.x86.has_vpclmulqdq) {
ft.crc32 = &crc32_vpclmulqdq;
ft.crc32_fold = &crc32_fold_vpclmulqdq;
ft.crc32_fold_copy = &crc32_fold_vpclmulqdq_copy;
@@ -145,7 +146,7 @@ static void init_functable(void) {
// ARM - NEON
#ifdef ARM_NEON
# ifndef ARM_NOCHECK_NEON
- if (arm_cpu_has_neon)
+ if (cf.arm.has_neon)
# endif
{
ft.adler32 = &adler32_neon;
@@ -162,7 +163,7 @@ static void init_functable(void) {
#endif
// ARM - ACLE
#ifdef ARM_ACLE
- if (arm_cpu_has_crc32) {
+ if (cf.arm.has_crc32) {
ft.crc32 = &crc32_acle;
ft.insert_string = &insert_string_acle;
ft.quick_insert_string = &quick_insert_string_acle;
@@ -173,14 +174,14 @@ static void init_functable(void) {
// Power - VMX
#ifdef PPC_VMX
- if (power_cpu_has_altivec) {
+ if (cf.power.has_altivec) {
ft.adler32 = &adler32_vmx;
ft.slide_hash = &slide_hash_vmx;
}
#endif
// Power8 - VSX
#ifdef POWER8_VSX
- if (power_cpu_has_arch_2_07) {
+ if (cf.power.has_arch_2_07) {
ft.adler32 = &adler32_power8;
ft.chunkmemset_safe = &chunkmemset_safe_power8;
ft.chunksize = &chunksize_power8;
@@ -189,12 +190,12 @@ static void init_functable(void) {
}
#endif
#ifdef POWER8_VSX_CRC32
- if (power_cpu_has_arch_2_07)
+ if (cf.power.has_arch_2_07)
ft.crc32 = &crc32_power8;
#endif
// Power9
#ifdef POWER9
- if (power_cpu_has_arch_3_00) {
+ if (cf.power.has_arch_3_00) {
ft.compare256 = &compare256_power9;
ft.longest_match = &longest_match_power9;
ft.longest_match_slow = &longest_match_slow_power9;
@@ -204,8 +205,8 @@ static void init_functable(void) {
// S390
#ifdef S390_CRC32_VX
- if (PREFIX(s390_cpu_has_vx))
- ft.crc32 = &PREFIX(s390_crc32_vx);
+ if (cf.s390.has_vx)
+ ft.crc32 = crc32_s390_vx;
#endif
// Assign function pointers individually for atomic operation
diff --git a/inflate.c b/inflate.c
index 506bb2a50a..df4c56a168 100644
--- a/inflate.c
+++ b/inflate.c
@@ -5,7 +5,6 @@
#include "zbuild.h"
#include "zutil.h"
-#include "cpu_features.h"
#include "inftrees.h"
#include "inflate.h"
#include "inflate_p.h"
@@ -140,8 +139,6 @@ int32_t ZNG_CONDEXPORT PREFIX(inflateInit2)(PREFIX3(stream) *strm, int32_t windo
int32_t ret;
struct inflate_state *state;
- cpu_check_features();
-
if (strm == NULL)
return Z_STREAM_ERROR;
strm->msg = NULL; /* in case we return an error */
diff --git a/test/benchmarks/benchmark_adler32.cc b/test/benchmarks/benchmark_adler32.cc
index 19691376fb..5b0b65d67b 100644
--- a/test/benchmarks/benchmark_adler32.cc
+++ b/test/benchmarks/benchmark_adler32.cc
@@ -11,7 +11,7 @@
extern "C" {
# include "zbuild.h"
# include "zutil_p.h"
-# include "cpu_features.h"
+# include "../test_cpu_features.h"
}
#define MAX_RANDOM_INTS (1024 * 1024)
@@ -65,25 +65,25 @@ public:
BENCHMARK_ADLER32(c, adler32_c, 1);
#ifdef ARM_NEON
-BENCHMARK_ADLER32(neon, adler32_neon, arm_cpu_has_neon);
+BENCHMARK_ADLER32(neon, adler32_neon, test_cpu_features.arm.has_neon);
#endif
#ifdef PPC_VMX
-BENCHMARK_ADLER32(vmx, adler32_vmx, power_cpu_has_altivec);
+BENCHMARK_ADLER32(vmx, adler32_vmx, test_cpu_features.power.has_altivec);
#endif
#ifdef POWER8_VSX
-BENCHMARK_ADLER32(power8, adler32_power8, power_cpu_has_arch_2_07);
+BENCHMARK_ADLER32(power8, adler32_power8, test_cpu_features.power.has_arch_2_07);
#endif
#ifdef X86_SSSE3
-BENCHMARK_ADLER32(ssse3, adler32_ssse3, x86_cpu_has_ssse3);
+BENCHMARK_ADLER32(ssse3, adler32_ssse3, test_cpu_features.x86.has_ssse3);
#endif
#ifdef X86_AVX2
-BENCHMARK_ADLER32(avx2, adler32_avx2, x86_cpu_has_avx2);
+BENCHMARK_ADLER32(avx2, adler32_avx2, test_cpu_features.x86.has_avx2);
#endif
#ifdef X86_AVX512
-BENCHMARK_ADLER32(avx512, adler32_avx512, x86_cpu_has_avx512);
+BENCHMARK_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512);
#endif
#ifdef X86_AVX512VNNI
-BENCHMARK_ADLER32(avx512_vnni, adler32_avx512_vnni, x86_cpu_has_avx512vnni);
+BENCHMARK_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
#endif
diff --git a/test/benchmarks/benchmark_adler32_copy.cc b/test/benchmarks/benchmark_adler32_copy.cc
index d508a004aa..cbee780b76 100644
--- a/test/benchmarks/benchmark_adler32_copy.cc
+++ b/test/benchmarks/benchmark_adler32_copy.cc
@@ -12,7 +12,7 @@
extern "C" {
# include "zbuild.h"
# include "zutil_p.h"
-# include "cpu_features.h"
+# include "../test_cpu_features.h"
}
#define MAX_RANDOM_INTS (1024 * 1024)
@@ -87,32 +87,32 @@ BENCHMARK_ADLER32_BASELINE_COPY(c, adler32_c, 1);
#ifdef ARM_NEON
/* If we inline this copy for neon, the function would go here */
-//BENCHMARK_ADLER32_COPY(neon, adler32_neon, arm_cpu_has_neon);
-BENCHMARK_ADLER32_BASELINE_COPY(neon_copy_baseline, adler32_neon, arm_cpu_has_neon);
+//BENCHMARK_ADLER32_COPY(neon, adler32_neon, test_cpu_features.arm.has_neon);
+BENCHMARK_ADLER32_BASELINE_COPY(neon_copy_baseline, adler32_neon, test_cpu_features.arm.has_neon);
#endif
#ifdef PPC_VMX
-//BENCHMARK_ADLER32_COPY(vmx_inline_copy, adler32_fold_copy_vmx, power_cpu_has_altivec);
-BENCHMARK_ADLER32_BASELINE_COPY(vmx_copy_baseline, adler32_vmx, power_cpu_has_altivec);
+//BENCHMARK_ADLER32_COPY(vmx_inline_copy, adler32_fold_copy_vmx, test_cpu_features.power.has_altivec);
+BENCHMARK_ADLER32_BASELINE_COPY(vmx_copy_baseline, adler32_vmx, test_cpu_features.power.has_altivec);
#endif
#ifdef POWER8_VSX
-//BENCHMARK_ADLER32_COPY(power8_inline_copy, adler32_fold_copy_power8, power_cpu_has_arch_2_07);
-BENCHMARK_ADLER32_BASELINE_COPY(power8, adler32_power8, power_cpu_has_arch_2_07);
+//BENCHMARK_ADLER32_COPY(power8_inline_copy, adler32_fold_copy_power8, test_cpu_features.power.has_arch_2_07);
+BENCHMARK_ADLER32_BASELINE_COPY(power8, adler32_power8, test_cpu_features.power.has_arch_2_07);
#endif
#ifdef X86_SSE42
-BENCHMARK_ADLER32_BASELINE_COPY(sse42_baseline, adler32_ssse3, x86_cpu_has_ssse3);
-BENCHMARK_ADLER32_COPY(sse42, adler32_fold_copy_sse42, x86_cpu_has_sse42);
+BENCHMARK_ADLER32_BASELINE_COPY(sse42_baseline, adler32_ssse3, test_cpu_features.x86.has_ssse3);
+BENCHMARK_ADLER32_COPY(sse42, adler32_fold_copy_sse42, test_cpu_features.x86.has_sse42);
#endif
#ifdef X86_AVX2
-BENCHMARK_ADLER32_BASELINE_COPY(avx2_baseline, adler32_avx2, x86_cpu_has_avx2);
-BENCHMARK_ADLER32_COPY(avx2, adler32_fold_copy_avx2, x86_cpu_has_avx2);
+BENCHMARK_ADLER32_BASELINE_COPY(avx2_baseline, adler32_avx2, test_cpu_features.x86.has_avx2);
+BENCHMARK_ADLER32_COPY(avx2, adler32_fold_copy_avx2, test_cpu_features.x86.has_avx2);
#endif
#ifdef X86_AVX512
-BENCHMARK_ADLER32_BASELINE_COPY(avx512_baseline, adler32_avx512, x86_cpu_has_avx512);
-BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, x86_cpu_has_avx512);
+BENCHMARK_ADLER32_BASELINE_COPY(avx512_baseline, adler32_avx512, test_cpu_features.x86.has_avx512);
+BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, test_cpu_features.x86.has_avx512);
#endif
#ifdef X86_AVX512VNNI
-BENCHMARK_ADLER32_BASELINE_COPY(avx512_vnni_baseline, adler32_avx512_vnni, x86_cpu_has_avx512vnni);
-BENCHMARK_ADLER32_COPY(avx512_vnni, adler32_fold_copy_avx512_vnni, x86_cpu_has_avx512vnni);
+BENCHMARK_ADLER32_BASELINE_COPY(avx512_vnni_baseline, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
+BENCHMARK_ADLER32_COPY(avx512_vnni, adler32_fold_copy_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
#endif
diff --git a/test/benchmarks/benchmark_compare256.cc b/test/benchmarks/benchmark_compare256.cc
index 54f6b14b84..00c6cc6f63 100644
--- a/test/benchmarks/benchmark_compare256.cc
+++ b/test/benchmarks/benchmark_compare256.cc
@@ -10,7 +10,7 @@
extern "C" {
# include "zbuild.h"
# include "zutil_p.h"
-# include "cpu_features.h"
+# include "../test_cpu_features.h"
}
#define MAX_COMPARE_SIZE (256)
@@ -71,14 +71,14 @@ BENCHMARK_COMPARE256(unaligned_64, compare256_unaligned_64, 1);
#endif
#endif
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
-BENCHMARK_COMPARE256(sse2, compare256_sse2, x86_cpu_has_sse2);
+BENCHMARK_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2);
#endif
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-BENCHMARK_COMPARE256(avx2, compare256_avx2, x86_cpu_has_avx2);
+BENCHMARK_COMPARE256(avx2, compare256_avx2, test_cpu_features.x86.has_avx2);
#endif
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
-BENCHMARK_COMPARE256(neon, compare256_neon, arm_cpu_has_neon);
+BENCHMARK_COMPARE256(neon, compare256_neon, test_cpu_features.arm.has_neon);
#endif
#ifdef POWER9
-BENCHMARK_COMPARE256(power9, compare256_power9, power_cpu_has_arch_3_00);
+BENCHMARK_COMPARE256(power9, compare256_power9, test_cpu_features.power.has_arch_3_00);
#endif
diff --git a/test/benchmarks/benchmark_crc32.cc b/test/benchmarks/benchmark_crc32.cc
index b5ecda5179..b2b9673d9f 100644
--- a/test/benchmarks/benchmark_crc32.cc
+++ b/test/benchmarks/benchmark_crc32.cc
@@ -11,7 +11,7 @@
extern "C" {
# include "zbuild.h"
# include "zutil_p.h"
-# include "cpu_features.h"
+# include "../test_cpu_features.h"
}
#define MAX_RANDOM_INTS (1024 * 1024)
@@ -58,12 +58,12 @@ public:
BENCHMARK_CRC32(braid, PREFIX(crc32_braid), 1);
#ifdef ARM_ACLE
-BENCHMARK_CRC32(acle, crc32_acle, arm_cpu_has_crc32);
+BENCHMARK_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32);
#elif defined(POWER8_VSX)
-BENCHMARK_CRC32(power8, crc32_power8, power_cpu_has_arch_2_07);
+BENCHMARK_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07);
#elif defined(S390_CRC32_VX)
-BENCHMARK_CRC32(vx, PREFIX(s390_crc32_vx), PREFIX(s390_cpu_has_vx));
+BENCHMARK_CRC32(vx, crc32_s390_vx, test_cpu_features.s390.has_vx);
#elif defined(X86_PCLMULQDQ_CRC)
/* CRC32 fold does a memory copy while hashing */
-BENCHMARK_CRC32(pclmulqdq, crc32_pclmulqdq, x86_cpu_has_pclmulqdq);
+BENCHMARK_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq);
#endif
diff --git a/test/benchmarks/benchmark_main.cc b/test/benchmarks/benchmark_main.cc
index ee8b614897..3ef2c5e87d 100644
--- a/test/benchmarks/benchmark_main.cc
+++ b/test/benchmarks/benchmark_main.cc
@@ -10,13 +10,15 @@
#ifndef BUILD_ALT
extern "C" {
# include "zbuild.h"
-# include "cpu_features.h"
+# include "../test_cpu_features.h"
+
+ struct cpu_features test_cpu_features;
}
#endif
int main(int argc, char** argv) {
#ifndef BUILD_ALT
- cpu_check_features();
+ cpu_check_features(&test_cpu_features);
#endif
::benchmark::Initialize(&argc, argv);
diff --git a/test/benchmarks/benchmark_slidehash.cc b/test/benchmarks/benchmark_slidehash.cc
index 5ffa7039d0..238cc1f658 100644
--- a/test/benchmarks/benchmark_slidehash.cc
+++ b/test/benchmarks/benchmark_slidehash.cc
@@ -11,7 +11,7 @@ extern "C" {
# include "zbuild.h"
# include "zutil_p.h"
# include "deflate.h"
-# include "cpu_features.h"
+# include "../test_cpu_features.h"
}
#define MAX_RANDOM_INTS 32768
@@ -69,18 +69,18 @@ public:
BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);
#ifdef ARM_NEON
-BENCHMARK_SLIDEHASH(neon, slide_hash_neon, arm_cpu_has_neon);
+BENCHMARK_SLIDEHASH(neon, slide_hash_neon, test_cpu_features.arm.has_neon);
#endif
#ifdef POWER8_VSX
-BENCHMARK_SLIDEHASH(power8, slide_hash_power8, power_cpu_has_arch_2_07);
+BENCHMARK_SLIDEHASH(power8, slide_hash_power8, test_cpu_features.power.has_arch_2_07);
#endif
#ifdef PPC_VMX
-BENCHMARK_SLIDEHASH(vmx, slide_hash_vmx, power_cpu_has_altivec);
+BENCHMARK_SLIDEHASH(vmx, slide_hash_vmx, test_cpu_features.power.has_altivec);
#endif
#ifdef X86_SSE2
-BENCHMARK_SLIDEHASH(sse2, slide_hash_sse2, x86_cpu_has_sse2);
+BENCHMARK_SLIDEHASH(sse2, slide_hash_sse2, test_cpu_features.x86.has_sse2);
#endif
#ifdef X86_AVX2
-BENCHMARK_SLIDEHASH(avx2, slide_hash_avx2, x86_cpu_has_avx2);
+BENCHMARK_SLIDEHASH(avx2, slide_hash_avx2, test_cpu_features.x86.has_avx2);
#endif
diff --git a/test/test_adler32.cc b/test/test_adler32.cc
index 7f88f25565..4dfe63f203 100644
--- a/test/test_adler32.cc
+++ b/test/test_adler32.cc
@@ -10,7 +10,7 @@
extern "C" {
# include "zbuild.h"
-# include "cpu_features.h"
+# include "test_cpu_features.h"
}
#include <gtest/gtest.h>
@@ -365,22 +365,22 @@ INSTANTIATE_TEST_SUITE_P(adler32, adler32_variant, testing::ValuesIn(tests));
TEST_ADLER32(c, adler32_c, 1)
#ifdef ARM_NEON
-TEST_ADLER32(neon, adler32_neon, arm_cpu_has_neon)
+TEST_ADLER32(neon, adler32_neon, test_cpu_features.arm.has_neon)
#elif defined(POWER8_VSX)
-TEST_ADLER32(power8, adler32_power8, power_cpu_has_arch_2_07)
+TEST_ADLER32(power8, adler32_power8, test_cpu_features.power.has_arch_2_07)
#elif defined(PPC_VMX)
-TEST_ADLER32(vmx, adler32_vmx, power_cpu_has_altivec)
+TEST_ADLER32(vmx, adler32_vmx, test_cpu_features.power.has_altivec)
#endif
#ifdef X86_SSSE3
-TEST_ADLER32(ssse3, adler32_ssse3, x86_cpu_has_ssse3)
+TEST_ADLER32(ssse3, adler32_ssse3, test_cpu_features.x86.has_ssse3)
#endif
#ifdef X86_AVX2
-TEST_ADLER32(avx2, adler32_avx2, x86_cpu_has_avx2)
+TEST_ADLER32(avx2, adler32_avx2, test_cpu_features.x86.has_avx2)
#endif
#ifdef X86_AVX512
-TEST_ADLER32(avx512, adler32_avx512, x86_cpu_has_avx512)
+TEST_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512)
#endif
#ifdef X86_AVX512VNNI
-TEST_ADLER32(avx512_vnni, adler32_avx512_vnni, x86_cpu_has_avx512vnni)
+TEST_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni)
#endif
diff --git a/test/test_compare256.cc b/test/test_compare256.cc
index 663ad96334..f920d1d205 100644
--- a/test/test_compare256.cc
+++ b/test/test_compare256.cc
@@ -10,7 +10,7 @@
extern "C" {
# include "zbuild.h"
# include "zutil_p.h"
-# include "cpu_features.h"
+# include "test_cpu_features.h"
}
#include <gtest/gtest.h>
@@ -70,14 +70,14 @@ TEST_COMPARE256(unaligned_64, compare256_unaligned_64, 1)
#endif
#endif
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
-TEST_COMPARE256(sse2, compare256_sse2, x86_cpu_has_sse2)
+TEST_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2)
#endif
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
-TEST_COMPARE256(avx2, compare256_avx2, x86_cpu_has_avx2)
+TEST_COMPARE256(avx2, compare256_avx2, test_cpu_features.x86.has_avx2)
#endif
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
-TEST_COMPARE256(neon, compare256_neon, arm_cpu_has_neon)
+TEST_COMPARE256(neon, compare256_neon, test_cpu_features.arm.has_neon)
#endif
#ifdef POWER9
-TEST_COMPARE256(power9, compare256_power9, power_cpu_has_arch_3_00)
+TEST_COMPARE256(power9, compare256_power9, test_cpu_features.power.has_arch_3_00)
#endif
diff --git a/test/test_cpu_features.h b/test/test_cpu_features.h
new file mode 100644
index 0000000000..1bb4b13a08
--- /dev/null
+++ b/test/test_cpu_features.h
@@ -0,0 +1,8 @@
+#ifndef TEST_CPU_FEATURES_H
+#define TEST_CPU_FEATURES_H
+
+#include "cpu_features.h"
+
+extern struct cpu_features test_cpu_features;
+
+#endif
diff --git a/test/test_crc32.cc b/test/test_crc32.cc
index 4d0b5b966a..f194b4ccf5 100644
--- a/test/test_crc32.cc
+++ b/test/test_crc32.cc
@@ -12,7 +12,7 @@
extern "C" {
# include "zbuild.h"
# include "zutil_p.h"
-# include "cpu_features.h"
+# include "test_cpu_features.h"
}
#include <gtest/gtest.h>
@@ -209,14 +209,14 @@ INSTANTIATE_TEST_SUITE_P(crc32, crc32_variant, testing::ValuesIn(tests));
TEST_CRC32(braid, PREFIX(crc32_braid), 1)
#ifdef ARM_ACLE
-TEST_CRC32(acle, crc32_acle, arm_cpu_has_crc32)
+TEST_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32)
#elif defined(POWER8_VSX_CRC32)
-TEST_CRC32(power8, crc32_power8, power_cpu_has_arch_2_07)
+TEST_CRC32(power8, crc32_power8, test_cpu_features.power.has_arch_2_07)
#elif defined(S390_CRC32_VX)
-TEST_CRC32(vx, PREFIX(s390_crc32_vx), PREFIX(s390_cpu_has_vx))
+TEST_CRC32(vx, crc32_s390_vx, test_cpu_features.s390.has_vx)
#elif defined(X86_PCLMULQDQ_CRC)
-TEST_CRC32(pclmulqdq, crc32_pclmulqdq, x86_cpu_has_pclmulqdq)
+TEST_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
# ifdef X86_VPCLMULQDQ_CRC
-TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (x86_cpu_has_pclmulqdq && x86_cpu_has_avx512 && x86_cpu_has_vpclmulqdq))
+TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq))
# endif
#endif
diff --git a/test/test_main.cc b/test/test_main.cc
index c129db259f..82b39e4874 100644
--- a/test/test_main.cc
+++ b/test/test_main.cc
@@ -6,12 +6,14 @@
extern "C" {
# include "zbuild.h"
-# include "cpu_features.h"
+# include "test_cpu_features.h"
+
+ struct cpu_features test_cpu_features;
}
GTEST_API_ int main(int argc, char **argv) {
printf("Running main() from %s\n", __FILE__);
- cpu_check_features();
+ cpu_check_features(&test_cpu_features);
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
-} \ No newline at end of file
+}