summaryrefslogtreecommitdiff
path: root/neozip/arch/x86/x86_features.c
diff options
context:
space:
mode:
Diffstat (limited to 'neozip/arch/x86/x86_features.c')
-rw-r--r--neozip/arch/x86/x86_features.c128
1 files changed, 128 insertions, 0 deletions
diff --git a/neozip/arch/x86/x86_features.c b/neozip/arch/x86/x86_features.c
new file mode 100644
index 0000000000..5eba18bf8a
--- /dev/null
+++ b/neozip/arch/x86/x86_features.c
@@ -0,0 +1,128 @@
+/* x86_features.c - x86 feature check
+ *
+ * Copyright (C) 2013 Intel Corporation. All rights reserved.
+ * Author:
+ * Jim Kukunas
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifdef X86_FEATURES
+
+#include "zbuild.h"
+#include "x86_features.h"
+
+#if defined(HAVE_CPUID_MS)
+# include <intrin.h>
+#elif defined(HAVE_CPUID_GNU)
+// Newer versions of GCC and clang come with cpuid.h
+# include <cpuid.h>
+# ifdef X86_HAVE_XSAVE_INTRIN
+# if __GNUC__ == 8
+# include <xsaveintrin.h>
+# else
+# include <immintrin.h>
+# endif
+# endif
+#endif
+
+static inline void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
+#if defined(HAVE_CPUID_MS)
+ unsigned int registers[4];
+ __cpuid((int *)registers, info);
+
+ *eax = registers[0];
+ *ebx = registers[1];
+ *ecx = registers[2];
+ *edx = registers[3];
+#elif defined(HAVE_CPUID_GNU)
+ *eax = *ebx = *ecx = *edx = 0;
+ __cpuid(info, *eax, *ebx, *ecx, *edx);
+#else
+ /* When using this fallback, the faster SSE/AVX code is disabled */
+ *eax = *ebx = *ecx = *edx = 0;
+#endif
+}
+
+static inline void cpuidex(int info, int subinfo, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
+#if defined(HAVE_CPUID_MS)
+ unsigned int registers[4];
+ __cpuidex((int *)registers, info, subinfo);
+
+ *eax = registers[0];
+ *ebx = registers[1];
+ *ecx = registers[2];
+ *edx = registers[3];
+#elif defined(HAVE_CPUID_GNU)
+ *eax = *ebx = *ecx = *edx = 0;
+ __cpuid_count(info, subinfo, *eax, *ebx, *ecx, *edx);
+#else
+ /* When using this fallback, the faster SSE/AVX code is disabled */
+ *eax = *ebx = *ecx = *edx = 0;
+#endif
+}
+
+static inline uint64_t xgetbv(unsigned int xcr) {
+#if defined(_MSC_VER) || defined(X86_HAVE_XSAVE_INTRIN)
+ return _xgetbv(xcr);
+#elif defined(__GNUC__)
+ uint32_t eax, edx;
+ __asm__ ( ".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr));
+ return (uint64_t)(edx) << 32 | eax;
+#else
+ /* When using this fallback, some of the faster code is disabled */
+ return 0;
+#endif
+}
+
+void Z_INTERNAL x86_check_features(struct x86_cpu_features *features) {
+ unsigned eax, ebx, ecx, edx;
+ unsigned maxbasic;
+
+ cpuid(0, &maxbasic, &ebx, &ecx, &edx);
+ cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx);
+
+ features->has_sse2 = edx & 0x4000000;
+ features->has_ssse3 = ecx & 0x200;
+ features->has_sse41 = ecx & 0x80000;
+ features->has_sse42 = ecx & 0x100000;
+ features->has_pclmulqdq = ecx & 0x2;
+
+ if (ecx & 0x08000000) {
+ uint64_t xfeature = xgetbv(0);
+
+ features->has_os_save_ymm = ((xfeature & 0x06) == 0x06);
+ features->has_os_save_zmm = ((xfeature & 0xe6) == 0xe6);
+ }
+
+ if (maxbasic >= 7) {
+ // Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf
+ cpuidex(7, 0, &eax, &ebx, &ecx, &edx);
+
+ // check BMI2 bit
+ features->has_bmi2 = ebx & 0x100;
+
+ // check AVX2 bit if the OS supports saving YMM registers
+ if (features->has_os_save_ymm) {
+ features->has_avx2 = ebx & 0x20;
+ features->has_vpclmulqdq = ecx & 0x400;
+ }
+
+ // check AVX512 bits if the OS supports saving ZMM registers
+ if (features->has_os_save_zmm) {
+ features->has_avx512f = ebx & 0x00010000;
+ if (features->has_avx512f) {
+ // According to the Intel Software Developer's Manual, AVX512F must be enabled too in order to enable
+ // AVX512(DQ,BW,VL).
+ features->has_avx512dq = ebx & 0x00020000;
+ features->has_avx512bw = ebx & 0x40000000;
+ features->has_avx512vl = ebx & 0x80000000;
+ }
+ features->has_avx512_common = features->has_avx512f && features->has_avx512dq && features->has_avx512bw \
+ && features->has_avx512vl && features->has_bmi2;
+ features->has_avx512vnni = ecx & 0x800;
+ }
+ }
+}
+
+#endif