summaryrefslogtreecommitdiff
path: root/neozip/cmake/detect-intrinsics.cmake
diff options
context:
space:
mode:
Diffstat (limited to 'neozip/cmake/detect-intrinsics.cmake')
-rw-r--r--neozip/cmake/detect-intrinsics.cmake770
1 files changed, 770 insertions, 0 deletions
diff --git a/neozip/cmake/detect-intrinsics.cmake b/neozip/cmake/detect-intrinsics.cmake
new file mode 100644
index 0000000000..c524c17bbe
--- /dev/null
+++ b/neozip/cmake/detect-intrinsics.cmake
@@ -0,0 +1,770 @@
+# detect-intrinsics.cmake -- Detect compiler intrinsics support
+# Licensed under the Zlib license, see LICENSE.md for details
+
+macro(check_armv8_compiler_flag)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+ check_c_compiler_flag("-march=armv8-a+crc" HAVE_MARCH_ARMV8_CRC)
+ if(HAVE_MARCH_ARMV8_CRC)
+ set(ARMV8FLAG "-march=armv8-a+crc" CACHE INTERNAL "Compiler option to enable ARMv8 support")
+ else()
+ check_c_compiler_flag("-march=armv8-a+crc+simd" HAVE_MARCH_ARMV8_CRC_SIMD)
+ if(HAVE_MARCH_ARMV8_CRC_SIMD)
+ set(ARMV8FLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ARMv8 support")
+ else()
+ check_c_compiler_flag("-Wa,-march=armv8-a+crc" HAVE_WA_MARCH_ARMV8_CRC)
+ if(HAVE_WA_MARCH_ARMV8_CRC)
+ set(ARMV8FLAG "-Wa,-march=armv8-a+crc" CACHE INTERNAL "Compiler option to enable ARMv8 support")
+ else()
+ check_c_compiler_flag("-Wa,-march=armv8-a+crc+simd" HAVE_WA_MARCH_ARMV8_CRC_SIMD)
+ if(HAVE_WA_MARCH_ARMV8_CRC_SIMD)
+ set(ARMV8FLAG "-Wa,-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ARMv8 support")
+ endif()
+ endif()
+ endif()
+ endif()
+ endif()
+ endif()
+ # Check whether compiler supports ARMv8 inline asm
+ set(CMAKE_REQUIRED_FLAGS "${ARMV8FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "unsigned int f(unsigned int a, unsigned int b) {
+ unsigned int c;
+ #ifdef __aarch64__
+ __asm__( \"crc32w %w0, %w1, %w2\" : \"=r\" (c) : \"r\" (a), \"r\" (b));
+ #else
+ __asm__( \"crc32w %0, %1, %2\" : \"=r\" (c) : \"r\" (a), \"r\" (b));
+ #endif
+ return (int)c;
+ }
+ int main(void) { return f(1,2); }"
+ HAVE_ARMV8_INLINE_ASM
+ )
+ # Check whether compiler supports ARMv8 intrinsics
+ check_c_source_compiles(
+ "#if defined(_MSC_VER)
+ #include <intrin.h>
+ #else
+ #include <arm_acle.h>
+ #endif
+ unsigned int f(unsigned int a, unsigned int b) {
+ return __crc32w(a, b);
+ }
+ int main(void) { return 0; }"
+ HAVE_ARMV8_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_armv8_pmull_eor3_compiler_flag)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+ check_c_compiler_flag("-march=armv8.2-a+crc+crypto+sha3" HAVE_MARCH_ARMV8_CRYPTO_SHA3)
+ if(HAVE_MARCH_ARMV8_CRYPTO_SHA3)
+ set(PMULLEOR3FLAG "-march=armv8.2-a+crc+crypto+sha3" CACHE INTERNAL "Compiler option to enable ARMv8 PMULL+EOR3 support")
+ else()
+ check_c_compiler_flag("-march=armv8-a+crc+crypto+sha3" HAVE_MARCH_ARMV8A_CRYPTO_SHA3)
+ if(HAVE_MARCH_ARMV8A_CRYPTO_SHA3)
+ set(PMULLEOR3FLAG "-march=armv8-a+crc+crypto+sha3" CACHE INTERNAL "Compiler option to enable ARMv8 PMULL+EOR3 support")
+ endif()
+ endif()
+ endif()
+ endif()
+ # Check whether compiler supports ARMv8 PMULL + EOR3 intrinsics
+ set(CMAKE_REQUIRED_FLAGS "${PMULLEOR3FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
+ # include <arm64_neon.h>
+ #else
+ # include <arm_neon.h>
+ #endif
+ #ifdef _MSC_VER
+ __n128 f(__n64 a, __n64 b) {
+ #else
+ poly128_t f(poly64_t a, poly64_t b) {
+ #endif
+ return vmull_p64(a, b);
+ }
+ uint64x2_t g(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
+ return veor3q_u64(a, b, c);
+ }
+ int main(void) { return 0; }"
+ HAVE_ARMV8_PMULL_EOR3_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_armv6_compiler_flag)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+ check_c_compiler_flag("-march=armv6" HAVE_MARCH_ARMV6)
+ if(HAVE_MARCH_ARMV6)
+ set(ARMV6FLAG "-march=armv6" CACHE INTERNAL "Compiler option to enable ARMv6 support")
+ else()
+ check_c_compiler_flag("-Wa,-march=armv6" HAVE_WA_MARCH_ARMV6)
+ if(HAVE_WA_MARCH_ARMV6)
+ set(ARMV6FLAG "-Wa,-march=armv6" CACHE INTERNAL "Compiler option to enable ARMv6 support")
+ endif()
+ endif()
+ endif()
+ endif()
+ # Check whether compiler supports ARMv6 inline asm
+ set(CMAKE_REQUIRED_FLAGS "${ARMV6FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "unsigned int f(unsigned int a, unsigned int b) {
+ unsigned int c;
+ __asm__( \"uqsub16 %0, %1, %2\" : \"=r\" (c) : \"r\" (a), \"r\" (b) );
+ return (int)c;
+ }
+ int main(void) { return f(1,2); }"
+ HAVE_ARMV6_INLINE_ASM
+ )
+ # Check whether compiler supports ARMv6 intrinsics
+ check_c_source_compiles(
+ "#if defined(_MSC_VER)
+ #include <intrin.h>
+ #else
+ #include <arm_acle.h>
+ #endif
+ unsigned int f(unsigned int a, unsigned int b) {
+ #if defined(_MSC_VER)
+ return _arm_uqsub16(a, b);
+ #else
+ return __uqsub16(a, b);
+ #endif
+ }
+ int main(void) { return f(1,2); }"
+ HAVE_ARMV6_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_avx512_intrinsics)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+ if(CMAKE_HOST_UNIX OR APPLE)
+ set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mbmi2")
+ else()
+ set(AVX512FLAG "/arch:AVX512")
+ endif()
+ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC")
+ # For CPUs that can benefit from AVX512, it seems GCC generates suboptimal
+ # instruction scheduling unless you specify a reasonable -mtune= target
+ set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mbmi2")
+ if(NOT MSVC)
+ check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE)
+ if(HAVE_CASCADE_LAKE)
+ set(AVX512FLAG "${AVX512FLAG} -mtune=cascadelake")
+ else()
+ set(AVX512FLAG "${AVX512FLAG} -mtune=skylake-avx512")
+ endif()
+ unset(HAVE_CASCADE_LAKE)
+ endif()
+ elseif(MSVC)
+ set(AVX512FLAG "/arch:AVX512")
+ endif()
+ endif()
+ # Check whether compiler supports AVX512 intrinsics
+ set(CMAKE_REQUIRED_FLAGS "${AVX512FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <immintrin.h>
+ __m512i f(__m512i y) {
+ __m512i x = _mm512_set1_epi8(2);
+ return _mm512_sub_epi8(x, y);
+ }
+ int main(void) { return 0; }"
+ HAVE_AVX512_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_avx512vnni_intrinsics)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+ if(CMAKE_HOST_UNIX OR APPLE OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM")
+ set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni -mbmi2")
+ else()
+ set(AVX512VNNIFLAG "/arch:AVX512")
+ endif()
+ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC")
+ set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni -mbmi2")
+ if(NOT MSVC)
+ check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE)
+ if(HAVE_CASCADE_LAKE)
+ set(AVX512VNNIFLAG "${AVX512VNNIFLAG} -mtune=cascadelake")
+ else()
+ set(AVX512VNNIFLAG "${AVX512VNNIFLAG} -mtune=skylake-avx512")
+ endif()
+ unset(HAVE_CASCADE_LAKE)
+ endif()
+ elseif(MSVC)
+ set(AVX512VNNIFLAG "/arch:AVX512")
+ endif()
+ endif()
+ # Check whether compiler supports AVX512vnni intrinsics
+ set(CMAKE_REQUIRED_FLAGS "${AVX512VNNIFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <immintrin.h>
+ int main(void) {
+ const __m512i z512 = _mm512_setzero_si512();
+ const __m256i z256 = _mm256_setzero_si256();
+ volatile __m512i r512 = _mm512_dpbusd_epi32(z512, z512, z512);
+ volatile __m256i r256 = _mm256_dpbusd_epi32(z256, z256, z256);
+ (void)r512;
+ (void)r256;
+ return 0;
+ }"
+ HAVE_AVX512VNNI_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_avx2_intrinsics)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+ if(CMAKE_HOST_UNIX OR APPLE)
+ set(AVX2FLAG "-mavx2 -mbmi2")
+ else()
+ set(AVX2FLAG "/arch:AVX2")
+ endif()
+ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC")
+ set(AVX2FLAG "-mavx2 -mbmi2")
+ elseif(MSVC)
+ set(AVX2FLAG "/arch:AVX2")
+ endif()
+ endif()
+ # Check whether compiler supports AVX2 intrinics
+ set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <immintrin.h>
+ __m256i f(__m256i x) {
+ const __m256i y = _mm256_set1_epi16(1);
+ return _mm256_subs_epu16(x, y);
+ }
+ int main(void) { return 0; }"
+ HAVE_AVX2_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_neon_compiler_flag)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+ if(ARCH_64BIT)
+ set(NEONFLAG "-march=armv8-a+simd")
+ else()
+ set(NEONFLAG "-mfpu=neon")
+ endif()
+ endif()
+ endif()
+ # Check whether compiler supports NEON flag
+ set(CMAKE_REQUIRED_FLAGS "${NEONFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#if defined(_M_ARM64) || defined(_M_ARM64EC)
+ # include <arm64_neon.h>
+ #else
+ # include <arm_neon.h>
+ #endif
+ int main() { return 0; }"
+ NEON_AVAILABLE FAIL_REGEX "not supported")
+ # Check whether compiler native flag is enough for NEON support
+ # Some GCC versions don't enable FPU (vector unit) when using -march=native
+ if(NEON_AVAILABLE AND NATIVEFLAG AND ARCH_32BIT)
+ check_c_source_compiles(
+ "#include <arm_neon.h>
+ uint8x16_t f(uint8x16_t x, uint8x16_t y) {
+ return vaddq_u8(x, y);
+ }
+ int main(int argc, char* argv[]) {
+ uint8x16_t a = vdupq_n_u8(argc);
+ uint8x16_t b = vdupq_n_u8(argc);
+ uint8x16_t result = f(a, b);
+ return result[0];
+ }"
+ ARM_NEON_SUPPORT_NATIVE
+ )
+ if(NOT ARM_NEON_SUPPORT_NATIVE)
+ set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG} -mfpu=neon ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <arm_neon.h>
+ uint8x16_t f(uint8x16_t x, uint8x16_t y) {
+ return vaddq_u8(x, y);
+ }
+ int main(int argc, char* argv[]) {
+ uint8x16_t a = vdupq_n_u8(argc);
+ uint8x16_t b = vdupq_n_u8(argc);
+ uint8x16_t result = f(a, b);
+ return result[0];
+ }"
+ ARM_NEON_SUPPORT_NATIVE_MFPU
+ )
+ if(ARM_NEON_SUPPORT_NATIVE_MFPU)
+ set(NEONFLAG "-mfpu=neon")
+ else()
+ # Remove local NEON_AVAILABLE variable and overwrite the cache
+ unset(NEON_AVAILABLE)
+ set(NEON_AVAILABLE "" CACHE INTERNAL "NEON support available" FORCE)
+ endif()
+ endif()
+ endif()
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_neon_ld4_intrinsics)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+ if(ARCH_64BIT)
+ set(NEONFLAG "-march=armv8-a+simd")
+ else()
+ set(NEONFLAG "-mfpu=neon")
+ endif()
+ endif()
+ endif()
+ # Check whether compiler supports loading 4 neon vecs into a register range
+ set(CMAKE_REQUIRED_FLAGS "${NEONFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
+ # include <arm64_neon.h>
+ #else
+ # include <arm_neon.h>
+ #endif
+ int32x4x4_t f(int var[16]) { return vld1q_s32_x4(var); }
+ int main(void) { return 0; }"
+ NEON_HAS_LD4)
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_pclmulqdq_intrinsics)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC")
+ set(PCLMULFLAG "-mpclmul")
+ endif()
+ endif()
+ # Check whether compiler supports PCLMULQDQ intrinsics
+ if(NOT (APPLE AND ARCH_32BIT))
+ # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now.
+ set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <immintrin.h>
+ #include <wmmintrin.h>
+ __m128i f(__m128i a, __m128i b) { return _mm_clmulepi64_si128(a, b, 0x10); }
+ int main(void) { return 0; }"
+ HAVE_PCLMULQDQ_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+ else()
+ set(HAVE_PCLMULQDQ_INTRIN OFF)
+ endif()
+endmacro()
+
+macro(check_vpclmulqdq_intrinsics)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang|IntelLLVM|NVHPC")
+ set(VPCLMULFLAG "-mvpclmulqdq")
+ endif()
+ endif()
+ # Check whether compiler supports VPCLMULQDQ intrinsics
+ if(NOT (APPLE AND ARCH_32BIT))
+ set(CMAKE_REQUIRED_FLAGS "${VPCLMULFLAG} ${AVX2FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <immintrin.h>
+ #include <wmmintrin.h>
+ __m256i f(__m256i a) {
+ __m256i b = _mm256_setzero_si256();
+ return _mm256_clmulepi64_epi128(a, b, 0x10);
+ }
+ int main(void) { return 0; }"
+ HAVE_VPCLMULQDQ_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+ else()
+ set(HAVE_VPCLMULQDQ_INTRIN OFF)
+ endif()
+endmacro()
+
+macro(check_ppc_intrinsics)
+ # Check if compiler supports AltiVec
+ set(CMAKE_REQUIRED_FLAGS "-maltivec ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <altivec.h>
+ int main(void)
+ {
+ vector int a = vec_splats(0);
+ vector int b = vec_splats(0);
+ a = vec_add(a, b);
+ return 0;
+ }"
+ HAVE_ALTIVEC
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+
+ if(HAVE_ALTIVEC)
+ set(PPCFLAGS "-maltivec")
+ endif()
+
+ set(CMAKE_REQUIRED_FLAGS "-maltivec -mno-vsx ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <altivec.h>
+ int main(void)
+ {
+ vector int a = vec_splats(0);
+ vector int b = vec_splats(0);
+ a = vec_add(a, b);
+ return 0;
+ }"
+ HAVE_NOVSX
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+
+ if(HAVE_NOVSX)
+ set(PPCFLAGS "${PPCFLAGS} -mno-vsx")
+ endif()
+
+ # Check if we have what we need for AltiVec optimizations
+ set(CMAKE_REQUIRED_FLAGS "${PPCFLAGS} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <sys/auxv.h>
+ #ifdef __FreeBSD__
+ #include <machine/cpu.h>
+ #endif
+ int main() {
+ #if defined(__FreeBSD__) || defined(__OpenBSD__)
+ unsigned long hwcap;
+ elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
+ return (hwcap & PPC_FEATURE_HAS_ALTIVEC);
+ #else
+ return (getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
+ #endif
+ }"
+ HAVE_VMX
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_power8_intrinsics)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+ set(POWER8FLAG "-mcpu=power8")
+ endif()
+ endif()
+ # Check if we have what we need for POWER8 optimizations
+ set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <sys/auxv.h>
+ #ifdef __FreeBSD__
+ #include <machine/cpu.h>
+ #endif
+ int main() {
+ #if defined(__FreeBSD__) || defined(__OpenBSD__)
+ unsigned long hwcap;
+ elf_aux_info(AT_HWCAP2, &hwcap, sizeof(hwcap));
+ return (hwcap & PPC_FEATURE2_ARCH_2_07);
+ #else
+ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
+ #endif
+ }"
+ HAVE_POWER8_INTRIN
+ )
+ if(NOT HAVE_POWER8_INTRIN AND HAVE_LINUX_AUXVEC_H)
+ check_c_source_compiles(
+ "#include <sys/auxv.h>
+ #include <linux/auxvec.h>
+ int main() {
+ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
+ }"
+ HAVE_POWER8_INTRIN2
+ )
+ if(HAVE_POWER8_INTRIN2)
+ set(POWER8_NEED_AUXVEC_H 1)
+ set(HAVE_POWER8_INTRIN ${HAVE_POWER8_INTRIN2} CACHE INTERNAL "Have POWER8 intrinsics" FORCE)
+ unset(HAVE_POWER8_INTRIN2 CACHE)
+ endif()
+ endif()
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_rvv_intrinsics)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+ set(RISCVFLAG "-march=rv64gcv")
+ endif()
+ endif()
+ # Check whether compiler supports RVV
+ set(CMAKE_REQUIRED_FLAGS "${RISCVFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <riscv_vector.h>
+ int main() {
+ return 0;
+ }"
+ HAVE_RVV_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_riscv_zbc_ext)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+ set(RISCVZBCFLAG "-march=rv64gc_zbc")
+ endif()
+ endif()
+ # Check whether compiler supports RISC-V Zbc inline asm
+ # gcc-11 / clang-14 at least
+ set(CMAKE_REQUIRED_FLAGS "${RISCVZBCFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <stdint.h>
+ uint64_t f(uint64_t a, uint64_t b) {
+ uint64_t c;
+ __asm__ __volatile__ (\"clmul %[result], %[input_a], %[input_b]\" : [result] \"=r\" (c) : [input_a] \"r\" (a), [input_b] \"r\" (b));
+ return c;
+ }
+ int main(void) { return f(1, 2); }"
+ HAVE_RISCV_ZBC
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_s390_intrinsics)
+ check_c_source_compiles(
+ "#include <sys/auxv.h>
+ #ifndef HWCAP_S390_VXRS
+ #define HWCAP_S390_VXRS (1 << 11)
+ #endif
+ int main() {
+ return (getauxval(AT_HWCAP) & HWCAP_S390_VXRS);
+ }"
+ HAVE_S390_INTRIN
+ )
+endmacro()
+
+macro(check_power9_intrinsics)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+ set(POWER9FLAG "-mcpu=power9")
+ endif()
+ endif()
+ # Check if we have what we need for POWER9 optimizations
+ set(CMAKE_REQUIRED_FLAGS "${POWER9FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <sys/auxv.h>
+ #ifdef __FreeBSD__
+ #include <machine/cpu.h>
+ #endif
+ int main() {
+ #if defined(__FreeBSD__) || defined(__OpenBSD__)
+ unsigned long hwcap;
+ elf_aux_info(AT_HWCAP2, &hwcap, sizeof(hwcap));
+ return (hwcap & PPC_FEATURE2_ARCH_3_00);
+ #else
+ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00);
+ #endif
+ }"
+ HAVE_POWER9_INTRIN
+ )
+ if(NOT HAVE_POWER9_INTRIN AND HAVE_LINUX_AUXVEC_H)
+ check_c_source_compiles(
+ "#include <sys/auxv.h>
+ #include <linux/auxvec.h>
+ int main() {
+ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00);
+ }"
+ HAVE_POWER9_INTRIN2
+ )
+ if(HAVE_POWER9_INTRIN2)
+ set(POWER9_NEED_AUXVEC_H 1)
+ set(HAVE_POWER9_INTRIN ${HAVE_POWER9_INTRIN2} CACHE INTERNAL "Have POWER9 intrinsics" FORCE)
+ unset(HAVE_POWER9_INTRIN2 CACHE)
+ endif()
+ endif()
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_sse2_intrinsics)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+ if(CMAKE_HOST_UNIX OR APPLE)
+ set(SSE2FLAG "-msse2")
+ else()
+ set(SSE2FLAG "/arch:SSE2")
+ endif()
+ elseif(MSVC)
+ if(ARCH_32BIT)
+ set(SSE2FLAG "/arch:SSE2")
+ endif()
+ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC")
+ set(SSE2FLAG "-msse2")
+ endif()
+ endif()
+ # Check whether compiler supports SSE2 intrinsics
+ set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <immintrin.h>
+ __m128i f(__m128i x, __m128i y) { return _mm_sad_epu8(x, y); }
+ int main(void) { return 0; }"
+ HAVE_SSE2_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_ssse3_intrinsics)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+ if(CMAKE_HOST_UNIX OR APPLE)
+ set(SSSE3FLAG "-mssse3")
+ else()
+ set(SSSE3FLAG "/arch:SSSE3")
+ endif()
+ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC")
+ set(SSSE3FLAG "-mssse3")
+ endif()
+ endif()
+ # Check whether compiler supports SSSE3 intrinsics
+ set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <immintrin.h>
+ __m128i f(__m128i u) {
+ __m128i v = _mm_set1_epi32(1);
+ return _mm_hadd_epi32(u, v);
+ }
+ int main(void) { return 0; }"
+ HAVE_SSSE3_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_sse41_intrinsics)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+ if(CMAKE_HOST_UNIX OR APPLE)
+ set(SSE41FLAG "-msse4.1")
+ else()
+ set(SSE41FLAG "/arch:SSE4.1")
+ endif()
+ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC")
+ set(SSE41FLAG "-msse4.1")
+ endif()
+ endif()
+ # Check whether compiler supports SSE4.1 intrinsics
+ set(CMAKE_REQUIRED_FLAGS "${SSE41FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <smmintrin.h>
+ __m128i f(__m128i a, __m128i b) { return _mm_min_epi32(a, b); }
+ int main(void) { return 0; }"
+ HAVE_SSE41_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_sse42_intrinsics)
+ if(NOT NATIVEFLAG)
+ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+ if(CMAKE_HOST_UNIX OR APPLE)
+ set(SSE42FLAG "-msse4.2")
+ else()
+ set(SSE42FLAG "/arch:SSE4.2")
+ endif()
+ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "NVHPC")
+ set(SSE42FLAG "-msse4.2")
+ endif()
+ endif()
+ # Check whether compiler supports SSE4.2 intrinsics
+ set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <nmmintrin.h>
+ unsigned int f(unsigned int a, unsigned int b) { return _mm_crc32_u32(a, b); }
+ int main(void) { return 0; }"
+ HAVE_SSE42_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_vgfma_intrinsics)
+ if(NOT NATIVEFLAG)
+ set(VGFMAFLAG "-march=z13")
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU")
+ set(VGFMAFLAG "${VGFMAFLAG} -mzarch")
+ endif()
+ if(CMAKE_C_COMPILER_ID MATCHES "Clang")
+ set(VGFMAFLAG "${VGFMAFLAG} -fzvector")
+ endif()
+ endif()
+ # Check whether compiler supports "VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE" intrinsic
+ set(CMAKE_REQUIRED_FLAGS "${VGFMAFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <vecintrin.h>
+ int main(void) {
+ unsigned long long a __attribute__((vector_size(16))) = { 0 };
+ unsigned long long b __attribute__((vector_size(16))) = { 0 };
+ unsigned char c __attribute__((vector_size(16))) = { 0 };
+ c = vec_gfmsum_accum_128(a, b, c);
+ return c[0];
+ }"
+ HAVE_VGFMA_INTRIN FAIL_REGEX "not supported")
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_xsave_intrinsics)
+ if(NOT NATIVEFLAG AND NOT MSVC AND NOT CMAKE_C_COMPILER_ID MATCHES "Intel")
+ set(XSAVEFLAG "-mxsave")
+ endif()
+ set(CMAKE_REQUIRED_FLAGS "${XSAVEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#ifdef _MSC_VER
+ # include <intrin.h>
+ #elif __GNUC__ == 8 && __GNUC_MINOR__ > 1
+ # include <xsaveintrin.h>
+ #else
+ # include <immintrin.h>
+ #endif
+ unsigned int f(unsigned int a) { return (int) _xgetbv(a); }
+ int main(void) { return 0; }"
+ HAVE_XSAVE_INTRIN FAIL_REGEX "not supported")
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_la64_crc_intrinsics)
+ # Check whether compiler supports "crc" intrinsic
+ set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <larchintrin.h>
+ int main(void) {
+ char ch = 'a';
+ int crc = __crc_w_b_w(ch, 0);
+ return crc;
+ }"
+ HAVE_LA64_CRC_INTRIN)
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_lsx_intrinsics)
+ if(NOT NATIVEFLAG)
+ set(LSXFLAG "-mlsx")
+ endif()
+ # Check whether compiler supports LSX intrinsics
+ set(CMAKE_REQUIRED_FLAGS "${LSXFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <lsxintrin.h>
+ __m128i f(__m128i a, __m128i b) {
+ return __lsx_vabsd_b(a, b);
+ }
+ int main(void) { return 0; }"
+ HAVE_LSX_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()
+
+macro(check_lasx_intrinsics)
+ if(NOT NATIVEFLAG)
+ set(LASXFLAG "-mlasx")
+ endif()
+ # Check whether compiler supports LASX intrinsics
+ set(CMAKE_REQUIRED_FLAGS "${LASXFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
+ check_c_source_compiles(
+ "#include <lasxintrin.h>
+ __m256i f(__m256i a, __m256i b) {
+ return __lasx_xvabsd_b(a, b);
+ }
+ int main(void) { return 0; }"
+ HAVE_LASX_INTRIN
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+endmacro()