Diffstat (limited to 'neozip/arch/x86/x86_functions.h')
-rw-r--r--   neozip/arch/x86/x86_functions.h   196
1 file changed, 196 insertions, 0 deletions
diff --git a/neozip/arch/x86/x86_functions.h b/neozip/arch/x86/x86_functions.h
new file mode 100644
index 0000000000..881c6efe23
--- /dev/null
+++ b/neozip/arch/x86/x86_functions.h
@@ -0,0 +1,196 @@
+/* x86_functions.h -- x86 implementations for arch-specific functions.
+ * Copyright (C) 2013 Intel Corporation Jim Kukunas
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef X86_FUNCTIONS_H_
+#define X86_FUNCTIONS_H_
+
+#include "x86_natives.h"
+
+/* So great news, your compiler is broken and causes stack smashing. Rather than
+ * notching out its compilation we'll just remove the assignment in the functable.
+ * Further context:
+ * https://developercommunity.visualstudio.com/t/Stack-corruption-with-v142-toolchain-whe/10853479 */
+#if defined(_MSC_VER) && defined(ARCH_32BIT) && _MSC_VER >= 1920 && _MSC_VER <= 1929
+#define WITHOUT_CHORBA_SSE
+#endif
+
+#ifdef X86_SSE2
+uint8_t* chunkmemset_safe_sse2(uint8_t *out, uint8_t *from, size_t len, size_t left);
+uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
+void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start);
+uint32_t longest_match_sse2(deflate_state *const s, uint32_t cur_match);
+uint32_t longest_match_slow_sse2(deflate_state *const s, uint32_t cur_match);
+void slide_hash_sse2(deflate_state *s);
+
+# if !defined(WITHOUT_CHORBA_SSE)
+    uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len);
+    uint32_t crc32_copy_chorba_sse2(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
+    uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint8_t *buf, size_t len);
+# endif
+#endif
+
+#ifdef X86_SSSE3
+uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
+uint32_t adler32_copy_ssse3(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint8_t* chunkmemset_safe_ssse3(uint8_t *out, uint8_t *from, size_t len, size_t left);
+void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
+#endif
+
+#if defined(X86_SSE41) && !defined(WITHOUT_CHORBA_SSE)
+    uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len);
+    uint32_t crc32_copy_chorba_sse41(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
+#endif
+
+#ifdef X86_SSE42
+uint32_t adler32_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+#endif
+
+#ifdef X86_AVX2
+uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
+uint32_t adler32_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint8_t* chunkmemset_safe_avx2(uint8_t *out, uint8_t *from, size_t len, size_t left);
+uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
+void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start);
+uint32_t longest_match_avx2(deflate_state *const s, uint32_t cur_match);
+uint32_t longest_match_slow_avx2(deflate_state *const s, uint32_t cur_match);
+void slide_hash_avx2(deflate_state *s);
+#endif
+#ifdef X86_AVX512
+uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
+uint32_t adler32_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+uint8_t* chunkmemset_safe_avx512(uint8_t *out, uint8_t *from, size_t len, size_t left);
+uint32_t compare256_avx512(const uint8_t *src0, const uint8_t *src1);
+void inflate_fast_avx512(PREFIX3(stream)* strm, uint32_t start);
+uint32_t longest_match_avx512(deflate_state *const s, uint32_t cur_match);
+uint32_t longest_match_slow_avx512(deflate_state *const s, uint32_t cur_match);
+#endif
+#ifdef X86_AVX512VNNI
+uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
+uint32_t adler32_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+#endif
+
+#ifdef X86_PCLMULQDQ_CRC
+uint32_t crc32_pclmulqdq(uint32_t crc, const uint8_t *buf, size_t len);
+uint32_t crc32_copy_pclmulqdq(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
+#endif
+#ifdef X86_VPCLMULQDQ_AVX2
+uint32_t crc32_vpclmulqdq_avx2(uint32_t crc, const uint8_t *buf, size_t len);
+uint32_t crc32_copy_vpclmulqdq_avx2(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
+#endif
+#ifdef X86_VPCLMULQDQ_AVX512
+uint32_t crc32_vpclmulqdq_avx512(uint32_t crc, const uint8_t *buf, size_t len);
+uint32_t crc32_copy_vpclmulqdq_avx512(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
+#endif
+
+#ifdef DISABLE_RUNTIME_CPU_DETECTION
+// X86 - SSE2
+# ifdef X86_SSE2_NATIVE
+# undef native_chunkmemset_safe
+# define native_chunkmemset_safe chunkmemset_safe_sse2
+# undef native_compare256
+# define native_compare256 compare256_sse2
+# undef native_inflate_fast
+# define native_inflate_fast inflate_fast_sse2
+# undef native_longest_match
+# define native_longest_match longest_match_sse2
+# undef native_longest_match_slow
+# define native_longest_match_slow longest_match_slow_sse2
+# if !defined(WITHOUT_CHORBA_SSE)
+# undef native_crc32
+# define native_crc32 crc32_chorba_sse2
+# undef native_crc32_copy
+# define native_crc32_copy crc32_copy_chorba_sse2
+# endif
+# undef native_slide_hash
+# define native_slide_hash slide_hash_sse2
+# endif
+// X86 - SSSE3
+# ifdef X86_SSSE3_NATIVE
+# undef native_adler32
+# define native_adler32 adler32_ssse3
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_ssse3
+# undef native_chunkmemset_safe
+# define native_chunkmemset_safe chunkmemset_safe_ssse3
+# undef native_inflate_fast
+# define native_inflate_fast inflate_fast_ssse3
+# endif
+// X86 - SSE4.1
+# if defined(X86_SSE41_NATIVE) && !defined(WITHOUT_CHORBA_SSE)
+# undef native_crc32
+# define native_crc32 crc32_chorba_sse41
+# undef native_crc32_copy
+# define native_crc32_copy crc32_copy_chorba_sse41
+# endif
+// X86 - SSE4.2
+# ifdef X86_SSE42_NATIVE
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_sse42
+# endif
+// X86 - PCLMUL
+# ifdef X86_PCLMULQDQ_NATIVE
+# undef native_crc32
+# define native_crc32 crc32_pclmulqdq
+# undef native_crc32_copy
+# define native_crc32_copy crc32_copy_pclmulqdq
+# endif
+// X86 - AVX2
+# ifdef X86_AVX2_NATIVE
+# undef native_adler32
+# define native_adler32 adler32_avx2
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_avx2
+# undef native_chunkmemset_safe
+# define native_chunkmemset_safe chunkmemset_safe_avx2
+# undef native_compare256
+# define native_compare256 compare256_avx2
+# undef native_inflate_fast
+# define native_inflate_fast inflate_fast_avx2
+# undef native_longest_match
+# define native_longest_match longest_match_avx2
+# undef native_longest_match_slow
+# define native_longest_match_slow longest_match_slow_avx2
+# undef native_slide_hash
+# define native_slide_hash slide_hash_avx2
+# endif
+// X86 - AVX512 (F,DQ,BW,VL)
+# ifdef X86_AVX512_NATIVE
+# undef native_adler32
+# define native_adler32 adler32_avx512
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_avx512
+# undef native_chunkmemset_safe
+# define native_chunkmemset_safe chunkmemset_safe_avx512
+# undef native_compare256
+# define native_compare256 compare256_avx512
+# undef native_inflate_fast
+# define native_inflate_fast inflate_fast_avx512
+# undef native_longest_match
+# define native_longest_match longest_match_avx512
+# undef native_longest_match_slow
+# define native_longest_match_slow longest_match_slow_avx512
+// X86 - AVX512 (VNNI)
+# ifdef X86_AVX512VNNI_NATIVE
+# undef native_adler32
+# define native_adler32 adler32_avx512_vnni
+# undef native_adler32_copy
+# define native_adler32_copy adler32_copy_avx512_vnni
+# endif
+# endif
+// X86 - VPCLMULQDQ
+# ifdef X86_VPCLMULQDQ_AVX512_NATIVE
+# undef native_crc32
+# define native_crc32 crc32_vpclmulqdq_avx512
+# undef native_crc32_copy
+# define native_crc32_copy crc32_copy_vpclmulqdq_avx512
+# elif defined(X86_VPCLMULQDQ_AVX2_NATIVE)
+# undef native_crc32
+# define native_crc32 crc32_vpclmulqdq_avx2
+# undef native_crc32_copy
+# define native_crc32_copy crc32_copy_vpclmulqdq_avx2
+# endif
+#endif
+
+#endif /* X86_FUNCTIONS_H_ */
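
For context, the sketch below shows one way the native_* mapping above could be consumed. It is an illustration, not part of the patch: the dispatch_table struct, the functable object, and the checksum() wrapper are hypothetical stand-ins for the project's real runtime-dispatch machinery, while native_crc32 and DISABLE_RUNTIME_CPU_DETECTION come from the header itself.

/* Sketch: compile-time binding via native_crc32 vs. a runtime table. */
#include <stdint.h>
#include <stddef.h>
#include "x86_functions.h"

#ifndef DISABLE_RUNTIME_CPU_DETECTION
/* Hypothetical function-pointer table, filled in once at startup after
 * CPUID feature detection selects among the declarations above. */
struct dispatch_table {
    uint32_t (*crc32)(uint32_t crc, const uint8_t *buf, size_t len);
};
extern struct dispatch_table functable;
#endif

uint32_t checksum(uint32_t crc, const uint8_t *buf, size_t len) {
#ifdef DISABLE_RUNTIME_CPU_DETECTION
    /* Compile-time binding: native_crc32 expands directly to the most
     * capable variant the build flags enabled (e.g. crc32_pclmulqdq on
     * an X86_PCLMULQDQ_NATIVE build), so the call is direct and
     * inlinable -- no pointer indirection, no startup CPU probe. */
    return native_crc32(crc, buf, len);
#else
    /* Runtime binding: one indirect call through the table. */
    return functable.crc32(crc, buf, len);
#endif
}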
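Two details of the mapping are worth noting. First, the *_NATIVE blocks are ordered from least to most capable ISA, and each re-points the native_* macros with an undef/define pair, so when several levels are native the last (most capable) block wins; the X86_AVX512VNNI_NATIVE block is nested inside the AVX512 one, so its adler32 overrides apply only when the AVX512 baseline is itself native. Second, on the affected 32-bit MSVC v142 toolchains (_MSC_VER 1920-1929) the WITHOUT_CHORBA_SSE guard skips the Chorba crc32 mappings entirely, leaving native_crc32 at its prior binding (presumably the generic default established in x86_natives.h).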
