summary | refs | log | tree | commit | diff
path: root/cpu_features.h
diff options
context:
space:
mode:
author: Nathan Moinvaziri <nathan@nathanm.com> 2022-08-28 20:27:37 -0700
committer: Hans Kristian Rosbach <hk-github@circlestorm.org> 2023-02-05 17:51:46 +0100
commit: aa1109bb2ee79680b0ac966b1de8724ba5083093 (patch)
tree: b3541be72e171ac12645d21b97d0be0d8aa0bd98 /cpu_features.h
parent: 941ce547cc5e17d01bba861d3edc1f6b27e42a43 (diff)
download: Project-Tick-aa1109bb2ee79680b0ac966b1de8724ba5083093.tar.gz
download: Project-Tick-aa1109bb2ee79680b0ac966b1de8724ba5083093.zip
Use arch-specific versions of inflate_fast.
This should reduce the cost of indirection that occurs when calling functable chunk copying functions inside inflate_fast. It should also allow the compiler to optimize the inflate fast path for the specific architecture.
Diffstat (limited to 'cpu_features.h')
-rw-r--r-- cpu_features.h 18
1 files changed, 18 insertions, 0 deletions
diff --git a/cpu_features.h b/cpu_features.h
index 72e40a1652..d211cb112e 100644
--- a/cpu_features.h
+++ b/cpu_features.h
@@ -110,6 +110,24 @@ extern uint8_t* chunkmemset_power8(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
+/* inflate fast loop */
+extern void inflate_fast_c(void *strm, uint32_t start);
+#ifdef X86_SSE2_CHUNKSET
+extern void inflate_fast_sse2(void *strm, uint32_t start);
+#endif
+#ifdef X86_SSE41
+extern void inflate_fast_sse41(void *strm, uint32_t start);
+#endif
+#ifdef X86_AVX_CHUNKSET
+extern void inflate_fast_avx(void *strm, uint32_t start);
+#endif
+#ifdef ARM_NEON_CHUNKSET
+extern void inflate_fast_neon(void *strm, uint32_t start);
+#endif
+#ifdef POWER8_VSX_CHUNKSET
+extern void inflate_fast_power8(void *strm, uint32_t start);
+#endif
+
/* CRC32 */
typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len);