diff options
| author | Mehmet Samet Duman <yongdohyun@projecttick.org> | 2026-04-02 19:56:09 +0300 |
|---|---|---|
| committer | Mehmet Samet Duman <yongdohyun@projecttick.org> | 2026-04-02 19:56:09 +0300 |
| commit | 7fb132859fda54aa96bc9dd46d302b343eeb5a02 (patch) | |
| tree | b43ae77d7451fb470a260c03349a1caf2846c5e5 /neozip/arch/riscv | |
| parent | b1e34e861b5d732afe828d58aad2c638135061fd (diff) | |
| parent | c2712b8a345191f6ed79558c089777df94590087 (diff) | |
| download | Project-Tick-7fb132859fda54aa96bc9dd46d302b343eeb5a02.tar.gz Project-Tick-7fb132859fda54aa96bc9dd46d302b343eeb5a02.zip | |
Add 'neozip/' from commit 'c2712b8a345191f6ed79558c089777df94590087'
git-subtree-dir: neozip
git-subtree-mainline: b1e34e861b5d732afe828d58aad2c638135061fd
git-subtree-split: c2712b8a345191f6ed79558c089777df94590087
Diffstat (limited to 'neozip/arch/riscv')
| -rw-r--r-- | neozip/arch/riscv/Makefile.in | 72 | ||||
| -rw-r--r-- | neozip/arch/riscv/README.md | 45 | ||||
| -rw-r--r-- | neozip/arch/riscv/adler32_rvv.c | 119 | ||||
| -rw-r--r-- | neozip/arch/riscv/chunkset_rvv.c | 126 | ||||
| -rw-r--r-- | neozip/arch/riscv/compare256_rvv.c | 48 | ||||
| -rw-r--r-- | neozip/arch/riscv/crc32_zbc.c | 103 | ||||
| -rw-r--r-- | neozip/arch/riscv/riscv_features.c | 99 | ||||
| -rw-r--r-- | neozip/arch/riscv/riscv_features.h | 19 | ||||
| -rw-r--r-- | neozip/arch/riscv/riscv_functions.h | 60 | ||||
| -rw-r--r-- | neozip/arch/riscv/riscv_natives.h | 19 | ||||
| -rw-r--r-- | neozip/arch/riscv/slide_hash_rvv.c | 33 |
11 files changed, 743 insertions, 0 deletions
diff --git a/neozip/arch/riscv/Makefile.in b/neozip/arch/riscv/Makefile.in new file mode 100644 index 0000000000..43176eee6e --- /dev/null +++ b/neozip/arch/riscv/Makefile.in @@ -0,0 +1,72 @@ +# Makefile for zlib-ng +# Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler +# Copyright (C) 2024 Hans Kristian Rosbach +# Copyright (C) 2025 Yin Tong <yintong.ustc@bytedance.com>, ByteDance +# For conditions of distribution and use, see copyright notice in zlib.h + +CC= +CFLAGS= +SFLAGS= +INCLUDES= +SUFFIX= + +SRCDIR=. +SRCTOP=../.. +TOPDIR=$(SRCTOP) + +RVVFLAG= +RVVZBCFLAG= +ZBCFLAG= + +all: \ + riscv_features.o riscv_features.lo \ + adler32_rvv.o adler32_rvv.lo \ + chunkset_rvv.o chunkset_rvv.lo \ + compare256_rvv.o compare256_rvv.lo \ + slide_hash_rvv.o slide_hash_rvv.lo \ + crc32_zbc.o crc32_zbc.lo + +riscv_features.o: $(SRCDIR)/riscv_features.c + $(CC) $(CFLAGS) $(RVVZBCFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/riscv_features.c + +riscv_features.lo: $(SRCDIR)/riscv_features.c + $(CC) $(SFLAGS) $(RVVZBCFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/riscv_features.c + +adler32_rvv.o: $(SRCDIR)/adler32_rvv.c + $(CC) $(CFLAGS) $(RVVFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_rvv.c + +adler32_rvv.lo: $(SRCDIR)/adler32_rvv.c + $(CC) $(SFLAGS) $(RVVFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_rvv.c + +chunkset_rvv.o: $(SRCDIR)/chunkset_rvv.c + $(CC) $(CFLAGS) $(RVVFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_rvv.c + +chunkset_rvv.lo: $(SRCDIR)/chunkset_rvv.c + $(CC) $(SFLAGS) $(RVVFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_rvv.c + +compare256_rvv.o: $(SRCDIR)/compare256_rvv.c + $(CC) $(CFLAGS) $(RVVFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_rvv.c + +compare256_rvv.lo: $(SRCDIR)/compare256_rvv.c + $(CC) $(SFLAGS) $(RVVFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_rvv.c + +slide_hash_rvv.o: $(SRCDIR)/slide_hash_rvv.c + $(CC) $(CFLAGS) $(RVVFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_rvv.c + +slide_hash_rvv.lo: $(SRCDIR)/slide_hash_rvv.c + $(CC) 
$(SFLAGS) $(RVVFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_rvv.c + +crc32_zbc.o: $(SRCDIR)/crc32_zbc.c + $(CC) $(CFLAGS) $(ZBCFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_zbc.c + +crc32_zbc.lo: $(SRCDIR)/crc32_zbc.c + $(CC) $(SFLAGS) $(ZBCFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_zbc.c + +mostlyclean: clean +clean: + rm -f *.o *.lo *~ + rm -rf objs + rm -f *.gcda *.gcno *.gcov + +distclean: clean + rm -f Makefile diff --git a/neozip/arch/riscv/README.md b/neozip/arch/riscv/README.md new file mode 100644 index 0000000000..013095c373 --- /dev/null +++ b/neozip/arch/riscv/README.md @@ -0,0 +1,45 @@ +# Building RISC-V Target with Cmake # + +> **Warning** +> Runtime rvv detection (using `hwcap`) requires linux kernel 6.5 or newer. +> +> When running on older kernels, we fall back to compile-time detection; this can potentially cause crashes if rvv is enabled at compile time but not supported by the target cpu. +> Therefore, if older kernel support is needed, rvv should be disabled if the target cpu does not support it. +## Prerequisite: Build RISC-V Clang Toolchain and QEMU ## + +If you don't have prebuilt clang and riscv64 qemu, you can refer to the [script](https://github.com/sifive/prepare-riscv-toolchain-qemu/blob/main/prepare_riscv_toolchain_qemu.sh) to get the source. Copy the script to the zlib-ng root directory, and run it to download the source and build them. Modify the content according to your conditions (e.g., toolchain version). + +```bash +./prepare_riscv_toolchain_qemu.sh +``` + +After running the script, clang & qemu are built in `build-toolchain-qemu/riscv-clang/` & `build-toolchain-qemu/riscv-qemu/`. + +`build-toolchain-qemu/riscv-clang/` is your `TOOLCHAIN_PATH`. +`build-toolchain-qemu/riscv-qemu/bin/qemu-riscv64` is your `QEMU_PATH`. + +You can also download the prebuilt toolchain & qemu from [the release page](https://github.com/sifive/prepare-riscv-toolchain-qemu/releases), and enjoy using them. 
+ +## Cross-Compile for RISC-V Target ## + +```bash +cmake -G Ninja -B ./build-riscv \ + -D CMAKE_TOOLCHAIN_FILE=./cmake/toolchain-riscv.cmake \ + -D CMAKE_INSTALL_PREFIX=./build-riscv/install \ + -D TOOLCHAIN_PATH={TOOLCHAIN_PATH} \ + -D QEMU_PATH={QEMU_PATH} \ + . + +cmake --build ./build-riscv +``` + +Disable the option if there is no RVV support: +``` +-D WITH_RVV=OFF +``` + +## Run Unittests on User Mode QEMU ## + +```bash +cd ./build-riscv && ctest --verbose +``` diff --git a/neozip/arch/riscv/adler32_rvv.c b/neozip/arch/riscv/adler32_rvv.c new file mode 100644 index 0000000000..e446189302 --- /dev/null +++ b/neozip/arch/riscv/adler32_rvv.c @@ -0,0 +1,119 @@ +/* adler32_rvv.c - RVV version of adler32 + * Copyright (C) 2023 SiFive, Inc. All rights reserved. + * Contributed by Alex Chiang <alex.chiang@sifive.com> + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef RISCV_RVV + +#include "zbuild.h" +#include "adler32_p.h" + +#include <riscv_vector.h> + +Z_FORCEINLINE static uint32_t adler32_copy_impl(uint32_t adler, uint8_t* restrict dst, const uint8_t *src, size_t len, int COPY) { + /* split Adler-32 into component sums */ + uint32_t sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (UNLIKELY(len == 1)) + return adler32_copy_tail(adler, dst, src, 1, sum2, 1, 1, COPY); + + /* in case short lengths are provided, keep it somewhat fast */ + if (UNLIKELY(len < 16)) + return adler32_copy_tail(adler, dst, src, len, sum2, 1, 15, COPY); + + size_t left = len; + size_t vl = __riscv_vsetvlmax_e8m1(); + vl = MIN(vl, 256); + vuint32m4_t v_buf32_accu = __riscv_vmv_v_x_u32m4(0, vl); + vuint32m4_t v_adler32_prev_accu = __riscv_vmv_v_x_u32m4(0, vl); + vuint16m2_t v_buf16_accu; + + /* + * We accumulate 8-bit data, and to prevent overflow, we have to use a 32-bit accumulator. + * However, adding 8-bit data into a 32-bit accumulator isn't efficient. 
We use 16-bit & 32-bit + * accumulators to boost performance. + * + * The block_size is the largest multiple of vl that <= 256, because overflow would occur when + * vl > 256 (255 * 256 <= UINT16_MAX). + * + * We accumulate 8-bit data into a 16-bit accumulator and then + * move the data into the 32-bit accumulator at the last iteration. + */ + size_t block_size = (256 / vl) * vl; + size_t nmax_limit = (NMAX / block_size); + size_t cnt = 0; + while (left >= block_size) { + v_buf16_accu = __riscv_vmv_v_x_u16m2(0, vl); + size_t subprob = block_size; + while (subprob > 0) { + vuint8m1_t v_buf8 = __riscv_vle8_v_u8m1(src, vl); + if (COPY) __riscv_vse8_v_u8m1(dst, v_buf8, vl); + v_adler32_prev_accu = __riscv_vwaddu_wv_u32m4(v_adler32_prev_accu, v_buf16_accu, vl); + v_buf16_accu = __riscv_vwaddu_wv_u16m2(v_buf16_accu, v_buf8, vl); + src += vl; + if (COPY) dst += vl; + subprob -= vl; + } + v_adler32_prev_accu = __riscv_vmacc_vx_u32m4(v_adler32_prev_accu, block_size / vl, v_buf32_accu, vl); + v_buf32_accu = __riscv_vwaddu_wv_u32m4(v_buf32_accu, v_buf16_accu, vl); + left -= block_size; + /* do modulo once each block of NMAX size */ + if (++cnt >= nmax_limit) { + v_adler32_prev_accu = __riscv_vremu_vx_u32m4(v_adler32_prev_accu, BASE, vl); + v_buf32_accu = __riscv_vremu_vx_u32m4(v_buf32_accu, BASE, vl); + cnt = 0; + } + } + /* the left len <= 256 now, we can use 16-bit accum safely */ + v_buf16_accu = __riscv_vmv_v_x_u16m2(0, vl); + size_t res = left; + while (left >= vl) { + vuint8m1_t v_buf8 = __riscv_vle8_v_u8m1(src, vl); + if (COPY) __riscv_vse8_v_u8m1(dst, v_buf8, vl); + v_adler32_prev_accu = __riscv_vwaddu_wv_u32m4(v_adler32_prev_accu, v_buf16_accu, vl); + v_buf16_accu = __riscv_vwaddu_wv_u16m2(v_buf16_accu, v_buf8, vl); + src += vl; + if (COPY) dst += vl; + left -= vl; + } + v_adler32_prev_accu = __riscv_vmacc_vx_u32m4(v_adler32_prev_accu, res / vl, v_buf32_accu, vl); + v_adler32_prev_accu = __riscv_vremu_vx_u32m4(v_adler32_prev_accu, BASE, vl); + v_buf32_accu = 
__riscv_vwaddu_wv_u32m4(v_buf32_accu, v_buf16_accu, vl); + + vuint32m4_t v_seq = __riscv_vid_v_u32m4(vl); + vuint32m4_t v_rev_seq = __riscv_vrsub_vx_u32m4(v_seq, vl, vl); + vuint32m4_t v_sum32_accu = __riscv_vmul_vv_u32m4(v_buf32_accu, v_rev_seq, vl); + + v_sum32_accu = __riscv_vadd_vv_u32m4(v_sum32_accu, __riscv_vmul_vx_u32m4(v_adler32_prev_accu, vl, vl), vl); + + vuint32m1_t v_sum2_sum = __riscv_vmv_s_x_u32m1(0, vl); + v_sum2_sum = __riscv_vredsum_vs_u32m4_u32m1(v_sum32_accu, v_sum2_sum, vl); + uint32_t sum2_sum = __riscv_vmv_x_s_u32m1_u32(v_sum2_sum) % BASE; + + sum2 += (sum2_sum + adler * ((len - left) % BASE)); + + vuint32m1_t v_adler_sum = __riscv_vmv_s_x_u32m1(0, vl); + v_adler_sum = __riscv_vredsum_vs_u32m4_u32m1(v_buf32_accu, v_adler_sum, vl); + uint32_t adler_sum = __riscv_vmv_x_s_u32m1_u32(v_adler_sum) % BASE; + + adler += adler_sum; + + sum2 %= BASE; + adler %= BASE; + + /* Process tail (left < 256). */ + return adler32_copy_tail(adler, dst, src, left, sum2, left != 0, 255, COPY); +} + +Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len) { + return adler32_copy_impl(adler, NULL, buf, len, 0); +} + +Z_INTERNAL uint32_t adler32_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) { + return adler32_copy_impl(adler, dst, src, len, 1); +} + +#endif // RISCV_RVV diff --git a/neozip/arch/riscv/chunkset_rvv.c b/neozip/arch/riscv/chunkset_rvv.c new file mode 100644 index 0000000000..cd8ed3cfd2 --- /dev/null +++ b/neozip/arch/riscv/chunkset_rvv.c @@ -0,0 +1,126 @@ +/* chunkset_rvv.c - RVV version of chunkset + * Copyright (C) 2023 SiFive, Inc. All rights reserved. 
+ * Contributed by Alex Chiang <alex.chiang@sifive.com> + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef RISCV_RVV + +#include "zbuild.h" + +#include <riscv_vector.h> + +/* + * RISC-V glibc would enable RVV optimized memcpy at runtime by IFUNC, + * so we prefer using large size chunk and copy memory as much as possible. + */ +#define HAVE_CHUNKMEMSET_2 +#define HAVE_CHUNKMEMSET_4 +#define HAVE_CHUNKMEMSET_8 + +#define CHUNK_MEMSET_RVV_IMPL(from, chunk, elen) \ +do { \ + size_t vl, len = sizeof(*chunk) / sizeof(uint##elen##_t); \ + uint##elen##_t val = *(uint##elen##_t*)from; \ + uint##elen##_t* chunk_p = (uint##elen##_t*)chunk; \ + do { \ + vl = __riscv_vsetvl_e##elen##m4(len); \ + vuint##elen##m4_t v_val = __riscv_vmv_v_x_u##elen##m4(val, vl); \ + __riscv_vse##elen##_v_u##elen##m4(chunk_p, v_val, vl); \ + len -= vl; chunk_p += vl; \ + } while (len > 0); \ +} while (0) + +/* We don't have a 32-byte datatype for RISC-V arch. */ +typedef struct chunk_s { + uint64_t data[4]; +} chunk_t; + +static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { + CHUNK_MEMSET_RVV_IMPL(from, chunk, 16); +} + +static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { + CHUNK_MEMSET_RVV_IMPL(from, chunk, 32); +} + +static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { + CHUNK_MEMSET_RVV_IMPL(from, chunk, 64); +} + +static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { + memcpy(chunk->data, (uint8_t *)s, sizeof(*chunk)); +} + +static inline void storechunk(uint8_t *out, chunk_t *chunk) { + memcpy(out, chunk->data, sizeof(*chunk)); +} + +#define CHUNKSIZE chunksize_rvv +#define CHUNKCOPY chunkcopy_rvv +#define CHUNKUNROLL chunkunroll_rvv +#define CHUNKMEMSET chunkmemset_rvv +#define CHUNKMEMSET_SAFE chunkmemset_safe_rvv + +#define HAVE_CHUNKCOPY + +/* + * Assuming that the length is non-zero, and that `from` lags `out` by at least + * sizeof chunk_t bytes, please see the comments in chunkset_tpl.h. 
+ * + * We load/store a single chunk once in the `CHUNKCOPY`. + * However, RISC-V glibc would enable RVV optimized memcpy at runtime by IFUNC, + * such that we prefer to copy a large memory size at once to make good use of the RVV advance. + * + * To be aligned to the other platforms, we didn't modify the `CHUNKCOPY` method a lot, + * but we still copy as much memory as possible for some conditions. + * + * case 1: out - from >= len (no overlap) + * We can use memcpy to copy `len` size once + * because the memory layout would be the same. + * + * case 2: overlap + * We copy N chunks using memcpy at once, aiming to achieve our goal: + * to copy as much memory as possible. + * + * After using a single memcpy to copy N chunks, we have to use a series of + * loadchunk and storechunk to ensure the result is correct. + */ +static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, size_t len) { + Assert(len > 0, "chunkcopy should never have a length 0"); + size_t dist = out - from; + if (out < from || dist >= len) { + memcpy(out, from, len); + out += len; + from += len; + return out; + } + + size_t align = ((len - 1) % sizeof(chunk_t)) + 1; + memcpy(out, from, sizeof(chunk_t)); + out += align; + from += align; + len -= align; + + size_t vl = (dist / sizeof(chunk_t)) * sizeof(chunk_t); + while (len > dist) { + memcpy(out, from, vl); + out += vl; + from += vl; + len -= vl; + } + + if (len > 0) { + memcpy(out, from, len); + out += len; + } + return out; +} + +#include "chunkset_tpl.h" + +#define INFLATE_FAST inflate_fast_rvv + +#include "inffast_tpl.h" + +#endif diff --git a/neozip/arch/riscv/compare256_rvv.c b/neozip/arch/riscv/compare256_rvv.c new file mode 100644 index 0000000000..edb18a3766 --- /dev/null +++ b/neozip/arch/riscv/compare256_rvv.c @@ -0,0 +1,48 @@ +/* compare256_rvv.c - RVV version of compare256 + * Copyright (C) 2023 SiFive, Inc. All rights reserved. 
+ * Contributed by Alex Chiang <alex.chiang@sifive.com> + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef RISCV_RVV + +#include "zbuild.h" +#include "zmemory.h" +#include "deflate.h" + +#include <riscv_vector.h> + +static inline uint32_t compare256_rvv_static(const uint8_t *src0, const uint8_t *src1) { + uint32_t len = 0; + size_t vl; + long found_diff; + do { + vl = __riscv_vsetvl_e8m4(256 - len); + vuint8m4_t v_src0 = __riscv_vle8_v_u8m4(src0, vl); + vuint8m4_t v_src1 = __riscv_vle8_v_u8m4(src1, vl); + vbool2_t v_mask = __riscv_vmsne_vv_u8m4_b2(v_src0, v_src1, vl); + found_diff = __riscv_vfirst_m_b2(v_mask, vl); + if (found_diff >= 0) + return len + (uint32_t)found_diff; + src0 += vl, src1 += vl, len += vl; + } while (len < 256); + + return 256; +} + +Z_INTERNAL uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1) { + return compare256_rvv_static(src0, src1); +} + +#define LONGEST_MATCH longest_match_rvv +#define COMPARE256 compare256_rvv_static + +#include "match_tpl.h" + +#define LONGEST_MATCH_SLOW +#define LONGEST_MATCH longest_match_slow_rvv +#define COMPARE256 compare256_rvv_static + +#include "match_tpl.h" + +#endif // RISCV_RVV diff --git a/neozip/arch/riscv/crc32_zbc.c b/neozip/arch/riscv/crc32_zbc.c new file mode 100644 index 0000000000..cf52279b80 --- /dev/null +++ b/neozip/arch/riscv/crc32_zbc.c @@ -0,0 +1,103 @@ +/* crc32_zbc.c - RISCV Zbc version of crc32 + * Copyright (C) 2025 ByteDance. All rights reserved. 
+ * Contributed by Yin Tong <yintong.ustc@bytedance.com> + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef RISCV_CRC32_ZBC + +#include "zbuild.h" +#include "arch_functions.h" + +#define CLMUL_MIN_LEN 16 // Minimum size of buffer for _crc32_clmul +#define CLMUL_CHUNK_LEN 16 // Length of chunk for clmul + +#define CONSTANT_R3 0x1751997d0ULL +#define CONSTANT_R4 0x0ccaa009eULL +#define CONSTANT_R5 0x163cd6124ULL +#define MASK32 0xFFFFFFFF +#define CRCPOLY_TRUE_LE_FULL 0x1DB710641ULL +#define CONSTANT_RU 0x1F7011641ULL + +static inline uint64_t clmul(uint64_t a, uint64_t b) { + uint64_t res; + __asm__ volatile("clmul %0, %1, %2" : "=r"(res) : "r"(a), "r"(b)); + return res; +} + +static inline uint64_t clmulh(uint64_t a, uint64_t b) { + uint64_t res; + __asm__ volatile("clmulh %0, %1, %2" : "=r"(res) : "r"(a), "r"(b)); + return res; +} + +Z_FORCEINLINE static uint32_t crc32_clmul_impl(uint64_t crc, const unsigned char *buf, uint64_t len) { + const uint64_t *buf64 = (const uint64_t *)buf; + uint64_t low = buf64[0] ^ crc; + uint64_t high = buf64[1]; + + if (len < 16) + goto finish_fold; + len -= 16; + buf64 += 2; + + // process each 16-byte block + while (len >= 16) { + uint64_t t2 = clmul(CONSTANT_R4, high); + uint64_t t3 = clmulh(CONSTANT_R4, high); + + uint64_t t0_new = clmul(CONSTANT_R3, low); + uint64_t t1_new = clmulh(CONSTANT_R3, low); + + // Combine the results and XOR with new data + low = t0_new ^ t2; + high = t1_new ^ t3; + low ^= buf64[0]; + high ^= buf64[1]; + + buf64 += 2; + len -= 16; + } + +finish_fold: + // Fold the 128-bit result into 64 bits + uint64_t fold_t3 = clmulh(low, CONSTANT_R4); + uint64_t fold_t2 = clmul(low, CONSTANT_R4); + low = high ^ fold_t2; + high = fold_t3; + + // Combine the low and high parts and perform polynomial reduction + uint64_t combined = (low >> 32) | ((high & MASK32) << 32); + uint64_t reduced_low = clmul(low & MASK32, CONSTANT_R5) ^ combined; + + // Barrett reduction step + uint64_t 
barrett = clmul(reduced_low & MASK32, CONSTANT_RU) & MASK32; + barrett = clmul(barrett, CRCPOLY_TRUE_LE_FULL); + uint64_t final = barrett ^ reduced_low; + + // Return the high 32 bits as the final CRC + return (uint32_t)(final >> 32); +} + +Z_INTERNAL uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf, size_t len) { + if (len < CLMUL_MIN_LEN) { + return crc32_braid(crc, buf, len); + } + + uint64_t unaligned_length = len % CLMUL_CHUNK_LEN; + if (unaligned_length) { + crc = crc32_braid(crc, buf, unaligned_length); + buf += unaligned_length; + len -= unaligned_length; + } + + crc = crc32_clmul_impl(~crc, buf, len); + return ~crc; +} + +Z_INTERNAL uint32_t crc32_copy_riscv64_zbc(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) { + crc = crc32_riscv64_zbc(crc, src, len); + memcpy(dst, src, len); + return crc; +} +#endif diff --git a/neozip/arch/riscv/riscv_features.c b/neozip/arch/riscv/riscv_features.c new file mode 100644 index 0000000000..b23f10a699 --- /dev/null +++ b/neozip/arch/riscv/riscv_features.c @@ -0,0 +1,99 @@ +#ifdef RISCV_FEATURES + +#define _DEFAULT_SOURCE 1 /* For syscall() */ + +#include "zbuild.h" +#include "riscv_features.h" + +#include <sys/utsname.h> + +#if defined(__linux__) && defined(HAVE_SYS_AUXV_H) +# include <sys/auxv.h> +#endif + +#if defined(__linux__) && defined(HAVE_ASM_HWPROBE_H) +# include <asm/hwprobe.h> +# include <sys/syscall.h> /* For __NR_riscv_hwprobe */ +# include <unistd.h> /* For syscall() */ +#endif + +#define ISA_V_HWCAP (1 << ('v' - 'a')) +#define ISA_ZBC_HWCAP (1 << 29) + +static int riscv_check_features_runtime_hwprobe(struct riscv_cpu_features *features) { +#if defined(__NR_riscv_hwprobe) && defined(RISCV_HWPROBE_KEY_IMA_EXT_0) + struct riscv_hwprobe probes[] = { + {RISCV_HWPROBE_KEY_IMA_EXT_0, 0}, + }; + int ret; + unsigned i; + + ret = syscall(__NR_riscv_hwprobe, probes, sizeof(probes) / sizeof(probes[0]), 0, NULL, 0); + + if (ret != 0) { + /* Kernel does not support hwprobe */ + return 0; + } + + 
for (i = 0; i < sizeof(probes) / sizeof(probes[0]); i++) { + switch (probes[i].key) { + case RISCV_HWPROBE_KEY_IMA_EXT_0: +# ifdef RISCV_HWPROBE_IMA_V + features->has_rvv = !!(probes[i].value & RISCV_HWPROBE_IMA_V); +# endif +# ifdef RISCV_HWPROBE_EXT_ZBC + features->has_zbc = !!(probes[i].value & RISCV_HWPROBE_EXT_ZBC); +# endif + break; + } + } + + return 1; +#else + return 0; +#endif +} + +static int riscv_check_features_runtime_hwcap(struct riscv_cpu_features *features) { +#if defined(__linux__) && defined(HAVE_SYS_AUXV_H) + unsigned long hw_cap = getauxval(AT_HWCAP); + + features->has_rvv = hw_cap & ISA_V_HWCAP; + features->has_zbc = hw_cap & ISA_ZBC_HWCAP; + + return 1; +#else + return 0; +#endif +} + +static void riscv_check_features_runtime(struct riscv_cpu_features *features) { + if (riscv_check_features_runtime_hwprobe(features)) + return; + + riscv_check_features_runtime_hwcap(features); +} + +void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features) { + riscv_check_features_runtime(features); +#ifdef RISCV_RVV + if (features->has_rvv) { + size_t e8m1_vec_len; + intptr_t vtype_reg_val; + // Check that a vuint8m1_t vector is at least 16 bytes and that tail + // agnostic and mask agnostic mode are supported + // + __asm__ volatile( + "vsetvli %0, zero, e8, m1, ta, ma\n\t" + "csrr %1, vtype" + : "=r"(e8m1_vec_len), "=r"(vtype_reg_val)); + + // The RVV target is supported if the VILL bit of VTYPE (the MSB bit of + // VTYPE) is not set and the length of a vuint8m1_t vector is at least 16 + // bytes + features->has_rvv = (vtype_reg_val >= 0 && e8m1_vec_len >= 16); + } +#endif +} + +#endif diff --git a/neozip/arch/riscv/riscv_features.h b/neozip/arch/riscv/riscv_features.h new file mode 100644 index 0000000000..42855a1b6b --- /dev/null +++ b/neozip/arch/riscv/riscv_features.h @@ -0,0 +1,19 @@ +/* riscv_features.h -- check for riscv features. + * + * Copyright (C) 2023 SiFive, Inc. All rights reserved. 
+ * Contributed by Alex Chiang <alex.chiang@sifive.com> + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef RISCV_FEATURES_H_ +#define RISCV_FEATURES_H_ + +struct riscv_cpu_features { + int has_rvv; + int has_zbc; +}; + +void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features); + +#endif /* RISCV_FEATURES_H_ */ diff --git a/neozip/arch/riscv/riscv_functions.h b/neozip/arch/riscv/riscv_functions.h new file mode 100644 index 0000000000..89120ffabf --- /dev/null +++ b/neozip/arch/riscv/riscv_functions.h @@ -0,0 +1,60 @@ +/* riscv_functions.h -- RISCV implementations for arch-specific functions. + * + * Copyright (C) 2023 SiFive, Inc. All rights reserved. + * Contributed by Alex Chiang <alex.chiang@sifive.com> + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef RISCV_FUNCTIONS_H_ +#define RISCV_FUNCTIONS_H_ + +#include "riscv_natives.h" + +#ifdef RISCV_RVV +uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len); +uint32_t adler32_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); +uint8_t* chunkmemset_safe_rvv(uint8_t *out, uint8_t *from, size_t len, size_t left); +uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1); + +uint32_t longest_match_rvv(deflate_state *const s, uint32_t cur_match); +uint32_t longest_match_slow_rvv(deflate_state *const s, uint32_t cur_match); +void slide_hash_rvv(deflate_state *s); +void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start); +#endif + +#ifdef RISCV_CRC32_ZBC +uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf, size_t len); +uint32_t crc32_copy_riscv64_zbc(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len); +#endif + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +// RISCV - RVV +# ifdef RISCV_RVV_NATIVE +# undef native_adler32 +# define native_adler32 adler32_rvv +# undef native_adler32_copy +# define native_adler32_copy adler32_copy_rvv +# undef 
native_chunkmemset_safe +# define native_chunkmemset_safe chunkmemset_safe_rvv +# undef native_compare256 +# define native_compare256 compare256_rvv +# undef native_inflate_fast +# define native_inflate_fast inflate_fast_rvv +# undef native_longest_match +# define native_longest_match longest_match_rvv +# undef native_longest_match_slow +# define native_longest_match_slow longest_match_slow_rvv +# undef native_slide_hash +# define native_slide_hash slide_hash_rvv +# endif +// RISCV - CRC32 +# ifdef RISCV_ZBC_NATIVE +# undef native_crc32 +# define native_crc32 crc32_riscv64_zbc +# undef native_crc32_copy +# define native_crc32_copy crc32_copy_riscv64_zbc +# endif +#endif + +#endif /* RISCV_FUNCTIONS_H_ */ diff --git a/neozip/arch/riscv/riscv_natives.h b/neozip/arch/riscv/riscv_natives.h new file mode 100644 index 0000000000..38d7aba648 --- /dev/null +++ b/neozip/arch/riscv/riscv_natives.h @@ -0,0 +1,19 @@ +/* riscv_natives.h -- RISCV compile-time feature detection macros. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef RISCV_NATIVES_H_ +#define RISCV_NATIVES_H_ + +#if defined(__riscv_v) && defined(__linux__) +# ifdef RISCV_RVV +# define RISCV_RVV_NATIVE +# endif +#endif +#if defined(__riscv_zbc) +# ifdef RISCV_CRC32_ZBC +# define RISCV_ZBC_NATIVE +# endif +#endif + +#endif /* RISCV_NATIVES_H_ */ diff --git a/neozip/arch/riscv/slide_hash_rvv.c b/neozip/arch/riscv/slide_hash_rvv.c new file mode 100644 index 0000000000..e794c38204 --- /dev/null +++ b/neozip/arch/riscv/slide_hash_rvv.c @@ -0,0 +1,33 @@ +/* slide_hash_rvv.c - RVV version of slide_hash + * Copyright (C) 2023 SiFive, Inc. All rights reserved. 
+ * Contributed by Alex Chiang <alex.chiang@sifive.com> + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef RISCV_RVV + +#include "zbuild.h" +#include "deflate.h" + +#include <riscv_vector.h> + +static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { + size_t vl; + while (entries > 0) { + vl = __riscv_vsetvl_e16m4(entries); + vuint16m4_t v_tab = __riscv_vle16_v_u16m4(table, vl); + vuint16m4_t v_diff = __riscv_vssubu_vx_u16m4(v_tab, wsize, vl); + __riscv_vse16_v_u16m4(table, v_diff, vl); + table += vl, entries -= vl; + } +} + +Z_INTERNAL void slide_hash_rvv(deflate_state *s) { + Assert(s->w_size <= UINT16_MAX, "w_size should fit in uint16_t"); + uint16_t wsize = (uint16_t)s->w_size; + + slide_hash_chain(s->head, HASH_SIZE, wsize); + slide_hash_chain(s->prev, wsize, wsize); +} + +#endif // RISCV_RVV |
