summaryrefslogtreecommitdiff
path: root/neozip/arch/riscv/crc32_zbc.c
diff options
context:
space:
mode:
Diffstat (limited to 'neozip/arch/riscv/crc32_zbc.c')
-rw-r--r--neozip/arch/riscv/crc32_zbc.c103
1 file changed, 103 insertions(+), 0 deletions(-)
diff --git a/neozip/arch/riscv/crc32_zbc.c b/neozip/arch/riscv/crc32_zbc.c
new file mode 100644
index 0000000000..cf52279b80
--- /dev/null
+++ b/neozip/arch/riscv/crc32_zbc.c
@@ -0,0 +1,103 @@
+/* crc32_zbc.c - RISCV Zbc version of crc32
+ * Copyright (C) 2025 ByteDance. All rights reserved.
+ * Contributed by Yin Tong <yintong.ustc@bytedance.com>
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifdef RISCV_CRC32_ZBC
+
#include "zbuild.h"
#include "arch_functions.h"

#include <string.h>
+
#define CLMUL_MIN_LEN 16 // Minimum size of buffer for _crc32_clmul
#define CLMUL_CHUNK_LEN 16 // Length of chunk for clmul

/* Folding/reduction constants for the bit-reflected CRC-32 polynomial.
 * NOTE(review): presumably the standard x^k mod P fold constants and the
 * Barrett constant used by carry-less-multiply CRC kernels (cf. the Intel
 * PCLMULQDQ CRC white paper) — verify against the derivation. */
#define CONSTANT_R3 0x1751997d0ULL          // fold constant applied to the low 64-bit lane
#define CONSTANT_R4 0x0ccaa009eULL          // fold constant applied to the high 64-bit lane
#define CONSTANT_R5 0x163cd6124ULL          // 64-bit -> 32-bit fold constant
#define MASK32 0xFFFFFFFF                   // low-32-bit mask
#define CRCPOLY_TRUE_LE_FULL 0x1DB710641ULL // full 33-bit reflected CRC-32 polynomial
#define CONSTANT_RU 0x1F7011641ULL          // Barrett reduction constant (mu)
+
/* Carry-less multiply in GF(2)[x]: returns the LOW 64 bits of the 128-bit
 * product of a and b, via the RISC-V Zbc "clmul" instruction. */
static inline uint64_t clmul(uint64_t a, uint64_t b) {
    uint64_t res;
    __asm__ volatile("clmul %0, %1, %2" : "=r"(res) : "r"(a), "r"(b));
    return res;
}
+
/* Carry-less multiply in GF(2)[x]: returns the HIGH 64 bits of the 128-bit
 * product of a and b, via the RISC-V Zbc "clmulh" instruction. */
static inline uint64_t clmulh(uint64_t a, uint64_t b) {
    uint64_t res;
    __asm__ volatile("clmulh %0, %1, %2" : "=r"(res) : "r"(a), "r"(b));
    return res;
}
+
+Z_FORCEINLINE static uint32_t crc32_clmul_impl(uint64_t crc, const unsigned char *buf, uint64_t len) {
+ const uint64_t *buf64 = (const uint64_t *)buf;
+ uint64_t low = buf64[0] ^ crc;
+ uint64_t high = buf64[1];
+
+ if (len < 16)
+ goto finish_fold;
+ len -= 16;
+ buf64 += 2;
+
+ // process each 16-byte block
+ while (len >= 16) {
+ uint64_t t2 = clmul(CONSTANT_R4, high);
+ uint64_t t3 = clmulh(CONSTANT_R4, high);
+
+ uint64_t t0_new = clmul(CONSTANT_R3, low);
+ uint64_t t1_new = clmulh(CONSTANT_R3, low);
+
+ // Combine the results and XOR with new data
+ low = t0_new ^ t2;
+ high = t1_new ^ t3;
+ low ^= buf64[0];
+ high ^= buf64[1];
+
+ buf64 += 2;
+ len -= 16;
+ }
+
+finish_fold:
+ // Fold the 128-bit result into 64 bits
+ uint64_t fold_t3 = clmulh(low, CONSTANT_R4);
+ uint64_t fold_t2 = clmul(low, CONSTANT_R4);
+ low = high ^ fold_t2;
+ high = fold_t3;
+
+ // Combine the low and high parts and perform polynomial reduction
+ uint64_t combined = (low >> 32) | ((high & MASK32) << 32);
+ uint64_t reduced_low = clmul(low & MASK32, CONSTANT_R5) ^ combined;
+
+ // Barrett reduction step
+ uint64_t barrett = clmul(reduced_low & MASK32, CONSTANT_RU) & MASK32;
+ barrett = clmul(barrett, CRCPOLY_TRUE_LE_FULL);
+ uint64_t final = barrett ^ reduced_low;
+
+ // Return the high 32 bits as the final CRC
+ return (uint32_t)(final >> 32);
+}
+
+Z_INTERNAL uint32_t crc32_riscv64_zbc(uint32_t crc, const uint8_t *buf, size_t len) {
+ if (len < CLMUL_MIN_LEN) {
+ return crc32_braid(crc, buf, len);
+ }
+
+ uint64_t unaligned_length = len % CLMUL_CHUNK_LEN;
+ if (unaligned_length) {
+ crc = crc32_braid(crc, buf, unaligned_length);
+ buf += unaligned_length;
+ len -= unaligned_length;
+ }
+
+ crc = crc32_clmul_impl(~crc, buf, len);
+ return ~crc;
+}
+
+Z_INTERNAL uint32_t crc32_copy_riscv64_zbc(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len) {
+ crc = crc32_riscv64_zbc(crc, src, len);
+ memcpy(dst, src, len);
+ return crc;
+}
+#endif