diff options
Diffstat (limited to 'neozip/arch/riscv/chunkset_rvv.c')
| -rw-r--r-- | neozip/arch/riscv/chunkset_rvv.c | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/neozip/arch/riscv/chunkset_rvv.c b/neozip/arch/riscv/chunkset_rvv.c new file mode 100644 index 0000000000..cd8ed3cfd2 --- /dev/null +++ b/neozip/arch/riscv/chunkset_rvv.c @@ -0,0 +1,126 @@ +/* chunkset_rvv.c - RVV version of chunkset + * Copyright (C) 2023 SiFive, Inc. All rights reserved. + * Contributed by Alex Chiang <alex.chiang@sifive.com> + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef RISCV_RVV + +#include "zbuild.h" + +#include <riscv_vector.h> + +/* + * RISC-V glibc would enable RVV optimized memcpy at runtime by IFUNC, + * so we prefer using large size chunk and copy memory as much as possible. + */ +#define HAVE_CHUNKMEMSET_2 +#define HAVE_CHUNKMEMSET_4 +#define HAVE_CHUNKMEMSET_8 + +#define CHUNK_MEMSET_RVV_IMPL(from, chunk, elen) \ +do { \ + size_t vl, len = sizeof(*chunk) / sizeof(uint##elen##_t); \ + uint##elen##_t val = *(uint##elen##_t*)from; \ + uint##elen##_t* chunk_p = (uint##elen##_t*)chunk; \ + do { \ + vl = __riscv_vsetvl_e##elen##m4(len); \ + vuint##elen##m4_t v_val = __riscv_vmv_v_x_u##elen##m4(val, vl); \ + __riscv_vse##elen##_v_u##elen##m4(chunk_p, v_val, vl); \ + len -= vl; chunk_p += vl; \ + } while (len > 0); \ +} while (0) + +/* We don't have a 32-byte datatype for RISC-V arch. */ +typedef struct chunk_s { + uint64_t data[4]; +} chunk_t; + +static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { + CHUNK_MEMSET_RVV_IMPL(from, chunk, 16); +} + +static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { + CHUNK_MEMSET_RVV_IMPL(from, chunk, 32); +} + +static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { + CHUNK_MEMSET_RVV_IMPL(from, chunk, 64); +} + +static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { + memcpy(chunk->data, (uint8_t *)s, sizeof(*chunk)); +} + +static inline void storechunk(uint8_t *out, chunk_t *chunk) { + memcpy(out, chunk->data, sizeof(*chunk)); +} + +#define CHUNKSIZE chunksize_rvv +#define CHUNKCOPY chunkcopy_rvv +#define CHUNKUNROLL chunkunroll_rvv +#define CHUNKMEMSET chunkmemset_rvv +#define CHUNKMEMSET_SAFE chunkmemset_safe_rvv + +#define HAVE_CHUNKCOPY + +/* + * Assuming that the length is non-zero, and that `from` lags `out` by at least + * sizeof chunk_t bytes, please see the comments in chunkset_tpl.h. + * + * We load/store a single chunk once in the `CHUNKCOPY`. + * However, RISC-V glibc would enable RVV optimized memcpy at runtime by IFUNC, + * such that, we prefer copy large memory size once to make good use of the the RVV advance. + * + * To be aligned to the other platforms, we didn't modify `CHUNKCOPY` method a lot, + * but we still copy as much memory as possible for some conditions. + * + * case 1: out - from >= len (no overlap) + * We can use memcpy to copy `len` size once + * because the memory layout would be the same. + * + * case 2: overlap + * We copy N chunks using memcpy at once, aiming to achieve our goal: + * to copy as much memory as possible. + * + * After using a single memcpy to copy N chunks, we have to use series of + * loadchunk and storechunk to ensure the result is correct. + */ +static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, size_t len) { + Assert(len > 0, "chunkcopy should never have a length 0"); + size_t dist = out - from; + if (out < from || dist >= len) { + memcpy(out, from, len); + out += len; + from += len; + return out; + } + + size_t align = ((len - 1) % sizeof(chunk_t)) + 1; + memcpy(out, from, sizeof(chunk_t)); + out += align; + from += align; + len -= align; + + size_t vl = (dist / sizeof(chunk_t)) * sizeof(chunk_t); + while (len > dist) { + memcpy(out, from, vl); + out += vl; + from += vl; + len -= vl; + } + + if (len > 0) { + memcpy(out, from, len); + out += len; + } + return out; +} + +#include "chunkset_tpl.h" + +#define INFLATE_FAST inflate_fast_rvv + +#include "inffast_tpl.h" + +#endif |
