 arch/arm/arm_functions.h             |  2 +-
 arch/arm/chunkset_neon.c             |  2 +-
 arch/generic/generic_functions.h     |  2 +-
 arch/loongarch/chunkset_lasx.c       |  4 ++--
 arch/loongarch/chunkset_lsx.c        |  2 +-
 arch/loongarch/loongarch_functions.h |  4 ++--
 arch/power/power_functions.h         |  2 +-
 arch/riscv/chunkset_rvv.c            |  8 ++++----
 arch/riscv/riscv_functions.h         |  2 +-
 arch/x86/chunkset_avx2.c             |  4 ++--
 arch/x86/chunkset_avx512.c           | 20 ++++++++++----------
 arch/x86/chunkset_ssse3.c            |  2 +-
 arch/x86/x86_functions.h             |  8 ++++----
 chunkset_tpl.h                       | 48 ++++++++++++++++++------------------
 functable.c                          |  2 +-
 functable.h                          |  2 +-
 inflate_p.h                          | 10 +++++-----
 zbuild.h                             |  2 ++
 18 files changed, 64 insertions(+), 62 deletions(-)
diff --git a/arch/arm/arm_functions.h b/arch/arm/arm_functions.h
index 35dd12a2d9..34ba87b067 100644
--- a/arch/arm/arm_functions.h
+++ b/arch/arm/arm_functions.h
@@ -8,7 +8,7 @@
 #ifdef ARM_NEON
 uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_copy_neon(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint8_t* chunkmemset_safe_neon(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+uint8_t* chunkmemset_safe_neon(uint8_t *out, uint8_t *from, size_t len, size_t left);
 uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1);
 void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
 uint32_t longest_match_neon(deflate_state *const s, uint32_t cur_match);
diff --git a/arch/arm/chunkset_neon.c b/arch/arm/chunkset_neon.c
index a208451547..7bc932f939 100644
--- a/arch/arm/chunkset_neon.c
+++ b/arch/arm/chunkset_neon.c
@@ -43,7 +43,7 @@ static inline void storechunk(uint8_t *out, chunk_t *chunk) {
     vst1q_u8(out, *chunk);
 }
 
-static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) {
+static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, size_t *chunk_rem, size_t dist) {
     lut_rem_pair lut_rem = perm_idx_lut[dist - 3];
 
     *chunk_rem = lut_rem.remval;
diff --git a/arch/generic/generic_functions.h b/arch/generic/generic_functions.h
index f8e564432d..1b296b8f92 100644
--- a/arch/generic/generic_functions.h
+++ b/arch/generic/generic_functions.h
@@ -20,7 +20,7 @@ typedef void (*slide_hash_func)(deflate_state *s);
 
 uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint8_t* chunkmemset_safe_c(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+uint8_t* chunkmemset_safe_c(uint8_t *out, uint8_t *from, size_t len, size_t left);
 
 #ifdef WITH_ALL_FALLBACKS
 uint32_t compare256_8(const uint8_t *src0, const uint8_t *src1);
diff --git a/arch/loongarch/chunkset_lasx.c b/arch/loongarch/chunkset_lasx.c
index 38000474bf..a85c07d094 100644
--- a/arch/loongarch/chunkset_lasx.c
+++ b/arch/loongarch/chunkset_lasx.c
@@ -48,7 +48,7 @@ static inline void storechunk(uint8_t *out, chunk_t *chunk) {
     __lasx_xvst(*chunk, out, 0);
 }
 
-static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) {
+static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, size_t *chunk_rem, size_t dist) {
     lut_rem_pair lut_rem = perm_idx_lut[dist - 3];
     __m256i ret_vec;
     /* While technically we only need to read 4 or 8 bytes into this vector register for a lot of cases, GCC is
@@ -97,7 +97,7 @@ static inline chunk_t halfchunk2whole(halfchunk_t *chunk) {
     return lasx_zext_128(*chunk);
 }
 
-static inline halfchunk_t GET_HALFCHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) {
+static inline halfchunk_t GET_HALFCHUNK_MAG(uint8_t *buf, size_t *chunk_rem, size_t dist) {
     lut_rem_pair lut_rem = perm_idx_lut[dist - 3];
     __m128i perm_vec, ret_vec;
     __msan_unpoison(buf + dist, 16 - dist);
diff --git a/arch/loongarch/chunkset_lsx.c b/arch/loongarch/chunkset_lsx.c
index 631052d74a..e626157f40 100644
--- a/arch/loongarch/chunkset_lsx.c
+++ b/arch/loongarch/chunkset_lsx.c
@@ -40,7 +40,7 @@ static inline void storechunk(uint8_t *out, chunk_t *chunk) {
     __lsx_vst(*chunk, out, 0);
 }
 
-static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) {
+static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, size_t *chunk_rem, size_t dist) {
     lut_rem_pair lut_rem = perm_idx_lut[dist - 3];
     __m128i perm_vec, ret_vec;
     /* Important to note:
diff --git a/arch/loongarch/loongarch_functions.h b/arch/loongarch/loongarch_functions.h
index 34281432f5..922c6c4165 100644
--- a/arch/loongarch/loongarch_functions.h
+++ b/arch/loongarch/loongarch_functions.h
@@ -16,7 +16,7 @@ uint32_t crc32_copy_loongarch64(uint32_t crc, uint8_t *dst, const uint8_t *src,
 #ifdef LOONGARCH_LSX
 uint32_t adler32_lsx(uint32_t adler, const uint8_t *src, size_t len);
 uint32_t adler32_copy_lsx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint8_t* chunkmemset_safe_lsx(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+uint8_t* chunkmemset_safe_lsx(uint8_t *out, uint8_t *from, size_t len, size_t left);
 uint32_t compare256_lsx(const uint8_t *src0, const uint8_t *src1);
 void inflate_fast_lsx(PREFIX3(stream) *strm, uint32_t start);
 uint32_t longest_match_lsx(deflate_state *const s, uint32_t cur_match);
@@ -27,7 +27,7 @@ void slide_hash_lsx(deflate_state *s);
 #ifdef LOONGARCH_LASX
 uint32_t adler32_lasx(uint32_t adler, const uint8_t *src, size_t len);
 uint32_t adler32_copy_lasx(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint8_t* chunkmemset_safe_lasx(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+uint8_t* chunkmemset_safe_lasx(uint8_t *out, uint8_t *from, size_t len, size_t left);
 uint32_t compare256_lasx(const uint8_t *src0, const uint8_t *src1);
 void inflate_fast_lasx(PREFIX3(stream) *strm, uint32_t start);
 uint32_t longest_match_lasx(deflate_state *const s, uint32_t cur_match);
diff --git a/arch/power/power_functions.h b/arch/power/power_functions.h
index 86f21ad667..49ea89e819 100644
--- a/arch/power/power_functions.h
+++ b/arch/power/power_functions.h
@@ -16,7 +16,7 @@ void slide_hash_vmx(deflate_state *s);
 #ifdef POWER8_VSX
 uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_copy_power8(uint32_t adler, uint8_t *dst, const uint8_t *buf, size_t len);
-uint8_t* chunkmemset_safe_power8(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+uint8_t* chunkmemset_safe_power8(uint8_t *out, uint8_t *from, size_t len, size_t left);
 uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
 uint32_t crc32_copy_power8(uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
 void slide_hash_power8(deflate_state *s);
diff --git a/arch/riscv/chunkset_rvv.c b/arch/riscv/chunkset_rvv.c
index 6b35f30daa..cd8ed3cfd2 100644
--- a/arch/riscv/chunkset_rvv.c
+++ b/arch/riscv/chunkset_rvv.c
@@ -86,17 +86,17 @@ static inline void storechunk(uint8_t *out, chunk_t *chunk) {
  * After using a single memcpy to copy N chunks, we have to use series of
  * loadchunk and storechunk to ensure the result is correct.
  */
-static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
+static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, size_t len) {
     Assert(len > 0, "chunkcopy should never have a length 0");
-    ptrdiff_t dist = out - from;
-    if (dist < 0 || dist >= len) {
+    size_t dist = out - from;
+    if (out < from || dist >= len) {
         memcpy(out, from, len);
         out += len;
         from += len;
         return out;
     }
 
-    int32_t align = ((len - 1) % sizeof(chunk_t)) + 1;
+    size_t align = ((len - 1) % sizeof(chunk_t)) + 1;
     memcpy(out, from, sizeof(chunk_t));
     out += align;
     from += align;
diff --git a/arch/riscv/riscv_functions.h b/arch/riscv/riscv_functions.h
index e87989cca2..9e641966a0 100644
--- a/arch/riscv/riscv_functions.h
+++ b/arch/riscv/riscv_functions.h
@@ -12,7 +12,7 @@
 #ifdef RISCV_RVV
 uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint8_t* chunkmemset_safe_rvv(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+uint8_t* chunkmemset_safe_rvv(uint8_t *out, uint8_t *from, size_t len, size_t left);
 uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1);
 
 uint32_t longest_match_rvv(deflate_state *const s, uint32_t cur_match);
diff --git a/arch/x86/chunkset_avx2.c b/arch/x86/chunkset_avx2.c
index 98b6af03b0..953d3b05e2 100644
--- a/arch/x86/chunkset_avx2.c
+++ b/arch/x86/chunkset_avx2.c
@@ -51,7 +51,7 @@ static inline void storechunk(uint8_t *out, chunk_t *chunk) {
     _mm256_storeu_si256((__m256i *)out, *chunk);
 }
 
-static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) {
+static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, size_t *chunk_rem, size_t dist) {
     lut_rem_pair lut_rem = perm_idx_lut[dist - 3];
     __m256i ret_vec;
     /* While technically we only need to read 4 or 8 bytes into this vector register for a lot of cases, GCC is
@@ -100,7 +100,7 @@ static inline chunk_t halfchunk2whole(halfchunk_t *chunk) {
     return _mm256_zextsi128_si256(*chunk);
 }
 
-static inline halfchunk_t GET_HALFCHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) {
+static inline halfchunk_t GET_HALFCHUNK_MAG(uint8_t *buf, size_t *chunk_rem, size_t dist) {
     lut_rem_pair lut_rem = perm_idx_lut[dist - 3];
     __m128i perm_vec, ret_vec;
     __msan_unpoison(buf + dist, 16 - dist);
diff --git a/arch/x86/chunkset_avx512.c b/arch/x86/chunkset_avx512.c
index 901fea0cbd..60450c653b 100644
--- a/arch/x86/chunkset_avx512.c
+++ b/arch/x86/chunkset_avx512.c
@@ -26,12 +26,12 @@ typedef __mmask16 halfmask_t;
 #define HAVE_CHUNKCOPY
 #define HAVE_HALFCHUNKCOPY
 
-static inline halfmask_t gen_half_mask(unsigned len) {
-    return (halfmask_t)_bzhi_u32(0xFFFF, len);
+static inline halfmask_t gen_half_mask(size_t len) {
+    return (halfmask_t)_bzhi_u32(0xFFFF, (unsigned)len);
 }
 
-static inline mask_t gen_mask(unsigned len) {
-    return (mask_t)_bzhi_u32(0xFFFFFFFF, len);
+static inline mask_t gen_mask(size_t len) {
+    return (mask_t)_bzhi_u32(0xFFFFFFFF, (unsigned)len);
 }
 
 static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
@@ -68,11 +68,11 @@ static inline void storechunk(uint8_t *out, chunk_t *chunk) {
     _mm256_storeu_si256((__m256i *)out, *chunk);
 }
 
-static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
+static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, size_t len) {
     Assert(len > 0, "chunkcopy should never have a length 0");
     chunk_t chunk;
-    uint32_t rem = len % sizeof(chunk_t);
+    size_t rem = len % sizeof(chunk_t);
 
     if (len < sizeof(chunk_t)) {
         mask_t rem_mask = gen_mask(rem);
@@ -103,7 +103,7 @@ static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len
 #if defined(_MSC_VER) && _MSC_VER < 1943
 #  pragma optimize("", off)
 #endif
-static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) {
+static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, size_t *chunk_rem, size_t dist) {
     lut_rem_pair lut_rem = perm_idx_lut[dist - 3];
     __m256i ret_vec;
     *chunk_rem = lut_rem.remval;
@@ -143,7 +143,7 @@ static inline chunk_t halfchunk2whole(halfchunk_t *chunk) {
     return _mm256_zextsi128_si256(*chunk);
 }
 
-static inline halfchunk_t GET_HALFCHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) {
+static inline halfchunk_t GET_HALFCHUNK_MAG(uint8_t *buf, size_t *chunk_rem, size_t dist) {
     lut_rem_pair lut_rem = perm_idx_lut[dist - 3];
     __m128i perm_vec, ret_vec;
     halfmask_t load_mask = gen_half_mask(dist);
@@ -156,11 +156,11 @@ static inline halfchunk_t GET_HALFCHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, u
     return ret_vec;
 }
 
-static inline uint8_t* HALFCHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
+static inline uint8_t* HALFCHUNKCOPY(uint8_t *out, uint8_t const *from, size_t len) {
     Assert(len > 0, "chunkcopy should never have a length 0");
     halfchunk_t chunk;
-    uint32_t rem = len % sizeof(halfchunk_t);
+    size_t rem = len % sizeof(halfchunk_t);
 
     if (rem == 0) {
         rem = sizeof(halfchunk_t);
     }
diff --git a/arch/x86/chunkset_ssse3.c b/arch/x86/chunkset_ssse3.c
index b0ec74ef84..3085f57a35 100644
--- a/arch/x86/chunkset_ssse3.c
+++ b/arch/x86/chunkset_ssse3.c
@@ -38,7 +38,7 @@ static inline void storechunk(uint8_t *out, chunk_t *chunk) {
     _mm_storeu_si128((__m128i *)out, *chunk);
 }
 
-static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) {
+static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, size_t *chunk_rem, size_t dist) {
     lut_rem_pair lut_rem = perm_idx_lut[dist - 3];
     __m128i perm_vec, ret_vec;
     /* Important to note:
diff --git a/arch/x86/x86_functions.h b/arch/x86/x86_functions.h
index e1c99137dd..b1d623cace 100644
--- a/arch/x86/x86_functions.h
+++ b/arch/x86/x86_functions.h
@@ -15,7 +15,7 @@
 #endif
 
 #ifdef X86_SSE2
-uint8_t* chunkmemset_safe_sse2(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+uint8_t* chunkmemset_safe_sse2(uint8_t *out, uint8_t *from, size_t len, size_t left);
 uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
 void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start);
 uint32_t longest_match_sse2(deflate_state *const s, uint32_t cur_match);
@@ -32,7 +32,7 @@ void slide_hash_sse2(deflate_state *s);
 #ifdef X86_SSSE3
 uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_copy_ssse3(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint8_t* chunkmemset_safe_ssse3(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+uint8_t* chunkmemset_safe_ssse3(uint8_t *out, uint8_t *from, size_t len, size_t left);
 void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
 #endif
 
@@ -48,7 +48,7 @@ uint32_t adler32_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, si
 #ifdef X86_AVX2
 uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint8_t* chunkmemset_safe_avx2(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+uint8_t* chunkmemset_safe_avx2(uint8_t *out, uint8_t *from, size_t len, size_t left);
 uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
 void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start);
 uint32_t longest_match_avx2(deflate_state *const s, uint32_t cur_match);
@@ -58,7 +58,7 @@ void slide_hash_avx2(deflate_state *s);
 #ifdef X86_AVX512
 uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
 uint32_t adler32_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-uint8_t* chunkmemset_safe_avx512(uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+uint8_t* chunkmemset_safe_avx512(uint8_t *out, uint8_t *from, size_t len, size_t left);
 uint32_t compare256_avx512(const uint8_t *src0, const uint8_t *src1);
 void inflate_fast_avx512(PREFIX3(stream)* strm, uint32_t start);
 uint32_t longest_match_avx512(deflate_state *const s, uint32_t cur_match);
diff --git a/chunkset_tpl.h b/chunkset_tpl.h
index d6e40e6f86..82511f0e8c 100644
--- a/chunkset_tpl.h
+++ b/chunkset_tpl.h
@@ -21,10 +21,10 @@ static inline size_t CHUNKSIZE(void) {
    without iteration, which will hopefully make the branch prediction more
    reliable. */
 #ifndef HAVE_CHUNKCOPY
-static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
+static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, size_t len) {
     Assert(len > 0, "chunkcopy should never have a length 0");
     chunk_t chunk;
-    int32_t align = ((len - 1) % sizeof(chunk_t)) + 1;
+    size_t align = ((len - 1) % sizeof(chunk_t)) + 1;
     loadchunk(from, &chunk);
     storechunk(out, &chunk);
     out += align;
@@ -64,21 +64,21 @@ static inline uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len)
 
 #ifndef HAVE_CHUNK_MAG
 /* Loads a magazine to feed into memory of the pattern */
-static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) {
+static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, size_t *chunk_rem, size_t dist) {
     /* This code takes string of length dist from "from" and repeats
      * it for as many times as can fit in a chunk_t (vector register) */
-    uint64_t cpy_dist;
-    uint64_t bytes_remaining = sizeof(chunk_t);
+    size_t cpy_dist;
+    size_t bytes_remaining = sizeof(chunk_t);
     chunk_t chunk_load;
     uint8_t *cur_chunk = (uint8_t *)&chunk_load;
     while (bytes_remaining) {
         cpy_dist = MIN(dist, bytes_remaining);
-        memcpy(cur_chunk, buf, (size_t)cpy_dist);
+        memcpy(cur_chunk, buf, cpy_dist);
         bytes_remaining -= cpy_dist;
         cur_chunk += cpy_dist;
         /* This allows us to bypass an expensive integer division since we're effectively
          * counting in this loop, anyway */
-        *chunk_rem = (uint32_t)cpy_dist;
+        *chunk_rem = cpy_dist;
     }
 
     return chunk_load;
@@ -86,9 +86,10 @@
 #endif
 
 #if defined(HAVE_HALF_CHUNK) && !defined(HAVE_HALFCHUNKCOPY)
-static inline uint8_t* HALFCHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
+static inline uint8_t* HALFCHUNKCOPY(uint8_t *out, uint8_t const *from, size_t len) {
+    Assert(len > 0, "halfchunkcopy should never have a length 0");
     halfchunk_t chunk;
-    int32_t align = ((len - 1) % sizeof(halfchunk_t)) + 1;
+    size_t align = ((len - 1) % sizeof(halfchunk_t)) + 1;
     loadhalfchunk(from, &chunk);
     storehalfchunk(out, &chunk);
     out += align;
@@ -107,22 +108,21 @@ static inline uint8_t* HALFCHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned
 /* Copy DIST bytes from OUT - DIST into OUT + DIST * k, for 0 <= k < LEN/DIST.
    Return OUT + LEN.
 */
-static inline uint8_t* CHUNKMEMSET(uint8_t *out, uint8_t *from, unsigned len) {
+static inline uint8_t* CHUNKMEMSET(uint8_t *out, uint8_t *from, size_t len) {
     /* Debug performance related issues when len < sizeof(uint64_t):
        Assert(len >= sizeof(uint64_t), "chunkmemset should be called on larger chunks"); */
     Assert(from != out, "chunkmemset cannot have a distance 0");
 
     chunk_t chunk_load;
-    uint32_t chunk_mod = 0;
-    uint32_t adv_amount;
-    int64_t sdist = out - from;
-    uint64_t dist = llabs(sdist);
+    size_t chunk_mod = 0;
+    size_t adv_amount;
+    size_t dist = (size_t)ABS(out - from);
 
     /* We are supporting the case for when we are reading bytes from ahead in the buffer.
      * We now have to handle this, though it wasn't _quite_ clear if this rare circumstance
      * always needed to be handled here or if we're just now seeing it because we are
      * dispatching to this function, more */
-    if (sdist < 0 && dist < len) {
+    if (out < from && dist < len) {
 #ifdef HAVE_MASKED_READWRITE
         /* We can still handle this case if we can mitigate over writing _and_ we
          * fit the entirety of the copy length with one load */
@@ -158,7 +158,7 @@ static inline uint8_t* CHUNKMEMSET(uint8_t *out, uint8_t *from, unsigned len) {
             return HALFCHUNKCOPY(out, from, len);
 
         if ((dist % 2) != 0 || dist == 6) {
-            halfchunk_t halfchunk_load = GET_HALFCHUNK_MAG(from, &chunk_mod, (unsigned)dist);
+            halfchunk_t halfchunk_load = GET_HALFCHUNK_MAG(from, &chunk_mod, dist);
 
             if (len == sizeof(halfchunk_t)) {
                 storehalfchunk(out, &halfchunk_load);
@@ -192,7 +192,7 @@ static inline uint8_t* CHUNKMEMSET(uint8_t *out, uint8_t *from, unsigned len) {
             chunkmemset_16(from, &chunk_load);
         } else
 #endif
-            chunk_load = GET_CHUNK_MAG(from, &chunk_mod, (unsigned)dist);
+            chunk_load = GET_CHUNK_MAG(from, &chunk_mod, dist);
 
     adv_amount = sizeof(chunk_t) - chunk_mod;
 
@@ -224,11 +224,11 @@ rem_bytes:
     return out;
 }
 
-Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, uint8_t *from, unsigned len, unsigned left) {
+Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, uint8_t *from, size_t len, size_t left) {
 #if OPTIMAL_CMP < 32
-    static const uint32_t align_mask = 7;
+    static const uintptr_t align_mask = 7;
 #elif OPTIMAL_CMP == 32
-    static const uint32_t align_mask = 3;
+    static const uintptr_t align_mask = 3;
 #endif
 
     len = MIN(len, left);
@@ -258,16 +258,16 @@ Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, uint8_t *from, unsigned len,
     return out;
 }
 
-static inline uint8_t *CHUNKCOPY_SAFE(uint8_t *out, uint8_t *from, uint64_t len, uint8_t *safe)
+static inline uint8_t *CHUNKCOPY_SAFE(uint8_t *out, uint8_t *from, size_t len, uint8_t *safe)
 {
     if (out == from)
         return out + len;
 
-    uint64_t safelen = (safe - out);
+    size_t safelen = (safe - out);
     len = MIN(len, safelen);
 
 #ifndef HAVE_MASKED_READWRITE
-    uint64_t from_dist = (uint64_t)llabs(safe - from);
+    size_t from_dist = (size_t)ABS(safe - from);
     if (UNLIKELY(from_dist < sizeof(chunk_t) || safelen < sizeof(chunk_t))) {
         while (len--) {
             *out++ = *from++;
@@ -277,5 +277,5 @@ static inline uint8_t *CHUNKCOPY_SAFE(uint8_t *out, uint8_t *from, uint64_t len,
     }
 #endif
 
-    return CHUNKMEMSET(out, from, (unsigned)len);
+    return CHUNKMEMSET(out, from, len);
 }
diff --git a/functable.c b/functable.c
index 632115c586..b3c611622c 100644
--- a/functable.c
+++ b/functable.c
@@ -371,7 +371,7 @@ static uint32_t adler32_copy_stub(uint32_t adler, uint8_t* dst, const uint8_t* s
     return functable.adler32_copy(adler, dst, src, len);
 }
 
-static uint8_t* chunkmemset_safe_stub(uint8_t* out, uint8_t *from, unsigned len, unsigned left) {
+static uint8_t* chunkmemset_safe_stub(uint8_t* out, uint8_t *from, size_t len, size_t left) {
     FUNCTABLE_INIT_ABORT;
     return functable.chunkmemset_safe(out, from, len, left);
 }
diff --git a/functable.h b/functable.h
index 0fa2e9724a..bb33fdb869 100644
--- a/functable.h
+++ b/functable.h
@@ -27,7 +27,7 @@ struct functable_s {
     int      (* force_init)       (void);
     uint32_t (* adler32)          (uint32_t adler, const uint8_t *buf, size_t len);
     uint32_t (* adler32_copy)     (uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
-    uint8_t* (* chunkmemset_safe) (uint8_t *out, uint8_t *from, unsigned len, unsigned left);
+    uint8_t* (* chunkmemset_safe) (uint8_t *out, uint8_t *from, size_t len, size_t left);
     uint32_t (* compare256)       (const uint8_t *src0, const uint8_t *src1);
     uint32_t (* crc32)            (uint32_t crc, const uint8_t *buf, size_t len);
     uint32_t (* crc32_copy)       (uint32_t crc, uint8_t *dst, const uint8_t *src, size_t len);
diff --git a/inflate_p.h b/inflate_p.h
index 5ec72ce9d2..0d04f72bb8 100644
--- a/inflate_p.h
+++ b/inflate_p.h
@@ -201,16 +201,16 @@ static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
 }
 
 /* Behave like chunkcopy, but avoid writing beyond of legal output. */
-static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len, uint8_t *safe) {
-    uint64_t safelen = safe - out;
+static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, size_t len, uint8_t *safe) {
+    size_t safelen = safe - out;
     len = MIN(len, safelen);
 
     int32_t olap_src = from >= out && from < out + len;
     int32_t olap_dst = out >= from && out < from + len;
-    uint64_t tocopy;
+    size_t tocopy;
 
     /* For all cases without overlap, memcpy is ideal */
     if (!(olap_src || olap_dst)) {
-        memcpy(out, from, (size_t)len);
+        memcpy(out, from, len);
         return out + len;
     }
@@ -224,7 +224,7 @@ static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len,
      * initial bulk memcpy of the nonoverlapping region. Then, we can leverage the size of this to determine the safest
      * atomic memcpy size we can pick such that we have non-overlapping regions. This effectively becomes a safe look
      * behind or lookahead distance. */
-    uint64_t non_olap_size = llabs(from - out); // llabs vs labs for compatibility with windows
+    size_t non_olap_size = (size_t)ABS(from - out);
 
     /* So this doesn't give use a worst case scenario of function calls in a loop,
      * we want to instead break this down into copy blocks of fixed lengths
diff --git a/zbuild.h b/zbuild.h
--- a/zbuild.h
+++ b/zbuild.h
@@ -137,6 +137,8 @@
 #define MIN(a, b) ((a) > (b) ? (b) : (a))
 /* Maximum of a and b. */
 #define MAX(a, b) ((a) < (b) ? (b) : (a))
+/* Absolute value of a. */
+#define ABS(a) ((a) < 0 ? -(a) : (a))
 
 /* Ignore unused variable warning */
 #define Z_UNUSED(var) (void)(var)
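
For reference, the generic GET_CHUNK_MAG patched in chunkset_tpl.h above fills a vector-sized "magazine" by repeating a dist-byte pattern, and learns the leftover byte count by counting in the loop instead of computing sizeof(chunk_t) % dist with a division. Below is a minimal standalone C sketch of that loop; the fixed 16-byte chunk and the demo_* name are illustrative only, not zlib-ng code.

/* Repeats a dist-byte pattern until the 16-byte "chunk" is full. The copy
 * length of the final iteration is the remainder 16 % dist (or dist itself
 * when dist divides evenly), so chunk_rem falls out of the counting. */
#include <stdio.h>
#include <string.h>

#define MIN(a, b) ((a) > (b) ? (b) : (a))

static void demo_chunk_mag(const unsigned char *buf, size_t dist) {
    unsigned char chunk[16];
    size_t chunk_rem = 0;
    size_t bytes_remaining = sizeof(chunk);
    unsigned char *cur_chunk = chunk;

    while (bytes_remaining) {
        size_t cpy_dist = MIN(dist, bytes_remaining);
        memcpy(cur_chunk, buf, cpy_dist);
        bytes_remaining -= cpy_dist;
        cur_chunk += cpy_dist;
        chunk_rem = cpy_dist;  /* last pass leaves the remainder behind */
    }
    printf("dist=%zu chunk=%.16s rem=%zu\n", dist, (const char *)chunk, chunk_rem);
}

int main(void) {
    demo_chunk_mag((const unsigned char *)"abc", 3);    /* rem = 16 % 3 = 1 */
    demo_chunk_mag((const unsigned char *)"abcdef", 6); /* rem = 16 % 6 = 4 */
    return 0;
}

For dist=3 this prints rem=1; the caller can then advance by sizeof(chunk) - rem per store, which is what CHUNKMEMSET's adv_amount computes.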

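The other recurring change above is how distances are derived from pointers: int64_t sdist = out - from; uint64_t dist = llabs(sdist); becomes size_t dist = (size_t)ABS(out - from); plus an explicit out < from test for the rare read-ahead case. The new ABS macro in zbuild.h is type-generic, so it operates on the ptrdiff_t result directly; the old code needed llabs() rather than labs() because long is only 32 bits on LLP64 Windows. A small self-contained sketch of that pattern (the window buffer and values are hypothetical):

#include <assert.h>
#include <stddef.h>

#define ABS(a) ((a) < 0 ? -(a) : (a))

int main(void) {
    unsigned char window[32] = {0};
    unsigned char *from = window + 4;   /* source of the repeated pattern */
    unsigned char *out  = window + 10;  /* write head, 6 bytes past it */

    /* Magnitude of the ptrdiff_t difference, then narrowed to size_t. */
    size_t dist = (size_t)ABS(out - from);
    assert(dist == 6);

    /* The "reading ahead of the write head" case CHUNKMEMSET screens for
     * is now spelled `out < from && dist < len` rather than testing the
     * sign of a 64-bit difference. */
    assert(!(out < from));
    return 0;
}

Being a macro, ABS evaluates its argument more than once, so it should only be handed side-effect-free expressions, which is how the patched call sites use it.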