summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathan Moinvaziri <nathan@nathanm.com>2022-06-29 08:57:11 -0700
committerHans Kristian Rosbach <hk-github@circlestorm.org>2022-08-17 14:41:18 +0200
commite22195e5bcb10851f96e0b56e396696b152e81af (patch)
tree8f0f55a0e9f0eef84ba904c78101c57d0773dbbf
parentb18c815056e9b20175d44732bd7416f1fe6a931e (diff)
downloadProject-Tick-e22195e5bcb10851f96e0b56e396696b152e81af.tar.gz
Project-Tick-e22195e5bcb10851f96e0b56e396696b152e81af.zip
Don't use unaligned access for memcpy instructions, because GCC 11 assumes the pointer is aligned in certain instances.
-rw-r--r--arch/arm/chunkset_neon.c8
-rw-r--r--arch/power/chunkset_power8.c6
-rw-r--r--arch/x86/chunkset_avx.c6
-rw-r--r--arch/x86/chunkset_sse2.c6
-rw-r--r--arch/x86/chunkset_sse41.c8
-rw-r--r--chunkset.c10
-rw-r--r--compare256.c8
-rw-r--r--deflate.h10
-rw-r--r--inffast.c2
-rw-r--r--inflate_p.h6
-rw-r--r--insert_string_tpl.h2
-rw-r--r--match_tpl.h12
-rw-r--r--zbuild.h15
13 files changed, 45 insertions, 54 deletions
diff --git a/arch/arm/chunkset_neon.c b/arch/arm/chunkset_neon.c
index 29065f77c4..3b8d2c0010 100644
--- a/arch/arm/chunkset_neon.c
+++ b/arch/arm/chunkset_neon.c
@@ -38,19 +38,19 @@ static const lut_rem_pair perm_idx_lut[13] = {
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
uint16_t tmp;
- zmemcpy_2(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
uint32_t tmp;
- zmemcpy_4(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
uint64_t tmp;
- zmemcpy_8(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp));
}
@@ -76,7 +76,7 @@ static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t
/* See note in chunkset_sse41.c for why this is ok */
__msan_unpoison(buf + dist, 16 - dist);
#endif
-
+
/* This version of table is only available on aarch64 */
#if defined(_M_ARM64) || defined(__aarch64__)
uint8x16_t ret_vec = vld1q_u8(buf);
diff --git a/arch/power/chunkset_power8.c b/arch/power/chunkset_power8.c
index 47e5485262..83928308d6 100644
--- a/arch/power/chunkset_power8.c
+++ b/arch/power/chunkset_power8.c
@@ -16,19 +16,19 @@ typedef vector unsigned char chunk_t;
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
uint16_t tmp;
- zmemcpy_2(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = (vector unsigned char)vec_splats(tmp);
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
uint32_t tmp;
- zmemcpy_4(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = (vector unsigned char)vec_splats(tmp);
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
uint64_t tmp;
- zmemcpy_8(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = (vector unsigned char)vec_splats(tmp);
}
diff --git a/arch/x86/chunkset_avx.c b/arch/x86/chunkset_avx.c
index 024b37c304..c4a4d9b052 100644
--- a/arch/x86/chunkset_avx.c
+++ b/arch/x86/chunkset_avx.c
@@ -52,19 +52,19 @@ static const lut_rem_pair perm_idx_lut[29] = {
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
int16_t tmp;
- zmemcpy_2(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm256_set1_epi16(tmp);
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
int32_t tmp;
- zmemcpy_4(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm256_set1_epi32(tmp);
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
int64_t tmp;
- zmemcpy_8(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm256_set1_epi64x(tmp);
}
diff --git a/arch/x86/chunkset_sse2.c b/arch/x86/chunkset_sse2.c
index 8e3166f70e..eddf5d9895 100644
--- a/arch/x86/chunkset_sse2.c
+++ b/arch/x86/chunkset_sse2.c
@@ -17,19 +17,19 @@ typedef __m128i chunk_t;
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
int16_t tmp;
- zmemcpy_2(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm_set1_epi16(tmp);
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
int32_t tmp;
- zmemcpy_4(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm_set1_epi32(tmp);
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
int64_t tmp;
- zmemcpy_8(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm_set1_epi64x(tmp);
}
diff --git a/arch/x86/chunkset_sse41.c b/arch/x86/chunkset_sse41.c
index 42b44d0512..c148db0924 100644
--- a/arch/x86/chunkset_sse41.c
+++ b/arch/x86/chunkset_sse41.c
@@ -41,19 +41,19 @@ static const lut_rem_pair perm_idx_lut[13] = {
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
int16_t tmp;
- zmemcpy_2(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm_set1_epi16(tmp);
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
int32_t tmp;
- zmemcpy_4(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm_set1_epi32(tmp);
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
int64_t tmp;
- zmemcpy_8(&tmp, from);
+ memcpy(&tmp, from, sizeof(tmp));
*chunk = _mm_set1_epi64x(tmp);
}
@@ -69,7 +69,7 @@ static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t
lut_rem_pair lut_rem = perm_idx_lut[dist - 3];
__m128i perm_vec, ret_vec;
#ifdef Z_MEMORY_SANITIZER
- /* Important to note:
+ /* Important to note:
* This is _not_ to subvert the memory sanitizer but to instead unpoison some
* bytes we willingly and purposefully load uninitialized that we swizzle over
* in a vector register, anyway. If what we assume is wrong about what is used,
diff --git a/chunkset.c b/chunkset.c
index ca35929f3a..169e411233 100644
--- a/chunkset.c
+++ b/chunkset.c
@@ -13,20 +13,20 @@ typedef uint64_t chunk_t;
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
uint8_t *dest = (uint8_t *)chunk;
- zmemcpy_4(dest, from);
- zmemcpy_4(dest+4, from);
+ memcpy(dest, from, sizeof(uint32_t));
+ memcpy(dest+4, from, sizeof(uint32_t));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
- zmemcpy_8(chunk, from);
+ memcpy(chunk, from, sizeof(uint64_t));
}
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
- zmemcpy_8(chunk, (uint8_t *)s);
+ memcpy(chunk, (uint8_t *)s, sizeof(uint64_t));
}
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
- zmemcpy_8(out, chunk);
+ memcpy(out, chunk, sizeof(uint64_t));
}
#define CHUNKSIZE chunksize_c
diff --git a/compare256.c b/compare256.c
index 3c05969f9f..b11ac24ee0 100644
--- a/compare256.c
+++ b/compare256.c
@@ -101,8 +101,8 @@ static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const
do {
uint32_t sv, mv, diff;
- zmemcpy_4(&sv, src0);
- zmemcpy_4(&mv, src1);
+ memcpy(&sv, src0, sizeof(sv));
+ memcpy(&mv, src1, sizeof(mv));
diff = sv ^ mv;
if (diff) {
@@ -141,8 +141,8 @@ static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const
do {
uint64_t sv, mv, diff;
- zmemcpy_8(&sv, src0);
- zmemcpy_8(&mv, src1);
+ memcpy(&sv, src0, sizeof(sv));
+ memcpy(&mv, src1, sizeof(mv));
diff = sv ^ mv;
if (diff) {
diff --git a/deflate.h b/deflate.h
index f8920df59c..ccb246a818 100644
--- a/deflate.h
+++ b/deflate.h
@@ -306,7 +306,7 @@ static inline void put_short(deflate_state *s, uint16_t w) {
#if BYTE_ORDER == BIG_ENDIAN
w = ZSWAP16(w);
#endif
- zmemcpy_2(&s->pending_buf[s->pending], &w);
+ memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
s->pending += 2;
}
@@ -318,7 +318,7 @@ static inline void put_short_msb(deflate_state *s, uint16_t w) {
#if BYTE_ORDER == LITTLE_ENDIAN
w = ZSWAP16(w);
#endif
- zmemcpy_2(&s->pending_buf[s->pending], &w);
+ memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
s->pending += 2;
}
@@ -330,7 +330,7 @@ static inline void put_uint32(deflate_state *s, uint32_t dw) {
#if BYTE_ORDER == BIG_ENDIAN
dw = ZSWAP32(dw);
#endif
- zmemcpy_4(&s->pending_buf[s->pending], &dw);
+ memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
s->pending += 4;
}
@@ -342,7 +342,7 @@ static inline void put_uint32_msb(deflate_state *s, uint32_t dw) {
#if BYTE_ORDER == LITTLE_ENDIAN
dw = ZSWAP32(dw);
#endif
- zmemcpy_4(&s->pending_buf[s->pending], &dw);
+ memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
s->pending += 4;
}
@@ -354,7 +354,7 @@ static inline void put_uint64(deflate_state *s, uint64_t lld) {
#if BYTE_ORDER == BIG_ENDIAN
lld = ZSWAP64(lld);
#endif
- zmemcpy_8(&s->pending_buf[s->pending], &lld);
+ memcpy(&s->pending_buf[s->pending], &lld, sizeof(lld));
s->pending += 8;
}
diff --git a/inffast.c b/inffast.c
index 36923317de..bfb1c83134 100644
--- a/inffast.c
+++ b/inffast.c
@@ -14,7 +14,7 @@
/* Load 64 bits from IN and place the bytes at offset BITS in the result. */
static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
uint64_t chunk;
- zmemcpy_8(&chunk, in);
+ memcpy(&chunk, in, sizeof(chunk));
#if BYTE_ORDER == LITTLE_ENDIAN
return chunk << bits;
diff --git a/inflate_p.h b/inflate_p.h
index 20f6b1a8c3..7122d7ce65 100644
--- a/inflate_p.h
+++ b/inflate_p.h
@@ -176,21 +176,21 @@ static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len,
}
if (tocopy >= 8) {
- zmemcpy_8(out, from);
+ memcpy(out, from, 8);
out += 8;
from += 8;
tocopy -= 8;
}
if (tocopy >= 4) {
- zmemcpy_4(out, from);
+ memcpy(out, from, 4);
out += 4;
from += 4;
tocopy -= 4;
}
if (tocopy >= 2) {
- zmemcpy_2(out, from);
+ memcpy(out, from, 2);
out += 2;
from += 2;
tocopy -= 2;
diff --git a/insert_string_tpl.h b/insert_string_tpl.h
index 643a5e0e31..4acd67fd62 100644
--- a/insert_string_tpl.h
+++ b/insert_string_tpl.h
@@ -31,7 +31,7 @@
#ifndef HASH_CALC_READ
# if BYTE_ORDER == LITTLE_ENDIAN
# define HASH_CALC_READ \
- zmemcpy_4(&val, strstart);
+ memcpy(&val, strstart, sizeof(val));
# else
# define HASH_CALC_READ \
val = ((uint32_t)(strstart[0])); \
diff --git a/match_tpl.h b/match_tpl.h
index 3fc71c15a0..fbd34e58a5 100644
--- a/match_tpl.h
+++ b/match_tpl.h
@@ -74,11 +74,11 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
#endif
#ifdef UNALIGNED64_OK
- zmemcpy_8(scan_start, scan);
- zmemcpy_8(scan_end, scan+offset);
+ memcpy(scan_start, scan, sizeof(uint64_t));
+ memcpy(scan_end, scan+offset, sizeof(uint64_t));
#elif defined(UNALIGNED_OK)
- zmemcpy_4(scan_start, scan);
- zmemcpy_4(scan_end, scan+offset);
+ memcpy(scan_start, scan, sizeof(uint32_t));
+ memcpy(scan_end, scan+offset, sizeof(uint32_t));
#else
scan_end[0] = *(scan+offset);
scan_end[1] = *(scan+offset+1);
@@ -201,9 +201,9 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
#endif
#ifdef UNALIGNED64_OK
- zmemcpy_8(scan_end, scan+offset);
+ memcpy(scan_end, scan+offset, sizeof(uint64_t));
#elif defined(UNALIGNED_OK)
- zmemcpy_4(scan_end, scan+offset);
+ memcpy(scan_end, scan+offset, sizeof(uint32_t));
#else
scan_end[0] = *(scan+offset);
scan_end[1] = *(scan+offset+1);
diff --git a/zbuild.h b/zbuild.h
index d82c1c388e..10a7fd6b3d 100644
--- a/zbuild.h
+++ b/zbuild.h
@@ -218,31 +218,22 @@
# endif
#endif
-/* Force compiler to emit unaligned memory accesses if unaligned access is supported
+/* Force compiler to emit unaligned memory comparisons if unaligned access is supported
on the architecture, otherwise don't assume unaligned access is supported. Older
- compilers don't optimize memcpy and memcmp calls to unaligned access instructions
- when it is supported on the architecture resulting in significant performance impact.
- Newer compilers might optimize memcpy but not all optimize memcmp for all integer types. */
+ compilers don't optimize memcmp calls for all integer types to unaligned access instructions
+ when it is supported on the architecture resulting in significant performance impact. */
#ifdef UNALIGNED_OK
-# define zmemcpy_2(dest, src) (*((uint16_t *)(dest)) = *((uint16_t *)(src)))
# define zmemcmp_2(str1, str2) (*((uint16_t *)(str1)) != *((uint16_t *)(str2)))
-# define zmemcpy_4(dest, src) (*((uint32_t *)(dest)) = *((uint32_t *)(src)))
# define zmemcmp_4(str1, str2) (*((uint32_t *)(str1)) != *((uint32_t *)(str2)))
# if defined(UNALIGNED64_OK) && (UINTPTR_MAX == UINT64_MAX)
-# define zmemcpy_8(dest, src) (*((uint64_t *)(dest)) = *((uint64_t *)(src)))
# define zmemcmp_8(str1, str2) (*((uint64_t *)(str1)) != *((uint64_t *)(str2)))
# else
-# define zmemcpy_8(dest, src) (((uint32_t *)(dest))[0] = ((uint32_t *)(src))[0], \
- ((uint32_t *)(dest))[1] = ((uint32_t *)(src))[1])
# define zmemcmp_8(str1, str2) (((uint32_t *)(str1))[0] != ((uint32_t *)(str2))[0] || \
((uint32_t *)(str1))[1] != ((uint32_t *)(str2))[1])
# endif
#else
-# define zmemcpy_2(dest, src) memcpy(dest, src, 2)
# define zmemcmp_2(str1, str2) memcmp(str1, str2, 2)
-# define zmemcpy_4(dest, src) memcpy(dest, src, 4)
# define zmemcmp_4(str1, str2) memcmp(str1, str2, 4)
-# define zmemcpy_8(dest, src) memcpy(dest, src, 8)
# define zmemcmp_8(str1, str2) memcmp(str1, str2, 8)
#endif