summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHans Kristian Rosbach <hk-git@circlestorm.org>2024-12-17 23:02:32 +0100
committerHans Kristian Rosbach <hk-github@circlestorm.org>2024-12-21 00:46:48 +0100
commit509f6b5818cc8f804574ffe124d91d1c0696f753 (patch)
tree60c32f2da361d7ce6ea9eb107e97f6e85cb6aad1
parent4fa76be6c00914f92d74142fc36c88f868cdb69c (diff)
downloadProject-Tick-509f6b5818cc8f804574ffe124d91d1c0696f753.tar.gz
Project-Tick-509f6b5818cc8f804574ffe124d91d1c0696f753.zip
Since we long ago made unaligned reads safe (by using memcpy or intrinsics),
it is time to replace the UNALIGNED_OK checks, which have since really only been used to select the optimal comparison size for the arch, with OPTIMAL_CMP checks instead.
-rw-r--r--.github/workflows/cmake.yml4
-rw-r--r--CMakeLists.txt8
-rw-r--r--README.md3
-rw-r--r--arch/generic/compare256_c.c6
-rw-r--r--arch/generic/generic_functions.h35
-rw-r--r--chunkset_tpl.h8
-rw-r--r--cmake/detect-sanitizer.cmake11
-rw-r--r--compare256_rle.h4
-rwxr-xr-xconfigure10
-rw-r--r--deflate_rle.c4
-rw-r--r--insert_string_tpl.h18
-rw-r--r--match_tpl.h22
-rw-r--r--test/benchmarks/benchmark_compare256.cc10
-rw-r--r--test/benchmarks/benchmark_compare256_rle.cc10
-rw-r--r--test/test_compare256.cc11
-rw-r--r--test/test_compare256_rle.cc10
-rw-r--r--zbuild.h44
17 files changed, 89 insertions, 129 deletions
diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index a0f3bc57fa..02f98272ed 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -81,11 +81,11 @@ jobs:
build-src-dir: ../zlib-ng/test/add-subdirectory-project
readonly-project-dir: true
- - name: Ubuntu GCC -O1 No Unaligned UBSAN
+ - name: Ubuntu GCC -O1 UBSAN
os: ubuntu-latest
compiler: gcc
cxx-compiler: g++
- cmake-args: -DWITH_UNALIGNED=OFF -DWITH_SANITIZER=Undefined
+ cmake-args: -DWITH_SANITIZER=Undefined
codecov: ubuntu_gcc_o1
cflags: -O1
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 11e9ae5579..01edc15376 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -93,7 +93,6 @@ option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF)
option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF)
option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF)
option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances" OFF)
-option(WITH_UNALIGNED "Support unaligned reads on platforms that support it" ON)
set(ZLIB_SYMBOL_PREFIX "" CACHE STRING "Give this prefix to all publicly exported symbols.
Useful when embedding into a larger library.
@@ -147,7 +146,6 @@ mark_as_advanced(FORCE
WITH_RVV
WITH_INFLATE_STRICT
WITH_INFLATE_ALLOW_INVALID_DIST
- WITH_UNALIGNED
INSTALL_UTILS
)
@@ -336,12 +334,6 @@ if(NOT WITH_NATIVE_INSTRUCTIONS)
endforeach()
endif()
-# Set architecture alignment requirements
-if(NOT WITH_UNALIGNED)
- add_definitions(-DNO_UNALIGNED)
- message(STATUS "Unaligned reads manually disabled")
-endif()
-
# Apply warning compiler flags
if(WITH_MAINTAINER_WARNINGS)
add_compile_options(${WARNFLAGS} ${WARNFLAGS_MAINTAINER} ${WARNFLAGS_DISABLE})
diff --git a/README.md b/README.md
index 411621b52f..28aad7f1dc 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ Features
* Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV
* Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX
* Support for hardware-accelerated deflate using IBM Z DFLTCC
-* Unaligned memory read/writes and large bit buffer improvements
+* Safe unaligned memory read/writes and large bit buffer improvements
* Includes improvements from Cloudflare and Intel forks
* Configure, CMake, and NMake build system support
* Comprehensive set of CMake unit tests
@@ -213,7 +213,6 @@ Advanced Build Options
| WITH_CRC32_VX | --without-crc32-vx | Build with vectorized CRC32 on IBM Z | ON |
| WITH_DFLTCC_DEFLATE | --with-dfltcc-deflate | Build with DFLTCC intrinsics for compression on IBM Z | OFF |
| WITH_DFLTCC_INFLATE | --with-dfltcc-inflate | Build with DFLTCC intrinsics for decompression on IBM Z | OFF |
-| WITH_UNALIGNED | --without-unaligned | Allow optimizations that use unaligned reads if safe on current arch| ON |
| WITH_INFLATE_STRICT | | Build with strict inflate distance checking | OFF |
| WITH_INFLATE_ALLOW_INVALID_DIST | | Build with zero fill for inflate invalid distances | OFF |
| INSTALL_UTILS | | Copy minigzip and minideflate during install | OFF |
diff --git a/arch/generic/compare256_c.c b/arch/generic/compare256_c.c
index 0c12cb3a4e..3704c2f6cc 100644
--- a/arch/generic/compare256_c.c
+++ b/arch/generic/compare256_c.c
@@ -57,7 +57,7 @@ Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {
#include "match_tpl.h"
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
/* 16-bit unaligned integer comparison */
static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0;
@@ -138,8 +138,8 @@ Z_INTERNAL uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *
#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
-/* UNALIGNED64_OK, 64-bit integer comparison */
+#if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+/* 64-bit integer comparison */
static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0;
diff --git a/arch/generic/generic_functions.h b/arch/generic/generic_functions.h
index e243f32665..eaba70c31d 100644
--- a/arch/generic/generic_functions.h
+++ b/arch/generic/generic_functions.h
@@ -28,13 +28,13 @@ void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
-uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+ uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
# ifdef HAVE_BUILTIN_CTZ
- uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
+ uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
# endif
-# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
- uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+ uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
# endif
#endif
@@ -43,29 +43,24 @@ typedef void (*slide_hash_func)(deflate_state *s);
void slide_hash_c(deflate_state *s);
uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
-# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
-# ifdef HAVE_BUILTIN_CTZ
+ uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
+# ifdef HAVE_BUILTIN_CTZ
uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
-# endif
-# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
- uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
-# endif
+ uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
# endif
-
-uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
-# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
- uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
- uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
-# ifdef UNALIGNED64_OK
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+ uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
-# endif
# endif
+#endif
// Select generic implementation for longest_match, longest_match_slow, longest_match_slow functions.
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
-# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
# define longest_match_generic longest_match_unaligned_64
# define longest_match_slow_generic longest_match_slow_unaligned_64
# define compare256_generic compare256_unaligned_64
diff --git a/chunkset_tpl.h b/chunkset_tpl.h
index 5d4cacbd9d..383b4d8f84 100644
--- a/chunkset_tpl.h
+++ b/chunkset_tpl.h
@@ -227,17 +227,15 @@ rem_bytes:
}
Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, uint8_t *from, unsigned len, unsigned left) {
-#if !defined(UNALIGNED64_OK)
-# if !defined(UNALIGNED_OK)
+#if OPTIMAL_CMP < 32
static const uint32_t align_mask = 7;
-# else
+#elif OPTIMAL_CMP == 32
static const uint32_t align_mask = 3;
-# endif
#endif
len = MIN(len, left);
-#if !defined(UNALIGNED64_OK)
+#if OPTIMAL_CMP < 64
while (((uintptr_t)out & align_mask) && (len > 0)) {
*out++ = *from++;
--len;
diff --git a/cmake/detect-sanitizer.cmake b/cmake/detect-sanitizer.cmake
index f9521ec2f5..b71c1a37f3 100644
--- a/cmake/detect-sanitizer.cmake
+++ b/cmake/detect-sanitizer.cmake
@@ -111,6 +111,7 @@ endmacro()
macro(add_undefined_sanitizer)
set(known_checks
+ alignment
array-bounds
bool
bounds
@@ -137,10 +138,6 @@ macro(add_undefined_sanitizer)
vptr
)
- # Only check for alignment sanitizer flag if unaligned access is not supported
- if(NOT WITH_UNALIGNED)
- list(APPEND known_checks alignment)
- endif()
# Object size sanitizer has no effect at -O0 and produces compiler warning if enabled
if(NOT CMAKE_C_FLAGS MATCHES "-O0")
list(APPEND known_checks object-size)
@@ -153,12 +150,6 @@ macro(add_undefined_sanitizer)
add_compile_options(-fsanitize=${supported_checks})
add_link_options(-fsanitize=${supported_checks})
- # Group sanitizer flag -fsanitize=undefined will automatically add alignment, even if
- # it is not in our sanitize flag list, so we need to explicitly disable alignment sanitizing.
- if(WITH_UNALIGNED)
- add_compile_options(-fno-sanitize=alignment)
- endif()
-
add_common_sanitizer_flags()
else()
message(STATUS "Undefined behavior sanitizer is not supported")
diff --git a/compare256_rle.h b/compare256_rle.h
index 0f3998d4a3..ccfbeba2a6 100644
--- a/compare256_rle.h
+++ b/compare256_rle.h
@@ -42,7 +42,7 @@ static inline uint32_t compare256_rle_c(const uint8_t *src0, const uint8_t *src1
return 256;
}
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
/* 16-bit unaligned integer comparison */
static inline uint32_t compare256_rle_unaligned_16(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0;
@@ -100,7 +100,7 @@ static inline uint32_t compare256_rle_unaligned_32(const uint8_t *src0, const ui
#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
/* 64-bit unaligned integer comparison */
static inline uint32_t compare256_rle_unaligned_64(const uint8_t *src0, const uint8_t *src1) {
uint32_t src0_cmp32, len = 0;
diff --git a/configure b/configure
index e6270ae035..5cd8163ada 100755
--- a/configure
+++ b/configure
@@ -87,7 +87,6 @@ mandir=${mandir-'${prefix}/share/man'}
shared_ext='.so'
shared=1
gzfileops=1
-unalignedok=1
compat=0
cover=0
build32=0
@@ -164,7 +163,6 @@ case "$1" in
echo ' [--warn] Enables extra compiler warnings' | tee -a configure.log
echo ' [--debug] Enables extra debug prints during operation' | tee -a configure.log
echo ' [--zlib-compat] Compiles for zlib-compatible API instead of zlib-ng API' | tee -a configure.log
- echo ' [--without-unaligned] Compiles without fast unaligned access' | tee -a configure.log
echo ' [--without-gzfileops] Compiles without the gzfile parts of the API enabled' | tee -a configure.log
echo ' [--without-optimizations] Compiles without support for optional instruction sets' | tee -a configure.log
echo ' [--without-new-strategies] Compiles without using new additional deflate strategies' | tee -a configure.log
@@ -195,7 +193,6 @@ case "$1" in
-s* | --shared | --enable-shared) shared=1; shift ;;
-t | --static) shared=0; shift ;;
--zlib-compat) compat=1; shift ;;
- --without-unaligned) unalignedok=0; shift ;;
--without-gzfileops) gzfileops=0; shift ;;
--cover) cover=1; shift ;;
-3* | --32) build32=1; shift ;;
@@ -876,13 +873,6 @@ else
PIC_TESTOBJG="\$(OBJG)"
fi
-# set architecture alignment requirements
-if test $unalignedok -eq 0; then
- CFLAGS="${CFLAGS} -DNO_UNALIGNED"
- SFLAGS="${SFLAGS} -DNO_UNALIGNED"
- echo "Unaligned reads manually disabled." | tee -a configure.log
-fi
-
# enable reduced memory configuration
if test $reducedmem -eq 1; then
echo "Configuring for reduced memory environment." | tee -a configure.log
diff --git a/deflate_rle.c b/deflate_rle.c
index e8e501b1d2..551fe02a06 100644
--- a/deflate_rle.c
+++ b/deflate_rle.c
@@ -10,8 +10,8 @@
#include "deflate_p.h"
#include "functable.h"
-#ifdef UNALIGNED_OK
-# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+#if OPTIMAL_CMP >= 32
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
# define compare256_rle compare256_rle_unaligned_64
# elif defined(HAVE_BUILTIN_CTZ)
# define compare256_rle compare256_rle_unaligned_32
diff --git a/insert_string_tpl.h b/insert_string_tpl.h
index a5685c4ed7..e7037c04e6 100644
--- a/insert_string_tpl.h
+++ b/insert_string_tpl.h
@@ -29,21 +29,13 @@
# define HASH_CALC_MASK HASH_MASK
#endif
#ifndef HASH_CALC_READ
-# ifdef UNALIGNED_OK
-# if BYTE_ORDER == LITTLE_ENDIAN
-# define HASH_CALC_READ \
- memcpy(&val, strstart, sizeof(val));
-# else
-# define HASH_CALC_READ \
- memcpy(&val, strstart, sizeof(val)); \
- val = ZSWAP32(val);
-# endif
+# if BYTE_ORDER == LITTLE_ENDIAN
+# define HASH_CALC_READ \
+ memcpy(&val, strstart, sizeof(val));
# else
# define HASH_CALC_READ \
- val = ((uint32_t)(strstart[0])); \
- val |= ((uint32_t)(strstart[1]) << 8); \
- val |= ((uint32_t)(strstart[2]) << 16); \
- val |= ((uint32_t)(strstart[3]) << 24);
+ memcpy(&val, strstart, sizeof(val)); \
+ val = ZSWAP32(val);
# endif
#endif
diff --git a/match_tpl.h b/match_tpl.h
index 9c258242cd..f44da750fb 100644
--- a/match_tpl.h
+++ b/match_tpl.h
@@ -40,7 +40,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
uint32_t chain_length, nice_match, best_len, offset;
uint32_t lookahead = s->lookahead;
Pos match_offset = 0;
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
uint8_t scan_start[8];
#endif
uint8_t scan_end[8];
@@ -59,20 +59,20 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
* to find the next best match length.
*/
offset = best_len-1;
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
if (best_len >= sizeof(uint32_t)) {
offset -= 2;
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
if (best_len >= sizeof(uint64_t))
offset -= 4;
#endif
}
#endif
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
memcpy(scan_start, scan, sizeof(uint64_t));
memcpy(scan_end, scan+offset, sizeof(uint64_t));
-#elif defined(UNALIGNED_OK)
+#elif OPTIMAL_CMP >= 32
memcpy(scan_start, scan, sizeof(uint32_t));
memcpy(scan_end, scan+offset, sizeof(uint32_t));
#else
@@ -138,7 +138,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
* that depend on those values. However the length of the match is limited to the
* lookahead, so the output of deflate is not affected by the uninitialized values.
*/
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
if (best_len < sizeof(uint32_t)) {
for (;;) {
if (zng_memcmp_2(mbase_end+cur_match, scan_end) == 0 &&
@@ -146,7 +146,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
break;
GOTO_NEXT_CHAIN;
}
-# ifdef UNALIGNED64_OK
+# if OPTIMAL_CMP >= 64
} else if (best_len >= sizeof(uint64_t)) {
for (;;) {
if (zng_memcmp_8(mbase_end+cur_match, scan_end) == 0 &&
@@ -186,19 +186,19 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
return best_len;
offset = best_len-1;
-#ifdef UNALIGNED_OK
+#if OPTIMAL_CMP >= 32
if (best_len >= sizeof(uint32_t)) {
offset -= 2;
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
if (best_len >= sizeof(uint64_t))
offset -= 4;
#endif
}
#endif
-#ifdef UNALIGNED64_OK
+#if OPTIMAL_CMP >= 64
memcpy(scan_end, scan+offset, sizeof(uint64_t));
-#elif defined(UNALIGNED_OK)
+#elif OPTIMAL_CMP >= 32
memcpy(scan_end, scan+offset, sizeof(uint32_t));
#else
scan_end[0] = *(scan+offset);
diff --git a/test/benchmarks/benchmark_compare256.cc b/test/benchmarks/benchmark_compare256.cc
index efdbbacc9f..a9aa0fca65 100644
--- a/test/benchmarks/benchmark_compare256.cc
+++ b/test/benchmarks/benchmark_compare256.cc
@@ -66,14 +66,14 @@ BENCHMARK_COMPARE256(c, compare256_c, 1);
BENCHMARK_COMPARE256(native, native_compare256, 1);
#else
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
BENCHMARK_COMPARE256(unaligned_16, compare256_unaligned_16, 1);
-#ifdef HAVE_BUILTIN_CTZ
+# if defined(HAVE_BUILTIN_CTZ)
BENCHMARK_COMPARE256(unaligned_32, compare256_unaligned_32, 1);
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+# endif
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
BENCHMARK_COMPARE256(unaligned_64, compare256_unaligned_64, 1);
-#endif
+# endif
#endif
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
BENCHMARK_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2);
diff --git a/test/benchmarks/benchmark_compare256_rle.cc b/test/benchmarks/benchmark_compare256_rle.cc
index 3b1eced453..9eb299f3b4 100644
--- a/test/benchmarks/benchmark_compare256_rle.cc
+++ b/test/benchmarks/benchmark_compare256_rle.cc
@@ -61,12 +61,12 @@ public:
BENCHMARK_COMPARE256_RLE(c, compare256_rle_c, 1);
-#ifdef UNALIGNED_OK
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
BENCHMARK_COMPARE256_RLE(unaligned_16, compare256_rle_unaligned_16, 1);
-#ifdef HAVE_BUILTIN_CTZ
+# if defined(HAVE_BUILTIN_CTZ)
BENCHMARK_COMPARE256_RLE(unaligned_32, compare256_rle_unaligned_32, 1);
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+# endif
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
BENCHMARK_COMPARE256_RLE(unaligned_64, compare256_rle_unaligned_64, 1);
-#endif
+# endif
#endif
diff --git a/test/test_compare256.cc b/test/test_compare256.cc
index e1662cdf04..97e2847037 100644
--- a/test/test_compare256.cc
+++ b/test/test_compare256.cc
@@ -65,15 +65,16 @@ TEST_COMPARE256(c, compare256_c, 1)
TEST_COMPARE256(native, native_compare256, 1)
#else
-#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
TEST_COMPARE256(unaligned_16, compare256_unaligned_16, 1)
-#ifdef HAVE_BUILTIN_CTZ
+# if defined(HAVE_BUILTIN_CTZ)
TEST_COMPARE256(unaligned_32, compare256_unaligned_32, 1)
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+# endif
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
TEST_COMPARE256(unaligned_64, compare256_unaligned_64, 1)
+# endif
#endif
-#endif
+
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
TEST_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2)
#endif
diff --git a/test/test_compare256_rle.cc b/test/test_compare256_rle.cc
index 5333ce7689..6c5d9d4f8f 100644
--- a/test/test_compare256_rle.cc
+++ b/test/test_compare256_rle.cc
@@ -52,12 +52,12 @@ static inline void compare256_rle_match_check(compare256_rle_func compare256_rle
TEST_COMPARE256_RLE(c, compare256_rle_c, 1)
-#ifdef UNALIGNED_OK
+#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
TEST_COMPARE256_RLE(unaligned_16, compare256_rle_unaligned_16, 1)
-#ifdef HAVE_BUILTIN_CTZ
+# if defined(HAVE_BUILTIN_CTZ)
TEST_COMPARE256_RLE(unaligned_32, compare256_rle_unaligned_32, 1)
-#endif
-#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+# endif
+# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
TEST_COMPARE256_RLE(unaligned_64, compare256_rle_unaligned_64, 1)
-#endif
+# endif
#endif
diff --git a/zbuild.h b/zbuild.h
index 0023a235d2..4d3fc5f2e4 100644
--- a/zbuild.h
+++ b/zbuild.h
@@ -243,29 +243,31 @@
# define Tracecv(c, x)
#endif
-#ifndef NO_UNALIGNED
-# if defined(__x86_64__) || defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64)
-# define UNALIGNED_OK
-# define UNALIGNED64_OK
-# elif defined(__i386__) || defined(__i486__) || defined(__i586__) || \
- defined(__i686__) || defined(_X86_) || defined(_M_IX86)
-# define UNALIGNED_OK
-# elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
-# if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
-# define UNALIGNED_OK
-# define UNALIGNED64_OK
-# endif
-# elif defined(__arm__) || (_M_ARM >= 7)
-# if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
-# define UNALIGNED_OK
-# endif
-# elif defined(__powerpc64__) || defined(__ppc64__)
-# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-# define UNALIGNED_OK
-# define UNALIGNED64_OK
-# endif
+#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64)
+# define OPTIMAL_CMP 64
+#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || \
+ defined(__i686__) || defined(_X86_) || defined(_M_IX86)
+# define OPTIMAL_CMP 32
+#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
+# if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
+# define OPTIMAL_CMP 64
+# endif
+#elif defined(__arm__) || (_M_ARM >= 7)
+# if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__)
+# define OPTIMAL_CMP 32
# endif
+#elif defined(__powerpc64__) || defined(__ppc64__)
+# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define OPTIMAL_CMP 64
+# endif
+#endif
+#if defined(NO_UNALIGNED)
+# undef OPTIMAL_CMP
#endif
+#if !defined(OPTIMAL_CMP)
+# define OPTIMAL_CMP 8
+#endif
+
#if defined(__has_feature)
# if __has_feature(address_sanitizer)