From 7a107a1bc626406efe8c970ef53b5af68609bfe2 Mon Sep 17 00:00:00 2001 From: Nathan Moinvaziri Date: Sun, 24 May 2020 10:01:05 -0700 Subject: Rename match_p.h to match_tpl.h since it has been converted to a template header file. --- CMakeLists.txt | 2 +- arch/x86/compare258_avx.c | 10 ++-- arch/x86/compare258_sse.c | 2 +- compare258.c | 8 +-- match_p.h | 139 ---------------------------------------------- match_tpl.h | 139 ++++++++++++++++++++++++++++++++++++++++++++++ win32/Makefile.a64 | 6 +- win32/Makefile.arm | 6 +- win32/Makefile.msc | 6 +- 9 files changed, 159 insertions(+), 159 deletions(-) delete mode 100644 match_p.h create mode 100644 match_tpl.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9fbdd58102..5a5d0dabd3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -764,7 +764,7 @@ set(ZLIB_PRIVATE_HDRS inflate_p.h inftrees.h insert_string_tpl.h - match_p.h + match_tpl.h memcopy.h trees.h trees_emit.h diff --git a/arch/x86/compare258_avx.c b/arch/x86/compare258_avx.c index 34e93bbcba..fe38d16c9e 100644 --- a/arch/x86/compare258_avx.c +++ b/arch/x86/compare258_avx.c @@ -18,13 +18,13 @@ /* UNALIGNED_OK, AVX2 intrinsic comparison */ static inline int32_t compare256_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) { int32_t len = 0; - + do { __m256i ymm_src0, ymm_src1, ymm_cmp; ymm_src0 = _mm256_loadu_si256((__m256i*)src0); ymm_src1 = _mm256_loadu_si256((__m256i*)src1); ymm_cmp = _mm256_cmpeq_epi8(ymm_src0, ymm_src1); /* non-identical bytes = 00, identical bytes = FF */ - int mask = _mm256_movemask_epi8(ymm_cmp); + int mask = _mm256_movemask_epi8(ymm_cmp); if ((unsigned int)mask != 0xFFFFFFFF) { int match_byte = __builtin_ctz(~mask); /* Invert bits so identical = 0 */ return (int32_t)(len + match_byte); @@ -35,7 +35,7 @@ static inline int32_t compare256_unaligned_avx2_static(const unsigned char *src0 ymm_src0 = _mm256_loadu_si256((__m256i*)src0); ymm_src1 = _mm256_loadu_si256((__m256i*)src1); ymm_cmp = _mm256_cmpeq_epi8(ymm_src0, ymm_src1); - mask = _mm256_movemask_epi8(ymm_cmp); + mask = _mm256_movemask_epi8(ymm_cmp); if ((unsigned int)mask != 0xFFFFFFFF) { int match_byte = __builtin_ctz(~mask); return (int32_t)(len + match_byte); @@ -47,7 +47,7 @@ static inline int32_t compare256_unaligned_avx2_static(const unsigned char *src0 return 256; } -static inline int32_t compare258_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) { +static inline int32_t compare258_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) { if (*(uint16_t *)src0 != *(uint16_t *)src1) return (*src0 == *src1); @@ -62,6 +62,6 @@ int32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char #define COMPARE256 compare256_unaligned_avx2_static #define COMPARE258 compare258_unaligned_avx2_static -#include "match_p.h" +#include "match_tpl.h" #endif diff --git a/arch/x86/compare258_sse.c b/arch/x86/compare258_sse.c index 9695529e2f..2c274823fc 100644 --- a/arch/x86/compare258_sse.c +++ b/arch/x86/compare258_sse.c @@ -69,6 +69,6 @@ int32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char #define COMPARE256 compare256_unaligned_sse4_static #define COMPARE258 compare258_unaligned_sse4_static -#include "match_p.h" +#include "match_tpl.h" #endif diff --git a/compare258.c b/compare258.c index 76d062779c..cb75d0974d 100644 --- a/compare258.c +++ b/compare258.c @@ -61,7 +61,7 @@ int32_t compare258_c(const unsigned char *src0, const unsigned char *src1) { #define COMPARE256 compare256_c_static #define COMPARE258 compare258_c_static -#include "match_p.h" +#include "match_tpl.h" #ifdef UNALIGNED_OK /* UNALIGNED_OK, 16-bit integer comparison */ @@ -101,7 +101,7 @@ int32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char * #define COMPARE256 compare256_unaligned_16_static #define COMPARE258 compare258_unaligned_16_static -#include "match_p.h" +#include "match_tpl.h" #ifdef HAVE_BUILTIN_CTZ /* UNALIGNED_OK, 32-bit integer comparison */ @@ -139,7 +139,7 @@ int32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char * #define COMPARE256 compare256_unaligned_32_static #define COMPARE258 compare258_unaligned_32_static -#include "match_p.h" +#include "match_tpl.h" #endif @@ -179,7 +179,7 @@ int32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char * #define COMPARE256 compare256_unaligned_64_static #define COMPARE258 compare258_unaligned_64_static -#include "match_p.h" +#include "match_tpl.h" #endif diff --git a/match_p.h b/match_p.h deleted file mode 100644 index 93aa0d475f..0000000000 --- a/match_p.h +++ /dev/null @@ -1,139 +0,0 @@ - -#include "zbuild.h" -#include "deflate.h" -#include "functable.h" - -#ifndef BESTCMP_TYPE -#define BESTCMP_TYPE - -#ifdef UNALIGNED_OK -#if MIN_MATCH >= 4 -typedef uint32_t bestcmp_t; -#elif MIN_MATCH >= 2 -typedef uint16_t bestcmp_t; -#else -typedef uint8_t bestcmp_t; -#endif -#else -typedef uint8_t bestcmp_t; -#endif - -#endif - -/* - * Set match_start to the longest match starting at the given string and - * return its length. Matches shorter or equal to prev_length are discarded, - * in which case the result is equal to prev_length and match_start is garbage. - * - * IN assertions: cur_match is the head of the hash chain for the current - * string (strstart) and its distance is <= MAX_DIST, and prev_length >=1 - * OUT assertion: the match length is not greater than s->lookahead - */ -int32_t LONGEST_MATCH(deflate_state *const s, IPos cur_match) { - unsigned int strstart = s->strstart; - const unsigned wmask = s->w_mask; - unsigned char *window = s->window; - unsigned char *scan = window + strstart; - const Pos *prev = s->prev; - unsigned chain_length; - IPos limit; - unsigned int len, best_len, nice_match; - bestcmp_t scan_end, scan_start; - - /* - * The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple - * of 16. It is easy to get rid of this optimization if necessary. - */ - Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); - - /* - * Do not waste too much time if we already have a good match - */ - best_len = s->prev_length; - if (best_len == 0) - best_len = 1; - chain_length = s->max_chain_length; - if (best_len >= s->good_match) - chain_length >>= 2; - - /* - * Do not look for matches beyond the end of the input. This is - * necessary to make deflate deterministic - */ - nice_match = (unsigned int)s->nice_match > s->lookahead ? s->lookahead : (unsigned int)s->nice_match; - - /* - * Stop when cur_match becomes <= limit. To simplify the code, - * we prevent matches with the string of window index 0 - */ - limit = strstart > MAX_DIST(s) ? strstart - MAX_DIST(s) : 0; - - scan_start = *(bestcmp_t *)(scan); - scan_end = *(bestcmp_t *)(scan+best_len-1); - - Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead"); - do { - unsigned char *match; - int cont; - if (cur_match >= strstart) - break; - - /* - * Skip to next match if the match length cannot increase - * or if the match length is less than 2. Note that the checks - * below for insufficient lookahead only occur occasionally - * for performance reasons. Therefore uninitialized memory - * will be accessed and conditional jumps will be made that - * depend on those values. However the length of the match - * is limited to the lookahead, so the output of deflate is not - * affected by the uninitialized values. - */ - cont = 1; - do { - match = window + cur_match; - if (LIKELY(*(bestcmp_t *)(match+best_len-1) != scan_end || - *(bestcmp_t *)(match) != scan_start)) { - if ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0) { - continue; - } - cont = 0; - } - break; - } while (1); - - if (!cont) - break; - -#if MIN_MATCH >= 2 && defined(UNALIGNED_OK) - len = COMPARE256(scan+2, match+2) + 2; -#else - len = COMPARE258(scan, match); -#endif - - Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan"); - - if (len > best_len) { - s->match_start = cur_match; - best_len = len; - if (len >= nice_match) - break; - scan_end = *(bestcmp_t *)(scan+best_len-1); - } else { - /* - * The probability of finding a match later if we here - * is pretty low, so for performance it's best to - * outright stop here for the lower compression levels - */ - if (s->level < TRIGGER_LEVEL) - break; - } - } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0); - - if (best_len <= s->lookahead) - return best_len; - return s->lookahead; -} - -#undef LONGEST_MATCH -#undef COMPARE256 -#undef COMPARE258 diff --git a/match_tpl.h b/match_tpl.h new file mode 100644 index 0000000000..93aa0d475f --- /dev/null +++ b/match_tpl.h @@ -0,0 +1,139 @@ + +#include "zbuild.h" +#include "deflate.h" +#include "functable.h" + +#ifndef BESTCMP_TYPE +#define BESTCMP_TYPE + +#ifdef UNALIGNED_OK +#if MIN_MATCH >= 4 +typedef uint32_t bestcmp_t; +#elif MIN_MATCH >= 2 +typedef uint16_t bestcmp_t; +#else +typedef uint8_t bestcmp_t; +#endif +#else +typedef uint8_t bestcmp_t; +#endif + +#endif + +/* + * Set match_start to the longest match starting at the given string and + * return its length. Matches shorter or equal to prev_length are discarded, + * in which case the result is equal to prev_length and match_start is garbage. + * + * IN assertions: cur_match is the head of the hash chain for the current + * string (strstart) and its distance is <= MAX_DIST, and prev_length >=1 + * OUT assertion: the match length is not greater than s->lookahead + */ +int32_t LONGEST_MATCH(deflate_state *const s, IPos cur_match) { + unsigned int strstart = s->strstart; + const unsigned wmask = s->w_mask; + unsigned char *window = s->window; + unsigned char *scan = window + strstart; + const Pos *prev = s->prev; + unsigned chain_length; + IPos limit; + unsigned int len, best_len, nice_match; + bestcmp_t scan_end, scan_start; + + /* + * The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple + * of 16. It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + /* + * Do not waste too much time if we already have a good match + */ + best_len = s->prev_length; + if (best_len == 0) + best_len = 1; + chain_length = s->max_chain_length; + if (best_len >= s->good_match) + chain_length >>= 2; + + /* + * Do not look for matches beyond the end of the input. This is + * necessary to make deflate deterministic + */ + nice_match = (unsigned int)s->nice_match > s->lookahead ? s->lookahead : (unsigned int)s->nice_match; + + /* + * Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0 + */ + limit = strstart > MAX_DIST(s) ? strstart - MAX_DIST(s) : 0; + + scan_start = *(bestcmp_t *)(scan); + scan_end = *(bestcmp_t *)(scan+best_len-1); + + Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead"); + do { + unsigned char *match; + int cont; + if (cur_match >= strstart) + break; + + /* + * Skip to next match if the match length cannot increase + * or if the match length is less than 2. Note that the checks + * below for insufficient lookahead only occur occasionally + * for performance reasons. Therefore uninitialized memory + * will be accessed and conditional jumps will be made that + * depend on those values. However the length of the match + * is limited to the lookahead, so the output of deflate is not + * affected by the uninitialized values. + */ + cont = 1; + do { + match = window + cur_match; + if (LIKELY(*(bestcmp_t *)(match+best_len-1) != scan_end || + *(bestcmp_t *)(match) != scan_start)) { + if ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0) { + continue; + } + cont = 0; + } + break; + } while (1); + + if (!cont) + break; + +#if MIN_MATCH >= 2 && defined(UNALIGNED_OK) + len = COMPARE256(scan+2, match+2) + 2; +#else + len = COMPARE258(scan, match); +#endif + + Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan"); + + if (len > best_len) { + s->match_start = cur_match; + best_len = len; + if (len >= nice_match) + break; + scan_end = *(bestcmp_t *)(scan+best_len-1); + } else { + /* + * The probability of finding a match later if we here + * is pretty low, so for performance it's best to + * outright stop here for the lower compression levels + */ + if (s->level < TRIGGER_LEVEL) + break; + } + } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0); + + if (best_len <= s->lookahead) + return best_len; + return s->lookahead; +} + +#undef LONGEST_MATCH +#undef COMPARE256 +#undef COMPARE258 diff --git a/win32/Makefile.a64 b/win32/Makefile.a64 index 52aafd3606..ece0150725 100644 --- a/win32/Makefile.a64 +++ b/win32/Makefile.a64 @@ -126,9 +126,9 @@ compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h crc32.obj: $(SRCDIR)/crc32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32.h deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/match_p.h $(SRCDIR)/functable.h -deflate_medium.obj: $(SRCDIR)/deflate_medium.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/match_p.h $(SRCDIR)/functable.h -deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/match_p.h $(SRCDIR)/functable.h +deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h +deflate_medium.obj: $(SRCDIR)/deflate_medium.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h +deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h $(SRCDIR)/functable.h diff --git a/win32/Makefile.arm b/win32/Makefile.arm index 2c3a82d4e8..2e64886c5d 100644 --- a/win32/Makefile.arm +++ b/win32/Makefile.arm @@ -140,9 +140,9 @@ compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h crc32.obj: $(SRCDIR)/crc32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32.h deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/match_p.h $(SRCDIR)/functable.h -deflate_medium.obj: $(SRCDIR)/deflate_medium.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/match_p.h $(SRCDIR)/functable.h -deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/match_p.h $(SRCDIR)/functable.h +deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h +deflate_medium.obj: $(SRCDIR)/deflate_medium.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h +deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h $(SRCDIR)/functable.h diff --git a/win32/Makefile.msc b/win32/Makefile.msc index ec24b45f01..e12bc592a5 100644 --- a/win32/Makefile.msc +++ b/win32/Makefile.msc @@ -131,10 +131,10 @@ compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h crc32.obj: $(SRCDIR)/crc32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32.h deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h -deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/match_p.h $(SRCDIR)/functable.h -deflate_medium.obj: $(SRCDIR)/deflate_medium.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/match_p.h $(SRCDIR)/functable.h +deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h +deflate_medium.obj: $(SRCDIR)/deflate_medium.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h deflate_quick.obj: $(SRCDIR)/arch/x86/deflate_quick.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/memcopy.h -deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/match_p.h $(SRCDIR)/functable.h +deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/memcopy.h $(SRCDIR)/functable.h -- cgit 0.0.5-2-1-g0f52