summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathan Moinvaziri <nathan@solidstatenetworks.com>2020-07-25 21:24:48 -0400
committerHans Kristian Rosbach <hk-github@circlestorm.org>2020-08-23 09:56:11 +0200
commitb0a3461245219f6bb4885de6c4529facabcf6a44 (patch)
treef13121dd554bb6796576375a011ca732fedf3004
parent0c931adffb42328cd20cbea716bbff73a60514f5 (diff)
downloadProject-Tick-b0a3461245219f6bb4885de6c4529facabcf6a44.tar.gz
Project-Tick-b0a3461245219f6bb4885de6c4529facabcf6a44.zip
Use unaligned 32-bit and 64-bit compare based on best match length when searching for matches.
Move TRIGGER_LEVEL to match_tpl.h since it is only used in longest match. Use early return inside match loops instead of cont variable. Added back two variable check for platforms that don't supported unaligned access.
-rw-r--r--deflate.h2
-rw-r--r--match_tpl.h147
2 files changed, 103 insertions, 46 deletions
diff --git a/deflate.h b/deflate.h
index ce6106cffa..93f7b5b70a 100644
--- a/deflate.h
+++ b/deflate.h
@@ -404,8 +404,6 @@ void ZLIB_INTERNAL flush_pending(PREFIX3(streamp) strm);
* used.
*/
-#define TRIGGER_LEVEL 5
-
/* Bit buffer and compress bits calculation debugging */
#ifdef ZLIB_DEBUG
# define cmpr_bits_add(s, len) s->compressed_len += (len)
diff --git a/match_tpl.h b/match_tpl.h
index 24bf01de79..44bd04da33 100644
--- a/match_tpl.h
+++ b/match_tpl.h
@@ -7,13 +7,11 @@
#define BESTCMP_TYPE
#ifdef UNALIGNED_OK
-#if MIN_MATCH >= 4
+# ifdef UNALIGNED64_OK
+typedef uint64_t bestcmp_t;
+# else
typedef uint32_t bestcmp_t;
-#elif MIN_MATCH >= 2
-typedef uint16_t bestcmp_t;
-#else
-typedef uint8_t bestcmp_t;
-#endif
+# endif
#else
typedef uint8_t bestcmp_t;
#endif
@@ -35,10 +33,26 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
ZLIB_REGISTER unsigned char *window = s->window;
ZLIB_REGISTER unsigned char *scan = window + strstart;
const Pos *prev = s->prev;
- unsigned chain_length;
Pos limit;
- unsigned int best_len, nice_match;
- bestcmp_t scan_end, scan_start;
+ uint32_t chain_length, nice_match, best_len, offset;
+ bestcmp_t scan_end;
+#ifndef UNALIGNED_OK
+ bestcmp_t scan_end0;
+#else
+ bestcmp_t scan_start;
+#endif
+
+#define EARLY_EXIT_TRIGGER_LEVEL 5
+
+#define RETURN_BEST_LEN \
+ if (best_len <= s->lookahead) \
+ return best_len; \
+ return s->lookahead;
+
+#define GOTO_NEXT_CHAIN \
+ if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
+ continue; \
+ RETURN_BEST_LEN;
/*
* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple
@@ -46,12 +60,33 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
*/
Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+ best_len = s->prev_length ? s->prev_length : 1;
+
+ /*
+ * Calculate read offset which should only extend an extra byte
+ * to find the next best match length.
+ */
+ offset = best_len-1;
+#ifdef UNALIGNED_OK
+ if (best_len >= sizeof(uint32_t)) {
+ offset -= 2;
+#ifdef UNALIGNED64_OK
+ if (best_len >= sizeof(uint64_t))
+ offset -= 4;
+#endif
+ }
+#endif
+
+ scan_end = *(bestcmp_t *)(scan+offset);
+#ifndef UNALIGNED_OK
+ scan_end0 = *(bestcmp_t *)(scan+offset+1);
+#else
+ scan_start = *(bestcmp_t *)(scan);
+#endif
+
/*
* Do not waste too much time if we already have a good match
*/
- best_len = s->prev_length;
- if (best_len == 0)
- best_len = 1;
chain_length = s->max_chain_length;
if (best_len >= s->good_match)
chain_length >>= 2;
@@ -60,7 +95,7 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
* Do not look for matches beyond the end of the input. This is
* necessary to make deflate deterministic
*/
- nice_match = (unsigned int)s->nice_match > s->lookahead ? s->lookahead : (unsigned int)s->nice_match;
+ nice_match = (uint32_t)s->nice_match > s->lookahead ? s->lookahead : (uint32_t)s->nice_match;
/*
* Stop when cur_match becomes <= limit. To simplify the code,
@@ -68,14 +103,10 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
*/
limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
- scan_start = *(bestcmp_t *)(scan);
- scan_end = *(bestcmp_t *)(scan+best_len-1);
-
Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
- do {
+ for (;;) {
ZLIB_REGISTER unsigned char *match;
ZLIB_REGISTER unsigned int len;
- int cont;
if (cur_match >= strstart)
break;
@@ -89,50 +120,78 @@ ZLIB_INTERNAL int32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
* is limited to the lookahead, so the output of deflate is not
* affected by the uninitialized values.
*/
- cont = 1;
- do {
- match = window + cur_match;
- if (LIKELY(*(bestcmp_t *)(match+best_len-1) != scan_end ||
- *(bestcmp_t *)(match) != scan_start)) {
- if ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0) {
- continue;
- }
- cont = 0;
+#ifdef UNALIGNED_OK
+ if (best_len < sizeof(uint32_t)) {
+ for (;;) {
+ match = window + cur_match;
+ if (UNLIKELY(*(uint16_t *)(match+offset) == (uint16_t)scan_end &&
+ *(uint16_t *)(match) == (uint16_t)scan_start))
+ break;
+ GOTO_NEXT_CHAIN;
}
- break;
- } while (1);
-
- if (!cont)
- break;
-
-#if MIN_MATCH >= 2 && defined(UNALIGNED_OK)
- len = COMPARE256(scan+2, match+2) + 2;
+# ifdef UNALIGNED64_OK
+ } else if (best_len >= sizeof(uint64_t)) {
+ for (;;) {
+ match = window + cur_match;
+ if (UNLIKELY(*(uint64_t *)(match+offset) == (uint64_t)scan_end &&
+ *(uint64_t *)(match) == (uint64_t)scan_start))
+ break;
+ GOTO_NEXT_CHAIN;
+ }
+# endif
+ } else {
+ for (;;) {
+ match = window + cur_match;
+ if (UNLIKELY(*(uint32_t *)(match+offset) == (uint32_t)scan_end &&
+ *(uint32_t *)(match) == (uint32_t)scan_start))
+ break;
+ GOTO_NEXT_CHAIN;
+ }
+ }
#else
- len = COMPARE258(scan, match);
+ for (;;) {
+ match = window + cur_match;
+ if (UNLIKELY(match[offset] == scan_end && match[offset+1] == scan_end0 &&
+ match[0] == scan[0] && match[1] == scan[1]))
+ break;
+ GOTO_NEXT_CHAIN;
+ }
#endif
-
+ len = COMPARE256(scan+2, match+2) + 2;
Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
if (len > best_len) {
s->match_start = cur_match;
best_len = len;
- if (len >= nice_match)
+ if (best_len >= nice_match)
break;
- scan_end = *(bestcmp_t *)(scan+best_len-1);
+ offset = best_len-1;
+#ifdef UNALIGNED_OK
+ if (best_len >= sizeof(uint32_t)) {
+ offset -= 2;
+#ifdef UNALIGNED64_OK
+ if (best_len >= sizeof(uint64_t))
+ offset -= 4;
+#endif
+ }
+#endif
+ scan_end = *(bestcmp_t *)(scan+offset);
+#ifndef UNALIGNED_OK
+ scan_end0 = *(bestcmp_t *)(scan+offset+1);
+#endif
} else {
/*
* The probability of finding a match later if we here
* is pretty low, so for performance it's best to
* outright stop here for the lower compression levels
*/
- if (s->level < TRIGGER_LEVEL)
+ if (s->level < EARLY_EXIT_TRIGGER_LEVEL)
break;
}
- } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0);
+ GOTO_NEXT_CHAIN;
+ }
- if (best_len <= s->lookahead)
- return best_len;
- return s->lookahead;
+ RETURN_BEST_LEN;
}
#undef LONGEST_MATCH