summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathan Moinvaziri <nathan@nathanm.com>2025-11-28 19:31:10 -0800
committerHans Kristian Rosbach <hk-github@circlestorm.org>2025-12-01 11:21:30 +0100
commit2c8703510b62bc726e1d2ff9e04e10f221b43cb2 (patch)
treedda73eb097d1df00cca63b2bf3d6f48badba1024
parent6e6ec1c33cb2b183b31dbe3ccf85cba5e6db1f85 (diff)
downloadProject-Tick-2c8703510b62bc726e1d2ff9e04e10f221b43cb2.tar.gz
Project-Tick-2c8703510b62bc726e1d2ff9e04e10f221b43cb2.zip
Add quick_insert_value for optimized hash insertion
Reduces the number of reads by two Co-authored-by: Brian Pane <brianp@brianp.net> trifectatechfoundation/zlib-rs#374 trifectatechfoundation/zlib-rs#375
-rw-r--r--deflate.c2
-rw-r--r--deflate.h4
-rw-r--r--deflate_fast.c15
-rw-r--r--deflate_quick.c26
-rw-r--r--insert_string.c1
-rw-r--r--insert_string_roll.c1
-rw-r--r--insert_string_tpl.h22
7 files changed, 63 insertions, 8 deletions
diff --git a/deflate.c b/deflate.c
index 9372c21f0c..79baa181cc 100644
--- a/deflate.c
+++ b/deflate.c
@@ -1137,10 +1137,12 @@ static void lm_set_level(deflate_state *s, int level) {
s->update_hash = &update_hash_roll;
s->insert_string = &insert_string_roll;
s->quick_insert_string = &quick_insert_string_roll;
+ s->quick_insert_value = &quick_insert_value_roll;
} else {
s->update_hash = update_hash;
s->insert_string = insert_string;
s->quick_insert_string = quick_insert_string;
+ s->quick_insert_value = quick_insert_value;
}
s->level = level;
diff --git a/deflate.h b/deflate.h
index 6d5558d66c..72bd0198f6 100644
--- a/deflate.h
+++ b/deflate.h
@@ -122,14 +122,17 @@ typedef struct internal_state deflate_state;
typedef uint32_t (* update_hash_cb) (uint32_t h, uint32_t val);
typedef void (* insert_string_cb) (deflate_state *const s, uint32_t str, uint32_t count);
typedef Pos (* quick_insert_string_cb)(deflate_state *const s, uint32_t str);
+typedef Pos (* quick_insert_value_cb) (deflate_state *const s, uint32_t str, uint32_t val);
uint32_t update_hash (uint32_t h, uint32_t val);
void insert_string (deflate_state *const s, uint32_t str, uint32_t count);
Pos quick_insert_string (deflate_state *const s, uint32_t str);
+Pos quick_insert_value (deflate_state *const s, uint32_t str, uint32_t val);
uint32_t update_hash_roll (uint32_t h, uint32_t val);
void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count);
Pos quick_insert_string_roll(deflate_state *const s, uint32_t str);
+Pos quick_insert_value_roll (deflate_state *const s, uint32_t str, uint32_t val);
/* Struct for memory allocation handling */
typedef struct deflate_allocs_s {
@@ -233,6 +236,7 @@ struct ALIGNED_(64) internal_state {
update_hash_cb update_hash;
insert_string_cb insert_string;
quick_insert_string_cb quick_insert_string;
+ quick_insert_value_cb quick_insert_value;
/* Hash function callbacks that can be configured depending on the deflate
* algorithm being used */
diff --git a/deflate_fast.c b/deflate_fast.c
index efb95b856e..d3bc12ffb2 100644
--- a/deflate_fast.c
+++ b/deflate_fast.c
@@ -5,6 +5,7 @@
*/
#include "zbuild.h"
+#include "zmemory.h"
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
@@ -21,6 +22,8 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
uint32_t match_len = 0;
for (;;) {
+ uint8_t lc;
+
/* Make sure that we always have enough lookahead, except
* at the end of the input file. We need STD_MAX_MATCH bytes
* for the next match, plus WANT_MIN_MATCH bytes to insert the
@@ -39,8 +42,14 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
* dictionary, and set hash_head to the head of the hash chain:
*/
if (s->lookahead >= WANT_MIN_MATCH) {
- Pos hash_head = quick_insert_string(s, s->strstart);
+#if BYTE_ORDER == LITTLE_ENDIAN
+ uint32_t str_val = zng_memread_4(&s->window[s->strstart]);
+#else
+ uint32_t str_val = ZSWAP32(zng_memread_4(&s->window[s->strstart]));
+#endif
+ Pos hash_head = quick_insert_value(s, s->strstart, str_val);
int64_t dist = (int64_t)s->strstart - hash_head;
+ lc = (uint8_t)str_val;
/* Find the longest match, discarding those <= prev_length.
* At this point we have always match length < WANT_MIN_MATCH
@@ -53,6 +62,8 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
match_len = FUNCTABLE_CALL(longest_match)(s, hash_head);
/* longest_match() sets match_start */
}
+ } else {
+ lc = s->window[s->strstart];
}
if (match_len >= WANT_MIN_MATCH) {
@@ -84,7 +95,7 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
match_len = 0;
} else {
/* No match, output a literal byte */
- bflush = zng_tr_tally_lit(s, s->window[s->strstart]);
+ bflush = zng_tr_tally_lit(s, lc);
s->lookahead--;
s->strstart++;
}
diff --git a/deflate_quick.c b/deflate_quick.c
index f9312a2eba..5f49dfa69b 100644
--- a/deflate_quick.c
+++ b/deflate_quick.c
@@ -58,6 +58,8 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
}
for (;;) {
+ uint8_t lc;
+
if (UNLIKELY(s->pending + ((BIT_BUF_SIZE + 7) >> 3) >= s->pending_buf_size)) {
PREFIX(flush_pending)(s->strm);
if (s->strm->avail_out == 0) {
@@ -81,14 +83,25 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
}
if (LIKELY(s->lookahead >= WANT_MIN_MATCH)) {
- Pos hash_head = quick_insert_string(s, s->strstart);
+#if BYTE_ORDER == LITTLE_ENDIAN
+ uint32_t str_val = zng_memread_4(&s->window[s->strstart]);
+#else
+ uint32_t str_val = ZSWAP32(zng_memread_4(&s->window[s->strstart]));
+#endif
+ Pos hash_head = quick_insert_value(s, s->strstart, str_val);
int64_t dist = (int64_t)s->strstart - hash_head;
+ lc = (uint8_t)str_val;
if (dist <= MAX_DIST(s) && dist > 0) {
- const uint8_t *str_start = s->window + s->strstart;
const uint8_t *match_start = s->window + hash_head;
-
- if (zng_memcmp_2(str_start, match_start) == 0) {
+#if BYTE_ORDER == LITTLE_ENDIAN
+ uint32_t match_val = zng_memread_4(match_start);
+#else
+ uint32_t match_val = ZSWAP32(zng_memread_4(match_start));
+#endif
+
+ if (str_val == match_val) {
+ const uint8_t *str_start = s->window + s->strstart;
uint32_t match_len = FUNCTABLE_CALL(compare256)(str_start+2, match_start+2) + 2;
if (match_len >= WANT_MIN_MATCH) {
@@ -107,9 +120,10 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
}
}
}
+ } else {
+ lc = s->window[s->strstart];
}
-
- zng_tr_emit_lit(s, static_ltree, s->window[s->strstart]);
+ zng_tr_emit_lit(s, static_ltree, lc);
s->strstart++;
s->lookahead--;
}
diff --git a/insert_string.c b/insert_string.c
index 0c72347fc6..2826921292 100644
--- a/insert_string.c
+++ b/insert_string.c
@@ -17,5 +17,6 @@
#define UPDATE_HASH update_hash
#define INSERT_STRING insert_string
#define QUICK_INSERT_STRING quick_insert_string
+#define QUICK_INSERT_VALUE quick_insert_value
#include "insert_string_tpl.h"
diff --git a/insert_string_roll.c b/insert_string_roll.c
index 8693f96f59..bf12967c76 100644
--- a/insert_string_roll.c
+++ b/insert_string_roll.c
@@ -20,5 +20,6 @@
#define UPDATE_HASH update_hash_roll
#define INSERT_STRING insert_string_roll
#define QUICK_INSERT_STRING quick_insert_string_roll
+#define QUICK_INSERT_VALUE quick_insert_value_roll
#include "insert_string_tpl.h"
diff --git a/insert_string_tpl.h b/insert_string_tpl.h
index eac1bfe045..64cc743458 100644
--- a/insert_string_tpl.h
+++ b/insert_string_tpl.h
@@ -52,6 +52,28 @@ Z_INTERNAL uint32_t UPDATE_HASH(uint32_t h, uint32_t val) {
}
/* ===========================================================================
+ * Quick insert string str in the dictionary using a pre-read value and set match_head
+ * to the previous head of the hash chain (the most recent string with same hash key).
+ * Return the previous length of the hash chain.
+ */
+Z_INTERNAL Pos QUICK_INSERT_VALUE(deflate_state *const s, uint32_t str, uint32_t val) {
+ uint32_t hm;
+ Pos head;
+
+ HASH_CALC_VAR_INIT;
+ HASH_CALC(HASH_CALC_VAR, val);
+ HASH_CALC_VAR &= HASH_CALC_MASK;
+ hm = HASH_CALC_VAR;
+
+ head = s->head[hm];
+ if (LIKELY(head != str)) {
+ s->prev[str & s->w_mask] = head;
+ s->head[hm] = (Pos)str;
+ }
+ return head;
+}
+
+/* ===========================================================================
* Quick insert string str in the dictionary and set match_head to the previous head
* of the hash chain (the most recent string with same hash key). Return
* the previous length of the hash chain.