summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHans Kristian Rosbach <hk-git@circlestorm.org>2025-11-30 22:31:49 +0100
committerHans Kristian Rosbach <hk-github@circlestorm.org>2025-12-07 12:41:30 +0100
commitbf95fa0fba9764608aa4164d9ead5839d2edde87 (patch)
tree47a0d2c7df4e1070705df678bd8dd0879d3d5ce6
parente2cd66c048de41a05a13cf5e6b32a2b72577c82b (diff)
downloadProject-Tick-bf95fa0fba9764608aa4164d9ead5839d2edde87.tar.gz
Project-Tick-bf95fa0fba9764608aa4164d9ead5839d2edde87.zip
Inline all uses of quick_insert_string*/quick_insert_value*.
Inline all uses of update_hash*. Inline insert_string into deflate_quick, deflate_fast and deflate_medium. Remove insert_string from deflate_state Use local function pointer for insert_string. Fix level check to actually check level and not `s->max_chain_length <= 1024`.
-rw-r--r--CMakeLists.txt2
-rw-r--r--Makefile.in2
-rw-r--r--deflate.c40
-rw-r--r--deflate.h18
-rw-r--r--deflate_fast.c3
-rw-r--r--deflate_medium.c1
-rw-r--r--deflate_quick.c1
-rw-r--r--deflate_slow.c20
-rw-r--r--insert_string.c20
-rw-r--r--insert_string_p.h57
-rw-r--r--insert_string_roll.c25
-rw-r--r--insert_string_tpl.h20
-rw-r--r--match_tpl.h21
-rw-r--r--test/benchmarks/benchmark_insert_string.cc3
14 files changed, 120 insertions, 113 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f59bd9e68d..5bb6906071 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1329,6 +1329,7 @@ set(ZLIB_PRIVATE_HDRS
inflate.h
inflate_p.h
inftrees.h
+ insert_string_p.h
insert_string_tpl.h
match_tpl.h
trees.h
@@ -1358,7 +1359,6 @@ set(ZLIB_SRCS
inflate.c
inftrees.c
insert_string.c
- insert_string_roll.c
trees.c
uncompr.c
zutil.c
diff --git a/Makefile.in b/Makefile.in
index 1e248a8935..19fced0002 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -98,7 +98,6 @@ OBJZ = \
inflate.o \
inftrees.o \
insert_string.o \
- insert_string_roll.o \
trees.o \
uncompr.o \
zutil.o \
@@ -138,7 +137,6 @@ PIC_OBJZ = \
inflate.lo \
inftrees.lo \
insert_string.lo \
- insert_string_roll.lo \
trees.lo \
uncompr.lo \
zutil.lo \
diff --git a/deflate.c b/deflate.c
index 79baa181cc..19a9ddf5b9 100644
--- a/deflate.c
+++ b/deflate.c
@@ -51,6 +51,7 @@
#include "functable.h"
#include "deflate.h"
#include "deflate_p.h"
+#include "insert_string_p.h"
/* Avoid conflicts with zlib.h macros */
#ifdef ZLIB_COMPAT
@@ -415,6 +416,7 @@ static int deflateStateCheck(PREFIX3(stream) *strm) {
/* ========================================================================= */
int32_t Z_EXPORT PREFIX(deflateSetDictionary)(PREFIX3(stream) *strm, const uint8_t *dictionary, uint32_t dictLength) {
deflate_state *s;
+ insert_string_cb insert_string_func;
unsigned int str, n;
int wrap;
uint32_t avail;
@@ -427,6 +429,11 @@ int32_t Z_EXPORT PREFIX(deflateSetDictionary)(PREFIX3(stream) *strm, const uint8
if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead)
return Z_STREAM_ERROR;
+ if (s->level >= 9)
+ insert_string_func = insert_string_roll;
+ else
+ insert_string_func = insert_string;
+
/* when using zlib wrappers, compute Adler-32 for provided dictionary */
if (wrap == 1)
strm->adler = FUNCTABLE_CALL(adler32)(strm->adler, dictionary, dictLength);
@@ -454,7 +461,7 @@ int32_t Z_EXPORT PREFIX(deflateSetDictionary)(PREFIX3(stream) *strm, const uint8
while (s->lookahead >= STD_MIN_MATCH) {
str = s->strstart;
n = s->lookahead - (STD_MIN_MATCH - 1);
- s->insert_string(s, str, n);
+ insert_string_func(s, str, n);
s->strstart = str + n;
s->lookahead = STD_MIN_MATCH - 1;
PREFIX(fill_window)(s);
@@ -1129,22 +1136,6 @@ static void lm_set_level(deflate_state *s, int level) {
s->good_match = configuration_table[level].good_length;
s->nice_match = configuration_table[level].nice_length;
s->max_chain_length = configuration_table[level].max_chain;
-
- /* Use rolling hash for deflate_slow algorithm with level 9. It allows us to
- * properly lookup different hash chains to speed up longest_match search. Since hashing
- * method changes depending on the level we cannot put this into functable. */
- if (s->max_chain_length > 1024) {
- s->update_hash = &update_hash_roll;
- s->insert_string = &insert_string_roll;
- s->quick_insert_string = &quick_insert_string_roll;
- s->quick_insert_value = &quick_insert_value_roll;
- } else {
- s->update_hash = update_hash;
- s->insert_string = insert_string;
- s->quick_insert_string = quick_insert_string;
- s->quick_insert_value = quick_insert_value;
- }
-
s->level = level;
}
@@ -1182,12 +1173,19 @@ static void lm_init(deflate_state *s) {
*/
void Z_INTERNAL PREFIX(fill_window)(deflate_state *s) {
+ insert_string_cb insert_string_func;
unsigned n;
unsigned int more; /* Amount of free space at the end of the window. */
unsigned int wsize = s->w_size;
+ int level = s->level;
Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
+ if (level >= 9)
+ insert_string_func = insert_string_roll;
+ else
+ insert_string_func = insert_string;
+
do {
more = s->window_size - s->lookahead - s->strstart;
@@ -1231,17 +1229,17 @@ void Z_INTERNAL PREFIX(fill_window)(deflate_state *s) {
/* Initialize the hash value now that we have some input: */
if (s->lookahead + s->insert >= STD_MIN_MATCH) {
unsigned int str = s->strstart - s->insert;
- if (UNLIKELY(s->max_chain_length > 1024)) {
- s->ins_h = s->update_hash(s->window[str], s->window[str+1]);
+ if (UNLIKELY(level >= 9)) {
+ s->ins_h = update_hash_roll(s->window[str], s->window[str+1]);
} else if (str >= 1) {
- s->quick_insert_string(s, str + 2 - STD_MIN_MATCH);
+ quick_insert_string(s, str + 2 - STD_MIN_MATCH);
}
unsigned int count = s->insert;
if (UNLIKELY(s->lookahead == 1)) {
count -= 1;
}
if (count > 0) {
- s->insert_string(s, str, count);
+ insert_string_func(s, str, count);
s->insert -= count;
}
}
diff --git a/deflate.h b/deflate.h
index 72bd0198f6..d91b35fc29 100644
--- a/deflate.h
+++ b/deflate.h
@@ -119,20 +119,9 @@ typedef uint16_t Pos;
/* Type definitions for hash callbacks */
typedef struct internal_state deflate_state;
-typedef uint32_t (* update_hash_cb) (uint32_t h, uint32_t val);
typedef void (* insert_string_cb) (deflate_state *const s, uint32_t str, uint32_t count);
-typedef Pos (* quick_insert_string_cb)(deflate_state *const s, uint32_t str);
-typedef Pos (* quick_insert_value_cb) (deflate_state *const s, uint32_t str, uint32_t val);
-
-uint32_t update_hash (uint32_t h, uint32_t val);
void insert_string (deflate_state *const s, uint32_t str, uint32_t count);
-Pos quick_insert_string (deflate_state *const s, uint32_t str);
-Pos quick_insert_value (deflate_state *const s, uint32_t str, uint32_t val);
-
-uint32_t update_hash_roll (uint32_t h, uint32_t val);
void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count);
-Pos quick_insert_string_roll(deflate_state *const s, uint32_t str);
-Pos quick_insert_value_roll (deflate_state *const s, uint32_t str, uint32_t val);
/* Struct for memory allocation handling */
typedef struct deflate_allocs_s {
@@ -233,13 +222,6 @@ struct ALIGNED_(64) internal_state {
* max_insert_length is used only for compression levels <= 6.
*/
- update_hash_cb update_hash;
- insert_string_cb insert_string;
- quick_insert_string_cb quick_insert_string;
- quick_insert_value_cb quick_insert_value;
- /* Hash function callbacks that can be configured depending on the deflate
- * algorithm being used */
-
int level; /* compression level (1..9) */
int strategy; /* favor or force Huffman coding*/
diff --git a/deflate_fast.c b/deflate_fast.c
index d3bc12ffb2..ad650fa1ed 100644
--- a/deflate_fast.c
+++ b/deflate_fast.c
@@ -9,6 +9,7 @@
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
+#include "insert_string_p.h"
/* ===========================================================================
* Compress as much as possible from the input stream, return the current
@@ -82,7 +83,7 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
match_len--; /* string at strstart already in table */
s->strstart++;
- insert_string(s, s->strstart, match_len);
+ insert_string_static(s, s->strstart, match_len);
s->strstart += match_len;
} else {
s->strstart += match_len;
diff --git a/deflate_medium.c b/deflate_medium.c
index ae7c737ecb..40c9b6278a 100644
--- a/deflate_medium.c
+++ b/deflate_medium.c
@@ -11,6 +11,7 @@
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
+#include "insert_string_p.h"
struct match {
uint16_t match_start;
diff --git a/deflate_quick.c b/deflate_quick.c
index 5f49dfa69b..961ec87c9a 100644
--- a/deflate_quick.c
+++ b/deflate_quick.c
@@ -23,6 +23,7 @@
#include "deflate_p.h"
#include "functable.h"
#include "trees_emit.h"
+#include "insert_string_p.h"
extern const ct_data static_ltree[L_CODES+2];
extern const ct_data static_dtree[D_CODES];
diff --git a/deflate_slow.c b/deflate_slow.c
index e9d7fc76ce..f50e71b36e 100644
--- a/deflate_slow.c
+++ b/deflate_slow.c
@@ -8,6 +8,7 @@
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
+#include "insert_string_p.h"
/* ===========================================================================
* Same as deflate_medium, but achieves better compression. We use a lazy
@@ -15,13 +16,17 @@
* no better match at the next window position.
*/
Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
- int bflush; /* set if current block must be flushed */
match_func longest_match;
+ insert_string_cb insert_string_func;
+ int bflush; /* set if current block must be flushed */
- if (s->max_chain_length <= 1024)
- longest_match = FUNCTABLE_FPTR(longest_match);
- else
+ if (s->level >= 9) {
longest_match = FUNCTABLE_FPTR(longest_match_slow);
+ insert_string_func = insert_string_roll;
+ } else {
+ longest_match = FUNCTABLE_FPTR(longest_match);
+ insert_string_func = insert_string;
+ }
/* Process the input block. */
for (;;) {
@@ -44,7 +49,10 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
*/
Pos hash_head = 0;
if (LIKELY(s->lookahead >= WANT_MIN_MATCH)) {
- hash_head = s->quick_insert_string(s, s->strstart);
+ if (s->level >= 9)
+ hash_head = quick_insert_string_roll(s, s->strstart);
+ else
+ hash_head = quick_insert_string(s, s->strstart);
}
/* Find the longest match, discarding those <= prev_length.
@@ -93,7 +101,7 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
unsigned int insert_cnt = mov_fwd;
if (UNLIKELY(insert_cnt > max_insert - s->strstart))
insert_cnt = max_insert - s->strstart;
- s->insert_string(s, s->strstart + 1, insert_cnt);
+ insert_string_func(s, s->strstart + 1, insert_cnt);
}
s->prev_length = 0;
s->match_available = 0;
diff --git a/insert_string.c b/insert_string.c
index 2826921292..dd1960fbe6 100644
--- a/insert_string.c
+++ b/insert_string.c
@@ -1,4 +1,4 @@
-/* insert_string.c -- insert_string integer hash variant
+/* insert_string.c -- make insert_string functions from static inlined functions
*
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
@@ -7,16 +7,12 @@
#include "zbuild.h"
#include "deflate.h"
+#include "insert_string_p.h"
-#define HASH_SLIDE 16
+void insert_string(deflate_state *const s, uint32_t str, uint32_t count) {
+ insert_string_static(s, str, count);
+}
-#define HASH_CALC(h, val) h = ((val * 2654435761U) >> HASH_SLIDE);
-#define HASH_CALC_VAR h
-#define HASH_CALC_VAR_INIT uint32_t h
-
-#define UPDATE_HASH update_hash
-#define INSERT_STRING insert_string
-#define QUICK_INSERT_STRING quick_insert_string
-#define QUICK_INSERT_VALUE quick_insert_value
-
-#include "insert_string_tpl.h"
+void insert_string_roll(deflate_state *const s, uint32_t str, uint32_t count) {
+ insert_string_roll_static(s, str, count);
+}
diff --git a/insert_string_p.h b/insert_string_p.h
new file mode 100644
index 0000000000..cd02234407
--- /dev/null
+++ b/insert_string_p.h
@@ -0,0 +1,57 @@
+#ifndef INSERT_STRING_P_H_
+#define INSERT_STRING_P_H_
+
+/* insert_string_p.h -- insert_string function generator
+ *
+ * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ */
+
+// Normal insert_string, levels 1-8
+#define HASH_SLIDE 16
+
+#define HASH_CALC(h, val) h = ((val * 2654435761U) >> HASH_SLIDE);
+#define HASH_CALC_MASK HASH_MASK
+#define HASH_CALC_VAR h
+#define HASH_CALC_VAR_INIT uint32_t h
+#define HASH_CALC_OFFSET 0
+
+#define UPDATE_HASH update_hash
+#define INSERT_STRING insert_string_static
+#define QUICK_INSERT_STRING quick_insert_string
+#define QUICK_INSERT_VALUE quick_insert_value
+
+#include "insert_string_tpl.h"
+
+// Cleanup
+#undef HASH_SLIDE
+#undef HASH_CALC
+#undef HASH_CALC_READ
+#undef HASH_CALC_MASK
+#undef HASH_CALC_OFFSET
+#undef HASH_CALC_VAR
+#undef HASH_CALC_VAR_INIT
+#undef UPDATE_HASH
+#undef INSERT_STRING
+#undef QUICK_INSERT_STRING
+#undef QUICK_INSERT_VALUE
+
+// Rolling insert_string, level 9
+#define HASH_SLIDE 5
+
+#define HASH_CALC(h, val) h = ((h << HASH_SLIDE) ^ ((uint8_t)val))
+#define HASH_CALC_VAR s->ins_h
+#define HASH_CALC_VAR_INIT
+#define HASH_CALC_READ val = strstart[0]
+#define HASH_CALC_MASK (32768u - 1u)
+#define HASH_CALC_OFFSET (STD_MIN_MATCH-1)
+
+#define UPDATE_HASH update_hash_roll
+#define INSERT_STRING insert_string_roll_static
+#define QUICK_INSERT_STRING quick_insert_string_roll
+#define QUICK_INSERT_VALUE quick_insert_value_roll
+
+#include "insert_string_tpl.h"
+
+#endif
diff --git a/insert_string_roll.c b/insert_string_roll.c
deleted file mode 100644
index bf12967c76..0000000000
--- a/insert_string_roll.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/* insert_string_roll.c -- insert_string rolling hash variant
- *
- * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- *
- */
-
-#include "zbuild.h"
-#include "deflate.h"
-
-#define HASH_SLIDE 5
-
-#define HASH_CALC(h, val) h = ((h << HASH_SLIDE) ^ ((uint8_t)val))
-#define HASH_CALC_VAR s->ins_h
-#define HASH_CALC_VAR_INIT
-#define HASH_CALC_READ val = strstart[0]
-#define HASH_CALC_MASK (32768u - 1u)
-#define HASH_CALC_OFFSET (STD_MIN_MATCH-1)
-
-#define UPDATE_HASH update_hash_roll
-#define INSERT_STRING insert_string_roll
-#define QUICK_INSERT_STRING quick_insert_string_roll
-#define QUICK_INSERT_VALUE quick_insert_value_roll
-
-#include "insert_string_tpl.h"
diff --git a/insert_string_tpl.h b/insert_string_tpl.h
index 64cc743458..04a9b0a6b6 100644
--- a/insert_string_tpl.h
+++ b/insert_string_tpl.h
@@ -1,6 +1,3 @@
-#ifndef INSERT_STRING_H_
-#define INSERT_STRING_H_
-
/* insert_string_tpl.h -- Private insert_string functions shared with more than
* one insert string implementation
*
@@ -22,14 +19,6 @@
*
*/
-#include "zmemory.h"
-
-#ifndef HASH_CALC_OFFSET
-# define HASH_CALC_OFFSET 0
-#endif
-#ifndef HASH_CALC_MASK
-# define HASH_CALC_MASK HASH_MASK
-#endif
#ifndef HASH_CALC_READ
# if BYTE_ORDER == LITTLE_ENDIAN
# define HASH_CALC_READ \
@@ -46,7 +35,7 @@
* input characters, so that a running hash key can be computed from the
* previous key instead of complete recalculation each time.
*/
-Z_INTERNAL uint32_t UPDATE_HASH(uint32_t h, uint32_t val) {
+Z_FORCEINLINE static uint32_t UPDATE_HASH(uint32_t h, uint32_t val) {
HASH_CALC(h, val);
return h & HASH_CALC_MASK;
}
@@ -56,7 +45,7 @@ Z_INTERNAL uint32_t UPDATE_HASH(uint32_t h, uint32_t val) {
* to the previous head of the hash chain (the most recent string with same hash key).
* Return the previous length of the hash chain.
*/
-Z_INTERNAL Pos QUICK_INSERT_VALUE(deflate_state *const s, uint32_t str, uint32_t val) {
+Z_FORCEINLINE static Pos QUICK_INSERT_VALUE(deflate_state *const s, uint32_t str, uint32_t val) {
uint32_t hm;
Pos head;
@@ -78,7 +67,7 @@ Z_INTERNAL Pos QUICK_INSERT_VALUE(deflate_state *const s, uint32_t str, uint32_t
* of the hash chain (the most recent string with same hash key). Return
* the previous length of the hash chain.
*/
-Z_INTERNAL Pos QUICK_INSERT_STRING(deflate_state *const s, uint32_t str) {
+Z_FORCEINLINE static Pos QUICK_INSERT_STRING(deflate_state *const s, uint32_t str) {
uint8_t *strstart = s->window + str + HASH_CALC_OFFSET;
uint32_t val, hm;
Pos head;
@@ -105,7 +94,7 @@ Z_INTERNAL Pos QUICK_INSERT_STRING(deflate_state *const s, uint32_t str) {
* input characters and the first STD_MIN_MATCH bytes of str are valid
* (except for the last STD_MIN_MATCH-1 bytes of the input file).
*/
-Z_INTERNAL void INSERT_STRING(deflate_state *const s, uint32_t str, uint32_t count) {
+Z_FORCEINLINE static void INSERT_STRING(deflate_state *const s, uint32_t str, uint32_t count) {
uint8_t *strstart = s->window + str + HASH_CALC_OFFSET;
uint8_t *strend = strstart + count;
@@ -130,4 +119,3 @@ Z_INTERNAL void INSERT_STRING(deflate_state *const s, uint32_t str, uint32_t cou
}
}
}
-#endif
diff --git a/match_tpl.h b/match_tpl.h
index 47e9aed9f5..94de125e4b 100644
--- a/match_tpl.h
+++ b/match_tpl.h
@@ -8,13 +8,10 @@
* https://github.com/gildor2/fast_zlib
*/
-#ifndef MATCH_TPL_H
-#define MATCH_TPL_H
+#include "insert_string_p.h"
#define EARLY_EXIT_TRIGGER_LEVEL 5
-#endif
-
/* Set match_start to the longest match starting at the given string and
* return its length. Matches shorter or equal to prev_length are discarded,
* in which case the result is equal to prev_length and match_start is garbage.
@@ -91,12 +88,13 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
* to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
* these strings are not yet inserted into the hash table.
*/
- hash = s->update_hash(0, scan[1]);
- hash = s->update_hash(hash, scan[2]);
+ // use update_hash_roll for deflate_slow
+ hash = update_hash_roll(0, scan[1]);
+ hash = update_hash_roll(hash, scan[2]);
for (i = 3; i <= best_len; i++) {
- hash = s->update_hash(hash, scan[i]);
-
+ // use update_hash_roll for deflate_slow
+ hash = update_hash_roll(hash, scan[i]);
/* If we're starting with best_len >= 3, we can use offset search. */
pos = s->head[hash];
if (pos < cur_match) {
@@ -203,9 +201,10 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
*/
scan_endstr = scan + len - (STD_MIN_MATCH+1);
- hash = s->update_hash(0, scan_endstr[0]);
- hash = s->update_hash(hash, scan_endstr[1]);
- hash = s->update_hash(hash, scan_endstr[2]);
+ // use update_hash_roll for deflate_slow
+ hash = update_hash_roll(0, scan_endstr[0]);
+ hash = update_hash_roll(hash, scan_endstr[1]);
+ hash = update_hash_roll(hash, scan_endstr[2]);
pos = s->head[hash];
if (pos < cur_match) {
diff --git a/test/benchmarks/benchmark_insert_string.cc b/test/benchmarks/benchmark_insert_string.cc
index 1ff6e694aa..fb0f99e8e7 100644
--- a/test/benchmarks/benchmark_insert_string.cc
+++ b/test/benchmarks/benchmark_insert_string.cc
@@ -14,11 +14,14 @@ extern "C" {
# include "deflate.h"
# include "arch_functions.h"
# include "../test_cpu_features.h"
+# include "insert_string_p.h"
}
#define MAX_WSIZE 32768
#define TEST_WINDOW_SIZE (MAX_WSIZE * 2)
+typedef Pos (* quick_insert_string_cb)(deflate_state *const s, uint32_t str);
+
// Base class with common setup/teardown for both insert_string benchmarks
class insert_string_base: public benchmark::Fixture {
protected: