diff options
| author | Nathan Moinvaziri <nathan@nathanm.com> | 2026-01-31 16:57:24 -0800 |
|---|---|---|
| committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2026-02-18 13:57:07 +0100 |
| commit | ee2dd805d6418dee2cce27d6bbef20f400fc934e (patch) | |
| tree | a197707a38a143169bf51e3a73dad97345fdab97 | |
| parent | 2f6f0e84571d420edea04cbd5ffbf0a13934a535 (diff) | |
| download | Project-Tick-ee2dd805d6418dee2cce27d6bbef20f400fc934e.tar.gz Project-Tick-ee2dd805d6418dee2cce27d6bbef20f400fc934e.zip | |
Optimize symbol buffer access based on platform unaligned access
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | deflate.c | 2 |
| -rw-r--r-- | deflate.h | 8 |
| -rw-r--r-- | deflate_p.h | 9 |
| -rw-r--r-- | trees.c | 12 |
4 files changed, 24 insertions, 7 deletions
```diff
@@ -167,7 +167,7 @@ Z_INTERNAL deflate_allocs* alloc_deflate(PREFIX3(stream) *strm, int windowBits,
     int window_size = DEFLATE_ADJUST_WINDOW_SIZE((1 << windowBits) * 2);
     int prev_size = (1 << windowBits) * (int)sizeof(Pos);
     int head_size = HASH_SIZE * sizeof(Pos);
-    int pending_size = lit_bufsize * LIT_BUFS;
+    int pending_size = (lit_bufsize * LIT_BUFS) + 1;
     int state_size = sizeof(deflate_state);
     int alloc_size = sizeof(deflate_allocs);
 
@@ -27,9 +27,11 @@
 #  define GZIP
 #endif
 
-/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at
-   the cost of a larger memory footprint */
-#ifndef NO_LIT_MEM
+/* LIT_MEM uses separate distance/length buffers instead of the overlaid sym_buf.
+   This uses ~20% more memory but is 1-2% faster on platforms without fast unaligned
+   access. By default, LIT_MEM is only enabled when OPTIMAL_CMP < 32. Define LIT_MEM
+   to force separate buffers, or NO_LIT_MEM to force sym_buf usage. */
+#if !defined(LIT_MEM) && !defined(NO_LIT_MEM) && (OPTIMAL_CMP < 32)
 #  define LIT_MEM
 #endif
 
diff --git a/deflate_p.h b/deflate_p.h
index ae340f8f37..f60970bab3 100644
--- a/deflate_p.h
+++ b/deflate_p.h
@@ -11,6 +11,7 @@
 
 #include "functable.h"
 #include "fallback_builtins.h"
+#include "zmemory.h"
 
 /* Forward declare common non-inlined functions declared in deflate.c */
 
@@ -68,9 +69,13 @@ static inline int zng_tr_tally_lit(deflate_state *s, unsigned char c) {
     s->l_buf[sym_next] = c;
     s->sym_next = sym_next + 1;
 #else
+#  if OPTIMAL_CMP >= 32
+    zng_memwrite_4(&s->sym_buf[sym_next], Z_U32_TO_LE((uint32_t)c << 16));
+#  else
     s->sym_buf[sym_next] = 0;
     s->sym_buf[sym_next+1] = 0;
     s->sym_buf[sym_next+2] = c;
+#  endif
     s->sym_next = sym_next + 3;
 #endif
     s->dyn_ltree[c].Freq++;
@@ -90,9 +95,13 @@ static inline int zng_tr_tally_dist(deflate_state* s, uint32_t dist, uint32_t le
     s->l_buf[sym_next] = (uint8_t)len;
     s->sym_next = sym_next + 1;
 #else
+#  if OPTIMAL_CMP >= 32
+    zng_memwrite_4(&s->sym_buf[sym_next], Z_U32_TO_LE(dist | ((uint32_t)len << 16)));
+#  else
     s->sym_buf[sym_next] = (uint8_t)(dist);
     s->sym_buf[sym_next+1] = (uint8_t)(dist >> 8);
     s->sym_buf[sym_next+2] = (uint8_t)len;
+#  endif
     s->sym_next = sym_next + 3;
 #endif
     s->matches++;
@@ -730,9 +730,15 @@ static void compress_block(deflate_state *s, const ct_data *ltree, const ct_data
             dist = d_buf[sx];
             lc = l_buf[sx++];
 #else
-            dist = sym_buf[sx++] & 0xff;
-            dist += (unsigned)(sym_buf[sx++] & 0xff) << 8;
-            lc = sym_buf[sx++];
+#  if OPTIMAL_CMP >= 32
+            uint32_t val = Z_U32_FROM_LE(zng_memread_4(&sym_buf[sx]));
+            dist = val & 0xffff;
+            lc = (val >> 16) & 0xff;
+#  else
+            dist = sym_buf[sx] + ((unsigned)sym_buf[sx + 1] << 8);
+            lc = sym_buf[sx + 2];
+#  endif
+            sx += 3;
 #endif
             if (dist == 0) {
                 zng_emit_lit(s, ltree, lc);
```
