summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  deflate.c    2
-rw-r--r--  deflate.h    8
-rw-r--r--  deflate_p.h  9
-rw-r--r--  trees.c     12
4 files changed, 24 insertions, 7 deletions
diff --git a/deflate.c b/deflate.c
index e0f89fab9d..81e1ac5a5f 100644
--- a/deflate.c
+++ b/deflate.c
@@ -167,7 +167,7 @@ Z_INTERNAL deflate_allocs* alloc_deflate(PREFIX3(stream) *strm, int windowBits,
int window_size = DEFLATE_ADJUST_WINDOW_SIZE((1 << windowBits) * 2);
int prev_size = (1 << windowBits) * (int)sizeof(Pos);
int head_size = HASH_SIZE * sizeof(Pos);
- int pending_size = lit_bufsize * LIT_BUFS;
+ int pending_size = (lit_bufsize * LIT_BUFS) + 1;
int state_size = sizeof(deflate_state);
int alloc_size = sizeof(deflate_allocs);
diff --git a/deflate.h b/deflate.h
index 85435636d4..3f9f8f4686 100644
--- a/deflate.h
+++ b/deflate.h
@@ -27,9 +27,11 @@
# define GZIP
#endif
-/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at
- the cost of a larger memory footprint */
-#ifndef NO_LIT_MEM
+/* LIT_MEM uses separate distance/length buffers instead of the overlaid sym_buf.
+ This uses ~20% more memory but is 1-2% faster on platforms without fast unaligned
+ access. By default, LIT_MEM is only enabled when OPTIMAL_CMP < 32. Define LIT_MEM
+ to force separate buffers, or NO_LIT_MEM to force sym_buf usage. */
+#if !defined(LIT_MEM) && !defined(NO_LIT_MEM) && (OPTIMAL_CMP < 32)
# define LIT_MEM
#endif
diff --git a/deflate_p.h b/deflate_p.h
index ae340f8f37..f60970bab3 100644
--- a/deflate_p.h
+++ b/deflate_p.h
@@ -11,6 +11,7 @@
#include "functable.h"
#include "fallback_builtins.h"
+#include "zmemory.h"
/* Forward declare common non-inlined functions declared in deflate.c */
@@ -68,9 +69,13 @@ static inline int zng_tr_tally_lit(deflate_state *s, unsigned char c) {
s->l_buf[sym_next] = c;
s->sym_next = sym_next + 1;
#else
+# if OPTIMAL_CMP >= 32
+ zng_memwrite_4(&s->sym_buf[sym_next], Z_U32_TO_LE((uint32_t)c << 16));
+# else
s->sym_buf[sym_next] = 0;
s->sym_buf[sym_next+1] = 0;
s->sym_buf[sym_next+2] = c;
+# endif
s->sym_next = sym_next + 3;
#endif
s->dyn_ltree[c].Freq++;
@@ -90,9 +95,13 @@ static inline int zng_tr_tally_dist(deflate_state* s, uint32_t dist, uint32_t le
s->l_buf[sym_next] = (uint8_t)len;
s->sym_next = sym_next + 1;
#else
+# if OPTIMAL_CMP >= 32
+ zng_memwrite_4(&s->sym_buf[sym_next], Z_U32_TO_LE(dist | ((uint32_t)len << 16)));
+# else
s->sym_buf[sym_next] = (uint8_t)(dist);
s->sym_buf[sym_next+1] = (uint8_t)(dist >> 8);
s->sym_buf[sym_next+2] = (uint8_t)len;
+# endif
s->sym_next = sym_next + 3;
#endif
s->matches++;
diff --git a/trees.c b/trees.c
index 28ea5d2f1f..0f35b68d93 100644
--- a/trees.c
+++ b/trees.c
@@ -730,9 +730,15 @@ static void compress_block(deflate_state *s, const ct_data *ltree, const ct_data
dist = d_buf[sx];
lc = l_buf[sx++];
#else
- dist = sym_buf[sx++] & 0xff;
- dist += (unsigned)(sym_buf[sx++] & 0xff) << 8;
- lc = sym_buf[sx++];
+# if OPTIMAL_CMP >= 32
+ uint32_t val = Z_U32_FROM_LE(zng_memread_4(&sym_buf[sx]));
+ dist = val & 0xffff;
+ lc = (val >> 16) & 0xff;
+# else
+ dist = sym_buf[sx] + ((unsigned)sym_buf[sx + 1] << 8);
+ lc = sym_buf[sx + 2];
+# endif
+ sx += 3;
#endif
if (dist == 0) {
zng_emit_lit(s, ltree, lc);