diff options
| author | Nathan Moinvaziri <nathan@nathanm.com> | 2026-01-20 11:34:22 -0800 |
|---|---|---|
| committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2026-02-20 11:02:12 +0100 |
| commit | d225a913909176588060c2d5eb1d58bacd11c8c8 (patch) | |
| tree | 947640780c467b6499b74de156f54f3c8ab57b13 | |
| parent | 87b0f1d597b6a5d24ff1622e8c6a50db4ab20c65 (diff) | |
| download | Project-Tick-d225a913909176588060c2d5eb1d58bacd11c8c8.tar.gz Project-Tick-d225a913909176588060c2d5eb1d58bacd11c8c8.zip | |
Keep bi_buf/bi_valid in registers across compress_block loop
Refactor the emit functions to take bi_buf and bi_valid by reference,
allowing compress_block() to keep these values in CPU registers for the
entire duration of the main compression loop instead of reloading them
from memory on every iteration.
This eliminates two memory loads (s->bi_buf, s->bi_valid) and two memory
stores per symbol in the hot path.
| -rw-r--r-- | trees.c | 14 |
| -rw-r--r-- | trees_emit.h | 54 |
2 files changed, 37 insertions, 31 deletions
@@ -724,6 +724,10 @@ static void compress_block(deflate_state *s, const ct_data *ltree, const ct_data unsigned char *sym_buf = s->sym_buf; #endif + /* Keep bi_buf and bi_valid in registers across the entire loop */ + uint64_t bi_buf = s->bi_buf; + uint32_t bi_valid = s->bi_valid; + if (sym_next != 0) { do { #ifdef LIT_MEM @@ -741,9 +745,9 @@ static void compress_block(deflate_state *s, const ct_data *ltree, const ct_data sx += 3; #endif if (dist == 0) { - zng_emit_lit(s, ltree, lc); + zng_emit_lit(s, ltree, lc, &bi_buf, &bi_valid); } else { - zng_emit_dist(s, ltree, dtree, lc, dist); + zng_emit_dist(s, ltree, dtree, lc, dist, &bi_buf, &bi_valid); } /* literal or match pair ? */ /* Check for no overlay of pending_buf on needed symbols */ @@ -755,7 +759,11 @@ static void compress_block(deflate_state *s, const ct_data *ltree, const ct_data } while (sx < sym_next); } - zng_emit_end_block(s, ltree, 0); + zng_emit_end_block(s, ltree, 0, &bi_buf, &bi_valid); + + /* Write back to state */ + s->bi_buf = bi_buf; + s->bi_valid = bi_valid; } /* =========================================================================== diff --git a/trees_emit.h b/trees_emit.h index 1585dbd756..2a18fe28d3 100644 --- a/trees_emit.h +++ b/trees_emit.h @@ -103,31 +103,21 @@ static inline void bi_windup(deflate_state *s) { /* =========================================================================== * Emit literal code */ -static inline uint32_t zng_emit_lit(deflate_state *s, const ct_data *ltree, unsigned c) { - uint32_t bi_valid = s->bi_valid; - uint64_t bi_buf = s->bi_buf; - - send_code(s, c, ltree, bi_buf, bi_valid); - - s->bi_valid = bi_valid; - s->bi_buf = bi_buf; - +static inline void zng_emit_lit(deflate_state *s, const ct_data *ltree, unsigned c, + uint64_t *bi_buf, uint32_t *bi_valid) { + send_code(s, c, ltree, *bi_buf, *bi_valid); Tracecv(isgraph(c & 0xff), (stderr, " '%c' ", c)); - - return ltree[c].Len; } /* =========================================================================== * 
Emit match distance/length code */ -static uint32_t zng_emit_dist(deflate_state *s, const ct_data *ltree, const ct_data *dtree, - uint32_t lc, uint32_t dist) { +static inline uint32_t zng_emit_dist(deflate_state *s, const ct_data *ltree, const ct_data *dtree, + uint32_t lc, uint32_t dist, uint64_t *bi_buf, uint32_t *bi_valid) { uint32_t c, extra; uint8_t code; uint64_t match_bits; uint32_t match_bits_len; - uint32_t bi_valid = s->bi_valid; - uint64_t bi_buf = s->bi_buf; /* Send the length code, len is the match length - STD_MIN_MATCH */ code = zng_length_code[lc]; @@ -159,10 +149,7 @@ static uint32_t zng_emit_dist(deflate_state *s, const ct_data *ltree, const ct_d match_bits_len += extra; } - send_bits(s, match_bits, match_bits_len, bi_buf, bi_valid); - - s->bi_valid = bi_valid; - s->bi_buf = bi_buf; + send_bits(s, match_bits, match_bits_len, *bi_buf, *bi_valid); return match_bits_len; } @@ -170,12 +157,9 @@ static uint32_t zng_emit_dist(deflate_state *s, const ct_data *ltree, const ct_d /* =========================================================================== * Emit end block */ -static inline void zng_emit_end_block(deflate_state *s, const ct_data *ltree, const int last) { - uint32_t bi_valid = s->bi_valid; - uint64_t bi_buf = s->bi_buf; - send_code(s, END_BLOCK, ltree, bi_buf, bi_valid); - s->bi_valid = bi_valid; - s->bi_buf = bi_buf; +static inline void zng_emit_end_block(deflate_state *s, const ct_data *ltree, const int last, + uint64_t *bi_buf, uint32_t *bi_valid) { + send_code(s, END_BLOCK, ltree, *bi_buf, *bi_valid); Tracev((stderr, "\n+++ Emit End Block: Last: %u Pending: %u Total Out: %" PRIu64 "\n", last, s->pending, (uint64_t)s->strm->total_out)); Z_UNUSED(last); @@ -185,7 +169,12 @@ static inline void zng_emit_end_block(deflate_state *s, const ct_data *ltree, co * Emit literal and count bits */ static inline void zng_tr_emit_lit(deflate_state *s, const ct_data *ltree, unsigned c) { - cmpr_bits_add(s, zng_emit_lit(s, ltree, c)); + uint64_t bi_buf = 
s->bi_buf; + uint32_t bi_valid = s->bi_valid; + zng_emit_lit(s, ltree, c, &bi_buf, &bi_valid); + s->bi_buf = bi_buf; + s->bi_valid = bi_valid; + cmpr_bits_add(s, ltree[c].Len); } /* =========================================================================== @@ -193,7 +182,12 @@ static inline void zng_tr_emit_lit(deflate_state *s, const ct_data *ltree, unsig */ static inline void zng_tr_emit_dist(deflate_state *s, const ct_data *ltree, const ct_data *dtree, uint32_t lc, uint32_t dist) { - cmpr_bits_add(s, zng_emit_dist(s, ltree, dtree, lc, dist)); + uint64_t bi_buf = s->bi_buf; + uint32_t bi_valid = s->bi_valid; + uint32_t bits = zng_emit_dist(s, ltree, dtree, lc, dist, &bi_buf, &bi_valid); + s->bi_buf = bi_buf; + s->bi_valid = bi_valid; + cmpr_bits_add(s, bits); } /* =========================================================================== @@ -222,7 +216,11 @@ static inline void zng_tr_emit_align(deflate_state *s) { * Emit an end block and align bit buffer if last block */ static inline void zng_tr_emit_end_block(deflate_state *s, const ct_data *ltree, const int last) { - zng_emit_end_block(s, ltree, last); + uint64_t bi_buf = s->bi_buf; + uint32_t bi_valid = s->bi_valid; + zng_emit_end_block(s, ltree, last, &bi_buf, &bi_valid); + s->bi_buf = bi_buf; + s->bi_valid = bi_valid; cmpr_bits_add(s, 7); if (last) zng_tr_emit_align(s); |
