summaryrefslogtreecommitdiff
path: root/docs/handbook/neozip/huffman-coding.md
diff options
context:
space:
mode:
Diffstat (limited to 'docs/handbook/neozip/huffman-coding.md')
-rw-r--r--docs/handbook/neozip/huffman-coding.md643
1 files changed, 643 insertions, 0 deletions
diff --git a/docs/handbook/neozip/huffman-coding.md b/docs/handbook/neozip/huffman-coding.md
new file mode 100644
index 0000000000..51998941d5
--- /dev/null
+++ b/docs/handbook/neozip/huffman-coding.md
@@ -0,0 +1,643 @@
+# Huffman Coding
+
+## Overview
+
+Huffman coding is the heart of DEFLATE compression. It assigns
+variable-length bit codes to symbols: shorter codes for frequent symbols,
+longer codes for rare ones. Neozip implements full Huffman tree construction,
+code generation, and bitstream emission in `trees.c` and `trees_emit.h`.
+
+---
+
+## Data Structures
+
+### `ct_data` — Code/Tree Node
+
+```c
+typedef union ct_data_s {
+ struct {
+ uint16_t freq; // Frequency count (during tree building)
+ uint16_t code; // Bit string (after tree building)
+ };
+ struct {
+ uint16_t dad; // Father node in Huffman tree
+ uint16_t len; // Bit length of the code
+ };
+} ct_data;
+```
+
+The union reuses the same 4 bytes: during tree construction, `freq` and
+`dad` are used; after code generation, `code` and `len` replace them.
+
+### `tree_desc` — Tree Descriptor
+
+```c
+typedef struct tree_desc_s {
+ ct_data *dyn_tree; // The dynamic tree being built
+ int max_code; // Largest code with non-zero frequency
+ const static_tree_desc *stat_desc; // Corresponding static tree description
+} tree_desc;
+```
+
+Each deflate state maintains three tree descriptors:
+```c
+struct ALIGNED_(64) internal_state {
+ tree_desc l_desc; // Literal/length tree descriptor
+ tree_desc d_desc; // Distance tree descriptor
+ tree_desc bl_desc; // Bit-length tree descriptor (for encoding the dynamic trees)
+ // ...
+ ct_data dyn_ltree[HEAP_SIZE]; // Literal/length tree (2*L_CODES+1 = 573)
+ ct_data dyn_dtree[2*D_CODES+1]; // Distance tree (2*30+1 = 61)
+ ct_data bl_tree[2*BL_CODES+1]; // Bit-length tree (2*19+1 = 39)
+};
+```
+
+### `static_tree_desc` — Static Tree Description
+
+```c
+struct static_tree_desc_s {
+ const ct_data *static_tree; // Static tree (NULL for bit lengths)
+ const int *extra_bits; // Extra bits for each code (or NULL)
+ int extra_base; // First code with extra bits
+ int elems; // Maximum number of elements in tree
+ unsigned int max_length; // Maximum code bit length
+};
+```
+
+Three static descriptors exist:
+```c
+static const static_tree_desc static_l_desc = {
+ static_ltree, extra_lbits, LITERALS + 1, L_CODES, MAX_BITS
+};
+static const static_tree_desc static_d_desc = {
+ static_dtree, extra_dbits, 0, D_CODES, MAX_BITS
+};
+static const static_tree_desc static_bl_desc = {
+ NULL, extra_blbits, 0, BL_CODES, MAX_BL_BITS
+};
+```
+
+---
+
+## Constants
+
+```c
+#define L_CODES 286 // Number of literal/length codes (256 literals + END_BLOCK + 29 lengths)
+#define D_CODES 30 // Number of distance codes
+#define BL_CODES 19 // Number of bit-length codes
+#define HEAP_SIZE (2*L_CODES + 1) // = 573
+#define MAX_BITS 15 // Maximum Huffman code length
+#define MAX_BL_BITS 7 // Maximum bit-length code length
+#define END_BLOCK 256 // End of block symbol
+#define LITERALS 256 // Number of literal bytes (0..255)
+#define LENGTH_CODES 29 // Number of length codes (not counting END_BLOCK)
+```
+
+### Extra Bits Tables
+
+Length codes (257–285) carry 0–5 extra bits:
+```c
+static const int extra_lbits[LENGTH_CODES] = {
+ 0,0,0,0,0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3, 4,4,4,4, 5,5,5,5, 0
+};
+```
+
+Distance codes (0–29) carry 0–13 extra bits:
+```c
+static const int extra_dbits[D_CODES] = {
+ 0,0,0,0, 1,1, 2,2, 3,3, 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,10, 11,11, 12,12, 13,13
+};
+```
+
+Bit-length codes (used to encode dynamic tree) carry 0–7 extra bits:
+```c
+static const int extra_blbits[BL_CODES] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 2,3,7
+};
+```
+
+---
+
+## Tree Construction
+
+### `build_tree()`
+
+The main tree-building function:
+
+```c
+static void build_tree(deflate_state *s, tree_desc *desc) {
+ ct_data *tree = desc->dyn_tree;
+ const ct_data *stree = desc->stat_desc->static_tree;
+ int elems = desc->stat_desc->elems;
+ int n, m;
+ int max_code = -1;
+ int node;
+
+ // Step 1: Initialize the heap with leaf frequencies
+ s->heap_len = 0;
+ s->heap_max = HEAP_SIZE;
+
+ for (n = 0; n < elems; n++) {
+ if (tree[n].freq != 0) {
+ s->heap[++(s->heap_len)] = max_code = n;
+ s->depth[n] = 0;
+ } else {
+ tree[n].len = 0;
+ }
+ }
+
+ // Step 2: Ensure at least two codes exist
+ while (s->heap_len < 2) {
+ node = s->heap[++(s->heap_len)] =
+ (max_code < 2 ? ++max_code : 0);
+ tree[node].freq = 1;
+ s->depth[node] = 0;
+ }
+ desc->max_code = max_code;
+
+ // Step 3: Build the Huffman tree using a min-heap
+ // Nodes elems..HEAP_SIZE-1 are internal nodes
+ for (n = s->heap_len / 2; n >= 1; n--)
+ pqdownheap(s, tree, n);
+
+ node = elems;
+ do {
+ n = s->heap[1]; // Least frequent
+ pqremove(s, tree, n);
+ m = s->heap[1]; // Next least frequent
+
+ s->heap[--(s->heap_max)] = n;
+ s->heap[--(s->heap_max)] = m;
+
+ // Create internal node
+ tree[node].freq = tree[n].freq + tree[m].freq;
+ s->depth[node] = MAX(s->depth[n], s->depth[m]) + 1;
+ tree[n].dad = tree[m].dad = (uint16_t)node;
+
+ s->heap[1] = node++;
+ pqdownheap(s, tree, 1);
+ } while (s->heap_len >= 2);
+
+ s->heap[--(s->heap_max)] = s->heap[1];
+
+ // Step 4: Compute code lengths and generate codes
+ gen_bitlen(s, desc);
+ gen_codes(tree, max_code, s->bl_count);
+}
+```
+
+### `pqdownheap()` — Min-Heap Maintenance
+
+```c
+static void pqdownheap(deflate_state *s, ct_data *tree, int k) {
+ int v = s->heap[k];
+ int j = k << 1; // Left child
+
+ while (j <= s->heap_len) {
+ // Select smaller child
+ if (j < s->heap_len &&
+ SMALLER(tree, s->heap[j+1], s->heap[j], s->depth))
+ j++;
+ // If v is smaller than both children, stop
+ if (SMALLER(tree, v, s->heap[j], s->depth))
+ break;
+ s->heap[k] = s->heap[j];
+ k = j;
+ j <<= 1;
+ }
+ s->heap[k] = v;
+}
+```
+
+The `SMALLER` macro compares by frequency first, then by depth:
+```c
+#define SMALLER(tree, n, m, depth) \
+ (tree[n].freq < tree[m].freq || \
+ (tree[n].freq == tree[m].freq && depth[n] <= depth[m]))
+```
+
+### `gen_bitlen()` — Bit Length Generation
+
+Converts the tree structure into code lengths, enforcing the maximum
+code length constraint:
+
+```c
+static void gen_bitlen(deflate_state *s, tree_desc *desc) {
+ ct_data *tree = desc->dyn_tree;
+ int max_code = desc->max_code;
+ const ct_data *stree = desc->stat_desc->static_tree;
+ const int *extra = desc->stat_desc->extra_bits;
+ int base = desc->stat_desc->extra_base;
+ int max_length = desc->stat_desc->max_length;
+ int overflow = 0;
+
+ // Traverse the tree via heap and set bit lengths
+ for (int bits = 0; bits <= MAX_BITS; bits++)
+ s->bl_count[bits] = 0;
+
+ tree[s->heap[s->heap_max]].len = 0; // Root has length 0
+
+ for (int h = s->heap_max + 1; h < HEAP_SIZE; h++) {
+ int n = s->heap[h];
+ int bits = tree[tree[n].dad].len + 1;
+ if (bits > max_length) {
+ bits = max_length;
+ overflow++;
+ }
+ tree[n].len = (uint16_t)bits;
+ if (n > max_code) continue; // Not a leaf
+
+ s->bl_count[bits]++;
+ // Account for extra bits in cost calculation
+ }
+
+ if (overflow == 0) return;
+
+ // Adjust bit lengths to stay within max_length
+ // Find the deepest non-full level and redistribute
+ // ...
+}
+```
+
+### `gen_codes()` — Code Generation
+
+Converts bit lengths into canonical Huffman codes:
+
+```c
+static void gen_codes(ct_data *tree, int max_code, uint16_t *bl_count) {
+ uint16_t next_code[MAX_BITS + 1];
+ unsigned code = 0;
+
+ // Step 1: Compute the first code for each bit length
+ for (int bits = 1; bits <= MAX_BITS; bits++) {
+ code = (code + bl_count[bits - 1]) << 1;
+ next_code[bits] = (uint16_t)code;
+ }
+
+ // Step 2: Assign codes
+ for (int n = 0; n <= max_code; n++) {
+ int len = tree[n].len;
+ if (len == 0) continue;
+ tree[n].code = (uint16_t)bi_reverse(next_code[len]++, len);
+ }
+}
+```
+
+The `bi_reverse()` function reverses the bit order because DEFLATE uses
+reversed (LSB-first) Huffman codes.
+
+---
+
+## Static Huffman Trees
+
+DEFLATE defines fixed Huffman tables for BTYPE=01 blocks:
+
+**Literal/Length codes**:
+| Value | Bits | Codes |
+|---|---|---|
+| 0–143 | 8 | 00110000 – 10111111 |
+| 144–255 | 9 | 110010000 – 111111111 |
+| 256–279 | 7 | 0000000 – 0010111 |
+| 280–287 | 8 | 11000000 – 11000111 |
+
+**Distance codes**: All 30 codes use 5 bits (0–29).
+
+Static tables are precomputed:
+```c
+static const ct_data static_ltree[L_CODES + 2];
+static const ct_data static_dtree[D_CODES];
+```
+
+---
+
+## Dynamic Tree Encoding
+
+For BTYPE=10 blocks, the Huffman trees must be transmitted before the data.
+DEFLATE encodes the trees themselves using a third Huffman tree (the
+"bit-length tree").
+
+### Tree Encoding Steps
+
+1. **`scan_tree()`** — Find repeat patterns in the code length sequence:
+
+```c
+static void scan_tree(deflate_state *s, ct_data *tree, int max_code) {
+ int prevlen = -1;
+ int curlen;
+ int nextlen = tree[0].len;
+ int count = 0;
+ int max_count = 7;
+ int min_count = 4;
+
+ for (int n = 0; n <= max_code; n++) {
+ curlen = nextlen;
+ nextlen = tree[n + 1].len;
+ if (++count < max_count && curlen == nextlen) continue;
+
+ if (count < min_count) {
+ s->bl_tree[curlen].freq += count;
+ } else if (curlen != 0) {
+ if (curlen != prevlen) s->bl_tree[curlen].freq++;
+ s->bl_tree[REP_3_6].freq++; // Code 16: repeat 3-6 times
+ } else if (count <= 10) {
+ s->bl_tree[REPZ_3_10].freq++; // Code 17: repeat 0, 3-10 times
+ } else {
+ s->bl_tree[REPZ_11_138].freq++; // Code 18: repeat 0, 11-138 times
+ }
+ // Reset for next sequence
+ count = 0;
+ prevlen = curlen;
+ }
+}
+```
+
+Special codes for run-length encoding:
+```c
+#define REP_3_6 16 // Repeat previous length, 3-6 times (2 extra bits)
+#define REPZ_3_10 17 // Repeat a zero length, 3-10 times (3 extra bits)
+#define REPZ_11_138 18 // Repeat a zero length, 11-138 times (7 extra bits)
+```
+
+2. **`send_tree()`** — Emit the encoded tree to the bitstream:
+
+```c
+static void send_tree(deflate_state *s, ct_data *tree, int max_code) {
+ int prevlen = -1;
+ int curlen, nextlen = tree[0].len;
+ int count = 0;
+
+ for (int n = 0; n <= max_code; n++) {
+ curlen = nextlen;
+ nextlen = tree[n + 1].len;
+ if (++count < max_count && curlen == nextlen) continue;
+
+ if (count < min_count) {
+ do { send_code(s, curlen, s->bl_tree); } while (--count);
+ } else if (curlen != 0) {
+ if (curlen != prevlen) {
+ send_code(s, curlen, s->bl_tree);
+ count--;
+ }
+ send_code(s, REP_3_6, s->bl_tree);
+ send_bits(s, count - 3, 2); // 2 extra bits
+ } else if (count <= 10) {
+ send_code(s, REPZ_3_10, s->bl_tree);
+ send_bits(s, count - 3, 3); // 3 extra bits
+ } else {
+ send_code(s, REPZ_11_138, s->bl_tree);
+ send_bits(s, count - 11, 7); // 7 extra bits
+ }
+ count = 0;
+ prevlen = curlen;
+ }
+}
+```
+
+3. **Bit length code order** — The 19 bit-length codes are transmitted in
+ a permuted order to minimize trailing zeros:
+
+```c
+static const uint8_t bl_order[BL_CODES] = {
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+};
+```
+
+---
+
+## Bit Output Engine
+
+### `send_bits()` — 64-bit Bit Buffer
+
+From `trees_emit.h`:
+
+```c
+static inline void send_bits(deflate_state *s, int value, unsigned length) {
+ s->bi_buf |= ((uint64_t)value << s->bi_valid);
+ s->bi_valid += length;
+
+ if (s->bi_valid >= BIT_BUF_SIZE) { // BIT_BUF_SIZE = 64
+ put_uint64(s, s->bi_buf); // Flush 8 bytes to pending buffer
+ s->bi_valid -= BIT_BUF_SIZE;
+ s->bi_buf = (uint64_t)value >> (length - s->bi_valid);
+ }
+}
+```
+
+The 64-bit `bi_buf` accumulates bits and flushes complete 8-byte words to
+the pending output buffer. This is more efficient than the traditional
+16-bit buffer used in original zlib.
+
+### `send_code()` — Emit a Huffman Code
+
+```c
+#define send_code(s, c, tree) send_bits(s, tree[c].code, tree[c].len)
+```
+
+### `bi_windup()` — Byte-Align the Output
+
+```c
+static inline void bi_windup(deflate_state *s) {
+ if (s->bi_valid > 56) {
+ put_uint64(s, s->bi_buf);
+ } else {
+ // Flush remaining bytes
+ while (s->bi_valid >= 8) {
+ put_byte(s, s->bi_buf & 0xff);
+ s->bi_buf >>= 8;
+ s->bi_valid -= 8;
+ }
+ }
+ s->bi_buf = 0;
+ s->bi_valid = 0;
+}
+```
+
+---
+
+## Block Emission
+
+### `compress_block()`
+
+Emits all symbols in the current block using Huffman codes:
+
+```c
+static void compress_block(deflate_state *s, const ct_data *ltree, const ct_data *dtree) {
+ uint32_t sym_buf_index = 0;
+ unsigned dist, lc, code;
+
+ if (s->sym_next != 0) {
+ do {
+ // Read (distance, length/literal) from symbol buffer
+ dist = s->sym_buf[sym_buf_index++];
+ dist |= (uint32_t)s->sym_buf[sym_buf_index++] << 8;
+ lc = s->sym_buf[sym_buf_index++];
+
+ if (dist == 0) {
+ // Literal byte
+ send_code(s, lc, ltree);
+ } else {
+ // Length/distance pair
+ code = zng_length_code[lc];
+ send_code(s, code + LITERALS + 1, ltree); // Length code
+ int extra = extra_lbits[code];
+ if (extra) send_bits(s, lc - base_length[code], extra);
+
+ dist--;
+ code = d_code(dist);
+ send_code(s, code, dtree); // Distance code
+ extra = extra_dbits[code];
+ if (extra) send_bits(s, dist - base_dist[code], extra);
+ }
+ } while (sym_buf_index < s->sym_next);
+ }
+
+ send_code(s, END_BLOCK, ltree); // Emit end-of-block
+}
+```
+
+### Combined Base+Extra Tables
+
+From `trees_emit.h`, base values and extra bits are combined into tables
+for faster lookup:
+
+```c
+struct lut_pair {
+ uint16_t base;
+ uint8_t extra;
+};
+
+// Length base values and extra bits (indexed by length code)
+static const struct lut_pair base_length_lut[LENGTH_CODES] = {
+ {0,0}, {1,0}, {2,0}, {3,0}, {4,0}, {5,0}, {6,0}, {7,0},
+ {8,1}, {10,1}, {12,1}, {14,1},
+ {16,2}, {20,2}, {24,2}, {28,2},
+ {32,3}, {40,3}, {48,3}, {56,3},
+ {64,4}, {80,4}, {96,4}, {112,4},
+ {128,5}, {160,5}, {192,5}, {224,5}, {0,0}
+};
+
+// Distance base values and extra bits (indexed by distance code)
+static const struct lut_pair base_dist_lut[D_CODES] = { ... };
+```
+
+---
+
+## Block Type Selection
+
+### `zng_tr_flush_block()`
+
+Decides the block type (stored, static, or dynamic) and emits the block:
+
+```c
+void Z_INTERNAL zng_tr_flush_block(deflate_state *s, char *buf,
+ uint32_t stored_len, int last) {
+ uint32_t opt_lenb, static_lenb;
+ int max_blindex = 0;
+
+ if (s->level > 0) {
+ // Build dynamic trees
+ build_tree(s, &(s->l_desc));
+ build_tree(s, &(s->d_desc));
+
+ // Determine number of bit-length codes to send
+ max_blindex = build_bl_tree(s);
+
+ // Compute block sizes
+ opt_lenb = (s->opt_len + 3 + 7) >> 3; // Dynamic block size
+ static_lenb = (s->static_len + 3 + 7) >> 3; // Static block size
+ } else {
+ opt_lenb = static_lenb = stored_len + 5;
+ }
+
+ if (stored_len + 4 <= opt_lenb && buf != NULL) {
+ // Stored block is smallest
+ zng_tr_stored_block(s, buf, stored_len, last);
+ } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
+ // Static Huffman block
+ send_bits(s, (STATIC_TREES << 1) + last, 3);
+ compress_block(s, static_ltree, static_dtree);
+ } else {
+ // Dynamic Huffman block
+ send_bits(s, (DYN_TREES << 1) + last, 3);
+ send_all_trees(s, s->l_desc.max_code + 1,
+ s->d_desc.max_code + 1,
+ max_blindex + 1);
+ compress_block(s, s->dyn_ltree, s->dyn_dtree);
+ }
+
+ init_block(s); // Reset for next block
+
+ if (last) bi_windup(s); // Byte-align the final block
+}
+```
+
+Block type constants:
+```c
+#define STORED_BLOCK 0
+#define STATIC_TREES 1
+#define DYN_TREES 2
+```
+
+### `init_block()` — Reset Tree State
+
+```c
+void Z_INTERNAL init_block(deflate_state *s) {
+ // Reset literal/length frequencies
+ for (int n = 0; n < L_CODES; n++)
+ s->dyn_ltree[n].freq = 0;
+ // Reset distance frequencies
+ for (int n = 0; n < D_CODES; n++)
+ s->dyn_dtree[n].freq = 0;
+ // Reset bit-length frequencies
+ for (int n = 0; n < BL_CODES; n++)
+ s->bl_tree[n].freq = 0;
+
+ s->dyn_ltree[END_BLOCK].freq = 1;
+ s->opt_len = s->static_len = 0;
+ s->sym_next = s->matches = 0;
+}
+```
+
+---
+
+## Symbol Buffer
+
+During compression, literals and length/distance pairs are recorded in the
+symbol buffer before Huffman encoding:
+
+```c
+// In deflate_state:
+unsigned char *sym_buf; // Buffer for literals and matches
+uint32_t sym_next; // Next free position
+uint32_t sym_end; // Size of sym_buf (lit_bufsize * 3)
+```
+
+Each symbol occupies 3 bytes:
+- **Literal**: `{ 0x00, 0x00, byte_value }`
+- **Match**: `{ dist_low, dist_high, length - STD_MIN_MATCH }`
+
+```c
+// Record a literal
+static inline int zng_tr_tally_lit(deflate_state *s, unsigned char c) {
+ s->sym_buf[s->sym_next++] = 0;
+ s->sym_buf[s->sym_next++] = 0;
+ s->sym_buf[s->sym_next++] = c;
+ s->dyn_ltree[c].freq++;
+ return (s->sym_next == s->sym_end);
+}
+
+// Record a match
+static inline int zng_tr_tally_dist(deflate_state *s, uint32_t dist,
+ uint32_t len) {
+ s->sym_buf[s->sym_next++] = (uint8_t)(dist);
+ s->sym_buf[s->sym_next++] = (uint8_t)(dist >> 8);
+ s->sym_buf[s->sym_next++] = (uint8_t)len;
+ s->matches++;
+ dist--;
+ s->dyn_ltree[zng_length_code[len] + LITERALS + 1].freq++;
+ s->dyn_dtree[d_code(dist)].freq++;
+ return (s->sym_next == s->sym_end);
+}
+```
+
+When the buffer is full (`sym_next == sym_end`), the block is flushed.