summaryrefslogtreecommitdiff
path: root/docs
diff options
context:
space:
mode:
authorMehmet Samet Duman <yongdohyun@projecttick.org>2026-04-05 17:37:54 +0300
committerMehmet Samet Duman <yongdohyun@projecttick.org>2026-04-05 17:37:54 +0300
commit32f5f761bc8e960293b4f4feaf973dd0da26d0f8 (patch)
tree8d0436fdd093d5255c3b75e45f9741882b22e2e4 /docs
parent64f4ddfa97c19f371fe1847b20bd26803f0a25d5 (diff)
downloadProject-Tick-32f5f761bc8e960293b4f4feaf973dd0da26d0f8.tar.gz
Project-Tick-32f5f761bc8e960293b4f4feaf973dd0da26d0f8.zip
NOISSUE Project Tick Handbook is Released!
Assisted-by: Claude:Opus-4.6-High Signed-off-by: Mehmet Samet Duman <yongdohyun@projecttick.org>
Diffstat (limited to 'docs')
-rw-r--r--docs/handbook/Project-Tick/architecture.md579
-rw-r--r--docs/handbook/Project-Tick/build-systems.md711
-rw-r--r--docs/handbook/Project-Tick/ci-cd-pipeline.md599
-rw-r--r--docs/handbook/Project-Tick/coding-standards.md558
-rw-r--r--docs/handbook/Project-Tick/contributing.md545
-rw-r--r--docs/handbook/Project-Tick/faq.md683
-rw-r--r--docs/handbook/Project-Tick/getting-started.md637
-rw-r--r--docs/handbook/Project-Tick/glossary.md556
-rw-r--r--docs/handbook/Project-Tick/licensing.md371
-rw-r--r--docs/handbook/Project-Tick/overview.md335
-rw-r--r--docs/handbook/Project-Tick/release-process.md374
-rw-r--r--docs/handbook/Project-Tick/repository-structure.md625
-rw-r--r--docs/handbook/Project-Tick/security-policy.md282
-rw-r--r--docs/handbook/Project-Tick/trademark-policy.md283
-rw-r--r--docs/handbook/archived/overview.md275
-rw-r--r--docs/handbook/archived/projt-launcher.md444
-rw-r--r--docs/handbook/archived/projt-modpack.md245
-rw-r--r--docs/handbook/archived/ptlibzippy.md501
-rw-r--r--docs/handbook/cgit/api-reference.md468
-rw-r--r--docs/handbook/cgit/architecture.md422
-rw-r--r--docs/handbook/cgit/authentication.md288
-rw-r--r--docs/handbook/cgit/building.md272
-rw-r--r--docs/handbook/cgit/caching-system.md287
-rw-r--r--docs/handbook/cgit/code-style.md356
-rw-r--r--docs/handbook/cgit/configuration.md351
-rw-r--r--docs/handbook/cgit/css-theming.md522
-rw-r--r--docs/handbook/cgit/deployment.md369
-rw-r--r--docs/handbook/cgit/diff-engine.md352
-rw-r--r--docs/handbook/cgit/filter-system.md358
-rw-r--r--docs/handbook/cgit/html-rendering.md380
-rw-r--r--docs/handbook/cgit/lua-integration.md428
-rw-r--r--docs/handbook/cgit/overview.md262
-rw-r--r--docs/handbook/cgit/repository-discovery.md355
-rw-r--r--docs/handbook/cgit/snapshot-system.md246
-rw-r--r--docs/handbook/cgit/testing.md335
-rw-r--r--docs/handbook/cgit/ui-modules.md544
-rw-r--r--docs/handbook/cgit/url-routing.md331
-rw-r--r--docs/handbook/ci/branch-strategy.md388
-rw-r--r--docs/handbook/ci/codeowners.md370
-rw-r--r--docs/handbook/ci/commit-linting.md418
-rw-r--r--docs/handbook/ci/formatting.md298
-rw-r--r--docs/handbook/ci/nix-infrastructure.md611
-rw-r--r--docs/handbook/ci/overview.md494
-rw-r--r--docs/handbook/ci/pr-validation.md378
-rw-r--r--docs/handbook/ci/rate-limiting.md321
-rw-r--r--docs/handbook/cmark/architecture.md283
-rw-r--r--docs/handbook/cmark/ast-node-system.md383
-rw-r--r--docs/handbook/cmark/block-parsing.md310
-rw-r--r--docs/handbook/cmark/building.md268
-rw-r--r--docs/handbook/cmark/cli-usage.md249
-rw-r--r--docs/handbook/cmark/code-style.md293
-rw-r--r--docs/handbook/cmark/commonmark-renderer.md344
-rw-r--r--docs/handbook/cmark/html-renderer.md258
-rw-r--r--docs/handbook/cmark/inline-parsing.md317
-rw-r--r--docs/handbook/cmark/iterator-system.md267
-rw-r--r--docs/handbook/cmark/latex-renderer.md320
-rw-r--r--docs/handbook/cmark/man-renderer.md272
-rw-r--r--docs/handbook/cmark/memory-management.md351
-rw-r--r--docs/handbook/cmark/overview.md256
-rw-r--r--docs/handbook/cmark/public-api.md637
-rw-r--r--docs/handbook/cmark/reference-system.md307
-rw-r--r--docs/handbook/cmark/render-framework.md294
-rw-r--r--docs/handbook/cmark/scanner-system.md223
-rw-r--r--docs/handbook/cmark/testing.md281
-rw-r--r--docs/handbook/cmark/utf8-handling.md340
-rw-r--r--docs/handbook/cmark/xml-renderer.md291
-rw-r--r--docs/handbook/corebinutils/architecture.md665
-rw-r--r--docs/handbook/corebinutils/building.md429
-rw-r--r--docs/handbook/corebinutils/cat.md211
-rw-r--r--docs/handbook/corebinutils/chmod.md296
-rw-r--r--docs/handbook/corebinutils/code-style.md351
-rw-r--r--docs/handbook/corebinutils/cp.md270
-rw-r--r--docs/handbook/corebinutils/date.md352
-rw-r--r--docs/handbook/corebinutils/dd.md407
-rw-r--r--docs/handbook/corebinutils/df.md264
-rw-r--r--docs/handbook/corebinutils/echo.md158
-rw-r--r--docs/handbook/corebinutils/ed.md306
-rw-r--r--docs/handbook/corebinutils/error-handling.md315
-rw-r--r--docs/handbook/corebinutils/expr.md194
-rw-r--r--docs/handbook/corebinutils/hostname.md154
-rw-r--r--docs/handbook/corebinutils/kill.md237
-rw-r--r--docs/handbook/corebinutils/ln.md190
-rw-r--r--docs/handbook/corebinutils/ls.md314
-rw-r--r--docs/handbook/corebinutils/mkdir.md194
-rw-r--r--docs/handbook/corebinutils/mv.md285
-rw-r--r--docs/handbook/corebinutils/overview.md362
-rw-r--r--docs/handbook/corebinutils/ps.md298
-rw-r--r--docs/handbook/corebinutils/pwd.md152
-rw-r--r--docs/handbook/corebinutils/realpath.md119
-rw-r--r--docs/handbook/corebinutils/rm.md293
-rw-r--r--docs/handbook/corebinutils/sleep.md218
-rw-r--r--docs/handbook/corebinutils/test.md248
-rw-r--r--docs/handbook/corebinutils/timeout.md297
-rw-r--r--docs/handbook/forgewrapper/architecture.md1202
-rw-r--r--docs/handbook/forgewrapper/building.md1843
-rw-r--r--docs/handbook/forgewrapper/overview.md270
-rw-r--r--docs/handbook/genqrcode/architecture.md948
-rw-r--r--docs/handbook/genqrcode/building.md570
-rw-r--r--docs/handbook/genqrcode/cli-usage.md382
-rw-r--r--docs/handbook/genqrcode/code-style.md351
-rw-r--r--docs/handbook/genqrcode/encoding-modes.md591
-rw-r--r--docs/handbook/genqrcode/error-correction.md455
-rw-r--r--docs/handbook/genqrcode/masking-algorithms.md578
-rw-r--r--docs/handbook/genqrcode/micro-qr.md456
-rw-r--r--docs/handbook/genqrcode/overview.md502
-rw-r--r--docs/handbook/genqrcode/public-api.md912
-rw-r--r--docs/handbook/genqrcode/reed-solomon.md347
-rw-r--r--docs/handbook/genqrcode/testing.md398
-rw-r--r--docs/handbook/hooks/logging-system.md492
-rw-r--r--docs/handbook/hooks/mirror-configuration.md627
-rw-r--r--docs/handbook/hooks/notification-system.md538
-rw-r--r--docs/handbook/hooks/overview.md712
-rw-r--r--docs/handbook/hooks/post-receive-hook.md778
-rw-r--r--docs/handbook/images4docker/architecture.md504
-rw-r--r--docs/handbook/images4docker/base-images.md825
-rw-r--r--docs/handbook/images4docker/ci-cd-integration.md396
-rw-r--r--docs/handbook/images4docker/creating-new-images.md338
-rw-r--r--docs/handbook/images4docker/overview.md304
-rw-r--r--docs/handbook/images4docker/qt6-verification.md283
-rw-r--r--docs/handbook/images4docker/troubleshooting.md395
-rw-r--r--docs/handbook/json4cpp/architecture.md613
-rw-r--r--docs/handbook/json4cpp/basic-usage.md601
-rw-r--r--docs/handbook/json4cpp/binary-formats.md411
-rw-r--r--docs/handbook/json4cpp/building.md430
-rw-r--r--docs/handbook/json4cpp/code-style.md209
-rw-r--r--docs/handbook/json4cpp/custom-types.md465
-rw-r--r--docs/handbook/json4cpp/element-access.md581
-rw-r--r--docs/handbook/json4cpp/exception-handling.md368
-rw-r--r--docs/handbook/json4cpp/iteration.md339
-rw-r--r--docs/handbook/json4cpp/json-patch.md341
-rw-r--r--docs/handbook/json4cpp/json-pointer.md361
-rw-r--r--docs/handbook/json4cpp/overview.md330
-rw-r--r--docs/handbook/json4cpp/parsing-internals.md493
-rw-r--r--docs/handbook/json4cpp/performance.md275
-rw-r--r--docs/handbook/json4cpp/sax-interface.md337
-rw-r--r--docs/handbook/json4cpp/serialization.md528
-rw-r--r--docs/handbook/json4cpp/testing.md190
-rw-r--r--docs/handbook/json4cpp/value-types.md474
-rw-r--r--docs/handbook/libnbtplusplus/architecture.md607
-rw-r--r--docs/handbook/libnbtplusplus/building.md401
-rw-r--r--docs/handbook/libnbtplusplus/code-style.md299
-rw-r--r--docs/handbook/libnbtplusplus/compound-tags.md602
-rw-r--r--docs/handbook/libnbtplusplus/endian-handling.md359
-rw-r--r--docs/handbook/libnbtplusplus/io-system.md672
-rw-r--r--docs/handbook/libnbtplusplus/list-tags.md682
-rw-r--r--docs/handbook/libnbtplusplus/overview.md422
-rw-r--r--docs/handbook/libnbtplusplus/tag-system.md643
-rw-r--r--docs/handbook/libnbtplusplus/testing.md291
-rw-r--r--docs/handbook/libnbtplusplus/visitor-pattern.md333
-rw-r--r--docs/handbook/libnbtplusplus/zlib-integration.md514
-rw-r--r--docs/handbook/meshmc/account-management.md470
-rw-r--r--docs/handbook/meshmc/application-lifecycle.md373
-rw-r--r--docs/handbook/meshmc/architecture.md724
-rw-r--r--docs/handbook/meshmc/building.md554
-rw-r--r--docs/handbook/meshmc/code-style.md315
-rw-r--r--docs/handbook/meshmc/component-system.md540
-rw-r--r--docs/handbook/meshmc/contributing.md130
-rw-r--r--docs/handbook/meshmc/dependencies.md241
-rw-r--r--docs/handbook/meshmc/instance-management.md483
-rw-r--r--docs/handbook/meshmc/java-detection.md411
-rw-r--r--docs/handbook/meshmc/launch-system.md569
-rw-r--r--docs/handbook/meshmc/mod-system.md410
-rw-r--r--docs/handbook/meshmc/network-layer.md551
-rw-r--r--docs/handbook/meshmc/overview.md269
-rw-r--r--docs/handbook/meshmc/platform-support.md353
-rw-r--r--docs/handbook/meshmc/release-notes.md222
-rw-r--r--docs/handbook/meshmc/settings-system.md402
-rw-r--r--docs/handbook/meshmc/theme-system.md417
-rw-r--r--docs/handbook/meshmc/ui-system.md511
-rw-r--r--docs/handbook/meta/architecture.md624
-rw-r--r--docs/handbook/meta/data-models.md582
-rw-r--r--docs/handbook/meta/deployment.md285
-rw-r--r--docs/handbook/meta/fabric-metadata.md323
-rw-r--r--docs/handbook/meta/forge-metadata.md492
-rw-r--r--docs/handbook/meta/java-runtime-metadata.md546
-rw-r--r--docs/handbook/meta/mojang-metadata.md480
-rw-r--r--docs/handbook/meta/neoforge-metadata.md334
-rw-r--r--docs/handbook/meta/overview.md386
-rw-r--r--docs/handbook/meta/quilt-metadata.md267
-rw-r--r--docs/handbook/meta/setup.md480
-rw-r--r--docs/handbook/meta/update-pipeline.md330
-rw-r--r--docs/handbook/mnv/architecture.md549
-rw-r--r--docs/handbook/mnv/building.md636
-rw-r--r--docs/handbook/mnv/code-style.md408
-rw-r--r--docs/handbook/mnv/contributing.md293
-rw-r--r--docs/handbook/mnv/gui-extension.md410
-rw-r--r--docs/handbook/mnv/overview.md381
-rw-r--r--docs/handbook/mnv/platform-support.md306
-rw-r--r--docs/handbook/mnv/scripting.md541
-rw-r--r--docs/handbook/neozip/api-reference.md459
-rw-r--r--docs/handbook/neozip/architecture.md1075
-rw-r--r--docs/handbook/neozip/arm-optimizations.md403
-rw-r--r--docs/handbook/neozip/building.md491
-rw-r--r--docs/handbook/neozip/checksum-algorithms.md461
-rw-r--r--docs/handbook/neozip/code-style.md259
-rw-r--r--docs/handbook/neozip/deflate-algorithms.md797
-rw-r--r--docs/handbook/neozip/gzip-support.md413
-rw-r--r--docs/handbook/neozip/hardware-acceleration.md447
-rw-r--r--docs/handbook/neozip/huffman-coding.md643
-rw-r--r--docs/handbook/neozip/inflate-engine.md665
-rw-r--r--docs/handbook/neozip/overview.md509
-rw-r--r--docs/handbook/neozip/performance-tuning.md361
-rw-r--r--docs/handbook/neozip/testing.md317
-rw-r--r--docs/handbook/neozip/x86-optimizations.md439
-rw-r--r--docs/handbook/ofborg/amqp-infrastructure.md631
-rw-r--r--docs/handbook/ofborg/architecture.md814
-rw-r--r--docs/handbook/ofborg/build-executor.md657
-rw-r--r--docs/handbook/ofborg/building.md530
-rw-r--r--docs/handbook/ofborg/code-style.md332
-rw-r--r--docs/handbook/ofborg/configuration.md472
-rw-r--r--docs/handbook/ofborg/contributing.md326
-rw-r--r--docs/handbook/ofborg/data-flow.md346
-rw-r--r--docs/handbook/ofborg/deployment.md413
-rw-r--r--docs/handbook/ofborg/evaluation-system.md602
-rw-r--r--docs/handbook/ofborg/github-integration.md603
-rw-r--r--docs/handbook/ofborg/message-system.md731
-rw-r--r--docs/handbook/ofborg/overview.md571
-rw-r--r--docs/handbook/ofborg/webhook-receiver.md470
-rw-r--r--docs/handbook/tomlplusplus/architecture.md920
-rw-r--r--docs/handbook/tomlplusplus/arrays.md625
-rw-r--r--docs/handbook/tomlplusplus/basic-usage.md705
-rw-r--r--docs/handbook/tomlplusplus/building.md474
-rw-r--r--docs/handbook/tomlplusplus/code-style.md277
-rw-r--r--docs/handbook/tomlplusplus/formatting.md546
-rw-r--r--docs/handbook/tomlplusplus/node-system.md625
-rw-r--r--docs/handbook/tomlplusplus/overview.md474
-rw-r--r--docs/handbook/tomlplusplus/parsing.md494
-rw-r--r--docs/handbook/tomlplusplus/path-system.md412
-rw-r--r--docs/handbook/tomlplusplus/tables.md551
-rw-r--r--docs/handbook/tomlplusplus/testing.md226
-rw-r--r--docs/handbook/tomlplusplus/unicode-handling.md335
-rw-r--r--docs/handbook/tomlplusplus/values.md547
232 files changed, 101144 insertions, 0 deletions
diff --git a/docs/handbook/Project-Tick/architecture.md b/docs/handbook/Project-Tick/architecture.md
new file mode 100644
index 0000000000..9cb7d90eb2
--- /dev/null
+++ b/docs/handbook/Project-Tick/architecture.md
@@ -0,0 +1,579 @@
+# Project Tick — Mono-Repo Architecture
+
+## Architectural Philosophy
+
+Project Tick is structured as a unified monorepo where each top-level directory
+represents an independent component. This architecture provides:
+
+- **Atomic cross-project changes** — A single commit can update a library and
+ every consumer simultaneously, eliminating version skew.
+- **Unified CI** — One orchestrator workflow (`ci.yml`) detects which
+ sub-projects are affected by a change and dispatches builds accordingly.
+- **Shared tooling** — Nix flakes, lefthook hooks, REUSE compliance, and
+ code formatting apply uniformly across the entire tree.
+- **Independent buildability** — Despite living in one repository, each
+ sub-project maintains its own build system and can be built in isolation.
+
+---
+
+## Repository Layout
+
+```
+Project-Tick/
+├── .github/ # GitHub Actions, issue templates, CODEOWNERS
+│ ├── workflows/ # 50+ CI workflow files
+│ ├── ISSUE_TEMPLATE/ # Bug report, suggestion, RFC templates
+│ ├── CODEOWNERS # Ownership mapping for review routing
+│ ├── dco.yml # DCO bot configuration
+│ └── pull_request_template.md
+│
+├── LICENSES/ # 20 SPDX-compliant license texts
+├── REUSE.toml # Path-to-license mapping
+├── CONTRIBUTING.md # Contribution guidelines, CLA, DCO
+├── SECURITY.md # Vulnerability reporting policy
+├── TRADEMARK.md # Trademark and brand usage policy
+├── CODE_OF_CONDUCT.md # Code of Conduct v2
+├── README.md # Root README
+│
+├── flake.nix # Top-level Nix flake (dev shells, LLVM 22)
+├── flake.lock # Pinned Nix inputs
+├── bootstrap.sh # Linux/macOS dependency bootstrap
+├── bootstrap.cmd # Windows dependency bootstrap
+├── lefthook.yml # Git hooks (REUSE lint, checkpatch)
+│
+├── meshmc/ # MeshMC launcher (C++23, Qt 6, CMake)
+├── mnv/ # MNV text editor (C, Autotools/CMake)
+├── cgit/ # cgit Git web interface (C, Make)
+│
+├── neozip/ # Compression library (C, CMake)
+├── json4cpp/ # JSON library (C++, CMake/Meson)
+├── tomlplusplus/ # TOML library (C++17, Meson/CMake)
+├── libnbtplusplus/ # NBT library (C++, CMake)
+├── cmark/ # Markdown library (C, CMake)
+├── genqrcode/ # QR code library (C, CMake/Autotools)
+├── forgewrapper/ # Forge bootstrap (Java, Gradle)
+│
+├── corebinutils/ # BSD utility ports (C, Make)
+│
+├── meta/ # Metadata generator (Python, Poetry)
+├── ofborg/ # tickborg CI bot (Rust, Cargo)
+├── images4docker/ # Docker build environments (Dockerfile)
+├── ci/ # CI tooling (Nix, JavaScript)
+├── hooks/ # Git hook scripts
+│
+├── archived/ # Deprecated sub-projects
+│ ├── projt-launcher/
+│ ├── projt-modpack/
+│ ├── projt-minicraft-modpack/
+│ └── ptlibzippy/
+│
+└── docs/ # Documentation
+ └── handbook/ # Developer handbook by component
+```
+
+---
+
+## Dependency Graph
+
+### Compile-Time Dependencies
+
+MeshMC is the primary integration point. It consumes most of the library
+sub-projects either directly or indirectly:
+
+```
+meshmc
+├─── json4cpp # JSON configuration parsing
+│ └── (header-only, no transitive deps)
+│
+├─── tomlplusplus # TOML instance/mod configuration
+│ └── (header-only, no transitive deps)
+│
+├─── libnbtplusplus # Minecraft world/data NBT parsing
+│ └── zlib # Compressed NBT support (optional)
+│
+├─── neozip # General compression (zlib-compatible API)
+│ └── (CPU intrinsics, no library deps)
+│
+├─── cmark # Markdown changelog/news rendering
+│ └── (no deps)
+│
+├─── genqrcode # QR code generation for account linking
+│ └── libpng # PNG output (optional, for CLI tool)
+│
+├─── forgewrapper # Runtime: Forge mod loader bootstrap
+│ └── (Java SPI, no compile-time deps from meshmc)
+│
+├─── Qt 6 # External: GUI framework
+│ ├── Core, Widgets, Concurrent
+│ ├── Network, NetworkAuth
+│ ├── Test, Xml
+│ └── QuaZip (Qt 6) # ZIP archive handling
+│
+├─── libarchive # External: Archive extraction
+└─── ECM # External: Extra CMake Modules
+```
+
+### Runtime Dependencies
+
+```
+meshmc (running)
+├─── forgewrapper.jar # Loaded at Minecraft launch for Forge ≥1.13
+├─── meta/ JSON manifests # Fetched over HTTP for version discovery
+│ ├── Mojang versions
+│ ├── Forge / NeoForge versions
+│ ├── Fabric / Quilt versions
+│ └── Java runtime versions
+├─── JDK 17+ # For running Minecraft
+└─── System zlib / neozip # Linked at build time
+```
+
+### CI Dependencies
+
+```
+ci.yml (orchestrator)
+├─── ci/github-script/ # JavaScript: commit lint, PR prep, reviews
+│ ├── lint-commits.js # Conventional Commits validation
+│ ├── prepare.js # PR validation
+│ ├── reviews.js # Review state management
+│ └── withRateLimit.js # GitHub API rate limiting
+│
+├─── ci/default.nix # Nix: treefmt, codeowners-validator
+│ ├── treefmt-nix # Multi-language formatting
+│ │ ├── actionlint # GitHub Actions YAML lint
+│ │ ├── biome # JavaScript/TypeScript formatting
+│ │ ├── nixfmt # Nix formatting
+│ │ ├── yamlfmt # YAML formatting
+│ │ └── zizmor # GitHub Actions security scanning
+│ └── codeowners-validator # CODEOWNERS file validation
+│
+├─── ci/pinned.json # Pinned Nixpkgs revision
+│
+├─── images4docker/ # Docker build environments (40 distros)
+│
+└─── ofborg/tickborg/ # Distributed CI bot
+ ├── RabbitMQ (AMQP) # Message queue
+ └── GitHub API # Check runs, PR comments
+```
+
+### Full Dependency Matrix
+
+| Consumer | json4cpp | toml++ | libnbt++ | neozip | cmark | genqrcode | forgewrapper | meta | Qt 6 |
+|----------|----------|--------|----------|--------|-------|-----------|--------------|------|------|
+| meshmc | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ (runtime) | ✓ (runtime, HTTP) | ✓ |
+| meta | — | — | — | — | — | — | — | — | — |
+| tickborg | — | — | — | — | — | — | — | — | — |
+| corebinutils | — | — | — | — | — | — | — | — | — |
+| mnv | — | — | — | — | — | — | — | — | — |
+| cgit | — | — | — | — | — | — | — | — | — |
+
+The library sub-projects (json4cpp, tomlplusplus, libnbtplusplus, neozip,
+cmark, genqrcode) are consumed exclusively by MeshMC within the monorepo.
+External consumers can also use them independently.
+
+---
+
+## Build System Architecture
+
+Each sub-project uses the build system best suited to its upstream lineage:
+
+```
+ ┌────────────────────────┐
+ │ Nix Flake (top-level) │
+ │ Development Shells │
+ └──────────┬─────────────┘
+ │
+ ┌────────────────────┼────────────────────┐
+ │ │ │
+ ┌─────▼─────┐ ┌─────▼─────┐ ┌─────▼─────┐
+ │ CMake │ │ Other │ │ Package │
+ │ Projects │ │ Systems │ │ Managers │
+ └─────┬─────┘ └─────┬─────┘ └─────┬─────┘
+ │ │ │
+ ┌───────┼────────┐ ┌──────┼──────┐ ┌─────┼─────┐
+ │ │ │ │ │ │ │ │ │
+meshmc neozip cmark toml++ cgit corebinutils meta tickborg
+json4 genqr libnbt (Meson)(Make) (Make) (Poetry)(Cargo)
+(CMake)(CMake) (CMake) mnv forgewrapper
+ (Auto) (Gradle)
+```
+
+### CMake Projects (Ninja Multi-Config)
+
+MeshMC and its library dependencies use CMake with the Ninja Multi-Config
+generator. MeshMC ships `CMakePresets.json` with platform-specific presets:
+
+| Preset | Platform | Toolchain |
+|--------|----------|-----------|
+| `linux` | Linux | System compiler |
+| `macos` | macOS | vcpkg |
+| `macos_universal` | macOS Universal | x86_64 + arm64 |
+| `windows_mingw` | Windows | MinGW |
+| `windows_msvc` | Windows | MSVC + vcpkg |
+
+All presets share a hidden `base` preset that enforces:
+- Ninja Multi-Config generator
+- Build directory: `build/`
+- Install directory: `install/`
+- LTO enabled by default
+
+### CMake Compiler Requirements
+
+| Compiler | Minimum Version | Standard |
+|----------|----------------|----------|
+| GCC | 13 | C++23 |
+| Clang | 17 | C++23 |
+| MSVC | 19.36 | C++23 |
+
+CMake minimum version: **3.28**
+
+### Meson Project (tomlplusplus)
+
+toml++ uses Meson as its primary build system with CMake as an alternative.
+The Meson build supports both header-only and compiled modes.
+
+### Make Projects (cgit, corebinutils)
+
+cgit uses a traditional `Makefile` that first builds a bundled version of Git,
+then builds cgit itself. The Makefile supports `NO_LUA=1` and
+`LUA_PKGCONFIG=luaXX` options.
+
+corebinutils uses a `./configure && make` workflow with `config.mk` for
+toolchain configuration. It selects musl-gcc by default and falls back to
+system gcc/clang.
+
+### Autotools Projects (mnv, genqrcode, neozip)
+
+MNV supports both CMake and traditional Autotools (`./configure && make`).
+GenQRCode uses Autotools (`autogen.sh` → `./configure` → `make`).
+NeoZip supports both CMake and a `./configure` script.
+
+### Gradle Project (forgewrapper)
+
+ForgeWrapper uses Gradle for building. The project includes a `gradlew`
+wrapper script and uses JPMS (Java Platform Module System) via the
+`jigsaw/` directory.
+
+### Cargo Workspace (tickborg)
+
+The `ofborg/` directory contains a Cargo workspace with two crates:
+- `tickborg` — The main CI bot
+- `tickborg-simple-build` — Simplified build runner
+
+The workspace uses Rust 2021 edition with `resolver = "2"`.
+
+### Poetry Project (meta)
+
+The `meta/` component uses Poetry for Python dependency management. Key
+dependencies include `requests`, `cachecontrol`, `pydantic`, and `filelock`.
+It provides CLI entry points for generating and updating version metadata
+for each supported mod loader.
+
+---
+
+## CI/CD Architecture
+
+### Orchestrator Pattern
+
+Project Tick uses a single monolithic CI orchestrator (`ci.yml`) that gates
+all other workflows. The orchestrator:
+
+1. **Classifies the event** — Push, PR, merge queue, tag, scheduled, or manual
+2. **Detects changed files** — Maps file paths to affected sub-projects
+3. **Determines run level** — `minimal`, `standard`, or `full`
+4. **Dispatches per-project builds** — Only builds what changed
+
+```
+Event (push/PR/merge_queue/tag)
+ │
+ ▼
+┌──────────┐
+│ Gate │ ── classify event, detect changes, set run level
+└────┬─────┘
+ │
+ ├──► Lint & Checks (commit messages, formatting, CODEOWNERS)
+ │
+ ├──► meshmc-build.yml (if meshmc/ changed)
+ ├──► neozip-ci.yml (if neozip/ changed)
+ ├──► cmark-ci.yml (if cmark/ changed)
+ ├──► json4cpp-ci.yml (if json4cpp/ changed)
+ ├──► tomlplusplus-ci.yml (if tomlplusplus/ changed)
+ ├──► libnbtplusplus-ci.yml (if libnbtplusplus/ changed)
+ ├──► genqrcode-ci.yml (if genqrcode/ changed)
+ ├──► forgewrapper-build.yml (if forgewrapper/ changed)
+ ├──► cgit-ci.yml (if cgit/ changed)
+ ├──► corebinutils-ci.yml (if corebinutils/ changed)
+ ├──► mnv-ci.yml (if mnv/ changed)
+ │
+ └──► Release workflows (if tag push)
+ ├── meshmc-release.yml
+ ├── meshmc-publish.yml
+ └── neozip-release.yml
+```
+
+### Workflow Inventory
+
+The `.github/workflows/` directory contains 50+ workflow files:
+
+**Core CI:**
+- `ci.yml` — Monolithic orchestrator
+- `ci-lint.yml` — Commit message and formatting checks
+- `ci-schedule.yml` — Scheduled jobs
+
+**Per-Project CI:**
+- `meshmc-build.yml`, `meshmc-codeql.yml`, `meshmc-container.yml`, `meshmc-nix.yml`
+- `neozip-ci.yml`, `neozip-cmake.yml`, `neozip-configure.yml`, `neozip-analyze.yml`, `neozip-codeql.yml`, `neozip-fuzz.yml`, `neozip-lint.yml`
+- `json4cpp-ci.yml`, `json4cpp-fuzz.yml`, `json4cpp-amalgam.yml`, `json4cpp-flawfinder.yml`, `json4cpp-semgrep.yml`
+- `cmark-ci.yml`, `cmark-fuzz.yml`
+- `tomlplusplus-ci.yml`, `tomlplusplus-fuzz.yml`
+- `mnv-ci.yml`, `mnv-codeql.yml`, `mnv-coverity.yml`
+- `cgit-ci.yml`, `corebinutils-ci.yml`
+- `forgewrapper-build.yml`, `libnbtplusplus-ci.yml`, `genqrcode-ci.yml`
+
+**Release & Publishing:**
+- `meshmc-release.yml`, `meshmc-publish.yml`
+- `neozip-release.yml`
+- `images4docker-build.yml`
+- `tomlplusplus-gh-pages.yml`, `json4cpp-publish-docs.yml`
+
+**Repository Maintenance:**
+- `repo-dependency-review.yml`, `repo-labeler.yml`, `repo-scorecards.yml`, `repo-stale.yml`
+- `meshmc-backport.yml`, `meshmc-blocked-prs.yml`, `meshmc-merge-blocking-pr.yml`
+- `meshmc-flake-update.yml`
+
+### Concurrency Control
+
+The CI orchestrator uses a concurrency key that varies by event type:
+
+| Event | Concurrency Group |
+|-------|-------------------|
+| Merge queue | `ci-<merge_group_head_ref>` |
+| Pull request | `ci-pr-<PR_number>` |
+| Push | `ci-<ref>` |
+
+In-progress runs are cancelled for pushes and PRs but **not** for merge queue
+entries, ensuring merge queue integrity.
+
+---
+
+## Branch Strategy
+
+Branch classification is defined in `ci/supportedBranches.js`:
+
+| Branch Pattern | Type | Priority | Description |
+|----------------|------|----------|-------------|
+| `master` | development / primary | 0 (highest) | Main development branch |
+| `release-*` | development / primary | 1 | Release branches (e.g., `release-7.0`) |
+| `staging-*` | development / secondary | 2 | Pre-release staging |
+| `staging-next-*` | development / secondary | 3 | Next staging cycle |
+| `feature-*` | wip | — | Feature development |
+| `fix-*` | wip | — | Bug fixes |
+| `backport-*` | wip | — | Cherry-picks to release branches |
+| `revert-*` | wip | — | Reverted changes |
+| `wip-*` | wip | — | Work in progress |
+| `dependabot-*` | wip | — | Automated dependency updates |
+
+Version tags follow: `<major>.<minor>.<patch>` (e.g., `7.0.0`).
+
+---
+
+## Shared Infrastructure
+
+### Nix Flake (Top-Level)
+
+The root `flake.nix` provides a development shell for the entire monorepo:
+
+- **Toolchain:** LLVM 22 (Clang, clang-tidy)
+- **clang-tidy-diff:** Wrapped Python script for incremental analysis
+- **Submodule initialization:** Automatic via `shellHook`
+- **Systems:** All `lib.systems.flakeExposed` (x86_64, aarch64 on Linux/macOS
+ and other exotic platforms)
+
+### CI Nix Configuration
+
+The `ci/default.nix` provides:
+
+- **treefmt** — Multi-language formatter with:
+ - `actionlint` — GitHub Actions YAML validation
+ - `biome` — JavaScript formatting (single quotes, no semicolons)
+ - `nixfmt` — Nix formatting (RFC style)
+ - `yamlfmt` — YAML formatting (retain line breaks)
+ - `zizmor` — GitHub Actions security scanning
+ - `keep-sorted` — Sort blocks marked with `keep-sorted` comments
+- **codeowners-validator** — Validates the CODEOWNERS file
+
+### Lefthook Git Hooks
+
+Pre-commit hooks configured in `lefthook.yml`:
+
+1. **reuse-lint** — Validates REUSE compliance. If missing licenses are
+ detected, downloads them and stages the fix automatically.
+2. **checkpatch** — Runs `scripts/checkpatch.pl` on staged C/C++ and CMake
+ diffs. Skipped during merge and rebase operations.
+
+Pre-push hooks:
+1. **reuse-lint** — Final REUSE compliance check before push.
+
+### Bootstrap Scripts
+
+`bootstrap.sh` (Linux/macOS) and `bootstrap.cmd` (Windows) handle first-time
+setup:
+
+- Detect the host distribution (Debian, Ubuntu, Fedora, RHEL, openSUSE, Arch,
+ macOS)
+- Install required dependencies via the native package manager
+- Initialize and update Git submodules
+- Install and configure lefthook
+
+The bootstrap scripts check for:
+- Build tools: npm, Go, lefthook, reuse
+- Libraries: Qt6Core, quazip1-qt6, zlib, ECM (via pkg-config)
+
+---
+
+## Security Architecture
+
+### Supply Chain
+
+- All Nix inputs are pinned with content hashes (`flake.lock`, `ci/pinned.json`)
+- GitHub Actions use pinned action versions with SHA references
+- `step-security/harden-runner` is used in CI workflows
+- `repo-dependency-review.yml` scans dependency changes
+- `repo-scorecards.yml` tracks OpenSSF Scorecard compliance
+
+### Code Quality
+
+- CodeQL analysis for meshmc, mnv, and neozip
+- Fuzz testing for neozip, json4cpp, cmark, and tomlplusplus
+- Semgrep and Flawfinder static analysis for json4cpp
+- Coverity scanning for mnv
+- clang-tidy checks enabled via `MeshMC_ENABLE_CLANG_TIDY` CMake option
+
+### Compiler Hardening (MeshMC)
+
+MeshMC's CMakeLists.txt enables:
+- `-fstack-protector-strong --param=ssp-buffer-size=4` — Stack smashing protection
+- `-O3 -D_FORTIFY_SOURCE=2` — Buffer overflow detection
+- `-Wall -pedantic` — Comprehensive warnings
+- ASLR and PIE via `CMAKE_POSITION_INDEPENDENT_CODE ON`
+
+---
+
+## Data Flow
+
+### MeshMC Launch Sequence
+
+```
+User clicks "Launch" in MeshMC
+ │
+ ▼
+MeshMC reads instance configuration
+ │ (tomlplusplus for TOML, json4cpp for JSON)
+ │
+ ▼
+MeshMC fetches version metadata
+ │ (HTTP → meta/ JSON manifests)
+ │
+ ▼
+MeshMC downloads/verifies game assets
+ │ (neozip for decompression, libarchive for extraction)
+ │
+ ▼
+MeshMC prepares launch environment
+ │ (libnbtplusplus for world data if needed)
+ │
+ ▼
+[If Forge ≥1.13] ForgeWrapper bootstraps Forge
+ │ (Java SPI, installer extraction)
+ │
+ ▼
+Minecraft process spawned with JDK 17+
+```
+
+### CI Build Flow
+
+```
+Developer pushes commit
+ │
+ ▼
+ci.yml Gate job runs
+ │ ─ classifies event type
+ │ ─ detects changed files
+ │ ─ maps to affected sub-projects
+ │
+ ▼
+ci-lint.yml runs in parallel
+ │ ─ Conventional Commits validation
+ │ ─ treefmt formatting check
+ │ ─ CODEOWNERS validation
+ │
+ ▼
+Per-project CI dispatched
+ │ ─ CMake configure + build + test
+ │ ─ Multi-platform matrix
+ │ ─ CodeQL / fuzz / static analysis
+ │
+ ▼
+Results posted as GitHub check runs
+ │
+ ▼
+[If tag push] Release workflow triggered
+ ─ Build release binaries
+ ─ Create GitHub release
+ ─ Publish artifacts
+```
+
+### Metadata Generation Flow
+
+```
+meta/ update scripts run (cron or manual)
+ │
+ ├─► updateMojang → fetches Mojang version manifest
+ ├─► updateForge → fetches Forge version list
+ ├─► updateNeoForge → fetches NeoForge version list
+ ├─► updateFabric → fetches Fabric loader versions
+ ├─► updateQuilt → fetches Quilt loader versions
+ ├─► updateLiteloader → fetches LiteLoader versions
+ └─► updateJava → fetches Java runtime versions
+ │
+ ▼
+generate scripts produce JSON manifests
+ │
+ ▼
+Manifests deployed (git push or static hosting)
+ │
+ ▼
+MeshMC reads manifests at startup
+```
+
+---
+
+## Module Boundaries
+
+### Interface Contracts
+
+Each library sub-project provides well-defined interfaces:
+
+| Library | Include Path | Namespace | API Style |
+|---------|-------------|-----------|-----------|
+| json4cpp | `<nlohmann/json.hpp>` | `nlohmann` | Header-only, template-based |
+| tomlplusplus | `<toml++/toml.hpp>` | `toml` | Header-only, C++17 |
+| libnbtplusplus | `<nbt/nbt.h>` | `nbt` | Compiled library, C++11 |
+| neozip | `<zlib.h>` or `<zlib-ng.h>` | C API | Drop-in zlib replacement |
+| cmark | `<cmark.h>` | C API | Compiled library |
+| genqrcode | `<qrencode.h>` | C API | Compiled library |
+| forgewrapper | Java SPI | `io.github.zekerzhayard.forgewrapper` | JAR, service provider |
+
+### Versioning Independence
+
+Each sub-project maintains its own version number:
+
+| Project | Versioning | Current |
+|---------|-----------|---------|
+| meshmc | `MAJOR.MINOR.HOTFIX` (CMake) | 7.0.0 |
+| meta | `MAJOR.MINOR.PATCH-REV` (pyproject.toml) | 0.0.5-1 |
+| forgewrapper | Gradle `version` property | (see gradle.properties) |
+| neozip | CMake project version | (follows zlib-ng) |
+| Other libraries | Follow upstream versioning | — |
+
+The monorepo does not impose a single version across sub-projects. Each
+component releases independently based on its own cadence.
diff --git a/docs/handbook/Project-Tick/build-systems.md b/docs/handbook/Project-Tick/build-systems.md
new file mode 100644
index 0000000000..d47fa9ee63
--- /dev/null
+++ b/docs/handbook/Project-Tick/build-systems.md
@@ -0,0 +1,711 @@
+# Project Tick — Build Systems
+
+## Overview
+
+Project Tick uses seven distinct build systems across its sub-projects — plus
+Nix as cross-cutting tooling for CI, development shells, and deployments — each
+chosen to match the upstream heritage and language ecosystem of the component.
+This document provides a comprehensive reference for each build system, common
+patterns, and cross-cutting concerns.
+
+---
+
+## Build System Matrix
+
+| Build System | Sub-Projects | Language | Configuration |
+|-------------|-------------|----------|---------------|
+| **CMake** | meshmc, neozip, cmark, genqrcode, json4cpp, libnbtplusplus, mnv | C/C++ | `CMakeLists.txt`, `CMakePresets.json` |
+| **Meson** | tomlplusplus | C++ | `meson.build`, `meson_options.txt` |
+| **Make (GNU Make)** | cgit, corebinutils | C | `Makefile`, `GNUmakefile` |
+| **Autotools** | mnv, genqrcode, neozip (custom `configure`, not GNU Autoconf) | C | `configure.ac`, `Makefile.am`, `configure` |
+| **Gradle** | forgewrapper | Java | `build.gradle`, `settings.gradle` |
+| **Cargo** | tickborg | Rust | `Cargo.toml`, `Cargo.lock` |
+| **Poetry** | meta | Python | `pyproject.toml`, `poetry.lock` |
+| **Nix** | CI, dev shells, deployments | Multi | `flake.nix`, `default.nix` |
+
+---
+
+## CMake
+
+CMake is the dominant build system in Project Tick, used by seven sub-projects.
+
+### Minimum Versions
+
+| Component | CMake Minimum | C++ Standard | C Standard |
+|-----------|--------------|-------------|-----------|
+| meshmc | 3.28 | C++23 | C23 (C11 on MSVC) |
+| neozip | 3.14 | — | C11 |
+| cmark | 3.5 | — | C99 |
+| genqrcode | 3.5 | — | C99 |
+| json4cpp | 3.1 | C++11 | — |
+| libnbtplusplus | 3.15 | C++11 | — |
+| mnv | 3.10 | — | C11 |
+
+### MeshMC CMake Configuration
+
+MeshMC has the most sophisticated CMake setup in the monorepo, including:
+
+#### CMake Presets (`meshmc/CMakePresets.json`)
+
+All presets inherit from a hidden `base` preset:
+
+```json
+{
+ "name": "base",
+ "hidden": true,
+ "generator": "Ninja Multi-Config",
+ "binaryDir": "build",
+ "installDir": "install",
+ "cacheVariables": {
+ "ENABLE_LTO": "ON"
+ }
+}
+```
+
+Platform presets:
+
+| Preset | OS | Toolchain | vcpkg |
+|--------|-----|-----------|-------|
+| `linux` | Linux | System | No |
+| `macos` | macOS | System | Yes (`$VCPKG_ROOT`) |
+| `macos_universal` | macOS | Universal (x86_64+arm64) | Yes |
+| `windows_mingw` | Windows | MinGW | No |
+| `windows_msvc` | Windows | MSVC | Yes (`$VCPKG_ROOT`) |
+
+Usage:
+
+```bash
+# Configure
+cmake --preset linux
+
+# Build
+cmake --build --preset linux
+
+# Test (uses CTest)
+cd build && ctest --output-on-failure
+
+# Install
+cmake --install build --config Release --prefix install
+```
+
+#### CMake Options
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `ENABLE_LTO` | OFF | Link Time Optimization |
+| `MeshMC_DISABLE_JAVA_DOWNLOADER` | OFF | Disable Java auto-download |
+| `MeshMC_ENABLE_CLANG_TIDY` | OFF | Run clang-tidy during build |
+
+#### External Dependencies (find_package)
+
+```cmake
+find_package(Qt6 REQUIRED COMPONENTS
+ Core Widgets Concurrent Network NetworkAuth Test Xml
+)
+find_package(ECM NO_MODULE REQUIRED)
+find_package(LibArchive REQUIRED)
+```
+
+Additional Qt queries via `QMakeQuery`:
+- `QT_INSTALL_PLUGINS` → Plugin directory
+- `QT_INSTALL_LIBS` → Library directory
+- `QT_INSTALL_LIBEXECS` → Libexec directory
+
+#### Compiler Configuration
+
+```cmake
+set(CMAKE_CXX_STANDARD 23)
+set(CMAKE_CXX_STANDARD_REQUIRED true)
+set(CMAKE_C_STANDARD 23) # C11 on MSVC
+set(CMAKE_AUTOMOC ON) # Qt meta-object compiler
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+```
+
+Compiler flags (GCC/Clang):
+```
+-Wall -pedantic -Wno-deprecated-declarations
+-fstack-protector-strong --param=ssp-buffer-size=4
+-O3 -D_FORTIFY_SOURCE=2
+-DQT_NO_DEPRECATED_WARNINGS=Y
+```
+
+MSVC flags:
+```
+/W4 /DQT_NO_DEPRECATED_WARNINGS=Y
+```
+
+macOS additionally:
+```
+-stdlib=libc++
+```
+
+#### LTO (Link Time Optimization)
+
+When `ENABLE_LTO` is ON, MeshMC uses `CheckIPOSupported`:
+
+```cmake
+include(CheckIPOSupported)
+check_ipo_supported(RESULT ipo_supported OUTPUT ipo_error)
+if(ipo_supported)
+ set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE)
+ set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_MINSIZEREL TRUE)
+ set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELWITHDEBINFO TRUE)
+endif()
+```
+
+LTO is **not** enabled for Debug builds.
+
+#### Versioning
+
+```cmake
+set(MeshMC_VERSION_MAJOR 7)
+set(MeshMC_VERSION_MINOR 0)
+set(MeshMC_VERSION_HOTFIX 0)
+set(MeshMC_RELEASE_VERSION_NAME "7.0.0")
+```
+
+#### Build Targets
+
+The meshmc CMake tree produces:
+- Main executable (`meshmc`)
+- Libraries in `libraries/` subdirectory
+- Java JARs in `${PROJECT_BINARY_DIR}/jars`
+- Tests (via ECMAddTests when `BUILD_TESTING` is ON)
+
+### NeoZip CMake Configuration
+
+NeoZip supports both CMake and traditional `./configure`:
+
+```bash
+# CMake
+mkdir build && cd build
+cmake .. -G Ninja \
+ -DZLIB_COMPAT=ON \ # zlib-compatible API
+ -DWITH_GTEST=ON # Enable Google Test
+ninja
+ctest
+
+# Autotools
+./configure
+make -j$(nproc)
+make test
+```
+
+Key CMake variables:
+- `ZLIB_COMPAT` — Build with zlib-compatible API
+- `WITH_GTEST` — Build with Google Test
+- `WITH_BENCHMARKS` — Build benchmarks
+- Architecture-specific SIMD flags are auto-detected
+
+### cmark CMake Configuration
+
+```bash
+mkdir build && cd build
+cmake .. -G Ninja \
+ -DCMARK_TESTS=ON \
+ -DCMARK_SHARED=ON
+ninja
+ctest
+```
+
+### json4cpp CMake Configuration
+
+json4cpp supports CMake, Meson (via `meson.build`), and Bazel:
+
+```bash
+mkdir build && cd build
+cmake .. -G Ninja \
+ -DJSON_BuildTests=ON
+ninja
+ctest
+```
+
+The library is header-only; the CMake build is primarily for tests.
+
+### libnbt++ CMake Configuration
+
+```bash
+mkdir build && cd build
+cmake .. \
+ -DNBT_BUILD_SHARED=OFF \
+ -DNBT_USE_ZLIB=ON \
+ -DNBT_BUILD_TESTS=ON
+make -j$(nproc)
+ctest
+```
+
+### genqrcode CMake Configuration
+
+```bash
+mkdir build && cd build
+cmake .. -G Ninja
+ninja
+```
+
+Also supports Autotools:
+```bash
+./autogen.sh
+./configure
+make -j$(nproc)
+```
+
+---
+
+## Meson
+
+### tomlplusplus
+
+toml++ uses Meson as its primary build system:
+
+```bash
+meson setup build
+ninja -C build
+ninja -C build test
+```
+
+Meson options (from `meson_options.txt`):
+- Build mode (header-only vs compiled)
+- Test configuration
+- Example programs
+
+Also supports CMake as an alternative:
+
+```bash
+mkdir build && cd build
+cmake .. -G Ninja
+ninja
+ctest
+```
+
+---
+
+## GNU Make
+
+### cgit
+
+cgit uses a traditional `Makefile` that first builds Git as a dependency:
+
+```bash
+# Initialize Git submodule
+git submodule init
+git submodule update
+
+# Build (builds Git first, then cgit)
+make
+
+# Install (default: /var/www/htdocs/cgit)
+sudo make install
+```
+
+Build options:
+- `NO_LUA=1` — Build without Lua scripting support
+- `LUA_PKGCONFIG=lua5.1` — Specify Lua implementation
+- Custom paths via `cgit.conf`
+
+### corebinutils
+
+CoreBinUtils uses a `./configure` script that generates toolchain overrides,
+then builds with GNU Make:
+
+```bash
+./configure
+make -f GNUmakefile -j$(nproc) all
+make -f GNUmakefile test
+```
+
+The `configure` script:
+- Selects musl-gcc or musl-capable clang by preference
+- Falls back to system gcc/clang
+- Generates `config.mk` with `CC`, `AR`, `RANLIB`, `CPPFLAGS`, `CFLAGS`,
+ `LDFLAGS`
+
+Each subdirectory (e.g., `cat/`, `ls/`, `cp/`) has its own `GNUmakefile`
+that the top-level `GNUmakefile` orchestrates.
+
+---
+
+## Autotools
+
+### mnv
+
+MNV supports both CMake and traditional Autotools:
+
+```bash
+# Autotools (traditional)
+./configure --with-features=huge --enable-gui=auto
+make -j$(nproc)
+sudo make install
+
+# CMake (alternative)
+mkdir build && cd build
+cmake .. -G Ninja
+ninja
+```
+
+The Autotools build system supports extensive feature flags:
+- `--with-features={tiny,small,normal,big,huge}`
+- `--enable-gui={auto,no,gtk2,gtk3,motif,...}`
+- `--enable-python3interp`
+- `--enable-luainterp`
+- And many more
+
+### genqrcode
+
+GenQRCode uses Autotools:
+
+```bash
+./autogen.sh # Generate configure from configure.ac
+./configure # Configure
+make -j$(nproc) # Build
+make check # Run tests
+sudo make install # Install
+```
+
+### neozip
+
+NeoZip's `./configure` is a custom script (not GNU Autoconf):
+
+```bash
+./configure
+make -j$(nproc)
+make test
+sudo make install
+```
+
+---
+
+## Gradle
+
+### forgewrapper
+
+ForgeWrapper uses Gradle for Java builds:
+
+```bash
+# Build
+./gradlew build
+
+# Test
+./gradlew test
+
+# Clean
+./gradlew clean
+
+# Generate JAR
+./gradlew jar
+```
+
+Project structure:
+```
+forgewrapper/
+├── build.gradle # Build configuration
+├── gradle.properties # Version and settings
+├── settings.gradle # Project name and modules
+├── gradlew # Unix wrapper script
+├── gradlew.bat # Windows wrapper script
+├── gradle/ # Gradle wrapper JAR
+├── jigsaw/ # JPMS module configuration
+└── src/
+ └── main/java/ # Source code
+```
+
+The Gradle wrapper (`gradlew`) pins the Gradle version so no system-wide
+Gradle installation is needed.
+
+---
+
+## Cargo
+
+### tickborg
+
+The `ofborg/` directory contains a Cargo workspace:
+
+```toml
+[workspace]
+members = [
+ "tickborg",
+ "tickborg-simple-build"
+]
+resolver = "2"
+
+[profile.release]
+debug = true
+```
+
+#### Building
+
+```bash
+cd ofborg
+
+# Build all workspace crates
+cargo build
+
+# Build in release mode
+cargo build --release
+
+# Run tests
+cargo test
+
+# Run lints
+cargo clippy
+
+# Format
+cargo fmt
+
+# Build specific crate
+cargo build -p tickborg
+```
+
+#### Workspace Structure
+
+```
+ofborg/
+├── Cargo.toml # Workspace root
+├── Cargo.lock # Locked dependencies
+├── tickborg/ # Main CI bot crate
+│ ├── Cargo.toml
+│ └── src/
+└── tickborg-simple-build/ # Simplified build crate
+ ├── Cargo.toml
+ └── src/
+```
+
+The workspace uses `resolver = "2"` (Rust 2021 edition resolver) and enables
+debug symbols in release builds for profiling.
+
+---
+
+## Poetry
+
+### meta
+
+The `meta/` component uses Poetry for Python dependency management:
+
+```bash
+cd meta
+
+# Install dependencies
+poetry install
+
+# Run in Poetry environment
+poetry run generateMojang
+
+# Or activate shell
+poetry shell
+generateMojang
+```
+
+#### pyproject.toml
+
+```toml
+[tool.poetry]
+name = "meta"
+version = "0.0.5-1"
+description = "ProjT Launcher meta generator"
+license = "MS-PL"
+
+[tool.poetry.dependencies]
+python = ">=3.10,<4.0"
+cachecontrol = "^0.14.0"
+requests = "^2.31.0"
+filelock = "^3.20.3"
+packaging = "^25.0"
+pydantic = "^1.10.13"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+```
+
+#### CLI Entry Points
+
+Poetry scripts provide named commands:
+
+| Command | Function |
+|---------|----------|
+| `generateFabric` | `meta.run.generate_fabric:main` |
+| `generateForge` | `meta.run.generate_forge:main` |
+| `generateLiteloader` | `meta.run.generate_liteloader:main` |
+| `generateMojang` | `meta.run.generate_mojang:main` |
+| `generateNeoForge` | `meta.run.generate_neoforge:main` |
+| `generateQuilt` | `meta.run.generate_quilt:main` |
+| `generateJava` | `meta.run.generate_java:main` |
+| `updateFabric` | `meta.run.update_fabric:main` |
+| `updateForge` | `meta.run.update_forge:main` |
+| `updateLiteloader` | `meta.run.update_liteloader:main` |
+| `updateMojang` | `meta.run.update_mojang:main` |
+| `updateNeoForge` | `meta.run.update_neoforge:main` |
+| `updateQuilt` | `meta.run.update_quilt:main` |
+| `updateJava` | `meta.run.update_java:main` |
+| `index` | `meta.run.index:main` |
+
+---
+
+## Nix
+
+Nix is used across the monorepo for reproducible development environments,
+CI tooling, and deployment.
+
+### Top-Level Flake (`flake.nix`)
+
+```nix
+{
+ description = "Project Tick is a project dedicated to providing developers
+ with ease of use and users with long-lasting software.";
+
+ inputs = {
+ nixpkgs.url = "https://channels.nixos.org/nixos-unstable/nixexprs.tar.xz";
+ };
+}
+```
+
+Provides:
+- `devShells.default` — LLVM 22 toolchain with clang-tidy-diff
+- `formatter` — nixfmt-rfc-style
+- Systems: all `lib.systems.flakeExposed`
+
+The dev shell automatically runs `git submodule update --init --force` on
+entry.
+
+### CI Nix (`ci/default.nix`)
+
+The CI Nix expression provides:
+
+1. **treefmt** — Multi-language formatter:
+ - `actionlint` — GitHub Actions YAML lint
+ - `biome` — JavaScript (single quotes, no semicolons)
+ - `keep-sorted` — Sort annotated blocks
+ - `nixfmt` — Nix formatting (RFC style)
+ - `yamlfmt` — YAML (retain line breaks)
+ - `zizmor` — GitHub Actions security scanning
+
+2. **codeowners-validator** — Built from source with patches:
+ - `owners-file-name.patch`
+ - `permissions.patch`
+
+3. **Pinned Nixpkgs** — `ci/pinned.json` locks the Nixpkgs revision:
+ ```bash
+ # Update pinned revision
+ ./ci/update-pinned.sh
+ ```
+
+### Meta Nix (`meta/flake.nix`)
+
+The meta component provides a NixOS module for deployment:
+
+```nix
+services.blockgame-meta = {
+ enable = true;
+ settings = {
+ DEPLOY_TO_GIT = "true";
+ GIT_AUTHOR_NAME = "...";
+ GIT_AUTHOR_EMAIL = "...";
+ };
+};
+```
+
+### MeshMC Nix (`meshmc/flake.nix`)
+
+MeshMC provides its own Nix flake for building:
+
+```bash
+cd meshmc
+nix build
+```
+
+### Per-Project Nix Files
+
+Several sub-projects include `default.nix`, `shell.nix`, or `flake.nix` for
+Nix-based development:
+
+| Project | Nix Files |
+|---------|-----------|
+| meshmc | `flake.nix`, `default.nix`, `shell.nix` |
+| meta | `flake.nix` |
+| ofborg | `flake.nix`, `default.nix`, `shell.nix`, `service.nix` |
+| ci | `default.nix` |
+| ci/github-script | `shell.nix` |
+| cmark | `shell.nix` |
+
+---
+
+## Cross-Cutting Build Concerns
+
+### Compile Commands Database
+
+MeshMC generates `compile_commands.json` via:
+
+```cmake
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+```
+
+This file is used by clang-tidy, clangd, and other tools for accurate
+code analysis.
+
+### Testing Frameworks
+
+| Project | Test Framework | Runner |
+|---------|---------------|--------|
+| meshmc | Qt Test + CTest | `ctest` |
+| neozip | Google Test + CTest | `ctest` |
+| json4cpp | Catch2 + CTest | `ctest` |
+| tomlplusplus | Catch2 | `ninja test` |
+| libnbtplusplus | CTest | `ctest` |
+| cmark | Custom + CTest | `ctest` |
+| forgewrapper | JUnit + Gradle | `./gradlew test` |
+| tickborg | Rust built-in | `cargo test` |
+| corebinutils | Custom shell tests | `make test` |
+| mnv | Custom test framework | `make test` |
+
+### Parallel Build Support
+
+All build systems support parallel builds:
+
+```bash
+# CMake/Ninja
+cmake --build build -j$(nproc)
+
+# Make
+make -j$(nproc)
+
+# Cargo
+cargo build -j $(nproc)
+
+# Gradle
+./gradlew build --parallel
+```
+
+### Out-of-Source Build Enforcement
+
+MeshMC enforces out-of-source builds:
+
+```cmake
+string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" IS_IN_SOURCE_BUILD)
+if(IS_IN_SOURCE_BUILD)
+ message(FATAL_ERROR "You are building MeshMC in-source.")
+endif()
+```
+
+### WSL Build Rejection
+
+MeshMC explicitly rejects builds on WSL (Windows Subsystem for Linux):
+
+```cmake
+if(CMAKE_HOST_SYSTEM_VERSION MATCHES ".*[Mm]icrosoft.*" OR
+ CMAKE_HOST_SYSTEM_VERSION MATCHES ".*WSL.*")
+ message(FATAL_ERROR "Building MeshMC is not supported in Linux-on-Windows distributions.")
+endif()
+```
+
+---
+
+## Build Quick Reference
+
+| Action | meshmc | neozip | cgit | toml++ | tickborg | meta | forgewrapper |
+|--------|--------|--------|------|--------|----------|------|-------------|
+| Configure | `cmake --preset linux` | `cmake -B build` | — | `meson setup build` | — | — | — |
+| Build | `cmake --build --preset linux` | `ninja -C build` | `make` | `ninja -C build` | `cargo build` | `poetry install` | `./gradlew build` |
+| Test | `ctest` | `ctest` | — | `ninja -C build test` | `cargo test` | — | `./gradlew test` |
+| Install | `cmake --install build` | `ninja -C build install` | `make install` | `ninja -C build install` | `cargo install` | — | — |
+| Clean | rm -rf build | rm -rf build | `make clean` | rm -rf build | `cargo clean` | — | `./gradlew clean` |
+| Format | `clang-format -i` | — | — | — | `cargo fmt` | — | — |
+| Lint | `clang-tidy` | — | — | — | `cargo clippy` | — | — |
diff --git a/docs/handbook/Project-Tick/ci-cd-pipeline.md b/docs/handbook/Project-Tick/ci-cd-pipeline.md
new file mode 100644
index 0000000000..78d36e97cb
--- /dev/null
+++ b/docs/handbook/Project-Tick/ci-cd-pipeline.md
@@ -0,0 +1,599 @@
+# Project Tick — CI/CD Pipeline
+
+## Overview
+
+Project Tick uses a multi-layered CI/CD pipeline that orchestrates builds,
+tests, security scans, and releases across all sub-projects in the monorepo.
+The pipeline combines GitHub Actions, Nix-based tooling, and the custom
+tickborg distributed CI system.
+
+---
+
+## Architecture
+
+### Three-Layer CI Strategy
+
+```
+Layer 1: GitHub Actions (ci.yml orchestrator)
+ ├── Event classification and change detection
+ ├── Per-project workflow dispatch
+ └── Release and publishing workflows
+
+Layer 2: CI Tooling (ci/ directory)
+ ├── treefmt (multi-language formatting)
+ ├── codeowners-validator
+ ├── commit linting (Conventional Commits)
+ └── Pinned Nix environment
+
+Layer 3: tickborg (ofborg/ distributed CI)
+ ├── RabbitMQ-based job distribution
+ ├── Multi-platform build execution
+ └── GitHub check run reporting
+```
+
+---
+
+## GitHub Actions — The Orchestrator
+
+### ci.yml — Monolithic Gate
+
+The primary CI workflow (`ci.yml`) is the single entry point for all CI
+activity. Every push, pull request, merge queue entry, tag push, and manual
+dispatch flows through this workflow.
+
+#### Trigger Events
+
+```yaml
+on:
+ push:
+ branches: ["**"]
+ tags: ["*"]
+ pull_request:
+ types: [opened, synchronize, reopened, ready_for_review]
+ pull_request_target:
+ types: [closed, labeled]
+ merge_group:
+ types: [checks_requested]
+ workflow_dispatch:
+ inputs:
+ force-all:
+ description: "Force run all project CI pipelines"
+ type: boolean
+ default: false
+ build-type:
+ description: "Build configuration for meshmc/forgewrapper"
+ type: choice
+ options: [Debug, Release]
+ default: Debug
+```
+
+#### Permissions
+
+The orchestrator runs with minimal permissions:
+
+```yaml
+permissions:
+ contents: read
+```
+
+#### Concurrency Control
+
+```yaml
+concurrency:
+ group: >-
+ ci-${{
+ github.event_name == 'merge_group' && github.event.merge_group.head_ref ||
+ github.event_name == 'pull_request' && format('pr-{0}', github.event.pull_request.number) ||
+ github.ref
+ }}
+ cancel-in-progress: ${{ github.event_name != 'merge_group' }}
+```
+
+| Event | Concurrency Group | Cancel In-Progress |
+|-------|-------------------|--------------------|
+| Merge queue | `ci-<head_ref>` | No |
+| Pull request | `ci-pr-<number>` | Yes |
+| Push | `ci-<ref>` | Yes |
+
+Merge queue runs are never cancelled to maintain queue integrity.
+
+#### Stage 0: Gate & Triage
+
+The `gate` job is the first job that runs. It:
+
+1. **Classifies the event:** push, PR, merge queue, tag, backport, dependabot,
+ scheduled, etc.
+2. **Detects changed files:** Maps file paths to sub-project flags
+3. **Sets run level:** `minimal`, `standard`, or `full`
+4. **Exports output variables** for downstream jobs:
+ - Event classification flags (`is_push`, `is_pr`, `is_merge_queue`, etc.)
+ - Per-project change flags (`meshmc_changed`, `neozip_changed`, etc.)
+ - Run level for downstream decisions
+
+Draft PRs are automatically skipped:
+```yaml
+if: >-
+ !(github.event_name == 'pull_request' && github.event.pull_request.draft)
+```
+
+### ci-lint.yml — Lint & Checks
+
+Called from `ci.yml` before builds start. Runs commit message validation and
+formatting checks.
+
+#### Commit Message Linting
+
+Uses `ci/github-script/lint-commits.js` via `actions/github-script`:
+
+```yaml
+- name: Lint commit messages
+ uses: actions/github-script@v7
+```
+
+The linter validates Conventional Commits format:
+```
+type(scope): description
+```
+
+Valid types: `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`,
+`build`, `ci`, `chore`, `revert`.
+
+#### Security Hardening
+
+All CI jobs use `step-security/harden-runner`:
+
+```yaml
+- name: Harden runner
+ uses: step-security/harden-runner@v2
+ with:
+ egress-policy: audit
+```
+
+---
+
+## Workflow Inventory
+
+### Per-Project CI Workflows
+
+Each sub-project has dedicated CI workflows that build, test, and analyze
+the component:
+
+#### MeshMC (8 workflows)
+
+| Workflow | Purpose |
+|----------|---------|
+| `meshmc-build.yml` | Multi-platform build matrix |
+| `meshmc-codeql.yml` | CodeQL security analysis |
+| `meshmc-container.yml` | Container (Docker/Podman) build |
+| `meshmc-nix.yml` | Nix build verification |
+| `meshmc-backport.yml` | Automated backport PR creation |
+| `meshmc-blocked-prs.yml` | Track and manage blocked PRs |
+| `meshmc-merge-blocking-pr.yml` | Merge queue blocking logic |
+| `meshmc-flake-update.yml` | Automated Nix flake update |
+
+#### NeoZip (13 workflows)
+
+| Workflow | Purpose |
+|----------|---------|
+| `neozip-ci.yml` | Primary CI |
+| `neozip-cmake.yml` | CMake build matrix |
+| `neozip-configure.yml` | Autotools (configure) build |
+| `neozip-analyze.yml` | Static analysis |
+| `neozip-codeql.yml` | CodeQL security scanning |
+| `neozip-fuzz.yml` | Fuzz testing |
+| `neozip-lint.yml` | Code linting |
+| `neozip-libpng.yml` | libpng integration test |
+| `neozip-link.yml` | Link validation |
+| `neozip-osb.yml` | OpenSSF Scorecard |
+| `neozip-pigz.yml` | pigz compatibility test |
+| `neozip-pkgcheck.yml` | Package check |
+| `neozip-release.yml` | Release workflow |
+
+#### json4cpp (7 workflows)
+
+| Workflow | Purpose |
+|----------|---------|
+| `json4cpp-ci.yml` | Primary CI |
+| `json4cpp-fuzz.yml` | Fuzz testing |
+| `json4cpp-amalgam.yml` | Amalgamation (single-header) build |
+| `json4cpp-amalgam-comment.yml` | Amalgamation PR comment |
+| `json4cpp-flawfinder.yml` | Flawfinder static analysis |
+| `json4cpp-semgrep.yml` | Semgrep security scanning |
+| `json4cpp-publish-docs.yml` | Documentation publishing |
+
+#### Other Sub-Projects
+
+| Workflow | Sub-Project | Purpose |
+|----------|------------|---------|
+| `cmark-ci.yml` | cmark | Build and test |
+| `cmark-fuzz.yml` | cmark | Fuzz testing |
+| `tomlplusplus-ci.yml` | tomlplusplus | Build and test |
+| `tomlplusplus-fuzz.yml` | tomlplusplus | Fuzz testing |
+| `tomlplusplus-gh-pages.yml` | tomlplusplus | Documentation deployment |
+| `mnv-ci.yml` | mnv | Build and test |
+| `mnv-codeql.yml` | mnv | CodeQL analysis |
+| `mnv-coverity.yml` | mnv | Coverity scan |
+| `mnv-link-check.yml` | mnv | Documentation link check |
+| `cgit-ci.yml` | cgit | Build and test |
+| `corebinutils-ci.yml` | corebinutils | Build and test |
+| `forgewrapper-build.yml` | forgewrapper | Gradle build |
+| `libnbtplusplus-ci.yml` | libnbtplusplus | Build and test |
+| `genqrcode-ci.yml` | genqrcode | Build and test |
+| `images4docker-build.yml` | images4docker | Docker image build |
+
+### Release & Publishing Workflows
+
+| Workflow | Purpose |
+|----------|---------|
+| `meshmc-release.yml` | Create MeshMC releases |
+| `meshmc-publish.yml` | Publish MeshMC artifacts |
+| `neozip-release.yml` | Create NeoZip releases |
+
+### Repository Maintenance Workflows
+
+| Workflow | Purpose |
+|----------|---------|
+| `repo-dependency-review.yml` | Scan dependency changes for vulnerabilities |
+| `repo-labeler.yml` | Auto-label PRs by changed paths |
+| `repo-scorecards.yml` | OpenSSF Scorecard compliance tracking |
+| `repo-stale.yml` | Mark and close stale issues/PRs |
+
+---
+
+## Change Detection
+
+The CI orchestrator maps changed file paths to sub-project flags:
+
+| Path Pattern | Flag | Sub-Project |
+|-------------|------|-------------|
+| `meshmc/**` | `meshmc_changed` | MeshMC |
+| `neozip/**` | `neozip_changed` | NeoZip |
+| `json4cpp/**` | `json4cpp_changed` | json4cpp |
+| `tomlplusplus/**` | `tomlplusplus_changed` | tomlplusplus |
+| `libnbtplusplus/**` | `libnbt_changed` | libnbt++ |
+| `cmark/**` | `cmark_changed` | cmark |
+| `genqrcode/**` | `genqrcode_changed` | genqrcode |
+| `forgewrapper/**` | `forgewrapper_changed` | ForgeWrapper |
+| `cgit/**` | `cgit_changed` | cgit |
+| `corebinutils/**` | `corebinutils_changed` | CoreBinUtils |
+| `mnv/**` | `mnv_changed` | MNV |
+| `ofborg/**` | `ofborg_changed` | tickborg |
+| `meta/**` | `meta_changed` | Meta |
+| `images4docker/**` | `docker_changed` | Images4Docker |
+| `ci/**` | `ci_changed` | CI tooling |
+| `archived/**` | `archived_changed` | Archived |
+
+### Force-All Mode
+
+All projects are built when:
+- `force-all` is set to `true` in a manual dispatch
+- The event is a merge queue entry (`is_merge_queue`)
+
+---
+
+## tickborg — Distributed CI
+
+tickborg is a RabbitMQ-based distributed CI system adapted from NixOS's
+ofborg. It runs alongside GitHub Actions to provide:
+
+### Capabilities
+
+1. **Automatic change detection** — Detects changed sub-projects in PRs based
+ on file paths and commit scopes
+2. **Native build system execution** — Builds each project using its own build
+ system (CMake, Meson, Make, Cargo, Gradle, Autotools)
+3. **Multi-platform support** — Builds on 7 platform/architecture combinations
+4. **GitHub integration** — Posts results as check runs and PR comments
+
+### Platform Matrix
+
+| Platform | Runner | Architecture |
+|----------|--------|-------------|
+| `x86_64-linux` | `ubuntu-latest` | x86_64 |
+| `aarch64-linux` | `ubuntu-24.04-arm` | ARM64 |
+| `x86_64-darwin` | `macos-15` | x86_64 |
+| `aarch64-darwin` | `macos-15` | Apple Silicon |
+| `x86_64-windows` | `windows-2025` | x86_64 |
+| `aarch64-windows` | `windows-2025` | ARM64 |
+| `x86_64-freebsd` | `ubuntu-latest` (VM) | x86_64 |
+
+### Bot Commands
+
+tickborg responds to `@tickbot` commands in PR comments:
+
+```
+@tickbot build meshmc neozip cmark # Build specific projects
+@tickbot test meshmc # Run tests for a project
+@tickbot eval # Full evaluation (detect + label)
+```
+
+### WIP Suppression
+
+PRs with titles starting with `WIP:` or containing `[WIP]` suppress
+automatic builds.
+
+### Commit-Based Triggers
+
+tickborg reads Conventional Commits scopes to determine builds:
+
+| Commit Message | Triggered Build |
+|---------------|-----------------|
+| `feat(meshmc): add chunk loading` | meshmc |
+| `fix(neozip): handle empty archives` | neozip |
+| `cmark: fix buffer overflow` | cmark |
+| `chore(ci): update workflow` | (no build) |
+
+---
+
+## CI Tooling (ci/ Directory)
+
+### Directory Structure
+
+```
+ci/
+├── OWNERS # Code ownership
+├── README.md # CI documentation
+├── default.nix # Nix CI entry point
+├── pinned.json # Pinned Nixpkgs revision
+├── update-pinned.sh # Update pinned.json
+├── supportedBranches.js # Branch classification
+├── codeowners-validator/
+│ ├── default.nix # Build codeowners-validator
+│ ├── owners-file-name.patch # Patch for file naming
+│ └── permissions.patch # Patch for permissions
+└── github-script/
+ ├── run # CLI entry (local testing)
+ ├── lint-commits.js # Conventional Commits linter
+ ├── prepare.js # PR preparation/validation
+ ├── reviews.js # Review state management
+ ├── get-pr-commit-details.js # Extract PR commit info
+ ├── withRateLimit.js # GitHub API rate limiting
+ ├── package.json # npm dependencies
+ └── shell.nix # Nix development shell
+```
+
+### treefmt Configuration
+
+The CI `default.nix` configures treefmt with these formatters:
+
+| Formatter | Language/Format | Settings |
+|-----------|----------------|----------|
+| `actionlint` | GitHub Actions YAML | Default |
+| `biome` | JavaScript/TypeScript | Single quotes, no semicolons, editorconfig |
+| `keep-sorted` | Any (annotated blocks) | Default |
+| `nixfmt` | Nix | RFC style |
+| `yamlfmt` | YAML | Retain line breaks |
+| `zizmor` | GitHub Actions security | Default |
+
+Files matching `*.min.js` are excluded from biome formatting.
+
+### Pinned Nixpkgs
+
+`ci/pinned.json` contains content-addressed references to:
+- `nixpkgs` — The Nixpkgs revision used for CI tools
+- `treefmt-nix` — The treefmt-nix revision
+
+Updated via:
+```bash
+./ci/update-pinned.sh
+```
+
+### Local CI Testing
+
+CI scripts can be tested locally:
+
+```bash
+cd ci/github-script
+nix-shell # or: nix develop
+gh auth login
+./run lint-commits <owner> <repo> <pr-number>
+./run prepare <owner> <repo> <pr-number>
+```
+
+---
+
+## Docker Images (images4docker)
+
+### Purpose
+
+The `images4docker/` directory provides 40 Dockerfiles for building MeshMC
+across different Linux distributions and versions. Each image includes the
+Qt 6 toolchain and all MeshMC build dependencies.
+
+### Image Registry
+
+Images are published to:
+```
+ghcr.io/project-tick-infra/images/<target_name>:<target_tag>
+```
+
+### Build Schedule
+
+The `images4docker-build.yml` workflow runs:
+- On push to `main` (when Dockerfiles, workflow, or README change)
+- On a daily schedule at **03:17 UTC**
+
+Currently 35 targets are actively built (Qt6-compatible set).
+
+### Supported Package Managers
+
+| Package Manager | Distributions |
+|----------------|---------------|
+| `apt` | Debian, Ubuntu |
+| `dnf` | Fedora, RHEL, CentOS |
+| `apk` | Alpine |
+| `zypper` | openSUSE, SLES |
+| `yum` | Older CentOS/RHEL |
+| `pacman` | Arch Linux |
+| `xbps` | Void Linux |
+| `nix` | NixOS |
+| `emerge` | Gentoo |
+
+### Qt 6 Requirement
+
+Qt 6 is **mandatory** for all images. If Qt 6 packages are unavailable on a
+given distribution, the Docker build fails intentionally — there is no Qt 5
+fallback. This ensures all CI builds use a consistent Qt version.
+
+---
+
+## Security Scanning
+
+### CodeQL
+
+CodeQL analysis runs for security-critical components:
+
+| Component | Schedule | Languages |
+|-----------|----------|-----------|
+| meshmc | Per-PR, scheduled | C++ |
+| mnv | Per-PR, scheduled | C |
+| neozip | Per-PR, scheduled | C |
+
+### Fuzz Testing
+
+Continuous fuzz testing for parser and compression libraries:
+
+| Component | Infrastructure | Workflow |
+|-----------|---------------|----------|
+| neozip | OSS-Fuzz + custom | `neozip-fuzz.yml` |
+| json4cpp | OSS-Fuzz + custom | `json4cpp-fuzz.yml` |
+| cmark | Custom fuzzers | `cmark-fuzz.yml` |
+| tomlplusplus | Custom fuzzers | `tomlplusplus-fuzz.yml` |
+
+### Static Analysis
+
+| Tool | Component | Workflow |
+|------|-----------|----------|
+| Semgrep | json4cpp | `json4cpp-semgrep.yml` |
+| Flawfinder | json4cpp | `json4cpp-flawfinder.yml` |
+| Coverity | mnv | `mnv-coverity.yml` |
+| clang-tidy | meshmc | Via `MeshMC_ENABLE_CLANG_TIDY` |
+
+### Dependency Review
+
+`repo-dependency-review.yml` scans dependency changes in PRs for known
+vulnerabilities using GitHub's dependency review action.
+
+### OpenSSF Scorecard
+
+`repo-scorecards.yml` tracks the project's OpenSSF Scorecard score, measuring
+security practices across dimensions like branch protection, dependency
+updates, fuzzing, and signed releases.
+
+---
+
+## Release Pipeline
+
+### MeshMC Releases
+
+1. A release tag is pushed (e.g., `7.0.0`)
+2. `ci.yml` detects `is_release_tag` and dispatches release workflows
+3. `meshmc-release.yml`:
+ - Builds release binaries for all platforms
+ - Creates GitHub release with changelog
+ - Uploads platform-specific artifacts
+4. `meshmc-publish.yml`:
+ - Publishes artifacts to distribution channels
+
+### NeoZip Releases
+
+Similar tag-triggered flow via `neozip-release.yml`.
+
+### Documentation Deployment
+
+- `tomlplusplus-gh-pages.yml` — Deploys toml++ documentation to GitHub Pages
+- `json4cpp-publish-docs.yml` — Publishes json4cpp API documentation
+
+---
+
+## Branch Classification
+
+The `ci/supportedBranches.js` module classifies branches for CI decisions:
+
+```javascript
+const typeConfig = {
+ master: ['development', 'primary'],
+ release: ['development', 'primary'],
+ staging: ['development', 'secondary'],
+ 'staging-next': ['development', 'secondary'],
+ feature: ['wip'],
+ fix: ['wip'],
+ backport: ['wip'],
+ revert: ['wip'],
+ wip: ['wip'],
+ dependabot: ['wip'],
+}
+```
+
+Branch ordering (for base branch detection):
+```javascript
+const orderConfig = {
+ master: 0, // Highest priority
+ release: 1,
+ staging: 2,
+ 'staging-next': 3,
+}
+```
+
+The `classify()` function parses branch names to extract:
+- `prefix` — Branch type prefix
+- `version` — Optional version number (e.g., `7.0`)
+- `stable` — Whether the branch has a version (release branch)
+- `type` — Classification from `typeConfig`
+- `order` — Priority for base branch detection
+
+---
+
+## DCO Enforcement
+
+The `.github/dco.yml` configuration:
+
+```yaml
+allowRemediationCommits:
+ individual: false
+```
+
+This means:
+- Every commit must have a `Signed-off-by` tag
+- Remediation commits (adding sign-off after the fact) are **not** allowed
+- Contributors must either sign off each commit individually or use
+ `git rebase --signoff` to retroactively sign all commits
+
+---
+
+## Environment Variables
+
+### Shared CI Environment
+
+```yaml
+env:
+ CI: true
+ FORCE_ALL: ${{ github.event.inputs.force-all == 'true' || github.event_name == 'merge_group' }}
+```
+
+### Per-Workflow Variables
+
+Individual workflows may set additional variables specific to their build
+systems (CMake flags, Cargo features, Gradle properties, etc.).
+
+---
+
+## Monitoring and Diagnostics
+
+### CI Health Indicators
+
+| Indicator | Source |
+|-----------|--------|
+| Build status badge | `meshmc-build.yml` badge in README |
+| OpenSSF Scorecard | `repo-scorecards.yml` |
+| Code coverage | Per-project coverage workflows |
+| Dependency freshness | Dependabot/Renovate alerts |
+| Stale issue count | `repo-stale.yml` |
+
+### Debugging Failed Builds
+
+1. Check the GitHub Actions run log
+2. Identify which job failed (gate, lint, or per-project build)
+3. For commit lint failures: fix commit message format
+4. For build failures: reproduce locally using the same build system
+5. For formatting failures: run treefmt locally via `nix develop -f ci/`
diff --git a/docs/handbook/Project-Tick/coding-standards.md b/docs/handbook/Project-Tick/coding-standards.md
new file mode 100644
index 0000000000..581e9b5cca
--- /dev/null
+++ b/docs/handbook/Project-Tick/coding-standards.md
@@ -0,0 +1,558 @@
+# Project Tick — Coding Standards
+
+## Overview
+
+Project Tick spans multiple programming languages across its sub-projects.
+This document defines the coding standards for each language used in the
+monorepo. While each sub-project follows conventions appropriate to its
+upstream lineage, these standards provide the organizational baseline.
+
+---
+
+## C Style (neozip, cmark, genqrcode, cgit, corebinutils, mnv)
+
+### General Principles
+
+- Follow the existing style of the upstream codebase you are modifying
+- Use C11 as the minimum standard (C23 preferred for new code in meshmc)
+- Keep functions short and focused
+- Prefer stack allocation over heap allocation where possible
+
+### Formatting
+
+- **Indentation:** 4 spaces for neozip, cmark, genqrcode; tabs for cgit, mnv
+- **Line length:** 80 characters preferred, 120 maximum
+- **Braces:** K&R style (opening brace on same line for functions in some
+ components, next line in others — follow the file you are editing)
+- **Spaces:** Space after keywords (`if`, `for`, `while`, `switch`), no space
+ before parentheses in function calls
+
+### Naming
+
+| Element | Convention | Example |
+|---------|-----------|---------|
+| Functions | `snake_case` | `compress_block()` |
+| Variables | `snake_case` | `block_size` |
+| Constants | `UPPER_SNAKE_CASE` | `MAX_BUFFER_SIZE` |
+| Macros | `UPPER_SNAKE_CASE` | `ZLIB_VERSION` |
+| Types (struct/enum) | `snake_case` or `PascalCase` (follow upstream) | `z_stream`, `inflate_state` |
+| Struct members | `snake_case` | `total_out` |
+
+### Memory Management
+
+- Always check `malloc`/`calloc`/`realloc` return values
+- Free resources in reverse order of allocation
+- Use `sizeof(*ptr)` instead of `sizeof(type)` for allocation
+- Avoid variable-length arrays (VLAs) — use heap allocation instead
+
+### Error Handling
+
+- Return error codes (negative values for errors, zero for success)
+- Document error codes in function headers
+- Avoid `goto` except for error-cleanup patterns
+
+### neozip-Specific
+
+NeoZip follows zlib-ng conventions:
+- Maintain zlib API compatibility
+- Use SIMD intrinsics via architecture-specific files in `arch/`
+- Guard intrinsics with appropriate CPU feature checks
+- Use `zng_` prefixed names for native API functions
+
+### corebinutils-Specific
+
+CoreBinUtils follows FreeBSD kernel style:
+- Tabs for indentation
+- BSD `err()` / `warn()` family for error reporting
+- `POSIX.1-2008` compliance where feasible
+- musl-compatible system calls
+
+### cgit-Specific
+
+cgit follows its own established style:
+- Tabs for indentation
+- Functions prefixed with module name (e.g., `ui_log_`, `cache_`)
+- HTML output via `html()`, `htmlf()`, `html_attr()` functions
+
+### mnv-Specific
+
+MNV follows Vim coding conventions:
+- Tabs for indentation
+- VimScript naming conventions for script functions
+- `FEAT_*` macros for feature gating
+- Descriptive function names with module prefixes
+
+---
+
+## C++ Style (meshmc, json4cpp, tomlplusplus, libnbtplusplus)
+
+### General Principles
+
+- Use modern C++ idioms (RAII, smart pointers, range-based for, etc.)
+- Prefer `std::string_view` over `const char*` for read-only strings
+- Prefer `std::optional` over sentinel values
+- Avoid raw `new`/`delete` — use smart pointers or containers
+- Use `auto` judiciously — prefer explicit types for public APIs
+
+### C++ Standard by Component
+
+| Component | Standard | Reason |
+|-----------|----------|--------|
+| meshmc | C++23 | Active development, modern compiler requirement |
+| json4cpp | C++11 (minimum), C++17 (full features) | Wide compatibility |
+| tomlplusplus | C++17 | Modern features, wide support |
+| libnbtplusplus | C++11 | Compatibility with older compilers |
+
+### Formatting (meshmc)
+
+MeshMC uses `clang-format` for automated formatting. The `.clang-format`
+file at `meshmc/.clang-format` defines the canonical style. Key settings:
+
+- **Indentation:** 4 spaces
+- **Line length:** 120 characters
+- **Braces:** Allman style (braces on their own lines) for functions;
+ attached for control flow
+- **Pointer alignment:** Left-aligned (`int* ptr`, not `int *ptr`)
+- **Include ordering:** Sorted, grouped by category
+
+Always run clang-format before committing:
+
+```bash
+clang-format -i path/to/file.cpp
+```
+
+### Naming (meshmc)
+
+| Element | Convention | Example |
+|---------|-----------|---------|
+| Classes | `PascalCase` | `InstanceList` |
+| Methods | `camelCase` | `loadInstance()` |
+| Member variables | `m_camelCase` | `m_instanceList` |
+| Static members | `s_camelCase` | `s_instance` |
+| Local variables | `camelCase` | `blockSize` |
+| Constants | `UPPER_SNAKE_CASE` or `k_PascalCase` | `MAX_RETRIES` |
+| Namespaces | `PascalCase` or `lowercase` | `Application`, `net` |
+| Template params | `PascalCase` | `typename ValueType` |
+| Enum values | `PascalCase` | `LoadState::Ready` |
+| Files | `PascalCase` | `InstanceList.cpp`, `InstanceList.h` |
+
+### Headers
+
+- Use `#pragma once` instead of include guards
+- Include what you use (IWYU principle)
+- Forward-declare when possible to reduce compile times
+- Order includes: own header first, project headers, third-party, standard
+
+```cpp
+#include "InstanceList.h" // Own header
+
+#include "Application.h" // Project headers
+#include "FileSystem.h"
+
+#include <nlohmann/json.hpp> // Third-party
+#include <toml++/toml.hpp>
+
+#include <memory> // Standard library
+#include <string>
+#include <vector>
+```
+
+### Qt Conventions (meshmc)
+
+- Use Qt container types only when interfacing with Qt APIs
+- Prefer `std::` containers for internal logic
+- Use `Q_OBJECT` macro for all QObject subclasses
+- Use signal/slot connections with lambda syntax
+- Use `QStringLiteral()` for string literals in Qt contexts
+- Follow Qt naming: signals as verbs (`clicked()`), slots as verb-phrases
+ (`handleClicked()`)
+
+### Error Handling (meshmc)
+
+- Use exceptions for truly exceptional conditions
+- Use `std::optional` for expected absence of values
+- Use result types for operations that can fail in expected ways
+- Log errors with Qt's logging categories (`QLoggingCategory`)
+
+### Smart Pointer Usage
+
+```cpp
+// Owned heap objects
+std::unique_ptr<Instance> instance = std::make_unique<Instance>();
+
+// Shared ownership
+std::shared_ptr<Settings> settings = std::make_shared<Settings>();
+
+// Non-owning observation (prefer raw pointer or reference)
+Instance* observer = instance.get();
+
+// Qt parent-child ownership
+auto* widget = new QWidget(parent); // Qt manages lifetime
+```
+
+### json4cpp-Specific
+
+json4cpp follows nlohmann/json conventions:
+- Header-only library
+- Heavy template metaprogramming
+- ADL-based serialization (`to_json`/`from_json`)
+- Namespace: `nlohmann`
+
+### tomlplusplus-Specific
+
+toml++ follows its own established conventions:
+- Header-only by default
+- Namespace: `toml`
+- Works without RTTI or exceptions
+- C++17 with optional C++20 features
+
+### libnbtplusplus-Specific
+
+libnbt++ uses C++11:
+- Tag types as class hierarchy (`nbt::tag_compound`, `nbt::tag_list`, etc.)
+- Stream-based I/O
+- Namespace: `nbt`
+
+---
+
+## Rust Style (tickborg)
+
+### General Principles
+
+- Follow the [Rust API Guidelines](https://rust-lang.github.io/api-guidelines/)
+- Use `rustfmt` for formatting (default configuration)
+- Use `clippy` for linting
+- Handle errors with `Result<T, E>` — avoid `unwrap()` in library code
+
+### Naming
+
+| Element | Convention | Example |
+|---------|-----------|---------|
+| Crates | `snake_case` | `tickborg` |
+| Modules | `snake_case` | `build_runner` |
+| Types | `PascalCase` | `BuildResult` |
+| Functions | `snake_case` | `run_build()` |
+| Constants | `UPPER_SNAKE_CASE` | `MAX_RETRIES` |
+| Traits | `PascalCase` | `Buildable` |
+| Enum variants | `PascalCase` | `Status::Success` |
+
+### Cargo Workspace
+
+The tickborg Cargo workspace uses `resolver = "2"` with two crates:
+
+```toml
+[workspace]
+members = [
+ "tickborg",
+ "tickborg-simple-build"
+]
+resolver = "2"
+
+[profile.release]
+debug = true # Debug info in release builds
+```
+
+### Error Handling
+
+```rust
+// Use anyhow for application errors
+use anyhow::{Context, Result};
+
+fn process_pr(pr: &PullRequest) -> Result<BuildResult> {
+ let changes = detect_changes(pr)
+ .context("failed to detect changed projects")?;
+ // ...
+}
+
+// Use thiserror for library errors
+use thiserror::Error;
+
+#[derive(Debug, Error)]
+enum BuildError {
+ #[error("build failed for {project}: {reason}")]
+ BuildFailed { project: String, reason: String },
+ #[error("project not found: {0}")]
+ ProjectNotFound(String),
+}
+```
+
+---
+
+## Java Style (forgewrapper)
+
+### General Principles
+
+- Follow standard Java conventions (Oracle / Google style)
+- Target JDK 17 as the minimum
+- Use JPMS (Java Platform Module System) where applicable
+
+### Naming
+
+| Element | Convention | Example |
+|---------|-----------|---------|
+| Packages | `lowercase.dotted` | `io.github.zekerzhayard.forgewrapper` |
+| Classes | `PascalCase` | `ForgeWrapper` |
+| Interfaces | `PascalCase` (often prefixed with `I`) | `IFileDetector` |
+| Methods | `camelCase` | `detectFiles()` |
+| Constants | `UPPER_SNAKE_CASE` | `FORGE_VERSION` |
+| Local variables | `camelCase` | `installerPath` |
+
+### Gradle Build
+
+ForgeWrapper uses Gradle with the wrapper script (`gradlew`/`gradlew.bat`):
+
+```bash
+./gradlew build # Build
+./gradlew test # Test
+./gradlew clean # Clean
+```
+
+### Service Provider Interface
+
+ForgeWrapper uses Java SPI for extension:
+
+```java
+// Service interface
+public interface IFileDetector {
+ // Custom file detection logic
+}
+
+// Registration via META-INF/services/
+// META-INF/services/io.github.zekerzhayard.forgewrapper.installer.detector.IFileDetector
+```
+
+---
+
+## Python Style (meta)
+
+### General Principles
+
+- Follow [PEP 8](https://peps.python.org/pep-0008/)
+- Target Python 3.10+ (as specified in `pyproject.toml`)
+- Use type hints for function signatures
+- Use Poetry for dependency management
+
+### Naming
+
+| Element | Convention | Example |
+|---------|-----------|---------|
+| Modules | `snake_case` | `generate_forge.py` |
+| Functions | `snake_case` | `update_mojang()` |
+| Classes | `PascalCase` | `VersionList` |
+| Constants | `UPPER_SNAKE_CASE` | `DEPLOY_TO_GIT` |
+| Variables | `snake_case` | `version_data` |
+
+### Dependencies
+
+From `pyproject.toml`:
+
+```toml
+[tool.poetry.dependencies]
+python = ">=3.10,<4.0"
+cachecontrol = "^0.14.0"
+requests = "^2.31.0"
+filelock = "^3.20.3"
+packaging = "^25.0"
+pydantic = "^1.10.13"
+```
+
+### CLI Entry Points
+
+Meta provides Poetry scripts for each operation:
+
+```bash
+poetry run generateFabric
+poetry run generateForge
+poetry run generateNeoForge
+poetry run generateQuilt
+poetry run generateMojang
+poetry run generateJava
+poetry run updateFabric
+poetry run updateForge
+# ... etc.
+```
+
+---
+
+## Shell Script Style (bootstrap.sh, hooks, CI scripts)
+
+### General Principles
+
+- Use Bash for complex scripts, POSIX sh for simple ones
+- Start with `#!/usr/bin/env bash`
+- Enable strict mode: `set -euo pipefail`
+- Use `shellcheck` for linting
+
+### Naming
+
+| Element | Convention | Example |
+|---------|-----------|---------|
+| Functions | `snake_case` | `detect_distro()` |
+| Local variables | `snake_case` | `distro_id` |
+| Environment variables | `UPPER_SNAKE_CASE` | `DISTRO_ID` |
+| Constants | `UPPER_SNAKE_CASE` | `RED`, `GREEN`, `NC` |
+
+### Error Handling
+
+Use colored output functions as defined in `bootstrap.sh`:
+
+```bash
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+NC='\033[0m'
+
+info() { printf "${CYAN}[INFO]${NC} %s\n" "$*"; }
+ok() { printf "${GREEN}[ OK ]${NC} %s\n" "$*"; }
+warn() { printf "${YELLOW}[WARN]${NC} %s\n" "$*"; }
+err() { printf "${RED}[ERR]${NC} %s\n" "$*" >&2; }
+```
+
+### Best Practices
+
+- Quote all variable expansions: `"$var"` not `$var`
+- Use `[[ ]]` for conditional tests (Bash)
+- Use `$()` for command substitution (not backticks)
+- Use `local` for function-scoped variables
+- Check command existence with `command -v`
+
+---
+
+## JavaScript / Node.js Style (CI scripts)
+
+### General Principles
+
+The CI scripts in `ci/github-script/` follow the formatting enforced by
+`biome` (configured in `ci/default.nix`):
+
+- **Quotes:** Single quotes
+- **Semicolons:** None (ASI)
+- **Indentation:** 2 spaces
+
+### Naming
+
+| Element | Convention | Example |
+|---------|-----------|---------|
+| Functions | `camelCase` | `lintCommits()` |
+| Variables | `camelCase` | `prNumber` |
+| Constants | `UPPER_SNAKE_CASE` or `camelCase` | `MAX_RETRIES` |
+| Files | `kebab-case` | `lint-commits.js` |
+| Modules | `camelCase` exports | `module.exports = { classify }` |
+
+---
+
+## Nix Style (flake.nix, ci/default.nix, deployment modules)
+
+### General Principles
+
+- Format with `nixfmt` (RFC style, as configured in CI)
+- Use `let ... in` for local bindings
+- Prefer attribute sets over positional arguments
+- Pin all inputs with content hashes
+
+### Naming
+
+| Element | Convention | Example |
+|---------|-----------|---------|
+| Attributes | `camelCase` | `devShells`, `nixpkgsFor` |
+| Packages | `kebab-case` | `clang-tidy-diff` |
+| Variables | `camelCase` | `forAllSystems` |
+| Functions | `camelCase` | `mkShell` |
+
+---
+
+## CMake Style (meshmc, libraries)
+
+### General Principles
+
+- Minimum CMake version: 3.28 (meshmc), 3.15 (libnbtplusplus)
+- Use `target_*` commands instead of directory-level `include_directories()`
+- Export compile commands: `CMAKE_EXPORT_COMPILE_COMMANDS ON`
+- Use `find_package()` for external dependencies
+
+### Naming
+
+| Element | Convention | Example |
+|---------|-----------|---------|
+| Variables | `PascalCase` or `UPPER_CASE` | `MeshMC_VERSION_MAJOR` |
+| Options | `PascalCase` | `ENABLE_LTO`, `NBT_BUILD_SHARED` |
+| Targets | `PascalCase` | `MeshMC`, `nbt++` |
+| Functions | `snake_case` | `query_qmake()` |
+
+### CMake Options (meshmc)
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `ENABLE_LTO` | OFF | Enable Link Time Optimization |
+| `MeshMC_DISABLE_JAVA_DOWNLOADER` | OFF | Disable built-in Java downloader |
+| `MeshMC_ENABLE_CLANG_TIDY` | OFF | Enable clang-tidy during compilation |
+
+---
+
+## Dockerfile Style (images4docker)
+
+### General Principles
+
+- One base image per Dockerfile
+- Use multi-stage builds where appropriate
+- Minimize layer count
+- Validate Qt 6 availability during build (fail fast)
+- Pin base image tags
+
+### Naming
+
+- Dockerfiles: `<distro>.Dockerfile`
+- Image tags: `ghcr.io/project-tick-infra/images/<target_name>:<target_tag>`
+
+---
+
+## Cross-Language Standards
+
+### Git Commit Messages
+
+All languages follow the same Conventional Commits format (see
+[contributing.md](contributing.md)):
+
+```
+type(scope): short description
+```
+
+### SPDX Headers
+
+All source files should include SPDX headers appropriate to their comment
+syntax:
+
+```c
+// SPDX-License-Identifier: GPL-3.0-or-later
+// SPDX-FileCopyrightText: 2026 Project Tick
+```
+
+```python
+# SPDX-License-Identifier: MS-PL
+# SPDX-FileCopyrightText: 2026 Project Tick
+```
+
+```rust
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: NixOS Contributors & Project Tick
+```
+
+### Static Analysis
+
+| Language | Tool | Integration |
+|----------|------|-------------|
+| C/C++ | clang-tidy | `MeshMC_ENABLE_CLANG_TIDY`, CI |
+| C/C++ | clang-format | Pre-commit hook, CI |
+| C/C++ | checkpatch.pl | Pre-commit hook |
+| C/C++ | CodeQL | CI workflows |
+| C/C++ | Coverity | CI (mnv only) |
+| C/C++ | Flawfinder | CI (json4cpp) |
+| C/C++ | Semgrep | CI (json4cpp) |
+| Rust | clippy | `cargo clippy` |
+| Rust | rustfmt | `cargo fmt` |
+| Python | (standard linters) | Poetry dev deps |
+| JavaScript | biome | treefmt CI |
+| Nix | nixfmt | treefmt CI |
+| YAML | yamlfmt | treefmt CI |
+| GitHub Actions | actionlint, zizmor | treefmt CI |
diff --git a/docs/handbook/Project-Tick/contributing.md b/docs/handbook/Project-Tick/contributing.md
new file mode 100644
index 0000000000..fa967d7116
--- /dev/null
+++ b/docs/handbook/Project-Tick/contributing.md
@@ -0,0 +1,545 @@
+# Project Tick — Contributing Guide
+
+## Overview
+
+Project Tick welcomes contributions from the community. This guide covers the
+full contribution lifecycle — from setting up your environment to getting your
+changes merged. It applies to all sub-projects within the monorepo.
+
+All contributions are subject to the Project Tick Contributor License Agreement
+(CLA), the Developer Certificate of Origin (DCO), and the project's Code of
+Conduct.
+
+---
+
+## Table of Contents
+
+1. [Quick Start](#quick-start)
+2. [AI Policy](#ai-policy)
+3. [Contributor License Agreement](#contributor-license-agreement)
+4. [Developer Certificate of Origin](#developer-certificate-of-origin)
+5. [Commit Message Format](#commit-message-format)
+6. [Branch Naming](#branch-naming)
+7. [PR Workflow](#pr-workflow)
+8. [Code Review Process](#code-review-process)
+9. [Issue Templates](#issue-templates)
+10. [PR Requirements Checklist](#pr-requirements-checklist)
+11. [What Not to Do](#what-not-to-do)
+12. [Documentation](#documentation)
+13. [Contact](#contact)
+
+---
+
+## Quick Start
+
+```bash
+# 1. Fork and clone
+git clone --recursive https://github.com/YOUR_USERNAME/Project-Tick.git
+cd Project-Tick
+
+# 2. Create a branch
+git checkout -b feat/my-change
+
+# 3. Make changes, format, and lint
+clang-format -i changed_files.cpp
+reuse lint
+
+# 4. Commit with sign-off
+git commit -s -a -m "feat(meshmc): add new feature"
+
+# 5. Push and open a PR
+git push origin feat/my-change
+```
+
+---
+
+## AI Policy
+
+Project Tick has strict rules regarding generative AI usage. This policy is
+adapted from matplotlib's contributing guide and the Linux Kernel policy guide.
+
+### Rules
+
+1. **Do not post raw AI output** as comments on GitHub or the project's Discord
+ server. Such comments are typically formulaic and low-quality.
+
+2. **If you use AI tools** to develop code or documentation, you must:
+ - Fully understand the proposed changes
+ - Be able to explain why they are the correct approach
+ - Add personal value based on your own competency
+
+3. **AI-generated low-value contributions will be rejected.** Taking input,
+ feeding it to an AI, and posting the result without adding value is not
+ acceptable.
+
+### Signed-off-by and AI
+
+**AI agents MUST NOT add `Signed-off-by` tags.** Only humans can legally
+certify the Developer Certificate of Origin. The human submitter is responsible
+for:
+
+- Reviewing all AI-generated code
+- Ensuring compliance with licensing requirements
+- Adding their own `Signed-off-by` tag
+- Taking full responsibility for the contribution
+
+### AI Attribution
+
+When AI tools contribute to development, include an `Assisted-by` tag in the
+commit message:
+
+```
+Assisted-by: AGENT_NAME:MODEL_VERSION [TOOL1] [TOOL2]
+```
+
+Where:
+- `AGENT_NAME` — Name of the AI tool or framework
+- `MODEL_VERSION` — Specific model version used
+- `[TOOL1] [TOOL2]` — Optional specialized analysis tools (e.g., coccinelle,
+ sparse, smatch, clang-tidy)
+
+Basic development tools (git, gcc, make, editors) should **not** be listed.
+
+Example:
+
+```
+feat(meshmc): optimize chunk loading algorithm
+
+Improved chunk loading performance by 40% using spatial hashing.
+
+Signed-off-by: Jane Developer <jane@example.com>
+Assisted-by: Claude:claude-3-opus coccinelle sparse
+```
+
+---
+
+## Contributor License Agreement
+
+By submitting a contribution, you agree to the **Project Tick Contributor
+License Agreement (CLA)**.
+
+The CLA ensures that:
+
+- You have the legal right to submit the contribution
+- The contribution does not knowingly infringe third-party rights
+- Project Tick may distribute the contribution under the applicable license(s)
+- Long-term governance and license consistency can be maintained
+
+The CLA applies to **all intentional contributions**, including:
+- Source code
+- Documentation
+- Tests
+- Data
+- Media assets
+- Configuration files
+
+The full CLA text is available at:
+<https://projecttick.org/licenses/PT-CLA-2.0.txt>
+
+**If you do not agree to the CLA, do not submit contributions.**
+
+---
+
+## Developer Certificate of Origin
+
+Every commit in Project Tick must include a DCO sign-off line. The sign-off
+certifies that you wrote the code or have the right to submit it under the
+project's licenses.
+
+### How to Sign Off
+
+Add the `-s` flag to `git commit`:
+
+```bash
+git commit -s -a
+```
+
+This appends the following line to your commit message:
+
+```
+Signed-off-by: Your Name <your.email@example.com>
+```
+
+### Retroactive Sign-Off
+
+If you forgot to sign off, you can retroactively sign all commits in your
+branch:
+
+```bash
+git rebase --signoff master
+git push --force-with-lease
+```
+
+### DCO Bot Enforcement
+
+A DCO bot automatically checks every PR. PRs missing sign-off will be
+labeled and blocked from merging. The bot configuration
+(`.github/dco.yml`) does not allow remediation commits — every commit
+in the PR must have a sign-off.
+
+```yaml
+# .github/dco.yml
+allowRemediationCommits:
+ individual: false
+```
+
+### Important Distinction
+
+**Signing** (GPG/SSH signatures) and **signing-off** (DCO `Signed-off-by`) are
+two different things. The DCO sign-off is the minimum requirement. GPG signing
+is recommended but not required.
+
+---
+
+## Commit Message Format
+
+Project Tick uses [Conventional Commits](https://www.conventionalcommits.org/)
+format. The CI system validates commit messages via
+`ci/github-script/lint-commits.js`.
+
+### Format
+
+```
+type(scope): short description
+
+Optional longer explanation of what changed and why.
+
+Signed-off-by: Your Name <your.email@example.com>
+```
+
+### Types
+
+| Type | Description |
+|------|-------------|
+| `feat` | New feature |
+| `fix` | Bug fix |
+| `docs` | Documentation only |
+| `style` | Formatting, whitespace (no code change) |
+| `refactor` | Code restructuring (no feature/fix) |
+| `perf` | Performance improvement |
+| `test` | Adding or updating tests |
+| `build` | Build system or dependency changes |
+| `ci` | CI configuration changes |
+| `chore` | Maintenance tasks |
+| `revert` | Reverting a previous commit |
+
+### Scopes
+
+The scope identifies which sub-project is affected:
+
+| Scope | Sub-Project |
+|-------|-------------|
+| `meshmc` | MeshMC launcher |
+| `mnv` | MNV text editor |
+| `cgit` | cgit web interface |
+| `neozip` | NeoZip compression library |
+| `json4cpp` | Json4C++ JSON library |
+| `tomlplusplus` | toml++ TOML library |
+| `libnbt` | libnbt++ NBT library |
+| `cmark` | cmark Markdown library |
+| `genqrcode` | GenQRCode QR library |
+| `forgewrapper` | ForgeWrapper Java shim |
+| `corebinutils` | CoreBinUtils BSD ports |
+| `meta` | Metadata generator |
+| `tickborg` | tickborg CI bot |
+| `ci` | CI infrastructure |
+| `docker` | images4docker |
+| `docs` | Documentation |
+
+For changes affecting a component's sub-structure, scope by the
+sub-component:
+
+```
+fix(projtlauncher): fix crash on startup with invalid config
+```
+
+### Examples
+
+```
+feat(meshmc): add chunk loading optimization
+fix(neozip): handle empty archives in inflate
+docs(cmark): fix API reference typo
+ci(json4cpp): update build matrix for ARM64
+build(tomlplusplus): bump meson minimum to 0.60
+refactor(corebinutils): simplify ls output formatting
+test(libnbt): add round-trip test for compressed NBT
+chore(meta): update poetry.lock
+```
+
+### tickborg Integration
+
+The tickborg CI bot reads commit scopes to determine which sub-projects
+to build:
+
+| Commit Message | Auto-Build |
+|---------------|------------|
+| `feat(meshmc): add chunk loading` | meshmc |
+| `cmark: fix buffer overflow` | cmark |
+| `fix(neozip): handle empty archives` | neozip |
+| `chore(ci): update workflow` | (CI changes only) |
+
+---
+
+## Branch Naming
+
+Use the following branch name prefixes:
+
+| Prefix | Purpose | Example |
+|--------|---------|---------|
+| `feature-*` or `feat/*` | New features | `feature-chunk-loading` |
+| `fix-*` or `fix/*` | Bug fixes | `fix-crash-on-startup` |
+| `backport-*` | Cherry-picks to release | `backport-7.0-fix-123` |
+| `revert-*` | Reverted changes | `revert-pr-456` |
+| `wip-*` or `wip/*` | Work in progress | `wip-new-ui` |
+
+Development branches managed by maintainers:
+
+| Branch | Purpose |
+|--------|---------|
+| `master` | Main development branch |
+| `release-X.Y` | Release stabilization (e.g., `release-7.0`) |
+| `staging-*` | Pre-release staging |
+| `staging-next-*` | Next staging cycle |
+
+### WIP Convention
+
+If a PR title begins with `WIP:` or contains `[WIP]`, the tickborg bot
+will **not** automatically build its affected projects. This lets you
+push incomplete work for early review without triggering full CI.
+
+---
+
+## PR Workflow
+
+### Step-by-Step
+
+1. **Fork** the repository on GitHub
+
+2. **Clone** your fork with submodules:
+ ```bash
+ git clone --recursive https://github.com/YOUR_USERNAME/Project-Tick.git
+ ```
+
+3. **Set up upstream remote:**
+ ```bash
+ git remote add upstream https://github.com/Project-Tick/Project-Tick.git
+ ```
+
+4. **Create a feature branch:**
+ ```bash
+ git fetch upstream
+ git checkout -b feature/my-change upstream/master
+ ```
+
+5. **Develop your changes:**
+ - Write code
+ - Add tests for new functionality
+ - Update documentation if needed
+ - Run clang-format on changed C/C++ files
+ - Check REUSE compliance
+
+6. **Commit with sign-off:**
+ ```bash
+ git add -A
+ git commit -s -m "feat(scope): description"
+ ```
+
+7. **Push to your fork:**
+ ```bash
+ git push origin feature/my-change
+ ```
+
+8. **Open a PR** against `master` on the upstream repository
+
+9. **Fill in the PR template** — The template reminds you to:
+ - Sign off commits
+ - Sign the CLA
+ - Provide a clear description
+
+### Keeping Your Branch Updated
+
+```bash
+git fetch upstream
+git rebase upstream/master
+git push --force-with-lease origin feature/my-change
+```
+
+---
+
+## Code Review Process
+
+### Automated Checks
+
+Every PR goes through automated CI:
+
+1. **Gate job** — Event classification and change detection
+2. **Commit lint** — Validates Conventional Commits format
+3. **Formatting check** — treefmt validates code style
+4. **CODEOWNERS validation** — Ensures proper ownership rules
+5. **Per-project CI** — Builds and tests affected sub-projects
+6. **CodeQL analysis** — Security scanning (for meshmc, mnv, neozip)
+7. **DCO check** — Verifies all commits are signed off
+
+### Maintainer Review
+
+After automated checks pass:
+
+1. A maintainer reviews the code for:
+ - Correctness
+ - Design and architecture fit
+ - Test coverage
+ - Documentation completeness
+ - License compliance
+
+2. The maintainer may request changes by:
+ - Leaving inline comments
+ - Requesting specific modifications
+ - Asking clarifying questions
+
+3. Address all feedback by pushing additional commits or amending existing
+ ones. Sign off every commit.
+
+4. Once approved, the maintainer merges the PR.
+
+### Review Routing
+
+The `CODEOWNERS` file routes reviews automatically. All paths are currently
+owned by `@YongDo-Hyun`, covering:
+
+- `.github/` — Actions, templates, workflows
+- `archived/` — All archived sub-projects
+- `cgit/` — Including contrib, filters, tests
+- `cmark/` — Including all subdirectories
+- `corebinutils/` — All utility directories
+- Every other sub-project directory
+
+---
+
+## Issue Templates
+
+Project Tick provides structured issue templates in `.github/ISSUE_TEMPLATE/`:
+
+### Bug Report (`bug_report.yml`)
+
+Fields:
+- **Operating System** — Windows, macOS, Linux, Other (multi-select)
+- **Version of MeshMC** — Text field for version number
+- Steps to reproduce
+- Expected vs actual behavior
+- Logs/crash reports
+
+Before filing a bug, check:
+- The [FAQ](https://github.com/Project-Tick/MeshMC/wiki/FAQ)
+- That the bug is not caused by Minecraft or mods
+- That the issue hasn't been reported before
+
+### Suggestion (`suggestion.yml`)
+
+For feature requests and improvements.
+
+### RFC (`rfc.yml`)
+
+For larger architectural proposals that need discussion before implementation.
+
+### Configuration (`config.yml`)
+
+Controls which templates appear and provides links to external resources (e.g.,
+Discord for general help questions).
+
+---
+
+## PR Requirements Checklist
+
+Before submitting a PR, verify:
+
+- [ ] Code compiles without warnings
+- [ ] clang-format applied to changed C/C++ files
+- [ ] All existing tests pass
+- [ ] New tests added for new functionality
+- [ ] All commits signed off (`git commit -s`)
+- [ ] Commit messages follow Conventional Commits format
+- [ ] Documentation updated if needed
+- [ ] REUSE compliance verified (`reuse lint`)
+- [ ] Clear PR description explaining what and why
+- [ ] Related issues referenced
+- [ ] One logical change per PR
+
+### What Must Be Separate PRs
+
+The following must **never** be combined in a single PR:
+
+- **Refactors** — Code restructuring without behavior change
+- **Features** — New functionality
+- **Third-party updates** — Library/dependency version bumps
+
+Third-party library updates require standalone PRs with documented rationale
+explaining why the update is needed.
+
+---
+
+## What Not to Do
+
+1. **Don't mix refactors with features.** Each PR should contain one logical
+ change.
+
+2. **Don't skip sign-off.** The DCO bot will block your PR.
+
+3. **Don't post raw AI output.** All contributions must reflect genuine
+ understanding and personal competence.
+
+4. **Don't submit without testing.** Run the test suite for affected
+ sub-projects.
+
+5. **Don't ignore CI failures.** Fix them before requesting review.
+
+6. **Don't force-push to shared branches.** Only force-push to your own
+ feature branches.
+
+7. **Don't submit changes without REUSE compliance.** Every new file needs
+ SPDX headers.
+
+---
+
+## Documentation
+
+### Where Documentation Lives
+
+| Location | Content |
+|----------|---------|
+| `docs/handbook/` | Developer handbook organized by sub-project |
+| `docs/contributing/` | Contribution-specific guides |
+| `docs/` | General documentation |
+| `meshmc/doc/` | MeshMC-specific docs |
+| `meshmc/BUILD.md` | MeshMC build instructions |
+| `tickborg/doc/` | tickborg documentation |
+| Sub-project `README.md` files | Per-component overviews |
+
+### Documentation Standards
+
+- Use Markdown for all documentation
+- Follow the existing heading structure
+- Include code examples where appropriate
+- Cross-reference related documents
+- Add SPDX license headers to new documentation files:
+ ```
+ <!-- SPDX-License-Identifier: CC0-1.0 -->
+ ```
+
+---
+
+## Contact
+
+| Channel | Address |
+|---------|---------|
+| GitHub Issues | [Project-Tick/Project-Tick/issues](https://github.com/Project-Tick/Project-Tick/issues) |
+| Email | [projecttick@projecttick.org](mailto:projecttick@projecttick.org) |
+
+---
+
+## License
+
+By contributing to Project Tick, you agree to license your work under the
+project's applicable licenses. See the `LICENSES/` directory for details.
+
+The specific license for each sub-project is tracked in `REUSE.toml`. Ensure
+your contributions comply with the license of the sub-project you are
+modifying.
diff --git a/docs/handbook/Project-Tick/faq.md b/docs/handbook/Project-Tick/faq.md
new file mode 100644
index 0000000000..0ecc50110c
--- /dev/null
+++ b/docs/handbook/Project-Tick/faq.md
@@ -0,0 +1,683 @@
+# Project Tick — FAQ & Troubleshooting
+
+## General Questions
+
+### What is Project Tick?
+
+Project Tick is a unified monorepo containing MeshMC (a custom Minecraft
+launcher), supporting libraries, infrastructure tooling, and developer
+utilities. The project encompasses 15+ sub-projects spanning C, C++23, Rust,
+Java, Python, JavaScript, Nix, Shell, and Dockerfile.
+
+### Why a monorepo?
+
+A single repository provides:
+
+- **Atomic changes** — Cross-cutting modifications (e.g., updating neozip and
+ MeshMC together) land in a single commit.
+- **Unified CI** — One orchestrator workflow dispatches per-component CI based
+ on changed files.
+- **Shared tooling** — Formatting, linting, license compliance, and Git hooks
+ apply uniformly.
+- **Simplified dependency management** — Internal libraries (json4cpp,
+ tomlplusplus, libnbtplusplus, neozip) are consumed as source, not as
+ external packages.
+
+### Why C++23?
+
+MeshMC uses C++23 for:
+
+- `std::expected` for error handling without exceptions in performance paths
+- `std::format` / `std::print` for type-safe formatting
+- `std::ranges` improvements for cleaner data transformations
+- Deducing `this` for CRTP replacement
+- `if consteval` for compile-time branching
+
+Minimum compiler support: Clang 18+, GCC 14+, MSVC 17.10+.
+
+### Why fork zlib-ng instead of using it directly?
+
+neozip is a maintained fork of zlib-ng with Project Tick-specific
+modifications:
+
+- Build system integration with MeshMC's CMake configuration
+- Custom SIMD dispatch tuned for the project's use patterns
+- Consistent licensing and REUSE annotations
+- Patches carried forward as the upstream project evolves
+
+### Why fork Vim?
+
+MNV extends Vim with modern development features while maintaining backward
+compatibility. The fork is maintained in-tree to allow tight integration with
+the Project Tick development workflow.
+
+### Why fork nlohmann/json?
+
+json4cpp is a fork of nlohmann/json maintained for:
+
+- Build system compatibility with MeshMC
+- Consistent REUSE/SPDX license annotations
+- Controlled update cadence synchronized with launcher releases
+
+### What platforms does MeshMC support?
+
+| Platform | Architecture | Status |
+|-----------------|-------------|---------------|
+| Linux | x86_64 | Full support |
+| Linux | aarch64 | Full support |
+| macOS | x86_64 | Full support |
+| macOS | aarch64 | Full support |
+| Windows | x86_64 | Full support |
+| Windows | aarch64 | Full support |
+| WSL | — | Not supported |
+
+### How do I contact the project?
+
+- **Security issues**: yongdohyun@projecttick.org (see `SECURITY.md`)
+- **General inquiries**: Open a GitHub issue or discussion
+- **Trademark questions**: yongdohyun@projecttick.org
+
+---
+
+## Build Problems
+
+### CMake: "Could not find a configuration file for package Qt6"
+
+Qt 6 is not installed or not in the CMake search path.
+
+**Solution (Linux — Package Manager):**
+
+```bash
+# Debian/Ubuntu
+sudo apt install qt6-base-dev qt6-5compat-dev
+
+# Fedora
+sudo dnf install qt6-qtbase-devel qt6-qt5compat-devel
+
+# Arch
+sudo pacman -S qt6-base qt6-5compat
+```
+
+**Solution (Nix):**
+
+```bash
+# From the meshmc/ directory:
+nix develop
+# Or if direnv is set up:
+cd meshmc/ # .envrc activates automatically
+```
+
+**Solution (macOS):**
+
+```bash
+brew install qt@6
+export CMAKE_PREFIX_PATH="$(brew --prefix qt@6)"
+```
+
+### CMake: "CMake 3.28 or higher is required"
+
+MeshMC requires CMake 3.28+ for C++23 module support.
+
+**Solution:**
+
+```bash
+# Nix (provides latest CMake)
+nix develop
+
+# pip (if system package is too old)
+pip install --user cmake
+
+# Snap
+sudo snap install cmake --classic
+```
+
+### CMake: "Could NOT find ECM"
+
+Extra CMake Modules (ECM) from the KDE project is required.
+
+```bash
+# Debian/Ubuntu
+sudo apt install extra-cmake-modules
+
+# Fedora
+sudo dnf install extra-cmake-modules
+
+# Arch
+sudo pacman -S extra-cmake-modules
+
+# Nix
+# Already included in flake.nix devShell
+```
+
+### "In-source builds are not allowed"
+
+MeshMC's CMake configuration prohibits building in the source directory.
+
+**Solution:**
+
+```bash
+cd meshmc/
+cmake -B build -S .
+cmake --build build
+```
+
+Never run `cmake .` directly in the source tree. Always use `-B <builddir>`.
+
+### "compiler does not support C++23"
+
+Your compiler is too old. MeshMC requires:
+- Clang 18+
+- GCC 14+
+- MSVC 17.10+ (Visual Studio 2022 17.10)
+
+**Solution:**
+
+```bash
+# Check your compiler version
+clang++ --version
+g++ --version
+
+# Use Nix for guaranteed Clang 22
+nix develop
+```
+
+### neozip: configure fails on macOS
+
+macOS may not have all required build tools.
+
+```bash
+# Install Xcode command line tools
+xcode-select --install
+
+# Use Homebrew for missing dependencies
+brew install autoconf automake libtool
+```
+
+### forgewrapper: Gradle build fails
+
+Ensure you use the Gradle wrapper, not a system-installed Gradle:
+
+```bash
+cd forgewrapper/
+./gradlew build # Unix
+gradlew.bat build # Windows
+```
+
+The Gradle wrapper (`gradlew`) downloads the correct Gradle version
+automatically. Do not use `gradle build` with a system installation.
+
+### genqrcode: autogen.sh fails
+
+Install Autotools prerequisites:
+
+```bash
+# Debian/Ubuntu
+sudo apt install autoconf automake libtool pkg-config
+
+# Then bootstrap:
+cd genqrcode/
+./autogen.sh
+./configure
+make
+```
+
+### corebinutils: GNUmakefile errors
+
+corebinutils requires BSD make extensions and may not build with all GNU Make
+versions. Run the configure script first:
+
+```bash
+cd corebinutils/
+./configure
+make -f GNUmakefile
+```
+
+### cgit: missing Git submodule
+
+cgit requires a bundled Git source tree as a submodule.
+
+```bash
+git submodule update --init --recursive cgit/git/
+cd cgit/
+make
+```
+
+If the `cgit/git/` directory is empty, the submodule was not initialized.
+
+### MeshMC: vcpkg dependencies fail (Windows)
+
+```powershell
+# Ensure vcpkg is bootstrapped
+cd meshmc
+.\bootstrap.cmd
+
+# Or manually:
+git clone https://github.com/microsoft/vcpkg.git
+.\vcpkg\bootstrap-vcpkg.bat
+.\vcpkg\vcpkg install --triplet x64-windows
+```
+
+---
+
+## CI Problems
+
+### CI: "REUSE lint failed"
+
+Every file in the repository must have a license annotation. Check which
+files are non-compliant:
+
+```bash
+reuse lint
+```
+
+Fix by adding the file to `REUSE.toml`:
+
+```toml
+[[annotations]]
+path = ["path/to/new/file"]
+SPDX-FileCopyrightText = "YYYY Your Name"
+SPDX-License-Identifier = "MIT"
+```
+
+Or add an SPDX header to the file itself:
+
+```c
+// SPDX-FileCopyrightText: 2025 Your Name
+// SPDX-License-Identifier: GPL-3.0-or-later
+```
+
+### CI: "DCO check failed"
+
+Your commit is missing the `Signed-off-by` line.
+
+**Fix the last commit:**
+
+```bash
+git commit --amend -s
+```
+
+**Fix older commits (interactive rebase):**
+
+```bash
+git rebase -i HEAD~N
+# Mark commits as "edit", then for each:
+git commit --amend -s
+git rebase --continue
+```
+
+**Prevent future failures:**
+
+```bash
+# Always use -s flag:
+git commit -s -m "feat(meshmc): add feature"
+```
+
+### CI: "Conventional Commits lint failed"
+
+Commit messages must follow the Conventional Commits format:
+
+```
+type(scope): description
+
+[body]
+
+[footer]
+Signed-off-by: Name <email>
+```
+
+Valid types: `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`,
+`build`, `ci`, `chore`, `revert`.
+
+**Bad:**
+```
+Fixed the bug
+Update README
+```
+
+**Good:**
+```
+fix(meshmc): resolve crash on instance deletion
+docs: update getting-started guide
+```
+
+### CI: "checkpatch failed"
+
+The lefthook pre-commit hook runs checkpatch on C/C++ changes. Common
+issues:
+
+- Trailing whitespace
+- Mixed tabs and spaces
+- Missing newline at end of file
+- Lines exceeding column limit
+
+### CI: "treefmt check failed"
+
+treefmt checks that all files are formatted correctly. Run locally:
+
+```bash
+# Via Nix
+nix run .#treefmt
+
+# Or format individual files
+clang-format -i file.cpp # C/C++
+black file.py # Python
+rustfmt file.rs # Rust
+nixfmt file.nix # Nix
+shfmt -w file.sh # Shell
+```
+
+### CI: workflow not triggered
+
+Check whether your changes match the workflow's path filters. The monolithic
+`ci.yml` uses change detection to only run relevant sub-project CI:
+
+```yaml
+# File changes are analyzed in:
+# .github/actions/change-analysis/
+```
+
+If you modify only documentation, MeshMC build workflows will not trigger.
+This is intentional.
+
+---
+
+## Git & Repository Questions
+
+### How do I clone the repository?
+
+```bash
+git clone --recurse-submodules https://github.com/Project-Tick/Project-Tick.git
+cd Project-Tick
+```
+
+The `--recurse-submodules` flag is critical — cgit depends on a bundled Git
+source tree.
+
+### How do I update submodules?
+
+```bash
+git submodule update --init --recursive
+```
+
+### How do I set up the development environment?
+
+**Option A — Nix (recommended):**
+
+```bash
+nix develop
+```
+
+**Option B — bootstrap script:**
+
+```bash
+# Linux
+./bootstrap.sh
+
+# Windows
+bootstrap.cmd
+```
+
+**Option C — Manual installation:**
+
+See the [Getting Started](getting-started.md) guide for per-platform
+instructions.
+
+### How do I run the bootstrap script?
+
+```bash
+chmod +x bootstrap.sh
+./bootstrap.sh
+```
+
+The script detects your distribution (Debian/Ubuntu, Fedora/RHEL, SUSE,
+Arch, macOS), verifies that required dependencies are installed, and
+installs any missing packages using the platform's package manager (with
+`sudo`).
+
+### How do I contribute?
+
+1. Fork the repository
+2. Create a feature branch: `git checkout -b feature/my-change`
+3. Make changes following coding standards
+4. Commit with sign-off: `git commit -s`
+5. Push and open a pull request
+6. Sign the CLA (PT-CLA-2.0) if first contribution
+
+See [Contributing](contributing.md) for full details.
+
+### What is the AI contribution policy?
+
+AI-assisted contributions are accepted under these rules:
+
+- AI-generated code must be reviewed and understood by the contributor
+- Commit messages must include `Assisted-by: <tool>` in the trailer
+- The human contributor is legally responsible for the code
+- AI-generated test data and documentation are explicitly welcome
+- Fully autonomous AI commits without human review are not accepted
+
+### How do I use lefthook?
+
+lefthook is configured in `lefthook.yml` and runs Git hooks automatically:
+
+```bash
+# Install lefthook
+go install github.com/evilmartians/lefthook@latest
+
+# Or via Nix
+nix profile install nixpkgs#lefthook
+
+# Install hooks
+lefthook install
+```
+
+After installation, REUSE lint and checkpatch run automatically on
+`git commit`.
+
+---
+
+## Library Questions
+
+### How do I use json4cpp in my CMake project?
+
+```cmake
+# As a subdirectory (recommended in monorepo)
+add_subdirectory(json4cpp)
+target_link_libraries(my_target PRIVATE nlohmann_json::nlohmann_json)
+```
+
+```cpp
+#include <nlohmann/json.hpp>
+using json = nlohmann::json;
+
+json j = json::parse(R"({"key": "value"})");
+std::string val = j["key"];
+```
+
+### How do I use tomlplusplus?
+
+```cpp
+#include <toml++/toml.hpp>
+
+auto config = toml::parse_file("config.toml");
+auto value = config["section"]["key"].value<std::string>();
+```
+
+### How do I use libnbtplusplus?
+
+```cpp
+#include <nbt/nbt.hpp>
+
+// Read NBT from file
+std::ifstream file("level.dat", std::ios::binary);
+auto tag = nbt::io::read_compound(file);
+
+// Access data
+auto& level = tag->at("Data").as<nbt::tag_compound>();
+std::string name = level.at("LevelName").as<nbt::tag_string>().get();
+```
+
+### How do I use neozip?
+
+neozip is API-compatible with zlib:
+
+```c
+#include <zlib.h>
+
+// Compress
+z_stream strm = {0};
+deflateInit(&strm, Z_DEFAULT_COMPRESSION);
+// ... standard zlib API usage
+deflateEnd(&strm);
+```
+
+### How does forgewrapper work?
+
+ForgeWrapper uses Java SPI to provide a file detector for Forge's installer:
+
+```java
+// META-INF/services/io.github.zekerzhayard.forgewrapper.installer.detector.IFileDetector
+// Points to the SPI implementation class
+```
+
+MeshMC uses forgewrapper as a runtime dependency when launching Forge-based
+Minecraft instances.
+
+---
+
+## Nix Questions
+
+### Nix: "error: experimental feature 'flakes' is disabled"
+
+Enable flakes in your Nix configuration:
+
+```bash
+# ~/.config/nix/nix.conf
+experimental-features = nix-command flakes
+```
+
+Or pass the flag:
+
+```bash
+nix --experimental-features 'nix-command flakes' develop
+```
+
+### Nix: "error: getting status of /nix/store/..."
+
+The Nix store may be corrupted. Try:
+
+```bash
+nix-store --verify --check-contents --repair
+```
+
+### Nix: flake.lock is outdated
+
+```bash
+nix flake update
+git add flake.lock
+git commit -s -m "build(nix): update flake.lock"
+```
+
+### Nix: how do I update CI's pinned nixpkgs?
+
+```bash
+cd ci/
+./update-pinned.sh
+git add pinned.json
+git commit -s -m "ci: update pinned nixpkgs"
+```
+
+---
+
+## tickborg Questions
+
+### What is tickborg?
+
+tickborg is Project Tick's CI bot, forked from ofborg. It listens for GitHub
+events via AMQP (RabbitMQ) and performs automated builds and tests.
+
+### How do I use tickborg commands?
+
+In a pull request comment:
+
+```
+@tickbot build meshmc # Build MeshMC
+@tickbot test meshmc # Build and test MeshMC
+@tickbot eval meshmc # Evaluate MeshMC Nix expression
+```
+
+### How do I deploy tickborg locally?
+
+```bash
+cd ofborg/
+cp example.config.json config.json
+# Edit config.json with your AMQP credentials
+docker-compose up -d
+```
+
+See `ofborg/DEPLOY.md` for full deployment instructions.
+
+---
+
+## Platform-Specific Questions
+
+### Can I build on WSL?
+
+No. Project Tick explicitly does not support WSL for development. Use:
+
+- **Native Linux** for Linux builds
+- **Native Windows with MSVC** for Windows builds
+- **macOS** for macOS builds
+
+The `bootstrap.sh` script will exit with an error if it detects a WSL
+environment.
+
+### macOS: "xcrun: error: invalid active developer path"
+
+Install Xcode command line tools:
+
+```bash
+xcode-select --install
+```
+
+### Windows: "bootstrap.cmd fails with Scoop not found"
+
+Install Scoop first:
+
+```powershell
+Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
+Invoke-RestMethod -Uri https://get.scoop.sh | Invoke-Expression
+```
+
+Then re-run `bootstrap.cmd`.
+
+---
+
+## Licensing Questions
+
+### Which license applies to my contribution?
+
+Contributions inherit the license of the component you are modifying:
+- MeshMC → GPL-3.0-or-later
+- neozip → Zlib
+- json4cpp → MIT
+- tomlplusplus → MIT
+- libnbtplusplus → LGPL-3.0-or-later
+- cgit → GPL-2.0-only
+- forgewrapper → MIT
+- meta → MS-PL
+- corebinutils → BSD (varies by utility)
+
+### How do I check license compliance?
+
+```bash
+reuse lint
+```
+
+This command checks that every file has proper SPDX annotations, either via
+file headers or `REUSE.toml` entries.
+
+### What is PT-CLA-2.0?
+
+The Project Tick Contributor License Agreement, version 2.0. It grants
+the project a perpetual, irrevocable license to use your contribution. You
+retain copyright ownership. The CLA must be signed once before your first
+PR can be merged.
diff --git a/docs/handbook/Project-Tick/getting-started.md b/docs/handbook/Project-Tick/getting-started.md
new file mode 100644
index 0000000000..10768e7862
--- /dev/null
+++ b/docs/handbook/Project-Tick/getting-started.md
@@ -0,0 +1,637 @@
+# Project Tick — Getting Started
+
+## Prerequisites
+
+Before working with Project Tick, ensure you have the following base tools
+installed on your system:
+
+| Tool | Minimum Version | Purpose |
+|------|----------------|---------|
+| Git | 2.30+ | Source control, submodule management |
+| CMake | 3.28+ | Build system for C/C++ projects |
+| Ninja | 1.10+ | Fast build backend for CMake |
+| C++ compiler | GCC 14+ / Clang 18+ / MSVC 19.40+ (VS 2022 17.10) | C++23 compilation |
+| C compiler | GCC 14+ / Clang 18+ / MSVC 19.40+ (VS 2022 17.10) | C11/C23 compilation |
+| pkg-config | any | Library discovery |
+| Python | 3.10+ | meta/ component |
+| Rust | stable | tickborg CI bot |
+| JDK | 17+ | ForgeWrapper, Minecraft runtime |
+| Node.js | 22+ | CI scripts |
+
+### Optional but Recommended
+
+| Tool | Purpose |
+|------|---------|
+| Nix | Reproducible builds, development shells |
+| Go | Installing lefthook |
+| lefthook | Git hooks manager |
+| reuse | REUSE license compliance checking |
+| clang-format | Code formatting |
+| clang-tidy | Static analysis |
+| npm | CI script dependencies |
+| Docker/Podman | Container-based builds |
+| scdoc | Man page generation |
+
+---
+
+## Cloning the Repository
+
+Project Tick uses Git submodules. Always clone recursively:
+
+```bash
+git clone --recursive https://github.com/Project-Tick/Project-Tick.git
+cd Project-Tick
+```
+
+If you already cloned without `--recursive`:
+
+```bash
+git submodule update --init --recursive
+```
+
+The repository is large. If you only need a specific sub-project, you can
+do a sparse checkout:
+
+```bash
+git clone --filter=blob:none --sparse https://github.com/Project-Tick/Project-Tick.git
+cd Project-Tick
+git sparse-checkout set meshmc json4cpp tomlplusplus libnbtplusplus neozip
+git submodule update --init --recursive
+```
+
+---
+
+## Bootstrap (Recommended First Step)
+
+The fastest way to get a working development environment is to use the
+bootstrap script. It detects your platform, installs missing dependencies,
+initializes submodules, and sets up lefthook.
+
+### Linux / macOS
+
+```bash
+./bootstrap.sh
+```
+
+The script supports the following distributions:
+
+| Distribution | Package Manager | Detection |
+|-------------|-----------------|-----------|
+| Debian | apt | `/etc/os-release` ID |
+| Ubuntu, Linux Mint, Pop!_OS | apt | `/etc/os-release` ID |
+| Fedora | dnf | `/etc/os-release` ID |
+| RHEL, CentOS, Rocky, Alma | dnf/yum | `/etc/os-release` ID |
+| openSUSE, SLES | zypper | `/etc/os-release` ID |
+| Arch, Manjaro, EndeavourOS | pacman | `/etc/os-release` ID |
+| macOS | Homebrew | `uname -s` = Darwin |
+
+The bootstrap script checks for:
+
+- **Build tools:** npm, Go, lefthook, reuse
+- **Libraries:** Qt6Core, quazip1-qt6, zlib, ECM (via pkg-config)
+
+If any dependencies are missing, it installs them using the appropriate
+package manager with `sudo`.
+
+### Windows
+
+```cmd
+bootstrap.cmd
+```
+
+Uses [Scoop](https://scoop.sh) for CLI tools and
+[vcpkg](https://github.com/microsoft/vcpkg) for C/C++ libraries.
+
+### Nix
+
+If you have Nix installed with flakes support:
+
+```bash
+nix develop
+```
+
+This drops you into a development shell with LLVM 22, clang-tidy, and all
+necessary tooling. The shell hook automatically initializes submodules.
+
+---
+
+## Building MeshMC (Primary Application)
+
+MeshMC is the main application in the Project Tick ecosystem. Here's how to
+build it from source.
+
+### Step 1: Install Dependencies
+
+#### Debian / Ubuntu
+
+```bash
+sudo apt-get install \
+ cmake ninja-build extra-cmake-modules pkg-config \
+ qt6-base-dev libquazip1-qt6-dev zlib1g-dev \
+ libcmark-dev libarchive-dev libqrencode-dev libtomlplusplus-dev \
+ scdoc
+```
+
+#### Fedora
+
+```bash
+sudo dnf install \
+ cmake ninja-build extra-cmake-modules pkgconf \
+ qt6-qtbase-devel quazip-qt6-devel zlib-devel \
+ cmark-devel libarchive-devel qrencode-devel tomlplusplus-devel \
+ scdoc
+```
+
+#### Arch Linux
+
+```bash
+sudo pacman -S --needed \
+ cmake ninja extra-cmake-modules pkgconf \
+ qt6-base quazip-qt6 zlib \
+ cmark libarchive qrencode tomlplusplus \
+ scdoc
+```
+
+#### openSUSE
+
+```bash
+sudo zypper install \
+ cmake ninja extra-cmake-modules pkg-config \
+ qt6-base-devel quazip-qt6-devel zlib-devel \
+ cmark-devel libarchive-devel qrencode-devel tomlplusplus-devel \
+ scdoc
+```
+
+#### macOS (Homebrew)
+
+```bash
+brew install \
+ cmake ninja extra-cmake-modules \
+ qt@6 quazip zlib \
+ cmark libarchive qrencode tomlplusplus \
+ scdoc
+```
+
+### Step 2: Configure with CMake Presets
+
+MeshMC ships `CMakePresets.json` with platform-specific presets:
+
+```bash
+cd meshmc
+
+# Linux
+cmake --preset linux
+
+# macOS
+cmake --preset macos
+
+# macOS Universal Binary (x86_64 + arm64)
+cmake --preset macos_universal
+
+# Windows (MinGW)
+cmake --preset windows_mingw
+
+# Windows (MSVC)
+cmake --preset windows_msvc
+```
+
+All presets use Ninja Multi-Config, output to `build/`, and install to
+`install/`.
+
+### Step 3: Build
+
+```bash
+# Using preset (matches the configure preset name)
+cmake --build --preset linux
+
+# Or manually with Ninja
+cmake --build build --config Release
+```
+
+### Step 4: Install (Optional)
+
+```bash
+cmake --install build --config Release --prefix install
+```
+
+The built binary appears at `install/bin/meshmc`.
+
+### Step 5: Run Tests
+
+```bash
+cd build
+ctest --output-on-failure
+```
+
+### Building with Nix
+
+```bash
+cd meshmc
+nix build
+```
+
+Or enter a development shell:
+
+```bash
+nix develop
+cmake --preset linux
+cmake --build --preset linux
+```
+
+### Building with Container (Podman/Docker)
+
+MeshMC provides a `Containerfile`:
+
+```bash
+cd meshmc
+podman build -t meshmc .
+```
+
+---
+
+## Building Other Sub-Projects
+
+### NeoZip (Compression Library)
+
+```bash
+cd neozip
+
+# CMake build
+mkdir build && cd build
+cmake .. -G Ninja
+ninja
+ctest --output-on-failure
+
+# Or Autotools
+./configure
+make -j$(nproc)
+make test
+```
+
+### cmark (Markdown Library)
+
+```bash
+cd cmark
+mkdir build && cd build
+cmake .. -G Ninja
+ninja
+ctest --output-on-failure
+```
+
+### json4cpp (JSON Library)
+
+```bash
+cd json4cpp
+mkdir build && cd build
+cmake .. -G Ninja
+ninja
+ctest --output-on-failure
+```
+
+json4cpp is header-only. For most uses, just include
+`<nlohmann/json.hpp>` or `<nlohmann/json_fwd.hpp>` and point your
+include path at `json4cpp/include/` or `json4cpp/single_include/`.
+
+### tomlplusplus (TOML Library)
+
+```bash
+cd tomlplusplus
+
+# Meson (primary)
+meson setup build
+ninja -C build
+ninja -C build test
+
+# Or CMake
+mkdir build && cd build
+cmake .. -G Ninja
+ninja
+ctest --output-on-failure
+```
+
+toml++ is header-only. Include `<toml++/toml.hpp>` or use the single
+header `toml.hpp`.
+
+### libnbt++ (NBT Library)
+
+```bash
+cd libnbtplusplus
+mkdir build && cd build
+cmake ..
+make -j$(nproc)
+ctest --output-on-failure
+```
+
+CMake options:
+- `NBT_BUILD_SHARED=OFF` — Build static library (default)
+- `NBT_USE_ZLIB=ON` — Enable zlib support for compressed NBT (default)
+- `NBT_BUILD_TESTS=ON` — Build tests (default)
+
+### GenQRCode (QR Code Library)
+
+```bash
+cd genqrcode
+
+# Autotools
+./autogen.sh
+./configure
+make -j$(nproc)
+make check
+
+# Or CMake
+mkdir build && cd build
+cmake .. -G Ninja
+ninja
+ctest --output-on-failure
+```
+
+### ForgeWrapper (Java)
+
+```bash
+cd forgewrapper
+./gradlew build
+```
+
+The JAR is produced in `build/libs/`.
+
+### CoreBinUtils (BSD Utilities)
+
+```bash
+cd corebinutils
+./configure
+make -f GNUmakefile -j$(nproc) all
+
+# Run tests
+make -f GNUmakefile test
+```
+
+Uses musl-first toolchain selection by default.
+
+### MNV (Text Editor)
+
+```bash
+cd mnv
+
+# CMake
+mkdir build && cd build
+cmake .. -G Ninja
+ninja
+
+# Or Autotools
+./configure
+make -j$(nproc)
+```
+
+### cgit (Git Web Interface)
+
+```bash
+cd cgit
+
+# Initialize Git submodule (cgit bundles its own git)
+git submodule init
+git submodule update
+
+make
+sudo make install
+```
+
+Installs to `/var/www/htdocs/cgit` by default. Provide a `cgit.conf`
+file to customize.
+
+### Meta (Metadata Generator)
+
+```bash
+cd meta
+
+# Install dependencies with Poetry
+pip install poetry
+poetry install
+
+# Update Mojang versions
+poetry run updateMojang
+
+# Generate all metadata
+poetry run generateMojang
+poetry run generateForge
+poetry run generateNeoForge
+poetry run generateFabric
+poetry run generateQuilt
+poetry run generateJava
+```
+
+### tickborg (CI Bot)
+
+```bash
+cd ofborg/tickborg
+cargo build
+cargo test
+cargo check
+```
+
+---
+
+## Setting Up the Development Environment
+
+### Git Hooks with Lefthook
+
+After cloning, install lefthook to enable pre-commit and pre-push hooks:
+
+```bash
+# Install lefthook (if not already installed)
+go install github.com/evilmartians/lefthook@latest
+
+# Or via npm
+npm i -g lefthook
+
+# Install hooks in the repository
+lefthook install
+```
+
+The hooks perform:
+
+1. **Pre-commit:**
+ - REUSE license compliance check (auto-downloads missing licenses)
+ - checkpatch.pl on staged C/C++/CMake changes
+
+2. **Pre-push:**
+ - Final REUSE compliance check
+
+### REUSE Compliance
+
+Ensure every file has proper SPDX headers:
+
+```bash
+# Check compliance
+reuse lint
+
+# Download missing license texts
+reuse download --all
+```
+
+### Code Formatting
+
+MeshMC uses clang-format for C/C++ formatting:
+
+```bash
+# Format a file
+clang-format -i path/to/file.cpp
+
+# Check formatting (CI style)
+clang-format --dry-run --Werror path/to/file.cpp
+```
+
+The CI system uses `treefmt` with biome (JavaScript), nixfmt (Nix), and
+yamlfmt (YAML) for other file types.
+
+### IDE Setup
+
+#### VS Code
+
+Recommended extensions:
+- C/C++ (ms-vscode.cpptools)
+- CMake Tools (ms-vscode.cmake-tools)
+- clangd (llvm-vs-code-extensions.vscode-clangd)
+
+MeshMC generates `compile_commands.json` via
+`CMAKE_EXPORT_COMPILE_COMMANDS ON` for full IDE support.
+
+#### CLion
+
+Open the `meshmc/CMakeLists.txt` directly. CLion natively supports CMake
+presets — select the appropriate platform preset.
+
+#### Vim/MNV
+
+Use the `compile_commands.json` with a language server like `clangd` or
+`ccls`.
+
+---
+
+## First Contribution Workflow
+
+1. **Fork** the repository on GitHub
+2. **Clone** your fork:
+ ```bash
+ git clone --recursive https://github.com/YOUR_USERNAME/Project-Tick.git
+ ```
+3. **Create a branch:**
+ ```bash
+ git checkout -b feature/my-change
+ ```
+4. **Make your changes**
+5. **Format and lint:**
+ ```bash
+ clang-format -i changed_files.cpp
+ reuse lint
+ ```
+6. **Commit with sign-off and conventional format:**
+ ```bash
+ git commit -s -a -m "feat(meshmc): add new feature description"
+ ```
+7. **Push and create a PR:**
+ ```bash
+ git push origin feature/my-change
+ ```
+8. Open a pull request against the `master` branch
+
+See [contributing.md](contributing.md) for detailed contribution guidelines.
+
+---
+
+## Troubleshooting
+
+### CMake can't find Qt 6
+
+Ensure Qt 6 is installed and discoverable:
+
+```bash
+# Check if Qt6Core is available
+pkg-config --modversion Qt6Core
+
+# If using a custom Qt installation, set CMAKE_PREFIX_PATH
+cmake --preset linux -DCMAKE_PREFIX_PATH=/path/to/qt6
+```
+
+### Submodules are empty
+
+```bash
+git submodule update --init --recursive --force
+```
+
+### Build fails on WSL
+
+MeshMC explicitly blocks WSL builds in its CMakeLists.txt:
+
+```
+Building MeshMC is not supported in Linux-on-Windows distributions.
+```
+
+Build natively on Windows using the `windows_msvc` or `windows_mingw` preset
+instead.
+
+### In-source build error
+
+MeshMC enforces out-of-source builds. If you see this error:
+
+```
+You are building MeshMC in-source. Please separate the build tree from the source tree.
+```
+
+Create a separate build directory:
+
+```bash
+cd meshmc
+cmake --preset linux # Uses build/ automatically
+```
+
+### Missing ECM (Extra CMake Modules)
+
+Install the ECM package for your distribution:
+
+```bash
+# Debian/Ubuntu
+sudo apt-get install extra-cmake-modules
+
+# Fedora
+sudo dnf install extra-cmake-modules
+
+# Arch
+sudo pacman -S extra-cmake-modules
+```
+
+### Nix build fails
+
+Ensure you have flakes enabled:
+
+```bash
+# Check Nix version
+nix --version
+
+# Enable flakes (if not already)
+echo "experimental-features = nix-command flakes" >> ~/.config/nix/nix.conf
+```
+
+### Poetry not found
+
+```bash
+pip install poetry
+# Or
+pipx install poetry
+```
+
+### Rust/Cargo not found
+
+```bash
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+source ~/.cargo/env
+```
+
+### cgit build fails (missing Git submodule)
+
+```bash
+cd cgit
+git submodule init
+git submodule update
+# Or download manually:
+make get-git
+```
diff --git a/docs/handbook/Project-Tick/glossary.md b/docs/handbook/Project-Tick/glossary.md
new file mode 100644
index 0000000000..cca34ef3c9
--- /dev/null
+++ b/docs/handbook/Project-Tick/glossary.md
@@ -0,0 +1,556 @@
+# Project Tick — Glossary
+
+## A
+
+### Adler-32
+A checksum algorithm designed by Mark Adler, used in the zlib data format.
+neozip provides SIMD-accelerated implementations of Adler-32 via
+architecture-specific intrinsics (SSE4.2, AVX2, NEON, VMX).
+
+### AMQP (Advanced Message Queuing Protocol)
+A wire-level protocol for message-oriented middleware. tickborg uses AMQP
+(via RabbitMQ) to receive build requests from GitHub webhooks and dispatch
+them to worker nodes.
+
+### AppImage
+A portable Linux application format that bundles all dependencies into a
+single executable file. MeshMC distributes Linux releases as AppImages.
+
+### Autotools
+A build system suite (autoconf, automake, libtool) used by several
+sub-projects including genqrcode. The `./autogen.sh` script bootstraps
+the build, producing `configure` and `Makefile.in`.
+
+### AUR (Arch User Repository)
+A community-driven repository for Arch Linux packages. MeshMC publishes a
+PKGBUILD to the AUR for Arch users.
+
+### AVX2 / AVX-512
+Advanced Vector Extensions — x86 SIMD instruction sets providing 256-bit and
+512-bit vector operations. neozip uses AVX2 for accelerated CRC-32, Adler-32,
+and deflate hash chain insertion.
+
+---
+
+## B
+
+### Bazel
+An open-source build system from Google. json4cpp provides `BUILD.bazel` and
+`MODULE.bazel` files for Bazel-based builds as an alternative to CMake.
+
+### BSD License
+A permissive open-source license family. Project Tick uses BSD-1-Clause,
+BSD-2-Clause, BSD-3-Clause, and BSD-4-Clause across various components,
+primarily in corebinutils (FreeBSD-derived code).
+
+### BSL-1.0 (Boost Software License)
+A permissive license used by some utility code in the monorepo.
+
+---
+
+## C
+
+### Cargo
+The Rust package manager and build system. tickborg uses a Cargo workspace
+with two crates: `tickborg` and `tickborg-simple-build`.
+
+### CC-BY-SA-4.0
+Creative Commons Attribution-ShareAlike 4.0 International. Used for
+documentation content within the Project Tick monorepo.
+
+### CC0-1.0
+Creative Commons Zero. A public domain dedication used for trivial
+configuration files and metadata.
+
+### CGI (Common Gateway Interface)
+A protocol for web servers to execute programs and return their output. cgit
+is a CGI application that generates HTML from Git repositories.
+
+### cgit
+A fast, lightweight web interface for Git repositories, written in C. The
+Project Tick fork includes UI customizations and is linked against a bundled
+Git source tree.
+
+### CLA (Contributor License Agreement)
+A legal agreement between a contributor and the project. Project Tick uses
+PT-CLA-2.0, which must be signed before contributions are accepted.
+
+### Clang
+The C/C++ compiler from the LLVM project. MeshMC requires Clang 18+ or
+equivalent. The Nix development shell provides LLVM 22 (Clang 22).
+
+### clang-format
+An automatic C/C++ code formatter. MeshMC's `.clang-format` defines the
+project's formatting rules, enforced by CI.
+
+### clang-tidy
+A C/C++ static analysis tool. MeshMC's `.clang-tidy` configures enabled
+checks. CI runs clang-tidy as part of the lint stage.
+
+### CMake
+A cross-platform build system generator. The primary build system for MeshMC,
+neozip, json4cpp, libnbtplusplus, genqrcode, cmark, and MNV. Project Tick
+requires CMake 3.28+ for MeshMC.
+
+### CMake Presets
+A JSON-based configuration file (`CMakePresets.json`) that defines named sets
+of CMake configure, build, and test options. MeshMC uses presets for each
+target platform.
+
+### CODEOWNERS
+A GitHub feature that maps file paths to responsible reviewers. Project Tick's
+`.github/CODEOWNERS` routes all reviews to `@YongDo-Hyun`.
+
+### CodeQL
+GitHub's semantic code analysis engine for finding security vulnerabilities.
+Configured in `.github/codeql/` for C, C++, and Java scanning.
+
+### cmark
+A standard-compliant CommonMark Markdown parser written in C. Licensed under
+BSD-2-Clause. Provides both a library and a CLI tool.
+
+### CommonMark
+A strongly-defined specification for Markdown syntax. cmark is the reference
+C implementation of the CommonMark spec.
+
+### Conventional Commits
+A commit message convention: `type(scope): description`. The CI lint stage
+(`ci/github-script/lint-commits.js`) enforces this convention.
+
+### corebinutils
+A collection of core Unix utilities ported from FreeBSD. Provides minimal
+implementations of commands like `cat`, `ls`, `cp`, `mv`, `rm`, `mkdir`,
+`chmod`, `echo`, `kill`, `ps`, and 30+ others.
+
+### Coverity
+A commercial static analysis tool. Some sub-projects include Coverity scan
+integration in their CI workflows.
+
+### CRC-32
+Cyclic Redundancy Check with a 32-bit output. Used in gzip, PNG, and other
+formats. neozip provides SIMD-accelerated CRC-32 using PCLMULQDQ (x86),
+PMULL (ARM), and hardware instructions on s390x.
+
+### Crowdin
+A localization management platform. MeshMC uses Crowdin for translation
+management (`launcher/translations/`).
+
+### CurseForge
+A Minecraft mod hosting platform. MeshMC integrates with CurseForge for mod
+discovery and installation via `launcher/modplatform/`.
+
+---
+
+## D
+
+### DCO (Developer Certificate of Origin)
+A lightweight alternative to a CLA. Contributors certify their right to
+submit code by adding `Signed-off-by:` to commit messages (`git commit -s`).
+Enforced by `.github/dco.yml`.
+
+### Deflate
+A lossless compression algorithm combining LZ77 and Huffman coding. neozip
+provides multiple deflate strategies: fast, medium, slow (best), quick,
+huffman-only, RLE, and stored.
+
+### DFLTCC (Deflate Conversion Call)
+A hardware instruction on IBM z15+ mainframes (s390x) that performs deflate
+compression/decompression in hardware. neozip supports DFLTCC via
+`arch/s390/`.
+
+### direnv
+A shell extension that loads environment variables from `.envrc` files.
+Project Tick uses direnv with Nix (`use flake`) for automatic development
+environment activation.
+
+### Docker / Containerfile
+Container image build specifications. images4docker provides 40 Dockerfiles
+for CI build environments. MeshMC includes a `Containerfile` for container
+builds.
+
+---
+
+## E
+
+### ECM (Extra CMake Modules)
+A set of additional CMake modules provided by the KDE project. Required as a
+build dependency for MeshMC.
+
+---
+
+## F
+
+### Fabric
+A lightweight Minecraft mod loader. MeshMC supports Fabric modding via
+`launcher/modplatform/`. The meta generator produces Fabric version metadata
+(`generate_fabric.py`).
+
+### Flake (Nix)
+A Nix feature that provides reproducible, hermetic project definitions. The
+root `flake.nix` defines the development shell with LLVM 22, Qt 6, and all
+build dependencies.
+
+### Flatpak / Flathub
+A Linux application sandboxing and distribution system. MeshMC is published
+on Flathub after release.
+
+### Forge (Minecraft Forge)
+A Minecraft mod loader for modifying the game. forgewrapper provides a Java
+shim using SPI (Service Provider Interface) for Forge's boot process.
+
+### ForgeWrapper
+A Java library that uses JPMS (Java Platform Module System) and SPI to
+integrate with Minecraft Forge's installer/detector mechanism. Located at
+`forgewrapper/`.
+
+### FreeBSD
+A Unix-like operating system. corebinutils contains utilities ported from
+FreeBSD's coreutils.
+
+### Fuzz Testing
+A testing technique that provides random/malformed inputs to find crashes and
+vulnerabilities. neozip, cmark, meta, and tomlplusplus include fuzz testing
+targets.
+
+---
+
+## G
+
+### Garnix
+A CI platform for Nix projects. The meta sub-project uses Garnix
+(`meta/garnix.yaml`).
+
+### genqrcode
+A C library for generating QR codes. Supports all QR code versions (1–40),
+multiple error correction levels (L/M/Q/H), and various encoding modes
+(numeric, alphanumeric, byte, kanji, ECI).
+
+### GHCR (GitHub Container Registry)
+GitHub's container image registry. images4docker images are pushed to GHCR.
+
+### GPL (GNU General Public License)
+A copyleft license family. Project Tick uses GPL-2.0-only (cgit),
+GPL-3.0-only (archived projects), and GPL-3.0-or-later (MeshMC,
+images4docker).
+
+### Gradle
+A build automation tool for JVM projects. forgewrapper uses Gradle with the
+Gradle Wrapper (`gradlew`).
+
+---
+
+## H
+
+### Huffman Coding
+A lossless data compression algorithm using variable-length codes. neozip's
+`trees.c` implements Huffman tree construction for the deflate algorithm.
+
+---
+
+## I
+
+### images4docker
+A collection of 40 Dockerfiles providing CI build environments for every
+supported Linux distribution. Qt 6 is a mandatory dependency in all images.
+Images are rebuilt daily at 03:17 UTC.
+
+---
+
+## J
+
+### JPMS (Java Platform Module System)
+Introduced in Java 9 (Project Jigsaw), JPMS provides a module system for
+Java. forgewrapper uses JPMS configuration (`jigsaw/`) for proper module
+encapsulation.
+
+### json4cpp
+A fork of nlohmann/json — a header-only JSON library for C++. Licensed under
+MIT. Provides SAX and DOM parsing, serialization, JSON Pointer, JSON Patch,
+JSON Merge Patch, and CBOR/MessagePack/UBJSON/BSON support.
+
+---
+
+## L
+
+### lefthook
+A fast, cross-platform Git hooks manager. Configured in `lefthook.yml` to
+run REUSE lint and checkpatch on pre-commit.
+
+### LGPL (GNU Lesser General Public License)
+A copyleft license that permits linking from proprietary code.
+libnbtplusplus uses LGPL-3.0-or-later, genqrcode uses LGPL-2.1-or-later.
+
+### libnbtplusplus
+A C++ library for reading and writing Minecraft's NBT (Named Binary Tag)
+format. Used by MeshMC to parse and modify Minecraft world data.
+
+### LLVM
+A compiler infrastructure providing Clang, LLD, and other tools. Project
+Tick's Nix development shell provides LLVM 22.
+
+### Lua
+A lightweight scripting language. cgit uses Lua for content filtering
+(`filter.c`).
+
+### LZ77
+A lossless compression algorithm that replaces repeated occurrences with
+references (length, distance pairs). The foundation of the deflate algorithm
+implemented in neozip.
+
+---
+
+## M
+
+### Make (GNU Make)
+A build automation tool. Used by cgit, corebinutils, and cmark. cgit uses
+a plain Makefile, while corebinutils uses GNUmakefile.
+
+### MeshMC
+The primary application in the Project Tick ecosystem. A custom Minecraft
+launcher written in C++23 with Qt 6, supporting multiple mod loaders,
+instance management, and cross-platform deployment.
+
+### Meson
+A build system focused on speed and simplicity. tomlplusplus uses Meson as
+its primary build system.
+
+### MIT License
+A permissive open-source license. Used by json4cpp, tomlplusplus,
+forgewrapper, tickborg, and archived/projt-minicraft-modpack.
+
+### MNV
+A fork of the Vim text editor with modern enhancements. Written in C, built
+with CMake or Autotools.
+
+### Modrinth
+A Minecraft mod hosting platform. MeshMC integrates with Modrinth for mod
+discovery and installation.
+
+### Mojang
+The developer of Minecraft. meta generates Mojang version metadata
+(`generate_mojang.py`).
+
+### MS-PL (Microsoft Public License)
+An open-source license used by the meta sub-project.
+
+### MSVC (Microsoft Visual C++)
+Microsoft's C/C++ compiler. MeshMC requires MSVC 17.10+ (Visual Studio 2022)
+for Windows builds.
+
+### musl
+A lightweight C standard library implementation for Linux. Some neozip CI
+builds test against musl for static linking compatibility.
+
+---
+
+## N
+
+### NBT (Named Binary Tag)
+A binary format used by Minecraft for storing structured data (worlds,
+entities, items). libnbtplusplus provides C++ types for all NBT tag types:
+`tag_byte`, `tag_short`, `tag_int`, `tag_long`, `tag_float`, `tag_double`,
+`tag_string`, `tag_byte_array`, `tag_list`, `tag_compound`,
+`tag_int_array`, `tag_long_array`.
+
+### NEON
+ARM's SIMD instruction set for 128-bit vector operations. neozip uses NEON
+for accelerated CRC-32, Adler-32, and slide hash on AArch64.
+
+### NeoForge
+A community fork of Minecraft Forge. MeshMC supports NeoForge modding. The
+meta generator produces NeoForge version metadata (`generate_neoforge.py`).
+
+### neozip
+Project Tick's fork of zlib-ng, a high-performance zlib replacement with
+SIMD acceleration across x86, ARM, Power, s390x, RISC-V, and LoongArch
+architectures. Licensed under the Zlib license.
+
+### Nix
+A purely functional package manager. Project Tick uses Nix flakes for
+reproducible development environments, CI tooling, and package builds.
+
+### nixpkgs
+The Nix package collection. CI pins a specific nixpkgs revision in
+`ci/pinned.json` for reproducible builds.
+
+### NSIS (Nullsoft Scriptable Install System)
+A Windows installer creation tool. MNV uses NSIS (`mnv/nsis/`) for Windows
+distribution.
+
+---
+
+## O
+
+### ofborg
+The upstream project from which tickborg is forked. A CI system for the
+Nixpkgs package repository that processes GitHub events via AMQP.
+
+---
+
+## P
+
+### PCLMULQDQ
+An x86 instruction for carry-less multiplication used to accelerate CRC-32
+computation. neozip uses PCLMULQDQ via `arch/x86/`.
+
+### PKGBUILD
+An Arch Linux package build script. MeshMC maintains a PKGBUILD for AUR
+distribution.
+
+### PMULL
+An ARM instruction for polynomial multiplication, used for CRC-32
+acceleration on AArch64. neozip's ARM CRC implementation uses PMULL.
+
+### Poetry
+A Python dependency management and packaging tool. The meta sub-project uses
+Poetry (`meta/pyproject.toml`, `meta/poetry.lock`).
+
+### PR (Pull Request)
+A GitHub mechanism for proposing code changes. All changes to protected
+branches must go through PRs with passing CI and review approval.
+
+---
+
+## Q
+
+### QR Code (Quick Response Code)
+A two-dimensional barcode format. genqrcode generates QR codes supporting
+versions 1–40, four error correction levels (L/M/Q/H), and multiple encoding
+modes.
+
+### Qt 6
+A cross-platform application framework. MeshMC uses Qt 6 for its GUI
+(widgets, dialogs, themes). Qt 6 is a mandatory dependency across all
+images4docker build environments.
+
+### Quilt
+A Minecraft mod loader forked from Fabric. MeshMC supports Quilt modding. The
+meta generator produces Quilt version metadata (`generate_quilt.py`).
+
+---
+
+## R
+
+### RabbitMQ
+An AMQP message broker. tickborg connects to RabbitMQ to receive build
+requests dispatched from GitHub webhooks.
+
+### Reed-Solomon
+An error-correcting code used in QR codes. genqrcode implements Reed-Solomon
+error correction in `rsecc.c`.
+
+### Renovate
+An automated dependency update bot. Configured in `meta/renovate.json`.
+
+### REUSE
+A specification from the FSFE (Free Software Foundation Europe) for
+expressing license and copyright information. Project Tick's `REUSE.toml`
+maps every file path to its SPDX license identifier.
+
+### RISC-V
+An open-source instruction set architecture. neozip includes RISC-V SIMD
+optimizations via the RVV (Vector) and ZBC (Carry-less Multiply) extensions
+in `arch/riscv/`.
+
+---
+
+## S
+
+### Scoop
+A Windows package manager. `bootstrap.cmd` uses Scoop to install
+dependencies on Windows.
+
+### Semgrep
+A pattern-based static analysis tool for security scanning. Some CI workflows
+include Semgrep scans.
+
+### SemVer (Semantic Versioning)
+A versioning scheme: `MAJOR.MINOR.PATCH`. MAJOR for breaking changes, MINOR
+for backwards-compatible features, PATCH for bug fixes.
+
+### SIMD (Single Instruction, Multiple Data)
+A parallel processing technique. neozip heavily uses SIMD for
+performance-critical operations: SSE2/SSE4.2/AVX2/AVX-512 (x86), NEON (ARM), VMX/VSX
+(Power), DFLTCC (s390x), RVV (RISC-V), LSX/LASX (LoongArch).
+
+### SPI (Service Provider Interface)
+A Java API pattern for extensibility. forgewrapper uses SPI via
+`IFileDetector.java` to integrate with Forge's installer mechanism.
+
+### SPDX (Software Package Data Exchange)
+A standard for communicating software license information. All Project Tick
+licenses use SPDX identifiers. The `LICENSES/` directory contains full
+SPDX-named license text files.
+
+### SSE (Streaming SIMD Extensions)
+x86 SIMD instruction sets (SSE2, SSE4.2). neozip uses SSE for baseline
+SIMD acceleration on x86 platforms.
+
+---
+
+## T
+
+### tickborg
+Project Tick's CI bot, forked from ofborg. A Rust application that listens on
+AMQP for build requests and executes them. Bot commands: `@tickbot build`,
+`@tickbot test`, `@tickbot eval`.
+
+### TOML (Tom's Obvious Minimal Language)
+A configuration file format. tomlplusplus is a C++17 header-only TOML parser
+and serializer supporting TOML v1.0.0.
+
+### tomlplusplus
+A header-only C++17 TOML library. Licensed under MIT. Provides parsing,
+serialization, and manipulation of TOML documents. Built with Meson or CMake.
+
+### treefmt
+A universal code formatter dispatcher. Configured in `ci/default.nix` to run
+all language-specific formatters in a single pass.
+
+---
+
+## U
+
+### Unlicense
+A public domain dedication license. Used for some trivial files in the
+monorepo.
+
+---
+
+## V
+
+### vcpkg
+Microsoft's C/C++ package manager. MeshMC uses vcpkg for Windows dependency
+management (`meshmc/vcpkg.json`, `meshmc/vcpkg-configuration.json`).
+
+### Vim
+A highly configurable text editor. MNV is a fork of Vim with additional
+features. Licensed under the Vim license + GPL-3.0.
+
+### VMX / VSX
+IBM Power architecture SIMD instruction sets (Vector Multimedia Extension /
+Vector Scalar Extension). neozip uses VMX/VSX for Power8/9 acceleration.
+
+---
+
+## W
+
+### WSL (Windows Subsystem for Linux)
+A compatibility layer for running Linux on Windows. Project Tick does **not**
+support building under WSL; native Windows builds via MSVC are required.
+
+---
+
+## Z
+
+### zlib
+The original compression library implementing the deflate algorithm. neozip
+is a high-performance fork of zlib-ng, which itself is a modernized fork of
+zlib.
+
+### zlib-ng
+A modernized fork of zlib with SIMD optimizations. neozip is Project Tick's
+fork of zlib-ng with additional modifications.
+
+### Zlib License
+A permissive open-source license. Used by neozip and archived/ptlibzippy.
diff --git a/docs/handbook/Project-Tick/licensing.md b/docs/handbook/Project-Tick/licensing.md
new file mode 100644
index 0000000000..d80281a42e
--- /dev/null
+++ b/docs/handbook/Project-Tick/licensing.md
@@ -0,0 +1,371 @@
+# Project Tick — Licensing
+
+## Overview
+
+Project Tick is a multi-licensed ecosystem. Because the monorepo contains
+components with diverse origins — from BSD utility ports to GPL-licensed
+applications to MIT/Zlib libraries — each sub-project carries the license
+appropriate to its upstream lineage and Project Tick's own contributions.
+
+The project uses the [REUSE](https://reuse.software/) specification (version
+3.0) for license compliance. Every file in the repository is annotated with
+SPDX license identifiers and copyright statements, either inline in file
+headers or via the `REUSE.toml` configuration file.
+
+---
+
+## License Inventory
+
+The `LICENSES/` directory contains 20 distinct SPDX-compliant license texts:
+
+| SPDX Identifier | License Name | Category |
+|-----------------|-------------|----------|
+| `Apache-2.0` | Apache License 2.0 | Permissive |
+| `BSD-1-Clause` | BSD 1-Clause License | Permissive |
+| `BSD-2-Clause` | BSD 2-Clause "Simplified" License | Permissive |
+| `BSD-3-Clause` | BSD 3-Clause "New" License | Permissive |
+| `BSD-4-Clause` | BSD 4-Clause "Original" License | Permissive |
+| `BSL-1.0` | Boost Software License 1.0 | Permissive |
+| `CC-BY-SA-4.0` | Creative Commons Attribution-ShareAlike 4.0 | Creative Commons |
+| `CC0-1.0` | Creative Commons Zero 1.0 Universal | Public Domain Dedication |
+| `GPL-2.0-only` | GNU General Public License v2.0 only | Copyleft |
+| `GPL-3.0-only` | GNU General Public License v3.0 only | Copyleft |
+| `GPL-3.0-or-later` | GNU General Public License v3.0 or later | Copyleft |
+| `LGPL-2.0-or-later` | GNU Library General Public License v2.0 or later | Weak Copyleft |
+| `LGPL-2.1-or-later` | GNU Lesser General Public License v2.1 or later | Weak Copyleft |
+| `LGPL-3.0-or-later` | GNU Lesser General Public License v3.0 or later | Weak Copyleft |
+| `LicenseRef-Qt-Commercial` | Qt Commercial License (reference) | Proprietary |
+| `MIT` | MIT License | Permissive |
+| `MS-PL` | Microsoft Public License | Permissive |
+| `Unlicense` | The Unlicense | Public Domain Dedication |
+| `Vim` | Vim License | Permissive (custom) |
+| `Zlib` | zlib License | Permissive |
+
+---
+
+## Per-Component License Map
+
+### Applications
+
+| Component | Directory | License | Copyright |
+|-----------|-----------|---------|-----------|
+| **MeshMC** | `meshmc/` | GPL-3.0-or-later | 2026 Project Tick |
+| MeshMC (historical code) | `meshmc/` | Apache-2.0 (incorporated work) | 2012–2022 MultiMC Contributors |
+| **MNV** | `mnv/` | Vim AND GPL-3.0-or-later | Bram Moolenaar & Vim Contributors & Project Tick |
+| **cgit** | `cgit/` | GPL-2.0-only | cgit Contributors & Project Tick |
+
+### Libraries
+
+| Component | Directory | License | Copyright |
+|-----------|-----------|---------|-----------|
+| **NeoZip** | `neozip/` | Zlib | Zlib Contributors & Zlib-ng Contributors & Project Tick |
+| **Json4C++** | `json4cpp/` | MIT | Json4C++ Contributors & Project Tick |
+| **toml++** | `tomlplusplus/` | MIT | Toml++ Contributors & Project Tick |
+| **libnbt++** | `libnbtplusplus/` | LGPL-3.0-or-later | libnbtplusplus Contributors & ljfa-ag & Project Tick |
+| **cmark** | `cmark/` | BSD-2-Clause AND MIT AND CC-BY-SA-4.0 | CMark Contributors & Project Tick |
+| **GenQRCode** | `genqrcode/` | LGPL-2.1-or-later | GenQRCode Contributors & Project Tick |
+| **ForgeWrapper** | `forgewrapper/` | MIT | ForgeWrapper Contributors & Project Tick |
+
+### System Utilities
+
+| Component | Directory | License | Copyright |
+|-----------|-----------|---------|-----------|
+| **CoreBinUtils** | `corebinutils/` | BSD-1-Clause AND BSD-2-Clause AND BSD-3-Clause AND BSD-4-Clause AND MIT | FreeBSD Contributors & Project Tick |
+
+### Infrastructure
+
+| Component | Directory | License | Copyright |
+|-----------|-----------|---------|-----------|
+| **Meta** | `meta/` | MS-PL | MultiMC Contributors & PolyMC Contributors & PrismLauncher Contributors & Project Tick |
+| **tickborg** | `ofborg/` | MIT | NixOS Contributors & Project Tick |
+| **Images4Docker** | `images4docker/` | GPL-3.0-or-later | Project Tick |
+
+### Archived
+
+| Component | Directory | License | Copyright |
+|-----------|-----------|---------|-----------|
+| **ProjT Launcher** | `archived/projt-launcher/` | GPL-3.0-only | MultiMC Contributors & Prism Launcher Contributors & PolyMC Contributors & Project Tick |
+| **ProjT Modpack** | `archived/projt-modpack/` | GPL-3.0-only | Project Tick |
+| **ProjT Minicraft Modpack** | `archived/projt-minicraft-modpack/` | MIT | Project Tick |
+| **ptlibzippy** | `archived/ptlibzippy/` | Zlib | Zlib Contributors & Project Tick |
+
+---
+
+## REUSE.toml Analysis
+
+The `REUSE.toml` file (version 1) uses `[[annotations]]` blocks to map file
+paths to their SPDX license identifiers and copyright statements. This is the
+primary mechanism for bulk license annotation.
+
+### Infrastructure and Configuration Files
+
+```toml
+[[annotations]]
+path = [
+ ".gitignore", ".gitattributes", ".gitmodules", ".github/**",
+ ".envrc", ".markdownlint.yaml", ".markdownlintignore",
+ "Containerfile", "default.nix", "flake.lock", "flake.nix",
+ "shell.nix", "vcpkg-configuration.json", "vcpkg.json",
+ ".clang-format", ".clang-tidy", "CODEOWNERS", "hooks/**", "ci/**"
+]
+SPDX-License-Identifier = "CC0-1.0"
+SPDX-FileCopyrightText = "NONE"
+```
+
+Configuration files, CI scripts, Git metadata, and build system configuration
+are placed in the public domain under CC0-1.0 with no copyright claim.
+
+### Documentation
+
+```toml
+[[annotations]]
+path = ["**/*.md", "doc/**"]
+SPDX-License-Identifier = "CC0-1.0"
+SPDX-FileCopyrightText = "2026 Project Tick"
+```
+
+All Markdown files and documentation are CC0-1.0, allowing unrestricted reuse.
+
+### MeshMC-Specific Files
+
+```toml
+# Launcher packaging
+[[annotations]]
+path = ["launcher/package/**"]
+SPDX-License-Identifier = "GPL-3.0-or-later"
+
+# Qt UI files
+[[annotations]]
+path = ["**/*.ui"]
+SPDX-License-Identifier = "GPL-3.0-or-later"
+
+# CMake presets
+[[annotations]]
+path = ["CMakePresets.json"]
+SPDX-License-Identifier = "GPL-3.0-or-later"
+
+# Nix build files
+[[annotations]]
+path = ["nix/**"]
+SPDX-License-Identifier = "GPL-3.0-or-later"
+
+# Branding and resources
+[[annotations]]
+path = ["branding/**", "launcher/resources/**"]
+SPDX-License-Identifier = "CC0-1.0"
+```
+
+### CMake Build Files
+
+```toml
+[[annotations]]
+path = ["cmake/**", "**/CMakeLists.txt"]
+SPDX-License-Identifier = "BSD-3-Clause"
+SPDX-FileCopyrightText = "Various authors"
+```
+
+CMake modules and build definitions use BSD-3-Clause, reflecting their diverse
+authorship.
+
+### Test Data
+
+```toml
+[[annotations]]
+path = ["**/testdata/**"]
+SPDX-License-Identifier = "CC0-1.0"
+SPDX-FileCopyrightText = "NONE"
+```
+
+Test data has no copyright claims and is in the public domain.
+
+---
+
+## License Compatibility
+
+### Core Dependency Chain
+
+MeshMC (GPL-3.0-or-later) links against libraries with the following licenses:
+
+| Library | License | GPL-3.0 Compatible? |
+|---------|---------|---------------------|
+| json4cpp | MIT | Yes — permissive |
+| tomlplusplus | MIT | Yes — permissive |
+| libnbtplusplus | LGPL-3.0-or-later | Yes — LGPL is GPL-compatible |
+| neozip | Zlib | Yes — permissive |
+| cmark | BSD-2-Clause/MIT | Yes — permissive |
+| genqrcode | LGPL-2.1-or-later | Yes — LGPL is GPL-compatible |
+| Qt 6 | LGPL-3.0 / GPL-3.0 / Commercial | Yes — LGPL/GPL-compatible |
+| QuaZip | LGPL-2.1 | Yes — LGPL is GPL-compatible |
+| libarchive | BSD-2-Clause | Yes — permissive |
+| ECM | BSD-3-Clause | Yes — permissive |
+
+All library dependencies are GPL-3.0 compatible. The GPL-3.0-or-later license
+of MeshMC governs the combined work.
+
+### ForgeWrapper (Runtime)
+
+ForgeWrapper (MIT) is loaded at runtime as a separate Java process, not linked
+at compile time. The MIT license is compatible with GPL-3.0 for distribution
+purposes, and runtime invocation does not create a derivative work concern.
+
+### Meta (MS-PL)
+
+The MS-PL (Microsoft Public License) used by `meta/` is a permissive license
+that allows use, modification, and redistribution. It is generally considered
+compatible with GPL for independent components. Since `meta/` is a standalone
+Python project that generates JSON data consumed by MeshMC over HTTP, there is
+no linking relationship.
+
+### CoreBinUtils (Multi-BSD)
+
+CoreBinUtils uses a combination of BSD-1-Clause, BSD-2-Clause, BSD-3-Clause,
+BSD-4-Clause, and MIT — reflecting the diverse origins of FreeBSD utilities.
+The BSD-4-Clause (advertising clause) applies only to the specific files that
+carry it. All BSD variants are permissive and do not impose copyleft
+obligations.
+
+### MNV (Vim License + GPL-3.0-or-later)
+
+MNV uses a dual license: the Vim license (a permissive custom license) and
+GPL-3.0-or-later. The Vim license is a charityware license that
+allows free use, modification, and redistribution. The GPL-3.0-or-later
+applies to Project Tick's modifications.
+
+### cgit (GPL-2.0-only)
+
+cgit uses GPL-2.0-only (not "or later"), which means it cannot be
+relicensed under GPL-3.0. It remains an independent component with no
+linking relationship to GPL-3.0 components.
+
+---
+
+## SPDX Headers
+
+### Inline Headers
+
+Source files should include SPDX headers at the top:
+
+```c
+// SPDX-FileCopyrightText: 2026 Project Tick
+// SPDX-License-Identifier: GPL-3.0-or-later
+```
+
+```python
+# SPDX-FileCopyrightText: 2026 Project Tick
+# SPDX-License-Identifier: MS-PL
+```
+
+```cmake
+# SPDX-FileCopyrightText: 2026 Project Tick
+# SPDX-License-Identifier: BSD-3-Clause
+```
+
+### REUSE.toml Bulk Annotations
+
+For files where inline headers are impractical (binary files, generated files,
+configuration files), use `REUSE.toml` annotations with glob patterns:
+
+```toml
+[[annotations]]
+path = ["pattern/**"]
+SPDX-License-Identifier = "LICENSE-ID"
+SPDX-FileCopyrightText = "Copyright holder"
+```
+
+### Checking Compliance
+
+```bash
+# Install reuse tool
+pip install reuse
+
+# Check entire repository
+reuse lint
+
+# Download missing license texts
+reuse download --all
+```
+
+The pre-commit hook via lefthook automatically runs `reuse lint` and
+downloads missing licenses if needed.
+
+---
+
+## Adding New Files
+
+When adding new files to the repository:
+
+1. **Determine the appropriate license** based on the sub-project:
+ - Files in `meshmc/` → GPL-3.0-or-later
+ - Files in `neozip/` → Zlib
+ - Files in `json4cpp/` → MIT
+ - Files in `meta/` → MS-PL
+ - Documentation → CC0-1.0
+ - Configuration/build files → CC0-1.0 or BSD-3-Clause
+ - Test data → CC0-1.0
+
+2. **Add SPDX headers** to the file (if it supports comments)
+
+3. **Or add a REUSE.toml annotation** for files without comment support
+
+4. **Run `reuse lint`** to verify compliance
+
+### Adding New Sub-Projects
+
+If adding an entirely new sub-project:
+
+1. Add a `[[annotations]]` block to `REUSE.toml` for the new directory
+2. Place the appropriate license text in `LICENSES/` if not already present
+3. Ensure all files have proper SPDX identifiers
+4. Document the license in the sub-project's README
+
+---
+
+## Third-Party License Obligations
+
+### Attribution Requirements
+
+Several licenses in the ecosystem require attribution in distributed binaries:
+
+| License | Attribution Requirement |
+|---------|----------------------|
+| Apache-2.0 | NOTICE file, license text |
+| BSD-2-Clause | License text in documentation |
+| BSD-3-Clause | License text in documentation |
+| BSD-4-Clause | License text + advertising clause |
+| MIT | License text |
+| LGPL-2.1/3.0 | License text, source availability |
+| GPL-2.0/3.0 | Full source code availability |
+| MS-PL | License text |
+| CC-BY-SA-4.0 | Attribution, ShareAlike |
+
+### Copyleft Obligations
+
+| License | Source Obligation | Dynamic Linking |
+|---------|------------------|-----------------|
+| GPL-2.0-only | Full source for the program | Derivative work |
+| GPL-3.0-only/or-later | Full source for the program | Derivative work |
+| LGPL-2.1-or-later | Source for LGPL portions, object files for relinking | Permitted without GPL |
+| LGPL-3.0-or-later | Source for LGPL portions, installation info | Permitted without GPL |
+
+---
+
+## Trademark vs. License
+
+It is crucial to understand that **open source licenses do not grant trademark
+rights**. As stated in `TRADEMARK.md`:
+
+> Open source licenses govern the use, modification, and redistribution of
+> source code only. They do **not** grant rights to use the Project Tick name,
+> logo, or branding.
+
+See [trademark-policy.md](trademark-policy.md) for the full trademark policy.
+
+---
+
+## CLA and License Grants
+
+The Project Tick Contributor License Agreement (CLA) ensures that all
+contributions can be distributed under the project's existing licenses. By
+signing the CLA, contributors:
+
+1. Confirm they have the legal right to make the contribution
+2. Grant Project Tick a perpetual license to distribute the contribution
+3. Agree not to knowingly infringe third-party rights
+
+This allows Project Tick to maintain license consistency across the ecosystem
+without requiring future relicensing negotiations.
+
+CLA text: <https://projecttick.org/licenses/PT-CLA-2.0.txt>
diff --git a/docs/handbook/Project-Tick/overview.md b/docs/handbook/Project-Tick/overview.md
new file mode 100644
index 0000000000..6d36f7f914
--- /dev/null
+++ b/docs/handbook/Project-Tick/overview.md
@@ -0,0 +1,335 @@
+# Project Tick — Organization Overview
+
+## Introduction
+
+Project Tick is a modular software ecosystem organized as a unified monorepo. It
+encompasses applications, libraries, system utilities, infrastructure tooling,
+and metadata generators — all managed under a single repository at
+`github.com/Project-Tick/Project-Tick`. The project is dedicated to providing
+developers with ease of use and users with long-lasting software.
+
+The monorepo approach ensures tight integration between components while
+preserving the independence of each sub-project. Every directory at the
+repository root represents an autonomous module, library, tool, or application
+that can be built and used standalone or as part of the larger system.
+
+Project Tick focuses on three guiding principles:
+
+1. **Reproducible builds** — Nix flakes and pinned dependencies ensure every
+ build produces identical output regardless of the host environment.
+2. **Minimal dependencies** — Each component pulls only what it strictly needs.
+3. **Full control over the software stack** — From compression libraries to text
+ editors, Project Tick maintains its own forks and adaptations to guarantee
+ long-term stability and security.
+
+---
+
+## Mission
+
+Project Tick exists to build, package, and run software across multiple
+platforms with complete transparency and reproducibility. The project provides:
+
+- A custom Minecraft launcher (MeshMC) with deep mod-loader integration
+- System-level UNIX utilities ported from FreeBSD
+- A text editor fork (MNV) with modern enhancements
+- Foundational C/C++ libraries for compression, serialization, and parsing
+- Infrastructure for CI/CD, container images, and metadata generation
+- Git web interfaces and documentation tooling
+
+Every component feeds into the broader mission: an ecosystem where every
+dependency is accounted for, every license is tracked, and every build is
+reproducible.
+
+---
+
+## Sub-Projects
+
+Project Tick contains the following top-level components, organized by category.
+
+### Applications
+
+| Directory | Name | Description | Language | License |
+|-----------|------|-------------|----------|---------|
+| `meshmc/` | **MeshMC** | Custom Minecraft launcher focused on predictability, long-term stability, and simplicity. Supports Forge, NeoForge, Fabric, and Quilt mod loaders. Built with Qt 6 and C++23. Current version: 7.0.0. | C++ | GPL-3.0-or-later |
+| `mnv/` | **MNV** | Greatly improved fork of the Vi/Vim text editor. Features multi-level undo, syntax highlighting, command-line history, spell checking, filename completion, block operations, and a script language. Provides a POSIX-compatible vi implementation in its minimal build. | C | Vim AND GPL-3.0-or-later |
+| `cgit/` | **cgit** | Fast CGI-based web interface for Git repositories. Uses a built-in cache to decrease server I/O pressure. Supports Lua scripting for custom filters. | C | GPL-2.0-only |
+
+### Libraries
+
+| Directory | Name | Description | Language | License |
+|-----------|------|-------------|----------|---------|
+| `neozip/` | **NeoZip** | Next-generation zlib/zlib-ng fork for data compression. Features SIMD-accelerated implementations (SSE2, AVX2, AVX-512, NEON, etc.) for Adler32, CRC32, inflate, and deflate. Supports CPU intrinsics on x86-64, ARM, Power, RISC-V, LoongArch, and s390x. | C | Zlib |
+| `json4cpp/` | **Json4C++** | Header-only JSON library for modern C++ (nlohmann/json fork). Supports JSON Pointer, JSON Patch, JSON Merge Patch, BSON, CBOR, MessagePack, UBJSON, and BJData binary formats. Single-header and multi-header modes. | C++ | MIT |
+| `tomlplusplus/` | **toml++** | Header-only TOML parser and serializer for C++17. Passes all tests in the official toml-test suite. Supports serialization to JSON and YAML, proper UTF-8 handling, and works with or without exceptions and RTTI. | C++ | MIT |
+| `libnbtplusplus/` | **libnbt++** | C++ library for Minecraft's Named Binary Tag (NBT) file format. Reads and writes compressed and uncompressed NBT files. Version 3 is a ground-up rewrite for usability. | C++ | LGPL-3.0-or-later |
+| `cmark/` | **cmark** | CommonMark reference implementation in C. Provides a C API for parsing and rendering Markdown documents. Includes fuzz testing infrastructure. | C | BSD-2-Clause AND MIT AND CC-BY-SA-4.0 |
+| `genqrcode/` | **GenQRCode** | QR Code encoding library (libqrencode fork). Supports QR Code model 2 per JIS X0510:2004 / ISO/IEC 18004. Handles numeric, alphanumeric, kanji (Shift-JIS), and 8-bit data. Also supports Micro QR Code (experimental). | C | LGPL-2.1-or-later |
+| `forgewrapper/` | **ForgeWrapper** | Java library enabling launchers to start Minecraft 1.13+ with Forge. Provides a service-provider interface (`IFileDetector`) for custom file detection rules. Built with Gradle. | Java | MIT |
+
+### System Utilities
+
+| Directory | Name | Description | Language | License |
+|-----------|------|-------------|----------|---------|
+| `corebinutils/` | **CoreBinUtils** | Collection of BSD/FreeBSD core utilities ported to Linux. Includes `cat`, `chmod`, `cp`, `date`, `dd`, `df`, `echo`, `ed`, `expr`, `hostname`, `kill`, `ln`, `ls`, `mkdir`, `mv`, `nproc`, `ps`, `pwd`, `realpath`, `rm`, `rmdir`, `sh`, `sleep`, `stty`, `sync`, `test`, `timeout`, and more. Uses musl-first toolchain selection. | C | BSD-1-Clause AND BSD-2-Clause AND BSD-3-Clause AND BSD-4-Clause AND MIT |
+
+### Infrastructure & Tooling
+
+| Directory | Name | Description | Language | License |
+|-----------|------|-------------|----------|---------|
+| `meta/` | **Meta** | Metadata generator for the MeshMC launcher. Generates JSON manifests and JARs for Mojang, Forge, NeoForge, Fabric, Quilt, LiteLoader, and Java runtime versions. Written in Python, uses Poetry for dependency management. Deployable as a NixOS service. | Python | MS-PL |
+| `ofborg/` | **tickborg** | Distributed RabbitMQ-based CI system adapted from NixOS ofborg. Automatically detects changed projects in PRs, builds affected sub-projects using their native build systems, posts results as GitHub check runs, and supports multi-platform builds (Linux, macOS, Windows, FreeBSD). | Rust | MIT |
+| `images4docker/` | **Images4Docker** | Collection of 40 Dockerfiles for building MeshMC across different Linux distributions. Each image includes the Qt 6 toolchain and all MeshMC build dependencies. Supports apt, dnf, apk, zypper, yum, pacman, xbps, nix, and emerge package managers. Rebuilt daily at 03:17 UTC. | Dockerfile | GPL-3.0-or-later |
+| `ci/` | **CI Infrastructure** | CI support files including Nix-based tooling (treefmt, codeowners-validator), GitHub Actions JavaScript helpers (commit linting, PR preparation, review management), branch classification, and pinned Nixpkgs for reproducible formatting. | Nix, JavaScript | CC0-1.0 |
+| `hooks/` | **Git Hooks** | Lefthook-managed Git hooks for pre-commit REUSE license checking and code style validation via checkpatch. | Shell | CC0-1.0 |
+
+### Archived Projects
+
+| Directory | Name | Description | License |
+|-----------|------|-------------|---------|
+| `archived/projt-launcher/` | **ProjT Launcher** | Original Minecraft launcher (predecessor to MeshMC). Based on MultiMC/PrismLauncher/PolyMC. | GPL-3.0-only |
+| `archived/projt-modpack/` | **ProjT Modpack** | Minecraft modpack distribution tooling. | GPL-3.0-only |
+| `archived/projt-minicraft-modpack/` | **ProjT Minicraft Modpack** | Minicraft modpack distribution. | MIT |
+| `archived/ptlibzippy/` | **ptlibzippy** | ZIP library (predecessor to NeoZip integration). | Zlib |
+
+### Documentation & Configuration
+
+| Directory / File | Description |
+|------------------|-------------|
+| `docs/` | Project documentation including the developer handbook |
+| `LICENSES/` | SPDX-compliant license texts (20 distinct licenses) |
+| `REUSE.toml` | REUSE 3.0 compliance annotations mapping paths to licenses |
+| `flake.nix` | Top-level Nix flake providing development shells with LLVM 22 |
+| `flake.lock` | Locked Nix inputs for reproducibility |
+| `bootstrap.sh` | Cross-distro bootstrap script for dependency installation |
+| `bootstrap.cmd` | Windows bootstrap script using Scoop and vcpkg |
+| `lefthook.yml` | Git hooks configuration for pre-commit checks |
+| `.github/` | GitHub Actions workflows, issue templates, PR template, CODEOWNERS, DCO enforcement |
+
+---
+
+## Technology Stack
+
+### Programming Languages
+
+| Language | Where Used |
+|----------|------------|
+| C | neozip, cmark, genqrcode, cgit, corebinutils, mnv |
+| C++ (C++23) | meshmc, json4cpp (C++11/17), tomlplusplus (C++17), libnbtplusplus (C++11) |
+| Rust | tickborg (ofborg) |
+| Java | forgewrapper |
+| Python | meta |
+| JavaScript / Node.js | CI scripts (github-script) |
+| Nix | CI infrastructure, development shells, NixOS deployment modules |
+| Shell (Bash/POSIX) | bootstrap scripts, hooks, build orchestration |
+| Dockerfile | images4docker |
+| CMake | meshmc, neozip, cmark, genqrcode, json4cpp, libnbtplusplus, mnv |
+
+### Build Systems
+
+| Build System | Projects |
+|--------------|----------|
+| CMake | meshmc, neozip, cmark, genqrcode, json4cpp, libnbtplusplus, mnv |
+| Meson | tomlplusplus |
+| Make (GNU Make) | cgit, corebinutils |
+| Autotools | mnv (configure), genqrcode (configure.ac), neozip (configure) |
+| Gradle | forgewrapper |
+| Cargo | tickborg |
+| Poetry | meta |
+| Nix | CI, development shells, deployment |
+
+### Frameworks & Key Dependencies
+
+| Dependency | Used By | Purpose |
+|------------|---------|---------|
+| Qt 6 (Core, Widgets, Concurrent, Network, NetworkAuth, Test, Xml) | meshmc | GUI framework |
+| QuaZip (Qt 6) | meshmc | ZIP archive support |
+| zlib / NeoZip | meshmc, neozip | Data compression |
+| libarchive | meshmc | Archive extraction |
+| Extra CMake Modules (ECM) | meshmc | KDE CMake utilities |
+| RabbitMQ (AMQP) | tickborg | Message queue for distributed CI |
+| Poetry | meta | Python dependency management |
+| Crowdin | meshmc | Translation management |
+
+---
+
+## How Sub-Projects Relate
+
+The Project Tick ecosystem has clear dependency chains:
+
+```
+meshmc (application)
+├── json4cpp (JSON parsing)
+├── tomlplusplus (TOML configuration parsing)
+├── libnbtplusplus (Minecraft NBT format)
+├── neozip (compression)
+├── cmark (Markdown rendering)
+├── genqrcode (QR code generation)
+├── forgewrapper (Forge mod loader integration)
+└── meta (version metadata feeds)
+
+tickborg (CI system)
+├── Detects changes across all sub-projects
+├── Builds using native build systems
+└── Posts results as GitHub check runs
+
+images4docker (container images)
+├── Provides build environments for meshmc CI
+└── Covers 40 Linux distributions with Qt 6
+
+corebinutils (standalone)
+└── Independent FreeBSD utility ports
+
+mnv (standalone)
+└── Independent Vim fork
+
+cgit (standalone)
+└── Independent Git web interface
+```
+
+MeshMC is the primary consumer of the library sub-projects. The `meta/`
+component generates the version metadata that MeshMC uses to discover and
+download Minecraft versions, mod loaders, and Java runtimes. The `forgewrapper/`
+component is a Java shim that MeshMC invokes at runtime to bootstrap Forge.
+
+The `tickborg` system orchestrates CI across the entire monorepo, detecting
+which sub-projects are affected by a given change and building only those
+projects using their respective build systems.
+
+---
+
+## Repository Governance
+
+Project Tick is maintained by its core contributors under the oversight of
+Mehmet Samet Duman (trademark holder). The project uses:
+
+- **CODEOWNERS** for ownership-based review routing
+- **DCO (Developer Certificate of Origin)** sign-off on every commit
+- **CLA (Contributor License Agreement)** for all contributions
+- **Conventional Commits** for structured commit messages
+- **REUSE 3.0** for license compliance
+
+The Code of Conduct (Version 2, 15 February 2026) establishes behavioral and
+ethical standards focused on technical integrity, licensing compliance,
+infrastructure security, and good-faith collaboration.
+
+---
+
+## Official Communication Channels
+
+| Channel | URL / Address |
+|---------|---------------|
+| GitHub Issues | `github.com/Project-Tick/Project-Tick/issues` |
+| Email | `projecttick@projecttick.org` |
+| Trademark inquiries | `yongdohyun@projecttick.org` |
+| CLA text | `projecttick.org/licenses/PT-CLA-2.0.txt` |
+| Crowdin (translations) | `crowdin.com/project/projtlauncher` |
+
+---
+
+## Version History
+
+Project Tick evolved from several predecessor projects:
+
+1. **MultiMC** (2012–2022) — The original custom Minecraft launcher. Apache-2.0
+ licensed. MeshMC's launcher code incorporates work from this project.
+
+2. **PolyMC / PrismLauncher** — Community forks of MultiMC. The `meta/`
+ component traces its lineage through these projects (MS-PL license).
+
+3. **ProjT Launcher** — Project Tick's first launcher, now archived in
+ `archived/projt-launcher/`. GPL-3.0-only.
+
+4. **MeshMC** — The current-generation launcher, a ground-up evolution with
+ C++23, Qt 6, and the full library stack.
+
+The infrastructure components have diverse origins:
+
+- **tickborg** is adapted from NixOS's [ofborg](https://github.com/NixOS/ofborg)
+- **neozip** is based on [zlib-ng](https://github.com/zlib-ng/zlib-ng)
+- **json4cpp** is based on [nlohmann/json](https://github.com/nlohmann/json)
+- **tomlplusplus** is based on [marzer/tomlplusplus](https://github.com/marzer/tomlplusplus)
+- **cgit** is the Project Tick fork of the cgit Git web interface
+- **cmark** is the Project Tick fork of the CommonMark reference implementation
+- **mnv** is the Project Tick fork of Vim
+- **corebinutils** contains FreeBSD utility ports
+
+---
+
+## Platform Support
+
+### MeshMC (Primary Application)
+
+| Platform | Architecture | Status |
+|----------|-------------|--------|
+| Linux | x86_64 | Fully supported |
+| Linux | aarch64 | Fully supported |
+| macOS | x86_64 | Fully supported |
+| macOS | aarch64 (Apple Silicon) | Fully supported |
+| macOS | Universal Binary | Supported via `macos_universal` preset |
+| Windows | x86_64 (MSVC) | Fully supported |
+| Windows | x86_64 (MinGW) | Fully supported |
+| Windows | aarch64 | Supported |
+| FreeBSD | x86_64 | Supported (VM-based CI) |
+
+### MNV
+
+MNV runs on MS-Windows (7, 8, 10, 11), macOS, Haiku, VMS, and almost all
+flavors of UNIX.
+
+### CoreBinUtils
+
+Targets Linux with musl-first toolchain selection. Also builds against glibc
+when musl is unavailable.
+
+### tickborg CI Platform Matrix
+
+| Platform | Runner |
+|----------|--------|
+| `x86_64-linux` | `ubuntu-latest` |
+| `aarch64-linux` | `ubuntu-24.04-arm` |
+| `x86_64-darwin` | `macos-15` |
+| `aarch64-darwin` | `macos-15` |
+| `x86_64-windows` | `windows-2025` |
+| `aarch64-windows` | `windows-2025` |
+| `x86_64-freebsd` | `ubuntu-latest` (VM) |
+
+---
+
+## Quick Links
+
+| Resource | Path |
+|----------|------|
+| Root README | `README.md` |
+| Contributing Guide | `CONTRIBUTING.md` |
+| Security Policy | `SECURITY.md` |
+| Code of Conduct | `CODE_OF_CONDUCT.md` |
+| Trademark Policy | `TRADEMARK.md` |
+| License Directory | `LICENSES/` |
+| REUSE Configuration | `REUSE.toml` |
+| MeshMC Build Guide | `meshmc/BUILD.md` |
+| CI Infrastructure | `ci/README.md` |
+| tickborg Documentation | `ofborg/README.md` |
+| Developer Handbook | `docs/handbook/` |
+
+---
+
+## Naming Conventions
+
+- **Project Tick** — The umbrella organization and monorepo name
+- **MeshMC** — The Minecraft launcher application
+- **MNV** — The text editor (Vi/Vim fork)
+- **tickborg** — The distributed CI bot (the Rust workspace in `ofborg/tickborg/`)
+- **ofborg** — The upstream project tickborg is derived from; also the directory name (`ofborg/`)
+- **NeoZip** — The compression library (zlib-ng fork)
+- **Json4C++** — The JSON library (nlohmann/json fork)
+- **toml++** — The TOML library
+- **libnbt++** — The NBT library
+- **GenQRCode** — The QR code library (libqrencode fork)
+- **ForgeWrapper** — The Forge bootstrapper
+- **CoreBinUtils** — The BSD utility ports
+- **Meta** — The launcher metadata generator
+- **Images4Docker** — The Docker image collection
+- **cgit** — The Git web interface
+
+The trademark "Project Tick" and associated branding are owned by Mehmet Samet
+Duman. See `TRADEMARK.md` for usage policies.
diff --git a/docs/handbook/Project-Tick/release-process.md b/docs/handbook/Project-Tick/release-process.md
new file mode 100644
index 0000000000..3c292eaaae
--- /dev/null
+++ b/docs/handbook/Project-Tick/release-process.md
@@ -0,0 +1,374 @@
+# Project Tick — Release Process
+
+## Overview
+
+Project Tick uses a per-component release methodology. Each sub-project
+maintains its own version numbering, release cadence, and distribution
+channels. Releases are triggered by Git tags and automated through GitHub
+Actions workflows.
+
+---
+
+## Versioning Schemes
+
+### Semantic Versioning (SemVer)
+
+Most sub-projects follow [Semantic Versioning 2.0.0](https://semver.org/):
+
+```
+MAJOR.MINOR.PATCH[-PRERELEASE][+BUILD]
+```
+
+| Component | Current Version | Source of Truth |
+|-----------------|-----------------|-----------------------------------------|
+| MeshMC | 7.0.0 | `meshmc/CMakeLists.txt` (project()) |
+| meta | 0.0.5-1 | `meta/pyproject.toml` ([tool.poetry]) |
+| neozip | — | `neozip/CMakeLists.txt` |
+| json4cpp | — | `json4cpp/CMakeLists.txt` |
+| tomlplusplus | — | `tomlplusplus/meson.build` |
+| libnbtplusplus | — | `libnbtplusplus/CMakeLists.txt` |
+| forgewrapper | — | `forgewrapper/gradle.properties` |
+| cmark | — | `cmark/CMakeLists.txt` |
+| genqrcode | — | `genqrcode/configure.ac` |
+| tickborg | — | `ofborg/Cargo.toml` |
+
+### MeshMC Version Details
+
+MeshMC's version is defined in its root `CMakeLists.txt`:
+
+```cmake
+project(MeshMC
+ VERSION 7.0.0
+ DESCRIPTION "MeshMC — Custom Minecraft Launcher"
+ HOMEPAGE_URL "https://meshmc.org"
+ LANGUAGES CXX C
+)
+```
+
+The version is decomposed into CMake variables and compiled into the binary
+via `buildconfig/`:
+- `MeshMC_VERSION_MAJOR` — 7
+- `MeshMC_VERSION_MINOR` — 0
+- `MeshMC_VERSION_PATCH` — 0
+
+### meta Version Details
+
+The meta package uses Poetry versioning with a Debian-style suffix:
+
+```toml
+[tool.poetry]
+name = "meta"
+version = "0.0.5-1"
+```
+
+---
+
+## Branch Strategy
+
+### Branch Types
+
+| Branch Pattern | Purpose | Protected | CI Level |
+|------------------|-------------------------------|-----------|-------------|
+| `master` | Main development branch | Yes | Full |
+| `release-*` | Release preparation branches | Yes | Full |
+| `staging-*` | Integration testing branches | No | Partial |
+| `feature-*` | Feature development | No | PR-only |
+| `fix-*` | Bug fix branches | No | PR-only |
+
+### Branch Classification Logic
+
+Branch classification is implemented in `ci/supportedBranches.js`:
+
+```javascript
+// Simplified representation of the classify() function:
+function classify(branch) {
+ if (branch === 'master')
+ return { level: 'full', protected: true };
+ if (branch.startsWith('release-'))
+ return { level: 'full', protected: true };
+ if (branch.startsWith('staging-'))
+ return { level: 'partial', protected: false };
+ return { level: 'pr-only', protected: false };
+}
+```
+
+Protected branches cannot receive direct pushes; all changes must go through
+pull requests with passing CI.
+
+---
+
+## Release Workflow
+
+### Phase 1 — Feature Freeze
+
+1. A release branch is created from `master`:
+ ```bash
+ git checkout -b release-7.1.0 master
+ ```
+
+2. Only bug fixes and documentation updates are merged into the release branch.
+
+3. CI runs full validation on the release branch (same as `master`).
+
+### Phase 2 — Version Bump
+
+1. Update the version in the component's source of truth:
+ - **MeshMC**: Edit `meshmc/CMakeLists.txt` `project(VERSION ...)`
+ - **meta**: Edit `meta/pyproject.toml` `version = "..."`
+ - **neozip**: Edit `neozip/CMakeLists.txt`
+ - **Other CMake projects**: Edit the relevant `CMakeLists.txt`
+ - **Meson projects**: Edit `meson.build`
+ - **Gradle projects**: Edit `gradle.properties`
+ - **Cargo projects**: Edit `Cargo.toml`
+
+2. Update changelogs:
+ - MeshMC maintains `meshmc/changelog.md`
+ - Other components maintain changelogs in their directories
+
+3. Commit the version bump:
+ ```bash
+ git add -A
+ git commit -s -m "release: bump MeshMC to 7.1.0"
+ ```
+
+### Phase 3 — Tagging
+
+Create an annotated Git tag:
+
+```bash
+git tag -a v7.1.0 -m "MeshMC 7.1.0"
+git push origin v7.1.0
+```
+
+Tag naming conventions:
+- **MeshMC**: `v<MAJOR>.<MINOR>.<PATCH>` (e.g., `v7.1.0`)
+- **neozip**: `neozip-v<VERSION>` (e.g., `neozip-v2.2.3`)
+- **json4cpp**: `json4cpp-v<VERSION>`
+- **Other**: `<component>-v<VERSION>`
+
+### Phase 4 — Automated Build & Publish
+
+Pushing a tag triggers the corresponding release workflow:
+
+| Tag Pattern | Workflow | Artifacts |
+|----------------------|-------------------------------|---------------------------------------|
+| `v*` | `meshmc-release.yml` | Linux/macOS/Windows binaries |
+| `v*` | `meshmc-publish.yml` | Flathub, AUR, packaging repos |
+| `neozip-v*` | `neozip-release.yml` | Source archive, shared libraries |
+| `json4cpp-v*` | (manual) | Updated single-header |
+| `images4docker-v*` | `images4docker-build.yml` | Docker images to GHCR |
+
+---
+
+## MeshMC Release Details
+
+### Build Matrix
+
+MeshMC releases build for all supported platforms:
+
+| Platform | Compiler | Qt Version | Output Format |
+|-------------------|----------------|------------|----------------------|
+| Linux (x86_64) | Clang 18+ | 6.x | AppImage, tar.gz |
+| Linux (aarch64) | Clang 18+ | 6.x | AppImage, tar.gz |
+| macOS (x86_64) | Apple Clang 16+| 6.x | .dmg, .app |
+| macOS (aarch64) | Apple Clang 16+| 6.x | .dmg, .app |
+| Windows (x86_64) | MSVC 17.10+ | 6.x | .msi, portable .zip |
+| Windows (aarch64) | MSVC 17.10+ | 6.x | .msi, portable .zip |
+
+### Release Workflow Steps
+
+```
+meshmc-release.yml:
+ 1. Checkout code at tag
+ 2. Set up dependencies (via .github/actions/meshmc/setup-dependencies/)
+ 3. Configure with CMake presets (Release mode)
+ 4. Build
+ 5. Run tests (ctest)
+ 6. Package (via .github/actions/meshmc/package/)
+ 7. Create GitHub Release with artifacts
+ 8. Upload checksums (SHA-256)
+```
+
+### Post-Release Publishing
+
+```
+meshmc-publish.yml:
+ 1. Download release artifacts
+ 2. Update Flathub manifest
+ 3. Update AUR PKGBUILD
+ 4. Update packaging repository
+ 5. Notify announcement channels
+```
+
+---
+
+## neozip Release Details
+
+neozip releases produce:
+- Source tarball (`neozip-<version>.tar.gz`)
+- Pre-built shared libraries for major platforms
+- CMake package configuration files
+
+The build matrix covers multiple architectures to validate SIMD
+optimizations:
+- x86_64 (SSE2, SSE4.2, AVX2, AVX-512)
+- aarch64 (NEON, ARMv8 CRC)
+- s390x (DFLTCC hardware deflate)
+- ppc64le (VMX, VSX)
+- riscv64 (RVV, ZBC) — when available
+
+---
+
+## Docker Image Releases
+
+### images4docker Rebuild Cycle
+
+Docker images are rebuilt on a scheduled basis:
+
+```yaml
+# .github/workflows/images4docker-build.yml
+on:
+ schedule:
+ - cron: '17 3 * * *' # Daily at 03:17 UTC
+ push:
+ branches: [master]
+ paths: ['images4docker/**']
+```
+
+Each push to `master` that modifies `images4docker/` also triggers a rebuild.
+All 40 Dockerfiles are built and pushed to GitHub Container Registry (GHCR).
+
+---
+
+## meta Release Details
+
+The meta package uses Poetry for releases:
+
+```bash
+# Build distribution
+poetry build
+
+# Publish to PyPI (if applicable)
+poetry publish
+```
+
+meta also defines CLI scripts in `pyproject.toml`:
+
+```toml
+[tool.poetry.scripts]
+generate_fabric = "meta.run.generate_fabric:main"
+generate_forge = "meta.run.generate_forge:main"
+generate_mojang = "meta.run.generate_mojang:main"
+generate_neoforge = "meta.run.generate_neoforge:main"
+generate_quilt = "meta.run.generate_quilt:main"
+generate_java = "meta.run.generate_java:main"
+update_mojang = "meta.run.update_mojang:main"
+```
+
+---
+
+## tickborg (ofborg) Release Details
+
+tickborg is deployed as a Docker container:
+
+```bash
+# Build
+docker build -t tickborg:latest ofborg/
+
+# Deploy with docker-compose
+cd ofborg && docker-compose up -d
+```
+
+The Rust workspace produces two binaries:
+- `tickborg` — Main CI bot with AMQP integration
+- `tickborg-simple-build` — Simplified builder for local testing
+
+Deployment is managed via `ofborg/DEPLOY.md` and `ofborg/service.nix` for
+NixOS.
+
+---
+
+## Hotfix Process
+
+For critical security fixes or regressions:
+
+1. **Branch from the release tag**:
+ ```bash
+ git checkout -b hotfix-7.0.1 v7.0.0
+ ```
+
+2. **Apply the minimal fix** — only the patch, no feature additions.
+
+3. **Bump the PATCH version**.
+
+4. **Tag and release**:
+ ```bash
+ git tag -a v7.0.1 -m "MeshMC 7.0.1 — Security hotfix"
+ git push origin v7.0.1
+ ```
+
+5. **Cherry-pick to master**:
+ ```bash
+ git checkout master
+ git cherry-pick <hotfix-commit>
+ ```
+
+---
+
+## Pre-release / Development Builds
+
+Development builds are produced on every push to `master`:
+
+- MeshMC: `meshmc-ci.yml` produces nightly artifacts
+- neozip: `neozip-ci.yml` attaches build artifacts
+- Other components: CI produces artifacts accessible from workflow runs
+
+Pre-release builds are not tagged and are identified by commit SHA or
+workflow run number.
+
+---
+
+## Release Checklist
+
+### Before Tagging
+
+- [ ] All CI checks pass on the release branch
+- [ ] Version number updated in source of truth
+- [ ] Changelog updated with all changes since last release
+- [ ] Security advisories addressed
+- [ ] License compliance verified (`reuse lint` passes)
+- [ ] Documentation updated for new features
+- [ ] Breaking changes documented with migration guide
+
+### After Tagging
+
+- [ ] GitHub Release created with artifacts
+- [ ] SHA-256 checksums published
+- [ ] Package repositories updated (Flathub, AUR, etc.)
+- [ ] Downstream dependencies notified
+- [ ] Announcement published
+
+### Post-Release
+
+- [ ] Release branch merged back to `master` (if applicable)
+- [ ] Confirm all distribution channels have the new version
+- [ ] Monitor issue tracker for regression reports
+- [ ] Begin next development cycle with version bump on `master`
+
+---
+
+## Dependency Updates
+
+### Automated Updates
+
+- **Renovate**: Configured in `meta/renovate.json` for automated dependency
+ PRs in the meta sub-project
+- **Nix flake**: `nix flake update` refreshes all Nix inputs
+- **CI pinning**: `ci/update-pinned.sh` updates the pinned Nixpkgs revision
+
+### Manual Updates
+
+- **Git submodules**: `git submodule update --remote` for cgit's bundled Git
+- **vcpkg**: Update `meshmc/vcpkg.json` and `meshmc/vcpkg-configuration.json`
+- **Poetry lock**: `cd meta && poetry update`
+- **Cargo lock**: `cd ofborg && cargo update`
diff --git a/docs/handbook/Project-Tick/repository-structure.md b/docs/handbook/Project-Tick/repository-structure.md
new file mode 100644
index 0000000000..44b64f0aa8
--- /dev/null
+++ b/docs/handbook/Project-Tick/repository-structure.md
@@ -0,0 +1,625 @@
+# Project Tick — Repository Structure
+
+## Overview
+
+The Project Tick monorepo contains all source code, libraries, applications,
+infrastructure tooling, documentation, and archived projects under a single
+Git repository. This document provides a complete map of every top-level
+directory and significant file.
+
+---
+
+## Root Directory
+
+```
+Project-Tick/
+│
+├── .envrc # direnv configuration (loads Nix shell)
+├── .gitattributes # Git attribute rules (LFS, diff, merge)
+├── .gitignore # Root-level ignore patterns
+├── .gitmodules # Git submodule definitions
+├── bootstrap.cmd # Windows bootstrap script (Scoop + vcpkg)
+├── bootstrap.sh # Linux/macOS bootstrap script
+├── CODE_OF_CONDUCT.md # Code of Conduct v2 (15 Feb 2026)
+├── CONTRIBUTING.md # Contribution guidelines, CLA, DCO, AI policy
+├── flake.lock # Nix flake lock file (pinned inputs)
+├── flake.nix # Top-level Nix flake (LLVM 22 dev shell)
+├── lefthook.yml # Git hooks config (REUSE lint, checkpatch)
+├── README.md # Root README with project overview
+├── REUSE.toml # REUSE 3.0 license annotations
+├── SECURITY.md # Security vulnerability reporting
+├── TRADEMARK.md # Trademark and brand policy
+├── tree.txt # Static directory tree snapshot
+│
+├── .github/ # GitHub configuration
+├── archived/ # Deprecated sub-projects
+├── cgit/ # cgit Git web interface
+├── ci/ # CI infrastructure and tooling
+├── cmark/ # cmark Markdown parser
+├── corebinutils/ # BSD/FreeBSD core utilities
+├── docs/ # Documentation
+├── forgewrapper/ # ForgeWrapper Java shim
+├── genqrcode/ # QR code encoding library
+├── hooks/ # Git hook scripts
+├── images4docker/ # Docker build environments
+├── json4cpp/ # JSON library (nlohmann/json fork)
+├── libnbtplusplus/ # NBT library
+├── LICENSES/ # SPDX license texts
+├── meshmc/ # MeshMC Minecraft launcher
+├── meta/ # Metadata generator
+├── mnv/ # MNV text editor (Vim fork)
+├── neozip/ # Compression library (zlib-ng fork)
+├── ofborg/ # tickborg CI bot
+└── tomlplusplus/ # TOML library
+```
+
+---
+
+## .github/ — GitHub Configuration
+
+```
+.github/
+├── CODEOWNERS # Review routing (all paths → @YongDo-Hyun)
+├── dco.yml # DCO bot config (no remediation commits)
+├── pull_request_template.md # PR template (sign-off & CLA reminder)
+│
+├── ISSUE_TEMPLATE/
+│ ├── bug_report.yml # Structured bug report form
+│ ├── config.yml # Issue template configuration
+│ ├── rfc.yml # RFC (Request for Comments) template
+│ └── suggestion.yml # Feature suggestion template
+│
+├── actions/ # Reusable composite actions
+│ ├── change-analysis/ # File change detection logic
+│ ├── meshmc/
+│ │ ├── package/ # MeshMC packaging action
+│ │ └── setup-dependencies/ # MeshMC dependency setup action
+│ └── mnv/
+│ └── test_artefacts/ # MNV test artifacts action
+│
+├── codeql/ # CodeQL analysis configuration
+│
+└── workflows/ # 50+ GitHub Actions workflow files
+ ├── ci.yml # Monolithic CI orchestrator
+ ├── ci-lint.yml # Commit/format linting
+ ├── ci-schedule.yml # Scheduled CI jobs
+ ├── meshmc-*.yml # MeshMC workflows (8 files)
+ ├── neozip-*.yml # NeoZip workflows (12 files)
+ ├── json4cpp-*.yml # json4cpp workflows (7 files)
+ ├── cmark-*.yml # cmark workflows (2 files)
+ ├── tomlplusplus-*.yml # toml++ workflows (3 files)
+ ├── mnv-*.yml # MNV workflows (4 files)
+ ├── cgit-ci.yml # cgit CI
+ ├── corebinutils-ci.yml # CoreBinUtils CI
+ ├── forgewrapper-build.yml # ForgeWrapper CI
+ ├── libnbtplusplus-ci.yml # libnbt++ CI
+ ├── genqrcode-ci.yml # GenQRCode CI
+ ├── images4docker-build.yml # Docker image builds
+ └── repo-*.yml # Repository maintenance (4 files)
+```
+
+---
+
+## meshmc/ — MeshMC Launcher
+
+The largest sub-project. A Qt 6 / C++23 custom Minecraft launcher.
+
+```
+meshmc/
+├── .clang-format # C++ formatting rules
+├── .clang-tidy # Static analysis configuration
+├── .envrc # direnv configuration
+├── .gitattributes # File attribute rules
+├── .gitignore # Ignore patterns
+├── .markdownlint.yaml # Markdown lint config
+├── .markdownlintignore # Markdown lint ignore
+├── BUILD.md # Comprehensive build guide
+├── CMakeLists.txt # Root CMake configuration
+├── CMakePresets.json # Platform-specific CMake presets
+├── CONTRIBUTING.md # MeshMC-specific contribution guide
+├── COPYING.md # License information
+├── Containerfile # Container build file
+├── README.md # MeshMC overview
+├── REUSE.toml # License annotations
+├── changelog.md # Version changelog
+├── default.nix # Nix build expression
+├── flake.lock # Nix flake lock
+├── flake.nix # Nix flake
+├── shell.nix # Nix development shell
+├── vcpkg-configuration.json # vcpkg configuration
+├── vcpkg.json # vcpkg dependencies
+│
+├── branding/ # Icons, logos, splash screens
+├── build/ # Build output directory
+├── buildconfig/ # Compile-time configuration generation
+├── cmake/ # Custom CMake modules
+├── doc/ # MeshMC-specific documentation
+├── install/ # Install output directory
+├── LICENSES/ # MeshMC-specific license copies
+├── nix/ # Nix packaging
+├── scripts/ # Build and maintenance scripts
+├── updater/ # Auto-update mechanism
+│
+├── launcher/ # Main application source
+│ ├── main.cpp # Entry point
+│ ├── Application.cpp/.h # Application singleton
+│ ├── CMakeLists.txt # Launcher CMake
+│ ├── icons/ # Icon management
+│ ├── java/ # Java runtime management
+│ ├── launch/ # Game launch logic
+│ ├── meta/ # Metadata handling
+│ ├── minecraft/ # Minecraft-specific logic
+│ ├── modplatform/ # Mod platform integration
+│ ├── mojang/ # Mojang API integration
+│ ├── net/ # Networking
+│ ├── news/ # News feed
+│ ├── notifications/ # User notifications
+│ ├── resources/ # Qt resources
+│ ├── screenshots/ # Screenshot management
+│ ├── settings/ # Settings system
+│ ├── tasks/ # Async task framework
+│ ├── testdata/ # Test fixtures
+│ ├── tools/ # Tool integrations
+│ ├── translations/ # i18n (Crowdin)
+│ ├── ui/ # Qt UI (widgets, dialogs, themes)
+│ └── updater/ # In-app updater
+│
+└── libraries/ # Bundled library integrations
+```
+
+Key source files in `launcher/`:
+- `Application.cpp` — Application lifecycle management
+- `BaseInstance.cpp` — Minecraft instance abstraction
+- `InstanceList.cpp` — Instance collection management
+- `LaunchController.cpp` — Game launch orchestration
+- `FileSystem.cpp` — Cross-platform file operations
+- `Json.cpp` — JSON utilities (wrapping json4cpp)
+- `GZip.cpp` — Compression utilities (wrapping zlib/neozip)
+
+---
+
+## mnv/ — MNV Text Editor
+
+Vim fork with modern enhancements.
+
+```
+mnv/
+├── CMakeLists.txt # CMake build (alternative)
+├── CMakePresets.json # CMake presets
+├── configure # Autotools configure script
+├── CONTRIBUTING.md # Contribution guide
+├── COPYING.md # License
+├── LICENSE # Vim license text
+├── Makefile # Root Makefile
+├── README.md # Overview
+├── SECURITY.md # Security policy
+│
+├── ci/ # MNV-specific CI scripts
+├── cmake/ # CMake modules
+├── lang/ # Language support files
+├── nsis/ # Windows installer (NSIS)
+├── pixmaps/ # Icons and graphics
+├── runtime/ # Runtime files (docs, syntax, plugins, etc.)
+├── src/ # C source code
+├── tools/ # Development tools
+└── build/ # Build output
+```
+
+---
+
+## cgit/ — Git Web Interface
+
+```
+cgit/
+├── Makefile # Build system
+├── cgit.c # Main CGI entry point
+├── cgit.h # Core data structures
+├── cgit.css # Default stylesheet
+├── cgit.js # Client-side JavaScript
+├── cgit.mk # Build configuration
+├── cgitrc.5.txt # Man page source
+├── COPYING # GPL-2.0 license
+├── README # Build instructions
+├── robots.txt # Default robots.txt
+│
+├── cache.c/.h # Response caching
+├── cmd.c/.h # Command dispatching
+├── configfile.c/.h # Configuration parsing
+├── filter.c # Content filtering (Lua)
+├── html.c/.h # HTML output generation
+├── parsing.c # Git object parsing
+├── scan-tree.c/.h # Repository scanning
+├── shared.c # Shared utilities
+│
+├── ui-*.c/.h # UI modules:
+│ ├── ui-atom # Atom feed
+│ ├── ui-blame # File blame view
+│ ├── ui-blob # File content view
+│ ├── ui-clone # Clone URL display
+│ ├── ui-commit # Commit view
+│ ├── ui-diff # Diff view
+│ ├── ui-log # Commit log
+│ ├── ui-patch # Patch view
+│ ├── ui-plain # Plain text view
+│ ├── ui-refs # Reference listing
+│ ├── ui-repolist # Repository listing
+│ ├── ui-shared # Shared UI utilities
+│ ├── ui-snapshot # Tarball/zip snapshots
+│ ├── ui-ssdiff # Side-by-side diff
+│ ├── ui-stats # Statistics
+│ ├── ui-summary # Repository summary
+│ ├── ui-tag # Tag view
+│ └── ui-tree # Tree view
+│
+├── contrib/ # Third-party contributions
+├── filters/ # Content filter scripts
+├── git/ # Bundled Git source (submodule)
+└── tests/ # Test suite
+```
+
+---
+
+## neozip/ — Compression Library
+
+zlib-ng fork with SIMD acceleration.
+
+```
+neozip/
+├── CMakeLists.txt # CMake build
+├── configure # Autotools-style configure
+├── Makefile.in # Make template
+├── FAQ.zlib # zlib FAQ
+├── INDEX.md # File index
+├── LICENSE.md # Zlib license
+├── PORTING.md # Porting guide
+├── README.md # Overview
+│
+├── adler32.c # Adler-32 checksum
+├── compress.c # compression wrapper
+├── crc32.c # CRC-32 checksum
+├── deflate.c # Deflate compression
+├── deflate_fast.c # Fast deflate strategy
+├── deflate_huff.c # Huffman-only strategy
+├── deflate_medium.c # Medium deflate strategy
+├── deflate_quick.c # Quick deflate strategy
+├── deflate_rle.c # RLE deflate strategy
+├── deflate_slow.c # Slow (best) deflate strategy
+├── deflate_stored.c # Stored (no compression)
+├── inflate.c # Inflate decompression
+├── infback.c # Inflate back-stream
+├── trees.c # Huffman tree construction
+├── uncompr.c # Uncompress wrapper
+├── gzlib.c # gzip file utilities
+├── gzread.c # gzip read
+├── gzwrite.c # gzip write
+│
+├── arch/ # Architecture-specific SIMD code
+│ ├── x86/ # SSE2, SSE4, AVX2, AVX512, PCLMULQDQ
+│ ├── arm/ # NEON, ARMv8 CRC, PMULL
+│ ├── power/ # VMX, VSX, Power8/9
+│ ├── s390/ # DFLTCC (hardware deflate)
+│ ├── riscv/ # RVV, ZBC
+│ └── loongarch/ # LSX, LASX
+│
+├── cmake/ # CMake modules
+├── doc/ # Documentation
+├── test/ # Test suite
+├── tools/ # Development tools
+└── win32/ # Windows-specific files
+```
+
+---
+
+## Libraries (Other)
+
+### json4cpp/
+
+```
+json4cpp/
+├── CMakeLists.txt # CMake build
+├── meson.build # Meson build (alternative)
+├── BUILD.bazel # Bazel build (alternative)
+├── MODULE.bazel # Bazel module
+├── Package.swift # Swift Package Manager
+├── Makefile # Convenience Makefile
+├── LICENSE.MIT # MIT license
+├── README.md # Comprehensive usage guide
+│
+├── include/nlohmann/ # Public headers
+├── single_include/ # Amalgamated single header
+├── src/ # Implementation (for split build)
+├── docs/ # MkDocs documentation
+├── tests/ # Catch2 test suite
+├── cmake/ # CMake modules
+└── tools/ # Development tools
+```
+
+### tomlplusplus/
+
+```
+tomlplusplus/
+├── meson.build # Primary build (Meson)
+├── meson_options.txt # Meson options
+├── CMakeLists.txt # CMake build (alternative)
+├── LICENSE # MIT license
+├── README.md # Overview and usage
+├── toml.hpp # Single header include
+│
+├── include/toml++/ # Multi-header includes
+├── src/ # Implementation files
+├── docs/ # Documentation
+├── examples/ # Usage examples
+├── tests/ # Test suite
+├── toml-test/ # Official TOML test suite
+├── fuzzing/ # Fuzz testing
+└── tools/ # Development tools
+```
+
+### libnbtplusplus/
+
+```
+libnbtplusplus/
+├── CMakeLists.txt # CMake build
+├── COPYING # LGPL-3.0 full text
+├── COPYING.LESSER # LGPL-3.0 lesser clause
+├── README.md # Build guide
+│
+├── include/ # Public headers (nbt::tag_* types)
+├── src/ # Implementation
+└── test/ # Test suite
+```
+
+### genqrcode/
+
+```
+genqrcode/
+├── CMakeLists.txt # CMake build
+├── configure.ac # Autotools configuration
+├── autogen.sh # Autotools bootstrap
+├── Makefile.am # Autotools Makefile
+├── COPYING # LGPL-2.1 license
+├── README.md # Overview
+│
+├── qrencode.c/.h # Main encoding API
+├── qrinput.c/.h # Input processing
+├── qrspec.c/.h # QR specification tables
+├── bitstream.c/.h # Bit stream utilities
+├── mask.c/.h # Masking patterns
+├── rsecc.c/.h # Reed-Solomon error correction
+├── split.c/.h # Data splitting
+├── qrenc.c # CLI tool
+│
+├── cmake/ # CMake modules
+├── tests/ # Test suite
+└── use/ # Usage examples
+```
+
+### forgewrapper/
+
+```
+forgewrapper/
+├── build.gradle # Gradle build script
+├── settings.gradle # Gradle settings
+├── gradle.properties # Build properties
+├── gradlew # Unix Gradle wrapper
+├── gradlew.bat # Windows Gradle wrapper
+├── LICENSE # MIT license
+├── README.md # Usage guide
+│
+├── gradle/ # Gradle wrapper JAR
+├── jigsaw/ # JPMS module configuration
+└── src/
+ └── main/java/ # Java source
+ └── io/github/zekerzhayard/forgewrapper/
+ └── installer/
+ └── detector/
+ └── IFileDetector.java # SPI interface
+```
+
+---
+
+## Infrastructure
+
+### meta/ — Metadata Generator
+
+```
+meta/
+├── pyproject.toml # Poetry project configuration
+├── poetry.lock # Locked Python dependencies
+├── requirements.txt # pip requirements (alternative)
+├── README.md # Deployment guide
+├── COPYING / LICENSE # MS-PL license
+├── config.sh / config.example.sh # Shell configuration
+├── init.sh # Initialization script
+├── update.sh # Update script
+├── flake.nix / flake.lock # Nix flake
+├── garnix.yaml # Garnix CI configuration
+├── renovate.json # Renovate dependency updates
+│
+├── meta/ # Python package
+│ └── run/ # CLI entry points
+│ ├── generate_fabric.py
+│ ├── generate_forge.py
+│ ├── generate_mojang.py
+│ ├── generate_neoforge.py
+│ ├── generate_quilt.py
+│ ├── generate_java.py
+│ └── ...
+│
+├── cache/ / caches/ # Cached upstream data
+├── launcher/ # Launcher configuration
+├── public/ # Generated output
+├── upstream/ # Upstream source data
+├── fuzz/ # Fuzz testing
+├── nix/ # Nix packaging
+└── venv/ # Python virtual environment
+```
+
+### ofborg/ — tickborg CI Bot
+
+```
+ofborg/
+├── Cargo.toml # Workspace root
+├── Cargo.lock # Locked Rust dependencies
+├── Dockerfile # Container build
+├── docker-compose.yml # Multi-container deployment
+├── DEPLOY.md # Deployment guide
+├── README.md # Overview and bot commands
+├── LICENSE # MIT license
+├── default.nix # Nix build
+├── flake.nix / flake.lock # Nix flake
+├── shell.nix # Development shell
+├── service.nix # NixOS service module
+├── config.production.json # Production config
+├── config.public.json # Public config
+├── example.config.json # Example config
+│
+├── tickborg/ # Main CI bot (Rust crate)
+│ ├── Cargo.toml
+│ └── src/
+│
+├── tickborg-simple-build/ # Simplified builder (Rust crate)
+│ ├── Cargo.toml
+│ └── src/
+│
+├── ofborg/ # Upstream ofborg (reference)
+├── ofborg-simple-build/ # Upstream simple-build
+├── ofborg-viewer/ # Build status viewer
+│
+├── deploy/ # Deployment scripts
+├── doc/ # Documentation
+└── target/ # Cargo build output
+```
+
+### images4docker/
+
+```
+images4docker/
+├── README.md # Overview and workflow docs
+├── LICENSE # GPL-3.0 license
+│
+├── dockerfiles/ # 40 Dockerfile-per-distro files
+│ ├── debian-12.Dockerfile
+│ ├── ubuntu-24.04.Dockerfile
+│ ├── fedora-41.Dockerfile
+│ ├── alpine-3.20.Dockerfile
+│ └── ... (36 more)
+│
+└── LICENSES/ # License copies
+```
+
+### ci/ — CI Infrastructure
+
+```
+ci/
+├── OWNERS # CI code ownership
+├── README.md # CI documentation
+├── default.nix # Nix CI entry (treefmt, validator)
+├── pinned.json # Pinned Nixpkgs revision + hash
+├── supportedBranches.js # Branch classification logic
+├── update-pinned.sh # Update pinned.json
+│
+├── codeowners-validator/ # CODEOWNERS validation tool
+│ ├── default.nix
+│ ├── owners-file-name.patch
+│ └── permissions.patch
+│
+└── github-script/ # GitHub Actions JS helpers
+ ├── run # CLI entry point
+ ├── lint-commits.js # Conventional Commits linter
+ ├── prepare.js # PR preparation
+ ├── reviews.js # Review state management
+ ├── get-pr-commit-details.js
+ ├── withRateLimit.js # API rate limiting
+ ├── package.json # npm dependencies
+ └── shell.nix # Nix dev shell
+```
+
+---
+
+## LICENSES/ — License Texts
+
+```
+LICENSES/
+├── Apache-2.0.txt
+├── BSD-1-Clause.txt
+├── BSD-2-Clause.txt
+├── BSD-3-Clause.txt
+├── BSD-4-Clause.txt
+├── BSL-1.0.txt
+├── CC-BY-SA-4.0.txt
+├── CC0-1.0.txt
+├── GPL-2.0-only.txt
+├── GPL-3.0-only.txt
+├── GPL-3.0-or-later.txt
+├── LGPL-2.0-or-later.txt
+├── LGPL-2.1-or-later.txt
+├── LGPL-3.0-or-later.txt
+├── LicenseRef-Qt-Commercial.txt
+├── MIT.txt
+├── MS-PL.txt
+├── Unlicense.txt
+├── Vim.txt
+└── Zlib.txt
+```
+
+20 SPDX-compliant license texts covering all sub-projects and their
+dependencies.
+
+---
+
+## corebinutils/ — BSD Utilities
+
+```
+corebinutils/
+├── config.mk # Toolchain configuration
+├── configure # Toolchain detection script
+├── GNUmakefile # Top-level orchestrator
+├── README.md # Build instructions
+│
+├── build/ # Shared build infrastructure
+├── contrib/ # Contributed utilities
+│
+├── cat/ ├── chmod/ ├── cp/
+├── chflags/ ├── cpuset/ ├── csh/
+├── date/ ├── dd/ ├── df/
+├── domainname/ ├── echo/ ├── ed/
+├── expr/ ├── freebsd-version/ ├── getfacl/
+├── hostname/ ├── kill/ ├── ln/
+├── ls/ ├── mkdir/ ├── mv/
+├── nproc/ ├── pax/ ├── pkill/
+├── ps/ ├── pwait/ ├── pwd/
+├── realpath/ ├── rm/ ├── rmail/
+├── rmdir/ ├── setfacl/ ├── sh/
+├── sleep/ ├── stty/ ├── sync/
+├── test/ ├── timeout/ └── uuidgen/
+```
+
+Each utility subdirectory contains its own `GNUmakefile` and source files.
+
+---
+
+## docs/ — Documentation
+
+```
+docs/
+└── handbook/ # Developer handbook
+ ├── Project-Tick/ # Organization-level docs (this directory)
+ └── [per-project]/ # Per-sub-project documentation
+```
+
+---
+
+## archived/ — Deprecated Projects
+
+```
+archived/
+├── projt-launcher/ # Original launcher (GPL-3.0-only)
+├── projt-modpack/ # Modpack tooling (GPL-3.0-only)
+├── projt-minicraft-modpack/ # Minicraft modpack (MIT)
+└── ptlibzippy/ # ZIP library (Zlib)
+```
+
+These projects are kept for historical reference but are no longer actively
+maintained. MeshMC supersedes projt-launcher, and neozip supersedes
+ptlibzippy.
diff --git a/docs/handbook/Project-Tick/security-policy.md b/docs/handbook/Project-Tick/security-policy.md
new file mode 100644
index 0000000000..f81028978d
--- /dev/null
+++ b/docs/handbook/Project-Tick/security-policy.md
@@ -0,0 +1,282 @@
+# Project Tick — Security Policy
+
+## Overview
+
+Project Tick takes the security of its software ecosystem seriously. This
+document describes how to report security vulnerabilities, the disclosure
+process, and the security practices applied across the monorepo.
+
+Given that Project Tick includes components ranging from compression libraries
+(NeoZip) to a full application (MeshMC) to CI infrastructure (tickborg), a
+vulnerability in any sub-project could have cascading effects. The project
+maintains a unified security posture across all components.
+
+---
+
+## Reporting Vulnerabilities
+
+### How to Report
+
+If you discover a security vulnerability in any Project Tick component, report
+it via email:
+
+**[projecttick@projecttick.org](mailto:projecttick@projecttick.org)**
+
+### Do NOT
+
+- Open a public GitHub issue for security vulnerabilities
+- Post vulnerability details on Discord or social media
+- Publish exploit code before the issue is resolved
+
+### What to Include
+
+When submitting a security report, include as much of the following as
+possible:
+
+| Field | Description |
+|-------|-------------|
+| **Affected component** | Which sub-project (e.g., meshmc, neozip, libnbtplusplus) |
+| **Affected versions** | Version numbers or commit hashes |
+| **Steps to reproduce** | Detailed reproduction steps |
+| **Expected behavior** | What should happen |
+| **Actual behavior** | What actually happens (crash, data leak, etc.) |
+| **Impact assessment** | Your assessment of severity and exploitability |
+| **Logs or crash reports** | Stack traces, core dumps, error messages |
+| **Proof of concept** | Minimal reproducer (if available) |
+| **Suggested fix** | If you have one |
+
+### Example Report
+
+```
+Subject: [SECURITY] Buffer overflow in NeoZip inflate
+
+Affected component: neozip
+Affected versions: All versions based on zlib-ng 2.x
+
+Steps to reproduce:
+1. Create a specially crafted gzip stream with [details]
+2. Call inflate() with the crafted input
+3. Observe buffer overflow at inflate.c:42
+
+Impact: Remote code execution via crafted compressed data.
+Severity: Critical (CVSS 9.8)
+
+PoC: Attached file crash_input.gz
+
+Suggested fix: Add bounds check at inflate.c:42 before
+memcpy call.
+```
+
+---
+
+## Disclosure Process
+
+### Timeline
+
+Project Tick follows a responsible disclosure process:
+
+1. **Acknowledgment** — You will receive an acknowledgment within **48 hours**
+ of your report.
+
+2. **Triage** — The security team assesses severity and impact within
+ **7 days**.
+
+3. **Fix development** — A fix is developed privately. Timeline depends on
+ severity:
+ - **Critical (CVSS 9.0+):** Fix within **7 days**
+ - **High (CVSS 7.0–8.9):** Fix within **14 days**
+ - **Medium (CVSS 4.0–6.9):** Fix within **30 days**
+ - **Low (CVSS 0.1–3.9):** Fix within **90 days**
+
+4. **Coordinated disclosure** — The fix is released, and the vulnerability is
+ disclosed publicly. Credit is given to the reporter (unless anonymity is
+ requested).
+
+5. **Advisory publication** — A security advisory is published on GitHub with
+ the CVE ID (if assigned).
+
+### Embargo
+
+During the fix development period:
+
+- Details of the vulnerability are kept confidential
+- Only the core maintainers and the reporter have access
+- Pre-disclosure to downstream distributors may occur for critical issues
+- The reporter is asked not to disclose until the fix is released
+
+---
+
+## Supported Components
+
+### Security-Critical Components
+
+The following components handle untrusted input and are considered
+security-critical:
+
+| Component | Risk Area | Threat Model |
+|-----------|-----------|--------------|
+| **neozip** | Compression/decompression | Crafted compressed streams (e.g., zip bombs, buffer overflows) |
+| **libnbtplusplus** | Binary data parsing | Malicious NBT files from untrusted sources |
+| **json4cpp** | JSON parsing | Crafted JSON input (e.g., deeply nested objects, huge numbers) |
+| **tomlplusplus** | TOML parsing | Crafted TOML configuration files |
+| **cmark** | Markdown parsing | Crafted Markdown (e.g., pathological regex, huge nesting) |
+| **genqrcode** | QR code encoding | Crafted encoding input |
+| **meshmc** | Application | Network input (OAuth, HTTP APIs), file parsing, mod loading |
+| **forgewrapper** | Java runtime | Classpath manipulation, installer extraction |
+| **cgit** | Web interface | HTTP request handling, repository traversal |
+| **mnv** | Text editor | Modeline parsing, file format handling |
+| **corebinutils** | System utilities | Command-line input, file operations |
+| **tickborg** | CI bot | AMQP messages, GitHub API responses |
+| **meta** | Metadata generation | Upstream API responses (Mojang, Forge, etc.) |
+
+### Fuzz Testing Coverage
+
+Several sub-projects maintain active fuzz testing:
+
+| Component | Fuzz Infrastructure | CI Workflow |
+|-----------|-------------------|-------------|
+| neozip | OSS-Fuzz, custom fuzzers | `neozip-fuzz.yml` |
+| json4cpp | OSS-Fuzz, custom fuzzers | `json4cpp-fuzz.yml` |
+| cmark | Custom fuzzers in `fuzz/` | `cmark-fuzz.yml` |
+| tomlplusplus | Custom fuzzers in `fuzzing/` | `tomlplusplus-fuzz.yml` |
+
+### Static Analysis Coverage
+
+| Component | Tool | CI Workflow |
+|-----------|------|-------------|
+| meshmc | CodeQL | `meshmc-codeql.yml` |
+| mnv | CodeQL, Coverity | `mnv-codeql.yml`, `mnv-coverity.yml` |
+| neozip | CodeQL | `neozip-codeql.yml` |
+| json4cpp | Semgrep, Flawfinder | `json4cpp-semgrep.yml`, `json4cpp-flawfinder.yml` |
+
+---
+
+## Security Practices
+
+### Compiler Hardening
+
+MeshMC's build system enables several hardening flags:
+
+```cmake
+# Stack protection
+-fstack-protector-strong --param=ssp-buffer-size=4
+
+# Buffer overflow detection
+-O3 -D_FORTIFY_SOURCE=2
+
+# Comprehensive warnings
+-Wall -pedantic
+
+# Position-independent code (ASLR support)
+CMAKE_POSITION_INDEPENDENT_CODE ON
+```
+
+### Supply Chain Security
+
+1. **Pinned Dependencies**
+ - Nix inputs are content-addressed and locked in `flake.lock`
+ - CI Nixpkgs revision is pinned in `ci/pinned.json` with SHA256 hashes
+ - GitHub Actions use SHA-pinned action references
+
+2. **Runner Hardening**
+ - CI workflows use `step-security/harden-runner` with egress auditing
+ - `repo-scorecards.yml` tracks OpenSSF Scorecard compliance
+ - `repo-dependency-review.yml` scans dependency changes for known
+ vulnerabilities
+
+3. **Code Signing**
+ - Release artifacts are signed
+ - Git commits can be GPG/SSH signed (recommended but not required)
+
+4. **CODEOWNERS Enforcement**
+ - The `codeowners-validator` tool (built from source in `ci/`) validates
+ the `CODEOWNERS` file to ensure all paths have designated reviewers
+
+5. **GitHub Actions Security**
+ - `zizmor` scans workflows for security issues
+ - `actionlint` validates workflow syntax
+ - Minimal permissions (`contents: read` by default)
+
+### Network Security (MeshMC)
+
+MeshMC handles network operations for:
+- OAuth2 authentication (Microsoft account login via Qt6 NetworkAuth)
+- HTTP APIs (Mojang, Forge, Fabric, Quilt, Modrinth, CurseForge)
+- File downloads (game assets, mods, Java runtimes)
+
+Security measures:
+- TLS/HTTPS enforced for all network connections
+- Certificate validation via Qt's SSL stack
+- Download integrity verification (SHA-1, SHA-256 checksums)
+- No execution of downloaded code without user consent
+
+### Infrastructure Security
+
+The Code of Conduct (Section 4.2) explicitly prohibits:
+
+- Intentional submission of malicious code
+- Supply-chain compromise attempts
+- Infrastructure abuse, including CI/CD exploitation or service disruption
+- License violations or intentional misattribution
+
+Violations are treated as serious misconduct and may result in immediate
+and permanent bans.
+
+---
+
+## Vulnerability History
+
+Security advisories are published on the GitHub repository's Security tab:
+
+```
+https://github.com/Project-Tick/Project-Tick/security/advisories
+```
+
+---
+
+## Third-Party Component Security
+
+Since Project Tick includes forks of upstream projects (zlib-ng, nlohmann/json,
+toml++, libqrencode, Vim, cgit, ofborg), security vulnerabilities in upstream
+projects may affect Project Tick.
+
+### Monitoring
+
+- Upstream security advisories are monitored
+- Dependabot alerts are enabled for Cargo, npm, and pip dependencies
+- The `repo-dependency-review.yml` workflow checks for known vulnerabilities
+ in dependency changes
+
+### Patching Policy
+
+- **Critical upstream vulnerabilities** — Patches are applied within 48 hours
+ and backported to all supported release branches
+- **High upstream vulnerabilities** — Patches applied within 7 days
+- **Other upstream vulnerabilities** — Incorporated in the next regular sync
+
+### Upstream Tracking
+
+| Component | Upstream | Tracking |
+|-----------|----------|----------|
+| neozip | zlib-ng/zlib-ng | GitHub releases, OSS-Fuzz |
+| json4cpp | nlohmann/json | GitHub releases, OSS-Fuzz |
+| tomlplusplus | marzer/tomlplusplus | GitHub releases |
+| cmark | commonmark/cmark | GitHub releases |
+| genqrcode | fukuchi/libqrencode | GitHub releases |
+| mnv | vim/vim | GitHub security advisories |
+| cgit | zx2c4/cgit | Mailing list |
+| ofborg/tickborg | NixOS/ofborg | GitHub releases |
+
+---
+
+## Contact
+
+For security-related inquiries:
+
+| Channel | Address |
+|---------|---------|
+| Security reports | [projecttick@projecttick.org](mailto:projecttick@projecttick.org) |
+| General inquiries | [projecttick@projecttick.org](mailto:projecttick@projecttick.org) |
+| Trademark | [yongdohyun@projecttick.org](mailto:yongdohyun@projecttick.org) |
+
+**Do not use GitHub issues for security reports.**
diff --git a/docs/handbook/Project-Tick/trademark-policy.md b/docs/handbook/Project-Tick/trademark-policy.md
new file mode 100644
index 0000000000..62e56eb7b5
--- /dev/null
+++ b/docs/handbook/Project-Tick/trademark-policy.md
@@ -0,0 +1,283 @@
+# Project Tick — Trademark Policy
+
+## Overview
+
+This document summarizes the Project Tick trademark and brand policy as defined
+in `TRADEMARK.md` at the repository root. The trademarks are separate from the
+open source licenses that govern the source code.
+
+---
+
+## Trademark Ownership
+
+The following marks are owned by **Mehmet Samet Duman**:
+
+- **Project Tick™** — The project name
+- **Project Tick logo** — The project visual identity
+- All related branding elements
+
+Collectively, these are referred to as the "Marks."
+
+All rights in the Marks are reserved.
+
+---
+
+## Relationship to Open Source Licenses
+
+This is the most important distinction to understand:
+
+**Open source licenses do NOT grant trademark rights.**
+
+Each repository under the Project Tick namespace is licensed under its
+respective open source license (MIT, BSD, GPL, MS-PL, etc.). These licenses
+govern use, modification, and redistribution of **source code only**.
+
+Open source licenses specifically **do not** grant:
+
+- Rights to use the Project Tick name
+- Rights to use the Project Tick logo
+- Rights to use Project Tick branding or trade dress
+- Rights to imply affiliation, endorsement, sponsorship, or official status
+
+Trademark rights are legally separate from copyright licenses.
+
+---
+
+## Permitted Uses
+
+The following uses are generally permitted **without** prior written permission:
+
+### 1. Factual References
+
+You may make factual references to Project Tick:
+
+> "This software is compatible with Project Tick."
+
+### 2. Unmodified Official Releases
+
+You may accurately describe unmodified official releases:
+
+> "This package contains Project Tick MeshMC version 7.0.0."
+
+### 3. Non-Commercial Commentary
+
+Non-commercial commentary, research, educational, and journalistic references
+are permitted:
+
+> "In our analysis of open-source Minecraft launchers, Project Tick's MeshMC
+> demonstrated strong performance."
+
+### Conditions for Permitted Use
+
+Even permitted uses must not:
+
+- Create confusion regarding the origin of software
+- Suggest sponsorship, approval, or endorsement by Project Tick
+- Present modified versions as official releases
+
+---
+
+## Modified and Redistributed Versions
+
+Open source licenses permit modification and redistribution of source code.
+However, trademark restrictions apply to how modified versions are named and
+presented.
+
+### Requirements for Forks and Derivatives
+
+| Requirement | Details |
+|------------|---------|
+| Must not use Project Tick name/logo as if official | Forks must use distinct branding |
+| Must clearly indicate modification | Derivative works must state they are modified |
+| Must not use "Official," "Certified," etc. | Unless explicitly authorized |
+
+### Examples
+
+**Permissible:**
+
+> "Based on Project Tick"
+>
+> "MyLauncher — derived from Project Tick MeshMC"
+
+**Impermissible (without authorization):**
+
+> "Official Project Tick Build"
+>
+> "Project Tick Certified Edition"
+>
+> "Project Tick Pro"
+
+---
+
+## Commercial Use
+
+The Marks may **not** be used in the following commercial contexts without
+prior written permission:
+
+| Context | Example |
+|---------|---------|
+| Product name | "Project Tick Hosting Service" |
+| Company name | "Project Tick Solutions LLC" |
+| SaaS service name | "Project Tick Cloud" |
+| Domain name | `projecttick-hosting.com` |
+| Paid advertising | Google Ads using "Project Tick" |
+| Promotional materials | Brochures featuring the logo |
+
+### SaaS and Hosted Services
+
+Operating a commercial service using Project Tick source code **does not**
+grant the right to represent that service as an official Project Tick service.
+
+Only services directly operated by Mehmet Samet Duman under the Project Tick
+identity may use the Marks in a commercial context.
+
+---
+
+## Official Releases
+
+An "Official Project Tick Release" must meet **all** of the following criteria:
+
+1. Built and distributed by the Project Tick maintainers
+2. Published through official communication channels
+3. Identified by official release tags or signatures
+
+Modified builds, even if fully compliant with the applicable open source
+license, **must not** be presented as official releases.
+
+---
+
+## Logo Usage
+
+The Project Tick logo is protected by both copyright and trademark law.
+
+### Prohibited Modifications
+
+The logo may **not** be:
+
+| Action | Status |
+|--------|--------|
+| Modified | Prohibited |
+| Recolored | Prohibited |
+| Combined with other marks | Prohibited |
+| Used for commercial services | Prohibited |
+| Embedded in derivative branding | Prohibited |
+| Used as a favicon for unofficial sites | Prohibited |
+
+Written authorization is required for any logo use beyond factual reference.
+
+### Creative Commons and Trademark
+
+If the logo is licensed under a Creative Commons license (e.g., CC BY-NC-ND),
+that license applies within its stated scope but **does not waive trademark
+protections**. The CC license governs copyright only; trademark restrictions
+remain in full force.
+
+---
+
+## Domain Names and Corporate Identifiers
+
+The Marks may **not** be used in:
+
+| Context | Prohibited Without Permission |
+|---------|------------------------------|
+| Domain names | `projecttick.io`, `meshmc-official.com` |
+| Social media handles | `@projecttick`, `@meshmc_official` |
+| Corporate names | "Project Tick Inc." |
+| Registered business identifiers | EIN/tax registration using the name |
+
+---
+
+## Prohibited Uses
+
+The following uses are **strictly prohibited** regardless of context:
+
+1. **Implying endorsement or affiliation** with Project Tick when none exists
+2. **Misrepresenting unofficial builds** as official releases
+3. **Using the Marks in a misleading or deceptive manner**
+4. **Using the Marks in ways that damage reputation or goodwill**
+5. **Registering confusingly similar names** (trademarks, domains, handles)
+
+---
+
+## Enforcement
+
+### Reservation of Rights
+
+All rights not expressly granted in the TRADEMARK.md policy are reserved.
+Failure to enforce any provision does **not** constitute a waiver of rights.
+
+Project Tick reserves the right to update the trademark policy at any time.
+
+### What Happens If You Violate the Policy
+
+1. You may receive a cease-and-desist notice
+2. You may be asked to rename your project/service/domain
+3. Legal action may be pursued for willful infringement
+4. Pull request and issue access may be revoked
+
+---
+
+## Practical Guidance for Common Scenarios
+
+### Scenario: Creating a Fork
+
+You may fork the source code under the applicable open source license, but:
+
+- Choose a **new name** for your fork (not containing "Project Tick" or "MeshMC")
+- Create **new branding** (logo, icons, splash screens)
+- Clearly state: "Based on Project Tick" or "Derived from MeshMC"
+- Do not use "official," "certified," "authorized," or similar terms
+
+### Scenario: Writing About Project Tick
+
+You may write about Project Tick in articles, blog posts, academic papers, and
+reviews. You may use the name "Project Tick" in factual context. You may
+include screenshots. Do not imply endorsement.
+
+### Scenario: Packaging for a Linux Distribution
+
+Distribution packagers may use the Project Tick name for unmodified source
+packages built from official release tarballs. If patches are applied that
+materially change behavior, the package description should note that it
+contains modifications.
+
+### Scenario: Hosting a Mirror
+
+You may host a source code mirror. You should not use the Marks in the mirror's
+domain name without permission. The mirror description should clearly indicate
+it is an unofficial mirror.
+
+### Scenario: Creating a Plugin or Mod
+
+You may create plugins, mods, or extensions for MeshMC. You may refer to
+MeshMC compatibility. You must not name your project in a way that suggests
+it is an official Project Tick product.
+
+---
+
+## Contact
+
+For trademark permission requests or questions:
+
+**[yongdohyun@projecttick.org](mailto:yongdohyun@projecttick.org)**
+
+For general project inquiries:
+
+**[projecttick@projecttick.org](mailto:projecttick@projecttick.org)**
+
+---
+
+## Summary Table
+
+| Use Case | Allowed? | Condition |
+|----------|----------|-----------|
+| Factual reference | Yes | Must be accurate |
+| Describing unmodified official releases | Yes | Must be unmodified |
+| Non-commercial research/education | Yes | No endorsement implied |
+| Fork with Project Tick branding | No | Must rebrand |
+| Fork with "Based on" attribution | Yes | Clear distinction |
+| Commercial product name | No | Requires written permission |
+| Domain name with "projecttick" | No | Requires written permission |
+| Logo in derivative branding | No | Requires written permission |
+| Blog post mentioning Project Tick | Yes | No endorsement implied |
+| Linux distro package | Yes | If from official source |
diff --git a/docs/handbook/archived/overview.md b/docs/handbook/archived/overview.md
new file mode 100644
index 0000000000..c6d066c8d3
--- /dev/null
+++ b/docs/handbook/archived/overview.md
@@ -0,0 +1,275 @@
+# Archived Projects — Overview
+
+## Purpose
+
+The `archived/` directory contains legacy Project Tick projects that are no longer
+actively developed. These projects remain in the monorepo for historical reference,
+documentation completeness, and potential future reuse of components.
+
+Archived projects are not built, tested, or deployed by the current CI pipeline.
+They are preserved as-is at the time of archival.
+
+---
+
+## Archived Projects
+
+| Directory | Project Name | Type | License | Status |
+|---------------------------|---------------------|------------------------|----------|-------------|
+| `archived/projt-launcher/` | ProjT Launcher | Minecraft Launcher (C++/Qt) | GPL-3.0 | Archived |
+| `archived/projt-modpack/` | ProjT Modpack | Minecraft Modpack | GPL-3.0 | Archived |
+| `archived/projt-minicraft-modpack/` | MiniCraft Modpack | Minecraft Modpack Collection | MIT | Archived |
+| `archived/ptlibzippy/` | PTlibzippy | Compression Library (C)| zlib License | Archived |
+
+---
+
+## Why Projects Are Archived
+
+Projects are moved to `archived/` when they meet one or more of these criteria:
+
+1. **Superseded by a newer project** — The functionality has been replaced by a different
+ component in the monorepo (e.g., ProjT Launcher was the standalone launcher before
+ MeshMC took over as the primary launcher)
+2. **No longer maintained** — The project has reached end-of-life and no further
+ development is planned
+3. **Completed scope** — The project achieved its intended purpose and doesn't need
+ ongoing changes (e.g., modpack archives)
+4. **Consolidation** — Standalone repositories were merged into the monorepo as
+ subtrees, and the project's active development has ended
+
+---
+
+## Project Summaries
+
+### ProjT Launcher (`archived/projt-launcher/`)
+
+ProjT Launcher was a structurally disciplined Minecraft launcher, forked from Prism
+Launcher. It was engineered for long-term maintainability, architectural clarity, and
+controlled ecosystem evolution.
+
+**Key characteristics**:
+- Written in C++23 with Qt 6
+- CMake build system with presets for Linux, macOS, Windows (MSVC and MinGW)
+- Layered architecture: UI (Qt Widgets) → Core/Domain → Tasks → Networking
+- Detached fork libraries: zlib, bzip2, quazip, cmark, tomlplusplus, libqrencode, libnbtplusplus
+- Nix-based CI and reproducible builds
+- Containerized build support (Dockerfile/Containerfile)
+- Comprehensive documentation in `docs/` and `docs/handbook/`
+
+**Notable features at time of archival**:
+- Launcher Hub (web-based dashboard using CEF on Linux, native on Windows/macOS)
+- Modrinth collection import
+- Fabric/Quilt/NeoForge mod loader support
+- Java runtime auto-detection and management
+- Multi-platform packaging: RPM, DEB, AppImage, Flatpak, macOS App Bundle, Windows MSI
+
+**Last known version**: 0.0.5-1 (draft)
+
+**License heritage**: GPL-3.0, with upstream license blocks from Prism Launcher
+(GPL-3.0), PolyMC (GPL-3.0), and MultiMC (Apache-2.0).
+
+For full documentation, see [projt-launcher.md](projt-launcher.md).
+
+---
+
+### ProjT Modpack (`archived/projt-modpack/`)
+
+ProjT Modpack was a Minecraft modpack curated by Project Tick. The project contained
+modpack configuration files and promotional assets.
+
+**Key characteristics**:
+- Licensed under GPL-3.0
+- Contains promotional images (ProjT1.png, ProjT2.png, ProjT3.png)
+- Affiliate banner assets (affiliate-banner-bg.webp, affiliate-banner-fg.webp)
+- Minimal README — the modpack itself was distributed through launcher platforms
+
+**Status**: Archived with no active maintenance. The modpack distribution was
+handled through the ProjT Launcher and mod platform integrations (Modrinth,
+CurseForge).
+
+For full documentation, see [projt-modpack.md](projt-modpack.md).
+
+---
+
+### MiniCraft Modpack (`archived/projt-minicraft-modpack/`)
+
+The MiniCraft Modpack is a historical archive of Minecraft modpack releases
+organized into multiple "seasons" (S1 through S4). This is a collection of
+pre-built modpack ZIP files rather than a source code project.
+
+**Key characteristics**:
+- Licensed under MIT
+- Organized by season:
+ - **MiniCraft S1**: Versions from 12.1.5 through 13.0.0, including beta/alpha/pre-release builds
+ - **MiniCraft S2**: Versions with mixed naming (A00051c74C, L3.0, R10056a75A, N1.0, N2.0)
+ - **MiniCraft S3**: Versions from 1.0 through 1.2.0.3, plus a DEV-1.2 build
+ - **MiniCraft S4**: Alpha versions (0.0.1–0.0.3), Beta versions (0.1–0.2.1), and
+ releases including a "LASTMAJORRELEASE-2.0.0"
+- Contains compiled ZIP archives, not source code
+
+**Archive purpose**: Preserves the complete release history of the MiniCraft
+modpack series for historical reference.
+
+---
+
+### PTlibzippy (`archived/ptlibzippy/`)
+
+PTlibzippy is a Project Tick fork of the zlib compression library, version 0.0.5.1.
+It's a general-purpose lossless data compression library implementing the DEFLATE
+algorithm (RFC 1950, 1951, 1952).
+
+**Key characteristics**:
+- Written in C
+- CMake and Autotools (configure/Makefile) build systems
+- Bazel build support (BUILD.bazel, MODULE.bazel)
+- Extensive cross-platform support (Unix, Windows, Amiga, OS/400, QNX, VMS)
+- Thread-safe implementation
+- Custom PNG shim layer (`ptlibzippy_pngshim.c`) for libpng integration
+- Prefix support for symbol namespacing (`PTLIBZIPPY_PREFIX`)
+- Language bindings: Ada, C#/.NET, Delphi, Python, Perl, Java, Tcl
+
+**License**: zlib license (permissive, compatible with GPL)
+
+**Why forked**: The fork was maintained to resolve symbol conflicts when bundling
+zlib alongside libpng in the ProjT Launcher. The custom `ptlibzippy_pngshim.c`
+and symbol prefixing prevented linker conflicts in the launcher's build.
+
+For full documentation, see [ptlibzippy.md](ptlibzippy.md).
+
+---
+
+## Directory Structure
+
+```
+archived/
+├── projt-launcher/ # ProjT Launcher (C++/Qt Minecraft Launcher)
+│ ├── CMakeLists.txt # Root CMake build file
+│ ├── CMakePresets.json # Build presets (linux, macos, windows_msvc, windows_mingw)
+│ ├── Containerfile # Docker/Podman build container
+│ ├── CHANGELOG.md # Release changelog
+│ ├── COPYING.md # License (GPL-3.0 + upstream notices)
+│ ├── MAINTAINERS # Maintainer contact info
+│ ├── README # Project overview and build instructions
+│ ├── default.nix # Nix build via flake-compat
+│ ├── bootstrap/ # Platform bootstrapping (macOS)
+│ ├── buildconfig/ # Build configuration templates
+│ ├── ci/ # CI infrastructure (own copy, pre-monorepo)
+│ ├── cmake/ # CMake modules and vcpkg integration
+│ ├── docs/ # Developer and user documentation
+│ │ ├── architecture/ # Architecture overview
+│ │ ├── contributing/ # Contributing guides
+│ │ └── handbook/ # User/developer handbook
+│ └── ...
+├── projt-modpack/ # ProjT Modpack
+│ ├── COPYING.md # License (GPL-3.0)
+│ ├── LICENSE # GPL-3.0 full text
+│ ├── README.md # Minimal README
+│ └── *.png, *.webp # Promotional assets
+├── projt-minicraft-modpack/ # MiniCraft Modpack Archive
+│ ├── LICENSE # MIT License
+│ ├── README.md # Minimal README
+│ └── MiniCraft/ # Season-organized modpack ZIPs
+│ ├── MiniCraft S1/ # Season 1 releases
+│ ├── MiniCraft S2/ # Season 2 releases
+│ ├── MiniCraft S3/ # Season 3 releases
+│ └── MiniCraft S4/ # Season 4 releases
+└── ptlibzippy/ # PTlibzippy (zlib fork)
+ ├── CMakeLists.txt # CMake build system
+ ├── BUILD.bazel # Bazel build
+ ├── MODULE.bazel # Bazel module definition
+ ├── Makefile.in # Autotools Makefile template
+ ├── configure # Autotools configure script
+ ├── COPYING.md # zlib license
+ ├── README # Library overview
+ ├── README-cmake.md # CMake build instructions
+ ├── FAQ # Frequently asked questions
+ ├── INDEX # File listing
+ ├── ptlibzippy.h # Public API header
+ ├── ptzippyconf.h # Configuration header
+ ├── adler32.c # Adler-32 checksum
+ ├── compress.c # Compression API
+ ├── crc32.c # CRC-32 checksum
+ ├── deflate.c # DEFLATE compression
+ ├── inflate.c # DEFLATE decompression
+ ├── ptlibzippy_pngshim.c # PNG integration shim
+ ├── ptzippyutil.c # Internal utilities
+ └── contrib/ # Third-party contributions
+ ├── ada/ # Ada bindings
+ ├── blast/ # PKWare DCL decompressor
+ ├── crc32vx/ # Vectorized CRC-32 (s390x)
+ ├── delphi/ # Delphi bindings
+ ├── dotzlib/ # .NET bindings
+ ├── gcc_gvmat64/ # x86-64 assembly optimizations
+ └── ...
+```
+
+---
+
+## Ownership
+
+All archived projects are owned by `@YongDo-Hyun` as defined in `ci/OWNERS`:
+
+```
+/archived/projt-launcher/ @YongDo-Hyun
+/archived/projt-minicraft-modpack/ @YongDo-Hyun
+/archived/projt-modpack/ @YongDo-Hyun
+/archived/ptlibzippy/ @YongDo-Hyun
+```
+
+---
+
+## Historical Context
+
+### Timeline
+
+The archived projects represent different phases of Project Tick's development:
+
+1. **Early phase** (2024–2025): MiniCraft Modpack was created as a community modpack
+ project with seasonal releases
+2. **ProjT Modpack** (2025): A curated modpack distributed through the ProjT Launcher
+3. **ProjT Launcher** (2025–2026): The main Minecraft launcher, forked from Prism Launcher,
+ representing the most significant engineering investment in the archive
+4. **PTlibzippy** (2025–2026): A zlib fork created to solve symbol conflicts in the
+ launcher's build system
+
+### Relationship to Current Projects
+
+| Archived Project | Successor/Replacement |
+|---------------------|-----------------------------------|
+| ProjT Launcher | MeshMC (`meshmc/`) |
+| ProjT Modpack | No direct successor |
+| MiniCraft Modpack | No direct successor |
+| PTlibzippy | System zlib (no longer bundled) |
+
+---
+
+## Policy
+
+### Modifying Archived Code
+
+Archived projects should generally not be modified. Exceptions:
+
+- **License compliance**: Updating license headers or COPYING files
+- **Security fixes**: Critical vulnerabilities in code that might be referenced externally
+- **Documentation**: Fixing links, adding archival notes
+
+### Removing Archived Projects
+
+Archived projects should not be removed from the monorepo. They serve as:
+- Historical reference for design decisions
+- License compliance (preserving upstream attribution)
+- Knowledge base for understanding the evolution of current projects
+
+### Referencing Archived Code
+
+When referencing data or patterns from archived projects in new code:
+- Copy the relevant code rather than importing from `archived/`
+- Document the source with a comment
+- Ensure license compatibility
+
+---
+
+## Related Documentation
+
+- [ProjT Launcher](projt-launcher.md) — Detailed launcher documentation
+- [ProjT Modpack](projt-modpack.md) — Modpack project details
+- [PTlibzippy](ptlibzippy.md) — Compression library documentation
diff --git a/docs/handbook/archived/projt-launcher.md b/docs/handbook/archived/projt-launcher.md
new file mode 100644
index 0000000000..d0a3413d1c
--- /dev/null
+++ b/docs/handbook/archived/projt-launcher.md
@@ -0,0 +1,444 @@
+# ProjT Launcher
+
+## Overview
+
+ProjT Launcher was a structurally disciplined Minecraft launcher engineered for long-term
+maintainability, architectural clarity, and controlled ecosystem evolution. It was a fork
+of Prism Launcher (itself forked from PolyMC, which forked from MultiMC) that diverged
+intentionally to prevent maintenance decay, dependency drift, and architectural erosion.
+
+**Status**: Archived — superseded by MeshMC (`meshmc/`).
+
+---
+
+## Project Identity
+
+| Property | Value |
+|-------------------|--------------------------------------------------------|
+| **Name** | ProjT Launcher |
+| **Location** | `archived/projt-launcher/` |
+| **Language** | C++23 / Qt 6 |
+| **Build System** | CMake 3.25+ |
+| **License** | GPL-3.0-only |
+| **Copyright** | 2026 Project Tick |
+| **Upstream** | Prism Launcher → PolyMC → MultiMC |
+| **Last Version** | 0.0.5-1 (draft) |
+| **Website** | https://projecttick.org/p/projt-launcher/ |
+| **Releases** | https://gitlab.com/Project-Tick/core/ProjT-Launcher/-/releases |
+
+---
+
+## Why ProjT Launcher Existed
+
+The README states four key motivations:
+
+1. **Long-term maintainability** — Explicit architectural constraints and review rules
+ prevent uncontrolled technical debt
+2. **Controlled third-party integration** — External dependencies are maintained as
+ detached forks with documented patch and update policies
+3. **Deterministic CI and builds** — Exact dependency versions and constrained build
+ inputs enable reproducible builds across environments
+4. **Structural clarity** — Enforced MVVM boundaries and clearly separated modules
+ simplify review, refactoring, and long-term contribution
+
+---
+
+## Architecture
+
+### Layered Model
+
+The launcher followed a strict layered architecture documented in
+`docs/architecture/OVERVIEW.md`:
+
+```
+┌─────────────────────────────────────────────────────────┐
+│ Layer 1: UI + ViewModels (launcher/ui/, viewmodels/) │
+│ Qt Widgets screens, dialogs, widgets │
+├─────────────────────────────────────────────────────────┤
+│ Layer 2: Core/Domain (launcher/, minecraft/, java/) │
+│ Models, settings, instance management, launch logic │
+├─────────────────────────────────────────────────────────┤
+│ Layer 3: Task System (launcher/tasks/) │
+│ Long-running async work: downloads, extraction │
+├─────────────────────────────────────────────────────────┤
+│ Layer 4: Networking (launcher/net/) │
+│ HTTP requests, API adapters │
+├─────────────────────────────────────────────────────────┤
+│ Layer 5: Mod Platform Integrations (modplatform/) │
+│ Modrinth, CurseForge, ATLauncher, Technic, FTB │
+└─────────────────────────────────────────────────────────┘
+```
+
+### Module Boundaries
+
+| Rule | Description |
+|------|-------------|
+| UI must not perform I/O | No file or network operations in the UI layer |
+| Core/Tasks must not depend on Qt Widgets | Keeps the domain logic testable |
+| ViewModels must be widget-free | Only expose data and actions |
+| Use Task for anything > few milliseconds | Background jobs with progress reporting |
+| Dependencies flow downward | `ui` → `core` → `data` (storage/net) |
+
+### Directory Layout
+
+```
+ProjT-Launcher/
+├── launcher/ # Main application
+│ ├── ui/ # Qt Widgets
+│ │ ├── pages/ # Main screens
+│ │ ├── widgets/ # Reusable components
+│ │ ├── dialogs/ # Modal windows
+│ │ └── setupwizard/ # First-run wizard
+│ ├── minecraft/ # Game logic
+│ │ ├── auth/ # Account authentication (Microsoft)
+│ │ ├── launch/ # Game process management
+│ │ ├── mod/ # Mod loading and management
+│ │ └── versions/ # Version parsing and resolution
+│ ├── net/ # Networking layer
+│ ├── tasks/ # Background job system
+│ ├── java/ # Java runtime discovery and management
+│ ├── modplatform/ # Mod platform APIs
+│ ├── resources/ # Images, themes, assets
+│ ├── icons/ # Application icons
+│ └── translations/ # Internationalization files (.ts)
+├── tests/ # Unit tests
+├── cmake/ # CMake build modules
+├── docs/ # Documentation
+├── website/ # Eleventy-based project website
+├── bot/ # Automation (Cloudflare Workers)
+└── meta/ # Metadata generator (Python)
+```
+
+---
+
+## Build System
+
+### CMake Configuration
+
+The root `CMakeLists.txt` began with:
+
+```cmake
+cmake_minimum_required(VERSION 3.25)
+project(Launcher)
+
+set(CMAKE_CXX_STANDARD 23)
+set(CMAKE_CXX_STANDARD_REQUIRED true)
+set(CMAKE_C_STANDARD_REQUIRED true)
+```
+
+### Build Presets
+
+```bash
+cmake --preset [macos OR linux OR windows_msvc OR windows_mingw]
+cmake --build --preset [macos OR linux OR windows_msvc OR windows_mingw] --config [Debug OR Release]
+```
+
+### Requirements
+
+| Tool | Version |
+|----------|----------|
+| CMake | 3.25+ |
+| Qt | 6.10.x |
+| Compiler | C++20/23 |
+
+### Compiler Flags (MSVC)
+
+```cmake
+# Security and optimization flags:
+"$<$<COMPILE_LANGUAGE:C,CXX>:/GS>" # Buffer security checks
+"$<$<CONFIG:Release>:/Gw;/Gy;/guard:cf>" # Size optimization + control flow guard
+"$<$<COMPILE_LANGUAGE:C,CXX>:/LTCG;/MANIFEST:NO;/STACK:8388608>" # LTO, 8MB stack
+```
+
+The 8MB stack size was required because ATLauncher's pack list needed 3-4 MiB as
+of the time of development.
+
+### Output Directory Macros
+
+The build system used custom macros for managing output directories:
+
+```cmake
+macro(projt_push_output_dirs name)
+ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${Launcher_OUTPUT_ROOT}/${name}/$<CONFIG>")
+ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${Launcher_OUTPUT_ROOT}/${name}/$<CONFIG>")
+ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${Launcher_OUTPUT_ROOT}/${name}/$<CONFIG>")
+endmacro()
+
+macro(projt_pop_output_dirs)
+ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${_PROJT_PREV_RUNTIME}")
+ ...
+endmacro()
+```
+
+Similar push/pop macros existed for:
+- `projt_push_install_libdir` / `projt_pop_install_libdir`
+- `projt_push_install_includedir` / `projt_pop_install_includedir`
+- `projt_push_install_libexecdir` / `projt_pop_install_libexecdir`
+- `projt_push_autogen_disabled` / `projt_pop_autogen_disabled`
+
+These allowed different build components to use isolated output directories without
+polluting the global CMake state.
+
+### Linux Installation Paths
+
+```cmake
+set(Launcher_BUNDLED_LIBDIR "${CMAKE_INSTALL_LIBDIR}/projtlauncher")
+set(Launcher_BUNDLED_INCLUDEDIR "include/projtlauncher")
+set(Launcher_BUNDLED_LIBEXECDIR "libexec/projtlauncher")
+```
+
+### Qt Deprecation Policy
+
+```cmake
+add_compile_definitions(QT_WARN_DEPRECATED_UP_TO=0x060400)
+add_compile_definitions(QT_DISABLE_DEPRECATED_UP_TO=0x060400)
+```
+
+This configured Qt to warn about, and hard-disable at compile time, APIs that
+were deprecated in or before Qt 6.4.0.
+
+---
+
+## Nix Build
+
+The `default.nix` used `flake-compat` to provide a traditional Nix interface:
+
+```nix
+(import (fetchTarball {
+ url = "https://github.com/edolstra/flake-compat/archive/ff81ac966bb2cae68946d5ed5fc4994f96d0ffec.tar.gz";
+ sha256 = "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=";
+}) { src = ./.; }).defaultNix
+```
+
+Quick build:
+
+```bash
+nix build .#projtlauncher
+```
+
+---
+
+## Container Build
+
+The `Containerfile` defined a Debian-based build environment:
+
+```dockerfile
+ARG DEBIAN_VERSION=stable-slim
+FROM docker.io/library/debian:${DEBIAN_VERSION}
+
+ARG QT_VERSION=6.10.2
+
+# Compilers: clang, lld, llvm, temurin-17-jdk
+# Build system: cmake, ninja-build, extra-cmake-modules, pkg-config
+# Dependencies: cmark, gamemode-dev, libarchive-dev, libcmark-dev,
+# libgl1-mesa-dev, libqrencode-dev, libtomlplusplus-dev,
+# scdoc, zlib1g-dev
+# Tooling: clang-format, clang-tidy, git
+
+ENV CMAKE_LINKER_TYPE=lld
+```
+
+Qt was installed via `aqtinstall`:
+
+```dockerfile
+RUN pip3 install --break-system-packages aqtinstall
+RUN aqt install-qt ...
+```
+
+---
+
+## Detached Fork Libraries
+
+The launcher maintained its own forks of several upstream libraries:
+
+| Library | Directory | Purpose |
+|---------------|----------------|------------------------|
+| PTlibzippy | `ptlibzippy/` | Compression (zlib fork)|
+| bzip2 | `bzip2/` | Compression |
+| quazip | `quazip/` | ZIP handling |
+| cmark | `cmark/` | Markdown parsing |
+| tomlplusplus | `tomlplusplus/`| TOML parsing |
+| libqrencode | `libqrencode/` | QR code generation |
+| libnbtplusplus| `libnbtplusplus/` | NBT format (Minecraft)|
+| gamemode | `gamemode/` | Linux GameMode support |
+
+These were maintained with documented patch and update policies to prevent
+dependency drift while staying reasonably current with upstream.
+
+### Vendored Libraries
+
+| Library | Directory | Purpose |
+|-----------|----------------|--------------------|
+| LocalPeer | `LocalPeer/` | Single instance |
+| murmur2 | `murmur2/` | Hash functions |
+| qdcss     | `qdcss/`       | Quick-and-dirty CSS parsing |
+| rainbow | `rainbow/` | Terminal colors |
+| systeminfo| `systeminfo/` | System information |
+
+---
+
+## Features at Time of Archival
+
+### Changelog (v0.0.5-1 Draft)
+
+**Highlights from the last release cycle:**
+
+- Improved Fabric/Quilt component version resolution with better Minecraft-version alignment
+- Added Launcher Hub support (web-based dashboard)
+- Strengthened version comparison logic, especially for release-candidate handling
+- Added Modrinth collection import for existing instances
+- Switched Linux Launcher Hub backend from QtWebEngine to CEF
+- Added native cockpit dashboard for Launcher Hub
+
+**Platform support:**
+
+| Platform | Backend | Packaging |
+|-----------|----------------------|------------------------------|
+| Linux | CEF-based Hub | DEB, RPM, AppImage, Flatpak |
+| macOS | Native WebView | App Bundle |
+| Windows | Native WebView | MSI, Portable |
+
+---
+
+## CI Infrastructure (Pre-Monorepo)
+
+The launcher had its own CI infrastructure in `ci/`, which was the predecessor
+to the current monorepo CI system. It included:
+
+- `ci/default.nix` — Nix CI entry point
+- `ci/pinned.json` — Pinned dependencies
+- `ci/supportedBranches.js` — Branch classification
+- `ci/github-script/` — GitHub Actions helpers
+- `ci/eval/` — Nix evaluation infrastructure
+ - `attrpaths.nix` — Attribute path enumeration
+ - `chunk.nix` — Evaluation chunking
+ - `diff.nix` — Evaluation diffing
+ - `outpaths.nix` — Output path computation
+ - `compare/` — Statistics comparison
+- `ci/nixpkgs-vet.nix` / `ci/nixpkgs-vet.sh` — Nixpkgs vetting
+- `ci/parse.nix` — CI configuration parsing
+- `ci/supportedSystems.json` — Supported target systems
+- `ci/supportedVersions.nix` — Supported version matrix
+
+Some of these patterns were carried forward into the monorepo CI system.
+
+---
+
+## Documentation Structure
+
+The launcher had extensive documentation:
+
+```
+docs/
+├── APPLE_SILICON_RATIONALE.md
+├── BUILD_SYSTEM.md
+├── FUZZING.md
+├── README.md
+├── architecture/
+│ └── OVERVIEW.md
+├── contributing/
+│ ├── ARCHITECTURE.md
+│ ├── CODE_STYLE.md
+│ ├── GETTING_STARTED.md
+│ ├── LAUNCHER_TEST_MATRIX.md
+│ ├── PROJECT_STRUCTURE.md
+│ ├── README.md
+│ ├── TESTING.md
+│ └── WORKFLOW.md
+└── handbook/
+ ├── README.md
+ ├── bot.md, bzip2.md, cmark.md, ...
+ ├── help-pages/
+ │ ├── apis.md, custom-commands.md, ...
+ │ └── environment-variables.md
+ └── wiki/
+ ├── development/
+ │ ├── instructions/
+        │   │   └── linux.md, macos.md, windows.md
+ │ └── translating.md
+ ├── getting-started/
+ │ ├── installing-projtlauncher.md
+ │ ├── installing-java.md
+ │ ├── create-instance.md
+ │ └── download-modpacks.md
+ └── help-pages/
+ └── ... (mirrors of handbook help-pages)
+```
+
+---
+
+## Maintainership
+
+```
+[Mehmet Samet Duman]
+GitHub: @YongDo-Hyun
+Email: yongdohyun@mail.projecttick.org
+Paths: **
+```
+
+The project was maintained by a single maintainer with full ownership of all paths.
+
+---
+
+## License
+
+The launcher carried a multi-layer license history:
+
+```
+ProjT Launcher - Minecraft Launcher
+Copyright (C) 2026 Project Tick
+License: GPL-3.0-only
+
+Incorporates work from:
+├── Prism Launcher (Copyright 2022-2025 Prism Launcher Contributors, GPL-3.0)
+│ └── Incorporates:
+│ └── MultiMC (Copyright 2013-2021 MultiMC Contributors, Apache-2.0)
+└── PolyMC (Copyright 2021-2022 PolyMC Contributors, GPL-3.0)
+```
+
+The logo carried a separate license:
+- Original: Prism Launcher Logo © Prism Launcher Contributors (CC BY-SA 4.0)
+- Modified: ProjT Launcher Logo © 2026 Project Tick (CC BY-SA 4.0)
+
+---
+
+## Why It Was Archived
+
+ProjT Launcher was archived when MeshMC (`meshmc/`) became the primary launcher
+in the Project Tick monorepo. MeshMC continued the development trajectory with:
+- Updated architecture decisions
+- The same mod platform integrations
+- The same CMake/Qt/Nix build infrastructure
+- The detached fork library approach, carried forward
+
+The launcher code remains in `archived/` as a reference for:
+- Design patterns (layered architecture, task system)
+- Build system techniques (CMake push/pop macros, vcpkg integration)
+- CI patterns (GitHub script infrastructure)
+- License compliance (preserving upstream attribution chains)
+
+---
+
+## Building (for Reference)
+
+If someone needs to build the archived launcher for historical purposes:
+
+```bash
+cd archived/projt-launcher/
+git submodule update --init --recursive
+
+# Linux:
+cmake --preset linux
+cmake --build --preset linux --config Release
+
+# macOS:
+cmake --preset macos
+cmake --build --preset macos --config Release
+
+# Windows (MSVC):
+cmake --preset windows_msvc
+cmake --build --preset windows_msvc --config Release
+```
+
+Note: Build success is not guaranteed since the archived code is not maintained
+and dependencies may have changed.
diff --git a/docs/handbook/archived/projt-modpack.md b/docs/handbook/archived/projt-modpack.md
new file mode 100644
index 0000000000..702400a77c
--- /dev/null
+++ b/docs/handbook/archived/projt-modpack.md
@@ -0,0 +1,245 @@
+# ProjT Modpack
+
+## Overview
+
+ProjT Modpack was a curated Minecraft modpack created and distributed by Project Tick.
+The project served as the official modpack offering alongside the ProjT Launcher, providing
+a pre-configured set of mods for the Project Tick community.
+
+**Status**: Archived — no longer maintained or distributed.
+
+---
+
+## Project Identity
+
+| Property | Value |
+|-------------------|-----------------------------------------------------|
+| **Name** | ProjT Modpack |
+| **Location** | `archived/projt-modpack/` |
+| **Type** | Minecraft Modpack |
+| **License** | GPL-3.0-or-later |
+| **Copyright** | 2025–2026 Project Tick |
+
+---
+
+## Repository Contents
+
+The modpack repository contained modpack configuration files and promotional assets:
+
+```
+archived/projt-modpack/
+├── .DS_Store # macOS filesystem metadata (artifact)
+├── .gitattributes # Git line ending and diff configuration
+├── COPYING.md # GPL-3.0 license summary with copyright notice
+├── LICENSE # Full GPL-3.0 license text
+├── README.md # Minimal project README
+├── ProjT1.png # Promotional image 1
+├── ProjT2.png # Promotional image 2
+├── ProjT3.png # Promotional image 3
+├── affiliate-banner-bg.webp # Affiliate banner background
+├── affiliate-banner-fg.webp # Affiliate banner foreground
+└── bisect-icon.webp # Bisect hosting icon
+```
+
+---
+
+## License
+
+The modpack was licensed under GPL-3.0-or-later:
+
+```
+ProjT Modpack - Minecraft Modpack by Project Tick
+Copyright (C) 2025-2026 Project Tick
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+```
+
+Note: This is GPL-3.0-**or-later** (with the "or any later version" clause), unlike
+the ProjT Launcher which was GPL-3.0-**only**.
+
+---
+
+## Promotional Assets
+
+The repository included several promotional images used for marketing and distribution:
+
+### Modpack Screenshots
+
+| File | Description |
+|-----------|------------------------------------------|
+| `ProjT1.png` | Promotional screenshot/image 1 |
+| `ProjT2.png` | Promotional screenshot/image 2 |
+| `ProjT3.png` | Promotional screenshot/image 3 |
+
+These were likely used on mod platform listing pages (Modrinth, CurseForge) and
+the Project Tick website.
+
+### Affiliate Assets
+
+| File | Format | Description |
+|---------------------------|--------|--------------------------------------|
+| `affiliate-banner-bg.webp` | WebP | Affiliate banner background image |
+| `affiliate-banner-fg.webp` | WebP | Affiliate banner foreground overlay |
+| `bisect-icon.webp` | WebP | Bisect Hosting affiliate icon |
+
+The affiliate assets suggest the modpack had hosting partnership integrations,
+specifically with [Bisect Hosting](https://www.bisecthosting.com/), a popular
+Minecraft server hosting provider.
+
+---
+
+## Distribution
+
+### Primary Distribution Channels
+
+The modpack was distributed through:
+
+1. **ProjT Launcher** — Native integration via the launcher's modpack download dialog
+2. **Mod platforms** — Listed on Modrinth and/or CurseForge for wider discoverability
+3. **Project Tick website** — https://projecttick.org/ (no longer active for this modpack)
+
+### Installation Flow
+
+Users could install the modpack through the ProjT Launcher:
+
+1. Open ProjT Launcher
+2. Navigate to the modpack browser
+3. Search for "ProjT Modpack" or browse curated packs
+4. Click Install — the launcher handles mod downloading and configuration
+5. Launch the game with the modpack pre-configured
+
+---
+
+## Relationship to ProjT Launcher
+
+The ProjT Modpack was tightly coupled with the ProjT Launcher:
+
+- The launcher's modpack platform integrations (Modrinth, CurseForge, ATLauncher,
+ Technic, FTB) enabled direct modpack installation
+- The modpack was the launcher's "showcase" offering — a reference configuration
+ demonstrating what the launcher could manage
+- Promotional assets were shared between the modpack and launcher marketing
+
+When the ProjT Launcher was archived, the modpack lost its primary distribution
+channel and was archived alongside it.
+
+---
+
+## Relationship to MiniCraft Modpack
+
+The ProjT Modpack was a separate project from the MiniCraft Modpack
+(`archived/projt-minicraft-modpack/`):
+
+| Aspect | ProjT Modpack | MiniCraft Modpack |
+|----------------|----------------------------------|--------------------------------------|
+| **License** | GPL-3.0 | MIT |
+| **Content** | Curated mod configuration | Pre-built modpack ZIPs |
+| **Format** | Platform-distributed configs | Self-contained ZIP archives |
+| **Versioning** | Standard semver | Season-based (S1–S4) |
+| **Distribution**| Mod platforms + launcher | Direct download |
+| **Period** | 2025–2026 | 2024–2026 |
+
+---
+
+## Why It Was Archived
+
+The ProjT Modpack was archived because:
+
+1. **Distribution channel archived** — The ProjT Launcher, which was the primary
+ distribution mechanism, was itself archived
+2. **Community consolidation** — Project Tick's focus shifted to other projects
+ (MeshMC, corebinutils, cgit, etc.)
+3. **No standalone value** — The modpack configuration files without a corresponding
+ launcher integration had limited utility
+
+---
+
+## Historical Significance
+
+The ProjT Modpack was significant in Project Tick's history because:
+
+- **Community engagement** — It was one of the first user-facing products, giving
+ the community something to interact with directly
+- **Platform integration testing** — It served as a test bed for the launcher's
+ modpack download and installation workflows
+- **Branding** — The promotional assets established Project Tick's visual identity
+ in the Minecraft modding community
+- **Ecosystem validation** — It validated the end-to-end flow from mod curation
+ → platform listing → launcher installation → gameplay
+
+---
+
+## File Details
+
+### .gitattributes
+
+The repository included Git attributes for handling binary files and line endings:
+
+```
+# Binary files should not be diffed
+*.png binary
+*.webp binary
+```
+
+### README.md
+
+The README was minimal:
+
+```markdown
+# ProjT Modpack
+```
+
+This suggests the modpack's detailed description was maintained on the mod platform
+listing pages rather than in the repository.
+
+---
+
+## Ownership
+
+Maintained by `@YongDo-Hyun` as defined in `ci/OWNERS`:
+
+```
+/archived/projt-modpack/ @YongDo-Hyun
+```
+
+---
+
+## Assets Inventory
+
+### Image Assets
+
+| Asset | Format | Size Category | Purpose |
+|---------------------------|--------|---------------|------------------|
+| `ProjT1.png` | PNG | Full-size | Promotional |
+| `ProjT2.png` | PNG | Full-size | Promotional |
+| `ProjT3.png` | PNG | Full-size | Promotional |
+| `affiliate-banner-bg.webp`| WebP | Banner-size | Affiliate |
+| `affiliate-banner-fg.webp`| WebP | Banner-size | Affiliate |
+| `bisect-icon.webp` | WebP | Icon-size | Affiliate |
+
+The use of WebP for affiliate/banner assets and PNG for screenshots reflects
+the different quality requirements:
+- PNG for screenshots — lossless quality for game imagery
+- WebP for banners — smaller file size for web distribution
+
+---
+
+## Mod Content
+
+The repository does not contain the mod files themselves (`.jar` files) — these
+were downloaded dynamically through the launcher's mod platform integrations.
+The modpack definition (which mods, versions, and configurations to include)
+was stored in the platform-specific manifest format (e.g., Modrinth's
+`modrinth.index.json` or CurseForge's `manifest.json`), which is not present
+in the archived copy.
+
+This is typical for modpack distribution: the repository contains metadata and
+marketing assets, while the actual mod binaries are served by the platform CDNs.
diff --git a/docs/handbook/archived/ptlibzippy.md b/docs/handbook/archived/ptlibzippy.md
new file mode 100644
index 0000000000..21ad2d6ce1
--- /dev/null
+++ b/docs/handbook/archived/ptlibzippy.md
@@ -0,0 +1,501 @@
+# PTlibzippy
+
+## Overview
+
+PTlibzippy is a Project Tick fork of the [zlib](https://zlib.net/) data compression library.
+It is a general-purpose lossless data compression library implementing the DEFLATE algorithm
+as specified in RFCs 1950 (zlib format), 1951 (deflate format), and 1952 (gzip format).
+
+The final release of PTlibzippy was version 0.0.5.1. It was maintained as a detached
+fork to solve symbol conflicts when bundling zlib alongside libpng in the ProjT
+
+**Status**: Archived — system zlib is now used instead of a bundled fork.
+
+---
+
+## Project Identity
+
+| Property | Value |
+|-------------------|----------------------------------------------------------|
+| **Name** | PTlibzippy |
+| **Location** | `archived/ptlibzippy/` |
+| **Language** | C |
+| **Version** | 0.0.5.1 |
+| **License** | zlib license (permissive) |
+| **Copyright** | 1995–2026 Jean-loup Gailly and Mark Adler; 2026 Project Tick |
+| **Homepage** | https://projecttick.org/p/zlib |
+| **FAQ** | https://projecttick.org/p/zlib/zlib_faq.html |
+| **Contact** | community@community.projecttick.org |
+
+---
+
+## Why a zlib Fork?
+
+The fork was created to solve a specific technical problem in the ProjT Launcher:
+
+### The Symbol Conflict Problem
+
+When the launcher bundled both zlib and libpng as static libraries, the linker
+encountered duplicate symbol definitions. Both zlib and libpng's internal zlib
+usage exported identical function names (e.g., `deflate`, `inflate`, `compress`),
+causing link-time errors or runtime symbol resolution ambiguities.
+
+### The Solution
+
+PTlibzippy addressed this through:
+
+1. **Symbol prefixing** — The `PTLIBZIPPY_PREFIX` CMake option enables renaming all
+ public symbols with a custom prefix, preventing collisions
+2. **PNG shim layer** — A custom `ptlibzippy_pngshim.c` file provided a compatibility
+ layer between the renamed zlib symbols and libpng's expectations
+3. **Custom header names** — Headers were renamed (`ptlibzippy.h`, `ptzippyconf.h`,
+ `ptzippyguts.h`, `ptzippyutil.h`) to avoid include-path conflicts
+
+As noted in the ProjT Launcher changelog:
+> "zlib symbol handling was refined to use libpng-targeted shim overrides instead
+> of global prefixing."
+
+---
+
+## License
+
+The zlib license is permissive and compatible with GPL:
+
+```
+Copyright notice:
+
+ (C) 1995-2026 Jean-loup Gailly and Mark Adler
+ (C) 2026 Project Tick
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+```
+
+---
+
+## Build System
+
+PTlibzippy supported three build systems:
+
+### CMake (Primary)
+
+```cmake
+cmake_minimum_required(VERSION 3.12...3.31)
+
+project(
+ PTlibzippy
+ LANGUAGES C
+ VERSION 0.0.5.1
+ HOMEPAGE_URL "https://projecttick.org/p/zlib"
+ DESCRIPTION "PTlibzippy - a general-purpose lossless data-compression library")
+```
+
+#### CMake Options
+
+| Option | Default | Description |
+|---------------------------|---------|----------------------------------------------|
+| `PTLIBZIPPY_BUILD_TESTING`| `ON` | Enable building test programs |
+| `PTLIBZIPPY_BUILD_SHARED` | `ON` | Build shared library (`libptlibzippy.so`) |
+| `PTLIBZIPPY_BUILD_STATIC` | `ON` | Build static library (`libptlibzippystatic.a`)|
+| `PTLIBZIPPY_INSTALL` | `ON` | Enable `make install` target |
+| `PTLIBZIPPY_PREFIX` | `OFF` | Enable symbol prefixing for all public APIs |
+
+#### Feature Detection
+
+The CMake build detected platform capabilities:
+
+```cmake
+check_type_size(off64_t OFF64_T) # Large file support
+check_function_exists(fseeko HAVE_FSEEKO) # POSIX fseeko
+check_include_file(stdarg.h HAVE_STDARG_H)
+check_include_file(unistd.h HAVE_UNISTD_H)
+```
+
+And generated `ptzippyconf.h` with the results:
+
+```cmake
+configure_file(${PTlibzippy_BINARY_DIR}/ptzippyconf.h.cmakein
+ ${PTlibzippy_BINARY_DIR}/ptzippyconf.h)
+```
+
+#### Visibility Attributes
+
+On non-Windows platforms, the build checked for GCC visibility attributes:
+
+```c
+void f(void) __attribute__ ((visibility("hidden")));
+```
+
+This enabled hiding internal symbols from the shared library's public API.
+
+#### Library Targets
+
+**Shared library:**
+
+```cmake
+add_library(ptlibzippy SHARED ${PTLIBZIPPY_SRCS} ...)
+add_library(PTlibzippy::PTlibzippy ALIAS ptlibzippy)
+
+target_compile_definitions(ptlibzippy
+ PRIVATE PTLIBZIPPY_BUILD PTLIBZIPPY_INTERNAL= ...)
+```
+
+**Static library:**
+
+```cmake
+add_library(ptlibzippystatic STATIC ${PTLIBZIPPY_SRCS} ...)
+```
+
+On Windows, the static library gets an `s` suffix to distinguish it from the import library.
+
+#### pkg-config
+
+A `ptlibzippy.pc` file was generated for pkg-config integration:
+
+```cmake
+configure_file(${PTlibzippy_SOURCE_DIR}/ptlibzippy.pc.cmakein
+ ${PTLIBZIPPY_PC} @ONLY)
+```
+
+### Autotools
+
+Traditional Unix build:
+
+```bash
+./configure
+make
+make test
+make install
+```
+
+### Bazel
+
+```
+BUILD.bazel # Build rules
+MODULE.bazel # Module definition
+```
+
+---
+
+## Source Files
+
+### Public Headers
+
+| Header | Purpose |
+|----------------|----------------------------------------------------|
+| `ptlibzippy.h` | Public API (compress, decompress, gzip, etc.) |
+| `ptzippyconf.h`| Configuration header (generated at build time) |
+
+### Private Headers
+
+| Header | Purpose |
+|----------------|----------------------------------------------------|
+| `crc32.h` | CRC-32 lookup tables |
+| `deflate.h` | DEFLATE compression state machine |
+| `ptzippyguts.h` | Internal definitions (gzip state) |
+| `inffast.h` | Fast inflate inner loop |
+| `inffixed.h` | Fixed Huffman code tables |
+| `inflate.h` | Inflate state machine |
+| `inftrees.h` | Huffman tree building |
+| `trees.h` | Dynamic Huffman tree encoding |
+| `ptzippyutil.h` | System-level utilities |
+
+### Source Files
+
+| Source | Purpose |
+|-------------------------|-----------------------------------------------|
+| `adler32.c` | Adler-32 checksum computation |
+| `compress.c` | Compression convenience API |
+| `crc32.c` | CRC-32 checksum computation |
+| `deflate.c` | DEFLATE compression algorithm |
+| `gzclose.c` | gzip file close |
+| `gzlib.c` | gzip file utility functions |
+| `gzread.c` | gzip file reading |
+| `gzwrite.c` | gzip file writing |
+| `inflate.c` | DEFLATE decompression algorithm |
+| `infback.c` | Inflate using a callback interface |
+| `inftrees.c` | Generate Huffman trees for inflate |
+| `inffast.c` | Fast inner loop for inflate |
+| `ptlibzippy_pngshim.c` | PNG integration shim (Project Tick addition) |
+| `trees.c` | Output compressed data using Huffman coding |
+| `uncompr.c` | Decompression convenience API |
+| `ptzippyutil.c` | Operating system interface utilities |
+
+### Project Tick Additions
+
+The following files were added or renamed by Project Tick (not present in upstream zlib):
+
+| File | Change Type | Purpose |
+|-------------------------|-------------|--------------------------------------|
+| `ptlibzippy_pngshim.c` | Added | Shim for libpng symbol compatibility |
+| `ptzippyguts.h` | Renamed | From `gzguts.h` |
+| `ptzippyutil.c` | Renamed | From `zutil.c` |
+| `ptzippyutil.h` | Renamed | From `zutil.h` |
+| `ptzippyconf.h` | Renamed | From `zconf.h` |
+| `ptlibzippy.h` | Renamed | From `zlib.h` |
+| `ptlibzippy.pc.cmakein` | Renamed | From `zlib.pc.cmakein` |
+| `COPYING.md` | Modified | Added Project Tick copyright |
+
+---
+
+## Symbol Prefixing
+
+The `PTLIBZIPPY_PREFIX` option enables symbol prefixing for all public API functions.
+When enabled, all zlib functions are prefixed to avoid collisions:
+
+| Original Symbol | Prefixed Symbol (example) |
+|----------------|---------------------------|
+| `deflate` | `pt_deflate` |
+| `inflate` | `pt_inflate` |
+| `compress` | `pt_compress` |
+| `uncompress` | `pt_uncompress` |
+| `crc32` | `pt_crc32` |
+| `adler32` | `pt_adler32` |
+
+The prefix is configured through `ptzippyconf.h`:
+
+```cmake
+set(PT_PREFIX ${PTLIBZIPPY_PREFIX})
+file(APPEND ${PTCONF_OUT_FILE} "#cmakedefine PT_PREFIX 1\n")
+```
+
+---
+
+## PNG Shim Layer
+
+The `ptlibzippy_pngshim.c` file was the key Project Tick addition. It provided a
+compatibility layer that allowed libpng to use PTlibzippy's renamed symbols
+transparently.
+
+Without the shim, libpng would look for standard zlib function names (`deflate`,
+`inflate`, etc.) and fail to link against PTlibzippy's prefixed versions.
+
+The shim worked by:
+1. Including PTlibzippy's headers (with prefixed symbols)
+2. Providing wrapper functions with the original zlib names
+3. Each wrapper forwarded to the corresponding PTlibzippy function
+
+This approach was described in the changelog as:
+> "zlib symbol handling was refined to use libpng-targeted shim overrides instead
+> of global prefixing"
+
+---
+
+## Cross-Platform Support
+
+PTlibzippy inherited zlib's extensive platform support:
+
+| Platform | Build System | Notes |
+|------------------|-------------------------------|-------------------------------|
+| Linux | CMake, Autotools, Makefile | Primary development platform |
+| macOS | CMake, Autotools | |
+| Windows | CMake, NMake, MSVC | DLL and static library |
+| Windows (MinGW) | CMake, Makefile | |
+| Cygwin | CMake, Autotools | DLL naming handled |
+| Amiga | Makefile.pup, Makefile.sas | SAS/C compiler |
+| OS/400 | Custom makefiles | IBM i (formerly AS/400) |
+| QNX | Custom makefiles | QNX Neutrino |
+| VMS | make_vms.com | OpenVMS command procedure |
+
+### Platform-Specific Notes from README
+
+- **64-bit Irix**: `deflate.c` must be compiled without any optimization; building it with `-O` causes a libpng test failure
+- **Digital Unix 4.0D**: Requires `cc -std1` for correct `gzprintf` behavior
+- **HP-UX 9.05**: Some versions of `/bin/cc` are incompatible
+- **PalmOS**: Supported via external port (https://palmzlib.sourceforge.net/)
+
+---
+
+## Third-Party Contributions
+
+The `contrib/` directory contained community-contributed extensions:
+
+| Directory | Description |
+|----------------|------------------------------------------------|
+| `contrib/ada/` | Ada programming language bindings |
+| `contrib/blast/`| PKWare Data Compression Library decompressor |
+| `contrib/crc32vx/`| Vectorized CRC-32 for IBM z/Architecture |
+| `contrib/delphi/`| Borland Delphi bindings |
+| `contrib/dotzlib/`| .NET (C#) bindings |
+| `contrib/gcc_gvmat64/`| x86-64 assembly optimizations |
+
+Ada bindings included full package specifications:
+
+```
+contrib/ada/ptlib.ads # Package spec
+contrib/ada/ptlib.adb # Package body
+contrib/ada/ptlib-thin.ads # Thin binding spec
+contrib/ada/ptlib-thin.adb # Thin binding body
+contrib/ada/ptlib-streams.ads # Stream interface spec
+contrib/ada/ptlib-streams.adb # Stream interface body
+```
+
+---
+
+## FAQ Highlights
+
+From the project FAQ:
+
+**Q: Is PTlibzippy Y2K-compliant?**
+A: Yes. PTlibzippy doesn't handle dates.
+
+**Q: Can zlib handle .zip archives?**
+A: Not by itself. See `contrib/minizip`.
+
+**Q: Can zlib handle .Z files?**
+A: No. Spawn an `uncompress` or `gunzip` subprocess instead.
+
+**Q: How can I make a Unix shared library?**
+A: Default build produces shared + static libraries:
+```bash
+make distclean
+./configure
+make
+```
+
+---
+
+## Language Bindings
+
+PTlibzippy (and its zlib base) was accessible from many languages:
+
+| Language | Interface |
+|----------|---------------------------------------------------|
+| C | Native API via `ptlibzippy.h` |
+| C++ | Direct C API usage |
+| Ada | `contrib/ada/` bindings |
+| C# (.NET)| `contrib/dotzlib/` bindings |
+| Delphi | `contrib/delphi/` bindings |
+| Java | `java.util.zip` package (JDK built-in) |
+| Perl | IO::Compress module |
+| Python | `zlib` module (Python standard library) |
+| Tcl | Built-in zlib support |
+
+---
+
+## Integration with ProjT Launcher
+
+In the ProjT Launcher's CMake build, PTlibzippy was used via:
+
+```cmake
+# From cmake/usePTlibzippy.cmake (referenced in the launcher's cmake/ directory)
+```
+
+The launcher's `CMakeLists.txt` imported PTlibzippy alongside other compression
+libraries (bzip2, quazip) to handle:
+
+- Mod archive extraction (`.zip`, `.jar` files)
+- Instance backup/restore
+- Asset pack handling
+- Modpack import/export (Modrinth `.mrpack`, CurseForge `.zip` formats)
+
+---
+
+## Why It Was Archived
+
+PTlibzippy was archived when:
+
+1. **Symbol conflict resolution matured** — The launcher's build system evolved to
+ handle zlib/libpng coexistence without a custom fork
+2. **System zlib preferred** — Using the system's zlib package reduced maintenance
+ burden and ensured security patches were applied promptly
+3. **Launcher archived** — When ProjT Launcher was archived, its dependency libraries
+ (including PTlibzippy) were archived alongside it
+4. **MeshMC approach** — The successor launcher (MeshMC) uses system libraries or
+ vendored sources with different conflict resolution strategies
+
+---
+
+## Building (for Reference)
+
+### CMake
+
+```bash
+cd archived/ptlibzippy/
+mkdir build && cd build
+cmake ..
+make
+make test
+```
+
+### With Symbol Prefixing
+
+```bash
+cmake .. -DPTLIBZIPPY_PREFIX=pt_
+make
+```
+
+### Autotools
+
+```bash
+cd archived/ptlibzippy/
+./configure
+make
+make test
+make install
+```
+
+### Static-Only Build
+
+```bash
+cmake .. -DPTLIBZIPPY_BUILD_SHARED=OFF
+make
+```
+
+Note: Build success is not guaranteed since the archived code is not maintained.
+
+---
+
+## File Index
+
+From the project `INDEX` file:
+
+```
+CMakeLists.txt cmake build file
+ChangeLog history of changes
+FAQ Frequently Asked Questions about zlib
+INDEX file listing
+Makefile dummy Makefile that tells you to ./configure
+Makefile.in template for Unix Makefile
+README project overview
+configure configure script for Unix
+make_vms.com makefile for VMS
+test/example.c zlib usage examples for build testing
+test/minigzip.c minimal gzip-like functionality for build testing
+test/infcover.c inf*.c code coverage for build coverage testing
+treebuild.xml XML description of source file dependencies
+ptzippyconf.h zlib configuration header (template)
+ptlibzippy.h zlib public API header
+
+amiga/ makefiles for Amiga SAS C
+doc/ documentation for formats and algorithms
+msdos/ makefiles for MSDOS
+old/ legacy makefiles and documentation
+os400/ makefiles for OS/400
+qnx/ makefiles for QNX
+watcom/ makefiles for OpenWatcom
+win32/ makefiles for Windows
+```
+
+---
+
+## Ownership
+
+Maintained by `@YongDo-Hyun` as defined in `ci/OWNERS`:
+
+```
+/archived/ptlibzippy/ @YongDo-Hyun
+```
diff --git a/docs/handbook/cgit/api-reference.md b/docs/handbook/cgit/api-reference.md
new file mode 100644
index 0000000000..0c38564e74
--- /dev/null
+++ b/docs/handbook/cgit/api-reference.md
@@ -0,0 +1,468 @@
+# cgit — API Reference
+
+## Overview
+
+This document catalogs all public function prototypes, types, and global
+variables exported by cgit's header files. Functions are grouped by header
+file and module.
+
+## `cgit.h` — Core Types and Functions
+
+### Core Structures
+
+```c
+struct cgit_environment {
+ const char *cgit_config; /* CGIT_CONFIG env var */
+ const char *http_host; /* HTTP_HOST */
+ const char *https; /* HTTPS */
+ const char *no_http; /* NO_HTTP */
+ const char *http_cookie; /* HTTP_COOKIE */
+ const char *request_method; /* REQUEST_METHOD */
+ const char *query_string; /* QUERY_STRING */
+ const char *http_referer; /* HTTP_REFERER */
+ const char *path_info; /* PATH_INFO */
+ const char *script_name; /* SCRIPT_NAME */
+ const char *server_name; /* SERVER_NAME */
+ const char *server_port; /* SERVER_PORT */
+ const char *http_accept; /* HTTP_ACCEPT */
+ int authenticated; /* authentication result */
+};
+
+struct cgit_query {
+ char *raw;
+ char *repo;
+ char *page;
+ char *search;
+ char *grep;
+ char *head;
+ char *sha1;
+ char *sha2;
+ char *path;
+ char *name;
+ char *url;
+ char *mimetype;
+ char *etag;
+ int nohead;
+ int ofs;
+ int has_symref;
+ int has_sha1;
+ int has_dot;
+ int ignored;
+ char *sort;
+ int showmsg;
+ int ssdiff;
+ int show_all;
+ int context;
+ int follow;
+ int dt;
+ int log_hierarchical_threading;
+};
+
+struct cgit_page {
+ const char *mimetype;
+ const char *charset;
+ const char *filename;
+ const char *etag;
+ const char *title;
+ int status;
+ time_t modified;
+ time_t expires;
+ size_t size;
+};
+
+struct cgit_config {
+ char *root_title;
+ char *root_desc;
+ char *root_readme;
+ char *root_coc;
+ char *root_cla;
+ char *root_homepage;
+ char *root_homepage_title;
+ struct string_list root_links;
+ char *css;
+ struct string_list css_list;
+ char *js;
+ struct string_list js_list;
+ char *logo;
+ char *logo_link;
+ char *favicon;
+ char *header;
+ char *footer;
+ char *head_include;
+ char *module_link;
+ char *virtual_root;
+ char *script_name;
+ char *section;
+ char *cache_root;
+ char *robots;
+ char *clone_prefix;
+ char *clone_url;
+ char *readme;
+ char *agefile;
+ char *project_list;
+ char *strict_export;
+ char *mimetype_file;
+ /* ... filter pointers, integer flags, limits ... */
+ int cache_size;
+ int cache_root_ttl;
+ int cache_repo_ttl;
+ int cache_dynamic_ttl;
+ int cache_static_ttl;
+ int cache_about_ttl;
+ int cache_snapshot_ttl;
+ int cache_scanrc_ttl;
+ int max_repo_count;
+ int max_commit_count;
+ int max_message_length;
+ int max_repodesc_length;
+ int max_blob_size;
+ int max_stats;
+ int max_atom_items;
+ int max_subtree_commits;
+ int summary_branches;
+ int summary_tags;
+ int summary_log;
+ int snapshots;
+ int enable_http_clone;
+ int enable_index_links;
+ int enable_index_owner;
+ int enable_blame;
+ int enable_commit_graph;
+ int enable_log_filecount;
+ int enable_log_linecount;
+ int enable_remote_branches;
+ int enable_subject_links;
+ int enable_html_serving;
+ int enable_subtree;
+ int enable_tree_linenumbers;
+ int enable_git_config;
+ int enable_filter_overrides;
+ int enable_follow_links;
+ int embedded;
+ int noheader;
+ int noplainemail;
+ int local_time;
+ int case_sensitive_sort;
+ int section_sort;
+ int section_from_path;
+ int side_by_side_diffs;
+ int remove_suffix;
+ int scan_hidden_path;
+ int branch_sort;
+ int commit_sort;
+ int renamelimit;
+};
+
+struct cgit_repo {
+ char *url;
+ char *name;
+ char *basename;
+ char *path;
+ char *desc;
+ char *owner;
+ char *homepage;
+ char *defbranch;
+ char *section;
+ char *clone_url;
+ char *logo;
+ char *logo_link;
+ char *readme;
+ char *module_link;
+ char *extra_head_content;
+ char *snapshot_prefix;
+ struct string_list badges;
+ struct cgit_filter *about_filter;
+ struct cgit_filter *commit_filter;
+ struct cgit_filter *source_filter;
+ struct cgit_filter *email_filter;
+ struct cgit_filter *owner_filter;
+ int snapshots;
+ int enable_blame;
+ int enable_commit_graph;
+ int enable_log_filecount;
+ int enable_log_linecount;
+ int enable_remote_branches;
+ int enable_subject_links;
+ int enable_html_serving;
+ int enable_subtree;
+ int max_stats;
+ int max_subtree_commits;
+ int branch_sort;
+ int commit_sort;
+ int hide;
+ int ignore;
+};
+
+struct cgit_context {
+ struct cgit_environment env;
+ struct cgit_query qry;
+ struct cgit_config cfg;
+ struct cgit_page page;
+ struct cgit_repo *repo;
+};
+```
+
+### Global Variables
+
+```c
+extern struct cgit_context ctx;
+extern struct cgit_repolist cgit_repolist;
+extern const char *cgit_version;
+```
+
+### Repository Management
+
+```c
+extern struct cgit_repo *cgit_add_repo(const char *url);
+extern struct cgit_repo *cgit_get_repoinfo(const char *url);
+```
+
+### Parsing Functions
+
+```c
+extern void cgit_parse_url(const char *url);
+extern struct commitinfo *cgit_parse_commit(struct commit *commit);
+extern struct taginfo *cgit_parse_tag(struct tag *tag);
+extern void cgit_free_commitinfo(struct commitinfo *info);
+extern void cgit_free_taginfo(struct taginfo *info);
+```
+
+### Diff Functions
+
+```c
+typedef void (*filepair_fn)(struct diff_filepair *pair);
+typedef void (*linediff_fn)(char *line, int len);
+
+extern void cgit_diff_tree(const struct object_id *old_oid,
+ const struct object_id *new_oid,
+ filepair_fn fn, const char *prefix,
+ int renamelimit);
+extern void cgit_diff_commit(struct commit *commit, filepair_fn fn,
+ const char *prefix);
+extern void cgit_diff_files(const struct object_id *old_oid,
+ const struct object_id *new_oid,
+ unsigned long *old_size,
+ unsigned long *new_size,
+ int *binary, int context,
+ int ignorews, linediff_fn fn);
+```
+
+### Snapshot Functions
+
+```c
+extern int cgit_parse_snapshots_mask(const char *str);
+
+extern const struct cgit_snapshot_format cgit_snapshot_formats[];
+```
+
+### Filter Functions
+
+```c
+extern struct cgit_filter *cgit_new_filter(const char *cmd, filter_type type);
+extern int cgit_open_filter(struct cgit_filter *filter, ...);
+extern int cgit_close_filter(struct cgit_filter *filter);
+```
+
+### Utility Functions
+
+```c
+extern const char *cgit_repobasename(const char *reponame);
+extern char *cgit_default_repo_desc;
+extern int cgit_ref_path_exists(const char *path, const char *ref, int file_only);
+```
+
+## `html.h` — HTML Output Functions
+
+```c
+extern const char *fmt(const char *format, ...);
+extern char *fmtalloc(const char *format, ...);
+
+extern void html_raw(const char *data, size_t size);
+extern void html(const char *txt);
+extern void htmlf(const char *format, ...);
+extern void html_txt(const char *txt);
+extern void html_ntxt(const char *txt, int len);
+extern void html_attr(const char *txt);
+extern void html_url_path(const char *txt);
+extern void html_url_arg(const char *txt);
+extern void html_hidden(const char *name, const char *value);
+extern void html_option(const char *value, const char *text,
+ const char *selected_value);
+extern void html_link_open(const char *url, const char *title,
+ const char *class);
+extern void html_link_close(void);
+extern void html_include(const char *filename);
+extern void html_checkbox(const char *name, int value);
+extern void html_txt_input(const char *name, const char *value, int size);
+```
+
+## `ui-shared.h` — Page Layout and Links
+
+### HTTP and Layout
+
+```c
+extern void cgit_print_http_headers(void);
+extern void cgit_print_docstart(void);
+extern void cgit_print_docend(void);
+extern void cgit_print_pageheader(void);
+extern void cgit_print_layout_start(void);
+extern void cgit_print_layout_end(void);
+extern void cgit_print_error(const char *msg);
+extern void cgit_print_error_page(int code, const char *msg,
+ const char *fmt, ...);
+```
+
+### URL Generation
+
+```c
+extern const char *cgit_repourl(const char *reponame);
+extern const char *cgit_fileurl(const char *reponame, const char *pagename,
+ const char *filename, const char *query);
+extern const char *cgit_pageurl(const char *reponame, const char *pagename,
+ const char *query);
+extern const char *cgit_currurl(void);
+extern const char *cgit_rooturl(void);
+```
+
+### Link Functions
+
+```c
+extern void cgit_summary_link(const char *name, const char *title,
+ const char *class, const char *head);
+extern void cgit_tag_link(const char *name, const char *title,
+ const char *class, const char *tag);
+extern void cgit_tree_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path);
+extern void cgit_log_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path,
+ int ofs, const char *grep, const char *pattern,
+ int showmsg, int follow);
+extern void cgit_commit_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path);
+extern void cgit_patch_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path);
+extern void cgit_refs_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path);
+extern void cgit_diff_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *new_rev, const char *old_rev,
+ const char *path, int toggle);
+extern void cgit_stats_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *path);
+extern void cgit_plain_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path);
+extern void cgit_blame_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path);
+extern void cgit_object_link(struct object *obj);
+extern void cgit_submodule_link(const char *name, const char *path,
+ const char *commit);
+extern void cgit_print_snapshot_links(const char *repo, const char *head,
+ const char *hex, int snapshots);
+extern void cgit_print_branches(int max);
+extern void cgit_print_tags(int max);
+```
+
+### Diff Helpers
+
+```c
+extern void cgit_print_diff_hunk_header(int oldofs, int oldcnt,
+ int newofs, int newcnt,
+ const char *func);
+extern void cgit_print_diff_line_prefix(int type);
+```
+
+## `cmd.h` — Command Dispatch
+
+```c
+struct cgit_cmd {
+	const char *name;
+	void (*fn)(void);
+	unsigned int want_repo:1;
+	unsigned int want_vpath:1;
+	unsigned int is_clone:1;
+};
+
+extern struct cgit_cmd *cgit_get_cmd(void);
+```
+
+## `cache.h` — Cache System
+
+```c
+typedef void (*cache_fill_fn)(void);
+
+extern int cache_process(int size, const char *path, const char *key,
+                         int ttl, cache_fill_fn fn);
+extern int cache_ls(const char *path);
+extern unsigned long hash_str(const char *str);
+```
+
+## `configfile.h` — Configuration File Parser
+
+```c
+typedef void (*configfile_value_fn)(const char *name, const char *value);
+
+extern int parse_configfile(const char *filename, configfile_value_fn fn);
+```
+
+## `scan-tree.h` — Repository Scanner
+
+```c
+typedef void (*repo_config_fn)(struct cgit_repo *repo,
+ const char *name, const char *value);
+
+extern void scan_projects(const char *path, const char *projectsfile,
+ repo_config_fn fn);
+extern void scan_tree(const char *path, repo_config_fn fn);
+```
+
+## `filter.c` — Filter Types
+
+```c
+#define ABOUT_FILTER 0
+#define COMMIT_FILTER 1
+#define SOURCE_FILTER 2
+#define EMAIL_FILTER 3
+#define AUTH_FILTER 4
+#define OWNER_FILTER 5
+
+typedef int filter_type;
+```
+
+## UI Module Entry Points
+
+Each `ui-*.c` module exposes one or more public functions:
+
+| Module | Function | Description |
+|--------|----------|-------------|
+| `ui-atom.c` | `cgit_print_atom(char *tip, char *path, int max)` | Generate Atom feed |
+| `ui-blame.c` | `cgit_print_blame(void)` | Render blame view |
+| `ui-blob.c` | `cgit_print_blob(const char *hex, char *path, const char *head, int file_only)` | Display blob content |
+| `ui-clone.c` | `cgit_clone_info(void)` | HTTP clone: `info/refs` |
+| `ui-clone.c` | `cgit_clone_objects(void)` | HTTP clone: pack objects |
+| `ui-clone.c` | `cgit_clone_head(void)` | HTTP clone: `HEAD` ref |
+| `ui-commit.c` | `cgit_print_commit(const char *rev, const char *prefix)` | Display commit |
+| `ui-diff.c` | `cgit_print_diff(const char *new_rev, const char *old_rev, const char *prefix, int show_ctrls, int raw)` | Render diff |
+| `ui-diff.c` | `cgit_print_diffstat(const struct object_id *old, const struct object_id *new, const char *prefix)` | Render diffstat |
+| `ui-log.c` | `cgit_print_log(const char *tip, int ofs, int cnt, char *grep, char *pattern, char *path, int pager, int commit_graph, int commit_sort)` | Display log |
+| `ui-patch.c` | `cgit_print_patch(const char *new_rev, const char *old_rev, const char *prefix)` | Generate patch |
+| `ui-plain.c` | `cgit_print_plain(void)` | Serve raw file content |
+| `ui-refs.c` | `cgit_print_refs(void)` | Display branches and tags |
+| `ui-repolist.c` | `cgit_print_repolist(void)` | Repository index page |
+| `ui-snapshot.c` | `cgit_print_snapshot(const char *head, const char *hex, const char *prefix, const char *filename, int snapshots)` | Generate archive |
+| `ui-stats.c` | `cgit_print_stats(void)` | Display statistics |
+| `ui-summary.c` | `cgit_print_summary(void)` | Repository summary page |
+| `ui-ssdiff.c` | `cgit_ssdiff_header_begin(void)` | Start ssdiff output |
+| `ui-ssdiff.c` | `cgit_ssdiff_header_end(void)` | End ssdiff header |
+| `ui-ssdiff.c` | `cgit_ssdiff_footer(void)` | End ssdiff output |
+| `ui-tag.c` | `cgit_print_tag(const char *revname)` | Display tag |
+| `ui-tree.c` | `cgit_print_tree(const char *rev, char *path)` | Display tree |
diff --git a/docs/handbook/cgit/architecture.md b/docs/handbook/cgit/architecture.md
new file mode 100644
index 0000000000..e35633a505
--- /dev/null
+++ b/docs/handbook/cgit/architecture.md
@@ -0,0 +1,422 @@
+# cgit — Architecture
+
+## High-Level Component Map
+
+```
+┌──────────────────────────────────────────────────────────────┐
+│ cgit.c │
+│ constructor_environment() [__attribute__((constructor))] │
+│ prepare_context() → config_cb() → querystring_cb() │
+│ authenticate_cookie() → process_request() → main() │
+├──────────────────────────────────────────────────────────────┤
+│ Command Dispatcher │
+│ cmd.c │
+│ cgit_get_cmd() → static cmds[] table (23 entries) │
+│ struct cgit_cmd { name, fn, want_repo, want_vpath, is_clone }│
+├──────────┬───────────┬───────────┬───────────────────────────┤
+│ UI Layer │ Caching │ Filters │ HTTP Clone │
+│ ui-*.c │ cache.c │ filter.c │ ui-clone.c │
+│ (17 mods)│ cache.h │ │ │
+├──────────┴───────────┴───────────┴───────────────────────────┤
+│ Core Utilities │
+│ shared.c — global vars, repo mgmt, diff wrappers │
+│ parsing.c — cgit_parse_commit(), cgit_parse_tag(), │
+│ cgit_parse_url() │
+│ html.c — entity escaping, URL encoding, form helpers │
+│ configfile.c — line-oriented name=value parser │
+│ scan-tree.c — filesystem repository discovery │
+├──────────────────────────────────────────────────────────────┤
+│ Vendored git library │
+│ git/ — full Git 2.46.0 source; linked via cgit.mk │
+│ Provides: object store, diff engine (xdiff), refs, revwalk, │
+│ archive, notes, commit graph, blame, packfile │
+└──────────────────────────────────────────────────────────────┘
+```
+
+## Global State
+
+cgit uses a single global variable to carry all request state:
+
+```c
+/* shared.c */
+struct cgit_repolist cgit_repolist; /* Array of all known repositories */
+struct cgit_context ctx; /* Current request context */
+```
+
+### `struct cgit_context`
+
+```c
+struct cgit_context {
+ struct cgit_environment env; /* CGI env vars (HTTP_HOST, QUERY_STRING, etc.) */
+ struct cgit_query qry; /* Parsed URL/query parameters */
+ struct cgit_config cfg; /* All global config directives */
+ struct cgit_repo *repo; /* Pointer into cgit_repolist.repos[] or NULL */
+ struct cgit_page page; /* HTTP response metadata (mimetype, status, etag) */
+};
+```
+
+### `struct cgit_environment`
+
+Populated by `prepare_context()` via `getenv()`:
+
+```c
+struct cgit_environment {
+ const char *cgit_config; /* $CGIT_CONFIG (default: /etc/cgitrc) */
+ const char *http_host; /* $HTTP_HOST */
+ const char *https; /* $HTTPS ("on" if TLS) */
+ const char *no_http; /* $NO_HTTP (non-NULL → CLI mode) */
+ const char *path_info; /* $PATH_INFO */
+ const char *query_string; /* $QUERY_STRING */
+ const char *request_method; /* $REQUEST_METHOD */
+ const char *script_name; /* $SCRIPT_NAME */
+ const char *server_name; /* $SERVER_NAME */
+ const char *server_port; /* $SERVER_PORT */
+ const char *http_cookie; /* $HTTP_COOKIE */
+ const char *http_referer; /* $HTTP_REFERER */
+ unsigned int content_length; /* $CONTENT_LENGTH */
+ int authenticated; /* Set by auth filter (0 or 1) */
+};
+```
+
+### `struct cgit_page`
+
+Controls HTTP response headers:
+
+```c
+struct cgit_page {
+ time_t modified; /* Last-Modified header */
+ time_t expires; /* Expires header */
+ size_t size; /* Content-Length (0 = omit) */
+ const char *mimetype; /* Content-Type (default: "text/html") */
+ const char *charset; /* charset param (default: "UTF-8") */
+ const char *filename; /* Content-Disposition filename */
+ const char *etag; /* ETag header value */
+ const char *title; /* HTML <title> */
+ int status; /* HTTP status code (0 = 200) */
+ const char *statusmsg; /* HTTP status message */
+};
+```
+
+## Request Lifecycle — Detailed
+
+### Phase 1: Pre-main Initialization
+
+```c
+__attribute__((constructor))
+static void constructor_environment()
+{
+ setenv("GIT_CONFIG_NOSYSTEM", "1", 1);
+ setenv("GIT_ATTR_NOSYSTEM", "1", 1);
+ unsetenv("HOME");
+ unsetenv("XDG_CONFIG_HOME");
+}
+```
+
+This runs before `main()` on every invocation. It prevents Git from loading
+`/etc/gitconfig`, `~/.gitconfig`, or any `$XDG_CONFIG_HOME/git/config`, ensuring
+complete isolation from the host system's Git configuration.
+
+### Phase 2: Context Preparation
+
+`prepare_context()` zero-initializes `ctx` and sets every configuration field
+to its default value. Key defaults:
+
+| Field | Default |
+|-------|---------|
+| `cfg.cache_size` | `0` (disabled) |
+| `cfg.cache_root` | `CGIT_CACHE_ROOT` (`/var/cache/cgit`) |
+| `cfg.cache_repo_ttl` | `5` minutes |
+| `cfg.cache_root_ttl` | `5` minutes |
+| `cfg.cache_static_ttl` | `-1` (never expires) |
+| `cfg.max_repo_count` | `50` |
+| `cfg.max_commit_count` | `50` |
+| `cfg.max_msg_len` | `80` |
+| `cfg.max_repodesc_len` | `80` |
+| `cfg.enable_http_clone` | `1` |
+| `cfg.enable_index_owner` | `1` |
+| `cfg.enable_tree_linenumbers` | `1` |
+| `cfg.summary_branches` | `10` |
+| `cfg.summary_log` | `10` |
+| `cfg.summary_tags` | `10` |
+| `cfg.difftype` | `DIFF_UNIFIED` |
+| `cfg.robots` | `"index, nofollow"` |
+| `cfg.root_title` | `"Git repository browser"` |
+
+The function also reads all CGI environment variables and sets
+`page.mimetype = "text/html"`, `page.charset = PAGE_ENCODING` (`"UTF-8"`).
+
+### Phase 3: Configuration Parsing
+
+```c
+parse_configfile(ctx.env.cgit_config, config_cb);
+```
+
+`parse_configfile()` (in `configfile.c`) opens the file, reads lines of the
+form `name=value`, skips comments (`#` and `;`), and calls the callback for each
+directive. It supports recursive `include=` directives up to 8 levels deep.
+
+`config_cb()` (in `cgit.c`) is a ~200-line chain of `if/else if` blocks that
+maps directive names to `ctx.cfg.*` fields. When `repo.url=` is encountered,
+`cgit_add_repo()` allocates a new repository entry; subsequent `repo.*`
+directives configure that entry via `repo_config()`.
+
+Special directive: `scan-path=` triggers immediate filesystem scanning via
+`scan_tree()` or `scan_projects()`, or via a cached repolist file if
+`cache-size > 0`.
+
+### Phase 4: Query String Parsing
+
+```c
+http_parse_querystring(ctx.qry.raw, querystring_cb);
+```
+
+`querystring_cb()` maps short parameter names to `ctx.qry.*` fields:
+
+| Parameter | Field | Purpose |
+|-----------|-------|---------|
+| `r` | `qry.repo` | Repository URL |
+| `p` | `qry.page` | Page name |
+| `url` | `qry.url` | Combined repo/page/path |
+| `h` | `qry.head` | Branch/ref |
+| `id` | `qry.oid` | Object ID |
+| `id2` | `qry.oid2` | Second object ID (for diffs) |
+| `ofs` | `qry.ofs` | Pagination offset |
+| `q` | `qry.search` | Search query |
+| `qt` | `qry.grep` | Search type |
+| `path` | `qry.path` | File path |
+| `name` | `qry.name` | Snapshot filename |
+| `dt` | `qry.difftype` | Diff type (0/1/2) |
+| `context` | `qry.context` | Diff context lines |
+| `ignorews` | `qry.ignorews` | Ignore whitespace |
+| `follow` | `qry.follow` | Follow renames |
+| `showmsg` | `qry.showmsg` | Show full messages |
+| `s` | `qry.sort` | Sort order |
+| `period` | `qry.period` | Stats period |
+
+The `url=` parameter receives special processing via `cgit_parse_url()` (in
+`parsing.c`), which iteratively splits the URL at `/` characters, looking for
+the longest prefix that matches a known repository URL.
+
+### Phase 5: Authentication
+
+`authenticate_cookie()` checks three cases:
+
+1. **No auth filter** → set `ctx.env.authenticated = 1` and return.
+2. **POST to login page** → call `authenticate_post()`, which reads up to
+ `MAX_AUTHENTICATION_POST_BYTES` (4096) from stdin, pipes it to the auth
+ filter with function `"authenticate-post"`, and exits.
+3. **Normal request** → invoke auth filter with function
+ `"authenticate-cookie"`. The filter's exit code becomes
+ `ctx.env.authenticated`.
+
+The auth filter receives 12 arguments:
+
+```
+function, cookie, method, query_string, http_referer,
+path_info, http_host, https, repo, page, fullurl, loginurl
+```
+
+### Phase 6: Cache Envelope
+
+If `ctx.cfg.cache_size > 0`, the request is wrapped in `cache_process()`:
+
+```c
+cache_process(ctx.cfg.cache_size, ctx.cfg.cache_root,
+ cache_key, ttl, fill_fn);
+```
+
+This constructs a filename from the FNV-1 hash of the cache key, attempts to
+open an existing slot, verifies the key matches, checks expiry, and either
+serves cached content or locks and fills a new slot. See the Caching System
+document for full details.
+
+### Phase 7: Command Dispatch
+
+```c
+cmd = cgit_get_cmd();
+```
+
+`cgit_get_cmd()` (in `cmd.c`) performs a linear scan of the static `cmds[]`
+table:
+
+```c
+static struct cgit_cmd cmds[] = {
+ def_cmd(HEAD, 1, 0, 1),
+ def_cmd(atom, 1, 0, 0),
+ def_cmd(about, 0, 0, 0),
+ def_cmd(blame, 1, 1, 0),
+ def_cmd(blob, 1, 0, 0),
+ def_cmd(cla, 0, 0, 0),
+ def_cmd(commit, 1, 1, 0),
+ def_cmd(coc, 0, 0, 0),
+ def_cmd(diff, 1, 1, 0),
+ def_cmd(info, 1, 0, 1),
+ def_cmd(log, 1, 1, 0),
+ def_cmd(ls_cache, 0, 0, 0),
+ def_cmd(objects, 1, 0, 1),
+ def_cmd(patch, 1, 1, 0),
+ def_cmd(plain, 1, 0, 0),
+ def_cmd(rawdiff, 1, 1, 0),
+ def_cmd(refs, 1, 0, 0),
+ def_cmd(repolist, 0, 0, 0),
+ def_cmd(snapshot, 1, 0, 0),
+ def_cmd(stats, 1, 1, 0),
+ def_cmd(summary, 1, 0, 0),
+ def_cmd(tag, 1, 0, 0),
+ def_cmd(tree, 1, 1, 0),
+};
+```
+
+The `def_cmd` macro expands to `{#name, name##_fn, want_repo, want_vpath, is_clone}`.
+
+Default page if none specified:
+- With a repository → `"summary"`
+- Without a repository → `"repolist"`
+
+### Phase 8: Repository Preparation
+
+If `cmd->want_repo` is set:
+
+1. `prepare_repo_env()` calls `setenv("GIT_DIR", ctx.repo->path, 1)`,
+ `setup_git_directory_gently()`, and `load_display_notes()`.
+2. `prepare_repo_cmd()` resolves the default branch (via `guess_defbranch()`
+ which checks `HEAD` → `refs/heads/*`), resolves the requested head to an OID,
+ sorts submodules, chooses the README file, and sets the page title.
+
+### Phase 9: Page Rendering
+
+The handler function (`cmd->fn()`) is called. Most handlers follow this
+pattern:
+
+```c
+cgit_print_layout_start(); /* HTTP headers + HTML doctype + header + tabs */
+/* ... page-specific content ... */
+cgit_print_layout_end(); /* footer + closing tags */
+```
+
+`cgit_print_layout_start()` calls:
+- `cgit_print_http_headers()` — Content-Type, Last-Modified, Expires, ETag
+- `cgit_print_docstart()` — `<!DOCTYPE html>`, `<html>`, CSS/JS includes
+- `cgit_print_pageheader()` — header table, navigation tabs, breadcrumbs
+
+## Module Dependency Graph
+
+```
+cgit.c ──→ cmd.c ──→ ui-*.c (all modules)
+ │ │
+ │ └──→ cache.c
+ │
+ ├──→ configfile.c
+ ├──→ scan-tree.c ──→ configfile.c
+ ├──→ ui-shared.c ──→ html.c
+ ├──→ ui-stats.c
+ ├──→ ui-blob.c
+ ├──→ ui-summary.c
+ └──→ filter.c
+
+ui-commit.c ──→ ui-diff.c ──→ ui-ssdiff.c
+ui-summary.c ──→ ui-log.c, ui-refs.c, ui-blob.c, ui-plain.c
+ui-log.c ──→ ui-shared.c
+All ui-*.c ──→ html.c, ui-shared.c
+```
+
+## The `struct cgit_cmd` Pattern
+
+Each command in `cmd.c` is defined as a static function that wraps the
+corresponding UI module:
+
+```c
+static void log_fn(void)
+{
+ cgit_print_log(ctx.qry.oid, ctx.qry.ofs, ctx.cfg.max_commit_count,
+ ctx.qry.grep, ctx.qry.search, ctx.qry.path, 1,
+ ctx.repo->enable_commit_graph,
+ ctx.repo->commit_sort);
+}
+```
+
+The thin wrapper pattern means all context is accessed via the global `ctx`
+struct, and the wrapper simply extracts the relevant fields and passes them to
+the module function.
+
+## Repository List Management
+
+The `cgit_repolist` global is a dynamically-growing array:
+
+```c
+struct cgit_repolist {
+ int length; /* Allocated capacity */
+ int count; /* Number of repos */
+ struct cgit_repo *repos; /* Array */
+};
+```
+
+`cgit_add_repo()` doubles the array capacity when needed (starting from 8).
+Each new repo inherits defaults from `ctx.cfg.*` (snapshots, feature flags,
+filters, etc.).
+
+`cgit_get_repoinfo()` performs a linear scan (O(n)) to find a repo by URL.
+Ignored repos (`repo->ignore == 1`) are skipped.
+
+## Build System
+
+The build works in two stages:
+
+1. **Git build** — `make` in the top-level `cgit/` directory delegates to
+ `make -C git -f ../cgit.mk` which includes Git's own `Makefile`.
+
+2. **cgit link** — `cgit.mk` lists all cgit object files (`CGIT_OBJ_NAMES`),
+ compiles them with `CGIT_CFLAGS` (which embeds `CGIT_CONFIG`,
+ `CGIT_SCRIPT_NAME`, `CGIT_CACHE_ROOT` as string literals), and links them
+ against Git's `libgit.a`.
+
+Lua support is auto-detected via `pkg-config` (checking `luajit`, `lua`,
+`lua5.2`, `lua5.1` in order). Define `NO_LUA=1` to build without Lua.
+Linux systems get `HAVE_LINUX_SENDFILE` which enables the `sendfile()` syscall
+in the cache layer.
+
+## Thread Safety
+
+cgit runs as a **single-process CGI** — one process per HTTP request. There is
+no multi-threading. All global state (`ctx`, `cgit_repolist`, the static
+`diffbuf` in `shared.c`, the static format buffers in `html.c`) is safe because
+each process is fully isolated.
+
+The `fmt()` function in `html.c` uses a ring buffer of 8 static buffers
+(`static char buf[8][1024]`) to allow up to 8 nested `fmt()` calls in a single
+expression. The `bufidx` rotates via `bufidx = (bufidx + 1) & 7`.
+
+## Error Handling
+
+The codebase uses three assertion-style helpers from `shared.c`:
+
+```c
+int chk_zero(int result, char *msg); /* die if result != 0 */
+int chk_positive(int result, char *msg); /* die if result <= 0 */
+int chk_non_negative(int result, char *msg); /* die if result < 0 */
+```
+
+For user-facing errors, `cgit_print_error_page()` sets HTTP status, prints
+headers, renders the page skeleton, and displays the error message.
+
+## Type System
+
+cgit uses two enums defined in `cgit.h`:
+
+```c
+typedef enum {
+ DIFF_UNIFIED, DIFF_SSDIFF, DIFF_STATONLY
+} diff_type;
+
+typedef enum {
+ ABOUT, COMMIT, SOURCE, EMAIL, AUTH, OWNER
+} filter_type;
+```
+
+And three function pointer typedefs:
+
+```c
+typedef void (*configfn)(const char *name, const char *value);
+typedef void (*filepair_fn)(struct diff_filepair *pair);
+typedef void (*linediff_fn)(char *line, int len);
+```
diff --git a/docs/handbook/cgit/authentication.md b/docs/handbook/cgit/authentication.md
new file mode 100644
index 0000000000..a4fe000a87
--- /dev/null
+++ b/docs/handbook/cgit/authentication.md
@@ -0,0 +1,288 @@
+# cgit — Authentication
+
+## Overview
+
+cgit supports cookie-based authentication through the `auth-filter`
+mechanism. The authentication system intercepts requests before page
+rendering and delegates all credential validation to an external filter
+(exec or Lua script).
+
+Source file: `cgit.c` (authentication hooks), `filter.c` (filter execution).
+
+## Architecture
+
+Authentication is entirely filter-driven. cgit itself stores no credentials,
+sessions, or user databases. The auth filter is responsible for:
+
+1. Rendering login forms
+2. Validating credentials
+3. Setting/reading session cookies
+4. Determining authorization per-repository
+
+## Configuration
+
+```ini
+auth-filter=lua:/path/to/auth.lua
+# or
+auth-filter=exec:/path/to/auth.sh
+```
+
+The auth filter type is `AUTH_FILTER` (constant `4`) and receives 12
+arguments.
+
+## Authentication Flow
+
+### Request Processing in `cgit.c`
+
+Authentication is checked in `process_request()` after URL parsing and
+command dispatch:
+
+```c
+/* In process_request() */
+if (ctx.cfg.auth_filter) {
+ /* Step 1: Check current authentication state */
+ authenticate_cookie();
+
+ /* Step 2: Handle POST login attempts */
+ if (ctx.env.request_method &&
+ !strcmp(ctx.env.request_method, "POST"))
+ authenticate_post();
+
+    /* Step 3: Dispatch to the page handler; per-repository access is
+       enforced via the filter's "authorize" phase before rendering */
+ cmd->fn(&ctx);
+}
+```
+
+### `authenticate_cookie()`
+
+Opens the auth filter to check the current session cookie:
+
+```c
+static void authenticate_cookie(void)
+{
+ /* Open auth filter with current request context */
+ cgit_open_filter(ctx.cfg.auth_filter,
+ ctx.env.http_cookie, /* current cookies */
+ ctx.env.request_method, /* GET/POST */
+ ctx.env.query_string, /* full query */
+ ctx.env.http_referer, /* referer header */
+ ctx.env.path_info, /* request path */
+ ctx.env.http_host, /* hostname */
+ ctx.env.https ? "1" : "0", /* HTTPS flag */
+ ctx.qry.repo, /* repository name */
+ ctx.qry.page, /* page/command */
+ ctx.env.http_accept, /* accept header */
+ "cookie" /* authentication phase */
+ );
+ /* Read filter's response to determine auth state */
+ ctx.env.authenticated = /* filter exit code */;
+ cgit_close_filter(ctx.cfg.auth_filter);
+}
+```
+
+### `authenticate_post()`
+
+Handles login form submissions:
+
+```c
+static void authenticate_post(void)
+{
+ /* Read POST body for credentials */
+ /* Open auth filter with phase="post" */
+ cgit_open_filter(ctx.cfg.auth_filter,
+ /* ... same 11 args ... */
+ "post" /* authentication phase */
+ );
+ /* Filter processes credentials, may set cookies */
+ cgit_close_filter(ctx.cfg.auth_filter);
+}
+```
+
+### Authorization Check
+
+After authentication, the auth filter is called again before rendering each
+page to determine if the authenticated user has access to the requested
+repository and page:
+
+```c
+static int open_auth_filter(const char *repo, const char *page)
+{
+ cgit_open_filter(ctx.cfg.auth_filter,
+ /* ... request context ... */
+ "authorize" /* authorization phase */
+ );
+ int authorized = cgit_close_filter(ctx.cfg.auth_filter);
+ return authorized == 0; /* 0 = authorized */
+}
+```
+
+## Auth Filter Arguments
+
+The auth filter receives 12 arguments in total:
+
+| # | Argument | Description |
+|---|----------|-------------|
+| 1 | `filter_cmd` | The filter command itself |
+| 2 | `http_cookie` | Raw `HTTP_COOKIE` header value |
+| 3 | `request_method` | HTTP method (`GET`, `POST`) |
+| 4 | `query_string` | Raw query string |
+| 5 | `http_referer` | HTTP Referer header |
+| 6 | `path_info` | PATH_INFO from CGI |
+| 7 | `http_host` | Hostname |
+| 8 | `https` | `"1"` if HTTPS, `"0"` if HTTP |
+| 9 | `repo` | Repository URL |
+| 10 | `page` | Page/command name |
+| 11 | `http_accept` | HTTP Accept header |
+| 12 | `phase` | `"cookie"`, `"post"`, or `"authorize"` |
+
+## Filter Phases
+
+### `cookie` Phase
+
+Called on every request. The filter should:
+1. Read the session cookie from argument 2
+2. Validate the session
+3. Return exit code 0 if authenticated, non-zero otherwise
+
+### `post` Phase
+
+Called when the request method is POST. The filter should:
+1. Read POST body from stdin
+2. Validate credentials
+3. If valid, output a `Set-Cookie` header
+4. Output a redirect response (302)
+
+### `authorize` Phase
+
+Called after authentication to check per-repository access. The filter
+should:
+1. Check if the authenticated user can access the requested repo/page
+2. Return exit code 0 if authorized
+3. Return non-zero to deny access (cgit will show an error page)
+
+## Filter Return Codes
+
+| Exit Code | Meaning |
+|-----------|---------|
+| 0 | Success (authenticated/authorized) |
+| Non-zero | Failure (unauthenticated/unauthorized) |
+
+## Environment Variables
+
+The auth filter also has access to standard CGI environment variables:
+
+```c
+struct cgit_environment {
+ const char *cgit_config; /* $CGIT_CONFIG */
+ const char *http_host; /* $HTTP_HOST */
+ const char *https; /* $HTTPS */
+ const char *no_http; /* $NO_HTTP */
+ const char *http_cookie; /* $HTTP_COOKIE */
+ const char *request_method; /* $REQUEST_METHOD */
+ const char *query_string; /* $QUERY_STRING */
+ const char *http_referer; /* $HTTP_REFERER */
+ const char *path_info; /* $PATH_INFO */
+ const char *script_name; /* $SCRIPT_NAME */
+ const char *server_name; /* $SERVER_NAME */
+ const char *server_port; /* $SERVER_PORT */
+ const char *http_accept; /* $HTTP_ACCEPT */
+ int authenticated; /* set by auth filter */
+};
+```
+
+## Shipped Auth Filter
+
+cgit ships a Lua-based hierarchical authentication filter:
+
+### `filters/simple-hierarchical-auth.lua`
+
+This filter implements path-based access control using a simple user
+database and repository permission map.
+
+Features:
+- Cookie-based session management
+- Per-repository access control
+- Hierarchical path matching
+- Password hashing
+
+Usage:
+
+```ini
+auth-filter=lua:/usr/lib/cgit/filters/simple-hierarchical-auth.lua
+```
+
+## Cache Interaction
+
+Authentication affects cache keys. The cache key includes the
+authentication state and cookie:
+
+```c
+static const char *cache_key(void)
+{
+ return fmt("%s?%s?%s?%s?%s",
+ ctx.qry.raw,
+ ctx.env.http_host,
+ ctx.env.https ? "1" : "0",
+ ctx.env.authenticated ? "1" : "0",
+ ctx.env.http_cookie ? ctx.env.http_cookie : "");
+}
+```
+
+This ensures that:
+- Authenticated and unauthenticated users get separate cache entries
+- Different authenticated users (different cookies) get separate entries
+- The cache never leaks restricted content to unauthorized users
+
+## Security Considerations
+
+1. **HTTPS**: Always use HTTPS when authentication is enabled to protect
+ cookies and credentials in transit
+2. **Cookie flags**: Auth filter scripts should set `Secure`, `HttpOnly`,
+ and `SameSite` cookie flags
+3. **Session expiry**: Implement session timeouts in the auth filter
+4. **Password storage**: Never store passwords in plain text; use bcrypt or
+ similar hashing
+5. **CSRF protection**: The auth filter should implement CSRF tokens for
+ POST login forms
+6. **Cache poisoning**: The cache key includes auth state, but ensure the
+ auth filter is deterministic for the same cookie
+
+## Disabling Authentication
+
+By default, no auth filter is configured and all repositories are publicly
+accessible. To restrict access, set up the auth filter and optionally
+combine with `strict-export` for file-based visibility control.
+
+## Example: Custom Auth Filter (Shell)
+
+```bash
+#!/bin/bash
+# Simple auth filter skeleton
+PHASE="${12}"
+
+case "$PHASE" in
+ cookie)
+ COOKIE="$2"
+ if validate_session "$COOKIE"; then
+ exit 0 # authenticated
+ fi
+ exit 1 # not authenticated
+ ;;
+ post)
+ read -r POST_BODY
+ # Parse username/password from POST_BODY
+ # Validate credentials
+ # Set cookie header
+ echo "Status: 302 Found"
+ echo "Set-Cookie: session=TOKEN; HttpOnly; Secure"
+ echo "Location: $6"
+ echo
+ exit 0
+ ;;
+ authorize)
+ REPO="$9"
+ # Check if current user can access $REPO
+ exit 0 # authorized
+ ;;
+esac
+```
diff --git a/docs/handbook/cgit/building.md b/docs/handbook/cgit/building.md
new file mode 100644
index 0000000000..00f9e1244f
--- /dev/null
+++ b/docs/handbook/cgit/building.md
@@ -0,0 +1,272 @@
+# cgit — Building
+
+## Prerequisites
+
+| Dependency | Required | Purpose |
+|-----------|----------|---------|
+| GCC or Clang | Yes | C compiler (C99) |
+| GNU Make | Yes | Build system |
+| OpenSSL (libcrypto) | Yes | SHA-1 hash implementation (`SHA1_HEADER = <openssl/sha.h>`) |
+| zlib | Yes | Git object compression |
+| libcurl | No | Not used — `NO_CURL=1` is passed by cgit.mk |
+| Lua or LuaJIT | No | Lua filter support; auto-detected via pkg-config |
+| asciidoc / a2x | No | Man page / HTML / PDF documentation generation |
+| Python | No | Git's test harness (for `make test`) |
+
+## Build System Overview
+
+cgit uses a two-stage build that embeds itself within Git's build infrastructure:
+
+```
+cgit/Makefile
+ └── make -C git -f ../cgit.mk ../cgit
+ └── git/Makefile (included by cgit.mk)
+ └── Compile cgit objects + link against libgit.a
+```
+
+### Stage 1: Top-Level Makefile
+
+The top-level `Makefile` lives in `cgit/` and defines all user-configurable
+variables:
+
+```makefile
+CGIT_VERSION = 0.0.5-1-Project-Tick
+CGIT_SCRIPT_NAME = cgit.cgi
+CGIT_SCRIPT_PATH = /var/www/htdocs/cgit
+CGIT_DATA_PATH = $(CGIT_SCRIPT_PATH)
+CGIT_CONFIG = /etc/cgitrc
+CACHE_ROOT = /var/cache/cgit
+prefix = /usr/local
+libdir = $(prefix)/lib
+filterdir = $(libdir)/cgit/filters
+docdir = $(prefix)/share/doc/cgit
+mandir = $(prefix)/share/man
+SHA1_HEADER = <openssl/sha.h>
+GIT_VER = 2.46.0
+GIT_URL = https://www.kernel.org/pub/software/scm/git/git-$(GIT_VER).tar.xz
+```
+
+The main `cgit` target delegates to:
+
+```makefile
+cgit:
+ $(QUIET_SUBDIR0)git $(QUIET_SUBDIR1) -f ../cgit.mk ../cgit NO_CURL=1
+```
+
+This enters the `git/` subdirectory and runs `cgit.mk` from there, prefixing
+all cgit source paths with `../`.
+
+### Stage 2: cgit.mk
+
+`cgit.mk` is run inside the `git/` directory so it can `include Makefile` to
+inherit Git's build variables (`CC`, `CFLAGS`, linker flags, OS detection via
+`config.mak.uname`, etc.).
+
+Key sections:
+
+#### Version tracking
+
+```makefile
+$(CGIT_PREFIX)VERSION: force-version
+ @cd $(CGIT_PREFIX) && '$(SHELL_PATH_SQ)' ./gen-version.sh "$(CGIT_VERSION)"
+```
+
+The `gen-version.sh` script writes a `VERSION` file that is included by the
+build. Only `cgit.o` references `CGIT_VERSION`, so only that object is rebuilt
+when the version changes.
+
+#### CGIT_CFLAGS
+
+```makefile
+CGIT_CFLAGS += -DCGIT_CONFIG='"$(CGIT_CONFIG)"'
+CGIT_CFLAGS += -DCGIT_SCRIPT_NAME='"$(CGIT_SCRIPT_NAME)"'
+CGIT_CFLAGS += -DCGIT_CACHE_ROOT='"$(CACHE_ROOT)"'
+```
+
+These compile-time constants are used in `cgit.c` as default values in
+`prepare_context()`.
+
+#### Lua detection
+
+```makefile
+LUA_PKGCONFIG := $(shell for pc in luajit lua lua5.2 lua5.1; do \
+ $(PKG_CONFIG) --exists $$pc 2>/dev/null && echo $$pc && break; \
+done)
+```
+
+If Lua is found, its `--cflags` and `--libs` are appended to `CGIT_CFLAGS` and
+`CGIT_LIBS`. If not found, `NO_LUA=YesPlease` is set and `-DNO_LUA` is added.
+
+#### Linux sendfile
+
+```makefile
+ifeq ($(uname_S),Linux)
+ HAVE_LINUX_SENDFILE = YesPlease
+endif
+
+ifdef HAVE_LINUX_SENDFILE
+ CGIT_CFLAGS += -DHAVE_LINUX_SENDFILE
+endif
+```
+
+This enables the `sendfile()` syscall in `cache.c` for zero-copy writes from
+cache files to stdout.
+
+#### Object files
+
+All cgit source files are listed explicitly:
+
+```makefile
+CGIT_OBJ_NAMES += cgit.o cache.o cmd.o configfile.o filter.o html.o
+CGIT_OBJ_NAMES += parsing.o scan-tree.o shared.o
+CGIT_OBJ_NAMES += ui-atom.o ui-blame.o ui-blob.o ui-clone.o ui-commit.o
+CGIT_OBJ_NAMES += ui-diff.o ui-log.o ui-patch.o ui-plain.o ui-refs.o
+CGIT_OBJ_NAMES += ui-repolist.o ui-shared.o ui-snapshot.o ui-ssdiff.o
+CGIT_OBJ_NAMES += ui-stats.o ui-summary.o ui-tag.o ui-tree.o
+```
+
+The prefixed paths (`CGIT_OBJS := $(addprefix $(CGIT_PREFIX),$(CGIT_OBJ_NAMES))`)
+point back to the `cgit/` directory from inside `git/`.
+
+## Quick Build
+
+```bash
+cd cgit
+
+# Download the vendored Git source (required on first build)
+make get-git
+
+# Build cgit binary
+make -j$(nproc)
+```
+
+The output is a single binary named `cgit` in the `cgit/` directory.
+
+## Build Variables Reference
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `CGIT_VERSION` | `0.0.5-1-Project-Tick` | Compiled-in version string |
+| `CGIT_SCRIPT_NAME` | `cgit.cgi` | Name of the installed CGI binary |
+| `CGIT_SCRIPT_PATH` | `/var/www/htdocs/cgit` | CGI binary install directory |
+| `CGIT_DATA_PATH` | `$(CGIT_SCRIPT_PATH)` | Static assets (CSS, JS, images) directory |
+| `CGIT_CONFIG` | `/etc/cgitrc` | Default config file path (compiled in) |
+| `CACHE_ROOT` | `/var/cache/cgit` | Default cache directory (compiled in) |
+| `prefix` | `/usr/local` | Install prefix |
+| `libdir` | `$(prefix)/lib` | Library directory |
+| `filterdir` | `$(libdir)/cgit/filters` | Filter scripts install directory |
+| `docdir` | `$(prefix)/share/doc/cgit` | Documentation directory |
+| `mandir` | `$(prefix)/share/man` | Man page directory |
+| `SHA1_HEADER` | `<openssl/sha.h>` | SHA-1 implementation header |
+| `GIT_VER` | `2.46.0` | Git version to download and vendor |
+| `GIT_URL` | `https://...git-$(GIT_VER).tar.xz` | Git source download URL |
+| `NO_LUA` | (unset) | Set to any value to disable Lua support |
+| `LUA_PKGCONFIG` | (auto-detected) | Explicit pkg-config name for Lua |
+| `NO_C99_FORMAT` | (unset) | Define if your printf lacks `%zu`, `%lld` etc. |
+| `HAVE_LINUX_SENDFILE` | (auto on Linux) | Enable `sendfile()` in cache |
+| `V` | (unset) | Set to `1` for verbose build output |
+
+Overrides can be placed in a `cgit.conf` file (included by both `Makefile` and
+`cgit.mk` via `-include cgit.conf`).
+
+## Installation
+
+```bash
+make install # Install binary and static assets
+make install-doc # Install man pages, HTML docs, PDF docs
+make install-man # Man pages only
+make install-html # HTML docs only
+make install-pdf # PDF docs only
+```
+
+### Installed files
+
+| Path | Mode | Source |
+|------|------|--------|
+| `$(CGIT_SCRIPT_PATH)/$(CGIT_SCRIPT_NAME)` | 0755 | `cgit` binary |
+| `$(CGIT_DATA_PATH)/cgit.css` | 0644 | Default stylesheet |
+| `$(CGIT_DATA_PATH)/cgit.js` | 0644 | Client-side JavaScript |
+| `$(CGIT_DATA_PATH)/cgit.png` | 0644 | Default logo |
+| `$(CGIT_DATA_PATH)/favicon.ico` | 0644 | Default favicon |
+| `$(CGIT_DATA_PATH)/robots.txt` | 0644 | Robots exclusion file |
+| `$(filterdir)/*` | (varies) | Filter scripts from `filters/` |
+| `$(mandir)/man5/cgitrc.5` | 0644 | Man page (if `install-man`) |
+
+## Make Targets
+
+| Target | Description |
+|--------|-------------|
+| `all` | Build the cgit binary (default) |
+| `cgit` | Explicit build target |
+| `test` | Build everything (`all` target on git) then run `tests/` |
+| `install` | Install binary, CSS, JS, images, filters |
+| `install-doc` | Install man pages + HTML + PDF |
+| `install-man` | Man pages only |
+| `install-html` | HTML docs only |
+| `install-pdf` | PDF docs only |
+| `clean` | Remove cgit objects, VERSION, CGIT-CFLAGS, tags |
+| `cleanall` | `clean` + `make -C git clean` |
+| `clean-doc` | Remove generated doc files |
+| `get-git` | Download and extract Git source into `git/` |
+| `tags` | Generate ctags for all `*.[ch]` files |
+| `sparse` | Run `sparse` static analysis via cgit.mk |
+| `uninstall` | Remove installed binary and assets |
+| `uninstall-doc` | Remove installed documentation |
+
+## Documentation Generation
+
+Man pages are generated from `cgitrc.5.txt` using `asciidoc`/`a2x`:
+
+```makefile
+MAN5_TXT = $(wildcard *.5.txt)
+MAN_TXT = $(MAN5_TXT)
+DOC_MAN5 = $(patsubst %.txt,%,$(MAN5_TXT))
+DOC_HTML = $(patsubst %.txt,%.html,$(MAN_TXT))
+DOC_PDF = $(patsubst %.txt,%.pdf,$(MAN_TXT))
+
+%.5 : %.5.txt
+ a2x -f manpage $<
+
+$(DOC_HTML): %.html : %.txt
+ $(TXT_TO_HTML) -o $@+ $< && mv $@+ $@
+
+$(DOC_PDF): %.pdf : %.txt
+ a2x -f pdf cgitrc.5.txt
+```
+
+## Cross-Compilation
+
+For cross-compiling (e.g. targeting MinGW on Linux):
+
+```bash
+make CC=x86_64-w64-mingw32-gcc
+```
+
+The `toolchain-mingw32.cmake` file in the repository is for CMake-based
+projects; cgit itself uses Make exclusively.
+
+## Customizing the Build
+
+Create a `cgit.conf` file alongside the Makefile:
+
+```makefile
+# cgit.conf — local build overrides
+CGIT_VERSION = 1.0.0-custom
+CGIT_CONFIG = /usr/local/etc/cgitrc
+CACHE_ROOT = /tmp/cgit-cache
+NO_LUA = 1
+```
+
+This file is `-include`d by both `Makefile` and `cgit.mk`, so it applies to
+all build stages.
+
+## Troubleshooting
+
+| Problem | Solution |
+|---------|----------|
+| `make: *** No rule to make target 'git/Makefile'` | Run `make get-git` first |
+| `lua.h: No such file or directory` | Install Lua dev package or set `NO_LUA=1` |
+| `openssl/sha.h: No such file or directory` | Install `libssl-dev` / `openssl-devel` |
+| `sendfile: undefined reference` | Set `HAVE_LINUX_SENDFILE=` (empty) on non-Linux |
+| Build fails with `redefinition of 'struct cache_slot'` | Git's `cache.h` conflict — cgit uses `CGIT_CACHE_H` guard |
+| `dlsym: symbol not found: write` | Lua filter's `write()` interposition requires `-ldl` (auto on Linux) |
+| Version shows as `unknown` | Run `./gen-version.sh "$(CGIT_VERSION)"` or check `VERSION` file |
diff --git a/docs/handbook/cgit/caching-system.md b/docs/handbook/cgit/caching-system.md
new file mode 100644
index 0000000000..5d3b723ed5
--- /dev/null
+++ b/docs/handbook/cgit/caching-system.md
@@ -0,0 +1,287 @@
+# cgit — Caching System
+
+## Overview
+
+cgit implements a file-based output cache that stores the fully rendered
+HTML/binary response for each unique request. The cache avoids regenerating
+pages for repeated identical requests. When caching is disabled
+(`cache-size=0`, the default), all output is written directly to `stdout`.
+
+Source files: `cache.c`, `cache.h`.
+
+## Cache Slot Structure
+
+Each cached response is represented by a `cache_slot`:
+
+```c
+struct cache_slot {
+ const char *key; /* request identifier (URL-based) */
+ int keylen; /* strlen(key) */
+ int ttl; /* time-to-live in minutes */
+ cache_fill_fn fn; /* callback to regenerate content */
+ int cache_fd; /* fd for the cache file */
+ int lock_fd; /* fd for the .lock file */
+ const char *cache_name;/* path: cache_root/hash(key) */
+ const char *lock_name; /* path: cache_name + ".lock" */
+ int match; /* 1 if cache file matches key */
+ struct stat cache_st; /* stat of the cache file */
+ int bufsize; /* size of the header buffer */
+ char buf[1024 + 4 * 20]; /* header: key + timestamps */
+};
+```
+
+The `cache_fill_fn` typedef:
+
+```c
+typedef void (*cache_fill_fn)(void *cbdata);
+```
+
+This callback is invoked to produce the page content when the cache needs
+filling. The callback writes directly to `stdout`, which is redirected to the
+lock file while cache filling is in progress.
+
+## Hash Function
+
+Cache file names are derived from the request key using the FNV-1 hash:
+
+```c
+unsigned long hash_str(const char *str)
+{
+ unsigned long h = 0x811c9dc5;
+ unsigned char *s = (unsigned char *)str;
+ while (*s) {
+ h *= 0x01000193;
+ h ^= (unsigned long)*s++;
+ }
+ return h;
+}
+```
+
+The resulting hash is formatted as `%lx` and joined with the configured
+`cache-root` directory to produce the cache file path. The lock file is
+the same path with `.lock` appended.
+
+## Slot Lifecycle
+
+A cache request goes through these phases, managed by `process_slot()`:
+
+### 1. Open (`open_slot`)
+
+Opens the cache file and reads the header. The header contains the original
+key followed by creation and expiry timestamps. If the stored key matches the
+current request key, `slot->match` is set to 1.
+
+```c
+static int open_slot(struct cache_slot *slot)
+{
+ slot->cache_fd = open(slot->cache_name, O_RDONLY);
+ if (slot->cache_fd == -1)
+ return errno;
+ if (fstat(slot->cache_fd, &slot->cache_st))
+ return errno;
+ /* read header into slot->buf */
+ return 0;
+}
+```
+
+### 2. Check Match
+
+If the file exists and the key matches, the code checks whether the entry
+has expired based on the TTL:
+
+```c
+static int is_expired(struct cache_slot *slot)
+{
+ if (slot->ttl < 0)
+ return 0; /* negative TTL = never expires */
+ return slot->cache_st.st_mtime + slot->ttl * 60 < time(NULL);
+}
+```
+
+A TTL of `-1` means the entry never expires (used for `cache-static-ttl`).
+
+### 3. Lock (`lock_slot`)
+
+Creates the `.lock` file with `O_WRONLY | O_CREAT | O_EXCL` and writes the
+header containing the key and timestamps. If locking fails (another process
+holds the lock), the stale cached content is served instead.
+
+```c
+static int lock_slot(struct cache_slot *slot)
+{
+ slot->lock_fd = open(slot->lock_name,
+ O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+ if (slot->lock_fd == -1)
+ return errno;
+ /* write header: key + creation timestamp */
+ return 0;
+}
+```
+
+### 4. Fill (`fill_slot`)
+
+Redirects `stdout` to the lock file using `dup2()`, invokes the
+`cache_fill_fn` callback to generate the page content, then restores `stdout`:
+
+```c
+static int fill_slot(struct cache_slot *slot)
+{
+ /* save original stdout */
+ /* dup2(slot->lock_fd, STDOUT_FILENO) */
+ slot->fn(slot->cbdata);
+ /* restore original stdout */
+ return 0;
+}
+```
+
+### 5. Close and Rename
+
+After filling, the lock file is atomically renamed to the cache file:
+
+```c
+if (rename(slot->lock_name, slot->cache_name))
+ return errno;
+```
+
+This ensures readers never see a partially-written file.
+
+### 6. Print (`print_slot`)
+
+The cache file content (minus the header) is sent to `stdout`. On Linux,
+`sendfile()` is used for zero-copy output:
+
+```c
+static int print_slot(struct cache_slot *slot)
+{
+#ifdef HAVE_LINUX_SENDFILE
+ off_t start = slot->keylen + 1; /* skip header */
+ sendfile(STDOUT_FILENO, slot->cache_fd, &start,
+ slot->cache_st.st_size - start);
+#else
+ /* fallback: read()/write() loop */
+#endif
+}
+```
+
+## Process Slot State Machine
+
+`process_slot()` implements a state machine combining all phases:
+
+```
+START → open_slot()
+ ├── success + key match + not expired → print_slot() → DONE
+ ├── success + key match + expired → lock_slot()
+ │ ├── lock acquired → fill_slot() → close_slot() → open_slot() → print_slot()
+ │ └── lock failed → print_slot() (serve stale)
+ ├── success + key mismatch → lock_slot()
+ │ ├── lock acquired → fill_slot() → close_slot() → open_slot() → print_slot()
+ │ └── lock failed → fill_slot() (direct to stdout)
+ └── open failed → lock_slot()
+ ├── lock acquired → fill_slot() → close_slot() → open_slot() → print_slot()
+ └── lock failed → fill_slot() (direct to stdout, no cache)
+```
+
+## Public API
+
+```c
+/* Process a request through the cache */
+extern int cache_process(int size, const char *path, const char *key,
+ int ttl, cache_fill_fn fn, void *cbdata);
+
+/* List all cache entries (for debugging/administration) */
+extern int cache_ls(const char *path);
+
+/* Hash a string using FNV-1 */
+extern unsigned long hash_str(const char *str);
+```
+
+### `cache_process()`
+
+Parameters:
+- `size` — Maximum number of cache entries (from `cache-size`). If `0`,
+ caching is bypassed and `fn` is called directly.
+- `path` — Cache root directory.
+- `key` — Request identifier (derived from full URL + query string).
+- `ttl` — Time-to-live in minutes.
+- `fn` — Callback function that generates the page content.
+- `cbdata` — Opaque data passed to the callback.
+
+### `cache_ls()`
+
+Scans the cache root directory and prints information about each cache entry
+to `stdout`. Used for administrative inspection.
+
+## TTL Configuration Mapping
+
+Different page types have different TTLs:
+
+| Page Type | Config Directive | Default | Applied When |
+|-----------|-----------------|---------|--------------|
+| Repository list | `cache-root-ttl` | 5 min | `cmd->want_repo == 0` |
+| Repo pages | `cache-repo-ttl` | 5 min | `cmd->want_repo == 1` and dynamic |
+| Dynamic pages | `cache-dynamic-ttl` | 5 min | `cmd->want_vpath == 1` |
+| Static content | `cache-static-ttl` | -1 (never) | SHA-referenced content |
+| About pages | `cache-about-ttl` | 15 min | About/readme view |
+| Snapshots | `cache-snapshot-ttl` | 5 min | Snapshot downloads |
+| Scan results | `cache-scanrc-ttl` | 15 min | scan-path results |
+
+Static content uses a TTL of `-1` because SHA-addressed content is
+immutable — a given commit/tree/blob hash always refers to the same data.
+
+## Cache Key Generation
+
+The cache key is built from the complete query context in `cgit.c`:
+
+```c
+static const char *cache_key(void)
+{
+ return fmt("%s?%s?%s?%s?%s",
+ ctx.qry.raw, ctx.env.http_host,
+ ctx.env.https ? "1" : "0",
+ ctx.env.authenticated ? "1" : "0",
+ ctx.env.http_cookie ? ctx.env.http_cookie : "");
+}
+```
+
+The key captures: raw query string, hostname, HTTPS state, authentication
+state, and cookies. This ensures that authenticated users get different
+cache entries than unauthenticated users.
+
+## Concurrency
+
+The cache supports concurrent access from multiple CGI processes:
+
+1. **Atomic writes**: Content is written to a `.lock` file first, then
+ atomically renamed to the cache file. Readers never see partial content.
+2. **Non-blocking locks**: If a lock is already held, the process either
+ serves stale cached content (if available) or generates content directly
+ to stdout without caching.
+3. **No deadlocks**: Lock files are `O_EXCL`, not `flock()`. If a process
+   crashes while holding a lock, the stale `.lock` file remains; because
+   `O_EXCL` prevents any later writer from acquiring it, caching for that
+   key stays disabled until the file is removed (e.g. by the periodic
+   cleanup described below). Requests still succeed in the meantime — they
+   serve stale cached content or generate output directly to stdout.
+
+## Cache Directory Management
+
+The cache root directory (`cache-root`, default `/var/cache/cgit`) must be
+writable by the web server user. Cache files are created with mode `0600`
+(`S_IRUSR | S_IWUSR`).
+
+There is no built-in cache eviction. Old cache files persist until a new
+request with the same hash replaces them. Administrators should set up
+periodic cleanup (e.g., a cron job) to purge expired files:
+
+```bash
+find /var/cache/cgit -type f -mmin +60 -delete
+```
+
+## Disabling the Cache
+
+Set `cache-size=0` (the default). When `size` is 0, `cache_process()` calls
+the fill function directly, writing to stdout with no file I/O overhead:
+
+```c
+if (!size) {
+ fn(cbdata);
+ return 0;
+}
+```
diff --git a/docs/handbook/cgit/code-style.md b/docs/handbook/cgit/code-style.md
new file mode 100644
index 0000000000..d4059391dc
--- /dev/null
+++ b/docs/handbook/cgit/code-style.md
@@ -0,0 +1,356 @@
+# cgit — Code Style and Conventions
+
+## Overview
+
+cgit follows C99 conventions with a style influenced by the Linux kernel and
+Git project coding standards. This document describes the patterns, naming
+conventions, and idioms used throughout the codebase.
+
+## Language Standard
+
+cgit is written in C99, compiled with:
+
+```makefile
+CGIT_CFLAGS += -std=c99
+```
+
+No C11 or GNU extensions are required, though some platform-specific features
+(like `sendfile()` on Linux) are conditionally compiled.
+
+## Formatting
+
+### Indentation
+
+- Tabs for indentation (1 tab = 8 spaces display width, consistent with
+ Linux kernel/Git style)
+- No spaces for indentation alignment
+
+### Braces
+
+K&R style (opening brace on the same line for control statements, on its
+own line for function definitions):
+
+```c
+if (condition) {
+ /* body */
+} else {
+ /* body */
+}
+
+static void function_name(int arg)
+{
+ /* function body */
+}
+```
+
+Functions place the opening brace on its own line. Control structures
+(`if`, `for`, `while`, `switch`) keep it on the same line.
+
+### Line Length
+
+No strict limit, but lines generally stay under 80 characters. Long function
+calls are broken across lines.
+
+## Naming Conventions
+
+### Functions
+
+Public API functions use the `cgit_` prefix:
+
+```c
+void cgit_print_commit(const char *rev, const char *prefix);
+void cgit_print_diff(const char *new_rev, const char *old_rev, ...);
+struct cgit_repo *cgit_add_repo(const char *url);
+struct cgit_repo *cgit_get_repoinfo(const char *url);
+int cgit_parse_snapshots_mask(const char *str);
+```
+
+Static (file-local) functions use descriptive names without prefix:
+
+```c
+static void config_cb(const char *name, const char *value);
+static void querystring_cb(const char *name, const char *value);
+static void process_request(void);
+static int open_slot(struct cache_slot *slot);
+```
+
+### Types
+
+Struct types use `cgit_` prefix with snake_case:
+
+```c
+struct cgit_context;
+struct cgit_repo;
+struct cgit_config;
+struct cgit_query;
+struct cgit_page;
+struct cgit_environment;
+struct cgit_cmd;
+struct cgit_filter;
+struct cgit_snapshot_format;
+```
+
+### Macros and Constants
+
+Uppercase with underscores:
+
+```c
+#define ABOUT_FILTER 0
+#define COMMIT_FILTER 1
+#define SOURCE_FILTER 2
+#define EMAIL_FILTER 3
+#define AUTH_FILTER 4
+#define DIFF_UNIFIED 0
+#define DIFF_SSDIFF 1
+#define DIFF_STATONLY 2
+#define FMT_BUFS 8
+#define FMT_SIZE 8192
+```
+
+### Variables
+
+Global variables use descriptive names:
+
+```c
+struct cgit_context ctx;
+struct cgit_repolist cgit_repolist;
+const char *cgit_version;
+```
+
+## File Organization
+
+### Header Files
+
+Each module has a corresponding header file with include guards:
+
+```c
+#ifndef UI_DIFF_H
+#define UI_DIFF_H
+
+extern void cgit_print_diff(const char *new_rev, const char *old_rev,
+ const char *prefix, int show_ctrls, int raw);
+extern void cgit_print_diffstat(const struct object_id *old,
+ const struct object_id *new,
+ const char *prefix);
+
+#endif /* UI_DIFF_H */
+```
+
+### Source Files
+
+Typical source file structure:
+
+1. License header (if present)
+2. Include directives
+3. Static (file-local) variables
+4. Static helper functions
+5. Public API functions
+
+### Module Pattern
+
+UI modules follow a consistent pattern with `ui-*.c` / `ui-*.h` pairs:
+
+```c
+/* ui-example.c */
+#include "cgit.h"
+#include "ui-example.h"
+#include "html.h"
+#include "ui-shared.h"
+
+static void helper_function(void)
+{
+ /* ... */
+}
+
+void cgit_print_example(void)
+{
+ /* main entry point */
+}
+```
+
+## Common Patterns
+
+### Global Context
+
+cgit uses a single global `struct cgit_context ctx` variable that holds all
+request state. Functions access it directly rather than passing it as a
+parameter:
+
+```c
+/* Access global context directly */
+if (ctx.repo && ctx.repo->enable_blame)
+ cgit_print_blame();
+
+/* Not: cgit_print_blame(&ctx) */
+```
+
+### Callback Functions
+
+Configuration and query parsing use callback function pointers:
+
+```c
+typedef void (*configfile_value_fn)(const char *name, const char *value);
+typedef void (*filepair_fn)(struct diff_filepair *pair);
+typedef void (*linediff_fn)(char *line, int len);
+typedef void (*cache_fill_fn)(void *cbdata);
+```
+
+### String Formatting
+
+The `fmt()` ring buffer is used for temporary string construction:
+
+```c
+const char *url = fmt("%s/%s/", ctx.cfg.virtual_root, repo->url);
+html_attr(url);
+```
+
+Never store `fmt()` results long-term — use `fmtalloc()` or `xstrdup()`.
+
+### NULL Checks
+
+Functions generally check for NULL pointers at the start:
+
+```c
+void cgit_print_blob(const char *hex, const char *path,
+ const char *head, int file_only)
+{
+ if (!hex && !path) {
+ cgit_print_error_page(400, "Bad request",
+ "Need either hex or path");
+ return;
+ }
+ /* ... */
+}
+```
+
+### Memory Management
+
+cgit uses Git's `xmalloc` / `xstrdup` / `xrealloc` wrappers that die on
+allocation failure:
+
+```c
+char *name = xstrdup(value);
+repo = xrealloc(repo, new_size);
+```
+
+No explicit `free()` calls in most paths — the CGI process exits after each
+request, and the OS reclaims all memory.
+
+### Boolean as Int
+
+Boolean values are represented as `int` (0 or 1), consistent with C99
+convention before `_Bool`:
+
+```c
+int enable_blame;
+int enable_commit_graph;
+int binary;
+int match;
+```
+
+### Typedef Avoidance
+
+Structs are generally not typedef'd — they use the `struct` keyword
+explicitly:
+
+```c
+struct cgit_repo *repo;
+struct cache_slot slot;
+```
+
+Exception: function pointer typedefs are used for callbacks:
+
+```c
+typedef void (*configfile_value_fn)(const char *name, const char *value);
+```
+
+## Error Handling
+
+### `die()` for Fatal Errors
+
+Unrecoverable errors use Git's `die()`:
+
+```c
+if (!ctx.repo)
+ die("no repository");
+```
+
+### Error Pages for User Errors
+
+User-facing errors use the error page function:
+
+```c
+cgit_print_error_page(404, "Not Found",
+ "No repository found for '%s'",
+ ctx.qry.repo);
+```
+
+### Return Codes
+
+Functions that can fail return int (0 = success, non-zero = error):
+
+```c
+static int open_slot(struct cache_slot *slot)
+{
+ slot->cache_fd = open(slot->cache_name, O_RDONLY);
+ if (slot->cache_fd == -1)
+ return errno;
+ return 0;
+}
+```
+
+## Preprocessor Usage
+
+Conditional compilation for platform features:
+
+```c
+#ifdef HAVE_LINUX_SENDFILE
+ sendfile(STDOUT_FILENO, slot->cache_fd, &off, size);
+#else
+ /* read/write fallback */
+#endif
+
+#ifdef HAVE_LUA
+ /* Lua filter support */
+#endif
+```
+
+## Git Library Integration
+
+cgit includes Git as a library. It uses Git's internal APIs directly:
+
+```c
+#include "git/cache.h"
+#include "git/object.h"
+#include "git/commit.h"
+#include "git/diff.h"
+#include "git/revision.h"
+#include "git/archive.h"
+```
+
+Functions from Git's library are called without wrapper layers:
+
+```c
+struct commit *commit = lookup_commit_reference(&oid);
+struct tree *tree = parse_tree_indirect(&oid);
+init_revisions(&rev, NULL);
+```
+
+## Documentation
+
+- Code comments are used sparingly, mainly for non-obvious logic
+- No Doxygen or similar documentation generators are used
+- Function documentation is in the header files as prototypes with
+ descriptive parameter names
+- The `cgitrc.5.txt` file provides user-facing documentation in
+ man page format
+
+## Commit Messages
+
+Commit messages follow the standard Git format:
+
+```
+subject: brief description (50 chars or less)
+
+Extended description wrapping at 72 characters. Explain what and why,
+not how.
+```
diff --git a/docs/handbook/cgit/configuration.md b/docs/handbook/cgit/configuration.md
new file mode 100644
index 0000000000..afc29fce07
--- /dev/null
+++ b/docs/handbook/cgit/configuration.md
@@ -0,0 +1,351 @@
+# cgit — Configuration Reference
+
+## Configuration File
+
+Default location: `/etc/cgitrc` (compiled in as `CGIT_CONFIG`). Override at
+runtime by setting the `$CGIT_CONFIG` environment variable.
+
+## File Format
+
+The configuration file uses a simple `name=value` format, parsed by
+`parse_configfile()` in `configfile.c`. Key rules:
+
+- Lines starting with `#` or `;` are comments
+- Leading whitespace on lines is skipped
+- No quoting mechanism — the value is everything after the `=` to end of line
+- Empty lines are ignored
+- Nesting depth for `include=` directives is limited to 8 levels
+
+```c
+int parse_configfile(const char *filename, configfile_value_fn fn)
+{
+ static int nesting;
+ /* ... */
+ if (nesting > 8)
+ return -1;
+ /* ... */
+ while (read_config_line(f, &name, &value))
+ fn(name.buf, value.buf);
+ /* ... */
+}
+```
+
+## Global Directives
+
+All global directives are processed by `config_cb()` in `cgit.c`. When a
+directive is encountered, the value is stored in the corresponding
+`ctx.cfg.*` field.
+
+### Site Identity
+
+| Directive | Default | Type | Description |
+|-----------|---------|------|-------------|
+| `root-title` | `"Git repository browser"` | string | HTML page title for the index page |
+| `root-desc` | `"a fast webinterface for the git dscm"` | string | Subtitle text on the index page |
+| `root-readme` | (none) | path | Path to a file rendered on the site about page |
+| `root-coc` | (none) | path | Path to Code of Conduct file |
+| `root-cla` | (none) | path | Path to Contributor License Agreement file |
+| `root-homepage` | (none) | URL | External homepage URL |
+| `root-homepage-title` | (none) | string | Title text for the homepage link |
+| `root-link` | (none) | string | `label\|url` pairs for navigation links (can repeat) |
+| `logo` | `"/cgit.png"` | URL | Path to the site logo image |
+| `logo-link` | (none) | URL | URL the logo links to |
+| `favicon` | `"/favicon.ico"` | URL | Path to the favicon |
+| `css` | (none) | URL | Stylesheet URL (can repeat for multiple stylesheets) |
+| `js` | (none) | URL | JavaScript URL (can repeat) |
+| `header` | (none) | path | File included at the top of every page |
+| `footer` | (none) | path | File included at the bottom of every page |
+| `head-include` | (none) | path | File included in the HTML `<head>` |
+| `robots` | `"index, nofollow"` | string | Content for `<meta name="robots">` |
+
+### URL Configuration
+
+| Directive | Default | Type | Description |
+|-----------|---------|------|-------------|
+| `virtual-root` | (none) | path | Base URL path when using URL rewriting (always ends with `/`) |
+| `script-name` | `CGIT_SCRIPT_NAME` | path | CGI script name (from `$SCRIPT_NAME` env var) |
+| `clone-prefix` | (none) | string | Prefix for clone URLs when auto-generating |
+| `clone-url` | (none) | string | Clone URL template (`$CGIT_REPO_URL` expanded) |
+
+When `virtual-root` is set, URLs use path-based routing:
+`/cgit/repo/log/path`. Without it, query-string routing is used:
+`?url=repo/log/path`.
+
+### Feature Flags
+
+| Directive | Default | Type | Description |
+|-----------|---------|------|-------------|
+| `enable-http-clone` | `1` | int | Allow HTTP clone operations (HEAD, info/refs, objects/) |
+| `enable-index-links` | `0` | int | Show log/tree/commit links on the repo index page |
+| `enable-index-owner` | `1` | int | Show the Owner column on the repo index page |
+| `enable-blame` | `0` | int | Enable blame view for all repos |
+| `enable-commit-graph` | `0` | int | Show ASCII commit graph in log view |
+| `enable-log-filecount` | `0` | int | Show changed-file count in log view |
+| `enable-log-linecount` | `0` | int | Show added/removed line counts in log |
+| `enable-remote-branches` | `0` | int | Display remote tracking branches |
+| `enable-subject-links` | `0` | int | Show parent commit subjects instead of hashes |
+| `enable-html-serving` | `0` | int | Serve HTML files as-is from plain view |
+| `enable-subtree` | `0` | int | Detect and display git-subtree directories |
+| `enable-tree-linenumbers` | `1` | int | Show line numbers in file/blob view |
+| `enable-git-config` | `0` | int | Read `gitweb.*` and `cgit.*` from repo's git config |
+| `enable-filter-overrides` | `0` | int | Allow repos to override global filters |
+| `enable-follow-links` | `0` | int | Show "follow" links in log view for renames |
+| `embedded` | `0` | int | Omit HTML boilerplate for embedding in another page |
+| `noheader` | `0` | int | Suppress the page header |
+| `noplainemail` | `0` | int | Hide email addresses in output |
+| `local-time` | `0` | int | Display times in local timezone instead of UTC |
+
+### Limits
+
+| Directive | Default | Type | Description |
+|-----------|---------|------|-------------|
+| `max-repo-count` | `50` | int | Repos per page on the index (≤0 → unlimited) |
+| `max-commit-count` | `50` | int | Commits per page in log view |
+| `max-message-length` | `80` | int | Truncate commit subject at this length |
+| `max-repodesc-length` | `80` | int | Truncate repo description at this length |
+| `max-blob-size` | `0` | int (KB) | Max blob size to display (0 = unlimited) |
+| `max-stats` | (disabled) | string | Stats period: `week`, `month`, `quarter`, or `year` (e.g. `max-stats=quarter`); disabled when unset, stored internally as 0–4 |
+| `max-atom-items` | `10` | int | Number of entries in Atom feeds |
+| `max-subtree-commits` | `2000` | int | Max commits to scan for subtree trailers |
+| `renamelimit` | `-1` | int | Diff rename detection limit (-1 = Git default) |
+
+### Caching
+
+| Directive | Default | Type | Description |
+|-----------|---------|------|-------------|
+| `cache-size` | `0` | int | Number of cache entries (0 = disabled) |
+| `cache-root` | `CGIT_CACHE_ROOT` | path | Directory for cache files |
+| `cache-root-ttl` | `5` | int (min) | TTL for repo-list pages |
+| `cache-repo-ttl` | `5` | int (min) | TTL for repo-specific pages |
+| `cache-dynamic-ttl` | `5` | int (min) | TTL for dynamic content |
+| `cache-static-ttl` | `-1` | int (min) | TTL for static content (-1 = forever) |
+| `cache-about-ttl` | `15` | int (min) | TTL for about/readme pages |
+| `cache-snapshot-ttl` | `5` | int (min) | TTL for snapshot pages |
+| `cache-scanrc-ttl` | `15` | int (min) | TTL for cached scan-path results |
+
+### Sorting
+
+| Directive | Default | Type | Description |
+|-----------|---------|------|-------------|
+| `case-sensitive-sort` | `1` | int | Case-sensitive repo name sorting |
+| `section-sort` | `1` | int | Sort sections alphabetically |
+| `section-from-path` | `0` | int | Derive section name from path depth (>0 = from start, <0 = from end) |
+| `repository-sort` | `"name"` | string | Default sort field for repo list |
+| `branch-sort` | `name` | string | Branch sort order: `name` or `age` |
+| `commit-sort` | (default) | string | Commit sort order: `date` or `topo` (unset = Git's default ordering) |
+
+### Snapshots
+
+| Directive | Default | Type | Description |
+|-----------|---------|------|-------------|
+| `snapshots` | (none) | string | Space-separated list of enabled formats: `.tar` `.tar.gz` `.tar.bz2` `.tar.lz` `.tar.xz` `.tar.zst` `.zip`. Also accepts `all`. |
+
+### Filters
+
+| Directive | Default | Type | Description |
+|-----------|---------|------|-------------|
+| `about-filter` | (none) | filter | Filter for rendering README/about content |
+| `source-filter` | (none) | filter | Filter for syntax highlighting source code |
+| `commit-filter` | (none) | filter | Filter for commit messages |
+| `email-filter` | (none) | filter | Filter for email display (2 args: email, page) |
+| `owner-filter` | (none) | filter | Filter for owner display |
+| `auth-filter` | (none) | filter | Authentication filter (12 args) |
+
+Filter values use the format `type:command`:
+- `exec:/path/to/script` — external process filter
+- `lua:/path/to/script.lua` — Lua script filter
+- Plain path without prefix defaults to `exec`
+
+### Display
+
+| Directive | Default | Type | Description |
+|-----------|---------|------|-------------|
+| `summary-branches` | `10` | int | Branches shown on summary page |
+| `summary-tags` | `10` | int | Tags shown on summary page |
+| `summary-log` | `10` | int | Log entries shown on summary page |
+| `side-by-side-diffs` | `0` | int | Default to side-by-side diff view |
+| `remove-suffix` | `0` | int | Remove `.git` suffix from repo URLs |
+| `scan-hidden-path` | `0` | int | Include hidden dirs when scanning |
+
+### Miscellaneous
+
+| Directive | Default | Type | Description |
+|-----------|---------|------|-------------|
+| `agefile` | `"info/web/last-modified"` | path | File in repo checked for modification time |
+| `mimetype-file` | (none) | path | Apache-style mime.types file |
+| `mimetype.<ext>` | (none) | string | MIME type for a file extension |
+| `module-link` | (none) | URL | URL template for submodule links |
+| `strict-export` | (none) | path | Only export repos containing this file |
+| `project-list` | (none) | path | File listing project directories for `scan-path` |
+| `scan-path` | (none) | path | Directory to scan for git repositories |
+| `readme` | (none) | string | Default README file spec (can repeat) |
+| `include` | (none) | path | Include another config file |
+
+## Repository Directives
+
+Repository configuration begins with `repo.url=` which creates a new
+repository entry via `cgit_add_repo()`. Subsequent `repo.*` directives
+modify the most recently created repository via `repo_config()` in `cgit.c`.
+
+| Directive | Description |
+|-----------|-------------|
+| `repo.url` | Repository URL path (triggers new repo creation) |
+| `repo.path` | Filesystem path to the git repository |
+| `repo.name` | Display name |
+| `repo.basename` | Override for basename derivation |
+| `repo.desc` | Repository description |
+| `repo.owner` | Repository owner name |
+| `repo.homepage` | Project homepage URL |
+| `repo.defbranch` | Default branch name |
+| `repo.section` | Section heading for grouped display |
+| `repo.clone-url` | Clone URL (overrides global) |
+| `repo.readme` | README file spec (`[ref:]path`, can repeat) |
+| `repo.logo` | Per-repo logo URL |
+| `repo.logo-link` | Per-repo logo link URL |
+| `repo.extra-head-content` | Extra HTML for `<head>` |
+| `repo.snapshots` | Snapshot format mask (space-separated suffixes) |
+| `repo.snapshot-prefix` | Prefix for snapshot filenames |
+| `repo.enable-blame` | Override global enable-blame |
+| `repo.enable-commit-graph` | Override global enable-commit-graph |
+| `repo.enable-log-filecount` | Override global enable-log-filecount |
+| `repo.enable-log-linecount` | Override global enable-log-linecount |
+| `repo.enable-remote-branches` | Override global enable-remote-branches |
+| `repo.enable-subject-links` | Override global enable-subject-links |
+| `repo.enable-html-serving` | Override global enable-html-serving |
+| `repo.enable-subtree` | Override global enable-subtree |
+| `repo.max-stats` | Override global max-stats |
+| `repo.max-subtree-commits` | Override global max-subtree-commits |
+| `repo.branch-sort` | `"age"` or `"name"` |
+| `repo.commit-sort` | `"date"` or `"topo"` |
+| `repo.module-link` | Submodule URL template |
+| `repo.module-link.<submodule>` | Per-submodule URL |
+| `repo.badge` | Badge entry: `url\|imgurl` or just `imgurl` (can repeat) |
+| `repo.hide` | `1` = hide from listing (still accessible by URL) |
+| `repo.ignore` | `1` = completely ignore this repository |
+
+### Filter overrides (require `enable-filter-overrides=1`)
+
+| Directive | Description |
+|-----------|-------------|
+| `repo.about-filter` | Per-repo about filter |
+| `repo.commit-filter` | Per-repo commit filter |
+| `repo.source-filter` | Per-repo source filter |
+| `repo.email-filter` | Per-repo email filter |
+| `repo.owner-filter` | Per-repo owner filter |
+
+## Repository Defaults
+
+When a new repository is created by `cgit_add_repo()`, it inherits all global
+defaults from `ctx.cfg`:
+
+```c
+ret->section = ctx.cfg.section;
+ret->snapshots = ctx.cfg.snapshots;
+ret->enable_blame = ctx.cfg.enable_blame;
+ret->enable_commit_graph = ctx.cfg.enable_commit_graph;
+ret->enable_log_filecount = ctx.cfg.enable_log_filecount;
+ret->enable_log_linecount = ctx.cfg.enable_log_linecount;
+ret->enable_remote_branches = ctx.cfg.enable_remote_branches;
+ret->enable_subject_links = ctx.cfg.enable_subject_links;
+ret->enable_html_serving = ctx.cfg.enable_html_serving;
+ret->enable_subtree = ctx.cfg.enable_subtree;
+ret->max_stats = ctx.cfg.max_stats;
+ret->max_subtree_commits = ctx.cfg.max_subtree_commits;
+ret->branch_sort = ctx.cfg.branch_sort;
+ret->commit_sort = ctx.cfg.commit_sort;
+ret->module_link = ctx.cfg.module_link;
+ret->readme = ctx.cfg.readme;
+ret->about_filter = ctx.cfg.about_filter;
+ret->commit_filter = ctx.cfg.commit_filter;
+ret->source_filter = ctx.cfg.source_filter;
+ret->email_filter = ctx.cfg.email_filter;
+ret->owner_filter = ctx.cfg.owner_filter;
+ret->clone_url = ctx.cfg.clone_url;
+```
+
+This means global directives should appear *before* `repo.url=` entries, since
+they set the defaults for subsequently defined repositories.
+
+## Git Config Integration
+
+When `enable-git-config=1`, the `scan-tree` scanner reads each repository's
+git config file (`config` in a bare repository, `.git/config` otherwise) and
+maps gitweb-compatible directives:
+
+```c
+if (!strcmp(key, "gitweb.owner"))
+ config_fn(repo, "owner", value);
+else if (!strcmp(key, "gitweb.description"))
+ config_fn(repo, "desc", value);
+else if (!strcmp(key, "gitweb.category"))
+ config_fn(repo, "section", value);
+else if (!strcmp(key, "gitweb.homepage"))
+ config_fn(repo, "homepage", value);
+else if (skip_prefix(key, "cgit.", &name))
+ config_fn(repo, name, value);
+```
+
+Any `cgit.*` key in the git config is passed directly to the repo config
+handler, allowing per-repo settings without modifying the global cgitrc.
+
+## README File Spec Format
+
+README directives support the following four forms:
+
+| Format | Meaning |
+|--------|---------|
+| `path` | File on disk, relative to repo path |
+| `/absolute/path` | File on disk, absolute |
+| `ref:path` | File tracked in the git repository at the given ref |
+| `:path` | File tracked in the default branch or query head |
+
+Multiple `readme` directives can be specified. cgit tries each in order and
+uses the first one found (checked via `cgit_ref_path_exists()` for tracked
+files, or `access(R_OK)` for disk files).
+
+## Macro Expansion
+
+The `expand_macros()` function (in `shared.c`) performs environment variable
+substitution in certain directive values (`cache-root`, `scan-path`,
+`project-list`, `include`). A `$VARNAME` or `${VARNAME}` in the value is
+replaced with the corresponding environment variable.
+
+## Example Configuration
+
+```ini
+# Site settings
+root-title=Project Tick Git
+root-desc=Source code for Project Tick
+logo=/cgit/cgit.png
+css=/cgit/cgit.css
+virtual-root=/cgit/
+
+# Features
+enable-commit-graph=1
+enable-blame=1
+enable-http-clone=1
+enable-index-links=1
+snapshots=tar.gz tar.xz zip
+max-stats=quarter
+
+# Caching
+cache-size=1000
+cache-root=/var/cache/cgit
+
+# Filters
+source-filter=exec:/usr/lib/cgit/filters/syntax-highlighting.py
+about-filter=exec:/usr/lib/cgit/filters/about-formatting.sh
+
+# Scanning
+scan-path=/srv/git/
+section-from-path=1
+
+# Or manual repo definitions:
+repo.url=myproject
+repo.path=/srv/git/myproject.git
+repo.desc=My awesome project
+repo.owner=Alice
+repo.readme=master:README.md
+repo.clone-url=https://git.example.com/myproject.git
+repo.snapshots=tar.gz zip
+repo.badge=https://ci.example.com/badge.svg|https://ci.example.com/
+```
diff --git a/docs/handbook/cgit/css-theming.md b/docs/handbook/cgit/css-theming.md
new file mode 100644
index 0000000000..0a7b404595
--- /dev/null
+++ b/docs/handbook/cgit/css-theming.md
@@ -0,0 +1,522 @@
+# cgit — CSS Theming
+
+## Overview
+
+cgit ships with a comprehensive CSS stylesheet (`cgit.css`) that controls
+the visual appearance of all pages. The stylesheet is designed with a light
+color scheme and semantic CSS classes that map directly to cgit's HTML
+structure.
+
+Source file: `cgit.css` (~450 lines).
+
+## Loading Stylesheets
+
+CSS files are specified via the `css` configuration directive:
+
+```ini
+css=/cgit/cgit.css
+```
+
+Multiple stylesheets can be loaded by repeating the directive:
+
+```ini
+css=/cgit/cgit.css
+css=/cgit/custom.css
+```
+
+Stylesheets are included in document order in the `<head>` section via
+`cgit_print_docstart()` in `ui-shared.c`.
+
+## Page Structure
+
+The HTML layout uses this basic structure:
+
+```html
+<body>
+ <div id='cgit'>
+ <table id='header'>...</table> <!-- site header with logo -->
+ <table id='navigation'>...</table> <!-- tab navigation -->
+ <div id='content'> <!-- page content -->
+ <!-- page-specific content -->
+ </div>
+ <div class='footer'>...</div> <!-- footer -->
+ </div>
+</body>
+```
+
+## Base Styles
+
+### Body and Layout
+
+```css
+body {
+ font-family: sans-serif;
+ font-size: 11px;
+ color: #000;
+ background: white;
+ padding: 4px;
+}
+
+div#cgit {
+ padding: 0;
+ margin: 0;
+ font-family: monospace;
+ font-size: 12px;
+}
+```
+
+### Header
+
+```css
+table#header {
+ width: 100%;
+ margin-bottom: 1em;
+}
+
+table#header td.logo {
+ /* logo cell */
+}
+
+table#header td.main {
+ font-size: 250%;
+ font-weight: bold;
+ vertical-align: bottom;
+ padding-left: 10px;
+}
+
+table#header td.sub {
+ color: #999;
+ font-size: 75%;
+ vertical-align: top;
+ padding-left: 10px;
+}
+```
+
+### Navigation Tabs
+
+```css
+table#navigation {
+ width: 100%;
+}
+
+table#navigation a {
+ padding: 2px 6px;
+ color: #000;
+ text-decoration: none;
+}
+
+table#navigation a:hover {
+ color: #00f;
+}
+```
+
+## Content Areas
+
+### Repository List
+
+```css
+table.list {
+ border-collapse: collapse;
+ border: solid 1px #aaa;
+ width: 100%;
+}
+
+table.list th {
+ text-align: left;
+ font-weight: bold;
+ background: #ddd;
+ border-bottom: solid 1px #aaa;
+ padding: 2px 4px;
+}
+
+table.list td {
+ padding: 2px 4px;
+ border: none;
+}
+
+table.list tr:hover {
+ background: #eee;
+}
+
+table.list td a {
+ color: #00f;
+ text-decoration: none;
+}
+
+table.list td a:hover {
+ text-decoration: underline;
+}
+```
+
+### Sections
+
+```css
+div.section-header {
+ background: #eee;
+ border: solid 1px #ddd;
+ padding: 2px 4px;
+ font-weight: bold;
+ margin-top: 1em;
+}
+```
+
+## Diff Styles
+
+### Diffstat
+
+```css
+table.diffstat {
+ border-collapse: collapse;
+ border: solid 1px #aaa;
+}
+
+table.diffstat td {
+ padding: 1px 4px;
+ border: none;
+}
+
+table.diffstat td.mode {
+ font-weight: bold;
+ /* status indicator: A/M/D/R */
+}
+
+table.diffstat td.graph {
+ width: 500px;
+}
+
+table.diffstat td.graph span.add {
+ background: #5f5;
+ /* green bar for additions */
+}
+
+table.diffstat td.graph span.rem {
+ background: #f55;
+ /* red bar for deletions */
+}
+
+table.diffstat .total {
+ font-weight: bold;
+ text-align: center;
+}
+```
+
+### Unified Diff
+
+```css
+table.diff {
+ width: 100%;
+}
+
+table.diff td div.head {
+ font-weight: bold;
+ margin-top: 1em;
+ color: #000;
+}
+
+table.diff td div.hunk {
+ color: #009;
+ /* hunk header @@ ... @@ */
+}
+
+table.diff td div.add {
+ color: green;
+ background: #dfd;
+}
+
+table.diff td div.del {
+ color: red;
+ background: #fdd;
+}
+```
+
+### Side-by-Side Diff
+
+```css
+table.ssdiff {
+ width: 100%;
+}
+
+table.ssdiff td {
+ font-family: monospace;
+ font-size: 12px;
+ padding: 1px 4px;
+ vertical-align: top;
+}
+
+table.ssdiff td.lineno {
+ text-align: right;
+ width: 3em;
+ background: #eee;
+ color: #999;
+}
+
+table.ssdiff td.add {
+ background: #dfd;
+}
+
+table.ssdiff td.del {
+ background: #fdd;
+}
+
+table.ssdiff td.changed {
+ background: #ffc;
+}
+
+table.ssdiff span.add {
+ background: #afa;
+ font-weight: bold;
+}
+
+table.ssdiff span.del {
+ background: #faa;
+ font-weight: bold;
+}
+```
+
+## Blob/Tree View
+
+```css
+table.blob {
+ border-collapse: collapse;
+ width: 100%;
+}
+
+table.blob td {
+ font-family: monospace;
+ font-size: 12px;
+ padding: 0 4px;
+ vertical-align: top;
+}
+
+table.blob td.linenumbers {
+ text-align: right;
+ color: #999;
+ background: #eee;
+ width: 3em;
+ border-right: solid 1px #ddd;
+}
+
+table.blob td.lines {
+ white-space: pre;
+}
+```
+
+### Tree Listing
+
+```css
+table.list td.ls-mode {
+ font-family: monospace;
+ width: 10em;
+}
+
+table.list td.ls-size {
+ text-align: right;
+ width: 5em;
+}
+```
+
+## Commit View
+
+```css
+table.commit-info {
+ border-collapse: collapse;
+ border: solid 1px #aaa;
+ margin-bottom: 1em;
+}
+
+table.commit-info th {
+ text-align: left;
+ font-weight: bold;
+ padding: 2px 6px;
+ vertical-align: top;
+}
+
+table.commit-info td {
+ padding: 2px 6px;
+}
+
+div.commit-subject {
+ font-weight: bold;
+ font-size: 125%;
+ margin: 1em 0 0.5em;
+}
+
+div.commit-msg {
+ white-space: pre;
+ font-family: monospace;
+}
+
+div.notes-header {
+ font-weight: bold;
+ padding-top: 1em;
+}
+
+div.notes {
+ white-space: pre;
+ font-family: monospace;
+ border-left: solid 3px #dd5;
+ padding: 0.5em;
+ background: #ffe;
+}
+```
+
+## Log View
+
+```css
+div.commit-graph {
+ font-family: monospace;
+ white-space: pre;
+ color: #000;
+}
+
+/* Column colors for commit graph */
+.column1 { color: #a00; }
+.column2 { color: #0a0; }
+.column3 { color: #00a; }
+.column4 { color: #aa0; }
+.column5 { color: #0aa; }
+.column6 { color: #a0a; }
+```
+
+## Stats View
+
+```css
+table.stats {
+ border-collapse: collapse;
+ border: solid 1px #aaa;
+}
+
+table.stats th {
+ text-align: left;
+ padding: 2px 6px;
+ background: #ddd;
+}
+
+table.stats td {
+ padding: 2px 6px;
+}
+
+div.stats-graph {
+ /* bar chart container */
+}
+```
+
+## Form Elements
+
+```css
+div.cgit-panel {
+ float: right;
+ margin: 0 0 0.5em 0.5em;
+ padding: 4px;
+ border: solid 1px #aaa;
+ background: #eee;
+}
+
+div.cgit-panel b {
+ display: block;
+ margin-bottom: 2px;
+}
+
+div.cgit-panel select,
+div.cgit-panel input {
+ font-size: 11px;
+}
+```
+
+## Customization Strategies
+
+### Method 1: Override Stylesheet
+
+Create a custom CSS file that overrides specific rules:
+
+```css
+/* /cgit/custom.css */
+body {
+ background: #1a1a2e;
+ color: #e0e0e0;
+}
+
+div#cgit {
+ background: #16213e;
+}
+
+table.list th {
+ background: #0f3460;
+ color: #e0e0e0;
+}
+```
+
+```ini
+css=/cgit/cgit.css
+css=/cgit/custom.css
+```
+
+### Method 2: Replace Stylesheet
+
+Replace the default stylesheet entirely:
+
+```ini
+css=/cgit/mytheme.css
+```
+
+### Method 3: head-include
+
+Inject inline styles via the `head-include` directive:
+
+```ini
+head-include=/etc/cgit/extra-head.html
+```
+
+```html
+<!-- /etc/cgit/extra-head.html -->
+<style>
+ body { background: #f0f0f0; }
+</style>
+```
+
+## CSS Classes Reference
+
+### Layout Classes
+
+| Class/ID | Element | Description |
+|----------|---------|-------------|
+| `#cgit` | div | Main container |
+| `#header` | table | Site header |
+| `#navigation` | table | Tab navigation |
+| `#content` | div | Page content area |
+| `.footer` | div | Page footer |
+
+### Content Classes
+
+| Class | Element | Description |
+|-------|---------|-------------|
+| `.list` | table | Data listing (repos, files, refs) |
+| `.blob` | table | File content display |
+| `.diff` | table | Unified diff |
+| `.ssdiff` | table | Side-by-side diff |
+| `.diffstat` | table | Diff statistics |
+| `.commit-info` | table | Commit metadata |
+| `.stats` | table | Statistics data |
+| `.cgit-panel` | div | Control panel |
+
+### Diff Classes
+
+| Class | Element | Description |
+|-------|---------|-------------|
+| `.add` | div/span | Added lines/chars |
+| `.del` | div/span | Deleted lines/chars |
+| `.hunk` | div | Hunk header |
+| `.ctx` | div | Context lines |
+| `.head` | div | File header |
+| `.changed` | td | Modified line (ssdiff) |
+| `.lineno` | td | Line number column |
+
+### Status Classes
+
+| Class | Description |
+|-------|-------------|
+| `.upd` | Modified file |
+| `.add` | Added file |
+| `.del` | Deleted file |
+| `.mode` | File mode indicator |
+| `.graph` | Graph bar container |
diff --git a/docs/handbook/cgit/deployment.md b/docs/handbook/cgit/deployment.md
new file mode 100644
index 0000000000..8c991726af
--- /dev/null
+++ b/docs/handbook/cgit/deployment.md
@@ -0,0 +1,369 @@
+# cgit — Deployment Guide
+
+## Overview
+
+cgit runs as a CGI application under a web server. This guide covers
+compilation, installation, web server configuration, and production tuning.
+
+## Prerequisites
+
+Build dependencies:
+- GCC or Clang (C99 compiler)
+- GNU Make
+- OpenSSL or compatible TLS library (for libgit HTTPS)
+- zlib (for git object decompression)
+- Optional: Lua or LuaJIT (for Lua filters)
+- Optional: pkg-config (for Lua detection)
+
+Runtime dependencies:
+- A CGI-capable web server (Apache, Nginx+fcgiwrap, lighttpd)
+- Git repositories on the filesystem
+
+## Building
+
+```bash
+# Clone/download the source
+cd cgit/
+
+# Build with defaults
+make
+
+# Or with custom settings
+make prefix=/usr CGIT_SCRIPT_PATH=/var/www/cgi-bin \
+ CGIT_CONFIG=/etc/cgitrc CACHE_ROOT=/var/cache/cgit
+
+# Install
+make install
+```
+
+### Build Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `prefix` | `/usr/local` | Installation prefix |
+| `CGIT_SCRIPT_PATH` | `$(prefix)/lib/cgit` | CGI binary directory |
+| `CGIT_DATA_PATH` | `$(prefix)/share/cgit` | Static files (CSS, images) |
+| `CGIT_CONFIG` | `/etc/cgitrc` | Default config file path |
+| `CACHE_ROOT` | `/var/cache/cgit` | Default cache directory |
+| `CGIT_SCRIPT_NAME` | `"/"` | Default CGI script name |
+| `NO_LUA` | (unset) | Set to 1 to disable Lua |
+
+### Installed Files
+
+```
+$(CGIT_SCRIPT_PATH)/cgit.cgi # CGI binary
+$(CGIT_DATA_PATH)/cgit.css # Stylesheet
+$(CGIT_DATA_PATH)/cgit.js # JavaScript
+$(CGIT_DATA_PATH)/cgit.png # Logo image
+$(CGIT_DATA_PATH)/robots.txt # Robots exclusion file
+```
+
+## Apache Configuration
+
+### CGI Module
+
+```apache
+# Enable CGI
+LoadModule cgi_module modules/mod_cgi.so
+
+# Basic CGI setup
+ScriptAlias /cgit/ /usr/lib/cgit/cgit.cgi/
+Alias /cgit-data/ /usr/share/cgit/
+
+<Directory "/usr/lib/cgit/">
+ AllowOverride None
+ Options +ExecCGI
+ Require all granted
+</Directory>
+
+<Directory "/usr/share/cgit/">
+ AllowOverride None
+ Require all granted
+</Directory>
+```
+
+### URL Rewriting (Clean URLs)
+
+```apache
+# Enable clean URLs via mod_rewrite
+RewriteEngine On
+RewriteRule ^/cgit/(.*)$ /usr/lib/cgit/cgit.cgi/$1 [PT]
+```
+
+With corresponding cgitrc:
+
+```ini
+virtual-root=/cgit/
+css=/cgit-data/cgit.css
+logo=/cgit-data/cgit.png
+```
+
+## Nginx Configuration
+
+Nginx does not support CGI natively. Use `fcgiwrap` or `spawn-fcgi`:
+
+### With fcgiwrap
+
+```bash
+# Install fcgiwrap
+# Start it (systemd, OpenRC, or manual)
+fcgiwrap -s unix:/run/fcgiwrap.sock &
+```
+
+```nginx
+server {
+ listen 80;
+ server_name git.example.com;
+
+ root /usr/share/cgit;
+
+ # Serve static files directly
+ location /cgit-data/ {
+ alias /usr/share/cgit/;
+ }
+
+ # Pass CGI requests to fcgiwrap
+ location /cgit {
+ include fastcgi_params;
+ fastcgi_param SCRIPT_FILENAME /usr/lib/cgit/cgit.cgi;
+ fastcgi_param PATH_INFO $uri;
+ fastcgi_param QUERY_STRING $args;
+ fastcgi_param HTTP_HOST $server_name;
+ fastcgi_pass unix:/run/fcgiwrap.sock;
+ }
+}
+```
+
+### With spawn-fcgi
+
+```bash
+spawn-fcgi -s /run/cgit.sock -n -- /usr/bin/fcgiwrap
+```
+
+## lighttpd Configuration
+
+```lighttpd
+server.modules += ("mod_cgi", "mod_alias", "mod_rewrite")
+
+alias.url = (
+ "/cgit-data/" => "/usr/share/cgit/",
+ "/cgit/" => "/usr/lib/cgit/cgit.cgi"
+)
+
+cgi.assign = (
+ "cgit.cgi" => ""
+)
+
+url.rewrite-once = (
+ "^/cgit/(.*)$" => "/cgit/cgit.cgi/$1"
+)
+```
+
+## Configuration File
+
+Create `/etc/cgitrc`:
+
+```ini
+# Site identity
+root-title=My Git Server
+root-desc=Git repository browser
+css=/cgit-data/cgit.css
+logo=/cgit-data/cgit.png
+favicon=/cgit-data/favicon.ico
+
+# URL routing
+virtual-root=/cgit/
+
+# Features
+enable-commit-graph=1
+enable-blame=1
+enable-http-clone=1
+enable-index-links=1
+snapshots=tar.gz tar.xz zip
+max-stats=quarter
+
+# Caching (recommended for production)
+cache-size=1000
+cache-root=/var/cache/cgit
+cache-root-ttl=5
+cache-repo-ttl=5
+cache-static-ttl=-1
+
+# Repository discovery
+scan-path=/srv/git/
+section-from-path=1
+enable-git-config=1
+
+# Filters
+source-filter=exec:/usr/lib/cgit/filters/syntax-highlighting.py
+about-filter=exec:/usr/lib/cgit/filters/about-formatting.sh
+```
+
+## Cache Directory Setup
+
+```bash
+# Create cache directory
+mkdir -p /var/cache/cgit
+
+# Set ownership to web server user
+chown www-data:www-data /var/cache/cgit
+chmod 700 /var/cache/cgit
+
+# Optional: periodic cleanup cron job
+echo "*/30 * * * * find /var/cache/cgit -type f -mmin +60 -delete" | \
+ crontab -u www-data -
+```
+
+## Repository Permissions
+
+The web server user needs read access to all git repositories:
+
+```bash
+# Option 1: Add web server user to git group
+usermod -aG git www-data
+
+# Option 2: Set directory permissions
+chmod -R g+rX /srv/git/
+
+# Option 3: Use ACLs
+setfacl -R -m u:www-data:rX /srv/git/
+setfacl -R -d -m u:www-data:rX /srv/git/
+```
+
+## HTTPS Setup
+
+For production, serve cgit over HTTPS:
+
+```nginx
+server {
+ listen 443 ssl;
+ server_name git.example.com;
+
+ ssl_certificate /etc/ssl/certs/git.example.com.pem;
+ ssl_certificate_key /etc/ssl/private/git.example.com.key;
+
+ # ... cgit configuration ...
+}
+
+server {
+ listen 80;
+ server_name git.example.com;
+ return 301 https://$server_name$request_uri;
+}
+```
+
+## Performance Tuning
+
+### Enable Caching
+
+The response cache is essential for performance:
+
+```ini
+cache-size=1000 # number of cache entries
+cache-root-ttl=5 # repo list: 5 minutes
+cache-repo-ttl=5 # repo pages: 5 minutes
+cache-static-ttl=-1 # static content: forever
+cache-about-ttl=15 # about pages: 15 minutes
+```
+
+### Limit Resource Usage
+
+```ini
+max-repo-count=100 # repos per page
+max-commit-count=50 # commits per page
+max-blob-size=512 # max blob display (KB)
+max-message-length=120 # truncate long subjects
+max-repodesc-length=80 # truncate descriptions
+```
+
+### Use Lua Filters
+
+Lua filters avoid fork/exec overhead:
+
+```ini
+source-filter=lua:/usr/share/cgit/filters/syntax-highlight.lua
+email-filter=lua:/usr/share/cgit/filters/email-libravatar.lua
+```
+
+### Optimize Git Access
+
+```bash
+# Run periodic git gc on repositories
+for repo in /srv/git/*.git; do
+ git -C "$repo" gc --auto
+done
+
+# Ensure pack files are optimized
+for repo in /srv/git/*.git; do
+ git -C "$repo" repack -a -d
+done
+```
+
+## Monitoring
+
+### Check Cache Status
+
+```bash
+# Count cache entries
+ls /var/cache/cgit/ | wc -l
+
+# Check cache hit rate (if access logs are enabled)
+grep "cgit.cgi" /var/log/nginx/access.log | tail -100
+```
+
+### Health Check
+
+```bash
+# Verify cgit is responding
+curl -s -o /dev/null -w "%{http_code}" http://localhost/cgit/
+```
+
+## Docker Deployment
+
+```dockerfile
+FROM alpine:latest
+
+RUN apk add --no-cache \
+ git make gcc musl-dev openssl-dev zlib-dev lua5.3-dev \
+ fcgiwrap nginx
+
+COPY cgit/ /build/cgit/
+WORKDIR /build/cgit
+RUN make && make install
+
+COPY cgitrc /etc/cgitrc
+COPY nginx.conf /etc/nginx/conf.d/cgit.conf
+
+EXPOSE 80
+CMD ["sh", "-c", "fcgiwrap -s unix:/run/fcgiwrap.sock & nginx -g 'daemon off;'"]
+```
+
+## systemd Service
+
+```ini
+# /etc/systemd/system/fcgiwrap-cgit.service
+[Unit]
+Description=fcgiwrap for cgit
+After=network.target
+
+[Service]
+ExecStart=/usr/bin/fcgiwrap -s unix:/run/fcgiwrap.sock
+User=www-data
+Group=www-data
+
+[Install]
+WantedBy=multi-user.target
+```
+
+## Troubleshooting
+
+| Symptom | Cause | Solution |
+|---------|-------|----------|
+| 500 Internal Server Error | CGI binary not executable | `chmod +x cgit.cgi` |
+| Blank page | Missing CSS path | Check `css=` directive |
+| No repositories shown | Wrong `scan-path` | Verify path and permissions |
+| Cache errors | Permission denied | Fix cache dir ownership |
+| Lua filter fails | Lua not compiled in | Rebuild without `NO_LUA` |
+| Clone fails | `enable-http-clone=0` | Set to `1` |
+| Missing styles | Static file alias wrong | Check web server alias config |
+| Timeout on large repos | No caching | Enable `cache-size` |
diff --git a/docs/handbook/cgit/diff-engine.md b/docs/handbook/cgit/diff-engine.md
new file mode 100644
index 0000000000..c82092842c
--- /dev/null
+++ b/docs/handbook/cgit/diff-engine.md
@@ -0,0 +1,352 @@
+# cgit — Diff Engine
+
+## Overview
+
+cgit's diff engine renders differences between commits, trees, and blobs.
+It supports three diff modes: unified, side-by-side, and stat-only. The
+engine leverages libgit's internal diff machinery and adds HTML rendering on
+top.
+
+Source files: `ui-diff.c`, `ui-diff.h`, `ui-ssdiff.c`, `ui-ssdiff.h`,
+`shared.c` (diff helpers).
+
+## Diff Types
+
+```c
+#define DIFF_UNIFIED 0 /* traditional unified diff */
+#define DIFF_SSDIFF 1 /* side-by-side diff */
+#define DIFF_STATONLY 2 /* only show diffstat */
+```
+
+The diff type is selected by the `dt` query parameter (the legacy `ss`
+parameter toggles side-by-side mode) or the `side-by-side-diffs`
+configuration directive.
+
+## Diffstat
+
+### File Info Structure
+
+```c
+struct fileinfo {
+ char status; /* 'A'dd, 'D'elete, 'M'odify, 'R'ename, etc. */
+ unsigned long old_size;
+ unsigned long new_size;
+ int binary;
+ struct object_id old_oid; /* old blob SHA */
+ struct object_id new_oid; /* new blob SHA */
+ unsigned short old_mode;
+ unsigned short new_mode;
+ char *old_path;
+ char *new_path;
+ int added; /* lines added */
+ int removed; /* lines removed */
+};
+```
+
+### Collecting File Changes: `inspect_filepair()`
+
+For each changed file in a commit, `inspect_filepair()` records the change
+information:
+
+```c
+static void inspect_filepair(struct diff_filepair *pair)
+{
+ /* populate a fileinfo entry from the diff_filepair */
+ files++;
+ switch (pair->status) {
+ case DIFF_STATUS_ADDED:
+ info->status = 'A';
+ break;
+ case DIFF_STATUS_DELETED:
+ info->status = 'D';
+ break;
+ case DIFF_STATUS_MODIFIED:
+ info->status = 'M';
+ break;
+ case DIFF_STATUS_RENAMED:
+ info->status = 'R';
+ /* old_path and new_path differ */
+ break;
+ case DIFF_STATUS_COPIED:
+ info->status = 'C';
+ break;
+ /* ... */
+ }
+}
+```
+
+### Rendering Diffstat: `cgit_print_diffstat()`
+
+```c
+void cgit_print_diffstat(const struct object_id *old,
+ const struct object_id *new,
+ const char *prefix)
+```
+
+Renders an HTML table showing changed files with bar graphs:
+
+```html
+<table summary='diffstat' class='diffstat'>
+ <tr>
+ <td class='mode'>M</td>
+ <td class='upd'><a href='...'>src/main.c</a></td>
+ <td class='right'>42</td>
+ <td class='graph'>
+ <span class='add' style='width: 70%'></span>
+ <span class='rem' style='width: 30%'></span>
+ </td>
+ </tr>
+ ...
+ <tr class='total'>
+ <td colspan='3'>5 files changed, 120 insertions, 45 deletions</td>
+ </tr>
+</table>
+```
+
+The bar graph width is calculated proportionally to the maximum changed
+lines across all files.
+
+## Unified Diff
+
+### `cgit_print_diff()`
+
+The main diff rendering function:
+
+```c
+void cgit_print_diff(const char *new_rev, const char *old_rev,
+ const char *prefix, int show_ctrls, int raw)
+```
+
+Parameters:
+- `new_rev` — New commit SHA
+- `old_rev` — Old commit SHA (optional; defaults to parent)
+- `prefix` — Path prefix filter (show only diffs under this path)
+- `show_ctrls` — Show diff controls (diff type toggle buttons)
+- `raw` — Output raw diff without HTML wrapping
+
+### Diff Controls
+
+When `show_ctrls=1`, diff mode toggle buttons are rendered:
+
+```html
+<div class='cgit-panel'>
+ <b>Diff options</b>
+ <form method='get' action='...'>
+ <select name='dt'>
+ <option value='0'>unified</option>
+ <option value='1'>ssdiff</option>
+ <option value='2'>stat only</option>
+ </select>
+ <input type='submit' value='Go'/>
+ </form>
+</div>
+```
+
+### Filepair Callback: `filepair_cb()`
+
+For each changed file, `filepair_cb()` renders the diff:
+
+```c
+static void filepair_cb(struct diff_filepair *pair)
+{
+ /* emit file header */
+ htmlf("<div class='head'>%s</div>", pair->one->path);
+ /* set up diff options */
+ xdiff_opts.ctxlen = ctx.qry.context ?: 3;
+ /* run the diff and emit line-by-line output */
+ /* each line gets a CSS class: .add, .del, or .ctx */
+}
+```
+
+### Hunk Headers
+
+```c
+void cgit_print_diff_hunk_header(int oldofs, int oldcnt,
+ int newofs, int newcnt,
+ const char *func)
+```
+
+Renders hunk headers as:
+
+```html
+<div class='hunk'>@@ -oldofs,oldcnt +newofs,newcnt @@ func</div>
+```
+
+### Line Rendering
+
+Each diff line is rendered with a status prefix and CSS class:
+
+| Line Type | CSS Class | Prefix |
+|-----------|----------|--------|
+| Added | `.add` | `+` |
+| Removed | `.del` | `-` |
+| Context | `.ctx` | ` ` |
+| Hunk header | `.hunk` | `@@` |
+
+## Side-by-Side Diff (`ui-ssdiff.c`)
+
+The side-by-side diff view renders old and new versions in adjacent columns.
+
+### LCS Algorithm
+
+`ui-ssdiff.c` implements a Longest Common Subsequence (LCS) algorithm to
+align lines between old and new versions:
+
+```c
+/* LCS computation for line alignment */
+static int *lcs(char *a, int an, char *b, int bn)
+{
+ int *prev, *curr;
+ /* dynamic programming: build LCS table */
+ prev = calloc(bn + 1, sizeof(int));
+ curr = calloc(bn + 1, sizeof(int));
+ for (int i = 1; i <= an; i++) {
+ for (int j = 1; j <= bn; j++) {
+ if (a[i-1] == b[j-1])
+ curr[j] = prev[j-1] + 1;
+ else
+ curr[j] = MAX(prev[j], curr[j-1]);
+ }
+ SWAP(prev, curr);
+ }
+ return prev;
+}
+```
+
+### Deferred Lines
+
+Side-by-side rendering uses a deferred output model:
+
+```c
+struct deferred_lines {
+ int line_no;
+ char *line;
+ struct deferred_lines *next;
+};
+```
+
+Lines are collected and paired before output. For modified lines, the LCS
+algorithm identifies character-level changes and highlights them with
+`<span class='add'>` or `<span class='del'>` within each line.
+
+### Tab Expansion
+
+```c
+static char *replace_tabs(char *line)
+```
+
+Tabs are expanded to spaces for proper column alignment in side-by-side
+view. The tab width is 8 characters.
+
+### Rendering
+
+Side-by-side output uses a two-column `<table>`:
+
+```html
+<table class='ssdiff'>
+ <tr>
+ <td class='lineno'><a>42</a></td>
+ <td class='del'>old line content</td>
+ <td class='lineno'><a>42</a></td>
+ <td class='add'>new line content</td>
+ </tr>
+</table>
+```
+
+Changed characters within a line are highlighted with inline spans.
+
+## Low-Level Diff Helpers (`shared.c`)
+
+### Tree Diff
+
+```c
+void cgit_diff_tree(const struct object_id *old_oid,
+ const struct object_id *new_oid,
+ filepair_fn fn, const char *prefix,
+ int renamelimit)
+```
+
+Computes the diff between two tree objects (typically from two commits).
+Calls `fn` for each changed file pair. `renamelimit` controls rename
+detection threshold.
+
+### Commit Diff
+
+```c
+void cgit_diff_commit(struct commit *commit, filepair_fn fn,
+ const char *prefix)
+```
+
+Diffs a commit against its first parent. For root commits (no parent),
+diffs against an empty tree.
+
+### File Diff
+
+```c
+void cgit_diff_files(const struct object_id *old_oid,
+ const struct object_id *new_oid,
+ unsigned long *old_size,
+ unsigned long *new_size,
+ int *binary, int context,
+ int ignorews, linediff_fn fn)
+```
+
+Performs a line-level diff between two blobs. The `linediff_fn` callback is
+invoked for each output line (add/remove/context).
+
+## Diff in Context: Commit View
+
+`ui-commit.c` uses the diff engine to show changes in commit view:
+
+```c
+void cgit_print_commit(const char *rev, const char *prefix)
+{
+ /* ... commit metadata ... */
+ cgit_print_diff(ctx.qry.sha1, info->parent_sha1, prefix, 0, 0);
+}
+```
+
+## Diff in Context: Log View
+
+`ui-log.c` can optionally show per-commit diffstats:
+
+```c
+if (ctx.cfg.enable_log_filecount) {
+ cgit_diff_commit(commit, inspect_filepair, NULL);
+ /* display changed files count, added/removed */
+}
+```
+
+## Binary Detection
+
+Files are marked as binary when diffing if the content contains null bytes
+or exceeds the configured max-blob-size. Binary files are shown as:
+
+```
+Binary files differ
+```
+
+No line-level diff is performed for binary content.
+
+## Diff Configuration
+
+| Directive | Default | Effect |
+|-----------|---------|--------|
+| `side-by-side-diffs` | 0 | Default diff type |
+| `renamelimit` | -1 | Rename detection limit |
+| `max-blob-size` | 0 | Max blob size for display |
+| `enable-log-filecount` | 0 | Show file counts in log |
+| `enable-log-linecount` | 0 | Show line counts in log |
+
+## Raw Diff Output
+
+The `rawdiff` command outputs a plain-text unified diff without HTML
+wrapping, suitable for piping or downloading:
+
+```c
+static void cmd_rawdiff(struct cgit_context *ctx)
+{
+ ctx->page.mimetype = "text/plain";
+ cgit_print_diff(ctx->qry.sha1, ctx->qry.sha2,
+ ctx->qry.path, 0, 1 /* raw */);
+}
+```
diff --git a/docs/handbook/cgit/filter-system.md b/docs/handbook/cgit/filter-system.md
new file mode 100644
index 0000000000..be6f94e4b7
--- /dev/null
+++ b/docs/handbook/cgit/filter-system.md
@@ -0,0 +1,358 @@
+# cgit — Filter System
+
+## Overview
+
+cgit provides a pluggable content filtering pipeline that transforms text
+before it is rendered in HTML output. Filters are used for tasks such as
+syntax highlighting, README rendering, email obfuscation, and authentication.
+
+Source file: `filter.c`.
+
+## Filter Types
+
+Six filter types are defined, each identified by a constant and linked to an
+entry in the `filter_specs[]` table:
+
+```c
+#define ABOUT_FILTER 0 /* README/about page rendering */
+#define COMMIT_FILTER 1 /* commit message formatting */
+#define SOURCE_FILTER 2 /* source code syntax highlighting */
+#define EMAIL_FILTER 3 /* email address display */
+#define AUTH_FILTER 4 /* authentication/authorization */
+#define OWNER_FILTER 5 /* owner field display */
+```
+
+### Filter Specs Table
+
+```c
+static struct {
+ char *prefix;
+ int args;
+} filter_specs[] = {
+ [ABOUT_FILTER] = { "about", 1 },
+ [COMMIT_FILTER] = { "commit", 0 },
+ [SOURCE_FILTER] = { "source", 1 },
+ [EMAIL_FILTER] = { "email", 2 }, /* email, page */
+ [AUTH_FILTER] = { "auth", 12 },
+ [OWNER_FILTER] = { "owner", 0 },
+};
+```
+
+The `args` field specifies the number of *extra* arguments the filter
+receives (beyond the filter command itself).
+
+## Filter Structure
+
+```c
+struct cgit_filter {
+ char *cmd; /* command or script path */
+ int type; /* filter type constant */
+ int (*open)(struct cgit_filter *, ...); /* start filter */
+ int (*close)(struct cgit_filter *); /* finish filter */
+ void (*fprintf)(struct cgit_filter *, FILE *, const char *fmt, ...);
+ void (*cleanup)(struct cgit_filter *); /* free resources */
+ int argument_count; /* from filter_specs */
+};
+```
+
+Two implementations exist:
+
+| Implementation | Struct | Description |
+|---------------|--------|-------------|
+| Exec filter | `struct cgit_exec_filter` | Fork/exec an external process |
+| Lua filter | `struct cgit_lua_filter` | Execute a Lua script in-process |
+
+## Exec Filters
+
+Exec filters fork a child process and redirect `stdout` through a pipe. All
+data written to `stdout` while the filter is open passes through the child
+process, which can transform it before output.
+
+### Structure
+
+```c
+struct cgit_exec_filter {
+ struct cgit_filter base;
+ char *cmd;
+ char **argv;
+ int old_stdout; /* saved fd for restoring stdout */
+ int pipe_fh[2]; /* pipe: [read, write] */
+ pid_t pid; /* child process id */
+};
+```
+
+### Open Phase
+
+```c
+static int open_exec_filter(struct cgit_filter *base, ...)
+{
+ struct cgit_exec_filter *f = (struct cgit_exec_filter *)base;
+ /* create pipe */
+ pipe(f->pipe_fh);
+ /* save stdout */
+ f->old_stdout = dup(STDOUT_FILENO);
+ /* fork */
+ f->pid = fork();
+ if (f->pid == 0) {
+ /* child: redirect stdin from pipe read end */
+ dup2(f->pipe_fh[0], STDIN_FILENO);
+ close(f->pipe_fh[0]);
+ close(f->pipe_fh[1]);
+ /* exec the filter command with extra args from va_list */
+ execvp(f->cmd, f->argv);
+ /* on failure: */
+ exit(1);
+ }
+ /* parent: redirect stdout to pipe write end */
+ dup2(f->pipe_fh[1], STDOUT_FILENO);
+ close(f->pipe_fh[0]);
+ close(f->pipe_fh[1]);
+ return 0;
+}
+```
+
+### Close Phase
+
+```c
+static int close_exec_filter(struct cgit_filter *base)
+{
+ struct cgit_exec_filter *f = (struct cgit_exec_filter *)base;
+ int status;
+ fflush(stdout);
+ /* restore original stdout */
+ dup2(f->old_stdout, STDOUT_FILENO);
+ close(f->old_stdout);
+ /* wait for child */
+ waitpid(f->pid, &status, 0);
+ /* return child exit status */
+ if (WIFEXITED(status))
+ return WEXITSTATUS(status);
+ return -1;
+}
+```
+
+### Argument Passing
+
+Extra arguments (from `filter_specs[].args`) are passed via `va_list` in the
+open function and become `argv` entries for the child process:
+
+| Filter Type | argv[0] | argv[1] | argv[2] | ... |
+|-------------|---------|---------|---------|-----|
+| ABOUT | cmd | filename | — | — |
+| SOURCE | cmd | filename | — | — |
+| COMMIT | cmd | — | — | — |
+| OWNER | cmd | — | — | — |
+| EMAIL | cmd | email | page | — |
+| AUTH | cmd | (12 args: method, mimetype, http_host, https, authenticated, username, http_cookie, request_method, query_string, referer, path, http_accept) |
+
+## Lua Filters
+
+When cgit is compiled with Lua support, filters can be Lua scripts executed
+in-process without fork/exec overhead.
+
+### Structure
+
+```c
+struct cgit_lua_filter {
+ struct cgit_filter base;
+ char *script_file;
+ lua_State *lua_state;
+};
+```
+
+### Lua API
+
+The Lua script must define a `filter_open()` and `filter_close()` function.
+Data is passed to the Lua script through a custom `write()` function
+registered in the Lua environment.
+
+```lua
+-- Example source filter
+function filter_open(filename)
+ -- Called when the filter opens
+ -- filename is the file being processed
+end
+
+function write(str)
+ -- Called with chunks of content to filter
+ -- Write transformed output
+ html(str)
+end
+
+function filter_close()
+ -- Called when filtering is complete
+ return 0 -- return exit code
+end
+```
+
+### Lua C Bindings
+
+cgit registers several C functions into the Lua environment:
+
+```c
+lua_pushcfunction(lua_state, lua_html); /* html() */
+lua_pushcfunction(lua_state, lua_html_txt); /* html_txt() */
+lua_pushcfunction(lua_state, lua_html_attr); /* html_attr() */
+lua_pushcfunction(lua_state, lua_html_url_path); /* html_url_path() */
+lua_pushcfunction(lua_state, lua_html_url_arg); /* html_url_arg() */
+lua_pushcfunction(lua_state, lua_html_include); /* include() */
+```
+
+These correspond to the C functions in `html.c` and allow the Lua script to
+produce properly escaped HTML output.
+
+### Lua Filter Open
+
+```c
+static int open_lua_filter(struct cgit_filter *base, ...)
+{
+ struct cgit_lua_filter *f = (struct cgit_lua_filter *)base;
+ /* Load and execute the Lua script if not already loaded */
+ if (!f->lua_state) {
+ f->lua_state = luaL_newstate();
+ luaL_openlibs(f->lua_state);
+ /* register C bindings */
+ /* load script file */
+ }
+ /* redirect write() calls to the Lua state */
+ /* call filter_open() in the Lua script, passing extra args */
+ return 0;
+}
+```
+
+### Lua Filter Close
+
+```c
+static int close_lua_filter(struct cgit_filter *base)
+{
+ struct cgit_lua_filter *f = (struct cgit_lua_filter *)base;
+ /* call filter_close() in the Lua script */
+ /* return the script's exit code */
+ return lua_tointeger(f->lua_state, -1);
+}
+```
+
+## Filter Construction
+
+`cgit_new_filter()` creates a new filter instance:
+
+```c
+struct cgit_filter *cgit_new_filter(const char *cmd, filter_type type)
+{
+ if (!cmd || !*cmd)
+ return NULL;
+
+ if (!prefixcmp(cmd, "lua:")) {
+ /* create Lua filter */
+ return new_lua_filter(cmd + 4, type);
+ }
+ if (!prefixcmp(cmd, "exec:")) {
+ /* create exec filter, stripping prefix */
+ return new_exec_filter(cmd + 5, type);
+ }
+ /* default: treat as exec filter */
+ return new_exec_filter(cmd, type);
+}
+```
+
+Prefix rules:
+- `lua:/path/to/script.lua` → Lua filter
+- `exec:/path/to/script` → exec filter
+- `/path/to/script` (no prefix) → exec filter (backward compatibility)
+
+## Filter Usage Points
+
+### About Filter (`ABOUT_FILTER`)
+
+Applied when rendering README and about pages. Called from `ui-summary.c`
+and the about view:
+
+```c
+cgit_open_filter(ctx.repo->about_filter, filename);
+/* write README content */
+cgit_close_filter(ctx.repo->about_filter);
+```
+
+Common use: converting Markdown to HTML.
+
+### Source Filter (`SOURCE_FILTER`)
+
+Applied when displaying file contents in blob/tree views. Called from
+`ui-tree.c`:
+
+```c
+cgit_open_filter(ctx.repo->source_filter, filename);
+/* write file content */
+cgit_close_filter(ctx.repo->source_filter);
+```
+
+Common use: syntax highlighting.
+
+### Commit Filter (`COMMIT_FILTER`)
+
+Applied to commit messages in log and commit views. Called from `ui-log.c`
+and `ui-commit.c`:
+
+```c
+cgit_open_filter(ctx.repo->commit_filter);
+html_txt(info->msg);
+cgit_close_filter(ctx.repo->commit_filter);
+```
+
+Common use: linkifying issue references.
+
+### Email Filter (`EMAIL_FILTER`)
+
+Applied to author/committer email addresses. Receives the email address and
+current page name as arguments:
+
+```c
+cgit_open_filter(ctx.repo->email_filter, email, page);
+html_txt(email);
+cgit_close_filter(ctx.repo->email_filter);
+```
+
+Common use: gravatar integration, email obfuscation.
+
+### Auth Filter (`AUTH_FILTER`)
+
+Used for cookie-based authentication. Receives 12 arguments covering the
+full HTTP request context. See `authentication.md` for details.
+
+### Owner Filter (`OWNER_FILTER`)
+
+Applied when displaying the repository owner.
+
+## Shipped Filter Scripts
+
+cgit ships with filter scripts in the `filters/` directory:
+
+| Script | Type | Description |
+|--------|------|-------------|
+| `syntax-highlighting.py` | SOURCE | Python-based syntax highlighter using Pygments |
+| `syntax-highlighting.sh` | SOURCE | Shell-based highlighter (highlight command) |
+| `about-formatting.sh` | ABOUT | Renders markdown via `markdown` or `rst2html` |
+| `html-converters/md2html` | ABOUT | Standalone markdown-to-HTML converter |
+| `html-converters/rst2html` | ABOUT | reStructuredText-to-HTML converter |
+| `html-converters/txt2html` | ABOUT | Plain text to HTML converter |
+| `email-gravatar.py` | EMAIL | Adds gravatar avatars |
+| `email-libravatar.lua` | EMAIL | Lua-based libravatar integration |
+| `simple-hierarchical-auth.lua` | AUTH | Lua path-based authentication |
+
+## Error Handling
+
+If an exec filter's child process exits with a non-zero status, `close()`
+returns that status code. The calling code can check this to fall back to
+unfiltered output.
+
+If a Lua filter throws an error, the error message is logged via
+`die("lua error")` and the filter is aborted.
+
+## Performance Considerations
+
+- **Exec filters** have per-invocation fork/exec overhead. For high-traffic
+ sites, consider Lua filters or enabling the response cache.
+- **Lua filters** run in-process with no fork overhead but require Lua support
+ to be compiled in.
+- Filters are not called when serving cached responses — the cached output
+ already includes the filtered content.
diff --git a/docs/handbook/cgit/html-rendering.md b/docs/handbook/cgit/html-rendering.md
new file mode 100644
index 0000000000..dab14d66b2
--- /dev/null
+++ b/docs/handbook/cgit/html-rendering.md
@@ -0,0 +1,380 @@
+# cgit — HTML Rendering Engine
+
+## Overview
+
+cgit generates all HTML output through a set of low-level rendering functions
+defined in `html.c` and `html.h`. These functions handle entity escaping,
+URL encoding, and formatted output. Higher-level page structure is built by
+`ui-shared.c`.
+
+Source files: `html.c`, `html.h`, `ui-shared.c`, `ui-shared.h`.
+
+## Output Model
+
+All output functions write directly to `stdout` through the standard I/O
+library. There is no internal buffering beyond the stdio buffer. This
+design works because cgit runs as a CGI process — each request is a
+separate process with its own stdout connected to the web server.
+
+## Core Output Functions
+
+### Raw Output
+
+```c
+void html_raw(const char *data, size_t size);
+```
+
+Writes raw bytes to stdout without any escaping. Used for binary content
+and pre-escaped strings.
+
+### Escaped Text Output
+
+```c
+void html(const char *txt);
+```
+
+Writes a string to stdout without any escaping. Despite sitting beside the
+escaping helpers, `html()` is raw output — use it only for trusted or
+pre-escaped content. (The Lua binding of the same name behaves the same
+way.)
+
+```c
+void html_txt(const char *txt);
+```
+
+Writes a string with HTML entity escaping:
+- `<` → `&lt;`
+- `>` → `&gt;`
+- `&` → `&amp;`
+
+Used for text content that appears inside HTML tags.
+
+```c
+void html_ntxt(const char *txt, int len);
+```
+
+Length-limited version of `html_txt()`. Writes at most `len` characters,
+appending `...` if truncated.
+
+### Attribute Escaping
+
+```c
+void html_attr(const char *txt);
+```
+
+Escapes text for use in HTML attribute values. In addition to the
+characters escaped by `html_txt()`, quote characters are escaped so a
+value cannot break out of the surrounding attribute.
+
+## URL Encoding
+
+### URL Escape Table
+
+`html.c` defines a 256-entry escape table for URL encoding:
+
+```c
+static const char *url_escape_table[256] = {
+ "%00", "%01", "%02", ...,
+ [' '] = "+",
+ ['!'] = NULL, /* pass through */
+ ['"'] = "%22",
+ ['#'] = "%23",
+ ['%'] = "%25",
+ ['&'] = "%26",
+ ['+'] = "%2B",
+ ['?'] = "%3F",
+ /* letters, digits, '-', '_', '.', '~' pass through (NULL) */
+ ...
+};
+```
+
+Characters with a `NULL` entry pass through unmodified. All others are
+replaced with their percent-encoded representations.
+
+### URL Path Encoding
+
+```c
+void html_url_path(const char *txt);
+```
+
+Encodes a URL path component. Uses `url_escape_table` but preserves `/`
+characters (they are structural in paths).
+
+### URL Argument Encoding
+
+```c
+void html_url_arg(const char *txt);
+```
+
+Encodes a URL query parameter value. Uses `url_escape_table` including
+encoding `/` characters.
+
+## Formatted Output
+
+### `fmt()` — Ring Buffer Formatter
+
+```c
+const char *fmt(const char *format, ...);
+```
+
+A `printf`-style formatter that returns a pointer to an internal static
+buffer. Uses a ring of 8 buffers (each 8 KB) to allow multiple `fmt()`
+calls in a single expression:
+
+```c
+#define FMT_BUFS 8
+#define FMT_SIZE 8192
+
+static char bufs[FMT_BUFS][FMT_SIZE];
+static int bufidx;
+
+const char *fmt(const char *format, ...)
+{
+ bufidx = (bufidx + 1) % FMT_BUFS;
+ va_list args;
+ va_start(args, format);
+ vsnprintf(bufs[bufidx], FMT_SIZE, format, args);
+ va_end(args);
+ return bufs[bufidx];
+}
+```
+
+This is used extensively throughout cgit for constructing strings without
+explicit memory management. The ring buffer avoids use-after-free for up to
+8 nested calls.
+
+### `fmtalloc()` — Heap Formatter
+
+```c
+char *fmtalloc(const char *format, ...);
+```
+
+Like `fmt()` but allocates a new heap buffer with `xstrfmt()`. Used when
+the result must outlive the ring buffer cycle.
+
+### `htmlf()` — Formatted HTML
+
+```c
+void htmlf(const char *format, ...);
+```
+
+`printf`-style output directly to stdout. Does NOT perform HTML escaping —
+the caller must ensure the format string and arguments are safe.
+
+## Form Helpers
+
+### Hidden Fields
+
+```c
+void html_hidden(const char *name, const char *value);
+```
+
+Generates a hidden form field:
+
+```html
+<input type='hidden' name='name' value='value' />
+```
+
+Values are attribute-escaped.
+
+### Option Elements
+
+```c
+void html_option(const char *value, const char *text, const char *selected_value);
+```
+
+Generates an `<option>` element, marking it as selected if `value` matches
+`selected_value`:
+
+```html
+<option value='value' selected='selected'>text</option>
+```
+
+### Checkbox Input
+
+```c
+void html_checkbox(const char *name, int value);
+```
+
+Generates a checkbox input.
+
+### Text Input
+
+```c
+void html_txt_input(const char *name, const char *value, int size);
+```
+
+Generates a text input field.
+
+## Link Generation
+
+```c
+void html_link_open(const char *url, const char *title, const char *class);
+void html_link_close(void);
+```
+
+Generate `<a>` tags with optional title and class attributes. URL is
+path-escaped.
+
+## File Inclusion
+
+```c
+void html_include(const char *filename);
+```
+
+Reads a file from disk and writes its contents to stdout without escaping.
+Used for header/footer file inclusion configured via the `header` and
+`footer` directives.
+
+## Page Structure (`ui-shared.c`)
+
+### HTTP Headers
+
+```c
+void cgit_print_http_headers(void);
+```
+
+Emits HTTP response headers based on `ctx.page`:
+
+```
+Status: 200 OK
+Content-Type: text/html; charset=utf-8
+Last-Modified: Thu, 01 Jan 2024 00:00:00 GMT
+Expires: Thu, 01 Jan 2024 01:00:00 GMT
+ETag: "abc123"
+```
+
+Fields are only emitted when the corresponding `ctx.page` fields are set.
+
+### HTML Document Head
+
+```c
+void cgit_print_docstart(void);
+```
+
+Emits the HTML5 doctype, `<html>`, and `<head>` section:
+
+```html
+<!DOCTYPE html>
+<html lang='en'>
+<head>
+ <title>repo - page</title>
+ <meta name='generator' content='cgit v0.0.5-1-Project-Tick'/>
+ <meta name='robots' content='index, nofollow'/>
+ <link rel='stylesheet' href='/cgit/cgit.css'/>
+ <link rel='icon' href='/favicon.ico'/>
+</head>
+```
+
+### Page Header
+
+```c
+void cgit_print_pageheader(void);
+```
+
+Renders the page header with logo, navigation tabs, and search form.
+Navigation tabs are context-sensitive — repository pages show
+summary/refs/log/tree/commit/diff/stats/etc.
+
+### Page Footer
+
+```c
+void cgit_print_docend(void);
+```
+
+Closes the HTML document with footer content and closing tags.
+
+### Full Page Layout
+
+```c
+void cgit_print_layout_start(void);
+void cgit_print_layout_end(void);
+```
+
+These wrap the page content, calling `cgit_print_http_headers()`,
+`cgit_print_docstart()`, `cgit_print_pageheader()`, etc. Commands with
+`want_layout=1` have their output wrapped in this skeleton.
+
+## Repository Navigation
+
+```c
+void cgit_print_repoheader(void);
+```
+
+For each page within a repository, renders:
+- Repository name and description
+- Navigation tabs: summary, refs, log, tree, commit, diff, stats
+- Clone URLs
+- Badges
+
+## Link Functions
+
+`ui-shared.c` provides numerous helper functions for generating
+context-aware links:
+
+```c
+void cgit_summary_link(const char *name, const char *title,
+ const char *class, const char *head);
+void cgit_tag_link(const char *name, const char *title,
+ const char *class, const char *tag);
+void cgit_tree_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path);
+void cgit_log_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path,
+ int ofs, const char *grep, const char *pattern,
+ int showmsg, int follow);
+void cgit_commit_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path);
+void cgit_patch_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path);
+void cgit_refs_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path);
+void cgit_diff_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *new_rev, const char *old_rev,
+ const char *path, int toggle_hierarchical_threading);
+void cgit_stats_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *path);
+void cgit_plain_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path);
+void cgit_blame_link(const char *name, const char *title,
+ const char *class, const char *head,
+ const char *rev, const char *path);
+void cgit_object_link(struct object *obj);
+void cgit_submodule_link(const char *name, const char *path,
+ const char *commit);
+```
+
+Each function builds a complete `<a>` tag with the appropriate URL, including
+all required query parameters for the target page.
+
+## Diff Output Helpers
+
+```c
+void cgit_print_diff_hunk_header(int oldofs, int oldcnt,
+ int newofs, int newcnt, const char *func);
+void cgit_print_diff_line_prefix(int type);
+```
+
+These render diff hunks with proper CSS classes for syntax coloring (`.add`,
+`.del`, `.hunk`).
+
+## Error Pages
+
+```c
+void cgit_print_error(const char *msg);
+void cgit_print_error_page(int code, const char *msg, const char *fmt, ...);
+```
+
+`cgit_print_error_page()` sets the HTTP status code and wraps the error
+message in a full page layout.
+
+## Encoding
+
+All text output assumes UTF-8. The `Content-Type` header is always
+`charset=utf-8`. There is no character set conversion.
diff --git a/docs/handbook/cgit/lua-integration.md b/docs/handbook/cgit/lua-integration.md
new file mode 100644
index 0000000000..26d605862e
--- /dev/null
+++ b/docs/handbook/cgit/lua-integration.md
@@ -0,0 +1,428 @@
+# cgit — Lua Integration
+
+## Overview
+
+cgit supports Lua as an in-process scripting language for content filters.
+Lua filters avoid the fork/exec overhead of shell-based filters and have
+direct access to cgit's HTML output functions. Lua support is optional and
+auto-detected at compile time.
+
+Source files: `filter.c` (Lua filter implementation), `cgit.mk` (Lua detection).
+
+## Compile-Time Detection
+
+Lua support is detected by `cgit.mk` using `pkg-config`:
+
+```makefile
+ifndef NO_LUA
+LUAPKGS := luajit lua lua5.2 lua5.1
+LUAPKG := $(shell for p in $(LUAPKGS); do \
+ $(PKG_CONFIG) --exists $$p 2>/dev/null && echo $$p && break; done)
+ifneq ($(LUAPKG),)
+ CGIT_CFLAGS += -DHAVE_LUA $(shell $(PKG_CONFIG) --cflags $(LUAPKG))
+ CGIT_LIBS += $(shell $(PKG_CONFIG) --libs $(LUAPKG))
+endif
+endif
+```
+
+Detection order: `luajit` → `lua` → `lua5.2` → `lua5.1`.
+
+To disable Lua even when available:
+
+```bash
+make NO_LUA=1
+```
+
+The `HAVE_LUA` preprocessor define gates all Lua-related code:
+
+```c
+#ifdef HAVE_LUA
+/* Lua filter implementation */
+#else
+/* stub: cgit_new_filter() returns NULL for lua: prefix */
+#endif
+```
+
+## Lua Filter Structure
+
+```c
+struct cgit_lua_filter {
+ struct cgit_filter base; /* common filter fields */
+ char *script_file; /* path to Lua script */
+ lua_State *lua_state; /* Lua interpreter state */
+};
+```
+
+The `lua_State` is lazily initialized on first use and reused for subsequent
+invocations of the same filter.
+
+## C API Exposed to Lua
+
+cgit registers these C functions in the Lua environment:
+
+### `html(str)`
+
+Writes raw HTML to stdout (no escaping):
+
+```c
+static int lua_html(lua_State *L)
+{
+ const char *str = luaL_checkstring(L, 1);
+ html(str);
+ return 0;
+}
+```
+
+### `html_txt(str)`
+
+Writes HTML-escaped text:
+
+```c
+static int lua_html_txt(lua_State *L)
+{
+ const char *str = luaL_checkstring(L, 1);
+ html_txt(str);
+ return 0;
+}
+```
+
+### `html_attr(str)`
+
+Writes attribute-escaped text:
+
+```c
+static int lua_html_attr(lua_State *L)
+{
+ const char *str = luaL_checkstring(L, 1);
+ html_attr(str);
+ return 0;
+}
+```
+
+### `html_url_path(str)`
+
+Writes a URL-encoded path:
+
+```c
+static int lua_html_url_path(lua_State *L)
+{
+ const char *str = luaL_checkstring(L, 1);
+ html_url_path(str);
+ return 0;
+}
+```
+
+### `html_url_arg(str)`
+
+Writes a URL-encoded query argument:
+
+```c
+static int lua_html_url_arg(lua_State *L)
+{
+ const char *str = luaL_checkstring(L, 1);
+ html_url_arg(str);
+ return 0;
+}
+```
+
+### `html_include(filename)`
+
+Includes a file's contents in the output. Note that, unlike the other
+helpers, this function is registered under the global name `include`
+(not `html_include`) in the Lua environment:
+
+```c
+static int lua_html_include(lua_State *L)
+{
+ const char *filename = luaL_checkstring(L, 1);
+ html_include(filename);
+ return 0;
+}
+```
+
+## Lua Filter Lifecycle
+
+### Initialization
+
+On first `open()`, the Lua state is created and the script is loaded:
+
+```c
+static int open_lua_filter(struct cgit_filter *base, ...)
+{
+ struct cgit_lua_filter *f = (struct cgit_lua_filter *)base;
+
+ if (!f->lua_state) {
+ /* Create new Lua state */
+ f->lua_state = luaL_newstate();
+ luaL_openlibs(f->lua_state);
+
+ /* Register C functions */
+ lua_pushcfunction(f->lua_state, lua_html);
+ lua_setglobal(f->lua_state, "html");
+ lua_pushcfunction(f->lua_state, lua_html_txt);
+ lua_setglobal(f->lua_state, "html_txt");
+ lua_pushcfunction(f->lua_state, lua_html_attr);
+ lua_setglobal(f->lua_state, "html_attr");
+ lua_pushcfunction(f->lua_state, lua_html_url_path);
+ lua_setglobal(f->lua_state, "html_url_path");
+ lua_pushcfunction(f->lua_state, lua_html_url_arg);
+ lua_setglobal(f->lua_state, "html_url_arg");
+ lua_pushcfunction(f->lua_state, lua_html_include);
+ lua_setglobal(f->lua_state, "include");
+
+ /* Load and execute the script file */
+ if (luaL_dofile(f->lua_state, f->script_file))
+ die("lua error: %s",
+ lua_tostring(f->lua_state, -1));
+ }
+
+ /* Redirect stdout writes to lua write() function */
+
+ /* Call filter_open() with filter-specific arguments */
+ lua_getglobal(f->lua_state, "filter_open");
+ /* push arguments from va_list */
+ lua_call(f->lua_state, nargs, 0);
+
+ return 0;
+}
+```
+
+### Data Flow
+
+While the filter is open, data written to stdout is intercepted via a custom
+`write()` function:
+
+```c
+/* The fprintf callback for Lua filters */
+static void lua_fprintf(struct cgit_filter *base, FILE *f,
+ const char *fmt, ...)
+{
+ struct cgit_lua_filter *lf = (struct cgit_lua_filter *)base;
+ /* format the string */
+ /* call the Lua write() function with the formatted text */
+ lua_getglobal(lf->lua_state, "write");
+ lua_pushstring(lf->lua_state, buf);
+ lua_call(lf->lua_state, 1, 0);
+}
+```
+
+### Close
+
+```c
+static int close_lua_filter(struct cgit_filter *base)
+{
+ struct cgit_lua_filter *f = (struct cgit_lua_filter *)base;
+
+ /* Call filter_close() */
+ lua_getglobal(f->lua_state, "filter_close");
+ lua_call(f->lua_state, 0, 1);
+
+ /* Get return code */
+ int rc = lua_tointeger(f->lua_state, -1);
+ lua_pop(f->lua_state, 1);
+
+ return rc;
+}
+```
+
+### Cleanup
+
+```c
+static void cleanup_lua_filter(struct cgit_filter *base)
+{
+ struct cgit_lua_filter *f = (struct cgit_lua_filter *)base;
+ if (f->lua_state)
+ lua_close(f->lua_state);
+}
+```
+
+## Lua Script Interface
+
+### Required Functions
+
+A Lua filter script must define these functions:
+
+```lua
+function filter_open(...)
+ -- Called when the filter opens
+ -- Arguments are filter-type specific
+end
+
+function write(str)
+ -- Called with content chunks to process
+ -- Transform and output using html() functions
+end
+
+function filter_close()
+ -- Called when filtering is complete
+ return 0 -- return exit code
+end
+```
+
+### Available Global Functions
+
+| Function | Description |
+|----------|-------------|
+| `html(str)` | Output raw HTML |
+| `html_txt(str)` | Output HTML-escaped text |
+| `html_attr(str)` | Output attribute-escaped text |
+| `html_url_path(str)` | Output URL-path-encoded text |
+| `html_url_arg(str)` | Output URL-argument-encoded text |
+| `include(filename)` | Include file contents in output |
+
+All standard Lua libraries are available (`string`, `table`, `math`, `io`,
+`os`, etc.).
+
+## Example Filters
+
+### Source Highlighting Filter
+
+```lua
+-- syntax-highlighting.lua
+local filename = ""
+local buffer = {}
+
+function filter_open(fn)
+ filename = fn
+ buffer = {}
+end
+
+function write(str)
+ table.insert(buffer, str)
+end
+
+function filter_close()
+    local content = table.concat(buffer)
+    local ext = filename:match("%.(%w+)$") or ""
+
+    -- Keyword set for a minimal highlighter. Note that this skeleton
+    -- only emits the escaped source; a real filter would tokenize each
+    -- line (choosing a keyword set based on `ext`) and wrap matches in
+    -- styled <span> elements before output.
+    local keywords = {
+        ["function"] = true, ["local"] = true,
+        ["if"] = true, ["then"] = true,
+        ["end"] = true, ["return"] = true,
+        ["for"] = true, ["while"] = true,
+        ["do"] = true, ["else"] = true,
+    }
+
+    html("<pre><code>")
+    for line in content:gmatch("([^\n]*)\n?") do
+        html_txt(line)
+        html("\n")
+    end
+    html("</code></pre>")
+
+    return 0
+end
+```
+
+### Email Obfuscation Filter
+
+```lua
+-- email-obfuscate.lua
+function filter_open(email, page)
+ -- email = the email address
+ -- page = current page name
+end
+
+function write(str)
+ -- Replace @ with [at] for display
+ local obfuscated = str:gsub("@", " [at] ")
+ html_txt(obfuscated)
+end
+
+function filter_close()
+ return 0
+end
+```
+
+### About/README Filter
+
+```lua
+-- about-markdown.lua
+local buffer = {}
+
+function filter_open(filename)
+ buffer = {}
+end
+
+function write(str)
+ table.insert(buffer, str)
+end
+
+function filter_close()
+    local content = table.concat(buffer)
+    -- Process markdown (using a Lua markdown library) or shell out to
+    -- a converter. io.popen() pipes are unidirectional — you cannot
+    -- write to and read from the same handle — so stage the content
+    -- through a temporary file:
+    local tmpfile = os.tmpname()
+    local f = io.open(tmpfile, "w")
+    f:write(content)
+    f:close()
+    local handle = io.popen("cmark < " .. tmpfile, "r")
+    local result = handle:read("*a")
+    handle:close()
+    os.remove(tmpfile)
+    html(result)
+    return 0
+end
+
+### Auth Filter (Lua)
+
+```lua
+-- auth.lua
+-- The auth filter receives the request context as arguments. Since
+-- filter_open()'s return value is discarded by the C glue, the
+-- authentication verdict must be returned from filter_close().
+local status = 1
+
+function filter_open(cookie, method, query, referer, path,
+                     host, https, repo, page, accept, phase)
+    if phase == "cookie" then
+        -- Validate session cookie
+        if valid_session(cookie) then
+            status = 0 -- authenticated
+        else
+            status = 1 -- not authenticated
+        end
+    elseif phase == "post" then
+        -- Handle login form submission
+    elseif phase == "authorize" then
+        -- Check repository access
+    end
+end
+
+function write(str)
+    html(str)
+end
+
+function filter_close()
+    return status
+end
+```
+
+## Performance
+
+Lua filters offer significant performance advantages over exec filters:
+
+| Aspect | Exec Filter | Lua Filter |
+|--------|-------------|------------|
+| Startup | fork() + exec() per request | One-time Lua state creation |
+| Process | New process per invocation | In-process |
+| Memory | Separate address space | Shared memory |
+| Latency | ~1-5ms fork overhead | ~0.01ms function call |
+| Libraries | Any language | Lua libraries only |
+
+## Limitations
+
+- Lua scripts run in the same process as cgit — a crash in the script
+ crashes cgit
+- Standard Lua I/O functions (`print`, `io.write`) bypass cgit's output
+ pipeline — use `html()` and friends instead
+- The Lua state persists between invocations within the same CGI process,
+ but CGI processes are typically short-lived
+- Error handling is via `die()` — a Lua error terminates the CGI process
+
+## Configuration
+
+```ini
+# Use Lua filter for source highlighting
+source-filter=lua:/usr/share/cgit/filters/syntax-highlight.lua
+
+# Use Lua filter for about pages
+about-filter=lua:/usr/share/cgit/filters/about-markdown.lua
+
+# Use Lua filter for authentication
+auth-filter=lua:/usr/share/cgit/filters/simple-hierarchical-auth.lua
+
+# Use Lua filter for email display
+email-filter=lua:/usr/share/cgit/filters/email-libravatar.lua
+```
diff --git a/docs/handbook/cgit/overview.md b/docs/handbook/cgit/overview.md
new file mode 100644
index 0000000000..bb09d33e8b
--- /dev/null
+++ b/docs/handbook/cgit/overview.md
@@ -0,0 +1,262 @@
+# cgit — Overview
+
+## What Is cgit?
+
+cgit is a fast, lightweight web frontend for Git repositories, implemented as a
+CGI application written in C. It links directly against libgit (the C library
+that forms the core of the `git` command-line tool), giving it native access to
+repository objects without spawning external processes for every request. This
+design makes cgit one of the fastest Git web interfaces available.
+
+The Project Tick fork carries version `0.0.5-1-Project-Tick` (defined in the
+top-level `Makefile` as `CGIT_VERSION`). It builds against Git 2.46.0 and
+extends the upstream cgit with features such as subtree display, SPDX license
+detection, badge support, Code of Conduct / CLA pages, root links, and an
+enhanced summary page with repository metadata.
+
+## Key Design Goals
+
+| Goal | How cgit achieves it |
+|------|---------------------|
+| **Speed** | Direct libgit linkage; file-based response cache; `sendfile()` on Linux |
+| **Security** | `GIT_CONFIG_NOSYSTEM=1` set at load time; HTML entity escaping in every output function; directory-traversal guards; auth-filter framework |
+| **Simplicity** | Single CGI binary; flat config file (`cgitrc`); no database requirement |
+| **Extensibility** | Pluggable filter system (exec / Lua) for about, commit, source, email, owner, and auth content |
+
+## Source File Map
+
+The entire cgit source tree lives in `cgit/`. Every `.c` file has a matching
+`.h` (with a few exceptions such as `shared.c` and `parsing.c` which declare
+their interfaces in `cgit.h`).
+
+### Core files
+
+| File | Purpose |
+|------|---------|
+| `cgit.h` | Master header — includes libgit headers; defines all major types (`cgit_repo`, `cgit_config`, `cgit_query`, `cgit_context`, etc.) and function prototypes |
+| `cgit.c` | Entry point — `prepare_context()`, `config_cb()`, `querystring_cb()`, `process_request()`, `main()` |
+| `shared.c` | Global variables (`cgit_repolist`, `ctx`); repo management (`cgit_add_repo`, `cgit_get_repoinfo`); diff helpers; parsing helpers |
+| `parsing.c` | Commit/tag parsing (`cgit_parse_commit`, `cgit_parse_tag`, `cgit_parse_url`) |
+| `cmd.c` | Command dispatch table — maps URL page names to handler functions |
+| `cmd.h` | `struct cgit_cmd` definition; `cgit_get_cmd()` prototype |
+| `configfile.c` | Generic `name=value` config parser (`parse_configfile`) |
+| `configfile.h` | `configfile_value_fn` typedef; `parse_configfile` prototype |
+
+### Infrastructure files
+
+| File | Purpose |
+|------|---------|
+| `cache.c` / `cache.h` | File-based response cache — FNV-1 hashing, slot open/lock/fill/unlock cycle |
+| `filter.c` | Filter framework — exec filters (fork/exec), Lua filters (`luaL_newstate`) |
+| `html.c` / `html.h` | HTML output primitives — entity escaping, URL encoding, form helpers |
+| `scan-tree.c` / `scan-tree.h` | Filesystem repository scanning — `scan_tree()`, `scan_projects()` |
+
+### UI modules (`ui-*.c` / `ui-*.h`)
+
+| Module | Page | Handler function |
+|--------|------|-----------------|
+| `ui-repolist` | `repolist` | `cgit_print_repolist()` |
+| `ui-summary` | `summary` | `cgit_print_summary()` |
+| `ui-log` | `log` | `cgit_print_log()` |
+| `ui-commit` | `commit` | `cgit_print_commit()` |
+| `ui-diff` | `diff` | `cgit_print_diff()` |
+| `ui-tree` | `tree` | `cgit_print_tree()` |
+| `ui-blob` | `blob` | `cgit_print_blob()` |
+| `ui-refs` | `refs` | `cgit_print_refs()` |
+| `ui-tag` | `tag` | `cgit_print_tag()` |
+| `ui-snapshot` | `snapshot` | `cgit_print_snapshot()` |
+| `ui-plain` | `plain` | `cgit_print_plain()` |
+| `ui-blame` | `blame` | `cgit_print_blame()` |
+| `ui-patch` | `patch` | `cgit_print_patch()` |
+| `ui-atom` | `atom` | `cgit_print_atom()` |
+| `ui-clone` | `HEAD` / `info` / `objects` | `cgit_clone_head()`, `cgit_clone_info()`, `cgit_clone_objects()` |
+| `ui-stats` | `stats` | `cgit_show_stats()` |
+| `ui-ssdiff` | (helper) | Side-by-side diff rendering via LCS algorithm |
+| `ui-shared` | (helper) | HTTP headers, HTML page skeleton, link generation |
+
+### Static assets
+
+| File | Description |
+|------|-------------|
+| `cgit.css` | Default stylesheet |
+| `cgit.js` | Client-side JavaScript (e.g. tree filtering) |
+| `cgit.png` | Default logo |
+| `favicon.ico` | Default favicon |
+| `robots.txt` | Default robots file |
+
+## Core Data Structures
+
+All major types are defined in `cgit.h`. The single global
+`struct cgit_context ctx` (declared in `shared.c`) holds the entire request
+state:
+
+```c
+struct cgit_context {
+ struct cgit_environment env; /* CGI environment variables */
+ struct cgit_query qry; /* Parsed query/URL parameters */
+ struct cgit_config cfg; /* Global configuration */
+ struct cgit_repo *repo; /* Currently selected repository (or NULL) */
+ struct cgit_page page; /* HTTP response metadata */
+};
+```
+
+### `struct cgit_repo`
+
+Represents a single Git repository. Key fields:
+
+```c
+struct cgit_repo {
+ char *url; /* URL-visible name (e.g. "myproject") */
+ char *name; /* Display name */
+ char *basename; /* Last path component */
+ char *path; /* Filesystem path to .git directory */
+ char *desc; /* Description string */
+ char *owner; /* Repository owner */
+ char *defbranch; /* Default branch (NULL → guess from HEAD) */
+ char *section; /* Section for grouped display */
+ char *clone_url; /* Clone URL override */
+ char *homepage; /* Project homepage URL */
+ struct string_list readme; /* README file references */
+ struct string_list badges; /* Badge image URLs */
+ int snapshots; /* Bitmask of enabled snapshot formats */
+ int enable_blame; /* Whether blame view is enabled */
+ int enable_commit_graph;/* Whether commit graph is shown in log */
+ int enable_subtree; /* Whether subtree detection is enabled */
+ int max_stats; /* Stats period index (0=disabled) */
+ int hide; /* 1 = hidden from listing */
+ int ignore; /* 1 = completely ignored */
+ struct cgit_filter *about_filter; /* Per-repo about filter */
+ struct cgit_filter *source_filter; /* Per-repo source highlighting */
+ struct cgit_filter *email_filter; /* Per-repo email filter */
+ struct cgit_filter *commit_filter; /* Per-repo commit message filter */
+ struct cgit_filter *owner_filter; /* Per-repo owner filter */
+ /* ... */
+};
+```
+
+### `struct cgit_query`
+
+Holds all parsed URL/query-string parameters:
+
+```c
+struct cgit_query {
+ int has_symref, has_oid, has_difftype;
+ char *raw; /* Raw query string */
+ char *repo; /* Repository URL */
+ char *page; /* Page name (log, commit, diff, ...) */
+ char *search; /* Search query (q=) */
+ char *grep; /* Search type (qt=) */
+ char *head; /* Branch/ref (h=) */
+ char *oid, *oid2; /* Object IDs (id=, id2=) */
+ char *path; /* Path within repository */
+ char *name; /* Snapshot filename */
+ int ofs; /* Pagination offset */
+ int showmsg; /* Show full commit messages in log */
+ diff_type difftype; /* DIFF_UNIFIED / DIFF_SSDIFF / DIFF_STATONLY */
+ int context; /* Diff context lines */
+ int ignorews; /* Ignore whitespace in diffs */
+ int follow; /* Follow renames in log */
+ char *vpath; /* Virtual path (set by cmd dispatch) */
+ /* ... */
+};
+```
+
+## Request Lifecycle
+
+1. **Environment setup** — The `constructor_environment()` function runs before
+ `main()` (via `__attribute__((constructor))`). It sets
+ `GIT_CONFIG_NOSYSTEM=1` and `GIT_ATTR_NOSYSTEM=1`, then unsets `HOME` and
+ `XDG_CONFIG_HOME` to prevent Git from reading user/system configurations.
+
+2. **Context initialization** — `prepare_context()` zeroes out `ctx` and sets
+ all configuration defaults (cache sizes, TTLs, feature flags, etc.). CGI
+ environment variables are read from `getenv()`.
+
+3. **Configuration parsing** — `parse_configfile()` reads the cgitrc file
+ (default `/etc/cgitrc`, overridable via `$CGIT_CONFIG`) and calls
+ `config_cb()` for each `name=value` pair. Repository definitions begin with
+ `repo.url=` and subsequent `repo.*` directives configure that repository.
+
+4. **Query parsing** — If running in CGI mode (no `$NO_HTTP`),
+ `http_parse_querystring()` breaks the query string into name/value pairs and
+ passes them to `querystring_cb()`. The `url=` parameter is further parsed by
+ `cgit_parse_url()` which splits it into repo, page, and path components.
+
+5. **Authentication** — `authenticate_cookie()` checks whether an `auth-filter`
+ is configured. If so, it invokes the filter with function
+ `"authenticate-cookie"` and sets `ctx.env.authenticated` from the filter's
+ exit code. POST requests to `/?p=login` route through
+ `authenticate_post()` instead.
+
+6. **Cache lookup** — If caching is enabled (`cache-size > 0`), a cache key is
+ constructed from the URL and passed to `cache_process()`. On a cache hit the
+ stored response is sent directly via `sendfile()`. On a miss, stdout is
+ redirected to a lock file and the request proceeds through normal processing.
+
+7. **Command dispatch** — `cgit_get_cmd()` looks up `ctx.qry.page` in the
+ static `cmds[]` table (defined in `cmd.c`). If the command requires a
+ repository (`want_repo == 1`), the repository is initialized via
+ `prepare_repo_env()` and `prepare_repo_cmd()`.
+
+8. **Page rendering** — The matched command's handler function is called. Each
+ handler uses `cgit_print_http_headers()`, `cgit_print_docstart()`,
+ `cgit_print_pageheader()`, and `cgit_print_docend()` (from `ui-shared.c`)
+ to frame their output inside a proper HTML document.
+
+9. **Cleanup** — `cgit_cleanup_filters()` reaps all filter resources (closing
+ Lua states, freeing argv arrays).
+
+## Version String
+
+The version is compiled into the binary via:
+
+```makefile
+CGIT_VERSION = 0.0.5-1-Project-Tick
+```
+
+and exposed as the global:
+
+```c
+const char *cgit_version = CGIT_VERSION;
+```
+
+This string appears in the HTML footer (rendered by `ui-shared.c`) and in patch
+output trailers.
+
+## Relationship to Git
+
+cgit is built *inside* the Git source tree. The `Makefile` downloads
+Git 2.46.0, extracts it as a `git/` subdirectory, then calls `make -C git -f
+../cgit.mk` which includes Git's own `Makefile` to inherit all build variables,
+object files, and linker flags. The resulting `cgit` binary is a statically
+linked combination of cgit's own object files and libgit.
+
+## Time Constants
+
+`cgit.h` defines convenience macros used for relative date display:
+
+```c
+#define TM_MIN 60
+#define TM_HOUR (TM_MIN * 60)
+#define TM_DAY (TM_HOUR * 24)
+#define TM_WEEK (TM_DAY * 7)
+#define TM_YEAR (TM_DAY * 365)
+#define TM_MONTH (TM_YEAR / 12.0)
+```
+
+These are used by `cgit_print_age()` in `ui-shared.c` to render "2 hours ago"
+style timestamps.
+
+## Default Encoding
+
+```c
+#define PAGE_ENCODING "UTF-8"
+```
+
+All commit messages are re-encoded to UTF-8 before display (see
+`cgit_parse_commit()` in `parsing.c`).
+
+## License
+
+cgit is licensed under the GNU General Public License v2. The `COPYING` file
+in the cgit directory contains the full text.
diff --git a/docs/handbook/cgit/repository-discovery.md b/docs/handbook/cgit/repository-discovery.md
new file mode 100644
index 0000000000..9b961e74cf
--- /dev/null
+++ b/docs/handbook/cgit/repository-discovery.md
@@ -0,0 +1,355 @@
+# cgit — Repository Discovery
+
+## Overview
+
+cgit discovers repositories through two mechanisms: explicit `repo.url=`
+entries in the configuration file, and automatic filesystem scanning via
+`scan-path`. The scan-tree subsystem recursively searches directories for
+git repositories and auto-configures them.
+
+Source files: `scan-tree.c`, `scan-tree.h`, `shared.c` (repository list management).
+
+## Manual Repository Configuration
+
+Repositories can be explicitly defined in the cgitrc file:
+
+```ini
+repo.url=myproject
+repo.path=/srv/git/myproject.git
+repo.desc=My project description
+repo.owner=Alice
+```
+
+Each `repo.url=` triggers `cgit_add_repo()` in `shared.c`, which creates a
+new `cgit_repo` entry in the global repository list.
+
+### `cgit_add_repo()`
+
+```c
+struct cgit_repo *cgit_add_repo(const char *url)
+{
+ struct cgit_repo *ret;
+ /* grow the repo array if needed */
+ if (cgit_repolist.count >= cgit_repolist.length) {
+ /* realloc with doubled capacity */
+ }
+ ret = &cgit_repolist.repos[cgit_repolist.count++];
+ /* initialize with defaults from ctx.cfg */
+ ret->url = xstrdup(url);
+ ret->name = ret->url;
+ ret->path = NULL;
+ ret->desc = cgit_default_repo_desc;
+ ret->owner = NULL;
+ ret->section = ctx.cfg.section;
+ ret->snapshots = ctx.cfg.snapshots;
+ /* ... inherit all global defaults ... */
+ return ret;
+}
+```
+
+## Repository Lookup
+
+```c
+struct cgit_repo *cgit_get_repoinfo(const char *url)
+{
+ int i;
+ for (i = 0; i < cgit_repolist.count; i++) {
+ if (!strcmp(cgit_repolist.repos[i].url, url))
+ return &cgit_repolist.repos[i];
+ }
+ return NULL;
+}
+```
+
+This is a linear scan — adequate for typical installations with dozens to
+hundreds of repositories.
+
+## Filesystem Scanning: `scan-path`
+
+The `scan-path` configuration directive triggers automatic repository
+discovery. When encountered in the config file, `scan_tree()` or
+`scan_projects()` is called immediately.
+
+### `scan_tree()`
+
+```c
+void scan_tree(const char *path, repo_config_fn fn)
+```
+
+Recursively scans `path` for git repositories:
+
+```c
+static void scan_path(const char *base, const char *path, repo_config_fn fn)
+{
+ DIR *dir;
+ struct dirent *ent;
+
+ dir = opendir(path);
+ if (!dir) return;
+
+ while ((ent = readdir(dir)) != NULL) {
+ /* skip "." and ".." */
+ /* skip hidden directories unless scan-hidden-path=1 */
+
+ if (is_git_dir(fullpath)) {
+ /* found a bare repository */
+ add_repo(base, fullpath, fn);
+		} else if (is_git_dir(fmt("%s/.git", fullpath))) {
+			/* found a non-bare repository */
+			add_repo(base, fmt("%s/.git", fullpath), fn);
+ } else {
+ /* recurse into subdirectory */
+ scan_path(base, fullpath, fn);
+ }
+ }
+ closedir(dir);
+}
+```
+
+### Git Directory Detection: `is_git_dir()`
+
+```c
+static int is_git_dir(const char *path)
+{
+ struct stat st;
+ struct strbuf pathbuf = STRBUF_INIT;
+
+ /* check for path/HEAD */
+ strbuf_addf(&pathbuf, "%s/HEAD", path);
+ if (stat(pathbuf.buf, &st)) {
+ strbuf_release(&pathbuf);
+ return 0;
+ }
+
+ /* check for path/objects */
+ strbuf_reset(&pathbuf);
+ strbuf_addf(&pathbuf, "%s/objects", path);
+ if (stat(pathbuf.buf, &st) || !S_ISDIR(st.st_mode)) {
+ strbuf_release(&pathbuf);
+ return 0;
+ }
+
+ /* check for path/refs */
+ strbuf_reset(&pathbuf);
+ strbuf_addf(&pathbuf, "%s/refs", path);
+ if (stat(pathbuf.buf, &st) || !S_ISDIR(st.st_mode)) {
+ strbuf_release(&pathbuf);
+ return 0;
+ }
+
+ strbuf_release(&pathbuf);
+ return 1;
+}
+```
+
+A directory is considered a git repository if it contains `HEAD`, `objects/`,
+and `refs/` subdirectories.
+
+### Repository Registration: `add_repo()`
+
+When a git directory is found, `add_repo()` creates a repository entry:
+
+```c
+static void add_repo(const char *base, const char *path, repo_config_fn fn)
+{
+ /* derive URL from path relative to base */
+ /* strip .git suffix if remove-suffix is set */
+ struct cgit_repo *repo = cgit_add_repo(url);
+ repo->path = xstrdup(path);
+
+ /* read gitweb config from the repo */
+ if (ctx.cfg.enable_git_config) {
+ char *gitconfig = fmt("%s/config", path);
+ parse_configfile(gitconfig, gitconfig_config);
+ }
+
+ /* read owner from filesystem */
+ if (!repo->owner) {
+ /* stat the repo dir and lookup uid owner */
+ struct stat st;
+ if (!stat(path, &st)) {
+ struct passwd *pw = getpwuid(st.st_uid);
+ if (pw)
+ repo->owner = xstrdup(pw->pw_name);
+ }
+ }
+
+ /* read description from description file */
+ if (!repo->desc) {
+ char *descfile = fmt("%s/description", path);
+ /* read first line */
+ }
+}
+```
+
+### Git Config Integration: `gitconfig_config()`
+
+When `enable-git-config=1`, each discovered repository's `.git/config` is
+parsed for metadata:
+
+```c
+static int gitconfig_config(const char *key, const char *value)
+{
+ if (!strcmp(key, "gitweb.owner"))
+ repo_config(repo, "owner", value);
+ else if (!strcmp(key, "gitweb.description"))
+ repo_config(repo, "desc", value);
+ else if (!strcmp(key, "gitweb.category"))
+ repo_config(repo, "section", value);
+ else if (!strcmp(key, "gitweb.homepage"))
+ repo_config(repo, "homepage", value);
+ else if (skip_prefix(key, "cgit.", &name))
+ repo_config(repo, name, value);
+ return 0;
+}
+```
+
+This is compatible with gitweb's configuration keys and also supports
+cgit-specific `cgit.*` keys.
+
+## Project List Scanning: `scan_projects()`
+
+```c
+void scan_projects(const char *path, const char *projectsfile,
+ repo_config_fn fn)
+```
+
+Instead of recursively scanning a directory, reads a text file listing
+project paths (one per line). Each path is appended to the base path and
+checked with `is_git_dir()`.
+
+This is useful for large installations where full recursive scanning is too
+slow.
+
+```ini
+project-list=/etc/cgit/projects.list
+scan-path=/srv/git
+```
+
+The `projects.list` file contains relative paths:
+
+```
+myproject.git
+team/frontend.git
+team/backend.git
+```
+
+## Section Derivation
+
+When `section-from-path` is set, repository sections are automatically
+derived from the directory structure:
+
+| Value | Behavior |
+|-------|----------|
+| `0` | No auto-sectioning |
+| `1` | First path component becomes section |
+| `2` | First two components become section |
+| `-1` | Last component becomes section |
+
+Example with `section-from-path=1` and `scan-path=/srv/git`:
+
+```
+/srv/git/team/project.git → section="team"
+/srv/git/personal/test.git → section="personal"
+```
+
+## Age File
+
+The modification time of a repository is determined by:
+
+1. The `agefile` (default: `info/web/last-modified`) — if this file exists
+ in the repository, its contents (a date string) or modification time is
+ used
+2. Otherwise, the mtime of the loose `refs/` directory
+3. As a fallback, the repository directory's own mtime
+
+```c
+static time_t read_agefile(const char *path)
+{
+ FILE *f;
+ static char buf[64];
+
+ f = fopen(path, "r");
+ if (!f)
+ return -1;
+ if (fgets(buf, sizeof(buf), f)) {
+ fclose(f);
+ return parse_date(buf, NULL);
+ }
+ fclose(f);
+ /* fallback to file mtime */
+ struct stat st;
+ if (!stat(path, &st))
+ return st.st_mtime;
+ return 0;
+}
+```
+
+## Repository List Management
+
+The global repository list is a dynamically-sized array:
+
+```c
+struct cgit_repolist {
+ int count;
+ int length; /* allocated capacity */
+ struct cgit_repo *repos;
+};
+
+struct cgit_repolist cgit_repolist;
+```
+
+### Sorting
+
+The repository list can be sorted by different criteria:
+
+```c
+static int cmp_name(const void *a, const void *b); /* by name */
+static int cmp_section(const void *a, const void *b); /* by section */
+static int cmp_idle(const void *a, const void *b); /* by age */
+```
+
+Sorting is controlled by the `repository-sort` directive and the `s` query
+parameter.
+
+## Repository Visibility
+
+Two directives control repository visibility:
+
+| Directive | Effect |
+|-----------|--------|
+| `repo.hide=1` | Repository is hidden from the index but accessible by URL |
+| `repo.ignore=1` | Repository is completely ignored |
+
+Additionally, `strict-export` restricts export to repositories containing a
+specific file (e.g., `git-daemon-export-ok`):
+
+```ini
+strict-export=git-daemon-export-ok
+```
+
+## Scan Path Caching
+
+Scanning large directory trees can be slow. The `cache-scanrc-ttl` directive
+controls how long scan results are cached:
+
+```ini
+# cache scan results for 15 minutes (comments must be on their own
+# line — cgitrc values run to end of line)
+cache-scanrc-ttl=15
+```
+
+When caching is enabled, the scan is performed only when the cached result
+expires.
+
+## Configuration Reference
+
+| Directive | Default | Description |
+|-----------|---------|-------------|
+| `scan-path` | (none) | Directory to scan for repos |
+| `project-list` | (none) | File listing project paths |
+| `enable-git-config` | 0 | Read repo metadata from git config |
+| `scan-hidden-path` | 0 | Include hidden directories in scan |
+| `remove-suffix` | 0 | Strip `.git` suffix from URLs |
+| `section-from-path` | 0 | Auto-derive section from path |
+| `strict-export` | (none) | Required file for repo visibility |
+| `agefile` | `info/web/last-modified` | File checked for repo age |
+| `cache-scanrc-ttl` | 15 | TTL for cached scan results (minutes) |
diff --git a/docs/handbook/cgit/snapshot-system.md b/docs/handbook/cgit/snapshot-system.md
new file mode 100644
index 0000000000..bb39047f48
--- /dev/null
+++ b/docs/handbook/cgit/snapshot-system.md
@@ -0,0 +1,246 @@
+# cgit — Snapshot System
+
+## Overview
+
+cgit can generate downloadable source archives (snapshots) from any git
+reference. Supported formats include tar, compressed tar variants, and zip.
+The snapshot system validates requests against a configured format mask and
+delegates archive generation to the git archive API.
+
+Source file: `ui-snapshot.c`, `ui-snapshot.h`.
+
+## Snapshot Format Table
+
+All supported formats are defined in `cgit_snapshot_formats[]`:
+
+```c
+const struct cgit_snapshot_format cgit_snapshot_formats[] = {
+ { ".zip", "application/x-zip", write_zip_archive, 0x01 },
+ { ".tar.gz", "application/x-gzip", write_tar_gzip_archive, 0x02 },
+ { ".tar.bz2", "application/x-bzip2", write_tar_bzip2_archive, 0x04 },
+ { ".tar", "application/x-tar", write_tar_archive, 0x08 },
+ { ".tar.xz", "application/x-xz", write_tar_xz_archive, 0x10 },
+ { ".tar.zst", "application/x-zstd", write_tar_zstd_archive, 0x20 },
+ { ".tar.lz", "application/x-lzip", write_tar_lzip_archive, 0x40 },
+ { NULL }
+};
+```
+
+### Format Structure
+
+```c
+struct cgit_snapshot_format {
+ const char *suffix; /* file extension */
+ const char *mimetype; /* HTTP Content-Type */
+ write_archive_fn_t fn; /* archive writer function */
+ int bit; /* bitmask flag */
+};
+```
+
+### Format Bitmask
+
+Each format has a power-of-two bit value. The `snapshots` configuration
+directive sets a bitmask by OR-ing the bits of enabled formats:
+
+| Suffix | Bit | Hex |
+|--------|-----|-----|
+| `.zip` | 0x01 | 1 |
+| `.tar.gz` | 0x02 | 2 |
+| `.tar.bz2` | 0x04 | 4 |
+| `.tar` | 0x08 | 8 |
+| `.tar.xz` | 0x10 | 16 |
+| `.tar.zst` | 0x20 | 32 |
+| `.tar.lz` | 0x40 | 64 |
+| all | 0x7F | 127 |
+
+### Parsing Snapshot Configuration
+
+`cgit_parse_snapshots_mask()` in `shared.c` converts the configuration
+string to a bitmask:
+
+```c
+int cgit_parse_snapshots_mask(const char *str)
+{
+ int mask = 0;
+ /* for each word in str */
+ /* compare against cgit_snapshot_formats[].suffix */
+ /* if match, mask |= format->bit */
+ /* "all" enables all formats */
+ return mask;
+}
+```
+
+## Snapshot Request Processing
+
+### Entry Point: `cgit_print_snapshot()`
+
+```c
+void cgit_print_snapshot(const char *head, const char *hex,
+ const char *prefix, const char *filename,
+ int snapshots)
+```
+
+Parameters:
+- `head` — Branch/tag reference
+- `hex` — Commit SHA
+- `prefix` — Archive prefix (directory name within archive)
+- `filename` — Requested filename (e.g., `myrepo-v1.0.tar.gz`)
+- `snapshots` — Enabled format bitmask
+
+### Reference Resolution: `get_ref_from_filename()`
+
+Decomposes the requested filename into a reference and format:
+
+```c
+static const struct cgit_snapshot_format *get_ref_from_filename(
+ const char *filename, char **ref)
+{
+ /* for each format suffix */
+ /* if filename ends with suffix */
+ /* extract the part before the suffix as the ref */
+ /* return the matching format */
+ /* strip repo prefix if present */
+}
+```
+
+Example decomposition:
+- `myrepo-v1.0.tar.gz` → ref=`v1.0`, format=`.tar.gz`
+- `myrepo-main.zip` → ref=`main`, format=`.zip`
+- `myrepo-abc1234.tar.xz` → ref=`abc1234`, format=`.tar.xz`
+
+The prefix `myrepo-` is the `snapshot-prefix` (defaults to the repo basename).
+
+### Validation
+
+Before generating an archive, the function validates:
+
+1. **Format enabled**: The format's bit must be set in the snapshot mask
+2. **Reference exists**: The ref must resolve to a valid git object
+3. **Object type**: Must be a commit, tag, or tree
+
+### Archive Generation: `write_archive_type()`
+
+```c
+static int write_archive_type(const char *format, const char *hex,
+ const char *prefix)
+{
+ struct archiver_args args;
+ memset(&args, 0, sizeof(args));
+ args.base = prefix; /* directory prefix in archive */
+ /* resolve hex to tree object */
+ /* call write_archive() from libgit */
+}
+```
+
+The actual archive creation is delegated to Git's `write_archive()` API,
+which handles tar and zip generation natively.
+
+### Compression Pipeline
+
+For compressed formats, the archive data is piped through compression:
+
+```c
+static int write_tar_gzip_archive(/* ... */)
+{
+ /* pipe tar output through gzip compression */
+}
+
+static int write_tar_bzip2_archive(/* ... */)
+{
+ /* pipe tar output through bzip2 compression */
+}
+
+static int write_tar_xz_archive(/* ... */)
+{
+ /* pipe tar output through xz compression */
+}
+
+static int write_tar_zstd_archive(/* ... */)
+{
+ /* pipe tar output through zstd compression */
+}
+
+static int write_tar_lzip_archive(/* ... */)
+{
+ /* pipe tar output through lzip compression */
+}
+```
+
+## HTTP Response
+
+Snapshot responses include:
+
+```
+Content-Type: application/x-gzip
+Content-Disposition: inline; filename="myrepo-v1.0.tar.gz"
+```
+
+The `Content-Disposition` header triggers a file download in browsers with
+the correct filename.
+
+## Snapshot Links
+
+Snapshot links on repository pages are generated by `ui-shared.c`:
+
+```c
+void cgit_print_snapshot_links(const char *repo, const char *head,
+ const char *hex, int snapshots)
+{
+ for (f = cgit_snapshot_formats; f->suffix; f++) {
+ if (!(snapshots & f->bit))
+ continue;
+ /* generate link: repo/snapshot/prefix-ref.suffix */
+ }
+}
+```
+
+These links appear on the summary page and optionally in the log view.
+
+## Snapshot Prefix
+
+The archive prefix (directory name inside the archive) is determined by:
+
+1. `repo.snapshot-prefix` if set
+2. Otherwise, the repository basename
+
+For a request like `myrepo-v1.0.tar.gz`, the archive contains files under
+`myrepo-v1.0/`.
+
+## Signature Detection
+
+cgit can detect and display signature files alongside snapshots. When a
+file matching `<snapshot-name>.asc` or `<snapshot-name>.sig` exists in the
+repository, a signature link is shown next to the snapshot download.
+
+## Configuration
+
+| Directive | Default | Description |
+|-----------|---------|-------------|
+| `snapshots` | (none) | Space-separated list of enabled suffixes |
+| `repo.snapshots` | (inherited) | Per-repo override |
+| `repo.snapshot-prefix` | (basename) | Per-repo archive prefix |
+| `cache-snapshot-ttl` | 5 min | Cache TTL for snapshot pages |
+
+### Enabling Snapshots
+
+```ini
+# Global: enable tar.gz and zip for all repos
+snapshots=tar.gz zip
+
+# Per-repo: enable all formats
+repo.url=myrepo
+repo.snapshots=all
+
+# Per-repo: disable snapshots
+repo.url=internal-tools
+repo.snapshots=
+```
+
+## Security Considerations
+
+- Snapshots are generated on-the-fly from git objects, so they always reflect
+ the repository's current state
+- Large repositories can produce large archives — consider enabling caching
+ and setting appropriate `max-blob-size` limits
+- Snapshot requests for non-existent refs return a 404 error page
+- The snapshot filename is sanitized to prevent path traversal
diff --git a/docs/handbook/cgit/testing.md b/docs/handbook/cgit/testing.md
new file mode 100644
index 0000000000..ee7b5979f9
--- /dev/null
+++ b/docs/handbook/cgit/testing.md
@@ -0,0 +1,335 @@
+# cgit — Testing
+
+## Overview
+
+cgit has a shell-based test suite in the `tests/` directory. Tests use
+Git's own test framework (`test-lib.sh`) and exercise cgit by invoking the
+CGI binary with simulated HTTP requests.
+
+Source location: `cgit/tests/`.
+
+## Test Framework
+
+The test harness is built on Git's `test-lib.sh`, sourced from the vendored
+Git tree at `git/t/test-lib.sh`. This provides:
+
+- TAP-compatible output
+- Test assertions (`test_expect_success`, `test_expect_failure`)
+- Temporary directory management (`trash` directories)
+- Color-coded pass/fail reporting
+
+### `setup.sh`
+
+All test scripts source `tests/setup.sh`, which provides:
+
+```bash
+# Core test helpers
+prepare_tests() # Create repos and config file
+run_test() # Execute a single test case
+cgit_query() # Invoke cgit with a query string
+cgit_url() # Invoke cgit with a virtual URL
+strip_headers() # Remove HTTP headers from CGI output
+```
+
+### Invoking cgit
+
+Tests invoke cgit as a CGI binary by setting environment variables:
+
+```bash
+cgit_query()
+{
+ CGIT_CONFIG="$PWD/cgitrc" QUERY_STRING="$1" cgit
+}
+
+cgit_url()
+{
+ CGIT_CONFIG="$PWD/cgitrc" QUERY_STRING="url=$1" cgit
+}
+```
+
+The `cgit` binary is on PATH (prepended by setup.sh). The response includes
+HTTP headers followed by HTML content. `strip_headers()` removes the
+headers for content-only assertions.
+
+## Test Repository Setup
+
+`setup_repos()` creates test repositories:
+
+```bash
+setup_repos()
+{
+ rm -rf cache
+ mkdir -p cache
+ mkrepo repos/foo 5 # 5 commits
+ mkrepo repos/bar 50 commit-graph # 50 commits with commit-graph
+ mkrepo repos/foo+bar 10 testplus # 10 commits + special chars
+ mkrepo "repos/with space" 2 # repo with spaces in name
+ mkrepo repos/filter 5 testplus # for filter tests
+}
+```
+
+### `mkrepo()`
+
+```bash
+mkrepo() {
+ name=$1
+ count=$2
+ test_create_repo "$name"
+ (
+ cd "$name"
+ n=1
+ while test $n -le $count; do
+ echo $n >file-$n
+ git add file-$n
+ git commit -m "commit $n"
+ n=$(expr $n + 1)
+ done
+ case "$3" in
+ testplus)
+ echo "hello" >a+b
+ git add a+b
+ git commit -m "add a+b"
+ git branch "1+2"
+ ;;
+ commit-graph)
+ git commit-graph write
+ ;;
+ esac
+ )
+}
+```
+
+### Test Configuration
+
+A `cgitrc` file is generated in the test directory with:
+
+```ini
+virtual-root=/
+cache-root=$PWD/cache
+cache-size=1021
+snapshots=tar.gz tar.bz2 tar.lz tar.xz tar.zst zip
+enable-log-filecount=1
+enable-log-linecount=1
+summary-log=5
+summary-branches=5
+summary-tags=5
+clone-url=git://example.org/$CGIT_REPO_URL.git
+enable-filter-overrides=1
+root-coc=$PWD/site-coc.txt
+root-cla=$PWD/site-cla.txt
+root-homepage=https://projecttick.org
+root-homepage-title=Project Tick
+root-link=GitHub|https://github.com/example
+root-link=GitLab|https://gitlab.com/example
+root-link=Codeberg|https://codeberg.org/example
+
+repo.url=foo
+repo.path=$PWD/repos/foo/.git
+
+repo.url=bar
+repo.path=$PWD/repos/bar/.git
+repo.desc=the bar repo
+
+repo.url=foo+bar
+repo.path=$PWD/repos/foo+bar/.git
+repo.desc=the foo+bar repo
+# ...
+```
+
+## Test Scripts
+
+### Test File Naming
+
+Tests follow the convention `tNNNN-description.sh`:
+
+| Test | Description |
+|------|-------------|
+| `t0001-validate-git-versions.sh` | Verify Git version compatibility |
+| `t0010-validate-html.sh` | Validate HTML output |
+| `t0020-validate-cache.sh` | Test cache system |
+| `t0101-index.sh` | Repository index page |
+| `t0102-summary.sh` | Repository summary page |
+| `t0103-log.sh` | Log view |
+| `t0104-tree.sh` | Tree view |
+| `t0105-commit.sh` | Commit view |
+| `t0106-diff.sh` | Diff view |
+| `t0107-snapshot.sh` | Snapshot downloads |
+| `t0108-patch.sh` | Patch view |
+| `t0109-gitconfig.sh` | Git config integration |
+| `t0110-rawdiff.sh` | Raw diff output |
+| `t0111-filter.sh` | Filter system |
+| `t0112-coc.sh` | Code of Conduct page |
+| `t0113-cla.sh` | CLA page |
+| `t0114-root-homepage.sh` | Root homepage links |
+
+### Number Ranges
+
+| Range | Category |
+|-------|----------|
+| `t0001-t0099` | Infrastructure/validation tests |
+| `t0100-t0199` | Feature tests |
+
+## Running Tests
+
+### All Tests
+
+```bash
+cd cgit/tests
+make
+```
+
+The Makefile discovers all `t*.sh` files and runs them:
+
+```makefile
+T = $(wildcard t[0-9][0-9][0-9][0-9]-*.sh)
+
+all: $(T)
+
+$(T):
+ @'$(SHELL_PATH_SQ)' $@ $(CGIT_TEST_OPTS)
+```
+
+### Individual Tests
+
+```bash
+# Run a single test
+./t0101-index.sh
+
+# With verbose output
+./t0101-index.sh -v
+
+# With Valgrind
+./t0101-index.sh --valgrind
+```
+
+### Test Options
+
+Options are passed via `CGIT_TEST_OPTS` or command-line arguments:
+
+| Option | Description |
+|--------|-------------|
+| `-v`, `--verbose` | Show test details |
+| `--valgrind` | Run cgit under Valgrind |
+| `--debug` | Show shell trace |
+
+### Valgrind Support
+
+`setup.sh` intercepts the `--valgrind` flag and configures Valgrind
+instrumentation via a wrapper script in `tests/valgrind/`:
+
+```bash
+if test -n "$cgit_valgrind"; then
+ GIT_VALGRIND="$TEST_DIRECTORY/valgrind"
+ CGIT_VALGRIND=$(cd ../valgrind && pwd)
+ PATH="$CGIT_VALGRIND/bin:$PATH"
+fi
+```
+
+## Test Patterns
+
+### HTML Content Assertion
+
+```bash
+run_test 'repo index contains foo' '
+ cgit_url "/" | strip_headers | grep -q "foo"
+'
+```
+
+### HTTP Header Assertion
+
+```bash
+run_test 'content type is text/html' '
+ cgit_url "/" | head -1 | grep -q "Content-Type: text/html"
+'
+```
+
+### Snapshot Download
+
+```bash
+run_test 'snapshot is valid tar.gz' '
+ cgit_url "/foo/snapshot/foo-master.tar.gz" | strip_headers | \
+ gunzip | tar tf - >/dev/null
+'
+```
+
+### Negative Assertion
+
+```bash
+run_test 'no 404 on valid repo' '
+ ! cgit_url "/foo" | grep -q "404"
+'
+```
+
+### Lua Filter Conditional
+
+```bash
+if [ $CGIT_HAS_LUA -eq 1 ]; then
+ run_test 'lua filter works' '
+ cgit_url "/filter-lua/about/" | strip_headers | grep -q "filtered"
+ '
+fi
+```
+
+## Test Filter Scripts
+
+The `tests/filters/` directory contains simple filter scripts for testing:
+
+### `dump.sh`
+
+A passthrough filter that copies stdin to stdout, used to verify filter
+invocation:
+
+```bash
+#!/bin/sh
+cat
+```
+
+### `dump.lua`
+
+Lua equivalent of the dump filter:
+
+```lua
+function filter_open(...)
+end
+
+function write(str)
+ html(str)
+end
+
+function filter_close()
+ return 0
+end
+```
+
+## Cleanup
+
+```bash
+cd cgit/tests
+make clean
+```
+
+Removes the `trash` directories created by tests.
+
+## Writing New Tests
+
+1. Create a new file `tNNNN-description.sh`
+2. Source `setup.sh` and call `prepare_tests`:
+
+```bash
+#!/bin/sh
+. ./setup.sh
+prepare_tests "my new feature"
+
+run_test 'description of test case' '
+ cgit_url "/foo/my-page/" | strip_headers | grep -q "expected"
+'
+```
+
+3. Make it executable: `chmod +x tNNNN-description.sh`
+4. Run: `./tNNNN-description.sh -v`
+
+## CI Integration
+
+Tests are run as part of the CI pipeline. The `ci/` directory contains
+Nix-based CI configuration that builds cgit and runs the test suite in a
+reproducible environment.
diff --git a/docs/handbook/cgit/ui-modules.md b/docs/handbook/cgit/ui-modules.md
new file mode 100644
index 0000000000..b03a437a35
--- /dev/null
+++ b/docs/handbook/cgit/ui-modules.md
@@ -0,0 +1,544 @@
+# cgit — UI Modules
+
+## Overview
+
+cgit's user interface is implemented as a collection of `ui-*.c` modules,
+each responsible for rendering a specific page type. All modules share
+common infrastructure from `ui-shared.c` and `html.c`.
+
+## Module Map
+
+| Module | Page | Entry Function |
+|--------|------|---------------|
+| `ui-repolist.c` | Repository index | `cgit_print_repolist()` |
+| `ui-summary.c` | Repository summary | `cgit_print_summary()` |
+| `ui-log.c` | Commit log | `cgit_print_log()` |
+| `ui-tree.c` | File/directory tree | `cgit_print_tree()` |
+| `ui-blob.c` | File content | `cgit_print_blob()` |
+| `ui-commit.c` | Commit details | `cgit_print_commit()` |
+| `ui-diff.c` | Diff view | `cgit_print_diff()` |
+| `ui-ssdiff.c` | Side-by-side diff | `cgit_ssdiff_*()` |
+| `ui-patch.c` | Patch output | `cgit_print_patch()` |
+| `ui-refs.c` | Branch/tag listing | `cgit_print_refs()` |
+| `ui-tag.c` | Tag details | `cgit_print_tag()` |
+| `ui-stats.c` | Statistics | `cgit_print_stats()` |
+| `ui-atom.c` | Atom feed | `cgit_print_atom()` |
+| `ui-plain.c` | Raw file serving | `cgit_print_plain()` |
+| `ui-blame.c` | Blame view | `cgit_print_blame()` |
+| `ui-clone.c` | HTTP clone | `cgit_clone_info/objects/head()` |
+| `ui-snapshot.c` | Archive download | `cgit_print_snapshot()` |
+| `ui-shared.c` | Common layout | (shared functions) |
+
+## `ui-repolist.c` — Repository Index
+
+Renders the main page listing all configured repositories.
+
+### Functions
+
+```c
+void cgit_print_repolist(void)
+```
+
+### Features
+
+- Sortable columns: Name, Description, Owner, Idle (age)
+- Section grouping (based on `repo.section` or `section-from-path`)
+- Pagination with configurable `max-repo-count`
+- Age calculation via `read_agefile()` or ref modification time
+- Optional filter by search query
+
+### Sorting
+
+```c
+static int cmp_name(const void *a, const void *b);
+static int cmp_section(const void *a, const void *b);
+static int cmp_idle(const void *a, const void *b);
+```
+
+Sort field is selected by the `s` query parameter or `repository-sort`
+directive.
+
+### Age File Resolution
+
+```c
+static time_t read_agefile(const char *path)
+{
+ /* Try reading date from agefile content */
+ /* Fall back to file mtime */
+ /* Fall back to refs/ dir mtime */
+}
+```
+
+### Pagination
+
+```c
+static void print_pager(int items, int pagelen, char *search, char *sort)
+{
+ /* Render page navigation links */
+ /* [prev] 1 2 3 4 5 [next] */
+}
+```
+
+## `ui-summary.c` — Repository Summary
+
+Renders the overview page for a single repository.
+
+### Functions
+
+```c
+void cgit_print_summary(void)
+```
+
+### Content
+
+- Repository metadata table (description, owner, homepage, clone URLs)
+- SPDX license detection from `LICENSES/` directory
+- CODEOWNERS and MAINTAINERS file detection
+- Badges display
+- Branch listing (limited by `summary-branches`)
+- Tag listing (limited by `summary-tags`)
+- Recent commits (limited by `summary-log`)
+- Snapshot download links
+- README rendering (via about-filter)
+
+### License Detection
+
+```c
+/* Scan for SPDX license identifiers */
+/* Check LICENSES/ directory for .txt files */
+/* Extract license names from filenames */
+```
+
+### README Priority
+
+README files are tried in order of `repo.readme` entries:
+
+1. `ref:README.md` — tracked file in a specific ref
+2. `:README.md` — tracked file in HEAD
+3. `/path/to/README.md` — file on disk
+
+## `ui-log.c` — Commit Log
+
+Renders a paginated list of commits.
+
+### Functions
+
+```c
+void cgit_print_log(const char *tip, int ofs, int cnt,
+ char *grep, char *pattern, char *path,
+ int pager, int commit_graph, int commit_sort)
+```
+
+### Features
+
+- Commit graph visualization (ASCII art)
+- File change count per commit (when `enable-log-filecount=1`)
+- Line count per commit (when `enable-log-linecount=1`)
+- Grep/search within commit messages
+- Path filtering (show commits affecting a specific path)
+- Follow renames (when `enable-follow-links=1`)
+- Pagination with next/prev links
+
+### Commit Graph Colors
+
+```c
+static const char *column_colors_html[] = {
+ "<span class='column1'>",
+ "<span class='column2'>",
+ "<span class='column3'>",
+ "<span class='column4'>",
+ "<span class='column5'>",
+ "<span class='column6'>",
+};
+```
+
+### Decorations
+
+```c
+static void show_commit_decorations(struct commit *commit)
+{
+ /* Display branch/tag labels next to commits */
+ /* Uses git's decoration API */
+}
+```
+
+## `ui-tree.c` — Tree View
+
+Renders directory listings and file contents.
+
+### Functions
+
+```c
+void cgit_print_tree(const char *rev, char *path)
+```
+
+### Directory Listing
+
+For each entry in a tree object:
+
+```c
+/* For each tree entry */
+switch (entry->mode) {
+ case S_IFDIR: /* directory → link to subtree */
+ case S_IFREG: /* regular file → link to blob */
+ case S_IFLNK: /* symlink → show target */
+ case S_IFGITLINK: /* submodule → link to submodule */
+}
+```
+
+### File Display
+
+```c
+static void print_text_buffer(const char *name, char *buf,
+ unsigned long size)
+{
+ /* Show file content with line numbers */
+ /* Apply source filter if configured */
+}
+
+static void print_binary_buffer(char *buf, unsigned long size)
+{
+ /* Show "Binary file (N bytes)" message */
+}
+```
+
+### Walk Tree Context
+
+```c
+struct walk_tree_context {
+ char *curr_rev;
+ char *match_path;
+ int state; /* 0=searching, 1=found, 2=printed */
+};
+```
+
+The tree walker recursively descends into subdirectories to find the
+requested path.
+
+## `ui-blob.c` — Blob View
+
+Displays individual file content or serves raw file data.
+
+### Functions
+
+```c
+void cgit_print_blob(const char *hex, char *path,
+ const char *head, int file_only)
+int cgit_ref_path_exists(const char *path, const char *ref, int file_only)
+char *cgit_ref_read_file(const char *path, const char *ref,
+ unsigned long *size)
+```
+
+### MIME Detection
+
+When serving raw content, MIME types are detected from:
+1. The `mimetype.<ext>` configuration directives
+2. The `mimetype-file` (Apache-style mime.types)
+3. Default: `application/octet-stream`
+
+## `ui-commit.c` — Commit View
+
+Displays full commit details.
+
+### Functions
+
+```c
+void cgit_print_commit(const char *rev, const char *prefix)
+```
+
+### Content
+
+- Author and committer info (name, email, date)
+- Commit subject and full message
+- Parent commit links
+- Git notes
+- Commit decorations (branches, tags)
+- Diffstat
+- Full diff (unified or side-by-side)
+
+### Notes Display
+
+```c
+/* Check for git notes */
+struct strbuf notes = STRBUF_INIT;
+format_display_notes(&commit->object.oid, &notes, ...);
+if (notes.len) {
+ html("<div class='notes-header'>Notes</div>");
+ html("<div class='notes'>");
+ html_txt(notes.buf);
+ html("</div>");
+}
+```
+
+## `ui-diff.c` — Diff View
+
+Renders diffs between commits or trees.
+
+### Functions
+
+```c
+void cgit_print_diff(const char *new_rev, const char *old_rev,
+ const char *prefix, int show_ctrls, int raw)
+void cgit_print_diffstat(const struct object_id *old,
+ const struct object_id *new,
+ const char *prefix)
+```
+
+See [diff-engine.md](diff-engine.md) for detailed documentation.
+
+## `ui-ssdiff.c` — Side-by-Side Diff
+
+Renders two-column diff view with character-level highlighting.
+
+### Functions
+
+```c
+void cgit_ssdiff_header_begin(void)
+void cgit_ssdiff_header_end(void)
+void cgit_ssdiff_footer(void)
+```
+
+See [diff-engine.md](diff-engine.md) for LCS algorithm details.
+
+## `ui-patch.c` — Patch Output
+
+Generates a downloadable patch file.
+
+### Functions
+
+```c
+void cgit_print_patch(const char *new_rev, const char *old_rev,
+ const char *prefix)
+```
+
+Output is `text/plain` content suitable for `git apply`. Uses Git's
+`rev_info` and `log_tree_commit` to generate the patch.
+
+## `ui-refs.c` — References View
+
+Displays branches and tags with sorting.
+
+### Functions
+
+```c
+void cgit_print_refs(void)
+void cgit_print_branches(int max)
+void cgit_print_tags(int max)
+```
+
+### Branch Display
+
+Each branch row shows:
+- Branch name (link to log)
+- Idle time
+- Author of last commit
+
+### Tag Display
+
+Each tag row shows:
+- Tag name (link to tag)
+- Idle time
+- Author/tagger
+- Download links (if snapshots enabled)
+
+### Sorting
+
+```c
+static int cmp_branch_age(const void *a, const void *b);
+static int cmp_tag_age(const void *a, const void *b);
+static int cmp_branch_name(const void *a, const void *b);
+static int cmp_tag_name(const void *a, const void *b);
+```
+
+Sort order is controlled by `branch-sort` (0=name, 1=age).
+
+## `ui-tag.c` — Tag View
+
+Displays details of a specific tag.
+
+### Functions
+
+```c
+void cgit_print_tag(const char *revname)
+```
+
+### Content
+
+For annotated tags:
+- Tagger name and date
+- Tag message
+- Tagged object link
+
+For lightweight tags:
+- Redirects to the tagged object (commit, tree, or blob)
+
+## `ui-stats.c` — Statistics View
+
+Displays contributor statistics by period.
+
+### Functions
+
+```c
+void cgit_print_stats(void)
+```
+
+### Periods
+
+```c
+struct cgit_period {
+ const char *name; /* "week", "month", "quarter", "year" */
+ int max_periods;
+ int count;
+ /* accessor functions for period boundaries */
+};
+```
+
+### Data Collection
+
+```c
+static void collect_stats(struct cgit_period *period)
+{
+ /* Walk commit log */
+ /* Group commits by author and period */
+ /* Count additions/deletions per period */
+}
+```
+
+### Output
+
+- Bar chart showing commits per period
+- Author ranking table
+- Sortable by commit count
+
+## `ui-atom.c` — Atom Feed
+
+Generates an Atom XML feed.
+
+### Functions
+
+```c
+void cgit_print_atom(char *tip, char *path, int max)
+```
+
+### Output
+
+```xml
+<?xml version='1.0' encoding='utf-8'?>
+<feed xmlns='http://www.w3.org/2005/Atom'>
+ <title>repo - log</title>
+ <updated>2024-01-01T00:00:00Z</updated>
+ <entry>
+ <title>commit subject</title>
+ <updated>2024-01-01T00:00:00Z</updated>
+ <author><name>Alice</name><email>alice@example.com</email></author>
+ <id>urn:sha1:abc123</id>
+ <link href='commit URL'/>
+ <content type='text'>commit message</content>
+ </entry>
+</feed>
+```
+
+Limited by `max-atom-items` (default 10).
+
+## `ui-plain.c` — Raw File Serving
+
+Serves file content with proper MIME types.
+
+### Functions
+
+```c
+void cgit_print_plain(void)
+```
+
+### Features
+
+- MIME type detection by file extension
+- Directory listing (HTML) when path is a tree
+- Binary file serving with correct Content-Type
+- Security: HTML serving gated by `enable-html-serving`
+
+### Security
+
+When `enable-html-serving=0` (default), HTML files are served as
+`text/plain` to prevent XSS.
+
+## `ui-blame.c` — Blame View
+
+Displays line-by-line blame information.
+
+### Functions
+
+```c
+void cgit_print_blame(void)
+```
+
+### Implementation
+
+Uses Git's `blame_scoreboard` API:
+
+```c
+/* Set up blame scoreboard */
+/* Walk file history */
+/* For each line, emit: commit hash, author, line content */
+```
+
+### Output
+
+Each line shows:
+- Abbreviated commit hash (linked to commit view)
+- Line number
+- File content
+
+Requires `enable-blame=1`.
+
+## `ui-clone.c` — HTTP Clone Endpoints
+
+Serves the smart HTTP clone protocol.
+
+### Functions
+
+```c
+void cgit_clone_info(void) /* GET info/refs */
+void cgit_clone_objects(void) /* GET objects/* */
+void cgit_clone_head(void) /* GET HEAD */
+```
+
+### `cgit_clone_info()`
+
+Enumerates all refs and their SHA-1 hashes:
+
+```c
+static void print_ref_info(const char *refname,
+ const struct object_id *oid, ...)
+{
+ /* Output: sha1\trefname\n */
+}
+```
+
+### `cgit_clone_objects()`
+
+Serves loose objects and pack files from the object store.
+
+### `cgit_clone_head()`
+
+Returns the symbolic HEAD reference.
+
+Requires `enable-http-clone=1` (default).
+
+## `ui-snapshot.c` — Archive Downloads
+
+See [snapshot-system.md](snapshot-system.md) for detailed documentation.
+
+## `ui-shared.c` — Common Infrastructure
+
+Provides shared layout and link generation used by all modules.
+
+See [html-rendering.md](html-rendering.md) for detailed documentation.
+
+### Key Functions
+
+- Page skeleton: `cgit_print_docstart()`, `cgit_print_pageheader()`,
+ `cgit_print_docend()`
+- Links: `cgit_commit_link()`, `cgit_tree_link()`, `cgit_log_link()`, etc.
+- URLs: `cgit_repourl()`, `cgit_fileurl()`, `cgit_pageurl()`
+- Errors: `cgit_print_error_page()`
diff --git a/docs/handbook/cgit/url-routing.md b/docs/handbook/cgit/url-routing.md
new file mode 100644
index 0000000000..0adb3b7fc5
--- /dev/null
+++ b/docs/handbook/cgit/url-routing.md
@@ -0,0 +1,331 @@
+# cgit — URL Routing and Request Dispatch
+
+## Overview
+
+cgit supports two URL schemes: virtual-root (path-based) and query-string.
+Incoming requests are parsed into a `cgit_query` structure and dispatched to
+one of 21 command handlers via a function pointer table.
+
+Source files: `cgit.c` (querystring parsing, routing), `parsing.c`
+(`cgit_parse_url`), `cmd.c` (command table).
+
+## URL Schemes
+
+### Virtual Root (Path-Based)
+
+When `virtual-root` is configured, URLs use clean paths:
+
+```
+/cgit/ → repository list
+/cgit/repo.git/ → summary
+/cgit/repo.git/log/ → log (default branch)
+/cgit/repo.git/log/main/path → log for path on branch main
+/cgit/repo.git/tree/v1.0/src/ → tree view at tag v1.0
+/cgit/repo.git/commit/?id=abc → commit view
+```
+
+The path after the virtual root is passed in `PATH_INFO` and parsed by
+`cgit_parse_url()`.
+
+### Query-String (CGI)
+
+Without virtual root, all parameters are passed in the query string:
+
+```
+/cgit.cgi?url=repo.git/log/main/path&ofs=50
+```
+
+## Query Structure
+
+All parsed parameters are stored in `ctx.qry`:
+
+```c
+struct cgit_query {
+ char *raw; /* raw URL / PATH_INFO */
+ char *repo; /* repository URL */
+ char *page; /* page/command name */
+ char *search; /* search string */
+ char *grep; /* grep pattern */
+ char *head; /* branch reference */
+ char *sha1; /* object SHA-1 */
+ char *sha2; /* second SHA-1 (for diffs) */
+ char *path; /* file/dir path within repo */
+ char *name; /* snapshot name / ref name */
+ char *url; /* combined URL path */
+ char *mimetype; /* requested MIME type */
+ char *etag; /* ETag from client */
+ int nohead; /* suppress header */
+ int ofs; /* pagination offset */
+ int has_symref; /* path contains a symbolic ref */
+ int has_sha1; /* explicit SHA was given */
+ int has_dot; /* path contains '..' */
+ int ignored; /* request should be ignored */
+ char *sort; /* sort field */
+ int showmsg; /* show full commit message */
+ int ssdiff; /* side-by-side diff */
+ int show_all; /* show all items */
+ int context; /* diff context lines */
+ int dt; /* diff type */
+ int follow; /* follow renames */
+ int log_hierarchical_threading;
+};
+```
+
+## URL Parsing: `cgit_parse_url()`
+
+In `parsing.c`, the URL is decomposed into repo, page, and path:
+
+```c
+void cgit_parse_url(const char *url)
+{
+ /* Step 1: try progressively longer prefixes as repo URLs */
+ /* For each '/' in the URL, check if the prefix matches a repo */
+
+ for (p = strchr(url, '/'); p; p = strchr(p + 1, '/')) {
+ *p = '\0';
+ repo = cgit_get_repoinfo(url);
+ *p = '/';
+ if (repo) {
+ ctx.qry.repo = xstrdup(url_prefix);
+ ctx.repo = repo;
+ url = p + 1; /* remaining part */
+ break;
+ }
+ }
+ /* if no '/' found, try the whole URL as a repo name */
+
+ /* Step 2: parse the remaining path as page/ref/path */
+ /* e.g., "log/main/src/file.c" → page="log", path="main/src/file.c" */
+ p = strchr(url, '/');
+ if (p) {
+ ctx.qry.page = xstrndup(url, p - url);
+ ctx.qry.path = trim_end(p + 1, '/');
+ } else if (*url) {
+ ctx.qry.page = xstrdup(url);
+ }
+}
+```
+
+## Query String Parsing: `querystring_cb()`
+
+HTTP query parameters and POST form data are decoded by `querystring_cb()`
+in `cgit.c`. The function maps URL parameter names to `ctx.qry` fields:
+
+```c
+static void querystring_cb(const char *name, const char *value)
+{
+ if (!strcmp(name, "url")) ctx.qry.url = xstrdup(value);
+ else if (!strcmp(name, "p")) ctx.qry.page = xstrdup(value);
+ else if (!strcmp(name, "q")) ctx.qry.search = xstrdup(value);
+ else if (!strcmp(name, "h")) ctx.qry.head = xstrdup(value);
+ else if (!strcmp(name, "id")) ctx.qry.sha1 = xstrdup(value);
+ else if (!strcmp(name, "id2")) ctx.qry.sha2 = xstrdup(value);
+ else if (!strcmp(name, "ofs")) ctx.qry.ofs = atoi(value);
+ else if (!strcmp(name, "path")) ctx.qry.path = xstrdup(value);
+ else if (!strcmp(name, "name")) ctx.qry.name = xstrdup(value);
+ else if (!strcmp(name, "mimetype")) ctx.qry.mimetype = xstrdup(value);
+ else if (!strcmp(name, "s")) ctx.qry.sort = xstrdup(value);
+ else if (!strcmp(name, "showmsg")) ctx.qry.showmsg = atoi(value);
+ else if (!strcmp(name, "ss")) ctx.qry.ssdiff = atoi(value);
+ else if (!strcmp(name, "all")) ctx.qry.show_all = atoi(value);
+ else if (!strcmp(name, "context")) ctx.qry.context = atoi(value);
+ else if (!strcmp(name, "follow")) ctx.qry.follow = atoi(value);
+ else if (!strcmp(name, "dt")) ctx.qry.dt = atoi(value);
+ else if (!strcmp(name, "grep")) ctx.qry.grep = xstrdup(value);
+ else if (!strcmp(name, "etag")) ctx.qry.etag = xstrdup(value);
+}
+```
+
+### URL Parameter Reference
+
+| Parameter | Query Field | Type | Description |
+|-----------|------------|------|-------------|
+| `url` | `qry.url` | string | Full URL path (repo/page/path) |
+| `p` | `qry.page` | string | Page/command name |
+| `q` | `qry.search` | string | Search string |
+| `h` | `qry.head` | string | Branch/ref name |
+| `id` | `qry.sha1` | string | Object SHA-1 |
+| `id2` | `qry.sha2` | string | Second SHA-1 (diffs) |
+| `ofs` | `qry.ofs` | int | Pagination offset |
+| `path` | `qry.path` | string | File path in repo |
+| `name` | `qry.name` | string | Reference/snapshot name |
+| `mimetype` | `qry.mimetype` | string | MIME type override |
+| `s` | `qry.sort` | string | Sort field |
+| `showmsg` | `qry.showmsg` | int | Show full commit message |
+| `ss` | `qry.ssdiff` | int | Side-by-side diff toggle |
+| `all` | `qry.show_all` | int | Show all entries |
+| `context` | `qry.context` | int | Diff context lines |
+| `follow` | `qry.follow` | int | Follow renames in log |
+| `dt` | `qry.dt` | int | Diff type |
+| `grep` | `qry.grep` | string | Grep pattern for log search |
+| `etag` | `qry.etag` | string | ETag for conditional requests |
+
+## Command Dispatch Table
+
+The command table in `cmd.c` maps page names to handler functions:
+
+```c
+#define def_cmd(name, want_hierarchical, want_repo, want_layout, want_vpath, is_clone) \
+ {#name, cmd_##name, want_hierarchical, want_repo, want_layout, want_vpath, is_clone}
+
+static struct cgit_cmd cmds[] = {
+ def_cmd(atom, 1, 1, 0, 0, 0),
+ def_cmd(about, 0, 1, 1, 0, 0),
+ def_cmd(blame, 1, 1, 1, 1, 0),
+ def_cmd(blob, 1, 1, 0, 0, 0),
+ def_cmd(commit, 1, 1, 1, 1, 0),
+ def_cmd(diff, 1, 1, 1, 1, 0),
+ def_cmd(head, 1, 1, 0, 0, 1),
+ def_cmd(info, 1, 1, 0, 0, 1),
+ def_cmd(log, 1, 1, 1, 1, 0),
+ def_cmd(ls_cache,0, 0, 0, 0, 0),
+ def_cmd(objects, 1, 1, 0, 0, 1),
+ def_cmd(patch, 1, 1, 1, 1, 0),
+ def_cmd(plain, 1, 1, 0, 1, 0),
+ def_cmd(rawdiff, 1, 1, 0, 1, 0),
+ def_cmd(refs, 1, 1, 1, 0, 0),
+ def_cmd(repolist,0, 0, 1, 0, 0),
+ def_cmd(snapshot, 1, 1, 0, 0, 0),
+ def_cmd(stats, 1, 1, 1, 1, 0),
+ def_cmd(summary, 1, 1, 1, 0, 0),
+ def_cmd(tag, 1, 1, 1, 0, 0),
+ def_cmd(tree, 1, 1, 1, 1, 0),
+};
+```
+
+### Command Flags
+
+| Flag | Meaning |
+|------|---------|
+| `want_hierarchical` | Parse hierarchical path from URL |
+| `want_repo` | Requires a repository context |
+| `want_layout` | Render within HTML page layout |
+| `want_vpath` | Accept a virtual path (file path in repo) |
+| `is_clone` | HTTP clone protocol endpoint |
+
+### Lookup: `cgit_get_cmd()`
+
+```c
+struct cgit_cmd *cgit_get_cmd(const char *name)
+{
+ for (int i = 0; i < ARRAY_SIZE(cmds); i++)
+ if (!strcmp(cmds[i].name, name))
+ return &cmds[i];
+ return NULL;
+}
+```
+
+The function performs a linear search. With 21 entries, this is fast enough.
+
+## Request Processing Flow
+
+In `process_request()` (`cgit.c`):
+
+```
+1. Parse PATH_INFO via cgit_parse_url()
+2. Parse QUERY_STRING via http_parse_querystring(querystring_cb)
+3. Parse POST body (for authentication forms)
+4. Resolve repository: cgit_get_repoinfo(ctx.qry.repo)
+5. Determine command: cgit_get_cmd(ctx.qry.page)
+6. If no page specified:
+ - With repo → default to "summary"
+ - Without repo → default to "repolist"
+7. Check command flags:
+ - want_repo but no repo → "Repository not found" error
+ - is_clone and HTTP clone disabled → 404
+8. Handle authentication if auth-filter is configured
+9. Execute: cmd->fn(&ctx)
+```
+
+### Hierarchical Path Resolution
+
+When `want_hierarchical=1`, cgit splits `ctx.qry.path` into a reference
+(branch/tag/SHA) and a file path. It tries progressively longer prefixes
+of the path as git references until one resolves:
+
+```
+path = "main/src/lib/file.c"
+try: "main" → found branch "main"
+ qry.head = "main"
+ qry.path = "src/lib/file.c"
+```
+
+If no prefix resolves, the entire path is treated as a file path within the
+default branch.
+
+## Clone Protocol Endpoints
+
+Three commands serve the Git HTTP clone protocol:
+
+| Endpoint | Path | Function |
+|----------|------|----------|
+| `info` | `repo/info/refs` | `cgit_clone_info()` — advertise refs |
+| `objects` | `repo/objects/*` | `cgit_clone_objects()` — serve packfiles |
+| `head` | `repo/HEAD` | `cgit_clone_head()` — serve HEAD ref |
+
+These are only active when `enable-http-clone=1` (default).
+
+## URL Generation
+
+`ui-shared.c` provides URL construction helpers:
+
+```c
+const char *cgit_repourl(const char *reponame);
+const char *cgit_fileurl(const char *reponame, const char *pagename,
+ const char *filename, const char *query);
+const char *cgit_pageurl(const char *reponame, const char *pagename,
+ const char *query);
+const char *cgit_currurl(void);
+```
+
+When `virtual-root` is set, these produce clean paths. Otherwise, they
+produce query-string URLs.
+
+### Example URL generation:
+
+```c
+/* With virtual-root=/cgit/ */
+cgit_repourl("myrepo")
+ → "/cgit/myrepo/"
+
+cgit_fileurl("myrepo", "tree", "src/main.c", "h=dev")
+ → "/cgit/myrepo/tree/src/main.c?h=dev"
+
+cgit_pageurl("myrepo", "log", "ofs=50")
+ → "/cgit/myrepo/log/?ofs=50"
+```
+
+## Content-Type and HTTP Headers
+
+The response content type is set by the command handler before generating
+output. Common types:
+
+| Page | Content-Type |
+|------|-------------|
+| HTML pages | `text/html` |
+| atom | `text/xml` |
+| blob | auto-detected from content |
+| plain | MIME type from extension or `application/octet-stream` |
+| snapshot | `application/x-gzip`, etc. |
+| patch | `text/plain` |
+| clone endpoints | `text/plain`, `application/x-git-packed-objects` |
+
+Headers are emitted by `cgit_print_http_headers()` in `ui-shared.c` before
+any page content.
+
+## Error Handling
+
+If a requested repository or page is not found, cgit renders an error page
+within the standard layout. HTTP status codes:
+
+| Condition | Status |
+|-----------|--------|
+| Normal page | 200 OK |
+| Auth redirect | 302 Found |
+| Not modified | 304 Not Modified |
+| Bad request | 400 Bad Request |
+| Auth required | 401 Unauthorized |
+| Repo not found | 404 Not Found |
+| Page not found | 404 Not Found |
+
+The status code is set in `ctx.page.status` and emitted by the HTTP header
+function.
diff --git a/docs/handbook/ci/branch-strategy.md b/docs/handbook/ci/branch-strategy.md
new file mode 100644
index 0000000000..89535c9f54
--- /dev/null
+++ b/docs/handbook/ci/branch-strategy.md
@@ -0,0 +1,388 @@
+# Branch Strategy
+
+## Overview
+
+The Project Tick monorepo uses a structured branch naming convention that enables
+CI scripts to automatically classify branches, determine valid base branches for PRs,
+and decide which checks to run. The classification logic lives in
+`ci/supportedBranches.js`.
+
+---
+
+## Branch Naming Convention
+
+### Format
+
+```
+prefix[-version[-suffix]]
+```
+
+Where:
+- `prefix` — The branch type (e.g., `master`, `release`, `feature`)
+- `version` — Optional semantic version (e.g., `1.0`, `2.5.1`)
+- `suffix` — Optional additional descriptor (e.g., `pre`, `hotfix`)
+
+### Parsing Regex
+
+```javascript
+/(?<prefix>[a-zA-Z-]+?)(-(?<version>\d+\.\d+(?:\.\d+)?)(?:-(?<suffix>.*))?)?$/
+```
+
+This regex extracts three named groups:
+
+| Group | Description | Example: `release-2.5.1-hotfix` |
+|-----------|----------------------------------|---------------------------------|
+| `prefix` | Branch type identifier | `release` |
+| `version` | Semantic version number | `2.5.1` |
+| `suffix` | Additional descriptor | `hotfix` |
+
+### Parse Examples
+
+```javascript
+split('master')
+// { prefix: 'master', version: undefined, suffix: undefined }
+
+split('release-1.0')
+// { prefix: 'release', version: '1.0', suffix: undefined }
+
+split('release-2.5.1')
+// { prefix: 'release', version: '2.5.1', suffix: undefined }
+
+split('staging-1.0')
+// { prefix: 'staging', version: '1.0', suffix: undefined }
+
+split('staging-next-1.0')
+// { prefix: 'staging-next', version: '1.0', suffix: undefined }
+
+split('feature-new-ui')
+// { prefix: 'feature', version: undefined, suffix: undefined }
+// Note: "new-ui" doesn't match the version pattern, so no version or suffix is extracted
+
+split('fix-crash-on-start')
+// { prefix: 'fix', version: undefined, suffix: undefined }
+
+split('backport-123-to-release-1.0')
+// { prefix: 'backport', version: undefined, suffix: undefined }
+// Note: "123-to-release-1.0" doesn't start with a version, so no match
+
+split('dependabot-npm')
+// { prefix: 'dependabot', version: undefined, suffix: undefined }
+```
+
+---
+
+## Branch Classification
+
+### Type Configuration
+
+```javascript
+const typeConfig = {
+ master: ['development', 'primary'],
+ release: ['development', 'primary'],
+ staging: ['development', 'secondary'],
+ 'staging-next': ['development', 'secondary'],
+ feature: ['wip'],
+ fix: ['wip'],
+ backport: ['wip'],
+ revert: ['wip'],
+ wip: ['wip'],
+ dependabot: ['wip'],
+}
+```
+
+### Branch Types
+
+| Prefix | Type Tags | Description |
+|----------------|------------------------------|-------------------------------------|
+| `master` | `development`, `primary` | Main development branch |
+| `release` | `development`, `primary` | Release branches (e.g., `release-1.0`) |
+| `staging` | `development`, `secondary` | Pre-release staging |
+| `staging-next` | `development`, `secondary` | Next staging cycle |
+| `feature` | `wip` | Feature development branches |
+| `fix` | `wip` | Bug fix branches |
+| `backport` | `wip` | Backport branches |
+| `revert` | `wip` | Revert branches |
+| `wip` | `wip` | Work-in-progress branches |
+| `dependabot` | `wip` | Automated dependency updates |
+
+Any branch with an unrecognized prefix defaults to type `['wip']`.
+
+### Type Tag Meanings
+
+| Tag | Purpose |
+|--------------|-------------------------------------------------------------|
+| `development` | A long-lived branch that receives PRs |
+| `primary` | The main target for new work (master or release branches) |
+| `secondary` | A staging area — receives from primary, not from WIP directly |
+| `wip` | A short-lived branch created for a specific task |
+
+---
+
+## Order Configuration
+
+Branch ordering determines which branch is preferred when multiple branches are
+equally good candidates as PR base branches:
+
+```javascript
+const orderConfig = {
+ master: 0,
+ release: 1,
+ staging: 2,
+ 'staging-next': 3,
+}
+```
+
+| Branch Prefix | Order | Preference |
+|----------------|-------|------------------------------------------|
+| `master` | 0 | Highest — default target for new work |
+| `release` | 1 | Second — for release-specific changes |
+| `staging` | 2 | Third — staging area |
+| `staging-next` | 3 | Fourth — next staging cycle |
+| All others | `Infinity` | Lowest — not considered as base branches |
+
+If two branches have the same number of commits ahead of a PR head, the one with
+the lower order is preferred. This means `master` is preferred over `release-1.0`
+when both are equally close.
+
+---
+
+## Classification Function
+
+```javascript
+function classify(branch) {
+ const { prefix, version } = split(branch)
+ return {
+ branch,
+ order: orderConfig[prefix] ?? Infinity,
+ stable: version != null,
+ type: typeConfig[prefix] ?? ['wip'],
+ version: version ?? 'dev',
+ }
+}
+```
+
+### Output Fields
+
+| Field | Type | Description |
+|----------|----------|------------------------------------------------------|
+| `branch` | String | The original branch name |
+| `order` | Number | Sort priority (lower = preferred as base) |
+| `stable` | Boolean | `true` if the branch has a version suffix |
+| `type` | Array | Type tags from `typeConfig` |
+| `version` | String | Extracted version number, or `'dev'` if none |
+
+### Classification Examples
+
+```javascript
+classify('master')
+// { branch: 'master', order: 0, stable: false, type: ['development', 'primary'], version: 'dev' }
+
+classify('release-1.0')
+// { branch: 'release-1.0', order: 1, stable: true, type: ['development', 'primary'], version: '1.0' }
+
+classify('release-2.5.1')
+// { branch: 'release-2.5.1', order: 1, stable: true, type: ['development', 'primary'], version: '2.5.1' }
+
+classify('staging-1.0')
+// { branch: 'staging-1.0', order: 2, stable: true, type: ['development', 'secondary'], version: '1.0' }
+
+classify('staging-next-1.0')
+// { branch: 'staging-next-1.0', order: 3, stable: true, type: ['development', 'secondary'], version: '1.0' }
+
+classify('feature-new-ui')
+// { branch: 'feature-new-ui', order: Infinity, stable: false, type: ['wip'], version: 'dev' }
+
+classify('fix-crash-on-start')
+// { branch: 'fix-crash-on-start', order: Infinity, stable: false, type: ['wip'], version: 'dev' }
+
+classify('dependabot-npm')
+// { branch: 'dependabot-npm', order: Infinity, stable: false, type: ['wip'], version: 'dev' }
+
+classify('wip-experiment')
+// { branch: 'wip-experiment', order: Infinity, stable: false, type: ['wip'], version: 'dev' }
+
+classify('random-unknown-branch')
+// { branch: 'random-unknown-branch', order: Infinity, stable: false, type: ['wip'], version: 'dev' }
+```
+
+---
+
+## Branch Flow Model
+
+### Development Flow
+
+```
+┌─────────────────────────────────────────────┐
+│ master │
+│ (primary development, receives all work) │
+└──────────┬──────────────────────┬───────────┘
+ │ fork │ fork
+ ▼ ▼
+┌──────────────────┐ ┌──────────────────────┐
+│ staging-X.Y │ │ release-X.Y │
+│ (secondary, │ │ (primary, │
+│ pre-release) │ │ stable release) │
+└──────────────────┘ └──────────────────────┘
+```
+
+### WIP Branch Flow
+
+```
+ master (or release-X.Y)
+ │
+ ┌────┴────┐
+ │ fork │
+ ▼ │
+ feature-* │
+ fix-* │
+ backport-* │
+ wip-* │
+ │ │
+ └──── PR ─┘
+ (merged back)
+```
+
+### Typical Branch Lifecycle
+
+1. **Create** — Developer creates `feature-my-change` from `master`
+2. **Develop** — Commits follow Conventional Commits format
+3. **PR** — Pull request targets `master` (or the appropriate release branch)
+4. **CI Validation** — `prepare.js` classifies branches, `lint-commits.js` checks messages
+5. **Review** — Code review by owners defined in `ci/OWNERS`
+6. **Merge** — PR is merged into the target branch
+7. **Cleanup** — The WIP branch is deleted
+
+---
+
+## How CI Uses Branch Classification
+
+### Commit Linting Exemptions
+
+PRs between development branches skip commit linting:
+
+```javascript
+if (
+ baseBranchType.includes('development') &&
+ headBranchType.includes('development') &&
+ pr.base.repo.id === pr.head.repo?.id
+) {
+ core.info('This PR is from one development branch to another. Skipping checks.')
+ return
+}
+```
+
+Exempted transitions:
+- `staging` → `master`
+- `staging-next` → `staging`
+- `release-X.Y` → `master`
+
+### Base Branch Suggestion
+
+For WIP branches, `prepare.js` finds the optimal base:
+
+1. Start with `master` as a candidate
+2. Compare commit distances to all `release-*` branches (sorted newest first)
+3. The branch with fewest commits ahead is the best candidate
+4. On ties, lower `order` wins (master > release > staging)
+
+### Release Branch Targeting Warning
+
+When a non-backport/fix/revert branch targets a release branch:
+
+```
+Warning: This PR targets release branch `release-1.0`.
+New features should typically target `master`.
+```
+
+---
+
+## Version Extraction
+
+The `stable` flag and `version` field enable version-aware CI decisions:
+
+| Branch | `stable` | `version` | Interpretation |
+|-------------------|----------|-----------|--------------------------------|
+| `master` | `false` | `'dev'` | Development, no specific version |
+| `release-1.0` | `true` | `'1.0'` | Release 1.0 |
+| `release-2.5.1` | `true` | `'2.5.1'` | Release 2.5.1 |
+| `staging-1.0` | `true` | `'1.0'` | Staging for release 1.0 |
+| `feature-foo` | `false` | `'dev'` | WIP, no version association |
+
+Release branches are sorted by version (descending) when computing base branch
+suggestions, so `release-2.0` is checked before `release-1.0`.
+
+---
+
+## Module Exports
+
+The `supportedBranches.js` module exports two functions:
+
+```javascript
+module.exports = { classify, split }
+```
+
+| Function | Purpose |
+|-----------|----------------------------------------------------------|
+| `classify` | Full classification: type tags, order, stability, version|
+| `split` | Parse branch name into prefix, version, suffix |
+
+These are imported by:
+- `ci/github-script/lint-commits.js` — For commit linting exemptions
+- `ci/github-script/prepare.js` — For branch targeting validation
+
+---
+
+## Self-Testing
+
+When `supportedBranches.js` is run directly (not imported as a module), it executes
+built-in tests:
+
+```bash
+cd ci/
+node supportedBranches.js
+```
+
+Output:
+
+```
+split(branch)
+master { prefix: 'master', version: undefined, suffix: undefined }
+release-1.0 { prefix: 'release', version: '1.0', suffix: undefined }
+release-2.5.1 { prefix: 'release', version: '2.5.1', suffix: undefined }
+staging-1.0 { prefix: 'staging', version: '1.0', suffix: undefined }
+staging-next-1.0 { prefix: 'staging-next', version: '1.0', suffix: undefined }
+feature-new-ui { prefix: 'feature', version: undefined, suffix: undefined }
+fix-crash-on-start { prefix: 'fix', version: undefined, suffix: undefined }
+...
+
+classify(branch)
+master { branch: 'master', order: 0, stable: false, type: ['development', 'primary'], version: 'dev' }
+release-1.0 { branch: 'release-1.0', order: 1, stable: true, type: ['development', 'primary'], version: '1.0' }
+...
+```
+
+---
+
+## Adding New Branch Types
+
+To add a new branch type:
+
+1. Add the prefix and type tags to `typeConfig`:
+
+```javascript
+const typeConfig = {
+ // ... existing entries ...
+ 'hotfix': ['wip'], // or ['development', 'primary'] if it's a long-lived branch
+}
+```
+
+2. If it should be a base branch candidate, add it to `orderConfig`:
+
+```javascript
+const orderConfig = {
+ // ... existing entries ...
+ hotfix: 4, // lower number = higher preference
+}
+```
+
+3. Update the self-tests at the bottom of the file.
diff --git a/docs/handbook/ci/codeowners.md b/docs/handbook/ci/codeowners.md
new file mode 100644
index 0000000000..0054a168f1
--- /dev/null
+++ b/docs/handbook/ci/codeowners.md
@@ -0,0 +1,370 @@
+# CODEOWNERS
+
+## Overview
+
+Project Tick uses a code ownership system based on the `ci/OWNERS` file. This file
+follows the same syntax as GitHub's native `CODEOWNERS` file but is stored in a
+custom location and validated by a patched version of the
+[codeowners-validator](https://github.com/mszostok/codeowners-validator) tool.
+
+The OWNERS file serves two purposes:
+1. **Automated review routing** — PR authors know who to request reviews from
+2. **Structural validation** — CI checks that referenced paths and users exist
+
+---
+
+## File Location and Format
+
+### Location
+
+```
+ci/OWNERS
+```
+
+Unlike GitHub's native CODEOWNERS (which must be in `.github/CODEOWNERS`,
+`CODEOWNERS`, or `docs/CODEOWNERS`), Project Tick stores ownership data in
+`ci/OWNERS` to keep CI infrastructure colocated.
+
+### Syntax
+
+The file uses CODEOWNERS syntax:
+
+```
+# Comments start with #
+# Pattern followed by one or more @owner references
+/path/pattern/ @owner1 @owner2
+```
+
+### Header
+
+```
+# This file describes who owns what in the Project Tick CI infrastructure.
+# Users/teams will get review requests for PRs that change their files.
+#
+# This file uses the same syntax as the natively supported CODEOWNERS file,
+# see https://help.github.com/articles/about-codeowners/ for documentation.
+#
+# Validated by ci/codeowners-validator.
+```
+
+---
+
+## Ownership Map
+
+The OWNERS file maps every major directory and subdirectory in the monorepo to
+code owners. Below is the complete ownership mapping:
+
+### GitHub Infrastructure
+
+```
+/.github/actions/change-analysis/ @YongDo-Hyun
+/.github/actions/meshmc/package/ @YongDo-Hyun
+/.github/actions/meshmc/setup-dependencies/ @YongDo-Hyun
+/.github/actions/mnv/test_artefacts/ @YongDo-Hyun
+/.github/codeql/ @YongDo-Hyun
+/.github/ISSUE_TEMPLATE/ @YongDo-Hyun
+/.github/workflows/ @YongDo-Hyun
+```
+
+### Archived Projects
+
+```
+/archived/projt-launcher/ @YongDo-Hyun
+/archived/projt-minicraft-modpack/ @YongDo-Hyun
+/archived/projt-modpack/ @YongDo-Hyun
+/archived/ptlibzippy/ @YongDo-Hyun
+```
+
+### Core Projects
+
+```
+/cgit/* @YongDo-Hyun
+/cgit/contrib/* @YongDo-Hyun
+/cgit/contrib/hooks/ @YongDo-Hyun
+/cgit/filters/ @YongDo-Hyun
+/cgit/tests/ @YongDo-Hyun
+
+/cmark/* @YongDo-Hyun
+/cmark/api_test/ @YongDo-Hyun
+/cmark/bench/ @YongDo-Hyun
+/cmark/cmake/ @YongDo-Hyun
+/cmark/data/ @YongDo-Hyun
+/cmark/fuzz/ @YongDo-Hyun
+/cmark/man/ @YongDo-Hyun
+/cmark/src/ @YongDo-Hyun
+/cmark/test/ @YongDo-Hyun
+/cmark/tools/ @YongDo-Hyun
+/cmark/wrappers/ @YongDo-Hyun
+```
+
+### Corebinutils (every utility individually owned)
+
+```
+/corebinutils/* @YongDo-Hyun
+/corebinutils/cat/ @YongDo-Hyun
+/corebinutils/chflags/ @YongDo-Hyun
+/corebinutils/chmod/ @YongDo-Hyun
+/corebinutils/contrib/* @YongDo-Hyun
+/corebinutils/contrib/libc-vis/ @YongDo-Hyun
+/corebinutils/contrib/libedit/ @YongDo-Hyun
+/corebinutils/contrib/printf/ @YongDo-Hyun
+/corebinutils/cp/ @YongDo-Hyun
+...
+/corebinutils/uuidgen/ @YongDo-Hyun
+```
+
+### Other Projects
+
+```
+/forgewrapper/* @YongDo-Hyun
+/forgewrapper/gradle/ @YongDo-Hyun
+/forgewrapper/jigsaw/ @YongDo-Hyun
+/forgewrapper/src/ @YongDo-Hyun
+
+/genqrcode/* @YongDo-Hyun
+/genqrcode/cmake/ @YongDo-Hyun
+/genqrcode/tests/ @YongDo-Hyun
+/genqrcode/use/ @YongDo-Hyun
+
+/hooks/ @YongDo-Hyun
+/images4docker/ @YongDo-Hyun
+
+/json4cpp/* @YongDo-Hyun
+/json4cpp/.reuse/ @YongDo-Hyun
+/json4cpp/cmake/ @YongDo-Hyun
+/json4cpp/docs/ @YongDo-Hyun
+/json4cpp/include/* @YongDo-Hyun
+...
+
+/libnbtplusplus/* @YongDo-Hyun
+/libnbtplusplus/include/* @YongDo-Hyun
+...
+
+/LICENSES/ @YongDo-Hyun
+
+/meshmc/* @YongDo-Hyun
+/meshmc/branding/ @YongDo-Hyun
+/meshmc/buildconfig/ @YongDo-Hyun
+/meshmc/cmake/* @YongDo-Hyun
+/meshmc/launcher/* @YongDo-Hyun
+...
+```
+
+---
+
+## Pattern Syntax
+
+### Glob Rules
+
+| Pattern | Matches |
+|---------------|------------------------------------------------------|
+| `/path/` | All files directly under `path/` |
+| `/path/*` | All files directly under `path/` (explicit) |
+| `/path/**` | All files recursively under `path/` |
+| `*.js` | All `.js` files everywhere |
+| `/path/*.md` | All `.md` files directly under `path/` |
+
+### Ownership Resolution
+
+When multiple patterns match a file, the **last matching rule** wins (just like
+Git's `.gitignore` and GitHub's native CODEOWNERS):
+
+```
+/meshmc/* @teamA # Matches all direct files
+/meshmc/launcher/* @teamB # More specific — wins for launcher files
+```
+
+A PR modifying `meshmc/launcher/main.cpp` would require review from `@teamB`.
+
+### Explicit Directory Listing
+
+The OWNERS file explicitly lists individual subdirectories rather than using `**`
+recursive globs. This is intentional:
+
+1. **Precision** — Each directory has explicit ownership
+2. **Validation** — The codeowners-validator checks that each listed path exists
+3. **Documentation** — The file serves as a directory map of the monorepo
+
+---
+
+## Validation
+
+### codeowners-validator
+
+The CI runs a patched version of `codeowners-validator` against the OWNERS file.
+The tool is built from source with Project Tick–specific patches.
+
+#### What It Validates
+
+| Check | Description |
+|-------------------------|------------------------------------------------|
+| **Path existence** | All paths in OWNERS exist in the repository |
+| **User/team existence** | All `@` references are valid GitHub users/teams|
+| **Syntax** | Pattern syntax is valid CODEOWNERS format |
+| **No orphaned patterns** | Patterns match at least one file |
+
+#### Custom Patches
+
+Two patches are applied to the upstream validator:
+
+**1. Custom OWNERS file path** (`owners-file-name.patch`)
+
+```go
+func openCodeownersFile(dir string) (io.Reader, error) {
+ if file, ok := os.LookupEnv("OWNERS_FILE"); ok {
+ return fs.Open(file)
+ }
+ // ... default CODEOWNERS paths
+}
+```
+
+Set `OWNERS_FILE=ci/OWNERS` to validate the custom location.
+
+**2. Removed write-access requirement** (`permissions.patch`)
+
+GitHub's native CODEOWNERS requires that listed users have write access to the
+repository. Project Tick's OWNERS file is used for review routing, not branch
+protection, so this check is removed:
+
+```go
+// Before: required push permission
+if t.Permissions["push"] { return nil }
+return newValidateError("Team cannot review PRs...")
+
+// After: any team membership is sufficient
+return nil
+```
+
+Also removes the `github.ScopeReadOrg` requirement from required OAuth scopes,
+allowing the validator to work with tokens generated for GitHub Apps.
+
+### Running Validation Locally
+
+```bash
+cd ci/
+nix-shell # enters the CI dev shell with codeowners-validator available
+
+# Set the custom OWNERS file path:
+export OWNERS_FILE=ci/OWNERS
+
+# Run validation:
+codeowners-validator
+```
+
+Or build and run directly:
+
+```bash
+nix-build ci/ -A codeownersValidator
+OWNERS_FILE=ci/OWNERS ./result/bin/codeowners-validator
+```
+
+---
+
+## MAINTAINERS File Relationship
+
+In addition to `ci/OWNERS`, individual projects may have a `MAINTAINERS` file
+(e.g., `archived/projt-launcher/MAINTAINERS`):
+
+```
+# MAINTAINERS
+#
+# Fields:
+# - Name: Display name
+# - GitHub: GitHub handle (with @)
+# - Email: Primary contact email
+# - Paths: Comma-separated glob patterns (repo-relative)
+
+[Mehmet Samet Duman]
+GitHub: @YongDo-Hyun
+Email: yongdohyun@mail.projecttick.org
+Paths: **
+```
+
+The `MAINTAINERS` file provides additional metadata (email, display name) that
+`OWNERS` doesn't support. The two files serve complementary purposes:
+
+| File | Purpose | Format |
+|--------------|--------------------------------------|-------------------|
+| `ci/OWNERS` | Automated review routing via CI | CODEOWNERS syntax |
+| `MAINTAINERS`| Human-readable contact information | INI-style blocks |
+
+---
+
+## Review Requirements
+
+### How Reviews Are Triggered
+
+When a PR modifies files matching an OWNERS pattern:
+
+1. The workflow identifies which owners are responsible for the changed paths
+2. Review requests are sent to the matching owners
+3. At least one approving review from a code owner is typically required before merge
+
+### Bot-Managed Reviews
+
+The CI bot (`github-actions[bot]`) manages automated reviews via `ci/github-script/reviews.js`:
+- Reviews are tagged with a `reviewKey` comment for identification
+- When issues are resolved, bot reviews are automatically dismissed or minimized
+- The `CHANGES_REQUESTED` state blocks merge until the review is dismissed
+
+---
+
+## Adding Ownership Entries
+
+### For a New Project Directory
+
+1. Add ownership patterns to `ci/OWNERS`:
+
+```
+/newproject/* @owner-handle
+/newproject/src/ @owner-handle
+/newproject/tests/ @owner-handle
+```
+
+2. List every subdirectory explicitly (not just the top-level with `**`)
+
+3. Run the validator locally:
+
+```bash
+cd ci/
+nix-shell
+OWNERS_FILE=ci/OWNERS codeowners-validator
+```
+
+4. Commit with a CI scope:
+
+```
+ci(repo): add ownership for newproject
+```
+
+### For a New Team or User
+
+Simply reference the new `@handle` in the ownership patterns:
+
+```
+/some/path/ @existing-owner @new-owner
+```
+
+The validator will check that `@new-owner` exists in the GitHub organization.
+
+---
+
+## Limitations
+
+### No Recursive Globs in Current File
+
+The current OWNERS file uses explicit directory listings rather than `/**` recursive
+globs. This means:
+- New subdirectories must be manually added to OWNERS
+- Deeply nested directories need their own entries
+- The file can grow large for projects with many subdirectories
+
+### Single Organization Scope
+
+All `@` references must be members of the repository's GitHub organization,
+or GitHub users with access to the repository.
+
+### No Per-File Patterns
+
+The file doesn't currently use file-level patterns (e.g., `*.nix @nix-team`).
+Ownership is assigned at the directory level.
diff --git a/docs/handbook/ci/commit-linting.md b/docs/handbook/ci/commit-linting.md
new file mode 100644
index 0000000000..9b8e9cc97d
--- /dev/null
+++ b/docs/handbook/ci/commit-linting.md
@@ -0,0 +1,418 @@
+# Commit Linting
+
+## Overview
+
+Project Tick enforces the [Conventional Commits](https://www.conventionalcommits.org/)
+specification for all commit messages. The commit linter (`ci/github-script/lint-commits.js`)
+runs automatically on every pull request to validate that every commit follows the required
+format.
+
+This ensures:
+- Consistent, machine-readable commit history
+- Automated changelog generation potential
+- Clear communication of change intent (feature, fix, refactor, etc.)
+- Monorepo-aware scoping that maps commits to project directories
+
+---
+
+## Commit Message Format
+
+### Structure
+
+```
+type(scope): subject
+```
+
+### Examples
+
+```
+feat(mnv): add new keybinding support
+fix(meshmc): resolve crash on startup
+ci(neozip): update build matrix
+docs(cmark): fix API reference
+refactor(corebinutils): simplify ls output logic
+chore(deps): bump tomlplusplus to v4.0.0
+revert(forgewrapper): undo jigsaw module changes
+```
+
+### Rules
+
+| Rule | Requirement |
+|-------------------------------|----------------------------------------------------------|
+| **Type** | Must be one of the supported types (see below) |
+| **Scope** | Optional, but should match a known project directory |
+| **Subject** | Must follow the type/scope with `: ` (colon + space) |
+| **Trailing period** | Subject must NOT end with a period |
+| **Subject case** | Subject should start with a lowercase letter (warning) |
+| **No fixup/squash commits** | `fixup!`, `squash!`, `amend!` prefixes are rejected |
+| **Breaking changes** | Use `!` after type/scope: `feat(mnv)!: remove API` |
+
+---
+
+## Supported Types
+
+The following Conventional Commit types are recognized:
+
+```javascript
+const CONVENTIONAL_TYPES = [
+ 'build',
+ 'chore',
+ 'ci',
+ 'docs',
+ 'feat',
+ 'fix',
+ 'perf',
+ 'refactor',
+ 'revert',
+ 'style',
+ 'test',
+]
+```
+
+| Type | Use When |
+|-----------|-------------------------------------------------------------|
+| `build` | Changes to the build system or external dependencies |
+| `chore` | Routine tasks, no production code change |
+| `ci` | CI configuration files and scripts |
+| `docs` | Documentation only changes |
+| `feat` | A new feature |
+| `fix` | A bug fix |
+| `perf` | A performance improvement |
+| `refactor`| Code change that neither fixes a bug nor adds a feature |
+| `revert` | Reverts a previous commit |
+| `style` | Formatting, semicolons, whitespace (no code change) |
+| `test` | Adding or correcting tests |
+
+---
+
+## Known Scopes
+
+Scopes correspond to directories in the Project Tick monorepo:
+
+```javascript
+const KNOWN_SCOPES = [
+ 'archived',
+ 'cgit',
+ 'ci',
+ 'cmark',
+ 'corebinutils',
+ 'forgewrapper',
+ 'genqrcode',
+ 'hooks',
+ 'images4docker',
+ 'json4cpp',
+ 'libnbtplusplus',
+ 'meshmc',
+ 'meta',
+ 'mnv',
+ 'neozip',
+ 'tomlplusplus',
+ 'repo',
+ 'deps',
+]
+```
+
+### Special Scopes
+
+| Scope | Meaning |
+|----------|----------------------------------------------------|
+| `repo` | Changes affecting the repository as a whole |
+| `deps` | Dependency updates not scoped to a single project |
+
+### Unknown Scope Handling
+
+Using an unknown scope generates a **warning** (not an error):
+
+```
+Commit abc123456789: scope "myproject" is not a known project.
+Known scopes: archived, cgit, ci, cmark, ...
+```
+
+This allows new scopes to be introduced before updating the linter.
+
+---
+
+## Validation Logic
+
+### Regex Pattern
+
+The commit message is validated against this regex:
+
+```javascript
+const conventionalRegex = new RegExp(
+ `^(${CONVENTIONAL_TYPES.join('|')})(\\(([^)]+)\\))?(!)?: .+$`,
+)
+```
+
+Expanded, this matches:
+
+```
+^(build|chore|ci|docs|feat|fix|perf|refactor|revert|style|test) # type
+(\(([^)]+)\))? # optional (scope)
+(!)? # optional breaking change marker
+: .+$ # colon, space, and subject
+```
+
+### Validation Order
+
+For each commit in the PR:
+
+1. **Check for fixup/squash/amend** — If the message starts with `amend!`, `fixup!`, or
+ `squash!`, the commit fails immediately. These commits should be rebased before merging:
+
+ ```javascript
+ const fixups = ['amend!', 'fixup!', 'squash!']
+ if (fixups.some((s) => msg.startsWith(s))) {
+ core.error(
+ `${logPrefix}: starts with "${fixups.find((s) => msg.startsWith(s))}". ` +
+ 'Did you forget to run `git rebase -i --autosquash`?',
+ )
+ failures.add(commit.sha)
+ continue
+ }
+ ```
+
+2. **Check Conventional Commits format** — If the regex doesn't match, the commit fails:
+
+ ```javascript
+ if (!conventionalRegex.test(msg)) {
+ core.error(
+ `${logPrefix}: "${msg}" does not follow Conventional Commits format. ` +
+ 'Expected: type(scope): subject (e.g. "feat(mnv): add keybinding")',
+ )
+ failures.add(commit.sha)
+ continue
+ }
+ ```
+
+3. **Check trailing period** — Subjects ending with `.` fail:
+
+ ```javascript
+ if (msg.endsWith('.')) {
+ core.error(`${logPrefix}: subject should not end with a period.`)
+ failures.add(commit.sha)
+ }
+ ```
+
+4. **Warn on unknown scope** — Non-standard scopes produce a warning:
+
+ ```javascript
+ if (scope && !KNOWN_SCOPES.includes(scope)) {
+ core.warning(
+ `${logPrefix}: scope "${scope}" is not a known project. ` +
+ `Known scopes: ${KNOWN_SCOPES.join(', ')}`,
+ )
+ warnings.add(commit.sha)
+ }
+ ```
+
+5. **Warn on uppercase subject** — If the first character after `: ` is uppercase, warn:
+
+ ```javascript
+ const subjectStart = msg.indexOf(': ') + 2
+ if (subjectStart < msg.length) {
+ const firstChar = msg[subjectStart]
+ if (firstChar === firstChar.toUpperCase() && firstChar !== firstChar.toLowerCase()) {
+ core.warning(`${logPrefix}: subject should start with lowercase letter.`)
+ warnings.add(commit.sha)
+ }
+ }
+ ```
+
+---
+
+## Branch-Based Exemptions
+
+The linter skips validation for PRs between development branches:
+
+```javascript
+const baseBranchType = classify(pr.base.ref.replace(/^refs\/heads\//, '')).type
+const headBranchType = classify(pr.head.ref.replace(/^refs\/heads\//, '')).type
+
+if (
+ baseBranchType.includes('development') &&
+ headBranchType.includes('development') &&
+ pr.base.repo.id === pr.head.repo?.id
+) {
+ core.info('This PR is from one development branch to another. Skipping checks.')
+ return
+}
+```
+
+This exempts:
+- `staging` → `master` merges
+- `staging-next` → `staging` merges
+- `release-X.Y` → `master` merges
+
+These are infrastructure merges where commits were already validated in their original PRs.
+
+The `classify()` function from `supportedBranches.js` determines branch types:
+
+| Branch Prefix | Type | Exempt as PR source? |
+|----------------|-------------------------|---------------------|
+| `master` | `development`, `primary` | Yes |
+| `release-*` | `development`, `primary` | Yes |
+| `staging-*` | `development`, `secondary` | Yes |
+| `staging-next-*`| `development`, `secondary` | Yes |
+| `feature-*` | `wip` | No |
+| `fix-*` | `wip` | No |
+| `backport-*` | `wip` | No |
+
+---
+
+## Commit Detail Extraction
+
+The linter uses `get-pr-commit-details.js` to extract commit information. Notably,
+this uses **git directly** rather than the GitHub API:
+
+```javascript
+async function getCommitDetailsForPR({ core, pr, repoPath }) {
+ await runGit({
+ args: ['fetch', `--depth=1`, 'origin', pr.base.sha],
+ repoPath, core,
+ })
+ await runGit({
+ args: ['fetch', `--depth=${pr.commits + 1}`, 'origin', pr.head.sha],
+ repoPath, core,
+ })
+
+ const shas = (
+ await runGit({
+ args: [
+ 'rev-list',
+ `--max-count=${pr.commits}`,
+ `${pr.base.sha}..${pr.head.sha}`,
+ ],
+ repoPath, core,
+ })
+ ).stdout.split('\n').map((s) => s.trim()).filter(Boolean)
+```
+
+### Why Not Use the GitHub API?
+
+The GitHub REST API's "list commits on a PR" endpoint has a hard limit of **250 commits**.
+For large PRs or release-branch merges, this is insufficient. Using git directly:
+- Has no commit count limit
+- Also returns changed file paths per commit (used for scope validation)
+- Is faster for bulk operations
+
+For each commit, the script extracts:
+
+| Field | Source | Purpose |
+|----------------------|-----------------------------|---------------------------------|
+| `sha` | `git rev-list` | Commit identifier |
+| `subject` | `git log --format=%s` | First line of commit message |
+| `changedPaths` | `git log --name-only` | Files changed in that commit |
+| `changedPathSegments` | Path splitting | Directory segments for scope matching |
+
+---
+
+## Error Output
+
+### Failures (block merge)
+
+```
+Error: Commit abc123456789: "Add new feature" does not follow Conventional Commits format.
+Expected: type(scope): subject (e.g. "feat(mnv): add keybinding")
+
+Error: Commit def456789012: starts with "fixup!".
+Did you forget to run `git rebase -i --autosquash`?
+
+Error: Commit ghi789012345: subject should not end with a period.
+
+Error: Please review the Conventional Commits guidelines at
+<https://www.conventionalcommits.org/> and the project CONTRIBUTING.md.
+
+Error: 3 commit(s) do not follow commit conventions.
+```
+
+### Warnings (informational)
+
+```
+Warning: Commit jkl012345678: scope "myproject" is not a known project.
+Known scopes: archived, cgit, ci, cmark, ...
+
+Warning: Commit mno345678901: subject should start with lowercase letter.
+
+Warning: 2 commit(s) have minor issues (see warnings above).
+```
+
+---
+
+## Local Testing
+
+Test the commit linter locally using the CLI runner:
+
+```bash
+cd ci/github-script
+nix-shell # enter Nix dev shell
+gh auth login # authenticate with GitHub
+./run lint-commits YongDo-Hyun Project-Tick 123 # lint PR #123
+```
+
+The `./run` CLI uses the `commander` package and authenticates via `gh auth token`:
+
+```javascript
+program
+ .command('lint-commits')
+ .description('Lint commit messages for Conventional Commits compliance.')
+ .argument('<owner>', 'Repository owner (e.g. YongDo-Hyun)')
+ .argument('<repo>', 'Repository name (e.g. Project-Tick)')
+ .argument('<pr>', 'Pull Request number')
+ .action(async (owner, repo, pr) => {
+ const lint = (await import('./lint-commits.js')).default
+ await run(lint, owner, repo, pr)
+ })
+```
+
+---
+
+## Best Practices
+
+### Writing Good Commit Messages
+
+1. **Use the correct type** — `feat` for features, `fix` for bugs, `docs` for documentation
+2. **Include a scope** — Helps identify which project is affected: `feat(meshmc): ...`
+3. **Use imperative mood** — "add feature" not "added feature" or "adds feature"
+4. **Keep subject under 72 characters** — For readability in `git log`
+5. **Start with lowercase** — `add feature` not `Add feature`
+6. **No trailing period** — `fix(cgit): resolve parse error` not `fix(cgit): resolve parse error.`
+
+### Handling Fixup Commits During Development
+
+During development, you can use `git commit --fixup=<sha>` freely. Before opening
+the PR (or before requesting review), squash them:
+
+```bash
+git rebase -i --autosquash origin/master
+```
+
+### Multiple Scopes
+
+If a commit touches multiple projects, either:
+- Use `repo` as the scope: `refactor(repo): update shared build config`
+- Use the primary affected project as the scope
+- Split the commit into separate per-project commits
+
+---
+
+## Adding New Types or Scopes
+
+### New Scope
+
+Add the scope to the `KNOWN_SCOPES` array in `ci/github-script/lint-commits.js`:
+
+```javascript
+const KNOWN_SCOPES = [
+ 'archived',
+ 'cgit',
+ // ...
+ 'newproject', // ← add here (keep sorted)
+ // ...
+]
+```
+
+### New Type
+
+Adding new types requires updating `CONVENTIONAL_TYPES` — but this should be done
+rarely, as the standard Conventional Commits types cover most use cases.
diff --git a/docs/handbook/ci/formatting.md b/docs/handbook/ci/formatting.md
new file mode 100644
index 0000000000..9d2ddb35a4
--- /dev/null
+++ b/docs/handbook/ci/formatting.md
@@ -0,0 +1,298 @@
+# Code Formatting
+
+## Overview
+
+Project Tick uses [treefmt](https://github.com/numtide/treefmt) orchestrated through
+[treefmt-nix](https://github.com/numtide/treefmt-nix) to enforce consistent code formatting
+across the entire monorepo. The formatting configuration lives in `ci/default.nix` and
+covers JavaScript, Nix, YAML, GitHub Actions workflows, and sorted-list enforcement.
+
+---
+
+## Configured Formatters
+
+### Summary Table
+
+| Formatter | Language/Files | Key Settings |
+|-------------|-------------------------------|-------------------------------------------|
+| `actionlint` | GitHub Actions YAML | Default (syntax + best practices) |
+| `biome` | JavaScript / TypeScript | Single quotes, optional semicolons |
+| `keep-sorted`| Any (marked sections) | Default |
+| `nixfmt` | Nix expressions | nixfmt-rfc-style |
+| `yamlfmt` | YAML files | Retain line breaks |
+| `zizmor` | GitHub Actions YAML | Security scanning |
+
+---
+
+### actionlint
+
+**Purpose**: Validates GitHub Actions workflow files for syntax errors, type mismatches,
+and best practices.
+
+**Scope**: `.github/workflows/*.yml`
+
+**Configuration**: Default — no custom settings.
+
+```nix
+programs.actionlint.enable = true;
+```
+
+**What it catches**:
+- Invalid workflow syntax
+- Missing or incorrect `runs-on` values
+- Type mismatches in expressions
+- Unknown action references
+
+---
+
+### biome
+
+**Purpose**: Formats JavaScript and TypeScript source files with consistent style.
+
+**Scope**: All `.js` and `.ts` files except `*.min.js`
+
+**Configuration**:
+
+```nix
+programs.biome = {
+ enable = true;
+ validate.enable = false;
+ settings.formatter = {
+ useEditorconfig = true;
+ };
+ settings.javascript.formatter = {
+ quoteStyle = "single";
+ semicolons = "asNeeded";
+ };
+ settings.json.formatter.enabled = false;
+};
+settings.formatter.biome.excludes = [
+ "*.min.js"
+];
+```
+
+**Style rules**:
+
+| Setting | Value | Effect |
+|---------------------|----------------|-------------------------------------------|
+| `useEditorconfig` | `true` | Respects `.editorconfig` (indent, etc.) |
+| `quoteStyle` | `"single"` | Uses `'string'` instead of `"string"` |
+| `semicolons` | `"asNeeded"` | Only inserts `;` where ASI requires it |
+| `validate.enable` | `false` | No lint-level validation, only formatting |
+| `json.formatter` | `disabled` | JSON files are not formatted by biome |
+
+**Exclusions**: `*.min.js` — Minified JavaScript files are never reformatted.
+
+---
+
+### keep-sorted
+
+**Purpose**: Enforces alphabetical ordering in marked sections of any file type.
+
+**Scope**: Files containing `keep-sorted` markers.
+
+```nix
+programs.keep-sorted.enable = true;
+```
+
+**Usage**: Add markers around sections that should stay sorted:
+
+```
+# keep-sorted start
+apple
+banana
+cherry
+# keep-sorted end
+```
+
+---
+
+### nixfmt
+
+**Purpose**: Formats Nix expressions according to the RFC-style convention.
+
+**Scope**: All `.nix` files.
+
+```nix
+programs.nixfmt = {
+ enable = true;
+ package = pkgs.nixfmt;
+};
+```
+
+The `pkgs.nixfmt` package from the pinned Nixpkgs provides the formatter. This
+is `nixfmt-rfc-style`, the official Nix formatting standard.
+
+---
+
+### yamlfmt
+
+**Purpose**: Formats YAML files with consistent indentation and structure.
+
+**Scope**: All `.yml` and `.yaml` files.
+
+```nix
+programs.yamlfmt = {
+ enable = true;
+ settings.formatter = {
+ retain_line_breaks = true;
+ };
+};
+```
+
+**Key setting**: `retain_line_breaks = true` — Preserves intentional blank lines between
+YAML sections, preventing the formatter from collapsing the file into a dense block.
+
+---
+
+### zizmor
+
+**Purpose**: Security scanner for GitHub Actions workflows. Detects injection
+vulnerabilities, insecure defaults, and untrusted input handling.
+
+**Scope**: `.github/workflows/*.yml`
+
+```nix
+programs.zizmor.enable = true;
+```
+
+**What it detects**:
+- Script injection via `${{ github.event.* }}` in `run:` steps
+- Insecure use of `pull_request_target`
+- Unquoted expressions that could be exploited
+- Dangerous permission configurations
+
+---
+
+## treefmt Global Settings
+
+```nix
+projectRootFile = ".git/config";
+settings.verbose = 1;
+settings.on-unmatched = "debug";
+```
+
+| Setting | Value | Purpose |
+|--------------------|---------------|----------------------------------------------|
+| `projectRootFile` | `.git/config` | Identifies repository root for treefmt |
+| `settings.verbose` | `1` | Logs which files each formatter processes |
+| `settings.on-unmatched` | `"debug"` | Files with no matching formatter are logged at debug level |
+
+---
+
+## Running Formatters
+
+### In CI
+
+The formatting check runs as a Nix derivation:
+
+```bash
+nix-build ci/ -A fmt.check
+```
+
+This:
+1. Copies the full source tree (excluding `.git`) into the Nix store
+2. Runs all configured formatters
+3. Fails with a diff if any file would be reformatted
+
+### Locally (Nix Shell)
+
+```bash
+cd ci/
+nix-shell # enter CI dev shell
+treefmt # format all files
+treefmt --check # check without modifying (dry run)
+```
+
+### Locally (Nix Build)
+
+```bash
+# Just check (no modification):
+nix-build ci/ -A fmt.check
+
+# Get the formatter binary:
+nix-build ci/ -A fmt.pkg
+./result/bin/treefmt
+```
+
+---
+
+## Source Tree Construction
+
+The treefmt check operates on a clean copy of the source tree:
+
+```nix
+fs = pkgs.lib.fileset;
+src = fs.toSource {
+ root = ../.;
+ fileset = fs.difference ../. (fs.maybeMissing ../.git);
+};
+```
+
+This:
+- Takes the entire repository directory (`../.` from `ci/`)
+- Excludes the `.git` directory (which is large and irrelevant for formatting)
+- `fs.maybeMissing` handles the case where `.git` doesn't exist (e.g., in tarballs)
+
+The resulting source is passed to `fmt.check`:
+
+```nix
+check = treefmtEval.config.build.check src;
+```
+
+---
+
+## Formatter Outputs
+
+The formatting system exposes three Nix attributes:
+
+```nix
+{
+ shell = treefmtEval.config.build.devShell; # Interactive shell
+ pkg = treefmtEval.config.build.wrapper; # treefmt binary
+ check = treefmtEval.config.build.check src; # CI check derivation
+}
+```
+
+| Attribute | Use Case |
+|------------|--------------------------------------------------------|
+| `fmt.shell` | `nix develop .#fmt.shell` — interactive formatting |
+| `fmt.pkg` | The treefmt wrapper with all formatters bundled |
+| `fmt.check` | `nix build .#fmt.check` — CI formatting check |
+
+---
+
+## Troubleshooting
+
+### "File would be reformatted"
+
+If CI fails with formatting issues:
+
+```bash
+# Enter the CI shell to get the exact same formatter versions:
+cd ci/
+nix-shell
+
+# Format all files:
+treefmt
+
+# Stage and commit the changes:
+git add -u
+git commit -m "style(repo): apply treefmt formatting"
+```
+
+### Editor Integration
+
+For real-time formatting in VS Code:
+
+1. Use the biome extension for JavaScript/TypeScript
+2. Configure single quotes and optional semicolons to match CI settings
+3. Use nixfmt (RFC style) for Nix files, matching the `nixfmt` formatter CI enforces
+
+### Formatter Conflicts
+
+Each file type has exactly one formatter assigned by treefmt. If a file matches
+multiple formatters, treefmt reports a conflict. The current configuration avoids
+this by:
+- Disabling biome's JSON formatter
+- Having non-overlapping file type coverage
diff --git a/docs/handbook/ci/nix-infrastructure.md b/docs/handbook/ci/nix-infrastructure.md
new file mode 100644
index 0000000000..27481ed46a
--- /dev/null
+++ b/docs/handbook/ci/nix-infrastructure.md
@@ -0,0 +1,611 @@
+# Nix Infrastructure
+
+## Overview
+
+The CI system for the Project Tick monorepo is built on Nix, using pinned dependency
+sources to guarantee reproducible builds and formatting checks. The primary entry point
+is `ci/default.nix`, which bootstraps the complete CI toolchain from `ci/pinned.json`.
+
+This document covers the Nix expressions in detail: how they work, what they produce,
+and how they integrate with the broader Project Tick build infrastructure.
+
+---
+
+## ci/default.nix — The CI Entry Point
+
+The `default.nix` file is the sole entry point for all Nix-based CI operations. It:
+
+1. Reads pinned source revisions from `pinned.json`
+2. Fetches the exact Nixpkgs tarball
+3. Configures the treefmt multi-formatter
+4. Builds the codeowners-validator
+5. Exposes a development shell with all CI tools
+
+### Top-level Structure
+
+```nix
+let
+ pinned = (builtins.fromJSON (builtins.readFile ./pinned.json)).pins;
+in
+{
+ system ? builtins.currentSystem,
+ nixpkgs ? null,
+}:
+let
+ nixpkgs' =
+ if nixpkgs == null then
+ fetchTarball {
+ inherit (pinned.nixpkgs) url;
+ sha256 = pinned.nixpkgs.hash;
+ }
+ else
+ nixpkgs;
+
+ pkgs = import nixpkgs' {
+ inherit system;
+ config = { };
+ overlays = [ ];
+ };
+```
+
+### Function Parameters
+
+| Parameter | Default | Purpose |
+|-----------|------------------------------|-------------------------------------------------|
+| `system` | `builtins.currentSystem` | Target system (e.g., `x86_64-linux`) |
+| `nixpkgs` | `null` (uses pinned) | Override Nixpkgs source for development/testing |
+
+When `nixpkgs` is `null` (the default), the pinned revision is fetched. When provided
+explicitly, the override is used instead — useful for testing against newer Nixpkgs.
+
+### Importing Nixpkgs
+
+The Nixpkgs tarball is imported with empty config and no overlays:
+
+```nix
+pkgs = import nixpkgs' {
+ inherit system;
+ config = { };
+ overlays = [ ];
+};
+```
+
+This ensures a "pure" package set with no user-specific customizations that could
+break CI reproducibility.
+
+---
+
+## Pinned Dependencies (pinned.json)
+
+### Format
+
+The `pinned.json` file uses the [npins](https://github.com/andir/npins) v5 format. It
+stores Git-based pins with full provenance information:
+
+```json
+{
+ "pins": {
+ "nixpkgs": {
+ "type": "Git",
+ "repository": {
+ "type": "GitHub",
+ "owner": "NixOS",
+ "repo": "nixpkgs"
+ },
+ "branch": "nixpkgs-unstable",
+ "submodules": false,
+ "revision": "bde09022887110deb780067364a0818e89258968",
+ "url": "https://github.com/NixOS/nixpkgs/archive/bde09022887110deb780067364a0818e89258968.tar.gz",
+ "hash": "13mi187zpa4rw680qbwp7pmykjia8cra3nwvjqmsjba3qhlzif5l"
+ },
+ "treefmt-nix": {
+ "type": "Git",
+ "repository": {
+ "type": "GitHub",
+ "owner": "numtide",
+ "repo": "treefmt-nix"
+ },
+ "branch": "main",
+ "submodules": false,
+ "revision": "e96d59dff5c0d7fddb9d113ba108f03c3ef99eca",
+ "url": "https://github.com/numtide/treefmt-nix/archive/e96d59dff5c0d7fddb9d113ba108f03c3ef99eca.tar.gz",
+ "hash": "02gqyxila3ghw8gifq3mns639x86jcq079kvfvjm42mibx7z5fzb"
+ }
+ },
+ "version": 5
+}
+```
+
+### Pin Fields
+
+| Field | Description |
+|--------------|------------------------------------------------------------|
+| `type` | Source type (`Git`) |
+| `repository` | Source location (`GitHub` with owner + repo) |
+| `branch` | Upstream branch being tracked |
+| `submodules` | Whether to fetch Git submodules (`false`) |
+| `revision` | Full commit SHA of the pinned revision |
+| `url` | Direct tarball download URL for the pinned revision |
+| `hash`       | Nix base32 hash for integrity verification                 |
+
+### Why Two Pins?
+
+| Pin | Tracked Branch | Purpose |
+|---------------|----------------------|--------------------------------------------|
+| `nixpkgs` | `nixpkgs-unstable` | Base package set: compilers, tools, libraries |
+| `treefmt-nix` | `main` | Code formatter orchestrator and its modules |
+
+The `nixpkgs-unstable` branch is used rather than a release branch to get recent
+tool versions while still being reasonably stable.
+
+---
+
+## Updating Pinned Dependencies
+
+### update-pinned.sh
+
+The update script is minimal:
+
+```bash
+#!/usr/bin/env nix-shell
+#!nix-shell -i bash -p npins
+
+set -euo pipefail
+
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+npins --lock-file pinned.json update
+```
+
+This:
+
+1. Enters a `nix-shell` with `npins` available
+2. Changes to the `ci/` directory (where `pinned.json` lives)
+3. Runs `npins update` to fetch the latest commit from each tracked branch
+4. Updates `pinned.json` with new revisions and hashes
+
+### When to Update
+
+- **Regularly**: To pick up security patches and tool updates
+- **When a formatter change is needed**: New treefmt-nix releases may add formatters
+- **When CI breaks on upstream**: Pin to a known-good revision
+
+### Manual Update Procedure
+
+```bash
+# From the repository root:
+cd ci/
+./update-pinned.sh
+
+# Review the diff:
+git diff pinned.json
+
+# Test locally:
+nix-build -A fmt.check
+
+# Commit:
+git add pinned.json
+git commit -m "ci: update pinned nixpkgs and treefmt-nix"
+```
+
+---
+
+## treefmt Integration
+
+### What is treefmt?
+
+[treefmt](https://github.com/numtide/treefmt) is a multi-language formatter orchestrator.
+It runs multiple formatters in parallel and ensures every file type has exactly one formatter.
+The `treefmt-nix` module provides a Nix-native way to configure it.
+
+### Configuration in default.nix
+
+```nix
+fmt =
+ let
+ treefmtNixSrc = fetchTarball {
+ inherit (pinned.treefmt-nix) url;
+ sha256 = pinned.treefmt-nix.hash;
+ };
+ treefmtEval = (import treefmtNixSrc).evalModule pkgs {
+ projectRootFile = ".git/config";
+
+ settings.verbose = 1;
+ settings.on-unmatched = "debug";
+
+ programs.actionlint.enable = true;
+
+ programs.biome = {
+ enable = true;
+ validate.enable = false;
+ settings.formatter = {
+ useEditorconfig = true;
+ };
+ settings.javascript.formatter = {
+ quoteStyle = "single";
+ semicolons = "asNeeded";
+ };
+ settings.json.formatter.enabled = false;
+ };
+ settings.formatter.biome.excludes = [
+ "*.min.js"
+ ];
+
+ programs.keep-sorted.enable = true;
+
+ programs.nixfmt = {
+ enable = true;
+ package = pkgs.nixfmt;
+ };
+
+ programs.yamlfmt = {
+ enable = true;
+ settings.formatter = {
+ retain_line_breaks = true;
+ };
+ };
+
+ programs.zizmor.enable = true;
+ };
+```
+
+### treefmt Settings
+
+| Setting | Value | Purpose |
+|----------------------------|---------------|---------------------------------------------|
+| `projectRootFile` | `.git/config` | Marker file to detect the repository root |
+| `settings.verbose` | `1` | Show which formatter processes each file |
+| `settings.on-unmatched` | `"debug"` | Log unmatched files at debug level |
+
+### Configured Formatters
+
+#### actionlint
+- **Purpose**: Lint GitHub Actions workflow YAML files
+- **Scope**: `.github/workflows/*.yml`
+- **Configuration**: Default settings
+
+#### biome
+- **Purpose**: Format JavaScript and TypeScript files
+- **Configuration**:
+ - `useEditorconfig = true` — Respects `.editorconfig` settings
+ - `quoteStyle = "single"` — Uses single quotes
+ - `semicolons = "asNeeded"` — Only adds semicolons where required by ASI
+ - `validate.enable = false` — No lint-level validation, only formatting
+ - `json.formatter.enabled = false` — Does not format JSON files
+- **Exclusions**: `*.min.js` — Minified JavaScript files are skipped
+
+#### keep-sorted
+- **Purpose**: Enforces sorted order in marked sections (e.g., dependency lists)
+- **Configuration**: Default settings
+
+#### nixfmt
+- **Purpose**: Format Nix expressions
+- **Package**: Uses `pkgs.nixfmt` from the pinned Nixpkgs
+- **Configuration**: Default nixfmt-rfc-style formatting
+
+#### yamlfmt
+- **Purpose**: Format YAML files
+- **Configuration**:
+ - `retain_line_breaks = true` — Preserves intentional blank lines
+
+#### zizmor
+- **Purpose**: Security scanning for GitHub Actions workflows
+- **Configuration**: Default settings
+- **Detects**: Injection vulnerabilities, insecure defaults, untrusted inputs
+
+### Formatter Source Tree
+
+The treefmt evaluation creates a source tree from the repository, excluding `.git`:
+
+```nix
+fs = pkgs.lib.fileset;
+src = fs.toSource {
+ root = ../.;
+ fileset = fs.difference ../. (fs.maybeMissing ../.git);
+};
+```
+
+This ensures the formatting check operates on the full repository contents while
+avoiding Git internals.
+
+### Outputs
+
+The `fmt` attribute set exposes three derivations:
+
+```nix
+{
+ shell = treefmtEval.config.build.devShell; # nix develop .#fmt.shell
+ pkg = treefmtEval.config.build.wrapper; # treefmt binary
+ check = treefmtEval.config.build.check src; # nix build .#fmt.check
+}
+```
+
+| Output | Type | Purpose |
+|------------|-------------|--------------------------------------------------|
+| `fmt.shell` | Dev shell | Interactive shell with treefmt available |
+| `fmt.pkg` | Binary | The treefmt wrapper with all formatters configured|
+| `fmt.check` | Check | A Nix derivation that fails if any file needs reformatting |
+
+---
+
+## codeowners-validator Derivation
+
+### Purpose
+
+The codeowners-validator checks that the `ci/OWNERS` file is structurally valid:
+- All referenced paths exist in the repository
+- All referenced GitHub users/teams exist in the organization
+- Glob patterns are syntactically correct
+
+### Build Definition
+
+```nix
+{
+ buildGoModule,
+ fetchFromGitHub,
+ fetchpatch,
+}:
+buildGoModule {
+ name = "codeowners-validator";
+ src = fetchFromGitHub {
+ owner = "mszostok";
+ repo = "codeowners-validator";
+ rev = "f3651e3810802a37bd965e6a9a7210728179d076";
+ hash = "sha256-5aSmmRTsOuPcVLWfDF6EBz+6+/Qpbj66udAmi1CLmWQ=";
+ };
+ patches = [
+ (fetchpatch {
+ name = "user-write-access-check";
+ url = "https://github.com/mszostok/codeowners-validator/compare/f3651e3...840eeb8.patch";
+ hash = "sha256-t3Dtt8SP9nbO3gBrM0nRE7+G6N/ZIaczDyVHYAG/6mU=";
+ })
+ ./permissions.patch
+ ./owners-file-name.patch
+ ];
+ postPatch = "rm -r docs/investigation";
+ vendorHash = "sha256-R+pW3xcfpkTRqfS2ETVOwG8PZr0iH5ewroiF7u8hcYI=";
+}
+```
+
+### Patches Applied
+
+#### 1. user-write-access-check (upstream PR #222)
+Fetched from the upstream repository. Modifies the write-access validation logic.
+
+#### 2. permissions.patch
+Undoes part of the upstream PR's write-access requirement:
+
+```diff
+ var reqScopes = map[github.Scope]struct{}{
+- github.ScopeReadOrg: {},
+ }
+```
+
+And removes the push permission checks for teams and users:
+
+```diff
+ for _, t := range v.repoTeams {
+ if strings.EqualFold(t.GetSlug(), team) {
+- if t.Permissions["push"] {
+- return nil
+- }
+- return newValidateError(...)
++ return nil
+ }
+ }
+```
+
+This is necessary because Project Tick's OWNERS file is used for code review routing,
+not for GitHub's native branch protection rules. Contributors listed in OWNERS don't
+need write access to the repository.
+
+#### 3. owners-file-name.patch
+Adds support for a custom CODEOWNERS file path via the `OWNERS_FILE` environment variable:
+
+```diff
+ func openCodeownersFile(dir string) (io.Reader, error) {
++ if file, ok := os.LookupEnv("OWNERS_FILE"); ok {
++ return fs.Open(file)
++ }
++
+ var detectedFiles []string
+```
+
+This allows the validator to check `ci/OWNERS` instead of the default `.github/CODEOWNERS`
+or `CODEOWNERS` paths.
+
+---
+
+## CI Dev Shell
+
+The top-level `shell` attribute combines all CI tools:
+
+```nix
+shell = pkgs.mkShell {
+ packages = [
+ fmt.pkg
+ codeownersValidator
+ ];
+};
+```
+
+This provides:
+- `treefmt` — The configured multi-formatter
+- `codeowners-validator` — The patched OWNERS validator
+
+Enter the shell:
+
+```bash
+cd ci/
+nix-shell # or: nix develop
+treefmt # format all files
+codeowners-validator # validate OWNERS
+```
+
+---
+
+## github-script Nix Shell
+
+The `ci/github-script/shell.nix` provides a separate dev shell for JavaScript CI scripts:
+
+```nix
+{
+ system ? builtins.currentSystem,
+ pkgs ? (import ../../ci { inherit system; }).pkgs,
+}:
+
+pkgs.callPackage (
+ {
+ gh,
+ importNpmLock,
+ mkShell,
+ nodejs,
+ }:
+ mkShell {
+ packages = [
+ gh
+ importNpmLock.hooks.linkNodeModulesHook
+ nodejs
+ ];
+
+ npmDeps = importNpmLock.buildNodeModules {
+ npmRoot = ./.;
+ inherit nodejs;
+ };
+ }
+) { }
+```
+
+### Key Features
+
+1. **Shared Nixpkgs**: Imports the pinned `pkgs` from `../../ci` (the parent `default.nix`)
+2. **Node.js**: Full Node.js runtime for running CI scripts
+3. **GitHub CLI**: `gh` for authentication (`gh auth token` is used by the `run` CLI)
+4. **npm Lockfile Integration**: `importNpmLock` builds `node_modules` from `package-lock.json`
+ in the Nix store, then `linkNodeModulesHook` symlinks it into the working directory
+
+---
+
+## Relationship to Root flake.nix
+
+The root `flake.nix` defines the overall development environment:
+
+```nix
+{
+ description = "Project Tick is a project dedicated to providing developers
+ with ease of use and users with long-lasting software.";
+
+ inputs = {
+ nixpkgs.url = "https://channels.nixos.org/nixos-unstable/nixexprs.tar.xz";
+ };
+
+ outputs = { self, nixpkgs }:
+    let
+      lib = nixpkgs.lib;
+      systems = lib.systems.flakeExposed;
+ forAllSystems = lib.genAttrs systems;
+ nixpkgsFor = forAllSystems (system: nixpkgs.legacyPackages.${system});
+ in
+ {
+ devShells = forAllSystems (system: ...);
+ formatter = forAllSystems (system: nixpkgsFor.${system}.nixfmt-rfc-style);
+ };
+}
+```
+
+The flake's `inputs.nixpkgs` uses `nixos-unstable` via Nix channels, while the CI
+`pinned.json` uses a specific commit from `nixpkgs-unstable`. These are related but
+independently pinned — the flake updates when `flake.lock` is refreshed, while CI
+pins update only when `update-pinned.sh` is explicitly run.
+
+### When Each Is Used
+
+| Context | Nix Source |
+|-------------------|-----------------------------------------------|
+| `nix develop` | Root `flake.nix` → `flake.lock` → nixpkgs |
+| CI formatting check| `ci/default.nix` → `ci/pinned.json` → nixpkgs|
+| CI script dev shell| `ci/github-script/shell.nix` → `ci/default.nix` |
+
+---
+
+## Evaluation and Build Commands
+
+### Building the Format Check
+
+```bash
+# From repository root:
+nix-build ci/ -A fmt.check
+
+# Or with flakes:
+nix build .#fmt.check
+```
+
+This produces a derivation that:
+1. Copies the entire source tree (minus `.git`) into the Nix store
+2. Runs all configured formatters
+3. Fails with a diff if any file would be reformatted
+
+### Entering the CI Shell
+
+```bash
+# Nix classic:
+nix-shell ci/
+
+# Nix flakes:
+nix develop ci/
+```
+
+### Building codeowners-validator
+
+```bash
+nix-build ci/ -A codeownersValidator
+./result/bin/codeowners-validator
+```
+
+---
+
+## Troubleshooting
+
+### "hash mismatch" on pinned.json update
+
+If `update-pinned.sh` produces a hash mismatch, the upstream source has changed
+at the same branch tip. Re-run the update:
+
+```bash
+cd ci/
+./update-pinned.sh
+```
+
+### Formatter version mismatch
+
+If local formatting produces different results than CI:
+
+1. Ensure you're using the same Nixpkgs pin: `nix-shell ci/`
+2. Run `treefmt` from within the CI shell
+3. If the issue persists, update pins: `./update-pinned.sh`
+
+### codeowners-validator fails to build
+
+The Go module build requires network access for vendored dependencies. Ensure:
+- The `vendorHash` in `codeowners-validator/default.nix` matches the actual Go module checksum
+- If upstream dependencies change, update `vendorHash`
+
+---
+
+## Security Considerations
+
+- **Hash verification**: All fetched tarballs are verified against their SRI hashes
+- **No overlays**: Nixpkgs is imported with empty overlays to prevent supply-chain attacks
+- **Pinned revisions**: Exact commit SHAs prevent upstream branch tampering
+- **zizmor**: GitHub Actions workflows are scanned for injection vulnerabilities
+- **actionlint**: Workflow syntax is validated to catch misconfigurations
+
+---
+
+## Summary
+
+The Nix infrastructure provides:
+
+1. **Reproducibility** — Identical tools and versions across all CI runs and developer machines
+2. **Composability** — Each component (treefmt, codeowners-validator) is independently buildable
+3. **Security** — Hash-verified dependencies, security scanning, no arbitrary overlays
+4. **Developer experience** — `nix-shell` provides a ready-to-use environment with zero manual setup
diff --git a/docs/handbook/ci/overview.md b/docs/handbook/ci/overview.md
new file mode 100644
index 0000000000..19d42cfe2a
--- /dev/null
+++ b/docs/handbook/ci/overview.md
@@ -0,0 +1,494 @@
+# CI Infrastructure — Overview
+
+## Purpose
+
+The `ci/` directory contains the Continuous Integration infrastructure for the Project Tick monorepo.
+It provides reproducible builds, automated code quality checks, commit message validation,
+pull request lifecycle management, and code ownership enforcement — all orchestrated through
+Nix expressions and JavaScript-based GitHub Actions scripts.
+
+The CI system is designed around three core principles:
+
+1. **Reproducibility** — Pinned Nix dependencies ensure identical builds across environments
+2. **Conventional Commits** — Enforced commit message format for automated changelog generation
+3. **Ownership-driven review** — CODEOWNERS-style file ownership with automated review routing
+
+---
+
+## Directory Structure
+
+```
+ci/
+├── OWNERS # Code ownership file (CODEOWNERS format)
+├── README.md # CI README with local testing instructions
+├── default.nix # Nix CI entry point — treefmt, codeowners-validator, shell
+├── pinned.json # Pinned Nixpkgs + treefmt-nix revisions (npins format)
+├── update-pinned.sh # Script to update pinned.json via npins
+├── supportedBranches.js # Branch classification logic for CI decisions
+├── codeowners-validator/ # Builds codeowners-validator from source (Go)
+│ ├── default.nix # Nix derivation for codeowners-validator
+│ ├── owners-file-name.patch # Patch: custom OWNERS file path via OWNERS_FILE env var
+│ └── permissions.patch # Patch: remove write-access check (not needed for non-native CODEOWNERS)
+└── github-script/ # JavaScript CI scripts for GitHub Actions
+ ├── run # CLI entry point for local testing (commander-based)
+ ├── lint-commits.js # Commit message linter (Conventional Commits)
+ ├── prepare.js # PR preparation: mergeability, branch targeting, touched files
+ ├── reviews.js # Review lifecycle: post, dismiss, minimize bot reviews
+ ├── get-pr-commit-details.js # Extract commit SHAs, subjects, changed paths via git
+ ├── withRateLimit.js # GitHub API rate limiting with Bottleneck
+ ├── package.json # Node.js dependencies (@actions/core, @actions/github, bottleneck, commander)
+ ├── package-lock.json # Lockfile for reproducible npm installs
+ ├── shell.nix # Nix dev shell for github-script (Node.js + gh CLI)
+ ├── README.md # Local testing documentation
+ ├── .editorconfig # Editor configuration
+ ├── .gitignore # Git ignore rules
+ └── .npmrc # npm configuration
+```
+
+---
+
+## How CI Works End-to-End
+
+### 1. Triggering
+
+CI runs are triggered by GitHub Actions workflows (defined in `.github/workflows/`) when
+pull requests are opened, updated, or merged against supported branches. The `supportedBranches.js`
+module classifies branches to determine which checks to run.
+
+### 2. Environment Setup
+
+The CI environment is bootstrapped via `ci/default.nix`, which:
+
+- Reads pinned dependency revisions from `ci/pinned.json`
+- Fetches the exact Nixpkgs tarball at the pinned commit
+- Imports `treefmt-nix` for code formatting
+- Builds the `codeowners-validator` tool with Project Tick–specific patches
+- Exposes a development shell with all CI tools available
+
+```nix
+# ci/default.nix — entry point
+let
+ pinned = (builtins.fromJSON (builtins.readFile ./pinned.json)).pins;
+in
+{
+ system ? builtins.currentSystem,
+ nixpkgs ? null,
+}:
+let
+ nixpkgs' =
+ if nixpkgs == null then
+ fetchTarball {
+ inherit (pinned.nixpkgs) url;
+ sha256 = pinned.nixpkgs.hash;
+ }
+ else
+ nixpkgs;
+
+ pkgs = import nixpkgs' {
+ inherit system;
+ config = { };
+ overlays = [ ];
+ };
+```
+
+### 3. Code Formatting (treefmt)
+
+The `default.nix` configures `treefmt-nix` with multiple formatters:
+
+| Formatter | Purpose | Configuration |
+|-------------|--------------------------------------|----------------------------------------------|
+| `actionlint` | GitHub Actions workflow linting | Enabled, no custom config |
+| `biome` | JavaScript/TypeScript formatting | Single quotes, no semicolons, no JSON format |
+| `keep-sorted`| Sorted list enforcement | Enabled, no custom config |
+| `nixfmt` | Nix expression formatting | Uses `pkgs.nixfmt` |
+| `yamlfmt` | YAML formatting | Retains line breaks |
+| `zizmor` | GitHub Actions security scanning | Enabled, no custom config |
+
+Biome is configured with specific style rules:
+
+```nix
+programs.biome = {
+ enable = true;
+ validate.enable = false;
+ settings.formatter = {
+ useEditorconfig = true;
+ };
+ settings.javascript.formatter = {
+ quoteStyle = "single";
+ semicolons = "asNeeded";
+ };
+ settings.json.formatter.enabled = false;
+};
+settings.formatter.biome.excludes = [
+ "*.min.js"
+];
+```
+
+### 4. Commit Linting
+
+When a PR is opened or updated, `ci/github-script/lint-commits.js` validates every commit
+message against the Conventional Commits specification. It checks:
+
+- Format: `type(scope): subject`
+- No `fixup!`, `squash!`, or `amend!` prefixes (must be rebased before merge)
+- No trailing period on subject line
+- Lowercase first letter in subject
+- Known scopes matching monorepo project directories
+
+The supported types are:
+
+```javascript
+const CONVENTIONAL_TYPES = [
+ 'build', 'chore', 'ci', 'docs', 'feat', 'fix',
+ 'perf', 'refactor', 'revert', 'style', 'test',
+]
+```
+
+And the known scopes correspond to monorepo directories:
+
+```javascript
+const KNOWN_SCOPES = [
+ 'archived', 'cgit', 'ci', 'cmark', 'corebinutils',
+ 'forgewrapper', 'genqrcode', 'hooks', 'images4docker',
+ 'json4cpp', 'libnbtplusplus', 'meshmc', 'meta', 'mnv',
+ 'neozip', 'tomlplusplus', 'repo', 'deps',
+]
+```
+
+### 5. PR Preparation and Validation
+
+The `ci/github-script/prepare.js` script handles PR lifecycle:
+
+1. **Mergeability check** — Polls GitHub's mergeability computation with exponential backoff
+ (5s, 10s, 20s, 40s, 80s retries)
+2. **Branch classification** — Classifies base and head branches using `supportedBranches.js`
+3. **Base branch suggestion** — For WIP branches, computes the optimal base branch by comparing
+ merge-base commit distances across `master` and all release branches
+4. **Merge conflict detection** — If the PR has conflicts, uses the head SHA directly; otherwise
+ uses the merge commit SHA
+5. **Touched file detection** — Identifies which CI-relevant paths were modified:
+ - `ci` — any file under `ci/`
+ - `pinned` — `ci/pinned.json` specifically
+ - `github` — any file under `.github/`
+
+### 6. Review Lifecycle Management
+
+The `ci/github-script/reviews.js` module manages bot reviews:
+
+- **`postReview()`** — Posts or updates a review with a tracking comment tag
+ (`<!-- projt review key: <key>; resolved: false -->`)
+- **`dismissReviews()`** — Dismisses, minimizes (marks as outdated), or resolves bot reviews
+ when the underlying issue is fixed
+- Reviews are tagged with a `reviewKey` to allow multiple independent review concerns
+ on the same PR
+
+### 7. Rate Limiting
+
+All GitHub API calls go through `ci/github-script/withRateLimit.js`, which uses the
+Bottleneck library for request throttling:
+
+- Read requests: controlled by a reservoir updated from the GitHub rate limit API
+- Write requests (`POST`, `PUT`, `PATCH`, `DELETE`): minimum 1 second between calls
+- The reservoir keeps 1000 spare requests for other concurrent jobs
+- Reservoir is refreshed every 60 seconds
+- Requests to `github.com` (not the API), `/rate_limit`, and `/search/` endpoints bypass throttling
+
+### 8. Code Ownership Validation
+
+The `ci/codeowners-validator/` builds a patched version of the
+[codeowners-validator](https://github.com/mszostok/codeowners-validator) tool:
+
+- Fetched from GitHub at a specific pinned commit
+- Two patches applied:
+ - `owners-file-name.patch` — Adds support for custom CODEOWNERS file path via `OWNERS_FILE` env var
+ - `permissions.patch` — Removes the write-access permission check (not needed since Project Tick
+ uses an `OWNERS` file rather than GitHub's native `CODEOWNERS`)
+
+This validates the `ci/OWNERS` file against the actual repository structure and GitHub
+organization membership.
+
+---
+
+## Component Interaction Flow
+
+```
+┌─────────────────────────────────────────┐
+│ GitHub Actions Workflow │
+│ (.github/workflows/*.yml) │
+└──────────────┬──────────────────────────┘
+ │ triggers
+ ▼
+┌──────────────────────────────────────────┐
+│ ci/default.nix │
+│ ┌─────────┐ ┌──────────────────────┐ │
+│ │pinned. │ │ treefmt-nix │ │
+│ │json │──│ (formatting checks) │ │
+│ └─────────┘ └──────────────────────┘ │
+│ ┌──────────────────────┐ │
+│ │ codeowners-validator │ │
+│ │ (OWNERS validation) │ │
+│ └──────────────────────┘ │
+└──────────────┬───────────────────────────┘
+ │ also triggers
+ ▼
+┌──────────────────────────────────────────┐
+│ ci/github-script/ │
+│ ┌────────────────┐ ┌───────────────┐ │
+│ │ prepare.js │ │ lint-commits │ │
+│ │ (PR validation) │ │ (commit msg) │ │
+│ └───────┬────────┘ └──────┬────────┘ │
+│ │ │ │
+│ ┌───────▼────────┐ ┌──────▼────────┐ │
+│ │ reviews.js │ │ supported │ │
+│ │ (bot reviews) │ │ Branches.js │ │
+│ └───────┬────────┘ └───────────────┘ │
+│ │ │
+│ ┌───────▼────────┐ │
+│ │ withRateLimit │ │
+│ │ (API throttle) │ │
+│ └────────────────┘ │
+└──────────────────────────────────────────┘
+```
+
+---
+
+## Key Design Decisions
+
+### Why Nix for CI?
+
+Nix ensures that every CI run uses the exact same versions of tools, compilers, and
+libraries. The `pinned.json` file locks specific commits of Nixpkgs and treefmt-nix,
+eliminating "works on my machine" problems.
+
+### Why a custom OWNERS file?
+
+GitHub's native CODEOWNERS has limitations:
+- Must be in `.github/CODEOWNERS`, `CODEOWNERS`, or `docs/CODEOWNERS`
+- Requires repository write access for all listed owners
+- Cannot be extended with custom validation
+
+Project Tick uses `ci/OWNERS` with the same glob pattern syntax but adds:
+- Custom file path support (via the `OWNERS_FILE` environment variable)
+- No write-access requirement (via the permissions patch)
+- Integration with the codeowners-validator for structural validation
+
+### Why Bottleneck for rate limiting?
+
+GitHub Actions can run many jobs in parallel, and each job makes API calls. Without
+throttling, a large CI run could exhaust the GitHub API rate limit (5000 requests/hour
+for authenticated requests). Bottleneck provides:
+- Concurrency control (1 concurrent request by default)
+- Reservoir-based rate limiting (dynamically updated from the API)
+- Separate throttling for mutative requests (1 second minimum between writes)
+
+### Why local testing support?
+
+The `ci/github-script/run` CLI allows developers to test CI scripts locally before
+pushing. This accelerates development and reduces CI feedback loops:
+
+```bash
+cd ci/github-script
+nix-shell # sets up Node.js + dependencies
+gh auth login # authenticate with GitHub
+./run lint-commits YongDo-Hyun Project-Tick 123
+./run prepare YongDo-Hyun Project-Tick 123
+```
+
+---
+
+## Pinned Dependencies
+
+The CI system pins two external Nix sources:
+
+| Dependency | Source | Branch | Purpose |
+|-------------|----------------------------------------------|--------------------|--------------------------------|
+| `nixpkgs` | `github:NixOS/nixpkgs` | `nixpkgs-unstable` | Base package set for CI tools |
+| `treefmt-nix`| `github:numtide/treefmt-nix` | `main` | Multi-formatter orchestrator |
+
+Pins are stored in `ci/pinned.json` in npins v5 format:
+
+```json
+{
+ "pins": {
+ "nixpkgs": {
+ "type": "Git",
+ "repository": {
+ "type": "GitHub",
+ "owner": "NixOS",
+ "repo": "nixpkgs"
+ },
+ "branch": "nixpkgs-unstable",
+ "revision": "bde09022887110deb780067364a0818e89258968",
+ "url": "https://github.com/NixOS/nixpkgs/archive/bde09022887110deb780067364a0818e89258968.tar.gz",
+ "hash": "13mi187zpa4rw680qbwp7pmykjia8cra3nwvjqmsjba3qhlzif5l"
+ },
+ "treefmt-nix": {
+ "type": "Git",
+ "repository": {
+ "type": "GitHub",
+ "owner": "numtide",
+ "repo": "treefmt-nix"
+ },
+ "branch": "main",
+ "revision": "e96d59dff5c0d7fddb9d113ba108f03c3ef99eca",
+ "url": "https://github.com/numtide/treefmt-nix/archive/e96d59dff5c0d7fddb9d113ba108f03c3ef99eca.tar.gz",
+ "hash": "02gqyxila3ghw8gifq3mns639x86jcq079kvfvjm42mibx7z5fzb"
+ }
+ },
+ "version": 5
+}
+```
+
+To update pins:
+
+```bash
+cd ci/
+./update-pinned.sh
+```
+
+This runs `npins --lock-file pinned.json update` to fetch the latest revisions.
+
+---
+
+## Node.js Dependencies (github-script)
+
+The `ci/github-script/package.json` declares:
+
+```json
+{
+ "private": true,
+ "dependencies": {
+ "@actions/core": "1.11.1",
+ "@actions/github": "6.0.1",
+ "bottleneck": "2.19.5",
+ "commander": "14.0.3"
+ }
+}
+```
+
+| Package | Version | Purpose |
+|-------------------|----------|-----------------------------------------------|
+| `@actions/core` | `1.11.1` | GitHub Actions core utilities (logging, outputs) |
+| `@actions/github` | `6.0.1` | GitHub API client (Octokit wrapper) |
+| `bottleneck` | `2.19.5` | Rate limiting / request throttling |
+| `commander` | `14.0.3` | CLI argument parsing for local `./run` tool |
+
+These versions are kept in sync with the
+[actions/github-script](https://github.com/actions/github-script) action.
+
+---
+
+## Nix Dev Shell
+
+The `ci/github-script/shell.nix` provides a development environment for working on
+the CI scripts locally:
+
+```nix
+{
+ system ? builtins.currentSystem,
+ pkgs ? (import ../../ci { inherit system; }).pkgs,
+}:
+
+pkgs.callPackage (
+ {
+ gh,
+ importNpmLock,
+ mkShell,
+ nodejs,
+ }:
+ mkShell {
+ packages = [
+ gh
+ importNpmLock.hooks.linkNodeModulesHook
+ nodejs
+ ];
+
+ npmDeps = importNpmLock.buildNodeModules {
+ npmRoot = ./.;
+ inherit nodejs;
+ };
+ }
+) { }
+```
+
+This gives you:
+- `nodejs` — Node.js runtime
+- `gh` — GitHub CLI for authentication
+- `importNpmLock.hooks.linkNodeModulesHook` — Automatically links `node_modules` from the Nix store
+
+---
+
+## Outputs Exposed by default.nix
+
+The `ci/default.nix` exposes the following attributes:
+
+| Attribute | Type | Description |
+|----------------------|-----------|--------------------------------------------------|
+| `pkgs` | Nixpkgs | The pinned Nixpkgs package set |
+| `fmt.shell` | Derivation| Dev shell with treefmt formatter available |
+| `fmt.pkg` | Derivation| The treefmt wrapper binary |
+| `fmt.check` | Derivation| A check derivation that fails if formatting drifts|
+| `codeownersValidator`| Derivation| Patched codeowners-validator binary |
+| `shell` | Derivation| Combined CI dev shell (fmt + codeowners-validator)|
+
+```nix
+rec {
+ inherit pkgs fmt;
+ codeownersValidator = pkgs.callPackage ./codeowners-validator { };
+
+ shell = pkgs.mkShell {
+ packages = [
+ fmt.pkg
+ codeownersValidator
+ ];
+ };
+}
+```
+
+---
+
+## Integration with Root Flake
+
+The root `flake.nix` provides:
+
+- Dev shells for all supported systems (`aarch64-linux`, `x86_64-linux`, etc.)
+- A formatter (`nixfmt-rfc-style`)
+- The CI `default.nix` is imported indirectly via the flake for Nix-based CI runs
+
+```nix
+{
+ description = "Project Tick is a project dedicated to providing developers
+ with ease of use and users with long-lasting software.";
+
+ inputs = {
+ nixpkgs.url = "https://channels.nixos.org/nixos-unstable/nixexprs.tar.xz";
+ };
+ ...
+}
+```
+
+---
+
+## Summary of CI Checks
+
+| Check | Tool / Script | Scope |
+|--------------------------|---------------------------|------------------------------------|
+| Code formatting | treefmt (biome, nixfmt, yamlfmt, actionlint, zizmor) | All source files |
+| Commit message format | `lint-commits.js` | All commits in a PR |
+| PR mergeability | `prepare.js` | Every PR |
+| Base branch targeting | `prepare.js` + `supportedBranches.js` | WIP → development PRs |
+| Code ownership validity | `codeowners-validator` | `ci/OWNERS` file |
+| GitHub Actions security | `zizmor` (via treefmt) | `.github/workflows/*.yml` |
+| Sorted list enforcement | `keep-sorted` (via treefmt)| Files with keep-sorted markers |
+
+---
+
+## Related Documentation
+
+- [Nix Infrastructure](nix-infrastructure.md) — Deep dive into the Nix expressions
+- [Commit Linting](commit-linting.md) — Commit message conventions and validation rules
+- [PR Validation](pr-validation.md) — Pull request checks and lifecycle management
+- [Branch Strategy](branch-strategy.md) — Branch naming, classification, and release branches
+- [CODEOWNERS](codeowners.md) — Ownership file format and validation
+- [Formatting](formatting.md) — Code formatting configuration and tools
+- [Rate Limiting](rate-limiting.md) — GitHub API rate limiting strategy
diff --git a/docs/handbook/ci/pr-validation.md b/docs/handbook/ci/pr-validation.md
new file mode 100644
index 0000000000..f7933d3e75
--- /dev/null
+++ b/docs/handbook/ci/pr-validation.md
@@ -0,0 +1,378 @@
+# PR Validation
+
+## Overview
+
+The `ci/github-script/prepare.js` script runs on every pull request to validate
+mergeability, classify branches, suggest optimal base branches, detect merge conflicts,
+and identify which CI-relevant paths were touched. It also manages bot review comments
+to guide contributors toward correct PR targeting.
+
+---
+
+## What prepare.js Does
+
+1. **Checks PR state** — Ensures the PR is still open
+2. **Waits for mergeability** — Polls GitHub until mergeability is computed
+3. **Classifies branches** — Categorizes base and head branches using `supportedBranches.js`
+4. **Validates branch targeting** — Warns if a feature branch targets a release branch
+5. **Suggests better base branches** — For WIP branches, finds the optimal base by comparing
+ commit distances
+6. **Computes merge SHAs** — Determines the merge commit SHA and target comparison SHA
+7. **Detects touched CI paths** — Identifies changes to `ci/`, `ci/pinned.json`, `.github/`
+
+---
+
+## Mergeability Check
+
+GitHub computes merge status asynchronously. The script polls with exponential backoff:
+
+```javascript
+for (const retryInterval of [5, 10, 20, 40, 80]) {
+ core.info('Checking whether the pull request can be merged...')
+ const prInfo = (
+ await github.rest.pulls.get({
+ ...context.repo,
+ pull_number,
+ })
+ ).data
+
+ if (prInfo.state !== 'open') throw new Error('PR is not open anymore.')
+
+ if (prInfo.mergeable == null) {
+ core.info(
+ `GitHub is still computing mergeability, waiting ${retryInterval}s...`,
+ )
+ await new Promise((resolve) => setTimeout(resolve, retryInterval * 1000))
+ continue
+ }
+ // ... process PR
+}
+throw new Error(
+ 'Timed out waiting for GitHub to compute mergeability. Check https://www.githubstatus.com.',
+)
+```
+
+### Retry Schedule
+
+| Attempt | Wait Time | Cumulative Wait |
+|---------|-----------|-----------------|
+| 1 | 5 seconds | 5 seconds |
+| 2 | 10 seconds| 15 seconds |
+| 3 | 20 seconds| 35 seconds |
+| 4 | 40 seconds| 75 seconds |
+| 5 | 80 seconds| 155 seconds |
+
+If mergeability is still not computed after ~2.5 minutes, the script throws an error
+with a link to [githubstatus.com](https://www.githubstatus.com) for checking GitHub's
+system status.
+
+---
+
+## Branch Classification
+
+Both the base and head branches are classified using `supportedBranches.js`:
+
+```javascript
+const baseClassification = classify(base.ref)
+core.setOutput('base', baseClassification)
+
+const headClassification =
+ base.repo.full_name === head.repo.full_name
+ ? classify(head.ref)
+ : { type: ['wip'] }
+core.setOutput('head', headClassification)
+```
+
+### Fork Handling
+
+For cross-fork PRs (where the head repo differs from the base repo), the head branch
+is always classified as `{ type: ['wip'] }` regardless of its name. This prevents
+fork branches from being treated as development branches.
+
+### Classification Output
+
+Each classification produces:
+
+```javascript
+{
+ branch: 'release-1.0',
+ order: 1,
+ stable: true,
+ type: ['development', 'primary'],
+ version: '1.0',
+}
+```
+
+| Field | Description |
+|-----------|------------------------------------------------------|
+| `branch` | The full branch name |
+| `order` | Ranking for base-branch preference (lower = better) |
+| `stable` | Whether the branch has a version suffix |
+| `type` | Array of type tags |
+| `version` | Extracted version number, or `'dev'` |
+
+---
+
+## Release Branch Targeting Warning
+
+If a WIP branch (feature, fix, etc.) targets a stable release branch, the script
+checks whether it's a backport:
+
+```javascript
+if (
+ baseClassification.stable &&
+ baseClassification.type.includes('primary')
+) {
+ const headPrefix = head.ref.split('-')[0]
+ if (!['backport', 'fix', 'revert'].includes(headPrefix)) {
+ core.warning(
+ `This PR targets release branch \`${base.ref}\`. ` +
+ 'New features should typically target \`master\`.',
+ )
+ }
+}
+```
+
+| Head Branch Prefix | Allowed to target release? | Reason |
+|-------------------|---------------------------|---------------------|
+| `backport-*` | Yes | Explicit backport |
+| `fix-*` | Yes | Bug fix for release |
+| `revert-*` | Yes | Reverting a change |
+| `feature-*` | Warning issued | Should target master|
+| `wip-*` | Warning issued | Should target master|
+
+---
+
+## Base Branch Suggestion
+
+For WIP branches, the script computes the optimal base branch by analyzing commit
+distances from the head to all candidate base branches:
+
+### Algorithm
+
+1. **List all branches** — Fetch all branches in the repository via pagination
+2. **Filter candidates** — Keep `master` and all stable primary branches (release-*)
+3. **Compute merge bases** — For each candidate, find the merge-base commit with the
+ PR head and count commits between them
+
+```javascript
+async function mergeBase({ branch, order, version }) {
+ const { data } = await github.rest.repos.compareCommitsWithBasehead({
+ ...context.repo,
+ basehead: `${branch}...${head.sha}`,
+ per_page: 1,
+ page: 2,
+ })
+ return {
+ branch,
+ order,
+ version,
+ commits: data.total_commits,
+ sha: data.merge_base_commit.sha,
+ }
+}
+```
+
+4. **Select the best** — The branch with the fewest commits ahead wins. If there's a tie,
+ the branch with the lowest `order` wins (i.e., `master` over `release-*`).
+
+```javascript
+let candidates = [await mergeBase(classify('master'))]
+for (const release of releases) {
+ const nextCandidate = await mergeBase(release)
+ if (candidates[0].commits === nextCandidate.commits)
+ candidates.push(nextCandidate)
+ if (candidates[0].commits > nextCandidate.commits)
+ candidates = [nextCandidate]
+ if (candidates[0].commits < 10000) break
+}
+
+const best = candidates.sort((a, b) => a.order - b.order).at(0)
+```
+
+5. **Post review if mismatch** — If the suggested base differs from the current base,
+ a bot review is posted:
+
+```javascript
+if (best.branch !== base.ref) {
+ const current = await mergeBase(classify(base.ref))
+ const body = [
+ `This PR targets \`${current.branch}\`, but based on the commit history ` +
+ `\`${best.branch}\` appears to be a better fit ` +
+ `(${current.commits - best.commits} fewer commits ahead).`,
+ '',
+ `If this is intentional, you can ignore this message. Otherwise:`,
+ `- [Change the base branch](...) to \`${best.branch}\`.`,
+ ].join('\n')
+
+ await postReview({ github, context, core, dry, body, reviewKey })
+}
+```
+
+6. **Dismiss reviews if correct** — If the base branch matches the suggestion, any
+ previous bot reviews are dismissed.
+
+### Early Termination
+
+The algorithm stops evaluating further release branches once the current best
+candidate is fewer than 10,000 commits ahead of the merge base. This avoids
+unnecessary API calls once a sufficiently close base branch has been found.
+
+---
+
+## Merge SHA Computation
+
+The script computes two key SHAs for downstream CI jobs:
+
+### Mergeable PR
+
+```javascript
+if (prInfo.mergeable) {
+ core.info('The PR can be merged.')
+ mergedSha = prInfo.merge_commit_sha
+ targetSha = (
+ await github.rest.repos.getCommit({
+ ...context.repo,
+ ref: prInfo.merge_commit_sha,
+ })
+ ).data.parents[0].sha
+}
+```
+
+- `mergedSha` — GitHub's trial merge commit SHA
+- `targetSha` — The first parent of the merge commit (base branch tip)
+
+### Conflicting PR
+
+```javascript
+else {
+ core.warning('The PR has a merge conflict.')
+ mergedSha = head.sha
+ targetSha = (
+ await github.rest.repos.compareCommitsWithBasehead({
+ ...context.repo,
+ basehead: `${base.sha}...${head.sha}`,
+ })
+ ).data.merge_base_commit.sha
+}
+```
+
+- `mergedSha` — Falls back to the head SHA (no merge commit exists)
+- `targetSha` — The merge-base between base and head
+
+---
+
+## Touched Path Detection
+
+The script identifies which CI-relevant paths were modified in the PR:
+
+```javascript
+const files = (
+ await github.paginate(github.rest.pulls.listFiles, {
+ ...context.repo,
+ pull_number,
+ per_page: 100,
+ })
+).map((file) => file.filename)
+
+const touched = []
+if (files.some((f) => f.startsWith('ci/'))) touched.push('ci')
+if (files.includes('ci/pinned.json')) touched.push('pinned')
+if (files.some((f) => f.startsWith('.github/'))) touched.push('github')
+core.setOutput('touched', touched)
+```
+
+| Touched Tag | Condition | Use Case |
+|------------|------------------------------------------|---------------------------------|
+| `ci` | Any file under `ci/` was changed | Re-run CI infrastructure checks |
+| `pinned` | `ci/pinned.json` specifically changed | Validate pin integrity |
+| `github` | Any file under `.github/` was changed | Re-run workflow lint checks |
+
+---
+
+## Outputs
+
+The script sets the following outputs for downstream workflow jobs:
+
+| Output | Type | Description |
+|-------------|--------|---------------------------------------------------|
+| `base` | Object | Base branch classification (branch, type, version) |
+| `head` | Object | Head branch classification |
+| `mergedSha` | String | Merge commit SHA (or head SHA if conflicting) |
+| `targetSha` | String | Base comparison SHA |
+| `touched` | Array | Which CI-relevant paths were modified |
+
+---
+
+## Review Lifecycle
+
+The `prepare.js` script integrates with `reviews.js` for bot review management:
+
+### Posting a Review
+
+When the script detects a branch targeting issue, it posts a `REQUEST_CHANGES` review:
+
+```javascript
+await postReview({ github, context, core, dry, body, reviewKey: 'prepare' })
+```
+
+The review body includes:
+- A description of the issue
+- A comparison of commit distances
+- A link to GitHub's "change base branch" documentation
+
+### Dismissing Reviews
+
+When the issue is resolved (correct base branch), previous reviews are dismissed:
+
+```javascript
+await dismissReviews({ github, context, core, dry, reviewKey: 'prepare' })
+```
+
+The `reviewKey` (`'prepare'`) ensures only reviews posted by this script are affected.
+
+---
+
+## Dry Run Mode
+
+When the `--no-dry` flag is NOT passed (default in local testing), all mutative
+operations (posting/dismissing reviews) are skipped:
+
+```javascript
+module.exports = async ({ github, context, core, dry }) => {
+ // ...
+ if (!dry) {
+ await github.rest.pulls.createReview({ ... })
+ }
+}
+```
+
+This allows safe local testing without modifying real PRs.
+
+---
+
+## Local Testing
+
+```bash
+cd ci/github-script
+nix-shell
+gh auth login
+
+# Dry run (default — no changes to the PR):
+./run prepare YongDo-Hyun Project-Tick 123
+
+# Live run (actually posts/dismisses reviews):
+./run prepare YongDo-Hyun Project-Tick 123 --no-dry
+```
+
+---
+
+## Error Conditions
+
+| Condition | Behavior |
+|-------------------------------------|----------------------------------------------|
+| PR is closed | Throws: `"PR is not open anymore."` |
+| Mergeability timeout | Throws: `"Timed out waiting for GitHub..."` |
+| API rate limit exceeded | Handled by `withRateLimit.js` |
+| Merge conflict | Warning issued; head SHA used as mergedSha |
+| Wrong base branch | REQUEST_CHANGES review posted |
diff --git a/docs/handbook/ci/rate-limiting.md b/docs/handbook/ci/rate-limiting.md
new file mode 100644
index 0000000000..4b349ee2b4
--- /dev/null
+++ b/docs/handbook/ci/rate-limiting.md
@@ -0,0 +1,321 @@
+# Rate Limiting
+
+## Overview
+
+The CI system interacts heavily with the GitHub REST API for PR validation, commit
+analysis, review management, and branch comparison. To prevent exhausting the
+GitHub API rate limit (5,000 requests/hour for authenticated tokens), all API calls
+are routed through `ci/github-script/withRateLimit.js`, which uses the
+[Bottleneck](https://github.com/SGrondin/bottleneck) library for request throttling.
+
+---
+
+## Architecture
+
+### Request Flow
+
+```
+┌──────────────────────────┐
+│ CI Script │
+│ (lint-commits.js, │
+│ prepare.js, etc.) │
+└────────────┬─────────────┘
+ │ github.rest.*
+ ▼
+┌──────────────────────────┐
+│ withRateLimit wrapper │
+│ ┌──────────────────┐ │
+│ │ allLimits │ │ ← Bottleneck (maxConcurrent: 1, reservoir: dynamic)
+│ │ (all requests) │ │
+│ └──────────────────┘ │
+│ ┌──────────────────┐ │
+│ │ writeLimits │ │ ← Bottleneck (minTime: 1000ms) chained to allLimits
+│ │ (POST/PUT/PATCH/ │ │
+│ │ DELETE only) │ │
+│ └──────────────────┘ │
+└────────────┬─────────────┘
+ │
+ ▼
+┌──────────────────────────┐
+│ GitHub REST API │
+│ api.github.com │
+└──────────────────────────┘
+```
+
+---
+
+## Implementation
+
+### Module Signature
+
+```javascript
+module.exports = async ({ github, core, maxConcurrent = 1 }, callback) => {
+```
+
+| Parameter       | Type     | Default | Description                             |
+|-----------------|----------|---------|-----------------------------------------|
+| `github`        | Object   | —       | Octokit instance from `@actions/github` |
+| `core`          | Object   | —       | `@actions/core` for logging             |
+| `maxConcurrent` | Number   | `1`     | Maximum concurrent API requests         |
+| `callback`      | Function | —       | The script logic to execute             |
+
+### Bottleneck Configuration
+
+Two Bottleneck limiters are configured:
+
+#### allLimits — Controls all requests
+
+```javascript
+const allLimits = new Bottleneck({
+ maxConcurrent,
+ reservoir: 0, // Updated dynamically
+})
+```
+
+- `maxConcurrent: 1` — Only one API request at a time (prevents burst usage)
+- `reservoir: 0` — Starts empty; updated by `updateReservoir()` before first use
+
+#### writeLimits — Additional throttle for mutative requests
+
+```javascript
+const writeLimits = new Bottleneck({ minTime: 1000 }).chain(allLimits)
+```
+
+- `minTime: 1000` — At least 1 second between write requests
+- `.chain(allLimits)` — Write requests also go through the global limiter
+
+---
+
+## Request Classification
+
+The Octokit `request` hook intercepts every API call and routes it through
+the appropriate limiter:
+
+```javascript
+github.hook.wrap('request', async (request, options) => {
+ // Bypass: different host (e.g., github.com for raw downloads)
+ if (options.url.startsWith('https://github.com')) return request(options)
+
+ // Bypass: rate limit endpoint (doesn't count against quota)
+ if (options.url === '/rate_limit') return request(options)
+
+ // Bypass: search endpoints (separate rate limit pool)
+ if (options.url.startsWith('/search/')) return request(options)
+
+ stats.requests++
+
+ if (['POST', 'PUT', 'PATCH', 'DELETE'].includes(options.method))
+ return writeLimits.schedule(request.bind(null, options))
+ else
+ return allLimits.schedule(request.bind(null, options))
+})
+```
+
+### Bypass Rules
+
+| URL Pattern | Reason |
+|-------------------------------|---------------------------------------------|
+| `https://github.com/*` | Raw file downloads, not API calls |
+| `/rate_limit` | Meta-endpoint, doesn't count against quota |
+| `/search/*` | Separate rate limit pool (30 requests/min) |
+
+### Request Routing
+
+| HTTP Method | Limiter | Throttle Rule |
+|----------------------|------------------|----------------------------------|
+| `GET` | `allLimits` | Concurrency-limited + reservoir |
+| `POST` | `writeLimits` | 1 second minimum + concurrency |
+| `PUT` | `writeLimits` | 1 second minimum + concurrency |
+| `PATCH` | `writeLimits` | 1 second minimum + concurrency |
+| `DELETE` | `writeLimits` | 1 second minimum + concurrency |
+
+---
+
+## Reservoir Management
+
+### Dynamic Reservoir Updates
+
+The reservoir tracks how many API requests the script is allowed to make:
+
+```javascript
+async function updateReservoir() {
+ let response
+ try {
+ response = await github.rest.rateLimit.get()
+ } catch (err) {
+ core.error(`Failed updating reservoir:\n${err}`)
+ return
+ }
+ const reservoir = Math.max(0, response.data.resources.core.remaining - 1000)
+ core.info(`Updating reservoir to: ${reservoir}`)
+ allLimits.updateSettings({ reservoir })
+}
+```
+
+### Reserve Buffer
+
+The script always keeps **1,000 spare requests** for other concurrent jobs:
+
+```javascript
+const reservoir = Math.max(0, response.data.resources.core.remaining - 1000)
+```
+
+If the rate limit shows 3,500 remaining requests, the reservoir is set to 2,500.
+If remaining is below 1,000, the reservoir is set to 0 (all requests will queue).
+
+### Why 1,000?
+
+Other GitHub Actions jobs running in parallel (status checks, deployment workflows,
+external integrations) typically use fewer than 100 requests each. A 1,000-request
+buffer provides ample headroom:
+
+- Normal job: ~50–100 API calls
+- 10 concurrent jobs: ~500–1,000 API calls
+- Buffer: 1,000 requests — covers typical parallel workload
+
+### Update Schedule
+
+```javascript
+await updateReservoir() // Initial update before any work
+const reservoirUpdater = setInterval(updateReservoir, 60 * 1000) // Every 60 seconds
+```
+
+The reservoir is refreshed every minute to account for:
+- Other jobs consuming requests in parallel
+- Rate limit window resets (GitHub resets the limit every hour)
+
+### Cleanup
+
+```javascript
+try {
+ await callback(stats)
+} finally {
+ clearInterval(reservoirUpdater)
+ core.notice(
+ `Processed ${stats.prs} PRs, ${stats.issues} Issues, ` +
+ `made ${stats.requests + stats.artifacts} API requests ` +
+ `and downloaded ${stats.artifacts} artifacts.`,
+ )
+}
+```
+
+The interval is cleared in a `finally` block to prevent resource leaks even if
+the callback throws an error.
+
+---
+
+## Statistics Tracking
+
+The wrapper tracks four metrics:
+
+```javascript
+const stats = {
+ issues: 0,
+ prs: 0,
+ requests: 0,
+ artifacts: 0,
+}
+```
+
+| Metric | Incremented By | Purpose |
+|-------------|---------------------------------------|----------------------------------|
+| `requests` | Every throttled API call | Total API usage |
+| `prs` | Callback logic (PR processing) | PRs analyzed |
+| `issues` | Callback logic (issue processing) | Issues analyzed |
+| `artifacts` | Callback logic (artifact downloads) | Artifacts downloaded |
+
+At the end of execution, a summary is logged:
+
+```
+Notice: Processed 1 PRs, 0 Issues, made 15 API requests and downloaded 0 artifacts.
+```
+
+---
+
+## Error Handling
+
+### Rate Limit API Failure
+
+If the rate limit endpoint itself fails (network error, GitHub outage):
+
+```javascript
+try {
+ response = await github.rest.rateLimit.get()
+} catch (err) {
+ core.error(`Failed updating reservoir:\n${err}`)
+ return // Keep retrying on next interval
+}
+```
+
+The error is logged but does not crash the script. The reservoir retains its
+previous value, and the next 60-second interval will try again.
+
+### Exhausted Reservoir
+
+When the reservoir reaches 0:
+- All new requests queue in Bottleneck
+- Requests wait until the next `updateReservoir()` call adds capacity
+- If GitHub's rate limit has not reset, requests continue to queue
+- The script may time out if the rate limit window hasn't reset
+
+---
+
+## GitHub API Rate Limits Reference
+
+| Resource      | Limit               | Reset Period   |
+|---------------|---------------------|----------------|
+| Core REST API | 5,000 requests/hour | Rolling hour   |
+| Search API    | 30 requests/minute  | Rolling minute |
+| GraphQL API   | 5,000 points/hour   | Rolling hour   |
+
+The `withRateLimit.js` module only manages the **Core REST API** limit. Search
+requests bypass the throttle because they have a separate, lower limit that is
+rarely a concern for CI scripts.
+
+---
+
+## Usage in CI Scripts
+
+### Wrapping a Script
+
+```javascript
+const withRateLimit = require('./withRateLimit.js')
+
+module.exports = async ({ github, core }) => {
+ await withRateLimit({ github, core }, async (stats) => {
+ // All github.rest.* calls here are automatically throttled
+
+ const pr = await github.rest.pulls.get({
+ owner: 'YongDo-Hyun',
+ repo: 'Project-Tick',
+ pull_number: 123,
+ })
+ stats.prs++
+
+ // ... more API calls
+ })
+}
+```
+
+### Adjusting Concurrency
+
+For scripts that can safely parallelize reads:
+
+```javascript
+await withRateLimit({ github, core, maxConcurrent: 5 }, async (stats) => {
+ // Up to 5 concurrent GET requests
+ // Write requests still have 1-second minimum spacing
+})
+```
+
+---
+
+## Best Practices
+
+1. **Minimize API calls** — Use pagination efficiently, avoid redundant requests
+2. **Prefer git over API** — For commit data, `get-pr-commit-details.js` uses git directly
+ to bypass the 250-commit API limit and reduce API usage
+3. **Use the `stats` object** — Track what the script does for observability
+4. **Don't bypass the wrapper** — All API calls should go through the throttled Octokit instance
+5. **Handle network errors** — The wrapper handles rate limit API failures, but callback
+ scripts should handle their own API errors gracefully
diff --git a/docs/handbook/cmark/architecture.md b/docs/handbook/cmark/architecture.md
new file mode 100644
index 0000000000..e35bd2e578
--- /dev/null
+++ b/docs/handbook/cmark/architecture.md
@@ -0,0 +1,283 @@
+# cmark — Architecture
+
+## High-Level Design
+
+cmark implements a two-phase parsing pipeline that converts CommonMark Markdown into an Abstract Syntax Tree (AST), which can then be rendered into multiple output formats. The design separates concerns cleanly: block-level structure is identified first, then inline content is parsed within the appropriate blocks.
+
+```
+Input Text (UTF-8)
+        │
+        ▼
+┌───────────────────┐
+│  S_parser_feed    │  Split input into lines (blocks.c)
+│                   │  Handle UTF-8 BOM, CR/LF normalization
+└─────────┬─────────┘
+          │
+          ▼
+┌───────────────────┐
+│  S_process_line   │  Line-by-line block structure analysis (blocks.c)
+│                   │  Open/close containers, detect leaf blocks
+└─────────┬─────────┘
+          │
+          ▼
+┌───────────────────┐
+│ finalize_document │  Close all open blocks (blocks.c)
+│                   │  Resolve reference link definitions
+└─────────┬─────────┘
+          │
+          ▼
+┌───────────────────┐
+│  process_inlines  │  Parse inline content in paragraphs/headings (blocks.c → inlines.c)
+│                   │  Delimiter stack algorithm for emphasis
+│                   │  Bracket stack for links/images
+└─────────┬─────────┘
+          │
+          ▼
+┌───────────────────────┐
+│ AST (cmark_node tree) │
+└─────────┬─────────────┘
+          │
+          ▼
+┌───────────────────┐
+│     Renderer      │  Iterator-driven traversal
+│  (html/xml/       │  Enter/Exit events per node
+│   latex/man/cm)   │
+└─────────┬─────────┘
+          │
+          ▼
+   Output String
+```
+
+## Module Dependency Graph
+
+The internal header dependencies reveal the layered architecture:
+
+```
+cmark.h (public API — types, enums, function declarations)
+ ├── cmark_export.h (generated — DLL export macros)
+ └── cmark_version.h (generated — version constants)
+
+node.h (internal — struct cmark_node)
+ ├── cmark.h
+ └── buffer.h
+
+parser.h (internal — struct cmark_parser)
+ ├── references.h
+ ├── node.h
+ └── buffer.h
+
+iterator.h (internal — struct cmark_iter)
+ └── cmark.h
+
+render.h (internal — struct cmark_renderer)
+ └── buffer.h
+
+buffer.h (internal — cmark_strbuf)
+ └── cmark.h
+
+chunk.h (internal — cmark_chunk)
+ ├── cmark.h
+ ├── buffer.h
+ └── cmark_ctype.h
+
+references.h (internal — cmark_reference_map)
+ └── chunk.h
+
+inlines.h (internal — inline parsing API)
+ ├── chunk.h
+ └── references.h
+
+scanners.h (internal — scanner function declarations)
+ ├── cmark.h
+ └── chunk.h
+
+houdini.h (internal — HTML/URL escaping)
+ └── buffer.h
+
+cmark_ctype.h (internal — locale-independent char classification)
+ (no cmark dependencies)
+
+utf8.h (internal — UTF-8 processing)
+ └── buffer.h
+```
+
+## Phase 1: Block Structure (blocks.c)
+
+Block parsing operates on a state machine maintained in the `cmark_parser` struct (defined in `parser.h`):
+
+```c
+struct cmark_parser {
+ struct cmark_mem *mem; // Memory allocator
+ struct cmark_reference_map *refmap; // Link reference definitions
+ struct cmark_node *root; // Document root node
+ struct cmark_node *current; // Deepest open block
+ int line_number; // Current line being processed
+ bufsize_t offset; // Byte position in current line
+ bufsize_t column; // Virtual column (tabs expanded)
+ bufsize_t first_nonspace; // Position of first non-whitespace
+ bufsize_t first_nonspace_column; // Column of first non-whitespace
+ bufsize_t thematic_break_kill_pos; // Optimization for thematic break scanning
+ int indent; // Indentation level (first_nonspace_column - column)
+ bool blank; // Whether current line is blank
+ bool partially_consumed_tab; // Tab only partially used for indentation
+ cmark_strbuf curline; // Current line being processed
+ bufsize_t last_line_length; // Length of previous line (for end_column)
+ cmark_strbuf linebuf; // Buffer for accumulating partial lines across feeds
+ cmark_strbuf content; // Accumulated content for the current open block
+ int options; // Option flags
+ bool last_buffer_ended_with_cr; // For CR/LF handling across buffer boundaries
+ unsigned int total_size; // Total bytes fed (for reference expansion limiting)
+};
+```
+
+### Line Processing Flow
+
+For each line, `S_process_line()` does the following:
+
+1. **Increment line number**, store current line in `parser->curline`.
+2. **Check open blocks** (`check_open_blocks()`): Walk through the tree from root to the deepest open node. For each open container node, try to match the expected line prefix:
+ - Block quote: expect `>` (optionally preceded by up to 3 spaces)
+ - List item: expect indentation matching `marker_offset + padding`
+ - Code block (fenced): check for closing fence or skip fence offset spaces
+ - Code block (indented): expect 4+ spaces of indentation
+ - HTML block: check type-specific continuation rules
+3. **Try new container starts**: If not all open blocks matched, check if the current line starts a new container (block quote, list item).
+4. **Try new leaf blocks**: If the line doesn't continue an existing block or start a new container, check for:
+ - ATX heading (lines starting with 1-6 `#` characters)
+ - Setext heading (underlines of `=` or `-` following a paragraph)
+ - Thematic break (3+ `*`, `-`, or `_` on a line by themselves)
+ - Fenced code block (3+ backticks or tildes)
+ - HTML block (7 different start patterns)
+ - Indented code block (4+ spaces of indentation)
+5. **Add line content**: For blocks that accept lines (paragraph, heading, code block), append the line content to `parser->content`.
+6. **Handle lazy continuation**: Paragraphs support lazy continuation where a non-blank line can continue a paragraph even without matching container prefixes.
+
+### Finalization
+
+When a block is closed (either explicitly or because a new block replaces it), `finalize()` is called:
+
+- **Paragraphs**: Reference link definitions at the start are extracted and stored in `parser->refmap`. If only references remain, the paragraph node is deleted.
+- **Code blocks (fenced)**: The first line becomes the info string; remaining content becomes the code body.
+- **Code blocks (indented)**: Trailing blank lines are removed.
+- **Lists**: Tight/loose status is determined by checking for blank lines between items and their children.
+
+## Phase 2: Inline Parsing (inlines.c)
+
+After all block structure is finalized, `process_inlines()` walks the AST with an iterator and calls `cmark_parse_inlines()` for every node whose type `contains_inlines()` — specifically, `CMARK_NODE_PARAGRAPH` and `CMARK_NODE_HEADING`.
+
+The inline parser uses a `subject` struct that tracks:
+
+```c
+typedef struct {
+ cmark_mem *mem;
+ cmark_chunk input; // The text to parse
+ unsigned flags; // Skip flags for HTML constructs
+ int line; // Source line number
+ bufsize_t pos; // Current position in input
+ int block_offset; // Column offset of containing block
+ int column_offset; // Adjustment for multi-line inlines
+ cmark_reference_map *refmap; // Reference definitions
+ delimiter *last_delim; // Top of delimiter stack
+ bracket *last_bracket; // Top of bracket stack
+ bufsize_t backticks[MAXBACKTICKS + 1]; // Cache of backtick positions
+ bool scanned_for_backticks; // Whether full backtick scan done
+ bool no_link_openers; // Optimization flag
+} subject;
+```
+
+### Delimiter Stack Algorithm
+
+Emphasis (`*`, `_`) and smart quotes (`'`, `"`) use a delimiter stack. When a run of delimiter characters is found:
+
+1. `scan_delims()` determines whether the run can open and/or close emphasis, based on Unicode-aware flanking rules (checking whether surrounding characters are spaces or punctuation using `cmark_utf8proc_is_space()` and `cmark_utf8proc_is_punctuation_or_symbol()`).
+2. The delimiter is pushed onto the stack as a `delimiter` struct.
+3. When a closing delimiter is found, the stack is scanned backwards for a matching opener, and `S_insert_emph()` creates `CMARK_NODE_EMPH` or `CMARK_NODE_STRONG` nodes.
+
+### Bracket Stack Algorithm
+
+Links and images use a separate bracket stack:
+
+1. `[` pushes a bracket entry; `![` pushes one marked as `image = true`.
+2. When `]` is encountered, the bracket stack is searched for a matching opener.
+3. If found, the parser looks for `(url "title")` or `[ref]` after the `]`.
+4. For reference-style links, `cmark_reference_lookup()` is called against the parser's `refmap`.
+
+## Phase 3: AST Rendering
+
+All renderers traverse the AST using the iterator system. There are two rendering architectures:
+
+### Direct Renderers (no framework)
+- **HTML** (`html.c`): Uses `cmark_strbuf` directly. The `S_render_node()` function handles enter/exit events in a large switch statement. HTML escaping is done via `houdini_escape_html()`.
+- **XML** (`xml.c`): Similar direct approach with XML-specific escaping and indentation tracking.
+
+### Framework Renderers (via render.c)
+- **LaTeX** (`latex.c`), **man** (`man.c`), **CommonMark** (`commonmark.c`): These use the `cmark_render()` generic framework, which provides:
+ - Line wrapping at a configurable width
+ - Prefix management for indented output (block quotes, list items)
+ - Breakpoint tracking for intelligent line breaking
+ - Escape dispatch via function pointers (`outc`)
+
+The framework signature:
+
+```c
+char *cmark_render(cmark_node *root, int options, int width,
+ void (*outc)(cmark_renderer *, cmark_escaping, int32_t, unsigned char),
+ int (*render_node)(cmark_renderer *, cmark_node *,
+ cmark_event_type, int));
+```
+
+Each format-specific renderer supplies its own `outc` (character-level escaping) and `render_node` (node-level output) callback functions.
+
+## Key Design Decisions
+
+### Owning vs. Non-Owning Strings
+
+cmark uses two string types:
+
+- **`cmark_strbuf`** (buffer.h): Owning, growable byte buffer. Used for accumulating output and parser state. Memory is managed via the `cmark_mem` allocator.
+- **`cmark_chunk`** (chunk.h): Non-owning slice (pointer + length). Used for referencing substrings of the input during parsing without copying.
+
+### Node Memory Layout
+
+Every `cmark_node` uses a discriminated union (`node->as`) to store type-specific data without separate allocations:
+
+```c
+union {
+ cmark_list list; // list marker, start, tight, delimiter
+ cmark_code code; // info string, fence char/length/offset
+ cmark_heading heading; // level, setext flag, internal_offset
+ cmark_link link; // url, title
+ cmark_custom custom; // on_enter, on_exit
+ int html_block_type; // HTML block type (1-7)
+} as;
+```
+
+### Open Block Tracking
+
+During block parsing, open blocks are tracked via the `CMARK_NODE__OPEN` flag in `node->flags`. The parser maintains a `current` pointer to the deepest open block. When new blocks are created, they're added as children of the appropriate open container. When blocks are finalized (closed), control returns to the parent.
+
+### Reference Expansion Limiting
+
+To prevent superlinear growth from adversarial reference definitions, `parser->total_size` tracks total bytes fed. After finalization, `parser->refmap->max_ref_size` is set to `MAX(total_size, 100000)`, and each reference lookup deducts the reference's size from the available budget.
+
+## Error Handling
+
+cmark follows a defensive programming model:
+- NULL checks on all public API entry points (return 0 or NULL for invalid arguments)
+- `assert()` for internal invariants (only active in debug builds with `-DCMARK_DEBUG_NODES`)
+- Abort-on-allocation-failure in the default memory allocator
+- No exceptions (pure C99)
+- Invalid UTF-8 sequences are replaced with U+FFFD (when `CMARK_OPT_VALIDATE_UTF8` is set)
+
+## Thread Safety
+
+cmark is **not** thread-safe for concurrent access to the same parser or node tree. However, separate parser instances and separate node trees can be used in parallel from different threads, as there is no global mutable state (the `DEFAULT_MEM_ALLOCATOR` is read-only after initialization).
+
+## Cross-References
+
+- [block-parsing.md](block-parsing.md) — Detailed block-level parsing logic
+- [inline-parsing.md](inline-parsing.md) — Delimiter and bracket stack algorithms
+- [ast-node-system.md](ast-node-system.md) — Node struct internals
+- [render-framework.md](render-framework.md) — Generic render engine
+- [memory-management.md](memory-management.md) — Allocator and buffer details
+- [iterator-system.md](iterator-system.md) — AST traversal mechanics
diff --git a/docs/handbook/cmark/ast-node-system.md b/docs/handbook/cmark/ast-node-system.md
new file mode 100644
index 0000000000..3d25415eda
--- /dev/null
+++ b/docs/handbook/cmark/ast-node-system.md
@@ -0,0 +1,383 @@
+# cmark — AST Node System
+
+## Overview
+
+The AST (Abstract Syntax Tree) node system is defined across `node.h` (internal struct definitions) and `node.c` (node creation, destruction, accessor functions, and tree manipulation). Every element in a parsed CommonMark document is represented as a `cmark_node`. Nodes form a tree via parent/child/sibling pointers, with type-specific data stored in a discriminated union.
+
+## The `cmark_node` Struct
+
+Defined in `node.h`, this is the central data structure of the entire library:
+
+```c
+struct cmark_node {
+ cmark_mem *mem; // Memory allocator used for this node
+
+ struct cmark_node *next; // Next sibling
+ struct cmark_node *prev; // Previous sibling
+ struct cmark_node *parent; // Parent node
+ struct cmark_node *first_child; // First child
+ struct cmark_node *last_child; // Last child
+
+ void *user_data; // Arbitrary user-attached data
+
+ unsigned char *data; // String content (for text, code, HTML)
+ bufsize_t len; // Length of data
+
+ int start_line; // Source position: starting line (1-based)
+ int start_column; // Source position: starting column (1-based)
+ int end_line; // Source position: ending line
+ int end_column; // Source position: ending column
+ uint16_t type; // Node type (cmark_node_type enum value)
+ uint16_t flags; // Internal flags (open, last-line-blank, etc.)
+
+ union {
+ cmark_list list; // List-specific data
+ cmark_code code; // Code block-specific data
+ cmark_heading heading; // Heading-specific data
+ cmark_link link; // Link/image-specific data
+ cmark_custom custom; // Custom block/inline data
+ int html_block_type; // HTML block type (1-7)
+ } as;
+};
+```
+
+The union `as` means each node only occupies memory for one type-specific payload, keeping the struct compact. The largest union member determines the node's size.
+
+## Type-Specific Structs
+
+### `cmark_list` — List Properties
+
+```c
+typedef struct {
+ int marker_offset; // Indentation of list marker from left margin
+ int padding; // Total indentation (marker + content offset)
+ int start; // Starting number for ordered lists (0 for bullet)
+ unsigned char list_type; // CMARK_BULLET_LIST or CMARK_ORDERED_LIST
+ unsigned char delimiter; // CMARK_PERIOD_DELIM, CMARK_PAREN_DELIM, or CMARK_NO_DELIM
+ unsigned char bullet_char;// '*', '-', or '+' for bullet lists
+ bool tight; // Whether the list is tight (no blank lines between items)
+} cmark_list;
+```
+
+`marker_offset` and `padding` are used during block parsing to track indentation levels for list continuation. The `tight` flag is determined during block finalization by checking whether blank lines appear between list items or their children.
+
+### `cmark_code` — Code Block Properties
+
+```c
+typedef struct {
+ unsigned char *info; // Info string (language hint, e.g., "python")
+ uint8_t fence_length; // Length of opening fence (3+ backticks or tildes)
+ uint8_t fence_offset; // Indentation of fence from left margin
+ unsigned char fence_char; // '`' or '~'
+ int8_t fenced; // Whether this is a fenced code block (vs. indented)
+} cmark_code;
+```
+
+For indented code blocks, `fenced` is 0, and `info`, `fence_length`, `fence_char`, and `fence_offset` are unused. For fenced code blocks, `info` is extracted from the first line of the opening fence and stored as a separately allocated string.
+
+### `cmark_heading` — Heading Properties
+
+```c
+typedef struct {
+ int internal_offset; // Internal offset within the heading content
+ int8_t level; // Heading level (1-6)
+ bool setext; // Whether this is a setext-style heading (underlined)
+} cmark_heading;
+```
+
+ATX headings (`# Heading`) have `setext = false`. Setext headings (underlined with `=` or `-`) have `setext = true`. The `level` field is shared and defaults to 1 when a heading node is created.
+
+### `cmark_link` — Link and Image Properties
+
+```c
+typedef struct {
+ unsigned char *url; // Destination URL (separately allocated)
+ unsigned char *title; // Optional title text (separately allocated)
+} cmark_link;
+```
+
+Both `url` and `title` are separately allocated strings that must be freed when the node is destroyed. This struct is used for both `CMARK_NODE_LINK` and `CMARK_NODE_IMAGE`.
+
+### `cmark_custom` — Custom Block/Inline Properties
+
+```c
+typedef struct {
+ unsigned char *on_enter; // Literal text rendered when entering the node
+ unsigned char *on_exit; // Literal text rendered when leaving the node
+} cmark_custom;
+```
+
+Custom nodes allow embedding arbitrary content in the AST for extensions. Both strings are separately allocated.
+
+## Internal Flags
+
+The `flags` field uses bit flags defined in the `cmark_node__internal_flags` enum:
+
+```c
+enum cmark_node__internal_flags {
+ CMARK_NODE__OPEN = (1 << 0), // Block is still open (accepting content)
+ CMARK_NODE__LAST_LINE_BLANK = (1 << 1), // Last line of this block was blank
+ CMARK_NODE__LAST_LINE_CHECKED = (1 << 2), // blank-line status has been computed
+ CMARK_NODE__LIST_LAST_LINE_BLANK = (1 << 3), // (unused/reserved)
+};
+```
+
+- **`CMARK_NODE__OPEN`**: Set when a block is created during parsing. Cleared by `finalize()` when the block is closed. The parser's `current` pointer always points to a node with this flag set.
+- **`CMARK_NODE__LAST_LINE_BLANK`**: Set/cleared by `S_set_last_line_blank()` in `blocks.c` to track whether the most recent line added to this block was blank. Used for determining list tightness.
+- **`CMARK_NODE__LAST_LINE_CHECKED`**: Prevents redundant traversal when checking `S_ends_with_blank_line()`, which recursively descends into list items.
+
+## Node Creation
+
+### `cmark_node_new_with_mem()`
+
+The primary creation function (in `node.c`):
+
+```c
+cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) {
+ cmark_node *node = (cmark_node *)mem->calloc(1, sizeof(*node));
+ node->mem = mem;
+ node->type = (uint16_t)type;
+
+ switch (node->type) {
+ case CMARK_NODE_HEADING:
+ node->as.heading.level = 1;
+ break;
+ case CMARK_NODE_LIST: {
+ cmark_list *list = &node->as.list;
+ list->list_type = CMARK_BULLET_LIST;
+ list->start = 0;
+ list->tight = false;
+ break;
+ }
+ default:
+ break;
+ }
+
+ return node;
+}
+```
+
+The `calloc()` zeroes all fields, so pointers start as NULL and numeric fields as 0. Only heading and list nodes need explicit default initialization.
+
+### `make_block()` — Parser-Internal Creation
+
+During block parsing, `make_block()` in `blocks.c` creates nodes with source position and the `CMARK_NODE__OPEN` flag:
+
+```c
+static cmark_node *make_block(cmark_mem *mem, cmark_node_type tag,
+ int start_line, int start_column) {
+ cmark_node *e;
+ e = (cmark_node *)mem->calloc(1, sizeof(*e));
+ e->mem = mem;
+ e->type = (uint16_t)tag;
+ e->flags = CMARK_NODE__OPEN;
+ e->start_line = start_line;
+ e->start_column = start_column;
+ e->end_line = start_line;
+ return e;
+}
+```
+
+### Inline Node Creation
+
+The inline parser in `inlines.c` uses two factory functions:
+
+```c
+// Create an inline with string content (text, code, HTML)
+static inline cmark_node *make_literal(subject *subj, cmark_node_type t,
+ int start_column, int end_column) {
+ cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e));
+ e->mem = subj->mem;
+ e->type = (uint16_t)t;
+ e->start_line = e->end_line = subj->line;
+ e->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
+ e->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
+ return e;
+}
+
+// Create an inline with no value (emphasis, strong, etc.)
+static inline cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) {
+ cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e));
+ e->mem = mem;
+ e->type = t;
+ return e;
+}
+```
+
+## Node Destruction
+
+### `S_free_nodes()` — Iterative Subtree Freeing
+
+The `S_free_nodes()` function in `node.c` avoids recursion by splicing children into a flat linked list:
+
+```c
+static void S_free_nodes(cmark_node *e) {
+ cmark_mem *mem = e->mem;
+ cmark_node *next;
+ while (e != NULL) {
+ switch (e->type) {
+ case CMARK_NODE_CODE_BLOCK:
+ mem->free(e->data);
+ mem->free(e->as.code.info);
+ break;
+ case CMARK_NODE_TEXT:
+ case CMARK_NODE_HTML_INLINE:
+ case CMARK_NODE_CODE:
+ case CMARK_NODE_HTML_BLOCK:
+ mem->free(e->data);
+ break;
+ case CMARK_NODE_LINK:
+ case CMARK_NODE_IMAGE:
+ mem->free(e->as.link.url);
+ mem->free(e->as.link.title);
+ break;
+ case CMARK_NODE_CUSTOM_BLOCK:
+ case CMARK_NODE_CUSTOM_INLINE:
+ mem->free(e->as.custom.on_enter);
+ mem->free(e->as.custom.on_exit);
+ break;
+ default:
+ break;
+ }
+ if (e->last_child) {
+ // Splice children into list for flat iteration
+ e->last_child->next = e->next;
+ e->next = e->first_child;
+ }
+ next = e->next;
+ mem->free(e);
+ e = next;
+ }
+}
+```
+
+This splicing technique converts the tree into a flat list, allowing O(n) iterative freeing without a recursion stack. For each node with children, the children are prepended to the remaining list by connecting `last_child->next` to `e->next` and `e->next` to `first_child`.
+
+## Containership Rules
+
+The `S_can_contain()` function in `node.c` enforces which node types can contain which children:
+
+```c
+static bool S_can_contain(cmark_node *node, cmark_node *child) {
+ // Ancestor loop detection
+ if (child->first_child != NULL) {
+ cmark_node *cur = node->parent;
+ while (cur != NULL) {
+ if (cur == child) return false;
+ cur = cur->parent;
+ }
+ }
+
+ // Documents cannot be children
+ if (child->type == CMARK_NODE_DOCUMENT) return false;
+
+ switch (node->type) {
+ case CMARK_NODE_DOCUMENT:
+ case CMARK_NODE_BLOCK_QUOTE:
+ case CMARK_NODE_ITEM:
+ return cmark_node_is_block(child) && child->type != CMARK_NODE_ITEM;
+
+ case CMARK_NODE_LIST:
+ return child->type == CMARK_NODE_ITEM;
+
+ case CMARK_NODE_CUSTOM_BLOCK:
+ return true; // Custom blocks can contain anything
+
+ case CMARK_NODE_PARAGRAPH:
+ case CMARK_NODE_HEADING:
+ case CMARK_NODE_EMPH:
+ case CMARK_NODE_STRONG:
+ case CMARK_NODE_LINK:
+ case CMARK_NODE_IMAGE:
+ case CMARK_NODE_CUSTOM_INLINE:
+ return cmark_node_is_inline(child);
+
+ default:
+ break;
+ }
+ return false;
+}
+```
+
+Key rules:
+- **Document, block quote, list item**: Can contain any block except items
+- **List**: Can only contain items
+- **Custom block**: Can contain anything (no restrictions)
+- **Paragraph, heading, emphasis, strong, link, image, custom inline**: Can only contain inline nodes
+- **Leaf blocks** (thematic break, code block, HTML block): Cannot contain anything
+
+## Tree Manipulation
+
+### Unlinking
+
+The internal `S_node_unlink()` function detaches a node from its parent and siblings:
+
+```c
+static void S_node_unlink(cmark_node *node) {
+ if (node->prev) {
+ node->prev->next = node->next;
+ }
+ if (node->next) {
+ node->next->prev = node->prev;
+ }
+ // Update parent's first_child / last_child pointers
+ if (node->parent) {
+ if (node->parent->first_child == node)
+ node->parent->first_child = node->next;
+ if (node->parent->last_child == node)
+ node->parent->last_child = node->prev;
+ }
+ node->next = NULL;
+ node->prev = NULL;
+ node->parent = NULL;
+}
+```
+
+### String Setting Helper
+
+The `cmark_set_cstr()` function manages string assignment with proper memory handling:
+
+```c
+static bufsize_t cmark_set_cstr(cmark_mem *mem, unsigned char **dst,
+ const char *src) {
+ unsigned char *old = *dst;
+ bufsize_t len;
+ if (src && src[0]) {
+ len = (bufsize_t)strlen(src);
+ *dst = (unsigned char *)mem->realloc(NULL, len + 1);
+ memcpy(*dst, src, len + 1);
+ } else {
+ len = 0;
+ *dst = NULL;
+ }
+ if (old) {
+ mem->free(old);
+ }
+ return len;
+}
+```
+
+This function allocates a new copy of the source string, assigns it, then frees the old value — ensuring no memory leaks even when overwriting existing data.
+
+## Node Data Storage Pattern
+
+Nodes store their text content in two ways depending on type:
+
+1. **Direct storage** (`data` + `len`): Used by `CMARK_NODE_TEXT`, `CMARK_NODE_CODE`, `CMARK_NODE_CODE_BLOCK`, `CMARK_NODE_HTML_BLOCK`, and `CMARK_NODE_HTML_INLINE`. The `data` field points to a separately allocated buffer containing the text content.
+
+2. **Union storage** (`as.*`): Used by lists, code blocks (for the info string), headings, links/images, and custom nodes. These store structured data rather than raw text.
+
+3. **Hybrid**: `CMARK_NODE_CODE_BLOCK` uses both — `data` for the code content and `as.code.info` for the info string.
+
+## The `cmark_node_check()` Function
+
+For debug builds, `cmark_node_check()` validates the structural integrity of the tree. It checks that parent/child/sibling pointers are consistent and that the tree forms a valid structure. It returns the number of errors found and prints details to the provided `FILE*`.
+
+## Cross-References
+
+- [node.h](../../../cmark/src/node.h) — Struct definitions
+- [node.c](../../../cmark/src/node.c) — Implementation
+- [iterator-system.md](iterator-system.md) — How nodes are traversed
+- [block-parsing.md](block-parsing.md) — How block nodes are created during parsing
+- [inline-parsing.md](inline-parsing.md) — How inline nodes are created
+- [memory-management.md](memory-management.md) — Allocator integration
diff --git a/docs/handbook/cmark/block-parsing.md b/docs/handbook/cmark/block-parsing.md
new file mode 100644
index 0000000000..2c9efecd50
--- /dev/null
+++ b/docs/handbook/cmark/block-parsing.md
@@ -0,0 +1,310 @@
+# cmark — Block Parsing
+
+## Overview
+
+Block parsing is Phase 1 of cmark's two-phase parsing pipeline. Implemented in `blocks.c`, it processes the input line-by-line, identifying block-level document structure: paragraphs, headings, code blocks, block quotes, lists, thematic breaks, and HTML blocks. The result is a tree of `cmark_node` block nodes with accumulated text content. Inline parsing occurs in Phase 2.
+
+The algorithm follows the CommonMark specification's description at `http://spec.commonmark.org/0.24/#phase-1-block-structure`.
+
+## Key Constants
+
+```c
+#define CODE_INDENT 4 // Spaces required for indented code block
+#define TAB_STOP 4 // Tab stop width for column calculation
+```
+
+## Parser State
+
+The parser state is maintained in the `cmark_parser` struct (from `parser.h`). During line processing, these fields track the current position:
+
+- `offset` — byte position in the current line
+- `column` — virtual column number (tabs expanded to `TAB_STOP` boundaries)
+- `first_nonspace` — byte position of first non-whitespace character
+- `first_nonspace_column` — column of first non-whitespace character
+- `indent` — the difference `first_nonspace_column - column`, representing effective indentation
+- `blank` — whether the line is blank (only whitespace before line end)
+- `partially_consumed_tab` — set when a tab is only partially used for indentation
+
+## Input Feeding: `S_parser_feed()`
+
+The entry point for input is `S_parser_feed()`, which splits raw input into lines:
+
+```c
+static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
+ size_t len, bool eof);
+```
+
+### Line Splitting Logic
+
+The function scans for line-ending characters (`\n`, `\r`) and processes complete lines via `S_process_line()`. Partial lines are accumulated in `parser->linebuf`.
+
+Key handling:
+1. **UTF-8 BOM**: Skipped if found at the start of the first line (3-byte sequence `0xEF 0xBB 0xBF`).
+2. **CR/LF across buffer boundaries**: If the previous buffer ended with `\r` and the next starts with `\n`, the `\n` is skipped.
+3. **NULL bytes**: Replaced with the UTF-8 replacement character (U+FFFD, `0xEF 0xBF 0xBD`).
+4. **Total size tracking**: `parser->total_size` accumulates bytes fed, capped at `UINT_MAX`, used later for reference expansion limiting.
+
+### Line Termination
+
+Each line is terminated at `\n`, `\r`, or `\r\n`. The line content passed to `S_process_line()` does NOT include the line-ending characters themselves.
+
+## Line Processing: `S_process_line()`
+
+The main per-line processing function. For each line, it:
+
+1. Stores the line in `parser->curline`
+2. Creates a `cmark_chunk` wrapper for the line data
+3. Increments `parser->line_number`
+4. Calls `check_open_blocks()` to match existing containers
+5. Attempts to open new containers and leaf blocks
+6. Adds line content to the appropriate block
+
+### Step 1: Check Open Blocks
+
+```c
+static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
+ bool *all_matched);
+```
+
+Starting from the document root, this walks through the tree of open blocks (following `last_child` pointers). For each open container, it tries to match the expected line prefix.
+
+The matching rules for each container type:
+
+#### Block Quote
+```c
+static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input);
+```
+Expects `>` preceded by up to 3 spaces of indentation. After matching the `>`, optionally consumes one space or tab after it.
+
+#### List Item
+```c
+static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input,
+ cmark_node *container);
+```
+Expects indentation of at least `marker_offset + padding` characters. If the line is blank and the item already has at least one child, the item also continues — a blank line inside a non-empty item does not close it. (This is distinct from the lazy-continuation mechanism described later in this document.)
+
+#### Fenced Code Block
+```c
+static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
+ cmark_node *container, bool *should_continue);
+```
+For fenced code blocks: checks if the line is a closing fence (same fence char, length >= opening fence length, preceded by up to 3 spaces). If it is, the block is finalized. Otherwise, skips up to `fence_offset` spaces and continues.
+
+For indented code blocks: requires 4+ spaces of indentation, or a blank line.
+
+#### HTML Block
+```c
+static bool parse_html_block_prefix(cmark_parser *parser, cmark_node *container);
+```
+HTML block types 1-5 accept blank lines (continue until end condition is met). Types 6-7 do NOT accept blank lines.
+
+### Step 2: New Container Starts
+
+If not all open blocks were matched (`!all_matched`), the parser checks if the unmatched portion of the line starts a new container:
+
+- **Block quote**: Line starts with `>` (preceded by up to 3 spaces)
+- **List item**: Line starts with a list marker (bullet character or ordered number + delimiter)
+
+### Step 3: New Leaf Blocks
+
+The parser checks for new leaf block starts using scanner functions:
+
+- **ATX heading**: `scan_atx_heading_start()` — lines starting with 1-6 `#` characters
+- **Fenced code block**: `scan_open_code_fence()` — 3+ backticks or tildes
+- **HTML block**: `scan_html_block_start()` and `scan_html_block_start_7()` — 7 different HTML start patterns
+- **Setext heading**: `scan_setext_heading_line()` — underlines of `=` or `-` (only when following a paragraph)
+- **Thematic break**: `S_scan_thematic_break()` — 3+ `*`, `-`, or `_` characters
+
+### Step 4: Content Accumulation
+
+For blocks that accept lines (`accepts_lines()` returns true for paragraphs, headings, and code blocks), the line content is appended to `parser->content` via `add_line()`:
+
+```c
+static void add_line(cmark_chunk *ch, cmark_parser *parser) {
+ int chars_to_tab;
+ int i;
+ if (parser->partially_consumed_tab) {
+ parser->offset += 1; // skip over tab
+ chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
+ for (i = 0; i < chars_to_tab; i++) {
+ cmark_strbuf_putc(&parser->content, ' ');
+ }
+ }
+ cmark_strbuf_put(&parser->content, ch->data + parser->offset,
+ ch->len - parser->offset);
+}
+```
+
+When a tab is only partially consumed (e.g., the tab represents 4 columns but only 1 was needed for indentation), the remaining columns are emitted as spaces.
+
+## Adding Child Blocks
+
+```c
+static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
+ cmark_node_type block_type, int start_column);
+```
+
+When a new block is detected, `add_child()` creates it:
+
+1. If the parent can't contain the new block type (checked via `can_contain()`), the parent is finalized and the function moves up the tree until it finds a suitable ancestor.
+2. A new node is created with `make_block()` (which sets `CMARK_NODE__OPEN`).
+3. The node is linked as the last child of the parent.
+
+### Container Acceptance Rules
+
+```c
+static inline bool can_contain(cmark_node_type parent_type,
+ cmark_node_type child_type) {
+ return (parent_type == CMARK_NODE_DOCUMENT ||
+ parent_type == CMARK_NODE_BLOCK_QUOTE ||
+ parent_type == CMARK_NODE_ITEM ||
+ (parent_type == CMARK_NODE_LIST && child_type == CMARK_NODE_ITEM));
+}
+```
+
+Only documents, block quotes, list items, and lists (for items only) can contain other blocks.
+
+## List Item Parsing
+
+```c
+static bufsize_t parse_list_marker(cmark_mem *mem, cmark_chunk *input,
+ bufsize_t pos, bool interrupts_paragraph,
+ cmark_list **dataptr);
+```
+
+This function detects list markers:
+
+**Bullet markers**: `*`, `-`, or `+` followed by whitespace.
+
+**Ordered markers**: Up to 9 digits followed by `.` or `)` and whitespace. The 9-digit limit prevents integer overflow (max value ~999,999,999 fits in a 32-bit int).
+
+**Paragraph interruption rules**: When `interrupts_paragraph` is true (the marker would interrupt a preceding paragraph):
+- Bullet markers require non-blank content after them
+- Ordered markers must start at 1
+
+### List Matching
+
+```c
+static int lists_match(cmark_list *list_data, cmark_list *item_data) {
+ return (list_data->list_type == item_data->list_type &&
+ list_data->delimiter == item_data->delimiter &&
+ list_data->bullet_char == item_data->bullet_char);
+}
+```
+
+Two list items belong to the same list only if they share the same list type, delimiter style, and bullet character. This means `- item` and `* item` create separate lists.
+
+## Offset Advancement
+
+```c
+static void S_advance_offset(cmark_parser *parser, cmark_chunk *input,
+ bufsize_t count, bool columns);
+```
+
+This function advances `parser->offset` and `parser->column`. The `columns` parameter determines whether `count` measures bytes or virtual columns. Tab expansion is handled here:
+- When counting columns and a tab appears, `chars_to_tab = TAB_STOP - (column % TAB_STOP)` determines how many columns the tab represents
+- If only part of the tab is consumed (advancing fewer columns than the tab provides), `parser->partially_consumed_tab` is set
+
+## Finding First Non-Space
+
+```c
+static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input);
+```
+
+Scans from `parser->offset` forward, setting:
+- `parser->first_nonspace` — byte position
+- `parser->first_nonspace_column` — column of first non-whitespace
+- `parser->indent` — `first_nonspace_column - column`
+- `parser->blank` — whether the line is blank
+
+This function is idempotent — it won't re-scan if `first_nonspace > offset`.
+
+## Thematic Break Detection
+
+```c
+static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
+ bufsize_t offset);
+```
+
+Checks for 3 or more `*`, `_`, or `-` characters (optionally separated by spaces/tabs) on a line by themselves. Uses `parser->thematic_break_kill_pos` as an optimization to avoid re-scanning positions that already failed.
+
+## ATX Heading Trailing Hash Removal
+
+```c
+static void chop_trailing_hashtags(cmark_chunk *ch);
+```
+
+After an ATX heading line is identified, trailing `#` characters are removed from the content if they're preceded by a space. This implements the CommonMark rule that `## Heading ##` renders as "Heading" without trailing `#` marks.
+
+## Block Finalization
+
+```c
+static cmark_node *finalize(cmark_parser *parser, cmark_node *b);
+```
+
+When a block is closed (no longer accepting content), `finalize()` processes its accumulated content:
+
+### Paragraph Finalization
+Reference link definitions at the start are extracted:
+```c
+static bool resolve_reference_link_definitions(cmark_parser *parser);
+```
+This repeatedly calls `cmark_parse_reference_inline()` from `inlines.c` to parse reference definitions like `[label]: url "title"`. If the paragraph becomes empty after extracting all references, the paragraph node is deleted.
+
+### Code Block Finalization
+- **Fenced**: The first line becomes the info string (after HTML unescaping and trimming). Remaining content becomes the code body.
+- **Indented**: Trailing blank lines are removed, and a final newline is appended.
+
+### Heading and HTML Block Finalization
+Content is simply detached from the parser's content buffer and stored in `data`.
+
+### List Finalization
+Determines tight/loose status by checking:
+1. Non-final, non-empty list items ending with a blank line → loose
+2. Children of list items that end with blank lines (checked recursively via `S_ends_with_blank_line()`) → loose
+3. Otherwise → tight
+
+## Document Finalization
+
+```c
+static cmark_node *finalize_document(cmark_parser *parser);
+```
+
+Called by `cmark_parser_finish()`:
+
+1. All open blocks are finalized by walking from `parser->current` up to `parser->root`.
+2. The root document is finalized.
+3. Reference expansion limit is set: `refmap->max_ref_size = MAX(parser->total_size, 100000)`.
+4. `process_inlines()` is called, which uses an iterator to find all nodes that contain inlines (paragraphs and headings) and calls `cmark_parse_inlines()` on each.
+5. After inline parsing, the content buffer of each processed node is freed.
+
+## Inline Content Detection
+
+```c
+static inline bool contains_inlines(cmark_node_type block_type) {
+ return (block_type == CMARK_NODE_PARAGRAPH ||
+ block_type == CMARK_NODE_HEADING);
+}
+```
+
+Only paragraphs and headings have their string content parsed for inline elements. Code blocks, HTML blocks, and other leaf nodes preserve their content as-is.
+
+## Lazy Continuation Lines
+
+The CommonMark spec defines "lazy continuation lines" — lines that continue a paragraph without matching all container prefixes. For example:
+
+```markdown
+> This is a block quote
+with a lazy continuation line
+```
+
+The second line doesn't start with `>` but still belongs to the paragraph inside the block quote. The parser handles this by checking whether the line could be added to an existing open paragraph rather than closing and starting a new one.
+
+## Cross-References
+
+- [parser.h](../../../cmark/src/parser.h) — Parser struct definition
+- [blocks.c](../../../cmark/src/blocks.c) — Full implementation
+- [inline-parsing.md](inline-parsing.md) — Phase 2 parsing
+- [scanner-system.md](scanner-system.md) — Scanner functions used for block detection
+- [reference-system.md](reference-system.md) — How reference definitions are extracted
+- [ast-node-system.md](ast-node-system.md) — Node creation and tree structure
diff --git a/docs/handbook/cmark/building.md b/docs/handbook/cmark/building.md
new file mode 100644
index 0000000000..56272af2be
--- /dev/null
+++ b/docs/handbook/cmark/building.md
@@ -0,0 +1,268 @@
+# cmark — Building
+
+## Build System Overview
+
+cmark uses CMake (minimum version 3.14) as its build system. The top-level `CMakeLists.txt` defines the project as C/CXX with version 0.31.2. It configures C99 standard without extensions, sets up export header generation, CTest integration, and subdirectory targets for the library, CLI tool, tests, man pages, and fuzz harness.
+
+## Prerequisites
+
+- A C99-compliant compiler (GCC, Clang, MSVC)
+- CMake 3.14 or later
+- POSIX environment (for man page generation; skipped on Windows)
+- Optional: re2c (only needed if modifying `scanners.re`)
+- Optional: Python 3 (for running spec tests)
+
+## Basic Build Steps
+
+```bash
+# Out-of-source build (required — in-source builds are explicitly blocked)
+mkdir build && cd build
+cmake ..
+make
+```
+
+The CMakeLists.txt enforces out-of-source builds with:
+
+```cmake
+if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
+ message(FATAL_ERROR "Do not build in-source.\nPlease remove CMakeCache.txt and the CMakeFiles/ directory.\nThen: mkdir build ; cd build ; cmake .. ; make")
+endif()
+```
+
+## CMake Configuration Options
+
+### Library Type
+
+```cmake
+option(BUILD_SHARED_LIBS "Build the CMark library as shared" OFF)
+```
+
+By default, cmark builds as a **static library**. Set `-DBUILD_SHARED_LIBS=ON` for a shared library. When building as static, the compile definition `CMARK_STATIC_DEFINE` is automatically set.
+
+**Legacy options** (deprecated but still functional for backwards compatibility):
+- `CMARK_SHARED` — replaced by `BUILD_SHARED_LIBS`
+- `CMARK_STATIC` — replaced by `BUILD_SHARED_LIBS` (inverted logic)
+
+Both emit `AUTHOR_WARNING` messages advising migration to the standard CMake variable.
+
+### Fuzzing Support
+
+```cmake
+option(CMARK_LIB_FUZZER "Build libFuzzer fuzzing harness" OFF)
+```
+
+When enabled, targets matching `fuzz` get `-fsanitize=fuzzer`, while all other targets get `-fsanitize=fuzzer-no-link`.
+
+### Build Types
+
+The project supports these build types via `CMAKE_BUILD_TYPE`:
+
+| Type | Description |
+|------|-------------|
+| `Release` | Default. Optimized build |
+| `Debug` | Adds `-DCMARK_DEBUG_NODES` for node integrity checking via `assert()` |
+| `Profile` | Adds `-pg` for profiling with gprof |
+| `Asan` | Address sanitizer (loads `FindAsan` module) |
+| `Ubsan` | Adds `-fsanitize=undefined` for undefined behavior sanitizer |
+
+Debug builds automatically add node structure checking:
+
+```cmake
+add_compile_options($<$<CONFIG:Debug>:-DCMARK_DEBUG_NODES>)
+```
+
+## Compiler Flags
+
+The `cmark_add_compile_options()` function applies compiler warnings per-target (not globally), so cmark can be used as a subdirectory in projects with other languages:
+
+**GCC/Clang:**
+```
+-Wall -Wextra -pedantic -Wstrict-prototypes (C only)
+```
+
+**MSVC:**
+```
+-D_CRT_SECURE_NO_WARNINGS
+```
+
+Visibility is set globally to hidden, with explicit export via the generated `cmark_export.h`:
+
+```cmake
+set(CMAKE_C_VISIBILITY_PRESET hidden)
+set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
+```
+
+## Library Target: `cmark`
+
+Defined in `src/CMakeLists.txt`, the `cmark` library target includes these source files:
+
+```cmake
+add_library(cmark
+ blocks.c buffer.c cmark.c cmark_ctype.c
+ commonmark.c houdini_href_e.c houdini_html_e.c houdini_html_u.c
+ html.c inlines.c iterator.c latex.c
+ man.c node.c references.c render.c
+ scanners.c scanners.re utf8.c xml.c)
+```
+
+Target properties:
+```cmake
+set_target_properties(cmark PROPERTIES
+ OUTPUT_NAME "cmark"
+ PDB_NAME libcmark # Avoid PDB name clash with executable
+ POSITION_INDEPENDENT_CODE YES
+ SOVERSION ${PROJECT_VERSION} # Includes minor + patch in soname
+ VERSION ${PROJECT_VERSION})
+```
+
+The library exposes headers via its interface include directories:
+```cmake
+target_include_directories(cmark INTERFACE
+ $<INSTALL_INTERFACE:include>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>)
+```
+
+The export header is generated automatically:
+```cmake
+generate_export_header(cmark BASE_NAME ${PROJECT_NAME})
+```
+
+This produces `cmark_export.h` containing `CMARK_EXPORT` macros that resolve to `__declspec(dllexport/dllimport)` on Windows or `__attribute__((visibility("default")))` on Unix.
+
+## Executable Target: `cmark_exe`
+
+```cmake
+add_executable(cmark_exe main.c)
+set_target_properties(cmark_exe PROPERTIES
+ OUTPUT_NAME "cmark"
+ INSTALL_RPATH "${Base_rpath}")
+target_link_libraries(cmark_exe PRIVATE cmark)
+```
+
+The executable has the same output name as the library (`cmark`), but the PDB names differ to avoid conflicts on Windows.
+
+## Generated Files
+
+Two files are generated at configure time:
+
+### `cmark_version.h`
+
+Generated from `cmark_version.h.in`:
+```cmake
+configure_file(cmark_version.h.in ${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h)
+```
+
+Contains `CMARK_VERSION` (integer) and `CMARK_VERSION_STRING` (string) macros.
+
+### `libcmark.pc`
+
+Generated from `libcmark.pc.in` for pkg-config integration:
+```cmake
+configure_file(libcmark.pc.in ${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc @ONLY)
+```
+
+## Test Infrastructure
+
+Tests are enabled via CMake's standard `BUILD_TESTING` option (defaults to ON):
+
+```cmake
+if(BUILD_TESTING)
+ add_subdirectory(api_test)
+ add_subdirectory(test testdir)
+endif()
+```
+
+### API Tests (`api_test/`)
+
+C-level API tests that exercise the public API functions directly — node creation, manipulation, parsing, rendering.
+
+### Spec Tests (`test/`)
+
+CommonMark specification conformance tests. These parse expected input/output pairs from the CommonMark spec and verify cmark produces the correct output.
+
+## RPATH Configuration
+
+For shared library builds, the install RPATH is set to the library directory:
+
+```cmake
+if(BUILD_SHARED_LIBS)
+ set(p "${CMAKE_INSTALL_FULL_LIBDIR}")
+ list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${p}" i)
+ if("${i}" STREQUAL "-1")
+ set(Base_rpath "${p}")
+ endif()
+endif()
+set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+```
+
+This ensures the executable can find the shared library at runtime without requiring `LD_LIBRARY_PATH`.
+
+## Man Page Generation
+
+Man pages are built on non-Windows platforms:
+
+```cmake
+if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows)
+ add_subdirectory(man)
+endif()
+```
+
+## Building for Fuzzing
+
+To build the libFuzzer harness:
+
+```bash
+mkdir build-fuzz && cd build-fuzz
+cmake -DCMARK_LIB_FUZZER=ON -DCMAKE_C_COMPILER=clang ..
+make
+```
+
+The fuzz targets are in the `fuzz/` subdirectory.
+
+## Platform-Specific Notes
+
+### OpenBSD
+
+The CLI tool uses `pledge(2)` on OpenBSD 6.0+ for sandboxing:
+```c
+#if defined(__OpenBSD__)
+# include <sys/param.h>
+# if OpenBSD >= 201605
+# define USE_PLEDGE
+# include <unistd.h>
+# endif
+#endif
+```
+
+The pledge sequence is:
+1. Before parsing: `pledge("stdio rpath", NULL)` — allows reading files
+2. After parsing, before rendering: `pledge("stdio", NULL)` — drops file read access
+
+### Windows
+
+On Windows (non-Cygwin), binary mode is set for stdin/stdout to prevent CR/LF translation:
+```c
+#if defined(_WIN32) && !defined(__CYGWIN__)
+ _setmode(_fileno(stdin), _O_BINARY);
+ _setmode(_fileno(stdout), _O_BINARY);
+#endif
+```
+
+## Scanner Regeneration
+
+The `scanners.c` file is generated from `scanners.re` using re2c. To regenerate:
+
+```bash
+re2c --case-insensitive -b -i --no-generation-date -8 \
+ -o scanners.c scanners.re
+```
+
+The generated file is checked into the repository, so re2c is not required for normal builds.
+
+## Cross-References
+
+- [cli-usage.md](cli-usage.md) — Command-line tool details and options
+- [testing.md](testing.md) — Test framework details
+- [code-style.md](code-style.md) — Coding conventions
+- [scanner-system.md](scanner-system.md) — Scanner generation details
diff --git a/docs/handbook/cmark/cli-usage.md b/docs/handbook/cmark/cli-usage.md
new file mode 100644
index 0000000000..d77c3b8fa9
--- /dev/null
+++ b/docs/handbook/cmark/cli-usage.md
@@ -0,0 +1,249 @@
+# cmark — CLI Usage
+
+## Overview
+
+The `cmark` command-line tool (`main.c`) reads CommonMark input from files or stdin and renders it to one of five output formats. It serves as both a reference implementation and a practical conversion tool.
+
+## Entry Point
+
+```c
+int main(int argc, char *argv[]);
+```
+
+## Output Formats
+
+```c
+typedef enum {
+ FORMAT_NONE,
+ FORMAT_HTML,
+ FORMAT_XML,
+ FORMAT_MAN,
+ FORMAT_COMMONMARK,
+ FORMAT_LATEX,
+} writer_format;
+```
+
+Default: `FORMAT_HTML`.
+
+## Command-Line Options
+
+| Option | Long Form | Description |
+|--------|-----------|-------------|
+| `-t FORMAT` | `--to FORMAT` | Output format: `html`, `xml`, `man`, `commonmark`, `latex` |
+| | `--width N` | Wrapping width (0 = no wrapping; default 0). Only affects `commonmark`, `man`, `latex` |
+| | `--sourcepos` | Include source position information |
+| | `--hardbreaks` | Render soft breaks as hard breaks |
+| | `--nobreaks` | Render soft breaks as spaces |
+| | `--unsafe` | Allow raw HTML and dangerous URLs |
+| | `--smart` | Enable smart punctuation (curly quotes, em/en dashes, ellipses) |
+| | `--validate-utf8` | Validate and clean UTF-8 input |
+| `-h` | `--help` | Print usage information |
+| | `--version` | Print version string |
+
+## Option Parsing
+
+```c
+for (i = 1; i < argc; i++) {
+ if (strcmp(argv[i], "--version") == 0) {
+ printf("cmark %s", cmark_version_string());
+ printf(" - CommonMark converter\n(C) 2014-2016 John MacFarlane\n");
+ exit(0);
+ } else if (strcmp(argv[i], "--sourcepos") == 0) {
+ options |= CMARK_OPT_SOURCEPOS;
+ } else if (strcmp(argv[i], "--hardbreaks") == 0) {
+ options |= CMARK_OPT_HARDBREAKS;
+ } else if (strcmp(argv[i], "--nobreaks") == 0) {
+ options |= CMARK_OPT_NOBREAKS;
+ } else if (strcmp(argv[i], "--smart") == 0) {
+ options |= CMARK_OPT_SMART;
+ } else if (strcmp(argv[i], "--unsafe") == 0) {
+ options |= CMARK_OPT_UNSAFE;
+ } else if (strcmp(argv[i], "--validate-utf8") == 0) {
+ options |= CMARK_OPT_VALIDATE_UTF8;
+ } else if ((strcmp(argv[i], "--to") == 0 || strcmp(argv[i], "-t") == 0) &&
+ i + 1 < argc) {
+ i++;
+ if (strcmp(argv[i], "man") == 0) writer = FORMAT_MAN;
+ else if (strcmp(argv[i], "html") == 0) writer = FORMAT_HTML;
+ else if (strcmp(argv[i], "xml") == 0) writer = FORMAT_XML;
+ else if (strcmp(argv[i], "commonmark") == 0) writer = FORMAT_COMMONMARK;
+ else if (strcmp(argv[i], "latex") == 0) writer = FORMAT_LATEX;
+ else {
+ fprintf(stderr, "Unknown format %s\n", argv[i]);
+ exit(1);
+ }
+ } else if (strcmp(argv[i], "--width") == 0 && i + 1 < argc) {
+ i++;
+ width = atoi(argv[i]);
+ } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) {
+ print_usage();
+ exit(0);
+ } else if (*argv[i] == '-') {
+ print_usage();
+ exit(1);
+ } else {
+ // Treat as filename
+ files[numfps++] = i;
+ }
+}
+```
+
+## Input Handling
+
+### File Input
+
+```c
+for (i = 0; i < numfps; i++) {
+ fp = fopen(argv[files[i]], "rb");
+ if (fp == NULL) {
+ fprintf(stderr, "Error opening file %s: %s\n", argv[files[i]], strerror(errno));
+ exit(1);
+ }
+ // Read in chunks and feed to parser
+ while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
+ cmark_parser_feed(parser, buffer, bytes);
+ if (bytes < sizeof(buffer)) break;
+ }
+ fclose(fp);
+}
+```
+
+Files are opened in binary mode (`"rb"`) and read in chunks of `BUFFER_SIZE` (4096 bytes). Each chunk is fed to the streaming parser via `cmark_parser_feed()`.
+
+### Stdin Input
+
+```c
+if (numfps == 0) {
+ // Read from stdin
+ while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
+ cmark_parser_feed(parser, buffer, bytes);
+ if (bytes < sizeof(buffer)) break;
+ }
+}
+```
+
+When no files are specified, input is read from stdin.
+
+### Windows Binary Mode
+
+```c
+#if defined(_WIN32) && !defined(__CYGWIN__)
+_setmode(_fileno(stdin), _O_BINARY);
+_setmode(_fileno(stdout), _O_BINARY);
+#endif
+```
+
+On Windows, stdin and stdout are set to binary mode to prevent CR/LF translation.
+
+## Rendering
+
+```c
+document = cmark_parser_finish(parser);
+cmark_parser_free(parser);
+
+// Render based on format. Note: print_document() returns void — it writes
+// the rendered output to stdout and frees it internally.
+print_document(document, writer, width, options);
+```
+
+### `print_document()`
+
+```c
+static void print_document(cmark_node *document, writer_format writer,
+ int width, int options) {
+ char *result;
+ switch (writer) {
+ case FORMAT_HTML:
+ result = cmark_render_html(document, options);
+ break;
+ case FORMAT_XML:
+ result = cmark_render_xml(document, options);
+ break;
+ case FORMAT_MAN:
+ result = cmark_render_man(document, options, width);
+ break;
+ case FORMAT_COMMONMARK:
+ result = cmark_render_commonmark(document, options, width);
+ break;
+ case FORMAT_LATEX:
+ result = cmark_render_latex(document, options, width);
+ break;
+ default:
+ fprintf(stderr, "Unknown format %d\n", writer);
+ exit(1);
+ }
+ printf("%s", result);
+ document->mem->free(result);
+}
+```
+
+The rendered result is written to stdout and then freed.
+
+### Cleanup
+
+```c
+cmark_node_free(document);
+```
+
+The AST is freed after rendering.
+
+## OpenBSD Security
+
+```c
+#ifdef __OpenBSD__
+ if (pledge("stdio rpath", NULL) != 0) {
+ perror("pledge");
+ return 1;
+ }
+#endif
+```
+
+On OpenBSD, the program restricts itself to `stdio` and `rpath` (read-only file access) via `pledge()`. This prevents the cmark binary from performing any operations beyond reading files and writing to stdout/stderr.
+
+## Usage Examples
+
+```bash
+# Convert Markdown to HTML
+cmark input.md
+
+# Convert with smart punctuation
+cmark --smart input.md
+
+# Convert to man page with 72-column wrapping
+cmark -t man --width 72 input.md
+
+# Convert to LaTeX
+cmark -t latex input.md
+
+# Round-trip through CommonMark
+cmark -t commonmark input.md
+
+# Include source positions in output
+cmark --sourcepos input.md
+
+# Allow raw HTML passthrough
+cmark --unsafe input.md
+
+# Read from stdin
+echo "# Hello" | cmark
+
+# Validate UTF-8 input
+cmark --validate-utf8 input.md
+
+# Print version
+cmark --version
+```
+
+## Exit Codes
+
+- `0` — Success
+- `1` — Error (unknown option, file open failure, unknown format)
+
+## Cross-References
+
+- [main.c](../../../cmark/src/main.c) — Full implementation
+- [public-api.md](public-api.md) — The C API functions called by main
+- [html-renderer.md](html-renderer.md) — `cmark_render_html()`
+- [xml-renderer.md](xml-renderer.md) — `cmark_render_xml()`
+- [latex-renderer.md](latex-renderer.md) — `cmark_render_latex()`
+- [man-renderer.md](man-renderer.md) — `cmark_render_man()`
+- [commonmark-renderer.md](commonmark-renderer.md) — `cmark_render_commonmark()`
diff --git a/docs/handbook/cmark/code-style.md b/docs/handbook/cmark/code-style.md
new file mode 100644
index 0000000000..0ac2af2def
--- /dev/null
+++ b/docs/handbook/cmark/code-style.md
@@ -0,0 +1,293 @@
+# cmark — Code Style and Conventions
+
+## Overview
+
+This document describes the coding conventions and patterns used throughout the cmark codebase. Understanding these conventions makes the source code easier to navigate.
+
+## Naming Conventions
+
+### Public API Functions
+
+All public functions use the `cmark_` prefix:
+```c
+cmark_node *cmark_node_new(cmark_node_type type);
+cmark_parser *cmark_parser_new(int options);
+char *cmark_render_html(cmark_node *root, int options);
+```
+
+### Internal (Static) Functions
+
+File-local static functions use the `S_` prefix:
+```c
+static void S_render_node(cmark_node *node, cmark_event_type ev_type,
+ struct render_state *state, int options);
+static cmark_node *S_node_new(cmark_node_type type, cmark_mem *mem);
+static void S_free_nodes(cmark_node *e);
+static bool S_is_leaf(cmark_node *node);
+static int S_get_enumlevel(cmark_node *node);
+```
+
+This convention makes it immediately clear whether a function has file-local scope.
+
+### Internal (Non-Static) Functions
+
+Functions that are internal to the library but shared across translation units use:
+- `cmark_` prefix (same as public) — declared in private headers (e.g., `parser.h`, `node.h`)
+- No `S_` prefix
+
+Examples:
+```c
+// In node.h (private header):
+void cmark_node_set_type(cmark_node *node, cmark_node_type type);
+cmark_node *make_block(cmark_mem *mem, cmark_node_type type,
+ int start_line, int start_column);
+```
+
+### Struct Members
+
+No prefix convention — struct members use plain names:
+```c
+struct cmark_node {
+ cmark_mem *mem;
+ cmark_node *next;
+ cmark_node *prev;
+ cmark_node *parent;
+ cmark_node *first_child;
+ cmark_node *last_child;
+ // ...
+};
+```
+
+### Type Names
+
+Typedefs use the `cmark_` prefix:
+```c
+typedef struct cmark_node cmark_node;
+typedef struct cmark_parser cmark_parser;
+typedef struct cmark_iter cmark_iter;
+typedef int32_t bufsize_t; // Exception: no cmark_ prefix
+```
+
+### Enum Values
+
+Enum constants use the `CMARK_` prefix with UPPER_CASE:
+```c
+typedef enum {
+ CMARK_NODE_NONE,
+ CMARK_NODE_DOCUMENT,
+ CMARK_NODE_BLOCK_QUOTE,
+ // ...
+} cmark_node_type;
+```
+
+### Preprocessor Macros
+
+Macros use UPPER_CASE, sometimes with `CMARK_` prefix:
+```c
+#define CMARK_OPT_SOURCEPOS (1 << 1)
+#define CMARK_BUF_INIT(mem) { mem, cmark_strbuf__initbuf, 0, 0 }
+#define MAX_LINK_LABEL_LENGTH 1000
+#define CODE_INDENT 4
+```
+
+## Error Handling Patterns
+
+### Allocation Failure
+
+The default allocator (`xcalloc`, `xrealloc`) aborts on failure:
+```c
+static void *xcalloc(size_t nmemb, size_t size) {
+ void *ptr = calloc(nmemb, size);
+ if (!ptr) abort();
+ return ptr;
+}
+```
+
+Functions that allocate never return NULL — they either succeed or terminate. This eliminates NULL-check boilerplate throughout the codebase.
+
+### Invalid Input
+
+Functions that receive invalid arguments typically:
+1. Return 0/false/NULL for queries
+2. Do nothing for mutations
+3. Never crash
+
+Example from `node.c`:
+```c
+int cmark_node_set_heading_level(cmark_node *node, int level) {
+ if (node == NULL || node->type != CMARK_NODE_HEADING) return 0;
+ if (level < 1 || level > 6) return 0;
+ node->as.heading.level = level;
+ return 1;
+}
+```
+
+### Return Conventions
+
+- **0/1 for success/failure**: Setter functions return 1 on success, 0 on failure
+- **NULL for not found**: Lookup functions return NULL when the item doesn't exist
+- **Assertion for invariants**: Internal invariants use `assert()`:
+ ```c
+  assert(node->type == CMARK_NODE_DOCUMENT);
+ ```
+
+## Header Guard Style
+
+```c
+#ifndef CMARK_NODE_H
+#define CMARK_NODE_H
+// ...
+#endif
+```
+
+Guards use `CMARK_` prefix + uppercase filename + `_H`.
+
+## Include Patterns
+
+### Public Headers
+```c
+#include "cmark.h" // Always first — provides all public types
+```
+
+### Private Headers
+```c
+#include "node.h" // Internal node definitions
+#include "parser.h" // Parser internals
+#include "buffer.h" // cmark_strbuf
+#include "chunk.h" // cmark_chunk
+#include "references.h" // Reference map
+#include "utf8.h" // UTF-8 utilities
+#include "scanners.h" // re2c-generated scanners
+```
+
+### System Headers
+```c
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+```
+
+## Inline Functions
+
+The `CMARK_INLINE` macro abstracts compiler-specific inline syntax:
+```c
+#if defined(_MSC_VER) && !defined(__cplusplus)
+#define CMARK_INLINE __inline
+#else
+#define CMARK_INLINE inline
+#endif
+```
+
+Used for small, hot-path functions in headers:
+```c
+static CMARK_INLINE void cmark_chunk_free(cmark_mem *mem, cmark_chunk *c) { ... }
+static CMARK_INLINE cmark_chunk cmark_chunk_dup(...) { ... }
+```
+
+## Memory Ownership Patterns
+
+### Owning vs Non-Owning
+
+The `cmark_chunk` type makes ownership explicit:
+- `alloc > 0` → the chunk owns the memory and must free it
+- `alloc == 0` → the chunk borrows memory from elsewhere
+
+### Transfer of Ownership
+
+`cmark_strbuf_detach()` transfers ownership from a strbuf to the caller:
+```c
+unsigned char *data = cmark_strbuf_detach(&buf);
+// Caller now owns 'data' and must free it
+```
+
+### Consistent Cleanup
+
+Free functions null out pointers after freeing:
+```c
+static CMARK_INLINE void cmark_chunk_free(cmark_mem *mem, cmark_chunk *c) {
+ if (c->alloc)
+ mem->free((void *)c->data);
+ c->data = NULL; // NULL after free
+ c->alloc = 0;
+ c->len = 0;
+}
+```
+
+## Iterative vs Recursive Patterns
+
+The codebase avoids recursion for tree operations to prevent stack overflow on deeply nested input:
+
+### Iterative Tree Destruction
+`S_free_nodes()` uses sibling-list splicing instead of recursion:
+```c
+// Splice children into sibling chain
+if (e->first_child) {
+ cmark_node *last = e->last_child;
+ last->next = e->next;
+ e->next = e->first_child;
+}
+```
+
+### Iterator-Based Traversal
+All rendering uses `cmark_iter` instead of recursive `render_children()`:
+```c
+while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+ cur = cmark_iter_get_node(iter);
+ S_render_node(cur, ev_type, &state, options);
+}
+```
+
+## Type Size Definitions
+
+```c
+typedef int32_t bufsize_t;
+```
+
+Buffer sizes use `int32_t` (not `size_t`) to:
+1. Allow negative values for error signaling
+2. Keep node structs compact (32-bit vs 64-bit on LP64)
+3. Limit maximum allocation to 2GB (adequate for text processing)
+
+## Bitmask Patterns
+
+Option flags use single-bit constants:
+```c
+#define CMARK_OPT_SOURCEPOS (1 << 1)
+#define CMARK_OPT_HARDBREAKS (1 << 2)
+#define CMARK_OPT_UNSAFE (1 << 17)
+#define CMARK_OPT_NOBREAKS (1 << 4)
+#define CMARK_OPT_VALIDATE_UTF8 (1 << 9)
+#define CMARK_OPT_SMART (1 << 10)
+```
+
+Tested with bitwise AND:
+```c
+if (options & CMARK_OPT_SOURCEPOS) { ... }
+```
+
+Combined with bitwise OR:
+```c
+int options = CMARK_OPT_SOURCEPOS | CMARK_OPT_SMART;
+```
+
+## Leaf Mask Pattern
+
+`S_is_leaf()` in `iterator.c` uses a bitmask for O(1) node-type classification:
+```c
+static const int S_leaf_mask =
+ (1 << CMARK_NODE_HTML_BLOCK) | (1 << CMARK_NODE_THEMATIC_BREAK) |
+ (1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_TEXT) | ...;
+
+static bool S_is_leaf(cmark_node *node) {
+ return ((1 << node->type) & S_leaf_mask) != 0;
+}
+```
+
+This is more efficient than a switch statement for a simple boolean classification.
+
+## Cross-References
+
+- [architecture.md](architecture.md) — Design decisions
+- [memory-management.md](memory-management.md) — Allocator patterns
+- [public-api.md](public-api.md) — Public API naming
diff --git a/docs/handbook/cmark/commonmark-renderer.md b/docs/handbook/cmark/commonmark-renderer.md
new file mode 100644
index 0000000000..01ffb3a987
--- /dev/null
+++ b/docs/handbook/cmark/commonmark-renderer.md
@@ -0,0 +1,344 @@
+# cmark — CommonMark Renderer
+
+## Overview
+
+The CommonMark renderer (`commonmark.c`) converts a `cmark_node` AST back into CommonMark-formatted Markdown text. This is significantly more complex than the other renderers because it must reproduce syntactically valid Markdown that, when re-parsed, produces an equivalent AST. It uses the generic render framework from `render.c`.
+
+## Entry Point
+
+```c
+char *cmark_render_commonmark(cmark_node *root, int options, int width);
+```
+
+- `root` — AST root node
+- `options` — Option flags
+- `width` — Target line width for wrapping; 0 disables wrapping
+
+## Character Escaping (`outc`)
+
+The CommonMark escaping is the most complex of all renderers. Three escaping modes exist:
+
+### NORMAL Mode
+
+Characters that could be interpreted as Markdown syntax must be backslash-escaped. Characters that trigger escaping:
+
+```c
+case '*':
+case '#':
+case '(':
+case ')':
+case '[':
+case ']':
+case '<':
+case '>':
+case '!':
+case '\\':
+ // Backslash-escaped: \*, \#, \(, etc.
+```
+
+Additionally:
+- `.` and `)` — only escaped at line start (after a digit), to prevent triggering ordered list syntax
+- `-`, `+`, `=`, `_` — only escaped at line start, to prevent thematic breaks, bullet lists, or setext headings
+- `~` — only escaped at line start
+- `&` — escaped to prevent entity references
+- `'`, `"` — escaped for smart punctuation
+
+For whitespace handling:
+- NBSP (`\xA0`) → `\xa0` (the literal non-breaking space character)
+- Tab → space (tabs cannot be reliably round-tripped)
+
+### URL Mode
+
+Only `(`, `)`, and whitespace `\x20` are escaped with backslashes. URLs in parenthesized `()` format need minimal escaping.
+
+### TITLE Mode
+
+For link titles, only the title delimiter character is escaped. The renderer currently always uses `"` as the title delimiter, so `"` is backslash-escaped within titles.
+
+## Backtick Sequence Analysis
+
+Two helper functions determine how to format inline code spans:
+
+### `longest_backtick_sequence()`
+
+```c
+static int longest_backtick_sequence(const char *code) {
+ int longest = 0;
+ int current = 0;
+ size_t i = 0;
+ size_t code_len = strlen(code);
+ while (i <= code_len) {
+ if (code[i] == '`') {
+ current++;
+ } else {
+ if (current > longest)
+ longest = current;
+ current = 0;
+ }
+ i++;
+ }
+ return longest;
+}
+```
+
+Finds the maximum run of consecutive backticks within a code string.
+
+### `shortest_unused_backtick_sequence()`
+
+```c
+static int shortest_unused_backtick_sequence(const char *code) {
+ int32_t used = 1; // Bitmask for sequences of length 1-31
+ int current = 0;
+ // ... scan for runs, set bits in 'used'
+ int i = 0;
+ while (used & 1) {
+ used >>= 1;
+ i++;
+ }
+  return i;
+}
+```
+
+Determines the shortest backtick sequence (1-32) that does NOT appear in the code content. This ensures the code delimiter won't conflict with backticks inside the code.
+
+Uses a clever bit-manipulation approach: a 32-bit integer `used` tracks which backtick sequence lengths appear. After scanning, the position of the first unset bit gives the shortest unused length.
+
+## Autolink Detection
+
+```c
+static bool is_autolink(cmark_node *node) {
+ const char *title;
+ const char *url;
+ // ...
+ if (node->first_child->type != CMARK_NODE_TEXT) return false;
+ url = (char *)node->as.link.url;
+ title = (char *)node->as.link.title;
+ if (title && title[0]) return false; // Autolinks have no title
+ if (url &&
+ (strncmp(url, "http://", 7) == 0 || strncmp(url, "https://", 8) == 0 ||
+ strncmp(url, "mailto:", 7) == 0) &&
+ strcmp(url, (char *)node->first_child->data) == 0)
+ return true;
+ return false;
+}
+```
+
+A link is an autolink if:
+1. It has exactly one child, a text node
+2. No title
+3. URL starts with `http://`, `https://`, or `mailto:`
+4. The text exactly matches the URL
+
+## Node Rendering (`S_render_node`)
+
+### Block Nodes
+
+#### Document
+No output.
+
+#### Block Quote
+```
+ENTER: Sets prefix to "> " for first line and "> " for continuations
+EXIT: Restores prefix, adds blank line
+```
+
+The prefix mechanism is central to CommonMark rendering. When entering a block quote:
+```c
+cmark_strbuf_puts(renderer->prefix, "> ");
+```
+
+All content within the block quote is prefixed with `"> "` on each line.
+
+#### List
+```
+ENTER: Records tight/loose status, records bullet character
+EXIT: Restores prefix, adds blank line
+```
+
+The renderer stores whether the list is tight to control inter-item blank lines.
+
+#### Item
+```
+ENTER: Computes marker and indentation prefix
+EXIT: Restores prefix
+```
+
+**Bullet items:** Use `-`, `*`, or `+` (from `cmark_node_get_list_delim`). The prefix is set to appropriate indentation:
+
+```c
+// For a bullet item:
+"- " on the first line
+" " on continuation lines (indentation matches marker width)
+```
+
+**Ordered items:** Number is computed by counting previous siblings:
+```c
+list_number = cmark_node_get_list_start(node->parent);
+tmp = node;
+while (tmp->prev) {
+ tmp = tmp->prev;
+ list_number++;
+}
+```
+
+Format: `"N. "` or `"N) "` depending on delimiter type. Continuation indent matches the marker width.
+
+For tight lists, items don't emit blank lines between them.
+
+#### Heading
+**ATX headings** (levels 1-6):
+```
+### Content\n
+```
+
+The number of `#` characters matches the heading level. A newline follows the heading content.
+
+**Setext headings** (levels 1-2):
+Not used — the renderer always emits ATX headings, regardless of the `width` setting.
+
+#### Code Block
+The renderer determines whether to use fenced or indented code:
+
+**Fenced code blocks:**
+```
+```[info]
+content
+```
+```
+
+The fence character is `` ` ``. The fence length is max(3, longest_backtick_in_content + 1).
+
+If the code has an info string, fenced blocks are always used (indented blocks cannot carry info strings).
+
+**Indented code blocks:**
+If there's no info string and `width == 0`, the renderer uses 4-space indentation by setting the prefix to `" "`.
+
+#### HTML Block
+Content is output LITERALLY (no escaping):
+```c
+cmark_render_ascii(renderer, (char *)node->data);
+```
+
+This preserves raw HTML exactly.
+
+#### Thematic Break
+```
+---\n
+```
+
+Uses `---` (three hyphens).
+
+#### Paragraph
+```
+ENTER: (nothing for tight, blank line for normal)
+EXIT: \n (newline after content)
+```
+
+In tight lists, paragraphs don't add blank lines before/after.
+
+### Inline Nodes
+
+#### Text
+Output with NORMAL escaping (all Markdown-significant characters escaped).
+
+#### Soft Break
+Depends on options:
+- `CMARK_OPT_HARDBREAKS`: `\\\n` (backslash line break)
+- `CMARK_OPT_NOBREAKS`: space
+- Default: newline
+
+#### Line Break
+```
+\\\n
+```
+
+Backslash followed by newline.
+
+#### Code (inline)
+The renderer selects delimiters using `shortest_unused_backtick_sequence()`:
+
+```c
+int numticks = shortest_unused_backtick_sequence(code);
+// output numticks backticks
+// if code starts or ends with backtick, add space padding
+// output literal code
+// output numticks backticks
+```
+
+If the code content starts or ends with a backtick, spaces are added inside the delimiters to prevent ambiguity:
+```
+`` `code` ``
+```
+
+#### Emphasis
+```
+ENTER: * or _ (delimiter character)
+EXIT: * or _ (matching delimiter)
+```
+
+The delimiter selection depends on what characters appear in the content. If the content contains `*`, `_` is preferred (and vice versa). The `emph_delim` variable tracks the chosen delimiter.
+
+#### Strong
+```
+ENTER: ** or __
+EXIT: ** or __
+```
+
+Same delimiter selection logic as emphasis.
+
+#### Link
+**Autolinks:**
+```
+<URL>
+```
+
+**Normal links:**
+```
+ENTER: [
+EXIT: ](URL "TITLE") or ](URL) if no title
+```
+
+The URL is output with URL escaping, the title with TITLE escaping.
+
+#### Image
+```
+ENTER: ![
+EXIT: ](URL "TITLE") or ](URL) if no title
+```
+
+Same as links but with `!` prefix.
+
+#### HTML Inline
+Output literally (no escaping).
+
+## Prefix Management
+
+The CommonMark renderer makes extensive use of the prefix system from `render.c`. Each line of output is prefixed with accumulated prefix strings from container nodes. For example, a list item inside a block quote:
+
+```
+> - Item text
+> continuation
+```
+
+The prefix stack would be:
+1. `"> "` from the block quote
+2. `" "` (continuation indent) from the list item
+
+The `cmark_renderer` struct maintains `prefix` and `begin_content` fields to handle this.
+
+## Round-Trip Fidelity
+
+The CommonMark renderer aims for round-trip fidelity: parsing the output should produce an AST equivalent to the input. This is not always perfectly achievable:
+
+1. **Whitespace normalization**: Some whitespace differences (e.g., number of blank lines) are lost.
+2. **Reference links**: Inline link syntax is always used; reference-style links are not preserved.
+3. **ATX vs setext**: Always uses ATX headings.
+4. **Indented vs fenced**: Logic selects one based on info string presence and width setting.
+5. **Emphasis delimiter**: May differ from the original (`*` vs `_`).
+
+## Cross-References
+
+- [commonmark.c](../../cmark/src/commonmark.c) — Full implementation
+- [render-framework.md](render-framework.md) — Generic render framework
+- [public-api.md](public-api.md) — `cmark_render_commonmark()` API docs
+- [scanner-system.md](scanner-system.md) — Scanners used for autolink detection
diff --git a/docs/handbook/cmark/html-renderer.md b/docs/handbook/cmark/html-renderer.md
new file mode 100644
index 0000000000..98406c300c
--- /dev/null
+++ b/docs/handbook/cmark/html-renderer.md
@@ -0,0 +1,258 @@
+# cmark — HTML Renderer
+
+## Overview
+
+The HTML renderer (`html.c`) converts a `cmark_node` AST into an HTML string. Unlike the LaTeX, man, and CommonMark renderers, it does NOT use the generic render framework from `render.c`. Instead, it writes directly to a `cmark_strbuf` buffer, giving it full control over output formatting.
+
+## Entry Point
+
+```c
+char *cmark_render_html(cmark_node *root, int options);
+```
+
+Creates an iterator over the AST, processes each node via `S_render_node()`, and returns the resulting HTML string. The caller is responsible for freeing the returned buffer.
+
+### Implementation
+
+```c
+char *cmark_render_html(cmark_node *root, int options) {
+ char *result;
+ cmark_strbuf html = CMARK_BUF_INIT(root->mem);
+ cmark_event_type ev_type;
+ cmark_node *cur;
+ struct render_state state = {&html, NULL};
+ cmark_iter *iter = cmark_iter_new(root);
+
+ while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+ cur = cmark_iter_get_node(iter);
+ S_render_node(cur, ev_type, &state, options);
+ }
+ result = (char *)cmark_strbuf_detach(&html);
+ cmark_iter_free(iter);
+ return result;
+}
+```
+
+## Render State
+
+```c
+struct render_state {
+ cmark_strbuf *html; // Output buffer
+ cmark_node *plain; // Non-NULL when rendering image alt text (plain text mode)
+};
+```
+
+The `plain` field is used for image alt text rendering. When entering an image node, `state->plain` is set to the image node. While `plain` is non-NULL, only text content is emitted (HTML tags are suppressed) — this ensures the `alt` attribute contains only plain text, not nested HTML. When the iterator exits the image node (`state->plain == node`), plain mode is cleared.
+
+## HTML Escaping
+
+```c
+static void escape_html(cmark_strbuf *dest, const unsigned char *source,
+ bufsize_t length) {
+ houdini_escape_html(dest, source, length, 0);
+}
+```
+
+Characters `<`, `>`, `&`, `"` are converted to their HTML entity equivalents. The `0` argument means "not secure mode" (no additional escaping).
+
+## Source Position Attributes
+
+```c
+static void S_render_sourcepos(cmark_node *node, cmark_strbuf *html, int options) {
+ char buffer[BUFFER_SIZE];
+ if (CMARK_OPT_SOURCEPOS & options) {
+ snprintf(buffer, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"",
+ cmark_node_get_start_line(node), cmark_node_get_start_column(node),
+ cmark_node_get_end_line(node), cmark_node_get_end_column(node));
+ cmark_strbuf_puts(html, buffer);
+ }
+}
+```
+
+When `CMARK_OPT_SOURCEPOS` is set, all block-level elements receive a `data-sourcepos` attribute with format `"startline:startcol-endline:endcol"`.
+
+## Newline Helper
+
+```c
+static inline void cr(cmark_strbuf *html) {
+ if (html->size && html->ptr[html->size - 1] != '\n')
+ cmark_strbuf_putc(html, '\n');
+}
+```
+
+Ensures the output ends with a newline without adding redundant ones.
+
+## Node Rendering Logic
+
+The `S_render_node()` function handles each node type in a large switch statement. The `entering` boolean indicates whether this is an `CMARK_EVENT_ENTER` or `CMARK_EVENT_EXIT` event.
+
+### Block Nodes
+
+#### Document
+No output — the document node is purely structural.
+
+#### Block Quote
+```
+ENTER: \n<blockquote[sourcepos]>\n
+EXIT: \n</blockquote>\n
+```
+
+#### List
+```
+ENTER (bullet): \n<ul[sourcepos]>\n
+ENTER (ordered): \n<ol[sourcepos]>\n (or <ol start="N"> if start > 1)
+EXIT: </ul>\n or </ol>\n
+```
+
+#### Item
+```
+ENTER: \n<li[sourcepos]>
+EXIT: </li>\n
+```
+
+#### Heading
+```
+ENTER: \n<hN[sourcepos]> (where N = heading level)
+EXIT: </hN>\n
+```
+
+The heading level is injected into character arrays:
+```c
+char start_heading[] = "<h0";
+start_heading[2] = (char)('0' + node->as.heading.level);
+```
+
+#### Code Block
+Always a leaf node (single event). Output:
+```html
+<pre[sourcepos]><code>ESCAPED CONTENT</code></pre>\n
+```
+
+If the code block has an info string, a `class` attribute is added:
+```html
+<pre[sourcepos]><code class="language-INFO">ESCAPED CONTENT</code></pre>\n
+```
+
+The `"language-"` prefix is only added if the info string doesn't already start with `"language-"`.
+
+#### HTML Block
+When `CMARK_OPT_UNSAFE` is set, raw HTML is output verbatim. Otherwise, it's replaced with:
+```html
+<!-- raw HTML omitted -->
+```
+
+#### Thematic Break
+```html
+<hr[sourcepos] />\n
+```
+
+#### Paragraph
+The paragraph respects tight list context. The renderer checks if the paragraph's grandparent is a list with `tight = true`:
+
+```c
+parent = cmark_node_parent(node);
+grandparent = cmark_node_parent(parent);
+if (grandparent != NULL && grandparent->type == CMARK_NODE_LIST) {
+ tight = grandparent->as.list.tight;
+} else {
+ tight = false;
+}
+```
+
+In tight lists, the `<p>` tags are suppressed — content flows directly without wrapping.
+
+#### Custom Block
+On enter, outputs the `on_enter` text literally. On exit, outputs `on_exit`.
+
+### Inline Nodes
+
+#### Text
+```c
+escape_html(html, node->data, node->len);
+```
+
+All text content is HTML-escaped.
+
+#### Line Break
+```html
+<br />\n
+```
+
+#### Soft Break
+Behavior depends on options:
+- `CMARK_OPT_HARDBREAKS`: `<br />\n`
+- `CMARK_OPT_NOBREAKS`: single space
+- Default: `\n`
+
+#### Code (inline)
+```html
+<code>ESCAPED CONTENT</code>
+```
+
+#### HTML Inline
+Same as HTML block: verbatim with `CMARK_OPT_UNSAFE`, otherwise `<!-- raw HTML omitted -->`.
+
+#### Emphasis
+```
+ENTER: <em>
+EXIT: </em>
+```
+
+#### Strong
+```
+ENTER: <strong>
+EXIT: </strong>
+```
+
+#### Link
+```
+ENTER: <a href="ESCAPED_URL"[ title="ESCAPED_TITLE"]>
+EXIT: </a>
+```
+
+URL safety: If `CMARK_OPT_UNSAFE` is NOT set, the URL is checked against `_scan_dangerous_url()`. Dangerous URLs (`javascript:`, `vbscript:`, `file:`, certain `data:` schemes) produce an empty `href`.
+
+URL escaping uses `houdini_escape_href()` which percent-encodes special characters. Title escaping uses `escape_html()`.
+
+#### Image
+```
+ENTER: <img src="ESCAPED_URL" alt="
+ (enters plain text mode — state->plain = node)
+EXIT: "[ title="ESCAPED_TITLE"] />
+```
+
+During plain text mode (between enter and exit), only text content, code content, and HTML inline content are output (HTML-escaped), and breaks are rendered as spaces.
+
+#### Custom Inline
+On enter, outputs `on_enter` literally. On exit, outputs `on_exit`.
+
+## URL Safety
+
+Links and images check URL safety unless `CMARK_OPT_UNSAFE` is set:
+
+```c
+if (node->as.link.url && ((options & CMARK_OPT_UNSAFE) ||
+ !(_scan_dangerous_url(node->as.link.url)))) {
+ houdini_escape_href(html, node->as.link.url,
+ (bufsize_t)strlen((char *)node->as.link.url));
+}
+```
+
+The `_scan_dangerous_url()` scanner (from `scanners.c`) matches schemes: `javascript:`, `vbscript:`, `file:`, and `data:` (except for safe image MIME types: `image/png`, `image/gif`, `image/jpeg`, `image/webp`).
+
+## Differences from Framework Renderers
+
+The HTML renderer differs from the render-framework-based renderers in several ways:
+
+1. **No line wrapping**: HTML output has no configurable width or word-wrap logic.
+2. **No prefix management**: Block quotes and lists don't use prefix strings for indentation — they use HTML tags.
+3. **Direct buffer writes**: All output goes directly to a `cmark_strbuf`, with no escaping dispatch function.
+4. **No `width` parameter**: `cmark_render_html()` takes only `root` and `options`.
+
+## Cross-References
+
+- [html.c](../../cmark/src/html.c) — Full implementation
+- [render-framework.md](render-framework.md) — The alternative render architecture used by other renderers
+- [iterator-system.md](iterator-system.md) — How the AST is traversed
+- [scanner-system.md](scanner-system.md) — `_scan_dangerous_url()` for URL safety
+- [public-api.md](public-api.md) — `cmark_render_html()` API documentation
diff --git a/docs/handbook/cmark/inline-parsing.md b/docs/handbook/cmark/inline-parsing.md
new file mode 100644
index 0000000000..4485017305
--- /dev/null
+++ b/docs/handbook/cmark/inline-parsing.md
@@ -0,0 +1,317 @@
+# cmark — Inline Parsing
+
+## Overview
+
+Inline parsing is Phase 2 of cmark's pipeline. Implemented in `inlines.c`, it processes the text content of paragraph and heading nodes, recognizing emphasis (`*`, `_`), code spans (`` ` ``), links (`[text](url)`), images (`![alt](url)`), autolinks (`<url>`), raw HTML inline, hard line breaks, soft line breaks, and smart punctuation.
+
+The entry point is `cmark_parse_inlines()`, called from `process_inlines()` in `blocks.c` after all block structure has been finalized.
+
+## The `subject` Struct
+
+All inline parsing state is tracked in the `subject` struct:
+
+```c
+typedef struct {
+ cmark_mem *mem; // Memory allocator
+ cmark_chunk input; // The text being parsed
+ unsigned flags; // Skip flags for HTML constructs
+ int line; // Source line number
+ bufsize_t pos; // Current byte position in input
+ int block_offset; // Column offset of the containing block
+ int column_offset; // Adjustment for multi-line source position tracking
+ cmark_reference_map *refmap; // Link reference definitions
+ delimiter *last_delim; // Top of delimiter stack (linked list, newest first)
+ bracket *last_bracket; // Top of bracket stack (linked list, newest first)
+ bufsize_t backticks[MAXBACKTICKS + 1]; // Cached positions of backtick sequences
+ bool scanned_for_backticks; // Whether the full input has been scanned for backticks
+ bool no_link_openers; // Optimization: set when no link openers remain
+} subject;
+```
+
+`MAXBACKTICKS` is defined as 1000. The `backticks` array caches the positions of backtick sequences of each length, enabling O(1) lookup once the input has been fully scanned.
+
+### Skip Flags
+
+The `flags` field uses bit flags to track which HTML constructs have been confirmed absent:
+
+```c
+#define FLAG_SKIP_HTML_CDATA (1u << 0)
+#define FLAG_SKIP_HTML_DECLARATION (1u << 1)
+#define FLAG_SKIP_HTML_PI (1u << 2)
+#define FLAG_SKIP_HTML_COMMENT (1u << 3)
+```
+
+Once a scan for a particular HTML construct fails, the flag is set to avoid rescanning.
+
+## The Delimiter Stack
+
+Emphasis and smart punctuation use a delimiter stack. Each entry is:
+
+```c
+typedef struct delimiter {
+ struct delimiter *previous; // Link to older delimiter
+ struct delimiter *next; // Link to newer delimiter (towards top)
+ cmark_node *inl_text; // The text node created for this delimiter run
+ bufsize_t position; // Position in the input
+ bufsize_t length; // Number of delimiter characters remaining
+ unsigned char delim_char; // '*', '_', '\'', or '"'
+ bool can_open; // Whether this run can open emphasis
+ bool can_close; // Whether this run can close emphasis
+} delimiter;
+```
+
+The stack is a doubly-linked list with `last_delim` pointing to the newest entry.
+
+## The Bracket Stack
+
+Links and images use a separate bracket stack:
+
+```c
+typedef struct bracket {
+ struct bracket *previous; // Link to older bracket
+ cmark_node *inl_text; // The text node for '[' or '!['
+ bufsize_t position; // Position in the input
+ bool image; // Whether this is an image opener '!['
+ bool active; // Can still match (set to false when deactivated)
+ bool bracket_after; // Whether a '[' appeared after this bracket
+} bracket;
+```
+
+Brackets are deactivated (set `active = false`) when:
+- A matching `]` fails to produce a valid link (the opener is deactivated to prevent infinite loops)
+- An inner link is formed (outer brackets are deactivated per spec)
+
+## Emphasis Flanking Rules: `scan_delims()`
+
+```c
+static int scan_delims(subject *subj, unsigned char c, bool *can_open,
+ bool *can_close);
+```
+
+This function determines whether a run of `*`, `_`, `'`, or `"` characters can open and/or close emphasis, following the CommonMark spec's Unicode-aware flanking rules:
+
+1. The function looks at the character **before** the run and the character **after** the run.
+2. It uses `cmark_utf8proc_iterate()` to decode the surrounding characters as full Unicode code points.
+3. It classifies them using `cmark_utf8proc_is_space()` and `cmark_utf8proc_is_punctuation_or_symbol()`.
+
+The flanking rules:
+- **Left-flanking**: numdelims > 0, character after is not a space, AND (character after is not punctuation OR character before is a space or punctuation)
+- **Right-flanking**: numdelims > 0, character before is not a space, AND (character before is not punctuation OR character after is a space or punctuation)
+
+For `*`: `can_open = left_flanking`, `can_close = right_flanking`
+
+For `_`:
+```c
+*can_open = left_flanking &&
+ (!right_flanking || cmark_utf8proc_is_punctuation_or_symbol(before_char));
+*can_close = right_flanking &&
+ (!left_flanking || cmark_utf8proc_is_punctuation_or_symbol(after_char));
+```
+
+For `'` and `"` (smart punctuation):
+```c
+*can_open = left_flanking &&
+ (!right_flanking || before_char == '(' || before_char == '[') &&
+ before_char != ']' && before_char != ')';
+*can_close = right_flanking;
+```
+
+The function advances `subj->pos` past the delimiter run and returns the number of delimiter characters consumed. For quotes, only 1 delimiter is consumed regardless of how many appear.
+
+## Emphasis Resolution: `S_insert_emph()`
+
+```c
+static delimiter *S_insert_emph(subject *subj, delimiter *opener,
+ delimiter *closer);
+```
+
+When a closing delimiter is found that matches an opener on the stack, this function creates emphasis nodes:
+
+1. If both the opener and the closer have a remaining length >= 2, create a `CMARK_NODE_STRONG` node (consuming 2 characters from each).
+2. Otherwise, create a `CMARK_NODE_EMPH` node (consuming 1 character from each).
+3. All inline nodes between the opener and closer are moved to become children of the new emphasis node.
+4. Any delimiters between the opener and closer are removed from the stack.
+5. If the opener is exhausted (`length == 0`), it's removed from the stack.
+6. If the closer is exhausted, it's removed too; otherwise, processing continues.
+
+## Code Span Parsing: `handle_backticks()`
+
+```c
+static cmark_node *handle_backticks(subject *subj, int options);
+```
+
+When a backtick is encountered:
+
+1. `take_while(subj, isbacktick)` consumes the opening backtick run and records its length.
+2. `scan_to_closing_backticks()` searches forward for a matching backtick run of the same length.
+
+The scanning function uses the `subj->backticks[]` array to cache positions of backtick sequences. If `subj->scanned_for_backticks` is true and the cached position for the needed length is behind the current position, it immediately returns 0 (no match).
+
+If no closing backticks are found, the opening run is emitted as literal text. If found, the content between is extracted, normalized via `S_normalize_code()`:
+
+```c
+static void S_normalize_code(cmark_strbuf *s) {
+ // 1. Convert \r\n and \r to spaces
+ // 2. Convert \n to spaces
+ // 3. If content begins and ends with a space and contains non-space chars,
+ // strip one leading and one trailing space
+}
+```
+
+## Link Parsing
+
+When `]` is encountered after an opener on the bracket stack:
+
+### Inline Links: `[text](url "title")`
+
+The parser looks for `(` immediately after `]`, then:
+1. Skips optional whitespace
+2. Tries to parse a link destination (URL)
+3. Skips optional whitespace
+4. Optionally parses a link title (in single quotes, double quotes, or parentheses)
+5. Expects `)`
+
+### Reference Links: `[text][ref]` or `[text][]` or `[text]`
+
+If the inline link syntax doesn't match, the parser tries:
+1. `[text][ref]` — explicit reference
+2. `[text][]` — collapsed reference (label = text)
+3. `[text]` — shortcut reference (label = text)
+
+Reference lookup uses `cmark_reference_lookup()` against the parser's `refmap`.
+
+### URL Cleaning
+
+```c
+unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
+```
+
+Trims the URL, unescapes HTML entities, and handles angle-bracket-delimited URLs.
+
+### Autolinks
+
+```c
+static inline cmark_node *make_autolink(subject *subj, int start_column,
+ int end_column, cmark_chunk url,
+ int is_email);
+```
+
+Autolinks (`<http://example.com>` or `<user@example.com>`) are detected via the `scan_autolink_uri()` and `scan_autolink_email()` scanner functions. Email autolinks have `mailto:` prepended to the URL automatically:
+
+```c
+static unsigned char *cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
+ int is_email) {
+ cmark_strbuf buf = CMARK_BUF_INIT(mem);
+ cmark_chunk_trim(url);
+ if (is_email)
+ cmark_strbuf_puts(&buf, "mailto:");
+ houdini_unescape_html_f(&buf, url->data, url->len);
+ return cmark_strbuf_detach(&buf);
+}
+```
+
+## Smart Punctuation
+
+When `CMARK_OPT_SMART` is enabled, the inline parser transforms:
+
+```c
+static const char *EMDASH = "\xE2\x80\x94"; // —
+static const char *ENDASH = "\xE2\x80\x93"; // –
+static const char *ELLIPSES = "\xE2\x80\xA6"; // …
+static const char *LEFTDOUBLEQUOTE = "\xE2\x80\x9C"; // “
+static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D"; // ”
+static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98"; // ‘
+static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; // ’
+```
+
+- `---` becomes em dash (—)
+- `--` becomes en dash (–)
+- `...` becomes ellipsis (…)
+- `'` and `"` are converted to curly quotes using the delimiter stack (open/close logic)
+
+## Hard and Soft Line Breaks
+
+- **Hard line break**: Two or more spaces before a line ending, or a backslash before a line ending. Creates a `CMARK_NODE_LINEBREAK` node.
+- **Soft line break**: A line ending not preceded by spaces or backslash. Creates a `CMARK_NODE_SOFTBREAK` node.
+
+## Special Character Dispatch
+
+```c
+static bufsize_t subject_find_special_char(subject *subj, int options);
+```
+
+This function scans forward from `subj->pos` looking for the next special character that needs inline processing. Special characters include:
+- Line endings (`\r`, `\n`)
+- Backtick (`` ` ``)
+- Backslash (`\`)
+- Ampersand (`&`)
+- Less-than (`<`)
+- Open bracket (`[`)
+- Close bracket (`]`)
+- Exclamation mark (`!`)
+- Emphasis characters (`*`, `_`)
+
+Any text between special characters is collected as a `CMARK_NODE_TEXT` node.
+
+## Source Position Tracking
+
+```c
+static void adjust_subj_node_newlines(subject *subj, cmark_node *node,
+ int matchlen, int extra, int options);
+```
+
+When `CMARK_OPT_SOURCEPOS` is enabled, this function adjusts source positions for multi-line inline constructs. It counts newlines in the just-matched span and updates:
+- `subj->line` — incremented by the number of newlines
+- `node->end_line` — adjusted for multi-line spans
+- `node->end_column` — set to the number of characters after the last newline
+- `subj->column_offset` — adjusted for correct subsequent position calculations
+
+## Inline Node Factory Functions
+
+The inline parser uses efficient factory functions:
+
+```c
+// Macros for simple nodes
+#define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)
+#define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)
+#define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH)
+#define make_strong(mem) make_simple(mem, CMARK_NODE_STRONG)
+```
+
+```c
+// Fast child appending (bypasses S_can_contain validation)
+static void append_child(cmark_node *node, cmark_node *child) {
+ cmark_node *old_last_child = node->last_child;
+ child->next = NULL;
+ child->prev = old_last_child;
+ child->parent = node;
+ node->last_child = child;
+ if (old_last_child) {
+ old_last_child->next = child;
+ } else {
+ node->first_child = child;
+ }
+}
+```
+
+This `append_child()` is a simplified version of the public `cmark_node_append_child()`, skipping containership validation since the inline parser always produces valid structures.
+
+## The Main Parse Loop
+
+```c
+void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
+ cmark_reference_map *refmap, int options);
+```
+
+This function initializes a `subject` from the parent node's `data` field, then repeatedly calls `parse_inline()` until the input is exhausted. Each call to `parse_inline()` finds the next special character, emits any preceding text as a `CMARK_NODE_TEXT`, and dispatches to the appropriate handler.
+
+After all characters are processed, the delimiter stack is processed to resolve any remaining emphasis, and then cleaned up.
+
+## Cross-References
+
+- [inlines.c](../../cmark/src/inlines.c) — Full implementation
+- [inlines.h](../../cmark/src/inlines.h) — Internal API declarations
+- [block-parsing.md](block-parsing.md) — Phase 1 that produces the input for inline parsing
+- [reference-system.md](reference-system.md) — How link references are stored and looked up
+- [scanner-system.md](scanner-system.md) — Scanner functions for HTML tags, autolinks, etc.
+- [utf8-handling.md](utf8-handling.md) — Unicode character classification for flanking rules
diff --git a/docs/handbook/cmark/iterator-system.md b/docs/handbook/cmark/iterator-system.md
new file mode 100644
index 0000000000..3cdcfda66e
--- /dev/null
+++ b/docs/handbook/cmark/iterator-system.md
@@ -0,0 +1,267 @@
+# cmark — Iterator System
+
+## Overview
+
+The iterator system (`iterator.c`, `iterator.h`) provides depth-first traversal of the AST using an event-based model. Each node is visited twice: once on `CMARK_EVENT_ENTER` (before children) and once on `CMARK_EVENT_EXIT` (after children). Leaf nodes receive both events in immediate succession.
+
+All renderers (HTML, XML, LaTeX, man, CommonMark) use the iterator as their traversal mechanism.
+
+## Public API
+
+```c
+cmark_iter *cmark_iter_new(cmark_node *root);
+void cmark_iter_free(cmark_iter *iter);
+cmark_event_type cmark_iter_next(cmark_iter *iter);
+cmark_node *cmark_iter_get_node(cmark_iter *iter);
+cmark_event_type cmark_iter_get_event_type(cmark_iter *iter);
+cmark_node *cmark_iter_get_root(cmark_iter *iter);
+void cmark_iter_reset(cmark_iter *iter, cmark_node *current, cmark_event_type event_type);
+```
+
+## Iterator State
+
+```c
+struct cmark_iter {
+ cmark_mem *mem;
+ cmark_node *root;
+ cmark_node *cur;
+ cmark_event_type ev_type;
+};
+```
+
+The iterator stores:
+- `root` — The subtree root (traversal boundary)
+- `cur` — Current node
+- `ev_type` — Current event (`CMARK_EVENT_ENTER`, `CMARK_EVENT_EXIT`, `CMARK_EVENT_DONE`, or `CMARK_EVENT_NONE`)
+
+## Event Types
+
+```c
+typedef enum {
+ CMARK_EVENT_NONE, // Initial state
+ CMARK_EVENT_DONE, // Traversal complete (exited root)
+ CMARK_EVENT_ENTER, // Entering a node (pre-children)
+ CMARK_EVENT_EXIT, // Exiting a node (post-children)
+} cmark_event_type;
+```
+
+## Leaf Node Detection
+
+```c
+static const int S_leaf_mask =
+ (1 << CMARK_NODE_HTML_BLOCK) | (1 << CMARK_NODE_THEMATIC_BREAK) |
+ (1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_TEXT) |
+ (1 << CMARK_NODE_SOFTBREAK) | (1 << CMARK_NODE_LINEBREAK) |
+ (1 << CMARK_NODE_CODE) | (1 << CMARK_NODE_HTML_INLINE);
+
+static bool S_is_leaf(cmark_node *node) {
+ return ((1 << node->type) & S_leaf_mask) != 0;
+}
+```
+
+Leaf nodes are determined by a bitmask — not by checking whether `first_child` is NULL. This means an emphasis node with no children is still treated as a container (it receives separate enter and exit events).
+
+The leaf node types are:
+- **Block leaves**: `HTML_BLOCK`, `THEMATIC_BREAK`, `CODE_BLOCK`
+- **Inline leaves**: `TEXT`, `SOFTBREAK`, `LINEBREAK`, `CODE`, `HTML_INLINE`
+
+## Traversal Algorithm
+
+`cmark_iter_next()` implements the state machine:
+
+```c
+cmark_event_type cmark_iter_next(cmark_iter *iter) {
+ cmark_event_type ev_type = iter->ev_type;
+ cmark_node *cur = iter->cur;
+
+ if (ev_type == CMARK_EVENT_DONE) {
+ return CMARK_EVENT_DONE;
+ }
+
+ // For initial state, start with ENTER on root
+ if (ev_type == CMARK_EVENT_NONE) {
+ iter->ev_type = CMARK_EVENT_ENTER;
+ return iter->ev_type;
+ }
+
+ if (ev_type == CMARK_EVENT_ENTER && !S_is_leaf(cur)) {
+ // Container node being entered — descend to first child if it exists
+ if (cur->first_child) {
+ iter->ev_type = CMARK_EVENT_ENTER;
+ iter->cur = cur->first_child;
+ } else {
+ // Empty container — immediately exit
+ iter->ev_type = CMARK_EVENT_EXIT;
+ }
+ } else if (cur == iter->root) {
+ // Exiting root (or leaf at root) — done
+ iter->ev_type = CMARK_EVENT_DONE;
+ iter->cur = NULL;
+ } else if (cur->next) {
+ // Move to next sibling
+ iter->ev_type = CMARK_EVENT_ENTER;
+ iter->cur = cur->next;
+ } else if (cur->parent) {
+ // No more siblings — exit parent
+ iter->ev_type = CMARK_EVENT_EXIT;
+ iter->cur = cur->parent;
+ } else {
+ // Orphan node — done
+ assert(false);
+ iter->ev_type = CMARK_EVENT_DONE;
+ iter->cur = NULL;
+ }
+
+ return iter->ev_type;
+}
+```
+
+### State Transition Summary
+
+| Current State | Condition | Next State |
+|--------------|-----------|------------|
+| `NONE` | (initial) | `ENTER(root)` |
+| `ENTER(container)` | has children | `ENTER(first_child)` |
+| `ENTER(container)` | no children | `EXIT(container)` |
+| `ENTER(leaf)` or `EXIT(node)` | node == root | `DONE` |
+| `ENTER(leaf)` or `EXIT(node)` | has next sibling | `ENTER(next)` |
+| `ENTER(leaf)` or `EXIT(node)` | has parent | `EXIT(parent)` |
+| `DONE` | (terminal) | `DONE` |
+
+### Traversal Order Example
+
+For a document with a paragraph containing "Hello *world*":
+
+```
+Document
+└── Paragraph
+ ├── Text("Hello ")
+ ├── Emph
+ │ └── Text("world")
+ └── (end)
+```
+
+Event sequence:
+1. `ENTER(Document)`
+2. `ENTER(Paragraph)`
+3. `ENTER(Text "Hello ")` — leaf, immediate transition
+4. `ENTER(Emph)`
+5. `ENTER(Text "world")` — leaf, immediate transition
+6. `EXIT(Emph)`
+7. `EXIT(Paragraph)`
+8. `EXIT(Document)`
+9. `DONE`
+
+## Iterator Reset
+
+```c
+void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
+ cmark_event_type event_type) {
+ iter->cur = current;
+ iter->ev_type = event_type;
+}
+```
+
+Allows repositioning the iterator to any node and event type. This is used by renderers to skip subtrees — e.g., when the HTML renderer processes an image node, it may skip children after extracting alt text.
+
+## Text Node Consolidation
+
+```c
+void cmark_consolidate_text_nodes(cmark_node *root) {
+ if (root == NULL) return;
+ cmark_iter *iter = cmark_iter_new(root);
+ cmark_strbuf buf = CMARK_BUF_INIT(iter->mem);
+ cmark_event_type ev_type;
+ cmark_node *cur, *tmp, *next;
+
+ while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+ cur = cmark_iter_get_node(iter);
+ if (ev_type == CMARK_EVENT_ENTER && cur->type == CMARK_NODE_TEXT &&
+ cur->next && cur->next->type == CMARK_NODE_TEXT) {
+ // Merge consecutive text nodes
+ cmark_strbuf_clear(&buf);
+ cmark_strbuf_put(&buf, cur->data, cur->len);
+ tmp = cur->next;
+ while (tmp && tmp->type == CMARK_NODE_TEXT) {
+ cmark_iter_reset(iter, tmp, CMARK_EVENT_ENTER);
+ cmark_strbuf_put(&buf, tmp->data, tmp->len);
+ cur->end_column = tmp->end_column;
+ next = tmp->next;
+ cmark_node_free(tmp);
+ tmp = next;
+ }
+ // Replace cur's data with merged content
+ cmark_chunk_free(iter->mem, &cur->as.literal);
+ cmark_strbuf_trim(&buf);
+ // ... set cur->data and cur->len
+ }
+ }
+ cmark_strbuf_free(&buf);
+ cmark_iter_free(iter);
+}
+```
+
+This function merges adjacent text nodes into a single text node. Adjacent text nodes can arise from inline parsing (e.g., when backslash escapes split text). The function:
+
+1. Finds consecutive text node runs
+2. Concatenates their content into a buffer
+3. Updates the first node's content and end position
+4. Frees the subsequent nodes
+5. Uses `cmark_iter_reset()` to skip freed nodes
+
+## Usage Patterns
+
+### Standard Rendering Loop
+
+```c
+cmark_iter *iter = cmark_iter_new(root);
+while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+ cur = cmark_iter_get_node(iter);
+ S_render_node(cur, ev_type, &state, options);
+}
+cmark_iter_free(iter);
+```
+
+### Skipping Children
+
+To skip rendering of a node's children (e.g., for image alt text in HTML):
+```c
+if (ev_type == CMARK_EVENT_ENTER) {
+ cmark_iter_reset(iter, node, CMARK_EVENT_EXIT);
+}
+```
+
+This jumps directly to the exit event, bypassing all children.
+
+### Safe Node Removal
+
+The iterator handles node removal between calls. Since `cmark_iter_next()` always follows `next` and `parent` pointers from the current position, removing the current node is safe as long as:
+1. The node's `next` and `parent` pointers remain valid
+2. The iterator is reset to skip the removed node's children
+
+## Thread Safety
+
+Iterators are NOT thread-safe. A single AST must not be iterated concurrently without external synchronization. However, since iterators only read the AST (never modify it), multiple read-only iterators on the same AST are safe if no modifications occur.
+
+## Memory
+
+The iterator allocates a `cmark_iter` struct using the root node's memory allocator:
+```c
+cmark_iter *cmark_iter_new(cmark_node *root) {
+ cmark_mem *mem = root->mem;
+ cmark_iter *iter = (cmark_iter *)mem->calloc(1, sizeof(cmark_iter));
+ iter->mem = mem;
+ iter->root = root;
+ iter->cur = root;
+ iter->ev_type = CMARK_EVENT_NONE;
+ return iter;
+}
+```
+
+## Cross-References
+
+- [iterator.c](../../cmark/src/iterator.c) — Iterator implementation
+- [iterator.h](../../cmark/src/iterator.h) — Iterator struct definition
+- [ast-node-system.md](ast-node-system.md) — The nodes being traversed
+- [html-renderer.md](html-renderer.md) — Example of iterator-driven rendering
+- [render-framework.md](render-framework.md) — Framework that wraps iterator use
diff --git a/docs/handbook/cmark/latex-renderer.md b/docs/handbook/cmark/latex-renderer.md
new file mode 100644
index 0000000000..d7a492d580
--- /dev/null
+++ b/docs/handbook/cmark/latex-renderer.md
@@ -0,0 +1,320 @@
+# cmark — LaTeX Renderer
+
+## Overview
+
+The LaTeX renderer (`latex.c`) converts a `cmark_node` AST into LaTeX source, suitable for compilation with `pdflatex`, `xelatex`, or `lualatex`. It uses the generic render framework from `render.c`, operating through a per-character output callback (`outc`) and a per-node render callback (`S_render_node`).
+
+## Entry Point
+
+```c
+char *cmark_render_latex(cmark_node *root, int options, int width);
+```
+
+- `root` — AST root node
+- `options` — Option flags (`CMARK_OPT_SOURCEPOS`, `CMARK_OPT_HARDBREAKS`, `CMARK_OPT_NOBREAKS`, `CMARK_OPT_UNSAFE`)
+- `width` — Target line width for hard-wrapping; 0 disables wrapping
+
+## Character Escaping (`outc`)
+
+The `outc` function handles per-character output decisions. It is the most complex part of the LaTeX renderer, with different behavior for three escaping modes:
+
+```c
+static void outc(cmark_renderer *renderer, cmark_escaping escape,
+ int32_t c, unsigned char nextc);
+```
+
+### LITERAL Mode
+Pass-through: all characters are output unchanged.
+
+### NORMAL Mode
+Extensive special-character handling:
+
+| Character | LaTeX Output | Purpose |
+|-----------|-------------|---------|
+| `$` | `\$` | Math mode delimiter |
+| `%` | `\%` | Comment character |
+| `&` | `\&` | Table column separator |
+| `_` | `\_` | Subscript operator |
+| `#` | `\#` | Parameter reference |
+| `^` | `\^{}` | Superscript operator |
+| `{` | `\{` | Group open |
+| `}` | `\}` | Group close |
+| `~` | `\textasciitilde{}` | Non-breaking space |
+| `[` | `{[}` | Optional argument bracket |
+| `]` | `{]}` | Optional argument bracket |
+| `\` | `\textbackslash{}` | Escape character |
+| `|` | `\textbar{}` | Pipe |
+| `'` | `\textquotesingle{}` | Straight single quote |
+| `"` | `\textquotedbl{}` | Straight double quote |
+| `` ` `` | `\textasciigrave{}` | Backtick |
+| `\xA0` (NBSP) | `~` | LaTeX non-breaking space |
+| `\x2014` (—) | `---` | Em dash |
+| `\x2013` (–) | `--` | En dash |
+| `\x2018` (‘) | `` ` `` | Left single quote |
+| `\x2019` (’) | `'` | Right single quote |
+| `\x201C` (“) | ` `` ` | Left double quote |
+| `\x201D` (”) | `''` | Right double quote |
+
+### URL Mode
+Only these characters are escaped:
+- `$` → `\$`
+- `%` → `\%`
+- `&` → `\&`
+- `_` → `\_`
+- `#` → `\#`
+- `{` → `\{`
+- `}` → `\}`
+
+All other characters pass through unchanged.
+
+## Link Type Classification
+
+The renderer classifies links into five categories:
+
+```c
+typedef enum {
+ NO_LINK,
+ URL_AUTOLINK,
+ EMAIL_AUTOLINK,
+ NORMAL_LINK,
+ INTERNAL_LINK,
+} link_type;
+```
+
+### `get_link_type()`
+
+```c
+static link_type get_link_type(cmark_node *node) {
+ // 1. "mailto:" links where text matches url
+ // 2. "http[s]:" links where text matches url (with or without protocol)
+ // 3. Links starting with '#' → INTERNAL_LINK
+ // 4. Everything else → NORMAL_LINK
+}
+```
+
+Detection logic:
+1. **URL_AUTOLINK**: The `url` starts with `http://` or `https://`, the link has exactly one text child, and that child's content matches the URL (or matches the URL minus the protocol prefix).
+2. **EMAIL_AUTOLINK**: The `url` starts with `mailto:`, the link has exactly one text child, and that child's content matches the URL after `mailto:`.
+3. **INTERNAL_LINK**: The `url` starts with `#`.
+4. **NORMAL_LINK**: Everything else.
+
+## Enumeration Level
+
+For nested ordered lists, the renderer selects the appropriate LaTeX counter style:
+
+```c
+static int S_get_enumlevel(cmark_node *node) {
+ int enumlevel = 0;
+ cmark_node *tmp = node;
+ while (tmp) {
+ if (tmp->type == CMARK_NODE_LIST &&
+ cmark_node_get_list_type(tmp) == CMARK_ORDERED_LIST) {
+ enumlevel++;
+ }
+ tmp = tmp->parent;
+ }
+ return enumlevel;
+}
+```
+
+This walks up the tree, counting ordered list ancestors. LaTeX ordered lists cycle through: `enumi` (arabic), `enumii` (alpha), `enumiii` (roman), `enumiv` (Alpha).
+
+## Node Rendering (`S_render_node`)
+
+### Block Nodes
+
+#### Document
+No output.
+
+#### Block Quote
+```
+ENTER: \begin{quote}\n
+EXIT: \end{quote}\n
+```
+
+#### List
+```
+ENTER (bullet): \begin{itemize}\n
+ENTER (ordered): \begin{enumerate}\n
+ \def\labelenumI{COUNTER}\n (if start != 1)
+ \setcounter{enumI}{START-1}\n
+EXIT: \end{itemize}\n or \end{enumerate}\n
+```
+
+The counter is formatted based on enumeration level:
+- Level 1: `\arabic{enumi}.`
+- Level 2: `\alph{enumii}.` (surrounded by `(`)
+- Level 3: `\roman{enumiii}.`
+- Level 4: `\Alph{enumiv}.`
+
+Period delimiters use `.`, parenthesis delimiters use `)`.
+
+#### Item
+```
+ENTER: \item{} (empty braces prevent ligatures with following content)
+EXIT: \n
+```
+
+#### Heading
+```
+ENTER: \section{ or \subsection{ or \subsubsection{ or \paragraph{ or \subparagraph{
+EXIT: }\n
+```
+
+Mapping: level 1 → `\section`, level 2 → `\subsection`, level 3 → `\subsubsection`, level 4 → `\paragraph`, level 5 → `\subparagraph`.
+
+#### Code Block
+```latex
+\begin{verbatim}
+LITERAL CONTENT
+\end{verbatim}
+```
+
+The content is output in `LITERAL` escape mode (no character escaping). Info strings are ignored.
+
+#### HTML Block
+```
+ENTER: % raw HTML omitted\n (as a LaTeX comment)
+```
+
+Raw HTML is always omitted in LaTeX output, regardless of `CMARK_OPT_UNSAFE`.
+
+#### Thematic Break
+```
+\begin{center}\rule{0.5\linewidth}{\linethickness}\end{center}\n
+```
+
+#### Paragraph
+Same tight-list check as the HTML renderer:
+```c
+parent = cmark_node_parent(node);
+grandparent = cmark_node_parent(parent);
+tight = (grandparent && grandparent->type == CMARK_NODE_LIST) ?
+ grandparent->as.list.tight : false;
+```
+- Normal: newline before and after
+- Tight: no leading/trailing blank lines
+
+### Inline Nodes
+
+#### Text
+Output with NORMAL escaping.
+
+#### Soft Break
+Depends on options:
+- `CMARK_OPT_HARDBREAKS`: `\\\\\n`
+- `CMARK_OPT_NOBREAKS`: space
+- Default: newline
+
+#### Line Break
+```
+\\\\\n
+```
+
+#### Code (inline)
+```
+\texttt{ESCAPED CONTENT}
+```
+
+Special handling: Code content is output character-by-character with inline-code escaping. Special characters (`\`, `{`, `}`, `$`, `%`, `&`, `_`, `#`, `^`, `~`) are escaped.
+
+#### Emphasis
+```
+ENTER: \emph{
+EXIT: }
+```
+
+#### Strong
+```
+ENTER: \textbf{
+EXIT: }
+```
+
+#### Link
+Rendering depends on link type:
+
+**NORMAL_LINK:**
+```
+ENTER: \href{URL}{
+EXIT: }
+```
+
+**URL_AUTOLINK:**
+```
+ENTER: \url{URL}
+(children are skipped — no EXIT rendering needed)
+```
+
+**EMAIL_AUTOLINK:**
+```
+ENTER: \href{URL}{\nolinkurl{
+EXIT: }}
+```
+
+**INTERNAL_LINK:**
+```
+ENTER: (nothing — rendered as plain text)
+EXIT: (~\ref{LABEL})
+```
+
+Where `LABEL` is the URL with the leading `#` stripped.
+
+**NO_LINK:**
+No output.
+
+#### Image
+```
+ENTER: \protect\includegraphics{URL}
+```
+
+Image children (alt text) are skipped. If `CMARK_OPT_UNSAFE` is not set and the URL matches `_scan_dangerous_url()`, the URL is omitted.
+
+#### HTML Inline
+```
+% raw HTML omitted
+```
+
+Always omitted, regardless of `CMARK_OPT_UNSAFE`.
+
+## Source Position Comments
+
+When `CMARK_OPT_SOURCEPOS` is set, the renderer adds LaTeX comments before block elements:
+
+```c
+snprintf(buffer, BUFFER_SIZE, "%% %d:%d-%d:%d\n",
+ cmark_node_get_start_line(node), cmark_node_get_start_column(node),
+ cmark_node_get_end_line(node), cmark_node_get_end_column(node));
+```
+
+## Example Output
+
+Markdown input:
+```markdown
+# Hello World
+
+A paragraph with *emphasis* and **bold**.
+
+- Item 1
+- Item 2
+```
+
+LaTeX output:
+```latex
+\section{Hello World}
+
+A paragraph with \emph{emphasis} and \textbf{bold}.
+
+\begin{itemize}
+\item{}Item 1
+
+\item{}Item 2
+
+\end{itemize}
+```
+
+## Cross-References
+
+- [latex.c](../../cmark/src/latex.c) — Full implementation
+- [render-framework.md](render-framework.md) — Generic render framework (`cmark_render()`, `cmark_renderer`)
+- [public-api.md](public-api.md) — `cmark_render_latex()` API docs
+- [html-renderer.md](html-renderer.md) — Contrast with direct buffer renderer
diff --git a/docs/handbook/cmark/man-renderer.md b/docs/handbook/cmark/man-renderer.md
new file mode 100644
index 0000000000..cae1c6dbf3
--- /dev/null
+++ b/docs/handbook/cmark/man-renderer.md
@@ -0,0 +1,272 @@
+# cmark — Man Page Renderer
+
+## Overview
+
+The man page renderer (`man.c`) converts a `cmark_node` AST into roff/troff format suitable for the Unix `man` page system. It uses the generic render framework from `render.c`.
+
+## Entry Point
+
+```c
+char *cmark_render_man(cmark_node *root, int options, int width);
+```
+
+- `root` — AST root node
+- `options` — Option flags (`CMARK_OPT_HARDBREAKS`, `CMARK_OPT_NOBREAKS`, `CMARK_OPT_SOURCEPOS`, `CMARK_OPT_UNSAFE`)
+- `width` — Target line width for wrapping; 0 disables wrapping
+
+## Character Escaping (`S_outc`)
+
+The man page escaping is simpler than LaTeX. The `S_outc` function handles:
+
+```c
+static void S_outc(cmark_renderer *renderer, cmark_escaping escape,
+ int32_t c, unsigned char nextc) {
+ if (escape == LITERAL) {
+ cmark_render_code_point(renderer, c);
+ return;
+ }
+ switch (c) {
+ case 46: // '.' — if at line start
+ cmark_render_ascii(renderer, "\\&.");
+ break;
+ case 39: // '\'' — if at line start
+ cmark_render_ascii(renderer, "\\&'");
+ break;
+ case 45: // '-'
+ cmark_render_ascii(renderer, "\\-");
+ break;
+ case 92: // '\\'
+ cmark_render_ascii(renderer, "\\e");
+ break;
+ case 8216: // ' (left single quote)
+ cmark_render_ascii(renderer, "\\[oq]");
+ break;
+ case 8217: // ' (right single quote)
+ cmark_render_ascii(renderer, "\\[cq]");
+ break;
+ case 8220: // " (left double quote)
+ cmark_render_ascii(renderer, "\\[lq]");
+ break;
+ case 8221: // " (right double quote)
+ cmark_render_ascii(renderer, "\\[rq]");
+ break;
+ case 8212: // — (em dash)
+ cmark_render_ascii(renderer, "\\[em]");
+ break;
+ case 8211: // – (en dash)
+ cmark_render_ascii(renderer, "\\[en]");
+ break;
+ default:
+ cmark_render_code_point(renderer, c);
+ }
+}
+```
+
+### Line-Start Protection
+
+The `.` and `'` characters are only escaped when they appear at the beginning of a line, since roff interprets them as macro/command prefixes. The check:
+
+```c
+case 46:
+case 39:
+ if (renderer->begin_line) {
+ cmark_render_ascii(renderer, "\\&."); // or "\\&'"
+ }
+```
+
When the character is not at the beginning of a line, it is emitted unchanged (the default case applies). The `\\&` prefix is a zero-width, non-printing character that prevents roff from treating the character as a command prefix.
+
+## Block Number Tracking
+
+The renderer tracks nesting with a `block_number` variable for generating matching `.RS`/`.RE` (indent start/end) pairs:
+
+This variable is incremented when entering list items and block quotes, and decremented on exit. It controls the indentation level of nested content.
+
+## Node Rendering (`S_render_node`)
+
+### Block Nodes
+
+#### Document
+No output on enter or exit.
+
+#### Block Quote
+```
+ENTER: .RS\n
+EXIT: .RE\n
+```
+
+`.RS` pushes relative indentation, `.RE` pops it.
+
+#### List
+On exit, adds a blank output line (`cr()`) to separate from following content.
+
+#### Item
+```
+ENTER (bullet): .IP \(bu 2\n
+ENTER (ordered): .IP "N." 4\n (where N = list start + sibling count)
+EXIT: (cr if not last item)
+```
+
+The ordered item number is calculated by counting previous siblings:
+```c
+int list_number = cmark_node_get_list_start(node->parent);
+tmp = node;
+while (tmp->prev) {
+ tmp = tmp->prev;
+ list_number++;
+}
+```
+
+`.IP` sets an indented paragraph with a tag (bullet or number) and indentation width.
+
+#### Heading
+```
+ENTER (level 1): .SH\n (section heading)
+ENTER (level 2): .SS\n (subsection heading)
+ENTER (other): .PP\n\fB (paragraph, start bold)
+EXIT (other): \fR\n (end bold)
+```
+
+Level 1 and 2 headings use dedicated roff macros. Level 3+ are rendered as bold paragraphs.
+
+#### Code Block
+```
+.IP\n.nf\n\\f[C]\n
+LITERAL CONTENT
+\\f[]\n.fi\n
+```
+
+- `.nf` — no-fill (preformatted)
+- `\\f[C]` — switch to constant-width font
+- `\\f[]` — restore previous font
+- `.fi` — return to fill mode
+
+#### HTML Block
+```
+(nothing)
+```
+Raw HTML blocks are silently omitted in man output.
+
+#### Thematic Break
+There is no native roff thematic break. The renderer outputs nothing for this node type.
+
+#### Paragraph
+Same tight-list check as other renderers:
+```c
+tight = (grandparent && grandparent->type == CMARK_NODE_LIST) ?
+ grandparent->as.list.tight : false;
+```
+- Normal: `.PP\n` before content
+- Tight: no `.PP` prefix
+
+### Inline Nodes
+
+#### Text
+Output with NORMAL escaping.
+
+#### Soft Break
+Depends on options:
+- `CMARK_OPT_HARDBREAKS`: `.PD 0\n.P\n.PD\n`
+- `CMARK_OPT_NOBREAKS`: space
+- Default: newline
+
+The hardbreak sequence `.PD 0\n.P\n.PD\n` is a man page idiom that:
+1. Sets paragraph distance to 0 (`.PD 0`)
+2. Starts a new paragraph (`.P`)
+3. Restores default paragraph distance (`.PD`)
+
+#### Line Break
+Same as hardbreak:
+```
+.PD 0\n.P\n.PD\n
+```
+
+#### Code (inline)
+```
+\f[C]ESCAPED CONTENT\f[]
+```
+
+Font switch to `C` (constant-width), then restore.
+
+#### Emphasis
+```
+ENTER: \f[I] (italic font)
+EXIT: \f[] (restore font)
+```
+
+#### Strong
+```
+ENTER: \f[B] (bold font)
+EXIT: \f[] (restore font)
+```
+
+#### Link
+Links render their text content normally. On exit:
+```
+(ESCAPED_URL)
+```
+
+If the link URL is the same as the text content (autolink), the URL suffix is suppressed.
+
+#### Image
+```
+ENTER: [IMAGE:
+EXIT: ]
+```
+
+Images have no roff equivalent, so they're rendered as bracketed alt text.
+
+#### HTML Inline
+Silently omitted.
+
+## Source Position
+
+When `CMARK_OPT_SOURCEPOS` is set, man output includes roff comments:
+```
+.\" sourcepos: LINE:COL-LINE:COL
+```
+
+(The `.\"` prefix is the roff comment syntax.)
+
+## Example Output
+
+Markdown input:
+```markdown
+# My Tool
+
+A description with *emphasis*.
+
+## Options
+
+- `--flag` — Does something
+- `--other` — Does another thing
+```
+
+Man output:
+```roff
+.SH
+My Tool
+.PP
+A description with \f[I]emphasis\f[].
+.SS
+Options
+.IP \(bu 2
+\f[C]\-\-flag\f[] \[em] Does something
+.IP \(bu 2
+\f[C]\-\-other\f[] \[em] Does another thing
+```
+
+## Limitations
+
+1. **No heading levels > 2**: Levels 3+ are rendered as bold paragraphs, losing semantic heading structure.
+2. **No images**: Only alt text is shown in brackets.
+3. **No raw HTML**: Silently dropped.
+4. **No thematic breaks**: No visual separator is output.
+5. **No tables**: Not part of core CommonMark, but if extensions add them, the man renderer has no support.
+
+## Cross-References
+
+- [man.c](../../cmark/src/man.c) — Full implementation
+- [render-framework.md](render-framework.md) — Generic render framework
+- [public-api.md](public-api.md) — `cmark_render_man()` API docs
+- [latex-renderer.md](latex-renderer.md) — Another framework-based renderer
diff --git a/docs/handbook/cmark/memory-management.md b/docs/handbook/cmark/memory-management.md
new file mode 100644
index 0000000000..dbc0046cb9
--- /dev/null
+++ b/docs/handbook/cmark/memory-management.md
@@ -0,0 +1,351 @@
+# cmark — Memory Management
+
+## Overview
+
+cmark's memory management is built around three concepts:
+1. **Pluggable allocator** (`cmark_mem`) — a function-pointer table for calloc/realloc/free
+2. **Owning buffer** (`cmark_strbuf`) — a growable byte buffer that owns its memory
+3. **Non-owning slice** (`cmark_chunk`) — a view into either a `cmark_strbuf` or external memory
+
+## Pluggable Allocator
+
+### `cmark_mem` Structure
+
+```c
+typedef struct cmark_mem {
+ void *(*calloc)(size_t, size_t);
+ void *(*realloc)(void *, size_t);
+ void (*free)(void *);
+} cmark_mem;
+```
+
+All allocation throughout cmark respects this interface. Every node, buffer, parser, and iterator receives a `cmark_mem *` and uses it for all allocations.
+
+### Default Allocator
+
+```c
+static void *xcalloc(size_t nmemb, size_t size) {
+ void *ptr = calloc(nmemb, size);
+ if (!ptr) abort();
+ return ptr;
+}
+
+static void *xrealloc(void *ptr, size_t size) {
+ void *new_ptr = realloc(ptr, size);
+ if (!new_ptr) abort();
+ return new_ptr;
+}
+
+cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free};
+```
+
+The default allocator wraps standard `calloc`/`realloc`/`free`, adding `abort()` on allocation failure. This means cmark never returns NULL from allocations — it terminates on out-of-memory.
+
+### Getting the Default Allocator
+
+```c
+cmark_mem *cmark_get_default_mem_allocator(void) {
+ return &DEFAULT_MEM_ALLOCATOR;
+}
+```
+
+### Custom Allocator Usage
+
+Users can provide custom allocators (arena allocators, debug allocators, etc.) via:
+
+```c
+cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem);
+cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem);
+```
+
+The allocator propagates: nodes created by the parser inherit the parser's allocator. Iterators use the root node's allocator.
+
+## Growable Buffer (`cmark_strbuf`)
+
+### Structure
+
+```c
+struct cmark_strbuf {
+ cmark_mem *mem;
+ unsigned char *ptr;
+ bufsize_t asize; // allocated size
+ bufsize_t size; // used size (excluding NUL terminator)
+};
+```
+
+### Initialization
+
+```c
+#define CMARK_BUF_INIT(mem) { mem, cmark_strbuf__initbuf, 0, 0 }
+```
+
+`cmark_strbuf__initbuf` is a static empty buffer that avoids allocating for empty strings:
+```c
+unsigned char cmark_strbuf__initbuf[1] = {0};
+```
+
+This means: uninitialized/empty buffers point to a shared static empty string rather than NULL. This eliminates NULL checks throughout the code.
+
+### Growth Strategy
+
+```c
+void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
+ // Minimum allocation of 8 bytes
+ bufsize_t new_size = 8;
+ // Double until >= target (or use 2x current if growing existing)
+ if (buf->asize) {
+ new_size = buf->asize;
+ }
+ while (new_size < target_size) {
+ new_size *= 2;
+ }
+ // Allocate
+ if (buf->ptr == cmark_strbuf__initbuf) {
+ buf->ptr = (unsigned char *)buf->mem->calloc(new_size, 1);
+ } else {
+ buf->ptr = (unsigned char *)buf->mem->realloc(buf->ptr, new_size);
+ }
+ buf->asize = new_size;
+}
+```
+
+The growth strategy doubles the capacity each time, ensuring amortized O(1) appends. Minimum capacity is 8 bytes.
+
+When the buffer transitions from the shared static init to a real allocation, `calloc` is used (zero-initialized). Subsequent growths use `realloc`.
+
+### Key Operations
+
+```c
+// Appending
+void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len);
+void cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
+void cmark_strbuf_putc(cmark_strbuf *buf, int c);
+
+// Printf-style
+void cmark_strbuf_printf(cmark_strbuf *buf, const char *fmt, ...);
+void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *fmt, va_list ap);
+
+// Manipulation
+void cmark_strbuf_clear(cmark_strbuf *buf); // Reset size to 0, keep allocation
+void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len);
+void cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
+void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf);
+void cmark_strbuf_swap(cmark_strbuf *a, cmark_strbuf *b);
+
+// Whitespace
+void cmark_strbuf_trim(cmark_strbuf *buf); // Trim leading and trailing whitespace
+void cmark_strbuf_normalize_whitespace(cmark_strbuf *buf); // Collapse runs to single space
+void cmark_strbuf_unescape(cmark_strbuf *buf); // Process backslash escapes
+
+// Lifecycle
+unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); // Return ptr, reset buf to init
+void cmark_strbuf_free(cmark_strbuf *buf); // Free memory, reset to init
+```
+
+### `cmark_strbuf_detach()`
+
+```c
+unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) {
+ unsigned char *data = buf->ptr;
+ if (buf->asize == 0) {
+ // Never allocated — return a new empty string
+ data = (unsigned char *)buf->mem->calloc(1, 1);
+ }
+ // Reset buffer to initial state
+ buf->ptr = cmark_strbuf__initbuf;
+ buf->asize = 0;
+ buf->size = 0;
+ return data;
+}
+```
+
+Transfers ownership of the buffer's memory to the caller. The buffer is reset to the empty init state. The caller must `free()` the returned pointer.
+
+### Whitespace Normalization
+
+```c
+void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) {
+ bool last_char_was_space = false;
+ bufsize_t r, w;
+ for (r = 0, w = 0; r < s->size; r++) {
+ if (cmark_isspace(s->ptr[r])) {
+ if (!last_char_was_space) {
+ s->ptr[w++] = ' ';
+ last_char_was_space = true;
+ }
+ } else {
+ s->ptr[w++] = s->ptr[r];
+ last_char_was_space = false;
+ }
+ }
+ cmark_strbuf_truncate(s, w);
+}
+```
+
+Collapses consecutive whitespace into a single space. Uses an in-place read/write cursor technique.
+
+### Backslash Unescape
+
+```c
+void cmark_strbuf_unescape(cmark_strbuf *buf) {
+ bufsize_t r, w;
+ for (r = 0, w = 0; r < buf->size; r++) {
+ if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1]))
+ r++;
+ buf->ptr[w++] = buf->ptr[r];
+ }
+ cmark_strbuf_truncate(buf, w);
+}
+```
+
Removes backslash escapes before punctuation characters, in-place. Reading `buf->ptr[r + 1]` on the last byte is safe because a `cmark_strbuf` keeps its contents NUL-terminated (the `size` field excludes the terminator), and `cmark_ispunct('\0')` is false.
+
+## Non-Owning Slice (`cmark_chunk`)
+
+### Structure
+
+```c
+typedef struct {
+ const unsigned char *data;
+ bufsize_t len;
+ bufsize_t alloc; // 0 if non-owning, > 0 if owning
+} cmark_chunk;
+```
+
+A `cmark_chunk` is either:
+- **Non-owning** (`alloc == 0`): Points into someone else's memory (e.g., the parser's input buffer)
+- **Owning** (`alloc > 0`): Owns its `data` pointer and must free it
+
+### Key Operations
+
+```c
+// Create a non-owning reference
+static CMARK_INLINE cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf);
+static CMARK_INLINE cmark_chunk cmark_chunk_literal(const char *data);
+static CMARK_INLINE cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, bufsize_t pos, bufsize_t len);
+
+// Free (only if owning)
+static CMARK_INLINE void cmark_chunk_free(cmark_mem *mem, cmark_chunk *c) {
+ if (c->alloc)
+ mem->free((void *)c->data);
+ c->data = NULL;
+ c->alloc = 0;
+ c->len = 0;
+}
+```
+
+### Ownership Transfer
+
+`cmark_chunk_buf_detach()` takes ownership of a `cmark_strbuf`'s memory:
+
+```c
+static CMARK_INLINE cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) {
+ cmark_chunk c;
+ c.len = buf->size;
+ c.data = cmark_strbuf_detach(buf);
+ c.alloc = 1; // Now owns the data
+ return c;
+}
+```
+
+### Non-Owning References
+
+`cmark_chunk_dup()` creates a non-owning view into existing memory:
+
+```c
+static CMARK_INLINE cmark_chunk cmark_chunk_dup(const cmark_chunk *ch,
+ bufsize_t pos, bufsize_t len) {
+ cmark_chunk c = {ch->data + pos, len, 0}; // alloc = 0: non-owning
+ return c;
+}
+```
+
+This is used extensively during parsing to avoid copying strings. For example, text node content during inline parsing initially points into the parser's line buffer. Only when the node outlives the parse does the data need to be copied.
+
+## Node Memory Management
+
+### Node Allocation
+
+```c
+static cmark_node *S_node_new(cmark_node_type type, cmark_mem *mem) {
+ cmark_node *node = (cmark_node *)mem->calloc(1, sizeof(*node));
+ cmark_strbuf_init(mem, &node->content, 0);
+ node->type = (uint16_t)type;
+ node->mem = mem;
+ return node;
+}
+```
+
+Nodes are zero-initialized via `calloc`. The `mem` pointer is stored on the node for later freeing.
+
+### Node Deallocation
+
+```c
+static void S_free_nodes(cmark_node *e) {
+ cmark_node *next;
+ while (e != NULL) {
+ // Free type-specific data
+ switch (e->type) {
+ case CMARK_NODE_CODE_BLOCK:
+ cmark_chunk_free(e->mem, &e->as.code.info);
+ cmark_chunk_free(e->mem, &e->as.literal);
+ break;
+ case CMARK_NODE_LINK:
+ case CMARK_NODE_IMAGE:
+ e->mem->free(e->as.link.url);
+ e->mem->free(e->as.link.title);
+ break;
+ // ... other types
+ }
+ // Splice children into the free list
+ if (e->first_child) {
+ cmark_node *last = e->last_child;
+ last->next = e->next;
+ e->next = e->first_child;
+ }
+ // Advance and free
+ next = e->next;
+ e->mem->free(e);
+ e = next;
+ }
+}
+```
+
This is an iterative (non-recursive) destructor that avoids stack overflow on deeply nested ASTs. The key technique is **sibling-list splicing**: as the snippet shows (`last->next = e->next; e->next = e->first_child;`), a node's children are spliced into the sibling chain immediately after the current node, so they are visited right after it is freed — converting tree traversal into linear list traversal.
+
+### What Gets Freed Per Node Type
+
+| Node Type | Freed Data |
+|-----------|-----------|
+| `CODE_BLOCK` | `as.code.info` chunk, `as.literal` chunk |
+| `TEXT`, `HTML_BLOCK`, `HTML_INLINE`, `CODE` | `as.literal` chunk |
+| `LINK`, `IMAGE` | `as.link.url`, `as.link.title` |
+| `CUSTOM_BLOCK`, `CUSTOM_INLINE` | `as.custom.on_enter`, `as.custom.on_exit` |
+| `HEADING` | `as.heading.setext_content` (if chunk) |
+| All nodes | `content` strbuf |
+
+## Parser Memory
+
+The parser allocates:
+- A `cmark_parser` struct
+- A `cmark_strbuf` for the current line (`linebuf`)
+- A `cmark_strbuf` for collected content (`content`)
+- A `cmark_reference_map` for link references
+- Individual `cmark_node` objects for the AST
+
+When `cmark_parser_free()` is called, only the parser's own resources are freed — the AST is NOT freed (the user owns it). To free the AST, call `cmark_node_free()` on the root.
+
+## Memory Safety Patterns
+
+1. **No NULL returns**: The default allocator aborts on failure. User allocators should do the same or handle errors externally.
+2. **Init buffers**: `cmark_strbuf__initbuf` prevents NULL pointer dereferences on empty buffers.
+3. **Owning vs non-owning**: The `cmark_chunk.alloc` field prevents double-frees and ensures non-owning references are not freed.
+4. **Iterative destruction**: `S_free_nodes()` avoids stack overflow on deep trees.
+
+## Cross-References
+
+- [buffer.c](../../cmark/src/buffer.c), [buffer.h](../../cmark/src/buffer.h) — `cmark_strbuf` implementation
+- [chunk.h](../../cmark/src/chunk.h) — `cmark_chunk` definition
+- [cmark.c](../../cmark/src/cmark.c) — Default allocator, `cmark_get_default_mem_allocator()`
+- [node.c](../../cmark/src/node.c) — Node allocation and deallocation
+- [ast-node-system.md](ast-node-system.md) — Node structure and lifecycle
diff --git a/docs/handbook/cmark/overview.md b/docs/handbook/cmark/overview.md
new file mode 100644
index 0000000000..4fc95bdad7
--- /dev/null
+++ b/docs/handbook/cmark/overview.md
@@ -0,0 +1,256 @@
+# cmark — Overview
+
+## What Is cmark?
+
+cmark is a C library and command-line tool for parsing and rendering CommonMark (standardized Markdown). Written in C99, it implements a two-phase parsing architecture — block structure recognition followed by inline content parsing — producing an Abstract Syntax Tree (AST) that can be traversed, manipulated, and rendered into multiple output formats.
+
+**Language:** C (C99)
+**Build System:** CMake (minimum version 3.14)
+**Project Version:** 0.31.2
+**License:** BSD-2-Clause
+**Authors:** John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer
+
+## Core Architecture Summary
+
+cmark's processing pipeline follows this sequence:
+
+1. **Input** — UTF-8 text is fed to the parser, either all at once or incrementally via a streaming API.
+2. **Block Parsing** (`blocks.c`) — The input is scanned line-by-line to identify block-level structures (paragraphs, headings, code blocks, lists, block quotes, thematic breaks, HTML blocks).
+3. **Inline Parsing** (`inlines.c`) — Within paragraph and heading blocks, inline elements are parsed (emphasis, links, images, code spans, HTML inline, line breaks).
+4. **AST Construction** — A tree of `cmark_node` structures is built, with each node representing a document element.
+5. **Rendering** — The AST is traversed using an iterator and rendered to one of five output formats: HTML, XML, LaTeX, man (groff), or CommonMark.
+
+## Source File Map
+
+The `cmark/src/` directory contains the following source files, organized by responsibility:
+
+### Public API
+| File | Purpose |
+|------|---------|
+| `cmark.h` | Public API header — all exported types, enums, and function declarations |
+| `cmark.c` | Core glue — `cmark_markdown_to_html()`, default memory allocator, version info |
+| `main.c` | CLI entry point — argument parsing, file I/O, format dispatch |
+
+### AST Node System
+| File | Purpose |
+|------|---------|
+| `node.h` | Internal node struct definition, type-specific unions (`cmark_list`, `cmark_code`, `cmark_heading`, `cmark_link`, `cmark_custom`), internal flags |
+| `node.c` | Node creation/destruction, accessor functions, tree manipulation (insert, append, unlink, replace) |
+
+### Parsing
+| File | Purpose |
+|------|---------|
+| `parser.h` | Internal `cmark_parser` struct definition (parser state: line number, offset, column, indent, reference map) |
+| `blocks.c` | Block-level parsing — line-by-line analysis, open/close block logic, list item detection, finalization |
+| `inlines.c` | Inline-level parsing — emphasis/strong via delimiter stack, backtick code spans, links/images via bracket stack, autolinks, HTML inline |
+| `inlines.h` | Internal API: `cmark_parse_inlines()`, `cmark_parse_reference_inline()`, `cmark_clean_url()`, `cmark_clean_title()` |
+
+### Traversal
+| File | Purpose |
+|------|---------|
+| `iterator.h` | Internal `cmark_iter` struct with `cmark_iter_state` (current + next event/node pairs) |
+| `iterator.c` | Iterator implementation — `cmark_iter_new()`, `cmark_iter_next()`, `cmark_iter_reset()`, `cmark_consolidate_text_nodes()` |
+
+### Renderers
+| File | Purpose |
+|------|---------|
+| `render.h` | `cmark_renderer` struct, `cmark_escaping` enum (`LITERAL`, `NORMAL`, `TITLE`, `URL`) |
+| `render.c` | Generic render framework — line wrapping, prefix management, `cmark_render()` dispatch loop |
+| `html.c` | HTML renderer — `cmark_render_html()`, direct strbuf-based output, no render framework |
+| `xml.c` | XML renderer — `cmark_render_xml()`, direct strbuf-based output with CommonMark DTD |
+| `latex.c` | LaTeX renderer — `cmark_render_latex()`, uses render framework |
+| `man.c` | groff man renderer — `cmark_render_man()`, uses render framework |
+| `commonmark.c` | CommonMark renderer — `cmark_render_commonmark()`, uses render framework |
+
+### Text Processing and Utilities
+| File | Purpose |
+|------|---------|
+| `buffer.h` / `buffer.c` | `cmark_strbuf` — growable byte buffer with amortized O(1) append |
+| `chunk.h` | `cmark_chunk` — lightweight non-owning string slice (pointer + length) |
+| `utf8.h` / `utf8.c` | UTF-8 iteration, validation, encoding, case folding, Unicode property queries |
+| `references.h` / `references.c` | Link reference definition storage and lookup (sorted array with binary search) |
+| `scanners.h` / `scanners.c` | re2c-generated scanner functions for recognizing Markdown syntax patterns |
+| `scanners.re` | re2c source for scanner generation |
+| `cmark_ctype.h` / `cmark_ctype.c` | Locale-independent `cmark_isspace()`, `cmark_ispunct()`, `cmark_isdigit()`, `cmark_isalpha()` |
+| `houdini.h` | HTML/URL escaping and unescaping API |
+| `houdini_html_e.c` | HTML entity escaping |
+| `houdini_html_u.c` | HTML entity unescaping |
+| `houdini_href_e.c` | URL/href percent-encoding |
+| `entities.inc` | HTML entity name-to-codepoint lookup table |
+| `case_fold.inc` | Unicode case folding table for reference normalization |
+
+## The Simple Interface
+
+The simplest way to use cmark is a single function call defined in `cmark.c`:
+
+```c
+char *cmark_markdown_to_html(const char *text, size_t len, int options);
+```
+
+Internally, this calls `cmark_parse_document()` to build the AST, then `cmark_render_html()` to produce the output, and finally frees the document node. The caller is responsible for freeing the returned string.
+
+The implementation in `cmark.c`:
+
+```c
+char *cmark_markdown_to_html(const char *text, size_t len, int options) {
+ cmark_node *doc;
+ char *result;
+
+ doc = cmark_parse_document(text, len, options);
+ result = cmark_render_html(doc, options);
+ cmark_node_free(doc);
+
+ return result;
+}
+```
+
+## The Streaming Interface
+
+For large documents or streaming input, cmark provides an incremental parsing API:
+
+```c
+cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
+
+// Feed chunks of data as they arrive
+while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
+ cmark_parser_feed(parser, buffer, bytes);
+}
+
+// Finalize and get the AST
+cmark_node *document = cmark_parser_finish(parser);
+cmark_parser_free(parser);
+
+// Render to any format
+char *html = cmark_render_html(document, CMARK_OPT_DEFAULT);
+char *xml = cmark_render_xml(document, CMARK_OPT_DEFAULT);
+char *man = cmark_render_man(document, CMARK_OPT_DEFAULT, 72);
+char *tex = cmark_render_latex(document, CMARK_OPT_DEFAULT, 80);
+char *cm = cmark_render_commonmark(document, CMARK_OPT_DEFAULT, 0);
+
+// Cleanup
+cmark_node_free(document);
+```
+
+The parser accumulates input in an internal line buffer (`parser->linebuf`) and processes complete lines as they become available. The `S_parser_feed()` function in `blocks.c` scans for line-ending characters (`\n`, `\r`) and dispatches each complete line to `S_process_line()`.
+
+## Node Type Taxonomy
+
+cmark defines 21 node types in the `cmark_node_type` enum:
+
+### Block Nodes (container and leaf)
+| Enum Value | Type String | Container? | Accepts Lines? | Contains Inlines? |
+|-----------|-------------|------------|---------------|-------------------|
+| `CMARK_NODE_DOCUMENT` | "document" | Yes | No | No |
+| `CMARK_NODE_BLOCK_QUOTE` | "block_quote" | Yes | No | No |
+| `CMARK_NODE_LIST` | "list" | Yes (items only) | No | No |
+| `CMARK_NODE_ITEM` | "item" | Yes | No | No |
+| `CMARK_NODE_CODE_BLOCK` | "code_block" | No (leaf) | Yes | No |
+| `CMARK_NODE_HTML_BLOCK` | "html_block" | No (leaf) | No | No |
+| `CMARK_NODE_CUSTOM_BLOCK` | "custom_block" | Yes | No | No |
+| `CMARK_NODE_PARAGRAPH` | "paragraph" | No | Yes | Yes |
+| `CMARK_NODE_HEADING` | "heading" | No | Yes | Yes |
+| `CMARK_NODE_THEMATIC_BREAK` | "thematic_break" | No (leaf) | No | No |
+
+### Inline Nodes
+| Enum Value | Type String | Leaf? |
+|-----------|-------------|-------|
+| `CMARK_NODE_TEXT` | "text" | Yes |
+| `CMARK_NODE_SOFTBREAK` | "softbreak" | Yes |
+| `CMARK_NODE_LINEBREAK` | "linebreak" | Yes |
+| `CMARK_NODE_CODE` | "code" | Yes |
+| `CMARK_NODE_HTML_INLINE` | "html_inline" | Yes |
+| `CMARK_NODE_CUSTOM_INLINE` | "custom_inline" | No |
+| `CMARK_NODE_EMPH` | "emph" | No |
+| `CMARK_NODE_STRONG` | "strong" | No |
+| `CMARK_NODE_LINK` | "link" | No |
+| `CMARK_NODE_IMAGE` | "image" | No |
+
+Range sentinels are also defined for classification:
+- `CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT`
+- `CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK`
+- `CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT`
+- `CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE`
+
+## Option Flags
+
+Options are passed as a bitmask integer to parsing and rendering functions:
+
+```c
+#define CMARK_OPT_DEFAULT 0
+#define CMARK_OPT_SOURCEPOS (1 << 1) // Add data-sourcepos attributes
+#define CMARK_OPT_HARDBREAKS (1 << 2) // Render softbreaks as hard breaks
+#define CMARK_OPT_SAFE (1 << 3) // Legacy (now default behavior)
+#define CMARK_OPT_NOBREAKS (1 << 4) // Render softbreaks as spaces
+#define CMARK_OPT_NORMALIZE (1 << 8) // Legacy (no effect)
+#define CMARK_OPT_VALIDATE_UTF8 (1 << 9) // Validate UTF-8 input
+#define CMARK_OPT_SMART (1 << 10) // Smart quotes and dashes
+#define CMARK_OPT_UNSAFE (1 << 17) // Allow raw HTML and dangerous URLs
+```
+
+## Memory Management Model
+
+cmark uses a pluggable memory allocator defined by the `cmark_mem` struct:
+
+```c
+typedef struct cmark_mem {
+ void *(*calloc)(size_t, size_t);
+ void *(*realloc)(void *, size_t);
+ void (*free)(void *);
+} cmark_mem;
+```
+
+The default allocator in `cmark.c` wraps standard `calloc`/`realloc`/`free` with abort-on-NULL safety checks (`xcalloc`, `xrealloc`). Every node stores a pointer to the allocator it was created with (`node->mem`), ensuring consistent allocation/deallocation throughout the tree.
+
+## Version Information
+
+Runtime version queries:
+
+```c
+int cmark_version(void); // Returns CMARK_VERSION as integer (0xMMmmpp)
+const char *cmark_version_string(void); // Returns CMARK_VERSION_STRING
+```
+
+The version is encoded as a 24-bit integer where bits 16–23 are major, 8–15 are minor, and 0–7 are patch. For example, `0x001F02` represents version 0.31.2.
+
+## Backwards Compatibility Aliases
+
+For code written against older cmark API versions, these aliases are provided:
+
+```c
+#define CMARK_NODE_HEADER CMARK_NODE_HEADING
+#define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK
+#define CMARK_NODE_HTML CMARK_NODE_HTML_BLOCK
+#define CMARK_NODE_INLINE_HTML CMARK_NODE_HTML_INLINE
+```
+
+Short-name aliases (without the `CMARK_` prefix) are also available unless `CMARK_NO_SHORT_NAMES` is defined:
+
+```c
+#define NODE_DOCUMENT CMARK_NODE_DOCUMENT
+#define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH
+#define BULLET_LIST CMARK_BULLET_LIST
+// ... and many more
+```
+
+## Cross-References
+
+- [architecture.md](architecture.md) — Detailed two-phase parsing pipeline, module dependency graph
+- [public-api.md](public-api.md) — Complete public API reference with all function signatures
+- [ast-node-system.md](ast-node-system.md) — Internal `cmark_node` struct, type-specific unions, tree operations
+- [block-parsing.md](block-parsing.md) — `blocks.c` line-by-line analysis, open block tracking, finalization
+- [inline-parsing.md](inline-parsing.md) — `inlines.c` delimiter algorithm, bracket stack, backtick scanning
+- [iterator-system.md](iterator-system.md) — AST traversal with enter/exit events
+- [html-renderer.md](html-renderer.md) — HTML output with escaping and source position
+- [xml-renderer.md](xml-renderer.md) — XML output with CommonMark DTD
+- [latex-renderer.md](latex-renderer.md) — LaTeX output via render framework
+- [man-renderer.md](man-renderer.md) — groff man page output
+- [commonmark-renderer.md](commonmark-renderer.md) — Round-trip CommonMark output
+- [render-framework.md](render-framework.md) — Shared `cmark_render()` engine for text-based renderers
+- [memory-management.md](memory-management.md) — Allocator model, buffer growth, node freeing
+- [utf8-handling.md](utf8-handling.md) — UTF-8 validation, iteration, case folding
+- [reference-system.md](reference-system.md) — Link reference definitions storage and resolution
+- [scanner-system.md](scanner-system.md) — re2c-generated pattern matching
+- [building.md](building.md) — CMake build configuration and options
+- [cli-usage.md](cli-usage.md) — Command-line tool usage
+- [testing.md](testing.md) — Test infrastructure (spec tests, API tests, fuzzing)
+- [code-style.md](code-style.md) — Coding conventions and naming patterns
diff --git a/docs/handbook/cmark/public-api.md b/docs/handbook/cmark/public-api.md
new file mode 100644
index 0000000000..7168282e23
--- /dev/null
+++ b/docs/handbook/cmark/public-api.md
@@ -0,0 +1,637 @@
+# cmark — Public API Reference
+
+## Header: `cmark.h`
+
+All public API functions, types, and constants are declared in `cmark.h`. Functions marked with `CMARK_EXPORT` are exported from the shared library. The header is usable from C++ via `extern "C"` guards.
+
+---
+
+## Type Definitions
+
+### Node Types
+
+```c
+typedef enum {
+ /* Error status */
+ CMARK_NODE_NONE,
+
+ /* Block nodes */
+ CMARK_NODE_DOCUMENT,
+ CMARK_NODE_BLOCK_QUOTE,
+ CMARK_NODE_LIST,
+ CMARK_NODE_ITEM,
+ CMARK_NODE_CODE_BLOCK,
+ CMARK_NODE_HTML_BLOCK,
+ CMARK_NODE_CUSTOM_BLOCK,
+ CMARK_NODE_PARAGRAPH,
+ CMARK_NODE_HEADING,
+ CMARK_NODE_THEMATIC_BREAK,
+
+ /* Range sentinels */
+ CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
+ CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK,
+
+ /* Inline nodes */
+ CMARK_NODE_TEXT,
+ CMARK_NODE_SOFTBREAK,
+ CMARK_NODE_LINEBREAK,
+ CMARK_NODE_CODE,
+ CMARK_NODE_HTML_INLINE,
+ CMARK_NODE_CUSTOM_INLINE,
+ CMARK_NODE_EMPH,
+ CMARK_NODE_STRONG,
+ CMARK_NODE_LINK,
+ CMARK_NODE_IMAGE,
+
+ CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT,
+ CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE
+} cmark_node_type;
+```
+
+### List Types
+
+```c
+typedef enum {
+ CMARK_NO_LIST,
+ CMARK_BULLET_LIST,
+ CMARK_ORDERED_LIST
+} cmark_list_type;
+```
+
+### Delimiter Types
+
+```c
+typedef enum {
+ CMARK_NO_DELIM,
+ CMARK_PERIOD_DELIM,
+ CMARK_PAREN_DELIM
+} cmark_delim_type;
+```
+
+### Event Types (for iterator)
+
+```c
+typedef enum {
+ CMARK_EVENT_NONE,
+ CMARK_EVENT_DONE,
+ CMARK_EVENT_ENTER,
+ CMARK_EVENT_EXIT
+} cmark_event_type;
+```
+
+### Opaque Types
+
+```c
+typedef struct cmark_node cmark_node;
+typedef struct cmark_parser cmark_parser;
+typedef struct cmark_iter cmark_iter;
+```
+
+### Memory Allocator
+
+```c
+typedef struct cmark_mem {
+ void *(*calloc)(size_t, size_t);
+ void *(*realloc)(void *, size_t);
+ void (*free)(void *);
+} cmark_mem;
+```
+
+---
+
+## Simple Interface
+
+### `cmark_markdown_to_html`
+
+```c
+CMARK_EXPORT
+char *cmark_markdown_to_html(const char *text, size_t len, int options);
+```
+
+Converts CommonMark text to HTML in a single call. The input `text` must be UTF-8 encoded. The returned string is null-terminated and allocated via the default allocator; the caller must free it with `free()`.
+
+**Implementation** (in `cmark.c`): Calls `cmark_parse_document()`, then `cmark_render_html()`, then `cmark_node_free()`.
+
+---
+
+## Node Classification
+
+### `cmark_node_is_block`
+
+```c
+CMARK_EXPORT bool cmark_node_is_block(cmark_node *node);
+```
+
+Returns `true` if `node->type` is between `CMARK_NODE_FIRST_BLOCK` and `CMARK_NODE_LAST_BLOCK` inclusive. Returns `false` for NULL.
+
+### `cmark_node_is_inline`
+
+```c
+CMARK_EXPORT bool cmark_node_is_inline(cmark_node *node);
+```
+
+Returns `true` if `node->type` is between `CMARK_NODE_FIRST_INLINE` and `CMARK_NODE_LAST_INLINE` inclusive. Returns `false` for NULL.
+
+### `cmark_node_is_leaf`
+
+```c
+CMARK_EXPORT bool cmark_node_is_leaf(cmark_node *node);
+```
+
+Returns `true` for node types that cannot have children:
+- `CMARK_NODE_THEMATIC_BREAK`
+- `CMARK_NODE_CODE_BLOCK`
+- `CMARK_NODE_TEXT`
+- `CMARK_NODE_SOFTBREAK`
+- `CMARK_NODE_LINEBREAK`
+- `CMARK_NODE_CODE`
+- `CMARK_NODE_HTML_INLINE`
+
+Note: `CMARK_NODE_HTML_BLOCK` is **not** classified as a leaf by `cmark_node_is_leaf()`, though the iterator treats it as one (see `S_leaf_mask` in `iterator.c`).
+
+---
+
+## Node Creation and Destruction
+
+### `cmark_node_new`
+
+```c
+CMARK_EXPORT cmark_node *cmark_node_new(cmark_node_type type);
+```
+
+Creates a new node of the given type using the default memory allocator. For `CMARK_NODE_HEADING`, the level defaults to 1. For `CMARK_NODE_LIST`, the list type defaults to `CMARK_BULLET_LIST` with `start = 0` and `tight = false`.
+
+### `cmark_node_new_with_mem`
+
+```c
+CMARK_EXPORT cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem);
+```
+
+Same as `cmark_node_new` but uses the specified memory allocator. All nodes in a single tree must use the same allocator.
+
+### `cmark_node_free`
+
+```c
+CMARK_EXPORT void cmark_node_free(cmark_node *node);
+```
+
+Frees the node and all its descendants. The node is first unlinked from its siblings/parent. The internal `S_free_nodes()` function iterates the subtree (splicing children into a flat list for iterative freeing) and releases type-specific memory:
+- `CMARK_NODE_CODE_BLOCK`: frees `data` and `as.code.info`
+- `CMARK_NODE_TEXT`, `CMARK_NODE_HTML_INLINE`, `CMARK_NODE_CODE`, `CMARK_NODE_HTML_BLOCK`: frees `data`
+- `CMARK_NODE_LINK`, `CMARK_NODE_IMAGE`: frees `as.link.url` and `as.link.title`
+- `CMARK_NODE_CUSTOM_BLOCK`, `CMARK_NODE_CUSTOM_INLINE`: frees `as.custom.on_enter` and `as.custom.on_exit`
+
+---
+
+## Tree Traversal
+
+### `cmark_node_next`
+
+```c
+CMARK_EXPORT cmark_node *cmark_node_next(cmark_node *node);
+```
+
+Returns the next sibling, or NULL.
+
+### `cmark_node_previous`
+
+```c
+CMARK_EXPORT cmark_node *cmark_node_previous(cmark_node *node);
+```
+
+Returns the previous sibling, or NULL.
+
+### `cmark_node_parent`
+
+```c
+CMARK_EXPORT cmark_node *cmark_node_parent(cmark_node *node);
+```
+
+Returns the parent node, or NULL.
+
+### `cmark_node_first_child`
+
+```c
+CMARK_EXPORT cmark_node *cmark_node_first_child(cmark_node *node);
+```
+
+Returns the first child, or NULL.
+
+### `cmark_node_last_child`
+
+```c
+CMARK_EXPORT cmark_node *cmark_node_last_child(cmark_node *node);
+```
+
+Returns the last child, or NULL.
+
+---
+
+## Iterator API
+
+### `cmark_iter_new`
+
+```c
+CMARK_EXPORT cmark_iter *cmark_iter_new(cmark_node *root);
+```
+
+Creates a new iterator starting at `root`. Returns NULL if `root` is NULL. The iterator begins in a pre-first state (`CMARK_EVENT_NONE`); the first call to `cmark_iter_next()` returns `CMARK_EVENT_ENTER` for the root.
+
+### `cmark_iter_free`
+
+```c
+CMARK_EXPORT void cmark_iter_free(cmark_iter *iter);
+```
+
+Frees the iterator. Does not free any nodes.
+
+### `cmark_iter_next`
+
+```c
+CMARK_EXPORT cmark_event_type cmark_iter_next(cmark_iter *iter);
+```
+
+Advances to the next node and returns the event type:
+- `CMARK_EVENT_ENTER` — entering a node (for non-leaf nodes, children follow)
+- `CMARK_EVENT_EXIT` — leaving a node (all children have been visited)
+- `CMARK_EVENT_DONE` — iteration complete (returned to root)
+
+Leaf nodes only generate `ENTER` events, never `EXIT`.
+
+### `cmark_iter_get_node`
+
+```c
+CMARK_EXPORT cmark_node *cmark_iter_get_node(cmark_iter *iter);
+```
+
+Returns the current node.
+
+### `cmark_iter_get_event_type`
+
+```c
+CMARK_EXPORT cmark_event_type cmark_iter_get_event_type(cmark_iter *iter);
+```
+
+Returns the current event type.
+
+### `cmark_iter_get_root`
+
+```c
+CMARK_EXPORT cmark_node *cmark_iter_get_root(cmark_iter *iter);
+```
+
+Returns the root node of the iteration.
+
+### `cmark_iter_reset`
+
+```c
+CMARK_EXPORT void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
+ cmark_event_type event_type);
+```
+
+Resets the iterator position. The node must be a descendant of the root (or the root itself).
+
+---
+
+## Node Accessors
+
+### User Data
+
+```c
+CMARK_EXPORT void *cmark_node_get_user_data(cmark_node *node);
+CMARK_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data);
+```
+
+Get/set an arbitrary user data pointer. The setter returns 1 on success and 0 on failure; the getter returns the stored pointer (or NULL if none was set). cmark does not manage the lifecycle of user data.
+
+### Type Information
+
+```c
+CMARK_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node);
+CMARK_EXPORT const char *cmark_node_get_type_string(cmark_node *node);
+```
+
+`cmark_node_get_type_string()` returns strings like `"document"`, `"paragraph"`, `"heading"`, `"text"`, `"emph"`, `"strong"`, `"link"`, `"image"`, etc. Returns `"<unknown>"` for unrecognized types.
+
+### String Content
+
+```c
+CMARK_EXPORT const char *cmark_node_get_literal(cmark_node *node);
+CMARK_EXPORT int cmark_node_set_literal(cmark_node *node, const char *content);
+```
+
+Works for `CMARK_NODE_HTML_BLOCK`, `CMARK_NODE_TEXT`, `CMARK_NODE_HTML_INLINE`, `CMARK_NODE_CODE`, and `CMARK_NODE_CODE_BLOCK`. Returns NULL / 0 for other types.
+
+### Heading Level
+
+```c
+CMARK_EXPORT int cmark_node_get_heading_level(cmark_node *node);
+CMARK_EXPORT int cmark_node_set_heading_level(cmark_node *node, int level);
+```
+
+Only works for `CMARK_NODE_HEADING`. Level must be 1–6. Returns 0 on error.
+
+### List Properties
+
+```c
+CMARK_EXPORT cmark_list_type cmark_node_get_list_type(cmark_node *node);
+CMARK_EXPORT int cmark_node_set_list_type(cmark_node *node, cmark_list_type type);
+CMARK_EXPORT cmark_delim_type cmark_node_get_list_delim(cmark_node *node);
+CMARK_EXPORT int cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim);
+CMARK_EXPORT int cmark_node_get_list_start(cmark_node *node);
+CMARK_EXPORT int cmark_node_set_list_start(cmark_node *node, int start);
+CMARK_EXPORT int cmark_node_get_list_tight(cmark_node *node);
+CMARK_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight);
+```
+
+All list accessors only work for `CMARK_NODE_LIST`. `set_list_start` rejects negative values. `set_list_tight` interprets `tight == 1` as true.
+
+### Code Block Info
+
+```c
+CMARK_EXPORT const char *cmark_node_get_fence_info(cmark_node *node);
+CMARK_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info);
+```
+
+The info string from a fenced code block (e.g., `"python"` from ` ```python `). Only works for `CMARK_NODE_CODE_BLOCK`.
+
+### Link/Image Properties
+
+```c
+CMARK_EXPORT const char *cmark_node_get_url(cmark_node *node);
+CMARK_EXPORT int cmark_node_set_url(cmark_node *node, const char *url);
+CMARK_EXPORT const char *cmark_node_get_title(cmark_node *node);
+CMARK_EXPORT int cmark_node_set_title(cmark_node *node, const char *title);
+```
+
+Only work for `CMARK_NODE_LINK` and `CMARK_NODE_IMAGE`. Return NULL / 0 for other types.
+
+### Custom Block/Inline
+
+```c
+CMARK_EXPORT const char *cmark_node_get_on_enter(cmark_node *node);
+CMARK_EXPORT int cmark_node_set_on_enter(cmark_node *node, const char *on_enter);
+CMARK_EXPORT const char *cmark_node_get_on_exit(cmark_node *node);
+CMARK_EXPORT int cmark_node_set_on_exit(cmark_node *node, const char *on_exit);
+```
+
+Only work for `CMARK_NODE_CUSTOM_BLOCK` and `CMARK_NODE_CUSTOM_INLINE`.
+
+### Source Position
+
+```c
+CMARK_EXPORT int cmark_node_get_start_line(cmark_node *node);
+CMARK_EXPORT int cmark_node_get_start_column(cmark_node *node);
+CMARK_EXPORT int cmark_node_get_end_line(cmark_node *node);
+CMARK_EXPORT int cmark_node_get_end_column(cmark_node *node);
+```
+
+Line and column numbers are 1-based. These are populated during parsing if `CMARK_OPT_SOURCEPOS` is set.
+
+---
+
+## Tree Manipulation
+
+### `cmark_node_unlink`
+
+```c
+CMARK_EXPORT void cmark_node_unlink(cmark_node *node);
+```
+
+Removes `node` from the tree (detaching from parent and siblings) without freeing its memory.
+
+### `cmark_node_insert_before`
+
+```c
+CMARK_EXPORT int cmark_node_insert_before(cmark_node *node, cmark_node *sibling);
+```
+
+Inserts `sibling` before `node`. Validates that the parent can contain the sibling (via `S_can_contain()`). Returns 1 on success, 0 on failure.
+
+### `cmark_node_insert_after`
+
+```c
+CMARK_EXPORT int cmark_node_insert_after(cmark_node *node, cmark_node *sibling);
+```
+
+Inserts `sibling` after `node`. Returns 1 on success, 0 on failure.
+
+### `cmark_node_replace`
+
+```c
+CMARK_EXPORT int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode);
+```
+
+Replaces `oldnode` with `newnode` in the tree. The old node is unlinked but not freed.
+
+### `cmark_node_prepend_child`
+
+```c
+CMARK_EXPORT int cmark_node_prepend_child(cmark_node *node, cmark_node *child);
+```
+
+Adds `child` as the first child of `node`. Validates containership.
+
+### `cmark_node_append_child`
+
+```c
+CMARK_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child);
+```
+
+Adds `child` as the last child of `node`. Validates containership.
+
+### `cmark_consolidate_text_nodes`
+
+```c
+CMARK_EXPORT void cmark_consolidate_text_nodes(cmark_node *root);
+```
+
+Merges adjacent `CMARK_NODE_TEXT` children into single text nodes throughout the subtree. Uses an iterator to find consecutive text nodes and concatenates their data via `cmark_strbuf`.
+
+---
+
+## Parsing Functions
+
+### `cmark_parser_new`
+
+```c
+CMARK_EXPORT cmark_parser *cmark_parser_new(int options);
+```
+
+Creates a parser with the default memory allocator and a new document root.
+
+### `cmark_parser_new_with_mem`
+
+```c
+CMARK_EXPORT cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem);
+```
+
+Creates a parser with the specified allocator.
+
+### `cmark_parser_new_with_mem_into_root`
+
+```c
+CMARK_EXPORT cmark_parser *cmark_parser_new_with_mem_into_root(
+ int options, cmark_mem *mem, cmark_node *root);
+```
+
+Creates a parser that appends parsed content to an existing root node. Useful for assembling a single document from multiple parsed fragments.
+
+### `cmark_parser_free`
+
+```c
+CMARK_EXPORT void cmark_parser_free(cmark_parser *parser);
+```
+
+Frees the parser and its internal buffers. Does NOT free the parsed document tree.
+
+### `cmark_parser_feed`
+
+```c
+CMARK_EXPORT void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
+```
+
+Feeds a chunk of input data to the parser. Can be called multiple times for streaming input.
+
+### `cmark_parser_finish`
+
+```c
+CMARK_EXPORT cmark_node *cmark_parser_finish(cmark_parser *parser);
+```
+
+Finalizes parsing and returns the document root. Must be called after all input has been fed. Triggers `finalize_document()` which closes all open blocks and runs inline parsing.
+
+### `cmark_parse_document`
+
+```c
+CMARK_EXPORT cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
+```
+
+Convenience function equivalent to: create parser → feed entire buffer → finish → free parser. Returns the document root.
+
+### `cmark_parse_file`
+
+```c
+CMARK_EXPORT cmark_node *cmark_parse_file(FILE *f, int options);
+```
+
+Reads from a `FILE*` in 4096-byte chunks and parses incrementally.
+
+---
+
+## Rendering Functions
+
+### `cmark_render_html`
+
+```c
+CMARK_EXPORT char *cmark_render_html(cmark_node *root, int options);
+```
+
+Renders to HTML. Caller must free returned string.
+
+### `cmark_render_xml`
+
+```c
+CMARK_EXPORT char *cmark_render_xml(cmark_node *root, int options);
+```
+
+Renders to XML with CommonMark DTD. Includes `<?xml version="1.0" encoding="UTF-8"?>` header.
+
+### `cmark_render_man`
+
+```c
+CMARK_EXPORT char *cmark_render_man(cmark_node *root, int options, int width);
+```
+
+Renders to groff man page format. `width` controls line wrapping (0 = no wrap).
+
+### `cmark_render_commonmark`
+
+```c
+CMARK_EXPORT char *cmark_render_commonmark(cmark_node *root, int options, int width);
+```
+
+Renders back to CommonMark format. `width` controls line wrapping.
+
+### `cmark_render_latex`
+
+```c
+CMARK_EXPORT char *cmark_render_latex(cmark_node *root, int options, int width);
+```
+
+Renders to LaTeX. `width` controls line wrapping.
+
+---
+
+## Option Constants
+
+### Rendering Options
+
+```c
+#define CMARK_OPT_DEFAULT 0 // No special options
+#define CMARK_OPT_SOURCEPOS (1 << 1) // data-sourcepos attributes (HTML), sourcepos attributes (XML)
+#define CMARK_OPT_HARDBREAKS (1 << 2) // Render softbreaks as <br /> or \\
+#define CMARK_OPT_SAFE (1 << 3) // Legacy — safe mode is now default
+#define CMARK_OPT_UNSAFE (1 << 17) // Render raw HTML and dangerous URLs
+#define CMARK_OPT_NOBREAKS (1 << 4) // Render softbreaks as spaces
+```
+
+### Parsing Options
+
+```c
+#define CMARK_OPT_NORMALIZE (1 << 8) // Legacy — no effect
+#define CMARK_OPT_VALIDATE_UTF8 (1 << 9) // Replace invalid UTF-8 with U+FFFD
+#define CMARK_OPT_SMART (1 << 10) // Smart quotes and dashes
+```
+
+---
+
+## Memory Allocator
+
+### `cmark_get_default_mem_allocator`
+
+```c
+CMARK_EXPORT cmark_mem *cmark_get_default_mem_allocator(void);
+```
+
+Returns a pointer to the default allocator (`DEFAULT_MEM_ALLOCATOR` in `cmark.c`) which wraps `calloc`, `realloc`, and `free` with abort-on-failure guards.
+
+---
+
+## Version API
+
+### `cmark_version`
+
+```c
+CMARK_EXPORT int cmark_version(void);
+```
+
+Returns the version as a packed integer: `(major << 16) | (minor << 8) | patch`.
+
+### `cmark_version_string`
+
+```c
+CMARK_EXPORT const char *cmark_version_string(void);
+```
+
+Returns the version as a human-readable string (e.g., `"0.31.2"`).
+
+---
+
+## Node Integrity Checking
+
+```c
+CMARK_EXPORT int cmark_node_check(cmark_node *node, FILE *out);
+```
+
+Validates the structural integrity of the node tree, printing errors to `out`. Returns the number of errors found. Available in all builds but primarily useful in debug builds.
+
+---
+
+## Cross-References
+
+- [ast-node-system.md](ast-node-system.md) — Internal struct definitions behind these opaque types
+- [iterator-system.md](iterator-system.md) — Detailed iterator mechanics
+- [memory-management.md](memory-management.md) — Allocator details and buffer management
+- [block-parsing.md](block-parsing.md) — How `cmark_parser_feed` and `cmark_parser_finish` work internally
+- [html-renderer.md](html-renderer.md) — How `cmark_render_html` generates output
diff --git a/docs/handbook/cmark/reference-system.md b/docs/handbook/cmark/reference-system.md
new file mode 100644
index 0000000000..0e63b5c796
--- /dev/null
+++ b/docs/handbook/cmark/reference-system.md
@@ -0,0 +1,307 @@
+# cmark — Reference System
+
+## Overview
+
+The reference system (`references.c`, `references.h`) manages link reference definitions — the `[label]: URL "title"` constructs in CommonMark. During block parsing, reference definitions are extracted and stored. During inline parsing, reference links (`[text][label]` and `[text]`) look up these stored definitions.
+
+## Data Structures
+
+### Reference Entry
+
+```c
+typedef struct cmark_reference {
+ struct cmark_reference *next; // Unused — leftover from old linked-list design
+ unsigned char *url;
+ unsigned char *title;
+ unsigned char *label;
+ unsigned int age; // Insertion order (for stable sorting)
+ unsigned int size; // Length of the label string
+} cmark_reference;
+```
+
+Each reference stores:
+- `label` — The normalized reference label (case-folded, whitespace-collapsed)
+- `url` — The destination URL
+- `title` — Optional title string (may be NULL)
+- `age` — Monotonically increasing counter for insertion order
+- `size` — Byte length of the label
+
+### Reference Map
+
+```c
+struct cmark_reference_map {
+ cmark_mem *mem;
+ cmark_reference **refs; // Sorted array of reference pointers
+ unsigned int size; // Number of entries
+ unsigned int ref_size; // Cumulative size of all labels + URLs + titles
+ unsigned int max_ref_size; // Maximum allowed ref_size (anti-DoS limit)
+ cmark_reference *last; // Most recently added reference
+ unsigned int asize; // Allocated capacity of refs array
+ bool sorted; // True once refs has been lazily sorted (see lookup below)
+};
+```
+
+The map uses a **sorted array with binary search** for lookup, not a hash table. Insertion is an amortized O(1) append; the array is sorted lazily on the first lookup (see below), after which each lookup is O(log n).
+
+### Anti-DoS Limiting
+
+The `ref_size` and `max_ref_size` fields prevent pathological inputs from causing excessive memory usage:
+
+```c
+unsigned int max_ref_size; // Set to 100 * input length at parser init
+unsigned int ref_size; // Sum of all label + url + title lengths
+```
+
+When `ref_size` exceeds `max_ref_size`, new reference additions are silently rejected. This prevents quadratic memory blowup from inputs with many reference definitions.
+
+## Label Normalization
+
+```c
+static unsigned char *normalize_reference(cmark_mem *mem,
+ cmark_chunk *ref) {
+ cmark_strbuf normalized = CMARK_BUF_INIT(mem);
+
+ if (ref == NULL) return NULL;
+
+ if (ref->len == 0) return NULL;
+
+ cmark_utf8proc_case_fold(&normalized, ref->data, ref->len);
+ cmark_strbuf_trim(&normalized);
+ cmark_strbuf_normalize_whitespace(&normalized);
+
+ return cmark_strbuf_detach(&normalized);
+}
+```
+
+The normalization process (per CommonMark spec):
+1. **Case fold** — Uses Unicode case folding (not simple lowercasing), via `cmark_utf8proc_case_fold()`
+2. **Trim** — Remove leading and trailing whitespace
+3. **Collapse whitespace** — Replace runs of whitespace with a single space
+
+This means `[Foo Bar]`, `[FOO BAR]`, and `[foo bar]` all normalize to the same label.
+
+## Reference Creation
+
+```c
+static void cmark_reference_create(cmark_reference_map *map,
+ cmark_chunk *label,
+ cmark_chunk *url,
+ cmark_chunk *title) {
+ cmark_reference *ref;
+ unsigned char *reflabel = normalize_reference(map->mem, label);
+
+ if (reflabel == NULL) return;
+
+ // Anti-DoS: check cumulative size limit
+ if (map->ref_size > map->max_ref_size) {
+ map->mem->free(reflabel);
+ return;
+ }
+
+ ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref));
+ ref->label = reflabel;
+ ref->url = cmark_clean_url(map->mem, url);
+ ref->title = cmark_clean_title(map->mem, title);
+ ref->age = map->size;
+ ref->size = (unsigned int)strlen((char *)reflabel);
+
+ // Track cumulative size
+ map->ref_size += ref->size;
+ if (ref->url) map->ref_size += (unsigned int)strlen((char *)ref->url);
+ if (ref->title) map->ref_size += (unsigned int)strlen((char *)ref->title);
+
+ // Add to array
+ if (map->size >= map->asize) {
+ // Grow array (double capacity)
+ map->asize = map->asize ? 2 * map->asize : 8;
+ map->refs = (cmark_reference **)map->mem->realloc(
+ map->refs, map->asize * sizeof(cmark_reference *));
+ }
+ map->refs[map->size] = ref;
+ map->size++;
+ map->last = ref;
+}
+```
+
+References are appended to the array in insertion order. The array is NOT kept sorted during insertion — it's sorted once at lookup time (lazily).
+
+## Reference Lookup
+
+```c
+cmark_reference *cmark_reference_lookup(cmark_reference_map *map,
+ cmark_chunk *label) {
+ if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH) return NULL;
+ if (map == NULL || map->size == 0) return NULL;
+
+ unsigned char *norm = normalize_reference(map->mem, label);
+ if (norm == NULL) return NULL;
+
+ // Sort on first lookup
+ if (!map->sorted) {
+ qsort(map->refs, map->size, sizeof(cmark_reference *), refcmp);
+ // Remove duplicates (keep first occurrence)
+ // ...
+ map->sorted = true;
+ }
+
+ // Binary search
+ cmark_reference **found = (cmark_reference **)bsearch(
+ &norm, map->refs, map->size, sizeof(cmark_reference *), refcmp);
+
+ map->mem->free(norm);
+ return found ? *found : NULL;
+}
+```
+
+### Lazy Sorting
+
+The reference map is NOT sorted during insertion. On the first call to `cmark_reference_lookup()`, the array is sorted using `qsort()` with a comparison function:
+
+```c
+static int refcmp(const void *a, const void *b) {
+ const cmark_reference *refa = *(const cmark_reference **)a;
+ const cmark_reference *refb = *(const cmark_reference **)b;
+ int cmp = strcmp((char *)refa->label, (char *)refb->label);
+ if (cmp != 0) return cmp;
+ // Tie-break by age (earlier wins)
+ if (refa->age < refb->age) return -1;
+ if (refa->age > refb->age) return 1;
+ return 0;
+}
+```
+
+When labels collide (same normalized label), the first definition wins (lowest `age`).
+
+After sorting, duplicates are removed — entries with the same label as the preceding entry are freed:
+```c
+unsigned int write = 0;
+for (unsigned int read = 0; read < map->size; read++) {
+ if (write > 0 &&
+ strcmp((char *)map->refs[write-1]->label,
+ (char *)map->refs[read]->label) == 0) {
+ // Duplicate — free it
+ cmark_reference_free(map->mem, map->refs[read]);
+ } else {
+ map->refs[write++] = map->refs[read];
+ }
+}
+map->size = write;
+```
+
+### Binary Search
+
+After sorting and deduplication, lookups use standard `bsearch()`, giving O(log n) lookup time.
+
+## URL and Title Cleaning
+
+When creating references, URLs and titles are cleaned:
+
+### `cmark_clean_url()`
+```c
+unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
+```
+- Removes surrounding `<` and `>` if present (angle-bracket URLs)
+- Unescapes backslash escapes
+- Decodes entity references
+- Percent-encodes non-URL-safe characters via `houdini_escape_href()`
+
+### `cmark_clean_title()`
+```c
+unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
+```
+- Strips the surrounding delimiter pair: `"…"`, `'…'`, or `(…)`
+- Unescapes backslash escapes
+- Decodes entity references
+
+## Integration with Parser
+
+### Extraction during Block Parsing
+
+Reference definitions are extracted when paragraphs are finalized:
+
+```c
+// In blocks.c, during paragraph finalization:
+while (cmark_parse_reference_inline(parser->mem, &node_content,
+ parser->refmap)) {
+ // Keep parsing references from the start of the paragraph
+}
+```
+
+### `cmark_parse_reference_inline()`
+
+```c
+int cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
+ cmark_reference_map *refmap) {
+ // Parse: [label]: destination "title"
+ // Returns 1 if a reference was found and consumed, 0 otherwise
+ subject subj;
+ // ... initialize subject on the input buffer
+ // Parse label
+ cmark_chunk lab = cmark_chunk_literal("");
+ cmark_chunk url = cmark_chunk_literal("");
+ cmark_chunk title = cmark_chunk_literal("");
+
+ if (!link_label(&subj, &lab) || lab.len == 0) return 0;
+ if (peek_char(&subj) != ':') return 0;
+ advance(&subj);
+ spnl(&subj); // skip spaces and up to one newline
+ if (!manual_scan_link_url(&subj, &url)) return 0;
+ // ... parse optional title
+ // ... validate: rest of line must be blank
+ cmark_reference_create(refmap, &lab, &url, &title);
+ // Remove consumed bytes from input
+ return 1;
+}
+```
+
+The parser repeatedly calls this function on paragraph content. Each successful parse removes the reference definition from the paragraph. If the entire paragraph consists of reference definitions, the paragraph node is removed from the AST.
+
+### Lookup during Inline Parsing
+
+In `inlines.c`, when a potential reference link is found:
+
+```c
+cmark_reference *ref = cmark_reference_lookup(subj->refmap, &raw_label);
+if (ref) {
+ // Create link node with ref->url and ref->title
+}
+```
+
+## Label Length Limit
+
+```c
+#define MAX_LINK_LABEL_LENGTH 999
+```
+
+Reference labels longer than 999 characters are rejected, per the CommonMark spec.
+
+## Map Lifecycle
+
+```c
+cmark_reference_map *cmark_reference_map_new(cmark_mem *mem);
+void cmark_reference_map_free(cmark_reference_map *map);
+```
+
+The map is created during parser initialization and freed when the parser is freed. The AST's reference links have already been resolved and store their own copies of URL and title — the reference map is not needed after parsing.
+
+### Cleanup
+
+```c
+void cmark_reference_map_free(cmark_reference_map *map) {
+ if (map == NULL) return;
+ for (unsigned int i = 0; i < map->size; i++) {
+ cmark_reference_free(map->mem, map->refs[i]);
+ }
+ map->mem->free(map->refs);
+ map->mem->free(map);
+}
+```
+
+Each reference and its strings (label, url, title) are freed, then the array and map struct are freed.
+
+## Cross-References
+
+- [references.c](../../cmark/src/references.c) — Implementation
+- [references.h](../../cmark/src/references.h) — Data structures
+- [block-parsing.md](block-parsing.md) — Reference extraction during paragraph finalization
+- [inline-parsing.md](inline-parsing.md) — Reference lookup during link resolution
+- [utf8-handling.md](utf8-handling.md) — Case folding used in label normalization
diff --git a/docs/handbook/cmark/render-framework.md b/docs/handbook/cmark/render-framework.md
new file mode 100644
index 0000000000..065b9c878f
--- /dev/null
+++ b/docs/handbook/cmark/render-framework.md
@@ -0,0 +1,294 @@
+# cmark — Render Framework
+
+## Overview
+
+The render framework (`render.c`, `render.h`) provides a generic rendering infrastructure used by three of the five renderers: LaTeX, man, and CommonMark. It handles line wrapping, prefix management, and character-level output dispatch. The HTML and XML renderers bypass this framework and write directly to buffers.
+
+## The `cmark_renderer` Structure
+
+```c
+struct cmark_renderer {
+ cmark_mem *mem;
+ cmark_strbuf *buffer; // Output buffer
+ cmark_strbuf *prefix; // Current line prefix (e.g., "> " for blockquotes)
+ int column; // Current column position (for wrapping)
+ int width; // Target width (0 = no wrapping)
+ int need_cr; // Pending newlines count
+ bufsize_t last_breakable; // Position of last breakable point in buffer
+ bool begin_line; // True if at the start of a line
+ bool begin_content; // True if no content has been output on current line (after prefix)
+ bool no_linebreaks; // Suppress newlines (for rendering within attributes)
+ bool in_tight_list_item; // Currently inside a tight list item
+ void (*outc)(cmark_renderer *, cmark_escaping, int32_t, unsigned char);
+ // Per-character output callback
+ int32_t (*render_node)(cmark_renderer *, cmark_node *, cmark_event_type, int);
+ // Per-node render callback
+};
+```
+
+### Key Fields
+
+- **`column`** — Tracks horizontal position for word-wrap decisions.
+- **`width`** — If > 0, enables automatic line wrapping at word boundaries.
+- **`prefix`** — Accumulated prefix string. For nested block quotes and list items, prefixes stack (e.g., `"> - "` for a list item inside a block quote).
+- **`last_breakable`** — Buffer position of the last whitespace where a line break could be inserted. Used for retroactive line wrapping.
+- **`begin_line`** — True immediately after a newline. Used by renderers to decide whether to escape line-start characters.
+- **`begin_content`** — True until the first non-prefix content on a line. Distinguished from `begin_line` because the prefix itself isn't "content".
+- **`no_linebreaks`** — When true, newlines are converted to spaces. Used when rendering content inside constructs that can't contain literal newlines.
+
+## Entry Point
+
+```c
+char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width,
+ void (*outc)(cmark_renderer *, cmark_escaping, int32_t, unsigned char),
+ int32_t (*render_node)(cmark_renderer *, cmark_node *,
+ cmark_event_type, int)) {
+ cmark_renderer renderer = {
+ mem,
+ &buf, // buffer
+ &pref, // prefix
+ 0, // column
+ width, // width
+ 0, // need_cr
+ 0, // last_breakable
+ true, // begin_line
+ true, // begin_content
+ false, // no_linebreaks
+ false, // in_tight_list_item
+ outc, // outc
+ render_node // render_node
+ };
+ // ... iterate AST, call render_node for each event
+ return (char *)cmark_strbuf_detach(&buf);
+}
+```
+
+The framework creates a `cmark_renderer`, iterates over the AST using `cmark_iter`, and calls the provided `render_node` function for each event. The `outc` callback handles per-character output with escaping decisions.
+
+## Escaping Modes
+
+```c
+typedef enum {
+ LITERAL, // No escaping — output characters as-is
+ NORMAL, // Full escaping for prose text
+ TITLE, // Escaping for link titles
+ URL, // Escaping for URLs
+} cmark_escaping;
+```
+
+Each renderer's `outc` function switches on this enum to determine how to handle special characters.
+
+## Output Functions
+
+### `cmark_render_code_point()`
+
+```c
+void cmark_render_code_point(cmark_renderer *renderer, int32_t c) {
+ cmark_utf8proc_encode_char(c, renderer->buffer);
+ renderer->column += 1;
+}
+```
+
+Low-level: encodes a single Unicode codepoint as UTF-8 into the buffer and advances the column counter.
+
+### `cmark_render_ascii()`
+
+```c
+void cmark_render_ascii(cmark_renderer *renderer, const char *s) {
+ int len = (int)strlen(s);
+ cmark_strbuf_puts(renderer->buffer, s);
+ renderer->column += len;
+}
+```
+
+Outputs an ASCII string and advances the column counter. Used for fixed escape sequences like `\&`, `\textbf{`, etc.
+
+### `S_out()` — Main Output Dispatcher
+
+```c
+static CMARK_INLINE void S_out(cmark_renderer *renderer, const char *source,
+ bool wrap, cmark_escaping escape) {
+ int length = (int)strlen(source);
+ unsigned char nextc;
+ int32_t c;
+ int i = 0;
+ int len;
+ cmark_chunk remainder = cmark_chunk_literal("");
+ int k = renderer->buffer->size - 1;
+
+ wrap = wrap && !renderer->no_linebreaks;
+
+ if (renderer->need_cr) {
+ // Output pending newlines
+ while (renderer->need_cr > 0) {
+ S_cr(renderer);
+ renderer->need_cr--;
+ }
+ }
+
+ while (i < length) {
+ if (renderer->begin_line) {
+ // Output prefix at start of each line
+ cmark_strbuf_puts(renderer->buffer, (char *)renderer->prefix->ptr);
+ renderer->column = renderer->prefix->size;
+ renderer->begin_line = false;
+ renderer->begin_content = true;
+ }
+
+ len = cmark_utf8proc_charlen((uint8_t *)source + i, length - i);
+ if (len == -1) { // Invalid UTF-8
+ // ... handle error
+ }
+
+ cmark_utf8proc_iterate((uint8_t *)source + i, len, &c);
+
+ if (c == 10) {
+ // Newline
+ cmark_strbuf_putc(renderer->buffer, '\n');
+ renderer->column = 0;
+ renderer->begin_line = true;
+ renderer->begin_content = true;
+ renderer->last_breakable = 0;
+ } else if (wrap) {
+ if (c == 32 && renderer->column > renderer->width / 2) {
+ // Space past half-width — mark as potential break point
+ renderer->last_breakable = renderer->buffer->size;
+ cmark_render_code_point(renderer, c);
+ } else if (renderer->column > renderer->width &&
+ renderer->last_breakable > 0) {
+ // Past target width with a break point — retroactively break
+ // Replace the space at last_breakable with newline + prefix
+ // ...
+ } else {
+ renderer->outc(renderer, escape, c, nextc);
+ }
+ } else {
+ renderer->outc(renderer, escape, c, nextc);
+ }
+
+ if (c != 10) {
+ renderer->begin_content = false;
+ }
+ i += len;
+ }
+}
+```
+
+This is the core output function. It:
+1. Handles deferred newlines (`need_cr`)
+2. Outputs line prefixes at the start of each line
+3. Tracks column position
+4. Implements word wrapping via retroactive line breaks
+5. Delegates character-level escaping to `renderer->outc()`
+
+### Line Wrapping Algorithm
+
+The wrapping algorithm uses a **retroactive break** strategy:
+
+1. As text flows through `S_out()`, spaces past the half-width mark are recorded as potential break points (`last_breakable`).
+2. When the column exceeds `width`, the buffer is split at `last_breakable`:
+ - Everything after the break point is saved in `remainder`
+ - A newline and the current prefix are inserted at the break point
+ - The remainder is reappended
+
+This avoids forward-looking: the renderer doesn't need to know the length of upcoming content to decide where to break.
+
+```c
+// Retroactive line break:
+remainder = cmark_chunk_dup(&renderer->buffer->..., last_breakable, ...);
+cmark_strbuf_truncate(renderer->buffer, last_breakable);
+cmark_strbuf_putc(renderer->buffer, '\n');
+cmark_strbuf_puts(renderer->buffer, (char *)renderer->prefix->ptr);
+cmark_strbuf_put(renderer->buffer, remainder.data, remainder.len);
+renderer->column = renderer->prefix->size + cmark_chunk_len(&remainder);
+renderer->last_breakable = 0;
+renderer->begin_line = false;
+renderer->begin_content = false;
+```
+
+## Convenience Functions
+
+### `CR()`
+
+```c
+#define CR() renderer->need_cr = 1
+```
+
+Requests a newline before the next content output. Multiple `CR()` calls don't stack — only one newline is inserted.
+
+### `BLANKLINE()`
+
+```c
+#define BLANKLINE() renderer->need_cr = 2
+```
+
+Requests a blank line (two newlines) before the next content output.
+
+### `OUT()`
+
+```c
+#define OUT(s, wrap, escaping) (S_out(renderer, s, wrap, escaping))
+```
+
+### `LIT()`
+
+```c
+#define LIT(s) (S_out(renderer, s, false, LITERAL))
+```
+
+Output literal text (no escaping, no wrapping).
+
+### `NOBREAKS()`
+
+```c
+#define NOBREAKS(s) \
+ do { renderer->no_linebreaks = true; OUT(s, false, NORMAL); renderer->no_linebreaks = false; } while(0)
+```
+
+Output text with normal escaping but with newlines suppressed (converted to spaces).
+
+## Prefix Management
+
+Prefixes are used for block-level indentation. The renderer maintains a `cmark_strbuf` prefix that is output at the start of each line.
+
+### Usage Pattern
+
+```c
+// In commonmark.c, entering a block quote:
+cmark_strbuf_puts(renderer->prefix, "> ");
+// ... render children ...
+// On exit:
+cmark_strbuf_truncate(renderer->prefix, original_prefix_len);
+```
+
+Renderers save the prefix length before modifying it and restore it on exit. This creates a stack-like behavior for nested containers.
+
+## Framework vs Direct Rendering
+
+| Feature | Framework (render.c) | Direct (html.c, xml.c) |
+|---------|---------------------|----------------------|
+| Line wrapping | Yes (`width` parameter) | No |
+| Prefix management | Yes (automatic) | No (uses HTML tags) |
+| Per-char escaping | Via `outc` callback | Via `escape_html()` helper |
+| Column tracking | Yes | No |
+| Break points | Retroactive insertion | N/A |
+| `cmark_escaping` enum | Yes | No |
+
+## Which Renderers Use the Framework
+
+| Renderer | Uses Framework | Why/Why Not |
+|----------|---------------|-------------|
+| LaTeX (`latex.c`) | Yes | Needs wrapping for structured text |
+| man (`man.c`) | Yes | Needs wrapping for terminal display |
+| CommonMark (`commonmark.c`) | Yes | Needs wrapping and prefix management |
+| HTML (`html.c`) | No | HTML handles layout via browser |
+| XML (`xml.c`) | No | XML output is structural, not visual |
+
+## Cross-References
+
+- [render.c](../../cmark/src/render.c) — Framework implementation
+- [render.h](../../cmark/src/render.h) — `cmark_renderer` struct and `cmark_escaping` enum
+- [latex-renderer.md](latex-renderer.md) — LaTeX `outc` and `S_render_node`
+- [man-renderer.md](man-renderer.md) — Man `S_outc` and `S_render_node`
+- [commonmark-renderer.md](commonmark-renderer.md) — CommonMark `outc` and `S_render_node`
+- [html-renderer.md](html-renderer.md) — Direct renderer (no framework)
diff --git a/docs/handbook/cmark/scanner-system.md b/docs/handbook/cmark/scanner-system.md
new file mode 100644
index 0000000000..79adf03798
--- /dev/null
+++ b/docs/handbook/cmark/scanner-system.md
@@ -0,0 +1,223 @@
+# cmark — Scanner System
+
+## Overview
+
+The scanner system (`scanners.h`, `scanners.re`, `scanners.c`) provides fast pattern-matching functions used throughout cmark's block and inline parsers. The scanners are generated from re2c specifications and compiled into optimized C switch-statement automata. They perform context-free matching only (no backtracking, no captures beyond match length).
+
+## Architecture
+
+### Source Files
+
+- `scanners.re` — re2c source with pattern specifications
+- `scanners.c` — Generated C code (committed to the repository, regenerated manually)
+- `scanners.h` — Public declarations (macro wrappers and function prototypes)
+
+### Generation
+
+Scanners are regenerated from re2c source via:
+```bash
+re2c --case-insensitive -b -i --no-generation-date --8bit -o scanners.c scanners.re
+```
+
+Flags:
+- `--case-insensitive` — Case-insensitive matching
+- `-b` — Use bit vectors for character classes
+- `-i` — Use `if` statements instead of `switch`
+- `--no-generation-date` — Reproducible output
+- `--8bit` — 8-bit character width
+
+The generated code consists of state machines implemented as nested `switch`/`if` blocks with direct character comparisons. There are no regular expression structs, no DFA tables — the patterns are compiled directly into C control flow.
+
+## Scanner Interface
+
+### The `_scan_at` Wrapper
+
+```c
+#define _scan_at(scanner, s, p) scanner(s->input.data, s->input.len, p)
+```
+
+All scanner functions share the signature:
+```c
+bufsize_t scan_PATTERN(const unsigned char *s, bufsize_t len, bufsize_t offset);
+```
+
+Parameters:
+- `s` — Input byte string
+- `len` — Total length of `s`
+- `offset` — Starting position within `s`
+
+Return value:
+- Length of the match (in bytes) if successful
+- `0` if no match at the given position
+
+### Common Pattern
+
+```c
+// In blocks.c:
+matched = _scan_at(&scan_thematic_break, &input, first_nonspace);
+
+// In inlines.c:
+matched = _scan_at(&scan_autolink_uri, subj, subj->pos);
+```
+
+## Scanner Functions
+
+### Block Structure Scanners
+
+| Scanner | Purpose | Used In |
+|---------|---------|---------|
+| `scan_thematic_break` | Matches `***`, `---`, `___` (with optional spaces) | `blocks.c` |
+| `scan_atx_heading_start` | Matches `#{1,6}` followed by space or EOL | `blocks.c` |
+| `scan_setext_heading_line` | Matches `=+` or `-+` at line start | `blocks.c` |
+| `scan_open_code_fence` | Matches `` ``` `` or `~~~` (3+ fence chars) | `blocks.c` |
+| `scan_close_code_fence` | Matches closing fence (≥ opening length) | `blocks.c` |
+| `scan_html_block_start` | Matches HTML block type 1-6 openers (returns the matched type) | `blocks.c` |
+| `scan_html_block_start_7` | Matches HTML block type 7 openers (complete open/close tags) | `blocks.c` |
+| `scan_html_block_end_1` | Matches `</script>`, `</pre>`, `</style>` | `blocks.c` |
+| `scan_html_block_end_2` | Matches `-->` | `blocks.c` |
+| `scan_html_block_end_3` | Matches `?>` | `blocks.c` |
+| `scan_html_block_end_4` | Matches `>` | `blocks.c` |
+| `scan_html_block_end_5` | Matches `]]>` | `blocks.c` |
+| `scan_link_title` | Matches `"..."`, `'...'`, or `(...)` titles | `inlines.c` |
+
+### Inline Scanners
+
+| Scanner | Purpose | Used In |
+|---------|---------|---------|
+| `scan_autolink_uri` | Matches URI autolinks `<scheme:path>` | `inlines.c` |
+| `scan_autolink_email` | Matches email autolinks `<user@host>` | `inlines.c` |
+| `scan_html_tag` | Matches inline HTML tags (open, close, comment, PI, CDATA, declaration) | `inlines.c` |
+| `scan_entity` | Matches HTML entities (`&amp;`, `&#123;`, `&#x1F;`) | `inlines.c` |
+| `scan_dangerous_url` | Matches `javascript:`, `vbscript:`, `file:`, `data:` URLs | `html.c` |
+| `scan_spacechars` | Matches runs of spaces and tabs | `inlines.c` |
+
+### Link/Reference Scanners
+
+| Scanner | Purpose | Used In |
+|---------|---------|---------|
+| `scan_link_url` | Matches link destinations (parenthesized or bare) | `inlines.c` |
+| `scan_link_title` | Matches quoted link titles | `inlines.c` |
+
+## Scanner Patterns (from `scanners.re`)
+
+### Thematic Break
+```
+thematic_break = (('*' [ \t]*){3,} | ('-' [ \t]*){3,} | ('_' [ \t]*){3,}) [ \t]* [\n]
+```
+Three or more `*`, `-`, or `_` characters, optionally separated by spaces/tabs.
+
+### ATX Heading
+```
+atx_heading_start = [#]{1,6} ([ \t]+ | [\n])
+```
+1-6 `#` characters followed by space/tab or newline.
+
+### Code Fence
+```
+open_code_fence = [`]{3,} [^`\n]* [\n] | [~]{3,} [^\n]* [\n]
+```
+Three or more backticks (not followed by backtick in info string) or three or more tildes.
+
+### HTML Block Start (Types 1-7)
+
+The CommonMark spec defines 7 types of HTML blocks, each matched by different scanners:
+
+1. `<script>`, `<pre>`, `<style>` (case-insensitive)
+2. `<!--`
+3. `<?`
+4. `<!` followed by uppercase letter (declaration)
+5. `<![CDATA[`
+6. HTML tags from a specific set (e.g., `<div>`, `<table>`, `<h1>`, etc.)
+7. Complete open/close tags (not `<script>`, `<pre>`, `<style>`)
+
+### Autolink URI
+```
+autolink_uri = '<' scheme ':' [^\x00-\x20<>]* '>'
+scheme = [A-Za-z][A-Za-z0-9+.\-]{1,31}
+```
+
+### Autolink Email
+```
+autolink_email = '<' [A-Za-z0-9.!#$%&'*+/=?^_`{|}~-]+ '@'
+ [A-Za-z0-9]([A-Za-z0-9-]{0,61}[A-Za-z0-9])?
+ ('.' [A-Za-z0-9]([A-Za-z0-9-]{0,61}[A-Za-z0-9])?)* '>'
+```
+
+### HTML Entity
+```
+entity = '&' ('#' ('x'|'X') [0-9a-fA-F]{1,6} | '#' [0-9]{1,7} | [A-Za-z][A-Za-z0-9]{1,31}) ';'
+```
+
+### Dangerous URL
+```
+dangerous_url = ('javascript' | 'vbscript' | 'file' | 'data'
+ (not followed by image MIME types)) ':'
+```
+
+Data URLs are allowed if followed by `image/png`, `image/gif`, `image/jpeg`, or `image/webp`.
+
+### HTML Tag
+```
+html_tag = open_tag | close_tag | html_comment | processing_instruction | declaration | cdata
+open_tag = '<' tag_name attribute* '/' ? '>'
+close_tag = '</' tag_name [ \t]* '>'
+html_comment = '<!--' ...
+processing_instruction = '<?' ...
+declaration = '<!' [A-Z]+ ...
+cdata = '<![CDATA[' ...
+```
+
+## Generated Code Structure
+
+The generated `scanners.c` contains functions like:
+
+```c
+bufsize_t _scan_thematic_break(const unsigned char *p, bufsize_t len,
+ bufsize_t offset) {
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p + offset;
+ // ... re2c-generated state machine
+ // Returns (bufsize_t)(p - start) on match, 0 on failure
+}
+```
+
+Each function is a self-contained state machine that:
+1. Starts at `p + offset`
+2. Walks forward byte-by-byte through the pattern
+3. Returns the match length or 0
+
+The generated code is typically hundreds of lines per scanner function, with deeply nested `if`/`switch` chains for the character transitions.
+
+## Performance Characteristics
+
+- **O(n)** in the length of the match — each scanner reads input exactly once
+- **No backtracking** — re2c generates DFA-based scanners
+- **No allocation** — scanners work on existing buffers, no heap allocation
+- **Branch prediction friendly** — the common case (no match) typically hits the first branch
+
+## Usage Example
+
+A typical block-parsing sequence using scanners:
+
+```c
+// Check if line starts a thematic break
+if (!indented &&
+ (input.data[first_nonspace] == '*' ||
+ input.data[first_nonspace] == '-' ||
+ input.data[first_nonspace] == '_')) {
+ matched = _scan_at(&scan_thematic_break, &input, first_nonspace);
+ if (matched) {
+ // Create thematic break node
+ }
+}
+```
+
+The manual character check before calling the scanner is an optimization — it avoids the function call overhead when the first character can't possibly start the pattern.
+
+## Cross-References
+
+- [scanners.h](../../cmark/src/scanners.h) — Scanner declarations and `_scan_at` macro
+- [scanners.re](../../cmark/src/scanners.re) — re2c source (if available)
+- [block-parsing.md](block-parsing.md) — Block-level scanner usage
+- [inline-parsing.md](inline-parsing.md) — Inline scanner usage
+- [html-renderer.md](html-renderer.md) — `scan_dangerous_url()` for URL safety
diff --git a/docs/handbook/cmark/testing.md b/docs/handbook/cmark/testing.md
new file mode 100644
index 0000000000..8797629e48
--- /dev/null
+++ b/docs/handbook/cmark/testing.md
@@ -0,0 +1,281 @@
+# cmark — Testing
+
+## Overview
+
+cmark has a multi-layered testing infrastructure: C API unit tests, spec conformance tests, pathological input tests, fuzz testing, and memory sanitizers. The build system integrates all of these through CMake and CTest.
+
+## Test Infrastructure (CMakeLists.txt)
+
+### API Tests
+
+```cmake
+add_executable(api_test api_test/main.c)
+target_link_libraries(api_test libcmark_static)
+add_test(NAME api_test COMMAND api_test)
+```
+
+The API test executable links against the static library and tests the public C API directly.
+
+### Spec Tests
+
+```cmake
+add_test(NAME spec_test
+ COMMAND ${PYTHON_EXECUTABLE} test/spec_tests.py
+ --spec test/spec.txt
+ --program $<TARGET_FILE:cmark>)
+```
+
+Spec tests run the `cmark` binary against the CommonMark specification. The Python script `spec_tests.py` parses the spec file, extracts input/output examples, runs `cmark` on each input, and compares the output.
+
+### Pathological Tests
+
+```cmake
+add_test(NAME pathological_test
+ COMMAND ${PYTHON_EXECUTABLE} test/pathological_tests.py
+ --program $<TARGET_FILE:cmark>)
+```
+
+These tests verify that cmark handles pathological inputs (deeply nested structures, long runs of special characters) without excessive time or memory usage.
+
+### Smart Punctuation Tests
+
+```cmake
+add_test(NAME smart_punct_test
+ COMMAND ${PYTHON_EXECUTABLE} test/spec_tests.py
+ --spec test/smart_punct.txt
+ --program $<TARGET_FILE:cmark>
+ --extensions "")
+```
+
+Tests for the `CMARK_OPT_SMART` option (curly quotes, em/en dashes, ellipses).
+
+### Roundtrip Tests
+
+```cmake
+add_test(NAME roundtrip_test
+ COMMAND ${PYTHON_EXECUTABLE} test/roundtrip_tests.py
+ --spec test/spec.txt
+ --program $<TARGET_FILE:cmark>)
+```
+
+Roundtrip tests verify that `cmark -t commonmark | cmark -t html` produces the same HTML as direct `cmark -t html`.
+
+### Entity Tests
+
+```cmake
+add_test(NAME entity_test
+ COMMAND ${PYTHON_EXECUTABLE} test/spec_tests.py
+ --spec test/entity.txt
+ --program $<TARGET_FILE:cmark>)
+```
+
+Tests HTML entity handling.
+
+### Regression Tests
+
+```cmake
+add_test(NAME regression_test
+ COMMAND ${PYTHON_EXECUTABLE} test/spec_tests.py
+ --spec test/regression.txt
+ --program $<TARGET_FILE:cmark>)
+```
+
+Regression tests cover previously discovered bugs.
+
+## API Test (`api_test/main.c`)
+
+The API test file is a single C source file with test functions covering every public API function. Test patterns used:
+
+### Test Macros
+
+```c
+#define OK(test, msg) \
+ if (test) { passes++; } \
+ else { failures++; fprintf(stderr, "FAIL: %s\n %s\n", __func__, msg); }
+
+#define INT_EQ(actual, expected, msg) \
+ if ((actual) == (expected)) { passes++; } \
+ else { failures++; fprintf(stderr, "FAIL: %s\n Expected %d got %d: %s\n", \
+ __func__, expected, actual, msg); }
+
+#define STR_EQ(actual, expected, msg) \
+ if (strcmp(actual, expected) == 0) { passes++; } \
+ else { failures++; fprintf(stderr, "FAIL: %s\n Expected \"%s\" got \"%s\": %s\n", \
+ __func__, expected, actual, msg); }
+```
+
+### Test Categories
+
+1. **Version tests**: Verify `cmark_version()` and `cmark_version_string()` return correct values
+2. **Constructor tests**: `cmark_node_new()` for each node type
+3. **Accessor tests**: Get/set for heading level, list type, list tight, content, etc.
+4. **Tree manipulation tests**: `cmark_node_append_child()`, `cmark_node_insert_before()`, etc.
+5. **Parser tests**: `cmark_parse_document()`, streaming `cmark_parser_feed()` + `cmark_parser_finish()`
+6. **Renderer tests**: Verify HTML, XML, man, LaTeX, CommonMark output for known inputs
+7. **Iterator tests**: `cmark_iter_new()`, traversal order, `cmark_iter_reset()`
+8. **Memory tests**: Custom allocator, `cmark_node_free()`, no leaks
+
+### Example Test Function
+
+```c
+static void test_md_to_html(const char *markdown, const char *expected_html,
+ const char *msg) {
+ char *html = cmark_markdown_to_html(markdown, strlen(markdown),
+ CMARK_OPT_DEFAULT);
+ STR_EQ(html, expected_html, msg);
+ free(html);
+}
+```
+
+## Spec Test Format
+
+The spec file (`test/spec.txt`) uses a specific format:
+
+```
+```````````````````````````````` example
+Markdown input here
+.
+<p>Expected HTML output here</p>
+````````````````````````````````
+```
+
+Each example is delimited by `example` markers. The `.` on a line by itself separates input from expected output.
+
+The Python test runner (`test/spec_tests.py`):
+1. Parses the spec file to extract examples
+2. For each example, runs the `cmark` binary with the input
+3. Compares the actual output with the expected output
+4. Reports pass/fail for each example
+
+## Pathological Input Tests
+
+The pathological test file (`test/pathological_tests.py`) generates adversarial inputs designed to trigger worst-case behavior:
+
+- Deeply nested block quotes (`> > > > > ...`)
+- Deeply nested lists
+- Long runs of backticks
+- Many consecutive closing brackets `]]]]]...]`
+- Long emphasis delimiter runs `***...***`
+- Repeated link definitions
+
+Each test verifies that cmark completes within a reasonable time bound (not quadratic or exponential).
+
+## Fuzzing
+
+### LibFuzzer
+
+```cmake
+if(CMARK_LIB_FUZZER)
+ add_executable(cmark_fuzz fuzz/cmark_fuzz.c)
+ target_link_libraries(cmark_fuzz libcmark_static)
+ target_compile_options(cmark_fuzz PRIVATE -fsanitize=fuzzer)
+ target_link_options(cmark_fuzz PRIVATE -fsanitize=fuzzer)
+endif()
+```
+
+The fuzzer entry point (`fuzz/cmark_fuzz.c`) implements:
+```c
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ // Parse data as CommonMark
+ // Render to all formats
+ // Free everything
+ // Return 0
+}
+```
+
+This subjects all parsers and renderers to random input.
+
+### Building with Fuzzing
+
+```bash
+cmake -DCMARK_LIB_FUZZER=ON \
+ -DCMAKE_C_COMPILER=clang \
+ -DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link,address" \
+ ..
+make
+./cmark_fuzz corpus/
+```
+
+## Sanitizer Builds
+
+### Address Sanitizer (ASan)
+
+```bash
+cmake -DCMAKE_BUILD_TYPE=Asan ..
+```
+
+Sets flags: `-fsanitize=address -fno-omit-frame-pointer`
+
+Detects:
+- Buffer overflows (stack, heap, global)
+- Use-after-free
+- Double-free
+- Memory leaks (LSAN)
+
+### Undefined Behavior Sanitizer (UBSan)
+
+```bash
+cmake -DCMAKE_BUILD_TYPE=Ubsan ..
+```
+
+Sets flags: `-fsanitize=undefined`
+
+Detects:
+- Signed integer overflow
+- Null pointer dereference
+- Misaligned access
+- Invalid shift
+- Out-of-bounds array access
+
+## Running Tests
+
+### Full Test Suite
+
+```bash
+mkdir build && cd build
+cmake ..
+make
+ctest
+```
+
+### Verbose Output
+
+```bash
+ctest --verbose
+```
+
+### Single Test
+
+```bash
+ctest -R api_test
+ctest -R spec_test
+```
+
+### With ASan
+
+```bash
+mkdir build-asan && cd build-asan
+cmake -DCMAKE_BUILD_TYPE=Asan ..
+make
+ctest
+```
+
+## Test Data Files
+
+| File | Purpose |
+|------|---------|
+| `test/spec.txt` | CommonMark specification with examples |
+| `test/smart_punct.txt` | Smart punctuation examples |
+| `test/entity.txt` | HTML entity test cases |
+| `test/regression.txt` | Regression test cases |
+| `test/spec_tests.py` | Spec test runner script |
+| `test/pathological_tests.py` | Pathological input tests |
+| `test/roundtrip_tests.py` | CommonMark roundtrip tests |
+| `api_test/main.c` | C API unit tests |
+| `fuzz/cmark_fuzz.c` | LibFuzzer entry point |
+
+## Cross-References
+
+- [building.md](building.md) — Build configurations including test builds
+- [public-api.md](public-api.md) — API functions tested by `api_test`
+- [cli-usage.md](cli-usage.md) — The `cmark` binary tested by spec tests
diff --git a/docs/handbook/cmark/utf8-handling.md b/docs/handbook/cmark/utf8-handling.md
new file mode 100644
index 0000000000..c5bde6a320
--- /dev/null
+++ b/docs/handbook/cmark/utf8-handling.md
@@ -0,0 +1,340 @@
+# cmark — UTF-8 Handling
+
+## Overview
+
+The UTF-8 module (`utf8.c`, `utf8.h`) provides Unicode support for cmark: encoding, decoding, validation, iteration, case folding, and character classification. It incorporates data from `utf8proc` for case folding and character properties.
+
+## UTF-8 Encoding Fundamentals
+
+The module handles all four UTF-8 byte patterns:
+
+| Codepoint Range | Byte 1 | Byte 2 | Byte 3 | Byte 4 |
+|----------------|--------|--------|--------|--------|
+| U+0000–U+007F | 0xxxxxxx | | | |
+| U+0080–U+07FF | 110xxxxx | 10xxxxxx | | |
+| U+0800–U+FFFF | 1110xxxx | 10xxxxxx | 10xxxxxx | |
+| U+10000–U+10FFFF | 11110xxx | 10xxxxxx | 10xxxxxx | 10xxxxxx |
+
+## Byte Classification Table
+
+```c
+static const uint8_t utf8proc_utf8class[256] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00-0x0F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10-0x1F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20-0x2F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30-0x3F
+ // ...
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80-0x8F (continuation)
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90-0x9F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0-0xAF
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0-0xBF
+ 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC0-0xCF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xD0-0xDF
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE0-0xEF
+ 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0-0xFF
+};
+```
+
+Lookup table that maps each byte to its UTF-8 sequence length:
+- `1` → ASCII single-byte character
+- `2` → Two-byte sequence lead byte (0xC2-0xDF)
+- `3` → Three-byte sequence lead byte (0xE0-0xEF)
+- `4` → Four-byte sequence lead byte (0xF0-0xF4)
+- `0` → Continuation byte (0x80-0xBF) or invalid lead byte (0xC0-0xC1, 0xF5-0xFF)
+
+Note: 0xC0 and 0xC1 are marked as `0` (invalid) because they would encode codepoints < 0x80, which is an overlong encoding.
+
+## UTF-8 Encoding
+
+```c
+void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
+ uint8_t dst[4];
+ bufsize_t len = 0;
+
+ assert(uc >= 0);
+
+ if (uc < 0x80) {
+ dst[0] = (uint8_t)(uc);
+ len = 1;
+ } else if (uc < 0x800) {
+ dst[0] = (uint8_t)(0xC0 + (uc >> 6));
+ dst[1] = 0x80 + (uc & 0x3F);
+ len = 2;
+ } else if (uc == 0xFFFF) {
+ // Invalid codepoint — encode replacement char
+ dst[0] = 0xEF; dst[1] = 0xBF; dst[2] = 0xBD;
+ len = 3;
+ } else if (uc == 0xFFFE) {
+ // Invalid codepoint — encode replacement char
+ dst[0] = 0xEF; dst[1] = 0xBF; dst[2] = 0xBD;
+ len = 3;
+ } else if (uc < 0x10000) {
+ dst[0] = (uint8_t)(0xE0 + (uc >> 12));
+ dst[1] = 0x80 + ((uc >> 6) & 0x3F);
+ dst[2] = 0x80 + (uc & 0x3F);
+ len = 3;
+ } else if (uc < 0x110000) {
+ dst[0] = (uint8_t)(0xF0 + (uc >> 18));
+ dst[1] = 0x80 + ((uc >> 12) & 0x3F);
+ dst[2] = 0x80 + ((uc >> 6) & 0x3F);
+ dst[3] = 0x80 + (uc & 0x3F);
+ len = 4;
+ } else {
+ // Out of range — encode replacement char U+FFFD
+ dst[0] = 0xEF; dst[1] = 0xBF; dst[2] = 0xBD;
+ len = 3;
+ }
+
+ cmark_strbuf_put(buf, dst, len);
+}
+```
+
+Encodes a single Unicode codepoint as UTF-8 into a `cmark_strbuf`. Invalid codepoints (U+FFFE, U+FFFF, > U+10FFFF) are replaced with U+FFFD (replacement character).
+
+## UTF-8 Validation and Iteration
+
+```c
+void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
+ bufsize_t size) {
+ bufsize_t i = 0;
+
+ while (i < size) {
+ bufsize_t byte_length = utf8proc_utf8class[line[i]];
+ int32_t codepoint = -1;
+
+ if (byte_length == 0) {
+ // Invalid lead byte — replace
+ cmark_utf8proc_encode_char(0xFFFD, dest);
+ i++;
+ continue;
+ }
+
+ // Check we have enough bytes
+ if (i + byte_length > size) {
+ // Truncated sequence — replace
+ cmark_utf8proc_encode_char(0xFFFD, dest);
+ i++;
+ continue;
+ }
+
+ // Decode and validate
+ switch (byte_length) {
+ case 1:
+ codepoint = line[i];
+ break;
+ case 2:
+ // Validate continuation byte
+ if ((line[i+1] & 0xC0) != 0x80) { /* invalid */ }
+ codepoint = ((line[i] & 0x1F) << 6) | (line[i+1] & 0x3F);
+ break;
+ case 3:
+ // Validate continuation bytes + overlong + surrogates
+ codepoint = ((line[i] & 0x0F) << 12) |
+ ((line[i+1] & 0x3F) << 6) |
+ (line[i+2] & 0x3F);
+ // Reject surrogates (U+D800-U+DFFF) and overlongs
+ break;
+ case 4:
+ // Validate continuation bytes + overlongs + max codepoint
+ codepoint = ((line[i] & 0x07) << 18) |
+ ((line[i+1] & 0x3F) << 12) |
+ ((line[i+2] & 0x3F) << 6) |
+ (line[i+3] & 0x3F);
+ break;
+ }
+
+ if (codepoint < 0) {
+ cmark_utf8proc_encode_char(0xFFFD, dest);
+ i++;
+ } else {
+ cmark_utf8proc_encode_char(codepoint, dest);
+ i += byte_length;
+ }
+ }
+}
+```
+
+This function validates UTF-8 and replaces invalid sequences with U+FFFD. It enforces:
+- No invalid lead bytes
+- No truncated sequences
+- No invalid continuation bytes
+- No overlong encodings
+- No surrogate codepoints (U+D800-U+DFFF)
+
+### Validation Rules (RFC 3629)
+
+For 3-byte sequences:
+```c
+// Reject overlongs: first byte 0xE0 requires second byte >= 0xA0
+if (line[i] == 0xE0 && line[i+1] < 0xA0) { /* overlong */ }
+// Reject surrogates: first byte 0xED requires second byte < 0xA0
+if (line[i] == 0xED && line[i+1] >= 0xA0) { /* surrogate */ }
+```
+
+For 4-byte sequences:
+```c
+// Reject overlongs: first byte 0xF0 requires second byte >= 0x90
+if (line[i] == 0xF0 && line[i+1] < 0x90) { /* overlong */ }
+// Reject codepoints > U+10FFFF: first byte 0xF4 requires second byte < 0x90
+if (line[i] == 0xF4 && line[i+1] >= 0x90) { /* out of range */ }
+```
+
+## UTF-8 Iterator
+
+```c
+void cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
+ int32_t *dst) {
+ *dst = -1;
+ if (str_len <= 0) return;
+
+ uint8_t length = utf8proc_utf8class[str[0]];
+ if (!length) return;
+ if (str_len >= length) {
+ switch (length) {
+ case 1:
+ *dst = str[0];
+ break;
+ case 2:
+ *dst = ((int32_t)(str[0] & 0x1F) << 6) | (str[1] & 0x3F);
+ break;
+ case 3:
+ *dst = ((int32_t)(str[0] & 0x0F) << 12) |
+ ((int32_t)(str[1] & 0x3F) << 6) |
+ (str[2] & 0x3F);
+ // Reject surrogates:
+ if (*dst >= 0xD800 && *dst < 0xE000) *dst = -1;
+ break;
+ case 4:
+ *dst = ((int32_t)(str[0] & 0x07) << 18) |
+ ((int32_t)(str[1] & 0x3F) << 12) |
+ ((int32_t)(str[2] & 0x3F) << 6) |
+ (str[3] & 0x3F);
+ if (*dst > 0x10FFFF) *dst = -1;
+ break;
+ }
+ }
+}
+```
+
+Decodes a single UTF-8 codepoint from a byte string. Sets `*dst` to -1 on error.
+
+## Case Folding
+
+```c
+void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
+ bufsize_t len) {
+ int32_t c;
+ bufsize_t i = 0;
+
+ while (i < len) {
+ bufsize_t char_len = cmark_utf8proc_charlen(str + i, len - i);
+ if (char_len < 0) {
+ cmark_utf8proc_encode_char(0xFFFD, dest);
+ i += 1;
+ continue;
+ }
+ cmark_utf8proc_iterate(str + i, char_len, &c);
+ if (c >= 0) {
+ // Look up case fold mapping
+ const int32_t *fold = cmark_utf8proc_case_fold_info(c);
+ if (fold) {
+ // Some characters fold to multiple codepoints
+ while (*fold >= 0) {
+ cmark_utf8proc_encode_char(*fold, dest);
+ fold++;
+ }
+ } else {
+ cmark_utf8proc_encode_char(c, dest);
+ }
+ }
+ i += char_len;
+ }
+}
+```
+
+Performs Unicode case folding (not lowercasing — case folding is more aggressive and designed for case-insensitive comparison). Used for normalizing link reference labels.
+
+### Case Fold Lookup
+
+```c
+static const int32_t *cmark_utf8proc_case_fold_info(int32_t c);
+```
+
+Uses a sorted table `cf_table` and binary search to find case fold mappings. Each entry maps a codepoint to one or more replacement codepoints (some characters fold to multiple characters, e.g., `ß` → `ss`).
+
+The table uses sentinel value `-1` to terminate multi-codepoint sequences.
+
+## Character Classification
+
+### `cmark_utf8proc_is_space()`
+
+```c
+int cmark_utf8proc_is_space(int32_t c) {
+ // ASCII spaces
+ if (c < 0x80) {
+ return (c == 9 || c == 10 || c == 12 || c == 13 || c == 32);
+ }
+ // Unicode Zs category
+ return (c == 0xa0 || c == 0x1680 ||
+ (c >= 0x2000 && c <= 0x200a) ||
+ c == 0x202f || c == 0x205f || c == 0x3000);
+}
+```
+
+Matches ASCII whitespace (HT, LF, FF, CR, SP) and Unicode Zs (space separator) characters including:
+- U+00A0 (NBSP)
+- U+1680 (Ogham space)
+- U+2000-U+200A (various typographic spaces)
+- U+202F (narrow NBSP)
+- U+205F (medium mathematical space)
+- U+3000 (ideographic space)
+
+### `cmark_utf8proc_is_punctuation()`
+
+```c
+int cmark_utf8proc_is_punctuation(int32_t c) {
+ // ASCII punctuation ranges
+ if (c < 128) {
+ return (c >= 33 && c <= 47) ||
+ (c >= 58 && c <= 64) ||
+ (c >= 91 && c <= 96) ||
+ (c >= 123 && c <= 126);
+ }
+ // Unicode Pc, Pd, Pe, Pf, Pi, Po, Ps categories
+ // Uses a table-driven approach for Unicode punctuation
+}
+```
+
+Returns true for ASCII punctuation (`!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `-`, `.`, `/`, `:`, `;`, `<`, `=`, `>`, `?`, `@`, `[`, `\`, `]`, `^`, `_`, `` ` ``, `{`, `|`, `}`, `~`) and Unicode punctuation (categories Pc through Ps).
+
+These classification functions are critical for inline parsing, specifically for delimiter run classification — determining whether a `*` or `_` run is left-flanking or right-flanking depends on whether adjacent characters are spaces or punctuation.
+
+## Helper Functions
+
+### `cmark_utf8proc_charlen()`
+
+```c
+static CMARK_INLINE bufsize_t cmark_utf8proc_charlen(const uint8_t *str,
+ bufsize_t str_len) {
+ bufsize_t length = utf8proc_utf8class[str[0]];
+ if (!length || str_len < length) return -length;
+ return length;
+}
+```
+
+Returns the byte length of the UTF-8 character at the given position. Returns a non-positive value on error: a truncated sequence yields the negated expected length, while an invalid lead byte (class 0) yields `-length`, i.e. `0` — so callers must treat any result `<= 0` as an error, not just negative values.
+
+## Usage in cmark
+
+1. **Input validation**: `cmark_utf8proc_check()` is called on input to replace invalid UTF-8 with U+FFFD
+2. **Reference normalization**: `cmark_utf8proc_case_fold()` is used by `normalize_reference()` in `references.c` for case-insensitive reference label matching
+3. **Delimiter classification**: `cmark_utf8proc_is_space()` and `cmark_utf8proc_is_punctuation()` are used in `inlines.c` for the left-flanking/right-flanking delimiter run rules
+4. **Entity decoding**: `cmark_utf8proc_encode_char()` is used when decoding HTML entities and numeric character references to produce their UTF-8 representation
+5. **Renderer output**: `cmark_render_code_point()` in `render.c` calls `cmark_utf8proc_encode_char()` for multi-byte character output
+
+## Cross-References
+
+- [utf8.c](../../cmark/src/utf8.c) — Implementation
+- [utf8.h](../../cmark/src/utf8.h) — Public interface
+- [inline-parsing.md](inline-parsing.md) — Uses character classification for delimiter rules
+- [reference-system.md](reference-system.md) — Uses case folding for label normalization
diff --git a/docs/handbook/cmark/xml-renderer.md b/docs/handbook/cmark/xml-renderer.md
new file mode 100644
index 0000000000..83218c7ef2
--- /dev/null
+++ b/docs/handbook/cmark/xml-renderer.md
@@ -0,0 +1,291 @@
+# cmark — XML Renderer
+
+## Overview
+
+The XML renderer (`xml.c`) produces an XML representation of the AST. Like the HTML renderer, it writes directly to a `cmark_strbuf` buffer rather than using the generic render framework. The output conforms to the CommonMark DTD.
+
+## Entry Point
+
+```c
+char *cmark_render_xml(cmark_node *root, int options);
+```
+
+Returns a complete XML document string. The caller must free the result.
+
+### Implementation
+
+```c
+char *cmark_render_xml(cmark_node *root, int options) {
+ char *result;
+ cmark_strbuf xml = CMARK_BUF_INIT(root->mem);
+ cmark_event_type ev_type;
+ cmark_node *cur;
+ struct render_state state = {&xml, 0};
+ cmark_iter *iter = cmark_iter_new(root);
+
+ cmark_strbuf_puts(&xml,
+ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+ "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n");
+
+ // optionally: <?xml-model href="CommonMark.rnc" ...?>
+ while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+ cur = cmark_iter_get_node(iter);
+ S_render_node(cur, ev_type, &state, options);
+ }
+ result = (char *)cmark_strbuf_detach(&xml);
+ cmark_iter_free(iter);
+ return result;
+}
+```
+
+## Render State
+
+```c
+struct render_state {
+ cmark_strbuf *xml; // Output buffer
+ int indent; // Current indentation level (number of spaces)
+};
+```
+
+The `indent` state tracks nesting depth, incremented by 2 for each container node entered.
+
+## XML Escaping
+
+```c
+static CMARK_INLINE void escape_xml(cmark_strbuf *dest, const unsigned char *source,
+ bufsize_t length) {
+ houdini_escape_html0(dest, source, length, 0);
+}
+```
+
+Escapes `<`, `>`, `&`, and `"` to their XML entity equivalents.
+
+## Indentation
+
+```c
+static void indent(struct render_state *state) {
+ int i;
+ for (i = 0; i < state->indent; i++) {
+ cmark_strbuf_putc(state->xml, ' ');
+ }
+}
+```
+
+Each level of nesting adds 2 spaces of indentation.
+
+## Source Position Attributes
+
+```c
+static void S_render_sourcepos(cmark_node *node, cmark_strbuf *xml, int options) {
+ char buffer[BUFFER_SIZE];
+ if (CMARK_OPT_SOURCEPOS & options) {
+ snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"",
+ cmark_node_get_start_line(node), cmark_node_get_start_column(node),
+ cmark_node_get_end_line(node), cmark_node_get_end_column(node));
+ cmark_strbuf_puts(xml, buffer);
+ }
+}
+```
+
+When `CMARK_OPT_SOURCEPOS` is active, XML elements receive `sourcepos="line:col-line:col"` attributes.
+
+## Node Type Name Table
+
+```c
+static const char *S_type_string(cmark_node *node) {
+ if (node->extension && node->extension->xml_tag_name_func) {
+ return node->extension->xml_tag_name_func(node->extension, node);
+ }
+ switch (node->type) {
+ case CMARK_NODE_DOCUMENT: return "document";
+ case CMARK_NODE_BLOCK_QUOTE: return "block_quote";
+ case CMARK_NODE_LIST: return "list";
+ case CMARK_NODE_ITEM: return "item";
+ case CMARK_NODE_CODE_BLOCK: return "code_block";
+ case CMARK_NODE_HTML_BLOCK: return "html_block";
+ case CMARK_NODE_CUSTOM_BLOCK: return "custom_block";
+ case CMARK_NODE_PARAGRAPH: return "paragraph";
+ case CMARK_NODE_HEADING: return "heading";
+ case CMARK_NODE_THEMATIC_BREAK: return "thematic_break";
+ case CMARK_NODE_TEXT: return "text";
+ case CMARK_NODE_SOFTBREAK: return "softbreak";
+ case CMARK_NODE_LINEBREAK: return "linebreak";
+ case CMARK_NODE_CODE: return "code";
+ case CMARK_NODE_HTML_INLINE: return "html_inline";
+ case CMARK_NODE_CUSTOM_INLINE: return "custom_inline";
+ case CMARK_NODE_EMPH: return "emph";
+ case CMARK_NODE_STRONG: return "strong";
+ case CMARK_NODE_LINK: return "link";
+ case CMARK_NODE_IMAGE: return "image";
+ case CMARK_NODE_NONE: return "NONE";
+ }
+ return "<unknown>";
+}
+```
+
+Each node type has a fixed XML tag name. Extensions can override this via `xml_tag_name_func`.
+
+## Node Rendering Logic
+
+### Leaf Nodes vs Container Nodes
+
+The XML renderer distinguishes between leaf (literal) nodes and container nodes:
+
+**Leaf nodes** (single event — `CMARK_EVENT_ENTER` only):
+- `CMARK_NODE_CODE_BLOCK`, `CMARK_NODE_HTML_BLOCK`, `CMARK_NODE_THEMATIC_BREAK`
+- `CMARK_NODE_TEXT`, `CMARK_NODE_SOFTBREAK`, `CMARK_NODE_LINEBREAK`
+- `CMARK_NODE_CODE`, `CMARK_NODE_HTML_INLINE`
+
+**Container nodes** (paired enter/exit events):
+- `CMARK_NODE_DOCUMENT`, `CMARK_NODE_BLOCK_QUOTE`, `CMARK_NODE_LIST`, `CMARK_NODE_ITEM`
+- `CMARK_NODE_PARAGRAPH`, `CMARK_NODE_HEADING`
+- `CMARK_NODE_EMPH`, `CMARK_NODE_STRONG`, `CMARK_NODE_LINK`, `CMARK_NODE_IMAGE`
+- `CMARK_NODE_CUSTOM_BLOCK`, `CMARK_NODE_CUSTOM_INLINE`
+
+### Leaf Node Rendering
+
+Literal nodes that contain text are rendered as:
+```xml
+ <tag_name>ESCAPED TEXT</tag_name>
+```
+
+For example, a text node with content "Hello & goodbye" becomes:
+```xml
+ <text>Hello &amp; goodbye</text>
+```
+
+Nodes without text content (thematic_break, softbreak, linebreak) are rendered as self-closing:
+```xml
+ <thematic_break />
+```
+
+### Container Node Rendering (Enter)
+
+On enter, the renderer outputs:
+```xml
+ <tag_name[sourcepos][ type-specific attributes]>
+```
+
+And increments the indent level by 2.
+
+#### Type-Specific Attributes on Enter
+
+**List attributes:**
+```c
+cmark_strbuf_printf(xml, " type=\"%s\" tight=\"%s\"",
+ cmark_node_get_list_type(node) == CMARK_BULLET_LIST
+ ? "bullet" : "ordered",
+ cmark_node_get_list_tight(node) ? "true" : "false");
+// For ordered lists only:
+int start = cmark_node_get_list_start(node);
+if (start != 1) {
+ snprintf(buffer, BUFFER_SIZE, " start=\"%d\"", start);
+}
+cmark_strbuf_printf(xml, " delimiter=\"%s\"",
+ cmark_node_get_list_delim(node) == CMARK_PAREN_DELIM
+ ? "paren" : "period");
+```
+
+**Heading attributes:**
+```c
+snprintf(buffer, BUFFER_SIZE, " level=\"%d\"", node->as.heading.level);
+```
+
+**Code block attributes:**
+```c
+if (node->as.code.info) {
+ cmark_strbuf_puts(xml, " info=\"");
+ escape_xml(xml, node->as.code.info, (bufsize_t)strlen((char *)node->as.code.info));
+ cmark_strbuf_putc(xml, '"');
+}
+```
+
+**Link/Image attributes:**
+```c
+cmark_strbuf_puts(xml, " destination=\"");
+escape_xml(xml, node->as.link.url, (bufsize_t)strlen((char *)node->as.link.url));
+cmark_strbuf_putc(xml, '"');
+cmark_strbuf_puts(xml, " title=\"");
+escape_xml(xml, node->as.link.title, (bufsize_t)strlen((char *)node->as.link.title));
+cmark_strbuf_putc(xml, '"');
+```
+
+**Custom block/inline attributes:**
+```c
+cmark_strbuf_puts(xml, " on_enter=\"");
+escape_xml(xml, node->as.custom.on_enter, ...);
+cmark_strbuf_puts(xml, "\" on_exit=\"");
+escape_xml(xml, node->as.custom.on_exit, ...);
+```
+
+### Container Node Rendering (Exit)
+
+On exit, the indent level is decremented by 2, and the closing tag is output:
+```xml
+ </tag_name>
+```
+
+### Extension Support
+
+Extensions can add additional XML attributes via:
+```c
+if (node->extension && node->extension->xml_attr_func) {
+ node->extension->xml_attr_func(node->extension, node, xml);
+}
+```
+
+## Example Output
+
+Given this Markdown:
+
+```markdown
+# Hello
+
+A paragraph with *emphasis* and a [link](http://example.com "title").
+```
+
+The XML output (with `CMARK_OPT_SOURCEPOS`):
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE document SYSTEM "CommonMark.dtd">
+<document sourcepos="1:1-3:65" xmlns="http://commonmark.org/xml/1.0">
+ <heading sourcepos="1:1-1:7" level="1">
+ <text>Hello</text>
+ </heading>
+ <paragraph sourcepos="3:1-3:65">
+ <text>A paragraph with </text>
+ <emph>
+ <text>emphasis</text>
+ </emph>
+ <text> and a </text>
+ <link destination="http://example.com" title="title">
+ <text>link</text>
+ </link>
+ <text>.</text>
+ </paragraph>
+</document>
+```
+
+## CommonMark DTD
+
+The output references `CommonMark.dtd`, the DTD that defines:
+- Document element as the root
+- All CommonMark block and inline element types
+- Required attributes for lists, headings, links, images, and code blocks
+- Entity definitions for the markup model
+
+## Differences from HTML Renderer
+
+1. **Full AST preservation**: XML represents the complete AST structure, including node types that HTML merges or loses (e.g., softbreak, custom blocks/inlines).
+2. **Indentation tracking**: XML output is pretty-printed with nesting-based indentation.
+3. **No tight list logic**: The `tight` attribute is stored as metadata, but does not affect paragraph rendering — paragraphs always appear as `<paragraph>` elements.
+4. **No URL safety**: URLs are output as-is (escaped for XML), no `_scan_dangerous_url()` check.
+5. **No plain text mode**: Image children are rendered structurally, not flattened to alt text.
+
+## Cross-References
+
+- [xml.c](../../cmark/src/xml.c) — Full implementation
+- [html-renderer.md](html-renderer.md) — HTML renderer comparison
+- [iterator-system.md](iterator-system.md) — Traversal mechanism used
+- [public-api.md](public-api.md) — `cmark_render_xml()` API docs
diff --git a/docs/handbook/corebinutils/architecture.md b/docs/handbook/corebinutils/architecture.md
new file mode 100644
index 0000000000..7f6342c9f0
--- /dev/null
+++ b/docs/handbook/corebinutils/architecture.md
@@ -0,0 +1,665 @@
+# Corebinutils — Architecture
+
+## Repository Layout
+
+The corebinutils tree follows a straightforward directory-per-utility layout
+with a top-level orchestrator build system:
+
+```
+corebinutils/
+├── configure # POSIX sh configure script
+├── README.md # Top-level build instructions
+├── .gitattributes
+├── .gitignore
+│
+├── config.mk # [generated] feature detection results
+├── GNUmakefile # [generated] top-level build orchestrator
+│
+├── build/ # [generated] intermediate object files
+│ ├── configure/ # Configure test artifacts and logs
+│ ├── cat/ # Per-utility build intermediates
+│ ├── chmod/
+│ ├── ...
+│ └── sh/
+│
+├── out/ # [generated] final binaries
+│ └── bin/ # Staged executables (after `make stage`)
+│
+├── contrib/ # Shared library sources
+│ ├── libc-vis/ # vis(3)/unvis(3) implementation
+│ ├── libedit/ # editline(3) library
+│ └── printf/ # Shared printf format helpers
+│
+├── cat/ # Utility: cat
+│ ├── cat.c # Main source
+│ ├── cat.1 # Manual page (groff)
+│ ├── GNUmakefile # Per-utility build rules
+│ └── README.md # Port notes and differences
+│
+├── chmod/ # Utility: chmod
+│ ├── chmod.c # Main implementation
+│ ├── mode.c # Mode parsing library (shared with mkdir)
+│ ├── mode.h # Mode parsing header
+│ ├── GNUmakefile
+│ └── chmod.1
+│
+├── dd/ # Utility: dd (multi-file)
+│ ├── dd.c # Main control flow
+│ ├── dd.h # Shared types (IO, STAT, flags)
+│ ├── extern.h # Function declarations
+│ ├── args.c # JCL argument parser
+│ ├── conv.c # Conversion functions (block/unblock/def)
+│ ├── conv_tab.c # ASCII/EBCDIC conversion tables
+│ ├── gen.c # Signal handling helpers
+│ ├── misc.c # Summary, progress, timing
+│ ├── position.c # Input/output seek positioning
+│ └── GNUmakefile
+│
+├── ed/ # Utility: ed (multi-file)
+│ ├── main.c # Command dispatch and main loop
+│ ├── ed.h # Types (line_t, undo_t, constants)
+│ ├── compat.c / compat.h # Portability shims
+│ ├── buf.c # Buffer management (scratch file)
+│ ├── glbl.c # Global command (g/re/cmd)
+│ ├── io.c # File I/O (read_file, write_file)
+│ ├── re.c # Regular expression handling
+│ ├── sub.c # Substitution command
+│ └── undo.c # Undo stack management
+│
+├── ls/ # Utility: ls (multi-file)
+│ ├── ls.c # Main logic, option parsing, directory traversal
+│ ├── ls.h # Types (entry, context, enums)
+│ ├── extern.h # Cross-module declarations
+│ ├── print.c # Output formatting (columns, long, stream)
+│ ├── cmp.c # Sort comparison functions
+│ └── util.c # Helper functions
+│
+├── ps/ # Utility: ps (multi-file)
+│ ├── ps.c # Main logic, /proc scanning
+│ ├── ps.h # Types (kinfo_proc, KINFO, VAR)
+│ ├── extern.h # Cross-module declarations
+│ ├── fmt.c # Format string parsing
+│ ├── keyword.c # Output keyword definitions
+│ ├── print.c # Field value formatting
+│ └── nlist.c # Name list handling
+│
+└── sh/ # Utility: POSIX shell
+ ├── main.c # Shell entry point
+ ├── parser.c / parser.h # Command parser
+ ├── eval.c # Command evaluator
+ ├── exec.c # Command execution
+ ├── jobs.c # Job control
+ ├── var.c # Variable management
+ ├── trap.c # Signal/trap handling
+ ├── expand.c # Parameter expansion
+ ├── redir.c # I/O redirection
+ └── ... # (60+ additional files)
+```
+
+## Build System Architecture
+
+### Two-Level Build Organization
+
+The build system has two distinct levels:
+
+1. **Top-level orchestrator** — Generated `GNUmakefile` and `config.mk` that
+ coordinate all subdirectories.
+2. **Per-utility `GNUmakefile`** — Each utility directory has its own build
+ rules. These are the source of truth and are never overwritten by
+ `configure`.
+
+The top-level `GNUmakefile` invokes subdirectory builds via recursive make:
+
+```makefile
+build-%: prepare-%
+ +env CPPFLAGS="$(CPPFLAGS)" CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" \
+ $(MAKE) -C "$*" -f GNUmakefile $(SUBMAKE_OVERRIDES) all
+```
+
+### Shared Output Directories
+
+All utilities share centralized output directories to simplify packaging:
+
+```
+build/ # Object files, organized per-utility: build/cat/, build/chmod/, ...
+out/ # Final linked binaries
+out/bin/ # Staged binaries (after `make stage`)
+```
+
+Subdirectories get symbolic links (`build -> ../build/<util>`,
+`out -> ../out`) created by the `prepare-%` target:
+
+```makefile
+prepare-%:
+ @mkdir -p "$(MONO_BUILDDIR)/$*" "$(MONO_OUTDIR)"
+ @ln -sfn "../build/$*" "$*/build"
+ @ln -sfn "../out" "$*/out"
+```
+
+### Variable Propagation
+
+The top-level Makefile passes all detected toolchain variables to
+subdirectory builds via `SUBMAKE_OVERRIDES`:
+
+```makefile
+SUBMAKE_OVERRIDES = \
+ CC="$(CC)" \
+ AR="$(AR)" \
+ AWK="$(AWK)" \
+ RANLIB="$(RANLIB)" \
+ NM="$(NM)" \
+ SH="$(SH)" \
+ CRYPTO_LIBS="$(CRYPTO_LIBS)" \
+ EDITLINE_CPPFLAGS="$(EDITLINE_CPPFLAGS)" \
+ EDITLINE_LIBS="$(EDITLINE_LIBS)" \
+ PREFIX="$(PREFIX)" \
+ BINDIR="$(BINDIR)" \
+ DESTDIR="$(DESTDIR)" \
+ CROSS_COMPILING="$(CROSS_COMPILING)" \
+ EXEEXT="$(EXEEXT)"
+```
+
+This ensures every utility builds with the same compiler, flags, and
+library configuration.
+
+### Generated vs. Maintained Files
+
+| File | Generated? | Purpose |
+|------------------|------------|--------------------------------------|
+| `configure` | No | POSIX sh configure script |
+| `config.mk` | Yes | Feature detection macros |
+| `GNUmakefile` | Yes | Top-level orchestrator |
+| `*/GNUmakefile` | No | Per-utility build rules |
+| `build/` | Yes | Object file directory tree |
+| `out/` | Yes | Binary output directory |
+
+## Configure Script Architecture
+
+### Script Structure
+
+The `configure` script is a single POSIX shell file (no autoconf) organized
+into these phases:
+
+```
+1. Initialization — Set defaults, parse CLI arguments
+2. Compiler Detection — Find musl-first C compiler
+3. Tool Detection — Find make, ar, ranlib, nm, awk, sh, pkg-config
+4. Libc Identification — Determine musl vs glibc via binary inspection
+5. Header Probing — Check for ~40 system headers
+6. Function Probing — Check for ~20 C library functions
+7. Library Probing — Check for optional libraries (crypt, dl, pthread, rt)
+8. File Generation — Write config.mk and GNUmakefile
+```
+
+### Compiler Probing
+
+The compiler detection uses three progressive tests:
+
+```sh
+# Can it compile a simple program?
+can_compile_with() { ... }
+
+# Can it compile AND run? (native builds only)
+can_run_with() { ... }
+
+# Does it support C11 stdatomic.h?
+can_compile_stdatomic_with() { ... }
+```
+
+All three must pass. For cross-compilation (`--host != --build`), the
+run test is skipped.
+
+### Feature Detection Pattern
+
+Headers and functions are probed with a consistent pattern that records
+results as Make variables and C preprocessor defines:
+
+```sh
+check_header() {
+ hdr=$1
+ macro="HAVE_$(to_macro "$hdr")" # e.g., HAVE_SYS_ACL_H
+ if try_cc "#include <$hdr>
+ int main(void) { return 0; }"; then
+ record_cpp_define "$macro" 1
+ else
+ record_cpp_define "$macro" 0
+ fi
+}
+
+check_func() {
+ func=$1
+ includes=$2
+ macro="HAVE_$(to_macro "$func")" # e.g., HAVE_COPY_FILE_RANGE
+ if try_cc "$includes
+ int main(void) { void *p = (void *)(uintptr_t)&$func; return p == 0; }"; then
+ record_cpp_define "$macro" 1
+ else
+ record_cpp_define "$macro" 0
+ fi
+}
+```
+
+### Headers Probed
+
+The configure script checks for the following headers:
+
+```
+stdlib.h stdio.h stdint.h inttypes.h stdbool.h stddef.h
+string.h strings.h unistd.h errno.h fcntl.h signal.h
+sys/types.h sys/stat.h sys/time.h sys/resource.h sys/wait.h
+sys/select.h sys/ioctl.h sys/param.h sys/socket.h netdb.h
+poll.h sys/poll.h termios.h stropts.h pthread.h
+sys/event.h sys/timerfd.h sys/acl.h attr/xattr.h linux/xattr.h
+dlfcn.h langinfo.h locale.h wchar.h wctype.h
+```
+
+### Functions Probed
+
+```
+getcwd realpath fchdir fstatat openat copy_file_range
+memmove strlcpy strlcat explicit_bzero getline getentropy
+posix_spawn clock_gettime poll kqueue timerfd_create
+pipe2 closefrom getrandom
+```
+
+### Libraries Probed
+
+| Library | Symbol | Usage |
+|----------|---------------------|------------------------------------|
+| crypt | `crypt()` | Password hashing (`ed -x` legacy) |
+| dl | `dlopen()` | Dynamic loading |
+| pthread | `pthread_create()` | Threading support |
+| rt | `clock_gettime()` | High-resolution timing |
+| util | `openpty()` | Pseudo-terminal support |
+| attr | `setxattr()` | Extended attributes (`mv`, `cp`) |
+| selinux | `is_selinux_enabled()` | SELinux label support |
+
+## Code Organization Patterns
+
+### Single-File Utility Pattern
+
+Most simple utilities follow this structure:
+
+```c
+/* SPDX license header */
+
+#include <system-headers.h>
+
+struct options { ... };
+
+static const char *progname;
+
+static void usage(void) __attribute__((__noreturn__));
+static void error_errno(const char *, ...);
+static void error_msg(const char *, ...);
+
+int main(int argc, char *argv[])
+{
+ struct options opt;
+ int ch;
+
+ progname = program_name(argv[0]);
+
+ while ((ch = getopt(argc, argv, "...")) != -1) {
+ switch (ch) {
+ case 'f': opt.force = true; break;
+ /* ... */
+ default: usage();
+ }
+ }
+ argc -= optind;
+ argv += optind;
+
+ /* Perform main operation */
+ for (int i = 0; i < argc; i++) {
+ if (process(argv[i], &opt) != 0)
+ exitval = 1;
+ }
+ return exitval;
+}
+```
+
+### Multi-File Utility Pattern
+
+Complex utilities split across files with a shared header:
+
+```
+utility/
+├── utility.c # main(), option parsing, top-level dispatch
+├── utility.h # Shared types, constants, macros
+├── extern.h # Function declarations for cross-module calls
+├── sub1.c # Functional subsystem (e.g., args.c, conv.c)
+├── sub2.c # Another subsystem (e.g., print.c, fmt.c)
+└── GNUmakefile # Build rules listing all .c files
+```
+
+### Header Guard Convention
+
+Headers use the BSD `_FILENAME_H_` pattern:
+
+```c
+#ifndef _PS_H_
+#define _PS_H_
+/* ... */
+#endif
+```
+
+### Portability Macros
+
+Common compatibility macros appear across multiple utilities:
+
+```c
+#ifndef __unused
+#define __unused __attribute__((__unused__))
+#endif
+
+#ifndef __dead2
+#define __dead2 __attribute__((__noreturn__))
+#endif
+
+#ifndef nitems
+#define nitems(array) (sizeof(array) / sizeof((array)[0]))
+#endif
+
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+#ifndef MAX
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#endif
+```
+
+### POSIX Feature Test Macros
+
+Many utilities define feature test macros at the top of their main source
+file:
+
+```c
+#define _POSIX_C_SOURCE 200809L
+```
+
+Or rely on the configure-injected flags:
+
+```
+-D_DEFAULT_SOURCE -D_XOPEN_SOURCE=700
+```
+
+## Shared Code Reuse
+
+### `mode.c` / `mode.h`
+
+The mode parsing library is shared between `chmod` and `mkdir`. It provides:
+
+- `mode_compile()` — Parse a mode string (numeric or symbolic) into a
+ compiled command array (`bitcmd_t`)
+- `mode_apply()` — Apply a compiled mode to an existing `mode_t`
+- `mode_free()` — Release compiled mode memory
+- `strmode()` — Convert `mode_t` to display string like `"drwxr-xr-x "`
+
+### `fts.c` / `fts.h`
+
+An in-tree FTS (File Tree Walk) implementation used by `cp`, `chflags`, and
+other utilities that do recursive directory traversal. This avoids depending
+on glibc's FTS implementation or `nftw(3)`.
+
+### `contrib/libc-vis/`
+
+BSD `vis(3)` / `unvis(3)` character encoding used by `ls` for safe
+display of filenames containing control characters or non-printable bytes.
+
+### Signal Name Tables
+
+`kill` and `timeout` both maintain identical `struct signal_entry` tables
+mapping signal names to numbers:
+
+```c
+struct signal_entry {
+ const char *name;
+ int number;
+};
+
+#define SIGNAL_ENTRY(name) { #name, SIG##name }
+
+static const struct signal_entry canonical_signals[] = {
+ SIGNAL_ENTRY(HUP),
+ SIGNAL_ENTRY(INT),
+ SIGNAL_ENTRY(QUIT),
+ /* ... ~30 standard signals ... */
+};
+```
+
+Both also share the same `normalize_signal_name()` function pattern that
+strips "SIG" prefixes and uppercases input.
+
+## Data Structures
+
+### Process Information (`ps`)
+
+The `ps` utility defines a Linux-compatible replacement for FreeBSD's
+`kinfo_proc`:
+
+```c
+struct kinfo_proc {
+ pid_t ki_pid, ki_ppid, ki_pgid, ki_sid;
+ dev_t ki_tdev;
+ uid_t ki_uid, ki_ruid, ki_svuid;
+ gid_t ki_groups[KI_NGROUPS];
+ char ki_comm[COMMLEN]; // 256 bytes
+ struct timeval ki_start;
+ uint64_t ki_runtime; // microseconds
+ uint64_t ki_size; // VSZ in bytes
+ uint64_t ki_rssize; // RSS in pages
+ int ki_nice;
+ char ki_stat; // BSD-like state (S,R,T,Z,D)
+ int ki_numthreads;
+ struct rusage ki_rusage;
+ /* ... */
+};
+```
+
+This struct is populated by reading `/proc/[pid]/stat` and
+`/proc/[pid]/status` files.
+
+### I/O State (`dd`)
+
+The `dd` utility uses two key structures for its I/O engine:
+
+```c
+typedef struct {
+ u_char *db; // Buffer address
+ u_char *dbp; // Current buffer I/O position
+ ssize_t dbcnt; // Current byte count in buffer
+ ssize_t dbrcnt; // Last read byte count
+ ssize_t dbsz; // Block size
+ u_int flags; // ISCHR | ISPIPE | ISTAPE | ISSEEK | NOREAD | ISTRUNC
+ const char *name; // Filename
+ int fd; // File descriptor
+ off_t offset; // Block count to skip
+	off_t seek_offset; // Sparse output seek offset
+} IO;
+
+typedef struct {
+ uintmax_t in_full, in_part; // Full/partial input blocks
+ uintmax_t out_full, out_part; // Full/partial output blocks
+ uintmax_t trunc; // Truncated records
+ uintmax_t swab; // Odd-length swab blocks
+ uintmax_t bytes; // Total bytes written
+ struct timespec start; // Start timestamp
+} STAT;
+```
+
+### Line Buffer (`ed`)
+
+The `ed` editor uses a doubly-linked list of line nodes with a scratch
+file backing store:
+
+```c
+typedef struct line {
+ struct line *q_forw; // Next line
+ struct line *q_back; // Previous line
+ off_t seek; // Offset in scratch file
+ int len; // Line length
+} line_t;
+```
+
+### File Entry (`ls`)
+
+The `ls` utility represents each directory entry with:
+
+```c
+struct entry {
+ struct stat sb;
+ struct file_time btime; // Birth time (via statx)
+ char *name; // Display name
+ char *link_target; // Symlink target (if applicable)
+ /* color, type classification, etc. */
+};
+```
+
+## Makefile Targets Reference
+
+### Top-Level Targets
+
+| Target | Description |
+|--------------------|-------------------------------------------------------|
+| `all` | Build all utilities |
+| `clean` | Remove `build/` and `out/` directories |
+| `distclean` | `clean` + remove generated `GNUmakefile`, `config.mk` |
+| `rebuild` | `clean` then `all` |
+| `reconfigure` | Re-run `./configure` |
+| `check` / `test` | Run all utility test suites |
+| `stage` | Copy binaries to `out/bin/` |
+| `install` | Copy binaries to `$DESTDIR$BINDIR` |
+| `status` | Show `out/` directory contents |
+| `list` | Print all subdirectory names |
+| `print-config` | Show active compiler and flags |
+| `help` | List available targets |
+
+### Per-Utility Targets
+
+Individual utilities can be built, cleaned, or tested:
+
+```sh
+make -f GNUmakefile build-cat # Build only cat
+make -f GNUmakefile clean-cat # Clean only cat
+make -f GNUmakefile check-cat # Test only cat
+make -f GNUmakefile cat # Alias for build-cat
+```
+
+### Target Dependencies
+
+```
+all
+ └── build-<util> (for each utility)
+ └── prepare-<util>
+ ├── mkdir -p build/<util> out/
+ ├── ln -sfn ../build/<util> <util>/build
+ └── ln -sfn ../out <util>/out
+
+stage
+ └── all
+ └── copy executables to out/bin/
+
+install
+ └── stage
+ └── copy out/bin/* to $DESTDIR$BINDIR/
+
+distclean
+ └── clean
+ └── remove build/ out/
+ └── unprepare
+ └── remove build/out symlinks from subdirs
+ └── remove GNUmakefile config.mk
+```
+
+## Cross-Compilation Support
+
+The configure script supports cross-compilation via `--host` and `--build`
+triples:
+
+```sh
+./configure --host=aarch64-linux-musl --build=x86_64-linux-musl \
+ --cc=aarch64-linux-musl-gcc
+```
+
+When `--host` differs from `--build`:
+- The executable run test (`can_run_with`) is skipped
+- `CROSS_COMPILING=1` is recorded in `config.mk`
+- The value propagates to all subdirectory builds
+
+## Typical Per-Utility GNUmakefile
+
+Each utility has a `GNUmakefile` following this general pattern:
+
+```makefile
+# cat/GNUmakefile
+
+PROG = cat
+SRCS = cat.c
+
+BUILDDIR ?= build
+OUTDIR ?= out
+
+CC ?= cc
+CPPFLAGS += -D_DEFAULT_SOURCE -D_XOPEN_SOURCE=700
+CFLAGS ?= -O2 -g -pipe
+LDFLAGS ?=
+
+OBJS = $(SRCS:.c=.o)
+OBJS := $(addprefix $(BUILDDIR)/,$(OBJS))
+
+all: $(OUTDIR)/$(PROG)
+
+$(OUTDIR)/$(PROG): $(OBJS)
+ $(CC) $(LDFLAGS) -o $@ $(OBJS) $(LDLIBS)
+
+$(BUILDDIR)/%.o: %.c
+ @mkdir -p $(dir $@)
+ $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
+
+clean:
+ rm -f $(OBJS) $(OUTDIR)/$(PROG)
+
+test:
+ @echo "SKIP: no tests for $(PROG)"
+
+.PHONY: all clean test
+```
+
+Multi-file utilities list all sources in `SRCS` and may link additional
+libraries:
+
+```makefile
+# dd/GNUmakefile
+SRCS = dd.c args.c conv.c conv_tab.c gen.c misc.c position.c
+LDLIBS += -lm # For dd's speed calculations
+```
+
+## Security Considerations
+
+### Input Validation Boundaries
+
+- **File paths**: Validated against `PATH_MAX` limits. Utilities like `rm`
+ explicitly reject `/`, `.`, and `..` as arguments.
+- **Numeric arguments**: Parsed with `strtoimax()` or `strtol()` with
+ explicit overflow checking.
+- **Signal numbers**: Validated against the compiled signal table, not
+ unchecked `atoi()`.
+- **Mode strings**: `mode_compile()` validates syntax before any filesystem
+ modification occurs.
+
+### Privilege Handling
+
+- `hostname` and `domainname` require root for set operations; they validate
+ the hostname length against the kernel's UTS namespace limit first.
+- `rm` refuses to delete `/` unless explicitly overridden.
+- `chmod -R` includes cycle detection to prevent infinite loops from symlink
+ chains.
+
+### Temporary File Safety
+
+- `ed` creates temporary scratch files in `$TMPDIR` (or `/tmp`) using
+ `mkstemp(3)`.
+- `dd` does not create temporary files — it operates on explicit input/output
+ file descriptors.
diff --git a/docs/handbook/corebinutils/building.md b/docs/handbook/corebinutils/building.md
new file mode 100644
index 0000000000..48ad098712
--- /dev/null
+++ b/docs/handbook/corebinutils/building.md
@@ -0,0 +1,429 @@
+# Corebinutils — Building
+
+## Prerequisites
+
+### Required Tools
+
+| Tool | Minimum Version | Purpose |
+|------------|----------------|-----------------------------------------|
+| C compiler | C11 support | Must support `<stdatomic.h>` |
+| `make` | GNU Make 4.x | Build orchestration |
+| `ar` | Any | Archive tool for static libraries |
+| `ranlib` | Any | Library index generation |
+| `awk` | POSIX | Build-time text processing |
+| `sh` | POSIX | Shell for scripts and tests |
+
+### Preferred Compiler: musl-based
+
+The configure script searches for musl-based compilers in this priority
+order:
+
+1. `musl-clang` — musl's Clang wrapper
+2. `clang --target=<arch>-linux-musl` — Clang targeting musl
+3. `clang --target=<arch>-unknown-linux-musl` — Clang with full triple
+4. `musl-gcc` — musl's GCC wrapper
+5. `clang` — Generic Clang (libc detected from output binary)
+6. `cc` — System default
+7. `gcc` — GNU CC
+
+If a glibc toolchain is detected, configure fails with:
+
+```
+configure: error: glibc toolchain detected; refusing by default
+ (use --allow-glibc to override)
+```
+
+### Libc Detection
+
+The configure script identifies the libc implementation through three
+methods (tried in order):
+
+1. **Binary inspection**: Compiles a test program, runs `file(1)` on it,
+ looks for `ld-musl` or `ld-linux` in the interpreter path
+2. **Preprocessor macros**: Checks for `__GLIBC__` or `__MUSL__` in the
+ compiler's predefined macros
+3. **Target triple**: Inspects the compiler's `-dumpmachine` output for
+ `musl` or `gnu`/`glibc` substrings
+
+### Optional Dependencies
+
+| Library | Symbol | Required By | Fallback |
+|------------|------------------|---------------------|----------------------|
+| `libcrypt` | `crypt()` | `ed` (legacy `-x`) | Feature disabled |
+| `libdl` | `dlopen()` | `sh` (loadable) | Feature disabled |
+| `libpthread`| `pthread_create()` | Various | Single-threaded |
+| `librt` | `clock_gettime()`| `dd`, `timeout` | Linked if needed |
+| `libutil` | `openpty()` | `sh`, `csh` | Pty feature disabled |
+| `libattr` | `setxattr()` | `mv`, `cp` | xattr not preserved |
+| `libselinux`| `is_selinux_enabled()` | SELinux labels | Labels not set |
+| `libedit` | editline(3) | `sh`, `csh` | No line editing |
+
+## Quick Build
+
+```sh
+cd corebinutils/
+
+# Step 1: Configure
+./configure
+
+# Step 2: Build all utilities
+make -f GNUmakefile -j$(nproc) all
+
+# Step 3: (Optional) Run tests
+make -f GNUmakefile test
+
+# Step 4: (Optional) Stage binaries
+make -f GNUmakefile stage
+```
+
+After a successful build, binaries appear in `out/` and staged copies in
+`out/bin/`.
+
+## Configure Script Reference
+
+### Usage
+
+```
+./configure [options]
+```
+
+### General Options
+
+| Option | Default | Description |
+|---------------------------|------------------|--------------------------------|
+| `--help` | | Show help and exit |
+| `--prefix=PATH` | `/usr/local` | Install prefix |
+| `--bindir=PATH` | `<prefix>/bin` | Install binary directory |
+| `--host=TRIPLE` | Auto-detected | Target host triple |
+| `--build=TRIPLE` | Auto-detected | Build system triple |
+
+### Toolchain Options
+
+| Option | Default | Description |
+|---------------------------|------------------|--------------------------------|
+| `--cc=COMMAND` | Auto-detected | Force specific compiler |
+| `--allow-glibc` | Off | Allow glibc toolchain |
+
+### Flag Options
+
+| Option | Default | Description |
+|---------------------------|------------------|--------------------------------|
+| `--extra-cppflags=FLAGS` | Empty | Extra preprocessor flags |
+| `--extra-cflags=FLAGS` | Empty | Extra compilation flags |
+| `--extra-ldflags=FLAGS` | Empty | Extra linker flags |
+
+### Local Path Options
+
+| Option | Default | Description |
+|---------------------------|------------------|--------------------------------|
+| `--with-local-dir=PATH` | `/usr/local` | Add `PATH/include` and `PATH/lib` |
+| `--without-local-dir` | | Disable local path probing |
+
+### Policy Options
+
+| Option | Default | Description |
+|------------------------------|---------|-----------------------------------|
+| `--enable-fail-if-missing` | Off | Fail on missing optional probes |
+
+### Environment Variables
+
+The configure script respects standard environment variables:
+
+| Variable | Purpose |
+|-------------|------------------------------------------------|
+| `CC` | C compiler (overridden by probing if unusable) |
+| `CPPFLAGS` | Preprocessor flags from environment |
+| `CFLAGS` | Compilation flags from environment |
+| `LDFLAGS` | Linker flags from environment |
+
+### Default Flags
+
+The configure script applies these base flags:
+
+```
+CPPFLAGS: -D_DEFAULT_SOURCE -D_XOPEN_SOURCE=700 $env $extra
+CFLAGS: -O2 -g -pipe $env $extra
+LDFLAGS: $env $extra
+```
+
+## Configure Output
+
+### `config.mk`
+
+A Make include file with feature detection results:
+
+```makefile
+# Auto-generated by ./configure on 2026-04-05T12:00:00Z
+CONF_CPPFLAGS :=
+CONF_LDFLAGS :=
+CONF_LIBS :=
+CONFIGURE_TIMESTAMP := 2026-04-05T12:00:00Z
+CONFIGURE_HOST := x86_64-unknown-Linux
+CONFIGURE_BUILD := x86_64-unknown-Linux
+CONFIGURE_CC_MACHINE := x86_64-linux-musl
+CONFIGURE_LIBC := musl
+CROSS_COMPILING := 0
+EXEEXT :=
+HAVE_STDLIB_H := 1
+HAVE_COPY_FILE_RANGE := 1
+HAVE_STRLCPY := 1
+# ... (one entry per probed header/function)
+CONF_CPPFLAGS += -DHAVE_STDLIB_H=1 -DHAVE_COPY_FILE_RANGE=1 ...
+CONF_LIBS += -lcrypt -ldl -lpthread -lrt
+```
+
+### `GNUmakefile`
+
+The generated top-level Makefile contains:
+
+- Toolchain variables (`CC`, `AR`, `RANLIB`, etc.)
+- Flag variables (`CPPFLAGS`, `CFLAGS`, `LDFLAGS`)
+- Library variables (`CRYPTO_LIBS`, `EDITLINE_LIBS`)
+- Path variables (`PREFIX`, `BINDIR`, `MONO_BUILDDIR`, `MONO_OUTDIR`)
+- Subdirectory list (`SUBDIRS := cat chflags chmod cp ...`)
+- Build, clean, test, install, and utility targets
+
+### `build/configure/config.log`
+
+Detailed log of every compiler test and probe result. Useful for debugging
+configure failures:
+
+```
+$ musl-clang -x c build/configure/conftest.c -o build/configure/conftest
+$ build/configure/conftest
+checking for sys/acl.h... no
+checking for copy_file_range... yes
+```
+
+## Makefile Targets
+
+### Build Targets
+
+```sh
+make -f GNUmakefile all # Build all utilities
+make -f GNUmakefile cat # Build only cat (alias for build-cat)
+make -f GNUmakefile build-cat # Build only cat
+make -f GNUmakefile build-ls # Build only ls
+```
+
+### Clean Targets
+
+```sh
+make -f GNUmakefile clean # Remove build/ and out/
+make -f GNUmakefile clean-cat # Clean only cat's objects
+make -f GNUmakefile distclean # clean + remove GNUmakefile, config.mk
+make -f GNUmakefile maintainer-clean # Same as distclean
+```
+
+### Test Targets
+
+```sh
+make -f GNUmakefile test # Run all test suites
+make -f GNUmakefile check # Same as test
+make -f GNUmakefile check-cat # Test only cat
+make -f GNUmakefile check-ls # Test only ls
+```
+
+### Install Targets
+
+```sh
+make -f GNUmakefile stage # Copy binaries to out/bin/
+make -f GNUmakefile install # Install to $DESTDIR$PREFIX/bin
+make -f GNUmakefile install DESTDIR=/tmp/pkg # Staged install
+```
+
+### Information Targets
+
+```sh
+make -f GNUmakefile status # Show output directory contents
+make -f GNUmakefile list # List all utility subdirectories
+make -f GNUmakefile print-config # Show compiler and flags
+make -f GNUmakefile print-subdirs # List subdirectories
+make -f GNUmakefile help # Show available targets
+```
+
+### Rebuild and Reconfigure
+
+```sh
+make -f GNUmakefile rebuild # clean + all
+make -f GNUmakefile reconfigure # Re-run ./configure
+```
+
+## Cross-Compilation
+
+### Basic Cross-Compilation
+
+```sh
+./configure \
+ --host=aarch64-linux-musl \
+ --build=x86_64-linux-musl \
+ --cc=aarch64-linux-musl-gcc
+
+make -f GNUmakefile -j$(nproc) all
+```
+
+### Cross-Compilation with Clang
+
+```sh
+./configure \
+ --host=aarch64-linux-musl \
+ --cc="clang --target=aarch64-linux-musl --sysroot=/path/to/musl-sysroot"
+
+make -f GNUmakefile -j$(nproc) all
+```
+
+### Cross vs. Native Detection
+
+When `--host` matches `--build` (or both are auto-detected to the same
+value), `REQUIRE_RUNNABLE_CC=1` and the configure script verifies the
+compiler produces executables that can actually run. For cross-compilation,
+only compilation (not execution) is tested.
+
+## Build Customization
+
+### Custom Compiler Flags
+
+```sh
+# Debug build
+./configure --extra-cflags="-O0 -g3 -fsanitize=address,undefined"
+
+# Release build
+./configure --extra-cflags="-O3 -DNDEBUG -flto" --extra-ldflags="-flto"
+
+# With warnings
+./configure --extra-cflags="-Wall -Wextra -Werror"
+```
+
+### Custom Install Prefix
+
+```sh
+./configure --prefix=/opt/project-tick --bindir=/opt/project-tick/sbin
+make -f GNUmakefile -j$(nproc) all
+make -f GNUmakefile install
+```
+
+### Building Individual Utilities
+
+```sh
+# Configure once
+./configure
+
+# Build only what you need
+make -f GNUmakefile cat ls cp mv rm mkdir
+```
+
+### Forcing glibc
+
+```sh
+./configure --allow-glibc --cc=gcc
+```
+
+Note: The primary test target for corebinutils is musl. Building with glibc
+may expose minor differences in header availability or function signatures.
+
+## Troubleshooting
+
+### "no usable compiler found"
+
+The configure script could not find any C compiler that:
+1. Produces working executables
+2. Supports `<stdatomic.h>` (C11)
+3. Can run the output (native builds only)
+
+**Fix**: Install `musl-gcc` or `musl-clang`, or specify a compiler
+explicitly with `--cc=...`.
+
+### "glibc toolchain detected; refusing by default"
+
+The detected compiler links against glibc instead of musl.
+
+**Fix**: Install musl development tools or pass `--allow-glibc`.
+
+### Missing header warnings
+
+The configure log (`build/configure/config.log`) shows which headers were
+not found. Missing optional headers (e.g., `sys/acl.h`) disable related
+features but don't prevent building.
+
+### Linker errors for `-lcrypt` or `-lrt`
+
+Some utilities use optional libraries. If they're not found at configure
+time, the corresponding features are disabled. If you see linker errors:
+
+```sh
+# Check what was detected
+grep CONF_LIBS config.mk
+```
+
+### Parallel build failures
+
+If `make -j$(nproc)` fails but `make -j1` succeeds, a subdirectory
+`GNUmakefile` may have missing dependencies. File a bug report.
+
+### Cleaning stale state
+
+```sh
+make -f GNUmakefile distclean
+./configure
+make -f GNUmakefile -j$(nproc) all
+```
+
+## Build Output Structure
+
+After a successful `make all && make stage`:
+
+```
+out/
+├── cat
+├── chmod
+├── cp
+├── csh
+├── date
+├── dd
+├── df
+├── echo
+├── ed
+├── expr
+├── hostname
+├── kill
+├── ln
+├── ls
+├── mkdir
+├── mv
+├── nproc
+├── pax
+├── ps
+├── pwd
+├── realpath
+├── rm
+├── rmdir
+├── sh
+├── sleep
+├── sync
+├── test
+├── timeout
+├── [ # Symlink or hardlink to test
+└── bin/ # Staged binaries (copies)
+```
+
+## CI Integration
+
+For CI pipelines, use the non-interactive build sequence:
+
+```sh
+#!/bin/sh
+set -eu
+
+cd corebinutils/
+./configure --prefix=/usr
+
+# Build with parallelism hinted by configure
+JOBS=$(grep -o 'JOBS_HINT := [0-9]*' GNUmakefile | cut -d' ' -f3)
+make -f GNUmakefile -j"${JOBS:-1}" all
+
+# Run tests (SKIP is OK, failures are not)
+make -f GNUmakefile test
+
+# Package
+make -f GNUmakefile install DESTDIR="$PWD/pkg"
+```
diff --git a/docs/handbook/corebinutils/cat.md b/docs/handbook/corebinutils/cat.md
new file mode 100644
index 0000000000..ddc2b842c5
--- /dev/null
+++ b/docs/handbook/corebinutils/cat.md
@@ -0,0 +1,211 @@
+# cat — Concatenate and Display Files
+
+## Overview
+
+`cat` reads files sequentially and writes their contents to standard output.
+It supports line numbering, non-printable character visualization, blank
+line squeezing, and efficient in-kernel copying.
+
+**Source**: `cat/cat.c` (single file)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+cat [-belnstuv] [file ...]
+```
+
+## Options
+
+| Flag | Long Form | Description |
+|------|-----------|-------------|
+| `-b` | — | Number non-blank output lines (starting at 1) |
+| `-e` | — | Display `$` at end of each line (implies `-v`) |
+| `-l` | — | Set exclusive advisory lock on stdout via `flock(2)` |
+| `-n` | — | Number all output lines |
+| `-s` | — | Squeeze multiple adjacent blank lines into one |
+| `-t` | — | Display TAB as `^I` (implies `-v`) |
+| `-u` | — | Disable output buffering (write immediately) |
+| `-v` | — | Visualize non-printing characters using `^X` and `M-X` notation |
+
+## Source Analysis
+
+### Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Option parsing via `getopt(3)`, dispatch to `scanfiles()` |
+| `usage()` | Print usage message and exit |
+| `scanfiles()` | Iterate over file arguments, handle `-` as stdin |
+| `cook_cat()` | Process input with formatting (numbering, visualization, squeezing) |
+| `raw_cat()` | Fast buffer-based copy without formatting |
+| `in_kernel_copy()` | Zero-copy via `copy_file_range(2)` syscall |
+| `init_casper()` | BSD Capsicum sandbox init (disabled on Linux) |
+| `init_casper_net()` | BSD Casper network service (disabled on Linux) |
+| `udom_open()` | Unix domain socket support (disabled on Linux) |
+
+### Option Processing
+
+```c
+while ((ch = getopt(argc, argv, "belnstuv")) != -1)
+ switch (ch) {
+ case 'b': bflag = nflag = 1; break; /* implies -n */
+ case 'e': eflag = vflag = 1; break; /* implies -v */
+ case 'l': lflag = 1; break;
+ case 'n': nflag = 1; break;
+ case 's': sflag = 1; break;
+ case 't': tflag = vflag = 1; break; /* implies -v */
+ case 'u': setbuf(stdout, NULL); break;
+ case 'v': vflag = 1; break;
+ default: usage();
+ }
+```
+
+### I/O Strategy: Three Modes
+
+`cat` selects among three output strategies based on which flags are active:
+
+1. **`in_kernel_copy()`** — When no formatting flags are set and the output
+ supports `copy_file_range(2)`, data moves directly between file
+ descriptors inside the kernel, never entering user space.
+
+2. **`raw_cat()`** — When no formatting is needed but `copy_file_range` is
+ unavailable (e.g., stdin is a pipe). Uses an adaptive read buffer.
+
+3. **`cook_cat()`** — When any formatting flag (`-b`, `-e`, `-n`, `-s`,
+ `-t`, `-v`) is active. Processes each character individually.
+
+### Adaptive Buffer Sizing
+
+`raw_cat()` dynamically sizes its read buffer based on available physical
+memory:
+
+```c
+#define PHYSPAGES_THRESHOLD (32*1024)
+#define BUFSIZE_MAX (2*1024*1024) /* 2 MB */
+#define BUFSIZE_SMALL (128*1024) /* 128 KB */
+
+if (sysconf(_SC_PHYS_PAGES) > PHYSPAGES_THRESHOLD)
+ bsize = MIN(BUFSIZE_MAX, MAXPHYS * 8);
+else
+ bsize = BUFSIZE_SMALL;
+```
+
+On systems with more than 128 MB of physical memory (32,768 pages × 4 KB
+each), cat uses up to 2 MB buffers. On constrained systems, it falls back
+to 128 KB.
+
+### In-Kernel Copy
+
+When possible, `cat` uses the Linux `copy_file_range(2)` syscall for
+zero-copy I/O:
+
+```c
+static int
+in_kernel_copy(int from_fd, int to_fd)
+{
+ ssize_t ret;
+
+ do {
+ ret = copy_file_range(from_fd, NULL, to_fd, NULL, SSIZE_MAX, 0);
+ } while (ret > 0);
+
+ return (ret == 0) ? 0 : -1;
+}
+```
+
+This avoids two context switches per block (kernel→user for read,
+user→kernel for write) and can be significantly faster for large files.
+
+### Character Visualization
+
+When `-v` is active, `cook_cat()` renders non-printable characters:
+
+| Character Range | Rendering | Example |
+|----------------|-----------|---------|
+| `0x00–0x1F` | `^@` to `^_` | `^C` for ETX |
+| `0x7F` | `^?` | DEL character |
+| `0x80–0x9F` | `M-^@` to `M-^_` | Meta-control |
+| `0xA0–0xFE` | `M- ` to `M-~` | Meta-printable |
+| `0xFF` | `M-^?` | Meta-DEL |
+| TAB (`0x09`) | `^I` (with `-t`) or literal | |
+| Newline | `$\n` (with `-e`) or `\n` | |
+
+### Locale Support
+
+`cat` calls `setlocale(LC_CTYPE, "")` for wide character handling. In
+multibyte locales, the `-v` flag considers locale-specific printability
+via `iswprint(3)`. In the C locale, only ASCII printable characters
+pass through unmodified.
+
+### Lock Mode
+
+The `-l` flag acquires an exclusive advisory lock on stdout before
+writing:
+
+```c
+if (lflag)
+ flock(STDOUT_FILENO, LOCK_EX);
+```
+
+This prevents interleaved output when multiple `cat` processes write to
+the same file simultaneously. The lock is held for the entire duration
+of the program.
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `open(2)` | Open input files |
+| `read(2)` | Read file data into buffer |
+| `write(2)` | Write processed data to stdout |
+| `copy_file_range(2)` | Zero-copy kernel-to-kernel transfer |
+| `flock(2)` | Advisory locking (with `-l`) |
+| `fstat(2)` | Get file type for I/O strategy selection |
+| `sysconf(3)` | Query physical page count for buffer sizing |
+
+## BSD Features Disabled on Linux
+
+Several BSD-specific features are compiled out on Linux:
+
+- **Capsicum sandbox** (`cap_enter`, `cap_rights_limit`): The
+ `init_casper()` function is a no-op stub on Linux.
+- **Unix domain socket reading** (`udom_open()`): BSD `cat` can read
+ from Unix sockets via `connect(2)`. Disabled on Linux.
+- **`O_RESOLVE_BENEATH`**: BSD sandbox path resolution flag. Defined to 0
+ on Linux.
+
+## Examples
+
+```sh
+# Concatenate files
+cat file1.txt file2.txt > combined.txt
+
+# Number non-blank lines
+cat -b source.c
+
+# Show invisible characters
+cat -vet binary_file
+
+# Squeeze blank lines and number
+cat -sn logfile.txt
+
+# Lock stdout for atomic output
+cat -l data.csv >> shared_output.csv
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | All files read successfully |
+| 1 | Error opening or reading a file |
+
+## Edge Cases
+
+- Reading from stdin: When no files are specified or `-` is given, cat
+ reads from standard input.
+- Empty files: Produce no output but are not errors.
+- Binary files: Processed byte-by-byte; `-v` makes them viewable.
+- Named pipes and devices: `raw_cat()` handles them with buffered reads.
+ `copy_file_range` is not attempted on non-regular files.
diff --git a/docs/handbook/corebinutils/chmod.md b/docs/handbook/corebinutils/chmod.md
new file mode 100644
index 0000000000..b5f8a22886
--- /dev/null
+++ b/docs/handbook/corebinutils/chmod.md
@@ -0,0 +1,296 @@
+# chmod — Change File Permissions
+
+## Overview
+
+`chmod` changes the file mode (permission) bits of specified files. It supports
+both symbolic and numeric (octal) mode specifications, recursive directory
+traversal, symlink handling policies, ACL awareness, and verbose operation.
+
+**Source**: `chmod/chmod.c`, `chmod/mode.c`, `chmod/mode.h`
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+chmod [-fhvR [-H | -L | -P]] mode file ...
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-R` | Recursive: change files and directories recursively |
+| `-H` | Follow symlinks on the command line (with `-R` only) |
+| `-L` | Follow all symbolic links (with `-R` only) |
+| `-P` | Do not follow symbolic links (default with `-R`) |
+| `-f` | Force: suppress most error messages |
+| `-h` | Affect symlinks themselves, not their targets |
+| `-v` | Verbose: print changed files |
+| `-vv` | Very verbose: print all files, whether changed or not |
+
+## Source Analysis
+
+### chmod.c — Main Implementation
+
+#### Key Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Parse options via `getopt(3)`, compile mode, dispatch traversal |
+| `walk_path()` | Stat a path and decide how to process it |
+| `walk_dir()` | Enumerate directory contents for recursive processing |
+| `apply_mode()` | Compile mode, apply via `fchmodat(2)`, report changes |
+| `stat_path()` | Wrapper choosing between `stat(2)` and `lstat(2)` |
+| `should_skip_acl_check()` | Cache per-filesystem ACL support detection |
+| `visited_push()` / `visited_check()` | Cycle detection via device/inode tracking |
+| `siginfo_handler()` | Handle SIGINFO/SIGUSR1 for progress reporting |
+| `join_path()` | Safe path concatenation with separator handling |
+
+#### Option Processing
+
+```c
+while ((ch = getopt(argc, argv, "HLPRfhv")) != -1)
+ switch (ch) {
+ case 'H': Hflag = 1; Lflag = 0; break;
+ case 'L': Lflag = 1; Hflag = 0; break;
+ case 'P': Hflag = Lflag = 0; break;
+ case 'R': Rflag = 1; break;
+ case 'f': fflag = 1; break;
+ case 'h': hflag = 1; break;
+ case 'v': vflag++; break; /* -v increments, -vv = 2 */
+ default: usage();
+ }
+```
+
+#### Recursive Traversal
+
+The `-R` flag triggers recursive directory traversal. `chmod` implements its
+own traversal with cycle detection rather than using `fts(3)`:
+
+```c
+static int
+walk_dir(const char *dir_path, const struct chmod_options *opts)
+{
+ DIR *dp;
+ struct dirent *de;
+ char *child_path;
+ int ret = 0;
+
+ dp = opendir(dir_path);
+ while ((de = readdir(dp)) != NULL) {
+ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
+ continue;
+ child_path = join_path(dir_path, de->d_name);
+ ret |= walk_path(child_path, opts, false);
+ free(child_path);
+ }
+ closedir(dp);
+ return ret;
+}
+```
+
+#### Cycle Detection
+
+To prevent infinite traversal through symlink loops or bind mounts, `chmod`
+maintains a visited-path stack keyed on `(dev, ino)` pairs:
+
+```c
+static int visited_check(dev_t dev, ino_t ino); /* returns 1 if seen */
+static void visited_push(dev_t dev, ino_t ino); /* record as visited */
+```
+
+### mode.c — Mode Parsing Library
+
+#### Data Types
+
+```c
+typedef struct {
+ int cmd; /* '+', '-', '=', 'X', 'u', 'g', 'o' */
+ mode_t bits; /* Permission bits to modify */
+ mode_t who; /* Scope mask (user/group/other/all) */
+} bitcmd_t;
+```
+
+#### Key Functions
+
+| Function | Purpose |
+|----------|---------|
+| `mode_compile()` | Parse mode string into array of `bitcmd_t` operations |
+| `mode_apply()` | Apply compiled mode to an existing `mode_t` value |
+| `mode_free()` | Free compiled mode array |
+| `strmode()` | Convert `mode_t` to display string like `"drwxr-xr-x "` |
+| `get_current_umask()` | Atomically read process umask |
+
+#### Numeric Mode Parsing
+
+Numeric modes are parsed as octal:
+
+```c
+if (isdigit(*mode_string)) {
+ /* Parse octal: 755 → rwxr-xr-x, 0644 → rw-r--r-- */
+ val = strtol(mode_string, &ep, 8);
+ /* Set bits directly, clearing old permission bits */
+}
+```
+
+#### Symbolic Mode Parsing
+
+Symbolic modes follow the grammar:
+
+```
+mode ::= clause [, clause ...]
+clause ::= [who ...] [action ...] action
+who ::= 'u' | 'g' | 'o' | 'a'
+action ::= op [perm ...]
+op ::= '+' | '-' | '='
+perm ::= 'r' | 'w' | 'x' | 'X' | 's' | 't' | 'u' | 'g' | 'o'
+```
+
+Examples:
+- `u+rwx` — Add read/write/execute for user
+- `go-w` — Remove write for group and other
+- `a=rx` — Set all to read+execute only
+- `u=g` — Copy group permissions to user
+- `+X` — Add execute only if already executable or is a directory
+- `u+s` — Set SUID bit
+- `g+s` — Set SGID bit
+- `+t` — Set sticky bit
+
+#### The 'X' Permission
+
+The conditional execute permission `X` is a special case:
+
+```c
+/* 'X' only adds execute if:
+ * - The file is a directory, OR
+ * - Any execute bit is already set */
+if (cmd == 'X') {
+ if (S_ISDIR(old_mode) || (old_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
+ /* apply execute bits */
+}
+```
+
+This is commonly used with `-R` to make directories traversable without
+making regular files executable: `chmod -R u+rwX,go+rX dir/`
+
+#### Mode Compilation
+
+The `mode_compile()` function translates a mode string into an array of
+`bitcmd_t` instructions that can be applied to any `mode_t`:
+
+```c
+bitcmd_t *mode_compile(const char *mode_string);
+
+/* Usage: */
+bitcmd_t *set = mode_compile("u+rw,go+r");
+mode_t new_mode = mode_apply(set, old_mode);
+mode_free(set);
+```
+
+This two-phase approach lets the mode be parsed once and applied to many
+files during recursive traversal.
+
+#### strmode() Function
+
+Converts a numeric `mode_t` into a human-readable string:
+
+```c
+char buf[12];
+strmode(040755, buf); /* "drwxr-xr-x " → for directories */
+strmode(0100644, buf); /* "-rw-r--r-- " → for regular files */
+```
+
+The output is always 11 characters: type + 9 permission chars + space.
+
+### Umask Interaction
+
+When no scope (`u`, `g`, `o`, `a`) is specified in a symbolic mode, the
+umask determines which bits are affected. The umask is read atomically:
+
+```c
+static mode_t
+get_current_umask(void)
+{
+ mode_t mask;
+ sigset_t set, oset;
+
+ sigfillset(&set);
+ sigprocmask(SIG_BLOCK, &set, &oset);
+ mask = umask(0);
+ umask(mask);
+ sigprocmask(SIG_SETMASK, &oset, NULL);
+ return mask;
+}
+```
+
+Signals are blocked during the read-restore cycle to prevent another
+thread or signal handler from seeing a zero umask.
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `fchmodat(2)` | Apply permission changes |
+| `fstatat(2)` | Get current file mode |
+| `lstat(2)` | Stat without following symlinks |
+| `opendir(3)` / `readdir(3)` | Directory traversal |
+| `sigaction(2)` | Install SIGINFO handler |
+| `umask(2)` | Read current umask |
+
+## ACL Integration
+
+`chmod` is aware of POSIX ACLs. When changing permissions on a file with
+ACLs, the ACL mask entry may need updating. The `should_skip_acl_check()`
+function caches whether a filesystem supports ACLs to avoid repeated
+`pathconf()` calls:
+
+```c
+static bool
+should_skip_acl_check(const char *path)
+{
+ /* Cache per-device ACL support to avoid pathconf() on every file */
+}
+```
+
+## Examples
+
+```sh
+# Set exact permissions
+chmod 755 script.sh
+chmod 0644 config.txt
+
+# Add execute for user
+chmod u+x program
+
+# Recursive: directories traversable, files not executable
+chmod -R u+rwX,go+rX project/
+
+# Remove write for everyone except owner
+chmod go-w important.txt
+
+# Copy group permissions to other
+chmod o=g shared_file
+
+# Set SUID
+chmod u+s /usr/local/bin/helper
+
+# Verbose mode
+chmod -Rv 755 bin/
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | All files changed successfully |
+| 1 | Error changing one or more files (partial failure) |
+
+## Differences from GNU chmod
+
+- No `--reference=FILE` option
+- No `--changes` (use `-v`)
+- `-h` flag affects symlinks (GNU uses `--no-dereference`)
+- `-vv` for very verbose (GNU only has one `-v` level)
+- ACL awareness is filesystem-dependent
+- Mode compiler supports `u=g` (copy group permissions to user); note that
+  GNU chmod also accepts this syntax
diff --git a/docs/handbook/corebinutils/code-style.md b/docs/handbook/corebinutils/code-style.md
new file mode 100644
index 0000000000..2461903725
--- /dev/null
+++ b/docs/handbook/corebinutils/code-style.md
@@ -0,0 +1,351 @@
+# Code Style — Corebinutils Conventions
+
+## Overview
+
+The corebinutils codebase follows FreeBSD kernel style (KNF) with
+Linux-specific adaptations. This document catalogs the coding
+conventions observed across all utilities.
+
+## File Organization
+
+### Standard File Layout
+
+```c
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) YYYY Project Tick
+ * Copyright (c) YYYY The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution notice ...
+ */
+
+/* System headers (alphabetical) */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* Local headers */
+#include "utility.h"
+
+/* Macros */
+#define BUFSIZE (128 * 1024)
+
+/* Types and structs */
+struct options { ... };
+
+/* Static function prototypes */
+static void usage(void);
+
+/* Static globals */
+static const char *progname;
+
+/* Functions (main last or first, utility-dependent) */
+```
+
+### Header Guard Style
+
+```c
+/* No #pragma once — uses traditional guards */
+#ifndef _DD_H_
+#define _DD_H_
+/* ... */
+#endif /* !_DD_H_ */
+```
+
+## Naming Conventions
+
+### Functions
+
+- **Lowercase with underscores**: `parse_args()`, `copy_file_data()`
+- **Static for file-scope**: All non-`main` functions are `static` unless
+ needed by other translation units
+- **Verb-first**: `read_file()`, `write_output()`, `parse_duration()`
+- **Predicate prefix**: `is_directory()`, `should_recurse()`, `has_flag()`
+
+### Variables
+
+- **Short names in small scopes**: `n`, `p`, `ch`, `sb`, `dp`
+- **Descriptive names in structs**: `suppress_newline`, `follow_mode`
+- **Constants as macros**: `BUFSIZE`, `EXIT_TIMEOUT`, `COMMLEN`
+- **Global flags**: Single-word or abbreviated: `verbose`, `force`, `rflag`
+
+### Struct Naming
+
+```c
+/* Tagged structs (no typedef for most) */
+struct options { ... };
+struct mount_entry { ... };
+
+/* Typedefs only for opaque or complex types */
+typedef struct line line_t;
+typedef struct { ... } bitcmd_t;
+typedef regex_t pattern_t;
+```
+
+## Option Processing
+
+### getopt(3) Pattern
+
+```c
+while ((ch = getopt(argc, argv, "fhilnRsvwx")) != -1) {
+ switch (ch) {
+ case 'f':
+ opts.force = true;
+ break;
+ case 'v':
+ opts.verbose = true;
+ break;
+ /* ... */
+ default:
+ usage();
+ }
+}
+argc -= optind;
+argv += optind;
+```
+
+### getopt_long(3) Pattern
+
+```c
+static const struct option long_options[] = {
+ {"color", optional_argument, NULL, 'G'},
+ {"group-directories-first", no_argument, NULL, OPT_GROUPDIRS},
+ {NULL, 0, NULL, 0},
+};
+
+while ((ch = getopt_long(argc, argv, optstring,
+ long_options, NULL)) != -1) { ... }
+```
+
+### Manual Parsing (echo)
+
+```c
+/* When getopt is too heavy */
+while (*argv && strcmp(*argv, "-n") == 0) {
+ suppress_newline = true;
+ argv++;
+}
+```
+
+## Error Handling
+
+### BSD err(3) Family
+
+```c
+#include <err.h>
+
+err(1, "open '%s'", path); /* perror-style with exit */
+errx(2, "invalid mode: %s", s); /* No errno, with exit */
+warn("stat '%s'", path); /* perror-style, no exit */
+warnx("skipping '%s'", path); /* No errno, no exit */
+```
+
+### Custom Error Functions
+
+Many utilities define their own for consistency:
+
+```c
+static void
+error_errno(const char *fmt, ...)
+{
+ int saved = errno;
+ va_list ap;
+ fprintf(stderr, "%s: ", progname);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fprintf(stderr, ": %s\n", strerror(saved));
+}
+
+static void
+error_msg(const char *fmt, ...)
+{
+ va_list ap;
+ fprintf(stderr, "%s: ", progname);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fputc('\n', stderr);
+}
+```
+
+### Usage Functions
+
+```c
+static void __dead2 /* noreturn attribute */
+usage(void)
+{
+ fprintf(stderr, "usage: %s [-fiv] source target\n", progname);
+ exit(2);
+}
+```
+
+## Memory Management
+
+### Dynamic Allocation Patterns
+
+```c
+/* Always check allocation */
+char *buf = malloc(size);
+if (buf == NULL)
+ err(1, "malloc");
+
+/* strdup with check */
+char *copy = strdup(str);
+if (copy == NULL)
+ err(1, "strdup");
+
+/* Adaptive buffer sizing */
+size_t bufsize = BUFSIZE_MAX;
+while (bufsize >= BUFSIZE_MIN) {
+ buf = malloc(bufsize);
+ if (buf) break;
+ bufsize /= 2;
+}
+```
+
+### No Global malloc/free Tracking
+
+Utilities that process-exit after completion do not free final
+allocations — the OS reclaims all memory. Early-exit utilities
+(cat, echo, pwd) rely on this.
+
+## Portability Patterns
+
+### Conditional Compilation
+
+```c
+/* Feature detection from configure */
+#ifdef HAVE_SYS_XATTR_H
+#include <sys/xattr.h>
+#endif
+
+/* BSD vs Linux */
+#ifdef __linux__
+ /* Linux-specific path */
+#else
+ /* BSD fallback (rarely used) */
+#endif
+
+/* musl compatibility */
+#ifndef STAILQ_HEAD
+#define STAILQ_HEAD(name, type) ...
+#endif
+```
+
+### Inline Syscall Wrappers
+
+```c
+/* For syscalls not in musl headers */
+static int
+linux_statx(int dirfd, const char *path, int flags,
+ unsigned int mask, struct statx *stx)
+{
+ return syscall(__NR_statx, dirfd, path, flags, mask, stx);
+}
+```
+
+## Formatting
+
+### Indentation
+
+- **Tabs** for indentation (KNF style)
+- **8-space tab stops** (standard)
+- Continuation lines indented 4 spaces from operator
+
+### Braces
+
+```c
+/* K&R for functions */
+static void
+function_name(int arg)
+{
+ /* body */
+}
+
+/* Same-line for control flow */
+if (condition) {
+ /* body */
+} else {
+ /* body */
+}
+
+/* No braces for single statements */
+if (error)
+ return -1;
+```
+
+### Line Length
+
+- Target 80 columns
+- Long function signatures wrap at parameter boundaries
+- Long strings use concatenation
+
+### Switch Statements
+
+```c
+switch (ch) {
+case 'f':
+ force = true;
+ break;
+case 'v':
+ verbose++;
+ break;
+default:
+ usage();
+ /* NOTREACHED */
+}
+```
+
+## Common Macros
+
+```c
+/* Array size */
+#define nitems(x) (sizeof(x) / sizeof((x)[0]))
+
+/* Min/Max */
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+/* Noreturn */
+#define __dead2 __attribute__((__noreturn__))
+
+/* Unused parameter */
+#define __unused __attribute__((__unused__))
+```
+
+## Signal Handling Conventions
+
+```c
+/* Volatile sig_atomic_t for signal flags */
+static volatile sig_atomic_t info_requested;
+
+/* Minimal signal handlers (set flag only) */
+static void
+handler(int sig)
+{
+ (void)sig;
+ info_requested = 1;
+}
+
+/* Check flag in main loop */
+if (info_requested) {
+ report_progress();
+ info_requested = 0;
+}
+```
+
+## Build System Conventions
+
+- Per-utility `GNUmakefile` is the source of truth
+- Top-level `GNUmakefile` generated by `configure`
+- All object files go to `build/<utility>/`
+- Final binaries go to `out/`; `make stage` copies them to `out/bin/`
+- Default `CFLAGS` are `-O2 -g -pipe`; add `-Wall -Wextra -Werror` via
+  `--extra-cflags` for warning-clean builds
diff --git a/docs/handbook/corebinutils/cp.md b/docs/handbook/corebinutils/cp.md
new file mode 100644
index 0000000000..b15bb01a2d
--- /dev/null
+++ b/docs/handbook/corebinutils/cp.md
@@ -0,0 +1,270 @@
+# cp — Copy Files and Directories
+
+## Overview
+
+`cp` copies files and directory trees. It supports recursive copying, archive
+mode (preserving metadata), symlink handling policies, sparse file detection,
+and interactive/forced overwrite modes.
+
+**Source**: `cp/cp.c`, `cp/utils.c`, `cp/extern.h`, `cp/fts.c`, `cp/fts.h`
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+cp [-HLPRafilnpsvx] [--sort] [-N mode] source_file target_file
+cp [-HLPRafilnpsvx] [--sort] [-N mode] source_file ... target_directory
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-R` / `-r` | Recursive: copy directories and their contents |
+| `-a` | Archive mode: equivalent to `-R -P -p` |
+| `-f` | Force: remove existing target before copying |
+| `-i` | Interactive: prompt before overwriting |
+| `-l` | Create hard links instead of copying |
+| `-n` | No-clobber: do not overwrite existing files |
+| `-p` | Preserve: maintain mode, ownership, timestamps |
+| `-s` | Create symbolic links instead of copying |
+| `-v` | Verbose: print each file as it is copied |
+| `-x` | One-filesystem: do not cross mount points |
+| `-H` | Follow symlinks on command line (with `-R`) |
+| `-L` | Follow all symbolic links (with `-R`) |
+| `-P` | Do not follow symbolic links (default with `-R`) |
+| `--sort` | Sort entries numerically during recursive copy |
+| `-N mode` | Apply negated permissions to regular files |
+
+## Source Analysis
+
+### cp.c — Main Logic
+
+#### Key Data Structures
+
+```c
+typedef struct {
+ char *p_end; /* Pointer to NULL at end of path */
+ char *target_end; /* Pointer to end of target base */
+ char p_path[PATH_MAX]; /* Current target path buffer */
+ int p_fd; /* Directory file descriptor */
+} PATH_T;
+
+struct options {
+ bool recursive;
+ bool force;
+ bool interactive;
+ bool no_clobber;
+ bool preserve;
+ bool hard_link;
+ bool symbolic_link;
+ bool verbose;
+ bool one_filesystem;
+ /* ... */
+};
+```
+
+#### Key Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Parse options, stat destination, determine copy mode |
+| `copy()` | Main recursive copy driver using FTS traversal |
+| `ftscmp()` | qsort comparator for `--sort` numeric ordering |
+| `local_strlcpy()` | Portability wrapper for `strlcpy` |
+| `local_asprintf()` | Portability wrapper for `asprintf` |
+
+#### Copy Mode Detection
+
+`cp` determines the operation mode from the arguments:
+
+```c
+/* Three cases:
+ * 1. cp file1 file2 → file-to-file copy
+ * 2. cp file1 file2 dir/ → files into directory
+ * 3. cp -R dir1 dir2 → directory to new directory
+ */
+if (stat(target, &sb) == 0 && S_ISDIR(sb.st_mode))
+ type = DIR_TO_DIR;
+else if (argc == 2)
+ type = FILE_TO_FILE;
+else
+ usage(); /* Multiple sources require directory target */
+```
+
+### utils.c — Copy Engine
+
+#### Adaptive Buffer Sizing
+
+Like `cat`, `cp` adapts its I/O buffer to available memory:
+
+```c
+#define PHYSPAGES_THRESHOLD (32*1024)
+#define BUFSIZE_MAX (2*1024*1024)
+#define BUFSIZE_SMALL (MAXPHYS) /* 128 KB */
+
+static ssize_t
+copy_fallback(int from_fd, int to_fd)
+{
+ if (buf == NULL) {
+ if (sysconf(_SC_PHYS_PAGES) > PHYSPAGES_THRESHOLD)
+ bufsize = MIN(BUFSIZE_MAX, MAXPHYS * 8);
+ else
+ bufsize = BUFSIZE_SMALL;
+ buf = malloc(bufsize);
+ }
+ /* read/write loop */
+}
+```
+
+#### Key Functions in utils.c
+
+| Function | Purpose |
+|----------|---------|
+| `copy_fallback()` | Buffer-based file copy with adaptive sizing |
+| `copy_file()` | Copy regular file, potentially using `copy_file_range(2)` |
+| `copy_link()` | Copy symbolic link (read target, create new symlink) |
+| `copy_fifo()` | Copy FIFO via `mkfifo(2)` |
+| `copy_special()` | Copy device nodes via `mknod(2)` |
+| `setfile()` | Set timestamps, ownership, permissions on target |
+| `preserve_fd_acls()` | Copy POSIX ACLs between file descriptors |
+
+### FTS Traversal
+
+`cp -R` uses an in-tree FTS (File Traversal Stream) implementation:
+
+```c
+FTS *ftsp;
+FTSENT *curr;
+int fts_options = FTS_NOCHDIR | FTS_PHYSICAL;
+
+if (Lflag) {
+	fts_options &= ~FTS_PHYSICAL;
+	fts_options |= FTS_LOGICAL;
+}
+
+ftsp = fts_open(argv, fts_options, NULL);
+while ((curr = fts_read(ftsp)) != NULL) {
+ switch (curr->fts_info) {
+ case FTS_D: /* Directory pre-visit */
+ mkdir(target_path, curr->fts_statp->st_mode);
+ break;
+ case FTS_F: /* Regular file */
+ copy_file(curr->fts_path, target_path);
+ break;
+ case FTS_SL: /* Symbolic link */
+ copy_link(curr->fts_path, target_path);
+ break;
+ case FTS_DP: /* Directory post-visit */
+ setfile(curr->fts_statp, target_path);
+ break;
+ }
+}
+```
+
+### Symlink Handling Modes
+
+| Mode | Flag | Behavior |
+|------|------|----------|
+| Physical | `-P` (default) | Copy symlinks as symlinks |
+| Command-line follow | `-H` | Follow symlinks named on command line |
+| Logical | `-L` | Follow all symlinks, copy targets |
+
+### Archive Mode
+
+The `-a` flag combines three flags for complete archival:
+
+```sh
+cp -a source/ dest/
+# Equivalent to:
+cp -R -P -p source/ dest/
+```
+
+- `-R` — Recursive copy
+- `-P` — Don't follow symlinks (preserve them as-is)
+- `-p` — Preserve timestamps, ownership, and permissions
+
+### Metadata Preservation (`-p`)
+
+When `-p` is specified, `cp` preserves:
+
+| Metadata | System Call |
+|----------|-------------|
+| Access time | `utimensat(2)` |
+| Modification time | `utimensat(2)` |
+| File mode | `fchmod(2)` / `chmod(2)` |
+| Owner/group | `fchown(2)` / `lchown(2)` |
+| ACLs | `acl_get_fd()` / `acl_set_fd()` (if available) |
+
+### Cycle Detection
+
+During recursive copy, `cp` tracks visited directories by `(dev, ino)`
+pairs to detect filesystem cycles created by symlinks or bind mounts:
+
+```c
+/* If we've already visited this inode on this device, skip it */
+if (cycle_check(curr->fts_statp->st_dev, curr->fts_statp->st_ino)) {
+ warnx("%s: directory causes a cycle", curr->fts_path);
+ fts_set(ftsp, curr, FTS_SKIP);
+ continue;
+}
+```
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `open(2)` | Open source and target files |
+| `read(2)` / `write(2)` | Buffer-based data copy |
+| `copy_file_range(2)` | Zero-copy in-kernel transfer |
+| `mkdir(2)` | Create target directories |
+| `mkfifo(2)` | Create FIFO copies |
+| `mknod(2)` | Create device node copies |
+| `symlink(2)` | Create symbolic links |
+| `link(2)` | Create hard links (with `-l`) |
+| `readlink(2)` | Read symlink target |
+| `fchmod(2)` | Set permissions on target |
+| `fchown(2)` | Set ownership on target |
+| `utimensat(2)` | Set timestamps on target |
+| `fstat(2)` | Check file type and metadata |
+
+## Examples
+
+```sh
+# Simple file copy
+cp source.txt dest.txt
+
+# Recursive directory copy
+cp -R src/ dest/
+
+# Archive mode (preserve everything)
+cp -a project/ backup/project/
+
+# Interactive overwrite
+cp -i newfile.txt existing.txt
+
+# Create hard links instead of copies
+cp -l large_file.dat link_to_large_file.dat
+
+# Don't cross filesystem boundaries
+cp -Rx /home/user/ /backup/home/user/
+
+# Verbose recursive copy
+cp -Rv config/ /etc/myapp/
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | All files copied successfully |
+| 1 | Error copying one or more files |
+
+## Differences from GNU cp
+
+- No `--reflink` option for CoW copies
+- No `--sparse=auto/always/never` (sparse handling is automatic)
+- `--sort` flag for sorted recursive output (not in GNU)
+- `-N` flag for negated permissions (not in GNU)
+- Uses in-tree FTS instead of gnulib
+- No SELinux context preservation (use `--preserve=context` in GNU)
diff --git a/docs/handbook/corebinutils/date.md b/docs/handbook/corebinutils/date.md
new file mode 100644
index 0000000000..d498f406a5
--- /dev/null
+++ b/docs/handbook/corebinutils/date.md
@@ -0,0 +1,352 @@
+# date — Display and Set System Date
+
+## Overview
+
+`date` displays the current date and time, or sets the system clock. It
+supports strftime-based format strings, ISO 8601 output, RFC 2822 output,
+timezone overrides, date arithmetic via "vary" adjustments, and input
+date parsing.
+
+**Source**: `date/date.c`, `date/vary.c`, `date/vary.h`
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause / BSD-2-Clause (vary.c)
+
+## Synopsis
+
+```
+date [-jnRu] [-r seconds | filename] [-I[date|hours|minutes|seconds|ns]]
+ [-f input_fmt] [-v [+|-]val[ymwdHMS]] [-z output_zone]
+     [+output_format] [[[[[[cc]yy]mm]dd]HH]MM[.ss]]
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-j` | Do not try to set the system clock |
+| `-n` | Same as `-j` (compatibility) |
+| `-R` | RFC 2822 format output |
+| `-u` | Use UTC instead of local time |
+| `-r seconds` | Display time from epoch seconds |
+| `-r filename` | Display modification time of file |
+| `-I[precision]` | ISO 8601 format output |
+| `-f input_fmt` | Parse input date using strptime format |
+| `-v adjustment` | Adjust date components (can be repeated) |
+| `-z timezone` | Use specified timezone for output |
+
+## Source Analysis
+
+### date.c — Main Implementation
+
+#### Key Data Structures
+
+```c
+struct iso8601_fmt {
+ const char *refname; /* "date", "hours", "minutes", etc. */
+ const char *format_string; /* strftime format */
+ bool include_zone; /* Whether to append timezone */
+};
+
+struct strbuf {
+ char *data;
+ size_t len;
+ size_t cap;
+};
+
+struct options {
+ const char *input_format; /* -f format string */
+ const char *output_zone; /* -z timezone */
+ const char *reference_arg; /* -r argument */
+ const char *time_operand; /* MMDDhhmm... or parsed date */
+ const char *format_operand; /* +format string */
+ struct vary *vary_chain; /* -v adjustments */
+ const struct iso8601_fmt *iso8601_selected;
+ bool no_set; /* -j flag */
+ bool rfc2822; /* -R flag */
+ bool use_utc; /* -u flag */
+};
+```
+
+#### ISO 8601 Formats
+
+```c
+static const struct iso8601_fmt iso8601_fmts[] = {
+ { "date", "%Y-%m-%d", false },
+ { "hours", "%Y-%m-%dT%H", true },
+ { "minutes", "%Y-%m-%dT%H:%M", true },
+ { "seconds", "%Y-%m-%dT%H:%M:%S", true },
+ { "ns", "%Y-%m-%dT%H:%M:%S,%N", true },
+};
+```
+
+#### Key Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Parse options, resolve time, format output |
+| `parse_args()` | Option-by-option argument processing |
+| `validate_options()` | Check for conflicting options |
+| `set_timezone_or_die()` | Apply timezone via `setenv("TZ", ...)` |
+| `read_reference_time()` | Get time from `-r` argument (epoch or file mtime) |
+| `read_current_time()` | Get current time via `clock_gettime(2)` |
+| `parse_legacy_time()` | Parse `[[[[[cc]yy]mm]dd]HH]MM[.ss]` format |
+| `parse_formatted_time()` | Parse via `strptime(3)` with `-f` format |
+| `parse_time_operand()` | Dispatch to legacy or formatted parser |
+| `set_system_time()` | Set clock via `clock_settime(2)` |
+| `apply_variations()` | Apply `-v` adjustments to broken-down time |
+| `expand_format_string()` | Expand `%N` (nanoseconds) in format strings |
+| `render_format()` | Format via `strftime(3)` with extensions |
+| `render_iso8601()` | Generate ISO 8601 output with timezone |
+| `render_numeric_timezone()` | Format `+HHMM` timezone offset |
+| `print_line_and_exit()` | Write output and exit |
+
+#### Main Flow
+
+```c
+int main(int argc, char **argv)
+{
+ parse_args(argc, argv, &options);
+ validate_options(&options);
+ setlocale(LC_TIME, "");
+
+ if (options.use_utc)
+ set_timezone_or_die("UTC0", "TZ=UTC0");
+
+ if (options.reference_arg != NULL)
+ read_reference_time(options.reference_arg, &ts);
+ else
+ read_current_time(&ts, &resolution);
+
+ if (options.time_operand != NULL) {
+ parse_time_operand(&options, &ts, &ts);
+ if (!options.no_set)
+ set_system_time(&ts);
+ }
+
+ localtime_or_die(ts.tv_sec, &tm);
+ apply_variations(&options, &tm);
+
+ /* Render output based on -I, -R, or +format */
+ output = render_format(format, &tm, ts.tv_nsec, resolution.tv_nsec);
+ print_line_and_exit(output);
+}
+```
+
+#### String Buffer Implementation
+
+`date.c` includes a custom growable string buffer for format expansion:
+
+```c
+static void strbuf_init(struct strbuf *buf);
+static void strbuf_reserve(struct strbuf *buf, size_t extra);
+static void strbuf_append_mem(struct strbuf *buf, const char *data, size_t len);
+static void strbuf_append_char(struct strbuf *buf, char ch);
+static void strbuf_append_str(struct strbuf *buf, const char *text);
+static char *strbuf_finish(struct strbuf *buf);
+```
+
+#### Nanosecond Format Extension
+
+The `%N` format specifier (not in standard `strftime`) is expanded
+manually before passing to `strftime(3)`:
+
+```c
+static void
+append_nsec_digits(struct strbuf *buf, const char *pending, size_t len,
+ long nsec, long resolution)
+{
+ /* Format nanoseconds with appropriate precision based on resolution */
+}
+```
+
+### vary.c — Date Arithmetic
+
+The `-v` flag enables relative date adjustments. Multiple `-v` flags can
+be chained to build complex date expressions.
+
+#### Adjustment Types
+
+| Code | Unit | Example |
+|------|------|---------|
+| `y` | Years | `-v +1y` (next year) |
+| `m` | Months | `-v -3m` (3 months ago) |
+| `w` | Weeks | `-v +2w` (2 weeks forward) |
+| `d` | Days | `-v +1d` (tomorrow) |
+| `H` | Hours | `-v -6H` (6 hours ago) |
+| `M` | Minutes | `-v +30M` (30 minutes forward) |
+| `S` | Seconds | `-v -10S` (10 seconds ago) |
+
+#### Named Values
+
+Month names and weekday names can be used with `=`:
+
+```sh
+date -v =monday # Next Monday
+date -v =january # Set month to January
+```
+
+#### Implementation
+
+```c
+struct trans {
+ int64_t value;
+ const char *name;
+};
+
+static const struct trans trans_mon[] = {
+ { 1, "january" }, { 2, "february" }, { 3, "march" },
+ { 4, "april" }, { 5, "may" }, { 6, "june" },
+ { 7, "july" }, { 8, "august" }, { 9, "september" },
+ { 10, "october" },{ 11, "november" }, { 12, "december" },
+ { -1, NULL }
+};
+
+static const struct trans trans_wday[] = {
+ { 0, "sunday" }, { 1, "monday" }, { 2, "tuesday" },
+ { 3, "wednesday" },{ 4, "thursday" },{ 5, "friday" },
+ { 6, "saturday" }, { -1, NULL }
+};
+```
+
+The `vary_apply()` function processes each adjustment in the chain,
+calling specific adjuster functions:
+
+```c
+static int adjyear(struct tm *tm, char type, int64_t value, bool normalize);
+static int adjmon(struct tm *tm, char type, int64_t value, bool is_text, bool normalize);
+static int adjday(struct tm *tm, char type, int64_t value, bool normalize);
+static int adjwday(struct tm *tm, char type, int64_t value, bool is_text, bool normalize);
+static int adjhour(struct tm *tm, char type, int64_t value, bool normalize);
+static int adjmin(struct tm *tm, char type, int64_t value, bool normalize);
+static int adjsec(struct tm *tm, char type, int64_t value, bool normalize);
+```
+
+Each adjuster modifies the broken-down `struct tm` and calls
+`normalize_tm()` to fix rolled-over fields via `mktime(3)`.
+
+### Timezone Handling
+
+```c
+static void
+set_timezone_or_die(const char *tz_value, const char *what)
+{
+ if (setenv("TZ", tz_value, 1) != 0)
+ die_errno("setenv %s", what);
+ tzset();
+}
+```
+
+The `-u` flag sets `TZ=UTC0`. The `-z` flag sets `TZ` to the specified
+value only for output formatting (input parsing uses the original timezone).
+
+### Legacy Time Format
+
+The BSD legacy format `[[[[[cc]yy]mm]dd]HH]MM[.ss]` is parsed
+right-to-left:
+
+```c
+static void
+parse_legacy_time(const char *text, const struct timespec *base, struct timespec *ts)
+{
+ /* Parse from rightmost position:
+ * 1. [.ss] - optional seconds
+ * 2. MM - minutes (required)
+ * 3. HH - hours
+ * 4. dd - day
+ * 5. mm - month
+ * 6. [cc]yy - year
+ */
+}
+```
+
+## System and Library Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `clock_gettime(2)` | Read current time with nanosecond precision |
+| `clock_settime(2)` | Set system clock (requires root) |
+| `stat(2)` | Get file modification time for `-r filename` |
+| `setenv(3)` | Set `TZ` environment variable |
+| `strftime(3)` | Format broken-down time |
+| `strptime(3)` | Parse time from formatted string |
+| `mktime(3)` | Normalize broken-down time |
+| `localtime_r(3)` | Thread-safe time conversion |
+
+## Format Strings
+
+`date` supports all `strftime(3)` format specifiers plus:
+
+| Specifier | Meaning |
+|-----------|---------|
+| `%N` | Nanoseconds (extension, expanded before strftime) |
+| `%+` | Default format (equivalent to `%a %b %e %T %Z %Y`) |
+
+Common `strftime` specifiers:
+
+| Specifier | Output |
+|-----------|--------|
+| `%Y` | 4-digit year (2026) |
+| `%m` | Month (01-12) |
+| `%d` | Day (01-31) |
+| `%H` | Hour (00-23) |
+| `%M` | Minute (00-59) |
+| `%S` | Second (00-60) |
+| `%T` | Time as `%H:%M:%S` |
+| `%Z` | Timezone abbreviation |
+| `%z` | Numeric timezone (`+0000`) |
+| `%s` | Epoch seconds |
+| `%a` | Abbreviated weekday |
+| `%b` | Abbreviated month |
+
+## Examples
+
+```sh
+# Default output
+date
+# → Sun Apr  5 14:30:00 UTC 2026
+
+# Custom format
+date "+%Y-%m-%d %H:%M:%S"
+# → 2026-04-05 14:30:00
+
+# ISO 8601
+date -Iseconds
+# → 2026-04-05T14:30:00+00:00
+
+# RFC 2822
+date -R
+# → Sun, 05 Apr 2026 14:30:00 +0000
+
+# UTC
+date -u
+
+# Epoch seconds
+date +%s
+# → 1775399400
+
+# Date arithmetic: tomorrow
+date -v +1d
+
+# Date arithmetic: last Monday
+date -v -monday
+
+# Date arithmetic: 3 months from now, at midnight
+date -v +3m -v 0H -v 0M -v 0S
+
+# Parse input format
+date -f "%Y%m%d" "20260405" "+%A, %B %d"
+# → Sunday, April 05
+
+# Display file modification time
+date -r /etc/passwd
+
+# Display epoch time
+date -r 1775399400
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | Success |
+| 1 | Error (invalid format, failed to set time, etc.) |
diff --git a/docs/handbook/corebinutils/dd.md b/docs/handbook/corebinutils/dd.md
new file mode 100644
index 0000000000..df0108c231
--- /dev/null
+++ b/docs/handbook/corebinutils/dd.md
@@ -0,0 +1,407 @@
+# dd — Data Duplicator
+
+## Overview
+
+`dd` copies and optionally converts data between files or devices. It operates
+at the block level with configurable input/output block sizes, supports
+ASCII/EBCDIC conversion, case conversion, byte swapping, sparse output,
+speed throttling, and real-time progress reporting.
+
+**Source**: `dd/dd.c`, `dd/dd.h`, `dd/extern.h`, `dd/args.c`, `dd/conv.c`,
+`dd/conv_tab.c`, `dd/gen.c`, `dd/misc.c`, `dd/position.c`
+**Origin**: BSD 4.4, Keith Muller / Lance Visser
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+dd [operand=value ...]
+```
+
+## Operands
+
+`dd` uses a unique JCL-style syntax (not `getopt`):
+
+| Operand | Description | Default |
+|---------|-------------|---------|
+| `if=file` | Input file | stdin |
+| `of=file` | Output file | stdout |
+| `bs=n` | Block size (sets both ibs and obs) | 512 |
+| `ibs=n` | Input block size | 512 |
+| `obs=n` | Output block size | 512 |
+| `cbs=n` | Conversion block size | — |
+| `count=n` | Number of input blocks to copy | All |
+| `skip=n` / `iseek=n` | Skip n input blocks | 0 |
+| `seek=n` / `oseek=n` | Seek n output blocks | 0 |
+| `files=n` | Copy n input files (tape only) | 1 |
+| `fillchar=c` | Fill character for sync padding | NUL/space |
+| `speed=n` | Maximum bytes per second | Unlimited |
+| `status=value` | Progress reporting mode | — |
+
+### Size Suffixes
+
+Numeric values accept multiplier suffixes:
+
+| Suffix | Multiplier |
+|--------|-----------|
+| `b` | 512 |
+| `k` | 1024 |
+| `m` | 1024² (1,048,576) |
+| `g` | 1024³ (1,073,741,824) |
+| `t` | 1024⁴ |
+| `w` | `sizeof(int)` |
+| `x` | Multiplication (e.g., `2x512` = 1024) |
+
+### conv= Options
+
+| Conversion | Description |
+|------------|-------------|
+| `ascii` | EBCDIC to ASCII |
+| `ebcdic` | ASCII to EBCDIC |
+| `ibm` | ASCII to IBM EBCDIC |
+| `block` | Newline-terminated to fixed-length records |
+| `unblock` | Fixed-length records to newline-terminated |
+| `lcase` | Convert to lowercase |
+| `ucase` | Convert to uppercase |
+| `swab` | Swap every pair of bytes |
+| `noerror` | Continue after read errors |
+| `notrunc` | Don't truncate output file |
+| `sync` | Pad input blocks to ibs with NULs/spaces |
+| `sparse` | Seek over zero-filled output blocks |
+| `fsync` | Physically write output data and metadata |
+| `fdatasync` | Physically write output data |
+| `pareven` | Set even parity on output |
+| `parodd` | Set odd parity on output |
+| `parnone` | Strip parity from input |
+| `parset` | Set parity bit on output |
+
+### iflag= and oflag= Options
+
+| Flag | Description |
+|------|-------------|
+| `direct` | Use `O_DIRECT` for direct I/O |
+| `fullblock` | Accumulate full input blocks |
+
+### status= Values
+
+| Value | Description |
+|-------|-------------|
+| `noxfer` | Suppress transfer statistics |
+| `none` | Suppress everything |
+| `progress` | Print periodic progress via SIGALRM |
+
+## Source Architecture
+
+### File Responsibilities
+
+| File | Purpose |
+|------|---------|
+| `dd.c` | Main control flow: `main()`, `setup()`, `dd_in()`, `dd_close()` |
+| `dd.h` | Shared types: `IO`, `STAT`, conversion flags (40+ bit flags) |
+| `extern.h` | External function declarations and global variable exports |
+| `args.c` | JCL argument parser: `jcl()`, operand table, size parsing |
+| `conv.c` | Conversion functions: `def()`, `block()`, `unblock()` |
+| `conv_tab.c` | ASCII/EBCDIC translation tables (256-byte arrays) |
+| `gen.c` | Signal handling: `prepare_io()`, `before_io()`, `after_io()` |
+| `misc.c` | Summary output, progress reporting, timing |
+| `position.c` | Input/output positioning: `pos_in()`, `pos_out()` |
+
+### Key Data Structures
+
+#### IO Structure (I/O Stream State)
+
+```c
+typedef struct {
+ u_char *db; /* Buffer address */
+ u_char *dbp; /* Current buffer I/O pointer */
+ ssize_t dbcnt; /* Current buffer byte count */
+ ssize_t dbrcnt; /* Last read byte count */
+ ssize_t dbsz; /* Block size */
+ u_int flags; /* ISCHR | ISPIPE | ISTAPE | ISSEEK | NOREAD | ISTRUNC */
+ const char *name; /* Filename */
+ int fd; /* File descriptor */
+ off_t offset; /* Blocks to skip */
+ off_t seek_offset; /* Sparse output offset */
+} IO;
+```
+
+Device type flags:
+
+| Flag | Value | Meaning |
+|------|-------|---------|
+| `ISCHR` | 0x01 | Character device |
+| `ISPIPE` | 0x02 | Pipe or socket |
+| `ISTAPE` | 0x04 | Tape device |
+| `ISSEEK` | 0x08 | Seekable |
+| `NOREAD` | 0x10 | Write-only (output opened without read) |
+| `ISTRUNC` | 0x20 | Truncatable |
+
+#### STAT Structure (Statistics)
+
+```c
+typedef struct {
+ uintmax_t in_full; /* Full input blocks transferred */
+ uintmax_t in_part; /* Partial input blocks */
+ uintmax_t out_full; /* Full output blocks */
+ uintmax_t out_part; /* Partial output blocks */
+ uintmax_t trunc; /* Truncated records */
+ uintmax_t swab; /* Odd-length swab blocks */
+ uintmax_t bytes; /* Total bytes written */
+ struct timespec start; /* Start timestamp */
+} STAT;
+```
+
+#### Conversion Flags
+
+The `ddflags` global is a 64-bit bitmask with 37 defined flags:
+
+```c
+#define C_ASCII 0x0000000000000001ULL
+#define C_BLOCK 0x0000000000000002ULL
+#define C_BS 0x0000000000000004ULL
+/* ... 34 more flags ... */
+#define C_IDIRECT 0x0000000800000000ULL
+#define C_ODIRECT 0x0000001000000000ULL
+```
+
+### Argument Parsing (args.c)
+
+`dd` uses its own JCL-style parser instead of `getopt`:
+
+```c
+static const struct arg {
+ const char *name;
+ void (*f)(char *);
+ uint64_t set, noset;
+} args[] = {
+ { "bs", f_bs, C_BS, C_BS|C_IBS|C_OBS|C_OSYNC },
+ { "cbs", f_cbs, C_CBS, C_CBS },
+ { "conv", f_conv, 0, 0 },
+ { "count", f_count, C_COUNT, C_COUNT },
+ { "files", f_files, C_FILES, C_FILES },
+ { "fillchar", f_fillchar, C_FILL, C_FILL },
+ { "ibs", f_ibs, C_IBS, C_BS|C_IBS },
+ { "if", f_if, C_IF, C_IF },
+ { "iflag", f_iflag, 0, 0 },
+ { "obs", f_obs, C_OBS, C_BS|C_OBS },
+ { "of", f_of, C_OF, C_OF },
+ { "oflag", f_oflag, 0, 0 },
+ { "seek", f_seek, C_SEEK, C_SEEK },
+ { "skip", f_skip, C_SKIP, C_SKIP },
+ { "speed", f_speed, 0, 0 },
+ { "status", f_status, C_STATUS,C_STATUS },
+};
+```
+
+Arguments are looked up via `bsearch()` in the sorted table.
+
+The `noset` field prevents conflicting options: e.g., `bs=` sets
+`C_BS|C_IBS|C_OBS|C_OSYNC` and forbids re-specifying any of those.
+
+### Conversion Functions (conv.c)
+
+Three conversion modes:
+
+#### `def()` — Default (No Conversion)
+
+```c
+void def(void)
+{
+ if ((t = ctab) != NULL)
+ for (inp = in.dbp - (cnt = in.dbrcnt); cnt--; ++inp)
+ *inp = t[*inp];
+
+ out.dbp = in.dbp;
+ out.dbcnt = in.dbcnt;
+
+ if (in.dbcnt >= out.dbsz)
+ dd_out(0);
+}
+```
+
+Simple buffer pass-through with optional character table translation.
+
+#### `block()` — Variable → Fixed Length
+
+Converts newline-terminated records to fixed-length records padded with
+spaces to `cbs` bytes. Used for ASCII-to-EBCDIC record conversion.
+
+#### `unblock()` — Fixed → Variable Length
+
+Converts fixed-length records back to newline-terminated format by
+stripping trailing spaces and appending newlines.
+
+### Signal Handling (gen.c)
+
+`dd` handles several signals:
+
+| Signal | Handler | Purpose |
+|--------|---------|---------|
+| SIGINFO/SIGUSR1 | `siginfo_handler()` | Print transfer summary |
+| SIGALRM | `sigalarm_handler()` | Periodic progress (with `status=progress`) |
+| SIGINT/SIGTERM | Default + atexit | Print summary before exit |
+
+The `prepare_io()`, `before_io()`, `after_io()` functions manage signal
+masking during I/O operations to prevent interruption during critical
+sections.
+
+```c
+volatile sig_atomic_t need_summary; /* Set by SIGINFO */
+volatile sig_atomic_t need_progress; /* Set by SIGALRM */
+volatile sig_atomic_t kill_signal; /* Set by termination signals */
+```
+
+### Progress Reporting (misc.c)
+
+```c
+void summary(void)
+{
+ /* Print: "X+Y records in\nA+B records out\nN bytes transferred in T secs" */
+ double elapsed = secs_elapsed();
+ /* Print human-readable transfer rate */
+}
+```
+
+The `format_scaled()` helper renders byte counts in human-readable form
+(kB, MB, GB) using configurable base (1000 or 1024).
+
+### Buffer Allocation
+
+Direct I/O requires page-aligned buffers:
+
+```c
+static void *
+alloc_io_buffer(size_t size)
+{
+ if ((ddflags & (C_IDIRECT | C_ODIRECT)) == 0)
+ return malloc(size);
+
+ size_t alignment = sysconf(_SC_PAGESIZE);
+ if (alignment == 0 || alignment == (size_t)-1)
+ alignment = 4096;
+ void *buf;
+ posix_memalign(&buf, alignment, size);
+ return buf;
+}
+```
+
+### Sparse Output
+
+With `conv=sparse`, `dd` uses `lseek(2)` to skip over blocks of zeros
+instead of writing them:
+
+```c
+#define BISZERO(p, s) ((s) > 0 && *((const char *)p) == 0 && \
+ !memcmp((const void *)(p), (const void *)((const char *)p + 1), (s) - 1))
+```
+
+### Setup and I/O
+
+The `setup()` function in `dd.c` handles:
+
+1. Opening input/output files with appropriate flags
+2. Detecting device types (`getfdtype()`)
+3. Allocating I/O buffers
+4. Setting up character conversion tables (parity, case)
+5. Positioning input/output streams
+6. Truncating output if needed
+
+```c
+static void setup(void)
+{
+ /* Open input */
+ if (in.name == NULL) {
+ in.name = "stdin";
+ in.fd = STDIN_FILENO;
+ } else {
+ iflags = (ddflags & C_IDIRECT) ? O_DIRECT : 0;
+ in.fd = open(in.name, O_RDONLY | iflags, 0);
+ }
+
+ /* Open output */
+ oflags = O_CREAT;
+ if (!(ddflags & (C_SEEK | C_NOTRUNC)))
+ oflags |= O_TRUNC;
+ if (ddflags & C_OFSYNC)
+ oflags |= O_SYNC;
+ if (ddflags & C_ODIRECT)
+ oflags |= O_DIRECT;
+ out.fd = open(out.name, O_RDWR | oflags, DEFFILEMODE);
+
+ /* Allocate buffers */
+ if (!(ddflags & (C_BLOCK | C_UNBLOCK))) {
+ in.db = alloc_io_buffer(out.dbsz + in.dbsz - 1);
+ out.db = in.db; /* Single shared buffer */
+ } else {
+ in.db = alloc_io_buffer(MAX(in.dbsz, cbsz) + cbsz);
+ out.db = alloc_io_buffer(out.dbsz + cbsz);
+ }
+}
+```
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `open(2)` | Open input/output files |
+| `read(2)` | Read input blocks |
+| `write(2)` | Write output blocks |
+| `lseek(2)` | Position streams, sparse output |
+| `ftruncate(2)` | Truncate output file |
+| `close(2)` | Close file descriptors |
+| `ioctl(2)` | Tape device queries (`MTIOCGET`) |
+| `sigaction(2)` | Install signal handlers |
+| `setitimer(2)` | Periodic SIGALRM for progress |
+| `clock_gettime(2)` | Elapsed time calculation |
+| `posix_memalign(3)` | Page-aligned buffers for direct I/O |
+| `sysconf(3)` | Get page size |
+
+## Examples
+
+```sh
+# Copy a disk image
+dd if=/dev/sda of=disk.img bs=4M status=progress
+
+# Write an image to a device
+dd if=image.iso of=/dev/sdb bs=4M conv=fsync
+
+# Create a 1GB sparse file
+dd if=/dev/zero of=sparse.img bs=1 count=0 seek=1G
+
+# Convert ASCII to uppercase
+dd if=input.txt of=output.txt conv=ucase
+
+# Copy with direct I/O
+dd if=data.bin of=data2.bin bs=4k iflag=direct oflag=direct
+
+# Network transfer (with throttling)
+dd if=large_file.tar bs=1M speed=10M | ssh remote 'dd of=large_file.tar'
+
+# Skip first 100 blocks of input
+dd if=tape.raw of=data.bin skip=100 bs=512
+
+# Show progress with SIGUSR1
+dd if=/dev/sda of=backup.img bs=1M &
+kill -USR1 $!
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | Success |
+| 1 | Error (open, read, write, or conversion failure) |
+
+## Summary Output Format
+
+```
+X+Y records in
+A+B records out
+N bytes (H) transferred in T.TTT secs (R/s)
+```
+
+Where:
+- `X` = full input blocks, `Y` = partial input blocks
+- `A` = full output blocks, `B` = partial output blocks
+- `N` = total bytes, `H` = human-readable size
+- `T` = elapsed seconds, `R` = transfer rate
diff --git a/docs/handbook/corebinutils/df.md b/docs/handbook/corebinutils/df.md
new file mode 100644
index 0000000000..c7b364f3e0
--- /dev/null
+++ b/docs/handbook/corebinutils/df.md
@@ -0,0 +1,264 @@
+# df — Display Filesystem Space Usage
+
+## Overview
+
+`df` reports total, used, and available disk space for mounted filesystems.
+It supports multiple output formats (POSIX, human-readable, SI), filesystem
+type filtering, inode display, and Linux-native mount information parsing.
+
+**Source**: `df/df.c` (single file, 100+ functions)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+df [-abcgHhiklmPT,] [-t type] [-x type] [file ...]
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-a` | Show all filesystems including zero-size ones |
+| `-b` | Display sizes in 512-byte blocks |
+| `-c` | Print total line at end |
+| `-g` | Display sizes in gigabytes |
+| `-H` | Human-readable with SI units (1000-based) |
+| `-h` | Human-readable with binary units (1024-based) |
+| `-i` | Show inode information instead of block usage |
+| `-k` | Display sizes in kilobytes |
+| `-l` | Show only local (non-remote) filesystems |
+| `-m` | Display sizes in megabytes |
+| `-P` | POSIX output format (one line per filesystem) |
+| `-T` | Show filesystem type column |
+| `-t type` | Filter to specified filesystem type |
+| `-x` | Exclude specified filesystem type |
+| `-,` | Use thousands separator in output |
+
+## Source Analysis
+
+### Key Data Structures
+
+```c
+struct options {
+ bool show_all;
+ bool show_inodes;
+ bool show_type;
+ bool posix_format;
+ bool human_readable;
+ bool si_units;
+ bool local_only;
+ bool show_total;
+ bool thousands_separator;
+ /* block size settings from -b/-k/-m/-g or BLOCKSIZE env */
+};
+
+struct mount_entry {
+ char *source; /* Device path (/dev/sda1) */
+ char *target; /* Mount point (/home) */
+ char *fstype; /* Filesystem type (ext4, tmpfs) */
+ char *options; /* Mount options (rw,noatime) */
+ dev_t device; /* Device number */
+};
+
+struct mount_table {
+ struct mount_entry *entries;
+ size_t count;
+ size_t capacity;
+};
+
+struct row {
+ char *filesystem; /* Formatted filesystem column */
+ char *type; /* Filesystem type */
+ char *size; /* Total size */
+ char *used; /* Used space */
+ char *avail; /* Available space */
+ char *capacity; /* Percentage used */
+ char *mount_point; /* Mount point path */
+ char *iused; /* Inodes used */
+ char *ifree; /* Inodes free */
+};
+
+struct column_widths {
+ int filesystem;
+ int type;
+ int size;
+ int used;
+ int avail;
+ int capacity;
+ int mount_point;
+};
+```
+
+### Linux-Native Mount Parsing
+
+Unlike BSD which uses `getmntinfo(3)` / `statfs(2)`, this port reads
+`/proc/self/mountinfo` directly:
+
+```c
+static int
+parse_mountinfo(struct mount_table *table)
+{
+ FILE *fp = fopen("/proc/self/mountinfo", "r");
+ /* Parse each line:
+ * ID PARENT_ID MAJOR:MINOR ROOT MOUNT_POINT OPTIONS ... - FSTYPE SOURCE SUPER_OPTIONS
+ */
+ while (getline(&line, &linesz, fp) != -1) {
+ /* Extract fields, unescape special characters */
+ entry.source = unescape_mountinfo(source_str);
+ entry.target = unescape_mountinfo(target_str);
+ entry.fstype = strdup(fstype_str);
+ }
+}
+```
+
+#### Escape Handling
+
+Mount paths in `/proc/self/mountinfo` use octal escapes for special
+characters (spaces, newlines, backslashes):
+
+```c
+static char *
+unescape_mountinfo(const char *text)
+{
+ /* Convert \040 → space, \011 → tab, \012 → newline, \134 → backslash */
+}
+```
+
+### Filesystem Stats
+
+`df` uses `statvfs(3)` (a library call on Linux, implemented atop `statfs(2)`)
+instead of calling BSD's `statfs(2)` directly:
+
+```c
+struct statvfs sv;
+if (statvfs(mount_point, &sv) != 0)
+ return -1;
+
+total_blocks = sv.f_blocks;
+free_blocks = sv.f_bfree;
+avail_blocks = sv.f_bavail; /* Available to unprivileged users */
+block_size = sv.f_frsize;
+
+total_inodes = sv.f_files;
+free_inodes = sv.f_ffree;
+```
+
+### Remote Filesystem Detection
+
+The `-l` (local only) flag requires distinguishing local from remote
+filesystems:
+
+```c
+static bool
+is_remote_filesystem(const struct mount_entry *entry)
+{
+ /* Check filesystem type */
+ if (strcmp(entry->fstype, "nfs") == 0 ||
+ strcmp(entry->fstype, "nfs4") == 0 ||
+ strcmp(entry->fstype, "cifs") == 0 ||
+ strcmp(entry->fstype, "smbfs") == 0 ||
+ strcmp(entry->fstype, "fuse.sshfs") == 0)
+ return true;
+
+ /* Check source for remote indicators (host:path or //host/share) */
+ if (strchr(entry->source, ':') != NULL)
+ return true;
+ if (entry->source[0] == '/' && entry->source[1] == '/')
+ return true;
+
+ return false;
+}
+```
+
+### Human-Readable Formatting
+
+```c
+static char *
+format_human_readable(uint64_t bytes, bool si)
+{
+ unsigned int base = si ? 1000 : 1024;
+ const char *const *units = si ? si_units : binary_units;
+ /* Scale and format: "1.5G", "234M", "45K" */
+}
+```
+
+### BLOCKSIZE Environment
+
+The `BLOCKSIZE` environment variable can override the default block size:
+
+```c
+char *bs = getenv("BLOCKSIZE");
+if (bs != NULL) {
+ /* Parse: "512", "K", "M", "G", or "1k", "4k", etc. */
+}
+```
+
+### Safe Integer Arithmetic
+
+`df` performs arithmetic with overflow protection:
+
+```c
+/* Safe multiplication with clamping */
+static uint64_t
+safe_mul(uint64_t a, uint64_t b)
+{
+ if (a != 0 && b > UINT64_MAX / a)
+ return UINT64_MAX; /* Clamp instead of overflow */
+ return a * b;
+}
+```
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `statvfs(3)` | Query filesystem statistics |
+| `stat(2)` | Identify filesystem for file arguments |
+| `open(2)` / `read(2)` | Parse `/proc/self/mountinfo` |
+
+## Examples
+
+```sh
+# Default output
+df
+
+# Human-readable sizes
+df -h
+
+# Show filesystem type
+df -hT
+
+# Only local filesystems
+df -hl
+
+# POSIX format
+df -P
+
+# Inode usage
+df -i
+
+# Specific filesystem
+df /home
+
+# Total line
+df -hc
+
+# Specific filesystem type
+df -t ext4
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | Success |
+| 1 | Error accessing filesystem |
+
+## Differences from GNU df
+
+- Uses `/proc/self/mountinfo` directly (no libmount)
+- No `--output` for custom column selection
+- `-c` for total line (GNU uses `--total`)
+- BLOCKSIZE env var compatibility (BSD convention)
+- No `--sync` / `--no-sync` options
diff --git a/docs/handbook/corebinutils/echo.md b/docs/handbook/corebinutils/echo.md
new file mode 100644
index 0000000000..da7df16cec
--- /dev/null
+++ b/docs/handbook/corebinutils/echo.md
@@ -0,0 +1,158 @@
+# echo — Write Arguments to Standard Output
+
+## Overview
+
+`echo` writes its arguments to standard output, separated by spaces, followed
+by a newline. It is intentionally minimal — the FreeBSD/BSD implementation
+does not support GNU-style `-e` escape processing.
+
+**Source**: `echo/echo.c` (single file)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+echo [-n] [string ...]
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-n` | Suppress trailing newline |
+
+Only a leading `-n` is recognized as an option. Any other arguments
+(including `--`) are treated as literal strings and printed.
+
+## Source Analysis
+
+### Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Parse arguments and write output |
+| `write_all()` | Retry-safe `write(2)` loop handling `EINTR` |
+| `warn_errno()` | Error reporting to stderr |
+| `trim_trailing_backslash_c()` | Check if final argument ends with `\c` |
+
+### Option Processing
+
+`echo` does NOT use `getopt(3)`. It manually checks for `-n`:
+
+```c
+int main(int argc, char *argv[])
+{
+ bool suppress_newline = false;
+
+ argv++; /* Skip program name */
+
+ /* Only leading -n flags are consumed */
+ while (*argv && strcmp(*argv, "-n") == 0) {
+ suppress_newline = true;
+ argv++;
+ }
+
+ /* Everything else is literal output */
+}
+```
+
+### The `\c` Convention
+
+If the **last** argument ends with `\c`, the trailing newline is suppressed
+and the `\c` itself is not printed:
+
+```c
+static bool
+trim_trailing_backslash_c(const char *arg, size_t *len)
+{
+ if (*len >= 2 && arg[*len - 2] == '\\' && arg[*len - 1] == 'c') {
+ *len -= 2;
+ return true; /* Suppress newline */
+ }
+ return false;
+}
+```
+
+### I/O Strategy
+
+Instead of `printf` or `writev`, echo uses a `write(2)` loop:
+
+```c
+static int
+write_all(int fd, const void *buf, size_t count)
+{
+ const char *p = buf;
+ ssize_t n;
+
+ while (count > 0) {
+ n = write(fd, p, count);
+ if (n < 0) {
+ if (errno == EINTR)
+ continue;
+ return -1;
+ }
+ p += n;
+ count -= n;
+ }
+ return 0;
+}
+```
+
+This avoids `IOV_MAX` limitations that would apply with `writev(2)` when
+there are many arguments.
+
+### Key Behaviors
+
+| Input | Output | Notes |
+|-------|--------|-------|
+| `echo hello` | `hello\n` | Basic usage |
+| `echo -n hello` | `hello` | No trailing newline |
+| `echo -n -n hello` | `hello` | Multiple `-n` consumed |
+| `echo -- hello` | `-- hello\n` | `--` is NOT end-of-options |
+| `echo -e hello` | `-e hello\n` | `-e` is NOT recognized |
+| `echo "hello\c"` | `hello` | `\c` suppresses newline |
+| `echo ""` | `\n` | Empty string → just newline |
+
+## Portability Notes
+
+- **BSD echo** (this implementation): Only `-n` and trailing `\c`
+- **GNU echo**: Supports `-e` for escape sequences (`\n`, `\t`, etc.)
+ and `-E` to disable them
+- **POSIX echo**: Behavior of `-n` and backslash sequences is
+ implementation-defined
+- **Shell built-in**: Most shells have a built-in `echo` that may differ
+ from the external command
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `write(2)` | All output to stdout |
+
+## Examples
+
+```sh
+# Simple output
+echo Hello, World!
+
+# No trailing newline
+echo -n "prompt> "
+
+# Literal dash-n (only leading -n is recognized)
+echo "The flag is -n"
+
+# Multiple arguments
+echo one two three
+# → "one two three"
+
+# Suppress newline with \c
+echo "no newline\c"
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | Success |
+| 1 | Write error |
diff --git a/docs/handbook/corebinutils/ed.md b/docs/handbook/corebinutils/ed.md
new file mode 100644
index 0000000000..3e51e576b6
--- /dev/null
+++ b/docs/handbook/corebinutils/ed.md
@@ -0,0 +1,306 @@
+# ed — Line Editor
+
+## Overview
+
+`ed` is the standard POSIX line editor. It operates on a text buffer that
+resides in a temporary scratch file, supports regular expression search and
+substitution, global commands, undo, and file I/O. This implementation derives
+from Andrew Moore's BSD `ed` and the algorithm described in Kernighan and
+Plauger's *Software Tools in Pascal*.
+
+**Source**: `ed/main.c`, `ed/ed.h`, `ed/compat.c`, `ed/compat.h`, `ed/buf.c`,
+`ed/glbl.c`, `ed/io.c`, `ed/re.c`, `ed/sub.c`, `ed/undo.c`
+**Origin**: BSD 4.4, Andrew Moore (Talke Studio)
+**License**: BSD-2-Clause
+
+## Synopsis
+
+```
+ed [-] [-sx] [-p string] [file]
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-` | Suppress diagnostics (same as `-s`) |
+| `-s` | Script mode: suppress byte counts and `!` prompts |
+| `-x` | Encryption mode (**not supported on Linux**) |
+| `-p string` | Set the command prompt (default: no prompt) |
+
+## Source Architecture
+
+### File Responsibilities
+
+| File | Purpose | Key Functions |
+|------|---------|---------------|
+| `main.c` | Main loop, command dispatch, signals | `main()`, `exec_command()`, signal handlers |
+| `ed.h` | Types, constants, function prototypes | `line_t`, `undo_t`, error codes |
+| `compat.c/h` | Linux portability shims | `strlcpy`, `strlcat` replacements |
+| `buf.c` | Scratch file buffer management | `get_sbuf_line()`, `put_sbuf_line()` |
+| `glbl.c` | Global command (g/re/cmd) | `exec_global()`, mark management |
+| `io.c` | File I/O (read/write) | `read_file()`, `write_file()`, `read_stream()` |
+| `re.c` | Regular expression handling | `get_compiled_pattern()`, `search_*()` |
+| `sub.c` | Substitution command | `substitute()`, replacement parsing |
+| `undo.c` | Undo stack management | `push_undo_stack()`, `undo_last()` |
+
+### Key Data Structures
+
+#### Line Node (linked list element)
+
+```c
+typedef struct line {
+ struct line *q_forw; /* Next line in buffer */
+ struct line *q_back; /* Previous line in buffer */
+ off_t seek; /* Byte offset in scratch file */
+ int len; /* Line length */
+} line_t;
+```
+
+The edit buffer is a doubly-linked circular list of `line_t` nodes. Line
+content is stored in an external scratch file, not in memory.
+
+#### Undo Record
+
+```c
+typedef struct undo {
+ int type; /* UADD, UDEL, UMOV, VMOV */
+ line_t *h; /* Head of affected line range */
+ line_t *t; /* Tail of affected line range */
+ /* ... */
+} undo_t;
+```
+
+#### Constants
+
+```c
+#define ERR (-2) /* General error */
+#define EMOD (-3) /* Buffer modified warning */
+#define FATAL (-4) /* Fatal error (abort) */
+
+#define MINBUFSZ 512
+#define SE_MAX 30 /* Max regex subexpressions */
+#define LINECHARS INT_MAX
+```
+
+#### Global Flags
+
+```c
+#define GLB 001 /* Global command active */
+#define GPR 002 /* Print after command */
+#define GLS 004 /* List after command */
+#define GNP 010 /* Enumerate after command */
+#define GSG 020 /* Global substitute */
+```
+
+### Main Loop
+
+```c
+int main(volatile int argc, char **volatile argv)
+{
+ setlocale(LC_ALL, "");
+
+ /* Detect if invoked as "red" (restricted ed) */
+ red = (n = strlen(argv[0])) > 2 && argv[0][n - 3] == 'r';
+
+ /* Parse options */
+ while ((c = getopt(argc, argv, "p:sx")) != -1) { ... }
+
+ /* Signal setup */
+ signal(SIGHUP, signal_hup); /* Emergency save */
+ signal(SIGQUIT, SIG_IGN); /* Ignore quit */
+ signal(SIGINT, signal_int); /* Interrupt handling */
+ signal(SIGWINCH, handle_winch); /* Terminal resize */
+
+ /* Initialize buffers, load file if specified */
+ init_buffers();
+ if (argc && is_legal_filename(*argv))
+ read_file(*argv, 0);
+
+ /* Command loop */
+ for (;;) {
+ if (prompt) fputs(prompt, stdout);
+ status = get_tty_line();
+ if (status == EOF) break;
+ status = exec_command();
+ }
+}
+```
+
+### Buffer Management (buf.c)
+
+The scratch file strategy avoids unlimited memory consumption:
+
+- Lines are stored in a temporary file created with `mkstemp(3)`
+- `put_sbuf_line()` appends a line to the scratch file and returns its offset
+- `get_sbuf_line()` reads a line back from the scratch file by offset
+- The `line_t` linked list tracks offsets and lengths, not actual text
+
+```c
+/* Append line to scratch file, return its node */
+line_t *put_sbuf_line(const char *text);
+
+/* Read line from scratch file via offset */
+char *get_sbuf_line(const line_t *lp);
+```
+
+### File I/O (io.c)
+
+```c
+long read_file(char *fn, long n)
+{
+ /* Open file or pipe (if fn starts with '!') */
+ fp = (*fn == '!') ? popen(fn + 1, "r") : fopen(strip_escapes(fn), "r");
+
+ /* Read lines into buffer after line n */
+ size = read_stream(fp, n);
+
+ /* Print byte count unless in script mode */
+ if (!scripted)
+ fprintf(stdout, "%lu\n", size);
+}
+```
+
+The `read_stream()` function reads from `fp`, appending each line to the
+edit buffer via `put_sbuf_line()`, and maintaining the undo stack for
+rollback.
+
+### Regular Expressions (re.c)
+
+Uses POSIX `regex.h` (via `regcomp(3)` / `regexec(3)`):
+
+```c
+typedef regex_t pattern_t;
+
+pattern_t *get_compiled_pattern(void);
+/* Compiles the current regex pattern, caching the last used pattern */
+```
+
+### Substitution (sub.c)
+
+The `s/pattern/replacement/flags` command:
+- Supports `\1` through `\9` backreferences
+- `g` flag for global replacement
+- Count for nth occurrence replacement
+- `&` in replacement refers to the matched text
+
+### Undo (undo.c)
+
+Every buffer modification pushes an undo record:
+
+```c
+undo_t *push_undo_stack(int type, long from, long to);
+int undo_last(void); /* Reverse last modification */
+```
+
+Undo types: `UADD` (lines added), `UDEL` (lines deleted), `UMOV` (lines moved).
+
+### Signal Handling
+
+| Signal | Handler | Action |
+|--------|---------|--------|
+| `SIGHUP` | `signal_hup()` | Save buffer to `ed.hup` and exit |
+| `SIGINT` | `signal_int()` | Set interrupt flag, longjmp to command prompt |
+| `SIGWINCH` | `handle_winch()` | Update terminal width for `l` command |
+| `SIGQUIT` | `SIG_IGN` | Ignored |
+
+### Restricted Mode (red)
+
+When invoked as `red`, the editor restricts:
+- Shell commands (`!command`) are forbidden
+- Filenames with `/` or starting with `!` are rejected
+- Directory changes are prevented
+
+## Commands Reference
+
+| Command | Description |
+|---------|-------------|
+| `(.)a` | Append text after line |
+| `(.)i` | Insert text before line |
+| `(.,.)c` | Change (replace) lines |
+| `(.,.)d` | Delete lines |
+| `(.,.)p` | Print lines |
+| `(.,.)l` | List lines (show non-printable characters) |
+| `(.,.)n` | Number and print lines |
+| `(.,.)m(.)` | Move lines |
+| `(.,.)t(.)` | Copy (transfer) lines |
+| `(.,.)s/re/replacement/flags` | Substitute |
+| `(.,.)g/re/command` | Global: apply command to matching lines |
+| `(.,.)v/re/command` | Inverse global: apply to non-matching lines |
+| `(.,.)w file` | Write lines to file |
+| `(.,.)W file` | Append lines to file |
+| `e file` | Edit file (replaces buffer) |
+| `E file` | Edit unconditionally |
+| `f file` | Set default filename |
+| `r file` | Read file into buffer |
+| `(.)r !command` | Read command output into buffer |
+| `u` | Undo last command |
+| `(.)=` | Print line number |
+| `(.,.)j` | Join lines |
+| `(.)k(c)` | Mark line with character c |
+| `q` | Quit (warns if modified) |
+| `Q` | Quit unconditionally |
+| `H` | Toggle verbose error messages |
+| `h` | Print last error message |
+| `!command` | Execute shell command |
+
+### Addressing
+
+| Address | Meaning |
+|---------|---------|
+| `.` | Current line |
+| `$` | Last line |
+| `n` | Line number n |
+| `-n` / `+n` | Relative to current line |
+| `/re/` | Next line matching regex |
+| `?re?` | Previous line matching regex |
+| `'c` | Line marked with character c |
+| `,` | Equivalent to `1,$` |
+| `;` | Equivalent to `.,$` |
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `mkstemp(3)` | Create scratch file |
+| `read(2)` / `write(2)` | Scratch file I/O |
+| `lseek(2)` | Position in scratch file |
+| `fopen(3)` / `fclose(3)` | Read/write user files |
+| `popen(3)` / `pclose(3)` | Shell command execution |
+| `regcomp(3)` / `regexec(3)` | Regular expression matching |
+| `sigsetjmp(3)` / `siglongjmp(3)` | Interrupt recovery |
+
+## Examples
+
+```sh
+# Edit a file
+ed myfile.txt
+
+# Script mode (for automation)
+printf '1,3p\nq\n' | ed -s myfile.txt
+
+# Global substitution
+printf 'g/old/s//new/g\nw\nq\n' | ed -s myfile.txt
+
+# With prompt
+ed -p '> ' myfile.txt
+
+# Read from pipe
+echo '!ls -la' | ed
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | Success |
+| 1 | Error |
+| 2 | Usage error |
+
+## Linux-Specific Notes
+
+- The `-x` (encryption) option prints an error and exits, as Linux
+ does not provide the BSD `des_setkey(3)` functions.
+- `strlcpy(3)` / `strlcat(3)` are provided by `compat.c` when not
+ available in the system libc.
+- The `SIGWINCH` handler uses `ioctl(TIOCGWINSZ)` for terminal size.
diff --git a/docs/handbook/corebinutils/error-handling.md b/docs/handbook/corebinutils/error-handling.md
new file mode 100644
index 0000000000..f82eec0b1c
--- /dev/null
+++ b/docs/handbook/corebinutils/error-handling.md
@@ -0,0 +1,315 @@
+# Error Handling — Corebinutils Patterns
+
+## Overview
+
+Corebinutils uses a layered error handling strategy: BSD `err(3)` functions
+as the primary interface, custom `error_errno()`/`error_msg()` wrappers in
+utilities that need more control, and consistent exit codes following
+POSIX conventions.
+
+## err(3) Family
+
+The BSD `<err.h>` functions are used throughout:
+
+```c
+#include <err.h>
+
+/* Fatal errors (print message + errno + exit) */
+err(1, "open '%s'", filename);
+/* → "utility: open 'file.txt': No such file or directory\n" */
+
+/* Fatal errors (print message + exit, no errno) */
+errx(2, "invalid option: -%c", ch);
+/* → "utility: invalid option: -z\n" */
+
+/* Non-fatal warnings (print message + errno, continue) */
+warn("stat '%s'", filename);
+/* → "utility: stat 'file.txt': Permission denied\n" */
+
+/* Non-fatal warnings (print message, no errno, continue) */
+warnx("skipping '%s': not a regular file", filename);
+/* → "utility: skipping 'foo': not a regular file\n" */
+```
+
+### When to Use Each
+
+| Function | Fatal? | Shows errno? | Use Case |
+|----------|--------|-------------|----------|
+| `err()` | Yes | Yes | Syscall failure, must exit |
+| `errx()` | Yes | No | Bad input, usage error |
+| `warn()` | No | Yes | Syscall failure, can continue |
+| `warnx()` | No | No | Validation issue, can continue |
+
+## Custom Error Functions
+
+Several utilities define their own error reporting for program name
+control or additional formatting:
+
+### Pattern: error_errno / error_msg
+
+```c
+static const char *progname;
+
+static void
+error_errno(const char *fmt, ...)
+{
+ int saved_errno = errno;
+ va_list ap;
+
+ fprintf(stderr, "%s: ", progname);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fprintf(stderr, ": %s\n", strerror(saved_errno));
+}
+
+static void
+error_msg(const char *fmt, ...)
+{
+ va_list ap;
+
+ fprintf(stderr, "%s: ", progname);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fputc('\n', stderr);
+}
+```
+
+Used by: `mkdir`, `chmod`, `hostname`, `domainname`, `nproc`
+
+### Pattern: die / die_errno
+
+```c
+static void __dead2
+die(const char *fmt, ...)
+{
+ va_list ap;
+ fprintf(stderr, "%s: ", progname);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fputc('\n', stderr);
+ exit(1);
+}
+
+static void __dead2
+die_errno(const char *fmt, ...)
+{
+ int saved = errno;
+ va_list ap;
+ fprintf(stderr, "%s: ", progname);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fprintf(stderr, ": %s\n", strerror(saved));
+ exit(1);
+}
+```
+
+Used by: `sleep`, `echo`
+
+### Pattern: verror_message (centralized)
+
+```c
+static void
+verror_message(const char *fmt, va_list ap, bool with_errno)
+{
+ int saved = errno;
+ fprintf(stderr, "%s: ", progname);
+ vfprintf(stderr, fmt, ap);
+ if (with_errno)
+ fprintf(stderr, ": %s", strerror(saved));
+ fputc('\n', stderr);
+}
+```
+
+## Exit Code Conventions
+
+### Standard Codes
+
+| Code | Meaning | Used By |
+|------|---------|---------|
+| 0 | Success | All utilities |
+| 1 | General error | Most utilities |
+| 2 | Usage/syntax error | test, expr, timeout, mv |
+
+### Utility-Specific Codes
+
+| Utility | Code | Meaning |
+|---------|------|---------|
+| `test` | 0 | Expression is true |
+| `test` | 1 | Expression is false |
+| `test` | 2 | Invalid expression |
+| `expr` | 0 | Non-null, non-zero result |
+| `expr` | 1 | Null or zero result |
+| `expr` | 2 | Invalid expression |
+| `expr` | 3 | Internal error |
+| `timeout` | 124 | Command timed out |
+| `timeout` | 125 | `timeout` itself failed |
+| `timeout` | 126 | Command not executable |
+| `timeout` | 127 | Command not found |
+
+### Exit on First Error vs. Accumulate
+
+Two patterns are observed:
+
+```c
+/* Pattern 1: Exit immediately on error */
+if (stat(path, &sb) < 0)
+ err(1, "stat");
+
+/* Pattern 2: Accumulate errors, exit with status */
+int errors = 0;
+for (i = 0; i < argc; i++) {
+ if (process(argv[i]) < 0) {
+ warn("failed: %s", argv[i]);
+ errors = 1;
+ }
+}
+return errors;
+```
+
+Pattern 2 is used by multi-argument utilities (rm, chmod, cp, ln)
+to process as many arguments as possible even when some fail.
+
+## errno Preservation
+
+All error functions save `errno` before calling any function that
+might modify it (like `fprintf`):
+
+```c
+static void
+error_errno(const char *fmt, ...)
+{
+ int saved = errno; /* Save before fprintf */
+ /* ... */
+ fprintf(stderr, ": %s\n", strerror(saved));
+}
+```
+
+## Signal Error Recovery
+
+### sigsetjmp/siglongjmp (ed)
+
+```c
+static sigjmp_buf jmpbuf;
+
+static void
+signal_handler(int sig)
+{
+ (void)sig;
+ siglongjmp(jmpbuf, 1);
+}
+
+/* In main loop */
+if (sigsetjmp(jmpbuf, 1) != 0) {
+ /* Returned from signal — reset state */
+ fputs("?\n", stderr);
+}
+```
+
+### Flag-Based (sleep, dd)
+
+```c
+static volatile sig_atomic_t got_signal;
+
+static void
+handler(int sig)
+{
+ got_signal = sig;
+}
+
+/* In main loop */
+if (got_signal) {
+ cleanup();
+ exit(128 + got_signal);
+}
+```
+
+## Validation Patterns
+
+### At System Boundaries
+
+```c
+/* Validate user input */
+if (argc < 2) {
+ usage();
+ /* NOTREACHED */
+}
+
+/* Validate parsed values */
+if (val < 0 || val > MAX_VALUE)
+ errx(2, "value out of range: %ld", val);
+
+/* Validate system call results */
+if (open(path, O_RDONLY) < 0)
+ err(1, "open");
+```
+
+### String-to-Number Conversion
+
+```c
+static long
+parse_number(const char *str)
+{
+ char *end;
+ errno = 0;
+ long val = strtol(str, &end, 10);
+
+ if (end == str || *end != '\0')
+ errx(2, "not a number: %s", str);
+ if (errno == ERANGE)
+ errx(2, "number out of range: %s", str);
+
+ return val;
+}
+```
+
+## Write Error Detection
+
+### Pattern: Check stdout at exit
+
+```c
+/* Catch write errors (e.g., broken pipe) */
+if (fclose(stdout) == EOF)
+ err(1, "stdout");
+
+/* Or equivalently */
+if (fflush(stdout) == EOF)
+ err(1, "write error");
+```
+
+### Pattern: write_all loop
+
+```c
+static int
+write_all(int fd, const void *buf, size_t count)
+{
+ const char *p = buf;
+ while (count > 0) {
+ ssize_t n = write(fd, p, count);
+ if (n < 0) {
+ if (errno == EINTR)
+ continue;
+ return -1;
+ }
+ p += n;
+ count -= n;
+ }
+ return 0;
+}
+```
+
+Used by: `echo`, `cat`, `dd`
+
+## Summary of Conventions
+
+1. Use `err(3)` family when available and sufficient
+2. Define custom wrappers only when program name control is needed
+3. Save `errno` immediately — before any library calls
+4. Exit 0 for success, 1 for errors, 2 for usage
+5. Multi-argument commands accumulate errors
+6. Validate at system boundaries (input parsing, syscall returns)
+7. Signal handlers set flags only — no complex logic
+8. Always check `write(2)` / `fclose(3)` return values
diff --git a/docs/handbook/corebinutils/expr.md b/docs/handbook/corebinutils/expr.md
new file mode 100644
index 0000000000..cd7e8a214c
--- /dev/null
+++ b/docs/handbook/corebinutils/expr.md
@@ -0,0 +1,194 @@
+# expr — Evaluate Expressions
+
+## Overview
+
+`expr` evaluates arithmetic, string, and logical expressions from the command
+line and writes the result to standard output. It implements a recursive
+descent parser with automatic type coercion between strings, numeric strings,
+and integers.
+
+**Source**: `expr/expr.c` (single file)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+expr expression
+```
+
+## Source Analysis
+
+### Value Types
+
+```c
+enum value_type {
+ INTEGER, /* Pure integer (from arithmetic) */
+ NUMERIC_STRING, /* String that looks like a number */
+ STRING, /* General string */
+};
+
+struct value {
+ enum value_type type;
+ union {
+ intmax_t ival;
+ char *sval;
+ };
+};
+```
+
+`expr` automatically coerces between types during operations. A value
+like `"42"` starts as `NUMERIC_STRING` and is promoted to `INTEGER` for
+arithmetic.
+
+### Parser Architecture
+
+`expr` uses a recursive descent parser with operator precedence:
+
+```
+parse_expr()
+ └── parse_or() /* | operator (lowest precedence) */
+ └── parse_and() /* & operator */
+ └── parse_compare() /* =, !=, <, >, <=, >= */
+ └── parse_add() /* +, - */
+ └── parse_mul() /* *, /, % */
+ └── parse_primary() /* atoms, ( expr ), : regex */
+```
+
+### Operators
+
+#### Arithmetic Operators
+
+| Operator | Description | Example |
+|----------|-------------|---------|
+| `+` | Addition | `expr 2 + 3` → `5` |
+| `-` | Subtraction | `expr 5 - 2` → `3` |
+| `*` | Multiplication | `expr 4 \* 3` → `12` |
+| `/` | Integer division | `expr 10 / 3` → `3` |
+| `%` | Modulo | `expr 10 % 3` → `1` |
+
+#### Comparison Operators
+
+| Operator | Description | Example |
+|----------|-------------|---------|
+| `=` | Equal | `expr abc = abc` → `1` |
+| `!=` | Not equal | `expr abc != def` → `1` |
+| `<` | Less than | `expr 1 \< 2` → `1` |
+| `>` | Greater than | `expr 2 \> 1` → `1` |
+| `<=` | Less or equal | `expr 1 \<= 1` → `1` |
+| `>=` | Greater or equal | `expr 2 \>= 1` → `1` |
+
+Comparisons between numeric strings use numeric ordering; otherwise
+locale-aware string comparison (`strcoll`) is used.
+
+#### Logical Operators
+
+| Operator | Description | Example |
+|----------|-------------|---------|
+| `\|` | OR (short-circuit) | `expr 0 \| 5` → `5` |
+| `&` | AND (short-circuit) | `expr 1 \& 2` → `1` |
+
+#### String/Regex Operators
+
+| Operator | Description | Example |
+|----------|-------------|---------|
+| `:` | Regex match | `expr hello : 'hel\(.*\)'` → `lo` |
+| `match` | Same as `:` | `expr match hello 'h.*'` |
+| `substr` | Substring | `expr substr hello 2 3` → `ell` |
+| `index` | Character position | `expr index hello l` → `3` |
+| `length` | String length | `expr length hello` → `5` |
+
+### Regex Matching
+
+The `:` operator uses POSIX basic regular expressions (`regcomp` with
+`REG_NOSUB` or group capture):
+
+```c
+/* expr STRING : REGEX */
+/* Returns captured \(...\) group or match length */
+```
+
+If the regex contains `\(...\)`, the captured substring is returned.
+Otherwise, the length of the match is returned.
+
+### Overflow Checking
+
+All arithmetic operations check for integer overflow:
+
+```c
+static intmax_t
+safe_add(intmax_t a, intmax_t b)
+{
+ if ((b > 0 && a > INTMAX_MAX - b) ||
+ (b < 0 && a < INTMAX_MIN - b))
+ errx(2, "integer overflow");
+ return a + b;
+}
+```
+
+### Locale Awareness
+
+String comparisons use `strcoll(3)` for locale-correct ordering:
+
+```c
+/* Compare as strings using locale collation */
+result = strcoll(left->sval, right->sval);
+```
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `regcomp(3)` / `regexec(3)` | Regular expression matching |
+| `strcoll(3)` | Locale-aware string comparison |
+
+## Examples
+
+```sh
+# Arithmetic
+expr 2 + 3 # → 5
+expr 10 / 3 # → 3
+expr 7 % 4 # → 3
+
+# String length
+expr length "hello" # → 5
+
+# Regex match (capture group)
+expr "hello-world" : 'hello-\(.*\)' # → world
+
+# Regex match (length)
+expr "hello" : '.*' # → 5
+
+# Substring
+expr substr "hello" 2 3 # → ell
+
+# Index (first occurrence)
+expr index "hello" "lo" # → 3
+
+# Comparisons
+expr 42 = 42 # → 1
+expr abc \< def # → 1
+
+# Logical OR (returns first non-zero/non-empty)
+expr 0 \| 5 # → 5
+expr "" \| alt # → alt
+
+# In shell scripts
+count=$(expr $count + 1)
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | Expression is neither null nor zero |
+| 1 | Expression is null or zero |
+| 2 | Expression is invalid |
+| 3 | Internal error |
+
+## Differences from GNU expr
+
+- No `--help` or `--version`
+- Identical POSIX semantics for `:` operator
+- Locale-aware string comparison by default
+- Overflow results in error, not wraparound
diff --git a/docs/handbook/corebinutils/hostname.md b/docs/handbook/corebinutils/hostname.md
new file mode 100644
index 0000000000..d6ea83bff2
--- /dev/null
+++ b/docs/handbook/corebinutils/hostname.md
@@ -0,0 +1,154 @@
+# hostname — Get or Set the System Hostname
+
+## Overview
+
+`hostname` reads or sets the system hostname. On Linux it uses `uname(2)` to
+read and `sethostname(2)` to write. The `-f` (FQDN) option is explicitly
+unsupported because resolving a fully qualified domain name requires
+NSS/DNS, which is outside the scope of a core utility.
+
+**Source**: `hostname/hostname.c` (single file)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+hostname [-s | -d] [name-of-host]
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-s` | Print short hostname (truncate at first `.`) |
+| `-d` | Print domain part only (after first `.`) |
+| `-f` | **Not supported on Linux** — exits with error |
+
+## Source Analysis
+
+### Data Structures
+
+```c
+struct options {
+ bool short_name; /* -s: truncate at first dot */
+ bool domain_only; /* -d: print after first dot */
+ bool set_mode; /* hostname was provided as argument */
+ const char *new_hostname;
+};
+```
+
+### Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Dispatch between get/set modes |
+| `parse_args()` | `getopt(3)` option parsing |
+| `dup_hostname()` | Fetch hostname from `uname(2)` and duplicate it |
+| `print_hostname()` | Print full, short, or domain part |
+| `set_hostname()` | Set hostname via `sethostname(2)` |
+| `linux_hostname_max()` | Query max hostname length via `sysconf(_SC_HOST_NAME_MAX)` |
+
+### Reading the Hostname
+
+```c
+static char *
+dup_hostname(void)
+{
+ struct utsname uts;
+
+ if (uname(&uts) < 0)
+ err(1, "uname");
+ return strdup(uts.nodename);
+}
+```
+
+### Setting the Hostname
+
+```c
+static void
+set_hostname(const char *name)
+{
+ size_t max_len = linux_hostname_max();
+ size_t len = strlen(name);
+
+ if (len > max_len)
+ errx(1, "hostname too long: %zu > %zu", len, max_len);
+
+ if (sethostname(name, len) < 0)
+ err(1, "sethostname");
+}
+```
+
+### Short/Domain Modes
+
+```c
+static void
+print_hostname(const char *hostname, const struct options *opts)
+{
+ if (opts->short_name) {
+ /* Truncate at first '.' */
+ const char *dot = strchr(hostname, '.');
+ if (dot)
+ printf("%.*s\n", (int)(dot - hostname), hostname);
+ else
+ puts(hostname);
+ } else if (opts->domain_only) {
+ /* Print after first '.' or empty */
+ const char *dot = strchr(hostname, '.');
+ puts(dot ? dot + 1 : "");
+ } else {
+ puts(hostname);
+ }
+}
+```
+
+### Max Hostname Length
+
+```c
+static size_t
+linux_hostname_max(void)
+{
+ long val = sysconf(_SC_HOST_NAME_MAX);
+ return (val > 0) ? (size_t)val : 64;
+}
+```
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `uname(2)` | Read current hostname |
+| `sethostname(2)` | Set new hostname (requires `CAP_SYS_ADMIN`) |
+| `sysconf(3)` | Query `_SC_HOST_NAME_MAX` |
+
+## Examples
+
+```sh
+# Print hostname
+hostname
+
+# Print short hostname
+hostname -s
+
+# Print domain part
+hostname -d
+
+# Set hostname (requires root)
+hostname myserver.example.com
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | Success |
+| 1 | Error (sethostname failed, invalid option) |
+
+## Differences from GNU hostname
+
+- No `-f` / `--fqdn` — Linux requires NSS for FQDN resolution
+- No `--ip-address` / `-i`
+- No `--alias` / `-a`
+- No `--all-fqdns` / `--all-ip-addresses`
+- Simpler: read or set only, no DNS lookups
diff --git a/docs/handbook/corebinutils/kill.md b/docs/handbook/corebinutils/kill.md
new file mode 100644
index 0000000000..eb4d8d55bf
--- /dev/null
+++ b/docs/handbook/corebinutils/kill.md
@@ -0,0 +1,237 @@
+# kill — Send Signals to Processes
+
+## Overview
+
+`kill` sends signals to processes or lists available signals. This
+implementation supports both numeric and named signal specifications,
+real-time signals (`SIGRT`), and can be compiled as a shell built-in.
+
+**Source**: `kill/kill.c` (single file)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+kill [-s signal_name] pid ...
+kill -l [exit_status ...]
+kill -signal_name pid ...
+kill -signal_number pid ...
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-s signal` | Send the named signal |
+| `-l` | List available signal names |
+| `-signal_name` | Send named signal (e.g., `-TERM`) |
+| `-signal_number` | Send signal by number (e.g., `-15`) |
+
+## Source Analysis
+
+### Signal Table
+
+The signal table maps names to numbers using a macro-generated array:
+
+```c
+struct signal_entry {
+ const char *name;
+ int number;
+};
+
+#define SIGNAL_ENTRY(sig) { #sig, SIG##sig }
+
+static const struct signal_entry signal_table[] = {
+ SIGNAL_ENTRY(HUP),
+ SIGNAL_ENTRY(INT),
+ SIGNAL_ENTRY(QUIT),
+ SIGNAL_ENTRY(ILL),
+ SIGNAL_ENTRY(TRAP),
+ SIGNAL_ENTRY(ABRT),
+ SIGNAL_ENTRY(EMT), /* If available */
+ SIGNAL_ENTRY(FPE),
+ SIGNAL_ENTRY(KILL),
+ SIGNAL_ENTRY(BUS),
+ SIGNAL_ENTRY(SEGV),
+ SIGNAL_ENTRY(SYS),
+ SIGNAL_ENTRY(PIPE),
+ SIGNAL_ENTRY(ALRM),
+ SIGNAL_ENTRY(TERM),
+ SIGNAL_ENTRY(URG),
+ SIGNAL_ENTRY(STOP),
+ SIGNAL_ENTRY(TSTP),
+ SIGNAL_ENTRY(CONT),
+ SIGNAL_ENTRY(CHLD),
+ SIGNAL_ENTRY(TTIN),
+ SIGNAL_ENTRY(TTOU),
+ SIGNAL_ENTRY(IO),
+ SIGNAL_ENTRY(XCPU),
+ SIGNAL_ENTRY(XFSZ),
+ SIGNAL_ENTRY(VTALRM),
+ SIGNAL_ENTRY(PROF),
+ SIGNAL_ENTRY(WINCH),
+ SIGNAL_ENTRY(INFO), /* If available */
+ SIGNAL_ENTRY(USR1),
+ SIGNAL_ENTRY(USR2),
+ /* ... */
+};
+```
+
+### Key Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Parse options and dispatch signal or list |
+| `normalize_signal_name()` | Canonicalize signal name (strip optional `SIG` prefix, case-insensitive table lookup) |
+| `parse_signal_option_token()` | Parse `-SIGNAL` shorthand |
+| `parse_signal_for_dash_s()` | Parse signal name/number for `-s` |
+| `signal_name_for_number()` | Reverse lookup: number → name |
+| `printsignals()` | List all signals (for `-l`) |
+| `max_signal_number()` | Find highest valid signal |
+| `parse_pid_argument()` | Parse and validate PID string |
+
+### Signal Name Normalization
+
+```c
+static const char *
+normalize_signal_name(const char *name)
+{
+ /* Strip optional "SIG" prefix */
+ if (strncasecmp(name, "SIG", 3) == 0)
+ name += 3;
+
+ /* Case-insensitive lookup in signal_table */
+ for (size_t i = 0; i < SIGNAL_TABLE_SIZE; i++) {
+ if (strcasecmp(name, signal_table[i].name) == 0)
+ return signal_table[i].name;
+ }
+ return NULL;
+}
+```
+
+### Parsing Signal Options
+
+The option parsing handles three forms:
+
+```c
+/* Form 1: kill -s SIGNAL pid */
+/* Form 2: kill -SIGNAL pid (dash prefix) */
+/* Form 3: kill -NUMBER pid */
+
+static int
+parse_signal_option_token(const char *token)
+{
+ /* Try as number first */
+ char *end;
+ long val = strtol(token, &end, 10);
+ if (*end == '\0' && val >= 0 && val <= max_signal_number())
+ return (int)val;
+
+ /* Try as name */
+ const char *name = normalize_signal_name(token);
+ if (name) {
+ /* Look up number from normalized name */
+ return number_for_name(name);
+ }
+
+ errx(2, "unknown signal: %s", token);
+}
+```
+
+### Real-Time Signal Support
+
+```c
+/* SIGRTMIN+n and SIGRTMAX-n notation */
+#ifdef SIGRTMIN
+ if (strncasecmp(name, "RTMIN", 5) == 0) {
+ int offset = (name[5] == '+') ? atoi(name + 6) : 0;
+ return SIGRTMIN + offset;
+ }
+ if (strncasecmp(name, "RTMAX", 5) == 0) {
+ int offset = (name[5] == '-') ? atoi(name + 6) : 0;
+ return SIGRTMAX - offset;
+ }
+#endif
+```
+
+### Listing Signals
+
+```c
+static void
+printsignals(FILE *fp)
+{
+ int columns = 0;
+ for (int sig = 1; sig <= max_signal_number(); sig++) {
+ const char *name = signal_name_for_number(sig);
+ if (name) {
+ fprintf(fp, "%s", name);
+ if (++columns >= 8) {
+ fputc('\n', fp);
+ columns = 0;
+ } else {
+ fputc('\t', fp);
+ }
+ }
+ }
+}
+```
+
+### Signal from Exit Status
+
+When given an exit status with `-l`, the signal number is extracted:
+
+```c
+/* exit_status > 128 means killed by signal (exit_status - 128) */
+if (exit_status > 128)
+ sig = exit_status - 128;
+```
+
+### Shell Built-in Integration
+
+```c
+#ifdef SHELL
+/* When compiled into the shell (sh/), kill is a built-in */
+/* Uses different error reporting and argument parsing */
+int killcmd(int argc, char *argv[]);
+#endif
+```
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `kill(2)` | Send signal to process or process group |
+
+## Examples
+
+```sh
+# Send SIGTERM (default)
+kill 1234
+
+# Send SIGKILL
+kill -9 1234
+kill -KILL 1234
+kill -s KILL 1234
+
+# Send to process group
+kill -TERM -1234
+
+# List all signals
+kill -l
+
+# Signal name from exit status
+kill -l 137
+# → KILL (137 - 128 = 9 = SIGKILL)
+
+# Real-time signal
+kill -s RTMIN+3 1234
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | All signals sent successfully |
+| 1 | Error sending signal to at least one process |
+| 2 | Usage error |
diff --git a/docs/handbook/corebinutils/ln.md b/docs/handbook/corebinutils/ln.md
new file mode 100644
index 0000000000..848ebb72c0
--- /dev/null
+++ b/docs/handbook/corebinutils/ln.md
@@ -0,0 +1,190 @@
+# ln — Make Links
+
+## Overview
+
+`ln` creates hard links or symbolic links between files. It supports
+interactive prompting, forced overwriting, verbose output, and optional
+warnings for missing symbolic link targets.
+
+**Source**: `ln/ln.c` (single file)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+ln [-sfhivFLPnw] source_file [target_file]
+ln [-sfhivFLPnw] source_file ... target_dir
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-s` | Create symbolic links instead of hard links |
+| `-f` | Force: remove existing target files |
+| `-i` | Interactive: prompt before overwriting |
+| `-n` | Don't follow symlinks on the target |
+| `-v` | Verbose: print each link created |
+| `-w` | Warn if symbolic link target does not exist |
+| `-h` | Don't follow symlink if target is a symlink to a directory |
+| `-F` | Remove existing target directory before linking |
+| `-L` | Follow symlinks on the source |
+| `-P` | Don't follow symlinks on the source (default for hard links) |
+
+## Source Analysis
+
+### Data Structures
+
+```c
+struct ln_options {
+ bool force; /* -f: remove existing targets */
+ bool remove_dir; /* -F: remove existing directories */
+ bool no_target_follow; /* -n/-h: don't follow target symlinks */
+ bool interactive; /* -i: prompt before replace */
+ bool follow_source_symlink; /* -L: follow source symlinks */
+ bool symbolic; /* -s: create symlinks */
+ bool verbose; /* -v: print actions */
+ bool warn_missing; /* -w: warn on missing symlink target */
+ int linkch; /* Function: link or symlink */
+};
+```
+
+### Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Parse options, determine single vs. multi-target mode |
+| `linkit()` | Create one link (core logic) |
+| `remove_existing_target()` | Unlink or rmdir existing target |
+| `samedirent()` | Check if source and target are the same file |
+| `should_append_basename()` | Determine if target is a directory |
+| `stat_parent_dir()` | Stat the parent directory of a path |
+| `warn_missing_symlink_source()` | Check if symlink target exists |
+| `prompt_replace()` | Interactive yes/no prompt |
+
+### Core Linking Logic
+
+```c
+static int
+linkit(const char *source, const char *target,
+ const struct ln_options *opts)
+{
+ /* Check if target already exists */
+ if (lstat(target, &sb) == 0) {
+ /* Same file check */
+ if (samedirent(source, target)) {
+ warnx("%s and %s are the same", source, target);
+ return 1;
+ }
+
+ /* Interactive prompt */
+ if (opts->interactive && !prompt_replace(target))
+ return 0;
+
+ /* Remove existing target */
+ if (opts->force || opts->interactive)
+ remove_existing_target(target, opts);
+ }
+
+ /* Create the link */
+ if (opts->symbolic) {
+ if (symlink(source, target) < 0) {
+ warn("symlink %s -> %s", target, source);
+ return 1;
+ }
+ } else {
+ if (link(source, target) < 0) {
+ warn("link %s -> %s", target, source);
+ return 1;
+ }
+ }
+
+ /* Warn about dangling symlinks */
+ if (opts->symbolic && opts->warn_missing)
+ warn_missing_symlink_source(source, target);
+
+ /* Verbose output */
+ if (opts->verbose)
+ printf("%s -> %s\n", target, source);
+
+ return 0;
+}
+```
+
+### Sameness Detection
+
+```c
+static int
+samedirent(const char *source, const char *target)
+{
+ struct stat sb_src, sb_tgt;
+
+ if (stat(source, &sb_src) < 0)
+ return 0;
+ if (stat(target, &sb_tgt) < 0)
+ return 0;
+
+ return (sb_src.st_dev == sb_tgt.st_dev &&
+ sb_src.st_ino == sb_tgt.st_ino);
+}
+```
+
+### Target Resolution
+
+When the target is a directory, the source basename is appended:
+
+```c
+static int
+should_append_basename(const char *target,
+ const struct ln_options *opts)
+{
+ struct stat sb;
+ int (*statfn)(const char *, struct stat *);
+
+ /* -n/-h: use lstat to not follow target symlinks */
+ statfn = opts->no_target_follow ? lstat : stat;
+
+ if (statfn(target, &sb) == 0 && S_ISDIR(sb.st_mode))
+ return 1;
+ return 0;
+}
+```
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `link(2)` | Create hard link |
+| `symlink(2)` | Create symbolic link |
+| `lstat(2)` | Stat without following symlinks |
+| `stat(2)` | Stat following symlinks |
+| `unlink(2)` | Remove existing target |
+| `rmdir(2)` | Remove existing target directory (`-F`) |
+| `readlink(2)` | Resolve symlink for display |
+
+## Examples
+
+```sh
+# Hard link
+ln file1.txt file2.txt
+
+# Symbolic link
+ln -s /usr/local/bin/python3 /usr/bin/python
+
+# Force overwrite
+ln -sf new_target link_name
+
+# Verbose with warning
+ln -svw /opt/myapp/bin/app /usr/local/bin/app
+
+# Multiple files into directory
+ln -s file1 file2 file3 /tmp/links/
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | All links created successfully |
+| 1 | Error creating one or more links |
diff --git a/docs/handbook/corebinutils/ls.md b/docs/handbook/corebinutils/ls.md
new file mode 100644
index 0000000000..e6c6314170
--- /dev/null
+++ b/docs/handbook/corebinutils/ls.md
@@ -0,0 +1,314 @@
+# ls — List Directory Contents
+
+## Overview
+
+`ls` lists files and directory contents with extensive formatting,
+sorting, filtering, and colorization options. This implementation uses
+the Linux `statx(2)` syscall for file metadata (including birth time),
+and provides column, long, stream, and single-column layout modes.
+
+**Source**: `ls/ls.c`, `ls/ls.h`, `ls/print.c`, `ls/cmp.c`, `ls/util.c`,
+`ls/extern.h`
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+ls [-ABCFGHILPRSTUWabcdfghiklmnopqrstuvwxy1,] [--color[=when]]
+ [--group-directories-first] [file ...]
+```
+
+## Source Architecture
+
+### File Responsibilities
+
+| File | Purpose | Key Functions |
+|------|---------|---------------|
+| `ls.c` | Main, option parsing, directory traversal | `main()`, `parse_options()`, `collect_directory_entries()` |
+| `ls.h` | Type definitions, enums, structs | `layout_mode`, `sort_mode`, `time_field` |
+| `print.c` | Output formatting for all layout modes | `printlong()`, `printcol()`, `printstream()` |
+| `cmp.c` | Sorting comparators | `namecmp()`, `mtimecmp()`, `sizecmp()` |
+| `util.c` | Helper functions | `emalloc()`, `printescaped()` |
+| `extern.h` | Function prototypes across files | All cross-file declarations |
+
+### Enums (ls.h)
+
+```c
+enum layout_mode {
+ SINGLE, /* One file per line (-1) */
+ COLUMNS, /* Multi-column (-C) */
+ LONG, /* Long listing (-l) */
+ STREAM, /* Comma-separated (-m) */
+};
+
+enum sort_mode {
+ BY_NAME, /* Default alphabetical */
+ BY_TIME, /* -t: modification/access/birth/change time */
+ BY_SIZE, /* -S: file size */
+ BY_VERSION, /* --sort=version: version number sort */
+ UNSORTED, /* -f: no sorting */
+};
+
+enum time_field {
+ MTIME, /* Modification time (default) */
+ ATIME, /* Access time (-u) */
+ BTIME, /* Birth/creation time (-U) */
+ CTIME, /* Inode change time (-c) */
+};
+
+enum follow_mode {
+ FOLLOW_NEVER, /* -P: never follow symlinks */
+ FOLLOW_ALWAYS, /* -L: always follow */
+ FOLLOW_CMDLINE, /* -H: follow on command line only */
+};
+
+enum color_mode {
+ COLOR_NEVER,
+ COLOR_ALWAYS,
+ COLOR_AUTO, /* Only when stdout is a TTY */
+};
+```
+
+### File Time Struct
+
+```c
+struct file_time {
+ struct timespec ts;
+ bool available; /* False if filesystem doesn't support it */
+};
+```
+
+### statx(2) Integration
+
+Since musl libc may not provide `statx` wrappers, `ls` defines the
+syscall interface inline:
+
+```c
+static int
+linux_statx(int dirfd, const char *path, int flags,
+ unsigned int mask, struct statx *stx)
+{
+ return syscall(__NR_statx, dirfd, path, flags, mask, stx);
+}
+```
+
+This enables birth time (`btime`) on filesystems that support it
+(ext4, btrfs, XFS) where traditional `stat(2)` does not expose it.
+
+### Option Parsing
+
+```c
+static const char *optstring =
+ "ABCFGHILPRSTUWabcdfghiklmnopqrstuvwxy1,";
+
+static void
+parse_options(int argc, char *argv[])
+{
+ /* Short options via getopt(3) */
+ while ((ch = getopt_long(argc, argv, optstring,
+ long_options, NULL)) != -1) {
+ switch (ch) {
+ case 'l': layout = LONG; break;
+ case 'C': layout = COLUMNS; break;
+ case '1': layout = SINGLE; break;
+ case 'm': layout = STREAM; break;
+ case 't': sort = BY_TIME; break;
+ case 'S': sort = BY_SIZE; break;
+ case 'r': reverse = true; break;
+ case 'a': show_hidden = ALL; break;
+ case 'A': show_hidden = ALMOST_ALL; break;
+ case 'R': recurse = true; break;
+ /* ... more options ... */
+ }
+ }
+}
+```
+
+### Long Options
+
+| Long Option | Description |
+|-------------|-------------|
+| `--color[=when]` | Colorize output (always/auto/never) |
+| `--group-directories-first` | Sort directories before files |
+| `--sort=version` | Version-number sort |
+
+### Directory Traversal
+
+```c
+static void
+collect_directory_entries(const char *dir, struct entry_list *list)
+{
+ DIR *dp = opendir(dir);
+ struct dirent *ent;
+
+ while ((ent = readdir(dp)) != NULL) {
+ /* Skip . and .. (unless -a) */
+ if (!show_hidden && ent->d_name[0] == '.')
+ continue;
+
+ struct entry *e = alloc_entry(ent->d_name);
+ stat_with_policy(dir, e);
+ list_append(list, e);
+ }
+ closedir(dp);
+}
+```
+
+### Recursive Listing
+
+```c
+static void
+list_directory(const char *path, int depth)
+{
+ collect_directory_entries(path, &entries);
+ sort_entries(&entries);
+ display_entries(&entries);
+
+ if (recurse) {
+ for (each entry that is a directory) {
+ if (should_recurse(entry)) {
+ /* Cycle detection: check device/inode */
+ if (visit_stack_contains(entry->ino, entry->dev))
+ warnx("cycle detected: %s", path);
+ else
+ list_directory(full_path, depth + 1);
+ }
+ }
+ }
+}
+```
+
+### Birth Time via statx
+
+```c
+static void
+fill_birthtime(struct entry *e, const struct statx *stx)
+{
+ if (stx->stx_mask & STATX_BTIME) {
+ e->btime.ts.tv_sec = stx->stx_btime.tv_sec;
+ e->btime.ts.tv_nsec = stx->stx_btime.tv_nsec;
+ e->btime.available = true;
+ } else {
+ e->btime.available = false;
+ }
+}
+```
+
+### Sorting (cmp.c)
+
+Comparators are selected based on the sort mode and direction:
+
+```c
+int namecmp(const struct entry *a, const struct entry *b);
+int mtimecmp(const struct entry *a, const struct entry *b);
+int atimecmp(const struct entry *a, const struct entry *b);
+int btimecmp(const struct entry *a, const struct entry *b);
+int ctimecmp(const struct entry *a, const struct entry *b);
+int sizecmp(const struct entry *a, const struct entry *b);
+```
+
+All comparators fall back to `namecmp()` for stable ordering when
+primary keys are equal.
+
+### Output Formatting (print.c)
+
+| Function | Layout Mode |
+|----------|-------------|
+| `printlong()` | `-l` long listing with permissions, owner, size, date |
+| `printcol()` | `-C` multi-column (default for TTY) |
+| `printstream()` | `-m` comma-separated stream |
+| `printsingle()` | `-1` one per line (default for pipe) |
+
+Human-readable sizes (`-h`) format with K, M, G, T suffixes.
+
+## Full Options Reference
+
+| Flag | Description |
+|------|-------------|
+| `-a` | Show all entries (including `.` and `..`) |
+| `-A` | Show almost all (exclude `.` and `..`) |
+| `-b` | Print C-style escapes for non-printable chars |
+| `-C` | Multi-column output (default if TTY) |
+| `-c` | Use ctime (inode change time) for sorting/display |
+| `-d` | List directories themselves, not contents |
+| `-F` | Append type indicator (`/`, `*`, `@`, `=`, `%`, `\|`) |
+| `-f` | Unsorted, show all |
+| `-G` | Colorize output (same as `--color=auto`) |
+| `-g` | Long format without owner |
+| `-H` | Follow symlinks on command line |
+| `-h` | Human-readable sizes |
+| `-I` | Suppress auto-column mode |
+| `-i` | Print inode number |
+| `-k` | Use 1024-byte blocks |
+| `-L` | Follow all symlinks |
+| `-l` | Long listing format |
+| `-m` | Stream (comma-separated) output |
+| `-n` | Numeric UID/GID |
+| `-o` | Long format without group |
+| `-P` | Never follow symlinks |
+| `-p` | Append `/` to directories |
+| `-q` | Replace non-printable with `?` |
+| `-R` | Recursive listing |
+| `-r` | Reverse sort order |
+| `-S` | Sort by size (largest first) |
+| `-s` | Print block count |
+| `-T` | Show complete time information |
+| `-t` | Sort by time |
+| `-U` | Use birth time |
+| `-u` | Use access time |
+| `-v` | Sort version numbers naturally |
+| `-w` | Force raw (non-printable) output |
+| `-x` | Multi-column sorted across |
+| `-y` | Sort by extension |
+| `-1` | One entry per line |
+| `,` | Thousands separator in sizes |
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `statx(2)` | File metadata including birth time |
+| `stat(2)` / `lstat(2)` | Fallback file metadata |
+| `opendir(3)` / `readdir(3)` | Directory enumeration |
+| `readlink(2)` | Resolve symlink targets |
+| `ioctl(TIOCGWINSZ)` | Terminal width detection |
+| `isatty(3)` | Detect if stdout is a terminal |
+| `getpwuid(3)` / `getgrgid(3)` | User/group name lookup |
+
+## Linux-Specific Notes
+
+- Uses `statx(2)` directly via `syscall()` for birth time support
+- Defines `struct statx` inline for musl compatibility
+- No BSD file-flags or whiteout listing (BSD `ls` uses `-o`/`-W` for those; here `-o` means long format without group)
+- No MAC label support (`-Z` not supported)
+
+## Examples
+
+```sh
+# Long listing
+ls -la
+
+# Human-readable, sorted by size
+ls -lhS
+
+# Recursive with color
+ls -R --color=auto
+
+# Sort by modification time
+ls -lt
+
+# Show birth time (on supporting filesystems)
+ls -lU
+
+# Directories first
+ls --group-directories-first
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | Success |
+| 1 | Minor problem (cannot access one file) |
+| 2 | Serious trouble (cannot access command line argument) |
diff --git a/docs/handbook/corebinutils/mkdir.md b/docs/handbook/corebinutils/mkdir.md
new file mode 100644
index 0000000000..dfdbf75d27
--- /dev/null
+++ b/docs/handbook/corebinutils/mkdir.md
@@ -0,0 +1,194 @@
+# mkdir — Make Directories
+
+## Overview
+
+`mkdir` creates directories with specified permissions. It shares the
+`mode_compile()` / `mode_apply()` engine with `chmod` for parsing
+symbolic and numeric mode specifications. With `-p`, it creates all
+missing intermediate directories.
+
+**Source**: `mkdir/mkdir.c`, `mkdir/mode.c`, `mkdir/mode.h` (shared with chmod)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+mkdir [-pv] [-m mode] directory ...
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-m mode` | Set permissions (numeric or symbolic) |
+| `-p` | Create parent directories as needed |
+| `-v` | Print each directory as it is created |
+
+## Source Analysis
+
+### Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Parse options and iterate over arguments |
+| `create_single_path()` | Create one directory (no `-p`) |
+| `create_parents_path()` | Create directory with parents (`-p`) |
+| `create_component()` | Create a single path component |
+| `mkdir_with_umask()` | Create a directory with the umask temporarily cleared so the exact mode is applied |
+| `existing_directory()` | Check if a path already exists as a directory |
+| `current_umask()` | Atomically read the current umask |
+| `mode_compile()` | Parse mode string to command array (shared) |
+| `mode_apply()` | Apply compiled mode to existing permissions |
+
+### Simple Creation
+
+```c
+static int
+create_single_path(const char *path, mode_t mode)
+{
+ if (mkdir(path, mode) < 0) {
+ error_errno("cannot create directory '%s'", path);
+ return 1;
+ }
+
+ /* If explicit mode was specified, chmod to override umask */
+ if (explicit_mode) {
+ if (chmod(path, mode) < 0) {
+ error_errno("cannot set permissions on '%s'", path);
+ return 1;
+ }
+ }
+
+ if (verbose)
+ printf("mkdir: created directory '%s'\n", path);
+
+ return 0;
+}
+```
+
+### Parent Directory Creation
+
+```c
+static int
+create_parents_path(const char *path, mode_t mode,
+ mode_t intermediate_mode)
+{
+ char *buf = strdup(path);
+ char *p = buf;
+
+ /* Skip leading slashes */
+ while (*p == '/') p++;
+
+ /* Create each component */
+ while (*p) {
+ char *slash = strchr(p, '/');
+ if (slash) *slash = '\0';
+
+ if (!existing_directory(buf)) {
+ if (mkdir_with_umask(buf, intermediate_mode) < 0) {
+ if (errno != EEXIST) {
+ error_errno("cannot create '%s'", buf);
+ return 1;
+ }
+ }
+ if (verbose)
+ printf("mkdir: created directory '%s'\n", buf);
+ }
+
+ if (slash) {
+ *slash = '/';
+ p = slash + 1;
+ } else {
+ break;
+ }
+ }
+
+ /* Apply final mode to the leaf directory */
+ if (chmod(buf, mode) < 0) { ... }
+ return 0;
+}
+```
+
+### Atomic Umask Handling
+
+To prevent race conditions when setting permissions:
+
+```c
+static mode_t
+current_umask(void)
+{
+ /* Atomically read umask by setting and restoring */
+ mode_t mask = umask(0);
+ umask(mask);
+ return mask;
+}
+
+static int
+mkdir_with_umask(const char *path, mode_t mode)
+{
+ /* Use more restrictive intermediate perms:
+ * u+wx so the creator can write subdirs */
+ mode_t old = umask(0);
+ int ret = mkdir(path, mode);
+ umask(old);
+ return ret;
+}
+```
+
+Intermediate directories are created with `0300 | mode` to ensure the
+creating user always has write and execute access to create children,
+even if the specified mode is more restrictive.
+
+### Mode Compilation (Shared with chmod)
+
+```c
+/* Numeric modes */
+mkdir -m 755 mydir
+/* → mode_compile("755") returns compiled bitcmd array */
+
+/* Symbolic modes */
+mkdir -m u=rwx,g=rx,o=rx mydir
+/* → mode_compile("u=rwx,g=rx,o=rx") */
+
+/* Default mode */
+/* 0777 & ~umask (typically 0755) */
+```
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `mkdir(2)` | Create directory |
+| `chmod(2)` | Set final permissions |
+| `umask(2)` | Read/set file creation mask |
+| `stat(2)` | Check if path exists |
+
+## Examples
+
+```sh
+# Simple directory
+mkdir mydir
+
+# With specific permissions
+mkdir -m 700 private_dir
+
+# Create parent directories
+mkdir -p /opt/myapp/lib/plugins
+
+# Verbose
+mkdir -pv a/b/c
+# mkdir: created directory 'a'
+# mkdir: created directory 'a/b'
+# mkdir: created directory 'a/b/c'
+
+# Symbolic mode
+mkdir -m u=rwx,g=rx,o= restricted_dir
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | All directories created successfully |
+| 1 | Error creating one or more directories |
diff --git a/docs/handbook/corebinutils/mv.md b/docs/handbook/corebinutils/mv.md
new file mode 100644
index 0000000000..c6b7369301
--- /dev/null
+++ b/docs/handbook/corebinutils/mv.md
@@ -0,0 +1,285 @@
+# mv — Move (Rename) Files
+
+## Overview
+
+`mv` moves or renames files and directories. When the source and target
+are on the same filesystem, it uses `rename(2)`. When they are on
+different filesystems, it performs a copy-and-remove fallback with
+extended attribute preservation.
+
+**Source**: `mv/mv.c` (single file)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+mv [-finv] source target
+mv [-finv] source ... directory
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-f` | Force: do not prompt before overwriting |
+| `-i` | Interactive: prompt before overwriting |
+| `-n` | No clobber: do not overwrite existing files |
+| `-v` | Verbose: print each file as it is moved |
+
+## Source Analysis
+
+### Data Structures
+
+```c
+struct mv_options {
+ bool force; /* -f: overwrite without asking */
+ bool interactive; /* -i: prompt before overwrite */
+ bool no_clobber; /* -n: never overwrite */
+ bool no_target_dir_follow; /* Don't follow target symlinks */
+ bool verbose; /* -v: display moves */
+};
+
+struct move_target {
+ char *path;
+ struct stat sb;
+ bool is_directory;
+};
+```
+
+### Constants
+
+```c
+#define MV_EXIT_ERROR 1
+#define MV_EXIT_USAGE 2
+
+#define COPY_BUFFER_MIN (128 * 1024) /* 128 KB */
+#define COPY_BUFFER_MAX (2 * 1024 * 1024) /* 2 MB */
+```
+
+### Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Parse options, determine single vs. multi-target |
+| `handle_single_move()` | Move one source to one target |
+| `apply_existing_target_policy()` | Handle `-f`, `-i`, `-n` logic |
+| `copy_move_fallback()` | Cross-device copy+remove |
+| `copy_file_data()` | Buffer-based data copy |
+| `copy_file_xattrs()` | Preserve extended attributes |
+| `copy_directory_tree()` | Recursive directory copy |
+| `apply_path_metadata()` | Set ownership, permissions, timestamps |
+| `remove_source_tree()` | Remove original after copy |
+
+### Core Move Logic
+
+```c
+static int
+handle_single_move(const char *source, const char *target,
+ const struct mv_options *opts)
+{
+ /* Check for self-move */
+ struct stat src_sb, tgt_sb;
+ if (stat(source, &src_sb) < 0)
+ return MV_EXIT_ERROR;
+
+ /* Handle existing target */
+ if (lstat(target, &tgt_sb) == 0) {
+ /* Same file? (device + inode) */
+ if (src_sb.st_dev == tgt_sb.st_dev &&
+ src_sb.st_ino == tgt_sb.st_ino) {
+ warnx("'%s' and '%s' are the same file", source, target);
+ return MV_EXIT_ERROR;
+ }
+
+ /* Apply -f/-i/-n policy */
+ int policy = apply_existing_target_policy(target, &tgt_sb, opts);
+ if (policy != 0)
+ return policy;
+ }
+
+ /* Try rename(2) first — fast path */
+ if (rename(source, target) == 0) {
+ if (opts->verbose)
+ printf("'%s' -> '%s'\n", source, target);
+ return 0;
+ }
+
+ /* Cross-device: copy then remove */
+ if (errno == EXDEV)
+ return copy_move_fallback(source, target, &src_sb, opts);
+
+ warn("rename '%s' to '%s'", source, target);
+ return MV_EXIT_ERROR;
+}
+```
+
+### Cross-Device Copy Fallback
+
+When `rename(2)` fails with `EXDEV` (different filesystems):
+
+```c
+static int
+copy_move_fallback(const char *source, const char *target,
+ const struct stat *src_sb,
+ const struct mv_options *opts)
+{
+ if (S_ISDIR(src_sb->st_mode)) {
+ /* Recursive directory copy */
+ if (copy_directory_tree(source, target) != 0)
+ return MV_EXIT_ERROR;
+ } else {
+ /* Regular file copy */
+ if (copy_file_data(source, target) != 0)
+ return MV_EXIT_ERROR;
+ }
+
+ /* Preserve metadata */
+ apply_path_metadata(target, src_sb);
+
+ /* Preserve extended attributes */
+ copy_file_xattrs(source, target);
+
+ /* Remove original */
+ remove_source_tree(source, src_sb);
+
+ if (opts->verbose)
+ printf("'%s' -> '%s'\n", source, target);
+
+ return 0;
+}
+```
+
+### Adaptive Buffer Sizing
+
+```c
+static int
+copy_file_data(const char *source, const char *target)
+{
+ /* Allocate buffer based on available memory */
+ size_t bufsize = COPY_BUFFER_MAX;
+ char *buf = NULL;
+
+ while (bufsize >= COPY_BUFFER_MIN) {
+ buf = malloc(bufsize);
+ if (buf) break;
+ bufsize /= 2;
+ }
+
+ int src_fd = open(source, O_RDONLY);
+ int tgt_fd = open(target, O_WRONLY | O_CREAT | O_TRUNC, 0666);
+
+ ssize_t n;
+ while ((n = read(src_fd, buf, bufsize)) > 0) {
+ if (write_all(tgt_fd, buf, n) < 0) {
+ warn("write '%s'", target);
+ return -1;
+ }
+ }
+
+ free(buf);
+ close(src_fd);
+ close(tgt_fd);
+ return 0;
+}
+```
+
+### Extended Attribute Preservation
+
+```c
+#include <sys/xattr.h>
+
+static void
+copy_file_xattrs(const char *source, const char *target)
+{
+ ssize_t list_len = listxattr(source, NULL, 0);
+ if (list_len <= 0)
+ return;
+
+ char *list = malloc(list_len);
+ listxattr(source, list, list_len);
+
+ for (char *name = list; name < list + list_len;
+ name += strlen(name) + 1) {
+ ssize_t val_len = getxattr(source, name, NULL, 0);
+ if (val_len < 0) continue;
+
+ char *val = malloc(val_len);
+ getxattr(source, name, val, val_len);
+ setxattr(target, name, val, val_len, 0);
+ free(val);
+ }
+
+ free(list);
+}
+```
+
+### Metadata Preservation
+
+```c
+static void
+apply_path_metadata(const char *target, const struct stat *sb)
+{
+ /* Ownership */
+ chown(target, sb->st_uid, sb->st_gid);
+
+ /* Permissions */
+ chmod(target, sb->st_mode);
+
+ /* Timestamps */
+ struct timespec times[2] = {
+ sb->st_atim, /* Access time */
+ sb->st_mtim, /* Modification time */
+ };
+ utimensat(AT_FDCWD, target, times, 0);
+}
+```
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `rename(2)` | Same-device move (fast path) |
+| `read(2)` / `write(2)` | Cross-device data copy |
+| `stat(2)` / `lstat(2)` | File metadata |
+| `chown(2)` | Preserve ownership |
+| `chmod(2)` | Preserve permissions |
+| `utimensat(2)` | Preserve timestamps |
+| `listxattr(2)` | List extended attributes |
+| `getxattr(2)` / `setxattr(2)` | Copy extended attributes |
+| `unlink(2)` / `rmdir(2)` | Remove source after copy |
+
+## Examples
+
+```sh
+# Rename a file
+mv old.txt new.txt
+
+# Move into directory
+mv file.txt /tmp/
+
+# Interactive mode
+mv -i important.txt /backup/
+
+# No clobber
+mv -n *.txt /archive/
+
+# Verbose
+mv -v file1 file2 file3 /dest/
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | All moves successful |
+| 1 | Error during move |
+| 2 | Usage error |
+
+## Differences from GNU mv
+
+- No `--backup` / `-b` option
+- No `--suffix` / `-S`
+- No `--target-directory` / `-t`
+- No `--update` / `-u`
+- Simpler cross-device fallback without sparse file optimization
diff --git a/docs/handbook/corebinutils/overview.md b/docs/handbook/corebinutils/overview.md
new file mode 100644
index 0000000000..0ac41deee5
--- /dev/null
+++ b/docs/handbook/corebinutils/overview.md
@@ -0,0 +1,362 @@
+# Corebinutils — Overview
+
+## What Is Corebinutils?
+
+Corebinutils is Project Tick's collection of core command-line utilities, ported
+from FreeBSD and adapted for Linux with musl libc. It provides the foundational
+user-space programs that every Unix system needs — file manipulation, process
+control, text processing, and system information tools — built from
+battle-tested FreeBSD sources rather than GNU coreutils.
+
+The project targets a clean, auditable, BSD-licensed alternative to the GNU
+toolchain. Every utility compiles against musl libc by default, producing
+statically-linkable binaries with minimal dependencies.
+
+## Heritage and Licensing
+
+All utilities derive from FreeBSD's `/usr/src/bin/` tree, carrying BSD
+3-Clause or BSD 2-Clause licenses from the original Berkeley and FreeBSD
+contributors. Project Tick's modifications (Copyright 2026) maintain the same
+licensing terms. No GPL-licensed code is present in the tree.
+
+The copyright headers trace a direct lineage:
+
+```
+Copyright (c) 1989, 1993, 1994
+ The Regents of the University of California. All rights reserved.
+Copyright (c) 2026
+ Project Tick. All rights reserved.
+```
+
+Key contributors acknowledged across the codebase include Keith Muller (dd),
+Andrew Moore (ed), Michael Fischbein (ls), Ken Smith (mv), and Lance Visser
+(dd).
+
+## Design Philosophy
+
+### Linux-Native, Not Compatibility Layers
+
+Unlike many BSD-to-Linux ports that ship a compatibility shim library,
+corebinutils rewrites platform-specific code using native Linux APIs:
+
+- **`/proc/self/mountinfo`** replaces BSD `getmntinfo(3)` in `df`
+- **`statx(2)`** replaces BSD `stat(2)` for birth time in `ls`
+- **`sched_getaffinity(2)`** replaces BSD `cpuset_getaffinity(2)` in `nproc`
+- **`sethostname(2)` from `<unistd.h>`** replaces BSD kernel calls in `hostname`
+- **`prctl(PR_SET_CHILD_SUBREAPER)`** replaces BSD `procctl` in `timeout`
+- **`fdopendir(3)` + `readdir(3)`** replaces BSD FTS functions in `rm`
+
+### musl-First Toolchain
+
+The build system preferentially selects musl-based compilers. The configure
+script tries, in order:
+
+1. `musl-clang`
+2. `clang --target=<arch>-linux-musl`
+3. `clang --target=<arch>-unknown-linux-musl`
+4. `musl-gcc`
+5. `clang` (generic)
+6. `cc`
+7. `gcc`
+
+If a glibc toolchain is detected, configure refuses to proceed unless
+`--allow-glibc` is explicitly passed.
+
+### No External Dependencies
+
+Core utilities have zero runtime dependencies beyond libc. Optional features
+(readline in `csh`, crypto in `ed`) probe for system libraries at configure
+time but degrade gracefully when absent.
+
+## Complete Utility List
+
+### File Operations
+
+| Utility | Description | Complexity | Source Files |
+|-----------|--------------------------------------|------------|-------------|
+| `cat` | Concatenate and display files | Simple | 1 `.c` |
+| `cp` | Copy files and directory trees | Medium | 3+ `.c` |
+| `dd` | Block-level data copying/conversion | Complex | 8+ `.c` |
+| `ln` | Create hard and symbolic links | Medium | 1 `.c` |
+| `mv` | Move/rename files and directories | Medium | 1 `.c` |
+| `rm` | Remove files and directories | Medium | 1 `.c` |
+| `rmdir` | Remove empty directories | Simple | 1 `.c` |
+
+### Directory Operations
+
+| Utility | Description | Complexity | Source Files |
+|-------------|------------------------------------|------------|-------------|
+| `ls` | List directory contents | Complex | 5+ `.c` |
+| `mkdir` | Create directories | Medium | 2 `.c` |
+| `pwd` | Print working directory | Simple | 1 `.c` |
+| `realpath` | Canonicalize file paths | Simple | 1 `.c` |
+
+### Permission and Attribute Management
+
+| Utility | Description | Complexity | Source Files |
+|-------------|------------------------------------|------------|-------------|
+| `chmod` | Change file permissions | Medium | 2 `.c` |
+| `chflags` | Change file flags (BSD compat) | Medium | 4 `.c` |
+| `getfacl` | Display file ACLs | Medium | 1 `.c` |
+| `setfacl` | Set file ACLs | Medium | 1 `.c` |
+
+### Process Management
+
+| Utility | Description | Complexity | Source Files |
+|-------------|------------------------------------|------------|-------------|
+| `kill` | Send signals to processes | Medium | 1 `.c` |
+| `ps` | List running processes | Complex | 6+ `.c` |
+| `pkill` | Signal processes by name/attribute | Medium | 1+ `.c` |
+| `pwait` | Wait for process termination | Simple | 1 `.c` |
+| `timeout` | Run command with time limit | Medium | 1 `.c` |
+
+### Text Processing
+
+| Utility | Description | Complexity | Source Files |
+|-----------|--------------------------------------|------------|-------------|
+| `echo` | Write arguments to stdout | Simple | 1 `.c` |
+| `ed` | Line-oriented text editor | Complex | 10+ `.c` |
+| `expr` | Evaluate expressions | Medium | 1 `.c` |
+| `test` | Conditional expression evaluation | Medium | 1 `.c` |
+
+### Date and Time
+
+| Utility | Description | Complexity | Source Files |
+|-----------|--------------------------------------|------------|-------------|
+| `date` | Display/set system date and time | Medium | 2 `.c` |
+| `sleep` | Pause for specified duration | Simple | 1 `.c` |
+
+### System Information
+
+| Utility | Description | Complexity | Source Files |
+|------------------|---------------------------------|------------|-------------|
+| `df` | Report filesystem space usage | Complex | 1 `.c` |
+| `hostname` | Get/set system hostname | Simple | 1 `.c` |
+| `domainname` | Get/set NIS domain name | Simple | 1 `.c` |
+| `nproc` | Count available processors | Simple | 1 `.c` |
+| `freebsd-version`| Show FreeBSD version (compat) | Simple | Shell script|
+| `uuidgen` | Generate UUIDs | Simple | 1 `.c` |
+
+### Shells
+
+| Utility | Description | Complexity | Source Files |
+|---------|--------------------------------------|------------|-------------|
+| `sh` | POSIX-compatible shell | Very High | 60+ `.c` |
+| `csh` | C-shell (tcsh port) | Very High | 30+ `.c` |
+
+### Archive and Mail
+
+| Utility | Description | Complexity | Source Files |
+|---------|--------------------------------------|------------|-------------|
+| `pax` | POSIX archive utility (tar/cpio) | Complex | 30+ `.c` |
+| `rmail` | Remote mail handler | Simple | 1 `.c` |
+
+### Miscellaneous
+
+| Utility | Description | Complexity | Source Files |
+|-------------|------------------------------------|------------|-------------|
+| `sync` | Flush filesystem buffers | Simple | 1 `.c` |
+| `stty` | Set terminal characteristics | Medium | 2+ `.c` |
+| `cpuset` | CPU affinity management | Medium | 1 `.c` |
+
+## Shared Components
+
+The `contrib/` directory provides libraries shared across utilities:
+
+### `contrib/libc-vis/`
+BSD `vis(3)` and `unvis(3)` functions for encoding and decoding special
+characters. Used by `ls` for safe filename display and by `pax` for
+header encoding.
+
+### `contrib/libedit/`
+BSD `editline(3)` library providing command-line editing with history and
+completion support. Used by `csh` and `sh` for interactive input.
+
+### `contrib/printf/`
+Shared `printf` format string processing used by multiple utilities that
+need custom format string expansion beyond standard `printf(3)`.
+
+## Project Structure
+
+```
+corebinutils/
+├── configure # Top-level configure script (POSIX sh)
+├── README.md # Build instructions
+├── .gitattributes # Git configuration
+├── .gitignore # Build artifact exclusions
+├── contrib/ # Shared libraries
+│ ├── libc-vis/ # vis(3)/unvis(3)
+│ ├── libedit/ # editline(3)
+│ └── printf/ # Shared printf helpers
+├── cat/ # Each utility in its own directory
+│ ├── cat.c # Main source
+│ ├── GNUmakefile # Per-utility build rules
+│ ├── cat.1 # Manual page
+│ └── README.md # Port-specific notes
+├── chmod/
+│ ├── chmod.c
+│ ├── mode.c # Shared mode parsing library
+│ ├── mode.h
+│ └── GNUmakefile
+├── ... # (33 utility directories total)
+└── sh/ # Full POSIX shell (60+ source files)
+```
+
+## Utility Complexity Classification
+
+### Tier 1 — Simple (1 source file, <500 lines)
+
+`cat`, `echo`, `hostname`, `domainname`, `nproc`, `pwd`, `realpath`, `rmdir`,
+`sleep`, `sync`, `uuidgen`, `pwait`
+
+These utilities typically have a `main()` function that parses options with
+`getopt(3)`, performs a single system call, and exits. Error handling follows
+the `err(3)`/`warn(3)` pattern.
+
+### Tier 2 — Medium (1-3 source files, 500-2000 lines)
+
+`chmod` (with `mode.c`), `cp` (with `utils.c`, `fts.c`), `date` (with
+`vary.c`), `kill`, `ln`, `mkdir` (with `mode.c`), `mv`, `rm`, `test`,
+`timeout`, `expr`, `df`
+
+These utilities involve more complex option parsing, recursive directory
+traversal, or multi-step algorithms. They share code through header files
+and sometimes reuse `mode.c`/`mode.h`.
+
+### Tier 3 — Complex (5+ source files, 2000+ lines)
+
+`dd` (8 source files), `ed` (10 source files), `ls` (5 source files),
+`ps` (6 source files), `pax` (30+ source files)
+
+These are substantial programs with their own internal architecture:
+- `dd`: argument parser, conversion engine, signal handling, I/O position logic
+- `ed`: command parser, buffer manager, regex engine, undo system
+- `ls`: stat engine, sort/compare, print/format, ANSI color
+- `ps`: /proc parser, format string engine, process filter, output formatter
+
+### Tier 4 — Shells (30-60+ source files)
+
+`sh` and `csh` are full POSIX-compatible shells with lexers, parsers, job
+control, signal handling, built-in commands, and editline integration.
+
+## Key Differences from GNU Coreutils
+
+| Feature | Corebinutils (BSD) | GNU Coreutils |
+|------------------------|-----------------------------|----------------------------|
+| License | BSD-3-Clause / BSD-2-Clause | GPL-3.0 |
+| Default libc | musl | glibc |
+| `echo` behavior | No `-e` flag (BSD compat) | `-e` for escape sequences |
+| `test` parser | Recursive descent | Varies by implementation |
+| `ls` birth time | `statx(2)` syscall | `statx(2)` or fallback |
+| `dd` progress | SIGINFO + `status=progress` | `status=progress` |
+| `sleep` units | `s`, `m`, `h`, `d` suffixes | `s`, `m`, `h`, `d` (GNU ext)|
+| Build system | `./configure` + `GNUmakefile`| Autotools (autoconf/automake)|
+| Error functions | `err(3)`/`warn(3)` from libc| `error()` from gnulib |
+| FTS implementation | In-tree custom `fts.c` | gnulib FTS or `nftw(3)` |
+
+## Signal Handling Conventions
+
+Most utilities follow a consistent signal handling pattern:
+
+- **SIGINFO / SIGUSR1**: Progress reporting. `dd`, `chmod`, `sleep`, and
+ others install a handler that sets a `volatile sig_atomic_t` flag, which
+ the main loop checks to print status information.
+
+- **SIGINT**: Graceful termination. Utilities performing recursive operations
+ check for pending signals between iterations.
+
+- **SIGHUP**: In `ed`, triggers an emergency save of the edit buffer to a
+ temporary file.
+
+Signal handlers are installed via `sigaction(2)` rather than the legacy
+`signal(2)` function, ensuring reliable semantics across platforms.
+
+## Error Handling Patterns
+
+All utilities exit with standardized codes:
+
+| Exit Code | Meaning |
+|-----------|------------------------------------------|
+| 0 | Success |
+| 1 | General failure |
+| 2 | Usage error (invalid arguments) |
+| 124 | Command timed out (`timeout` only) |
+| 125 | `timeout` internal error |
+| 126 | Command found but not executable |
+| 127 | Command not found |
+
+Error messages follow the BSD pattern:
+```c
+error_errno("open %s", path); // "mv: open /foo: Permission denied"
+error_msg("invalid mode: %s", arg); // "chmod: invalid mode: xyz"
+```
+
+Many utilities provide custom `error_errno()` / `error_msg()` wrappers that
+prepend the program name, format the message, and optionally append
+`strerror(errno)`.
+
+## Memory Management
+
+Corebinutils utilities follow BSD memory conventions:
+
+- **Dynamic allocation**: `malloc(3)` with explicit `NULL` checks, typically
+ wrapped in `xmalloc()` that calls `err(1, "malloc")` on failure.
+- **No fixed-size buffers** for user-controlled data (paths, format strings).
+- **Adaptive buffer sizing**: `cat` and `cp` scale I/O buffers based on
+ available physical memory via `sysconf(_SC_PHYS_PAGES)`.
+- **Explicit cleanup**: `free()` is called in long-running loops to avoid
+ accumulation, though single-pass utilities may rely on process exit.
+
+### Buffer Strategy Example (from `cat.c` and `cp/utils.c`):
+
+```c
+#define PHYSPAGES_THRESHOLD (32*1024)
+#define BUFSIZE_MAX (2*1024*1024)
+#define BUFSIZE_SMALL (128*1024)
+
+if (sysconf(_SC_PHYS_PAGES) > PHYSPAGES_THRESHOLD)
+ bufsize = MIN(BUFSIZE_MAX, MAXPHYS * 8);
+else
+ bufsize = BUFSIZE_SMALL;
+```
+
+## Testing
+
+Each utility directory may contain its own test suite, invoked through:
+
+```sh
+make -f GNUmakefile test
+```
+
+Or for a specific utility:
+
+```sh
+make -f GNUmakefile check-cat
+make -f GNUmakefile check-ls
+```
+
+Tests that require root privileges or specific kernel features print `SKIP`
+and continue without failing the overall test run.
+
+## Building Quick Reference
+
+```sh
+cd corebinutils/
+./configure # Detect toolchain, generate build files
+make -f GNUmakefile -j$(nproc) all # Build all utilities
+make -f GNUmakefile test # Run test suites
+make -f GNUmakefile stage # Copy binaries to out/bin/
+make -f GNUmakefile install # Install to $PREFIX/bin
+```
+
+See [building.md](building.md) for detailed configure options and build
+customization.
+
+## Further Reading
+
+- [architecture.md](architecture.md) — Build system internals, code organization
+- [building.md](building.md) — Configure options, dependencies, cross-compilation
+- Individual utility documentation: [cat.md](cat.md), [ls.md](ls.md),
+ [dd.md](dd.md), [ps.md](ps.md), etc.
+- [code-style.md](code-style.md) — C coding conventions
+- [error-handling.md](error-handling.md) — Error patterns and exit codes
diff --git a/docs/handbook/corebinutils/ps.md b/docs/handbook/corebinutils/ps.md
new file mode 100644
index 0000000000..cbbd749a44
--- /dev/null
+++ b/docs/handbook/corebinutils/ps.md
@@ -0,0 +1,298 @@
+# ps — Process Status
+
+## Overview
+
+`ps` displays information about active processes. This implementation
+reads process data from the Linux `/proc` filesystem and presents it
+through BSD-style format strings. It provides a custom `struct kinfo_proc`
+that mirrors FreeBSD's interface while reading from Linux procfs.
+
+**Source**: `ps/ps.c`, `ps/ps.h`, `ps/fmt.c`, `ps/keyword.c`,
+`ps/print.c`, `ps/nlist.c`, `ps/extern.h`
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+ps [-AaCcdefHhjLlMmrSTuvwXxZ] [-D fmt] [-G gid[,gid...]]
+ [-J jail] [-N system] [-O fmt] [-o fmt] [-p pid[,pid...]]
+ [-t tty[,tty...]] [-U user[,user...]] [-g group[,group...]]
+```
+
+## Source Architecture
+
+### File Responsibilities
+
+| File | Purpose |
+|------|---------|
+| `ps.c` | Main program, option parsing, process collection |
+| `ps.h` | Data structures, constants, STAILQ macros |
+| `fmt.c` | Format string parsing and column management |
+| `keyword.c` | Format keyword definitions and lookup table |
+| `print.c` | Column value formatters (PID, user, CPU, etc.) |
+| `nlist.c` | Name list support (noop on Linux) |
+| `extern.h` | Cross-file function declarations |
+
+### Key Data Structures
+
+#### Process Information (Linux replacement for BSD kinfo_proc)
+
+```c
+struct kinfo_proc {
+ pid_t ki_pid; /* Process ID */
+ pid_t ki_ppid; /* Parent PID */
+ pid_t ki_pgid; /* Process group ID */
+ pid_t ki_sid; /* Session ID */
+ uid_t ki_uid; /* Real UID */
+ uid_t ki_ruid; /* Real UID (copy) */
+ uid_t ki_svuid; /* Saved UID */
+ gid_t ki_rgid; /* Real GID */
+ gid_t ki_svgid; /* Saved GID */
+ gid_t ki_groups[KI_NGROUPS]; /* Supplementary groups */
+ int ki_ngroups; /* Number of groups */
+ dev_t ki_tdev; /* TTY device */
+ int ki_flag; /* Process flags */
+ int ki_stat; /* Process state */
+ char ki_comm[COMMLEN + 1]; /* Command name */
+ char ki_wmesg[WMESGLEN + 1]; /* Wait channel */
+ int ki_nice; /* Nice value */
+ int ki_pri; /* Priority */
+ long ki_size; /* Virtual size */
+ long ki_rssize; /* Resident size */
+ struct timeval ki_start; /* Start time */
+ struct timeval ki_rusage; /* Resource usage */
+ /* ... additional fields ... */
+};
+```
+
+#### KINFO Wrapper
+
+```c
+typedef struct {
+ struct kinfo_proc *ki_p;
+ char *ki_args; /* Full command line */
+ char *ki_env; /* Environment (if -E) */
+ double ki_pcpu; /* Computed %CPU */
+ long ki_memsize; /* Computed memory size */
+} KINFO;
+```
+
+#### Format Variable
+
+```c
+typedef struct {
+ const char *name; /* Keyword name (e.g., "pid", "user") */
+ const char *header; /* Column header (e.g., "PID", "USER") */
+ int width; /* Column width */
+ int (*sprnt)(KINFO *); /* Print function */
+ int flag; /* Format flags */
+} VAR;
+```
+
+### Constants
+
+```c
+#define COMMLEN 256 /* Max command name length */
+#define WMESGLEN 64 /* Max wait message length */
+#define KI_NGROUPS 16 /* Max supplementary groups tracked */
+```
+
+### musl Compatibility
+
+FreeBSD uses `STAILQ_*` macros extensively, but musl's `<sys/queue.h>`
+may not provide them. `ps.h` defines custom implementations:
+
+```c
+#ifndef STAILQ_HEAD
+#define STAILQ_HEAD(name, type) \
+struct name { \
+ struct type *stqh_first; \
+ struct type **stqh_last; \
+}
+#define STAILQ_ENTRY(type) \
+struct { \
+ struct type *stqe_next; \
+}
+#define STAILQ_INIT(head) do { ... } while (0)
+#define STAILQ_INSERT_TAIL(head, elm, field) do { ... } while (0)
+#define STAILQ_FOREACH(var, head, field) ...
+#endif
+```
+
+### Predefined Format Strings
+
+```c
+/* Default format (-f not specified) */
+const char *dfmt = "pid,tt,stat,time,command";
+
+/* Jobs format (-j) */
+const char *jfmt = "user,pid,ppid,pgid,sid,jobc,stat,tt,time,command";
+
+/* Long format (-l) */
+const char *lfmt = "uid,pid,ppid,cpu,pri,nice,vsz,rss,wchan,stat,tt,time,command";
+
+/* User format (-u) */
+const char *ufmt = "user,pid,%cpu,%mem,vsz,rss,tt,stat,start,time,command";
+
+/* Virtual memory format (-v) */
+const char *vfmt = "pid,stat,time,sl,re,pagein,vsz,rss,lim,tsiz,%cpu,%mem,command";
+```
+
+### /proc Parsing
+
+Process data is read from multiple `/proc/[pid]/` files:
+
+| File | Data Extracted |
+|------|----------------|
+| `/proc/[pid]/stat` | PID, PPID, PGID, state, priority, nice, threads, start time |
+| `/proc/[pid]/status` | UID, GID, groups, memory (VmSize, VmRSS) |
+| `/proc/[pid]/cmdline` | Full command line arguments |
+| `/proc/[pid]/environ` | Environment variables (if requested) |
+| `/proc/[pid]/wchan` | Wait channel name |
+| `/proc/[pid]/fd/0` | Controlling TTY detection |
+
+### Process Filtering
+
+```c
+/* Option string */
+#define PS_ARGS "AaCcD:defG:gHhjJ:LlM:mN:O:o:p:rSTt:U:uvwXxZ"
+
+struct listinfo {
+ int count;
+ int maxcount;
+ int *list; /* Array of values to match */
+ int (*addelem)(struct listinfo *, const char *);
+};
+```
+
+Filtering by PID, UID, GID, TTY, session, and process group uses
+`struct listinfo` with dynamic arrays and element-specific parsers.
+
+### Column Formatting (keyword.c)
+
+The keyword table maps format names to print functions:
+
+```c
+static VAR var[] = {
+ {"pid", "PID", 5, s_pid, 0},
+ {"ppid", "PPID", 5, s_ppid, 0},
+ {"user", "USER", 8, s_user, 0},
+ {"uid", "UID", 5, s_uid, 0},
+ {"gid", "GID", 5, s_gid, 0},
+ {"%cpu", "%CPU", 4, s_pcpu, 0},
+ {"%mem", "%MEM", 4, s_pmem, 0},
+ {"vsz", "VSZ", 6, s_vsz, 0},
+ {"rss", "RSS", 5, s_rss, 0},
+ {"tt", "TT", 3, s_tty, 0},
+ {"stat", "STAT", 4, s_stat, 0},
+ {"time", "TIME", 8, s_time, 0},
+ {"command", "COMMAND", 16, s_command, COMM},
+ {"args", "COMMAND", 16, s_args, COMM},
+ {"comm", "COMMAND", 16, s_comm, COMM},
+ {"nice", "NI", 3, s_nice, 0},
+ {"pri", "PRI", 3, s_pri, 0},
+ {"wchan", "WCHAN", 8, s_wchan, 0},
+ {"start", "STARTED", 8, s_start, 0},
+ /* ... more keywords ... */
+ {NULL, NULL, 0, NULL, 0}, /* Sentinel */
+};
+```
+
+### Global State
+
+```c
+int cflag; /* Raw CPU usage */
+int eval; /* Exit value */
+time_t now; /* Current time */
+int rawcpu; /* Don't compute decay */
+int sumrusage; /* Sum child usage */
+int termwidth; /* Terminal width */
+int showthreads; /* Show threads (-H) */
+int hlines; /* Header repeat interval */
+```
+
+## Options Reference
+
+| Flag | Description |
+|------|-------------|
+| `-A` / `-e` | All processes |
+| `-a` | Processes with terminals (except session leaders) |
+| `-C` | Raw CPU percentage |
+| `-c` | Show command name only (not full path) |
+| `-d` | All except session leaders |
+| `-f` | Full format |
+| `-G gid` | Filter by real group ID |
+| `-g group` | Filter by group name |
+| `-H` | Show threads |
+| `-h` | Repeat header every screenful |
+| `-j` | Jobs format |
+| `-L` | Show all threads (LWP) |
+| `-l` | Long format |
+| `-M` | Display MAC label |
+| `-m` | Sort by memory usage |
+| `-O fmt` | Add columns to default format |
+| `-o fmt` | Custom output format |
+| `-p pid` | Filter by PID |
+| `-r` | Running processes only |
+| `-S` | Include child time |
+| `-T` | Show threads for current terminal |
+| `-t tty` | Filter by TTY |
+| `-U user` | Filter by effective user |
+| `-u` | User format |
+| `-v` | Virtual memory format |
+| `-w` | Wide output |
+| `-X` | Skip processes without controlling TTY |
+| `-x` | Include processes without controlling TTY |
+| `-Z` | Show security context |
+
+## System and Library Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `opendir(3)` / `readdir(3)` | Enumerate `/proc/` PIDs |
+| `open(2)` / `read(2)` | Read `/proc/[pid]/*` files |
+| `stat(2)` | Get file owner for UID detection |
+| `getpwuid(3)` / `getgrgid(3)` | UID/GID to name resolution |
+| `ioctl(TIOCGWINSZ)` | Terminal width |
+| `sysconf(3)` | Clock ticks, page size |
+
+## Examples
+
+```sh
+# Default process list
+ps
+
+# All processes, user format
+ps aux
+
+# Full format
+ps -ef
+
+# Custom columns
+ps -o pid,user,%cpu,%mem,command
+
+# Filter by user
+ps -U root
+
+# Jobs format
+ps -j
+
+# Long format with threads
+ps -lH
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | Success |
+| 1 | Error |
+
+## Linux-Specific Notes
+
+- Reads from `/proc` filesystem instead of BSD `kvm_getprocs(3)`
+- Custom `struct kinfo_proc` replaces BSD's `<sys/user.h>` variant
+- STAILQ macros defined inline for musl compatibility
+- No jail (`-J`) support on Linux
+- No Capsicum sandboxing
diff --git a/docs/handbook/corebinutils/pwd.md b/docs/handbook/corebinutils/pwd.md
new file mode 100644
index 0000000000..8f584e3357
--- /dev/null
+++ b/docs/handbook/corebinutils/pwd.md
@@ -0,0 +1,152 @@
+# pwd — Print Working Directory
+
+## Overview
+
+`pwd` prints the absolute pathname of the current working directory.
+It supports logical mode (using `$PWD`) and physical mode (resolving
+symlinks). Logical mode is the default, with fallback to physical
+if validation fails.
+
+**Source**: `pwd/pwd.c` (single file)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+pwd [-L | -P]
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-L` | Logical: use `$PWD` (default) |
+| `-P` | Physical: resolve all symlinks |
+
+When both are specified, the last one wins.
+
+## Source Analysis
+
+### Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Parse options and dispatch |
+| `getcwd_logical()` | Validate and use `$PWD` |
+| `getcwd_physical()` | Resolve via `getcwd(3)` |
+| `usage()` | Print usage message |
+
+### Logical Mode (Default)
+
+```c
+static char *
+getcwd_logical(void)
+{
+ const char *pwd = getenv("PWD");
+
+ /* Must be set and absolute */
+ if (!pwd || pwd[0] != '/')
+ return NULL;
+
+ /* Must not contain "." or ".." components */
+ if (contains_dot_components(pwd))
+ return NULL;
+
+ /* Must refer to the same directory as "." */
+ struct stat pwd_sb, dot_sb;
+ if (stat(pwd, &pwd_sb) < 0 || stat(".", &dot_sb) < 0)
+ return NULL;
+ if (pwd_sb.st_dev != dot_sb.st_dev ||
+ pwd_sb.st_ino != dot_sb.st_ino)
+ return NULL;
+
+ return strdup(pwd);
+}
+```
+
+The `$PWD` validation ensures:
+1. The value is an absolute path
+2. It contains no `.` or `..` components
+3. The path resolves to the same inode as `.`
+
+### Physical Mode
+
+```c
+static char *
+getcwd_physical(void)
+{
+	/* glibc/musl extension: getcwd(NULL, 0) dynamically allocates
+	 * (POSIX leaves a NULL buffer unspecified) */
+ return getcwd(NULL, 0);
+}
+```
+
+Uses the `getcwd(NULL, 0)` extension (supported by glibc and musl;
+POSIX leaves the behavior for a NULL buffer unspecified) which
+allocates the returned buffer dynamically, avoiding fixed-size buffer
+limitations.
+
+### Main Logic
+
+```c
+int main(int argc, char *argv[])
+{
+ int mode = MODE_LOGICAL; /* Default: logical */
+
+ while ((ch = getopt(argc, argv, "LP")) != -1) {
+ switch (ch) {
+ case 'L': mode = MODE_LOGICAL; break;
+ case 'P': mode = MODE_PHYSICAL; break;
+ default: usage();
+ }
+ }
+
+ char *cwd;
+ if (mode == MODE_LOGICAL) {
+ cwd = getcwd_logical();
+ if (!cwd)
+ cwd = getcwd_physical(); /* Fallback */
+ } else {
+ cwd = getcwd_physical();
+ }
+
+ if (!cwd)
+ err(1, "getcwd");
+
+ puts(cwd);
+ free(cwd);
+ return 0;
+}
+```
+
+## System and Library Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `getcwd(3)` | Physical working directory |
+| `stat(2)` | Validate `$PWD` against `.` |
+| `getenv(3)` | Read `$PWD` environment variable |
+
+## Examples
+
+```sh
+# Default (logical)
+pwd
+# /home/user/projects/mylink (preserves symlink name)
+
+# Physical
+pwd -P
+# /home/user/actual/path (resolved symlinks)
+
+# Demonstrate difference
+cd /tmp
+ln -s /usr/local/share mylink
+cd mylink
+pwd -L # → /tmp/mylink
+pwd -P # → /usr/local/share
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | Success |
+| 1 | Error (cannot determine directory) |
diff --git a/docs/handbook/corebinutils/realpath.md b/docs/handbook/corebinutils/realpath.md
new file mode 100644
index 0000000000..bf7c1d421f
--- /dev/null
+++ b/docs/handbook/corebinutils/realpath.md
@@ -0,0 +1,119 @@
+# realpath — Resolve to Canonical Path
+
+## Overview
+
+`realpath` resolves each given pathname to its canonical absolute form
+by expanding all symbolic links, resolving `.` and `..` references,
+and removing extra `/` characters.
+
+**Source**: `realpath/realpath.c` (single file)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+realpath [-q] [path ...]
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-q` | Quiet: suppress error messages for non-existent paths |
+
+## Source Analysis
+
+### Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Parse options and resolve loop |
+| `resolve_path()` | Wrapper around `realpath(3)` |
+| `set_progname()` | Extract program name from `argv[0]` |
+| `print_line()` | Safe stdout writing |
+| `usage()` | Print usage message |
+| `warnx_msg()` | Warning without errno |
+| `warn_path_errno()` | Warning with errno for path |
+
+### Core Logic
+
+```c
+static int
+resolve_path(const char *path, bool quiet)
+{
+ char *resolved = realpath(path, NULL);
+ if (!resolved) {
+ if (!quiet)
+ warn("%s", path);
+ return 1;
+ }
+
+ puts(resolved);
+ free(resolved);
+ return 0;
+}
+```
+
+### Main Loop
+
+```c
+int main(int argc, char *argv[])
+{
+ bool quiet = false;
+ int ch, errors = 0;
+
+ while ((ch = getopt(argc, argv, "q")) != -1) {
+ switch (ch) {
+ case 'q': quiet = true; break;
+ default: usage();
+ }
+ }
+ argc -= optind;
+ argv += optind;
+
+ if (argc == 0)
+ usage();
+
+ for (int i = 0; i < argc; i++)
+ errors |= resolve_path(argv[i], quiet);
+
+ return errors ? 1 : 0;
+}
+```
+
+Uses `realpath(path, NULL)` (POSIX.1-2008) for dynamic buffer
+allocation, avoiding `PATH_MAX` limitations.
+
+## System and Library Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `realpath(3)` | Canonicalize pathname |
+
+## Examples
+
+```sh
+# Simple resolution
+realpath ../foo/bar
+# → /home/user/foo/bar
+
+# Resolve symlink
+ln -s /usr/local/bin target
+realpath target
+# → /usr/local/bin
+
+# Quiet mode (no error for missing)
+realpath -q /nonexistent/path
+# (no output, exit 1)
+
+# Multiple paths
+realpath /tmp/../etc ./relative/path
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | All paths resolved successfully |
+| 1 | One or more paths could not be resolved |
diff --git a/docs/handbook/corebinutils/rm.md b/docs/handbook/corebinutils/rm.md
new file mode 100644
index 0000000000..36c92e34ec
--- /dev/null
+++ b/docs/handbook/corebinutils/rm.md
@@ -0,0 +1,293 @@
+# rm — Remove Files and Directories
+
+## Overview
+
+`rm` removes files and directories. It supports recursive removal,
+interactive prompting, forced deletion, and protects against removal
+of `/`, `.`, and `..`. Directory traversal uses `openat(2)` and
+`fdopendir(3)` for safe recursive descent.
+
+**Source**: `rm/rm.c` (single file)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+rm [-dfiIPRrvWx] file ...
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-d` | Remove empty directories (like `rmdir`) |
+| `-f` | Force: no prompts, ignore nonexistent files |
+| `-i` | Interactive: prompt for each file |
+| `-I` | Prompt once before recursive removal or >3 files |
+| `-P` | Overwrite before delete (BSD; not on Linux) |
+| `-R` / `-r` | Recursive: remove directories and contents |
+| `-v` | Verbose: print each file as removed |
+| `-W` | Whiteout (BSD union fs; not on Linux) |
+| `-x` | Stay on one filesystem |
+
+## Source Analysis
+
+### Data Structures
+
+```c
+struct options_t {
+ bool force; /* -f */
+ bool interactive; /* -i */
+ bool prompt_once; /* -I */
+ bool recursive; /* -R/-r */
+ bool remove_empty; /* -d */
+ bool verbose; /* -v */
+ bool one_fs; /* -x */
+ bool stdin_tty; /* Whether stdin is a TTY */
+};
+```
+
+### Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Parse options and dispatch |
+| `remove_path()` | Remove a single top-level argument |
+| `remove_simple_path()` | Remove non-directory file |
+| `remove_path_at()` | Recursive removal at directory fd |
+| `prompt_for_removal()` | Interactive prompt for single file |
+| `prompt_for_directory_descent()` | Prompt before entering directory |
+| `prompt_once()` | One-time batch prompt (`-I`) |
+| `prompt_yesno()` | Read yes/no from terminal |
+| `join_path()` | Path concatenation |
+| `path_is_writable()` | Check write access |
+
+### Safety Checks
+
+```c
+static int
+remove_path(const char *path, const struct options_t *opts)
+{
+ /* Reject "/" */
+ if (strcmp(path, "/") == 0) {
+ warnx("\"/\" may not be removed");
+ return 1;
+ }
+
+ /* Reject "." and ".." */
+ const char *base = basename(path);
+ if (strcmp(base, ".") == 0 || strcmp(base, "..") == 0) {
+ warnx("\".\" and \"..\" may not be removed");
+ return 1;
+ }
+
+ struct stat sb;
+ if (lstat(path, &sb) < 0) {
+ if (opts->force)
+ return 0; /* Silently ignore */
+ warn("%s", path);
+ return 1;
+ }
+
+ if (S_ISDIR(sb.st_mode) && opts->recursive)
+ return remove_path_at(AT_FDCWD, path, &sb, opts);
+ else
+ return remove_simple_path(path, &sb, opts);
+}
+```
+
+### Recursive Removal
+
+```c
+static int
+remove_path_at(int dirfd, const char *path,
+ const struct stat *sb,
+ const struct options_t *opts)
+{
+ /* Prompt before descending */
+ if (opts->interactive &&
+ !prompt_for_directory_descent(path))
+ return 0;
+
+ /* Open directory safely */
+ int fd = openat(dirfd, path,
+ O_RDONLY | O_DIRECTORY | O_NOFOLLOW);
+ if (fd < 0) {
+ warn("cannot open '%s'", path);
+ return 1;
+ }
+
+ /* One-filesystem check */
+ if (opts->one_fs) {
+ struct stat dir_sb;
+ fstat(fd, &dir_sb);
+ if (dir_sb.st_dev != sb->st_dev) {
+ warnx("skipping '%s' (different filesystem)", path);
+ close(fd);
+ return 1;
+ }
+ }
+
+ DIR *dp = fdopendir(fd);
+ struct dirent *ent;
+ int errors = 0;
+
+ while ((ent = readdir(dp)) != NULL) {
+ /* Skip . and .. */
+ if (ent->d_name[0] == '.' &&
+ (ent->d_name[1] == '\0' ||
+ (ent->d_name[1] == '.' && ent->d_name[2] == '\0')))
+ continue;
+
+ struct stat child_sb;
+ if (fstatat(fd, ent->d_name, &child_sb,
+ AT_SYMLINK_NOFOLLOW) < 0) {
+ warn("%s/%s", path, ent->d_name);
+ errors = 1;
+ continue;
+ }
+
+ if (S_ISDIR(child_sb.st_mode)) {
+ /* Cycle detection: compare device/inode */
+ errors |= remove_path_at(fd,
+ ent->d_name, &child_sb, opts);
+ } else {
+ /* Prompt and remove */
+ if (!opts->force &&
+ !prompt_for_removal(path, ent->d_name,
+ &child_sb, opts))
+ continue;
+ if (unlinkat(fd, ent->d_name, 0) < 0) {
+ warn("cannot remove '%s/%s'", path, ent->d_name);
+ errors = 1;
+ } else if (opts->verbose) {
+ printf("removed '%s/%s'\n", path, ent->d_name);
+ }
+ }
+ }
+ closedir(dp);
+
+ /* Remove the directory itself */
+ if (unlinkat(dirfd, path, AT_REMOVEDIR) < 0) {
+ warn("cannot remove '%s'", path);
+ errors = 1;
+ } else if (opts->verbose) {
+ printf("removed directory '%s'\n", path);
+ }
+
+ return errors;
+}
+```
+
+### Interactive Prompting
+
+```c
+static bool
+prompt_for_removal(const char *dir, const char *name,
+ const struct stat *sb,
+ const struct options_t *opts)
+{
+ if (opts->force)
+ return true;
+
+ /* Always prompt in -i mode */
+ if (opts->interactive) {
+ fprintf(stderr, "remove %s '%s/%s'? ",
+ filetype_name(sb->st_mode), dir, name);
+ return prompt_yesno();
+ }
+
+ /* Prompt for non-writable files (unless -f) */
+ if (!path_is_writable(sb) && opts->stdin_tty) {
+ fprintf(stderr, "remove write-protected %s '%s/%s'? ",
+ filetype_name(sb->st_mode), dir, name);
+ return prompt_yesno();
+ }
+
+ return true;
+}
+
+static bool
+prompt_yesno(void)
+{
+ char buf[128];
+ if (fgets(buf, sizeof(buf), stdin) == NULL)
+ return false;
+ return (buf[0] == 'y' || buf[0] == 'Y');
+}
+```
+
+### Batch Prompt (-I)
+
+```c
+static bool
+prompt_once(int count, const char *first_path,
+ const struct options_t *opts)
+{
+ if (!opts->prompt_once)
+ return true;
+
+ if (count > 3 || opts->recursive) {
+ fprintf(stderr,
+ "remove %d arguments%s? ",
+ count,
+ opts->recursive ? " recursively" : "");
+ return prompt_yesno();
+ }
+ return true;
+}
+```
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `unlink(2)` | Remove file |
+| `unlinkat(2)` | Remove file/directory relative to dirfd |
+| `openat(2)` | Open directory for traversal |
+| `fdopendir(3)` | DIR stream from file descriptor |
+| `fstatat(2)` | Stat relative to dirfd |
+| `lstat(2)` | Stat without following symlinks |
+| `readdir(3)` | Read directory entries |
+| `rmdir(2)` | Remove empty directory |
+
+## Examples
+
+```sh
+# Remove a file
+rm file.txt
+
+# Force remove (no prompts)
+rm -f *.o
+
+# Recursive remove
+rm -rf build/
+
+# Interactive
+rm -ri important_dir/
+
+# Verbose
+rm -rv old_directory/
+
+# Prompt once
+rm -I *.log
+
+# Stay on one filesystem
+rm -rx /mounted/dir/
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | All files removed successfully |
+| 1 | Error removing one or more files |
+
+## Differences from GNU rm
+
+- No `--preserve-root` / `--no-preserve-root` (always refuses `/`)
+- No `--one-file-system` long option (uses `-x` instead)
+- No `--interactive=WHEN` (only `-i` and `-I`)
+- `-P` (overwrite) is BSD-only and not functional on Linux
+- `-W` (whiteout) is BSD-only
diff --git a/docs/handbook/corebinutils/sleep.md b/docs/handbook/corebinutils/sleep.md
new file mode 100644
index 0000000000..b562daff3a
--- /dev/null
+++ b/docs/handbook/corebinutils/sleep.md
@@ -0,0 +1,218 @@
+# sleep — Suspend Execution for an Interval
+
+## Overview
+
+`sleep` pauses for the specified duration. It supports fractional seconds,
+multiple arguments (accumulated), unit suffixes (`s`, `m`, `h`, `d`),
+and `SIGINFO`/`SIGUSR1` for progress reporting.
+
+**Source**: `sleep/sleep.c` (single file)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+sleep number[suffix] ...
+```
+
+## Options
+
+No flags. Arguments are durations with optional unit suffixes.
+
+## Unit Suffixes
+
+| Suffix | Meaning | Multiplier |
+|--------|---------|------------|
+| `s` (default) | Seconds | 1 |
+| `m` | Minutes | 60 |
+| `h` | Hours | 3600 |
+| `d` | Days | 86400 |
+
+## Source Analysis
+
+### Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Parse arguments and sleep loop |
+| `parse_interval()` | Parse numeric value with unit suffix |
+| `scale_interval()` | Apply unit multiplier with overflow check |
+| `seconds_to_timespec()` | Convert float seconds to `struct timespec` |
+| `seconds_from_timespec()` | Extract seconds from `struct timespec` |
+| `install_info_handler()` | Set up `SIGINFO`/`SIGUSR1` handler |
+| `report_remaining()` | Print remaining time on signal |
+| `die()` / `die_errno()` | Error handling |
+| `usage()` | Print usage and exit |
+
+### Argument Accumulation
+
+Multiple arguments are summed:
+
+```c
+int main(int argc, char *argv[])
+{
+ double total = 0.0;
+
+ for (int i = 1; i < argc; i++) {
+ double interval = parse_interval(argv[i]);
+ total += interval;
+ }
+
+ if (total > (double)TIME_T_MAX)
+ die("total sleep duration too large");
+
+ struct timespec ts = seconds_to_timespec(total);
+ install_info_handler();
+
+ /* Sleep loop with EINTR restart */
+ while (nanosleep(&ts, &ts) < 0) {
+ if (errno != EINTR)
+ die_errno("nanosleep");
+
+ /* SIGINFO handler may have reported progress */
+ }
+
+ return 0;
+}
+```
+
+### Interval Parsing
+
+```c
+static double
+parse_interval(const char *arg)
+{
+ char *end;
+ double val = strtod(arg, &end);
+
+ if (end == arg || val < 0)
+ die("invalid time interval: %s", arg);
+
+ /* Apply unit suffix */
+ if (*end != '\0') {
+ val = scale_interval(val, *end);
+ end++;
+ }
+
+ if (*end != '\0')
+ die("invalid time interval: %s", arg);
+
+ return val;
+}
+
+static double
+scale_interval(double val, char unit)
+{
+ switch (unit) {
+ case 's': return val;
+ case 'm': return val * 60.0;
+ case 'h': return val * 3600.0;
+ case 'd': return val * 86400.0;
+ default:
+ die("invalid unit: %c", unit);
+ }
+}
+```
+
+### Progress Reporting
+
+```c
+static volatile sig_atomic_t info_requested;
+
+static void
+signal_handler(int sig)
+{
+ (void)sig;
+ info_requested = 1;
+}
+
+static void
+install_info_handler(void)
+{
+ struct sigaction sa = {
+ .sa_handler = signal_handler,
+ .sa_flags = 0,
+ };
+ sigemptyset(&sa.sa_mask);
+
+#ifdef SIGINFO
+ sigaction(SIGINFO, &sa, NULL);
+#endif
+ sigaction(SIGUSR1, &sa, NULL);
+}
+
+static void
+report_remaining(const struct timespec *remaining)
+{
+ double secs = seconds_from_timespec(remaining);
+ fprintf(stderr, "sleep: about %.1f second(s) remaining\n", secs);
+ info_requested = 0;
+}
+```
+
+When `nanosleep` returns with `EINTR` and the remaining time is in `ts`,
+the handler flag is checked and progress is reported before restarting.
+
+### Overflow Protection
+
+```c
+static struct timespec
+seconds_to_timespec(double sec)
+{
+ struct timespec ts;
+
+ if (sec >= (double)TIME_T_MAX) {
+ ts.tv_sec = TIME_T_MAX;
+ ts.tv_nsec = 0;
+ } else {
+ ts.tv_sec = (time_t)sec;
+ ts.tv_nsec = (long)((sec - ts.tv_sec) * 1e9);
+ }
+
+ return ts;
+}
+```
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `nanosleep(2)` | Sleep with nanosecond precision |
+| `sigaction(2)` | Install signal handlers |
+
+## Examples
+
+```sh
+# Sleep 5 seconds
+sleep 5
+
+# Fractional seconds
+sleep 0.5
+
+# With units
+sleep 2m # 2 minutes
+sleep 1.5h # 90 minutes
+sleep 1d # 24 hours
+
+# Multiple arguments (accumulated)
+sleep 1m 30s # 90 seconds total
+
+# Check remaining time (send SIGUSR1 from another terminal)
+kill -USR1 $(pgrep sleep)
+# → "sleep: about 42.3 second(s) remaining"
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | Success (slept for full duration) |
+| 1 | Error (invalid argument) |
+
+## Differences from GNU sleep
+
+- POSIX-compliant with BSD extensions
+- Supports `SIGINFO` where available, in addition to `SIGUSR1` (which is always installed)
+- Same unit suffix support (`s`, `m`, `h`, `d`)
+- Multiple arguments are accumulated (same as GNU)
diff --git a/docs/handbook/corebinutils/test.md b/docs/handbook/corebinutils/test.md
new file mode 100644
index 0000000000..11b429ab2a
--- /dev/null
+++ b/docs/handbook/corebinutils/test.md
@@ -0,0 +1,248 @@
+# test — Evaluate Conditional Expressions
+
+## Overview
+
+`test` (also invoked as `[`) evaluates file attributes, string comparisons,
+and integer arithmetic, returning an exit status of 0 (true) or 1 (false).
+It uses a recursive descent parser with short-circuit evaluation and
+supports both POSIX and BSD extensions.
+
+**Source**: `test/test.c` (single file)
+**Origin**: BSD 4.4, University of California, Berkeley
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+test expression
+[ expression ]
+```
+
+When invoked as `[`, the last argument must be `]`.
+
+## Source Analysis
+
+### Parser Architecture
+
+```c
+struct parser {
+ int argc;
+ char **argv;
+ int pos; /* Current argument index */
+};
+
+enum token {
+ TOK_OPERAND, /* String/number operand */
+ TOK_UNARY, /* Unary operator (-f, -d, etc.) */
+ TOK_BINARY, /* Binary operator (-eq, =, etc.) */
+ TOK_NOT, /* ! */
+ TOK_AND, /* -a */
+ TOK_OR, /* -o */
+ TOK_LPAREN, /* ( */
+ TOK_RPAREN, /* ) */
+ TOK_END, /* End of arguments */
+};
+```
+
+### Operator Table
+
+```c
+struct operator {
+ const char *name;
+ enum token type;
+ int (*eval)(/* ... */);
+};
+```
+
+### Recursive Descent Grammar
+
+```
+parse_expr()
+ └── parse_oexpr() /* -o (OR, lowest precedence) */
+ └── parse_aexpr() /* -a (AND) */
+ └── parse_nexpr() /* ! (NOT) */
+ └── parse_primary() /* atoms, ( expr ) */
+```
+
+### Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Entry: handle `[`/`test` invocation, drive parser |
+| `current_arg()` | Return current argument |
+| `peek_arg()` | Look at next argument |
+| `advance_arg()` | Consume current argument |
+| `lex_token()` | Classify current argument as token type |
+| `find_operator()` | Look up operator in table |
+| `parse_primary()` | Parse `( expr )`, unary ops, binary ops |
+| `parse_nexpr()` | Parse `! expression` |
+| `parse_aexpr()` | Parse `expr -a expr` |
+| `parse_oexpr()` | Parse `expr -o expr` |
+| `parse_binop()` | Evaluate binary operators |
+| `evaluate_file_test()` | Evaluate file test primaries |
+| `compare_integers()` | Integer comparison |
+| `compare_mtime()` | File modification time comparison |
+| `newer_file()` | `-nt` test |
+| `older_file()` | `-ot` test |
+| `same_file()` | `-ef` test |
+| `parse_int()` | Parse integer with error checking |
+| `effective_access()` | `eaccess(2)` or `faccessat(AT_EACCESS)` |
+
+### File Test Primaries
+
+| Operator | Test | System Call |
+|----------|------|------------|
+| `-b file` | Block special | `stat(2)` + `S_ISBLK` |
+| `-c file` | Character special | `stat(2)` + `S_ISCHR` |
+| `-d file` | Directory | `stat(2)` + `S_ISDIR` |
+| `-e file` | Exists | `stat(2)` |
+| `-f file` | Regular file | `stat(2)` + `S_ISREG` |
+| `-g file` | Set-GID bit | `stat(2)` + `S_ISGID` |
+| `-h file` | Symbolic link | `lstat(2)` + `S_ISLNK` |
+| `-k file` | Sticky bit | `stat(2)` + `S_ISVTX` |
+| `-L file` | Symbolic link | `lstat(2)` + `S_ISLNK` |
+| `-p file` | Named pipe (FIFO) | `stat(2)` + `S_ISFIFO` |
+| `-r file` | Readable | `eaccess(2)` or `faccessat(2)` |
+| `-s file` | Non-zero size | `stat(2)` + `st_size > 0` |
+| `-S file` | Socket | `stat(2)` + `S_ISSOCK` |
+| `-t fd` | Is a terminal | `isatty(3)` |
+| `-u file` | Set-UID bit | `stat(2)` + `S_ISUID` |
+| `-w file` | Writable | `eaccess(2)` or `faccessat(2)` |
+| `-x file` | Executable | `eaccess(2)` or `faccessat(2)` |
+| `-O file` | Owned by EUID | `stat(2)` + `st_uid == geteuid()` |
+| `-G file` | Group matches EGID | `stat(2)` + `st_gid == getegid()` |
+
+### String Operators
+
+| Operator | Description |
+|----------|-------------|
+| `-z string` | String is zero length |
+| `-n string` | String is non-zero length |
+| `s1 = s2` | Strings are identical |
+| `s1 == s2` | Strings are identical (alias) |
+| `s1 != s2` | Strings differ |
+| `s1 < s2` | String less than (lexicographic) |
+| `s1 > s2` | String greater than (lexicographic) |
+
+### Integer Operators
+
+| Operator | Description |
+|----------|-------------|
+| `n1 -eq n2` | Equal |
+| `n1 -ne n2` | Not equal |
+| `n1 -lt n2` | Less than |
+| `n1 -le n2` | Less or equal |
+| `n1 -gt n2` | Greater than |
+| `n1 -ge n2` | Greater or equal |
+
+### File Comparison Operators
+
+| Operator | Description |
+|----------|-------------|
+| `f1 -nt f2` | f1 is newer than f2 |
+| `f1 -ot f2` | f1 is older than f2 |
+| `f1 -ef f2` | f1 and f2 are the same file (device + inode) |
+
+### Short-Circuit Evaluation
+
+```c
+static int
+parse_oexpr(struct parser *p)
+{
+ int result = parse_aexpr(p);
+
+ while (current_is(p, "-o")) {
+ advance_arg(p);
+ int right = parse_aexpr(p);
+ result = result || right; /* Short-circuit */
+ }
+
+ return result;
+}
+
+static int
+parse_aexpr(struct parser *p)
+{
+ int result = parse_nexpr(p);
+
+ while (current_is(p, "-a")) {
+ advance_arg(p);
+ int right = parse_nexpr(p);
+ result = result && right; /* Short-circuit */
+ }
+
+ return result;
+}
+```
+
+### Bracket Mode
+
+```c
+int main(int argc, char *argv[])
+{
+ /* If invoked as "[", last arg must be "]" */
+ const char *progname = basename(argv[0]);
+ if (strcmp(progname, "[") == 0) {
+ if (argc < 2 || strcmp(argv[argc - 1], "]") != 0)
+ errx(2, "missing ]");
+ argc--; /* Remove trailing ] */
+ }
+
+ if (argc <= 1)
+ return 1; /* No expression → false */
+
+ struct parser p = { argc - 1, argv + 1, 0 };
+ int result = parse_oexpr(&p);
+
+ if (p.pos < p.argc)
+ errx(2, "unexpected argument: %s", current_arg(&p));
+
+ return !result; /* 0 = true, 1 = false */
+}
+```
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `stat(2)` | File attribute tests |
+| `lstat(2)` | Symlink tests (`-h`, `-L`) |
+| `eaccess(2)` / `faccessat(2)` | Permission tests (`-r`, `-w`, `-x`) |
+| `isatty(3)` | Terminal test (`-t`) |
+| `geteuid(3)` / `getegid(3)` | Ownership tests (`-O`, `-G`) |
+
+## Examples
+
+```sh
+# File exists
+test -f /etc/passwd && echo "exists"
+
+# Using [ syntax
+[ -d /tmp ] && echo "is a directory"
+
+# String comparison
+[ "$var" = "hello" ] && echo "match"
+
+# Integer comparison
+[ "$count" -gt 10 ] && echo "more than 10"
+
+# Combined with AND
+[ -f file.txt -a -r file.txt ] && echo "readable file"
+
+# File newer than another
+[ config.new -nt config.old ] && echo "config updated"
+
+# Negation
+[ ! -e /tmp/lockfile ] && echo "no lock"
+
+# Parenthesized expression
+[ \( -f a -o -f b \) -a -r c ]
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 0 | Expression is true |
+| 1 | Expression is false |
+| 2 | Invalid expression (syntax error) |
diff --git a/docs/handbook/corebinutils/timeout.md b/docs/handbook/corebinutils/timeout.md
new file mode 100644
index 0000000000..3186b42886
--- /dev/null
+++ b/docs/handbook/corebinutils/timeout.md
@@ -0,0 +1,297 @@
+# timeout — Run a Command with a Time Limit
+
+## Overview
+
+`timeout` runs a command and kills it if it exceeds a time limit. It
+supports a two-stage kill strategy: first send a configurable signal
+(default `SIGTERM`), then optionally send a second kill signal after
+a grace period. Uses `prctl(PR_SET_CHILD_SUBREAPER)` to reliably
+reap grandchild processes.
+
+**Source**: `timeout/timeout.c` (single file)
+**Origin**: BSD/Project Tick
+**License**: BSD-3-Clause
+
+## Synopsis
+
+```
+timeout [--preserve-status] [--foreground] [-k duration]
+ [-s signal] [--verbose] duration command [arg ...]
+```
+
+## Options
+
+| Flag | Description |
+|------|-------------|
+| `-s signal` | Signal to send on timeout (default: `SIGTERM`) |
+| `-k duration` | Grace period after the first signal before `SIGKILL` is sent |
+| `--preserve-status` | Exit with the command's status, not 124 |
+| `--foreground` | Don't create a new process group |
+| `--verbose` | Print diagnostics when sending signals |
+
+## Source Analysis
+
+### Constants
+
+```c
+#define EXIT_TIMEOUT 124 /* Command timed out */
+#define EXIT_INVALID 125 /* timeout itself failed */
+#define EXIT_CMD_ERROR 126 /* Command found but not executable */
+#define EXIT_CMD_NOENT 127 /* Command not found */
+```
+
+### Data Structures
+
+```c
+struct options {
+ bool foreground; /* --foreground */
+ bool preserve; /* --preserve-status */
+ bool verbose; /* --verbose */
+ bool kill_after_set; /* -k was specified */
+ int timeout_signal; /* -s signal (default SIGTERM) */
+ double duration; /* Primary timeout */
+ double kill_after; /* Grace period before SIGKILL */
+ const char *command_name;
+ char **command_argv;
+};
+
+struct child_state {
+ pid_t pid;
+ int status;
+ bool exited;
+ bool signaled;
+};
+
+struct runtime_state {
+ struct child_state child;
+ bool first_timeout_sent;
+ bool kill_sent;
+};
+
+enum deadline_kind {
+ DEADLINE_TIMEOUT, /* Primary timeout */
+ DEADLINE_KILL, /* Kill-after grace period */
+};
+```
+
+### Functions
+
+| Function | Purpose |
+|----------|---------|
+| `main()` | Parse options, fork, wait with timers |
+| `parse_duration_or_die()` | Parse duration string (fractional seconds + units) |
+| `monotonic_seconds()` | Read `CLOCK_MONOTONIC` |
+| `enable_subreaper_or_die()` | Call `prctl(PR_SET_CHILD_SUBREAPER)` |
+| `send_signal_to_command()` | Send signal to child/process group |
+| `arm_second_timer()` | Set up kill-after timer |
+| `reap_children()` | Wait for all descendants |
+| `child_exec()` | Child process: exec the command |
+
+### Signal Table
+
+`timeout` shares the same signal table as `kill`:
+
+```c
+/* Same SIGNAL_ENTRY() macro and signal_entry table */
+/* Supports named signals: TERM, KILL, HUP, INT, etc. */
+/* Supports SIGRTMIN+n notation */
+```
+
+### Duration Parsing
+
+```c
+static double
+parse_duration_or_die(const char *str)
+{
+ char *end;
+ double val = strtod(str, &end);
+
+ if (end == str || val < 0)
+ errx(EXIT_INVALID, "invalid duration: %s", str);
+
+ /* Apply unit suffix */
+ switch (*end) {
+ case '\0':
+ case 's': break; /* seconds (default) */
+ case 'm': val *= 60; break;
+ case 'h': val *= 3600; break;
+ case 'd': val *= 86400; break;
+ default:
+ errx(EXIT_INVALID, "invalid unit: %c", *end);
+ }
+
+ return val;
+}
+```
+
+### Subreaper
+
+The Linux-specific `prctl(PR_SET_CHILD_SUBREAPER)` ensures that orphaned
+grandchild processes are reparented to `timeout` instead of PID 1:
+
+```c
+static void
+enable_subreaper_or_die(void)
+{
+ if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
+ err(EXIT_INVALID, "prctl(PR_SET_CHILD_SUBREAPER)");
+}
+```
+
+### Two-Stage Kill Strategy
+
+```
+┌──────────────────────────────────────────────────┐
+│ timeout 30 -k 5 -s TERM ./long_running_task │
+│ │
+│ 1. Fork and exec ./long_running_task │
+│ 2. Wait up to 30 seconds │
+│ 3. If still running: send SIGTERM │
+│ 4. Wait up to 5 more seconds (-k 5) │
+│ 5. If still running: send SIGKILL │
+│ 6. Reap all children │
+└──────────────────────────────────────────────────┘
+```
+
+```c
+/* Primary timeout handler */
+static void
+handle_timeout(struct runtime_state *state,
+ const struct options *opts)
+{
+ if (opts->verbose)
+ warnx("sending signal %s to command '%s'",
+ signal_name_for_number(opts->timeout_signal),
+ opts->command_name);
+
+ send_signal_to_command(state, opts->timeout_signal, opts);
+ state->first_timeout_sent = true;
+
+ /* Arm kill-after timer if specified */
+ if (opts->kill_after_set)
+ arm_second_timer(opts->kill_after);
+}
+
+/* Kill-after timer handler */
+static void
+handle_kill_after(struct runtime_state *state,
+ const struct options *opts)
+{
+ if (opts->verbose)
+ warnx("sending SIGKILL to command '%s'",
+ opts->command_name);
+
+ send_signal_to_command(state, SIGKILL, opts);
+ state->kill_sent = true;
+}
+```
+
+### Process Group Management
+
+```c
+static void
+send_signal_to_command(struct runtime_state *state,
+ int sig, const struct options *opts)
+{
+ if (opts->foreground) {
+ /* Send to child only */
+ kill(state->child.pid, sig);
+ } else {
+ /* Send to entire process group */
+ kill(-state->child.pid, sig);
+ }
+}
+
+static void
+child_exec(const struct options *opts)
+{
+ if (!opts->foreground) {
+ /* Create new process group */
+ setpgid(0, 0);
+ }
+
+ execvp(opts->command_name, opts->command_argv);
+
+ /* exec failed */
+ int code = (errno == ENOENT) ? EXIT_CMD_NOENT : EXIT_CMD_ERROR;
+ err(code, "exec '%s'", opts->command_name);
+}
+```
+
+### Timer Implementation
+
+Uses `timer_create(2)` with `CLOCK_MONOTONIC`:
+
+```c
+static void
+arm_timer(double seconds)
+{
+ struct itimerspec its = {
+ .it_value = {
+ .tv_sec = (time_t)seconds,
+ .tv_nsec = (long)((seconds - (time_t)seconds) * 1e9),
+ },
+ };
+
+ timer_t timerid;
+ struct sigevent sev = {
+ .sigev_notify = SIGEV_SIGNAL,
+ .sigev_signo = SIGALRM,
+ };
+
+ timer_create(CLOCK_MONOTONIC, &sev, &timerid);
+ timer_settime(timerid, 0, &its, NULL);
+}
+```
+
+## System Calls Used
+
+| Syscall | Purpose |
+|---------|---------|
+| `fork(2)` | Create child process |
+| `execvp(3)` | Execute the command |
+| `kill(2)` | Send signal to child/group |
+| `waitpid(2)` | Wait for child/grandchild exit |
+| `setpgid(2)` | Create new process group |
+| `prctl(2)` | `PR_SET_CHILD_SUBREAPER` |
+| `timer_create(2)` | POSIX timer for deadline |
+| `timer_settime(2)` | Arm the timer |
+| `clock_gettime(2)` | `CLOCK_MONOTONIC` for elapsed time |
+| `sigaction(2)` | Signal handler setup |
+
+## Examples
+
+```sh
+# Basic timeout (30 seconds)
+timeout 30 make -j4
+
+# With kill-after grace period
+timeout -k 10 60 ./server
+
+# Custom signal
+timeout -s HUP 300 ./daemon
+
+# Verbose
+timeout --verbose 5 sleep 100
+# timeout: sending signal TERM to command 'sleep'
+
+# Preserve exit status
+timeout --preserve-status 10 ./test_runner
+echo $? # Exit code from test_runner, not 124
+
+# Fractional seconds
+timeout 2.5 curl https://example.com
+
+# Foreground (no process group)
+timeout --foreground 30 ./interactive_app
+```
+
+## Exit Codes
+
+| Code | Meaning |
+|------|---------|
+| 124 | Command timed out |
+| 125 | `timeout` itself failed |
+| 126 | Command found but not executable |
+| 127 | Command not found |
+| other | Command's exit status (or 128+signal if killed) |
diff --git a/docs/handbook/forgewrapper/architecture.md b/docs/handbook/forgewrapper/architecture.md
new file mode 100644
index 0000000000..9f93c9dc76
--- /dev/null
+++ b/docs/handbook/forgewrapper/architecture.md
@@ -0,0 +1,1202 @@
+# ForgeWrapper Architecture
+
+## Table of Contents
+1. [Class Hierarchy and Structure](#class-hierarchy-and-structure)
+2. [Package Organization](#package-organization)
+3. [Design Patterns Used](#design-patterns-used)
+4. [Data Flow and Execution Model](#data-flow-and-execution-model)
+5. [Thread Model and Concurrency](#thread-model-and-concurrency)
+6. [Error Handling Strategy](#error-handling-strategy)
+7. [Dual-Module System](#dual-module-system)
+8. [ModuleUtil Implementation Details](#moduleutil-implementation-details)
+9. [Class Diagram](#class-diagram)
+10. [Reflection-Based Architecture](#reflection-based-architecture)
+
+## Class Hierarchy and Structure
+
+### Core Class Relationships
+
+```
+io.github.zekerzhayard.forgewrapper.installer package:
+│
+├─ Main (public class)
+│ ├─ main(String[]) : void - ENTRY POINT
+│ └─ isFile(Path) : boolean
+│
+├─ Installer (public class)
+│ ├─ getData(File) : Map<String, Object>
+│ ├─ install(File, File, File) : boolean
+│ ├─ getWrapper(File) : InstallV1Wrapper
+│ │
+│ ├─ InstallV1Wrapper (public static inner class extends InstallV1)
+│ │ ├─ processors : Map<String, List<Processor>>
+│ │ ├─ librariesDir : File
+│ │ ├─ getProcessors(String) : List<Processor>
+│ │ ├─ checkProcessorFiles(List<Processor>, Map<String, String>, File) : void
+│ │ └─ setOutputs(Processor, Map<String, String>) : void
+│ │
+│ └─ Version0 (public static class extends Version)
+│ ├─ mainClass : String
+│ ├─ arguments : Arguments
+│ ├─ getMainClass() : String
+│ ├─ getArguments() : Arguments
+│ └─ Arguments (public static inner class)
+│ ├─ jvm : String[]
+│ └─ getJvm() : String[]
+│
+├─ Bootstrap (public class)
+│ └─ bootstrap(String[], String, String) : void throws Throwable
+│
+└─ detector package:
+ │
+ ├─ IFileDetector (public interface)
+ │ ├─ name() : String (abstract)
+ │ ├─ enabled(HashMap<String, IFileDetector>) : boolean (abstract)
+ │ ├─ getLibraryDir() : Path (default, can override)
+ │ ├─ getInstallerJar(String, String, String) : Path (default)
+ │ └─ getMinecraftJar(String) : Path (default)
+ │
+ ├─ DetectorLoader (public class)
+ │ └─ loadDetector() : IFileDetector (static)
+ │
+ └─ MultiMCFileDetector (public class implements IFileDetector)
+ ├─ libraryDir : Path
+ ├─ installerJar : Path
+ ├─ minecraftJar : Path
+ ├─ name() : String
+ ├─ enabled(HashMap<String, IFileDetector>) : boolean
+ ├─ getLibraryDir() : Path
+ ├─ getInstallerJar(String, String, String) : Path
+ └─ getMinecraftJar(String) : Path
+
+util package:
+│
+└─ ModuleUtil (public class)
+ ├─ addModules(String) : void throws Throwable
+ ├─ addExports(List<String>) : void
+ ├─ addOpens(List<String>) : void
+ ├─ setupClassPath(Path, List<String>) : void throws Throwable
+ ├─ setupBootstrapLauncher(Class<?>) : Class<?>
+ ├─ getPlatformClassLoader() : ClassLoader
+ └─ (version-specific implementation)
+```
+
+### Field Definitions and Types
+
+#### Main.java Fields
+None (purely algorithmic, no state)
+
+#### Installer.java Fields
+```java
+// Static field
+private static InstallV1Wrapper wrapper; // Cached wrapper instance
+
+// Inner class InstallV1Wrapper fields
+protected Map<String, List<Processor>> processors; // Cache of side-specific processors
+protected File librariesDir; // Base libraries directory
+protected String serverJarPath; // Server JAR path (inherited)
+
+// Inner class Version0 fields
+protected String mainClass; // Forge bootstrap main class
+protected Version0.Arguments arguments; // JVM arguments
+protected String[] jvm; // JVM argument array (in Arguments)
+```
+
+#### Bootstrap.java Fields
+None (purely algorithmic, no state)
+
+#### DetectorLoader.java Fields
+None (stateless utility)
+
+#### MultiMCFileDetector.java Fields
+```java
+protected Path libraryDir; // Cached library directory (lazy-initialized)
+protected Path installerJar; // Cached installer JAR path
+protected Path minecraftJar; // Cached Minecraft JAR path
+```
+
+#### ModuleUtil.java Fields
+
+**Main version (Java 8)**:
+```java
+// No fields - all methods are no-op for Java 8
+```
+
+**Jigsaw version (Java 9+)**:
+```java
+private final static MethodHandles.Lookup IMPL_LOOKUP; // Internal method handle lookup
+
+// TypeToAdd enum fields
+EXPORTS {
+ private final MethodHandle implAddMH; // Invoke implAddExports()
+ private final MethodHandle implAddToAllUnnamedMH; // Invoke implAddExportsToAllUnnamed()
+}
+OPENS {
+ private final MethodHandle implAddMH; // Invoke implAddOpens()
+ private final MethodHandle implAddToAllUnnamedMH; // Invoke implAddOpensToAllUnnamed()
+}
+
+// ParserData fields
+class ParserData {
+ final String module; // Source module name
+ final String packages; // Package names to export/open
+ final String target; // Target module or ALL-UNNAMED
+}
+```
+
+## Package Organization
+
+### Main Package Structure
+
+```
+io.github.zekerzhayard.forgewrapper.installer/
+├─ Main.java (entry point, 50 lines)
+├─ Installer.java (installer integration, 200+ lines)
+├─ Bootstrap.java (runtime configuration, 70 lines)
+│
+├─ detector/ (file detection subsystem)
+│ ├─ IFileDetector.java (interface contract, 90 lines)
+│ ├─ DetectorLoader.java (SPI loader, 30 lines)
+│ └─ MultiMCFileDetector.java (concrete impl, 40 lines)
+│
+└─ util/ (utility functions)
+ └─ ModuleUtil.java (Java 8 version, 40 lines)
+
+jigsaw/src/main/java/io.../util/
+└─ ModuleUtil.java (Java 9+ version, 200+ lines)
+```
+
+### Resource Structure
+
+```
+src/main/resources/
+└─ META-INF/services/
+ └─ io.github.zekerzhayard.forgewrapper.installer.detector.IFileDetector
+ (contains: io.github.zekerzhayard.forgewrapper.installer.detector.MultiMCFileDetector)
+
+jigsaw/src/main/resources/
+(empty - no SPI definitions)
+```
+
+### Build Output Structure
+
+The Gradle multi-release JAR structure (from `build.gradle` lines 47-54):
+
+```
+ForgeWrapper.jar/
+├─ io/github/zekerzhayard/forgewrapper/installer/
+│ ├─ Main.class (Java 8 version)
+│ ├─ Installer.class including inner classes (Java 8 version)
+│ ├─ Bootstrap.class (Java 8 version)
+│ ├─ detector/
+│ │ ├─ IFileDetector.class (Java 8 version)
+│ │ ├─ DetectorLoader.class (Java 8 version)
+│ │ └─ MultiMCFileDetector.class (Java 8 version)
+│ └─ util/
+│ └─ ModuleUtil.class (Java 8 stub version)
+│
+├─ META-INF/versions/9/
+│ └─ io/github/zekerzhayard/forgewrapper/installer/
+│ └─ util/
+│ └─ ModuleUtil.class (Java 9+ Advanced version)
+│
+└─ META-INF/services/
+ └─ io.github.zekerzhayard.forgewrapper.installer.detector.IFileDetector
+```
+
+When running on Java 9+, the JVM automatically prefers classes in `META-INF/versions/9/` over base classes.
+
+## Design Patterns Used
+
+### 1. Service Provider Interface (SPI) Pattern
+
+**Location**: `DetectorLoader.java` and `IFileDetector` interface
+
+**Purpose**: Decouple file detection from launcher specifics. Allows multiple implementations without modifying core code.
+
+**Implementation**:
+- `IFileDetector` defines the contract
+- `MultiMCFileDetector` implements it
+- `META-INF/services/` file registers the implementation
+- `ServiceLoader.load()` discovers implementations at runtime
+
+**Benefits**:
+- Custom launchers can provide their own detector by adding a JAR with SPI metadata
+- ForgeWrapper doesn't need recompilation
+- Multiple detectors can coexist; the `enabled()` logic selects the active one
+
+**Code Example** (from `DetectorLoader.java` lines 4-8):
+```java
+ServiceLoader<IFileDetector> sl = ServiceLoader.load(IFileDetector.class);
+HashMap<String, IFileDetector> detectors = new HashMap<>();
+for (IFileDetector detector : sl) {
+ detectors.put(detector.name(), detector);
+}
+```
+
+### 2. Strategy Pattern
+
+**Location**: `IFileDetector` implementations (current: `MultiMCFileDetector`)
+
+**Purpose**: Encapsulate different file location strategies for different launchers.
+
+**Implementation**:
+- `IFileDetector` is the strategy interface defining `getLibraryDir()`, `getInstallerJar()`, `getMinecraftJar()`
+- `MultiMCFileDetector` is the concrete strategy for MultiMC/MeshMC
+- Future: `MeshMCFileDetector`, `ATLauncherFileDetector` could be added
+
+**Algorithm Variation**: File path construction differs:
+- MultiMC: `libraries/net/neoforged/neoforge/VERSION/neoforge-VERSION-installer.jar`
+- Another launcher: might use different structure
+
+**Code Example** (from `MultiMCFileDetector.java` lines 25-34):
+```java
+@Override
+public Path getInstallerJar(String forgeGroup, String forgeArtifact, String forgeFullVersion) {
+ Path path = IFileDetector.super.getInstallerJar(...);
+ if (path == null) {
+ Path installerBase = this.getLibraryDir();
+ for (String dir : forgeGroup.split("\\."))
+ installerBase = installerBase.resolve(dir);
+ return installerBase.resolve(forgeArtifact).resolve(forgeFullVersion)...;
+ }
+ return path;
+}
+```
+
+### 3. Template Method Pattern
+
+**Location**: `IFileDetector.getLibraryDir()` default implementation
+
+**Purpose**: Provide default library discovery algorithm while allowing overrides.
+
+**Implementation**:
+- `IFileDetector` provides `getLibraryDir()` implementation (lines 34-63)
+- `MultiMCFileDetector` calls `IFileDetector.super.getLibraryDir()` then caches (lines 19-23)
+- Template steps:
+ 1. Check system property
+ 2. Find launcher JAR location
+ 3. Walk up directory tree to find `libraries` folder
+ 4. Return absolute path
+
+**Template Hook**: Subclasses can call `super.getLibraryDir()` to use default or override entirely
+
+**Code Example** (from `IFileDetector.java` lines 34-63):
+```java
+default Path getLibraryDir() {
+ String libraryDir = System.getProperty("forgewrapper.librariesDir");
+ if (libraryDir != null) {
+ return Paths.get(libraryDir).toAbsolutePath();
+ }
+ // ... walk classloader resources and find libraries/
+}
+```
+
+### 4. Wrapper/Adapter Pattern
+
+**Location**: `Installer.InstallV1Wrapper` class
+
+**Purpose**: Adapt Forge's `InstallV1` class to handle missing processor outputs.
+
+**Implementation**:
+- Extends `InstallV1` (the wrapped class)
+- Overrides `getProcessors()` to intercept processor handling
+- Calls `super.getProcessors()` to get original implementation
+- Wraps result with custom `checkProcessorFiles()` logic
+
+**Problem Solved**: Forge's installer expects all processor outputs to have SHA1 hashes, but some libraries may not. ForgeWrapper computes hashes for files that exist.
+
+**Code Example** (from `Installer.java` lines 42-52):
+```java
+@Override
+public List<Processor> getProcessors(String side) {
+ List<Processor> processor = this.processors.get(side);
+ if (processor == null) {
+ checkProcessorFiles(
+ processor = super.getProcessors(side), // get original
+ super.getData("client".equals(side)),
+ this.librariesDir
+ );
+ this.processors.put(side, processor);
+ }
+ return processor;
+}
+```
+
+### 5. Facade Pattern
+
+**Location**: `Main.java`
+
+**Purpose**: Provide simplified interface to complex subsystems (Detector, Installer, Bootstrap, ModuleUtil).
+
+**Implementation**:
+- `Main.main()` coordinates multiple subsystems
+- Hides detector loading complexity
+- Hides custom classloader creation
+- Hides reflection-based Installer invocation
+- Hides version-specific ModuleUtil calls
+
+**Client Perspective**: Launcher only needs to invoke:
+```bash
+java -cp ForgeWrapper.jar io.github.zekerzhayard.forgewrapper.installer.Main args...
+```
+
+**Code Example** (from `Main.java` lines 17-46):
+```java
+public static void main(String[] args) throws Throwable {
+ // Simplified entry point - complex logic hidden
+ IFileDetector detector = DetectorLoader.loadDetector();
+ // ... use detector ...
+ // ... create classloader ...
+ // ... invoke Installer via reflection ...
+}
+```
+
+### 6. Lazy Initialization Pattern
+
+**Location**: `MultiMCFileDetector` field initialization and `Installer` wrapper caching
+
+**Purpose**: Defer expensive operations until actually needed.
+
+**Implementation** (from `MultiMCFileDetector.java`):
+```java
+@Override
+public Path getLibraryDir() {
+ if (this.libraryDir == null) {
+ this.libraryDir = IFileDetector.super.getLibraryDir(); // computed on first access
+ }
+ return this.libraryDir;
+}
+```
+
+**Benefits**:
+- If detector method isn't called, path resolution doesn't happen
+- Caching prevents repeated expensive classloader searches
+- Memory efficient for unused paths
+
+### 7. Builder Pattern (Implicit)
+
+**Location**: `Installer.Version0.Arguments` and `Installer.Version0`
+
+**Purpose**: Construct complex configuration objects from JSON.
+
+**Implementation**:
+- `Version0.loadVersion()` (lines 209-214) deserializes from JSON using GSON
+- JSON structure becomes nested object graph
+- Fields are populated via reflection by GSON
+
+**Code Example** (from `Installer.java` lines 203-214):
+```java
+public static Version0 loadVersion(Install profile) {
+ try (InputStream stream = Util.class.getResourceAsStream(profile.getJson())) {
+ return Util.GSON.fromJson(new InputStreamReader(stream, StandardCharsets.UTF_8), Version0.class);
+ } catch (Throwable t) {
+ throw new RuntimeException(t);
+ }
+}
+```
+
+## Data Flow and Execution Model
+
+### Execution Pipeline
+
+```
+Phase 1: Invocation
+ Launcher executes: java -cp ForgeWrapper.jar ... Main args...
+ │
+ ├─ JVM starts ForgeWrapper JAR as classpath
+ ├─ Java locates Main.main(String[]) entry point
+ └─ Execution begins
+
+Phase 2: Argument Parsing (Main.java lines 17-27)
+ args[] contains: --fml.mcVersion 1.20.2 --fml.neoForgeVersion 20.2.20-beta ...
+ │
+ ├─ Convert String[] to List for easier manipulation
+ ├─ Check for --fml.neoForgeVersion to detect NeoForge vs Forge
+ ├─ Search list for mcVersion index, forge version index
+ ├─ Extract and store values
+ └─ Result: 4 string variables containing version info
+
+Phase 3: Detector Selection (Main.java line 28)
+ Calls: IFileDetector detector = DetectorLoader.loadDetector()
+ │
+ ├─ ServiceLoader scans classpath for IFileDetector implementations
+ ├─ Instantiates each implementation (no-arg constructor)
+ ├─ Builds HashMap of name() → detector instance
+ ├─ Iterates to find exactly one with enabled(others) == true
+ ├─ Validates no conflicts
+ └─ Returns single enabled detector (MultiMCFileDetector)
+
+Phase 4: File Location (Main.java lines 30-36)
+ Calls detector methods to locate files:
+ │
+ ├─ detector.getInstallerJar(forgeGroup, forgeArtifact, forgeFullVersion)
+ │ └─ Returns Path or null
+ ├─ Validates with isFile(Path)
+ ├─ If not found, throws "Unable to detect the forge installer!"
+ │
+ ├─ detector.getMinecraftJar(mcVersion)
+ │ └─ Returns Path or null
+ ├─ Validates with isFile(Path)
+ └─ If not found, throws "Unable to detect the Minecraft jar!"
+
+Phase 5: Custom Classloader Creation (Main.java lines 38-41)
+ Creates URLClassLoader with:
+ │
+ ├─ URLClassLoader.newInstance(new URL[] {
+ │ Main.class.getProtectionDomain().getCodeSource().getLocation(), // ForgeWrapper.jar
+ │ installerJar.toUri().toURL() // forge-installer.jar
+ │ }, ModuleUtil.getPlatformClassLoader())
+ │
+ └─ Result: custom classloader with both JARs
+
+Phase 6: Installer Invocation via Reflection (Main.java lines 42-43)
+ Loads Installer from custom classloader:
+ │
+ ├─ Class<?> installer = ucl.loadClass("io.github.zekerzhayard.forgewrapper.installer.Installer")
+ ├─ installer.getMethod("getData", File.class) gets method reference
+ │
+ ├─ Invokes: installer.getMethod("getData", File.class)
+ │ .invoke(null, detector.getLibraryDir().toFile())
+ │
+ └─ Returns: Map<String, Object> with mainClass, jvmArgs, extraLibraries
+
+Phase 7: Bootstrap Configuration (Main.java line 44)
+ Calls: Bootstrap.bootstrap((String[]) data.get("jvmArgs"), ...)
+ │
+ ├─ Replaces placeholders in JVM arguments
+ ├─ Removes NewLaunch.jar from classpath
+ ├─ Extracts module paths from arguments
+ ├─ Calls ModuleUtil.addModules(), addExports(), addOpens()
+ ├─ Sets up system properties
+ └─ Returns normally (or logs error if Java 8)
+
+Phase 8: Installation Execution (Main.java line 47)
+ Invokes: installer.getMethod("install", File.class, File.class, File.class)
+ .invoke(null, libraryDir, minecraftJar, installerJar)
+ │
+ ├─ Installer.install() creates InstallV1Wrapper
+ ├─ Loads installation profile from installer JAR
+ ├─ Gets processors via getProcessors(side)
+ ├─ InstallV1Wrapper.checkProcessorFiles() computes missing hashes
+ ├─ PostProcessors.process() executes installation
+ ├─ Libraries are downloaded or linked
+ ├─ Mod loaders are configured
+ └─ Returns boolean success
+
+Phase 9: Classloader Setup (Main.java line 50)
+ Calls: ModuleUtil.setupClassPath(detector.getLibraryDir(), extraLibraries)
+ │
+ ├─ Reflects into sun.misc.Unsafe to access internal classloader
+ ├─ Gets URLClassPath for system classloader
+ ├─ Adds URLs for all extra libraries
+ └─ System classloader now has all library JARs in path
+
+Phase 10: Module System Finalization (Main.java line 51)
+ Calls: Class<?> mainClass = ModuleUtil.setupBootstrapLauncher(...)
+ │
+ ├─ Java 8: returns mainClass unchanged (no-op)
+ ├─ Java 9+: performs final module system configuration
+ └─ Result: ClassLoader-ready class
+
+Phase 11: Forge Invocation (Main.java line 52)
+ Calls: mainClass.getMethod("main", String[].class)
+ .invoke(null, new Object[] {args})
+ │
+ ├─ Forge bootstrap launcher main() method is invoked
+ ├─ Original launcher arguments are passed through
+ ├─ Forge initializes mod framework
+ ├─ Minecraft main is invoked
+ └─ Game runs under mod loader
+```
+
+### Data Transformation Chain
+
+```
+Original Arguments from Launcher
+ ↓
+Parsed into: { mcVersion, forgeVersion, forgeGroup, isNeoForge, ... }
+ ↓
+Detector#get*() methods
+ ↓
+File Path objects
+ ↓
+URLClassLoader creation
+ ↓
+Installer class loading and invocation
+ ↓
+Installation Profile (JSON deserialization)
+ ↓
+Version0 object with mainClass, Arguments
+ ↓
+JVM Arguments string array with placeholders
+ ↓
+Bootstrap placeholder replacement
+ ↓
+Modified JVM Arguments suitable for Forge
+ ↓
+Module paths extracted from arguments
+ ↓
+ModuleUtil#addModules() invocations
+ ↓
+Modified Java module layer
+ ↓
+Final Forge main class invocation
+ ↓
+Running Minecraft with mods
+```
+
+## Thread Model and Concurrency
+
+### Single-Threaded Execution Model
+
+ForgeWrapper operates entirely in a single thread (the launcher thread):
+
+1. **Main thread**: Entry point and execution coordinator
+2. **No worker threads**: ForgeWrapper itself doesn't spawn threads
+3. **Blocking operations**: All I/O is synchronous and blocking
+4. **Sequential execution**: Phases execute in strict order
+
+**Code Flow**:
+```java
+Main.main(args) // launcher thread
+ ├─ DetectorLoader.loadDetector() // synchronous
+ └─ detector.getInstallerJar() // synchronous
+ └─ detector.getMinecraftJar() // synchronous
+ └─ URLClassLoader creation // synchronous
+ └─ Installer.getData() // synchronous
+ └─ Bootstrap.bootstrap() // synchronous
+ └─ ModuleUtil methods // synchronous
+ └─ Installer.install() // synchronous (Forge may download, happens here)
+ └─ mainClass.main(args) // delegates to Forge (Forge may be multi-threaded)
+```
+
+### Concurrency Considerations
+
+**Not Thread-Safe**:
+- `MultiMCFileDetector` field caching (lines 7-9 in `MultiMCFileDetector.java`) is not synchronized
+- If multiple threads called detector methods, race conditions could occur
+- However, this is not a concern because ForgeWrapper is designed for single-shot execution
+
+**Why Single-Threaded Design**:
+1. Launchers invoke ForgeWrapper once, get result, and move on
+2. No need for concurrent execution
+3. Simplifies error handling (no thread coordination needed)
+4. Reduces memory overhead
+
+**Installer Thread Safety**:
+- `Installer.wrapper` static field (line 23 in `Installer.java`) is cached but not synchronized
+- Safe because it's only accessed during single-shot initialization
+- Even if multiple threads accessed it, the worst case is a harmless re-creation (not data corruption)
+
+### Reflection and ClassLoader Thread Safety
+
+The custom URLClassLoader (Main.java lines 38-41) operates in a thread-safe manner:
+- ClassLoader has internal synchronization for class loading
+- Multiple threads can load classes from same classloader
+- However, ForgeWrapper doesn't use this capability
+
+**ModuleUtil Reflection Safety** (jigsaw version):
+- Uses `sun.misc.Unsafe` for low-level access
+- `Unsafe.putObject()` operations are atomic at hardware level
+- Module layer is designed to be modified during startup only
+- No concurrent access to module layer during bootstrap
+
+## Error Handling Strategy
+
+### Error Scenarios and Recovery
+
+#### Scenario 1: Detector Not Found
+
+**Code** (from `DetectorLoader.java` line 21):
+```java
+if (temp == null) {
+ throw new RuntimeException("No file detector is enabled!");
+}
+```
+
+**When**: No implementations of `IFileDetector` found in `META-INF/services/`
+
+**Recovery**: None - launcher must ensure ForgeWrapper JAR is complete and classpath includes implementations
+
+**User Impact**: Launcher cannot start; must fix installation
+
+#### Scenario 2: Multiple Detectors Enabled
+
+**Code** (from `DetectorLoader.java` lines 18-19):
+```java
+} else if (detector.getValue().enabled(others)) {
+ throw new RuntimeException("There are two or more file detectors are enabled! ...");
+}
+```
+
+**When**: Two implementations both return `true` from `enabled()`
+
+**Recovery**: None - one detector must disable itself
+
+**User Impact**: Configuration error; launcher installation needs repair
+
+#### Scenario 3: Installer JAR Not Found
+
+**Code** (from `Main.java` line 32):
+```java
+if (!isFile(installerJar)) {
+ throw new RuntimeException("Unable to detect the forge installer!");
+}
+```
+
+**When**:
+- `detector.getInstallerJar()` returns null
+- Or returns path to non-existent file
+- Or returns directory instead of file
+
+**Recovery Hints**:
+- Check ForgeWrapper properties: `-Dforgewrapper.installer=/path/to/jar`
+- Verify installer JAR exists at expected MultiMC paths
+- Ensure forge version is correctly specified
+
+**User Impact**: Modded instance cannot launch
+
+#### Scenario 4: Minecraft JAR Not Found
+
+**Code** (from `Main.java` line 36):
+```java
+if (!isFile(minecraftJar)) {
+ throw new RuntimeException("Unable to detect the Minecraft jar!");
+}
+```
+
+**When**:
+- Vanilla Minecraft not installed in expected location
+- Path resolution failed
+
+**Recovery Hints**:
+- Manually specify: `-Dforgewrapper.minecraft=/path/to/1.20.2.jar`
+- Ensure vanilla Minecraft is installed first
+
+**User Impact**: Modded instance cannot launch
+
+#### Scenario 5: Library Directory Not Resolvable
+
+**Code** (from `IFileDetector.java` line 60):
+```java
+throw new UnsupportedOperationException(
+ "Could not detect the libraries folder - it can be manually specified with `-Dforgewrapper.librariesDir=`"
+);
+```
+
+**When**:
+- Classloader resources don't contain expected Forge classes
+- Libraries folder walking fails
+- ClassLoader inspection fails
+
+**Recovery**:
+- Set property: `-Dforgewrapper.librariesDir=/home/user/.minecraft/libraries`
+- Standard recovery path is well-documented
+
+**User Impact**: ForgeWrapper fails at startup; user must configure manually
+
+#### Scenario 6: Installation Failed
+
+**Code** (from `Main.java` line 47):
+```java
+if (!((boolean) installer.getMethod("install", ...)
+ .invoke(null, ...))) {
+ return; // Silent failure
+}
+```
+
+**When**: `PostProcessors.process()` returns false
+
+**Recovery**: None built-in
+
+**User Impact**:
+- Mod loaders not installed
+- Minecraft may fail to start
+- User must reinstall or redownload
+
+#### Scenario 7: Bootstrap Configuration Error
+
+**Code** (from `Main.java` lines 44-46):
+```java
+try {
+ Bootstrap.bootstrap(...);
+} catch (Throwable t) {
+ t.printStackTrace(); // Error logged but execution continues
+}
+```
+
+**When**: JVM argument parsing fails or module configuration fails
+
+**Recovery**: Execution continues regardless
+
+**Rationale**: Bootstrap errors should not prevent Forge initialization attempt
+
+**User Impact**: Depends on specific error; may cause module system misconfiguration
+
+#### Scenario 8: Reflection Error (ModuleUtil)
+
+**Code** (from jigsaw `ModuleUtil.java` line 35):
+```java
+} catch (Throwable t) {
+ throw new RuntimeException(t);
+}
+```
+
+**When**: Module system manipulation via reflection fails
+
+**Recovery**: None - throws RuntimeException
+
+**User Impact**: Java 9+ users cannot run modded Minecraft; critical error
+
+## Dual-Module System
+
+ForgeWrapper uses a sophisticated dual-module architecture to support both Java 8 and Java 9+:
+
+### File Organization
+
+**Main project** (`src/main/`):
+- Targets Java 8 (minimum compatibility)
+- Contains all public APIs
+- `ModuleUtil.java` is stub implementation with all methods as no-ops
+- This is the "default" version used on Java 8
+
+**Jigsaw subproject** (`jigsaw/src/main/`):
+- Targets Java 9+
+- Compiles with `-targetCompatibility 9`
+- Contains advanced `ModuleUtil.java` with full module system support
+- Only compiled if JDK 9+ is available for compilation
+- Classes packaged into `META-INF/versions/9/` in final JAR
+
+### Build Configuration
+
+**From `build.gradle` lines 23-27**:
+```gradle
+configurations {
+ multirelase {
+ implementation.extendsFrom multirelase // typo in original, should be multirelease
+ }
+}
+
+dependencies {
+ multirelase project(":jigsaw") // jigsaw output goes into multirelease config
+}
+```
+
+**From `build.gradle` lines 47-54**:
+```gradle
+jar {
+ into "META-INF/versions/9", {
+ from configurations.multirelase.files.collect {
+ zipTree(it)
+ }
+ }
+}
+```
+
+**From `jigsaw/build.gradle` lines 14-23**:
+```gradle
+compileJava {
+ if (JavaVersion.current() < JavaVersion.VERSION_1_9) {
+ javaCompiler = javaToolchains.compilerFor {
+ languageVersion = JavaLanguageVersion.of(9)
+ }
+ }
+ sourceCompatibility = 9
+ targetCompatibility = 9
+}
+```
+
+### Runtime Behavior
+
+**On Java 8**:
+```
+JVM loads ForgeWrapper.jar
+ ├─ Main class path: io/github/zekerzhayard/forgewrapper/installer/Main.class (Java 8 version)
+ ├─ ModuleUtil path: io/.../util/ModuleUtil.class (Java 8 stub)
+ │ ├─ addModules() → no-op
+ │ ├─ addExports() → no-op
+ │ ├─ addOpens() → no-op
+ │ └─ getPlatformClassLoader() → returns null
+ └─ Execution proceeds with no module system features
+```
+
+**On Java 9+**:
+```
+JVM loads ForgeWrapper.jar
+ ├─ Main class path: io/github/zekerzhayard/forgewrapper/installer/Main.class (Java 8 version)
+ │ (same binary works on both versions)
+ ├─ ModuleUtil path: Preferentially loads from META-INF/versions/9/
+ │ io/.../util/ModuleUtil.class (Java 9+ advanced version)
+ │ ├─ addModules() → complex module layer manipulation
+ │ ├─ addExports() → module export configuration
+ │ ├─ addOpens() → module open configuration
+ │ └─ getPlatformClassLoader() → returns actual PlatformClassLoader
+ └─ Execution proceeds with full module system support
+```
+
+### Manifest Configuration
+
+**From `build.gradle` lines 50-51**:
+```gradle
+"Multi-Release": "true",
+```
+
+This manifest attribute signals to the JVM that the JAR contains multi-release content. Without it, the JVM ignores the `META-INF/versions/9/` directory entirely.
+
+### Why Dual Module System Is Necessary
+
+**Java 8** has no module system:
+- No `java.lang.module` package
+- No reflective module-manipulation tricks (via `sun.misc.Unsafe`) are needed, since there are no modules to manipulate
+- Runtime module manipulation not possible or necessary
+- Stub implementation sufficient
+
+**Java 9+** requires module configuration:
+- Minecraft uses JPMS
+- Forge uses module system features
+- Need to add modules at runtime
+- Need to configure exports and opens
+- Requires low-level JDK manipulation
+
+**Forward Compatibility**:
+- Java 8 code can run unchanged on Java 9+
+- Java 9+ class files cannot be loaded by a Java 8 JVM (unsupported class-file version)
+- Multi-release JAR solves this elegantly
+
+## ModuleUtil Implementation Details
+
+### Java 8 Version (Simple)
+
+Located in `src/main/java/...ModuleUtil.java` (40 lines total):
+
+**Purpose**: Provide stubs that do nothing, allowing the same API to be called regardless of Java version.
+
+**Methods**:
+
+```java
+public static void addModules(String modulePath) {
+ // nothing to do with Java 8
+}
+```
+No-op: Java 8 has no module system; modulePath argument is ignored.
+
+```java
+public static void setupClassPath(Path libraryDir, List<String> paths) throws Throwable {
+ Method addURLMethod = URLClassLoader.class.getDeclaredMethod("addURL", URL.class);
+ addURLMethod.setAccessible(true);
+ for (String path : paths) {
+ addURLMethod.invoke(ClassLoader.getSystemClassLoader(),
+ libraryDir.resolve(path).toUri().toURL());
+ }
+}
+```
+Functional: Uses reflection to access Java 8's URLClassLoader.addURL() to dynamically add JARs to the system classloader.
+
+**Pattern**: All other methods follow same no-op structure for Java 8 compatibility.
+
+### Java 9+ Version (Complex)
+
+Located in `jigsaw/src/main/java/...ModuleUtil.java` (200+ lines):
+
+**Prerequisite Knowledge**:
+- Java module system basics: modules named, exports controlled
+- JDK internals: MethodHandles, Unsafe, reflection APIs
+- Module layer: boot layer is the root layer
+
+**Key Insight** (line 32):
+```java
+private final static MethodHandles.Lookup IMPL_LOOKUP = getImplLookup();
+```
+
+The `IMPL_LOOKUP` is a special method handle lookup obtained via `sun.misc.Unsafe`. This provides access to internal JDK classes not normally accessible through reflection.
+
+**The addModules() Method (lines 52-137)**: This is the most complex method. Step-by-step:
+
+1. **Module Discovery** (lines 56-60):
+```java
+ModuleFinder finder = ModuleFinder.of(
+ Stream.of(modulePath.split(File.pathSeparator))
+ .map(Paths::get)
+ .filter(p -> ...) // exclude existing modules
+ .toArray(Path[]::new)
+);
+```
+Creates a ModuleFinder for the provided module path, excluding modules that already exist.
+
+2. **Module Loading** (lines 61-63):
+```java
+MethodHandle loadModuleMH = IMPL_LOOKUP.findVirtual(
+ Class.forName("jdk.internal.loader.BuiltinClassLoader"),
+ "loadModule",
+ MethodType.methodType(void.class, ModuleReference.class)
+);
+```
+Obtains a method handle for `BuiltinClassLoader.loadModule()`, which is an internal JDK method.
+
+3. **Resolution and Configuration** (lines 66-79):
+```java
+Configuration config = Configuration.resolveAndBind(
+ finder, List.of(ModuleLayer.boot().configuration()), finder, roots
+);
+```
+Creates a module configuration graph for the new modules, bound to the boot module layer.
+
+4. **Graph Manipulation** (lines 81-118):
+```java
+HashMap<ResolvedModule, Set<ResolvedModule>> graphMap = ...
+for (Map.Entry<ResolvedModule, Set<ResolvedModule>> entry : graphMap.entrySet()) {
+ cfSetter.invokeWithArguments(entry.getKey(), ModuleLayer.boot().configuration());
+ ...
+}
+```
+Modifies the module dependency graph to integrate new modules into the boot layer's configuration.
+
+5. **Module Definition** (lines 120-123):
+Invokes internal `Module.defineModules()` to actually create the module objects in the boot layer.
+
+6. **Read Edge Setup** (lines 131-137):
+```java
+implAddReadsMH.invokeWithArguments(module, bootModule);
+```
+Establishes "reads" relationships between new modules and existing boot modules, allowing them to see each other.
+
+**The addExports() Method (lines 139-140)**:
+```java
+public static void addExports(List<String> exports) {
+ TypeToAdd.EXPORTS.implAdd(exports);
+}
+```
+Delegates to enum that uses method handles to call module export methods.
+
+**The addOpens() Method (lines 143-144)**:
+```java
+public static void addOpens(List<String> opens) {
+ TypeToAdd.OPENS.implAdd(opens);
+}
+```
+Similar to addExports but for "opens" (deep reflection access).
+
+**The parseModuleExtra() Method (lines 173-186)**:
+Parses format: `<module>/<package>=<target>`
+
+Example: `java.base/java.lang=ALL-UNNAMED`
+
+This means: Module `java.base` should export/open package `java.lang` to all unnamed modules.
+
+## Class Diagram
+
+ASCII UML representation of full architecture:
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│ <<entry-point>> │
+│ Main │
+├─────────────────────────────────────────────────────────────────┤
+│ - (no fields, stateless) │
+├─────────────────────────────────────────────────────────────────┤
+│ + main(arguments: String[]): void (static) │
+│ + isFile(path: Path): boolean (private static) │
+└────────────────┬──────────────────────────────────────────────┬─┘
+ │ invokes creates │
+ │ │
+ ▼ ▼
+ ┌────────────────────────┐ ┌──────────────────────────┐
+ │ DetectorLoader │ │ URLClassLoader │
+ ├────────────────────────┤ ├──────────────────────────┤
+ │ - (no fields) │ │ - custom classloader │
+ ├────────────────────────┤ ├──────────────────────────┤
+ │ + loadDetector() │────┐ │ loadClass(name) │
+ │ : IFileDetector │ │ └──────────────────────────┘
+ └────────────────────────┘ │
+ ▲ │
+ uses SPI │ ┌──────▼─────────────────┐
+ │ │ <<interface>> │
+ ┌────────────┴───────────IFileDetector │
+ │ │ ├────────────────────────┤
+ │ │ │ + name(): String │
+ │ returns │ │ + enabled(...):bool │
+ │ │ │ + getLibraryDir():Path │
+ │ └────────→ │ + getInstallerJar(...):Path
+ │ │ + getMinecraftJar(...):Path
+ │ └────────────────────────┘
+ │ ▲
+ │ │ implements
+ │ │
+ │ ┌──────┴──────────────────────┐
+ │ │ MultiMCFileDetector │
+ │ ├─────────────────────────────┤
+ │ │ - libraryDir: Path (cached) │
+ │ │ - installerJar: Path │
+ │ │ - minecraftJar: Path │
+ │ ├─────────────────────────────┤
+ │ │ + name() │
+ │ │ + enabled(...) │
+ │ │ + getLibraryDir() │
+ │ │ + getInstallerJar(...): Path
+ │ │ + getMinecraftJar(...) │
+ │ └─────────────────────────────┘
+ │
+ └────────────────────────────────────────────────────────┘
+
+ ┌─────────────────────────────────────────────────────┐
+ │ Main calls Bootstrap.bootstrap(...) │
+ └─────────────────────┬───────────────────────────────┘
+ │
+ ▼
+ ┌────────────────────────────┐
+ │ Bootstrap │
+ ├────────────────────────────┤
+ │ - (no fields) │
+ ├────────────────────────────┤
+ │ + bootstrap(jvmArgs,...): │
+ │ void (static throws) │
+ └────┬───────────────────────┘
+ │ invokes
+ ▼
+ ┌─────────────────────────────────┐
+ │ ModuleUtil │
+ ├─────────────────────────────────┤
+ │ - (version-specific) │
+ ├─────────────────────────────────┤
+ │ + addModules(path): void │
+ │ + addExports(list): void │
+ │ + addOpens(list): void │
+ │ + setupClassPath(...): void │
+ │ + setupBootstrapLauncher(...): │
+ │ Class<?> │
+ │ + getPlatformClassLoader(): │
+ │ ClassLoader │
+ └─────────────────────────────────┘
+ (Java 8: all no-op)
+ (Java 9+: advanced module manipulation)
+
+ ┌──────────────────────────────────────────┐
+ │ URLClassLoader loads Installer via │
+ │ reflection │
+ └──────────────────────────┬───────────────┘
+ │
+ ▼
+ ┌────────────────────────────────────┐
+ │ Installer │
+ ├────────────────────────────────────┤
+ │- wrapper: InstallV1Wrapper (static)│
+ ├────────────────────────────────────┤
+ │+ getData(File): Map │
+ │+ install(File,File,File): boolean │
+ │- getWrapper(File):InstallV1Wrapper │
+ └────────────┬─────────────────────┬─┘
+ │ │
+ ┌────────────────┘ └────────────┐
+ │ │
+ ▼ ▼
+ ┌────────────────────────────────┐ ┌──────────────────────────────┐
+ │ InstallV1Wrapper │ │ Version0 │
+ │ extends InstallV1 │ │ extends Version │
+ ├────────────────────────────────┤ ├──────────────────────────────┤
+ │- processors: Map<...> │ │- mainClass: String │
+ │- librariesDir: File │ │- arguments: Arguments │
+ ├────────────────────────────────┤ ├──────────────────────────────┤
+ │+ getProcessors(side): List │ │+ getMainClass(): String │
+ │- checkProcessorFiles(...) │ │+ getArguments(): Arguments │
+ │- setOutputs(...) │ │+ loadVersion(...):Version0 │
+ └────────────────────────────────┘ └──────────────────────────────┘
+ │
+ │ contains
+ ▼
+ ┌──────────────────────┐
+ │ Arguments │
+ ├──────────────────────┤
+ │- jvm: String[] │
+ ├──────────────────────┤
+ │+ getJvm(): String[] │
+ └──────────────────────┘
+```
+
+## Reflection-Based Architecture
+
+ForgeWrapper uses reflection extensively to provide a flexible, decoupled architecture. Here are key reflection patterns:
+
+### Pattern 1: Dynamic Class Loading (Main.java lines 42-43)
+
+```java
+Class<?> installer = ucl.loadClass("io.github.zekerzhayard.forgewrapper.installer.Installer");
+```
+
+**Why**: Avoids compile-time dependency on installer JAR. The Installer class is only loaded from the custom classloader that includes the installer JAR.
+
+**Benefits**:
+- ForgeWrapper source doesn't need Installer on buildpath
+- Different Installer versions can be used without rebuilding ForgeWrapper
+- Decouples ForgeWrapper versioning from Forge versioning
+
+### Pattern 2: Method Invocation via Reflection (Main.java lines 43-44)
+
+```java
+Map<String, Object> data = (Map<String, Object>) installer
+ .getMethod("getData", File.class)
+ .invoke(null, detector.getLibraryDir().toFile());
+```
+
+**Why**: Main.java doesn't have the Installer class available at compile time.
+
+**Mechanics**:
+- `getMethod("getData", File.class)` finds method that takes File parameter
+- `.invoke(null, ...)` invokes it (null = static method, no instance needed)
+- Result cast to Map<String, Object>
+
+### Pattern 3: Unsafe Field Access (jigsaw ModuleUtil.java lines 33-48)
+
+```java
+Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe");
+unsafeField.setAccessible(true);
+Unsafe unsafe = (Unsafe) unsafeField.get(null);
+
+Field implLookupField = MethodHandles.Lookup.class.getDeclaredField("IMPL_LOOKUP");
+return (MethodHandles.Lookup) unsafe.getObject(
+ unsafe.staticFieldBase(implLookupField),
+ unsafe.staticFieldOffset(implLookupField)
+);
+```
+
+**Why**: Need access to internal `IMPL_LOOKUP` field that's normally inaccessible.
+
+**What It Does**: Uses Unsafe to reflectively read a private static field from MethodHandles.Lookup, obtaining the internal "implementation lookup" that has special JDK permissions.
+
+**Risk**: Relies on internal JDK implementation; may break in future Java versions.
+
+### Pattern 4: Method Handle Creation (TypeToAdd enum constructors)
+
+```java
+this.implAddMH = IMPL_LOOKUP.findVirtual(Module.class, "implAddExports",
+ MethodType.methodType(void.class, String.class, Module.class));
+```
+
+**Why**: Method handles provide high-performance reflection; used for repeated invocations.
+
+**Benefit**: Once created, method handles are cached. Repeated calls are very fast.
+
+### Pattern 5: Field Reflection for Modification (Installer.java lines 185-192)
+
+```java
+private static Field outputsField;
+private static void setOutputs(Processor processor, Map<String, String> outputs) {
+ try {
+ if (outputsField == null) {
+ outputsField = Processor.class.getDeclaredField("outputs");
+ outputsField.setAccessible(true);
+ }
+ outputsField.set(processor, outputs);
+ } catch (Throwable t) {
+ throw new RuntimeException(t);
+ }
+}
+```
+
+**Why**: Need to modify private field `outputs` on Processor objects.
+
+**Pattern**: Lazy-initialize field reference, cache it, reuse for all modifications.
+
+This comprehensive architectural documentation provides deep understanding of ForgeWrapper's design, pattern usage, and implementation strategy.
+```
+
diff --git a/docs/handbook/forgewrapper/building.md b/docs/handbook/forgewrapper/building.md
new file mode 100644
index 0000000000..84fa3bb1ec
--- /dev/null
+++ b/docs/handbook/forgewrapper/building.md
@@ -0,0 +1,1843 @@
+# ForgeWrapper — Building & Gradle Build System Reference
+
+This document provides a comprehensive, line-by-line analysis of the ForgeWrapper
+build system. Every statement references actual source code from the Gradle build
+files. Variable names, values, and configuration blocks are taken directly from
+the project.
+
+---
+
+## Table of Contents
+
+1. [Project Overview](#1-project-overview)
+2. [Multi-Project Structure](#2-multi-project-structure)
+3. [settings.gradle Analysis](#3-settingsgradle-analysis)
+4. [gradle.properties Analysis](#4-gradleproperties-analysis)
+5. [Gradle Wrapper Configuration](#5-gradle-wrapper-configuration)
+6. [Root build.gradle — Complete Line-by-Line Analysis](#6-root-buildgradle--complete-line-by-line-analysis)
+ - 6.1 [Imports](#61-imports)
+ - 6.2 [Plugins Block](#62-plugins-block)
+ - 6.3 [Java Source/Target Compatibility](#63-java-sourcetarget-compatibility)
+ - 6.4 [Version, Group, and Archives Base Name](#64-version-group-and-archives-base-name)
+ - 6.5 [The multirelase Configuration](#65-the-multirelase-configuration)
+ - 6.6 [Repositories](#66-repositories)
+ - 6.7 [Dependencies](#67-dependencies)
+ - 6.8 [Sources JAR](#68-sources-jar)
+ - 6.9 [JAR Manifest Attributes](#69-jar-manifest-attributes)
+ - 6.10 [Multi-Release JAR Packing](#610-multi-release-jar-packing)
+ - 6.11 [Publishing Configuration](#611-publishing-configuration)
+ - 6.12 [getVersionSuffix()](#612-getversionsuffix)
+7. [Jigsaw Subproject build.gradle — Complete Analysis](#7-jigsaw-subproject-buildgradle--complete-analysis)
+ - 7.1 [Plugins](#71-plugins)
+ - 7.2 [Java 9 Toolchain Auto-Detection](#72-java-9-toolchain-auto-detection)
+ - 7.3 [JVM Version Attribute Override](#73-jvm-version-attribute-override)
+8. [Multi-Release JAR — Deep Dive](#8-multi-release-jar--deep-dive)
+9. [Build Pipeline Diagram](#9-build-pipeline-diagram)
+10. [Build Targets and Tasks](#10-build-targets-and-tasks)
+11. [Step-by-Step Build Guide](#11-step-by-step-build-guide)
+12. [CI/CD Integration](#12-cicd-integration)
+13. [Artifact Output Structure](#13-artifact-output-structure)
+14. [Publishing to Local Maven Repository](#14-publishing-to-local-maven-repository)
+15. [Java Version Requirements](#15-java-version-requirements)
+16. [Source File Layout and the Dual-ModuleUtil Pattern](#16-source-file-layout-and-the-dual-moduleutil-pattern)
+17. [Troubleshooting](#17-troubleshooting)
+18. [Quick Reference Card](#18-quick-reference-card)
+
+---
+
+## 1. Project Overview
+
+ForgeWrapper is a Java library that allows third-party Minecraft launchers
+(originally MultiMC, now adopted more broadly) to launch Minecraft 1.13+ with
+Forge and NeoForge. The build system produces a **Multi-Release JAR** (MRJAR)
+that contains:
+
+- **Java 8 bytecode** in the standard class path for maximum compatibility.
+- **Java 9+ bytecode** under `META-INF/versions/9/` for environments running on
+ the Java Platform Module System (JPMS / Project Jigsaw).
+
+The Gradle build is a **multi-project build** consisting of:
+
+| Project | Directory | Java Target | Purpose |
+|------------|-------------------|-------------|---------------------------------|
+| Root | `forgewrapper/` | Java 8 | Main ForgeWrapper library |
+| `jigsaw` | `forgewrapper/jigsaw/` | Java 9 | JPMS-aware `ModuleUtil` overlay |
+
+The root project's JAR task assembles both outputs into a single artifact with
+`Multi-Release: true` in its manifest.
+
+---
+
+## 2. Multi-Project Structure
+
+The ForgeWrapper build is organized as a Gradle multi-project build. The
+directory tree relevant to the build system is:
+
+```
+forgewrapper/
+├── build.gradle ← Root project build script
+├── settings.gradle ← Declares root name + subprojects
+├── gradle.properties ← Project-wide properties
+├── gradlew ← Gradle wrapper (Linux/macOS)
+├── gradlew.bat ← Gradle wrapper (Windows)
+├── gradle/
+│ └── wrapper/
+│ └── gradle-wrapper.properties ← Wrapper distribution URL + version
+├── src/
+│ └── main/
+│ └── java/
+│ └── io/github/zekerzhayard/forgewrapper/installer/
+│ ├── Bootstrap.java
+│ ├── Installer.java
+│ ├── Main.java
+│ ├── detector/
+│ │ ├── DetectorLoader.java
+│ │ ├── IFileDetector.java
+│ │ └── MultiMCFileDetector.java
+│ └── util/
+│ └── ModuleUtil.java ← Java 8 stub (no-op methods)
+├── jigsaw/
+│ ├── build.gradle ← Subproject build script
+│ └── src/
+│ └── main/
+│ └── java/
+│ └── io/github/zekerzhayard/forgewrapper/installer/
+│ └── util/
+│ └── ModuleUtil.java ← Java 9 full implementation
+└── build/ ← Generated outputs (after build)
+ ├── libs/
+ │ ├── ForgeWrapper-<version>.jar
+ │ └── ForgeWrapper-<version>-sources.jar
+ └── maven/ ← Local maven publish target
+```
+
+The root project compiles all code under `src/` with Java 8. The `jigsaw`
+subproject compiles its own `ModuleUtil.java` with Java 9. At JAR assembly time,
+the jigsaw output is injected into `META-INF/versions/9/` inside the root JAR,
+creating the Multi-Release JAR.
+
+---
+
+## 3. settings.gradle Analysis
+
+**File: `forgewrapper/settings.gradle`** (2 lines of active content)
+
+```groovy
+rootProject.name = 'ForgeWrapper'
+
+include 'jigsaw'
+```
+
+### Line-by-line:
+
+**`rootProject.name = 'ForgeWrapper'`**
+
+Sets the human-readable name of the root Gradle project to `ForgeWrapper`. This
+name is used as:
+
+- The default `archivesBaseName` (overridden in `build.gradle` to
+ `rootProject.name`, which resolves to the same value `ForgeWrapper`).
+- The `Specification-Title` and `Implementation-Title` in the JAR manifest.
+- The base filename for produced artifacts: `ForgeWrapper-<version>.jar`.
+
+**`include 'jigsaw'`**
+
+Includes the subdirectory `jigsaw/` as a Gradle subproject. Gradle will look for
+`jigsaw/build.gradle` and compile it as a separate project within the same build.
+This subproject is referenced in the root `build.gradle` via
+`project(":jigsaw")` in the `multirelase` dependency declaration.
+
+The relationship between settings.gradle and the two build.gradle files:
+
+```
+settings.gradle
+ │
+ ├── rootProject.name = 'ForgeWrapper'
+ │ ↓
+ │ build.gradle (root)
+ │ archivesBaseName = rootProject.name → "ForgeWrapper"
+ │ dependencies { multirelase project(":jigsaw") }
+ │ │
+ └── include 'jigsaw' │
+ ↓ ↓
+ jigsaw/build.gradle ←─────── resolved here
+```
+
+---
+
+## 4. gradle.properties Analysis
+
+**File: `forgewrapper/gradle.properties`** (2 active properties)
+
+```properties
+org.gradle.daemon = false
+
+fw_version = projt
+```
+
+### `org.gradle.daemon = false`
+
+Disables the Gradle daemon for this project. Normally Gradle keeps a long-lived
+JVM process (the daemon) running between builds to speed up subsequent
+invocations. Setting this to `false` means every `./gradlew` invocation starts a
+fresh JVM. This is typical for:
+
+- **CI/CD environments** where daemon state can cause flaky builds.
+- **Projects with infrequent builds** where the daemon would just consume memory.
+- **Reproducibility** — no cached daemon state between runs.
+
+### `fw_version = projt`
+
+Defines the base version string for ForgeWrapper as the literal value `projt`.
+This property is referenced in `build.gradle` via string interpolation:
+
+```groovy
+version = "${fw_version}${-> getVersionSuffix()}"
+```
+
+The `${fw_version}` is replaced with `projt` from `gradle.properties`, and then
+the lazy GString closure `${-> getVersionSuffix()}` appends a suffix. The final
+version string will be one of:
+
+| Environment | Resulting Version |
+|--------------------------------------|-------------------------------|
+| Local development | `projt-LOCAL` |
+| GitHub Actions CI | `projt-2026-04-05` |
+| IS_PUBLICATION env var set | `projt-2026-04-05` |
+
+The `fw_version` property is available to all projects in the multi-project
+build because `gradle.properties` in the root directory is automatically loaded
+for all subprojects.
+
+---
+
+## 5. Gradle Wrapper Configuration
+
+**File: `forgewrapper/gradle/wrapper/gradle-wrapper.properties`**
+
+```properties
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-all.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
+```
+
+### Property-by-property:
+
+**`distributionBase=GRADLE_USER_HOME`**
+
+The base directory where the downloaded Gradle distribution will be stored.
+`GRADLE_USER_HOME` defaults to `~/.gradle` on Linux/macOS and
+`%USERPROFILE%\.gradle` on Windows.
+
+**`distributionPath=wrapper/dists`**
+
+Relative path under `distributionBase` where distributions are unpacked. The
+full absolute path becomes `~/.gradle/wrapper/dists/`.
+
+**`distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-all.zip`**
+
+The URL from which Gradle **7.3.3** is downloaded. Key observations:
+
+- **Version 7.3.3** — Released December 2021. This is a Gradle 7.x release that
+ supports Java toolchains (used in the jigsaw subproject) and is compatible
+ with both Java 8 and Java 17+.
+- **`-all` variant** — Includes Gradle source code and documentation, not just
+ the binaries (`-bin`). This allows IDEs to show Gradle DSL documentation and
+ enables source-level debugging of Gradle itself.
+- The `\:` escape is standard `.properties` file syntax for a literal colon.
+
+**`zipStoreBase=GRADLE_USER_HOME`** and **`zipStorePath=wrapper/dists`**
+
+Where the downloaded ZIP file itself is cached before extraction. Same base
+directory as the unpacked distribution.
+
+### Gradle Version Compatibility Matrix
+
+| Feature Used in Build | Minimum Gradle Version | Actual |
+|--------------------------------|------------------------|--------|
+| `java` plugin | 1.0 | 7.3.3 |
+| `maven-publish` plugin | 1.3 | 7.3.3 |
+| `javaToolchains.compilerFor` | 6.7 | 7.3.3 |
+| `layout.buildDirectory.dir()` | 7.1 | 7.3.3 |
+| `TargetJvmVersion` attribute | 6.0 | 7.3.3 |
+
+All features used in the build are supported by Gradle 7.3.3.
+
+---
+
+## 6. Root build.gradle — Complete Line-by-Line Analysis
+
+**File: `forgewrapper/build.gradle`** (90 lines)
+
+This is the primary build script. We will examine every section.
+
+### 6.1 Imports
+
+```groovy
+import java.text.SimpleDateFormat
+```
+
+Line 1 imports `SimpleDateFormat`, used in the `getVersionSuffix()` method at
+line 89 to format the current date as `-yyyy-MM-dd` (e.g., `-2026-04-05`).
+This import is at the top of the script, outside any block, making it available
+to all closures in the file.
+
+### 6.2 Plugins Block
+
+```groovy
+plugins {
+ id "java"
+ id "eclipse"
+ id "maven-publish"
+}
+```
+
+Lines 3–7: Three plugins are applied.
+
+**`java`** — The core Java compilation plugin. It provides:
+- `compileJava` task — compiles `src/main/java/**/*.java`.
+- `jar` task — packages compiled classes into a JAR.
+- `sourceSets` — the `main` and `test` source sets.
+- `sourceCompatibility` / `targetCompatibility` — Java version settings.
+- `configurations` — `implementation`, `compileOnly`, `runtimeOnly`, etc.
+- The `components.java` software component used by `maven-publish`.
+
+**`eclipse`** — Generates Eclipse IDE project files (`.project`, `.classpath`,
+`.settings/`). Allows developers to run `./gradlew eclipse` to import into
+Eclipse. This does not affect the build output.
+
+**`maven-publish`** — Enables publishing artifacts to Maven repositories. It
+provides:
+- The `publishing { }` DSL block.
+- The `publish` task (and per-repository `publishToMavenLocal`, etc.).
+- `MavenPublication` type for defining what gets published.
+
+### 6.3 Java Source/Target Compatibility
+
+```groovy
+sourceCompatibility = targetCompatibility = 1.8
+compileJava {
+ sourceCompatibility = targetCompatibility = 1.8
+}
+```
+
+Lines 9–12: Java 8 compatibility is set **twice** — at the project level and
+inside the `compileJava` task configuration. This double declaration ensures the
+setting is applied regardless of evaluation order.
+
+- **`sourceCompatibility = 1.8`** — The Java source code must be valid Java 8
+ syntax. The compiler will reject Java 9+ language features (e.g., `var`,
+ private interface methods).
+- **`targetCompatibility = 1.8`** — The emitted bytecode targets Java 8 (class
+ file version 52.0). JVMs older than Java 8 will refuse to load these classes.
+
+This is critical because ForgeWrapper must run on Minecraft launchers that may
+still use Java 8. The `ModuleUtil.java` in the root source tree contains no-op
+stubs precisely because Java 8 has no `java.lang.module` API:
+
+```java
+// forgewrapper/src/main/java/.../util/ModuleUtil.java
+public class ModuleUtil {
+ public static void addModules(String modulePath) {
+ // nothing to do with Java 8
+ }
+ public static void addExports(List<String> exports) {
+ // nothing to do with Java 8
+ }
+ public static void addOpens(List<String> opens) {
+ // nothing to do with Java 8
+ }
+ // ...
+}
+```
+
+### 6.4 Version, Group, and Archives Base Name
+
+```groovy
+version = "${fw_version}${-> getVersionSuffix()}"
+group = "io.github.zekerzhayard"
+archivesBaseName = rootProject.name
+```
+
+Lines 14–16: Maven coordinates and artifact naming.
+
+**`version`** — A Groovy GString with a lazy evaluation closure. The `fw_version`
+from `gradle.properties` is resolved immediately to `projt`. The closure
+`${-> getVersionSuffix()}` is evaluated lazily — only when the GString is
+converted to a String (at task execution time, not configuration time). This
+matters because `getVersionSuffix()` calls `System.getenv()` and `new Date()`,
+which should reflect the actual build time, not the configuration phase time.
+
+**`group = "io.github.zekerzhayard"`** — The Maven `groupId`. Follows the
+reverse-domain convention for the project's GitHub namespace
+(`github.com/ZekerZhayard`).
+
+**`archivesBaseName = rootProject.name`** — Set to `ForgeWrapper` (from
+`settings.gradle`). This is the filename prefix for all produced JARs:
+`ForgeWrapper-projt-LOCAL.jar`.
+
+### 6.5 The multirelase Configuration
+
+```groovy
+configurations {
+ multirelase {
+ implementation.extendsFrom multirelase
+ }
+}
+```
+
+Lines 18–22: This block creates a **custom Gradle configuration** named
+`multirelase` (note: this is intentionally or accidentally spelled without the
+second "e" — it is not `multirelease`).
+
+The statement `implementation.extendsFrom multirelase` establishes a dependency
+inheritance chain: everything in the `multirelase` configuration is also visible
+to the `implementation` configuration. This means:
+
+1. When `multirelase project(":jigsaw")` is declared (line 38), the jigsaw
+ subproject's classes are available on the root project's compile classpath.
+2. The `multirelase` configuration is also used separately in the `jar` block
+ (line 60) to extract the jigsaw classes and pack them into
+ `META-INF/versions/9/`.
+
+The configuration flow:
+
+```
+multirelase config
+ │
+ ├──→ implementation config (via extendsFrom)
+ │ → compileJava can see jigsaw classes
+ │
+ └──→ jar task (via configurations.multirelase.files)
+ → copies jigsaw classes into META-INF/versions/9/
+```
+
+This dual use is the heart of the Multi-Release JAR mechanism. The root source
+code can reference `ModuleUtil` (which it does — in `Bootstrap.java` and
+`Main.java`), and at compile time Gradle resolves the Java 8 stubs from the root
+`src/`. At runtime, the JVM selects the appropriate `ModuleUtil` class: the Java
+8 stub from the main class path on Java 8 JVMs, or the Java 9 implementation
+from `META-INF/versions/9/` on Java 9+ JVMs.
+
+### 6.6 Repositories
+
+```groovy
+repositories {
+ mavenCentral()
+ maven {
+ name = "forge"
+ url = "https://maven.minecraftforge.net/"
+ }
+}
+```
+
+Lines 24–29: Two Maven repositories are declared.
+
+**`mavenCentral()`** — Maven Central Repository (`https://repo.maven.apache.org/maven2/`).
+Used to resolve:
+- `com.google.code.gson:gson:2.8.7`
+- `net.sf.jopt-simple:jopt-simple:5.0.4`
+
+**Forge Maven** (`https://maven.minecraftforge.net/`) — MinecraftForge's official
+Maven repository. Named `"forge"` for logging clarity. Used to resolve:
+- `cpw.mods:modlauncher:8.0.9`
+- `net.minecraftforge:installer:2.2.7`
+
+Note: The repository named `"forge"` is a Gradle naming convention for
+readability; the name does not affect dependency resolution behavior.
+
+### 6.7 Dependencies
+
+```groovy
+dependencies {
+ compileOnly "com.google.code.gson:gson:2.8.7"
+ compileOnly "cpw.mods:modlauncher:8.0.9"
+ compileOnly "net.minecraftforge:installer:2.2.7"
+ compileOnly "net.sf.jopt-simple:jopt-simple:5.0.4"
+
+ multirelase project(":jigsaw")
+}
+```
+
+Lines 31–38: All external dependencies use the `compileOnly` scope.
+
+#### Why `compileOnly`?
+
+`compileOnly` means the dependency is available at compile time but is **not**
+included in the runtime classpath or the published POM. ForgeWrapper is loaded
+into an environment (the Minecraft launcher) that already provides these
+libraries. Bundling them would cause version conflicts and inflate the JAR.
+
+#### Dependency Analysis
+
+**`com.google.code.gson:gson:2.8.7`**
+
+- **What**: Google's JSON serialization/deserialization library.
+- **Why compileOnly**: The Minecraft launcher and Forge installer both bundle
+ Gson. Including it again would create version conflicts.
+- **Classes used**: `com.google.gson.Gson`, `com.google.gson.JsonObject`, etc.
+ Used in `Installer.java` to parse the Forge installer's JSON metadata.
+
+**`cpw.mods:modlauncher:8.0.9`**
+
+- **What**: The Forge ModLauncher framework, maintained by the developer known
+  as cpw. Manages mod loading, class transformation, and game launch
+  orchestration.
+- **Why compileOnly**: ModLauncher is part of the Forge runtime. ForgeWrapper
+  interacts with its API to set up the launch environment but doesn't ship it.
+- **Classes used**: Launch target interfaces and transformation services.
+- **Version 8.0.9**: Targets Forge for the Minecraft 1.16.x–1.17.x era.
+
+**`net.minecraftforge:installer:2.2.7`**
+
+- **What**: The Forge installer library. Contains code for downloading,
+ extracting, and setting up Forge libraries.
+- **Why compileOnly**: ForgeWrapper loads the installer JAR at runtime via a
+ `URLClassLoader` (see `Main.java` lines 48–50). It does not bundle it.
+- **Classes used**: Invoked reflectively — `installer.getMethod("getData", ...)`
+ and `installer.getMethod("install", ...)` in `Main.java`.
+
+**`net.sf.jopt-simple:jopt-simple:5.0.4`**
+
+- **What**: A command-line option parser for Java.
+- **Why compileOnly**: Provided by the launcher environment (Forge uses it for
+ argument parsing). ForgeWrapper reads parsed arguments but doesn't need its
+ own copy.
+- **Classes used**: `OptionParser`, `OptionSet` — for parsing launch arguments
+ like `--fml.mcVersion`, `--fml.forgeVersion`, `--fml.neoForgeVersion`.
+
+**`multirelase project(":jigsaw")`**
+
+- **What**: A project dependency on the `jigsaw` subproject.
+- **Scope**: The custom `multirelase` configuration (not `compileOnly` or
+ `implementation` directly). Since `implementation.extendsFrom multirelase`,
+ the jigsaw classes are on the compile classpath.
+- **Purpose**: The jigsaw project produces a JAR with Java 9 bytecode. This JAR
+ is consumed by the root project's `jar` task and unpacked into
+ `META-INF/versions/9/`.
+
+### 6.8 Sources JAR
+
+```groovy
+java {
+ withSourcesJar()
+}
+```
+
+Lines 40–42: Registers a `sourcesJar` task that produces
+`ForgeWrapper-<version>-sources.jar` containing all `.java` files from
+`src/main/java`. This artifact is included in Maven publications alongside the
+main JAR, enabling downstream users and IDEs to access source code for debugging.
+
+### 6.9 JAR Manifest Attributes
+
+```groovy
+jar {
+ manifest.attributes([
+ "Specification-Title": "${project.name}",
+ "Specification-Vendor": "ZekerZhayard",
+ "Specification-Version": "${project.version}".split("-")[0],
+ "Implementation-Title": "${project.name}",
+ "Implementation-Version": "${project.version}",
+ "Implementation-Vendor" :"ZekerZhayard",
+ "Implementation-Timestamp": new Date().format("yyyy-MM-dd'T'HH:mm:ssZ"),
+ "Automatic-Module-Name": "${project.group}.${project.archivesBaseName}".toString().toLowerCase(),
+ "Multi-Release": "true",
+ "GitCommit": String.valueOf(System.getenv("GITHUB_SHA"))
+ ])
+```
+
+Lines 44–56: The `jar` task configures `META-INF/MANIFEST.MF` with these
+attributes. Each one explained:
+
+**`Specification-Title: ForgeWrapper`**
+
+Identifies the specification this JAR implements. Set to `project.name` which
+resolves to `ForgeWrapper`. Part of the JAR Specification versioning convention
+defined in `java.lang.Package`.
+
+**`Specification-Vendor: ZekerZhayard`**
+
+The organization or individual that maintains the specification. Hardcoded to
+the project author's GitHub handle.
+
+**`Specification-Version: projt`**
+
+The spec version. Note the expression `"${project.version}".split("-")[0]` —
+this takes the full version string (e.g., `projt-LOCAL` or `projt-2026-04-05`)
+and splits on `-`, taking only the first element: `projt`. This gives a stable
+specification version irrespective of the build suffix.
+
+**`Implementation-Title: ForgeWrapper`**
+
+Same as `Specification-Title`. Identifies this particular implementation.
+
+**`Implementation-Version: projt-LOCAL`** (or `projt-2026-04-05`)
+
+The full version string including the suffix. Unlike `Specification-Version`,
+this captures the exact build variant.
+
+**`Implementation-Vendor: ZekerZhayard`**
+
+The vendor of this implementation. Same as `Specification-Vendor`.
+
+**`Implementation-Timestamp: 2026-04-05T14:30:00+0000`** (example)
+
+The build timestamp in ISO 8601 format. Generated by `new Date().format(...)`.
+The format pattern `yyyy-MM-dd'T'HH:mm:ssZ` produces:
+- `yyyy` — 4-digit year
+- `MM` — 2-digit month
+- `dd` — 2-digit day
+- `'T'` — literal T separator
+- `HH:mm:ss` — 24-hour time
+- `Z` — timezone offset (e.g., `+0000`)
+
+**`Automatic-Module-Name: io.github.zekerzhayard.forgewrapper`**
+
+The JPMS module name for this JAR when it is placed on the module path. The
+expression `"${project.group}.${project.archivesBaseName}".toString().toLowerCase()`
+concatenates `io.github.zekerzhayard` + `.` + `ForgeWrapper`, converts to
+lowercase: `io.github.zekerzhayard.forgewrapper`. This allows other JPMS modules
+to `requires io.github.zekerzhayard.forgewrapper;`.
+
+**`Multi-Release: true`**
+
+The critical attribute. Tells the JVM (Java 9+) that this JAR contains
+version-specific class files under `META-INF/versions/<N>/`. Without this
+attribute, the JVM ignores the `META-INF/versions/` directory entirely. Defined
+in JEP 238 (Multi-Release JAR Files).
+
+**`GitCommit: <sha>`** (or `null`)
+
+Captures the Git commit SHA from the `GITHUB_SHA` environment variable.
+`String.valueOf(System.getenv("GITHUB_SHA"))` returns:
+- The 40-character SHA hex string when built in GitHub Actions.
+- The string `"null"` when built locally (since `getenv()` returns Java `null`,
+ and `String.valueOf(null)` returns the string `"null"`).
+
+This attribute enables tracing a built artifact back to its exact source commit.
+
+### 6.10 Multi-Release JAR Packing
+
+```groovy
+ into "META-INF/versions/9", {
+ from configurations.multirelase.files.collect {
+ zipTree(it)
+ }
+ exclude "META-INF/**"
+ }
+}
+```
+
+Lines 58–63: This block within the `jar { }` closure is the mechanism that
+creates the Multi-Release JAR. Detailed breakdown:
+
+**`into "META-INF/versions/9"`** — All files produced by this `from` block are
+placed inside the `META-INF/versions/9/` directory within the JAR. This is the
+Java 9 version overlay directory per JEP 238.
+
+**`configurations.multirelase.files`** — Resolves the `multirelase` configuration,
+which contains `project(":jigsaw")`. This evaluates to the jigsaw subproject's
+JAR file (e.g., `jigsaw/build/libs/jigsaw.jar`).
+
+**`.collect { zipTree(it) }`** — For each file in the resolved configuration
+(there's exactly one: the jigsaw JAR), `zipTree()` treats it as a ZIP archive
+and returns a file tree of its contents. This effectively "explodes" the jigsaw
+JAR.
+
+**`exclude "META-INF/**"`** — Excludes the jigsaw JAR's own `META-INF/`
+directory (which contains its own `MANIFEST.MF`). Only actual class files are
+copied. This prevents nested/conflicting manifests.
+
+The net result: the jigsaw subproject's compiled class files (specifically
+`io/github/zekerzhayard/forgewrapper/installer/util/ModuleUtil.class`) are
+placed at:
+
+```
+META-INF/versions/9/io/github/zekerzhayard/forgewrapper/installer/util/ModuleUtil.class
+```
+
+When the JVM is Java 9+, it will load this class **instead of** the Java 8 stub
+at:
+
+```
+io/github/zekerzhayard/forgewrapper/installer/util/ModuleUtil.class
+```
+
+### 6.11 Publishing Configuration
+
+```groovy
+publishing {
+ publications {
+ maven(MavenPublication) {
+ groupId "${project.group}"
+ artifactId "${project.archivesBaseName}"
+ version "${project.version}"
+
+ from components.java
+ }
+ }
+ repositories {
+ maven {
+ url = layout.buildDirectory.dir("maven")
+ }
+ }
+}
+tasks.publish.dependsOn build
+```
+
+Lines 65–82: Maven publishing setup.
+
+**Publication: `maven(MavenPublication)`**
+
+Defines a Maven publication with coordinates:
+- `groupId` = `io.github.zekerzhayard`
+- `artifactId` = `ForgeWrapper`
+- `version` = `projt-LOCAL` (or dated variant)
+
+**`from components.java`** — Publishes the Java component, which includes:
+- The main JAR (`ForgeWrapper-<version>.jar`)
+- The sources JAR (`ForgeWrapper-<version>-sources.jar`)
+- A generated POM file with dependency metadata
+
+**Repository target:**
+
+```groovy
+url = layout.buildDirectory.dir("maven")
+```
+
+Publishes to a local directory inside the build output:
+`forgewrapper/build/maven/`. This is **not** Maven Central or any remote
+repository. The published artifacts end up at:
+
+```
+build/maven/io/github/zekerzhayard/ForgeWrapper/<version>/
+├── ForgeWrapper-<version>.jar
+├── ForgeWrapper-<version>-sources.jar
+├── ForgeWrapper-<version>.pom
+├── ForgeWrapper-<version>.module
+└── (checksums: .md5, .sha1, .sha256, .sha512)
+```
+
+**`tasks.publish.dependsOn build`** — Ensures the project is fully built before
+publishing. Without this, Gradle could attempt to publish before the JAR is
+assembled.
+
+### 6.12 getVersionSuffix()
+
+```groovy
+static String getVersionSuffix() {
+ if (System.getenv("IS_PUBLICATION") != null || System.getenv("GITHUB_ACTIONS") == "true")
+ return new SimpleDateFormat("-yyyy-MM-dd").format(new Date())
+
+ return "-LOCAL"
+}
+```
+
+Lines 84–89: A static method that determines the version suffix.
+
+**Logic:**
+
+1. If the environment variable `IS_PUBLICATION` is set (to any value, including
+ empty string — `!= null` is the check), append a date suffix.
+2. **OR** if the environment variable `GITHUB_ACTIONS` equals the string
+ `"true"` (which GitHub Actions always sets), append a date suffix.
+3. Otherwise, append `-LOCAL`.
+
+**Important Groovy/Java note:** The `==` comparison with `System.getenv()` uses
+Java `String.equals()` semantics in Groovy when comparing strings. However,
+there is a subtle behavior: `System.getenv("GITHUB_ACTIONS")` returns `null`
+when not in GitHub Actions, and `null == "true"` evaluates to `false` in Groovy
+(no NPE), which is the desired behavior.
+
+**Date format:** `SimpleDateFormat("-yyyy-MM-dd")` produces strings like
+`-2026-04-05`. The leading `-` is part of the pattern, so the result
+includes the hyphen separator.
+
+**Version assembly flow:**
+
+```
+gradle.properties getVersionSuffix() Final version
+───────────────── ────────────────── ─────────────
+fw_version = projt + "-LOCAL" → "projt-LOCAL"
+fw_version = projt + "-2026-04-05" → "projt-2026-04-05"
+```
+
+---
+
+## 7. Jigsaw Subproject build.gradle — Complete Analysis
+
+**File: `forgewrapper/jigsaw/build.gradle`** (27 lines)
+
+This subproject compiles the Java 9+ version of `ModuleUtil.java`.
+
+### 7.1 Plugins
+
+```groovy
+plugins {
+ id "java"
+ id "eclipse"
+}
+```
+
+Lines 1–4: Only `java` and `eclipse`. No `maven-publish` — the jigsaw subproject
+is never published independently. Its output is consumed exclusively by the root
+project's `jar` task via the `multirelase` configuration.
+
+### 7.2 Java 9 Toolchain Auto-Detection
+
+```groovy
+compileJava {
+ if (JavaVersion.current() < JavaVersion.VERSION_1_9) {
+ javaCompiler = javaToolchains.compilerFor {
+ languageVersion = JavaLanguageVersion.of(9)
+ }
+ }
+ sourceCompatibility = 9
+ targetCompatibility = 9
+}
+```
+
+Lines 6–14: The compilation block with intelligent Java version detection.
+
+**The `if` check:** `JavaVersion.current()` returns the JVM version running
+Gradle itself. If Gradle is running on Java 8 (which is `< VERSION_1_9`), then
+the `javaCompiler` is overridden using Gradle's **Java Toolchain** feature.
+
+**`javaToolchains.compilerFor { languageVersion = JavaLanguageVersion.of(9) }`**
+
+This tells Gradle: "Find a Java 9 compiler on this system and use it for this
+compilation task." Gradle will search:
+1. JDK installations registered in the toolchain registry.
+2. Standard JDK installation directories (`/usr/lib/jvm/`, etc.).
+3. JDKs managed by tools like SDKMAN!, jabba, or Gradle's auto-provisioning.
+
+If no Java 9+ JDK is found, the build fails with an error.
+
+**If Gradle is already running on Java 9+**, the `if` block is skipped entirely.
+The current JVM's compiler is used, with `sourceCompatibility = 9` and
+`targetCompatibility = 9` ensuring Java 9 bytecode is produced.
+
+**`sourceCompatibility = 9`** — Accept Java 9 language features (modules,
+private interface methods, etc.). The jigsaw `ModuleUtil.java` uses:
+- `java.lang.module.Configuration`
+- `java.lang.module.ModuleFinder`
+- `java.lang.module.ModuleReference`
+- `java.lang.module.ResolvedModule`
+- `ModuleLayer.boot()`
+- `List.of()`
+- `ClassLoader.getPlatformClassLoader()`
+
+These APIs do not exist in Java 8, which is why this code cannot be in the root
+project.
+
+**`targetCompatibility = 9`** — Emit class file version 53.0 (Java 9). The JVM
+only loads classes from `META-INF/versions/9/` if they are version 53.0 or
+higher.
+
+### 7.3 JVM Version Attribute Override
+
+```groovy
+configurations {
+ apiElements {
+ attributes {
+ attribute TargetJvmVersion.TARGET_JVM_VERSION_ATTRIBUTE, 8
+ }
+ }
+ runtimeElements {
+ attributes {
+ attribute TargetJvmVersion.TARGET_JVM_VERSION_ATTRIBUTE, 8
+ }
+ }
+}
+```
+
+Lines 16–27: This is a subtle but critical configuration.
+
+**Problem:** The jigsaw subproject compiles with `targetCompatibility = 9`, so
+Gradle automatically sets the `TargetJvmVersion` attribute on its
+`apiElements` and `runtimeElements` configurations to `9`. When the root project
+(which targets Java 8) depends on `project(":jigsaw")`, Gradle's dependency
+resolution would detect a JVM version mismatch and fail:
+
+```
+> Could not resolve project :jigsaw.
+ > Incompatible because this component declares a component compatible
+ with Java 9 and the consumer needed a component compatible with Java 8
+```
+
+**Solution:** Override the `TARGET_JVM_VERSION_ATTRIBUTE` to `8` on both
+outgoing configurations (`apiElements` and `runtimeElements`). This tells
+Gradle: "Even though this project compiles with Java 9, treat its output as
+Java 8-compatible for dependency resolution purposes."
+
+This is safe because:
+1. The jigsaw JAR is **never loaded directly** on Java 8. It is packed into
+ `META-INF/versions/9/` and only loaded by Java 9+ JVMs.
+2. The attribute override only affects Gradle's dependency resolution metadata,
+ not the actual bytecode version.
+
+**`apiElements`** — The configuration used when another project declares a
+`compileOnly` or `api` dependency on this project.
+
+**`runtimeElements`** — The configuration used when another project declares an
+`implementation` or `runtimeOnly` dependency on this project.
+
+Both must be overridden because the root project's `multirelase` configuration
+(which extends `implementation`) resolves through `runtimeElements`.
+
+---
+
+## 8. Multi-Release JAR — Deep Dive
+
+The Multi-Release JAR (MRJAR) is the defining feature of ForgeWrapper's build
+system. Here is the complete picture.
+
+### What is a Multi-Release JAR?
+
+Defined by [JEP 238](https://openjdk.org/jeps/238) and standardized in Java 9,
+a Multi-Release JAR allows a single JAR file to contain multiple versions of
+the same class, each compiled for a different Java version. The JVM
+automatically selects the appropriate version at runtime based on the JVM
+version.
+
+### JAR Internal Structure
+
+```
+ForgeWrapper-projt-LOCAL.jar
+│
+├── META-INF/
+│ ├── MANIFEST.MF
+│ │ ├── Multi-Release: true ← Activates MRJAR behavior
+│ │ ├── Specification-Title: ForgeWrapper
+│ │ ├── Specification-Vendor: ZekerZhayard
+│ │ ├── Specification-Version: projt
+│ │ ├── Implementation-Title: ForgeWrapper
+│ │ ├── Implementation-Version: projt-LOCAL
+│ │ ├── Implementation-Vendor: ZekerZhayard
+│ │ ├── Implementation-Timestamp: 2026-04-05T...
+│ │ ├── Automatic-Module-Name: io.github.zekerzhayard.forgewrapper
+│ │ └── GitCommit: null
+│ │
+│ └── versions/
+│ └── 9/
+│ └── io/github/zekerzhayard/forgewrapper/installer/util/
+│ └── ModuleUtil.class ← Java 9 bytecode (version 53.0)
+│
+├── io/github/zekerzhayard/forgewrapper/installer/
+│ ├── Bootstrap.class ← Java 8 bytecode (version 52.0)
+│ ├── Installer.class
+│ ├── Main.class
+│ ├── detector/
+│ │ ├── DetectorLoader.class
+│ │ ├── IFileDetector.class
+│ │ └── MultiMCFileDetector.class
+│ └── util/
+│ └── ModuleUtil.class ← Java 8 bytecode (no-op stubs)
+```
+
+### Runtime Class Selection
+
+```
+JVM Version Class Loaded for ModuleUtil
+─────────── ──────────────────────────────────────────────────
+Java 8 io/github/.../util/ModuleUtil.class (root, no-ops)
+Java 9 META-INF/versions/9/io/github/.../util/ModuleUtil.class
+Java 10 META-INF/versions/9/io/github/.../util/ModuleUtil.class
+Java 11 META-INF/versions/9/io/github/.../util/ModuleUtil.class
+ ... (same — highest ≤ JVM version wins)
+Java 21 META-INF/versions/9/io/github/.../util/ModuleUtil.class
+```
+
+The JVM always selects the highest versioned class that is ≤ the running JVM
+version. Since ForgeWrapper only has a version 9 overlay, all Java 9+ JVMs
+use the same Java 9 `ModuleUtil`.
+
+### The Dual ModuleUtil Classes
+
+**Root ModuleUtil** (`src/main/java/.../util/ModuleUtil.java`) — 42 lines:
+
+```java
+public class ModuleUtil {
+ public static void addModules(String modulePath) {
+ // nothing to do with Java 8
+ }
+ public static void addExports(List<String> exports) {
+ // nothing to do with Java 8
+ }
+ public static void addOpens(List<String> opens) {
+ // nothing to do with Java 8
+ }
+ public static void setupClassPath(Path libraryDir, List<String> paths)
+ throws Throwable {
+ // Uses URLClassLoader.addURL() via reflection
+ }
+ public static Class<?> setupBootstrapLauncher(Class<?> mainClass) {
+ // nothing to do with Java 8
+ return mainClass;
+ }
+ public static ClassLoader getPlatformClassLoader() {
+ // PlatformClassLoader does not exist in Java 8
+ return null;
+ }
+}
+```
+
+**Jigsaw ModuleUtil** (`jigsaw/src/main/java/.../util/ModuleUtil.java`) — 150+ lines:
+
+The Java 9 version provides actual JPMS integration:
+- `addModules()` — Dynamically adds module paths to the boot module layer at
+ runtime using `sun.misc.Unsafe` to access `MethodHandles.Lookup.IMPL_LOOKUP`
+ and reflectively manipulate the module graph.
+- `addExports()` / `addOpens()` — Adds `--add-exports` / `--add-opens` at
+ runtime via `Module.implAddExports()` and `Module.implAddOpens()`.
+- `setupClassPath()` — Uses the module system's `ModuleLayer` to properly add
+ libraries.
+- `setupBootstrapLauncher()` — Sets up the BootstrapLauncher class introduced
+ in newer Forge versions.
+- `getPlatformClassLoader()` — Returns `ClassLoader.getPlatformClassLoader()`
+ (Java 9+ API, does not exist in Java 8).
+
+---
+
+## 9. Build Pipeline Diagram
+
+### Complete Build Flow
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│ ./gradlew build │
+└──────────────────────────────┬──────────────────────────────────┘
+ │
+ ┌────────────────┴────────────────┐
+ ▼ ▼
+┌──────────────────────┐ ┌──────────────────────┐
+│ :compileJava │ │ :jigsaw:compileJava │
+│ (Java 8, v52.0) │ │ (Java 9, v53.0) │
+│ │ │ │
+│ Source: │ │ Source: │
+│ src/main/java/ │ │ jigsaw/src/main/ │
+│ └─ ...ModuleUtil │ │ └─ ...ModuleUtil │
+│ (no-op stubs) │ │ (full JPMS impl)│
+└──────────┬───────────┘ └──────────┬───────────┘
+ │ │
+ ▼ ▼
+┌──────────────────────┐ ┌──────────────────────┐
+│ :processResources │ │ :jigsaw:jar │
+└──────────┬───────────┘ │ → jigsaw.jar │
+ │ └──────────┬───────────┘
+ │ │
+ │ ┌───────────────────────────┘
+ │ │ configurations.multirelase
+ │ │ resolves to jigsaw.jar
+ ▼ ▼
+┌──────────────────────────────────────────────┐
+│ :jar │
+│ │
+│ 1. Pack root classes → / │
+│ 2. zipTree(jigsaw.jar) │
+│ exclude META-INF/** │
+│ → META-INF/versions/9/ │
+│ 3. Generate MANIFEST.MF with attributes │
+│ including Multi-Release: true │
+│ │
+│ Output: build/libs/ForgeWrapper-<ver>.jar │
+└──────────────────────┬───────────────────────┘
+ │
+ ┌────────────┼────────────┐
+ ▼ ▼ ▼
+┌──────────────┐ ┌──────────┐ ┌──────────────┐
+│ :sourcesJar │ │ :check │ │ :assemble │
+│ (sources) │ │ (tests) │ │ (lifecycle) │
+└──────┬───────┘ └────┬─────┘ └──────┬───────┘
+ │ │ │
+ └──────────────┼──────────────┘
+ ▼
+ ┌──────────────────┐
+ │ :build │
+ └──────────────────┘
+```
+
+### Publishing Flow
+
+```
+┌──────────────┐
+│ :build │
+└──────┬───────┘
+ │ tasks.publish.dependsOn build
+ ▼
+┌──────────────────────────────────────────────────────┐
+│ :publish │
+│ │
+│ Publication: maven(MavenPublication) │
+│ groupId: io.github.zekerzhayard │
+│ artifactId: ForgeWrapper │
+│ version: projt-LOCAL (or projt-YYYY-MM-DD) │
+│ │
+│ from components.java: │
+│ ├── ForgeWrapper-<ver>.jar │
+│ └── ForgeWrapper-<ver>-sources.jar │
+│ │
+│ Target: build/maven/ │
+│ └── io/github/zekerzhayard/ForgeWrapper/<ver>/ │
+│ ├── ForgeWrapper-<ver>.jar │
+│ ├── ForgeWrapper-<ver>-sources.jar │
+│ ├── ForgeWrapper-<ver>.pom │
+│ └── ForgeWrapper-<ver>.module │
+└──────────────────────────────────────────────────────┘
+```
+
+### Version Resolution Flow
+
+```
+gradle.properties build.gradle
+┌──────────────────┐ ┌──────────────────────────────────┐
+│ fw_version=projt │─────▶│ version="${fw_version}${->...}" │
+└──────────────────┘ │ │ │
+ │ ▼ │
+ │ getVersionSuffix() │
+ │ │ │
+ │ ├─ IS_PUBLICATION!=null? │
+ │ │ └─ YES → "-YYYY-MM-DD" │
+ │ │ │
+ │ ├─ GITHUB_ACTIONS=="true"? │
+ │ │ └─ YES → "-YYYY-MM-DD" │
+ │ │ │
+ │ └─ else → "-LOCAL" │
+ │ │
+ │ Result: "projt-LOCAL" │
+ │ or: "projt-2026-04-05" │
+ └──────────────────────────────────┘
+```
+
+---
+
+## 10. Build Targets and Tasks
+
+### Key Tasks
+
+| Task | Type | Description |
+|-----------------------------|-------------------|------------------------------------------------|
+| `:compileJava` | JavaCompile | Compiles root `src/main/java` with Java 8 |
+| `:processResources` | Copy | Copies `src/main/resources` to build dir |
+| `:classes` | Lifecycle | Depends on compileJava + processResources |
+| `:jar` | Jar | Assembles the Multi-Release JAR |
+| `:sourcesJar` | Jar | Assembles sources JAR |
+| `:assemble` | Lifecycle | Depends on jar + sourcesJar |
+| `:check` | Lifecycle | Runs tests (none configured) |
+| `:build` | Lifecycle | Depends on assemble + check |
+| `:publish` | Lifecycle | Publishes to build/maven/ (depends on build) |
+| `:jigsaw:compileJava` | JavaCompile | Compiles jigsaw `src/main/java` with Java 9 |
+| `:jigsaw:jar` | Jar | Packages jigsaw classes into jigsaw.jar |
+| `:eclipse` | Lifecycle | Generates Eclipse project files |
+| `:jigsaw:eclipse` | Lifecycle | Generates Eclipse files for jigsaw subproject |
+| `:clean` | Delete | Removes `build/` directory |
+| `:jigsaw:clean` | Delete | Removes `jigsaw/build/` directory |
+
+### Task Dependency Chain
+
+Running `./gradlew build` triggers this chain:
+
+```
+:build
+├── :assemble
+│ ├── :jar
+│ │ ├── :classes
+│ │ │ ├── :compileJava
+│ │ │ └── :processResources
+│ │ └── [multirelase config resolution]
+│ │ └── :jigsaw:jar
+│ │ └── :jigsaw:classes
+│ │ ├── :jigsaw:compileJava
+│ │ └── :jigsaw:processResources
+│ └── :sourcesJar
+└── :check
+ └── :test (no tests configured → no-op)
+```
+
+### What Each Task Produces
+
+| Task | Output |
+|-------------------------|-----------------------------------------------------|
+| `:compileJava` | `build/classes/java/main/**/*.class` |
+| `:jigsaw:compileJava` | `jigsaw/build/classes/java/main/**/*.class` |
+| `:jigsaw:jar` | `jigsaw/build/libs/jigsaw.jar` |
+| `:jar` | `build/libs/ForgeWrapper-<ver>.jar` |
+| `:sourcesJar` | `build/libs/ForgeWrapper-<ver>-sources.jar` |
+| `:publish` | `build/maven/io/github/zekerzhayard/ForgeWrapper/` |
+
+---
+
+## 11. Step-by-Step Build Guide
+
+### Prerequisites
+
+1. **Java 8 or higher** — To run Gradle itself. Gradle 7.3.3 supports Java
+ 8 through Java 17.
+2. **Java 9 or higher JDK** — Required to compile the jigsaw subproject. If
+ Gradle runs on Java 8, the toolchain must find a Java 9+ JDK. If Gradle
+ runs on Java 9+, no additional JDK is needed.
+3. **Internet access** — To download the Gradle wrapper distribution and
+ Maven dependencies (first build only; cached after that).
+
+### Clone and Build
+
+```bash
+# Navigate to the forgewrapper directory
+cd forgewrapper/
+
+# Make the wrapper executable (Linux/macOS)
+chmod +x gradlew
+
+# Build the project
+./gradlew build
+```
+
+On Windows:
+```cmd
+cd forgewrapper\
+gradlew.bat build
+```
+
+### Build Output
+
+After a successful build:
+
+```
+build/libs/
+├── ForgeWrapper-projt-LOCAL.jar ← Main MRJAR artifact
+└── ForgeWrapper-projt-LOCAL-sources.jar ← Source code archive
+```
+
+### Build + Publish
+
+```bash
+./gradlew publish
+```
+
+This runs `build` first (due to `tasks.publish.dependsOn build`), then publishes:
+
+```
+build/maven/
+└── io/
+ └── github/
+ └── zekerzhayard/
+ └── ForgeWrapper/
+ └── projt-LOCAL/
+ ├── ForgeWrapper-projt-LOCAL.jar
+ ├── ForgeWrapper-projt-LOCAL-sources.jar
+ ├── ForgeWrapper-projt-LOCAL.pom
+ └── ForgeWrapper-projt-LOCAL.module
+```
+
+### Clean Build
+
+```bash
+./gradlew clean build
+```
+
+Removes all generated files in `build/` and `jigsaw/build/` before rebuilding.
+
+### Individual Tasks
+
+```bash
+# Compile only the root project
+./gradlew compileJava
+
+# Compile only the jigsaw subproject
+./gradlew :jigsaw:compileJava
+
+# Build only the JAR (no tests, no publish)
+./gradlew jar
+
+# Generate Eclipse project files
+./gradlew eclipse
+
+# List all available tasks
+./gradlew tasks --all
+
+# Show the dependency tree
+./gradlew dependencies
+```
+
+### Simulating CI Build Locally
+
+```bash
+# Set environment variables to simulate GitHub Actions
+GITHUB_ACTIONS=true GITHUB_SHA=abc123def456 ./gradlew build
+
+# Or force a publication-style version
+IS_PUBLICATION=1 ./gradlew build
+```
+
+With either of these, the version suffix changes from `-LOCAL` to the current
+date (e.g., `-2026-04-05`), and `GitCommit` in the manifest is set to the
+provided SHA.
+
+### Verbose/Debug Build
+
+```bash
+# Show task execution details
+./gradlew build --info
+
+# Full debug logging
+./gradlew build --debug
+
+# Generate a Gradle Build Scan (detailed build/dependency report uploaded
+# to scans.gradle.com; requires accepting the terms of service)
+./gradlew build --scan
+```
+
+---
+
+## 12. CI/CD Integration
+
+The build system detects CI environments via environment variables.
+
+### GitHub Actions Detection
+
+Two environment variables are checked in `getVersionSuffix()` (line 86 of
+`build.gradle`):
+
+**`GITHUB_ACTIONS`** — Automatically set to `"true"` by GitHub Actions runners.
+When detected, the version suffix switches from `-LOCAL` to a date stamp.
+
+**`GITHUB_SHA`** — The full SHA of the commit that triggered the workflow. This
+is embedded in the JAR manifest as the `GitCommit` attribute (line 55 of
+`build.gradle`):
+
+```groovy
+"GitCommit": String.valueOf(System.getenv("GITHUB_SHA"))
+```
+
+### IS_PUBLICATION
+
+**`IS_PUBLICATION`** — A custom environment variable. When set (to any non-null
+value), it forces date-stamped versioning regardless of whether the build runs
+in GitHub Actions. This allows publication builds from other CI systems or
+local environments:
+
+```bash
+IS_PUBLICATION=yes ./gradlew publish
+```
+
+### CI Build vs Local Build Comparison
+
+```
+┌───────────────────────┬─────────────────────┬──────────────────────┐
+│ Aspect │ Local Build │ CI Build │
+├───────────────────────┼─────────────────────┼──────────────────────┤
+│ Version suffix │ -LOCAL │ -2026-04-05 │
+│ GitCommit manifest │ null │ abc123def456... │
+│ Gradle daemon │ disabled (property) │ disabled (property) │
+│ Full version example │ projt-LOCAL │ projt-2026-04-05 │
+│ Spec-Version │ projt │ projt │
+│ Impl-Version │ projt-LOCAL │ projt-2026-04-05 │
+└───────────────────────┴─────────────────────┴──────────────────────┘
+```
+
+### Environment Variable Summary
+
+| Variable | Set By | Used In | Effect |
+|------------------|------------------|---------------------------|--------------------------------|
+| `GITHUB_ACTIONS` | GitHub Actions | `getVersionSuffix()` | Enables date-stamped version |
+| `GITHUB_SHA` | GitHub Actions | `jar.manifest.attributes` | Records commit SHA in manifest |
+| `IS_PUBLICATION` | Manual / CI | `getVersionSuffix()` | Forces date-stamped version |
+
+---
+
+## 13. Artifact Output Structure
+
+### Primary Artifact: ForgeWrapper JAR
+
+**File:** `build/libs/ForgeWrapper-<version>.jar`
+
+Inspecting the JAR contents (example):
+
+```bash
+jar tf build/libs/ForgeWrapper-projt-LOCAL.jar
+```
+
+Expected output:
+
+```
+META-INF/
+META-INF/MANIFEST.MF
+io/
+io/github/
+io/github/zekerzhayard/
+io/github/zekerzhayard/forgewrapper/
+io/github/zekerzhayard/forgewrapper/installer/
+io/github/zekerzhayard/forgewrapper/installer/Bootstrap.class
+io/github/zekerzhayard/forgewrapper/installer/Installer.class
+io/github/zekerzhayard/forgewrapper/installer/Main.class
+io/github/zekerzhayard/forgewrapper/installer/detector/
+io/github/zekerzhayard/forgewrapper/installer/detector/DetectorLoader.class
+io/github/zekerzhayard/forgewrapper/installer/detector/IFileDetector.class
+io/github/zekerzhayard/forgewrapper/installer/detector/MultiMCFileDetector.class
+io/github/zekerzhayard/forgewrapper/installer/util/
+io/github/zekerzhayard/forgewrapper/installer/util/ModuleUtil.class
+META-INF/versions/
+META-INF/versions/9/
+META-INF/versions/9/io/
+META-INF/versions/9/io/github/
+META-INF/versions/9/io/github/zekerzhayard/
+META-INF/versions/9/io/github/zekerzhayard/forgewrapper/
+META-INF/versions/9/io/github/zekerzhayard/forgewrapper/installer/
+META-INF/versions/9/io/github/zekerzhayard/forgewrapper/installer/util/
+META-INF/versions/9/io/github/zekerzhayard/forgewrapper/installer/util/ModuleUtil.class
+```
+
+### Sources Artifact
+
+**File:** `build/libs/ForgeWrapper-<version>-sources.jar`
+
+Contains `.java` source files from the root project's `src/main/java/` tree.
+
+### Published Maven Artifacts
+
+**Directory:** `build/maven/io/github/zekerzhayard/ForgeWrapper/<version>/`
+
+| File | Description |
+|------------------------------------------|------------------------------------|
+| `ForgeWrapper-<ver>.jar` | The MRJAR binary |
+| `ForgeWrapper-<ver>-sources.jar` | Source code archive |
+| `ForgeWrapper-<ver>.pom` | Maven POM with dependency metadata |
+| `ForgeWrapper-<ver>.module` | Gradle Module Metadata (GMM) |
+| `*.md5`, `*.sha1`, `*.sha256`, `*.sha512`| Integrity checksums |
+
+### Manifest File Content
+
+The `META-INF/MANIFEST.MF` in the produced JAR:
+
+```
+Manifest-Version: 1.0
+Specification-Title: ForgeWrapper
+Specification-Vendor: ZekerZhayard
+Specification-Version: projt
+Implementation-Title: ForgeWrapper
+Implementation-Version: projt-LOCAL
+Implementation-Vendor: ZekerZhayard
+Implementation-Timestamp: 2026-04-05T12:00:00+0000
+Automatic-Module-Name: io.github.zekerzhayard.forgewrapper
+Multi-Release: true
+GitCommit: null
+```
+
+---
+
+## 14. Publishing to Local Maven Repository
+
+### How It Works
+
+The `publishing` block in `build.gradle` (lines 65–80) configures a Maven
+publication with a **local filesystem repository**:
+
+```groovy
+repositories {
+ maven {
+ url = layout.buildDirectory.dir("maven")
+ }
+}
+```
+
+`layout.buildDirectory.dir("maven")` resolves to `build/maven/`. This uses the
+Gradle 7+ lazy API (`layout.buildDirectory` is the preferred replacement for
+the legacy `$buildDir` property, which was later deprecated in Gradle 8).
+
+### Running the Publish
+
+```bash
+./gradlew publish
+```
+
+The `publish` task depends on `build` (line 81: `tasks.publish.dependsOn build`),
+so running `publish` implicitly runs the full build first.
+
+### Generated POM
+
+The POM is auto-generated from `components.java`. Since all external dependencies
+are `compileOnly`, they are **not** included in the POM. The POM contains only
+the GAV (Group, Artifact, Version) coordinates:
+
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project xsi:schemaLocation="...">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>io.github.zekerzhayard</groupId>
+ <artifactId>ForgeWrapper</artifactId>
+ <version>projt-LOCAL</version>
+</project>
+```
+
+No `<dependencies>` section is generated because `compileOnly` dependencies are
+not published.
+
+### Using the Local Maven Repo
+
+Other projects can consume the published artifact by adding the local directory
+as a Maven repository:
+
+```groovy
+repositories {
+ maven {
+ url = file("/path/to/forgewrapper/build/maven")
+ }
+}
+
+dependencies {
+ implementation "io.github.zekerzhayard:ForgeWrapper:projt-LOCAL"
+}
+```
+
+---
+
+## 15. Java Version Requirements
+
+### Summary Table
+
+| Component | Minimum Java | Configured As |
+|---------------------|--------------|---------------------------------------------|
+| Gradle execution | Java 8 | Gradle 7.3.3 supports Java 8–17 |
+| Root compilation | Java 8 | `sourceCompatibility = targetCompatibility = 1.8` |
+| Jigsaw compilation | Java 9 | `sourceCompatibility = targetCompatibility = 9` |
+| Runtime (Java 8) | Java 8 | Uses root ModuleUtil (no-op stubs) |
+| Runtime (Java 9+) | Java 9 | Uses jigsaw ModuleUtil (full JPMS) |
+
+### Toolchain Behavior
+
+The jigsaw subproject uses Gradle's Java Toolchain feature with conditional
+logic (lines 7–9 of `jigsaw/build.gradle`):
+
+```
+┌────────────────────────────────┐
+│ Gradle running on Java 8? │
+│ │
+│ YES → javaToolchains finds │
+│ Java 9 JDK on system │
+│ and uses it to compile │
+│ │
+│ NO → Current JVM (≥9) │
+│ compiles with │
+│ sourceCompatibility=9 │
+└────────────────────────────────┘
+```
+
+### Finding Java Toolchains
+
+When Gradle needs a Java 9 toolchain, it searches these locations:
+
+1. **Environment** — `JAVA_HOME`, `JDK_HOME`, and `PATH` entries.
+2. **Standard paths** — `/usr/lib/jvm/` (Linux), `/Library/Java/JavaVirtualMachines/` (macOS),
+ Windows Registry.
+3. **Tool managers** — SDKMAN!, jabba, IntelliJ installations.
+4. **Gradle auto-provisioning** — If enabled, Gradle can download a JDK
+ automatically from AdoptOpenJDK/Adoptium.
+
+### Bytecode Versions
+
+| Class Origin | Target | Class File Version | `-target` flag |
+|---------------------|--------|--------------------|----------------|
+| Root project | 1.8 | 52.0 | `1.8` |
+| Jigsaw subproject | 9 | 53.0 | `9` |
+
+You can verify bytecode versions with `javap`:
+
+```bash
+# Check root ModuleUtil (should be 52.0 = Java 8)
+javap -v build/classes/java/main/io/github/zekerzhayard/forgewrapper/installer/util/ModuleUtil.class \
+ | grep "major version"
+
+# Check jigsaw ModuleUtil (should be 53.0 = Java 9)
+javap -v jigsaw/build/classes/java/main/io/github/zekerzhayard/forgewrapper/installer/util/ModuleUtil.class \
+ | grep "major version"
+```
+
+---
+
+## 16. Source File Layout and the Dual-ModuleUtil Pattern
+
+### Why Two ModuleUtil Classes?
+
+ForgeWrapper must run on both Java 8 and Java 9+ JVMs. The Java 9+ `ModuleUtil`
+uses APIs that don't exist in Java 8:
+
+| API Used in Jigsaw ModuleUtil | Introduced In |
+|-----------------------------------------|---------------|
+| `java.lang.module.Configuration` | Java 9 |
+| `java.lang.module.ModuleFinder` | Java 9 |
+| `java.lang.module.ModuleReference` | Java 9 |
+| `java.lang.module.ResolvedModule` | Java 9 |
+| `ModuleLayer.boot()` | Java 9 |
+| `ClassLoader.getPlatformClassLoader()` | Java 9 |
+| `List.of()` | Java 9 |
+| `Module.implAddExports()` | Java 9 (internal) |
+| `Module.implAddOpens()` | Java 9 (internal) |
+
+If these APIs were in the main source tree compiled with Java 8, the build
+would fail with compilation errors. The Multi-Release JAR approach solves this
+by keeping the Java 9 code in a separate compilation unit.
+
+### Method Signature Compatibility
+
+Both `ModuleUtil` classes must have **identical method signatures** so that
+callers in the root project (e.g., `Bootstrap.java`, `Main.java`) can reference
+`ModuleUtil` without caring which version is loaded at runtime.
+
+**Root ModuleUtil (Java 8 stubs):**
+```java
+public static void addModules(String modulePath) // line 10: empty body
+public static void addExports(List<String> exports) // line 14: empty body
+public static void addOpens(List<String> opens) // line 18: empty body
+public static void setupClassPath(Path, List<String>) // line 22: URLClassLoader reflection
+public static Class<?> setupBootstrapLauncher(Class<?>)// line 31: returns mainClass
+public static ClassLoader getPlatformClassLoader() // line 36: returns null
+```
+
+**Jigsaw ModuleUtil (Java 9 implementation):**
+```java
+public static void addModules(String modulePath) // Full JPMS module loading
+public static void addExports(List<String> exports) // Module.implAddExports
+public static void addOpens(List<String> opens) // Module.implAddOpens
+public static void setupClassPath(Path, List<String>) // Module-aware loading
+public static Class<?> setupBootstrapLauncher(Class<?>)// BootstrapLauncher setup
+public static ClassLoader getPlatformClassLoader() // ClassLoader.getPlatformClassLoader()
+```
+
+### Call Sites
+
+`Bootstrap.java` calls these ModuleUtil methods (lines 70–76):
+
+```java
+if (modulePath != null) {
+ ModuleUtil.addModules(modulePath);
+}
+ModuleUtil.addExports(addExports);
+ModuleUtil.addOpens(addOpens);
+```
+
+`Main.java` calls (lines 49, 62, 63):
+
+```java
+// line 49 (in URLClassLoader creation):
+ModuleUtil.getPlatformClassLoader()
+
+// lines 62-63:
+ModuleUtil.setupClassPath(detector.getLibraryDir(), ...);
+Class<?> mainClass = ModuleUtil.setupBootstrapLauncher(Class.forName(...));
+```
+
+At runtime, the JVM transparently loads the correct `ModuleUtil` class based on
+the Java version, with no conditional logic needed in the calling code.
+
+---
+
+## 17. Troubleshooting
+
+### Build Fails: "No compatible toolchains found"
+
+**Symptom:**
+```
+> No locally installed toolchains match and toolchain download repositories
+ have not been configured.
+```
+
+**Cause:** Gradle is running on Java 8 and cannot find a Java 9+ JDK for the
+jigsaw subproject.
+
+**Fix:** Install a Java 9+ JDK. On Linux:
+```bash
+# Ubuntu/Debian
+sudo apt install openjdk-11-jdk
+
+# Fedora
+sudo dnf install java-11-openjdk-devel
+```
+
+Or set `JAVA_HOME` to an existing Java 9+ installation:
+```bash
+export JAVA_HOME=/path/to/jdk-11
+./gradlew build
+```
+
+### Build Fails: "Could not resolve project :jigsaw"
+
+**Symptom:**
+```
+> Could not resolve project :jigsaw.
+ > Incompatible because this component declares a component compatible
+ with Java 9 and the consumer needed a component compatible with Java 8
+```
+
+**Cause:** The `TargetJvmVersion` attribute override in `jigsaw/build.gradle`
+is missing or incorrect.
+
+**Fix:** Ensure lines 16–27 of `jigsaw/build.gradle` are present:
+```groovy
+configurations {
+ apiElements {
+ attributes {
+ attribute TargetJvmVersion.TARGET_JVM_VERSION_ATTRIBUTE, 8
+ }
+ }
+ runtimeElements {
+ attributes {
+ attribute TargetJvmVersion.TARGET_JVM_VERSION_ATTRIBUTE, 8
+ }
+ }
+}
+```
+
+### Dependencies Not Found
+
+**Symptom:**
+```
+> Could not find cpw.mods:modlauncher:8.0.9.
+```
+
+**Cause:** The Forge Maven repository is unreachable or not declared.
+
+**Fix:** Check network connectivity to `https://maven.minecraftforge.net/`.
+Verify the `repositories` block in `build.gradle` includes the `forge` maven
+repository (lines 24–29).
+
+### Wrong Version in JAR Name
+
+**Symptom:** The JAR is named `ForgeWrapper-projt-LOCAL.jar` but you expected a
+dated version.
+
+**Cause:** `GITHUB_ACTIONS` and `IS_PUBLICATION` are not set.
+
+**Fix:**
+```bash
+IS_PUBLICATION=1 ./gradlew build
+```
+
+### MultiRelease Attribute Missing
+
+**Symptom:** The JAR does not exhibit Multi-Release behavior on Java 9+.
+`ModuleUtil` methods are no-ops even on Java 11.
+
+**Diagnosis:** Inspect the manifest:
+```bash
+unzip -p build/libs/ForgeWrapper-*.jar META-INF/MANIFEST.MF | grep Multi-Release
+```
+
+If `Multi-Release: true` is missing, check the `jar` block in `build.gradle`
+(line 54).
+
+Also verify the jigsaw classes exist in the JAR:
+```bash
+jar tf build/libs/ForgeWrapper-*.jar | grep "META-INF/versions/9"
+```
+
+Expected output should show the `ModuleUtil.class` under `META-INF/versions/9/`.
+
+### Gradle Daemon Issues
+
+**Symptom:** Stale build state, phantom errors that disappear after restarting.
+
+**Note:** The daemon is disabled (`org.gradle.daemon = false` in
+`gradle.properties`), so this should not normally occur. If a daemon is somehow
+running from a previous configuration:
+
+```bash
+./gradlew --stop
+./gradlew clean build
+```
+
+### Java Version Mismatch in IDE
+
+**Symptom:** Eclipse or IntelliJ shows errors on jigsaw source files.
+
+**Cause:** The IDE is using Java 8 to compile the jigsaw source set.
+
+**Fix for Eclipse:**
+```bash
+./gradlew eclipse
+```
+Then re-import the project. The Eclipse plugin generates `.classpath` with
+correct JRE containers.
+
+**Fix for IntelliJ:** Import as Gradle project. IntelliJ reads the `compileJava`
+block and sets the jigsaw module to use Java 9.
+
+### Verifying the MRJAR
+
+Full verification workflow:
+
+```bash
+# 1. Build
+./gradlew clean build
+
+# 2. Check JAR contents
+jar tf build/libs/ForgeWrapper-projt-LOCAL.jar
+
+# 3. Verify manifest
+unzip -p build/libs/ForgeWrapper-projt-LOCAL.jar META-INF/MANIFEST.MF
+
+# 4. Check bytecode versions
+cd build/libs
+mkdir -p _verify && cd _verify
+jar xf ../ForgeWrapper-projt-LOCAL.jar
+
+# Root ModuleUtil → should be 52 (Java 8)
+javap -v io/github/zekerzhayard/forgewrapper/installer/util/ModuleUtil.class \
+ | grep "major version"
+
+# Jigsaw ModuleUtil → should be 53 (Java 9)
+javap -v META-INF/versions/9/io/github/zekerzhayard/forgewrapper/installer/util/ModuleUtil.class \
+ | grep "major version"
+
+# 5. Cleanup
+cd .. && rm -rf _verify
+```
+
+---
+
+## 18. Quick Reference Card
+
+```
+┌──────────────────────────────────────────────────────────────────┐
+│ ForgeWrapper Build Cheat Sheet │
+├──────────────────────────────────────────────────────────────────┤
+│ │
+│ Build: ./gradlew build │
+│ Clean+Build: ./gradlew clean build │
+│ Publish: ./gradlew publish │
+│ JAR only: ./gradlew jar │
+│ Eclipse: ./gradlew eclipse │
+│ Tasks list: ./gradlew tasks --all │
+│ Dependencies: ./gradlew dependencies │
+│ │
+│ CI version: GITHUB_ACTIONS=true ./gradlew build │
+│ Pub version: IS_PUBLICATION=1 ./gradlew build │
+│ │
+│ Gradle: 7.3.3 (wrapper) │
+│ Root Java: 1.8 (source + target) │
+│ Jigsaw Java: 9 (source + target, toolchain auto-detect) │
+│ Group: io.github.zekerzhayard │
+│ Artifact: ForgeWrapper │
+│ Base Version: projt (from gradle.properties: fw_version) │
+│ │
+│ Output JAR: build/libs/ForgeWrapper-<ver>.jar │
+│ Sources JAR: build/libs/ForgeWrapper-<ver>-sources.jar │
+│ Maven output: build/maven/ │
+│ │
+│ Repos: Maven Central │
+│ https://maven.minecraftforge.net/ │
+│ │
+│ Dependencies (all compileOnly): │
+│ gson 2.8.7, modlauncher 8.0.9, │
+│ installer 2.2.7, jopt-simple 5.0.4 │
+│ │
+│ MRJAR overlay: META-INF/versions/9/ (from :jigsaw) │
+│ Manifest key: Multi-Release: true │
+│ │
+└──────────────────────────────────────────────────────────────────┘
+```
+
+---
+
+*This document was generated from direct analysis of the ForgeWrapper build
+configuration files: `build.gradle`, `jigsaw/build.gradle`, `settings.gradle`,
+`gradle.properties`, and `gradle/wrapper/gradle-wrapper.properties`.*
diff --git a/docs/handbook/forgewrapper/overview.md b/docs/handbook/forgewrapper/overview.md
new file mode 100644
index 0000000000..cd10dcf434
--- /dev/null
+++ b/docs/handbook/forgewrapper/overview.md
@@ -0,0 +1,270 @@
+# ForgeWrapper — Overview
+
+## What Is ForgeWrapper?
+
+ForgeWrapper is a specialized Java wrapper and bootstrap layer authored by ZekerZhayard that enables third-party Minecraft launchers — originally MultiMC, and now any launcher implementing the `IFileDetector` interface — to launch Minecraft 1.13+ with Forge or NeoForge mod loaders. It intercepts the standard Forge/NeoForge installation process, runs the installer's post-processors in headless (non-GUI) mode, and then configures the Java module system, classpath, and system properties required to launch the modded game.
+
+**Root Package:** `io.github.zekerzhayard.forgewrapper.installer`
+**Language:** Java
+**Build System:** Gradle (multi-project: root + `jigsaw` subproject)
+**License:** LGPL-3.0-only
+**Source Compatibility:** Java 8 (base), Java 9+ (jigsaw multi-release overlay)
+**Version Property:** `fw_version = projt` (defined in `gradle.properties`)
+
+---
+
+## The Problem ForgeWrapper Solves
+
+The official Forge and NeoForge installers are designed to run as standalone GUI applications. They download libraries, run post-processors (such as binary patching and JAR merging), and produce a launchable game directory. This works for the vanilla Minecraft launcher, but third-party launchers like MultiMC, PrismLauncher, and custom launchers manage libraries and game directories differently.
+
+ForgeWrapper bridges this gap by:
+
+1. **Detecting required files** — locating the Forge/NeoForge installer JAR, the vanilla Minecraft client JAR, and the shared libraries directory via a pluggable `IFileDetector` system.
+2. **Running the installer headlessly** — invoking `PostProcessors.process()` from the Forge installer API reflectively, inside an isolated `URLClassLoader`, so that binary patches and data generation happen automatically.
+3. **Configuring the Java Platform Module System (JPMS)** — on Java 9+, dynamically adding modules, exports, and opens to the boot module layer at runtime using `sun.misc.Unsafe` and `MethodHandles.Lookup` tricks.
+4. **Setting up the classpath** — adding extra libraries that lack direct-download URLs (and thus are missing from launcher metadata) to the system class loader at runtime.
+5. **Launching the game** — loading and invoking the real main class (typically `cpw.mods.bootstraplauncher.BootstrapLauncher` for modern Forge/NeoForge) with the original command-line arguments.
+
+---
+
+## High-Level Architecture
+
+ForgeWrapper is structured as a Gradle multi-project build with two subprojects:
+
+```
+forgewrapper/
+├── build.gradle # Root project: Java 8 target, multi-release JAR assembly
+├── settings.gradle # Includes the :jigsaw subproject
+├── gradle.properties # fw_version = projt
+├── jigsaw/
+│ ├── build.gradle # Java 9 target, produces ModuleUtil override
+│ └── src/main/java/... # Java 9+ ModuleUtil implementation
+├── src/
+│ └── main/
+│ ├── java/
+│ │ └── io/github/zekerzhayard/forgewrapper/installer/
+│ │ ├── Main.java # Entry point
+│ │ ├── Bootstrap.java # JVM argument processing
+│ │ ├── Installer.java # Forge installer integration
+│ │ ├── detector/
+│ │ │ ├── IFileDetector.java # File detection interface
+│ │ │ ├── DetectorLoader.java # ServiceLoader-based loader
+│ │ │ └── MultiMCFileDetector.java # Default MultiMC detector
+│ │ └── util/
+│ │ └── ModuleUtil.java # Java 8 stubs (no-ops)
+│ └── resources/
+│ └── META-INF/
+│ └── services/
+│ └── io.github...IFileDetector # ServiceLoader registration
+```
+
+The final JAR is a **Multi-Release JAR** (MR-JAR). The root classes are compiled for Java 8. The `jigsaw` subproject compiles a replacement `ModuleUtil` class for Java 9+, which gets placed under `META-INF/versions/9/` in the JAR. At runtime:
+
+- On Java 8: the JVM loads the base `ModuleUtil` with no-op module methods.
+- On Java 9+: the JVM loads `META-INF/versions/9/.../ModuleUtil` with full JPMS manipulation.
+
+---
+
+## Complete Execution Flow
+
+The entire lifecycle of a ForgeWrapper invocation proceeds through these phases:
+
+### Phase 1: Entry and Argument Parsing (`Main.main()`)
+
+The launcher starts ForgeWrapper by calling `Main.main(String[] args)`. The arguments are the standard Forge/NeoForge FML launch arguments:
+
+```
+--fml.neoForgeVersion 20.2.20-beta
+--fml.fmlVersion 1.0.2
+--fml.mcVersion 1.20.2
+--fml.neoFormVersion 20231019.002635
+--launchTarget forgeclient
+```
+
+`Main` converts the args array to a mutable `List<String>` via `Stream.of(args).collect(Collectors.toList())` and parses the following:
+
+| Variable | Source Argument | Example Value |
+|-------------------|---------------------------------------|------------------------|
+| `isNeoForge` | presence of `--fml.neoForgeVersion` | `true` |
+| `mcVersion` | `--fml.mcVersion` | `1.20.2` |
+| `forgeGroup` | `--fml.forgeGroup` or default `net.neoforged` | `net.neoforged` |
+| `forgeArtifact` | `neoforge` if NeoForge, else `forge` | `neoforge` |
+| `forgeVersion` | `--fml.neoForgeVersion` or `--fml.forgeVersion` | `20.2.20-beta` |
+| `forgeFullVersion`| NeoForge: version alone; Forge: `mcVersion-forgeVersion` | `20.2.20-beta` |
+
+**Key distinction:** NeoForge versions after 20.2.x use `--fml.neoForgeVersion`; early NeoForge for 1.20.1 is not handled by this codepath.
+
+### Phase 2: File Detection (`DetectorLoader` + `IFileDetector`)
+
+`DetectorLoader.loadDetector()` uses `java.util.ServiceLoader` to discover all implementations of `IFileDetector`. It builds a `HashMap<String, IFileDetector>` mapping `name -> detector`, then iterates through each entry. For each detector, it creates a copy of the map with that detector removed (the `others` map) and calls `detector.enabled(others)`. Exactly one detector must return `true` from `enabled()`; zero enabled detectors cause the error `"No file detector is enabled!"`, and two or more cause `"There are two or more file detectors are enabled!"`.
+
+The default `MultiMCFileDetector` returns `true` from `enabled()` only when `others.size() == 0` — that is, when it is the sole registered detector. This means any launcher that registers its own `IFileDetector` will automatically disable `MultiMCFileDetector`.
+
+After detection, `Main` validates that both the installer JAR and Minecraft JAR exist as regular files via `Files.isRegularFile()`.
+
+### Phase 3: Isolated ClassLoader and Data Extraction (`Installer.getData()`)
+
+`Main` creates a child `URLClassLoader` containing:
+1. ForgeWrapper's own JAR (from `Main.class.getProtectionDomain().getCodeSource().getLocation()`)
+2. The Forge/NeoForge installer JAR
+
+This classloader's parent is `ModuleUtil.getPlatformClassLoader()` — on Java 8 this returns `null` (bootstrap loader), on Java 9+ it returns the platform class loader. This isolation prevents the installer's classes from conflicting with the launcher's classpath.
+
+Through this classloader, `Main` reflectively loads `Installer.class` and calls `getData(libraryDir)`, which:
+
+1. Calls `Util.loadInstallProfile()` to parse the installer's embedded `install_profile.json`.
+2. Wraps it in `InstallV1Wrapper` (a subclass of `InstallV1`), storing a reference to `librariesDir`.
+3. Loads the embedded version JSON (e.g., `version.json`) via `Version0.loadVersion()` using the JSON path from the install profile.
+4. Extracts the main class name, JVM arguments, and extra library paths into a `HashMap`.
+5. Returns a `Map<String, Object>` with keys: `"mainClass"`, `"jvmArgs"`, `"extraLibraries"`.
+
+### Phase 4: JVM Bootstrap (`Bootstrap.bootstrap()`)
+
+`Bootstrap.bootstrap()` receives the JVM args array, the Minecraft JAR filename, and the library directory path. It performs placeholder replacement, classpath sanitization, JVM argument extraction, and module system configuration. See the [Bootstrap System](bootstrap-system.md) document for full details.
+
+### Phase 5: Installation (`Installer.install()`)
+
+`Installer.install()` runs the Forge/NeoForge post-processors via reflective invocation of `PostProcessors.process()`. The `InstallV1Wrapper` adds caching of processor outputs and an optional hash-check bypass. See the [Installer System](installer-system.md) document for full details.
+
+### Phase 6: Classpath Setup and Game Launch
+
+After installation succeeds:
+
+1. `ModuleUtil.setupClassPath()` adds extra libraries (those with empty download URLs in the version JSON) to the system class loader at runtime.
+2. `ModuleUtil.setupBootstrapLauncher()` ensures the main class's package is open to ForgeWrapper's module (Java 9+ only).
+3. The main class's `main(String[])` method is invoked with the original `args`, launching the game.
+
+---
+
+## Key Design Decisions
+
+### Multi-Release JAR Strategy
+
+ForgeWrapper must run on both Java 8 and Java 17+. Rather than using two separate JARs or runtime Java version checks, it uses the Multi-Release JAR specification (JEP 238). The base `ModuleUtil` provides no-op stubs for Java 8, while the jigsaw version provides full JPMS manipulation for Java 9+.
+
+### ServiceLoader-Based Detector Plugin System
+
+The `IFileDetector` interface allows any launcher to provide its own file detection logic without modifying ForgeWrapper. Launchers add their `IFileDetector` implementation JAR to the classpath along with a `META-INF/services` registration file. The `DetectorLoader` ensures exactly one detector is active — this prevents conflicting detection logic from multiple launchers.
+
+### Isolated URLClassLoader for Installer
+
+The Forge installer JAR contains classes (from `net.minecraftforge.installer`) that may conflict with the game's runtime classes. By loading them in a child `URLClassLoader` with the platform class loader as parent (rather than the application class loader), ForgeWrapper keeps the installer isolated. The `URLClassLoader` is wrapped in a try-with-resources block and closed after use.
+
+### Reflective Access via sun.misc.Unsafe
+
+On Java 9+, the JPMS restricts access to internal APIs. ForgeWrapper's jigsaw `ModuleUtil` uses `sun.misc.Unsafe` to obtain `MethodHandles.Lookup.IMPL_LOOKUP`, which has unrestricted access. This is necessary to:
+- Add modules to the boot layer at runtime
+- Modify `Configuration` internal fields (`graph`, `modules`, `nameToModule`)
+- Add exports and opens between modules
+- Access `jdk.internal.loader.BuiltinClassLoader.loadModule()`
+
+---
+
+## NeoForge vs. Forge Detection
+
+ForgeWrapper distinguishes between NeoForge and legacy Forge by checking for `--fml.neoForgeVersion` in the argument list:
+
+| Property | Forge | NeoForge |
+|---------------------|---------------------------------|---------------------------------|
+| Detection key | `--fml.forgeVersion` | `--fml.neoForgeVersion` |
+| Default group | `net.minecraftforge` | `net.neoforged` |
+| Artifact name | `forge` | `neoforge` |
+| Full version format | `{mcVersion}-{forgeVersion}` | `{forgeVersion}` (standalone) |
+
+The `--fml.forgeGroup` argument can override the default group for either variant.
+
+---
+
+## JVM System Properties Reference
+
+ForgeWrapper recognizes and uses the following system properties:
+
+| Property | Purpose | Default |
+|---------------------------------|-----------------------------------------------------|------------------------|
+| `forgewrapper.librariesDir` | Override library directory path | Auto-detected |
+| `forgewrapper.installer` | Override installer JAR path | Auto-detected |
+| `forgewrapper.minecraft` | Override Minecraft client JAR path | Auto-detected |
+| `forgewrapper.skipHashCheck` | Skip processor output hash verification | `false` |
+| `libraryDirectory` | Library directory for Forge internal use | Set by Bootstrap |
+| `ignoreList` | Comma-separated list of JARs to ignore in ModLauncher | Extended by Bootstrap |
+| `java.net.preferIPv4Stack` | Force IPv4 for Forge network operations | `true` if not set |
+
+---
+
+## Version Scheme
+
+The version string is assembled in `build.gradle`:
+
+```groovy
+version = "${fw_version}${-> getVersionSuffix()}"
+```
+
+Where:
+- `fw_version` is `projt` (from `gradle.properties`)
+- The suffix is `-yyyy-MM-dd` in CI (`IS_PUBLICATION` env var or `GITHUB_ACTIONS == "true"`), or `-LOCAL` for local builds
+
+Examples: `projt-2024-03-15`, `projt-LOCAL`.
+
+---
+
+## Dependencies
+
+ForgeWrapper declares all major dependencies as `compileOnly` — they are expected to be on the classpath at runtime (provided by the launcher or the installer JAR):
+
+| Dependency | Version | Purpose |
+|-----------------------------------------|---------|--------------------------------------------|
+| `com.google.code.gson:gson` | 2.8.7 | JSON parsing for install profiles |
+| `cpw.mods:modlauncher` | 8.0.9 | Forge's mod loading framework |
+| `net.minecraftforge:installer` | 2.2.7 | Forge installer API (`PostProcessors`, `InstallV1`, etc.) |
+| `net.sf.jopt-simple:jopt-simple` | 5.0.4 | Command-line argument parsing |
+
+The `jigsaw` subproject has no additional dependencies — it only uses JDK internal APIs.
+
+---
+
+## Repository Configuration
+
+The build resolves dependencies from:
+
+```groovy
+repositories {
+ mavenCentral()
+ maven {
+ name = "forge"
+ url = "https://maven.minecraftforge.net/"
+ }
+}
+```
+
+The Forge Maven repository provides the `net.minecraftforge:installer` and `cpw.mods:modlauncher` artifacts, which are not available on Maven Central.
+
+---
+
+## Summary of Source Files
+
+| File | Role |
+|-----------------------------|-------------------------------------------------------------|
+| `Main.java` | Entry point: argument parsing, detector loading, orchestration |
+| `Bootstrap.java` | JVM argument processing, module system setup delegation |
+| `Installer.java` | Forge installer integration, post-processor execution |
+| `IFileDetector.java` | Interface for file detection with default implementations |
+| `DetectorLoader.java` | ServiceLoader-based detector discovery and validation |
+| `MultiMCFileDetector.java` | Default file detector using Maven-style library paths |
+| `ModuleUtil.java` (base) | Java 8 no-op stubs for module operations |
+| `ModuleUtil.java` (jigsaw) | Java 9+ full JPMS manipulation via Unsafe/MethodHandles |
+
+---
+
+## Further Reading
+
+- [Architecture](architecture.md) — Detailed class relationships and data flow diagrams
+- [Bootstrap System](bootstrap-system.md) — JVM argument processing and placeholder replacement
+- [Installer System](installer-system.md) — Forge/NeoForge installer integration mechanics
+- [Module System](module-system.md) — JPMS manipulation on Java 9+
+- [File Detection](file-detection.md) — The IFileDetector plugin system
+- [NeoForge Support](neoforge-support.md) — NeoForge-specific handling
+- [Building](building.md) — Build instructions and Gradle configuration
+- [Java Compatibility](java-compatibility.md) — Multi-release JAR and Java version support
+- [Gradle Configuration](gradle-configuration.md) — Detailed build.gradle analysis
+- [Code Style](code-style.md) — Coding conventions
+- [Contributing](contributing.md) — Contribution guidelines
diff --git a/docs/handbook/genqrcode/architecture.md b/docs/handbook/genqrcode/architecture.md
new file mode 100644
index 0000000000..f03715bcba
--- /dev/null
+++ b/docs/handbook/genqrcode/architecture.md
@@ -0,0 +1,948 @@
+# genqrcode / libqrencode — Architecture
+
+## High-Level Architecture
+
+libqrencode is a layered C library that transforms input data into a QR Code bitmap through a pipeline of distinct modules. The architecture separates concerns cleanly: input management, bit stream encoding, error correction, frame construction, module placement, and masking are each handled by dedicated source files.
+
+```
+User Input
+ │
+ ▼
+┌─────────────────────────────────────────┐
+│ split.c — Input Splitter │
+│ Automatic mode detection & optimization│
+└─────────────┬───────────────────────────┘
+ │
+ ▼
+┌─────────────────────────────────────────┐
+│ qrinput.c — Input Data Manager │
+│ QRinput linked list, mode encoders, │
+│ bit stream construction, padding │
+└─────────────┬───────────────────────────┘
+ │
+ ▼
+┌─────────────────────────────────────────┐
+│ bitstream.c — Bit Stream Class │
+│ Dynamic bit array with append ops │
+└─────────────┬───────────────────────────┘
+ │
+ ▼
+┌─────────────────────────────────────────┐
+│ qrencode.c — Core Encoder │
+│ QRRawCode / MQRRawCode, FrameFiller, │
+│ QRcode_encodeMask, interleaving │
+│ │
+│ ┌──────────┐ ┌──────────────────────┐ │
+│ │ rsecc.c │ │ qrspec.c / mqrspec.c │ │
+│ │ RS ECC │ │ Spec tables & frames │ │
+│ └──────────┘ └──────────────────────┘ │
+└─────────────┬───────────────────────────┘
+ │
+ ▼
+┌─────────────────────────────────────────┐
+│ mask.c / mmask.c — Masking │
+│ 8 patterns (QR) / 4 patterns (MQR), │
+│ penalty evaluation, mask selection │
+└─────────────┬───────────────────────────┘
+ │
+ ▼
+ QRcode struct
+ (version, width, data[])
+```
+
+---
+
+## Module Dependency Graph
+
+Each `.c` file includes specific headers. Here is the complete include dependency:
+
+```
+qrencode.c
+├── qrencode.h
+├── qrspec.h
+├── mqrspec.h
+├── bitstream.h
+├── qrinput.h
+├── rsecc.h
+├── split.h
+├── mask.h
+└── mmask.h
+
+qrinput.c
+├── qrencode.h
+├── qrspec.h
+├── mqrspec.h
+├── bitstream.h
+└── qrinput.h
+
+split.c
+├── qrencode.h
+├── qrinput.h
+├── qrspec.h
+└── split.h
+
+mask.c
+├── qrencode.h
+├── qrspec.h
+└── mask.h
+
+mmask.c
+├── qrencode.h
+├── mqrspec.h
+└── mmask.h
+
+qrspec.c
+├── qrspec.h
+└── qrinput.h
+
+mqrspec.c
+└── mqrspec.h
+
+rsecc.c
+└── rsecc.h
+
+bitstream.c
+└── bitstream.h
+
+qrenc.c (CLI tool)
+└── qrencode.h
+```
+
+**Key insight:** `qrencode.c` is the integration point that pulls together all modules. The CLI tool (`qrenc.c`) only depends on `qrencode.h`, using exclusively the public API.
+
+---
+
+## Detailed Module Descriptions
+
+### `qrencode.h` — Public API Header
+
+This is the **only header that external consumers include**. It defines:
+
+- `QRencodeMode` enum — All encoding modes (`QR_MODE_NUM`, `QR_MODE_AN`, `QR_MODE_8`, `QR_MODE_KANJI`, `QR_MODE_ECI`, `QR_MODE_FNC1FIRST`, `QR_MODE_FNC1SECOND`, plus internal `QR_MODE_NUL`, `QR_MODE_STRUCTURE`)
+- `QRecLevel` enum — Error correction levels (`QR_ECLEVEL_L` through `QR_ECLEVEL_H`)
+- `QRcode` struct — Output symbol (version, width, data array)
+- `QRcode_List` struct — Singly-linked list of QRcode for structured append
+- `QRinput` — Opaque typedef for input object
+- `QRinput_Struct` — Opaque typedef for structured input set
+- Version constants: `QRSPEC_VERSION_MAX` (40), `MQRSPEC_VERSION_MAX` (4)
+- All `QRinput_*`, `QRcode_*`, and utility function declarations
+
+The header uses `extern "C"` guards for C++ compatibility.
+
+### `qrencode_inner.h` — Test-Only Internal Header
+
+Exposes internal types and functions for the test suite:
+
+```c
+typedef struct {
+ int dataLength;
+ int eccLength;
+ unsigned char *data;
+ unsigned char *ecc;
+} RSblock;
+
+typedef struct {
+ int version;
+ int dataLength;
+ int eccLength;
+ unsigned char *datacode;
+ unsigned char *ecccode;
+ int b1;
+ int blocks;
+ RSblock *rsblock;
+ int count;
+} QRRawCode;
+
+typedef struct {
+ int version;
+ int dataLength;
+ int eccLength;
+ unsigned char *datacode;
+ unsigned char *ecccode;
+ RSblock *rsblock;
+ int oddbits;
+ int count;
+} MQRRawCode;
+```
+
+Exposed functions:
+- `QRraw_new()`, `QRraw_getCode()`, `QRraw_free()`
+- `MQRraw_new()`, `MQRraw_getCode()`, `MQRraw_free()`
+- `FrameFiller_test()`, `FrameFiller_testMQR()`
+- `QRcode_encodeMask()`, `QRcode_encodeMaskMQR()`
+- `QRcode_new()`
+
+### `qrencode.c` — Core Encoding Engine
+
+This is the **central orchestrator** of the entire encoding process. It contains:
+
+#### RSblock and QRRawCode
+
+The `RSblock` struct represents one Reed-Solomon block:
+```c
+typedef struct {
+ int dataLength;
+ int eccLength;
+ unsigned char *data;
+ unsigned char *ecc;
+} RSblock;
+```
+
+`QRRawCode` manages all RS blocks for a full QR Code:
+```c
+typedef struct {
+ int version;
+ int dataLength;
+ int eccLength;
+ unsigned char *datacode; // merged data byte stream
+ unsigned char *ecccode; // merged ECC byte stream
+ int b1; // number of type-1 blocks
+ int blocks; // total block count
+ RSblock *rsblock; // array of RS blocks
+ int count; // iteration counter for getCode()
+} QRRawCode;
+```
+
+`QRraw_new()` creates a QRRawCode from a QRinput:
+1. Calls `QRinput_getByteStream()` to convert input to a padded byte stream
+2. Retrieves the ECC specification via `QRspec_getEccSpec()`
+3. Initializes RS blocks via `RSblock_init()`, which calls `RSECC_encode()` for each block
+4. Sets up interleaving state (b1 records the count of type-1 blocks)
+
+`QRraw_getCode()` implements data/ECC interleaving:
+```c
+STATIC_IN_RELEASE unsigned char QRraw_getCode(QRRawCode *raw)
+{
+ int col, row;
+ unsigned char ret;
+
+ if(raw->count < raw->dataLength) {
+ row = raw->count % raw->blocks;
+ col = raw->count / raw->blocks;
+ if(col >= raw->rsblock[0].dataLength) {
+ row += raw->b1;
+ }
+ ret = raw->rsblock[row].data[col];
+ } else if(raw->count < raw->dataLength + raw->eccLength) {
+ row = (raw->count - raw->dataLength) % raw->blocks;
+ col = (raw->count - raw->dataLength) / raw->blocks;
+ ret = raw->rsblock[row].ecc[col];
+ } else {
+ return 0;
+ }
+ raw->count++;
+ return ret;
+}
+```
+
+This interleaves by cycling through blocks row-by-row: the first byte from each block, then the second byte from each block, and so on. When type-1 blocks are exhausted (their data is shorter), it shifts to type-2 blocks by adding `b1` to the row index.
+
+#### FrameFiller
+
+The `FrameFiller` struct manages the zigzag placement of data modules:
+
+```c
+typedef struct {
+ int width;
+ unsigned char *frame;
+ int x, y; // current position
+ int dir; // direction: -1 (upward) or 1 (downward)
+ int bit; // 0 or 1 within a column pair
+ int mqr; // flag for Micro QR mode
+} FrameFiller;
+```
+
+`FrameFiller_set()` initializes the filler at the bottom-right corner `(width-1, width-1)` with direction `-1` (upward).
+
+`FrameFiller_next()` implements the QR Code module placement algorithm:
+- Modules are placed in 2-column strips from right to left
+- Within each strip, columns alternate right-left
+- Direction alternates between upward and downward
+- Column 6 (the vertical timing pattern) is skipped in full QR mode
+- Modules already marked with bit 7 set (`0x80`) — function patterns — are skipped
+
+#### QRcode_encodeMask()
+
+The main encoding function for full QR Codes:
+
+1. Validates input (not MQR, version in range, valid EC level)
+2. Creates `QRRawCode` from input
+3. Creates base frame via `QRspec_newFrame()`
+4. Places data bits using `FrameFiller_next()`:
+ - Data modules: `*p = ((bit & code) != 0)` — only LSB set
+ - ECC modules: `*p = 0x02 | ((bit & code) != 0)` — bit 1 also set
+ - Remainder bits: `*p = 0x02`
+5. Applies masking:
+ - `mask == -2`: debug mode, no masking
+ - `mask < 0` (normal): `Mask_mask()` evaluates all 8 patterns
+ - `mask >= 0`: `Mask_makeMask()` applies specific mask
+
+#### QRcode_encodeMaskMQR()
+
+The Micro QR variant:
+- Validates MQR mode, version 1–4, EC level L through Q
+- Uses `MQRRawCode` instead of `QRRawCode`
+- Handles `oddbits` — the last data byte may have fewer than 8 significant bits
+- Uses `MMask_mask()` / `MMask_makeMask()` for the 4-pattern MQR masking
+- Sets FrameFiller's `mqr` flag to 1 (no column-6 skip)
+
+#### High-Level Encoding Functions
+
+`QRcode_encodeInput()` simply dispatches based on the `mqr` flag:
+```c
+QRcode *QRcode_encodeInput(QRinput *input)
+{
+ if(input->mqr) {
+ return QRcode_encodeMaskMQR(input, -1);
+ } else {
+ return QRcode_encodeMask(input, -1);
+ }
+}
+```
+
+`QRcode_encodeStringReal()` is the shared implementation for `QRcode_encodeString()` and `QRcode_encodeStringMQR()`:
+1. Creates a `QRinput` (MQR or standard)
+2. Calls `Split_splitStringToQRinput()` for automatic mode optimization
+3. Calls `QRcode_encodeInput()`
+
+For MQR, `QRcode_encodeStringMQR()` tries each version from the minimum up to 4:
+```c
+for(i = version; i <= MQRSPEC_VERSION_MAX; i++) {
+ QRcode *code = QRcode_encodeStringReal(string, i, level, 1, hint, casesensitive);
+ if(code != NULL) return code;
+}
+```
+
+#### Structured Append
+
+`QRcode_encodeInputStructured()` encodes each `QRinput` in a `QRinput_Struct` and builds a `QRcode_List` linked list.
+
+`QRcode_encodeStringStructured()` and variants auto-split the input, insert structured append headers, encode each part, and return the linked list.
+
+---
+
+### `qrinput.h` / `qrinput.c` — Input Data Management
+
+This module manages the input data pipeline from raw user data to a padded byte stream ready for RS encoding.
+
+#### Internal Structures
+
+```c
+// A linked list entry for one data chunk
+struct _QRinput_List {
+ QRencodeMode mode; // encoding mode
+ int size; // data size in bytes
+ unsigned char *data; // data chunk
+ BitStream *bstream; // encoded bit stream (created during encoding)
+ QRinput_List *next; // next chunk
+};
+
+// The main input object
+struct _QRinput {
+ int version;
+ QRecLevel level;
+ QRinput_List *head; // first data chunk
+ QRinput_List *tail; // last data chunk
+ int mqr; // 1 if Micro QR mode
+ int fnc1; // FNC1 mode flag
+ unsigned char appid; // FNC1 application ID
+};
+
+// Structured append management
+struct _QRinput_Struct {
+ int size; // number of symbols
+ int parity; // parity byte
+ QRinput_InputList *head;
+ QRinput_InputList *tail;
+};
+```
+
+#### Mode Encoding Functions
+
+Each encoding mode has three functions in `qrinput.c`:
+
+1. **Check function** — Validates input data for the mode
+2. **Estimate function** — Estimates bit count without actually encoding
+3. **Encode function** — Produces the actual bit stream
+
+##### Numeric Mode (`QRinput_encodeModeNum`)
+
+Encodes digits in groups:
+- 3 digits → 10 bits (values 000–999)
+- 2 remaining digits → 7 bits
+- 1 remaining digit → 4 bits
+
+```c
+int QRinput_estimateBitsModeNum(int size)
+{
+ int w = size / 3;
+ int bits = w * 10;
+ switch(size - w * 3) {
+ case 1: bits += 4; break;
+ case 2: bits += 7; break;
+ }
+ return bits;
+}
+```
+
+##### Alphanumeric Mode (`QRinput_encodeModeAn`)
+
+Uses the 45-character lookup table `QRinput_anTable[128]`:
+- Pairs → 11 bits (value = c1 × 45 + c2)
+- Odd character → 6 bits
+
+The table maps: 0-9 → 0-9, A-Z → 10-35, space → 36, $ → 37, % → 38, * → 39, + → 40, - → 41, . → 42, / → 43, : → 44.
+
+The lookup macro:
+```c
+#define QRinput_lookAnTable(__c__) \
+ ((__c__ & 0x80)?-1:QRinput_anTable[(int)__c__])
+```
+
+##### 8-Bit Mode (`QRinput_encodeMode8`)
+
+Each byte → 8 bits, using `BitStream_appendBytes()`.
+
+##### Kanji Mode (`QRinput_encodeModeKanji`)
+
+Shift-JIS double-byte characters are compressed:
+1. If code ≤ 0x9FFC: subtract 0x8140
+2. If code > 0x9FFC: subtract 0xC140
+3. High byte × 0xC0 + low byte → 13-bit value
+
+Validation in `QRinput_checkModeKanji()`:
+```c
+if(val < 0x8140 || (val > 0x9ffc && val < 0xe040) || val > 0xebbf) {
+ return -1;
+}
+```
+
+##### ECI Mode (`QRinput_encodeModeECI`)
+
+ECI indicator encoding (per JIS X0510:2004, Table 4):
+- 0–127: 1 byte (8 bits)
+- 128–16383: 2 bytes (16 bits, prefix 0x8000)
+- 16384–999999: 3 bytes (24 bits, prefix 0xC0000)
+
+##### Structured Append Header
+
+20-bit header: 4 bits mode indicator + 4 bits symbol count + 4 bits symbol index + 8 bits parity.
+
+##### FNC1 Second Position
+
+4-bit mode indicator + 8-bit application ID.
+
+#### Bit Stream Construction Pipeline
+
+The conversion from input chunks to a byte stream follows this call chain:
+
+```
+QRinput_getByteStream()
+ └── QRinput_getBitStream()
+ ├── QRinput_convertData() [for standard QR]
+ │ ├── QRinput_estimateVersion()
+ │ │ └── QRinput_estimateBitStreamSize()
+ │ │ └── QRinput_estimateBitStreamSizeOfEntry() × N
+ │ └── QRinput_createBitStream()
+ │ └── QRinput_encodeBitStream() × N
+ │ └── QRinput_encodeMode{Num,An,8,Kanji,...}()
+ ├── QRinput_appendPaddingBit() [for standard QR]
+ └── QRinput_appendPaddingBitMQR() [for Micro QR]
+ └── BitStream_toByte()
+```
+
+`QRinput_convertData()` handles version auto-selection with a convergence loop:
+```c
+for(;;) {
+ BitStream_reset(bstream);
+ bits = QRinput_createBitStream(input, bstream);
+ ver = QRspec_getMinimumVersion((bits + 7) / 8, input->level);
+ if(ver > QRinput_getVersion(input)) {
+ QRinput_setVersion(input, ver);
+ } else {
+ break;
+ }
+}
+```
+
+`QRinput_encodeBitStream()` handles entry splitting when data exceeds `QRspec_maximumWords()`:
+```c
+if(words != 0 && entry->size > words) {
+ st1 = QRinput_List_newEntry(entry->mode, words, entry->data);
+ st2 = QRinput_List_newEntry(entry->mode, entry->size - words, &entry->data[words]);
+ QRinput_encodeBitStream(st1, bstream, version, mqr);
+ QRinput_encodeBitStream(st2, bstream, version, mqr);
+}
+```
+
+Padding appends:
+1. Terminator (up to 4 zero bits)
+2. Byte-alignment zeros
+3. Alternating pad codewords: `0xEC`, `0x11`, `0xEC`, `0x11`, ...
+
+---
+
+### `bitstream.h` / `bitstream.c` — Binary Sequence Class
+
+A dynamic bit array class used throughout the encoding pipeline.
+
+```c
+typedef struct {
+ size_t length; // current number of bits
+ size_t datasize; // allocated buffer size
+ unsigned char *data; // one byte per bit (0 or 1)
+} BitStream;
+```
+
+**Critical design choice:** Each bit occupies one byte in memory. This simplifies bit manipulation at the cost of memory. The buffer starts at `DEFAULT_BUFSIZE` (128) and doubles on demand via `BitStream_expand()`.
+
+Key operations:
+
+| Function | Description |
+|---|---|
+| `BitStream_new()` | Allocate with 128-byte initial buffer |
+| `BitStream_append(dst, src)` | Append another BitStream |
+| `BitStream_appendNum(bs, bits, num)` | Append `bits` bits of integer `num` |
+| `BitStream_appendBytes(bs, size, data)` | Append `size` bytes (8 bits each) |
+| `BitStream_toByte(bs)` | Pack bit array into byte array |
+| `BitStream_free(bs)` | Free all memory |
+
+Macros:
+```c
+#define BitStream_size(__bstream__) (__bstream__->length)
+#define BitStream_reset(__bstream__) (__bstream__->length = 0)
+```
+
+`BitStream_toByte()` packs the 1-byte-per-bit representation into a proper byte array:
+```c
+for(i = 0; i < bytes; i++) {
+ v = 0;
+ for(j = 0; j < 8; j++) {
+ v = (unsigned char)(v << 1);
+ v |= *p;
+ p++;
+ }
+ data[i] = v;
+}
+```
+
+---
+
+### `qrspec.h` / `qrspec.c` — QR Code Specification Tables
+
+Contains all specification-derived data tables and frame construction for full QR Codes.
+
+#### Capacity Table
+
+```c
+typedef struct {
+ int width;
+ int words;
+ int remainder;
+ int ec[4];
+} QRspec_Capacity;
+
+static const QRspec_Capacity qrspecCapacity[QRSPEC_VERSION_MAX + 1];
+```
+
+Sourced from Table 1 (p.13) and Tables 12–16 (pp.30–36) of JIS X0510:2004.
+
+#### Length Indicator Table
+
+```c
+static const int lengthTableBits[4][3] = {
+ {10, 12, 14}, // Numeric
+ { 9, 11, 13}, // Alphanumeric
+ { 8, 16, 16}, // 8-bit
+ { 8, 10, 12} // Kanji
+};
+```
+
+Three version ranges: 1–9, 10–26, 27–40.
+
+#### ECC Block Specification Table
+
+```c
+static const int eccTable[QRSPEC_VERSION_MAX+1][4][2];
+```
+
+Each entry `eccTable[version][level]` gives `{type1_blocks, type2_blocks}`. Combined with `QRspec_getEccSpec()` to produce the 5-element spec array:
+
+```c
+void QRspec_getEccSpec(int version, QRecLevel level, int spec[5])
+```
+
+Where `spec` = `{num_type1_blocks, type1_data_codes, ecc_codes_per_block, num_type2_blocks, type2_data_codes}`.
+
+Accessor macros:
+```c
+#define QRspec_rsBlockNum(__spec__) (__spec__[0] + __spec__[3])
+#define QRspec_rsBlockNum1(__spec__) (__spec__[0])
+#define QRspec_rsDataCodes1(__spec__) (__spec__[1])
+#define QRspec_rsEccCodes1(__spec__) (__spec__[2])
+#define QRspec_rsBlockNum2(__spec__) (__spec__[3])
+#define QRspec_rsDataCodes2(__spec__) (__spec__[4])
+#define QRspec_rsEccCodes2(__spec__) (__spec__[2])
+```
+
+Note: type-1 and type-2 blocks share the same ECC code count.
+
+#### Alignment Pattern Table
+
+```c
+static const int alignmentPattern[QRSPEC_VERSION_MAX+1][2];
+```
+
+From Table 1 in Appendix E (p.71). Stores the second and third alignment pattern positions; remaining positions are interpolated.
+
+`QRspec_putAlignmentPattern()` places all alignment markers, computing positions from the stored two values and the inter-pattern distance.
+
+#### Version Information Pattern
+
+```c
+static const unsigned int versionPattern[QRSPEC_VERSION_MAX - 6];
+```
+
+BCH-encoded version information for versions 7–40. From Appendix D (p.68).
+
+#### Format Information
+
+```c
+static const unsigned int formatInfo[4][8];
+```
+
+BCH-encoded format information indexed by `[level][mask]`.
+
+#### Frame Creation (`QRspec_createFrame`)
+
+Builds the initial symbol frame for a given version:
+
+1. Allocates `width × width` bytes, zeroed
+2. Places **3 finder patterns** (7×7, `0xC1`/`0xC0` pattern) at corners
+3. Places **separators** (1-module-wide white border around finders, `0xC0`)
+4. Masks **format information area** (9+8 modules around finder, `0x84`)
+5. Places **timing patterns** (alternating `0x91`/`0x90` along row 6 and column 6)
+6. Places **alignment patterns** (5×5, `0xA1`/`0xA0` pattern)
+7. For versions ≥ 7: places **version information** (6×3 blocks, `0x88`/`0x89`)
+8. Sets the **dark module** at position `(8, width-8)` to `0x81`
+
+All function pattern modules have bit 7 set (`0x80`), so the FrameFiller skips them during data placement.
+
+---
+
+### `mqrspec.h` / `mqrspec.c` — Micro QR Specification Tables
+
+Parallel to `qrspec.c` but for Micro QR (versions M1–M4).
+
+```c
+typedef struct {
+ int width;
+ int ec[4];
+} MQRspec_Capacity;
+
+static const MQRspec_Capacity mqrspecCapacity[MQRSPEC_VERSION_MAX + 1] = {
+ { 0, {0, 0, 0, 0}},
+ { 11, {2, 0, 0, 0}}, // M1
+ { 13, {5, 6, 0, 0}}, // M2
+ { 15, {6, 8, 0, 0}}, // M3
+ { 17, {8, 10, 14, 0}} // M4
+};
+```
+
+Notable difference: `MQRspec_getDataLengthBit()` returns data capacity in **bits** (not bytes), because Micro QR symbols can have non-byte-aligned data areas:
+```c
+int MQRspec_getDataLengthBit(int version, QRecLevel level)
+{
+ int w = mqrspecCapacity[version].width - 1;
+ int ecc = mqrspecCapacity[version].ec[level];
+ if(ecc == 0) return 0;
+ return w * w - 64 - ecc * 8;
+}
+```
+
+The data length in bytes is computed as `(bits + 4) / 8` — effectively rounding up, since M1 and M3 symbols end with a 4-bit half codeword.
+
+#### Micro QR Frame Creation
+
+`MQRspec_createFrame()` is simpler than the full QR version:
+- Only **1 finder pattern** (top-left)
+- No alignment patterns
+- No version information
+- Timing patterns run along one row and one column from the finder
+
+#### Format Info Type Table
+
+```c
+static const int typeTable[MQRSPEC_VERSION_MAX + 1][3] = {
+ {-1, -1, -1},
+ { 0, -1, -1}, // M1: only error detection
+ { 1, 2, -1}, // M2: L, M
+ { 3, 4, -1}, // M3: L, M
+ { 5, 6, 7} // M4: L, M, Q
+};
+```
+
+Maps `(version, level)` → format info table index. Returns -1 for unsupported combinations.
+
+---
+
+### `rsecc.h` / `rsecc.c` — Reed-Solomon Error Correction
+
+Contains the GF(2^8) arithmetic and RS encoding for QR Codes.
+
+Single public function:
+```c
+int RSECC_encode(size_t data_length, size_t ecc_length,
+ const unsigned char *data, unsigned char *ecc);
+```
+
+Internal state:
+- `alpha[256]` — Power-to-element mapping (logarithm table)
+- `aindex[256]` — Element-to-power mapping (antilogarithm table)
+- `generator[29][31]` — Cached generator polynomials for ECC lengths 2–30
+- `generatorInitialized[29]` — Whether each generator has been computed
+
+Lazy initialization via `RSECC_init()` and `generator_init()`, protected by `RSECC_mutex` when pthreads are available.
+
+The primitive polynomial is `0x11d` = $x^8 + x^4 + x^3 + x^2 + 1$, per JIS X0510:2004 p.37.
+
+See [reed-solomon.md](reed-solomon.md) for detailed implementation analysis.
+
+---
+
+### `split.h` / `split.c` — Input String Splitter
+
+Automatically parses an input string and splits it into optimal encoding mode segments.
+
+Entry point:
+```c
+int Split_splitStringToQRinput(const char *string, QRinput *input,
+ QRencodeMode hint, int casesensitive);
+```
+
+Key functions:
+- `Split_identifyMode()` — Classifies a character: digit → `QR_MODE_NUM`, AN table match → `QR_MODE_AN`, Shift-JIS → `QR_MODE_KANJI`, else → `QR_MODE_8`
+- `Split_eatNum()` — Consumes a run of numeric characters, considers switching to AN or 8-bit if more efficient
+- `Split_eatAn()` — Consumes alphanumeric characters, embedded digit runs tested for mode-switch optimization
+- `Split_eatKanji()` — Consumes pairs of Kanji bytes
+- `Split_eat8()` — Consumes 8-bit characters, tests for switching to NUM or AN when sub-runs are encountered
+
+The optimization logic compares bit costs: each `Split_eat*` function calculates `dif` — the bit savings of staying in the current mode vs. switching. Example from `Split_eatNum()`:
+
+```c
+dif = QRinput_estimateBitsModeNum(run) + 4 + ln
+ + QRinput_estimateBitsMode8(1)
+ - QRinput_estimateBitsMode8(run + 1);
+if(dif > 0) {
+ return Split_eat8(string, input, hint);
+}
+```
+
+Case conversion (when `casesensitive=0`) is handled by `dupAndToUpper()`, which converts lowercase to uppercase while preserving Kanji double-byte sequences.
+
+---
+
+### `mask.h` / `mask.c` — QR Code Masking
+
+Implements the 8 mask patterns for full QR Code and the penalty evaluation algorithm.
+
+#### Mask Patterns
+
+All 8 patterns are defined via the `MASKMAKER` macro:
+
+```c
+#define MASKMAKER(__exp__) \
+ int x, y;\
+ int b = 0;\
+ for(y = 0; y < width; y++) {\
+ for(x = 0; x < width; x++) {\
+ if(*s & 0x80) {\
+ *d = *s;\
+ } else {\
+ *d = *s ^ ((__exp__) == 0);\
+ }\
+ b += (int)(*d & 1);\
+ s++; d++;\
+ }\
+ }\
+ return b;
+```
+
+The 8 mask functions:
+
+| Pattern | Function | Condition (dark if true) |
+|---|---|---|
+| 0 | `Mask_mask0` | `(x+y) % 2 == 0` |
+| 1 | `Mask_mask1` | `y % 2 == 0` |
+| 2 | `Mask_mask2` | `x % 3 == 0` |
+| 3 | `Mask_mask3` | `(x+y) % 3 == 0` |
+| 4 | `Mask_mask4` | `((y/2)+(x/3)) % 2 == 0` |
+| 5 | `Mask_mask5` | `(x*y)%2 + (x*y)%3 == 0` |
+| 6 | `Mask_mask6` | `((x*y)%2 + (x*y)%3) % 2 == 0` |
+| 7 | `Mask_mask7` | `((x*y)%3 + (x+y)%2) % 2 == 0` |
+
+Function pointer array: `static MaskMaker *maskMakers[8]`
+
+#### Penalty Evaluation
+
+`Mask_mask()` tries all 8 masks and selects the one with the lowest penalty:
+
+```c
+for(i = 0; i < maskNum; i++) {
+ penalty = 0;
+ blacks = maskMakers[i](width, frame, mask);
+ blacks += Mask_writeFormatInformation(width, mask, i, level);
+ bratio = (200 * blacks + w2) / w2 / 2;
+ penalty = (abs(bratio - 50) / 5) * N4;
+ penalty += Mask_evaluateSymbol(width, mask);
+ if(penalty < minPenalty) {
+ minPenalty = penalty;
+ memcpy(bestMask, mask, w2);
+ }
+}
+```
+
+Penalty constants from JIS X0510:2004, Section 8.8.2:
+```c
+#define N1 (3) // Run penalty base
+#define N2 (3) // 2×2 block penalty
+#define N3 (40) // Finder-like pattern penalty
+#define N4 (10) // Proportion penalty per 5% deviation
+```
+
+See [masking-algorithms.md](masking-algorithms.md) for detailed penalty calculation analysis.
+
+---
+
+### `mmask.h` / `mmask.c` — Micro QR Masking
+
+Implements 4 mask patterns for Micro QR Code with a different evaluation algorithm.
+
+The 4 patterns:
+
+| Pattern | Condition |
+|---|---|
+| 0 | `y % 2 == 0` |
+| 1 | `((y/2)+(x/3)) % 2 == 0` |
+| 2 | `((x*y)%2 + (x*y)%3) % 2 == 0` |
+| 3 | `((x+y)%2 + (x*y)%3) % 2 == 0` |
+
+The Micro QR evaluation in `MMask_evaluateSymbol()` uses a completely different approach from full QR:
+```c
+STATIC_IN_RELEASE int MMask_evaluateSymbol(int width, unsigned char *frame)
+{
+ int x, y;
+ unsigned char *p;
+ int sum1 = 0, sum2 = 0;
+
+ p = frame + width * (width - 1);
+ for(x = 1; x < width; x++) {
+ sum1 += (p[x] & 1);
+ }
+
+ p = frame + width * 2 - 1;
+ for(y = 1; y < width; y++) {
+ sum2 += (*p & 1);
+ p += width;
+ }
+
+ return (sum1 <= sum2)?(sum1 * 16 + sum2):(sum2 * 16 + sum1);
+}
+```
+
+Instead of penalties, it counts dark modules on the bottom row and right column, then selects the mask with the **highest** score (not lowest).
+
+---
+
+## Data Flow: Complete Encoding Pipeline
+
+Here is the detailed function call chain for encoding a string:
+
+```
+QRcode_encodeString("Hello", 0, QR_ECLEVEL_M, QR_MODE_8, 1)
+ │
+ └── QRcode_encodeStringReal("Hello", 0, QR_ECLEVEL_M, 0, QR_MODE_8, 1)
+ │
+ ├── QRinput_new2(0, QR_ECLEVEL_M)
+ │ └── allocate QRinput {version=0, level=M, mqr=0}
+ │
+ ├── Split_splitStringToQRinput("Hello", input, QR_MODE_8, 1)
+ │ └── Split_splitString("Hello", input, QR_MODE_8)
+ │ ├── Split_identifyMode("H") → QR_MODE_AN
+ │ └── Split_eatAn("Hello", ...)
+ │ └── QRinput_append(input, QR_MODE_AN, 5, "Hello")
+ │
+ └── QRcode_encodeInput(input)
+ │
+ └── QRcode_encodeMask(input, -1)
+ │
+ ├── QRraw_new(input)
+ │ ├── QRinput_getByteStream(input)
+ │ │ ├── QRinput_convertData() — version auto-select
+ │ │ ├── QRinput_createBitStream() — encode each chunk
+ │ │ ├── QRinput_appendPaddingBit() — terminator + padding
+ │ │ └── BitStream_toByte() — pack bits to bytes
+ │ │
+ │ ├── QRspec_getEccSpec(version, level, spec)
+ │ └── RSblock_init() → RSECC_encode() per block
+ │
+ ├── QRspec_newFrame(version)
+ │ └── QRspec_createFrame() — finder, timing, alignment
+ │
+ ├── FrameFiller placement loop
+ │ └── FrameFiller_next() × (dataLength + eccLength) × 8
+ │
+ └── Mask_mask(width, frame, level)
+ ├── maskMakers[0..7]() — apply each pattern
+ ├── Mask_writeFormatInformation() — embed format info
+ ├── Mask_evaluateSymbol() — penalty calculation
+ │ ├── Mask_calcN2() — 2×2 blocks
+ │ ├── Mask_calcRunLengthH() — horizontal runs
+ │ ├── Mask_calcRunLengthV() — vertical runs
+ │ └── Mask_calcN1N3() — run + finder penalties
+ └── select minimum penalty mask
+```
+
+---
+
+## STATIC_IN_RELEASE Pattern
+
+The codebase uses the `STATIC_IN_RELEASE` macro to control visibility:
+
+```c
+// When WITH_TESTS is defined:
+#define STATIC_IN_RELEASE
+
+// When WITH_TESTS is not defined:
+#define STATIC_IN_RELEASE static
+```
+
+Functions marked `STATIC_IN_RELEASE` (like `QRraw_new`, `QRcode_encodeMask`, `Mask_evaluateSymbol`) are `static` in release builds and externally visible in test builds. Similarly, `#ifdef WITH_TESTS` blocks expose additional test-only functions.
+
+---
+
+## Memory Management
+
+The library follows a consistent pattern:
+- All allocation uses `malloc()`/`calloc()`/`realloc()`
+- Every `_new()` function has a corresponding `_free()` function
+- On allocation failure, functions return `NULL` and set `errno`
+- `QRcode_free()` frees both the `QRcode` struct and its internal `data` array
+- `QRinput_free()` walks the linked list and frees each entry
+- `BitStream_free()` frees the data buffer and the struct
+
+No memory pools or custom allocators are used.
+
+---
+
+## Error Handling
+
+Errors are reported via return values and `errno`:
+
+| Error | Meaning |
+|---|---|
+| `EINVAL` | Invalid argument (bad version, level, mode, or data) |
+| `ENOMEM` | Memory allocation failure |
+| `ERANGE` | Input data too large for any supported version |
+
+Functions returning pointers return `NULL` on error. Functions returning `int` return `-1` on error and `0` on success. The only exception is `QRraw_getCode()` which returns `0` when the code stream is exhausted.
diff --git a/docs/handbook/genqrcode/building.md b/docs/handbook/genqrcode/building.md
new file mode 100644
index 0000000000..8f504d654c
--- /dev/null
+++ b/docs/handbook/genqrcode/building.md
@@ -0,0 +1,570 @@
+# genqrcode / libqrencode — Building
+
+## Overview
+
+genqrcode (libqrencode 4.1.1) supports two build systems:
+
+1. **CMake** — The recommended modern approach
+2. **GNU Autotools** — The traditional `./configure && make` workflow
+
+Both produce the same outputs: a static and/or shared library (`libqrencode`), the optional command-line tool (`qrencode`), and associated files (pkg-config, man page).
+
+---
+
+## Dependencies
+
+### Required
+
+None. The core library has **zero external dependencies**.
+
+### Optional
+
+| Dependency | Purpose | Detection Method |
+|---|---|---|
+| **pthreads** | Thread-safe RS encoding | CMake: `find_package(Threads)` / Autotools: `AC_CHECK_LIB([pthread])` |
+| **libpng** | PNG output for CLI tool | CMake: `find_package(PNG)` / Autotools: `PKG_CHECK_MODULES(png, "libpng")` |
+| **libiconv** | Character-set conversion in the test-suite decoder | CMake: `find_package(Iconv)` / Autotools: `AM_ICONV_LINK` |
+| **SDL 2.0** | `view_qrcode` test viewer | Autotools only: `PKG_CHECK_MODULES(SDL, [sdl2 >= 2.0.0])` |
+
+### Build Tools Required
+
+For **CMake**:
+- CMake ≥ 3.1.0
+- A C compiler (GCC, Clang, MSVC)
+
+For **Autotools** (when building from a Git checkout):
+- autoconf
+- automake
+- autotools-dev
+- libtool
+- pkg-config
+- libpng-dev (for CLI tool)
+
+> **Note:** If you downloaded a release tarball that already includes the `configure` script, you do not need autoconf/automake/libtool.
+
+---
+
+## CMake Build
+
+### Source Files Compiled
+
+The CMakeLists.txt defines the library sources explicitly:
+
+```cmake
+set(QRENCODE_SRCS qrencode.c
+ qrinput.c
+ bitstream.c
+ qrspec.c
+ rsecc.c
+ split.c
+ mask.c
+ mqrspec.c
+ mmask.c)
+
+set(QRENCODE_HDRS qrencode_inner.h
+ qrinput.h
+ bitstream.h
+ qrspec.h
+ rsecc.h
+ split.h
+ mask.h
+ mqrspec.h
+ mmask.h)
+```
+
+### Configuration Options
+
+| Option | Default | Description |
+|---|---|---|
+| `WITH_TOOLS` | `YES` | Build the `qrencode` CLI tool |
+| `WITH_TESTS` | `NO` | Build test programs |
+| `WITHOUT_PNG` | `NO` | Disable PNG support (even if libpng is found) |
+| `BUILD_SHARED_LIBS` | `NO` | Build shared library instead of static |
+| `GPROF` | `OFF` | Add `-pg` for gprof profiling |
+| `COVERAGE` | `OFF` | Add `--coverage` for gcov |
+| `ASAN` | `OFF` | Enable AddressSanitizer |
+
+### Basic Build
+
+```bash
+cd genqrcode
+mkdir build && cd build
+cmake ..
+make
+```
+
+### Build with All Options
+
+```bash
+cmake .. \
+ -DWITH_TOOLS=YES \
+ -DWITH_TESTS=YES \
+ -DBUILD_SHARED_LIBS=YES \
+ -DCMAKE_INSTALL_PREFIX=/usr/local \
+ -DCMAKE_BUILD_TYPE=Release
+make -j$(nproc)
+```
+
+### Shared Library Build
+
+```bash
+cmake .. -DBUILD_SHARED_LIBS=YES
+make
+```
+
+On MSVC, this automatically sets `CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS=ON`. The shared library gets proper versioning:
+
+```cmake
+set_target_properties(qrencode PROPERTIES
+ VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}
+ SOVERSION ${PROJECT_VERSION_MAJOR})
+```
+
+This produces `libqrencode.so.4.1.1` with symlinks `libqrencode.so.4` and `libqrencode.so`.
+
+### Building Without PNG
+
+If you don't need the CLI tool to output PNG files:
+
+```bash
+cmake .. -DWITHOUT_PNG=YES
+```
+
+The CLI tool will still be built (if `WITH_TOOLS=YES`) but will print an error if PNG output is requested at runtime.
+
+### Building Tests
+
+```bash
+cmake .. -DWITH_TESTS=YES
+make
+ctest
+```
+
+When `WITH_TESTS=YES`:
+- The `STATIC_IN_RELEASE` macro is defined as empty (not `static`), exposing internal functions
+- The `WITH_TESTS` macro is defined, enabling test-only code paths
+- The `tests/` subdirectory is included
+
+The test CMakeLists.txt creates these test executables:
+
+| Test | Required Dependencies |
+|---|---|
+| `test_bitstream` | None |
+| `test_estimatebit` | None |
+| `test_split` | None |
+| `test_qrinput` | iconv |
+| `test_qrspec` | iconv |
+| `test_mqrspec` | iconv |
+| `test_qrencode` | iconv |
+| `test_split_urls` | iconv |
+| `test_monkey` | iconv |
+| `test_mask` | iconv + VLA support |
+| `test_mmask` | iconv + VLA support |
+| `test_rs` | iconv + VLA support |
+
+### Sanitizer and Profiling Builds
+
+```bash
+# AddressSanitizer
+cmake .. -DASAN=ON
+make
+
+# gprof profiling
+cmake .. -DGPROF=ON
+make
+
+# Code coverage
+cmake .. -DCOVERAGE=ON
+make
+# ... run tests ...
+gcov *.c
+```
+
+The ASAN option adds `-fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls`.
+
+### Cross-Compilation (MinGW)
+
+The project includes `toolchain-mingw32.cmake` for cross-compiling to Windows:
+
+```bash
+cmake .. -DCMAKE_TOOLCHAIN_FILE=../toolchain-mingw32.cmake
+make
+```
+
+### Installation
+
+```bash
+sudo make install
+```
+
+Installed files:
+
+| File | Destination |
+|---|---|
+| `libqrencode.a` / `.so` | `${CMAKE_INSTALL_LIBDIR}` |
+| `qrencode.h` | `${CMAKE_INSTALL_INCLUDEDIR}` |
+| `qrencode` (CLI) | `${CMAKE_INSTALL_BINDIR}` |
+| `libqrencode.pc` | `${CMAKE_INSTALL_LIBDIR}/pkgconfig` |
+| `qrencode.1` | `${CMAKE_INSTALL_MANDIR}/man1` |
+
+The pkg-config file is generated from `libqrencode.pc.in`:
+
+```
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libqrencode
+Description: A QR Code encoding library
+Version: @VERSION@
+Libs: -L${libdir} -lqrencode @LIBPTHREAD@
+Cflags: -I${includedir}
+```
+
+### CMake System Checks
+
+The CMakeLists.txt performs these checks at configure time:
+
+```cmake
+check_include_file(dlfcn.h HAVE_DLFCN_H)
+check_include_file(inttypes.h HAVE_INTTYPES_H)
+check_include_file(memory.h HAVE_MEMORY_H)
+check_include_file(stdint.h HAVE_STDINT_H)
+check_include_file(stdlib.h HAVE_STDLIB_H)
+check_include_file(strings.h HAVE_STRINGS_H)
+check_include_file(string.h HAVE_STRING_H)
+check_include_file(getopt.h HAVE_GETOPT_H)
+check_include_file(sys/time.h HAVE_SYS_TIME_H)
+check_include_file(time.h HAVE_TIME_H)
+check_include_file(pthread.h HAVE_PTHREAD_H)
+
+check_function_exists(strdup HAVE_STRDUP)
+```
+
+Preprocessor defines always set:
+```cmake
+add_definitions(-DMAJOR_VERSION=${PROJECT_VERSION_MAJOR})
+add_definitions(-DMINOR_VERSION=${PROJECT_VERSION_MINOR})
+add_definitions(-DMICRO_VERSION=${PROJECT_VERSION_PATCH})
+add_definitions(-DVERSION="${PROJECT_VERSION}")
+add_definitions(-DHAVE_SDL=0)
+```
+
+### MSVC-Specific Handling
+
+On MSVC, additional definitions are added:
+
+```cmake
+add_definitions(-Dstrcasecmp=_stricmp)
+add_definitions(-Dstrncasecmp=_strnicmp)
+add_definitions(-D_CRT_SECURE_NO_WARNINGS)
+add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE)
+```
+
+When building the CLI tool on MSVC, `getopt.h` and `getopt.lib` must be found separately, as MSVC does not provide them:
+
+```cmake
+find_path(GETOPT_INCLUDE_DIR getopt.h PATH_SUFFIXES include)
+find_library(GETOPT_LIBRARIES getopt PATH_SUFFIXES lib)
+```
+
+---
+
+## Autotools Build
+
+### Generating Configure Script
+
+If building from Git (no `configure` script present):
+
+```bash
+./autogen.sh
+```
+
+This runs `autoreconf -i` to generate `configure`, `Makefile.in`, and related files. Required packages on Ubuntu/Debian:
+
+```bash
+sudo apt install autoconf automake autotools-dev libtool pkg-config
+```
+
+### Configuration
+
+```bash
+./configure [OPTIONS]
+```
+
+#### Configure Options
+
+| Option | Default | Description |
+|---|---|---|
+| `--enable-thread-safety` | `yes` | Enable pthread-based thread safety |
+| `--without-png` | (with png) | Disable libpng support |
+| `--with-tools` | `yes` | Build CLI tool |
+| `--without-tools` | — | Skip CLI tool |
+| `--with-tests` | `no` | Build test suite |
+| `--enable-gprof` | `no` | Enable gprof profiling (`-g -pg`) |
+| `--enable-gcov` | `no` | Enable gcov coverage (`--coverage`) |
+| `--enable-asan` | `no` | Enable AddressSanitizer |
+| `--prefix=DIR` | `/usr/local` | Installation prefix |
+
+### Basic Build
+
+```bash
+./configure
+make
+sudo make install
+sudo ldconfig
+```
+
+### Disabling Tools
+
+```bash
+./configure --without-tools
+make
+```
+
+### Building Tests
+
+```bash
+./configure --with-tests
+make
+make check
+```
+
+When `--with-tests` is given, `configure.ac` also:
+- Checks for SDL 2.0 (for the `view_qrcode` viewer)
+- Checks for iconv (for the test decoder)
+- Defines `STATIC_IN_RELEASE` as empty
+- Defines `WITH_TESTS=1`
+
+### Thread Safety
+
+Thread safety is enabled by default:
+
+```bash
+# Explicitly enable (default)
+./configure --enable-thread-safety
+
+# Disable
+./configure --disable-thread-safety
+```
+
+When enabled, `configure.ac` checks for `pthread_mutex_init` in libpthread. If found:
+- `HAVE_LIBPTHREAD=1` is defined
+- `-pthread` is added to `CFLAGS`
+- `-lpthread` is added to the linker flags (via `LIBPTHREAD` substitution)
+
+### Library Versioning (Autotools)
+
+The Automake configuration uses libtool versioning:
+
+```makefile
+libqrencode_la_LDFLAGS = -version-number $(MAJOR_VERSION):$(MINOR_VERSION):$(MICRO_VERSION)
+```
+
+For version 4.1.1, this produces `libqrencode.so.4.1.1`.
+
+### MinGW Cross-Compilation
+
+The configure script detects MinGW targets:
+
+```m4
+case "${target}" in
+*-*-mingw*)
+ mingw=yes
+esac
+```
+
+On MinGW, additional linker flags are applied:
+
+```makefile
+libqrencode_la_LDFLAGS += -no-undefined -avoid-version -Wl,--nxcompat -Wl,--dynamicbase
+```
+
+### Full Configure Example
+
+```bash
+./configure \
+ --prefix=/opt/qrencode \
+ --enable-thread-safety \
+ --with-tools \
+ --with-tests \
+ --enable-asan
+make -j$(nproc)
+make check
+sudo make install
+```
+
+### Source Distribution
+
+To create a source tarball:
+
+```bash
+make dist
+```
+
+The `EXTRA_DIST` variable ensures additional files are included:
+
+```makefile
+EXTRA_DIST = libqrencode.pc.in autogen.sh configure.ac acinclude.m4 \
+ Makefile.am tests/Makefile.am \
+ qrencode.1.in Doxyfile \
+ CMakeLists.txt cmake/FindIconv.cmake
+```
+
+---
+
+## vcpkg
+
+libqrencode is available through Microsoft's vcpkg package manager:
+
+```bash
+git clone https://github.com/Microsoft/vcpkg.git
+cd vcpkg
+./bootstrap-vcpkg.sh
+./vcpkg integrate install
+./vcpkg install libqrencode
+```
+
+---
+
+## Using the Installed Library
+
+### pkg-config
+
+```bash
+# Compile
+gcc -c myapp.c $(pkg-config --cflags libqrencode)
+
+# Link
+gcc -o myapp myapp.o $(pkg-config --libs libqrencode)
+```
+
+### CMake (as dependency)
+
+In your application's `CMakeLists.txt`:
+
+```cmake
+find_package(PkgConfig REQUIRED)
+pkg_check_modules(QRENCODE REQUIRED libqrencode)
+
+target_include_directories(myapp PRIVATE ${QRENCODE_INCLUDE_DIRS})
+target_link_libraries(myapp ${QRENCODE_LIBRARIES})
+```
+
+Or directly:
+
+```cmake
+find_library(QRENCODE_LIB qrencode)
+find_path(QRENCODE_INCLUDE qrencode.h)
+
+target_include_directories(myapp PRIVATE ${QRENCODE_INCLUDE})
+target_link_libraries(myapp ${QRENCODE_LIB})
+```
+
+### Direct Compilation
+
+For simple projects, you can compile the library sources directly into your project:
+
+```bash
+gcc -c qrencode.c qrinput.c bitstream.c qrspec.c rsecc.c split.c mask.c mqrspec.c mmask.c
+ar rcs libqrencode.a qrencode.o qrinput.o bitstream.o qrspec.o rsecc.o split.o mask.o mqrspec.o mmask.o
+gcc -o myapp myapp.c -L. -lqrencode
+```
+
+You must define the version macros:
+```bash
+gcc -DMAJOR_VERSION=4 -DMINOR_VERSION=1 -DMICRO_VERSION=1 \
+ -DVERSION=\"4.1.1\" -DHAVE_STRDUP=1 \
+ -DSTATIC_IN_RELEASE=static \
+ -c qrencode.c qrinput.c bitstream.c qrspec.c rsecc.c split.c mask.c mqrspec.c mmask.c
+```
+
+---
+
+## Build Output Summary
+
+| Build Target | File | Description |
+|---|---|---|
+| Library (static) | `libqrencode.a` | Static library |
+| Library (shared) | `libqrencode.so.4.1.1` | Shared library |
+| CLI tool | `qrencode` | Command-line encoder (built from `qrenc.c`) |
+| pkg-config | `libqrencode.pc` | Generated from `libqrencode.pc.in` |
+| Man page | `qrencode.1` | Generated from `qrencode.1.in` |
+| Public header | `qrencode.h` | Only public header installed |
+
+---
+
+## Configuration Summary Output
+
+The CMake build prints a detailed configuration summary:
+
+```
+------------------------------------------------------------
+[QRencode] Configuration summary.
+------------------------------------------------------------
+ System configuration:
+ .. Processor type .............. = x86_64
+ .. CMake version ............... = 3.x.x
+ Dependencies:
+ .. Thread library .............. = -lpthread
+ .. Iconv ....................... = TRUE
+ .. PNG ......................... = TRUE
+ Project configuration:
+ .. Build test programs ......... = YES
+ .. Build utility tools ......... = YES
+ .. Disable PNG support ......... = NO
+ .. Installation prefix ......... = /usr/local
+------------------------------------------------------------
+```
+
+The Autotools build prints compiler flags:
+
+```
+Options used to compile and link:
+ CC = gcc
+ CFLAGS = -Wall -pthread
+ CXX = g++
+ LDFLAGS =
+```
+
+---
+
+## Generating API Documentation
+
+The project includes a `Doxyfile` for generating API documentation with Doxygen:
+
+```bash
+doxygen Doxyfile
+```
+
+This generates HTML documentation from the `qrencode.h` header comments. The documentation is also available online at:
+https://fukuchi.org/works/qrencode/manual/index.html
+
+---
+
+## Troubleshooting
+
+### "configure: error: cannot find install-sh, install.sh, or shtool"
+
+Run `./autogen.sh` first to generate the Autotools infrastructure.
+
+### "PNG output is disabled at compile time"
+
+The CLI tool was built without libpng. Install libpng-dev and rebuild, or use `-DWITHOUT_PNG=NO` with CMake.
+
+### Test failures with missing iconv
+
+Many tests require iconv for the decoder library. Install libiconv or your system's iconv implementation. On Ubuntu:
+
+```bash
+sudo apt install libc6-dev # glibc includes iconv
+```
+
+### "undefined reference to `pthread_mutex_init`"
+
+Add `-lpthread` to your linker flags, or rebuild with `--disable-thread-safety` if you don't need thread safety.
+
+### CMake can't find getopt.h (MSVC)
+
+On Windows with MSVC, getopt.h is not available by default. Install a compatible getopt implementation, or disable tool building with `-DWITH_TOOLS=NO`.
diff --git a/docs/handbook/genqrcode/cli-usage.md b/docs/handbook/genqrcode/cli-usage.md
new file mode 100644
index 0000000000..3de3e83678
--- /dev/null
+++ b/docs/handbook/genqrcode/cli-usage.md
@@ -0,0 +1,382 @@
+# genqrcode / libqrencode — CLI Usage (`qrencode`)
+
+## Overview
+
+`qrencode` is the command-line tool for encoding data into QR Code symbols. It is built from `qrenc.c` when `WITH_TOOLS` is enabled (default: ON). The tool supports 14 output formats and all library features including Micro QR, structured append, and FNC1.
+
+---
+
+## Basic Usage
+
+```bash
+# Encode text to PNG
+qrencode -o output.png "Hello, World!"
+
+# Read from stdin
+echo "Hello" | qrencode -o output.png
+
+# Encode to terminal (UTF-8)
+qrencode -t UTF8 "Hello"
+
+# Encode to terminal (ANSI colors)
+qrencode -t ANSI "Hello"
+```
+
+---
+
+## Command-Line Options
+
+### Input Options
+
+| Flag | Long | Description |
+|---|---|---|
+| `-i FILE` | `--input=FILE` | Input file (default: stdin, `-` for stdin) |
+| (positional) | | Input string (alternative to `-i` or stdin) |
+
+### Output Options
+
+| Flag | Long | Description |
+|---|---|---|
+| `-o FILE` | `--output=FILE` | Output file (default: stdout) |
+| `-t TYPE` | `--type=TYPE` | Output format (see formats below) |
+| `-s SIZE` | `--size=SIZE` | Module size in pixels (PNG/EPS/SVG/XPM) |
+| `-m MARGIN` | `--margin=MARGIN` | Margin width in modules (default: 4 for QR, 2 for MQR) |
+| `-d DPI` | `--dpi=DPI` | DPI for EPS output |
+| `--rle` | | Run-length encoding for SVG |
+| `--svg-path` | | Embed path in SVG |
+| `--inline` | | Inline SVG (no XML header) |
+
+### Encoding Options
+
+| Flag | Long | Description |
+|---|---|---|
+| `-v VERSION` | `--symversion=VERSION` | Symbol version (0 = auto, 1–40 for QR, 1–4 for MQR) |
+| `-l LEVEL` | `--level=LEVEL` | EC level: L, M, Q, H |
+| `-8` | `--8bit` | 8-bit mode (no mode optimization) |
+| `-k` | `--kanji` | Kanji mode (assume Shift-JIS input) |
+| `-c` | `--casesensitive` | Case-sensitive encoding |
+| `-S` | `--structured` | Structured append mode |
+| `-M` | `--micro` | Micro QR Code |
+| `--strict-version` | | Fail if data doesn't fit in specified version |
+
+### Color Options
+
+| Flag | Long | Description |
+|---|---|---|
+| `--foreground=RRGGBB[AA]` | | Foreground color (hex, default: 000000) |
+| `--background=RRGGBB[AA]` | | Background color (hex, default: FFFFFF) |
+
+### Other
+
+| Flag | Long | Description |
+|---|---|---|
+| `-V` | `--version` | Print library version |
+| `-h` | `--help` | Print help |
+
+---
+
+## Output Formats
+
+The `--type` / `-t` flag accepts:
+
+| Type | Description |
+|---|---|
+| `PNG` | PNG image (default when -o ends in .png) |
+| `PNG32` | 32-bit PNG with alpha channel |
+| `EPS` | Encapsulated PostScript |
+| `SVG` | Scalable Vector Graphics |
+| `XPM` | X PixMap |
+| `ANSI` | ANSI terminal colors (2 rows per line) |
+| `ANSI256` | ANSI 256-color terminal |
+| `ASCII` | ASCII art (## for dark, spaces for light) |
+| `ASCIIi` | Inverted ASCII art |
+| `UTF8` | Unicode block characters (half-height modules) |
+| `UTF8i` | Inverted UTF8 |
+| `ANSIUTF8` | UTF-8 with ANSI color codes |
+| `ANSIUTF8i` | Inverted ANSIUTF8 |
+| `ANSI256UTF8` | UTF-8 with ANSI 256-color codes |
+
+Auto-detection: if `-t` is not specified, the format is inferred from the output filename extension:
+- `.png` → PNG
+- `.eps` → EPS
+- `.svg` → SVG
+- `.xpm` → XPM
+- Otherwise: PNG to file, UTF8 to terminal
+
+Defined in `qrenc.c`:
+
+```c
+enum imageType {
+ PNG_TYPE,
+ PNG32_TYPE,
+ EPS_TYPE,
+ SVG_TYPE,
+ XPM_TYPE,
+ ANSI_TYPE,
+ ANSI256_TYPE,
+ ASCII_TYPE,
+ ASCIIi_TYPE,
+ UTF8_TYPE,
+ UTF8i_TYPE,
+ ANSIUTF8_TYPE,
+ ANSIUTF8i_TYPE,
+ ANSI256UTF8_TYPE
+};
+```
+
+---
+
+## Output Writers
+
+### writePNG / writePNG32
+
+```c
+static int writePNG(const QRcode *qrcode, const char *outfile,
+ enum imageType type)
+```
+
+Uses libpng. Module size and margin affect dimensions. Supports foreground/background colors and alpha channel (PNG32).
+
+### writeEPS
+
+```c
+static int writeEPS(const QRcode *qrcode, const char *outfile)
+```
+
+Generates PostScript with module rectangles. Respects DPI setting.
+
+### writeSVG
+
+```c
+static int writeSVG(const QRcode *qrcode, const char *outfile)
+```
+
+Options:
+- `--rle`: Uses run-length encoding for horizontal module runs (smaller files)
+- `--svg-path`: Uses a single SVG path element instead of rectangles
+- `--inline`: Omits XML declaration and DOCTYPE
+
+### writeUTF8
+
+```c
+static void writeUTF8(const QRcode *qrcode, const char *outfile,
+ int use_ansi, int invert)
+```
+
+Uses Unicode block-drawing characters to display 2 rows per terminal line:
+- `█` (U+2588): both dark
+- `▀` (U+2580): top dark, bottom light
+- `▄` (U+2584): top light, bottom dark
+- ` `: both light
+
+### writeANSI
+
+```c
+static void writeANSI(const QRcode *qrcode, const char *outfile,
+ int use256, int invert)
+```
+
+Uses ANSI escape codes for colored terminal output. Two rows per line with `▀` characters.
+
+### writeASCII
+
+```c
+static void writeASCII(const QRcode *qrcode, const char *outfile, int invert)
+```
+
+Simple ASCII output: `##` for dark modules, ` ` for light modules.
+
+---
+
+## Encoding Logic
+
+The `encode()` function dispatches to the appropriate library function:
+
+```c
+static QRcode *encode(const unsigned char *intext, int length)
+{
+ QRcode *code;
+
+ if(micro) {
+ if(eightbit) {
+ code = QRcode_encodeDataMQR(length, intext, version, level);
+ } else {
+ code = QRcode_encodeStringMQR((char *)intext, version, level,
+ hint, casesensitive);
+ }
+ } else if(eightbit) {
+ code = QRcode_encodeData(length, intext, version, level);
+ } else {
+ code = QRcode_encodeString((char *)intext, version, level,
+ hint, casesensitive);
+ }
+
+ return code;
+}
+```
+
+### Structured Append
+
+When `-S` is specified, `qrencodeStructured()` is called instead:
+
+```c
+static void qrencodeStructured(const unsigned char *intext, int length,
+ const char *outfile)
+{
+ QRcode_List *qrlist, *p;
+ char filename[FILENAME_MAX];
+ int i = 1;
+
+ if(eightbit) {
+ qrlist = QRcode_encodeDataStructured(length, intext, version, level);
+ } else {
+ qrlist = QRcode_encodeStringStructured((char *)intext, version, level,
+ hint, casesensitive);
+ }
+
+ for(p = qrlist; p != NULL; p = p->next) {
+ // Generate filename with sequence number
+ snprintf(filename, FILENAME_MAX, "%s-%02d.png", outfile_base, i);
+ writePNG(p->code, filename, image_type);
+ i++;
+ }
+ QRcode_List_free(qrlist);
+}
+```
+
+Each symbol in the structured set is written to a separate file with sequence numbering.
+
+---
+
+## Color Parsing
+
+Foreground and background colors are parsed from hex strings:
+
+```c
+static int color_set(unsigned char color[4], const char *value)
+{
+ // Parse RRGGBB or RRGGBBAA hex string
+ int len = strlen(value);
+ if(len == 6) {
+ sscanf(value, "%02hhx%02hhx%02hhx",
+ &color[0],
+ &color[1],
+ &color[2]);
+ color[3] = 255; // fully opaque
+ } else if(len == 8) {
+ sscanf(value, "%02hhx%02hhx%02hhx%02hhx",
+ &color[0],
+ &color[1],
+ &color[2],
+ &color[3]);
+ }
+ // ...
+}
+```
+
+Default colors:
+```c
+static unsigned char fg_color[4] = {0, 0, 0, 255}; // black
+static unsigned char bg_color[4] = {255, 255, 255, 255}; // white
+```
+
+---
+
+## Examples
+
+### Generate PNG with custom version and EC level
+
+```bash
+qrencode -v 5 -l H -o code.png "Secure data"
+```
+
+### Generate SVG with custom colors
+
+```bash
+qrencode -t SVG --foreground=336699 --background=FFFFFF -o code.svg "Hello"
+```
+
+### Generate Micro QR to terminal
+
+```bash
+qrencode -M -v 3 -l L -t UTF8 "12345"
+```
+
+### Force 8-bit encoding (no mode optimization)
+
+```bash
+qrencode -8 -o code.png "Already know encoding mode"
+```
+
+### Structured append — split across multiple symbols
+
+```bash
+qrencode -S -v 5 -l M -o codes "Very long text that needs splitting..."
+# Outputs: codes-01.png, codes-02.png, ...
+```
+
+### Case sensitivity (alphanumeric mode requires upper case)
+
+```bash
+qrencode -o code.png "HELLO WORLD" # upper case fits the compact alphanumeric mode
+qrencode -o code.png "hello world" # default is case-sensitive: lower case forces 8-bit mode
+```
+
+### Custom module size and margin
+
+```bash
+qrencode -s 10 -m 2 -o code.png "Hello"
+# 10px per module, 2-module margin
+```
+
+### Inline SVG for HTML embedding
+
+```bash
+qrencode -t SVG --inline --svg-path -o- "Hello" > page.html
+```
+
+### Read binary data from file
+
+```bash
+qrencode -8 -i binary_file.dat -o code.png
+```
+
+### Print to terminal with ANSI colors
+
+```bash
+qrencode -t ANSI256 "https://example.com"
+```
+
+---
+
+## Global Variables
+
+Key globals in `qrenc.c` that control behavior:
+
+```c
+static int casesensitive = 1;
+static int eightbit = 0;
+static int version = 0;
+static int size = 3;
+static int margin = -1;
+static int dpi = 72;
+static int structured = 0;
+static int rle = 0;
+static int svg_path = 0;
+static int micro = 0;
+static int inline_svg = 0;
+static int strict_versioning = 0;
+static QRecLevel level = QR_ECLEVEL_L;
+static QRencodeMode hint = QR_MODE_8;
+static unsigned char fg_color[4] = {0, 0, 0, 255};
+static unsigned char bg_color[4] = {255, 255, 255, 255};
+```
+
+---
+
+## Dependencies
+
+- **libpng** (optional): Required for PNG output. Disabled with `--without-png` / `-DWITHOUT_PNG=ON`.
+- **zlib**: Required by libpng.
+
+When libpng is not available, PNG output types are disabled and the tool falls back to text-based formats. The build system detects libpng via `pkg-config` or `FindPNG.cmake`.
diff --git a/docs/handbook/genqrcode/code-style.md b/docs/handbook/genqrcode/code-style.md
new file mode 100644
index 0000000000..d85ee7a416
--- /dev/null
+++ b/docs/handbook/genqrcode/code-style.md
@@ -0,0 +1,351 @@
+# genqrcode / libqrencode — Code Style and Conventions
+
+## Naming Conventions
+
+### Module Prefixes
+
+Every function and type is prefixed with its module:
+
+| Prefix | Module | File |
+|---|---|---|
+| `QRcode_` | Encoded symbol / high-level API | `qrencode.c` |
+| `QRinput_` | Input data management | `qrinput.c` |
+| `QRinput_Struct_` | Structured append input | `qrinput.c` |
+| `QRspec_` | QR Code spec tables | `qrspec.c` |
+| `MQRspec_` | Micro QR spec tables | `mqrspec.c` |
+| `QRraw_` | Raw RS block management | `qrencode.c` |
+| `MQRraw_` | Micro QR raw management | `qrencode.c` |
+| `Mask_` | Full QR masking | `mask.c` |
+| `MMask_` | Micro QR masking | `mmask.c` |
+| `RSECC_` | Reed-Solomon encoder | `rsecc.c` |
+| `BitStream_` | Bit stream builder | `bitstream.c` |
+| `Split_` | Input string splitter | `split.c` |
+| `FrameFiller_` | Module placement | `qrencode.c` |
+
+### Function Naming Patterns
+
+- `*_new()` / `*_free()` — Constructor / destructor pairs
+- `*_init()` — In-place initialization (no allocation)
+- `*_get*()` / `*_set*()` — Accessor / mutator pairs
+- `*_encode*()` — Encoding operations
+- `*_check*()` — Validation without side effects
+- `*_estimate*()` — Capacity estimation
+
+### Static Functions
+
+Internal functions use the module prefix plus a descriptive name:
+
+```c
+static int QRinput_encodeModeNum(QRinput_List *entry, int version, int mqr);
+static int QRinput_checkModeNum(int size, const char *data);
+static int QRinput_estimateBitsModeNum(int size);
+```
+
+---
+
+## STATIC_IN_RELEASE Macro
+
+A key pattern for testability — internal functions are static in release builds but visible in test builds:
+
+```c
+#ifdef WITH_TESTS
+#define STATIC_IN_RELEASE
+#else
+#define STATIC_IN_RELEASE static
+#endif
+```
+
+When `WITH_TESTS` is defined (via CMake or autotools), `STATIC_IN_RELEASE` expands to nothing instead of `static`, so internal functions get external linkage and become callable from test programs.
+
+Usage throughout the codebase:
+
+```c
+STATIC_IN_RELEASE int BitStream_appendNum(BitStream *bstream, size_t bits, unsigned int num);
+STATIC_IN_RELEASE int BitStream_appendBytes(BitStream *bstream, size_t size, unsigned char *data);
+```
+
+This avoids `__attribute__((visibility("default")))` hacks — simply controlling `static` linkage via a macro.
+
+---
+
+## The `qrencode_inner.h` Header
+
+Test programs include `qrencode_inner.h` instead of (or in addition to) `qrencode.h`. This header exposes:
+
+```c
+// Internal struct types
+typedef struct { ... } RSblock;
+typedef struct { ... } QRRawCode;
+typedef struct { ... } MQRRawCode;
+
+// Internal functions for testing
+extern unsigned char *FrameFiller_test(int version);
+extern QRcode *QRcode_encodeMask(QRinput *input, int mask);
+extern QRcode *QRcode_encodeMaskMQR(QRinput *input, int mask);
+extern QRcode *QRcode_new(int version, int width, unsigned char *data);
+```
+
+This separation keeps the public API clean while enabling thorough unit testing.
+
+---
+
+## Error Handling Pattern
+
+### errno-Based Errors
+
+All functions that can fail set `errno` and return a sentinel:
+
+```c
+// Pointer-returning functions: return NULL on error
+QRinput *QRinput_new2(int version, QRecLevel level)
+{
+ if(version < 0 || version > QRSPEC_VERSION_MAX) {
+ errno = EINVAL;
+ return NULL;
+ }
+ if(/* invalid level */) {
+ errno = EINVAL;
+ return NULL;
+ }
+ // ...
+ input = malloc(sizeof(QRinput));
+ if(input == NULL) {
+ // errno set by malloc (ENOMEM)
+ return NULL;
+ }
+ // ...
+}
+
+// int-returning functions: return -1 on error
+int QRinput_append(QRinput *input, QRencodeMode mode, int size,
+ const unsigned char *data)
+{
+ int ret = QRinput_check(mode, size, data);
+ if(ret != 0) {
+ errno = EINVAL;
+ return -1;
+ }
+ // ...
+}
+```
+
+### Common errno Values
+
+- `EINVAL` — Invalid parameters
+- `ENOMEM` — Memory allocation failure
+- `ERANGE` — Data too large for any QR version
+
+---
+
+## Memory Management
+
+### Ownership Model
+
+- `QRinput_append()` **copies** its data — the caller retains ownership
+- `QRcode_encodeInput()` does **not** free the input — caller must free both
+- `QRinput_Struct_free()` frees all contained `QRinput` objects
+- `QRcode_List_free()` frees all contained `QRcode` objects
+- `QRcode_free()` frees the `data` array inside the `QRcode`
+
+### Allocation Patterns
+
+```c
+// Typical constructor pattern
+QRinput *QRinput_new(void)
+{
+ QRinput *input;
+ input = (QRinput *)malloc(sizeof(QRinput));
+ if(input == NULL) return NULL;
+ // initialize fields
+ input->head = NULL;
+ input->tail = NULL;
+ // ...
+ return input;
+}
+
+// Typical destructor pattern — walk linked list
+void QRinput_free(QRinput *input)
+{
+ if(input != NULL) {
+ QRinput_List *list = input->head;
+ while(list != NULL) {
+ QRinput_List *next = list->next;
+ // free list entry data
+ if(list->data) free(list->data);
+ if(list->bstream) BitStream_free(list->bstream);
+ free(list);
+ list = next;
+ }
+ free(input);
+ }
+}
+```
+
+### BitStream Growth
+
+`BitStream` uses doubling strategy:
+
+```c
+#define DEFAULT_BUFSIZE 128
+
+static int BitStream_allocate(BitStream *bstream, size_t length)
+{
+ // doubles capacity until sufficient
+ while(bstream->datasize < length) {
+ bstream->datasize *= 2;
+ }
+ bstream->data = realloc(bstream->data, bstream->datasize);
+}
+```
+
+---
+
+## Macro Usage
+
+### Spec Accessor Macros
+
+Seven macros for RS block spec access (from `qrspec.h`):
+
+```c
+#define QRspec_rsBlockNum(__spec__) (__spec__[0] + __spec__[3])
+#define QRspec_rsBlockNum1(__spec__) (__spec__[0])
+#define QRspec_rsDataCodes1(__spec__) (__spec__[1])
+#define QRspec_rsEccCodes1(__spec__) (__spec__[2])
+#define QRspec_rsBlockNum2(__spec__) (__spec__[3])
+#define QRspec_rsDataCodes2(__spec__) (__spec__[4])
+#define QRspec_rsEccCodes2(__spec__) (__spec__[2])
+```
+
+### MASKMAKER
+
+Code generation macro for mask functions (see masking docs):
+
+```c
+#define MASKMAKER(__exp__) \
+ int x, y;\
+ int b = 0;\
+ for(y = 0; y < width; y++) {\
+ for(x = 0; x < width; x++) {\
+ if(*s & 0x80) { *d = *s; }\
+ else { *d = *s ^ ((__exp__) == 0); }\
+ b += (int)(*d & 1);\
+ s++; d++;\
+ }\
+ }\
+ return b;
+```
+
+---
+
+## Header Include Pattern
+
+Each module follows a consistent pattern:
+
+```c
+// In .c files:
+#include "config.h" // generated by autotools/cmake
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include "qrencode.h" // public header (if needed)
+#include "qrencode_inner.h" // internal declarations (if needed)
+#include "module.h" // own header
+```
+
+### Config Macros
+
+Key `config.h` defines checked throughout:
+- `HAVE_CONFIG_H` — autotools-generated config
+- `HAVE_LIBPTHREAD` — pthread support available
+- `STATIC_IN_RELEASE` — controls function visibility
+- `WITH_TESTS` — test build mode
+- `HAVE_STRDUP` — strdup availability
+
+---
+
+## Data Type Conventions
+
+- `unsigned char *` for binary data and QR module arrays
+- `int` for sizes, lengths, and error returns
+- `unsigned int` for bit values and format info
+- `signed char` for the AN lookup table (needs -1 sentinel)
+- Opaque types via `typedef struct _Name Name` pattern
+
+---
+
+## Const Correctness
+
+Input data parameters are consistently `const`:
+
+```c
+int RSECC_encode(int data_length, int ecc_length,
+ const unsigned char *data, unsigned char *ecc);
+
+int QRinput_append(QRinput *input, QRencodeMode mode, int size,
+ const unsigned char *data);
+```
+
+Spec tables are `static const`:
+
+```c
+static const QRspec_Capacity qrspecCapacity[QRSPEC_VERSION_MAX + 1] = { ... };
+static const int eccTable[QRSPEC_VERSION_MAX+1][4][2] = { ... };
+static const unsigned int formatInfo[4][8] = { ... };
+static const signed char QRinput_anTable[128] = { ... };
+```
+
+---
+
+## Thread Safety Approach
+
+The library uses a minimal locking strategy:
+
+1. **Single mutex** in `rsecc.c`: `RSECC_mutex` protects lazy initialization of GF tables and generator polynomials
+2. **No global mutable state** after initialization — spec tables are `const`, generator polynomials become read-only after first build
+3. **Local allocations** in encoding functions — each call creates its own frame, mask buffers, etc.
+4. **Conditional compilation**: `#ifdef HAVE_LIBPTHREAD` guards all pthread usage
+
+---
+
+## Build-System Integration
+
+### CMake Conditionals
+
+```cmake
+if(WITH_TESTS)
+ add_definitions(-DWITH_TESTS)
+endif()
+```
+
+When `WITH_TESTS` is on, `STATIC_IN_RELEASE` is not defined, exposing internal functions.
+
+### Autotools Conditionals
+
+```m4
+AC_ARG_WITH([tests],
+ [AS_HELP_STRING([--with-tests], [build tests])],
+ [], [with_tests=no])
+if test x$with_tests = xyes ; then
+ AC_DEFINE([WITH_TESTS], [1])
+fi
+```
+
+---
+
+## File Organization
+
+Each module is a `.c` / `.h` pair:
+
+- `bitstream.c` / `bitstream.h` — Bit stream container
+- `mask.c` / `mask.h` — QR masking and penalty
+- `mmask.c` / `mmask.h` — Micro QR masking
+- `qrencode.c` / `qrencode.h` — Public API + encoder core
+- `qrinput.c` / `qrinput.h` — Input management + mode encoding
+- `qrspec.c` / `qrspec.h` — QR Code specification tables
+- `mqrspec.c` / `mqrspec.h` — Micro QR specification tables
+- `rsecc.c` / `rsecc.h` — Reed-Solomon encoder
+- `split.c` / `split.h` — String splitter / mode optimizer
+
+Standalone files:
+- `qrencode_inner.h` — Test-only internal declarations
+- `qrenc.c` — CLI tool (not a library module)
diff --git a/docs/handbook/genqrcode/encoding-modes.md b/docs/handbook/genqrcode/encoding-modes.md
new file mode 100644
index 0000000000..334f8a0866
--- /dev/null
+++ b/docs/handbook/genqrcode/encoding-modes.md
@@ -0,0 +1,591 @@
+# genqrcode / libqrencode — Encoding Modes
+
+## Mode Indicator Overview
+
+Every QR Code data segment begins with a 4-bit mode indicator (Micro QR uses shorter indicators: 0 bits for M1, 1 bit for M2, 2 bits for M3, 3 bits for M4). The library defines modes in the `QRencodeMode` enum:
+
+| Mode | Enum Value | Mode Indicator (QR) | Description |
+|---|---|---|---|
+| Numeric | `QR_MODE_NUM` (0) | 0001 | Digits 0–9 |
+| Alphanumeric | `QR_MODE_AN` (1) | 0010 | 45-character set |
+| 8-bit Byte | `QR_MODE_8` (2) | 0100 | Arbitrary byte data |
+| Kanji | `QR_MODE_KANJI` (3) | 1000 | Shift-JIS double-byte |
+| ECI | `QR_MODE_ECI` (6) | 0111 | Extended Channel Interpretation |
+| FNC1 (1st) | `QR_MODE_FNC1FIRST` (7) | 0101 | GS1 first position |
+| FNC1 (2nd) | `QR_MODE_FNC1SECOND` (8) | 1001 | GS1 second position |
+| Structured | `QR_MODE_STRUCTURE` (4) | 0011 | Internal: structured append |
+| Terminator | `QR_MODE_NUL` (-1) | 0000 | Internal: end of data |
+
+---
+
+## Numeric Mode (`QR_MODE_NUM`)
+
+Numeric mode encodes digit characters '0'–'9' at ~3.3 bits per character.
+
+### Encoding Algorithm
+
+From `QRinput_encodeModeNum()` in `qrinput.c`:
+
+```c
+static int QRinput_encodeModeNum(QRinput_List *entry, int version, int mqr)
+```
+
+1. **Mode indicator**: 4 bits `0001` (or fewer bits for MQR)
+2. **Character count indicator**: Variable length from `QRspec_lengthIndicator(QR_MODE_NUM, version)`:
+ - Versions 1–9: 10 bits
+ - Versions 10–26: 12 bits
+ - Versions 27–40: 14 bits
+ - MQR M1: 3 bits, M2: 4 bits, M3: 5 bits, M4: 6 bits
+3. **Data encoding**: Groups of 3 digits → 10 bits, 2 remaining digits → 7 bits, 1 remaining → 4 bits
+
+The core loop:
+
+```c
+for(i = 0; i < words; i++) {
+ val = (entry->data[i*3 ] - '0') * 100;
+ val += (entry->data[i*3+1] - '0') * 10;
+ val += (entry->data[i*3+2] - '0');
+ BitStream_appendNum(entry->bstream, 10, val);
+}
+if(entry->size - words * 3 == 1) {
+ val = entry->data[words*3] - '0';
+ BitStream_appendNum(entry->bstream, 4, val);
+} else if(entry->size - words * 3 == 2) {
+ val = (entry->data[words*3 ] - '0') * 10;
+ val += (entry->data[words*3+1] - '0');
+ BitStream_appendNum(entry->bstream, 7, val);
+}
+```
+
+### Validation
+
+`QRinput_checkModeNum()` verifies every byte is in '0'–'9':
+
+```c
+static int QRinput_checkModeNum(int size, const char *data)
+{
+ int i;
+ for(i = 0; i < size; i++) {
+ if(data[i] < '0' || data[i] > '9')
+ return -1;
+ }
+ return 0;
+}
+```
+
+### Bit Cost Estimation
+
+`QRinput_estimateBitsModeNum()`:
+
+```c
+int QRinput_estimateBitsModeNum(int size)
+{
+ int w = size / 3;
+ int bits = w * 10;
+ switch(size - w * 3) {
+ case 1: bits += 4; break;
+ case 2: bits += 7; break;
+ default: break;
+ }
+ return bits;
+}
+```
+
+---
+
+## Alphanumeric Mode (`QR_MODE_AN`)
+
+Encodes 45 characters at ~5.5 bits per character.
+
+### The `QRinput_anTable` Lookup
+
+Defined in `qrinput.c`, this 128-entry table maps ASCII values to the 45-character alphanumeric set:
+
+```c
+const signed char QRinput_anTable[128] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 36, -1, -1, -1, 37, 38, -1, -1, -1, -1, 39, 40, -1, 41, 42, 43,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 44, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+};
+```
+
+The valid characters and their indices:
+- `0`–`9` → 0–9
+- `A`–`Z` → 10–35
+- Space (`' '`) → 36
+- `$` → 37, `%` → 38, `*` → 39, `+` → 40
+- `-` → 41, `.` → 42, `/` → 43, `:` → 44
+
+### Encoding Algorithm
+
+From `QRinput_encodeModeAn()`:
+
+```c
+for(i = 0; i < words; i++) {
+ val = (unsigned int)QRinput_lookAnTable(entry->data[i*2 ]) * 45;
+ val += (unsigned int)QRinput_lookAnTable(entry->data[i*2+1]);
+ BitStream_appendNum(entry->bstream, 11, val);
+}
+if(entry->size & 1) {
+ val = (unsigned int)QRinput_lookAnTable(entry->data[words*2]);
+ BitStream_appendNum(entry->bstream, 6, val);
+}
+```
+
+- **Pairs** of characters → multiply first by 45, add second → 11-bit value
+- **Odd last character** → 6-bit value
+- Character count indicator lengths: 9/11/13 bits for version groups 1–9/10–26/27–40
+
+### Validation
+
+`QRinput_checkModeAn()` uses `QRinput_lookAnTable()`:
+
+```c
+static int QRinput_checkModeAn(int size, const char *data)
+{
+ int i;
+ for(i = 0; i < size; i++) {
+ if(QRinput_lookAnTable(data[i]) < 0)
+ return -1;
+ }
+ return 0;
+}
+```
+
+Where `QRinput_lookAnTable()` returns -1 for any byte ≥ 128 or any byte mapping to -1 in the table.
+
+---
+
+## 8-bit Byte Mode (`QR_MODE_8`)
+
+Encodes arbitrary bytes at 8 bits per character with no transformation.
+
+### Encoding
+
+From `QRinput_encodeMode8()`:
+
+```c
+static int QRinput_encodeMode8(QRinput_List *entry, int version, int mqr)
+{
+ // ... mode indicator and count ...
+ ret = BitStream_appendBytes(entry->bstream, entry->size, entry->data);
+ // ...
+}
+```
+
+Character count indicator lengths: 8/16/16 bits for version groups.
+
+### Validation
+
+No validation — all byte values 0x00–0xFF are accepted:
+
+```c
+static int QRinput_checkMode8(int size, const char *data)
+{
+ (void)size; (void)data;
+ return 0;
+}
+```
+
+---
+
+## Kanji Mode (`QR_MODE_KANJI`)
+
+Encodes Shift-JIS double-byte characters at 13 bits per character.
+
+### Encoding Algorithm
+
+From `QRinput_encodeModeKanji()`:
+
+```c
+for(i = 0; i < entry->size; i += 2) {
+ val = ((unsigned int)entry->data[i] << 8) | entry->data[i+1];
+ if(val <= 0x9ffc) {
+ val -= 0x8140;
+ } else {
+ val -= 0xc140;
+ }
+ val = (val >> 8) * 0xc0 + (val & 0xff);
+ BitStream_appendNum(entry->bstream, 13, val);
+}
+```
+
+**Steps per character:**
+1. Combine two bytes into a 16-bit value
+2. Subtract `0x8140` (if ≤ 0x9FFC) or `0xC140` (otherwise)
+3. Decompose: high byte × 0xC0 + low byte → 13-bit output
+
+Character count indicator lengths: 8/10/12 bits. Count is **number of characters** (bytes / 2).
+
+### Validation
+
+`QRinput_checkModeKanji()` requires even size, and each pair must be in valid Shift-JIS ranges:
+
+```c
+static int QRinput_checkModeKanji(int size, const unsigned char *data)
+{
+ int i;
+ unsigned int val;
+ if(size & 1) return -1;
+ for(i = 0; i < size; i += 2) {
+ val = ((unsigned int)data[i] << 8) | data[i+1];
+ if(val < 0x8140 || (val > 0x9ffc && val < 0xe040) || val > 0xebbf)
+ return -1;
+ }
+ return 0;
+}
+```
+
+---
+
+## ECI Mode (`QR_MODE_ECI`)
+
+Extended Channel Interpretation selects a character encoding for subsequent data.
+
+### Encoding
+
+From `QRinput_encodeModeECI()`:
+
+```c
+static int QRinput_encodeModeECI(QRinput_List *entry, int version)
+{
+ // ... mode indicator 0111 ...
+ unsigned int ecinum = (entry->data[0])
+ | (entry->data[1] << 8)
+ | (entry->data[2] << 16)
+ | (entry->data[3] << 24);
+ if(ecinum < 128) {
+ BitStream_appendNum(entry->bstream, 8, ecinum);
+ } else if(ecinum < 16384) {
+ BitStream_appendNum(entry->bstream, 2, 2); // 10
+ BitStream_appendNum(entry->bstream, 14, ecinum);
+ } else {
+ BitStream_appendNum(entry->bstream, 3, 6); // 110
+ BitStream_appendNum(entry->bstream, 21, ecinum);
+ }
+ return 0;
+}
+```
+
+ECI number stored as 4-byte little-endian internally. The encoding uses variable-length:
+- 0–127: 8 bits (0xxxxxxx)
+- 128–16383: 16 bits (10xxxxxxxxxxxxxx)
+- 16384–999999: 24 bits (110xxxxxxxxxxxxxxxxxxxxx)
+
+### Usage
+
+ECI numbers are set via `QRinput_appendECIheader()`, not `QRinput_append()`. The function validates `ecinum ≤ 999999` and stores it in little-endian:
+
+```c
+int QRinput_appendECIheader(QRinput *input, unsigned int ecinum)
+{
+ unsigned char data[4];
+ if(ecinum > 999999) { errno = EINVAL; return -1; }
+ data[0] = ecinum & 0xff;
+ data[1] = (ecinum >> 8) & 0xff;
+ data[2] = (ecinum >> 16) & 0xff;
+ data[3] = (ecinum >> 24) & 0xff;
+ return QRinput_append(input, QR_MODE_ECI, 4, data);
+}
+```
+
+Common ECI assignments:
+- 3: ISO/IEC 8859-1 (Latin-1)
+- 20: Shift JIS
+- 26: UTF-8
+
+---
+
+## FNC1 Mode
+
+### First Position (`QR_MODE_FNC1FIRST`)
+
+Used for GS1 QR Codes. Sets mode indicator `0101`.
+
+Encoded via `QRinput_encodeModeFNC1First()`:
+
+```c
+static int QRinput_encodeModeFNC1First(QRinput_List *entry, int version)
+{
+ // Just the mode indicator, no data follows
+ BitStream_appendNum(entry->bstream, 4, MODEID_FNC1FIRST);
+ return 0;
+}
+```
+
+Set via `QRinput_setFNC1First()`.
+
+### Second Position (`QR_MODE_FNC1SECOND`)
+
+Mode indicator `1001`, followed by 1-byte application identifier.
+
+```c
+static int QRinput_encodeModeFNC1Second(QRinput_List *entry, int version)
+{
+ BitStream_appendNum(entry->bstream, 4, MODEID_FNC1SECOND);
+ BitStream_appendBytes(entry->bstream, 1, entry->data);
+ return 0;
+}
+```
+
+Validation: `QRinput_checkModeFNC1Second()` requires `size == 1`.
+
+---
+
+## Structured Append Mode (`QR_MODE_STRUCTURE`)
+
+Internal mode for linking multiple QR Code symbols. Not set directly by users.
+
+```c
+static int QRinput_encodeModeStructure(QRinput_List *entry, int mqr)
+{
+ if(mqr) { errno = EINVAL; return -1; } // not supported in MQR
+ BitStream_appendNum(entry->bstream, 4, MODEID_STRUCTURE);
+ BitStream_appendNum(entry->bstream, 4, entry->data[1] - 1); // total symbols - 1
+ BitStream_appendNum(entry->bstream, 4, entry->data[0] - 1); // current position - 1
+ BitStream_appendBytes(entry->bstream, 1, &entry->data[2]); // parity byte
+ return 0;
+}
+```
+
+Total overhead: 4 + 4 + 4 + 8 = 20 bits per symbol.
+
+---
+
+## Mode Selection — The Split Algorithm
+
+When using high-level functions like `QRcode_encodeString()`, the library automatically selects optimal encoding modes via `Split_splitStringToQRinput()` in `split.c`.
+
+### Mode Identification
+
+`Split_identifyMode()` classifies each byte:
+
+```c
+static QRencodeMode Split_identifyMode(const char *string, QRencodeMode hint)
+{
+ unsigned char c = string[0];
+
+ if(isdigit(c)) {
+ return QR_MODE_NUM;
+ } else if(QRinput_lookAnTable(c) >= 0) {
+ return QR_MODE_AN;
+ } else if(hint == QR_MODE_KANJI) {
+ if(iskanji(c, string[1])) {
+ return QR_MODE_KANJI;
+ }
+ }
+ return QR_MODE_8;
+}
+```
+
+### The Eat Functions
+
+The splitter uses four "eat" functions that greedily consume characters:
+
+**`Split_eatNum()`**: Starts in numeric mode, looks ahead to decide:
+- If next non-digit is alphanumeric, calculates bit cost difference:
+ ```c
+ dif = QRinput_estimateBitsModeNum(run)
+ + QRinput_estimateBitsModeAn(1)
+ - QRinput_estimateBitsModeAn(run + 1);
+ if(dif > 0) {
+ // switch to AN mode — it's cheaper
+ }
+ ```
+- If next non-digit is 8-bit, compares numeric-then-8-bit cost vs. all-8-bit:
+ ```c
+ dif = QRinput_estimateBitsModeNum(run)
+ + 4 + ln // mode switch overhead
+ - QRinput_estimateBitsMode8(run);
+ if(dif > 0) {
+ // encode remaining as 8-bit
+ }
+ ```
+
+**`Split_eatAn()`**: Consumes alphanumeric characters, decides whether to switch to numeric mode or 8-bit mode based on bit efficiency.
+
+**`Split_eatKanji()`**: Consumes consecutive valid Shift-JIS Kanji pairs.
+
+**`Split_eat8()`**: Consumes 8-bit bytes, checks for opportunities to switch to numeric, alphanumeric, or Kanji by looking at upcoming runs.
+
+### Case Sensitivity
+
+When `casesensitive == 0`, `Split_splitStringToQRinput()` calls `dupAndToUpper()`:
+
+```c
+static char *dupAndToUpper(const char *str, QRencodeMode hint)
+{
+ char *newstr, *p;
+ newstr = strdup(str);
+ if(hint == QR_MODE_KANJI) return newstr; // skip for Kanji
+ p = newstr;
+ while(*p) {
+ if(*p >= 'a' && *p <= 'z') *p = (char)((int)*p - 32);
+ p++;
+ }
+ return newstr;
+}
+```
+
+Converting to uppercase allows more characters to use the more efficient alphanumeric mode.
+
+---
+
+## Mode Length Indicator Sizes
+
+The bit width of the character count indicator varies by version. Defined in `qrspec.c`:
+
+```c
+static const int lengthTableBits[4][3] = {
+ {10, 12, 14}, // QR_MODE_NUM
+ { 9, 11, 13}, // QR_MODE_AN
+ { 8, 16, 16}, // QR_MODE_8
+ { 8, 10, 12} // QR_MODE_KANJI
+};
+```
+
+Version groups: 1–9, 10–26, 27–40.
+
+For Micro QR, lengths are defined in `mqrspec.c` via `MQRspec_lengthTableBits[4][4]`:
+
+| Mode | M1 | M2 | M3 | M4 |
+|---|---|---|---|---|
+| NUM | 3 | 4 | 5 | 6 |
+| AN | 0 | 3 | 4 | 5 |
+| 8 | 0 | 0 | 4 | 5 |
+| KANJI | 0 | 0 | 3 | 4 |
+
+A zero means the mode is not available for that version. This is checked by `QRinput_isModeNumValid()` and siblings, which call `MQRspec_maximumWords()`.
+
+---
+
+## Bit Estimation Functions
+
+Used internally for mode optimization and version selection.
+
+### Per-Mode Estimators
+
+```c
+int QRinput_estimateBitsModeNum(int size); // (size/3)*10 + [4|7|0]
+int QRinput_estimateBitsModeAn(int size); // (size/2)*11 + [6|0]
+int QRinput_estimateBitsMode8(int size); // size * 8
+int QRinput_estimateBitsModeKanji(int size); // (size/2) * 13
+```
+
+### Total Stream Estimation
+
+`QRinput_estimateBitStreamSize()` sums across all entries:
+
+```c
+static int QRinput_estimateBitStreamSize(QRinput *input, int version)
+{
+ QRinput_List *list = input->head;
+ int bits = 0;
+ while(list != NULL) {
+ bits += QRinput_estimateBitStreamSizeOfEntry(list, version, input->mqr);
+ list = list->next;
+ }
+ return bits;
+}
+```
+
+Each entry contribution = mode indicator bits + count indicator bits + data bits.
+
+### Version Auto-Selection
+
+`QRinput_estimateVersion()` iterates to convergence:
+
+```c
+static int QRinput_estimateVersion(QRinput *input)
+{
+ int bits, version, prev;
+ version = 0;
+ do {
+ prev = version;
+ bits = QRinput_estimateBitStreamSize(input, prev);
+ version = QRspec_getMinimumVersion((bits + 7) / 8, input->level);
+ if(version < 0) return -1; // ERANGE
+ } while(version > prev);
+ return version;
+}
+```
+
+The version may increase because larger versions have longer count indicators, which in turn require more bits. The loop converges because version increases monotonically and is bounded by 40.
+
+---
+
+## Micro QR Mode Restrictions
+
+Not all modes are available in all Micro QR versions:
+
+| Version | Available Modes |
+|---|---|
+| M1 | Numeric only |
+| M2 | Numeric, Alphanumeric |
+| M3 | Numeric, Alphanumeric, 8-bit, Kanji |
+| M4 | Numeric, Alphanumeric, 8-bit, Kanji |
+
+Enforced by `QRinput_isModeNumValid()`, `QRinput_isModeAnValid()`, `QRinput_isMode8Valid()`, `QRinput_isModeKanjiValid()`. Each calls `MQRspec_maximumWords(version, mode)` and returns error if the result is 0.
+
+---
+
+## BitStream: Internal Encoding Engine
+
+All mode encoders produce output through the `BitStream` type defined in `bitstream.h`:
+
+```c
+typedef struct {
+ size_t length; // current number of bits stored
+ size_t datasize; // allocated capacity in bytes (1 bit per byte!)
+ unsigned char *data;
+} BitStream;
+```
+
+**Storage**: Each bit occupies one byte (values 0 or 1). This wastes memory but simplifies manipulation.
+
+**Core operations:**
+- `BitStream_appendNum(bstream, bits, val)` — Appends `bits` bits from integer `val`
+- `BitStream_appendBytes(bstream, size, data)` — Appends `size` bytes as `size * 8` bits
+- `BitStream_append(bstream, src)` — Concatenates two bit streams
+- `BitStream_toByte(bstream)` — Packs 1-bit-per-byte into 8-bits-per-byte format
+
+**Growth**: `DEFAULT_BUFSIZE = 128`, doubles on overflow via `BitStream_allocate()`.
+
+The final `QRinput_getByteStream()` function calls `QRinput_getBitStream()` (which chains all entry bit streams with padding) and then `BitStream_toByte()` to produce the packed byte array consumed by the RS encoder.
+
+---
+
+## Padding Algorithm
+
+After all data entries are encoded, `QRinput_createBitStream()` calls `QRinput_createPaddingBit()` to fill remaining capacity:
+
+```c
+static int QRinput_createPaddingBit(QRinput *input)
+{
+ // ... calculate remaining capacity ...
+
+ // Add terminator (0000): up to 4 bits for QR, variable for MQR
+ if(bits > terminator) {
+ bits -= terminator;
+ } else {
+ // terminator alone fills remaining space
+ }
+
+ // Align to byte boundary
+ padlen = 8 - (QRinput_lengthOfCode(input) * 8 + input->mqr) % 8;
+ if(padlen == 8) padlen = 0;
+
+ // Fill with alternating 0xEC, 0x11
+ padlen = maxwords - (QRinput_lengthOfCode(input));
+ for(i = 0; i < padlen; i++) {
+ padbuf[i] = (i & 1) ? 0x11 : 0xEC;
+ }
+}
+```
+
+Padding bytes `0xEC` and `0x11` are specified by the QR Code standard.
diff --git a/docs/handbook/genqrcode/error-correction.md b/docs/handbook/genqrcode/error-correction.md
new file mode 100644
index 0000000000..cfa917257c
--- /dev/null
+++ b/docs/handbook/genqrcode/error-correction.md
@@ -0,0 +1,455 @@
+# genqrcode / libqrencode — Error Correction
+
+## Overview
+
+QR Code uses Reed-Solomon error correction to enable reliable scanning even when parts of the symbol are damaged or obscured. libqrencode implements error correction through the `rsecc.c` module (GF(2^8) Reed-Solomon encoder) and coordinates block layout through `qrspec.c` / `mqrspec.c`.
+
+---
+
+## Error Correction Levels
+
+Four levels are defined in `QRecLevel`:
+
+```c
+typedef enum {
+ QR_ECLEVEL_L = 0, // Low — ~7% recovery
+ QR_ECLEVEL_M, // Medium — ~15% recovery
+ QR_ECLEVEL_Q, // Quartile — ~25% recovery
+ QR_ECLEVEL_H // High — ~30% recovery
+} QRecLevel;
+```
+
+Higher error correction means more codewords are devoted to ECC, reducing data capacity.
+
+### Micro QR Restrictions
+
+Not all Micro QR versions support all levels:
+
+| Version | Supported Levels |
+|---|---|
+| M1 | Error detection only (no EC level parameter effect) |
+| M2 | L, M |
+| M3 | L, M |
+| M4 | L, M, Q |
+
+Micro QR never supports `QR_ECLEVEL_H`. This is enforced in `QRinput_newMQR()` via `MQRspec_getECCLength()` returning 0 for invalid combinations.
+
+---
+
+## ECC Specification Tables
+
+### Full QR Code — `eccTable`
+
+Defined in `qrspec.c`:
+
+```c
+static const int eccTable[QRSPEC_VERSION_MAX+1][4][2] = {
+ {{ 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}}, // version 0 (unused)
+ {{ 1, 0}, { 1, 0}, { 1, 0}, { 1, 0}}, // version 1
+ {{ 1, 0}, { 1, 0}, { 1, 0}, { 1, 0}}, // version 2
+ {{ 1, 0}, { 1, 0}, { 2, 0}, { 2, 0}}, // version 3
+ // ... through version 40
+};
+```
+
+Dimensions: `[version][ec_level][2]`. The two values are:
+- `eccTable[v][l][0]` — Number of RS blocks in group 1
+- `eccTable[v][l][1]` — Number of RS blocks in group 2 (0 if only one group)
+
+### ECC Spec Extraction
+
+`QRspec_getEccSpec()` computes a 5-element spec array:
+
+```c
+void QRspec_getEccSpec(int version, QRecLevel level, int spec[5])
+{
+ int b1 = eccTable[version][level][0];
+ int b2 = eccTable[version][level][1];
+ int data = QRspec_getDataLength(version, level);
+ int ecc = QRspec_getECCLength(version, level);
+
+ if(b2 == 0) {
+ spec[0] = b1;
+ spec[1] = data / b1;
+ spec[2] = ecc / b1;
+ spec[3] = 0;
+ spec[4] = 0;
+ } else {
+ spec[0] = b1;
+ spec[1] = data / (b1 + b2);
+ spec[2] = ecc / (b1 + b2);
+ spec[3] = b2;
+ spec[4] = spec[1] + 1;
+ }
+}
+```
+
+The spec layout:
+- `spec[0]` — Number of RS blocks in group 1 (`b1`)
+- `spec[1]` — Data codewords per block in group 1
+- `spec[2]` — ECC codewords per block
+- `spec[3]` — Number of RS blocks in group 2 (`b2`, 0 if none)
+- `spec[4]` — Data codewords per block in group 2 (= spec[1] + 1, or 0)
+
+Accessor macros:
+```c
+#define QRspec_rsBlockNum(__spec__) (__spec__[0] + __spec__[3])
+#define QRspec_rsBlockNum1(__spec__) (__spec__[0])
+#define QRspec_rsDataCodes1(__spec__) (__spec__[1])
+#define QRspec_rsEccCodes1(__spec__) (__spec__[2])
+#define QRspec_rsBlockNum2(__spec__) (__spec__[3])
+#define QRspec_rsDataCodes2(__spec__) (__spec__[4])
+#define QRspec_rsEccCodes2(__spec__) (__spec__[2])
+```
+
+Note that both groups share the same ECC codeword count.
+
+---
+
+## Capacity Tables
+
+### Full QR — `qrspecCapacity`
+
+Defined in `qrspec.c`, 41 entries (index 0 is unused):
+
+```c
+typedef struct {
+ int width; // symbol width in modules
+ int words; // total data codewords
+ int remainder; // remainder bits (0–7)
+ int ec[4]; // data codewords per EC level [L, M, Q, H]
+} QRspec_Capacity;
+
+static const QRspec_Capacity qrspecCapacity[QRSPEC_VERSION_MAX + 1] = {
+ { 0, 0, 0, { 0, 0, 0, 0}},
+ { 21, 26, 0, { 19, 16, 13, 9}}, // v1: 21×21
+ { 25, 44, 7, { 34, 28, 22, 16}}, // v2: 25×25
+ { 29, 70, 7, { 55, 44, 34, 26}}, // v3
+ { 33, 100, 7, { 80, 64, 48, 36}}, // v4
+ // ... through version 40
+ {177, 3706, 0, {2956, 2334, 1666, 1276}}, // v40: 177×177
+};
+```
+
+Key accessor functions:
+```c
+int QRspec_getDataLength(int version, QRecLevel level);
+// Returns qrspecCapacity[version].ec[level]
+
+int QRspec_getECCLength(int version, QRecLevel level);
+// Returns qrspecCapacity[version].words - qrspecCapacity[version].ec[level]
+
+int QRspec_getWidth(int version);
+// Returns qrspecCapacity[version].width (= version * 4 + 17)
+
+int QRspec_getRemainder(int version);
+// Returns qrspecCapacity[version].remainder
+
+int QRspec_getMinimumVersion(int size, QRecLevel level);
+// Scans versions 1-40 for first where data capacity >= size
+```
+
+### Sample Capacities
+
+| Version | Width | Total Words | L Data | M Data | Q Data | H Data |
+|---|---|---|---|---|---|---|
+| 1 | 21 | 26 | 19 | 16 | 13 | 9 |
+| 5 | 37 | 134 | 108 | 86 | 62 | 46 |
+| 10 | 57 | 346 | 274 | 216 | 154 | 122 |
+| 20 | 97 | 1085 | 861 | 669 | 485 | 385 |
+| 40 | 177 | 3706 | 2956 | 2334 | 1666 | 1276 |
+
+### Micro QR — `mqrspecCapacity`
+
+Defined in `mqrspec.c`, 5 entries:
+
+```c
+static const MQRspec_Capacity mqrspecCapacity[MQRSPEC_VERSION_MAX + 1] = {
+ {0, 0, {0, 0, 0, 0}},
+ {11, 5, {2, 0, 0, 0}}, // M1: 11×11, detection only (2 data + 3 ECC)
+ {13, 10, {5, 4, 0, 0}}, // M2: 13×13
+ {15, 17, {11, 9, 0, 0}}, // M3: 15×15
+ {17, 24, {16, 14, 10, 0}} // M4: 17×17
+};
+```
+
+---
+
+## RSblock Structure
+
+In `qrencode.c`, RS blocks are organized in two structures:
+
+```c
+typedef struct {
+ int dataLength;
+ unsigned char *data;
+ int eccLength;
+ unsigned char *ecc;
+} RSblock;
+```
+
+### QRRawCode — Full QR
+
+```c
+typedef struct {
+ int dataLength;
+ int eccLength;
+ int b1; // number of group 1 blocks
+ int b2; // number of group 2 blocks
+ int rsblock_num; // total blocks (b1 + b2)
+ RSblock *rsblock;
+ int count; // interleave counter
+} QRRawCode;
+```
+
+### MQRRawCode — Micro QR
+
+```c
+typedef struct {
+ int version;
+ int dataLength;
+ int eccLength;
+ unsigned char *datacode;
+ unsigned char *ecccode;
+ int b1;
+ int rsblock_num;
+ RSblock *rsblock;
+ int count;
+ int oddbits;
+} MQRRawCode;
+```
+
+The `oddbits` field handles Micro QR versions where data length is specified in bits rather than bytes (M1 has odd-bit data length).
+
+### Block Initialization
+
+`RSblock_initBlock()` calls the Reed-Solomon encoder for each block:
+
+```c
+static int RSblock_initBlock(RSblock *block, int dl, unsigned char *data,
+ int el, unsigned char *ecc,
+ RSECC_encoder encoder)
+{
+ block->dataLength = dl;
+ block->data = data;
+ block->eccLength = el;
+ block->ecc = ecc;
+ return encoder(dl, el, data, ecc);
+}
+```
+
+Where `encoder` is a function pointer to `RSECC_encode`.
+
+---
+
+## Block Interleaving
+
+The `QRraw_getCode()` function interleaves data and ECC bytes across blocks:
+
+```c
+unsigned char QRraw_getCode(QRRawCode *raw)
+{
+ unsigned char code;
+
+ if(raw->count < raw->dataLength) {
+ // Data interleaving phase
+ int blockNum = raw->count % raw->rsblock_num;
+ int dataPos = raw->count / raw->rsblock_num;
+ // ... skip blocks whose data is too short for this position ...
+ code = raw->rsblock[blockNum].data[dataPos];
+ } else {
+ // ECC interleaving phase
+ int blockNum = (raw->count - raw->dataLength) % raw->rsblock_num;
+ int eccPos = (raw->count - raw->dataLength) / raw->rsblock_num;
+ code = raw->rsblock[blockNum].ecc[eccPos];
+ }
+ raw->count++;
+ return code;
+}
+```
+
+The interleaving ensures that consecutive codewords in the symbol come from different RS blocks, maximizing burst-error recovery.
+
+For a symbol with 2 blocks of 15 data + 1 block of 16 data:
+- First pass: byte 0 from block 0, byte 0 from block 1, byte 0 from block 2
+- Second pass: byte 1 from block 0, byte 1 from block 1, byte 1 from block 2
+- ...
+- The extra byte from block 2 (position 15) is appended at the end
+
+---
+
+## The RSECC Module
+
+The core Reed-Solomon encoder in `rsecc.c`.
+
+### Galois Field GF(2^8)
+
+**Primitive polynomial**: `proot = 0x11d` = x^8 + x^4 + x^3 + x^2 + 1
+
+**Log/antilog tables**:
+```c
+static unsigned char alpha[256]; // alpha[i] = x^i mod proot
+static unsigned char aindex[256]; // aindex[alpha[i]] = i
+```
+
+Initialized in `RSECC_init()`:
+
+```c
+static void RSECC_init(void)
+{
+ int i, b;
+ alpha[0] = 1;
+ aindex[0] = 255; // log(0) is undefined; 255 is the "zero" sentinel tested in RSECC_encode
+ aindex[1] = 0;
+ for(i = 1; i < 255; i++) {
+ b = alpha[i-1] << 1;
+ if(b & 0x100) b ^= proot; // reduce mod primitive polynomial
+ alpha[i] = (unsigned char)b;
+ aindex[b] = i;
+ }
+}
+```
+
+### Generator Polynomial Cache
+
+Generator polynomials are cached for ECC lengths 2 through 30:
+
+```c
+static int generator_initialized[29] = {0};
+static unsigned char generator[29][31]; // generator[el-2] stores the polynomial
+```
+
+`generator_init()` builds the polynomial (x + α^0)(x + α^1)...(x + α^(el-1)):
+
+```c
+static void generator_init(int el)
+{
+ int i, j;
+ unsigned char *g = generator[el - 2];
+ g[0] = 1;
+ for(i = 0; i < el; i++) {
+ g[i+1] = 1;
+ for(j = i; j > 0; j--) {
+ if(g[j] != 0) {
+ g[j] = g[j-1] ^ alpha[(aindex[g[j]] + i) % 255];
+ } else {
+ g[j] = g[j-1];
+ }
+ }
+ g[0] = alpha[(aindex[g[0]] + i) % 255];
+ }
+}
+```
+
+### The Encode Function
+
+```c
+int RSECC_encode(int data_length, int ecc_length,
+ const unsigned char *data, unsigned char *ecc)
+```
+
+**Thread safety**: Protected by `RSECC_mutex` (pthread mutex) during initialization:
+
+```c
+#ifdef HAVE_LIBPTHREAD
+static pthread_mutex_t RSECC_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+int RSECC_encode(int data_length, int ecc_length,
+ const unsigned char *data, unsigned char *ecc)
+{
+ // ... mutex lock ...
+ if(!generator_initialized[ecc_length - 2]) {
+ if(!initialized) RSECC_init();
+ generator_init(ecc_length);
+ generator_initialized[ecc_length - 2] = 1;
+ }
+ // ... mutex unlock ...
+```
+
+**Encoding algorithm** — polynomial division:
+
+```c
+ unsigned char *gen = generator[ecc_length - 2];
+ unsigned char feedback;
+
+ memset(ecc, 0, ecc_length);
+ for(i = 0; i < data_length; i++) {
+ feedback = aindex[data[i] ^ ecc[0]];
+ if(feedback != 255) {
+ for(j = 1; j < ecc_length; j++) {
+ ecc[j] ^= alpha[(feedback + aindex[gen[ecc_length - j]]) % 255];
+ }
+ }
+ memmove(&ecc[0], &ecc[1], ecc_length - 1);
+ if(feedback != 255) {
+ ecc[ecc_length - 1] = alpha[(feedback + aindex[gen[0]]) % 255];
+ } else {
+ ecc[ecc_length - 1] = 0;
+ }
+ }
+```
+
+This implements systematic Reed-Solomon encoding: the `ecc` output is the remainder of dividing the data polynomial by the generator polynomial over GF(2^8).
+
+---
+
+## Format Information
+
+Format information encodes the EC level and mask pattern, protected by BCH(15,5).
+
+### Full QR Format Info
+
+From `qrspec.c`, pre-computed table:
+
+```c
+static const unsigned int formatInfo[4][8] = {
+ {0x77c4, 0x72f3, 0x7daa, 0x789d, 0x662f, 0x6318, 0x6c41, 0x6976},
+ {0x5412, 0x5125, 0x5e7c, 0x5b4b, 0x45f9, 0x40ce, 0x4f97, 0x4aa0},
+ {0x355f, 0x3068, 0x3f31, 0x3a06, 0x24b4, 0x2183, 0x2eda, 0x2bed},
+ {0x1689, 0x13be, 0x1ce7, 0x19d0, 0x0762, 0x0255, 0x0d0c, 0x083b}
+};
+```
+
+Indexed by `formatInfo[ec_level][mask_pattern]`. The 15-bit value is written into two locations in the symbol by `Mask_writeFormatInformation()`.
+
+### Micro QR Format Info
+
+From `mqrspec.c`:
+
+```c
+static const unsigned int typeTable[MQRSPEC_VERSION_MAX + 1][3] = {
+ {0, 0, 0},
+ {0x4445, 0x4172, 0x4e2b},
+ {0x2f7f, 0x2a48, 0x2511},
+ // ...
+};
+```
+
+---
+
+## How ECC Integrates Into Encoding
+
+The full encoding pipeline in `QRcode_encodeMask()`:
+
+1. **Build byte stream**: `QRinput_getByteStream(input)` → packed data bytes
+2. **Create QRRawCode**: `QRraw_new(input)` → initializes RS blocks, runs `RSECC_encode()` on each block
+3. **Interleave**: `QRraw_getCode()` called repeatedly to get interleaved data+ECC bytes
+4. **Place in frame**: `FrameFiller_next()` places each codeword bit in zigzag order
+5. **Apply mask**: One of 8 mask patterns XORed with data area
+6. **Format info**: `Mask_writeFormatInformation()` embeds EC level + mask pattern
+
+For Micro QR, `QRcode_encodeMaskMQR()` follows the same pattern but uses `MQRraw_new()`, `MQRraw_getCode()`, and `MMask_*` functions.
+
+---
+
+## RS Block Count Examples
+
+| Version | Level | Group 1 Blocks | Group 1 Data | Group 2 Blocks | Group 2 Data | ECC/Block |
+|---|---|---|---|---|---|---|
+| 1 | L | 1 | 19 | 0 | — | 7 |
+| 1 | H | 1 | 9 | 0 | — | 17 |
+| 5 | M | 2 | 43 | 0 | — | 24 |
+| 10 | Q | 6 | 19 | 2 | 20 | 24 |
+| 40 | L | 19 | 118 | 6 | 119 | 30 |
+| 40 | H | 20 | 15 | 61 | 16 | 30 |
+
+The maximum ECC codewords per block is 30, which corresponds to a degree-30 generator polynomial — the largest entry in the 29-slot `generator` cache (stored at index `el - 2`, i.e. `generator[28]`).
diff --git a/docs/handbook/genqrcode/masking-algorithms.md b/docs/handbook/genqrcode/masking-algorithms.md
new file mode 100644
index 0000000000..120f774bf6
--- /dev/null
+++ b/docs/handbook/genqrcode/masking-algorithms.md
@@ -0,0 +1,578 @@
+# genqrcode / libqrencode — Masking Algorithms
+
+## Purpose
+
+After placing data and error correction codewords in the QR Code matrix, a mask pattern is XORed with the data area to avoid unfavorable patterns (large uniform regions, patterns resembling finder patterns). The library evaluates all candidate masks using a penalty scoring system and selects the mask with the lowest penalty.
+
+---
+
+## Full QR Code — 8 Mask Patterns
+
+### The MASKMAKER Macro
+
+All 8 mask condition functions are generated by a single macro in `mask.c`:
+
+```c
+#define MASKMAKER(__exp__) \
+ int x, y;\
+ int b = 0;\
+ for(y = 0; y < width; y++) {\
+ for(x = 0; x < width; x++) {\
+ if(*s & 0x80) {\
+ *d = *s;\
+ } else {\
+ *d = *s ^ ((__exp__) == 0);\
+ }\
+ b += (int)(*d & 1);\
+ s++; d++;\
+ }\
+ }\
+ return b;
+```
+
+The `0x80` bit check skips non-data modules (finder patterns, timing patterns, alignment patterns, format/version info). The expression `(__exp__) == 0` evaluates the mask condition — when the condition is true (equals 0), the module is flipped.
+
+### Mask Pattern Definitions
+
+```c
+static int Mask_mask0(int width, const unsigned char *s, unsigned char *d)
+{
+ MASKMAKER((y+x) % 2)
+}
+
+static int Mask_mask1(int width, const unsigned char *s, unsigned char *d)
+{
+ MASKMAKER(y % 2)
+}
+
+static int Mask_mask2(int width, const unsigned char *s, unsigned char *d)
+{
+ MASKMAKER(x % 3)
+}
+
+static int Mask_mask3(int width, const unsigned char *s, unsigned char *d)
+{
+ MASKMAKER((y+x) % 3)
+}
+
+static int Mask_mask4(int width, const unsigned char *s, unsigned char *d)
+{
+ MASKMAKER((y/2 + x/3) % 2)
+}
+
+static int Mask_mask5(int width, const unsigned char *s, unsigned char *d)
+{
+ MASKMAKER(((y*x) % 2 + (y*x) % 3))
+}
+
+static int Mask_mask6(int width, const unsigned char *s, unsigned char *d)
+{
+ MASKMAKER(((y*x) % 2 + (y*x) % 3) % 2)
+}
+
+static int Mask_mask7(int width, const unsigned char *s, unsigned char *d)
+{
+ MASKMAKER(((y*x) % 3 + (y+x) % 2) % 2)
+}
+```
+
+Function pointer array:
+
+```c
+typedef int MaskMaker(int, const unsigned char *, unsigned char *);
+static MaskMaker *maskMakers[8] = {
+ Mask_mask0, Mask_mask1, Mask_mask2, Mask_mask3,
+ Mask_mask4, Mask_mask5, Mask_mask6, Mask_mask7
+};
+```
+
+### Summary Table
+
+| Mask | Condition (module inverted when true) | Pattern |
+|---|---|---|
+| 0 | `(y + x) % 2 == 0` | Checkerboard |
+| 1 | `y % 2 == 0` | Horizontal stripes |
+| 2 | `x % 3 == 0` | Vertical stripes (every 3) |
+| 3 | `(y + x) % 3 == 0` | Diagonal stripes |
+| 4 | `(y/2 + x/3) % 2 == 0` | Block pattern |
+| 5 | `(y*x)%2 + (y*x)%3 == 0` | Complex |
+| 6 | `((y*x)%2 + (y*x)%3) % 2 == 0` | Complex |
+| 7 | `((y*x)%3 + (y+x)%2) % 2 == 0` | Complex |
+
+---
+
+## Penalty Scoring
+
+### Penalty Constants
+
+Defined in `mask.c`:
+
+```c
+#define N1 3
+#define N2 3
+#define N3 40
+#define N4 10
+```
+
+### Penalty Rule N1 + N3: Run Length
+
+`Mask_calcN1N3()` evaluates both Rule 1 (adjacent same-color modules) and Rule 3 (finder-like patterns) using run-length data:
+
+```c
+static int Mask_calcN1N3(int length, int *runLength)
+{
+ int i;
+ int demerit = 0;
+ int fact;
+
+ for(i = 0; i < length; i++) {
+ if(runLength[i] >= 5) {
+ demerit += N1 + (runLength[i] - 5);
+ }
+ if((i & 1)) {
+ // Check for 1:1:3:1:1 pattern embedded in dark-light sequence
+ if(i >= 3 && i < length - 2
+ && (runLength[i] % 3) == 0) {
+ fact = runLength[i] / 3;
+ if(runLength[i-2] == fact &&
+ runLength[i-1] == fact &&
+ runLength[i+1] == fact &&
+ runLength[i+2] == fact) {
+ // Check for 4-module light space on either side
+ if(i == 3 || runLength[i-3] >= 4 * fact) {
+ demerit += N3;
+ } else if(i+4 >= length || runLength[i+3] >= 4 * fact) {
+ demerit += N3;
+ }
+ }
+ }
+ }
+ }
+ return demerit;
+}
+```
+
+**Rule N1**: Run of ≥ 5 same-color modules → penalty = 3 + (run_length − 5). For example, 7 consecutive dark modules → 3 + 2 = 5 penalty.
+
+**Rule N3**: Pattern 1:1:3:1:1 (the finder pattern ratio) with 4+ light modules on either side → 40 penalty. This prevents patterns that confuse QR Code scanners.
+
+### Run Length Calculation
+
+Horizontal runs via `Mask_calcRunLengthH()`:
+
+```c
+static int Mask_calcRunLengthH(int width, const unsigned char *frame, int *runLength)
+{
+ int i;
+ int head;
+ int prev;
+
+ if(frame[0] & 1) {
+ runLength[0] = -1;
+ head = 1;
+ } else {
+ head = 0;
+ }
+ runLength[head] = 1;
+ prev = frame[0];
+
+ for(i = 1; i < width; i++) {
+ if((frame[i] ^ prev) & 1) {
+ head++;
+ runLength[head] = 1;
+ prev = frame[i];
+ } else {
+ runLength[head]++;
+ }
+ }
+ return head + 1;
+}
+```
+
+Vertical runs via `Mask_calcRunLengthV()` — same logic but iterates `frame[i * width]`.
+
+### Penalty Rule N2: 2×2 Blocks
+
+`Mask_calcN2()` counts 2×2 same-color blocks:
+
+```c
+static int Mask_calcN2(int width, unsigned char *frame)
+{
+ int x, y;
+ int demerit = 0;
+ unsigned char *p;
+
+ p = frame;
+ for(y = 1; y < width; y++) {
+ for(x = 1; x < width; x++) {
+ // Check 2x2 block using bit 0
+ if(((p[0]^p[1])|(p[width]^p[width+1])) & 1) {
+ // not all same
+ } else {
+ demerit += N2;
+ }
+ p++;
+ }
+ p++;
+ }
+ return demerit;
+}
+```
+
+Each 2×2 same-color block adds N2 = 3 penalty points.
+
+### Penalty Rule N4: Dark/Light Balance
+
+The `Mask_evaluateSymbol()` function counts dark modules and applies the balance penalty:
+
+```c
+static int Mask_evaluateSymbol(int width, unsigned char *frame)
+{
+ int x, y;
+ int demerit = 0;
+ int length;
+ int runLength[width + 1];
+ unsigned char *p;
+ int dark = 0;
+
+ demerit += Mask_calcN2(width, frame);
+
+ p = frame;
+ for(y = 0; y < width; y++) {
+ length = Mask_calcRunLengthH(width, p, runLength);
+ demerit += Mask_calcN1N3(length, runLength);
+ p += width;
+ }
+
+ for(x = 0; x < width; x++) {
+ length = Mask_calcRunLengthV(width, frame + x, runLength);
+ demerit += Mask_calcN1N3(length, runLength);
+ }
+
+ // Count dark modules for N4
+ p = frame;
+ for(y = 0; y < width * width; y++) {
+ if(p[y] & 1) dark++;
+ }
+
+ // Calculate demerits for N4
+ // dark ratio in percent, deviation from 50%
+ int ratio = (200 * dark + width * width) / (2 * width * width) - 50;
+ if(ratio < 0) ratio = -ratio;
+ demerit += ratio / 5 * N4;
+
+ return demerit;
+}
+```
+
+N4 penalty: For each 5% deviation from 50% dark/light balance, add N4 = 10 points.
+
+---
+
+## Mask Selection Algorithm
+
+`Mask_mask()` tries all 8 patterns and selects the best:
+
+```c
+unsigned char *Mask_mask(int width, unsigned char *frame, QRecLevel level)
+{
+ int i;
+ unsigned char *mask, *bestMask;
+ int minDemerit = INT_MAX;
+ int bestMaskNum = 0;
+ int blacks;
+ int bratio;
+ int demerit;
+
+ bestMask = NULL;
+
+ for(i = 0; i < 8; i++) {
+ mask = (unsigned char *)malloc(width * width);
+ if(mask == NULL) break;
+
+ demerit = 0;
+ blacks = maskMakers[i](width, frame, mask);
+ demerit = Mask_evaluateSymbol(width, mask);
+
+ if(demerit < minDemerit) {
+ minDemerit = demerit;
+ free(bestMask);
+ bestMask = mask;
+ bestMaskNum = i;
+ } else {
+ free(mask);
+ }
+ }
+
+ Mask_writeFormatInformation(width, bestMask, bestMaskNum, level);
+
+ return bestMask;
+}
+```
+
+Key points:
+- Allocates a new `width × width` buffer for each mask attempt
+- Applies the mask via `maskMakers[i]`
+- Evaluates penalty via `Mask_evaluateSymbol()`
+- Keeps only the lowest-demerit mask, frees the rest
+- Writes format information into the selected mask
+
+### Forced Mask
+
+`QRcode_encodeMask()` accepts a `mask` parameter. When >= 0, it skips penalty evaluation and uses the specified mask directly:
+
+```c
+if(mask < 0) {
+ masked = Mask_mask(width, frame, input->level);
+} else {
+ masked = Mask_makeMask(width, frame, mask, input->level);
+}
+```
+
+`Mask_makeMask()` applies a single mask without evaluation:
+
+```c
+unsigned char *Mask_makeMask(int width, unsigned char *frame, int mask,
+ QRecLevel level)
+{
+ unsigned char *masked = (unsigned char *)malloc(width * width);
+ maskMakers[mask](width, frame, masked);
+ Mask_writeFormatInformation(width, masked, mask, level);
+ return masked;
+}
+```
+
+This is used by the test suite via `QRcode_encodeMask()` (exposed in `qrencode_inner.h`).
+
+---
+
+## Format Information Writing
+
+`Mask_writeFormatInformation()` embeds the 15-bit format info (EC level + mask pattern) into the symbol at two fixed locations:
+
+```c
+void Mask_writeFormatInformation(int width, unsigned char *frame,
+ int mask, QRecLevel level)
+{
+ unsigned int format;
+ unsigned char v;
+ int i;
+
+ format = QRspec_getFormatInfo(mask, level);
+
+ // Horizontal strip near top-left
+ for(i = 0; i < 8; i++) {
+ // ... write bits to specific positions around top-left finder ...
+ }
+
+ // Vertical strip near top-left and bottom-left
+ for(i = 0; i < 7; i++) {
+ // ... write bits to specific positions ...
+ }
+}
+```
+
+The format info is retrieved from the pre-computed `formatInfo[level][mask]` table (15-bit BCH code).
+
+---
+
+## Micro QR Code — 4 Mask Patterns
+
+### Mask Definitions
+
+From `mmask.c`, only 4 patterns:
+
+```c
+static int MMask_mask0(int width, const unsigned char *s, unsigned char *d)
+{
+ MASKMAKER(y % 2)
+}
+
+static int MMask_mask1(int width, const unsigned char *s, unsigned char *d)
+{
+ MASKMAKER(((y/2) + (x/3)) % 2)
+}
+
+static int MMask_mask2(int width, const unsigned char *s, unsigned char *d)
+{
+ MASKMAKER((((y*x) % 2) + ((y*x) % 3)) % 2)
+}
+
+static int MMask_mask3(int width, const unsigned char *s, unsigned char *d)
+{
+ MASKMAKER((((y+x) % 2) + ((y*x) % 3)) % 2)
+}
+```
+
+Function pointer array:
+```c
+static MaskMaker *maskMakers[4] = {
+ MMask_mask0, MMask_mask1, MMask_mask2, MMask_mask3
+};
+```
+
+| Mask | Condition |
+|---|---|
+| 0 | `y % 2 == 0` |
+| 1 | `(y/2 + x/3) % 2 == 0` |
+| 2 | `((y*x)%2 + (y*x)%3) % 2 == 0` |
+| 3 | `((y+x)%2 + (y*x)%3) % 2 == 0` |
+
+### Micro QR Mask Evaluation
+
+Micro QR uses a completely different scoring method. Instead of penalty rules, it maximizes a quality metric.
+
+`MMask_evaluateSymbol()`:
+
+```c
+static int MMask_evaluateSymbol(int width, unsigned char *frame)
+{
+ int x, y;
+ int sum1 = 0, sum2 = 0;
+
+ // Sum of bottom row (last row, only data area)
+ for(x = 1; x < width; x++) {
+ if(frame[width * (width-1) + x] & 1) {
+ sum1++;
+ }
+ }
+ // Sum of rightmost column (only data area)
+ for(y = 1; y < width; y++) {
+ if(frame[y * width + (width-1)] & 1) {
+ sum2++;
+ }
+ }
+
+ return sum1 * 16 + sum2;
+}
+```
+
+The score favors masks that produce more dark modules along the bottom row and right column. **Unlike full QR, higher scores are better.**
+
+### Micro QR Mask Selection
+
+`MMask_mask()`:
+
+```c
+unsigned char *MMask_mask(int version, unsigned char *frame, QRecLevel level)
+{
+ int width = MQRspec_getWidth(version);
+ int i;
+ unsigned char *mask, *bestMask = NULL;
+ int maxScore = 0;
+ int bestMaskNum = 0;
+ int score;
+
+ for(i = 0; i < 4; i++) {
+ mask = (unsigned char *)malloc(width * width);
+ maskMakers[i](width, frame, mask);
+ score = MMask_evaluateSymbol(width, mask);
+
+ if(score > maxScore) { // Note: MAXIMUM, not minimum
+ maxScore = score;
+ free(bestMask);
+ bestMask = mask;
+ bestMaskNum = i;
+ } else {
+ free(mask);
+ }
+ }
+
+ MMask_writeFormatInformation(version, bestMask, bestMaskNum, level);
+
+ return bestMask;
+}
+```
+
+### Micro QR Format Information
+
+`MMask_writeFormatInformation()` writes a 15-bit format info in a single strip around the top-left finder pattern:
+
+```c
+void MMask_writeFormatInformation(int version, unsigned char *frame,
+ int mask, QRecLevel level)
+{
+ unsigned int format;
+ unsigned char v;
+ int i;
+ int width = MQRspec_getWidth(version);
+
+ format = MQRspec_getFormatInfo(mask, version, level);
+
+ for(i = 0; i < 8; i++) {
+ v = 0x84 | (format & 1);
+ frame[width * (i + 1) + 8] = v; // left column
+ format >>= 1;
+ }
+ for(i = 0; i < 7; i++) {
+ v = 0x84 | (format & 1);
+ frame[width * 8 + 7 - i] = v; // top row
+ format >>= 1;
+ }
+}
+```
+
+---
+
+## Module Bit Flags
+
+When placing modules, the frame builder uses bit flags to mark module types:
+
+```c
+// Bit 7 (0x80): Non-data module flag
+// When set, masking skips this module
+// Set on: finder pattern, separator, timing, alignment, version info, format info
+```
+
+In `MASKMAKER`, this check appears as:
+```c
+if(*s & 0x80) {
+ *d = *s; // copy as-is (non-data module)
+} else {
+ *d = *s ^ ((__exp__) == 0); // apply mask
+}
+```
+
+This ensures that only the data and ECC area is affected by masking.
+
+---
+
+## Integration with Encoding Pipeline
+
+In `QRcode_encodeMask()` (from `qrencode.c`):
+
+```c
+QRcode *QRcode_encodeMask(QRinput *input, int mask)
+{
+ // ... setup ...
+
+ // 1. Build frame with function patterns
+ frame = QRspec_createFrame(version);
+
+ // 2. Place data/ECC codewords via FrameFiller
+ filler = FrameFiller_new(width, frame, 0);
+ for(i = 0; i < raw->dataLength + raw->eccLength; i++) {
+ code = QRraw_getCode(raw);
+ bit = 0x80;
+ for(j = 0; j < 8; j++) {
+ p = FrameFiller_next(filler);
+ if(p == NULL) goto EXIT;
+ *p = 0x02 | ((bit & code) != 0);
+ bit >>= 1;
+ }
+ }
+ // ... remainder bits ...
+
+ // 3. Apply mask
+ if(mask < 0) {
+ masked = Mask_mask(width, frame, input->level);
+ } else {
+ masked = Mask_makeMask(width, frame, mask, input->level);
+ }
+
+ // 4. Package result
+ qrcode = QRcode_new(version, width, masked);
+ // ...
+}
+```
+
+The same flow applies for `QRcode_encodeMaskMQR()`, using `MMask_mask()` instead.
diff --git a/docs/handbook/genqrcode/micro-qr.md b/docs/handbook/genqrcode/micro-qr.md
new file mode 100644
index 0000000000..d202ceb76c
--- /dev/null
+++ b/docs/handbook/genqrcode/micro-qr.md
@@ -0,0 +1,456 @@
+# genqrcode / libqrencode — Micro QR Code Support
+
+## Overview
+
+Micro QR Code is a compact variant of QR Code standardized in JIS X0510:2004 and ISO/IEC 18004. libqrencode supports Micro QR versions M1 through M4 via dedicated spec tables in `mqrspec.c`, masking in `mmask.c`, and encoding paths in `qrencode.c`.
+
+---
+
+## Micro QR vs. Full QR
+
+| Feature | Full QR Code | Micro QR Code |
+|---|---|---|
+| Versions | 1–40 | M1–M4 (1–4 internally) |
+| Max constant | `QRSPEC_VERSION_MAX = 40` | `MQRSPEC_VERSION_MAX = 4` |
+| Finder patterns | 3 | 1 |
+| Alignment patterns | 0–46 per version | None |
+| Timing patterns | Horizontal + Vertical | Horizontal + Vertical |
+| Version information | Versions 7+ | Never |
+| Mask patterns | 8 | 4 |
+| Mask selection | Minimize penalty | Maximize edge score |
+| EC Levels | L, M, Q, H | Version-dependent subset |
+| Structured append | Supported | Not supported |
+| ECI mode | Supported | Not supported |
+| FNC1 mode | Supported | Not supported |
+
+---
+
+## Version Capacities
+
+From `mqrspec.c`:
+
+```c
+typedef struct {
+    int width;
+    int ec[4]; // ECC codewords per level; 0 = level unsupported
+} MQRspec_Capacity;
+
+static const MQRspec_Capacity mqrspecCapacity[MQRSPEC_VERSION_MAX + 1] = {
+    {0, { 0, 0, 0, 0}},
+    {11, { 2, 0, 0, 0}}, // M1: 11×11 (2 check words — error detection only)
+    {13, { 5, 6, 0, 0}}, // M2: 13×13
+    {15, { 6, 8, 0, 0}}, // M3: 15×15
+    {17, { 8, 10, 14, 0}} // M4: 17×17
+};
+```
+
+Width formula: `version * 2 + 9` (vs. `version * 4 + 17` for full QR).
+
+### Detailed Capacities
+
+| Version | Width | Total Data Words | L Data | M Data | Q Data | H Data |
+|---|---|---|---|---|---|---|
+| M1 | 11 | 5 | 3* | — | — | — |
+| M2 | 13 | 10 | 5 | 4 | — | — |
+| M3 | 15 | 17 | 11 | 9 | — | — |
+| M4 | 17 | 24 | 16 | 14 | 10 | — |
+
+*M1 has error detection only, not error correction.
+
+### ECC Lengths
+
+```c
+int MQRspec_getECCLength(int version, QRecLevel level)
+{
+    // The ec[] table stores ECC codeword counts directly; a value of 0
+    // marks an unsupported version/level combination.
+    return mqrspecCapacity[version].ec[level];
+}
+```
+
+Returns 0 for unsupported version/level combinations, which `QRinput_newMQR()` uses to reject invalid inputs.
+
+### Character Capacities by Mode
+
+| Version | Level | Numeric | Alphanumeric | Byte | Kanji |
+|---|---|---|---|---|---|
+| M1 | — | 5 | — | — | — |
+| M2 | L | 10 | 6 | — | — |
+| M2 | M | 8 | 5 | — | — |
+| M3 | L | 23 | 14 | 9 | 6 |
+| M3 | M | 18 | 11 | 7 | 4 |
+| M4 | L | 35 | 21 | 15 | 9 |
+| M4 | M | 30 | 18 | 13 | 8 |
+| M4 | Q | 21 | 12 | 9 | 5 |
+
+---
+
+## Mode Restrictions
+
+Not all encoding modes are available in every Micro QR version:
+
+| Version | Numeric | Alphanumeric | 8-bit | Kanji | ECI | FNC1 | Structured |
+|---|---|---|---|---|---|---|---|
+| M1 | Yes | No | No | No | No | No | No |
+| M2 | Yes | Yes | No | No | No | No | No |
+| M3 | Yes | Yes | Yes | Yes | No | No | No |
+| M4 | Yes | Yes | Yes | Yes | No | No | No |
+
+This is enforced by `QRinput_encodeBitStream()` which calls validation functions:
+
+```c
+static int QRinput_isModeNumValid(int version, QRinput_List *entry, int mqr)
+{
+ if(mqr) {
+ if(MQRspec_maximumWords(QR_MODE_NUM, version) < entry->size)
+ return -1;
+ }
+ return 0;
+}
+```
+
+`MQRspec_maximumWords()` returns 0 for unsupported modes at a given version.
+
+---
+
+## MQR-Specific Encoding
+
+### Mode Indicator Sizes
+
+Micro QR uses shorter mode indicators than full QR. From `mqrspec.c`:
+
+```c
+// MQR mode indicator bit lengths per version:
+// M1: 0 bits (only numeric, implied)
+// M2: 1 bit
+// M3: 2 bits
+// M4: 3 bits
+```
+
+These are retrieved by `MQRspec_lengthIndicator()`.
+
+### Character Count Indicator Sizes
+
+```c
+static const int lengthTableBits[4][4] = {
+    { 3, 4, 5, 6}, // QR_MODE_NUM: M1=3, M2=4, M3=5, M4=6
+    { 0, 3, 4, 5}, // QR_MODE_AN: M1=0, M2=3, M3=4, M4=5
+    { 0, 0, 4, 5}, // QR_MODE_8: M1=0, M2=0, M3=4, M4=5
+    { 0, 0, 3, 4}, // QR_MODE_KANJI: M1=0, M2=0, M3=3, M4=4
+};
+```
+
+A value of 0 means the mode is unsupported for that version.
+
+### Data Length in Bits
+
+A key difference: `MQRspec_getDataLengthBit()` returns data length in **bits**, not bytes:
+
+```c
+int MQRspec_getDataLengthBit(int version, QRecLevel level)
+{
+ int w = mqrspecCapacity[version].width - 1;
+ return w * w - 64 - MQRspec_getECCLength(version, level) * 8;
+}
+```
+
+This matters because M1 and some M2 configurations have data lengths that are not byte-aligned.
+
+---
+
+## MQRRawCode — Micro QR Block Structure
+
+From `qrencode.c`:
+
+```c
+typedef struct {
+ int version;
+ int dataLength;
+ int eccLength;
+ unsigned char *datacode;
+ unsigned char *ecccode;
+ int b1;
+ int rsblock_num;
+ RSblock *rsblock;
+ int count;
+ int oddbits; // Number of "odd" bits in last data byte
+} MQRRawCode;
+```
+
+Micro QR always has exactly **one RS block** (no block interleaving):
+
+```c
+static MQRRawCode *MQRraw_new(QRinput *input)
+{
+ MQRRawCode *raw;
+ raw->version = input->version;
+ raw->dataLength = MQRspec_getDataLength(input->version, input->level);
+ raw->eccLength = MQRspec_getECCLength(input->version, input->level);
+ raw->oddbits = raw->dataLength * 8 - MQRspec_getDataLengthBit(input->version, input->level);
+
+ raw->datacode = QRinput_getByteStream(input);
+ raw->ecccode = (unsigned char *)malloc(raw->eccLength);
+ raw->rsblock_num = 1;
+ raw->rsblock = calloc(1, sizeof(RSblock));
+
+ RSblock_initBlock(raw->rsblock, raw->dataLength, raw->datacode,
+ raw->eccLength, raw->ecccode, RSECC_encode);
+ raw->count = 0;
+ return raw;
+}
+```
+
+### Odd Bits Handling
+
+The `oddbits` field handles versions where data capacity is not byte-aligned:
+
+```c
+unsigned char MQRraw_getCode(MQRRawCode *raw)
+{
+ if(raw->count < raw->dataLength) {
+ return raw->datacode[raw->count++];
+ } else {
+ return raw->ecccode[raw->count++ - raw->dataLength];
+ }
+}
+```
+
+In `QRcode_encodeMaskMQR()`, the last data byte is handled separately when `oddbits` is non-zero, since it carries fewer than 8 significant bits:
+
+```c
+if(raw->oddbits > 0) {
+    // Last data byte: only `oddbits` bits are placed
+    code = MQRraw_getCode(raw);
+    bit = 1 << (raw->oddbits - 1);
+    for(i = 0; i < raw->oddbits; i++) {
+        p = FrameFiller_next(filler);
+        *p = 0x02 | ((code & bit) != 0);
+        bit >>= 1;
+    }
+}
+```
+```
+
+---
+
+## Frame Creation
+
+`MQRspec_createFrame()` builds the base frame with function patterns:
+
+```c
+unsigned char *MQRspec_createFrame(int version)
+{
+ unsigned char *frame;
+ int width = mqrspecCapacity[version].width;
+
+ frame = (unsigned char *)calloc(width * width, 1);
+
+ // 1. Finder pattern (only ONE, top-left)
+ putFinderPattern(frame, width, 0, 0);
+
+ // 2. Separator (no full separator ring — only right and bottom)
+
+ // 3. Timing pattern (horizontal and vertical)
+ for(int i = 0; i < width - 8; i++) {
+ // horizontal timing along row 0, starting at column 8
+ frame[8 + i] = 0x90 | (i & 1);
+ // vertical timing along column 0, starting at row 8
+ frame[(8 + i) * width] = 0x90 | (i & 1);
+ }
+
+ // 4. Format information area (reserved)
+ // No version information (unlike QR versions 7+)
+ // No alignment patterns (unlike QR versions 2+)
+
+ return frame;
+}
+```
+
+Key differences from `QRspec_createFrame()`:
+- Single finder pattern instead of three
+- No alignment patterns
+- No version information area
+- Simpler separator structure
+
+---
+
+## Micro QR Masking
+
+See [masking-algorithms.md](masking-algorithms.md) for the full details. Summary:
+
+### 4 Patterns
+
+```c
+MMask_mask0: y % 2 == 0
+MMask_mask1: (y/2 + x/3) % 2 == 0
+MMask_mask2: ((y*x)%2 + (y*x)%3) % 2 == 0
+MMask_mask3: ((y+x)%2 + (y*x)%3) % 2 == 0
+```
+
+### Selection Criterion
+
+Micro QR picks the mask with the **highest** score (opposite of full QR):
+
+```c
+// Sum of dark modules in bottom row × 16 + sum of dark modules in right column
+score = sum1 * 16 + sum2;
+```
+
+The right column gets lower weight (×1) than the bottom row (×16).
+
+---
+
+## Format Information
+
+Micro QR encodes version, EC level, and mask pattern in a single 15-bit format info:
+
+```c
+static const unsigned int typeTable[MQRSPEC_VERSION_MAX + 1][3] = {
+ {0x00000, 0x00000, 0x00000}, // unused
+ {0x04445, 0x04172, 0x04e2b}, // M1
+ {0x02f7f, 0x02a48, 0x02511}, // M2
+ {0x07f46, 0x07a71, 0x07528}, // M3
+ {0x00dc5, 0x008f2, 0x007ab} // M4
+};
+```
+
+Indexed as `typeTable[version][typeNumber]` where `typeNumber` depends on the EC level:
+
+```c
+unsigned int MQRspec_getFormatInfo(int mask, int version, QRecLevel level)
+{
+ // ... compute typeNumber from version and level ...
+ // ... XOR with mask-dependent pattern ...
+}
+```
+
+Written into the symbol by `MMask_writeFormatInformation()` in a single strip around the finder pattern (8 bits on the left side, 7 bits on the top).
+
+---
+
+## API for Micro QR
+
+### Input Creation
+
+```c
+QRinput *input = QRinput_newMQR(int version, QRecLevel level);
+```
+
+- `version` must be 1–4 (no auto-detection for manual input)
+- Invalid version/level combinations are rejected
+
+### High-Level Encoding
+
+```c
+QRcode *QRcode_encodeStringMQR(const char *string, int version,
+ QRecLevel level, QRencodeMode hint,
+ int casesensitive);
+QRcode *QRcode_encodeString8bitMQR(const char *string, int version,
+ QRecLevel level);
+QRcode *QRcode_encodeDataMQR(int size, const unsigned char *data,
+ int version, QRecLevel level);
+```
+
+When `version` is 0, these functions try versions M1 through M4 incrementally:
+
+```c
+if(version == 0) {
+ for(i = 1; i <= MQRSPEC_VERSION_MAX; i++) {
+ QRcode *code = QRcode_encodeDataReal(data, size, i, level, 1);
+ if(code != NULL) return code;
+ }
+}
+```
+
+### Version/Level Validation
+
+Use `QRinput_setVersionAndErrorCorrectionLevel()` for Micro QR — it validates the combination. Using `QRinput_setVersion()` or `QRinput_setErrorCorrectionLevel()` individually on MQR inputs returns `EINVAL`.
+
+---
+
+## Structured Append — Not Supported
+
+Micro QR does not support structured append mode. Attempting to encode structured append with MQR results in an error:
+
+```c
+static int QRinput_encodeModeStructure(QRinput_List *entry, int mqr)
+{
+ if(mqr) {
+ errno = EINVAL;
+ return -1;
+ }
+ // ...
+}
+```
+
+`QRinput_Struct_appendInput()` also rejects MQR inputs:
+
+```c
+int QRinput_Struct_appendInput(QRinput_Struct *s, QRinput *input)
+{
+ if(input == NULL || input->mqr) {
+ errno = EINVAL;
+ return -1;
+ }
+ // ...
+}
+```
+
+---
+
+## Encoding Pipeline Differences
+
+`QRcode_encodeMaskMQR()` in `qrencode.c` follows a modified pipeline:
+
+1. **Create frame**: `MQRspec_createFrame(version)` — single finder, no alignment
+2. **Initialize RS**: `MQRraw_new(input)` — single block, with odd bits tracking
+3. **Place data**: Via `FrameFiller_next()`, but handles odd bits at boundary:
+ ```c
+ // Place full data bytes
+ for(i = 0; i < MQRraw_getDataLength(raw) - 1; i++) {
+ code = MQRraw_getCode(raw);
+ bit = 0x80;
+ for(j = 0; j < 8; j++) {
+ p = FrameFiller_next(filler);
+ *p = 0x02 | ((bit & code) != 0);
+ bit >>= 1;
+ }
+ }
+ // Handle odd bits from last data byte
+ ```
+4. **Place ECC**: Full ECC bytes, then remainder bits (if any)
+5. **Apply mask**: `MMask_mask(version, frame, level)` — 4 patterns, maximize score
+6. **Package**: `QRcode_new(version, width, masked)`
+
+---
+
+## CLI Support
+
+The `qrencode` CLI tool supports Micro QR via the `-M` / `--micro` flag:
+
+```bash
+qrencode -M -v 3 -l M -o output.png "Hello"
+```
+
+In `qrenc.c`:
+
+```c
+case 'M':
+ micro = 1;
+ break;
+```
+
+When `micro` is set, `encode()` calls `QRcode_encodeStringMQR()` or `QRcode_encodeDataMQR()` instead of the standard variants.
+
+---
+
+## Limitations Summary
+
+1. **No H level**: Maximum EC is Q (M4 only)
+2. **No structured append**: Cannot split data across multiple symbols
+3. **No ECI**: Cannot specify character encodings
+4. **No FNC1**: Cannot create GS1-compatible codes
+5. **Small capacity**: Maximum 35 numeric or 15 byte characters (M4-L)
+6. **Single finder**: Only top-left finder pattern — orientation from timing patterns
+7. **Version must be specified**: For `QRinput_newMQR()`, version 0 is not auto-detect (though high-level `QRcode_encodeStringMQR()` with version 0 does try all versions)
diff --git a/docs/handbook/genqrcode/overview.md b/docs/handbook/genqrcode/overview.md
new file mode 100644
index 0000000000..ad7ad97dda
--- /dev/null
+++ b/docs/handbook/genqrcode/overview.md
@@ -0,0 +1,502 @@
+# genqrcode / libqrencode — Overview
+
+## Introduction
+
+genqrcode is Project-Tick's integrated copy of **libqrencode**, a fast and compact C library for encoding data into QR Code symbols. Originally developed by Kentaro Fukuchi and distributed under the GNU Lesser General Public License v2.1+, the library implements QR Code Model 2 as specified in **JIS X0510:2004** and **ISO/IEC 18004:2006**.
+
+The library encodes input data into a raw bitmap array (`unsigned char *`) representing the QR Code matrix. Unlike tools that produce image files directly, libqrencode gives applications direct access to the symbol matrix, enabling flexible rendering into any output format. The accompanying `qrencode` CLI tool wraps the library and produces image files in PNG, EPS, SVG, XPM, and various terminal text formats.
+
+The current version integrated in Project-Tick is **4.1.1**.
+
+---
+
+## Feature Summary
+
+### Core Capabilities
+
+| Feature | Description |
+|---|---|
+| **QR Code Model 2** | Full implementation of the modern QR Code standard |
+| **Micro QR Code** | Experimental support for M1–M4 (versions 1–4) |
+| **Versions 1–40** | Full-size QR Code from 21×21 to 177×177 modules |
+| **Auto-version selection** | Automatically selects minimum version for given data |
+| **Structured Append** | Split large data across up to 16 linked QR symbols |
+| **Optimized encoding** | Automatic input parsing selects optimal encoding modes |
+| **Thread-safe** | Optional pthread mutex protection for concurrent use |
+
+### Encoding Modes
+
+The library supports all standard encoding modes defined in the QR Code specification:
+
+| Mode | Enum Value | Bit Indicator | Characters |
+|---|---|---|---|
+| **Numeric** | `QR_MODE_NUM` (0) | `0001` | Digits 0–9 |
+| **Alphanumeric** | `QR_MODE_AN` (1) | `0010` | 0–9, A–Z, space, $, %, *, +, -, ., /, : |
+| **8-bit Byte** | `QR_MODE_8` (2) | `0100` | Any 8-bit byte (ISO 8859-1 / UTF-8) |
+| **Kanji** | `QR_MODE_KANJI` (3) | `1000` | Shift-JIS double-byte characters |
+| **ECI** | `QR_MODE_ECI` (6) | `0111` | Extended Channel Interpretation headers |
+| **FNC1 (1st pos)** | `QR_MODE_FNC1FIRST` (7) | `0101` | GS1 DataBar compatibility |
+| **FNC1 (2nd pos)** | `QR_MODE_FNC1SECOND` (8) | `1001` | Application identifier mode |
+
+Internal-only modes:
+
+| Mode | Enum Value | Purpose |
+|---|---|---|
+| `QR_MODE_NUL` | -1 | Terminator sentinel |
+| `QR_MODE_STRUCTURE` | 5 | Structured append header |
+
+These are defined as the `QRencodeMode` enum in `qrencode.h`.
+
+### Error Correction Levels
+
+Four Reed-Solomon error correction levels are supported, defined as the `QRecLevel` enum:
+
+| Level | Enum | Recovery Capability | Typical Use |
+|---|---|---|---|
+| **L** | `QR_ECLEVEL_L` (0) | ~7% codewords | Maximum data capacity |
+| **M** | `QR_ECLEVEL_M` (1) | ~15% codewords | Standard use |
+| **Q** | `QR_ECLEVEL_Q` (2) | ~25% codewords | Higher reliability |
+| **H** | `QR_ECLEVEL_H` (3) | ~30% codewords | Maximum error recovery |
+
+### QR Code Versions and Capacity
+
+QR Code versions range from 1 to 40, each adding 4 modules per side. The maximum version constant is `QRSPEC_VERSION_MAX` (40).
+
+| Version | Size (modules) | Max Data (L) | Max Data (H) |
+|---|---|---|---|
+| 1 | 21 × 21 | 19 bytes | 9 bytes |
+| 5 | 37 × 37 | 108 bytes | 46 bytes |
+| 10 | 57 × 57 | 274 bytes | 122 bytes |
+| 20 | 97 × 97 | 861 bytes | 385 bytes |
+| 30 | 137 × 137 | 1735 bytes | 745 bytes |
+| 40 | 177 × 177 | 2956 bytes | 1276 bytes |
+
+The full capacity table is stored in `qrspec.c` as `qrspecCapacity[QRSPEC_VERSION_MAX + 1]`, a static array of `QRspec_Capacity` structures:
+
+```c
+typedef struct {
+ int width; // Edge length of the symbol
+ int words; // Data capacity (bytes)
+ int remainder; // Remainder bit (bits)
+ int ec[4]; // Number of ECC code (bytes) per level
+} QRspec_Capacity;
+```
+
+### Micro QR Code Versions
+
+Micro QR supports versions M1 through M4 (`MQRSPEC_VERSION_MAX` = 4):
+
+| Version | Size | Max EC | Modes Supported |
+|---|---|---|---|
+| M1 | 11 × 11 | Error detection only | Numeric only |
+| M2 | 13 × 13 | L, M | Numeric, Alphanumeric |
+| M3 | 15 × 15 | L, M | Numeric, Alphanumeric, 8-bit, Kanji |
+| M4 | 17 × 17 | L, M, Q | Numeric, Alphanumeric, 8-bit, Kanji |
+
+---
+
+## Output Data Format
+
+The encoded QR Code is returned as a `QRcode` struct:
+
+```c
+typedef struct {
+ int version; // version of the symbol
+ int width; // width of the symbol
+ unsigned char *data; // symbol data
+} QRcode;
+```
+
+The `data` field is a flat array of `width * width` unsigned chars. Each byte represents one module (dot) with the following bit layout:
+
+```
+MSB 76543210 LSB
+ |||||||`- 1=black/0=white
+ ||||||`-- 1=ecc/0=data code area
+ |||||`--- format information
+ ||||`---- version information
+ |||`----- timing pattern
+ ||`------ alignment pattern
+ |`------- finder pattern and separator
+ `-------- non-data modules (format, timing, etc.)
+```
+
+For most applications, only the least significant bit (bit 0) matters — it determines whether a module is black (1) or white (0). The higher bits provide metadata about what type of QR Code element occupies that position.
+
+### Rendering Example
+
+From `qrencode.h`:
+
+```c
+QRcode *qrcode;
+qrcode = QRcode_encodeString("TEST", 0, QR_ECLEVEL_M, QR_MODE_8, 1);
+if(qrcode == NULL) abort();
+
+for(int y = 0; y < qrcode->width; y++) {
+ for(int x = 0; x < qrcode->width; x++) {
+ if(qrcode->data[y * qrcode->width + x] & 1) {
+ draw_black_dot(x, y);
+ } else {
+ draw_white_dot(x, y);
+ }
+ }
+}
+QRcode_free(qrcode);
+```
+
+---
+
+## API Surface Overview
+
+The public API is declared in `qrencode.h` and falls into these categories:
+
+### Input Construction
+
+| Function | Purpose |
+|---|---|
+| `QRinput_new()` | Create input object (version=0/auto, level=L) |
+| `QRinput_new2(version, level)` | Create input with explicit version and level |
+| `QRinput_newMQR(version, level)` | Create Micro QR input object |
+| `QRinput_append(input, mode, size, data)` | Append data chunk to input |
+| `QRinput_appendECIheader(input, ecinum)` | Append ECI header |
+| `QRinput_getVersion(input)` | Get current version |
+| `QRinput_setVersion(input, version)` | Set version (not for MQR) |
+| `QRinput_getErrorCorrectionLevel(input)` | Get current EC level |
+| `QRinput_setErrorCorrectionLevel(input, level)` | Set EC level (not for MQR) |
+| `QRinput_setVersionAndErrorCorrectionLevel(input, version, level)` | Set both (recommended for MQR) |
+| `QRinput_free(input)` | Free input and all chunks |
+| `QRinput_check(mode, size, data)` | Validate input data |
+| `QRinput_setFNC1First(input)` | Set FNC1 first position flag |
+| `QRinput_setFNC1Second(input, appid)` | Set FNC1 second position with app ID |
+
+### Structured Append
+
+| Function | Purpose |
+|---|---|
+| `QRinput_Struct_new()` | Create structured input set |
+| `QRinput_Struct_setParity(s, parity)` | Set parity for structured symbols |
+| `QRinput_Struct_appendInput(s, input)` | Append QRinput to set |
+| `QRinput_Struct_free(s)` | Free all inputs in set |
+| `QRinput_splitQRinputToStruct(input)` | Auto-split input into structured set |
+| `QRinput_Struct_insertStructuredAppendHeaders(s)` | Insert SA headers |
+
+### Encoding (Simple API)
+
+| Function | Purpose |
+|---|---|
+| `QRcode_encodeString(string, version, level, hint, casesensitive)` | Auto-parse and encode string |
+| `QRcode_encodeString8bit(string, version, level)` | Encode string as 8-bit |
+| `QRcode_encodeData(size, data, version, level)` | Encode raw byte data |
+| `QRcode_encodeInput(input)` | Encode from QRinput object |
+| `QRcode_free(qrcode)` | Free QRcode result |
+
+### Encoding (Micro QR)
+
+| Function | Purpose |
+|---|---|
+| `QRcode_encodeStringMQR(...)` | Auto-parse string to Micro QR |
+| `QRcode_encodeString8bitMQR(...)` | 8-bit string to Micro QR |
+| `QRcode_encodeDataMQR(...)` | Raw data to Micro QR |
+
+### Encoding (Structured Append)
+
+| Function | Purpose |
+|---|---|
+| `QRcode_encodeInputStructured(s)` | Encode structured input |
+| `QRcode_encodeStringStructured(...)` | Auto-split and encode string |
+| `QRcode_encodeString8bitStructured(...)` | 8-bit structured encoding |
+| `QRcode_encodeDataStructured(...)` | Raw data structured encoding |
+| `QRcode_List_size(qrlist)` | Count symbols in list |
+| `QRcode_List_free(qrlist)` | Free symbol list |
+
+### Utility
+
+| Function | Purpose |
+|---|---|
+| `QRcode_APIVersion(&major, &minor, &micro)` | Get version numbers |
+| `QRcode_APIVersionString()` | Get version string |
+| `QRcode_clearCache()` | Deprecated, no-op |
+
+---
+
+## Source File Inventory
+
+The library consists of the following source files:
+
+### Core Library
+
+| File | Purpose |
+|---|---|
+| `qrencode.h` | Public API header — all external declarations |
+| `qrencode.c` | Core encoding engine — QRRawCode, FrameFiller, QRcode_encode* |
+| `qrencode_inner.h` | Internal header for test access to private types |
+| `qrinput.h` / `qrinput.c` | Input data management, mode encoding, bit stream construction |
+| `bitstream.h` / `bitstream.c` | Binary sequence (bit array) class |
+| `qrspec.h` / `qrspec.c` | QR Code spec tables — capacity, ECC, alignment, frame creation |
+| `mqrspec.h` / `mqrspec.c` | Micro QR Code spec tables and frame creation |
+| `rsecc.h` / `rsecc.c` | Reed-Solomon error correction encoder |
+| `split.h` / `split.c` | Input string splitter — automatic mode detection and optimization |
+| `mask.h` / `mask.c` | Masking for full QR Code — 8 patterns, penalty evaluation |
+| `mmask.h` / `mmask.c` | Masking for Micro QR Code — 4 patterns |
+
+### CLI Tool
+
+| File | Purpose |
+|---|---|
+| `qrenc.c` | Command-line `qrencode` tool — PNG, EPS, SVG, XPM, ANSI, ASCII, UTF-8 output |
+
+### Build System
+
+| File | Purpose |
+|---|---|
+| `CMakeLists.txt` | CMake build configuration |
+| `configure.ac` | Autotools configure script template |
+| `Makefile.am` | Automake makefile template |
+| `autogen.sh` | Script to generate `configure` from `configure.ac` |
+| `libqrencode.pc.in` | pkg-config template |
+| `qrencode.1.in` | Man page template |
+| `cmake/FindIconv.cmake` | CMake module for finding iconv |
+
+### Test Suite
+
+Located in `tests/`:
+
+| File | Tests |
+|---|---|
+| `test_bitstream.c` | BitStream class operations |
+| `test_estimatebit.c` | Bit stream size estimation |
+| `test_qrinput.c` | Input data handling and encoding |
+| `test_qrspec.c` | QR specification tables and frame generation |
+| `test_mqrspec.c` | Micro QR specification |
+| `test_qrencode.c` | End-to-end encoding |
+| `test_split.c` | String splitting and mode optimization |
+| `test_split_urls.c` | URL-specific splitting tests |
+| `test_mask.c` | Mask pattern and penalty evaluation |
+| `test_mmask.c` | Micro QR mask patterns |
+| `test_rs.c` | Reed-Solomon encoder correctness |
+| `test_monkey.c` | Randomized fuzz testing |
+| `prof_qrencode.c` | Performance profiling |
+| `pthread_qrencode.c` | Thread safety testing |
+
+---
+
+## Supported Standards
+
+- **JIS X0510:2004** — "Two dimensional symbol — QR-code — Basic Specification"
+- **ISO/IEC 18004:2006** — "Automatic identification and data capture techniques — QR Code 2005 bar code symbology specification"
+
+The source code frequently references specific sections and tables from these standards. For example:
+- Capacity tables: Table 1 (p.13) and Tables 12-16 (pp.30-36) of JIS X0510:2004
+- Mode indicators: Table 2 of JIS X0510:2004 (p.16)
+- Penalty rules: Section 8.8.2 (p.45) of JIS X0510:2004
+- ECI encoding: Table 4 of JIS X0510:2004 (p.17)
+- Alignment patterns: Table 1 in Appendix E (p.71) of JIS X0510:2004
+- Version information: Table 1 in Appendix D (p.68) of JIS X0510:2004
+- Micro QR format info: Table 10 of Appendix 1 (p.115) of JIS X0510:2004
+
+---
+
+## What Is NOT Supported
+
+The README explicitly lists features not implemented:
+
+- **QR Code Model 1** — The deprecated original model
+- **ECI mode** — Listed as unsupported in README, though the code has partial implementation with `QR_MODE_ECI` and `QRinput_appendECIheader()` / `QRinput_encodeModeECI()`
+- **FNC1 mode** — Similarly listed as unsupported in README, but has code paths for `QR_MODE_FNC1FIRST` and `QR_MODE_FNC1SECOND`
+
+> **Note:** The code contains working implementations for ECI and FNC1 modes despite the README claiming they are unsupported. The README may be outdated — these modes appear functional based on code analysis.
+
+---
+
+## Thread Safety
+
+When built with pthread support (`--enable-thread-safety` for Autotools, or automatic detection in CMake), the library uses a mutex to protect:
+
+1. **Reed-Solomon initialization** — The GF(2^8) lookup tables and generator polynomials are lazily initialized. A `pthread_mutex_t` in `rsecc.c` guards `RSECC_init()` and `generator_init()`.
+
+The `HAVE_LIBPTHREAD` preprocessor macro controls this behavior. Functions marked as "THREAD UNSAFE when pthread is disabled" in the API documentation include all `QRcode_encode*` functions, as they share global RS state without mutex protection when pthread is not available.
+
+---
+
+## Output Formats (CLI Tool)
+
+The `qrencode` CLI tool supports the following output formats via the `-t` flag:
+
+| Format | Description |
+|---|---|
+| `PNG` | Indexed-color PNG (1-bit, palette-based) |
+| `PNG32` | 32-bit RGBA PNG |
+| `EPS` | Encapsulated PostScript |
+| `SVG` | Scalable Vector Graphics (with optional RLE and path mode) |
+| `XPM` | X PixMap format |
+| `ANSI` | ANSI terminal escape codes (16-color) |
+| `ANSI256` | ANSI terminal escape codes (256-color) |
+| `ASCII` | ASCII art (# for black, space for white) |
+| `ASCIIi` | Inverted ASCII art |
+| `UTF8` | Unicode block characters |
+| `UTF8i` | Inverted UTF-8 |
+| `ANSIUTF8` | UTF-8 with ANSI color codes |
+| `ANSIUTF8i` | Inverted UTF-8 with ANSI color codes |
+| `ANSI256UTF8` | UTF-8 with 256-color ANSI codes |
+
+---
+
+## License
+
+The library is licensed under the **GNU Lesser General Public License v2.1** or any later version. The Reed-Solomon encoder is derived from Phil Karn's (KA9Q) FEC library, also under LGPL.
+
+```
+Copyright (C) 2006-2018, 2020 Kentaro Fukuchi <kentaro@fukuchi.org>
+Reed-Solomon: Copyright (C) 2002, 2003, 2004, 2006 Phil Karn, KA9Q
+```
+
+---
+
+## Key Constants
+
+Defined across the headers:
+
+```c
+// qrencode.h
+#define QRSPEC_VERSION_MAX 40 // Maximum QR version
+#define MQRSPEC_VERSION_MAX 4 // Maximum Micro QR version
+
+// qrspec.h
+#define QRSPEC_WIDTH_MAX 177 // Maximum symbol width (version 40)
+
+// mqrspec.h
+#define MQRSPEC_WIDTH_MAX 17 // Maximum Micro QR width (M4)
+
+// qrinput.h
+#define MODE_INDICATOR_SIZE 4 // Bits for mode indicator
+#define STRUCTURE_HEADER_SIZE 20 // Bits for structured append header
+#define MAX_STRUCTURED_SYMBOLS 16 // Max symbols in structured set
+
+// qrspec.h — Mode indicator values
+#define QRSPEC_MODEID_ECI 7
+#define QRSPEC_MODEID_NUM 1
+#define QRSPEC_MODEID_AN 2
+#define QRSPEC_MODEID_8 4
+#define QRSPEC_MODEID_KANJI 8
+#define QRSPEC_MODEID_FNC1FIRST 5
+#define QRSPEC_MODEID_FNC1SECOND 9
+#define QRSPEC_MODEID_STRUCTURE 3
+#define QRSPEC_MODEID_TERMINATOR 0
+
+// mqrspec.h — Micro QR mode indicator values
+#define MQRSPEC_MODEID_NUM 0
+#define MQRSPEC_MODEID_AN 1
+#define MQRSPEC_MODEID_8 2
+#define MQRSPEC_MODEID_KANJI 3
+```
+
+---
+
+## Dependencies
+
+The library itself has **no external dependencies**. Optional dependencies are:
+
+| Dependency | Required For | Detection |
+|---|---|---|
+| **libpng** | PNG output in CLI tool | pkg-config / CMake `find_package` |
+| **SDL 2.0** | `view_qrcode` test viewer | pkg-config |
+| **libiconv** | Decoder in test suite | CMake `find_package` / `AM_ICONV_LINK` |
+| **pthreads** | Thread safety | `AC_CHECK_LIB` / CMake `find_package(Threads)` |
+
+---
+
+## Building
+
+The library supports two build systems:
+
+1. **Autotools** — `./configure && make && make install`
+2. **CMake** — `cmake . && make`
+
+Both produce:
+- `libqrencode.{a,so,dylib}` — The library
+- `qrencode` — The CLI tool (optional)
+- `libqrencode.pc` — pkg-config file
+- `qrencode.1` — Man page
+
+See [building.md](building.md) for detailed build instructions.
+
+---
+
+## Structured Append
+
+Structured Append allows splitting a large data set across multiple QR Code symbols (up to `MAX_STRUCTURED_SYMBOLS` = 16). Each symbol carries a header encoding:
+
+- Total number of symbols (4 bits)
+- Symbol index (4 bits)
+- Parity byte (8 bits)
+
+Total header overhead: 20 bits (`STRUCTURE_HEADER_SIZE`) per symbol.
+
+The library provides both automatic splitting (`QRinput_splitQRinputToStruct()`, `QRcode_encodeStringStructured()`) and manual construction (`QRinput_Struct_new()`, `QRinput_Struct_appendInput()`).
+
+Example from the public API documentation:
+
+```c
+QRcode_List *qrcodes;
+QRcode_List *entry;
+QRcode *qrcode;
+
+qrcodes = QRcode_encodeStringStructured(...);
+entry = qrcodes;
+while(entry != NULL) {
+ qrcode = entry->code;
+ // render qrcode
+ entry = entry->next;
+}
+QRcode_List_free(qrcodes); // free the whole list — `entry` is NULL after the loop
+```
+
+---
+
+## Version Auto-Selection
+
+When version is set to 0 (the default for `QRinput_new()`), the library automatically selects the minimum version that can accommodate the input data. This is implemented in `QRinput_estimateVersion()` in `qrinput.c`:
+
+```c
+STATIC_IN_RELEASE int QRinput_estimateVersion(QRinput *input)
+{
+ int bits;
+ int version, prev;
+
+ version = 0;
+ do {
+ prev = version;
+ bits = QRinput_estimateBitStreamSize(input, prev);
+ version = QRspec_getMinimumVersion((bits + 7) / 8, input->level);
+ if(prev == 0 && version > 1) {
+ version--;
+ }
+ } while (version > prev);
+
+ return version;
+}
+```
+
+This iterates because changing the version changes the length indicator sizes, which in turn affects the total bit count. The loop converges when the estimated version matches the previous iteration.
+
+For Micro QR encoding (`QRcode_encodeStringMQR`, `QRcode_encodeDataMQR`), auto-selection works by trying each version from the specified minimum up to `MQRSPEC_VERSION_MAX` (4) until encoding succeeds.
+
+---
+
+## Encoding Pipeline Summary
+
+The high-level data flow from input string to QR Code matrix is:
+
+1. **Input parsing** — `Split_splitStringToQRinput()` analyzes the input and splits it into optimal mode segments (numeric, alphanumeric, 8-bit, Kanji)
+2. **Bit stream construction** — Each segment is encoded according to its mode and appended to a `BitStream`
+3. **Version estimation** — The minimum version is selected based on total bit count and error correction level
+4. **Padding** — Terminator pattern and pad codewords (`0xEC`, `0x11` alternating) are appended
+5. **RS encoding** — Reed-Solomon ECC codewords are computed for each data block via `RSECC_encode()`
+6. **Interleaving** — Data and ECC blocks are interleaved according to the QR spec
+7. **Frame creation** — `QRspec_newFrame()` builds the base frame with finder patterns, timing patterns, alignment patterns, and version information
+8. **Module placement** — `FrameFiller_next()` places data/ECC bits into the frame in the correct zigzag pattern
+9. **Masking** — All 8 mask patterns are applied and evaluated; the one with the lowest penalty score is selected
+10. **Format information** — BCH-encoded format info (EC level + mask) is written into the frame
+11. **Output** — The completed frame is returned as a `QRcode` struct
+
+See [architecture.md](architecture.md) for detailed module relationships and data flow.
diff --git a/docs/handbook/genqrcode/public-api.md b/docs/handbook/genqrcode/public-api.md
new file mode 100644
index 0000000000..3d12824eb2
--- /dev/null
+++ b/docs/handbook/genqrcode/public-api.md
@@ -0,0 +1,912 @@
+# genqrcode / libqrencode — Public API Reference
+
+## Header
+
+All public API declarations are in `qrencode.h`. This is the only header that consumers need to include:
+
+```c
+#include <qrencode.h>
+```
+
+The header is guarded by `QRENCODE_H` and includes `extern "C"` linkage for C++ compatibility.
+
+---
+
+## Types
+
+### QRencodeMode
+
+Encoding mode enumeration:
+
+```c
+typedef enum {
+ QR_MODE_NUL = -1, // Terminator (internal use only)
+ QR_MODE_NUM = 0, // Numeric mode
+ QR_MODE_AN, // Alphabet-numeric mode
+ QR_MODE_8, // 8-bit data mode
+ QR_MODE_KANJI, // Kanji (shift-jis) mode
+ QR_MODE_STRUCTURE, // Internal use only
+ QR_MODE_ECI, // ECI mode
+ QR_MODE_FNC1FIRST, // FNC1, first position
+ QR_MODE_FNC1SECOND, // FNC1, second position
+} QRencodeMode;
+```
+
+**Notes:**
+- `QR_MODE_NUL` and `QR_MODE_STRUCTURE` are for internal use only
+- When using auto-parsing functions (`QRcode_encodeString`), only `QR_MODE_8` and `QR_MODE_KANJI` are valid as hints
+- `QR_MODE_ECI` data must be appended via `QRinput_appendECIheader()`, not `QRinput_append()`
+
+### QRecLevel
+
+Error correction level enumeration:
+
+```c
+typedef enum {
+ QR_ECLEVEL_L = 0, // lowest (~7% recovery)
+ QR_ECLEVEL_M, // (~15% recovery)
+ QR_ECLEVEL_Q, // (~25% recovery)
+ QR_ECLEVEL_H // highest (~30% recovery)
+} QRecLevel;
+```
+
+### QRcode
+
+The encoded QR Code symbol:
+
+```c
+typedef struct {
+ int version; // version of the symbol (1-40 for QR, 1-4 for MQR)
+ int width; // width of the symbol in modules
+ unsigned char *data; // symbol data (width*width bytes)
+} QRcode;
+```
+
+The `data` array has `width * width` entries. Each byte is a module with this bit layout:
+
+```
+MSB 76543210 LSB
+ |||||||`- 1=black/0=white
+ ||||||`-- 1=ecc/0=data code area
+ |||||`--- format information
+ ||||`---- version information
+ |||`----- timing pattern
+ ||`------ alignment pattern
+ |`------- finder pattern and separator
+ `-------- non-data modules (format, timing, etc.)
+```
+
+For rendering, typically only bit 0 is used: `data[y * width + x] & 1`.
+
+### QRcode_List
+
+Singly-linked list of QRcode for structured-append symbols:
+
+```c
+typedef struct _QRcode_List {
+ QRcode *code;
+ struct _QRcode_List *next;
+} QRcode_List;
+```
+
+### QRinput
+
+Opaque input data object:
+
+```c
+typedef struct _QRinput QRinput;
+```
+
+Internally (in `qrinput.h`), this is:
+```c
+struct _QRinput {
+ int version;
+ QRecLevel level;
+ QRinput_List *head;
+ QRinput_List *tail;
+ int mqr;
+ int fnc1;
+ unsigned char appid;
+};
+```
+
+### QRinput_Struct
+
+Opaque structured input set:
+
+```c
+typedef struct _QRinput_Struct QRinput_Struct;
+```
+
+Internally:
+```c
+struct _QRinput_Struct {
+ int size;
+ int parity;
+ QRinput_InputList *head;
+ QRinput_InputList *tail;
+};
+```
+
+---
+
+## Constants
+
+```c
+#define QRSPEC_VERSION_MAX 40 // Maximum QR Code version
+#define MQRSPEC_VERSION_MAX 4 // Maximum Micro QR version
+```
+
+---
+
+## Input Construction Functions
+
+### QRinput_new
+
+```c
+QRinput *QRinput_new(void);
+```
+
+Creates a new input data object with version set to 0 (auto-select) and error correction level set to `QR_ECLEVEL_L`.
+
+**Returns:** Input object, or `NULL` on error (sets `errno` to `ENOMEM`).
+
+**Example:**
+```c
+QRinput *input = QRinput_new();
+if(input == NULL) {
+ perror("QRinput_new");
+ exit(1);
+}
+// Append data, encode, then free
+QRinput_free(input);
+```
+
+### QRinput_new2
+
+```c
+QRinput *QRinput_new2(int version, QRecLevel level);
+```
+
+Creates a new input data object with explicit version and error correction level.
+
+**Parameters:**
+- `version` — Version number (0 for auto-select, 1–40)
+- `level` — Error correction level (`QR_ECLEVEL_L` through `QR_ECLEVEL_H`)
+
+**Returns:** Input object, or `NULL` on error.
+
+**Errors:**
+- `ENOMEM` — Unable to allocate memory
+- `EINVAL` — Invalid version (< 0 or > 40) or level
+
+**Example:**
+```c
+QRinput *input = QRinput_new2(5, QR_ECLEVEL_M);
+```
+
+### QRinput_newMQR
+
+```c
+QRinput *QRinput_newMQR(int version, QRecLevel level);
+```
+
+Creates a Micro QR Code input object. **Version must be specified** (> 0), unlike standard QR where 0 means auto.
+
+**Parameters:**
+- `version` — Version number (1–4)
+- `level` — Error correction level. Valid combinations:
+ - M1: error detection only (level is ignored)
+ - M2: `QR_ECLEVEL_L`, `QR_ECLEVEL_M`
+ - M3: `QR_ECLEVEL_L`, `QR_ECLEVEL_M`
+ - M4: `QR_ECLEVEL_L`, `QR_ECLEVEL_M`, `QR_ECLEVEL_Q`
+
+**Returns:** Input object (with `mqr` flag set), or `NULL`.
+
+**Errors:**
+- `EINVAL` — Invalid version/level combination (checks `MQRspec_getECCLength()` != 0)
+- `ENOMEM` — Unable to allocate memory
+
+**Example:**
+```c
+QRinput *input = QRinput_newMQR(3, QR_ECLEVEL_L);
+```
+
+### QRinput_append
+
+```c
+int QRinput_append(QRinput *input, QRencodeMode mode, int size,
+ const unsigned char *data);
+```
+
+Appends a data chunk to the input object. The data is **copied** — the caller retains ownership of the `data` buffer.
+
+**Parameters:**
+- `input` — Input object
+- `mode` — Encoding mode (`QR_MODE_NUM`, `QR_MODE_AN`, `QR_MODE_8`, `QR_MODE_KANJI`, `QR_MODE_STRUCTURE`, `QR_MODE_ECI`, `QR_MODE_FNC1FIRST`, `QR_MODE_FNC1SECOND`)
+- `size` — Size of data in bytes
+- `data` — Pointer to input data
+
+**Returns:** 0 on success, -1 on error.
+
+**Errors:**
+- `EINVAL` — Invalid data for the specified mode (e.g., non-digit data with `QR_MODE_NUM`)
+- `ENOMEM` — Unable to allocate memory
+
+**Validation rules per mode:**
+- `QR_MODE_NUM`: All bytes must be '0'–'9'
+- `QR_MODE_AN`: All bytes must pass `QRinput_lookAnTable()` (digits, uppercase letters, space, $, %, *, +, -, ., /, :)
+- `QR_MODE_KANJI`: Even number of bytes, each pair must be valid Shift-JIS range (0x8140–0x9FFC or 0xE040–0xEBBF)
+- `QR_MODE_8`: No validation (any bytes accepted)
+- `QR_MODE_FNC1SECOND`: Size must be exactly 1
+
+**Example:**
+```c
+QRinput *input = QRinput_new2(0, QR_ECLEVEL_M);
+
+// Append numeric data
+const unsigned char num[] = "12345";
+QRinput_append(input, QR_MODE_NUM, 5, num);
+
+// Append 8-bit data
+const unsigned char text[] = "Hello, World!";
+QRinput_append(input, QR_MODE_8, 13, text);
+
+QRcode *code = QRcode_encodeInput(input);
+QRinput_free(input);
+```
+
+### QRinput_appendECIheader
+
+```c
+int QRinput_appendECIheader(QRinput *input, unsigned int ecinum);
+```
+
+Appends an Extended Channel Interpretation (ECI) header.
+
+**Parameters:**
+- `input` — Input object
+- `ecinum` — ECI indicator number (0–999999)
+
+**Returns:** 0 on success, -1 on error.
+
+**Errors:**
+- `EINVAL` — `ecinum` > 999999
+
+**Note:** Internally creates a 4-byte little-endian representation of `ecinum` and appends as `QR_MODE_ECI`.
+
+**Example:**
+```c
+// Set ECI to UTF-8 (ECI 000026)
+QRinput_appendECIheader(input, 26);
+QRinput_append(input, QR_MODE_8, strlen(utf8_str), (unsigned char *)utf8_str);
+```
+
+### QRinput_getVersion
+
+```c
+int QRinput_getVersion(QRinput *input);
+```
+
+Returns the current version number of the input object.
+
+**Returns:** Version number (0 for auto-select, 1–40 for QR, 1–4 for MQR).
+
+### QRinput_setVersion
+
+```c
+int QRinput_setVersion(QRinput *input, int version);
+```
+
+Sets the version number. **Cannot be used with Micro QR objects.**
+
+**Parameters:**
+- `version` — 0 for auto-select, 1–40 for explicit version
+
+**Returns:** 0 on success, -1 on error (`EINVAL`).
+
+### QRinput_getErrorCorrectionLevel
+
+```c
+QRecLevel QRinput_getErrorCorrectionLevel(QRinput *input);
+```
+
+Returns the current error correction level.
+
+### QRinput_setErrorCorrectionLevel
+
+```c
+int QRinput_setErrorCorrectionLevel(QRinput *input, QRecLevel level);
+```
+
+Sets the error correction level. **Cannot be used with Micro QR objects.**
+
+**Returns:** 0 on success, -1 on error (`EINVAL`).
+
+### QRinput_setVersionAndErrorCorrectionLevel
+
+```c
+int QRinput_setVersionAndErrorCorrectionLevel(QRinput *input, int version,
+ QRecLevel level);
+```
+
+Sets both version and error correction level at once. **Recommended for Micro QR**, as it validates the combination.
+
+**Returns:** 0 on success, -1 on error (`EINVAL`).
+
+### QRinput_free
+
+```c
+void QRinput_free(QRinput *input);
+```
+
+Frees the input object and all data chunks contained within it. Safe to call with `NULL`.
+
+### QRinput_check
+
+```c
+int QRinput_check(QRencodeMode mode, int size, const unsigned char *data);
+```
+
+Validates input data for the given mode without modifying any objects.
+
+**Returns:** 0 if valid, -1 if invalid.
+
+### QRinput_setFNC1First
+
+```c
+int QRinput_setFNC1First(QRinput *input);
+```
+
+Sets the FNC1 first position flag for GS1 compatibility.
+
+### QRinput_setFNC1Second
+
+```c
+int QRinput_setFNC1Second(QRinput *input, unsigned char appid);
+```
+
+Sets the FNC1 second position flag with the given application identifier.
+
+---
+
+## Structured Append Functions
+
+### QRinput_Struct_new
+
+```c
+QRinput_Struct *QRinput_Struct_new(void);
+```
+
+Creates a new structured input set for generating linked QR Code symbols.
+
+**Returns:** Instance of `QRinput_Struct`, or `NULL` (`ENOMEM`).
+
+### QRinput_Struct_setParity
+
+```c
+void QRinput_Struct_setParity(QRinput_Struct *s, unsigned char parity);
+```
+
+Manually sets the parity byte for the structured symbol set. If not called, parity is calculated automatically by `QRinput_Struct_insertStructuredAppendHeaders()`.
+
+### QRinput_Struct_appendInput
+
+```c
+int QRinput_Struct_appendInput(QRinput_Struct *s, QRinput *input);
+```
+
+Appends a `QRinput` to the structured set. Rejects Micro QR inputs.
+
+**Warning:** Never append the same `QRinput` object twice.
+
+**Returns:** Number of inputs in the structure (> 0), or -1 on error.
+
+**Errors:**
+- `EINVAL` — NULL input or MQR input
+- `ENOMEM` — Unable to allocate memory
+
+### QRinput_Struct_free
+
+```c
+void QRinput_Struct_free(QRinput_Struct *s);
+```
+
+Frees the structured set and **all QRinput objects** contained within.
+
+### QRinput_splitQRinputToStruct
+
+```c
+QRinput_Struct *QRinput_splitQRinputToStruct(QRinput *input);
+```
+
+Automatically splits a single `QRinput` into multiple inputs suitable for structured append. Calculates parity, sets it, and inserts structured-append headers.
+
+**Prerequisites:** Version and error correction level must be set on `input`.
+
+**Returns:** A `QRinput_Struct`, or `NULL` on error.
+
+**Errors:**
+- `ERANGE` — Input data too large
+- `EINVAL` — Invalid input
+- `ENOMEM` — Unable to allocate memory
+
+### QRinput_Struct_insertStructuredAppendHeaders
+
+```c
+int QRinput_Struct_insertStructuredAppendHeaders(QRinput_Struct *s);
+```
+
+Inserts structured-append headers into each QRinput in the set. Calculates parity if not already set. **Call this exactly once before encoding.**
+
+**Returns:** 0 on success, -1 on error.
+
+---
+
+## Encoding Functions — Standard QR
+
+### QRcode_encodeInput
+
+```c
+QRcode *QRcode_encodeInput(QRinput *input);
+```
+
+Encodes a manually constructed `QRinput` into a QR Code. Dispatches to `QRcode_encodeMask()` or `QRcode_encodeMaskMQR()` based on the input's `mqr` flag.
+
+**Warning:** THREAD UNSAFE when pthread is disabled.
+
+**Returns:** `QRcode` instance. The result version may be larger than specified.
+
+**Errors:**
+- `EINVAL` — Invalid input object
+- `ENOMEM` — Unable to allocate memory
+
+**Example:**
+```c
+QRinput *input = QRinput_new2(0, QR_ECLEVEL_H);
+QRinput_append(input, QR_MODE_8, 11, (unsigned char *)"Hello World");
+QRcode *qr = QRcode_encodeInput(input);
+// Use qr->data, qr->width, qr->version
+QRcode_free(qr);
+QRinput_free(input);
+```
+
+### QRcode_encodeString
+
+```c
+QRcode *QRcode_encodeString(const char *string, int version, QRecLevel level,
+ QRencodeMode hint, int casesensitive);
+```
+
+The primary high-level encoding function. Automatically parses the input string and selects optimal encoding modes.
+
+**Parameters:**
+- `string` — NUL-terminated input string
+- `version` — Minimum version (0 for auto)
+- `level` — Error correction level
+- `hint` — Only `QR_MODE_8` or `QR_MODE_KANJI` are valid:
+ - `QR_MODE_KANJI`: Assumes Shift-JIS input, encodes Kanji characters in Kanji mode
+ - `QR_MODE_8`: All non-alphanumeric characters encoded as 8-bit (use for UTF-8)
+- `casesensitive` — 1 for case-sensitive, 0 to convert lowercase to uppercase (enables more AN-mode encoding)
+
+**Returns:** `QRcode` instance or `NULL`.
+
+**Errors:**
+- `EINVAL` — NULL string or invalid hint (not `QR_MODE_8` or `QR_MODE_KANJI`)
+- `ENOMEM` — Unable to allocate memory
+- `ERANGE` — Input too large for any version
+
+**Implementation:** Internally calls `Split_splitStringToQRinput()` to optimize mode selection, then `QRcode_encodeInput()`.
+
+**Example:**
+```c
+// Simple encoding
+QRcode *qr = QRcode_encodeString("https://example.com", 0,
+ QR_ECLEVEL_M, QR_MODE_8, 1);
+
+// Case-insensitive (more compact)
+QRcode *qr = QRcode_encodeString("HELLO123", 0,
+ QR_ECLEVEL_L, QR_MODE_8, 0);
+
+// With Kanji support
+QRcode *qr = QRcode_encodeString(sjis_string, 0,
+ QR_ECLEVEL_M, QR_MODE_KANJI, 1);
+```
+
+### QRcode_encodeString8bit
+
+```c
+QRcode *QRcode_encodeString8bit(const char *string, int version, QRecLevel level);
+```
+
+Encodes the entire string in 8-bit mode without mode optimization.
+
+**Parameters:**
+- `string` — NUL-terminated input string
+- `version` — Minimum version (0 for auto)
+- `level` — Error correction level
+
+**Returns:** `QRcode` instance or `NULL`.
+
+**Note:** Internally calls `QRcode_encodeData()` with `strlen(string)`.
+
+### QRcode_encodeData
+
+```c
+QRcode *QRcode_encodeData(int size, const unsigned char *data, int version,
+ QRecLevel level);
+```
+
+Encodes raw byte data (may include NUL bytes) in 8-bit mode.
+
+**Parameters:**
+- `size` — Size of input data in bytes
+- `data` — Pointer to input data
+- `version` — Minimum version (0 for auto)
+- `level` — Error correction level
+
+**Returns:** `QRcode` instance or `NULL`.
+
+**Example:**
+```c
+// Encoding binary data (may contain NUL)
+unsigned char binary[] = {0x00, 0xFF, 0x42, 0x00, 0x7A};
+QRcode *qr = QRcode_encodeData(5, binary, 0, QR_ECLEVEL_L);
+```
+
+### QRcode_free
+
+```c
+void QRcode_free(QRcode *qrcode);
+```
+
+Frees a `QRcode` instance, including the internal `data` array. Safe to call with `NULL`.
+
+---
+
+## Encoding Functions — Micro QR
+
+### QRcode_encodeStringMQR
+
+```c
+QRcode *QRcode_encodeStringMQR(const char *string, int version, QRecLevel level,
+ QRencodeMode hint, int casesensitive);
+```
+
+Micro QR version of `QRcode_encodeString()`. If `version` is 0, it tries versions 1 through 4 until encoding succeeds.
+
+**Implementation detail:** Loops from `version` to `MQRSPEC_VERSION_MAX`:
+```c
+for(i = version; i <= MQRSPEC_VERSION_MAX; i++) {
+ QRcode *code = QRcode_encodeStringReal(string, i, level, 1, hint, casesensitive);
+ if(code != NULL) return code;
+}
+```
+
+### QRcode_encodeString8bitMQR
+
+```c
+QRcode *QRcode_encodeString8bitMQR(const char *string, int version, QRecLevel level);
+```
+
+8-bit encoding for Micro QR. Tries versions incrementally.
+
+### QRcode_encodeDataMQR
+
+```c
+QRcode *QRcode_encodeDataMQR(int size, const unsigned char *data, int version,
+ QRecLevel level);
+```
+
+Raw data encoding for Micro QR. Tries versions incrementally.
+
+---
+
+## Encoding Functions — Structured Append
+
+### QRcode_encodeInputStructured
+
+```c
+QRcode_List *QRcode_encodeInputStructured(QRinput_Struct *s);
+```
+
+Encodes each `QRinput` in a structured set into a linked list of `QRcode` objects.
+
+**Returns:** Head of `QRcode_List`, or `NULL` on error.
+
+### QRcode_encodeStringStructured
+
+```c
+QRcode_List *QRcode_encodeStringStructured(const char *string, int version,
+ QRecLevel level, QRencodeMode hint,
+ int casesensitive);
+```
+
+Auto-splits a string and encodes as structured-append symbols.
+
+**Parameters:** Same as `QRcode_encodeString()`, but `version` must be specified (non-zero).
+
+**Returns:** Head of `QRcode_List`.
+
+**Example:**
+```c
+QRcode_List *list = QRcode_encodeStringStructured(long_text, 5,
+ QR_ECLEVEL_M, QR_MODE_8, 1);
+QRcode_List *entry = list;
+int i = 1;
+while(entry != NULL) {
+ QRcode *qr = entry->code;
+ printf("Symbol %d: version=%d, width=%d\n", i, qr->version, qr->width);
+ // render qr
+ entry = entry->next;
+ i++;
+}
+QRcode_List_free(list);
+```
+
+### QRcode_encodeString8bitStructured
+
+```c
+QRcode_List *QRcode_encodeString8bitStructured(const char *string, int version,
+ QRecLevel level);
+```
+
+8-bit structured encoding.
+
+### QRcode_encodeDataStructured
+
+```c
+QRcode_List *QRcode_encodeDataStructured(int size, const unsigned char *data,
+ int version, QRecLevel level);
+```
+
+Raw data structured encoding.
+
+### QRcode_List_size
+
+```c
+int QRcode_List_size(QRcode_List *qrlist);
+```
+
+Returns the number of QRcode objects in the linked list.
+
+### QRcode_List_free
+
+```c
+void QRcode_List_free(QRcode_List *qrlist);
+```
+
+Frees the entire linked list and all contained `QRcode` objects.
+
+---
+
+## Utility Functions
+
+### QRcode_APIVersion
+
+```c
+void QRcode_APIVersion(int *major_version, int *minor_version, int *micro_version);
+```
+
+Retrieves the library version as three integers.
+
+**Example:**
+```c
+int major, minor, micro;
+QRcode_APIVersion(&major, &minor, &micro);
+printf("libqrencode %d.%d.%d\n", major, minor, micro);
+// Output: libqrencode 4.1.1
+```
+
+### QRcode_APIVersionString
+
+```c
+char *QRcode_APIVersionString(void);
+```
+
+Returns a string identifying the library version. The string is held by the library — do **NOT** free it.
+
+**Returns:** Version string (e.g., `"4.1.1"`).
+
+### QRcode_clearCache
+
+```c
+void QRcode_clearCache(void); // DEPRECATED
+```
+
+A deprecated no-op function. Previously cleared internal frame caches. Marked with `__attribute__((deprecated))` on GCC.
+
+---
+
+## Complete Usage Examples
+
+### Minimal Example
+
+```c
+#include <stdio.h>
+#include <qrencode.h>
+
+int main(void) {
+ QRcode *qr = QRcode_encodeString("Hello", 0, QR_ECLEVEL_L, QR_MODE_8, 1);
+ if(qr == NULL) {
+ perror("encoding failed");
+ return 1;
+ }
+
+ printf("Version: %d, Width: %d\n", qr->version, qr->width);
+
+ for(int y = 0; y < qr->width; y++) {
+ for(int x = 0; x < qr->width; x++) {
+ printf("%s", (qr->data[y * qr->width + x] & 1) ? "##" : " ");
+ }
+ printf("\n");
+ }
+
+ QRcode_free(qr);
+ return 0;
+}
+```
+
+### Manual Input Construction
+
+```c
+#include <qrencode.h>
+
+int main(void) {
+ QRinput *input = QRinput_new2(0, QR_ECLEVEL_M);
+
+ // Mixed-mode encoding: numbers in numeric mode, text in 8-bit
+ QRinput_append(input, QR_MODE_NUM, 10, (unsigned char *)"0123456789");
+ QRinput_append(input, QR_MODE_8, 5, (unsigned char *)"Hello");
+ QRinput_append(input, QR_MODE_NUM, 3, (unsigned char *)"999");
+
+ QRcode *qr = QRcode_encodeInput(input);
+ // ... use qr ...
+
+ QRcode_free(qr);
+ QRinput_free(input);
+ return 0;
+}
+```
+
+### ECI Header for UTF-8
+
+```c
+#include <string.h>   /* for strlen() */
+#include <qrencode.h>
+
+int main(void) {
+ QRinput *input = QRinput_new2(0, QR_ECLEVEL_M);
+
+ // Add ECI header for UTF-8 (ECI 000026)
+ QRinput_appendECIheader(input, 26);
+
+ // Append UTF-8 data
+ const char *utf8 = "こんにちは"; // UTF-8 encoded
+ QRinput_append(input, QR_MODE_8, strlen(utf8), (unsigned char *)utf8);
+
+ QRcode *qr = QRcode_encodeInput(input);
+ // ... use qr ...
+
+ QRcode_free(qr);
+ QRinput_free(input);
+ return 0;
+}
+```
+
+### Structured Append
+
+```c
+#include <qrencode.h>
+
+int main(void) {
+ // Automatically split and encode
+ const char *long_data = "...very long text...";
+ QRcode_List *list = QRcode_encodeStringStructured(
+ long_data, 5, QR_ECLEVEL_M, QR_MODE_8, 1);
+
+ if(list == NULL) {
+ perror("structured encoding failed");
+ return 1;
+ }
+
+ printf("Total symbols: %d\n", QRcode_List_size(list));
+
+ QRcode_List *entry = list;
+ while(entry != NULL) {
+ // Render entry->code
+ entry = entry->next;
+ }
+
+ QRcode_List_free(list);
+ return 0;
+}
+```
+
+### Micro QR
+
+```c
+#include <qrencode.h>
+
+int main(void) {
+ // Encode short data in Micro QR
+ QRcode *qr = QRcode_encodeStringMQR("12345", 0, QR_ECLEVEL_L,
+ QR_MODE_8, 1);
+ if(qr != NULL) {
+ printf("MQR version: M%d, width: %d\n", qr->version, qr->width);
+ QRcode_free(qr);
+ }
+ return 0;
+}
+```
+
+### Binary Data with NUL Bytes
+
+```c
+#include <qrencode.h>
+
+int main(void) {
+ // QRcode_encodeString cannot handle NUL bytes — use QRcode_encodeData
+ unsigned char binary_data[] = {0x48, 0x65, 0x00, 0x6C, 0x6F};
+ QRcode *qr = QRcode_encodeData(5, binary_data, 0, QR_ECLEVEL_L);
+
+ if(qr != NULL) {
+ // ... use qr ...
+ QRcode_free(qr);
+ }
+ return 0;
+}
+```
+
+### FNC1 (GS1) Mode
+
+```c
+#include <string.h>   /* for strlen() */
+#include <qrencode.h>
+
+int main(void) {
+ QRinput *input = QRinput_new2(0, QR_ECLEVEL_M);
+
+ // Set GS1 mode
+ QRinput_setFNC1First(input);
+
+ // Append GS1 data
+ const char *gs1 = "(01)12345678901234";
+ QRinput_append(input, QR_MODE_8, strlen(gs1), (unsigned char *)gs1);
+
+ QRcode *qr = QRcode_encodeInput(input);
+ // ...
+ QRcode_free(qr);
+ QRinput_free(input);
+ return 0;
+}
+```
+
+---
+
+## Thread Safety Notes
+
+All `QRcode_encode*` functions are documented as:
+
+> **Warning:** This function is THREAD UNSAFE when pthread is disabled.
+
+When built with pthread support (`HAVE_LIBPTHREAD`):
+- The Reed-Solomon initialization is protected by `RSECC_mutex`
+- Multiple threads can safely call encoding functions concurrently
+
+When built without pthread support:
+- The lazy initialization of GF(2^8) tables and generator polynomials is not thread-safe
+- Concurrent calls may corrupt internal state
+- Use external synchronization or call `QRcode_encodeString()` once from the main thread before spawning worker threads (to trigger initialization)
+
+---
+
+## Error Convention Summary
+
+| Return Type | Success | Failure |
+|---|---|---|
+| `QRcode *` | Valid pointer | `NULL` (check `errno`) |
+| `QRcode_List *` | Valid pointer | `NULL` (check `errno`) |
+| `QRinput *` | Valid pointer | `NULL` (check `errno`) |
+| `QRinput_Struct *` | Valid pointer | `NULL` (check `errno`) |
+| `int` (status) | `0` | `-1` (check `errno`) |
+| `int` (count) | `> 0` | `-1` (check `errno`) |
+| `char *` (version string) | Non-NULL | N/A (always succeeds) |
diff --git a/docs/handbook/genqrcode/reed-solomon.md b/docs/handbook/genqrcode/reed-solomon.md
new file mode 100644
index 0000000000..ff84c2f772
--- /dev/null
+++ b/docs/handbook/genqrcode/reed-solomon.md
@@ -0,0 +1,347 @@
+# genqrcode / libqrencode — Reed-Solomon Internals
+
+## Overview
+
+The `rsecc.c` module implements a systematic Reed-Solomon encoder over GF(2^8). It is the sole error-correction engine used by libqrencode for both full QR Code and Micro QR Code. The module provides a single public function, `RSECC_encode()`, and manages all GF arithmetic, generator polynomials, and thread safety internally.
+
+---
+
+## Galois Field GF(2^8)
+
+### Primitive Polynomial
+
+```c
+static int proot = 0x11d; // x^8 + x^4 + x^3 + x^2 + 1
+```
+
+This is the standard QR Code primitive polynomial (identical to the one used in AES). In binary: `100011101`. It is irreducible over GF(2) and generates a field of 256 elements.
+
+### Log and Antilog Tables
+
+Two 256-entry lookup tables enable O(1) multiplication in GF(2^8):
+
+```c
+static unsigned char alpha[256]; // antilog: alpha[i] = α^i
+static unsigned char aindex[256]; // log: aindex[α^i] = i
+```
+
+**Initialization** (`RSECC_init()`):
+
+```c
+static void RSECC_init(void)
+{
+ int i, b;
+
+ alpha[0] = 1; // α^0 = 1
+ aindex[0] = 0; // log(0) = undefined, set to 0
+ aindex[1] = 0; // log(1) = 0
+
+ for(i = 1; i < 255; i++) {
+ b = alpha[i-1] << 1; // multiply by x
+ if(b & 0x100) {
+ b ^= proot; // reduce mod primitive
+ }
+ alpha[i] = (unsigned char)b;
+ aindex[b] = i;
+ }
+}
+```
+
+After initialization:
+- `alpha[0] = 1, alpha[1] = 2, alpha[2] = 4, ..., alpha[7] = 128, alpha[8] = 29, ...`
+- `alpha[255]` is not computed (wraps to `alpha[0]`)
+- `aindex[0] = 0` is a sentinel (log of 0 is undefined in GF)
+
+**GF multiplication**: To multiply `a * b`:
+```
+result = alpha[(aindex[a] + aindex[b]) % 255]
+```
+Special case: if either operand is 0, result is 0.
+
+---
+
+## Generator Polynomials
+
+### Theory
+
+For `t` ECC codewords, the generator polynomial is:
+
+$$g(x) = (x + \alpha^0)(x + \alpha^1) \cdots (x + \alpha^{t-1})$$
+
+This produces a degree-`t` polynomial. The encoder divides the data polynomial by `g(x)` and the remainder becomes the ECC codewords.
+
+### Cache
+
+Generator polynomials are cached to avoid recomputation:
+
+```c
+static int generator_initialized[29] = {0}; // flags for el=2..30
+static unsigned char generator[29][31]; // polynomial coefficients
+```
+
+The array is indexed by `ecc_length - 2`, supporting ECC lengths 2 through 30. QR Code never needs more than 30 ECC codewords per block.
+
+### Construction
+
+`generator_init()` builds the polynomial incrementally:
+
+```c
+static void generator_init(int el)
+{
+ int i, j;
+ unsigned char *g = generator[el - 2];
+
+ g[0] = 1; // start with g(x) = 1
+
+ for(i = 0; i < el; i++) {
+ // multiply g(x) by (x + α^i)
+ g[i+1] = 1;
+ for(j = i; j > 0; j--) {
+ if(g[j] != 0) {
+ g[j] = g[j-1] ^ alpha[(aindex[g[j]] + i) % 255];
+ } else {
+ g[j] = g[j-1];
+ }
+ }
+ g[0] = alpha[(aindex[g[0]] + i) % 255];
+ }
+}
+```
+
+**Coefficient storage convention:** coefficients are stored constant-term first:
+- `g[0]` = constant term
+- `g[el]` = leading coefficient (always 1)
+
+**Walk-through for `el = 2`** (2 ECC codewords):
+
+1. Start: g(x) = 1 → stored as [1]
+2. Multiply by (x + α^0) = (x + 1): g(x) = x + 1 → stored as [1, 1]
+3. Multiply by (x + α^1) = (x + 2): g(x) = x² + 3x + 2 → stored as [2, 3, 1]
+
+(In GF(2^8), (x + 1)(x + 2) expands to x² + (1 ⊕ 2)x + (1 · 2) = x² + 3x + 2, since addition is XOR.)
+
+**Walk-through for `el = 4`** (4 ECC codewords):
+1. Start: g(x) = 1
+2. Multiply by (x + α^0) → g(x) = x + 1
+3. Multiply by (x + α^1) → g(x) = x² + 3x + 2
+4. Multiply by (x + α^2) → degree-3 polynomial
+5. Multiply by (x + α^3) → degree-4 polynomial
+
+---
+
+## The Encode Function
+
+```c
+int RSECC_encode(int data_length, int ecc_length,
+ const unsigned char *data, unsigned char *ecc)
+```
+
+**Parameters:**
+- `data_length` — Number of data codewords
+- `ecc_length` — Number of ECC codewords to generate (2–30)
+- `data` — Input data codewords
+- `ecc` — Output buffer for ECC codewords (must be pre-allocated, `ecc_length` bytes)
+
+**Returns:** 0 on success, -1 on failure.
+
+### Thread Safety
+
+```c
+#ifdef HAVE_LIBPTHREAD
+static pthread_mutex_t RSECC_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+int RSECC_encode(int data_length, int ecc_length,
+ const unsigned char *data, unsigned char *ecc)
+{
+ int i, j;
+ unsigned char feedback;
+ unsigned char *gen;
+
+#ifdef HAVE_LIBPTHREAD
+ pthread_mutex_lock(&RSECC_mutex);
+#endif
+
+ if(!generator_initialized[ecc_length - 2]) {
+ if(!initialized) {
+ RSECC_init();
+ initialized = 1;
+ }
+ generator_init(ecc_length);
+ generator_initialized[ecc_length - 2] = 1;
+ }
+
+#ifdef HAVE_LIBPTHREAD
+ pthread_mutex_unlock(&RSECC_mutex);
+#endif
+```
+
+The mutex protects **only the initialization** path. Once tables are initialized, subsequent calls proceed without locking.
+
+### Encoding Algorithm
+
+```c
+ gen = generator[ecc_length - 2];
+
+ memset(ecc, 0, ecc_length);
+ for(i = 0; i < data_length; i++) {
+ feedback = aindex[data[i] ^ ecc[0]];
+ if(feedback != 255) {
+ for(j = 1; j < ecc_length; j++) {
+ ecc[j] ^= alpha[(feedback + aindex[gen[ecc_length - j]]) % 255];
+ }
+ }
+ memmove(&ecc[0], &ecc[1], ecc_length - 1);
+ if(feedback != 255) {
+ ecc[ecc_length - 1] = alpha[(feedback + aindex[gen[0]]) % 255];
+ } else {
+ ecc[ecc_length - 1] = 0;
+ }
+ }
+
+ return 0;
+}
+```
+
+This is a standard LFSR (linear feedback shift register) implementation of systematic RS encoding:
+
+1. **For each data byte `data[i]`:**
+ - XOR with current first ECC byte: `data[i] ^ ecc[0]`
+ - Convert to log domain: `feedback = aindex[...]`
+ - If non-zero (`!= 255` sentinel):
+ - Multiply feedback by each generator coefficient and XOR into corresponding ECC position
+ - Shift ECC register left by one (via `memmove`)
+ - Set last ECC position to `α^(feedback + log(g[0]))` or 0
+
+2. **After processing all data bytes**, `ecc[]` contains the RS remainder — the ECC codewords.
+
+### Why `feedback != 255`?
+
+The check `feedback != 255` is a zero test. When `data[i] ^ ecc[0]` equals 0, its logarithm is undefined, so the log table stores the sentinel value 255 at `aindex[0]` (valid logarithms occupy only 0–254, since α^255 wraps back to α^0). When the sentinel is seen, no multiple of the generator polynomial is added to the register — it simply shifts left with a 0 fed into the last position, exactly as polynomial long division requires when the leading term of the partial remainder is zero.
+
+---
+
+## How RS Blocks Are Created
+
+In `qrencode.c`, `QRraw_new()` creates RS blocks:
+
+```c
+static QRRawCode *QRraw_new(QRinput *input) {
+ QRRawCode *raw;
+ int spec[5];
+
+ QRspec_getEccSpec(input->version, input->level, spec);
+
+ raw->dataLength = QRspec_getDataLength(input->version, input->level);
+ raw->eccLength = QRspec_getECCLength(input->version, input->level);
+ raw->b1 = QRspec_rsBlockNum1(spec);
+ raw->b2 = QRspec_rsBlockNum2(spec);
+ raw->rsblock_num = raw->b1 + raw->b2;
+ raw->rsblock = calloc(raw->rsblock_num, sizeof(RSblock));
+
+ unsigned char *datacode = QRinput_getByteStream(input);
+ unsigned char *ecccode = malloc(raw->eccLength);
+
+ int dl1 = QRspec_rsDataCodes1(spec);
+ int el = QRspec_rsEccCodes1(spec); // same for both groups
+ int dl2 = QRspec_rsDataCodes2(spec);
+
+ unsigned char *dp = datacode;
+ unsigned char *ep = ecccode;
+
+ // Initialize group 1 blocks
+ for(int i = 0; i < raw->b1; i++) {
+ RSblock_initBlock(&raw->rsblock[i], dl1, dp, el, ep, RSECC_encode);
+ dp += dl1;
+ ep += el;
+ }
+
+ // Initialize group 2 blocks (if any)
+ for(int i = 0; i < raw->b2; i++) {
+ RSblock_initBlock(&raw->rsblock[raw->b1 + i], dl2, dp, el, ep, RSECC_encode);
+ dp += dl2;
+ ep += el;
+ }
+
+ return raw;
+}
+```
+
+`RSblock_initBlock()` calls `RSECC_encode()` directly:
+
+```c
+static int RSblock_initBlock(RSblock *block, int dl, unsigned char *data,
+ int el, unsigned char *ecc,
+ RSECC_encoder encoder)
+{
+ block->dataLength = dl;
+ block->data = data;
+ block->eccLength = el;
+ block->ecc = ecc;
+
+ return encoder(dl, el, data, ecc);
+}
+```
+
+There is no decoding — libqrencode only encodes. RS decoding is the scanner's responsibility.
+
+---
+
+## ECC Length Range
+
+The generator cache supports ECC lengths 2–30:
+
+```c
+static int generator_initialized[29] = {0}; // indices 0..28 → el 2..30
+static unsigned char generator[29][31]; // max degree 30, 31 coefficients
+```
+
+From the `eccTable` in `qrspec.c`, the maximum ECC per block is 30 codewords (used in version 40 at all EC levels). The minimum is 7 (version 1, level L).
+
+For Micro QR, all versions use a single block with small ECC counts (3, 5, 6, 8, 10, or 14 depending on version/level).
+
+---
+
+## Performance Characteristics
+
+- **Table initialization**: One-time cost. GF tables take 512 bytes (two 256-entry byte tables). Generator polynomials up to ~930 bytes total.
+- **Encoding**: O(n × t) where n = data length, t = ECC length. The inner loop does 1 log lookup, 1 addition mod 255, 1 antilog lookup, and 1 XOR per generator coefficient.
+- **Memory**: The `memmove()` in each iteration shifts `ecc_length - 1` bytes, which for typical blocks (el ≤ 30) is negligible.
+- **Thread contention**: Minimal. The mutex is only held during initialization. After all generator polynomials are initialized (at most 29 different ones), encoding is lock-free.
+
+---
+
+## Mathematical Background
+
+### Systematic Encoding
+
+Given data polynomial $D(x) = d_{k-1}x^{k-1} + d_{k-2}x^{k-2} + \cdots + d_0$ and generator polynomial $g(x)$ of degree $t$:
+
+1. Compute $x^t \cdot D(x)$ (shift data up by $t$ positions)
+2. Divide: $x^t \cdot D(x) = q(x) \cdot g(x) + r(x)$
+3. The codeword is $x^t \cdot D(x) - r(x) = x^t \cdot D(x) + r(x)$ (subtraction = addition in GF(2))
+
+The remainder $r(x)$ has degree < $t$ and its coefficients are the ECC codewords.
+
+### The LFSR Approach
+
+The RSECC_encode implementation uses a shift-register approach equivalent to polynomial long division:
+
+```
+ecc = [0, 0, ..., 0] (t zeros)
+
+for each data byte d:
+ feedback = log(d XOR ecc[0])
+ shift ecc left by 1
+ for j = 1 to t-1:
+ ecc[j] ^= alpha[feedback + log(g[t-j])]
+ ecc[t-1] = alpha[feedback + log(g[0])]
+```
+
+Each iteration processes one data codeword, maintaining the partial remainder in the `ecc` register. After all data is processed, the register contains the final remainder.
+
+### Error Correction Capability
+
+For `t` ECC codewords, the RS code can correct up to $\lfloor t/2 \rfloor$ symbol errors, or detect up to `t` symbol errors (or any combination where $2e + d \leq t$, with $e$ errors and $d$ erasures).
+
+QR Code Level L with 7 ECC per block → 3 correctable symbol errors per block.
diff --git a/docs/handbook/genqrcode/testing.md b/docs/handbook/genqrcode/testing.md
new file mode 100644
index 0000000000..d606302ee5
--- /dev/null
+++ b/docs/handbook/genqrcode/testing.md
@@ -0,0 +1,398 @@
+# genqrcode / libqrencode — Testing
+
+## Test Infrastructure
+
+### Test Framework
+
+libqrencode uses a custom test framework defined in `tests/common.h`:
+
+```c
+#define testStart(__arg__) { \
+ int tests = 0, failed = 0, assertion; \
+ char *_test_name = (__arg__); \
+ fputs("--- TEST: ", stderr); \
+ fputs(_test_name, stderr); \
+ fputs(" ---\n", stderr);
+
+#define testEnd() \
+ if(failed) fputs("FAILED.\n", stderr); \
+ else fputs("PASSED.\n", stderr); \
+ testFinish(); }
+
+#define testFinish() \
+ printf(" %s: %d assertions, %d failures.\n", \
+ _test_name, tests, failed);
+
+#define assert_equal(__actual__, __expected__, ...) { \
+ tests++; assertion = (__actual__ == __expected__); \
+ if(!assertion) { \
+ failed++; \
+ printf("FAILED: " __VA_ARGS__); \
+ printf(" (%d != %d)\n", __actual__, __expected__); \
+ } }
+
+#define assert_zero(__expr__, ...) \
+ assert_equal((__expr__), 0, __VA_ARGS__)
+
+#define assert_nonzero(__expr__, ...) { \
+ tests++; assertion = (__expr__) != 0; \
+ if(!assertion) { \
+ failed++; \
+ printf("FAILED: " __VA_ARGS__); \
+ printf("\n"); \
+ } }
+
+#define assert_null(__expr__, ...) { \
+ tests++; assertion = (__expr__) == NULL; \
+ if(!assertion) { \
+ failed++; \
+ printf("FAILED: " __VA_ARGS__); \
+ printf("\n"); \
+ } }
+
+#define assert_nonnull(__expr__, ...) { \
+ tests++; assertion = (__expr__) != NULL; \
+ if(!assertion) { \
+ failed++; \
+ printf("FAILED: " __VA_ARGS__); \
+ printf("\n"); \
+ } }
+
+#define assert_nothing(__expr__, ...) { \
+ tests++; __expr__; }
+```
+
+Test functions return void and use `testStart()`/`testEnd()` blocks. Assertions increment the `tests` counter and only print output on failure.
+
+---
+
+## Test Programs
+
+### test_bitstream
+
+Tests the `BitStream` module:
+
+- `test_null()` — New stream has length 0
+- `test_num()` — `BitStream_appendNum()` with various bit widths
+- `test_bytes()` — `BitStream_appendBytes()` correctness
+- `test_toByte()` — Bit packing from 1-bit-per-byte to 8-bits-per-byte
+
+### test_estimatebit
+
+Tests bit estimation functions:
+
+- `test_estimateBitsModeNum()` — Numeric mode bit cost for various lengths
+- `test_estimateBitsModeAn()` — Alphanumeric mode bit cost
+- `test_estimateBitsMode8()` — 8-bit mode bit cost
+- `test_estimateBitsModeKanji()` — Kanji mode bit cost
+
+### test_qrinput
+
+Tests input data management:
+
+- `test_encodeNumeric()` — Numeric data encoding correctness
+- `test_encodeAn()` — Alphanumeric encoding with the AN table
+- `test_encode8()` — 8-bit byte encoding
+- `test_encodeKanji()` — Kanji double-byte encoding
+- `test_encodeTooLong()` — Overflow detection (ERANGE)
+- `test_struct()` — Structured append input handling
+- `test_splitEntry()` — Input entry splitting for version changes
+- `test_parity()` — Parity calculation for structured append
+- `test_padding()` — Pad byte alternation (0xEC/0x11)
+- `test_mqr()` — Micro QR input creation and validation
+
+### test_qrspec
+
+Tests QR Code specification tables:
+
+- `test_getWidth()` — Width = version × 4 + 17
+- `test_getDataLength()` — Data capacity lookup
+- `test_getECCLength()` — ECC length calculation
+- `test_alignment()` — Alignment pattern positions
+- `test_versionPattern()` — Version info BCH codes
+- `test_formatInfo()` — Format info BCH codes
+
+### test_mqrspec
+
+Tests Micro QR spec tables:
+
+- `test_getWidth()` — Width = version × 2 + 9
+- `test_getDataLengthBit()` — Bit-level data capacity
+- `test_getECCLength()` — MQR ECC lengths
+- `test_newFrame()` — Frame creation with single finder pattern
+
+### test_rs
+
+Tests Reed-Solomon encoder:
+
+- Tests `RSECC_encode()` with known data/ECC pairs
+
+### test_qrencode
+
+Tests the high-level encoding pipeline:
+
+- `test_encode()` — Full encode and verify symbol dimensions
+- `test_encodeVersion()` — Version auto-selection
+- `test_encodeMQR()` — Micro QR encoding
+- `test_encode8bitString()` — 8-bit string encoding
+- `test_encodeStructured()` — Structured append output
+- `test_encodeMask()` — Forced mask testing
+- `test_encodeEmpty()` — Empty input handling
+
+### test_split
+
+Tests the string splitter:
+
+- `test_split1()` — Pure numeric string
+- `test_split2()` — Pure alphanumeric
+- `test_split3()` — Mixed numeric/alpha/8-bit
+- `test_split4()` — Kanji detection
+- `test_split5()` — Mode boundary optimization
+
+### test_split_urls
+
+Tests URL splitting efficiency:
+
+- Tests that common URL patterns produce efficient mode sequences
+- Verifies that `http://`, domain names, and paths are encoded optimally
+
+### test_mask
+
+Tests full QR masking:
+
+- Tests all 8 mask patterns (`Mask_mask0` through `Mask_mask7`)
+- `test_eval()` — Penalty evaluation
+- `test_format()` — Format information writing
+- `test_maskSelection()` — Automatic mask selection picks minimum penalty
+
+### test_mmask
+
+Tests Micro QR masking:
+
+- Tests 4 mask patterns (`MMask_mask0` through `MMask_mask3`)
+- `test_eval()` — Micro QR scoring (maximize, not minimize)
+- `test_format()` — MQR format information
+
+### test_monkey
+
+Random/fuzz testing:
+
+- Generates random input data of various sizes and modes
+- Encodes and verifies the result is non-NULL and has valid dimensions
+
+---
+
+## Utility Programs
+
+### prof_qrencode
+
+Profiling harness. Encodes the same string many times for performance measurement:
+
+```c
+int main() {
+ // Repeated encoding for profiling with gprof/gcov
+}
+```
+
+Build with `GPROF` or `COVERAGE` CMake options.
+
+### pthread_qrencode
+
+Thread safety test. Spawns multiple threads encoding simultaneously:
+
+```c
+// Tests concurrent RSECC_encode() calls
+// Verifies no crashes or data corruption
+```
+
+Only built when `HAVE_LIBPTHREAD` is defined.
+
+### view_qrcode
+
+Renders a QR code to the terminal for visual inspection:
+
+```c
+// ASCII art output of encoded symbol
+```
+
+### create_frame_pattern
+
+Generates and prints the function pattern frame (finder, timing, alignment):
+
+```c
+// Used to visually verify frame creation
+```
+
+---
+
+## Running Tests
+
+### Via CMake
+
+```bash
+mkdir build && cd build
+cmake -DWITH_TESTS=ON ..
+make
+make test
+# or:
+ctest --verbose
+```
+
+CMake `tests/CMakeLists.txt` registers test executables:
+
+```cmake
+add_executable(test_qrinput test_qrinput.c common.c)
+target_link_libraries(test_qrinput qrencode_static)
+add_test(test_qrinput test_qrinput)
+
+add_executable(test_bitstream test_bitstream.c common.c)
+target_link_libraries(test_bitstream qrencode_static)
+add_test(test_bitstream test_bitstream)
+
+# ... similar for all test programs
+```
+
+### Via Autotools
+
+```bash
+./configure --with-tests
+make
+make check
+```
+
+Or use the test scripts directly:
+
+```bash
+cd tests
+./test_basic.sh
+```
+
+### test_basic.sh
+
+```bash
+#!/bin/sh
+./test_bitstream && \
+./test_estimatebit && \
+./test_qrinput && \
+./test_qrspec && \
+./test_mqrspec && \
+./test_rs && \
+./test_qrencode && \
+./test_split && \
+./test_split_urls && \
+./test_mask && \
+./test_mmask && \
+./test_monkey
+```
+
+Runs all test executables sequentially. Exits on first failure due to `&&` chaining.
+
+### test_all.sh
+
+Runs `test_basic.sh` plus `test_configure.sh` (which tests the autotools configure/build cycle).
+
+---
+
+## Test Build Macros
+
+When tests are enabled:
+
+- `WITH_TESTS` is defined → `STATIC_IN_RELEASE` is NOT defined
+- All `STATIC_IN_RELEASE` functions become externally visible
+- `qrencode_inner.h` exposes internal types (`RSblock`, `QRRawCode`, etc.)
+
+This allows tests to call internal functions directly:
+
+```c
+// In test_mask.c:
+#include "../qrencode_inner.h"
+
+void test_mask() {
+ testStart("Testing mask evaluation");
+ QRcode *code = QRcode_encodeMask(input, 0); // force mask 0
+ // ... verify mask application ...
+ testEnd();
+}
+```
+
+---
+
+## Code Coverage
+
+### With gcov (Autotools)
+
+```bash
+./configure --enable-gcov --with-tests
+make
+make check
+gcov *.c
+```
+
+### With CMake
+
+```bash
+cmake -DCOVERAGE=ON -DWITH_TESTS=ON ..
+make
+make test
+# generate coverage report
+```
+
+CMake adds `-fprofile-arcs -ftest-coverage` flags when `COVERAGE` is ON.
+
+---
+
+## Address Sanitizer
+
+```bash
+# CMake:
+cmake -DASAN=ON -DWITH_TESTS=ON ..
+
+# Autotools:
+./configure --enable-asan --with-tests
+```
+
+Adds `-fsanitize=address -fno-omit-frame-pointer` for memory error detection.
+
+---
+
+## Adding a New Test
+
+1. Create `tests/test_newmodule.c`:
+```c
+#include <stdio.h>
+#include "common.h"
+#include "../module.h"
+
+void test_something(void)
+{
+ testStart("Testing something");
+ int result = module_function(42);
+ assert_equal(result, 84, "module_function(42)");
+ testEnd();
+}
+
+int main(void)
+{
+ test_something();
+ return 0;
+}
+```
+
+2. Add to `tests/CMakeLists.txt`:
+```cmake
+add_executable(test_newmodule test_newmodule.c common.c)
+target_link_libraries(test_newmodule qrencode_static)
+add_test(test_newmodule test_newmodule)
+```
+
+3. Add to `tests/Makefile.am`:
+```makefile
+noinst_PROGRAMS += test_newmodule
+test_newmodule_SOURCES = test_newmodule.c common.h common.c
+test_newmodule_LDADD = ../libqrencode.la
+```
+
+4. Add to `tests/test_basic.sh`:
+```bash
+./test_newmodule && \
+```
diff --git a/docs/handbook/hooks/logging-system.md b/docs/handbook/hooks/logging-system.md
new file mode 100644
index 0000000000..5bb0b79e01
--- /dev/null
+++ b/docs/handbook/hooks/logging-system.md
@@ -0,0 +1,492 @@
+# Logging System
+
+## Table of Contents
+
+- [Introduction](#introduction)
+- [The log() Function](#the-log-function)
+ - [Function Signature](#function-signature)
+ - [Timestamp Generation](#timestamp-generation)
+ - [Dual Output with tee](#dual-output-with-tee)
+ - [Graceful Degradation](#graceful-degradation)
+- [Log File Configuration](#log-file-configuration)
+ - [The MIRROR_LOG Variable](#the-mirror_log-variable)
+ - [Default Path](#default-path)
+ - [Custom Log Paths](#custom-log-paths)
+ - [Log File Permissions](#log-file-permissions)
+- [Log Message Format](#log-message-format)
+ - [Timestamp Format](#timestamp-format)
+ - [Session Boundaries](#session-boundaries)
+ - [Ref Update Entries](#ref-update-entries)
+ - [Push Status Entries](#push-status-entries)
+ - [Summary Block](#summary-block)
+- [Complete Log Output Example](#complete-log-output-example)
+- [Git Push Output Capture](#git-push-output-capture)
+- [Log Rotation](#log-rotation)
+ - [Using logrotate](#using-logrotate)
+ - [Manual Rotation](#manual-rotation)
+ - [Size-Based Rotation](#size-based-rotation)
+- [Log Analysis](#log-analysis)
+ - [Counting Mirror Sessions](#counting-mirror-sessions)
+ - [Finding Failures](#finding-failures)
+ - [Extracting Push Duration](#extracting-push-duration)
+ - [Monitoring with tail](#monitoring-with-tail)
+- [Fallback Behavior](#fallback-behavior)
+- [Concurrency and Log Interleaving](#concurrency-and-log-interleaving)
+- [Security Considerations](#security-considerations)
+
+---
+
+## Introduction
+
+The Project-Tick `post-receive` hook (`hooks/post-receive`) includes a built-in logging system that records every mirror push operation. The system is implemented as a single bash function, `log()`, that writes timestamped messages to both standard output and a persistent log file.
+
+The logging system is designed with three priorities:
+1. **Reliability** — Logging never causes the hook to fail, even if the log file is unwritable
+2. **Visibility** — Messages appear on the pusher's terminal and in the persistent log
+3. **Simplicity** — A single function, no external logging frameworks
+
+---
+
+## The log() Function
+
+### Function Signature
+
+```bash
+log() {
+ local timestamp
+ timestamp="$(date -u '+%Y-%m-%d %H:%M:%S UTC')"
+ echo "[$timestamp] $*" | tee -a "$MIRROR_LOG" 2>/dev/null || echo "[$timestamp] $*"
+}
+```
+
+The function accepts any number of string arguments via `$*`, which concatenates them with a space separator.
+
+**Usage examples** from the hook:
+
+```bash
+log "=== Mirror push triggered ==="
+log " ref: $refname ($oldrev -> $newrev)"
+log "Pushing to remote: $remote"
+log " ✓ Successfully pushed to $remote"
+log " ✗ FAILED to push to $remote"
+log "--- Summary ---"
+log " Succeeded: ${SUCCEEDED_REMOTES[*]:-none}"
+log " Failed: ${FAILED_REMOTES[*]:-none}"
+log "=== Finished with errors ==="
+log "=== Finished successfully ==="
+```
+
+### Timestamp Generation
+
+```bash
+local timestamp
+timestamp="$(date -u '+%Y-%m-%d %H:%M:%S UTC')"
+```
+
+The `date` command is invoked with two key options:
+
+| Option | Purpose |
+|--------|---------|
+| `-u` | Use UTC time regardless of the server's local timezone |
+| `'+%Y-%m-%d %H:%M:%S UTC'` | ISO 8601-inspired format with explicit UTC suffix |
+
+**Why UTC?** Server environments may span multiple time zones. UTC ensures all log entries are comparable without timezone conversion. The explicit `UTC` suffix in the format string makes it unambiguous — a reader seeing `[2026-04-05 14:30:00 UTC]` knows this is not local time.
+
+**Why `local`?** The `local` keyword restricts `timestamp` to the function scope. Without it, `timestamp` would be a global variable, persisting after the function returns and potentially conflicting with other variables.
+
+### Dual Output with tee
+
+```bash
+echo "[$timestamp] $*" | tee -a "$MIRROR_LOG" 2>/dev/null
+```
+
+The `tee` command reads from stdin and writes to both stdout and the specified file:
+
+```
+echo "message" ──► tee ──► stdout (pusher's terminal)
+ │
+ └──► $MIRROR_LOG (append mode)
+```
+
+The `-a` flag means **append**. Without it, `tee` would truncate the log file on each write, losing previous entries.
+
+### Graceful Degradation
+
+```bash
+echo "[$timestamp] $*" | tee -a "$MIRROR_LOG" 2>/dev/null || echo "[$timestamp] $*"
+```
+
+The error handling chain works in three stages:
+
+1. **Primary path**: `echo | tee -a "$MIRROR_LOG"` — write to both stdout and log file
+2. **Error suppression**: `2>/dev/null` — if `tee` can't write to the log file, suppress its error message (e.g., "Permission denied")
+3. **Fallback**: `|| echo "[$timestamp] $*"` — if the entire `echo | tee` pipeline fails, write to stdout only
+
+This means the log function **never fails silently** — even if the log file is inaccessible, the message still reaches the pusher's terminal. And it **never crashes the hook** — log file errors don't propagate despite `set -e` (because they're handled by the `||` fallback).
+
+---
+
+## Log File Configuration
+
+### The MIRROR_LOG Variable
+
+```bash
+MIRROR_LOG="${MIRROR_LOG:-/var/log/git-mirror.log}"
+```
+
+The log file path is controlled by the `MIRROR_LOG` environment variable with a default fallback.
+
+### Default Path
+
+The default log file location is `/var/log/git-mirror.log`. This follows the Linux Filesystem Hierarchy Standard (FHS) convention of placing log files under `/var/log/`.
+
+**Requirements for the default path**:
+- The directory `/var/log/` must exist (it always does on standard Linux systems)
+- The user running the git daemon must have write permission to the file
+- The file will be created if it doesn't exist (assuming directory write permission)
+
+### Custom Log Paths
+
+Override the default by setting `MIRROR_LOG` in the hook's environment:
+
+```bash
+# In the git daemon's environment
+export MIRROR_LOG=/var/log/project-tick/mirror.log
+
+# Or per-repository via a wrapper
+MIRROR_LOG=/home/git/logs/mirror.log hooks/post-receive
+```
+
+**Common custom paths**:
+
+| Path | Use Case |
+|------|----------|
+| `/var/log/git-mirror.log` | Default — shared system log |
+| `/var/log/project-tick/mirror.log` | Project-specific log directory |
+| `/home/git/logs/mirror.log` | User-local log (no root needed) |
+| `/tmp/mirror.log` | Temporary/testing |
+| `/dev/null` | Disable file logging (stdout only) |
+
+### Log File Permissions
+
+Set up the log file with appropriate ownership:
+
+```bash
+# Create the log file with correct ownership
+sudo touch /var/log/git-mirror.log
+sudo chown git:git /var/log/git-mirror.log
+sudo chmod 640 /var/log/git-mirror.log
+
+# Or create a project-specific log directory
+sudo mkdir -p /var/log/project-tick
+sudo chown git:git /var/log/project-tick
+sudo chmod 750 /var/log/project-tick
+```
+
+The `640` permission (`rw-r-----`) allows the git user to write and the git group to read, while preventing other users from accessing potentially sensitive information.
+
+---
+
+## Log Message Format
+
+### Timestamp Format
+
+Every log line follows this pattern:
+
+```
+[YYYY-MM-DD HH:MM:SS UTC] <message>
+```
+
+Example:
+```
+[2026-04-05 14:30:00 UTC] === Mirror push triggered ===
+```
+
+The square brackets delimit the timestamp, making it easy to parse programmatically:
+
+```bash
+# Extract just the messages (remove timestamps)
+sed 's/^\[[^]]*\] //' /var/log/git-mirror.log
+
+# Extract just the timestamps
+grep -oP '^\[\K[^]]+' /var/log/git-mirror.log
+```
+
+### Session Boundaries
+
+Each mirror operation is delimited by banner lines:
+
+```
+[2026-04-05 14:30:00 UTC] === Mirror push triggered ===
+...
+[2026-04-05 14:30:15 UTC] === Finished successfully ===
+```
+
+Or on failure:
+
+```
+[2026-04-05 14:30:00 UTC] === Mirror push triggered ===
+...
+[2026-04-05 14:30:15 UTC] === Finished with errors ===
+```
+
+The `===` delimiters serve as visual and programmatic session markers.
+
+### Ref Update Entries
+
+Each ref in the push is logged with indentation:
+
+```
+[2026-04-05 14:30:00 UTC] ref: refs/heads/main (abc1234 -> def5678)
+[2026-04-05 14:30:00 UTC] ref: refs/tags/v1.0.0 (0000000 -> abc1234)
+```
+
+The format `($oldrev -> $newrev)` shows the transition. The all-zeros SHA (`0000000...`) indicates a new ref creation or deletion:
+
+| Pattern | Meaning |
+|---------|---------|
+| `(000... -> abc...)` | New ref created |
+| `(abc... -> def...)` | Ref updated |
+| `(abc... -> 000...)` | Ref deleted |
+
+### Push Status Entries
+
+Each remote push produces a status line:
+
+```
+[2026-04-05 14:30:05 UTC] Pushing to remote: github
+[2026-04-05 14:30:08 UTC] ✓ Successfully pushed to github
+```
+
+Or on failure:
+
+```
+[2026-04-05 14:30:10 UTC] Pushing to remote: sourceforge
+[2026-04-05 14:30:25 UTC] ✗ FAILED to push to sourceforge
+```
+
+The Unicode symbols (✓ and ✗) provide quick visual scanning in the log.
+
+### Summary Block
+
+At the end of each session:
+
+```
+[2026-04-05 14:30:15 UTC] --- Summary ---
+[2026-04-05 14:30:15 UTC] Succeeded: github gitlab codeberg
+[2026-04-05 14:30:15 UTC] Failed: sourceforge
+```
+
+Or when all succeed:
+
+```
+[2026-04-05 14:30:15 UTC] --- Summary ---
+[2026-04-05 14:30:15 UTC] Succeeded: github gitlab codeberg sourceforge
+[2026-04-05 14:30:15 UTC] Failed: none
+```
+
+---
+
+## Complete Log Output Example
+
+A typical successful mirror operation produces:
+
+```
+[2026-04-05 14:30:00 UTC] === Mirror push triggered ===
+[2026-04-05 14:30:00 UTC] ref: refs/heads/main (a1b2c3d4e5f6 -> f6e5d4c3b2a1)
+[2026-04-05 14:30:00 UTC] ref: refs/tags/v2.1.0 (0000000000000000000000000000000000000000 -> a1b2c3d4e5f6)
+[2026-04-05 14:30:00 UTC] Pushing to remote: github
+To github.com:Project-Tick/Project-Tick.git
+ + a1b2c3d..f6e5d4c main -> main (forced update)
+ * [new tag] v2.1.0 -> v2.1.0
+[2026-04-05 14:30:03 UTC] ✓ Successfully pushed to github
+[2026-04-05 14:30:03 UTC] Pushing to remote: gitlab
+To gitlab.com:Project-Tick/Project-Tick.git
+ + a1b2c3d..f6e5d4c main -> main (forced update)
+ * [new tag] v2.1.0 -> v2.1.0
+[2026-04-05 14:30:06 UTC] ✓ Successfully pushed to gitlab
+[2026-04-05 14:30:06 UTC] Pushing to remote: codeberg
+To codeberg.org:Project-Tick/Project-Tick.git
+ + a1b2c3d..f6e5d4c main -> main (forced update)
+ * [new tag] v2.1.0 -> v2.1.0
+[2026-04-05 14:30:09 UTC] ✓ Successfully pushed to codeberg
+[2026-04-05 14:30:09 UTC] --- Summary ---
+[2026-04-05 14:30:09 UTC] Succeeded: github gitlab codeberg
+[2026-04-05 14:30:09 UTC] Failed: none
+[2026-04-05 14:30:09 UTC] === Finished successfully ===
+```
+
+Note that the raw `git push` output (the `To ...` and `+ ... (forced update)` lines) is **interleaved** with the hook's log messages. This is because `git push` output goes through `tee` to the log file alongside the hook's `log()` calls.
+
+---
+
+## Git Push Output Capture
+
+Beyond the `log()` function's messages, the raw output of each `git push` is also captured:
+
+```bash
+git push --mirror --force "$remote" 2>&1 | tee -a "$MIRROR_LOG" 2>/dev/null
+```
+
+The `2>&1` redirect merges git's stderr into stdout before piping to `tee`. Git sends progress messages and transfer statistics to stderr, so this redirect ensures the complete output is logged:
+
+```
+To github.com:Project-Tick/Project-Tick.git
+ + a1b2c3d..f6e5d4c main -> main (forced update)
+ * [new tag] v2.1.0 -> v2.1.0
+```
+
+This raw output appears in the log file **without timestamps** because it bypasses the `log()` function. It sits between the "Pushing to remote:" and "✓ Successfully pushed" entries.
+
+---
+
+## Log Rotation
+
+The hook appends to the log file indefinitely. Without rotation, the file will grow without bound. Here are strategies for managing log file size.
+
+### Using logrotate
+
+Create `/etc/logrotate.d/git-mirror`:
+
+```
+/var/log/git-mirror.log {
+ weekly
+ rotate 12
+ compress
+ delaycompress
+ missingok
+ notifempty
+ create 640 git git
+}
+```
+
+| Directive | Effect |
+|-----------|--------|
+| `weekly` | Rotate once per week |
+| `rotate 12` | Keep 12 rotated files (3 months) |
+| `compress` | Compress rotated files with gzip |
+| `delaycompress` | Don't compress the most recent rotated file |
+| `missingok` | Don't error if the log file doesn't exist |
+| `notifempty` | Don't rotate if the file is empty |
+| `create 640 git git` | Create new log file with these permissions |
+
+### Manual Rotation
+
+```bash
+# Rotate manually
+mv /var/log/git-mirror.log /var/log/git-mirror.log.1
+touch /var/log/git-mirror.log
+chown git:git /var/log/git-mirror.log
+```
+
+No signal or restart is needed — the hook appends to `$MIRROR_LOG` on each invocation, so it will create a new file if the old one was moved.
+
+### Size-Based Rotation
+
+Add a cron job that rotates when the file exceeds a certain size:
+
+```bash
+# /etc/cron.daily/git-mirror-log-rotate
+#!/bin/sh
+LOG=/var/log/git-mirror.log
+MAX_SIZE=10485760 # 10 MB
+
+if [ -f "$LOG" ]; then
+ SIZE=$(stat -c %s "$LOG" 2>/dev/null || echo 0)
+ if [ "$SIZE" -gt "$MAX_SIZE" ]; then
+ mv "$LOG" "${LOG}.$(date +%Y%m%d)"
+ gzip "${LOG}.$(date +%Y%m%d)"
+ fi
+fi
+```
+
+---
+
+## Log Analysis
+
+### Counting Mirror Sessions
+
+```bash
+grep -c "=== Mirror push triggered ===" /var/log/git-mirror.log
+```
+
+### Finding Failures
+
+```bash
+# Find all failure entries
+grep "✗ FAILED" /var/log/git-mirror.log
+
+# Find sessions that ended with errors
+grep "=== Finished with errors ===" /var/log/git-mirror.log
+
+# Count failures per remote
+grep "✗ FAILED" /var/log/git-mirror.log | awk '{print $NF}' | sort | uniq -c | sort -rn
+```
+
+### Extracting Push Duration
+
+Calculate the time between trigger and finish:
+
+```bash
+# Extract session start and end times
+grep -E "(Mirror push triggered|Finished)" /var/log/git-mirror.log
+```
+
+### Monitoring with tail
+
+For real-time monitoring during a push:
+
+```bash
+tail -f /var/log/git-mirror.log
+```
+
+---
+
+## Fallback Behavior
+
+The logging system handles the following failure scenarios:
+
+| Scenario | Behavior |
+|----------|----------|
+| Log file doesn't exist | `tee` creates it (if directory is writable) |
+| Log file is not writable | `tee` error suppressed; message goes to stdout only |
+| Log directory doesn't exist | `tee` fails silently; message goes to stdout only |
+| `/dev/null` as log path | All file output discarded; stdout works normally |
+| `$MIRROR_LOG` is empty | `tee -a ""` fails; fallback echo to stdout |
+
+In every case, the hook continues to function. Logging is strictly best-effort and never causes a hook failure.
+
+---
+
+## Concurrency and Log Interleaving
+
+When multiple pushes trigger the hook simultaneously, multiple `post-receive` instances write to the same log file concurrently. The `-a` (append) flag on `tee` opens the file with `O_APPEND` semantics: for each `write()`, the kernel atomically repositions to the end of the file before writing, so concurrent appenders never overwrite each other's data. (Note that the `PIPE_BUF` atomicity limit applies to pipes and FIFOs, not to regular files.)
+
+Because each log line is emitted in a single short `write()`, **individual lines will not be corrupted** in practice. However, lines from different sessions may interleave:
+
+```
+[2026-04-05 14:30:00 UTC] === Mirror push triggered === # Session A
+[2026-04-05 14:30:00 UTC] === Mirror push triggered === # Session B
+[2026-04-05 14:30:00 UTC] ref: refs/heads/main (...) # Session A
+[2026-04-05 14:30:00 UTC] ref: refs/heads/feature (...) # Session B
+```
+
+To disambiguate, you could modify the `log()` function to include a PID:
+
+```bash
+echo "[$$][$timestamp] $*" | tee -a "$MIRROR_LOG" 2>/dev/null || echo "[$$][$timestamp] $*"
+```
+
+This produces lines like `[12345][2026-04-05 14:30:00 UTC] message` which can be filtered by PID.
+
+---
+
+## Security Considerations
+
+1. **Log file contents** — The log records ref names, remote names, and git push output. It should **not** contain credentials (tokens are in the git config, not in push output). However, treat the log as moderately sensitive.
+
+2. **Log file permissions** — Use `640` or `600` permissions. Avoid world-readable (`644`) logs on multi-user systems.
+
+3. **Log injection** — Ref names come from the pusher and appear in log messages (`log " ref: $refname ..."`). While this is a cosmetic concern (log files aren't executed), extremely long or crafted ref names could produce misleading log entries. Git itself limits ref names to valid characters.
+
+4. **Disk exhaustion** — Without log rotation, the log file grows indefinitely. A hostile actor with push access could trigger many pushes to fill the disk. Use log rotation and monitoring to mitigate.
diff --git a/docs/handbook/hooks/mirror-configuration.md b/docs/handbook/hooks/mirror-configuration.md
new file mode 100644
index 0000000000..2bf584e861
--- /dev/null
+++ b/docs/handbook/hooks/mirror-configuration.md
@@ -0,0 +1,627 @@
+# Mirror Configuration
+
+## Table of Contents
+
+- [Introduction](#introduction)
+- [How Git Mirroring Works](#how-git-mirroring-works)
+ - [Push Mirroring vs Fetch Mirroring](#push-mirroring-vs-fetch-mirroring)
+ - [Ref Namespaces Synchronized](#ref-namespaces-synchronized)
+ - [The --mirror Flag Internals](#the---mirror-flag-internals)
+ - [The --force Flag and Divergent History](#the---force-flag-and-divergent-history)
+- [Mirror Remote Configuration](#mirror-remote-configuration)
+ - [Adding a Mirror Remote](#adding-a-mirror-remote)
+ - [Listing Configured Remotes](#listing-configured-remotes)
+ - [Modifying a Remote URL](#modifying-a-remote-url)
+ - [Removing a Mirror Remote](#removing-a-mirror-remote)
+- [Supported Protocols](#supported-protocols)
+ - [SSH Protocol](#ssh-protocol)
+ - [HTTPS Protocol](#https-protocol)
+ - [Git Protocol](#git-protocol)
+ - [Local Path Protocol](#local-path-protocol)
+- [Forge-Specific Configuration](#forge-specific-configuration)
+ - [GitHub](#github)
+ - [GitLab](#gitlab)
+ - [Codeberg](#codeberg)
+ - [SourceForge](#sourceforge)
+ - [Bitbucket](#bitbucket)
+ - [Gitea (Self-Hosted)](#gitea-self-hosted)
+- [Authentication Setup](#authentication-setup)
+ - [SSH Key Authentication](#ssh-key-authentication)
+ - [HTTPS Token Authentication](#https-token-authentication)
+ - [SSH Config for Multiple Keys](#ssh-config-for-multiple-keys)
+ - [Token Scopes and Permissions](#token-scopes-and-permissions)
+- [The MIRROR_REMOTES Variable](#the-mirror_remotes-variable)
+ - [Auto-Detection Mode](#auto-detection-mode)
+ - [Explicit Remote List](#explicit-remote-list)
+ - [Excluding Specific Remotes](#excluding-specific-remotes)
+- [Git Config File Format](#git-config-file-format)
+- [Multi-Repository Mirroring](#multi-repository-mirroring)
+- [Troubleshooting Mirror Issues](#troubleshooting-mirror-issues)
+
+---
+
+## Introduction
+
+The Project-Tick `post-receive` hook (`hooks/post-receive`) mirrors the canonical bare repository to multiple forge platforms. This document covers the configuration of mirror remotes — how to set them up, what protocols and authentication methods are supported, and how the hook discovers and uses them.
+
+The mirror push is triggered by the following line in the hook:
+
+```bash
+git push --mirror --force "$remote"
+```
+
+Everything in this document revolves around configuring the `$remote` targets that this command pushes to.
+
+---
+
+## How Git Mirroring Works
+
+### Push Mirroring vs Fetch Mirroring
+
+Git supports two mirroring directions:
+
+| Type | Command | Direction |
+|------|---------|-----------|
+| Push mirror | `git push --mirror <remote>` | Local → Remote |
+| Fetch mirror | `git clone --mirror <url>` | Remote → Local |
+
+The Project-Tick hook uses **push mirroring** — the canonical repository pushes its refs outward to each forge. This is the active/upstream pattern: changes flow from one source to many targets.
+
+The opposite approach, fetch mirroring, would require each forge to periodically pull from the canonical repo. Push mirroring is preferred because it provides immediate synchronization without polling latency.
+
+### Ref Namespaces Synchronized
+
+When `git push --mirror` executes, it synchronizes **all** refs under the `refs/` hierarchy:
+
+| Ref Namespace | Contents | Example |
+|---------------|----------|---------|
+| `refs/heads/*` | Branches | `refs/heads/main`, `refs/heads/feature/x` |
+| `refs/tags/*` | Tags | `refs/tags/v1.0.0`, `refs/tags/v2.0.0-rc1` |
+| `refs/notes/*` | Git notes | `refs/notes/commits` |
+| `refs/replace/*` | Replacement objects | `refs/replace/<sha>` |
+| `refs/meta/*` | Metadata refs (Gerrit) | `refs/meta/config` |
+
+Notably, `--mirror` also **deletes** remote refs that no longer exist locally. If a branch `feature/old` is deleted in the canonical repo, the mirror push removes it from all mirrors.
+
+### The --mirror Flag Internals
+
+Under the hood, `git push --mirror` is equivalent to:
+
+```bash
+git push --force --prune <remote> 'refs/*:refs/*'
+```
+
+This refspec (`refs/*:refs/*`) maps every local ref to the same-named remote ref. The `--prune` flag deletes remote refs that have no local counterpart.
+
+### The --force Flag and Divergent History
+
+The `--force` flag in the hook's `git push --mirror --force` is redundant with `--mirror` (which implies force), but it's included explicitly for clarity. It handles the case where a ref on the canonical repo has been rewritten (e.g., via `git push --force` or `git rebase`), which would otherwise be rejected as a non-fast-forward update:
+
+```
+ ! [rejected] main -> main (non-fast-forward)
+```
+
+With `--force`, the mirror is overwritten to match the canonical state exactly, even if that means losing remote-only history.
+
+---
+
+## Mirror Remote Configuration
+
+### Adding a Mirror Remote
+
+From within the bare repository:
+
+```bash
+cd /path/to/project-tick.git
+git remote add <name> <url>
+```
+
+The `<name>` can be any valid git remote name. The hook auto-discovers all remotes that aren't named `origin`:
+
+```bash
+MIRROR_REMOTES=$(git remote | grep -v '^origin$' || true)
+```
+
+**Convention**: Use the forge name as the remote name:
+
+```bash
+git remote add github git@github.com:Project-Tick/Project-Tick.git
+git remote add gitlab git@gitlab.com:Project-Tick/Project-Tick.git
+git remote add codeberg git@codeberg.org:Project-Tick/Project-Tick.git
+```
+
+### Listing Configured Remotes
+
+```bash
+git remote -v
+```
+
+Output:
+```
+codeberg git@codeberg.org:Project-Tick/Project-Tick.git (fetch)
+codeberg git@codeberg.org:Project-Tick/Project-Tick.git (push)
+github git@github.com:Project-Tick/Project-Tick.git (fetch)
+github git@github.com:Project-Tick/Project-Tick.git (push)
+gitlab git@gitlab.com:Project-Tick/Project-Tick.git (fetch)
+gitlab git@gitlab.com:Project-Tick/Project-Tick.git (push)
+origin /srv/git/project-tick.git (fetch)
+origin /srv/git/project-tick.git (push)
+```
+
+### Modifying a Remote URL
+
+```bash
+git remote set-url github https://x-access-token:NEW_TOKEN@github.com/Project-Tick/Project-Tick.git
+```
+
+### Removing a Mirror Remote
+
+```bash
+git remote remove codeberg
+```
+
+The hook will no longer push to Codeberg on subsequent pushes. This is the recommended way to temporarily or permanently disable a mirror.
+
+---
+
+## Supported Protocols
+
+### SSH Protocol
+
+**URL format**:
+```
+git@<host>:<owner>/<repo>.git
+ssh://<user>@<host>/<path>
+```
+
+**Examples**:
+```bash
+git remote add github git@github.com:Project-Tick/Project-Tick.git
+git remote add sourceforge ssh://USERNAME@git.code.sf.net/p/project-tick/code
+```
+
+**Characteristics**:
+- Authenticated via SSH keypair
+- Supports key-based automation
+- Port 22 by default (or custom via `ssh://host:port/path`)
+- Requires public key registration on the forge
+
+**Best for**: Server-side automation where SSH keys can be managed securely.
+
+### HTTPS Protocol
+
+**URL format**:
+```
+https://<user>:<token>@<host>/<owner>/<repo>.git
+```
+
+**Examples**:
+```bash
+git remote add github https://x-access-token:TOKEN@github.com/Project-Tick/Project-Tick.git
+git remote add gitlab https://oauth2:TOKEN@gitlab.com/Project-Tick/Project-Tick.git
+git remote add codeberg https://TOKEN@codeberg.org/Project-Tick/Project-Tick.git
+```
+
+**Characteristics**:
+- Token embedded in URL (stored in git config)
+- Works behind HTTP proxies
+- Port 443 by default
+- No SSH key management needed
+
+**Best for**: Environments where SSH is blocked or key management is impractical.
+
+### Git Protocol
+
+**URL format**:
+```
+git://<host>/<path>
+```
+
+**Characteristics**:
+- Read-only — **cannot** be used for mirroring
+- Unauthenticated
+- Port 9418
+
+Not suitable for the mirror hook.
+
+### Local Path Protocol
+
+**URL format**:
+```
+/path/to/repo.git
+file:///path/to/repo.git
+```
+
+**Characteristics**:
+- No network involved
+- Useful for local backup mirrors
+- Fast — no network, TLS, or SSH overhead (note: hardlink sharing is a `git clone` optimization and does not apply to pushes)
+
+**Example**:
+```bash
+git remote add backup /mnt/backup/project-tick.git
+```
+
+---
+
+## Forge-Specific Configuration
+
+### GitHub
+
+**SSH remote**:
+```bash
+git remote add github git@github.com:Project-Tick/Project-Tick.git
+```
+
+**HTTPS remote**:
+```bash
+git remote add github https://x-access-token:ghp_XXXX@github.com/Project-Tick/Project-Tick.git
+```
+
+**Token format**: GitHub Personal Access Tokens start with `ghp_` (classic) or `github_pat_` (fine-grained).
+
+**Required token scopes**:
+- `repo` (full control of private repositories) — for classic tokens
+- Repository permissions → Contents → Read and Write — for fine-grained tokens
+
+**GitHub-specific considerations**:
+- GitHub may reject pushes that include non-standard refs (e.g., `refs/pull/*` or `refs/keep-around/*`). Since these refs don't exist in the canonical repo, this is typically not an issue.
+- Branch protection rules may block `--force` pushes. Ensure the mirror token has admin access or that branch protection allows force pushes from the mirror user.
+- GitHub has a push size limit of 2 GB per push. Large mirror pushes may need to be split.
+
+### GitLab
+
+**SSH remote**:
+```bash
+git remote add gitlab git@gitlab.com:Project-Tick/Project-Tick.git
+```
+
+**HTTPS remote**:
+```bash
+git remote add gitlab https://oauth2:glpat-XXXX@gitlab.com/Project-Tick/Project-Tick.git
+```
+
+**Token format**: GitLab Personal Access Tokens start with `glpat-`.
+
+**Required token scopes**:
+- `write_repository` — for push access
+
+**GitLab-specific considerations**:
+- GitLab supports built-in repository mirroring (Settings → Repository → Mirroring). This is an alternative to the hook-based approach but requires GitLab Premium for push mirroring.
+- Protected branches/tags may reject force pushes. Configure the mirror user as a Maintainer with force-push permissions.
+
+### Codeberg
+
+**SSH remote**:
+```bash
+git remote add codeberg git@codeberg.org:Project-Tick/Project-Tick.git
+```
+
+**HTTPS remote**:
+```bash
+git remote add codeberg https://TOKEN@codeberg.org/Project-Tick/Project-Tick.git
+```
+
+**Token format**: Codeberg (Gitea) uses application tokens without a specific prefix.
+
+**Required token permissions**:
+- Repository → Write
+
+**Codeberg-specific considerations**:
+- Codeberg runs Gitea/Forgejo. Token authentication uses the token as the username with no password (or any dummy password).
+- Codeberg also supports Gitea's built-in mirror feature.
+
+### SourceForge
+
+**SSH remote** (SSH only — no HTTPS push support):
+```bash
+git remote add sourceforge ssh://USERNAME@git.code.sf.net/p/project-tick/code
+```
+
+**SourceForge-specific considerations**:
+- SourceForge uses a non-standard URL format: `ssh://USERNAME@git.code.sf.net/p/<project>/<mount>`
+- The `USERNAME` is the SourceForge account username
+- SSH key must be registered at https://sourceforge.net/auth/shell_services
+- SourceForge may have rate limits on Git operations
+
+### Bitbucket
+
+**SSH remote**:
+```bash
+git remote add bitbucket git@bitbucket.org:Project-Tick/Project-Tick.git
+```
+
+**HTTPS remote**:
+```bash
+git remote add bitbucket https://USERNAME:APP_PASSWORD@bitbucket.org/Project-Tick/Project-Tick.git
+```
+
+**Bitbucket-specific considerations**:
+- Bitbucket uses App Passwords (not PATs) for HTTPS authentication
+- The username is the Bitbucket account username (not email)
+
+### Gitea (Self-Hosted)
+
+**SSH remote**:
+```bash
+git remote add gitea git@gitea.example.com:Project-Tick/Project-Tick.git
+```
+
+**HTTPS remote**:
+```bash
+git remote add gitea https://TOKEN@gitea.example.com/Project-Tick/Project-Tick.git
+```
+
+**Considerations**:
+- Self-hosted Gitea instances may use custom SSH ports: `ssh://git@gitea.example.com:2222/Project-Tick/Project-Tick.git`
+- Self-signed TLS certificates require `git config http.sslVerify false` on the bare repo (not recommended for production)
+
+---
+
+## Authentication Setup
+
+### SSH Key Authentication
+
+Generate a dedicated mirror key:
+
+```bash
+ssh-keygen -t ed25519 -C "project-tick-mirror" -f ~/.ssh/mirror_key -N ""
+```
+
+Register the public key (`~/.ssh/mirror_key.pub`) as a deploy key on each forge:
+
+| Forge | Registration Path |
+|-------|------------------|
+| GitHub | Repository → Settings → Deploy keys |
+| GitLab | Repository → Settings → Repository → Deploy keys |
+| Codeberg | Repository → Settings → Deploy Keys |
+| SourceForge | Account → SSH Settings |
+
+### HTTPS Token Authentication
+
+Tokens are embedded directly in the remote URL as documented in each forge's section above. The token is stored in the bare repository's git config file:
+
+```bash
+cat /path/to/project-tick.git/config
+```
+
+```ini
+[remote "github"]
+ url = https://x-access-token:ghp_XXXX@github.com/Project-Tick/Project-Tick.git
+ fetch = +refs/*:refs/remotes/github/*
+```
+
+**Security note**: Ensure the config file has restrictive permissions:
+
+```bash
+chmod 600 /path/to/project-tick.git/config
+```
+
+### SSH Config for Multiple Keys
+
+When different forges require different SSH keys, use `~/.ssh/config`:
+
+```
+Host github.com
+ HostName github.com
+ User git
+ IdentityFile ~/.ssh/github_mirror_key
+ IdentitiesOnly yes
+
+Host gitlab.com
+ HostName gitlab.com
+ User git
+ IdentityFile ~/.ssh/gitlab_mirror_key
+ IdentitiesOnly yes
+
+Host codeberg.org
+ HostName codeberg.org
+ User git
+ IdentityFile ~/.ssh/codeberg_mirror_key
+ IdentitiesOnly yes
+
+Host git.code.sf.net
+ HostName git.code.sf.net
+ User USERNAME
+ IdentityFile ~/.ssh/sourceforge_mirror_key
+ IdentitiesOnly yes
+```
+
+The `IdentitiesOnly yes` directive ensures only the specified key is offered, preventing SSH from trying all loaded keys.
+
+### Token Scopes and Permissions
+
+Minimum required permissions for each forge:
+
+| Forge | Scope/Permission | Allows |
+|-------|-----------------|--------|
+| GitHub (classic PAT) | `repo` | Push to public and private repos |
+| GitHub (fine-grained) | Contents: Read and Write | Push to the specific repo |
+| GitLab | `write_repository` | Push to the repo |
+| Codeberg | Repository: Write | Push to the repo |
+| Bitbucket | Repositories: Write | Push to the repo |
+
+**Principle of least privilege**: Use fine-grained tokens scoped to a single repository when possible. Avoid tokens with admin or organizational permissions.
+
+---
+
+## The MIRROR_REMOTES Variable
+
+### Auto-Detection Mode
+
+When `MIRROR_REMOTES` is not set (the default), the hook auto-detects remotes:
+
+```bash
+MIRROR_REMOTES="${MIRROR_REMOTES:-}"
+
+if [[ -z "$MIRROR_REMOTES" ]]; then
+ MIRROR_REMOTES=$(git remote | grep -v '^origin$' || true)
+fi
+```
+
+This discovers all remotes except `origin`. The assumption is:
+- `origin` refers to the canonical repo itself (if configured)
+- Everything else is a mirror target
+
+### Explicit Remote List
+
+Override auto-detection by setting `MIRROR_REMOTES`:
+
+```bash
+export MIRROR_REMOTES="github gitlab"
+```
+
+This restricts mirroring to only the named remotes, ignoring others like `codeberg` or `sourceforge`. Useful for:
+- Temporarily disabling a problematic mirror
+- Phased rollouts of new mirrors
+- Testing a single mirror
+
+### Excluding Specific Remotes
+
+There is no built-in exclude mechanism, but you can achieve it by explicitly listing the desired remotes:
+
+```bash
+# Mirror to everything except sourceforge
+export MIRROR_REMOTES="github gitlab codeberg"
+```
+
+Alternatively, modify the auto-detection grep to exclude additional patterns:
+
+```bash
+MIRROR_REMOTES=$(git remote | grep -v -e '^origin$' -e '^sourceforge$' || true)
+```
+
+---
+
+## Git Config File Format
+
+The mirror remotes are stored in the bare repository's `config` file. The relevant sections look like:
+
+```ini
+[remote "origin"]
+ url = /srv/git/project-tick.git
+ fetch = +refs/*:refs/remotes/origin/*
+
+[remote "github"]
+ url = git@github.com:Project-Tick/Project-Tick.git
+ fetch = +refs/*:refs/remotes/github/*
+
+[remote "gitlab"]
+ url = git@gitlab.com:Project-Tick/Project-Tick.git
+ fetch = +refs/*:refs/remotes/gitlab/*
+
+[remote "codeberg"]
+ url = git@codeberg.org:Project-Tick/Project-Tick.git
+ fetch = +refs/*:refs/remotes/codeberg/*
+
+[remote "sourceforge"]
+ url = ssh://USERNAME@git.code.sf.net/p/project-tick/code
+ fetch = +refs/*:refs/remotes/sourceforge/*
+```
+
+You can edit this file directly instead of using `git remote add`:
+
+```bash
+vim /path/to/project-tick.git/config
+```
+
+However, `git remote add` is preferred because it ensures correct syntax and creates the appropriate fetch refspec.
+
+---
+
+## Multi-Repository Mirroring
+
+If Project-Tick manages multiple bare repositories (e.g., separate repos for subprojects), the same hook script can be deployed to each:
+
+```bash
+for repo in /srv/git/*.git; do
+ cp hooks/post-receive "$repo/hooks/post-receive"
+ chmod +x "$repo/hooks/post-receive"
+done
+```
+
+Each repository needs its own mirror remotes configured:
+
+```bash
+cd /srv/git/project-tick.git
+git remote add github git@github.com:Project-Tick/Project-Tick.git
+
+cd /srv/git/sub-project.git
+git remote add github git@github.com:Project-Tick/sub-project.git
+```
+
+The hook's auto-detection ensures each repository mirrors to its own set of remotes without shared configuration.
+
+---
+
+## Troubleshooting Mirror Issues
+
+### Remote URL Typo
+
+**Symptom**: `fatal: 'github' does not appear to be a git repository`
+
+**Fix**: Verify the remote URL:
+```bash
+git remote get-url github
+```
+
+### SSH Host Key Verification Failed
+
+**Symptom**: `Host key verification failed.`
+
+**Fix**: Add the host key to known_hosts:
+```bash
+ssh-keyscan github.com >> ~/.ssh/known_hosts
+ssh-keyscan gitlab.com >> ~/.ssh/known_hosts
+ssh-keyscan codeberg.org >> ~/.ssh/known_hosts
+ssh-keyscan git.code.sf.net >> ~/.ssh/known_hosts
+```
+
+### HTTPS Token Expired
+
+**Symptom**: `fatal: Authentication failed for 'https://...'`
+
+**Fix**: Update the token in the remote URL:
+```bash
+git remote set-url github https://x-access-token:NEW_TOKEN@github.com/Project-Tick/Project-Tick.git
+```
+
+### Force Push Rejected by Branch Protection
+
+**Symptom**: `! [remote rejected] main -> main (protected branch hook declined)`
+
+**Fix**: On the forge, either:
+1. Grant the mirror user admin/bypass permissions
+2. Add the mirror user/key to the force-push allowlist
+3. Disable branch protection for the mirror repository (if appropriate)
+
+### Push Size Limit Exceeded
+
+**Symptom**: `fatal: the remote end hung up unexpectedly` (during large pushes)
+
+**Fix**: Increase Git's buffer sizes:
+```bash
+git config http.postBuffer 524288000 # 500 MB
+```
+
+Or perform an initial manual push before enabling automated mirroring.
+
+### SSH Agent Not Available
+
+**Symptom**: `Permission denied (publickey).`
+
+**Fix**: Ensure the SSH key is loaded or use `IdentityFile` in SSH config:
+```bash
+ssh-add ~/.ssh/mirror_key
+# or configure ~/.ssh/config with IdentityFile
+```
+
+### Network Timeout
+
+**Symptom**: `fatal: unable to access '...': Failed to connect to ... port 443: Connection timed out`
+
+**Fix**: Check network connectivity, proxy settings, and firewall rules. Consider setting a git timeout:
+```bash
+git config http.lowSpeedLimit 1000
+git config http.lowSpeedTime 300
+```
diff --git a/docs/handbook/hooks/notification-system.md b/docs/handbook/hooks/notification-system.md
new file mode 100644
index 0000000000..a642d5343f
--- /dev/null
+++ b/docs/handbook/hooks/notification-system.md
@@ -0,0 +1,538 @@
+# Notification System
+
+## Table of Contents
+
+- [Introduction](#introduction)
+- [Notification Architecture](#notification-architecture)
+- [Email Notification Implementation](#email-notification-implementation)
+ - [Trigger Conditions](#trigger-conditions)
+ - [Prerequisite Check](#prerequisite-check)
+ - [Email Body Construction](#email-body-construction)
+ - [Subject Line Format](#subject-line-format)
+ - [Recipient Configuration](#recipient-configuration)
+- [The MIRROR_NOTIFY Variable](#the-mirror_notify-variable)
+ - [Enabling Notifications](#enabling-notifications)
+ - [Disabling Notifications](#disabling-notifications)
+ - [Multiple Recipients](#multiple-recipients)
+- [Email Body Format](#email-body-format)
+ - [Complete Email Example](#complete-email-example)
+ - [Field-by-Field Breakdown](#field-by-field-breakdown)
+- [Mail Command Integration](#mail-command-integration)
+ - [The mail Command](#the-mail-command)
+ - [Installing mail on Different Systems](#installing-mail-on-different-systems)
+ - [Mail Transfer Agent Configuration](#mail-transfer-agent-configuration)
+ - [Testing Email Delivery](#testing-email-delivery)
+- [Failure Scenarios and Edge Cases](#failure-scenarios-and-edge-cases)
+- [Extending the Notification System](#extending-the-notification-system)
+ - [Adding Webhook Notifications](#adding-webhook-notifications)
+ - [Adding Slack Integration](#adding-slack-integration)
+ - [Adding Discord Integration](#adding-discord-integration)
+ - [Adding Matrix Integration](#adding-matrix-integration)
+ - [Adding SMS Notifications](#adding-sms-notifications)
+- [Notification Suppression](#notification-suppression)
+- [Monitoring and Alerting Integration](#monitoring-and-alerting-integration)
+
+---
+
+## Introduction
+
+The Project-Tick `post-receive` hook (`hooks/post-receive`) includes an optional email notification system that alerts administrators when mirror push operations fail. The system is triggered only on failure and only when explicitly configured via the `MIRROR_NOTIFY` environment variable.
+
+The notification system follows two guiding principles:
+1. **Opt-in** — Notifications are disabled by default; no email is sent unless `MIRROR_NOTIFY` is set
+2. **Graceful degradation** — If the `mail` command is not available, the notification is silently skipped
+
+---
+
+## Notification Architecture
+
+The notification flow is:
+
+```
+Mirror push loop completes
+ │
+ ▼
+Any FAILED_REMOTES? ──No──► Skip notification
+ │
+ Yes
+ │
+ ▼
+MIRROR_NOTIFY set? ──No──► Skip notification
+ │
+ Yes
+ │
+ ▼
+mail command available? ──No──► Skip notification
+ │
+ Yes
+ │
+ ▼
+Construct email body from:
+ - FAILED_REMOTES[]
+ - $(pwd) ← repository path
+ - REFS[] ← updated refs
+ - $MIRROR_LOG ← log file path
+ │
+ ▼
+Send via: mail -s "[git-mirror] Push failure in <reponame>" "$MIRROR_NOTIFY"
+```
+
+Three gates must all pass before an email is sent:
+1. At least one remote must have failed
+2. `MIRROR_NOTIFY` must be set to a non-empty value
+3. The `mail` command must be present on the system
+
+---
+
+## Email Notification Implementation
+
+### Trigger Conditions
+
+```bash
+if [[ ${#FAILED_REMOTES[@]} -gt 0 && -n "${MIRROR_NOTIFY:-}" ]]; then
+```
+
+This compound condition checks:
+
+| Expression | Test |
+|------------|------|
+| `${#FAILED_REMOTES[@]} -gt 0` | The `FAILED_REMOTES` array has at least one element |
+| `-n "${MIRROR_NOTIFY:-}"` | The `MIRROR_NOTIFY` variable is non-empty |
+
+The `${MIRROR_NOTIFY:-}` expansion with `:-` is critical under `set -u` — it prevents an "unbound variable" error if `MIRROR_NOTIFY` was never set. The `:-` substitutes an empty string for an unset variable, and then `-n` tests whether that string is non-empty.
+
+The `&&` short-circuit operator means the `MIRROR_NOTIFY` check is only evaluated if there are failures. This is functionally irrelevant (both must pass), but reads naturally: "if there are failures AND notifications are configured."
+
+### Prerequisite Check
+
+```bash
+if command -v mail &>/dev/null; then
+```
+
+Before attempting to send email, the script checks if the `mail` command exists:
+
+| Component | Purpose |
+|-----------|---------|
+| `command -v mail` | Looks up `mail` in PATH; prints its path if found, exits non-zero if not |
+| `&>/dev/null` | Suppresses both stdout and stderr (we only care about the exit code) |
+
+`command -v` is the POSIX-specified way to check for command availability (though the `&>` redirection used here is a bash extension), preferred over:
+- `which mail` — not standardized by POSIX; behavior and output vary across systems
+- `type mail` — POSIX, but its output format is unspecified and varies between shells
+- `hash mail` — POSIX, but intended for managing the shell's command hash table rather than lookups
+
+If `mail` is not found, the entire notification block is skipped silently — no error, no warning. This allows deploying the hook on systems without `mail` configured.
+
+### Email Body Construction
+
+```bash
+{
+ echo "Mirror push failed for the following remotes:"
+ printf ' - %s\n' "${FAILED_REMOTES[@]}"
+ echo ""
+ echo "Repository: $(pwd)"
+ echo "Refs updated:"
+ printf ' %s\n' "${REFS[@]}"
+ echo ""
+ echo "Check log: $MIRROR_LOG"
+} | mail -s "[git-mirror] Push failure in $(basename "$(pwd)")" "$MIRROR_NOTIFY"
+```
+
+The `{ ... }` command group constructs the email body as a multi-line string. This group acts as a single compound command whose combined stdout is piped to `mail`.
+
+**`printf ' - %s\n' "${FAILED_REMOTES[@]}"`** — Iterates over each element of the `FAILED_REMOTES` array, printing each as a bulleted list item. Using `printf` with an array is a bash idiom: the format string is applied to each argument in turn.
+
+For example, if `FAILED_REMOTES=(sourceforge codeberg)`, the output is:
+```
+ - sourceforge
+ - codeberg
+```
+
+**`$(pwd)`** — Expands to the current working directory. In a bare repository hook, this is the bare repository path (e.g., `/srv/git/project-tick.git`).
+
+**`printf ' %s\n' "${REFS[@]}"`** — Lists all refs that were updated in this push, providing context about what triggered the mirror.
+
+**`$MIRROR_LOG`** — Points the reader to the log file for detailed push output and error messages.
+
+### Subject Line Format
+
+```bash
+mail -s "[git-mirror] Push failure in $(basename "$(pwd)")" "$MIRROR_NOTIFY"
+```
+
+The subject line follows the pattern:
+
+```
+[git-mirror] Push failure in <repository-directory-name>
+```
+
+**`$(basename "$(pwd)")`** — Extracts just the directory name from the full path:
+- Input: `/srv/git/project-tick.git`
+- Output: `project-tick.git`
+
+The `[git-mirror]` prefix allows email filters to route or prioritize these notifications:
+
+```
+# Example email filter rule
+Subject contains "[git-mirror]" → Move to "Git Alerts" folder
+```
+
+### Recipient Configuration
+
+The recipient is specified by the `MIRROR_NOTIFY` environment variable, passed as the final argument to `mail`:
+
+```bash
+mail -s "subject" "$MIRROR_NOTIFY"
+```
+
+The variable is quoted (`"$MIRROR_NOTIFY"`) to handle email addresses that might contain special characters (though standard email addresses typically don't).
+
+---
+
+## The MIRROR_NOTIFY Variable
+
+### Enabling Notifications
+
+Set the variable in the environment of the process running the git daemon:
+
+```bash
+# In systemd service file
+Environment=MIRROR_NOTIFY=admin@project-tick.org
+
+# In shell profile
+export MIRROR_NOTIFY=admin@project-tick.org
+
+# In a wrapper script
+MIRROR_NOTIFY=admin@project-tick.org exec hooks/post-receive
+```
+
+### Disabling Notifications
+
+Notifications are disabled by default. To explicitly disable:
+
+```bash
+# Unset the variable
+unset MIRROR_NOTIFY
+
+# Or set to empty
+export MIRROR_NOTIFY=""
+```
+
+### Multiple Recipients
+
+The `mail` command typically supports multiple recipients as a comma-separated list:
+
+```bash
+export MIRROR_NOTIFY="admin@project-tick.org,ops@project-tick.org"
+```
+
+Or as space-separated arguments (behavior depends on the MTA):
+
+```bash
+export MIRROR_NOTIFY="admin@project-tick.org ops@project-tick.org"
+```
+
+For reliable multi-recipient support, modify the script to loop over recipients:
+
+```bash
+for addr in $MIRROR_NOTIFY; do
+ { ... } | mail -s "subject" "$addr"
+done
+```
+
+---
+
+## Email Body Format
+
+### Complete Email Example
+
+```
+From: git@server.project-tick.org
+To: admin@project-tick.org
+Subject: [git-mirror] Push failure in project-tick.git
+
+Mirror push failed for the following remotes:
+ - sourceforge
+ - codeberg
+
+Repository: /srv/git/project-tick.git
+Refs updated:
+ refs/heads/main
+ refs/tags/v2.1.0
+
+Check log: /var/log/git-mirror.log
+```
+
+### Field-by-Field Breakdown
+
+| Field | Source | Example |
+|-------|--------|---------|
+| Failed remotes list | `"${FAILED_REMOTES[@]}"` | `sourceforge`, `codeberg` |
+| Repository path | `$(pwd)` | `/srv/git/project-tick.git` |
+| Updated refs | `"${REFS[@]}"` | `refs/heads/main`, `refs/tags/v2.1.0` |
+| Log file path | `$MIRROR_LOG` | `/var/log/git-mirror.log` |
+| Subject repo name | `$(basename "$(pwd)")` | `project-tick.git` |
+
+The email body provides enough context for an administrator to:
+1. Identify which mirrors are out of sync (failed remotes)
+2. Locate the repository to investigate (repository path)
+3. Understand what changed (updated refs)
+4. Access detailed error output (log file path)
+
+---
+
+## Mail Command Integration
+
+### The mail Command
+
+The hook uses the `mail` command (also known as `mailx`), a standard Unix mail user agent. It reads the message body from stdin and sends it to the specified recipient via the system's mail transfer agent (MTA).
+
+```bash
+echo "body" | mail -s "subject" recipient@example.com
+```
+
+### Installing mail on Different Systems
+
+| System | Package | Command |
+|--------|---------|---------|
+| Debian/Ubuntu | `sudo apt install mailutils` | `mail` |
+| RHEL/CentOS | `sudo yum install mailx` | `mail` |
+| Fedora | `sudo dnf install mailx` | `mail` |
+| Arch Linux | `sudo pacman -S s-nail` | `mail` |
+| Alpine | `apk add mailx` | `mail` |
+| NixOS | `nix-env -iA nixpkgs.mailutils` | `mail` |
+| macOS | Pre-installed (or `brew install mailutils`) | `mail` |
+
+### Mail Transfer Agent Configuration
+
+The `mail` command hands off the message to a local MTA. Common MTAs include:
+
+| MTA | Package | Use Case |
+|-----|---------|----------|
+| Postfix | `postfix` | Full-featured, most common |
+| Exim | `exim4` | Flexible, Debian default |
+| msmtp | `msmtp` | Lightweight relay to external SMTP |
+| ssmtp | `ssmtp` | Minimal relay (deprecated) |
+| OpenSMTPD | `opensmtpd` | Simple, secure |
+
+For a server that only needs to send outbound email (no receiving), `msmtp` is the simplest option:
+
+```bash
+# /etc/msmtprc
+account default
+host smtp.example.com
+port 587
+auth on
+user notifications@project-tick.org
+password APP_PASSWORD
+tls on
+from git-mirror@project-tick.org
+```
+
+### Testing Email Delivery
+
+```bash
+# Test basic mail delivery
+echo "Test message from git-mirror" | mail -s "Test" admin@project-tick.org
+
+# Check mail queue
+mailq
+
+# Check mail log
+sudo tail /var/log/mail.log
+```
+
+---
+
+## Failure Scenarios and Edge Cases
+
+| Scenario | Behavior | User Impact |
+|----------|----------|-------------|
+| `MIRROR_NOTIFY` not set | Notification block skipped entirely | None |
+| `MIRROR_NOTIFY` set to empty string | `-n` test fails; notification skipped | None |
+| `mail` command not found | `command -v mail` fails; notification skipped | None |
+| MTA not configured | `mail` command may succeed but message is undeliverable | Email queued or bounced locally |
+| MTA fails to send | `mail` exits non-zero; under `set -e`... | See note below |
+| Invalid email address | MTA accepts the message but it bounces later | Bounce email to local mailbox |
+| All remotes succeed | `${#FAILED_REMOTES[@]} -gt 0` is false; notification skipped | None — no false alerts |
+| REFS array is empty | `printf` prints nothing for refs section | Email sent with empty refs list |
+
+**Note on `set -e` and `mail` failure**: Only the *condition* of an `if` statement is exempt from `set -e`; commands in the body are not. If the `{ ... } | mail ...` pipeline fails — and under `pipefail` a failure of `mail` fails the whole pipeline — `set -e` would abort the hook at that point. In practice, `mail` commands rarely fail immediately (they queue messages locally even if delivery fails), but appending `|| true` to the pipeline would make the notification strictly best-effort.
+
+---
+
+## Extending the Notification System
+
+### Adding Webhook Notifications
+
+To send notifications via a generic webhook (e.g., for monitoring tools):
+
+```bash
+# Add after the mail block:
+if [[ ${#FAILED_REMOTES[@]} -gt 0 && -n "${MIRROR_WEBHOOK:-}" ]]; then
+ if command -v curl &>/dev/null; then
+ curl -sf -X POST \
+ -H "Content-Type: application/json" \
+ -d "{
+ \"event\": \"mirror_failure\",
+ \"repository\": \"$(basename "$(pwd)")\",
+ \"failed_remotes\": [$(printf '\"%s\",' "${FAILED_REMOTES[@]}" | sed 's/,$//')]
+ }" \
+ "$MIRROR_WEBHOOK" 2>/dev/null || true
+ fi
+fi
+```
+
+Configure with: `export MIRROR_WEBHOOK="https://monitoring.example.com/hooks/git-mirror"`
+
+### Adding Slack Integration
+
+```bash
+if [[ ${#FAILED_REMOTES[@]} -gt 0 && -n "${MIRROR_SLACK_WEBHOOK:-}" ]]; then
+ if command -v curl &>/dev/null; then
+ REMOTE_LIST=$(printf '• %s\\n' "${FAILED_REMOTES[@]}")
+ REF_LIST=$(printf '• %s\\n' "${REFS[@]}")
+ curl -sf -X POST \
+ -H "Content-Type: application/json" \
+ -d "{
+ \"text\": \":x: *Mirror push failed* in \`$(basename "$(pwd)")\`\",
+ \"blocks\": [
+ {
+ \"type\": \"section\",
+ \"text\": {
+ \"type\": \"mrkdwn\",
+ \"text\": \":x: *Mirror push failed* in \`$(basename "$(pwd)")\`\\n\\n*Failed remotes:*\\n${REMOTE_LIST}\\n\\n*Refs updated:*\\n${REF_LIST}\"
+ }
+ }
+ ]
+ }" \
+ "$MIRROR_SLACK_WEBHOOK" 2>/dev/null || true
+ fi
+fi
+```
+
+Configure with: `export MIRROR_SLACK_WEBHOOK="https://hooks.slack.com/services/T.../B.../xxx"`
+
+### Adding Discord Integration
+
+```bash
+if [[ ${#FAILED_REMOTES[@]} -gt 0 && -n "${MIRROR_DISCORD_WEBHOOK:-}" ]]; then
+ if command -v curl &>/dev/null; then
+    REMOTE_LIST=$(printf '- %s\\n' "${FAILED_REMOTES[@]}")
+ curl -sf -X POST \
+ -H "Content-Type: application/json" \
+ -d "{
+ \"content\": \"**Mirror push failed** in \`$(basename "$(pwd)")\`\\n\\nFailed remotes:\\n${REMOTE_LIST}\\n\\nCheck log: ${MIRROR_LOG}\"
+ }" \
+ "$MIRROR_DISCORD_WEBHOOK" 2>/dev/null || true
+ fi
+fi
+```
+
+Configure with: `export MIRROR_DISCORD_WEBHOOK="https://discord.com/api/webhooks/xxx/yyy"`
+
+### Adding Matrix Integration
+
+```bash
+if [[ ${#FAILED_REMOTES[@]} -gt 0 && -n "${MIRROR_MATRIX_WEBHOOK:-}" ]]; then
+ if command -v curl &>/dev/null; then
+    REMOTE_LIST=$(printf '- %s\\n' "${FAILED_REMOTES[@]}")
+ MSG="Mirror push failed in $(basename "$(pwd)")\n\nFailed remotes:\n${REMOTE_LIST}"
+ curl -sf -X POST \
+ -H "Content-Type: application/json" \
+ -d "{
+ \"msgtype\": \"m.text\",
+ \"body\": \"${MSG}\"
+ }" \
+ "$MIRROR_MATRIX_WEBHOOK" 2>/dev/null || true
+ fi
+fi
+```
+
+### Adding SMS Notifications
+
+Using Twilio as an example:
+
+```bash
+if [[ ${#FAILED_REMOTES[@]} -gt 0 && -n "${MIRROR_TWILIO_SID:-}" ]]; then
+ if command -v curl &>/dev/null; then
+ REMOTE_LIST=$(printf '%s, ' "${FAILED_REMOTES[@]}" | sed 's/, $//')
+ curl -sf -X POST \
+ "https://api.twilio.com/2010-04-01/Accounts/${MIRROR_TWILIO_SID}/Messages.json" \
+ -u "${MIRROR_TWILIO_SID}:${MIRROR_TWILIO_TOKEN}" \
+ -d "From=${MIRROR_TWILIO_FROM}" \
+ -d "To=${MIRROR_TWILIO_TO}" \
+ -d "Body=Mirror push failed in $(basename "$(pwd)"). Failed: ${REMOTE_LIST}" \
+ 2>/dev/null || true
+ fi
+fi
+```
+
+---
+
+## Notification Suppression
+
+To temporarily suppress notifications without removing the `MIRROR_NOTIFY` configuration:
+
+```bash
+# Method 1: Unset for a single invocation
+unset MIRROR_NOTIFY
+echo "..." | hooks/post-receive
+
+# Method 2: Override with empty string
+MIRROR_NOTIFY="" hooks/post-receive
+
+# Method 3: Remove notification config from systemd
+# Edit the service file and remove the MIRROR_NOTIFY line
+sudo systemctl edit git-daemon
+```
+
+The hook's design ensures notifications are never sent unless explicitly enabled, so the default state is already "suppressed."
+
+---
+
+## Monitoring and Alerting Integration
+
+For production deployments, the notification system can be integrated with monitoring platforms:
+
+### Prometheus + Alertmanager
+
+Expose mirror status as a Prometheus metric by writing to a textfile collector:
+
+```bash
+# Add to the end of the hook:
+METRICS_DIR="/var/lib/prometheus/node-exporter"
+if [[ -d "$METRICS_DIR" ]]; then
+  cat > "$METRICS_DIR/git_mirror.prom" <<EOF
+# HELP git_mirror_last_run_timestamp_seconds Unix timestamp of the last mirror run
+# TYPE git_mirror_last_run_timestamp_seconds gauge
+git_mirror_last_run_timestamp_seconds $(date +%s)
+# HELP git_mirror_failed_remotes Number of remotes that failed in the last run
+# TYPE git_mirror_failed_remotes gauge
+git_mirror_failed_remotes ${#FAILED_REMOTES[@]}
+# HELP git_mirror_succeeded_remotes Number of remotes that succeeded in the last run
+# TYPE git_mirror_succeeded_remotes gauge
+git_mirror_succeeded_remotes ${#SUCCEEDED_REMOTES[@]}
+EOF
+fi
+```
+
+### Healthcheck Pings
+
+Integrate with uptime monitoring services:
+
+```bash
+# Ping a healthcheck endpoint on success
+if [[ ${#FAILED_REMOTES[@]} -eq 0 && -n "${MIRROR_HEALTHCHECK_URL:-}" ]]; then
+ curl -sf "$MIRROR_HEALTHCHECK_URL" 2>/dev/null || true
+fi
+
+# Signal failure
+if [[ ${#FAILED_REMOTES[@]} -gt 0 && -n "${MIRROR_HEALTHCHECK_URL:-}" ]]; then
+ curl -sf "${MIRROR_HEALTHCHECK_URL}/fail" 2>/dev/null || true
+fi
+```
+
+Configure with: `export MIRROR_HEALTHCHECK_URL="https://hc-ping.com/uuid-here"`
+
+This allows dead-man's-switch monitoring — if no push occurs within the expected interval, the monitoring service alerts.
diff --git a/docs/handbook/hooks/overview.md b/docs/handbook/hooks/overview.md
new file mode 100644
index 0000000000..510d0a68b6
--- /dev/null
+++ b/docs/handbook/hooks/overview.md
@@ -0,0 +1,712 @@
+# Hooks — Overview
+
+## Table of Contents
+
+- [Introduction](#introduction)
+- [What Are Git Hooks?](#what-are-git-hooks)
+- [Hook Types in Git](#hook-types-in-git)
+ - [Client-Side Hooks](#client-side-hooks)
+ - [Server-Side Hooks](#server-side-hooks)
+- [Project-Tick Hook Architecture](#project-tick-hook-architecture)
+- [The post-receive Hook](#the-post-receive-hook)
+ - [Purpose and Design Goals](#purpose-and-design-goals)
+ - [Script Anatomy](#script-anatomy)
+ - [Configuration Block](#configuration-block)
+ - [Remote Auto-Detection](#remote-auto-detection)
+ - [Logging Subsystem](#logging-subsystem)
+ - [Main Execution Loop](#main-execution-loop)
+ - [Push Strategy](#push-strategy)
+ - [Result Tracking](#result-tracking)
+ - [Failure Notification](#failure-notification)
+ - [Exit Behavior](#exit-behavior)
+- [Supported Forge Targets](#supported-forge-targets)
+ - [GitHub](#github)
+ - [GitLab](#gitlab)
+ - [Codeberg](#codeberg)
+ - [SourceForge](#sourceforge)
+- [Authentication Methods](#authentication-methods)
+ - [SSH Key Authentication](#ssh-key-authentication)
+ - [HTTPS Token Authentication](#https-token-authentication)
+- [Environment Variables](#environment-variables)
+- [Installation Guide](#installation-guide)
+- [Directory Layout](#directory-layout)
+- [Operational Flow Diagram](#operational-flow-diagram)
+- [Interaction with Other Project-Tick Components](#interaction-with-other-project-tick-components)
+- [Troubleshooting Common Issues](#troubleshooting-common-issues)
+- [Security Considerations](#security-considerations)
+- [Related Documentation](#related-documentation)
+
+---
+
+## Introduction
+
+The `hooks/` directory in the Project-Tick monorepo contains Git hook scripts that automate repository management tasks. These hooks are designed to run on the bare repository that serves as the canonical upstream source for the Project-Tick project.
+
+The hooks system currently consists of a single, well-structured script:
+
+| File | Type | Purpose |
+|------|------|---------|
+| `hooks/post-receive` | Bash script | Mirror pushes to multiple forge platforms |
+
+This document provides a comprehensive explanation of how the hooks system works, how Git hooks function in general, and how the Project-Tick hook integrates with the broader project infrastructure.
+
+---
+
+## What Are Git Hooks?
+
+Git hooks are executable scripts that Git runs automatically at specific points in the version control workflow. They reside in the `.git/hooks/` directory of a repository (or the `hooks/` directory of a bare repository). Git ships with sample hook scripts (with `.sample` extensions) that are inactive by default.
+
+Hooks serve as extension points for automating tasks such as:
+
+- Enforcing commit message conventions
+- Running linters or tests before accepting commits
+- Triggering CI/CD pipelines after pushes
+- Synchronizing mirrors to external platforms
+- Sending notifications on repository events
+
+A hook is activated by placing an executable file with the correct name (no extension) in the hooks directory. Git invokes the hook at the corresponding event and passes relevant data via standard input or command-line arguments.
+
+---
+
+## Hook Types in Git
+
+### Client-Side Hooks
+
+Client-side hooks run on the developer's local machine during operations like committing, merging, and rebasing:
+
+| Hook | Trigger | Use Case |
+|------|---------|----------|
+| `pre-commit` | Before a commit is created | Lint source files, check formatting |
+| `prepare-commit-msg` | After default message generated | Auto-populate commit templates |
+| `commit-msg` | After user enters message | Validate commit message format |
+| `post-commit` | After commit completes | Post-commit notifications |
+| `pre-rebase` | Before rebase starts | Prevent rebasing published branches |
+| `post-merge` | After a merge completes | Restore tracked file permissions |
+| `pre-push` | Before push to remote | Run tests before sharing code |
+| `post-checkout` | After `git checkout` | Set up working directory environment |
+
+### Server-Side Hooks
+
+Server-side hooks run on the repository that receives pushes. These are the hooks relevant to the Project-Tick infrastructure:
+
+| Hook | Trigger | Input | Use Case |
+|------|---------|-------|----------|
+| `pre-receive` | Before any refs updated | `<old-sha> <new-sha> <refname>` per line on stdin | Reject pushes that violate policies |
+| `update` | Per-ref, before each ref updated | `<refname> <old-sha> <new-sha>` as arguments | Per-branch access control |
+| `post-receive` | After all refs updated | `<old-sha> <new-sha> <refname>` per line on stdin | Trigger CI, mirrors, notifications |
+| `post-update` | After refs updated | Updated ref names as arguments | Update `info/refs` for dumb HTTP |
+
+The **post-receive** hook is the one used by Project-Tick. It fires after all refs have been successfully updated, making it the ideal place for mirror synchronization — the push to the canonical repo has already succeeded, so mirroring can proceed without blocking the original pusher.
+
+---
+
+## Project-Tick Hook Architecture
+
+The Project-Tick hooks system follows a minimal, single-script architecture:
+
+```
+hooks/
+└── post-receive # The only hook script — handles multi-forge mirroring
+```
+
+The script is stored in the monorepo source tree at `hooks/post-receive` and is deployed to the bare repository at the path:
+
+```
+/path/to/project-tick.git/hooks/post-receive
+```
+
+### Design Principles
+
+1. **Single responsibility** — The script does exactly one thing: mirror pushes to configured forge remotes.
+2. **Fail-safe defaults** — If no mirror remotes are configured, the script exits silently without error.
+3. **Comprehensive logging** — Every action is logged with UTC timestamps.
+4. **Non-blocking on partial failure** — If one remote fails, the script continues pushing to the remaining remotes.
+5. **Notification support** — Optional email alerts on failure.
+6. **Zero external dependencies** — Uses only bash builtins, `git`, `date`, `tee`, and optionally `mail`.
+
+---
+
+## The post-receive Hook
+
+### Purpose and Design Goals
+
+The `post-receive` script in `hooks/post-receive` serves as a multi-forge mirror synchronization tool. When a push lands on the canonical bare repository, this hook automatically replicates all refs (branches, tags, notes) to every configured mirror remote.
+
+The opening comment block documents this purpose:
+
+```bash
+# ==============================================================================
+# post-receive hook — Mirror push to multiple forges
+# ==============================================================================
+```
+
+### Script Anatomy
+
+The script is structured into four clearly delineated sections:
+
+1. **Header block** (lines 1–33) — Shebang, documentation comments, and usage instructions
+2. **Configuration block** (lines 35–53) — Variable initialization and remote auto-detection
+3. **Logging function** (lines 55–62) — The `log()` helper
+4. **Main execution** (lines 64–112) — Ref reading, push loop, summary, notification, exit
+
+The script begins with strict error handling:
+
+```bash
+#!/usr/bin/env bash
+set -euo pipefail
+```
+
+The `set -euo pipefail` line enables three safety nets:
+
+| Flag | Effect |
+|------|--------|
+| `-e` | Exit immediately if any command fails |
+| `-u` | Treat unset variables as errors |
+| `-o pipefail` | A pipeline fails if any component fails, not just the last command |
+
+### Configuration Block
+
+The configuration block reads three environment-controlled settings. Two are initialized as variables at the top of the script; the third, `MIRROR_NOTIFY`, is expanded with a default directly at its point of use:
+
+```bash
+MIRROR_REMOTES="${MIRROR_REMOTES:-}"
+MIRROR_LOG="${MIRROR_LOG:-/var/log/git-mirror.log}"
+```
+
+| Variable | Default | Purpose |
+|----------|---------|---------|
+| `MIRROR_REMOTES` | `""` (empty — triggers auto-detection) | Space-separated list of git remote names |
+| `MIRROR_LOG` | `/var/log/git-mirror.log` | Path to the log file |
+| `MIRROR_NOTIFY` | `""` (unset — notifications disabled) | Email address for failure alerts |
+
+The `${VAR:-default}` syntax provides defaults while allowing environment variable overrides. This means an administrator can control behavior without modifying the script:
+
+```bash
+MIRROR_REMOTES="github gitlab" MIRROR_LOG=/tmp/mirror.log ./hooks/post-receive
+```
+
+### Remote Auto-Detection
+
+If `MIRROR_REMOTES` is empty (the default), the script auto-detects mirror targets:
+
+```bash
+if [[ -z "$MIRROR_REMOTES" ]]; then
+ MIRROR_REMOTES=$(git remote | grep -v '^origin$' || true)
+fi
+```
+
+This runs `git remote` to list all configured remotes, then filters out `origin` with `grep -v '^origin$'`. The `|| true` suffix prevents `set -e` from terminating the script if `grep` finds no matches (which would produce exit code 1).
+
+The rationale: `origin` typically points to the canonical repository itself. Everything else is assumed to be a mirror target. This convention allows adding new mirrors simply by running:
+
+```bash
+git remote add <name> <url>
+```
+
+If after auto-detection the list is still empty, the script exits cleanly:
+
+```bash
+if [[ -z "$MIRROR_REMOTES" ]]; then
+ echo "[mirror] No mirror remotes configured. Skipping." >&2
+ exit 0
+fi
+```
+
+This is a **non-error exit** (`exit 0`) because having no mirrors is a valid configuration — the hook should not cause the push to appear to have failed.
+
+### Logging Subsystem
+
+The `log()` function provides timestamped logging to both stdout and a persistent log file:
+
+```bash
+log() {
+ local timestamp
+ timestamp="$(date -u '+%Y-%m-%d %H:%M:%S UTC')"
+ echo "[$timestamp] $*" | tee -a "$MIRROR_LOG" 2>/dev/null || echo "[$timestamp] $*"
+}
+```
+
+Key characteristics:
+
+- **UTC timestamps** — `date -u` ensures consistent timestamps regardless of server timezone.
+- **Format** — `[2026-04-05 14:30:00 UTC] message` — ISO 8601 date with human-readable time.
+- **Dual output** — `tee -a` appends to `$MIRROR_LOG` while also writing to stdout.
+- **Graceful fallback** — If the log file is not writable (permissions, missing directory), `2>/dev/null` suppresses the `tee` error, and the `||` fallback ensures the message still appears on stdout.
+- **`local` variable** — The `timestamp` variable is scoped to the function to avoid polluting the global namespace.
+
+### Main Execution Loop
+
+The main section begins by reading the ref update data from stdin:
+
+```bash
+log "=== Mirror push triggered ==="
+
+REFS=()
+while read -r oldrev newrev refname; do
+ REFS+=("$refname")
+ log " ref: $refname ($oldrev -> $newrev)"
+done
+```
+
+Git's `post-receive` hook receives one line per updated ref on stdin, formatted as:
+
+```
+<old-sha1> <new-sha1> <refname>
+```
+
+For example:
+```
+abc123 def456 refs/heads/main
+000000 789abc refs/tags/v1.0.0
+```
+
+The `read -r` flag prevents backslash interpretation. Each ref name is accumulated in the `REFS` bash array for later use in notifications.
+
+### Push Strategy
+
+For each mirror remote, the script performs a `--mirror --force` push:
+
+```bash
+for remote in $MIRROR_REMOTES; do
+ log "Pushing to remote: $remote"
+
+ if git push --mirror --force "$remote" 2>&1 | tee -a "$MIRROR_LOG" 2>/dev/null; then
+ SUCCEEDED_REMOTES+=("$remote")
+ log " ✓ Successfully pushed to $remote"
+ else
+ FAILED_REMOTES+=("$remote")
+ log " ✗ FAILED to push to $remote"
+ fi
+done
+```
+
+The `git push` flags are critical:
+
+| Flag | Effect |
+|------|--------|
+| `--mirror` | Push all refs under `refs/` — branches, tags, notes, replace refs, everything. Also deletes remote refs that no longer exist locally. |
+| `--force` | Force-update refs that have diverged. Ensures the mirror is an exact copy. |
+
+The `2>&1` redirects stderr to stdout so both success and error messages are captured by `tee`. The `if` statement checks the exit code of the entire pipeline — if `git push` fails (non-zero exit), the remote is added to `FAILED_REMOTES`.
+
+**Important**: The loop does **not** use `set -e` behavior for individual pushes because the `if` statement captures the exit code rather than triggering an immediate exit. This ensures all remotes are attempted even if some fail.
+
+### Result Tracking
+
+Two arrays track the outcome:
+
+```bash
+FAILED_REMOTES=()
+SUCCEEDED_REMOTES=()
+```
+
+After the loop, a summary is logged:
+
+```bash
+log "--- Summary ---"
+log " Succeeded: ${SUCCEEDED_REMOTES[*]:-none}"
+log " Failed: ${FAILED_REMOTES[*]:-none}"
+```
+
+The `${array[*]:-none}` syntax expands all array elements separated by spaces, or prints "none" if the array is empty.
+
+### Failure Notification
+
+When mirrors fail and `MIRROR_NOTIFY` is set, the script sends an email:
+
+```bash
+if [[ ${#FAILED_REMOTES[@]} -gt 0 && -n "${MIRROR_NOTIFY:-}" ]]; then
+ if command -v mail &>/dev/null; then
+ {
+ echo "Mirror push failed for the following remotes:"
+ printf ' - %s\n' "${FAILED_REMOTES[@]}"
+ echo ""
+ echo "Repository: $(pwd)"
+ echo "Refs updated:"
+ printf ' %s\n' "${REFS[@]}"
+ echo ""
+ echo "Check log: $MIRROR_LOG"
+ } | mail -s "[git-mirror] Push failure in $(basename "$(pwd)")" "$MIRROR_NOTIFY"
+ fi
+fi
+```
+
+The notification includes:
+
+- Which remotes failed (`FAILED_REMOTES`)
+- The repository path (`$(pwd)`)
+- Which refs were updated (`REFS`)
+- Where to find detailed logs (`$MIRROR_LOG`)
+
+The subject line uses the repository directory name: `[git-mirror] Push failure in project-tick.git`.
+
+The `command -v mail &>/dev/null` check ensures the script doesn't crash if `mail` is not installed — it simply skips notification silently.
+
+### Exit Behavior
+
+```bash
+if [[ ${#FAILED_REMOTES[@]} -gt 0 ]]; then
+ log "=== Finished with errors ==="
+ exit 1
+fi
+
+log "=== Finished successfully ==="
+exit 0
+```
+
+| Condition | Exit Code | Meaning |
+|-----------|-----------|---------|
+| All remotes succeeded | `0` | Success — the pusher sees no error |
+| One or more remotes failed | `1` | Failure — the pusher sees an error message |
+| No remotes configured | `0` | No-op — silent success |
+
+**Note**: A non-zero exit from `post-receive` does **not** reject the push (the refs are already updated). It only causes Git to display the hook's output as an error to the pusher. This alerts the developer that mirroring failed without rolling back their work.
+
+---
+
+## Supported Forge Targets
+
+The script header documents four forge platforms with example remote URLs:
+
+### GitHub
+
+```bash
+# SSH
+git remote add github git@github.com:Project-Tick/Project-Tick.git
+
+# HTTPS with token
+git remote add github https://x-access-token:TOKEN@github.com/Project-Tick/Project-Tick.git
+```
+
+GitHub uses `x-access-token` as the username for personal access tokens and GitHub App installation tokens.
+
+### GitLab
+
+```bash
+# SSH
+git remote add gitlab git@gitlab.com:Project-Tick/Project-Tick.git
+
+# HTTPS with token
+git remote add gitlab https://oauth2:TOKEN@gitlab.com/Project-Tick/Project-Tick.git
+```
+
+GitLab uses `oauth2` as the username for personal access tokens with HTTPS.
+
+### Codeberg
+
+```bash
+# SSH
+git remote add codeberg git@codeberg.org:Project-Tick/Project-Tick.git
+
+# HTTPS with token
+git remote add codeberg https://TOKEN@codeberg.org/Project-Tick/Project-Tick.git
+```
+
+Codeberg (Gitea-based) accepts the token directly as the username with no password.
+
+### SourceForge
+
+```bash
+# SSH only
+git remote add sourceforge ssh://USERNAME@git.code.sf.net/p/project-tick/code
+```
+
+SourceForge uses a non-standard SSH URL format with a username prefix and a project-specific path structure.
+
+---
+
+## Authentication Methods
+
+### SSH Key Authentication
+
+SSH-based authentication requires:
+
+1. An SSH keypair accessible to the user running the Git daemon
+2. The public key registered on each forge platform
+3. Correct SSH host key verification (or entries in `~/.ssh/known_hosts`)
+
+For automated server-side usage, a dedicated deploy key is recommended:
+
+```bash
+# Generate a dedicated mirror key
+ssh-keygen -t ed25519 -f ~/.ssh/mirror_key -N ""
+
+# Configure SSH to use it for each host
+cat >> ~/.ssh/config <<EOF
+Host github.com
+ IdentityFile ~/.ssh/mirror_key
+Host gitlab.com
+ IdentityFile ~/.ssh/mirror_key
+Host codeberg.org
+ IdentityFile ~/.ssh/mirror_key
+EOF
+```
+
+### HTTPS Token Authentication
+
+HTTPS authentication embeds the token in the remote URL. The token format varies by forge:
+
+| Forge | URL Format | Token Type |
+|-------|------------|------------|
+| GitHub | `https://x-access-token:TOKEN@github.com/...` | Personal Access Token or App Installation Token |
+| GitLab | `https://oauth2:TOKEN@gitlab.com/...` | Personal Access Token |
+| Codeberg | `https://TOKEN@codeberg.org/...` | Application Token |
+
+**Security warning**: Tokens embedded in remote URLs are stored in the Git config file of the bare repository. Ensure the repository directory has restrictive permissions (`chmod 700`).
+
+---
+
+## Environment Variables
+
+The script supports three environment variables for runtime configuration:
+
+### `MIRROR_REMOTES`
+
+```bash
+MIRROR_REMOTES="${MIRROR_REMOTES:-}"
+```
+
+- **Type**: Space-separated string of git remote names
+- **Default**: Empty (triggers auto-detection of all non-`origin` remotes)
+- **Example**: `MIRROR_REMOTES="github gitlab codeberg"`
+- **Use case**: Restrict mirroring to specific remotes, e.g., push to GitHub and GitLab but skip Codeberg temporarily
+
+### `MIRROR_LOG`
+
+```bash
+MIRROR_LOG="${MIRROR_LOG:-/var/log/git-mirror.log}"
+```
+
+- **Type**: Filesystem path
+- **Default**: `/var/log/git-mirror.log`
+- **Example**: `MIRROR_LOG=/var/log/project-tick/mirror.log`
+- **Requirements**: The directory must exist and be writable by the user running the hook. If not writable, the script falls back to stdout-only logging.
+
+### `MIRROR_NOTIFY`
+
+```bash
+"${MIRROR_NOTIFY:-}"
+```
+
+- **Type**: Email address string
+- **Default**: Empty (notifications disabled)
+- **Example**: `MIRROR_NOTIFY=admin@project-tick.org`
+- **Requirements**: The `mail` command must be available on the system. If `mail` is not installed, the notification is silently skipped.
+
+---
+
+## Installation Guide
+
+### Step 1: Locate the Bare Repository
+
+```bash
+# The bare repository is typically at:
+cd /srv/git/project-tick.git
+# or
+cd /var/lib/gitolite/repositories/project-tick.git
+```
+
+### Step 2: Copy the Hook Script
+
+```bash
+cp /path/to/Project-Tick/hooks/post-receive hooks/post-receive
+chmod +x hooks/post-receive
+```
+
+### Step 3: Configure Mirror Remotes
+
+```bash
+git remote add github git@github.com:Project-Tick/Project-Tick.git
+git remote add gitlab git@gitlab.com:Project-Tick/Project-Tick.git
+git remote add codeberg git@codeberg.org:Project-Tick/Project-Tick.git
+```
+
+### Step 4: Verify Remote Configuration
+
+```bash
+git remote -v
+# github git@github.com:Project-Tick/Project-Tick.git (push)
+# gitlab git@gitlab.com:Project-Tick/Project-Tick.git (push)
+# codeberg git@codeberg.org:Project-Tick/Project-Tick.git (push)
+# origin (local bare repo — no push URL)
+```
+
+### Step 5: Set Up Logging
+
+```bash
+sudo mkdir -p /var/log/
+sudo touch /var/log/git-mirror.log
+sudo chown git:git /var/log/git-mirror.log
+```
+
+### Step 6: (Optional) Configure Notifications
+
+```bash
+# Set in the shell environment of the user running the git daemon
+export MIRROR_NOTIFY="admin@project-tick.org"
+```
+
+### Step 7: Test the Hook
+
+```bash
+echo "0000000000000000000000000000000000000000 $(git rev-parse HEAD) refs/heads/main" | hooks/post-receive
+```
+
+---
+
+## Directory Layout
+
+```
+Project-Tick/
+├── hooks/
+│ └── post-receive # The mirror hook script (source copy)
+│
+├── docs/handbook/hooks/
+│ ├── overview.md # This document
+│ ├── post-receive-hook.md # Deep-dive into the post-receive script
+│ ├── mirror-configuration.md # Mirror setup and forge configuration
+│ ├── logging-system.md # Logging internals
+│ └── notification-system.md # Failure notification system
+│
+└── /path/to/project-tick.git/ # Deployed bare repository
+ └── hooks/
+ └── post-receive # Deployed copy (executable)
+```
+
+---
+
+## Operational Flow Diagram
+
+```
+Developer pushes to canonical repo
+ │
+ ▼
+ Git updates refs in bare repo
+ │
+ ▼
+ post-receive hook is invoked
+ │
+ ▼
+ Read stdin: old-sha, new-sha, refname
+ │
+ ▼
+ Auto-detect mirror remotes
+ (all remotes except "origin")
+ │
+ ├── No remotes? ──► exit 0 (silent)
+ │
+ ▼
+ For each remote:
+ git push --mirror --force $remote
+ │
+ ├── Success ──► add to SUCCEEDED_REMOTES
+ │
+ └── Failure ──► add to FAILED_REMOTES
+ │
+ ▼
+ MIRROR_NOTIFY set?
+ │
+ ├── Yes + mail available ──► send email
+ │
+ └── No ──► skip
+ │
+ ▼
+ Log summary
+ │
+ ├── Any failures? ──► exit 1
+ │
+ └── All ok? ──► exit 0
+```
+
+---
+
+## Interaction with Other Project-Tick Components
+
+### cgit Integration
+
+The Project-Tick monorepo includes `cgit/` — a web frontend for Git repositories. The `post-receive` mirroring hook complements cgit by ensuring that the repositories displayed on the cgit web interface are kept in sync across multiple forges.
+
+The `cgit/contrib/hooks/post-receive.agefile` hook (a separate, cgit-specific hook) updates the `info/web/last-modified` file for cgit's cache invalidation. In a multi-hook setup, both hooks can be combined using a wrapper script.
+
+### lefthook Integration
+
+The `lefthook.yml` at the repository root configures client-side hooks for the development workflow. This is complementary to the server-side `post-receive` hook — lefthook manages pre-commit and pre-push checks locally, while `post-receive` manages post-push mirroring on the server.
+
+### CI Pipeline
+
+The `ci/` directory contains CI configuration. The mirror hook runs independently of CI — it triggers on the bare repository while CI typically triggers on the forge platforms that receive the mirrored pushes.
+
+---
+
+## Troubleshooting Common Issues
+
+### Hook Not Executing
+
+```bash
+# Check permissions
+ls -la hooks/post-receive
+# Must show: -rwxr-xr-x or similar with execute bit
+
+# Fix permissions
+chmod +x hooks/post-receive
+```
+
+### "No mirror remotes configured"
+
+```bash
+# Verify remotes exist
+git remote -v
+
+# If empty, add remotes:
+git remote add github git@github.com:Project-Tick/Project-Tick.git
+```
+
+### SSH Authentication Failures
+
+```bash
+# Test SSH connectivity
+ssh -T git@github.com
+ssh -T git@gitlab.com
+ssh -T git@codeberg.org
+
+# Check SSH agent
+ssh-add -l
+```
+
+### Log File Not Writable
+
+```bash
+# Check permissions
+ls -la /var/log/git-mirror.log
+
+# Create with correct ownership
+sudo touch /var/log/git-mirror.log
+sudo chown $(whoami) /var/log/git-mirror.log
+```
+
+### Push Rejected by Remote
+
+```bash
+# Check if the remote repository exists
+# Check if the token/key has push permissions
+# Check if branch protection rules block --force pushes
+```
+
+---
+
+## Security Considerations
+
+1. **Token storage** — HTTPS tokens embedded in remote URLs are stored in plain text in the git config. Restrict access to the bare repository directory.
+2. **SSH keys** — Use dedicated deploy keys with minimal permissions (push-only, no admin).
+3. **Log file contents** — The log file may contain ref names and remote names but should not contain credentials. However, restrict access to logs as ref names may be sensitive.
+4. **`set -euo pipefail`** — The strict bash mode prevents silent failures and unset variable references that could lead to unexpected behavior.
+5. **`--force` flag** — The `--force` flag overwrites remote refs unconditionally. This is intentional for mirroring but means the canonical repo must be protected against unauthorized pushes.
+
+---
+
+## Related Documentation
+
+- [post-receive-hook.md](post-receive-hook.md) — Line-by-line analysis of the post-receive script
+- [mirror-configuration.md](mirror-configuration.md) — Detailed mirror remote setup guide
+- [logging-system.md](logging-system.md) — Logging system internals
+- [notification-system.md](notification-system.md) — Email notification system
diff --git a/docs/handbook/hooks/post-receive-hook.md b/docs/handbook/hooks/post-receive-hook.md
new file mode 100644
index 0000000000..845291d7f2
--- /dev/null
+++ b/docs/handbook/hooks/post-receive-hook.md
@@ -0,0 +1,778 @@
+# post-receive Hook — Deep Analysis
+
+## Table of Contents
+
+- [Introduction](#introduction)
+- [File Location and Deployment](#file-location-and-deployment)
+- [Complete Source Listing](#complete-source-listing)
+- [Line-by-Line Analysis](#line-by-line-analysis)
+ - [Line 1: Shebang](#line-1-shebang)
+ - [Lines 2–33: Documentation Header](#lines-2-33-documentation-header)
+ - [Line 35: Strict Mode](#line-35-strict-mode)
+ - [Lines 41–42: Variable Initialization](#lines-41-42-variable-initialization)
+ - [Lines 45–47: Remote Auto-Detection](#lines-45-47-remote-auto-detection)
+ - [Lines 49–52: Empty Remote Guard](#lines-49-52-empty-remote-guard)
+ - [Lines 57–61: The log() Function](#lines-57-61-the-log-function)
+ - [Line 67: Trigger Banner](#line-67-trigger-banner)
+ - [Lines 70–74: Stdin Ref Reading Loop](#lines-70-74-stdin-ref-reading-loop)
+ - [Lines 76–77: Result Arrays](#lines-76-77-result-arrays)
+ - [Lines 79–90: Mirror Push Loop](#lines-79-90-mirror-push-loop)
+ - [Lines 92–94: Summary Logging](#lines-92-94-summary-logging)
+ - [Lines 97–109: Failure Notification](#lines-97-109-failure-notification)
+ - [Lines 112–116: Exit Logic](#lines-112-116-exit-logic)
+- [Data Flow Analysis](#data-flow-analysis)
+ - [Input Data](#input-data)
+ - [Internal State](#internal-state)
+ - [Output Channels](#output-channels)
+- [Bash Constructs Reference](#bash-constructs-reference)
+- [Error Handling Strategy](#error-handling-strategy)
+- [Pipeline Behavior Under pipefail](#pipeline-behavior-under-pipefail)
+- [Race Conditions and Concurrency](#race-conditions-and-concurrency)
+- [Performance Characteristics](#performance-characteristics)
+- [Testing the Hook](#testing-the-hook)
+ - [Manual Invocation](#manual-invocation)
+ - [Dry Run Approach](#dry-run-approach)
+ - [Unit Testing with Mocks](#unit-testing-with-mocks)
+- [Modification Guide](#modification-guide)
+ - [Adding a New Remote Type](#adding-a-new-remote-type)
+ - [Adding Retry Logic](#adding-retry-logic)
+ - [Adding Webhook Notifications](#adding-webhook-notifications)
+ - [Selective Ref Mirroring](#selective-ref-mirroring)
+- [Comparison with Alternative Approaches](#comparison-with-alternative-approaches)
+
+---
+
+## Introduction
+
+The `post-receive` hook at `hooks/post-receive` is the single operational hook in the Project-Tick hooks system. It implements multi-forge mirror synchronization — whenever a push lands on the canonical bare repository, this script replicates all refs to every configured mirror remote.
+
+This document provides an exhaustive, line-by-line analysis of the script, covering every variable, control structure, and design decision.
+
+---
+
+## File Location and Deployment
+
+**Source location** (in the monorepo):
+```
+Project-Tick/hooks/post-receive
+```
+
+**Deployed location** (in the bare repository):
+```
+/path/to/project-tick.git/hooks/post-receive
+```
+
+**File type**: Bash shell script
+**Permissions required**: Executable (`chmod +x`)
+**Interpreter**: `/usr/bin/env bash` (portable shebang)
+**Total lines**: 116
+
+---
+
+## Complete Source Listing
+
+For reference, the complete script with line numbers:
+
+```bash
+ 1 #!/usr/bin/env bash
+ 2 # ==============================================================================
+ 3 # post-receive hook — Mirror push to multiple forges
+ 4 # ==============================================================================
+ 5 #
+ 6 # Place this file in your bare repository:
+ 7 # /path/to/project-tick.git/hooks/post-receive
+ 8 #
+ 9 # Make it executable:
+ 10 # chmod +x hooks/post-receive
+ 11 #
+ 12 # Configuration:
+ 13 # Set mirror remotes in the bare repo:
+ 14 #
+ 15 # git remote add github git@github.com:Project-Tick/Project-Tick.git
+ 16 # git remote add gitlab git@gitlab.com:Project-Tick/Project-Tick.git
+ 17 # git remote add codeberg git@codeberg.org:Project-Tick/Project-Tick.git
+ 18 # git remote add sourceforge ssh://USERNAME@git.code.sf.net/p/project-tick/code
+ 19 #
+ 20 # Or use HTTPS with token auth:
+ 21 #
+ 22 # git remote add github https://x-access-token:TOKEN@github.com/Project-Tick/Project-Tick.git
+ 23 # git remote add gitlab https://oauth2:TOKEN@gitlab.com/Project-Tick/Project-Tick.git
+ 24 # git remote add codeberg https://TOKEN@codeberg.org/Project-Tick/Project-Tick.git
+ 25 #
+ 26 # Environment variables (optional):
+ 27 # MIRROR_REMOTES — space-separated list of remote names to push to.
+ 28 # Defaults to all configured mirror remotes.
+ 29 # MIRROR_LOG — path to log file. Defaults to /var/log/git-mirror.log
+ 30 # MIRROR_NOTIFY — email address for failure notifications (requires mail cmd)
+ 31 #
+ 32 # ==============================================================================
+ 33
+ 34 set -euo pipefail
+ 35
+ 36 # ---------------------
+ 37 # Configuration
+ 38 # ---------------------
+ 39
+ 40 MIRROR_REMOTES="${MIRROR_REMOTES:-}"
+ 41 MIRROR_LOG="${MIRROR_LOG:-/var/log/git-mirror.log}"
+ 42
+ 43 if [[ -z "$MIRROR_REMOTES" ]]; then
+ 44 MIRROR_REMOTES=$(git remote | grep -v '^origin$' || true)
+ 45 fi
+ 46
+ 47 if [[ -z "$MIRROR_REMOTES" ]]; then
+ 48 echo "[mirror] No mirror remotes configured. Skipping." >&2
+ 49 exit 0
+ 50 fi
+ 51
+ 52 # ---------------------
+ 53 # Logging
+ 54 # ---------------------
+ 55 log() {
+ 56 local timestamp
+ 57 timestamp="$(date -u '+%Y-%m-%d %H:%M:%S UTC')"
+ 58 echo "[$timestamp] $*" | tee -a "$MIRROR_LOG" 2>/dev/null || echo "[$timestamp] $*"
+ 59 }
+ 60
+ 61 # ---------------------
+ 62 # Main
+ 63 # ---------------------
+ 64
+ 65 log "=== Mirror push triggered ==="
+ 66
+ 67 REFS=()
+ 68 while read -r oldrev newrev refname; do
+ 69 REFS+=("$refname")
+ 70 log " ref: $refname ($oldrev -> $newrev)"
+ 71 done
+ 72
+ 73 FAILED_REMOTES=()
+ 74 SUCCEEDED_REMOTES=()
+ 75
+ 76 for remote in $MIRROR_REMOTES; do
+ 77 log "Pushing to remote: $remote"
+ 78
+ 79 if git push --mirror --force "$remote" 2>&1 | tee -a "$MIRROR_LOG" 2>/dev/null; then
+ 80 SUCCEEDED_REMOTES+=("$remote")
+ 81 log " ✓ Successfully pushed to $remote"
+ 82 else
+ 83 FAILED_REMOTES+=("$remote")
+ 84 log " ✗ FAILED to push to $remote"
+ 85 fi
+ 86 done
+ 87
+ 88 log "--- Summary ---"
+ 89 log " Succeeded: ${SUCCEEDED_REMOTES[*]:-none}"
+ 90 log " Failed: ${FAILED_REMOTES[*]:-none}"
+ 91
+ 92 if [[ ${#FAILED_REMOTES[@]} -gt 0 && -n "${MIRROR_NOTIFY:-}" ]]; then
+ 93 if command -v mail &>/dev/null; then
+ 94 {
+ 95 echo "Mirror push failed for the following remotes:"
+ 96 printf ' - %s\n' "${FAILED_REMOTES[@]}"
+ 97 echo ""
+ 98 echo "Repository: $(pwd)"
+ 99 echo "Refs updated:"
+100 printf ' %s\n' "${REFS[@]}"
+101 echo ""
+102 echo "Check log: $MIRROR_LOG"
+103 } | mail -s "[git-mirror] Push failure in $(basename "$(pwd)")" "$MIRROR_NOTIFY"
+104 fi
+105 fi
+106
+107 if [[ ${#FAILED_REMOTES[@]} -gt 0 ]]; then
+108 log "=== Finished with errors ==="
+109 exit 1
+110 fi
+111
+112 log "=== Finished successfully ==="
+113 exit 0
+```
+
+---
+
+## Line-by-Line Analysis
+
+### Line 1: Shebang
+
+```bash
+#!/usr/bin/env bash
+```
+
+The `#!/usr/bin/env bash` shebang is the portable way to invoke bash. Instead of hardcoding `/bin/bash` (which varies across systems — on NixOS, for example, bash is at `/run/current-system/sw/bin/bash`), `env` searches `$PATH` for the `bash` binary.
+
+**Why bash specifically?** The script uses bash-specific features:
+- Arrays (`REFS=()`, `REFS+=()`)
+- `[[ ]]` conditional expressions
+- `${array[*]:-default}` expansion
+- `${#array[@]}` array length
+- `set -o pipefail`
+
+These are not available in POSIX `sh`.
+
+### Lines 2–33: Documentation Header
+
+The header block is an extensive comment documenting:
+
+1. **What the hook does** — "Mirror push to multiple forges"
+2. **Where to deploy it** — `/path/to/project-tick.git/hooks/post-receive`
+3. **How to make it executable** — `chmod +x hooks/post-receive`
+4. **How to configure mirror remotes** — four SSH examples plus three HTTPS examples
+5. **Environment variables** — `MIRROR_REMOTES`, `MIRROR_LOG`, `MIRROR_NOTIFY`
+
+This self-documenting style means an administrator can understand the hook without reading external documentation.
+
+### Line 35: Strict Mode
+
+```bash
+set -euo pipefail
+```
+
+This is bash "strict mode," composed of three flags:
+
+**`-e` (errexit)**: If any command returns a non-zero exit code, the script terminates immediately. Exceptions:
+- Commands in `if`/`elif` conditions
+- Every command in an `&&`/`||` list except the last one
+- Commands in `while`/`until` conditions
+
+This is why the `git push` is wrapped in `if` — to capture its exit code without triggering `errexit`.
+
+**`-u` (nounset)**: Referencing an unset variable causes an immediate error instead of silently expanding to an empty string. This catches typos like `$MIIROR_LOG`. The `${VAR:-default}` syntax is used throughout to safely reference variables that may not be set.
+
+**`-o pipefail`**: By default, a pipeline's exit code is the exit code of the last command. With `pipefail`, the pipeline's exit code is the exit code of the rightmost command that failed (non-zero). This matters for:
+
+```bash
+git push --mirror --force "$remote" 2>&1 | tee -a "$MIRROR_LOG"
+```
+
+Without `pipefail`, this pipeline would succeed as long as `tee` succeeds, even if `git push` fails. With `pipefail`, a `git push` failure propagates through the pipeline. However, note the `2>/dev/null` after `tee` which may affect this — see the [Pipeline Behavior Under pipefail](#pipeline-behavior-under-pipefail) section.
+
+### Lines 41–42: Variable Initialization
+
+```bash
+MIRROR_REMOTES="${MIRROR_REMOTES:-}"
+MIRROR_LOG="${MIRROR_LOG:-/var/log/git-mirror.log}"
+```
+
+The `${VAR:-default}` expansion works as follows:
+
+| `VAR` state | Expansion |
+|-------------|-----------|
+| Set to value | The value |
+| Set to empty string | The default |
+| Unset | The default |
+
+For `MIRROR_REMOTES`, the default is an empty string, which triggers auto-detection later. For `MIRROR_LOG`, the default is `/var/log/git-mirror.log`.
+
+Note that `MIRROR_NOTIFY` is **not** initialized here — it's referenced later with `${MIRROR_NOTIFY:-}` inline. This is safe because the `:-` syntax prevents `set -u` from triggering on an unset variable.
+
+### Lines 45–47: Remote Auto-Detection
+
+```bash
+if [[ -z "$MIRROR_REMOTES" ]]; then
+ MIRROR_REMOTES=$(git remote | grep -v '^origin$' || true)
+fi
+```
+
+**`git remote`** — Lists all remote names, one per line. In the bare repository, this might output:
+
+```
+origin
+github
+gitlab
+codeberg
+sourceforge
+```
+
+**`grep -v '^origin$'`** — Inverts the match, removing lines that are exactly `origin`. The `^` and `$` anchors prevent matching remotes like `origin-backup` or `my-origin`.
+
+**`|| true`** — If `grep` finds no matches (all remotes are `origin`, or there are no remotes at all), it exits with code 1. Under `set -e`, this would terminate the script. The `|| true` ensures the command always succeeds.
+
+**`$(...)`** — Command substitution captures the output (with trailing newlines stripped). The newlines between remote names are preserved in the variable; it is the later *unquoted* expansion in `for remote in $MIRROR_REMOTES` that word-splits on `$IFS` (space, tab, and newline), yielding exactly one word per remote name — which is what the loop expects.
+
+### Lines 49–52: Empty Remote Guard
+
+```bash
+if [[ -z "$MIRROR_REMOTES" ]]; then
+ echo "[mirror] No mirror remotes configured. Skipping." >&2
+ exit 0
+fi
+```
+
+If auto-detection produced no results (no non-origin remotes), the script prints a message to stderr (`>&2`) and exits with code 0. Using stderr ensures the message doesn't interfere with any stdout processing, while exit code 0 ensures the push appears successful to the user.
+
+### Lines 57–61: The log() Function
+
+```bash
+log() {
+ local timestamp
+ timestamp="$(date -u '+%Y-%m-%d %H:%M:%S UTC')"
+ echo "[$timestamp] $*" | tee -a "$MIRROR_LOG" 2>/dev/null || echo "[$timestamp] $*"
+}
+```
+
+Detailed breakdown:
+
+1. **`local timestamp`** — Declares `timestamp` as function-local. Without `local`, it would be a global variable that persists after the function returns.
+
+2. **`date -u '+%Y-%m-%d %H:%M:%S UTC'`** — Generates a UTC timestamp. The `-u` flag is critical for server environments where multiple time zones may be in play. The format string produces output like `2026-04-05 14:30:00 UTC`.
+
+3. **`echo "[$timestamp] $*"`** — `$*` expands all function arguments as a single string. Unlike `$@`, which preserves argument boundaries, `$*` joins them with the first character of `$IFS` (default: space). For logging, this distinction doesn't matter.
+
+4. **`| tee -a "$MIRROR_LOG"`** — `tee -a` appends (`-a`) to the log file while passing through to stdout. This achieves dual output — the message appears in the hook's stdout (visible to the pusher) and is persisted in the log file.
+
+5. **`2>/dev/null`** — Suppresses `tee`'s stderr. If `$MIRROR_LOG` doesn't exist or isn't writable, `tee` would print an error like `tee: /var/log/git-mirror.log: Permission denied`. Suppressing this keeps the output clean.
+
+6. **`|| echo "[$timestamp] $*"`** — If the entire `echo | tee` pipeline fails (e.g., the log file is unwritable and `tee` exits non-zero under `pipefail`), this fallback ensures the message still reaches stdout.
+
+### Line 67: Trigger Banner
+
+```bash
+log "=== Mirror push triggered ==="
+```
+
+A visual separator in the log that marks the start of a new mirror operation. The `===` delimiters make it easy to grep for session boundaries:
+
+```bash
+grep "=== Mirror push" /var/log/git-mirror.log
+```
+
+### Lines 70–74: Stdin Ref Reading Loop
+
+```bash
+REFS=()
+while read -r oldrev newrev refname; do
+ REFS+=("$refname")
+ log " ref: $refname ($oldrev -> $newrev)"
+done
+```
+
+**`REFS=()`** — Initializes an empty bash array to accumulate ref names.
+
+**`read -r oldrev newrev refname`** — Reads one line from stdin, splitting on whitespace into three variables. The `-r` flag prevents backslash interpretation (e.g., `\n` is read literally, not as a newline).
+
+Git feeds post-receive hooks with lines formatted as:
+```
+<40-char old SHA-1> <40-char new SHA-1> <refname>
+```
+
+The `refname` variable captures everything after the second space, which is correct because ref names don't contain spaces.
+
+**Special SHA values**:
+
+| Old SHA | New SHA | Meaning |
+|---------|---------|---------|
+| `0000...0000` | `abc123...` | New ref created (branch/tag created) |
+| `abc123...` | `def456...` | Ref updated (normal push) |
+| `abc123...` | `0000...0000` | Ref deleted (branch/tag deleted) |
+
+**`REFS+=("$refname")`** — Appends the ref name to the array. The quotes around `$refname` are important to preserve the value as a single array element.
+
+### Lines 76–77: Result Arrays
+
+```bash
+FAILED_REMOTES=()
+SUCCEEDED_REMOTES=()
+```
+
+Two arrays that accumulate results as the push loop iterates. These are used later for the summary log and the notification email.
+
+### Lines 79–90: Mirror Push Loop
+
+```bash
+for remote in $MIRROR_REMOTES; do
+ log "Pushing to remote: $remote"
+
+ if git push --mirror --force "$remote" 2>&1 | tee -a "$MIRROR_LOG" 2>/dev/null; then
+ SUCCEEDED_REMOTES+=("$remote")
+ log " ✓ Successfully pushed to $remote"
+ else
+ FAILED_REMOTES+=("$remote")
+ log " ✗ FAILED to push to $remote"
+ fi
+done
+```
+
+**`for remote in $MIRROR_REMOTES`** — Note the unquoted `$MIRROR_REMOTES`. This is intentional — word splitting on `$IFS` whitespace (spaces, tabs, or the newlines produced by auto-detection) yields the individual remote names. If it were quoted as `"$MIRROR_REMOTES"`, the entire string would be treated as a single remote name.
+
+**`git push --mirror --force "$remote"`**:
+- `--mirror` — Push all refs under `refs/` to the remote, and delete remote refs that don't exist locally. This includes `refs/heads/*`, `refs/tags/*`, `refs/notes/*`, `refs/replace/*`, etc.
+- `--force` — Technically redundant here: `--mirror` already implies forced updates, so diverged refs (e.g., after a force-push to the canonical repo) are overwritten either way. The flag is kept for explicitness, making the overwrite-remote-state intent visible at the call site.
+- `"$remote"` — Quoted to handle remote names with unusual characters (defensive coding).
+
+**`2>&1`** — Merges stderr into stdout. Git's push progress and error messages go to stderr by default; this redirect ensures they're all captured by `tee`.
+
+**`| tee -a "$MIRROR_LOG" 2>/dev/null`** — Appends the complete push output to the log file. The `2>/dev/null` suppresses errors from `tee` if the log isn't writable.
+
+**`if ... then ... else`** — The `if` statement tests the exit code of the pipeline. Under `pipefail`, the pipeline fails if `git push` fails (regardless of `tee`'s exit code).
+
+### Lines 92–94: Summary Logging
+
+```bash
+log "--- Summary ---"
+log " Succeeded: ${SUCCEEDED_REMOTES[*]:-none}"
+log " Failed: ${FAILED_REMOTES[*]:-none}"
+```
+
+**`${SUCCEEDED_REMOTES[*]:-none}`** — Expands the array elements separated by spaces. If the array is empty, the `:-none` default kicks in and prints "none". This produces output like:
+
+```
+[2026-04-05 14:30:05 UTC] --- Summary ---
+[2026-04-05 14:30:05 UTC] Succeeded: github gitlab codeberg
+[2026-04-05 14:30:05 UTC] Failed: none
+```
+
+### Lines 97–109: Failure Notification
+
+```bash
+if [[ ${#FAILED_REMOTES[@]} -gt 0 && -n "${MIRROR_NOTIFY:-}" ]]; then
+ if command -v mail &>/dev/null; then
+ {
+ echo "Mirror push failed for the following remotes:"
+ printf ' - %s\n' "${FAILED_REMOTES[@]}"
+ echo ""
+ echo "Repository: $(pwd)"
+ echo "Refs updated:"
+ printf ' %s\n' "${REFS[@]}"
+ echo ""
+ echo "Check log: $MIRROR_LOG"
+ } | mail -s "[git-mirror] Push failure in $(basename "$(pwd)")" "$MIRROR_NOTIFY"
+ fi
+fi
+```
+
+**`${#FAILED_REMOTES[@]}`** — Array length operator. Returns the number of elements in `FAILED_REMOTES`.
+
+**`-gt 0`** — "Greater than 0" — at least one remote failed.
+
+**`-n "${MIRROR_NOTIFY:-}"`** — Tests if `MIRROR_NOTIFY` is non-empty. The `:-` prevents `set -u` from triggering on an unset variable.
+
+**`command -v mail &>/dev/null`** — Checks if `mail` is available. `command -v` is the POSIX-compliant way to check for command existence (preferred over `which`).
+
+**`{ ... } | mail ...`** — A command group constructs the email body as a multi-line string, piping it to `mail`:
+- `printf ' - %s\n' "${FAILED_REMOTES[@]}"` — Prints each failed remote as a bulleted list item
+- `$(pwd)` — The bare repository path
+- `printf ' %s\n' "${REFS[@]}"` — Lists all refs that were updated
+- `$MIRROR_LOG` — Points to the log file for detailed output
+
+**`mail -s "..." "$MIRROR_NOTIFY"`** — Sends an email with the given subject line to the configured address.
+
+### Lines 112–116: Exit Logic
+
+```bash
+if [[ ${#FAILED_REMOTES[@]} -gt 0 ]]; then
+ log "=== Finished with errors ==="
+ exit 1
+fi
+
+log "=== Finished successfully ==="
+exit 0
+```
+
+The exit code is informational only:
+
+- **`exit 1`** — Signals failure to monitoring or wrapper scripts. Git itself ignores the exit status of `post-receive` — the push has already succeeded (refs were updated before the hook ran) — but the hook's output is still relayed to the pusher as `remote:` lines.
+- **`exit 0`** — Clean completion.
+
+---
+
+## Data Flow Analysis
+
+### Input Data
+
+```
+┌──────────────────────────────────────────────────────────┐
+│ stdin │
+│ <old-sha> <new-sha> refs/heads/main │
+│ <old-sha> <new-sha> refs/tags/v1.0.0 │
+│ ... │
+└──────────────────────────────────────────────────────────┘
+ │
+ ▼
+ while read -r oldrev newrev refname
+ │
+ ├──► REFS[] array (refname values)
+ └──► log output (old→new transitions)
+```
+
+### Internal State
+
+```
+┌─────────────────────────────────────────┐
+│ MIRROR_REMOTES "github gitlab ..." │
+│ MIRROR_LOG "/var/log/..." │
+│ MIRROR_NOTIFY "admin@..." or "" │
+│ REFS[] ref names from push │
+│ FAILED_REMOTES[] failed remote names │
+│ SUCCEEDED_REMOTES[] ok remote names │
+└─────────────────────────────────────────┘
+```
+
+### Output Channels
+
+| Channel | Target | Content |
+|---------|--------|---------|
+| stdout | Pusher's terminal | Log messages, push output |
+| `$MIRROR_LOG` | Log file on disk | All log messages + push output |
+| `mail` | Email recipient | Failure notification body |
+| Exit code | Git server | 0 (success) or 1 (failure) |
+
+---
+
+## Bash Constructs Reference
+
+| Construct | Line(s) | Meaning |
+|-----------|---------|---------|
+| `${VAR:-default}` | 40–41 | Use `default` if `VAR` is unset or empty |
+| `${VAR:-}` | 92 | Expand to empty string if unset (avoids `set -u` error) |
+| `$(command)` | 44, 57, 98, 103 | Command substitution |
+| `[[ -z "$VAR" ]]` | 43, 47 | Test if string is empty |
+| `[[ -n "$VAR" ]]` | 92 | Test if string is non-empty |
+| `${#ARRAY[@]}` | 92, 107 | Array length |
+| `${ARRAY[*]:-x}` | 89, 90 | All elements or default |
+| `ARRAY+=("item")` | 69, 80, 83 | Append to array |
+| `read -r a b c` | 68 | Read space-delimited fields |
+| `cmd 2>&1` | 79 | Redirect stderr to stdout |
+| `cmd &>/dev/null` | 93 | Redirect all output to null |
+| `\|\| true` | 44 | Force success exit code |
+| `local var` | 56 | Function-scoped variable |
+| `{ ... }` | 94–102 | Command group for I/O redirection |
+
+---
+
+## Error Handling Strategy
+
+The script uses a layered error handling approach:
+
+1. **Global strict mode** (`set -euo pipefail`) catches programming errors
+2. **`if` wrappers** protect commands that are expected to fail (git push)
+3. **`|| true` guards** prevent `set -e` from triggering on grep no-match
+4. **`2>/dev/null` + `||` fallback** in `log()` handles unwritable log files
+5. **`command -v` checks** prevent crashes when optional tools are missing
+6. **`${VAR:-}` expansions** prevent `set -u` errors on optional variables
+
+This means the script will:
+- ✓ Continue if one mirror push fails (handled by `if`)
+- ✓ Continue if the log file is unwritable (handled by `2>/dev/null || echo`)
+- ✓ Continue if `mail` is not installed (handled by `command -v` check)
+- ✓ Continue if no remotes are configured (handled by `exit 0` guard)
+- ✗ Abort on undefined variables (caught by `set -u`)
+- ✗ Abort on unexpected command failures (caught by `set -e`)
+
+---
+
+## Pipeline Behavior Under pipefail
+
+The push pipeline deserves special attention:
+
+```bash
+git push --mirror --force "$remote" 2>&1 | tee -a "$MIRROR_LOG" 2>/dev/null
+```
+
+Under `pipefail`, the pipeline's exit code is determined by the rightmost failing command:
+
+| `git push` exit | `tee` exit | Pipeline exit |
+|-----------------|------------|---------------|
+| 0 (success) | 0 (success) | 0 |
+| 128 (failure) | 0 (success) | 128 |
+| 0 (success) | 1 (failure) | 1 |
+| 128 (failure) | 1 (failure) | 1 |
+
+If both fail, the rightmost non-zero status wins, so the pipeline reports `tee`'s exit code rather than `git push`'s. In practice `tee` rarely fails: it keeps passing data through to stdout even when the log file is unwritable, and the `2>/dev/null` only hides its error message — it does not change its exit status.
+
+However, there's a subtlety: `tee`'s `2>/dev/null` only suppresses `tee`'s own error messages. If `tee` can't open the log file for writing, it will still exit with a non-zero code, which could mask a `git push` success under `pipefail`. In practice, this is unlikely to cause problems because `tee` typically succeeds even if it can't write (it still outputs to stdout).
+
+---
+
+## Race Conditions and Concurrency
+
+If multiple pushes arrive simultaneously, multiple instances of `post-receive` may run concurrently. Potential issues:
+
+1. **Log file interleaving** — Multiple `tee -a` writes to the same log file. Append mode opens the file with `O_APPEND`, so each `write()` is atomically positioned at the current end of the file — individual log lines won't corrupt each other — but lines from concurrent hook instances may interleave. (Note: the `PIPE_BUF` atomicity guarantee applies to pipes, not regular files.)
+
+2. **Simultaneous mirror pushes** — Two hooks pushing to the same mirror remote concurrently. Git handles this gracefully — one push will complete first, and the second will either fast-forward or be a no-op.
+
+3. **REFS array** — Each hook instance has its own `REFS` array (separate bash process), so there's no cross-instance contamination.
+
+---
+
+## Performance Characteristics
+
+| Operation | Typical Duration | Notes |
+|-----------|-----------------|-------|
+| Remote auto-detection | <10 ms | `git remote` + `grep` on local config |
+| Stdin reading | <1 ms | Reading a few lines from pipe |
+| `git push --mirror` per remote | 1–60 seconds | Network-bound; depends on delta size |
+| Logging | <1 ms per call | Local file I/O |
+| Email notification | 100–500 ms | Depends on MTA |
+
+Total execution time is dominated by the mirror push loop. With 4 remotes, worst case is ~4 minutes for large pushes. The pushes are **sequential**, not parallel — see [Modification Guide](#modification-guide) for adding parallelism.
+
+---
+
+## Testing the Hook
+
+### Manual Invocation
+
+Simulate a push by feeding ref data on stdin:
+
+```bash
+cd /path/to/project-tick.git
+echo "0000000000000000000000000000000000000000 $(git rev-parse HEAD) refs/heads/main" \
+ | hooks/post-receive
+```
+
+### Dry Run Approach
+
+Create a modified version that uses `echo` instead of `git push`:
+
+```bash
+# In the hook, temporarily replace:
+# git push --mirror --force "$remote"
+# With:
+# echo "[DRY RUN] Would push --mirror --force to $remote"
+```
+
+### Unit Testing with Mocks
+
+```bash
+#!/usr/bin/env bash
+# test-post-receive.sh — Test the hook with mock remotes
+
+# Create a temporary bare repo
+TMPDIR=$(mktemp -d)
+git init --bare "$TMPDIR/test.git"
+cd "$TMPDIR/test.git"
+
+# Add a mock remote (pointing to a local bare repo)
+git init --bare "$TMPDIR/mirror.git"
+git remote add testmirror "$TMPDIR/mirror.git"
+
+# Copy the hook
+cp /path/to/hooks/post-receive hooks/post-receive
+chmod +x hooks/post-receive
+
+# Create a dummy ref
+git hash-object -t commit --stdin <<< "tree $(git hash-object -t tree /dev/null)
+author Test <test@test> 0 +0000
+committer Test <test@test> 0 +0000
+
+test" > /dev/null
+
+# Invoke the hook
+echo "0000000000000000000000000000000000000000 $(git rev-parse HEAD 2>/dev/null || echo abc123) refs/heads/main" \
+ | MIRROR_LOG="$TMPDIR/mirror.log" hooks/post-receive
+
+echo "Exit code: $?"
+cat "$TMPDIR/mirror.log"
+
+# Cleanup
+rm -rf "$TMPDIR"
+```
+
+---
+
+## Modification Guide
+
+### Adding a New Remote Type
+
+Simply add a new git remote to the bare repository. No script modification needed:
+
+```bash
+cd /path/to/project-tick.git
+git remote add bitbucket git@bitbucket.org:Project-Tick/Project-Tick.git
+```
+
+The auto-detection mechanism will pick it up automatically on the next push.
+
+### Adding Retry Logic
+
+To add retry logic for transient network failures, replace the push section:
+
+```bash
+for remote in $MIRROR_REMOTES; do
+ log "Pushing to remote: $remote"
+
+ MAX_RETRIES=3
+ RETRY_DELAY=5
+ attempt=0
+ push_success=false
+
+ while [[ $attempt -lt $MAX_RETRIES ]]; do
+ attempt=$((attempt + 1))
+ log " Attempt $attempt/$MAX_RETRIES for $remote"
+
+ if git push --mirror --force "$remote" 2>&1 | tee -a "$MIRROR_LOG" 2>/dev/null; then
+ push_success=true
+ break
+ fi
+
+ if [[ $attempt -lt $MAX_RETRIES ]]; then
+ log " Retrying in ${RETRY_DELAY}s..."
+ sleep "$RETRY_DELAY"
+ fi
+ done
+
+ if $push_success; then
+ SUCCEEDED_REMOTES+=("$remote")
+ log " ✓ Successfully pushed to $remote"
+ else
+ FAILED_REMOTES+=("$remote")
+ log " ✗ FAILED to push to $remote after $MAX_RETRIES attempts"
+ fi
+done
+```
+
+### Adding Webhook Notifications
+
+To add webhook notifications (e.g., Slack, Discord) alongside email:
+
+```bash
+# After the mail block, add:
+if [[ ${#FAILED_REMOTES[@]} -gt 0 && -n "${MIRROR_WEBHOOK:-}" ]]; then
+ if command -v curl &>/dev/null; then
+ PAYLOAD=$(cat <<EOF
+{
+ "text": "Mirror push failed in $(basename "$(pwd)")",
+ "remotes": "$(printf '%s, ' "${FAILED_REMOTES[@]}")",
+ "refs": "$(printf '%s, ' "${REFS[@]}")"
+}
+EOF
+)
+ curl -s -X POST -H "Content-Type: application/json" \
+ -d "$PAYLOAD" "$MIRROR_WEBHOOK" 2>/dev/null || true
+ fi
+fi
+```
+
+### Selective Ref Mirroring
+
+To mirror only specific branches instead of using `--mirror`:
+
+```bash
+for remote in $MIRROR_REMOTES; do
+ for ref in "${REFS[@]}"; do
+ log "Pushing $ref to $remote"
+ if git push --force "$remote" "$ref" 2>&1 | tee -a "$MIRROR_LOG" 2>/dev/null; then
+ log " ✓ $ref -> $remote"
+ else
+ log " ✗ FAILED $ref -> $remote"
+ FAILED_REMOTES+=("$remote:$ref")
+ fi
+ done
+done
+```
+
+---
+
+## Comparison with Alternative Approaches
+
+| Approach | Pros | Cons |
+|----------|------|------|
+| **post-receive hook** (current) | Simple, self-contained, zero external deps | Sequential pushes, coupled to git server |
+| **CI-triggered mirror** | Parallel, retries built-in, monitoring | Requires CI infrastructure, higher latency |
+| **Cron-based sync** | Decoupled from push flow | Delayed mirroring, may miss rapid pushes |
+| **Git federation** | Native, protocol-level | Not widely supported |
+| **Grokmirror** | Efficient for large repos | Complex setup, Python dependency |
+
+The post-receive hook approach chosen by Project-Tick is the simplest and most appropriate for a single-repository setup where immediate mirroring is desired.
diff --git a/docs/handbook/images4docker/architecture.md b/docs/handbook/images4docker/architecture.md
new file mode 100644
index 0000000000..8a1501f6fc
--- /dev/null
+++ b/docs/handbook/images4docker/architecture.md
@@ -0,0 +1,504 @@
+# images4docker — Architecture
+
+## Repository Structure
+
+images4docker follows a deliberately flat, single-purpose architecture. There
+are no build scripts, no Makefiles, no helper utilities. Every file in the
+repository serves exactly one role.
+
+```
+images4docker/
+├── .gitattributes # Line-ending enforcement (LF everywhere)
+├── .gitignore # Ignore *.log, *.tmp, .env
+├── LICENSE # GPL-3.0-or-later (full text)
+├── LICENSES/
+│ └── GPL-3.0-or-later.txt # REUSE-compliant license copy
+├── README.md # Project overview and notes
+└── dockerfiles/
+ ├── alma-9.Dockerfile
+ ├── alma-10.Dockerfile
+ ├── alpine-319.Dockerfile
+ ├── alpine-320.Dockerfile
+ ├── alpine-321.Dockerfile
+ ├── alpine-322.Dockerfile
+ ├── alpine-latest.Dockerfile
+ ├── amazonlinux-2.Dockerfile
+ ├── amazonlinux-2023.Dockerfile
+ ├── arch-latest.Dockerfile
+ ├── centos-stream9.Dockerfile
+ ├── centos-stream10.Dockerfile
+ ├── debian-bookworm.Dockerfile
+ ├── debian-bookworm-slim.Dockerfile
+ ├── debian-bullseye.Dockerfile
+ ├── debian-bullseye-slim.Dockerfile
+ ├── debian-stable-slim.Dockerfile
+ ├── debian-trixie-slim.Dockerfile
+ ├── devuan-chimaera.Dockerfile
+ ├── devuan-daedalus.Dockerfile
+ ├── fedora-40.Dockerfile
+ ├── fedora-41.Dockerfile
+ ├── fedora-42.Dockerfile
+ ├── fedora-latest.Dockerfile
+ ├── gentoo-stage3.Dockerfile
+ ├── kali-rolling.Dockerfile
+ ├── nix-latest.Dockerfile
+ ├── opensuse-leap-155.Dockerfile
+ ├── opensuse-leap-156.Dockerfile
+ ├── opensuse-tumbleweed.Dockerfile
+ ├── oraclelinux-8.Dockerfile
+ ├── oraclelinux-9.Dockerfile
+ ├── oraclelinux-10.Dockerfile
+ ├── rocky-9.Dockerfile
+ ├── rocky-10.Dockerfile
+ ├── ubuntu-2004.Dockerfile
+ ├── ubuntu-2204.Dockerfile
+ ├── ubuntu-2404.Dockerfile
+ ├── ubuntu-latest.Dockerfile
+ └── void-latest.Dockerfile
+```
+
+Total: **40 Dockerfiles**, **1 README**, **1 LICENSE pair**, **2 git config files**.
+
+---
+
+## Dockerfile Naming Convention
+
+Every Dockerfile follows a strict naming pattern:
+
+```
+<distro>-<version_or_tag>.Dockerfile
+```
+
+### Rules
+
+1. **Distro name** is the short, lower-case distribution identifier:
+   `alma`, `alpine`, `amazonlinux`, `arch`, `centos`, `debian`,
+   `devuan`, `fedora`, `gentoo`, `kali`, `nix`, `opensuse-leap`,
+   `opensuse-tumbleweed`, `oraclelinux`, `rocky`, `ubuntu`, `void`.
+   (CentOS Stream files use distro `centos` with the `stream9`/`stream10`
+   version keywords, e.g. `centos-stream9.Dockerfile`.)
+
+2. **Version** is the numeric version with dots stripped, or a keyword:
+ - Numeric: `9`, `10`, `319` (for 3.19), `2004` (for 20.04), `155` (for 15.5)
+ - Keywords: `latest`, `rolling`, `stage3`, `stream9`, `stream10`
+ - Variants: `bookworm-slim`, `bullseye-slim`, `stable-slim`, `trixie-slim`
+
+3. **Extension** is always `.Dockerfile` (capital D), not `.dockerfile`.
+
+### Examples
+
+| File name | Distribution | Version / Tag |
+|----------------------------------|-----------------|----------------------|
+| `alma-9.Dockerfile` | AlmaLinux | 9 |
+| `alpine-322.Dockerfile` | Alpine Linux | 3.22 |
+| `debian-bookworm-slim.Dockerfile` | Debian | Bookworm (12), slim |
+| `centos-stream10.Dockerfile` | CentOS Stream | 10 |
+| `opensuse-leap-156.Dockerfile` | openSUSE Leap | 15.6 |
+| `ubuntu-2404.Dockerfile` | Ubuntu | 24.04 |
+| `void-latest.Dockerfile` | Void Linux | latest |
+
+---
+
+## The Universal Dockerfile Template
+
+Every single Dockerfile in the repository shares the same structural template.
+The only differences between files are:
+
+1. The `FROM` base image reference.
+2. The default package-manager command used when `CUSTOM_INSTALL` is not set.
+3. The default cleanup command.
+4. Minor variations in the Qt6 binary search path.
+
+### Template Anatomy
+
+```dockerfile
+# syntax=docker/dockerfile:1.7
+FROM <base_image>:<tag>
+
+ARG PACKAGES=
+ARG CUSTOM_INSTALL=
+ARG UPDATE_CMD=
+ARG CLEAN_CMD=
+
+SHELL ["/bin/sh", "-lc"]
+
+RUN set -eux; \
+ if [ -n "${UPDATE_CMD}" ]; then \
+ sh -lc "${UPDATE_CMD}"; \
+ fi; \
+ if [ -n "${CUSTOM_INSTALL}" ]; then \
+ sh -lc "${CUSTOM_INSTALL}"; \
+ elif [ -n "${PACKAGES}" ]; then \
+ <package_manager> install <flags> ${PACKAGES}; \
+ fi; \
+ if [ -n "${CLEAN_CMD}" ]; then \
+ sh -lc "${CLEAN_CMD}"; \
+ else \
+ <default_cleanup>; \
+ fi; \
+ export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/opt/qt6/bin:/root/.nix-profile/bin"; \
+ <qt6_verification_gate>
+
+CMD ["/bin/sh"]
+```
+
+### Template Sections Explained
+
+#### 1. Syntax Directive
+
+```dockerfile
+# syntax=docker/dockerfile:1.7
+```
+
+Every file begins with this BuildKit syntax directive. This enables:
+- Heredoc support (`<<EOF`)
+- Improved caching behaviour
+- `RUN --mount` options (though not currently used)
+- Better error messages during builds
+
+#### 2. FROM Statement
+
+```dockerfile
+FROM <base_image>:<tag>
+```
+
+Each Dockerfile has a single, non-parameterised `FROM`. The image reference is
+hardcoded — there is no `ARG`-based base image selection. This is intentional:
+every Dockerfile builds exactly one image from exactly one base.
+
+The `FROM` references use public registries:
+
+| Registry                | Used by                                    |
+|-------------------------|--------------------------------------------|
+| Docker Hub (implicit)   | AlmaLinux, Alpine, Amazon Linux, Arch,     |
+|                         | Debian, Fedora, NixOS, openSUSE,           |
+|                         | Oracle Linux, Ubuntu                       |
+| `quay.io`               | CentOS Stream (`quay.io/centos/centos`)    |
+| Docker Hub (namespaced) | Devuan (`devuan/devuan`),                  |
+|                         | Gentoo (`gentoo/stage3`),                  |
+|                         | Kali (`kalilinux/kali-rolling`),           |
+|                         | Rocky (`rockylinux/rockylinux`),           |
+|                         | Void (`voidlinux/voidlinux`)               |
+
+#### 3. Build Arguments
+
+```dockerfile
+ARG PACKAGES=
+ARG CUSTOM_INSTALL=
+ARG UPDATE_CMD=
+ARG CLEAN_CMD=
+```
+
+All four arguments default to empty strings. They are the injection points
+through which the CI workflow customises each build:
+
+| Argument | Purpose | Example value |
+|-------------------|-------------------------------------------------------------|-----------------------------------------|
+| `PACKAGES` | Space-separated list of packages to install | `qt6-base-dev cmake gcc g++` |
+| `CUSTOM_INSTALL` | Arbitrary shell command that replaces the default install | `dnf config-manager --enable crb && dnf install -y qt6-qtbase-devel` |
+| `UPDATE_CMD` | Shell command run before package installation | `apt-get update` |
+| `CLEAN_CMD` | Shell command run after installation to reduce image size | `rm -rf /var/lib/apt/lists/*` |
+
+Priority logic:
+1. If `CUSTOM_INSTALL` is non-empty, it is executed instead of the package manager.
+2. Otherwise, if `PACKAGES` is non-empty, the native package manager installs them.
+3. If neither is set, nothing is installed (the Qt6 check still runs and will
+   fail unless the base image already ships Qt6).
+
+#### 4. Shell Override
+
+```dockerfile
+SHELL ["/bin/sh", "-lc"]
+```
+
+The default Docker shell is `["/bin/sh", "-c"]`. The `-l` flag forces a login
+shell, which ensures:
+- `/etc/profile` and `/etc/profile.d/*.sh` are sourced.
+- `PATH` extensions from the distribution's login scripts are available.
+- NixOS profile paths (`/root/.nix-profile/bin`) are activated.
+
+#### 5. The RUN Block
+
+The entire build logic is a single `RUN` instruction. This is deliberate — it
+creates a single Docker layer, minimising image size and avoiding intermediate
+layers that would persist deleted files.
+
+The `RUN` block executes in this order:
+
+```
+┌──────────────────────┐
+│ set -eux │ Fail on errors, undefined vars, print commands
+├──────────────────────┤
+│ UPDATE_CMD? │ Optional: pre-install update (apt-get update, etc.)
+├──────────────────────┤
+│ CUSTOM_INSTALL? │ If set: run arbitrary install command
+│ or PACKAGES? │ Else if set: run native pkg manager with PACKAGES
+├──────────────────────┤
+│ CLEAN_CMD? │ If set: run custom cleanup
+│ or default clean │ Else: run distro-appropriate cleanup
+├──────────────────────┤
+│ export PATH=... │ Extend PATH with Qt6 binary locations
+├──────────────────────┤
+│ Qt6 verification │ Check for qmake6/qmake-qt6/qtpaths6 binaries
+│ gate │ FAILS BUILD if not found
+└──────────────────────┘
+```
+
+#### 6. CMD
+
+```dockerfile
+CMD ["/bin/sh"]
+```
+
+Every image defaults to a shell. CI jobs override this with their own
+`entrypoint` or `command` specifications, so the `CMD` is effectively a
+debug/interactive fallback.
+
+---
+
+## Package Manager Dispatch
+
+The package-manager command in the `RUN` block varies per distribution family.
+Here is the exact command used by each group:
+
+### apt-based (Debian, Ubuntu, Devuan, Kali)
+
+```sh
+apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}
+```
+
+- `--no-install-recommends` keeps images lean by skipping suggested packages.
+- Default cleanup: `rm -rf /var/lib/apt/lists/*`
+
+### dnf-based (Fedora, AlmaLinux, CentOS Stream, Rocky, Oracle Linux, Amazon Linux 2023)
+
+```sh
+dnf install -y ${PACKAGES}
+```
+
+- Default cleanup: `dnf clean all || true`
+
+### yum-based (Amazon Linux 2)
+
+```sh
+yum install -y ${PACKAGES}
+```
+
+- Default cleanup: `yum clean all || true`
+
+### apk-based (Alpine Linux)
+
+```sh
+apk add --no-cache ${PACKAGES}
+```
+
+- `--no-cache` means no index files are persisted.
+- Default cleanup: `true` (no-op, since apk --no-cache handles it).
+
+### zypper-based (openSUSE Leap, Tumbleweed)
+
+```sh
+zypper --non-interactive refresh; zypper --non-interactive install --no-recommends ${PACKAGES}
+```
+
+- `--non-interactive` prevents prompts.
+- `--no-recommends` skips recommended (but not required) packages.
+- Default cleanup: `zypper clean --all || true`
+
+### pacman-based (Arch Linux)
+
+```sh
+pacman -Syu --noconfirm --needed ${PACKAGES}
+```
+
+- `-Syu` does a full system upgrade before installing.
+- `--needed` skips already-installed packages.
+- Default cleanup: `pacman -Scc --noconfirm || true`
+
+### emerge-based (Gentoo)
+
+```sh
+emerge --sync; emerge ${PACKAGES}
+```
+
+- `--sync` refreshes the Portage tree before installing.
+- Default cleanup: `true` (no-op).
+
+### nix-env-based (NixOS/Nix)
+
+```sh
+nix-env -iA ${PACKAGES}
+```
+
+- `-iA` installs by attribute path from nixpkgs.
+- Default cleanup: `nix-collect-garbage -d || true`
+
+### xbps-based (Void Linux)
+
+```sh
+xbps-install -Sy ${PACKAGES}
+```
+
+- `-S` syncs the repository index.
+- `-y` assumes yes to prompts.
+- Default cleanup: `xbps-remove -O || true`
+
+---
+
+## Qt6 Binary Search Paths
+
+After package installation, every Dockerfile extends `PATH` to include
+distribution-specific Qt6 binary directories. There are two variants:
+
+### Standard Path Extension (most distros)
+
+```sh
+export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/opt/qt6/bin:/root/.nix-profile/bin"
+```
+
+Used by: AlmaLinux, Alpine, Arch, CentOS Stream, Debian, Devuan, Fedora,
+Kali, openSUSE, Rocky, Ubuntu.
+
+### Extended Path (distros with /usr/libexec/qt6)
+
+```sh
+export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/usr/libexec/qt6:/opt/qt6/bin:/root/.nix-profile/bin"
+```
+
+Used by: Amazon Linux 2023, Gentoo, NixOS, Oracle Linux, Void Linux.
+
+The `/usr/libexec/qt6` path is added for distributions where Qt6 installs its
+binaries under `libexec` rather than `lib/qt6/bin`.
+
+---
+
+## Base Image Selection Strategy
+
+The choice of base images follows these principles:
+
+### Version Pinning
+
+- **LTS releases** are pinned to specific versions: `ubuntu:20.04`, `ubuntu:22.04`,
+ `ubuntu:24.04`, `debian:bookworm`, `alpine:3.19`, etc.
+- **Rolling releases** use `latest` tags: `archlinux:latest`, `fedora:latest`,
+ `alpine:latest`, `opensuse/tumbleweed:latest`.
+- **Dual coverage**: where possible, both a pinned version and a `latest` tag
+ are maintained (e.g., `alpine-321.Dockerfile` + `alpine-latest.Dockerfile`).
+
+### Registry Selection
+
+- **Docker Hub** is the primary registry for most images.
+- **Quay.io** is used for CentOS Stream because the official CentOS images
+ are hosted there: `quay.io/centos/centos:stream9`.
+- **Namespaced images** are used where distributions publish under their own
+ Docker Hub organisation: `devuan/devuan`, `rockylinux/rockylinux`,
+ `voidlinux/voidlinux`, `kalilinux/kali-rolling`, `gentoo/stage3`.
+
+### Slim vs Full Variants
+
+For Debian, both full and slim variants are maintained:
+- `debian:bookworm` — full image with documentation, man pages, extra utilities.
+- `debian:bookworm-slim` — minimal image, roughly half the size.
+
+The slim variants are preferred for CI because they download faster, but full
+variants are kept for cases where build scripts expect standard utilities.
+
+---
+
+## Configuration Files
+
+### .gitattributes
+
+```
+# images4docker
+* text=auto eol=lf
+*.Dockerfile text
+```
+
+- Forces LF line endings on all text files.
+- Explicitly marks `*.Dockerfile` as text to ensure proper diff handling.
+- Prevents CRLF corruption when contributors use Windows.
+
+### .gitignore
+
+```
+# images4docker
+*.log
+*.tmp
+.env
+```
+
+- Ignores build logs and temporary files.
+- Ignores `.env` files that might contain registry credentials.
+
+---
+
+## Design Principles
+
+### Single-layer images
+
+Each Dockerfile has exactly one `RUN` instruction. This means:
+- The final image has the base image's layers plus exactly one additional layer.
+- No intermediate layers persist files that are later deleted (which would bloat
+ the image even though the files are not visible).
+
+### No COPY or ADD
+
+None of the Dockerfiles copy any files from the build context. All configuration
+is done via `ARG` values injected at build time. This means:
+- The Docker build context is effectively empty.
+- Builds are fast because no files need to be sent to the Docker daemon.
+- The Dockerfiles are entirely self-contained.
+
+### No ENTRYPOINT
+
+Images use `CMD ["/bin/sh"]` without an `ENTRYPOINT`. This allows CI jobs to
+override the command freely without needing `--entrypoint`.
+
+### No EXPOSE or VOLUME
+
+These images are build environments, not services. There are no network ports
+to expose and no data volumes to mount.
+
+### No USER directive
+
+All images run as `root`. CI builds typically need root to install packages
+and access system directories. Security isolation is handled at the container
+runtime level (Docker, Podman, etc.), not inside the image.
+
+### No HEALTHCHECK
+
+These are ephemeral CI images, not long-running services. Health checks would
+add unnecessary complexity.
+
+---
+
+## Image Lifecycle
+
+```
+┌─────────────────┐ ┌──────────────────┐ ┌─────────────────────┐
+│ Upstream base │────▶│ Dockerfile │────▶│ Built image │
+│ (Docker Hub / │ │ (in this repo) │ │ (GHCR) │
+│ Quay.io) │ │ │ │ │
+└─────────────────┘ └──────────────────┘ └─────────────────────┘
+ │ │ │
+ │ Daily pull │ Push to main / │ Used by CI
+ │ (cron 03:17 UTC) │ daily cron │ jobs in other
+ │ │ │ repositories
+ ▼ ▼ ▼
+ New upstream ──▶ Rebuild triggered ──▶ New image pushed
+ tag available by workflow to ghcr.io
+```
+
+1. Upstream distributions publish new base images.
+2. The daily cron or a push to `main` triggers the GitHub Actions workflow.
+3. The workflow builds each Dockerfile with the appropriate `--build-arg` values.
+4. The Qt6 verification gate passes or fails the build.
+5. Successful images are pushed to `ghcr.io/project-tick-infra/images/`.
+6. Other Project Tick CI jobs pull these images as their build containers.
+
+---
+
+## Related Documentation
+
+- [Overview](overview.md) — project summary
+- [Base Images](base-images.md) — per-image deep dive
+- [Qt6 Verification](qt6-verification.md) — the verification gate
+- [CI/CD Integration](ci-cd-integration.md) — workflow details
+- [Creating New Images](creating-new-images.md) — adding distributions
+- [Troubleshooting](troubleshooting.md) — debugging builds
diff --git a/docs/handbook/images4docker/base-images.md b/docs/handbook/images4docker/base-images.md
new file mode 100644
index 0000000000..e28aa89aac
--- /dev/null
+++ b/docs/handbook/images4docker/base-images.md
@@ -0,0 +1,825 @@
+# images4docker — Base Images
+
+## Overview
+
+This document provides an exhaustive reference for every base image used by
+images4docker. Each entry covers the upstream `FROM` reference, the package
+manager used, the install and cleanup commands, the Qt6 search paths, and
+notes about distribution-specific behaviour.
+
+All 40 Dockerfiles share the same template structure (see
+[Architecture](architecture.md)). The differences are:
+
+1. The `FROM` image reference.
+2. The native package-manager command.
+3. The default cache-cleanup command.
+4. The Qt6 binary search `PATH` extensions.
+
+---
+
+## RHEL / Enterprise Linux Family
+
+### AlmaLinux 9
+
+**File:** `dockerfiles/alma-9.Dockerfile`
+**FROM:** `almalinux:9`
+**Package manager:** `dnf`
+
+```dockerfile
+# syntax=docker/dockerfile:1.7
+FROM almalinux:9
+
+ARG PACKAGES=
+ARG CUSTOM_INSTALL=
+ARG UPDATE_CMD=
+ARG CLEAN_CMD=
+
+SHELL ["/bin/sh", "-lc"]
+
+RUN set -eux; \
+ ... \
+ dnf install -y ${PACKAGES}; \
+ ... \
+ dnf clean all || true; \
+ export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/opt/qt6/bin:/root/.nix-profile/bin"; \
+ <qt6 verification>
+
+CMD ["/bin/sh"]
+```
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Binary-compatible rebuild of RHEL 9.
+- Qt6 packages typically available via EPEL or CRB repositories (enabled via `CUSTOM_INSTALL`).
+- CRB (CodeReady Builder) / PowerTools sometimes needs explicit enablement: `dnf config-manager --enable crb`.
+
+---
+
+### AlmaLinux 10
+
+**File:** `dockerfiles/alma-10.Dockerfile`
+**FROM:** `almalinux:10`
+**Package manager:** `dnf`
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- RHEL 10 compatible rebuild.
+- Qt6 packages expected to be more widely available in RHEL 10 repositories compared to RHEL 9.
+
+---
+
+### CentOS Stream 9
+
+**File:** `dockerfiles/centos-stream9.Dockerfile`
+**FROM:** `quay.io/centos/centos:stream9`
+**Package manager:** `dnf`
+
+```dockerfile
+FROM quay.io/centos/centos:stream9
+```
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Official CentOS Stream images are hosted on **Quay.io**, not Docker Hub.
+- CentOS Stream 9 is the upstream development branch for RHEL 9.
+- The `quay.io/centos/centos` namespace replaced the former `centos` Docker Hub image.
+
+---
+
+### CentOS Stream 10
+
+**File:** `dockerfiles/centos-stream10.Dockerfile`
+**FROM:** `quay.io/centos/centos:stream10`
+**Package manager:** `dnf`
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Upstream development branch for RHEL 10.
+- Hosted on Quay.io at `quay.io/centos/centos:stream10`.
+
+---
+
+### Oracle Linux 8
+
+**File:** `dockerfiles/oraclelinux-8.Dockerfile`
+**FROM:** `oraclelinux:8`
+**Package manager:** `dnf`
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, **`/usr/libexec/qt6`**, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Qt6 extra checks:** Also checks `/usr/libexec/qt6/qmake` and `/usr/libexec/qt6/qtpaths`
+**Notes:**
+- RHEL 8 compatible. Oracle Linux 8 may have limited Qt6 availability.
+- Has the extended Qt6 path including `/usr/libexec/qt6` where Oracle may place Qt6 binaries.
+- May require `CUSTOM_INSTALL` to enable additional repositories for Qt6.
+
+---
+
+### Oracle Linux 9
+
+**File:** `dockerfiles/oraclelinux-9.Dockerfile`
+**FROM:** `oraclelinux:9`
+**Package manager:** `dnf`
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, **`/usr/libexec/qt6`**, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Qt6 extra checks:** Also checks `/usr/libexec/qt6/qmake` and `/usr/libexec/qt6/qtpaths`
+
+---
+
+### Oracle Linux 10
+
+**File:** `dockerfiles/oraclelinux-10.Dockerfile`
+**FROM:** `oraclelinux:10`
+**Package manager:** `dnf`
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, **`/usr/libexec/qt6`**, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Qt6 extra checks:** Also checks `/usr/libexec/qt6/qmake` and `/usr/libexec/qt6/qtpaths`
+
+---
+
+### Rocky Linux 9
+
+**File:** `dockerfiles/rocky-9.Dockerfile`
+**FROM:** `rockylinux/rockylinux:9`
+**Package manager:** `dnf`
+
+```dockerfile
+FROM rockylinux/rockylinux:9
+```
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Rocky Linux images are under the `rockylinux/rockylinux` namespace on Docker Hub.
+- RHEL 9 binary-compatible community rebuild (successor to CentOS Linux).
+
+---
+
+### Rocky Linux 10
+
+**File:** `dockerfiles/rocky-10.Dockerfile`
+**FROM:** `rockylinux/rockylinux:10`
+**Package manager:** `dnf`
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+
+---
+
+## Amazon Linux
+
+### Amazon Linux 2
+
+**File:** `dockerfiles/amazonlinux-2.Dockerfile`
+**FROM:** `amazonlinux:2`
+**Package manager:** `yum`
+
+```dockerfile
+FROM amazonlinux:2
+
+RUN set -eux; \
+ ... \
+ yum install -y ${PACKAGES}; \
+ ... \
+ yum clean all || true; \
+ ...
+```
+
+**Install command:** `yum install -y ${PACKAGES}`
+**Cleanup command:** `yum clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- This is the **only** image that uses `yum` instead of `dnf`.
+- Amazon Linux 2 is based on RHEL 7 / CentOS 7 era packages.
+- Qt6 availability is **very limited** on Amazon Linux 2. This image is likely
+ excluded from the active CI matrix (the README states ~35 of 40 are active).
+- May require a `CUSTOM_INSTALL` command to build Qt6 from source or use a
+ third-party repository.
+
+---
+
+### Amazon Linux 2023
+
+**File:** `dockerfiles/amazonlinux-2023.Dockerfile`
+**FROM:** `amazonlinux:2023`
+**Package manager:** `dnf`
+
+```dockerfile
+FROM amazonlinux:2023
+
+RUN set -eux; \
+ ... \
+ dnf install -y ${PACKAGES}; \
+ ... \
+ dnf clean all || true; \
+ export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/usr/libexec/qt6:/opt/qt6/bin:/root/.nix-profile/bin"; \
+ ...
+```
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, **`/usr/libexec/qt6`**, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Qt6 extra checks:** Also checks `/usr/libexec/qt6/qmake` and `/usr/libexec/qt6/qtpaths`
+**Notes:**
+- Amazon Linux 2023 uses `dnf` (not `yum`), aligning with modern Fedora/RHEL.
+- Has the extended `/usr/libexec/qt6` path, suggesting Qt6 packages may install to libexec on AL2023.
+
+---
+
+## Fedora
+
+### Fedora 40
+
+**File:** `dockerfiles/fedora-40.Dockerfile`
+**FROM:** `fedora:40`
+**Package manager:** `dnf`
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Fedora has excellent Qt6 support. Packages like `qt6-qtbase-devel` are available
+ directly in the default repositories.
+
+---
+
+### Fedora 41
+
+**File:** `dockerfiles/fedora-41.Dockerfile`
+**FROM:** `fedora:41`
+**Package manager:** `dnf`
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+
+---
+
+### Fedora 42
+
+**File:** `dockerfiles/fedora-42.Dockerfile`
+**FROM:** `fedora:42`
+**Package manager:** `dnf`
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+
+---
+
+### Fedora Latest
+
+**File:** `dockerfiles/fedora-latest.Dockerfile`
+**FROM:** `fedora:latest`
+**Package manager:** `dnf`
+
+**Install command:** `dnf install -y ${PACKAGES}`
+**Cleanup command:** `dnf clean all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Rolling tag that always points to the newest stable Fedora release.
+
+---
+
+## Debian Family
+
+### Debian Bookworm (12)
+
+**File:** `dockerfiles/debian-bookworm.Dockerfile`
+**FROM:** `debian:bookworm`
+**Package manager:** `apt`
+
+```dockerfile
+FROM debian:bookworm
+
+RUN set -eux; \
+ ... \
+ apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}; \
+ ... \
+ rm -rf /var/lib/apt/lists/*; \
+ ...
+```
+
+**Install command:** `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}`
+**Cleanup command:** `rm -rf /var/lib/apt/lists/*`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Debian 12 "Bookworm" is the current stable release.
+- Full variant — includes documentation, man pages, standard utilities.
+- Qt6 packages are available as `qt6-base-dev`, `qmake6`, etc.
+- `--no-install-recommends` is critical for keeping image size down.
+
+---
+
+### Debian Bookworm Slim
+
+**File:** `dockerfiles/debian-bookworm-slim.Dockerfile`
+**FROM:** `debian:bookworm-slim`
+**Package manager:** `apt`
+
+**Install command:** `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}`
+**Cleanup command:** `rm -rf /var/lib/apt/lists/*`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Minimal Debian 12 — roughly half the size of the full variant.
+- No man pages, no documentation packages.
+- Preferred for CI where download speed matters.
+
+---
+
+### Debian Bullseye (11)
+
+**File:** `dockerfiles/debian-bullseye.Dockerfile`
+**FROM:** `debian:bullseye`
+**Package manager:** `apt`
+
+**Install command:** `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}`
+**Cleanup command:** `rm -rf /var/lib/apt/lists/*`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Debian 11 "Bullseye" (old stable).
+- Qt6 availability is **limited** — Bullseye shipped with Qt 5.15 in main.
+ Qt6 may require backports or `CUSTOM_INSTALL`.
+- Likely excluded from active CI matrix due to unreliable Qt6.
+
+---
+
+### Debian Bullseye Slim
+
+**File:** `dockerfiles/debian-bullseye-slim.Dockerfile`
+**FROM:** `debian:bullseye-slim`
+**Package manager:** `apt`
+
+**Install command:** `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}`
+**Cleanup command:** `rm -rf /var/lib/apt/lists/*`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Minimal variant of Debian 11.
+- Same Qt6 limitations as the full Bullseye variant.
+
+---
+
+### Debian Stable Slim
+
+**File:** `dockerfiles/debian-stable-slim.Dockerfile`
+**FROM:** `debian:stable-slim`
+**Package manager:** `apt`
+
+**Install command:** `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}`
+**Cleanup command:** `rm -rf /var/lib/apt/lists/*`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Rolling tag pointing to current Debian stable (currently Bookworm).
+- Automatically upgrades when a new Debian stable is released.
+
+---
+
+### Debian Trixie Slim
+
+**File:** `dockerfiles/debian-trixie-slim.Dockerfile`
+**FROM:** `debian:trixie-slim`
+**Package manager:** `apt`
+
+**Install command:** `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}`
+**Cleanup command:** `rm -rf /var/lib/apt/lists/*`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Debian 13 "Trixie" (testing).
+- Provides the latest packages, including recent Qt6 versions.
+- Good for catching regressions early with newer toolchains.
+
+---
+
+## Devuan
+
+### Devuan Chimaera
+
+**File:** `dockerfiles/devuan-chimaera.Dockerfile`
+**FROM:** `devuan/devuan:chimaera`
+**Package manager:** `apt`
+
+```dockerfile
+FROM devuan/devuan:chimaera
+```
+
+**Install command:** `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}`
+**Cleanup command:** `rm -rf /var/lib/apt/lists/*`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Devuan 4 "Chimaera" — based on Debian Bullseye but **without systemd**.
+- Uses `sysvinit` or OpenRC as init system.
+- Images are under `devuan/devuan` namespace on Docker Hub.
+- Qt6 availability mirrors Debian Bullseye (limited).
+
+---
+
+### Devuan Daedalus
+
+**File:** `dockerfiles/devuan-daedalus.Dockerfile`
+**FROM:** `devuan/devuan:daedalus`
+**Package manager:** `apt`
+
+**Install command:** `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}`
+**Cleanup command:** `rm -rf /var/lib/apt/lists/*`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Devuan 5 "Daedalus" — based on Debian Bookworm without systemd.
+- Qt6 availability mirrors Debian Bookworm (good).
+
+---
+
+## Ubuntu
+
+### Ubuntu 20.04 LTS (Focal Fossa)
+
+**File:** `dockerfiles/ubuntu-2004.Dockerfile`
+**FROM:** `ubuntu:20.04`
+**Package manager:** `apt`
+
+**Install command:** `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}`
+**Cleanup command:** `rm -rf /var/lib/apt/lists/*`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Ubuntu 20.04 LTS does **not** ship Qt6 in its default repositories.
+- Qt6 requires PPAs, the Qt online installer, or building from source.
+- Likely excluded from the active CI matrix.
+
+---
+
+### Ubuntu 22.04 LTS (Jammy Jellyfish)
+
+**File:** `dockerfiles/ubuntu-2204.Dockerfile`
+**FROM:** `ubuntu:22.04`
+**Package manager:** `apt`
+
+**Install command:** `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}`
+**Cleanup command:** `rm -rf /var/lib/apt/lists/*`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Ubuntu 22.04 ships Qt 6.2 LTS in the `universe` repository.
+- Packages: `qt6-base-dev`, `qmake6`, `qt6-tools-dev`, etc.
+
+---
+
+### Ubuntu 24.04 LTS (Noble Numbat)
+
+**File:** `dockerfiles/ubuntu-2404.Dockerfile`
+**FROM:** `ubuntu:24.04`
+**Package manager:** `apt`
+
+**Install command:** `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}`
+**Cleanup command:** `rm -rf /var/lib/apt/lists/*`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Ubuntu 24.04 ships Qt 6.4+ in the default repositories.
+- Full Qt6 development support out of the box.
+
+---
+
+### Ubuntu Latest
+
+**File:** `dockerfiles/ubuntu-latest.Dockerfile`
+**FROM:** `ubuntu:latest`
+**Package manager:** `apt`
+
+**Install command:** `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}`
+**Cleanup command:** `rm -rf /var/lib/apt/lists/*`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Rolling tag pointing to the latest Ubuntu LTS release.
+
+---
+
+## Kali Linux
+
+### Kali Rolling
+
+**File:** `dockerfiles/kali-rolling.Dockerfile`
+**FROM:** `kalilinux/kali-rolling:latest`
+**Package manager:** `apt`
+
+```dockerfile
+FROM kalilinux/kali-rolling:latest
+```
+
+**Install command:** `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}`
+**Cleanup command:** `rm -rf /var/lib/apt/lists/*`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Kali is Debian-based, so it uses the same `apt` install pattern.
+- Images are under `kalilinux/kali-rolling` on Docker Hub.
+- Kali's rolling release usually has recent Qt6 packages.
+
+---
+
+## Alpine Linux
+
+### Alpine 3.19
+
+**File:** `dockerfiles/alpine-319.Dockerfile`
+**FROM:** `alpine:3.19`
+**Package manager:** `apk`
+
+```dockerfile
+FROM alpine:3.19
+
+RUN set -eux; \
+ ... \
+ apk add --no-cache ${PACKAGES}; \
+ ... \
+ true; \
+ ...
+```
+
+**Install command:** `apk add --no-cache ${PACKAGES}`
+**Cleanup command:** `true` (no-op — `--no-cache` handles cleanup)
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Alpine uses `musl libc`, not `glibc`.
+- Qt6 packages: `qt6-qtbase-dev`, `qt6-qttools-dev`, etc.
+- No `libsystemd-dev` equivalent (Alpine does not use systemd).
+- Very small base images (~7 MB compressed).
+
+---
+
+### Alpine 3.20
+
+**File:** `dockerfiles/alpine-320.Dockerfile`
+**FROM:** `alpine:3.20`
+**Package manager:** `apk`
+
+**Install command:** `apk add --no-cache ${PACKAGES}`
+**Cleanup command:** `true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+
+---
+
+### Alpine 3.21
+
+**File:** `dockerfiles/alpine-321.Dockerfile`
+**FROM:** `alpine:3.21`
+**Package manager:** `apk`
+
+**Install command:** `apk add --no-cache ${PACKAGES}`
+**Cleanup command:** `true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+
+---
+
+### Alpine 3.22
+
+**File:** `dockerfiles/alpine-322.Dockerfile`
+**FROM:** `alpine:3.22`
+**Package manager:** `apk`
+
+**Install command:** `apk add --no-cache ${PACKAGES}`
+**Cleanup command:** `true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+
+---
+
+### Alpine Latest
+
+**File:** `dockerfiles/alpine-latest.Dockerfile`
+**FROM:** `alpine:latest`
+**Package manager:** `apk`
+
+**Install command:** `apk add --no-cache ${PACKAGES}`
+**Cleanup command:** `true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Rolling tag pointing to the latest Alpine stable release.
+
+---
+
+## openSUSE
+
+### openSUSE Leap 15.5
+
+**File:** `dockerfiles/opensuse-leap-155.Dockerfile`
+**FROM:** `opensuse/leap:15.5`
+**Package manager:** `zypper`
+
+```dockerfile
+FROM opensuse/leap:15.5
+
+RUN set -eux; \
+ ... \
+ zypper --non-interactive refresh; \
+ zypper --non-interactive install --no-recommends ${PACKAGES}; \
+ ... \
+ zypper clean --all || true; \
+ ...
+```
+
+**Install command:** `zypper --non-interactive refresh; zypper --non-interactive install --no-recommends ${PACKAGES}`
+**Cleanup command:** `zypper clean --all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- openSUSE Leap 15.5 — enterprise-grade stability.
+- Qt6 availability depends on the OBS (Open Build Service) repositories.
+- `--non-interactive` prevents zypper from blocking on prompts.
+- `--no-recommends` skips recommended packages (equivalent to `--no-install-recommends` in apt).
+
+---
+
+### openSUSE Leap 15.6
+
+**File:** `dockerfiles/opensuse-leap-156.Dockerfile`
+**FROM:** `opensuse/leap:15.6`
+**Package manager:** `zypper`
+
+**Install command:** `zypper --non-interactive refresh; zypper --non-interactive install --no-recommends ${PACKAGES}`
+**Cleanup command:** `zypper clean --all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+
+---
+
+### openSUSE Tumbleweed
+
+**File:** `dockerfiles/opensuse-tumbleweed.Dockerfile`
+**FROM:** `opensuse/tumbleweed:latest`
+**Package manager:** `zypper`
+
+**Install command:** `zypper --non-interactive refresh; zypper --non-interactive install --no-recommends ${PACKAGES}`
+**Cleanup command:** `zypper clean --all || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- openSUSE Tumbleweed is a rolling release with the latest packages.
+- Excellent Qt6 support, typically ships the latest Qt6 version.
+
+---
+
+## Arch Linux
+
+### Arch Latest
+
+**File:** `dockerfiles/arch-latest.Dockerfile`
+**FROM:** `archlinux:latest`
+**Package manager:** `pacman`
+
+```dockerfile
+FROM archlinux:latest
+
+RUN set -eux; \
+ ... \
+ pacman -Syu --noconfirm --needed ${PACKAGES}; \
+ ... \
+ pacman -Scc --noconfirm || true; \
+ ...
+```
+
+**Install command:** `pacman -Syu --noconfirm --needed ${PACKAGES}`
+**Cleanup command:** `pacman -Scc --noconfirm || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Notes:**
+- Rolling release with bleeding-edge packages.
+- `-Syu` performs a full system update before installing, which is mandatory on
+ Arch to avoid partial-upgrade breakage.
+- `--needed` skips packages that are already installed at the latest version.
+- Qt6 packages: `qt6-base`, `qt6-tools`, etc.
+- `pacman -Scc --noconfirm` removes all cached packages and unused sync
+  databases, significantly reducing image size.
+
+---
+
+## Gentoo
+
+### Gentoo Stage 3
+
+**File:** `dockerfiles/gentoo-stage3.Dockerfile`
+**FROM:** `gentoo/stage3:latest`
+**Package manager:** `emerge`
+
+```dockerfile
+FROM gentoo/stage3:latest
+
+RUN set -eux; \
+ ... \
+ emerge --sync; emerge ${PACKAGES}; \
+ ... \
+ true; \
+ export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/usr/libexec/qt6:/opt/qt6/bin:/root/.nix-profile/bin"; \
+ ...
+```
+
+**Install command:** `emerge --sync; emerge ${PACKAGES}`
+**Cleanup command:** `true` (no-op)
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, **`/usr/libexec/qt6`**, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Qt6 extra checks:** Also checks `/usr/libexec/qt6/qmake` and `/usr/libexec/qt6/qtpaths`
+**Notes:**
+- Gentoo Stage 3 is a minimal Gentoo installation from which packages are compiled.
+- `emerge --sync` refreshes the Portage tree (package database).
+- Package compilation from source makes builds **very slow** compared to binary distributions.
+- Has the extended `/usr/libexec/qt6` path.
+- Qt6 packages in Gentoo: `dev-qt/qtbase`, `dev-qt/qttools`, etc.
+
+---
+
+## NixOS / Nix
+
+### Nix Latest
+
+**File:** `dockerfiles/nix-latest.Dockerfile`
+**FROM:** `nixos/nix:latest`
+**Package manager:** `nix-env`
+
+```dockerfile
+FROM nixos/nix:latest
+
+RUN set -eux; \
+ ... \
+ nix-env -iA ${PACKAGES}; \
+ ... \
+ nix-collect-garbage -d || true; \
+ export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/usr/libexec/qt6:/opt/qt6/bin:/root/.nix-profile/bin"; \
+ ...
+```
+
+**Install command:** `nix-env -iA ${PACKAGES}`
+**Cleanup command:** `nix-collect-garbage -d || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, **`/usr/libexec/qt6`**, `/opt/qt6/bin`, **`/root/.nix-profile/bin`**
+**Qt6 extra checks:** Also checks `/usr/libexec/qt6/qmake` and `/usr/libexec/qt6/qtpaths`
+**Notes:**
+- Uses the Nix package manager, not a traditional FHS layout.
+- `-iA` installs packages by attribute path (e.g., `nixpkgs.qt6.qtbase`).
+- `nix-collect-garbage -d` removes old generations and unreferenced store paths.
+- `/root/.nix-profile/bin` is the primary binary path for Nix-installed packages.
+ This is where `qmake6` / `qtpaths6` would be found after `nix-env -iA`.
+- The `-l` flag in `SHELL ["/bin/sh", "-lc"]` is especially important here to
+ source `/root/.nix-profile/etc/profile.d/nix.sh`.
+
+---
+
+## Void Linux
+
+### Void Latest
+
+**File:** `dockerfiles/void-latest.Dockerfile`
+**FROM:** `voidlinux/voidlinux:latest`
+**Package manager:** `xbps`
+
+```dockerfile
+FROM voidlinux/voidlinux:latest
+
+RUN set -eux; \
+ ... \
+ xbps-install -Sy ${PACKAGES}; \
+ ... \
+ xbps-remove -O || true; \
+ export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/usr/libexec/qt6:/opt/qt6/bin:/root/.nix-profile/bin"; \
+ ...
+```
+
+**Install command:** `xbps-install -Sy ${PACKAGES}`
+**Cleanup command:** `xbps-remove -O || true`
+**Qt6 PATH:** `/usr/lib/qt6/bin`, `/usr/lib64/qt6/bin`, **`/usr/libexec/qt6`**, `/opt/qt6/bin`, `/root/.nix-profile/bin`
+**Qt6 extra checks:** Also checks `/usr/libexec/qt6/qmake` and `/usr/libexec/qt6/qtpaths`
+**Notes:**
+- Void Linux is an independent rolling-release distribution.
+- XBPS (X Binary Package System) is Void's native package manager.
+- `-S` syncs the repository data; `-y` assumes yes to all prompts.
+- `xbps-remove -O` cleans obsolete packages out of the local cache (removing
+  orphaned packages is the separate `-o` flag).
+- Qt6 packages in Void: `qt6-base-devel`, `qt6-tools-devel`, etc.
+
+---
+
+## Package Manager Summary Table
+
+| Package Manager | Command in Dockerfile | Cleanup Command | Distros using it |
+|-----------------|-------------------------------------------------------------------------------|-------------------------------------|---------------------------|
+| `apt` | `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}` | `rm -rf /var/lib/apt/lists/*` | Debian, Ubuntu, Devuan, Kali |
+| `dnf` | `dnf install -y ${PACKAGES}` | `dnf clean all \|\| true` | Fedora, Alma, CentOS, Rocky, Oracle, AL2023 |
+| `yum` | `yum install -y ${PACKAGES}` | `yum clean all \|\| true` | Amazon Linux 2 |
+| `apk` | `apk add --no-cache ${PACKAGES}` | `true` | Alpine |
+| `zypper` | `zypper --non-interactive refresh; zypper --non-interactive install --no-recommends ${PACKAGES}` | `zypper clean --all \|\| true` | openSUSE |
+| `pacman` | `pacman -Syu --noconfirm --needed ${PACKAGES}` | `pacman -Scc --noconfirm \|\| true` | Arch |
+| `emerge` | `emerge --sync; emerge ${PACKAGES}` | `true` | Gentoo |
+| `nix-env` | `nix-env -iA ${PACKAGES}` | `nix-collect-garbage -d \|\| true` | NixOS/Nix |
+| `xbps` | `xbps-install -Sy ${PACKAGES}` | `xbps-remove -O \|\| true` | Void |
+
+---
+
+## Related Documentation
+
+- [Overview](overview.md) — project summary and image inventory
+- [Architecture](architecture.md) — structural details
+- [Qt6 Verification](qt6-verification.md) — the verification gate
+- [CI/CD Integration](ci-cd-integration.md) — workflow details
+- [Creating New Images](creating-new-images.md) — adding new distributions
+- [Troubleshooting](troubleshooting.md) — debugging builds
diff --git a/docs/handbook/images4docker/ci-cd-integration.md b/docs/handbook/images4docker/ci-cd-integration.md
new file mode 100644
index 0000000000..c6c617a21f
--- /dev/null
+++ b/docs/handbook/images4docker/ci-cd-integration.md
@@ -0,0 +1,396 @@
+# images4docker — CI/CD Integration
+
+## Overview
+
+images4docker is purpose-built for CI/CD. The entire repository exists to
+produce pre-baked Docker images that other Project Tick repositories consume
+as build containers. This document covers how the images are built, pushed,
+and consumed.
+
+---
+
+## GitHub Actions Workflow
+
+### Workflow File
+
+The CI workflow is defined in `.github/workflows/build.yml` (referenced in the
+project README as "Repack Existing Images").
+
+### Trigger Conditions
+
+The workflow runs on two triggers:
+
+#### 1. Push to `main`
+
+Triggered when any of these paths change in a push to the `main` branch:
+
+- `dockerfiles/*.Dockerfile` — any Dockerfile modification
+- `.github/workflows/build.yml` — workflow changes
+- `README.md` — documentation updates
+
+#### 2. Scheduled Cron
+
+```yaml
+schedule:
+ - cron: '17 3 * * *' # Daily at 03:17 UTC
+```
+
+The daily schedule ensures that:
+- Upstream base image security patches are incorporated.
+- New upstream package versions are picked up.
+- Images stay current even when no Dockerfiles change.
+
+The non-standard minute (`17`) avoids the GitHub Actions "top of the hour"
+congestion that causes delayed job starts.
+
+---
+
+## Build Process
+
+### Build Matrix
+
+The workflow defines a build matrix of approximately **35 targets** — the
+Qt6-capable subset of the 40 Dockerfiles. Each target specifies:
+
+- The Dockerfile path
+- The target image name and tag
+- The `PACKAGES` build argument (distro-specific package list)
+- Optionally, the `CUSTOM_INSTALL`, `UPDATE_CMD`, or `CLEAN_CMD` build arguments
+
+### Build Arguments
+
+Each distribution requires a different set of Qt6 package names. The workflow
+injects these via Docker build arguments.
+
+#### apt-based distributions (Debian, Ubuntu, Devuan, Kali)
+
+Typical `PACKAGES` value:
+
+```
+qt6-base-dev qt6-tools-dev qmake6 cmake gcc g++ make pkg-config
+```
+
+Notes:
+- Qt6 packages require `apt-get update` first (injected via `UPDATE_CMD` or
+ handled by the Dockerfile's default `apt-get update` prefix).
+- `--no-install-recommends` is hardcoded in the Dockerfile.
+
+#### dnf-based distributions (Fedora, RHEL family)
+
+Typical `PACKAGES` value:
+
+```
+qt6-qtbase-devel qt6-qttools-devel cmake gcc gcc-c++ make pkgconfig
+```
+
+For RHEL-family distros that need CRB/PowerTools, the workflow may use
+`CUSTOM_INSTALL`:
+
+```sh
+dnf config-manager --enable crb && dnf install -y epel-release && dnf install -y qt6-qtbase-devel ...
+```
+
+#### apk-based distributions (Alpine)
+
+Typical `PACKAGES` value:
+
+```
+qt6-qtbase-dev qt6-qttools-dev cmake gcc g++ make musl-dev pkgconf
+```
+
+Notes:
+- No `libsystemd-dev` equivalent (Alpine does not use systemd).
+- Uses `musl-dev` instead of `libc6-dev`.
+
+#### zypper-based distributions (openSUSE)
+
+Typical `PACKAGES` value:
+
+```
+qt6-base-devel qt6-tools-devel cmake gcc gcc-c++ make pkg-config
+```
+
+#### pacman-based (Arch Linux)
+
+Typical `PACKAGES` value:
+
+```
+qt6-base qt6-tools cmake gcc make pkgconf
+```
+
+#### emerge-based (Gentoo)
+
+Typical `PACKAGES` value:
+
+```
+dev-qt/qtbase dev-qt/qttools dev-build/cmake
+```
+
+#### nix-env-based (NixOS)
+
+Typical `PACKAGES` value:
+
+```
+nixpkgs.qt6.qtbase nixpkgs.qt6.qttools nixpkgs.cmake nixpkgs.gcc nixpkgs.gnumake
+```
+
+#### xbps-based (Void Linux)
+
+Typical `PACKAGES` value:
+
+```
+qt6-base-devel qt6-tools-devel cmake gcc make pkg-config
+```
+
+### Docker Build Command
+
+Each matrix entry runs a Docker build command equivalent to:
+
+```bash
+docker build \
+ --file dockerfiles/<distro>-<tag>.Dockerfile \
+ --build-arg PACKAGES="<package list>" \
+ --build-arg CUSTOM_INSTALL="<optional custom command>" \
+ --build-arg UPDATE_CMD="<optional pre-install command>" \
+ --build-arg CLEAN_CMD="<optional cleanup command>" \
+ --tag ghcr.io/project-tick-infra/images/<target_name>:<target_tag> \
+ .
+```
+
+### BuildKit
+
+Every Dockerfile starts with `# syntax=docker/dockerfile:1.7`, which enables
+BuildKit features. The workflow likely sets `DOCKER_BUILDKIT=1` or uses
+`docker buildx build` for:
+
+- Improved build caching
+- Better error reporting
+- Parallel layer execution
+
+---
+
+## Container Registry
+
+### Registry URL
+
+```
+ghcr.io/project-tick-infra/images/
+```
+
+All images are pushed to the **GitHub Container Registry** (GHCR), which is
+tightly integrated with GitHub Actions.
+
+### Image Naming
+
+The target format for pushed images is:
+
+```
+ghcr.io/project-tick-infra/images/<target_name>:<target_tag>
+```
+
+Where `<target_name>` and `<target_tag>` are derived from the Dockerfile name.
+For example:
+
+| Dockerfile | Image reference |
+|--------------------------------|-----------------------------------------------------------------|
+| `alma-9.Dockerfile` | `ghcr.io/project-tick-infra/images/alma:9` |
+| `alpine-321.Dockerfile` | `ghcr.io/project-tick-infra/images/alpine:3.21` |
+| `debian-bookworm-slim.Dockerfile` | `ghcr.io/project-tick-infra/images/debian:bookworm-slim` |
+| `ubuntu-2404.Dockerfile` | `ghcr.io/project-tick-infra/images/ubuntu:24.04` |
+| `fedora-latest.Dockerfile` | `ghcr.io/project-tick-infra/images/fedora:latest` |
+
+### Authentication
+
+The workflow authenticates to GHCR using the built-in `GITHUB_TOKEN` provided
+by GitHub Actions:
+
+```bash
+echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
+```
+
+### Push
+
+After a successful build (meaning the Qt6 verification gate passed), the
+image is pushed:
+
+```bash
+docker push ghcr.io/project-tick-infra/images/<target_name>:<target_tag>
+```
+
+---
+
+## Consuming the Images
+
+### In GitHub Actions Workflows
+
+Other Project Tick repositories can use these images as build containers:
+
+```yaml
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ container:
+ image: ghcr.io/project-tick-infra/images/ubuntu:24.04
+ steps:
+ - uses: actions/checkout@v4
+ - name: Build
+ run: |
+ cmake -B build
+ cmake --build build
+```
+
+### In Docker Compose
+
+```yaml
+services:
+  build:
+    image: ghcr.io/project-tick-infra/images/fedora:42
+    volumes:
+      - .:/workspace
+    working_dir: /workspace
+    command: sh -c "cmake -B build && cmake --build build"
+```
+
+### As a FROM Base
+
+Other Dockerfiles can extend these images:
+
+```dockerfile
+FROM ghcr.io/project-tick-infra/images/debian:bookworm-slim
+RUN apt-get update && apt-get install -y additional-package
+```
+
+### Direct Docker Run
+
+```bash
+docker run --rm -v "$(pwd)":/workspace -w /workspace \
+  ghcr.io/project-tick-infra/images/alpine:3.22 \
+  sh -c "cmake -B build && cmake --build build"
+```
+
+---
+
+## Build Lifecycle
+
+```
+ ┌────────────────────────────────────────────────────────────────────────┐
+ │ Trigger (push to main or cron) │
+ └────────────────────────┬───────────────────────────────────────────────┘
+ │
+ ▼
+ ┌────────────────────────────────────────────────────────────────────────┐
+ │ Login to GHCR │
+ └────────────────────────┬───────────────────────────────────────────────┘
+ │
+ ▼
+ ┌────────────────────────────────────────────────────────────────────────┐
+ │ Build matrix (parallel) │
+ │ │
+ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
+ │ │ alma:9 │ │ alpine:3.21│ │ fedora:42 │ ... (×35) │
+ │ │ │ │ │ │ │ │
+ │ │ 1. Pull base│ │ 1. Pull base│ │ 1. Pull base│ │
+ │ │ 2. Install │ │ 2. Install │ │ 2. Install │ │
+ │ │ 3. Clean │ │ 3. Clean │ │ 3. Clean │ │
+ │ │ 4. Qt6 gate │ │ 4. Qt6 gate │ │ 4. Qt6 gate │ │
+ │ │ 5. Tag │ │ 5. Tag │ │ 5. Tag │ │
+ │ │ 6. Push │ │ 6. Push │ │ 6. Push │ │
+ │ └─────────────┘ └─────────────┘ └─────────────┘ │
+ └────────────────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## Caching Strategy
+
+### No explicit Docker cache
+
+The current design rebuilds images from scratch on each run. This is
+intentional:
+
+1. **Security** — pulling fresh base images ensures the latest security patches
+ are included.
+2. **Reproducibility** — no stale cached layers that might mask package changes.
+3. **Simplicity** — no cache management, no cache invalidation bugs.
+
+The trade-off is longer build times (~35 parallel builds), which is acceptable
+for a daily cron job.
+
+### Layer efficiency
+
+Each Dockerfile produces a single additional layer on top of the base image.
+This means:
+
+- No intermediate layers to cache or invalidate.
+- The push to GHCR transfers only the diff layer.
+- If the base image hasn't changed and the packages are the same, the layer
+ content will be identical (content-addressable storage deduplicates it).
+
+---
+
+## Monitoring and Failure Handling
+
+### Build Failures
+
+When a Qt6 verification fails:
+
+1. The Docker build exits with code 1.
+2. The GitHub Actions matrix job for that image is marked as failed.
+3. Other matrix jobs continue (matrix builds are independent).
+4. The workflow summary shows which images succeeded and which failed.
+
+### Upstream Image Disappearance
+
+If an upstream base image tag is removed or renamed:
+
+1. The `FROM` instruction fails with "manifest not found".
+2. The build fails for that specific target.
+3. The old image remains in GHCR (it is not deleted).
+4. Fix: update the Dockerfile to use the new tag, or remove the Dockerfile.
+
+### Monitoring
+
+Build status can be monitored via:
+
+- GitHub Actions workflow run history
+- GHCR package page (shows when images were last updated)
+- Downstream CI failures (if an image is stale or missing)
+
+---
+
+## Security Considerations
+
+### Image Provenance
+
+- All Dockerfiles use official, well-known upstream base images.
+- CentOS Stream images are pulled from `quay.io/centos/centos` (the official
+ CentOS mirror), not unofficial sources.
+- No third-party or personal Docker Hub repositories are used as bases.
+
+### No Secrets in Images
+
+- The Dockerfiles do not `COPY` any files from the build context.
+- No `ARG` or `ENV` values contain secrets.
+- The `GITHUB_TOKEN` is only used for GHCR authentication, not passed into builds.
+
+### No Network Access at Runtime
+
+The images are build environments. They do not expose ports, run daemons,
+or listen for connections. Network access is only used during `docker build`
+to download packages from distribution repositories.
+
+### Daily Rebuilds
+
+The daily cron ensures that security patches from upstream distributions are
+incorporated within 24 hours.
+
+---
+
+## Related Documentation
+
+- [Overview](overview.md) — project summary and image list
+- [Architecture](architecture.md) — Dockerfile structure
+- [Base Images](base-images.md) — per-distribution details
+- [Qt6 Verification](qt6-verification.md) — the verification gate
+- [Creating New Images](creating-new-images.md) — adding new distributions
+- [Troubleshooting](troubleshooting.md) — debugging build failures
diff --git a/docs/handbook/images4docker/creating-new-images.md b/docs/handbook/images4docker/creating-new-images.md
new file mode 100644
index 0000000000..95fd1d3d16
--- /dev/null
+++ b/docs/handbook/images4docker/creating-new-images.md
@@ -0,0 +1,338 @@
+# images4docker — Creating New Images
+
+## Overview
+
+This guide explains how to add a new distribution or version to images4docker.
+The process is straightforward because every Dockerfile follows an identical
+template — you only need to know the base image reference and the distribution's
+package manager.
+
+---
+
+## Prerequisites
+
+Before adding a new image, verify:
+
+1. **Qt6 availability** — the distribution must have Qt6 packages in its
+ repositories (default, EPEL, backports, or a reliable third-party source).
+ If Qt6 is not available, the image will fail the verification gate and
+ cannot be used.
+
+2. **Official Docker image** — the distribution must publish an official (or
+ well-maintained) Docker image on Docker Hub, Quay.io, or another public
+ registry.
+
+3. **Active support** — the distribution version should be actively maintained.
+ End-of-life versions should not be added.
+
+---
+
+## Step-by-Step: Adding a New Dockerfile
+
+### Step 1: Determine the Base Image
+
+Find the official Docker image reference on Docker Hub or the distribution's
+documentation. Examples:
+
+| Distribution | Docker Hub reference |
+|---------------------|-----------------------------------|
+| Debian Forky | `debian:forky` or `debian:14` |
+| Ubuntu 26.04 | `ubuntu:26.04` |
+| Fedora 43 | `fedora:43` |
+| Alpine 3.23 | `alpine:3.23` |
+| Rocky Linux 11 | `rockylinux/rockylinux:11` |
+| openSUSE Leap 16.0 | `opensuse/leap:16.0` |
+
+### Step 2: Identify the Package Manager
+
+| Package Manager | Command in template | Clean command |
+|-----------------|----------------------------------------------------------------------------|------------------------------------|
+| `apt` | `apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}` | `rm -rf /var/lib/apt/lists/*` |
+| `dnf` | `dnf install -y ${PACKAGES}` | `dnf clean all \|\| true` |
+| `yum` | `yum install -y ${PACKAGES}` | `yum clean all \|\| true` |
+| `apk` | `apk add --no-cache ${PACKAGES}` | `true` |
+| `zypper` | `zypper --non-interactive refresh; zypper --non-interactive install --no-recommends ${PACKAGES}` | `zypper clean --all \|\| true` |
+| `pacman` | `pacman -Syu --noconfirm --needed ${PACKAGES}` | `pacman -Scc --noconfirm \|\| true`|
+| `emerge` | `emerge --sync; emerge ${PACKAGES}` | `true` |
+| `nix-env` | `nix-env -iA ${PACKAGES}` | `nix-collect-garbage -d \|\| true` |
+| `xbps` | `xbps-install -Sy ${PACKAGES}` | `xbps-remove -O \|\| true` |
+
+### Step 3: Determine the Qt6 Path Variant
+
+Most distributions use the **standard path**:
+
+```sh
+export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/opt/qt6/bin:/root/.nix-profile/bin"
+```
+
+If the distro installs Qt6 binaries under `/usr/libexec/qt6/`, use the
+**extended path** and add extra checks. Currently, this applies to:
+Oracle Linux, Amazon Linux 2023, Gentoo, NixOS, Void Linux.
+
+### Step 4: Choose the Filename
+
+Follow the naming convention:
+
+```
+<distro>-<version>.Dockerfile
+```
+
+Rules:
+- Lower-case distro name.
+- Version dots stripped for numbers: `3.23` → `323`, `26.04` → `2604`.
+- Use `latest` for rolling-release tags.
+- Use hyphenated variants for slim/special tags: `bookworm-slim`.
+- Capital `D` in `.Dockerfile`.
+
+Examples:
+- `alpine-323.Dockerfile`
+- `ubuntu-2604.Dockerfile`
+- `fedora-43.Dockerfile`
+- `debian-forky-slim.Dockerfile`
+
+### Step 5: Create the Dockerfile
+
+Copy an existing Dockerfile from the same package-manager family and change
+only the `FROM` line.
+
+For example, to add Fedora 43, copy `fedora-42.Dockerfile`:
+
+```dockerfile
+# syntax=docker/dockerfile:1.7
+FROM fedora:43
+
+ARG PACKAGES=
+ARG CUSTOM_INSTALL=
+ARG UPDATE_CMD=
+ARG CLEAN_CMD=
+
+SHELL ["/bin/sh", "-lc"]
+
+RUN set -eux; if [ -n "${UPDATE_CMD}" ]; then sh -lc "${UPDATE_CMD}"; fi; if [ -n "${CUSTOM_INSTALL}" ]; then sh -lc "${CUSTOM_INSTALL}"; elif [ -n "${PACKAGES}" ]; then dnf install -y ${PACKAGES}; fi; if [ -n "${CLEAN_CMD}" ]; then sh -lc "${CLEAN_CMD}"; else dnf clean all || true; fi; export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/opt/qt6/bin:/root/.nix-profile/bin"; if command -v qmake6 >/dev/null 2>&1 || command -v qmake-qt6 >/dev/null 2>&1 || command -v qtpaths6 >/dev/null 2>&1 || [ -x /usr/lib/qt6/bin/qmake ] || [ -x /usr/lib64/qt6/bin/qmake ] || [ -x /usr/lib/qt6/bin/qtpaths ] || [ -x /usr/lib64/qt6/bin/qtpaths ]; then true; else echo "Qt6 toolchain not found" >&2; exit 1; fi
+
+CMD ["/bin/sh"]
+```
+
+**The only change is `FROM fedora:43`** — everything else is identical to the
+other Fedora Dockerfiles.
+
+### Step 6: Test Locally
+
+Build the image locally to verify it works:
+
+```bash
+docker build \
+ --file dockerfiles/fedora-43.Dockerfile \
+ --build-arg PACKAGES="qt6-qtbase-devel qt6-qttools-devel cmake gcc gcc-c++ make" \
+ --tag test-fedora-43 \
+ .
+```
+
+If the build completes without "Qt6 toolchain not found", the image passes
+the verification gate.
+
+### Step 7: Add to the CI Matrix
+
+Update `.github/workflows/build.yml` to include the new image in the build
+matrix. Add an entry with:
+
+- The Dockerfile path
+- The target image name and tag
+- The `PACKAGES` build argument value
+
+### Step 8: Commit and Push
+
+```bash
+git add dockerfiles/fedora-43.Dockerfile
+git commit -s -m "images4docker: add Fedora 43"
+git push
+```
+
+The workflow will trigger automatically and build the new image.
+
+---
+
+## Adding an Entirely New Distribution
+
+If the distribution uses a package manager not yet represented, you need to:
+
+1. **Determine the install command** — how to install packages non-interactively
+ with automatic dependency resolution.
+
+2. **Determine the cleanup command** — how to remove package caches to minimise
+ image size.
+
+3. **Create the Dockerfile** using the universal template structure.
+
+4. **Check Qt6 binary paths** — run an interactive container from the base image,
+ install Qt6 packages, and find where `qmake6` / `qtpaths6` are located:
+
+ ```bash
+ docker run --rm -it <base_image>:<tag> sh
+ # Inside the container:
+ <install Qt6 packages>
+ find / -name 'qmake*' -o -name 'qtpaths*' 2>/dev/null
+ ```
+
+5. **Choose the path variant** — if Qt6 binaries are in `/usr/libexec/qt6/`,
+ use the extended path. Otherwise, use the standard path.
+
+---
+
+## Adding a Slim Variant
+
+For distributions that offer slim/minimal Docker images (currently only Debian),
+you can add a slim variant:
+
+1. Copy the full variant's Dockerfile.
+2. Change the `FROM` tag to the slim variant (e.g., `debian:trixie-slim`).
+3. Name the file accordingly (e.g., `debian-trixie-slim.Dockerfile`).
+
+The package installation and Qt6 verification are identical — slim variants
+just start with fewer pre-installed packages.
+
+---
+
+## Retiring an Image
+
+When a distribution version reaches end-of-life:
+
+1. Remove the Dockerfile from `dockerfiles/`.
+2. Remove the matrix entry from `.github/workflows/build.yml`.
+3. The image remains in GHCR (it is not automatically deleted).
+4. Downstream CI jobs that reference the image will continue to work until
+ GHCR retention policies remove it.
+
+If you want to keep the Dockerfile for historical reference but stop building
+it, remove it from the workflow matrix but leave the file in the repository.
+
+---
+
+## Checklist for New Images
+
+- [ ] Base image exists on Docker Hub / Quay.io / other public registry
+- [ ] Qt6 packages are available in the distribution's repositories
+- [ ] Dockerfile created with correct `FROM` reference
+- [ ] Dockerfile uses correct package manager command
+- [ ] Correct Qt6 path variant (standard or extended)
+- [ ] File named according to convention: `<distro>-<version>.Dockerfile`
+- [ ] Local build tested successfully
+- [ ] Qt6 verification gate passes
+- [ ] Workflow matrix updated with new entry
+- [ ] `PACKAGES` build argument determined and documented
+- [ ] Committed with `git commit -s` (signed-off)
+- [ ] Push triggers successful CI build
+
+---
+
+## Common Mistakes
+
+### Wrong package manager
+
+Each distribution family has its own package manager. Using `apt-get` in a
+Fedora Dockerfile will fail immediately. Always copy from a Dockerfile in
+the same family.
+
+### Missing repository enablement
+
+Some RHEL-family distributions require enabling CRB/PowerTools or EPEL before
+Qt6 packages are available. Use `CUSTOM_INSTALL` for this:
+
+```sh
+CUSTOM_INSTALL="dnf config-manager --enable crb && dnf install -y epel-release && dnf install -y qt6-qtbase-devel ..."
+```
+
+### Forgetting the Dockerfile syntax directive
+
+Every file must start with:
+
+```dockerfile
+# syntax=docker/dockerfile:1.7
+```
+
+This must be the very first line — no blank lines or comments before it.
+
+### Using a non-existent base tag
+
+Before creating the Dockerfile, verify the tag exists:
+
+```bash
+docker pull <base_image>:<tag>
+```
+
+If the pull fails, the tag does not exist and the CI build will fail.
+
+### CRLF line endings
+
+The `.gitattributes` enforces LF line endings, but if you create files on
+Windows without proper Git configuration, CRLF characters can sneak in and
+cause shell script failures inside the container.
+
+---
+
+## Template Reference
+
+### For apt-based distributions
+
+```dockerfile
+# syntax=docker/dockerfile:1.7
+FROM <base_image>:<tag>
+
+ARG PACKAGES=
+ARG CUSTOM_INSTALL=
+ARG UPDATE_CMD=
+ARG CLEAN_CMD=
+
+SHELL ["/bin/sh", "-lc"]
+
+RUN set -eux; if [ -n "${UPDATE_CMD}" ]; then sh -lc "${UPDATE_CMD}"; fi; if [ -n "${CUSTOM_INSTALL}" ]; then sh -lc "${CUSTOM_INSTALL}"; elif [ -n "${PACKAGES}" ]; then apt-get update; apt-get install -y --no-install-recommends ${PACKAGES}; fi; if [ -n "${CLEAN_CMD}" ]; then sh -lc "${CLEAN_CMD}"; else rm -rf /var/lib/apt/lists/*; fi; export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/opt/qt6/bin:/root/.nix-profile/bin"; if command -v qmake6 >/dev/null 2>&1 || command -v qmake-qt6 >/dev/null 2>&1 || command -v qtpaths6 >/dev/null 2>&1 || [ -x /usr/lib/qt6/bin/qmake ] || [ -x /usr/lib64/qt6/bin/qmake ] || [ -x /usr/lib/qt6/bin/qtpaths ] || [ -x /usr/lib64/qt6/bin/qtpaths ]; then true; else echo "Qt6 toolchain not found" >&2; exit 1; fi
+
+CMD ["/bin/sh"]
+```
+
+### For dnf-based distributions
+
+```dockerfile
+# syntax=docker/dockerfile:1.7
+FROM <base_image>:<tag>
+
+ARG PACKAGES=
+ARG CUSTOM_INSTALL=
+ARG UPDATE_CMD=
+ARG CLEAN_CMD=
+
+SHELL ["/bin/sh", "-lc"]
+
+RUN set -eux; if [ -n "${UPDATE_CMD}" ]; then sh -lc "${UPDATE_CMD}"; fi; if [ -n "${CUSTOM_INSTALL}" ]; then sh -lc "${CUSTOM_INSTALL}"; elif [ -n "${PACKAGES}" ]; then dnf install -y ${PACKAGES}; fi; if [ -n "${CLEAN_CMD}" ]; then sh -lc "${CLEAN_CMD}"; else dnf clean all || true; fi; export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/opt/qt6/bin:/root/.nix-profile/bin"; if command -v qmake6 >/dev/null 2>&1 || command -v qmake-qt6 >/dev/null 2>&1 || command -v qtpaths6 >/dev/null 2>&1 || [ -x /usr/lib/qt6/bin/qmake ] || [ -x /usr/lib64/qt6/bin/qmake ] || [ -x /usr/lib/qt6/bin/qtpaths ] || [ -x /usr/lib64/qt6/bin/qtpaths ]; then true; else echo "Qt6 toolchain not found" >&2; exit 1; fi
+
+CMD ["/bin/sh"]
+```
+
+### For apk-based distributions
+
+```dockerfile
+# syntax=docker/dockerfile:1.7
+FROM <base_image>:<tag>
+
+ARG PACKAGES=
+ARG CUSTOM_INSTALL=
+ARG UPDATE_CMD=
+ARG CLEAN_CMD=
+
+SHELL ["/bin/sh", "-lc"]
+
+RUN set -eux; if [ -n "${UPDATE_CMD}" ]; then sh -lc "${UPDATE_CMD}"; fi; if [ -n "${CUSTOM_INSTALL}" ]; then sh -lc "${CUSTOM_INSTALL}"; elif [ -n "${PACKAGES}" ]; then apk add --no-cache ${PACKAGES}; fi; if [ -n "${CLEAN_CMD}" ]; then sh -lc "${CLEAN_CMD}"; else true; fi; export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/opt/qt6/bin:/root/.nix-profile/bin"; if command -v qmake6 >/dev/null 2>&1 || command -v qmake-qt6 >/dev/null 2>&1 || command -v qtpaths6 >/dev/null 2>&1 || [ -x /usr/lib/qt6/bin/qmake ] || [ -x /usr/lib64/qt6/bin/qmake ] || [ -x /usr/lib/qt6/bin/qtpaths ] || [ -x /usr/lib64/qt6/bin/qtpaths ]; then true; else echo "Qt6 toolchain not found" >&2; exit 1; fi
+
+CMD ["/bin/sh"]
+```
+
+---
+
+## Related Documentation
+
+- [Overview](overview.md) — project summary
+- [Architecture](architecture.md) — Dockerfile template structure
+- [Base Images](base-images.md) — existing images
+- [Qt6 Verification](qt6-verification.md) — the verification gate
+- [CI/CD Integration](ci-cd-integration.md) — workflow details
+- [Troubleshooting](troubleshooting.md) — debugging builds
diff --git a/docs/handbook/images4docker/overview.md b/docs/handbook/images4docker/overview.md
new file mode 100644
index 0000000000..f90d590d3d
--- /dev/null
+++ b/docs/handbook/images4docker/overview.md
@@ -0,0 +1,304 @@
+# images4docker — Overview
+
+## What Is images4docker?
+
+**images4docker** (formally titled "Project Tick Infra Image Repack") is a repository
+that maintains **40 separate Dockerfiles**, each pinned to a distinct upstream Linux
+distribution base image. Every Dockerfile follows an identical, parameterised
+template that:
+
+1. Pulls a fixed upstream base (`FROM <distro>:<tag>`).
+2. Accepts four build-time arguments (`PACKAGES`, `CUSTOM_INSTALL`, `UPDATE_CMD`,
+ `CLEAN_CMD`) so the CI workflow can inject the exact package list at build time.
+3. Installs the requested packages using the distro's native package manager.
+4. Runs a mandatory **Qt6 toolchain verification** gate — the build *fails* if
+   none of `qmake6`, `qmake-qt6`, or `qtpaths6` can be found in any of the
+   standard Qt6 binary paths.
+5. Cleans caches and sets `/bin/sh` as the default `CMD`.
+
+The resulting images are pushed to the GitHub Container Registry at:
+
+```
+ghcr.io/project-tick-infra/images/<target_name>:<target_tag>
+```
+
+These images serve as the **standardised build and test environments** for all
+Project Tick CI/CD pipelines. Rather than installing dependencies inside every
+CI job, the project pre-bakes them into reusable Docker images that are rebuilt
+daily and on every push to `main`.
+
+---
+
+## Why Does images4docker Exist?
+
+CI/CD jobs that compile C/C++ projects with Qt6 dependencies need a consistent
+set of development libraries, headers, and toolchains. Doing `apt-get install`
+or `dnf install` at the start of every job is:
+
+- **Slow** — package downloads and dependency resolution add minutes.
+- **Fragile** — upstream mirrors change, packages get renamed, GPG keys rotate.
+- **Non-reproducible** — two jobs that start seconds apart can get different
+ package versions.
+
+images4docker solves this by providing **pre-built, version-pinned, Qt6-verified
+Docker images** for every distribution the project needs to support.
+
+---
+
+## Repository Location
+
+Within the Project Tick monorepo:
+
+```
+Project-Tick/
+└── images4docker/
+ ├── .gitattributes
+ ├── .gitignore
+ ├── LICENSE (GPL-3.0-or-later)
+ ├── LICENSES/
+ │ └── GPL-3.0-or-later.txt
+ ├── README.md
+ └── dockerfiles/
+ ├── alma-9.Dockerfile
+ ├── alma-10.Dockerfile
+ ├── alpine-319.Dockerfile
+ ├── alpine-320.Dockerfile
+ ├── alpine-321.Dockerfile
+ ├── alpine-322.Dockerfile
+ ├── alpine-latest.Dockerfile
+ ├── amazonlinux-2.Dockerfile
+ ├── amazonlinux-2023.Dockerfile
+ ├── arch-latest.Dockerfile
+ ├── centos-stream9.Dockerfile
+ ├── centos-stream10.Dockerfile
+ ├── debian-bookworm.Dockerfile
+ ├── debian-bookworm-slim.Dockerfile
+ ├── debian-bullseye.Dockerfile
+ ├── debian-bullseye-slim.Dockerfile
+ ├── debian-stable-slim.Dockerfile
+ ├── debian-trixie-slim.Dockerfile
+ ├── devuan-chimaera.Dockerfile
+ ├── devuan-daedalus.Dockerfile
+ ├── fedora-40.Dockerfile
+ ├── fedora-41.Dockerfile
+ ├── fedora-42.Dockerfile
+ ├── fedora-latest.Dockerfile
+ ├── gentoo-stage3.Dockerfile
+ ├── kali-rolling.Dockerfile
+ ├── nix-latest.Dockerfile
+ ├── opensuse-leap-155.Dockerfile
+ ├── opensuse-leap-156.Dockerfile
+ ├── opensuse-tumbleweed.Dockerfile
+ ├── oraclelinux-8.Dockerfile
+ ├── oraclelinux-9.Dockerfile
+ ├── oraclelinux-10.Dockerfile
+ ├── rocky-9.Dockerfile
+ ├── rocky-10.Dockerfile
+ ├── ubuntu-2004.Dockerfile
+ ├── ubuntu-2204.Dockerfile
+ ├── ubuntu-2404.Dockerfile
+ ├── ubuntu-latest.Dockerfile
+ └── void-latest.Dockerfile
+```
+
+---
+
+## Complete Image Inventory
+
+The 40 Dockerfiles map to the following image targets. They are grouped below by
+distribution family.
+
+### RHEL / Enterprise Linux Family
+
+| Dockerfile | FROM base image | Package manager | Notes |
+|------------------------------|------------------------------------------|-----------------|-----------------------------------------|
+| `alma-9.Dockerfile` | `almalinux:9` | `dnf` | RHEL 9 binary-compatible rebuild |
+| `alma-10.Dockerfile` | `almalinux:10` | `dnf` | RHEL 10 binary-compatible rebuild |
+| `centos-stream9.Dockerfile` | `quay.io/centos/centos:stream9` | `dnf` | CentOS Stream 9, upstream of RHEL 9 |
+| `centos-stream10.Dockerfile` | `quay.io/centos/centos:stream10` | `dnf` | CentOS Stream 10, upstream of RHEL 10 |
+| `oraclelinux-8.Dockerfile` | `oraclelinux:8` | `dnf` | Oracle Linux 8 (RHEL 8 compatible) |
+| `oraclelinux-9.Dockerfile` | `oraclelinux:9` | `dnf` | Oracle Linux 9 (RHEL 9 compatible) |
+| `oraclelinux-10.Dockerfile` | `oraclelinux:10` | `dnf` | Oracle Linux 10 (RHEL 10 compatible) |
+| `rocky-9.Dockerfile` | `rockylinux/rockylinux:9` | `dnf` | Rocky Linux 9 (RHEL 9 compatible) |
+| `rocky-10.Dockerfile` | `rockylinux/rockylinux:10` | `dnf` | Rocky Linux 10 (RHEL 10 compatible) |
+
+### Amazon Linux
+
+| Dockerfile | FROM base image | Package manager | Notes |
+|---------------------------------|------------------------|-----------------|---------------------------------|
+| `amazonlinux-2.Dockerfile` | `amazonlinux:2` | `yum` | Amazon Linux 2 (legacy) |
+| `amazonlinux-2023.Dockerfile` | `amazonlinux:2023` | `dnf` | Amazon Linux 2023 (current) |
+
+### Fedora
+
+| Dockerfile | FROM base image | Package manager | Notes |
+|-----------------------------|--------------------|-----------------|---------------------------|
+| `fedora-40.Dockerfile` | `fedora:40` | `dnf` | Fedora 40 |
+| `fedora-41.Dockerfile` | `fedora:41` | `dnf` | Fedora 41 |
+| `fedora-42.Dockerfile` | `fedora:42` | `dnf` | Fedora 42 |
+| `fedora-latest.Dockerfile` | `fedora:latest` | `dnf` | Fedora rolling latest |
+
+### Debian
+
+| Dockerfile | FROM base image | Package manager | Notes |
+|-------------------------------------|---------------------------|-----------------|---------------------------|
+| `debian-bookworm.Dockerfile` | `debian:bookworm` | `apt` | Debian 12 full |
+| `debian-bookworm-slim.Dockerfile` | `debian:bookworm-slim` | `apt` | Debian 12 minimal |
+| `debian-bullseye.Dockerfile` | `debian:bullseye` | `apt` | Debian 11 full |
+| `debian-bullseye-slim.Dockerfile` | `debian:bullseye-slim` | `apt` | Debian 11 minimal |
+| `debian-stable-slim.Dockerfile` | `debian:stable-slim` | `apt` | Current stable, minimal |
+| `debian-trixie-slim.Dockerfile` | `debian:trixie-slim` | `apt` | Debian 13 (testing) slim |
+
+### Devuan
+
+| Dockerfile | FROM base image | Package manager | Notes |
+|----------------------------------|-----------------------------|-----------------|--------------------------|
+| `devuan-chimaera.Dockerfile` | `devuan/devuan:chimaera` | `apt` | Devuan 4 (systemd-free) |
+| `devuan-daedalus.Dockerfile` | `devuan/devuan:daedalus` | `apt` | Devuan 5 (systemd-free) |
+
+### Ubuntu
+
+| Dockerfile | FROM base image | Package manager | Notes |
+|-------------------------------|--------------------|-----------------|------------------------|
+| `ubuntu-2004.Dockerfile` | `ubuntu:20.04` | `apt` | Ubuntu 20.04 LTS |
+| `ubuntu-2204.Dockerfile` | `ubuntu:22.04` | `apt` | Ubuntu 22.04 LTS |
+| `ubuntu-2404.Dockerfile` | `ubuntu:24.04` | `apt` | Ubuntu 24.04 LTS |
+| `ubuntu-latest.Dockerfile` | `ubuntu:latest` | `apt` | Ubuntu rolling latest |
+
+### Kali Linux
+
+| Dockerfile | FROM base image | Package manager | Notes |
+|-------------------------------|-------------------------------------|-----------------|---------------|
+| `kali-rolling.Dockerfile` | `kalilinux/kali-rolling:latest` | `apt` | Kali rolling |
+
+### Alpine Linux
+
+| Dockerfile | FROM base image | Package manager | Notes |
+|--------------------------------|--------------------|-----------------|-----------------|
+| `alpine-319.Dockerfile` | `alpine:3.19` | `apk` | Alpine 3.19 |
+| `alpine-320.Dockerfile` | `alpine:3.20` | `apk` | Alpine 3.20 |
+| `alpine-321.Dockerfile` | `alpine:3.21` | `apk` | Alpine 3.21 |
+| `alpine-322.Dockerfile` | `alpine:3.22` | `apk` | Alpine 3.22 |
+| `alpine-latest.Dockerfile` | `alpine:latest` | `apk` | Alpine edge |
+
+### openSUSE
+
+| Dockerfile | FROM base image | Package manager | Notes |
+|--------------------------------------|-----------------------------------|-----------------|-----------------------|
+| `opensuse-leap-155.Dockerfile` | `opensuse/leap:15.5` | `zypper` | Leap 15.5 |
+| `opensuse-leap-156.Dockerfile` | `opensuse/leap:15.6` | `zypper` | Leap 15.6 |
+| `opensuse-tumbleweed.Dockerfile` | `opensuse/tumbleweed:latest` | `zypper` | Tumbleweed rolling |
+
+### Arch Linux
+
+| Dockerfile | FROM base image | Package manager | Notes |
+|------------------------------|------------------------|-----------------|--------------------|
+| `arch-latest.Dockerfile` | `archlinux:latest` | `pacman` | Arch rolling |
+
+### Gentoo
+
+| Dockerfile | FROM base image | Package manager | Notes |
+|-------------------------------|----------------------------|-----------------|--------------------|
+| `gentoo-stage3.Dockerfile` | `gentoo/stage3:latest` | `emerge` | Gentoo Stage 3 |
+
+### NixOS
+
+| Dockerfile | FROM base image | Package manager | Notes |
+|-----------------------------|-----------------------|-----------------|----------------|
+| `nix-latest.Dockerfile` | `nixos/nix:latest` | `nix-env` | NixOS/Nix |
+
+### Void Linux
+
+| Dockerfile | FROM base image | Package manager | Notes |
+|------------------------------|---------------------------------|-----------------|----------------|
+| `void-latest.Dockerfile` | `voidlinux/voidlinux:latest` | `xbps` | Void rolling |
+
+---
+
+## Distribution Coverage Summary
+
+| Package Manager Family | Count | Distributions |
+|------------------------|-------|--------------------------------------------------------------------------------|
+| `dnf` | 14 | AlmaLinux, CentOS Stream, Oracle Linux, Rocky Linux, Fedora, Amazon Linux 2023 |
+| `apt` | 13 | Debian, Ubuntu, Devuan, Kali |
+| `apk` | 5 | Alpine Linux |
+| `zypper` | 3 | openSUSE Leap, openSUSE Tumbleweed |
+| `pacman` | 1 | Arch Linux |
+| `yum` | 1 | Amazon Linux 2 |
+| `emerge` | 1 | Gentoo |
+| `nix-env` | 1 | NixOS/Nix |
+| `xbps` | 1 | Void Linux |
+| **Total** | **40**| |
+
+---
+
+## Workflow Automation
+
+The images are built by a GitHub Actions workflow named **"Repack Existing Images"**
+(`.github/workflows/build.yml`). It triggers on:
+
+- **Push to `main`** — when any `Dockerfile`, the workflow YAML, or `README.md` changes.
+- **Daily schedule** — cron at `03:17 UTC` every day.
+
+On each run the workflow builds and pushes the **Qt6-compatible set** (currently
+35 of the 40 targets — some older bases are excluded because they cannot provide
+Qt6 reliably).
+
+---
+
+## Licensing
+
+images4docker is licensed under the **GNU General Public License v3.0 or later**
+(GPL-3.0-or-later). The full license text is stored in:
+
+- `LICENSE` (top-level)
+- `LICENSES/GPL-3.0-or-later.txt`
+
+---
+
+## Key Design Decisions
+
+1. **One Dockerfile per base image** — no multi-stage, no shared `FROM`. This
+ keeps each image independent and debuggable.
+
+2. **Build-arg-driven customisation** — the Dockerfile itself contains only the
+ package-manager dispatch and the Qt6 check. All concrete package lists are
+ injected via `--build-arg PACKAGES=...` by the CI workflow.
+
+3. **Qt6 or bust** — there is no Qt5 fallback. If a distribution does not
+ package Qt6 tools, the build intentionally fails.
+
+4. **Syntax directive** — every Dockerfile starts with `# syntax=docker/dockerfile:1.7`
+ to opt into BuildKit features.
+
+5. **LF line endings enforced** — `.gitattributes` sets `* text=auto eol=lf` and
+ marks `*.Dockerfile` as text to prevent CRLF issues on Windows.
+
+6. **Minimal `.gitignore`** — only `*.log`, `*.tmp`, and `.env` are ignored.
+
+---
+
+## Quick Reference
+
+| Item | Value |
+|-------------------------|---------------------------------------------------------|
+| Total Dockerfiles | 40 |
+| Active CI matrix | ~35 (Qt6-capable targets) |
+| Registry | `ghcr.io/project-tick-infra/images/` |
+| Build trigger (push) | Changes to `dockerfiles/`, workflow, `README.md` |
+| Build trigger (cron) | Daily at `03:17 UTC` |
+| Qt6 requirement | Mandatory — build fails without it |
+| License | GPL-3.0-or-later |
+| Dockerfile syntax | `docker/dockerfile:1.7` (BuildKit) |
+| Default CMD | `/bin/sh` |
+
+---
+
+## Related Documentation
+
+- [Architecture](architecture.md) — directory layout and Dockerfile structure
+- [Base Images](base-images.md) — per-image deep dive
+- [Qt6 Verification](qt6-verification.md) — how the Qt6 gate works
+- [CI/CD Integration](ci-cd-integration.md) — workflow and registry details
+- [Creating New Images](creating-new-images.md) — how to add a new distribution
+- [Troubleshooting](troubleshooting.md) — common issues and debugging
diff --git a/docs/handbook/images4docker/qt6-verification.md b/docs/handbook/images4docker/qt6-verification.md
new file mode 100644
index 0000000000..b29343e19f
--- /dev/null
+++ b/docs/handbook/images4docker/qt6-verification.md
@@ -0,0 +1,283 @@
+# images4docker — Qt6 Verification
+
+## Purpose
+
+Every Dockerfile in images4docker includes a mandatory **Qt6 toolchain
+verification gate**. This gate runs at the end of the `RUN` instruction,
+after all packages have been installed. If the gate fails, the entire
+Docker build fails — there is **no fallback to Qt5** and no option to skip
+the check.
+
+This ensures that every image published to `ghcr.io/project-tick-infra/images/`
+is guaranteed to have a working Qt6 toolchain available.
+
+---
+
+## How the Gate Works
+
+### Step 1: PATH Extension
+
+Before checking for Qt6 binaries, the `PATH` environment variable is extended
+to include all known Qt6 installation directories:
+
+**Standard path extension** (most distributions):
+
+```sh
+export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/opt/qt6/bin:/root/.nix-profile/bin"
+```
+
+**Extended path** (Amazon Linux 2023, Gentoo, NixOS, Oracle Linux, Void Linux):
+
+```sh
+export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/usr/libexec/qt6:/opt/qt6/bin:/root/.nix-profile/bin"
+```
+
+The extended variant adds `/usr/libexec/qt6` for distributions where Qt6
+installs its binaries under `libexec`.
+
+### Step 2: Binary Search
+
+The gate checks for the presence of Qt6 binaries using a multi-pronged approach:
+
+**Command-based checks** (via `command -v` — checks `$PATH`):
+
+```sh
+command -v qmake6 >/dev/null 2>&1
+command -v qmake-qt6 >/dev/null 2>&1
+command -v qtpaths6 >/dev/null 2>&1
+```
+
+**Absolute path checks** (via `[ -x ... ]` — checks specific filesystem locations):
+
+Standard set:
+```sh
+[ -x /usr/lib/qt6/bin/qmake ]
+[ -x /usr/lib64/qt6/bin/qmake ]
+[ -x /usr/lib/qt6/bin/qtpaths ]
+[ -x /usr/lib64/qt6/bin/qtpaths ]
+```
+
+Extended set (Oracle Linux, Gentoo, NixOS, Amazon Linux 2023, Void Linux):
+```sh
+[ -x /usr/libexec/qt6/qmake ]
+[ -x /usr/libexec/qt6/qtpaths ]
+```
+
+### Step 3: Pass or Fail
+
+All checks are combined with `||` (logical OR). If **any single check**
+succeeds, the gate passes:
+
+```sh
+if command -v qmake6 >/dev/null 2>&1 \
+ || command -v qmake-qt6 >/dev/null 2>&1 \
+ || command -v qtpaths6 >/dev/null 2>&1 \
+ || [ -x /usr/lib/qt6/bin/qmake ] \
+ || [ -x /usr/lib64/qt6/bin/qmake ] \
+ || [ -x /usr/lib/qt6/bin/qtpaths ] \
+ || [ -x /usr/lib64/qt6/bin/qtpaths ]; then
+ true;
+ else
+ echo "Qt6 toolchain not found" >&2;
+ exit 1;
+ fi
+```
+
+If **all checks fail**, the gate prints "Qt6 toolchain not found" to stderr
+and exits with code 1, which causes the Docker `RUN` instruction to fail
+and aborts the build.
+
+---
+
+## Actual Dockerfile Snippet
+
+Here is the exact verification code as it appears in the Dockerfiles (shown
+in formatted form — the actual files have it on a single `RUN` line):
+
+```sh
+set -eux
+
+# ... package installation ...
+
+# PATH extension
+export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/opt/qt6/bin:/root/.nix-profile/bin"
+
+# Qt6 verification gate
+if command -v qmake6 >/dev/null 2>&1 \
+ || command -v qmake-qt6 >/dev/null 2>&1 \
+ || command -v qtpaths6 >/dev/null 2>&1 \
+ || [ -x /usr/lib/qt6/bin/qmake ] \
+ || [ -x /usr/lib64/qt6/bin/qmake ] \
+ || [ -x /usr/lib/qt6/bin/qtpaths ] \
+ || [ -x /usr/lib64/qt6/bin/qtpaths ]; then
+ true
+else
+ echo "Qt6 toolchain not found" >&2
+ exit 1
+fi
+```
+
+---
+
+## Why These Specific Binaries?
+
+### qmake6
+
+`qmake6` is the Qt 6 version of the qmake build system. On most distributions,
+the `qt6-base-dev` or `qt6-qtbase-devel` package installs it as `qmake6`.
+
+### qmake-qt6
+
+Some distributions (especially Debian-based ones) install the Qt6 qmake as
+`qmake-qt6` instead of `qmake6`. The `-qt6` suffix is a Debian packaging
+convention to allow multiple Qt versions to coexist.
+
+### qtpaths6
+
+`qtpaths6` is a Qt6 utility that reports installation paths (plugin directory,
+library directory, etc.). It is a lightweight Qt6 binary that confirms the
+Qt6 runtime is properly installed, without needing a full build tool like qmake.
+
+### Why check absolute paths too?
+
+On some distributions, Qt6 binaries are installed to non-standard locations
+that may not be in `$PATH`:
+
+| Path | Used by |
+|-------------------------------|--------------------------------------------------|
+| `/usr/lib/qt6/bin/qmake` | Debian, Ubuntu (32-bit or arch-independent) |
+| `/usr/lib64/qt6/bin/qmake` | Fedora, RHEL family (64-bit lib directory) |
+| `/usr/libexec/qt6/qmake` | Oracle Linux, Gentoo, Amazon Linux 2023, Void |
+| `/usr/lib/qt6/bin/qtpaths` | Debian, Ubuntu |
+| `/usr/lib64/qt6/bin/qtpaths` | Fedora, RHEL family |
+| `/usr/libexec/qt6/qtpaths` | Oracle Linux, Gentoo, Amazon Linux 2023, Void |
+| `/opt/qt6/bin/` | Custom Qt6 installations (from source or installer)|
+| `/root/.nix-profile/bin/` | NixOS (Nix profile symlinks) |
+
+---
+
+## Which Dockerfiles Use Which Path Variant?
+
+### Standard Qt6 paths (7 check locations, 4 PATH dirs)
+
+Used by 33 Dockerfiles:
+
+- All **AlmaLinux** images (alma-9, alma-10)
+- All **Alpine** images (alpine-319 through alpine-latest)
+- **Arch** (arch-latest)
+- All **CentOS Stream** images (centos-stream9, centos-stream10)
+- All **Debian** images (bookworm, bookworm-slim, bullseye, bullseye-slim, stable-slim, trixie-slim)
+- All **Devuan** images (devuan-chimaera, devuan-daedalus)
+- All **Fedora** images (fedora-40, fedora-41, fedora-42, fedora-latest)
+- **Kali** (kali-rolling)
+- All **openSUSE** images (opensuse-leap-155, opensuse-leap-156, opensuse-tumbleweed)
+- All **Rocky** images (rocky-9, rocky-10)
+- All **Ubuntu** images (ubuntu-2004, ubuntu-2204, ubuntu-2404, ubuntu-latest)
+- **Amazon Linux 2** (amazonlinux-2)
+
+```sh
+export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/opt/qt6/bin:/root/.nix-profile/bin"
+
+# Checks: qmake6, qmake-qt6, qtpaths6 (via command -v)
+# plus: /usr/lib/qt6/bin/qmake, /usr/lib64/qt6/bin/qmake,
+# /usr/lib/qt6/bin/qtpaths, /usr/lib64/qt6/bin/qtpaths
+```
+
+### Extended Qt6 paths (9 check locations, 5 PATH dirs)
+
+Used by 7 Dockerfiles:
+
+- **Amazon Linux 2023** (amazonlinux-2023)
+- **Gentoo** (gentoo-stage3)
+- **NixOS** (nix-latest)
+- **Oracle Linux** 8, 9, 10 (oraclelinux-8, oraclelinux-9, oraclelinux-10)
+- **Void Linux** (void-latest)
+
+```sh
+export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/usr/libexec/qt6:/opt/qt6/bin:/root/.nix-profile/bin"
+
+# Checks: qmake6, qmake-qt6, qtpaths6 (via command -v)
+# plus: /usr/lib/qt6/bin/qmake, /usr/lib64/qt6/bin/qmake,
+# /usr/lib/qt6/bin/qtpaths, /usr/lib64/qt6/bin/qtpaths,
+# /usr/libexec/qt6/qmake, /usr/libexec/qt6/qtpaths
+```
+
+---
+
+## Images Excluded from Active CI Matrix
+
+The README notes that approximately **35 of 40** images are in the active CI
+build matrix. The ~5 excluded images are those where Qt6 packages are not
+reliably available:
+
+| Image | Reason for exclusion |
+|----------------------------------|-----------------------------------------------------|
+| `amazonlinux-2` | Based on RHEL 7 era; no Qt6 in default repos |
+| `debian-bullseye` / `bullseye-slim` | Debian 11 shipped Qt 5.15, not Qt6 |
+| `devuan-chimaera` | Based on Debian Bullseye, same Qt6 limitation |
+| `ubuntu-2004` | Ubuntu 20.04 does not ship Qt6 |
+
+These images are still maintained in the repository for potential future use
+(e.g., if Qt6 becomes available via backports or PPAs), but they are not built
+in the regular CI workflow.
+
+---
+
+## Failure Behaviour
+
+When the Qt6 gate fails:
+
+1. The `echo "Qt6 toolchain not found" >&2` message is printed to stderr.
+2. `exit 1` terminates the shell with a non-zero exit code.
+3. The `RUN` instruction fails.
+4. Docker aborts the build.
+5. The CI workflow reports a build failure for that image target.
+6. The failed image is **not** pushed to the container registry.
+
+Because `set -eux` is active at the top of the `RUN` block:
+- `-e`: Exit immediately if any command fails.
+- `-u`: Treat unset variables as errors.
+- `-x`: Print each command before executing (useful for debugging in CI logs).
+
+---
+
+## Why No Qt5 Fallback?
+
+The project has made a deliberate decision to require Qt6:
+
+1. **Qt5 is end-of-life** — Qt 5.15 LTS support ended. New features and
+ security fixes only go into Qt6.
+2. **API consistency** — supporting both Qt5 and Qt6 would require conditional
+ compilation paths, increasing maintenance burden.
+3. **Clear signal** — if a distribution cannot provide Qt6, it is too old to
+ be a supported build target.
+
+---
+
+## Verifying Qt6 Locally
+
+To test whether Qt6 would be found in a specific image, you can run:
+
+```bash
+docker run --rm <image> sh -c '
+ export PATH="$PATH:/usr/lib/qt6/bin:/usr/lib64/qt6/bin:/opt/qt6/bin"
+ echo "qmake6: $(command -v qmake6 2>/dev/null || echo NOT FOUND)"
+ echo "qmake-qt6: $(command -v qmake-qt6 2>/dev/null || echo NOT FOUND)"
+ echo "qtpaths6: $(command -v qtpaths6 2>/dev/null || echo NOT FOUND)"
+ for p in /usr/lib/qt6/bin/qmake /usr/lib64/qt6/bin/qmake \
+ /usr/lib/qt6/bin/qtpaths /usr/lib64/qt6/bin/qtpaths \
+ /usr/libexec/qt6/qmake /usr/libexec/qt6/qtpaths; do
+ [ -x "$p" ] && echo "Found: $p" || echo "Missing: $p"
+ done
+'
+```
+
+---
+
+## Related Documentation
+
+- [Overview](overview.md) — project summary
+- [Architecture](architecture.md) — Dockerfile template structure
+- [Base Images](base-images.md) — per-image deep dive
+- [CI/CD Integration](ci-cd-integration.md) — how the workflow builds and verifies
+- [Troubleshooting](troubleshooting.md) — debugging Qt6 failures
diff --git a/docs/handbook/images4docker/troubleshooting.md b/docs/handbook/images4docker/troubleshooting.md
new file mode 100644
index 0000000000..894a89488e
--- /dev/null
+++ b/docs/handbook/images4docker/troubleshooting.md
@@ -0,0 +1,395 @@
+# images4docker — Troubleshooting
+
+## Overview
+
+This document covers common issues encountered when building, pushing, or
+using images4docker images, and how to diagnose and resolve them.
+
+---
+
+## Build Failures
+
+### "Qt6 toolchain not found"
+
+**Symptom:** The Docker build fails with:
+
+```
+Qt6 toolchain not found
+```
+
+**Cause:** The Qt6 verification gate ran after package installation and could
+not find any of the expected binaries (`qmake6`, `qmake-qt6`, `qtpaths6`) in
+any of the searched paths.
+
+**Diagnosis:**
+
+1. Check that the correct `PACKAGES` build argument was passed. For example,
+ on Debian/Ubuntu, the package is `qt6-base-dev`, not `qt6-base`:
+
+ ```bash
+ docker build \
+ --file dockerfiles/debian-bookworm.Dockerfile \
+ --build-arg PACKAGES="qt6-base-dev" \
+ --tag test .
+ ```
+
+2. Check that Qt6 is actually available in the distribution's repositories.
+ Run an interactive container:
+
+ ```bash
+ docker run --rm -it debian:bookworm sh
+ apt-get update
+ apt-cache search qt6
+ ```
+
+3. For RHEL-family distributions, check if CRB/PowerTools or EPEL needs to be
+ enabled. Use `CUSTOM_INSTALL`:
+
+ ```bash
+ docker build \
+ --file dockerfiles/alma-9.Dockerfile \
+ --build-arg CUSTOM_INSTALL="dnf config-manager --enable crb && dnf install -y epel-release qt6-qtbase-devel" \
+ --tag test .
+ ```
+
+4. Verify the Qt6 binary location inside the container:
+
+ ```bash
+ docker run --rm -it <base_image>:<tag> sh
+ # Install Qt6 packages manually, then:
+ find / -name 'qmake*' -o -name 'qtpaths*' 2>/dev/null
+ ```
+
+ If the binaries are in an unexpected path, the Dockerfile may need the
+ extended path variant (with `/usr/libexec/qt6`).
+
+**Resolution:**
+- Ensure correct Qt6 package names for the distribution.
+- Enable required repositories (CRB, EPEL, backports).
+- If the distribution does not provide Qt6 at all, it should be excluded from
+ the active CI matrix.
+
+---
+
+### "manifest not found" or "pull access denied"
+
+**Symptom:** The build fails immediately at the `FROM` instruction:
+
+```
+ERROR: pull access denied, repository does not exist or may require authentication
+```
+
+or:
+
+```
+ERROR: manifest for <image>:<tag> not found
+```
+
+**Cause:** The upstream base image tag no longer exists. This can happen when:
+- A distribution reaches end-of-life and its Docker image is removed.
+- A distribution renames or reorganises its official images.
+- A registry changes its namespace (as CentOS did from Docker Hub to Quay.io).
+
+**Diagnosis:**
+
+```bash
+docker pull <base_image>:<tag>
+```
+
+If this fails, the tag is no longer available.
+
+**Resolution:**
+- Check the distribution's official Docker Hub / Quay.io page for the current
+ tag names.
+- Update the `FROM` line in the Dockerfile.
+- If the distribution is EOL, remove the Dockerfile and its workflow matrix entry.
+
+---
+
+### Package installation fails
+
+**Symptom:** The `dnf install`, `apt-get install`, or other package manager
+command fails with "No package found" or similar.
+
+**Cause:** Package names vary between distribution versions. For example:
+- Fedora uses `qt6-qtbase-devel`, Debian uses `qt6-base-dev`.
+- Some RHEL-family versions ship `qt6-qtbase-devel` in the base repositories, while others require the CRB/PowerTools repository to be enabled first.
+- Alpine uses `qt6-qtbase-dev` (with musl, not glibc).
+
+**Diagnosis:**
+
+Run an interactive container and search for packages:
+
+```bash
+# Debian/Ubuntu
+docker run --rm -it debian:bookworm sh -c 'apt-get update && apt-cache search qt6'
+
+# Fedora/RHEL
+docker run --rm -it fedora:42 sh -c 'dnf search qt6'
+
+# Alpine
+docker run --rm -it alpine:3.22 sh -c 'apk search qt6'
+
+# openSUSE
+docker run --rm -it opensuse/tumbleweed:latest sh -c 'zypper search qt6'
+
+# Arch
+docker run --rm -it archlinux:latest sh -c 'pacman -Ss qt6'
+```
+
+**Resolution:**
+- Use the correct package names for the distribution.
+- If a package was renamed, update the `PACKAGES` in the workflow matrix.
+- If a package is missing, try `CUSTOM_INSTALL` to enable additional repositories.
+
+---
+
+### "set -u: unbound variable"
+
+**Symptom:** The build fails with a message about an unbound variable.
+
+**Cause:** The `set -u` flag (part of `set -eux`) causes the shell to fail
+on any reference to an unset variable. However, all `ARG` values default to
+empty strings (`ARG PACKAGES=`), so this should not normally occur.
+
+**Diagnosis:** Check that all `ARG` declarations are present in the Dockerfile
+and that they have default values:
+
+```dockerfile
+ARG PACKAGES=
+ARG CUSTOM_INSTALL=
+ARG UPDATE_CMD=
+ARG CLEAN_CMD=
+```
+
+**Resolution:** Ensure all four `ARG` lines are present with `=` (defaulting
+to empty string).
+
+---
+
+## Push Failures
+
+### GHCR authentication failure
+
+**Symptom:** `docker push` fails with "authentication required" or
+"denied: permission denied".
+
+**Cause:**
+- The `GITHUB_TOKEN` does not have `packages: write` permission.
+- The workflow permissions are not configured correctly.
+
+**Resolution:**
+- Ensure the workflow has:
+ ```yaml
+ permissions:
+ packages: write
+ contents: read
+ ```
+- Verify the repository settings allow GitHub Actions to push packages.
+
+---
+
+### Image size unexpectedly large
+
+**Symptom:** A pushed image is much larger than expected.
+
+**Cause:** The cleanup command did not run, or it was overridden with an
+empty / incorrect value.
+
+**Diagnosis:**
+
+Check the image size:
+
+```bash
+docker images --format "{{.Repository}}:{{.Tag}} {{.Size}}" | grep project-tick
+```
+
+Compare with the base image size:
+
+```bash
+docker images --format "{{.Repository}}:{{.Tag}} {{.Size}}" | grep <base_image>
+```
+
+**Resolution:**
+- Verify the cleanup command is correct for the package manager.
+- Check that `CLEAN_CMD` is not set to an empty string (which would skip
+ the default cleanup).
+- For apt-based images, ensure `rm -rf /var/lib/apt/lists/*` runs after install.
+- For dnf-based images, ensure `dnf clean all` runs.
+
+---
+
+## Runtime Issues
+
+### "command not found" in CI jobs
+
+**Symptom:** A CI job using an images4docker image reports "command not found"
+for tools like `cmake`, `gcc`, or `make`.
+
+**Cause:** The image was built without the complete `PACKAGES` list, or the
+build argument was incorrect.
+
+**Diagnosis:**
+
+```bash
+docker run --rm ghcr.io/project-tick-infra/images/<name>:<tag> which cmake
+```
+
+**Resolution:**
+- Check the `PACKAGES` build argument in the workflow matrix.
+- Rebuild the image with the correct package list.
+
+---
+
+### Qt6 binaries not in PATH
+
+**Symptom:** CI jobs report `qmake6: command not found` even though the
+image was built successfully.
+
+**Cause:** The `export PATH=...` in the Dockerfile's `RUN` instruction only
+applies during the build. It does **not** persist as an environment variable
+in the final image.
+
+**Diagnosis:**
+
+```bash
+docker run --rm ghcr.io/project-tick-infra/images/<name>:<tag> sh -c 'echo $PATH'
+```
+
+If the Qt6 paths are not in the output, they need to be set at runtime.
+
+**Resolution:**
+- In CI jobs, extend the PATH. Note that GitHub Actions does not shell-expand
+  `env:` values, so append the directories to `$GITHUB_PATH` from a step instead:
+  ```yaml
+  - run: |
+      echo "/usr/lib/qt6/bin" >> "$GITHUB_PATH"
+      echo "/usr/lib64/qt6/bin" >> "$GITHUB_PATH"
+  ```
+- Or use the full path to the binary: `/usr/lib64/qt6/bin/qmake6`.
+- Consider adding an `ENV PATH=...` line to the Dockerfiles (this is a
+ design decision — currently not done to keep images generic).
+
+---
+
+### Alpine: "Error relocating" or linking errors
+
+**Symptom:** Compiled binaries on Alpine fail at runtime with relocation
+errors or "not found" for shared libraries.
+
+**Cause:** Alpine uses `musl libc`, not `glibc`. Binaries compiled on
+glibc-based distributions are not compatible.
+
+**Resolution:**
+- Ensure all dependencies are compiled against musl on Alpine.
+- This is not an images4docker issue per se — it is an Alpine-specific constraint.
+- Use a glibc-based image (Debian, Fedora, etc.) if glibc compatibility is required.
+
+---
+
+## Debugging Techniques
+
+### Interactive Build
+
+Add `--progress=plain` to see the full build output:
+
+```bash
+docker build --progress=plain \
+ --file dockerfiles/<distro>.Dockerfile \
+ --build-arg PACKAGES="..." \
+ --tag test .
+```
+
+### Shell into a Failed Build
+
+If the build fails, start a container from the base image and manually
+run the install commands:
+
+```bash
+docker run --rm -it <base_image>:<tag> sh
+```
+
+Then manually execute the RUN commands to see where they fail.
+
+### Check Build Arguments
+
+Print the build arguments to verify they are being passed correctly:
+
+```bash
+docker build \
+ --file dockerfiles/<distro>.Dockerfile \
+ --build-arg PACKAGES="qt6-base-dev cmake" \
+ --build-arg UPDATE_CMD="echo UPDATE_CMD is set" \
+ --tag test .
+```
+
+The `set -x` (from `set -eux`) will print each command, including the
+expanded variable values.
+
+### Compare with a Working Image
+
+If a new Dockerfile fails but a similar one works, diff them:
+
+```bash
+diff dockerfiles/fedora-42.Dockerfile dockerfiles/fedora-43.Dockerfile
+```
+
+The only difference should be the `FROM` line.
+
+### Inspect a Built Image
+
+```bash
+# Check image layers
+docker history ghcr.io/project-tick-infra/images/<name>:<tag>
+
+# Check image metadata
+docker inspect ghcr.io/project-tick-infra/images/<name>:<tag>
+
+# Get a shell in the image
+docker run --rm -it ghcr.io/project-tick-infra/images/<name>:<tag> sh
+```
+
+---
+
+## Known Limitations
+
+### No Qt5 fallback
+
+If a distribution cannot provide Qt6, there is no option to fall back to Qt5.
+The image will fail the verification gate. This is intentional.
+
+### No multi-arch support
+
+Currently all images are built for `linux/amd64` only. There is no
+`linux/arm64` or other architecture support.
+
+### PATH not persisted
+
+The `export PATH=...` in the `RUN` instruction does not create a persistent
+`ENV` in the image. CI jobs may need to set `PATH` themselves.
+
+### Single-layer constraint
+
+All package installation happens in a single `RUN` instruction. If you need
+to debug which specific package fails, you must do so interactively rather
+than by inserting intermediate `RUN` instructions (which would change the
+template structure).
+
+---
+
+## Getting Help
+
+1. Check the CI workflow logs (GitHub Actions) for the exact error message.
+2. Reproduce locally with `docker build --progress=plain`.
+3. Run an interactive container from the base image to test package availability.
+4. Consult the distribution's documentation for Qt6 package names.
+5. Review the [Base Images](base-images.md) documentation for distribution-specific notes.
+
+---
+
+## Related Documentation
+
+- [Overview](overview.md) — project summary
+- [Architecture](architecture.md) — Dockerfile template structure
+- [Base Images](base-images.md) — per-distribution details
+- [Qt6 Verification](qt6-verification.md) — the verification gate
+- [CI/CD Integration](ci-cd-integration.md) — workflow details
+- [Creating New Images](creating-new-images.md) — adding new distributions
diff --git a/docs/handbook/json4cpp/architecture.md b/docs/handbook/json4cpp/architecture.md
new file mode 100644
index 0000000000..d0140b8bbf
--- /dev/null
+++ b/docs/handbook/json4cpp/architecture.md
@@ -0,0 +1,613 @@
+# json4cpp — Architecture
+
+## Overview
+
+The json4cpp library (nlohmann/json 3.12.0) is organized as a heavily
+templatized, header-only C++ library. The architecture revolves around a single
+class template, `basic_json`, whose template parameters allow customization of
+every underlying storage type. This document describes the internal structure,
+class hierarchy, memory layout, and key design patterns.
+
+## The `basic_json` Class Template
+
+### Template Declaration
+
+The full template declaration in `include/nlohmann/json_fwd.hpp`:
+
+```cpp
+template<
+ template<typename U, typename V, typename... Args> class ObjectType = std::map,
+ template<typename U, typename... Args> class ArrayType = std::vector,
+ class StringType = std::string,
+ class BooleanType = bool,
+ class NumberIntegerType = std::int64_t,
+ class NumberUnsignedType = std::uint64_t,
+ class NumberFloatType = double,
+ template<typename U> class AllocatorType = std::allocator,
+ template<typename T, typename SFINAE = void> class JSONSerializer = adl_serializer,
+ class BinaryType = std::vector<std::uint8_t>,
+ class CustomBaseClass = void
+>
+class basic_json;
+```
+
+Each parameter controls a specific aspect:
+
+| Parameter | Purpose | Default |
+|---|---|---|
+| `ObjectType` | Map template for JSON objects | `std::map` |
+| `ArrayType` | Sequential container for JSON arrays | `std::vector` |
+| `StringType` | String type for keys and string values | `std::string` |
+| `BooleanType` | Boolean storage | `bool` |
+| `NumberIntegerType` | Signed integer type | `std::int64_t` |
+| `NumberUnsignedType` | Unsigned integer type | `std::uint64_t` |
+| `NumberFloatType` | Floating-point type | `double` |
+| `AllocatorType` | Allocator template | `std::allocator` |
+| `JSONSerializer` | Serializer template for custom types | `adl_serializer` |
+| `BinaryType` | Container for binary data | `std::vector<std::uint8_t>` |
+| `CustomBaseClass` | Optional base class for extension | `void` |
+
+### Default Type Aliases
+
+Two default specializations are defined:
+
+```cpp
+using json = basic_json<>;
+using ordered_json = basic_json<nlohmann::ordered_map>;
+```
+
+The `ordered_json` type preserves insertion order by using `ordered_map`
+instead of `std::map`.
+
+### Derived Type Aliases
+
+Within `basic_json`, the following public type aliases expose the actual
+types used for JSON value storage:
+
+```cpp
+using object_t = ObjectType<StringType, basic_json,
+ default_object_comparator_t,
+ AllocatorType<std::pair<const StringType, basic_json>>>;
+using array_t = ArrayType<basic_json, AllocatorType<basic_json>>;
+using string_t = StringType;
+using boolean_t = BooleanType;
+using number_integer_t = NumberIntegerType;
+using number_unsigned_t = NumberUnsignedType;
+using number_float_t = NumberFloatType;
+using binary_t = nlohmann::byte_container_with_subtype<BinaryType>;
+using object_comparator_t = detail::actual_object_comparator_t<basic_json>;
+```
+
+The `default_object_comparator_t` depends on the C++ standard level:
+- C++14 and above: `std::less<>` (transparent comparator)
+- C++11: `std::less<StringType>`
+
+## Inheritance Structure
+
+### Base Class: `json_base_class`
+
+`basic_json` inherits from `detail::json_base_class<CustomBaseClass>`:
+
+```cpp
+class basic_json
+ : public ::nlohmann::detail::json_base_class<CustomBaseClass>
+```
+
+When `CustomBaseClass` is `void` (the default), this is an empty base class
+that adds no overhead. When a user-provided type is specified, it becomes
+the base, enabling extension without modifying the library.
+
+### Friend Declarations
+
+The class declares friendship with its internal collaborators:
+
+```cpp
+template<detail::value_t> friend struct detail::external_constructor;
+template<typename> friend class ::nlohmann::json_pointer;
+template<typename BasicJsonType, typename InputType>
+ friend class ::nlohmann::detail::parser;
+friend ::nlohmann::detail::serializer<basic_json>;
+template<typename BasicJsonType>
+ friend class ::nlohmann::detail::iter_impl;
+template<typename BasicJsonType, typename CharType>
+ friend class ::nlohmann::detail::binary_writer;
+template<typename BasicJsonType, typename InputType, typename SAX>
+ friend class ::nlohmann::detail::binary_reader;
+template<typename BasicJsonType, typename InputAdapterType>
+ friend class ::nlohmann::detail::json_sax_dom_parser;
+template<typename BasicJsonType, typename InputAdapterType>
+ friend class ::nlohmann::detail::json_sax_dom_callback_parser;
+friend class ::nlohmann::detail::exception;
+```
+
+## Memory Layout: `json_value` Union
+
+### The `json_value` Union
+
+The core storage is a union that keeps the `basic_json` object at minimum
+size:
+
+```cpp
+union json_value
+{
+ object_t* object; // pointer — 8 bytes
+ array_t* array; // pointer — 8 bytes
+ string_t* string; // pointer — 8 bytes
+ binary_t* binary; // pointer — 8 bytes
+ boolean_t boolean; // typically 1 byte
+ number_integer_t number_integer; // 8 bytes
+ number_unsigned_t number_unsigned; // 8 bytes
+ number_float_t number_float; // 8 bytes
+
+ json_value() = default;
+ json_value(boolean_t v) noexcept;
+ json_value(number_integer_t v) noexcept;
+ json_value(number_unsigned_t v) noexcept;
+ json_value(number_float_t v) noexcept;
+ json_value(value_t t); // creates empty container for compound types
+
+ void destroy(value_t t); // type-aware destructor
+};
+```
+
+**Key design decisions:**
+
+1. **Pointers for variable-length types.** Objects, arrays, strings, and binaries
+ are stored as pointers. This keeps the union at 8 bytes on 64-bit systems
+ and avoids calling constructors/destructors for the union members of
+ non-active types.
+
+2. **Value semantics for scalars.** Booleans, integers, and floats are stored
+ directly in the union without indirection.
+
+3. **Heap allocation via `create<T>()`.** The private static method
+ `basic_json::create<T>(Args...)` uses the `AllocatorType` to allocate
+ and construct heap objects.
+
+### The `data` Struct
+
+The union is wrapped in a `data` struct that pairs it with the type tag:
+
+```cpp
+struct data
+{
+ value_t m_type = value_t::null;
+ json_value m_value = {};
+
+ data(const value_t v);
+ data(size_type cnt, const basic_json& val);
+ data() noexcept = default;
+ data(data&&) noexcept = default;
+
+ ~data() noexcept { m_value.destroy(m_type); }
+};
+```
+
+The instance lives in `basic_json` as `data m_data`:
+
+```cpp
+data m_data = {}; // the type + value
+
+#if JSON_DIAGNOSTICS
+basic_json* m_parent = nullptr; // parent pointer for diagnostics
+#endif
+
+#if JSON_DIAGNOSTIC_POSITIONS
+std::size_t start_position = std::string::npos;
+std::size_t end_position = std::string::npos;
+#endif
+```
+
+### Destruction Strategy
+
+The `json_value::destroy(value_t)` method handles recursive destruction
+without stack overflow. For arrays and objects, it uses an iterative
+approach with a `std::vector<basic_json>` stack:
+
+```cpp
+void destroy(value_t t) {
+ // For arrays/objects: flatten children onto a heap-allocated stack
+ if (t == value_t::array || t == value_t::object) {
+ std::vector<basic_json> stack;
+ // Move children to stack
+ while (!stack.empty()) {
+ basic_json current_item(std::move(stack.back()));
+ stack.pop_back();
+ // Move current_item's children to stack
+ // current_item safely destructed here (no children)
+ }
+ }
+ // Deallocate the container itself
+ switch (t) {
+ case value_t::object: /* deallocate object */ break;
+ case value_t::array: /* deallocate array */ break;
+ case value_t::string: /* deallocate string */ break;
+ case value_t::binary: /* deallocate binary */ break;
+ default: break;
+ }
+}
+```
+
+This prevents stack overflow when destroying deeply nested JSON structures.
+
+## The `value_t` Enumeration
+
+Defined in `detail/value_t.hpp`:
+
+```cpp
+enum class value_t : std::uint8_t
+{
+ null, // null value
+ object, // unordered set of name/value pairs
+ array, // ordered collection of values
+ string, // string value
+ boolean, // boolean value
+ number_integer, // signed integer
+ number_unsigned, // unsigned integer
+ number_float, // floating-point
+ binary, // binary array
+ discarded // discarded by parser callback
+};
+```
+
+A comparison operator defines a Python-like ordering:
+`null < boolean < number < object < array < string < binary`
+
+With C++20, this uses `std::partial_ordering` via the spaceship operator.
+
+## Class Invariant
+
+The `assert_invariant()` method (called at the end of every constructor)
+enforces the following:
+
+```cpp
+void assert_invariant(bool check_parents = true) const noexcept
+{
+ JSON_ASSERT(m_data.m_type != value_t::object || m_data.m_value.object != nullptr);
+ JSON_ASSERT(m_data.m_type != value_t::array || m_data.m_value.array != nullptr);
+ JSON_ASSERT(m_data.m_type != value_t::string || m_data.m_value.string != nullptr);
+ JSON_ASSERT(m_data.m_type != value_t::binary || m_data.m_value.binary != nullptr);
+}
+```
+
+When `JSON_DIAGNOSTICS` is enabled, it additionally checks that all children
+have their `m_parent` pointer set to `this`.
+
+## Internal Component Architecture
+
+### Input Pipeline
+
+```
+Input Source → Input Adapter → Lexer → Parser → DOM / SAX Events
+```
+
+1. **Input Adapters** (`detail/input/input_adapters.hpp`)
+ - `file_input_adapter` — wraps `std::FILE*`
+ - `input_stream_adapter` — wraps `std::istream`
+ - `iterator_input_adapter` — wraps iterator pairs
+ - `contiguous_input_adapter` — optimized for contiguous memory
+
+2. **Lexer** (`detail/input/lexer.hpp`)
+ - `lexer_base<BasicJsonType>` — defines `token_type` enumeration
+ - `lexer<BasicJsonType, InputAdapterType>` — the tokenizer
+ - Token types: `literal_true`, `literal_false`, `literal_null`,
+ `value_string`, `value_unsigned`, `value_integer`, `value_float`,
+ `begin_array`, `begin_object`, `end_array`, `end_object`,
+ `name_separator`, `value_separator`, `parse_error`, `end_of_input`
+
+3. **Parser** (`detail/input/parser.hpp`)
+ - `parser<BasicJsonType, InputAdapterType>` — recursive descent parser
+ - Supports callback-based filtering via `parser_callback_t`
+ - Supports both DOM parsing and SAX event dispatch
+
+4. **SAX Interface** (`detail/input/json_sax.hpp`)
+ - `json_sax<BasicJsonType>` — abstract base with virtual methods
+ - `json_sax_dom_parser` — builds a DOM tree from SAX events
+ - `json_sax_dom_callback_parser` — DOM builder with filtering
+
+### Output Pipeline
+
+```
+basic_json → Serializer → Output Adapter → Destination
+```
+
+1. **Serializer** (`detail/output/serializer.hpp`)
+ - `serializer<BasicJsonType>` — converts JSON to text
+ - Handles indentation, UTF-8 validation, number formatting
+ - `error_handler_t`: `strict`, `replace`, `ignore` for invalid UTF-8
+
+2. **Binary Writer** (`detail/output/binary_writer.hpp`)
+ - `binary_writer<BasicJsonType, CharType>` — writes CBOR, MessagePack,
+ UBJSON, BJData, BSON
+
+3. **Output Adapters** (`detail/output/output_adapters.hpp`)
+ - `output_vector_adapter` — writes to `std::vector<CharType>`
+ - `output_stream_adapter` — writes to `std::ostream`
+ - `output_string_adapter` — writes to a string type
+
+### Iterator System
+
+```
+basic_json::iterator → iter_impl<basic_json>
+ → internal_iterator (union of object/array/primitive iterators)
+```
+
+- `iter_impl<BasicJsonType>` — the main iterator class
+- `internal_iterator<BasicJsonType>` — holds the active iterator:
+ - `typename object_t::iterator object_iterator` for objects
+ - `typename array_t::iterator array_iterator` for arrays
+ - `primitive_iterator_t` for scalars (0 = begin, 1 = end)
+- `json_reverse_iterator<Base>` — reverse iterator adapter
+- `iteration_proxy<IteratorType>` — returned by `items()`, exposes
+ `key()` and `value()` methods
+
+### Conversion System
+
+The ADL (Argument-Dependent Lookup) design enables seamless integration of
+user-defined types:
+
+```
+User Type → to_json(json&, const T&) → json value
+json value → from_json(const json&, T&) → User Type
+```
+
+- `adl_serializer<T>` — default serializer that delegates via ADL
+- `detail/conversions/to_json.hpp` — built-in `to_json()` overloads
+ for standard types (arithmetic, strings, containers, pairs, tuples)
+- `detail/conversions/from_json.hpp` — built-in `from_json()` overloads
+
+### JSON Pointer and Patch
+
+- `json_pointer<RefStringType>` — implements RFC 6901, stores parsed
+ reference tokens as `std::vector<string_t>`
+- Patch operations implemented directly in `basic_json::patch_inplace()`
+ as an inline method operating on the `basic_json` itself
+
+## The `ordered_map` Container
+
+Defined in `include/nlohmann/ordered_map.hpp`:
+
+```cpp
+template<class Key, class T, class IgnoredLess = std::less<Key>,
+ class Allocator = std::allocator<std::pair<const Key, T>>>
+struct ordered_map : std::vector<std::pair<const Key, T>, Allocator>
+{
+ using key_type = Key;
+ using mapped_type = T;
+ using Container = std::vector<std::pair<const Key, T>, Allocator>;
+
+ std::pair<iterator, bool> emplace(const key_type& key, T&& t);
+ T& operator[](const key_type& key);
+ T& at(const key_type& key);
+ size_type erase(const key_type& key);
+ size_type count(const key_type& key) const;
+ iterator find(const key_type& key);
+ // ...
+};
+```
+
+It inherits from `std::vector` and implements map-like operations with
+linear search. The `IgnoredLess` parameter exists for API compatibility
+with `std::map` but is not used — instead, `std::equal_to<>` (C++14) or
+`std::equal_to<Key>` (C++11) is used for key comparison.
+
+## The `byte_container_with_subtype` Class
+
+Wraps binary data with an optional subtype tag for binary formats
+(MsgPack ext types, CBOR tags, BSON binary subtypes):
+
+```cpp
+template<typename BinaryType>
+class byte_container_with_subtype : public BinaryType
+{
+public:
+ using container_type = BinaryType;
+ using subtype_type = std::uint64_t;
+
+ void set_subtype(subtype_type subtype_) noexcept;
+ constexpr subtype_type subtype() const noexcept;
+ constexpr bool has_subtype() const noexcept;
+ void clear_subtype() noexcept;
+
+private:
+ subtype_type m_subtype = 0;
+ bool m_has_subtype = false;
+};
+```
+
+## Namespace Organization
+
+The library uses inline namespaces for ABI versioning:
+
+```cpp
+NLOHMANN_JSON_NAMESPACE_BEGIN // expands to: namespace nlohmann { inline namespace ... {
+// ...
+NLOHMANN_JSON_NAMESPACE_END // expands to: } }
+```
+
+The inner inline namespace name encodes configuration flags to prevent
+ABI mismatches when different translation units are compiled with
+different macro settings. The `detail` sub-namespace is not part of the
+public API.
+
+## Template Metaprogramming Techniques
+
+### SFINAE and Type Traits
+
+Located in `detail/meta/type_traits.hpp`, these traits control overload
+resolution:
+
+- `is_basic_json<T>` — checks if T is a `basic_json` specialization
+- `is_compatible_type<BasicJsonType, T>` — checks if T can be stored
+- `is_getable<BasicJsonType, T>` — checks if `get<T>()` works
+- `has_from_json<BasicJsonType, T>` — checks for `from_json()` overload
+- `has_non_default_from_json<BasicJsonType, T>` — non-void return version
+- `is_usable_as_key_type<Comparator, KeyType, T>` — for heterogeneous lookup
+- `is_comparable<Comparator, A, B>` — checks comparability
+
+### Priority Tags
+
+The `get_impl()` method uses priority tags (`detail::priority_tag<N>`)
+to control overload resolution order:
+
+```cpp
+template<typename ValueType>
+ValueType get_impl(detail::priority_tag<0>) const; // standard from_json
+template<typename ValueType>
+ValueType get_impl(detail::priority_tag<1>) const; // non-default from_json
+template<typename BasicJsonType>
+BasicJsonType get_impl(detail::priority_tag<2>) const; // cross-json conversion
+template<typename BasicJsonType>
+basic_json get_impl(detail::priority_tag<3>) const; // identity
+template<typename PointerType>
+auto get_impl(detail::priority_tag<4>) const; // pointer access
+```
+
+Higher priority tags are preferred during overload resolution.
+
+### External Constructors
+
+The `detail::external_constructor<value_t>` template specializations
+handle constructing `json_value` instances for specific types:
+
+```cpp
+template<> struct external_constructor<value_t::string>;
+template<> struct external_constructor<value_t::number_float>;
+template<> struct external_constructor<value_t::number_unsigned>;
+template<> struct external_constructor<value_t::number_integer>;
+template<> struct external_constructor<value_t::array>;
+template<> struct external_constructor<value_t::object>;
+template<> struct external_constructor<value_t::boolean>;
+template<> struct external_constructor<value_t::binary>;
+```
+
+## Diagnostics Architecture
+
+### `JSON_DIAGNOSTICS` Mode
+
+When enabled, each `basic_json` node stores a `m_parent` pointer:
+
+```cpp
+#if JSON_DIAGNOSTICS
+basic_json* m_parent = nullptr;
+#endif
+```
+
+The `set_parents()` and `set_parent()` methods maintain these links.
+On errors, `exception::diagnostics()` walks the parent chain to build
+a JSON Pointer path showing where in the document the error occurred:
+
+```
+[json.exception.type_error.302] (/config/debug) type must be boolean, but is string
+```
+
+### `JSON_DIAGNOSTIC_POSITIONS` Mode
+
+When enabled, byte offsets from parsing are stored:
+
+```cpp
+#if JSON_DIAGNOSTIC_POSITIONS
+std::size_t start_position = std::string::npos;
+std::size_t end_position = std::string::npos;
+#endif
+```
+
+Error messages then include `(bytes N-M)` indicating the exact input range.
+
+## Copy and Move Semantics
+
+### Copy Constructor
+
+Deep-copies the value based on type. For compound types (object, array,
+string, binary), the heap-allocated data is cloned:
+
+```cpp
+basic_json(const basic_json& other)
+ : json_base_class_t(other)
+{
+ m_data.m_type = other.m_data.m_type;
+ switch (m_data.m_type) {
+ case value_t::object: m_data.m_value = *other.m_data.m_value.object; break;
+ case value_t::array: m_data.m_value = *other.m_data.m_value.array; break;
+ case value_t::string: m_data.m_value = *other.m_data.m_value.string; break;
+ // ... scalar types are copied directly
+ }
+ set_parents();
+}
+```
+
+### Move Constructor
+
+Transfers ownership and invalidates the source:
+
+```cpp
+basic_json(basic_json&& other) noexcept
+ : json_base_class_t(std::forward<json_base_class_t>(other)),
+ m_data(std::move(other.m_data))
+{
+ other.m_data.m_type = value_t::null;
+ other.m_data.m_value = {};
+ set_parents();
+}
+```
+
+### Copy-and-Swap Assignment
+
+Uses the copy-and-swap idiom for exception safety:
+
+```cpp
+basic_json& operator=(basic_json other) noexcept {
+ using std::swap;
+ swap(m_data.m_type, other.m_data.m_type);
+ swap(m_data.m_value, other.m_data.m_value);
+ json_base_class_t::operator=(std::move(other));
+ set_parents();
+ return *this;
+}
+```
+
+## Comparison Architecture
+
+### C++20 Path (Three-Way Comparison)
+
+When `JSON_HAS_THREE_WAY_COMPARISON` is true:
+
+```cpp
+bool operator==(const_reference rhs) const noexcept;
+bool operator!=(const_reference rhs) const noexcept;
+std::partial_ordering operator<=>(const_reference rhs) const noexcept;
+```
+
+### Pre-C++20 Path
+
+Individual comparison operators are defined as `friend` functions:
+
+```cpp
+friend bool operator==(const_reference lhs, const_reference rhs) noexcept;
+friend bool operator!=(const_reference lhs, const_reference rhs) noexcept;
+friend bool operator<(const_reference lhs, const_reference rhs) noexcept;
+friend bool operator<=(const_reference lhs, const_reference rhs) noexcept;
+friend bool operator>(const_reference lhs, const_reference rhs) noexcept;
+friend bool operator>=(const_reference lhs, const_reference rhs) noexcept;
+```
+
+Both paths use the `JSON_IMPLEMENT_OPERATOR` macro internally, which handles:
+1. Same-type comparison (delegates to underlying type's operator)
+2. Cross-numeric-type comparison (int vs float, signed vs unsigned)
+3. Unordered comparison (NaN, discarded values)
+4. Different-type comparison (compares `value_t` ordering)
+
+## `std` Namespace Specializations
+
+The library provides:
+
+```cpp
+namespace std {
+ template<> struct hash<nlohmann::json> { ... };
+ template<> struct less<nlohmann::detail::value_t> { ... };
+ void swap(nlohmann::json& j1, nlohmann::json& j2) noexcept; // pre-C++20 only
+}
+```
+
+The hash function delegates to `nlohmann::detail::hash()` which recursively
+hashes the JSON value based on its type.
diff --git a/docs/handbook/json4cpp/basic-usage.md b/docs/handbook/json4cpp/basic-usage.md
new file mode 100644
index 0000000000..80b9b3a176
--- /dev/null
+++ b/docs/handbook/json4cpp/basic-usage.md
@@ -0,0 +1,601 @@
+# json4cpp — Basic Usage
+
+## Including the Library
+
+```cpp
+#include <nlohmann/json.hpp>
+
+// Convenience alias
+using json = nlohmann::json;
+```
+
+Or with the forward declaration header (for header files):
+
+```cpp
+#include <nlohmann/json_fwd.hpp> // declares json, ordered_json, json_pointer
+```
+
+## Creating JSON Values
+
+### Null
+
+```cpp
+json j; // default constructor → null
+json j = nullptr; // explicit null
+json j(nullptr); // explicit null
+json j(json::value_t::null); // from value_t enum
+```
+
+### Boolean
+
+```cpp
+json j = true;
+json j = false;
+json j(json::value_t::boolean); // false (default-initialized)
+```
+
+### Numbers
+
+```cpp
+// Integer (stored as number_integer_t = std::int64_t)
+json j = 42;
+json j = -100;
+
+// Unsigned (stored as number_unsigned_t = std::uint64_t)
+json j = 42u;
+json j = static_cast<std::uint64_t>(100);
+
+// Floating-point (stored as number_float_t = double)
+json j = 3.14;
+json j = 1.0e10;
+```
+
+### String
+
+```cpp
+json j = "hello world";
+json j = std::string("hello");
+
+// With C++17 string_view:
+json j = std::string_view("hello");
+```
+
+### Array
+
+```cpp
+// From initializer list
+json j = {1, 2, 3, 4, 5};
+
+// Explicit array factory
+json j = json::array(); // empty array
+json j = json::array({1, 2, 3}); // pre-populated
+
+// From value_t enum
+json j(json::value_t::array); // empty array
+
+// From count and value
+json j(5, "x"); // ["x", "x", "x", "x", "x"]
+```
+
+### Object
+
+```cpp
+// From initializer list of key-value pairs
+json j = {
+ {"name", "Alice"},
+ {"age", 30},
+ {"active", true}
+};
+
+// Explicit object factory
+json j = json::object();
+json j = json::object({{"key", "value"}});
+
+// From value_t enum
+json j(json::value_t::object);
+
+// The library auto-detects objects vs arrays in initializer lists:
+// All elements are [string, value] pairs → object
+// Otherwise → array
+json obj = {{"a", 1}, {"b", 2}}; // → object
+json arr = {1, 2, 3}; // → array
+json arr2 = {{1, 2}, {3, 4}}; // → array of arrays
+```
+
+### Binary
+
+```cpp
+// Binary data without subtype
+json j = json::binary({0x01, 0x02, 0x03});
+
+// Binary data with subtype (used by MessagePack ext, CBOR tags, etc.)
+json j = json::binary({0x01, 0x02}, 42);
+
+// From std::vector<std::uint8_t>
+std::vector<std::uint8_t> data = {0xCA, 0xFE};
+json j = json::binary(data);
+json j = json::binary(std::move(data)); // move semantics
+```
+
+### From Existing Types
+
+The `basic_json` constructor template accepts any "compatible type" —
+any type for which a `to_json()` overload exists:
+
+```cpp
+// Standard containers
+std::vector<int> v = {1, 2, 3};
+json j = v; // [1, 2, 3]
+
+std::map<std::string, int> m = {{"a", 1}, {"b", 2}};
+json j = m; // {"a": 1, "b": 2}
+
+// Pairs and tuples (C++11)
+std::pair<std::string, int> p = {"key", 42};
+json j = p; // ["key", 42]
+
+// Enum types (unless JSON_DISABLE_ENUM_SERIALIZATION is set)
+enum Color { Red, Green, Blue };
+json j = Green; // 1
+```
+
+## Parsing JSON
+
+### From String
+
+```cpp
+// Static parse method
+json j = json::parse(R"({"key": "value", "number": 42})");
+
+// From std::string
+std::string input = R"([1, 2, 3])";
+json j = json::parse(input);
+
+// User-defined literal (available by default, since JSON_USE_GLOBAL_UDLS defaults
+// to 1; if disabled, bring it in with: using namespace nlohmann::literals;)
+auto j = R"({"key": "value"})"_json;
+```
+
+### From Stream
+
+```cpp
+#include <fstream>
+
+std::ifstream file("data.json");
+json j = json::parse(file);
+
+// Or with stream extraction operator:
+json j;
+file >> j;
+```
+
+### From Iterator Pair
+
+```cpp
+std::string input = R"({"key": "value"})";
+json j = json::parse(input.begin(), input.end());
+
+// Works with any input iterator
+std::vector<char> data = ...;
+json j = json::parse(data.begin(), data.end());
+```
+
+### Parse Options
+
+```cpp
+json j = json::parse(
+ input,
+ nullptr, // callback (nullptr = no callback)
+ true, // allow_exceptions (true = throw on error)
+ false, // ignore_comments (false = comments are errors)
+ false // ignore_trailing_commas (false = trailing commas are errors)
+);
+```
+
+### Error Handling During Parsing
+
+```cpp
+// Option 1: Exceptions (default)
+try {
+ json j = json::parse("invalid json");
+} catch (json::parse_error& e) {
+ std::cerr << e.what() << "\n";
+ // [json.exception.parse_error.101] parse error at line 1, column 1:
+ // syntax error while parsing value - invalid literal; ...
+}
+
+// Option 2: No exceptions
+json j = json::parse("invalid json", nullptr, false);
+if (j.is_discarded()) {
+ // parsing failed
+}
+```
+
+### Validation Without Parsing
+
+```cpp
+bool valid = json::accept(R"({"key": "value"})"); // true
+bool invalid = json::accept("not json"); // false
+
+// With options
+bool valid = json::accept(input, true, true); // ignore comments, trailing commas
+```
+
+### Parser Callbacks
+
+Filter or modify values during parsing:
+
+```cpp
+json j = json::parse(input, [](int depth, json::parse_event_t event, json& parsed) {
+ // event: object_start, object_end, array_start, array_end, key, value
+ // Return false to discard the value
+ if (event == json::parse_event_t::key && parsed == json("password")) {
+        return false; // discard the value that belongs to the "password" key
+ }
+ return true;
+});
+```
+
+## Serialization
+
+### To String
+
+```cpp
+json j = {{"name", "Alice"}, {"age", 30}};
+
+// Compact (no indentation)
+std::string s = j.dump();
+// {"age":30,"name":"Alice"}
+
+// Pretty-printed (4-space indent)
+std::string s = j.dump(4);
+// {
+// "age": 30,
+// "name": "Alice"
+// }
+
+// Custom indent character
+std::string s = j.dump(1, '\t');
+
+// Force ASCII output
+std::string s = j.dump(-1, ' ', true);
+// Non-ASCII chars are escaped as \uXXXX
+```
+
+### `dump()` Method Signature
+
+```cpp
+string_t dump(
+ const int indent = -1,
+ const char indent_char = ' ',
+ const bool ensure_ascii = false,
+ const error_handler_t error_handler = error_handler_t::strict
+) const;
+```
+
+The `error_handler` controls how invalid UTF-8 in strings is handled:
+
+| Value | Behavior |
+|---|---|
+| `error_handler_t::strict` | Throw `type_error::316` |
+| `error_handler_t::replace` | Replace invalid bytes with U+FFFD |
+| `error_handler_t::ignore` | Skip invalid bytes |
+
+### To Stream
+
+```cpp
+std::cout << j << std::endl; // compact
+std::cout << std::setw(4) << j << "\n"; // pretty
+
+// To file
+std::ofstream file("output.json");
+file << std::setw(4) << j;
+```
+
+## Type Inspection
+
+### Type Query Methods
+
+```cpp
+json j = 42;
+
+j.type() // value_t::number_integer
+j.type_name() // "number"
+
+j.is_null() // false
+j.is_boolean() // false
+j.is_number() // true
+j.is_number_integer() // true
+j.is_number_unsigned() // false
+j.is_number_float() // false
+j.is_object() // false
+j.is_array() // false
+j.is_string() // false
+j.is_binary() // false
+j.is_discarded() // false
+
+j.is_primitive() // true (null, string, boolean, number, binary)
+j.is_structured() // false (object or array)
+```
+
+### Explicit Type Conversion
+
+```cpp
+json j = 42;
+
+// Using get<T>()
+int i = j.get<int>();
+double d = j.get<double>();
+std::string s = j.get<std::string>(); // throws type_error::302
+
+// Using get_to()
+int i;
+j.get_to(i);
+
+// Using get_ref<T&>()
+json j = "hello";
+const std::string& ref = j.get_ref<const std::string&>();
+
+// Using get_ptr<T*>()
+json j = "hello";
+const std::string* ptr = j.get_ptr<const std::string*>();
+if (ptr != nullptr) {
+ // use *ptr
+}
+```
+
+### Implicit Type Conversion
+
+When `JSON_USE_IMPLICIT_CONVERSIONS` is enabled (default):
+
+```cpp
+json j = 42;
+int i = j; // implicit conversion
+
+json j = "hello";
+std::string s = j; // implicit conversion
+
+json j = {1, 2, 3};
+std::vector<int> v = j; // implicit conversion
+```
+
+### Cast to `value_t`
+
+```cpp
+json j = 42;
+json::value_t t = j; // implicit cast via operator value_t()
+if (t == json::value_t::number_integer) { ... }
+```
+
+## Working with Objects
+
+### Creating and Modifying
+
+```cpp
+json j; // null
+
+// operator[] implicitly converts null to object/array
+j["name"] = "Alice"; // null → object, then insert
+j["age"] = 30;
+j["scores"] = {95, 87, 92};
+
+// Nested objects
+j["address"]["city"] = "Springfield";
+j["address"]["state"] = "IL";
+```
+
+### Checking Keys
+
+```cpp
+if (j.contains("name")) { ... }
+if (j.count("name") > 0) { ... }
+if (j.find("name") != j.end()) { ... }
+```
+
+### Removing Keys
+
+```cpp
+j.erase("name"); // by key
+j.erase(j.find("age")); // by iterator
+```
+
+### Getting with Default Value
+
+```cpp
+std::string name = j.value("name", "unknown");
+int port = j.value("port", 8080);
+```
+
+## Working with Arrays
+
+### Creating and Modifying
+
+```cpp
+json arr = json::array();
+arr.push_back(1);
+arr.push_back("hello");
+arr += 3.14; // operator+=
+
+arr.emplace_back("world"); // in-place construction
+
+// Insert at position
+arr.insert(arr.begin(), 0);
+arr.insert(arr.begin() + 2, {10, 20});
+```
+
+### Accessing Elements
+
+```cpp
+int first = arr[0];
+int second = arr.at(1); // bounds-checked
+int last = arr.back();
+int first2 = arr.front();
+```
+
+### Modifying
+
+```cpp
+arr.erase(arr.begin()); // remove first element
+arr.erase(2); // remove element at index 2
+arr.clear(); // remove all elements
+```
+
+### Size and Capacity
+
+```cpp
+arr.size(); // number of elements
+arr.empty(); // true if no elements
+arr.max_size(); // maximum possible elements
+```
+
+## Ordered JSON
+
+For insertion-order preservation:
+
+```cpp
+nlohmann::ordered_json j;
+j["z"] = 1;
+j["a"] = 2;
+j["m"] = 3;
+
+// Iteration preserves insertion order: z, a, m
+for (auto& [key, val] : j.items()) {
+ std::cout << key << ": " << val << "\n";
+}
+```
+
+The `ordered_json` type uses `nlohmann::ordered_map` (a `std::vector`-based
+map) instead of `std::map`. Lookups are O(n) instead of O(log n).
+
+## Copy and Comparison
+
+### Copying
+
+```cpp
+json j1 = {{"key", "value"}};
+json j2 = j1; // deep copy
+json j3(j1); // deep copy
+json j4 = std::move(j1); // move (j1 becomes null)
+```
+
+### Comparison
+
+```cpp
+json a = {1, 2, 3};
+json b = {1, 2, 3};
+
+a == b; // true
+a != b; // false
+a < b; // false (same type, same value)
+
+// Cross-type numeric comparison
+json(1) == json(1.0); // true
+json(1) < json(1.5); // true
+```
+
+## Structured Bindings (C++17)
+
+```cpp
+json j = {{"name", "Alice"}, {"age", 30}};
+
+for (auto& [key, val] : j.items()) {
+ std::cout << key << " = " << val << "\n";
+}
+```
+
+## Common Patterns
+
+### Configuration File Loading
+
+```cpp
+json load_config(const std::string& path) {
+ std::ifstream file(path);
+ if (!file.is_open()) {
+ return json::object();
+ }
+ return json::parse(file, nullptr, true, true); // allow comments
+}
+
+auto config = load_config("config.json");
+int port = config.value("port", 8080);
+std::string host = config.value("host", "localhost");
+```
+
+### Safe Value Extraction
+
+```cpp
+template<typename T>
+std::optional<T> safe_get(const json& j, const std::string& key) {
+ if (j.contains(key)) {
+ try {
+ return j.at(key).get<T>();
+ } catch (const json::type_error&) {
+ return std::nullopt;
+ }
+ }
+ return std::nullopt;
+}
+```
+
+### Building JSON Programmatically
+
+```cpp
+json build_response(int status, const std::string& message) {
+ return {
+ {"status", status},
+ {"message", message},
+ {"timestamp", std::time(nullptr)},
+ {"data", json::object()}
+ };
+}
+```
+
+### Merging Objects
+
+```cpp
+json defaults = {{"color", "blue"}, {"size", 10}, {"visible", true}};
+json user_prefs = {{"color", "red"}, {"opacity", 0.8}};
+
+defaults.update(user_prefs);
+// defaults = {"color": "red", "size": 10, "visible": true, "opacity": 0.8}
+
+// Deep merge with merge_objects=true
+defaults.update(user_prefs, true);
+```
+
+### Flattening and Unflattening
+
+```cpp
+json nested = {
+ {"a", {{"b", {{"c", 42}}}}}
+};
+
+json flat = nested.flatten();
+// {"/a/b/c": 42}
+
+json restored = flat.unflatten();
+// {"a": {"b": {"c": 42}}}
+```
+
+## Error Handling Summary
+
+| Exception | When Thrown |
+|---|---|
+| `json::parse_error` | Invalid JSON input |
+| `json::type_error` | Wrong type access (e.g., `string` on a number) |
+| `json::out_of_range` | Index/key not found with `at()` |
+| `json::invalid_iterator` | Invalid iterator operation |
+| `json::other_error` | Miscellaneous errors |
+
+```cpp
+try {
+ json j = json::parse("...");
+ int val = j.at("missing_key").get<int>();
+} catch (json::parse_error& e) {
+ // e.id: 101, 102, 103, 104, 105
+ // e.byte: position in input
+} catch (json::out_of_range& e) {
+ // e.id: 401, 402, 403, 404, 405
+} catch (json::type_error& e) {
+ // e.id: 301, 302, 303, 304, 305, 306, 307, 308, ...
+}
+```
diff --git a/docs/handbook/json4cpp/binary-formats.md b/docs/handbook/json4cpp/binary-formats.md
new file mode 100644
index 0000000000..9cb9f666f2
--- /dev/null
+++ b/docs/handbook/json4cpp/binary-formats.md
@@ -0,0 +1,411 @@
+# json4cpp — Binary Formats
+
+## Overview
+
+The library supports five binary serialization formats in addition to JSON
+text. All are available as static methods on `basic_json`:
+
+| Format | To | From | RFC/Spec |
+|---|---|---|---|
+| CBOR | `to_cbor()` | `from_cbor()` | RFC 8949 (obsoletes RFC 7049) |
+| MessagePack | `to_msgpack()` | `from_msgpack()` | MessagePack spec |
+| UBJSON | `to_ubjson()` | `from_ubjson()` | UBJSON spec |
+| BSON | `to_bson()` | `from_bson()` | BSON spec |
+| BJData | `to_bjdata()` | `from_bjdata()` | BJData spec |
+
+Binary serialization is useful for:
+- Smaller payload sizes
+- Faster parsing
+- Native binary data support
+- Type-rich encodings (timestamps, binary subtypes)
+
+## CBOR (Concise Binary Object Representation)
+
+### Serialization
+
+```cpp
+// To vector<uint8_t>
+static std::vector<std::uint8_t> to_cbor(const basic_json& j);
+
+// To output adapter (stream, string, vector)
+static void to_cbor(const basic_json& j, detail::output_adapter<uint8_t> o);
+static void to_cbor(const basic_json& j, detail::output_adapter<char> o);
+```
+
+```cpp
+json j = {{"compact", true}, {"schema", 0}};
+
+// Serialize to byte vector
+auto cbor = json::to_cbor(j);
+
+// Serialize to stream
+std::ofstream out("data.cbor", std::ios::binary);
+json::to_cbor(j, out);
+```
+
+### Deserialization
+
+```cpp
+template<typename InputType>
+static basic_json from_cbor(InputType&& i,
+ const bool strict = true,
+ const bool allow_exceptions = true,
+ const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error);
+```
+
+Parameters:
+- `strict` — if `true`, requires that all bytes are consumed
+- `allow_exceptions` — if `false`, returns discarded value on error
+- `tag_handler` — how to handle CBOR tags
+
+```cpp
+auto j = json::from_cbor(cbor);
+```
+
+### CBOR Tag Handling
+
+```cpp
+enum class cbor_tag_handler_t
+{
+ error, ///< throw parse_error on any tag
+ ignore, ///< ignore tags
+ store ///< store tags as binary subtype
+};
+```
+
+```cpp
+// Ignore CBOR tags
+auto j = json::from_cbor(data, true, true, json::cbor_tag_handler_t::ignore);
+
+// Store CBOR tags as subtypes in binary values
+auto j = json::from_cbor(data, true, true, json::cbor_tag_handler_t::store);
+```
+
+### CBOR Type Mapping
+
+| JSON Type | CBOR Type |
+|---|---|
+| null | null (0xF6) |
+| boolean | true/false (0xF5/0xF4) |
+| number_integer | negative/unsigned integer |
+| number_unsigned | unsigned integer |
+| number_float | IEEE 754 double (0xFB), or single (0xFA) when losslessly representable; half-precision (0xF9) is accepted on input only |
+| string | text string (major type 3) |
+| array | array (major type 4) |
+| object | map (major type 5) |
+| binary | byte string (major type 2) |
+
+## MessagePack
+
+### Serialization
+
+```cpp
+static std::vector<std::uint8_t> to_msgpack(const basic_json& j);
+static void to_msgpack(const basic_json& j, detail::output_adapter<uint8_t> o);
+static void to_msgpack(const basic_json& j, detail::output_adapter<char> o);
+```
+
+```cpp
+json j = {{"array", {1, 2, 3}}, {"null", nullptr}};
+auto msgpack = json::to_msgpack(j);
+```
+
+### Deserialization
+
+```cpp
+template<typename InputType>
+static basic_json from_msgpack(InputType&& i,
+ const bool strict = true,
+ const bool allow_exceptions = true);
+```
+
+```cpp
+auto j = json::from_msgpack(msgpack);
+```
+
+### MessagePack Type Mapping
+
+| JSON Type | MessagePack Type |
+|---|---|
+| null | nil (0xC0) |
+| boolean | true/false (0xC3/0xC2) |
+| number_integer | int 8/16/32/64 or negative fixint |
+| number_unsigned | uint 8/16/32/64 or positive fixint |
+| number_float | float 32 or float 64 |
+| string | fixstr / str 8/16/32 |
+| array | fixarray / array 16/32 |
+| object | fixmap / map 16/32 |
+| binary | bin 8/16/32 |
+| binary with subtype | ext 8/16/32 / fixext 1/2/4/8/16 |
+
+The library chooses the **smallest** encoding that fits the value.
+
+### Ext Types
+
+MessagePack extension types carry a type byte. The library maps this to the
+binary subtype:
+
+```cpp
+json j = json::binary({0x01, 0x02, 0x03}, 42); // subtype 42
+auto mp = json::to_msgpack(j);
+// Encoded as ext with type byte 42
+
+auto j2 = json::from_msgpack(mp);
+assert(j2.get_binary().subtype() == 42);
+```
+
+## UBJSON (Universal Binary JSON)
+
+### Serialization
+
+```cpp
+static std::vector<std::uint8_t> to_ubjson(const basic_json& j,
+ const bool use_size = false,
+ const bool use_type = false);
+```
+
+Parameters:
+- `use_size` — write container size markers (enables optimized containers)
+- `use_type` — write type markers for homogeneous containers (requires `use_size`)
+
+```cpp
+json j = {1, 2, 3, 4, 5};
+
+// Without optimization
+auto ub1 = json::to_ubjson(j);
+
+// With size optimization
+auto ub2 = json::to_ubjson(j, true);
+
+// With size+type optimization (smallest for homogeneous arrays)
+auto ub3 = json::to_ubjson(j, true, true);
+```
+
+### Deserialization
+
+```cpp
+template<typename InputType>
+static basic_json from_ubjson(InputType&& i,
+ const bool strict = true,
+ const bool allow_exceptions = true);
+```
+
+### UBJSON Type Markers
+
+| Marker | Type |
+|---|---|
+| `Z` | null |
+| `T` / `F` | true / false |
+| `i` | int8 |
+| `U` | uint8 |
+| `I` | int16 |
+| `l` | int32 |
+| `L` | int64 |
+| `d` | float32 |
+| `D` | float64 |
+| `C` | char |
+| `S` | string |
+| `[` / `]` | array begin / end |
+| `{` / `}` | object begin / end |
+| `H` | high-precision number (string representation) |
+
+## BSON (Binary JSON)
+
+### Serialization
+
+```cpp
+static std::vector<std::uint8_t> to_bson(const basic_json& j);
+static void to_bson(const basic_json& j, detail::output_adapter<uint8_t> o);
+static void to_bson(const basic_json& j, detail::output_adapter<char> o);
+```
+
+**Important:** BSON requires the top-level value to be an **object**:
+
+```cpp
+json j = {{"key", "value"}, {"num", 42}};
+auto bson = json::to_bson(j);
+
+// json j = {1, 2, 3};
+// json::to_bson(j); // throws type_error::317 — not an object
+```
+
+### Deserialization
+
+```cpp
+template<typename InputType>
+static basic_json from_bson(InputType&& i,
+ const bool strict = true,
+ const bool allow_exceptions = true);
+```
+
+### BSON Type Mapping
+
+| JSON Type | BSON Type |
+|---|---|
+| null | 0x0A (Null) |
+| boolean | 0x08 (Boolean) |
+| number_integer | 0x10 (int32) or 0x12 (int64) |
+| number_unsigned | 0x10 or 0x12 (depends on value) |
+| number_float | 0x01 (double) |
+| string | 0x02 (String) |
+| array | 0x04 (Array) — encoded as object with "0", "1", ... keys |
+| object | 0x03 (Document) |
+| binary | 0x05 (Binary) |
+
+### BSON Binary Subtypes
+
+```cpp
+json j;
+j["data"] = json::binary({0x01, 0x02}, 0x80); // subtype 0x80
+auto bson = json::to_bson(j);
+// Binary encoded with subtype byte 0x80
+```
+
+## BJData (Binary JData)
+
+BJData extends UBJSON with additional types for N-dimensional arrays and
+optimized integer types.
+
+### Serialization
+
+```cpp
+static std::vector<std::uint8_t> to_bjdata(const basic_json& j,
+ const bool use_size = false,
+ const bool use_type = false);
+```
+
+### Deserialization
+
+```cpp
+template<typename InputType>
+static basic_json from_bjdata(InputType&& i,
+ const bool strict = true,
+ const bool allow_exceptions = true);
+```
+
+### Additional BJData Types
+
+Beyond UBJSON types, BJData adds:
+
+| Marker | Type |
+|---|---|
+| `u` | uint16 |
+| `m` | uint32 |
+| `M` | uint64 |
+| `h` | float16 (half-precision) |
+
+## Roundtrip Between Formats
+
+Binary formats can preserve the same data as JSON text, but with some
+differences:
+
+```cpp
+json original = {
+ {"name", "test"},
+ {"values", {1, 2, 3}},
+ {"data", json::binary({0xFF, 0xFE})}
+};
+
+// JSON text cannot represent binary values:
+// std::string text = original.dump();
+// would throw type_error.317 — binary values cannot be serialized to JSON text
+
+// Binary formats can represent binary natively
+auto cbor = json::to_cbor(original);
+auto restored = json::from_cbor(cbor);
+assert(original == restored); // exact roundtrip
+
+// Cross-format conversion
+auto mp = json::to_msgpack(original);
+auto from_mp = json::from_msgpack(mp);
+assert(original == from_mp);
+```
+
+## Size Comparison
+
+Typical size savings over JSON text:
+
+| Data | JSON | CBOR | MessagePack | UBJSON | BSON |
+|---|---|---|---|---|---|
+| `{"a":1}` | 7 bytes | 4 bytes | 4 bytes | 7 bytes | 12 bytes |
+| `[1,2,3]` | 7 bytes | 4 bytes | 4 bytes | 8 bytes | N/A (top-level array) |
+| `true` | 4 bytes | 1 byte | 1 byte | 1 byte | N/A (top-level bool) |
+
+BSON has the most overhead due to its document-structure requirements.
+MessagePack and CBOR are generally the most compact.
+
+## Stream-Based Serialization
+
+All binary formats support streaming to/from `std::ostream` / `std::istream`:
+
+```cpp
+// Write CBOR to file
+std::ofstream out("data.cbor", std::ios::binary);
+json::to_cbor(j, out);
+
+// Read CBOR from file
+std::ifstream in("data.cbor", std::ios::binary);
+json j = json::from_cbor(in);
+```
+
+## Strict vs. Non-Strict Parsing
+
+All `from_*` functions accept a `strict` parameter:
+
+- `strict = true` (default): all input bytes must be consumed. Extra
+ trailing data causes a parse error.
+- `strict = false`: parsing stops after the first valid value. Remaining
+ input is ignored.
+
+```cpp
+std::vector<uint8_t> data = /* two CBOR values concatenated */;
+
+// Strict: fails because of trailing data
+// json::from_cbor(data, true);
+
+// Non-strict: parses only the first value
+json j = json::from_cbor(data, false);
+```
+
+## Binary Reader / Writer Architecture
+
+The binary I/O is implemented by two internal classes:
+
+### `binary_reader`
+
+Located in `include/nlohmann/detail/input/binary_reader.hpp`:
+
+```cpp
+template<typename BasicJsonType, typename InputAdapterType, typename SAX>
+class binary_reader
+{
+ bool parse_cbor_internal(const bool get_char, const cbor_tag_handler_t tag_handler);
+ bool parse_msgpack_internal();
+ bool parse_ubjson_internal(bool get_char = true);
+ bool parse_bson_internal();
+ bool parse_bjdata_internal();
+ // ...
+};
+```
+
+Uses the SAX interface internally — each decoded value is reported to a
+SAX handler (typically `json_sax_dom_parser`) which builds the JSON tree.
+
+### `binary_writer`
+
+Located in `include/nlohmann/detail/output/binary_writer.hpp`:
+
+```cpp
+template<typename BasicJsonType, typename CharType>
+class binary_writer
+{
+ void write_cbor(const BasicJsonType& j);
+ void write_msgpack(const BasicJsonType& j);
+ void write_ubjson(const BasicJsonType& j, ...);
+ void write_bson(const BasicJsonType& j);
+ void write_bjdata(const BasicJsonType& j, ...);
+ // ...
+};
+```
+
+Directly writes encoded bytes to an `output_adapter`.
diff --git a/docs/handbook/json4cpp/building.md b/docs/handbook/json4cpp/building.md
new file mode 100644
index 0000000000..73e29a65fc
--- /dev/null
+++ b/docs/handbook/json4cpp/building.md
@@ -0,0 +1,430 @@
+# json4cpp — Building and Integration
+
+## Header-Only Usage
+
+json4cpp (nlohmann/json 3.12.0) is a header-only library. The simplest way
+to use it is to copy the single amalgamated header and include it:
+
+```cpp
+#include "nlohmann/json.hpp"
+
+using json = nlohmann::json;
+```
+
+### Single Header vs. Multi-Header
+
+The library ships in two forms:
+
+| Form | Location | Use Case |
+|---|---|---|
+| Single header | `single_include/nlohmann/json.hpp` | Simplest integration |
+| Multi-header | `include/nlohmann/json.hpp` + `include/nlohmann/detail/` | Better IDE navigation, faster incremental builds |
+
+The single header (`json.hpp`, ~25,000 lines) is generated by amalgamating
+all the multi-header files. It also ships `json_fwd.hpp` for forward
+declarations without pulling in the full implementation.
+
+### Forward Declaration Header
+
+```cpp
+#include <nlohmann/json_fwd.hpp>
+
+// Now you can declare functions accepting json parameters
+void process(const nlohmann::json& data);
+```
+
+The forward header declares `basic_json`, `json`, `ordered_json`,
+`json_pointer`, `ordered_map`, and `adl_serializer` without including
+any implementation.
+
+## CMake Integration
+
+### As a Subdirectory
+
+```cmake
+add_subdirectory(json4cpp) # or wherever the library lives
+
+target_link_libraries(my_target PRIVATE nlohmann_json::nlohmann_json)
+```
+
+### Via `FetchContent`
+
+```cmake
+include(FetchContent)
+FetchContent_Declare(
+ json
+ SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/json4cpp
+)
+FetchContent_MakeAvailable(json)
+
+target_link_libraries(my_target PRIVATE nlohmann_json::nlohmann_json)
+```
+
+### Via `find_package` (After Install)
+
+```cmake
+find_package(nlohmann_json 3.12.0 REQUIRED)
+target_link_libraries(my_target PRIVATE nlohmann_json::nlohmann_json)
+```
+
+### Target Include Directories
+
+For the simplest possible integration without CMake targets:
+
+```cmake
+target_include_directories(my_target PRIVATE
+ ${CMAKE_CURRENT_SOURCE_DIR}/json4cpp/single_include
+)
+```
+
+Or for multi-header:
+
+```cmake
+target_include_directories(my_target PRIVATE
+ ${CMAKE_CURRENT_SOURCE_DIR}/json4cpp/include
+)
+```
+
+## CMake Options Reference
+
+The top-level `CMakeLists.txt` defines these options:
+
+```cmake
+cmake_minimum_required(VERSION 3.5...4.0)
+project(nlohmann_json VERSION 3.12.0 LANGUAGES CXX)
+```
+
+### Build Options
+
+| Option | Default | Description |
+|---|---|---|
+| `JSON_BuildTests` | `ON` (main project) | Build the test suite |
+| `JSON_CI` | `OFF` | Enable CI build targets |
+| `JSON_Diagnostics` | `OFF` | Extended diagnostic messages |
+| `JSON_Diagnostic_Positions` | `OFF` | Track byte positions |
+| `JSON_GlobalUDLs` | `ON` | Place UDLs in global namespace |
+| `JSON_ImplicitConversions` | `ON` | Enable implicit `operator T()` |
+| `JSON_DisableEnumSerialization` | `OFF` | Disable automatic enum conversion |
+| `JSON_LegacyDiscardedValueComparison` | `OFF` | Legacy comparison behavior |
+| `JSON_Install` | `ON` (main project) | Install CMake targets |
+| `JSON_MultipleHeaders` | `ON` | Use multi-header tree |
+| `JSON_SystemInclude` | `OFF` | Include as system headers |
+
+### Configuration Variables
+
+```cmake
+NLOHMANN_JSON_TARGET_NAME # Override target name (default: nlohmann_json)
+NLOHMANN_JSON_CONFIG_INSTALL_DIR # CMake config install dir
+NLOHMANN_JSON_INCLUDE_INSTALL_DIR # Header install dir
+```
+
+### Header Selection Logic
+
+```cmake
+if (JSON_MultipleHeaders)
+ set(NLOHMANN_JSON_INCLUDE_BUILD_DIR "${PROJECT_SOURCE_DIR}/include/")
+else()
+ set(NLOHMANN_JSON_INCLUDE_BUILD_DIR "${PROJECT_SOURCE_DIR}/single_include/")
+endif()
+```
+
+### Compile Definitions Set by CMake
+
+When options are toggled, CMake sets preprocessor definitions on the target:
+
+```cmake
+if (JSON_Diagnostics)
+ target_compile_definitions(nlohmann_json INTERFACE JSON_DIAGNOSTICS=1)
+endif()
+
+if (NOT JSON_ImplicitConversions)
+ target_compile_definitions(nlohmann_json INTERFACE JSON_USE_IMPLICIT_CONVERSIONS=0)
+endif()
+
+if (JSON_DisableEnumSerialization)
+ target_compile_definitions(nlohmann_json INTERFACE JSON_DISABLE_ENUM_SERIALIZATION=1)
+endif()
+
+if (JSON_Diagnostic_Positions)
+ target_compile_definitions(nlohmann_json INTERFACE JSON_DIAGNOSTIC_POSITIONS=1)
+endif()
+
+if (NOT JSON_GlobalUDLs)
+ target_compile_definitions(nlohmann_json INTERFACE JSON_USE_GLOBAL_UDLS=0)
+endif()
+
+if (JSON_LegacyDiscardedValueComparison)
+ target_compile_definitions(nlohmann_json INTERFACE JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON=1)
+endif()
+```
+
+## Other Build Systems
+
+### Bazel
+
+```python
+# BUILD.bazel is provided at the top level
+cc_library(
+ name = "json",
+ hdrs = glob(["include/**/*.hpp"]),
+ includes = ["include"],
+)
+```
+
+A `MODULE.bazel` file is also provided for Bzlmod support.
+
+### Meson
+
+```meson
+# meson.build is provided at the top level
+nlohmann_json_dep = dependency('nlohmann_json', fallback: ['nlohmann_json', 'nlohmann_json_dep'])
+```
+
+### Swift Package Manager
+
+```swift
+// Package.swift is provided
+.package(path: "json4cpp")
+```
+
+### pkg-config
+
+After installation, a `nlohmann_json.pc` file is generated from
+`cmake/pkg-config.pc.in`:
+
+```
+pkg-config --cflags nlohmann_json
+```
+
+## Preprocessor Configuration Macros
+
+These macros can be defined before including the header or via compiler
+flags to control library behavior:
+
+### Core Behavior
+
+| Macro | Values | Effect |
+|---|---|---|
+| `JSON_DIAGNOSTICS` | `0`/`1` | Extended error messages with parent-chain paths |
+| `JSON_DIAGNOSTIC_POSITIONS` | `0`/`1` | Track byte positions in parsed values |
+| `JSON_USE_IMPLICIT_CONVERSIONS` | `0`/`1` | Enable/disable implicit `operator T()` |
+| `JSON_DISABLE_ENUM_SERIALIZATION` | `0`/`1` | Disable enum-to-integer serialization |
+| `JSON_USE_GLOBAL_UDLS` | `0`/`1` | Place `_json` / `_json_pointer` UDLs in global scope |
+| `JSON_NO_IO` | defined/undefined | Disable all stream-based I/O |
+| `JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON` | `0`/`1` | Legacy discarded value comparison |
+
+### Assertion Customization
+
+```cpp
+// Override the assertion macro (default: assert())
+#define JSON_ASSERT(x) my_assert(x)
+```
+
+### Exception Customization
+
+```cpp
+// Override throw behavior
+#define JSON_THROW(exception) throw exception
+#define JSON_TRY try
+#define JSON_CATCH(exception) catch(exception)
+#define JSON_INTERNAL_CATCH(exception) catch(exception)
+```
+
+To disable exceptions entirely:
+
+```cpp
+#define JSON_THROW(exception) std::abort()
+#define JSON_TRY if(true)
+#define JSON_CATCH(exception) if(false)
+#define JSON_INTERNAL_CATCH(exception) if(false)
+```
+
+### Version Macros
+
+```cpp
+NLOHMANN_JSON_VERSION_MAJOR // 3
+NLOHMANN_JSON_VERSION_MINOR // 12
+NLOHMANN_JSON_VERSION_PATCH // 0
+```
+
+### Standard Detection Macros
+
+Set automatically based on the compiler:
+
+```cpp
+JSON_HAS_CPP_11 // always 1
+JSON_HAS_CPP_14 // 1 if C++14 or higher
+JSON_HAS_CPP_17 // 1 if C++17 or higher
+JSON_HAS_CPP_20 // 1 if C++20 or higher
+```
+
+### RTTI Detection
+
+```cpp
+JSON_HAS_STATIC_RTTI // 1 if RTTI is available
+```
+
+### Three-Way Comparison Detection
+
+```cpp
+JSON_HAS_THREE_WAY_COMPARISON // 1 if <=> is available
+```
+
+## C++20 Module Support
+
+The library includes experimental C++20 module support:
+
+```cmake
+option(NLOHMANN_JSON_BUILD_MODULES "Build C++ modules support" OFF)
+```
+
+When enabled and CMake >= 3.28 is available, the module is built from
+`src/modules/`. Usage:
+
+```cpp
+import nlohmann.json;
+```
+
+## Compiler-Specific Notes
+
+### GCC
+
+The `cmake/gcc_flags.cmake` file configures GCC-specific warning flags.
+GCC 4.8 support requires workarounds (user-defined literal spacing).
+
+### Clang
+
+`cmake/clang_flags.cmake` handles Clang warning configuration. The
+`-Wweak-vtables` warning is suppressed in `detail/exceptions.hpp`
+since header-only libraries cannot have out-of-line vtables.
+
+### MSVC
+
+MSVC receives specific warning suppressions. The `nlohmann_json.natvis`
+file provides Visual Studio debugger visualization:
+
+```xml
+<!-- nlohmann_json.natvis provides structured views in the VS debugger -->
+```
+
+## Installation
+
+### Default Installation Layout
+
+```bash
+cmake -B build -DCMAKE_INSTALL_PREFIX=/usr/local
+cmake --build build
+cmake --install build
+```
+
+This installs:
+
+```
+/usr/local/include/nlohmann/ # Headers
+/usr/local/share/cmake/nlohmann_json/ # CMake config files
+/usr/local/share/pkgconfig/ # pkg-config file
+```
+
+### Controlling Installation
+
+```cmake
+set(JSON_Install OFF) # Disable installation entirely
+```
+
+### Version Compatibility
+
+The installed `nlohmann_jsonConfigVersion.cmake` file is generated with
+`SameMajorVersion` compatibility, so consumers can request a minimum version:
+
+```cmake
+find_package(nlohmann_json 3.11.0 REQUIRED) # any 3.x >= 3.11.0
+```
+
+## Integration Patterns
+
+### Pattern 1: Copy Single Header
+
+```bash
+cp json4cpp/single_include/nlohmann/json.hpp my_project/third_party/
+```
+
+```cpp
+#include "third_party/json.hpp"
+```
+
+### Pattern 2: Git Submodule + CMake
+
+```bash
+git submodule add <url> third_party/json
+```
+
+```cmake
+add_subdirectory(third_party/json)
+target_link_libraries(my_target PRIVATE nlohmann_json::nlohmann_json)
+```
+
+### Pattern 3: System Package
+
+Most Linux distributions package nlohmann/json:
+
+```bash
+# Debian/Ubuntu
+apt install nlohmann-json3-dev
+
+# Fedora
+dnf install json-devel
+
+# Arch
+pacman -S nlohmann-json
+
+# macOS
+brew install nlohmann-json
+```
+
+### Pattern 4: Header-Only with Forward Declarations
+
+For faster compilation, use the forward declaration header in headers
+and the full header only in implementation files:
+
+```cpp
+// my_class.hpp
+#include <nlohmann/json_fwd.hpp>
+class MyClass {
+ void process(const nlohmann::json& j);
+};
+
+// my_class.cpp
+#include <nlohmann/json.hpp>
+#include "my_class.hpp"
+void MyClass::process(const nlohmann::json& j) { ... }
+```
+
+## Compilation Speed Tips
+
+1. **Use `json_fwd.hpp`** in headers to avoid pulling the full
+ implementation into every translation unit.
+
+2. **Precompiled headers** — add `nlohmann/json.hpp` to your PCH:
+ ```cmake
+ target_precompile_headers(my_target PRIVATE <nlohmann/json.hpp>)
+ ```
+
+3. **Unity builds** work naturally since the library is header-only.
+
+4. **Multi-header mode** with `JSON_MultipleHeaders=ON` can improve
+ incremental rebuild times since changes to one detail header don't
+ invalidate the entire amalgamated file.
+
+5. **`JSON_NO_IO`** — define this if you don't need stream operators,
+ reducing the include chain.
+
+## Minimum Requirements
+
+| Requirement | Minimum |
+|---|---|
+| C++ Standard | C++11 |
+| CMake | 3.5 (3.28 for modules) |
+| GCC | 4.8 |
+| Clang | 3.4 |
+| MSVC | 2015 (19.0) |
+| Intel C++ | 2017 |
diff --git a/docs/handbook/json4cpp/code-style.md b/docs/handbook/json4cpp/code-style.md
new file mode 100644
index 0000000000..05fb76f4dd
--- /dev/null
+++ b/docs/handbook/json4cpp/code-style.md
@@ -0,0 +1,209 @@
+# json4cpp — Code Style & Conventions
+
+## Source Organisation
+
+### Directory Layout
+
+```
+json4cpp/
+├── include/nlohmann/ # Multi-header installation
+│ ├── json.hpp # Main header (includes everything)
+│ ├── json_fwd.hpp # Forward declarations only
+│ ├── adl_serializer.hpp # ADL-based serializer
+│ ├── byte_container_with_subtype.hpp
+│ ├── ordered_map.hpp # Insertion-order map
+│ └── detail/ # Internal implementation
+│ ├── exceptions.hpp # Exception hierarchy
+│ ├── hash.hpp # std::hash specialization
+│ ├── json_pointer.hpp # RFC 6901 implementation
+│ ├── json_ref.hpp # Internal reference wrapper
+│ ├── macro_scope.hpp # Macro definitions
+│ ├── macro_unscope.hpp # Macro undefinitions
+│ ├── string_concat.hpp # String concatenation helper
+│ ├── string_escape.hpp # String escaping utilities
+│ ├── value_t.hpp # value_t enum
+│ ├── abi_macros.hpp # ABI versioning macros
+│ ├── conversions/ # Type conversion traits
+│ ├── input/ # Parsing pipeline
+│ ├── iterators/ # Iterator implementations
+│ ├── meta/ # Type traits & SFINAE
+│ └── output/ # Serialization pipeline
+├── single_include/nlohmann/ # Single-header (amalgamated)
+│ └── json.hpp # Complete library in one file
+├── tests/ # Test suite (doctest)
+│ ├── CMakeLists.txt
+│ └── src/
+│ └── unit-*.cpp # One file per feature area
+└── CMakeLists.txt # Build configuration
+```
+
+### Public vs. Internal API
+
+- `include/nlohmann/*.hpp` — public API, included by users
+- `include/nlohmann/detail/` — internal, not for direct inclusion
+- `single_include/` — generated amalgamation, mirrors the public API
+
+Users should only include `<nlohmann/json.hpp>` or
+`<nlohmann/json_fwd.hpp>`.
+
+## Naming Conventions
+
+### Types
+
+- Template parameters: `PascalCase` — `BasicJsonType`, `ObjectType`,
+ `InputAdapterType`
+- Type aliases: `snake_case` — `value_t`, `object_t`, `string_t`,
+ `number_integer_t`
+- Internal classes: `snake_case` — `iter_impl`, `binary_reader`,
+ `json_sax_dom_parser`
+
+### Functions and Methods
+
+- All functions: `snake_case` — `parse()`, `dump()`, `push_back()`,
+ `is_null()`, `get_to()`, `merge_patch()`
+- Private methods: `snake_case` — `set_parent()`, `assert_invariant()`
+
+### Variables
+
+- Member variables: `m_` prefix — `m_type`, `m_value`, `m_parent`
+- Local variables: `snake_case` — `reference_tokens`, `token_buffer`
+
+### Macros
+
+- All macros: `SCREAMING_SNAKE_CASE` with project prefix
+- Public macros: `NLOHMANN_` prefix or `JSON_` prefix
+ - `NLOHMANN_DEFINE_TYPE_INTRUSIVE`
+ - `NLOHMANN_JSON_SERIALIZE_ENUM`
+ - `JSON_DIAGNOSTICS`
+ - `JSON_USE_IMPLICIT_CONVERSIONS`
+- Internal macros: `NLOHMANN_JSON_` prefix for implementation detail macros
+- All macros are undefined by `macro_unscope.hpp` to avoid pollution
+
+### Namespaces
+
+```cpp
+namespace nlohmann {
+ // Public API: basic_json, json, ordered_json, json_pointer, ...
+ namespace detail {
+ // Internal implementation
+ }
+ namespace literals {
+ namespace json_literals {
+ // _json, _json_pointer UDLs
+ }
+ }
+}
+```
+
+The `NLOHMANN_JSON_NAMESPACE_BEGIN` / `NLOHMANN_JSON_NAMESPACE_END` macros
+handle optional ABI versioning via inline namespaces.
+
+## Template Style
+
+### SFINAE Guards
+
+The library uses SFINAE extensively to constrain overloads:
+
+```cpp
+template<typename BasicJsonType, typename T,
+ enable_if_t<is_compatible_type<BasicJsonType, T>::value, int> = 0>
+void to_json(BasicJsonType& j, T&& val);
+```
+
+The `enable_if_t<..., int> = 0` pattern is used throughout instead of
+`enable_if_t<..., void>` or return-type SFINAE.
+
+### Tag Dispatch
+
+Priority tags resolve overload ambiguity:
+
+```cpp
+template<unsigned N> struct priority_tag : priority_tag<N - 1> {};
+template<> struct priority_tag<0> {};
+```
+
+Higher-numbered tags are tried first (since they inherit from lower ones).
+
+### `static_assert` Guards
+
+Critical type requirements use `static_assert` with readable messages:
+
+```cpp
+static_assert(std::is_default_constructible<T>::value,
+ "T must be default constructible");
+```
+
+## Header Guards
+
+Each header uses `#ifndef` guards following the pattern:
+
+```cpp
+#ifndef INCLUDE_NLOHMANN_JSON_HPP_
+#define INCLUDE_NLOHMANN_JSON_HPP_
+// ...
+#endif // INCLUDE_NLOHMANN_JSON_HPP_
+```
+
+Detail headers follow `INCLUDE_NLOHMANN_JSON_DETAIL_*` naming.
+
+## Code Documentation
+
+### Doxygen-Style Comments
+
+Public API methods use `///` or `/** */` with standard Doxygen tags:
+
+```cpp
+/// @brief parse a JSON value from a string
+/// @param[in] i the input to parse
+/// @param[in] cb a callback function (default: none)
+/// @param[in] allow_exceptions whether exceptions should be thrown
+/// @return the parsed JSON value
+static basic_json parse(InputType&& i, ...);
+```
+
+### `@sa` Cross References
+
+Related methods are linked with `@sa`:
+
+```cpp
+/// @sa dump() for serialization
+/// @sa operator>> for stream parsing
+```
+
+### `@throw` Documentation
+
+Exception-throwing methods document which exceptions they throw:
+
+```cpp
+/// @throw parse_error.101 if unexpected token
+/// @throw parse_error.102 if invalid unicode escape
+```
+
+## Error Handling Style
+
+- Public API methods that can fail throw typed exceptions from the
+ hierarchy (`parse_error`, `type_error`, `out_of_range`,
+ `invalid_iterator`, `other_error`)
+- Each exception has a unique numeric ID for programmatic handling
+- Error messages follow the format:
+ `[json.exception.<type>.<id>] <description>`
+- Internal assertions use `JSON_ASSERT(condition)` which maps to
+ `assert()` by default
+
+## Compatibility
+
+### C++ Standard
+
+- Minimum: C++11
+- Optional features with C++14: heterogeneous lookup
+ (`std::less<>`)
+- Optional features with C++17: `std::string_view`, `std::optional`,
+ `std::variant`, `std::filesystem::path`, structured bindings,
+ `if constexpr`
+- Optional features with C++20: modules, `operator<=>`
+
+### Compiler Notes
+
+Tested compilers include GCC ≥ 4.8, Clang ≥ 3.4, MSVC ≥ 2015, Intel
+C++, and various others. Compiler-specific workarounds are guarded with
+preprocessor conditionals.
diff --git a/docs/handbook/json4cpp/custom-types.md b/docs/handbook/json4cpp/custom-types.md
new file mode 100644
index 0000000000..086fa0ebcc
--- /dev/null
+++ b/docs/handbook/json4cpp/custom-types.md
@@ -0,0 +1,465 @@
+# json4cpp — Custom Type Serialization
+
+## ADL-Based Serialization
+
+The library uses **Argument-Dependent Lookup** (ADL) to find `to_json()`
+and `from_json()` free functions for user-defined types. This allows
+seamless conversion without modifying the library.
+
+### Basic Pattern
+
+Define `to_json()` and `from_json()` as free functions in the **same
+namespace** as your type:
+
+```cpp
+namespace myapp {
+
+struct Person {
+ std::string name;
+ int age;
+};
+
+void to_json(nlohmann::json& j, const Person& p) {
+ j = nlohmann::json{{"name", p.name}, {"age", p.age}};
+}
+
+void from_json(const nlohmann::json& j, Person& p) {
+ j.at("name").get_to(p.name);
+ j.at("age").get_to(p.age);
+}
+
+} // namespace myapp
+```
+
+Usage:
+
+```cpp
+myapp::Person alice{"alice", 30};
+
+// Serialization
+json j = alice; // calls myapp::to_json via ADL
+// or
+json j2;
+j2 = alice;
+
+// Deserialization
+auto bob = j.get<myapp::Person>(); // calls myapp::from_json via ADL
+// or
+myapp::Person carol;
+j.get_to(carol);
+```
+
+### How ADL Resolution Works
+
+When you write `json j = my_obj;`, the library calls:
+
+```cpp
+nlohmann::adl_serializer<MyType>::to_json(j, my_obj);
+```
+
+The default `adl_serializer` implementation delegates via an unqualified
+call:
+
+```cpp
+template<typename BasicJsonType, typename TargetType>
+static auto to_json(BasicJsonType& j, TargetType&& val)
+ -> decltype(::nlohmann::to_json(j, std::forward<TargetType>(val)), void())
+{
+ ::nlohmann::to_json(j, std::forward<TargetType>(val));
+}
+```
+
+Here `::nlohmann::to_json` is a function object whose `operator()` makes an *unqualified* `to_json(j, val)` call, which finds:
+1. Built-in overloads in `namespace nlohmann` (visible at the point of definition)
+2. User-provided overloads in the type's namespace (via ADL)
+
+## `get_to()` Helper
+
+```cpp
+template<typename ValueType>
+ValueType& get_to(ValueType& v) const;
+```
+
+Converts and writes into an existing variable:
+
+```cpp
+json j = {{"x", 1}, {"y", 2}};
+int x, y;
+j.at("x").get_to(x);
+j.at("y").get_to(y);
+```
+
+## Automatic Macros
+
+The library provides macros to auto-generate `to_json()` and `from_json()`
+without writing them manually. All macros are defined in
+`include/nlohmann/detail/macro_scope.hpp`.
+
+### `NLOHMANN_DEFINE_TYPE_INTRUSIVE`
+
+Defines `to_json()` and `from_json()` as **friend functions** inside the
+class body. Requires all fields to be present during deserialization:
+
+```cpp
+struct Point {
+ double x;
+ double y;
+ double z;
+
+ NLOHMANN_DEFINE_TYPE_INTRUSIVE(Point, x, y, z)
+};
+```
+
+This expands to:
+
+```cpp
+friend void to_json(nlohmann::json& nlohmann_json_j, const Point& nlohmann_json_t) {
+ nlohmann_json_j["x"] = nlohmann_json_t.x;
+ nlohmann_json_j["y"] = nlohmann_json_t.y;
+ nlohmann_json_j["z"] = nlohmann_json_t.z;
+}
+
+friend void from_json(const nlohmann::json& nlohmann_json_j, Point& nlohmann_json_t) {
+ nlohmann_json_j.at("x").get_to(nlohmann_json_t.x);
+ nlohmann_json_j.at("y").get_to(nlohmann_json_t.y);
+ nlohmann_json_j.at("z").get_to(nlohmann_json_t.z);
+}
+```
+
+### `NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT`
+
+Same as above, but uses `value()` instead of `at()` during deserialization.
+Missing keys get the default-constructed or current value instead of
+throwing:
+
+```cpp
+struct Config {
+ std::string host = "localhost";
+ int port = 8080;
+ bool debug = false;
+
+ NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Config, host, port, debug)
+};
+```
+
+Now parsing `{}` produces a Config with all default values instead of
+throwing.
+
+### `NLOHMANN_DEFINE_TYPE_INTRUSIVE_ONLY_SERIALIZE`
+
+Generates only the `to_json()` function (no `from_json()`). Useful for
+types that should be serializable but not deserializable:
+
+```cpp
+struct LogEntry {
+ std::string timestamp;
+ std::string message;
+ int level;
+
+ NLOHMANN_DEFINE_TYPE_INTRUSIVE_ONLY_SERIALIZE(LogEntry, timestamp, message, level)
+};
+```
+
+### `NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE`
+
+Defines `to_json()` and `from_json()` as **free functions** outside the
+class. Requires all members to be public:
+
+```cpp
+struct Color {
+ int r, g, b;
+};
+
+NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Color, r, g, b)
+```
+
+### `NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT`
+
+Non-intrusive version with default values for missing keys:
+
+```cpp
+struct Margin {
+ int top = 0;
+ int right = 0;
+ int bottom = 0;
+ int left = 0;
+};
+
+NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Margin, top, right, bottom, left)
+```
+
+### `NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE`
+
+Non-intrusive, serialize-only:
+
+```cpp
+struct Metric {
+ std::string name;
+ double value;
+};
+
+NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(Metric, name, value)
+```
+
+## Derived Type Macros
+
+For inheritance hierarchies, use the `DERIVED_TYPE` variants. These include
+the base class fields:
+
+### `NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE`
+
+```cpp
+struct Base {
+ std::string id;
+ NLOHMANN_DEFINE_TYPE_INTRUSIVE(Base, id)
+};
+
+struct Derived : Base {
+ int value;
+ NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE(Derived, Base, value)
+};
+```
+
+This generates serialization that includes both `id` (from Base) and
+`value` (from Derived).
+
+### All Derived Variants
+
+| Macro | Intrusive | Default | Serialize-Only |
+|---|---|---|---|
+| `NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE` | Yes | No | No |
+| `NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_WITH_DEFAULT` | Yes | Yes | No |
+| `NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_ONLY_SERIALIZE` | Yes | — | Yes |
+| `NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE` | No | No | No |
+| `NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_WITH_DEFAULT` | No | Yes | No |
+| `NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE` | No | — | Yes |
+
+## Low-Level Macros
+
+### `NLOHMANN_JSON_TO` / `NLOHMANN_JSON_FROM`
+
+Building-block macros for custom serialization:
+
+```cpp
+#define NLOHMANN_JSON_TO(v1) nlohmann_json_j[#v1] = nlohmann_json_t.v1;
+#define NLOHMANN_JSON_FROM(v1) nlohmann_json_j.at(#v1).get_to(nlohmann_json_t.v1);
+#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) \
+ nlohmann_json_t.v1 = nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1);
+```
+
+These are used internally by the `NLOHMANN_DEFINE_TYPE_*` macros and can
+be used directly for custom patterns.
+
+## Custom `adl_serializer` Specialization
+
+For types where you can't add free functions (e.g., third-party types),
+specialize `adl_serializer`:
+
+```cpp
+namespace nlohmann {
+
+template<>
+struct adl_serializer<third_party::Point3D> {
+ static void to_json(json& j, const third_party::Point3D& p) {
+ j = json{{"x", p.x()}, {"y", p.y()}, {"z", p.z()}};
+ }
+
+ static void from_json(const json& j, third_party::Point3D& p) {
+ p = third_party::Point3D(
+ j.at("x").get<double>(),
+ j.at("y").get<double>(),
+ j.at("z").get<double>()
+ );
+ }
+};
+
+} // namespace nlohmann
+```
+
+### Non-Default-Constructible Types
+
+For types without a default constructor, implement `from_json()` as a
+static method returning the constructed value:
+
+```cpp
+namespace nlohmann {
+
+template<>
+struct adl_serializer<Immutable> {
+ static Immutable from_json(const json& j) {
+ return Immutable(j.at("x").get<int>(), j.at("y").get<int>());
+ }
+
+ static void to_json(json& j, const Immutable& val) {
+ j = json{{"x", val.x()}, {"y", val.y()}};
+ }
+};
+
+} // namespace nlohmann
+```
+
+Usage:
+
+```cpp
+json j = {{"x", 1}, {"y", 2}};
+auto val = j.get<Immutable>(); // calls adl_serializer<Immutable>::from_json(j)
+```
+
+## Enum Serialization
+
+### Default: Integer Mapping
+
+By default, enums are serialized as their underlying integer value:
+
+```cpp
+enum class Status { active, inactive, pending };
+
+json j = Status::active; // 0
+auto s = j.get<Status>(); // Status::active
+```
+
+### Disabling Enum Serialization
+
+```cpp
+#define JSON_DISABLE_ENUM_SERIALIZATION 1
+```
+
+Or via CMake:
+
+```cmake
+set(JSON_DisableEnumSerialization ON)
+```
+
+### Custom Enum Mapping with `NLOHMANN_JSON_SERIALIZE_ENUM`
+
+```cpp
+enum class Color { red, green, blue };
+
+NLOHMANN_JSON_SERIALIZE_ENUM(Color, {
+ {Color::red, "red"},
+ {Color::green, "green"},
+ {Color::blue, "blue"},
+})
+```
+
+This generates both `to_json()` and `from_json()` that map between enum
+values and strings:
+
+```cpp
+json j = Color::red; // "red"
+auto c = j.get<Color>(); // Color::red
+
+json j2 = "unknown";
+auto c2 = j2.get<Color>(); // Color::red (first entry is the default)
+```
+
+The first entry in the mapping serves as the default for unrecognized
+values during deserialization.
+
+## Nested Types
+
+```cpp
+struct Address {
+ std::string city;
+ std::string zip;
+ NLOHMANN_DEFINE_TYPE_INTRUSIVE(Address, city, zip)
+};
+
+struct Employee {
+ std::string name;
+ Address address;
+ std::vector<std::string> skills;
+ NLOHMANN_DEFINE_TYPE_INTRUSIVE(Employee, name, address, skills)
+};
+```
+
+The library handles nesting automatically — `Address` has its own
+serialization, and `Employee` uses it implicitly:
+
+```cpp
+Employee emp{"alice", {"wonderland", "12345"}, {"c++", "python"}};
+json j = emp;
+// {
+// "name": "alice",
+// "address": {"city": "wonderland", "zip": "12345"},
+// "skills": ["c++", "python"]
+// }
+
+auto emp2 = j.get<Employee>();
+```
+
+## Optional Fields
+
+Use `std::optional` (C++17) for truly optional fields:
+
+```cpp
+struct UserProfile {
+ std::string username;
+ std::optional<std::string> bio;
+ std::optional<int> age;
+
+ NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(UserProfile, username, bio, age)
+};
+```
+
+`std::optional<T>` serializes as:
+- The value `T` if it has a value
+- `null` if it's `std::nullopt`
+
+With `_WITH_DEFAULT`, missing keys leave the optional as `std::nullopt`.
+
+## Collection Types
+
+Standard containers are automatically handled:
+
+```cpp
+struct Team {
+ std::string name;
+ std::vector<Person> members;
+ std::map<std::string, int> scores;
+ std::set<std::string> tags;
+
+ NLOHMANN_DEFINE_TYPE_INTRUSIVE(Team, name, members, scores, tags)
+};
+```
+
+Any type that satisfies the required type traits is automatically
+serializable:
+- Sequence containers (`std::vector`, `std::list`, `std::deque`, etc.)
+- Associative containers (`std::map`, `std::set`, `std::unordered_map`, etc.)
+- `std::pair`, `std::tuple`
+- `std::array`
+
+## Smart Pointers
+
+`std::unique_ptr<T>` and `std::shared_ptr<T>` have no built-in
+serializer; converting them directly fails to compile. Provide an
+`adl_serializer` specialization (or `to_json()`/`from_json()`
+overloads) that maps:
+
+- Non-null pointer → the pointed-to value
+- Null pointer → JSON `null`
+- JSON `null` → null pointer
+
+```cpp
+struct Node {
+    int value;
+    std::shared_ptr<Node> next; // needs a user-provided serializer
+};
+```
+
+## Type Traits
+
+The library uses SFINAE-based type traits to detect capabilities:
+
+| Trait | Purpose |
+|---|---|
+| `is_compatible_type` | Can be converted to/from JSON |
+| `has_to_json` | Has a `to_json()` function |
+| `has_from_json` | Has a `from_json()` function |
+| `is_compatible_object_type` | Looks like a JSON object |
+| `is_compatible_array_type` | Looks like a JSON array |
+| `is_compatible_string_type` | Looks like a JSON string |
+| `is_compatible_integer_type` | Looks like a JSON integer |
+
+These traits live in `include/nlohmann/detail/meta/type_traits.hpp`.
diff --git a/docs/handbook/json4cpp/element-access.md b/docs/handbook/json4cpp/element-access.md
new file mode 100644
index 0000000000..73d52126bb
--- /dev/null
+++ b/docs/handbook/json4cpp/element-access.md
@@ -0,0 +1,581 @@
+# json4cpp — Element Access
+
+## Overview
+
+The `basic_json` class provides several ways to access elements:
+
+| Method | Applicable To | Behavior on Missing |
+|---|---|---|
+| `operator[]` | array, object, null | Inserts default (creates if null) |
+| `at()` | array, object | Throws `out_of_range` |
+| `value()` | object | Returns caller-supplied default |
+| `front()` | array, object, scalar | UB if empty |
+| `back()` | array, object, scalar | UB if empty |
+| `find()` | object | Returns `end()` |
+| `contains()` | object | Returns `false` |
+| `count()` | object | Returns `0` |
+
+## `operator[]`
+
+### Array Access
+
+```cpp
+reference operator[](size_type idx);
+const_reference operator[](size_type idx) const;
+```
+
+Accesses the element at index `idx`. If the JSON value is **null**, it is
+automatically converted to an **array** before accessing:
+
+```cpp
+json j; // null
+j[0] = "first"; // j is now ["first"]
+j[1] = "second"; // j is now ["first", "second"]
+```
+
+If `idx` is beyond the current array size, the array is extended with null
+elements:
+
+```cpp
+json j = {1, 2};
+j[5] = 99;
+// j is now [1, 2, null, null, null, 99]
+```
+
+**Warning:** `const` array access does **not** extend the array and has
+undefined behavior for out-of-bounds access.
+
+### Object Access
+
+```cpp
+reference operator[](const typename object_t::key_type& key);
+const_reference operator[](const typename object_t::key_type& key) const;
+
+// C++14 heterogeneous lookup (KeyType template)
+template<typename KeyType>
+reference operator[](KeyType&& key);
+template<typename KeyType>
+const_reference operator[](KeyType&& key) const;
+```
+
+Accesses the element with key `key`. If the key does not exist in a mutable
+context, it is **inserted** with a null value:
+
+```cpp
+json j = {{"name", "alice"}};
+j["age"] = 30; // inserts "age"
+std::string name = j["name"];
+
+// const access does not insert
+const json& cj = j;
+// cj["missing"]; // undefined behavior if key doesn't exist
+```
+
+If the JSON value is **null**, it is automatically converted to an **object**:
+
+```cpp
+json j; // null
+j["key"] = "value"; // j is now {"key": "value"}
+```
+
+### Integer Index vs. String Key
+
+`operator[]` with an integer is strictly **array** access:
+
+```cpp
+json j = {{"key", "value"}};
+// j[0] — throws type_error::305 (numeric index used on an object)
+// j["key"] — object access
+```
+
+### Using `json::object_t::key_type`
+
+The non-const `operator[]` accepts a `key_type` (default: `std::string`).
+The `KeyType` template overloads accept any type that satisfies these
+constraints via `detail::is_usable_as_key_type`:
+
+- Must be comparable with `object_comparator_t`
+- Not convertible to `basic_json`
+- Not a `value_t`
+- Not a `BasicJsonType`
+
+## `at()`
+
+### Array Access
+
+```cpp
+reference at(size_type idx);
+const_reference at(size_type idx) const;
+```
+
+Returns a reference to the element at index `idx`. Throws
+`json::out_of_range` (id 401) if the index is out of bounds:
+
+```cpp
+json j = {1, 2, 3};
+j.at(0); // 1
+j.at(3); // throws out_of_range::401: "array index 3 is out of range"
+```
+
+When `JSON_DIAGNOSTIC_POSITIONS` is enabled, the exception includes
+byte-offset information.
+
+### Object Access
+
+```cpp
+reference at(const typename object_t::key_type& key);
+const_reference at(const typename object_t::key_type& key) const;
+
+template<typename KeyType>
+reference at(KeyType&& key);
+template<typename KeyType>
+const_reference at(KeyType&& key) const;
+```
+
+Returns a reference to the element with key `key`. Throws
+`json::out_of_range` (id 403) if the key is not found:
+
+```cpp
+json j = {{"name", "alice"}, {"age", 30}};
+j.at("name"); // "alice"
+j.at("missing"); // throws out_of_range::403: "key 'missing' not found"
+```
+
+### Type Mismatch
+
+Both `at()` overloads throw `json::type_error` (id 304) if the JSON value
+is not of the expected type:
+
+```cpp
+json j = 42;
+j.at(0); // throws type_error::304: "cannot use at() with number"
+j.at("key"); // throws type_error::304: "cannot use at() with number"
+```
+
+## `value()`
+
+```cpp
+// With default value
+ValueType value(const typename object_t::key_type& key, const ValueType& default_value) const;
+
+// With JSON pointer
+ValueType value(const json_pointer& ptr, const ValueType& default_value) const;
+
+// KeyType template overloads
+template<typename KeyType>
+ValueType value(KeyType&& key, const ValueType& default_value) const;
+```
+
+Returns the value for a given key or JSON pointer, or `default_value` if
+the key/pointer does not resolve. Unlike `operator[]` and `at()`, this
+method **never modifies** the JSON value.
+
+```cpp
+json j = {{"name", "alice"}, {"age", 30}};
+
+std::string name = j.value("name", "unknown"); // "alice"
+std::string addr = j.value("address", "N/A"); // "N/A"
+int height = j.value("height", 170); // 170
+
+// With JSON pointer
+int age = j.value("/age"_json_pointer, 0); // 30
+int foo = j.value("/foo"_json_pointer, -1); // -1
+```
+
+Throws `json::type_error` (id 306) if the JSON value is not an object (for
+the key overloads) or if the found value cannot be converted to `ValueType`.
+
+### `value()` vs `operator[]`
+
+| Feature | `operator[]` | `value()` |
+|---|---|---|
+| Modifies on miss | Yes (inserts null) | No |
+| Returns | Reference | Value copy |
+| Default on miss | null (always) | Caller-specified |
+| Applicable to arrays | Yes | No (objects only) |
+
+## `front()` and `back()`
+
+```cpp
+reference front();
+const_reference front() const;
+
+reference back();
+const_reference back() const;
+```
+
+Return references to the first/last element. For **arrays**, this is the
+first/last element by index. For **objects**, this is the first/last element
+by iteration order (which depends on the comparator — insertion order for
+`ordered_json`). For **non-compound types**, the value itself is returned
+(the JSON value is treated as a single-element container).
+
+```cpp
+json j = {1, 2, 3};
+j.front(); // 1
+j.back(); // 3
+
+json j2 = 42;
+j2.front(); // 42
+j2.back(); // 42
+```
+
+**Warning:** Calling `front()` or `back()` on an empty container is
+**undefined behavior** (same as STL containers).
+
+## `find()`
+
+```cpp
+iterator find(const typename object_t::key_type& key);
+const_iterator find(const typename object_t::key_type& key) const;
+
+template<typename KeyType>
+iterator find(KeyType&& key);
+template<typename KeyType>
+const_iterator find(KeyType&& key) const;
+```
+
+Returns an iterator to the element with the given key, or `end()` if not
+found. Only works on objects:
+
+```cpp
+json j = {{"name", "alice"}, {"age", 30}};
+
+auto it = j.find("name");
+if (it != j.end()) {
+ std::cout << it.key() << " = " << it.value() << "\n";
+}
+
+auto it2 = j.find("missing");
+assert(it2 == j.end());
+```
+
+For non-objects, `find()` always returns `end()`.
+
+## `contains()`
+
+```cpp
+bool contains(const typename object_t::key_type& key) const;
+
+template<typename KeyType>
+bool contains(KeyType&& key) const;
+
+// JSON pointer overload
+bool contains(const json_pointer& ptr) const;
+```
+
+Returns `true` if the key or pointer exists:
+
+```cpp
+json j = {{"name", "alice"}, {"address", {{"city", "wonderland"}}}};
+
+j.contains("name"); // true
+j.contains("phone"); // false
+
+// JSON pointer — checks nested paths
+j.contains("/address/city"_json_pointer); // true
+j.contains("/address/zip"_json_pointer); // false
+```
+
+## `count()`
+
+```cpp
+size_type count(const typename object_t::key_type& key) const;
+
+template<typename KeyType>
+size_type count(KeyType&& key) const;
+```
+
+Returns the number of elements with the given key. Since JSON objects have
+unique keys, the result is always `0` or `1`:
+
+```cpp
+json j = {{"name", "alice"}};
+j.count("name"); // 1
+j.count("missing"); // 0
+```
+
+## `erase()`
+
+### Erase by Iterator
+
+```cpp
+iterator erase(iterator pos);
+iterator erase(const_iterator pos);
+```
+
+Removes the element at the given iterator position. Returns an iterator to
+the element after the erased one:
+
+```cpp
+json j = {1, 2, 3, 4, 5};
+auto it = j.erase(j.begin() + 2); // removes 3
+// j is now [1, 2, 4, 5], it points to 4
+```
+
+### Erase by Iterator Range
+
+```cpp
+iterator erase(iterator first, iterator last);
+iterator erase(const_iterator first, const_iterator last);
+```
+
+Removes all elements in the range `[first, last)`:
+
+```cpp
+json j = {1, 2, 3, 4, 5};
+j.erase(j.begin() + 1, j.begin() + 3);
+// j is now [1, 4, 5]
+```
+
+### Erase by Key
+
+```cpp
+size_type erase(const typename object_t::key_type& key);
+
+template<typename KeyType>
+size_type erase(KeyType&& key);
+```
+
+Removes the element with the given key from an object. Returns the number
+of elements removed (0 or 1):
+
+```cpp
+json j = {{"name", "alice"}, {"age", 30}};
+j.erase("age");
+// j is now {"name": "alice"}
+```
+
+### Erase by Index
+
+```cpp
+void erase(const size_type idx);
+```
+
+Removes the element at the given index from an array. Throws
+`out_of_range::401` if the index is out of range:
+
+```cpp
+json j = {"a", "b", "c"};
+j.erase(1);
+// j is now ["a", "c"]
+```
+
+### Erase on Primitive Types
+
+Erasing by iterator on primitive types (number, string, boolean) is
+supported only if the iterator points to the single element:
+
+```cpp
+json j = 42;
+j.erase(j.begin()); // j is now null
+```
+
+## `size()`, `empty()`, `max_size()`
+
+```cpp
+size_type size() const noexcept;
+bool empty() const noexcept;
+size_type max_size() const noexcept;
+```
+
+| Type | `size()` | `empty()` |
+|---|---|---|
+| null | 0 | `true` |
+| object | number of key-value pairs | `true` if no pairs |
+| array | number of elements | `true` if no elements |
+| scalar (string, number, boolean, binary) | 1 | `false` |
+
+```cpp
+json j_null;
+j_null.size(); // 0
+j_null.empty(); // true
+
+json j_arr = {1, 2, 3};
+j_arr.size(); // 3
+j_arr.empty(); // false
+
+json j_str = "hello";
+j_str.size(); // 1
+j_str.empty(); // false (primitive → always 1)
+```
+
+`max_size()` returns the maximum number of elements the container can hold
+(delegates to the underlying container's `max_size()` for arrays and
+objects; returns 1 for scalars).
+
+## `clear()`
+
+```cpp
+void clear() noexcept;
+```
+
+Resets the value to a default-constructed value of the same type:
+
+| Type | Result after `clear()` |
+|---|---|
+| null | null |
+| object | `{}` |
+| array | `[]` |
+| string | `""` |
+| boolean | `false` |
+| number_integer | `0` |
+| number_unsigned | `0` |
+| number_float | `0.0` |
+| binary | `[]` (empty, no subtype) |
+
+## `push_back()` and `emplace_back()`
+
+### Array Operations
+
+```cpp
+void push_back(basic_json&& val);
+void push_back(const basic_json& val);
+
+template<typename... Args>
+reference emplace_back(Args&&... args);
+```
+
+Appends an element at the end:
+
+```cpp
+json j = {1, 2, 3};
+j.push_back(4);
+j.emplace_back(5);
+// j is now [1, 2, 3, 4, 5]
+```
+
+If the value is `null`, it's first converted to an empty array.
+
+### Object Operations
+
+```cpp
+void push_back(const typename object_t::value_type& val);
+void push_back(initializer_list_t init);
+```
+
+Inserts a key-value pair:
+
+```cpp
+json j = {{"a", 1}};
+j.push_back({"b", 2}); // initializer_list pair
+j.push_back(json::object_t::value_type("c", 3)); // explicit pair
+// j is now {"a": 1, "b": 2, "c": 3}
+```
+
+### `operator+=`
+
+Alias for `push_back()`:
+
+```cpp
+json j = {1, 2};
+j += 3;
+j += {4, 5}; // pushes an array [4, 5] as a single element
+```
+
+## `emplace()`
+
+```cpp
+template<typename... Args>
+std::pair<iterator, bool> emplace(Args&&... args);
+```
+
+For objects, inserts a key-value pair if the key doesn't already exist.
+Returns a pair of iterator and bool (whether insertion took place):
+
+```cpp
+json j = {{"a", 1}};
+auto [it, inserted] = j.emplace("b", 2);
+// inserted == true, it points to {"b": 2}
+auto [it2, inserted2] = j.emplace("a", 99);
+// inserted2 == false, existing value unchanged
+```
+
+## `insert()`
+
+### Array Insert
+
+```cpp
+iterator insert(const_iterator pos, const basic_json& val);
+iterator insert(const_iterator pos, basic_json&& val);
+iterator insert(const_iterator pos, size_type cnt, const basic_json& val);
+iterator insert(const_iterator pos, const_iterator first, const_iterator last);
+iterator insert(const_iterator pos, initializer_list_t ilist);
+```
+
+Inserts elements at the given position:
+
+```cpp
+json j = {1, 2, 5};
+j.insert(j.begin() + 2, 3);
+j.insert(j.begin() + 3, 4);
+// j is now [1, 2, 3, 4, 5]
+
+// Insert count copies
+j.insert(j.end(), 2, 0);
+// j is now [1, 2, 3, 4, 5, 0, 0]
+```
+
+### Object Insert
+
+```cpp
+void insert(const_iterator first, const_iterator last);
+```
+
+Inserts elements from another object:
+
+```cpp
+json j1 = {{"a", 1}};
+json j2 = {{"b", 2}, {"c", 3}};
+j1.insert(j2.begin(), j2.end());
+// j1 is now {"a": 1, "b": 2, "c": 3}
+```
+
+## `update()`
+
+```cpp
+void update(const_reference j, bool merge_objects = false);
+void update(const_iterator first, const_iterator last, bool merge_objects = false);
+```
+
+Updates an object with keys from another object. Existing keys are
+**overwritten**:
+
+```cpp
+json j1 = {{"a", 1}, {"b", 2}};
+json j2 = {{"b", 99}, {"c", 3}};
+j1.update(j2);
+// j1 is now {"a": 1, "b": 99, "c": 3}
+```
+
+When `merge_objects` is `true`, nested objects are merged recursively
+instead of being overwritten:
+
+```cpp
+json j1 = {{"config", {{"debug", true}, {"port", 8080}}}};
+json j2 = {{"config", {{"port", 9090}, {"host", "localhost"}}}};
+j1.update(j2, true);
+// j1["config"] is now {"debug": true, "port": 9090, "host": "localhost"}
+```
+
+## `swap()`
+
+```cpp
+void swap(reference other) noexcept;
+
+void swap(array_t& other);
+void swap(object_t& other);
+void swap(string_t& other);
+void swap(binary_t& other);
+void swap(typename binary_t::container_type& other);
+```
+
+Swaps contents with another value or with a compatible container.
+The typed overloads throw `type_error::310` if the types don't match:
+
+```cpp
+json j = {1, 2, 3};
+std::vector<json> v = {4, 5, 6};
+j.swap(v);
+// j is now [4, 5, 6], v contains the old j's elements
+```
diff --git a/docs/handbook/json4cpp/exception-handling.md b/docs/handbook/json4cpp/exception-handling.md
new file mode 100644
index 0000000000..58e52a4598
--- /dev/null
+++ b/docs/handbook/json4cpp/exception-handling.md
@@ -0,0 +1,368 @@
+# json4cpp — Exception Handling
+
+## Exception Hierarchy
+
+All exceptions derive from `json::exception`, which itself inherits from
+`std::exception`. Defined in `include/nlohmann/detail/exceptions.hpp`:
+
+```
+std::exception
+ └── json::exception
+ ├── json::parse_error
+ ├── json::invalid_iterator
+ ├── json::type_error
+ ├── json::out_of_range
+ └── json::other_error
+```
+
+## Base Class: `json::exception`
+
+```cpp
+class exception : public std::exception
+{
+public:
+ const char* what() const noexcept override;
+ const int id; // numeric error identifier
+
+protected:
+ exception(int id_, const char* what_arg);
+
+ static std::string name(const std::string& ename, int id_);
+ static std::string diagnostics(std::nullptr_t leaf_element);
+ static std::string diagnostics(const BasicJsonType* leaf_element);
+
+private:
+ std::runtime_error m; // stores the what() message
+};
+```
+
+### Error Message Format
+
+```
+[json.exception.<type>.<id>] <description>
+```
+
+Example:
+```
+[json.exception.type_error.302] type must be string, but is number
+```
+
+### Diagnostics Mode
+
+When `JSON_DIAGNOSTICS` is enabled, error messages include the **path**
+from the root to the problematic value:
+
+```cpp
+#define JSON_DIAGNOSTICS 1
+#include <nlohmann/json.hpp>
+```
+
+Error message with diagnostics:
+```
+[json.exception.type_error.302] (/config/server/port) type must be string, but is number
+```
+
+The path is computed by walking up the parent chain (stored in each
+`basic_json` node when diagnostics are enabled).
+
+## `parse_error`
+
+Thrown when parsing fails.
+
+```cpp
+class parse_error : public exception
+{
+public:
+ const std::size_t byte; // byte position of the error
+
+ static parse_error create(int id_, const position_t& pos,
+ const std::string& what_arg,
+ BasicJsonType* context);
+ static parse_error create(int id_, std::size_t byte_,
+ const std::string& what_arg,
+ BasicJsonType* context);
+};
+```
+
+### Error IDs
+
+| ID | Condition | Example |
+|---|---|---|
+| 101 | Unexpected token | `parse("}")` |
+| 102 | Invalid `\u` escape | `parse("\"\\u000g\"")` |
+| 103 | Invalid surrogate pair | `parse("\"\\uDC00\"")` |
+| 104 | Invalid JSON Patch | `patch(json::array({42}))` |
+| 105 | JSON Patch missing field | `patch(json::array({{{"op","add"}}}))` |
+| 106 | Number overflow | Very large number literal |
+| 107 | Invalid JSON Pointer | `json_pointer("no-slash")` |
+| 108 | Invalid Unicode code point | Code point > U+10FFFF |
+| 109 | Invalid UTF-8 in input | Binary data as string |
+| 110 | Binary format marker error | Invalid CBOR/MsgPack byte |
+| 112 | BSON parse error | Malformed BSON input |
+| 113 | UBJSON parse error | Invalid UBJSON type marker |
+| 114 | BJData parse error | Invalid BJData structure |
+| 115 | Incomplete binary input | Truncated binary data |
+
+### Catching Parse Errors
+
+```cpp
+try {
+ json j = json::parse("{invalid}");
+} catch (json::parse_error& e) {
+ std::cerr << "Parse error: " << e.what() << "\n";
+ std::cerr << "Error ID: " << e.id << "\n";
+ std::cerr << "Byte position: " << e.byte << "\n";
+}
+```
+
+### Avoiding Exceptions
+
+```cpp
+json j = json::parse("invalid", nullptr, false);
+if (j.is_discarded()) {
+ // Handle parse failure without exception
+}
+```
+
+## `type_error`
+
+Thrown when a method is called on a JSON value of the wrong type.
+
+```cpp
+class type_error : public exception
+{
+public:
+ static type_error create(int id_, const std::string& what_arg,
+ BasicJsonType* context);
+};
+```
+
+### Error IDs
+
+| ID | Condition | Example |
+|---|---|---|
+| 301 | Cannot create from type | `json j = std::complex<double>()` |
+| 302 | Type mismatch in get | `json("str").get<int>()` |
+| 303 | Type mismatch in get_ref | `json(42).get_ref<std::string&>()` |
+| 304 | Wrong type for at() | `json(42).at("key")` |
+| 305 | Wrong type for operator[] | `json("str")[0]` |
+| 306 | Wrong type for value() | `json(42).value("k", 0)` |
+| 307 | Cannot erase from type | `json(42).erase(0)` |
+| 308 | Wrong type for push_back | `json("str").push_back(1)` |
+| 309 | Wrong type for insert | `json(42).insert(...)` |
+| 310 | Wrong type for swap | `json(42).swap(vec)` |
+| 311 | Wrong type for iterator | `json(42).begin().key()` |
+| 312 | Cannot serialize binary to text | `json::binary(...).dump()` |
+| 313 | Wrong type for push_back pair | `json(42).push_back({"k",1})` |
+| 314 | unflatten: called on non-object | `json(42).unflatten()` |
+| 315 | unflatten: value not primitive | `json{{"/a", {1}}}.unflatten()` |
+| 316 | Invalid UTF-8 in dump (strict) | Invalid byte in string |
+| 317 | to_bson: top level not object | `json::to_bson(json::array())` |
+| 318 | to_bson: key too long | Key > max int32 length |
+
+### Common Type Errors
+
+```cpp
+json j = 42;
+
+// 302: type mismatch
+try {
+ std::string s = j.get<std::string>();
+} catch (json::type_error& e) {
+ // type must be string, but is number
+}
+
+// 304: wrong type for at()
+try {
+ j.at("key");
+} catch (json::type_error& e) {
+ // cannot use at() with number
+}
+
+// 316: invalid UTF-8
+try {
+ json j = std::string("\xC0\xAF"); // overlong encoding
+ j.dump();
+} catch (json::type_error& e) {
+ // invalid utf-8 byte
+}
+```
+
+## `out_of_range`
+
+Thrown when accessing elements outside valid bounds.
+
+```cpp
+class out_of_range : public exception
+{
+public:
+ static out_of_range create(int id_, const std::string& what_arg,
+ BasicJsonType* context);
+};
+```
+
+### Error IDs
+
+| ID | Condition | Example |
+|---|---|---|
+| 401 | Array index out of range | `json({1,2}).at(5)` |
+| 402 | Array index `-` in at() | `j.at("/-"_json_pointer)` |
+| 403 | Key not found | `j.at("missing")` |
+| 404 | JSON Pointer cannot be resolved | `j.at("/bad/path"_json_pointer)` |
+| 405 | back()/pop_back() on empty ptr | `json_pointer("").back()` |
+| 406 | Numeric overflow in get | Large float → int |
+| 407 | Number not representable | `json(1e500).get<int>()` |
+| 408 | BSON key conflict | Key "0" in BSON array |
+
+```cpp
+json j = {1, 2, 3};
+
+try {
+ j.at(10);
+} catch (json::out_of_range& e) {
+ // [json.exception.out_of_range.401] array index 10 is out of range
+}
+```
+
+## `invalid_iterator`
+
+Thrown when iterators are used incorrectly.
+
+```cpp
+class invalid_iterator : public exception
+{
+public:
+ static invalid_iterator create(int id_, const std::string& what_arg,
+ BasicJsonType* context);
+};
+```
+
+### Error IDs
+
+| ID | Condition |
+|---|---|
+| 201 | Iterator not dereferenceable |
+| 202 | Iterator += on non-array |
+| 203 | Iterator compare across values |
+| 204 | Iterator - on non-array |
+| 205 | Iterator > on non-array |
+| 206 | Iterator + on non-array |
+| 207 | Cannot use key() on array iterator |
+| 209 | Range [first, last) not from same container |
+| 210 | Range not valid for erase |
+| 211 | Range not valid for insert |
+| 212 | Range from different container in insert |
+| 213 | Insert iterator for non-array |
+| 214 | Insert range for non-object |
+
+```cpp
+json j1 = {1, 2, 3};
+json j2 = {4, 5, 6};
+
+try {
+ j1.erase(j2.begin()); // wrong container
+} catch (json::invalid_iterator& e) {
+ // iterator does not fit current value
+}
+```
+
+## `other_error`
+
+Thrown for miscellaneous errors that don't fit the other categories.
+
+```cpp
+class other_error : public exception
+{
+public:
+ static other_error create(int id_, const std::string& what_arg,
+ BasicJsonType* context);
+};
+```
+
+### Error IDs
+
+| ID | Condition |
+|---|---|
+| 501 | JSON Patch test operation failed |
+
+```cpp
+json doc = {{"name", "alice"}};
+json patch = json::array({
+ {{"op", "test"}, {"path", "/name"}, {"value", "bob"}}
+});
+
+try {
+ doc.patch(patch);
+} catch (json::other_error& e) {
+ // [json.exception.other_error.501] unsuccessful: /name
+}
+```
+
+## Exception-Free API
+
+### `parse()` with `allow_exceptions = false`
+
+```cpp
+json j = json::parse("invalid", nullptr, false);
+if (j.is_discarded()) {
+ // Handle gracefully
+}
+```
+
+### `get()` Alternatives
+
+Use `value()` for safe object access with defaults:
+
+```cpp
+json j = {{"timeout", 30}};
+int t = j.value("timeout", 60); // 30
+int r = j.value("retries", 3); // 3 (missing key)
+```
+
+Use `contains()` before access:
+
+```cpp
+if (j.contains("key")) {
+ auto val = j["key"];
+}
+```
+
+Use `find()` for iterator-based access:
+
+```cpp
+auto it = j.find("key");
+if (it != j.end()) {
+ // use *it
+}
+```
+
+### Type Checking Before Access
+
+```cpp
+json j = /* unknown content */;
+
+if (j.is_string()) {
+ auto s = j.get<std::string>();
+}
+```
+
+## Catching All JSON Exceptions
+
+```cpp
+try {
+ // JSON operations
+} catch (json::exception& e) {
+ std::cerr << "JSON error [" << e.id << "]: " << e.what() << "\n";
+}
+```
+
+Since `json::exception` derives from `std::exception`, it can also be
+caught generically:
+
+```cpp
+try {
+ // ...
+} catch (const std::exception& e) {
+ std::cerr << e.what() << "\n";
+}
+```
diff --git a/docs/handbook/json4cpp/iteration.md b/docs/handbook/json4cpp/iteration.md
new file mode 100644
index 0000000000..be32a21ea8
--- /dev/null
+++ b/docs/handbook/json4cpp/iteration.md
@@ -0,0 +1,339 @@
+# json4cpp — Iteration
+
+## Iterator Types
+
+The `basic_json` class provides a full set of iterators modeled after STL
+container iterators. All are defined in
+`include/nlohmann/detail/iterators/`:
+
+| Type | Class | Header |
+|---|---|---|
+| `iterator` | `iter_impl<basic_json>` | `iter_impl.hpp` |
+| `const_iterator` | `iter_impl<const basic_json>` | `iter_impl.hpp` |
+| `reverse_iterator` | `json_reverse_iterator<iterator>` | `json_reverse_iterator.hpp` |
+| `const_reverse_iterator` | `json_reverse_iterator<const_iterator>` | `json_reverse_iterator.hpp` |
+
+## `iter_impl` Internals
+
+The `iter_impl<BasicJsonType>` template is the core iterator
+implementation. It wraps an `internal_iterator` struct:
+
+```cpp
+struct internal_iterator
+{
+ typename BasicJsonType::object_t::iterator object_iterator;
+ typename BasicJsonType::array_t::iterator array_iterator;
+ primitive_iterator_t primitive_iterator;
+};
+```
+
+Only one of these three fields is active at a time, determined by the
+`m_object` pointer's `type()`:
+
+- **Object**: uses `object_iterator` (delegates to the underlying map/ordered_map iterator)
+- **Array**: uses `array_iterator` (delegates to `std::vector::iterator`)
+- **Primitive** (null, boolean, number, string, binary): uses `primitive_iterator_t`
+
+### `primitive_iterator_t`
+
+Primitive types are treated as single-element containers. The
+`primitive_iterator_t` is a wrapper around `std::ptrdiff_t`:
+
+- Value `0` → points to the element (equivalent to `begin()`)
+- Value `1` → past-the-end (equivalent to `end()`)
+- Any other value (default-initialized to `std::numeric_limits<std::ptrdiff_t>::min()`) → invalid/uninitialized sentinel
+
+```cpp
+json j = 42;
+for (auto it = j.begin(); it != j.end(); ++it) {
+ // executes exactly once, *it == 42
+}
+```
+
+## Range Functions
+
+### Forward Iteration
+
+```cpp
+iterator begin() noexcept;
+const_iterator begin() const noexcept;
+const_iterator cbegin() const noexcept;
+
+iterator end() noexcept;
+const_iterator end() const noexcept;
+const_iterator cend() const noexcept;
+```
+
+```cpp
+json j = {1, 2, 3};
+for (auto it = j.begin(); it != j.end(); ++it) {
+ std::cout << *it << " ";
+}
+// Output: 1 2 3
+```
+
+### Reverse Iteration
+
+```cpp
+reverse_iterator rbegin() noexcept;
+const_reverse_iterator rbegin() const noexcept;
+const_reverse_iterator crbegin() const noexcept;
+
+reverse_iterator rend() noexcept;
+const_reverse_iterator rend() const noexcept;
+const_reverse_iterator crend() const noexcept;
+```
+
+```cpp
+json j = {1, 2, 3};
+for (auto it = j.rbegin(); it != j.rend(); ++it) {
+ std::cout << *it << " ";
+}
+// Output: 3 2 1
+```
+
+## Range-Based For Loops
+
+### Simple Iteration
+
+```cpp
+json j = {"alpha", "beta", "gamma"};
+for (const auto& element : j) {
+ std::cout << element << "\n";
+}
+```
+
+### Mutable Iteration
+
+```cpp
+json j = {1, 2, 3};
+for (auto& element : j) {
+ element = element.get<int>() * 2;
+}
+// j is now [2, 4, 6]
+```
+
+### Object Iteration
+
+When iterating over objects, each element is a **value** (not a key-value
+pair). Use `it.key()` and `it.value()` on an explicit iterator, or use
+`items()`:
+
+```cpp
+json j = {{"name", "alice"}, {"age", 30}};
+
+// Method 1: explicit iterator
+for (auto it = j.begin(); it != j.end(); ++it) {
+ std::cout << it.key() << ": " << it.value() << "\n";
+}
+
+// Method 2: range-for gives values only
+for (const auto& val : j) {
+ // val is the value, key is not accessible
+}
+```
+
+## `items()` — Key-Value Iteration
+
+```cpp
+iteration_proxy<iterator> items() noexcept;
+iteration_proxy<const_iterator> items() const noexcept;
+```
+
+Returns an `iteration_proxy` that wraps the iterator to provide `key()`
+and `value()` accessors in range-based for loops:
+
+```cpp
+json j = {{"name", "alice"}, {"age", 30}, {"active", true}};
+
+for (auto& [key, value] : j.items()) {
+ std::cout << key << " = " << value << "\n";
+}
+```
+
+### Array Keys
+
+For arrays, `items()` synthesizes string keys from the index:
+
+```cpp
+json j = {"a", "b", "c"};
+
+for (auto& [key, value] : j.items()) {
+ std::cout << key << ": " << value << "\n";
+}
+// Output:
+// 0: "a"
+// 1: "b"
+// 2: "c"
+```
+
+### Primitive Keys
+
+For primitives, the key is always `""` (empty string):
+
+```cpp
+json j = 42;
+for (auto& [key, value] : j.items()) {
+ // key == "", value == 42
+}
+```
+
+## `iteration_proxy` Implementation
+
+Defined in `include/nlohmann/detail/iterators/iteration_proxy.hpp`:
+
+```cpp
+template<typename IteratorType>
+class iteration_proxy
+{
+ class iteration_proxy_value
+ {
+ IteratorType anchor; // underlying iterator
+ std::size_t array_index = 0; // cached index for arrays
+ mutable std::size_t array_index_last = 0;
+ mutable string_type array_index_str = "0";
+
+ const string_type& key() const;
+ typename IteratorType::reference value() const;
+ };
+
+public:
+ iteration_proxy_value begin() const noexcept;
+ iteration_proxy_value end() const noexcept;
+};
+```
+
+The `key()` method dispatches based on the value type:
+- **Array**: converts `array_index` to string (`std::to_string`)
+- **Object**: returns `anchor.key()`
+- **Other**: returns empty string
+
+## Structured Bindings
+
+C++17 structured bindings work with `items()`:
+
+```cpp
+json j = {{"x", 1}, {"y", 2}};
+for (const auto& [key, value] : j.items()) {
+ // key is const std::string&, value is const json&
+}
+```
+
+This is enabled by specializations in `<nlohmann/detail/iterators/iteration_proxy.hpp>`:
+
+```cpp
+namespace std {
+ template<std::size_t N, typename IteratorType>
+ struct tuple_element<N, ::nlohmann::detail::iteration_proxy_value<IteratorType>>;
+
+ template<typename IteratorType>
+ struct tuple_size<::nlohmann::detail::iteration_proxy_value<IteratorType>>;
+}
+```
+
+## Iterator Arithmetic (Arrays Only)
+
+Array iterators support random access:
+
+```cpp
+json j = {10, 20, 30, 40, 50};
+
+auto it = j.begin();
+it += 2; // points to 30
+auto it2 = it + 1; // points to 40
+auto diff = it2 - it; // 1
+
+j[it - j.begin()]; // equivalent to *it
+```
+
+Object iterators support only increment and decrement (bidirectional).
+
+## `json_reverse_iterator`
+
+Extends `std::reverse_iterator` with `key()` and `value()` methods:
+
+```cpp
+template<typename Base>
+class json_reverse_iterator : public std::reverse_iterator<Base>
+{
+public:
+ // Inherited from std::reverse_iterator:
+ // operator*, operator->, operator++, operator--, operator+, operator- ...
+
+ // Added:
+ const typename Base::key_type& key() const;
+ typename Base::reference value() const;
+};
+```
+
+```cpp
+json j = {{"a", 1}, {"b", 2}, {"c", 3}};
+for (auto it = j.rbegin(); it != j.rend(); ++it) {
+ std::cout << it.key() << ": " << it.value() << "\n";
+}
+// Output (reversed iteration order)
+```
+
+## Iterator Invalidation
+
+Iterator invalidation follows the rules of the underlying containers:
+
+| Operation | Object (`std::map`) | Array (`std::vector`) |
+|---|---|---|
+| `push_back()` | Not invalidated | May invalidate all |
+| `insert()` | Not invalidated | Invalidates at/after pos |
+| `erase()` | Only erased | At/after erased pos |
+| `clear()` | All invalidated | All invalidated |
+| `operator[]` (new key) | Not invalidated | May invalidate all |
+
+For `ordered_json` (backed by `std::vector`), all iterators may be
+invalidated on any insertion/erasure since the ordered_map inherits from
+`std::vector`.
+
+## Iterating Null Values
+
+Null values behave as empty containers:
+
+```cpp
+json j; // null
+for (const auto& el : j) {
+ // never executes
+}
+assert(j.begin() == j.end());
+```
+
+## Complete Example
+
+```cpp
+#include <nlohmann/json.hpp>
+#include <iostream>
+
+using json = nlohmann::json;
+
+int main() {
+ json config = {
+ {"server", {
+ {"host", "localhost"},
+ {"port", 8080},
+ {"features", {"auth", "logging", "metrics"}}
+ }},
+ {"debug", false}
+ };
+
+ // Iterate top-level keys
+ for (auto& [key, value] : config.items()) {
+ std::cout << key << " [" << value.type_name() << "]\n";
+ }
+
+ // Iterate nested array
+ for (const auto& feature : config["server"]["features"]) {
+ std::cout << " feature: " << feature << "\n";
+ }
+
+ // Reverse iterate
+ auto& features = config["server"]["features"];
+ for (auto it = features.rbegin(); it != features.rend(); ++it) {
+ std::cout << " reverse: " << *it << "\n";
+ }
+}
+```
diff --git a/docs/handbook/json4cpp/json-patch.md b/docs/handbook/json4cpp/json-patch.md
new file mode 100644
index 0000000000..4c9de8fad5
--- /dev/null
+++ b/docs/handbook/json4cpp/json-patch.md
@@ -0,0 +1,341 @@
+# json4cpp — JSON Patch & Merge Patch
+
+## JSON Patch (RFC 6902)
+
+JSON Patch defines a JSON document structure for expressing a sequence of
+operations to apply to a JSON document.
+
+### `patch()`
+
+```cpp
+basic_json patch(const basic_json& json_patch) const;
+```
+
+Returns a new JSON value with the patch applied. Does not modify the
+original. Throws `parse_error::104` if the patch document is malformed.
+
+```cpp
+json doc = {
+ {"name", "alice"},
+ {"age", 30},
+ {"scores", {90, 85}}
+};
+
+json patch = json::array({
+ {{"op", "replace"}, {"path", "/name"}, {"value", "bob"}},
+ {{"op", "add"}, {"path", "/scores/-"}, {"value", 95}},
+ {{"op", "remove"}, {"path", "/age"}}
+});
+
+json result = doc.patch(patch);
+// {"name": "bob", "scores": [90, 85, 95]}
+```
+
+### `patch_inplace()`
+
+```cpp
+void patch_inplace(const basic_json& json_patch);
+```
+
+Applies the patch directly to the JSON value (modifying in place):
+
+```cpp
+json doc = {{"key", "old"}};
+doc.patch_inplace(json::array({
+ {{"op", "replace"}, {"path", "/key"}, {"value", "new"}}
+}));
+// doc is now {"key": "new"}
+```
+
+### Patch Operations
+
+Each operation is a JSON object with an `"op"` field and operation-specific
+fields:
+
+#### `add`
+
+Adds a value at the target location. If the target exists and is in an
+object, it is replaced. If the target is in an array, the value is inserted
+before the specified index.
+
+```json
+{"op": "add", "path": "/a/b", "value": 42}
+```
+
+The path's parent must exist. The `-` token appends to arrays:
+
+```cpp
+json doc = {{"arr", {1, 2}}};
+json p = json::array({{{"op", "add"}, {"path", "/arr/-"}, {"value", 3}}});
+doc.patch(p); // {"arr": [1, 2, 3]}
+```
+
+#### `remove`
+
+Removes the value at the target location:
+
+```json
+{"op": "remove", "path": "/a/b"}
+```
+
+Throws `out_of_range` if the path does not exist.
+
+#### `replace`
+
+Replaces the value at the target location (equivalent to `remove` + `add`):
+
+```json
+{"op": "replace", "path": "/name", "value": "bob"}
+```
+
+Throws `out_of_range` if the path does not exist.
+
+#### `move`
+
+Moves a value from one location to another:
+
+```json
+{"op": "move", "from": "/a/b", "path": "/c/d"}
+```
+
+Equivalent to `remove` from source + `add` to target. The `from` path
+must not be a proper prefix of `path` — a location cannot be moved into
+one of its own children (RFC 6902 §4.4).
+
+#### `copy`
+
+Copies a value from one location to another:
+
+```json
+{"op": "copy", "from": "/a/b", "path": "/c/d"}
+```
+
+#### `test`
+
+Tests that the value at the target location equals the specified value:
+
+```json
+{"op": "test", "path": "/name", "value": "alice"}
+```
+
+If the test fails, `patch()` throws `other_error::501`:
+
+```cpp
+json doc = {{"name", "alice"}};
+json p = json::array({
+ {{"op", "test"}, {"path", "/name"}, {"value", "bob"}}
+});
+
+try {
+ doc.patch(p);
+} catch (json::other_error& e) {
+ // [json.exception.other_error.501] unsuccessful: ...
+}
+```
+
+### Patch Validation
+
+The `patch()` method validates each operation:
+- `op` must be one of: `add`, `remove`, `replace`, `move`, `copy`, `test`
+- `path` is required for all operations
+- `value` is required for `add`, `replace`, `test`
+- `from` is required for `move`, `copy`
+
+Missing or invalid fields throw `parse_error::105`.
+
+### Operation Order
+
+Operations are applied sequentially. Each operation acts on the result of
+the previous one:
+
+```cpp
+json doc = {};
+json ops = json::array({
+ {{"op", "add"}, {"path", "/a"}, {"value", 1}},
+ {{"op", "add"}, {"path", "/b"}, {"value", 2}},
+ {{"op", "replace"}, {"path", "/a"}, {"value", 10}},
+ {{"op", "remove"}, {"path", "/b"}}
+});
+
+json result = doc.patch(ops);
+// {"a": 10}
+```
+
+## `diff()` — Computing Patches
+
+```cpp
+static basic_json diff(const basic_json& source,
+ const basic_json& target,
+ const string_t& path = "");
+```
+
+Generates a JSON Patch that transforms `source` into `target`:
+
+```cpp
+json source = {{"name", "alice"}, {"age", 30}};
+json target = {{"name", "alice"}, {"age", 31}, {"city", "wonderland"}};
+
+json patch = json::diff(source, target);
+// [
+// {"op": "replace", "path": "/age", "value": 31},
+// {"op": "add", "path": "/city", "value": "wonderland"}
+// ]
+
+// Verify roundtrip
+assert(source.patch(patch) == target);
+```
+
+### Diff Algorithm
+
+The algorithm works recursively:
+1. If `source == target`, produce no operations
+2. If types differ, produce a `replace` operation
+3. If both are objects:
+ - Keys in `source` but not `target` → `remove`
+ - Keys in `target` but not `source` → `add`
+ - Keys in both with different values → recurse
+4. If both are arrays:
+ - Compare element-by-element
+ - Produce `replace` for changed elements
+ - Produce `add` for extra elements in target
+ - Produce `remove` for extra elements in source
+5. For primitives with different values → `replace`
+
+Note: The generated patch uses only `add`, `remove`, and `replace`
+operations (not `move` or `copy`).
+
+### Custom Base Path
+
+The `path` parameter sets a prefix for all generated paths:
+
+```cpp
+json patch = json::diff(a, b, "/config");
+// All paths will start with "/config/..."
+```
+
+## Merge Patch (RFC 7396)
+
+Merge Patch is a simpler alternative to JSON Patch. Instead of an array of
+operations, a merge patch is a JSON object that describes the desired
+changes directly.
+
+### `merge_patch()`
+
+```cpp
+void merge_patch(const basic_json& apply_patch);
+```
+
+Applies a merge patch to the JSON value in place:
+
+```cpp
+json doc = {
+ {"title", "Hello"},
+ {"author", {{"name", "alice"}}},
+ {"tags", {"example"}}
+};
+
+json patch = {
+ {"title", "Goodbye"},
+ {"author", {{"name", "bob"}}},
+ {"tags", nullptr} // null means "remove"
+};
+
+doc.merge_patch(patch);
+// {
+// "title": "Goodbye",
+//   "author": {"name": "bob"}
+// }
+// "tags" was removed because the patch value was null
+```
+
+### Merge Patch Rules
+
+The merge patch algorithm (per RFC 7396):
+
+1. If the patch is not an object, replace the target entirely
+2. If the patch is an object:
+ - For each key in the patch:
+ - If the value is `null`, remove the key from the target
+ - Otherwise, recursively merge_patch the target's key with the value
+
+```cpp
+// Partial update — only specified fields change
+json config = {{"debug", false}, {"port", 8080}, {"host", "0.0.0.0"}};
+
+config.merge_patch({{"port", 9090}});
+// {"debug": false, "port": 9090, "host": "0.0.0.0"}
+
+config.merge_patch({{"debug", nullptr}});
+// {"port": 9090, "host": "0.0.0.0"}
+```
+
+### Limitations of Merge Patch
+
+- Cannot set a value to `null` (null means "delete")
+- Cannot manipulate arrays — arrays are replaced entirely
+- Cannot express "move" or "copy" semantics
+
+```cpp
+json doc = {{"items", {1, 2, 3}}};
+doc.merge_patch({{"items", {4, 5}}});
+// {"items": [4, 5]} — array replaced, not merged
+```
+
+## JSON Patch vs. Merge Patch
+
+| Feature | JSON Patch (RFC 6902) | Merge Patch (RFC 7396) |
+|---|---|---|
+| Format | Array of operations | JSON object |
+| Operations | add, remove, replace, move, copy, test | Implicit merge |
+| Array handling | Per-element operations | Replace entire array |
+| Set value to null | Yes (explicit `add`/`replace`) | No (null = delete) |
+| Test assertions | Yes (`test` op) | No |
+| Reversibility | Can `diff()` to reverse | No |
+| Complexity | More verbose | Simpler |
+
+## Complete Example
+
+```cpp
+#include <nlohmann/json.hpp>
+#include <iostream>
+
+using json = nlohmann::json;
+
+int main() {
+ // Original document
+ json doc = {
+ {"name", "Widget"},
+ {"version", "1.0"},
+ {"settings", {
+ {"color", "blue"},
+ {"size", 10},
+ {"enabled", true}
+ }},
+ {"tags", {"production", "stable"}}
+ };
+
+ // JSON Patch: precise operations
+ json patch = json::array({
+ {{"op", "replace"}, {"path", "/version"}, {"value", "2.0"}},
+ {{"op", "add"}, {"path", "/settings/theme"}, {"value", "dark"}},
+ {{"op", "remove"}, {"path", "/settings/size"}},
+ {{"op", "add"}, {"path", "/tags/-"}, {"value", "updated"}},
+ {{"op", "test"}, {"path", "/name"}, {"value", "Widget"}}
+ });
+
+ json patched = doc.patch(patch);
+
+ // Compute diff to verify
+ json computed_patch = json::diff(doc, patched);
+ assert(doc.patch(computed_patch) == patched);
+
+ // Merge Patch: simple update
+ json merge = {
+ {"version", "2.1"},
+ {"settings", {{"color", "red"}}},
+ {"tags", nullptr} // remove tags
+ };
+
+ patched.merge_patch(merge);
+ std::cout << patched.dump(2) << "\n";
+}
+```
diff --git a/docs/handbook/json4cpp/json-pointer.md b/docs/handbook/json4cpp/json-pointer.md
new file mode 100644
index 0000000000..0fb0283fe9
--- /dev/null
+++ b/docs/handbook/json4cpp/json-pointer.md
@@ -0,0 +1,361 @@
+# json4cpp — JSON Pointer (RFC 6901)
+
+## Overview
+
+JSON Pointer (RFC 6901) provides a string syntax for identifying a specific
+value within a JSON document. The library implements this as the
+`json_pointer` class template, defined in
+`include/nlohmann/detail/json_pointer.hpp`.
+
+```cpp
+template<typename RefStringType>
+class json_pointer
+{
+ friend class basic_json;
+
+ std::vector<string_t> reference_tokens; // parsed path segments
+};
+```
+
+The default alias is:
+
+```cpp
+using json_pointer = ::nlohmann::json_pointer<std::string>;
+```
+
+## Syntax
+
+A JSON Pointer is a string of zero or more tokens separated by `/`:
+
+```
+"" → whole document
+"/foo" → key "foo" in root object
+"/foo/0" → first element of array at key "foo"
+"/a~1b" → key "a/b" (escaped /)
+"/m~0n" → key "m~n" (escaped ~)
+```
+
+### Escape Sequences
+
+| Sequence | Represents |
+|---|---|
+| `~0` | `~` |
+| `~1` | `/` |
+
+Unescaping is applied **after** splitting on `/` (per RFC 6901 §4) —
+otherwise an encoded `/` (`~1`) would incorrectly act as a token separator.
+
+## Construction
+
+### From String
+
+```cpp
+json_pointer(const string_t& s = "");
+```
+
+Parses the pointer string and populates `reference_tokens`. Throws
+`parse_error::107` if the string is not a valid JSON Pointer (e.g.,
+a non-empty string that doesn't start with `/`):
+
+```cpp
+json_pointer ptr("/foo/bar/0");
+
+// Invalid:
+// json_pointer ptr("foo"); // parse_error::107 — must start with /
+```
+
+### User-Defined Literal
+
+```cpp
+using namespace nlohmann::literals;
+
+auto ptr = "/server/host"_json_pointer;
+```
+
+## Accessing Values
+
+### `operator[]` with Pointer
+
+```cpp
+json j = {{"server", {{"host", "localhost"}, {"port", 8080}}}};
+
+j["/server/host"_json_pointer]; // "localhost"
+j["/server/port"_json_pointer]; // 8080
+j["/server"_json_pointer]; // {"host":"localhost","port":8080}
+```
+
+### `at()` with Pointer
+
+```cpp
+json j = {{"a", {{"b", 42}}}};
+
+j.at("/a/b"_json_pointer); // 42
+j.at("/a/missing"_json_pointer); // throws out_of_range::403
+```
+
+### `value()` with Pointer
+
+```cpp
+json j = {{"timeout", 30}};
+
+j.value("/timeout"_json_pointer, 60); // 30
+j.value("/retries"_json_pointer, 3); // 3 (key not found, returns default)
+```
+
+### `contains()` with Pointer
+
+```cpp
+json j = {{"a", {{"b", 42}}}};
+
+j.contains("/a/b"_json_pointer); // true
+j.contains("/a/c"_json_pointer); // false
+j.contains("/x"_json_pointer); // false
+```
+
+## Pointer Manipulation
+
+### `to_string()`
+
+```cpp
+string_t to_string() const;
+```
+
+Reconstructs the pointer string with proper escaping:
+
+```cpp
+json_pointer ptr("/a~1b/0");
+ptr.to_string(); // "/a~1b/0"
+```
+
+### `operator string_t()`
+
+Implicit conversion to string (same as `to_string()`).
+
+### `operator/=` — Append Token
+
+```cpp
+json_pointer& operator/=(const string_t& token);
+json_pointer& operator/=(std::size_t array_index);
+```
+
+Appends a reference token:
+
+```cpp
+json_pointer ptr("/a");
+ptr /= "b"; // "/a/b"
+ptr /= 0; // "/a/b/0"
+```
+
+### `operator/` — Concatenate
+
+```cpp
+friend json_pointer operator/(const json_pointer& lhs, const string_t& token);
+friend json_pointer operator/(const json_pointer& lhs, std::size_t array_index);
+friend json_pointer operator/(const json_pointer& lhs, const json_pointer& rhs);
+```
+
+```cpp
+auto ptr = "/a"_json_pointer / "b" / 0; // "/a/b/0"
+auto combined = "/a"_json_pointer / "/b/c"_json_pointer; // "/a/b/c"
+```
+
+### `parent_pointer()`
+
+```cpp
+json_pointer parent_pointer() const;
+```
+
+Returns the parent pointer (all tokens except the last):
+
+```cpp
+auto ptr = "/a/b/c"_json_pointer;
+ptr.parent_pointer().to_string(); // "/a/b"
+
+auto root = ""_json_pointer;
+root.parent_pointer().to_string(); // "" (root's parent is root)
+```
+
+### `back()`
+
+```cpp
+const string_t& back() const;
+```
+
+Returns the last reference token:
+
+```cpp
+auto ptr = "/a/b/c"_json_pointer;
+ptr.back(); // "c"
+```
+
+Throws `out_of_range::405` if the pointer is empty (root).
+
+### `push_back()`
+
+```cpp
+void push_back(const string_t& token);
+void push_back(string_t&& token);
+```
+
+Appends a token:
+
+```cpp
+json_pointer ptr;
+ptr.push_back("a");
+ptr.push_back("b");
+ptr.to_string(); // "/a/b"
+```
+
+### `pop_back()`
+
+```cpp
+void pop_back();
+```
+
+Removes the last token:
+
+```cpp
+auto ptr = "/a/b/c"_json_pointer;
+ptr.pop_back();
+ptr.to_string(); // "/a/b"
+```
+
+Throws `out_of_range::405` if the pointer is empty.
+
+### `empty()`
+
+```cpp
+bool empty() const noexcept;
+```
+
+Returns `true` if the pointer has no reference tokens (i.e., it refers to
+the whole document):
+
+```cpp
+json_pointer("").empty(); // true (root pointer)
+json_pointer("/a").empty(); // false
+```
+
+## Array Indexing
+
+JSON Pointer uses string tokens for array indices. The token `"0"` refers
+to the first element, `"1"` to the second, etc.:
+
+```cpp
+json j = {"a", "b", "c"};
+
+j["/0"_json_pointer]; // "a"
+j["/1"_json_pointer]; // "b"
+j["/2"_json_pointer]; // "c"
+```
+
+### The `-` Token
+
+The special token `-` refers to the "past-the-end" position in an array.
+It can be used with `operator[]` to **append** to an array:
+
+```cpp
+json j = {1, 2, 3};
+j["/-"_json_pointer] = 4;
+// j is now [1, 2, 3, 4]
+```
+
+Using `-` with `at()` throws `out_of_range::402` since there's no element
+at that position.
+
+## `flatten()` and `unflatten()`
+
+### `flatten()`
+
+```cpp
+basic_json flatten() const;
+```
+
+Converts a nested JSON value into a flat object where each key is a JSON
+Pointer and each value is a primitive:
+
+```cpp
+json j = {
+ {"name", "alice"},
+ {"address", {
+ {"city", "wonderland"},
+ {"zip", "12345"}
+ }},
+ {"scores", {90, 85, 92}}
+};
+
+json flat = j.flatten();
+// {
+// "/name": "alice",
+// "/address/city": "wonderland",
+// "/address/zip": "12345",
+// "/scores/0": 90,
+// "/scores/1": 85,
+// "/scores/2": 92
+// }
+```
+
+### `unflatten()`
+
+```cpp
+basic_json unflatten() const;
+```
+
+The inverse of `flatten()`. Reconstructs a nested structure from a flat
+pointer-keyed object:
+
+```cpp
+json flat = {
+ {"/a/b", 1},
+ {"/a/c", 2},
+ {"/d", 3}
+};
+
+json nested = flat.unflatten();
+// {"a": {"b": 1, "c": 2}, "d": 3}
+```
+
+Throws `type_error::314` if a value is not primitive, or
+`type_error::315` if values at a path conflict (e.g., both
+`/a` and `/a/b` have values).
+
+### Roundtrip
+
+```cpp
+json j = /* any JSON value */;
+assert(j == j.flatten().unflatten());
+```
+
+Note: `unflatten()` does reconstruct arrays — numeric reference tokens
+(`/0`, `/1`) applied to a not-yet-existing value create an array during
+pointer resolution. The roundtrip caveat concerns **empty** containers:
+empty objects and arrays are flattened to `null` and come back as `null`,
+so the identity does not hold for values containing empty containers.
+
+## Internal Implementation
+
+### Token Resolution
+
+The `get_checked()` and `get_unchecked()` methods resolve a pointer
+against a JSON value by walking through the reference tokens:
+
+```cpp
+// Simplified logic
+BasicJsonType* ptr = &value;
+for (const auto& token : reference_tokens) {
+ if (ptr->is_object()) {
+ ptr = &ptr->at(token);
+ } else if (ptr->is_array()) {
+ ptr = &ptr->at(std::stoi(token));
+ }
+}
+return *ptr;
+```
+
+### Error IDs
+
+| ID | Condition |
+|---|---|
+| `parse_error::107` | Invalid pointer syntax |
+| `out_of_range::401` | Array index out of range |
+| `out_of_range::402` | Array index `-` used with `at()` |
+| `out_of_range::403` | Key not found in object |
+| `out_of_range::404` | Unresolved reference token |
+| `out_of_range::405` | `back()` / `pop_back()` on empty pointer |
diff --git a/docs/handbook/json4cpp/overview.md b/docs/handbook/json4cpp/overview.md
new file mode 100644
index 0000000000..6737ebcc6a
--- /dev/null
+++ b/docs/handbook/json4cpp/overview.md
@@ -0,0 +1,330 @@
+# json4cpp — Overview
+
+## What is json4cpp?
+
+json4cpp is the Project-Tick vendored copy of **nlohmann/json** (version 3.12.0),
+a header-only C++ library for working with JSON data. Created by Niels Lohmann,
+it provides a first-class JSON type (`nlohmann::json`) that behaves like an STL
+container and integrates seamlessly with modern C++ idioms.
+
+The library is designed around one central class template:
+
+```cpp
+template<
+ template<typename U, typename V, typename... Args> class ObjectType = std::map,
+ template<typename U, typename... Args> class ArrayType = std::vector,
+ class StringType = std::string,
+ class BooleanType = bool,
+ class NumberIntegerType = std::int64_t,
+ class NumberUnsignedType = std::uint64_t,
+ class NumberFloatType = double,
+ template<typename U> class AllocatorType = std::allocator,
+ template<typename T, typename SFINAE = void> class JSONSerializer = adl_serializer,
+ class BinaryType = std::vector<std::uint8_t>,
+ class CustomBaseClass = void
+>
+class basic_json;
+```
+
+The default specialization is the convenient type alias:
+
+```cpp
+using json = basic_json<>;
+```
+
+An insertion-order-preserving variant is also provided:
+
+```cpp
+using ordered_json = basic_json<nlohmann::ordered_map>;
+```
+
+## Key Features
+
+### Header-Only Design
+
+The entire library ships in a single header (`single_include/nlohmann/json.hpp`)
+or as a multi-header tree rooted at `include/nlohmann/json.hpp`. No compilation
+of library code is needed — just `#include` and use.
+
+The multi-header layout, used when `JSON_MultipleHeaders` is ON in CMake,
+breaks the implementation into focused files under `include/nlohmann/detail/`:
+
+| Directory / File | Purpose |
+|---|---|
+| `detail/value_t.hpp` | `value_t` enumeration of JSON types |
+| `detail/exceptions.hpp` | Exception hierarchy (`parse_error`, `type_error`, etc.) |
+| `detail/json_pointer.hpp` | RFC 6901 JSON Pointer |
+| `detail/input/lexer.hpp` | Tokenizer / lexical analyzer |
+| `detail/input/parser.hpp` | Recursive-descent parser |
+| `detail/input/json_sax.hpp` | SAX interface and DOM builders |
+| `detail/input/binary_reader.hpp` | CBOR / MessagePack / UBJSON / BSON / BJData reader |
+| `detail/input/input_adapters.hpp` | Input source abstraction (file, stream, string, iterators) |
+| `detail/output/serializer.hpp` | JSON text serializer with UTF-8 validation |
+| `detail/output/binary_writer.hpp` | Binary format writers |
+| `detail/output/output_adapters.hpp` | Output sink abstraction |
+| `detail/iterators/iter_impl.hpp` | Iterator implementation |
+| `detail/iterators/iteration_proxy.hpp` | `items()` proxy for key-value iteration |
+| `detail/conversions/from_json.hpp` | Default `from_json()` overloads |
+| `detail/conversions/to_json.hpp` | Default `to_json()` overloads |
+| `detail/macro_scope.hpp` | Configuration macros, `NLOHMANN_DEFINE_TYPE_*` |
+| `detail/meta/type_traits.hpp` | SFINAE helpers and concept checks |
+
+### Intuitive Syntax
+
+```cpp
+#include <nlohmann/json.hpp>
+using json = nlohmann::json;
+
+// Create a JSON object
+json j = {
+ {"name", "Project-Tick"},
+ {"version", 3},
+ {"features", {"parsing", "serialization", "patch"}},
+ {"active", true}
+};
+
+// Access values
+std::string name = j["name"];
+int version = j.at("version");
+
+// Iterate
+for (auto& [key, val] : j.items()) {
+ std::cout << key << ": " << val << "\n";
+}
+
+// Serialize
+std::string pretty = j.dump(4);
+```
+
+### STL Container Compatibility
+
+`basic_json` models an STL container — it defines the standard type aliases
+and fulfills the Container concept requirements:
+
+```cpp
+// Container type aliases defined by basic_json:
+using value_type = basic_json;
+using reference = value_type&;
+using const_reference = const value_type&;
+using difference_type = std::ptrdiff_t;
+using size_type = std::size_t;
+using allocator_type = AllocatorType<basic_json>;
+using pointer = typename std::allocator_traits<allocator_type>::pointer;
+using const_pointer = typename std::allocator_traits<allocator_type>::const_pointer;
+using iterator = iter_impl<basic_json>;
+using const_iterator = iter_impl<const basic_json>;
+using reverse_iterator = json_reverse_iterator<typename basic_json::iterator>;
+using const_reverse_iterator = json_reverse_iterator<typename basic_json::const_iterator>;
+```
+
+This means `basic_json` works with STL algorithms:
+
+```cpp
+json arr = {3, 1, 4, 1, 5};
+std::sort(arr.begin(), arr.end());
+auto it = std::find(arr.begin(), arr.end(), 4);
+```
+
+### Implicit Type Conversions
+
+By default (`JSON_USE_IMPLICIT_CONVERSIONS=1`), values can be implicitly
+converted to native C++ types:
+
+```cpp
+json j = 42;
+int x = j; // implicit conversion
+std::string s = j; // throws type_error::302 — type mismatch
+```
+
+This can be disabled at compile time with `-DJSON_ImplicitConversions=OFF`
+(sets `JSON_USE_IMPLICIT_CONVERSIONS` to 0), requiring explicit `.get<T>()`
+calls instead.
+
+### Comprehensive JSON Value Types
+
+Every JSON value type maps to a C++ type through the `value_t` enumeration
+defined in `detail/value_t.hpp`:
+
+| JSON Type | `value_t` Enumerator | C++ Storage Type | Default |
+|---|---|---|---|
+| Object | `value_t::object` | `object_t*` | `std::map<std::string, basic_json>` |
+| Array | `value_t::array` | `array_t*` | `std::vector<basic_json>` |
+| String | `value_t::string` | `string_t*` | `std::string` |
+| Boolean | `value_t::boolean` | `boolean_t` | `bool` |
+| Integer | `value_t::number_integer` | `number_integer_t` | `std::int64_t` |
+| Unsigned | `value_t::number_unsigned` | `number_unsigned_t` | `std::uint64_t` |
+| Float | `value_t::number_float` | `number_float_t` | `double` |
+| Binary | `value_t::binary` | `binary_t*` | `byte_container_with_subtype<vector<uint8_t>>` |
+| Null | `value_t::null` | (none) | — |
+| Discarded | `value_t::discarded` | (none) | — |
+
+Variable-length types (object, array, string, binary) are stored as heap
+pointers to keep the `json_value` union at 8 bytes on 64-bit platforms.
+
+### Binary Format Support
+
+Beyond JSON text, the library supports round-trip conversion to and from
+several binary serialization formats:
+
+- **CBOR** (RFC 7049) — `to_cbor()` / `from_cbor()`
+- **MessagePack** — `to_msgpack()` / `from_msgpack()`
+- **UBJSON** — `to_ubjson()` / `from_ubjson()`
+- **BSON** (MongoDB) — `to_bson()` / `from_bson()`
+- **BJData** — `to_bjdata()` / `from_bjdata()`
+
+### RFC Compliance
+
+| Feature | Specification |
+|---|---|
+| JSON Pointer | RFC 6901 — navigating JSON documents with path syntax |
+| JSON Patch | RFC 6902 — describing mutations as operation arrays |
+| JSON Merge Patch | RFC 7396 — simplified document merging |
+
+### SAX-Style Parsing
+
+For memory-constrained scenarios or streaming, the SAX interface
+(`json_sax<BasicJsonType>`) allows event-driven parsing without building
+a DOM tree in memory.
+
+### Custom Type Serialization
+
+The ADL-based serializer architecture lets users define `to_json()` and
+`from_json()` free functions for any user-defined type. Convenience macros
+automate this:
+
+- `NLOHMANN_DEFINE_TYPE_INTRUSIVE(Type, member1, member2, ...)`
+- `NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Type, member1, member2, ...)`
+- `NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Type, ...)`
+- `NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...)`
+- `NLOHMANN_DEFINE_TYPE_INTRUSIVE_ONLY_SERIALIZE(Type, ...)`
+- `NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(Type, ...)`
+- `NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE(Type, BaseType, ...)`
+
+### User-Defined Literals
+
+When `JSON_USE_GLOBAL_UDLS` is enabled (the default), two string literals
+are available globally:
+
+```cpp
+auto j = R"({"key": "value"})"_json;
+auto ptr = "/key"_json_pointer;
+```
+
+These are always available as `nlohmann::literals::json_literals::operator""_json`
+and `operator""_json_pointer`.
+
+## Version Information
+
+The library provides compile-time version macros:
+
+```cpp
+NLOHMANN_JSON_VERSION_MAJOR // 3
+NLOHMANN_JSON_VERSION_MINOR // 12
+NLOHMANN_JSON_VERSION_PATCH // 0
+```
+
+And a runtime introspection method:
+
+```cpp
+json meta = json::meta();
+// Returns:
+// {
+// "copyright": "(C) 2013-2026 Niels Lohmann",
+// "name": "JSON for Modern C++",
+// "url": "https://github.com/nlohmann/json",
+// "version": {"string": "3.12.0", "major": 3, "minor": 12, "patch": 0},
+// "compiler": {...},
+// "platform": "linux"
+// }
+```
+
+## Compiler Support
+
+The library requires C++11 at minimum. Higher standard modes unlock
+additional features:
+
+| Standard | Features Enabled |
+|---|---|
+| C++11 | Full library functionality |
+| C++14 | `constexpr` support for `get<>()`, transparent comparators (`std::less<>`) |
+| C++17 | `std::string_view` support, `std::any` integration, `if constexpr` |
+| C++20 | Three-way comparison (`<=>` / `std::partial_ordering`), `std::format` |
+
+Automatic detection uses `__cplusplus` (or `_MSVC_LANG` on MSVC) and defines:
+
+- `JSON_HAS_CPP_11` — always 1
+- `JSON_HAS_CPP_14` — C++14 or above
+- `JSON_HAS_CPP_17` — C++17 or above
+- `JSON_HAS_CPP_20` — C++20 or above
+
+## Configuration Macros
+
+The library's behavior is controlled by preprocessor macros, typically set
+via CMake options:
+
+| Macro | CMake Option | Default | Effect |
+|---|---|---|---|
+| `JSON_DIAGNOSTICS` | `JSON_Diagnostics` | `OFF` | Extended diagnostic messages with parent paths |
+| `JSON_DIAGNOSTIC_POSITIONS` | `JSON_Diagnostic_Positions` | `OFF` | Track byte positions in parsed values |
+| `JSON_USE_IMPLICIT_CONVERSIONS` | `JSON_ImplicitConversions` | `ON` | Allow implicit `operator ValueType()` |
+| `JSON_DISABLE_ENUM_SERIALIZATION` | `JSON_DisableEnumSerialization` | `OFF` | Disable automatic enum-to-int conversion |
+| `JSON_USE_GLOBAL_UDLS` | `JSON_GlobalUDLs` | `ON` | Place UDLs in global namespace |
+| `JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON` | `JSON_LegacyDiscardedValueComparison` | `OFF` | Old comparison behavior for discarded values |
+| `JSON_NO_IO` | — | (not set) | Disable `<iosfwd>` / stream operators |
+
+## License
+
+nlohmann/json is released under the **MIT License**.
+
+```
+SPDX-License-Identifier: MIT
+SPDX-FileCopyrightText: 2013-2026 Niels Lohmann <https://nlohmann.me>
+```
+
+## Directory Structure in Project-Tick
+
+```
+json4cpp/
+├── CMakeLists.txt # Top-level build configuration
+├── include/nlohmann/ # Multi-header source tree
+│ ├── json.hpp # Main header
+│ ├── json_fwd.hpp # Forward declarations
+│ ├── adl_serializer.hpp # ADL serializer
+│ ├── byte_container_with_subtype.hpp
+│ ├── ordered_map.hpp # Insertion-order map
+│ └── detail/ # Implementation details
+├── single_include/nlohmann/
+│ ├── json.hpp # Amalgamated single header
+│ └── json_fwd.hpp # Forward declarations
+├── tests/ # Doctest-based test suite
+├── docs/ # Upstream documentation source
+├── tools/ # Code generation and maintenance scripts
+├── cmake/ # CMake modules and configs
+├── BUILD.bazel # Bazel build file
+├── MODULE.bazel # Bazel module definition
+├── Package.swift # Swift Package Manager support
+├── meson.build # Meson build file
+└── Makefile # Convenience Makefile
+```
+
+## Further Reading
+
+The remaining handbook documents cover:
+
+- **architecture.md** — Internal class hierarchy and template structure
+- **building.md** — Integration methods, CMake support, compilation options
+- **basic-usage.md** — Creating JSON values, accessing data, type system
+- **value-types.md** — All JSON value types and their C++ representation
+- **element-access.md** — `operator[]`, `at()`, `value()`, `find()`, `contains()`
+- **iteration.md** — Iterators, range-for, `items()`, structured bindings
+- **serialization.md** — `dump()`, `parse()`, stream I/O, `to_json`/`from_json`
+- **binary-formats.md** — MessagePack, CBOR, BSON, UBJSON, BJData
+- **json-pointer.md** — RFC 6901 JSON Pointer navigation
+- **json-patch.md** — RFC 6902 JSON Patch and RFC 7396 Merge Patch
+- **custom-types.md** — ADL serialization and `NLOHMANN_DEFINE_TYPE_*` macros
+- **parsing-internals.md** — Lexer, parser, and SAX DOM builder internals
+- **exception-handling.md** — Exception types, error IDs, when they are thrown
+- **sax-interface.md** — SAX-style event-driven parsing
+- **performance.md** — Performance characteristics and tuning
+- **code-style.md** — Source code conventions
+- **testing.md** — Test framework and running tests
diff --git a/docs/handbook/json4cpp/parsing-internals.md b/docs/handbook/json4cpp/parsing-internals.md
new file mode 100644
index 0000000000..ecbc946dee
--- /dev/null
+++ b/docs/handbook/json4cpp/parsing-internals.md
@@ -0,0 +1,493 @@
+# json4cpp — Parsing Internals
+
+## Parser Architecture
+
+The parsing pipeline consists of three stages:
+
+```
+Input → InputAdapter → Lexer → Parser → JSON value
+ ↓
+ SAX Handler
+```
+
+1. **Input adapters** normalize various input sources into a uniform byte stream
+2. **Lexer** tokenizes the byte stream into JSON tokens
+3. **Parser** implements a recursive descent parser driven by SAX events
+
+## Input Adapters
+
+Defined in `include/nlohmann/detail/input/input_adapters.hpp`.
+
+### Adapter Hierarchy
+
+```cpp
+// File input
+class file_input_adapter {
+ std::FILE* m_file;
+ std::char_traits<char>::int_type get_character();
+};
+
+// Stream input
+class input_stream_adapter {
+ std::istream* is;
+ std::streambuf* sb;
+ std::char_traits<char>::int_type get_character();
+};
+
+// Iterator-based input
+template<typename IteratorType>
+class iterator_input_adapter {
+ IteratorType current;
+ IteratorType end;
+ std::char_traits<char>::int_type get_character();
+};
+```
+
+All adapters expose a `get_character()` method that returns the next byte
+or `std::char_traits<char>::eof()` at end of input.
+
+### `input_adapter()` Factory
+
+The free function `input_adapter()` selects the appropriate adapter:
+
+```cpp
+// From string/string_view
+auto adapter = input_adapter(std::string("{}"));
+
+// From iterators
+auto adapter = input_adapter(vec.begin(), vec.end());
+
+// From stream
+auto adapter = input_adapter(std::cin);
+```
+
+### Span Input Adapter
+
+For contiguous memory (C++17):
+
+```cpp
+template<typename CharT>
+class contiguous_bytes_input_adapter {
+ const CharT* current;
+ const CharT* end;
+};
+```
+
+This is the fastest adapter since it reads directly from memory without
+virtual dispatch.
+
+## Lexer
+
+Defined in `include/nlohmann/detail/input/lexer.hpp`. The lexer
+(scanner/tokenizer) converts a byte stream into a sequence of tokens.
+
+### Token Types
+
+```cpp
+enum class token_type
+{
+ uninitialized, ///< indicating the scanner is uninitialized
+ literal_true, ///< the 'true' literal
+ literal_false, ///< the 'false' literal
+ literal_null, ///< the 'null' literal
+ value_string, ///< a string (includes the quotes)
+ value_unsigned, ///< an unsigned integer
+ value_integer, ///< a signed integer
+ value_float, ///< a floating-point number
+ begin_array, ///< the character '['
+ begin_object, ///< the character '{'
+ end_array, ///< the character ']'
+ end_object, ///< the character '}'
+ name_separator, ///< the character ':'
+ value_separator, ///< the character ','
+ parse_error, ///< indicating a parse error
+ end_of_input ///< indicating the end of the input buffer
+};
+```
+
+### Lexer Class
+
+```cpp
+template<typename BasicJsonType, typename InputAdapterType>
+class lexer : public lexer_base<BasicJsonType>
+{
+public:
+ using number_integer_t = typename BasicJsonType::number_integer_t;
+ using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
+ using number_float_t = typename BasicJsonType::number_float_t;
+ using string_t = typename BasicJsonType::string_t;
+
+ // Main scanning entry point
+ token_type scan();
+
+ // Access scanned values
+ constexpr number_integer_t get_number_integer() const noexcept;
+ constexpr number_unsigned_t get_number_unsigned() const noexcept;
+ constexpr number_float_t get_number_float() const noexcept;
+ string_t& get_string();
+
+ // Error information
+ constexpr position_t get_position() const noexcept;
+ std::string get_token_string() const;
+ const std::string& get_error_message() const noexcept;
+
+private:
+ InputAdapterType ia; // input source
+ char_int_type current; // current character
+ bool next_unget = false; // lookahead flag
+ position_t position {}; // line/column tracking
+ std::vector<char_type> token_string {}; // raw token for error messages
+ string_t token_buffer {}; // decoded string value
+ // Number storage (only one is valid at a time)
+ number_integer_t value_integer = 0;
+ number_unsigned_t value_unsigned = 0;
+ number_float_t value_float = 0;
+};
+```
+
+### Position Tracking
+
+```cpp
+struct position_t
+{
+ std::size_t chars_read_total = 0; // total characters read
+ std::size_t chars_read_current_line = 0; // characters on current line
+ std::size_t lines_read = 0; // lines read (newline count)
+};
+```
+
+### String Scanning
+
+The `scan_string()` method handles:
+- Regular characters
+- Escape sequences: `\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`
+- Unicode escapes: `\uXXXX` (including surrogate pairs for `\uD800`–`\uDBFF` + `\uDC00`–`\uDFFF`)
+- UTF-8 validation using a state machine
+
+### Number Scanning
+
+The `scan_number()` method determines the number type:
+
+1. Parse sign (optional `-`)
+2. Parse integer part
+3. If `.` follows → parse fractional part → `value_float`
+4. If `e`/`E` follows → parse exponent → `value_float`
+5. Otherwise, try to fit into `number_integer_t` or `number_unsigned_t`
+
+The method first accumulates the raw characters, then converts:
+- Integers: `std::strtoull` / `std::strtoll`
+- Floats: `std::strtod`
+
+### Comment Scanning
+
+When `ignore_comments` is enabled:
+
+```cpp
+bool scan_comment() {
+ // After seeing '/', check next char:
+ // '/' → scan to end of line (C++ comment)
+ // '*' → scan to '*/' (C comment)
+}
+```
+
+## Parser
+
+Defined in `include/nlohmann/detail/input/parser.hpp`. Implements a
+**recursive descent** parser that generates SAX events.
+
+### Parser Class
+
+```cpp
+template<typename BasicJsonType, typename InputAdapterType>
+class parser
+{
+public:
+ using number_integer_t = typename BasicJsonType::number_integer_t;
+ using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
+ using number_float_t = typename BasicJsonType::number_float_t;
+ using string_t = typename BasicJsonType::string_t;
+ using lexer_t = lexer<BasicJsonType, InputAdapterType>;
+
+ parser(InputAdapterType&& adapter,
+ const parser_callback_t<BasicJsonType> cb = nullptr,
+ const bool allow_exceptions_ = true,
+ const bool skip_comments = false,
+ const bool ignore_trailing_commas_ = false);
+
+ void parse(const bool strict, BasicJsonType& result);
+ bool accept(const bool strict = true);
+
+ template<typename SAX>
+ bool sax_parse(SAX* sax, const bool strict = true);
+
+private:
+ template<typename SAX>
+ bool sax_parse_internal(SAX* sax);
+
+ lexer_t m_lexer;
+ token_type last_token = token_type::uninitialized;
+ bool allow_exceptions;
+ bool ignore_trailing_commas;
+};
+```
+
+### Recursive Descent Grammar
+
+The parser implements the JSON grammar:
+
+```
+json → value
+value → object | array | string | number | "true" | "false" | "null"
+object → '{' (pair (',' pair)* ','?)? '}'
+pair → string ':' value
+array → '[' (value (',' value)* ','?)? ']'
+```
+
+The trailing comma handling is optional (controlled by
+`ignore_trailing_commas`).
+
+### SAX-Driven Parsing
+
+The parser calls SAX handler methods as it encounters JSON structure:
+
+```cpp
+template<typename SAX>
+bool sax_parse_internal(SAX* sax)
+{
+ switch (last_token) {
+ case token_type::begin_object:
+ // 1. sax->start_object(...)
+ // 2. For each key-value:
+ // a. sax->key(string)
+ // b. recurse into sax_parse_internal for value
+ // 3. sax->end_object()
+ break;
+ case token_type::begin_array:
+ // 1. sax->start_array(...)
+ // 2. For each element: recurse into sax_parse_internal
+ // 3. sax->end_array()
+ break;
+ case token_type::value_string:
+ return sax->string(m_lexer.get_string());
+ case token_type::value_unsigned:
+ return sax->number_unsigned(m_lexer.get_number_unsigned());
+ case token_type::value_integer:
+ return sax->number_integer(m_lexer.get_number_integer());
+        case token_type::value_float:
+            // Non-finite results (overflow to inf/NaN during conversion)
+            // are rejected with out_of_range.406 before the SAX call
+            return sax->number_float(m_lexer.get_number_float(), ...);
+ case token_type::literal_true:
+ return sax->boolean(true);
+ case token_type::literal_false:
+ return sax->boolean(false);
+ case token_type::literal_null:
+ return sax->null();
+ default:
+ return sax->parse_error(...);
+ }
+}
+```
+
+### DOM Construction
+
+Two SAX handlers build the DOM tree:
+
+#### `json_sax_dom_parser`
+
+Standard DOM builder. Each SAX event creates or appends to the JSON tree:
+
+```cpp
+template<typename BasicJsonType>
+class json_sax_dom_parser
+{
+ BasicJsonType& root;
+ std::vector<BasicJsonType*> ref_stack; // stack of parent nodes
+ BasicJsonType* object_element = nullptr;
+ bool errored = false;
+ bool allow_exceptions;
+
+ bool null();
+ bool boolean(bool val);
+ bool number_integer(number_integer_t val);
+ bool number_unsigned(number_unsigned_t val);
+ bool number_float(number_float_t val, const string_t& s);
+ bool string(string_t& val);
+ bool binary(binary_t& val);
+ bool start_object(std::size_t elements);
+ bool end_object();
+ bool start_array(std::size_t elements);
+ bool end_array();
+ bool key(string_t& val);
+ bool parse_error(std::size_t position, const std::string& last_token,
+ const detail::exception& ex);
+};
+```
+
+The `ref_stack` tracks the current nesting path. On `start_object()` /
+`start_array()`, a new container is pushed. On `end_object()` /
+`end_array()`, the stack is popped.
+
+#### `json_sax_dom_callback_parser`
+
+Extends the DOM builder with callback support. When the callback returns
+`false`, the value is discarded:
+
+```cpp
+template<typename BasicJsonType>
+class json_sax_dom_callback_parser
+{
+ BasicJsonType& root;
+ std::vector<BasicJsonType*> ref_stack;
+ std::vector<bool> keep_stack; // tracks which values to keep
+ std::vector<bool> key_keep_stack;
+ BasicJsonType* object_element = nullptr;
+ BasicJsonType discarded = BasicJsonType::value_t::discarded;
+ parser_callback_t<BasicJsonType> callback;
+ bool errored = false;
+ bool allow_exceptions;
+};
+```
+
+## `accept()` Method
+
+The `accept()` method checks validity without building a DOM:
+
+```cpp
+bool accept(const bool strict = true);
+```
+
+Internally it uses `json_sax_acceptor` — a SAX handler where all methods
+return `true` (accepting everything) and `parse_error()` returns `false`:
+
+```cpp
+template<typename BasicJsonType>
+struct json_sax_acceptor
+{
+ bool null() { return true; }
+ bool boolean(bool) { return true; }
+ bool number_integer(number_integer_t) { return true; }
+ // ... all return true ...
+ bool parse_error(...) { return false; }
+};
+```
+
+## `sax_parse()` — Static SAX Entry Point
+
+```cpp
+template<typename InputType, typename SAX>
+static bool sax_parse(InputType&& i, SAX* sax,
+ input_format_t format = input_format_t::json,
+ const bool strict = true,
+ const bool ignore_comments = false,
+ const bool ignore_trailing_commas = false);
+```
+
+The `input_format_t` enum selects the parser:
+
+```cpp
+enum class input_format_t {
+ json,
+ cbor,
+ msgpack,
+ ubjson,
+ bson,
+ bjdata
+};
+```
+
+For `json`, the text parser is used. For binary formats, the
+`binary_reader` is used (which also generates SAX events).
+
+## Error Reporting
+
+### Parse Error Format
+
+```
+[json.exception.parse_error.101] parse error at line 3, column 5:
+syntax error while parsing object key - unexpected end of input;
+expected string literal
+```
+
+The error message includes:
+- Exception ID (e.g., 101)
+- Position (line and column, or byte offset)
+- Description of what was expected vs. what was found
+- The last token read (for context)
+
+### Error IDs
+
+| ID | Condition |
+|---|---|
+| 101 | Unexpected token (generic syntax error) |
+| 102 | `\u` escape with fewer than four hex digits |
+| 103 | Invalid UTF-16 surrogate pair in a `\u` escape |
+| 104 | JSON Patch: document is not an array of objects |
+| 105 | JSON Patch: operation is missing a required member |
+| 106 | JSON Pointer: array index begins with `0` |
+| 107 | JSON Pointer: non-empty pointer does not start with `/` |
+| 108 | JSON Pointer: `~` not followed by `0` or `1` |
+| 109 | JSON Pointer: array index is not a number |
+| 110 | Binary format (CBOR/MessagePack/BSON/...): unexpected end of input |
+| 112 | Binary format: unsupported or unexpected byte |
+| 113 | CBOR: expected a string (e.g. as a map key) |
+| 114 | BJData: unsupported marker or dimension argument |
+| 115 | UBJSON: high-precision number not supported |
+
+### Diagnostic Positions
+
+When `JSON_DIAGNOSTIC_POSITIONS` is enabled at compile time, the library
+tracks byte positions for each value. Error messages then include
+`start_position` and `end_position` for the offending value:
+
+```cpp
+#define JSON_DIAGNOSTICS 1
+#define JSON_DIAGNOSTIC_POSITIONS 1
+```
+
+## Parser Callback Events
+
+The parser callback receives events defined by `parse_event_t`:
+
+```cpp
+enum class parse_event_t : std::uint8_t
+{
+ object_start, // '{' read
+ object_end, // '}' read
+ array_start, // '[' read
+ array_end, // ']' read
+ key, // object key read
+ value // value read
+};
+```
+
+Callback invocation points in the parser:
+1. `object_start` — after `{` is consumed, before any key
+2. `key` — after a key string is consumed, `parsed` = the key string
+3. `value` — after any value is fully parsed, `parsed` = the value
+4. `object_end` — after `}` is consumed, `parsed` = the complete object
+5. `array_start` — after `[` is consumed, before any element
+6. `array_end` — after `]` is consumed, `parsed` = the complete array
+
+### Callback Return Value
+
+- `true` → keep the value
+- `false` → discard (replace with `discarded`)
+
+For container events (`object_start`, `array_start`), returning `false`
+skips the **entire** container and all its contents.
+
+## Performance Characteristics
+
+| Stage | Complexity | Dominant Cost |
+|---|---|---|
+| Input adapter | O(n) | Single pass over input |
+| Lexer | O(n) | Character-by-character scan, string copy |
+| Parser | O(n) | Recursive descent, SAX event dispatch |
+| DOM construction | O(n) | Memory allocation for containers |
+
+The overall parsing complexity is O(n) in the input size. Memory usage is
+proportional to the nesting depth (parser stack) plus the size of the
+resulting DOM (heap allocations for strings, arrays, objects).
+
+For large inputs where the full DOM is not needed, using the SAX interface
+directly avoids DOM construction overhead entirely.
diff --git a/docs/handbook/json4cpp/performance.md b/docs/handbook/json4cpp/performance.md
new file mode 100644
index 0000000000..a35d0bc4b8
--- /dev/null
+++ b/docs/handbook/json4cpp/performance.md
@@ -0,0 +1,275 @@
+# json4cpp — Performance
+
+## Memory Layout
+
+### `json_value` Union
+
+The core storage is a union of 8 members:
+
+```cpp
+union json_value
+{
+ object_t* object; // 8 bytes (pointer)
+ array_t* array; // 8 bytes (pointer)
+ string_t* string; // 8 bytes (pointer)
+ binary_t* binary; // 8 bytes (pointer)
+ boolean_t boolean; // 1 byte
+ number_integer_t number_integer; // 8 bytes
+ number_unsigned_t number_unsigned; // 8 bytes
+ number_float_t number_float; // 8 bytes
+};
+```
+
+The union is **8 bytes** on 64-bit platforms. Variable-length types
+(object, array, string, binary) are stored as heap-allocated pointers to
+keep the union small.
+
+### Total `basic_json` Size
+
+Each `basic_json` node contains:
+
+```cpp
+struct data
+{
+ value_t m_type = value_t::null; // 1 byte (uint8_t enum)
+ // + padding
+ json_value m_value = {}; // 8 bytes
+};
+```
+
+With alignment: **16 bytes per node** on most 64-bit platforms (1 byte
+type + 7 bytes padding + 8 bytes value).
+
+When `JSON_DIAGNOSTICS` is enabled, each node additionally stores a parent
+pointer:
+
+```cpp
+struct data
+{
+ value_t m_type;
+ json_value m_value;
+ const basic_json* m_parent = nullptr; // 8 bytes extra
+};
+```
+
+Total with diagnostics: **24 bytes per node**.
+
+## Allocation Strategy
+
+### Object Storage (default `std::map`)
+
+- Red-black tree nodes: ~48–64 bytes each (key + value + pointers + color)
+- O(log n) lookup, insert, erase
+- Good cache locality within individual nodes, poor across the tree
+
+### Array Storage (`std::vector`)
+
+- Contiguous memory: amortized O(1) push_back
+- Reallocations: capacity doubles, causing copies of all elements
+- Each element is 16 bytes (`basic_json`)
+
+### String Storage (`std::string`)
+
+- SSO (Small String Optimization): strings ≤ ~15 chars stored inline
+ (no allocation). Exact threshold is implementation-defined.
+- Longer strings: heap allocation
+
+## `ordered_map` Performance
+
+`ordered_json` uses `ordered_map<std::string, basic_json>` which inherits
+from `std::vector<std::pair<const Key, T>>`:
+
+| Operation | `std::map` (json) | `ordered_map` (ordered_json) |
+|---|---|---|
+| Lookup by key | O(log n) | O(n) linear search |
+| Insert | O(log n) | O(1) amortized (push_back) |
+| Erase by key | O(log n) | O(n) (shift elements) |
+| Iteration | O(n), sorted order | O(n), insertion order |
+| Memory | Tree nodes (fragmented) | Contiguous vector |
+
+Use `ordered_json` only when insertion order matters and the number of
+keys is small (< ~100).
+
+## Destruction
+
+### Iterative Destruction
+
+Deeply nested JSON values would cause stack overflow with recursive
+destructors. The library uses **iterative destruction**:
+
+```cpp
+void data::destroy(value_t t)
+{
+ if (t == value_t::array || t == value_t::object)
+ {
+ // Move children to a flat list
+ std::vector<basic_json> stack;
+ if (t == value_t::array) {
+ stack.reserve(m_value.array->size());
+ std::move(m_value.array->begin(), m_value.array->end(),
+ std::back_inserter(stack));
+ } else {
+ // Extract values from object pairs
+ for (auto& pair : *m_value.object) {
+ stack.push_back(std::move(pair.second));
+ }
+ }
+ // Continue flattening until stack is empty
+ while (!stack.empty()) {
+ // Pop and flatten nested containers
+ }
+ }
+ // Destroy the container itself
+}
+```
+
+This ensures O(1) stack depth regardless of JSON nesting depth.
+
+### Destruction Cost
+
+- Primitives (null, boolean, number): O(1), no heap deallocation
+- String: O(1), single `delete`
+- Array: O(n), iterative flattening + deallocation of each element
+- Object: O(n), iterative flattening + deallocation of each key-value
+- Binary: O(1), single `delete`
+
+## Parsing Performance
+
+### Lexer Optimizations
+
+- Single-character lookahead (no backtracking)
+- Token string is accumulated in a pre-allocated buffer
+- Number parsing avoids `std::string` intermediate: raw chars → integer or
+ float directly via `strtoull`/`strtod`
+- UTF-8 validation uses a compact state machine (400-byte lookup table)
+
+### Parser Complexity
+
+- O(n) in input size
+- O(d) stack depth where d = maximum nesting depth
+- SAX approach avoids intermediate DOM allocations
+
+### Fastest Parsing Path
+
+For maximum speed:
+1. Use contiguous input (`std::string`, `const char*`, `std::vector<uint8_t>`)
+ — avoids virtual dispatch in input adapter
+2. Disable comments (`ignore_comments = false`)
+3. Disable trailing commas (`ignore_trailing_commas = false`)
+4. No callback (`cb = nullptr`)
+5. Allow exceptions (`allow_exceptions = true`) — avoids extra bookkeeping
+
+## Serialization Performance
+
+### Number Formatting
+
+- **Integers**: Custom digit-by-digit algorithm writing to a 64-byte stack
+ buffer. Faster than `std::to_string` (no `std::string` allocation).
+- **Floats**: `std::snprintf` with `max_digits10` precision. The format
+ string is `%.*g`.
+
+### String Escaping
+
+- ASCII-only strings: nearly zero overhead (copy + quote wrapping)
+- Strings with special characters: per-byte check against escape table
+- `ensure_ascii`: full UTF-8 decode + `\uXXXX` encoding (slower)
+
+### Output Adapter
+
+- `output_string_adapter` (default for `dump()`): writes to `std::string`
+ with `push_back()` / `append()`
+- `output_stream_adapter`: writes to `std::ostream` via `put()` / `write()`
+- `output_vector_adapter`: writes to `std::vector<char>` via `push_back()`
+
+## Compilation Time
+
+Being header-only, json.hpp can add significant compilation time. Strategies:
+
+### Single Include vs. Multi-Header
+
+| Approach | Files | Compilation Model |
+|---|---|---|
+| `single_include/nlohmann/json.hpp` | 1 file (~25K lines) | Include everywhere |
+| `include/nlohmann/json.hpp` | Many small headers | Better incremental builds |
+
+### Reducing Compilation Time
+
+1. **Precompiled headers**: Add `nlohmann/json.hpp` to your PCH
+2. **Forward declarations**: Use `nlohmann/json_fwd.hpp` in headers, full
+ include only in `.cpp` files
+3. **Extern template**: Pre-instantiate in one TU:
+
+```cpp
+// json_instantiation.cpp
+#include <nlohmann/json.hpp>
+template class nlohmann::basic_json<>; // explicit instantiation
+```
+
+4. **Minimize includes**: Only include where actually needed
+
+## Binary Format Performance
+
+Size and speed characteristics compared to JSON text:
+
+| Aspect | JSON Text | CBOR | MessagePack | UBJSON |
+|---|---|---|---|---|
+| Encoding speed | Fast | Fast | Fast | Moderate |
+| Decoding speed | Moderate | Fast | Fast | Moderate |
+| Output size | Largest | Compact | Most compact | Moderate |
+| Human readable | Yes | No | No | No |
+
+Binary formats are generally faster to parse because:
+- No string-to-number conversion (numbers stored in binary)
+- Size-prefixed containers (no scanning for delimiters)
+- No whitespace handling
+- No string escape processing
+
+## Best Practices
+
+### Avoid Copies
+
+```cpp
+// Bad: copies the entire array
+json arr = j["data"];
+
+// Good: reference
+const auto& arr = j["data"];
+```
+
+### Use `get_ref()` for String Access
+
+```cpp
+// Bad: copies the string
+std::string s = j.get<std::string>();
+
+// Good: reference (no copy)
+const auto& s = j.get_ref<const std::string&>();
+```
+
+### Reserve Capacity
+
+```cpp
+// The API does not expose reserve() directly on a json array. If the
+// element count is known, build a std::vector<json> (which supports
+// reserve()) and move it into a json value:
+std::vector<json> items;
+items.reserve(count);
+// ... fill items ...
+json j(std::move(items)); // becomes a JSON array
+```
+
+### SAX for Large Documents
+
+```cpp
+// Bad: loads entire 1GB file into DOM
+json j = json::parse(huge_file);
+
+// Good: process streaming with SAX
+struct my_handler : nlohmann::json_sax<json> { /* ... */ };
+my_handler handler;
+json::sax_parse(huge_file, &handler);
+```
+
+### Move Semantics
+
+```cpp
+json source = get_data();
+json dest = std::move(source); // O(1) move, source becomes null
+```
diff --git a/docs/handbook/json4cpp/sax-interface.md b/docs/handbook/json4cpp/sax-interface.md
new file mode 100644
index 0000000000..3164be9694
--- /dev/null
+++ b/docs/handbook/json4cpp/sax-interface.md
@@ -0,0 +1,337 @@
+# json4cpp — SAX Interface
+
+## Overview
+
+The SAX (Simple API for XML/JSON) interface provides an event-driven
+parsing model. Instead of building a complete DOM tree in memory, the
+parser reports structural events to a handler as it reads the input.
+
+This is useful for:
+- Processing very large JSON documents without loading them fully
+- Filtering or transforming data during parsing
+- Building custom data structures directly from JSON input
+- Reducing memory usage
+
+## `json_sax` Abstract Class
+
+Defined in `include/nlohmann/detail/input/json_sax.hpp`:
+
+```cpp
+template<typename BasicJsonType>
+struct json_sax
+{
+ using number_integer_t = typename BasicJsonType::number_integer_t;
+ using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
+ using number_float_t = typename BasicJsonType::number_float_t;
+ using string_t = typename BasicJsonType::string_t;
+ using binary_t = typename BasicJsonType::binary_t;
+
+ virtual bool null() = 0;
+ virtual bool boolean(bool val) = 0;
+ virtual bool number_integer(number_integer_t val) = 0;
+ virtual bool number_unsigned(number_unsigned_t val) = 0;
+ virtual bool number_float(number_float_t val, const string_t& s) = 0;
+ virtual bool string(string_t& val) = 0;
+ virtual bool binary(binary_t& val) = 0;
+
+ virtual bool start_object(std::size_t elements) = 0;
+ virtual bool key(string_t& val) = 0;
+ virtual bool end_object() = 0;
+
+ virtual bool start_array(std::size_t elements) = 0;
+ virtual bool end_array() = 0;
+
+ virtual bool parse_error(std::size_t position,
+ const std::string& last_token,
+ const detail::exception& ex) = 0;
+
+ json_sax() = default;
+ json_sax(const json_sax&) = default;
+ json_sax(json_sax&&) noexcept = default;
+ json_sax& operator=(const json_sax&) = default;
+ json_sax& operator=(json_sax&&) noexcept = default;
+ virtual ~json_sax() = default;
+};
+```
+
+## Event Methods
+
+### Scalar Events
+
+| Method | Called When | Arguments |
+|---|---|---|
+| `null()` | `null` literal parsed | — |
+| `boolean(val)` | `true` or `false` parsed | `bool val` |
+| `number_integer(val)` | Signed integer parsed | `number_integer_t val` |
+| `number_unsigned(val)` | Unsigned integer parsed | `number_unsigned_t val` |
+| `number_float(val, s)` | Float parsed | `number_float_t val`, `string_t s` (raw text) |
+| `string(val)` | String parsed | `string_t& val` |
+| `binary(val)` | Binary data parsed | `binary_t& val` |
+
+### Container Events
+
+| Method | Called When | Arguments |
+|---|---|---|
+| `start_object(n)` | `{` read | Element count hint (or -1 if unknown) |
+| `key(val)` | Object key read | `string_t& val` |
+| `end_object()` | `}` read | — |
+| `start_array(n)` | `[` read | Element count hint (or -1 if unknown) |
+| `end_array()` | `]` read | — |
+
+### Error Event
+
+| Method | Called When | Arguments |
+|---|---|---|
+| `parse_error(pos, tok, ex)` | Parse error | Byte position, last token, exception |
+
+### Return Values
+
+All methods return `bool`:
+- `true` — continue parsing
+- `false` — abort parsing immediately
+
For `parse_error()` the return value has no practical effect: the parser
stops after an error regardless of whether you return `true` or `false`.
Whether an exception is thrown afterwards is controlled separately by the
`allow_exceptions` setting — when exceptions are disabled, a discarded
value is returned instead.
+
+## Using `sax_parse()`
+
+```cpp
+template<typename InputType, typename SAX>
+static bool sax_parse(InputType&& i,
+ SAX* sax,
+ input_format_t format = input_format_t::json,
+ const bool strict = true,
+ const bool ignore_comments = false,
+ const bool ignore_trailing_commas = false);
+```
+
+```cpp
+MySaxHandler handler;
+bool success = json::sax_parse(input_string, &handler);
+```
+
+The `format` parameter supports binary formats too:
+
+```cpp
+json::sax_parse(cbor_data, &handler, json::input_format_t::cbor);
+json::sax_parse(msgpack_data, &handler, json::input_format_t::msgpack);
+```
+
+## Implementing a Custom SAX Handler
+
+### Minimal Handler (Count Elements)
+
+```cpp
+struct counter_handler : nlohmann::json_sax<json>
+{
+ std::size_t values = 0;
+ std::size_t objects = 0;
+ std::size_t arrays = 0;
+
+ bool null() override { ++values; return true; }
+ bool boolean(bool) override { ++values; return true; }
+ bool number_integer(json::number_integer_t) override { ++values; return true; }
+ bool number_unsigned(json::number_unsigned_t) override { ++values; return true; }
+ bool number_float(json::number_float_t, const std::string&) override { ++values; return true; }
+ bool string(std::string&) override { ++values; return true; }
+ bool binary(json::binary_t&) override { ++values; return true; }
+
+ bool start_object(std::size_t) override { ++objects; return true; }
+ bool key(std::string&) override { return true; }
+ bool end_object() override { return true; }
+
+ bool start_array(std::size_t) override { ++arrays; return true; }
+ bool end_array() override { return true; }
+
+ bool parse_error(std::size_t, const std::string&,
+ const nlohmann::detail::exception&) override
+ { return false; }
+};
+
+// Usage
+counter_handler handler;
+json::sax_parse(R"({"a": [1, 2], "b": true})", &handler);
+// handler.values == 3 (1, 2, true)
+// handler.objects == 1
+// handler.arrays == 1
+```
+
+### Key Extractor
+
+Extract all keys from a JSON document without building the DOM:
+
+```cpp
+struct key_extractor : nlohmann::json_sax<json>
+{
+ std::vector<std::string> keys;
+ int depth = 0;
+
+ bool null() override { return true; }
+ bool boolean(bool) override { return true; }
+ bool number_integer(json::number_integer_t) override { return true; }
+ bool number_unsigned(json::number_unsigned_t) override { return true; }
+ bool number_float(json::number_float_t, const std::string&) override { return true; }
+ bool string(std::string&) override { return true; }
+ bool binary(json::binary_t&) override { return true; }
+
+ bool start_object(std::size_t) override { ++depth; return true; }
+ bool key(std::string& val) override {
+ keys.push_back(val);
+ return true;
+ }
+ bool end_object() override { --depth; return true; }
+
+ bool start_array(std::size_t) override { return true; }
+ bool end_array() override { return true; }
+
+ bool parse_error(std::size_t, const std::string&,
+ const nlohmann::detail::exception&) override
+ { return false; }
+};
+```
+
+### Early Termination
+
+Return `false` from any method to stop parsing immediately:
+
+```cpp
+struct find_key_handler : nlohmann::json_sax<json>
+{
+ std::string target_key;
+ json found_value;
+ bool found = false;
+ bool capture_next = false;
+
+ bool key(std::string& val) override {
+ capture_next = (val == target_key);
+ return true;
+ }
+
+ bool string(std::string& val) override {
+ if (capture_next) {
+ found_value = val;
+ found = true;
+ return false; // stop parsing
+ }
+ return true;
+ }
+
+ bool number_integer(json::number_integer_t val) override {
+ if (capture_next) {
+ found_value = val;
+ found = true;
+ return false;
+ }
+ return true;
+ }
+
+ // ... remaining methods return true ...
+ bool null() override { return !capture_next || (found = true, false); }
+ bool boolean(bool v) override {
+ if (capture_next) { found_value = v; found = true; return false; }
+ return true;
+ }
+ bool number_unsigned(json::number_unsigned_t v) override {
+ if (capture_next) { found_value = v; found = true; return false; }
+ return true;
+ }
+ bool number_float(json::number_float_t v, const std::string&) override {
+ if (capture_next) { found_value = v; found = true; return false; }
+ return true;
+ }
+ bool binary(json::binary_t&) override { return true; }
+ bool start_object(std::size_t) override { capture_next = false; return true; }
+ bool end_object() override { return true; }
+ bool start_array(std::size_t) override { capture_next = false; return true; }
+ bool end_array() override { return true; }
+ bool parse_error(std::size_t, const std::string&,
+ const nlohmann::detail::exception&) override { return false; }
+};
+```
+
+## Built-in SAX Handlers
+
+### `json_sax_dom_parser`
+
+The default handler used by `parse()`. Builds a complete DOM tree:
+
+```cpp
+template<typename BasicJsonType>
+class json_sax_dom_parser
+{
+ BasicJsonType& root;
+ std::vector<BasicJsonType*> ref_stack;
+ BasicJsonType* object_element = nullptr;
+};
+```
+
+### `json_sax_dom_callback_parser`
+
+Used when `parse()` is called with a callback. Adds filtering logic:
+
+```cpp
+template<typename BasicJsonType>
+class json_sax_dom_callback_parser
+{
+ BasicJsonType& root;
+ std::vector<BasicJsonType*> ref_stack;
+ std::vector<bool> keep_stack;
+ std::vector<bool> key_keep_stack;
+ parser_callback_t<BasicJsonType> callback;
+};
+```
+
+### `json_sax_acceptor`
+
+Used by `accept()`. All event methods return `true`, `parse_error()`
+returns `false`:
+
+```cpp
+template<typename BasicJsonType>
+struct json_sax_acceptor {
+ bool null() { return true; }
+ bool boolean(bool) { return true; }
+ // ... all true ...
+ bool parse_error(...) { return false; }
+};
+```
+
+## SAX with Binary Formats
+
+The SAX interface works uniformly across all supported formats. The
+`binary_reader` generates the same SAX events from binary input:
+
+```cpp
+struct my_handler : nlohmann::json_sax<json> { /* ... */ };
+
+my_handler handler;
+
+// JSON text
+json::sax_parse(json_text, &handler);
+
+// CBOR
+json::sax_parse(cbor_bytes, &handler, json::input_format_t::cbor);
+
+// MessagePack
+json::sax_parse(msgpack_bytes, &handler, json::input_format_t::msgpack);
+```
+
+The `start_object(n)` and `start_array(n)` methods receive the element
+count as `n` for binary formats (where the count is known from the header).
+For JSON text, `n` is always `static_cast<std::size_t>(-1)` (unknown).
+
+## Performance Considerations
+
+SAX parsing avoids DOM construction overhead:
+- No heap allocations for JSON containers
+- No recursive destruction of the DOM tree
+- Constant memory usage (proportional to nesting depth only)
+- Can process arbitrarily large documents
+
+For streaming scenarios where you need to process multiple JSON values
+from a single input, use `sax_parse()` with `strict = false` in a loop.
diff --git a/docs/handbook/json4cpp/serialization.md b/docs/handbook/json4cpp/serialization.md
new file mode 100644
index 0000000000..56265d62f1
--- /dev/null
+++ b/docs/handbook/json4cpp/serialization.md
@@ -0,0 +1,528 @@
+# json4cpp — Serialization & Deserialization
+
+## Parsing (Deserialization)
+
+### `parse()`
+
+```cpp
+template<typename InputType>
+static basic_json parse(InputType&& i,
+ const parser_callback_t cb = nullptr,
+ const bool allow_exceptions = true,
+ const bool ignore_comments = false,
+ const bool ignore_trailing_commas = false);
+```
+
+Parses JSON text from multiple source types.
+
+### Accepted Input Types
+
+| Input Type | Example |
+|---|---|
+| `std::string` / `string_view` | `json::parse("{}")` |
+| `const char*` | `json::parse(ptr)` |
+| `std::istream&` | `json::parse(file_stream)` |
+| Iterator pair | `json::parse(vec.begin(), vec.end())` |
+| `FILE*` | `json::parse(std::fopen("f.json", "r"))` |
+| Contiguous container | `json::parse(std::vector<uint8_t>{...})` |
+
+### String Parsing
+
+```cpp
+auto j = json::parse(R"({"key": "value", "num": 42})");
+```
+
+### File Parsing
+
+```cpp
+std::ifstream f("config.json");
+json j = json::parse(f);
+```
+
+### Iterator Parsing
+
+```cpp
+std::string s = R"([1, 2, 3])";
+json j = json::parse(s.begin(), s.end());
+
+std::vector<uint8_t> bytes = {'{', '}' };
+json j2 = json::parse(bytes);
+```
+
+### Error Handling
+
+By default, `parse()` throws `json::parse_error` on invalid input:
+
+```cpp
+try {
+ json j = json::parse("not json");
+} catch (json::parse_error& e) {
+ std::cerr << e.what() << "\n";
+ // "[json.exception.parse_error.101] parse error at line 1, column 1:
+ // syntax error while parsing value - invalid literal; ..."
+ std::cerr << "byte: " << e.byte << "\n"; // position of error
+}
+```
+
+Set `allow_exceptions = false` to get a discarded value instead:
+
+```cpp
+json j = json::parse("not json", nullptr, false);
+assert(j.is_discarded());
+```
+
+### Comments
+
+JSON does not support comments by standard, but the parser can skip them:
+
+```cpp
+std::string input = R"({
+ // line comment
+ "key": "value",
+ /* block comment */
+ "num": 42
+})";
+
+json j = json::parse(input, nullptr, true, true); // ignore_comments=true
+```
+
+Both C-style (`/* */`) and C++-style (`//`) comments are supported.
+
+### Trailing Commas
+
+```cpp
+std::string input = R"({
+ "a": 1,
+ "b": 2,
+})";
+
+json j = json::parse(input, nullptr, true, false, true); // ignore_trailing_commas=true
+```
+
+### `operator>>`
+
+Stream extraction operator:
+
+```cpp
+std::istringstream ss(R"({"key": "value"})");
+json j;
+ss >> j;
+```
+
+### `_json` User-Defined Literal
+
+```cpp
+using namespace nlohmann::literals;
+
+auto j = R"({"key": "value"})"_json;
+auto j2 = "[1, 2, 3]"_json;
+```
+
+The UDL is also available via `using namespace nlohmann::json_literals` or
+`using namespace nlohmann::literals`. When `JSON_GlobalUDLs` is enabled
+(the default), the literals are in the global namespace via
+`inline namespace`.
+
+### `_json_pointer` Literal
+
+```cpp
+using namespace nlohmann::literals;
+
+auto ptr = "/foo/bar/0"_json_pointer;
+```
+
+## Parser Callbacks
+
+### `parse_event_t`
+
+```cpp
+enum class parse_event_t : std::uint8_t
+{
+ object_start, ///< the parser read `{` and started to process a JSON object
+ object_end, ///< the parser read `}` and finished processing a JSON object
+ array_start, ///< the parser read `[` and started to process a JSON array
+ array_end, ///< the parser read `]` and finished processing a JSON array
+ key, ///< the parser read a key of a value in an object
+ value ///< the parser finished reading a JSON value
+};
+```
+
+### Callback Signature
+
+```cpp
+using parser_callback_t = std::function<bool(int depth,
+ parse_event_t event,
+ basic_json& parsed)>;
+```
+
- `depth` — nesting depth (0 = top level)
- `event` — current parse event
- `parsed` — the value parsed so far: the key for `key` events, the finished
  value for `value`, `object_end`, and `array_end` events, and a discarded
  value for `object_start`/`array_start` events
- Return `true` to keep the value, `false` to discard
+
+### Filtering Example
+
+```cpp
+// Remove all keys named "password"
+json j = json::parse(input, [](int /*depth*/, json::parse_event_t event, json& parsed) {
+ if (event == json::parse_event_t::key && parsed == "password") {
+ return false;
+ }
+ return true;
+});
+```
+
+### Depth-Limited Parsing
+
+```cpp
+// Only keep top-level keys
+json j = json::parse(input, [](int depth, json::parse_event_t event, json&) {
+ if (depth > 1 && event == json::parse_event_t::value) {
+ return false;
+ }
+ return true;
+});
+```
+
+## Serialization
+
+### `dump()`
+
+```cpp
+string_t dump(const int indent = -1,
+ const char indent_char = ' ',
+ const bool ensure_ascii = false,
+ const error_handler_t error_handler = error_handler_t::strict) const;
+```
+
+Converts a JSON value to a string.
+
+### Compact Output
+
+```cpp
+json j = {{"name", "alice"}, {"scores", {90, 85, 92}}};
+std::string s = j.dump();
+// {"name":"alice","scores":[90,85,92]}
+```
+
+### Pretty Printing
+
+```cpp
+std::string s = j.dump(4);
+// {
+// "name": "alice",
+// "scores": [
+// 90,
+// 85,
+// 92
+// ]
+// }
+```
+
+You can change the indent character:
+
+```cpp
+std::string s = j.dump(1, '\t');
+// {
+// "name": "alice",
+// "scores": [
+// 90,
+// 85,
+// 92
+// ]
+// }
+```
+
+### ASCII Escaping
+
+When `ensure_ascii = true`, all non-ASCII characters are escaped:
+
+```cpp
+json j = "München";
+j.dump(); // "München"
+j.dump(-1, ' ', true); // "M\u00FCnchen"
+```
+
+### UTF-8 Error Handling
+
+The `error_handler` controls what happens when the serializer encounters
+invalid UTF-8:
+
+```cpp
+enum class error_handler_t
+{
+ strict, ///< throw type_error::316 on invalid UTF-8
+ replace, ///< replace invalid bytes with U+FFFD
+ ignore ///< skip invalid bytes silently
+};
+```
+
+```cpp
+// String with invalid UTF-8 byte 0xFF
+std::string bad = "hello\xFFworld";
+json j = bad;
+
+j.dump(); // throws type_error::316
+
+j.dump(-1, ' ', false, json::error_handler_t::replace);
+// "hello\uFFFDworld"
+
+j.dump(-1, ' ', false, json::error_handler_t::ignore);
+// "helloworld"
+```
+
+### `operator<<`
+
+Stream insertion operator for convenience:
+
+```cpp
+json j = {{"key", "value"}};
+std::cout << j << "\n"; // compact: {"key":"value"}
+std::cout << std::setw(4) << j; // pretty: 4-space indent
+```
+
+Using `std::setw()` with the stream sets the indentation level.
+
+## Serializer Internals
+
+The actual serialization is performed by `detail::serializer<basic_json>`
+(in `include/nlohmann/detail/output/serializer.hpp`):
+
+```cpp
+template<typename BasicJsonType>
+class serializer
+{
+public:
+ serializer(output_adapter_t<char> s, const char ichar,
+ error_handler_t error_handler_ = error_handler_t::strict);
+
+ void dump(const BasicJsonType& val, const bool pretty_print,
+ const bool ensure_ascii, const unsigned int indent_step,
+ const unsigned int current_indent = 0);
+
+private:
+ void dump_escaped(const string_t& s, const bool ensure_ascii);
+ void dump_integer(number_integer_t x);
+ void dump_integer(number_unsigned_t x);
+ void dump_float(number_float_t x, std::true_type is_ieee);
+ void dump_float(number_float_t x, std::false_type is_ieee);
+};
+```
+
+### Number Serialization
+
+**Integers** are serialized using a custom digit-by-digit algorithm that
+writes into a stack buffer (`number_buffer`), avoiding `std::to_string`:
+
+```cpp
+// Internal buffer for number conversion
+std::array<char, 64> number_buffer{{}};
+```
+
**Floating-point** values use different strategies:
- On IEEE 754 platforms (`std::true_type`): uses the Grisu2 algorithm
  (`detail::to_chars`) to emit the shortest representation that
  roundtrips exactly
- On non-IEEE platforms (`std::false_type`): falls back to `std::snprintf`
  with `%.*g`, using precision from
  `std::numeric_limits<number_float_t>::max_digits10`
+
+Special float values:
+- `NaN` → serialized as `null`
+- `Infinity` → serialized as `null`
+
+### String Escaping
+
+The `dump_escaped()` method handles:
+- Control characters (`\n`, `\t`, `\r`, `\b`, `\f`, `\\`, `\"`)
+- Characters 0x00–0x1F are escaped as `\u00XX`
+- Non-ASCII characters can be escaped as `\uXXXX` (if `ensure_ascii = true`)
+- Surrogate pairs for characters above U+FFFF
+- Invalid UTF-8 handling per `error_handler_t`
+
+The UTF-8 decoder uses a state machine:
+
+```cpp
+static const std::array<std::uint8_t, 400> utf8d;
+std::uint8_t decode(std::uint8_t& state, std::uint32_t& codep, const std::uint8_t byte);
+```
+
+States: `UTF8_ACCEPT` (0) and `UTF8_REJECT` (1).
+
+## Output Adapters
+
+Defined in `include/nlohmann/detail/output/output_adapters.hpp`:
+
+```cpp
+template<typename CharType>
+class output_adapter
+{
+public:
+ // Adapts std::vector<CharType>
+ output_adapter(std::vector<CharType>& vec);
+
+ // Adapts std::basic_ostream
+ output_adapter(std::basic_ostream<CharType>& s);
+
+ // Adapts std::basic_string
+ output_adapter(StringType& s);
+};
+```
+
+Three concrete adapters:
+- `output_vector_adapter` — writes to `std::vector<char>`
+- `output_stream_adapter` — writes to `std::ostream`
+- `output_string_adapter` — writes to `std::string`
+
+## ADL Serialization Mechanism
+
+### How `to_json()` / `from_json()` Work
+
+The library uses **Argument-Dependent Lookup (ADL)** to find conversion
+functions. When you call `json j = my_obj;`, the library ultimately invokes:
+
+```cpp
+nlohmann::adl_serializer<MyType>::to_json(j, my_obj);
+```
+
+The default `adl_serializer` delegates to a free function found via ADL:
+
+```cpp
+template<typename ValueType, typename>
+struct adl_serializer
+{
+ template<typename BasicJsonType, typename TargetType = ValueType>
+ static auto from_json(BasicJsonType&& j, TargetType& val)
+ -> decltype(::nlohmann::from_json(std::forward<BasicJsonType>(j), val), void())
+ {
+ ::nlohmann::from_json(std::forward<BasicJsonType>(j), val);
+ }
+
+ template<typename BasicJsonType, typename TargetType = ValueType>
+ static auto to_json(BasicJsonType& j, TargetType&& val)
+ -> decltype(::nlohmann::to_json(j, std::forward<TargetType>(val)), void())
+ {
+ ::nlohmann::to_json(j, std::forward<TargetType>(val));
+ }
+};
+```
+
+### Built-in Conversions
+
+The library provides `to_json()` and `from_json()` overloads for:
+
+| C++ Type | JSON Type |
+|---|---|
+| `bool` | boolean |
+| `int`, `long`, `int64_t`, etc. | number_integer |
+| `unsigned`, `uint64_t`, etc. | number_unsigned |
+| `float`, `double` | number_float |
+| `std::string`, `const char*` | string |
+| `std::nullptr_t` | null |
+| `std::vector<T>`, `std::list<T>`, ... | array |
+| `std::array<T, N>` | array |
+| `std::map<string, T>` | object |
+| `std::unordered_map<string, T>` | object |
+| `std::pair<T1, T2>` | array of 2 |
+| `std::tuple<Ts...>` | array of N |
+| `std::optional<T>` (C++17) | value or null |
+| `std::variant<Ts...>` (C++17) | depends on held type |
+| Enum types | integer (unless disabled) |
+
+### Priority Tags for Overload Resolution
+
+Built-in `to_json()` overloads use a priority tag system to resolve
+ambiguity:
+
+```cpp
+template<typename BasicJsonType, typename T>
+void to_json(BasicJsonType& j, T val, priority_tag<1>); // higher priority
+template<typename BasicJsonType, typename T>
+void to_json(BasicJsonType& j, T val, priority_tag<0>); // lower priority
+```
+
+The `priority_tag<N>` inherits from `priority_tag<N-1>`, so higher-tagged
+overloads are preferred.
+
+## Roundtrip Guarantees
+
+### Integers
+
+Integer values survive a parse → dump → parse roundtrip exactly, as long
+as they fit within `int64_t` or `uint64_t` range.
+
+### Floating-Point
+
+The library uses `max_digits10` precision (typically 17 for `double`) to
+ensure roundtrip fidelity:
+
+```cpp
+json j = 3.141592653589793;
+std::string s = j.dump(); // "3.141592653589793"
+json j2 = json::parse(s);
+assert(j == j2); // true
+```
+
+### Strings
+
+UTF-8 strings roundtrip exactly. The serializer preserves all valid UTF-8
+sequences. Unicode escapes in input (`\uXXXX`) are converted to UTF-8 on
+parsing and will be re-escaped only if `ensure_ascii = true`.
+
+## `accept()`
+
+```cpp
+template<typename InputType>
+static bool accept(InputType&& i,
+ const bool ignore_comments = false,
+ const bool ignore_trailing_commas = false);
+```
+
+Checks whether the input is valid JSON without constructing a value:
+
+```cpp
+json::accept("{}"); // true
+json::accept("[1,2,3]"); // true
+json::accept("not json"); // false
+json::accept("{\"a\": 1,}"); // false
+json::accept("{\"a\": 1,}", false, true); // true (trailing commas)
+```
+
+## Conversion to/from STL Types
+
+### Explicit Conversion (`get<T>()`)
+
+```cpp
+json j = 42;
+int i = j.get<int>();
+double d = j.get<double>();
+
+json j2 = {1, 2, 3};
+auto v = j2.get<std::vector<int>>();
+auto s = j2.get<std::set<int>>();
+```
+
+### Implicit Conversion
+
+When `JSON_ImplicitConversions` is enabled (default), implicit conversions
+are available:
+
+```cpp
+json j = 42;
+int i = j; // implicit conversion
+std::string s = j; // throws type_error::302 (wrong type)
+
+json j2 = "hello";
+std::string s2 = j2; // "hello"
+```
+
+To disable implicit conversions (recommended for new code):
+
+```cmake
+set(JSON_ImplicitConversions OFF)
+```
+
+Or define the macro:
+
+```cpp
+#define JSON_USE_IMPLICIT_CONVERSIONS 0
+```
+
+When disabled, only explicit `get<T>()` works.
diff --git a/docs/handbook/json4cpp/testing.md b/docs/handbook/json4cpp/testing.md
new file mode 100644
index 0000000000..4439b71a42
--- /dev/null
+++ b/docs/handbook/json4cpp/testing.md
@@ -0,0 +1,190 @@
+# json4cpp — Testing
+
+## Test Framework
+
+The test suite uses **doctest** (a single-header C++ testing framework).
+Tests are located in `json4cpp/tests/src/` with one file per feature area.
+
+## Test File Naming
+
+All test files follow the pattern `unit-<feature>.cpp`:
+
+| File | Covers |
+|---|---|
+| `unit-allocator.cpp` | Custom allocator support |
+| `unit-alt-string.cpp` | Alternative string types |
+| `unit-bson.cpp` | BSON serialization/deserialization |
+| `unit-bjdata.cpp` | BJData format |
+| `unit-capacity.cpp` | `size()`, `empty()`, `max_size()` |
+| `unit-cbor.cpp` | CBOR format |
+| `unit-class_const_iterator.cpp` | `const_iterator` behavior |
+| `unit-class_iterator.cpp` | `iterator` behavior |
+| `unit-class_lexer.cpp` | Lexer token scanning |
+| `unit-class_parser.cpp` | Parser behavior |
+| `unit-comparison.cpp` | Comparison operators |
+| `unit-concepts.cpp` | Concept/type trait checks |
+| `unit-constructor1.cpp` | Constructor overloads (part 1) |
+| `unit-constructor2.cpp` | Constructor overloads (part 2) |
+| `unit-convenience.cpp` | Convenience methods (`type_name`, etc.) |
+| `unit-conversions.cpp` | Type conversions |
+| `unit-deserialization.cpp` | Parsing from various sources |
+| `unit-diagnostics.cpp` | `JSON_DIAGNOSTICS` mode |
+| `unit-element_access1.cpp` | `operator[]`, `at()` (part 1) |
+| `unit-element_access2.cpp` | `value()`, `front()`, `back()` (part 2) |
+| `unit-hash.cpp` | `std::hash` specialization |
+| `unit-inspection.cpp` | `is_*()`, `type()` methods |
+| `unit-items.cpp` | `items()` iteration proxy |
+| `unit-iterators1.cpp` | Forward iterators |
+| `unit-iterators2.cpp` | Reverse iterators |
+| `unit-json_patch.cpp` | JSON Patch (RFC 6902) |
+| `unit-json_pointer.cpp` | JSON Pointer (RFC 6901) |
+| `unit-large_json.cpp` | Large document handling |
+| `unit-merge_patch.cpp` | Merge Patch (RFC 7396) |
+| `unit-meta.cpp` | Library metadata |
+| `unit-modifiers.cpp` | `push_back()`, `insert()`, `erase()`, etc. |
+| `unit-msgpack.cpp` | MessagePack format |
+| `unit-ordered_json.cpp` | `ordered_json` behavior |
+| `unit-ordered_map.cpp` | `ordered_map` internals |
+| `unit-pointer_access.cpp` | `get_ptr()` |
+| `unit-readme.cpp` | Examples from README |
+| `unit-reference_access.cpp` | `get_ref()` |
+| `unit-regression1.cpp` | Regression tests (part 1) |
+| `unit-regression2.cpp` | Regression tests (part 2) |
+| `unit-serialization.cpp` | `dump()`, stream output |
+| `unit-testsuites.cpp` | External test suites (JSONTestSuite, etc.) |
+| `unit-to_chars.cpp` | Float-to-string conversion |
+| `unit-ubjson.cpp` | UBJSON format |
+| `unit-udt.cpp` | User-defined type conversions |
+| `unit-udt_macro.cpp` | `NLOHMANN_DEFINE_TYPE_*` macros |
+| `unit-unicode1.cpp` | Unicode handling (part 1) |
+| `unit-unicode2.cpp` | Unicode handling (part 2) |
+| `unit-unicode3.cpp` | Unicode handling (part 3) |
+| `unit-unicode4.cpp` | Unicode handling (part 4) |
+| `unit-unicode5.cpp` | Unicode handling (part 5) |
+| `unit-wstring.cpp` | Wide string support |
+
+## CMake Configuration
+
+From `tests/CMakeLists.txt`:
+
+```cmake
+# Test data files
+file(GLOB_RECURSE JSON_TEST_DATA_FILES
+ "${CMAKE_CURRENT_SOURCE_DIR}/data/*.json"
+ "${CMAKE_CURRENT_SOURCE_DIR}/data/*.cbor"
+ "${CMAKE_CURRENT_SOURCE_DIR}/data/*.msgpack"
+ "${CMAKE_CURRENT_SOURCE_DIR}/data/*.bson"
+ "${CMAKE_CURRENT_SOURCE_DIR}/data/*.ubjson"
+ "${CMAKE_CURRENT_SOURCE_DIR}/data/*.bjdata")
+
+# Each unit-*.cpp compiles to its own test executable
+```
+
+Key CMake options affecting tests:
+
+| Option | Effect |
+|---|---|
+| `JSON_BuildTests` | Enable/disable test building |
+| `JSON_Diagnostics` | Build tests with `JSON_DIAGNOSTICS=1` |
+| `JSON_Diagnostic_Positions` | Build tests with position tracking |
+| `JSON_MultipleHeaders` | Use multi-header include paths |
+| `JSON_ImplicitConversions` | Test with implicit conversions on/off |
+| `JSON_DisableEnumSerialization` | Test with enum serialization off |
+| `JSON_GlobalUDLs` | Test with global UDLs on/off |
+
+## Building Tests
+
+```bash
+cd json4cpp
+mkdir build && cd build
+
+# Configure with tests enabled
+cmake .. -DJSON_BuildTests=ON
+
+# Build
+cmake --build .
+
+# Run all tests
+ctest --output-on-failure
+
+# Run a specific test
+./tests/test-unit-json_pointer
+```
+
+## Test Structure
+
+Tests use doctest's `TEST_CASE` and `SECTION` macros:
+
+```cpp
+#include <doctest/doctest.h>
+#include <nlohmann/json.hpp>
+
+using json = nlohmann::json;
+
+TEST_CASE("element access") {
+ SECTION("array") {
+ json j = {1, 2, 3};
+
+ SECTION("operator[]") {
+ CHECK(j[0] == 1);
+ CHECK(j[1] == 2);
+ CHECK(j[2] == 3);
+ }
+
+ SECTION("at()") {
+ CHECK(j.at(0) == 1);
+ CHECK_THROWS_AS(j.at(5), json::out_of_range);
+ }
+ }
+
+ SECTION("object") {
+ json j = {{"key", "value"}};
+
+ CHECK(j["key"] == "value");
+ CHECK(j.at("key") == "value");
+ CHECK_THROWS_AS(j.at("missing"), json::out_of_range);
+ }
+}
+```
+
+### Common Assertions
+
+| Macro | Purpose |
+|---|---|
+| `CHECK(expr)` | Value assertion (non-fatal) |
+| `REQUIRE(expr)` | Value assertion (fatal) |
+| `CHECK_THROWS_AS(expr, type)` | Exception type assertion |
+| `CHECK_THROWS_WITH_AS(expr, msg, type)` | Exception message + type |
+| `CHECK_NOTHROW(expr)` | No exception assertion |
+
+## Test Data
+
+The `tests/data/` directory contains JSON files for conformance testing:
+- Input from JSONTestSuite (parsing edge cases)
+- Binary format test vectors (CBOR, MessagePack, UBJSON, BSON, BJData)
+- Unicode test cases
+- Large nested structures
+
+## Running Specific Tests
+
+doctest supports command-line filtering:
+
+```bash
+# Run tests matching a substring
+./test-unit-json_pointer -tc="JSON pointer"
+
+# List all test cases
+./test-unit-json_pointer -ltc
+
+# Run with verbose output
+./test-unit-json_pointer -s
+```
+
+## Continuous Integration
+
+Tests are run across multiple compilers and platforms via CI (see `ci/`
+directory). The `ci/supportedBranches.js` file lists which branches are
+tested. The test matrix covers:
+- GCC, Clang, MSVC
+- C++11 through C++20
+- Various option combinations (diagnostics, implicit conversions, etc.)
diff --git a/docs/handbook/json4cpp/value-types.md b/docs/handbook/json4cpp/value-types.md
new file mode 100644
index 0000000000..c9f9bf0e6f
--- /dev/null
+++ b/docs/handbook/json4cpp/value-types.md
@@ -0,0 +1,474 @@
+# json4cpp — Value Types
+
+## The `value_t` Enumeration
+
+Defined in `include/nlohmann/detail/value_t.hpp`, the `value_t` enumeration
+identifies the type of a `basic_json` value:
+
+```cpp
+enum class value_t : std::uint8_t
+{
+ null, ///< null value
+ object, ///< unordered set of name/value pairs
+ array, ///< ordered collection of values
+ string, ///< string value
+ boolean, ///< boolean value
+ number_integer, ///< signed integer number
+ number_unsigned, ///< unsigned integer number
+ number_float, ///< floating-point number
+ binary, ///< binary array (ordered collection of bytes)
+ discarded ///< discarded by the parser callback function
+};
+```
+
+The underlying type is `std::uint8_t` (1 byte), stored in `m_data.m_type`.
+
+## Type-to-Storage Mapping
+
+| `value_t` | C++ Type Alias | Default C++ Type | Storage in `json_value` |
+|---|---|---|---|
+| `null` | — | — | No active member (pointer set to `nullptr`) |
+| `object` | `object_t` | `std::map<std::string, basic_json>` | `object_t* object` |
+| `array` | `array_t` | `std::vector<basic_json>` | `array_t* array` |
+| `string` | `string_t` | `std::string` | `string_t* string` |
+| `boolean` | `boolean_t` | `bool` | `boolean_t boolean` |
+| `number_integer` | `number_integer_t` | `std::int64_t` | `number_integer_t number_integer` |
+| `number_unsigned` | `number_unsigned_t` | `std::uint64_t` | `number_unsigned_t number_unsigned` |
+| `number_float` | `number_float_t` | `double` | `number_float_t number_float` |
+| `binary` | `binary_t` | `byte_container_with_subtype<vector<uint8_t>>` | `binary_t* binary` |
+| `discarded` | — | — | No storage (used only during parse callback filtering) |
+
+Variable-length types (object, array, string, binary) are stored as **heap-
+allocated pointers** to keep the `json_value` union at 8 bytes on 64-bit.
+
+## Type Inspection Methods
+
+### `type()`
+
+Returns the `value_t` of the stored value:
+
+```cpp
+constexpr value_t type() const noexcept;
+```
+
+```cpp
+json j = 42;
+assert(j.type() == json::value_t::number_integer);
+
+json j2 = "hello";
+assert(j2.type() == json::value_t::string);
+```
+
+### `type_name()`
+
+Returns a human-readable string for the current type:
+
+```cpp
+const char* type_name() const noexcept;
+```
+
+| `value_t` | Returned String |
+|---|---|
+| `null` | `"null"` |
+| `object` | `"object"` |
+| `array` | `"array"` |
+| `string` | `"string"` |
+| `boolean` | `"boolean"` |
+| `binary` | `"binary"` |
+| `number_integer` | `"number"` |
+| `number_unsigned` | `"number"` |
+| `number_float` | `"number"` |
+| `discarded` | `"discarded"` |
+
+Note that all three numeric types return `"number"`.
+
+```cpp
+json j = {1, 2, 3};
+std::cout << j.type_name(); // "array"
+```
+
+### `is_*()` Methods
+
+All return `constexpr bool` and are `noexcept`:
+
+```cpp
+constexpr bool is_null() const noexcept;
+constexpr bool is_boolean() const noexcept;
+constexpr bool is_number() const noexcept;
+constexpr bool is_number_integer() const noexcept;
+constexpr bool is_number_unsigned() const noexcept;
+constexpr bool is_number_float() const noexcept;
+constexpr bool is_object() const noexcept;
+constexpr bool is_array() const noexcept;
+constexpr bool is_string() const noexcept;
+constexpr bool is_binary() const noexcept;
+constexpr bool is_discarded() const noexcept;
+constexpr bool is_primitive() const noexcept;
+constexpr bool is_structured() const noexcept;
+```
+
+### Category Methods
+
+```cpp
+// is_primitive() == is_null() || is_string() || is_boolean() || is_number() || is_binary()
+// is_structured() == is_array() || is_object()
+// is_number() == is_number_integer() || is_number_float()
+// is_number_integer() == (type == number_integer || type == number_unsigned)
+```
+
+Important: `is_number_integer()` returns `true` for **both** signed and unsigned
+integers. Use `is_number_unsigned()` to distinguish.
+
+```cpp
+json j = 42u;
+j.is_number() // true
+j.is_number_integer() // true
+j.is_number_unsigned() // true
+
+json j2 = -5;
+j2.is_number_integer() // true
+j2.is_number_unsigned() // false
+```
+
+### `operator value_t()`
+
+Implicit conversion to `value_t`:
+
+```cpp
+constexpr operator value_t() const noexcept;
+```
+
+```cpp
+json j = "hello";
+json::value_t t = j; // value_t::string
+```
+
+## Null Type
+
+Null is the default value:
+
+```cpp
+json j; // null
+json j = nullptr; // null
+json j(json::value_t::null); // null
+
+j.is_null() // true
+j.type_name() // "null"
+```
+
+Null values have special behavior:
+- `size()` returns 0
+- `empty()` returns `true`
+- `operator[]` with a string key converts null to an object
+- `operator[]` with a numeric index converts null to an array
+- `push_back()` converts null to an array
+
+## Object Type
+
+### Internal Representation
+
+```cpp
+using object_t = ObjectType<StringType, basic_json,
+ default_object_comparator_t,
+ AllocatorType<std::pair<const StringType, basic_json>>>;
+```
+
+Default: `std::map<std::string, basic_json, std::less<std::string>, std::allocator<...>>`
+
+Heterogeneous lookup becomes available when a C++14 transparent comparator
+(e.g. `std::less<>`) is supplied as the object comparator.
+
+### `ordered_json` Objects
+
+When using `ordered_json = basic_json<nlohmann::ordered_map>`, objects
+preserve insertion order:
+
+```cpp
+nlohmann::ordered_json j;
+j["z"] = 1;
+j["a"] = 2;
+j["m"] = 3;
+// iteration order: z, a, m
+```
+
+The `ordered_map` uses linear search (O(n) lookup) instead of tree-based
+(O(log n)).
+
+## Array Type
+
+### Internal Representation
+
+```cpp
+using array_t = ArrayType<basic_json, AllocatorType<basic_json>>;
+```
+
+Default: `std::vector<basic_json, std::allocator<basic_json>>`
+
+Arrays can contain mixed types (heterogeneous):
+
+```cpp
+json j = {1, "two", 3.0, true, nullptr, {{"nested", "object"}}};
+```
+
+## String Type
+
+### Internal Representation
+
+```cpp
+using string_t = StringType; // default: std::string
+```
+
+Strings in JSON are Unicode (UTF-8). The library validates UTF-8 during
+serialization but stores raw bytes. The `dump()` method's `error_handler`
+parameter controls what happens with invalid UTF-8:
+
+- `error_handler_t::strict` — throw `type_error::316`
+- `error_handler_t::replace` — replace with U+FFFD
+- `error_handler_t::ignore` — skip invalid bytes
+
+## Boolean Type
+
+```cpp
+using boolean_t = BooleanType; // default: bool
+```
+
+Stored directly in the union (no heap allocation):
+
+```cpp
+json j = true;
+bool b = j.get<bool>();
+```
+
+## Number Types
+
+### Three Distinct Types
+
+The library distinguishes three numeric types:
+
+```cpp
+using number_integer_t = NumberIntegerType; // default: std::int64_t
+using number_unsigned_t = NumberUnsignedType; // default: std::uint64_t
+using number_float_t = NumberFloatType; // default: double
+```
+
+During parsing, the lexer determines the best-fit type:
+1. If the number has a decimal point or exponent → `number_float`
+2. If it is negative and fits in `int64_t` → `number_integer`
+3. If it is non-negative and fits in `uint64_t` → `number_unsigned`
+4. Otherwise → `number_float` (as approximation)
+
+### Cross-Type Comparisons
+
+Numbers of different types are compared correctly:
+
+```cpp
+json(1) == json(1.0) // true
+json(1) == json(1u) // true
+json(-1) < json(0u) // true (signed < unsigned via cast)
+```
+
+The comparison logic in `JSON_IMPLEMENT_OPERATOR` handles all 6 cross-type
+combinations (int×float, float×int, unsigned×float, float×unsigned,
+unsigned×int, int×unsigned).
+
+### NaN Handling
+
+`NaN` values result in unordered comparisons:
+
+```cpp
+json nan_val = std::numeric_limits<double>::quiet_NaN();
+nan_val == nan_val; // false (IEEE 754 semantics)
+```
+
+## Binary Type
+
+### Internal Representation
+
+```cpp
+using binary_t = nlohmann::byte_container_with_subtype<BinaryType>;
+// BinaryType default: std::vector<std::uint8_t>
+```
+
+`byte_container_with_subtype<BinaryType>` extends `BinaryType` with an
+optional subtype tag:
+
+```cpp
+template<typename BinaryType>
+class byte_container_with_subtype : public BinaryType
+{
+public:
+ using container_type = BinaryType;
+ using subtype_type = std::uint64_t;
+
+ void set_subtype(subtype_type subtype_) noexcept;
+ constexpr subtype_type subtype() const noexcept;
+ constexpr bool has_subtype() const noexcept;
+ void clear_subtype() noexcept;
+
+private:
+ subtype_type m_subtype = 0;
+ bool m_has_subtype = false;
+};
+```
+
+### Creating Binary Values
+
+```cpp
+// Without subtype
+json j = json::binary({0x01, 0x02, 0x03, 0x04});
+
+// With subtype
+json j = json::binary({0x01, 0x02}, 128);
+
+// Access the binary container (here j is the value created with subtype 128)
+json::binary_t& bin = j.get_binary();
+bin.push_back(0x05);
+assert(bin.has_subtype() == true); // subtype 128 was set at creation
+```
+
+### Subtype Significance
+
+The subtype is meaningful for binary formats:
+- **MessagePack**: maps to ext type
+- **CBOR**: maps to tag
+- **BSON**: maps to binary subtype
+
+JSON text format does not support binary data natively.
+
+## Discarded Type
+
+The `discarded` type is special — it's used only during parsing with
+callbacks to indicate that a value should be excluded from the result:
+
+```cpp
+json j = json::parse(input, [](int depth, json::parse_event_t event, json& parsed) {
+ if (event == json::parse_event_t::key && parsed == "secret") {
+ return false; // discard this key-value pair
+ }
+ return true;
+});
+```
+
+When `json::parse()` is called with `allow_exceptions=false` and parsing
+fails, the result is a discarded value:
+
+```cpp
+json j = json::parse("invalid", nullptr, false);
+assert(j.is_discarded());
+```
+
+## Type Ordering
+
+The `value_t` enumeration has a defined ordering used for cross-type
+comparisons when values can't be compared directly:
+
+```
+null < boolean < number < object < array < string < binary
+```
+
+This means:
+
+```cpp
+json(nullptr) < json(false); // true (null < boolean)
+json(42) < json::object(); // true (number < object)
+json("abc") > json::array(); // true (string > array)
+```
+
+The ordering is implemented via a lookup array in `value_t.hpp`:
+
+```cpp
+static constexpr std::array<std::uint8_t, 9> order = {{
+ 0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */,
+ 1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */,
+ 6 /* binary */
+}};
+```
+
+All three numeric types share order index 2.
+
+## Type Conversions
+
+### Widening Conversions
+
+```cpp
+json j = 42; // number_integer
+double d = j.get<double>(); // OK: int → double
+
+json j2 = 3.14; // number_float
+int i = j2.get<int>(); // OK: truncates to 3
+```
+
+### Container Conversions
+
+```cpp
+json arr = {1, 2, 3};
+auto v = arr.get<std::vector<int>>();
+auto l = arr.get<std::list<int>>();
+auto s = arr.get<std::set<int>>();
+
+json obj = {{"a", 1}, {"b", 2}};
+auto m = obj.get<std::map<std::string, int>>();
+auto um = obj.get<std::unordered_map<std::string, int>>();
+```
+
+### String Conversions
+
+```cpp
+json j = 42;
+std::string s = j.dump(); // "42" (serialization, not type conversion)
+// j.get<std::string>() // throws type_error::302
+
+json j2 = "hello";
+std::string s2 = j2.get<std::string>(); // "hello"
+```
+
+## Constructing from `value_t`
+
+You can construct an empty value of a specific type:
+
+```cpp
+json j_null(json::value_t::null); // null
+json j_obj(json::value_t::object); // {}
+json j_arr(json::value_t::array); // []
+json j_str(json::value_t::string); // ""
+json j_bool(json::value_t::boolean); // false
+json j_int(json::value_t::number_integer); // 0
+json j_uint(json::value_t::number_unsigned); // 0
+json j_float(json::value_t::number_float); // 0.0
+json j_bin(json::value_t::binary); // binary([], no subtype)
+```
+
+## Pointer Access
+
+Low-level pointer access to the underlying value:
+
+```cpp
+json j = "hello";
+
+// Returns nullptr if the stored type doesn't match
+const std::string* sp = j.get_ptr<const std::string*>();
+assert(sp != nullptr);
+
+// get_ptr<> only accepts pointers to the configured internal types
+// (string_t, number_integer_t, ...), so use json::number_integer_t, not int
+const json::number_integer_t* ip = j.get_ptr<const json::number_integer_t*>();
+assert(ip == nullptr); // wrong type: j holds a string
+
+// Mutable pointer access
+std::string* msp = j.get_ptr<std::string*>();
+*msp = "world";
+assert(j == "world");
+```
+
+## Reference Access
+
+```cpp
+json j = "hello";
+
+const std::string& ref = j.get_ref<const std::string&>();
+assert(ref == "hello");
+
+// Throws type_error::303 if the stored type doesn't match. Note that the
+// ReferenceType must still be one of the configured internal types
+// (e.g. json::number_integer_t, not plain int).
+try {
+    const json::number_integer_t& iref = j.get_ref<const json::number_integer_t&>();
+} catch (json::type_error& e) {
+    // "incompatible ReferenceType for get_ref, actual type is string"
+}
+```
diff --git a/docs/handbook/libnbtplusplus/architecture.md b/docs/handbook/libnbtplusplus/architecture.md
new file mode 100644
index 0000000000..cd8da9722b
--- /dev/null
+++ b/docs/handbook/libnbtplusplus/architecture.md
@@ -0,0 +1,607 @@
+# libnbt++ Architecture
+
+## High-Level Design
+
+libnbt++ follows a classic object-oriented design with a polymorphic tag hierarchy, augmented by C++ template metaprogramming for code reuse. The architecture has five major layers:
+
+1. **Tag Hierarchy** — Polymorphic class tree rooted at `tag`, with concrete types for each NBT tag
+2. **Value Layer** — Type-erased wrappers (`value`, `value_initializer`) for runtime tag manipulation
+3. **I/O Layer** — Stream-based readers/writers handling binary serialization and endianness
+4. **Compression Layer** — zlib stream adapters for transparent gzip/deflate support
+5. **Text Layer** — Formatters for human-readable tag output (JSON-like)
+
+---
+
+## Class Hierarchy
+
+```
+tag (abstract base, tag.h)
+└── detail::crtp_tag<Sub> (CRTP intermediate, crtp_tag.h)
+ ├── tag_primitive<int8_t> → typedef tag_byte
+ ├── tag_primitive<int16_t> → typedef tag_short
+ ├── tag_primitive<int32_t> → typedef tag_int
+ ├── tag_primitive<int64_t> → typedef tag_long
+ ├── tag_primitive<float> → typedef tag_float
+ ├── tag_primitive<double> → typedef tag_double
+ ├── tag_string
+ ├── tag_array<int8_t> → typedef tag_byte_array
+ ├── tag_array<int32_t> → typedef tag_int_array
+ ├── tag_array<int64_t> → typedef tag_long_array
+ ├── tag_list
+ └── tag_compound
+```
+
+All concrete tag classes are declared `final` — they cannot be further subclassed. The hierarchy uses exactly two levels of inheritance: `tag` → `crtp_tag<Sub>` → concrete class.
+
+---
+
+## The CRTP Pattern
+
+The Curiously Recurring Template Pattern (CRTP) is central to libnbt++'s design. The intermediate class `detail::crtp_tag<Sub>` (defined in `include/crtp_tag.h`) automatically implements all the `tag` virtual methods that can be expressed generically:
+
+```cpp
+namespace nbt {
+namespace detail {
+
+template <class Sub> class crtp_tag : public tag
+{
+public:
+ virtual ~crtp_tag() noexcept = 0; // Pure virtual to keep it abstract
+
+ tag_type get_type() const noexcept override final {
+ return Sub::type; // Each Sub has a static constexpr tag_type type
+ };
+
+ std::unique_ptr<tag> clone() const& override final {
+ return make_unique<Sub>(sub_this()); // Copy-constructs Sub
+ }
+
+ std::unique_ptr<tag> move_clone() && override final {
+ return make_unique<Sub>(std::move(sub_this())); // Move-constructs Sub
+ }
+
+ tag& assign(tag&& rhs) override final {
+ return sub_this() = dynamic_cast<Sub&&>(rhs);
+ // Throws std::bad_cast if rhs is not the same Sub type
+ }
+
+ void accept(nbt_visitor& visitor) override final {
+ visitor.visit(sub_this()); // Double dispatch
+ }
+
+ void accept(const_nbt_visitor& visitor) const override final {
+ visitor.visit(sub_this());
+ }
+
+private:
+ bool equals(const tag& rhs) const override final {
+ return sub_this() == static_cast<const Sub&>(rhs);
+ }
+
+ Sub& sub_this() { return static_cast<Sub&>(*this); }
+ const Sub& sub_this() const { return static_cast<const Sub&>(*this); }
+};
+
+template <class Sub> crtp_tag<Sub>::~crtp_tag() noexcept {}
+
+} // namespace detail
+} // namespace nbt
+```
+
+### What the CRTP Provides
+
+Each concrete tag class inherits from `crtp_tag<Self>` and automatically gets:
+
+| Method | Behavior |
+|------------------|---------------------------------------------------------|
+| `get_type()` | Returns `Sub::type` (the static `tag_type` constant) |
+| `clone()` | Copy-constructs a new `Sub` via `make_unique<Sub>` |
+| `move_clone()` | Move-constructs a new `Sub` |
+| `assign(tag&&)` | Dynamic casts to `Sub&&` and uses `Sub::operator=` |
+| `accept()` | Calls `visitor.visit(sub_this())` — double dispatch |
+| `equals()` | Uses `Sub::operator==` |
+
+The concrete class only needs to provide:
+
+1. A `static constexpr tag_type type` member
+2. Copy and move constructors/assignment operators
+3. `operator==` and `operator!=`
+4. `read_payload(io::stream_reader&)` and `write_payload(io::stream_writer&) const`
+
+---
+
+## The tag Base Class
+
+The `tag` base class (defined in `include/tag.h`) establishes the interface for all NBT tags:
+
+```cpp
+class NBT_EXPORT tag
+{
+public:
+ virtual ~tag() noexcept {}
+
+ virtual tag_type get_type() const noexcept = 0;
+
+ virtual std::unique_ptr<tag> clone() const& = 0;
+ virtual std::unique_ptr<tag> move_clone() && = 0;
+ std::unique_ptr<tag> clone() &&; // Delegates to move_clone
+
+ template <class T> T& as();
+ template <class T> const T& as() const;
+
+ virtual tag& assign(tag&& rhs) = 0;
+
+ virtual void accept(nbt_visitor& visitor) = 0;
+ virtual void accept(const_nbt_visitor& visitor) const = 0;
+
+ virtual void read_payload(io::stream_reader& reader) = 0;
+ virtual void write_payload(io::stream_writer& writer) const = 0;
+
+ static std::unique_ptr<tag> create(tag_type type);
+ static std::unique_ptr<tag> create(tag_type type, int8_t val);
+ static std::unique_ptr<tag> create(tag_type type, int16_t val);
+ static std::unique_ptr<tag> create(tag_type type, int32_t val);
+ static std::unique_ptr<tag> create(tag_type type, int64_t val);
+ static std::unique_ptr<tag> create(tag_type type, float val);
+ static std::unique_ptr<tag> create(tag_type type, double val);
+
+ friend NBT_EXPORT bool operator==(const tag& lhs, const tag& rhs);
+ friend NBT_EXPORT bool operator!=(const tag& lhs, const tag& rhs);
+
+private:
+ virtual bool equals(const tag& rhs) const = 0;
+};
+```
+
+### Key Design Choices
+
+1. **`clone()` is ref-qualified**: `const&` for copy-cloning, `&&` for move-cloning. The rvalue `clone()` delegates to `move_clone()`.
+
+2. **`as<T>()` uses `dynamic_cast`**: Provides safe downcasting with `std::bad_cast` on failure.
+
+3. **`operator==` uses RTTI**: The free `operator==` first checks `typeid(lhs) == typeid(rhs)`, then delegates to the virtual `equals()` method.
+
+4. **Factory methods**: `tag::create()` constructs tags by `tag_type` at runtime, supporting both default construction and numeric initialization.
+
+---
+
+## Ownership Model
+
+libnbt++ uses a strict ownership model based on `std::unique_ptr<tag>`:
+
+### Where Ownership Lives
+
+- **`value`** — Owns a single tag via `std::unique_ptr<tag> tag_`
+- **`tag_compound`** — Owns values in a `std::map<std::string, value>`
+- **`tag_list`** — Owns values in a `std::vector<value>`
+
+### Ownership Rules
+
+1. **Single owner**: Every tag has exactly one owner. No shared ownership.
+2. **Deep copying**: `clone()` performs a full deep copy of the entire tag tree.
+3. **Move semantics**: Tags can be efficiently moved between owners without copying.
+4. **No raw pointers for ownership**: The library never uses raw `new`/`delete` for tag management.
+
+### The value Class
+
+The `value` class (`include/value.h`) is the primary type-erasure mechanism. It wraps `std::unique_ptr<tag>` and provides:
+
+```cpp
+class NBT_EXPORT value
+{
+public:
+ value() noexcept {} // Empty/null value
+ explicit value(std::unique_ptr<tag>&& t) noexcept; // Takes ownership
+ explicit value(tag&& t); // Clones the tag
+
+ // Move only (no implicit copy)
+ value(value&&) noexcept = default;
+ value& operator=(value&&) noexcept = default;
+
+ // Explicit copy
+ explicit value(const value& rhs);
+ value& operator=(const value& rhs);
+
+ // Type conversion
+ operator tag&();
+ operator const tag&() const;
+ tag& get();
+ const tag& get() const;
+ template <class T> T& as();
+ template <class T> const T& as() const;
+
+ // Numeric assignments (existing tag gets updated, or new one created)
+ value& operator=(int8_t val);
+ value& operator=(int16_t val);
+ value& operator=(int32_t val);
+ value& operator=(int64_t val);
+ value& operator=(float val);
+ value& operator=(double val);
+
+ // String assignment
+ value& operator=(const std::string& str);
+ value& operator=(std::string&& str);
+
+ // Numeric conversions (widening only)
+ explicit operator int8_t() const;
+ explicit operator int16_t() const;
+ explicit operator int32_t() const;
+ explicit operator int64_t() const;
+ explicit operator float() const;
+ explicit operator double() const;
+ explicit operator const std::string&() const;
+
+ // Compound access delegation
+ value& at(const std::string& key);
+ value& operator[](const std::string& key);
+ value& operator[](const char* key);
+
+ // List access delegation
+ value& at(size_t i);
+ value& operator[](size_t i);
+
+ // Null check
+ explicit operator bool() const { return tag_ != nullptr; }
+
+ // Direct pointer access
+ std::unique_ptr<tag>& get_ptr();
+ void set_ptr(std::unique_ptr<tag>&& t);
+ tag_type get_type() const;
+
+private:
+ std::unique_ptr<tag> tag_;
+};
+```
+
+### The value_initializer Class
+
+`value_initializer` (`include/value_initializer.h`) extends `value` with **implicit** constructors. It is used as a parameter type in functions like `tag_compound::put()` and `tag_list::push_back()`:
+
+```cpp
+class NBT_EXPORT value_initializer : public value
+{
+public:
+ value_initializer(std::unique_ptr<tag>&& t) noexcept;
+ value_initializer(std::nullptr_t) noexcept;
+ value_initializer(value&& val) noexcept;
+ value_initializer(tag&& t);
+
+ value_initializer(int8_t val); // Creates tag_byte
+ value_initializer(int16_t val); // Creates tag_short
+ value_initializer(int32_t val); // Creates tag_int
+ value_initializer(int64_t val); // Creates tag_long
+ value_initializer(float val); // Creates tag_float
+ value_initializer(double val); // Creates tag_double
+ value_initializer(const std::string& str); // Creates tag_string
+ value_initializer(std::string&& str); // Creates tag_string
+ value_initializer(const char* str); // Creates tag_string
+};
+```
+
+This is why you can write `compound.put("key", 42)` — the `42` (an `int`) implicitly converts to `value_initializer(int32_t(42))`, which constructs a `tag_int(42)` inside a `value`.
+
+### Why value vs value_initializer?
+
+The separation exists because implicit conversions on `value` itself would cause ambiguity problems. For example, if `value` had an implicit constructor from `tag&&`, then expressions involving compound assignment could be ambiguous. By limiting implicit conversions to `value_initializer` (used only as function parameters), the library avoids these issues.
+
+---
+
+## Template Design
+
+### tag_primitive<T>
+
+Six NBT types share the same structure: a single numeric value. The `tag_primitive<T>` template (`include/tag_primitive.h`) handles all of them:
+
+```cpp
+template <class T>
+class tag_primitive final : public detail::crtp_tag<tag_primitive<T>>
+{
+public:
+ typedef T value_type;
+ static constexpr tag_type type = detail::get_primitive_type<T>::value;
+
+ constexpr tag_primitive(T val = 0) noexcept : value(val) {}
+
+ operator T&();
+ constexpr operator T() const;
+ constexpr T get() const { return value; }
+
+ tag_primitive& operator=(T val);
+ void set(T val);
+
+ void read_payload(io::stream_reader& reader) override;
+ void write_payload(io::stream_writer& writer) const override;
+
+private:
+ T value;
+};
+```
+
+The type mapping uses `detail::get_primitive_type<T>` (`include/primitive_detail.h`):
+
+```cpp
+template <> struct get_primitive_type<int8_t> : std::integral_constant<tag_type, tag_type::Byte> {};
+template <> struct get_primitive_type<int16_t> : std::integral_constant<tag_type, tag_type::Short> {};
+template <> struct get_primitive_type<int32_t> : std::integral_constant<tag_type, tag_type::Int> {};
+template <> struct get_primitive_type<int64_t> : std::integral_constant<tag_type, tag_type::Long> {};
+template <> struct get_primitive_type<float> : std::integral_constant<tag_type, tag_type::Float> {};
+template <> struct get_primitive_type<double> : std::integral_constant<tag_type, tag_type::Double> {};
+```
+
+**Explicit instantiation**: Template instantiations are declared `extern template class NBT_EXPORT tag_primitive<...>` in the header and explicitly instantiated in `src/tag.cpp`. This prevents duplicate template instantiations across translation units.
+
+### tag_array<T>
+
+Three NBT array types share the same vector-based structure. The `tag_array<T>` template (`include/tag_array.h`) handles all of them:
+
+```cpp
+template <class T>
+class tag_array final : public detail::crtp_tag<tag_array<T>>
+{
+public:
+ typedef typename std::vector<T>::iterator iterator;
+ typedef typename std::vector<T>::const_iterator const_iterator;
+ typedef T value_type;
+ static constexpr tag_type type = detail::get_array_type<T>::value;
+
+ tag_array() {}
+ tag_array(std::initializer_list<T> init);
+ tag_array(std::vector<T>&& vec) noexcept;
+
+ std::vector<T>& get();
+ T& at(size_t i);
+ T& operator[](size_t i);
+ void push_back(T val);
+ void pop_back();
+ size_t size() const;
+ void clear();
+
+ iterator begin(); iterator end();
+ const_iterator begin() const; const_iterator end() const;
+
+ void read_payload(io::stream_reader& reader) override;
+ void write_payload(io::stream_writer& writer) const override;
+
+private:
+ std::vector<T> data;
+};
+```
+
+The type mapping uses `detail::get_array_type<T>`:
+
+```cpp
+template <> struct get_array_type<int8_t> : std::integral_constant<tag_type, tag_type::Byte_Array> {};
+template <> struct get_array_type<int32_t> : std::integral_constant<tag_type, tag_type::Int_Array> {};
+template <> struct get_array_type<int64_t> : std::integral_constant<tag_type, tag_type::Long_Array> {};
+```
+
+**Specialized I/O**: `read_payload` and `write_payload` have explicit template specializations for `int8_t` (byte arrays can be read/written as raw byte blocks) and `int64_t` (long arrays read element-by-element with `read_num`). The generic template handles `int32_t` arrays.
+
+---
+
+## File Roles Breakdown
+
+### Core Headers
+
+| File | Role |
+|------|------|
+| `include/tag.h` | Defines the `tag` abstract base class, the `tag_type` enum (End through Long_Array, plus Null), `is_valid_type()`, the `create()` factory methods, `operator==`/`!=`, and `operator<<`. Also forward-declares `nbt_visitor`, `const_nbt_visitor`, `io::stream_reader`, and `io::stream_writer`. |
+| `include/tagfwd.h` | Forward declarations only. Declares `tag`, `tag_primitive<T>` with all six typedefs, `tag_string`, `tag_array<T>` with all three typedefs, `tag_list`, and `tag_compound`. Used by headers that need type names without full definitions. |
+| `include/nbt_tags.h` | Convenience umbrella header. Simply includes `tag_primitive.h`, `tag_string.h`, `tag_array.h`, `tag_list.h`, and `tag_compound.h`. |
+| `include/crtp_tag.h` | Defines `detail::crtp_tag<Sub>`, the CRTP intermediate class. Includes `tag.h`, `nbt_visitor.h`, and `make_unique.h`. |
+| `include/primitive_detail.h` | Defines `detail::get_primitive_type<T>`, mapping C++ types to `tag_type` values. Uses `std::integral_constant` for compile-time constants. |
+| `include/make_unique.h` | Provides `nbt::make_unique<T>(args...)`, a C++11 polyfill for `std::make_unique` (which was only added in C++14). |
+
+### Tag Implementation Headers
+
+| File | Role |
+|------|------|
+| `include/tag_primitive.h` | Full definition of `tag_primitive<T>` including inline `read_payload`/`write_payload`. The six typedefs (`tag_byte` through `tag_double`) are declared here, along with `extern template` declarations for link-time optimization. |
+| `include/tag_string.h` | Full definition of `tag_string`. Wraps `std::string` with constructors from `const std::string&`, `std::string&&`, and `const char*`. Provides implicit conversion operators to `std::string&` and `const std::string&`. |
+| `include/tag_array.h` | Full definition of `tag_array<T>` with specialized `read_payload`/`write_payload` for `int8_t`, `int64_t`, and the generic case. The three typedefs (`tag_byte_array`, `tag_int_array`, `tag_long_array`) are at the bottom. |
+| `include/tag_list.h` | Full definition of `tag_list`. Stores `std::vector<value>` with a tracked `el_type_` (element type). Provides `of<T>()` static factory, `push_back(value_initializer&&)`, `emplace_back<T, Args...>()`, `set()`, iterators, and I/O methods. |
+| `include/tag_compound.h` | Full definition of `tag_compound`. Stores `std::map<std::string, value>`. Provides `at()`, `operator[]`, `put()`, `insert()`, `emplace<T>()`, `erase()`, `has_key()`, iterators, and I/O methods. |
+
+### Value Layer
+
+| File | Role |
+|------|------|
+| `include/value.h` | Type-erased `value` class wrapping `std::unique_ptr<tag>`. Provides numeric/string assignment operators, conversion operators (with widening semantics), compound/list access delegation via `operator[]`. |
+| `include/value_initializer.h` | `value_initializer` subclass of `value` with implicit constructors from primitive types, strings, tags, and `nullptr`. Used as function parameter type. |
+
+### I/O Headers
+
+| File | Role |
+|------|------|
+| `include/endian_str.h` | The `endian` namespace. Declares `read_little`/`read_big`/`write_little`/`write_big` overloads for all integer and floating-point types. Template functions `read()`/`write()` dispatch based on an `endian::endian` enum. |
+| `include/io/stream_reader.h` | `io::stream_reader` class and `io::input_error` exception. Free functions `read_compound()` and `read_tag()`. The reader tracks nesting depth (max 1024) to prevent stack overflow attacks. |
+| `include/io/stream_writer.h` | `io::stream_writer` class. Free function `write_tag()`. Defines `max_string_len` (UINT16_MAX) and `max_array_len` (INT32_MAX) constants. |
+
+### Compression Headers
+
+| File | Role |
+|------|------|
+| `include/io/zlib_streambuf.h` | Base class `zlib::zlib_streambuf` extending `std::streambuf`. Contains input/output buffers (`std::vector<char>`) and a `z_stream` struct. Also defines `zlib::zlib_error` exception. |
+| `include/io/izlibstream.h` | `zlib::inflate_streambuf` and `zlib::izlibstream`. Decompresses data read from a wrapped `std::istream`. Auto-detects gzip vs zlib format via `window_bits = 32 + 15`. |
+| `include/io/ozlibstream.h` | `zlib::deflate_streambuf` and `zlib::ozlibstream`. Compresses data written to a wrapped `std::ostream`. Supports configurable compression level and gzip vs zlib output. |
+
+### Text Headers
+
+| File | Role |
+|------|------|
+| `include/text/json_formatter.h` | `text::json_formatter` class with a single `print()` method. |
+
+### Visitor
+
+| File | Role |
+|------|------|
+| `include/nbt_visitor.h` | `nbt_visitor` and `const_nbt_visitor` abstract base classes with 12 `visit()` overloads each (one per concrete tag type). All overloads have default empty implementations, allowing visitors to override only the types they care about. |
+
+---
+
+## Source File Roles
+
+### Core Sources
+
+| File | Role |
+|------|------|
+| `src/tag.cpp` | Contains the explicit template instantiation definitions for all six `tag_primitive<T>` specializations. Implements `tag::create()` factory methods (both default and numeric), `operator==`/`!=` (using `typeid` comparison), `operator<<` (delegating to `json_formatter`), and the `tag_type` output operator. Also contains a `static_assert` ensuring IEEE 754 floating point. |
+| `src/tag_compound.cpp` | Implements `tag_compound`'s constructor from initializer list, `at()`, `put()`, `insert()`, `erase()`, `has_key()`, `read_payload()` (reads until `tag_type::End`), and `write_payload()` (writes each entry then `tag_type::End`). |
+| `src/tag_list.cpp` | Implements `tag_list`'s 12 initializer list constructors (one per tag type), the `value` initializer list constructor, `at()`, `set()`, `push_back()`, `reset()`, `read_payload()`, `write_payload()`, and `operator==`/`!=`. |
+| `src/tag_string.cpp` | Implements `tag_string::read_payload()` (delegates to `reader.read_string()`) and `write_payload()` (delegates to `writer.write_string()`). |
+| `src/value.cpp` | Implements `value`'s copy constructor, copy assignment, `set()`, all numeric assignment operators (using `assign_numeric_impl` helper), all numeric conversion operators (widening conversions via switch/case), string operations, and compound/list delegation methods. |
+| `src/value_initializer.cpp` | Implements `value_initializer`'s constructors for each primitive type and string variants. Each constructs the appropriate tag and passes it to the `value` base. |
+
+### I/O Sources
+
+| File | Role |
+|------|------|
+| `src/endian_str.cpp` | Implements all `read_little`/`read_big`/`write_little`/`write_big` overloads. Uses byte-by-byte construction for portability (no reliance on host endianness). Float/double conversion uses `memcpy`-based type punning. Includes `static_assert` checks for `CHAR_BIT == 8`, `sizeof(float) == 4`, `sizeof(double) == 8`. |
+| `src/io/stream_reader.cpp` | Implements `stream_reader::read_compound()`, `read_tag()`, `read_payload()`, `read_type()`, and `read_string()`. The `read_payload()` method tracks nesting depth with a max of `MAX_DEPTH = 1024` to prevent stack overflow from malicious input. Free functions `read_compound()` and `read_tag()` are thin wrappers. |
+| `src/io/stream_writer.cpp` | Implements `stream_writer::write_tag()` (writes type + name + payload) and `write_string()` (writes 2-byte length prefix + UTF-8 data, throws `std::length_error` if string exceeds 65535 bytes). Free function `write_tag()` is a thin wrapper. |
+
+### Compression Sources
+
+| File | Role |
+|------|------|
+| `src/io/izlibstream.cpp` | Implements `inflate_streambuf`: constructor calls `inflateInit2()`, destructor calls `inflateEnd()`. The `underflow()` method reads compressed data from the wrapped stream, calls `inflate()`, and handles `Z_STREAM_END` by seeking back the wrapped stream to account for over-read data. |
+| `src/io/ozlibstream.cpp` | Implements `deflate_streambuf`: constructor calls `deflateInit2()`, destructor calls `close()` then `deflateEnd()`. The `deflate_chunk()` method compresses buffered data and writes to the output stream. `close()` flushes with `Z_FINISH`. `ozlibstream::close()` handles exceptions gracefully by setting `badbit` instead of re-throwing. |
+
+### Text Sources
+
+| File | Role |
+|------|------|
+| `src/text/json_formatter.cpp` | Implements `json_formatter::print()` using a private `json_fmt_visitor` class (extends `const_nbt_visitor`). The visitor handles indentation, JSON-like output for compounds (`{}`), lists (`[]`), quoted strings, numeric suffixes (`b`, `s`, `l`, `f`, `d`), and special float values (Infinity, NaN). |
+
+---
+
+## Data Flow: Reading NBT
+
+```
+Input Stream
+ │
+ ▼
+[izlibstream] ← optional decompression
+ │
+ ▼
+stream_reader
+ ├── read_type() → reads 1 byte, validates tag type
+ ├── read_string() → reads 2-byte length + UTF-8 name
+ └── read_payload(type) → tag::create(type), then tag->read_payload(*this)
+ │
+ ├── tag_primitive<T>::read_payload() → reader.read_num(value)
+ ├── tag_string::read_payload() → reader.read_string()
+ ├── tag_array<T>::read_payload() → reader.read_num(length), then read elements
+ ├── tag_list::read_payload() → read element type, length, then element payloads
+ └── tag_compound::read_payload() → loop: read_type(), read_string(), read_payload() until End
+```
+
+## Data Flow: Writing NBT
+
+```
+tag_compound (root)
+ │
+ ▼
+stream_writer::write_tag(key, tag)
+ ├── write_type(tag.get_type()) → 1 byte
+ ├── write_string(key) → 2-byte length + UTF-8
+ └── write_payload(tag) → tag.write_payload(*this)
+ │
+ ├── tag_primitive<T>::write_payload() → writer.write_num(value)
+ ├── tag_string::write_payload() → writer.write_string(value)
+ ├── tag_array<T>::write_payload() → write length, then elements
+ ├── tag_list::write_payload() → write type + length + element payloads
+ └── tag_compound::write_payload() → for each entry: write_tag(key, value); write_type(End)
+ │
+ ▼
+[ozlibstream] ← optional compression
+ │
+ ▼
+Output Stream
+```
+
+---
+
+## Depth Protection
+
+The `stream_reader` maintains a `depth` counter (private `int depth = 0`) that increments on each recursive `read_payload()` call and decrements on return. If `depth` exceeds `MAX_DEPTH` (1024), an `input_error` is thrown. This prevents stack overflow from deeply nested structures in malicious NBT files.
+
+```cpp
+std::unique_ptr<tag> stream_reader::read_payload(tag_type type)
+{
+ if (++depth > MAX_DEPTH)
+ throw input_error("Too deeply nested");
+ std::unique_ptr<tag> t = tag::create(type);
+ t->read_payload(*this);
+ --depth;
+ return t;
+}
+```
+
+---
+
+## Export Macros
+
+The library uses `generate_export_header()` from CMake to create `nbt_export.h` at build time. The `NBT_EXPORT` macro is applied to all public classes and functions. When building shared libraries (`NBT_BUILD_SHARED=ON`), symbols default to hidden (`CXX_VISIBILITY_PRESET hidden`) and only `NBT_EXPORT`-marked symbols are exported. For static builds, `NBT_EXPORT` expands to nothing.
+
+---
+
+## Numeric Widening Rules in value
+
+The `value` class implements a strict widening hierarchy for numeric conversions:
+
+**Assignment (write) direction** — A value can be assigned a narrower type:
+
+```
+value holding tag_short can accept int8_t (narrower OK)
+value holding tag_short rejects int32_t (wider → std::bad_cast)
+```
+
+**Conversion (read) direction** — A value can be read as a wider type:
+
+```
+value holding tag_byte can be read as int8_t, int16_t, int32_t, int64_t, float, double
+value holding tag_short can be read as int16_t, int32_t, int64_t, float, double
+value holding tag_int can be read as int32_t, int64_t, float, double
+value holding tag_long can be read as int64_t, float, double
+value holding tag_float can be read as float, double
+value holding tag_double can be read as double only
+```
+
+The implementation in `src/value.cpp` uses an `assign_numeric_impl` helper template with a switch-case dispatching on the existing tag type, comparing ordinal positions in the `tag_type` enum.
+
+---
+
+## Dependency Graph
+
+```
+nbt_tags.h ──┬── tag_primitive.h → crtp_tag.h → tag.h, nbt_visitor.h, make_unique.h
+ │ → primitive_detail.h
+ │ → io/stream_reader.h → endian_str.h, tag.h, tag_compound.h
+ │ → io/stream_writer.h → endian_str.h, tag.h
+ ├── tag_string.h ──→ crtp_tag.h
+ ├── tag_array.h ───→ crtp_tag.h, io/stream_reader.h, io/stream_writer.h
+ ├── tag_list.h ────→ crtp_tag.h, tagfwd.h, value_initializer.h → value.h → tag.h
+ └── tag_compound.h → crtp_tag.h, value_initializer.h
+
+io/izlibstream.h → io/zlib_streambuf.h, <zlib.h>
+io/ozlibstream.h → io/zlib_streambuf.h, <zlib.h>
+text/json_formatter.h → tagfwd.h
+```
+
+---
+
+## Thread Safety Implications
+
+The architecture has no global mutable state. The `json_formatter` used by `operator<<` is a `static const` local in `tag.cpp`:
+
+```cpp
+std::ostream& operator<<(std::ostream& os, const tag& t)
+{
+ static const text::json_formatter formatter;
+ formatter.print(os, t);
+ return os;
+}
+```
+
+Since `formatter` is const and `print()` is const, multiple threads can safely use `operator<<` concurrently. Tag objects themselves are not thread-safe for concurrent mutation.
diff --git a/docs/handbook/libnbtplusplus/building.md b/docs/handbook/libnbtplusplus/building.md
new file mode 100644
index 0000000000..61bd5a57a3
--- /dev/null
+++ b/docs/handbook/libnbtplusplus/building.md
@@ -0,0 +1,401 @@
+# Building libnbt++
+
+## Build System
+
+libnbt++ uses **CMake** (minimum version 3.15) as its build system. The root `CMakeLists.txt` defines the project, its options, source files, dependencies, and installation rules.
+
+---
+
+## Prerequisites
+
+### Required
+
+- **C++11 compatible compiler**: GCC 4.8+, Clang 3.3+, or MSVC 2015+
+- **CMake**: Version 3.15 or later
+
+### Optional
+
+- **zlib**: Required for compressed NBT support (gzip/deflate). Enabled by default.
+- **CxxTest**: Required for building and running unit tests. Must be discoverable by CMake's `find_package(CxxTest)`.
+- **objcopy**: Required for test data embedding on Linux (binary test files are converted to object files via `objcopy`).
+
+---
+
+## CMake Options
+
+The following options are available when configuring the project:
+
+| Option | Default | Description |
+|-------------------|---------|----------------------------------------------------------|
+| `NBT_BUILD_SHARED` | `OFF` | Build as a shared (dynamic) library instead of static |
+| `NBT_USE_ZLIB` | `ON` | Enable zlib compression support |
+| `NBT_BUILD_TESTS` | `ON` | Build the unit test executables |
+| `NBT_NAME` | `nbt++` | Override the output library name |
+| `NBT_DEST_DIR` | (unset) | If set, enables install target with specified destination|
+
+### Option Details
+
+#### NBT_BUILD_SHARED
+
+When `NBT_BUILD_SHARED=OFF` (default), a static library (`libnbt++.a` or `nbt++.lib`) is produced.
+
+When `NBT_BUILD_SHARED=ON`, a shared library is produced. In this case, CMake is configured to:
+- Set `CXX_VISIBILITY_PRESET` to `hidden`
+- Set `VISIBILITY_INLINES_HIDDEN` to `1`
+- Use the `NBT_EXPORT` macro (generated by `generate_export_header()`) to control symbol visibility
+
+This means only classes and functions explicitly marked with `NBT_EXPORT` are exported from the shared library.
+
+#### NBT_USE_ZLIB
+
+When enabled (default), the build:
+1. Calls `find_package(ZLIB REQUIRED)` to locate the system zlib
+2. Adds the zlib source files to the library: `src/io/izlibstream.cpp` and `src/io/ozlibstream.cpp`
+3. Defines the preprocessor macro `NBT_HAVE_ZLIB`
+4. Links the library against `ZLIB::ZLIB`
+
+The zlib headers (`include/io/izlibstream.h`, `include/io/ozlibstream.h`, `include/io/zlib_streambuf.h`) include `<zlib.h>` directly. If zlib is not available, these headers cannot be included.
+
+#### NBT_NAME
+
+Allows overriding the library target name. By default, the target is called `nbt++`, producing `libnbt++.a`. Setting `NBT_NAME=mynbt` would produce `libmynbt.a`:
+
+```cmake
+cmake -DNBT_NAME=mynbt ..
+```
+
+---
+
+## Source Files
+
+### Core Library Sources
+
+The `NBT_SOURCES` variable lists all non-zlib source files:
+
+```cmake
+set(NBT_SOURCES
+ src/endian_str.cpp
+ src/tag.cpp
+ src/tag_compound.cpp
+ src/tag_list.cpp
+ src/tag_string.cpp
+ src/value.cpp
+ src/value_initializer.cpp
+ src/io/stream_reader.cpp
+ src/io/stream_writer.cpp
+ src/text/json_formatter.cpp)
+```
+
+### Zlib Sources (Conditional)
+
+Only added when `NBT_USE_ZLIB=ON`:
+
+```cmake
+set(NBT_SOURCES_Z
+ src/io/izlibstream.cpp
+ src/io/ozlibstream.cpp)
+```
+
+---
+
+## Building Step by Step
+
+### 1. Clone and Navigate
+
+```bash
+git clone https://github.com/Project-Tick/Project-Tick.git
+cd Project-Tick/libnbtplusplus/
+```
+
+### 2. Create Build Directory
+
+```bash
+mkdir build
+cd build
+```
+
+### 3. Configure
+
+#### Default (static library, with zlib, with tests):
+
+```bash
+cmake ..
+```
+
+#### Static library, no zlib, no tests:
+
+```bash
+cmake -DNBT_USE_ZLIB=OFF -DNBT_BUILD_TESTS=OFF ..
+```
+
+#### Shared library:
+
+```bash
+cmake -DNBT_BUILD_SHARED=ON ..
+```
+
+#### Custom library name:
+
+```bash
+cmake -DNBT_NAME=nbtpp ..
+```
+
+#### Specify a different compiler:
+
+```bash
+cmake -DCMAKE_CXX_COMPILER=clang++ ..
+```
+
+#### With install destination:
+
+```bash
+cmake -DNBT_DEST_DIR=/usr/local/lib ..
+```
+
+### 4. Build
+
+```bash
+cmake --build .
+```
+
+Or with make directly:
+
+```bash
+make -j$(nproc)
+```
+
+### 5. Run Tests (if enabled)
+
+```bash
+ctest --output-on-failure
+```
+
+### 6. Install (optional)
+
+Only works if `NBT_DEST_DIR` was set:
+
+```bash
+cmake --install .
+```
+
+---
+
+## Integration into Other Projects
+
+### As a CMake Subdirectory
+
+The most common integration method is adding libnbt++ as a subdirectory in your project:
+
+```cmake
+# In your project's CMakeLists.txt
+
+# Optional: disable tests for the dependency
+set(NBT_BUILD_TESTS OFF CACHE BOOL "" FORCE)
+
+add_subdirectory(libnbtplusplus)
+
+add_executable(myapp main.cpp)
+target_link_libraries(myapp nbt++)
+```
+
+The `target_include_directories` in libnbt++'s CMakeLists already uses `PUBLIC`, so include paths propagate automatically:
+
+```cmake
+target_include_directories(${NBT_NAME} PUBLIC include ${CMAKE_CURRENT_BINARY_DIR})
+```
+
+The `${CMAKE_CURRENT_BINARY_DIR}` is included because `generate_export_header()` creates `nbt_export.h` in the build directory.
+
+### Include Paths
+
+After linking against the `nbt++` target, your code can include:
+
+```cpp
+#include <nbt_tags.h> // All tag types
+#include <io/stream_reader.h> // Reading
+#include <io/stream_writer.h> // Writing
+#include <io/izlibstream.h> // Decompression (if NBT_USE_ZLIB)
+#include <io/ozlibstream.h> // Compression (if NBT_USE_ZLIB)
+```
+
+### Manually (without CMake)
+
+If not using CMake, you need to:
+
+1. Add `libnbtplusplus/include/` to your include path
+2. Compile all `.cpp` files in `src/` (and `src/io/`, `src/text/`)
+3. If using zlib: add `-DNBT_HAVE_ZLIB`, link against `-lz`
+4. Create your own `nbt_export.h` or define `NBT_EXPORT` as empty:
+
+```cpp
+// nbt_export.h — manual version for static builds
+#ifndef NBT_EXPORT_H
+#define NBT_EXPORT_H
+#define NBT_EXPORT
+#endif
+```
+
+5. Set C++ standard to C++11 or later: `-std=c++11`
+
+---
+
+## The nbt_export.h Header
+
+This header is **auto-generated** by CMake's `generate_export_header()` command at configure time. It is placed in `${CMAKE_CURRENT_BINARY_DIR}` and defines:
+
+- `NBT_EXPORT` — marks symbols for export from shared libraries
+- `NBT_NO_EXPORT` — marks symbols as hidden
+
+For static builds, `NBT_EXPORT` typically expands to nothing. For shared builds, it maps to compiler-specific visibility attributes:
+
+```cpp
+// Example generated content (GCC/Clang)
+#define NBT_EXPORT __attribute__((visibility("default")))
+#define NBT_NO_EXPORT __attribute__((visibility("hidden")))
+```
+
+The binary directory is added to include paths so all source files can `#include "nbt_export.h"`.
+
+---
+
+## C++ Standard
+
+The library enforces C++11 via:
+
+```cmake
+set_property(TARGET ${NBT_NAME} PROPERTY CXX_STANDARD 11)
+```
+
+This does not set `CXX_STANDARD_REQUIRED`, so the standard is a preference rather than a hard requirement: CMake may use a higher standard if the compiler defaults to one, and would silently fall back to an older standard on a compiler that lacks C++11 support. The code is compatible with C++11 through C++20+.
+
+---
+
+## Compile-Time Assertions
+
+The library includes several `static_assert` checks to ensure platform compatibility:
+
+In `src/tag.cpp`:
+```cpp
+static_assert(
+ std::numeric_limits<float>::is_iec559 &&
+ std::numeric_limits<double>::is_iec559,
+ "The floating point values for NBT must conform to IEC 559/IEEE 754");
+```
+
+In `src/endian_str.cpp`:
+```cpp
+static_assert(CHAR_BIT == 8, "Assuming that a byte has 8 bits");
+static_assert(sizeof(float) == 4, "Assuming that a float is 4 byte long");
+static_assert(sizeof(double) == 8, "Assuming that a double is 8 byte long");
+```
+
+These ensure that the platform's floating-point representation matches the NBT format's IEEE 754 requirement.
+
+---
+
+## Platform-Specific Notes
+
+### Linux
+
+Tests are only supported on `x86_64` and `i686` architectures due to the use of `objcopy` for binary test data embedding:
+
+```cmake
+if(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL amd64)
+ set(OBJCOPY_TARGET "elf64-x86-64")
+ set(OBJCOPY_ARCH "x86_64")
+elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL i686)
+ set(OBJCOPY_TARGET "elf32-i386")
+ set(OBJCOPY_ARCH "i386")
+else()
+ message(AUTHOR_WARNING "This is not a platform that would support testing nbt++")
+ return()
+endif()
+```
+
+### macOS / Windows
+
+The core library compiles on any platform with a C++11 compiler and optionally zlib. However, the test suite uses Linux-specific `objcopy` commands and may not build on non-Linux platforms without modifications.
+
+---
+
+## Shared Library Visibility
+
+When building as a shared library (`NBT_BUILD_SHARED=ON`), the CMake configuration applies strict visibility rules:
+
+```cmake
+if(${BUILD_SHARED_LIBS})
+ set_target_properties(${NBT_NAME} PROPERTIES
+ CXX_VISIBILITY_PRESET hidden
+ VISIBILITY_INLINES_HIDDEN 1)
+endif()
+```
+
+This means:
+- All symbols are hidden by default
+- Inline functions are also hidden
+- Only symbols marked `NBT_EXPORT` are exported
+
+This reduces binary size and prevents symbol collision when multiple libraries are loaded.
+
+---
+
+## Typical Build Output
+
+After a successful build with all options enabled, you will have:
+
+```
+build/
+├── libnbt++.a # The static library (or libnbt++.so for shared)
+├── nbt_export.h # Generated export header
+└── test/
+ ├── nbttest # Core tag tests
+ ├── endian_str_test # Endianness tests
+ ├── read_test # Reading tests
+ ├── write_test # Writing tests
+ ├── zlibstream_test # Compression tests (if NBT_USE_ZLIB)
+ ├── format_test # JSON formatter test
+ └── test_value # Value assignment tests
+```
+
+---
+
+## Troubleshooting
+
+### "Could not find ZLIB"
+
+Install the zlib development package:
+
+```bash
+# Debian/Ubuntu
+sudo apt install zlib1g-dev
+
+# Fedora
+sudo dnf install zlib-devel
+
+# macOS
+brew install zlib
+```
+
+Or disable zlib: `cmake -DNBT_USE_ZLIB=OFF ..`
+
+### "Could not find CxxTest"
+
+Install CxxTest:
+
+```bash
+# Debian/Ubuntu
+sudo apt install cxxtest
+
+# macOS
+brew install cxxtest
+```
+
+Or disable tests: `cmake -DNBT_BUILD_TESTS=OFF ..`
+
+### "nbt_export.h not found"
+
+This file is generated at configure time. Make sure you've run `cmake ..` (the configure step) before building. If building manually without CMake, create a minimal `nbt_export.h` as described in the manual integration section above.
+
+### Linking Errors with Shared Builds
+
+If you see undefined symbol errors when linking against the shared library, ensure your code includes the correct headers and that `nbt_export.h` was generated during the shared build configuration. Verify `NBT_EXPORT` expands to the visibility attribute.
diff --git a/docs/handbook/libnbtplusplus/code-style.md b/docs/handbook/libnbtplusplus/code-style.md
new file mode 100644
index 0000000000..a779f44a0f
--- /dev/null
+++ b/docs/handbook/libnbtplusplus/code-style.md
@@ -0,0 +1,299 @@
+# Code Style & Conventions
+
+## Overview
+
+This document describes the coding conventions and patterns observed throughout the libnbt++ codebase. These are not arbitrary style choices — they reflect deliberate design decisions for a C++11 library focused on correctness, interoperability, and clean ownership semantics.
+
+---
+
+## Namespaces
+
+### Primary Namespaces
+
+| Namespace | Purpose | Location |
+|-----------|---------|----------|
+| `nbt` | All public API types: tags, value, visitors | `include/` |
+| `nbt::detail` | Internal implementation details | `include/crtp_tag.h`, `include/tag_primitive.h` |
+| `nbt::io` | Binary serialization (stream_reader, stream_writer) | `include/io/` |
+| `nbt::text` | Text formatting (json_formatter) | `include/text/` |
+| `endian` | Byte-order conversion functions | `include/endian_str.h` |
+| `zlib` | Compression stream wrappers | `include/io/izlibstream.h`, `include/io/ozlibstream.h` |
+
+### Namespace Usage
+
+- All user-facing code is in the `nbt` namespace
+- Internal helpers like `crtp_tag` and `make_unique` are in `nbt::detail` (not `nbt`)
+- The `endian` and `zlib` namespaces are top-level, **not** nested under `nbt`
+- No `using namespace` directives appear in headers
+
+---
+
+## Export Macro
+
+The generated `nbt_export.h` behaves conceptually like the following (the actual file is produced at configure time by CMake's `generate_export_header()`, and on GCC/Clang uses visibility attributes rather than `__declspec`):
+
+```cpp
+#ifdef NBT_SHARED
+ #ifdef NBT_BUILD
+ #define NBT_EXPORT __declspec(dllexport) // When building the shared lib
+ #else
+ #define NBT_EXPORT __declspec(dllimport) // When consuming the shared lib
+ #endif
+#else
+ #define NBT_EXPORT // Static build: empty
+#endif
+```
+
+`NBT_EXPORT` is applied to all public classes and free functions:
+
+```cpp
+class NBT_EXPORT tag { ... };
+class NBT_EXPORT tag_list final : public detail::crtp_tag<tag_list> { ... };
+NBT_EXPORT bool operator==(const tag_compound& lhs, const tag_compound& rhs);
+```
+
+Classes in `nbt::detail` (like `crtp_tag`) are **not** exported.
+
+---
+
+## Class Design Patterns
+
+### final Classes
+
+All concrete tag classes are `final`:
+
+```cpp
+class tag_list final : public detail::crtp_tag<tag_list> { ... };
+class tag_compound final : public detail::crtp_tag<tag_compound> { ... };
+class tag_string final : public detail::crtp_tag<tag_string> { ... };
+```
+
+This prevents further inheritance and enables compiler devirtualization optimizations.
+
+### CRTP Intermediate
+
+The Curiously Recurring Template Pattern eliminates boilerplate:
+
+```cpp
+template <class Sub>
+class crtp_tag : public tag
+{
+ tag_type get_type() const noexcept override { return Sub::type; }
+ std::unique_ptr<tag> clone() const& override { return make_unique<Sub>(/*copy*/); }
+ std::unique_ptr<tag> move_clone() && override { return make_unique<Sub>(std::move(/*this*/)); }
+ void accept(nbt_visitor& visitor) const override { visitor.visit(const_cast<Sub&>(...)); }
+ // ...
+};
+```
+
+Each concrete class inherits from `crtp_tag<Self>` and gets all 6 virtual method implementations for free.
+
+### Static Type Constants
+
+Each tag class exposes its type as a `static constexpr`:
+
+```cpp
+class tag_int final : public detail::crtp_tag<tag_int>
+{
+public:
+ static constexpr tag_type type = tag_type::Int;
+ // ...
+};
+```
+
+Used for compile-time type checks and template metaprogramming.
+
+---
+
+## Ownership Conventions
+
+### Unique Pointer Everywhere
+
+All tag ownership uses `std::unique_ptr<tag>`:
+
+```cpp
+std::unique_ptr<tag> tag::create(tag_type type);
+std::unique_ptr<tag> tag::clone() const&;
+std::unique_ptr<tag> stream_reader::read_payload(tag_type type);
+```
+
+### Custom make_unique
+
+Since `std::make_unique` is C++14, the library provides its own in `nbt::detail`:
+
+```cpp
+namespace detail {
+ template <class T, class... Args>
+ std::unique_ptr<T> make_unique(Args&&... args)
+ {
+ return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+ }
+}
+```
+
+Used throughout source files instead of raw `new`:
+
+```cpp
+tags.emplace_back(make_unique<T>(std::forward<Args>(args)...));
+```
+
+### value as Type-Erased Wrapper
+
+The `value` class wraps `std::unique_ptr<tag>` to provide implicit conversions and operator overloading that `unique_ptr` cannot support:
+
+```cpp
+class value
+{
+ std::unique_ptr<tag> tag_;
+public:
+ value& operator=(int32_t val); // Assigns to contained tag
+ operator int32_t() const; // Reads from contained tag
+ value& operator[](const std::string& key); // Delegates to tag_compound
+ // ...
+};
+```
+
+---
+
+## C++11 Features Used
+
+| Feature | Usage |
+|---------|-------|
+| `std::unique_ptr` | All tag ownership |
+| Move semantics | `value(value&&)`, `tag_list::push_back(value_initializer&&)` |
+| `override` | All virtual method overrides |
+| `final` | All concrete tag classes |
+| `constexpr` | Static type constants, writer limits |
+| `noexcept` | `get_type()`, visitor destructors |
+| `std::initializer_list` | Compound and list construction |
+| Range-based for | Internal iteration in compounds, lists |
+| `auto` | Type deduction in local variables |
+| `static_assert` | Endian implementation checks |
+| `enum class` | `tag_type`, `endian::endian` |
+| Variadic templates | `emplace_back<T>(Args&&...)`, `make_unique` |
+
+The library does **not** use C++14 or later features, maintaining broad compiler compatibility.
+
+---
+
+## Include Structure
+
+### Public Headers (for Library Users)
+
+```
+include/
+ tag.h // tag base, tag_type enum, create(), as<T>()
+ crtp_tag.h // CRTP intermediate (detail)
+ nbt_tags.h // Master include — includes all tag headers
+ tag_primitive.h // tag_byte, tag_short, tag_int, tag_long, tag_float, tag_double
+ tag_string.h // tag_string
+ tag_array.h // tag_byte_array, tag_int_array, tag_long_array
+ tag_list.h // tag_list
+ tag_compound.h // tag_compound
+ value.h // value wrapper
+ value_initializer.h // value_initializer for implicit construction
+ nbt_visitor.h // nbt_visitor, const_nbt_visitor
+ endian_str.h // endian read/write functions
+ io/
+ stream_reader.h // stream_reader, read_compound(), read_tag()
+ stream_writer.h // stream_writer, write_tag()
+ izlibstream.h // izlibstream (decompression)
+ ozlibstream.h // ozlibstream (compression)
+ zlib_streambuf.h // Base streambuf for zlib
+ text/
+ json_formatter.h // json_formatter
+```
+
+### Master Include
+
+`nbt_tags.h` includes all tag types for convenience:
+
+```cpp
+// nbt_tags.h
+#include "tag.h"
+#include "tag_primitive.h"
+#include "tag_string.h"
+#include "tag_array.h"
+#include "tag_list.h"
+#include "tag_compound.h"
+#include "value.h"
+#include "value_initializer.h"
+```
+
+Users can include individual headers for faster compilation or `nbt_tags.h` for convenience.
+
+---
+
+## Error Handling Style
+
+### Exceptions for Programmer Errors
+
+- `std::invalid_argument`: Type mismatches in lists, null values
+- `std::out_of_range`: Invalid indices in lists, missing keys in compounds
+- `std::logic_error`: Inconsistent internal state
+
+### Exceptions for I/O Errors
+
+- `io::input_error` (extends `std::runtime_error`): All parse/read failures
+- `std::runtime_error`: Write stream failures
+- `std::length_error`: Exceeding NBT format limits
+- `zlib::zlib_error` (extends `std::runtime_error`): Compression/decompression failures
+
+### Stream State
+
+Reader/writer methods check `is` / `os` state after I/O operations and throw on failure. Write methods set failbit before throwing to maintain consistent stream state.
+
+---
+
+## Naming Conventions
+
+| Element | Convention | Examples |
+|---------|-----------|----------|
+| Classes | `snake_case` | `tag_compound`, `stream_reader`, `tag_list` |
+| Methods | `snake_case` | `get_type()`, `read_payload()`, `el_type()` |
+| Member variables | `snake_case` with trailing `_` | `el_type_`, `tag_`, `is`, `os` |
+| Template parameters | `PascalCase` | `Sub`, `T`, `Args` |
+| Enum values | `PascalCase` | `tag_type::Byte_Array`, `tag_type::Long_Array` |
+| Namespaces | `snake_case` | `nbt`, `nbt::io`, `nbt::detail` |
+| Macros | `UPPER_SNAKE_CASE` | `NBT_EXPORT`, `NBT_HAVE_ZLIB`, `NBT_SHARED` |
+| Constants | `snake_case` (with occasional `UPPER_SNAKE_CASE` exceptions) | `max_string_len`, `max_array_len`, `MAX_DEPTH` |
+
+---
+
+## Template Patterns
+
+### Explicit Instantiation
+
+Template classes like `tag_primitive<T>` and `tag_array<T>` use extern template declarations in headers and explicit instantiation in source files:
+
+```cpp
+// In tag_primitive.h
+extern template class tag_primitive<int8_t>;
+extern template class tag_primitive<int16_t>;
+extern template class tag_primitive<int32_t>;
+extern template class tag_primitive<int64_t>;
+extern template class tag_primitive<float>;
+extern template class tag_primitive<double>;
+
+// In tag.cpp
+template class tag_primitive<int8_t>;
+template class tag_primitive<int16_t>;
+// ...
+```
+
+This prevents duplicate template instantiation across translation units, reducing compile time and binary size.
+
+### Type Aliases
+
+```cpp
+typedef tag_primitive<int8_t> tag_byte;
+typedef tag_primitive<int16_t> tag_short;
+typedef tag_primitive<int32_t> tag_int;
+typedef tag_primitive<int64_t> tag_long;
+typedef tag_primitive<float> tag_float;
+typedef tag_primitive<double> tag_double;
+
+typedef tag_array<int8_t> tag_byte_array;
+typedef tag_array<int32_t> tag_int_array;
+typedef tag_array<int64_t> tag_long_array;
+```
+
+Uses `typedef` rather than `using` aliases — a stylistic choice rather than a compatibility one, since alias declarations are themselves a C++11 feature and the two forms are equivalent here.
diff --git a/docs/handbook/libnbtplusplus/compound-tags.md b/docs/handbook/libnbtplusplus/compound-tags.md
new file mode 100644
index 0000000000..fbd5ce7764
--- /dev/null
+++ b/docs/handbook/libnbtplusplus/compound-tags.md
@@ -0,0 +1,602 @@
+# Compound Tags
+
+## Overview
+
+`tag_compound` is the most important tag type in NBT. It represents an unordered collection of **named tags** of arbitrary types — the NBT equivalent of a JSON object or a C++ `std::map`. Every NBT file has a root compound tag, and compounds can be nested arbitrarily deep.
+
+Defined in `include/tag_compound.h`, implemented in `src/tag_compound.cpp`.
+
+---
+
+## Class Definition
+
+```cpp
+class NBT_EXPORT tag_compound final : public detail::crtp_tag<tag_compound>
+{
+ typedef std::map<std::string, value> map_t_;
+
+public:
+ typedef map_t_::iterator iterator;
+ typedef map_t_::const_iterator const_iterator;
+
+ static constexpr tag_type type = tag_type::Compound;
+
+ tag_compound() {}
+ tag_compound(
+ std::initializer_list<std::pair<std::string, value_initializer>> init);
+
+ value& at(const std::string& key);
+ const value& at(const std::string& key) const;
+
+ value& operator[](const std::string& key);
+
+ std::pair<iterator, bool> put(const std::string& key,
+ value_initializer&& val);
+ std::pair<iterator, bool> insert(const std::string& key,
+ value_initializer&& val);
+
+ template <class T, class... Args>
+ std::pair<iterator, bool> emplace(const std::string& key, Args&&... args);
+
+ bool erase(const std::string& key);
+ bool has_key(const std::string& key) const;
+ bool has_key(const std::string& key, tag_type type) const;
+
+ size_t size() const;
+ void clear();
+
+ iterator begin();
+ iterator end();
+ const_iterator begin() const;
+ const_iterator end() const;
+ const_iterator cbegin() const;
+ const_iterator cend() const;
+
+ void read_payload(io::stream_reader& reader) override;
+ void write_payload(io::stream_writer& writer) const override;
+
+ friend bool operator==(const tag_compound& lhs, const tag_compound& rhs);
+ friend bool operator!=(const tag_compound& lhs, const tag_compound& rhs);
+
+private:
+ map_t_ tags;
+};
+```
+
+---
+
+## Internal Storage
+
+`tag_compound` uses a `std::map<std::string, value>` as its internal container. This means:
+
+- **Keys are sorted**: Iteration order is lexicographic by key name
+- **Unique keys**: Each key appears at most once
+- **Logarithmic access**: Lookup, insertion, and deletion are $O(\log n)$
+- **Stable iterators**: Inserting/erasing does not invalidate other iterators
+
+Each value in the map is a `value` object (wrapping `std::unique_ptr<tag>`), which owns its tag.
+
+---
+
+## Construction
+
+### Default Constructor
+
+```cpp
+tag_compound comp; // Empty compound
+```
+
+### Initializer List Constructor
+
+The most powerful constructor takes a brace-enclosed list of key-value pairs:
+
+```cpp
+tag_compound comp{
+ {"name", "Steve"}, // const char* → tag_string via value_initializer
+ {"health", int16_t(20)}, // int16_t → tag_short
+ {"xp", int32_t(1500)}, // int32_t → tag_int
+ {"velocity", 0.0}, // double → tag_double
+ {"inventory", tag_list::of<tag_compound>({
+ {{"id", "minecraft:sword"}, {"count", int8_t(1)}},
+ {{"id", "minecraft:shield"}, {"count", int8_t(1)}}
+ })},
+ {"scores", tag_int_array{100, 200, 300}},
+ {"nested", tag_compound{{"inner_key", "inner_value"}}}
+};
+```
+
+The initializer list type is `std::initializer_list<std::pair<std::string, value_initializer>>`. The `value_initializer` class accepts implicit conversions from all supported types (see the architecture documentation).
+
+#### Implementation
+
+```cpp
+tag_compound::tag_compound(
+ std::initializer_list<std::pair<std::string, value_initializer>> init)
+{
+ for (const auto& pair : init)
+ tags.emplace(std::move(pair.first), std::move(pair.second));
+}
+```
+
+Each pair's key and value_initializer are transferred into the map. Note that the elements of a `std::initializer_list` are `const`, so despite the `std::move` calls the key and value are actually copied rather than moved. Since `value_initializer` inherits from `value`, it converts seamlessly.
+
+---
+
+## Element Access
+
+### operator[] — Unchecked Access
+
+```cpp
+value& operator[](const std::string& key);
+```
+
+Returns a reference to the value associated with `key`. If the key does not exist, **a new uninitialized value is created** under that key (matching `std::map::operator[]` behavior).
+
+```cpp
+tag_compound comp{{"name", "Steve"}};
+
+// Access existing key
+value& name = comp["name"];
+std::string n = static_cast<std::string>(name); // "Steve"
+
+// Create new entry (value is uninitialized/null)
+value& newval = comp["new_key"];
+// newval is a null value: (bool)newval == false
+newval = int32_t(42); // Now it holds a tag_int(42)
+```
+
+**Warning**: Since `operator[]` creates entries, do not use it to test for key existence. Use `has_key()` instead.
+
+### at() — Bounds-Checked Access
+
+```cpp
+value& at(const std::string& key);
+const value& at(const std::string& key) const;
+```
+
+Returns a reference to the value associated with `key`. Throws `std::out_of_range` if the key does not exist.
+
+```cpp
+tag_compound comp{{"health", int16_t(20)}};
+
+value& health = comp.at("health"); // OK
+const value& h = comp.at("health"); // OK (const)
+comp.at("missing_key"); // throws std::out_of_range
+```
+
+Implementation:
+```cpp
+value& tag_compound::at(const std::string& key)
+{
+ return tags.at(key);
+}
+```
+
+---
+
+## Insertion and Modification
+
+### put() — Insert or Assign
+
+```cpp
+std::pair<iterator, bool> put(const std::string& key, value_initializer&& val);
+```
+
+If `key` already exists, **replaces** the existing value. If `key` does not exist, **inserts** a new entry. Returns a pair of (iterator to the entry, bool indicating whether the key was new).
+
+```cpp
+tag_compound comp;
+
+auto [it1, inserted1] = comp.put("key", int32_t(42));
+// inserted1 == true, comp["key"] == tag_int(42)
+
+auto [it2, inserted2] = comp.put("key", int32_t(99));
+// inserted2 == false (key existed), comp["key"] == tag_int(99)
+```
+
+Implementation:
+```cpp
+std::pair<tag_compound::iterator, bool>
+tag_compound::put(const std::string& key, value_initializer&& val)
+{
+ auto it = tags.find(key);
+ if (it != tags.end()) {
+ it->second = std::move(val);
+ return {it, false};
+ } else {
+ return tags.emplace(key, std::move(val));
+ }
+}
+```
+
+### insert() — Insert Only
+
+```cpp
+std::pair<iterator, bool> insert(const std::string& key, value_initializer&& val);
+```
+
+Inserts a new entry only if the key does **not** already exist. If the key exists, the value is not modified. Returns (iterator, bool) where bool indicates whether insertion occurred.
+
+```cpp
+auto [it, inserted] = comp.insert("key", int32_t(42));
+// If "key" exists: inserted == false, value unchanged
+// If "key" missing: inserted == true, "key" → tag_int(42)
+```
+
+Implementation:
+```cpp
+std::pair<tag_compound::iterator, bool>
+tag_compound::insert(const std::string& key, value_initializer&& val)
+{
+ return tags.emplace(key, std::move(val));
+}
+```
+
+This delegates to `std::map::emplace`, which does not overwrite existing entries.
+
+### emplace() — Construct and Insert/Assign
+
+```cpp
+template <class T, class... Args>
+std::pair<iterator, bool> emplace(const std::string& key, Args&&... args);
+```
+
+Constructs a tag of type `T` in-place and assigns or inserts it. Unlike `std::map::emplace`, this **will overwrite** existing values (it delegates to `put()`).
+
+```cpp
+// Construct a tag_int(42) in place
+comp.emplace<tag_int>("key", 42);
+
+// Construct a tag_compound with initializer
+comp.emplace<tag_compound>("nested", std::initializer_list<
+ std::pair<std::string, value_initializer>>{
+ {"inner", int32_t(1)}
+ });
+```
+
+Implementation:
+```cpp
+template <class T, class... Args>
+std::pair<tag_compound::iterator, bool>
+tag_compound::emplace(const std::string& key, Args&&... args)
+{
+ return put(key, value(make_unique<T>(std::forward<Args>(args)...)));
+}
+```
+
+### Direct Assignment via operator[]
+
+Since `operator[]` returns a `value&`, you can assign directly:
+
+```cpp
+comp["health"] = int16_t(20); // Creates/updates tag_short
+comp["name"] = "Steve"; // Creates/updates tag_string
+comp["data"] = tag_compound{ // Assigns a whole compound
+ {"nested", "value"}
+};
+```
+
+Assignment behavior depends on whether the value is initialized:
+- **Uninitialized value**: Creates a new tag of the appropriate type
+- **Initialized value**: Updates the existing tag if types match, throws `std::bad_cast` if types differ (for numeric/string assignments on `value`)
+
+---
+
+## Deletion
+
+### erase()
+
+```cpp
+bool erase(const std::string& key);
+```
+
+Removes the entry with the given key. Returns `true` if an entry was removed, `false` if the key did not exist.
+
+```cpp
+tag_compound comp{{"a", 1}, {"b", 2}, {"c", 3}};
+
+comp.erase("b"); // returns true, comp now has "a" and "c"
+comp.erase("z"); // returns false, no effect
+```
+
+Implementation:
+```cpp
+bool tag_compound::erase(const std::string& key)
+{
+ return tags.erase(key) != 0;
+}
+```
+
+---
+
+## Key Queries
+
+### has_key() — Check Existence
+
+```cpp
+bool has_key(const std::string& key) const;
+```
+
+Returns `true` if the key exists in the compound.
+
+```cpp
+if (comp.has_key("name")) {
+ // Key exists
+}
+```
+
+### has_key() — Check Existence and Type
+
+```cpp
+bool has_key(const std::string& key, tag_type type) const;
+```
+
+Returns `true` if the key exists **and** the value has the specified type.
+
+```cpp
+if (comp.has_key("health", tag_type::Short)) {
+ int16_t health = static_cast<int16_t>(comp.at("health"));
+}
+```
+
+Implementation:
+```cpp
+bool tag_compound::has_key(const std::string& key, tag_type type) const
+{
+ auto it = tags.find(key);
+ return it != tags.end() && it->second.get_type() == type;
+}
+```
+
+---
+
+## Size and Clearing
+
+```cpp
+size_t size() const { return tags.size(); } // Number of entries
+void clear() { tags.clear(); } // Remove all entries
+```
+
+---
+
+## Iteration
+
+`tag_compound` provides full bidirectional iterator support over its entries. Each entry is a `std::pair<const std::string, value>`.
+
+```cpp
+iterator begin();
+iterator end();
+const_iterator begin() const;
+const_iterator end() const;
+const_iterator cbegin() const;
+const_iterator cend() const;
+```
+
+### Iteration Examples
+
+```cpp
+tag_compound comp{{"a", 1}, {"b", 2}, {"c", 3}};
+
+// Range-based for loop
+for (const auto& [key, val] : comp) {
+ std::cout << key << " = " << val.get() << "\n";
+}
+// Output (sorted by key):
+// a = 1
+// b = 2
+// c = 3
+
+// Iterator-based loop
+for (auto it = comp.begin(); it != comp.end(); ++it) {
+ std::cout << it->first << ": type=" << it->second.get_type() << "\n";
+}
+
+// Const iteration
+for (auto it = comp.cbegin(); it != comp.cend(); ++it) {
+ // Read-only access
+}
+```
+
+**Note**: Iteration order is **lexicographic by key name** because the internal `std::map` sorts its keys. This is not necessarily the same order as the original NBT file — NBT compounds are unordered in the specification.
+
+---
+
+## Named Tag Insertion Patterns
+
+### Pattern 1: Initializer List (Preferred for Construction)
+
+```cpp
+tag_compound comp{
+ {"key1", int32_t(1)},
+ {"key2", "hello"},
+ {"key3", tag_list{1, 2, 3}}
+};
+```
+
+### Pattern 2: operator[] for Dynamic Updates
+
+```cpp
+comp["new_key"] = int32_t(42);
+comp["string_key"] = std::string("value");
+```
+
+### Pattern 3: put() for Insert-or-Update
+
+```cpp
+comp.put("key", int32_t(42));
+comp.put("key", int32_t(99)); // Overwrites
+```
+
+### Pattern 4: insert() for Insert-if-Missing
+
+```cpp
+comp.insert("default", int32_t(42)); // Only inserts if "default" doesn't exist
+```
+
+### Pattern 5: emplace() for In-Place Construction
+
+```cpp
+comp.emplace<tag_int>("key", 42);
+comp.emplace<tag_string>("name", "Steve");
+```
+
+### Pattern 6: Moving Tags In
+
+```cpp
+tag_compound inner{{"nested_key", "nested_value"}};
+comp.put("section", std::move(inner)); // Moves the compound
+```
+
+---
+
+## Binary Format
+
+### Reading (Deserialization)
+
+A compound tag's payload is a sequence of named tags terminated by a `tag_type::End` byte:
+
+```
+[type byte] [name length] [name bytes] [tag payload]
+[type byte] [name length] [name bytes] [tag payload]
+...
+[0x00] ← End tag type
+```
+
+Implementation:
+```cpp
+void tag_compound::read_payload(io::stream_reader& reader)
+{
+ clear();
+ tag_type tt;
+ while ((tt = reader.read_type(true)) != tag_type::End) {
+ std::string key;
+ try {
+ key = reader.read_string();
+ } catch (io::input_error& ex) {
+ std::ostringstream str;
+ str << "Error reading key of tag_" << tt;
+ throw io::input_error(str.str());
+ }
+ auto tptr = reader.read_payload(tt);
+ tags.emplace(std::move(key), value(std::move(tptr)));
+ }
+}
+```
+
+The reader loops until it encounters `tag_type::End` (0x00). For each entry:
+1. Read the tag type byte
+2. Read the name string (2-byte length + UTF-8)
+3. Read the tag payload via `reader.read_payload()`
+4. Emplace the key-value pair into the map
+
+### Writing (Serialization)
+
+```cpp
+void tag_compound::write_payload(io::stream_writer& writer) const
+{
+ for (const auto& pair : tags)
+ writer.write_tag(pair.first, pair.second);
+ writer.write_type(tag_type::End);
+}
+```
+
+The writer iterates over all entries (in map order), writing each as a named tag, then writes a single `End` byte.
+
+---
+
+## Equality Comparison
+
+Two compounds are equal if and only if their internal `std::map` objects are equal:
+
+```cpp
+friend bool operator==(const tag_compound& lhs, const tag_compound& rhs)
+{
+ return lhs.tags == rhs.tags;
+}
+```
+
+This performs a deep comparison: same keys, in the same order, with equal values (which recursively compares the owned tags).
+
+---
+
+## Nested Access
+
+The `value` class delegates `operator[]` and `at()` to `tag_compound` when the held tag is a compound. This enables chained access:
+
+```cpp
+tag_compound root{
+ {"player", tag_compound{
+ {"name", "Steve"},
+ {"stats", tag_compound{
+ {"health", int16_t(20)},
+ {"hunger", int16_t(18)}
+ }}
+ }}
+};
+
+// Chained access
+std::string name = static_cast<std::string>(root["player"]["name"]);
+int16_t health = static_cast<int16_t>(root["player"]["stats"]["health"]);
+
+// Bounds-checked
+root.at("player").at("stats").at("missing"); // throws std::out_of_range
+```
+
+The delegation works because `value::operator[](const std::string& key)` performs:
+```cpp
+value& value::operator[](const std::string& key)
+{
+ return dynamic_cast<tag_compound&>(*tag_)[key];
+}
+```
+
+If the held tag is not a `tag_compound`, `dynamic_cast` throws `std::bad_cast`.
+
+---
+
+## Common Usage Patterns
+
+### Checking and Accessing
+
+```cpp
+if (comp.has_key("version", tag_type::Int)) {
+ int32_t version = static_cast<int32_t>(comp.at("version"));
+}
+```
+
+### Safe Nested Access
+
+```cpp
+try {
+ auto& player = comp.at("player").as<tag_compound>();
+ if (player.has_key("health")) {
+ int16_t health = static_cast<int16_t>(player.at("health"));
+ }
+} catch (const std::out_of_range& e) {
+ // Key doesn't exist
+} catch (const std::bad_cast& e) {
+ // Type mismatch
+}
+```
+
+### Building from Dynamic Data
+
+```cpp
+tag_compound comp;
+for (const auto& item : items) {
+ comp.put(item.name, tag_compound{
+ {"id", item.id},
+ {"count", int8_t(item.count)},
+ {"damage", int16_t(item.damage)}
+ });
+}
+```
+
+### Merging Compounds
+
+```cpp
+// Copy all entries from source to dest (overwriting existing keys)
+for (const auto& [key, val] : source) {
+ dest.put(key, value(val)); // Explicit copy via value(const value&)
+}
+```
diff --git a/docs/handbook/libnbtplusplus/endian-handling.md b/docs/handbook/libnbtplusplus/endian-handling.md
new file mode 100644
index 0000000000..7699de0bf0
--- /dev/null
+++ b/docs/handbook/libnbtplusplus/endian-handling.md
@@ -0,0 +1,359 @@
+# Endian Handling
+
+## Overview
+
+The `endian` namespace provides byte-order conversion for reading and writing multi-byte numeric values in big-endian or little-endian format. This is the lowest layer of the I/O system, called by `stream_reader::read_num()` and `stream_writer::write_num()`.
+
+Defined in `include/endian_str.h`, implemented in `src/endian_str.cpp`.
+
+---
+
+## Endianness Enum
+
+```cpp
+namespace endian {
+
+enum class endian
+{
+ big,
+ little
+};
+
+}
+```
+
+- `endian::big` — Most significant byte first. Default for Java Edition NBT (per the Minecraft specification).
+- `endian::little` — Least significant byte first. Used by Bedrock Edition NBT.
+
+---
+
+## Public API
+
+### Read Functions
+
+```cpp
+namespace endian {
+
+template <class T>
+void read(std::istream& is, T& x, endian e);
+
+void read_little(std::istream& is, uint8_t& x);
+void read_little(std::istream& is, int8_t& x);
+void read_little(std::istream& is, uint16_t& x);
+void read_little(std::istream& is, int16_t& x);
+void read_little(std::istream& is, uint32_t& x);
+void read_little(std::istream& is, int32_t& x);
+void read_little(std::istream& is, uint64_t& x);
+void read_little(std::istream& is, int64_t& x);
+void read_little(std::istream& is, float& x);
+void read_little(std::istream& is, double& x);
+
+void read_big(std::istream& is, uint8_t& x);
+void read_big(std::istream& is, int8_t& x);
+void read_big(std::istream& is, uint16_t& x);
+void read_big(std::istream& is, int16_t& x);
+void read_big(std::istream& is, uint32_t& x);
+void read_big(std::istream& is, int32_t& x);
+void read_big(std::istream& is, uint64_t& x);
+void read_big(std::istream& is, int64_t& x);
+void read_big(std::istream& is, float& x);
+void read_big(std::istream& is, double& x);
+
+}
+```
+
+### Write Functions
+
+```cpp
+namespace endian {
+
+template <class T>
+void write(std::ostream& os, T x, endian e);
+
+void write_little(std::ostream& os, uint8_t x);
+void write_little(std::ostream& os, int8_t x);
+void write_little(std::ostream& os, uint16_t x);
+void write_little(std::ostream& os, int16_t x);
+void write_little(std::ostream& os, uint32_t x);
+void write_little(std::ostream& os, int32_t x);
+void write_little(std::ostream& os, uint64_t x);
+void write_little(std::ostream& os, int64_t x);
+void write_little(std::ostream& os, float x);
+void write_little(std::ostream& os, double x);
+
+void write_big(std::ostream& os, uint8_t x);
+void write_big(std::ostream& os, int8_t x);
+void write_big(std::ostream& os, uint16_t x);
+void write_big(std::ostream& os, int16_t x);
+void write_big(std::ostream& os, uint32_t x);
+void write_big(std::ostream& os, int32_t x);
+void write_big(std::ostream& os, uint64_t x);
+void write_big(std::ostream& os, int64_t x);
+void write_big(std::ostream& os, float x);
+void write_big(std::ostream& os, double x);
+
+}
+```
+
+---
+
+## Template Dispatch
+
+The `read()` and `write()` templates dispatch to the correct endian-specific function:
+
+```cpp
+template <class T>
+void read(std::istream& is, T& x, endian e)
+{
+ switch (e) {
+ case endian::big: read_big(is, x); break;
+ case endian::little: read_little(is, x); break;
+ }
+}
+
+template <class T>
+void write(std::ostream& os, T x, endian e)
+{
+ switch (e) {
+ case endian::big: write_big(os, x); break;
+ case endian::little: write_little(os, x); break;
+ }
+}
+```
+
+This is called by `stream_reader` and `stream_writer`:
+
+```cpp
+// In stream_reader
+template <class T> void read_num(T& x)
+{
+ endian::read(is, x, endian);
+}
+
+// In stream_writer
+template <class T> void write_num(T x)
+{
+ endian::write(os, x, endian);
+}
+```
+
+---
+
+## Implementation Details
+
+### Static Assertions
+
+The implementation begins with compile-time checks:
+
+```cpp
+static_assert(CHAR_BIT == 8, "Assumes 8-bit bytes");
+static_assert(sizeof(float) == 4, "Assumes 32-bit float");
+static_assert(sizeof(double) == 8, "Assumes 64-bit double");
+```
+
+### Single-Byte Types
+
+For `int8_t` and `uint8_t`, endianness is irrelevant — the byte is read/written directly:
+
+```cpp
+void read_little(std::istream& is, uint8_t& x)
+{
+ x = is.get();
+}
+
+void write_little(std::ostream& os, uint8_t x)
+{
+ os.put(x);
+}
+
+// Same for read_big/write_big
+```
+
+### Multi-Byte Integer Types
+
+Bytes are read/written individually and assembled in the correct order.
+
+**Big-endian read (most significant byte first):**
+```cpp
+void read_big(std::istream& is, uint16_t& x)
+{
+ uint8_t bytes[2];
+ is.read(reinterpret_cast<char*>(bytes), 2);
+ x = static_cast<uint16_t>(bytes[0]) << 8
+ | static_cast<uint16_t>(bytes[1]);
+}
+
+void read_big(std::istream& is, uint32_t& x)
+{
+ uint8_t bytes[4];
+ is.read(reinterpret_cast<char*>(bytes), 4);
+ x = static_cast<uint32_t>(bytes[0]) << 24
+ | static_cast<uint32_t>(bytes[1]) << 16
+ | static_cast<uint32_t>(bytes[2]) << 8
+ | static_cast<uint32_t>(bytes[3]);
+}
+
+void read_big(std::istream& is, uint64_t& x)
+{
+ uint8_t bytes[8];
+ is.read(reinterpret_cast<char*>(bytes), 8);
+ x = static_cast<uint64_t>(bytes[0]) << 56
+ | static_cast<uint64_t>(bytes[1]) << 48
+ | static_cast<uint64_t>(bytes[2]) << 40
+ | static_cast<uint64_t>(bytes[3]) << 32
+ | static_cast<uint64_t>(bytes[4]) << 24
+ | static_cast<uint64_t>(bytes[5]) << 16
+ | static_cast<uint64_t>(bytes[6]) << 8
+ | static_cast<uint64_t>(bytes[7]);
+}
+```
+
+**Little-endian read (least significant byte first):**
+```cpp
+void read_little(std::istream& is, uint16_t& x)
+{
+ uint8_t bytes[2];
+ is.read(reinterpret_cast<char*>(bytes), 2);
+ x = static_cast<uint16_t>(bytes[1]) << 8
+ | static_cast<uint16_t>(bytes[0]);
+}
+```
+
+**Big-endian write:**
+```cpp
+void write_big(std::ostream& os, uint16_t x)
+{
+ os.put(static_cast<char>(x >> 8));
+ os.put(static_cast<char>(x));
+}
+
+void write_big(std::ostream& os, uint32_t x)
+{
+ os.put(static_cast<char>(x >> 24));
+ os.put(static_cast<char>(x >> 16));
+ os.put(static_cast<char>(x >> 8));
+ os.put(static_cast<char>(x));
+}
+```
+
+**Little-endian write:**
+```cpp
+void write_little(std::ostream& os, uint16_t x)
+{
+ os.put(static_cast<char>(x));
+ os.put(static_cast<char>(x >> 8));
+}
+```
+
+### Signed Types
+
+Signed integers delegate to unsigned via `reinterpret_cast`:
+
+```cpp
+void read_big(std::istream& is, int16_t& x)
+{
+ read_big(is, reinterpret_cast<uint16_t&>(x));
+}
+
+void write_big(std::ostream& os, int16_t x)
+{
+ write_big(os, static_cast<uint16_t>(x));
+}
+```
+
+This works because the bit patterns are identical — only interpretation differs.
+
+### Floating-Point Types
+
+Floats and doubles use `memcpy` to convert between floating-point and integer representations, avoiding undefined behavior from type-punning casts:
+
+```cpp
+void read_big(std::istream& is, float& x)
+{
+ uint32_t tmp;
+ read_big(is, tmp);
+ std::memcpy(&x, &tmp, sizeof(x));
+}
+
+void write_big(std::ostream& os, float x)
+{
+ uint32_t tmp;
+ std::memcpy(&tmp, &x, sizeof(tmp));
+ write_big(os, tmp);
+}
+
+void read_big(std::istream& is, double& x)
+{
+ uint64_t tmp;
+ read_big(is, tmp);
+ std::memcpy(&x, &tmp, sizeof(x));
+}
+
+void write_big(std::ostream& os, double x)
+{
+ uint64_t tmp;
+ std::memcpy(&tmp, &x, sizeof(tmp));
+ write_big(os, tmp);
+}
+```
+
+The `memcpy` approach:
+- Is defined behavior in C++11 (unlike `reinterpret_cast` between float/int, which is UB)
+- Assumes IEEE 754 representation (verified by `static_assert(sizeof(float) == 4)`)
+- Is typically optimized by the compiler to a no-op or register move
+
+---
+
+## Byte Layout Reference
+
+### Big-Endian (Java Edition Default)
+
+```
+Value: 0x12345678 (int32_t)
+Memory: [0x12] [0x34] [0x56] [0x78]
+ MSB LSB
+
+Value: 3.14f (float, IEEE 754: 0x4048F5C3)
+Memory: [0x40] [0x48] [0xF5] [0xC3]
+```
+
+### Little-Endian (Bedrock Edition)
+
+```
+Value: 0x12345678 (int32_t)
+Memory: [0x78] [0x56] [0x34] [0x12]
+ LSB MSB
+
+Value: 3.14f (float, IEEE 754: 0x4048F5C3)
+Memory: [0xC3] [0xF5] [0x48] [0x40]
+```
+
+---
+
+## Supported Types
+
+| C++ Type | Size | NBT Use |
+|----------|------|---------|
+| `int8_t` / `uint8_t` | 1 byte | tag_byte, type bytes |
+| `int16_t` / `uint16_t` | 2 bytes | tag_short, string lengths |
+| `int32_t` / `uint32_t` | 4 bytes | tag_int, array/list lengths |
+| `int64_t` / `uint64_t` | 8 bytes | tag_long |
+| `float` | 4 bytes | tag_float |
+| `double` | 8 bytes | tag_double |
+
+---
+
+## Design Rationale
+
+### Why Not Use System Endianness Detection?
+
+The implementation always performs explicit byte-by-byte construction rather than detecting the host endianness and potentially passing through. This approach:
+
+1. **Portable**: Works correctly on any architecture (big-endian, little-endian, or mixed)
+2. **Simple**: No preprocessor conditionals or platform detection
+3. **Correct**: No alignment issues since bytes are assembled individually
+4. **Predictable**: Same code path on all platforms
+
+### Why memcpy for Floats?
+
+C++ standards do not guarantee that `reinterpret_cast<uint32_t&>(float_val)` produces defined behavior (strict aliasing violation). `memcpy` is the standard-sanctioned way to perform type punning between unrelated types, and modern compilers optimize it to equivalent machine code.
diff --git a/docs/handbook/libnbtplusplus/io-system.md b/docs/handbook/libnbtplusplus/io-system.md
new file mode 100644
index 0000000000..9f0d543a51
--- /dev/null
+++ b/docs/handbook/libnbtplusplus/io-system.md
@@ -0,0 +1,672 @@
+# I/O System
+
+## Overview
+
+The `nbt::io` namespace provides the binary serialization layer for reading and writing NBT data. The two central classes are `stream_reader` and `stream_writer`, both operating on standard C++ streams (`std::istream` / `std::ostream`).
+
+Defined in:
+- `include/io/stream_reader.h` / `src/io/stream_reader.cpp`
+- `include/io/stream_writer.h` / `src/io/stream_writer.cpp`
+
+---
+
+## stream_reader
+
+### Class Definition
+
+```cpp
+class NBT_EXPORT stream_reader
+{
+public:
+ explicit stream_reader(std::istream& is,
+ endian::endian e = endian::endian::big);
+
+ std::istream& get_istr() const { return is; }
+ endian::endian get_endian() const { return endian; }
+
+ // Read named + typed tags
+ std::pair<std::string, std::unique_ptr<tag>> read_tag();
+
+ // Read payload only (for tags whose type is already known)
+ std::unique_ptr<tag> read_payload(tag_type type);
+
+ // Read a type byte
+ tag_type read_type(bool allow_end);
+
+ // Read a length-prefixed UTF-8 string
+ std::string read_string();
+
+ // Read a numeric value in the configured endianness
+ template <class T> void read_num(T& x);
+
+ static const unsigned int MAX_DEPTH = 1024;
+
+private:
+ std::istream& is;
+ endian::endian endian;
+ unsigned int depth_ = 0;
+};
+```
+
+### Constructor
+
+```cpp
+stream_reader(std::istream& is, endian::endian e = endian::endian::big);
+```
+
+- `is`: The input stream to read from
+- `e`: Byte order — `endian::big` (default, Java edition NBT) or `endian::little` (Bedrock edition)
+
+### read_tag() — Read a Complete Named Tag
+
+```cpp
+std::pair<std::string, std::unique_ptr<tag>> read_tag();
+```
+
+Reads a complete tag from the stream:
+1. Reads the type byte
+2. If type is `End`, returns `{"", nullptr}` (end-of-compound sentinel)
+3. Reads the name string
+4. Reads the payload via `read_payload()`
+
+Returns a pair of `{name, tag_ptr}`.
+
+Implementation:
+```cpp
+std::pair<std::string, std::unique_ptr<tag>>
+stream_reader::read_tag()
+{
+ tag_type type = read_type(true);
+ if (type == tag_type::End)
+ return {"", nullptr};
+
+ std::string name = read_string();
+ auto tag = read_payload(type);
+ return {std::move(name), std::move(tag)};
+}
+```
+
+### read_payload() — Read a Tag Payload
+
+```cpp
+std::unique_ptr<tag> read_payload(tag_type type);
+```
+
+Creates a tag of the specified type, then calls its `read_payload()` virtual method. Tracks recursive nesting depth, throwing `io::input_error` if `MAX_DEPTH` (1024) is exceeded.
+
+Implementation:
+```cpp
+std::unique_ptr<tag> stream_reader::read_payload(tag_type type)
+{
+ if (++depth_ > MAX_DEPTH)
+ throw input_error("Maximum nesting depth exceeded");
+
+ auto ret = tag::create(type);
+ ret->read_payload(*this);
+
+ --depth_;
+ return ret;
+}
+```
+
+The `tag::create()` factory instantiates the correct concrete class:
+```cpp
+std::unique_ptr<tag> tag::create(tag_type type)
+{
+ switch (type) {
+ case tag_type::Byte: return make_unique<tag_byte>();
+ case tag_type::Short: return make_unique<tag_short>();
+ case tag_type::Int: return make_unique<tag_int>();
+ case tag_type::Long: return make_unique<tag_long>();
+ case tag_type::Float: return make_unique<tag_float>();
+ case tag_type::Double: return make_unique<tag_double>();
+ case tag_type::Byte_Array: return make_unique<tag_byte_array>();
+ case tag_type::String: return make_unique<tag_string>();
+ case tag_type::List: return make_unique<tag_list>();
+ case tag_type::Compound: return make_unique<tag_compound>();
+ case tag_type::Int_Array: return make_unique<tag_int_array>();
+ case tag_type::Long_Array: return make_unique<tag_long_array>();
+ default:
+ throw std::invalid_argument("Invalid tag type: "
+ + std::to_string(static_cast<int>(type)));
+ }
+}
+```
+
+### read_type() — Read and Validate Type Byte
+
+```cpp
+tag_type read_type(bool allow_end);
+```
+
+Reads a single byte, casts to `tag_type`, and validates:
+```cpp
+tag_type stream_reader::read_type(bool allow_end)
+{
+ int type = is.get();
+ if (!is)
+ throw input_error("Error reading tag type");
+ if (!is_valid_type(type, allow_end))
+ throw input_error("Invalid tag type: "
+ + std::to_string(type));
+ return static_cast<tag_type>(type);
+}
+```
+
+The `allow_end` parameter controls whether `tag_type::End` (0) is accepted. It is passed as `true` wherever an End byte is meaningful — reading compound children, list element types, and in `read_tag()` itself, which uses it as the end-of-compound sentinel — and `false` where a real payload type is required.
+
+### read_string() — Read Length-Prefixed String
+
+```cpp
+std::string read_string();
+```
+
+Reads a 2-byte unsigned length, then that many bytes of UTF-8 data:
+```cpp
+std::string stream_reader::read_string()
+{
+ uint16_t len;
+ read_num(len);
+ if (!is)
+ throw input_error("Error reading string length");
+ std::string str(len, '\0');
+ is.read(&str[0], len);
+ if (!is)
+ throw input_error("Error reading string");
+ return str;
+}
+```
+
+Maximum string length: 65535 bytes (uint16_t max).
+
+### read_num() — Read Numeric Value
+
+```cpp
+template <class T> void read_num(T& x)
+{
+ endian::read(is, x, endian);
+}
+```
+
+Delegates to the `endian` namespace for endianness-appropriate reading.
+
+---
+
+## stream_writer
+
+### Class Definition
+
+```cpp
+class NBT_EXPORT stream_writer
+{
+public:
+ explicit stream_writer(std::ostream& os,
+ endian::endian e = endian::endian::big);
+
+ std::ostream& get_ostr() const { return os; }
+ endian::endian get_endian() const { return endian; }
+
+ void write_type(tag_type type);
+ void write_string(const std::string& str);
+ void write_payload(const tag& t);
+ template <class T> void write_num(T x);
+
+ static constexpr size_t max_string_len = UINT16_MAX;
+ static constexpr int32_t max_array_len = INT32_MAX;
+
+private:
+ std::ostream& os;
+ endian::endian endian;
+};
+```
+
+### Constructor
+
+```cpp
+stream_writer(std::ostream& os, endian::endian e = endian::endian::big);
+```
+
+- `os`: The output stream to write to
+- `e`: Byte order — `endian::big` (default) or `endian::little`
+
+### write_tag() — Free Function
+
+```cpp
+void write_tag(const std::string& name, const tag& t,
+ std::ostream& os,
+ endian::endian e = endian::endian::big);
+```
+
+This is a **free function** (not a member). It writes a complete named tag:
+1. Writes the type byte
+2. Writes the name string
+3. Writes the payload
+
+```cpp
+void write_tag(const std::string& name, const tag& t,
+ std::ostream& os, endian::endian e)
+{
+ stream_writer writer(os, e);
+ writer.write_type(t.get_type());
+ writer.write_string(name);
+ t.write_payload(writer);
+}
+```
+
+### write_type() — Write Type Byte
+
+```cpp
+void stream_writer::write_type(tag_type type)
+{
+ os.put(static_cast<char>(type));
+ if (!os)
+ throw std::runtime_error("Error writing tag type");
+}
+```
+
+### write_string() — Write Length-Prefixed String
+
+```cpp
+void stream_writer::write_string(const std::string& str)
+{
+ if (str.size() > max_string_len) {
+ os.setstate(std::ios::failbit);
+ throw std::length_error("String is too long for NBT");
+ }
+ write_num(static_cast<uint16_t>(str.size()));
+ os.write(str.data(), str.size());
+ if (!os)
+ throw std::runtime_error("Error writing string");
+}
+```
+
+Strings longer than 65535 bytes trigger a `std::length_error`.
+
+### write_payload() — Write Tag Payload
+
+```cpp
+void stream_writer::write_payload(const tag& t)
+{
+ t.write_payload(*this);
+}
+```
+
+Delegates to the tag's virtual `write_payload()` method.
+
+### write_num() — Write Numeric Value
+
+```cpp
+template <class T> void write_num(T x)
+{
+ endian::write(os, x, endian);
+}
+```
+
+---
+
+## Free Functions
+
+### Reading
+
+```cpp
+// In nbt::io namespace
+
+std::pair<std::string, std::unique_ptr<tag>>
+read_compound(std::istream& is,
+ endian::endian e = endian::endian::big);
+
+std::pair<std::string, std::unique_ptr<tag>>
+read_tag(std::istream& is,
+ endian::endian e = endian::endian::big);
+```
+
+**`read_compound()`** reads and validates that the top-level tag is a compound:
+
+```cpp
+std::pair<std::string, std::unique_ptr<tag>>
+read_compound(std::istream& is, endian::endian e)
+{
+ stream_reader reader(is, e);
+ auto result = reader.read_tag();
+ if (!result.second || result.second->get_type() != tag_type::Compound)
+ throw input_error("Top-level tag is not a compound");
+ return result;
+}
+```
+
+**`read_tag()`** reads any tag without type restriction:
+
+```cpp
+std::pair<std::string, std::unique_ptr<tag>>
+read_tag(std::istream& is, endian::endian e)
+{
+ stream_reader reader(is, e);
+ return reader.read_tag();
+}
+```
+
+### Writing
+
+```cpp
+void write_tag(const std::string& name, const tag& t,
+ std::ostream& os,
+ endian::endian e = endian::endian::big);
+```
+
+Writes a complete named tag (type + name + payload). See above.
+
+---
+
+## Error Handling
+
+### input_error
+
+```cpp
+class input_error : public std::runtime_error
+{
+public:
+ using std::runtime_error::runtime_error;
+};
+```
+
+Thrown by `stream_reader` for all parse errors:
+- Invalid tag type bytes
+- Stream read failures
+- Negative array/list lengths
+- Maximum nesting depth exceeded
+- Corrupt or truncated data
+
+### Stream State Errors
+
+Write errors set stream failbit and throw:
+- `std::runtime_error` for general write failures
+- `std::length_error` for strings exceeding `max_string_len` (65535 bytes)
+- `std::length_error` for arrays/lists exceeding `max_array_len` (INT32_MAX elements)
+- `std::logic_error` for list type inconsistencies during write
+
+---
+
+## Payload Format Per Tag Type
+
+Each concrete tag class implements its own `read_payload()` and `write_payload()`:
+
+### Primitives (tag_byte, tag_short, tag_int, tag_long, tag_float, tag_double)
+
+```cpp
+// In tag_primitive.h (inline)
+void read_payload(io::stream_reader& reader) override
+{
+ reader.read_num(val);
+}
+
+void write_payload(io::stream_writer& writer) const override
+{
+ writer.write_num(val);
+}
+```
+
+Simply reads/writes the raw value in the configured endianness.
+
+| Type | Payload Size |
+|------|-------------|
+| tag_byte | 1 byte |
+| tag_short | 2 bytes |
+| tag_int | 4 bytes |
+| tag_long | 8 bytes |
+| tag_float | 4 bytes |
+| tag_double | 8 bytes |
+
+### tag_string
+
+Payload: 2-byte length + UTF-8 data.
+
+```cpp
+void tag_string::read_payload(io::stream_reader& reader)
+{
+ val = reader.read_string();
+}
+
+void tag_string::write_payload(io::stream_writer& writer) const
+{
+ writer.write_string(val);
+}
+```
+
+### tag_array<T>
+
+Payload: 4-byte signed length + elements.
+
+Specialized for different element types:
+
+**tag_byte_array** (int8_t) — raw block read/write:
+```cpp
+// Specialization for int8_t (byte array)
+void tag_array<int8_t>::read_payload(io::stream_reader& reader)
+{
+ int32_t length;
+ reader.read_num(length);
+ if (length < 0)
+ reader.get_istr().setstate(std::ios::failbit);
+ if (!reader.get_istr())
+ throw io::input_error("Error reading length of tag_byte_array");
+ data.resize(length);
+ reader.get_istr().read(reinterpret_cast<char*>(data.data()), length);
+ if (!reader.get_istr())
+ throw io::input_error("Error reading tag_byte_array");
+}
+```
+
+**tag_long_array** (int64_t) — element-by-element:
+```cpp
+// Specialization for int64_t (long array)
+void tag_array<int64_t>::read_payload(io::stream_reader& reader)
+{
+ int32_t length;
+ reader.read_num(length);
+ if (length < 0)
+ reader.get_istr().setstate(std::ios::failbit);
+ if (!reader.get_istr())
+ throw io::input_error("Error reading length of tag_long_array");
+ data.clear();
+ data.reserve(length);
+ for (int32_t i = 0; i < length; ++i) {
+ int64_t val;
+ reader.read_num(val);
+ data.push_back(val);
+ }
+ if (!reader.get_istr())
+ throw io::input_error("Error reading tag_long_array");
+}
+```
+
+**Generic T** (int32_t for tag_int_array):
+```cpp
+template <class T>
+void tag_array<T>::read_payload(io::stream_reader& reader)
+{
+ int32_t length;
+ reader.read_num(length);
+ if (length < 0)
+ reader.get_istr().setstate(std::ios::failbit);
+ if (!reader.get_istr())
+ throw io::input_error("Error reading length of tag_array");
+ data.clear();
+ data.reserve(length);
+ for (int32_t i = 0; i < length; ++i) {
+ T val;
+ reader.read_num(val);
+ data.push_back(val);
+ }
+ if (!reader.get_istr())
+ throw io::input_error("Error reading tag_array");
+}
+```
+
+### tag_compound
+
+Payload: sequence of complete named tags, terminated by `tag_type::End` (single 0 byte):
+
+```cpp
+void tag_compound::read_payload(io::stream_reader& reader)
+{
+ clear();
+ std::pair<std::string, std::unique_ptr<tag>> entry;
+ while ((entry = reader.read_tag()).second)
+ tags.emplace(std::move(entry.first), std::move(entry.second));
+ if (!reader.get_istr())
+ throw io::input_error("Error reading tag_compound");
+}
+
+void tag_compound::write_payload(io::stream_writer& writer) const
+{
+ for (const auto& pair : tags) {
+ writer.write_type(pair.second.get_type());
+ writer.write_string(pair.first);
+ pair.second.get().write_payload(writer);
+ }
+ writer.write_type(tag_type::End);
+}
+```
+
+### tag_list
+
+Payload: 1-byte element type + 4-byte signed length + element payloads (without type bytes):
+
+(See the [list-tags.md](list-tags.md) document for the full implementation.)
+
+---
+
+## Depth Tracking
+
+`stream_reader` tracks recursive depth to prevent stack overflow from maliciously crafted NBT data with deeply nested compounds or lists:
+
+```cpp
+static const unsigned int MAX_DEPTH = 1024;
+```
+
+Each call to `read_payload()` increments `depth_` on entry and decrements it on return. If `depth_` would exceed 1024, an `io::input_error` is thrown.
+
+This is critical for security — without depth limits, a crafted file with thousands of nested compounds could cause a stack overflow.
+
+---
+
+## Endianness
+
+Both `stream_reader` and `stream_writer` take an `endian::endian` parameter:
+
+| Value | Use Case |
+|-------|----------|
+| `endian::big` | Java Edition NBT (default, per Minecraft specification) |
+| `endian::little` | Bedrock Edition NBT |
+
+The endianness affects all numeric reads/writes (lengths, primitive values, etc.) but not single bytes (type, byte values).
+
+---
+
+## Usage Examples
+
+### Reading a File
+
+```cpp
+#include <nbt_tags.h>
+#include <io/stream_reader.h>
+#include <fstream>
+
+std::ifstream file("level.dat", std::ios::binary);
+auto result = nbt::io::read_compound(file);
+
+std::string name = result.first; // Root tag name
+tag_compound& root = result.second->as<tag_compound>();
+
+int32_t version = static_cast<int32_t>(root.at("version"));
+```
+
+### Reading with zlib Decompression
+
+```cpp
+#include <io/izlibstream.h>
+
+std::ifstream file("level.dat", std::ios::binary);
+zlib::izlibstream zs(file);
+auto result = nbt::io::read_compound(zs);
+```
+
+### Writing a File
+
+```cpp
+#include <io/stream_writer.h>
+#include <fstream>
+
+tag_compound root{
+ {"Data", tag_compound{
+ {"version", int32_t(19133)},
+ {"LevelName", std::string("My World")}
+ }}
+};
+
+std::ofstream file("level.dat", std::ios::binary);
+nbt::io::write_tag("", root, file);
+```
+
+### Writing with zlib Compression
+
+```cpp
+#include <io/ozlibstream.h>
+
+std::ofstream file("level.dat", std::ios::binary);
+zlib::ozlibstream zs(file);
+nbt::io::write_tag("", root, zs);
+zs.close();
+```
+
+### Little-Endian (Bedrock)
+
+```cpp
+auto result = nbt::io::read_compound(file, endian::endian::little);
+nbt::io::write_tag("", root, file, endian::endian::little);
+```
+
+### Roundtrip Test
+
+```cpp
+// Write
+std::stringstream ss;
+nbt::io::write_tag("test", original_root, ss);
+
+// Read back
+ss.seekg(0);
+auto [name, tag] = nbt::io::read_tag(ss);
+assert(name == "test");
+assert(*tag == original_root);
+```
+
+---
+
+## Wire Format Summary
+
+```
+Named Tag:
+ [type: 1 byte] [name_length: 2 bytes] [name: N bytes] [payload: variable]
+
+Compound Payload:
+ [child_tag_1] [child_tag_2] ... [End: 0x00]
+
+List Payload:
+ [element_type: 1 byte] [length: 4 bytes] [payload_1] [payload_2] ...
+
+String Payload:
+ [length: 2 bytes] [data: N bytes, UTF-8]
+
+Array Payload (Byte/Int/Long):
+ [length: 4 bytes] [element_1] [element_2] ...
+
+Primitive Payloads:
+ Byte: 1 byte
+ Short: 2 bytes
+ Int: 4 bytes
+ Long: 8 bytes
+ Float: 4 bytes (IEEE 754)
+ Double: 8 bytes (IEEE 754)
+```
+
+All multi-byte values use the configured endianness (big-endian by default).
diff --git a/docs/handbook/libnbtplusplus/list-tags.md b/docs/handbook/libnbtplusplus/list-tags.md
new file mode 100644
index 0000000000..f3ca7dabb4
--- /dev/null
+++ b/docs/handbook/libnbtplusplus/list-tags.md
@@ -0,0 +1,682 @@
+# List Tags
+
+## Overview
+
+`tag_list` represents an ordered collection of unnamed tags that all share the same type. It is the NBT equivalent of a typed array — all elements must be the same `tag_type`, and elements are accessed by index rather than by name.
+
+Defined in `include/tag_list.h`, implemented in `src/tag_list.cpp`.
+
+---
+
+## Class Definition
+
+```cpp
+class NBT_EXPORT tag_list final : public detail::crtp_tag<tag_list>
+{
+public:
+ typedef std::vector<value>::iterator iterator;
+ typedef std::vector<value>::const_iterator const_iterator;
+ static constexpr tag_type type = tag_type::List;
+
+ template <class T> static tag_list of(std::initializer_list<T> init);
+
+ tag_list() : tag_list(tag_type::Null) {}
+ explicit tag_list(tag_type content_type) : el_type_(content_type) {}
+
+ // Initializer list constructors for each supported type
+ tag_list(std::initializer_list<int8_t> init);
+ tag_list(std::initializer_list<int16_t> init);
+ tag_list(std::initializer_list<int32_t> init);
+ tag_list(std::initializer_list<int64_t> init);
+ tag_list(std::initializer_list<float> init);
+ tag_list(std::initializer_list<double> init);
+ tag_list(std::initializer_list<std::string> init);
+ tag_list(std::initializer_list<tag_byte_array> init);
+ tag_list(std::initializer_list<tag_list> init);
+ tag_list(std::initializer_list<tag_compound> init);
+ tag_list(std::initializer_list<tag_int_array> init);
+ tag_list(std::initializer_list<tag_long_array> init);
+ tag_list(std::initializer_list<value> init);
+
+ value& at(size_t i);
+ const value& at(size_t i) const;
+ value& operator[](size_t i);
+ const value& operator[](size_t i) const;
+
+ void set(size_t i, value&& val);
+ void push_back(value_initializer&& val);
+ template <class T, class... Args> void emplace_back(Args&&... args);
+ void pop_back();
+
+ tag_type el_type() const;
+ size_t size() const;
+ void clear();
+ void reset(tag_type type = tag_type::Null);
+
+ iterator begin(); iterator end();
+ const_iterator begin() const; const_iterator end() const;
+ const_iterator cbegin() const; const_iterator cend() const;
+
+ void read_payload(io::stream_reader& reader) override;
+ void write_payload(io::stream_writer& writer) const override;
+
+ friend NBT_EXPORT bool operator==(const tag_list& lhs, const tag_list& rhs);
+ friend NBT_EXPORT bool operator!=(const tag_list& lhs, const tag_list& rhs);
+
+private:
+ std::vector<value> tags;
+ tag_type el_type_;
+
+ template <class T, class Arg> void init(std::initializer_list<Arg> il);
+};
+```
+
+---
+
+## Internal Storage
+
+`tag_list` stores its elements in a `std::vector<value>` and tracks the content type in `el_type_` (a `tag_type` value).
+
+### Element Type Tracking
+
+The `el_type_` field records what type of tags the list contains:
+
+- **Determined**: Set to a specific `tag_type` (e.g., `tag_type::Int`) when elements are present or the type has been set explicitly
+- **Undetermined**: Set to `tag_type::Null` when the list is empty and no type has been specified
+
+The element type is automatically determined when the first element is added to an undetermined list.
+
+---
+
+## Construction
+
+### Default Constructor
+
+```cpp
+tag_list() // Empty list with undetermined type (tag_type::Null)
+```
+
+### Typed Empty Constructor
+
+```cpp
+tag_list(tag_type::Int) // Empty list, but typed as Int
+```
+
+This is useful when you need an empty list that will later hold elements of a specific type.
+
+### Initializer List Constructors
+
+`tag_list` provides 12 initializer list constructors, one for each concrete element type:
+
+```cpp
+tag_list bytes{int8_t(1), int8_t(2), int8_t(3)}; // List of tag_byte
+tag_list shorts{int16_t(100), int16_t(200)}; // List of tag_short
+tag_list ints{1, 2, 3, 4, 5}; // List of tag_int
+tag_list longs{int64_t(1), int64_t(2)}; // List of tag_long
+tag_list floats{1.0f, 2.0f, 3.0f}; // List of tag_float
+tag_list doubles{1.0, 2.0, 3.0}; // List of tag_double
+tag_list strings{"hello", "world"}; // error: bare literals are const char*, not std::string — use std::string("hello")
+
+tag_list byte_arrays{
+ tag_byte_array{1, 2, 3},
+ tag_byte_array{4, 5, 6}
+}; // List of tag_byte_array
+
+tag_list nested_lists{
+ tag_list{1, 2, 3},
+ tag_list{4, 5, 6}
+}; // List of tag_list
+
+tag_list compounds{
+ tag_compound{{"name", "a"}},
+ tag_compound{{"name", "b"}}
+}; // List of tag_compound
+```
+
+Each constructor delegates to the private `init<T, Arg>()` template:
+
+```cpp
+template <class T, class Arg>
+void tag_list::init(std::initializer_list<Arg> init)
+{
+ el_type_ = T::type;
+ tags.reserve(init.size());
+ for (const Arg& arg : init)
+ tags.emplace_back(nbt::make_unique<T>(arg));
+}
+```
+
+### Value Initializer List Constructor
+
+```cpp
+tag_list(std::initializer_list<value> init);
+```
+
+Constructs a list from `value` objects. All values must be the same type, or an exception is thrown.
+
+Implementation:
+```cpp
+tag_list::tag_list(std::initializer_list<value> init)
+{
+ if (init.size() == 0)
+ el_type_ = tag_type::Null;
+ else {
+ el_type_ = init.begin()->get_type();
+ for (const value& val : init) {
+ if (!val || val.get_type() != el_type_)
+ throw std::invalid_argument(
+ "The values are not all the same type");
+ }
+ tags.assign(init.begin(), init.end());
+ }
+}
+```
+
+### Static of<T>() Factory
+
+```cpp
+template <class T> static tag_list of(std::initializer_list<T> init);
+```
+
+Creates a list with elements of type `T`, where each element is constructed from the corresponding value in the initializer list. Most commonly used for creating lists of compounds:
+
+```cpp
+auto list = tag_list::of<tag_compound>({
+ {{"name", "Item 1"}, {"count", int32_t(64)}},
+ {{"name", "Item 2"}, {"count", int32_t(32)}}
+});
+
+auto shorts = tag_list::of<tag_short>({100, 200, 300});
+auto bytes = tag_list::of<tag_byte>({1, 2, 3, 4, 5});
+```
+
+Implementation:
+```cpp
+template <class T> tag_list tag_list::of(std::initializer_list<T> il)
+{
+ tag_list result;
+ result.init<T, T>(il);
+ return result;
+}
+```
+
+---
+
+## Type Enforcement
+
+`tag_list` enforces type homogeneity at runtime. Every operation that modifies the list checks that the new element matches the list's content type.
+
+### How Type Enforcement Works
+
+1. **Empty lists** have `el_type_ == tag_type::Null` (undetermined)
+2. When the **first element** is added, `el_type_` is set to that element's type
+3. Subsequent additions must have the **same type** or `std::invalid_argument` is thrown
+4. `clear()` preserves the content type; `reset()` clears and optionally changes it
+
+### Example
+
+```cpp
+tag_list list; // el_type_ == tag_type::Null
+
+list.push_back(int32_t(42)); // el_type_ becomes tag_type::Int
+list.push_back(int32_t(99)); // OK: same type
+
+list.push_back(int16_t(5)); // throws std::invalid_argument
+// "The tag type does not match the list's content type"
+
+list.push_back(std::string("hello")); // throws std::invalid_argument
+```
+
+---
+
+## Element Access
+
+### operator[] — Unchecked Access
+
+```cpp
+value& operator[](size_t i) { return tags[i]; }
+const value& operator[](size_t i) const { return tags[i]; }
+```
+
+No bounds checking. Behavior is undefined if `i >= size()`.
+
+### at() — Bounds-Checked Access
+
+```cpp
+value& at(size_t i);
+const value& at(size_t i) const;
+```
+
+Throws `std::out_of_range` if `i >= size()`.
+
+```cpp
+tag_list list{1, 2, 3};
+
+value& first = list[0]; // tag_int(1)
+value& second = list.at(1); // tag_int(2), bounds-checked
+
+list.at(10); // throws std::out_of_range
+```
+
+### Accessing the Contained Tag
+
+Since each element is a `value`, you can access the underlying tag:
+
+```cpp
+tag_list list{1, 2, 3};
+
+// Via value's conversion operators
+int32_t val = static_cast<int32_t>(list[0]);
+
+// Via as<T>()
+tag_int& tag = list[0].as<tag_int>();
+int32_t raw = tag.get();
+
+// Via tag reference
+const tag& t = list[0].get();
+```
+
+---
+
+## Modification
+
+### push_back()
+
+```cpp
+void push_back(value_initializer&& val);
+```
+
+Appends a tag to the end of the list. If the list's type is undetermined, sets it. If the type mismatches, throws `std::invalid_argument`. Null values are rejected.
+
+```cpp
+tag_list list;
+list.push_back(int32_t(1)); // list is now type Int
+list.push_back(int32_t(2)); // OK
+list.push_back(int16_t(3)); // throws: Short != Int
+```
+
+Implementation:
+```cpp
+void tag_list::push_back(value_initializer&& val)
+{
+ if (!val)
+ throw std::invalid_argument("The value must not be null");
+ if (el_type_ == tag_type::Null)
+ el_type_ = val.get_type();
+ else if (el_type_ != val.get_type())
+ throw std::invalid_argument(
+ "The tag type does not match the list's content type");
+ tags.push_back(std::move(val));
+}
+```
+
+### emplace_back()
+
+```cpp
+template <class T, class... Args> void emplace_back(Args&&... args);
+```
+
+Constructs a tag of type `T` in-place at the end of the list. Type checking is performed against `T::type`.
+
+```cpp
+tag_list list;
+list.emplace_back<tag_int>(42);
+list.emplace_back<tag_int>(99);
+list.emplace_back<tag_short>(5); // throws: Short != Int
+```
+
+Implementation:
+```cpp
+template <class T, class... Args>
+void tag_list::emplace_back(Args&&... args)
+{
+ if (el_type_ == tag_type::Null)
+ el_type_ = T::type;
+ else if (el_type_ != T::type)
+ throw std::invalid_argument(
+ "The tag type does not match the list's content type");
+ tags.emplace_back(make_unique<T>(std::forward<Args>(args)...));
+}
+```
+
+### set()
+
+```cpp
+void set(size_t i, value&& val);
+```
+
+Replaces the element at index `i`. Type checking is enforced — the new value must match `el_type_`. Throws `std::out_of_range` if the index is invalid.
+
+```cpp
+tag_list list{1, 2, 3};
+list.set(1, value(tag_int(99))); // list is now {1, 99, 3}
+```
+
+Implementation:
+```cpp
+void tag_list::set(size_t i, value&& val)
+{
+ if (val.get_type() != el_type_)
+ throw std::invalid_argument(
+ "The tag type does not match the list's content type");
+ tags.at(i) = std::move(val);
+}
+```
+
+### pop_back()
+
+```cpp
+void pop_back() { tags.pop_back(); }
+```
+
+Removes the last element. Does **not** change `el_type_`, even if the list becomes empty.
+
+### clear()
+
+```cpp
+void clear() { tags.clear(); }
+```
+
+Removes all elements. **Preserves** the content type.
+
+### reset()
+
+```cpp
+void reset(tag_type type = tag_type::Null);
+```
+
+Clears all elements **and** sets the content type. Defaults to `tag_type::Null` (undetermined).
+
+```cpp
+tag_list list{1, 2, 3}; // type: Int
+list.reset(); // empty, type: Null (undetermined)
+list.reset(tag_type::String); // empty, type: String
+```
+
+---
+
+## Content Type Query
+
+```cpp
+tag_type el_type() const { return el_type_; }
+```
+
+Returns the content type of the list:
+- A specific `tag_type` if determined
+- `tag_type::Null` if undetermined
+
+```cpp
+tag_list list;
+list.el_type(); // tag_type::Null
+
+tag_list ints{1, 2, 3};
+ints.el_type(); // tag_type::Int
+
+tag_list typed(tag_type::String);
+typed.el_type(); // tag_type::String
+```
+
+---
+
+## Iteration
+
+`tag_list` provides full random-access iterator support over `value` elements:
+
+```cpp
+iterator begin(); iterator end();
+const_iterator begin() const; const_iterator end() const;
+const_iterator cbegin() const; const_iterator cend() const;
+```
+
+### Iteration Examples
+
+```cpp
+tag_list list{10, 20, 30, 40, 50};
+
+// Range-based for
+for (const auto& val : list) {
+ int32_t num = static_cast<int32_t>(val);
+ std::cout << num << " ";
+}
+// Output: 10 20 30 40 50
+
+// Index-based
+for (size_t i = 0; i < list.size(); ++i) {
+ std::cout << static_cast<int32_t>(list[i]) << " ";
+}
+
+// Iterator-based
+for (auto it = list.begin(); it != list.end(); ++it) {
+ tag& t = it->get();
+ // Process tag...
+}
+```
+
+---
+
+## Nested Access
+
+The `value` class delegates index-based access to `tag_list` when the held tag is a list. This enables chained access from compounds:
+
+```cpp
+tag_compound root{
+ {"items", tag_list::of<tag_compound>({
+ {{"id", "sword"}, {"damage", int16_t(50)}},
+ {{"id", "shield"}, {"damage", int16_t(100)}}
+ })}
+};
+
+// Access list element from compound
+value& firstItem = root["items"][0];
+std::string id = static_cast<std::string>(firstItem["id"]); // "sword"
+
+// Bounds-checked
+root["items"].at(99); // throws std::out_of_range
+```
+
+The delegation in `value`:
+```cpp
+value& value::operator[](size_t i)
+{
+ return dynamic_cast<tag_list&>(*tag_)[i];
+}
+
+value& value::at(size_t i)
+{
+ return dynamic_cast<tag_list&>(*tag_).at(i);
+}
+```
+
+---
+
+## Binary Format
+
+### Reading (Deserialization)
+
+A list tag's payload is:
+
+```
+[element type byte] [length (4 bytes, signed)] [element payloads...]
+```
+
+Implementation:
+```cpp
+void tag_list::read_payload(io::stream_reader& reader)
+{
+ tag_type lt = reader.read_type(true);
+
+ int32_t length;
+ reader.read_num(length);
+ if (length < 0)
+ reader.get_istr().setstate(std::ios::failbit);
+ if (!reader.get_istr())
+ throw io::input_error("Error reading length of tag_list");
+
+ if (lt != tag_type::End) {
+ reset(lt);
+ tags.reserve(length);
+ for (int32_t i = 0; i < length; ++i)
+ tags.emplace_back(reader.read_payload(lt));
+ } else {
+ // tag_end type: leave type undetermined
+ reset(tag_type::Null);
+ }
+}
+```
+
+Key behaviors:
+- Element type `End` (0) means an empty list with undetermined type — the length is ignored
+- Negative length sets failbit and throws `io::input_error`
+- Each element is read as a payload-only tag (no type byte or name)
+
+### Writing (Serialization)
+
+```cpp
+void tag_list::write_payload(io::stream_writer& writer) const
+{
+ if (size() > io::stream_writer::max_array_len) {
+ writer.get_ostr().setstate(std::ios::failbit);
+ throw std::length_error("List is too large for NBT");
+ }
+ writer.write_type(el_type_ != tag_type::Null ? el_type_ : tag_type::End);
+ writer.write_num(static_cast<int32_t>(size()));
+ for (const auto& val : tags) {
+ if (val.get_type() != el_type_) {
+ writer.get_ostr().setstate(std::ios::failbit);
+ throw std::logic_error(
+ "The tags in the list do not all match the content type");
+ }
+ writer.write_payload(val);
+ }
+}
+```
+
+Key behaviors:
+- Undetermined type (`Null`) is written as `End` (0)
+- An additional consistency check verifies all elements match `el_type_` during write
+- Lists exceeding `INT32_MAX` elements throw `std::length_error`
+
+---
+
+## Equality Comparison
+
+Two lists are equal if they have the same element type **and** the same elements:
+
+```cpp
+bool operator==(const tag_list& lhs, const tag_list& rhs)
+{
+ return lhs.el_type_ == rhs.el_type_ && lhs.tags == rhs.tags;
+}
+```
+
+This means:
+- An empty list of `tag_type::Int` is **not** equal to an empty list of `tag_type::String`
+- An empty list with undetermined type **is** equal to another undetermined empty list
+
+---
+
+## Common Usage Patterns
+
+### Creating a List of Compounds (Inventory Example)
+
+```cpp
+tag_list inventory = tag_list::of<tag_compound>({
+ {{"Slot", int8_t(0)}, {"id", "minecraft:diamond_sword"}, {"Count", int8_t(1)}},
+ {{"Slot", int8_t(1)}, {"id", "minecraft:torch"}, {"Count", int8_t(64)}},
+ {{"Slot", int8_t(2)}, {"id", "minecraft:apple"}, {"Count", int8_t(16)}}
+});
+```
+
+### Building a List Dynamically
+
+```cpp
+tag_list positions;
+for (const auto& pos : player_positions) {
+ positions.push_back(tag_compound{
+ {"x", pos.x},
+ {"y", pos.y},
+ {"z", pos.z}
+ });
+}
+```
+
+### Processing a List of Compounds
+
+```cpp
+tag_list& items = root->at("Items").as<tag_list>();
+for (size_t i = 0; i < items.size(); ++i) {
+ auto& item = items[i].as<tag_compound>();
+ std::string id = static_cast<std::string>(item.at("id"));
+ int8_t count = static_cast<int8_t>(item.at("Count"));
+ std::cout << id << " x" << (int)count << "\n";
+}
+```
+
+### Nested Lists
+
+```cpp
+tag_list outer = tag_list::of<tag_list>({
+ tag_list{1, 2, 3}, // Inner list of Int
+ tag_list{4, 5, 6} // Inner list of Int
+});
+
+// Access: outer[0] → value wrapping tag_list{1, 2, 3}
+// outer[0].as<tag_list>()[1] → tag_int(2)
+```
+
+### Converting Between List and Vector
+
+```cpp
+// List to vector
+tag_list list{1, 2, 3, 4, 5};
+std::vector<int32_t> vec;
+for (const auto& val : list) {
+ vec.push_back(static_cast<int32_t>(val));
+}
+
+// Vector to list
+tag_list result;
+for (int32_t v : vec) {
+ result.push_back(v);
+}
+```
+
+---
+
+## Edge Cases
+
+### Empty Lists
+
+```cpp
+tag_list empty1; // el_type_ == Null
+tag_list empty2(tag_type::Int); // el_type_ == Int, size == 0
+tag_list empty3(tag_type::Null); // Same as default constructor
+
+// Read from NBT: a list with type End and length 0
+// → el_type_ = Null (undetermined)
+```
+
+### Clearing vs. Resetting
+
+```cpp
+tag_list list{1, 2, 3}; // el_type_ = Int
+
+list.clear(); // size = 0, el_type_ = Int (preserved!)
+list.push_back(int32_t(4)); // OK: type still Int
+
+list.reset(); // size = 0, el_type_ = Null
+list.push_back("hello"); // OK: type becomes String
+```
+
+### Type Mismatch Prevention
+
+```cpp
+tag_list list{1, 2, 3};
+
+// These all throw std::invalid_argument:
+list.push_back(int16_t(4)); // Short != Int
+list.push_back("hello"); // String != Int
+list.push_back(tag_compound{{"a", 1}}); // Compound != Int
+list.set(0, value(tag_short(5))); // Short != Int
+list.emplace_back<tag_short>(5); // Short != Int
+```
diff --git a/docs/handbook/libnbtplusplus/overview.md b/docs/handbook/libnbtplusplus/overview.md
new file mode 100644
index 0000000000..b2144a2bdc
--- /dev/null
+++ b/docs/handbook/libnbtplusplus/overview.md
@@ -0,0 +1,422 @@
+# libnbt++ Overview
+
+## What is libnbt++?
+
+libnbt++ is a free C++ library for reading, writing, and manipulating Minecraft's **Named Binary Tag (NBT)** file format. It provides a modern C++11 interface for working with NBT data, supporting both compressed and uncompressed files, big-endian (Java Edition) and little-endian (Bedrock/Pocket Edition) byte orders, and full tag hierarchy manipulation.
+
+The library lives under the `nbt` namespace and provides strongly-typed tag classes that mirror the NBT specification exactly. It was originally created by ljfa-ag and is licensed under the GNU Lesser General Public License v3.0 or later (LGPL-3.0-or-later).
+
+libnbt++3 is a complete rewrite of the older libnbt++2, designed to eliminate boilerplate code and provide a more natural C++ syntax for NBT operations.
+
+---
+
+## The NBT Format
+
+NBT (Named Binary Tag) is a binary serialization format invented by Markus "Notch" Persson for Minecraft. It is used throughout the game to store:
+
+- World save data (level.dat)
+- Chunk data (region files)
+- Player inventories and statistics
+- Structure files
+- Server configuration
+
+### Binary Structure
+
+An NBT file consists of a single named root tag, which is always a **Compound** tag. The binary layout is:
+
+```
+[tag type byte] [name length (2 bytes, big-endian)] [name (UTF-8)] [payload]
+```
+
+Each tag type has a specific format for its payload, and compound/list tags recursively contain other tags.
+
+### Compression
+
+NBT files in Minecraft are typically compressed with either **gzip** (most common for `.dat` files) or **zlib/deflate** (used in chunk data within region files). libnbt++ supports both through its optional zlib integration.
+
+---
+
+## Tag Types
+
+The NBT format defines 13 tag types, represented in libnbt++ by the `tag_type` enum class defined in `include/tag.h`:
+
+```cpp
+enum class tag_type : int8_t {
+ End = 0, // Marks the end of a compound tag
+ Byte = 1, // Signed 8-bit integer
+ Short = 2, // Signed 16-bit integer
+ Int = 3, // Signed 32-bit integer
+ Long = 4, // Signed 64-bit integer
+ Float = 5, // 32-bit IEEE 754 floating point
+ Double = 6, // 64-bit IEEE 754 floating point
+ Byte_Array = 7, // Array of signed bytes
+ String = 8, // UTF-8 string (max 65535 bytes)
+ List = 9, // Ordered list of unnamed tags (same type)
+ Compound = 10, // Collection of named tags (any type)
+ Int_Array = 11, // Array of signed 32-bit integers
+ Long_Array = 12, // Array of signed 64-bit integers
+ Null = -1 // Internal: denotes empty value objects
+};
+```
+
+The `Null` type (value -1) is an internal sentinel used by libnbt++ to represent uninitialized `value` objects; it does not appear in the NBT specification.
+
+The `End` type (value 0) is only valid within compound tags to mark their end; it is never used as a standalone tag.
+
+### Tag Type Validation
+
+The function `is_valid_type()` checks whether an integer value is a valid tag type:
+
+```cpp
+bool is_valid_type(int type, bool allow_end = false);
+```
+
+It returns `true` when `type` falls between 1 and 12 (inclusive), or between 0 and 12 if `allow_end` is `true`.
+
+---
+
+## C++ Tag Classes
+
+Each NBT tag type maps to a concrete C++ class in the `nbt` namespace. The classes are organized using templates for related types:
+
+| NBT Type | ID | C++ Class | Underlying Type | Header |
+|-------------|----|--------------------|------------------------|--------------------|
+| Byte | 1 | `tag_byte` | `tag_primitive<int8_t>` | `tag_primitive.h` |
+| Short | 2 | `tag_short` | `tag_primitive<int16_t>`| `tag_primitive.h` |
+| Int | 3 | `tag_int` | `tag_primitive<int32_t>`| `tag_primitive.h` |
+| Long | 4 | `tag_long` | `tag_primitive<int64_t>`| `tag_primitive.h` |
+| Float | 5 | `tag_float` | `tag_primitive<float>` | `tag_primitive.h` |
+| Double | 6 | `tag_double` | `tag_primitive<double>` | `tag_primitive.h` |
+| Byte_Array | 7 | `tag_byte_array` | `tag_array<int8_t>` | `tag_array.h` |
+| String | 8 | `tag_string` | `tag_string` | `tag_string.h` |
+| List | 9 | `tag_list` | `tag_list` | `tag_list.h` |
+| Compound | 10 | `tag_compound` | `tag_compound` | `tag_compound.h` |
+| Int_Array | 11 | `tag_int_array` | `tag_array<int32_t>` | `tag_array.h` |
+| Long_Array | 12 | `tag_long_array` | `tag_array<int64_t>` | `tag_array.h` |
+
+The typedef names (`tag_byte`, `tag_short`, etc.) are the intended public API. The underlying template classes (`tag_primitive<T>`, `tag_array<T>`) should not be used directly.
+
+---
+
+## Library Features
+
+### Modern C++11 Design
+
+- **Move semantics**: Tags support move construction and move assignment for efficient transfers
+- **Smart pointers**: `std::unique_ptr<tag>` is used throughout for ownership management
+- **Initializer lists**: Compounds and lists can be constructed with brace-enclosed initializer lists
+- **Type-safe conversions**: The `value` class provides explicit conversions with `std::bad_cast` on type mismatch
+- **Templates**: `tag_primitive<T>` and `tag_array<T>` use templates to avoid code duplication
+
+### Convenient Syntax
+
+Creating complex NBT structures is straightforward:
+
+```cpp
+#include <nbt_tags.h>
+
+nbt::tag_compound root{
+ {"playerName", "Steve"},
+ {"health", int16_t(20)},
+ {"position", nbt::tag_list{1.0, 64.5, -3.2}},
+ {"inventory", nbt::tag_list::of<nbt::tag_compound>({
+ {{"id", "minecraft:diamond_sword"}, {"count", int8_t(1)}},
+ {{"id", "minecraft:apple"}, {"count", int8_t(64)}}
+ })},
+ {"scores", nbt::tag_int_array{100, 250, 380}}
+};
+```
+
+### The value Class
+
+The `value` class (`include/value.h`) acts as a type-erased wrapper around `std::unique_ptr<tag>`. It enables:
+
+- Implicit numeric conversions (widening only): `int8_t` → `int16_t` → `int32_t` → `int64_t` → `float` → `double`
+- Direct string assignment
+- Subscript access: `compound["key"]` for compounds, `list[index]` for lists
+- Chained access: `root["nested"]["deep"]["value"]`
+
+### I/O System
+
+Reading and writing NBT data uses the stream-based API:
+
+```cpp
+#include <nbt_tags.h>
+#include <io/stream_reader.h>
+#include <io/stream_writer.h>
+#include <fstream>
+
+// Reading
+std::ifstream file("level.dat", std::ios::binary);
+auto [name, compound] = nbt::io::read_compound(file);
+
+// Writing
+std::ofstream out("output.nbt", std::ios::binary);
+nbt::io::write_tag("Level", *compound, out);
+```
+
+The I/O system supports both big-endian (Java Edition default) and little-endian (Bedrock Edition) byte orders via the `endian::endian` enum:
+
+```cpp
+// Reading Bedrock Edition data
+auto pair = nbt::io::read_compound(file, endian::little);
+```
+
+### Zlib Compression Support
+
+When built with `NBT_USE_ZLIB=ON` (the default), the library provides stream wrappers for transparent compression/decompression:
+
+```cpp
+#include <io/izlibstream.h>
+#include <io/ozlibstream.h>
+
+// Reading gzip-compressed NBT
+std::ifstream gzfile("level.dat", std::ios::binary);
+zlib::izlibstream decompressed(gzfile);
+auto pair = nbt::io::read_compound(decompressed);
+
+// Writing gzip-compressed NBT
+std::ofstream outfile("output.dat", std::ios::binary);
+zlib::ozlibstream compressed(outfile, Z_DEFAULT_COMPRESSION, true /* gzip */);
+nbt::io::write_tag("Level", root, compressed);
+compressed.close();
+```
+
+### Visitor Pattern
+
+The library implements the Visitor pattern through `nbt_visitor` and `const_nbt_visitor` base classes, with 12 overloads (one per concrete tag type). The JSON formatter (`text::json_formatter`) is an example of a visitor that pretty-prints tag trees for debugging.
+
+### Polymorphic Operations
+
+All tag classes support:
+
+- **`clone()`** — Deep-copies the tag, returning `std::unique_ptr<tag>`
+- **`move_clone()`** — Moves the tag into a new `unique_ptr`
+- **`assign(tag&&)`** — Move-assigns from another tag of the same type
+- **`get_type()`** — Returns the `tag_type` enum value
+- **`operator==` / `operator!=`** — Deep equality comparison
+- **`operator<<`** — JSON-like formatted output via `text::json_formatter`
+
+### Factory Construction
+
+Tags can be constructed dynamically by type:
+
+```cpp
+auto t = nbt::tag::create(nbt::tag_type::Int); // Default-constructed tag_int(0)
+auto t = nbt::tag::create(nbt::tag_type::Float, 3.14f); // Numeric tag_float(3.14)
+```
+
+---
+
+## Namespace Organization
+
+| Namespace | Contents |
+|----------------|-------------------------------------------------------------|
+| `nbt` | All tag classes, `value`, `value_initializer`, helpers |
+| `nbt::detail` | CRTP base class, primitive/array type traits (internal) |
+| `nbt::io` | `stream_reader`, `stream_writer`, free functions |
+| `nbt::text` | `json_formatter` for pretty-printing |
+| `endian` | Endianness-aware binary read/write functions |
+| `zlib` | zlib stream wrappers (`izlibstream`, `ozlibstream`) |
+
+---
+
+## File Organization
+
+### Public Headers (`include/`)
+
+| File | Purpose |
+|--------------------------|-------------------------------------------------------------|
+| `tag.h` | `tag` base class, `tag_type` enum, `is_valid_type()` |
+| `tagfwd.h` | Forward declarations for all tag classes |
+| `nbt_tags.h` | Convenience header — includes all tag headers |
+| `tag_primitive.h` | `tag_primitive<T>` template and `tag_byte`..`tag_double` typedefs |
+| `tag_string.h` | `tag_string` class |
+| `tag_array.h` | `tag_array<T>` template and `tag_byte_array`..`tag_long_array` |
+| `tag_list.h` | `tag_list` class |
+| `tag_compound.h` | `tag_compound` class |
+| `value.h` | `value` type-erased tag wrapper |
+| `value_initializer.h` | `value_initializer` — implicit conversions for function params |
+| `crtp_tag.h` | CRTP base template implementing polymorphic dispatch |
+| `primitive_detail.h` | Type traits mapping C++ types to `tag_type` values |
+| `nbt_visitor.h` | `nbt_visitor` and `const_nbt_visitor` base classes |
+| `endian_str.h` | Endianness-aware binary I/O functions |
+| `make_unique.h` | `nbt::make_unique<T>()` helper (C++11 polyfill) |
+| `io/stream_reader.h` | `stream_reader` class and `read_compound()`/`read_tag()` |
+| `io/stream_writer.h` | `stream_writer` class and `write_tag()` |
+| `io/izlibstream.h` | `izlibstream` for decompression (requires zlib) |
+| `io/ozlibstream.h` | `ozlibstream` for compression (requires zlib) |
+| `io/zlib_streambuf.h` | `zlib_streambuf` base class, `zlib_error` exception |
+| `text/json_formatter.h` | `json_formatter` for pretty-printing tags |
+
+### Source Files (`src/`)
+
+| File | Purpose |
+|---------------------------|------------------------------------------|
+| `tag.cpp` | `tag` methods, `tag_primitive` explicit instantiations, operators |
+| `tag_compound.cpp` | `tag_compound` methods, binary I/O |
+| `tag_list.cpp` | `tag_list` methods, initializer lists, binary I/O |
+| `tag_string.cpp` | `tag_string` read/write payload |
+| `value.cpp` | `value` assignment operators, conversions |
+| `value_initializer.cpp` | `value_initializer` constructors |
+| `endian_str.cpp` | Big/little endian read/write implementations |
+| `io/stream_reader.cpp` | `stream_reader` methods, format parsing |
+| `io/stream_writer.cpp` | `stream_writer` methods, format output |
+| `io/izlibstream.cpp` | `inflate_streambuf` implementation |
+| `io/ozlibstream.cpp` | `deflate_streambuf` implementation |
+| `text/json_formatter.cpp` | `json_formatter` visitor implementation |
+
+---
+
+## Quick Start Examples
+
+### Reading an NBT File
+
+```cpp
+#include <nbt_tags.h>
+#include <io/stream_reader.h>
+#include <fstream>
+#include <iostream>
+
+int main() {
+ std::ifstream file("level.dat", std::ios::binary);
+ if (!file) return 1;
+
+ auto [name, root] = nbt::io::read_compound(file);
+ std::cout << "Root tag: " << name << "\n";
+ std::cout << *root << std::endl; // JSON-formatted output
+
+ return 0;
+}
+```
+
+### Reading a Compressed File
+
+```cpp
+#include <nbt_tags.h>
+#include <io/stream_reader.h>
+#include <io/izlibstream.h>
+#include <fstream>
+
+int main() {
+ std::ifstream file("level.dat", std::ios::binary);
+ zlib::izlibstream decompressed(file); // Auto-detects gzip/zlib
+ auto [name, root] = nbt::io::read_compound(decompressed);
+ return 0;
+}
+```
+
+### Creating and Writing NBT Data
+
+```cpp
+#include <nbt_tags.h>
+#include <io/stream_writer.h>
+#include <fstream>
+
+int main() {
+ nbt::tag_compound data{
+ {"name", "World1"},
+ {"seed", int64_t(123456789)},
+ {"spawnX", int32_t(0)},
+ {"spawnY", int32_t(64)},
+ {"spawnZ", int32_t(0)},
+ {"gameType", int32_t(0)},
+ {"raining", int8_t(0)},
+ {"version", nbt::tag_compound{
+ {"id", int32_t(19133)},
+ {"name", "1.20.4"},
+ {"snapshot", int8_t(0)}
+ }}
+ };
+
+ std::ofstream out("output.nbt", std::ios::binary);
+ nbt::io::write_tag("", data, out);
+ return 0;
+}
+```
+
+### Modifying Existing Data
+
+```cpp
+auto [name, root] = nbt::io::read_compound(file);
+
+// Modify values using operator[]
+(*root)["playerName"] = std::string("Alex");
+(*root)["health"] = int16_t(20);
+
+// Add a new nested compound
+root->put("newSection", nbt::tag_compound{
+ {"key1", int32_t(42)},
+ {"key2", "hello"}
+});
+
+// Remove a tag
+root->erase("oldSection");
+
+// Check if a key exists
+if (root->has_key("inventory", nbt::tag_type::List)) {
+ auto& inv = root->at("inventory").as<nbt::tag_list>();
+ inv.push_back(nbt::tag_compound{{"id", "minecraft:stone"}, {"count", int8_t(1)}});
+}
+```
+
+### Iterating Over Tags
+
+```cpp
+// Iterating a compound
+for (const auto& [key, val] : *root) {
+ std::cout << key << ": type=" << val.get_type() << "\n";
+}
+
+// Iterating a list
+auto& list = root->at("items").as<nbt::tag_list>();
+for (size_t i = 0; i < list.size(); ++i) {
+ std::cout << "Item " << i << ": " << list[i].get() << "\n";
+}
+```
+
+---
+
+## Error Handling
+
+libnbt++ uses exceptions for error reporting:
+
+| Exception | Thrown When |
+|------------------------|----------------------------------------------------------|
+| `nbt::io::input_error` | Read failure: invalid tag type, unexpected EOF, corruption |
+| `std::bad_cast` | Type mismatch in `value` conversions or `tag::assign()` |
+| `std::out_of_range` | Invalid key in `tag_compound::at()` or index in `tag_list::at()` |
+| `std::invalid_argument`| Invalid tag type to `tag::create()`, type mismatch in list operations |
+| `std::length_error` | String > 65535 bytes, array > INT32_MAX elements |
+| `zlib::zlib_error` | zlib decompression/compression failure |
+| `std::bad_alloc` | zlib memory allocation failure |
+
+Stream state flags (`failbit`, `badbit`) are also set on the underlying `std::istream`/`std::ostream` when errors occur.
+
+---
+
+## Thread Safety
+
+libnbt++ provides no thread safety guarantees beyond those of the C++ standard library. Tag objects should not be accessed concurrently from multiple threads without external synchronization. Reading from separate `stream_reader` instances using independent streams is safe.
+
+---
+
+## Platform Requirements
+
+- C++11 compatible compiler (GCC 4.8+, Clang 3.3+, MSVC 2015+)
+- CMake 3.15 or later
+- zlib (optional, for compressed NBT support)
+- IEEE 754 floating point (enforced via `static_assert`)
+- 8-bit bytes (enforced via `static_assert` on `CHAR_BIT`)
+
+The library uses `memcpy`-based type punning (not `reinterpret_cast`) for float/double endian conversions, ensuring defined behavior across compilers.
+
+---
+
+## License
+
+libnbt++ is licensed under the **GNU Lesser General Public License v3.0 or later** (LGPL-3.0-or-later). This means:
+
+- You can link against libnbt++ from proprietary software
+- Modifications to libnbt++ itself must be released under LGPL
+- The full license text is in `COPYING` and `COPYING.LESSER`
diff --git a/docs/handbook/libnbtplusplus/tag-system.md b/docs/handbook/libnbtplusplus/tag-system.md
new file mode 100644
index 0000000000..a467ddaf78
--- /dev/null
+++ b/docs/handbook/libnbtplusplus/tag-system.md
@@ -0,0 +1,643 @@
+# Tag System
+
+## Overview
+
+The tag system is the core of libnbt++. It provides a polymorphic class hierarchy where every NBT tag type maps to a concrete C++ class. All classes share the `tag` abstract base class and use the CRTP pattern via `detail::crtp_tag<Sub>` to implement common operations without repetitive boilerplate.
+
+---
+
+## The tag_type Enum
+
+Defined in `include/tag.h`, `tag_type` is a strongly-typed enum representing every NBT tag type:
+
+```cpp
+enum class tag_type : int8_t {
+ End = 0,
+ Byte = 1,
+ Short = 2,
+ Int = 3,
+ Long = 4,
+ Float = 5,
+ Double = 6,
+ Byte_Array = 7,
+ String = 8,
+ List = 9,
+ Compound = 10,
+ Int_Array = 11,
+ Long_Array = 12,
+ Null = -1 ///< Used to denote empty value objects
+};
+```
+
+### Type Validation
+
+```cpp
+bool is_valid_type(int type, bool allow_end = false);
+```
+
+Returns `true` when `type` is between 1 and 12 (inclusive), or between 0 and 12 if `allow_end` is `true`. The `End` type (0) and `Null` type (-1) are not valid for standalone tags.
+
+### Type Output Operator
+
+```cpp
+std::ostream& operator<<(std::ostream& os, tag_type tt);
+```
+
+Outputs human-readable names: `"byte"`, `"short"`, `"int"`, `"long"`, `"float"`, `"double"`, `"byte_array"`, `"string"`, `"list"`, `"compound"`, `"int_array"`, `"long_array"`, `"end"`, `"null"`, or `"invalid"`.
+
+---
+
+## The tag Base Class
+
+Defined in `include/tag.h`, `tag` is the abstract base class for all NBT tags. It declares the interface that all concrete tag classes must implement:
+
+### Pure Virtual Methods
+
+```cpp
+virtual tag_type get_type() const noexcept = 0; // Returns the tag type
+virtual std::unique_ptr<tag> clone() const& = 0; // Deep-copies the tag
+virtual std::unique_ptr<tag> move_clone() && = 0; // Move-constructs a copy
+virtual tag& assign(tag&& rhs) = 0; // Move-assigns same-type tag
+virtual void accept(nbt_visitor& visitor) = 0; // Visitor pattern (mutable)
+virtual void accept(const_nbt_visitor& visitor) const = 0; // Visitor pattern (const)
+virtual void read_payload(io::stream_reader& reader) = 0; // Deserialize from stream
+virtual void write_payload(io::stream_writer& writer) const = 0; // Serialize to stream
+```
+
+### Non-Virtual Methods
+
+```cpp
+std::unique_ptr<tag> clone() &&; // Rvalue overload: delegates to move_clone()
+```
+
+### Template Methods
+
+```cpp
+template <class T> T& as();
+template <class T> const T& as() const;
+```
+
+Downcasts `*this` to `T&` using `dynamic_cast`. Requires `T` to be a subclass of `tag` (enforced by `static_assert`). Throws `std::bad_cast` if the tag is not of type `T`.
+
+```cpp
+// Usage:
+tag& t = /* some tag */;
+tag_string& s = t.as<tag_string>(); // OK if t is a tag_string
+int32_t val = t.as<tag_int>().get(); // OK if t is a tag_int
+```
+
+### Factory Methods
+
+These static methods construct tags at runtime by `tag_type`:
+
+```cpp
+static std::unique_ptr<tag> create(tag_type type); // Default-construct
+```
+
+Creates a new tag with default values:
+- Numeric types: value = 0
+- String: empty string
+- Arrays: empty vector
+- List: empty list (undetermined type)
+- Compound: empty compound
+
+Throws `std::invalid_argument` for `tag_type::End`, `tag_type::Null`, or invalid values.
+
+```cpp
+static std::unique_ptr<tag> create(tag_type type, int8_t val);
+static std::unique_ptr<tag> create(tag_type type, int16_t val);
+static std::unique_ptr<tag> create(tag_type type, int32_t val);
+static std::unique_ptr<tag> create(tag_type type, int64_t val);
+static std::unique_ptr<tag> create(tag_type type, float val);
+static std::unique_ptr<tag> create(tag_type type, double val);
+```
+
+Creates a numeric tag with the specified value. The value is cast to the appropriate type. Throws `std::invalid_argument` if `type` is not a numeric type (Byte through Double).
+
+```cpp
+auto t = tag::create(tag_type::Int); // tag_int(0)
+auto t = tag::create(tag_type::Float, 3.14f); // tag_float(3.14)
+auto t = tag::create(tag_type::Byte, 42); // tag_byte(42), value cast to int8_t
+```
+
+### Equality Operators
+
+```cpp
+friend bool operator==(const tag& lhs, const tag& rhs);
+friend bool operator!=(const tag& lhs, const tag& rhs);
+```
+
+The `operator==` implementation first checks `typeid(lhs) != typeid(rhs)` — if the RTTI types differ, tags are unequal. If types match, it delegates to the private virtual `equals()` method, which each concrete class implements via the CRTP.
+
+### Output Operator
+
+```cpp
+std::ostream& operator<<(std::ostream& os, const tag& t);
+```
+
+Uses `text::json_formatter` to produce JSON-like output; the formatter instance is created as a `static const` object in `src/tag.cpp`.
+
+---
+
+## Concrete Tag Classes
+
+### tag_byte / tag_short / tag_int / tag_long / tag_float / tag_double
+
+These are all instantiations of `tag_primitive<T>`, defined in `include/tag_primitive.h`:
+
+```cpp
+template <class T>
+class tag_primitive final : public detail::crtp_tag<tag_primitive<T>>
+{
+public:
+ typedef T value_type;
+ static constexpr tag_type type = detail::get_primitive_type<T>::value;
+
+ constexpr tag_primitive(T val = 0) noexcept : value(val) {}
+
+ // Implicit conversion to/from T
+ operator T&();
+ constexpr operator T() const;
+ constexpr T get() const { return value; }
+
+ tag_primitive& operator=(T val) { value = val; return *this; }
+ void set(T val) { value = val; }
+
+ void read_payload(io::stream_reader& reader) override;
+ void write_payload(io::stream_writer& writer) const override;
+
+private:
+ T value;
+};
+```
+
+#### Type Mapping
+
+| Typedef | Template | `type` constant | C++ Type | NBT Size |
+|--------------|--------------------------|----------------------|------------|----------|
+| `tag_byte` | `tag_primitive<int8_t>` | `tag_type::Byte` | `int8_t` | 1 byte |
+| `tag_short` | `tag_primitive<int16_t>` | `tag_type::Short` | `int16_t` | 2 bytes |
+| `tag_int` | `tag_primitive<int32_t>` | `tag_type::Int` | `int32_t` | 4 bytes |
+| `tag_long` | `tag_primitive<int64_t>` | `tag_type::Long` | `int64_t` | 8 bytes |
+| `tag_float` | `tag_primitive<float>` | `tag_type::Float` | `float` | 4 bytes |
+| `tag_double` | `tag_primitive<double>` | `tag_type::Double` | `double` | 8 bytes |
+
+#### Implicit Conversions
+
+`tag_primitive<T>` provides implicit conversion operators. This allows natural C++ usage:
+
+```cpp
+tag_int myInt(42);
+int val = myInt; // Implicit conversion: constexpr operator T() const
+int& ref = myInt; // Mutable reference: operator T&()
+ref = 100; // Modifies the tag's value
+myInt = 200; // Uses operator=(T val)
+```
+
+#### Binary I/O
+
+Reading and writing are implemented inline in the header:
+
+```cpp
+template <class T>
+void tag_primitive<T>::read_payload(io::stream_reader& reader)
+{
+ reader.read_num(value);
+ if (!reader.get_istr()) {
+ std::ostringstream str;
+ str << "Error reading tag_" << type;
+ throw io::input_error(str.str());
+ }
+}
+
+template <class T>
+void tag_primitive<T>::write_payload(io::stream_writer& writer) const
+{
+ writer.write_num(value);
+}
+```
+
+#### Equality
+
+```cpp
+template <class T>
+bool operator==(const tag_primitive<T>& lhs, const tag_primitive<T>& rhs)
+{
+ return lhs.get() == rhs.get();
+}
+```
+
+Note: `tag_float(2.5)` and `tag_double(2.5)` are **not** equal — they are different types.
+
+#### Explicit Instantiation
+
+In `include/tag_primitive.h`:
+```cpp
+extern template class NBT_EXPORT tag_primitive<int8_t>;
+extern template class NBT_EXPORT tag_primitive<int16_t>;
+extern template class NBT_EXPORT tag_primitive<int32_t>;
+extern template class NBT_EXPORT tag_primitive<int64_t>;
+extern template class NBT_EXPORT tag_primitive<float>;
+extern template class NBT_EXPORT tag_primitive<double>;
+```
+
+In `src/tag.cpp`:
+```cpp
+template class tag_primitive<int8_t>;
+template class tag_primitive<int16_t>;
+template class tag_primitive<int32_t>;
+template class tag_primitive<int64_t>;
+template class tag_primitive<float>;
+template class tag_primitive<double>;
+```
+
+This ensures template code is compiled once in `tag.cpp` rather than in every translation unit.
+
+---
+
+### tag_string
+
+Defined in `include/tag_string.h`:
+
+```cpp
+class NBT_EXPORT tag_string final : public detail::crtp_tag<tag_string>
+{
+public:
+ static constexpr tag_type type = tag_type::String;
+
+ tag_string() {}
+ tag_string(const std::string& str) : value(str) {}
+ tag_string(std::string&& str) noexcept : value(std::move(str)) {}
+ tag_string(const char* str) : value(str) {}
+
+ // Implicit conversion to/from std::string
+ operator std::string&();
+ operator const std::string&() const;
+ const std::string& get() const { return value; }
+
+ tag_string& operator=(const std::string& str);
+ tag_string& operator=(std::string&& str);
+ tag_string& operator=(const char* str);
+ void set(const std::string& str);
+ void set(std::string&& str);
+
+ void read_payload(io::stream_reader& reader) override;
+ void write_payload(io::stream_writer& writer) const override;
+
+private:
+ std::string value;
+};
+```
+
+#### NBT String Format
+
+NBT strings are encoded as:
+1. A 2-byte unsigned big-endian length prefix (max 65535)
+2. UTF-8 encoded characters
+
+The `write_payload` method throws `std::length_error` if the string exceeds 65535 bytes.
+
+#### Usage
+
+```cpp
+tag_string name("Steve");
+std::string& ref = name; // Implicit conversion
+ref = "Alex"; // Modifies in place
+name = "Notch"; // operator=(const char*)
+name.set("jeb_"); // Explicit setter
+```
+
+---
+
+### tag_byte_array / tag_int_array / tag_long_array
+
+These are all instantiations of `tag_array<T>`, defined in `include/tag_array.h`:
+
+```cpp
+template <class T>
+class tag_array final : public detail::crtp_tag<tag_array<T>>
+{
+public:
+ typedef typename std::vector<T>::iterator iterator;
+ typedef typename std::vector<T>::const_iterator const_iterator;
+ typedef T value_type;
+ static constexpr tag_type type = detail::get_array_type<T>::value;
+
+ tag_array() {}
+ tag_array(std::initializer_list<T> init) : data(init) {}
+ tag_array(std::vector<T>&& vec) noexcept : data(std::move(vec)) {}
+
+ std::vector<T>& get();
+ const std::vector<T>& get() const;
+
+ T& at(size_t i); // Bounds-checked
+ T& operator[](size_t i); // Unchecked
+
+ void push_back(T val);
+ void pop_back();
+ size_t size() const;
+ void clear();
+
+ iterator begin(); iterator end();
+ const_iterator begin() const; const_iterator end() const;
+ const_iterator cbegin() const; const_iterator cend() const;
+
+ void read_payload(io::stream_reader& reader) override;
+ void write_payload(io::stream_writer& writer) const override;
+
+private:
+ std::vector<T> data;
+};
+```
+
+#### Type Mapping
+
+| Typedef | Template | `type` constant |
+|-------------------|-----------------------|------------------------|
+| `tag_byte_array` | `tag_array<int8_t>` | `tag_type::Byte_Array` |
+| `tag_int_array` | `tag_array<int32_t>` | `tag_type::Int_Array` |
+| `tag_long_array` | `tag_array<int64_t>` | `tag_type::Long_Array` |
+
+#### Specialized Binary I/O
+
+The `read_payload` and `write_payload` methods have three implementations:
+
+**Byte arrays** (`tag_array<int8_t>`): Read/written as raw byte blocks:
+```cpp
+template <>
+void tag_array<int8_t>::read_payload(io::stream_reader& reader)
+{
+ int32_t length;
+ reader.read_num(length);
+ if (length < 0)
+ reader.get_istr().setstate(std::ios::failbit);
+ if (!reader.get_istr())
+ throw io::input_error("Error reading length of tag_byte_array");
+
+ data.resize(length);
+ reader.get_istr().read(reinterpret_cast<char*>(data.data()), length);
+ if (!reader.get_istr())
+ throw io::input_error("Error reading contents of tag_byte_array");
+}
+```
+
+**Long arrays** (`tag_array<int64_t>`): Read element-by-element with `read_num`:
+```cpp
+template <>
+void tag_array<int64_t>::read_payload(io::stream_reader& reader)
+{
+ int32_t length;
+ reader.read_num(length);
+ if (length < 0) reader.get_istr().setstate(std::ios::failbit);
+ if (!reader.get_istr())
+ throw io::input_error("Error reading length of tag_long_array");
+
+ data.clear();
+ data.reserve(length);
+ for (int32_t i = 0; i < length; ++i) {
+ int64_t val;
+ reader.read_num(val);
+ data.push_back(val);
+ }
+ if (!reader.get_istr())
+ throw io::input_error("Error reading contents of tag_long_array");
+}
+```
+
+**Int arrays and generic** (`tag_array<T>`): Uses the generic template with `read_num`:
+```cpp
+template <typename T>
+void tag_array<T>::read_payload(io::stream_reader& reader)
+{
+ int32_t length;
+ reader.read_num(length);
+ // ... similar element-by-element reading ...
+}
+```
+
+#### NBT Array Format
+
+Arrays are encoded as:
+1. A 4-byte signed big-endian length (number of elements)
+2. The elements in sequence
+
+Negative lengths set the failbit and throw `io::input_error`. Arrays exceeding `INT32_MAX` elements throw `std::length_error` on write.
+
+#### Usage
+
+```cpp
+// Initialize with values
+tag_byte_array ba{0, 1, 2, 3, 4};
+tag_int_array ia{100, 200, 300};
+tag_long_array la{1000000000LL, 2000000000LL};
+
+// Access elements
+int8_t first = ba[0];
+int32_t safe = ia.at(1); // Bounds-checked
+
+// Modify
+ba.push_back(5);
+ia.pop_back();
+la.clear();
+
+// Iterate
+for (int32_t val : ia) {
+ std::cout << val << " ";
+}
+
+// Access underlying vector
+std::vector<int8_t>& raw = ba.get();
+raw.insert(raw.begin(), -1);
+```
+
+---
+
+### tag_list
+
+See [list-tags.md](list-tags.md) for full details. Briefly: `tag_list` stores a `std::vector<value>` with a tracked element type (`el_type_`). All elements must have the same type.
+
+### tag_compound
+
+See [compound-tags.md](compound-tags.md) for full details. Briefly: `tag_compound` stores a `std::map<std::string, value>` providing named tag access with ordered iteration.
+
+---
+
+## Clone, Equals, and Assign
+
+These operations are all provided by the CRTP layer (`detail::crtp_tag<Sub>`) and work uniformly across all tag types:
+
+### clone()
+
+```cpp
+std::unique_ptr<tag> clone() const&; // Copy-clones
+std::unique_ptr<tag> move_clone() &&; // Move-clones
+std::unique_ptr<tag> clone() &&; // Delegates to move_clone()
+```
+
+The CRTP implementation:
+```cpp
+std::unique_ptr<tag> clone() const& override final {
+ return make_unique<Sub>(sub_this()); // Copy constructor of Sub
+}
+std::unique_ptr<tag> move_clone() && override final {
+ return make_unique<Sub>(std::move(sub_this())); // Move constructor of Sub
+}
+```
+
+**Example:**
+```cpp
+tag_compound comp{{"key", 42}};
+
+// Deep copy
+auto copy = comp.clone();
+// copy is a tag_compound with {"key": tag_int(42)}
+
+// Move (original is in moved-from state)
+auto moved = std::move(comp).clone();
+```
+
+### equals()
+
+The private virtual `equals()` method (implemented by crtp_tag) delegates to the concrete class's `operator==`:
+
+```cpp
+bool equals(const tag& rhs) const override final {
+ return sub_this() == static_cast<const Sub&>(rhs);
+}
+```
+
+The public `operator==` first checks RTTI types:
+```cpp
+bool operator==(const tag& lhs, const tag& rhs)
+{
+ if (typeid(lhs) != typeid(rhs))
+ return false;
+ return lhs.equals(rhs);
+}
+```
+
+**Examples:**
+```cpp
+tag_int a(42), b(42), c(99);
+a == b; // true (same type, same value)
+a == c; // false (same type, different value)
+
+tag_float f(42.0f);
+a == f; // false (different types, even though numeric value matches)
+```
+
+### assign()
+
+The CRTP implementation:
+```cpp
+tag& assign(tag&& rhs) override final {
+ return sub_this() = dynamic_cast<Sub&&>(rhs);
+}
+```
+
+This move-assigns the content of `rhs` into `*this`. Throws `std::bad_cast` if `rhs` is not the same concrete type as `*this`.
+
+**Example:**
+```cpp
+tag_string s("hello");
+s.assign(tag_string("world")); // OK: s now contains "world"
+
+tag_int i(42);
+s.assign(std::move(i)); // throws std::bad_cast (int != string)
+```
+
+---
+
+## Primitive Type Traits
+
+The `detail::get_primitive_type<T>` meta-struct (`include/primitive_detail.h`) uses template specialization to map C++ types to `tag_type` values at compile time:
+
+```cpp
+namespace detail {
+ template <class T> struct get_primitive_type {
+ static_assert(sizeof(T) != sizeof(T),
+ "Invalid type paramter for tag_primitive");
+ };
+
+ template <> struct get_primitive_type<int8_t>
+ : public std::integral_constant<tag_type, tag_type::Byte> {};
+ template <> struct get_primitive_type<int16_t>
+ : public std::integral_constant<tag_type, tag_type::Short> {};
+ template <> struct get_primitive_type<int32_t>
+ : public std::integral_constant<tag_type, tag_type::Int> {};
+ template <> struct get_primitive_type<int64_t>
+ : public std::integral_constant<tag_type, tag_type::Long> {};
+ template <> struct get_primitive_type<float>
+ : public std::integral_constant<tag_type, tag_type::Float> {};
+ template <> struct get_primitive_type<double>
+ : public std::integral_constant<tag_type, tag_type::Double> {};
+}
+```
+
+The unspecialized template uses a `static_assert` that always fails (via `sizeof(T) != sizeof(T)`, which is always `false`). This ensures that attempting to create a `tag_primitive<SomeOtherType>` produces a clear compile error.
+
+Similarly, `detail::get_array_type<T>` maps array element types:
+
+```cpp
+template <> struct get_array_type<int8_t>
+ : public std::integral_constant<tag_type, tag_type::Byte_Array> {};
+template <> struct get_array_type<int32_t>
+ : public std::integral_constant<tag_type, tag_type::Int_Array> {};
+template <> struct get_array_type<int64_t>
+ : public std::integral_constant<tag_type, tag_type::Long_Array> {};
+```
+
+---
+
+## make_unique Helper
+
+Defined in `include/make_unique.h`, this provides a C++11 polyfill for `std::make_unique` (which was introduced in C++14):
+
+```cpp
+namespace nbt {
+ template <class T, class... Args>
+ std::unique_ptr<T> make_unique(Args&&... args)
+ {
+ return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+ }
+}
+```
+
+It is used throughout the library for creating tag instances:
+```cpp
+auto t = nbt::make_unique<tag_int>(42);
+auto s = nbt::make_unique<tag_string>("hello");
+```
+
+---
+
+## Tag Construction Summary
+
+| Tag Type | Default Constructor | Value Constructor | Initializer List |
+|----------------|---------------------|--------------------------------------------------|-------------------------------|
+| `tag_byte` | `tag_byte()`→0 | `tag_byte(int8_t(42))` | N/A |
+| `tag_short` | `tag_short()`→0 | `tag_short(int16_t(1000))` | N/A |
+| `tag_int` | `tag_int()`→0 | `tag_int(42)` | N/A |
+| `tag_long` | `tag_long()`→0 | `tag_long(int64_t(123456789))` | N/A |
+| `tag_float` | `tag_float()`→0 | `tag_float(3.14f)` | N/A |
+| `tag_double` | `tag_double()`→0 | `tag_double(2.71828)` | N/A |
+| `tag_string` | `tag_string()`→"" | `tag_string("text")`, `tag_string(std::string)` | N/A |
+| `tag_byte_array`| `tag_byte_array()` | `tag_byte_array(std::vector<int8_t>&&)` | `tag_byte_array{1,2,3}` |
+| `tag_int_array` | `tag_int_array()` | `tag_int_array(std::vector<int32_t>&&)` | `tag_int_array{1,2,3}` |
+| `tag_long_array`| `tag_long_array()` | `tag_long_array(std::vector<int64_t>&&)` | `tag_long_array{1,2,3}` |
+| `tag_list` | `tag_list()`→Null | `tag_list(tag_type)` (empty with type) | `tag_list{1,2,3}` (various) |
+| `tag_compound` | `tag_compound()` | N/A | `tag_compound{{"k",v},...}` |
+
+---
+
+## Error Handling in Tags
+
+| Operation | Exception | Condition |
+|-------------------|---------------------------|-------------------------------------|
+| `tag::create()` | `std::invalid_argument` | Invalid type (End, Null, or >12) |
+| `tag::as<T>()` | `std::bad_cast` | Tag is not of type T |
+| `tag::assign()` | `std::bad_cast` | Source tag has different type |
+| Primitive I/O | `io::input_error` | Stream read failure |
+| String write | `std::length_error` | String exceeds 65535 bytes |
+| Array read | `io::input_error` | Negative length or read failure |
+| Array write | `std::length_error` | Array exceeds INT32_MAX elements |
diff --git a/docs/handbook/libnbtplusplus/testing.md b/docs/handbook/libnbtplusplus/testing.md
new file mode 100644
index 0000000000..807f0fbaa8
--- /dev/null
+++ b/docs/handbook/libnbtplusplus/testing.md
@@ -0,0 +1,291 @@
+# Testing
+
+## Overview
+
+libnbt++ uses the **CxxTest** testing framework. Tests are defined as C++ header files with test classes inheriting from `CxxTest::TestSuite`. The test suite covers all tag types, I/O operations, endian conversion, zlib streams, and value semantics.
+
+Build configuration is in `test/CMakeLists.txt`.
+
+---
+
+## Build Configuration
+
+```cmake
+# test/CMakeLists.txt
+if(NOT (UNIX AND (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i[3-6]86")))
+ message(WARNING "Tests are only supported on Linux x86/x86_64")
+ return()
+endif()
+
+find_package(CxxTest REQUIRED)
+```
+
+Tests are **Linux x86/x86_64 only** due to the use of `objcopy` for embedding binary test data.
+
+### CMake Options
+
+Tests are controlled by the `NBT_BUILD_TESTS` option:
+
+```cmake
+option(NBT_BUILD_TESTS "Build libnbt++ tests" ON)
+
+if(NBT_BUILD_TESTS)
+ enable_testing()
+ add_subdirectory(test)
+endif()
+```
+
+### Binary Test Data Embedding
+
+Test data files (e.g., `bigtest.nbt`, `bigtest_uncompr`, `littletest.nbt`) are converted to object files via `objcopy` and linked directly into test executables:
+
+```cmake
+set(BINARY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/")
+
+function(add_nbt_test name)
+ # ...
+ foreach(binfile ${ARGN})
+ set(obj "${CMAKE_CURRENT_BINARY_DIR}/${binfile}.o")
+ add_custom_command(
+ OUTPUT ${obj}
+ COMMAND objcopy -I binary -O elf64-x86-64
+ -B i386:x86-64 ${binfile} ${obj}
+ WORKING_DIRECTORY ${BINARY_DIR}
+ DEPENDS ${BINARY_DIR}/${binfile}
+ )
+ target_sources(${name} PRIVATE ${obj})
+ endforeach()
+endfunction()
+```
+
+The embedded data is accessed via extern symbols declared in `test/data.h`:
+
+```cpp
+// test/data.h
+#define DECLARE_BINARY(name) \
+ extern "C" const char _binary_##name##_start[]; \
+ extern "C" const char _binary_##name##_end[];
+
+DECLARE_BINARY(bigtest_nbt)
+DECLARE_BINARY(bigtest_uncompr)
+DECLARE_BINARY(littletest_nbt)
+DECLARE_BINARY(littletest_uncompr)
+DECLARE_BINARY(errortest_eof_nbt)
+DECLARE_BINARY(errortest_negative_length_nbt)
+DECLARE_BINARY(errortest_excessive_depth_nbt)
+```
+
+---
+
+## Test Targets
+
+| Target | Source | Tests |
+|--------|--------|-------|
+| `nbttest` | `test/nbttest.h` | Core tag types, value, compound, list, visitor |
+| `endian_str_test` | `test/endian_str_test.h` | Endian read/write roundtrips |
+| `read_test` | `test/read_test.h` | stream_reader, big/little endian, errors |
+| `write_test` | `test/write_test.h` | stream_writer, payload writing, roundtrips |
+| `zlibstream_test` | `test/zlibstream_test.h` | izlibstream, ozlibstream, compression roundtrip |
+| `format_test` | `test/format_test.cpp` | JSON text formatting |
+| `test_value` | `test/test_value.h` | Value numeric assignment and conversion |
+
+Test registration in CMake:
+
+```cmake
+add_nbt_test(nbttest nbttest.h)
+add_nbt_test(endian_str_test endian_str_test.h)
+add_nbt_test(read_test read_test.h
+ bigtest.nbt bigtest_uncompr littletest.nbt littletest_uncompr
+ errortest_eof.nbt errortest_negative_length.nbt
+ errortest_excessive_depth.nbt)
+add_nbt_test(write_test write_test.h
+ bigtest_uncompr littletest_uncompr)
+if(NBT_HAVE_ZLIB)
+ add_nbt_test(zlibstream_test zlibstream_test.h
+ bigtest.nbt bigtest_uncompr)
+endif()
+add_nbt_test(test_value test_value.h)
+```
+
+`zlibstream_test` is only built when `NBT_HAVE_ZLIB` is defined.
+
+---
+
+## Test Details
+
+### nbttest.h — Core Functionality
+
+Tests the fundamental tag and value operations:
+
+```cpp
+class NbtTest : public CxxTest::TestSuite
+{
+public:
+ void test_tag_primitive(); // Constructors, get/set, implicit conversion
+ void test_tag_string(); // String constructors, conversion operators
+ void test_tag_compound(); // Insertion, lookup, iteration, has_key()
+ void test_tag_list(); // Type enforcement, push_back, of<T>()
+ void test_tag_array(); // Vector access, constructors
+ void test_value(); // Type erasure, numeric assignment
+ void test_visitor(); // Double dispatch, visitor invocation
+ void test_equality(); // operator==, operator!= for all types
+ void test_clone(); // clone() and move_clone() correctness
+ void test_as(); // tag::as<T>() casting, bad_cast
+};
+```
+
+### read_test.h — Deserialization
+
+Tests `stream_reader` against known binary data:
+
+```cpp
+class ReadTest : public CxxTest::TestSuite
+{
+public:
+ void test_read_bigtest(); // Verifies full bigtest.nbt structure
+ void test_read_littletest(); // Little-endian variant
+ void test_read_bigtest_uncompr(); // Uncompressed big-endian
+ void test_read_littletest_uncompr(); // Uncompressed little-endian
+ void test_read_eof(); // Truncated data → input_error
+ void test_read_negative_length(); // Negative array length → input_error
+ void test_read_excessive_depth(); // >1024 nesting → input_error
+};
+```
+
+The "bigtest" validates a complex nested compound with all tag types — the standard NBT test file from the Minecraft community. Fields verified include:
+
+- `"byteTest"`: `tag_byte` with value 127
+- `"shortTest"`: `tag_short` with value 32767
+- `"intTest"`: `tag_int` with value 2147483647
+- `"longTest"`: `tag_long` with value 9223372036854775807
+- `"floatTest"`: `tag_float` with value ~0.498...
+- `"doubleTest"`: `tag_double` with value ~0.493...
+- `"stringTest"`: UTF-8 string "HELLO WORLD THIS IS A TEST STRING ÅÄÖ!"
+- `"byteArrayTest"`: 1000-element byte array
+- `"listTest (long)"`: List of 5 longs
+- `"listTest (compound)"`: List of compounds
+- Nested compound within compound
+
+### write_test.h — Serialization
+
+Tests `stream_writer` and write-then-read roundtrips:
+
+```cpp
+class WriteTest : public CxxTest::TestSuite
+{
+public:
+ void test_write_bigtest(); // Write and compare against reference
+ void test_write_littletest(); // Little-endian write
+ void test_write_payload(); // Individual payload writing
+ void test_roundtrip(); // Write → read → compare equality
+};
+```
+
+### endian_str_test.h — Byte Order
+
+Tests all numeric types through read/write roundtrips in both endiannesses:
+
+```cpp
+class EndianStrTest : public CxxTest::TestSuite
+{
+public:
+ void test_read_big();
+ void test_read_little();
+ void test_write_big();
+ void test_write_little();
+ void test_roundtrip_big();
+ void test_roundtrip_little();
+ void test_float_big();
+ void test_float_little();
+ void test_double_big();
+ void test_double_little();
+};
+```
+
+### zlibstream_test.h — Compression
+
+Tests zlib stream wrappers:
+
+```cpp
+class ZlibstreamTest : public CxxTest::TestSuite
+{
+public:
+ void test_inflate_gzip(); // Decompress gzip data
+ void test_inflate_zlib(); // Decompress zlib data
+ void test_inflate_corrupt(); // Corrupt data → zlib_error
+ void test_inflate_eof(); // Truncated data → EOF
+ void test_inflate_trailing(); // Data after compressed stream
+ void test_deflate_roundtrip(); // Compress → decompress → compare
+ void test_deflate_gzip(); // Gzip format output
+};
+```
+
+### test_value.h — Value Semantics
+
+Tests the `value` class's numeric assignment and conversion:
+
+```cpp
+class TestValue : public CxxTest::TestSuite
+{
+public:
+ void test_assign_byte();
+ void test_assign_short();
+ void test_assign_int();
+ void test_assign_long();
+ void test_assign_float();
+ void test_assign_double();
+ void test_assign_widening(); // int8 → int16 → int32 → int64
+ void test_assign_narrowing(); // Narrowing disallowed
+ void test_assign_string();
+};
+```
+
+### format_test.cpp — Text Output
+
+A standalone test (not CxxTest) that constructs a compound, serializes it, reads it back, and prints JSON:
+
+```cpp
+// Constructs a compound with nested types
+// Writes to stringstream, reads back
+// Outputs via operator<< (json_formatter)
+```
+
+---
+
+## Running Tests
+
+```bash
+mkdir build && cd build
+cmake .. -DNBT_BUILD_TESTS=ON
+cmake --build .
+ctest
+```
+
+Or run individual tests:
+
+```bash
+./test/nbttest
+./test/read_test
+./test/write_test
+./test/endian_str_test
+./test/zlibstream_test
+./test/test_value
+```
+
+---
+
+## Test Data Files
+
+Located in `test/`:
+
+| File | Description |
+|------|-------------|
+| `bigtest.nbt` | Standard NBT test file, gzip-compressed, big-endian |
+| `bigtest_uncompr` | Same as bigtest but uncompressed |
+| `littletest.nbt` | Little-endian NBT test file, compressed |
+| `littletest_uncompr` | Little-endian, uncompressed |
+| `errortest_eof.nbt` | Truncated NBT for error testing |
+| `errortest_negative_length.nbt` | NBT with negative array length |
+| `errortest_excessive_depth.nbt` | NBT with >1024 nesting levels |
+
+These files are embedded into test binaries via `objcopy` and accessed through the `DECLARE_BINARY` macros in `data.h`.
diff --git a/docs/handbook/libnbtplusplus/visitor-pattern.md b/docs/handbook/libnbtplusplus/visitor-pattern.md
new file mode 100644
index 0000000000..dbf8124959
--- /dev/null
+++ b/docs/handbook/libnbtplusplus/visitor-pattern.md
@@ -0,0 +1,333 @@
+# Visitor Pattern
+
+## Overview
+
+libnbt++ implements the classic double-dispatch visitor pattern for traversing and processing tag hierarchies without modifying the tag classes themselves. Two visitor base classes are provided: `nbt_visitor` for mutable access and `const_nbt_visitor` for read-only traversal.
+
+Defined in `include/nbt_visitor.h`.
+
+---
+
+## Visitor Base Classes
+
+### nbt_visitor — Mutable Visitor
+
+```cpp
+class nbt_visitor
+{
+public:
+ virtual ~nbt_visitor() noexcept;
+
+ virtual void visit(tag_byte&) {}
+ virtual void visit(tag_short&) {}
+ virtual void visit(tag_int&) {}
+ virtual void visit(tag_long&) {}
+ virtual void visit(tag_float&) {}
+ virtual void visit(tag_double&) {}
+ virtual void visit(tag_byte_array&) {}
+ virtual void visit(tag_string&) {}
+ virtual void visit(tag_list&) {}
+ virtual void visit(tag_compound&) {}
+ virtual void visit(tag_int_array&) {}
+ virtual void visit(tag_long_array&) {}
+};
+```
+
+### const_nbt_visitor — Immutable Visitor
+
+```cpp
+class const_nbt_visitor
+{
+public:
+ virtual ~const_nbt_visitor() noexcept;
+
+ virtual void visit(const tag_byte&) {}
+ virtual void visit(const tag_short&) {}
+ virtual void visit(const tag_int&) {}
+ virtual void visit(const tag_long&) {}
+ virtual void visit(const tag_float&) {}
+ virtual void visit(const tag_double&) {}
+ virtual void visit(const tag_byte_array&) {}
+ virtual void visit(const tag_string&) {}
+ virtual void visit(const tag_list&) {}
+ virtual void visit(const tag_compound&) {}
+ virtual void visit(const tag_int_array&) {}
+ virtual void visit(const tag_long_array&) {}
+};
+```
+
+Both provide 12 `visit()` overloads — one per concrete tag type. All default to empty (no-op), so subclasses only override the types they care about.
+
+---
+
+## Double Dispatch via accept()
+
+The `tag` base class declares the `accept()` method:
+
+```cpp
+class tag
+{
+public:
+    virtual void accept(nbt_visitor& visitor) = 0;
+    virtual void accept(const_nbt_visitor& visitor) const = 0;
+};
+```
+
+The CRTP intermediate `crtp_tag<Sub>` implements both `accept()` methods:
+
+```cpp
+template <class Sub>
+class crtp_tag : public tag
+{
+public:
+    void accept(nbt_visitor& visitor) override
+    {
+        visitor.visit(static_cast<Sub&>(*this));
+    }
+
+    void accept(const_nbt_visitor& visitor) const override
+    {
+        visitor.visit(static_cast<const Sub&>(*this));
+    }
+};
+```
+
+For `nbt_visitor` (mutable), the non-const `accept()` overload is invoked, so the visitor receives a mutable reference directly — no `const_cast` is needed.
+
+For `const_nbt_visitor`, the const reference is passed through directly.
+
+---
+
+## How It Works
+
+1. Client code creates a visitor subclass, overriding `visit()` for the types it handles
+2. Client calls `tag.accept(visitor)` on any tag
+3. The CRTP-generated `accept()` calls `visitor.visit(static_cast<Sub&>(*this))`
+4. The correct `visit()` overload is called based on the **concrete** tag type
+
+```
+Client → tag.accept(visitor)
+ → crtp_tag<tag_int>::accept()
+ → visitor.visit(static_cast<tag_int&>(*this))
+ → YourVisitor::visit(tag_int&) // Your override
+```
+
+This resolves the combination of (runtime tag type) × (visitor implementation) without `dynamic_cast` or switch statements.
+
+---
+
+## Built-in Visitor: json_fmt_visitor
+
+The library includes one concrete visitor in `src/text/json_formatter.cpp`:
+
+```cpp
+class json_fmt_visitor : public const_nbt_visitor
+{
+public:
+ json_fmt_visitor(std::ostream& os, unsigned int indent);
+
+ void visit(const tag_byte& t) override;
+ void visit(const tag_short& t) override;
+ void visit(const tag_int& t) override;
+ void visit(const tag_long& t) override;
+ void visit(const tag_float& t) override;
+ void visit(const tag_double& t) override;
+ void visit(const tag_byte_array& t) override;
+ void visit(const tag_string& t) override;
+ void visit(const tag_list& t) override;
+ void visit(const tag_compound& t) override;
+ void visit(const tag_int_array& t) override;
+ void visit(const tag_long_array& t) override;
+
+private:
+ std::ostream& os;
+ unsigned int indent;
+ void write_indent();
+};
+```
+
+This visitor renders any tag as a JSON-like text format. Used by `tag::operator<<` for debug output:
+
+```cpp
+std::ostream& operator<<(std::ostream& os, const tag& t)
+{
+ static text::json_formatter formatter;
+ formatter.print(os, t);
+ return os;
+}
+```
+
+### Formatting Rules
+
+| Type | Output Format | Example |
+|------|--------------|---------|
+| `tag_byte` | `<value>b` | `42b` |
+| `tag_short` | `<value>s` | `100s` |
+| `tag_int` | `<value>` | `12345` |
+| `tag_long` | `<value>l` | `9876543210l` |
+| `tag_float` | `<value>f` | `3.14f` |
+| `tag_double` | `<value>d` | `2.718d` |
+| `tag_string` | `"<value>"` | `"hello"` |
+| `tag_byte_array` | `[B; ...]` | `[B; 1b, 2b, 3b]` |
+| `tag_int_array` | `[I; ...]` | `[I; 1, 2, 3]` |
+| `tag_long_array` | `[L; ...]` | `[L; 1l, 2l, 3l]` |
+| `tag_list` | `[...]` | `[1, 2, 3]` |
+| `tag_compound` | `{...}` | `{"key": 42}` |
+
+Special float/double handling:
+- `+Infinity`, `-Infinity`, `NaN` are written as-is (not JSON-compliant but accurate)
+- Uses the `std::defaultfloat` format
+
+---
+
+## Writing Custom Visitors
+
+### Example: Tag Counter
+
+Count the total number of tags and tags of each type:
+
+```cpp
+class tag_counter : public const_nbt_visitor
+{
+public:
+ int total = 0;
+ std::map<tag_type, int> counts;
+
+ void visit(const tag_byte&) override { ++total; ++counts[tag_type::Byte]; }
+ void visit(const tag_short&) override { ++total; ++counts[tag_type::Short]; }
+ void visit(const tag_int&) override { ++total; ++counts[tag_type::Int]; }
+ void visit(const tag_long&) override { ++total; ++counts[tag_type::Long]; }
+ void visit(const tag_float&) override { ++total; ++counts[tag_type::Float]; }
+ void visit(const tag_double&) override { ++total; ++counts[tag_type::Double]; }
+ void visit(const tag_string&) override { ++total; ++counts[tag_type::String]; }
+ void visit(const tag_byte_array&) override { ++total; ++counts[tag_type::Byte_Array]; }
+ void visit(const tag_int_array&) override { ++total; ++counts[tag_type::Int_Array]; }
+ void visit(const tag_long_array&) override { ++total; ++counts[tag_type::Long_Array]; }
+
+ void visit(const tag_list& t) override {
+ ++total;
+ ++counts[tag_type::List];
+ for (const auto& val : t)
+ val.get().accept(*this); // Recurse into children
+ }
+
+ void visit(const tag_compound& t) override {
+ ++total;
+ ++counts[tag_type::Compound];
+ for (const auto& [name, val] : t)
+ val.get().accept(*this); // Recurse into children
+ }
+};
+
+// Usage
+tag_counter counter;
+root.accept(counter);
+std::cout << "Total tags: " << counter.total << "\n";
+```
+
+### Example: Tag Modifier (Mutable)
+
+Double all integer values in a tree:
+
+```cpp
+class int_doubler : public nbt_visitor
+{
+public:
+ void visit(tag_int& t) override {
+ t.set(t.get() * 2);
+ }
+ void visit(tag_list& t) override {
+ for (auto& val : t)
+ val.get().accept(*this);
+ }
+ void visit(tag_compound& t) override {
+ for (auto& [name, val] : t)
+ val.get().accept(*this);
+ }
+};
+
+int_doubler doubler;
+root.accept(doubler);
+```
+
+### Example: Selective Visitor
+
+Only handle specific types — unhandled types use the default no-op:
+
+```cpp
+class string_collector : public const_nbt_visitor
+{
+public:
+ std::vector<std::string> strings;
+
+ void visit(const tag_string& t) override {
+ strings.push_back(t.get());
+ }
+ void visit(const tag_list& t) override {
+ for (const auto& val : t)
+ val.get().accept(*this);
+ }
+ void visit(const tag_compound& t) override {
+ for (const auto& [name, val] : t)
+ val.get().accept(*this);
+ }
+};
+```
+
+---
+
+## Recursive Traversal
+
+The visitor pattern does **not** automatically recurse into compounds and lists. To walk an entire tag tree, your visitor must explicitly recurse in its `visit(tag_compound&)` and `visit(tag_list&)` overloads:
+
+```cpp
+void visit(const tag_compound& t) override {
+ for (const auto& [name, val] : t)
+ val.get().accept(*this);
+}
+
+void visit(const tag_list& t) override {
+ for (const auto& val : t)
+ val.get().accept(*this);
+}
+```
+
+This is by design — it gives visitors control over traversal depth, ordering, and filtering.
+
+---
+
+## Visitor vs. Dynamic Cast
+
+Two approaches to type-specific processing:
+
+### Visitor Approach
+
+```cpp
+class my_visitor : public const_nbt_visitor {
+ void visit(const tag_int& t) override { /* handle int */ }
+ void visit(const tag_string& t) override { /* handle string */ }
+ // ...
+};
+my_visitor v;
+tag.accept(v);
+```
+
+### Dynamic Cast Approach
+
+```cpp
+if (auto* int_tag = dynamic_cast<const tag_int*>(&tag)) {
+ // handle int
+} else if (auto* str_tag = dynamic_cast<const tag_string*>(&tag)) {
+ // handle string
+}
+```
+
+The visitor pattern is preferable when:
+- Processing many or all tag types
+- Building reusable tree-walking logic
+- The compiler should warn about unhandled types (though default no-ops mask this)
+
+`dynamic_cast` / `tag::as<T>()` is simpler when:
+- You know the type at the call site
+- You only need to handle one or two types
+- You're accessing a specific child of a compound
diff --git a/docs/handbook/libnbtplusplus/zlib-integration.md b/docs/handbook/libnbtplusplus/zlib-integration.md
new file mode 100644
index 0000000000..592d8510da
--- /dev/null
+++ b/docs/handbook/libnbtplusplus/zlib-integration.md
@@ -0,0 +1,514 @@
+# Zlib Integration
+
+## Overview
+
+libnbt++ provides optional zlib support for reading and writing gzip/zlib-compressed NBT data, which is the standard format for Minecraft world files (`level.dat`, region files, etc.).
+
+The zlib integration is in the `zlib` namespace and operates through standard C++ stream wrappers. It is conditionally compiled via the `NBT_USE_ZLIB` CMake option (default: `ON`).
+
+Defined in:
+- `include/io/zlib_streambuf.h` — Base streambuf with z_stream management
+- `include/io/izlibstream.h` / `src/io/izlibstream.cpp` — Decompression stream
+- `include/io/ozlibstream.h` / `src/io/ozlibstream.cpp` — Compression stream
+
+---
+
+## Build Configuration
+
+```cmake
+option(NBT_USE_ZLIB "Build with zlib stream support" ON)
+```
+
+When enabled, CMake finds zlib and links against it:
+```cmake
+if(NBT_USE_ZLIB)
+ find_package(ZLIB REQUIRED)
+ target_link_libraries(${NBT_NAME} PRIVATE ZLIB::ZLIB)
+ target_compile_definitions(${NBT_NAME} PUBLIC NBT_HAVE_ZLIB)
+endif()
+```
+
+The `NBT_HAVE_ZLIB` preprocessor macro is defined publicly, allowing downstream code to conditionally use zlib features:
+
+```cpp
+#ifdef NBT_HAVE_ZLIB
+#include <io/izlibstream.h>
+#include <io/ozlibstream.h>
+#endif
+```
+
+---
+
+## zlib_streambuf — Base Class
+
+```cpp
+class zlib_streambuf : public std::streambuf
+{
+protected:
+ z_stream zstr;
+ std::vector<char> in;
+ std::vector<char> out;
+
+ static const size_t bufsize = 32768; // 32 KB
+};
+```
+
+This abstract base provides the shared z_stream state and I/O buffers used by both inflate and deflate streambufs.
+
+- `zstr`: The zlib `z_stream` struct controlling compression/decompression state
+- `in`: Input buffer (32 KB)
+- `out`: Output buffer (32 KB)
+- `bufsize`: Buffer size constant (32768 bytes)
+
+---
+
+## zlib_error — Exception Class
+
+```cpp
+class zlib_error : public std::runtime_error
+{
+public:
+ zlib_error(const z_stream& zstr, int status);
+ int status() const { return status_; }
+
+private:
+ int status_;
+};
+```
+
+Thrown on zlib API failures. Wraps the error message from `zstr.msg` (if available) along with the numeric error code.
+
+---
+
+## Decompression: izlibstream
+
+### inflate_streambuf
+
+```cpp
+class inflate_streambuf : public zlib_streambuf
+{
+public:
+ explicit inflate_streambuf(std::istream& input,
+ int window_bits = 32 + MAX_WBITS);
+ ~inflate_streambuf();
+
+ int_type underflow() override;
+
+private:
+ std::istream& is;
+ bool stream_end = false;
+};
+```
+
+**Constructor:**
+```cpp
+inflate_streambuf::inflate_streambuf(std::istream& input, int window_bits)
+ : is(input)
+{
+ in.resize(bufsize);
+ out.resize(bufsize);
+
+ zstr.zalloc = Z_NULL;
+ zstr.zfree = Z_NULL;
+ zstr.opaque = Z_NULL;
+ zstr.avail_in = 0;
+ zstr.next_in = Z_NULL;
+
+ int status = inflateInit2(&zstr, window_bits);
+ if (status != Z_OK)
+ throw zlib_error(zstr, status);
+}
+```
+
+The default `window_bits = 32 + MAX_WBITS` (typically `32 + 15 = 47`) enables **automatic format detection** — zlib will detect whether the data is in zlib or gzip format (raw deflate is not covered by auto-detection; it would require a negative `window_bits`). This is critical because Minecraft NBT files may use either gzip or zlib compression.
+
+**underflow() — Buffered decompression:**
+
+```cpp
+inflate_streambuf::int_type inflate_streambuf::underflow()
+{
+ if (stream_end)
+ return traits_type::eof();
+
+ zstr.next_out = reinterpret_cast<Bytef*>(out.data());
+ zstr.avail_out = out.size();
+
+ while (zstr.avail_out == out.size()) {
+ if (zstr.avail_in == 0) {
+ is.read(in.data(), in.size());
+ auto bytes_read = is.gcount();
+ if (bytes_read == 0) {
+ setg(nullptr, nullptr, nullptr);
+ return traits_type::eof();
+ }
+ zstr.next_in = reinterpret_cast<Bytef*>(in.data());
+ zstr.avail_in = bytes_read;
+ }
+
+ int status = inflate(&zstr, Z_NO_FLUSH);
+ if (status == Z_STREAM_END) {
+ // Seek back unused input so the underlying stream
+ // position is correct for any subsequent reads
+ if (zstr.avail_in > 0)
+ is.seekg(-static_cast<int>(zstr.avail_in),
+ std::ios::cur);
+ stream_end = true;
+ break;
+ }
+ if (status != Z_OK)
+ throw zlib_error(zstr, status);
+ }
+
+ auto decompressed = out.size() - zstr.avail_out;
+ if (decompressed == 0)
+ return traits_type::eof();
+
+ setg(out.data(), out.data(), out.data() + decompressed);
+ return traits_type::to_int_type(out[0]);
+}
+```
+
+Key behaviors:
+- Reads compressed data in 32 KB chunks from the underlying stream
+- Decompresses into the output buffer
+- On `Z_STREAM_END`, seeks the underlying stream back by the number of unconsumed bytes, so subsequent reads on the same stream work correctly (important for concatenated data)
+- Throws `zlib_error` on decompression errors
+
+### izlibstream
+
+```cpp
+class izlibstream : public std::istream
+{
+public:
+ explicit izlibstream(std::istream& input,
+ int window_bits = 32 + MAX_WBITS);
+
+private:
+ inflate_streambuf buf;
+};
+```
+
+A simple `std::istream` wrapper around `inflate_streambuf`:
+
+```cpp
+izlibstream::izlibstream(std::istream& input, int window_bits)
+ : std::istream(&buf), buf(input, window_bits)
+{}
+```
+
+Usage:
+```cpp
+std::ifstream file("level.dat", std::ios::binary);
+zlib::izlibstream zs(file);
+auto result = nbt::io::read_compound(zs);
+```
+
+---
+
+## Compression: ozlibstream
+
+### deflate_streambuf
+
+```cpp
+class deflate_streambuf : public zlib_streambuf
+{
+public:
+ explicit deflate_streambuf(std::ostream& output,
+ int level = Z_DEFAULT_COMPRESSION,
+ int window_bits = MAX_WBITS);
+ ~deflate_streambuf();
+
+ int_type overflow(int_type ch) override;
+ int sync() override;
+ void close();
+
+private:
+ std::ostream& os;
+ bool closed_ = false;
+
+ void deflate_chunk(int flush);
+};
+```
+
+**Constructor:**
+```cpp
+deflate_streambuf::deflate_streambuf(std::ostream& output,
+ int level, int window_bits)
+ : os(output)
+{
+ in.resize(bufsize);
+ out.resize(bufsize);
+
+ zstr.zalloc = Z_NULL;
+ zstr.zfree = Z_NULL;
+ zstr.opaque = Z_NULL;
+
+ int status = deflateInit2(&zstr, level, Z_DEFLATED,
+ window_bits, 8, Z_DEFAULT_STRATEGY);
+ if (status != Z_OK)
+ throw zlib_error(zstr, status);
+
+ setp(in.data(), in.data() + in.size());
+}
+```
+
+Parameters:
+- `level`: Compression level (0–9, or `Z_DEFAULT_COMPRESSION` = -1)
+- `window_bits`: Format control
+  - `MAX_WBITS` (15): zlib format (zlib header + deflate payload)
+ - `MAX_WBITS + 16` (31): gzip format
+
+**overflow() — Buffer full, deflate and flush:**
+```cpp
+deflate_streambuf::int_type deflate_streambuf::overflow(int_type ch)
+{
+ deflate_chunk(Z_NO_FLUSH);
+ if (ch != traits_type::eof()) {
+ *pptr() = traits_type::to_char_type(ch);
+ pbump(1);
+ }
+ return ch;
+}
+```
+
+**sync() — Flush current buffer:**
+```cpp
+int deflate_streambuf::sync()
+{
+ deflate_chunk(Z_SYNC_FLUSH);
+ return 0;
+}
+```
+
+**deflate_chunk() — Core compression loop:**
+```cpp
+void deflate_streambuf::deflate_chunk(int flush)
+{
+ zstr.next_in = reinterpret_cast<Bytef*>(pbase());
+ zstr.avail_in = pptr() - pbase();
+
+ do {
+ zstr.next_out = reinterpret_cast<Bytef*>(out.data());
+ zstr.avail_out = out.size();
+
+ int status = deflate(&zstr, flush);
+ if (status != Z_OK && status != Z_STREAM_END
+ && status != Z_BUF_ERROR)
+ throw zlib_error(zstr, status);
+
+ auto compressed = out.size() - zstr.avail_out;
+ if (compressed > 0)
+ os.write(out.data(), compressed);
+ } while (zstr.avail_out == 0);
+
+ setp(in.data(), in.data() + in.size());
+}
+```
+
+**close() — Finalize compression:**
+```cpp
+void deflate_streambuf::close()
+{
+ if (closed_) return;
+ closed_ = true;
+ deflate_chunk(Z_FINISH);
+}
+```
+
+Must be called to write the final compressed block with `Z_FINISH`.
+
+### ozlibstream
+
+```cpp
+class ozlibstream : public std::ostream
+{
+public:
+ explicit ozlibstream(std::ostream& output,
+ int level = Z_DEFAULT_COMPRESSION,
+ bool gzip = true);
+
+ void close();
+
+private:
+ deflate_streambuf buf;
+};
+```
+
+**Constructor:**
+```cpp
+ozlibstream::ozlibstream(std::ostream& output, int level, bool gzip)
+ : std::ostream(&buf),
+ buf(output, level, MAX_WBITS + (gzip ? 16 : 0))
+{}
+```
+
+The `gzip` parameter (default: `true`) controls the output format:
+- `true`: gzip format (`window_bits = MAX_WBITS + 16 = 31`)
+- `false`: zlib format (`window_bits = MAX_WBITS = 15`)
+
+**close() — Exception-safe stream finalization:**
+```cpp
+void ozlibstream::close()
+{
+ try {
+ buf.close();
+ } catch (...) {
+ setstate(std::ios::badbit);
+ if (exceptions() & std::ios::badbit)
+ throw;
+ }
+}
+```
+
+`close()` catches exceptions from `buf.close()` and converts them to a badbit state. If the stream has badbit exceptions enabled, it re-throws.
+
+---
+
+## Format Detection
+
+### Automatic Detection (Reading)
+
+The default inflate `window_bits = 32 + MAX_WBITS` enables automatic format detection:
+
+| Bits | Format |
+|------|--------|
+| `MAX_WBITS` (15) | Zlib format only (raw deflate would require negative bits) |
+| `MAX_WBITS + 16` (31) | Gzip only |
+| `MAX_WBITS + 32` (47) | Auto-detect gzip or zlib |
+
+The library defaults to auto-detect (47), so it handles both formats transparently.
+
+### Explicit Format (Writing)
+
+When writing, you must choose:
+
+```cpp
+// Gzip format (default for Minecraft)
+zlib::ozlibstream gzip_out(file); // gzip=true
+zlib::ozlibstream gzip_out(file, Z_DEFAULT_COMPRESSION, true); // explicit
+
+// Zlib format
+zlib::ozlibstream zlib_out(file, Z_DEFAULT_COMPRESSION, false);
+```
+
+---
+
+## Usage Examples
+
+### Reading a Gzip-Compressed NBT File
+
+```cpp
+std::ifstream file("level.dat", std::ios::binary);
+zlib::izlibstream zs(file);
+auto [name, root] = nbt::io::read_compound(zs);
+```
+
+### Writing a Gzip-Compressed NBT File
+
+```cpp
+std::ofstream file("level.dat", std::ios::binary);
+zlib::ozlibstream zs(file); // gzip by default
+nbt::io::write_tag("", root, zs);
+zs.close(); // MUST call close() to finalize
+```
+
+### Roundtrip Compression
+
+```cpp
+// Compress
+std::stringstream ss;
+{
+ zlib::ozlibstream zs(ss);
+ nbt::io::write_tag("test", compound, zs);
+ zs.close();
+}
+
+// Decompress
+ss.seekg(0);
+{
+ zlib::izlibstream zs(ss);
+ auto [name, tag] = nbt::io::read_compound(zs);
+ // tag now contains the original compound
+}
+```
+
+### Controlling Compression Level
+
+```cpp
+// No compression (fastest)
+zlib::ozlibstream fast(file, Z_NO_COMPRESSION);
+
+// Best compression (slowest)
+zlib::ozlibstream best(file, Z_BEST_COMPRESSION);
+
+// Default compression (balanced)
+zlib::ozlibstream default_level(file, Z_DEFAULT_COMPRESSION);
+
+// Specific level (0-9)
+zlib::ozlibstream level6(file, 6);
+```
+
+---
+
+## Error Handling
+
+### zlib_error
+
+All zlib API failures throw `zlib::zlib_error`:
+
+```cpp
+try {
+ zlib::izlibstream zs(file);
+ auto result = nbt::io::read_compound(zs);
+} catch (const zlib::zlib_error& e) {
+ std::cerr << "Zlib error: " << e.what()
+ << " (status: " << e.status() << ")\n";
+}
+```
+
+Common error codes:
+| Status | Meaning |
+|--------|---------|
+| `Z_DATA_ERROR` | Corrupted compressed data |
+| `Z_MEM_ERROR` | Insufficient memory |
+| `Z_BUF_ERROR` | Buffer/progress error |
+| `Z_STREAM_ERROR` | Invalid parameters |
+
+### Stream State
+
+After decompression errors, the `izlibstream` may be in a bad state. After `ozlibstream::close()` catches an exception from the deflate buffer, it sets `std::ios::badbit` on the stream.
+
+---
+
+## Resource Management
+
+### Destructors
+
+Both `inflate_streambuf` and `deflate_streambuf` call the corresponding zlib cleanup in their destructors:
+
+```cpp
+inflate_streambuf::~inflate_streambuf()
+{
+ inflateEnd(&zstr);
+}
+
+deflate_streambuf::~deflate_streambuf()
+{
+ deflateEnd(&zstr);
+}
+```
+
+These release all memory allocated by zlib. The destructors are noexcept-safe — `inflateEnd`/`deflateEnd` do not throw.
+
+### Important: Call close() Before Destruction
+
+For `ozlibstream`, you **must** call `close()` before the stream is destroyed to ensure the final compressed block is flushed. The destructor calls `deflateEnd()` but does **not** call `close()` — failing to close will produce a truncated compressed file.
+
+```cpp
+{
+ std::ofstream file("output.dat", std::ios::binary);
+ zlib::ozlibstream zs(file);
+ nbt::io::write_tag("", root, zs);
+ zs.close(); // Required!
+} // file and zs destroyed safely
+```
diff --git a/docs/handbook/meshmc/account-management.md b/docs/handbook/meshmc/account-management.md
new file mode 100644
index 0000000000..c38eef014d
--- /dev/null
+++ b/docs/handbook/meshmc/account-management.md
@@ -0,0 +1,470 @@
+# Account Management
+
+## Overview
+
+MeshMC supports Microsoft Account (MSA) authentication for Minecraft login. The authentication system implements the full MSA → Xbox Live → Minecraft authentication chain using Qt6's `QOAuth2AuthorizationCodeFlow` and the Katabasis OAuth2 library. Multiple accounts can be stored, managed, and switched between.
+
+## Authentication Architecture
+
+### Key Classes
+
+| Class | File | Purpose |
+|---|---|---|
+| `MinecraftAccount` | `minecraft/auth/MinecraftAccount.{h,cpp}` | Account model |
+| `AccountList` | `minecraft/auth/AccountList.{h,cpp}` | Account collection model |
+| `AccountData` | `minecraft/auth/AccountData.{h,cpp}` | Token/profile data storage |
+| `AccountTask` | `minecraft/auth/AccountTask.{h,cpp}` | Async auth task base |
+| `AuthFlow` | `minecraft/auth/flows/AuthFlow.{h,cpp}` | Auth step pipeline |
+| `MSAInteractive` | `minecraft/auth/flows/MSA.{h,cpp}` | Interactive MSA login |
+| `MSASilent` | `minecraft/auth/flows/MSA.{h,cpp}` | Silent token refresh |
+| `AuthStep` | `minecraft/auth/AuthStep.{h,cpp}` | Abstract auth pipeline step |
+| `AuthSession` | `minecraft/auth/AuthSession.{h,cpp}` | Resolved session for launch |
+| `AuthRequest` | `minecraft/auth/AuthRequest.{h,cpp}` | HTTP request helper |
+| `Parsers` | `minecraft/auth/Parsers.{h,cpp}` | Response JSON parsers |
+| `MSALoginDialog` | `ui/dialogs/MSALoginDialog.{h,cpp}` | Login UI dialog |
+
+### Authentication Steps
+
+| Step | File | Purpose |
+|---|---|---|
+| `MSAStep` | `steps/MSAStep.{h,cpp}` | OAuth2 browser-based login |
+| `XboxUserStep` | `steps/XboxUserStep.{h,cpp}` | Exchange MSA token → Xbox User Token |
+| `XboxAuthorizationStep` | `steps/XboxAuthorizationStep.{h,cpp}` | Exchange → XSTS Token |
+| `XboxProfileStep` | `steps/XboxProfileStep.{h,cpp}` | Fetch Xbox profile |
+| `MinecraftProfileStep` | `steps/MinecraftProfileStep.{h,cpp}` | Fetch Minecraft profile |
+| `EntitlementsStep` | `steps/EntitlementsStep.{h,cpp}` | Verify game ownership |
+| `GetSkinStep` | `steps/GetSkinStep.{h,cpp}` | Download player skin |
+| `MeshMCLoginStep` | `steps/MeshMCLoginStep.{h,cpp}` | MeshMC-specific login |
+
+## MSA Authentication Flow
+
+### Interactive Login (First-Time)
+
+The `MSAInteractive` flow is used when the user needs to sign in for the first time:
+
+```
+MSAStep(Login)
+ │
+ ├── QOAuth2AuthorizationCodeFlow configured with:
+ │ ├── authorizationUrl: https://login.microsoftonline.com/consumers/oauth2/v2.0/authorize
+ │ ├── accessTokenUrl: https://login.microsoftonline.com/consumers/oauth2/v2.0/token
+ │ ├── clientId: MeshMC_MICROSOFT_CLIENT_ID (from BuildConfig)
+ │ └── scope: XboxLive.signin XboxLive.offline_access
+ │
+ ├── Opens browser for user authentication
+ ├── QOAuthHttpServerReplyHandler listens on localhost
+ ├── Receives authorization code callback
+ └── Exchanges code for MSA tokens
+ │
+ ▼
+XboxUserStep
+ │
+ ├── POST https://user.auth.xboxlive.com/user/authenticate
+ ├── Body: { "Properties": { "AuthMethod": "RPS", "SiteName": "user.auth.xboxlive.com",
+ │ "RpsTicket": "d=<MSA_ACCESS_TOKEN>" }, "RelyingParty": "http://auth.xboxlive.com",
+ │ "TokenType": "JWT" }
+ └── Returns: Xbox User Token + user hash
+ │
+ ▼
+XboxAuthorizationStep
+ │
+ ├── POST https://xsts.auth.xboxlive.com/xsts/authorize
+ ├── Body: { "Properties": { "SandboxId": "RETAIL",
+ │ "UserTokens": ["<XBOX_USER_TOKEN>"] },
+ │ "RelyingParty": "rp://api.minecraftservices.com/",
+ │ "TokenType": "JWT" }
+ └── Returns: XSTS Token + user hash
+ │
+ ▼
+MinecraftProfileStep
+ │
+ ├── POST https://api.minecraftservices.com/authentication/login_with_xbox
+ ├── Body: { "identityToken": "XBL3.0 x=<USERHASH>;<XSTS_TOKEN>" }
+ ├── Returns: Minecraft access token
+ │
+ ├── GET https://api.minecraftservices.com/minecraft/profile
+ └── Returns: player UUID, name, skin, capes
+ │
+ ▼
+EntitlementsStep
+ │
+ ├── GET https://api.minecraftservices.com/entitlements/mcstore
+ └── Verifies: ownsMinecraft, canPlayMinecraft
+ │
+ ▼
+GetSkinStep
+ │
+ ├── Downloads skin texture from profile URL
+ └── Stores skin data in AccountData
+```
+
+### Silent Refresh
+
+The `MSASilent` flow is used for automatic token refresh after the initial login:
+
+```
+MSAStep(Refresh)
+ │
+ ├── Uses stored refresh token
+ ├── QOAuth2AuthorizationCodeFlow::refreshAccessToken()
+ └── Exchanges refresh token for new MSA tokens
+ │
+ ▼
+(Same steps as Interactive: XboxUserStep → XboxAuthorizationStep → MinecraftProfileStep → EntitlementsStep → GetSkinStep)
+```
+
+## MSAStep Implementation
+
+`MSAStep` uses Qt6's `QOAuth2AuthorizationCodeFlow`:
+
+```cpp
+class MSAStep : public AuthStep
+{
+ Q_OBJECT
+public:
+ enum Action { Refresh, Login };
+
+ explicit MSAStep(AccountData* data, Action action);
+ virtual ~MSAStep() noexcept;
+
+ void perform() override;
+ void rehydrate() override;
+ QString describe() override;
+
+private slots:
+ void onGranted();
+ void onRequestFailed(QAbstractOAuth::Error error);
+ void onOpenBrowser(const QUrl& url);
+};
+```
+
+For `Login` action:
+- Creates `QOAuth2AuthorizationCodeFlow`
+- Starts a `QOAuthHttpServerReplyHandler` on a random local port
+- Opens the user's browser to Microsoft's login page
+- Receives the callback and exchanges the code for tokens
+
+For `Refresh` action:
+- Uses the stored refresh token
+- Calls `refreshAccessToken()` on the flow
+- No browser interaction needed
+
+## AuthFlow Pipeline
+
+`AuthFlow` orchestrates the step sequence:
+
+```cpp
+class AuthFlow : public AccountTask
+{
+ Q_OBJECT
+public:
+ explicit AuthFlow(AccountData* data, QObject* parent = 0);
+
+ void executeTask() override;
+
+private slots:
+ void stepFinished(AccountTaskState resultingState, QString message);
+
+protected:
+ void succeed();
+ void nextStep();
+
+ QList<AuthStep::Ptr> m_steps;
+ AuthStep::Ptr m_currentStep;
+};
+```
+
+Steps are executed sequentially. Each step calls `stepFinished()` when complete, which either advances to the next step or handles errors.
+
+## AccountData
+
+`AccountData` stores all authentication state for a single account:
+
+```cpp
+struct AccountData {
+ // Serialization
+ QJsonObject saveState() const;
+ bool resumeStateFromV3(QJsonObject data);
+
+ // Display
+ QString accountDisplayString() const; // Gamertag
+ QString accessToken() const; // Minecraft access token
+ QString profileId() const; // Minecraft UUID
+ QString profileName() const; // Minecraft username
+ QString lastError() const;
+
+ // Account type
+ AccountType type = AccountType::MSA;
+
+ // Token storage
+ Katabasis::Token msaToken; // Microsoft Account token
+ Katabasis::Token userToken; // Xbox User Token
+ Katabasis::Token xboxApiToken; // XSTS Token
+ Katabasis::Token mojangservicesToken; // Mojang services token
+ Katabasis::Token yggdrasilToken; // Minecraft access token
+
+ // Profile data
+ MinecraftProfile minecraftProfile; // UUID, name, skin, capes
+ MinecraftEntitlement minecraftEntitlement; // Game ownership
+
+ // Internal
+ Katabasis::Validity validity_;
+ QString internalId; // Runtime-only unique ID
+};
+```
+
+### Token Stack
+
+Each token is stored as a `Katabasis::Token`:
+- `token` — the actual token string
+- `refresh_token` — token for refresh operations
+- `validity` — `Katabasis::Validity::None`, `Assumed`, or `Certain`
+- `extra` — additional token metadata
+
+### MinecraftProfile
+
+```cpp
+struct MinecraftProfile {
+ QString id; // UUID
+ QString name; // Player name
+ Skin skin; // Active skin
+ QString currentCape; // Active cape ID
+ QMap<QString, Cape> capes; // Available capes
+ Katabasis::Validity validity;
+};
+
+struct Skin {
+ QString id;
+ QString url;
+ QString variant; // "classic" or "slim"
+ QByteArray data; // Texture data
+};
+```
+
+## MinecraftAccount
+
+`MinecraftAccount` is the QObject wrapper around `AccountData`:
+
+```cpp
+class MinecraftAccount : public QObject, public Usable
+{
+ Q_OBJECT
+public:
+ static MinecraftAccountPtr createBlankMSA();
+ static MinecraftAccountPtr loadFromJsonV3(const QJsonObject& json);
+ QJsonObject saveToJson() const;
+
+ // Auth operations
+ shared_qobject_ptr<AccountTask> loginMSA();
+ shared_qobject_ptr<AccountTask> refresh();
+ shared_qobject_ptr<AccountTask> currentTask();
+
+ // Queries
+ QString internalId() const;
+ QString accountDisplayString() const;
+ QString accessToken() const;
+ QString profileId() const;
+ QString profileName() const;
+ bool isActive() const;
+
+signals:
+ void changed();
+ void activityChanged(bool active);
+
+public:
+ AccountData data;
+};
+```
+
+### Account States
+
+```cpp
+enum class AccountState {
+ Unchecked, // Not yet validated
+ Offline, // Tokens exist but not verified
+ Working, // Auth operation in progress
+ Online, // Valid tokens, verified
+ Errored, // Auth operation failed
+ Expired, // Tokens expired
+ Gone // Account removed from Microsoft
+};
+```
+
+## AccountList
+
+`AccountList` manages the collection of Microsoft accounts:
+
+```cpp
+class AccountList : public QAbstractListModel
+{
+ Q_OBJECT
+public:
+ enum VListColumns {
+ NameColumn = 0,
+ ProfileNameColumn,
+ TypeColumn,
+ StatusColumn,
+ NUM_COLUMNS
+ };
+
+ const MinecraftAccountPtr at(int i) const;
+ int count() const;
+
+ void addAccount(const MinecraftAccountPtr account);
+ void removeAccount(QModelIndex index);
+ int findAccountByProfileId(const QString& profileId) const;
+ MinecraftAccountPtr getAccountByProfileName(const QString& profileName) const;
+ QStringList profileNames() const;
+
+ void requestRefresh(QString accountId);
+ void queueRefresh(QString accountId);
+
+ void setListFilePath(QString path, bool autosave = false);
+ bool loadList();
+ bool saveList();
+
+ MinecraftAccountPtr defaultAccount() const;
+ void setDefaultAccount(MinecraftAccountPtr profileId);
+ bool anyAccountIsValid();
+};
+```
+
+### Persistence (`accounts.json`)
+
+Accounts are serialized to `accounts.json` in the data directory:
+
+```json
+{
+ "formatVersion": 3,
+ "accounts": [
+ {
+ "type": "MSA",
+ "msa": { ... token data ... },
+ "utoken": { ... xbox user token ... },
+ "xrp-main": { ... xsts token ... },
+ "ygg": { ... minecraft access token ... },
+ "profile": {
+ "id": "player-uuid",
+ "name": "PlayerName",
+ "skin": { ... }
+ },
+ "entitlement": {
+ "ownsMinecraft": true,
+ "canPlayMinecraft": true
+ }
+ }
+ ],
+ "activeAccount": "player-uuid"
+}
+```
+
+### Token Refresh Queue
+
+`AccountList` maintains a refresh queue:
+- `requestRefresh()` pushes an account to the front (high priority)
+- `queueRefresh()` adds to the back (low priority)
+- Refresh operations run sequentially to avoid rate limiting
+
+## AuthSession
+
+`AuthSession` is the resolved auth session passed to the launch system:
+
+```cpp
+struct AuthSession {
+ QString player_name; // Minecraft username
+ QString uuid; // Player UUID
+ QString access_token; // Minecraft access token
+ QString session; // Legacy session token (deprecated)
+ QString user_type; // "msa"
+
+ // Used for censor filter
+ QMap<QString, QString> sensitiveStrings();
+};
+```
+
+## MSALoginDialog
+
+The login dialog (`ui/dialogs/MSALoginDialog.h`) provides the user interface for Microsoft authentication:
+
+```cpp
+class MSALoginDialog : public QDialog {
+ Q_OBJECT
+};
+```
+
+The dialog:
+1. Starts the `MSAInteractive` auth flow
+2. Displays a message asking the user to sign in via their browser
+3. Shows the authentication URL and a "Copy to Clipboard" button
+4. Shows progress as auth steps complete
+5. Closes on success or shows error on failure
+
+## Account Management UI
+
+### AccountListPage (`ui/pages/global/AccountListPage.h`)
+
+The global settings page for account management:
+- Lists all stored Microsoft accounts
+- Shows account status (Online, Offline, Expired, etc.)
+- "Add Microsoft Account" button → opens `MSALoginDialog`
+- "Remove" button → deletes selected account
+- "Set Default" button → sets the default account for launches
+- "Refresh" button → triggers token refresh for selected account
+- Displays skin preview for the selected account
+
+### ProfileSelectDialog (`ui/dialogs/ProfileSelectDialog.h`)
+
+Prompt dialog shown when launching without a default account:
+- Lists available accounts
+- User selects which account to use
+- Returns the selected `MinecraftAccountPtr`
+
+### ProfileSetupDialog (`ui/dialogs/ProfileSetupDialog.h`)
+
+Dialog for initial profile setup when a Microsoft account doesn't yet have a Minecraft profile:
+- Lets the user set their Minecraft username
+- Submits the profile creation request to Mojang's API
+
+## Skin Management
+
+### SkinUploadDialog (`ui/dialogs/SkinUploadDialog.h`)
+
+Upload dialog for changing the player's skin:
+- Select a skin file (PNG)
+- Choose variant (classic or slim)
+- Upload to Mojang's API using the access token
+
+### SkinUtils (`launcher/SkinUtils.h`)
+
+Utility functions for skin image processing:
+- Download skin textures
+- Parse skin images for display
+
+## Security Considerations
+
+### Token Storage
+
+Tokens are stored in `accounts.json` in the user's data directory:
+- File is readable only by the user (standard OS file permissions)
+- Tokens include MSA refresh tokens, Xbox tokens, and Minecraft access tokens
+- **No encryption at rest** — the file relies on OS-level file permissions
+
+### Censor Filter
+
+The launch system automatically censors sensitive tokens from game log output:
+
+```cpp
+QMap<QString, QString> MinecraftInstance::createCensorFilterFromSession(AuthSessionPtr session);
+```
+
+This replaces token strings in log output with placeholder text to prevent accidental exposure.
+
+### Client ID
+
+The Microsoft OAuth2 client ID is embedded at build time:
+
+```cmake
+set(MeshMC_MICROSOFT_CLIENT_ID "3035382c-8f73-493a-b579-d182905c2864")
+```
+
+This is a public client ID registered with Azure Active Directory for the MeshMC application.
diff --git a/docs/handbook/meshmc/application-lifecycle.md b/docs/handbook/meshmc/application-lifecycle.md
new file mode 100644
index 0000000000..d870b7d544
--- /dev/null
+++ b/docs/handbook/meshmc/application-lifecycle.md
@@ -0,0 +1,373 @@
+# Application Lifecycle
+
+## Overview
+
+MeshMC's application lifecycle is managed by the `Application` class, which extends `QApplication`. The lifecycle progresses through distinct phases: initialization, main event loop, and shutdown. The `Application` singleton owns all major subsystems and coordinates their creation and destruction.
+
+## Entry Point (`main.cpp`)
+
+The application entry point is in `launcher/main.cpp`:
+
+```cpp
+int main(int argc, char* argv[])
+{
+#if (QT_VERSION >= QT_VERSION_CHECK(6, 8, 0))
+ QApplication::setAttribute(Qt::AA_EnableHighDpiScaling);
+ QGuiApplication::setAttribute(Qt::AA_UseHighDpiPixmaps);
+#endif
+
+ // initialize Qt
+ Application app(argc, argv);
+
+ switch (app.status()) {
+ case Application::StartingUp:
+ case Application::Initialized: {
+ Q_INIT_RESOURCE(multimc);
+ Q_INIT_RESOURCE(backgrounds);
+ Q_INIT_RESOURCE(documents);
+ Q_INIT_RESOURCE(meshmc);
+
+ Q_INIT_RESOURCE(pe_dark);
+ Q_INIT_RESOURCE(pe_light);
+ Q_INIT_RESOURCE(pe_blue);
+ Q_INIT_RESOURCE(pe_colored);
+ Q_INIT_RESOURCE(breeze_dark);
+ Q_INIT_RESOURCE(breeze_light);
+ Q_INIT_RESOURCE(OSX);
+ Q_INIT_RESOURCE(iOS);
+ Q_INIT_RESOURCE(flat);
+ Q_INIT_RESOURCE(flat_white);
+ return app.exec();
+ }
+ case Application::Failed:
+ return 1;
+ case Application::Succeeded:
+ return 0;
+ }
+}
+```
+
+Key observations:
+- The high-DPI attributes are set only for Qt ≥ 6.8; note `AA_EnableHighDpiScaling` and `AA_UseHighDpiPixmaps` are deprecated no-ops in Qt 6, where high-DPI scaling is always enabled
+- The `Application` constructor performs all initialization
+- Qt resources (QRC files) for themes and branding are loaded only if initialization succeeds
+- The return value depends on `Application::status()` — `Failed` returns 1, `Succeeded` returns 0, otherwise enters the event loop via `app.exec()`
+
+## Application Status Enum
+
+The `Application` class tracks its lifecycle state:
+
+```cpp
+class Application : public QApplication {
+public:
+ enum Status {
+ StartingUp, // Constructor is running, subsystems initializing
+ Failed, // Fatal error during initialization
+ Succeeded, // Initialization complete, immediate exit (e.g., --version)
+ Initialized // Ready for main event loop
+ };
+};
+```
+
+## Initialization Sequence
+
+The `Application` constructor (`Application::Application(int&, char**)`) executes a carefully ordered initialization sequence. Each step is implemented as a private helper method:
+
+### Phase 1: Platform Initialization
+
+```cpp
+void Application::initPlatform();
+```
+
+Platform-specific setup:
+- **Windows** — attaches console for stdout/stderr if launched from terminal
+- **macOS** — sets `QDir::homePath()` workarounds if needed
+- **Linux** — no special platform init required
+
+### Phase 2: Command-Line Parsing
+
+```cpp
+QHash<QString, QVariant> Application::parseCommandLine(int& argc, char** argv);
+```
+
+Uses the `Commandline` module to parse arguments:
+- `--dir <path>` — override data directory
+- `--launch <instance_id>` — launch an instance immediately
+- `--server <address[:port]>` — join a server on launch
+- `--profile <name>` — use a specific account profile
+- `--alive` — check if another instance is running (live check)
+- `--import <url_or_path>` — import a ZIP or URL as an instance
+
+Parsed values are stored in member variables:
+```cpp
+QString m_instanceIdToLaunch;
+QString m_serverToJoin;
+QString m_profileToUse;
+bool m_liveCheck = false;
+QUrl m_zipToImport;
+```
+
+### Phase 3: Data Path Resolution
+
+```cpp
+bool Application::resolveDataPath(
+ const QHash<QString, QVariant>& args,
+ QString& dataPath,
+ QString& adjustedBy,
+ QString& origcwdPath);
+```
+
+Determines the data directory with the following priority:
+1. Command-line `--dir` argument
+2. Portable mode (if `portable.txt` exists next to executable)
+3. Platform-specific standard location:
+ - **macOS** — `~/Library/Application Support/MeshMC`
+ - **Linux** — `$XDG_DATA_HOME/MeshMC` or `~/.local/share/MeshMC`
+ - **Windows** — `%APPDATA%/MeshMC`
+
+### Phase 4: Single-Instance Check
+
+```cpp
+bool Application::initPeerInstance();
+```
+
+Uses the `LocalPeer` library to ensure only one instance of MeshMC runs at a time:
+- Creates a local socket with the application ID
+- If another instance is already running, sends the command-line message to it and returns `false` (sets status to `Succeeded`)
+- The running instance receives the message via `Application::messageReceived()` and processes it (e.g., showing the main window, launching an instance)
+
+### Phase 5: Logging Setup
+
+```cpp
+bool Application::initLogging(const QString& dataPath);
+```
+
+Configures Qt's logging framework:
+- Creates a log file in the data directory
+- Installs a custom message handler that writes to both the log file and stderr
+- Loads `qtlogging.ini` for logging category configuration
+
+### Phase 6: Path Configuration
+
+```cpp
+void Application::setupPaths(
+ const QString& binPath,
+ const QString& origcwdPath,
+ const QString& adjustedBy);
+```
+
+Sets up critical directory paths:
+- `m_rootPath` — the application installation root
+- JAR path for Java launcher components
+- Ensures required directories exist (instances, icons, themes, translations, etc.)
+
+### Phase 7: Settings Initialization
+
+```cpp
+void Application::initSettings();
+```
+
+Creates the global `SettingsObject` backed by `INISettingsObject` reading from `meshmc.cfg`. Registers all global settings with their default values:
+
+- **General** — language, update channel, instance directory, icon theme
+- **Java** — Java path, min/max memory, JVM arguments, permission mode
+- **Minecraft** — window size, fullscreen, console behavior
+- **Proxy** — proxy type, host, port, username, password
+- **External tools** — paths to JProfiler, JVisualVM, MCEdit
+- **Paste** — paste.ee API key configuration, paste type
+- **Custom commands** — pre-launch, post-exit, wrapper commands
+- **Analytics** — analytics opt-in/opt-out
+- **Appearance** — application theme, icon theme, cat pack
+
+### Phase 8: Subsystem Initialization
+
+```cpp
+void Application::initSubsystems();
+```
+
+Creates and initializes all major subsystems in dependency order:
+
+1. **Network** — `QNetworkAccessManager` with proxy configuration
+2. **HTTP Meta Cache** — `HttpMetaCache` for download caching
+3. **Translations** — `TranslationsModel` with language loading
+4. **Theme Manager** — `ThemeManager` with theme detection and application
+5. **Icon List** — `IconList` scanning icon directories
+6. **Metadata Index** — `Meta::Index` pointing to the metadata server
+7. **Instance List** — `InstanceList` scanning the instances directory
+8. **Account List** — `AccountList` loading from `accounts.json`
+9. **Java List** — `JavaInstallList` for detected Java installations
+10. **Profilers** — `JProfiler` and `JVisualVM` tool factories
+11. **MCEdit** — `MCEditTool` integration
+12. **Update Checker** — `UpdateChecker` with network manager
+13. **News Checker** — RSS feed reader
+
+### Phase 9: Analytics Initialization
+
+```cpp
+void Application::initAnalytics();
+```
+
+Initializes Google Analytics (`GAnalytics`) if the user has opted in. The analytics ID comes from `BuildConfig.ANALYTICS_ID`. Analytics can be toggled via the settings, with changes handled by `Application::analyticsSettingChanged()`.
+
+### Phase 10: First-Run Wizard or Main Window
+
+```cpp
+bool Application::createSetupWizard();
+void Application::performMainStartupAction();
+```
+
+If this is the first run or required settings are missing, a `SetupWizard` is displayed with pages:
+- `LanguageWizardPage` — language selection
+- `JavaWizardPage` — Java path configuration
+- `AnalyticsWizardPage` — analytics opt-in
+
+On wizard completion (`Application::setupWizardFinished()`), or if no wizard is needed, `performMainStartupAction()` runs:
+- If `m_instanceIdToLaunch` is set → launch that instance directly
+- If `m_zipToImport` is set → show import dialog
+- Otherwise → show `MainWindow`
+
+## Main Event Loop
+
+After initialization, `main()` calls `app.exec()`, which enters the Qt event loop. The event loop processes:
+
+- **UI events** — mouse clicks, keyboard input, paint events
+- **Timer events** — periodic checks, delayed operations
+- **Network events** — HTTP responses from `QNetworkAccessManager`
+- **IPC events** — messages from other MeshMC instances via `LocalPeer`
+- **File system events** — directory changes detected by `RecursiveFileSystemWatcher`
+
+## Window Management
+
+The `Application` class manages window lifecycle:
+
+```cpp
+MainWindow* Application::showMainWindow(bool minimized = false);
+InstanceWindow* Application::showInstanceWindow(InstancePtr instance, QString page = QString());
+```
+
+### Main Window
+
+- Created on first call to `showMainWindow()`
+- Tracked via `m_mainWindow` pointer
+- Connected to `Application::on_windowClose()` via the `isClosing` signal
+
+### Instance Windows
+
+- One per running instance, tracked in `m_instanceExtras`:
+ ```cpp
+ struct InstanceXtras {
+ InstanceWindow* window = nullptr;
+ shared_qobject_ptr<LaunchController> controller;
+ };
+ std::map<QString, InstanceXtras> m_instanceExtras;
+ ```
+- Created when launching or editing an instance
+- Display game logs and instance-specific controls
+
+### Window Close Tracking
+
+The application tracks open windows and running instances:
+
+```cpp
+size_t m_openWindows = 0;
+size_t m_runningInstances = 0;
+```
+
+The `on_windowClose()` slot decrements the window counter. When both counters reach zero and no updates are running, the application quits.
+
+## Instance Launching
+
+The `Application::launch()` slot orchestrates instance launches:
+
+```cpp
+bool Application::launch(
+ InstancePtr instance,
+ bool online = true,
+ BaseProfilerFactory* profiler = nullptr,
+ MinecraftServerTargetPtr serverToJoin = nullptr,
+ MinecraftAccountPtr accountToUse = nullptr);
+```
+
+1. Creates a `LaunchController`
+2. Configures it with the instance, online mode, profiler, server target, and account
+3. Connects `controllerSucceeded()` and `controllerFailed()` slots
+4. Opens an `InstanceWindow` for log display
+5. Starts the controller task
+6. Increments `m_runningInstances` via `addRunningInstance()`
+
+## Instance Kill
+
+```cpp
+bool Application::kill(InstancePtr instance);
+```
+
+Terminates a running instance's launch controller, which aborts the `LaunchTask` and kills the JVM process.
+
+## Application Shutdown
+
+Shutdown is triggered when all windows close and no instances are running:
+
+```cpp
+bool Application::shouldExitNow() const;
+```
+
+This checks:
+- `m_openWindows == 0`
+- `m_runningInstances == 0`
+- `m_updateRunning == false`
+
+The destructor `Application::~Application()` cleans up:
+- Saves pending settings
+- Saves instance group data
+- Saves account list
+- Closes log file
+- Destroys all owned subsystems (in reverse initialization order due to smart pointer destruction)
+
+## IPC Message Handling
+
+When a second MeshMC process starts, it sends a message to the running instance via `LocalPeer`:
+
+```cpp
+void Application::messageReceived(const QByteArray& message);
+```
+
+The message is parsed by `ApplicationMessage` and can trigger:
+- Showing the main window (if no specific action)
+- Launching an instance (`launch <instance_id>`)
+- Importing a file (`import <path_or_url>`)
+
+## Signal/Slot Connections
+
+Key application-level signal/slot connections:
+
+| Signal | Slot | Purpose |
+|---|---|---|
+| `Setting::SettingChanged` | `analyticsSettingChanged()` | Toggle analytics |
+| `LaunchController::succeeded` | `controllerSucceeded()` | Handle launch success |
+| `LaunchController::failed` | `controllerFailed()` | Handle launch failure |
+| `LocalPeer::messageReceived` | `messageReceived()` | IPC from other instances |
+| `SetupWizard::finished` | `setupWizardFinished()` | Wizard completion |
+| `updateAllowedChanged` | (MainWindow) | Enable/disable update button |
+| `globalSettingsAboutToOpen` | (listeners) | Notify settings dialog opening |
+| `globalSettingsClosed` | (listeners) | Notify settings dialog closed |
+
+## Update Flow During Lifecycle
+
+Updates interact with the lifecycle:
+
+```cpp
+void Application::updateIsRunning(bool running);
+bool Application::updatesAreAllowed();
+```
+
+- `updateIsRunning()` sets `m_updateRunning` and emits `updateAllowedChanged()`
+- `updatesAreAllowed()` returns false if any instances are running (prevents updating while games are active)
+- The `UpdateChecker` runs its check asynchronously; when an update is found, `MainWindow` handles the `updateAvailable()` signal
+
+## Global Settings Dialog
+
+```cpp
+void Application::ShowGlobalSettings(QWidget* parent, QString open_page = QString());
+```
+
+Opens the settings `PageDialog` with the global page provider. Emits `globalSettingsAboutToOpen()` before and `globalSettingsClosed()` after.
diff --git a/docs/handbook/meshmc/architecture.md b/docs/handbook/meshmc/architecture.md
new file mode 100644
index 0000000000..069aa32cc2
--- /dev/null
+++ b/docs/handbook/meshmc/architecture.md
@@ -0,0 +1,724 @@
+# MeshMC Architecture
+
+## Architectural Overview
+
+MeshMC follows a layered architecture built on top of Qt6's object model. The application is structured as a single-process desktop application with a central `Application` singleton that owns all major subsystems. The architecture can be decomposed into five primary layers:
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│ UI Layer (Qt Widgets) │
+│ MainWindow, InstanceWindow, Dialogs, Pages, SetupWizard │
+├─────────────────────────────────────────────────────────────┤
+│ Controller Layer │
+│ LaunchController, UpdateController, InstanceTask │
+├─────────────────────────────────────────────────────────────┤
+│ Model Layer │
+│ InstanceList, PackProfile, AccountList, ModFolderModel │
+│ IconList, JavaInstallList, TranslationsModel, BaseVersion │
+├─────────────────────────────────────────────────────────────┤
+│ Service Layer │
+│ Network (Download, NetJob), Auth (AuthFlow, MSAStep), │
+│ Meta (Index), HttpMetaCache, Settings, FileSystem │
+├─────────────────────────────────────────────────────────────┤
+│ Platform / Infrastructure Layer │
+│ Qt6 Core, LocalPeer, Katabasis, systeminfo, GAnalytics │
+└─────────────────────────────────────────────────────────────┘
+```
+
+## Directory Structure
+
+### Top-Level Repository Layout
+
+```
+meshmc/
+├── CMakeLists.txt # Root build configuration
+├── CMakePresets.json # Platform-specific build presets
+├── BUILD.md # Build instructions
+├── CONTRIBUTING.md # Contribution guidelines
+├── COPYING.md # License text (GPL-3.0-or-later)
+├── REUSE.toml # REUSE license compliance metadata
+├── branding/ # Icons, desktop files, manifests, NSIS scripts
+├── buildconfig/ # BuildConfig.h generation (version, URLs, keys)
+├── cmake/ # Custom CMake modules
+├── launcher/ # Main application source code
+├── libraries/ # Bundled third-party libraries
+├── updater/ # Self-updater binary
+├── nix/ # Nix packaging support
+├── scripts/ # Development scripts
+├── default.nix # Nix expression
+├── flake.nix # Nix flake
+├── flake.lock # Nix flake lock
+├── shell.nix # Nix development shell
+├── Containerfile # Container build (Podman/Docker)
+├── vcpkg.json # vcpkg dependency manifest
+└── vcpkg-configuration.json # vcpkg configuration
+```
+
+### Launcher Source Tree (`launcher/`)
+
+The `launcher/` directory contains all application source code. The organization follows a feature-module pattern:
+
+```
+launcher/
+├── main.cpp # Application entry point
+├── Application.{h,cpp} # Application singleton
+├── ApplicationMessage.{h,cpp} # IPC message handling
+├── BaseInstance.{h,cpp} # Abstract instance interface
+├── BaseInstaller.{h,cpp} # Base installer interface
+├── BaseVersion.h # Abstract version interface
+├── BaseVersionList.{h,cpp} # Abstract version list model
+├── BuildConfig.h # Build-time configuration (generated)
+├── Commandline.{h,cpp} # Command-line argument parsing
+├── DefaultVariable.h # Default-valued variable wrapper
+├── DesktopServices.{h,cpp} # Platform desktop integration (open URLs, folders)
+├── Exception.h # Base exception class
+├── ExponentialSeries.h # Exponential backoff helper
+├── FileSystem.{h,cpp} # Filesystem utilities (copy, move, path combining)
+├── Filter.{h,cpp} # Log/text filtering
+├── GZip.{h,cpp} # GZip compression/decompression
+├── HoeDown.h # Markdown rendering wrapper
+├── InstanceCopyTask.{h,cpp} # Instance cloning logic
+├── InstanceCreationTask.{h,cpp} # New instance creation
+├── InstanceImportTask.{h,cpp} # Instance import from ZIP/URL
+├── InstanceList.{h,cpp} # Instance collection model (QAbstractListModel)
+├── InstancePageProvider.h # Instance page factory interface
+├── InstanceTask.{h,cpp} # Base class for instance operations
+├── JavaCommon.{h,cpp} # Common Java utilities
+├── Json.{h,cpp} # JSON helper utilities
+├── KonamiCode.{h,cpp} # Easter egg: Konami code detector
+├── LaunchController.{h,cpp} # Launch orchestration controller
+├── LoggedProcess.{h,cpp} # QProcess wrapper with logging
+├── MMCStrings.{h,cpp} # String utilities
+├── MMCTime.{h,cpp} # Time formatting utilities
+├── MMCZip.{h,cpp} # ZIP archive utilities
+├── MessageLevel.{h,cpp} # Log message severity levels
+├── NullInstance.h # Null object pattern for instances
+├── ProblemProvider.h # Interface for reporting problems
+├── QObjectPtr.h # shared_qobject_ptr smart pointer
+├── RWStorage.h # Thread-safe key-value cache
+├── RecursiveFileSystemWatcher.{h,cpp} # Recursive directory watcher
+├── SeparatorPrefixTree.h # Prefix tree for path matching
+├── SkinUtils.{h,cpp} # Minecraft skin image utilities
+├── UpdateController.{h,cpp} # Update orchestration
+├── Usable.h # Reference counting mixin
+├── Version.{h,cpp} # Semantic version comparison
+├── VersionProxyModel.{h,cpp} # Version list filter/sort proxy
+├── WatchLock.h # RAII file system watcher pause
+│
+├── icons/ # Icon management
+│ └── IconList.{h,cpp} # Icon collection model
+│
+├── java/ # Java detection/management
+│ ├── JavaChecker.{h,cpp} # JVM validation (spawn + parse)
+│ ├── JavaCheckerJob.{h,cpp} # Parallel Java checking
+│ ├── JavaInstall.{h,cpp} # Java installation descriptor
+│ ├── JavaInstallList.{h,cpp} # Java installation list model
+│ ├── JavaUtils.{h,cpp} # Platform-specific Java discovery
+│ ├── JavaVersion.{h,cpp} # Java version parsing/comparison
+│ └── download/ # Java download support
+│
+├── launch/ # Launch infrastructure
+│ ├── LaunchStep.{h,cpp} # Abstract launch step
+│ ├── LaunchTask.{h,cpp} # Launch step orchestrator
+│ ├── LogModel.{h,cpp} # Game log data model
+│ └── steps/ # Generic launch steps
+│
+├── meta/ # Metadata index system
+│ ├── Index.{h,cpp} # Root metadata index
+│ ├── Version.{h,cpp} # Metadata version entry
+│ ├── VersionList.{h,cpp} # Metadata version list
+│ ├── BaseEntity.{h,cpp} # Base metadata entity
+│ └── JsonFormat.{h,cpp} # Metadata JSON serialization
+│
+├── minecraft/ # Minecraft-specific subsystem
+│ ├── MinecraftInstance.{h,cpp} # Concrete Minecraft instance
+│ ├── MinecraftUpdate.{h,cpp} # Game update task
+│ ├── MinecraftLoadAndCheck.{h,cpp} # Version validation
+│ ├── Component.{h,cpp} # Version component
+│ ├── ComponentUpdateTask.{h,cpp} # Dependency resolution
+│ ├── PackProfile.{h,cpp} # Component list model
+│ ├── LaunchProfile.{h,cpp} # Resolved launch parameters
+│ ├── Library.{h,cpp} # Java library descriptor
+│ ├── VersionFile.{h,cpp} # Version JSON file model
+│ ├── VersionFilterData.{h,cpp} # Version-specific quirks
+│ ├── ProfileUtils.{h,cpp} # Profile file utilities
+│ ├── AssetsUtils.{h,cpp} # Asset management
+│ ├── GradleSpecifier.h # Maven coordinate parser
+│ ├── MojangDownloadInfo.h # Mojang download descriptor
+│ ├── MojangVersionFormat.{h,cpp} # Mojang version JSON parser
+│ ├── OneSixVersionFormat.{h,cpp} # MeshMC version JSON format
+│ ├── OpSys.{h,cpp} # Operating system identifier
+│ ├── ParseUtils.{h,cpp} # Version string parsing
+│ ├── Rule.{h,cpp} # Platform/feature rules
+│ ├── World.{h,cpp} # Minecraft world descriptor
+│ ├── WorldList.{h,cpp} # World collection model
+│ │
+│ ├── auth/ # Authentication subsystem
+│ │ ├── MinecraftAccount.{h,cpp} # Account model
+│ │ ├── AccountList.{h,cpp} # Account collection model
+│ │ ├── AccountData.{h,cpp} # Token/profile storage
+│ │ ├── AccountTask.{h,cpp} # Auth task base class
+│ │ ├── AuthRequest.{h,cpp} # HTTP auth request helper
+│ │ ├── AuthSession.{h,cpp} # Resolved auth session
+│ │ ├── AuthStep.{h,cpp} # Abstract auth pipeline step
+│ │ ├── Parsers.{h,cpp} # Response JSON parsers
+│ │ ├── flows/ # Auth flow implementations
+│ │ │ ├── AuthFlow.{h,cpp} # Base auth flow orchestrator
+│ │ │ └── MSA.{h,cpp} # MSA login flows
+│ │ └── steps/ # Individual auth steps
+│ │ ├── MSAStep.{h,cpp} # Microsoft OAuth2
+│ │ ├── XboxUserStep.{h,cpp} # Xbox User Token
+│ │ ├── XboxAuthorizationStep.{h,cpp} # XSTS Token
+│ │ ├── XboxProfileStep.{h,cpp} # Xbox Profile
+│ │ ├── MinecraftProfileStep.{h,cpp} # MC Profile
+│ │ ├── MeshMCLoginStep.{h,cpp} # MeshMC-specific login
+│ │ ├── EntitlementsStep.{h,cpp} # Entitlement check
+│ │ └── GetSkinStep.{h,cpp} # Skin retrieval
+│ │
+│ ├── launch/ # Minecraft-specific launch steps
+│ │ ├── DirectJavaLaunch.{h,cpp} # Direct JVM invocation
+│ │ ├── MeshMCPartLaunch.{h,cpp} # Java launcher component
+│ │ ├── ClaimAccount.{h,cpp} # Account claim step
+│ │ ├── CreateGameFolders.{h,cpp} # Directory setup
+│ │ ├── ExtractNatives.{h,cpp} # Native library extraction
+│ │ ├── ModMinecraftJar.{h,cpp} # Jar mod application
+│ │ ├── PrintInstanceInfo.{h,cpp} # Debug info logging
+│ │ ├── ReconstructAssets.{h,cpp} # Asset reconstruction
+│ │ ├── ScanModFolders.{h,cpp} # Mod folder scanning
+│ │ ├── VerifyJavaInstall.{h,cpp} # Java validation
+│ │ └── MinecraftServerTarget.{h,cpp} # Server join info
+│ │
+│ ├── mod/ # Mod management
+│ │ ├── Mod.{h,cpp} # Single mod descriptor
+│ │ ├── ModDetails.h # Mod metadata
+│ │ ├── ModFolderModel.{h,cpp} # Mod list model
+│ │ ├── ModFolderLoadTask.{h,cpp} # Async mod folder scan
+│ │ ├── LocalModParseTask.{h,cpp} # Mod metadata extraction
+│ │ ├── ResourcePackFolderModel.{h,cpp} # Resource pack model
+│ │ └── TexturePackFolderModel.{h,cpp} # Texture pack model
+│ │
+│ ├── gameoptions/ # Game options parsing
+│ ├── services/ # Online service clients
+│ ├── update/ # Game file update logic
+│ ├── legacy/ # Legacy version support
+│ └── testdata/ # Test fixtures
+│
+├── modplatform/ # Mod platform integrations
+│ ├── atlauncher/ # ATLauncher import
+│ ├── flame/ # CurseForge API client
+│ ├── modrinth/ # Modrinth API client
+│ ├── modpacksch/ # FTB/modpacksch API
+│ ├── technic/ # Technic API client
+│ └── legacy_ftb/ # Legacy FTB support
+│
+├── net/ # Network subsystem
+│ ├── NetAction.h # Abstract network action
+│ ├── NetJob.{h,cpp} # Parallel download manager
+│ ├── Download.{h,cpp} # Single file download
+│ ├── HttpMetaCache.{h,cpp} # HTTP caching layer
+│ ├── Sink.h # Download output interface
+│ ├── FileSink.{h,cpp} # File output sink
+│ ├── ByteArraySink.h # Memory output sink
+│ ├── MetaCacheSink.{h,cpp} # Cache-aware file sink
+│ ├── Validator.h # Download validation interface
+│ ├── ChecksumValidator.h # Checksum validation
+│ ├── Mode.h # Network mode (online/offline)
+│ └── PasteUpload.{h,cpp} # paste.ee upload
+│
+├── settings/ # Settings framework
+│ ├── Setting.{h,cpp} # Individual setting descriptor
+│ ├── SettingsObject.{h,cpp} # Settings container
+│ ├── INIFile.{h,cpp} # INI file reader/writer
+│ ├── INISettingsObject.{h,cpp} # INI-backed settings
+│ ├── OverrideSetting.{h,cpp} # Override/gate pattern
+│ └── PassthroughSetting.{h,cpp} # Passthrough delegation
+│
+├── tasks/ # Task infrastructure
+│ └── Task.{h,cpp} # Abstract async task
+│
+├── tools/ # External tool integration
+│ ├── BaseProfiler.{h,cpp} # Profiler interface
+│ ├── JProfiler.{h,cpp} # JProfiler integration
+│ ├── JVisualVM.{h,cpp} # JVisualVM integration
+│ └── MCEditTool.{h,cpp} # MCEdit integration
+│
+├── translations/ # i18n
+│ └── TranslationsModel.{h,cpp} # Translation management
+│
+├── updater/ # Self-updater
+│ └── UpdateChecker.{h,cpp} # Dual-source update checker
+│
+├── news/ # News feed
+├── notifications/ # Notification system
+├── screenshots/ # Screenshot management
+├── pathmatcher/ # File path matching
+│ └── IPathMatcher.h # Path matcher interface
+│
+└── ui/ # User interface
+ ├── MainWindow.{h,cpp} # Main application window
+ ├── InstanceWindow.{h,cpp} # Instance console window
+ ├── ColorCache.{h,cpp} # Color cache for log display
+ ├── GuiUtil.{h,cpp} # GUI utility functions
+ ├── themes/ # Theme system
+ ├── pages/ # Page widgets
+ │ ├── BasePage.h # Page interface
+ │ ├── BasePageProvider.h # Page factory interface
+ │ ├── global/ # Global settings pages
+ │ ├── instance/ # Instance settings pages
+ │ └── modplatform/ # Mod platform pages
+ ├── dialogs/ # Dialog windows
+ ├── widgets/ # Custom widgets
+ ├── instanceview/ # Instance grid/list view
+ ├── pagedialog/ # Page container dialog
+ └── setupwizard/ # First-run wizard
+```
+
+### Libraries Directory (`libraries/`)
+
+```
+libraries/
+├── LocalPeer/ # Single-instance enforcement via local socket
+├── classparser/ # Java .class file parser for mod metadata
+├── ganalytics/ # Google Analytics measurement protocol
+├── hoedown/ # C Markdown parser (renders changelogs, notes)
+├── iconfix/ # Fixes for Qt's icon theme loading
+├── javacheck/ # Java process that reports JVM capabilities
+├── katabasis/ # OAuth2 authentication framework
+├── launcher/ # Java-side launcher (MeshMCPartLaunch)
+├── optional-bare/ # nonstd::optional for pre-C++17 compatibility
+├── rainbow/ # HSL color manipulation for Qt
+├── systeminfo/ # CPU, OS, memory detection
+├── tomlc99/ # C99 TOML parser
+└── xz-embedded/ # Embedded XZ/LZMA decompressor
+```
+
+## Module Dependency Graph
+
+The inter-module dependency relationships form a directed acyclic graph:
+
+```
+ ┌──────────┐
+ │ main() │
+ └────┬─────┘
+ │
+ ┌────▼─────┐
+ │Application│
+ └────┬─────┘
+ ┌──────────────┼──────────────┐
+ │ │ │
+ ┌─────▼────┐ ┌──────▼─────┐ ┌────▼──────┐
+ │ UI Layer │ │InstanceList│ │ AccountList│
+ │MainWindow│ │ │ │ │
+ └─────┬────┘ └──────┬─────┘ └────┬──────┘
+ │ │ │
+ │ ┌──────▼──────┐ │
+ │ │BaseInstance │ │
+ │ │MinecraftInst│ │
+ │ └──────┬──────┘ │
+ │ │ │
+ │ ┌──────▼──────┐ │
+ └──────►│LaunchControl│◄──────┘
+ └──────┬──────┘
+ │
+ ┌──────▼──────┐
+ │ LaunchTask │
+ │ (steps) │
+ └──────┬──────┘
+ │
+ ┌──────────┼──────────┐
+ │ │ │
+ ┌─────▼──┐ ┌────▼────┐ ┌──▼───────┐
+ │AuthFlow│ │PackProf.│ │DirectJava│
+ │ MSA │ │Component│ │ Launch │
+ └────────┘ └─────────┘ └──────────┘
+```
+
+## Key Design Patterns
+
+### Singleton Application
+
+The `Application` class extends `QApplication` and serves as the central hub. A macro provides convenient access:
+
+```cpp
+#define APPLICATION (static_cast<Application*>(QCoreApplication::instance()))
+```
+
+All subsystems are accessed through `Application`:
+- `APPLICATION->settings()` — global settings
+- `APPLICATION->instances()` — instance list
+- `APPLICATION->accounts()` — account list
+- `APPLICATION->network()` — shared `QNetworkAccessManager`
+- `APPLICATION->metacache()` — HTTP metadata cache
+- `APPLICATION->metadataIndex()` — version metadata index
+- `APPLICATION->javalist()` — Java installation list
+- `APPLICATION->icons()` — icon list
+- `APPLICATION->translations()` — translation model
+- `APPLICATION->themeManager()` — theme manager
+- `APPLICATION->updateChecker()` — update checker
+
+### Task System
+
+All asynchronous operations derive from the `Task` base class:
+
+```cpp
+class Task : public QObject {
+ Q_OBJECT
+public:
+ enum class State { Inactive, Running, Succeeded, Failed, AbortedByUser };
+ virtual bool canAbort() const { return false; }
+signals:
+ void started();
+ void progress(qint64 current, qint64 total);
+ void finished();
+ void failed(QString reason);
+ void succeeded();
+ void status(QString status);
+ void stepStatus(QString status);
+public slots:
+ void start();
+ virtual bool abort();
+protected:
+ virtual void executeTask() = 0;
+ void emitSucceeded();
+ void emitFailed(QString reason);
+};
+```
+
+Subclasses include `NetJob`, `LaunchTask`, `LaunchController`, `AccountTask`, `InstanceTask`, `InstanceCopyTask`, `InstanceImportTask`, `ComponentUpdateTask`, `JavaCheckerJob`, and more. Tasks emit Qt signals for progress tracking and completion.
+
+### Model-View Architecture
+
+MeshMC extensively uses Qt's Model-View framework:
+
+| Model Class | Base Class | Purpose |
+|---|---|---|
+| `InstanceList` | `QAbstractListModel` | Instance collection |
+| `PackProfile` | `QAbstractListModel` | Component list per instance |
+| `AccountList` | `QAbstractListModel` | Microsoft accounts |
+| `ModFolderModel` | `QAbstractListModel` | Mods in an instance |
+| `WorldList` | `QAbstractListModel` | Worlds in an instance |
+| `JavaInstallList` | `BaseVersionList` → `QAbstractListModel` | Detected Java installations |
+| `TranslationsModel` | `QAbstractListModel` | Available translations |
+| `IconList` | `QAbstractListModel` | Available icons |
+| `LogModel` | `QAbstractListModel` | Game log lines |
+| `VersionProxyModel` | `QSortFilterProxyModel` | Filtered/sorted version list |
+
+### Smart Pointer Usage
+
+MeshMC defines a project-wide type alias for shared ownership of QObject-derived types:
+
+```cpp
+template <class T>
+using shared_qobject_ptr = std::shared_ptr<T>;
+```
+
+This is defined in `QObjectPtr.h` and used throughout the codebase for shared ownership of QObject-derived instances (`AccountList`, `LaunchTask`, `QNetworkAccessManager`, `InstanceList`, `IconList`, etc.). Standard `std::shared_ptr` is used directly for non-QObject types such as `SettingsObject`.
+
+### Settings Override Pattern
+
+The settings system supports hierarchical overrides:
+
+```
+Global Settings (meshmc.cfg)
+ └── Instance Settings (instance.cfg)
+ └── OverrideSetting (gate-controlled)
+```
+
+Each instance has its own `SettingsObject`. Instance settings can either use the global value (passthrough) or override it. The `OverrideSetting` class implements this gate pattern — a boolean gate setting controls whether the override value or the global value is used.
+
+### Launch Step Pipeline
+
+Game launching uses a sequential step pipeline:
+
+```cpp
+class LaunchTask : public Task {
+ void appendStep(shared_qobject_ptr<LaunchStep> step);
+ void prependStep(shared_qobject_ptr<LaunchStep> step);
+ void proceed(); // advance to next step
+};
+```
+
+Each `LaunchStep` can:
+- Execute immediately and complete
+- Emit `readyForLaunch()` to pause the pipeline (awaiting user interaction)
+- Call `proceed()` on the parent to advance to the next step
+
+The pipeline for a Minecraft launch typically includes:
+1. `VerifyJavaInstall` — check Java exists and is compatible
+2. `CreateGameFolders` — ensure directory structure exists
+3. `ScanModFolders` — scan and index mod files
+4. `ExtractNatives` — extract platform-native libraries
+5. `ModMinecraftJar` — apply jar mods
+6. `ReconstructAssets` — legacy asset management
+7. `ClaimAccount` — lock the account for this session
+8. `PrintInstanceInfo` — log debug information
+9. `DirectJavaLaunch` or `MeshMCPartLaunch` — spawn the JVM
+
+### Authentication Pipeline
+
+Authentication uses a step-based pipeline similar to launch:
+
+```cpp
+class AuthFlow : public AccountTask {
+ QList<AuthStep::Ptr> m_steps;
+ AuthStep::Ptr m_currentStep;
+ void nextStep(); // advance pipeline
+ void stepFinished(...); // handle step completion
+};
+```
+
+MSA interactive login steps:
+1. `MSAStep(Login)` — OAuth2 browser-based login
+2. `XboxUserStep` — exchange MSA token for Xbox User Token
+3. `XboxAuthorizationStep` — exchange for XSTS token
+4. `MinecraftProfileStep` — get Minecraft profile
+5. `EntitlementsStep` — verify game ownership
+6. `GetSkinStep` — fetch player skin
+
+### Network Download Architecture
+
+Downloads use a Sink/Validator pattern:
+
+```
+NetJob (manages multiple concurrent downloads)
+ └── Download (single network request)
+ ├── Sink (output destination)
+ │ ├── FileSink — write to file
+ │ ├── ByteArraySink — write to memory
+ │ └── MetaCacheSink — write with cache metadata
+ └── Validator (verify content)
+ └── ChecksumValidator — MD5/SHA1/SHA256 check
+```
+
+`NetJob` extends `Task` and manages a collection of `NetAction` objects (typically `Download`). It handles concurrent execution, progress aggregation, retry logic, and failure reporting.
+
+### Page System
+
+Settings and instance configuration use a page-based UI:
+
+```cpp
+class BasePage {
+public:
+ virtual QString displayName() const = 0;
+ virtual QIcon icon() const = 0;
+ virtual QString id() const = 0;
+ virtual QString helpPage() const { return QString(); }
+ virtual bool apply() { return true; }
+};
+```
+
+Pages are organized into providers:
+- **Global pages** — `MeshMCPage`, `MinecraftPage`, `JavaPage`, `LanguagePage`, `ProxyPage`, `ExternalToolsPage`, `AccountListPage`, `PasteEEPage`, `CustomCommandsPage`, `AppearancePage`
+- **Instance pages** — `VersionPage`, `ModFolderPage`, `LogPage`, `NotesPage`, `InstanceSettingsPage`, `ScreenshotsPage`, `WorldListPage`, `GameOptionsPage`, `ServersPage`, `OtherLogsPage`, `ResourcePackPage`, `ShaderPackPage`, `TexturePackPage`
+- **Mod platform pages** — integration-specific browse/search pages
+
+## Build System Architecture
+
+### CMake Target Structure
+
+```
+MeshMC (root)
+├── MeshMC_nbt++ # NBT library (from ../libnbtplusplus)
+├── ganalytics # Analytics library
+├── systeminfo # System information
+├── hoedown # Markdown parser
+├── MeshMC_launcher # Java launcher component
+├── MeshMC_javacheck # Java checker
+├── xz-embedded # XZ decompression
+├── MeshMC_rainbow # Color utilities
+├── MeshMC_iconfix # Icon loader fixes
+├── MeshMC_LocalPeer # Single-instance
+├── MeshMC_classparser # Class file parser
+├── optional-bare # optional polyfill
+├── tomlc99 # TOML parser
+├── MeshMC_katabasis # OAuth2
+├── BuildConfig # Generated build constants
+├── meshmc-updater # Updater binary
+└── meshmc # Main launcher executable
+```
+
+### Generated Files
+
+The build system generates several files from templates:
+
+| Template | Generated | Purpose |
+|---|---|---|
+| `branding/*.in` | `build/*.desktop`, `*.metainfo.xml`, `*.rc`, `*.qrc`, `*.manifest` | Platform packaging |
+| `launcher/MeshMC.in` | `MeshMCScript` | Linux runner script |
+| `buildconfig/BuildConfig.cpp.in` | `BuildConfig.cpp` | Compile-time constants |
+| `branding/win_install.nsi.in` | `win_install.nsi` | NSIS installer script |
+
+### Qt Meta-Object Compiler (MOC)
+
+Qt's MOC is enabled globally via `CMAKE_AUTOMOC ON`. All classes using `Q_OBJECT` have their MOC files generated automatically. The `Q_OBJECT` macro is used extensively throughout the codebase for:
+- Signal/slot connections
+- Property system
+- Dynamic type identification
+
+## Data Flow Diagrams
+
+### Instance Launch Sequence
+
+```
+User clicks "Launch"
+ │
+ ▼
+MainWindow::on_actionLaunchInstance_triggered()
+ │
+ ▼
+Application::launch(instance, online, profiler, server, account)
+ │
+ ▼
+LaunchController::executeTask()
+ │
+ ├── decideAccount() ← select/prompt for account
+ ├── login() ← authenticate (MSAInteractive/MSASilent)
+ │ │
+ │ ▼
+ │ AuthFlow::executeTask() → step pipeline
+ │ │
+ │ ▼
+ │ AuthSession created with tokens
+ │
+ ▼
+launchInstance()
+ │
+ ▼
+MinecraftInstance::createLaunchTask(session, server)
+ │
+ ▼
+LaunchTask (step pipeline)
+ │
+ ├── VerifyJavaInstall
+ ├── CreateGameFolders
+ ├── ScanModFolders
+ ├── ExtractNatives
+ ├── ModMinecraftJar
+ ├── ReconstructAssets
+ ├── ClaimAccount
+ ├── PrintInstanceInfo
+ └── DirectJavaLaunch/MeshMCPartLaunch
+ │
+ ▼
+ LoggedProcess (QProcess wrapper)
+ │
+ ▼
+ JVM subprocess running Minecraft
+```
+
+### Mod Installation Flow
+
+```
+User browses mod platform page
+ │
+ ▼
+API query (CurseForge/Modrinth)
+ │
+ ▼
+NetJob → Download mod file
+ │
+ ▼
+File placed in instance mods/ directory
+ │
+ ▼
+ModFolderModel::update()
+ │
+ ▼
+LocalModParseTask extracts metadata
+ │
+ ▼
+UI refreshed with new mod entry
+```
+
+### Settings Lookup Flow
+
+```
+Code requests setting value:
+ instance->settings()->get("JavaPath")
+ │
+ ▼
+OverrideSetting checks gate:
+ instance->settings()->get("OverrideJava")
+ │
+ ├── gate ON → return instance-local value
+ │ from instance.cfg
+ │
+ └── gate OFF → passthrough to global
+ from meshmc.cfg
+```
+
+## Threading Model
+
+MeshMC is primarily single-threaded (Qt main/GUI thread), with specific operations dispatched to background threads:
+
+- **File I/O** — `QFuture`/`QFutureWatcher` via `QtConcurrent` for file copy operations (`InstanceCopyTask`)
+- **Mod parsing** — `LocalModParseTask` runs in background thread to parse mod metadata from JAR files
+- **Mod folder scanning** — `ModFolderLoadTask` enumerates mod directories asynchronously
+- **Java checking** — `JavaChecker` spawns external JVM processes (via `QProcess`) and parses output
+- **Network** — `QNetworkAccessManager` handles HTTP requests asynchronously via Qt's event loop (not separate threads)
+
+All UI updates happen on the main thread. Background tasks communicate results back via Qt's signal-slot mechanism with automatic cross-thread marshalling (`Qt::QueuedConnection`).
+
+## Error Handling Strategy
+
+MeshMC uses several error handling approaches:
+
+1. **Task failure signals** — `Task::emitFailed(QString reason)` propagates errors through the task chain
+2. **Problem providers** — `ProblemProvider` interface reports issues with severity levels (`ProblemSeverity`)
+3. **Status enums** — `Application::Status`, `LoggedProcess::State`, `AccountState`, `JobStatus`
+4. **Fatal errors** — `Application::showFatalErrorMessage()` displays critical errors and sets status to `Failed`
+5. **Exception class** — `Exception` base class for recoverable errors (rarely used; signals preferred)
+
+## Configuration Architecture
+
+### Build-Time Configuration (`BuildConfig`)
+
+The `buildconfig/` directory generates a `BuildConfig` struct containing all compile-time constants:
+
+```cpp
+struct Config {
+ QString MESHMC_NAME; // "MeshMC"
+ QString MESHMC_BINARY; // "meshmc"
+ QString MESHMC_APP_ID; // "org.projecttick.MeshMC"
+ QString MESHMC_META_URL; // "https://meta.projecttick.org/"
+ QString MICROSOFT_CLIENT_ID; // Azure AD app client ID
+ QString CURSEFORGE_API_KEY; // CurseForge API key
+ QString PASTE_EE_API_KEY; // paste.ee key
+ QString IMGUR_CLIENT_ID; // Imgur API client ID
+ QString ANALYTICS_ID; // Google Analytics measurement ID
+ QString NEWS_RSS_URL; // RSS feed URL
+ QString BUG_TRACKER_URL; // Issue tracker URL
+ QString UPDATER_FEED_URL; // Update RSS feed
+ QString UPDATER_GITHUB_API_URL; // GitHub releases API
+ QString BUILD_ARTIFACT; // Artifact identifier
+ QString BUILD_PLATFORM; // Platform string
+ // ... version numbers, git info, etc.
+};
+```
+
+### Runtime Configuration (Settings)
+
+Runtime settings are stored in INI files and managed through the `SettingsObject` → `INISettingsObject` hierarchy. Individual `Setting` objects are registered with the settings container and support:
+- Default values
+- Synonym keys (for migration)
+- Change notification signals
+- Override/gate patterns for instance-level settings
+
+## Metadata System
+
+The `meta/` subsystem provides access to version metadata from the MeshMC metadata server (default: `https://meta.projecttick.org/`):
+
+```
+Meta::Index (root)
+ └── Meta::VersionList (per component UID)
+ └── Meta::Version (individual version)
+ └── VersionFile (parsed JSON with libraries, rules, etc.)
+```
+
+All metadata entities extend `Meta::BaseEntity`, which provides:
+- Local cache loading
+- Remote fetching via `NetJob`
+- Staleness tracking
+- JSON serialization/deserialization
+
+The metadata index is the authoritative source for available Minecraft versions, mod loader versions, and their associated libraries and launch parameters.
diff --git a/docs/handbook/meshmc/building.md b/docs/handbook/meshmc/building.md
new file mode 100644
index 0000000000..615b96ec2a
--- /dev/null
+++ b/docs/handbook/meshmc/building.md
@@ -0,0 +1,554 @@
+# Building MeshMC
+
+## Prerequisites
+
+### Required Tools
+
+| Tool | Minimum Version | Purpose |
+|---|---|---|
+| CMake | 3.28 | Build system generator |
+| Ninja | Any recent | Build executor (recommended) |
+| C++ Compiler | GCC ≥ 13, Clang ≥ 17, MSVC ≥ 19.36 | C++23 compilation |
+| JDK | 17 | Building Java launcher components |
+| Git | Any recent | Submodule management |
+| pkg-config | Any | Dependency discovery |
+
+### Required Qt6 Modules
+
+MeshMC requires Qt 6 with the following modules, as specified in the root `CMakeLists.txt`:
+
+```cmake
+find_package(Qt6 REQUIRED COMPONENTS
+ Core
+ Widgets
+ Concurrent
+ Network
+ NetworkAuth
+ Test
+ Xml
+)
+```
+
+### Required External Libraries
+
+| Dependency | Purpose | pkg-config / CMake Name |
+|---|---|---|
+| Qt 6 Base | GUI framework | `Qt6Core`, `Qt6Widgets`, etc. |
+| Qt 6 NetworkAuth | OAuth2 authentication | `Qt6NetworkAuth` |
+| QuaZip (Qt6) | ZIP archive support | `quazip1-qt6` |
+| zlib | Compression | `zlib` |
+| Extra CMake Modules | KDE CMake utilities | `ECM` |
+| cmark | Markdown rendering | — |
+| tomlplusplus | TOML parsing | — |
+| libarchive | Archive extraction | `LibArchive` (CMake) |
+| libqrencode | QR code generation | — |
+| scdoc | Man page generation (optional) | — |
+
+## Quick Start with Bootstrap
+
+The fastest path to building MeshMC is the bootstrap script at the repository root. It detects your platform, installs missing dependencies, initializes submodules, and configures lefthook git hooks.
+
+### Linux / macOS
+
+```bash
+cd meshmc/
+../bootstrap.sh
+```
+
+Supported distributions: Debian, Ubuntu, Fedora, RHEL/CentOS, openSUSE, Arch Linux, macOS (via Homebrew).
+
+### Windows
+
+```cmd
+cd meshmc\
+..\bootstrap.cmd
+```
+
+Uses Scoop for CLI tools and vcpkg for C/C++ libraries.
+
+## Cloning the Repository
+
+MeshMC uses git submodules (notably `libnbtplusplus`). Always clone recursively:
+
+```bash
+git clone --recursive https://github.com/Project-Tick/MeshMC.git
+cd MeshMC
+```
+
+If you already cloned without `--recursive`:
+
+```bash
+git submodule update --init --recursive
+```
+
+The `libnbtplusplus` submodule lives at `../libnbtplusplus` relative to the `meshmc/` directory and is referenced in the root `CMakeLists.txt`:
+
+```cmake
+add_subdirectory(${CMAKE_SOURCE_DIR}/../libnbtplusplus libnbtplusplus)
+```
+
+## Distro-Specific Package Installation
+
+### Debian / Ubuntu
+
+```bash
+sudo apt-get install \
+ cmake ninja-build extra-cmake-modules pkg-config \
+ qt6-base-dev libquazip1-qt6-dev zlib1g-dev \
+ libcmark-dev libarchive-dev libqrencode-dev libtomlplusplus-dev \
+ scdoc
+```
+
+### Fedora
+
+```bash
+sudo dnf install \
+ cmake ninja-build extra-cmake-modules pkgconf \
+ qt6-qtbase-devel quazip-qt6-devel zlib-devel \
+ cmark-devel libarchive-devel qrencode-devel tomlplusplus-devel \
+ scdoc
+```
+
+### Arch Linux
+
+```bash
+sudo pacman -S --needed \
+ cmake ninja extra-cmake-modules pkgconf \
+ qt6-base quazip-qt6 zlib \
+ cmark libarchive qrencode tomlplusplus \
+ scdoc
+```
+
+### openSUSE
+
+```bash
+sudo zypper install \
+ cmake ninja extra-cmake-modules pkg-config \
+ qt6-base-devel quazip-qt6-devel zlib-devel \
+ cmark-devel libarchive-devel qrencode-devel tomlplusplus-devel \
+ scdoc
+```
+
+### macOS (Homebrew)
+
+```bash
+brew install \
+ cmake ninja extra-cmake-modules \
+ qt@6 quazip zlib \
+ cmark libarchive qrencode tomlplusplus \
+ scdoc
+```
+
+### Windows
+
+On Windows, use vcpkg for C/C++ dependencies and ensure `VCPKG_ROOT` is set:
+
+```cmd
+set VCPKG_ROOT=C:\path\to\vcpkg
+vcpkg install qt6 quazip libarchive zlib cmark
+```
+
+Or install Qt via the Qt Online Installer for full module support.
+
+## CMake Presets
+
+MeshMC ships `CMakePresets.json` with pre-configured presets for each platform. All presets use the **Ninja Multi-Config** generator and output to the `build/` directory.
+
+### Available Configure Presets
+
+| Preset | Platform | Notes |
+|---|---|---|
+| `linux` | Linux | Available only on Linux hosts |
+| `macos` | macOS | Uses vcpkg toolchain (`$VCPKG_ROOT`) |
+| `macos_universal` | macOS (Universal Binary) | Builds for x86_64 + arm64 |
+| `windows_mingw` | Windows (MinGW) | Available only on Windows hosts |
+| `windows_msvc` | Windows (MSVC) | Uses vcpkg toolchain (`$VCPKG_ROOT`) |
+
+All presets inherit from a hidden `base` preset which sets:
+- **Generator:** `Ninja Multi-Config`
+- **Build directory:** `build/`
+- **Install directory:** `install/`
+- **LTO:** Enabled by default (`ENABLE_LTO=ON`)
+
+### Environment Variables
+
+| Variable | Used By | Purpose |
+|---|---|---|
+| `VCPKG_ROOT` | `macos`, `macos_universal`, `windows_msvc` | Path to vcpkg installation |
+| `ARTIFACT_NAME` | All (via `base`) | Updater artifact identifier |
+| `BUILD_PLATFORM` | All (via `base`) | Platform identifier string |
+
+## Building on Linux
+
+### Configure
+
+```bash
+cmake --preset linux
+```
+
+### Build
+
+```bash
+cmake --build --preset linux --config Release
+```
+
+For a debug build:
+
+```bash
+cmake --build --preset linux --config Debug
+```
+
+Since the generator is `Ninja Multi-Config`, you can switch between `Debug`, `Release`, `RelWithDebInfo`, and `MinSizeRel` without re-configuring.
+
+### Install
+
+```bash
+cmake --install build --config Release --prefix /usr/local
+```
+
+The install layout on Linux follows KDE conventions:
+- Binary: `bin/meshmc`
+- Libraries: `lib/`
+- Data: `share/MeshMC/`
+- Desktop file: `share/applications/org.projecttick.MeshMC.desktop`
+- Metainfo: `share/metainfo/org.projecttick.MeshMC.metainfo.xml`
+- Icon: `share/icons/hicolor/scalable/apps/org.projecttick.MeshMC.svg`
+- MIME type: `share/mime/packages/org.projecttick.MeshMC.xml`
+
+### Full One-Liner
+
+```bash
+cmake --preset linux && cmake --build --preset linux --config Release
+```
+
+## Building on macOS
+
+### Prerequisites
+
+Ensure `VCPKG_ROOT` is set:
+
+```bash
+export VCPKG_ROOT="$HOME/vcpkg"
+```
+
+### Standard Build (Native Architecture)
+
+```bash
+cmake --preset macos
+cmake --build --preset macos --config Release
+```
+
+### Universal Binary (x86_64 + arm64)
+
+```bash
+cmake --preset macos_universal
+cmake --build --preset macos_universal --config Release
+```
+
+### Install
+
+```bash
+cmake --install build --config Release
+```
+
+The macOS install layout creates an application bundle:
+- `MeshMC.app/Contents/MacOS/` — binaries and plugins
+- `MeshMC.app/Contents/Frameworks/` — frameworks and libraries
+- `MeshMC.app/Contents/Resources/` — icons, assets
+
+### macOS-Specific Features
+
+- **Sparkle updates** — macOS uses the Sparkle framework for native update UI. The public key and feed URL are configured via CMake:
+ ```cmake
+ set(MACOSX_SPARKLE_UPDATE_PUBLIC_KEY "C0eBoyDSoZbzgCMxQH9wH6kmjU2mPRmvhZZd9mHgqZQ=")
+ set(MACOSX_SPARKLE_UPDATE_FEED_URL "https://projecttick.org/product/meshmc/appcast.xml")
+ ```
+- **Asset catalog** — Icons are compiled via `actool` when Xcode ≥ 26.0 is available (liquid glass icons)
+- **Bundle metadata** — Info.plist values are set via `MACOSX_BUNDLE_*` CMake variables
+
+## Building on Windows
+
+### Using MSVC
+
+Requires Visual Studio with C++ workload and vcpkg:
+
+```cmd
+set VCPKG_ROOT=C:\path\to\vcpkg
+cmake --preset windows_msvc
+cmake --build --preset windows_msvc --config Release
+```
+
+### Using MinGW
+
+```cmd
+cmake --preset windows_mingw
+cmake --build --preset windows_mingw --config Release
+```
+
+### Install
+
+```cmd
+cmake --install build --config Release
+```
+
+Windows install layout places everything in a flat directory structure.
+
+### Windows-Specific Notes
+
+- **MSVC C standard** — C11 is used instead of C23 for MSVC compatibility:
+ ```cmake
+ if(MSVC)
+ set(CMAKE_C_STANDARD 11)
+ else()
+ set(CMAKE_C_STANDARD 23)
+ endif()
+ ```
+- **NSIS installer** — An NSIS installer script is generated from `branding/win_install.nsi.in`
+- **Visual C++ Redistributable** — The NSIS installer can download and install the VC++ runtime automatically
+- **Resource file** — Windows executable metadata is provided via a `.rc` file generated from `branding/meshmc.rc.in`
+- **Manifest** — A Windows application manifest is generated for DPI awareness and UAC settings
+
+## Building with Nix
+
+MeshMC provides a Nix flake for reproducible builds:
+
+### Using the Nix Flake
+
+```bash
+# Build the package
+nix build .#meshmc
+
+# Enter the development shell
+nix develop
+
+# Inside the dev shell:
+cd "$cmakeBuildDir"
+ninjaBuildPhase
+ninjaInstallPhase
+```
+
+### Without Flakes
+
+```bash
+nix-build
+# or
+nix-shell
+```
+
+### Binary Cache
+
+A binary cache is available to speed up builds:
+
+```
+https://meshmc.cachix.org
+```
+
+Public key:
+```
+meshmc.cachix.org-1:6ZNLcfqjVDKmN9/XNWGV3kcjBTL51v1v2V+cvanMkZA=
+```
+
+These are already configured in the flake's `nixConfig`.
+
+## Building with Container (Podman/Docker)
+
+A `Containerfile` (Debian-based) is provided for CI and reproducible builds:
+
+### Build the Container Image
+
+```bash
+podman build -t meshmc-build .
+```
+
+### Run a Build Inside the Container
+
+```bash
+podman run --rm -it -v "$(pwd):/work:z" meshmc-build
+
+# Inside the container:
+git submodule update --init --recursive
+cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release
+cmake --build build
+```
+
+The container comes with Qt 6.10.2 (installed via `aqtinstall`), Clang, LLD, Ninja, CMake, and all required dependencies pre-installed.
+
+## CMake Options
+
+These options can be set during configuration with `-D<OPTION>=<VALUE>`:
+
+| Option | Default | Description |
+|---|---|---|
+| `ENABLE_LTO` | `OFF` (ON in presets) | Enable Link Time Optimization |
+| `MeshMC_DISABLE_JAVA_DOWNLOADER` | `OFF` | Disable the built-in Java downloader feature |
+| `MeshMC_ENABLE_CLANG_TIDY` | `OFF` | Run clang-tidy during compilation |
+| `MeshMC_BUILD_PLATFORM` | `""` | Platform identifier string for notifications |
+| `MeshMC_VERSION_BUILD` | `-1` | Build number (-1 for no build number) |
+| `MeshMC_BUILD_ARTIFACT` | `""` | Artifact name for updater identification |
+| `BUILD_TESTING` | `ON` | Build unit tests |
+
+### URL Configuration Options
+
+These are typically set for custom/self-hosted deployments:
+
+| Option | Default | Description |
+|---|---|---|
+| `MeshMC_META_URL` | `https://meta.projecttick.org/` | Metadata server URL |
+| `MeshMC_NEWS_RSS_URL` | `https://projecttick.org/product/meshmc/feed.xml` | News RSS feed URL |
+| `MeshMC_UPDATER_FEED_URL` | `""` | RSS feed URL for updater |
+| `MeshMC_UPDATER_GITHUB_API_URL` | `""` | GitHub Releases API URL for update verification |
+| `MeshMC_NOTIFICATION_URL` | `https://projecttick.org/` | Notification check URL |
+| `MeshMC_BUG_TRACKER_URL` | `https://github.com/Project-Tick/MeshMC/issues` | Bug tracker URL |
+
+### API Key Options
+
+| Option | Description |
+|---|---|
+| `MeshMC_MICROSOFT_CLIENT_ID` | Azure AD application client ID for MSA login |
+| `MeshMC_PASTE_EE_API_KEY` | paste.ee API key for log upload |
+| `MeshMC_IMGUR_CLIENT_ID` | Imgur API client ID for screenshot upload |
+| `MeshMC_CURSEFORGE_API_KEY` | CurseForge API key |
+| `MeshMC_ANALYTICS_ID` | Google Analytics measurement ID |
+
+## Compiler Flags
+
+### GCC / Clang
+
+```
+-Wall -pedantic -Wno-deprecated-declarations
+-fstack-protector-strong --param=ssp-buffer-size=4
+-O3 -D_FORTIFY_SOURCE=2
+-DQT_NO_DEPRECATED_WARNINGS=Y
+```
+
+### MSVC
+
+```
+/W4 /DQT_NO_DEPRECATED_WARNINGS=Y
+```
+
+### macOS-Specific
+
+```
+-stdlib=libc++
+```
+
+## Running Tests
+
+### Using CTest Presets
+
+```bash
+cmake --preset linux
+cmake --build --preset linux --config Debug
+ctest --preset linux --build-config Debug
+```
+
+### Running Tests Directly
+
+```bash
+cd build
+ctest --output-on-failure
+```
+
+### Available Test Binaries
+
+After building, individual test binaries are available in `build/`:
+
+| Test Binary | Tests |
+|---|---|
+| `DownloadTask_test` | Network download functionality |
+| `FileSystem_test` | Filesystem utilities |
+| `GradleSpecifier_test` | Maven coordinate parsing |
+| `GZip_test` | GZip compression/decompression |
+| `Index_test` | Metadata index |
+| `INIFile_test` | INI file parsing |
+| `JavaVersion_test` | Java version comparison |
+| `Library_test` | Library descriptor |
+| `ModFolderModel_test` | Mod folder model |
+| `MojangVersionFormat_test` | Version JSON parsing |
+| `ParseUtils_test` | Version string parsing |
+| `UpdateChecker_test` | Update check system |
+| `sys_test` | System information |
+
+Tests use Qt's `QTest` framework and are integrated via ECM's `ecm_add_tests()`.
+
+## Build Safety
+
+The root `CMakeLists.txt` enforces several safety rules:
+
+### No In-Source Builds
+
+```cmake
+string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" IS_IN_SOURCE_BUILD)
+if(IS_IN_SOURCE_BUILD)
+    message(FATAL_ERROR "You are building MeshMC in-source. ...")
+endif()
+```
+
+### No WSL Builds
+
+```cmake
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ if(CMAKE_HOST_SYSTEM_VERSION MATCHES ".*[Mm]icrosoft.*" OR
+ CMAKE_HOST_SYSTEM_VERSION MATCHES ".*WSL.*")
+ message(FATAL_ERROR "Building MeshMC is not supported in Linux-on-Windows distributions.")
+ endif()
+endif()
+```
+
+## Development Tooling
+
+Optional tools for development:
+
+| Tool | Purpose |
+|---|---|
+| `lefthook` | Git hooks manager (installed by bootstrap) |
+| `reuse` | REUSE license compliance checker |
+| `clang-format` | Code formatting (config in `.clang-format`) |
+| `clang-tidy` | Static analysis (config in `.clang-tidy`) |
+| `scdoc` | Man page generation |
+
+## Troubleshooting
+
+### Qt6 Not Found
+
+If CMake cannot find Qt6, set the `Qt6_DIR` or `CMAKE_PREFIX_PATH`:
+
+```bash
+cmake --preset linux -DQt6_DIR=/usr/lib64/cmake/Qt6
+# or
+export CMAKE_PREFIX_PATH=/opt/qt6
+cmake --preset linux
+```
+
+### ECM Not Found
+
+Install `extra-cmake-modules`:
+
+```bash
+# Debian/Ubuntu
+sudo apt-get install extra-cmake-modules
+# Fedora
+sudo dnf install extra-cmake-modules
+# Arch
+sudo pacman -S extra-cmake-modules
+```
+
+### Submodule Errors
+
+If you see errors about missing `libnbtplusplus`:
+
+```bash
+git submodule update --init --recursive
+```
+
+### Missing Qt6 NetworkAuth
+
+This module is not always included in distro Qt6 packages. Install separately:
+
+```bash
+# Debian/Ubuntu
+sudo apt-get install qt6-networkauth-dev
+# Fedora
+sudo dnf install qt6-qtnetworkauth-devel
+```
diff --git a/docs/handbook/meshmc/code-style.md b/docs/handbook/meshmc/code-style.md
new file mode 100644
index 0000000000..93a5d250a8
--- /dev/null
+++ b/docs/handbook/meshmc/code-style.md
@@ -0,0 +1,315 @@
+# Code Style
+
+## Overview
+
+MeshMC enforces consistent code style using `.clang-format` and `.clang-tidy` configurations. The project uses C++23 with Qt6 patterns. Code formatting is checked automatically via lefthook git hooks.
+
+## Clang-Format Configuration
+
+The `.clang-format` file defines the formatting rules:
+
+```yaml
+BasedOnStyle: LLVM
+ColumnLimit: 80
+IndentWidth: 4
+TabWidth: 4
+UseTab: Always
+ContinuationIndentWidth: 4
+BreakBeforeBraces: Linux
+PointerAlignment: Left
+SortIncludes: false
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignOperands: Align
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLoopsOnASingleLine: false
+IndentCaseLabels: true
+IndentPPDirectives: None
+NamespaceIndentation: All
+SpaceBeforeParens: ControlStatements
+SpacesInParentheses: false
+```
+
+### Key Formatting Rules
+
+| Rule | Value | Example |
+|---|---|---|
+| Indentation | Tabs (width 4) | `\tif (x)` |
+| Column limit | 80 characters | — |
+| Braces | Linux style | `if (x) {` on same line; function `{` on new line |
+| Pointer alignment | Left | `int* ptr` not `int *ptr` |
+| Include sorting | Disabled | Preserve manual grouping |
+| Short blocks | Never on one line | Always use braces + newlines |
+| Namespace indent | All | Contents indented inside namespaces |
+
+### Running clang-format
+
+```bash
+# Format a single file
+clang-format -i launcher/Application.cpp
+
+# Format all source files
+find launcher libraries -name '*.cpp' -o -name '*.h' | xargs clang-format -i
+
+# Check without modifying (CI mode)
+clang-format --dry-run --Werror launcher/Application.cpp
+```
+
+## Clang-Tidy Configuration
+
+The `.clang-tidy` file enables static analysis checks:
+
+```yaml
+Checks: >
+ -*,
+ bugprone-*,
+ clang-analyzer-*,
+ performance-*,
+ portability-*,
+ readability-*,
+ -readability-function-cognitive-complexity,
+ -readability-magic-numbers,
+ -readability-identifier-length,
+ -readability-convert-member-functions-to-static,
+ modernize-*,
+ -modernize-use-trailing-return-type
+HeaderFilterRegex: '^(launcher|libraries|updater|buildconfig)/'
+FormatStyle: file
+CheckOptions:
+ - key: readability-function-size.LineThreshold
+ value: '200'
+ - key: readability-function-size.StatementThreshold
+ value: '120'
+```
+
+### Enabled Check Categories
+
+| Category | Scope |
+|---|---|
+| `bugprone-*` | Bug-prone patterns (narrowing conversions, incorrect moves, etc.) |
+| `clang-analyzer-*` | Clang Static Analyzer checks (null deref, memory leaks, etc.) |
+| `performance-*` | Performance issues (unnecessary copies, move semantics) |
+| `portability-*` | Portability concerns across compilers/platforms |
+| `readability-*` | Code readability (naming, braces, simplification) |
+| `modernize-*` | C++ modernization (auto, range-for, nullptr, etc.) |
+
+### Disabled Checks
+
+| Check | Reason |
+|---|---|
+| `readability-function-cognitive-complexity` | Qt UI code often has inherently complex functions |
+| `readability-magic-numbers` | Not enforced due to frequent use in UI layout code |
+| `readability-identifier-length` | Short names like `i`, `dl`, `it` are acceptable |
+| `readability-convert-member-functions-to-static` | Conflicts with Qt's signal/slot pattern |
+| `modernize-use-trailing-return-type` | Traditional return types preferred |
+
+### Function Size Limits
+
+- Maximum **200 lines** per function
+- Maximum **120 statements** per function
+
+### Running clang-tidy
+
+```bash
+# Run on a single file (requires compile_commands.json)
+clang-tidy launcher/Application.cpp
+
+# Run with fixes applied
+clang-tidy --fix launcher/Application.cpp
+
+# Generate compile_commands.json
+cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ...
+```
+
+## C++ Standard
+
+MeshMC uses **C++23**:
+
+```cmake
+set(CMAKE_CXX_STANDARD 23)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+```
+
+### C++23 Features Used
+
+- `std::expected` — error handling without exceptions
+- `std::format` — string formatting (where supported)
+- `std::ranges` — range operations
+- Structured bindings
+- `if constexpr`
+- `std::optional`, `std::variant`
+- Designated initializers
+- Three-way comparison (`<=>`)
+
+## Qt Patterns
+
+### Q_OBJECT Macro
+
+All QObject subclasses must include the `Q_OBJECT` macro:
+
+```cpp
+class MyClass : public QObject
+{
+ Q_OBJECT
+public:
+ explicit MyClass(QObject* parent = nullptr);
+ // ...
+};
+```
+
+### Signals and Slots
+
+Use the new-style signal/slot syntax:
+
+```cpp
+// Preferred: compile-time checked
+connect(sender, &Sender::signalName, receiver, &Receiver::slotName);
+
+// Lambda connections
+connect(sender, &Sender::signalName, this, [this]() {
+ // Handle signal
+});
+```
+
+Avoid the old `SIGNAL()`/`SLOT()` macro syntax.
+
+### shared_qobject_ptr
+
+MeshMC uses `shared_qobject_ptr<T>` for shared ownership of QObjects:
+
+```cpp
+// Instead of raw pointers or QSharedPointer
+shared_qobject_ptr<NetJob> job(new NetJob("Download", network));
+```
+
+This is a custom smart pointer that integrates with Qt's parent-child ownership.
+
+### Memory Management
+
+- Use Qt's parent-child ownership for UI objects
+- Use `shared_qobject_ptr<T>` for task objects shared across modules
+- Use `std::shared_ptr<T>` for non-QObject types
+- Use `std::unique_ptr<T>` for exclusive ownership
+- Avoid raw `new`/`delete` outside Qt's parent-child system
+
+## Naming Conventions
+
+### Classes
+
+```cpp
+class MinecraftInstance; // PascalCase
+class BaseVersionList; // PascalCase
+class NetJob; // PascalCase
+```
+
+### Member Variables
+
+```cpp
+class MyClass {
+ int m_count; // m_ prefix for member variables
+ QString m_name; // m_ prefix
+ QList<Item> m_items; // m_ prefix
+
+ static int s_instance; // s_ prefix for static members
+};
+```
+
+### Methods
+
+```cpp
+void executeTask(); // camelCase
+QString profileName() const; // camelCase, const for getters
+void setProfileName(const QString& name); // set prefix for setters
+```
+
+### Signals and Slots
+
+```cpp
+signals:
+ void taskStarted(); // camelCase, past tense for events
+ void progressChanged(int); // camelCase
+ void downloadFinished(); // camelCase
+
+public slots:
+ void onButtonClicked(); // on prefix for UI slots (optional)
+ void handleError(QString); // handle prefix (optional)
+```
+
+### Type Aliases
+
+```cpp
+using Ptr = std::shared_ptr<MyClass>; // Ptr alias convention
+using WeakPtr = std::weak_ptr<MyClass>;
+```
+
+### Enums
+
+```cpp
+enum class AccountState { // PascalCase, scoped (enum class)
+ Unchecked, // PascalCase values
+ Online,
+ Offline,
+ Errored,
+};
+```
+
+### Files
+
+```
+Application.h / Application.cpp // PascalCase, matching class name
+MinecraftInstance.h // PascalCase
+ui-shared.h / ui-shared.c // kebab-case for C files (cgit-inherited)
+```
+
+## Header Guards
+
+Use `#pragma once`:
+
+```cpp
+#pragma once
+
+#include <QString>
+// ...
+```
+
+## Include Order
+
+Includes are grouped (but not auto-sorted):
+
+```cpp
+// 1. Corresponding header
+#include "MyClass.h"
+
+// 2. Project headers
+#include "Application.h"
+#include "settings/SettingsObject.h"
+
+// 3. Qt headers
+#include <QObject>
+#include <QString>
+
+// 4. Standard library
+#include <memory>
+#include <vector>
+```
+
+## Lefthook Integration
+
+Git hooks are managed via `lefthook.yml`:
+
+```yaml
+pre-commit:
+ commands:
+ clang-format:
+ glob: "*.{cpp,h}"
+ run: clang-format --dry-run --Werror {staged_files}
+```
+
+Install hooks after bootstrapping:
+```bash
+lefthook install
+```
diff --git a/docs/handbook/meshmc/component-system.md b/docs/handbook/meshmc/component-system.md
new file mode 100644
index 0000000000..e05870c88e
--- /dev/null
+++ b/docs/handbook/meshmc/component-system.md
@@ -0,0 +1,540 @@
+# Component System
+
+## Overview
+
+MeshMC's component system is the mechanism by which Minecraft versions, mod loaders, and library overlays are decomposed into modular, reorderable, and independently versionable units. Rather than storing a single monolithic version profile, each instance maintains a `PackProfile` — an ordered list of `Component` objects that are resolved and merged into a `LaunchProfile` at launch time.
+
+## Core Classes
+
+### Component (`minecraft/Component.h`)
+
+A `Component` represents a single versioned layer in the instance's version profile:
+
+```cpp
+class Component : public QObject, public ProblemProvider
+{
+ Q_OBJECT
+public:
+ Component(PackProfile* parent, const QString& uid);
+ Component(PackProfile* parent, std::shared_ptr<Meta::Version> version);
+ Component(PackProfile* parent, const QString& uid,
+ std::shared_ptr<VersionFile> file);
+
+ void applyTo(LaunchProfile* profile);
+
+ // State queries
+ bool isEnabled();
+ bool setEnabled(bool state);
+ bool canBeDisabled();
+ bool isMoveable();
+ bool isCustomizable();
+ bool isRevertible();
+ bool isRemovable();
+ bool isCustom();
+ bool isVersionChangeable();
+
+ // Identity
+ QString getID();
+ QString getName();
+ QString getVersion();
+ std::shared_ptr<Meta::Version> getMeta();
+ QDateTime getReleaseDateTime();
+
+ // Customization
+ bool customize();
+ bool revert();
+ void setVersion(const QString& version);
+ void setImportant(bool state);
+
+ // Problem reporting
+ const QList<PatchProblem> getProblems() const override;
+ ProblemSeverity getProblemSeverity() const override;
+
+ void updateCachedData();
+
+signals:
+ void dataChanged();
+};
+```
+
+### Component Data Members
+
+Each component stores both persistent and cached data:
+
+```cpp
+// Persistent properties (saved to mmc-pack.json)
+QString m_uid; // Component identifier (e.g., "net.minecraft", "net.minecraftforge")
+QString m_version; // Selected version string
+bool m_dependencyOnly = false; // Auto-added to satisfy dependencies
+bool m_important = false; // Cannot be removed (e.g., base Minecraft)
+bool m_disabled = false; // Temporarily disabled
+
+// Cached properties (from version file)
+QString m_cachedName; // Display name
+QString m_cachedVersion; // Resolved version (may differ from m_version)
+Meta::RequireSet m_cachedRequires; // Dependencies
+Meta::RequireSet m_cachedConflicts; // Conflicts
+bool m_cachedVolatile = false; // Auto-removable when not needed
+
+// Load state
+std::shared_ptr<Meta::Version> m_metaVersion; // Remote metadata
+std::shared_ptr<VersionFile> m_file; // Parsed version file
+bool m_loaded = false;
+```
+
+### Component UIDs
+
+Components are identified by UIDs that follow a reverse-domain convention:
+
+| UID | Component |
+|---|---|
+| `net.minecraft` | Minecraft base game |
+| `net.minecraftforge` | Minecraft Forge |
+| `net.fabricmc.fabric-loader` | Fabric Loader |
+| `org.quiltmc.quilt-loader` | Quilt Loader |
+| `net.neoforged.neoforge` | NeoForge |
+| `com.mumfrey.liteloader` | LiteLoader |
+| `net.fabricmc.intermediary` | Fabric Intermediary mappings |
+| `org.lwjgl` | LWJGL (auto-dependency) |
+| `org.lwjgl3` | LWJGL 3 (auto-dependency) |
+
+## PackProfile (`minecraft/PackProfile.h`)
+
+`PackProfile` is a `QAbstractListModel` that manages the ordered list of components for a `MinecraftInstance`:
+
+```cpp
+class PackProfile : public QAbstractListModel
+{
+ Q_OBJECT
+ friend ComponentUpdateTask;
+
+public:
+ enum Columns { NameColumn = 0, VersionColumn, NUM_COLUMNS };
+
+ explicit PackProfile(MinecraftInstance* instance);
+
+ // Model interface
+ QVariant data(const QModelIndex& index, int role) const override;
+ bool setData(const QModelIndex& index, const QVariant& value,
+ int role) override;
+ QVariant headerData(int section, Qt::Orientation orientation,
+ int role) const override;
+ int rowCount(const QModelIndex& parent = QModelIndex()) const override;
+ int columnCount(const QModelIndex& parent) const override;
+ Qt::ItemFlags flags(const QModelIndex& index) const override;
+
+ // Component operations
+ void buildingFromScratch();
+ void installJarMods(QStringList selectedFiles);
+ void installCustomJar(QString selectedFile);
+
+ enum MoveDirection { MoveUp, MoveDown };
+ void move(const int index, const MoveDirection direction);
+ bool remove(const int index);
+ bool remove(const QString id);
+ bool customize(int index);
+ bool revertToBase(int index);
+
+ // Resolution
+ void reload(Net::Mode netmode);
+ void resolve(Net::Mode netmode);
+ Task::Ptr getCurrentTask();
+
+ // Profile access
+ std::shared_ptr<LaunchProfile> getProfile() const;
+
+ // Component queries
+ QString getComponentVersion(const QString& uid) const;
+ bool setComponentVersion(const QString& uid, const QString& version,
+ bool important = false);
+ bool installEmpty(const QString& uid, const QString& name);
+ Component* getComponent(const QString& id);
+ Component* getComponent(int index);
+ void appendComponent(ComponentPtr component);
+
+ void saveNow();
+
+signals:
+ void minecraftChanged();
+};
+```
+
+### PackProfile Serialization (`mmc-pack.json`)
+
+The component list is persisted in `mmc-pack.json` within the instance directory:
+
+```json
+{
+ "components": [
+ {
+ "cachedName": "Minecraft",
+ "cachedVersion": "1.20.4",
+ "cachedRequires": [],
+ "important": true,
+ "uid": "net.minecraft",
+ "version": "1.20.4"
+ },
+ {
+ "cachedName": "Fabric Loader",
+ "cachedVersion": "0.15.6",
+ "cachedRequires": [
+ {
+ "suggests": "1.20.4",
+ "uid": "net.minecraft"
+ },
+ {
+ "uid": "net.fabricmc.intermediary"
+ }
+ ],
+ "uid": "net.fabricmc.fabric-loader",
+ "version": "0.15.6"
+ },
+ {
+ "cachedName": "Intermediary Mappings",
+ "cachedVersion": "1.20.4",
+ "dependencyOnly": true,
+ "uid": "net.fabricmc.intermediary",
+ "version": "1.20.4"
+ }
+ ],
+ "formatVersion": 1
+}
+```
+
+### Component Ordering
+
+Components are ordered in the list and applied in sequence. Order matters because later components can override values from earlier ones:
+
+1. **Minecraft base** — always first, provides core libraries, assets, main class
+2. **Intermediary/deobfuscation** — mappings layer if present
+3. **Mod loader** — Forge/Fabric/Quilt/NeoForge, adds loader libraries and tweakers
+4. **Additional libraries** — LWJGL overrides, etc.
+
+Users can reorder components using `PackProfile::move()`:
+
+```cpp
+enum MoveDirection { MoveUp, MoveDown };
+void PackProfile::move(const int index, const MoveDirection direction);
+```
+
+Not all components are movable — `Component::isMoveable()` returns false for important components.
+
+## ComponentUpdateTask (`minecraft/ComponentUpdateTask.h`)
+
+`ComponentUpdateTask` is the task responsible for resolving component dependencies and updating version metadata:
+
+```cpp
+class ComponentUpdateTask : public Task
+{
+ Q_OBJECT
+public:
+ enum class Mode {
+ Launch, // Full resolution for launching
+ Resolution // Lightweight resolution for UI display
+ };
+
+ explicit ComponentUpdateTask(Mode mode, Net::Mode netmode,
+ PackProfile* list, QObject* parent = 0);
+
+protected:
+ void executeTask();
+
+private:
+ void loadComponents();
+ void resolveDependencies(bool checkOnly);
+ void remoteLoadSucceeded(size_t index);
+ void remoteLoadFailed(size_t index, const QString& msg);
+ void checkIfAllFinished();
+};
+```
+
+### Resolution Process
+
+The component update task follows this algorithm:
+
+1. **Load local data** — for each component, load the local version file (`patches/<uid>.json`) if it exists
+2. **Fetch metadata** — for components without local data, fetch from the metadata server (`Meta::Version`)
+3. **Parse version files** — each `Meta::Version` resolves to a `VersionFile` containing libraries, arguments, and rules
+4. **Resolve dependencies** — scan all components' `cachedRequires` sets:
+ - If a required UID is not in the component list, add it as a dependency-only component
+ - If a required UID specifies a version suggestion, set the dependency component's version
+ - Check for conflicts in `cachedConflicts`
+5. **Repeat** — re-resolve until no new dependencies are added (fixed-point iteration)
+6. **Validate** — check for unresolved dependencies, conflicts, and problems
+
+### Network Modes
+
+Resolution can operate in different network modes:
+
+```cpp
+namespace Net {
+ enum class Mode {
+ Offline, // Use only cached/local data
+ Online // Fetch from network
+ };
+}
+```
+
+In `Offline` mode, the task uses only cached metadata. In `Online` mode, it fetches fresh metadata from `meta.projecttick.org`.
+
+## LaunchProfile (`minecraft/LaunchProfile.h`)
+
+`LaunchProfile` is the resolved, merged result of applying all components in sequence. It contains the concrete values needed to launch Minecraft:
+
+```cpp
+class LaunchProfile : public ProblemProvider
+{
+public:
+ // Apply methods (called by Component::applyTo)
+ void applyMinecraftVersion(const QString& id);
+ void applyMainClass(const QString& mainClass);
+ void applyAppletClass(const QString& appletClass);
+ void applyMinecraftArguments(const QString& minecraftArguments);
+ void applyMinecraftVersionType(const QString& type);
+ void applyMinecraftAssets(MojangAssetIndexInfo::Ptr assets);
+ void applyTraits(const QSet<QString>& traits);
+ void applyTweakers(const QStringList& tweakers);
+ void applyJarMods(const QList<LibraryPtr>& jarMods);
+ void applyMods(const QList<LibraryPtr>& mods);
+ void applyLibrary(LibraryPtr library);
+ void applyMavenFile(LibraryPtr library);
+ void applyMainJar(LibraryPtr jar);
+ void applyProblemSeverity(ProblemSeverity severity);
+ void clear();
+
+ // Getters
+ QString getMinecraftVersion() const;
+ QString getMainClass() const;
+ QString getAppletClass() const;
+ QString getMinecraftVersionType() const;
+ MojangAssetIndexInfo::Ptr getMinecraftAssets() const;
+ QString getMinecraftArguments() const;
+ const QSet<QString>& getTraits() const;
+ const QStringList& getTweakers() const;
+ const QList<LibraryPtr>& getJarMods() const;
+ const QList<LibraryPtr>& getLibraries() const;
+ const QList<LibraryPtr>& getNativeLibraries() const;
+ const QList<LibraryPtr>& getMavenFiles() const;
+ const LibraryPtr getMainJar() const;
+ void getLibraryFiles(const QString& architecture,
+ QStringList& jars, QStringList& nativeJars,
+ const QString& overridePath,
+ const QString& tempPath) const;
+ bool hasTrait(const QString& trait) const;
+};
+```
+
+### Profile Merging
+
+Components are applied in order via `Component::applyTo(LaunchProfile*)`:
+
+```
+Component[0] (net.minecraft) → applyTo(profile)
+ sets: mainClass, libraries, assets, arguments, mainJar
+Component[1] (net.fabricmc.intermediary) → applyTo(profile)
+ adds: intermediary libraries
+Component[2] (net.fabricmc.fabric-loader) → applyTo(profile)
+ overrides: mainClass (to Fabric's knot launcher)
+ adds: Fabric libraries, tweakers
+```
+
+The `applyLibrary()` method handles library deduplication — if a library with the same name but different version exists, the later component's version wins.
+
+### Profile Fields
+
+```cpp
+private:
+ QString m_minecraftVersion; // "1.20.4"
+ QString m_minecraftVersionType; // "release" or "snapshot"
+ MojangAssetIndexInfo::Ptr m_minecraftAssets; // Asset index info
+ QString m_minecraftArguments; // Template arguments string
+ QStringList m_tweakers; // Tweaker classes
+ QString m_mainClass; // e.g., "net.fabricmc.loader.impl.launch.knot.KnotClient"
+ QString m_appletClass; // Legacy applet class
+ QList<LibraryPtr> m_libraries; // Classpath libraries
+ QList<LibraryPtr> m_nativeLibraries; // Native libraries
+ QList<LibraryPtr> m_mavenFiles; // Maven artifacts
+ QList<LibraryPtr> m_jarMods; // Jar modifications
+ LibraryPtr m_mainJar; // Minecraft main JAR
+ QSet<QString> m_traits; // Feature traits
+ ProblemSeverity m_problemSeverity; // Worst problem severity
+ QList<PatchProblem> m_problems; // Accumulated problems
+```
+
+## VersionFile (`minecraft/VersionFile.h`)
+
+A `VersionFile` represents the parsed content of a version JSON file. It is the intermediate format between raw JSON and the `LaunchProfile`:
+
+Key fields include:
+- `mainClass` — Java main class
+- `mainJar` — the primary game JAR
+- `libraries` — list of `Library` objects for the classpath
+- `mavenFiles` — additional Maven artifacts
+- `jarMods` — jar modifications
+- `minecraftArguments` — game argument template
+- `tweakers` — tweaker classes for legacy Forge
+- `requires` — dependency requirements (`Meta::RequireSet`)
+- `conflicts` — conflict declarations
+- `traits` — feature traits (set of strings)
+- `rules` — platform/feature conditional rules
+
+## Library (`minecraft/Library.h`)
+
+A `Library` represents a Java library dependency identified by Maven coordinates:
+
+```cpp
+class Library {
+public:
+ GradleSpecifier m_name; // e.g., "net.minecraft:launchwrapper:1.12"
+ QString m_absoluteURL; // Direct download URL (if any)
+ QString m_repositoryURL; // Maven repository base URL
+ QList<Rule> m_rules; // Platform conditional rules
+ QStringList m_natives; // Native classifier map
+ // ...
+};
+```
+
+Libraries are resolved to file paths using `GradleSpecifier`:
+
+```cpp
+class GradleSpecifier {
+ QString m_group; // "net.minecraft"
+ QString m_artifact; // "launchwrapper"
+ QString m_version; // "1.12"
+ QString m_classifier; // "" or "natives-linux"
+ QString m_extension; // "jar"
+
+ // Produces paths like: net/minecraft/launchwrapper/1.12/launchwrapper-1.12.jar
+ QString toPath() const;
+};
+```
+
+## Version Format Parsers
+
+### MojangVersionFormat (`minecraft/MojangVersionFormat.h`)
+
+Parses Mojang's official version JSON format:
+
+```cpp
+class MojangVersionFormat {
+public:
+ static VersionFilePtr versionFileFromJson(const QJsonDocument& doc,
+ const QString& filename);
+ static QJsonDocument versionFileToJson(const VersionFilePtr& patch);
+ static LibraryPtr libraryFromJson(const QJsonObject& libObj,
+ const QString& filename);
+ static QJsonObject libraryToJson(Library* library);
+};
+```
+
+### OneSixVersionFormat (`minecraft/OneSixVersionFormat.h`)
+
+Parses MeshMC's extended version JSON format (superset of Mojang's):
+
+```cpp
+class OneSixVersionFormat {
+public:
+ static VersionFilePtr versionFileFromJson(const QJsonDocument& doc,
+ const QString& filename,
+ bool requireOrder);
+ static QJsonDocument versionFileToJson(const VersionFilePtr& patch);
+ static LibraryPtr libraryFromJson(ProblemContainer& problems,
+ const QJsonObject& libObj,
+ const QString& filename);
+ static QJsonObject libraryToJson(Library* library);
+};
+```
+
+The "OneSix" format adds fields for component metadata, requirements, conflicts, and MeshMC-specific extensions.
+
+## Component Customization
+
+### Customizing a Component
+
+```cpp
+bool Component::customize();
+```
+
+Customizing a component creates a local override file in `patches/<uid>.json` within the instance directory. This allows users to manually edit the version JSON:
+- Modified libraries
+- Changed main class
+- Custom arguments
+- Additional tweakers
+
+### Reverting to Base
+
+```cpp
+bool Component::revert();
+```
+
+Reverting removes the local override file and restores the component to its remote metadata version.
+
+### Version Changes
+
+```cpp
+void Component::setVersion(const QString& version);
+```
+
+Changing a component's version triggers a re-resolution:
+1. Update the version string
+2. Clear cached data
+3. `PackProfile` schedules a save
+4. Next `resolve()` call fetches the new version's metadata
+
+## Metadata System Integration
+
+Components rely on the metadata system (`meta/`) for version information:
+
+```
+Meta::Index (root, loaded from meta.projecttick.org)
+ └── Meta::VersionList (per UID, e.g., "net.minecraft")
+ └── Meta::Version (e.g., "1.20.4")
+ └── VersionFile (parsed JSON data)
+```
+
+When a component needs metadata:
+1. `Component` requests its `Meta::Version` from the metadata index
+2. `Meta::Version` checks local cache
+3. If cache is stale, fetches fresh data from `meta.projecttick.org/v1/<uid>/<version>.json`
+4. Parses the response into a `VersionFile`
+5. `Component` applies the `VersionFile` to the `LaunchProfile`
+
+## Jar Mods
+
+Jar mods are modifications applied directly to the Minecraft JAR file:
+
+```cpp
+void PackProfile::installJarMods(QStringList selectedFiles);
+void PackProfile::installCustomJar(QString selectedFile);
+```
+
+- `installJarMods()` adds JAR files to the jar mods list; they are merged into the game JAR at launch by `ModMinecraftJar`
+- `installCustomJar()` replaces the main Minecraft JAR entirely
+
+Jar mod files are stored in `<instance>/jarmods/` and referenced in the component list.
+
+## Problem Detection
+
+Both `Component` and `LaunchProfile` implement the `ProblemProvider` interface:
+
+```cpp
+class ProblemProvider {
+public:
+ virtual const QList<PatchProblem> getProblems() const = 0;
+ virtual ProblemSeverity getProblemSeverity() const = 0;
+};
+```
+
+Problems are detected during component resolution:
+- Missing metadata for a required component
+- Unresolvable dependency
+- Conflicting component versions
+- Incompatible library combinations
+- Missing or invalid local override files
+
+Problem severity levels:
+- **None** — no issues
+- **Warning** — may cause problems but launch is allowed
+- **Error** — launch should be prevented
+
+The `VersionPage` UI displays problems as icons/tooltips on affected components.
diff --git a/docs/handbook/meshmc/contributing.md b/docs/handbook/meshmc/contributing.md
new file mode 100644
index 0000000000..e7612f13d4
--- /dev/null
+++ b/docs/handbook/meshmc/contributing.md
@@ -0,0 +1,130 @@
+# Contributing
+
+## Overview
+
+This document summarizes the contribution guidelines for MeshMC. For the full authoritative guide, see [CONTRIBUTING.md](../../../meshmc/CONTRIBUTING.md) in the MeshMC source tree.
+
+## AI Policy
+
+MeshMC follows a strict AI usage policy adapted from matplotlib and the Linux Kernel:
+
+- **No raw AI output** as comments on GitHub or Discord
+- If AI tools are used to develop code or documentation, the contributor **must fully understand** the changes and explain why they are the correct approach
+- Contributions must demonstrate personal competency and added value
+- Low-quality AI-generated contributions will be rigorously rejected
+
+### AI Agent Restrictions
+
+- AI agents **MUST NOT** add `Signed-off-by` tags — only humans can certify the Developer Certificate of Origin
+- The human submitter is responsible for reviewing all AI-generated code, ensuring licensing compliance, and taking full responsibility
+
+### AI Attribution
+
+When AI tools contribute to development, include an `Assisted-by` tag in the commit message:
+
+```
+Assisted-by: AGENT_NAME:MODEL_VERSION [TOOL1] [TOOL2]
+```
+
+Example:
+```
+Assisted-by: Claude:claude-3-opus coccinelle sparse
+```
+
+Basic development tools (git, gcc, make, editors) do not need to be listed.
+
+## Bootstrapping
+
+Before building, run the bootstrap script:
+
+```bash
+# Linux / macOS
+./bootstrap.sh
+
+# Windows
+.\bootstrap.cmd
+```
+
+This installs:
+- lefthook (git hooks)
+- reuse (license compliance)
+- Go tools
+- zlib, extra-cmake-modules
+- Other build dependencies
+
+**Note**: Qt6 with modules must be installed separately. The bootstrap script installs Qt only for QuaZip's needs. On Windows, use the Qt Online Installer.
+
+## Building
+
+See [Building MeshMC](building.md) for complete build instructions.
+
+## Signing Your Work (DCO)
+
+All contributions must be signed off using the Developer Certificate of Origin (DCO).
+
+### How to Sign Off
+
+Append `-s` to your git commit:
+```bash
+git commit -s -m "Fix instance loading crash"
+```
+
+Or manually append:
+```
+Fix instance loading crash
+
+Signed-off-by: Your Name <your.email@example.com>
+```
+
+### Developer Certificate of Origin 1.1
+
+By signing off, you certify:
+
+1. **(a)** The contribution was created by you and you have the right to submit it under the project's open source license
+2. **(b)** The contribution is based on existing work covered under an appropriate open source license, and you have the right to submit it (with or without modifications) under that license
+3. **(c)** The contribution was provided to you by someone who certified (a), (b), or (c), and you have not modified it
+4. **(d)** You understand this is a public record maintained indefinitely
+
+### Enforcement
+
+Sign-off is enforced automatically when creating a pull request. You will be notified if any commits aren't signed off.
+
+### Cryptographic Signing (Optional)
+
+You can also [cryptographically sign commits](https://docs.github.com/en/authentication/managing-commit-signature-verification/signing-commits) and enable [vigilant mode](https://docs.github.com/en/authentication/managing-commit-signature-verification/displaying-verification-statuses-for-all-of-your-commits) on GitHub.
+
+## Contributor License Agreement (CLA)
+
+By submitting a contribution, you agree to the [Project Tick CLA](https://projecttick.org/licenses/PT-CLA-2.0.txt).
+
+The CLA ensures:
+- You have the legal right to submit the contribution
+- It does not knowingly infringe third-party rights
+- Project Tick may distribute the contribution under the applicable license
+- Long-term governance and license consistency are maintained
+
+The CLA applies to all intentional contributions: source code, documentation, tests, data, media, and configuration files.
+
+## Backporting
+
+Automated backports merge specific contributions from `develop` into `release` branches:
+
+- Add labels like `backport release-7.x` to PRs
+- Add the milestone for the target release
+- The [backport workflow](https://github.com/Project-Tick/MeshMC/blob/master/.github/workflows/backport.yml) handles the merge automatically
+
+## Pull Request Workflow
+
+1. Fork the repository
+2. Create a feature branch from `develop`
+3. Make your changes
+4. Ensure code passes `clang-format` and `clang-tidy` checks
+5. Sign off all commits (`git commit -s`)
+6. Include AI attribution if applicable
+7. Open a PR against `develop`
+8. Address review feedback
+9. Add backport labels if the fix applies to release branches
+
+## License
+
+MeshMC is licensed under GPL-3.0-or-later. All contributions must be compatible with this license. The project uses REUSE for license compliance tracking.
diff --git a/docs/handbook/meshmc/dependencies.md b/docs/handbook/meshmc/dependencies.md
new file mode 100644
index 0000000000..dda35337ec
--- /dev/null
+++ b/docs/handbook/meshmc/dependencies.md
@@ -0,0 +1,241 @@
+# Dependencies
+
+## Overview
+
+MeshMC depends on a mix of bundled libraries (shipped in the source tree under `libraries/`) and external libraries resolved at build time via the system package manager or vcpkg.
+
+## Bundled Libraries
+
+These libraries are included in the `libraries/` directory and built as part of the MeshMC CMake project:
+
+| Library | Directory | Purpose | License |
+|---|---|---|---|
+| **ganalytics** | `libraries/ganalytics/` | Google Analytics integration for usage telemetry | MIT |
+| **systeminfo** | `libraries/systeminfo/` | System information queries (OS, CPU, memory) | GPL-3.0-or-later |
+| **hoedown** | `libraries/hoedown/` | Markdown rendering (changelogs, news) | ISC |
+| **launcher** | `libraries/launcher/` | Java process launcher helper binary | GPL-3.0-or-later |
+| **javacheck** | `libraries/javacheck/` | Java installation validator (JAR) | GPL-3.0-or-later |
+| **xz-embedded** | `libraries/xz-embedded/` | XZ decompression (embedded, minimal) | Public Domain |
+| **rainbow** | `libraries/rainbow/` | KDE-style color utilities | LGPL-2.1 |
+| **iconfix** | `libraries/iconfix/` | Qt icon theme fixes | Apache-2.0 |
+| **LocalPeer** | `libraries/LocalPeer/` | Single-instance IPC (based on QtSingleApplication) | LGPL-2.1 |
+| **classparser** | `libraries/classparser/` | Java `.class` file parser (mod metadata) | GPL-3.0-or-later |
+| **optional-bare** | `libraries/optional-bare/` | C++17 `std::optional` polyfill for older code | BSL-1.0 |
+| **tomlc99** | `libraries/tomlc99/` | TOML parser (C99) | MIT |
+| **katabasis** | `libraries/katabasis/` | OAuth2 library (MSA authentication) | BSD-2-Clause |
+| **libnbtplusplus** | `libraries/libnbtplusplus/` | NBT (Named Binary Tag) parser for Minecraft data | LGPL-3.0 |
+| **qdcss** | `libraries/qdcss/` | CSS-like parser for theme files | GPL-3.0-or-later |
+| **murmur2** | `libraries/murmur2/` | MurmurHash2 implementation (CurseForge fingerprinting) | Public Domain |
+
+### ganalytics
+
+Provides opt-in usage analytics via Google Analytics Measurement Protocol. Tracks feature usage, not personal data. Can be disabled in settings.
+
+### systeminfo
+
+Cross-platform system info queries:
+- Operating system name and version
+- CPU architecture and model
+- Available memory
+- Used for analytics and crash reports
+
+### launcher
+
+A small native binary that acts as the actual Java process launcher:
+- Handles process spawning on all platforms
+- Supports wrapper commands
+- Manages stdio piping
+
+### javacheck
+
+A minimal Java program (`JavaCheck.class`) that prints JVM system properties. Spawned by `JavaChecker` to validate Java installations without loading the full Minecraft runtime.
+
+### classparser
+
+Parses Java `.class` files to extract:
+- Mod metadata (name, version, mod ID)
+- Forge/Fabric/Quilt mod annotations
+- Used by `LocalModParseTask` for mod discovery
+
+### katabasis
+
+OAuth2 implementation used for Microsoft Account authentication:
+- Token storage structures (`Katabasis::Token`)
+- Token validity tracking
+- Refresh token management
+
+### libnbtplusplus
+
+Parses and writes Minecraft NBT (Named Binary Tag) format:
+- Level.dat parsing for world metadata
+- Server.dat parsing for server list
+- Used by InstanceImportTask and world management
+
+## External Dependencies
+
+These are resolved at build time and must be installed on the system or via vcpkg:
+
+| Library | CMake Target | Purpose | Required |
+|---|---|---|---|
+| **Qt6::Core** | `Qt6::Core` | Foundation (strings, containers, I/O, events) | Yes |
+| **Qt6::Widgets** | `Qt6::Widgets` | GUI toolkit | Yes |
+| **Qt6::Concurrent** | `Qt6::Concurrent` | Threading utilities | Yes |
+| **Qt6::Network** | `Qt6::Network` | HTTP, SSL, proxy | Yes |
+| **Qt6::NetworkAuth** | `Qt6::NetworkAuth` | OAuth2 (MSA authentication) | Yes |
+| **Qt6::Test** | `Qt6::Test` | Unit testing framework | Optional |
+| **Qt6::Xml** | `Qt6::Xml` | XML parsing | Yes |
+| **libarchive** | `LibArchive::LibArchive` | Archive extraction (zip, tar, 7z) | Yes |
+| **zlib** | `ZLIB::ZLIB` | Compression (used by libarchive, QuaZip) | Yes |
+| **ECM** | `extra-cmake-modules` | KDE CMake macros (install dirs, icons) | Yes |
+| **cmark** | `cmark` | CommonMark rendering (changelogs) | Yes |
+| **tomlplusplus** | `tomlplusplus::tomlplusplus` | TOML parsing (C++17) | Yes |
+| **libqrencode** | `qrencode` | QR code generation (MSA login) | Optional |
+| **QuaZip** | `QuaZip::QuaZip` | Qt-based ZIP file I/O | Yes |
+| **Sparkle** | `Sparkle.framework` | macOS auto-update framework | macOS only |
+
+### Qt6
+
+Minimum version: **Qt 6.7** (for full C++23 and NetworkAuth support).
+
+Required Qt modules:
+```cmake
+find_package(Qt6 6.7 REQUIRED COMPONENTS
+ Core
+ Widgets
+ Concurrent
+ Network
+ NetworkAuth
+ Xml
+)
+find_package(Qt6 6.7 COMPONENTS Test)
+```
+
+### libarchive
+
+Used for extracting:
+- Minecraft archives (`.jar`, `.zip`)
+- Modpack archives (`.mrpack`, `.zip`)
+- Java runtime archives (`.tar.gz`, `.zip`)
+
+### Extra CMake Modules (ECM)
+
+KDE's CMake module collection provides:
+- `KDEInstallDirs` — standardized install paths
+- `ecm_install_icons` — icon theme installation
+- `ECMQueryQt` — Qt path queries
+
+### cmark
+
+CommonMark rendering for:
+- Changelogs and release notes
+- CurseForge/Modrinth mod descriptions
+- News feed content
+
+The `cmark` source is also vendored in the repository at `/cmark/` as a subproject (presumably used as a fallback when no system or vcpkg package is found — verify against the CMake configuration).
+
+### tomlplusplus
+
+Modern C++ TOML parser used for:
+- Mod metadata stored in TOML form (e.g., Forge/NeoForge `mods.toml`; note that `fabric.mod.json` itself is JSON, not TOML)
+- Configuration file parsing
+
+The `tomlplusplus` source is also vendored in the repository at `/tomlplusplus/` (presumably used as a fallback when no system or vcpkg package is found — verify against the CMake configuration).
+
+### QuaZip
+
+Qt wrapper around zlib/minizip for ZIP I/O:
+- Instance export (creating `.zip` files)
+- Instance import (reading `.zip` modpacks)
+- Mod file inspection
+
+## vcpkg Integration
+
+The build system supports vcpkg for dependency management:
+
+```cmake
+# CMakePresets.json
+{
+ "configurePresets": [
+ {
+ "name": "windows_msvc",
+ "toolchainFile": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake",
+ "cacheVariables": {
+ "VCPKG_TARGET_TRIPLET": "x64-windows"
+ }
+ }
+ ]
+}
+```
+
+### vcpkg.json
+
+```json
+{
+ "dependencies": [
+ "libarchive",
+ "zlib",
+ "quazip",
+ "cmark",
+ "tomlplusplus"
+ ]
+}
+```
+
+## Build vs Runtime Dependencies
+
+### Build-Only Dependencies
+
+| Dependency | Purpose |
+|---|---|
+| CMake 3.28+ | Build system generator |
+| Ninja | Build tool |
+| C++23 compiler | GCC 14+, Clang 18+, MSVC 17.10+ |
+| ECM | CMake macros |
+| Qt6 (all modules) | Framework headers and libs |
+
+### Runtime Dependencies
+
+| Dependency | Purpose |
+|---|---|
+| Qt6 shared libraries | Core framework runtime |
+| libarchive | Archive operations |
+| zlib | Compression |
+| OpenSSL / Schannel | TLS for network operations |
+| Java 8-21 | Minecraft runtime (user-managed) |
+
+### Optional Runtime Dependencies
+
+| Dependency | Purpose | Platform |
+|---|---|---|
+| libqrencode | QR code for MSA login | All (optional feature) |
+| Sparkle | Auto-updates | macOS |
+| xdg-utils | Open URLs/files | Linux |
+
+## Dependency Graph
+
+```
+meshmc (executable)
+├── Qt6::Core, Qt6::Widgets, Qt6::Concurrent, Qt6::Network, Qt6::NetworkAuth, Qt6::Xml
+├── LibArchive::LibArchive
+├── ZLIB::ZLIB
+├── QuaZip::QuaZip
+├── cmark
+├── tomlplusplus::tomlplusplus
+├── libraries/
+│ ├── ganalytics (→ Qt6::Core, Qt6::Network)
+│ ├── systeminfo (→ Qt6::Core)
+│ ├── hoedown (C library, no Qt dependency)
+│ ├── launcher (→ Qt6::Core)
+│ ├── javacheck (Java, no C++ deps)
+│ ├── xz-embedded (C library, no deps)
+│ ├── rainbow (→ Qt6::Core, Qt6::Widgets)
+│ ├── iconfix (→ Qt6::Core, Qt6::Widgets)
+│ ├── LocalPeer (→ Qt6::Core, Qt6::Network)
+│ ├── classparser (→ Qt6::Core)
+│ ├── tomlc99 (C library, no deps)
+│ ├── katabasis (→ Qt6::Core, Qt6::Network, Qt6::NetworkAuth)
+│ ├── libnbtplusplus (→ ZLIB::ZLIB)
+│ ├── qdcss (→ Qt6::Core)
+│ └── murmur2 (C library, no deps)
+└── optional: qrencode, Sparkle.framework
+```
diff --git a/docs/handbook/meshmc/instance-management.md b/docs/handbook/meshmc/instance-management.md
new file mode 100644
index 0000000000..801050fa10
--- /dev/null
+++ b/docs/handbook/meshmc/instance-management.md
@@ -0,0 +1,483 @@
+# Instance Management
+
+## Overview
+
+Instance management is central to MeshMC's design. An "instance" is a self-contained Minecraft environment with its own game version, mods, settings, saves, resource packs, and configuration. MeshMC stores multiple instances in parallel, allowing users to maintain entirely separate Minecraft setups.
+
+## Instance Storage Layout
+
+### Instance Root Directory
+
+All instances live under a single parent directory, configurable in settings (default: `instances/` within the MeshMC data directory). Each instance occupies its own subdirectory:
+
+```
+instances/
+├── MyVanilla1.20/
+│ ├── instance.cfg # Instance-level settings (INI format)
+│ ├── mmc-pack.json # Component list (PackProfile)
+│ ├── patches/ # Custom component overrides (JSON)
+│ ├── .minecraft/ # Game directory
+│ │ ├── mods/ # Loader mods
+│ │ ├── resourcepacks/ # Resource packs
+│ │ ├── shaderpacks/ # Shader packs
+│ │ ├── saves/ # World saves
+│ │ ├── config/ # Mod configuration
+│ │ ├── options.txt # Game options
+│ │ ├── screenshots/ # Screenshots
+│ │ └── logs/ # Game logs
+│ └── libraries/ # Instance-local libraries
+├── ForgeModded/
+│ ├── instance.cfg
+│ ├── mmc-pack.json
+│ ├── patches/
+│ └── .minecraft/
+└── ...
+```
+
+### Instance Configuration File (`instance.cfg`)
+
+Each instance has an `instance.cfg` file (INI format) managed by `INISettingsObject`. This stores per-instance metadata and setting overrides:
+
+```ini
+InstanceType=OneSix
+name=My Modded Instance
+iconKey=flame
+notes=Testing Forge 1.20.4 with performance mods
+lastLaunchTime=1712345678000
+totalTimePlayed=3600
+lastTimePlayed=1800
+JoinServerOnLaunch=false
+OverrideJavaPath=true
+JavaPath=/usr/lib/jvm/java-21/bin/java
+OverrideMemory=true
+MinMemAlloc=2048
+MaxMemAlloc=8192
+```
+
+## BaseInstance Class
+
+`BaseInstance` is the abstract base class for all instance types, defined in `launcher/BaseInstance.h`:
+
+```cpp
+class BaseInstance : public QObject,
+ public std::enable_shared_from_this<BaseInstance>
+{
+ Q_OBJECT
+protected:
+ BaseInstance(SettingsObjectPtr globalSettings,
+ SettingsObjectPtr settings,
+ const QString& rootDir);
+public:
+ enum class Status { Present, Gone };
+
+ virtual void saveNow() = 0;
+ void invalidate();
+
+ virtual QString id() const;
+ void setRunning(bool running);
+ bool isRunning() const;
+ int64_t totalTimePlayed() const;
+ int64_t lastTimePlayed() const;
+ void resetTimePlayed();
+
+ QString instanceType() const;
+ QString instanceRoot() const;
+ virtual QString gameRoot() const { return instanceRoot(); }
+ virtual QString modsRoot() const = 0;
+
+ QString name() const;
+ void setName(QString val);
+ QString windowTitle() const;
+ QString iconKey() const;
+ void setIconKey(QString val);
+ QString notes() const;
+ void setNotes(QString val);
+
+ QString getPreLaunchCommand();
+ QString getPostExitCommand();
+ QString getWrapperCommand();
+
+ virtual QSet<QString> traits() const = 0;
+ qint64 lastLaunch() const;
+ void setLastLaunch(qint64 val);
+
+ virtual SettingsObjectPtr settings() const;
+ virtual Task::Ptr createUpdateTask(Net::Mode mode) = 0;
+ virtual shared_qobject_ptr<LaunchTask>
+ createLaunchTask(AuthSessionPtr account,
+ MinecraftServerTargetPtr serverToJoin) = 0;
+ shared_qobject_ptr<LaunchTask> getLaunchTask();
+ virtual QProcessEnvironment createEnvironment() = 0;
+ virtual IPathMatcher::Ptr getLogFileMatcher() = 0;
+ virtual QString getLogFileRoot() = 0;
+};
+
+typedef std::shared_ptr<BaseInstance> InstancePtr;
+```
+
+Key characteristics:
+- Uses `std::enable_shared_from_this` for safe self-reference in callbacks
+- Instance ID is determined internally by MeshMC (typically the directory name)
+- Tracks play time (total and last session) as integer counters; the `totalTimePlayed=3600` example in `instance.cfg` suggests the unit is seconds rather than milliseconds — verify against the source
+- Supports custom pre-launch, post-exit, and wrapper commands
+- `traits()` returns feature flags from the version profile (e.g., `"XR:Initial"`, `"FirstThreadOnMacOS"`)
+
+## MinecraftInstance
+
+`MinecraftInstance` is the concrete implementation of `BaseInstance` for modern Minecraft versions, defined in `launcher/minecraft/MinecraftInstance.h`:
+
+```cpp
+class MinecraftInstance : public BaseInstance
+{
+ Q_OBJECT
+public:
+ MinecraftInstance(SettingsObjectPtr globalSettings,
+ SettingsObjectPtr settings,
+ const QString& rootDir);
+
+ // Directory accessors
+ QString jarModsDir() const;
+ QString resourcePacksDir() const;
+ QString texturePacksDir() const;
+ QString shaderPacksDir() const;
+ QString modsRoot() const override;
+ QString coreModsDir() const;
+ QString modsCacheLocation() const;
+ QString libDir() const;
+ QString worldDir() const;
+ QString resourcesDir() const;
+ QDir jarmodsPath() const;
+ QDir librariesPath() const;
+ QDir versionsPath() const;
+ QString instanceConfigFolder() const override;
+ QString gameRoot() const override;
+ QString binRoot() const;
+ QString getNativePath() const;
+ QString getLocalLibraryPath() const;
+
+ // Component system
+ std::shared_ptr<PackProfile> getPackProfile() const;
+
+ // Mod folder models
+ std::shared_ptr<ModFolderModel> loaderModList() const;
+ std::shared_ptr<ModFolderModel> coreModList() const;
+ std::shared_ptr<ModFolderModel> resourcePackList() const;
+ std::shared_ptr<ModFolderModel> texturePackList() const;
+ std::shared_ptr<ModFolderModel> shaderPackList() const;
+ std::shared_ptr<WorldList> worldList() const;
+ std::shared_ptr<GameOptions> gameOptionsModel() const;
+
+ // Launch
+ Task::Ptr createUpdateTask(Net::Mode mode) override;
+ shared_qobject_ptr<LaunchTask>
+ createLaunchTask(AuthSessionPtr account,
+ MinecraftServerTargetPtr serverToJoin) override;
+ QStringList javaArguments() const;
+ QStringList getClassPath() const;
+ QStringList getNativeJars() const;
+ QString getMainClass() const;
+ QStringList processMinecraftArgs(AuthSessionPtr account,
+ MinecraftServerTargetPtr serverToJoin) const;
+ JavaVersion getJavaVersion() const;
+};
+```
+
+`MinecraftInstance` provides:
+- All directory path resolution for game assets
+- Lazy-initialized folder models (`ModFolderModel`, `WorldList`, etc.)
+- Launch task construction with the full step pipeline
+- Java argument assembly including classpath, library paths, and game arguments
+- Version-specific behaviors via traits
+
+## InstanceList
+
+`InstanceList` manages the collection of all instances, defined in `launcher/InstanceList.h`:
+
+```cpp
+class InstanceList : public QAbstractListModel
+{
+ Q_OBJECT
+public:
+ explicit InstanceList(SettingsObjectPtr settings,
+ const QString& instDir, QObject* parent = 0);
+
+ // Model interface
+ QModelIndex index(int row, int column = 0,
+ const QModelIndex& parent = QModelIndex()) const override;
+ int rowCount(const QModelIndex& parent = QModelIndex()) const override;
+ QVariant data(const QModelIndex& index, int role) const override;
+ Qt::ItemFlags flags(const QModelIndex& index) const override;
+ bool setData(const QModelIndex& index, const QVariant& value,
+ int role) override;
+
+ enum AdditionalRoles {
+ GroupRole = Qt::UserRole,
+ InstancePointerRole = 0x34B1CB48,
+ InstanceIDRole = 0x34B1CB49
+ };
+
+ InstancePtr at(int i) const { return m_instances.at(i); }
+};
+```
+
+### Instance Discovery
+
+On startup, `InstanceList` scans the instances directory:
+1. Enumerates subdirectories in the instances folder
+2. For each directory, looks for `instance.cfg`
+3. Reads `InstanceType` from the config to determine the instance class
+4. Creates the appropriate instance object (`MinecraftInstance`)
+5. Adds it to the internal list and emits model change signals
+
+### Custom Model Roles
+
+| Role | Value | Returns |
+|---|---|---|
+| `GroupRole` | `Qt::UserRole` | Group name (QString) |
+| `InstancePointerRole` | `0x34B1CB48` | `InstancePtr` (shared pointer) |
+| `InstanceIDRole` | `0x34B1CB49` | Instance ID (QString) |
+
+## Instance Groups
+
+Instances can be organized into named groups. Group assignments are stored in `instgroups.json` in the data directory:
+
+```json
+{
+ "formatVersion": 1,
+ "groups": {
+ "Modded": {
+ "hidden": false,
+ "instances": [
+ "ForgeModded",
+ "FabricServer"
+ ]
+ },
+ "Vanilla": {
+ "hidden": false,
+ "instances": [
+ "MyVanilla1.20"
+ ]
+ }
+ }
+}
+```
+
+Group state tracking uses a tri-state enum:
+
+```cpp
+enum class GroupsState { NotLoaded, Steady, Dirty };
+```
+
+Groups are loaded lazily and saved when dirty. The `GroupRole` in the model provides group information for UI display.
+
+## Instance Creation
+
+### New Instance Creation
+
+New instances are created via `NewInstanceDialog`, which collects:
+- Instance name and icon
+- Minecraft version selection
+- Optional mod loader (Forge, Fabric, Quilt, NeoForge)
+- Optional modpack import
+
+The actual creation is handled by `InstanceCreationTask`:
+1. Creates the instance directory
+2. Writes initial `instance.cfg`
+3. Creates a `PackProfile` with the selected components
+4. Saves the component list to `mmc-pack.json`
+5. Runs `ComponentUpdateTask` to resolve dependencies and download metadata
+
+### Instance Import
+
+`InstanceImportTask` handles importing instances from external sources:
+
+```cpp
+class InstanceImportTask : public InstanceTask
+{
+ Q_OBJECT
+public:
+ explicit InstanceImportTask(const QUrl& url);
+protected:
+ virtual void executeTask() override;
+};
+```
+
+Supported import formats:
+- **ZIP archives** — exported MeshMC/MultiMC instance packages
+- **URLs** — downloads and extracts (supports CurseForge, Modrinth, and Technic pack URLs)
+- **Modpack manifests** — platform-specific manifests trigger specialized import logic
+
+### Instance Copying
+
+`InstanceCopyTask` clones an existing instance:
+
+```cpp
+class InstanceCopyTask : public InstanceTask
+{
+ Q_OBJECT
+public:
+ explicit InstanceCopyTask(InstancePtr origInstance,
+ bool copySaves, bool keepPlaytime);
+protected:
+ virtual void executeTask() override;
+private:
+ InstancePtr m_origInstance;
+ QFuture<bool> m_copyFuture;
+ QFutureWatcher<bool> m_copyFutureWatcher;
+ std::unique_ptr<IPathMatcher> m_matcher;
+ bool m_keepPlaytime;
+};
+```
+
+Key options:
+- `copySaves` — whether to include world save data
+- `keepPlaytime` — whether to copy playtime statistics
+- Uses `QtConcurrent` for background file copying with progress tracking
+- An `IPathMatcher` can exclude specific files/directories from the copy
+
+The copy dialog is `CopyInstanceDialog`:
+
+```cpp
+class CopyInstanceDialog : public QDialog
+// UI file: CopyInstanceDialog.ui
+```
+
+## Instance Lifecycle States
+
+An instance progresses through several states during its lifetime:
+
+```
+Created → Present → [Running] → Present → [Gone]
+```
+
+The `BaseInstance::Status` enum tracks the primary state:
+
+```cpp
+enum class Status {
+ Present, // Instance exists and is tracked
+ Gone // Instance was removed or invalidated
+};
+```
+
+Running state is tracked separately:
+
+```cpp
+void BaseInstance::setRunning(bool running);
+bool BaseInstance::isRunning() const;
+```
+
+When running:
+- Play time counters are updated
+- The instance icon shows a running indicator in the UI
+- Certain operations are disabled (delete, move, etc.)
+
+## Instance Invalidation
+
+```cpp
+void BaseInstance::invalidate();
+```
+
+An instance is invalidated when:
+- Its directory is externally deleted or moved
+- A `RecursiveFileSystemWatcher` detects the directory change
+- The `InstanceList` removes it from its model
+
+## Instance Settings Override System
+
+Each instance has its own `SettingsObject` that can override global settings:
+
+```cpp
+virtual SettingsObjectPtr settings() const;
+```
+
+The override mechanism uses `OverrideSetting`:
+- A **gate setting** (boolean) controls whether the override is active
+- When the gate is ON, the instance's local value is used
+- When the gate is OFF, the global value is passed through
+
+Common overridable settings:
+
+| Setting | Gate Setting | Purpose |
+|---|---|---|
+| `JavaPath` | `OverrideJavaPath` | Java binary path |
+| `MinMemAlloc` | `OverrideMemory` | Minimum memory (MB) |
+| `MaxMemAlloc` | `OverrideMemory` | Maximum memory (MB) |
+| `JvmArgs` | `OverrideJavaArgs` | Additional JVM arguments |
+| `MCLaunchMethod` | `OverrideMCLaunchMethod` | Launch method |
+| `PreLaunchCommand` | `OverrideCommands` | Pre-launch command |
+| `PostExitCommand` | `OverrideCommands` | Post-exit command |
+| `WrapperCommand` | `OverrideCommands` | Wrapper command |
+| `WindowWidth` | `OverrideWindow` | Window width |
+| `WindowHeight` | `OverrideWindow` | Window height |
+| `MaximizeWindow` | `OverrideWindow` | Start maximized |
+
+The `InstanceSettingsPage` UI provides checkboxes for each gate setting, enabling or disabling the corresponding override section.
+
+## Instance UI Integration
+
+### Instance View
+
+The main window displays instances in a custom view (`InstanceView` in `ui/instanceview/`):
+- Grid or list layout
+- Group headers with collapse/expand
+- Drag and drop between groups
+- Context menu for instance operations
+- Icon display with status overlay (running indicator)
+
+### Instance Pages
+
+When editing an instance, a `PageDialog` opens with these pages:
+
+| Page | File | Purpose |
+|---|---|---|
+| `VersionPage` | `ui/pages/instance/VersionPage.{h,cpp}` | Component management |
+| `ModFolderPage` | `ui/pages/instance/ModFolderPage.{h,cpp}` | Mod list |
+| `ResourcePackPage` | `ui/pages/instance/ResourcePackPage.h` | Resource packs |
+| `TexturePackPage` | `ui/pages/instance/TexturePackPage.h` | Texture packs |
+| `ShaderPackPage` | `ui/pages/instance/ShaderPackPage.h` | Shader packs |
+| `NotesPage` | `ui/pages/instance/NotesPage.{h,cpp}` | Instance notes |
+| `LogPage` | `ui/pages/instance/LogPage.{h,cpp}` | Game log viewer |
+| `ScreenshotsPage` | `ui/pages/instance/ScreenshotsPage.{h,cpp}` | Screenshots |
+| `WorldListPage` | `ui/pages/instance/WorldListPage.{h,cpp}` | World management |
+| `GameOptionsPage` | `ui/pages/instance/GameOptionsPage.{h,cpp}` | Game options editor |
+| `ServersPage` | `ui/pages/instance/ServersPage.{h,cpp}` | Server list editor |
+| `InstanceSettingsPage` | `ui/pages/instance/InstanceSettingsPage.{h,cpp}` | Settings overrides |
+| `OtherLogsPage` | `ui/pages/instance/OtherLogsPage.{h,cpp}` | Additional log files |
+
+### Instance Window
+
+`InstanceWindow` provides a dedicated window for a running instance:
+
+```cpp
+class InstanceWindow : public QMainWindow
+{
+ Q_OBJECT
+};
+```
+
+It displays:
+- Real-time game log output via `LogModel`
+- Launch/kill controls
+- Instance page navigation
+
+## Instance Export
+
+`ExportInstanceDialog` allows exporting an instance to a ZIP archive:
+- Select which files/directories to include
+- Exclude sensitive data, caches, and temporary files
+- The exported archive can be imported by MeshMC on another machine
+
+## Play Time Tracking
+
+Each instance tracks cumulative play time:
+
+```cpp
+int64_t BaseInstance::totalTimePlayed() const;
+int64_t BaseInstance::lastTimePlayed() const;
+void BaseInstance::resetTimePlayed();
+```
+
+- `totalTimePlayed` — cumulative milliseconds across all sessions
+- `lastTimePlayed` — duration of the most recent session
+- Time is recorded when `setRunning(false)` is called after a session
+- Stored in `instance.cfg` as `totalTimePlayed` and `lastTimePlayed`
diff --git a/docs/handbook/meshmc/java-detection.md b/docs/handbook/meshmc/java-detection.md
new file mode 100644
index 0000000000..00f6ed6805
--- /dev/null
+++ b/docs/handbook/meshmc/java-detection.md
@@ -0,0 +1,411 @@
+# Java Detection
+
+## Overview
+
+MeshMC requires a compatible Java installation to launch Minecraft. The Java detection system automatically discovers Java installations across all supported platforms, validates their architecture and version, and manages bundled Java downloads.
+
+## Architecture
+
+### Key Classes
+
+| Class | File | Purpose |
+|---|---|---|
+| `JavaUtils` | `java/JavaUtils.{h,cpp}` | Platform-specific Java discovery |
+| `JavaChecker` | `java/JavaChecker.{h,cpp}` | Java installation validator |
+| `JavaCheckerJob` | `java/JavaCheckerJob.{h,cpp}` | Batch validation task |
+| `JavaInstall` | `java/JavaInstall.{h,cpp}` | Java installation descriptor |
+| `JavaInstallList` | `java/JavaInstallList.{h,cpp}` | Discovered Java list model |
+| `JavaVersion` | `java/JavaVersion.{h,cpp}` | Version string parser |
+
+## JavaUtils
+
+Platform-specific Java discovery:
+
+```cpp
+class JavaUtils : public QObject
+{
+ Q_OBJECT
+public:
+ JavaUtils();
+
+ // Discovery
+ QList<QString> FindJavaPaths();
+ static QString GetDefaultJava();
+ QList<JavaInstallPtr> FindJavaFromRegistryKey(
+ DWORD keyType, QString keyName, QString keyJavaDir, QString subkeySuffix = ""
+ ); // Windows only
+
+private:
+ // Platform-specific search paths
+ QStringList platformSearchPaths();
+};
+```
+
+### Platform Search Paths
+
+#### Linux
+
+```cpp
+QStringList JavaUtils::platformSearchPaths()
+{
+ return {
+ "/usr/lib/jvm", // Distro packages
+ "/usr/lib64/jvm", // 64-bit distros
+ "/usr/lib32/jvm", // 32-bit compat
+ "/opt/java", // Manual installs
+ "/opt/jdk", // Manual JDK installs
+ QDir::homePath() + "/.sdkman/candidates/java", // SDKMAN
+ QDir::homePath() + "/.jdks", // IntelliJ downloads
+ "/snap/openjdk", // Snap packages
+ "/usr/lib/jvm/java-*/jre", // JRE subdirs
+ };
+}
+```
+
+Scans each directory recursively for `bin/java` executables.
+
+#### macOS
+
+```cpp
+QStringList JavaUtils::platformSearchPaths()
+{
+ return {
+ "/Library/Java/JavaVirtualMachines", // System JDKs
+ "/System/Library/Java/JavaVirtualMachines", // Apple JDKs
+ "/usr/local/opt/openjdk", // Homebrew
+ "/opt/homebrew/opt/openjdk", // Homebrew (Apple Silicon)
+ QDir::homePath() + "/Library/Java/JavaVirtualMachines", // User JDKs
+ QDir::homePath() + "/.sdkman/candidates/java",
+ QDir::homePath() + "/.jdks",
+ };
+}
+```
+
+Also runs `/usr/libexec/java_home -V` to discover system-registered JDKs.
+
+#### Windows
+
+```cpp
+QStringList JavaUtils::platformSearchPaths()
+{
+ QStringList paths;
+ // Registry-based discovery
+ FindJavaFromRegistryKey(KEY_WOW64_64KEY,
+ "SOFTWARE\\JavaSoft\\Java Runtime Environment", "JavaHome");
+ FindJavaFromRegistryKey(KEY_WOW64_64KEY,
+ "SOFTWARE\\JavaSoft\\Java Development Kit", "JavaHome");
+ FindJavaFromRegistryKey(KEY_WOW64_64KEY,
+ "SOFTWARE\\JavaSoft\\JDK", "JavaHome");
+ FindJavaFromRegistryKey(KEY_WOW64_64KEY,
+ "SOFTWARE\\Eclipse Adoptium\\JDK", "Path", "\\hotspot\\MSI");
+ FindJavaFromRegistryKey(KEY_WOW64_64KEY,
+ "SOFTWARE\\Microsoft\\JDK", "Path", "\\hotspot\\MSI");
+
+ // Filesystem paths
+ paths << "C:/Program Files/Java"
+ << "C:/Program Files (x86)/Java"
+ << "C:/Program Files/Eclipse Adoptium"
+ << "C:/Program Files/Microsoft";
+
+ return paths;
+}
+```
+
+## JavaChecker
+
+Validates a Java installation by spawning a subprocess:
+
+```cpp
+class JavaChecker : public QObject
+{
+ Q_OBJECT
+public:
+ void performCheck();
+
+ // Input
+ QString m_path; // Path to java binary
+ int m_minMem = 0; // Minimum memory to test
+ int m_maxMem = 0; // Maximum memory to test
+ int m_permGen = 0; // PermGen size to test
+
+ // Results
+ struct Result {
+ QString path;
+ QString javaVersion;
+ QString realArch; // "amd64", "aarch64", etc.
+ bool valid = false;
+ bool is_64bit = false;
+ int id = 0;
+ QString errorLog;
+ QString outLog;
+ };
+
+signals:
+ void checkFinished(JavaChecker::Result result);
+
+private:
+ QProcess* m_process = nullptr;
+};
+```
+
+### Check Process
+
+`JavaChecker` spawns the Java binary with a small Java program (`javacheck.jar`):
+
+```
+java -jar javacheck.jar
+```
+
+The `javacheck` program (in `libraries/javacheck/`) prints system properties:
+
+```java
+// javacheck/src/main/java/org/projecttick/meshmc/JavaCheck.java
+public class JavaCheck {
+ public static void main(String[] args) {
+ System.out.println("os.arch=" + System.getProperty("os.arch"));
+ System.out.println("java.version=" + System.getProperty("java.version"));
+ System.out.println("java.vendor=" + System.getProperty("java.vendor"));
+ System.out.println("sun.arch.data.model=" + System.getProperty("sun.arch.data.model"));
+ System.out.println("java.runtime.name=" + System.getProperty("java.runtime.name"));
+ }
+}
+```
+
+Output is parsed to populate the `Result` struct.
+
+### Timeout
+
+The check process has a timeout (typically 30 seconds). If Java hangs or takes too long, the check is marked as failed.
+
+## JavaCheckerJob
+
+Batch job for checking multiple Java installations:
+
+```cpp
+class JavaCheckerJob : public Task
+{
+ Q_OBJECT
+public:
+ explicit JavaCheckerJob(QString job_name);
+
+ void addJavaCheckerAction(JavaCheckerPtr base);
+
+signals:
+ void checkFinished(JavaChecker::Result result);
+
+protected:
+ void executeTask() override;
+
+private slots:
+ void partFinished(JavaChecker::Result result);
+
+private:
+ QList<JavaCheckerPtr> m_checks;
+ int m_done = 0;
+};
+```
+
+Used by `JavaInstallList` to validate all discovered Java paths in parallel.
+
+## JavaInstall
+
+Descriptor for a single Java installation:
+
+```cpp
+class JavaInstall
+{
+public:
+ using Ptr = std::shared_ptr<JavaInstall>;
+
+ QString id; // Unique identifier
+ QString path; // Path to java binary
+ JavaVersion version; // Parsed version
+ QString arch; // Architecture (amd64, aarch64)
+ bool is_64bit; // 64-bit flag
+ bool recommended; // Whether this is the recommended choice
+};
+```
+
+## JavaInstallList
+
+Model for the discovered Java installations:
+
+```cpp
+class JavaInstallList : public BaseVersionList
+{
+ Q_OBJECT
+public:
+ void load(); // Triggers discovery + validation
+ void updateListData(QList<BaseVersion::Ptr> versions) override;
+
+ // Filtering
+ BaseVersion::Ptr getRecommended();
+
+protected:
+ void loadList();
+ void sortVersionList();
+
+ QList<BaseVersion::Ptr> m_vlist;
+ bool loaded = false;
+};
+```
+
+### Discovery Flow
+
+```
+JavaInstallList::load()
+ │
+ ├── JavaUtils::FindJavaPaths()
+ │ └── Returns list of java binary paths
+ │
+ ├── Create JavaCheckerJob
+ │ └── Add JavaChecker for each path
+ │
+ ├── Run JavaCheckerJob
+ │ ├── Spawn each java with javacheck.jar (parallel)
+ │ └── Parse output → JavaChecker::Result
+ │
+ ├── Filter valid results
+ │ └── Discard paths where valid == false
+ │
+ └── Create JavaInstall entries
+ └── Store in m_vlist, emit signal
+```
+
+## JavaVersion
+
+Version string parsing and comparison:
+
+```cpp
+class JavaVersion
+{
+public:
+ JavaVersion() {}
+ JavaVersion(const QString& rhs);
+
+ bool operator<(const JavaVersion& rhs) const;
+ bool operator>(const JavaVersion& rhs) const;
+ bool operator==(const JavaVersion& rhs) const;
+ bool requiresPermGen() const;
+
+ int major() const { return m_major; }
+ int minor() const { return m_minor; }
+ int security() const { return m_security; }
+ QString toString() const;
+
+private:
+ int m_major = 0;
+ int m_minor = 0;
+ int m_security = 0;
+ QString m_prerelease;
+ bool m_parseable = false;
+};
+```
+
+### Version String Formats
+
+Handles both old and new Java version schemes:
+
+| Format | Example | Major |
+|---|---|---|
+| Old (1.x.y) | `1.8.0_312` | 8 |
+| New (x.y.z) | `17.0.2` | 17 |
+| New (x.y.z+b) | `21.0.1+12` | 21 |
+| EA builds | `22-ea` | 22 |
+
+### PermGen Detection
+
+```cpp
+bool JavaVersion::requiresPermGen() const
+{
+ return m_major < 8; // PermGen removed in Java 8
+}
+```
+
+Used to conditionally add `-XX:PermSize` and `-XX:MaxPermSize` JVM arguments for Java 7 and below.
+
+## Java Compatibility
+
+### Version Requirements
+
+| Minecraft Version | Minimum Java | Recommended |
+|---|---|---|
+| 1.16.5 and below | Java 8 | Java 8 |
+| 1.17 - 1.17.1 | Java 16 | Java 16 |
+| 1.18 - 1.20.4 | Java 17 | Java 17 |
+| 1.20.5+ | Java 21 | Java 21 |
+
+`ComponentUpdateTask` determines the required Java version from the Minecraft version metadata and validates the configured Java installation.
+
+### Compatibility Warnings
+
+When launching with an incompatible Java version:
+
+```cpp
+// LaunchController checks Java compatibility
+if (settings->get("IgnoreJavaCompatibility").toBool()) {
+ // Skip check, launch anyway
+} else {
+ // Show warning dialog
+ // "Minecraft X.Y requires Java Z, but Java W is configured"
+ // Options: Continue anyway / Change Java / Cancel
+}
+```
+
+## Managed Java Downloads
+
+The `java/download/` subdirectory handles automatic Java downloads:
+
+### ArchiveDownloadTask
+
+Downloads and extracts Java archives:
+- Platform-appropriate archives (tar.gz for Linux/macOS, zip for Windows)
+- Progress tracking during download and extraction
+- Installs to `<data_dir>/java/<version>/`
+
+### ManifestDownloadTask
+
+Fetches Java availability manifest:
+- Queries Mojang's runtime manifest for available Java versions
+- Selects appropriate version for the operating system and architecture
+
+### Java Auto-Setup
+
+In the setup wizard, if no compatible Java is found:
+
+```
+JavaPage (wizard)
+ │
+ ├── Display "No compatible Java found"
+ ├── Offer to download recommended version
+ │
+ └── On accept:
+ ├── Fetch runtime manifest
+ ├── Select appropriate Java version
+ ├── Download archive
+ ├── Extract to data_dir/java/
+ └── Set JavaPath setting to extracted binary
+```
+
+## JavaPage Settings UI
+
+The Java settings page (`ui/pages/global/JavaPage.h`) provides:
+
+| Control | Description |
+|---|---|
+| Java Path | Text field + Browse button for java binary |
+| Auto-detect | Scans system and lists all found Java installations in a dialog |
+| Test | Validates the current Java path using `JavaChecker` |
+| Min Memory | Spinbox for minimum heap allocation (MB) |
+| Max Memory | Spinbox for maximum heap allocation (MB) |
+| PermGen | Spinbox for PermGen (only shown for Java < 8) |
+| JVM Arguments | Text field for additional JVM flags |
+| Ignore Compatibility | Checkbox to skip version compatibility checks |
+
+### Auto-Detect Dialog
+
+When "Auto-detect" is clicked:
+1. `JavaInstallList::load()` runs full discovery
+2. Results shown in a table: Path, Version, Architecture, 64-bit
+3. User selects one → fills Java Path field
+4. Recommended installation is highlighted
diff --git a/docs/handbook/meshmc/launch-system.md b/docs/handbook/meshmc/launch-system.md
new file mode 100644
index 0000000000..a55beca71a
--- /dev/null
+++ b/docs/handbook/meshmc/launch-system.md
@@ -0,0 +1,569 @@
+# Launch System
+
+## Overview
+
+MeshMC's launch system orchestrates the process of starting a Minecraft game instance. It involves account authentication, component resolution, file preparation, JVM argument assembly, and process management. The launch system is built around a step-based pipeline that allows individual phases to be executed, paused, and resumed.
+
+## Launch Architecture
+
+### Key Classes
+
+| Class | File | Purpose |
+|---|---|---|
+| `LaunchController` | `launcher/LaunchController.{h,cpp}` | High-level orchestrator, UI interaction |
+| `LaunchTask` | `launcher/launch/LaunchTask.{h,cpp}` | Step pipeline executor |
+| `LaunchStep` | `launcher/launch/LaunchStep.{h,cpp}` | Abstract step interface |
+| `DirectJavaLaunch` | `minecraft/launch/DirectJavaLaunch.{h,cpp}` | JVM process spawner |
+| `MeshMCPartLaunch` | `minecraft/launch/MeshMCPartLaunch.{h,cpp}` | Java-wrapped launcher |
+| `LoggedProcess` | `launcher/LoggedProcess.{h,cpp}` | QProcess wrapper with logging |
+| `LogModel` | `launcher/launch/LogModel.{h,cpp}` | Game log data model |
+
+## LaunchController
+
+`LaunchController` extends `Task` and handles the user-facing launch flow:
+
+```cpp
+class LaunchController : public Task
+{
+ Q_OBJECT
+public:
+ void executeTask() override;
+ void setInstance(InstancePtr instance);
+ void setOnline(bool online);
+ void setProfiler(BaseProfilerFactory* profiler);
+ void setParentWidget(QWidget* widget);
+ void setServerToJoin(MinecraftServerTargetPtr serverToJoin);
+ void setAccountToUse(MinecraftAccountPtr accountToUse);
+ bool abort() override;
+
+private:
+ void login();
+ void launchInstance();
+ void decideAccount();
+
+private slots:
+ void readyForLaunch();
+ void onSucceeded();
+ void onFailed(QString reason);
+ void onProgressRequested(Task* task);
+
+private:
+ BaseProfilerFactory* m_profiler = nullptr;
+ bool m_online = true;
+ InstancePtr m_instance;
+ QWidget* m_parentWidget = nullptr;
+ InstanceWindow* m_console = nullptr;
+ MinecraftAccountPtr m_accountToUse = nullptr;
+ MinecraftServerTargetPtr m_serverToJoin;
+};
+```
+
+### Launch Flow
+
+```
+LaunchController::executeTask()
+ │
+ ├── decideAccount()
+ │ ├── If m_accountToUse is set → use it
+ │ ├── Else → get default account from AccountList
+ │ └── If no account → prompt user with ProfileSelectDialog
+ │
+ ├── login()
+ │ ├── If online → authenticate via MSASilent (token refresh)
+ │ │ ├── On success → launchInstance()
+ │ │ └── On failure → fall back to MSAInteractive (browser login)
+ │ └── If offline → create offline AuthSession → launchInstance()
+ │
+ └── launchInstance()
+ ├── instance->createUpdateTask(mode) → update game files if needed
+ ├── instance->createLaunchTask(session, server) → build step pipeline
+ ├── Connect LaunchTask signals to controller slots
+ └── LaunchTask::start()
+```
+
+### Account Decision (`decideAccount()`)
+
+The account selection priority:
+1. Explicitly provided `m_accountToUse` (from command line or UI)
+2. Default account from `AccountList::defaultAccount()`
+3. User prompt via `ProfileSelectDialog` if no default is set
+
+### Authentication (`login()`)
+
+For online mode:
+1. Attempt `MSASilent` (token refresh using existing tokens)
+2. If refresh fails, prompt for `MSAInteractive` (browser-based OAuth2 login)
+3. On success, an `AuthSession` is created containing:
+ - Access token
+ - Player UUID
+ - Player name
+ - User type
+
+For offline mode:
+- An `AuthSession` is created with a dummy token
+- The player name comes from the account profile
+
+## LaunchTask
+
+`LaunchTask` is the central pipeline executor:
+
+```cpp
+class LaunchTask : public Task
+{
+ Q_OBJECT
+public:
+ enum State { NotStarted, Running, Waiting, Failed, Aborted, Finished };
+
+ static shared_qobject_ptr<LaunchTask> create(InstancePtr inst);
+
+ void appendStep(shared_qobject_ptr<LaunchStep> step);
+ void prependStep(shared_qobject_ptr<LaunchStep> step);
+ void setCensorFilter(QMap<QString, QString> filter);
+
+ InstancePtr instance();
+ void setPid(qint64 pid);
+ qint64 pid();
+
+ void executeTask() override;
+ void proceed();
+ bool abort() override;
+ bool canAbort() const override;
+
+ shared_qobject_ptr<LogModel> getLogModel();
+
+signals:
+ void log(QString text, MessageLevel::Enum level);
+ void readyForLaunch();
+ void requestProgress(Task* task);
+ void requestLogging();
+};
+```
+
+### Step Execution Model
+
+Steps are executed sequentially:
+
+```
+Step 0: executeTask() → completes → advance
+Step 1: executeTask() → completes → advance
+Step 2: executeTask() → emits readyForLaunch() → WAIT
+ User clicks "Launch" in console → proceed()
+Step 3: executeTask() → starts JVM process → RUNNING
+ Process exits → completes → advance
+Step N: finalize() called in reverse order
+```
+
+Each step can:
+- **Complete immediately** — call `emitSucceeded()` to advance
+- **Pause** — emit `readyForLaunch()` to wait for user interaction
+- **Fail** — call `emitFailed()` to abort the pipeline
+- **Run persistently** — stay active until an external event (process exit)
+
+### Censor Filter
+
+The `setCensorFilter()` method installs a string replacement map that redacts sensitive information from logs:
+
+```cpp
+void LaunchTask::setCensorFilter(QMap<QString, QString> filter);
+```
+
+`MinecraftInstance` populates this with:
+- Access token → `<ACCESS TOKEN>`
+- Client token → `<CLIENT TOKEN>`
+- Player UUID → `<PROFILE ID>`
+
+## LaunchStep
+
+Abstract base class for individual launch steps:
+
+```cpp
+class LaunchStep : public Task
+{
+ Q_OBJECT
+public:
+ explicit LaunchStep(LaunchTask* parent)
+ : Task(nullptr), m_parent(parent) { bind(parent); }
+
+signals:
+ void logLines(QStringList lines, MessageLevel::Enum level);
+ void logLine(QString line, MessageLevel::Enum level);
+ void readyForLaunch();
+ void progressReportingRequest();
+
+public slots:
+ virtual void proceed() {}
+ virtual void finalize() {}
+
+protected:
+ LaunchTask* m_parent;
+};
+```
+
+## Minecraft Launch Steps
+
+### Step Pipeline Construction
+
+`MinecraftInstance::createLaunchTask()` builds the step pipeline for a Minecraft launch:
+
+```cpp
+shared_qobject_ptr<LaunchTask>
+MinecraftInstance::createLaunchTask(AuthSessionPtr session,
+ MinecraftServerTargetPtr serverToJoin)
+{
+ auto task = LaunchTask::create(std::dynamic_pointer_cast<MinecraftInstance>(
+ shared_from_this()));
+
+ // Step order:
+ task->appendStep(make_shared<VerifyJavaInstall>(task.get()));
+ task->appendStep(make_shared<CreateGameFolders>(task.get()));
+ task->appendStep(make_shared<ScanModFolders>(task.get()));
+ task->appendStep(make_shared<ExtractNatives>(task.get()));
+ task->appendStep(make_shared<ModMinecraftJar>(task.get()));
+ task->appendStep(make_shared<ReconstructAssets>(task.get()));
+ task->appendStep(make_shared<ClaimAccount>(task.get()));
+ task->appendStep(make_shared<PrintInstanceInfo>(task.get()));
+
+ // Choose launch method
+ auto method = launchMethod();
+ if (method == "LauncherPart") {
+ auto step = make_shared<MeshMCPartLaunch>(task.get());
+ step->setAuthSession(session);
+ step->setServerToJoin(serverToJoin);
+ task->appendStep(step);
+ } else {
+ auto step = make_shared<DirectJavaLaunch>(task.get());
+ step->setAuthSession(session);
+ step->setServerToJoin(serverToJoin);
+ task->appendStep(step);
+ }
+
+ // Set up censor filter
+ task->setCensorFilter(createCensorFilterFromSession(session));
+
+ return task;
+}
+```
+
+### VerifyJavaInstall (`minecraft/launch/VerifyJavaInstall.h`)
+
+Validates that the configured Java installation exists and is compatible:
+- Checks that the Java binary exists at the configured path
+- Verifies Java version meets minimum requirements
+- Fails with descriptive error if Java is missing or incompatible
+
+### CreateGameFolders (`minecraft/launch/CreateGameFolders.h`)
+
+Ensures required directory structure exists:
+- `.minecraft/` game directory
+- `mods/`, `resourcepacks/`, `saves/` subdirectories
+- `libraries/` for instance-local libraries
+- `natives/` for platform-specific native libraries
+
+### ScanModFolders (`minecraft/launch/ScanModFolders.h`)
+
+Scans mod directories and updates the mod list:
+- Enumerates `.minecraft/mods/` for loader mods
+- Enumerates `.minecraft/coremods/` for core mods (Forge legacy)
+- Updates the instance's mod models
+
+### ExtractNatives (`minecraft/launch/ExtractNatives.h`)
+
+Extracts platform-specific native libraries from JAR files:
+- Iterates through native libraries in the `LaunchProfile`
+- Extracts `.so` (Linux), `.dll` (Windows), or `.dylib` (macOS) files
+- Places them in the `natives/` directory within the instance
+
+### ModMinecraftJar (`minecraft/launch/ModMinecraftJar.h`)
+
+Applies jar mods to the Minecraft game JAR:
+- If jar mods are present in the component list, creates a modified JAR
+- Overlays jar mod contents onto the vanilla JAR
+- Stores the modified JAR for use by the launcher
+
+### ReconstructAssets (`minecraft/launch/ReconstructAssets.h`)
+
+Handles legacy asset management:
+- For older Minecraft versions that use the "legacy" asset system
+- Copies assets from the shared cache to the instance's `resources/` directory
+- Modern versions use the asset index system and skip this step
+
+### ClaimAccount (`minecraft/launch/ClaimAccount.h`)
+
+Marks the account as in-use for this launch session:
+- Prevents the same account from being used in concurrent launches
+- Releases the claim when the game exits
+
+### PrintInstanceInfo (`minecraft/launch/PrintInstanceInfo.h`)
+
+Logs debug information to the console:
+- Instance name and ID
+- Minecraft version
+- Java path and version
+- JVM arguments
+- Classpath
+- Native library path
+- Working directory
+
+### DirectJavaLaunch (`minecraft/launch/DirectJavaLaunch.h`)
+
+The primary launch step that spawns the JVM process:
+
+```cpp
+class DirectJavaLaunch : public LaunchStep
+{
+ Q_OBJECT
+public:
+ explicit DirectJavaLaunch(LaunchTask* parent);
+
+ virtual void executeTask();
+ virtual bool abort();
+ virtual void proceed();
+ virtual bool canAbort() const { return true; }
+
+ void setWorkingDirectory(const QString& wd);
+ void setAuthSession(AuthSessionPtr session);
+ void setServerToJoin(MinecraftServerTargetPtr serverToJoin);
+
+private slots:
+ void on_state(LoggedProcess::State state);
+
+private:
+ LoggedProcess m_process;
+ QString m_command;
+ AuthSessionPtr m_session;
+ MinecraftServerTargetPtr m_serverToJoin;
+};
+```
+
+This step:
+1. Assembles the full Java command line
+2. Sets the working directory to the game root
+3. Configures environment variables
+4. Spawns the process via `LoggedProcess`
+5. Connects to `on_state()` for process lifecycle events
+6. Emits `readyForLaunch()` — the pipeline pauses until the user confirms
+7. On `proceed()`, the process starts
+8. Monitors the process until exit
+
+### MeshMCPartLaunch (`minecraft/launch/MeshMCPartLaunch.h`)
+
+Alternative launch method using MeshMC's Java-side launcher component:
+- Writes launch parameters to a temporary file
+- Starts the Java-side launcher (`libraries/launcher/`) which reads the parameters
+- The Java launcher handles classpath assembly and game startup
+- Provides additional launch customization capabilities
+
+## JVM Argument Assembly
+
+`MinecraftInstance` assembles JVM arguments through several methods:
+
+### `javaArguments()`
+
+Returns the complete list of JVM arguments:
+
+```cpp
+QStringList MinecraftInstance::javaArguments() const;
+```
+
+Components:
+1. **Memory settings** — `-Xms<min>m -Xmx<max>m`
+2. **PermGen size** — `-XX:PermSize=<size>` (for older JVMs)
+3. **Custom JVM args** — user-specified in instance/global settings
+4. **System properties** — `-D` flags for various launchwrapper parameters
+
+### `getClassPath()`
+
+Builds the Java classpath:
+
+```cpp
+QStringList MinecraftInstance::getClassPath() const;
+```
+
+Sources:
+1. Libraries from the resolved `LaunchProfile`
+2. Maven files
+3. Main game JAR (possibly modified by jar mods)
+4. Instance-local libraries
+
+### `getMainClass()`
+
+Returns the Java main class:
+
+```cpp
+QString MinecraftInstance::getMainClass() const;
+```
+
+This comes from the resolved `LaunchProfile`, which may be:
+- `net.minecraft.client.main.Main` — vanilla
+- `net.minecraftforge.fml.loading.FMLClientLaunchProvider` — Forge
+- `net.fabricmc.loader.impl.launch.knot.KnotClient` — Fabric
+- `org.quiltmc.loader.impl.launch.knot.KnotClient` — Quilt
+
+### `processMinecraftArgs()`
+
+Template-expands game arguments:
+
+```cpp
+QStringList MinecraftInstance::processMinecraftArgs(
+ AuthSessionPtr account,
+ MinecraftServerTargetPtr serverToJoin) const;
+```
+
+Template variables replaced:
+| Variable | Replacement |
+|---|---|
+| `${auth_player_name}` | Player name |
+| `${auth_session}` | Session token |
+| `${auth_uuid}` | Player UUID |
+| `${auth_access_token}` | Access token |
+| `${version_name}` | Minecraft version |
+| `${game_directory}` | Game root path |
+| `${assets_root}` | Assets directory path |
+| `${assets_index_name}` | Asset index ID |
+| `${user_type}` | Account type |
+| `${version_type}` | Version type (release/snapshot) |
+
+Additional server join arguments:
+- `--server <address>` and `--port <port>` (if `serverToJoin` is set)
+
+## LoggedProcess
+
+`LoggedProcess` wraps `QProcess` with structured logging:
+
+```cpp
+class LoggedProcess : public QProcess
+{
+ Q_OBJECT
+public:
+ enum State {
+ NotRunning,
+ Starting,
+ FailedToStart,
+ Running,
+ Finished,
+ Crashed,
+ Aborted
+ };
+
+ explicit LoggedProcess(QObject* parent = 0);
+
+ State state() const;
+ int exitCode() const;
+ qint64 processId() const;
+ void setDetachable(bool detachable);
+
+signals:
+ void log(QStringList lines, MessageLevel::Enum level);
+ void stateChanged(LoggedProcess::State state);
+
+public slots:
+ void kill();
+
+private slots:
+ void on_stdErr();
+ void on_stdOut();
+ void on_exit(int exit_code, QProcess::ExitStatus status);
+ void on_error(QProcess::ProcessError error);
+ void on_stateChange(QProcess::ProcessState);
+};
+```
+
+Features:
+- Captures `stdout` and `stderr` separately
+- Splits output into lines
+- Emits structured log events with message levels
+- Handles line buffering for partial reads (`m_err_leftover`, `m_out_leftover`)
+- Tracks process state transitions
+- Supports detachable processes
+
+## LogModel
+
+`LogModel` stores game log output for display:
+
+```cpp
+class LogModel : public QAbstractListModel
+{
+ Q_OBJECT
+};
+```
+
+Used by `LogPage` and `InstanceWindow` to display real-time game output with color coding based on `MessageLevel::Enum`:
+
+| Level | Color | Examples |
+|---|---|---|
+| `Unknown` | Default | Unclassified output |
+| `StdOut` | Default | Normal game output |
+| `StdErr` | Red | Error output |
+| `Info` | Default | Informational messages |
+| `Warning` | Yellow | Warning messages |
+| `Error` | Red | Error messages |
+| `Fatal` | Dark Red | Fatal/crash messages |
+| `MeshMC` | Blue | Launcher messages |
+
+## Server Join
+
+`MinecraftServerTarget` carries server join information:
+
+```cpp
+struct MinecraftServerTarget {
+ QString address;
+ quint16 port;
+
+ static MinecraftServerTargetPtr parse(const QString& fullAddress);
+};
+```
+
+When `--server` is provided on the command line or configured in instance settings:
+- The server address/port is parsed
+- Passed to `LaunchController::setServerToJoin()`
+- Forwarded to `DirectJavaLaunch::setServerToJoin()`
+- Appended as `--server` and `--port` to Minecraft's arguments
+
+## Profiler Integration
+
+Profiler tools hook into the launch pipeline:
+
+```cpp
+class BaseProfilerFactory {
+public:
+ virtual BaseProfiler* createProfiler(InstancePtr instance, QObject* parent) = 0;
+ virtual bool check(QString* error) = 0;
+ virtual QString name() const = 0;
+};
+```
+
+When a profiler is selected:
+1. `LaunchController` creates the profiler via the factory
+2. The profiler adds its own steps or arguments to the launch
+3. JProfiler: opens profiling session via JProfiler's agent
+4. JVisualVM: launches alongside the game process
+
+## Process Environment
+
+`MinecraftInstance::createEnvironment()` constructs the `QProcessEnvironment` for the game:
+
+```cpp
+QProcessEnvironment MinecraftInstance::createEnvironment() override;
+```
+
+This includes:
+- System environment (inherited)
+- `INST_NAME` — instance name
+- `INST_ID` — instance ID
+- `INST_DIR` — instance root directory
+- `INST_MC_DIR` — game directory
+- `INST_JAVA` — Java binary path
+- `INST_JAVA_ARGS` — JVM arguments
+- Native library path (`LD_LIBRARY_PATH` / `PATH` / `DYLD_LIBRARY_PATH`)
+
+## Custom Commands
+
+Instances support custom commands that execute at specific lifecycle points:
+
+| Command | When | Purpose |
+|---|---|---|
+| `PreLaunchCommand` | Before JVM starts | Custom setup scripts |
+| `PostExitCommand` | After JVM exits | Cleanup scripts |
+| `WrapperCommand` | Wraps the JVM command | e.g., `gamemoderun` or `mangohud` |
+
+These are configured per-instance (with override gate) or globally.
diff --git a/docs/handbook/meshmc/mod-system.md b/docs/handbook/meshmc/mod-system.md
new file mode 100644
index 0000000000..14d309e721
--- /dev/null
+++ b/docs/handbook/meshmc/mod-system.md
@@ -0,0 +1,410 @@
+# Mod System
+
+## Overview
+
+MeshMC provides comprehensive mod management through a combination of local folder models and mod platform integrations. The mod system handles installation, discovery, metadata extraction, enabling/disabling, and browsing of mods from CurseForge, Modrinth, ATLauncher, FTB, and Technic.
+
+## Local Mod Management
+
+### ModFolderModel (`minecraft/mod/ModFolderModel.h`)
+
+`ModFolderModel` is a `QAbstractListModel` that represents the contents of a mod directory (e.g., `<instance>/.minecraft/mods/`):
+
+```cpp
+class ModFolderModel : public QAbstractListModel
+{
+ Q_OBJECT
+public:
+ enum Columns {
+ ActiveColumn = 0, // Enabled/disabled toggle
+ NameColumn, // Mod name
+ VersionColumn, // Mod version
+ DateColumn, // File modification date
+ NUM_COLUMNS
+ };
+
+ enum ModStatusAction { Disable, Enable, Toggle };
+
+ ModFolderModel(const QString& dir);
+
+ // Model interface
+ QVariant data(const QModelIndex& index, int role) const override;
+ bool setData(const QModelIndex& index, const QVariant& value,
+ int role) override;
+ Qt::DropActions supportedDropActions() const override;
+ Qt::ItemFlags flags(const QModelIndex& index) const override;
+ QStringList mimeTypes() const override;
+ bool dropMimeData(const QMimeData* data, Qt::DropAction action,
+ int row, int column, const QModelIndex& parent) override;
+ int rowCount(const QModelIndex&) const override;
+ QVariant headerData(int section, Qt::Orientation orientation,
+ int role) const override;
+ int columnCount(const QModelIndex& parent) const override;
+
+ size_t size() const;
+ bool empty() const;
+ Mod& operator[](size_t index);
+};
+```
+
+Key features:
+- **Drag and drop** — users can drag mod files into the view to install them
+- **Enable/disable** — toggling a mod renames the file (`.jar` ↔ `.jar.disabled`)
+- **Automatic refresh** — directory changes are detected via `QFileSystemWatcher`
+- **Column sorting** — by name, version, or date
+
+### Mod Class (`minecraft/mod/Mod.h`)
+
+The `Mod` class represents a single mod file:
+
+```cpp
+class Mod {
+public:
+ enum ModType {
+ MOD_UNKNOWN, // Unknown format
+ MOD_ZIPFILE, // ZIP/JAR with metadata
+ MOD_SINGLEFILE, // Single file (no metadata)
+ MOD_FOLDER, // Directory mod
+ MOD_LITEMOD // LiteLoader mod (.litemod)
+ };
+
+ Mod(const QFileInfo& file);
+
+ QString name() const;
+ QString version() const;
+ QString homeurl() const;
+ QString description() const;
+ QStringList authors() const;
+ QDateTime dateTimeChanged() const;
+
+ bool enable(bool value); // Enable or disable
+ bool enabled() const;
+ ModType type() const;
+};
+```
+
+### ModDetails (`minecraft/mod/ModDetails.h`)
+
+Extracted metadata from mod files:
+
+```cpp
+struct ModDetails {
+ QString mod_id;
+ QString name;
+ QString version;
+ QString description;
+ QStringList authors;
+ QString homeurl;
+ QStringList loaders; // Compatible mod loaders
+};
+```
+
+### Mod Metadata Extraction (`minecraft/mod/LocalModParseTask.h`)
+
+`LocalModParseTask` runs in a background thread to parse metadata from mod JAR/ZIP files:
+
+```cpp
+class LocalModParseTask : public QObject {
+ Q_OBJECT
+public:
+ LocalModParseTask(int token, Mod::ModType type,
+ const QFileInfo& modFile);
+signals:
+ void metadataReady(int token, ModDetails details);
+};
+```
+
+Supported metadata formats:
+- **Forge** — `mcmod.info` (JSON, legacy) and `mods.toml` (TOML, modern)
+- **Fabric** — `fabric.mod.json` (JSON)
+- **Quilt** — `quilt.mod.json` (JSON)
+- **LiteLoader** — `litemod.json` (JSON)
+
+The parser uses `tomlc99` for TOML parsing and Qt's JSON facilities for JSON.
+
+### ModFolderLoadTask (`minecraft/mod/ModFolderLoadTask.h`)
+
+Background task that scans a mod directory and creates `Mod` objects:
+
+```cpp
+class ModFolderLoadTask : public QObject {
+ Q_OBJECT
+public:
+ ModFolderLoadTask(const QString& dir);
+ void run();
+signals:
+ void succeeded();
+};
+```
+
+This task:
+1. Enumerates all files in the mod directory
+2. Creates `Mod` objects for each `.jar`, `.zip`, `.litemod`, `.disabled`, and directory entry
+3. Emits `succeeded()` when scanning is complete
+4. The `ModFolderModel` then triggers `LocalModParseTask` for each mod to extract metadata
+
+### Resource Pack and Texture Pack Models
+
+Similar models exist for resource packs and texture packs:
+
+```cpp
+class ResourcePackFolderModel : public ModFolderModel {
+ // Manages <instance>/.minecraft/resourcepacks/
+};
+
+class TexturePackFolderModel : public ModFolderModel {
+ // Manages <instance>/.minecraft/texturepacks/
+};
+```
+
+These inherit from `ModFolderModel` but specialize for their respective content types.
+
+## Mod Platform Integrations
+
+### Directory Structure
+
+```
+launcher/modplatform/
+├── atlauncher/ # ATLauncher API client
+├── flame/ # CurseForge (Flame) API client
+├── legacy_ftb/ # Legacy FTB modpack support
+├── modpacksch/ # FTB/modpacksch API (modern)
+├── modrinth/ # Modrinth API client
+└── technic/ # Technic Platform API client
+```
+
+### CurseForge Integration (`modplatform/flame/`)
+
+CurseForge (internally called "Flame") integration provides:
+
+- **Mod search** — query CurseForge's API for mods compatible with the instance's game version and loader
+- **Modpack installation** — download and install complete CurseForge modpacks
+- **Mod installation** — download individual mods and place them in the mods folder
+- **Version resolution** — select the correct mod version for the instance's configuration
+
+API authentication uses the `MeshMC_CURSEFORGE_API_KEY` set at build time:
+
+```cmake
+set(MeshMC_CURSEFORGE_API_KEY "$2a$10$..." CACHE STRING
+ "API key for the CurseForge API")
+```
+
+### Modrinth Integration (`modplatform/modrinth/`)
+
+Modrinth integration provides:
+
+- **Mod search** — query Modrinth's API for mods
+- **Modpack installation** — download and install Modrinth modpacks (`.mrpack` format)
+- **Mod installation** — download and install individual mods
+- **Version filtering** — filter by game version, loader, and project type
+
+Modrinth uses a public API and does not require an API key for basic operations.
+
+### ATLauncher Integration (`modplatform/atlauncher/`)
+
+ATLauncher support enables importing ATLauncher modpack definitions:
+
+- Parse ATLauncher pack JSON manifests
+- Download required mods and configurations
+- Create a new instance with the correct components
+
+### FTB/modpacksch Integration (`modplatform/modpacksch/`)
+
+Modern FTB modpack support via the modpacksch API:
+
+- Browse available FTB modpacks
+- Download and install modpacks
+- Handle FTB-specific pack format
+
+### Legacy FTB Integration (`modplatform/legacy_ftb/`)
+
+Support for the older FTB modpack format:
+
+- Parse legacy FTB pack definitions
+- Import packs from FTB launcher directories
+
+### Technic Integration (`modplatform/technic/`)
+
+Technic Platform support:
+
+- Browse Technic modpacks
+- Download and install Technic packs
+- Handle Technic's ZIP-based pack format
+
+## Mod Installation Flow
+
+### From Platform Browse Page
+
+1. User opens a mod platform page (CurseForge/Modrinth) from the instance settings
+2. Searches or browses for a mod
+3. Selects a version compatible with their instance
+4. MeshMC creates a `Download` via `NetJob` to fetch the mod file
+5. The mod file is placed in `<instance>/.minecraft/mods/`
+6. `ModFolderModel` detects the new file and updates the listing
+7. `LocalModParseTask` extracts metadata for display
+
+### From Drag and Drop
+
+1. User drags a `.jar` file onto the mod list in `ModFolderPage`
+2. Qt's drag-and-drop system fires `ModFolderModel::dropMimeData()`
+3. The file is copied to the mods directory
+4. The model updates automatically
+
+### From File Dialog
+
+1. User clicks "Add" in `ModFolderPage`
+2. A `QFileDialog` opens for file selection
+3. Selected files are copied to the mods directory
+4. The model refreshes
+
+## Modpack Import
+
+### Overview
+
+Modpack import is handled through `InstanceImportTask` and platform-specific import logic:
+
+```
+User selects modpack file/URL
+ │
+ ▼
+InstanceImportTask::executeTask()
+ │
+ ├── Detect format (CurseForge manifest, Modrinth mrpack, etc.)
+ │
+ ├── CurseForge: parse manifest.json → download mods → set up instance
+ ├── Modrinth: parse modrinth.index.json → download mods → set up instance
+ ├── ATLauncher: parse ATL config → download mods → set up instance
+ ├── Technic: extract ZIP → set up instance
+ └── Generic: extract ZIP → copy files → set up instance
+```
+
+### CurseForge Modpack Format
+
+CurseForge modpacks contain a `manifest.json`:
+```json
+{
+ "minecraft": {
+ "version": "1.20.4",
+ "modLoaders": [
+ { "id": "forge-49.0.19", "primary": true }
+ ]
+ },
+ "files": [
+ { "projectID": 123456, "fileID": 789012, "required": true }
+ ]
+}
+```
+
+MeshMC parses this manifest, creates components for the game version and mod loader, then downloads each mod file by its CurseForge project/file IDs.
+
+### Modrinth Modpack Format (`.mrpack`)
+
+Modrinth packs use the `.mrpack` format (ZIP with `modrinth.index.json`):
+```json
+{
+ "formatVersion": 1,
+ "game": "minecraft",
+ "versionId": "1.0.0",
+ "dependencies": {
+ "minecraft": "1.20.4",
+ "fabric-loader": "0.15.6"
+ },
+ "files": [
+ {
+ "path": "mods/sodium-0.5.5.jar",
+ "hashes": { "sha1": "...", "sha512": "..." },
+ "downloads": ["https://cdn.modrinth.com/..."]
+ }
+ ]
+}
+```
+
+### Blocked Mods Handling
+
+Some mods on CurseForge restrict third-party downloads. `BlockedModsDialog` handles this case:
+
+```cpp
+class BlockedModsDialog : public QDialog {
+ // Shows a list of mods that couldn't be auto-downloaded
+ // Provides manual download links for the user
+};
+```
+
+## Mod Enable/Disable Mechanism
+
+MeshMC enables and disables mods by renaming files:
+
+```cpp
+bool Mod::enable(bool value);
+```
+
+- **Disable**: Rename `modname.jar` → `modname.jar.disabled`
+- **Enable**: Rename `modname.jar.disabled` → `modname.jar`
+
+This approach ensures disabled mods are not loaded by the game's mod loader while remaining in the directory for easy re-enabling.
+
+## Mod Folder Page (`ui/pages/instance/ModFolderPage.h`)
+
+`ModFolderPage` provides the UI for managing mods within an instance:
+
+```cpp
+class ModFolderPage : public QMainWindow, public BasePage {
+ Q_OBJECT
+};
+```
+
+Features:
+- List view with columns: Active (checkbox), Name, Version, Date
+- Add button — file dialog for selecting mod files
+- Remove button — delete selected mods
+- Enable/Disable button — toggle selection
+- View folder button — open the mods directory in the file manager
+- Mod details panel showing name, version, authors, description, homepage URL
+
+## Shader Packs and Resource Packs
+
+MeshMC provides similar management for other content types:
+
+| Content Type | Directory | Page Class | Model |
+|---|---|---|---|
+| Mods | `.minecraft/mods/` | `ModFolderPage` | `ModFolderModel` |
+| Resource Packs | `.minecraft/resourcepacks/` | `ResourcePackPage` | `ResourcePackFolderModel` |
+| Texture Packs | `.minecraft/texturepacks/` | `TexturePackPage` | `TexturePackFolderModel` |
+| Shader Packs | `.minecraft/shaderpacks/` | `ShaderPackPage` | `ModFolderModel` |
+
+`ResourcePackPage`, `TexturePackPage`, and `ShaderPackPage` are thin wrappers around `ModFolderPage`:
+
+```cpp
+// ShaderPackPage.h
+class ShaderPackPage : public ModFolderPage {
+ // Specializes base path and file filters for shader packs
+};
+```
+
+## World Management
+
+While not strictly part of the mod system, world management follows a similar pattern:
+
+### WorldList (`minecraft/WorldList.h`)
+
+```cpp
+class WorldList : public QAbstractListModel {
+ Q_OBJECT
+public:
+ WorldList(const QString& dir);
+ // Provides list of worlds with name, last played, game mode
+};
+```
+
+### World (`minecraft/World.h`)
+
+```cpp
+class World {
+public:
+ World(const QFileInfo& file);
+ QString name() const;
+ // Reads level.dat for world metadata
+};
+```
+
+The `WorldListPage` provides UI for browsing, adding, copying, and deleting worlds.
diff --git a/docs/handbook/meshmc/network-layer.md b/docs/handbook/meshmc/network-layer.md
new file mode 100644
index 0000000000..ab556fe25f
--- /dev/null
+++ b/docs/handbook/meshmc/network-layer.md
@@ -0,0 +1,551 @@
+# Network Layer
+
+## Overview
+
+MeshMC's network layer provides a managed download system supporting parallel downloads, caching, integrity validation, and progress tracking. Built on Qt's `QNetworkAccessManager`, it centralizes all HTTP operations through the `NetJob` and `Download` classes.
+
+## Architecture
+
+### Key Classes
+
+| Class | File | Purpose |
+|---|---|---|
+| `NetAction` | `net/NetAction.h` | Abstract network operation base |
+| `Download` | `net/Download.{h,cpp}` | HTTP GET download |
+| `NetJob` | `net/NetJob.{h,cpp}` | Parallel download manager |
+| `Sink` | `net/Sink.h` | Abstract data receiver |
+| `FileSink` | `net/FileSink.{h,cpp}` | Write to file |
+| `ByteArraySink` | `net/ByteArraySink.{h,cpp}` | Write to memory |
+| `MetaCacheSink` | `net/MetaCacheSink.{h,cpp}` | Write to file with caching |
+| `Validator` | `net/Validator.h` | Abstract integrity checker |
+| `ChecksumValidator` | `net/ChecksumValidator.{h,cpp}` | Hash-based validation |
+| `HttpMetaCache` | `net/HttpMetaCache.{h,cpp}` | HTTP cache metadata |
+| `MetaEntry` | `net/HttpMetaCache.h` | Cache entry (ETag, MD5) |
+| `PasteUpload` | `net/PasteUpload.{h,cpp}` | Log paste upload |
+| `Upload` | `net/Upload.{h,cpp}` | HTTP POST/PUT upload |
+
+## NetAction
+
+Base class for all network operations:
+
+```cpp
+class NetAction : public Task
+{
+ Q_OBJECT
+public:
+ using Ptr = shared_qobject_ptr<NetAction>;
+
+ virtual void init() = 0;
+
+public slots:
+ void startAction(shared_qobject_ptr<QNetworkAccessManager> nam);
+
+protected:
+ virtual void executeTask() override = 0;
+
+ // State
+ QUrl m_url;
+ QUrl m_redirectUrl;
+ int m_redirectsRemaining = 6;
+ shared_qobject_ptr<QNetworkAccessManager> m_network;
+ std::unique_ptr<QNetworkReply> m_reply;
+};
+```
+
+`NetAction` extends `Task`, inheriting progress tracking, state management, and signal notifications.
+
+### JobStatus
+
+Network operations report their status through the `Task` state machine:
+- `NotStarted` → `Running` → `Succeeded` / `Failed` / `AbortedByUser`
+
+## Download
+
+The primary HTTP download class:
+
+```cpp
+class Download : public NetAction
+{
+ Q_OBJECT
+public:
+ // Factory methods
+ static Download::Ptr makeCached(QUrl url, MetaEntryPtr entry, Options options = Option::NoOptions);
+ static Download::Ptr makeByteArray(QUrl url, std::shared_ptr<QByteArray> output);
+ static Download::Ptr makeFile(QUrl url, QString path);
+
+ // Configuration
+ Download* addValidator(Validator* v);
+ Download* addHttpRedirectHandler(QUrl url);
+
+private slots:
+ void downloadProgress(qint64 bytesReceived, qint64 bytesTotal);
+ void downloadError(QNetworkReply::NetworkError error);
+ void sslErrors(const QList<QSslError>& errors);
+ void downloadFinished();
+ void downloadReadyRead();
+
+protected:
+ std::unique_ptr<Sink> m_sink;
+ QList<Validator*> m_validators;
+ Options m_options;
+};
+```
+
+### Factory Methods
+
+Three factory methods create downloads with appropriate sinks:
+
+#### `makeCached(url, entry, options)`
+Creates a download backed by `MetaCacheSink`:
+- Checks `HttpMetaCache` for existing entry
+- Sends `If-None-Match` with stored ETag
+- On `304 Not Modified`, reuses cached file
+- On `200 OK`, writes to file and updates cache
+- Supports MD5 and SHA-1 validation
+
+#### `makeByteArray(url, output)`
+Creates a download backed by `ByteArraySink`:
+- Data written to the provided `QByteArray`
+- Used for small API responses, JSON data
+- No disk I/O
+
+#### `makeFile(url, path)`
+Creates a download backed by `FileSink`:
+- Writes directly to the specified file path
+- No caching metadata
+- Creates parent directories as needed
+
+### Download Options
+
+```cpp
+enum class Option {
+ NoOptions = 0,
+ AcceptLocalFiles = 1, // Allow file:// URLs
+ MakeEternal = 2, // Never expire from cache
+};
+```
+
+### Redirect Handling
+
+Downloads automatically follow HTTP redirects:
+- Maximum of 6 redirects (configurable via `m_redirectsRemaining`)
+- Both HTTP 301/302 and Qt's redirect attribute are handled
+- Custom redirect handlers can be added for complex redirect chains
+
+## NetJob
+
+Manages parallel execution of multiple `NetAction` operations:
+
+```cpp
+class NetJob : public Task
+{
+ Q_OBJECT
+public:
+ using Ptr = shared_qobject_ptr<NetJob>;
+
+ explicit NetJob(QString job_name, shared_qobject_ptr<QNetworkAccessManager> network);
+
+ bool addNetAction(NetAction::Ptr action);
+
+ // From Task
+ void executeTask() override;
+ bool abort() override;
+
+ int size() const;
+
+ // Progress aggregation
+ auto totalProgress() const -> qint64;
+ auto totalSize() const -> qint64;
+
+signals:
+ void failed(QString reason);
+
+private slots:
+ void partProgress(int index, qint64 bytesReceived, qint64 bytesTotal);
+ void partSucceeded(int index);
+ void partFailed(int index);
+ void partAborted(int index);
+
+private:
+ struct Part {
+ NetAction::Ptr download;
+ qint64 current_progress = 0;
+ qint64 total_progress = 1;
+ int failures = 0;
+ };
+
+ QList<Part> m_parts;
+ QQueue<int> m_todo;
+ QSet<int> m_doing;
+ QSet<int> m_done;
+ QSet<int> m_failed;
+
+ shared_qobject_ptr<QNetworkAccessManager> m_network;
+ int m_maxConcurrent = 6;
+};
+```
+
+### Execution Model
+
+1. All `NetAction` items are queued in `m_todo`
+2. Up to `m_maxConcurrent` (default 6) downloads run simultaneously
+3. As downloads complete, new ones are started from the queue
+4. Progress is aggregated across all downloads
+5. On failure, items are retried up to 3 times before the job fails
+6. The entire job succeeds only when all parts succeed
+
+### Progress Tracking
+
+`NetJob` aggregates progress from all parts:
+
+```cpp
+void NetJob::partProgress(int index, qint64 bytesReceived, qint64 bytesTotal)
+{
+ m_parts[index].current_progress = bytesReceived;
+ m_parts[index].total_progress = bytesTotal;
+
+ qint64 current = 0, total = 0;
+ for (auto& part : m_parts) {
+ current += part.current_progress;
+ total += part.total_progress;
+ }
+ setProgress(current, total);
+}
+```
+
+This emits `Task::progress(qint64, qint64)` which the UI connects to for progress bars.
+
+## Sink System
+
+Sinks receive downloaded data and write it to the appropriate destination:
+
+### Sink (Abstract Base)
+
+```cpp
+class Sink
+{
+public:
+ virtual ~Sink() = default;
+
+ virtual auto init(QNetworkRequest& request) -> Task::State = 0;
+ virtual auto write(const QByteArray& data) -> Task::State = 0;
+ virtual auto abort() -> Task::State = 0;
+ virtual auto finalize(QNetworkReply& reply) -> Task::State = 0;
+
+ void addValidator(Validator* v);
+
+protected:
+ bool finalizeAllValidators(QNetworkReply& reply);
+ QList<Validator*> m_validators;
+};
+```
+
+### FileSink
+
+```cpp
+class FileSink : public Sink
+{
+public:
+ explicit FileSink(const QString& filename);
+
+ auto init(QNetworkRequest& request) -> Task::State override;
+ auto write(const QByteArray& data) -> Task::State override;
+ auto abort() -> Task::State override;
+ auto finalize(QNetworkReply& reply) -> Task::State override;
+
+private:
+ QString m_filename;
+ std::unique_ptr<QSaveFile> m_output; // Atomic write
+};
+```
+
+Uses `QSaveFile` for atomic writes — the file is written to a temporary location and renamed on `finalize()`, preventing partial files on failure.
+
+### ByteArraySink
+
+```cpp
+class ByteArraySink : public Sink
+{
+public:
+ explicit ByteArraySink(std::shared_ptr<QByteArray> output);
+
+ auto init(QNetworkRequest& request) -> Task::State override;
+ auto write(const QByteArray& data) -> Task::State override;
+ auto abort() -> Task::State override;
+ auto finalize(QNetworkReply& reply) -> Task::State override;
+
+private:
+ std::shared_ptr<QByteArray> m_output;
+};
+```
+
+### MetaCacheSink
+
+```cpp
+class MetaCacheSink : public FileSink
+{
+public:
+ MetaCacheSink(MetaEntryPtr entry, const QString& filename);
+
+ auto init(QNetworkRequest& request) -> Task::State override;
+ auto finalize(QNetworkReply& reply) -> Task::State override;
+
+ bool hasLocalData();
+
+private:
+ MetaEntryPtr m_entry;
+};
+```
+
+`MetaCacheSink` extends `FileSink` with:
+- Setting `If-None-Match` header from cached ETag
+- On `304 Not Modified`, short-circuits to success
+- On `200 OK`, updates `MetaEntry` with new ETag, MD5, timestamp
+
+## Validation System
+
+### Validator (Abstract)
+
+```cpp
+class Validator
+{
+public:
+ virtual ~Validator() = default;
+ virtual bool init(QNetworkRequest& request) = 0;
+ virtual bool write(const QByteArray& data) = 0;
+ virtual bool validate(QNetworkReply& reply) = 0;
+};
+```
+
+### ChecksumValidator
+
+```cpp
+class ChecksumValidator : public Validator
+{
+public:
+ ChecksumValidator(QCryptographicHash::Algorithm algorithm,
+ QString expected = QString());
+
+ bool init(QNetworkRequest& request) override;
+ bool write(const QByteArray& data) override;
+ bool validate(QNetworkReply& reply) override;
+
+private:
+ QCryptographicHash m_hash;
+ QString m_expected;
+};
+```
+
+Usage:
+```cpp
+auto dl = Download::makeFile(url, path);
+dl->addValidator(new ChecksumValidator(QCryptographicHash::Sha1, expectedSha1));
+```
+
+The validator incrementally hashes data as it arrives via `write()`, then compares the final hash in `validate()`. Supported algorithms: MD5, SHA-1, SHA-256.
+
+## HttpMetaCache
+
+Persistent HTTP caching metadata:
+
+```cpp
+class HttpMetaCache : public QObject
+{
+ Q_OBJECT
+public:
+ HttpMetaCache(const QString& path = QString());
+ ~HttpMetaCache();
+
+ MetaEntryPtr resolveEntry(QString base, QString relative_path, QString expected_etag = QString());
+ bool updateEntry(MetaEntryPtr stale_entry);
+ bool evictEntry(MetaEntryPtr entry);
+ void addBase(QString base, QString base_root);
+
+ void Load();
+ void Save();
+
+private:
+ // Base URL → local root directory mapping
+ QMap<QString, QString> m_entries;
+
+ // base/path → MetaEntry
+ QMap<QString, QMap<QString, MetaEntryPtr>> m_entry_cache;
+
+ QString m_index_file;
+};
+```
+
+### MetaEntry
+
+```cpp
+class MetaEntry
+{
+public:
+ QString basePath;
+ QString relativePath;
+
+ QString md5sum;
+ QString etag;
+
+ qint64 local_changed_timestamp = 0;
+ qint64 remote_changed_timestamp = 0;
+
+ bool stale = true;
+ bool makeEternal = false;
+
+ QString getFullPath();
+};
+```
+
+### Cache Flow
+
+```
+1. resolveEntry("mojang", "versions/1.21.json")
+ │
+ ├── Lookup in m_entry_cache["mojang"]["versions/1.21.json"]
+ │ ├── Found + !stale → return existing entry
+ │ └── Found + stale → return for re-download
+ └── Not found → create new stale entry
+ │
+2. MetaCacheSink.init()
+ ├── Set If-None-Match: <etag> on request
+ │
+3. HTTP Response
+ ├── 304 Not Modified → mark entry not stale, return
+ └── 200 OK → write file, update etag/md5/timestamps
+ │
+4. updateEntry() → save to cache index
+```
+
+### Cache Persistence
+
+The cache index is stored as JSON in `metacache/metacache.json`:
+
+```json
+{
+ "formatVersion": 2,
+ "entries": {
+ "mojang": {
+ "versions/1.21.json": {
+ "md5sum": "abc123...",
+ "etag": "\"xyz789\"",
+ "local_changed_timestamp": 1700000000,
+ "remote_changed_timestamp": 1699000000
+ }
+ }
+ }
+}
+```
+
+## Shared Network Manager
+
+`Application` creates a single `QNetworkAccessManager` shared across all operations:
+
+```cpp
+// Application.h
+shared_qobject_ptr<QNetworkAccessManager> network();
+
+// Application.cpp
+m_network = new QNetworkAccessManager();
+```
+
+All `NetJob` instances receive this shared manager. Proxy settings from the settings system are applied to the manager at startup and when changed.
+
+### Proxy Configuration
+
+```cpp
+// From global settings (proxyType is a QString, so an if/else chain is used
+// rather than a switch, which cannot operate on strings in C++):
+if (proxyType == "None") {
+    QNetworkProxy::setApplicationProxy(QNetworkProxy::NoProxy);
+} else if (proxyType == "SOCKS5") {
+    proxy.setType(QNetworkProxy::Socks5Proxy);
+    // ...
+} else if (proxyType == "HTTP") {
+    proxy.setType(QNetworkProxy::HttpProxy);
+    // ...
+}
+```
+
+## Upload System
+
+### PasteUpload
+
+Uploads log content to paste services:
+
+```cpp
+class PasteUpload : public Task
+{
+ Q_OBJECT
+public:
+ PasteUpload(QWidget* window, QString text, QString url);
+
+ QString pasteLink();
+
+protected:
+ void executeTask() override;
+
+private slots:
+ void downloadFinished();
+ void downloadError(QNetworkReply::NetworkError error);
+
+private:
+ QByteArray m_text;
+ QString m_pasteLink;
+ QString m_pasteURL;
+ shared_qobject_ptr<QNetworkReply> m_reply;
+};
+```
+
+Used by the log page to share instance logs. The paste URL is configurable in settings.
+
+## Common Download Patterns
+
+### Downloading Minecraft Version Manifest
+
+```cpp
+auto job = new NetJob("Version List", APPLICATION->network());
+auto entry = APPLICATION->metacache()->resolveEntry("mojang", "version_manifest_v2.json");
+auto dl = Download::makeCached(
+ QUrl("https://piston-meta.mojang.com/mc/game/version_manifest_v2.json"),
+ entry
+);
+job->addNetAction(dl);
+connect(job, &NetJob::succeeded, this, &VersionList::loadListFromFile);
+job->start();
+```
+
+### Downloading Game Libraries
+
+```cpp
+auto job = new NetJob("Libraries", APPLICATION->network());
+for (auto& lib : libraries) {
+ auto dl = Download::makeFile(lib.url(), lib.storagePath());
+ dl->addValidator(new ChecksumValidator(QCryptographicHash::Sha1, lib.sha1()));
+ job->addNetAction(dl);
+}
+connect(job, &NetJob::progress, this, &LaunchStep::setProgress);
+connect(job, &NetJob::succeeded, this, &LaunchStep::emitSucceeded);
+connect(job, &NetJob::failed, this, &LaunchStep::emitFailed);
+job->start();
+```
+
+### Downloading to Memory
+
+```cpp
+auto output = std::make_shared<QByteArray>();
+auto dl = Download::makeByteArray(
+ QUrl("https://api.example.com/data.json"),
+ output
+);
+auto job = new NetJob("API Request", APPLICATION->network());
+job->addNetAction(dl);
+connect(job, &NetJob::succeeded, [output]() {
+ auto doc = QJsonDocument::fromJson(*output);
+ // Process response
+});
+job->start();
+```
diff --git a/docs/handbook/meshmc/overview.md b/docs/handbook/meshmc/overview.md
new file mode 100644
index 0000000000..598eec8258
--- /dev/null
+++ b/docs/handbook/meshmc/overview.md
@@ -0,0 +1,269 @@
+# MeshMC Overview
+
+## What is MeshMC?
+
+MeshMC is a custom, open-source Minecraft launcher developed by Project Tick. It enables players to manage multiple, isolated Minecraft installations simultaneously — each with its own mods, resource packs, settings, and game version. MeshMC is the flagship launcher product of the Project Tick organization, licensed under the GNU General Public License v3.0 or later (GPL-3.0-or-later).
+
+The launcher binary is named `meshmc` and uses the application ID `org.projecttick.MeshMC`. It is designed for power users who need fine-grained control over their Minecraft setup: modded playthroughs, modpack development, server testing, and version archaeology.
+
+## Core Feature Set
+
+### Multi-Instance Management
+
+MeshMC's defining feature is instance-based Minecraft management. Each instance is a complete, self-contained Minecraft environment:
+
+- **Independent game directories** — each instance has its own `.minecraft`-equivalent folder with saves, mods, configs, resource packs, shader packs, and texture packs
+- **Version isolation** — instances can run different Minecraft versions simultaneously (1.7.10, 1.12.2, 1.20.4, etc.)
+- **Instance groups** — organize instances into named groups for categorization
+- **Instance copying** — clone any instance with optional save data and playtime preservation via `InstanceCopyTask`
+- **Instance import/export** — import instances from ZIP archives or export them for sharing via `InstanceImportTask` and `ExportInstanceDialog`
+- **Per-instance settings** — override global Java path, memory allocation, JVM arguments, window size, and custom commands on a per-instance basis through the `SettingsObject` override system
+
+### Component-Based Version Management
+
+Instead of monolithic version profiles, MeshMC uses a component system (`PackProfile` / `Component`) that decomposes a Minecraft installation into modular layers:
+
+- **Minecraft base version** — the vanilla game jar
+- **Mod loaders** — Forge, Fabric, Quilt, NeoForge, LiteLoader
+- **Library overlays** — additional libraries injected into the classpath
+- **Jar mods** — modifications applied directly to the game jar
+- **Dependency resolution** — automatic resolution of inter-component dependencies via `ComponentUpdateTask`
+
+### Mod Platform Integration
+
+MeshMC integrates with major mod distribution platforms:
+
+- **CurseForge** — browse, search, and install mods/modpacks from CurseForge (`modplatform/flame/`)
+- **Modrinth** — browse, search, and install mods/modpacks from Modrinth (`modplatform/modrinth/`)
+- **ATLauncher** — import ATLauncher modpacks (`modplatform/atlauncher/`)
+- **FTB** — import Feed The Beast modpacks (`modplatform/modpacksch/`)
+- **Technic** — import Technic modpacks (`modplatform/technic/`)
+- **Legacy FTB** — support for old-format FTB modpacks (`modplatform/legacy_ftb/`)
+
+### Microsoft Account Authentication
+
+MeshMC supports Microsoft Account (MSA) login for Minecraft authentication:
+
+- **OAuth2 Authorization Code Flow** via Qt6 NetworkAuth (`QOAuth2AuthorizationCodeFlow`)
+- **Multi-account management** — store and switch between multiple Microsoft accounts
+- **Token refresh** — automatic and manual token refresh via `MSASilent`
+- **Xbox Live integration** — full authentication chain: MSA → Xbox User Token → XSTS Token → Minecraft Token
+- **Profile management** — fetch and display Minecraft profile, skins, capes
+
+### Java Management
+
+- **Automatic Java detection** — `JavaUtils::FindJavaPaths()` scans platform-specific locations (registry on Windows, `/usr/lib/jvm` on Linux, `/Library/Java` on macOS)
+- **Java version validation** — `JavaChecker` spawns a JVM process to verify version, architecture, and vendor
+- **Per-instance Java configuration** — each instance can specify its own Java binary and JVM arguments
+- **Built-in Java downloader** — optional feature to download and manage Java runtimes (can be disabled with `MeshMC_DISABLE_JAVA_DOWNLOADER`)
+
+### Theming and UI Customization
+
+- **Multiple built-in themes** — BrightTheme, DarkTheme, FusionTheme, SystemTheme
+- **Custom themes** — user-defined themes via QSS stylesheets and palette definitions (`CustomTheme`)
+- **Icon theme system** — multiple icon sets with automatic light/dark variant selection
+- **CatPack system** — fun cat images displayed in the launcher background
+- **Appearance settings page** — `AppearancePage` for theme, icon set, and cat pack selection
+
+### Update System
+
+MeshMC includes a dual-source update checker (`UpdateChecker`):
+
+- **RSS feed** — checks `BuildConfig.UPDATER_FEED_URL` for version announcements with `projt:` namespace extensions
+- **GitHub Releases** — cross-references against `BuildConfig.UPDATER_GITHUB_API_URL` for integrity
+- **Platform-aware** — automatically disables for AppImage distributions and non-portable Linux installations
+- **macOS Sparkle** — native update framework integration on macOS via Sparkle 2.x
+
+### Additional Features
+
+- **Log viewer** — real-time game log display with color-coded message levels (`LogModel`, `LogPage`)
+- **Screenshot management** — browse, upload (Imgur), and manage game screenshots (`ScreenshotsPage`)
+- **World management** — list, copy, and manage Minecraft worlds (`WorldList`, `WorldListPage`)
+- **News feed** — RSS news reader showing MeshMC announcements
+- **Notification system** — banner notifications for important updates
+- **Proxy support** — HTTP/SOCKS5 proxy configuration for all network traffic
+- **Paste service** — upload logs to paste.ee for troubleshooting (`PasteUpload`)
+- **Analytics** — optional Google Analytics integration (`GAnalytics`)
+- **Profiler integration** — JProfiler and JVisualVM launch hooks (`tools/JProfiler`, `tools/JVisualVM`)
+- **MCEdit integration** — launch MCEdit for world editing (`tools/MCEditTool`)
+- **Accessibility** — keyboard navigation, screen reader support (`AccessibleInstanceView`)
+
+## Technology Stack
+
+### Core Technologies
+
+| Technology | Version | Purpose |
+|---|---|---|
+| C++ | C++23 | Primary language |
+| Qt | 6.x | GUI framework, networking, data models |
+| CMake | ≥ 3.28 | Build system |
+| Ninja | Any | Build generator (recommended) |
+
+### Qt6 Modules Used
+
+| Module | Purpose |
+|---|---|
+| `Qt6::Core` | Core data types, I/O, event loop |
+| `Qt6::Widgets` | GUI widgets (QMainWindow, QDialog, etc.) |
+| `Qt6::Concurrent` | Asynchronous file operations |
+| `Qt6::Network` | HTTP client, download management |
+| `Qt6::NetworkAuth` | OAuth2 authentication (MSA login) |
+| `Qt6::Test` | Unit testing framework |
+| `Qt6::Xml` | XML parsing (RSS feeds, version manifests) |
+
+### Bundled Libraries
+
+| Library | Directory | Purpose |
+|---|---|---|
+| `ganalytics` | `libraries/ganalytics/` | Google Analytics client |
+| `systeminfo` | `libraries/systeminfo/` | System information gathering |
+| `hoedown` | `libraries/hoedown/` | Markdown-to-HTML renderer |
+| `launcher` | `libraries/launcher/` | Java-based Minecraft launcher component |
+| `javacheck` | `libraries/javacheck/` | Java installation validator |
+| `xz-embedded` | `libraries/xz-embedded/` | XZ/LZMA decompression |
+| `rainbow` | `libraries/rainbow/` | Qt color manipulation |
+| `iconfix` | `libraries/iconfix/` | Qt QIcon loader fixes |
+| `LocalPeer` | `libraries/LocalPeer/` | Single-instance application enforcer |
+| `classparser` | `libraries/classparser/` | Java class file parser |
+| `optional-bare` | `libraries/optional-bare/` | `nonstd::optional` polyfill |
+| `tomlc99` | `libraries/tomlc99/` | TOML file parser |
+| `katabasis` | `libraries/katabasis/` | OAuth2 framework |
+| `libnbtplusplus` | `../libnbtplusplus/` | Minecraft NBT format parser |
+
+### External Dependencies
+
+| Dependency | Purpose |
+|---|---|
+| `libarchive` | Archive extraction (ZIP, tar, etc.) |
+| `zlib` | Data compression |
+| `Extra CMake Modules (ECM)` | KDE CMake utilities, install directories |
+| `cmark` | Markdown rendering |
+| `tomlplusplus` | TOML configuration parsing |
+| `libqrencode` | QR code generation |
+| `QuaZip` | ZIP archive handling |
+| `scdoc` | Man page generation (optional) |
+
+## Component Architecture Overview
+
+MeshMC is organized into several major subsystems, each occupying a distinct directory within `launcher/`:
+
+```
+launcher/
+├── main.cpp # Entry point
+├── Application.{h,cpp} # Application singleton, lifecycle management
+├── BaseInstance.{h,cpp} # Abstract instance interface
+├── InstanceList.{h,cpp} # Instance collection model
+├── LaunchController.{h,cpp} # Launch orchestration
+├── launch/ # Launch pipeline (LaunchTask, LaunchStep)
+├── minecraft/ # Minecraft-specific logic
+│ ├── Component.{h,cpp} # Version component
+│ ├── PackProfile.{h,cpp} # Component list model
+│ ├── MinecraftInstance.{h,cpp} # Concrete instance type
+│ ├── auth/ # Authentication subsystem
+│ ├── launch/ # Minecraft launch steps
+│ ├── mod/ # Mod management models
+│ ├── services/ # Mojang/Microsoft services
+│ └── update/ # Game update logic
+├── modplatform/ # CurseForge, Modrinth, ATL, FTB, Technic
+├── net/ # Network layer (Download, NetJob, cache)
+├── settings/ # Settings framework
+├── java/ # Java detection and validation
+├── ui/ # User interface
+│ ├── MainWindow.{h,cpp} # Main application window
+│ ├── InstanceWindow.{h,cpp} # Per-instance console window
+│ ├── themes/ # Theme system
+│ ├── pages/ # Settings/instance pages
+│ ├── dialogs/ # Modal dialogs
+│ ├── widgets/ # Custom widgets
+│ └── setupwizard/ # First-run wizard
+├── icons/ # Icon management
+├── meta/ # Metadata index (version lists)
+├── tasks/ # Task base class
+├── tools/ # External tool integration
+├── translations/ # i18n/l10n
+├── updater/ # Self-update system
+├── news/ # News feed
+├── notifications/ # Notification system
+├── screenshots/ # Screenshot management
+├── pathmatcher/ # File path matching utilities
+└── resources/ # Embedded resources (QRC)
+```
+
+### Subsystem Interactions
+
+The major data flow through MeshMC:
+
+1. **Startup** — `main()` creates `Application`, which initializes all subsystems: settings, network, accounts, instances, themes, translations, icons, metadata index, and analytics
+2. **Instance Discovery** — `InstanceList` scans the instances directory, loading instance metadata from `instance.cfg` files via `INISettingsObject`
+3. **User Interaction** — `MainWindow` presents the instance list via `InstanceView` (custom `QAbstractItemView`); user actions trigger instance operations
+4. **Launch Flow** — User clicks Launch → `LaunchController::executeTask()` → account selection → `MinecraftInstance::createLaunchTask()` → `LaunchTask` with ordered `LaunchStep` chain → game process spawned via `DirectJavaLaunch` or `MeshMCPartLaunch`
+5. **Mod Installation** — User browses mod platform → platform API query → `NetJob` download → mod file placed in instance mods directory → `ModFolderModel` updated
+
+## Versioning
+
+MeshMC follows semantic versioning. The current version is defined in the root `CMakeLists.txt`:
+
+```cmake
+set(MeshMC_VERSION_MAJOR 7)
+set(MeshMC_VERSION_MINOR 0)
+set(MeshMC_VERSION_HOTFIX 0)
+```
+
+The full version string is assembled as `7.0.0`. Git commit hash and tag information are captured at build time via `GetGitRevisionDescription.cmake` and embedded into the binary via `BuildConfig`.
+
+## Configuration Files
+
+MeshMC uses several configuration file formats:
+
+| File | Format | Purpose |
+|---|---|---|
+| `meshmc.cfg` | INI | Global application settings |
+| `instance.cfg` | INI | Per-instance settings |
+| `accounts.json` | JSON | Account credentials and tokens |
+| `instgroups.json` | JSON | Instance group assignments |
+| `metacache` | JSON | HTTP cache metadata |
+| `mmc-pack.json` | JSON | Component list (PackProfile) |
+| `patches/*.json` | JSON | Custom component overrides |
+
+## Licensing
+
+MeshMC is licensed under the **GNU General Public License v3.0 or later** (GPL-3.0-or-later). The project uses REUSE-compliant licensing headers. Some files incorporate code from MultiMC Contributors under the Apache License 2.0, which is GPL-compatible for combined works.
+
+The `REUSE.toml` file at the repository root and individual SPDX headers in each source file provide the authoritative licensing information.
+
+## Project URLs
+
+| Resource | URL |
+|---|---|
+| Source Code | `https://github.com/Project-Tick/Project-Tick` |
+| Bug Tracker | `https://github.com/Project-Tick/MeshMC/issues` |
+| Metadata Server | `https://meta.projecttick.org/` |
+| News Feed | `https://projecttick.org/product/meshmc/feed.xml` |
+| Application ID | `org.projecttick.MeshMC` |
+
+## Relationship to Other Launchers
+
+MeshMC descends architecturally from the MultiMC launcher codebase (the Apache 2.0-licensed portions). It has been significantly extended and maintained as an independent project by Project Tick. Key differences from upstream include:
+
+- Microsoft-only authentication (Mojang/legacy auth removed)
+- Dual-source update system (RSS + GitHub Releases)
+- CurseForge and Modrinth integration
+- Qt6 migration (from Qt5)
+- C++23 standard requirement
+- Custom branding, theming, and icon sets
+- Nix/flake-based build infrastructure
+- REUSE-compliant licensing
+- Built-in Java downloader
+
+## Next Steps
+
+For deeper understanding of MeshMC's internals, continue with:
+
+- [Architecture](architecture.md) — detailed code architecture and module interactions
+- [Building](building.md) — build instructions for all platforms
+- [Application Lifecycle](application-lifecycle.md) — startup, shutdown, and event loop
+- [Instance Management](instance-management.md) — instance storage, creation, and groups
+- [Component System](component-system.md) — version resolution and dependency management
+- [Launch System](launch-system.md) — process building and game execution
+- [Account Management](account-management.md) — Microsoft OAuth2 authentication
+- [UI System](ui-system.md) — Qt6 widget architecture
diff --git a/docs/handbook/meshmc/platform-support.md b/docs/handbook/meshmc/platform-support.md
new file mode 100644
index 0000000000..496fe07e32
--- /dev/null
+++ b/docs/handbook/meshmc/platform-support.md
@@ -0,0 +1,353 @@
+# Platform Support
+
+## Overview
+
+MeshMC targets Linux, macOS, and Windows with platform-specific build configurations, packaging, and runtime behavior. The CMake build system uses presets and conditional compilation to handle platform differences.
+
+## Build Presets
+
+| Preset | Platform | Compiler | Generator |
+|---|---|---|---|
+| `linux` | Linux (x86_64, aarch64) | GCC / Clang | Ninja Multi-Config |
+| `macos` | macOS (x86_64, arm64) | AppleClang | Ninja Multi-Config |
+| `windows_msvc` | Windows | MSVC | Ninja Multi-Config |
+| `windows_mingw` | Windows | MinGW-w64 | Ninja Multi-Config |
+
+Each preset is defined in `CMakePresets.json` and configures:
+- Compiler toolchain
+- vcpkg integration
+- Platform-specific CMake variables
+- Build/install directories
+
+## Linux
+
+### Build Requirements
+
+- CMake 3.28+
+- GCC 14+ or Clang 18+ (C++23 support)
+- Qt6 (Core, Widgets, Concurrent, Network, NetworkAuth, Test, Xml)
+- Extra CMake Modules (ECM) from KDE
+- libarchive, zlib, cmark, tomlplusplus
+
+### Nix Build
+
+MeshMC provides a `flake.nix` for reproducible builds:
+
+```bash
+nix build .#meshmc # Build release
+nix develop .#meshmc # Enter dev shell with all dependencies
+```
+
+### Desktop Integration
+
+CMake installs standard freedesktop files:
+
+```cmake
+# Application desktop entry
+install(FILES launcher/package/linux/org.projecttick.MeshMC.desktop
+ DESTINATION ${KDE_INSTALL_APPDIR})
+
+# AppStream metainfo
+install(FILES launcher/package/linux/org.projecttick.MeshMC.metainfo.xml
+ DESTINATION ${KDE_INSTALL_METAINFODIR})
+
+# MIME type for .meshmc files
+install(FILES launcher/package/linux/org.projecttick.MeshMC.mime.xml
+ DESTINATION ${KDE_INSTALL_MIMEDIR})
+
+# Application icons (various sizes)
+ecm_install_icons(ICONS
+ launcher/package/linux/16-apps-org.projecttick.MeshMC.png
+ launcher/package/linux/24-apps-org.projecttick.MeshMC.png
+ launcher/package/linux/32-apps-org.projecttick.MeshMC.png
+ launcher/package/linux/48-apps-org.projecttick.MeshMC.png
+ launcher/package/linux/64-apps-org.projecttick.MeshMC.png
+ launcher/package/linux/128-apps-org.projecttick.MeshMC.png
+ launcher/package/linux/256-apps-org.projecttick.MeshMC.png
+ launcher/package/linux/scalable-apps-org.projecttick.MeshMC.svg
+ DESTINATION ${KDE_INSTALL_ICONDIR}
+)
+```
+
+### Runtime Paths
+
+```text
+# KDE install directories used via ECM
+KDE_INSTALL_BINDIR → /usr/bin
+KDE_INSTALL_DATADIR → /usr/share
+KDE_INSTALL_APPDIR → /usr/share/applications
+KDE_INSTALL_ICONDIR → /usr/share/icons
+KDE_INSTALL_METAINFODIR → /usr/share/metainfo
+KDE_INSTALL_MIMEDIR → /usr/share/mime/packages
+```
+
+### RPATH
+
+```cmake
+# Set RPATH for installed binary
+set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+```
+
+This ensures bundled libraries are found at runtime without setting `LD_LIBRARY_PATH`.
+
+### Wayland/X11
+
+Qt6 handles Wayland and X11 transparently. MeshMC does not have platform-specific display code.
+
+## macOS
+
+### Build Requirements
+
+- CMake 3.28+
+- Xcode / AppleClang (C++23 support)
+- Qt6 via Homebrew or vcpkg
+- Same library dependencies as Linux
+
+### App Bundle
+
+CMake creates a standard macOS `.app` bundle:
+
+```cmake
+set_target_properties(meshmc PROPERTIES
+ MACOSX_BUNDLE TRUE
+ MACOSX_BUNDLE_INFO_PLIST "${CMAKE_SOURCE_DIR}/launcher/package/macos/Info.plist.in"
+ MACOSX_BUNDLE_BUNDLE_NAME "MeshMC"
+ MACOSX_BUNDLE_BUNDLE_VERSION "${MeshMC_VERSION_NAME}"
+ MACOSX_BUNDLE_GUI_IDENTIFIER "org.projecttick.MeshMC"
+ MACOSX_BUNDLE_ICON_FILE "meshmc.icns"
+ MACOSX_BUNDLE_SHORT_VERSION_STRING "${MeshMC_VERSION_NAME}"
+)
+```
+
+### Application Icon
+
+```cmake
+# Convert SVG to icns
+set(MACOSX_ICON "${CMAKE_SOURCE_DIR}/launcher/package/macos/meshmc.icns")
+set_source_files_properties(${MACOSX_ICON} PROPERTIES MACOSX_PACKAGE_LOCATION "Resources")
+```
+
+### Sparkle Updates
+
+macOS uses the Sparkle framework for auto-updates:
+
+```cmake
+if(APPLE)
+ find_library(SPARKLE_FRAMEWORK Sparkle)
+ if(SPARKLE_FRAMEWORK)
+ target_link_libraries(meshmc PRIVATE ${SPARKLE_FRAMEWORK})
+ endif()
+endif()
+```
+
+Sparkle provides:
+- Built-in update notification UI
+- Differential (delta) updates
+- Code signing verification
+- Automatic background checks
+
+### Universal Binary
+
+The build supports creating Universal binaries (x86_64 + arm64):
+
+```cmake
+# Set via CMake variable
+set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64")
+```
+
+### macOS-Specific Code
+
+```cpp
+#ifdef Q_OS_MACOS
+ // Set application properties for macOS integration
+ QApplication::setAttribute(Qt::AA_DontShowIconsInMenus);
+
+ // Handle macOS dock icon click
+ // Handle macOS file open events
+#endif
+```
+
+## Windows
+
+### Build Requirements
+
+#### MSVC
+- Visual Studio 2022 17.10+ (MSVC v143, C++23)
+- CMake 3.28+
+- Qt6 via vcpkg or installer
+- vcpkg for other dependencies
+
+#### MinGW
+- MinGW-w64 toolchain with GCC 14+ (C++23 support)
+- CMake 3.28+
+- Qt6 built for MinGW
+
+### Windows Resource File
+
+```cmake
+if(WIN32)
+ # Application icon and version info
+ configure_file(
+ "${CMAKE_SOURCE_DIR}/launcher/package/windows/meshmc.rc.in"
+ "${CMAKE_BINARY_DIR}/meshmc.rc"
+ )
+ target_sources(meshmc PRIVATE "${CMAKE_BINARY_DIR}/meshmc.rc")
+endif()
+```
+
+The `.rc` file provides:
+- Application icon (embedded in `.exe`)
+- Version information (shown in file properties)
+- Product name and company
+
+### Application Manifest
+
+```cmake
+if(WIN32)
+ target_sources(meshmc PRIVATE
+ "${CMAKE_SOURCE_DIR}/launcher/package/windows/meshmc.manifest"
+ )
+endif()
+```
+
+The manifest declares:
+- DPI awareness (per-monitor DPI aware)
+- Requested execution level (asInvoker)
+- Common controls v6 (modern UI)
+- UTF-8 code page
+
+### NSIS Installer
+
+For creating Windows installers:
+
+```cmake
+if(WIN32)
+ # CPack NSIS configuration
+ set(CPACK_GENERATOR "NSIS")
+ set(CPACK_NSIS_DISPLAY_NAME "MeshMC")
+ set(CPACK_NSIS_PACKAGE_NAME "MeshMC")
+ set(CPACK_NSIS_MUI_ICON "${CMAKE_SOURCE_DIR}/launcher/package/windows/meshmc.ico")
+ set(CPACK_NSIS_INSTALLED_ICON_NAME "meshmc.exe")
+ set(CPACK_NSIS_CREATE_ICONS_EXTRA
+ "CreateShortCut '$SMPROGRAMS\\\\$STARTMENU_FOLDER\\\\MeshMC.lnk' '$INSTDIR\\\\meshmc.exe'"
+ )
+endif()
+```
+
+### Windows Registry
+
+The installer registers:
+- File associations (`.meshmc` import files)
+- Start menu shortcuts
+- Uninstall information in Add/Remove Programs
+
+### WSL Rejection
+
+MeshMC detects and rejects running under WSL:
+
+```cpp
+#ifdef Q_OS_LINUX
+ // Check for WSL
+ QFile wslInterop("/proc/sys/fs/binfmt_misc/WSLInterop");
+ if (wslInterop.exists()) {
+ QMessageBox::critical(nullptr, "Unsupported Platform",
+ "MeshMC does not support running under WSL. "
+ "Please use the native Windows version.");
+ return 1;
+ }
+#endif
+```
+
+WSL cannot run Java GUI applications reliably, so MeshMC refuses to start.
+
+## Portable Mode
+
+All platforms support portable mode:
+
+```cpp
+// Check for portable marker file
+QFileInfo portableMarker(
+ QCoreApplication::applicationDirPath() + "/meshmc_portable.txt"
+);
+
+if (portableMarker.exists()) {
+ // Use application directory for all data
+ m_dataPath = QCoreApplication::applicationDirPath();
+} else {
+ // Use standard platform data directory
+ m_dataPath = QStandardPaths::writableLocation(
+ QStandardPaths::GenericDataLocation
+ ) + "/MeshMC";
+}
+```
+
+Create `meshmc_portable.txt` next to the binary to enable portable mode. All data (instances, settings, cache) will be stored alongside the executable.
+
+## OpSys Class
+
+Platform detection utility:
+
+```cpp
+class OpSys
+{
+public:
+ enum OS {
+ Os_Windows,
+ Os_Linux,
+ Os_OSX,
+ Os_Other
+ };
+
+ static OS currentSystem();
+ static QString currentSystemString();
+ static bool isLinux();
+ static bool isMacOS();
+ static bool isWindows();
+};
+```
+
+Used throughout the codebase for platform-conditional logic:
+- Library path resolution
+- Native path separators
+- Platform-specific launch arguments
+- Natives extraction (LWJGL)
+
+## Platform-Specific Native Libraries
+
+Minecraft requires platform-specific native libraries (LWJGL, OpenAL, etc.):
+
+```cpp
+// From Library class
+bool Library::isApplicable() const
+{
+ // Check OS rules
+ for (auto& rule : m_rules) {
+ if (rule.os.name == "linux" && OpSys::isLinux()) return rule.action == "allow";
+ if (rule.os.name == "osx" && OpSys::isMacOS()) return rule.action == "allow";
+ if (rule.os.name == "windows" && OpSys::isWindows()) return rule.action == "allow";
+ }
+ return true; // No rules = always applicable
+}
+```
+
+### Natives Classifiers
+
+```json
+{
+ "name": "org.lwjgl:lwjgl:3.3.3",
+ "natives": {
+ "linux": "natives-linux",
+ "osx": "natives-macos",
+ "windows": "natives-windows"
+ }
+}
+```
+
+## Data Directory Locations
+
+| Platform | Default Data Directory |
+|---|---|
+| Linux | `~/.local/share/MeshMC` |
+| macOS | `~/Library/Application Support/MeshMC` |
+| Windows | `%APPDATA%/MeshMC` |
+| Portable | `<binary_dir>/` |
diff --git a/docs/handbook/meshmc/release-notes.md b/docs/handbook/meshmc/release-notes.md
new file mode 100644
index 0000000000..02391fca2c
--- /dev/null
+++ b/docs/handbook/meshmc/release-notes.md
@@ -0,0 +1,222 @@
+# Release Notes
+
+## Versioning Scheme
+
+MeshMC follows a three-component version scheme:
+
+```
+MAJOR.MINOR.HOTFIX
+```
+
+Defined in the top-level `CMakeLists.txt`:
+
+```cmake
+set(MeshMC_VERSION_MAJOR 7)
+set(MeshMC_VERSION_MINOR 0)
+set(MeshMC_VERSION_HOTFIX 0)
+set(MeshMC_VERSION_NAME "${MeshMC_VERSION_MAJOR}.${MeshMC_VERSION_MINOR}.${MeshMC_VERSION_HOTFIX}")
+```
+
+| Component | When Incremented |
+|---|---|
+| **MAJOR** | Breaking changes, major feature overhauls |
+| **MINOR** | New features, significant improvements |
+| **HOTFIX** | Bug fixes, security patches |
+
+### Build Metadata
+
+Additional version metadata is generated at build time:
+
+```cmake
+set(MeshMC_BUILD_PLATFORM "${CMAKE_SYSTEM_NAME}") # Linux, Darwin, Windows
+set(MeshMC_GIT_COMMIT ...) # Git commit hash
+set(MeshMC_GIT_TAG ...) # Git tag if on a tag
+```
+
+The full version string displayed in the About dialog includes platform and git info.
+
+## Update System
+
+MeshMC uses a dual-source update checking system.
+
+### UpdateChecker
+
+```cpp
+class UpdateChecker : public QObject
+{
+ Q_OBJECT
+public:
+ void checkForUpdate(QString updateUrl, bool notifyNoUpdate);
+
+ bool hasNewUpdate() const;
+ QString getLatestVersion() const;
+ QString getDownloadUrl() const;
+ QString getChangelog() const;
+
+signals:
+ void updateAvailable(QString version, QString url, QString changelog);
+ void noUpdateAvailable();
+ void updateCheckFailed();
+
+private:
+ void parseRSSFeed(const QByteArray& data);
+ void parseGitHubRelease(const QJsonObject& release);
+
+ QString m_currentVersion;
+ QString m_latestVersion;
+ QString m_downloadUrl;
+ QString m_changelog;
+ bool m_hasUpdate = false;
+};
+```
+
+### Update Sources
+
+#### RSS Feed
+
+Primary update channel — an RSS/Atom feed listing available versions:
+
+```
+https://projecttick.org/feed/meshmc.xml
+```
+
+The feed contains:
+- Version number
+- Download URLs per platform
+- Changelog summary
+
+#### GitHub Releases
+
+Fallback/alternative update source using the GitHub Releases API:
+
+```
+https://api.github.com/repos/Project-Tick/MeshMC/releases/latest
+```
+
+Returns:
+- Tag name (version)
+- Release body (changelog in Markdown)
+- Asset download URLs
+
+### Update Check Flow
+
+```
+Application startup (if AutoUpdate enabled)
+ │
+ └── UpdateChecker::checkForUpdate()
+ │
+ ├── Fetch RSS feed
+ │ ├── Parse version from <item> entries
+ │ ├── Compare with current version
+ │ └── Extract download URL + changelog
+ │
+ ├── OR Fetch GitHub release
+ │ ├── Parse tag_name for version
+ │ ├── Compare with current version
+ │ └── Extract asset URL + body
+ │
+ ├── If new version available:
+ │ └── emit updateAvailable(version, url, changelog)
+ │ └── MainWindow shows update notification
+ │
+ └── If no update:
+ └── emit noUpdateAvailable()
+```
+
+### Update Settings
+
+```cpp
+m_settings->registerSetting("AutoUpdate", true);
+m_settings->registerSetting("UpdateChannel", "stable");
+```
+
+| Setting | Values | Description |
+|---|---|---|
+| `AutoUpdate` | `true`/`false` | Check for updates on startup |
+| `UpdateChannel` | `stable`/`beta` | Which release channel to follow |
+
+### Update Dialog
+
+When an update is available, `UpdateDialog` is shown:
+
+- Displays current version vs. available version
+- Shows changelog (rendered from Markdown via cmark)
+- "Update Now" button → opens download URL in browser
+- "Skip This Version" → suppresses notification for this version
+- "Remind Me Later" → dismisses until next startup
+
+### Sparkle (macOS)
+
+On macOS, updates are handled by the Sparkle framework instead of the built-in `UpdateChecker`:
+
+- Native macOS update UI
+- Differential updates (only download changed parts)
+- Code signature verification
+- Appcast feed (XML similar to RSS)
+- Background update installation
+
+```cpp
+#ifdef Q_OS_MACOS
+ // Use Sparkle for updates instead of built-in checker
+ SUUpdater* updater = [SUUpdater sharedUpdater];
+ [updater setAutomaticallyChecksForUpdates:autoUpdate];
+ [updater checkForUpdatesInBackground];
+#endif
+```
+
+## Distribution
+
+### Build Artifacts
+
+| Platform | Artifact | Format |
+|---|---|---|
+| Linux | AppImage | `.AppImage` |
+| Linux | Tarball | `.tar.gz` |
+| Linux | Nix | Flake output |
+| macOS | Disk Image | `.dmg` (app bundle) |
+| Windows | Installer | `.exe` (NSIS) |
+| Windows | Portable | `.zip` |
+
+### Release Branches
+
+| Branch | Purpose |
+|---|---|
+| `develop` | Active development |
+| `release-7.x` | Current stable release line |
+| Tagged releases | `v7.0.0`, `v7.0.1`, etc. |
+
+Backports from `develop` to `release-*` branches are automated via GitHub Actions (see [Contributing](contributing.md#backporting)).
+
+## Changelog Format
+
+Release changelogs follow this structure:
+
+```markdown
+## MeshMC 7.0.0
+
+### New Features
+- Feature description
+
+### Bug Fixes
+- Fix description
+
+### Internal Changes
+- Change description
+
+### Dependencies
+- Updated Qt to 6.7.x
+```
+
+## Current Version
+
+**MeshMC 7.0.0** — Initial release as MeshMC (fork of PrismLauncher/MultiMC).
+
+Key features:
+- C++23 codebase with Qt6
+- Microsoft Account authentication via OAuth2
+- Multi-platform support (Linux, macOS, Windows)
+- Instance management with component system
+- Mod management with CurseForge, Modrinth, ATLauncher, FTB, Technic integration
+- Customizable themes and icon packs
+- Managed Java downloads
+- Automatic update checking
diff --git a/docs/handbook/meshmc/settings-system.md b/docs/handbook/meshmc/settings-system.md
new file mode 100644
index 0000000000..7d746971d7
--- /dev/null
+++ b/docs/handbook/meshmc/settings-system.md
@@ -0,0 +1,402 @@
+# Settings System
+
+## Overview
+
+MeshMC uses a hierarchical settings system built on Qt's `QObject` infrastructure. Settings support default values, overrides, signal-based change notification, and per-instance customization through a gate/override pattern.
+
+## Architecture
+
+### Key Classes
+
+| Class | File | Purpose |
+|---|---|---|
+| `SettingsObject` | `settings/SettingsObject.{h,cpp}` | Abstract settings container |
+| `INISettingsObject` | `settings/INISettingsObject.{h,cpp}` | File-backed settings |
+| `INIFile` | `settings/INIFile.{h,cpp}` | QVariant map with file I/O |
+| `Setting` | `settings/Setting.{h,cpp}` | Individual setting entry |
+| `OverrideSetting` | `settings/OverrideSetting.{h,cpp}` | Per-instance override |
+| `PassthroughSetting` | `settings/PassthroughSetting.{h,cpp}` | Delegate to another setting |
+
+### Hierarchy
+
+```
+Global Settings (Application::settings())
+ │
+ ├── INISettingsObject backed by meshmc.cfg
+ ├── Contains defaults for all application settings
+ │
+ ├──→ Instance Settings (BaseInstance::settings())
+ │ │
+ │ ├── INISettingsObject backed by instance.cfg
+ │ ├── Uses OverrideSetting to selectively override globals
+ │ └── Unoverridden settings transparently fall through to global
+ │
+ └──→ MinecraftInstance adds additional Minecraft-specific settings
+```
+
+## SettingsObject
+
+The abstract base class for all settings containers:
+
+```cpp
+class SettingsObject : public QObject
+{
+ Q_OBJECT
+public:
+ virtual ~SettingsObject();
+
+ // Registration
+ std::shared_ptr<Setting> registerSetting(QStringList synonyms, QVariant defVal = QVariant());
+ std::shared_ptr<Setting> registerOverride(std::shared_ptr<Setting> original, std::shared_ptr<Setting> gate);
+ std::shared_ptr<Setting> registerPassthrough(std::shared_ptr<Setting> original, std::shared_ptr<Setting> gate);
+
+ // Access
+ std::shared_ptr<Setting> getSetting(const QString& id) const;
+ QVariant get(const QString& id) const;
+ bool set(const QString& id, QVariant value);
+ void reset(const QString& id) const;
+ bool contains(const QString& id);
+
+ bool reload();
+
+signals:
+ void settingChanged(const Setting& setting, QVariant value);
+ void settingReset(const Setting& setting);
+
+protected:
+ virtual void changeSetting(const Setting& setting, QVariant value) = 0;
+ virtual void resetSetting(const Setting& setting) = 0;
+ virtual QVariant retrieveValue(const Setting& setting) = 0;
+ virtual bool contains(const QString& id) const = 0;
+
+ QMap<QString, std::shared_ptr<Setting>> m_settings;
+};
+```
+
+### Registration Methods
+
+- **`registerSetting(synonyms, default)`** — registers a basic setting with optional synonyms for backward compatibility
+- **`registerOverride(original, gate)`** — registers a setting that overrides `original` when `gate` is `true`; otherwise falls through to `original`
+- **`registerPassthrough(original, gate)`** — registers a setting that always reads from the instance but writes only when `gate` is `true`
+
+## Setting
+
+Individual setting entries:
+
+```cpp
+class Setting : public QObject
+{
+ Q_OBJECT
+public:
+ Setting(QStringList synonyms, QVariant defVal);
+
+ // Value access
+ virtual QVariant get() const;
+ virtual QVariant defValue() const;
+ virtual void set(QVariant value);
+ virtual void reset();
+
+ // Identity
+ QString id() const;
+ QStringList synonyms() const;
+
+signals:
+ void SettingChanged(const Setting& setting, QVariant value);
+ void SettingReset(const Setting& setting);
+};
+```
+
+### Synonyms
+
+Settings can have multiple names for backward compatibility:
+
+```cpp
+s->registerSetting({"MinecraftWinWidth", "MCWindowWidth"}, 854);
+```
+
+The first synonym is the canonical ID. Lookups work with any synonym. This allows renaming settings without breaking existing `meshmc.cfg` files.
+
+## INISettingsObject
+
+File-backed implementation using `INIFile`:
+
+```cpp
+class INISettingsObject : public SettingsObject
+{
+ Q_OBJECT
+public:
+ explicit INISettingsObject(const QString& path, QObject* parent = 0);
+ explicit INISettingsObject(std::shared_ptr<INIFile> file, QObject* parent = 0);
+
+ bool reload() override;
+
+protected:
+ void changeSetting(const Setting& setting, QVariant value) override;
+ void resetSetting(const Setting& setting) override;
+ QVariant retrieveValue(const Setting& setting) override;
+
+ std::shared_ptr<INIFile> m_ini;
+};
+```
+
+### INIFile
+
+Simple key-value storage backed by a text file:
+
+```cpp
+class INIFile : public QMap<QString, QVariant>
+{
+public:
+ explicit INIFile(const QString& filename);
+
+ bool loadFile(const QString& fileName);
+ bool saveFile(const QString& fileName);
+
+ QVariant get(const QString& key, QVariant def) const;
+ void set(const QString& key, QVariant val);
+};
+```
+
+Format of `meshmc.cfg` / `instance.cfg`:
+```ini
+MinMemAlloc=512
+MaxMemAlloc=4096
+JavaPath=/usr/lib/jvm/java-21-openjdk/bin/java
+Language=en_US
+IconTheme=pe_colored
+LaunchMaximized=false
+```
+
+## Override System
+
+### OverrideSetting
+
+Used by instance settings to selectively override global settings:
+
+```cpp
+class OverrideSetting : public Setting
+{
+ Q_OBJECT
+public:
+ OverrideSetting(std::shared_ptr<Setting> other, std::shared_ptr<Setting> gate);
+
+ // Delegation logic:
+ // get() → if gate is true, return local value; else return other->get()
+ // set() → sets local value and sets gate to true
+ // reset() → resets local value and sets gate to false
+
+ bool isOverridden() const; // Returns gate value
+
+ virtual QVariant get() const override;
+ virtual void set(QVariant value) override;
+ virtual void reset() override;
+ virtual QVariant defValue() const override;
+
+private:
+ std::shared_ptr<Setting> m_other;
+ std::shared_ptr<Setting> m_gate;
+};
+```
+
+The **gate** setting is a boolean that determines whether the instance-local value or the global value is used. When the gate is `false`, the setting falls through to the global setting.
+
+### PassthroughSetting
+
+Similar to `OverrideSetting` but always reads from instance storage:
+
+```cpp
+class PassthroughSetting : public Setting
+{
+ Q_OBJECT
+public:
+ PassthroughSetting(std::shared_ptr<Setting> other, std::shared_ptr<Setting> gate);
+
+    // Always reads from instance storage
+    // Writes to instance storage only when gate is true
+    // When gate is false, writes fall through to other
+};
+```
+
+## Global Settings Registration
+
+In `Application.cpp`, global settings are registered:
+
+```cpp
+// Memory
+m_settings->registerSetting("MinMemAlloc", 512);
+m_settings->registerSetting("MaxMemAlloc", 4096);
+m_settings->registerSetting("PermGen", 128);
+
+// Java
+m_settings->registerSetting("JavaPath", "");
+m_settings->registerSetting("JvmArgs", "");
+m_settings->registerSetting("IgnoreJavaCompatibility", false);
+m_settings->registerSetting("IgnoreJavaWizard", false);
+
+// Window
+m_settings->registerSetting({"MinecraftWinWidth", "MCWindowWidth"}, 854);
+m_settings->registerSetting({"MinecraftWinHeight", "MCWindowHeight"}, 480);
+m_settings->registerSetting("LaunchMaximized", false);
+
+// Network/proxy
+m_settings->registerSetting("ProxyType", "None");
+m_settings->registerSetting("ProxyAddr", "127.0.0.1");
+m_settings->registerSetting("ProxyPort", 8080);
+m_settings->registerSetting("ProxyUser", "");
+m_settings->registerSetting("ProxyPass", "");
+
+// Console
+m_settings->registerSetting("ShowConsole", false);
+m_settings->registerSetting("AutoCloseConsole", false);
+m_settings->registerSetting("ShowConsoleOnError", true);
+m_settings->registerSetting("LogPrePostOutput", true);
+
+// Custom commands
+m_settings->registerSetting("PreLaunchCommand", "");
+m_settings->registerSetting("WrapperCommand", "");
+m_settings->registerSetting("PostExitCommand", "");
+
+// UI
+m_settings->registerSetting("IconTheme", "pe_colored");
+m_settings->registerSetting("ApplicationTheme", "system");
+m_settings->registerSetting("Language", "");
+
+// Updates
+m_settings->registerSetting("AutoUpdate", true);
+m_settings->registerSetting("UpdateChannel", "stable");
+
+// Analytics
+m_settings->registerSetting("Analytics", true);
+
+// Miscellaneous
+m_settings->registerSetting("InstSortMode", "Name");
+m_settings->registerSetting("SelectedInstance", "");
+m_settings->registerSetting("UpdateDialogGeometry", "");
+m_settings->registerSetting("CatStyle", "kitteh");
+```
+
+## Instance Settings Override
+
+`MinecraftInstance::settings()` creates override settings for per-instance customization:
+
+```cpp
+auto globalSettings = APPLICATION->settings();
+auto s = m_settings; // instance SettingsObject
+
+// Memory overrides
+s->registerOverride(globalSettings->getSetting("MinMemAlloc"), gate);
+s->registerOverride(globalSettings->getSetting("MaxMemAlloc"), gate);
+
+// Java overrides
+auto javaGate = s->registerSetting("OverrideJavaLocation", false);
+s->registerOverride(globalSettings->getSetting("JavaPath"), javaGate);
+
+auto jvmArgsGate = s->registerSetting("OverrideJvmArgs", false);
+s->registerOverride(globalSettings->getSetting("JvmArgs"), jvmArgsGate);
+
+// Window overrides
+auto windowGate = s->registerSetting("OverrideWindow", false);
+s->registerOverride(globalSettings->getSetting("MinecraftWinWidth"), windowGate);
+s->registerOverride(globalSettings->getSetting("MinecraftWinHeight"), windowGate);
+s->registerOverride(globalSettings->getSetting("LaunchMaximized"), windowGate);
+
+// Console overrides
+auto consoleGate = s->registerSetting("OverrideConsole", false);
+s->registerOverride(globalSettings->getSetting("ShowConsole"), consoleGate);
+s->registerOverride(globalSettings->getSetting("AutoCloseConsole"), consoleGate);
+s->registerOverride(globalSettings->getSetting("ShowConsoleOnError"), consoleGate);
+```
+
+### Gate Pattern
+
+Each category of overridable settings has its own gate setting:
+- `OverrideJavaLocation` — gates JavaPath
+- `OverrideJvmArgs` — gates JvmArgs
+- `OverrideMemory` — gates MinMemAlloc, MaxMemAlloc, PermGen
+- `OverrideWindow` — gates MinecraftWinWidth, MinecraftWinHeight, LaunchMaximized
+- `OverrideConsole` — gates ShowConsole, AutoCloseConsole, ShowConsoleOnError
+- `OverrideCommands` — gates PreLaunchCommand, WrapperCommand, PostExitCommand
+- `OverrideNativeWorkarounds` — gates UseNativeOpenAL, UseNativeGLFW
+
+In the UI, each category has a checkbox. Enabling the checkbox:
+1. Sets the gate to `true`
+2. Enables the corresponding UI fields
+3. Makes the setting read from instance storage instead of global
+
+## Settings UI
+
+### Global Settings Pages
+
+Settings UI is organized into pages, each a `BasePage` subclass:
+
+| Page | File | Settings |
+|---|---|---|
+| `MeshMCPage` | `pages/global/MeshMCPage.h` | Update channel, auto-update, analytics |
+| `MinecraftPage` | `pages/global/MinecraftPage.h` | Window size, maximize, console behavior |
+| `JavaPage` | `pages/global/JavaPage.h` | Java path, memory, JVM args |
+| `LanguagePage` | `pages/global/LanguagePage.h` | UI language selection |
+| `ProxyPage` | `pages/global/ProxyPage.h` | Network proxy settings |
+| `ExternalToolsPage` | `pages/global/ExternalToolsPage.h` | Profiler, editor paths |
+| `PasteEEPage` | `pages/global/PasteEEPage.h` | Paste service configuration |
+| `CustomCommandsPage` | `pages/global/CustomCommandsPage.h` | Pre/post/wrapper commands |
+| `AppearancePage` | `pages/global/AppearancePage.h` | Theme, icon theme, cat style |
+
+### Instance Settings Pages
+
+Instance setting pages mirror global pages but include the override checkboxes:
+
+| Instance Page | Override Gate |
+|---|---|
+| Instance Java settings | `OverrideJavaLocation`, `OverrideJvmArgs`, `OverrideMemory` |
+| Instance window settings | `OverrideWindow` |
+| Instance console settings | `OverrideConsole` |
+| Instance custom commands | `OverrideCommands` |
+
+## Settings File Locations
+
+| File | Location | Content |
+|---|---|---|
+| `meshmc.cfg` | Data directory root | Global settings |
+| `instance.cfg` | Instance directory | Per-instance settings + overrides |
+| `accounts.json` | Data directory root | Account data (see Account Management) |
+| `metacache/` | Data directory root | HTTP cache metadata |
+
+## Change Notification
+
+Settings use Qt signals for reactive updates:
+
+```cpp
+// Listen for all setting changes and filter by setting id
+connect(APPLICATION->settings().get(), &SettingsObject::settingChanged,
+ this, [](const Setting& setting, QVariant value) {
+ if (setting.id() == "Language") {
+ // Reload translations
+ }
+ });
+
+// Settings emit when changed
+app->settings()->set("Language", "de_DE");
+// → emits settingChanged(Setting("Language"), "de_DE")
+```
+
+## Data Path Resolution
+
+The settings data path is determined at startup in `Application::Application()`:
+
+1. **Portable mode**: If `meshmc_portable.txt` exists next to the binary, data lives alongside the binary
+2. **Standard paths**: Otherwise uses `QStandardPaths::writableLocation(QStandardPaths::GenericDataLocation) + "/MeshMC"`
+3. **CLI override**: `--dir <path>` overrides the data directory
+
+The data directory contains:
+```
+<data_dir>/
+├── meshmc.cfg # Global settings
+├── accounts.json # Account storage
+├── instances/ # Instance directories
+├── icons/ # Custom icons
+├── themes/ # Custom themes
+├── translations/ # Translation files
+├── metacache/ # HTTP cache
+├── logs/ # Application logs
+└── java/ # Managed Java installations
+```
diff --git a/docs/handbook/meshmc/theme-system.md b/docs/handbook/meshmc/theme-system.md
new file mode 100644
index 0000000000..be60a3b817
--- /dev/null
+++ b/docs/handbook/meshmc/theme-system.md
@@ -0,0 +1,417 @@
+# Theme System
+
+## Overview
+
+MeshMC supports application-wide theming through a `ThemeManager` that manages both visual themes (widget styling, colors) and icon themes. Themes can be built-in or user-provided via the `themes/` directory.
+
+## Architecture
+
+### Key Classes
+
+| Class | File | Purpose |
+|---|---|---|
+| `ThemeManager` | `ui/themes/ThemeManager.{h,cpp}` | Theme registry and lifecycle |
+| `ITheme` | `ui/themes/ITheme.{h,cpp}` | Abstract theme interface |
+| `BrightTheme` | `ui/themes/BrightTheme.{h,cpp}` | Built-in light theme |
+| `DarkTheme` | `ui/themes/DarkTheme.{h,cpp}` | Built-in dark theme |
+| `FusionTheme` | `ui/themes/FusionTheme.{h,cpp}` | Qt Fusion-based theme |
+| `SystemTheme` | `ui/themes/SystemTheme.{h,cpp}` | OS-native theme |
+| `CustomTheme` | `ui/themes/CustomTheme.{h,cpp}` | User-defined theme |
+| `CatPack` | `ui/themes/CatPack.{h,cpp}` | Cat background customization |
+
+## ITheme Interface
+
+All themes implement the `ITheme` interface:
+
+```cpp
+class ITheme
+{
+public:
+ virtual ~ITheme() {}
+
+ // Identity
+ virtual QString id() = 0;
+ virtual QString name() = 0;
+
+ // Application
+ virtual void apply(bool initial);
+
+ // Qt integration
+ virtual bool hasStyleSheet() = 0;
+ virtual QString appStyleSheet() = 0;
+ virtual QString qtTheme() = 0;
+
+ // Colors
+ virtual Qt::ColorScheme colorScheme() = 0;
+ virtual QPalette colorScheme(QPalette basePalette);
+ virtual double fadeAmount() = 0;
+ virtual QColor fadeColor() = 0;
+
+ // Badges
+ virtual QString postprocessSVG(QString svg);
+
+ // Tooltip colors
+ virtual QColor tooltipBackground();
+ virtual QColor tooltipForeground();
+};
+```
+
+### apply()
+
+The `apply()` method is called when activating a theme:
+1. Sets `QApplication::setStyle()` based on `qtTheme()`
+2. Sets the palette via `QApplication::setPalette()`
+3. Applies stylesheet from `appStyleSheet()` if `hasStyleSheet()` is true
+4. Emits color scheme change for Qt6 integration
+
+### Color Scheme
+
+`colorScheme()` returns `Qt::ColorScheme::Light` or `Qt::ColorScheme::Dark`, used by Qt6 to adjust native widget rendering.
+
+### Fade
+
+`fadeAmount()` and `fadeColor()` control the disabled-state appearance of instances in the grid view:
+- `fadeColor()` — base color for fade overlay (typically background color)
+- `fadeAmount()` — opacity of the fade (0.0 = no fade, 1.0 = fully faded)
+
+## Built-in Themes
+
+### SystemTheme
+
+Uses the OS-provided widget style and colors:
+
+```cpp
+class SystemTheme : public ITheme {
+public:
+ QString id() override { return "system"; }
+ QString name() override { return QObject::tr("System"); }
+ bool hasStyleSheet() override { return false; }
+ QString qtTheme() override { return QStyleFactory::keys().first(); }
+ Qt::ColorScheme colorScheme() override { return Qt::ColorScheme::Unknown; }
+};
+```
+
+### BrightTheme
+
+A clean light theme with custom palette:
+
+```cpp
+class BrightTheme : public FusionTheme {
+public:
+ QString id() override { return "bright"; }
+ QString name() override { return QObject::tr("Bright"); }
+ Qt::ColorScheme colorScheme() override { return Qt::ColorScheme::Light; }
+ bool hasStyleSheet() override { return true; }
+ // Custom color palette with light backgrounds and dark text
+};
+```
+
+### DarkTheme
+
+A dark theme for low-light environments:
+
+```cpp
+class DarkTheme : public FusionTheme {
+public:
+ QString id() override { return "dark"; }
+ QString name() override { return QObject::tr("Dark"); }
+ Qt::ColorScheme colorScheme() override { return Qt::ColorScheme::Dark; }
+ bool hasStyleSheet() override { return true; }
+ // Custom color palette with dark backgrounds and light text
+};
+```
+
+### FusionTheme
+
+Base class for Bright and Dark themes, using Qt's Fusion style:
+
+```cpp
+class FusionTheme : public ITheme {
+public:
+ QString qtTheme() override { return "Fusion"; }
+ // Shared Fusion-based styling logic
+};
+```
+
+## CustomTheme
+
+User-provided themes loaded from the `themes/` directory:
+
+```cpp
+class CustomTheme : public ITheme {
+public:
+ CustomTheme(ITheme* baseTheme, const QString& folder);
+
+ QString id() override;
+ QString name() override;
+ bool hasStyleSheet() override;
+ QString appStyleSheet() override;
+ Qt::ColorScheme colorScheme() override;
+
+private:
+ ITheme* m_baseTheme; // Fallback theme
+ QString m_id;
+ QString m_name;
+ QString m_styleSheet;
+ QString m_folder;
+ // Custom palette overrides
+};
+```
+
+### Custom Theme Format
+
+A custom theme is a directory in `themes/` containing:
+
+```
+themes/my-theme/
+├── theme.json # Theme metadata and palette
+└── themeStyle.css # Qt stylesheet (optional)
+```
+
+#### theme.json
+
+```json
+{
+ "name": "My Custom Theme",
+ "baseTheme": "dark",
+ "colors": {
+ "Window": "#1a1b26",
+ "WindowText": "#c0caf5",
+ "Base": "#16161e",
+ "AlternateBase": "#1a1b26",
+ "ToolTipBase": "#1a1b26",
+ "ToolTipText": "#c0caf5",
+ "Text": "#c0caf5",
+ "Button": "#24283b",
+ "ButtonText": "#c0caf5",
+ "BrightText": "#ff0000",
+ "Link": "#7aa2f7",
+ "Highlight": "#3d59a1",
+ "HighlightedText": "#c0caf5",
+ "fadeColor": "#1a1b26",
+ "fadeAmount": 0.5
+ }
+}
+```
+
+- `baseTheme` — fallback theme ID (`bright`, `dark`, `system`)
+- `colors` — QPalette color role overrides (any role not specified falls back to base theme)
+- `fadeColor` / `fadeAmount` — instance fade styling
+
+#### themeStyle.css
+
+Optional Qt stylesheet for fine-grained control:
+
+```css
+QMainWindow {
+ background-color: #1a1b26;
+}
+
+QToolBar {
+ background-color: #24283b;
+ border: none;
+}
+
+QPushButton {
+ background-color: #3d59a1;
+ color: #c0caf5;
+ border: 1px solid #565f89;
+ border-radius: 4px;
+ padding: 4px 12px;
+}
+
+QPushButton:hover {
+ background-color: #7aa2f7;
+}
+```
+
+## ThemeManager
+
+The central theme registry:
+
+```cpp
+class ThemeManager : public QObject
+{
+ Q_OBJECT
+public:
+ ThemeManager();
+
+ QList<ITheme*> getValidApplicationThemes();
+ ITheme* getTheme(const QString& themeId);
+ void applyCurrentlySelectedTheme(bool initial = false);
+ void setApplicationTheme(const QString& name, bool initial = false);
+
+ // Icon themes
+ void applyCurrentlySelectedIconTheme();
+ void setIconTheme(const QString& name);
+ QList<IconThemeEntry> getValidIconThemes();
+
+ // Cat packs
+ void setCatPack(const QString& name);
+ QList<CatPack*> getValidCatPacks();
+ CatPack* getCatPack(const QString& name);
+
+private:
+ void initializeThemes();
+ void initializeIcons();
+ void initializeCatPacks();
+
+ QMap<QString, ITheme*> m_themes;
+ QMap<QString, IconThemeEntry> m_iconThemes;
+ QMap<QString, CatPack*> m_catPacks;
+ ITheme* m_currentTheme = nullptr;
+};
+```
+
+### Initialization
+
+On startup, `ThemeManager` discovers:
+1. **Built-in themes**: System, Bright, Dark (always available)
+2. **Custom themes**: Scans `themes/` directory for `theme.json` files, creates `CustomTheme` instances
+3. **Icon themes**: Scans for icon theme directories
+4. **Cat packs**: Scans for cat background packs
+
+### Theme Application
+
+```cpp
+void ThemeManager::setApplicationTheme(const QString& name, bool initial)
+{
+ auto theme = m_themes.value(name);
+ if (!theme)
+ theme = m_themes.value("system"); // Fallback
+
+ m_currentTheme = theme;
+ theme->apply(initial);
+}
+```
+
+## Icon Theme System
+
+### IconThemeEntry
+
+```cpp
+struct IconThemeEntry {
+ QString id;
+ QString name;
+ QString path;
+};
+```
+
+### Built-in Icon Themes
+
+| ID | Name | Description |
+|---|---|---|
+| `pe_colored` | PE Colored | Colorful flat icons (default) |
+| `pe_dark` | PE Dark | Dark variant |
+| `pe_light` | PE Light | Light variant |
+| `pe_blue` | PE Blue | Blue-tinted variant |
+| `OSX` | OSX | macOS-style icons |
+| `iOS` | iOS | iOS-style icons |
+| `flat` | Flat | Minimal flat icons |
+| `flat_white` | Flat White | White flat icons |
+| `multimc` | MultiMC | Classic MultiMC icons |
+| `custom` | Custom | User-provided icons |
+
+### Icon Resolution
+
+Icons are resolved through Qt's resource system and theme hierarchy:
+
+```cpp
+// Standard icon lookup
+QIcon::fromTheme("instances/creeper")
+
+// Falls back through:
+// 1. Current icon theme directory
+// 2. Default (pe_colored) theme
+// 3. Built-in Qt resources
+```
+
+### Custom Instance Icons
+
+Users can set custom icons per-instance:
+- Icons stored in `icons/` directory in the data path
+- PNG, SVG, ICO formats supported
+- `IconPickerDialog` provides selection UI
+- Custom icons override theme icons
+
+## CatPack System
+
+CatPack provides the cat/background image shown in the main window:
+
+```cpp
+class CatPack {
+public:
+ virtual ~CatPack() {}
+
+ virtual QString id() = 0;
+ virtual QString name() = 0;
+
+ virtual QDate startDate();
+ virtual QDate endDate();
+
+ virtual QString path();
+};
+```
+
+### Built-in Cat Packs
+
+- **kitteh** — Standard cat background
+- **rory** — Rory the cat
+
+Cat packs can have date ranges for seasonal variants.
+
+### Custom Cat Packs
+
+User-provided cat packs in `catpacks/`:
+
+```
+catpacks/my-cats/
+├── catpack.json
+└── images/
+ ├── default.png
+ ├── christmas.png # Dec 24 - Dec 26
+ └── halloween.png # Oct 31 - Nov 1
+```
+
+```json
+{
+ "name": "My Cats",
+ "default": "images/default.png",
+ "variants": [
+ {
+ "startDate": "12-24",
+ "endDate": "12-26",
+ "path": "images/christmas.png"
+ }
+ ]
+}
+```
+
+## SVG Post-Processing
+
+Themes can customize SVG icons at runtime:
+
+```cpp
+QString ITheme::postprocessSVG(QString svg)
+{
+ // Replace placeholder colors in SVG with theme colors
+ svg.replace("%%BADGE_COLOR%%", badgeColor().name());
+ svg.replace("%%BADGE_TEXT%%", badgeTextColor().name());
+ return svg;
+}
+```
+
+This allows icon badges (e.g., update counts, status indicators) to adapt to the current theme's color scheme.
+
+## Theme Settings Integration
+
+Theme selection is stored in global settings:
+
+```cpp
+// Settings registration (Application.cpp)
+m_settings->registerSetting("ApplicationTheme", "system");
+m_settings->registerSetting("IconTheme", "pe_colored");
+m_settings->registerSetting("CatStyle", "kitteh");
+```
+
+The `AppearancePage` in global settings provides the UI for theme selection with live preview.
diff --git a/docs/handbook/meshmc/ui-system.md b/docs/handbook/meshmc/ui-system.md
new file mode 100644
index 0000000000..731cc68d54
--- /dev/null
+++ b/docs/handbook/meshmc/ui-system.md
@@ -0,0 +1,511 @@
+# UI System
+
+## Overview
+
+MeshMC's user interface is built on Qt6 Widgets. The UI follows a page-based navigation model, with a main window hosting instance management, and dialog-based workflows for settings, instance configuration, and account management.
+
+## Architecture
+
+### Key Classes
+
+| Class | File | Purpose |
+|---|---|---|
+| `MainWindow` | `ui/MainWindow.{h,cpp}` | Primary application window |
+| `InstanceWindow` | `ui/InstanceWindow.{h,cpp}` | Per-instance console window |
+| `PageDialog` | `ui/dialogs/PageDialog.{h,cpp}` | Dialog for page navigation |
+| `PageContainer` | `ui/pages/PageContainer.{h,cpp}` | Page management container |
+| `BasePage` | `ui/pages/BasePage.h` | Abstract page interface |
+| `InstanceView` | `ui/instanceview/InstanceView.{h,cpp}` | Grid/list instance view |
+| `WizardDialog` | `ui/setupwizard/SetupWizard.{h,cpp}` | First-run setup wizard |
+
+## MainWindow
+
+The central window of the application:
+
+```cpp
+class MainWindow : public QMainWindow
+{
+ Q_OBJECT
+public:
+ explicit MainWindow(QWidget* parent = 0);
+ ~MainWindow();
+
+ // Instance management
+ void setSelectedInstanceById(const QString& id);
+
+ // Window state
+ void checkInstancePathForProblems();
+
+public slots:
+ void instanceActivated(QModelIndex);
+ void instanceChanged(const QModelIndex& current, const QModelIndex& previous);
+
+ // Toolbar actions
+ void on_actionAddInstance_triggered();
+ void on_actionViewSelectedInstFolder_triggered();
+ void on_actionViewSelectedMCFolder_triggered();
+ void on_actionCopyInstance_triggered();
+ void on_actionDeleteInstance_triggered();
+ void on_actionExportInstance_triggered();
+ void on_actionLaunchInstance_triggered();
+ void on_actionLaunchInstanceOffline_triggered();
+ void on_actionKillInstance_triggered();
+
+ // Global actions
+ void on_actionSettings_triggered();
+ void on_actionManageAccounts_triggered();
+ void on_actionAbout_triggered();
+ void on_actionCAT_triggered();
+
+ // Group management
+ void on_actionRenameGroup_triggered();
+ void on_actionDeleteGroup_triggered();
+
+private:
+ // UI components
+ Ui::MainWindow* ui;
+ InstanceView* view;
+ QToolBar* instanceToolbar;
+ QToolBar* newsToolbar;
+ StatusLabel* m_statusLeft;
+ StatusLabel* m_statusCenter;
+ QMenu* accountMenu;
+ QMenu* skinMenu;
+
+ // Models
+ InstanceProxyModel* proxymodel;
+ GroupView* groupView;
+
+ // State
+ MinecraftAccountPtr m_selectedAccount;
+ BaseInstance::Ptr m_selectedInstance;
+};
+```
+
+### Toolbar Layout
+
+The main window has two toolbars:
+
+**Main Toolbar:**
+| Action | Shortcut | Description |
+|---|---|---|
+| Add Instance | Ctrl+N | Opens NewInstanceDialog |
+| Folders | — | Dropdown: instances, central mods, skins |
+| Settings | — | Opens global settings dialog |
+| Help | — | About, bug report, wiki, Discord |
+| Update | — | Check for updates (when available) |
+
+**Instance Toolbar** (shown when an instance is selected):
+| Action | Description |
+|---|---|
+| Launch | Start the selected instance |
+| Launch Offline | Start without authentication |
+| Kill | Force-stop a running instance |
+| Edit Instance | Open instance settings |
+| Edit Mods | Open mod management page |
+| View Folder | Open instance folder in file manager |
+| Copy Instance | Duplicate the instance |
+| Delete | Delete the instance |
+| Export | Export instance as zip/mrpack |
+
+### Account Selector
+
+The account selector is a dropdown menu in the toolbar:
+- Shows current default account name + skin icon
+- Lists all accounts with switch option
+- "Manage Accounts..." opens global account settings
+- "No Default Account" option
+
+### Instance View
+
+`InstanceView` displays instances in a grid with grouping:
+
+```cpp
+class InstanceView : public QAbstractItemView
+{
+ Q_OBJECT
+public:
+ void setModel(QAbstractItemModel* model) override;
+
+ // View modes
+ void setIconSize(QSize size);
+
+signals:
+ void droppedURLs(QList<QUrl> urls);
+
+protected:
+ void paintEvent(QPaintEvent* event) override;
+ void mousePressEvent(QMouseEvent* event) override;
+ void mouseDoubleClickEvent(QMouseEvent* event) override;
+ void dragEnterEvent(QDragEnterEvent* event) override;
+ void dropEvent(QDropEvent* event) override;
+};
+```
+
+Features:
+- Custom grid layout with grouped headings
+- Drag-and-drop support for instance import (zip/mrpack files)
+- Custom icon rendering with play-time overlay
+- Context menu with all instance actions
+- Group collapse/expand
+
+## Page System
+
+### BasePage Interface
+
+All settings and configuration pages implement `BasePage`:
+
+```cpp
+class BasePage : public QWidget
+{
+public:
+ virtual ~BasePage() {}
+
+ virtual QString id() const = 0;
+ virtual QString displayName() const = 0;
+ virtual QIcon icon() const = 0;
+ virtual QString helpPage() const { return QString(); }
+ virtual bool shouldDisplay() const { return true; }
+
+ virtual void opened() {}
+ virtual void closed() {}
+ virtual bool apply() { return true; }
+
+ virtual bool isOpened() const { return m_isOpened; }
+
+protected:
+ bool m_isOpened = false;
+};
+```
+
+### PageContainer
+
+Manages page navigation with a tree-based sidebar:
+
+```cpp
+class PageContainer : public QWidget
+{
+ Q_OBJECT
+public:
+ PageContainer(BasePageProvider* pages, QString defaultId = QString(),
+ QWidget* parent = nullptr);
+
+ void setPageProvider(BasePageProvider* pages);
+ BasePage* getPage(const QString& id);
+ const QList<BasePage*>& getPages() const;
+
+ void selectPage(const QString& id);
+
+private:
+ QTreeView* m_pageList;
+ QStackedWidget* m_pageStack;
+ QList<BasePage*> m_pages;
+};
+```
+
+### PageDialog
+
+Wraps a `PageContainer` in a dialog:
+
+```cpp
+class PageDialog : public QDialog
+{
+ Q_OBJECT
+public:
+ PageDialog(BasePageProvider* pages, QString title = QString(),
+ QWidget* parent = nullptr);
+
+ void accept() override;
+
+private:
+ PageContainer* m_container;
+};
+```
+
+Used for both global settings and instance settings.
+
+## Global Settings Pages
+
+### MeshMCPage
+- Update channel selection (stable/beta)
+- Auto-update toggle
+- Analytics opt-in/out
+- Instance sort mode
+
+### MinecraftPage
+- Default window dimensions (width × height)
+- Launch maximized toggle
+- Console visibility settings
+- Performance settings
+
+### JavaPage
+- Java path (manual or auto-detect)
+- Memory allocation (min/max heap)
+- JVM arguments
+- Java compatibility warnings toggle
+- "Auto-detect" button triggers `JavaUtils::FindJavaPaths()`
+- "Test" button verifies selected Java installation
+
+### LanguagePage
+- Language selector (populated from `translations/`)
+- Live preview of selected language
+
+### ProxyPage
+- Proxy type: None / SOCKS5 / HTTP
+- Proxy address, port
+- Authentication (username/password)
+
+### ExternalToolsPage
+- Profiler paths (JProfiler, JVisualVM, MCEdit)
+- Custom editor path
+
+### AccountListPage
+- Account list with status indicators
+- Add/Remove/Set Default/Refresh buttons
+- Skin preview panel
+
+### PasteEEPage
+- Paste service URL configuration
+
+### CustomCommandsPage
+- Pre-launch command
+- Wrapper command
+- Post-exit command
+
+### AppearancePage
+- Application theme selector
+- Icon theme selector
+- Cat style (kitteh/rory)
+
+## Instance Pages
+
+When editing an instance (`PageDialog` with instance pages):
+
+### VersionPage
+- Component list (Minecraft version, mod loaders)
+- Add/Remove/Change version of components
+- Component ordering (move up/down)
+
+### ModFolderPage
+- Mod list with enable/disable toggles
+- Add from file / Add from CurseForge / Add from Modrinth
+- Remove selected mods
+- View mod details
+
+### LogPage
+- Live log output from running instance
+- Search/filter log content
+- Upload to paste service
+- Copy to clipboard
+- Auto-scroll toggle
+
+### InstanceSettingsPage
+- Java override (checkbox + path)
+- Memory override (checkbox + min/max)
+- JVM args override
+- Window size override
+- Console settings override
+- Custom commands override
+
+### WorldListPage
+- List of worlds with metadata
+- Backup/Restore/Delete operations
+- Datapacks submenu
+
+### ScreenshotsPage
+- Grid view of instance screenshots
+- Open in file manager
+- Delete selected
+- Upload to Imgur
+
+### ResourcePackPage / TexturePackPage
+- Resource/texture pack list
+- Add/Remove
+- Enable/disable
+
+### NotesPage
+- Free-text notes field per instance
+
+### ServersPage
+- Server list for the instance
+- Add/Edit/Remove servers
+
+## Dialogs
+
+### NewInstanceDialog
+
+Multi-tab dialog for creating new instances:
+
+| Tab | Source |
+|---|---|
+| Vanilla | Select Minecraft version |
+| Import | Import from zip/mrpack URL or file |
+| CurseForge | Browse CurseForge modpacks |
+| Modrinth | Browse Modrinth modpacks |
+| ATLauncher | ATLauncher pack listing |
+| FTB | FTB pack listing |
+| Technic | Technic Platform packs |
+
+Each tab provides search, filtering, and version selection. On confirmation, the appropriate import/creation task is started.
+
+### CopyInstanceDialog
+
+Options for duplicating an instance:
+- New name
+- New group assignment
+- Copy saves (toggle)
+- Keep play time (toggle)
+
+### ExportInstanceDialog
+
+Export format selection and file exclusion:
+- Format: MeshMC zip / Modrinth mrpack / CurseForge manifest
+- File tree with checkboxes for selective export
+- Exclusion filter patterns
+
+### MSALoginDialog
+
+Microsoft account login flow (see Account Management):
+- Displays login URL with copy button
+- Shows authentication progress
+- Error display on failure
+
+### ProfileSelectDialog
+
+Account selection when no default is set:
+- Account list with radio selection
+- "Use selected" / Cancel
+
+### ProfileSetupDialog
+
+First-time Minecraft profile setup:
+- Username entry
+- Validates username availability
+- Creates profile via Mojang API
+
+### SkinUploadDialog
+
+Skin upload interface:
+- File picker for skin PNG
+- Variant selector (classic/slim)
+- Preview
+
+### AboutDialog
+
+Application information:
+- Version, build info, Qt version
+- License (GPL-3.0-or-later)
+- Credits and contributors
+
+### UpdateDialog
+
+Update notification:
+- Version comparison
+- Changelog display
+- Update / Skip buttons
+
+## Setup Wizard
+
+`SetupWizard` runs on first launch or when required:
+
+```cpp
+class SetupWizard : public QWizard
+{
+ Q_OBJECT
+public:
+ SetupWizard(QWidget* parent = nullptr);
+
+ // Pages added conditionally
+ void addLanguagePage();
+ void addJavaPage();
+ void addAnalyticsPage();
+ void addPasteEEPage();
+};
+```
+
+Pages are added based on what needs configuration:
+- **LanguagePage** — if no language is set
+- **JavaPage** — if no valid Java is detected
+- **AnalyticsPage** — if analytics consent is not recorded
+- **PasteEEPage** — if paste service is not configured
+
+## Widget Components
+
+### StatusLabel
+
+Custom label widget for the status bar:
+- Elides long text
+- Supports click-to-copy
+- Used for status area in MainWindow
+
+### InstanceDelegate
+
+Custom item delegate for `InstanceView`:
+- Renders instance icon, name, and status
+- Shows play time overlay
+- Running state indicator
+
+### ProgressWidget
+
+Shared progress display widget:
+- Progress bar with percentage
+- Status text label
+- Cancel button
+- Used by download dialogs, import tasks, etc.
+
+### IconPickerDialog
+
+Instance icon selection:
+- Built-in icon library
+- Custom icon upload
+- Icon theme integration
+
+## Event Handling
+
+### Instance Double-Click
+
+```
+MainWindow::instanceActivated(QModelIndex)
+ │
+ ├── If instance is running → open InstanceWindow
+ └── If instance is stopped → launch instance
+```
+
+### Instance Launch
+
+```
+MainWindow::on_actionLaunchInstance_triggered()
+ │
+ ├── Resolve account (default or prompt with ProfileSelectDialog)
+ ├── Create LaunchController
+ │ ├── Set instance, account, main window
+ │ └── connect succeeded/failed signals
+ └── LaunchController::start()
+```
+
+### Drag-and-Drop Import
+
+```
+InstanceView::dropEvent(QDropEvent)
+ │
+ ├── Extract URLs from QMimeData
+ ├── Filter for .zip, .mrpack files
+ └── For each URL → open NewInstanceDialog with import tab pre-selected
+```
+
+## Window Management
+
+`Application` manages window lifecycle:
+
+```cpp
+// Track open windows
+QList<QWidget*> m_openWindows;
+
+// Show instance window
+void Application::showInstanceWindow(InstancePtr instance);
+
+// Main window
+MainWindow* Application::showMainWindow();
+```
+
+Multiple windows can be open simultaneously (main window + instance console windows). The application exits when all windows are closed.
diff --git a/docs/handbook/meta/architecture.md b/docs/handbook/meta/architecture.md
new file mode 100644
index 0000000000..6790a82fe5
--- /dev/null
+++ b/docs/handbook/meta/architecture.md
@@ -0,0 +1,624 @@
+# Meta — Architecture
+
+## Module Structure
+
+Meta is organized as a standard Python package with three sub-packages, each serving a distinct role in the pipeline:
+
+```
+meta/
+├── __init__.py # Package marker: """Meta package of meta"""
+├── common/ # Shared utilities, constants, and static data
+├── model/ # Pydantic data models for all upstream formats
+└── run/ # Executable scripts (update_*, generate_*, index)
+```
+
+### Dependency Flow
+
+```
+run/ ──depends-on──► model/ ──depends-on──► common/
+run/ ──depends-on──► common/
+```
+
+The `run/` modules import from both `model/` and `common/`. The `model/` package imports from `common/` for shared constants and utilities. There are **no** circular dependencies.
+
+---
+
+## The `common/` Package
+
+### `common/__init__.py` — Core Utilities
+
+This module provides the foundational infrastructure used by every other module:
+
+#### Path Resolution
+
+Three functions resolve working directories from environment variables, falling back to hard-coded relative-path defaults:
+
+```python
+def cache_path():
+ if "META_CACHE_DIR" in os.environ:
+ return os.environ["META_CACHE_DIR"]
+ return "cache"
+
+def launcher_path():
+ if "META_LAUNCHER_DIR" in os.environ:
+ return os.environ["META_LAUNCHER_DIR"]
+ return "launcher"
+
+def upstream_path():
+ if "META_UPSTREAM_DIR" in os.environ:
+ return os.environ["META_UPSTREAM_DIR"]
+ return "upstream"
+```
+
+#### Directory Creation
+
+```python
+def ensure_upstream_dir(path):
+ """Create a subdirectory under the upstream root."""
+ path = os.path.join(upstream_path(), path)
+ if not os.path.exists(path):
+ os.makedirs(path)
+
+def ensure_component_dir(component_id: str):
+ """Create a component directory under the launcher root."""
+ path = os.path.join(launcher_path(), component_id)
+ if not os.path.exists(path):
+ os.makedirs(path)
+```
+
+#### HTTP Session Factory
+
+```python
+def default_session():
+ cache = FileCache(os.path.join(cache_path(), "http_cache"))
+ sess = CacheControl(requests.Session(), cache)
+ sess.headers.update({"User-Agent": "ProjectTickMeta/1.0"})
+ return sess
+```
+
+All HTTP requests are made through this cached session. The `CacheControl` wrapper stores responses in a disk-backed `FileCache`, honoring standard HTTP caching headers.
+
+#### Datetime Serialization
+
+```python
+def serialize_datetime(dt: datetime.datetime):
+ if dt.tzinfo is None:
+ return dt.replace(tzinfo=datetime.timezone.utc).isoformat()
+ return dt.isoformat()
+```
+
+Used as Pydantic's custom JSON encoder for `datetime` fields — naive datetimes are assumed UTC.
+
+#### File Hashing
+
+```python
+def file_hash(filename: str, hashtype: Callable[[], "hashlib._Hash"], blocksize: int = 65536) -> str:
+ hashtype = hashtype()
+ with open(filename, "rb") as f:
+ for block in iter(lambda: f.read(blocksize), b""):
+ hashtype.update(block)
+ return hashtype.hexdigest()
+```
+
+Used throughout the pipeline for SHA-1 and SHA-256 integrity checksums on installer JARs and version files.
+
+#### SHA-1 Caching
+
+```python
+def get_file_sha1_from_file(file_name: str, sha1_file: str) -> Optional[str]:
+```
+
+Reads a cached `.sha1` sidecar file if it exists; otherwise computes and writes the SHA-1 hash. Used by Forge/NeoForge update scripts to detect when an installer JAR needs re-downloading.
+
+#### Other Utilities
+
+| Function | Purpose |
+|---|---|
+| `transform_maven_key(key)` | Replaces `:` with `.` in Maven coordinates for filesystem paths |
+| `replace_old_launchermeta_url(url)` | Rewrites `launchermeta.mojang.com` → `piston-meta.mojang.com` |
+| `merge_dict(base, overlay)` | Deep-merges two dicts (base provides defaults, overlay wins) |
+| `get_all_bases(cls)` | Recursively collects all (transitive) base classes of a class |
+| `remove_files(file_paths)` | Silently deletes a list of files |
+| `eprint(*args)` | Prints to stderr |
+| `LAUNCHER_MAVEN` | URL template: `"https://files.projecttick.org/maven/%s"` |
+
+### `common/http.py` — Binary Downloads
+
+A single function:
+
+```python
+def download_binary_file(sess, path, url):
+ with open(path, "wb") as f:
+ r = sess.get(url)
+ r.raise_for_status()
+ for chunk in r.iter_content(chunk_size=128):
+ f.write(chunk)
+```
+
+Used to download Forge/NeoForge installer JARs and Fabric/Quilt JAR files.
+
+### `common/mojang.py` — Mojang Constants
+
+```python
+BASE_DIR = "mojang"
+VERSION_MANIFEST_FILE = join(BASE_DIR, "version_manifest_v2.json")
+VERSIONS_DIR = join(BASE_DIR, "versions")
+ASSETS_DIR = join(BASE_DIR, "assets")
+
+STATIC_EXPERIMENTS_FILE = join(dirname(__file__), "mojang-minecraft-experiments.json")
+STATIC_OLD_SNAPSHOTS_FILE = join(dirname(__file__), "mojang-minecraft-old-snapshots.json")
+STATIC_OVERRIDES_FILE = join(dirname(__file__), "mojang-minecraft-legacy-override.json")
+STATIC_LEGACY_SERVICES_FILE = join(dirname(__file__), "mojang-minecraft-legacy-services.json")
+LIBRARY_PATCHES_FILE = join(dirname(__file__), "mojang-library-patches.json")
+
+MINECRAFT_COMPONENT = "net.minecraft"
+LWJGL_COMPONENT = "org.lwjgl"
+LWJGL3_COMPONENT = "org.lwjgl3"
+JAVA_MANIFEST_FILE = join(BASE_DIR, "java_all.json")
+```
+
+### `common/forge.py` — Forge Constants
+
+```python
+BASE_DIR = "forge"
+JARS_DIR = join(BASE_DIR, "jars")
+INSTALLER_INFO_DIR = join(BASE_DIR, "installer_info")
+INSTALLER_MANIFEST_DIR = join(BASE_DIR, "installer_manifests")
+VERSION_MANIFEST_DIR = join(BASE_DIR, "version_manifests")
+FILE_MANIFEST_DIR = join(BASE_DIR, "files_manifests")
+DERIVED_INDEX_FILE = join(BASE_DIR, "derived_index.json")
+LEGACYINFO_FILE = join(BASE_DIR, "legacyinfo.json")
+
+FORGE_COMPONENT = "net.minecraftforge"
+
+FORGEWRAPPER_LIBRARY = make_launcher_library(
+ GradleSpecifier("io.github.zekerzhayard", "ForgeWrapper", "projt-2026-04-04"),
+ "4c4653d80409e7e968d3e3209196ffae778b7b4e",
+ 29731,
+)
+
+BAD_VERSIONS = ["1.12.2-14.23.5.2851"]
+```
+
+The `FORGEWRAPPER_LIBRARY` is a pre-built `Library` object pointing to a custom ForgeWrapper build hosted on the ProjT Maven. ForgeWrapper acts as a shim layer to run modern Forge installers at launch time.
+
+### `common/neoforge.py` — NeoForge Constants
+
+```python
+BASE_DIR = "neoforge"
+NEOFORGE_COMPONENT = "net.neoforged"
+```
+
+Similar directory layout to Forge, but with its own `DERIVED_INDEX_FILE`.
+
+### `common/fabric.py` — Fabric Constants
+
+```python
+BASE_DIR = "fabric"
+JARS_DIR = join(BASE_DIR, "jars")
+INSTALLER_INFO_DIR = join(BASE_DIR, "loader-installer-json")
+META_DIR = join(BASE_DIR, "meta-v2")
+
+LOADER_COMPONENT = "net.fabricmc.fabric-loader"
+INTERMEDIARY_COMPONENT = "net.fabricmc.intermediary"
+
+DATETIME_FORMAT_HTTP = "%a, %d %b %Y %H:%M:%S %Z"
+```
+
+### `common/quilt.py` — Quilt Constants
+
+```python
+USE_QUILT_MAPPINGS = False # Quilt recommends using Fabric's intermediary
+
+BASE_DIR = "quilt"
+LOADER_COMPONENT = "org.quiltmc.quilt-loader"
+INTERMEDIARY_COMPONENT = "org.quiltmc.hashed"
+
+# If USE_QUILT_MAPPINGS is False, uses Fabric's intermediary instead
+if not USE_QUILT_MAPPINGS:
+ INTERMEDIARY_COMPONENT = FABRIC_INTERMEDIARY_COMPONENT
+
+DISABLE_BEACON_ARG = "-Dloader.disable_beacon=true"
+DISABLE_BEACON_VERSIONS = {
+ "0.19.2-beta.3", "0.19.2-beta.4", ..., "0.20.0-beta.14",
+}
+```
+
+The `DISABLE_BEACON_VERSIONS` set enumerates Quilt Loader versions that had a telemetry beacon, which is disabled via a JVM argument.
+
+### `common/java.py` — Java Runtime Constants
+
+```python
+BASE_DIR = "java_runtime"
+ADOPTIUM_DIR = join(BASE_DIR, "adoptium")
+OPENJ9_DIR = join(BASE_DIR, "ibm")
+AZUL_DIR = join(BASE_DIR, "azul")
+
+JAVA_MINECRAFT_COMPONENT = "net.minecraft.java"
+JAVA_ADOPTIUM_COMPONENT = "net.adoptium.java"
+JAVA_OPENJ9_COMPONENT = "com.ibm.java"
+JAVA_AZUL_COMPONENT = "com.azul.java"
+```
+
+---
+
+## The `model/` Package
+
+All data models inherit from `MetaBase`, which is a customized `pydantic.BaseModel`.
+
+### Inheritance Hierarchy
+
+```
+pydantic.BaseModel
+└── MetaBase
+ ├── Versioned (adds formatVersion field)
+ │ ├── MetaVersion # Primary output format
+ │ ├── MetaPackage # Package metadata
+ │ ├── MetaVersionIndex # Version list per package
+ │ └── MetaPackageIndex # Master package list
+ │
+ ├── MojangArtifactBase # sha1, sha256, size, url
+ │ ├── MojangAssets # Asset index metadata
+ │ ├── MojangArtifact # Library artifact with path
+ │ └── MojangLoggingArtifact # Logging config artifact
+ │
+ ├── Library # Minecraft library reference
+ │ ├── JavaAgent # Library with Java agent argument
+ │ ├── ForgeLibrary # Forge-specific library fields
+ │ └── NeoForgeLibrary # NeoForge-specific library fields
+ │
+ ├── GradleSpecifier # Maven coordinate (not a MetaBase)
+ ├── Dependency # Component dependency (uid + version)
+ │
+ ├── MojangVersion # Raw Mojang version JSON
+ │ ├── ForgeVersionFile # Forge version JSON (extends Mojang)
+ │ └── NeoForgeVersionFile # NeoForge version JSON
+ │
+ ├── OSRule / MojangRule / MojangRules # Platform rules
+ │
+ ├── ForgeEntry / NeoForgeEntry # Version index entries
+ ├── DerivedForgeIndex / DerivedNeoForgeIndex # Reconstructed indexes
+ ├── ForgeInstallerProfile / ForgeInstallerProfileV2 # Installer data
+ │
+ ├── FabricInstallerDataV1 # Fabric loader installer info
+ ├── FabricJarInfo # JAR release timestamp
+ │
+ ├── LiteloaderIndex # Full LiteLoader metadata
+ │
+ ├── JavaRuntimeMeta # Normalized Java runtime info
+ ├── JavaRuntimeVersion # MetaVersion with runtimes list
+ ├── JavaVersionMeta # Semver-style Java version
+ │
+ └── APIQuery # Base for API URL query builders
+ ├── AdoptxAPIFeatureReleasesQuery
+ └── AzulApiPackagesQuery
+```
+
+### `MetaBase` — The Foundation
+
+```python
+class MetaBase(pydantic.BaseModel):
+ def dict(self, **kwargs):
+ return super().dict(by_alias=True, **kwargs)
+
+ def json(self, **kwargs):
+ return super().json(
+ exclude_none=True, sort_keys=True, by_alias=True, indent=4, **kwargs
+ )
+
+ def write(self, file_path: str):
+ Path(file_path).parent.mkdir(parents=True, exist_ok=True)
+ with open(file_path, "w") as f:
+ f.write(self.json())
+
+ def merge(self, other: "MetaBase"):
+ """Merge other into self: concatenate lists, union sets, deep-merge dicts,
+ recurse on MetaBase fields, overwrite primitives."""
+
+ class Config:
+ allow_population_by_field_name = True
+ json_encoders = {datetime: serialize_datetime, GradleSpecifier: str}
+```
+
+Key design decisions:
+- **`by_alias=True`** everywhere: Field aliases like `mainClass`, `releaseTime`, `assetIndex` match the JSON format the launcher expects.
+- **`exclude_none=True`**: Optional fields that aren't set are omitted from output.
+- **`sort_keys=True`**: Deterministic output for diff-friendly Git commits.
+- **`write()`**: Every model can serialize itself to disk, creating parent directories as needed.
+
+### `MetaVersion` — The Core Output Model
+
+This is the primary data structure that Meta produces. Each component version (Minecraft 1.21.5, Forge 49.0.31, Fabric Loader 0.16.9, etc.) is represented as a `MetaVersion`:
+
+```python
+class MetaVersion(Versioned):
+ name: str # Human-readable name ("Minecraft", "Forge", etc.)
+ version: str # Version string
+ uid: str # Component UID ("net.minecraft", "net.minecraftforge")
+ type: Optional[str] # "release", "snapshot", "experiment", "old_snapshot"
+ order: Optional[int] # Load order (-2=MC, -1=LWJGL, 5=Forge, 10=Fabric)
+ volatile: Optional[bool] # If true, may change between runs (e.g., LWJGL, mappings)
+ requires: Optional[List[Dependency]] # Required components (with version constraints)
+ conflicts: Optional[List[Dependency]] # Conflicting components
+ libraries: Optional[List[Library]] # Runtime classpath libraries
+ asset_index: Optional[MojangAssets] # Asset index reference
+ maven_files: Optional[List[Library]] # Install-time Maven downloads
+ main_jar: Optional[Library] # Main game JAR
+ jar_mods: Optional[List[Library]] # Legacy JAR mod injection
+ main_class: Optional[str] # Java main class
+ applet_class: Optional[str] # Legacy applet class
+ minecraft_arguments: Optional[str] # Game launch arguments
+ release_time: Optional[datetime] # When this version was released
+ compatible_java_majors: Optional[List[int]] # Compatible Java major versions
+ compatible_java_name: Optional[str] # Mojang Java component name
+ java_agents: Optional[List[JavaAgent]] # Java agent libraries
+ additional_traits: Optional[List[str]] # Launcher behavior hints
+ additional_tweakers: Optional[List[str]]# Forge/LiteLoader tweaker classes
+ additional_jvm_args: Optional[List[str]]# Extra JVM arguments
+ logging: Optional[MojangLogging] # Log4j logging configuration
+```
+
+### `GradleSpecifier` — Maven Coordinates
+
+Not a Pydantic model but a core class used as a Pydantic-compatible type:
+
+```python
+class GradleSpecifier:
+ """Maven coordinate like 'org.lwjgl.lwjgl:lwjgl:2.9.0' or
+ 'com.mojang:minecraft:1.21.5:client'"""
+
+ def __init__(self, group, artifact, version, classifier=None, extension=None):
+ # extension defaults to "jar"
+
+ def filename(self): # e.g., "lwjgl-2.9.0.jar"
+ def base(self): # e.g., "org/lwjgl/lwjgl/lwjgl/2.9.0/"
+ def path(self): # e.g., "org/lwjgl/lwjgl/lwjgl/2.9.0/lwjgl-2.9.0.jar"
+    def is_lwjgl(self):   # True for org.lwjgl, org.lwjgl.lwjgl, net.java.jinput, net.java.jutils
+ def is_log4j(self): # True for org.apache.logging.log4j
+
+ @classmethod
+ def from_string(cls, v: str):
+ # Parses "group:artifact:version[:classifier][@extension]"
+```
+
+This class supports Pydantic validators via `__get_validators__`, comparison operators for sorting, and hashing for use in sets.
+
+---
+
+## The `run/` Package
+
+### Module Naming Convention
+
+Every module follows a strict naming pattern:
+
+- `update_<loader>.py` — Phase 1: fetch upstream data
+- `generate_<loader>.py` — Phase 2: produce launcher metadata
+- `index.py` — Final step: build the master package index
+
+### Common Patterns Across Run Modules
+
+1. **Module-level initialization**: Upstream directories are created at import time:
+ ```python
+ UPSTREAM_DIR = upstream_path()
+ ensure_upstream_dir(JARS_DIR)
+ sess = default_session()
+ ```
+
+2. **`main()` entry point**: Every module exposes a `main()` function called by `update.sh` or the Poetry entrypoints.
+
+3. **Concurrent processing**: Most modules use `concurrent.futures.ThreadPoolExecutor`. The Fabric updater uniquely uses `multiprocessing.Pool`.
+
+4. **Error handling**: Errors during version processing skip individual versions rather than failing the entire pipeline (with `eprint()` logging).
+
+---
+
+## Pipeline Architecture
+
+### Data Flow Diagram
+
+```
+┌──────────────────┐ ┌──────────────────┐ ┌──────────────────┐
+│ Upstream APIs │ │ upstream/ repo │ │ launcher/ repo │
+│ (Mojang, Forge, │────►│ (raw JSON, JARs, │────►│ (MetaVersion │
+│ NeoForge, etc.)│ │ manifests) │ │ JSON files) │
+└──────────────────┘ └──────────────────┘ └──────────────────┘
+ Phase 1: UPDATE Intermediate Phase 2: GENERATE
+ (fetch + cache) Storage (transform + write)
+```
+
+### Phase 1: Update Pipeline
+
+Each `update_*` module follows this pattern:
+
+1. **Fetch index**: GET the version manifest or release list from the upstream API.
+2. **Diff against local**: Compare remote version list with what's already in `upstream/`.
+3. **Download new/changed**: Fetch only what's new or modified.
+4. **Extract install data**: For Forge/NeoForge, extract `install_profile.json` and `version.json` from installer JARs.
+5. **Write to upstream**: Serialize all data to `upstream/<loader>/`.
+
+### Phase 2: Generate Pipeline
+
+Each `generate_*` module follows this pattern:
+
+1. **Load upstream data**: Parse the raw data from `upstream/`.
+2. **Transform**: Convert upstream-specific models into `MetaVersion` objects.
+3. **Apply patches**: Merge library patches, override legacy data, fix known issues.
+4. **Write to launcher**: Serialize `MetaVersion` and `MetaPackage` JSON into `launcher/<component_uid>/`.
+
+### Index Building
+
+The `index.py` module (Phase 2, final step):
+
+1. Walks every directory in `launcher/`.
+2. Reads each `package.json` to get package metadata.
+3. For each version file, computes SHA-256 and creates a `MetaVersionIndexEntry`.
+4. Sorts versions by `release_time` (descending).
+5. Writes per-package `index.json` files.
+6. Produces the master `index.json` with SHA-256 hashes of each package index.
+
+```python
+# From index.py — the core indexing logic:
+for package in sorted(os.listdir(LAUNCHER_DIR)):
+ sharedData = MetaPackage.parse_file(package_json_path)
+ versionList = MetaVersionIndex(uid=package, name=sharedData.name)
+
+ for filename in os.listdir(package_path):
+ filehash = file_hash(filepath, hashlib.sha256)
+ versionFile = MetaVersion.parse_file(filepath)
+ is_recommended = versionFile.version in recommendedVersions
+ versionEntry = MetaVersionIndexEntry.from_meta_version(
+ versionFile, is_recommended, filehash
+ )
+ versionList.versions.append(versionEntry)
+
+ versionList.versions = sorted(
+ versionList.versions, key=attrgetter("release_time"), reverse=True
+ )
+ versionList.write(outFilePath)
+```
+
+---
+
+## Component Dependency Graph
+
+Components declare dependencies via the `requires` and `conflicts` fields:
+
+```
+net.minecraft
+├── requires: org.lwjgl (or org.lwjgl3)
+│
+net.minecraftforge
+├── requires: net.minecraft (equals=<mc_version>)
+│
+net.neoforged
+├── requires: net.minecraft (equals=<mc_version>)
+│
+net.fabricmc.fabric-loader
+├── requires: net.fabricmc.intermediary
+│
+net.fabricmc.intermediary
+├── requires: net.minecraft (equals=<mc_version>)
+│
+org.quiltmc.quilt-loader
+├── requires: net.fabricmc.intermediary (or org.quiltmc.hashed)
+│
+org.lwjgl ◄──conflicts──► org.lwjgl3
+```
+
+---
+
+## Version Ordering
+
+The `order` field controls in what sequence the launcher processes components:
+
+| Order | Component |
+|---|---|
+| -2 | `net.minecraft` |
+| -1 | `org.lwjgl` / `org.lwjgl3` |
+| 5 | `net.minecraftforge` / `net.neoforged` |
+| 10 | `net.fabricmc.fabric-loader` / `org.quiltmc.quilt-loader` |
+| 11 | `net.fabricmc.intermediary` |
+
+Lower order = loaded first. Minecraft is always the base layer, LWJGL provides native libraries, then mod loaders layer on top.
+
+---
+
+## Library Patching System
+
+The `LibraryPatches` system (`model/mojang.py`) allows surgically modifying or extending libraries in generated Minecraft versions:
+
+```python
+class LibraryPatch(MetaBase):
+ match: List[GradleSpecifier] # Which libraries to match
+ override: Optional[Library] # Fields to merge into matched lib
+ additionalLibraries: Optional[List[Library]] # Extra libs to add
+ patchAdditionalLibraries: bool = False # Recurse on additions?
+```
+
+The `patch_library()` function in `generate_mojang.py` applies patches:
+
+```python
+def patch_library(lib: Library, patches: LibraryPatches) -> List[Library]:
+ to_patch = [lib]
+ new_libraries = []
+ while to_patch:
+ target = to_patch.pop(0)
+ for patch in patches:
+ if patch.applies(target):
+ if patch.override:
+ target.merge(patch.override)
+ if patch.additionalLibraries:
+ additional_copy = copy.deepcopy(patch.additionalLibraries)
+ new_libraries += list(dict.fromkeys(additional_copy))
+ if patch.patchAdditionalLibraries:
+ to_patch += additional_copy
+ return new_libraries
+```
+
+This system is used to inject missing ARM64 natives, add supplementary libraries, and fix broken upstream metadata.
+
+---
+
+## LWJGL Version Selection
+
+One of the most complex parts of `generate_mojang.py` is LWJGL version deduplication. Mojang's version manifests include LWJGL libraries inline with every Minecraft version, but the launcher manages LWJGL as a separate component. Meta must:
+
+1. **Extract** LWJGL libraries from each Minecraft version.
+2. **Group** them into "variants" by hashing the library set (excluding release time).
+3. **Select** the correct variant using curated lists (`PASS_VARIANTS` and `BAD_VARIANTS`).
+4. **Write** each unique LWJGL version as its own component file.
+
+```python
+PASS_VARIANTS = [
+ "1fd0e4d1f0f7c97e8765a69d38225e1f27ee14ef", # 3.4.1
+ "2b00f31688148fc95dbc8c8ef37308942cf0dce0", # 3.3.6
+ ...
+]
+
+BAD_VARIANTS = [
+ "6442fc475f501fbd0fc4244fd1c38c02d9ebaf7e", # 3.3.3 (broken freetype)
+ ...
+]
+```
+
+Each LWJGL variant is identified by a SHA-1 hash of its serialized library list. Only variants in `PASS_VARIANTS` are accepted; those in `BAD_VARIANTS` are rejected; unknown variants raise an exception.
+
+---
+
+## Split Natives Workaround
+
+Modern Minecraft versions (1.19+) use "split natives" — native libraries are separate Maven artifacts with classifiers like `natives-linux`, `natives-windows`, etc. The launcher has a bug handling these, so Meta applies a workaround:
+
+```python
+APPLY_SPLIT_NATIVES_WORKAROUND = True
+
+if APPLY_SPLIT_NATIVES_WORKAROUND and lib_is_split_native(lib):
+ specifier.artifact += f"-{specifier.classifier}"
+ specifier.classifier = None
+```
+
+This merges the classifier into the artifact name, effectively renaming `lwjgl:3.3.3:natives-linux` to `lwjgl-natives-linux:3.3.3`.
+
+---
+
+## ForgeWrapper Integration
+
+Modern Forge (post-1.12.2) uses an installer-based system that runs processors at install time. The launcher cannot run these processors directly, so Meta injects ForgeWrapper as the main class:
+
+```python
+v.main_class = "io.github.zekerzhayard.forgewrapper.installer.Main"
+```
+
+ForgeWrapper runs the Forge installer's processors transparently when the game is first launched. The installer JAR itself is included under `mavenFiles` so the launcher downloads it alongside regular libraries.
+
+---
+
+## Error Recovery and Resilience
+
+The pipeline is designed to be resumable and fault-tolerant:
+
+1. **Incremental updates**: Only new or changed versions are downloaded.
+2. **SHA-1 verification**: Installer JARs are re-downloaded if their SHA-1 changes.
+3. **Cached intermediates**: Installer profiles and manifests are cached to disk.
+4. **Git reset on failure**: `update.sh` runs `git reset --hard HEAD` on the upstream/launcher repos before starting, and again on failure via `fail_in()`/`fail_out()`.
+5. **Per-version error handling**: A failing version logs an error but doesn't abort the entire pipeline.
diff --git a/docs/handbook/meta/data-models.md b/docs/handbook/meta/data-models.md
new file mode 100644
index 0000000000..c5fd81ad76
--- /dev/null
+++ b/docs/handbook/meta/data-models.md
@@ -0,0 +1,582 @@
+# Meta — Data Models
+
+## Overview
+
+Meta uses **Pydantic v1** (`pydantic==1.10.13`) for all data models. Every model inherits from `MetaBase`, which standardizes JSON serialization and provides merge semantics. The models serve two purposes:
+
+1. **Upstream models** — parse vendor API responses (Mojang, Forge, Fabric, Adoptium, Azul, etc.)
+2. **Output models** — produce launcher-compatible JSON (`MetaVersion`, `MetaPackage`, `JavaRuntimeVersion`)
+
+---
+
+## Core Base Classes
+
+### `MetaBase`
+
+The root class for all Meta models:
+
+```python
+class MetaBase(pydantic.BaseModel):
+ def dict(self, **kwargs):
+ return super().dict(by_alias=True, **kwargs)
+
+ def json(self, **kwargs):
+ return super().json(
+ exclude_none=True, sort_keys=True,
+ by_alias=True, indent=4, **kwargs,
+ )
+
+ def write(self, file_path: str):
+ Path(file_path).parent.mkdir(parents=True, exist_ok=True)
+ with open(file_path, "w") as f:
+ f.write(self.json())
+
+ class Config:
+ allow_population_by_field_name = True
+ json_encoders = {
+ datetime: serialize_datetime,
+ GradleSpecifier: str,
+ }
+```
+
+Key design decisions:
+- **`by_alias=True`** always — JSON keys use aliases (camelCase) rather than Python field names (snake_case).
+- **`exclude_none=True`** — `None` fields are omitted from JSON output, keeping files compact.
+- **`sort_keys=True`** — deterministic key ordering enables meaningful git diffs.
+- **`indent=4`** — human-readable output.
+- **`allow_population_by_field_name=True`** — allows constructing models using either the Python name or the alias.
+- **`write()`** — creates parent directories automatically.
+
+### `MetaBase.merge()`
+
+Deep merge of two model instances:
+
+```python
+def merge(self, other: "MetaBase"):
+ assert type(other) is type(self)
+ for key, field in self.__fields__.items():
+ ours = getattr(self, key)
+ theirs = getattr(other, key)
+ if theirs is None:
+ continue
+ if ours is None:
+ setattr(self, key, theirs)
+ continue
+ if isinstance(ours, list):
+ ours += theirs # concatenate lists
+ elif isinstance(ours, set):
+ ours |= theirs # union sets
+ elif isinstance(ours, dict):
+ result = merge_dict(ours, copy.deepcopy(theirs)) # deep merge dicts
+ setattr(self, key, result)
+ elif MetaBase in get_all_bases(field.type_):
+ ours.merge(theirs) # recursive merge for nested MetaBase
+ else:
+ setattr(self, key, theirs) # overwrite scalars
+```
+
+Used in `generate_mojang.py` when merging LWJGL component data across versions.
+
+### `Versioned`
+
+Adds format versioning:
+
+```python
+META_FORMAT_VERSION = 1
+
+class Versioned(MetaBase):
+ @validator("format_version")
+ def format_version_must_be_supported(cls, v: int):
+ assert v <= META_FORMAT_VERSION
+ return v
+
+ format_version: int = Field(META_FORMAT_VERSION, alias="formatVersion")
+```
+
+All output files include `"formatVersion": 1`.
+
+---
+
+## `GradleSpecifier`
+
+A Maven coordinate parser, not a Pydantic model but integrated with Pydantic via `__get_validators__`:
+
+```python
+class GradleSpecifier:
+ def __init__(self, group, artifact, version, classifier=None, extension=None):
+ if extension is None:
+ extension = "jar"
+ self.group = group
+ self.artifact = artifact
+ self.version = version
+ self.classifier = classifier
+ self.extension = extension
+```
+
+### Parsing
+
+```python
+@classmethod
+def from_string(cls, v: str):
+ ext_split = v.split("@")
+ components = ext_split[0].split(":")
+ group = components[0]
+ artifact = components[1]
+ version = components[2]
+ extension = ext_split[1] if len(ext_split) == 2 else None
+ classifier = components[3] if len(components) == 4 else None
+ return cls(group, artifact, version, classifier, extension)
+```
+
+Examples:
+- `"org.lwjgl.lwjgl:lwjgl:2.9.0"` → `group="org.lwjgl.lwjgl"`, `artifact="lwjgl"`, `version="2.9.0"`
+- `"net.minecraft:client:1.20.4:slim@jar"` → includes classifier `"slim"` and explicit extension `"jar"`
+
+### Path Generation
+
+```python
+def base(self):
+ return "%s/%s/%s/" % (self.group.replace(".", "/"), self.artifact, self.version)
+
+def filename(self):
+ if self.classifier:
+ return "%s-%s-%s.%s" % (self.artifact, self.version, self.classifier, self.extension)
+ else:
+ return "%s-%s.%s" % (self.artifact, self.version, self.extension)
+
+def path(self):
+ return self.base() + self.filename()
+```
+
+`path()` produces Maven-standard paths like `org/lwjgl/lwjgl/lwjgl/2.9.0/lwjgl-2.9.0.jar`.
+
+### Classification Helpers
+
+```python
+def is_lwjgl(self):
+ return self.group in (
+ "org.lwjgl", "org.lwjgl.lwjgl",
+ "net.java.jinput", "net.java.jutils",
+ )
+
+def is_log4j(self):
+ return self.group == "org.apache.logging.log4j"
+```
+
+Used by `generate_mojang.py` to separate LWJGL into its own component and to apply Log4j security patches.
+
+---
+
+## Output Models
+
+### `MetaVersion`
+
+The primary output format for every component version:
+
+```python
+class MetaVersion(Versioned):
+ name: str # Human-readable name
+ version: str # Version string
+ uid: str # Component UID
+ type: Optional[str] # "release", "snapshot", "old_alpha", "old_beta", "experiment"
+ order: Optional[int] # Sort priority (lower = applied first)
+ volatile: Optional[bool] # May change between runs
+ requires: Optional[List[Dependency]] # Component dependencies
+ conflicts: Optional[List[Dependency]] # Incompatible components
+ libraries: Optional[List[Library]] # Java libraries
+ asset_index: Optional[MojangAssets] # Asset manifest (alias: assetIndex)
+ maven_files: Optional[List[Library]] # Extra Maven artifacts (alias: mavenFiles)
+ main_jar: Optional[Library] # Primary game JAR (alias: mainJar)
+ jar_mods: Optional[List[Library]] # Legacy jar mods (alias: jarMods)
+ main_class: Optional[str] # JVM entry point (alias: mainClass)
+ applet_class: Optional[str] # Legacy applet class (alias: appletClass)
+ minecraft_arguments: Optional[str] # Legacy MC arguments (alias: minecraftArguments)
+ release_time: Optional[datetime] # Release timestamp (alias: releaseTime)
+ compatible_java_majors: Optional[List[int]] # e.g., [17, 21] (alias: compatibleJavaMajors)
+ compatible_java_name: Optional[str] # Mojang Java component name (alias: compatibleJavaName)
+ java_agents: Optional[List[JavaAgent]] # e.g., Log4j patcher (alias: +agents)
+ additional_traits: Optional[List[str]] # e.g., "noapplet" (alias: +traits)
+ additional_tweakers: Optional[List[str]] # Legacy FML tweakers (alias: +tweakers)
+ additional_jvm_args: Optional[List[str]] # Extra JVM flags (alias: +jvmArgs)
+ logging: Optional[MojangLogging] # Log4j configuration
+```
+
+### `MetaPackage`
+
+Package-level metadata:
+
+```python
+class MetaPackage(Versioned):
+ name: str
+ uid: str
+ recommended: Optional[List[str]]
+ authors: Optional[List[str]]
+ description: Optional[str]
+ project_url: Optional[str] = Field(alias="projectUrl")
+```
+
+### `Dependency`
+
+Component dependency specification:
+
+```python
+class Dependency(MetaBase):
+ uid: str # Required component UID
+ equals: Optional[str] # Exact version match
+ suggests: Optional[str] # Suggested version (not enforced)
+```
+
+Examples:
+- Fabric Intermediary requires exact MC version: `Dependency(uid="net.minecraft", equals="1.21.5")`
+- Fabric Loader requires any intermediary: `Dependency(uid="net.fabricmc.intermediary")`
+- Forge requires a specific MC version: `Dependency(uid="net.minecraft", equals="1.20.4")`
+
+### `Library`
+
+A Java library dependency:
+
+```python
+class Library(MetaBase):
+ extract: Optional[MojangLibraryExtractRules]
+ name: Optional[GradleSpecifier]
+ downloads: Optional[MojangLibraryDownloads]
+ natives: Optional[Dict[str, str]]
+ rules: Optional[MojangRules]
+ url: Optional[str] # Maven repository URL
+ mmcHint: Optional[str] = Field(None, alias="MMC-hint")
+```
+
+### `JavaAgent`
+
+Extends `Library` with a JVM argument:
+
+```python
+class JavaAgent(Library):
+ argument: Optional[str]
+```
+
+Used for the Log4j patcher agent.
+
+---
+
+## Mojang-Specific Models
+
+### `MojangArtifactBase` / `MojangArtifact`
+
+```python
+class MojangArtifactBase(MetaBase):
+ sha1: Optional[str]
+ sha256: Optional[str]
+ size: Optional[int]
+ url: str
+
+class MojangArtifact(MojangArtifactBase):
+ path: Optional[str]
+```
+
+### `MojangLibraryDownloads`
+
+```python
+class MojangLibraryDownloads(MetaBase):
+ artifact: Optional[MojangArtifact]
+ classifiers: Optional[Dict[Any, MojangArtifact]]
+```
+
+### `MojangRules`
+
+OS-specific allow/disallow rules for libraries:
+
+```python
+class MojangRule(MetaBase):
+ action: str # "allow" or "disallow"
+ os: Optional[OSRule]
+
+class OSRule(MetaBase):
+ name: str # "osx", "linux", "windows", "windows-arm64", etc.
+ version: Optional[str]
+
+class MojangRules(MetaBase):
+ __root__: List[MojangRule]
+```
+
+### `MojangVersion` (in `model/mojang.py`)
+
+The full Mojang version manifest, containing the raw version JSON. Key fields:
+
+```python
+class MojangVersion(MetaBase):
+ id: str
+ type: str
+ main_class: str = Field(alias="mainClass")
+ minecraft_arguments: Optional[str] = Field(alias="minecraftArguments")
+ arguments: Optional[MojangArguments]
+ release_time: datetime = Field(alias="releaseTime")
+ libraries: List[Library]
+ downloads: MojangVersionDownloads
+ asset_index: Optional[MojangAssets] = Field(alias="assetIndex")
+ java_version: Optional[MojangJavaVersion] = Field(alias="javaVersion")
+ logging: Optional[Dict[str, MojangLogging]]
+ compliance_level: Optional[int] = Field(alias="complianceLevel")
+```
+
+### `MojangIndex`
+
+The version manifest index:
+
+```python
+class MojangIndexEntry(MetaBase):
+ id: str
+ type: str
+ url: str
+ time: datetime
+ release_time: datetime = Field(alias="releaseTime")
+ sha1: str
+
+class MojangIndex(MetaBase):
+ latest: Dict[str, str]
+ versions: List[MojangIndexEntry]
+
+class MojangIndexWrap(MetaBase):
+ versions: MojangIndex
+```
+
+### `MojangJavaComponent` (StrEnum)
+
+```python
+class MojangJavaComponent(StrEnum):
+ Alpha = "java-runtime-alpha"
+ Beta = "java-runtime-beta"
+ Gamma = "java-runtime-gamma"
+ GammaSnapshot = "java-runtime-gamma-snapshot"
+ Delta = "java-runtime-delta"
+ JreLegacy = "jre-legacy"
+ Exe = "minecraft-java-exe"
+```
+
+Uses a `_missing_()` override to accept unknown future component names.
+
+---
+
+## Forge Models (in `model/forge.py`)
+
+### `ForgeEntry` / `DerivedForgeIndex`
+
+```python
+class ForgeFile(MetaBase):
+ classifier: str # "installer", "client", "universal", etc.
+ hash: Optional[str]
+ extension: str # "jar", "zip"
+
+class ForgeEntry(MetaBase):
+ long_version: str = Field(alias="longversion")
+ mc_version: str = Field(alias="mcversion")
+ version: str
+ build: int
+ branch: Optional[str]
+ latest: Optional[bool]
+ recommended: Optional[bool]
+ files: Optional[List[ForgeFile]]
+
+class DerivedForgeIndex(MetaBase):
+ versions: Dict[str, ForgeEntry]
+ by_mc_version: Dict[str, List[str]] = Field(alias="by_mcversion")
+```
+
+### `ForgeVersion`
+
+Post-processed Forge version with installer data:
+
+```python
+class ForgeVersion(MetaBase):
+ long_version: str = Field(alias="longversion")
+ mc_version: str = Field(alias="mcversion")
+ version: str
+ build: int
+ branch: Optional[str]
+ installer_filename: Optional[str]
+ installer_url: Optional[str]
+
+ def uses_installer(self) -> bool:
+ # True for MC >= 1.6
+
+ def is_supported(self) -> bool:
+ # Checks if version can be processed
+```
+
+### `ForgeInstallerProfileV2`
+
+Modern Forge installer profiles (MC 1.13+):
+
+```python
+class ForgeInstallerProfileV2(MetaBase):
+ spec: Optional[int]
+ profile: Optional[str]
+ version: Optional[str]
+ icon: Optional[str]
+ json: Optional[str]
+ minecraft: str
+ data: Optional[Dict[str, ForgeProfileData]]
+ processors: Optional[List[ForgeProcessor]]
+ libraries: List[Library]
+```
+
+---
+
+## NeoForge Models (in `model/neoforge.py`)
+
+Similar to Forge but with additions:
+
+```python
+class NeoForgeFile(MetaBase):
+ classifier: str
+ hash: Optional[str]
+ extension: str
+ artifact: Optional[str] # Additional field for NeoForge
+
+class NeoForgeInstallerProfileV2(MetaBase):
+ # Same structure as ForgeInstallerProfileV2
+ minecraft: str
+ libraries: List[Library]
+```
+
+---
+
+## Fabric Models (in `model/fabric.py`)
+
+```python
+class FabricMainClasses(MetaBase):
+ client: Optional[str]
+ common: Optional[str]
+ server: Optional[str]
+
+class FabricInstallerArguments(MetaBase):
+ client: Optional[List[str]]
+ common: Optional[List[str]]
+ server: Optional[List[str]]
+
+class FabricInstallerLibraries(MetaBase):
+ client: Optional[List[Library]]
+ common: Optional[List[Library]]
+ server: Optional[List[Library]]
+
+class FabricInstallerDataV1(MetaBase):
+ version: int
+ libraries: FabricInstallerLibraries
+ main_class: Optional[Union[str, FabricMainClasses]]
+ arguments: Optional[FabricInstallerArguments]
+ launchwrapper: Optional[FabricInstallerLaunchwrapper]
+
+class FabricJarInfo(MetaBase):
+ release_time: Optional[datetime] = Field(alias="releaseTime")
+```
+
+---
+
+## Index Models (in `model/index.py`)
+
+### `MetaVersionIndexEntry`
+
+Per-version index entry with hash:
+
+```python
+class MetaVersionIndexEntry(MetaBase):
+ version: str
+ type: Optional[str]
+ release_time: Optional[datetime] = Field(alias="releaseTime")
+ sha256: str
+ requires: Optional[List[Dependency]]
+ recommended: Optional[bool]
+ volatile: Optional[bool]
+
+ @classmethod
+ def from_meta_version(cls, meta_version: MetaVersion, sha256: str):
+ # Factory method to create from MetaVersion + computed SHA-256
+```
+
+### `MetaVersionIndex`
+
+Per-package version list:
+
+```python
+class MetaVersionIndex(Versioned):
+ name: Optional[str]
+ uid: str
+ description: Optional[str]
+ project_url: Optional[str] = Field(alias="projectUrl")
+ authors: Optional[List[str]]
+ versions: List[MetaVersionIndexEntry]
+```
+
+### `MetaPackageIndex`
+
+Master index of all packages:
+
+```python
+class MetaPackageIndexEntry(MetaBase):
+ name: Optional[str]
+ uid: str
+ sha256: str
+
+class MetaPackageIndex(Versioned):
+ packages: List[MetaPackageIndexEntry]
+```
+
+---
+
+## Helper Functions
+
+### `make_launcher_library()`
+
+Creates a `Library` with a pre-computed artifact URL pointing at the launcher Maven:
+
+```python
+LAUNCHER_MAVEN = "https://files.projecttick.org/maven/%s"
+
+def make_launcher_library(name: GradleSpecifier, hash: str, size: int, maven=LAUNCHER_MAVEN):
+ artifact = MojangArtifact(url=maven % name.path(), sha1=hash, size=size)
+ return Library(name=name, downloads=MojangLibraryDownloads(artifact=artifact))
+```
+
+### `serialize_datetime()`
+
+Custom datetime serializer used by Pydantic's `json_encoders`:
+
+```python
+def serialize_datetime(dt: datetime) -> str:
+ return dt.strftime("%Y-%m-%dT%H:%M:%S+00:00")
+```
+
+All timestamps use UTC with a fixed `+00:00` offset.
+
+---
+
+## Model Hierarchy
+
+```
+pydantic.BaseModel
+└── MetaBase
+ ├── Versioned
+ │ ├── MetaVersion
+ │ │ └── JavaRuntimeVersion
+ │ ├── MetaPackage
+ │ └── MetaVersionIndex
+ ├── GradleSpecifier (not a BaseModel, but Pydantic-integrated)
+ ├── Library
+ │ └── JavaAgent
+ ├── Dependency
+ ├── MojangArtifactBase
+ │ ├── MojangArtifact
+ │ ├── MojangAssets
+ │ └── MojangLoggingArtifact
+ ├── MojangVersion
+ ├── ForgeEntry / NeoForgeEntry
+ ├── ForgeVersion / NeoForgeVersion
+ ├── FabricInstallerDataV1
+ ├── JavaRuntimeMeta
+ ├── JavaVersionMeta
+ ├── AdoptxRelease / AdoptxBinary
+ ├── ZuluPackageDetail
+ └── APIQuery
+ ├── AdoptxAPIFeatureReleasesQuery
+ └── AzulApiPackagesQuery
+```
diff --git a/docs/handbook/meta/deployment.md b/docs/handbook/meta/deployment.md
new file mode 100644
index 0000000000..df66839d22
--- /dev/null
+++ b/docs/handbook/meta/deployment.md
@@ -0,0 +1,285 @@
+# Meta — Deployment
+
+## Overview
+
+Meta supports three deployment strategies, configured via environment variables:
+
+1. **Git deployment** (`DEPLOY_TO_GIT=true`) — commit and push both `upstream/` and `launcher/` repositories
+2. **Folder deployment** (`DEPLOY_TO_FOLDER=true`) — rsync launcher output to a local directory
+3. **NixOS service** — systemd timer-based deployment via a Nix flake module
+
+---
+
+## Git Deployment
+
+### Configuration
+
+Set in `config.sh`:
+
+```bash
+export DEPLOY_TO_GIT=true
+export GIT_AUTHOR_NAME="Herpington Derpson"
+export GIT_AUTHOR_EMAIL="herpderp@derpmail.com"
+export GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME"
+export GIT_COMMITTER_EMAIL="$GIT_AUTHOR_EMAIL"
+export GIT_SSH_COMMAND="ssh -i ${BASEDIR}/config/deploy.key"
+```
+
+### Flow
+
+After update scripts complete, `update.sh` stages and commits to the upstream repo:
+
+```bash
+upstream_git add mojang/version_manifest_v2.json mojang/java_all.json mojang/versions/*
+upstream_git add forge/*.json forge/version_manifests/*.json ...
+# ... more components ...
+
+if ! upstream_git diff --cached --exit-code; then
+ upstream_git commit -a -m "Update Date ${currentDate} Time ${currentHour}:${currentMinute}:${currentSecond}"
+ upstream_git push
+fi
+```
+
+After generate scripts complete, the same pattern applies to the launcher repo:
+
+```bash
+launcher_git add index.json org.lwjgl/* org.lwjgl3/* net.minecraft/*
+launcher_git add net.minecraftforge/*
+# ... more components ...
+
+if ! launcher_git diff --cached --exit-code; then
+ launcher_git commit -a -m "Update Date ${currentDate} Time ${currentHour}:${currentMinute}:${currentSecond}"
+ launcher_git push
+fi
+```
+
+The `diff --cached --exit-code` check ensures no empty commits — if nothing changed, nothing is pushed.
+
+### Repository Initialization
+
+Run `init.sh` before the first update:
+
+```bash
+export META_UPSTREAM_URL="git@example.com:org/meta-upstream.git"
+export META_LAUNCHER_URL="git@example.com:org/meta-launcher.git"
+./init.sh
+```
+
+This clones both repositories:
+
+```bash
+function init_repo {
+ if [ -d "$1" ]; then
+ return 0
+ fi
+ if [ -z "$2" ]; then
+ echo "Can't initialize missing $1 directory. Please specify $3" >&2
+ return 1
+ fi
+ git clone "$2" "$1"
+}
+
+init_repo "$META_UPSTREAM_DIR" "$META_UPSTREAM_URL" "META_UPSTREAM_URL"
+init_repo "$META_LAUNCHER_DIR" "$META_LAUNCHER_URL" "META_LAUNCHER_URL"
+```
+
+---
+
+## Folder Deployment
+
+For serving the launcher metadata via a static file server:
+
+```bash
+export DEPLOY_TO_FOLDER=true
+export DEPLOY_FOLDER=/app/public/v1
+```
+
+In `update.sh`:
+
+```bash
+if [ "${DEPLOY_TO_FOLDER}" = true ]; then
+ echo "Deploying to ${DEPLOY_FOLDER}"
+ mkdir -p "${DEPLOY_FOLDER}"
+ rsync -av --exclude=.git "${META_LAUNCHER_DIR}/" "${DEPLOY_FOLDER}"
+fi
+```
+
+This rsyncs the entire launcher directory (excluding `.git`) to the target folder. Can be combined with Git deployment.
+
+---
+
+## NixOS Deployment
+
+### Nix Package
+
+The Nix package (`blockgame-meta`) is built with `buildPythonApplication`:
+
+```nix
+buildPythonApplication {
+ pname = "blockgame-meta";
+ version = "unstable";
+ pyproject = true;
+
+ propagatedBuildInputs = [
+ cachecontrol requests filelock packaging pydantic_1
+ ];
+
+ postInstall = ''
+ install -Dm755 $src/update.sh $out/bin/update
+ install -Dm755 $src/init.sh $out/bin/init
+
+ wrapProgram $out/bin/update \
+ --prefix PYTHONPATH : "$PYTHONPATH" \
+ --prefix PATH : ${lib.makeBinPath [git openssh python rsync]}
+
+ wrapProgram $out/bin/init \
+ --prefix PATH : ${lib.makeBinPath [git openssh]}
+ '';
+
+ mainProgram = "update";
+}
+```
+
+The package:
+- Includes `update.sh` as the main executable (`update`)
+- Includes `init.sh` as `init`
+- Wraps both with correct `PYTHONPATH` and `PATH` (git, openssh, python, rsync)
+
+### NixOS Module
+
+Enable the service with:
+
+```nix
+{
+ services.blockgame-meta = {
+ enable = true;
+ package = inputs.meta.packages.${system}.blockgame-meta;
+ settings = {
+ DEPLOY_TO_GIT = "true";
+ # ... other config vars
+ };
+ };
+}
+```
+
+The module creates:
+
+**System user and group**:
+```nix
+users.users."blockgame-meta" = {
+ isSystemUser = true;
+ group = "blockgame-meta";
+};
+```
+
+**systemd service**:
+```nix
+systemd.services."blockgame-meta" = {
+ description = "blockgame metadata generator";
+ after = ["network-online.target"];
+ wants = ["network-online.target"];
+ serviceConfig = {
+ EnvironmentFile = [(settingsFormat.generate "blockgame-meta.env" cfg.settings)];
+ ExecStartPre = getExe' cfg.package "init";
+ ExecStart = getExe cfg.package;
+ StateDirectory = "blockgame-meta";
+ CacheDirectory = "blockgame-meta";
+ User = "blockgame-meta";
+ };
+};
+```
+
+- `ExecStartPre` runs `init.sh` to clone repos if needed
+- `ExecStart` runs `update.sh` (the main pipeline)
+- `StateDirectory` maps to `$STATE_DIRECTORY` → `/var/lib/blockgame-meta/` (where upstream/ and launcher/ repos live)
+- `CacheDirectory` maps to `$CACHE_DIRECTORY` → `/var/cache/blockgame-meta/` (HTTP cache)
+
+**systemd timer**:
+```nix
+systemd.timers."blockgame-meta" = {
+ timerConfig = {
+ OnCalendar = "hourly";
+ RandomizedDelaySec = "5m";
+ };
+ wantedBy = ["timers.target"];
+};
+```
+
+The pipeline runs **hourly** with up to 5 minutes of randomized delay to avoid thundering herd effects.
+
+### Service Options
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `enable` | bool | `false` | Enable the blockgame-meta service |
+| `package` | package | `pkgs.blockgame-meta` | Package to use |
+| `settings.DEPLOY_TO_GIT` | string | `"false"` | Enable Git deployment |
+| `settings.DEPLOY_TO_FOLDER` | string | `"false"` | Enable folder deployment |
+| `settings.DEPLOY_TO_S3` | string | `"false"` | Enable S3 deployment |
+
+Additional settings can be added as freeform key-value pairs.
+
+---
+
+## CI: Garnix
+
+CI is configured in `garnix.yaml`:
+
+```yaml
+builds:
+ include:
+ - "checks.x86_64-linux.*"
+ - "devShells.*.*"
+ - "packages.*.*"
+```
+
+This builds and checks all packages and dev shells on every commit via [Garnix](https://garnix.io).
+
+---
+
+## Manual Execution
+
+For development or one-off updates:
+
+```bash
+# 1. Set up environment
+cp config.example.sh config.sh
+# Edit config.sh with your settings
+
+# 2. Initialize repositories
+./init.sh
+
+# 3. Run the pipeline
+poetry run ./update.sh
+```
+
+Or run individual steps:
+
+```bash
+# Update only Mojang
+poetry run python -m meta.run.update_mojang
+
+# Generate only Forge
+poetry run python -m meta.run.generate_forge
+
+# Rebuild indices
+poetry run python -m meta.run.index
+```
+
+---
+
+## Environment Variables Reference
+
+| Variable | Default | Description |
+|---|---|---|
+| `META_CACHE_DIR` | `$CACHE_DIRECTORY` or `./caches` | HTTP cache directory |
+| `META_UPSTREAM_DIR` | `$STATE_DIRECTORY/upstream` or `./upstream` | Upstream git repo path |
+| `META_LAUNCHER_DIR` | `$STATE_DIRECTORY/metalauncher` or `./metalauncher` | Launcher git repo path |
+| `META_UPSTREAM_URL` | (none) | Git clone URL for upstream repo |
+| `META_LAUNCHER_URL` | (none) | Git clone URL for launcher repo |
+| `DEPLOY_TO_GIT` | `false` | Commit/push git repos after update |
+| `DEPLOY_TO_FOLDER` | `false` | rsync launcher to `DEPLOY_FOLDER` |
+| `DEPLOY_FOLDER` | `/app/public/v1` | Target directory for folder deployment |
+| `GIT_AUTHOR_NAME` | (none) | Git author name for commits |
+| `GIT_AUTHOR_EMAIL` | (none) | Git author email for commits |
+| `GIT_SSH_COMMAND` | (none) | SSH command for git push (deploy key) |
diff --git a/docs/handbook/meta/fabric-metadata.md b/docs/handbook/meta/fabric-metadata.md
new file mode 100644
index 0000000000..070fb9334d
--- /dev/null
+++ b/docs/handbook/meta/fabric-metadata.md
@@ -0,0 +1,323 @@
+# Meta — Fabric Metadata
+
+## Overview
+
+Fabric is a lightweight modding toolchain with a simpler metadata structure than Forge. Fabric has two components that Meta tracks:
+
+1. **Fabric Loader** — the mod loading framework itself
+2. **Intermediary Mappings** — obfuscation mappings that allow mods to work across Minecraft versions
+
+The processing is straightforward compared to Forge because Fabric publishes structured JSON metadata directly, with no need to download and extract installer JARs.
+
+---
+
+## Phase 1: Update — `update_fabric.py`
+
+### Fetching Component Metadata
+
+Fabric Meta v2 exposes version lists at:
+
+```python
+for component in ["intermediary", "loader"]:
+ index = get_json_file(
+ os.path.join(UPSTREAM_DIR, META_DIR, f"{component}.json"),
+ "https://meta.fabricmc.net/v2/versions/" + component,
+ )
+```
+
+This fetches two files:
+- `upstream/fabric/meta-v2/loader.json` — list of loader versions
+- `upstream/fabric/meta-v2/intermediary.json` — list of intermediary mappings
+
+Each entry contains:
+```json
+{
+ "separator": ".",
+ "build": 1,
+ "maven": "net.fabricmc:fabric-loader:0.16.9",
+ "version": "0.16.9",
+ "stable": true
+}
+```
+
+### JAR Timestamp Extraction
+
+For each loader and intermediary version, the updater determines the release timestamp. It tries an efficient HTTP HEAD first, falling back to downloading the JAR:
+
+```python
+def compute_jar_file(path, url):
+ try:
+ headers = head_file(url)
+ tstamp = datetime.strptime(headers["Last-Modified"], DATETIME_FORMAT_HTTP)
+ except requests.HTTPError:
+ print(f"Falling back to downloading jar for {url}")
+ jar_path = path + ".jar"
+ get_binary_file(jar_path, url)
+ tstamp = datetime.fromtimestamp(0)
+ with zipfile.ZipFile(jar_path) as jar:
+ allinfo = jar.infolist()
+ for info in allinfo:
+ tstamp_new = datetime(*info.date_time)
+ if tstamp_new > tstamp:
+ tstamp = tstamp_new
+
+ data = FabricJarInfo(release_time=tstamp)
+ data.write(path + ".json")
+```
+
+`DATETIME_FORMAT_HTTP` is `"%a, %d %b %Y %H:%M:%S %Z"` — the standard date format used in HTTP headers such as `Last-Modified`.
+
+The result is saved as a `FabricJarInfo` JSON:
+
+```python
+class FabricJarInfo(MetaBase):
+ release_time: Optional[datetime] = Field(alias="releaseTime")
+```
+
+### Loader Installer JSON
+
+For each loader version, the updater downloads the installer JSON from Fabric's Maven:
+
+```python
+def get_json_file_concurrent(it):
+ maven_url = get_maven_url(it["maven"], "https://maven.fabricmc.net/", ".json")
+ get_json_file(
+ os.path.join(UPSTREAM_DIR, INSTALLER_INFO_DIR, f"{it['version']}.json"),
+ maven_url,
+ )
+```
+
+The `get_maven_url()` function constructs Maven repository URLs:
+
+```python
+def get_maven_url(maven_key, server, ext):
+ parts = maven_key.split(":", 3)
+ maven_ver_url = (
+ server + parts[0].replace(".", "/") + "/" + parts[1] + "/" + parts[2] + "/"
+ )
+ maven_url = maven_ver_url + parts[1] + "-" + parts[2] + ext
+ return maven_url
+```
+
+For `net.fabricmc:fabric-loader:0.16.9`, this produces:
+`https://maven.fabricmc.net/net/fabricmc/fabric-loader/0.16.9/fabric-loader-0.16.9.json`
+
+### Concurrency
+
+The Fabric updater uniquely uses `multiprocessing.Pool` (not `ThreadPoolExecutor`):
+
+```python
+with Pool(None) as pool:
+ deque(pool.imap_unordered(compute_jar_file_concurrent, index, 32), 0)
+```
+
+The `deque(..., 0)` pattern consumes the iterator without storing results, purely for side effects.
+
+---
+
+## Phase 2: Generate — `generate_fabric.py`
+
+### Processing Loader Versions
+
+```python
+def process_loader_version(entry) -> MetaVersion:
+ jar_info = load_jar_info(transform_maven_key(entry["maven"]))
+ installer_info = load_installer_info(entry["version"])
+
+ v = MetaVersion(
+ name="Fabric Loader",
+ uid="net.fabricmc.fabric-loader",
+ version=entry["version"]
+ )
+ v.release_time = jar_info.release_time
+ v.requires = [Dependency(uid="net.fabricmc.intermediary")]
+ v.order = 10
+ v.type = "release"
+```
+
+#### Main Class Resolution
+
+The loader installer info may specify the main class as either a string or a `FabricMainClasses` object:
+
+```python
+if isinstance(installer_info.main_class, FabricMainClasses):
+ v.main_class = installer_info.main_class.client
+else:
+ v.main_class = installer_info.main_class
+```
+
+The `FabricMainClasses` model:
+```python
+class FabricMainClasses(MetaBase):
+ client: Optional[str]
+ common: Optional[str]
+ server: Optional[str]
+```
+
+#### Library Assembly
+
+Loader libraries come from the installer's `common` and `client` sections, plus the loader itself:
+
+```python
+v.libraries = []
+v.libraries.extend(installer_info.libraries.common)
+v.libraries.extend(installer_info.libraries.client)
+loader_lib = Library(
+ name=GradleSpecifier.from_string(entry["maven"]),
+ url="https://maven.fabricmc.net",
+)
+v.libraries.append(loader_lib)
+```
+
+### Processing Intermediary Versions
+
+```python
+def process_intermediary_version(entry) -> MetaVersion:
+ jar_info = load_jar_info(transform_maven_key(entry["maven"]))
+
+ v = MetaVersion(
+ name="Intermediary Mappings",
+ uid="net.fabricmc.intermediary",
+ version=entry["version"],
+ )
+ v.release_time = jar_info.release_time
+ v.requires = [Dependency(uid="net.minecraft", equals=entry["version"])]
+ v.order = 11
+ v.type = "release"
+ v.volatile = True
+ intermediary_lib = Library(
+ name=GradleSpecifier.from_string(entry["maven"]),
+ url="https://maven.fabricmc.net",
+ )
+ v.libraries = [intermediary_lib]
+ return v
+```
+
+Key points:
+- Intermediary mappings are `volatile=True` — they may change between runs.
+- The `version` matches the Minecraft version (e.g., `1.21.5`).
+- The `requires` field pins the intermediary to an exact Minecraft version via `equals`.
+
+### Recommended Versions
+
+Fabric Meta has a `stable` field in its loader index. The **first** stable loader version is recommended:
+
+```python
+for entry in loader_version_index:
+ v = process_loader_version(entry)
+ if not recommended_loader_versions and entry["stable"]:
+        recommended_loader_versions.append(entry["version"])
+```
+
+All intermediary versions are recommended (since each maps to exactly one Minecraft version).
+
+### Package Metadata
+
+```python
+package = MetaPackage(uid=LOADER_COMPONENT, name="Fabric Loader")
+package.recommended = recommended_loader_versions
+package.description = "Fabric Loader is a tool to load Fabric-compatible mods in game environments."
+package.project_url = "https://fabricmc.net"
+package.authors = ["Fabric Developers"]
+
+package = MetaPackage(uid=INTERMEDIARY_COMPONENT, name="Intermediary Mappings")
+package.recommended = recommended_intermediary_versions
+package.description = "Intermediary mappings allow using Fabric Loader with mods for Minecraft in a more compatible manner."
+package.project_url = "https://fabricmc.net"
+package.authors = ["Fabric Developers"]
+```
+
+---
+
+## Data Models
+
+### `FabricInstallerDataV1`
+
+The installer JSON from Fabric's Maven:
+
+```python
+class FabricInstallerDataV1(MetaBase):
+ version: int
+ libraries: FabricInstallerLibraries
+ main_class: Optional[Union[str, FabricMainClasses]]
+ arguments: Optional[FabricInstallerArguments]
+ launchwrapper: Optional[FabricInstallerLaunchwrapper]
+
+class FabricInstallerLibraries(MetaBase):
+ client: Optional[List[Library]]
+ common: Optional[List[Library]]
+ server: Optional[List[Library]]
+
+class FabricInstallerArguments(MetaBase):
+ client: Optional[List[str]]
+ common: Optional[List[str]]
+ server: Optional[List[str]]
+```
+
+### `FabricJarInfo`
+
+```python
+class FabricJarInfo(MetaBase):
+ release_time: Optional[datetime] = Field(alias="releaseTime")
+```
+
+---
+
+## Constants
+
+| Constant | Value | Location |
+|---|---|---|
+| `LOADER_COMPONENT` | `"net.fabricmc.fabric-loader"` | `common/fabric.py` |
+| `INTERMEDIARY_COMPONENT` | `"net.fabricmc.intermediary"` | `common/fabric.py` |
+| `BASE_DIR` | `"fabric"` | `common/fabric.py` |
+| `META_DIR` | `"fabric/meta-v2"` | `common/fabric.py` |
+| `INSTALLER_INFO_DIR` | `"fabric/loader-installer-json"` | `common/fabric.py` |
+| `JARS_DIR` | `"fabric/jars"` | `common/fabric.py` |
+| `DATETIME_FORMAT_HTTP` | `"%a, %d %b %Y %H:%M:%S %Z"` | `common/fabric.py` |
+
+---
+
+## Component Dependency Chain
+
+```
+org.quiltmc.quilt-loader ──► net.fabricmc.intermediary ──► net.minecraft
+net.fabricmc.fabric-loader ──► net.fabricmc.intermediary ──► net.minecraft
+```
+
+Fabric Loader requires Intermediary Mappings, which require a specific Minecraft version.
+
+---
+
+## Output Structure
+
+```
+launcher/
+├── net.fabricmc.fabric-loader/
+│ ├── package.json
+│ ├── 0.16.9.json
+│ ├── 0.16.8.json
+│ └── ...
+└── net.fabricmc.intermediary/
+ ├── package.json
+ ├── 1.21.5.json
+ ├── 1.20.4.json
+ └── ...
+```
+
+---
+
+## Upstream Data Structure
+
+```
+upstream/fabric/
+├── meta-v2/
+│ ├── loader.json # Full loader version index
+│ └── intermediary.json # Full intermediary version index
+├── loader-installer-json/
+│ ├── 0.16.9.json # Installer JSON per loader version
+│ └── ...
+└── jars/
+ ├── net.fabricmc.fabric-loader.0.16.9.json # JAR timestamp info
+ ├── net.fabricmc.intermediary.1.21.5.json
+ └── ...
+```
diff --git a/docs/handbook/meta/forge-metadata.md b/docs/handbook/meta/forge-metadata.md
new file mode 100644
index 0000000000..93d9c11efa
--- /dev/null
+++ b/docs/handbook/meta/forge-metadata.md
@@ -0,0 +1,492 @@
+# Meta — Forge Metadata
+
+## Overview
+
+Forge is the oldest and most complex mod loader supported by Meta. The processing pipeline handles:
+
+- Multiple Forge distribution formats across different Minecraft versions (legacy JARs, installer profiles v1, installer profiles v2 with build system)
+- ForgeWrapper integration for modern Forge versions
+- FML library injection for legacy versions (1.3.2–1.5.2)
+- Installer JAR downloading, extraction, and caching
+- Library deduplication against Minecraft's own libraries
+
+---
+
+## Phase 1: Update — `update_forge.py`
+
+### Fetching the Version Index
+
+Forge publishes two key metadata files:
+
+```python
+# Maven metadata — maps MC versions to Forge version lists
+r = sess.get(
+ "https://files.minecraftforge.net/net/minecraftforge/forge/maven-metadata.json"
+)
+main_json = r.json() # dict: {"1.20.4": ["1.20.4-49.0.31", ...], ...}
+
+# Promotions — marks latest/recommended versions
+r = sess.get(
+ "https://files.minecraftforge.net/net/minecraftforge/forge/promotions_slim.json"
+)
+promotions_json = r.json() # {"promos": {"1.20.4-latest": "49.0.31", ...}}
+```
+
+### Building the Derived Index
+
+The updater reconstructs a comprehensive index from these fragments:
+
+```python
+new_index = DerivedForgeIndex()
+
+version_expression = re.compile(
+ r"^(?P<mc>[0-9a-zA-Z_\.]+)-(?P<ver>[0-9\.]+\.(?P<build>[0-9]+))(-(?P<branch>[a-zA-Z0-9\.]+))?$"
+)
+
+for mc_version, value in main_json.items():
+ for long_version in value:
+ match = version_expression.match(long_version)
+ files = get_single_forge_files_manifest(long_version)
+
+ entry = ForgeEntry(
+ long_version=long_version,
+ mc_version=mc_version,
+ version=match.group("ver"),
+ build=int(match.group("build")),
+ branch=match.group("branch"),
+ latest=False,
+ recommended=version in recommended_set,
+ files=files,
+ )
+ new_index.versions[long_version] = entry
+```
+
+### File Manifest Fetching
+
+For each version, the updater fetches a file manifest from Forge's Maven:
+
+```python
+def get_single_forge_files_manifest(longversion):
+ file_url = (
+ "https://files.minecraftforge.net/net/minecraftforge/forge/%s/meta.json"
+ % longversion
+ )
+ r = sess.get(file_url)
+ files_json = r.json()
+
+ for classifier, extensionObj in files_json.get("classifiers").items():
+ # Parse each file: installer, universal, changelog, etc.
+ file_obj = ForgeFile(
+ classifier=classifier, hash=processed_hash, extension=extension
+ )
+ ret_dict[classifier] = file_obj
+
+ return ret_dict
+```
+
+Each `ForgeFile` represents a downloadable artifact:
+
+```python
+class ForgeFile(MetaBase):
+ classifier: str # "installer", "universal", "client", "changelog"
+ hash: str # MD5 hash
+ extension: str # "jar", "zip", "txt"
+
+ def filename(self, long_version):
+ return "%s-%s-%s.%s" % ("forge", long_version, self.classifier, self.extension)
+
+ def url(self, long_version):
+ return "https://maven.minecraftforge.net/net/minecraftforge/forge/%s/%s" % (
+ long_version, self.filename(long_version),
+ )
+```
+
+### Installer JAR Processing
+
+For versions that use installers, the updater downloads the JAR and extracts two files:
+
+```python
+def process_forge_version(version, jar_path):
+ # Download if not cached
+ if not os.path.isfile(jar_path):
+ download_binary_file(sess, jar_path, version.url())
+
+ # Extract from ZIP
+ with zipfile.ZipFile(jar_path) as jar:
+ # Extract version.json (Minecraft version overlay)
+ with jar.open("version.json") as profile_zip_entry:
+ version_data = profile_zip_entry.read()
+ MojangVersion.parse_raw(version_data) # Validate
+ with open(version_file_path, "wb") as f:
+ f.write(version_data)
+
+ # Extract install_profile.json
+ with jar.open("install_profile.json") as profile_zip_entry:
+ install_profile_data = profile_zip_entry.read()
+ # Try both v1 and v2 formats
+ try:
+ ForgeInstallerProfile.parse_raw(install_profile_data)
+ except ValidationError:
+ ForgeInstallerProfileV2.parse_raw(install_profile_data)
+```
+
+### Installer Info Caching
+
+SHA-1, SHA-256, and size of each installer JAR are cached:
+
+```python
+installer_info = InstallerInfo()
+installer_info.sha1hash = file_hash(jar_path, hashlib.sha1)
+installer_info.sha256hash = file_hash(jar_path, hashlib.sha256)
+installer_info.size = os.path.getsize(jar_path)
+installer_info.write(installer_info_path)
+```
+
+### SHA-1 Verification
+
+Before processing, the updater checks if the local JAR matches the remote checksum:
+
+```python
+fileSha1 = get_file_sha1_from_file(jar_path, sha1_file)
+rfile = sess.get(version.url() + ".sha1")
+new_sha1 = rfile.text.strip()
+if fileSha1 != new_sha1:
+ remove_files([jar_path, profile_path, installer_info_path, sha1_file])
+```
+
+If a SHA-1 mismatch is detected, all cached artifacts are deleted and re-downloaded.
+
+### Legacy Version Info
+
+For pre-installer Forge versions (MC 1.1–1.5.2), the updater extracts release times from JAR file timestamps:
+
+```python
+tstamp = datetime.fromtimestamp(0)
+with zipfile.ZipFile(jar_path) as jar:
+ for info in jar.infolist():
+ tstamp_new = datetime(*info.date_time)
+ if tstamp_new > tstamp:
+ tstamp = tstamp_new
+legacy_info = ForgeLegacyInfo()
+legacy_info.release_time = tstamp
+legacy_info.sha1 = file_hash(jar_path, hashlib.sha1)
+```
+
+### Bad Versions
+
+Certain versions are blacklisted:
+
+```python
+BAD_VERSIONS = ["1.12.2-14.23.5.2851"]
+```
+
+---
+
+## Phase 2: Generate — `generate_forge.py`
+
+### ForgeVersion Post-Processing
+
+The raw `ForgeEntry` is converted into a `ForgeVersion` object that resolves download URLs:
+
+```python
+class ForgeVersion:
+ def __init__(self, entry: ForgeEntry):
+ self.build = entry.build
+ self.rawVersion = entry.version
+ self.mc_version = entry.mc_version
+ self.mc_version_sane = self.mc_version.replace("_pre", "-pre", 1)
+ self.long_version = "%s-%s" % (self.mc_version, self.rawVersion)
+ if self.branch is not None:
+ self.long_version += "-%s" % self.branch
+
+ for classifier, file in entry.files.items():
+ if classifier == "installer" and extension == "jar":
+ self.installer_filename = filename
+ self.installer_url = url
+ if (classifier == "universal" or classifier == "client") and ...:
+ self.universal_filename = filename
+ self.universal_url = url
+
+ def uses_installer(self):
+ if self.installer_url is None:
+ return False
+ if self.mc_version == "1.5.2":
+ return False
+ return True
+```
+
+### Library Deduplication
+
+Libraries already provided by Minecraft are filtered out:
+
+```python
+def should_ignore_artifact(libs: Collection[GradleSpecifier], match: GradleSpecifier):
+ for ver in libs:
+ if ver.group == match.group and ver.artifact == match.artifact and ver.classifier == match.classifier:
+ if ver.version == match.version:
+ return True # Exact match, ignore
+ elif pversion.parse(ver.version) > pversion.parse(match.version):
+ return True # Minecraft has newer version
+ else:
+ return False # Forge has newer, keep it
+ return False # Not in Minecraft, keep it
+```
+
+### Three Generation Paths
+
+The generator handles three distinct Forge eras:
+
+#### Path 1: Legacy (MC 1.1–1.5.2) — `version_from_legacy()`
+
+Pre-installer versions that inject a JAR mod:
+
+```python
+def version_from_legacy(info: ForgeLegacyInfo, version: ForgeVersion) -> MetaVersion:
+ v = MetaVersion(name="Forge", version=version.rawVersion, uid=FORGE_COMPONENT)
+ v.requires = [Dependency(uid=MINECRAFT_COMPONENT, equals=mc_version)]
+ v.release_time = info.release_time
+ v.order = 5
+
+ if fml_libs_for_version(mc_version):
+ v.additional_traits = ["legacyFML"]
+
+ main_mod = Library(
+ name=GradleSpecifier("net.minecraftforge", "forge", version.long_version, classifier)
+ )
+ main_mod.downloads = MojangLibraryDownloads()
+ main_mod.downloads.artifact = MojangArtifact(url=version.url(), sha1=info.sha1, size=info.size)
+ v.jar_mods = [main_mod]
+ return v
+```
+
+#### Path 2: Old Installer (MC 1.6–1.12.2) — `version_from_profile()` / `version_from_modernized_installer()`
+
+These versions have installer profiles with embedded version JSONs:
+
+```python
+def version_from_profile(profile: ForgeInstallerProfile, version: ForgeVersion) -> MetaVersion:
+ v = MetaVersion(name="Forge", version=version.rawVersion, uid=FORGE_COMPONENT)
+ v.main_class = profile.version_info.main_class
+ v.release_time = profile.version_info.time
+
+ # Extract tweaker classes from arguments
+ args = profile.version_info.minecraft_arguments
+ tweakers = []
+ expression = re.compile(r"--tweakClass ([a-zA-Z0-9.]+)")
+ match = expression.search(args)
+ while match is not None:
+ tweakers.append(match.group(1))
+ # ...
+ v.additional_tweakers = tweakers
+
+ # Filter libraries against Minecraft's library set
+ mc_filter = load_mc_version_filter(mc_version)
+ for forge_lib in profile.version_info.libraries:
+ if forge_lib.name.is_lwjgl() or should_ignore_artifact(mc_filter, forge_lib.name):
+ continue
+ # Rename minecraftforge → forge with universal classifier
+ # ...
+ v.libraries.append(overridden_lib)
+```
+
+#### Path 3: Modern Build System (MC 1.13+) — `version_from_build_system_installer()`
+
+Modern Forge uses a two-file installer system (`install_profile.json` + `version.json`). The launcher cannot run the installer's processors directly, so ForgeWrapper is injected:
+
+```python
+def version_from_build_system_installer(
+ installer: MojangVersion, profile: ForgeInstallerProfileV2, version: ForgeVersion
+) -> MetaVersion:
+ v = MetaVersion(name="Forge", version=version.rawVersion, uid=FORGE_COMPONENT)
+ v.main_class = "io.github.zekerzhayard.forgewrapper.installer.Main"
+
+ v.maven_files = []
+
+ # Add installer JAR as a Maven file
+ info = InstallerInfo.parse_file(...)
+ installer_lib = Library(
+ name=GradleSpecifier("net.minecraftforge", "forge", version.long_version, "installer")
+ )
+ installer_lib.downloads = MojangLibraryDownloads()
+ installer_lib.downloads.artifact = MojangArtifact(
+ url="https://maven.minecraftforge.net/%s" % installer_lib.name.path(),
+ sha1=info.sha1hash, size=info.size,
+ )
+ v.maven_files.append(installer_lib)
+
+ # Add profile libraries as Maven files
+ for forge_lib in profile.libraries:
+ if forge_lib.name.is_log4j():
+ continue
+ update_library_info(forge_lib)
+ v.maven_files.append(forge_lib)
+
+ # Add ForgeWrapper as runtime library
+ v.libraries = [FORGEWRAPPER_LIBRARY]
+
+ # Add installer's runtime libraries
+ for forge_lib in installer.libraries:
+ if forge_lib.name.is_log4j():
+ continue
+ v.libraries.append(forge_lib)
+
+ # Build Minecraft arguments
+ mc_args = "--username ${auth_player_name} --version ${version_name} ..."
+ for arg in installer.arguments.game:
+ mc_args += f" {arg}"
+ if "--fml.forgeGroup" not in installer.arguments.game:
+ mc_args += f" --fml.forgeGroup net.minecraftforge"
+ if "--fml.forgeVersion" not in installer.arguments.game:
+ mc_args += f" --fml.forgeVersion {version.rawVersion}"
+ if "--fml.mcVersion" not in installer.arguments.game:
+ mc_args += f" --fml.mcVersion {version.mc_version}"
+ v.minecraft_arguments = mc_args
+ return v
+```
+
+### Library Info Population
+
+For libraries that lack complete download metadata, the generator fetches it:
+
+```python
+def update_library_info(lib: Library):
+ if not lib.downloads:
+ lib.downloads = MojangLibraryDownloads()
+ if not lib.downloads.artifact:
+ url = lib.url or f"https://maven.minecraftforge.net/{lib.name.path()}"
+ lib.downloads.artifact = MojangArtifact(url=url, sha1=None, size=None)
+
+ art = lib.downloads.artifact
+ if art and art.url:
+ if not art.sha1:
+ r = sess.get(art.url + ".sha1")
+ if r.status_code == 200:
+ art.sha1 = r.text.strip()
+ if not art.size:
+ r = sess.head(art.url)
+ if r.status_code == 200 and 'Content-Length' in r.headers:
+ art.size = int(r.headers['Content-Length'])
+```
+
+### FML Libraries
+
+Legacy Forge versions (MC 1.3.2–1.5.2) require FML libraries. The `fml_libs_for_version()` function returns the exact set for each MC version:
+
+```python
+def fml_libs_for_version(mc_version: str) -> List[FMLLib]:
+ if mc_version == "1.3.2":
+ return [argo_2_25, guava_12_0_1, asm_all_4_0]
+ elif mc_version in ["1.4", "1.4.1", ..., "1.4.7"]:
+ return [argo_2_25, guava_12_0_1, asm_all_4_0, bcprov_jdk15on_147]
+ elif mc_version == "1.5":
+ return [argo_small_3_2, guava_14_0_rc3, asm_all_4_1,
+ bcprov_jdk15on_148, deobfuscation_data_1_5, scala_library]
+ # ...
+```
+
+### Recommended Versions
+
+Versions marked as `recommended` by Forge promotions are tracked:
+
+```python
+recommended_versions = []
+for key, entry in remote_versions.versions.items():
+ if entry.recommended:
+ recommended_versions.append(version.rawVersion)
+
+package = MetaPackage(uid=FORGE_COMPONENT, name="Forge",
+ project_url="https://www.minecraftforge.net/forum/")
+package.recommended = recommended_versions
+```
+
+---
+
+## Data Models
+
+### `ForgeEntry`
+
+```python
+class ForgeEntry(MetaBase):
+ long_version: str # "1.20.4-49.0.31"
+ mc_version: str # "1.20.4"
+ version: str # "49.0.31"
+ build: int # 31
+ branch: Optional[str]
+ latest: Optional[bool]
+ recommended: Optional[bool]
+ files: Optional[Dict[str, ForgeFile]]
+```
+
+### `DerivedForgeIndex`
+
+```python
+class DerivedForgeIndex(MetaBase):
+ versions: Dict[str, ForgeEntry]
+ by_mc_version: Dict[str, ForgeMCVersionInfo]
+```
+
+### `ForgeInstallerProfile` (v1)
+
+```python
+class ForgeInstallerProfile(MetaBase):
+ install: ForgeInstallerProfileInstallSection
+ version_info: ForgeVersionFile
+ optionals: Optional[List[ForgeOptional]]
+```
+
+### `ForgeInstallerProfileV2`
+
+```python
+class ForgeInstallerProfileV2(MetaBase):
+ spec: Optional[int]
+ profile: Optional[str]
+ version: Optional[str]
+ path: Optional[GradleSpecifier]
+ minecraft: Optional[str]
+ data: Optional[Dict[str, DataSpec]]
+ processors: Optional[List[ProcessorSpec]]
+ libraries: Optional[List[Library]]
+```
+
+### `InstallerInfo`
+
+```python
+class InstallerInfo(MetaBase):
+ sha1hash: Optional[str]
+ sha256hash: Optional[str]
+ size: Optional[int]
+```
+
+---
+
+## ForgeWrapper
+
+The `FORGEWRAPPER_LIBRARY` is defined in `common/forge.py`:
+
+```python
+FORGEWRAPPER_LIBRARY = make_launcher_library(
+ GradleSpecifier("io.github.zekerzhayard", "ForgeWrapper", "projt-2026-04-04"),
+ "4c4653d80409e7e968d3e3209196ffae778b7b4e",
+ 29731,
+)
+```
+
+This creates a `Library` object with:
+- Download URL: `https://files.projecttick.org/maven/io/github/zekerzhayard/ForgeWrapper/projt-2026-04-04/ForgeWrapper-projt-2026-04-04.jar`
+- SHA-1: `4c4653d80409e7e968d3e3209196ffae778b7b4e`
+- Size: 29731 bytes
+
+---
+
+## Output Structure
+
+```
+launcher/net.minecraftforge/
+├── package.json # name, recommended versions, project URL
+├── 49.0.31.json # Modern (build system installer)
+├── 36.2.39.json # Modern (build system installer)
+├── 14.23.5.2860.json # Old installer (profile v1)
+├── 10.13.4.1614.json # Modernized installer for legacy MC
+├── 7.8.1.740.json # Legacy JAR mod
+└── ...
+```
diff --git a/docs/handbook/meta/java-runtime-metadata.md b/docs/handbook/meta/java-runtime-metadata.md
new file mode 100644
index 0000000000..dc9ca8a5ea
--- /dev/null
+++ b/docs/handbook/meta/java-runtime-metadata.md
@@ -0,0 +1,546 @@
+# Meta — Java Runtime Metadata
+
+## Overview
+
+Meta aggregates Java runtime information from **four vendors** and produces unified metadata for the launcher. This allows the launcher to automatically download and manage Java installations across platforms and architectures.
+
+### Vendors and Components
+
+| Vendor | Component UID | API Source | JVM Implementation |
+|---|---|---|---|
+| Mojang | `net.minecraft.java` | `launchermeta.mojang.com` | HotSpot |
+| Adoptium (Eclipse Temurin) | `net.adoptium.java` | `api.adoptium.net` | HotSpot |
+| OpenJ9 (IBM Semeru) | `com.ibm.java` | `api.adoptopenjdk.net` | OpenJ9 |
+| Azul (Zulu) | `com.azul.java` | `api.azul.com` | HotSpot |
+
+Each vendor gets its own component UID and separate output files. Mojang's component (`net.minecraft.java`) is special: it is the primary component used by the launcher and is augmented with data from Adoptium and Azul for platforms that Mojang itself doesn't cover.
+
+---
+
+## Phase 1: Update — `update_java.py`
+
+### Adoptium (Eclipse Temurin)
+
+Fetches from the Adoptium v3 API:
+
+```python
+ADOPTIUM_API_BASE = "https://api.adoptium.net"
+ADOPTX_API_AVAILABLE_RELEASES = f"{{base_url}}/v3/info/available_releases"
+ADOPTX_API_FEATURE_RELEASES = (
+ f"{{base_url}}/v3/assets/feature_releases/{{feature_version}}/{{release_type}}"
+)
+```
+
+**Step 1**: Get available feature versions:
+```python
+r = sess.get(ADOPTX_API_AVAILABLE_RELEASES.format(base_url=ADOPTIUM_API_BASE))
+available = AdoptxAvailableReleases(**r.json())
+```
+
+**Step 2**: For each feature version, paginate through releases:
+```python
+for feature in available.available_releases:
+ page = 0
+ while True:
+ query = AdoptxAPIFeatureReleasesQuery(
+ image_type=AdoptxImageType.Jre,
+ page_size=page_size,
+ page=page,
+ jvm_impl=AdoptxJvmImpl.Hotspot,
+ vendor=AdoptxVendor.Eclipse,
+ )
+ api_call = adoptiumAPIFeatureReleasesUrl(feature, query=query)
+ r_rls = sess.get(api_call)
+ # ...
+ if len(r_rls.json()) < page_size:
+ break
+ page += 1
+```
+
+**Step 3**: Save as `AdoptxReleases` per feature version:
+```python
+releases = AdoptxReleases(__root__=releases_for_feature)
+releases.write(feature_file)
+```
+
+**Step 4**: Write filtered available releases:
+```python
+filtered_available_releases(available, present_adoptium_features).write(
+ available_releases_file
+)
+```
+
+The `filtered_available_releases()` function removes features that had no actual releases:
+
+```python
+def filtered_available_releases(
+ available: AdoptxAvailableReleases, present_features: list[int]
+) -> AdoptxAvailableReleases:
+ filtered_features = sorted(set(present_features))
+ filtered_lts = [
+ feature for feature in available.available_lts_releases
+ if feature in filtered_features
+ ]
+ # ...
+```
+
+### Retry Logic
+
+All vendor APIs use a 3-attempt retry pattern with linear backoff for server errors (5xx):
+
+```python
+for attempt in range(3):
+ r = sess.get(api_call)
+ if r.status_code >= 500:
+ if attempt < 2:
+ time.sleep(1 * (attempt + 1))
+ continue
+ else:
+ r.raise_for_status()
+ else:
+ r.raise_for_status()
+ break
+```
+
+### OpenJ9 (IBM Semeru)
+
+Uses the same API structure as Adoptium but with a different base URL and vendor:
+
+```python
+OPENJ9_API_BASE = "https://api.adoptopenjdk.net"
+```
+
+```python
+query = AdoptxAPIFeatureReleasesQuery(
+ image_type=AdoptxImageType.Jre,
+ jvm_impl=AdoptxJvmImpl.OpenJ9,
+ vendor=AdoptxVendor.Ibm,
+)
+api_call = openj9APIFeatureReleasesUrl(feature, query=query)
+```
+
+The model classes `AdoptxRelease`, `AdoptxBinary`, `AdoptxVersion` are shared between Adoptium and OpenJ9 (the "adoptx" prefix covers both).
+
+### Azul (Zulu)
+
+Uses a completely different API structure:
+
+```python
+AZUL_API_BASE = "https://api.azul.com/metadata/v1"
+AZUL_API_PACKAGES = f"{AZUL_API_BASE}/zulu/packages/"
+AZUL_API_PACKAGE_DETAIL = f"{AZUL_API_BASE}/zulu/packages/{{package_uuid}}"
+```
+
+**Step 1**: Paginate through Zulu package listings:
+```python
+query = AzulApiPackagesQuery(
+ archive_type=AzulArchiveType.Zip,
+ release_status=AzulReleaseStatus.Ga,
+ availability_types=[AzulAvailabilityType.CA],
+ java_package_type=AzulJavaPackageType.Jre,
+ javafx_bundled=False,
+ latest=True,
+ page=page,
+ page_size=page_size,
+)
+```
+
+**Step 2**: For each package, fetch detailed info (or use cached):
+```python
+pkg_file = os.path.join(UPSTREAM_DIR, AZUL_VERSIONS_DIR, f"{pkg.package_uuid}.json")
+if os.path.exists(pkg_file):
+ pkg_detail = ZuluPackageDetail.parse_file(pkg_file)
+else:
+ api_call = azulApiPackageDetailUrl(pkg.package_uuid)
+ r_pkg = sess.get(api_call)
+ pkg_detail = ZuluPackageDetail(**r_pkg.json())
+ pkg_detail.write(pkg_file)
+```
+
+---
+
+## Phase 2: Generate — `generate_java.py`
+
+### Architecture and OS Translation
+
+The generator normalizes vendor-specific OS and architecture names to a unified `JavaRuntimeOS` enum:
+
+```python
+MOJANG_OS_ARCHITECTURE_TRANSLATIONS = {
+ 64: "x64",
+ 32: "x86",
+ "x32": "x86",
+ "i386": "x86",
+ "aarch64": "arm64",
+ "x86_64": "x64",
+ "arm": "arm32",
+ "riscv64": "riscv64",
+}
+
+MOJANG_OS_TRANSLATIONS = {
+ "osx": "mac-os",
+ "mac": "mac-os",
+ "macos": "mac-os",
+}
+```
+
+`JavaRuntimeOS` combines OS and architecture into a single enum value like `linux-x64`, `windows-arm64`, `mac-os-arm64`.
+
+### Vendor-Specific Converters
+
+Each vendor has a dedicated conversion function that maps vendor data to the unified `JavaRuntimeMeta` model:
+
+#### Mojang Converter
+
+```python
+def mojang_runtime_to_java_runtime(
+ mojang_runtime: MojangJavaRuntime,
+ mojang_component: MojangJavaComponent,
+ runtime_os: JavaRuntimeOS,
+) -> JavaRuntimeMeta:
+ # Parse version strings like "8u422" or "17.0.12"
+ major, _, trail = mojang_runtime.version.name.partition("u")
+ # ...
+ return JavaRuntimeMeta(
+ name=mojang_component,
+ vendor="mojang",
+ url=mojang_runtime.manifest.url,
+ downloadType=JavaRuntimeDownloadType.Manifest,
+ packageType=JavaPackageType.Jre,
+ # ...
+ )
+```
+
+Key difference: Mojang uses `downloadType="manifest"` (a JSON manifest of individual files), while other vendors use `downloadType="archive"` (a ZIP/tar.gz download).
+
+#### Adoptium/OpenJ9 Converter
+
+```python
+def adoptx_release_binary_to_java_runtime(
+ rls: AdoptxRelease,
+ binary: AdoptxBinary,
+ runtime_os: JavaRuntimeOS,
+) -> JavaRuntimeMeta:
+ # ...
+ if rls.vendor == "eclipse":
+ rls_distribution = "temurin"
+ elif rls.vendor == "ibm":
+ rls_distribution = "semeru-open"
+
+ rls_name = f"{rls.vendor}_{rls_distribution}_{binary.image_type}{version}"
+ return JavaRuntimeMeta(
+ vendor=rls.vendor,
+ url=binary.package.link,
+ downloadType=JavaRuntimeDownloadType.Archive,
+ # ...
+ )
+```
+
+#### Azul Converter
+
+```python
+def azul_package_to_java_runtime(
+ pkg: ZuluPackageDetail, runtime_os: JavaRuntimeOS
+) -> JavaRuntimeMeta:
+ # ...
+ rls_name = f"azul_{pkg.product}_{pkg.java_package_type}{version}"
+ return JavaRuntimeMeta(
+ vendor="azul",
+ url=pkg.download_url,
+ downloadType=JavaRuntimeDownloadType.Archive,
+ # ...
+ )
+```
+
+### Mojang Java Component Mapping
+
+Mojang names Java runtime groups by Greek letters. The generator maps these to major versions:
+
+```python
+def mojang_component_to_major(mojang_component: MojangJavaComponent) -> int:
+ match mojang_component:
+ case MojangJavaComponent.JreLegacy: return 8
+ case MojangJavaComponent.Alpha: return 17
+ case MojangJavaComponent.Beta: return 17
+ case MojangJavaComponent.Gamma: return 17
+ case MojangJavaComponent.GammaSnapshot: return 17
+ case MojangJavaComponent.Exe: return 0
+ case MojangJavaComponent.Delta: return 21
+```
+
+### Extra Mojang Java (Platform Gap Filling)
+
+Mojang's Java manifest doesn't cover all platforms. The generator fills gaps using Adoptium and Azul data:
+
+```python
+def add_java_runtime(runtime: JavaRuntimeMeta, major: int):
+ javas[major].append(runtime)
+
+ # Track runtimes for platforms Mojang doesn't cover
+ if (
+ (runtime.runtime_os in [JavaRuntimeOS.MacOsArm64, JavaRuntimeOS.WindowsArm64]
+ and major == 8)
+ or (runtime.runtime_os in [
+ JavaRuntimeOS.WindowsArm32, JavaRuntimeOS.LinuxArm32,
+ JavaRuntimeOS.LinuxArm64,
+ ] and major in [8, 17, 21])
+ or (runtime.runtime_os in [JavaRuntimeOS.LinuxX86, JavaRuntimeOS.LinuxRiscv64]
+ and major in [17, 21, 25])
+ ):
+ extra_mojang_javas[major].append(runtime)
+```
+
+After processing all vendors, these extras are injected into `net.minecraft.java`:
+
+```python
+for java_os in [
+ JavaRuntimeOS.WindowsArm32,
+ JavaRuntimeOS.LinuxArm32,
+ JavaRuntimeOS.LinuxArm64,
+ JavaRuntimeOS.LinuxRiscv64,
+]:
+ for comp in [
+ MojangJavaComponent.JreLegacy,
+ MojangJavaComponent.Alpha,
+ MojangJavaComponent.Beta,
+ MojangJavaComponent.Gamma,
+ MojangJavaComponent.GammaSnapshot,
+ MojangJavaComponent.Delta,
+ ]:
+ runtime = get_mojang_extra_java(comp, java_os)
+ if runtime != None:
+ add_java_runtime(runtime, mojang_component_to_major(comp))
+```
+
+The `get_mojang_extra_java()` function prefers Adoptium over Azul and selects the latest version:
+
+```python
+def get_mojang_extra_java(
+ mojang_component: MojangJavaComponent, java_os: JavaRuntimeOS
+) -> JavaRuntimeMeta | None:
+ posible_javas = list(
+ filter(lambda x: x.runtime_os == java_os, extra_mojang_javas[java_major])
+ )
+ prefered_vendor = list(filter(lambda x: x.vendor != "azul", posible_javas))
+ if len(prefered_vendor) == 0:
+ prefered_vendor = posible_javas
+ prefered_vendor.sort(key=lambda x: x.version, reverse=True)
+ runtime = prefered_vendor[0]
+ runtime.name = mojang_component
+ return runtime
+```
+
+### Writing Output
+
+```python
+def writeJavas(javas: dict[int, list[JavaRuntimeMeta]], uid: str):
+ javas = dict(sorted(javas.items(), key=lambda item: item[0]))
+
+ for major, runtimes in javas.items():
+ version_file = os.path.join(LAUNCHER_DIR, uid, f"java{major}.json")
+ java_version = JavaRuntimeVersion(
+ name=f"Java {major}",
+ uid=uid,
+ version=f"java{major}",
+ releaseTime=timestamps.get(major),
+ runtimes=runtimes,
+ )
+ java_version.write(version_file)
+
+ package = MetaPackage(uid=uid, name="Java Runtimes", recommended=[])
+ package.write(os.path.join(LAUNCHER_DIR, uid, "package.json"))
+```
+
+`JavaRuntimeVersion` extends `MetaVersion` with a `runtimes: list[JavaRuntimeMeta]` field.
+
+Timestamps are derived from the **oldest** runtime in each major version to ensure monotonically increasing release times:
+
+```python
+releaseTime = reduce(
+ oldest_timestamp,
+ (runtime.release_time for runtime in runtimes),
+ None,
+)
+if prevDate is not None and releaseTime < prevDate:
+ releaseTime = prevDate + datetime.timedelta(seconds=1)
+```
+
+---
+
+## Key Data Models
+
+### `JavaRuntimeOS` (StrEnum)
+
+Platform identifiers combining OS and architecture:
+
+| Value | Platform |
+|---|---|
+| `mac-os-x64` | macOS Intel |
+| `mac-os-arm64` | macOS Apple Silicon |
+| `linux-x64` | Linux x86_64 |
+| `linux-x86` | Linux 32-bit |
+| `linux-arm64` | Linux AArch64 |
+| `linux-arm32` | Linux ARM 32-bit |
+| `linux-riscv64` | Linux RISC-V 64 |
+| `windows-x64` | Windows 64-bit |
+| `windows-x86` | Windows 32-bit |
+| `windows-arm64` | Windows ARM 64-bit |
+| `windows-arm32` | Windows ARM 32-bit |
+
+### `JavaVersionMeta`
+
+Structured Java version (supports comparison with `@total_ordering`):
+
+```python
+class JavaVersionMeta(MetaBase):
+ major: int
+ minor: int
+ security: int
+ build: Optional[int] = None
+ buildstr: Optional[str] = None
+ name: Optional[str] = None
+```
+
+Example: Java `17.0.12+7` → `major=17, minor=0, security=12, build=7`.
+
+### `JavaRuntimeMeta`
+
+The core runtime descriptor:
+
+```python
+class JavaRuntimeMeta(MetaBase):
+ name: str # e.g., "eclipse_temurin_jre17.0.12"
+ vendor: str # "mojang", "eclipse", "ibm", "azul"
+ url: str # Download URL
+ release_time: datetime
+ checksum: Optional[JavaChecksumMeta]
+ download_type: JavaRuntimeDownloadType # "manifest" or "archive"
+ package_type: JavaPackageType # "jre" or "jdk"
+ version: JavaVersionMeta
+ runtime_os: JavaRuntimeOS
+```
+
+### `JavaRuntimeVersion`
+
+Extends `MetaVersion` with a runtime list:
+
+```python
+class JavaRuntimeVersion(MetaVersion):
+ runtimes: list[JavaRuntimeMeta]
+```
+
+### `AdoptxRelease` / `AdoptxBinary`
+
+Shared models for Adoptium and OpenJ9:
+
+```python
+class AdoptxBinary(MetaBase):
+ os: str
+ architecture: AdoptxArchitecture
+ image_type: AdoptxImageType
+ package: Optional[AdoptxPackage]
+ jvm_impl: AdoptxJvmImpl
+ heap_size: AdoptxHeapSize
+
+class AdoptxRelease(MetaBase):
+ release_id: str = Field(alias="id")
+ timestamp: datetime
+ binaries: list[AdoptxBinary]
+ vendor: AdoptxVendor
+ version_data: AdoptxVersion
+```
+
+### `ZuluPackageDetail`
+
+Azul's detailed package info:
+
+```python
+class ZuluPackageDetail(MetaBase):
+ package_uuid: str
+ sha256_hash: Optional[str]
+ download_url: str
+ java_version: list[int]
+ java_package_type: AzulJavaPackageType
+ os: AzulOs
+ arch: AzulArch
+ hw_bitness: AzulHwBitness
+ archive_type: AzulArchiveType
+```
+
+---
+
+## Output Structure
+
+```
+launcher/
+├── net.minecraft.java/
+│ ├── package.json
+│ ├── java8.json
+│ ├── java17.json
+│ └── java21.json
+├── net.adoptium.java/
+│ ├── package.json
+│ ├── java8.json
+│ ├── java11.json
+│ ├── java17.json
+│ └── java21.json
+├── com.ibm.java/
+│ ├── package.json
+│ ├── java8.json
+│ └── java11.json
+└── com.azul.java/
+ ├── package.json
+ ├── java8.json
+ ├── java11.json
+ ├── java17.json
+ └── java21.json
+```
+
+---
+
+## Upstream Data Structure
+
+```
+upstream/java_runtime/
+├── adoptium/
+│ ├── available_releases.json
+│ └── versions/
+│ ├── java8.json
+│ ├── java11.json
+│ └── ...
+├── ibm/
+│ ├── available_releases.json
+│ └── versions/
+│ ├── java8.json
+│ └── ...
+└── azul/
+ ├── packages.json
+ └── versions/
+ ├── <package-uuid>.json
+ ├── java8.json
+ └── ...
+```
+
+---
+
+## Processing Pipeline Summary
+
+```
+Vendor APIs ──► update_java.py ──► upstream/java_runtime/
+ │
+ ▼
+ generate_java.py
+ │
+ ┌───────────────┼───────────────┐
+ ▼ ▼ ▼
+ net.adoptium.java com.ibm.java com.azul.java
+ │ │ │
+ └───── extra_mojang_javas ──────┘
+ │
+ ▼
+ net.minecraft.java
+ (augmented with third-party
+ runtimes for ARM & RISC-V)
+```
+
+The Mojang component is always processed **last** so it can pull in third-party runtimes for platforms Mojang doesn't natively support.
diff --git a/docs/handbook/meta/mojang-metadata.md b/docs/handbook/meta/mojang-metadata.md
new file mode 100644
index 0000000000..181f0adda4
--- /dev/null
+++ b/docs/handbook/meta/mojang-metadata.md
@@ -0,0 +1,480 @@
+# Meta — Mojang Metadata
+
+## Overview
+
+Mojang metadata processing is the foundation of the entire Meta pipeline. Every other component (Forge, Fabric, etc.) depends on the Minecraft version data produced here. The pipeline handles:
+
+1. The Mojang version manifest (`version_manifest_v2.json`)
+2. Individual version JSONs for every Minecraft release and snapshot
+3. Experimental snapshots (zip-packaged)
+4. Pre-launcher old snapshots
+5. Mojang's Java runtime manifest
+6. LWJGL library extraction and deduplication
+7. Log4j vulnerability patching
+8. Legacy version overrides
+
+---
+
+## Phase 1: Update — `update_mojang.py`
+
+### Version Manifest Fetching
+
+The updater starts by fetching Mojang's version manifest:
+
+```python
+r = sess.get("https://piston-meta.mojang.com/mc/game/version_manifest_v2.json")
+r.raise_for_status()
+remote_versions = MojangIndexWrap(MojangIndex(**r.json()))
+```
+
+The `MojangIndex` model maps the manifest structure:
+
+```python
+class MojangIndex(MetaBase):
+ latest: MojangLatestVersion # {"release": "1.21.5", "snapshot": "25w14a"}
+ versions: List[MojangIndexEntry]
+
+class MojangIndexEntry(MetaBase):
+ id: Optional[str] # "1.21.5"
+ release_time: Optional[datetime]
+ time: Optional[datetime]
+ type: Optional[str] # "release", "snapshot"
+ url: Optional[str] # URL to full version JSON
+ sha1: Optional[str]
+ compliance_level: Optional[int]
+```
+
+The `MojangIndexWrap` class provides a dict-based lookup:
+
+```python
+class MojangIndexWrap:
+ def __init__(self, index: MojangIndex):
+ self.index = index
+ self.latest = index.latest
+ self.versions = dict((x.id, x) for x in index.versions)
+```
+
+### Incremental Updates
+
+Only new or modified versions are fetched:
+
+```python
+if os.path.exists(version_manifest_path):
+ current_versions = MojangIndexWrap(MojangIndex.parse_file(version_manifest_path))
+ local_ids = set(current_versions.versions.keys())
+ pending_ids = remote_ids.difference(local_ids)
+
+ for x in local_ids:
+ remote_version = remote_versions.versions[x]
+ local_version = current_versions.versions[x]
+ if remote_version.time > local_version.time:
+ pending_ids.add(x)
+else:
+ pending_ids = remote_ids
+```
+
+A version is re-fetched if its `time` field changed (Mojang updates this when they modify a version).
+
+### Concurrent Version Downloads
+
+Individual version JSONs are downloaded in parallel:
+
+```python
+with concurrent.futures.ThreadPoolExecutor() as executor:
+ futures = [
+ executor.submit(fetch_version_concurrent, remote_versions, x)
+ for x in pending_ids
+ ]
+ for f in futures:
+ f.result()
+```
+
+Each version is saved to `upstream/mojang/versions/<id>.json`.
+
+### Experimental Snapshots
+
+Experimental snapshots (like combat test snapshots) are distributed as zip files with embedded JSON. Meta handles these via a static registry:
+
+```python
+if os.path.exists(STATIC_EXPERIMENTS_FILE):
+ experiments = ExperimentIndexWrap(
+ ExperimentIndex.parse_file(STATIC_EXPERIMENTS_FILE)
+ )
+ for x in experiment_ids:
+ version = experiments.versions[x]
+ if not os.path.isfile(experiment_path):
+ fetch_zipped_version(experiment_path, version.url)
+```
+
+The `fetch_zipped_version()` function downloads a zip, extracts the JSON, and marks the type as `"experiment"`:
+
+```python
+def fetch_zipped_version(path, url):
+ zip_path = f"{path}.zip"
+ download_binary_file(sess, zip_path, url)
+ with zipfile.ZipFile(zip_path) as z:
+ for info in z.infolist():
+ if info.filename.endswith(".json"):
+ version_json = json.load(z.open(info))
+ break
+ version_json["type"] = "experiment"
+ with open(path, "w", encoding="utf-8") as f:
+ json.dump(version_json, f, sort_keys=True, indent=4)
+```
+
+### Old Snapshots
+
+Pre-launcher snapshots that Mojang never published with proper manifests:
+
+```python
+def fetch_modified_version(path, version):
+ r = sess.get(version.url)
+ version_json = r.json()
+ version_json["releaseTime"] = version_json["releaseTime"] + "T00:00:00+02:00"
+ version_json["time"] = version_json["releaseTime"]
+ downloads = {
+ "client": {"url": version.jar, "sha1": version.sha1, "size": version.size}
+ }
+ version_json["downloads"] = downloads
+ version_json["type"] = "old_snapshot"
+```
+
+These versions have manually curated JAR URLs, SHA-1 hashes, and sizes stored in `mojang-minecraft-old-snapshots.json`.
+
+### Java Runtime Manifest
+
+Mojang provides a manifest of Java runtimes for different platforms at a fixed URL:
+
+```python
+MOJANG_JAVA_URL = "https://piston-meta.mojang.com/v1/products/java-runtime/2ec0cc96c44e5a76b9c8b7c39df7210883d12871/all.json"
+
+def update_javas():
+ r = sess.get(MOJANG_JAVA_URL)
+ remote_javas = JavaIndex(__root__=r.json())
+ remote_javas.write(java_manifest_path)
+```
+
+---
+
+## Phase 2: Generate — `generate_mojang.py`
+
+This is the most complex generator in the pipeline. It produces:
+
+- `net.minecraft/<version>.json` — for every Minecraft version
+- `org.lwjgl/<version>.json` — for LWJGL 2 versions
+- `org.lwjgl3/<version>.json` — for LWJGL 3 versions
+- `net.minecraft/package.json`, `org.lwjgl/package.json`, `org.lwjgl3/package.json`
+
+### Loading Static Data
+
+```python
+override_index = LegacyOverrideIndex.parse_file(STATIC_OVERRIDES_FILE)
+legacy_services = LegacyServices.parse_file(STATIC_LEGACY_SERVICES_FILE)
+library_patches = LibraryPatches.parse_file(LIBRARY_PATCHES_FILE)
+```
+
+### MojangVersion to MetaVersion Conversion
+
+Each raw Mojang version JSON is parsed into a `MojangVersion` and then converted:
+
+```python
+mojang_version = MojangVersion.parse_file(input_file)
+v = mojang_version.to_meta_version("Minecraft", MINECRAFT_COMPONENT, mojang_version.id)
+```
+
+The `to_meta_version()` method handles:
+
+1. **Client JAR**: Extracts the client download into a `main_jar` `Library`:
+ ```python
+ main_jar = Library(
+ name=GradleSpecifier("com.mojang", "minecraft", self.id, "client"),
+ downloads=MojangLibraryDownloads(artifact=artifact),
+ )
+ ```
+
+2. **Compliance level**: Maps `compliance_level=1` to the `XR:Initial` trait.
+
+3. **Java version**: Extracts `javaVersion.majorVersion` and `javaVersion.component`, populating `compatible_java_majors` and `compatible_java_name`.
+
+4. **Type mapping**: Converts `"pending"` to `"experiment"`.
+
+### LWJGL Extraction
+
+Libraries are classified as LWJGL if their `GradleSpecifier.is_lwjgl()` returns `True`:
+
+```python
+def is_lwjgl(self):
+ return self.group in (
+ "org.lwjgl",
+ "org.lwjgl.lwjgl",
+ "net.java.jinput",
+ "net.java.jutils",
+ )
+```
+
+LWJGL libraries are extracted into "buckets" keyed by their OS rules:
+
+```python
+def add_or_get_bucket(buckets, rules: Optional[MojangRules]) -> MetaVersion:
+ rule_hash = None
+ if rules:
+ rule_hash = hash(rules.json())
+ if rule_hash in buckets:
+ bucket = buckets[rule_hash]
+ else:
+ bucket = MetaVersion(name="LWJGL", version="undetermined", uid=LWJGL_COMPONENT)
+ bucket.type = "release"
+ buckets[rule_hash] = bucket
+ return bucket
+```
+
+### LWJGL Variant Deduplication
+
+Multiple Minecraft versions may ship identical LWJGL libraries. Each unique set is identified by SHA-1:
+
+```python
+def hash_lwjgl_version(lwjgl: MetaVersion):
+ lwjgl_copy = copy.deepcopy(lwjgl)
+ lwjgl_copy.release_time = None
+ return hashlib.sha1(lwjgl_copy.json().encode("utf-8", "strict")).hexdigest()
+```
+
+The hash excludes `release_time` so that identical library sets from different Minecraft versions produce the same hash.
+
+### Log4j Patching
+
+To mitigate CVE-2021-44228 (Log4Shell), all Log4j libraries are forcibly upgraded:
+
+```python
+def map_log4j_artifact(version):
+ x = pversion.parse(version)
+ if x <= pversion.parse("2.0"):
+ return "2.0-beta9-fixed", "https://files.projecttick.org/maven/%s"
+ if x <= pversion.parse("2.17.1"):
+ return "2.17.1", "https://repo1.maven.org/maven2/%s"
+ return None, None
+```
+
+Version `2.0-beta9-fixed` is a custom patched build hosted on the ProjT Maven. Version `2.17.1` is the official fixed release from Apache.
+
+Pre-computed hashes are stored for each artifact:
+
+```python
+LOG4J_HASHES = {
+ "2.0-beta9-fixed": {
+ "log4j-api": {"sha1": "b61eaf2e64d8b0277e188262a8b771bbfa1502b3", "size": 107347},
+ "log4j-core": {"sha1": "677991ea2d7426f76309a73739cecf609679492c", "size": 677588},
+ },
+ "2.17.1": {
+ "log4j-api": {"sha1": "d771af8e336e372fb5399c99edabe0919aeaf5b2", "size": 301872},
+ "log4j-core": {"sha1": "779f60f3844dadc3ef597976fcb1e5127b1f343d", "size": 1790452},
+ "log4j-slf4j18-impl": {"sha1": "ca499d751f4ddd8afb016ef698c30be0da1d09f7", "size": 21268},
+ },
+}
+```
+
+### Library Rules and Platform Filtering
+
+Mojang uses a rules system to specify platform-specific libraries:
+
+```python
+class OSRule(MetaBase):
+ name: str # "osx", "linux", "windows", "windows-arm64", etc.
+ version: Optional[str]
+
+class MojangRule(MetaBase):
+ action: str # "allow" or "disallow"
+ os: Optional[OSRule]
+```
+
+The generator filters macOS-only libraries (like older LWJGL builds that were OS-specific):
+
+```python
+def is_macos_only(rules: Optional[MojangRules]):
+ allows_osx = False
+ allows_all = False
+ if rules:
+ for rule in rules:
+ if rule.action == "allow" and rule.os and rule.os.name == "osx":
+ allows_osx = True
+ if rule.action == "allow" and not rule.os:
+ allows_all = True
+ if allows_osx and not allows_all:
+ return True
+ return False
+```
+
+### Argument Processing
+
+Modern Minecraft (1.13+) uses a structured argument format instead of a flat string. The generator flattens these back into a single space-separated string:
+
+```python
+def adapt_new_style_arguments(arguments):
+ foo = []
+ for arg in arguments.game:
+ if isinstance(arg, str):
+ if arg == "--clientId":
+ continue
+ if arg == "${clientid}":
+ continue
+ if arg == "--xuid":
+ continue
+ if arg == "${auth_xuid}":
+ continue
+ foo.append(arg)
+ else:
+ print("!!! Unrecognized structure in Minecraft game arguments:")
+ pprint(arg)
+ return " ".join(foo)
+```
+
+Some arguments like `--clientId` and `--xuid` are filtered out (they're Microsoft-account-specific and not used by the launcher).
+
+Feature flags in structured arguments are converted to traits:
+
+```python
+def adapt_new_style_arguments_to_traits(arguments):
+ foo = []
+ for arg in arguments.game:
+ if isinstance(arg, dict):
+ for rule in arg["rules"]:
+ for k, v in rule["features"].items():
+ if rule["action"] == "allow" and v and k in SUPPORTED_FEATURES:
+ foo.append(f"feature:{k}")
+ return foo
+```
+
+### Legacy Override System
+
+The `LegacyOverrideIndex` provides manual corrections for old Minecraft versions:
+
+```python
+class LegacyOverrideEntry(MetaBase):
+ main_class: Optional[str]
+ applet_class: Optional[str]
+ release_time: Optional[datetime]
+ additional_traits: Optional[List[str]]
+ additional_jvm_args: Optional[List[str]]
+
+ def apply_onto_meta_version(self, meta_version: MetaVersion, legacy: bool = True):
+ meta_version.main_class = self.main_class
+ meta_version.applet_class = self.applet_class
+ if self.release_time:
+ meta_version.release_time = self.release_time
+ if self.additional_traits:
+ if not meta_version.additional_traits:
+ meta_version.additional_traits = []
+ meta_version.additional_traits += self.additional_traits
+ if legacy:
+ meta_version.libraries = None # Remove all libraries for legacy
+```
+
+### Dependency Wiring
+
+Each Minecraft version declares a dependency on either LWJGL 2 or LWJGL 3:
+
+```python
+if is_lwjgl_3:
+ lwjgl_dependency = Dependency(uid=LWJGL3_COMPONENT)
+else:
+ lwjgl_dependency = Dependency(uid=LWJGL_COMPONENT)
+
+lwjgl_dependency.suggests = suggested_version
+v.requires = [lwjgl_dependency]
+```
+
+LWJGL 3 versions also get the `FirstThreadOnMacOS` trait:
+
+```python
+if is_lwjgl_3:
+ if not v.additional_traits:
+ v.additional_traits = []
+ v.additional_traits.append("FirstThreadOnMacOS")
+```
+
+### Package Metadata
+
+Finally, the generator produces `package.json` files:
+
+```python
+minecraft_package = MetaPackage(uid=MINECRAFT_COMPONENT, name="Minecraft")
+minecraft_package.recommended = [mojang_index.latest.release]
+minecraft_package.write(os.path.join(LAUNCHER_DIR, MINECRAFT_COMPONENT, "package.json"))
+```
+
+---
+
+## Library Patches
+
+The `mojang-library-patches.json` file contains patches applied to Minecraft libraries during generation:
+
+```python
+class LibraryPatch(MetaBase):
+ match: List[GradleSpecifier]
+ override: Optional[Library]
+ additionalLibraries: Optional[List[Library]]
+ patchAdditionalLibraries: bool = False
+
+ def applies(self, target: Library) -> bool:
+ return target.name in self.match
+```
+
+Use cases:
+- Adding ARM64 native libraries for platforms Mojang doesn't officially support
+- Replacing broken library URLs
+- Adding missing download metadata
+
+---
+
+## Supported OS Values
+
+The `OSRule.name` validator accepts:
+
+```python
+["osx", "linux", "windows", "windows-arm64", "osx-arm64",
+ "linux-arm64", "linux-arm32", "linux-riscv64"]
+```
+
+---
+
+## Output Structure
+
+After generation:
+
+```
+launcher/
+├── net.minecraft/
+│ ├── package.json # recommended: [latest release]
+│ ├── 1.21.5.json
+│ ├── 1.20.4.json
+│ ├── 24w14a.json # snapshot
+│ ├── 1.0.json # legacy
+│ └── ...
+├── org.lwjgl/
+│ ├── package.json
+│ ├── 2.9.0.json
+│ ├── 2.9.1.json
+│ └── 2.9.4-nightly-20150209.json
+└── org.lwjgl3/
+ ├── package.json
+ ├── 3.1.2.json
+ ├── 3.2.2.json
+ ├── 3.3.1.json
+ ├── 3.3.3.json
+ └── 3.4.1.json
+```
+
+---
+
+## Key Constants
+
+| Constant | Value | Location |
+|---|---|---|
+| `MINECRAFT_COMPONENT` | `"net.minecraft"` | `common/mojang.py` |
+| `LWJGL_COMPONENT` | `"org.lwjgl"` | `common/mojang.py` |
+| `LWJGL3_COMPONENT` | `"org.lwjgl3"` | `common/mojang.py` |
+| `SUPPORTED_LAUNCHER_VERSION` | `21` | `model/mojang.py` |
+| `SUPPORTED_COMPLIANCE_LEVEL` | `1` | `model/mojang.py` |
+| `DEFAULT_JAVA_MAJOR` | `8` | `model/mojang.py` |
+| `DEFAULT_JAVA_NAME` | `"jre-legacy"` | `model/mojang.py` |
+| `META_FORMAT_VERSION` | `1` | `model/__init__.py` |
diff --git a/docs/handbook/meta/neoforge-metadata.md b/docs/handbook/meta/neoforge-metadata.md
new file mode 100644
index 0000000000..63a45c36bb
--- /dev/null
+++ b/docs/handbook/meta/neoforge-metadata.md
@@ -0,0 +1,334 @@
+# Meta — NeoForge Metadata
+
+## Overview
+
+NeoForge is a fork of Forge that emerged in 2023. Its metadata pipeline closely mirrors Forge's, but with key differences in version numbering and Maven repository structure. NeoForge exclusively uses the build-system installer format (equivalent to Forge's v2 installer profile), so there are no legacy paths to handle.
+
+---
+
+## Phase 1: Update — `update_neoforge.py`
+
+### Fetching Version Lists
+
+NeoForge publishes versions under two Maven artifacts:
+
+```python
+# Legacy artifact (1.20.1 era, when NeoForge still used Forge's naming)
+r = sess.get(
+ "https://maven.neoforged.net/api/maven/versions/releases/net%2Fneoforged%2Fforge"
+)
+main_json = r.json()["versions"]
+
+# New artifact (post-1.20.1, NeoForge's own naming)
+r = sess.get(
+ "https://maven.neoforged.net/api/maven/versions/releases/net%2Fneoforged%2Fneoforge"
+)
+new_main_json = r.json()["versions"]
+
+main_json += new_main_json # Merge both lists
+```
+
+### Version Parsing
+
+Two regex patterns handle the two naming schemes:
+
+```python
+# Legacy format: "1.20.1-47.1.100"
+version_expression = re.compile(
+ r"^(?P<mc>[0-9a-zA-Z_\.]+)-(?P<ver>[0-9\.]+\.(?P<build>[0-9]+))(-(?P<branch>[a-zA-Z0-9\.]+))?$"
+)
+
+# New NeoForge format: "20.4.237" or "21.0.0-beta"
+neoforge_version_re = re.compile(
+ r"^(?P<mcminor>\d+)\.(?:(?P<mcpatch>\d+)|(?P<snapshot>[0-9a-z]+))\.(?P<number>\d+)(?:\.(?P<build>\d+))?(?:-(?P<tag>[0-9A-Za-z][0-9A-Za-z.+-]*))?$"
+)
+```
+
+For the new format, the Minecraft version is reconstructed from the NeoForge version number:
+
+```python
+if match_nf:
+ mc_version = match_nf.group("snapshot")
+ if not mc_version:
+ mc_version = f"1.{match_nf.group('mcminor')}"
+ if match_nf.group("mcpatch") != "0":
+ mc_version += f".{match_nf.group('mcpatch')}"
+ artifact = "neoforge"
+```
+
+### File Manifest from Maven API
+
+Unlike Forge, which uses its own `meta.json`, NeoForge file manifests come from the Maven API:
+
+```python
+def get_single_forge_files_manifest(longversion, artifact: str):
+ file_url = (
+ f"https://maven.neoforged.net/api/maven/details/releases/net%2Fneoforged%2F{artifact}%2F"
+ + urllib.parse.quote(longversion)
+ )
+ r = sess.get(file_url)
+ files_json = r.json()
+
+ for file in files_json.get("files"):
+ name = file["name"]
+ prefix = f"{artifact}-{longversion}"
+ file_name = name[len(prefix):]
+ if file_name.startswith("."):
+ continue # Skip top-level extension files
+ classifier, ext = os.path.splitext(file_name)
+ if ext in [".md5", ".sha1", ".sha256", ".sha512"]:
+ continue # Skip checksum files
+
+ file_obj = NeoForgeFile(
+ artifact=artifact, classifier=classifier, extension=ext[1:]
+ )
+ ret_dict[classifier] = file_obj
+```
+
+### NeoForgeFile Model
+
+```python
+class NeoForgeFile(MetaBase):
+ artifact: str # "forge" or "neoforge"
+ classifier: str # "installer", "universal"
+ extension: str # "jar"
+
+ def filename(self, long_version):
+ return "%s-%s-%s.%s" % (
+ self.artifact, long_version, self.classifier, self.extension,
+ )
+
+ def url(self, long_version):
+ return "https://maven.neoforged.net/releases/net/neoforged/%s/%s/%s" % (
+ self.artifact, long_version, self.filename(long_version),
+ )
+```
+
+### Installer Processing
+
+The processing is virtually identical to Forge's:
+
+```python
+def process_neoforge_version(key, entry):
+ version = NeoForgeVersion(entry)
+ if version.url() is None or not version.uses_installer():
+ return
+
+ jar_path = os.path.join(UPSTREAM_DIR, JARS_DIR, version.filename())
+
+ # SHA-1 verification, download, extract version.json and install_profile.json
+ with zipfile.ZipFile(jar_path) as jar:
+ with jar.open("version.json") as profile_zip_entry:
+ MojangVersion.parse_raw(version_data)
+ with jar.open("install_profile.json") as profile_zip_entry:
+ NeoForgeInstallerProfileV2.parse_raw(install_profile_data)
+
+ # Cache installer info
+ installer_info = InstallerInfo()
+ installer_info.sha1hash = file_hash(jar_path, hashlib.sha1)
+ installer_info.sha256hash = file_hash(jar_path, hashlib.sha256)
+ installer_info.size = os.path.getsize(jar_path)
+ installer_info.write(installer_info_path)
+```
+
+---
+
+## Phase 2: Generate — `generate_neoforge.py`
+
+### Single Generation Path
+
+Unlike Forge (which has three paths), NeoForge only uses the build-system installer path:
+
+```python
+def version_from_build_system_installer(
+ installer: MojangVersion,
+ profile: NeoForgeInstallerProfileV2,
+ version: NeoForgeVersion,
+) -> MetaVersion:
+ v = MetaVersion(name="NeoForge", version=version.rawVersion, uid=NEOFORGE_COMPONENT)
+ v.main_class = "io.github.zekerzhayard.forgewrapper.installer.Main"
+```
+
+### Library Handling
+
+Profile libraries go into `maven_files` (install-time downloads), and installer libraries plus ForgeWrapper go into `libraries` (runtime classpath):
+
+```python
+ v.maven_files = []
+
+ # Installer JAR as Maven file
+ installer_lib = Library(
+ name=GradleSpecifier("net.neoforged", version.artifact, version.long_version, "installer")
+ )
+ installer_lib.downloads = MojangLibraryDownloads()
+ installer_lib.downloads.artifact = MojangArtifact(
+ url="https://maven.neoforged.net/releases/%s" % installer_lib.name.path(),
+ sha1=info.sha1hash, size=info.size,
+ )
+ v.maven_files.append(installer_lib)
+
+ # Profile libraries (processor dependencies)
+ for forge_lib in profile.libraries:
+ if forge_lib.name.is_log4j():
+ continue
+ update_library_info(forge_lib)
+ v.maven_files.append(forge_lib)
+
+ # Runtime libraries
+ v.libraries = [FORGEWRAPPER_LIBRARY]
+ for forge_lib in installer.libraries:
+ if forge_lib.name.is_log4j():
+ continue
+ v.libraries.append(forge_lib)
+```
+
+### Library Info Fetching
+
+Same approach as Forge — fills in missing SHA-1 and size from Maven:
+
+```python
+def update_library_info(lib: Library):
+ if not lib.downloads:
+ lib.downloads = MojangLibraryDownloads()
+ if not lib.downloads.artifact:
+ url = lib.url or f"https://maven.neoforged.net/releases/{lib.name.path()}"
+ lib.downloads.artifact = MojangArtifact(url=url, sha1=None, size=None)
+
+ art = lib.downloads.artifact
+ if art and art.url:
+ if not art.sha1:
+ r = sess.get(art.url + ".sha1")
+ if r.status_code == 200:
+ art.sha1 = r.text.strip()
+ if not art.size:
+ r = sess.head(art.url)
+ if r.status_code == 200 and 'Content-Length' in r.headers:
+ art.size = int(r.headers['Content-Length'])
+```
+
+### Minecraft Version Dependency
+
+NeoForge extracts the Minecraft version from the installer profile's `minecraft` field:
+
+```python
+v.requires = [Dependency(uid=MINECRAFT_COMPONENT, equals=profile.minecraft)]
+
+# Skip if we don't have the corresponding Minecraft version
+if not os.path.isfile(
+ os.path.join(LAUNCHER_DIR, MINECRAFT_COMPONENT, f"{profile.minecraft}.json")
+):
+ eprint("Skipping %s with no corresponding Minecraft version %s" % (key, profile.minecraft))
+ continue
+```
+
+### Argument Construction
+
+```python
+mc_args = (
+ "--username ${auth_player_name} --version ${version_name} --gameDir ${game_directory} "
+ "--assetsDir ${assets_root} --assetIndex ${assets_index_name} --uuid ${auth_uuid} "
+ "--accessToken ${auth_access_token} --userType ${user_type} --versionType ${version_type}"
+)
+for arg in installer.arguments.game:
+ mc_args += f" {arg}"
+v.minecraft_arguments = mc_args
+```
+
+---
+
+## Data Models
+
+### `NeoForgeEntry`
+
+```python
+class NeoForgeEntry(MetaBase):
+ artifact: str # "forge" or "neoforge"
+ long_version: str # "1.20.1-47.1.100" or "20.4.237"
+ version: str # Short version: "47.1.100" or "237"
+ latest: Optional[bool]
+ recommended: Optional[bool]
+ files: Optional[Dict[str, NeoForgeFile]]
+```
+
+### `DerivedNeoForgeIndex`
+
+```python
+class DerivedNeoForgeIndex(MetaBase):
+ versions: Dict[str, NeoForgeEntry]
+```
+
+Note: Unlike Forge's `DerivedForgeIndex`, this does not have a `by_mc_version` mapping.
+
+### `NeoForgeVersion`
+
+Post-processed version with resolved download URLs:
+
+```python
+class NeoForgeVersion:
+ def __init__(self, entry: NeoForgeEntry):
+ self.artifact = entry.artifact
+ self.rawVersion = entry.version
+ if self.artifact == "neoforge":
+ self.rawVersion = entry.long_version
+
+ self.long_version = entry.long_version
+ for classifier, file in entry.files.items():
+ if classifier == "installer" and extension == "jar":
+ self.installer_filename = filename
+ self.installer_url = url
+```
+
+### `NeoForgeInstallerProfileV2`
+
+Same structure as Forge's v2 profile:
+
+```python
+class NeoForgeInstallerProfileV2(MetaBase):
+ spec: Optional[int]
+ profile: Optional[str]
+ version: Optional[str]
+ path: Optional[GradleSpecifier]
+ minecraft: Optional[str]
+ data: Optional[Dict[str, DataSpec]]
+ processors: Optional[List[ProcessorSpec]]
+ libraries: Optional[List[Library]]
+```
+
+---
+
+## Key Differences from Forge
+
+| Aspect | Forge | NeoForge |
+|---|---|---|
+| Maven URL | `maven.minecraftforge.net` | `maven.neoforged.net/releases` |
+| File manifest API | `meta.json` per version | Maven details API |
+| Artifacts | Always `forge` | `forge` (1.20.1) or `neoforge` (1.20.2+) |
+| Version format | `<mc>-<forge_ver>` | `<mc>-<forge_ver>` or `<mcminor>.<mcpatch>.<number>` |
+| Legacy support | Yes (MC 1.1–1.12.2) | No (MC 1.20.1+ only) |
+| Component UID | `net.minecraftforge` | `net.neoforged` |
+| Bad versions list | Yes | No |
+| ForgeWrapper | Yes | Yes (same library) |
+| Promotions/recommended | Yes | Not currently (`is_recommended = False`) |
+
+---
+
+## Output Structure
+
+```
+launcher/net.neoforged/
+├── package.json
+├── 21.4.38.json # New NeoForge format
+├── 20.4.237.json # New NeoForge format
+├── 47.1.100.json # Legacy Forge-style format (1.20.1)
+└── ...
+```
+
+---
+
+## Constants
+
+| Constant | Value | Location |
+|---|---|---|
+| `NEOFORGE_COMPONENT` | `"net.neoforged"` | `common/neoforge.py` |
+| `BASE_DIR` | `"neoforge"` | `common/neoforge.py` |
+| `FORGEWRAPPER_LIBRARY` | (shared with Forge) | `common/forge.py` |
diff --git a/docs/handbook/meta/overview.md b/docs/handbook/meta/overview.md
new file mode 100644
index 0000000000..032b19ad39
--- /dev/null
+++ b/docs/handbook/meta/overview.md
@@ -0,0 +1,386 @@
+# Meta — Overview
+
+## What is Meta?
+
+Meta is a Python-based metadata generation pipeline that produces the JSON files consumed by the ProjT Launcher (a fork of Prism Launcher). It fetches, processes, and transforms version information from multiple upstream sources — Mojang, Forge, NeoForge, Fabric, Quilt, LiteLoader, Adoptium, Azul, and others — into a unified, normalized format that the launcher can understand.
+
+The launcher does **not** talk to Mojang or mod-loader APIs directly at runtime. Instead, it reads pre-generated metadata hosted as static JSON files in a Git repository. Meta is the tool that keeps those files up to date.
+
+---
+
+## Why Does Meta Exist?
+
+Minecraft's ecosystem has a fragmented metadata landscape:
+
+| Source | API / Format | What it provides |
+|---|---|---|
+| Mojang | `piston-meta.mojang.com` | Vanilla version manifests, libraries, assets, Java runtimes |
+| Forge | `files.minecraftforge.net` | Installer JARs, promotions, maven metadata |
+| NeoForge | `maven.neoforged.net` | Installer JARs, version lists |
+| Fabric | `meta.fabricmc.net` | Loader versions, intermediary mappings |
+| Quilt | `meta.quiltmc.org` | Loader versions, hashed mappings |
+| LiteLoader | `dl.liteloader.com` | Artifact metadata |
+| Adoptium | `api.adoptium.net` | Eclipse Temurin JRE/JDK binaries |
+| IBM Semeru | `api.adoptopenjdk.net` | OpenJ9-based JRE/JDK binaries |
+| Azul | `api.azul.com` | Zulu JRE/JDK packages |
+
+Every source uses a different schema, different versioning conventions, and different distribution mechanisms. Meta normalizes all of these into a single `MetaVersion` / `MetaPackage` JSON schema that the launcher consumes through a flat-file index.
+
+---
+
+## High-Level Pipeline
+
+The pipeline has two major phases executed in sequence by `update.sh`:
+
+### Phase 1 — Update (Fetch + Store Upstream)
+
+Each upstream source has a dedicated `update_*` script in `meta/run/`. These scripts:
+
+1. Fetch the latest metadata from the upstream API.
+2. Download installer JARs when needed (Forge, NeoForge) and extract install profiles.
+3. Write raw upstream data into the `upstream/` Git repository.
+
+Scripts executed in Phase 1:
+
+```
+python -m meta.run.update_mojang
+python -m meta.run.update_forge
+python -m meta.run.update_neoforge
+python -m meta.run.update_fabric
+python -m meta.run.update_quilt
+python -m meta.run.update_liteloader
+python -m meta.run.update_java
+python -m meta.run.update_risugami
+python -m meta.run.update_stationloader
+python -m meta.run.update_optifine
+python -m meta.run.update_modloadermp
+```
+
+After all updaters finish, the upstream repo is committed and pushed (if `DEPLOY_TO_GIT=true`).
+
+### Phase 2 — Generate (Transform + Publish Launcher Metadata)
+
+Each `generate_*` script reads from the `upstream/` directory and writes normalized `MetaVersion` JSON into the `launcher/` (aka `metalauncher/`) directory:
+
+```
+python -m meta.run.generate_mojang
+python -m meta.run.generate_forge
+python -m meta.run.generate_neoforge
+python -m meta.run.generate_fabric
+python -m meta.run.generate_quilt
+python -m meta.run.generate_liteloader
+python -m meta.run.generate_java
+python -m meta.run.generate_risugami
+python -m meta.run.generate_stationloader
+python -m meta.run.generate_optifine
+python -m meta.run.generate_modloadermp
+python -m meta.run.index
+```
+
+The final `index` step walks all generated component directories and produces a master `index.json` listing every package and version with SHA-256 checksums.
+
+---
+
+## Project Identity
+
+| Field | Value |
+|---|---|
+| **Package name** | `meta` |
+| **Version** | `0.0.5-1` |
+| **License** | MS-PL |
+| **Python** | `>=3.10, <4.0` |
+| **Build system** | Poetry (`poetry-core`) |
+| **Repository** | `https://github.com/Project-Tick/meta` |
+| **User-Agent** | `ProjectTickMeta/1.0` |
+
+### Key Dependencies
+
+| Package | Version | Purpose |
+|---|---|---|
+| `pydantic` | `^1.10.13` | Data model validation and serialization |
+| `requests` | `^2.31.0` | HTTP client for upstream APIs |
+| `cachecontrol` | `^0.14.0` | HTTP response caching (disk-backed via `FileCache`) |
+| `filelock` | `^3.20.3` | File locking for concurrent operations |
+| `packaging` | `^25.0` | PEP 440 version parsing (used in Forge generation) |
+
+---
+
+## Entrypoints
+
+`pyproject.toml` registers CLI entrypoints via `[tool.poetry.scripts]`:
+
+```toml
+[tool.poetry.scripts]
+generateFabric = "meta.run.generate_fabric:main"
+generateForge = "meta.run.generate_forge:main"
+generateLiteloader = "meta.run.generate_liteloader:main"
+generateMojang = "meta.run.generate_mojang:main"
+generateNeoForge = "meta.run.generate_neoforge:main"
+generateQuilt = "meta.run.generate_quilt:main"
+generateJava = "meta.run.generate_java:main"
+updateFabric = "meta.run.update_fabric:main"
+updateForge = "meta.run.update_forge:main"
+updateLiteloader = "meta.run.update_liteloader:main"
+updateMojang = "meta.run.update_mojang:main"
+updateNeoForge = "meta.run.update_neoforge:main"
+updateQuilt = "meta.run.update_quilt:main"
+updateJava = "meta.run.update_java:main"
+index = "meta.run.index:main"
+```
+
+Each entrypoint invokes the `main()` function of its respective module. They can also be executed as Python modules (`python -m meta.run.update_mojang`), which is the approach `update.sh` uses.
+
+---
+
+## Directory Layout
+
+```
+meta/ # Project root
+├── pyproject.toml # Poetry project definition
+├── requirements.txt # Pinned pip dependencies
+├── flake.nix # Nix flake for reproducible builds
+├── garnix.yaml # CI build config (Garnix)
+├── renovate.json # Dependency update bot config
+├── config.example.sh # Example environment config
+├── config.sh # Active environment config (git-ignored)
+├── init.sh # Clone upstream/launcher repos
+├── update.sh # Main pipeline orchestrator
+│
+├── meta/ # Python package
+│ ├── __init__.py
+│ ├── common/ # Shared utilities & constants
+│ │ ├── __init__.py # Core helpers, session factory, path utils
+│ │ ├── http.py # download_binary_file()
+│ │ ├── mojang.py # Mojang path constants
+│ │ ├── forge.py # Forge path constants, ForgeWrapper lib
+│ │ ├── neoforge.py # NeoForge path constants
+│ │ ├── fabric.py # Fabric path constants
+│ │ ├── quilt.py # Quilt path constants, beacon disabling
+│ │ ├── java.py # Java runtime path constants
+│ │ ├── liteloader.py # LiteLoader constants
+│ │ ├── risugami.py # Risugami ModLoader constants
+│ │ ├── stationloader.py # Station Loader constants
+│ │ ├── optifine.py # OptiFine constants
+│ │ ├── modloadermp.py # ModLoaderMP constants
+│ │ └── mojang-*.json # Static override/patch data files
+│ │
+│ ├── model/ # Pydantic data models
+│ │ ├── __init__.py # GradleSpecifier, MetaBase, MetaVersion, Library, etc.
+│ │ ├── enum.py # StrEnum backport
+│ │ ├── mojang.py # MojangVersion, MojangIndex, LibraryPatches, etc.
+│ │ ├── forge.py # ForgeEntry, ForgeVersion, ForgeInstallerProfile, etc.
+│ │ ├── neoforge.py # NeoForgeEntry, NeoForgeVersion, etc.
+│ │ ├── fabric.py # FabricInstallerDataV1, FabricJarInfo
+│ │ ├── java.py # JavaRuntimeMeta, AdoptxRelease, ZuluPackage, etc.
+│ │ ├── liteloader.py # LiteloaderIndex, LiteloaderEntry
+│ │ └── index.py # MetaVersionIndex, MetaPackageIndex
+│ │
+│ └── run/ # Executable pipeline scripts
+│ ├── __init__.py
+│ ├── update_mojang.py # Fetch Mojang version manifests
+│ ├── update_forge.py # Fetch Forge installer JARs
+│ ├── update_neoforge.py # Fetch NeoForge installer JARs
+│ ├── update_fabric.py # Fetch Fabric loader & intermediary
+│ ├── update_quilt.py # Fetch Quilt loader
+│ ├── update_java.py # Fetch Adoptium/OpenJ9/Azul releases
+│ ├── update_liteloader.py
+│ ├── update_risugami.py
+│ ├── update_stationloader.py
+│ ├── update_optifine.py
+│ ├── update_modloadermp.py
+│ ├── generate_mojang.py # Transform Mojang → MetaVersion + LWJGL
+│ ├── generate_forge.py # Transform Forge → MetaVersion
+│   │   ├── generate_neoforge.py # Transform NeoForge → MetaVersion
+│ ├── generate_fabric.py # Transform Fabric → MetaVersion
+│ ├── generate_quilt.py # Transform Quilt → MetaVersion
+│ ├── generate_java.py # Transform Java runtimes → MetaVersion
+│ ├── generate_liteloader.py
+│ ├── generate_risugami.py
+│ ├── generate_stationloader.py
+│ ├── generate_optifine.py
+│ ├── generate_modloadermp.py
+│ └── index.py # Build master index.json
+│
+├── upstream/ # Git repo: raw upstream data (submodule)
+│ ├── mojang/
+│ ├── forge/
+│ ├── neoforge/
+│ ├── fabric/
+│ ├── quilt/
+│ ├── liteloader/
+│ ├── java_runtime/
+│ └── ...
+│
+├── launcher/ # Git repo: generated launcher metadata
+│ ├── index.json
+│ ├── net.minecraft/
+│ ├── org.lwjgl/
+│ ├── org.lwjgl3/
+│ ├── net.minecraftforge/
+│ ├── net.neoforged/
+│ ├── net.fabricmc.fabric-loader/
+│ ├── net.fabricmc.intermediary/
+│ ├── org.quiltmc.quilt-loader/
+│ ├── net.minecraft.java/
+│ ├── net.adoptium.java/
+│ ├── com.azul.java/
+│ ├── com.ibm.java/
+│ └── ...
+│
+├── cache/ # HTTP response cache (CacheControl)
+│ └── http_cache/
+│
+├── caches/ # Additional caches
+│ ├── forge_cache/
+│ └── http_cache/
+│
+└── public/ # Static deploy target (optional)
+```
+
+---
+
+## Component UIDs
+
+Every "component" the launcher manages has a unique identifier. The following UIDs are produced by Meta:
+
+| UID | Component | Generator |
+|---|---|---|
+| `net.minecraft` | Minecraft vanilla | `generate_mojang` |
+| `org.lwjgl` | LWJGL 2 | `generate_mojang` |
+| `org.lwjgl3` | LWJGL 3 | `generate_mojang` |
+| `net.minecraftforge` | Forge | `generate_forge` |
+| `net.neoforged` | NeoForge | `generate_neoforge` |
+| `net.fabricmc.fabric-loader` | Fabric Loader | `generate_fabric` |
+| `net.fabricmc.intermediary` | Intermediary Mappings | `generate_fabric` |
+| `org.quiltmc.quilt-loader` | Quilt Loader | `generate_quilt` |
+| `org.quiltmc.hashed` | Quilt Hashed Mappings | `generate_quilt` (if enabled) |
+| `com.mumfrey.liteloader` | LiteLoader | `generate_liteloader` |
+| `net.minecraft.java` | Mojang Java Runtimes | `generate_java` |
+| `net.adoptium.java` | Eclipse Temurin JREs | `generate_java` |
+| `com.ibm.java` | IBM Semeru Open JREs | `generate_java` |
+| `com.azul.java` | Azul Zulu JREs | `generate_java` |
+| `net.optifine` | OptiFine | `generate_optifine` |
+| `risugami` | Risugami ModLoader | `generate_risugami` |
+| `station-loader` | Station Loader | `generate_stationloader` |
+
+---
+
+## Environment Variables
+
+The pipeline is configured through shell environment variables, typically set in `config.sh`:
+
+| Variable | Default | Description |
+|---|---|---|
+| `META_CACHE_DIR` | `$CACHE_DIRECTORY` or `./caches` | HTTP and Forge cache directory |
+| `META_UPSTREAM_DIR` | `$STATE_DIRECTORY/upstream` or `./upstream` | Path to upstream data Git repo |
+| `META_LAUNCHER_DIR` | `$STATE_DIRECTORY/metalauncher` or `./launcher` | Path to launcher metadata Git repo |
+| `DEPLOY_TO_GIT` | `false` | Whether to commit and push changes |
+| `DEPLOY_TO_FOLDER` | `false` | Whether to copy output to a folder |
+| `DEPLOY_FOLDER` | `/app/public/v1` | Target folder for folder deployment |
+| `GIT_AUTHOR_NAME` | — | Git commit author name |
+| `GIT_AUTHOR_EMAIL` | — | Git commit author email |
+| `GIT_COMMITTER_NAME` | — | Git commit committer name |
+| `GIT_COMMITTER_EMAIL` | — | Git commit committer email |
+| `GIT_SSH_COMMAND` | — | Custom SSH command for Git push |
+
+---
+
+## The Meta Format
+
+All generated version files conform to `META_FORMAT_VERSION = 1`, defined in `meta/model/__init__.py`. The format is a superset of Mojang's own version JSON, extended with:
+
+- **Component dependencies** (`requires`, `conflicts`) — e.g., Forge requires a specific Minecraft version.
+- **Maven files** (`mavenFiles`) — additional JARs needed at install time (ForgeWrapper, installer JARs).
+- **Jar mods** (`jarMods`) — legacy mod injection mechanism.
+- **Traits** (`+traits`) — launcher behavior hints like `FirstThreadOnMacOS`, `legacyFML`, `legacyServices`.
+- **Tweakers** (`+tweakers`) — legacy Forge/LiteLoader tweaker classes.
+- **JVM args** (`+jvmArgs`) — additional JVM arguments (e.g., Quilt beacon disabling).
+- **Java agents** (`+agents`) — Java agent libraries for instrumentation.
+- **Java compatibility** (`compatibleJavaMajors`, `compatibleJavaName`) — which Java versions work.
+- **Ordering** (`order`) — controls component load order (Minecraft = -2, LWJGL = -1, Forge = 5, Fabric = 10).
+
+---
+
+## HTTP Caching
+
+All HTTP requests are routed through a `CacheControl`-wrapped `requests.Session` created by `default_session()`:
+
+```python
+def default_session():
+ cache = FileCache(os.path.join(cache_path(), "http_cache"))
+ sess = CacheControl(requests.Session(), cache)
+ sess.headers.update({"User-Agent": "ProjectTickMeta/1.0"})
+ return sess
+```
+
+This transparently caches responses to disk, respecting HTTP cache headers. The cache directory is controlled by `META_CACHE_DIR`.
+
+---
+
+## Concurrency
+
+Several update and generate scripts use `concurrent.futures.ThreadPoolExecutor` (or `multiprocessing.Pool` in the Fabric updater) for parallel downloads. For example, `update_mojang.py` fetches individual version JSONs concurrently, and `update_forge.py` processes installer JARs in parallel.
+
+---
+
+## Static Data Files
+
+The `meta/common/` directory contains several JSON files with manual overrides and patches:
+
+| File | Purpose |
+|---|---|
+| `mojang-minecraft-experiments.json` | Experimental snapshot URLs (zip-packaged versions) |
+| `mojang-minecraft-old-snapshots.json` | Pre-launcher old snapshot metadata |
+| `mojang-minecraft-legacy-override.json` | Main class, applet class, and trait overrides for legacy versions |
+| `mojang-minecraft-legacy-services.json` | List of versions needing the `legacyServices` trait |
+| `mojang-library-patches.json` | Library replacement/addition patches (e.g., adding ARM natives) |
+
+---
+
+## Relationship to the Launcher
+
+The launcher fetches `index.json` from the hosted metadata repository. This index contains SHA-256 hashes for every package's version index. The launcher then fetches individual version files as needed, verifying integrity via SHA-256.
+
+```
+index.json
+├── net.minecraft/index.json
+│ ├── 1.21.5.json
+│ ├── 1.20.4.json
+│ └── ...
+├── net.minecraftforge/index.json
+│ ├── 49.0.31.json
+│ └── ...
+├── net.adoptium.java/index.json
+│ ├── java21.json
+│ └── ...
+└── ...
+```
+
+Each version JSON is a self-contained `MetaVersion` document that the launcher uses to construct a launch configuration: libraries to download, main class to invoke, arguments to pass, Java version to require, etc.
+
+---
+
+## Security Considerations
+
+- **SHA-1 and SHA-256 verification**: Installer JARs are verified against remote SHA-1 checksums. Version index entries include SHA-256 hashes.
+- **URL normalization**: The `replace_old_launchermeta_url()` function rewrites deprecated `launchermeta.mojang.com` URLs to `piston-meta.mojang.com`.
+- **Log4j patching**: `generate_mojang.py` forcibly upgrades Log4j libraries to patched versions (2.0-beta9-fixed or 2.17.1) to mitigate CVE-2021-44228 (Log4Shell).
+- **Validation**: All parsed data passes through Pydantic model validation with strict type checking and custom validators.
+
+---
+
+## Nix Integration
+
+The project includes a `flake.nix` that provides:
+
+- A NixOS module (`services.blockgame-meta`) for scheduled execution as a systemd service.
+- Development shells with all Python dependencies.
+- Reproducible builds via Garnix CI.
+
+Supported systems: `x86_64-linux`, `aarch64-linux`.
+
+---
+
+## Summary
+
+Meta is the backbone of the ProjT Launcher's version management. It bridges the gap between dozens of upstream metadata sources and the launcher's unified component model. The two-phase update/generate pipeline ensures that raw upstream data is preserved (for auditability and incremental updates) while the launcher receives clean, normalized, integrity-verified metadata.
diff --git a/docs/handbook/meta/quilt-metadata.md b/docs/handbook/meta/quilt-metadata.md
new file mode 100644
index 0000000000..199f43f8a6
--- /dev/null
+++ b/docs/handbook/meta/quilt-metadata.md
@@ -0,0 +1,267 @@
+# Meta — Quilt Metadata
+
+## Overview
+
+Quilt is a fork of Fabric that reuses Fabric's Intermediary mappings but has its own loader and mod system. Meta tracks Quilt as two components:
+
+1. **Quilt Loader** — `org.quiltmc.quilt-loader`
+2. **Intermediary Mappings** — reuses Fabric's `net.fabricmc.intermediary`
+
+Quilt's metadata pipeline closely mirrors Fabric's, using nearly identical data models (`FabricInstallerDataV1`, `FabricJarInfo`), but with distinct quirks around beacon telemetry disabling and version recommendation logic.
+
+---
+
+## Phase 1: Update — `update_quilt.py`
+
+### Fetching Component Metadata
+
+Quilt Meta v3 provides version lists:
+
+```python
+for component in ["quilt-loader"]:
+ index = get_json_file(
+ os.path.join(UPSTREAM_DIR, META_DIR, f"{component}.json"),
+ f"https://meta.quiltmc.org/v3/versions/{component}",
+ )
+```
+
+Unlike Fabric which tracks two components, Quilt only fetches loader versions from Quilt Meta. Intermediary is shared with Fabric.
+
+### Hashed Mappings Decision
+
+```python
+USE_QUILT_MAPPINGS = False
+```
+
+When `USE_QUILT_MAPPINGS` is `False` (the current default), Quilt uses Fabric's Intermediary mappings rather than Quilt's own hashed mappings. This ensures broader mod compatibility.
+
+If `USE_QUILT_MAPPINGS` were `True`, the updater would also fetch:
+```python
+if USE_QUILT_MAPPINGS:
+ components.append("quilt-mappings")
+```
+
+And use `https://maven.quiltmc.org/repository/release/` as the Maven source.
+
+### JAR Timestamp Extraction
+
+Quilt **always downloads the full JAR** to extract timestamps, unlike Fabric which tries HEAD requests first:
+
+```python
+def compute_jar_file(path, url):
+ jar_path = path + ".jar"
+ get_binary_file(jar_path, url)
+ tstamp = datetime.fromtimestamp(0)
+ with zipfile.ZipFile(jar_path) as jar:
+ allinfo = jar.infolist()
+ for info in allinfo:
+ tstamp_new = datetime(*info.date_time)
+ if tstamp_new > tstamp:
+ tstamp = tstamp_new
+ data = FabricJarInfo(release_time=tstamp)
+ data.write(path + ".json")
+```
+
+This is because Quilt's Maven HEAD responses were historically unreliable for `Last-Modified` headers.
+
+### Loader Installer JSON
+
+```python
+def get_json_file_concurrent(it):
+ maven_url = get_maven_url(
+ it["maven"], "https://maven.quiltmc.org/repository/release/", ".json"
+ )
+ get_json_file(
+ os.path.join(UPSTREAM_DIR, INSTALLER_INFO_DIR, f"{it['version']}.json"),
+ maven_url,
+ )
+```
+
+### Concurrency Model
+
+Quilt uses `ThreadPoolExecutor` (not `multiprocessing.Pool` like Fabric):
+
+```python
+with ThreadPoolExecutor() as pool:
+ deque(pool.map(compute_jar_file_concurrent, index, chunksize=32), 0)
+```
+
+---
+
+## Phase 2: Generate — `generate_quilt.py`
+
+### Processing Loader Versions
+
+```python
+def process_loader_version(entry) -> MetaVersion:
+ jar_info = load_jar_info(transform_maven_key(entry["maven"]))
+ installer_info = load_installer_info(entry["version"])
+
+ v = MetaVersion(
+ name="Quilt Loader",
+ uid=LOADER_COMPONENT,
+ version=entry["version"],
+ )
+ v.release_time = jar_info.release_time
+ v.requires = [Dependency(uid=INTERMEDIARY_COMPONENT)]
+ v.order = 10
+ v.type = "release"
+```
+
+#### Main Class Handling
+
+Identical to Fabric — the main class can be a string or a `FabricMainClasses` object:
+
+```python
+if isinstance(installer_info.main_class, FabricMainClasses):
+ v.main_class = installer_info.main_class.client
+else:
+ v.main_class = installer_info.main_class
+```
+
+#### Library Assembly
+
+```python
+v.libraries = []
+v.libraries.extend(installer_info.libraries.common)
+v.libraries.extend(installer_info.libraries.client)
+loader_lib = Library(
+ name=GradleSpecifier.from_string(entry["maven"]),
+ url="https://maven.quiltmc.org/repository/release/",
+)
+v.libraries.append(loader_lib)
+```
+
+#### Beacon Telemetry Disabling
+
+A unique Quilt feature: certain loader versions have opt-out telemetry. Meta disables it:
+
+```python
+DISABLE_BEACON_VERSIONS = {
+ "0.17.0",
+ "0.17.1",
+ # ... etc
+}
+DISABLE_BEACON_ARG = "-Dloader.disable_beacon=true"
+```
+
+Applied during generation:
+
+```python
+if entry["version"] in DISABLE_BEACON_VERSIONS:
+ if v.additional_jvm_args is None:
+ v.additional_jvm_args = []
+ v.additional_jvm_args.append(DISABLE_BEACON_ARG)
+```
+
+### Processing Intermediary / Hashed Mappings
+
+When using Fabric's intermediary (default):
+
+```python
+INTERMEDIARY_COMPONENT = "net.fabricmc.intermediary"
+```
+
+This means Quilt Loader's `requires` field references `net.fabricmc.intermediary`, and the intermediary versions are handled entirely by Fabric's generate pipeline.
+
+If `USE_QUILT_MAPPINGS` were `True`, Quilt would generate its own hashed mappings component:
+
+```python
+if USE_QUILT_MAPPINGS:
+ INTERMEDIARY_COMPONENT = "org.quiltmc.hashed"
+```
+
+### Version Recommendation Logic
+
+Quilt uses a SemVer-based heuristic instead of Fabric's `stable` boolean:
+
+```python
+for entry in loader_version_index:
+ version = entry["version"]
+ # ...
+ if not recommended_loader_versions and "-" not in version:
+ recommended_loader_versions.append(version)
+```
+
+The `"-" not in version` check follows SemVer conventions: a version containing a hyphen (e.g., `0.18.0-beta.1`) is a pre-release. The first version **without** a hyphen becomes the recommended version.
+
+### Package Metadata
+
+```python
+package = MetaPackage(uid=LOADER_COMPONENT, name="Quilt Loader")
+package.recommended = recommended_loader_versions
+package.description = "Quilt Loader is a tool to load Quilt-compatible mods in game environments."
+package.project_url = "https://quiltmc.org"
+package.authors = ["Quilt Developers"]
+```
+
+---
+
+## Constants
+
+| Constant | Value | Location |
+|---|---|---|
+| `LOADER_COMPONENT` | `"org.quiltmc.quilt-loader"` | `common/quilt.py` |
+| `INTERMEDIARY_COMPONENT` | `"net.fabricmc.intermediary"` (default) | `common/quilt.py` |
+| `USE_QUILT_MAPPINGS` | `False` | `common/quilt.py` |
+| `BASE_DIR` | `"quilt"` | `common/quilt.py` |
+| `META_DIR` | `"quilt/meta-v3"` | `common/quilt.py` |
+| `INSTALLER_INFO_DIR` | `"quilt/loader-installer-json"` | `common/quilt.py` |
+| `JARS_DIR` | `"quilt/jars"` | `common/quilt.py` |
+| `DISABLE_BEACON_ARG` | `"-Dloader.disable_beacon=true"` | `common/quilt.py` |
+
+---
+
+## Differences from Fabric
+
+| Aspect | Fabric | Quilt |
+|---|---|---|
+| **Loader UID** | `net.fabricmc.fabric-loader` | `org.quiltmc.quilt-loader` |
+| **Meta API** | v2 (`meta.fabricmc.net/v2/`) | v3 (`meta.quiltmc.org/v3/`) |
+| **Maven** | `maven.fabricmc.net` | `maven.quiltmc.org/repository/release/` |
+| **JAR timestamps** | HEAD first, download fallback | Always download |
+| **Concurrency** | `multiprocessing.Pool` | `ThreadPoolExecutor` |
+| **Recommendation** | `stable` boolean field | SemVer heuristic (`-` = pre-release) |
+| **Beacon disable** | N/A | Injected JVM arg for specific versions |
+| **Mappings** | Own intermediary | Uses Fabric intermediary (configurable) |
+
+---
+
+## Component Dependency Chain
+
+```
+org.quiltmc.quilt-loader ──► net.fabricmc.intermediary ──► net.minecraft
+```
+
+Quilt Loader depends on Fabric Intermediary, which depends on Minecraft.
+
+---
+
+## Output Structure
+
+```
+launcher/
+└── org.quiltmc.quilt-loader/
+ ├── package.json
+ ├── 0.27.1.json
+ ├── 0.27.0.json
+ └── ...
+```
+
+Quilt does not generate its own intermediary package (it reuses Fabric's).
+
+---
+
+## Upstream Data Structure
+
+```
+upstream/quilt/
+├── meta-v3/
+│ └── quilt-loader.json # Full loader version index
+├── loader-installer-json/
+│ ├── 0.27.1.json # Installer JSON per loader version
+│ └── ...
+└── jars/
+ ├── org.quiltmc.quilt-loader.0.27.1.json # JAR timestamp info
+ └── ...
+```
diff --git a/docs/handbook/meta/setup.md b/docs/handbook/meta/setup.md
new file mode 100644
index 0000000000..32abbe9e1e
--- /dev/null
+++ b/docs/handbook/meta/setup.md
@@ -0,0 +1,480 @@
+# Meta — Setup Guide
+
+## Prerequisites
+
+| Requirement | Minimum Version | Notes |
+|---|---|---|
+| Python | 3.10+ | Required by type hint syntax (`list[int]`, `dict[str, Any]`, match statements) |
+| Poetry | 1.x+ | For dependency management (or use pip with `requirements.txt`) |
+| Git | 2.x | For upstream/launcher repo management |
+| Nix (optional) | 2.x with flakes | For reproducible NixOS deployment |
+
+---
+
+## Installation Methods
+
+### Method 1: Poetry (Recommended for Development)
+
+```bash
+cd meta/
+
+# Install Poetry if not present
+pip install poetry
+
+# Install all dependencies
+poetry install
+
+# Verify installation
+poetry run python -c "import meta; print('OK')"
+```
+
+Poetry reads `pyproject.toml` and installs:
+
+```toml
+[tool.poetry.dependencies]
+python = ">=3.10,<4.0"
+cachecontrol = "^0.14.0"
+requests = "^2.31.0"
+filelock = "^3.20.3"
+packaging = "^25.0"
+pydantic = "^1.10.13"
+```
+
+### Method 2: pip with requirements.txt
+
+```bash
+cd meta/
+
+# Create and activate a virtual environment
+python3 -m venv venv
+source venv/bin/activate
+
+# Install pinned dependencies
+pip install -r requirements.txt
+```
+
+The `requirements.txt` contains exact pinned versions:
+
+```
+beautifulsoup4==4.14.3
+CacheControl==0.14.2
+certifi==2025.11.12
+charset-normalizer==3.4.4
+filelock==3.20.1
+idna==3.11
+msgpack==1.1.2
+packaging==25.0
+pydantic==1.10.24
+requests==2.32.4
+soupsieve==2.8.1
+typing_extensions==4.15.0
+urllib3==2.6.3
+```
+
+### Method 3: Nix Flake (Reproducible)
+
+```bash
+cd meta/
+
+# Enter the development shell
+nix develop
+
+# Or build the package
+nix build
+```
+
+The `flake.nix` defines a complete reproducible environment with all dependencies pinned via `flake.lock`. Supported systems: `x86_64-linux` and `aarch64-linux`.
+
+---
+
+## Initial Repository Setup
+
+Meta requires two Git repositories for operation:
+
+1. **Upstream repository** — stores raw fetched data from upstream APIs
+2. **Launcher repository** — stores the generated launcher metadata
+
+### Using `init.sh`
+
+The `init.sh` script clones both repositories:
+
+```bash
+#!/usr/bin/env bash
+set -ex
+
+if [ -f config.sh ]; then
+ source config.sh
+fi
+
+export META_CACHE_DIR=${CACHE_DIRECTORY:-./caches}
+export META_UPSTREAM_DIR=${META_UPSTREAM_DIR:-${STATE_DIRECTORY:-.}/upstream}
+export META_LAUNCHER_DIR=${META_LAUNCHER_DIR:-${STATE_DIRECTORY:-.}/metalauncher}
+
+function init_repo {
+ if [ -d "$1" ]; then
+ return 0 # Already exists, skip
+ fi
+ if [ -z "$2" ]; then
+ echo "Can't initialize missing $1 directory. Please specify $3" >&2
+ return 1
+ fi
+ git clone "$2" "$1"
+}
+
+init_repo "$META_UPSTREAM_DIR" "$META_UPSTREAM_URL" "META_UPSTREAM_URL"
+init_repo "$META_LAUNCHER_DIR" "$META_LAUNCHER_URL" "META_LAUNCHER_URL"
+```
+
+For this to work, you need `META_UPSTREAM_URL` and `META_LAUNCHER_URL` set in your `config.sh`.
+
+### Manual Setup (Without Remote Repos)
+
+For local development/testing without pushing to remote:
+
+```bash
+cd meta/
+
+# Create local upstream directory
+mkdir -p upstream
+
+# Create local launcher directory
+mkdir -p launcher
+
+# Initialize as Git repos (optional, only needed if DEPLOY_TO_GIT=true)
+cd upstream && git init && cd ..
+cd launcher && git init && cd ..
+```
+
+---
+
+## Configuration
+
+### Creating `config.sh`
+
+Copy the example and customize:
+
+```bash
+cp config.example.sh config.sh
+```
+
+The example configuration:
+
+```bash
+export META_UPSTREAM_DIR=upstream
+export META_LAUNCHER_DIR=launcher
+export DEPLOY_TO_FOLDER=false
+export DEPLOY_FOLDER=/app/public/v1
+export DEPLOY_FOLDER_USER=http
+export DEPLOY_FOLDER_GROUP=http
+
+export DEPLOY_TO_GIT=true
+export GIT_AUTHOR_NAME="Herpington Derpson"
+export GIT_AUTHOR_EMAIL="herpderp@derpmail.com"
+export GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME"
+export GIT_COMMITTER_EMAIL="$GIT_AUTHOR_EMAIL"
+export GIT_SSH_COMMAND="ssh -i ${BASEDIR}/config/deploy.key"
+```
+
+### Configuration Variables Reference
+
+| Variable | Required | Default | Description |
+|---|---|---|---|
+| `META_UPSTREAM_DIR` | Yes | `upstream` | Path to the upstream data repository |
+| `META_LAUNCHER_DIR` | Yes | `launcher` | Path to the launcher metadata repository |
+| `META_CACHE_DIR` | No | `./caches` | HTTP and Forge cache directory |
+| `DEPLOY_TO_GIT` | No | `false` | Commit and push changes after generation |
+| `DEPLOY_TO_FOLDER` | No | `false` | Copy output to a static folder |
+| `DEPLOY_FOLDER` | No | `/app/public/v1` | Target folder for deployment |
+| `DEPLOY_FOLDER_USER` | No | `http` | User ownership for deployed files |
+| `DEPLOY_FOLDER_GROUP` | No | `http` | Group ownership for deployed files |
+| `GIT_AUTHOR_NAME` | If deploying | — | Git commit author name |
+| `GIT_AUTHOR_EMAIL` | If deploying | — | Git commit author email |
+| `GIT_COMMITTER_NAME` | If deploying | — | Git committer name |
+| `GIT_COMMITTER_EMAIL` | If deploying | — | Git committer email |
+| `GIT_SSH_COMMAND` | If using SSH | — | Custom SSH command for push authentication |
+
+### For Local Development
+
+Minimal `config.sh` for local testing without remote deployment:
+
+```bash
+export META_UPSTREAM_DIR=upstream
+export META_LAUNCHER_DIR=launcher
+export DEPLOY_TO_GIT=false
+```
+
+---
+
+## Running the Pipeline
+
+### Full Pipeline
+
+```bash
+cd meta/
+
+# Source config
+source config.sh
+
+# Initialize repos (first time only)
+bash init.sh
+
+# Run the full pipeline
+bash update.sh
+```
+
+### Individual Steps
+
+#### Update (fetch upstream data)
+
+```bash
+# All updaters
+python -m meta.run.update_mojang
+python -m meta.run.update_forge
+python -m meta.run.update_neoforge
+python -m meta.run.update_fabric
+python -m meta.run.update_quilt
+python -m meta.run.update_liteloader
+python -m meta.run.update_java
+python -m meta.run.update_risugami
+python -m meta.run.update_stationloader
+python -m meta.run.update_optifine
+python -m meta.run.update_modloadermp
+```
+
+#### Generate (transform to launcher format)
+
+```bash
+# All generators
+python -m meta.run.generate_mojang
+python -m meta.run.generate_forge
+python -m meta.run.generate_neoforge
+python -m meta.run.generate_fabric
+python -m meta.run.generate_quilt
+python -m meta.run.generate_liteloader
+python -m meta.run.generate_java
+python -m meta.run.generate_risugami
+python -m meta.run.generate_stationloader
+python -m meta.run.generate_optifine
+python -m meta.run.generate_modloadermp
+```
+
+#### Build the index
+
+```bash
+python -m meta.run.index
+```
+
+### Using Poetry Entrypoints
+
+If installed via Poetry, use the registered script names:
+
+```bash
+poetry run updateMojang
+poetry run updateForge
+poetry run generateMojang
+poetry run generateForge
+poetry run index
+```
+
+---
+
+## Directory Structure After First Run
+
+After a successful pipeline execution, the workspace looks like:
+
+```
+meta/
+├── upstream/ # Populated by update_* scripts
+│ ├── mojang/
+│ │ ├── version_manifest_v2.json
+│ │ ├── java_all.json
+│ │ └── versions/
+│ │ ├── 1.21.5.json
+│ │ ├── 1.20.4.json
+│ │ └── ...
+│ ├── forge/
+│ │ ├── derived_index.json
+│ │ ├── legacyinfo.json
+│ │ ├── maven-metadata.json
+│ │ ├── promotions_slim.json
+│ │ ├── jars/ # Downloaded installer JARs
+│ │ ├── installer_info/ # Installer SHA/size info
+│   │   ├── installer_manifests/ # Extracted install_profile.json
+│ │ ├── version_manifests/ # Extracted version.json
+│ │ └── files_manifests/ # Maven classifier metadata
+│ ├── neoforge/ # Same structure as forge
+│ ├── fabric/
+│ │ ├── meta-v2/
+│ │ │ ├── loader.json
+│ │ │ └── intermediary.json
+│ │ ├── loader-installer-json/
+│ │ └── jars/
+│ ├── quilt/ # Same structure as fabric
+│ ├── java_runtime/
+│ │ ├── adoptium/
+│ │ │ ├── available_releases.json
+│ │ │ └── versions/
+│ │ ├── ibm/
+│ │ └── azul/
+│ └── ...
+│
+├── launcher/ # Populated by generate_* and index
+│ ├── index.json # Master package index
+│ ├── net.minecraft/
+│ │ ├── package.json
+│ │ ├── index.json
+│ │ ├── 1.21.5.json
+│ │ └── ...
+│ ├── org.lwjgl3/
+│ ├── net.minecraftforge/
+│ ├── net.neoforged/
+│ ├── net.fabricmc.fabric-loader/
+│ ├── net.fabricmc.intermediary/
+│ ├── org.quiltmc.quilt-loader/
+│ ├── net.minecraft.java/
+│ ├── net.adoptium.java/
+│ ├── com.azul.java/
+│ ├── com.ibm.java/
+│ └── ...
+│
+└── caches/
+ ├── forge_cache/
+ └── http_cache/ # CacheControl disk cache
+```
+
+---
+
+## Troubleshooting
+
+### Common Issues
+
+#### `ModuleNotFoundError: No module named 'meta'`
+
+The `meta` package isn't installed or not on `PYTHONPATH`. Solutions:
+- Install with `poetry install` or `pip install -e .`
+- Run from the meta project directory: `cd meta/ && python -m meta.run.update_mojang`
+
+#### `FileNotFoundError` for upstream directories
+
+The upstream/launcher directories don't exist. Run `bash init.sh` first, or create them manually.
+
+#### `AssertionError` in Pydantic validation
+
+An upstream API changed its schema. Check:
+- Which version caused the error (look at stderr output)
+- Whether the upstream format changed (compare with `upstream/` cached data)
+- Whether `BAD_VERSIONS` needs updating
+
+#### `requests.exceptions.HTTPError: 429 Too Many Requests`
+
+API rate limiting. The HTTP cache (`CacheControl`) helps, but first runs hit APIs heavily. Wait and retry, or run individual updaters incrementally.
+
+#### Stale SHA-1 mismatches
+
+If an installer JAR was re-uploaded upstream with the same version but different content:
+```bash
+# Clear the cached JAR and its SHA file
+rm upstream/forge/jars/<version>.*
+rm upstream/forge/installer_info/<version>.json
+rm upstream/forge/installer_manifests/<version>.json
+```
+
+Then re-run the updater.
+
+### Logging
+
+- Standard output: progress messages and version processing.
+- Standard error (`eprint()`): warnings, skipped versions, download failures.
+- The pipeline does not use Python's `logging` module — all output goes to stdout/stderr directly.
+
+---
+
+## NixOS Deployment
+
+### Adding to a NixOS Configuration
+
+```nix
+{
+ inputs.prism-meta.url = "github:PrismLauncher/meta";
+}
+```
+
+```nix
+{inputs, ...}: {
+ imports = [inputs.prism-meta.nixosModules.default];
+ services.blockgame-meta = {
+ enable = true;
+ settings = {
+ DEPLOY_TO_GIT = "true";
+ GIT_AUTHOR_NAME = "Bot Name";
+ GIT_AUTHOR_EMAIL = "bot@example.com";
+ GIT_COMMITTER_NAME = "Bot Name";
+ GIT_COMMITTER_EMAIL = "bot@example.com";
+ };
+ };
+}
+```
+
+### Managing the Service
+
+```bash
+# Trigger a manual run
+systemctl start blockgame-meta.service
+
+# Monitor logs
+journalctl -fu blockgame-meta.service
+
+# Check status
+systemctl status blockgame-meta.service
+```
+
+---
+
+## CI/CD with Garnix
+
+The `garnix.yaml` configuration:
+
+```yaml
+builds:
+ include:
+ - "checks.x86_64-linux.*"
+ - "devShells.*.*"
+ - "packages.*.*"
+```
+
+This builds all checks, development shells, and packages on Garnix CI for every push.
+
+---
+
+## Development Workflow
+
+### Making Changes
+
+1. Edit source files in `meta/`.
+2. Test individual steps: `python -m meta.run.update_mojang` or `python -m meta.run.generate_mojang`.
+3. Inspect output in `launcher/` directory.
+4. Verify the index: `python -m meta.run.index`.
+
+### Testing with Local Data
+
+To test generation without fetching from upstream (if you already have `upstream/` populated):
+
+```bash
+# Skip update phase, just regenerate
+python -m meta.run.generate_mojang
+python -m meta.run.generate_forge
+# ... etc.
+python -m meta.run.index
+```
+
+### Adding a New Mod Loader
+
+To add support for a new mod loader:
+
+1. Create `meta/common/<loader>.py` with path constants and component UID.
+2. Create model classes in `meta/model/<loader>.py` inheriting from `MetaBase`.
+3. Create `meta/run/update_<loader>.py` to fetch upstream data.
+4. Create `meta/run/generate_<loader>.py` to produce `MetaVersion` files.
+5. Add entries to `update.sh` for both update and generate phases.
+6. Add entrypoints to `pyproject.toml` under `[tool.poetry.scripts]`.
+7. Add Git add patterns to `update.sh` for the new component directories.
diff --git a/docs/handbook/meta/update-pipeline.md b/docs/handbook/meta/update-pipeline.md
new file mode 100644
index 0000000000..fb94d9505e
--- /dev/null
+++ b/docs/handbook/meta/update-pipeline.md
@@ -0,0 +1,330 @@
+# Meta — Update Pipeline
+
+## Overview
+
+The Meta update pipeline follows a strict four-phase architecture orchestrated by `update.sh`:
+
+1. **Update Phase** — fetch and cache upstream vendor data into the `upstream/` git repository
+2. **Generate Phase** — transform cached data into launcher-compatible JSON in the `launcher/` git repository
+3. **Index Phase** — build version indices with SHA-256 hashes
+4. **Deploy Phase** — commit and push both repositories (or rsync to a folder)
+
+---
+
+## Orchestration: `update.sh`
+
+### Environment Configuration
+
+```bash
+export META_CACHE_DIR=${CACHE_DIRECTORY:-./caches}
+export META_UPSTREAM_DIR=${META_UPSTREAM_DIR:-${STATE_DIRECTORY:-.}/upstream}
+export META_LAUNCHER_DIR=${META_LAUNCHER_DIR:-${STATE_DIRECTORY:-.}/metalauncher}
+```
+
+These can be overridden via `config.sh` (sourced if present) or systemd environment variables (`CACHE_DIRECTORY`, `STATE_DIRECTORY`).
+
+### Execution Order
+
+**Update scripts** (populate `upstream/`):
+
+```bash
+upstream_git reset --hard HEAD || exit 1
+
+python -m meta.run.update_mojang || fail_in
+python -m meta.run.update_forge || fail_in
+python -m meta.run.update_neoforge || fail_in
+python -m meta.run.update_fabric || fail_in
+python -m meta.run.update_quilt || fail_in
+python -m meta.run.update_liteloader || fail_in
+python -m meta.run.update_java || fail_in
+python -m meta.run.update_risugami || fail_in
+python -m meta.run.update_stationloader || fail_in
+python -m meta.run.update_optifine || fail_in
+python -m meta.run.update_modloadermp || fail_in
+```
+
+**Generate scripts** (produce `launcher/`):
+
+```bash
+launcher_git reset --hard HEAD || exit 1
+
+python -m meta.run.generate_mojang || fail_out
+python -m meta.run.generate_forge || fail_out
+python -m meta.run.generate_neoforge || fail_out
+python -m meta.run.generate_fabric || fail_out
+python -m meta.run.generate_quilt || fail_out
+python -m meta.run.generate_liteloader || fail_out
+python -m meta.run.generate_java || fail_out
+python -m meta.run.generate_risugami || fail_out
+python -m meta.run.generate_stationloader || fail_out
+python -m meta.run.generate_optifine || fail_out
+python -m meta.run.generate_modloadermp || fail_out
+python -m meta.run.index || fail_out
+```
+
+### Error Handling
+
+Two failure functions ensure clean recovery:
+
+```bash
+function fail_in() {
+ upstream_git reset --hard HEAD
+ exit 1
+}
+
+function fail_out() {
+ launcher_git reset --hard HEAD
+ exit 1
+}
+```
+
+On any script failure, the corresponding git repo is reset to HEAD, discarding partial changes.
+
+---
+
+## Update Scripts
+
+### `update_mojang.py`
+
+**Sources**: Mojang's `piston-meta.mojang.com`
+
+**Steps**:
+1. Fetch `version_manifest_v2.json` — the master list of all Minecraft versions
+2. For each version entry, download the individual version JSON (concurrent)
+3. Fetch experimental snapshots from a bundled ZIP resource
+4. Load old snapshot metadata from bundled JSON
+5. Fetch Mojang's Java runtime manifest (`java_all.json`)
+
+**Concurrency**: `ThreadPoolExecutor` for version JSON downloads
+
+**Output**: `upstream/mojang/`
+
+### `update_forge.py`
+
+**Sources**: Forge Maven (`files.minecraftforge.net`)
+
+**Steps**:
+1. Fetch `maven-metadata.json` and `promotions_slim.json`
+2. Build `DerivedForgeIndex` from version metadata
+3. For each supported version, download installer JAR files
+4. Extract `install_profile.json` and `version.json` from installer JARs
+5. Cache `InstallerInfo` (SHA-1, size) for the launcher Maven
+6. Handle legacy Forge info (FML libs, pre-1.6 versions)
+
+**Key complexity**: Three generation eras (legacy jar mods, profile-based, build system)
+
+**Output**: `upstream/forge/`
+
+### `update_neoforge.py`
+
+**Sources**: NeoForge Maven (`maven.neoforged.net`)
+
+**Steps**:
+1. Fetch maven-metadata.xml from two artifact paths:
+ - `net/neoforged/forge/` (early NeoForge, branched from Forge)
+ - `net/neoforged/neoforge/` (independent NeoForge)
+2. Parse versions with two regex patterns
+3. Download installer JARs and extract profiles (same as Forge)
+
+**Output**: `upstream/neoforge/`
+
+### `update_fabric.py`
+
+**Sources**: Fabric Meta API v2 (`meta.fabricmc.net`)
+
+**Steps**:
+1. Fetch loader and intermediary version lists
+2. For each loader version, download installer JSON from Fabric Maven
+3. Extract JAR timestamps (HEAD requests, download fallback)
+
+**Concurrency**: `multiprocessing.Pool`
+
+**Output**: `upstream/fabric/`
+
+### `update_quilt.py`
+
+**Sources**: Quilt Meta API v3 (`meta.quiltmc.org`)
+
+**Steps**:
+1. Fetch quilt-loader version list
+2. Download installer JSONs from Quilt Maven
+3. Download full JARs for timestamp extraction (no HEAD optimization)
+
+**Concurrency**: `ThreadPoolExecutor`
+
+**Output**: `upstream/quilt/`
+
+### `update_java.py`
+
+**Sources**: Adoptium API (`api.adoptium.net`), OpenJ9 API (`api.adoptopenjdk.net`), Azul API (`api.azul.com`)
+
+**Steps**:
+1. Adoptium: paginate feature releases, save per-major-version
+2. OpenJ9: same API structure as Adoptium, different vendor/JVM-impl
+3. Azul: paginate package list, fetch individual package details
+
+**Retry logic**: 3 attempts with linear backoff for 5xx errors
+
+**Output**: `upstream/java_runtime/`
+
+---
+
+## Generate Scripts
+
+### Processing Pattern
+
+All generate scripts follow the same pattern:
+
+```python
+def main():
+ # 1. Load upstream data
+ index = DerivedIndex.parse_file(upstream_path(...))
+
+ # 2. Transform to MetaVersion
+ for entry in index:
+ version = process_version(entry)
+ version.write(launcher_path(COMPONENT, f"{version.version}.json"))
+
+ # 3. Write package metadata
+ package = MetaPackage(uid=COMPONENT, name="...", recommended=[...])
+ package.write(launcher_path(COMPONENT, "package.json"))
+```
+
+### `generate_mojang.py` — Most Complex
+
+Handles:
+- LWJGL extraction into `org.lwjgl` and `org.lwjgl3` components (variant hashing)
+- Log4j patching (CVE-2021-44228) via `+agents` injection
+- Split natives workaround for pre-1.19 versions
+- Library patching from static JSON overrides
+- Legacy argument processing
+- Compatible Java version detection
+
+### `generate_forge.py` — Three Eras
+
+| Era | MC Versions | Method | Key Class |
+|---|---|---|---|
+| Legacy | 1.1 – 1.5.2 | Jar mods + FML libs | `version_from_legacy()` |
+| Profile | 1.6 – 1.12.2 | Installer JSON | `version_from_profile()` / `version_from_modernized_installer()` |
+| Build System | 1.13+ | ForgeWrapper shim | `version_from_build_system_installer()` |
+
+### `generate_java.py` — Multi-Vendor
+
+Processes four vendors sequentially. Each is written as a separate component:
+1. Adoptium → `net.adoptium.java`
+2. OpenJ9 → `com.ibm.java`
+3. Azul → `com.azul.java`
+4. Mojang → `net.minecraft.java` (augmented with third-party runtimes)
+
+---
+
+## Index Generation: `index.py`
+
+```python
+def main():
+ for package_dir in sorted(os.listdir(LAUNCHER_DIR)):
+ package_path = os.path.join(LAUNCHER_DIR, package_dir, "package.json")
+ if not os.path.isfile(package_path):
+ continue
+
+ # Read package metadata
+ package = MetaPackage.parse_file(package_path)
+
+ # Build version index with SHA-256 hashes
+ version_entries = []
+ for version_file in version_files:
+ sha256 = file_hash(version_file, hashlib.sha256)
+ meta_version = MetaVersion.parse_file(version_file)
+ entry = MetaVersionIndexEntry.from_meta_version(meta_version, sha256)
+ version_entries.append(entry)
+
+ # Sort by release_time descending
+ version_entries.sort(key=lambda e: e.release_time, reverse=True)
+
+ # Write per-package index
+ version_index = MetaVersionIndex(uid=package.uid, versions=version_entries)
+ version_index.write(os.path.join(LAUNCHER_DIR, package_dir, "index.json"))
+
+ # Write master index
+ master_index = MetaPackageIndex(packages=package_entries)
+ master_index.write(os.path.join(LAUNCHER_DIR, "index.json"))
+```
+
+---
+
+## HTTP Caching
+
+All HTTP requests use `CacheControl` with disk-backed `FileCache`:
+
+```python
+def default_session():
+ cache = FileCache(os.path.join(cache_path(), "http_cache"))
+ sess = CacheControl(requests.Session(), cache)
+ sess.headers.update({"User-Agent": "ProjectTickMeta/1.0"})
+ return sess
+```
+
+This respects HTTP cache headers (`ETag`, `Last-Modified`, `Cache-Control`), reducing bandwidth on subsequent runs.
+
+---
+
+## Directory Layout
+
+```
+meta/
+├── upstream/ # Git repo — raw vendor data (Phase 1 output)
+│ ├── mojang/
+│ ├── forge/
+│ ├── neoforge/
+│ ├── fabric/
+│ ├── quilt/
+│ ├── java_runtime/
+│ └── ...
+├── metalauncher/ # Git repo — launcher-ready JSON (Phase 2 output)
+│ ├── index.json # Master package index
+│ ├── net.minecraft/
+│ ├── org.lwjgl/
+│ ├── org.lwjgl3/
+│ ├── net.minecraftforge/
+│ ├── net.neoforged/
+│ ├── net.fabricmc.fabric-loader/
+│ ├── net.fabricmc.intermediary/
+│ ├── org.quiltmc.quilt-loader/
+│ ├── net.minecraft.java/
+│ ├── net.adoptium.java/
+│ ├── com.ibm.java/
+│ ├── com.azul.java/
+│ └── ...
+└── caches/ # HTTP cache directory
+ └── http_cache/
+```
+
+---
+
+## Pipeline Flow Diagram
+
+```
+┌────────────────────────────────────────────────────┐
+│ update.sh │
+│ │
+│ ┌──────────────────────────────────────────────┐ │
+│ │ Phase 1: UPDATE │ │
+│ │ update_mojang → update_forge → update_neo → │ │
+│ │ update_fabric → update_quilt → update_java │ │
+│ │ ↓ (writes to upstream/) │ │
+│ └──────────────────────────────────────────────┘ │
+│ ┌──────────────────────────────────────────────┐ │
+│ │ Git commit + push upstream/ (if changed) │ │
+│ └──────────────────────────────────────────────┘ │
+│ ┌──────────────────────────────────────────────┐ │
+│ │ Phase 2: GENERATE │ │
+│ │ gen_mojang → gen_forge → gen_neoforge → │ │
+│ │ gen_fabric → gen_quilt → gen_java → index │ │
+│ │ ↓ (writes to launcher/) │ │
+│ └──────────────────────────────────────────────┘ │
+│ ┌──────────────────────────────────────────────┐ │
+│ │ Git commit + push launcher/ (if changed) │ │
+│ │ — OR — rsync to DEPLOY_FOLDER │ │
+│ └──────────────────────────────────────────────┘ │
+└────────────────────────────────────────────────────┘
+```
diff --git a/docs/handbook/mnv/architecture.md b/docs/handbook/mnv/architecture.md
new file mode 100644
index 0000000000..9f5cfc876d
--- /dev/null
+++ b/docs/handbook/mnv/architecture.md
@@ -0,0 +1,549 @@
+# MNV — Architecture
+
+## High-Level Organisation
+
+The MNV source tree lives under `mnv/` inside the Project Tick monorepo.
+It follows a flat layout: the editor's core C sources and headers are in
+`mnv/src/`, build infrastructure at the project root, and runtime files under
+`mnv/runtime/`.
+
+```
+mnv/
+├── CMakeLists.txt # Primary CMake build
+├── CMakePresets.json # Named build presets
+├── Makefile # Legacy top-level Make wrapper
+├── configure # Autoconf-generated configure script
+├── src/ # All C source and headers
+│ ├── auto/ # Generated files (config.h, osdef.h, pathdef.c)
+│ ├── libvterm/ # Embedded terminal emulation library
+│ ├── xdiff/ # Embedded diff library
+│ ├── xxd/ # Hex-dump utility (separate binary)
+│ ├── proto/ # Function prototypes (*.pro files)
+│ ├── testdir/ # Script-driven test suite
+│ └── *.c / *.h # Editor source files
+├── runtime/ # Scripts, docs, syntax, ftplugins, …
+├── ci/ # CI helper scripts
+├── cmake/ # CMake template files
+├── nsis/ # Windows installer scripts
+├── pixmaps/ # Application icons
+├── tools/ # Developer tools
+└── lang/ # Source-level gettext catalogs
+```
+
+---
+
+## The Compilation Unit Model
+
+MNV is a single-binary C project. There is **one** executable target (`mnv`)
+comprising roughly 100 `.c` files compiled together with a shared set of
+headers. The primary header is `src/mnv.h`, which every `.c` file includes:
+
+```c
+#include "mnv.h"
+```
+
+`mnv.h` in turn pulls in the header cascade:
+
+```
+mnv.h
+ ├── protodef.h — helper macros for function prototypes
+ ├── feature.h — feature-level #defines (FEAT_TINY/NORMAL/HUGE)
+ ├── os_unix.h / os_win32.h / os_amiga.h — platform headers
+ ├── ascii.h — ASCII character constants
+ ├── keymap.h — terminal key-code mapping
+ ├── termdefs.h — terminal capability definitions
+ ├── macros.h — utility macros
+ ├── option.h — option flag definitions
+ ├── beval.h — balloon-eval declarations
+ ├── proto.h — master prototype include (pulls every src/proto/*.pro)
+ ├── structs.h — all major struct typedefs
+ ├── globals.h — EXTERN global variables
+ ├── errors.h — error message externs
+ └── gui.h — GUI struct and macro definitions
+```
+
+The `EXTERN` macro (defined in `globals.h`) is used to declare global variables
+in headers. `main.c` defines `#define EXTERN` before including `mnv.h`, which
+causes the globals to be **defined** there and merely **declared** everywhere
+else.
+
+```c
+// main.c
+#define EXTERN
+#include "mnv.h"
+```
+
+---
+
+## Core Subsystem Map
+
+Below is every `.c` file in `src/` grouped by subsystem, with a description of
+its role.
+
+### Entry Point
+
+| File | Role |
+|---|---|
+| `main.c` | Program entry. Contains `main()` (or `MNVMain()` on Windows). Parses command-line arguments via `command_line_scan()`, runs `common_init_1()` / `common_init_2()`, sources startup scripts, creates windows, and enters the main editing loop. Key functions: `mainerr()`, `early_arg_scan()`, `usage()`, `parse_command_name()`, `check_tty()`, `read_stdin()`, `create_windows()`, `edit_buffers()`, `exe_pre_commands()`, `exe_commands()`, `source_startup_scripts()`, `main_start_gui()`. |
+| `version.c` | Version number, patch list, build date. Included patches tracked in `included_patches[]`. |
+| `version.h` | Macros: `MNV_VERSION_MAJOR`, `MNV_VERSION_MINOR`, `MNV_VERSION_BUILD`, `MNV_VERSION_LONG`. |
+
+### Memory Management
+
+| File | Role |
+|---|---|
+| `alloc.c` | `malloc`/`realloc`/`free` wrappers: `alloc()`, `alloc_clear()`, `mnv_free()`, `mnv_realloc()`. Optional `MEM_PROFILE` support for tracking allocations. Global counters: `mem_allocs[]`, `mem_frees[]`, `mem_allocated`, `mem_freed`, `mem_peak`. |
+| `alloc.h` | Allocation function prototypes. |
+| `gc.c` | Garbage collection for MNV9 objects. |
+
+### Buffer and Window Management
+
+| File | Role |
+|---|---|
+| `buffer.c` | Buffer list management. Double-linked list of `buf_T` structs. States: never-loaded / not-loaded / hidden / normal. Functions: `buflist_add()`, `buflist_findname()`, `enter_buffer()`, `do_buffer()`, `free_buffer()`, `trigger_undo_ftplugin()`. |
+| `window.c` | Window split, close, resize, and navigation. Manages `win_T` linked list and `frame_T` layout tree. |
+| `tabpanel.c` | Tab page panel display logic. |
+
+### Text Storage
+
+| File | Role |
+|---|---|
+| `memline.c` | Memory line — the B-tree layer that stores buffer lines on disk in swap files and reads them back on demand. Operates on `memfile_T` structures. |
+| `memfile.c` | Block-level swap-file I/O. Manages memory-file pages. |
+
+### Editing Operations
+
+| File | Role |
+|---|---|
+| `edit.c` | Insert-mode handling — character insertion, completion triggers, abbreviations. |
+| `change.c` | Change tracking: `changed()`, `changed_bytes()`, notifies syntax and diff. |
+| `ops.c` | Operator commands: yank, delete, put, format, shift, filter, sort. |
+| `register.c` | Named and unnamed registers, clipboard register bridging. |
+| `undo.c` | Undo/redo tree. Persistent undo-file support. |
+| `indent.c` | Auto-indentation logic. |
+| `cindent.c` | C-language indentation engine (`:set cindent`). |
+| `textformat.c` | Text formatting (`:gq`), paragraph shaping. |
+| `textobject.c` | Text-object selection: `aw`, `iw`, `as`, `is`, `a"`, `i"`, etc. |
+| `textprop.c` | Text properties API for virtual text and diagnostic markers. |
+
+### Normal-Mode and Command-Line
+
+| File | Role |
+|---|---|
+| `normal.c` | The normal-mode command dispatcher. Maps keystrokes to `nv_*` handler functions. Uses `nv_cmds.h` and `nv_cmdidxs.h` index tables. |
+| `ex_docmd.c` | Ex-command dispatcher. Parses `:` commands (`:edit`, `:write`, `:quit`, …) using `ex_cmds.h` / `ex_cmdidxs.h` command tables. |
+| `ex_cmds.c` | Implementation of many ex commands. |
+| `ex_cmds2.c` | More ex command implementations (`:source`, `:runtime`, …). |
+| `ex_getln.c` | Command-line input — editing, history, completion. |
+| `ex_eval.c` | `:try`/`:catch`/`:finally`/`:throw` — exception handling. |
+| `cmdexpand.c` | Command-line completion engine (Tab expansion). |
+| `cmdhist.c` | Command-line history ring. |
+| `usercmd.c` | User-defined `:command` handling. |
+
+### Motion, Search, and Navigation
+
+| File | Role |
+|---|---|
+| `move.c` | Cursor motion — `j`, `k`, `w`, `b`, `CTRL-D`, scrolling. |
+| `search.c` | Pattern search (`/`, `?`, `n`, `N`, `:substitute`). |
+| `regexp.c` | Regex dispatch: chooses BT or NFA engine. |
+| `regexp_bt.c` | Backtracking regex engine. |
+| `regexp_nfa.c` | NFA-based regex engine. |
+| `mark.c` | Named marks, jump list, change list. |
+| `tag.c` | Tags file navigation (`:tag`, `CTRL-]`). |
+| `findfile.c` | `'path'`-based file search (`:find`, `gf`). |
+| `fold.c` | Code folding — six methods (manual, indent, expr, syntax, diff, marker). |
+| `match.c` | `:match` and `matchadd()` highlighting. |
+| `fuzzy.c` | Fuzzy matching for completion. |
+
+### Expression Evaluation (MNVscript)
+
+| File | Role |
+|---|---|
+| `eval.c` | Legacy MNVscript expression parser. Recursive-descent: `eval0()` → `eval9()`. `num_divide()`, `num_modulus()` with safe divide-by-zero handling. |
+| `evalbuffer.c` | Buffer-related evaluation functions. |
+| `evalfunc.c` | Built-in function implementations (`len()`, `map()`, `filter()`, …). |
+| `evalvars.c` | Variable management (`g:`, `b:`, `w:`, `t:`, `l:`, `s:`, `v:`). |
+| `evalwindow.c` | Window-related evaluation functions. |
+| `typval.c` | `typval_T` operations — the typed-value core of the evaluator. |
+| `dict.c` | Dictionary type implementation. |
+| `list.c` | List type implementation. |
+| `blob.c` | Blob (byte-array) type. |
+| `tuple.c` | Tuple type. |
+| `float.c` | Floating-point operations. |
+| `json.c` | JSON encode/decode. |
+| `strings.c` | String utility functions. |
+| `hashtab.c` | Hash table used for dictionaries and symbol tables. |
+
+### MNV9 Compiler and VM
+
+| File | Role |
+|---|---|
+| `mnv9script.c` | `:mnv9script`, `:import`, `:export` commands. `in_mnv9script()` detects MNV9 mode via `SCRIPT_VERSION_MNV9`. |
+| `mnv9compile.c` | Bytecode compiler: transforms MNV9 source into instruction sequences. |
+| `mnv9execute.c` | Bytecode virtual machine: executes compiled MNV9 functions. |
+| `mnv9expr.c` | MNV9 expression compilation — types, operators. |
+| `mnv9type.c` | MNV9 type system — type checking, inference, generics resolution. |
+| `mnv9instr.c` | Instruction definitions for the MNV9 VM. |
+| `mnv9cmds.c` | MNV9-specific commands: `def`, `enddef`, `class`, `enum`, etc. |
+| `mnv9class.c` | MNV9 class and object system — `class`, `interface`, `extends`. |
+| `mnv9generics.c` | Generic type support (`<T>`, `<K, V>`, etc.). |
+| `mnv9.h` | MNV9-specific type definitions and constants. |
+
+### Syntax and Highlighting
+
+| File | Role |
+|---|---|
+| `syntax.c` | Syntax highlighting engine. `syn_pattern` struct, region matching, syncing. |
+| `highlight.c` | Highlight group management, `:highlight` command, attribute resolution. |
+
+### Display and Drawing
+
+| File | Role |
+|---|---|
+| `drawline.c` | Renders a single screen line, handling line numbers, signs, folds, concealing, text properties. |
+| `drawscreen.c` | Top-level screen redraw orchestration — decides which lines need updating. |
+| `screen.c` | Screen buffer management — `ScreenLines[]`, `ScreenAttrs[]`, `ScreenCols[]`. |
+| `ui.c` | Abstract UI layer bridging terminal / GUI drawing. |
+| `term.c` | Terminal capability handling — termcap/terminfo, escape sequences. |
+| `popupmenu.c` | Insert-mode completion popup menu. |
+| `popupwin.c` | Floating popup windows (`:popup`, `popup_create()`). |
+| `sign.c` | Sign column management. |
+
+### File I/O
+
+| File | Role |
+|---|---|
+| `fileio.c` | Reading/writing files, handling encodings, line endings, encryption. |
+| `filepath.c` | File path manipulation — expansion, completion, path separators. |
+| `bufwrite.c` | Buffer-write logic split from `fileio.c`. |
+
+### Terminal Emulator
+
+| File | Role |
+|---|---|
+| `terminal.c` | `:terminal` implementation. `struct terminal_S` wraps a `VTerm` and connects it to a `job_T`. Three parts: generic code, MS-Windows (winpty/conpty), Unix (PTY). |
+| `libvterm/` | Embedded copy of libvterm — the VT100/xterm terminal emulation library. |
+
+### Job and Channel
+
+| File | Role |
+|---|---|
+| `channel.c` | Socket and pipe-based IPC. `channel_read()`, `channel_get_mode()`, `channel_part_send()`. Supports raw, JSON, JS, and NL (newline) modes. |
+| `job.c` | Process spawning and management for `job_start()`. |
+| `netbeans.c` | NetBeans IDE interface protocol handler (over channels). |
+
+### Clipboard and Selection
+
+| File | Role |
+|---|---|
+| `clipboard.c` | Cross-platform clipboard — Visual selection to/from system clipboard. Wayland clipboard integration via `wayland.h` macros. Provider abstraction (`clip_provider_is_available()`). |
+| `wayland.c` | Wayland display connection (`vwl_connection_T`), data-offer monitoring, focus-stealing clipboard. Functions prefixed `vwl_` for abstractions, `wayland_` for global connection. `vwl_connection_flush()`, `vwl_connection_dispatch()`. |
+| `wayland.h` | Wayland types: `vwl_connection_T`, `vwl_seat_S`, `vwl_data_protocol_T` enum. Protocol includes for ext-data-control, wlr-data-control, xdg-shell, primary-selection. |
+
+### GUI Subsystem
+
+| File | Role |
+|---|---|
+| `gui.c` | GUI core — `gui_start()`, `gui_attempt_start()`, scrollbar management, drawing dispatch. Holds global `gui_T gui`. |
+| `gui.h` | GUI macros (`TEXT_X`, `FILL_Y`, `X_2_COL`), scrollbar indices, orientation enums. |
+| `gui_gtk.c` | GTK 2/3 widget creation — toolbar, dialogs, find/replace. Callbacks: `entry_activate_cb()`, `entry_changed_cb()`, `find_replace_cb()`. |
+| `gui_gtk_f.c` / `gui_gtk_f.h` | GTK form widget — custom container for the editor area. |
+| `gui_gtk_x11.c` | GTK+X11 integration — display init, key translation, selection, DnD. |
+| `gui_motif.c` | Motif toolkit backend. |
+| `gui_x11.c` | Raw X11 backend (used by Motif). |
+| `gui_w32.c` | Win32 native GUI backend. |
+| `gui_haiku.cc` / `gui_haiku.h` | Haiku OS GUI backend. |
+| `gui_photon.c` | QNX Photon GUI backend. |
+| `gui_beval.c` | Balloon-eval tooltip windows for GUIs. |
+| `gui_xim.c` | X Input Method integration. |
+| `gui_xmdlg.c` | Motif dialog helpers. |
+| `gui_xmebw.c` / `gui_xmebw.h` / `gui_xmebwp.h` | Motif enhanced button widget. |
+| `gui_dwrite.cpp` / `gui_dwrite.h` | DirectWrite text rendering (Windows). |
+
+### Platform Abstraction
+
+| File | Role |
+|---|---|
+| `os_unix.c` | Unix (and OS/2, Atari MiNT) system calls — signals, process control, terminal setup, file locking. |
+| `os_unix.h` | Unix system includes and defines. |
+| `os_win32.c` | Win32 system calls. |
+| `os_win32.h` | Win32 includes and defines. |
+| `os_mswin.c` | Shared MS-Windows functions (used by both console and GUI). |
+| `os_amiga.c` / `os_amiga.h` | Amiga OS support. |
+| `os_mac_conv.c` | macOS encoding conversion. |
+| `os_macosx.m` | Objective-C integration for macOS (pasteboard, services). |
+| `os_dos.h` | MS-DOS compatibility defines. |
+| `os_haiku.h` / `os_haiku.rdef.in` | Haiku resource definitions. |
+| `os_qnx.c` / `os_qnx.h` | QNX-specific code. |
+| `os_vms.c` / `os_vms_conf.h` | OpenVMS support. |
+| `os_w32dll.c` / `os_w32exe.c` | Win32 DLL/EXE entry points for `MNVDLL` builds. |
+| `pty.c` | Pseudo-terminal allocation (Unix). |
+| `iscygpty.c` / `iscygpty.h` | Cygwin PTY detection. |
+
+### Miscellaneous
+
+| File | Role |
+|---|---|
+| `misc1.c` | Miscellaneous utilities — beep, langmap, various helpers. |
+| `misc2.c` | More miscellaneous — string comparison, strncpy wrappers. |
+| `message.c` | User-facing messages — `msg()`, `emsg()`, `semsg()`, `iemsg()`. |
+| `getchar.c` | Input character queue — typeahead, recording, mappings. |
+| `mouse.c` | Mouse event handling (terminal mouse protocols). |
+| `map.c` | Key mapping (`:map`, `:noremap`, etc.). |
+| `menu.c` | Menu system (`:menu`, GUI menus). |
+| `autocmd.c` | Autocommand infrastructure — `:autocmd`, `BufEnter`, etc. |
+| `arglist.c` | Argument list management (`:args`, `:argadd`). |
+| `locale.c` | Locale and language handling. |
+| `mbyte.c` | Multi-byte encoding — UTF-8, locale conversion. |
+| `charset.c` | Character classification — `mnv_isdigit()`, `mnv_isalpha()`, keyword chars. |
+| `digraph.c` | Digraph input (`CTRL-K`). |
+| `hardcopy.c` | `:hardcopy` PostScript printing. |
+| `help.c` | Help system — `:help` tag lookup and display. |
+| `spell.c` / `spellfile.c` / `spellsuggest.c` | Spell checker. |
+| `diff.c` | Diff mode orchestration. |
+| `crypt.c` / `crypt_zip.c` / `blowfish.c` / `sha256.c` | Encryption and hashing. |
+| `if_cscope.c` | cscope interface. |
+| `if_xcmdsrv.c` | X11 client-server (remote commands). |
+| `clientserver.c` | Client-server architecture (`:remote`). |
+| `scriptfile.c` | `:source` command — script loading and execution. |
+| `session.c` | `:mksession` / `:mkview` — session persistence. |
+| `logfile.c` | Debug logging. |
+| `profiler.c` | Script and function profiling. |
+| `testing.c` | Test framework functions: `assert_equal()`, `test_*()` builtins. |
+| `debugger.c` | `:breakadd`, `:debug` — script debugger. |
+| `time.c` | Time-related functions — reltime, timers. |
+| `sound.c` | Sound playback via libcanberra (or macOS APIs). |
+| `mnvinfo.c` | `.mnvinfo` file — persistent history, marks, registers. |
+
+### Embedded Libraries
+
+| Directory | Role |
+|---|---|
+| `src/libvterm/` | Terminal emulation (VT100/xterm). Built as a static library `libvterm` linked into `mnv`. |
+| `src/xdiff/` | Diff algorithms — `xdiffi.c` (Myers), `xpatience.c` (patience), `xhistogram.c` (histogram), `xprepare.c`, `xemit.c`, `xutils.c`. Built as an `OBJECT` library. |
+| `src/xxd/` | Hex dump utility. Separate executable. Built via `add_subdirectory(src/xxd)`. |
+| `src/tee/` | Windows `tee` utility for test infrastructure. |
+
+### Generated Files
+
+| File | How generated |
+|---|---|
+| `auto/config.h` | `cmake/config.h.cmake` processed by `configure_file()` (CMake) or `configure.ac` → `config.h.in` (Autoconf). |
+| `auto/pathdef.c` | `cmake/pathdef.c.cmake` processed by `configure_file()`. Embeds install paths, compiler flags, compiled-by string. |
+| `auto/osdef.h` | Stub file on modern systems; the Autoconf build generates it from `osdef.sh` + `osdef1.h.in` / `osdef2.h.in`. |
+| `auto/wayland/*.c` / `auto/wayland/*.h` | Generated by `wayland-scanner` from protocol XML files. |
+| `ex_cmdidxs.h` | Command index table (generated by `create_cmdidxs.mnv`). |
+| `nv_cmdidxs.h` | Normal-mode command index (generated by `create_nvcmdidxs.c` / `create_nvcmdidxs.mnv`). |
+
+---
+
+## Key Data Structures
+
+### `buf_T` (buffer)
+
+Defined in `structs.h` as `typedef struct file_buffer buf_T`. A buffer
+represents an in-memory file. Buffers form a doubly-linked list
+(`b_next` / `b_prev`). Each buffer has a `memline_T` (`b_ml`) backed by a
+swap file, option values, undo tree, syntax state, and sign list.
+
+### `win_T` (window)
+
+Defined in `structs.h` as `typedef struct window_S win_T`. A window is a
+viewport onto a buffer. Windows are organised in `frame_T` trees for
+horizontal/vertical splits.
+
+### `pos_T` (position)
+
+```c
+typedef struct
+{
+ linenr_T lnum; // line number
+ colnr_T col; // column number
+ colnr_T coladd; // extra virtual column
+} pos_T;
+```
+
+### `typval_T` (typed value)
+
+The central value type for the expression evaluator. Discriminated union
+tagged by `v_type`. Can hold numbers, strings, floats, lists, dicts, blobs,
+tuples, partial functions, jobs, channels, classes, objects, and more.
+
+### `garray_T` (growing array)
+
+```c
+typedef struct growarray
+{
+ int ga_len; // current number of items used
+ int ga_maxlen; // maximum number of items possible
+ int ga_itemsize; // sizeof(item)
+ int ga_growsize; // number of items to grow each time
+ void *ga_data; // pointer to the first item
+} garray_T;
+```
+
+Used ubiquitously — from argument lists to Wayland seat lists.
+
+### `gui_T` (GUI state)
+
+Global struct holding all GUI state: widget handles, fonts, colours, scrollbars,
+tabline, geometry. Defined in `gui.c`:
+
+```c
+gui_T gui;
+```
+
+### `mparm_T` (main parameters)
+
+Struct collecting all startup parameters (argc, argv, feature flags, window
+counts, edit mode, etc.) and passed through the chain of initialisation
+functions in `main.c`:
+
+```c
+static mparm_T params;
+```
+
+### `vwl_connection_T` (Wayland connection)
+
+```c
+struct vwl_connection_S {
+ struct { struct wl_display *proxy; int fd; } display;
+ struct { struct wl_registry *proxy; } registry;
+ struct {
+ garray_T seats;
+ struct zwlr_data_control_manager_v1 *zwlr_data_control_manager_v1;
+ struct ext_data_control_manager_v1 *ext_data_control_manager_v1;
+ ...
+ } gobjects;
+};
+```
+
+---
+
+## Initialisation Sequence
+
+When `main()` runs, it follows this sequence (see `src/main.c`):
+
+1. **`mch_early_init()`** — low-level OS initialisation, performed before the
+   memory allocator and buffers are set up.
+2. **`CLEAR_FIELD(params)`** — zero the `mparm_T` struct.
+3. **`autocmd_init()`** — set up autocommand tables.
+4. **Interpreter init** — `mnv_ruby_init()`, `mnv_tcl_init()` if compiled in.
+5. **`common_init_1()`** — first batch of shared init (allocator, hash tables,
+ options, global variables).
+6. **`--startuptime` / `--log` scan** — find these flags early for logging.
+7. **`--clean` scan** — detect clean mode before sourcing rcfiles.
+8. **`common_init_2(&params)`** — second batch (terminal setup, option defaults,
+ langmap, langmenu).
+9. **`command_line_scan(&params)`** — full argument parsing.
+10. **`check_tty(&params)`** — verify stdin/stdout are terminals.
+11. **`source_startup_scripts(&params)`** — load system and user `.mnvrc`.
+12. **`create_windows(&params)`** — set up initial window layout.
+13. **`exe_pre_commands(&params)`** — run `--cmd` arguments.
+14. **`edit_buffers(&params, cwd)`** — load file arguments.
+15. **`exe_commands(&params)`** — run `-c` arguments.
+16. **`main_start_gui()`** — launch GUI event loop if applicable.
+17. **Main loop** — the editor enters `normal_cmd()` dispatching in a loop.
+
+---
+
+## Feature Guard System
+
+Optional features are controlled by preprocessor guards of the form `FEAT_*`.
+The hierarchy is defined in `src/feature.h`:
+
+```c
+#ifdef FEAT_HUGE
+# define FEAT_NORMAL
+#endif
+#ifdef FEAT_NORMAL
+# define FEAT_TINY
+#endif
+```
+
+Individual features cascade from the tier:
+
+```c
+// +folding — requires +normal
+#ifdef FEAT_NORMAL
+# define FEAT_FOLDING
+#endif
+
+// +langmap — requires +huge
+#ifdef FEAT_HUGE
+# define FEAT_LANGMAP
+#endif
+```
+
+The CMake build sets the top-level feature in `auto/config.h` via:
+
+```cmake
+set(MNV_FEATURE "huge" CACHE STRING "Feature level: tiny, normal, huge")
+```
+
+And the `feature.h` cascade does the rest.
+
+---
+
+## Threading Model
+
+MNV is fundamentally **single-threaded**. The main loop processes one
+keystroke at a time. Asynchronous I/O is handled through `select()`/`poll()`
+multiplexing in the channel layer (`channel.c`). GUI event loops (GTK, Win32)
+integrate with MNV's own event processing.
+
+The only use of pthreads is optional and runtime-specific (e.g., timer signals
+on some platforms). The Wayland code uses the display file descriptor with
+`poll()` or `select()` for non-blocking dispatch.
+
+---
+
+## Test Architecture
+
+### Unit Tests
+
+Four dedicated unit-test executables are built by the CMake function
+`mnv_add_unit_test()`:
+
+| Test | Replaces | Tests |
+|---|---|---|
+| `json_test` | `json.c` | JSON encode/decode |
+| `kword_test` | `charset.c` | Keyword character classification |
+| `memfile_test` | `memfile.c` | Memory file operations |
+| `message_test` | `message.c` | Message formatting |
+
+Each test replaces `main.c` and one other source with a test variant that
+provides its own `main()`.
+
+### Script Tests
+
+The bulk of the test suite lives in `src/testdir/`. Tests are `.mnv` scripts
+run by the `mnv` binary itself. Categories:
+
+- **nongui** — all terminal-mode tests.
+- **indent** — indentation rule tests under `runtime/indent/`.
+- **syntax** — syntax highlighting tests under `runtime/syntax/`.
+
+Tests are invoked via `ctest` or `make test` and have generous timeouts
+(3600 s for script tests).
+
+---
+
+## Build System Duality
+
+MNV maintains **two** build systems:
+
+1. **CMake** (`CMakeLists.txt`) — the primary, modern build. Handles
+ dependency detection, feature configuration, and test registration. Uses
+ `CMakePresets.json` for named configurations.
+
+2. **Autoconf + Make** (`src/configure.ac`, `src/Makefile`, top-level
+ `Makefile`, plus platform-specific `Make_*.mak` files). The legacy build
+ that works on the widest range of systems.
+
+Both produce the same `mnv` binary; features and dependencies are detected
+independently by each system.
+
+---
+
+*This document describes MNV 10.0 as of build 287 (2026-04-03).*
diff --git a/docs/handbook/mnv/building.md b/docs/handbook/mnv/building.md
new file mode 100644
index 0000000000..ad54b97012
--- /dev/null
+++ b/docs/handbook/mnv/building.md
@@ -0,0 +1,636 @@
+# MNV — Building
+
+## Build System Overview
+
+MNV ships with **two** fully functional build systems:
+
+| System | Entry point | Status |
+|---|---|---|
+| CMake | `CMakeLists.txt` (project root) | **Primary** — recommended for new builds |
+| Autoconf + Make | `src/configure` / `src/Makefile` | Legacy — widest platform coverage |
+
+This document focuses on the CMake build but covers the Autoconf path as well.
+
+---
+
+## Prerequisites
+
+### Compiler
+
+MNV is written in C99. Any modern C compiler works:
+
+```cmake
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_C_STANDARD_REQUIRED OFF)
+```
+
+Tested compilers:
+- GCC 7+
+- Clang 7+
+- MSVC 2015+ (via `Make_mvc.mak` or CMake generators)
+- MinGW / MSYS2
+
+### Required tools
+
+| Tool | Minimum version |
+|---|---|
+| CMake | 3.15 |
+| make / ninja | any recent version |
+| pkg-config | recommended (for GTK, Wayland, libcanberra, libsodium) |
+
+### Required libraries
+
+Only a terminal library is strictly required:
+
+- **ncurses** (preferred) or **termcap** / **tinfo**
+
+Without it the build emits a warning:
+
+```
+No terminal library found (ncurses/curses/termcap). Build may fail.
+```
+
+### Optional libraries
+
+| Library | CMake option | Feature |
+|---|---|---|
+| GTK 3 | `MNV_GUI=gtk3` | Graphical editor |
+| GTK 2 | `MNV_GUI=gtk2` | Graphical editor (older) |
+| Motif | `MNV_GUI=motif` | Motif-based GUI |
+| X11 + Xt | auto-detected | X clipboard, client-server |
+| Wayland + wayland-scanner | auto-detected | Wayland clipboard |
+| libcanberra | `MNV_SOUND=ON` | Sound playback |
+| libsodium | `MNV_SODIUM=ON` | XChaCha20 encryption |
+| libacl | `MNV_ACL=ON` | POSIX ACL preservation |
+| libgpm | `MNV_GPM=ON` | GPM mouse in console |
+| Lua | `MNV_LUA=ON` | Lua scripting |
+| Python 3 | `MNV_PYTHON3=ON` | Python 3 scripting |
+| Ruby | `MNV_RUBY=ON` | Ruby scripting |
+| Perl | `MNV_PERL=ON` | Perl scripting |
+| Tcl | `MNV_TCL=ON` | Tcl scripting |
+| MzScheme / Racket | `MNV_MZSCHEME=ON` | Scheme scripting |
+
+### Ubuntu / Debian quick install
+
+```bash
+# Minimal build
+sudo apt install git make cmake gcc libncurses-dev
+
+# With X clipboard
+sudo apt install libxt-dev
+
+# With GTK 3 GUI
+sudo apt install libgtk-3-dev
+
+# With Wayland clipboard
+sudo apt install libwayland-dev wayland-protocols wayland-scanner
+
+# Optional interpreters
+sudo apt install liblua5.4-dev libpython3-dev ruby-dev libperl-dev tcl-dev
+
+# Optional features
+sudo apt install libcanberra-dev libsodium-dev libacl1-dev libgpm-dev
+```
+
+---
+
+## CMake Build — Quick Start
+
+```bash
+cd mnv
+mkdir build && cd build
+cmake ..
+cmake --build . -j$(nproc)
+ctest # run tests
+sudo cmake --install . # install to /usr/local
+```
+
+---
+
+## CMake Configuration Options
+
+### Feature Level
+
+```cmake
+set(MNV_FEATURE "huge" CACHE STRING "Feature level: tiny, normal, huge")
+```
+
+| Level | Description |
+|---|---|
+| `tiny` | Minimal — no `+eval`, no terminal, no channels |
+| `normal` | Default selection of features; includes `+eval` |
+| `huge` | Everything: terminal, channels, NetBeans, cscope, beval, … |
+
+On Unix/macOS/Windows the default is `huge`.
+
+The CMake build translates the feature level into `config.h` defines consumed
+by `feature.h`:
+
+```cmake
+if(MNV_FEATURE STREQUAL "huge")
+ set(FEAT_HUGE 1)
+ set(FEAT_EVAL 1)
+ set(FEAT_BEVAL 1)
+ set(FEAT_CSCOPE 1)
+ set(FEAT_NETBEANS_INTG 1)
+ set(FEAT_JOB_CHANNEL 1)
+ set(FEAT_TERMINAL 1)
+ set(FEAT_IPV6 1)
+ ...
+endif()
+```
+
+### GUI Selection
+
+```cmake
+set(MNV_GUI "auto" CACHE STRING "GUI toolkit: auto, gtk3, gtk2, motif, none")
+```
+
+- `auto` — tries GTK 3 → GTK 2 → Motif → none.
+- `gtk3` / `gtk2` / `motif` — explicitly request one toolkit.
+- `none` — terminal-only build.
+
+When a GUI is found, these variables are set and the corresponding source files
+are added:
+
+```cmake
+# GTK 3 example
+set(GUI_SRC gui.c gui_gtk.c gui_gtk_f.c gui_gtk_x11.c gui_beval.c)
+```
+
+### Boolean Options
+
+| Option | Default | Effect |
+|---|---|---|
+| `MNV_TERMINAL` | `ON` | Built-in terminal emulator (requires channels) |
+| `MNV_CHANNEL` | `ON` | Job/channel support |
+| `MNV_NETBEANS` | `ON` | NetBeans interface |
+| `MNV_CSCOPE` | `ON` | Cscope integration |
+| `MNV_SOUND` | `ON` | Sound via libcanberra |
+| `MNV_ACL` | `ON` | POSIX ACL support |
+| `MNV_GPM` | `ON` | GPM mouse |
+| `MNV_SODIUM` | `ON` | libsodium encryption |
+| `MNV_MULTIBYTE` | `ON` | Multi-byte character support |
+| `MNV_XIM` | `ON` | X Input Method |
+
+### Language Interpreters
+
+All interpreters default to `OFF` and can be loaded dynamically:
+
+```cmake
+option(MNV_LUA "Enable Lua interpreter" OFF)
+option(MNV_LUA_DYNAMIC "Load Lua dynamically" ON)
+option(MNV_PERL "Enable Perl interpreter" OFF)
+option(MNV_PERL_DYNAMIC "Load Perl dynamically" ON)
+option(MNV_PYTHON3 "Enable Python 3 interpreter" OFF)
+option(MNV_PYTHON3_DYNAMIC "Load Python 3 dynamically" ON)
+option(MNV_RUBY "Enable Ruby interpreter" OFF)
+option(MNV_RUBY_DYNAMIC "Load Ruby dynamically" ON)
+option(MNV_TCL "Enable Tcl interpreter" OFF)
+option(MNV_TCL_DYNAMIC "Load Tcl dynamically" ON)
+option(MNV_MZSCHEME "Enable MzScheme/Racket" OFF)
+```
+
+Dynamic loading means MNV `dlopen()`s the interpreter shared library at
+runtime instead of linking against it at build time. The library name is
+auto-detected:
+
+```cmake
+foreach(_lua_lib ${LUA_LIBRARIES})
+ if(_lua_lib MATCHES "\\.(so|dylib)")
+ get_filename_component(DYNAMIC_LUA_DLL "${_lua_realpath}" NAME)
+ break()
+ endif()
+endforeach()
+```
+
+### Development Options
+
+```cmake
+option(MNV_DEBUG "Enable debug build" OFF)
+option(MNV_PROFILE "Enable profiling" OFF)
+option(MNV_SANITIZE "Enable address/undefined sanitizers" OFF)
+option(MNV_LEAK_CHECK "Enable EXITFREE for leak checking" OFF)
+option(MNV_BUILD_TESTS "Build and enable tests" ON)
+```
+
+When `MNV_DEBUG` is on:
+
+```cmake
+add_compile_options(-g -DDEBUG)
+```
+
+When `MNV_SANITIZE` is on:
+
+```cmake
+add_compile_options(
+ -fsanitize=address
+ -fsanitize=undefined
+ -fsanitize-recover=all
+ -fno-omit-frame-pointer
+)
+add_link_options(
+ -fsanitize=address
+ -fsanitize=undefined
+)
+```
+
+When `MNV_LEAK_CHECK` is on, the `EXITFREE` define ensures all memory is freed
+at exit so leak checkers can report accurately.
+
+### Compiled-by string
+
+```cmake
+set(MNV_COMPILED_BY "" CACHE STRING "Name of the person compiling MNV")
+```
+
+Embedded into `auto/pathdef.c` and shown in `:version` output.
+
+---
+
+## CMake Presets
+
+`CMakePresets.json` defines ready-made configurations. Use them with:
+
+```bash
+cmake --preset <name>
+cmake --build --preset <name>
+```
+
+### Available presets
+
+| Preset | Feature | GUI | Interpreters | Debug |
+|---|---|---|---|---|
+| `default` | huge | auto | none | no |
+| `minimal` | tiny | none | none | no |
+| `normal` | normal | none | none | no |
+| `nogui` | huge | none | none | no |
+| `gtk3` | huge | gtk3 | none | no |
+| `all-interp` | huge | auto | all (dynamic) | yes |
+| `all-interp-static` | huge | auto | all (static) | yes |
+| `lua-only` | huge | none | Lua (dynamic) | yes |
+| `python3-only` | huge | none | Python3 (dynamic) | yes |
+| `ruby-only` | huge | none | Ruby (dynamic) | yes |
+| `perl-only` | huge | none | Perl (dynamic) | yes |
+| `tcl-only` | huge | none | Tcl (dynamic) | yes |
+| `sanitize` | huge | none | none | ASan+UBSan |
+| `profile` | huge | none | none | gprof |
+
+Presets inherit from hidden base presets:
+
+- **`base`** — sets `CMAKE_EXPORT_COMPILE_COMMANDS=ON`, `MNV_BUILD_TESTS=ON`,
+ output to `build/${presetName}`.
+- **`dev-base`** — inherits `base`, adds `MNV_DEBUG=ON`, `MNV_LEAK_CHECK=ON`.
+
+### Example: debug build with all interpreters
+
+```bash
+cmake --preset all-interp
+cmake --build build/all-interp -j$(nproc)
+cd build/all-interp && ctest
+```
+
+### Example: sanitiser build
+
+```bash
+cmake --preset sanitize
+cmake --build build/sanitize -j$(nproc)
+cd build/sanitize && ctest
+```
+
+---
+
+## System Detection
+
+The CMake build performs extensive detection of headers, functions, and types.
+
+### Header checks
+
+```cmake
+check_include_file(stdint.h HAVE_STDINT_H)
+check_include_file(unistd.h HAVE_UNISTD_H)
+check_include_file(termios.h HAVE_TERMIOS_H)
+check_include_file(sys/select.h HAVE_SYS_SELECT_H)
+check_include_file(dlfcn.h HAVE_DLFCN_H)
+check_include_file(pthread.h HAVE_PTHREAD_H)
+# ... 40+ more
+```
+
+### Function checks
+
+```cmake
+check_function_exists(fchdir HAVE_FCHDIR)
+check_function_exists(getcwd HAVE_GETCWD)
+check_function_exists(select HAVE_SELECT)
+check_function_exists(sigaction HAVE_SIGACTION)
+check_function_exists(dlopen HAVE_DLOPEN)
+# ... 50+ more
+```
+
+### Type-size checks
+
+```cmake
+check_type_size(int MNV_SIZEOF_INT)
+check_type_size(long MNV_SIZEOF_LONG)
+check_type_size(off_t SIZEOF_OFF_T)
+check_type_size(time_t SIZEOF_TIME_T)
+check_type_size(wchar_t SIZEOF_WCHAR_T)
+```
+
+MNV requires `sizeof(int) >= 4`:
+
+```c
+#if MNV_SIZEOF_INT < 4 && !defined(PROTO)
+# error MNV only works with 32 bit int or larger
+#endif
+```
+
+### Compile tests
+
+```cmake
+# __attribute__((unused))
+check_c_source_compiles("
+ int x __attribute__((unused));
+ int main() { return 0; }
+" HAVE_ATTRIBUTE_UNUSED)
+
+# struct stat.st_blksize
+check_c_source_compiles("
+ #include <sys/stat.h>
+ int main() { struct stat s; s.st_blksize = 0; return 0; }
+" HAVE_ST_BLKSIZE)
+
+# sockaddr_un
+check_c_source_compiles("
+ #include <sys/un.h>
+ int main() { struct sockaddr_un s; return 0; }
+" HAVE_SOCKADDR_UN)
+```
+
+---
+
+## Generated Files
+
+### `auto/config.h`
+
+Generated from `cmake/config.h.cmake` by `configure_file()`. Contains all
+`HAVE_*`, `FEAT_*`, `SIZEOF_*`, and `DYNAMIC_*` defines.
+
+### `auto/pathdef.c`
+
+Generated from `cmake/pathdef.c.cmake`. Embeds:
+
+- Compiler used (`MNV_ALL_CFLAGS`)
+- Linker used (`MNV_ALL_LFLAGS`)
+- Compiled-by info (`MNV_COMPILED_USER@MNV_COMPILED_SYS`)
+- Install paths for runtime files
+
+### `auto/osdef.h`
+
+On modern systems this is a stub:
+
+```c
+/* osdef.h - generated by CMake */
+/* On modern systems most definitions come from system headers */
+```
+
+The Autoconf build generates a real `osdef.h` from `osdef.sh`.
+
+### Wayland protocol files
+
+When Wayland is detected and `wayland-scanner` is available, the build
+generates `.c` and `.h` files for each supported protocol:
+
+```cmake
+set(WAYLAND_PROTOCOLS
+ ext-data-control-v1
+ primary-selection-unstable-v1
+ wlr-data-control-unstable-v1
+ xdg-shell
+)
+```
+
+Protocol XMLs must exist under `src/auto/wayland/protocols/`.
+
+---
+
+## Build Targets
+
+### `mnv` (main executable)
+
+All `MNV_CORE_SRC` files + `xdiff` object library → single binary.
+
+```cmake
+add_executable(mnv ${MNV_CORE_SRC} $<TARGET_OBJECTS:xdiff>)
+```
+
+### `xdiff` (object library)
+
+```cmake
+add_library(xdiff OBJECT
+ src/xdiff/xdiffi.c
+ src/xdiff/xemit.c
+ src/xdiff/xprepare.c
+ src/xdiff/xutils.c
+ src/xdiff/xhistogram.c
+ src/xdiff/xpatience.c
+)
+```
+
+### `vterm` (static library)
+
+Built when `FEAT_TERMINAL` is enabled:
+
+```cmake
+add_subdirectory(src/libvterm)
+```
+
+### `xxd` (executable)
+
+Hex dump utility:
+
+```cmake
+add_subdirectory(src/xxd)
+```
+
+### Unit tests
+
+Four test executables built by `mnv_add_unit_test()`:
+
+```cmake
+mnv_add_unit_test(json_test json.c json_test.c)
+mnv_add_unit_test(kword_test charset.c kword_test.c)
+mnv_add_unit_test(memfile_test memfile.c memfile_test.c)
+mnv_add_unit_test(message_test message.c message_test.c)
+```
+
+Each replaces `main.c` **and** the file under test with its `_test.c` variant.
+
+---
+
+## Testing
+
+### Running all tests
+
+```bash
+cd build && ctest
+```
+
+### Specific test categories
+
+```bash
+ctest -L scripts # Script tests (src/testdir/)
+ctest -L indent # Indent tests (runtime/indent/)
+ctest -L syntax # Syntax tests (runtime/syntax/)
+ctest -R json_test # Just the JSON unit test
+```
+
+### Test environment
+
+Tests run with explicit environment to avoid GUI interference:
+
+```cmake
+set_tests_properties(${TEST_NAME} PROPERTIES
+ TIMEOUT 120
+ ENVIRONMENT "DISPLAY=;WAYLAND_DISPLAY="
+)
+```
+
+Script tests use:
+
+```cmake
+MNVPROG=$<TARGET_FILE:mnv>
+MNVRUNTIME=${CMAKE_CURRENT_SOURCE_DIR}/runtime
+LINES=24
+COLUMNS=80
+```
+
+---
+
+## Installation
+
+```bash
+sudo cmake --install build
+# Default prefix: /usr/local
+```
+
+### What gets installed
+
+| Component | Destination |
+|---|---|
+| `mnv` binary | `${CMAKE_INSTALL_BINDIR}` (e.g. `/usr/local/bin`) |
+| Symlinks (ex, view, vi, vim, rmnv, rview, mnvdiff) | same |
+| GUI symlinks (gmnv, gview, gvi, gvim, …) | same (if GUI) |
+| Runtime files | `${CMAKE_INSTALL_DATADIR}/mnv/mnv100/` |
+| Man pages | `${CMAKE_INSTALL_MANDIR}/man1/` |
+| Desktop files | `${CMAKE_INSTALL_DATADIR}/applications/` (if GUI) |
+| Icons | `${CMAKE_INSTALL_DATADIR}/icons/hicolor/.../` (if GUI) |
+
+### Runtime subdirectories installed
+
+```cmake
+set(RUNTIME_SUBDIRS
+ colors compiler doc ftplugin import indent keymap
+ lang macros pack plugin print spell syntax tools
+ tutor autoload
+)
+```
+
+### Individual runtime scripts installed
+
+```cmake
+set(RUNTIME_SCRIPTS
+ defaults.mnv emnv.mnv filetype.mnv ftoff.mnv ftplugin.mnv
+ ftplugof.mnv indent.mnv indoff.mnv menu.mnv mswin.mnv
+ optwin.mnv bugreport.mnv scripts.mnv synmenu.mnv delmenu.mnv
+)
+```
+
+---
+
+## Legacy Autoconf Build
+
+```bash
+cd mnv/src
+./configure --with-features=huge --enable-gui=gtk3
+make -j$(nproc)
+make test
+sudo make install
+```
+
+Key configure flags:
+
+| Flag | Effect |
+|---|---|
+| `--with-features=tiny\|normal\|huge` | Feature level |
+| `--enable-gui=gtk3\|gtk2\|motif\|no` | GUI selection |
+| `--enable-luainterp=dynamic` | Lua support |
+| `--enable-python3interp=dynamic` | Python 3 support |
+| `--enable-rubyinterp=dynamic` | Ruby support |
+| `--enable-perlinterp=dynamic` | Perl support |
+| `--enable-tclinterp=dynamic` | Tcl support |
+
+Platform-specific Make files:
+
+| File | Platform |
+|---|---|
+| `Make_cyg.mak` | Cygwin |
+| `Make_cyg_ming.mak` | Cygwin + MinGW |
+| `Make_ming.mak` | MinGW |
+| `Make_mvc.mak` | MSVC |
+| `Make_ami.mak` | Amiga |
+| `Make_vms.mms` | OpenVMS (MMS/MMK) |
+
+---
+
+## Build Summary
+
+At the end of configuration, CMake prints a summary:
+
+```
+=== MNV 10.0 Build Configuration ===
+ Feature level: huge
+ GUI: auto (found: TRUE)
+ Terminal: 1
+ Channel/Job: 1
+ X11: 1
+ Wayland: 1
+ Terminal lib: /usr/lib/x86_64-linux-gnu/libncurses.so
+ Sound: canberra
+ Encryption: libsodium
+ Lua: 5.4.6
+ Python 3: 3.12.3
+ Install prefix: /usr/local
+ Compiled by: user@hostname
+=============================================
+```
+
+---
+
+## Cross-Compilation
+
+See `src/INSTALLx.txt` for cross-compile instructions. With CMake, use a
+toolchain file:
+
+```bash
+cmake .. -DCMAKE_TOOLCHAIN_FILE=/path/to/toolchain.cmake
+```
+
+A sample toolchain file, `toolchain-mingw32.cmake` (under `cmark/`), can serve
+as a reference.
+
+---
+
+## Troubleshooting
+
+| Problem | Solution |
+|---|---|
+| "No terminal library found" | Install `libncurses-dev` (Debian/Ubuntu) or `ncurses-devel` (RHEL/Fedora) |
+| GUI not detected | Install `libgtk-3-dev` and ensure `pkg-config` is available |
+| `configure did not run properly` | Autoconf build: check `auto/config.log` |
+| Perl `xsubpp` not found | Install `perl-ExtUtils-MakeMaker` or the full Perl dev package |
+| Wayland protocols missing | Ensure protocol XML files exist in `src/auto/wayland/protocols/` |
+| `MNV only works with 32 bit int or larger` | Your platform has 16-bit int — unsupported |
+
+---
+
+*This document describes MNV 10.0 as of build 287 (2026-04-03).*
diff --git a/docs/handbook/mnv/code-style.md b/docs/handbook/mnv/code-style.md
new file mode 100644
index 0000000000..f80412f09b
--- /dev/null
+++ b/docs/handbook/mnv/code-style.md
@@ -0,0 +1,408 @@
+# MNV — Code Style
+
+## Overview
+
+MNV follows a distinctive coding style inherited from the Vi/Vim tradition.
+The style is partially documented in `.clang-format` and `.editorconfig` at
+the project root, and largely defined by example in the existing codebase.
+
+---
+
+## Indentation and Whitespace
+
+### `.editorconfig` rules
+
+```ini
+[*]
+indent_style = tab
+tab_width = 8
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+[*.{c,h,proto}]
+indent_size = 4
+
+[*.{md,yml,sh,bat}]
+indent_style = space
+indent_size = 2
+
+[*.mnv]
+indent_style = space
+indent_size = 2
+```
+
+Key takeaways:
+
+- **C/H files use hard tabs** with `tabstop=8`, `shiftwidth=4`.
+- **Script files (`.mnv`) use spaces** with indent 2.
+- **Markdown, YAML, shell scripts use spaces** with indent 2.
+
+This matches the modeline at the top of every source file:
+
+```c
+/* vi:set ts=8 sts=4 sw=4 noet: */
+```
+
+Meaning:
+- `ts=8` — tab stop at 8 columns.
+- `sts=4` — soft tab stop: Tab and Backspace edit indentation in 4-column
+  steps, while real tab characters still display 8 columns wide.
+- `sw=4` — shift width is 4.
+- `noet` — do NOT expand tabs to spaces.
+
+### `.clang-format` configuration
+
+The `.clang-format` file specifies detailed formatting rules:
+
+```yaml
+Language: Cpp
+IndentWidth: 8
+TabWidth: 8
+UseTab: Always
+ColumnLimit: 0
+BreakBeforeBraces: Allman
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: DontAlign
+AlignOperands: Align
+```
+
+Note: `clang-format` is available as a reference but is **not universally
+applied** — the codebase still relies heavily on manual formatting.
+
+---
+
+## File Headers
+
+Every `.c` and `.h` file begins with:
+
+```c
+/* vi:set ts=8 sts=4 sw=4 noet:
+ *
+ * MNV - MNV is not Vim by Bram Moolenaar
+ *
+ * Do ":help uganda" in MNV to read copying and usage conditions.
+ * Do ":help credits" in MNV to see a list of people who contributed.
+ * See README.txt for an overview of the MNV source code.
+ */
+```
+
+The first line is a **modeline** that configures the editor for the correct
+whitespace settings. Every source file must have this.
+
+---
+
+## Function Definition Style
+
+MNV uses a distinctive **K&R-like but separated** function definition style.
+The return type and qualifiers go on the line **above** the function name,
+which starts at column 4 (one tab indent):
+
+```c
+ static void
+gui_check_pos(void)
+{
+ ...
+}
+```
+
+```c
+ int
+vwl_connection_flush(vwl_connection_T *self)
+{
+ ...
+}
+```
+
+Public functions similarly:
+
+```c
+ void
+gui_start(char_u *arg UNUSED)
+{
+ ...
+}
+```
+
+Rules:
+- Return type and storage class on its own line, indented by 4 columns (one
+  shiftwidth).
+- Function name starts at column 0.
+- Opening brace on its own line at column 0 (Allman style).
+- Parameter list may wrap with alignment.
+
+### Static function declarations
+
+Forward declarations follow the same pattern:
+
+```c
+static void gui_check_pos(void);
+static void gui_reset_scroll_region(void);
+static int gui_screenchar(int off, int flags, guicolor_T fg, guicolor_T bg, int back);
+```
+
+---
+
+## Variable Declarations
+
+Local variables are declared at the top of a function, before any statements.
+This convention is retained from C89, where declarations had to precede
+statements, even though MNV itself builds as C99:
+
+```c
+ void
+gui_start(char_u *arg UNUSED)
+{
+ char_u *old_term;
+#ifdef GUI_MAY_FORK
+ static int recursive = 0;
+#endif
+
+ old_term = mnv_strsave(T_NAME);
+ ...
+}
+```
+
+Alignment of variable names to the same column (using tabs) is common:
+
+```c
+ int ret;
+ char_u *buf;
+ long lnum;
+ pos_T pos;
+```
+
+---
+
+## Comments
+
+### Block comments
+
+Use `/* ... */` style. Never `//` for multi-line comments:
+
+```c
+/*
+ * This is a block comment explaining the next function.
+ * Multiple lines follow the same pattern.
+ */
+```
+
+### Inline comments
+
+Single-line comments use `//`:
+
+```c
+static int has_dash_c_arg = FALSE; // whether -c was given
+```
+
+This is a newer convention; older code uses `/* */` even for inline comments.
+
+### Section headers
+
+Major sections within a file are marked with comment banners:
+
+```c
+/*
+ * Different types of error messages.
+ */
+```
+
+In the CMake file, section headers use hash-line banners:
+
+```cmake
+###############################################################################
+# Build options
+###############################################################################
+```
+
+---
+
+## Naming Conventions
+
+### Functions
+
+- Core functions: `lowercase_with_underscores` — e.g., `gui_start()`,
+ `command_line_scan()`, `enter_buffer()`.
+- Machine-specific functions: `mch_` prefix — e.g., `mch_early_init()`,
+ `mch_exit()`.
+- GUI backend functions: `gui_mch_` prefix — e.g., `gui_mch_flush()`.
+- Wayland functions: `vwl_` for abstractions, `wayland_` for global
+ connection — e.g., `vwl_connection_flush()`, `vwl_connection_dispatch()`.
+- MNV9 functions: `mnv9_` prefix — e.g., `in_mnv9script()`.
+- Test functions: named after what they test, e.g., `json_test`, `kword_test`.
+
+### Types
+
+- Struct typedefs end with `_T`: `buf_T`, `win_T`, `pos_T`, `typval_T`,
+ `garray_T`, `gui_T`, `mparm_T`, `cellattr_T`, `vwl_connection_T`.
+- Internal struct tags use `_S` suffix: `struct vwl_seat_S`,
+ `struct vwl_connection_S`, `struct terminal_S`.
+- Enum values: `UPPERCASE_SNAKE_CASE` — e.g., `VWL_DATA_PROTOCOL_NONE`,
+ `EDIT_FILE`, `ME_UNKNOWN_OPTION`.
+
+### Macros
+
+- All uppercase: `FEAT_GUI`, `HAVE_CONFIG_H`, `UNUSED`, `EXTERN`, `INIT()`.
+- Feature guards: `FEAT_` prefix (`FEAT_TERMINAL`, `FEAT_EVAL`,
+ `FEAT_WAYLAND`).
+- Detection results: `HAVE_` prefix (`HAVE_STDINT_H`, `HAVE_SELECT`,
+ `HAVE_DLOPEN`).
+- Dynamic library names: `DYNAMIC_` prefix (`DYNAMIC_LUA_DLL`).
+
+### Global variables
+
+Declared in `globals.h` with the `EXTERN` macro:
+
+```c
+EXTERN long Rows;
+EXTERN long Columns INIT(= 80);
+EXTERN schar_T *ScreenLines INIT(= NULL);
+```
+
+The `INIT(x)` macro expands to `= x` in `main.c` and to nothing elsewhere.
+
+---
+
+## Preprocessor Conventions
+
+### Feature guards
+
+MNV uses `#ifdef FEAT_*` extensively. Feature code is wrapped tightly:
+
+```c
+#ifdef FEAT_FOLDING
+EXTERN foldinfo_T win_foldinfo;
+#endif
+```
+
+Functions that only exist with certain features use `#ifdef` blocks:
+
+```c
+#if defined(FEAT_GUI_TABLINE)
+static int gui_has_tabline(void);
+#endif
+```
+
+### Platform guards
+
+```c
+#ifdef MSWIN
+ // Windows-specific code
+#endif
+
+#ifdef UNIX
+ // Unix-specific code
+#endif
+
+#if defined(MACOS_X)
+ // macOS
+#endif
+```
+
+### N_() for translatable strings
+
+User-visible strings are wrapped in `N_()` for gettext extraction:
+
+```c
+N_("Unknown option argument"),
+N_("Too many edit arguments"),
+```
+
+---
+
+## Typedef Conventions
+
+Commonly used types:
+
+```c
+typedef unsigned char char_u; // unsigned character
+typedef signed char int8_T; // signed 8-bit
+typedef double float_T; // floating point
+typedef long linenr_T; // line number
+typedef int colnr_T; // column number
+typedef unsigned short short_u; // unsigned short
+```
+
+The custom `char_u` type is used instead of `char` throughout the codebase to
+avoid signed-char bugs.
+
+---
+
+## Error Handling Patterns
+
+### Error message functions
+
+| Function | Use |
+|---|---|
+| `emsg()` | Display error message |
+| `semsg()` | Formatted error (like `sprintf` + `emsg`) |
+| `iemsg()` | Internal error (bug in MNV) |
+| `msg()` | Informational message |
+| `smsg()` | Formatted informational message |
+
+### Return conventions
+
+- Many functions return `OK` / `FAIL` (defined as `1` / `0`).
+- Pointer-returning functions return `NULL` on failure.
+- Boolean functions return `TRUE` / `FALSE`.
+
+---
+
+## MNV9 Script Style
+
+For `.mnv` files (MNV9 syntax):
+
+- Indent with 2 spaces (per `.editorconfig`).
+- Use `def`/`enddef` instead of `function`/`endfunction`.
+- Type annotations: `var name: type = value`.
+- Use `#` for comments (not `"`).
+
+---
+
+## Guard Macros in Headers
+
+Headers use traditional include guards:
+
+```c
+#ifndef MNV__H
+#define MNV__H
+...
+#endif
+```
+
+```c
+#ifndef _OPTION_H_
+#define _OPTION_H_
+...
+#endif
+```
+
+---
+
+## Test Code Style
+
+Test files (`*_test.c`) follow the same style as production code. Tests in
+`src/testdir/` are MNVscript files following the `.mnv` indent style (2 spaces).
+
+The CI enforces code style via `test_codestyle.mnv` — contributions must pass
+this check.
+
+---
+
+## Summary of Key Rules
+
+| Aspect | Rule |
+|---|---|
+| Tabs vs spaces in C | Hard tabs, `tabstop=8`, `shiftwidth=4` |
+| Tabs vs spaces in .mnv | Spaces, indent 2 |
+| Function definition | Return type on separate line, indented one tab |
+| Braces | Allman style (opening brace on new line) |
+| Variable declarations | Top of function, before statements |
+| Naming | `lowercase_underscores` for functions, `_T` suffix for types |
+| Feature guards | `#ifdef FEAT_*` |
+| Translatable strings | `N_("...")` |
+| Modeline | Required in every `.c` / `.h` file |
+| Prototype generation | `src/proto/*.pro` files |
+
+---
+
+*This document describes MNV 10.0 as of build 287 (2026-04-03).*
diff --git a/docs/handbook/mnv/contributing.md b/docs/handbook/mnv/contributing.md
new file mode 100644
index 0000000000..6815fef2e2
--- /dev/null
+++ b/docs/handbook/mnv/contributing.md
@@ -0,0 +1,293 @@
+# MNV — Contributing
+
+## How to Contribute
+
+MNV welcomes patches in any form. The project's `CONTRIBUTING.md` states:
+
+> Patches are welcome in whatever form. Discussions about patches happen on
+> the mnv-dev mailing list.
+
+### Preferred Channels
+
+1. **GitHub Pull Requests** — PRs on the `Project-Tick/Project-Tick` repository.
+ These trigger CI automatically and are forwarded to the mailing list.
+2. **mnv-dev mailing list** — Send a unified diff as an attachment. Initial
+ posts are moderated.
+
+---
+
+## Setting Up a Development Environment
+
+### Clone the repository
+
+```bash
+git clone https://github.com/Project-Tick/Project-Tick.git
+cd Project-Tick/mnv
+```
+
+### Build with debug flags
+
+Use the `all-interp` or `sanitize` CMake preset for development:
+
+```bash
+cmake --preset sanitize
+cmake --build build/sanitize -j$(nproc)
+```
+
+Or manually:
+
+```bash
+mkdir build && cd build
+cmake .. -DMNV_DEBUG=ON -DMNV_LEAK_CHECK=ON -DMNV_BUILD_TESTS=ON
+cmake --build . -j$(nproc)
+```
+
+### Run the test suite
+
+```bash
+cd build
+ctest
+```
+
+Or run specific test categories:
+
+```bash
+ctest -R json_test # Unit test
+ctest -L scripts # Script tests (src/testdir/)
+ctest -L indent # Indent tests
+ctest -L syntax # Syntax tests
+```
+
+### Legacy Autoconf build for testing
+
+```bash
+cd src
+make
+make test
+```
+
+---
+
+## Contribution Guidelines
+
+### Always add tests
+
+From `CONTRIBUTING.md`:
+
+> Please always add a test, if possible. All new functionality should be tested
+> and bug fixes should be tested for regressions: the test should fail before
+> the fix and pass after the fix.
+
+Tests live in `src/testdir/`. Look at recent patches for examples. Use
+`:help testing` inside MNV for the testing framework documentation.
+
+### Code style
+
+Follow the existing code style (see the `code-style.md` handbook page):
+
+- Hard tabs in C files, `ts=8 sts=4 sw=4 noet`.
+- Function return type on a separate line.
+- Allman braces.
+- Feature guards with `#ifdef FEAT_*`.
+
+The CI runs `test_codestyle.mnv` which checks for style violations.
+**Contributions must pass this check.**
+
+### Commit messages
+
+- Write clear, descriptive commit messages.
+- Reference issue numbers where applicable.
+- One logical change per commit.
+
+### Signed-off-by
+
+While not strictly required, it is recommended to sign off commits using the
+Developer Certificate of Origin (DCO):
+
+```bash
+git commit -s
+```
+
+This adds a `Signed-off-by:` trailer confirming you have the right to submit
+the change under the project's license.
+
+The maintainer (`@chrisbra`) usually adds missing `Signed-off-by` trailers
+when merging.
+
+### AI-generated code
+
+From `CONTRIBUTING.md`:
+
+> When using AI for contributions, please disclose this. Any AI-generated code
+> must follow the MNV code style. In particular, test_codestyle.mnv must not
+> report any failures.
+
+Additional rules:
+- Ensure changes are properly tested.
+- Do not submit a single PR addressing multiple unrelated issues.
+
+---
+
+## License
+
+Contributions are distributed under the **MNV license** (see `COPYING.md`
+and `LICENSE`). By submitting a change you agree to this.
+
+> Providing a change to be included implies that you agree with this and your
+> contribution does not cause us trouble with trademarks or patents. There is
+> no CLA to sign.
+
+---
+
+## Reporting Issues
+
+### GitHub Issues
+
+Use [GitHub Issues](https://github.com/Project-Tick/Project-Tick/issues/new/choose)
+for actual bugs.
+
+### Before reporting
+
+1. Reproduce with a clean configuration:
+
+ ```bash
+ mnv --clean
+ ```
+
+2. Describe the exact reproduction steps. Don't say "insert some text" —
+   give the precise keystrokes instead, e.g. `ahere is some text<Esc>`.
+
+3. Check the todo file: `:help todo`.
+
+### Appropriate places for discussion
+
+- Not sure if it's a bug? Use the **mnv-dev mailing list** or
+ [reddit.com/r/mnv](https://reddit.com/r/mnv) or
+ [StackExchange](https://vi.stackexchange.com/).
+- Feature requests and design discussions belong on the mailing list or GitHub
+ issues.
+
+---
+
+## Runtime Files (Syntax, Indent, Ftplugins)
+
+If you find a problem with a syntax, indent, or ftplugin file:
+
+1. Check the file header for the **maintainer's** name, email, or GitHub handle.
+2. Also check the `MAINTAINERS` file.
+3. Contact the maintainer directly.
+4. The maintainer sends updates to the MNV project for distribution.
+
+If the maintainer does not respond, use the mailing list or GitHub issues.
+
+### MNV9 in runtime files
+
+Whether to use MNV9 script is up to the maintainer. For files maintained in
+the main repository, preserve compatibility with Neovim if possible. Wrap
+MNV9-specific code in a guard.
+
+---
+
+## CI and Automated Checks
+
+Every pull request triggers:
+
+| CI System | Platform | What it checks |
+|---|---|---|
+| GitHub Actions | Linux, macOS | Build, tests, coverage |
+| Appveyor | Windows | MSVC build, tests |
+| Cirrus CI | FreeBSD | Build, tests |
+| Codecov | — | Coverage reporting (noisy, can be ignored) |
+| Coverity Scan | — | Static analysis |
+
+### CI configuration
+
+CI helper scripts live in the `ci/` directory:
+
+- `ci/config.mk.sed` — configure options for CI builds.
+- `ci/config.mk.clang.sed` / `ci/config.mk.gcc.sed` — compiler-specific
+ variants.
+- `ci/setup-xvfb.sh` — sets up Xvfb for GUI tests.
+- `ci/remove_snap.sh` — removes snap packages that interfere with CI.
+- `ci/pinned-pkgs` — pinned package versions for reproducibility.
+
+---
+
+## Development Workflow
+
+### 1. Fork and branch
+
+```bash
+git checkout -b fix-my-issue
+```
+
+### 2. Make changes and test
+
+```bash
+cd build
+cmake --build . -j$(nproc)
+ctest -R relevant_test
+```
+
+### 3. Check code style
+
+Build and run the code-style test:
+
+```bash
+cd src/testdir
+make test_codestyle.mnv
+```
+
+### 4. Commit with sign-off
+
+```bash
+git commit -s -m "Fix: description of the fix
+
+Detailed explanation if needed.
+Closes #NNNN"
+```
+
+### 5. Push and create PR
+
+```bash
+git push origin fix-my-issue
+```
+
+Open a Pull Request on GitHub.
+
+---
+
+## Architecture for New Contributors
+
+New contributors should familiarize themselves with:
+
+| File | Purpose |
+|---|---|
+| `src/main.c` | Entry point — read the startup sequence |
+| `src/mnv.h` | Master header — see the include hierarchy |
+| `src/feature.h` | Feature tiers and optional feature defines |
+| `src/structs.h` | All major data structures |
+| `src/globals.h` | Global variables |
+| `src/ex_docmd.c` | Ex command dispatcher |
+| `src/normal.c` | Normal-mode command dispatcher |
+
+See the `architecture.md` handbook page for a complete subsystem map.
+
+The `README.md` in `src/` provides additional guidance on the source tree
+layout.
+
+---
+
+## Communication
+
+| Channel | Purpose |
+|---|---|
+| GitHub Issues | Bug reports, feature requests |
+| mnv-dev mailing list | Design discussions, patch review |
+| `#mnv` on Libera.Chat | Real-time chat |
+| reddit.com/r/mnv | Community discussion |
+| StackExchange (vi.stackexchange.com) | Q&A |
+
+---
+
+*This document describes MNV 10.0 as of build 287 (2026-04-03).*
diff --git a/docs/handbook/mnv/gui-extension.md b/docs/handbook/mnv/gui-extension.md
new file mode 100644
index 0000000000..c815503f80
--- /dev/null
+++ b/docs/handbook/mnv/gui-extension.md
@@ -0,0 +1,410 @@
+# MNV — GUI Extension
+
+## Overview
+
+MNV supports a **graphical user interface (GUI)** through a toolkit-agnostic
+abstraction layer. The GUI is optional — MNV works perfectly as a terminal
+application — but when enabled it adds menus, toolbars, scrollbars, tearoff
+menus, tablines, font selection, direct mouse integration, drag-and-drop, and
+balloon-eval tooltips.
+
+The GUI is activated by:
+- Invoking the binary as `gmnv` (via symlink).
+- Running `:gui` from within terminal MNV.
+- Compiling with `FEAT_GUI` and having no `--nofork` / `-f` flag.
+
+The compile-time guard for all GUI code is:
+
+```c
+#if defined(FEAT_GUI_MOTIF) \
+ || defined(FEAT_GUI_GTK) \
+ || defined(FEAT_GUI_HAIKU) \
+ || defined(FEAT_GUI_MSWIN) \
+ || defined(FEAT_GUI_PHOTON)
+# if !defined(FEAT_GUI) && !defined(NO_X11_INCLUDES)
+# define FEAT_GUI
+# endif
+#endif
+```
+
+---
+
+## Architecture
+
+### Abstraction Layer: `gui.c` / `gui.h`
+
+The file `src/gui.c` is the **core GUI dispatcher**. It owns the global
+`gui_T gui` struct and provides toolkit-independent functions that delegate to
+`gui_mch_*()` ("machine-specific") callbacks implemented in each backend.
+
+```c
+// src/gui.c
+gui_T gui;
+```
+
+Key functions in `gui.c`:
+
+| Function | Purpose |
+|---|---|
+| `gui_start()` | Entry point — init toolkit, fork if needed, start event loop. |
+| `gui_attempt_start()` | Try to initialise the GUI; fall back to terminal on failure. |
+| `gui_do_fork()` | Fork the process so `gmnv file` detaches from the shell. |
+| `gui_read_child_pipe()` | IPC between parent and GUI child after fork. |
+| `gui_check_pos()` | Clamp cursor within drawable area. |
+| `gui_reset_scroll_region()` | Reset the scrollable region to full screen. |
+| `gui_outstr()` | Output a string to the GUI display. |
+| `gui_screenchar()` | Draw a single character at a screen position. |
+| `gui_outstr_nowrap()` | Draw a string without line wrapping. |
+| `gui_delete_lines()` / `gui_insert_lines()` | Scroll line ranges. |
+| `gui_xy2colrow()` | Convert pixel coordinates to character row/column. |
+| `gui_do_scrollbar()` | Enable/disable a scrollbar for a window. |
+| `gui_update_horiz_scrollbar()` | Refresh horizontal scrollbar state. |
+| `gui_set_fg_color()` / `gui_set_bg_color()` | Set foreground/background. |
+| `init_gui_options()` | Initialise GUI-related option defaults. |
+| `xy2win()` | Find which window a pixel coordinate falls in. |
+
+### The `gui_T` Data Structure
+
+Declared in `src/gui.h`, `gui_T` holds all mutable GUI state. Its fields
+include (representative, not exhaustive):
+
+- Widget/window handles (toolkit-specific, cast to `void *` or typed per
+ backend).
+- `gui.in_use` — boolean, TRUE when GUI is active.
+- `gui.starting` — TRUE during initialisation.
+- `gui.dofork` — whether to fork on startup.
+- `gui.char_width` / `gui.char_height` / `gui.char_ascent` — font metrics.
+- `gui.border_offset` — pixel offset for the text area border.
+- `gui.num_rows` / `gui.num_cols` — grid dimensions.
+- Scrollbar state arrays.
+- Colour values for foreground, background, scrollbar, menu.
+- Tabline widget handles (for `FEAT_GUI_TABLINE`).
+
+### Coordinate Macros
+
+`gui.h` defines macros for converting between character cells and pixel
+coordinates:
+
+```c
+// Non-MSWIN (X11/GTK/Motif/Haiku/Photon):
+#define TEXT_X(col) ((col) * gui.char_width + gui.border_offset)
+#define TEXT_Y(row) ((row) * gui.char_height + gui.char_ascent + gui.border_offset)
+#define FILL_X(col) ((col) * gui.char_width + gui.border_offset)
+#define FILL_Y(row) ((row) * gui.char_height + gui.border_offset)
+#define X_2_COL(x) (((x) - gui.border_offset) / gui.char_width)
+#define Y_2_ROW(y) (((y) - gui.border_offset) / gui.char_height)
+
+// MSWIN:
+#define TEXT_X(col) ((col) * gui.char_width)
+#define TEXT_Y(row) ((row) * gui.char_height + gui.char_ascent)
+// etc.
+```
+
+### Scrollbar Constants
+
+```c
+#define SBAR_NONE (-1)
+#define SBAR_LEFT 0
+#define SBAR_RIGHT 1
+#define SBAR_BOTTOM 2
+```
+
+---
+
+## GUI Backends
+
+### GTK 2 / GTK 3 (`gui_gtk.c`, `gui_gtk_f.c`, `gui_gtk_x11.c`)
+
+The GTK backend is the most actively maintained Linux GUI. It consists of
+three files:
+
+**`gui_gtk.c`** — High-level GTK widget management:
+
+- Toolbar creation and tearoff support.
+- Find/Replace dialog (`find_replace_cb()`).
+- Dialog entry callbacks: `entry_activate_cb()`, `entry_changed_cb()`.
+
+It includes GTK headers conditionally:
+
+```c
+#ifdef FEAT_GUI_GTK
+# if GTK_CHECK_VERSION(3,0,0)
+# include <gdk/gdkkeysyms-compat.h>
+# else
+# include <gdk/gdkkeysyms.h>
+# endif
+# include <gdk/gdk.h>
+# include <gtk/gtk.h>
+#endif
+```
+
+**`gui_gtk_f.c` / `gui_gtk_f.h`** — A custom GTK container widget (the "form
+widget") that manages the drawing area, scrollbars, and toolbar layout. This
+replaces GTK's standard layout containers with one optimised for MNV's needs.
+
+**`gui_gtk_x11.c`** — Low-level integration with X11 under GTK:
+
+- Display connection and window management.
+- Keyboard input translation (GDK key events → MNV key codes).
+- X selection handling (clipboard).
+- Drag-and-drop (`FEAT_DND`).
+- Input method support via `gui_xim.c`.
+
+**CMake source list for GTK:**
+
+```cmake
+set(GUI_SRC
+ gui.c
+ gui_gtk.c
+ gui_gtk_f.c
+ gui_gtk_x11.c
+ gui_beval.c
+)
+```
+
+**GTK 3 vs GTK 2 detection:**
+
+```cmake
+if(MNV_GUI STREQUAL "auto" OR MNV_GUI STREQUAL "gtk3")
+ pkg_check_modules(GTK3 QUIET gtk+-3.0)
+ if(GTK3_FOUND)
+ set(USE_GTK3 1)
+ set(FEAT_GUI_GTK 1)
+ ...
+ endif()
+endif()
+```
+
+GTK 3 support was added by Kazunobu Kuriyama (2016) and is now the default.
+
+### Motif (`gui_motif.c`, `gui_x11.c`, `gui_xmdlg.c`, `gui_xmebw.c`)
+
+The Motif backend uses the Xt/Motif widget set:
+
+```cmake
+set(GUI_SRC
+ gui.c
+ gui_motif.c
+ gui_x11.c
+ gui_beval.c
+ gui_xmdlg.c
+ gui_xmebw.c
+)
+```
+
+- `gui_motif.c` — Motif menus, toolbar, scrollbars.
+- `gui_x11.c` — Raw X11 drawing, event loop, selection.
+- `gui_xmdlg.c` — Motif dialogs (file selection, font picker).
+- `gui_xmebw.c` / `gui_xmebw.h` / `gui_xmebwp.h` — "Enhanced Button
+ Widget" — a custom Motif widget for toolbar buttons with icons.
+
+### Win32 (`gui_w32.c`)
+
+The native Windows GUI uses the Win32 API directly (no toolkit):
+
+- `gui_w32.c` — window creation, message loop, menus, scrollbars, Direct2D
+ text rendering.
+- `gui_dwrite.cpp` / `gui_dwrite.h` — DirectWrite rendering for high-quality
+ font display on Windows (controlled by `FEAT_DIRECTX` / `FEAT_RENDER_OPTIONS`).
+- `gui_w32_rc.h` — resource header for the Windows resource file (`mnv.rc`).
+
+### Haiku (`gui_haiku.cc`, `gui_haiku.h`)
+
+BeOS/Haiku GUI backend using the native BApplication/BWindow/BView API:
+
+```c
+#ifdef FEAT_GUI_HAIKU
+# include "gui_haiku.h"
+#endif
+```
+
+Supports drag-and-drop (`HAVE_DROP_FILE`).
+
+### Photon (`gui_photon.c`)
+
+QNX Photon microGUI backend. Legacy, for QNX RTOS systems:
+
+```c
+#ifdef FEAT_GUI_PHOTON
+# include <Ph.h>
+# include <Pt.h>
+# include "photon/PxProto.h"
+#endif
+```
+
+---
+
+## GUI Features
+
+### On-the-Fly Scrolling
+
+GTK and Win32 support immediate scroll redraw rather than deferring to the
+main loop:
+
+```c
+#if defined(FEAT_GUI_MSWIN) || defined(FEAT_GUI_GTK)
+# define USE_ON_FLY_SCROLL
+#endif
+```
+
+### Drag and Drop
+
+File dropping is enabled for GTK (with `FEAT_DND`), Win32, and Haiku:
+
+```c
+#if (defined(FEAT_DND) && defined(FEAT_GUI_GTK)) \
+ || defined(FEAT_GUI_MSWIN) \
+ || defined(FEAT_GUI_HAIKU)
+# define HAVE_DROP_FILE
+#endif
+```
+
+### Balloon Evaluation (Tooltips)
+
+`gui_beval.c` implements balloon-eval — hover tooltips used for debugger
+variable inspection, function signatures, and similar features. Controlled
+by `FEAT_BEVAL` / `FEAT_BEVAL_TIP`.
+
+### Tab Page Line
+
+When `FEAT_GUI_TABLINE` is defined, the GUI displays a tab bar at the top of
+the window for switching between tab pages.
+
+```c
+#if defined(FEAT_GUI_TABLINE)
+static int gui_has_tabline(void);
+#endif
+```
+
+### X Input Method (XIM)
+
+`gui_xim.c` integrates X Input Methods for composing complex characters
+(CJK, etc.) on X11. Controlled by `FEAT_XIM`:
+
+```c
+#ifdef FEAT_XIM
+# ifdef FEAT_GUI_GTK
+ // GTK handles XIM through GtkIMContext
+# else
+ // Direct XIM protocol for Motif/X11
+# endif
+#endif
+```
+
+### GUI Forking
+
+On Unix, when `gmnv` starts it forks so that the parent shell returns to the
+prompt while the child process continues running as the editor. This happens
+in `gui_do_fork()`:
+
+```c
+#ifdef GUI_MAY_FORK
+static void gui_do_fork(void);
+
+static int gui_read_child_pipe(int fd);
+
+enum {
+ GUI_CHILD_IO_ERROR,
+ GUI_CHILD_OK,
+ GUI_CHILD_FAILED
+};
+#endif
+```
+
+The fork is skipped when:
+- `-f` flag is given.
+- `'f'` is in `'guioptions'` (`p_go`).
+- A background job is running (`job_any_running()`).
+
+### Menus
+
+Menu definitions are loaded from `runtime/menu.mnv` and `runtime/synmenu.mnv`.
+The `:menu` command adds items to the menu bar. GUI backends render menus
+using platform-native widgets.
+
+### Fonts
+
+GUI font handling is integrated with the `'guifont'` and `'guifontwide'`
+options. Character metrics stored in `gui.char_width`, `gui.char_height`, and
+`gui.char_ascent` are critical for all coordinate conversions.
+
+---
+
+## CMake GUI Detection
+
+The CMake build tries toolkits in order:
+
+```
+1. GTK 3 (pkg-config: gtk+-3.0)
+2. GTK 2 (pkg-config: gtk+-2.0)
+3. Motif (FindMotif)
+4. none
+```
+
+If `MNV_GUI` is set to a specific toolkit and it's not found, the build fails:
+
+```cmake
+if(NOT _gui_found)
+ if(MNV_GUI STREQUAL "none" OR MNV_GUI STREQUAL "auto")
+ message(STATUS "GUI: disabled")
+ else()
+ message(FATAL_ERROR "Requested GUI '${MNV_GUI}' not found")
+ endif()
+endif()
+```
+
+---
+
+## GUI Symlinks
+
+When the GUI is compiled in, the install step creates additional symlinks:
+
+```cmake
+execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink mnv ${_bindir}/gmnv)
+execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink mnv ${_bindir}/gview)
+execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink mnv ${_bindir}/gmnvdiff)
+execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink mnv ${_bindir}/rgmnv)
+execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink mnv ${_bindir}/rgview)
+execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink mnv ${_bindir}/emnv)
+execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink mnv ${_bindir}/eview)
+execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink mnv ${_bindir}/gvi)
+execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink mnv ${_bindir}/gvim)
+```
+
+Desktop files and icons are also installed:
+
+```cmake
+install(FILES runtime/mnv.desktop DESTINATION ${CMAKE_INSTALL_DATADIR}/applications)
+install(FILES runtime/gmnv.desktop DESTINATION ${CMAKE_INSTALL_DATADIR}/applications)
+```
+
+Icons at 16×16, 32×32, 48×48, and 128×128 plus a scalable SVG are placed in
+the hicolor icon theme.
+
+---
+
+## Runtime Configuration
+
+GUI behaviour is customised via options in `.mnvrc` or `.gmnvrc`:
+
+| Option | Purpose |
+|---|---|
+| `'guifont'` | Font face and size |
+| `'guifontwide'` | Font for double-width characters |
+| `'guioptions'` | Flags controlling which GUI elements are shown |
+| `'guicursor'` | Cursor shape in different modes |
+| `'guitablabel'` | Tab page label format |
+| `'guitabtooltip'` | Tab page tooltip format |
+| `'linespace'` | Extra pixels between lines |
+| `'columns'` / `'lines'` | Window dimensions |
+| `'toolbar'` | Toolbar display flags |
+
+The `'guioptions'` option (stored in the internal variable `p_go`, declared
+in `option.h`) is a string of flags such as `f` (foreground — don't fork),
+`m` (menu bar), `T` (toolbar), `r`/`l` (scrollbars), etc.
+
+The `runtime/gmnvrc_example.mnv` file provides a starting template.
+
+---
+
+*This document describes MNV 10.0 as of build 287 (2026-04-03).*
diff --git a/docs/handbook/mnv/overview.md b/docs/handbook/mnv/overview.md
new file mode 100644
index 0000000000..f65f1291d1
--- /dev/null
+++ b/docs/handbook/mnv/overview.md
@@ -0,0 +1,381 @@
+# MNV — Overview
+
+## What Is MNV?
+
+MNV (recursive acronym: **MNV is not Vim**) is a highly capable, open-source
+text editor descended from the classic UNIX editor Vi. It is developed under
+the Project Tick umbrella and ships as the `mnv` binary. The current major
+release is **MNV 10.0** (version string `MNV_VERSION_MAJOR 10`,
+`MNV_VERSION_MINOR 0`), with build numbers tracked in
+`src/version.h`:
+
+```c
+#define MNV_VERSION_MAJOR 10
+#define MNV_VERSION_MINOR 0
+#define MNV_VERSION_BUILD 287
+```
+
+MNV maintains near-complete compatibility with Vi while adding a vast array of
+modern editing features. It targets the same niche as its ancestors — fast,
+keyboard-driven text editing for programmers and system administrators — but
+extends the experience with a graphical user interface, an embedded scripting
+language, asynchronous job control, a built-in terminal emulator, Wayland
+clipboard integration, and much more.
+
+---
+
+## Project Identity
+
+| Field | Value |
+|---|---|
+| Full name | MNV — MNV is not Vim |
+| Repository | `Project-Tick/Project-Tick` (under `mnv/`) |
+| License | See `COPYING.md` and `LICENSE` in the project root |
+| Language | C (C99), with MNV9 script for runtime |
+| Build systems | CMake (primary), GNU Autoconf + Make (legacy) |
+| Version macro | `MNV_VERSION_LONG` → `"MNV - MNV is not Vim 10.0 (2026 Apr 3)"` |
+
+The project description in `CMakeLists.txt` reads:
+
+```cmake
+project(MNV
+ DESCRIPTION "MNV - MNV is not Vim"
+ LANGUAGES C
+)
+```
+
+---
+
+## Design Philosophy
+
+1. **Vi compatibility first.** Users who have Vi "in the fingers" can work
+ immediately. Every normal-mode, insert-mode, and command-line keystroke
+ from Vi works identically unless a feature consciously extends it.
+
+2. **Layered feature sets.** The build system exposes three feature tiers
+ defined in `src/feature.h`:
+
+ ```c
+ // +tiny — no optional features enabled, not even +eval
+ // +normal — a default selection of features enabled
+ // +huge — all possible features enabled.
+ ```
+
+ Each tier is a strict superset of the previous one:
+
+ ```c
+ #ifdef FEAT_HUGE
+ # define FEAT_NORMAL
+ #endif
+ #ifdef FEAT_NORMAL
+ # define FEAT_TINY
+ #endif
+ ```
+
+ On Unix, macOS and Windows the default is `+huge`.
+
+3. **Portability.** MNV builds on Linux, macOS, Windows (7 – 11), Haiku, VMS,
+ and nearly every UNIX variant. Platform-specific code lives in dedicated
+ `os_*.c` files (`os_unix.c`, `os_win32.c`, `os_amiga.c`, `os_mac_conv.c`,
+ etc.), keeping the core editor portable.
+
+4. **Keyboard efficiency.** All commands use normal keyboard characters.
+ Function keys and mouse are optionally available but never required.
+
+---
+
+## Feature Highlights
+
+### Multi-level Undo / Redo
+
+MNV records every editing operation in an undo tree (`src/undo.c`). Users can
+walk the tree with `u`, `CTRL-R`, and the `:undolist` / `:undo` commands. The
+undo file is persisted across sessions when `'undofile'` is set.
+
+### Syntax Highlighting
+
+Syntax highlighting is implemented in `src/syntax.c` (guarded by
+`FEAT_SYN_HL`), and MNV ships hundreds of syntax definitions under
+`runtime/syntax/`. The `syn_pattern` struct drives the highlighting engine:
+
+```c
+typedef struct syn_pattern
+{
+ char sp_type;
+ char sp_syncing;
+ short sp_syn_match_id;
+ short sp_off_flags;
+ int sp_offsets[SPO_COUNT];
+ int sp_flags;
+ int sp_ic;
+ ...
+} syn_pattern;
+```
+
+### Built-in Terminal Emulator
+
+When compiled with `FEAT_TERMINAL`, MNV embeds a terminal emulator
+(`src/terminal.c`) backed by **libvterm** (`src/libvterm/`). A terminal buffer
+is opened with `:terminal` and connected to a background job via the
+channel/job infrastructure.
+
+```c
+struct terminal_S {
+ term_T *tl_next;
+ VTerm *tl_vterm;
+ job_T *tl_job;
+ buf_T *tl_buffer;
+ ...
+};
+```
+
+### Asynchronous Jobs and Channels
+
+`src/channel.c` and `src/job.c` provide the `+channel` / `+job` features.
+Channels communicate over sockets (TCP, Unix domain), pipes, or PTYs. This
+powers the terminal emulator, the NetBeans interface, Language Server
+connections, and user scripts.
+
+### MNV9 Script
+
+MNV ships a modernized scripting dialect called **MNV9 script**
+(`src/mnv9script.c`, `src/mnv9compile.c`, `src/mnv9execute.c`,
+`src/mnv9expr.c`, `src/mnv9type.c`, `src/mnv9instr.c`, `src/mnv9cmds.c`,
+`src/mnv9class.c`, `src/mnv9generics.c`). Detection of MNV9 mode happens at
+runtime:
+
+```c
+int
+in_mnv9script(void)
+{
+ return (current_sctx.sc_version == SCRIPT_VERSION_MNV9
+ || (cmdmod.cmod_flags & CMOD_MNV9CMD))
+ && !(cmdmod.cmod_flags & CMOD_LEGACY);
+}
+```
+
+MNV9 introduces strict typing, classes, generics, compiled-to-bytecode
+execution, and `import` / `export` semantics.
+
+### Graphical User Interface
+
+MNV supports multiple GUI toolkits (GTK 2, GTK 3, Motif, Win32, Haiku, Photon)
+through a clean backend abstraction in `src/gui.c` / `src/gui.h`. The global
+`gui_T gui` struct holds all GUI state. Platform backends live in
+`gui_gtk.c`, `gui_gtk_x11.c`, `gui_motif.c`, `gui_w32.c`, `gui_haiku.cc`, etc.
+
+### Wayland Clipboard
+
+Native Wayland clipboard support is implemented in `src/wayland.c` and
+`src/wayland.h` (guarded by `FEAT_WAYLAND`). It uses the
+`ext-data-control-v1`, `wlr-data-control-unstable-v1`, and optionally the core
+`wl_data_device_manager` protocols. The clipboard code in `src/clipboard.c`
+dispatches through protocol-agnostic macros defined at the end of `wayland.c`.
+
+```c
+vwl_connection_T *wayland_ct;
+```
+
+### Encryption
+
+MNV supports multiple encryption methods. Blowfish is implemented in
+`src/blowfish.c`, ZIP-based crypt in `src/crypt_zip.c`, and the modern
+`xchacha20` method uses **libsodium** when `HAVE_SODIUM` is defined.
+
+### Regular Expressions
+
+Two regex engines coexist behind the dispatcher in `src/regexp.c`:
+
+- **BT engine** (`src/regexp_bt.c`) — backtracking, traditional.
+- **NFA engine** (`src/regexp_nfa.c`) — NFA-based, faster for many patterns.
+
+The dispatcher chooses automatically or can be forced via `'regexpengine'`.
+
+### Quickfix / Location Lists
+
+`src/quickfix.c` implements the `:make`, `:grep`, `:copen`, `:lopen` family of
+commands for compiler-output navigation.
+
+### Spell Checking
+
+`src/spell.c`, `src/spellfile.c`, `src/spellsuggest.c` provide the `+spell`
+feature with support for word lists, affixes, compound words, and suggestions.
+
+### Diff Mode
+
+`src/diff.c` together with the embedded `src/xdiff/` library (implementing
+the classic Myers, patience, and histogram diff algorithms) delivers
+side-by-side diff viewing.
+
+### Folding
+
+`src/fold.c` drives code folding — manual, indent, expr, syntax, diff, and
+marker methods.
+
+### Text Properties / Virtual Text
+
+`src/textprop.c` provides the text-property API used by plugins for inline
+virtual text, diagnostics markers, and similar overlays.
+
+### Popup Windows
+
+`src/popupwin.c` and `src/popupmenu.c` implement floating popup windows and
+the insert-mode completion menu.
+
+---
+
+## Runtime Files
+
+The `runtime/` directory is installed alongside the binary and contains:
+
+| Directory | Purpose |
+|---|---|
+| `runtime/doc/` | Help files (`:help`) |
+| `runtime/syntax/` | Syntax highlighting definitions |
+| `runtime/ftplugin/` | File-type plugins |
+| `runtime/indent/` | Indentation rules |
+| `runtime/colors/` | Color schemes |
+| `runtime/compiler/` | Compiler integration |
+| `runtime/autoload/` | Autoloaded script functions |
+| `runtime/plugin/` | Global plugins |
+| `runtime/pack/` | Package directory |
+| `runtime/tutor/` | The `mnvtutor` training material |
+| `runtime/keymap/` | Keyboard mappings for non-Latin scripts |
+| `runtime/import/` | MNV9 import modules |
+| `runtime/spell/` | Spell-check word-list files |
+| `runtime/print/` | PostScript printing support |
+| `runtime/lang/` | UI translation message files |
+| `runtime/macros/` | Example macros |
+| `runtime/tools/` | Auxiliary tools |
+
+Essential runtime scripts loaded at startup:
+
+- `defaults.mnv` — sensible defaults for new users.
+- `filetype.mnv` / `ftoff.mnv` — filetype detection on/off.
+- `ftplugin.mnv` / `ftplugof.mnv` — filetype plugins on/off.
+- `indent.mnv` / `indoff.mnv` — filetype indentation on/off.
+- `menu.mnv` / `synmenu.mnv` — GUI menu definitions.
+- `scripts.mnv` — fallback filetype detection by content.
+- `optwin.mnv` — the `:options` window.
+- `mswin.mnv` — Windows-style key bindings.
+
+---
+
+## Executable Variants
+
+A single `mnv` binary behaves differently depending on the name it is invoked
+with. The CMake install step creates symlinks:
+
+| Symlink | Behaviour |
+|---|---|
+| `mnv` | Normal mode |
+| `ex` | Start in Ex mode (`:` prompt) |
+| `view` | Read-only mode (`-R`) |
+| `rmnv` | Restricted mode |
+| `rview` | Restricted + read-only |
+| `mnvdiff` | Start in diff mode |
+| `vi` | Compatibility alias |
+| `vim` | Compatibility alias |
+| `gmnv` | GUI mode (when GUI is compiled in) |
+| `gview` | GUI + read-only |
+| `gmnvdiff` | GUI + diff mode |
+| `rgmnv` | GUI + restricted |
+| `rgview` | GUI + restricted + read-only |
+| `emnv` | "Easy mode" GUI |
+| `eview` | "Easy mode" GUI + read-only |
+| `gvi` / `gvim` | GUI compatibility aliases |
+
+---
+
+## The Tutor
+
+MNV bundles a one-hour interactive tutorial. It is typically started with:
+
+```sh
+mnvtutor
+```
+
+The tutor files reside in `runtime/tutor/` and the launcher scripts are
+`mnvtutor.com` (VMS) and `mnvtutor.bat` (Windows) at the project root, plus
+`src/mnvtutor` / `src/gmnvtutor` for Unix.
+
+---
+
+## Auxiliary Tool: xxd
+
+The `src/xxd/` directory contains **xxd**, a hex-dump / reverse-hex-dump
+utility. It is built as a separate executable by the CMake build
+(`add_subdirectory(src/xxd)`).
+
+---
+
+## Relation to Vim and Vi
+
+MNV is a fork that diverges from upstream Vim by:
+
+- Renaming the project and binary to `mnv`.
+- Adopting a CMake-first build system alongside the legacy Autoconf build.
+- Adding first-class Wayland clipboard support (`FEAT_WAYLAND`,
+ `FEAT_WAYLAND_CLIPBOARD`).
+- Using `mnv9script` naming for the modern scripting dialect.
+- Storing runtime files in `mnv`-prefixed paths.
+- Maintaining the project under the Project Tick organisation.
+
+Despite these changes, MNV intentionally preserves Vi and Vim compatibility so
+that existing workflows, plugins, and muscle memory carry over unchanged.
+
+---
+
+## CI and Quality Assurance
+
+The project uses:
+
+- **GitHub Actions** — primary CI (linux, macOS, coverage).
+- **Appveyor** — Windows CI.
+- **Cirrus CI** — FreeBSD builds.
+- **Codecov** — coverage tracking.
+- **Coverity Scan** — static analysis.
+- **Fossies codespell** — spell-checking source comments.
+
+Unit tests (`json_test.c`, `kword_test.c`, `memfile_test.c`, `message_test.c`)
+validate isolated subsystems. The full test suite lives in `src/testdir/` and
+is driven by `make test` or `ctest`.
+
+---
+
+## Further Reading
+
+| Resource | Location |
+|---|---|
+| Build instructions | `src/INSTALL`, the `building.md` handbook page |
+| Architecture | The `architecture.md` handbook page |
+| GUI details | The `gui-extension.md` handbook page |
+| Command-line usage | The `scripting.md` handbook page |
+| Platform notes | The `platform-support.md` handbook page |
+| Coding conventions | The `code-style.md` handbook page |
+| Contributing guide | `CONTRIBUTING.md` in the project root |
+| MNV9 scripting | `README_MNV9.md` |
+| Help system | `:help` inside MNV, or `runtime/doc/help.txt` |
+
+---
+
+## Glossary
+
+| Term | Meaning |
+|---|---|
+| `buf_T` | The C struct representing a buffer (in-memory file). |
+| `win_T` | A window — a viewport onto a buffer. |
+| `pos_T` | A cursor position: `{lnum, col, coladd}`. |
+| `typval_T` | A typed value in the expression evaluator. |
+| `garray_T` | A generic growable array used throughout the codebase. |
+| `mparm_T` | The struct holding `main()` parameters passed between init functions. |
+| `gui_T` | Global GUI state. |
+| `term_T` | A terminal emulator instance. |
+| Feature guard | A `#ifdef FEAT_*` preprocessor conditional controlling optional code. |
+| MNV9 | The modern, statically-typed scripting dialect. |
+| libvterm | The embedded terminal emulation library. |
+| xdiff | The embedded diff library (xdiffi, xhistogram, xpatience). |
+| xxd | The bundled hex-dump utility. |
+
+---
+
+*This document describes MNV 10.0 as of build 287 (2026-04-03).*
diff --git a/docs/handbook/mnv/platform-support.md b/docs/handbook/mnv/platform-support.md
new file mode 100644
index 0000000000..6f7f714b84
--- /dev/null
+++ b/docs/handbook/mnv/platform-support.md
@@ -0,0 +1,306 @@
+# MNV — Platform Support
+
+## Officially Supported Platforms
+
+MNV is designed for maximum portability. The README states:
+
+> MNV runs under MS-Windows (7, 8, 10, 11), macOS, Haiku, VMS and almost all
+> flavours of UNIX. Porting to other systems should not be very difficult.
+
+### Tier 1: Actively tested in CI
+
+| Platform | CI System | Build | GUI |
+|---|---|---|---|
+| Linux (Ubuntu/Debian) | GitHub Actions | CMake + Autoconf | GTK 3, terminal |
+| macOS | GitHub Actions | CMake + Autoconf | terminal |
+| Windows | Appveyor | MSVC (`Make_mvc.mak`) | Win32 native |
+| FreeBSD | Cirrus CI | CMake + Autoconf | terminal |
+
+### Tier 2: Supported, not CI-tested
+
+| Platform | Notes |
+|---|---|
+| Other Linux distros | Community-packaged on Fedora, Arch, Gentoo, Alpine, etc. |
+| Windows (MinGW/MSYS2) | `Make_ming.mak`, `Make_cyg_ming.mak` |
+| Windows (Cygwin) | `Make_cyg.mak` with `iscygpty.c` PTY detection |
+| Haiku | `gui_haiku.cc` backend, resource defs in `os_haiku.rdef.in` |
+| OpenVMS | `Make_vms.mms`, `os_vms.c`, `os_vms_conf.h` |
+| QNX | `os_qnx.c`, `gui_photon.c` |
+
+### Tier 3: Historic / unmaintained
+
+| Platform | Notes |
+|---|---|
+| MS-DOS | No longer maintained |
+| Windows 95/98/Me/NT/2000/XP/Vista | Legacy; not tested |
+| Amiga | `os_amiga.c`, `Make_ami.mak` — code present but unmaintained |
+| Atari MiNT | Mentioned in `os_unix.c` header |
+| BeOS | Haiku is the successor |
+| RISC OS | Legacy |
+| OS/2 | EMX support in `os_unix.c` |
+
+---
+
+## Platform Abstraction Strategy
+
+MNV isolates platform-specific code in dedicated `os_*.c` / `os_*.h` files.
+The core editor never calls raw system APIs directly — it uses wrappers
+prefixed `mch_` ("machine"):
+
+| Wrapper | Example implementations |
+|---|---|
+| `mch_early_init()` | Boot-time init: set signal handlers, console mode |
+| `mch_exit()` | Clean exit with platform cleanup |
+| `mch_fopen()` | `fopen()` with platform-specific path handling |
+| `mch_signal()` | On Unix: `sigaction()`; elsewhere: `signal()` |
+| `mch_getenv()` | Environment variable lookup |
+| `mch_is_gui_executable()` | Win32: check subsystem header |
+
+### Unix (`os_unix.c`, `os_unix.h`, `os_unixx.h`)
+
+The largest platform file. Covers:
+
+- Signal handling (`SIGWINCH`, `SIGCHLD`, `SIGTSTP`, `SIGCONT`, …).
+- Process control (`fork()`, `execvp()`, `waitpid()`).
+- Terminal setup (`termios` / `termio` / `sgtty`).
+- Pseudo-terminal allocation (`pty.c`).
+- File locking and swap-file safety.
+- SELinux context preservation (`HAVE_SELINUX`).
+- Extended attribute support (`FEAT_XATTR`).
+- XSMP (X Session Management Protocol) integration.
+- Shared memory (`shm_open`) for IPC.
+
+The file supports multiple Unix variants through conditionals:
+
+```c
+#if defined(__linux__) && !defined(__ANDROID__)
+ // Linux-specific code
+#endif
+#if defined(__FreeBSD__) || defined(__DragonFly__)
+ // BSD-specific code
+#endif
+#if defined(__sun)
+ // Solaris/SunOS (SUN_SYSTEM macro)
+#endif
+#if defined(__CYGWIN__)
+ // Cygwin compatibility
+#endif
+```
+
+### Windows (`os_win32.c`, `os_mswin.c`, `os_w32dll.c`, `os_w32exe.c`)
+
+- `os_win32.c` — Console-mode Windows: console API, process spawning, pipe I/O.
+- `os_mswin.c` — Shared code between console and GUI Windows builds.
+- `os_w32dll.c` — Entry point when MNV is built as a DLL (`MNVDLL`).
+- `os_w32exe.c` — Standard EXE entry point.
+- `os_dos.h` — Legacy MS-DOS defines still used by Windows.
+
+On Windows, `main()` is named `MNVMain` and the entry point may be in the DLL:
+
+```c
+#ifdef MSWIN
+MNVMain
+#else
+main
+#endif
+(int argc, char **argv)
+```
+
+MinGW expands command-line arguments differently, so Windows builds call
+`get_cmd_argsW()` for the raw wide-character argv.
+
+### macOS (`os_mac_conv.c`, `os_macosx.m`, `os_mac.h`)
+
+- `os_mac_conv.c` — Encoding conversion using Core Foundation.
+- `os_macosx.m` — Objective-C bridge: pasteboard access, system services.
+- `os_mac.h` — macOS-specific defines.
+
+The `MACOS_X_DARWIN` / `MACOS_X` / `MACOS_CONVERT` macros control macOS
+features. Clipboard support uses Cocoa pasteboard via `FEAT_CLIPBOARD`.
+
+### Haiku (`gui_haiku.cc`, `gui_haiku.h`, `os_haiku.h`, `os_haiku.rdef.in`)
+
+Haiku support uses the native C++ Be API. The `gui_haiku.cc` file is compiled
+as C++ (the only `.cc` file in the codebase). Resource definitions for the
+application are in `os_haiku.rdef.in`.
+
+### OpenVMS (`os_vms.c`, `os_vms_conf.h`, `os_vms_fix.com`, `os_vms_mms.c`)
+
+VMS support includes:
+
+- VMS-specific path handling (node::device:[directory]file.ext;version).
+- `Make_vms.mms` — MMS/MMK build script.
+- `os_vms_fix.com` — DCL post-processing script.
+
+### Amiga (`os_amiga.c`, `os_amiga.h`)
+
+Legacy Amiga support. The code remains but is unmaintained. `MNV_SIZEOF_INT`
+is conditionally set for Amiga compilers:
+
+```c
+#ifdef AMIGA
+# ifdef __GNUC__
+# define MNV_SIZEOF_INT 4
+# else
+# define MNV_SIZEOF_INT 2
+# endif
+#endif
+```
+
+### QNX (`os_qnx.c`, `os_qnx.h`)
+
+QNX-specific terminal and event handling. The Photon GUI (`gui_photon.c`)
+provides a native graphical interface on QNX.
+
+---
+
+## Display Server Support
+
+### X11
+
+Auto-detected by CMake via `find_package(X11)`. When available:
+
+- `HAVE_X11` is defined.
+- X clipboard (`FEAT_CLIPBOARD`), client-server (`FEAT_CLIENTSERVER`), XIM
+ input (`FEAT_XIM`), and XSMP session management (`FEAT_XSMP`) are enabled.
+- Libraries linked: `libX11`, `libXt`, `libSM`, `libICE`.
+
+### Wayland
+
+Auto-detected via `pkg_check_modules(WAYLAND wayland-client)`. When available:
+
+- `HAVE_WAYLAND` / `FEAT_WAYLAND` are defined.
+- `wayland-scanner` generates protocol stubs from XML files.
+- Clipboard via `ext-data-control-v1`, `wlr-data-control-unstable-v1`, and
+ optionally `xdg-shell` + `primary-selection-unstable-v1` for focus-stealing
+ clipboard.
+- The `vwl_connection_T` struct in `wayland.h` wraps the display, registry,
+ seats, and global objects.
+
+Wayland and X11 can coexist in the same build (e.g., an XWayland environment).
+
+---
+
+## Terminal Library
+
+MNV requires a terminal library for console mode. Detection order:
+
+1. **ncurses** (`find_package(Curses)`) — preferred. Sets `TERMINFO 1`.
+2. **termcap / tinfo** (`find_library(NAMES termcap tinfo)`) — fallback.
+
+The `tgetent()` function is checked to verify the library is usable:
+
+```cmake
+check_symbol_exists(tgetent "term.h" HAVE_TGETENT)
+```
+
+---
+
+## Cygwin
+
+`iscygpty.c` / `iscygpty.h` detect Cygwin pseudo-terminals so that MNV can
+adjust its terminal handling:
+
+```c
+#if defined(MSWIN) && (!defined(FEAT_GUI_MSWIN) || defined(MNVDLL))
+# include "iscygpty.h"
+#endif
+```
+
+---
+
+## Architecture and Word Size
+
+MNV requires at least a 32-bit `int`:
+
+```c
+#if MNV_SIZEOF_INT < 4 && !defined(PROTO)
+# error MNV only works with 32 bit int or larger
+#endif
+```
+
+The build checks `sizeof(int)`, `sizeof(long)`, `sizeof(off_t)`,
+`sizeof(time_t)`, and `sizeof(wchar_t)` at configure time:
+
+```cmake
+check_type_size(int MNV_SIZEOF_INT)
+check_type_size(long MNV_SIZEOF_LONG)
+check_type_size(off_t SIZEOF_OFF_T)
+check_type_size(time_t SIZEOF_TIME_T)
+check_type_size(wchar_t SIZEOF_WCHAR_T)
+```
+
+If `sizeof(wchar_t) == 2` (Windows), the `SMALL_WCHAR_T` flag is set.
+
+---
+
+## Compiler Support
+
+### GCC and Clang
+
+Default warning flags:
+
+```cmake
+add_compile_options(-Wall -Wno-deprecated-declarations)
+```
+
+Release builds add:
+
+```cmake
+add_compile_options(-O2 -fno-strength-reduce)
+```
+
+The `HAVE_ATTRIBUTE_UNUSED` check enables `__attribute__((unused))` to
+suppress warnings on intentionally unused parameters:
+
+```c
+#if defined(HAVE_ATTRIBUTE_UNUSED) || defined(__MINGW32__)
+# define UNUSED __attribute__((unused))
+#endif
+```
+
+### MSVC
+
+Builds via `Make_mvc.mak` or CMake with Visual Studio generators. Batch
+helpers: `msvc-latest.bat`, `msvc2015.bat`, `msvc2017.bat`, `msvc2019.bat`,
+`msvc2022.bat`.
+
+### Other Compilers
+
+Code contains conditionals for:
+
+- **Aztec C** (Amiga): `#ifdef AZTEC_C`
+- **SAS/C** (Amiga): `#ifdef SASC`
+- **DCC** (Amiga): `#ifdef _DCC`
+- **Tandem NonStop**: `#ifdef __TANDEM` — sets `ROOT_UID 65535`
+
+---
+
+## Packaging
+
+MNV is packaged for many systems. The `repology.org` badge tracks
+distribution status. Key packaging notes:
+
+- Debian/Ubuntu provide `mnv`, `mnv-tiny`, and `mnv-gtk` variants.
+- The `mnv-tiny` build is compiled with `FEAT_TINY` and is commonly installed
+  as the system's default `vi`.
+- Windows provides an NSIS-based installer (scripts in `nsis/`).
+- macOS can be installed via Homebrew.
+- The `mnvtutor.bat` (Windows) and `mnvtutor.com` (VMS) scripts launch the
+ tutor on those platforms.
+
+---
+
+## Platform-Specific Installation Files
+
+| File | Platform |
+|---|---|
+| `src/INSTALL` | Generic + Unix |
+| `src/INSTALLami.txt` | Amiga |
+| `src/INSTALLmac.txt` | macOS |
+| `src/INSTALLpc.txt` | Windows |
+| `src/INSTALLvms.txt` | OpenVMS |
+| `src/INSTALLx.txt` | Cross-compilation |
+
+---
+
+*This document describes MNV 10.0 as of build 287 (2026-04-03).*
diff --git a/docs/handbook/mnv/scripting.md b/docs/handbook/mnv/scripting.md
new file mode 100644
index 0000000000..ed8d4cae4a
--- /dev/null
+++ b/docs/handbook/mnv/scripting.md
@@ -0,0 +1,541 @@
+# MNV — Command-Line Interface and Scripting
+
+## Executable Invocation
+
+MNV is a single binary that adapts its behaviour based on the name it is
+launched with. The `parse_command_name()` function in `src/main.c` determines
+the mode:
+
+```c
+static void parse_command_name(mparm_T *parmp);
+```
+
+| Invocation | Mode | Effect |
+|---|---|---|
+| `mnv` | Normal | Standard editor |
+| `ex` | Ex | Starts in `:` line mode |
+| `view` | Read-only | Equivalent to `mnv -R` |
+| `rmnv` | Restricted | Disables shell commands, file writes |
+| `rview` | Restricted+RO | Restricted + read-only |
+| `mnvdiff` | Diff | Opens files in diff mode (`mnv -d`) |
+| `vi` / `vim` | Compat aliases | Normal mode |
+| `gmnv` | GUI | Starts graphical interface |
+| `gview` | GUI+RO | GUI + read-only |
+| `gmnvdiff` | GUI+Diff | GUI diff mode |
+| `emnv` | Easy GUI | GUI in easy mode (insert mode by default) |
+| `eview` | Easy+RO | Easy mode + read-only |
+
+---
+
+## Command-Line Arguments
+
+The `command_line_scan()` function in `src/main.c` parses all command-line
+arguments:
+
+```c
+static void command_line_scan(mparm_T *parmp);
+```
+
+### File Arguments
+
+Any non-option arguments are treated as files to edit:
+
+```
+mnv file1.c file2.c file3.c
+```
+
+These populate the argument list, managed by `src/arglist.c`.
+
+### Standard Options
+
+| Flag | Description |
+|---|---|
+| `-R` | Read-only mode |
+| `-Z` | Restricted mode (no shell access) |
+| `-g` | Start GUI (equivalent to `gmnv`) |
+| `-d` | Diff mode — open 2–8 files side by side |
+| `-b` | Binary mode |
+| `-l` | Lisp mode |
+| `-M` | Not modifiable — don't allow changes |
+| `-e` | Ex mode (`:` prompt) |
+| `-E` | Improved Ex mode |
+| `-s` | Silent/batch mode (Ex mode, no prompts) |
+| `-y` | Easy mode |
+| `-f` | Foreground — don't fork (GUI) |
+| `-v` | Force terminal mode even if invoked as `gmnv` |
+| `-n` | No swap file |
+| `-r` | Recovery mode (list swap files or recover) |
+| `-L` | Same as `-r` |
+| `-p[N]` | Open N tab pages |
+| `-o[N]` | Open N horizontal splits |
+| `-O[N]` | Open N vertical splits |
+| `-t tag` | Edit file containing `tag` |
+| `-q file` | Start in quickfix mode with `file` |
+
+### Pre- and Post-Commands
+
+| Flag | Description |
+|---|---|
+| `--cmd <command>` | Execute `<command>` **before** sourcing `.mnvrc`. Up to 10. |
+| `-c <command>` / `+<command>` | Execute `<command>` **after** loading files. Up to 10. |
+| `+` | Start at last line of first file |
+| `+{num}` | Start at line `{num}` of first file |
+
+The error message for too many commands:
+
+```c
+N_("Too many \"+command\", \"-c command\" or \"--cmd command\" arguments"),
+#define ME_EXTRA_CMD 4
+```
+
+### Startup Control
+
+| Flag | Description |
+|---|---|
+| `--clean` | Skip all config files (`.mnvrc`, plugins) |
+| `-u <file>` | Use `<file>` instead of `.mnvrc` |
+| `-U <file>` | Use `<file>` instead of `.gmnvrc` |
+| `-i <file>` | Use `<file>` instead of `.mnvinfo` |
+| `--noplugin` | Don't load any plugins |
+| `--startuptime <file>` | Log startup timing to `<file>` |
+| `--log <file>` | Enable channel/job logging to `<file>` |
+
+The `--startuptime` flag is scanned in `early_arg_scan()`:
+
+```c
+if (STRICMP(argv[i], "--startuptime") == 0 && time_fd == NULL)
+{
+ time_fd = mch_fopen(argv[i + 1], "a");
+ TIME_MSG("--- MNV STARTING ---");
+}
+```
+
+### Display Options
+
+| Flag | Description |
+|---|---|
+| `-T <terminal>` | Set terminal type |
+| `--not-a-term` | Skip terminal checks |
+| `--ttyfail` | Exit if stdin is not a terminal |
+
+### Client-Server
+
+| Flag | Description |
+|---|---|
+| `--servername <name>` | Set server name |
+| `--serverlist` | List running MNV servers |
+| `--remote <files>` | Open files in an existing MNV |
+| `--remote-send <keys>` | Send keys to a running MNV |
+| `--remote-expr <expr>` | Evaluate expression in a running MNV |
+| `--remote-wait` | Like `--remote` but wait for completion |
+| `--remote-tab` | Like `--remote` but open in new tab |
+
+### Information
+
+| Flag | Description |
+|---|---|
+| `-h` / `--help` | Print usage and exit |
+| `--version` | Print version and exit |
+
+### Error Handling
+
+Unrecognised options produce:
+
+```c
+static char *(main_errors[]) =
+{
+ N_("Unknown option argument"), // ME_UNKNOWN_OPTION
+ N_("Too many edit arguments"), // ME_TOO_MANY_ARGS
+ N_("Argument missing after"), // ME_ARG_MISSING
+ N_("Garbage after option argument"), // ME_GARBAGE
+ N_("Too many \"+command\", \"-c command\" or \"--cmd command\" arguments"),
+ N_("Invalid argument for"), // ME_INVALID_ARG
+};
+```
+
+---
+
+## Startup Sequence
+
+When `main()` runs (see `src/main.c`), parameter processing happens in
+several phases:
+
+### 1. Early Argument Scan
+
+```c
+static void early_arg_scan(mparm_T *parmp);
+```
+
+Scans specifically for `--startuptime`, `--log`, and `--clean` **before** any
+initialisation, because these affect how initialisation proceeds.
+
+### 2. Common Init Phase 1
+
+`common_init_1()` — allocator, hash tables, global options, message system.
+
+### 3. Common Init Phase 2
+
+`common_init_2(&params)` — terminal detection, default options, langmap.
+
+### 4. Full Command-Line Scan
+
+`command_line_scan(&params)` — processes every flag and file argument.
+
+### 5. TTY Check
+
+```c
+static void check_tty(mparm_T *parmp);
+```
+
+Verifies stdin/stdout are terminals when running interactively.
+
+### 6. Source Startup Scripts
+
+```c
+static void source_startup_scripts(mparm_T *parmp);
+```
+
+Loads scripts in this order (unless `--clean` or `-u NONE`):
+
+1. System-wide `.mnvrc` (e.g. `/etc/mnv/mnvrc`).
+2. User `.mnvrc` (`$HOME/.mnvrc` or `$XDG_CONFIG_HOME/mnv/mnvrc`).
+3. The `.gmnvrc` equivalent if GUI.
+4. `defaults.mnv` (new-user defaults).
+
+### 7. Pre-Commands
+
+```c
+static void exe_pre_commands(mparm_T *parmp);
+```
+
+Executes `--cmd` arguments.
+
+### 8. Edit Buffers
+
+```c
+static void edit_buffers(mparm_T *parmp, char_u *cwd);
+```
+
+Opens file arguments into buffers and windows.
+
+### 9. Post-Commands
+
+```c
+static void exe_commands(mparm_T *parmp);
+```
+
+Executes `-c` / `+` arguments.
+
+### 10. GUI Start
+
+```c
+static void main_start_gui(void);
+```
+
+If GUI mode is detected, starts the GUI event loop.
+
+---
+
+## MNVscript: The Built-in Scripting Language
+
+MNV includes a full scripting language for automation, plugins, and
+configuration. Two variants exist:
+
+### Legacy MNVscript
+
+The original `:let`, `:if`, `:while`, `:function` syntax, interpreted at
+runtime by `src/eval.c`. Expression parsing uses a recursive-descent parser:
+
+```c
+static int eval0_simple_funccal(...);
+static int eval2(char_u **arg, typval_T *rettv, evalarg_T *evalarg);
+static int eval3(...);
+// ... through eval9()
+```
+
+The central value type is `typval_T` (`src/structs.h`), a tagged union
+supporting:
+
+- Numbers (`VAR_NUMBER`)
+- Strings (`VAR_STRING`)
+- Floats (`VAR_FLOAT`)
+- Lists (`VAR_LIST`, `src/list.c`)
+- Dictionaries (`VAR_DICT`, `src/dict.c`)
+- Blobs (`VAR_BLOB`, `src/blob.c`)
+- Tuples (`VAR_TUPLE`, `src/tuple.c`)
+- Funcref / Partial (`VAR_FUNC`, `VAR_PARTIAL`)
+- Jobs (`VAR_JOB`)
+- Channels (`VAR_CHANNEL`)
+- Classes / Objects (`VAR_CLASS`, `VAR_OBJECT`)
+
+Variable scoping uses namespace prefixes (`g:`, `b:`, `w:`, `t:`, `l:`, `s:`,
+`v:`), managed by `src/evalvars.c`.
+
+Built-in functions are implemented in `src/evalfunc.c`.
+
+### MNV9 Script
+
+The modern dialect, activated by `:mnv9script` at the top of a script file or
+by the `:mnv9cmd` modifier. Detection:
+
+```c
+int
+in_mnv9script(void)
+{
+ return (current_sctx.sc_version == SCRIPT_VERSION_MNV9
+ || (cmdmod.cmod_flags & CMOD_MNV9CMD))
+ && !(cmdmod.cmod_flags & CMOD_LEGACY);
+}
+```
+
+MNV9 features:
+
+- **Type annotations**: `var name: string = "hello"`.
+- **Compiled to bytecode**: `src/mnv9compile.c` compiles, `src/mnv9execute.c`
+ runs the instructions defined in `src/mnv9instr.c`.
+- **Classes and interfaces**: `src/mnv9class.c` — `class`, `interface`,
+ `extends`, `implements`.
+- **Generics**: `src/mnv9generics.c` — `<T>`, `<K, V>`.
+- **Import/Export**: `src/mnv9script.c` — `import` / `export` for module
+ systems.
+- **`def` functions**: Compiled functions replacing `function`/`endfunction`.
+- **Strict mode**: variables must be declared, types are checked.
+
+Script version identification:
+
+```c
+#define SCRIPT_VERSION_MAX 4
+#define SCRIPT_VERSION_MNV9 999999
+```
+
+### Profiling
+
+When `FEAT_PROFILE` is defined, MNV can profile scripts and functions:
+
+```
+:profile start profile.log
+:profile func *
+:profile file *.mnv
+```
+
+Profiling is implemented in `src/profiler.c`.
+
+### Debugging
+
+MNV includes a built-in script debugger (`src/debugger.c`):
+
+```
+:breakadd func MyFunction
+:debug call MyFunction()
+```
+
+---
+
+## Ex Commands
+
+Ex commands (`:` commands) are the backbone of MNV's command-line mode. They
+are defined in `src/ex_cmds.h` and dispatched by `src/ex_docmd.c`:
+
+```c
+static char_u *do_one_cmd(...);
+```
+
+Command index tables in `ex_cmdidxs.h` (generated by `create_cmdidxs.mnv`)
+enable fast lookup.
+
+### Command Execution
+
+Every ex command receives an `exarg_T` struct containing:
+
+- The command address range (line numbers).
+- `:` modifiers (`:silent`, `:verbose`, `:sandbox`, `:lockmarks`, etc.).
+- The command argument string.
+- Flags for bang (`!`), register, count.
+
+### Notable Command Families
+
+| Family | Files | Examples |
+|---|---|---|
+| File operations | `ex_cmds.c` | `:write`, `:edit`, `:saveas` |
+| Buffer management | `ex_cmds.c`, `buffer.c` | `:bnext`, `:bdelete`, `:buffers` |
+| Window commands | `window.c` | `:split`, `:vsplit`, `:close`, `:only` |
+| Script evaluation | `ex_eval.c` | `:try`, `:catch`, `:throw`, `:finally` |
+| Source / Runtime | `ex_cmds2.c`, `scriptfile.c` | `:source`, `:runtime` |
+| Help | `help.c` | `:help`, `:helpgrep` |
+| Quickfix | `quickfix.c` | `:make`, `:copen`, `:cnext`, `:grep` |
+| Autocmds | `autocmd.c` | `:autocmd`, `:doautocmd`, `:augroup` |
+| Terminal | `terminal.c` | `:terminal` |
+| Session | `session.c` | `:mksession`, `:mkview` |
+| Diff | `diff.c` | `:diffthis`, `:diffoff`, `:diffupdate` |
+| Fold | `fold.c` | `:fold`, `:foldopen`, `:foldclose` |
+
+---
+
+## Autocommands
+
+`src/autocmd.c` implements the event-driven scripting system:
+
+```
+:autocmd BufWritePre *.c call CleanWhitespace()
+:autocmd FileType python setlocal tabstop=4
+```
+
+Autocommand events cover the full editor lifecycle: buffer loading, writing,
+window events, filetype detection, terminal activity, cursor movement, etc.
+
+The `autocmd_init()` function (called early in `main()`) initialises the
+autocommand tables.
+
+---
+
+## Key Mapping
+
+`src/map.c` handles:
+
+- `:map` / `:noremap` / `:unmap` for normal mode.
+- `:imap` / `:inoremap` for insert mode.
+- `:cmap` / `:cnoremap` for command-line mode.
+- `:vmap` / `:vnoremap` for visual mode.
+- `:tmap` / `:tnoremap` for terminal mode.
+- And all operator / select mode variants.
+
+The mapping engine integrates with `src/getchar.c` (typeahead buffer) to remap
+key sequences on the fly.
+
+---
+
+## Registers
+
+`src/register.c` manages:
+
+- Named registers (`"a` – `"z`).
+- Numbered registers (`"0` – `"9`).
+- Small delete register (`"-`).
+- System clipboard registers (`"*`, `"+`).
+- Expression register (`"=`).
+- Search register (`"/`).
+- Last inserted text (`".`).
+- Read-only registers (`"%` filename, `"#` alternate, `":` last command).
+- Black hole register (`"_`).
+
+The `"+` and `"*` registers bridge to the system clipboard via
+`src/clipboard.c` (and on Wayland via `src/wayland.c`).
+
+---
+
+## Configuration Files
+
+### `.mnvrc`
+
+The primary user configuration file. Sourced during startup. Can contain any
+ex commands, option settings, key mappings, autocommands, and function
+definitions.
+
+### `.gmnvrc`
+
+GUI-specific configuration. Sourced after `.mnvrc` when the GUI starts.
+
+### `.mnvinfo`
+
+Persistent session state across MNV invocations. Implemented in
+`src/mnvinfo.c`, controlled by the `FEAT_MNVINFO` guard:
+
+```c
+#ifdef FEAT_NORMAL
+# define FEAT_MNVINFO
+#endif
+```
+
+Stores:
+- Command-line history.
+- Search patterns.
+- Named marks.
+- Register contents.
+- File marks (last cursor positions).
+- Jump list.
+
+---
+
+## The `mparm_T` Struct
+
+The main-parameter struct passed through all startup functions:
+
+```c
+static mparm_T params;
+```
+
+Key fields:
+
+- `argc`, `argv` — raw command-line arguments.
+- `want_full_screen` — TRUE by default.
+- `use_debug_break_level` — debugger break level.
+- `window_count` — number of windows requested by `-o`/`-O`.
+- `clean` — `--clean` flag.
+- `edit_type` — one of `EDIT_NONE`, `EDIT_FILE`, `EDIT_STDIN`, `EDIT_TAG`,
+ `EDIT_QF`.
+
+```c
+#define EDIT_NONE 0
+#define EDIT_FILE 1
+#define EDIT_STDIN 2
+#define EDIT_TAG 3
+#define EDIT_QF 4
+```
+
+---
+
+## Standard I/O: Editing from stdin
+
+When MNV is invoked as part of a pipe:
+
+```bash
+echo "hello world" | mnv -
+```
+
+The `read_stdin()` function reads data from standard input into the first
+buffer:
+
+```c
+static void read_stdin(void);
+```
+
+The `EDIT_STDIN` edit type is set in `command_line_scan()` when `-` appears as
+a file argument.
+
+---
+
+## Example Workflows
+
+### Quick edit from command line
+
+```bash
+mnv +42 src/main.c # Open at line 42
+mnv -c 'set nu' file.txt # Open with line numbers
+mnv -d file1.c file2.c # Diff two files
+mnv -R /var/log/syslog # View log read-only
+```
+
+### Batch processing
+
+```bash
+mnv -es '+%s/foo/bar/g' '+wq' file.txt
+echo ":%s/old/new/g" | mnv -s file.txt
+```
+
+### Remote editing (client-server)
+
+```bash
+mnv --servername MYSERVER file.c &
+mnv --servername MYSERVER --remote file2.c
+mnv --servername MYSERVER --remote-send ':qa!<CR>'
+```
+
+### Startup profiling
+
+```bash
+mnv --startuptime startup.log file.c
+```
+
+---
+
+*This document describes MNV 10.0 as of build 287 (2026-04-03).*
diff --git a/docs/handbook/neozip/api-reference.md b/docs/handbook/neozip/api-reference.md
new file mode 100644
index 0000000000..cc24f0d91c
--- /dev/null
+++ b/docs/handbook/neozip/api-reference.md
@@ -0,0 +1,459 @@
+# API Reference
+
+## Overview
+
+Neozip exposes its public API through `zlib.h` (generated from `zlib.h.in`).
+In zlib-compat mode (`ZLIB_COMPAT=ON`), function names match standard zlib.
+In native mode, all symbols are prefixed with `zng_` and the stream type
+becomes `zng_stream`.
+
+The `PREFIX()` macro handles the distinction:
+```c
+// ZLIB_COMPAT=ON: PREFIX(deflateInit) → deflateInit
+// ZLIB_COMPAT=OFF: PREFIX(deflateInit) → zng_deflateInit
+```
+
+---
+
+## Core Data Structures
+
+### `z_stream` / `zng_stream`
+
+```c
+typedef struct PREFIX3(stream_s) {
+ // Input
+ const uint8_t *next_in; // Next input byte
+ uint32_t avail_in; // Number of bytes available at next_in
+ size_t total_in; // Total bytes read so far
+
+ // Output
+ uint8_t *next_out; // Next output byte position
+ uint32_t avail_out; // Remaining free space at next_out
+ size_t total_out; // Total bytes written so far
+
+ // Error
+ const char *msg; // Last error message (or NULL)
+
+ // Internal
+ struct internal_state *state; // Private state (DO NOT access)
+
+ // Memory management
+ alloc_func zalloc; // Allocation function (or Z_NULL for default)
+ free_func zfree; // Free function (or Z_NULL for default)
+ void *opaque; // Private data for zalloc/zfree
+
+ // Type indicator
+ int data_type; // Best guess about data type (deflate output hint)
+
+ // Checksum
+ unsigned long adler; // Adler-32 or CRC-32 of data
+ unsigned long reserved; // Reserved for future use
+} PREFIX3(stream);
+```
+
+### `gz_header`
+
+```c
+typedef struct PREFIX(gz_header_s) {
+ int text; // True if compressed data is believed to be text
+ unsigned long time; // Modification time
+ int xflags; // Extra flags (not used by gzopen)
+ int os; // Operating system
+ uint8_t *extra; // Pointer to extra field (or Z_NULL)
+ unsigned extra_len; // Extra field length
+ unsigned extra_max; // Space at extra (when reading header)
+ char *name; // Pointer to file name (or Z_NULL)
+ unsigned name_max; // Space at name (when reading header)
+ char *comment; // Pointer to comment (or Z_NULL)
+ unsigned comm_max; // Space at comment (when reading header)
+ int hcrc; // True if header CRC present
+ int done; // True when done reading gzip header
+} PREFIX(gz_header);
+```
+
+---
+
+## Compression
+
+### Initialisation
+
+```c
+int PREFIX(deflateInit)(PREFIX3(stream) *strm, int level);
+int PREFIX(deflateInit2)(PREFIX3(stream) *strm, int level, int method,
+ int windowBits, int memLevel, int strategy);
+```
+
+**Parameters**:
+- `level` — Compression level (0–9, or `Z_DEFAULT_COMPRESSION = -1`):
+ | Level | Meaning | Strategy Function |
+ |---|---|---|
+ | 0 | No compression | `deflate_stored` |
+ | 1 | Fastest | `deflate_quick` (Intel) |
+ | 2 | Fast | `deflate_fast` |
+ | 3 | Fast | `deflate_fast` |
+ | 4–5 | Medium | `deflate_medium` (Intel) |
+ | 6 | Default | `deflate_medium` (Intel) |
+ | 7 | Medium-slow | `deflate_slow` |
+ | 8–9 | Maximum compression | `deflate_slow` |
+
+- `method` — Always `Z_DEFLATED` (8)
+
+- `windowBits` — Window size and format:
+ | Value | Format |
+ |---|---|
+ | 8..15 | zlib wrapper |
+ | -8..-15 | Raw deflate |
+ | 24..31 (8..15 + 16) | gzip wrapper |
+
+- `memLevel` — Memory usage (1–9, default 8):
+ Controls hash table and buffer sizes. Higher = more memory, potentially
+ better compression.
+
+- `strategy`:
+ | Constant | Value | Description |
+ |---|---|---|
+ | `Z_DEFAULT_STRATEGY` | 0 | Normal compression |
+ | `Z_FILTERED` | 1 | Tuned for filtered/delta data |
+ | `Z_HUFFMAN_ONLY` | 2 | Huffman only, no string matching |
+ | `Z_RLE` | 3 | Run-length encoding only (dist=1) |
+ | `Z_FIXED` | 4 | Fixed Huffman tables only |
+
+### Compression
+
+```c
+int PREFIX(deflate)(PREFIX3(stream) *strm, int flush);
+```
+
+**Flush values**:
+| Constant | Value | Behaviour |
+|---|---|---|
+| `Z_NO_FLUSH` | 0 | Compress as much as possible |
+| `Z_PARTIAL_FLUSH` | 1 | Flush pending output |
+| `Z_SYNC_FLUSH` | 2 | Flush + align to byte boundary |
+| `Z_FULL_FLUSH` | 3 | Flush + reset state (sync point) |
+| `Z_FINISH` | 4 | Finish the stream |
+| `Z_BLOCK` | 5 | Flush to next block boundary |
+| `Z_TREES` | 6 | Flush + emit tree (for debugging) |
+
+**Return codes**:
+| Code | Value | Meaning |
+|---|---|---|
+| `Z_OK` | 0 | Progress made |
+| `Z_STREAM_END` | 1 | All input consumed and flushed |
+| `Z_STREAM_ERROR` | -2 | Invalid state |
+| `Z_BUF_ERROR` | -5 | No progress possible |
+
+### Cleanup
+
+```c
+int PREFIX(deflateEnd)(PREFIX3(stream) *strm);
+```
+
+### Auxiliary
+
+```c
+int PREFIX(deflateSetDictionary)(PREFIX3(stream) *strm,
+ const uint8_t *dictionary, unsigned dictLength);
+int PREFIX(deflateGetDictionary)(PREFIX3(stream) *strm,
+ uint8_t *dictionary, unsigned *dictLength);
+int PREFIX(deflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *source);
+int PREFIX(deflateReset)(PREFIX3(stream) *strm);
+int PREFIX(deflateParams)(PREFIX3(stream) *strm, int level, int strategy);
+int PREFIX(deflateTune)(PREFIX3(stream) *strm, int good_length, int max_lazy,
+ int nice_length, int max_chain);
+unsigned long PREFIX(deflateBound)(PREFIX3(stream) *strm, unsigned long sourceLen);
+int PREFIX(deflatePending)(PREFIX3(stream) *strm, unsigned *pending, int *bits);
+int PREFIX(deflatePrime)(PREFIX3(stream) *strm, int bits, int value);
+int PREFIX(deflateSetHeader)(PREFIX3(stream) *strm, PREFIX(gz_headerp) head);
+```
+
+---
+
+## Decompression
+
+### Initialisation
+
+```c
+int PREFIX(inflateInit)(PREFIX3(stream) *strm);
+int PREFIX(inflateInit2)(PREFIX3(stream) *strm, int windowBits);
+```
+
+**`windowBits`**:
+| Value | Format |
+|---|---|
+| 8..15 | zlib |
+| -8..-15 | Raw deflate |
+| 24..31 (8..15 + 16) | gzip only |
+| 40..47 | Auto-detect zlib or gzip |
+
+### Decompression
+
+```c
+int PREFIX(inflate)(PREFIX3(stream) *strm, int flush);
+```
+
+**Flush values for inflate**: `Z_NO_FLUSH`, `Z_SYNC_FLUSH`, `Z_FINISH`,
+`Z_BLOCK`, `Z_TREES`
+
+**Return codes**:
+| Code | Value | Meaning |
+|---|---|---|
+| `Z_OK` | 0 | Progress made |
+| `Z_STREAM_END` | 1 | End of stream reached |
+| `Z_NEED_DICT` | 2 | Dictionary required |
+| `Z_DATA_ERROR` | -3 | Invalid compressed data |
+| `Z_MEM_ERROR` | -4 | Out of memory |
+| `Z_BUF_ERROR` | -5 | No progress possible |
+| `Z_STREAM_ERROR` | -2 | Invalid parameters |
+
+### Cleanup
+
+```c
+int PREFIX(inflateEnd)(PREFIX3(stream) *strm);
+```
+
+### Auxiliary
+
+```c
+int PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm,
+ const uint8_t *dictionary, unsigned dictLength);
+int PREFIX(inflateGetDictionary)(PREFIX3(stream) *strm,
+ uint8_t *dictionary, unsigned *dictLength);
+int PREFIX(inflateSync)(PREFIX3(stream) *strm);
+int PREFIX(inflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *source);
+int PREFIX(inflateReset)(PREFIX3(stream) *strm);
+int PREFIX(inflateReset2)(PREFIX3(stream) *strm, int windowBits);
+int PREFIX(inflatePrime)(PREFIX3(stream) *strm, int bits, int value);
+long PREFIX(inflateMark)(PREFIX3(stream) *strm);
+int PREFIX(inflateGetHeader)(PREFIX3(stream) *strm, PREFIX(gz_headerp) head);
+int PREFIX(inflateBack)(PREFIX3(stream) *strm,
+ in_func in, void *in_desc,
+ out_func out, void *out_desc);
+int PREFIX(inflateBackEnd)(PREFIX3(stream) *strm);
+```
+
+---
+
+## One-Shot Functions
+
+### Compress
+
+```c
+int PREFIX(compress)(uint8_t *dest, size_t *destLen,
+ const uint8_t *source, size_t sourceLen);
+int PREFIX(compress2)(uint8_t *dest, size_t *destLen,
+ const uint8_t *source, size_t sourceLen, int level);
+unsigned long PREFIX(compressBound)(unsigned long sourceLen);
+```
+
+`compress()` uses level `Z_DEFAULT_COMPRESSION`. `compress2()` allows
+specifying the level.
+
+`compressBound()` returns a worst-case upper bound on the compressed size for
+a given source length, useful for sizing the output buffer in advance.
+
+### Uncompress
+
+```c
+int PREFIX(uncompress)(uint8_t *dest, size_t *destLen,
+ const uint8_t *source, size_t sourceLen);
+int PREFIX(uncompress2)(uint8_t *dest, size_t *destLen,
+ const uint8_t *source, size_t *sourceLen);
+```
+
+`uncompress2()` also updates `*sourceLen` with the number of source bytes
+consumed.
+
+---
+
+## Checksum Functions
+
+### Adler-32
+
+```c
+unsigned long PREFIX(adler32)(unsigned long adler,
+ const uint8_t *buf, unsigned len);
+unsigned long PREFIX(adler32_z)(unsigned long adler,
+ const uint8_t *buf, size_t len);
+unsigned long PREFIX(adler32_combine)(unsigned long adler1,
+ unsigned long adler2, z_off_t len2);
+```
+
+Initial value: `adler32(0L, Z_NULL, 0)` returns `1`.
+
+### CRC-32
+
+```c
+unsigned long PREFIX(crc32)(unsigned long crc,
+ const uint8_t *buf, unsigned len);
+unsigned long PREFIX(crc32_z)(unsigned long crc,
+ const uint8_t *buf, size_t len);
+unsigned long PREFIX(crc32_combine)(unsigned long crc1,
+ unsigned long crc2, z_off_t len2);
+unsigned long PREFIX(crc32_combine_gen)(z_off_t len2);
+unsigned long PREFIX(crc32_combine_op)(unsigned long crc1,
+ unsigned long crc2, unsigned long op);
+```
+
+Initial value: `crc32(0L, Z_NULL, 0)` returns `0`.
+
+---
+
+## Gzip File Operations
+
+```c
+gzFile PREFIX(gzopen)(const char *path, const char *mode);
+gzFile PREFIX(gzdopen)(int fd, const char *mode);
+int PREFIX(gzbuffer)(gzFile file, unsigned size);
+int PREFIX(gzsetparams)(gzFile file, int level, int strategy);
+
+int PREFIX(gzread)(gzFile file, void *buf, unsigned len);
+size_t PREFIX(gzfread)(void *buf, size_t size, size_t nitems, gzFile file);
+int PREFIX(gzwrite)(gzFile file, const void *buf, unsigned len);
+size_t PREFIX(gzfwrite)(const void *buf, size_t size, size_t nitems, gzFile file);
+int PREFIX(gzprintf)(gzFile file, const char *format, ...);
+int PREFIX(gzputs)(gzFile file, const char *s);
+char *PREFIX(gzgets)(gzFile file, char *buf, int len);
+int PREFIX(gzputc)(gzFile file, int c);
+int PREFIX(gzgetc)(gzFile file);
+int PREFIX(gzungetc)(int c, gzFile file);
+int PREFIX(gzflush)(gzFile file, int flush);
+
+z_off64_t PREFIX(gzseek64)(gzFile file, z_off64_t offset, int whence);
+z_off64_t PREFIX(gztell64)(gzFile file);
+z_off64_t PREFIX(gzoffset64)(gzFile file);
+int PREFIX(gzrewind)(gzFile file);
+int PREFIX(gzeof)(gzFile file);
+int PREFIX(gzdirect)(gzFile file);
+
+int PREFIX(gzclose)(gzFile file);
+int PREFIX(gzclose_r)(gzFile file);
+int PREFIX(gzclose_w)(gzFile file);
+const char *PREFIX(gzerror)(gzFile file, int *errnum);
+void PREFIX(gzclearerr)(gzFile file);
+```
+
+---
+
+## Utility Functions
+
+```c
+const char *PREFIX(zlibVersion)(void);
+const char *PREFIX(zlibng_version)(void); // neozip-specific
+unsigned long PREFIX(zlibCompileFlags)(void);
+const char *PREFIX(zError)(int err);
+```
+
+### `zlibCompileFlags()`
+
+Returns a bitmask indicating compilation options:
+
+| Bit | Meaning |
+|---|---|
+| 0–1 | size of uInt (0=16, 1=32, 2=64) |
+| 2–3 | size of unsigned long |
+| 4–5 | size of void * |
+| 6–7 | size of z_off_t |
+| 8 | Debug build |
+| 9 | Assembly code used |
+| 10 | DYNAMIC_CRC_TABLE |
+| 12 | NO_GZCOMPRESS |
+| 16 | PKZIP_BUG_WORKAROUND |
+| 17 | FASTEST (deflate_fast only) |
+
+---
+
+## Version Constants
+
+```c
+#define ZLIBNG_VERSION "2.3.90"
+#define ZLIBNG_VER_MAJOR 2
+#define ZLIBNG_VER_MINOR 3
+#define ZLIBNG_VER_REVISION 90
+#define ZLIBNG_VER_STATUS 0 // 0=devel, 1=alpha, 2=beta, ...9=release
+
+#define ZLIB_VERSION "1.3.1.zlib-ng" // Compat version
+#define ZLIB_VERNUM 0x1310
+```
+
+---
+
+## Usage Examples
+
+### Basic Compression
+
+```c
+#include <zlib.h> // or <zlib-ng.h> in native mode
+
+void compress_data(const uint8_t *input, size_t input_len) {
+ size_t bound = compressBound(input_len);
+ uint8_t *output = malloc(bound);
+ size_t output_len = bound;
+
+ int ret = compress2(output, &output_len, input, input_len, Z_DEFAULT_COMPRESSION);
+ if (ret != Z_OK) { /* handle error */ }
+
+ // output[0..output_len-1] contains compressed data
+ free(output);
+}
+```
+
+### Streaming Compression
+
+```c
+z_stream strm = {0};
+deflateInit2(&strm, 6, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY);
+// windowBits = 15 + 16 → gzip format
+
+strm.next_in = input;
+strm.avail_in = input_len;
+strm.next_out = output;
+strm.avail_out = output_size;
+
+while (strm.avail_in > 0) {
+ int ret = deflate(&strm, Z_NO_FLUSH);
+ if (ret == Z_STREAM_ERROR) break;
+ // Flush output if avail_out == 0
+}
+
+deflate(&strm, Z_FINISH);
+deflateEnd(&strm);
+```
+
+### Streaming Decompression
+
+```c
+z_stream strm = {0};
+inflateInit2(&strm, 15 + 32); // Auto-detect zlib/gzip
+
+strm.next_in = compressed;
+strm.avail_in = compressed_len;
+strm.next_out = output;
+strm.avail_out = output_size;
+
+int ret;
+do {
+ ret = inflate(&strm, Z_NO_FLUSH);
+ if (ret == Z_DATA_ERROR || ret == Z_MEM_ERROR) break;
+} while (ret != Z_STREAM_END);
+
+inflateEnd(&strm);
+```
+
+### Gzip File I/O
+
+```c
+// Write
+gzFile gz = gzopen("data.gz", "wb9"); // Level 9
+gzwrite(gz, data, data_len);
+gzclose(gz);
+
+// Read
+gz = gzopen("data.gz", "rb"); // reuse the handle; `gz` is already declared above
+char buf[4096];
+int n;
+while ((n = gzread(gz, buf, sizeof(buf))) > 0) {
+ // Process buf[0..n-1]
+}
+gzclose(gz);
+```
diff --git a/docs/handbook/neozip/architecture.md b/docs/handbook/neozip/architecture.md
new file mode 100644
index 0000000000..377773eec5
--- /dev/null
+++ b/docs/handbook/neozip/architecture.md
@@ -0,0 +1,1075 @@
+# Neozip Architecture
+
+## Overview
+
+Neozip is structured as a layered compression library. At the highest level,
+public API functions in `compress.c`, `uncompr.c`, `gzlib.c`, `gzread.c`, and
+`gzwrite.c` provide simple interfaces. These delegate to the core streaming
+engine implemented in `deflate.c` and `inflate.c`. The streaming engine in
+turn relies on Huffman tree management (`trees.c`), hash table operations
+(`insert_string.c`), match finding (`match_tpl.h`), and integrity checking
+(`adler32.c`, `crc32.c`). At the bottom layer, architecture-specific SIMD
+implementations in `arch/` provide hardware-accelerated versions of
+performance-critical functions, selected at runtime by the dispatch table in
+`functable.c`.
+
+---
+
+## Module Dependency Graph
+
+```
+Application
+ │
+ ├─── compress.c / uncompr.c (one-shot API)
+ ├─── gzlib.c / gzread.c / gzwrite.c (gzip file I/O)
+ │
+ └─── deflate.c ──────── inflate.c (streaming core)
+ │ │
+ ├── deflate_fast.c │
+ ├── deflate_quick.c│
+ ├── deflate_medium.c
+ ├── deflate_slow.c │
+ ├── deflate_stored.c
+ ├── deflate_huff.c │
+ ├── deflate_rle.c │
+ │ ├── inftrees.c (code table builder)
+ │ ├── infback.c (callback inflate)
+ │ │
+ ├── trees.c ───────┘
+ │
+ ├── insert_string.c / match_tpl.h
+ │
+ ├── adler32.c ──── crc32.c (checksums)
+ │
+ └── functable.c ── cpu_features.c (dispatch)
+ │
+ └── arch/
+ ├── generic/
+ ├── x86/
+ ├── arm/
+ ├── power/
+ ├── s390/
+ ├── riscv/
+ └── loongarch/
+```
+
+---
+
+## Source File Reference
+
+### Public API Layer
+
+#### `compress.c`
+
+Implements the one-shot `compress()` and `compress2()` functions. These
+create a temporary `z_stream`, call `deflateInit()`, feed all input via a
+single `deflate(Z_FINISH)` loop, and call `deflateEnd()`. The file also
+provides `compressBound()`, which returns the maximum compressed size
+for a given input length.
+
+```c
+z_int32_t Z_EXPORT PREFIX(compress2)(
+ unsigned char *dest, z_uintmax_t *destLen,
+ const unsigned char *source, z_uintmax_t sourceLen,
+ z_int32_t level);
+```
+
+The `DEFLATE_BOUND_COMPLEN` macro allows architecture-specific overrides
+(used by IBM z DFLTCC).
+
+#### `uncompr.c`
+
+Implements `uncompress()` and `uncompress2()`. Creates a temporary `z_stream`,
+calls `inflateInit()`, feeds all input, returns the decompressed data. If
+`*destLen` is zero, a 1-byte dummy buffer is used to detect incomplete streams.
+
+```c
+z_int32_t Z_EXPORT PREFIX(uncompress2)(
+ unsigned char *dest, z_uintmax_t *destLen,
+ const unsigned char *source, z_uintmax_t *sourceLen);
+```
+
+#### `gzlib.c`
+
+Common code for gzip file operations. Manages the `gz_state` structure
+(defined in `gzguts.h`), which wraps a `z_stream` with file descriptor,
+buffers, and state tracking.
+
+Key functions:
+- `gz_state_init()` — Allocates and zeroes a `gz_state`
+- `gz_reset()` — Resets read/write state
+- `gz_buffer_alloc()` — Allocates aligned I/O buffers (input is doubled for
+ write mode, output is doubled for read mode)
+- `gz_open()` — Opens a file, detects mode, initialises state
+- `PREFIX(gzerror)()` — Returns error string and code
+- `PREFIX(gzclearerr)()` — Clears error and EOF flags
+
+The gzip file state (`gz_state`) tracks:
+```c
+typedef struct {
+ struct gzFile_s x; // exposed: have, next, pos
+ int mode; // GZ_NONE, GZ_READ, GZ_WRITE
+ int fd; // file descriptor
+ char *path; // for error messages
+ unsigned size; // buffer size (0 = not allocated yet)
+ unsigned want; // requested buffer size (default GZBUFSIZE=131072)
+ unsigned char *in; // input buffer
+ unsigned char *out; // output buffer
+ int direct; // 0=gzip, 1=transparent copy
+ int how; // LOOK, COPY, GZIP
+ z_off64_t start; // where gzip data starts
+ int eof; // end of input file reached
+ int past; // read past end
+ int level; // compression level
+ int strategy; // compression strategy
+ // ...
+} gz_state;
+```
+
+#### `gzread.c`
+
+Implements `gzread()`, `gzgets()`, `gzgetc()`, `gzungetc()`, `gzclose_r()`.
+Uses a lazy initialisation pattern — buffers and inflate state are not
+allocated until the first read.
+
+The read pipeline:
+1. `gz_look()` — Detect gzip header or transparent mode
+2. `gz_decomp()` — Call `inflate()` to decompress
+3. `gz_fetch()` — Fill output buffer
+4. `gz_read()` — Copy from output buffer to user buffer
+
+#### `gzwrite.c`
+
+Implements `gzwrite()`, `gzprintf()`, `gzputc()`, `gzputs()`, `gzflush()`,
+`gzclose_w()`. Write initialisation is also lazy.
+
+The write pipeline:
+1. `gz_write_init()` — Allocate buffers, call `deflateInit2()` with `MAX_WBITS + 16`
+ (gzip wrapper)
+2. `gz_comp()` — Call `deflate()` to compress, write to file descriptor
+3. `gz_zero()` — Seek support: write zero bytes as padding
+
+---
+
+### Core Compression Engine
+
+#### `deflate.c`
+
+The central compression module. Contains:
+
+- **`deflateInit2()`** — Main initialisation. Allocates the unified buffer via
+ `alloc_deflate()`, initialises the `deflate_state`, sets up the hash table
+ and window, selects the strategy function based on level.
+
+- **`deflate()`** — Main compression entry point. Handles the state machine
+ for header emission (zlib or gzip), calls the selected strategy function
+ to process input, manages flushing and stream completion.
+
+- **`deflateEnd()`** — Frees all resources via `free_deflate()`.
+
+- **`deflateReset()`** — Resets state for reuse without reallocation.
+
+- **`deflateParams()`** — Changes compression level and strategy mid-stream.
+ Flushes the current block if the strategy function changes.
+
+- **`deflateBound()`** — Returns worst-case compressed size.
+
+- **`deflatePending()`** — Reports pending output bytes.
+
+- **`deflateSetDictionary()`** — Loads a preset dictionary.
+
+- **`deflateCopy()`** — Deep-copies a deflate stream.
+
+- **`fill_window()`** — Slides the window and reads input into the available
+ space. Updates hash entries via `slide_hash()`.
+
+- **`alloc_deflate()`** — Single-allocation buffer partitioning.
+
+- **`free_deflate()`** — Single-free cleanup.
+
+- **`lm_init()`** — Initialises match-finding parameters from the
+ `configuration_table`.
+
+- **`lm_set_level()`** — Updates parameters when level changes.
+
+##### The `deflate_state` Structure
+
+The `internal_state` (aliased as `deflate_state`) is the central data
+structure, carefully laid out for cache performance with `ALIGNED_(64)`:
+
+```c
+struct ALIGNED_(64) internal_state {
+ // Cacheline 0
+ PREFIX3(stream) *strm; // Back-pointer to z_stream
+ unsigned char *pending_buf; // Output pending buffer
+ unsigned char *pending_out; // Next pending byte to output
+ uint32_t pending_buf_size;
+ uint32_t pending; // Bytes in pending buffer
+ int wrap; // bit 0: zlib, bit 1: gzip
+ uint32_t gzindex; // Position in gzip extra/name/comment
+ PREFIX(gz_headerp) gzhead; // gzip header info
+ int status; // INIT_STATE, GZIP_STATE, BUSY_STATE, etc.
+ int last_flush;
+ int reproducible;
+
+ // Cacheline 1
+ unsigned int lookahead; // Valid bytes ahead in window
+ unsigned int strstart; // Start of string to insert
+ unsigned int w_size; // LZ77 window size (32K default)
+ int block_start; // Window position at block start
+ unsigned int high_water; // Initialised bytes high water mark
+ unsigned int window_size; // 2 * w_size
+ unsigned char *window; // Sliding window
+ Pos *prev; // Hash chain links
+ Pos *head; // Hash chain heads
+ uint32_t ins_h; // Hash index of string
+
+ // Match state
+ unsigned int match_length; // Best match length
+ int match_available;// Previous match exists flag
+ uint32_t prev_match; // Previous match position
+ unsigned int match_start; // Start of matching string
+ unsigned int prev_length; // Best match at previous step
+ unsigned int max_chain_length;
+ unsigned int max_lazy_match;
+
+ // Parameters
+ int level;
+ int strategy;
+ unsigned int good_match;
+ int nice_match;
+ unsigned int matches;
+ unsigned int insert;
+
+ // Bit buffer
+ uint64_t bi_buf; // 64-bit output buffer
+ int32_t bi_valid; // Valid bits in bi_buf
+
+ // Huffman trees
+ struct ct_data_s dyn_ltree[HEAP_SIZE]; // Literal/length tree
+ struct ct_data_s dyn_dtree[2*D_CODES+1]; // Distance tree
+ struct ct_data_s bl_tree[2*BL_CODES+1]; // Bit-length tree
+
+ struct tree_desc_s l_desc, d_desc, bl_desc;
+ uint16_t bl_count[MAX_BITS+1];
+ int heap[2*L_CODES+1];
+ unsigned char depth[2*L_CODES+1];
+
+ // Symbol buffer
+ unsigned int lit_bufsize;
+#ifdef LIT_MEM
+ uint16_t *d_buf;
+ unsigned char *l_buf;
+#else
+ unsigned char *sym_buf;
+#endif
+ unsigned int sym_next;
+ unsigned int sym_end;
+
+ unsigned int opt_len;
+ unsigned int static_len;
+};
+```
+
+The state machine transitions are:
+
+```
+INIT_STATE → BUSY_STATE → FINISH_STATE
+ ↓
+GZIP_STATE → EXTRA_STATE → NAME_STATE → COMMENT_STATE → HCRC_STATE → BUSY_STATE
+```
+
+#### `deflate.h`
+
+Defines the `deflate_state` (as `internal_state`), the `ct_data` union type
+for Huffman tree nodes, the `tree_desc` descriptor, the `block_state` enum,
+and all compression-related constants:
+
+```c
+#define LENGTH_CODES 29 // Number of length codes
+#define LITERALS 256 // Number of literal bytes
+#define L_CODES (LITERALS+1+LENGTH_CODES) // 286
+#define D_CODES 30 // Number of distance codes
+#define BL_CODES 19 // Bit-length codes
+#define HEAP_SIZE (2*L_CODES+1) // 573
+#define BIT_BUF_SIZE 64 // 64-bit bit buffer
+#define END_BLOCK 256 // End-of-block literal code
+#define HASH_BITS 16u
+#define HASH_SIZE 65536u
+```
+
+#### `deflate_p.h`
+
+Private inline functions shared across strategy files:
+
+- **`zng_tr_tally_lit()`** — Records a literal byte. Stores into `sym_buf` or
+ `l_buf`/`d_buf`, increments frequency counter.
+- **`zng_tr_tally_dist()`** — Records a length/distance pair. Computes the
+ length code via `zng_length_code[]` and distance code via `zng_dist_code[]`.
+- **`check_match()`** — Debug-only match validation.
+- **`flush_pending()`** — Copies pending output to `strm->next_out`.
+
+The tally functions determine when a block should be flushed by checking
+`sym_next == sym_end`.
+
+---
+
+### Strategy Implementations
+
+#### `deflate_quick.c`
+
+The **quick** strategy (level 1) was contributed by Intel. It uses **static
+Huffman trees** exclusively (no dynamic tree construction), performing a
+single-pass greedy match search.
+
+Key characteristics:
+- Opens and closes static tree blocks via `zng_tr_emit_tree()` /
+  `zng_tr_emit_end_block()`
+- Uses `quick_insert_value()` for single-shot hash insertion
+- Match finding via `FUNCTABLE_CALL(longest_match)`
+- No lazy evaluation
+- Block management tracks `block_open` state (0=closed, 1=open, 2=open+last)
+
+```c
+Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
+ // Emit static Huffman tree block header
+ // For each position:
+ // - Hash insert current position
+ // - If match found: emit length/distance via static codes
+ // - Else: emit literal via static code
+ // Flush when pending is near full
+}
+```
+
+#### `deflate_fast.c`
+
+The **fast** strategy (level 2, or 1–3 without quick) uses **greedy matching**
+with no lazy evaluation:
+
+1. Fill window if `lookahead < MIN_LOOKAHEAD`
+2. Insert current string hash via `quick_insert_value()`
+3. If hash chain has a match within `MAX_DIST`, call `longest_match()`
+4. If match ≥ `WANT_MIN_MATCH` (4): emit distance/length, advance by match length,
+ insert all strings within the match
+5. Else: emit literal, advance by 1
+6. Flush block when symbol buffer is full
+
+#### `deflate_medium.c`
+
+The **medium** strategy (levels 3–6) was contributed by Intel (Arjan van de Ven).
+It provides a balance between speed and compression:
+
+1. Two-match lookahead: finds the best match at the current position and the
+ next position
+2. Uses a `struct match` to track `match_start`, `match_length`, `strstart`,
+ `orgstart`
+3. `find_best_match()` — calls `longest_match()` and evaluates match quality
+4. `emit_match()` — emits literals for short matches (< `WANT_MIN_MATCH`) or
+ distance/length for longer ones
+5. `insert_match()` — inserts hash entries for matched strings
+6. Limited lazy evaluation based on comparing current and next match lengths
+
+#### `deflate_slow.c`
+
+The **slow** strategy (levels 7–9) performs full **lazy match evaluation**:
+
+1. At each position, find the longest match
+2. Instead of immediately emitting it, advance one position and find another match
+3. If the new match is better, discard the previous one (lazy evaluation)
+4. Level 9 uses `longest_match_slow` (the `LONGEST_MATCH_SLOW` variant) with
+ `insert_string_roll` for deeper hash chain traversal
+
+```c
+// Level ≥ 9: use slow match and rolling insert
+if (level >= 9) {
+ longest_match = FUNCTABLE_FPTR(longest_match_slow);
+ insert_string_func = insert_string_roll;
+} else {
+ longest_match = FUNCTABLE_FPTR(longest_match);
+ insert_string_func = insert_string;
+}
+```
+
+The lazy evaluation logic:
+```c
+if (s->prev_length >= STD_MIN_MATCH && match_len <= s->prev_length) {
+ // Previous match was better — emit it
+ bflush = zng_tr_tally_dist(s, s->strstart - 1 - s->prev_match,
+ s->prev_length - STD_MIN_MATCH);
+ s->prev_length -= 1;
+ s->lookahead -= s->prev_length;
+ // Insert strings for the matched region
+}
+```
+
+#### `deflate_stored.c`
+
+The **stored** strategy (level 0) copies input without compression:
+
+- Emits stored block headers: `BFINAL` flag + 16-bit `LEN` + 16-bit `NLEN`
+- Directly copies from `next_in` to `next_out` when possible
+- Falls back to copying through the window when direct copy isn't possible
+- Tracks hash table state for potential `deflateParams()` level changes
+
+#### `deflate_huff.c`
+
+The **Huffman-only** strategy emits every byte as a literal with no LZ77
+matching. Used with `Z_HUFFMAN_ONLY` strategy:
+
+```c
+Z_INTERNAL block_state deflate_huff(deflate_state *s, int flush) {
+ for (;;) {
+ if (s->lookahead == 0) {
+ PREFIX(fill_window)(s);
+ if (s->lookahead == 0) {
+ if (flush == Z_NO_FLUSH) return need_more;
+ break;
+ }
+ }
+ bflush = zng_tr_tally_lit(s, s->window[s->strstart]);
+ s->lookahead--;
+ s->strstart++;
+ if (bflush) FLUSH_BLOCK(s, 0);
+ }
+}
+```
+
+#### `deflate_rle.c`
+
+The **RLE** strategy only searches for runs of identical bytes (distance = 1):
+
+- Does not use hash tables
+- Compares `scan[0] == scan[1] && scan[1] == scan[2]` to detect a run start
+- Uses `compare256_rle()` to find run length
+- Emits `zng_tr_tally_dist(s, 1, match_len - STD_MIN_MATCH)`
+
+---
+
+### Core Decompression Engine
+
+#### `inflate.c`
+
+The main decompression module, implementing a large state machine. Key functions:
+
+- **`inflateInit2()`** — Allocates unified buffer via `alloc_inflate()`,
+ initialises `inflate_state`, sets window bits and wrap mode.
+- **`inflate()`** — Main decompression entry point. A massive `switch` over
+ `inflate_mode` enum values.
+- **`inflateEnd()`** — Frees resources via `free_inflate()`.
+- **`inflateReset()`** — Resets state for reuse.
+- **`inflateSetDictionary()`** — Loads a preset dictionary.
+- **`inflateSync()`** — Searches for a valid sync point in corrupted data.
+- **`inflateCopy()`** — Deep-copies an inflate stream.
+- **`inflateMark()`** — Reports decompression progress.
+- **`updatewindow()`** — Copies decompressed data to the sliding window
+ for back-reference support.
+
+#### `inflate.h`
+
+Defines the `inflate_state` structure and the `inflate_mode` enum.
+
+The inflate state machine has these mode categories:
+
+1. **Header processing**: `HEAD → FLAGS → TIME → OS → EXLEN → EXTRA → NAME → COMMENT → HCRC → TYPE` (gzip) or `HEAD → DICTID/TYPE` (zlib) or `HEAD → TYPEDO` (raw)
+2. **Block reading**: `TYPE → TYPEDO → STORED/TABLE/LEN_/CHECK`
+3. **Data decoding**: `LEN → LENEXT → DIST → DISTEXT → MATCH`, `LIT → LEN`
+4. **Trailer**: `CHECK → LENGTH → DONE`
+
+```c
+struct ALIGNED_(64) inflate_state {
+ PREFIX3(stream) *strm;
+ inflate_mode mode;
+ int last; // Processing last block?
+ int wrap; // zlib/gzip/raw mode
+ int havedict;
+ int flags; // gzip header flags
+ unsigned long check; // Running checksum
+ unsigned long total; // Running output count
+
+ // Sliding window
+ unsigned wbits; // log2(window size)
+ uint32_t wsize;
+ uint32_t whave; // Valid bytes in window
+ uint32_t wnext; // Write index
+ unsigned char *window;
+
+ // Bit accumulator
+ uint64_t hold; // 64-bit input bit accumulator
+ unsigned bits; // Bits in hold
+
+ // Code tables
+ unsigned lenbits; // Index bits for length codes
+ code const *lencode; // Length/literal code table
+ code const *distcode; // Distance code table
+ unsigned distbits; // Index bits for distance codes
+
+ // Dynamic table building
+ unsigned ncode, nlen, ndist;
+ uint32_t have;
+ code *next;
+
+ uint16_t lens[320]; // Code lengths
+ uint16_t work[288]; // Work area
+ code codes[ENOUGH]; // Code tables (ENOUGH = 1924)
+};
+```
+
+#### `inflate_p.h`
+
+Private inflate helpers including checksum computation wrappers:
+
+```c
+static inline void inf_chksum_cpy(PREFIX3(stream) *strm, uint8_t *dst,
+ const uint8_t *src, uint32_t copy) {
+ struct inflate_state *state = (struct inflate_state*)strm->state;
+ if (state->flags) // gzip mode
+ strm->adler = state->check = FUNCTABLE_CALL(crc32_copy)(...);
+ else // zlib mode
+ strm->adler = state->check = FUNCTABLE_CALL(adler32_copy)(...);
+}
+```
+
+#### `inftrees.c` / `inftrees.h`
+
+Builds Huffman decoding tables for inflate. The `code` structure:
+
+```c
+typedef struct {
+ unsigned char bits; // Bits in this code part
+ unsigned char op; // Operation: literal, table link, length/distance, EOB, invalid
+ uint16_t val; // Value or table offset
+} code;
+```
+
+`zng_inflate_table()` constructs a two-level table from code lengths:
+
+```c
+int zng_inflate_table(codetype type, uint16_t *lens, unsigned codes,
+ code **table, unsigned *bits, uint16_t *work);
+```
+
+The `ENOUGH` constant (1924) is the proven maximum table size:
+- `ENOUGH_LENS = 1332` for literal/length codes (286 symbols, root 10 bits, max 15 bits)
+- `ENOUGH_DISTS = 592` for distance codes (30 symbols, root 9 bits, max 15 bits)
+
+#### `inffast_tpl.h`
+
+Template for the fast inflate inner loop. Processes literal/length codes
+without returning to the main `inflate()` state machine loop, significantly
+reducing overhead for long runs of data. Architecture-specific versions
+(`inflate_fast_sse2`, `inflate_fast_avx2`, etc.) instantiate this template
+with SIMD chunk copying.
+
+#### `infback.c`
+
+An alternative inflate interface where the caller provides input/output
+callbacks instead of managing buffers directly. Used by specialised
+applications that need fine-grained control over I/O.
+
+---
+
+### Huffman Tree Management
+
+#### `trees.c`
+
+Constructs and emits Huffman trees for deflate output. Key functions:
+
+- **`zng_tr_init()`** — Initialises tree descriptors
+- **`init_block()`** — Zeroes frequency counts for a new block
+- **`build_tree()`** — Constructs a Huffman tree from frequency data
+- **`gen_bitlen()`** — Generates optimal bit lengths
+- **`scan_tree()`** — Scans tree for repeat counts (for code length encoding)
+- **`send_tree()`** — Emits encoded tree structure
+- **`build_bl_tree()`** — Builds the bit-length tree for encoding the main trees
+- **`send_all_trees()`** — Emits all three trees (literal, distance, bit-length)
+- **`compress_block()`** — Emits compressed data using the specified trees
+- **`detect_data_type()`** — Heuristic for binary vs. text classification
+- **`zng_tr_flush_block()`** — Decides between stored, static, or dynamic blocks
+- **`zng_tr_align()`** — Pads to byte boundary between blocks
+
+Three types of deflate blocks:
+1. **Stored** (`STORED_BLOCK = 0`) — No compression
+2. **Static trees** (`STATIC_TREES = 1`) — Fixed, predefined Huffman codes
+3. **Dynamic trees** (`DYN_TREES = 2`) — Custom trees optimised for the data
+
+#### `trees.h`
+
+Declares tree-related constants and function prototypes:
+
+```c
+#define DIST_CODE_LEN 512
+#define MAX_BL_BITS 7
+```
+
+#### `trees_emit.h`
+
+Inline functions and macros for bit-level output:
+
+- **`send_bits()`** — Packs bits into the 64-bit `bi_buf`, flushing when full
+- **`send_code()`** — Emits a Huffman code using `send_bits()`
+- **`bi_windup()`** — Flushes remaining bits and aligns to byte boundary
+- **`zng_tr_emit_tree()`** — Emits block type marker
+- **`zng_tr_emit_end_block()`** — Emits end-of-block code
+- **`zng_tr_emit_lit()`** — Emits a literal using tree lookup
+- **`zng_tr_emit_dist()`** — Emits a length/distance pair
+
+The 64-bit bit buffer:
+```c
+#define send_bits(s, t_val, t_len, bi_buf, bi_valid) { \
+ uint64_t val = (uint64_t)t_val; \
+ uint32_t len = (uint32_t)t_len; \
+ uint32_t total_bits = bi_valid + len; \
+ if (total_bits < BIT_BUF_SIZE && bi_valid < BIT_BUF_SIZE) { \
+ bi_buf |= val << bi_valid; \
+ bi_valid = total_bits; \
+ } else { \
+ /* flush and continue */ \
+ } \
+}
+```
+
+#### `trees_tbl.h`
+
+Precomputed static Huffman tree tables:
+- `static_ltree[L_CODES+2]` — Static literal/length tree values
+- `static_dtree[D_CODES]` — Static distance tree values
+- `zng_dist_code[DIST_CODE_LEN]` — Distance to distance-code mapping
+- `zng_length_code[STD_MAX_MATCH-STD_MIN_MATCH+1]` — Length to length-code mapping
+- `extra_lbits[]`, `extra_dbits[]`, `extra_blbits[]` — Extra bits per code
+- `lbase_extra[]`, `dbase_extra[]` — Combined base+extra tables for single-lookup
+
+---
+
+### Hash Table and Match Finding
+
+#### `insert_string.c`
+
+Implements hash table insert operations for deflate. Two variants:
+- **`insert_string()`** — Standard insert for levels 1–8
+- **`insert_string_roll()`** — Rolling insert for level 9
+
+#### `insert_string_tpl.h`
+
+Template for hash table operations:
+
+```c
+Z_FORCEINLINE static uint32_t UPDATE_HASH(uint32_t h, uint32_t val) {
+ HASH_CALC(h, val);
+ return h & HASH_CALC_MASK;
+}
+
+Z_FORCEINLINE static uint32_t QUICK_INSERT_VALUE(deflate_state *const s,
+ uint32_t str, uint32_t val) {
+ // Compute hash, insert into head[], update prev[]
+ hm = HASH_CALC_VAR & HASH_CALC_MASK;
+ head = s->head[hm];
+ if (LIKELY(head != str)) {
+ s->prev[str & W_MASK(s)] = (Pos)head;
+ s->head[hm] = (Pos)str;
+ }
+ return head;
+}
+```
+
+#### `insert_string_p.h`
+
+Private header that includes `insert_string_tpl.h` and defines the hash
+function. The hash is computed from 4 bytes read at the current position.
+
+#### `match_tpl.h`
+
+Template for the `longest_match()` function family. Two variants are
+generated:
+- **`longest_match()`** — Standard match for levels 1–8
+- **`longest_match_slow()`** — Enhanced match for level 9 with re-rooting
+ of hash chains
+
+The match finding algorithm:
+1. Read 8 bytes at scan start and scan+offset for quick rejection
+2. Walk the hash chain (`prev[]` array)
+3. For each candidate, compare first/last bytes for early rejection
+4. Call `compare256()` to find actual match length
+5. Update best match if this one is longer
+6. Stop when chain length exhausted or `nice_match` reached
+
+```c
+Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, uint32_t cur_match) {
+ // ...
+ while (--chain_length) {
+ match = mbase_start + cur_match;
+ // Quick rejection: compare last bytes
+ if (zng_memread_8(match+offset) == scan_end &&
+ zng_memread_8(match) == scan_start) {
+ // Full comparison
+ len = compare256(scan+2, match+2) + 2;
+ if (len > best_len) {
+ s->match_start = cur_match;
+ best_len = len;
+ if (best_len >= nice_match) return best_len;
+ // Update scan_end for new best
+ }
+ }
+ cur_match = prev[cur_match & wmask];
+ }
+}
+```
+
+---
+
+### Checksum Implementations
+
+#### `adler32.c`
+
+Entry point for Adler-32 checksum computation. Dispatches to
+`FUNCTABLE_CALL(adler32)`:
+
+```c
+uint32_t Z_EXPORT PREFIX(adler32)(uint32_t adler, const unsigned char *buf, uint32_t len) {
+ if (buf == NULL) return ADLER32_INITIAL_VALUE;
+ return FUNCTABLE_CALL(adler32)(adler, buf, len);
+}
+```
+
+Also provides `adler32_combine()` for concatenating checksums without
+re-reading data.
+
+#### `adler32_p.h`
+
+Scalar Adler-32 implementation using unrolled loops:
+
+```c
+#define BASE 65521U // Largest prime < 65536
+#define NMAX 5552 // Largest n such that 255n(n+1)/2 + (n+1)(BASE-1) ≤ 2^32-1
+
+#define ADLER_DO1(sum1, sum2, buf, i) {(sum1) += buf[(i)]; (sum2) += (sum1);}
+#define ADLER_DO16(sum1, sum2, buf) {ADLER_DO8(sum1,sum2,buf,0); ADLER_DO8(sum1,sum2,buf,8);}
+```
+
+The Adler-32 value has two 16-bit halves:
+- `sum1` (lower 16 bits) = running sum of bytes mod `BASE`
+- `sum2` (upper 16 bits) = running sum of `sum1` values mod `BASE`
+
+#### `crc32.c`
+
+Entry point for CRC-32 computation. Dispatches to `FUNCTABLE_CALL(crc32)`:
+
+```c
+uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) {
+ if (buf == NULL) return CRC32_INITIAL_VALUE;
+ return FUNCTABLE_CALL(crc32)(crc, buf, len);
+}
+```
+
+#### `crc32_braid_p.h`
+
+Configures the **braided** CRC algorithm:
+
+```c
+#define BRAID_N 5 // Number of braids (interleaved CRC computations)
+#define BRAID_W 8 // Word width (8 bytes on 64-bit, 4 on 32-bit)
+#define POLY 0xedb88320 // CRC polynomial (reflected)
+```
+
+#### `crc32_braid_comb.c`
+
+Implements CRC-32 combination for `crc32_combine()`, allowing merging of
+checksums from independently-processed data segments.
+
+#### `crc32_chorba_p.h`
+
+The **Chorba** CRC-32 algorithm — a modern approach using pipeline-friendly
+interleaved computation, contributed by Kadatch & Jenkins (2010).
+
+---
+
+### Runtime Dispatch
+
+#### `cpu_features.c` / `cpu_features.h`
+
+Portable CPU feature detection. `cpu_check_features()` fills a
+`struct cpu_features` by calling the appropriate architecture-specific
+detector:
+
+```c
+void cpu_check_features(struct cpu_features *features) {
+ memset(features, 0, sizeof(struct cpu_features));
+#if defined(X86_FEATURES)
+ x86_check_features(&features->x86);
+#elif defined(ARM_FEATURES)
+ arm_check_features(&features->arm);
+ // ... etc.
+}
+```
+
+The `cpu_features` union:
+```c
+struct cpu_features {
+#if defined(X86_FEATURES)
+ struct x86_cpu_features x86;
+#elif defined(ARM_FEATURES)
+ struct arm_cpu_features arm;
+ // ...
+};
+```
+
+#### `functable.c` / `functable.h`
+
+The function dispatch table. All performance-critical functions are called
+through `functable`:
+
+```c
+struct functable_s {
+ int (* force_init) (void);
+ uint32_t (* adler32) (uint32_t, const uint8_t*, size_t);
+ uint32_t (* adler32_copy) (uint32_t, uint8_t*, const uint8_t*, size_t);
+ uint8_t* (* chunkmemset_safe) (uint8_t*, uint8_t*, size_t, size_t);
+ uint32_t (* compare256) (const uint8_t*, const uint8_t*);
+ uint32_t (* crc32) (uint32_t, const uint8_t*, size_t);
+ uint32_t (* crc32_copy) (uint32_t, uint8_t*, const uint8_t*, size_t);
+ void (* inflate_fast) (PREFIX3(stream)*, uint32_t);
+ uint32_t (* longest_match) (deflate_state*, uint32_t);
+ uint32_t (* longest_match_slow) (deflate_state*, uint32_t);
+ void (* slide_hash) (deflate_state*);
+};
+```
+
+Initialisation uses a cascading priority system: start with generic C
+fallbacks, then override with progressively better SIMD implementations
+based on detected features:
+
+```
+Generic C → SSE2 → SSSE3 → SSE4.2 → AVX2 → AVX-512 → VNNI
+Generic C → NEON → CRC32 → PMULL+EOR3
+Generic C → VMX → POWER8 → POWER9
+```
+
+Thread safety is ensured by atomic pointer stores and memory barriers.
+
+When `DISABLE_RUNTIME_CPU_DETECTION` is defined, all dispatch is resolved
+at compile time via `native_` prefixed macros.
+
+---
+
+### Build System Infrastructure
+
+#### `zbuild.h`
+
+Central build-system header. Defines:
+- POSIX feature test macros
+- Compiler attribute wrappers (`Z_TARGET`, `Z_FORCEINLINE`, `Z_FALLTHROUGH`)
+- `ssize_t` definition for MSVC
+- Platform-specific `Z_EXPORT` / `Z_INTERNAL` visibility macros
+- `ALIGNED_()` macro for struct alignment
+
+#### `zutil.h`
+
+Internal utility header. Defines:
+- `STD_MIN_MATCH = 3`, `STD_MAX_MATCH = 258`
+- `WANT_MIN_MATCH = 4` (internal performance optimisation)
+- `DEF_WBITS = MAX_WBITS`
+- Block type constants: `STORED_BLOCK = 0`, `STATIC_TREES = 1`, `DYN_TREES = 2`
+- Error messages array `z_errmsg[]`
+- Initial checksum values: `ADLER32_INITIAL_VALUE = 1`, `CRC32_INITIAL_VALUE = 0`
+- Wrapper overhead: `ZLIB_WRAPLEN = 6`, `GZIP_WRAPLEN = 18`
+- Deflate block overhead constants
+
+#### `zutil.c`
+
+Implements `z_errmsg[]`, default `zcalloc()` / `zcfree()` allocators, and
+`zlibCompileFlags()`.
+
+#### `zendian.h`
+
+Endianness detection and byte-swap macros for little-endian / big-endian architectures.
+
+#### `zmemory.h`
+
+Provides portable aligned memory read/write functions: `zng_memread_2()`,
+`zng_memread_4()`, `zng_memread_8()`, `zng_memwrite_2()`, `zng_memwrite_4()`.
+
+#### `zarch.h`
+
+Architecture detection macros. Identifies the target CPU family and word
+size (`ARCH_32BIT` / `ARCH_64BIT`), optimal comparison width (`OPTIMAL_CMP`).
+
+#### `arch_functions.h`
+
+Includes the appropriate `arch/<platform>/<platform>_functions.h` header
+to declare architecture-specific function variants.
+
+#### `arch_natives.h`
+
+Includes the appropriate `arch/<platform>/<platform>_natives.h` header
+to define `native_` macros for compile-time dispatch.
+
+---
+
+### Architecture-Specific Implementations
+
+Each directory under `arch/` follows the same pattern:
+
+```
+arch/<platform>/
+├── <platform>_features.c / .h # CPU feature detection
+├── <platform>_functions.h # Function declarations
+├── <platform>_natives.h # Compile-time dispatch macros
+├── adler32_*.c # SIMD Adler-32
+├── crc32_*.c # SIMD CRC-32
+├── chunkset_*.c # SIMD memory set (for inflate)
+├── compare256_*.c # SIMD 256-byte comparison (for match finding)
+├── slide_hash_*.c # SIMD hash table sliding
+└── Makefile.in # Makefile fragment
+```
+
+#### `arch/generic/`
+
+Portable C fallback implementations:
+- `adler32_c.c` — Scalar Adler-32
+- `crc32_braid_c.c` — Braided CRC-32
+- `crc32_chorba_c.c` — Chorba CRC-32 (generic)
+- `compare256_c.c` — Byte-by-byte / word comparison
+- `chunkset_c.c` — Scalar chunk copy for inflate
+- `slide_hash_c.c` — Scalar hash table slide
+
+These are always compiled and serve as the baseline when no SIMD is available.
+
+#### `arch/x86/`
+
+x86 SIMD implementations spanning SSE2 through AVX-512:
+- `adler32_ssse3.c`, `adler32_avx2.c`, `adler32_avx512.c`, `adler32_avx512_vnni.c`, `adler32_sse42.c`
+- `crc32_pclmulqdq.c`, `crc32_vpclmulqdq_avx2.c`, `crc32_vpclmulqdq_avx512.c`
+- `crc32_chorba_sse2.c`, `crc32_chorba_sse41.c`
+- `compare256_sse2.c`, `compare256_avx2.c`, `compare256_avx512.c`
+- `chunkset_sse2.c`, `chunkset_ssse3.c`, `chunkset_avx2.c`, `chunkset_avx512.c`
+- `slide_hash_sse2.c`, `slide_hash_avx2.c`
+- `x86_features.c` — CPUID-based feature detection
+
+#### `arch/arm/`
+
+ARM SIMD implementations:
+- `adler32_neon.c` — NEON Adler-32
+- `crc32_armv8.c` — Hardware CRC-32 instructions
+- `crc32_armv8_pmull_eor3.c` — PMULL polynomial multiply with EOR3
+- `compare256_neon.c` — NEON comparison
+- `chunkset_neon.c` — NEON chunk copy
+- `slide_hash_neon.c` — NEON hash slide
+- `slide_hash_armv6.c` — ARMv6 SIMD hash slide
+- `arm_features.c` — Runtime feature detection
+
+---
+
+## Data Flow
+
+### Compression Data Flow
+
+```
+User calls deflate(strm, flush)
+ │
+ ├─ Header emission (if status == INIT_STATE or GZIP_STATE)
+ │ └─ zlib: CMF + FLG bytes
+ │ └─ gzip: ID1+ID2+CM+FLG+MTIME+XFL+OS + optional fields
+ │
+ ├─ Strategy function call (e.g., deflate_slow)
+ │ │
+ │ ├─ fill_window(): read from next_in into window[]
+ │ │ └─ slide_hash() when window fills (FUNCTABLE dispatch)
+ │ │
+ │ ├─ Hash insert: insert_string / quick_insert_string
+ │ │ └─ head[hash] = position; prev[position] = old_head
+ │ │
+ │ ├─ Match finding: longest_match() (FUNCTABLE dispatch)
+ │ │ └─ Walk prev[] chain, call compare256() (FUNCTABLE dispatch)
+ │ │
+ │ ├─ Tally: zng_tr_tally_lit() or zng_tr_tally_dist()
+ │ │ └─ Store in sym_buf (or d_buf/l_buf)
+ │ │ └─ Update dyn_ltree[].Freq, dyn_dtree[].Freq
+ │ │
+ │ └─ Block flush: zng_tr_flush_block()
+ │ ├─ build_tree() for literal, distance, bit-length trees
+ │ ├─ Compare opt_len (dynamic) vs static_len vs stored size
+ │ ├─ send_all_trees() + compress_block() (dynamic)
+ │ │ or compress_block(static_ltree, static_dtree)
+ │ │ or stored block
+ │ └─ Output through pending_buf → next_out
+ │
+ ├─ Checksum update: strm->adler via adler32() or crc32()
+ │
+ └─ Trailer emission (if flush == Z_FINISH)
+ └─ zlib: 4-byte Adler-32
+ └─ gzip: 4-byte CRC-32 + 4-byte ISIZE
+```
+
+### Decompression Data Flow
+
+```
+User calls inflate(strm, flush)
+ │
+ ├─ Header parsing (HEAD → TYPE modes)
+ │ └─ zlib: check CMF/FLG, read FDICT/Adler
+ │ └─ gzip: check magic, parse FLG bits, skip extra/name/comment
+ │
+ ├─ Block processing (TYPE → data modes)
+ │ │
+ │ ├─ STORED: read LEN/NLEN, copy raw bytes
+ │ │
+ │ ├─ TABLE → LENLENS → CODELENS:
+ │ │ └─ Read code-length code lengths
+ │ │ └─ inflate_table(CODES, ...) → decode code lengths
+ │ │ └─ inflate_table(LENS, ...) → build lencode table
+ │ │ └─ inflate_table(DISTS, ...) → build distcode table
+ │ │
+ │ └─ LEN → LENEXT → DIST → DISTEXT → MATCH:
+ │ ├─ Decode literal/length from lencode table
+ │ ├─ If literal: output byte
+ │ ├─ If length: read extra bits
+ │ │ └─ Decode distance from distcode table + extra bits
+ │ │ └─ Copy from window: chunkmemset_safe() (FUNCTABLE)
+ │ └─ inflate_fast() for hot inner loop (FUNCTABLE dispatch)
+ │
+ ├─ Checksum verification: inf_chksum() / inf_chksum_cpy()
+ │ └─ crc32_copy() or adler32_copy() (FUNCTABLE dispatch)
+ │
+ └─ Trailer verification (CHECK → DONE)
+ └─ Compare computed checksum with stored value
+```
+
+---
+
+## Cache-Line Optimisation
+
+The `deflate_state` structure is explicitly partitioned into cache lines
+(marked with comments in the source):
+
+- **Cacheline 0** (bytes 0–63): Stream pointer, pending buffer, status — accessed
+ on every `deflate()` call
+- **Cacheline 1** (bytes 64–127): Window pointers, lookahead, hash state — accessed
+ during match finding
+- **Cacheline 2** (bytes 128–191): Match parameters, compression level — accessed
+ during strategy decisions
+- **Cacheline 3** (bytes 192–255): Padding for tree data alignment
+
+The Huffman trees (`dyn_ltree`, `dyn_dtree`, `bl_tree`) follow in subsequent
+cache lines, accessed only during tree construction and block flushing.
+
+---
+
+## Conditional Compilation
+
+Key preprocessor macros controlling behaviour:
+
+| Macro | Effect |
+|---|---|
+| `ZLIB_COMPAT` | Enable zlib-compatible API (rename all symbols) |
+| `WITH_GZFILEOP` | Include gzip file I/O functions |
+| `NO_GZIP` | Disable gzip header support (prevents `GZIP` / `GUNZIP` from being defined) |
+| `LIT_MEM` | Use separate distance/length buffers |
+| `NO_LIT_MEM` | Force overlaid `sym_buf` |
+| `NO_QUICK_STRATEGY` | Disable `deflate_quick`, use `deflate_fast` for level 1 |
+| `NO_MEDIUM_STRATEGY` | Disable `deflate_medium`, use `deflate_fast`/`deflate_slow` |
+| `DISABLE_RUNTIME_CPU_DETECTION` | Compile-time function selection only |
+| `WITH_ALL_FALLBACKS` | Build all generic fallbacks (useful for benchmarking) |
+| `WITH_REDUCED_MEM` | Smaller buffers for memory-constrained environments |
+| `INFLATE_STRICT` | Strict distance checking in inflate |
+| `INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR` | Zero-fill invalid distances |
+| `ZLIB_DEBUG` | Enable debug assertions and tracing |
+| `WITHOUT_CHORBA` | Disable Chorba CRC-32 algorithm |
diff --git a/docs/handbook/neozip/arm-optimizations.md b/docs/handbook/neozip/arm-optimizations.md
new file mode 100644
index 0000000000..c7fa94e505
--- /dev/null
+++ b/docs/handbook/neozip/arm-optimizations.md
@@ -0,0 +1,403 @@
+# ARM Optimizations
+
+## Overview
+
+Neozip provides ARM SIMD optimizations using NEON (Advanced SIMD), CRC32
+hardware instructions, and PMULL (polynomial multiply long). These cover
+both AArch32 (ARMv7+) and AArch64 (ARMv8+) targets. All implementations
+reside in `arch/arm/`.
+
+---
+
+## Source Files
+
+| File | ISA Extension | Function |
+|---|---|---|
+| `arm_features.c/h` | — | Feature detection |
+| `adler32_neon.c` | NEON | Adler-32 checksum |
+| `chunkset_neon.c` | NEON | Pattern fill for inflate |
+| `compare256_neon.c` | NEON | 256-byte string comparison |
+| `crc32_acle.c` | CRC32 | Hardware CRC-32 |
+| `crc32_pmull.c` | PMULL | CLMUL-based CRC-32 |
+| `insert_string_acle.c` | CRC32 | CRC-based hash insertion |
+| `slide_hash_neon.c` | NEON | Hash table slide |
+| `inffast_neon.c` | NEON | Fast inflate inner loop |
+
+---
+
+## Feature Detection
+
+### `arm_cpu_features` Structure
+
+```c
+struct arm_cpu_features {
+ int has_simd; // ARMv6 SIMD (AArch32 only)
+ int has_neon; // NEON / ASIMD
+ int has_crc32; // CRC32 instructions (ARMv8.0-A optional, ARMv8.1-A mandatory)
+ int has_pmull; // PMULL (polynomial multiply long, 64→128-bit)
+ int has_eor3; // SHA3 EOR3 instruction (ARMv8.2-A+SHA3)
+ int has_fast_pmull; // High-perf PMULL
+};
+```
+
+### Linux Detection
+
+```c
+void Z_INTERNAL arm_check_features(struct cpu_features *features) {
+#if defined(__linux__)
+ unsigned long hwcap = getauxval(AT_HWCAP);
+#if defined(__aarch64__)
+ features->arm.has_neon = !!(hwcap & HWCAP_ASIMD);
+ features->arm.has_crc32 = !!(hwcap & HWCAP_CRC32);
+ features->arm.has_pmull = !!(hwcap & HWCAP_PMULL);
+ unsigned long hwcap2 = getauxval(AT_HWCAP2);
+ features->arm.has_eor3 = !!(hwcap2 & HWCAP2_SHA3);
+#else // AArch32
+ unsigned long hwcap2 = getauxval(AT_HWCAP2);
+ features->arm.has_simd = !!(hwcap & HWCAP_ARM_VFPv3);
+ features->arm.has_neon = !!(hwcap & HWCAP_ARM_NEON);
+ features->arm.has_crc32 = !!(hwcap2 & HWCAP2_CRC32);
+ features->arm.has_pmull = !!(hwcap2 & HWCAP2_PMULL);
+#endif
+#endif
+}
+```
+
+### macOS/iOS Detection
+
+```c
+#if defined(__APPLE__)
+ // NEON is always available on Apple Silicon
+ features->arm.has_neon = 1;
+ features->arm.has_crc32 = has_feature("hw.optional.armv8_crc32");
+ features->arm.has_pmull = has_feature("hw.optional.arm.FEAT_PMULL");
+#endif
+```
+
+### Windows Detection
+
+```c
+#if defined(_WIN32)
+ features->arm.has_neon = IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE);
+ features->arm.has_crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
+#endif
+```
+
+---
+
+## NEON Adler-32 (`adler32_neon.c`)
+
+Uses 128-bit NEON registers to process 16 bytes per iteration:
+
+```c
+Z_INTERNAL uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len) {
+ uint32_t s1 = adler & 0xffff;
+ uint32_t s2 = adler >> 16;
+
+ // Position weight vector: {16,15,14,...,1}
+ static const uint8_t taps[] = {16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1};
+ uint8x16_t vtaps = vld1q_u8(taps);
+
+ while (len >= 16) {
+ uint32x4_t vs1 = vdupq_n_u32(0);
+ uint32x4_t vs2 = vdupq_n_u32(0);
+ uint32x4_t vs1_0 = vdupq_n_u32(s1);
+
+ // Process up to NMAX bytes before reduction
+ size_t block = MIN(len, NMAX);
+ size_t nblocks = block / 16;
+
+ for (size_t i = 0; i < nblocks; i++) {
+ uint8x16_t vbuf = vld1q_u8(buf);
+
+ // s1 += sum(bytes)
+ uint16x8_t sum16 = vpaddlq_u8(vbuf);
+ uint32x4_t sum32 = vpaddlq_u16(sum16);
+ vs1 = vaddq_u32(vs1, sum32);
+
+ // s2 += 16 * s1_prev + weighted_sum(bytes) — accumulate, don't overwrite
+ vs2 = vaddq_u32(vs2, vshlq_n_u32(vs1_0, 4)); // += 16 * s1
+ // Multiply-accumulate: weighted position sum
+ uint16x8_t prod = vmull_u8(vget_low_u8(vbuf), vget_low_u8(vtaps));
+ prod = vmlal_u8(prod, vget_high_u8(vbuf), vget_high_u8(vtaps));
+ vs2 = vaddq_u32(vs2, vpaddlq_u16(prod));
+
+ vs1_0 = vs1;
+ buf += 16;
+ }
+
+ // Horizontal reduction
+ s1 += vaddvq_u32(vs1);
+ s2 += vaddvq_u32(vs2);
+ s1 %= BASE;
+ s2 %= BASE;
+ len -= nblocks * 16;
+ }
+ return s1 | (s2 << 16);
+}
+```
+
+Key NEON intrinsics used:
+- `vpaddlq_u8` — Pairwise add long (u8→u16)
+- `vpaddlq_u16` — Pairwise add long (u16→u32)
+- `vmull_u8` — Multiply long (u8×u8→u16)
+- `vmlal_u8` — Multiply-accumulate long
+- `vaddvq_u32` — Horizontal sum across vector (AArch64)
+
+---
+
+## Hardware CRC-32 (`crc32_acle.c`)
+
+Uses ARMv8 CRC32 instructions via ACLE (ARM C Language Extensions):
+
+```c
+Z_INTERNAL uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len) {
+ crc = ~crc; // CRC32 instructions use inverted convention
+
+ // Process 8 bytes at a time
+ while (len >= 8) {
+ crc = __crc32d(crc, *(uint64_t *)buf);
+ buf += 8;
+ len -= 8;
+ }
+
+ // Process 4 bytes
+ if (len >= 4) {
+ crc = __crc32w(crc, *(uint32_t *)buf);
+ buf += 4;
+ len -= 4;
+ }
+
+ // Process remaining bytes
+ while (len--) {
+ crc = __crc32b(crc, *buf++);
+ }
+
+ return ~crc;
+}
+```
+
+The `__crc32b`, `__crc32w`, `__crc32d` intrinsics compile to single CRC32
+instructions, computing CRC-32 of 1/4/8 bytes per instruction.
+
+---
+
+## PMULL CRC-32 (`crc32_pmull.c`)
+
+For larger data, polynomial multiply (PMULL) provides higher throughput
+via carry-less multiplication, similar to x86 PCLMULQDQ:
+
+```c
+Z_INTERNAL uint32_t crc32_pmull(uint32_t crc, const uint8_t *buf, size_t len) {
+ poly128_t fold_const;
+ uint64x2_t crc0, crc1, crc2, crc3;
+
+ // Initialize four accumulators with first 64 bytes
+ crc0 = veorq_u64(vld1q_u64((uint64_t *)buf),
+ vcombine_u64(vcreate_u64(crc), vcreate_u64(0)));
+ // ... crc1, crc2, crc3
+
+ // Main fold loop: 64 bytes per iteration
+ while (len >= 64) {
+ // vmull_p64: 64×64→128-bit polynomial multiply
+ poly128_t h0 = vmull_p64(vgetq_lane_u64(crc0, 0), fold_lo);
+ poly128_t h1 = vmull_p64(vgetq_lane_u64(crc0, 1), fold_hi);
+ crc0 = veorq_u64(vreinterpretq_u64_p128(h0),
+ vreinterpretq_u64_p128(h1));
+ crc0 = veorq_u64(crc0, vld1q_u64((uint64_t *)buf));
+ // repeat for crc1..crc3
+ }
+
+ // Barrett reduction to 32-bit CRC
+}
+```
+
+With `has_eor3` (SHA3 extension), three-way XOR is done in a single
+instruction:
+
+```c
+#ifdef ARM_FEATURE_SHA3
+ // EOR3: a ^= b ^ c in one instruction
+ crc0 = vreinterpretq_u64_u8(veor3q_u8(
+ vreinterpretq_u8_p128(h0),
+ vreinterpretq_u8_p128(h1),
+ vreinterpretq_u8_u64(data)));
+#endif
+```
+
+---
+
+## NEON String Comparison (`compare256_neon.c`)
+
+```c
+Z_INTERNAL uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1) {
+ uint32_t len = 0;
+ do {
+ uint8x16_t v0 = vld1q_u8(src0 + len);
+ uint8x16_t v1 = vld1q_u8(src1 + len);
+ uint8x16_t cmp = vceqq_u8(v0, v1);
+
+ // Check if all bytes matched
+ uint64_t mask_lo = vgetq_lane_u64(vreinterpretq_u64_u8(cmp), 0);
+ uint64_t mask_hi = vgetq_lane_u64(vreinterpretq_u64_u8(cmp), 1);
+
+ if (mask_lo != ~0ULL) {
+ // First mismatch in lower 8 bytes
+ return len + (__builtin_ctzll(~mask_lo) >> 3);
+ }
+ if (mask_hi != ~0ULL) {
+ return len + 8 + (__builtin_ctzll(~mask_hi) >> 3);
+ }
+ len += 16;
+ } while (len < 256);
+ return 256;
+}
+```
+
+---
+
+## NEON Slide Hash (`slide_hash_neon.c`)
+
+```c
+Z_INTERNAL void slide_hash_neon(deflate_state *s) {
+ unsigned n;
+ Pos *p;
+ uint16x8_t vw = vdupq_n_u16((uint16_t)s->w_size);
+
+ n = HASH_SIZE;
+ p = &s->head[n];
+ do {
+ p -= 8;
+ uint16x8_t val = vld1q_u16(p);
+ val = vqsubq_u16(val, vw); // Saturating subtract
+ vst1q_u16(p, val);
+ n -= 8;
+ } while (n);
+
+ // Same loop for s->prev[0..w_size-1]
+ n = s->w_size;
+ p = &s->prev[n];
+ do {
+ p -= 8;
+ uint16x8_t val = vld1q_u16(p);
+ val = vqsubq_u16(val, vw);
+ vst1q_u16(p, val);
+ n -= 8;
+ } while (n);
+}
+```
+
+`vqsubq_u16` performs unsigned saturating subtract — values below zero
+clamp to zero rather than wrapping.
+
+---
+
+## NEON Chunk Memory Set (`chunkset_neon.c`)
+
+Used during inflate for back-reference copies:
+
+```c
+Z_INTERNAL uint8_t* chunkmemset_safe_neon(uint8_t *out, uint8_t *from,
+ unsigned dist, unsigned len) {
+ if (dist == 1) {
+ // Broadcast single byte
+ uint8x16_t vfill = vdupq_n_u8(*from);
+ while (len >= 16) {
+ vst1q_u8(out, vfill);
+ out += 16;
+ len -= 16;
+ }
+ } else if (dist == 2) {
+ uint8x16_t v = vreinterpretq_u8_u16(vdupq_n_u16(*(uint16_t *)from));
+ // ...
+ } else if (dist >= 16) {
+ // Standard copy
+ while (len >= 16) {
+ vst1q_u8(out, vld1q_u8(from));
+ out += 16;
+ from += 16;
+ len -= 16;
+ }
+ } else {
+ // Replicate dist-byte pattern into 16 bytes
+ // ...
+ }
+ return out;
+}
+```
+
+---
+
+## CRC-Based Hash Insertion (`insert_string_acle.c`)
+
+When ARMv8 CRC32 instructions are available, they provide excellent hash
+distribution:
+
+```c
+Z_INTERNAL Pos insert_string_acle(deflate_state *s, Pos str, unsigned count) {
+ Pos idx;
+ for (unsigned i = 0; i < count; i++) {
+ uint32_t val = *(uint32_t *)(s->window + str + i);
+ uint32_t h = __crc32w(0, val);
+ h &= s->hash_mask;
+ idx = s->head[h];
+ s->prev[(str + i) & s->w_mask] = idx;
+ s->head[h] = (Pos)(str + i);
+ }
+ return idx;
+}
+```
+
+---
+
+## CMake Configuration
+
+ARM features are detected via compiler intrinsic checks:
+
+```cmake
+option(WITH_NEON "Build with NEON SIMD" ON)
+option(WITH_ACLE "Build with ACLE CRC" ON)
+
+# AArch64 compiler flags
+if(WITH_NEON)
+ check_c_compiler_flag("-march=armv8-a+simd" HAS_NEON)
+ if(HAS_NEON)
+ set_property(SOURCE arch/arm/adler32_neon.c APPEND
+ PROPERTY COMPILE_OPTIONS -march=armv8-a+simd)
+ # ... other NEON sources
+ add_definitions(-DARM_NEON)
+ endif()
+endif()
+
+if(WITH_ACLE)
+ check_c_compiler_flag("-march=armv8-a+crc" HAS_CRC32)
+ if(HAS_CRC32)
+ set_property(SOURCE arch/arm/crc32_acle.c APPEND
+ PROPERTY COMPILE_OPTIONS -march=armv8-a+crc)
+ add_definitions(-DARM_ACLE_CRC_HASH)
+ endif()
+ check_c_compiler_flag("-march=armv8-a+crypto" HAS_PMULL)
+ if(HAS_PMULL)
+ set_property(SOURCE arch/arm/crc32_pmull.c APPEND
+ PROPERTY COMPILE_OPTIONS -march=armv8-a+crypto)
+ add_definitions(-DARM_PMULL_CRC)
+ endif()
+endif()
+```
+
+---
+
+## Performance Notes
+
+| Operation | NEON | CRC32 HW | PMULL |
+|---|---|---|---|
+| Adler-32 | ~8 bytes/cycle | — | — |
+| CRC-32 | — | ~4 bytes/cycle | ~16 bytes/cycle |
+| CRC-32+Copy | — | — | ~12 bytes/cycle |
+| Compare256 | ~16 bytes/cycle | — | — |
+| Slide Hash | ~8 entries/cycle | — | — |
+
+Apple Silicon (M1+) provides particularly fast CRC32 and PMULL
+implementations with low latency per instruction.
+
+On Cortex-A55 and similar in-order cores, the throughput numbers are roughly
+halved compared to Cortex-A76/A78 and Apple Silicon out-of-order cores.
diff --git a/docs/handbook/neozip/building.md b/docs/handbook/neozip/building.md
new file mode 100644
index 0000000000..1cd36090cc
--- /dev/null
+++ b/docs/handbook/neozip/building.md
@@ -0,0 +1,491 @@
+# Building Neozip
+
+## Prerequisites
+
+- **CMake** ≥ 3.14 (up to 4.2.1 supported)
+- A **C11** compiler (GCC ≥ 5, Clang ≥ 5, MSVC ≥ 2013, Intel ICC, NVIDIA HPC)
+- Optional: C++ compiler for Google Test, benchmarks, and fuzz targets
+- Optional: Google Test (fetched automatically by CMake)
+- Optional: Google Benchmark (fetched automatically)
+
+---
+
+## Quick Start
+
+### Default Build (Native API)
+
+```bash
+cd neozip
+cmake -B build -DCMAKE_BUILD_TYPE=Release
+cmake --build build -j$(nproc)
+```
+
+This produces `libz-ng.so` (or `.dylib` / `.dll`) and `libz-ng.a` with the
+`zng_` prefixed API.
+
+### zlib-Compatible Build
+
+```bash
+cmake -B build -DCMAKE_BUILD_TYPE=Release -DZLIB_COMPAT=ON
+cmake --build build -j$(nproc)
+```
+
+This produces `libz.so` and `libz.a` with standard zlib symbol names,
+suitable as a drop-in replacement for system zlib.
+
+### Install
+
+```bash
+cmake --install build --prefix /usr/local
+```
+
+---
+
+## CMake Configuration Options
+
+### Core Options
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `ZLIB_COMPAT` | BOOL | `OFF` | Build with zlib-compatible API. Produces `libz` instead of `libz-ng`. All public symbols use standard zlib names. Forces `WITH_GZFILEOP=ON`. |
+| `ZLIB_ALIASES` | BOOL | `ON` | Provide zlib-compatible CMake targets regardless of `ZLIB_COMPAT`. |
+| `WITH_GZFILEOP` | BOOL | `ON` | Compile gzip file I/O functions (`gzopen`, `gzread`, `gzwrite`, etc.). Forced `ON` in compat mode. |
+| `WITH_OPTIM` | BOOL | `ON` | Enable architecture-specific optimisations. Set `OFF` to use only generic C code. |
+| `WITH_NEW_STRATEGIES` | BOOL | `ON` | Use the `deflate_quick` and `deflate_medium` strategy functions. When `OFF`, only `deflate_fast`, `deflate_slow`, and special strategies are used. |
+| `WITH_REDUCED_MEM` | BOOL | `OFF` | Reduce memory usage at the cost of performance. For memory-constrained environments. |
+| `WITH_ALL_FALLBACKS` | BOOL | `OFF` | Build all generic fallback functions even when SIMD is available. Useful for benchmarking or running `gbench` comparisons. |
+| `WITH_CRC32_CHORBA` | BOOL | `ON` | Enable the optimised Chorba CRC-32 algorithm. |
+
+### Native Instructions
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `WITH_NATIVE_INSTRUCTIONS` | BOOL | `OFF` | Compile with `-march=native` (GCC/Clang) or equivalent. Produces a binary optimised for the build machine. Disables runtime CPU detection (`WITH_RUNTIME_CPU_DETECTION` is forced `OFF`). |
+| `WITH_RUNTIME_CPU_DETECTION` | BOOL | `ON` | Build with runtime CPU feature detection. When `OFF`, function dispatch is resolved at compile time and only the features guaranteed by the target architecture are used. |
+| `NATIVE_ARCH_OVERRIDE` | STRING | (empty) | Override the native instruction flag. For example, `-march=haswell`. |
+
+### x86 SIMD Options
+
+These options are available when building on x86/x86-64:
+
+| Option | Type | Default | Depends On | Description |
+|---|---|---|---|---|
+| `WITH_SSE2` | BOOL | `ON` | — | Build with SSE2 intrinsics. Always available on x86-64. |
+| `WITH_SSSE3` | BOOL | `ON` | `WITH_SSE2` | Build with SSSE3 intrinsics. |
+| `WITH_SSE41` | BOOL | `ON` | `WITH_SSSE3` | Build with SSE4.1 intrinsics. |
+| `WITH_SSE42` | BOOL | `ON` | `WITH_SSE41` | Build with SSE4.2 intrinsics. |
+| `WITH_PCLMULQDQ` | BOOL | `ON` | `WITH_SSE42` | Build with PCLMULQDQ carryless multiply for CRC-32. |
+| `WITH_AVX2` | BOOL | `ON` | `WITH_SSE42` | Build with AVX2 intrinsics (also requires BMI2 at runtime). |
+| `WITH_AVX512` | BOOL | `ON` | `WITH_AVX2` | Build with AVX-512 (F, DQ, BW, VL) intrinsics. |
+| `WITH_AVX512VNNI` | BOOL | `ON` | `WITH_AVX512` | Build with AVX-512 VNNI for Adler-32 using `VPDPBUSD`. |
+| `WITH_VPCLMULQDQ` | BOOL | `ON` | `WITH_PCLMULQDQ`, `WITH_AVX2` | Build with VPCLMULQDQ vectorised CRC-32. |
+
+The dependency chain is enforced by CMake `cmake_dependent_option`. Disabling
+an earlier option automatically disables all options that depend on it.
+
+### ARM SIMD Options
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `WITH_ARMV6` | BOOL | `ON` (32-bit only) | Build with ARMv6 SIMD for `slide_hash`. |
+| `WITH_ARMV8` | BOOL | `ON` | Build with ARMv8 intrinsics (CRC32 instructions). |
+| `WITH_NEON` | BOOL | `ON` | Build with NEON intrinsics. |
+
+### PowerPC Options
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `WITH_ALTIVEC` | BOOL | `ON` | Build with AltiVec (VMX) optimisations. |
+| `WITH_POWER8` | BOOL | `ON` | Build with POWER8 (VSX) optimisations. |
+| `WITH_POWER9` | BOOL | `ON` | Build with POWER9 optimisations. |
+
+### RISC-V Options
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `WITH_RVV` | BOOL | `ON` | Build with RISC-V Vector extension intrinsics. |
+| `WITH_RISCV_ZBC` | BOOL | `ON` | Build with RISC-V Bit-manipulation CRC-32. |
+
+### IBM z/Architecture Options
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `WITH_DFLTCC_DEFLATE` | BOOL | `OFF` | Build with DFLTCC hardware compression on IBM Z. |
+| `WITH_DFLTCC_INFLATE` | BOOL | `OFF` | Build with DFLTCC hardware decompression on IBM Z. |
+| `WITH_CRC32_VX` | BOOL | `ON` | Build with vectorised CRC-32 on IBM Z. |
+
+### LoongArch Options
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `WITH_CRC32_LA` | BOOL | `ON` | Build with vectorised CRC-32 on LoongArch. |
+| `WITH_LSX` | BOOL | `ON` | Build with LoongArch LSX SIMD. |
+| `WITH_LASX` | BOOL | `ON` (depends on `WITH_LSX`) | Build with LoongArch LASX (256-bit) SIMD. |
+
+### Inflate Options
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `WITH_INFLATE_STRICT` | BOOL | `OFF` | Enable strict distance checking in inflate. Rejects distances greater than the output produced so far. |
+| `WITH_INFLATE_ALLOW_INVALID_DIST` | BOOL | `OFF` | Zero-fill invalid inflate distances instead of returning an error. |
+
+### Testing and Development Options
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `BUILD_TESTING` | BOOL | `ON` | Build the test suite. |
+| `WITH_GTEST` | BOOL | `ON` (if `BUILD_TESTING`) | Build Google Test-based tests. |
+| `WITH_FUZZERS` | BOOL | `OFF` (if `BUILD_TESTING`) | Build fuzz targets under `test/fuzz/`. |
+| `WITH_BENCHMARKS` | BOOL | `OFF` (if `BUILD_TESTING`) | Build Google Benchmark harnesses under `test/benchmarks/`. |
+| `WITH_BENCHMARK_APPS` | BOOL | `OFF` (if `BUILD_TESTING`) | Build application-level benchmarks. |
+| `WITH_SANITIZER` | STRING | `OFF` | Enable a sanitizer. Values: `OFF`, `memory`, `address`, `undefined`, `thread`. |
+| `WITH_CODE_COVERAGE` | BOOL | `OFF` | Enable code coverage reporting (lcov/gcov). |
+| `WITH_MAINTAINER_WARNINGS` | BOOL | `OFF` | Enable extra compiler warnings for project maintainers. |
+| `INSTALL_UTILS` | BOOL | `OFF` | Install `minigzip` and `minideflate` utilities. |
+
+### Symbol Prefix
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `ZLIB_SYMBOL_PREFIX` | STRING | (empty) | Add a custom prefix to all public symbols. Useful for embedding neozip into a larger library to avoid symbol conflicts. |
+
+---
+
+## C Standard Selection
+
+The default C standard is **C11**. You can override it:
+
+```bash
+cmake -B build -DCMAKE_C_STANDARD=99 # Use C99
+cmake -B build -DCMAKE_C_STANDARD=17 # Use C17 (requires CMake ≥ 3.21)
+cmake -B build -DCMAKE_C_STANDARD=23 # Use C23 (requires CMake ≥ 3.21)
+```
+
+Valid standards: `99`, `11`, `17`, `23`. The build will fail with a clear
+error message if an unsupported value is specified.
+
+---
+
+## Build Types
+
+When not using a multi-config generator (e.g., Ninja, Makefiles), the
+default build type is **Release**:
+
+| Build Type | Compiler Flags | Use Case |
+|---|---|---|
+| `Release` | `-O3` (GCC/Clang) | Production builds, benchmarks |
+| `Debug` | `-O0 -g` | Development, debugging |
+| `RelWithDebInfo` | `-O2 -g` | Profiling with debug symbols |
+| `MinSizeRel` | `-Os` | Size-constrained environments |
+
+```bash
+cmake -B build -DCMAKE_BUILD_TYPE=Debug
+```
+
+---
+
+## Cross-Compilation
+
+### Toolchain File
+
+```bash
+cmake -B build -DCMAKE_TOOLCHAIN_FILE=/path/to/toolchain.cmake
+```
+
+The build system will log the toolchain being used:
+```
+-- Using CMake toolchain: /path/to/toolchain.cmake
+```
+
+### Example: Cross-compiling for ARM64
+
+```cmake
+# aarch64-toolchain.cmake
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR aarch64)
+set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc)
+set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++)
+```
+
+```bash
+cmake -B build-arm64 \
+ -DCMAKE_TOOLCHAIN_FILE=aarch64-toolchain.cmake \
+ -DCMAKE_BUILD_TYPE=Release
+cmake --build build-arm64 -j$(nproc)
+```
+
+### Example: MinGW cross-compilation for Windows
+
+A pre-made toolchain file is provided:
+
+```bash
+cmake -B build-mingw \
+ -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-mingw32.cmake \
+ -DCMAKE_BUILD_TYPE=Release
+```
+
+---
+
+## zlib-Compat vs. Native Mode
+
+### API Differences
+
+| Aspect | `ZLIB_COMPAT=ON` | `ZLIB_COMPAT=OFF` (default) |
+|---|---|---|
+| Library name | `libz.so` / `libz.a` | `libz-ng.so` / `libz-ng.a` |
+| Header | `zlib.h` | `zlib-ng.h` |
+| Symbol prefix | `z_` (via mangling) | `zng_` |
+| Stream type | `z_stream` (with `unsigned long`) | `zng_stream` (with `uint32_t`) |
+| CMake target | `ZLIB::ZLIB` | `zlib-ng::zlib-ng` |
+| Config file | `zlib-config.cmake` | `zlib-ng-config.cmake` |
+| pkg-config | `zlib.pc` | `zlib-ng.pc` |
+| gzip file ops | Always included | Controlled by `WITH_GZFILEOP` |
+
+### Name Mangling
+
+In zlib-compat mode, `zlib_name_mangling.h` maps neozip's `zng_` names to
+standard `z_` names:
+
+```c
+#define zng_deflateInit deflateInit
+#define zng_inflate inflate
+#define zng_crc32 crc32
+// ... etc.
+```
+
+This is generated from `zlib_name_mangling.h.in` by CMake.
+
+---
+
+## Runtime CPU Detection vs. Native Build
+
+### Runtime Detection (default)
+
+With `WITH_RUNTIME_CPU_DETECTION=ON`:
+- The `functable.c` dispatch table is compiled
+- `cpu_features.c` queries CPUID/feature registers at first use
+- The binary works on any CPU of the target architecture
+- Slight overhead from indirect function calls through `functable`
+
+### Native Build
+
+With `WITH_NATIVE_INSTRUCTIONS=ON`:
+- `-march=native` (or equivalent) is passed to the compiler
+- `WITH_RUNTIME_CPU_DETECTION` is forced `OFF`
+- All dispatch is resolved at compile time via `native_` macros
+- The binary only works on CPUs with the same (or newer) features as the build machine
+- No dispatch overhead — direct function calls
+- May enable additional compiler auto-vectorisation
+
+```bash
+cmake -B build -DWITH_NATIVE_INSTRUCTIONS=ON -DCMAKE_BUILD_TYPE=Release
+```
+
+You can verify the selected native flag in the CMake output:
+```
+-- Performing Test HAVE_MARCH_NATIVE
+-- Performing Test HAVE_MARCH_NATIVE - Success
+```
+
+### When to Use Each
+
+| Scenario | Recommendation |
+|---|---|
+| Distribution package | Runtime detection (default) |
+| Application bundled with specific hardware | Native instructions |
+| Development and testing | Runtime detection |
+| Maximum performance on known hardware | Native instructions |
+| Benchmark comparisons | Both; compare results |
+
+---
+
+## Sanitizer Builds
+
+```bash
+# Address Sanitizer (memory errors)
+cmake -B build-asan -DWITH_SANITIZER=address -DCMAKE_BUILD_TYPE=Debug
+cmake --build build-asan
+
+# Memory Sanitizer (uninitialised reads)
+cmake -B build-msan -DWITH_SANITIZER=memory -DCMAKE_BUILD_TYPE=Debug
+cmake --build build-msan
+
+# Undefined Behaviour Sanitizer
+cmake -B build-ubsan -DWITH_SANITIZER=undefined -DCMAKE_BUILD_TYPE=Debug
+cmake --build build-ubsan
+
+# Thread Sanitizer (data races)
+cmake -B build-tsan -DWITH_SANITIZER=thread -DCMAKE_BUILD_TYPE=Debug
+cmake --build build-tsan
+```
+
+---
+
+## Code Coverage
+
+```bash
+cmake -B build-cov -DWITH_CODE_COVERAGE=ON -DCMAKE_BUILD_TYPE=Debug
+cmake --build build-cov
+cd build-cov
+ctest
+# Generate coverage report using lcov/gcov
+```
+
+---
+
+## Building with ccache
+
+```bash
+cmake -B build -DCMAKE_C_COMPILER_LAUNCHER=ccache
+cmake --build build -j$(nproc)
+```
+
+---
+
+## Building as a Subdirectory
+
+Neozip supports being included as a CMake subdirectory:
+
+```cmake
+add_subdirectory(neozip)
+target_link_libraries(myapp PRIVATE zlibstatic)
+```
+
+The `test/add-subdirectory-project/` directory contains an example.
+
+---
+
+## Output Artifacts
+
+After a successful build, the following artifacts are produced:
+
+### Libraries
+
+| Target | Static | Shared |
+|---|---|---|
+| Native mode | `libz-ng.a` | `libz-ng.so.2.3.90` (Linux) |
+| Compat mode | `libz.a` | `libz.so.1.3.1.zlib-ng` (Linux) |
+
+### Utilities (optional)
+
+| Utility | Description |
+|---|---|
+| `minigzip` | Minimal gzip compressor/decompressor |
+| `minideflate` | Minimal raw deflate tool |
+
+### Test Binaries (optional)
+
+| Binary | Description |
+|---|---|
+| `gtest_zlib` | Google Test test runner |
+| `example` | Classic zlib example |
+| `switchlevels` | Level-switching test |
+| `infcover` | Inflate code coverage test |
+
+---
+
+## Compiler-Specific Notes
+
+### GCC
+
+- Full support from GCC 5 onwards
+- `-march=native` works reliably
+- `-fno-lto` is automatically applied when `WITH_NATIVE_INSTRUCTIONS=OFF` to
+ prevent LTO from hoisting SIMD code into non-SIMD translation units
+
+### Clang
+
+- Full support from Clang 5 onwards
+- Supports all the same flags as GCC
+
+### MSVC
+
+- Minimum version: Visual Studio 2013 (MSVC 1800)
+- SSE2 macros (`__SSE__`, `__SSE2__`) are explicitly defined since MSVC does
+ not set them by default
+- Chorba SSE2/SSE4.1 variants require MSVC 2022 (version 1930+)
+
+### Intel ICC
+
+- Supports `-diag-disable=10441` for deprecation warning suppression
+- Classic Intel compiler flags are handled
+
+### NVIDIA HPC
+
+- Uses `-tp px` or `-tp native` for target selection
+- Supports the standard compilation flow
+
+---
+
+## Feature Summary
+
+At the end of configuration, CMake prints a feature summary showing which
+options are enabled. Example output:
+
+```
+-- Feature summary for zlib 1.3.1
+-- The following features have been enabled:
+-- * CMAKE_BUILD_TYPE, Build type: Release (default)
+-- * WITH_GZFILEOP
+-- * WITH_OPTIM
+-- * WITH_NEW_STRATEGIES
+-- * WITH_CRC32_CHORBA
+-- * WITH_RUNTIME_CPU_DETECTION
+-- * WITH_SSE2
+-- * WITH_SSSE3
+-- * WITH_SSE41
+-- * WITH_SSE42
+-- * WITH_PCLMULQDQ
+-- * WITH_AVX2
+-- * WITH_AVX512
+-- * WITH_AVX512VNNI
+-- * WITH_VPCLMULQDQ
+```
+
+---
+
+## Verifying the Build
+
+```bash
+# Run the test suite
+cd build
+ctest --output-on-failure -j$(nproc)
+
+# Check the library version
+./minigzip -h 2>&1 | head -1
+
+# Verify architecture detection
+cmake -B build -DCMAKE_BUILD_TYPE=Release 2>&1 | grep -i "arch\|SIMD\|SSE\|AVX\|NEON"
+```
+
+---
+
+## Integration in Other Projects
+
+### Using `find_package`
+
+```cmake
+# Native mode
+find_package(zlib-ng 2.3 CONFIG REQUIRED)
+target_link_libraries(myapp PRIVATE zlib-ng::zlib-ng)
+
+# Compat mode
+find_package(ZLIB 1.3 CONFIG REQUIRED)
+target_link_libraries(myapp PRIVATE ZLIB::ZLIB)
+```
+
+### Using pkg-config
+
+```bash
+pkg-config --cflags --libs zlib # compat mode
+```
+
+### Using FetchContent
+
+```cmake
+include(FetchContent)
+FetchContent_Declare(neozip
+ SOURCE_DIR ${CMAKE_SOURCE_DIR}/neozip)
+FetchContent_MakeAvailable(neozip)
+target_link_libraries(myapp PRIVATE zlibstatic)
+```
diff --git a/docs/handbook/neozip/checksum-algorithms.md b/docs/handbook/neozip/checksum-algorithms.md
new file mode 100644
index 0000000000..b21504c5e3
--- /dev/null
+++ b/docs/handbook/neozip/checksum-algorithms.md
@@ -0,0 +1,461 @@
+# Checksum Algorithms
+
+## Overview
+
+Neozip implements two checksum algorithms used by the DEFLATE family of
+compression formats:
+
+- **Adler-32**: A fast checksum used in the zlib container (RFC 1950)
+- **CRC-32**: A more robust check used in the gzip container (RFC 1952)
+
+Both algorithms have SIMD-accelerated implementations across x86, ARM,
+Power, RISC-V, s390, and LoongArch architectures.
+
+---
+
+## Adler-32
+
+### Algorithm
+
+Adler-32 is defined in RFC 1950. It consists of two running sums:
+
+- **s1**: Sum of all bytes (mod BASE)
+- **s2**: Sum of all intermediate s1 values (mod BASE)
+
+```c
+#define BASE 65521U // Largest prime less than 65536
+#define NMAX 5552 // Largest n where 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32 - 1
+```
+
+The `NMAX` constant determines how many bytes can be accumulated before a
+modular reduction is required to prevent 32-bit overflow.
+
+### Scalar Implementation
+
+From `adler32.c`:
+
+```c
+Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) {
+ uint32_t sum2 = (adler >> 16) & 0xffff;
+ adler &= 0xffff;
+
+ if (len == 1) {
+ adler += buf[0];
+ if (adler >= BASE) adler -= BASE;
+ sum2 += adler;
+ if (sum2 >= BASE) sum2 -= BASE;
+ return adler | (sum2 << 16);
+ }
+
+ // Split into NMAX-sized blocks
+ while (len >= NMAX) {
+ len -= NMAX;
+ unsigned n = NMAX / 16;
+ do {
+ // Unrolled: 16 ADLER_DO per iteration
+ ADLER_DO16(buf);
+ buf += 16;
+ } while (--n);
+ MOD(adler); // adler %= BASE
+ MOD(sum2);
+ }
+
+ // Process remaining bytes
+ while (len >= 16) {
+ len -= 16;
+ ADLER_DO16(buf);
+ buf += 16;
+ }
+ while (len--) {
+ adler += *buf++;
+ sum2 += adler;
+ }
+ MOD(adler);
+ MOD(sum2);
+ return adler | (sum2 << 16);
+}
+```
+
+### Accumulation Macros
+
+From `adler32_p.h`:
+
+```c
+#define ADLER_DO1(buf) { adler += *(buf); sum2 += adler; }
+#define ADLER_DO2(buf) ADLER_DO1(buf); ADLER_DO1(buf + 1)
+#define ADLER_DO4(buf) ADLER_DO2(buf); ADLER_DO2(buf + 2)
+#define ADLER_DO8(buf) ADLER_DO4(buf); ADLER_DO4(buf + 4)
+#define ADLER_DO16(buf) ADLER_DO8(buf); ADLER_DO8(buf + 8)
+```
+
+### Modular Reduction
+
+```c
+#define MOD(a) a %= BASE
+#define MOD4(a) a %= BASE
+```
+
+BASE being the largest prime below 2^16 is what gives Adler-32 its
+error-detection strength; the reduction itself is a plain modulo. On
+architectures where division is expensive, the reduction can alternatively
+be performed by conditionally subtracting BASE in a loop.
+
+### Combining Adler-32 Checksums
+
+`adler32_combine_()` merges two Adler-32 checksums from adjacent data
+segments without accessing the original data:
+
+```c
+static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len2) {
+ uint32_t sum1, sum2;
+ unsigned rem;
+
+ // modular arithmetic to combine:
+ // s1_combined = (s1_a + s1_b - 1) % BASE
+ // s2_combined = (s2_a + s2_b + s1_a * len2 - len2) % BASE
+ rem = (unsigned)(len2 % BASE);
+ sum1 = adler1 & 0xffff;
+ sum2 = rem * sum1;
+ MOD(sum2);
+ sum1 += (adler2 & 0xffff) + BASE - 1;
+ sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
+ if (sum1 >= BASE) sum1 -= BASE;
+ if (sum1 >= BASE) sum1 -= BASE;
+ if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1);
+ if (sum2 >= BASE) sum2 -= BASE;
+ return sum1 | (sum2 << 16);
+}
+```
+
+### SIMD Implementations
+
+SIMD Adler-32 uses parallel accumulation with dot products:
+
+**AVX2** (`arch/x86/adler32_avx2.c`):
+```c
+Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len) {
+ static const uint8_t dot2v[] = {32,31,30,...,1}; // Position weights
+    static const uint8_t dot3v[] = {1,1,1,...,1};    // Sum1 weight (all ones)
+ __m256i vbuf, vs1, vs2, vs1_0, vs3;
+
+ vs1 = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, adler & 0xffff);
+ vs2 = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, adler >> 16);
+ vs1_0 = vs1;
+
+ while (len >= 32) {
+ vs1_0 = vs1;
+ // Load 32 bytes
+ vbuf = _mm256_loadu_si256((__m256i*)buf);
+ // sum1 += bytes[0..31]
+ vs1 = _mm256_add_epi32(vs1, _mm256_sad_epu8(vbuf, _mm256_setzero_si256()));
+        // vs3 = dot product: sum2 += (32-i)*byte[i]
+        // (vdot2v/vones are the dot2v/dot3v weights loaded into __m256i registers)
+        vs3 = _mm256_maddubs_epi16(vbuf, vdot2v);
+ // ... accumulate vs2
+ vs2 = _mm256_add_epi32(vs2, _mm256_madd_epi16(vs3, vones));
+ // vs2 += 32 * previous_vs1
+ vs2 = _mm256_add_epi32(vs2, _mm256_slli_epi32(vs1_0, 5));
+ buf += 32;
+ len -= 32;
+ }
+ // Horizontal reduction and modular reduction
+ ...
+}
+```
+
+The key insight: Instead of computing `sum2 += s1_n` for each byte n
+individually, SIMD computes `sum2 += k * byte[i]` via `_mm256_maddubs_epi16()`
+where k represents the positional weight.
+
+**Available SIMD variants**:
+
+| Architecture | Implementation | Vector Width |
+|---|---|---|
+| x86 SSE4.1 | `adler32_sse41.c` | 128-bit |
+| x86 SSSE3 | `adler32_ssse3.c` | 128-bit |
+| x86 AVX2 | `adler32_avx2.c` | 256-bit |
+| x86 AVX-512 | `adler32_avx512.c` | 512-bit |
+| x86 AVX-512+VNNI | `adler32_avx512_vnni.c` | 512-bit |
+| ARM NEON | `adler32_neon.c` | 128-bit |
+| Power VMX (Altivec) | `adler32_vmx.c` | 128-bit |
+| Power8 | `adler32_power8.c` | 128-bit |
+| RISC-V RVV | `adler32_rvv.c` | Scalable |
+| LoongArch LASX | `adler32_lasx.c` | 256-bit |
+
+### Adler-32 with Copy
+
+`adler32_copy()` computes Adler-32 while simultaneously copying data,
+fusing two memory passes into one:
+
+```c
+typedef uint32_t (*adler32_copy_func)(uint32_t adler, uint8_t *dst,
+ const uint8_t *src, size_t len);
+```
+
+This is used during inflate to compute the checksum while copying
+decompressed data to the output buffer.
+
+---
+
+## CRC-32
+
+### Algorithm
+
+CRC-32 uses the standard polynomial 0xEDB88320 (reflected form):
+
+```c
+#define POLY 0xedb88320 // CRC-32 polynomial (reversed)
+```
+
+### Braided CRC-32
+
+The default software implementation uses a "braided" algorithm that
+processes multiple bytes per step using interleaved CRC tables:
+
+```c
+#define BRAID_N 5 // Number of interleaved CRC computations
+#define BRAID_W 8 // Bytes per word (8 for 64-bit, 4 for 32-bit)
+```
+
+From `crc32_braid_p.h`, the braided approach processes 5 words (40 bytes
+on 64-bit) per iteration:
+
+```c
+// Braided CRC processing (conceptual)
+// Process BRAID_N words at a time:
+z_word_t braids[BRAID_N];
+
+// Load BRAID_N words from input
+for (int k = 0; k < BRAID_N; k++)
+ braids[k] = *(z_word_t *)(buf + k * BRAID_W);
+
+// For each word, XOR with running CRC then look up table
+for (int k = 0; k < BRAID_N; k++) {
+ z_word_t word = braids[k];
+ // CRC-fold using braid tables:
+ // crc = crc_braid_table[N-1-k][byte0] ^ ... ^ crc_braid_table[0][byteN-1]
+}
+```
+
+The braid tables are generated at compile time by `crc32_braid_tbl.h`.
+
+### Chorba CRC-32
+
+A newer CRC-32 algorithm using a "Chorba" reduction technique for
+even faster software CRC computation. Selected when size >= 256 bytes:
+
+```c
+Z_INTERNAL uint32_t crc32_braid(uint32_t crc, const uint8_t *buf, size_t len) {
+ // Short paths for small inputs
+ if (len < 64) {
+ return crc32_small(crc, buf, len);
+ }
+ // For lengths >= threshold, use Chorba
+ if (len >= 256) {
+ return crc32_chorba(crc, buf, len);
+ }
+ // Otherwise use braided
+ ...
+}
+```
+
+### SIMD CRC-32 Implementations
+
+Hardware-accelerated CRC-32 is available on these architectures:
+
+| Architecture | Instruction | File |
+|---|---|---|
+| x86 (PCLMULQDQ) | Carry-less multiply | `crc32_pclmulqdq.c` |
+| x86 (VPCLMULQDQ) | AVX-512 carry-less multiply | `crc32_vpclmulqdq.c` |
+| ARM (CRC32) | CRC32W/CRC32B instructions | `crc32_acle.c` |
+| ARM (PMULL) | Polynomial multiply long | `crc32_pmull.c` |
+| Power8 | Vector carry-less multiply | `crc32_power8.c` |
+| s390 (CRC32) | DFLTCC or hardware CRC | `crc32_vx.c` |
+| RISC-V | Zbc carry-less multiply | `crc32_rvv.c` |
+
+**x86 PCLMULQDQ** (`arch/x86/crc32_pclmulqdq.c`):
+Uses Barrett reduction via carry-less multiplication to fold 64 bytes at
+a time:
+
+```c
+Z_INTERNAL uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len) {
+ __m128i xmm_crc0, xmm_crc1, xmm_crc2, xmm_crc3;
+ __m128i xmm_fold4; // Fold constant
+
+ // Initialize with CRC and first 64 bytes
+ xmm_crc0 = _mm_loadu_si128((__m128i *)buf);
+ xmm_crc0 = _mm_xor_si128(xmm_crc0, _mm_cvtsi32_si128(crc32));
+ // ... load crc1, crc2, crc3
+
+ // Main fold loop: process 64 bytes per iteration
+ while (len >= 64) {
+ // Fold: crc_n = pclmulqdq(crc_n, fold_constant) ^ next_data
+ xmm_crc0 = _mm_xor_si128(
+ _mm_clmulepi64_si128(xmm_crc0, xmm_fold4, 0x01),
+ _mm_clmulepi64_si128(xmm_crc0, xmm_fold4, 0x10));
+ xmm_crc0 = _mm_xor_si128(xmm_crc0, _mm_loadu_si128(next++));
+ // Repeat for crc1..crc3
+ }
+
+ // Final reduction to 32-bit CRC
+ // Barrett reduction using mu and polynomial constants
+}
+```
+
+This processes data at ~16 bytes/cycle on modern x86 hardware.
+
+### CRC-32 with Copy
+
+Like Adler-32, CRC-32 has a combined compute-and-copy variant:
+
+```c
+typedef uint32_t (*crc32_copy_func)(uint32_t crc, uint8_t *dst,
+ const uint8_t *src, size_t len);
+```
+
+This fuses the CRC computation with the `memcpy`, utilising cache lines
+loaded for copying to also feed the CRC calculation.
+
+### Combining CRC-32 Values
+
+```c
+uint32_t crc32_combine(uint32_t crc1, uint32_t crc2, z_off_t len2);
+uint32_t crc32_combine_gen(z_off_t len2);
+uint32_t crc32_combine_op(uint32_t crc1, uint32_t crc2, uint32_t op);
+```
+
+Two-phase combine enables pre-computing the combination operator for a
+known second-segment length, then applying it to multiple CRC pairs.
+
+---
+
+## Dispatch via `functable`
+
+Checksum functions are dispatched through the `functable_s` structure:
+
+```c
+struct functable_s {
+ adler32_func adler32;
+ adler32_copy_func adler32_copy;
+ compare256_func compare256;
+ crc32_func crc32;
+ crc32_copy_func crc32_copy;
+ // ... other function pointers
+};
+```
+
+`functable.c` selects the best implementation at runtime:
+
+```c
+// x86 dispatch cascade for adler32:
+#ifdef X86_SSSE3
+    if (cf.x86.has_ssse3)
+        functable.adler32 = adler32_ssse3;
+#endif
+#ifdef X86_AVX2
+ if (cf.x86.has_avx2)
+ functable.adler32 = adler32_avx2;
+#endif
+#ifdef X86_AVX512
+ if (cf.x86.has_avx512)
+ functable.adler32 = adler32_avx512;
+#endif
+#ifdef X86_AVX512VNNI
+ if (cf.x86.has_avx512vnni)
+ functable.adler32 = adler32_avx512_vnni;
+#endif
+```
+
+Each architecture-specific source file is compiled separately with its
+required SIMD flags (e.g., `-mavx2`, `-mpclmul`).
+
+---
+
+## Checksum API
+
+### Public API
+
+```c
+uint32_t PREFIX(adler32)(uint32_t adler, const uint8_t *buf, uint32_t len);
+uint32_t PREFIX(crc32)(uint32_t crc, const uint8_t *buf, uint32_t len);
+```
+
+For zlib compatibility, `adler32_z()` and `crc32_z()` accept `size_t` length:
+
+```c
+uint32_t PREFIX(adler32_z)(uint32_t adler, const uint8_t *buf, size_t len);
+uint32_t PREFIX(crc32_z)(uint32_t crc, const uint8_t *buf, size_t len);
+```
+
+### Initial Values
+
+- Adler-32: `adler32(0, NULL, 0)` returns `1` (initial value)
+- CRC-32: `crc32(0, NULL, 0)` returns `0` (initial value)
+
+### Typical Usage
+
+```c
+uint32_t checksum = PREFIX(adler32)(0L, Z_NULL, 0);
+checksum = PREFIX(adler32)(checksum, data, data_len);
+// checksum now holds the Adler-32 of data[0..data_len-1]
+```
+
+---
+
+## Performance Characteristics
+
+### Adler-32
+
+| Implementation | Throughput (approximate) |
+|---|---|
+| Scalar C | ~1 byte/cycle |
+| SSE4.1 | ~8 bytes/cycle |
+| AVX2 | ~16 bytes/cycle |
+| AVX-512+VNNI | ~32 bytes/cycle |
+| ARM NEON | ~8 bytes/cycle |
+
+### CRC-32
+
+| Implementation | Throughput (approximate) |
+|---|---|
+| Braided (scalar) | ~4 bytes/cycle |
+| PCLMULQDQ | ~16 bytes/cycle |
+| VPCLMULQDQ (AVX-512) | ~64 bytes/cycle |
+| ARM CRC32 | ~4 bytes/cycle |
+| ARM PMULL | ~16 bytes/cycle |
+
+CRC-32 is computationally heavier than Adler-32, but hardware acceleration
+closes the gap significantly.
+
+---
+
+## Checksum in the Compression Pipeline
+
+### During Deflate
+
+In `deflate.c`, checksums are computed on the input data:
+
+```c
+if (s->wrap == 2) {
+ // gzip: CRC-32
+ strm->adler = FUNCTABLE_CALL(crc32)(strm->adler, strm->next_in, strm->avail_in);
+} else if (s->wrap == 1) {
+ // zlib: Adler-32
+ strm->adler = FUNCTABLE_CALL(adler32)(strm->adler, strm->next_in, strm->avail_in);
+}
+```
+
+### During Inflate
+
+In `inflate.c`, checksums are computed on the output data:
+
+```c
+static inline void inf_chksum(PREFIX3(stream) *strm, const uint8_t *buf, uint32_t len) {
+ struct inflate_state *state = (struct inflate_state *)strm->state;
+ if (state->flags)
+ strm->adler = state->check = FUNCTABLE_CALL(crc32)(state->check, buf, len);
+ else
+ strm->adler = state->check = FUNCTABLE_CALL(adler32)(state->check, buf, len);
+}
+```
+
+The `_copy` variants (`inf_chksum_cpy`) are preferred when data is being
+both checksummed and copied, as they fuse the two operations.
diff --git a/docs/handbook/neozip/code-style.md b/docs/handbook/neozip/code-style.md
new file mode 100644
index 0000000000..680013d1d9
--- /dev/null
+++ b/docs/handbook/neozip/code-style.md
@@ -0,0 +1,259 @@
+# Code Style
+
+## Overview
+
+Neozip follows conventions derived from zlib-ng and the broader zlib
+ecosystem. This document describes naming patterns, macro usage,
+compiler annotations, and structural conventions observed in the
+codebase.
+
+---
+
+## Naming Conventions
+
+### Functions
+
+| Scope | Convention | Example |
+|---|---|---|
+| Public API | `PREFIX(name)` | `PREFIX(deflateInit2)` → `deflateInit2` or `zng_deflateInit2` |
+| Internal | `Z_INTERNAL` linkage | `Z_INTERNAL uint32_t adler32_c(...)` |
+| Exported | `Z_EXPORT` linkage | `int32_t Z_EXPORT PREFIX(deflate)(...)` |
+| Architecture | `name_arch` | `adler32_avx2`, `crc32_neon` |
+| Template | `name_tpl` | `longest_match_tpl`, `inflate_fast_tpl` |
+
+### Macros
+
+| Pattern | Usage | Example |
+|---|---|---|
+| `PREFIX(name)` | Public symbol namespacing | `PREFIX(inflate)` |
+| `PREFIX3(name)` | Type namespacing | `PREFIX3(stream)` → `z_stream` or `zng_stream` |
+| `WITH_*` | CMake build options | `WITH_AVX2`, `WITH_GZFILEOP` |
+| `X86_*` / `ARM_*` | Architecture feature guards | `X86_AVX2`, `ARM_NEON` |
+| `Z_*` | Public constants | `Z_OK`, `Z_DEFLATED`, `Z_DEFAULT_STRATEGY` |
+| `*_H` | Include guards | `DEFLATE_H`, `INFLATE_H` |
+
+### Types
+
+| Type | Definition |
+|---|---|
+| `Pos` | `uint16_t` — hash chain position |
+| `IPos` | `uint32_t` — index position |
+| `z_word_t` | `uint64_t` (64-bit) or `uint32_t` (32-bit) |
+| `block_state` | `enum { need_more, block_done, finish_started, finish_done }` |
+| `code` | Struct: `{ bits, op, val }` for inflate Huffman tables |
+| `ct_data` | Union: `{ freq/code, dad/len }` for deflate Huffman trees |
+
+---
+
+## Visibility Annotations
+
+```c
+#define Z_INTERNAL // Internal linkage (hidden from shared library API)
+#define Z_EXPORT // Public API export (__attribute__((visibility("default"))))
+```
+
+All internal helper functions use `Z_INTERNAL`. All public API functions
+use `Z_EXPORT`.
+
+---
+
+## Compiler Annotations
+
+From `zbuild.h`:
+
+```c
+// Force inlining
+#define Z_FORCEINLINE __attribute__((always_inline)) inline
+
+// Compiler hints
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+// Fallthrough annotation for switch
+#define Z_FALLTHROUGH __attribute__((fallthrough))
+
+// Target-specific compilation
+#define Z_TARGET(x) __attribute__((target(x)))
+
+// Alignment
+#define ALIGNED_(n) __attribute__((aligned(n)))
+
+// Restrict
+#define Z_RESTRICT __restrict__
+```
+
+### Target Attributes vs. Compile Flags
+
+Individual SIMD functions can use `Z_TARGET` instead of file-level flags:
+
+```c
+Z_TARGET("avx2")
+Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len) {
+ // AVX2 intrinsics here
+}
+```
+
+However, neozip typically uses per-file compile flags set via CMake
+`set_property(SOURCE ... COMPILE_OPTIONS ...)`.
+
+---
+
+## Include Patterns
+
+### Public Headers
+
+```
+zlib.h — Main public API (generated from zlib.h.in)
+zconf.h — Configuration (generated)
+zlib_name_mangling.h — Symbol renaming
+```
+
+### Internal Headers
+
+```
+zbuild.h — Build macros, compiler abstractions
+zutil.h — Internal utility macros and constants
+deflate.h — deflate_state, internal types
+inflate.h — inflate_state, inflate_mode
+functable.h — Function dispatch table
+cpu_features.h — CPU detection interface
+deflate_p.h — Private deflate helpers
+adler32_p.h — Private Adler-32 helpers
+crc32_braid_p.h — Private CRC-32 braided implementation
+trees_emit.h — Bit emission helpers
+match_tpl.h — Template for longest_match
+insert_string_tpl.h — Template for hash insertion
+inffast_tpl.h — Template for inflate_fast
+```
+
+### Include Order
+
+Source files typically include:
+```c
+#include "zbuild.h" // Always first (defines PREFIX, types)
+#include "deflate.h" // or "inflate.h"
+#include "functable.h" // If dispatching
+#include "<arch>_features.h" // If architecture-specific
+```
+
+---
+
+## Template Pattern
+
+Several functions use a "template" pattern via preprocessor includes:
+
+```c
+// match_tpl.h — template for longest_match
+// Defines LONGEST_MATCH, COMPARE256, INSERT_STRING via macros
+// Then included by each architecture variant:
+
+// In longest_match_sse2.c:
+#define LONGEST_MATCH longest_match_sse2
+#define COMPARE256 compare256_sse2
+#include "match_tpl.h"
+```
+
+This avoids code duplication while allowing architecture-specific
+function names and intrinsics.
+
+---
+
+## Struct Alignment
+
+Performance-critical structures use `ALIGNED_(64)` for cache-line
+alignment:
+
+```c
+struct ALIGNED_(64) internal_state { ... }; // deflate_state
+struct ALIGNED_(64) inflate_state { ... };
+```
+
+This prevents false sharing and ensures SIMD-friendly alignment.
+
+---
+
+## Conditional Compilation
+
+Architecture and feature guards follow a consistent pattern:
+
+```c
+#ifdef X86_AVX2
+ if (cf.x86.has_avx2) {
+ functable.adler32 = adler32_avx2;
+ }
+#endif
+```
+
+The `#ifdef` tests compile-time availability (was the source included
+in the build?). The runtime `if` tests CPU capability.
+
+### Build Option Guards
+
+```c
+#ifdef WITH_GZFILEOP // Gzip file operations
+#ifdef ZLIB_COMPAT // zlib-compatible API
+#ifndef Z_SOLO // Not a standalone/embedded build
+#ifdef HAVE_BUILTIN_CTZ // Compiler has __builtin_ctz
+```
+
+---
+
+## Error Handling Style
+
+Functions return `int` error codes from the `Z_*` set:
+
+```c
+if (strm == NULL || strm->state == NULL)
+ return Z_STREAM_ERROR;
+```
+
+Internal functions that cannot fail return `void` or the computed value
+directly. Error strings are set via `strm->msg`:
+
+```c
+strm->msg = (char *)"incorrect header check";
+state->mode = BAD;
+```
+
+---
+
+## Memory Management
+
+All dynamically allocated memory goes through user-provided allocators:
+
+```c
+void *zalloc(void *opaque, unsigned items, unsigned size);
+void zfree(void *opaque, void *address);
+```
+
+If `strm->zalloc` and `strm->zfree` are `Z_NULL`, default `malloc`/`free`
+wrappers are used. The single-allocation strategy (`alloc_deflate` /
+`alloc_inflate`) minimises the number of allocator calls.
+
+---
+
+## Formatting
+
+- **Indentation**: 4 spaces (no tabs in main source)
+- **Braces**: K&R style (opening brace on same line)
+- **Line length**: ~80–100 characters preferred
+- **Comments**: C-style `/* */` for multi-line, `//` for inline
+- **Pointer declarations**: `type *name` (space before `*`)
+
+```c
+static void pqdownheap(deflate_state *s, ct_data *tree, int k) {
+ int v = s->heap[k];
+ int j = k << 1;
+ while (j <= s->heap_len) {
+ if (j < s->heap_len &&
+ SMALLER(tree, s->heap[j+1], s->heap[j], s->depth))
+ j++;
+ if (SMALLER(tree, v, s->heap[j], s->depth))
+ break;
+ s->heap[k] = s->heap[j];
+ k = j;
+ j <<= 1;
+ }
+ s->heap[k] = v;
+}
+```
diff --git a/docs/handbook/neozip/deflate-algorithms.md b/docs/handbook/neozip/deflate-algorithms.md
new file mode 100644
index 0000000000..4d3eeb03c9
--- /dev/null
+++ b/docs/handbook/neozip/deflate-algorithms.md
@@ -0,0 +1,797 @@
+# Deflate Algorithms
+
+## Overview
+
+The DEFLATE algorithm (RFC 1951) combines **LZ77** sliding-window compression
+with **Huffman coding**. Neozip implements DEFLATE through a modular strategy
+system where each compression level maps to a specific strategy function with
+tuned parameters. This document covers every aspect of the compression
+pipeline: hash chains, match finding, lazy evaluation, and the strategy
+functions.
+
+---
+
+## The DEFLATE Compression Pipeline
+
+At a high level, DEFLATE works as follows:
+
+1. **Sliding window**: Input is processed through a 32KB (configurable)
+ sliding window. Each position is hashed and inserted into a hash table.
+2. **LZ77 match finding**: For each position, the hash table is consulted to
+ find previous occurrences of the same byte sequence. The longest match
+ within the window distance is selected.
+3. **Symbol emission**: Either a **literal** byte (no match found) or a
+ **length/distance pair** (match found) is recorded.
+4. **Huffman coding**: Accumulated symbols are encoded with Huffman codes
+ (static, dynamic, or the block is stored uncompressed) and output as a
+ DEFLATE block.
+
+```
+Input bytes
+ │
+ ▼
+┌──────────┐ ┌──────────────┐ ┌──────────────┐ ┌────────────┐
+│ Sliding │ ──▶ │ Hash Insert │ ──▶ │ Match Finding│ ──▶ │ Symbol │
+│ Window │ │ (head/prev) │ │ (longest_ │ │ Buffer │
+│ (32K×2) │ │ │ │ match) │ │ (sym_buf) │
+└──────────┘ └──────────────┘ └──────────────┘ └─────┬──────┘
+ │
+ ▼
+ ┌────────────┐
+ │ Huffman │
+ │ Encoding │
+ │ (trees.c) │
+ └─────┬──────┘
+ │
+ ▼
+ Compressed
+ Output
+```
+
+---
+
+## The Sliding Window
+
+The sliding window is a byte array of size `2 * w_size`, where `w_size`
+defaults to 32768 (32KB, corresponding to `windowBits = 15`):
+
+```c
+unsigned char *window; // Size: 2 * w_size bytes
+unsigned int window_size; // = 2 * w_size
+unsigned int w_size; // = 1 << windowBits (default 32768)
+```
+
+Input is read into the **upper half** of the window (positions `w_size` to
+`2*w_size - 1`). When this half fills up, the lower half is discarded, the
+upper half is moved to the lower half (`memcpy` or `memmove`), and new input
+fills the upper half again. This is the "slide" operation.
+
+### Window Sliding
+
+The `fill_window()` function in `deflate.c` performs the slide:
+
+1. If more data is needed and `strstart >= w_size + MAX_DIST(s)`:
+ - Copy bytes `[w_size, 2*w_size)` to `[0, w_size)`
+ - Adjust `match_start`, `strstart`, `block_start` by `-w_size`
+ - Call `FUNCTABLE_CALL(slide_hash)(s)` to update hash table entries
+2. Read new input from `strm->next_in` into the free space
+3. Maintain the `high_water` mark for memory-check safety
+
+### `slide_hash()`
+
+When the window slides, all hash table entries (`head[]` and `prev[]`) must be
+decremented by `w_size`. Entries that would become negative (pointing before
+the new window start) are set to zero.
+
+The generic C implementation:
+```c
+void slide_hash_c(deflate_state *s) {
+ Pos *p;
+ unsigned n = HASH_SIZE;
+ p = &s->head[n];
+ do {
+ unsigned m = *--p;
+ *p = (Pos)(m >= w_size ? m - w_size : 0);
+ } while (--n);
+ // Same for prev[]
+}
+```
+
+SIMD implementations process 8 or 16 entries at a time using vector
+subtraction and saturation.
+
+---
+
+## Hash Table Structure
+
+The hash table maps byte sequences to window positions for fast match lookup.
+It consists of two arrays:
+
+### `head[HASH_SIZE]`
+
+An array of `Pos` (uint16_t) values, indexed by hash value. Each entry
+contains the most recent window position that hashed to that index.
+
+```c
+#define HASH_BITS 16u
+#define HASH_SIZE 65536u // 2^16
+#define HASH_MASK (HASH_SIZE - 1u)
+
+Pos *head; // head[HASH_SIZE]: most recent position for each hash
+```
+
+Note: Neozip (following zlib-ng) uses a 16-bit hash (65536 entries) compared
+to original zlib's 15-bit hash (32768 entries), providing better distribution
+and fewer collisions.
+
+### `prev[w_size]`
+
+An array maintaining the hash **chain** — a linked list of all positions
+that share the same hash value:
+
+```c
+Pos *prev; // prev[w_size]: chain links, indexed by (position & w_mask)
+```
+
+When a new position `str` is inserted for hash value `h`:
+```c
+prev[str & w_mask] = head[h]; // Link to previous chain head
+head[h] = str; // New chain head
+```
+
+Walking the chain from `head[h]` through `prev[]` yields all previous
+positions with the same hash, in reverse chronological order.
+
+---
+
+## Hash Function
+
+Neozip hashes 4 bytes at the current position (compared to zlib's 3 bytes).
+The hash function is defined in `insert_string_tpl.h`:
+
+```c
+Z_FORCEINLINE static uint32_t UPDATE_HASH(uint32_t h, uint32_t val) {
+ HASH_CALC(h, val); // Architecture-specific hash computation
+ return h & HASH_CALC_MASK; // Mask to HASH_SIZE
+}
+```
+
+The `HASH_CALC` macro uses a fast multiplicative hash or CRC-based hash
+depending on the configuration. Reading 4 bytes at once (`zng_memread_4`)
+provides better hash distribution than 3-byte hashing.
+
+### Hash Insert Operations
+
+Several insert variants exist:
+
+#### `quick_insert_value()`
+
+Used by `deflate_fast` and `deflate_quick`. Inserts a single position
+with a pre-read 4-byte value:
+
+```c
+Z_FORCEINLINE static uint32_t QUICK_INSERT_VALUE(
+ deflate_state *const s, uint32_t str, uint32_t val) {
+ uint32_t hm, head;
+ HASH_CALC_VAR_INIT;
+ HASH_CALC(HASH_CALC_VAR, val);
+ HASH_CALC_VAR &= HASH_CALC_MASK;
+ hm = HASH_CALC_VAR;
+ head = s->head[hm];
+ if (LIKELY(head != str)) {
+ s->prev[str & W_MASK(s)] = (Pos)head;
+ s->head[hm] = (Pos)str;
+ }
+ return head;
+}
+```
+
+#### `quick_insert_string()`
+
+Like `quick_insert_value()` but reads the 4 bytes itself:
+
+```c
+Z_FORCEINLINE static uint32_t QUICK_INSERT_STRING(
+ deflate_state *const s, uint32_t str) {
+ uint8_t *strstart = s->window + str + HASH_CALC_OFFSET;
+ uint32_t val, hm, head;
+ HASH_CALC_VAR_INIT;
+ HASH_CALC_READ; // val = Z_U32_FROM_LE(zng_memread_4(strstart))
+ HASH_CALC(HASH_CALC_VAR, val);
+ // ... insert ...
+ return head;
+}
+```
+
+#### `insert_string()`
+
+Batch insert. Inserts `count` consecutive positions, used after a match
+to insert all strings within the matched region:
+
+```c
+void insert_string(deflate_state *const s, uint32_t str, uint32_t count);
+```
+
+#### `insert_string_roll()` / `quick_insert_string_roll()`
+
+Rolling hash insert for level 9 (`deflate_slow` with `LONGEST_MATCH_SLOW`).
+Uses a different hash update that considers the full string context for
+better match quality at the cost of speed.
+
+---
+
+## Match Finding
+
+### `longest_match()` (Standard)
+
+Defined via the `match_tpl.h` template. This is the hot inner loop of
+compression.
+
+**Algorithm**:
+
+1. Set `best_len` to `prev_length` (or `STD_MIN_MATCH - 1`)
+2. Read the first 8 bytes at `scan` and at `scan + offset` for fast
+ comparison (where `offset = best_len - 1`, adjusted for word boundaries)
+3. Set `limit` to prevent matches beyond `MAX_DIST(s)`
+4. If `best_len >= good_match`, halve `chain_length` to speed up
+5. Walk the hash chain via `prev[]`:
+ a. For each candidate `cur_match`:
+ - Quick reject: compare 8 bytes at match end position (`mbase_end + cur_match`)
+ against `scan_end`
+ - Quick reject: compare 8 bytes at match start against `scan_start`
+ - If both match: call `compare256()` for exact length
+ - If new length > `best_len`: update `best_len` and `match_start`
+ - If `best_len >= nice_match`: return immediately
+ - Update `scan_end` for the new best length
+ b. Follow `prev[cur_match & wmask]` to next chain entry
+ c. Stop when `chain_length` exhausted or `cur_match <= limit`
+
+```c
+Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, uint32_t cur_match) {
+ // Early exit: if chain becomes too deep for poor matches
+ int32_t early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
+
+ while (--chain_length) {
+ match = mbase_start + cur_match;
+ if (zng_memread_8(mbase_end + cur_match) == scan_end &&
+ zng_memread_8(match) == scan_start) {
+ len = FUNCTABLE_CALL(compare256)(scan + 2, match + 2) + 2;
+ if (len > best_len) {
+ s->match_start = cur_match;
+ best_len = len;
+ if (best_len >= nice_match) return best_len;
+ offset = best_len - 1;
+ // Update scan_end
+ }
+ }
+ cur_match = prev[cur_match & wmask];
+ if (cur_match <= limit) return best_len;
+ }
+}
+```
+
+**Key parameters** that control match-finding behaviour:
+
+| Parameter | Field | Effect |
+|---|---|---|
+| `max_chain_length` | `s->max_chain_length` | Maximum hash chain entries to check |
+| `good_match` | `s->good_match` | Halve chain search above this best length |
+| `nice_match` | `s->nice_match` | Stop searching above this length |
+| `max_lazy_match` | `s->max_lazy_match` | Don't bother with lazy match above this |
+
+### `longest_match_slow()` (Level 9)
+
+The slow variant (`LONGEST_MATCH_SLOW`) adds chain re-rooting for better
+match quality:
+
+1. When continuing a lazy evaluation search, it doesn't just follow the
+ chain from the hash of the current position
+2. Instead, it finds the most distant chain starting from positions
+ `scan[1], scan[2], ..., scan[best_len]` using `update_hash_roll()`
+3. This effectively searches multiple hash chains to find matches that
+ the standard algorithm would miss
+
+```c
+// Re-root: find a more distant chain start
+hash = update_hash_roll(0, scan[1]);
+hash = update_hash_roll(hash, scan[2]);
+for (uint32_t i = 3; i <= best_len; i++) {
+ hash = update_hash_roll(hash, scan[i]);
+ pos = s->head[hash];
+ if (pos < cur_match) {
+ cur_match = pos; // Found a more distant starting point
+ }
+}
+```
+
+### `compare256()`
+
+Compares up to 256 bytes between two pointers, returning the number of
+matching bytes. Architecture-specific implementations:
+
+| Implementation | File | Method |
+|---|---|---|
+| `compare256_c` | `arch/generic/compare256_c.c` | 8-byte word comparison |
+| `compare256_sse2` | `arch/x86/compare256_sse2.c` | 16-byte SSE2 comparison |
+| `compare256_avx2` | `arch/x86/compare256_avx2.c` | 32-byte AVX2 comparison |
+| `compare256_avx512` | `arch/x86/compare256_avx512.c` | 64-byte AVX-512 comparison |
+| `compare256_neon` | `arch/arm/compare256_neon.c` | 16-byte NEON comparison |
+
+The compare function uses `FUNCTABLE_CALL(compare256)` for dispatch.
+
+---
+
+## Configuration Table
+
+Each compression level has a set of tuning parameters defined in
+`deflate.c`:
+
+```c
+typedef struct config_s {
+ uint16_t good_length; // Reduce lazy search above this match length
+ uint16_t max_lazy; // Do not perform lazy search above this length
+ uint16_t nice_length; // Quit search above this match length
+ uint16_t max_chain; // Maximum hash chain length
+ compress_func func; // Strategy function pointer
+} config;
+```
+
+The full table:
+
+| Level | good | lazy | nice | chain | Strategy |
+|---|---|---|---|---|---|
+| 0 | 0 | 0 | 0 | 0 | `deflate_stored` |
+| 1 | 0 | 0 | 0 | 0 | `deflate_quick` |
+| 2 | 4 | 4 | 8 | 4 | `deflate_fast` |
+| 3 | 4 | 6 | 16 | 6 | `deflate_medium` |
+| 4 | 4 | 12 | 32 | 24 | `deflate_medium` |
+| 5 | 8 | 16 | 32 | 32 | `deflate_medium` |
+| 6 | 8 | 16 | 128 | 128 | `deflate_medium` |
+| 7 | 8 | 32 | 128 | 256 | `deflate_slow` |
+| 8 | 32 | 128 | 258 | 1024 | `deflate_slow` |
+| 9 | 32 | 258 | 258 | 4096 | `deflate_slow` |
+
+When `NO_QUICK_STRATEGY` is defined, level 1 uses `deflate_fast` and level 2
+shifts accordingly. When `NO_MEDIUM_STRATEGY` is defined, levels 3–6 use
+`deflate_fast` or `deflate_slow`.
+
+---
+
+## Strategy Functions in Detail
+
+### `deflate_stored` (Level 0)
+
+No compression. Input is emitted as stored blocks (type 0 in DEFLATE):
+
+- Each stored block has a 5-byte header: BFINAL (1 bit), BTYPE (2 bits = 00),
+ padding to byte boundary, LEN (16 bits), NLEN (16 bits, one's complement of LEN)
+- Maximum stored block length: 65535 bytes (`MAX_STORED`)
+- Directly copies from `next_in` to `next_out` when possible
+- Falls back to buffering through the window when direct copy isn't feasible
+
+```c
+Z_INTERNAL block_state deflate_stored(deflate_state *s, int flush) {
+ unsigned min_block = MIN(s->pending_buf_size - 5, w_size);
+ // ...
+ len = MAX_STORED;
+ have = (s->bi_valid + 42) >> 3; // Header overhead
+ // Copy blocks directly when possible
+}
+```
+
+### `deflate_quick` (Level 1)
+
+The fastest compression strategy, designed by Intel:
+
+- Uses **static Huffman trees** only (no dynamic tree construction)
+- Single-pass greedy matching with no lazy evaluation
+- Emits blocks via `zng_tr_emit_tree(s, STATIC_TREES, last)` and
+ `zng_tr_emit_end_block(s, static_ltree, last)`
+- Tracks block state: `block_open` = 0 (closed), 1 (open), 2 (open + last)
+- Flushes when `pending` approaches `pending_buf_size`
+
+```c
+Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
+ // Start static block
+ quick_start_block(s, last);
+
+ for (;;) {
+ // Flush if pending buffer nearly full
+ if (s->pending + ((BIT_BUF_SIZE + 7) >> 3) >= s->pending_buf_size) {
+ PREFIX(flush_pending)(s->strm);
+ // ...
+ }
+
+ // Hash insert
+ uint32_t hash_head = quick_insert_value(s, s->strstart, str_val);
+
+ // Try to find a match
+ if (dist <= MAX_DIST(s) && dist > 0) {
+ // Quick match comparison...
+ if (match_val == str_val) {
+ // Full match via longest_match or inline compare
+ zng_tr_emit_dist(s, static_ltree, static_dtree, ...);
+ }
+ }
+ // No match: emit literal
+ zng_tr_emit_lit(s, static_ltree, lc);
+ }
+}
+```
+
+### `deflate_fast` (Level 2, or 1–3 without quick)
+
+Greedy matching without lazy evaluation:
+
+1. `fill_window()` if `lookahead < MIN_LOOKAHEAD`
+2. Hash insert via `quick_insert_value()`
+3. If match found (within `MAX_DIST`, length ≥ `WANT_MIN_MATCH`):
+ - Record match via `zng_tr_tally_dist()`
+ - Insert all strings within the match region
+ - Advance `strstart` by `match_length`
+4. If no match:
+ - Record literal via `zng_tr_tally_lit()`
+ - Advance `strstart` by 1
+5. If tally function returns true (buffer full): flush block
+
+```c
+Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
+ for (;;) {
+ if (s->lookahead < MIN_LOOKAHEAD) {
+ PREFIX(fill_window)(s);
+ // ...
+ }
+ if (s->lookahead >= WANT_MIN_MATCH) {
+ uint32_t hash_head = quick_insert_value(s, s->strstart, str_val);
+ if (dist <= MAX_DIST(s) && dist > 0 && hash_head != 0) {
+ match_len = FUNCTABLE_CALL(longest_match)(s, hash_head);
+ }
+ }
+ if (match_len >= WANT_MIN_MATCH) {
+ bflush = zng_tr_tally_dist(s, s->strstart - s->match_start,
+ match_len - STD_MIN_MATCH);
+ s->lookahead -= match_len;
+ // Insert strings within match
+ s->strstart += match_len;
+ } else {
+ bflush = zng_tr_tally_lit(s, lc);
+ s->lookahead--;
+ s->strstart++;
+ }
+ if (UNLIKELY(bflush))
+ FLUSH_BLOCK(s, 0);
+ }
+}
+```
+
+### `deflate_medium` (Levels 3–6)
+
+Intel's balanced strategy that bridges fast and slow:
+
+Uses a `struct match` to track match attributes:
+```c
+struct match {
+ uint16_t match_start;
+ uint16_t match_length;
+ uint16_t strstart;
+ uint16_t orgstart;
+};
+```
+
+Key helper functions:
+- **`find_best_match()`** — Calls `longest_match()` and returns a `struct match`
+- **`emit_match()`** — Emits literals for short matches (< `WANT_MIN_MATCH`)
+ or a length/distance pair for longer ones
+- **`insert_match()`** — Inserts hash entries for matched positions
+
+The algorithm maintains a two-position lookahead:
+
+1. Find best match at current position
+2. Find best match at next position
+3. Compare: if next match is better by a meaningful margin, emit current
+ position as a literal and adopt the next match
+4. Otherwise, emit the current match
+
+```c
+Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
+ struct match current_match, next_match;
+
+ for (;;) {
+ current_match = find_best_match(s, hash_head);
+
+ if (current_match.match_length >= WANT_MIN_MATCH) {
+ // Check if next position has a better match
+ next_match = find_best_match(s, next_hash_head);
+ if (next_match.match_length > current_match.match_length + 1) {
+ // Skip current, use next
+ emit_match(s, literal_match);
+ insert_match(s, literal_match);
+ continue;
+ }
+ }
+ emit_match(s, current_match);
+ insert_match(s, current_match);
+ }
+}
+```
+
+### `deflate_slow` (Levels 7–9)
+
+Full lazy match evaluation — the traditional approach for maximum compression:
+
+1. Find longest match at current position
+2. **Do not emit it yet** — set `match_available = 1`
+3. Advance to next position, find another match
+4. If the new match is **not better** than the previous one:
+ - Emit the previous match (the "lazy" match)
+ - Insert all strings within the matched region
+ - Skip past the match
+5. If the new match **is better**:
+ - Emit the previous position as a literal
+ - Record the new match as `match_available`
+ - Continue from step 3
+
+```c
+Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
+ // Level ≥ 9: use slow match finder and rolling insert
+ if (level >= 9) {
+ longest_match = FUNCTABLE_FPTR(longest_match_slow);
+ insert_string_func = insert_string_roll;
+ }
+
+ for (;;) {
+ // Find match
+ if (dist <= MAX_DIST(s) && s->prev_length < s->max_lazy_match) {
+ match_len = longest_match(s, hash_head);
+ }
+
+ // Lazy evaluation
+ if (s->prev_length >= STD_MIN_MATCH && match_len <= s->prev_length) {
+ // Previous match was better — emit it
+ bflush = zng_tr_tally_dist(s, s->strstart - 1 - s->prev_match,
+ s->prev_length - STD_MIN_MATCH);
+ // Insert strings within match
+ s->prev_length -= 1;
+ s->lookahead -= s->prev_length;
+ s->strstart += s->prev_length;
+ s->prev_length = 0;
+ s->match_available = 0;
+ } else if (s->match_available) {
+ // Previous position has no good match — emit as literal
+ bflush = zng_tr_tally_lit(s, s->window[s->strstart - 1]);
+ } else {
+ s->match_available = 1;
+ }
+
+ s->prev_length = match_len;
+ s->prev_match = s->match_start;
+ }
+}
+```
+
+**Level 9 enhancements**:
+- Uses `longest_match_slow` which re-roots hash chains for deeper search
+- Uses `insert_string_roll` with a rolling hash for better distribution
+- `max_chain = 4096` provides the deepest chain traversal
+- `nice_match = 258` (maximum match length) means it never gives up early
+
+### `deflate_huff` (Z_HUFFMAN_ONLY)
+
+Huffman-only compression — every byte is emitted as a literal, no LZ77:
+
+```c
+Z_INTERNAL block_state deflate_huff(deflate_state *s, int flush) {
+ for (;;) {
+ if (s->lookahead == 0) {
+ PREFIX(fill_window)(s);
+ if (s->lookahead == 0) {
+ if (flush == Z_NO_FLUSH) return need_more;
+ break;
+ }
+ }
+ bflush = zng_tr_tally_lit(s, s->window[s->strstart]);
+ s->lookahead--;
+ s->strstart++;
+ if (bflush) FLUSH_BLOCK(s, 0);
+ }
+}
+```
+
+This forces construction of a Huffman tree that encodes only literal
+frequencies. Useful when the data is already compressed or random.
+
+### `deflate_rle` (Z_RLE)
+
+Run-length encoding — only finds runs of identical bytes (distance = 1):
+
+```c
+Z_INTERNAL block_state deflate_rle(deflate_state *s, int flush) {
+ for (;;) {
+ // Check for a run: scan[-1] == scan[0] == scan[1]
+ if (s->lookahead >= STD_MIN_MATCH && s->strstart > 0) {
+ scan = s->window + s->strstart - 1;
+ if (scan[0] == scan[1] && scan[1] == scan[2]) {
+ match_len = compare256_rle(scan, scan + 3) + 2;
+ match_len = MIN(match_len, STD_MAX_MATCH);
+ }
+ }
+
+ if (match_len >= STD_MIN_MATCH) {
+ bflush = zng_tr_tally_dist(s, 1, match_len - STD_MIN_MATCH);
+ } else {
+ bflush = zng_tr_tally_lit(s, s->window[s->strstart]);
+ }
+ }
+}
+```
+
+The `compare256_rle()` function is optimised for the RLE case where
+all bytes in the run are identical:
+
+```c
+// compare256_rle.h
+static inline uint32_t compare256_rle_64(const uint8_t *src0, const uint8_t *src1) {
+ // Read 8-byte words from src0, replicate src1[0] across 8 bytes
+ // XOR and count trailing zeros to find first difference
+}
+```
+
+---
+
+## Symbol Buffer
+
+Matches and literals are stored in a symbol buffer before being emitted:
+
+### Overlaid Format (`sym_buf`, default)
+
+Three bytes per symbol:
+- `sym_buf[i+0]`, `sym_buf[i+1]` — distance (0 for literal)
+- `sym_buf[i+2]` — literal byte or match length
+
+```c
+// zng_tr_tally_dist:
+zng_memwrite_4(&s->sym_buf[sym_next], Z_U32_TO_LE(dist | ((uint32_t)len << 16)));
+s->sym_next = sym_next + 3;
+
+// zng_tr_tally_lit:
+zng_memwrite_4(&s->sym_buf[sym_next], Z_U32_TO_LE((uint32_t)c << 16));
+s->sym_next = sym_next + 3;
+```
+
+### Separate Format (`LIT_MEM`)
+
+When `LIT_MEM` is defined (automatic when `OPTIMAL_CMP < 32`):
+```c
+uint16_t *d_buf; // Distance buffer
+unsigned char *l_buf; // Literal/length buffer
+
+// zng_tr_tally_dist:
+s->d_buf[sym_next] = (uint16_t)dist;
+s->l_buf[sym_next] = (uint8_t)len;
+s->sym_next = sym_next + 1;
+```
+
+This uses ~20% more memory but is 1–2% faster on platforms without fast
+unaligned access.
+
+The buffer size is `lit_bufsize` entries. When `sym_next` reaches `sym_end`,
+the block is flushed. The constant `LIT_BUFS` determines the buffer
+multiplier: 4 (overlaid) or 5 (separate).
+
+---
+
+## Block Flushing
+
+`zng_tr_flush_block()` in `trees.c` decides how to emit the accumulated
+symbols:
+
+1. **Compute tree statistics**: Build dynamic Huffman trees, compute
+ `opt_len` (dynamic tree bit cost) and `static_len` (static tree bit cost)
+2. **Compute stored cost**: Raw data length + 5 bytes overhead per block
+3. **Choose the best**:
+ - If stored cost ≤ `opt_len` and stored cost ≤ `static_len`: emit stored block
+ - Else if `static_len` ≤ `opt_len + 10`: emit with static trees
+ - Else: emit with dynamic trees
+
+```c
+void zng_tr_flush_block(deflate_state *s, char *buf, uint32_t stored_len, int last) {
+ build_tree(s, &s->l_desc);
+ build_tree(s, &s->d_desc);
+
+ if (stored_len + 4 <= opt_len && buf != NULL) {
+ // Stored block
+ } else if (s->strategy == Z_FIXED || static_len == opt_len) {
+ // Static trees
+ compress_block(s, static_ltree, static_dtree);
+ } else {
+ // Dynamic trees
+ send_all_trees(s, lcodes, dcodes, blcodes);
+ compress_block(s, s->dyn_ltree, s->dyn_dtree);
+ }
+ init_block(s); // Reset for next block
+}
+```
+
+---
+
+## Match Length Constraints
+
+| Constant | Value | Purpose |
+|---|---|---|
+| `STD_MIN_MATCH` | 3 | Minimum match length per DEFLATE spec |
+| `STD_MAX_MATCH` | 258 | Maximum match length per DEFLATE spec |
+| `WANT_MIN_MATCH` | 4 | Internal minimum for actual match output |
+| `MIN_LOOKAHEAD` | `STD_MAX_MATCH + WANT_MIN_MATCH + 1` | Minimum lookahead before refilling window |
+| `MAX_DIST(s)` | `s->w_size - MIN_LOOKAHEAD` | Maximum back-reference distance |
+
+`WANT_MIN_MATCH = 4` is a performance optimisation: 3-byte matches provide
+minimal compression benefit but cost significant CPU time to find. By
+requiring 4-byte matches, the hash function can read a full 32-bit word
+and the match finder can use 32-bit comparisons for faster rejection.
+
+---
+
+## Block Types in Deflate Output
+
+Every DEFLATE block starts with a 3-bit header:
+- Bit 0: `BFINAL` — 1 if this is the last block
+- Bits 1–2: `BTYPE` — block type (00=stored, 01=fixed, 10=dynamic, 11=reserved)
+
+### Stored Block (BTYPE=00)
+
+```
+BFINAL BTYPE pad LEN NLEN DATA
+ 1 00 0-7 16 16 LEN bytes
+```
+
+### Fixed Huffman Block (BTYPE=01)
+
+Uses predefined static trees (`static_ltree`, `static_dtree`). No tree
+data in the block — the decoder knows the fixed codes.
+
+### Dynamic Huffman Block (BTYPE=10)
+
+Contains the tree definition before the data:
+```
+BFINAL BTYPE HLIT HDIST HCLEN [Code lengths for code length alphabet]
+[Encoded literal/length code lengths] [Encoded distance code lengths]
+[Compressed data using the defined trees]
+```
+
+---
+
+## Flush Modes
+
+The `flush` parameter to `deflate()` controls output behaviour:
+
+| Value | Name | Effect |
+|---|---|---|
+| 0 | `Z_NO_FLUSH` | Normal operation; deflate decides when to emit output |
+| 1 | `Z_PARTIAL_FLUSH` | Flush output, emit empty fixed code block (10 bits) |
+| 2 | `Z_SYNC_FLUSH` | Flush output, align to byte, emit stored empty block (00 00 FF FF) |
+| 3 | `Z_FULL_FLUSH` | Like `Z_SYNC_FLUSH` but also resets compression state for random access |
+| 4 | `Z_FINISH` | Complete the stream. Returns `Z_STREAM_END` when done |
+| 5 | `Z_BLOCK` | Stop at next block boundary |
+| 6 | `Z_TREES` | Like `Z_BLOCK`, but also stops once the block header and Huffman trees have been processed (primarily used with `inflate()`) |
+
+Flush priority is ordered via the `RANK()` macro:
+```c
+#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0))
+```
+
+---
+
+## The `block_state` Return Values
+
+Each strategy function returns a `block_state`:
+
+```c
+typedef enum {
+ need_more, // Need more input or more output space
+ block_done, // Block was flushed
+ finish_started, // Z_FINISH started, need more output calls
+ finish_done // Z_FINISH complete
+} block_state;
+```
+
+`deflate()` uses these to control its outer loop and determine when to
+return to the caller.
diff --git a/docs/handbook/neozip/gzip-support.md b/docs/handbook/neozip/gzip-support.md
new file mode 100644
index 0000000000..ada310fa88
--- /dev/null
+++ b/docs/handbook/neozip/gzip-support.md
@@ -0,0 +1,413 @@
+# Gzip Support
+
+## Overview
+
+Neozip provides a complete gzip file I/O layer on top of the core
+deflate/inflate engine. This layer is implemented in three source files plus a shared header:
+
+- `gzlib.c` — Shared state management, file open/close, seeking
+- `gzread.c` — Gzip file reading (decompression)
+- `gzwrite.c` — Gzip file writing (compression)
+- `gzguts.h` — Internal structures and constants
+
+The gzip API is enabled by the `WITH_GZFILEOP` CMake option (ON by default).
+
+---
+
+## The `gz_state` Structure
+
+From `gzguts.h`:
+
+```c
+typedef struct {
+ // Identification
+ PREFIX3(stream) strm; // Inflate/deflate stream
+ int mode; // GZ_READ or GZ_WRITE
+ int fd; // File descriptor
+ char *path; // Path for error messages
+ unsigned size; // Buffer size (default GZBUFSIZE)
+
+ // Buffering
+ unsigned want; // Requested buffer size
+ unsigned char *in; // Input buffer (read mode)
+ unsigned char *out; // Output buffer
+ int direct; // 0=compressed, 1=passthrough (not gzip)
+
+ // Position tracking
+ z_off64_t start; // Start of compressed data (after header)
+ z_off64_t raw; // Raw (compressed) file position
+ z_off64_t pos; // Uncompressed data position
+ int eof; // End of input file reached
+ int past; // Read past end of input
+
+ // Error tracking
+ int err; // Error code
+ char *msg; // Error message (or NULL)
+ int how; // 0=output, 1=copy, 2=decompress
+
+ // Write mode
+ int level; // Compression level
+ int strategy; // Compression strategy
+ int reset; // true if deflateReset needed
+
+ // Seeking
+ z_off64_t skip; // Bytes to skip during next read
+
+ // Peek
+ int seek; // Seek request pending
+} gz_state;
+```
+
+### Constants
+
+```c
+#define GZBUFSIZE 131072 // Default buffer size (128 KB)
+#define GZ_READ 7247 // Sentinel for read mode
+#define GZ_WRITE 31153 // Sentinel for write mode
+#define GZ_APPEND 1 // Mode flag for append
+```
+
+The sentinel values `GZ_READ` and `GZ_WRITE` are non-obvious integers
+chosen to catch state corruption.
+
+---
+
+## File Open (`gzlib.c`)
+
+### `gzopen()` / `gzdopen()`
+
+```c
+gzFile PREFIX(gzopen)(const char *path, const char *mode);
+gzFile PREFIX(gzdopen)(int fd, const char *mode);
+gzFile PREFIX(gzopen64)(const char *path, const char *mode);
+```
+
+The mode string supports:
+- `r` — Read (decompress)
+- `w` — Write (compress)
+- `a` — Append (compress, append to existing file)
+- `0-9` — Compression level
+- `f` — `Z_FILTERED` strategy
+- `h` — `Z_HUFFMAN_ONLY` strategy
+- `R` — `Z_RLE` strategy
+- `F` — `Z_FIXED` strategy
+- `T` — Direct/transparent (no compression)
+
+### `gz_state_init()`
+
+```c
+static void gz_state_init(gz_state *state) {
+ state->size = 0;
+ state->want = GZBUFSIZE;
+ state->in = NULL;
+ state->out = NULL;
+ state->direct = 0;
+ state->err = Z_OK;
+ state->pos = 0;
+ state->strm.avail_in = 0;
+}
+```
+
+### `gz_buffer_alloc()`
+
+Allocates I/O buffers:
+
+```c
+static int gz_buffer_alloc(gz_state *state) {
+ unsigned size = state->want;
+
+ if (state->mode == GZ_READ) {
+ // Read: input buffer = size, output buffer = size * 2
+ state->in = malloc(size);
+ state->out = malloc(size << 1);
+ state->size = size;
+ } else {
+ // Write: output buffer = size
+ state->in = NULL;
+ state->out = malloc(size);
+ state->size = size;
+ }
+ return 0;
+}
+```
+
+In read mode, the output buffer is doubled to handle cases where
+decompression expands data significantly within a single call.
+
+---
+
+## Reading (`gzread.c`)
+
+### Read Pipeline
+
+```
+gz_read() → gz_fetch() → gz_decomp() → inflate()
+ ↘ gz_look() (header detection)
+```
+
+### `gz_look()` — Header Detection
+
+Determines if the file is gzip-compressed or raw:
+
+```c
+static int gz_look(gz_state *state) {
+ // Read enough to check for gzip magic number
+ if (state->strm.avail_in < 2) {
+ // Read from file
+ int got = read(state->fd, state->in, state->size);
+ state->strm.avail_in = got;
+ state->strm.next_in = state->in;
+ }
+
+ // Check for gzip magic (1f 8b)
+ if (state->strm.avail_in >= 2 &&
+ state->in[0] == 0x1f && state->in[1] == 0x8b) {
+ // Initialize inflate for gzip
+ inflateInit2(&state->strm, 15 + 16); // windowBits + 16 = gzip
+ state->how = 2; // Decompress mode
+ } else {
+ // Not gzip — pass through directly
+ state->direct = 1;
+ state->how = 1; // Copy mode
+ }
+}
+```
+
+### `gz_decomp()` — Decompression
+
+```c
+static int gz_decomp(gz_state *state) {
+ int ret;
+ unsigned had = state->strm.avail_out;
+
+ // Call inflate
+ ret = PREFIX(inflate)(&state->strm, Z_NO_FLUSH);
+ state->pos += had - state->strm.avail_out;
+
+ if (ret == Z_STREAM_END) {
+ // End of gzip member — may be concatenated gzip
+ inflateReset(&state->strm);
+ state->how = 0; // Need to look for next member
+ }
+ return 0;
+}
+```
+
+### `gz_fetch()` — Fetch More Data
+
+```c
+static int gz_fetch(gz_state *state) {
+ do {
+ switch (state->how) {
+ case 0: // Look for gzip header
+ if (gz_look(state) == -1) return -1;
+ if (state->how == 0) return 0; // EOF
+ break;
+ case 1: // Copy raw data
+ if (gz_load(state, state->out, state->size << 1, &got) == -1)
+ return -1;
+ state->pos += got;
+ break;
+ case 2: // Decompress
+ if (state->strm.avail_in == 0) {
+ // Refill input buffer
+ int got = read(state->fd, state->in, state->size);
+ state->strm.avail_in = got;
+ state->strm.next_in = state->in;
+ }
+ if (gz_decomp(state) == -1) return -1;
+ break;
+ }
+ } while (state->strm.avail_out && !state->eof);
+ return 0;
+}
+```
+
+### Public Read API
+
+```c
+int PREFIX(gzread)(gzFile file, void *buf, unsigned len);
+int PREFIX(gzgetc)(gzFile file); // Read single character
+char *PREFIX(gzgets)(gzFile file, char *buf, int len); // Read line
+int PREFIX(gzungetc)(int c, gzFile file); // Push back character
+int PREFIX(gzdirect)(gzFile file); // Check if raw
+```
+
+---
+
+## Writing (`gzwrite.c`)
+
+### Write Pipeline
+
+```
+gz_write() → gz_comp() → deflate()
+```
+
+### `gz_write_init()` — Lazy Initialisation
+
+```c
+static int gz_write_init(gz_state *state) {
+ // Allocate output buffer
+ gz_buffer_alloc(state);
+
+ // Initialize deflate
+ state->strm.next_out = state->out;
+ state->strm.avail_out = state->size;
+
+ int ret = PREFIX(deflateInit2)(&state->strm,
+ state->level, Z_DEFLATED,
+ 15 + 16, // windowBits + 16 = gzip wrapping
+ DEF_MEM_LEVEL, state->strategy);
+
+ return ret == Z_OK ? 0 : -1;
+}
+```
+
+### `gz_comp()` — Compress Buffered Data
+
+```c
+static int gz_comp(gz_state *state, int flush) {
+ int ret;
+ unsigned have;
+
+ // Deflate until done
+ do {
+ if (state->strm.avail_out == 0) {
+ // Flush output buffer to file
+ have = state->size;
+ if (write(state->fd, state->out, have) != have) {
+ state->err = Z_ERRNO;
+ return -1;
+ }
+ state->strm.next_out = state->out;
+ state->strm.avail_out = state->size;
+ }
+ ret = PREFIX(deflate)(&state->strm, flush);
+ } while (ret == Z_OK && state->strm.avail_out == 0);
+
+ if (flush == Z_FINISH && ret == Z_STREAM_END) {
+ // Write final output
+ have = state->size - state->strm.avail_out;
+ if (have && write(state->fd, state->out, have) != have) {
+ state->err = Z_ERRNO;
+ return -1;
+ }
+ }
+ return 0;
+}
+```
+
+### Public Write API
+
+```c
+int PREFIX(gzwrite)(gzFile file, const void *buf, unsigned len);
+int PREFIX(gzputc)(gzFile file, int c);
+int PREFIX(gzputs)(gzFile file, const char *s);
+int PREFIX(gzprintf)(gzFile file, const char *format, ...);
+int PREFIX(gzflush)(gzFile file, int flush);
+int PREFIX(gzsetparams)(gzFile file, int level, int strategy);
+```
+
+---
+
+## Seeking and Position
+
+```c
+z_off64_t PREFIX(gzseek64)(gzFile file, z_off64_t offset, int whence);
+z_off64_t PREFIX(gztell64)(gzFile file);
+z_off64_t PREFIX(gzoffset64)(gzFile file);
+int PREFIX(gzrewind)(gzFile file);
+int PREFIX(gzeof)(gzFile file);
+```
+
+### Forward Seeking
+
+For read mode, seeking forward decompresses and discards data:
+
+```c
+// In gzseek: forward seek in read mode
+state->skip = offset; // Will be consumed during next gz_fetch
+```
+
+### Backward Seeking
+
+Backward seeking requires a full rewind and re-decompression:
+
+```c
+// Must reset and decompress from the beginning
+gzrewind(file);
+state->skip = offset;
+```
+
+---
+
+## Gzip Format
+
+A gzip file (RFC 1952) consists of:
+
+```
+┌──────────────────────────────────┐
+│ Header (10+ bytes) │
+│ 1F 8B — magic number │
+│ 08 — compression method │
+│ FLG — flags │
+│ MTIME — modification time │
+│ XFL — extra flags │
+│ OS — operating system │
+│ [EXTRA] [NAME] [COMMENT] [HCRC]│
+├──────────────────────────────────┤
+│ Compressed data (deflate) │
+├──────────────────────────────────┤
+│ Trailer (8 bytes) │
+│ CRC32 — CRC of original data │
+│ ISIZE — size of original data │
+└──────────────────────────────────┘
+```
+
+FLG bits:
+- `FTEXT` (0x01) — Text mode hint
+- `FHCRC` (0x02) — Header CRC present
+- `FEXTRA` (0x04) — Extra field present
+- `FNAME` (0x08) — Original filename present
+- `FCOMMENT` (0x10) — Comment present
+
+### Concatenated Gzip
+
+Multiple gzip members can be concatenated. `gzread()` transparently
+decompresses all members in sequence, resetting the inflate state at
+each `Z_STREAM_END` boundary.
+
+---
+
+## Error Handling
+
+```c
+int PREFIX(gzerror)(gzFile file, int *errnum); // Get error message
+void PREFIX(gzclearerr)(gzFile file); // Clear error state
+```
+
+The `gz_state.err` field tracks errors:
+- `Z_OK` — No error
+- `Z_ERRNO` — System I/O error (check `errno`)
+- `Z_STREAM_ERROR` — Invalid state
+- `Z_DATA_ERROR` — Corrupted gzip data
+- `Z_MEM_ERROR` — Memory allocation failure
+- `Z_BUF_ERROR` — Insufficient buffer space
+
+---
+
+## Close
+
+```c
+int PREFIX(gzclose)(gzFile file);
+int PREFIX(gzclose_r)(gzFile file); // Close read-mode file
+int PREFIX(gzclose_w)(gzFile file); // Close write-mode file
+```
+
+`gzclose_w()` flushes pending output with `Z_FINISH`, writes the
+remaining compressed data, then calls `deflateEnd()`.
+
+`gzclose_r()` calls `inflateEnd()` and frees buffers.
+
+Both close the underlying file descriptor. For descriptors passed in via
+`gzdopen()`, duplicate the fd with `dup()` first if it must remain open
+after the gzip stream is closed.
diff --git a/docs/handbook/neozip/hardware-acceleration.md b/docs/handbook/neozip/hardware-acceleration.md
new file mode 100644
index 0000000000..b087e5a2ab
--- /dev/null
+++ b/docs/handbook/neozip/hardware-acceleration.md
@@ -0,0 +1,447 @@
+# Hardware Acceleration
+
+## Overview
+
+Neozip dispatches compression and decompression operations to the best
+available hardware-accelerated implementation at runtime. This is achieved
+through a function table (`functable`), CPU feature detection, and
+architecture-specific source files compiled with appropriate SIMD flags.
+
+---
+
+## CPU Feature Detection
+
+### `cpu_features.c`
+
+The entry point for feature detection:
+
+```c
+void Z_INTERNAL cpu_check_features(struct cpu_features *features) {
+ // Zero out features
+ memset(features, 0, sizeof(*features));
+
+#if defined(X86_FEATURES)
+ x86_check_features(features);
+#elif defined(ARM_FEATURES)
+ arm_check_features(features);
+#elif defined(POWER_FEATURES)
+ power_check_features(features);
+#elif defined(S390_FEATURES)
+ s390_check_features(features);
+#elif defined(RISCV_FEATURES)
+ riscv_check_features(features);
+#elif defined(LOONGARCH_FEATURES)
+ loongarch_check_features(features);
+#endif
+}
+```
+
+### CPU Feature Structures
+
+```c
+struct cpu_features {
+ union {
+#if defined(X86_FEATURES)
+ struct x86_cpu_features x86;
+#elif defined(ARM_FEATURES)
+ struct arm_cpu_features arm;
+#elif defined(POWER_FEATURES)
+ struct power_cpu_features power;
+#elif defined(S390_FEATURES)
+ struct s390_cpu_features s390;
+#elif defined(RISCV_FEATURES)
+ struct riscv_cpu_features riscv;
+#elif defined(LOONGARCH_FEATURES)
+ struct loongarch_cpu_features loongarch;
+#endif
+ };
+};
+```
+
+Each architecture defines its own feature structure:
+
+**x86** (`x86_features.h`):
+```c
+struct x86_cpu_features {
+ int has_avx2;
+ int has_avx512f;
+ int has_avx512dq;
+ int has_avx512bw;
+ int has_avx512vl;
+ int has_avx512_common; // All of f+dq+bw+vl
+ int has_avx512vnni;
+ int has_sse2;
+ int has_ssse3;
+ int has_sse41;
+ int has_sse42;
+ int has_pclmulqdq;
+ int has_vpclmulqdq;
+ int has_os_save_ymm;
+ int has_os_save_zmm;
+};
+```
+
+**ARM** (`arm_features.h`):
+```c
+struct arm_cpu_features {
+ int has_simd; // ARMv6 SIMD
+ int has_neon; // ARMv7+ NEON / AArch64 ASIMD
+ int has_crc32; // CRC32 instructions
+ int has_pmull; // PMULL (polynomial multiply long)
+ int has_eor3; // SHA3 EOR3 instruction
+ int has_fast_pmull; // High-perf PMULL available
+};
+```
+
+---
+
+## x86 Feature Detection
+
+`x86_check_features()` in `arch/x86/x86_features.c` uses CPUID:
+
+```c
+void Z_INTERNAL x86_check_features(struct cpu_features *features) {
+ unsigned eax, ebx, ecx, edx;
+
+ // CPUID leaf 1
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ features->x86.has_sse2 = !!(edx & (1 << 26));
+ features->x86.has_ssse3 = !!(ecx & (1 << 9));
+ features->x86.has_sse41 = !!(ecx & (1 << 19));
+ features->x86.has_sse42 = !!(ecx & (1 << 20));
+ features->x86.has_pclmulqdq = !!(ecx & (1 << 1));
+
+ // Check XSAVE support for YMM
+ if (ecx & (1 << 27)) { // OSXSAVE
+ uint64_t xcr0 = xgetbv(0);
+ features->x86.has_os_save_ymm = (xcr0 & 0x06) == 0x06;
+ features->x86.has_os_save_zmm = (xcr0 & 0xe6) == 0xe6;
+ }
+
+ // CPUID leaf 7
+ cpuidp(7, 0, &eax, &ebx, &ecx, &edx);
+ if (features->x86.has_os_save_ymm) {
+ features->x86.has_avx2 = !!(ebx & (1 << 5));
+ }
+ if (features->x86.has_os_save_zmm) {
+ features->x86.has_avx512f = !!(ebx & (1 << 16));
+ features->x86.has_avx512dq = !!(ebx & (1 << 17));
+ features->x86.has_avx512bw = !!(ebx & (1 << 30));
+ features->x86.has_avx512vl = !!(ebx & (1 << 31));
+ features->x86.has_vpclmulqdq = !!(ecx & (1 << 10));
+ features->x86.has_avx512vnni = !!(ecx & (1 << 11));
+ }
+ features->x86.has_avx512_common =
+ features->x86.has_avx512f && features->x86.has_avx512dq &&
+ features->x86.has_avx512bw && features->x86.has_avx512vl;
+}
+```
+
+### OS Support Verification
+
+YMM (256-bit) and ZMM (512-bit) registers require OS support to save/restore
+context during context switches. `xgetbv(0)` reads the XCR0 register:
+
+- Bits 1+2 set → YMM state is saved (required for AVX2)
+- Bits 1+2+5+6+7 set → ZMM state is saved (required for AVX-512)
+
+Without OS support, using AVX2/AVX-512 instructions will fault.
+
+---
+
+## ARM Feature Detection
+
+ARM detection in `arch/arm/arm_features.c` uses platform-specific methods:
+
+**Linux**: Reads `/proc/cpuinfo` or uses `getauxval(AT_HWCAP)`:
+```c
+#ifdef __aarch64__
+features->arm.has_neon = !!(hwcap & HWCAP_ASIMD); // AArch64 ASIMD
+#else
+features->arm.has_neon = !!(hwcap & HWCAP_NEON);  // AArch32 NEON
+#endif
+features->arm.has_crc32 = !!(hwcap & HWCAP_CRC32);
+features->arm.has_pmull = !!(hwcap & HWCAP_PMULL);
+```
+
+**macOS**: Uses `sysctlbyname()`:
+```c
+features->arm.has_neon = 1; // Always available on Apple Silicon
+features->arm.has_crc32 = has_feature("hw.optional.armv8_crc32");
+features->arm.has_pmull = has_feature("hw.optional.arm.FEAT_PMULL");
+```
+
+---
+
+## The Function Table
+
+### `functable_s` Structure
+
+```c
+struct functable_s {
+ adler32_func adler32;
+ adler32_copy_func adler32_copy;
+ compare256_func compare256;
+ crc32_func crc32;
+ crc32_copy_func crc32_copy;
+ inflate_fast_func inflate_fast;
+ longest_match_func longest_match;
+ longest_match_slow_func longest_match_slow;
+ slide_hash_func slide_hash;
+ chunksize_func chunksize;
+ chunkmemset_safe_func chunkmemset_safe;
+};
+```
+
+### Dispatch Cascade
+
+`functable.c` initialises the function table using a cascade:
+
+```c
+static void init_functable(void) {
+ struct cpu_features cf;
+ cpu_check_features(&cf);
+
+ // Start with generic C implementations
+ functable.adler32 = adler32_c;
+ functable.crc32 = crc32_braid;
+ functable.compare256 = compare256_c;
+ functable.longest_match = longest_match_c;
+ functable.slide_hash = slide_hash_c;
+ functable.inflate_fast = inflate_fast_c;
+ functable.chunksize = chunksize_c;
+ functable.chunkmemset_safe = chunkmemset_safe_c;
+
+#ifdef X86_SSE2
+ if (cf.x86.has_sse2) {
+ functable.chunksize = chunksize_sse2;
+ functable.chunkmemset_safe = chunkmemset_safe_sse2;
+ functable.compare256 = compare256_sse2;
+ functable.inflate_fast = inflate_fast_sse2;
+ functable.longest_match = longest_match_sse2;
+ functable.slide_hash = slide_hash_sse2;
+ }
+#endif
+#ifdef X86_SSSE3
+ if (cf.x86.has_ssse3) {
+ functable.adler32 = adler32_ssse3;
+ }
+#endif
+#ifdef X86_SSE42
+ if (cf.x86.has_sse42) {
+ functable.adler32 = adler32_sse42;
+ functable.compare256 = compare256_sse42;
+ functable.longest_match = longest_match_sse42;
+ }
+#endif
+#ifdef X86_PCLMULQDQ
+ if (cf.x86.has_pclmulqdq) {
+ functable.crc32 = crc32_pclmulqdq;
+ }
+#endif
+#ifdef X86_AVX2
+ if (cf.x86.has_avx2) {
+ functable.adler32 = adler32_avx2;
+ functable.chunksize = chunksize_avx2;
+ functable.chunkmemset_safe = chunkmemset_safe_avx2;
+ functable.compare256 = compare256_avx2;
+ functable.inflate_fast = inflate_fast_avx2;
+ functable.longest_match = longest_match_avx2;
+ functable.slide_hash = slide_hash_avx2;
+ }
+#endif
+#ifdef X86_AVX512
+ if (cf.x86.has_avx512_common) {
+ functable.adler32 = adler32_avx512;
+ functable.slide_hash = slide_hash_avx512;
+ }
+#endif
+#ifdef X86_AVX512VNNI
+ if (cf.x86.has_avx512vnni) {
+ functable.adler32 = adler32_avx512_vnni;
+ }
+#endif
+#ifdef X86_VPCLMULQDQ
+ if (cf.x86.has_vpclmulqdq && cf.x86.has_avx512_common) {
+ functable.crc32 = crc32_vpclmulqdq;
+ }
+#endif
+
+ // ARM cascade
+#ifdef ARM_NEON
+ if (cf.arm.has_neon) {
+ functable.adler32 = adler32_neon;
+ functable.chunksize = chunksize_neon;
+ functable.chunkmemset_safe = chunkmemset_safe_neon;
+ functable.compare256 = compare256_neon;
+ functable.slide_hash = slide_hash_neon;
+ functable.inflate_fast = inflate_fast_neon;
+ functable.longest_match = longest_match_neon;
+ }
+#endif
+#ifdef ARM_ACLE_CRC_HASH
+ if (cf.arm.has_crc32) {
+ functable.crc32 = crc32_acle;
+ }
+#endif
+
+ // Store with release semantics for thread safety
+ atomic_store_explicit(&functable_init_done, 1, memory_order_release);
+}
+```
+
+Later features override earlier ones, so the best available implementation
+wins.
+
+### Thread-Safe Initialisation
+
+The function table uses atomic operations for thread safety:
+
+```c
+static atomic_int functable_init_done = 0;
+static struct functable_s functable;
+
+#define FUNCTABLE_CALL(name) \
+ do { \
+ if (!atomic_load_explicit(&functable_init_done, memory_order_acquire)) \
+ init_functable(); \
+ } while (0); \
+ functable.name
+```
+
+The first call triggers initialisation; subsequent calls skip it via the
+atomic flag.
+
+---
+
+## Accelerated Operations
+
+### 1. Adler-32 Checksum
+
+**Scalar**: `adler32_c()` — byte-by-byte with NMAX blocking
+**SIMD**: Uses horizontal sum and dot product — SSE4.1/SSSE3/AVX2/AVX-512/VNNI/NEON/VMX/Power8/RVV/LASX
+
+### 2. CRC-32 Checksum
+
+**Scalar**: `crc32_braid()` — braided 5-word parallel CRC
+**SIMD**: Carry-less multiplication (CLMUL) for fast polynomial arithmetic — PCLMULQDQ/VPCLMULQDQ/PMULL/Power8/Zbc
+
+### 3. String Matching (`compare256`)
+
+Compares up to 256 bytes to find the longest match:
+
+**Scalar**: `compare256_c()` — byte-by-byte comparison
+**SIMD**: Loads 16/32/64 bytes at a time, uses `_mm_cmpeq_epi8` + `_mm_movemask_epi8` (SSE2) or equivalent to find the first mismatch
+
+### 4. Longest Match
+
+Wraps `compare256` with hash chain walking:
+
+```c
+longest_match_func longest_match;
+longest_match_slow_func longest_match_slow;
+```
+
+The `_slow` variant also inserts intermediate hash entries for level ≥ 9.
+
+### 5. Slide Hash
+
+Slides the hash table down by one window's worth:
+
+**Scalar**: `slide_hash_c()` — loop over HASH_SIZE + w_size entries
+**SIMD**: Processes 8/16/32 entries at a time using saturating subtract
+
+```c
+// SSE2 example pattern:
+__m128i vw = _mm_set1_epi16((uint16_t)s->w_size);
+for (...) {
+ __m128i v = _mm_loadu_si128(p);
+ v = _mm_subs_epu16(v, vw); // Saturating subtract
+ _mm_storeu_si128(p, v);
+}
+```
+
+### 6. Chunk Memory Set (`chunkmemset_safe`)
+
+Fast memset/memcpy for inflate back-reference copying:
+
+**Scalar**: `chunkmemset_safe_c()` — handles overlap via small loops
+**SIMD**: Replicates the pattern into vector registers and even handles
+overlapping copies via broadcast
+
+### 7. Inflate Fast
+
+The hot inner loop of the inflate engine:
+
+**Scalar**: `inflate_fast_c()` — standard decode loop
+**SIMD**: Uses wider copy operations from chunkmemset for the back-reference
+copy step
+
+---
+
+## Compile-Time vs Runtime Detection
+
+### Runtime Detection (Default)
+
+Enabled by `WITH_RUNTIME_CPU_DETECTION=ON` (default):
+- All SIMD variants are compiled as separate translation units
+- `functable.c` selects the best at runtime
+- Binary runs on any CPU of the target architecture
+
+### Native Compilation
+
+Enabled by `WITH_NATIVE_INSTRUCTIONS=ON`:
+- Compiles with `-march=native` (or equivalent)
+- The compiler uses host CPU features directly
+- Slightly faster: no function pointer indirection
+- Binary only runs on the build machine's CPU (or compatible)
+
+### Disabling Runtime Detection
+
+`DISABLE_RUNTIME_CPU_DETECTION` can be defined to skip runtime checks
+and use only the generic C implementations, useful for constrained
+environments.
+
+---
+
+## Adding a New Architecture
+
+To add SIMD support for a new architecture:
+
+1. **Create `arch/<arch>/` directory** with feature detection and implementations
+2. **Define a feature structure** in `<arch>_features.h`
+3. **Implement `<arch>_check_features()`** using platform-specific detection
+4. **Implement accelerated functions** matching the `functable_s` signatures
+5. **Add dispatch entries** in `functable.c` guarded by feature flags
+6. **Add CMake detection** in `CMakeLists.txt`:
+ ```cmake
+ check_<arch>_intrinsics()
+ if(WITH_<ARCH>_<FEATURE>)
+ add_compile_options(-m<flag>)
+ list(APPEND ZLIB_ARCH_SRCS arch/<arch>/...)
+ add_definitions(-D<ARCH>_<FEATURE>)
+ endif()
+ ```
+
+---
+
+## Supported Architecture Matrix
+
+| Architecture | adler32 | crc32 | compare256 | longest_match | slide_hash | inflate_fast | chunkmemset |
+|---|---|---|---|---|---|---|---|
+| x86 SSE2 | – | – | ✓ | ✓ | ✓ | ✓ | ✓ |
+| x86 SSSE3 | ✓ | – | – | – | – | – | – |
+| x86 SSE4.1 | ✓ | – | – | – | – | – | – |
+| x86 SSE4.2 | ✓ | – | ✓ | ✓ | – | – | – |
+| x86 PCLMULQDQ | – | ✓ | – | – | – | – | – |
+| x86 AVX2 | ✓ | – | ✓ | ✓ | ✓ | ✓ | ✓ |
+| x86 AVX-512 | ✓ | – | – | – | ✓ | – | – |
+| x86 AVX-512+VNNI | ✓ | – | – | – | – | – | – |
+| x86 VPCLMULQDQ | – | ✓ | – | – | – | – | – |
+| ARM NEON | ✓ | – | ✓ | ✓ | ✓ | ✓ | ✓ |
+| ARM CRC32 | – | ✓ | – | – | – | – | – |
+| ARM PMULL | – | ✓ | – | – | – | – | – |
+| Power VMX | ✓ | – | – | – | ✓ | – | – |
+| Power8 | ✓ | ✓ | – | – | – | – | – |
+| Power9 | – | – | ✓ | ✓ | – | – | – |
+| RISC-V RVV | ✓ | – | ✓ | ✓ | – | – | – |
+| s390 CRC | – | ✓ | – | – | – | – | – |
+| LoongArch LSX | ✓ | – | – | – | ✓ | – | ✓ |
+| LoongArch LASX | ✓ | – | – | – | – | – | – |
diff --git a/docs/handbook/neozip/huffman-coding.md b/docs/handbook/neozip/huffman-coding.md
new file mode 100644
index 0000000000..51998941d5
--- /dev/null
+++ b/docs/handbook/neozip/huffman-coding.md
@@ -0,0 +1,643 @@
+# Huffman Coding
+
+## Overview
+
+Huffman coding is the heart of DEFLATE compression. It assigns
+variable-length bit codes to symbols: shorter codes for frequent symbols,
+longer codes for rare ones. Neozip implements full Huffman tree construction,
+code generation, and bitstream emission in `trees.c` and `trees_emit.h`.
+
+---
+
+## Data Structures
+
+### `ct_data` — Code/Tree Node
+
+```c
+typedef union ct_data_s {
+ struct {
+ uint16_t freq; // Frequency count (during tree building)
+ uint16_t code; // Bit string (after tree building)
+ };
+ struct {
+ uint16_t dad; // Father node in Huffman tree
+ uint16_t len; // Bit length of the code
+ };
+} ct_data;
+```
+
+The union reuses the same 4 bytes: during tree construction, `freq` and
+`dad` are used; after code generation, `code` and `len` replace them.
+
+### `tree_desc` — Tree Descriptor
+
+```c
+typedef struct tree_desc_s {
+ ct_data *dyn_tree; // The dynamic tree being built
+ int max_code; // Largest code with non-zero frequency
+ const static_tree_desc *stat_desc; // Corresponding static tree description
+} tree_desc;
+```
+
+Each deflate state maintains three tree descriptors:
+```c
+struct ALIGNED_(64) internal_state {
+ tree_desc l_desc; // Literal/length tree descriptor
+ tree_desc d_desc; // Distance tree descriptor
+ tree_desc bl_desc; // Bit-length tree descriptor (for encoding the dynamic trees)
+ // ...
+ ct_data dyn_ltree[HEAP_SIZE]; // Literal/length tree (2*L_CODES+1 = 573)
+ ct_data dyn_dtree[2*D_CODES+1]; // Distance tree (2*30+1 = 61)
+ ct_data bl_tree[2*BL_CODES+1]; // Bit-length tree (2*19+1 = 39)
+};
+```
+
+### `static_tree_desc` — Static Tree Description
+
+```c
+struct static_tree_desc_s {
+ const ct_data *static_tree; // Static tree (NULL for bit lengths)
+ const int *extra_bits; // Extra bits for each code (or NULL)
+ int extra_base; // First code with extra bits
+ int elems; // Maximum number of elements in tree
+ unsigned int max_length; // Maximum code bit length
+};
+```
+
+Three static descriptors exist:
+```c
+static const static_tree_desc static_l_desc = {
+ static_ltree, extra_lbits, LITERALS + 1, L_CODES, MAX_BITS
+};
+static const static_tree_desc static_d_desc = {
+ static_dtree, extra_dbits, 0, D_CODES, MAX_BITS
+};
+static const static_tree_desc static_bl_desc = {
+ NULL, extra_blbits, 0, BL_CODES, MAX_BL_BITS
+};
+```
+
+---
+
+## Constants
+
+```c
+#define L_CODES 286 // Number of literal/length codes (256 literals + END_BLOCK + 29 lengths)
+#define D_CODES 30 // Number of distance codes
+#define BL_CODES 19 // Number of bit-length codes
+#define HEAP_SIZE (2*L_CODES + 1) // = 573
+#define MAX_BITS 15 // Maximum Huffman code length
+#define MAX_BL_BITS 7 // Maximum bit-length code length
+#define END_BLOCK 256 // End of block symbol
+#define LITERALS 256 // Number of literal bytes (0..255)
+#define LENGTH_CODES 29 // Number of length codes (not counting END_BLOCK)
+```
+
+### Extra Bits Tables
+
+Length codes (257–285) carry 0–5 extra bits:
+```c
+static const int extra_lbits[LENGTH_CODES] = {
+ 0,0,0,0,0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3, 4,4,4,4, 5,5,5,5, 0
+};
+```
+
+Distance codes (0–29) carry 0–13 extra bits:
+```c
+static const int extra_dbits[D_CODES] = {
+ 0,0,0,0, 1,1, 2,2, 3,3, 4,4, 5,5, 6,6, 7,7, 8,8, 9,9, 10,10, 11,11, 12,12, 13,13
+};
+```
+
+Bit-length codes (used to encode dynamic tree) carry 0–7 extra bits:
+```c
+static const int extra_blbits[BL_CODES] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 2,3,7
+};
+```
+
+---
+
+## Tree Construction
+
+### `build_tree()`
+
+The main tree-building function:
+
+```c
+static void build_tree(deflate_state *s, tree_desc *desc) {
+ ct_data *tree = desc->dyn_tree;
+ const ct_data *stree = desc->stat_desc->static_tree;
+ int elems = desc->stat_desc->elems;
+ int n, m;
+ int max_code = -1;
+ int node;
+
+ // Step 1: Initialize the heap with leaf frequencies
+ s->heap_len = 0;
+ s->heap_max = HEAP_SIZE;
+
+ for (n = 0; n < elems; n++) {
+ if (tree[n].freq != 0) {
+ s->heap[++(s->heap_len)] = max_code = n;
+ s->depth[n] = 0;
+ } else {
+ tree[n].len = 0;
+ }
+ }
+
+ // Step 2: Ensure at least two codes exist
+ while (s->heap_len < 2) {
+ node = s->heap[++(s->heap_len)] =
+ (max_code < 2 ? ++max_code : 0);
+ tree[node].freq = 1;
+ s->depth[node] = 0;
+ }
+ desc->max_code = max_code;
+
+ // Step 3: Build the Huffman tree using a min-heap
+ // Nodes elems..HEAP_SIZE-1 are internal nodes
+ for (n = s->heap_len / 2; n >= 1; n--)
+ pqdownheap(s, tree, n);
+
+ node = elems;
+ do {
+ n = s->heap[1]; // Least frequent
+ pqremove(s, tree, n);
+ m = s->heap[1]; // Next least frequent
+
+ s->heap[--(s->heap_max)] = n;
+ s->heap[--(s->heap_max)] = m;
+
+ // Create internal node
+ tree[node].freq = tree[n].freq + tree[m].freq;
+ s->depth[node] = MAX(s->depth[n], s->depth[m]) + 1;
+ tree[n].dad = tree[m].dad = (uint16_t)node;
+
+ s->heap[1] = node++;
+ pqdownheap(s, tree, 1);
+ } while (s->heap_len >= 2);
+
+ s->heap[--(s->heap_max)] = s->heap[1];
+
+ // Step 4: Compute code lengths and generate codes
+ gen_bitlen(s, desc);
+ gen_codes(tree, max_code, s->bl_count);
+}
+```
+
+### `pqdownheap()` — Min-Heap Maintenance
+
+```c
+static void pqdownheap(deflate_state *s, ct_data *tree, int k) {
+ int v = s->heap[k];
+ int j = k << 1; // Left child
+
+ while (j <= s->heap_len) {
+ // Select smaller child
+ if (j < s->heap_len &&
+ SMALLER(tree, s->heap[j+1], s->heap[j], s->depth))
+ j++;
+ // If v is smaller than both children, stop
+ if (SMALLER(tree, v, s->heap[j], s->depth))
+ break;
+ s->heap[k] = s->heap[j];
+ k = j;
+ j <<= 1;
+ }
+ s->heap[k] = v;
+}
+```
+
+The `SMALLER` macro compares by frequency first, then by depth:
+```c
+#define SMALLER(tree, n, m, depth) \
+ (tree[n].freq < tree[m].freq || \
+ (tree[n].freq == tree[m].freq && depth[n] <= depth[m]))
+```
+
+### `gen_bitlen()` — Bit Length Generation
+
+Converts the tree structure into code lengths, enforcing the maximum
+code length constraint:
+
+```c
+static void gen_bitlen(deflate_state *s, tree_desc *desc) {
+ ct_data *tree = desc->dyn_tree;
+ int max_code = desc->max_code;
+ const ct_data *stree = desc->stat_desc->static_tree;
+ const int *extra = desc->stat_desc->extra_bits;
+ int base = desc->stat_desc->extra_base;
+ int max_length = desc->stat_desc->max_length;
+ int overflow = 0;
+
+ // Traverse the tree via heap and set bit lengths
+ for (int bits = 0; bits <= MAX_BITS; bits++)
+ s->bl_count[bits] = 0;
+
+ tree[s->heap[s->heap_max]].len = 0; // Root has length 0
+
+ for (int h = s->heap_max + 1; h < HEAP_SIZE; h++) {
+ int n = s->heap[h];
+ int bits = tree[tree[n].dad].len + 1;
+ if (bits > max_length) {
+ bits = max_length;
+ overflow++;
+ }
+ tree[n].len = (uint16_t)bits;
+ if (n > max_code) continue; // Not a leaf
+
+ s->bl_count[bits]++;
+ // Account for extra bits in cost calculation
+ }
+
+ if (overflow == 0) return;
+
+ // Adjust bit lengths to stay within max_length
+ // Find the deepest non-full level and redistribute
+ // ...
+}
+```
+
+### `gen_codes()` — Code Generation
+
+Converts bit lengths into canonical Huffman codes:
+
+```c
+static void gen_codes(ct_data *tree, int max_code, uint16_t *bl_count) {
+ uint16_t next_code[MAX_BITS + 1];
+ unsigned code = 0;
+
+ // Step 1: Compute the first code for each bit length
+ for (int bits = 1; bits <= MAX_BITS; bits++) {
+ code = (code + bl_count[bits - 1]) << 1;
+ next_code[bits] = (uint16_t)code;
+ }
+
+ // Step 2: Assign codes
+ for (int n = 0; n <= max_code; n++) {
+ int len = tree[n].len;
+ if (len == 0) continue;
+ tree[n].code = (uint16_t)bi_reverse(next_code[len]++, len);
+ }
+}
+```
+
+The `bi_reverse()` function reverses the bit order: DEFLATE packs each
+Huffman code starting from its most significant bit, while the bit writer
+emits bits LSB-first, so every code is pre-reversed before emission.
+
+---
+
+## Static Huffman Trees
+
+DEFLATE defines fixed Huffman tables for BTYPE=01 blocks:
+
+**Literal/Length codes**:
+| Value | Bits | Codes |
+|---|---|---|
+| 0–143 | 8 | 00110000 – 10111111 |
+| 144–255 | 9 | 110010000 – 111111111 |
+| 256–279 | 7 | 0000000 – 0010111 |
+| 280–287 | 8 | 11000000 – 11000111 |
+
+**Distance codes**: All 30 codes use 5 bits (0–29).
+
+Static tables are precomputed:
+```c
+static const ct_data static_ltree[L_CODES + 2];
+static const ct_data static_dtree[D_CODES];
+```
+
+---
+
+## Dynamic Tree Encoding
+
+For BTYPE=10 blocks, the Huffman trees must be transmitted before the data.
+DEFLATE encodes the trees themselves using a third Huffman tree (the
+"bit-length tree").
+
+### Tree Encoding Steps
+
+1. **`scan_tree()`** — Find repeat patterns in the code length sequence:
+
+```c
+static void scan_tree(deflate_state *s, ct_data *tree, int max_code) {
+ int prevlen = -1;
+ int curlen;
+ int nextlen = tree[0].len;
+ int count = 0;
+ int max_count = 7;
+ int min_count = 4;
+
+ for (int n = 0; n <= max_code; n++) {
+ curlen = nextlen;
+ nextlen = tree[n + 1].len;
+ if (++count < max_count && curlen == nextlen) continue;
+
+ if (count < min_count) {
+ s->bl_tree[curlen].freq += count;
+ } else if (curlen != 0) {
+ if (curlen != prevlen) s->bl_tree[curlen].freq++;
+ s->bl_tree[REP_3_6].freq++; // Code 16: repeat 3-6 times
+ } else if (count <= 10) {
+ s->bl_tree[REPZ_3_10].freq++; // Code 17: repeat 0, 3-10 times
+ } else {
+ s->bl_tree[REPZ_11_138].freq++; // Code 18: repeat 0, 11-138 times
+ }
+ // Reset for next sequence
+ count = 0;
+ prevlen = curlen;
+ }
+}
+```
+
+Special codes for run-length encoding:
+```c
+#define REP_3_6 16 // Repeat previous length, 3-6 times (2 extra bits)
+#define REPZ_3_10 17 // Repeat a zero length, 3-10 times (3 extra bits)
+#define REPZ_11_138 18 // Repeat a zero length, 11-138 times (7 extra bits)
+```
+
+2. **`send_tree()`** — Emit the encoded tree to the bitstream:
+
+```c
+static void send_tree(deflate_state *s, ct_data *tree, int max_code) {
+    int prevlen = -1;
+    int curlen, nextlen = tree[0].len;
+    int count = 0;
+    int max_count = 7;  // Max repeat count (updated per run, as in scan_tree)
+    int min_count = 4;  // Min repeat count worth run-length encoding
+
+ for (int n = 0; n <= max_code; n++) {
+ curlen = nextlen;
+ nextlen = tree[n + 1].len;
+ if (++count < max_count && curlen == nextlen) continue;
+
+ if (count < min_count) {
+ do { send_code(s, curlen, s->bl_tree); } while (--count);
+ } else if (curlen != 0) {
+ if (curlen != prevlen) {
+ send_code(s, curlen, s->bl_tree);
+ count--;
+ }
+ send_code(s, REP_3_6, s->bl_tree);
+ send_bits(s, count - 3, 2); // 2 extra bits
+ } else if (count <= 10) {
+ send_code(s, REPZ_3_10, s->bl_tree);
+ send_bits(s, count - 3, 3); // 3 extra bits
+ } else {
+ send_code(s, REPZ_11_138, s->bl_tree);
+ send_bits(s, count - 11, 7); // 7 extra bits
+ }
+ count = 0;
+ prevlen = curlen;
+ }
+}
+```
+
+3. **Bit length code order** — The 19 bit-length codes are transmitted in
+   a permuted order so that the codes least likely to be used come last
+   and trailing zero-length entries can be omitted from the stream:
+
+```c
+static const uint8_t bl_order[BL_CODES] = {
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+};
+```
+
+---
+
+## Bit Output Engine
+
+### `send_bits()` — 64-bit Bit Buffer
+
+From `trees_emit.h`:
+
+```c
+static inline void send_bits(deflate_state *s, int value, unsigned length) {
+ s->bi_buf |= ((uint64_t)value << s->bi_valid);
+ s->bi_valid += length;
+
+ if (s->bi_valid >= BIT_BUF_SIZE) { // BIT_BUF_SIZE = 64
+ put_uint64(s, s->bi_buf); // Flush 8 bytes to pending buffer
+ s->bi_valid -= BIT_BUF_SIZE;
+ s->bi_buf = (uint64_t)value >> (length - s->bi_valid);
+ }
+}
+```
+
+The 64-bit `bi_buf` accumulates bits and flushes complete 8-byte words to
+the pending output buffer. This is more efficient than the traditional
+16-bit buffer used in original zlib.
+
+### `send_code()` — Emit a Huffman Code
+
+```c
+#define send_code(s, c, tree) send_bits(s, tree[c].code, tree[c].len)
+```
+
+### `bi_windup()` — Byte-Align the Output
+
+```c
+static inline void bi_windup(deflate_state *s) {
+ if (s->bi_valid > 56) {
+ put_uint64(s, s->bi_buf);
+ } else {
+ // Flush remaining bytes
+ while (s->bi_valid >= 8) {
+ put_byte(s, s->bi_buf & 0xff);
+ s->bi_buf >>= 8;
+ s->bi_valid -= 8;
+ }
+ }
+ s->bi_buf = 0;
+ s->bi_valid = 0;
+}
+```
+
+---
+
+## Block Emission
+
+### `compress_block()`
+
+Emits all symbols in the current block using Huffman codes:
+
+```c
+static void compress_block(deflate_state *s, const ct_data *ltree, const ct_data *dtree) {
+ uint32_t sym_buf_index = 0;
+ unsigned dist, lc, code;
+
+ if (s->sym_next != 0) {
+ do {
+ // Read (distance, length/literal) from symbol buffer
+ dist = s->sym_buf[sym_buf_index++];
+ dist |= (uint32_t)s->sym_buf[sym_buf_index++] << 8;
+ lc = s->sym_buf[sym_buf_index++];
+
+ if (dist == 0) {
+ // Literal byte
+ send_code(s, lc, ltree);
+ } else {
+ // Length/distance pair
+ code = zng_length_code[lc];
+ send_code(s, code + LITERALS + 1, ltree); // Length code
+ int extra = extra_lbits[code];
+ if (extra) send_bits(s, lc - base_length[code], extra);
+
+ dist--;
+ code = d_code(dist);
+ send_code(s, code, dtree); // Distance code
+ extra = extra_dbits[code];
+ if (extra) send_bits(s, dist - base_dist[code], extra);
+ }
+ } while (sym_buf_index < s->sym_next);
+ }
+
+ send_code(s, END_BLOCK, ltree); // Emit end-of-block
+}
+```
+
+### Combined Base+Extra Tables
+
+From `trees_emit.h`, base values and extra bits are combined into tables
+for faster lookup:
+
+```c
+struct lut_pair {
+ uint16_t base;
+ uint8_t extra;
+};
+
+// Length base values and extra bits (indexed by length code)
+static const struct lut_pair base_length_lut[LENGTH_CODES] = {
+ {0,0}, {1,0}, {2,0}, {3,0}, {4,0}, {5,0}, {6,0}, {7,0},
+ {8,1}, {10,1}, {12,1}, {14,1},
+ {16,2}, {20,2}, {24,2}, {28,2},
+ {32,3}, {40,3}, {48,3}, {56,3},
+ {64,4}, {80,4}, {96,4}, {112,4},
+ {128,5}, {160,5}, {192,5}, {224,5}, {0,0}
+};
+
+// Distance base values and extra bits (indexed by distance code)
+static const struct lut_pair base_dist_lut[D_CODES] = { ... };
+```
+
+---
+
+## Block Type Selection
+
+### `zng_tr_flush_block()`
+
+Decides the block type (stored, static, or dynamic) and emits the block:
+
+```c
+void Z_INTERNAL zng_tr_flush_block(deflate_state *s, char *buf,
+ uint32_t stored_len, int last) {
+ uint32_t opt_lenb, static_lenb;
+ int max_blindex = 0;
+
+ if (s->level > 0) {
+ // Build dynamic trees
+ build_tree(s, &(s->l_desc));
+ build_tree(s, &(s->d_desc));
+
+ // Determine number of bit-length codes to send
+ max_blindex = build_bl_tree(s);
+
+ // Compute block sizes
+ opt_lenb = (s->opt_len + 3 + 7) >> 3; // Dynamic block size
+ static_lenb = (s->static_len + 3 + 7) >> 3; // Static block size
+ } else {
+ opt_lenb = static_lenb = stored_len + 5;
+ }
+
+ if (stored_len + 4 <= opt_lenb && buf != NULL) {
+ // Stored block is smallest
+ zng_tr_stored_block(s, buf, stored_len, last);
+ } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
+ // Static Huffman block
+ send_bits(s, (STATIC_TREES << 1) + last, 3);
+ compress_block(s, static_ltree, static_dtree);
+ } else {
+ // Dynamic Huffman block
+ send_bits(s, (DYN_TREES << 1) + last, 3);
+ send_all_trees(s, s->l_desc.max_code + 1,
+ s->d_desc.max_code + 1,
+ max_blindex + 1);
+ compress_block(s, s->dyn_ltree, s->dyn_dtree);
+ }
+
+ init_block(s); // Reset for next block
+
+ if (last) bi_windup(s); // Byte-align the final block
+}
+```
+
+Block type constants:
+```c
+#define STORED_BLOCK 0
+#define STATIC_TREES 1
+#define DYN_TREES 2
+```
+
+### `init_block()` — Reset Tree State
+
+```c
+void Z_INTERNAL init_block(deflate_state *s) {
+ // Reset literal/length frequencies
+ for (int n = 0; n < L_CODES; n++)
+ s->dyn_ltree[n].freq = 0;
+ // Reset distance frequencies
+ for (int n = 0; n < D_CODES; n++)
+ s->dyn_dtree[n].freq = 0;
+ // Reset bit-length frequencies
+ for (int n = 0; n < BL_CODES; n++)
+ s->bl_tree[n].freq = 0;
+
+ s->dyn_ltree[END_BLOCK].freq = 1;
+ s->opt_len = s->static_len = 0;
+ s->sym_next = s->matches = 0;
+}
+```
+
+---
+
+## Symbol Buffer
+
+During compression, literals and length/distance pairs are recorded in the
+symbol buffer before Huffman encoding:
+
+```c
+// In deflate_state:
+unsigned char *sym_buf; // Buffer for literals and matches
+uint32_t sym_next; // Next free position
+uint32_t sym_end; // Size of sym_buf (lit_bufsize * 3)
+```
+
+Each symbol occupies 3 bytes:
+- **Literal**: `{ 0x00, 0x00, byte_value }`
+- **Match**: `{ dist_low, dist_high, length - STD_MIN_MATCH }`
+
+```c
+// Record a literal
+static inline int zng_tr_tally_lit(deflate_state *s, unsigned char c) {
+ s->sym_buf[s->sym_next++] = 0;
+ s->sym_buf[s->sym_next++] = 0;
+ s->sym_buf[s->sym_next++] = c;
+ s->dyn_ltree[c].freq++;
+ return (s->sym_next == s->sym_end);
+}
+
+// Record a match
+static inline int zng_tr_tally_dist(deflate_state *s, uint32_t dist,
+ uint32_t len) {
+ s->sym_buf[s->sym_next++] = (uint8_t)(dist);
+ s->sym_buf[s->sym_next++] = (uint8_t)(dist >> 8);
+ s->sym_buf[s->sym_next++] = (uint8_t)len;
+ s->matches++;
+ dist--;
+ s->dyn_ltree[zng_length_code[len] + LITERALS + 1].freq++;
+ s->dyn_dtree[d_code(dist)].freq++;
+ return (s->sym_next == s->sym_end);
+}
+```
+
+When the buffer is full (`sym_next == sym_end`), the block is flushed.
diff --git a/docs/handbook/neozip/inflate-engine.md b/docs/handbook/neozip/inflate-engine.md
new file mode 100644
index 0000000000..6467cff487
--- /dev/null
+++ b/docs/handbook/neozip/inflate-engine.md
@@ -0,0 +1,665 @@
+# Inflate Engine
+
+## Overview
+
+The inflate engine decompresses DEFLATE-format data (RFC 1951) wrapped in
+either a zlib container (RFC 1950), a gzip container (RFC 1952), or with no
+wrapper (raw deflate). It is implemented as a state machine in `inflate.c`,
+with a hot inner loop in `inffast_tpl.h` and table-building in `inftrees.c`.
+
+The engine processes input byte-by-byte through a 64-bit bit accumulator
+(`hold`), transitioning between states as it parses headers, decodes Huffman
+codes, copies back-referenced data, and verifies integrity checksums.
+
+---
+
+## State Machine
+
+The `inflate_mode` enum defines all possible states:
+
+```c
+typedef enum {
+ HEAD = 16180, // Waiting for magic header
+ FLAGS, // Waiting for method and flags (gzip)
+ TIME, // Waiting for modification time (gzip)
+ OS, // Waiting for extra flags and OS (gzip)
+ EXLEN, // Waiting for extra length (gzip)
+ EXTRA, // Waiting for extra bytes (gzip)
+ NAME, // Waiting for end of file name (gzip)
+ COMMENT, // Waiting for end of comment (gzip)
+ HCRC, // Waiting for header CRC (gzip)
+ DICTID, // Waiting for dictionary check value
+ DICT, // Waiting for inflateSetDictionary() call
+ TYPE, // Waiting for type bits (including last-flag bit)
+ TYPEDO, // Same as TYPE but skip check to exit on new block
+ STORED, // Waiting for stored size (length and complement)
+ COPY_, // Waiting for input or output to copy stored block (first time)
+ COPY, // Waiting for input or output to copy stored block
+ TABLE, // Waiting for dynamic block table lengths
+ LENLENS, // Waiting for code length code lengths
+ CODELENS, // Waiting for length/lit and distance code lengths
+ LEN_, // Waiting for length/lit/eob code (first time)
+ LEN, // Waiting for length/lit/eob code
+ LENEXT, // Waiting for length extra bits
+ DIST, // Waiting for distance code
+ DISTEXT, // Waiting for distance extra bits
+ MATCH, // Waiting for output space to copy string
+ LIT, // Waiting for output space to write literal
+ CHECK, // Waiting for 32-bit check value
+ LENGTH, // Waiting for 32-bit length (gzip)
+ DONE, // Finished check, remain here until reset
+ BAD, // Got a data error, remain here until reset
+ SYNC // Looking for synchronization bytes
+} inflate_mode;
+```
+
+The starting value `HEAD = 16180` (a non-zero, distinctive value) helps catch
+uninitialised state errors.
+
+---
+
+## State Transitions
+
+### Header Processing
+
+```
+HEAD ─┬─ (gzip header detected) ──▶ FLAGS ─▶ TIME ─▶ OS ─▶ EXLEN ─▶ EXTRA
+ │ ─▶ NAME ─▶ COMMENT ─▶ HCRC ─▶ TYPE
+ │
+ ├─ (zlib header detected) ──▶ DICTID ─▶ DICT ─▶ TYPE
+ │ or
+ │ ──▶ TYPE (no dictionary)
+ │
+ └─ (raw deflate) ──────────▶ TYPEDO
+```
+
+### Block Processing
+
+```
+TYPE ──▶ TYPEDO ─┬─ (stored block) ──▶ STORED ─▶ COPY_ ─▶ COPY ──▶ TYPE
+ │
+ ├─ (dynamic block) ──▶ TABLE ─▶ LENLENS ─▶ CODELENS ─▶ LEN_
+ │
+ ├─ (fixed block) ──▶ LEN_
+ │
+ └─ (last block) ──▶ CHECK
+```
+
+### Data Decoding
+
+```
+LEN_ ──▶ LEN ─┬─ (literal) ──▶ LIT ──▶ LEN
+ │
+ ├─ (length code) ──▶ LENEXT ──▶ DIST ──▶ DISTEXT ──▶ MATCH ──▶ LEN
+ │
+ └─ (end-of-block) ──▶ TYPE (or CHECK if last block)
+```
+
+### Trailer Processing
+
+```
+CHECK ──▶ LENGTH (gzip only) ──▶ DONE
+```
+
+---
+
+## The `inflate_state` Structure
+
+```c
+struct ALIGNED_(64) inflate_state {
+ // Stream back-pointer
+ PREFIX3(stream) *strm;
+
+ // State machine
+ inflate_mode mode; // Current state
+ int last; // true if processing last block
+ int wrap; // bit 0: zlib, bit 1: gzip, bit 2: validate check
+ int havedict; // Dictionary provided?
+ int flags; // gzip header flags (-1 = no header yet, 0 = zlib)
+
+ // Integrity
+ unsigned was; // Initial match length for inflateMark
+ unsigned long check; // Running checksum (Adler-32 or CRC-32)
+ unsigned long total; // Running output byte count
+ PREFIX(gz_headerp) head;// Where to save gzip header info
+
+ // Sliding window
+ unsigned wbits; // log2(requested window size)
+ uint32_t wsize; // Window size (or 0 if not allocated)
+ uint32_t wbufsize; // Real allocated window size including padding
+ uint32_t whave; // Valid bytes in window
+ uint32_t wnext; // Window write index
+ unsigned char *window; // Sliding window buffer
+
+ // Bit accumulator
+ uint64_t hold; // 64-bit input bit accumulator
+ unsigned bits; // Bits currently held
+
+ // Code tables
+ unsigned lenbits; // Root bits for length code table
+ code const *lencode; // Length/literal code table pointer
+ code const *distcode; // Distance code table pointer
+ unsigned distbits; // Root bits for distance code table
+
+ // Current match state
+ uint32_t length; // Literal value or copy length
+ unsigned offset; // Copy distance
+ unsigned extra; // Extra bits to read
+
+ // Dynamic table building
+ unsigned ncode; // Code length code lengths count
+ unsigned nlen; // Length code count
+ unsigned ndist; // Distance code count
+ uint32_t have; // Code lengths decoded so far
+ code *next; // Next free slot in codes[]
+
+ // Working storage
+ uint16_t lens[320]; // Code lengths (max 286 + 30 + padding)
+ uint16_t work[288]; // Work area for inflate_table
+ code codes[ENOUGH]; // Code tables (ENOUGH = 1924 entries)
+
+ inflate_allocs *alloc_bufs;
+};
+```
+
+### Key Dimensions
+
+| Constant | Value | Purpose |
+|---|---|---|
+| `ENOUGH` | 1924 | Maximum total code table entries |
+| `ENOUGH_LENS` | 1332 | Maximum literal/length table entries |
+| `ENOUGH_DISTS` | 592 | Maximum distance table entries |
+| `MAX_WBITS` | 15 | Maximum window bits (32KB window) |
+| `MIN_WBITS` | 8 | Minimum window bits (256-byte window) |
+
+---
+
+## Bit Accumulator
+
+The inflate engine uses a 64-bit bit buffer for efficient bit extraction:
+
+```c
+uint64_t hold; // Accumulated bits (LSB first)
+unsigned bits; // Number of valid bits in hold
+```
+
+Bits are read from `hold` from the least significant end:
+
+```c
+// Load bits from input
+#define PULLBYTE() do { \
+ hold += (uint64_t)(*next++) << bits; \
+ bits += 8; \
+} while (0)
+
+// Ensure at least n bits are available
+#define NEEDBITS(n) do { \
+ while (bits < (unsigned)(n)) \
+ PULLBYTE(); \
+} while (0)
+
+// Read n bits from hold
+#define BITS(n) ((unsigned)hold & ((1U << (n)) - 1))
+
+// Drop n bits from hold
+#define DROPBITS(n) do { \
+ hold >>= (n); \
+ bits -= (unsigned)(n); \
+} while (0)
+```
+
+The 64-bit width allows accumulating up to 8 bytes before overflow, reducing
+the frequency of input reads.
+
+---
+
+## Huffman Code Decoding
+
+### The `code` Structure
+
+Each entry in a decoding table is a `code` structure:
+
+```c
+typedef struct {
+ unsigned char bits; // Bits to consume from input
+ unsigned char op; // Operation type + extra bits
+ uint16_t val; // Output value or table offset
+} code;
+```
+
+The `op` field encodes the meaning:
+
+| `op` Value | Meaning |
+|---|---|
+| `0x00` | Literal: `val` is the literal byte |
+| `0x0t` (t ≠ 0) | Table link: `t` is the number of additional index bits |
+| `0x1e` | Length or distance: `e` extra bits, `val` is the base value |
+| `0x60` | End of block |
+| `0x40` | Invalid code |
+
+### Table Building via `zng_inflate_table()`
+
+`inftrees.c` builds two-level Huffman decoding tables:
+
+```c
+int zng_inflate_table(codetype type, uint16_t *lens, unsigned codes,
+ code **table, unsigned *bits, uint16_t *work);
+```
+
+Parameters:
+- `type` — `CODES` (code lengths), `LENS` (literal/length), or `DISTS` (distances)
+- `lens` — Array of code lengths for each symbol
+- `codes` — Number of symbols
+- `table` — Output: pointer to the root table
+- `bits` — Input: requested root table bits; Output: actual root table bits
+- `work` — Scratch space
+
+The algorithm:
+1. Count code lengths → `count[]`
+2. Compute offsets for sorting → `offs[]`
+3. Sort symbols by code length → `work[]`
+4. Fill the primary table (root bits = `*bits`)
+5. For codes longer than root bits, create sub-tables
+
+Root table sizes:
+- Literal/length: 10 bits (1024 entries)
+- Distance: 9 bits (512 entries)
+
+Sub-tables handle codes longer than the root bits, linked from the primary
+table via the `op` field.
+
+### Fixed Huffman Tables
+
+For fixed-code blocks (BTYPE=01), predefined tables are stored in
+`inffixed_tbl.h`. These are computed once and reused.
+
+The `fixedtables()` function in `inflate.c` sets up the fixed tables:
+```c
+state->lencode = lenfix; // Predefined literal/length table
+state->lenbits = 9; // Root bits for fixed literal/length codes
+state->distcode = distfix; // Predefined distance table
+state->distbits = 5; // Root bits for fixed distance codes
+```
+
+---
+
+## The Inflate Loop
+
+The main `inflate()` function is a single large `for` loop with a `switch`
+on `state->mode`:
+
+```c
+int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) {
+ // Input/output tracking
+ unsigned char *put = strm->next_out;
+ unsigned char *next = strm->next_in;
+ uint64_t hold = state->hold;
+ unsigned bits = state->bits;
+
+ for (;;) switch (state->mode) {
+ case HEAD:
+ // Detect zlib/gzip/raw header
+ NEEDBITS(16);
+ if (state->wrap == 0) {
+ // Raw deflate
+ state->mode = TYPEDO;
+ break;
+ }
+ if ((hold & 0xff) == 0x1f && ((hold >> 8) & 0xff) == 0x8b) {
+ // gzip header
+ state->mode = FLAGS;
+ } else {
+ // zlib header (CMF + FLG)
+ state->mode = DICTID; // or TYPE
+ }
+ break;
+
+ case TYPE:
+ // Read block header
+ NEEDBITS(3);
+ state->last = BITS(1);
+ DROPBITS(1);
+ switch (BITS(2)) {
+ case 0: state->mode = STORED; break; // Stored block
+ case 1: fixedtables(state); // Fixed Huffman
+ state->mode = LEN_; break;
+ case 2: state->mode = TABLE; break; // Dynamic Huffman
+ case 3: state->mode = BAD; break; // Reserved (error)
+ }
+ DROPBITS(2);
+ break;
+
+ case LEN:
+ // Decode literal/length code
+ here = state->lencode[BITS(state->lenbits)];
+ if (here.op == 0) {
+ // Literal
+ *put++ = (unsigned char)(here.val);
+ DROPBITS(here.bits);
+ state->mode = LEN;
+ } else if (here.op & 16) {
+ // Length code
+ state->length = here.val;
+ state->extra = here.op & 15;
+ state->mode = LENEXT;
+ } else if (here.op == 96) {
+ // End of block
+ state->mode = TYPE;
+ }
+ break;
+
+ case DIST:
+ // Decode distance code
+ here = state->distcode[BITS(state->distbits)];
+ // ... similar pattern ...
+ break;
+
+ case MATCH:
+ // Copy from window
+ // Copy state->length bytes from position (out - state->offset)
+ // Uses chunkmemset_safe() for SIMD-accelerated copying
+ break;
+
+ case CHECK:
+ // Verify checksum
+ // Compare computed check with stored value
+ break;
+
+ case DONE:
+ ret = Z_STREAM_END;
+ goto inf_leave;
+ }
+
+inf_leave:
+ // Save state
+ state->hold = hold;
+ state->bits = bits;
+ return ret;
+}
+```
+
+---
+
+## Fast Inflate Path
+
+When sufficient input and output are available, `inflate()` calls the fast
+inner loop:
+
+```c
+case LEN_:
+ // Check if we can use the fast path
+ if (have >= 6 && left >= 258) {
+ FUNCTABLE_CALL(inflate_fast)(strm, out);
+ // Reload local state
+ state->mode = LEN;
+ break;
+ }
+ state->mode = LEN;
+ break;
+```
+
+`inflate_fast()` (defined via `inffast_tpl.h`) processes codes without
+returning to the main switch loop:
+
+1. Pre-load bits into the 64-bit accumulator
+2. Loop while input ≥ 6 bytes and output ≥ 258 bytes:
+ - Decode literal/length from `lencode`
+ - If literal: output directly
+ - If length: decode distance from `distcode`, copy from window
+ - If EOB: exit loop
+3. Handle sub-table traversal for codes longer than root bits
+
+SIMD variants (SSE2, AVX2, AVX-512, NEON) accelerate the copy operation
+via `chunkmemset_safe()`, which can copy overlapping regions efficiently
+using vector loads/stores.
+
+---
+
+## Window Management
+
+The inflate sliding window stores recent output for back-reference copying:
+
+```c
+unsigned char *window; // Circular buffer
+uint32_t wsize; // Window size
+uint32_t whave; // Valid bytes in window
+uint32_t wnext; // Write position (circular)
+```
+
+### `updatewindow()`
+
+Called after each inflate pass to copy decompressed output into the window:
+
+```c
+static void updatewindow(PREFIX3(stream) *strm, const uint8_t *end,
+ uint32_t len, int32_t cksum) {
+ struct inflate_state *state = (struct inflate_state *)strm->state;
+
+ // First time: set up window size
+ if (state->wsize == 0) {
+ state->wsize = 1U << state->wbits;
+ state->wnext = 0;
+ state->whave = 0;
+ }
+
+ // Copy output to window (handles wraparound)
+ if (len >= state->wsize) {
+ // Copy last wsize bytes
+ memcpy(state->window, end - state->wsize, state->wsize);
+ state->wnext = 0;
+ state->whave = state->wsize;
+ } else {
+ // Copy len bytes, wrapping around if necessary
+ uint32_t dist = MIN(state->wsize - state->wnext, len);
+ memcpy(state->window + state->wnext, end - len, dist);
+ len -= dist;
+ if (len) {
+ memcpy(state->window, end - len, len);
+ state->wnext = len;
+ state->whave = state->wsize;
+ } else {
+ state->wnext += dist;
+ if (state->wnext == state->wsize) state->wnext = 0;
+ if (state->whave < state->wsize) state->whave += dist;
+ }
+ }
+}
+```
+
+### Back-Reference Copying
+
+When a length/distance pair is decoded, the engine copies `length` bytes
+from position `(output_position - distance)`:
+
+```c
+case MATCH:
+ // Copy from window
+ from = put - state->offset;
+ if (state->offset > (put - state->window)) {
+ // Source is in the circular window buffer
+ // Handle wrap-around via window[]
+ }
+ // Copy length bytes to put, potentially overlapping
+ FUNCTABLE_CALL(chunkmemset_safe)(put, from, state->length, left);
+```
+
+The `chunkmemset_safe()` function handles the case where `distance < length`
+(overlapping copy), which occurs in runs of repeated patterns. SIMD
+implementations can vectorise even overlapping copies by replicating the
+pattern.
+
+---
+
+## Checksum Handling
+
+During decompression, a running checksum is computed:
+
+- **zlib format**: Adler-32 of the uncompressed data
+- **gzip format**: CRC-32 of the uncompressed data
+
+The selection is based on `state->flags`:
+```c
+static inline void inf_chksum_cpy(PREFIX3(stream) *strm, uint8_t *dst,
+ const uint8_t *src, uint32_t copy) {
+ struct inflate_state *state = (struct inflate_state *)strm->state;
+ if (state->flags)
+ strm->adler = state->check = FUNCTABLE_CALL(crc32_copy)(state->check, dst, src, copy);
+ else
+ strm->adler = state->check = FUNCTABLE_CALL(adler32_copy)(state->check, dst, src, copy);
+}
+```
+
+The `_copy` variants compute the checksum while simultaneously copying data,
+avoiding a second pass over the output.
+
+After all blocks are decompressed, the stored checksum is read and compared:
+
+```c
+case CHECK:
+ NEEDBITS(32);
+ if (ZSWAP32((unsigned long)hold) != state->check) {
+ strm->msg = "incorrect data check";
+ state->mode = BAD;
+ break;
+ }
+ state->mode = LENGTH; // gzip: also check ISIZE
+ break;
+```
+
+---
+
+## Error Handling
+
+The inflate engine detects and reports several error conditions:
+
+| Error | Detection | Recovery |
+|---|---|---|
+| Invalid block type (11) | `TYPE` state | `state->mode = BAD` |
+| Invalid stored block length | `STORED` state: `LEN != ~NLEN` | `BAD` |
+| Invalid code length repeat | `CODELENS` state | `BAD` |
+| Invalid literal/length code | Decoding | `BAD` |
+| Invalid distance code | Decoding | `BAD` |
+| Distance too far back | `MATCH` state | `BAD` (or zero-fill if `INFLATE_ALLOW_INVALID_DIST`) |
+| Checksum mismatch | `CHECK` state | `Z_DATA_ERROR` |
+| Header version mismatch | `HEAD` state | `Z_STREAM_ERROR` |
+
+The `inflateSync()` function searches for a sync point (four bytes `00 00 FF FF`)
+to recover from data corruption. `inflateMark()` reports the current
+decompression position for partial recovery.
+
+---
+
+## Inflate API Functions
+
+### Core Functions
+
+```c
+int inflateInit(z_stream *strm);
+int inflateInit2(z_stream *strm, int windowBits);
+int inflate(z_stream *strm, int flush);
+int inflateEnd(z_stream *strm);
+```
+
+### Window Bits Parameter
+
+The `windowBits` parameter to `inflateInit2()` controls both the window size
+and the stream format:
+
+| windowBits | Format | Window Size |
+|---|---|---|
+| 8..15 | zlib (auto-detect dictionary) | 2^windowBits |
+| -8..-15 | Raw deflate (no wrapper) | 2^\|windowBits\| |
+| 24..31 (8..15 + 16) | gzip only | 2^(windowBits-16) |
+| 40..47 (8..15 + 32) | Auto-detect zlib or gzip | 2^(windowBits-32) |
+
+### Reset Functions
+
+```c
+int inflateReset(z_stream *strm); // Full reset
+int inflateResetKeep(z_stream *strm); // Reset but keep window
+int inflateReset2(z_stream *strm, int windowBits); // Reset with new windowBits
+```
+
+### Dictionary Support
+
+```c
+int inflateSetDictionary(z_stream *strm, const unsigned char *dictionary, unsigned dictLength);
+int inflateGetDictionary(z_stream *strm, unsigned char *dictionary, unsigned *dictLength);
+```
+
+When the zlib header indicates a preset dictionary (`FDICT` flag), `inflate()`
+returns `Z_NEED_DICT`. The application must then call `inflateSetDictionary()`
+with the correct dictionary before continuing.
+
+### Sync and Recovery
+
+```c
+int inflateSync(z_stream *strm); // Search for sync point
+long inflateMark(z_stream *strm); // Report progress
+int inflatePrime(z_stream *strm, int bits, int value); // Prime bit buffer
+```
+
+### Header Access
+
+```c
+int inflateGetHeader(z_stream *strm, gz_header *head); // Get gzip header info
+```
+
+### Copy
+
+```c
+int inflateCopy(z_stream *dest, z_stream *source); // Deep copy
+```
+
+---
+
+## Memory Allocation
+
+Inflate uses a single-allocation strategy via `alloc_inflate()`:
+
+```c
+inflate_allocs* alloc_inflate(PREFIX3(stream) *strm) {
+ int window_size = INFLATE_ADJUST_WINDOW_SIZE((1 << MAX_WBITS) + 64);
+ int state_size = sizeof(inflate_state);
+ int alloc_size = sizeof(inflate_allocs);
+
+ // Calculate positions with alignment padding
+ int window_pos = PAD_WINDOW(0);
+ int state_pos = PAD_64(window_pos + window_size);
+ int alloc_pos = PAD_16(state_pos + state_size);
+ int total_size = PAD_64(alloc_pos + alloc_size + (WINDOW_PAD_SIZE - 1));
+
+ char *buf = strm->zalloc(strm->opaque, 1, total_size);
+ // Partition buf into window, state, alloc_bufs
+ return alloc_bufs;
+}
+```
+
+A single `zfree()` call releases everything:
+```c
+void free_inflate(PREFIX3(stream) *strm) {
+ inflate_allocs *alloc_bufs = state->alloc_bufs;
+ alloc_bufs->zfree(strm->opaque, alloc_bufs->buf_start);
+ strm->state = NULL;
+}
+```
+
+---
+
+## `infback.c` — Callback-Based Inflate
+
+An alternative inflate interface where the application provides input and
+output callbacks:
+
+```c
+int inflateBack(z_stream *strm,
+ in_func in, void *in_desc,
+ out_func out, void *out_desc);
+```
+
+The caller provides:
+- `in(in_desc, &buf)` — Returns available input and sets `buf`
+- `out(out_desc, buf, len)` — Consumes `len` bytes of output from `buf`
+
+This avoids buffer management complexity for applications that can provide
+data on demand.
diff --git a/docs/handbook/neozip/overview.md b/docs/handbook/neozip/overview.md
new file mode 100644
index 0000000000..acf32a427e
--- /dev/null
+++ b/docs/handbook/neozip/overview.md
@@ -0,0 +1,509 @@
+# Neozip Overview
+
+## What Is Neozip?
+
+Neozip is Project Tick's fork of **zlib-ng**, which is itself a modernized,
+performance-oriented fork of the venerable zlib compression library. Neozip
+provides a drop-in replacement for zlib with significantly improved throughput
+on modern hardware while retaining full API and format compatibility with the
+original zlib 1.3.1 specification.
+
+The library implements the **DEFLATE** compressed data format (RFC 1951),
+wrapped in either the **zlib** container (RFC 1950) or the **gzip** container
+(RFC 1952). It also exposes raw deflate streams without any wrapper.
+
+Neozip tracks upstream zlib-ng closely. At the time of writing, the embedded
+version strings are:
+
+```c
+#define ZLIBNG_VERSION "2.3.90"
+#define ZLIB_VERSION "1.3.1.zlib-ng"
+```
+
+---
+
+## Why Neozip Exists
+
+The original zlib library was written in the early 1990s when CPUs had very
+different performance characteristics. While zlib is extremely portable and
+well-tested, it leaves significant performance on the table on modern
+processors because:
+
+1. **No SIMD utilisation** — zlib's inner loops (match finding, checksumming,
+ sliding the hash window) are scalar C targeting 32-bit architectures.
+2. **Conservative data structures** — hash chain lengths, buffer sizes, and
+ alignment are tuned for machines with tiny caches.
+3. **No runtime CPU feature detection** — the same compiled binary cannot
+ select between SSE2 and AVX-512 code paths at runtime.
+
+Neozip (via zlib-ng) addresses every one of these issues while maintaining
+byte-for-byte compatible output with zlib for any given set of compression
+parameters (when the `ZLIB_COMPAT` build option is enabled).
+
+---
+
+## Feature List
+
+### Core Compression and Decompression
+
+| Feature | Description |
+|---|---|
+| DEFLATE compression (RFC 1951) | Full implementation of LZ77 + Huffman coding |
+| DEFLATE decompression | State-machine inflater with optimised fast paths |
+| zlib wrapper (RFC 1950) | Adler-32 integrity, two-byte header |
+| gzip wrapper (RFC 1952) | CRC-32 integrity, file metadata header |
+| Raw deflate | No wrapper, caller handles integrity |
+| Compression levels 0–9 | From stored (level 0) through maximum compression (level 9) |
+| Multiple strategies | `Z_DEFAULT_STRATEGY`, `Z_FILTERED`, `Z_HUFFMAN_ONLY`, `Z_RLE`, `Z_FIXED` |
+| Streaming API | Process data in arbitrarily-sized chunks via `deflate()` / `inflate()` |
+| One-shot API | `compress()` / `uncompress()` for simple in-memory use |
+| gzip file I/O | `gzopen()`, `gzread()`, `gzwrite()`, `gzprintf()`, etc. |
+| Dictionary support | Pre-seed compression / decompression with a shared dictionary |
+
+### Performance Optimisations
+
+| Optimisation | Details |
+|---|---|
+| Runtime CPU detection | `cpu_features.c` queries CPUID (x86), `/proc/cpuinfo` (ARM), etc. |
+| Function dispatch table | `functable.c` selects the best implementation for each hot function |
+| x86 SSE2 | `slide_hash`, `compare256`, `chunkset`, `inflate_fast`, CRC-32 Chorba |
+| x86 SSSE3 | `adler32`, `chunkset`, `inflate_fast` |
+| x86 SSE4.1 | CRC-32 Chorba SSE4.1 variant |
+| x86 SSE4.2 | `adler32_copy` |
+| x86 PCLMULQDQ | Carryless-multiply CRC-32 |
+| x86 AVX2 | `adler32`, `compare256`, `chunkset`, `slide_hash`, `inflate_fast`, `longest_match` |
+| x86 AVX-512 | `adler32`, `compare256`, `chunkset`, `inflate_fast`, `longest_match` |
+| x86 AVX-512 VNNI | `adler32` using VPDPBUSD |
+| x86 VPCLMULQDQ | Vectorised CRC-32 with AVX2 and AVX-512 widths |
+| ARM NEON | `adler32`, `compare256`, `chunkset`, `slide_hash`, `inflate_fast`, `longest_match` |
+| ARM CRC32 extension | Hardware CRC-32 instructions |
+| ARM PMULL+EOR3 | Polynomial multiply CRC-32 with SHA3 three-way XOR |
+| ARMv6 SIMD | `slide_hash` for 32-bit ARM |
+| PowerPC VMX/VSX | `adler32`, `slide_hash`, `chunkset`, `inflate_fast` |
+| POWER8/9 | Optimised Adler-32, CRC-32, compare256 |
+| RISC-V RVV | Vector extensions for core loops |
+| RISC-V Zbc | Bit-manipulation CRC-32 |
+| IBM z/Architecture DFLTCC | Hardware deflate/inflate in a single instruction |
+| LoongArch LSX/LASX | SIMD for CRC-32 and general loops |
+
+### Algorithmic Improvements
+
+| Improvement | Details |
+|---|---|
+| Quick deflate (level 1) | Intel-designed single-pass strategy (`deflate_quick.c`) |
+| Medium deflate (levels 3-6) | Intel-designed strategy bridging fast and slow (`deflate_medium.c`) |
+| Chorba CRC-32 | Modern CRC algorithm by Kadatch & Jenkins with braided and SIMD variants |
+| 64-bit bit buffer | `bi_buf` is `uint64_t` instead of `unsigned long`, reducing flush frequency |
+| Unified memory allocation | Single `zalloc` call for all deflate/inflate buffers, cache-line aligned |
+| LIT_MEM mode | Separate distance/length buffers for platforms without fast unaligned access |
+| Rolling hash for level 9 | `insert_string_roll` for better match quality at maximum compression |
+
+### Build System
+
+| Feature | Details |
+|---|---|
+| CMake (≥ 3.14) | Primary build system with extensive option detection |
+| C11 standard | Default; C99, C17, C23 also supported |
+| zlib-compat mode | `ZLIB_COMPAT=ON` produces a drop-in `libz` replacement |
+| Native mode | `ZLIB_COMPAT=OFF` produces `libz-ng` with `zng_` prefixed API |
+| Static and shared libraries | Both targets generated |
+| Google Test suite | Comprehensive C++ test suite under `test/` |
+| Fuzz targets | Under `test/fuzz/` for OSS-Fuzz integration |
+| Benchmark suite | Google Benchmark harnesses under `test/benchmarks/` |
+| Sanitizer support | ASan, MSan, TSan, UBSan integration via `WITH_SANITIZER` |
+| Code coverage | `WITH_CODE_COVERAGE` for lcov/gcov |
+
+---
+
+## Repository Structure
+
+The neozip source tree is organised as follows:
+
+```
+neozip/
+├── CMakeLists.txt # Top-level build configuration
+├── deflate.c / deflate.h # Core compression engine
+├── deflate_fast.c # Level 2-3 (or 1-3 without quick) fast strategy
+├── deflate_medium.c # Level 4-6 medium strategy (Intel)
+├── deflate_slow.c # Level 7-9 lazy/slow strategy
+├── deflate_quick.c # Level 1 quick strategy (Intel)
+├── deflate_stored.c # Level 0 stored (no compression)
+├── deflate_huff.c # Huffman-only strategy
+├── deflate_rle.c # Run-length encoding strategy
+├── deflate_p.h # Private deflate inline helpers
+├── inflate.c / inflate.h # Decompression state machine
+├── inflate_p.h # Private inflate inline helpers
+├── infback.c # Inflate with caller-provided window
+├── inftrees.c / inftrees.h # Huffman code table builder for inflate
+├── inffast_tpl.h # Fast inflate inner loop template
+├── inffixed_tbl.h # Fixed Huffman tables for inflate
+├── trees.c / trees.h # Huffman tree construction for deflate
+├── trees_emit.h # Bit emission macros for tree output
+├── trees_tbl.h # Static Huffman tree tables
+├── adler32.c # Adler-32 checksum entry points
+├── adler32_p.h # Scalar Adler-32 implementation
+├── crc32.c # CRC-32 checksum entry points
+├── crc32_braid_p.h # Braided CRC-32 configuration
+├── crc32_braid_comb.c # CRC-32 combine logic
+├── crc32_chorba_p.h # Chorba CRC-32 algorithm
+├── compress.c # One-shot compress()
+├── uncompr.c # One-shot uncompress()
+├── gzlib.c # gzip file I/O common code
+├── gzread.c # gzip file reading
+├── gzwrite.c # gzip file writing
+├── gzguts.h # gzip internal definitions
+├── zlib.h.in # Public API header (zlib-compat mode)
+├── zlib-ng.h.in # Public API header (native mode)
+├── zbuild.h # Build-system defines, compiler abstraction
+├── zutil.h / zutil.c # Internal utility functions
+├── zutil_p.h # Private utility helpers
+├── zendian.h # Endianness detection and byte-swap macros
+├── zmemory.h # Aligned memory read/write helpers
+├── zarch.h # Architecture detection macros
+├── cpu_features.c / .h # Runtime CPU feature detection dispatch
+├── functable.c / .h # Runtime function pointer dispatch table
+├── arch_functions.h # Architecture-specific function declarations
+├── arch_natives.h # Native (compile-time) arch selection
+├── insert_string.c # Hash table insert implementations
+├── insert_string_p.h # Private insert_string helpers
+├── insert_string_tpl.h # Insert string template macros
+├── match_tpl.h # Longest-match template (compare256 based)
+├── chunkset_tpl.h # Chunk memory-set template
+├── compare256_rle.h # RLE-optimised compare256
+├── arch/ # Architecture-specific SIMD implementations
+│ ├── generic/ # Portable C fallbacks
+│ ├── x86/ # SSE2, SSSE3, SSE4, AVX2, AVX-512, PCLMULQDQ
+│ ├── arm/ # NEON, CRC32 extension, PMULL
+│ ├── power/ # VMX, VSX, POWER8, POWER9
+│ ├── s390/ # IBM z DFLTCC
+│ ├── riscv/ # RVV, Zbc
+│ └── loongarch/ # LSX, LASX
+├── test/ # GTest test suite, fuzz targets, benchmarks
+├── cmake/ # CMake modules (intrinsic detection, etc.)
+├── doc/ # Upstream documentation
+├── tools/ # Utility scripts
+└── win32/ # Windows-specific files
+```
+
+---
+
+## Data Formats
+
+Neozip processes three container formats, all built on top of the same DEFLATE
+compressed data representation:
+
+### Raw Deflate (RFC 1951)
+
+A sequence of DEFLATE blocks with no framing. The caller is responsible for
+any integrity checking. Selected by passing a negative `windowBits` value
+(e.g., `-15`) to `deflateInit2()` / `inflateInit2()`.
+
+### zlib Format (RFC 1950)
+
+```
++---+---+========================+---+---+---+---+
+|CMF|FLG|  ...DEFLATE blocks...  |    ADLER-32   |
++---+---+========================+---+---+---+---+
+(an optional 4-byte DICTID follows FLG when FDICT is set)
+```
+
+- **CMF** (Compression Method and Flags): method = 8 (deflate), window size
+- **FLG**: check bits, optional preset dictionary flag (`FDICT`)
+- **DATA**: raw deflate blocks
+- **ADLER-32**: checksum of uncompressed data (big-endian)
+
+Overhead: 6 bytes (`ZLIB_WRAPLEN`).
+
+### gzip Format (RFC 1952)
+
+```
++---+---+---+---+---+---+---+---+---+---+ +-------+ +---+---+---+---+---+---+---+---+
+|ID1|ID2| CM|FLG| MTIME |XFL| OS | | DATA | | CRC-32 | ISIZE |
++---+---+---+---+---+---+---+---+---+---+ +-------+ +---+---+---+---+---+---+---+---+
+```
+
+- **ID1, ID2**: Magic bytes `0x1f`, `0x8b`
+- **CM**: Compression method (8 = deflate)
+- **FLG**: Flags for text, CRC, extra, name, comment
+- **MTIME**: Modification time (Unix epoch)
+- **XFL**: Extra flags (2 = best compression, 4 = fastest)
+- **OS**: Operating system code
+- **DATA**: Raw deflate blocks
+- **CRC-32**: CRC of uncompressed data
+- **ISIZE**: Uncompressed size mod 2^32
+
+Overhead: 18 bytes (`GZIP_WRAPLEN`).
+
+---
+
+## Compilation Modes
+
+### zlib-Compatible Mode (`ZLIB_COMPAT=ON`)
+
+When built with `-DZLIB_COMPAT=ON`:
+
+- The library is named `libz` (no suffix).
+- All public symbols use standard zlib names: `deflateInit`, `inflate`, `crc32`, etc.
+- The `z_stream` structure uses `unsigned long` for `total_in` / `total_out`.
+- Header file is `zlib.h`.
+- Symbol prefix macro `PREFIX()` expands to `z_` (via mangling headers).
+- The `ZLIB_COMPAT` preprocessor macro is defined.
+- gzip file operations (`WITH_GZFILEOP`) are forced on.
+
+This is the mode to use when neozip must be a transparent drop-in replacement
+for system zlib.
+
+### Native Mode (`ZLIB_COMPAT=OFF`)
+
+When built with `-DZLIB_COMPAT=OFF` (the default):
+
+- The library is named `libz-ng`.
+- All public symbols use `zng_` prefixed names: `zng_deflateInit`, `zng_inflate`, etc.
+- The `zng_stream` structure uses fixed-width types (`uint32_t`).
+- Header file is `zlib-ng.h`.
+- Symbol prefix macro `PREFIX()` expands to `zng_`.
+- No `ZLIB_COMPAT` macro is defined.
+
+Native mode is recommended for new code. Types are cleaner and there is no
+ambiguity about which zlib implementation is in use.
+
+---
+
+## Compression Levels and Strategies
+
+### Compression Levels
+
+Neozip maps each compression level (0–9) to a specific **strategy function**
+and a set of tuning parameters defined in the `configuration_table` in
+`deflate.c`:
+
+```c
+static const config configuration_table[10] = {
+/*      good lazy nice chain */
+/* 0 */ {0,    0,   0,    0, deflate_stored}, /* store only */
+/* 1 */ {0,    0,   0,    0, deflate_quick},  /* quick strategy */
+/* 2 */ {4,    4,   8,    4, deflate_fast},   /* max speed, no lazy matches */
+/* 3 */ {4,    6,  32,   32, deflate_fast},
+/* 4 */ {4,    4,  16,   16, deflate_medium}, /* lazy matches */
+/* 5 */ {8,   16,  32,   32, deflate_medium},
+/* 6 */ {8,   16, 128,  128, deflate_medium},
+/* 7 */ {8,   32, 128,  256, deflate_slow},
+/* 8 */ {32, 128, 258, 1024, deflate_slow},
+/* 9 */ {32, 258, 258, 4096, deflate_slow},
+};
+```
+
+The `config` fields are:
+- **good_length** — reduce lazy search above this match length
+- **max_lazy** — do not perform lazy search above this match length
+- **nice_length** — quit search above this match length
+- **max_chain** — maximum hash chain length to traverse
+- **func** — pointer to the strategy function
+
+### Strategy Functions
+
+| Strategy | Levels | Source File | Description |
+|---|---|---|---|
+| `deflate_stored` | 0 | `deflate_stored.c` | No compression; copies input as stored blocks |
+| `deflate_quick` | 1 | `deflate_quick.c` | Fastest compression; static Huffman, minimal match search |
+| `deflate_fast` | 2–3 (or 1–3 without quick) | `deflate_fast.c` | Greedy matching, no lazy evaluation |
+| `deflate_medium` | 4–6 | `deflate_medium.c` | Balanced: limited lazy evaluation, match merging |
+| `deflate_slow` | 7–9 | `deflate_slow.c` | Full lazy evaluation, deepest hash chain search |
+| `deflate_huff` | (Z_HUFFMAN_ONLY) | `deflate_huff.c` | Huffman-only, no LZ77 matching |
+| `deflate_rle` | (Z_RLE) | `deflate_rle.c` | Run-length encoding, distance always 1 |
+
+### Explicit Strategies
+
+The `strategy` parameter to `deflateInit2()` can override the default level-based
+selection:
+
+- **`Z_DEFAULT_STRATEGY` (0)** — Normal deflate; level selects the function.
+- **`Z_FILTERED` (1)** — Optimised for data produced by a filter (e.g., PNG predictors).
+ Uses `deflate_slow` with short match rejection.
+- **`Z_HUFFMAN_ONLY` (2)** — No LZ77; every byte is a literal.
+- **`Z_RLE` (3)** — Only find runs of identical bytes (distance = 1).
+- **`Z_FIXED` (4)** — Use fixed Huffman codes instead of dynamic trees.
+
+---
+
+## Memory Layout
+
+Neozip uses a **single-allocation** strategy for both deflate and inflate
+states. The function `alloc_deflate()` in `deflate.c` computes the total
+buffer size required and calls `zalloc` exactly once, then partitions the
+returned memory into:
+
+1. **Window buffer** — Aligned to `WINDOW_PAD_SIZE` (64 or 4096 bytes depending
+ on architecture). Size: `2 * (1 << windowBits)`.
+2. **prev array** — `Pos` (uint16_t) array of size `1 << windowBits`. Aligned to 64 bytes.
+3. **head array** — `Pos` array of size `HASH_SIZE` (65536). Aligned to 64 bytes.
+4. **pending_buf** — Output bit buffer of size `lit_bufsize * LIT_BUFS + 1`. Aligned to 64 bytes.
+5. **deflate_state** — The `internal_state` struct itself. Aligned to 64 bytes
+ (cache-line aligned via `ALIGNED_(64)`).
+6. **deflate_allocs** — Book-keeping struct tracking the original allocation pointer.
+
+The `inflate_state` uses an analogous scheme via `alloc_inflate()`:
+
+1. **Window buffer** — `(1 << MAX_WBITS) + 64` bytes with `WINDOW_PAD_SIZE` alignment.
+2. **inflate_state** — The state struct, 64-byte aligned.
+3. **inflate_allocs** — Book-keeping.
+
+This approach minimises the number of `malloc` calls, improves cache locality,
+and simplifies cleanup (a single `zfree` releases everything).
+
+---
+
+## Thread Safety
+
+Neozip is thread-safe under the following conditions:
+
+1. Each `z_stream` instance is accessed from only one thread at a time.
+2. The `zalloc` and `zfree` callbacks are thread-safe (the defaults use
+ `malloc` / `free`, which are thread-safe on all supported platforms).
+
+The function dispatch table (`functable`) uses atomic stores during
+initialisation:
+
+```c
+#define FUNCTABLE_ASSIGN(VAR, FUNC_NAME) \
+ __atomic_store(&(functable.FUNC_NAME), &(VAR.FUNC_NAME), __ATOMIC_SEQ_CST)
+#define FUNCTABLE_BARRIER() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+```
+
+This ensures that even if multiple threads call `deflateInit` / `inflateInit`
+concurrently, the function table is initialised safely.
+
+---
+
+## Version Identification
+
+The library provides several ways to query version information:
+
+```c
+const char *zlibVersion(void); // Returns "1.3.1.zlib-ng" in compat mode
+const char *zlibng_version(void); // Returns "2.3.90"
+
+// Compile-time constants
+#define ZLIBNG_VERSION "2.3.90"
+#define ZLIBNG_VERNUM 0x02039000L
+#define ZLIB_VERSION "1.3.1.zlib-ng"
+#define ZLIB_VERNUM 0x131f
+```
+
+In compat mode, `deflateInit` and `inflateInit` verify that the header version
+matches the library version to prevent ABI mismatches:
+
+```c
+#define CHECK_VER_STSIZE(version, stream_size) \
+ (version == NULL || version[0] != ZLIB_VERSION[0] || \
+ stream_size != (int32_t)sizeof(PREFIX3(stream)))
+```
+
+---
+
+## Licensing
+
+Neozip inherits the zlib/libpng license from both zlib and zlib-ng:
+
+> This software is provided 'as-is', without any express or implied warranty.
+> Permission is granted to anyone to use this software for any purpose,
+> including commercial applications, and to alter it and redistribute it freely,
+> subject to the following restrictions: [...]
+
+See `LICENSE.md` in the neozip source tree for the full text.
+
+---
+
+## Key Differences from Upstream zlib
+
+| Area | zlib 1.3.1 | Neozip (zlib-ng) |
+|---|---|---|
+| Bit buffer width | 32-bit `unsigned long` | 64-bit `uint64_t` |
+| Hash table size | 32768 entries (15 bits) | 65536 entries (16 bits) |
+| Match buffer format | Overlaid `sym_buf` only | `LIT_MEM` option for separate `d_buf`/`l_buf` |
+| Hash function | Three-byte rolling | Four-byte CRC-based or multiplicative |
+| SIMD acceleration | None | Extensive (see Performance Optimisations) |
+| CPU detection | None (compile-time only) | Runtime `cpuid` / feature detection |
+| Memory allocation | Multiple `zalloc` calls | Single allocation, cache-aligned |
+| Minimum match length | 3 (`STD_MIN_MATCH`) | Internally uses `WANT_MIN_MATCH = 4` for speed |
+| Quick strategy | None | `deflate_quick` for level 1 |
+| Medium strategy | None | `deflate_medium` for levels 3–6 |
+| Data structure alignment | None | `ALIGNED_(64)` on key structs |
+| Build system | Makefile / CMake | CMake primary with full feature detection |
+
+---
+
+## Quick Start
+
+### Building
+
+```bash
+cd neozip
+mkdir build && cd build
+cmake .. -DCMAKE_BUILD_TYPE=Release
+cmake --build . -j$(nproc)
+```
+
+### Using in a CMake Project
+
+```cmake
+find_package(zlib-ng CONFIG REQUIRED)
+target_link_libraries(myapp PRIVATE zlib-ng::zlib-ng)
+```
+
+Or in zlib-compat mode:
+
+```cmake
+find_package(ZLIB CONFIG REQUIRED)
+target_link_libraries(myapp PRIVATE ZLIB::ZLIB)
+```
+
+### Minimal Compression Example
+
+```c
+#include <zlib-ng.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+int main(void) {
+ const char *source = "Hello, Neozip! This is a test of compression.";
+ size_t source_len = strlen(source);
+
+ size_t dest_len = zng_compressBound(source_len);
+ unsigned char *dest = malloc(dest_len);
+
+ int ret = zng_compress(dest, &dest_len, (const unsigned char *)source, source_len);
+ if (ret == Z_OK) {
+ printf("Compressed %zu bytes to %zu bytes\n", source_len, dest_len);
+ }
+
+ unsigned char *recovered = malloc(source_len + 1);
+ size_t recovered_len = source_len;
+ zng_uncompress(recovered, &recovered_len, dest, dest_len);
+ recovered[recovered_len] = '\0';
+ printf("Recovered: %s\n", recovered);
+
+ free(dest);
+ free(recovered);
+ return 0;
+}
+```
+
+---
+
+## Further Reading
+
+- [Architecture](architecture.md) — Module-by-module breakdown of the source
+- [Building](building.md) — Complete CMake option reference
+- [Deflate Algorithms](deflate-algorithms.md) — LZ77 match finding and strategies
+- [Inflate Engine](inflate-engine.md) — Decompression state machine
+- [Huffman Coding](huffman-coding.md) — Tree construction and bit emission
+- [Checksum Algorithms](checksum-algorithms.md) — CRC-32 and Adler-32 details
+- [Hardware Acceleration](hardware-acceleration.md) — CPU detection and dispatch
+- [x86 Optimizations](x86-optimizations.md) — SSE/AVX/PCLMULQDQ implementations
+- [ARM Optimizations](arm-optimizations.md) — NEON and CRC32 extension
+- [Gzip Support](gzip-support.md) — gzip file I/O layer
+- [API Reference](api-reference.md) — Full public API documentation
+- [Performance Tuning](performance-tuning.md) — Benchmarking and tuning guide
+- [Testing](testing.md) — Test suite reference
+- [Code Style](code-style.md) — Coding conventions
diff --git a/docs/handbook/neozip/performance-tuning.md b/docs/handbook/neozip/performance-tuning.md
new file mode 100644
index 0000000000..f45706e964
--- /dev/null
+++ b/docs/handbook/neozip/performance-tuning.md
@@ -0,0 +1,361 @@
+# Performance Tuning
+
+## Overview
+
+Neozip offers multiple controls for trading compression ratio against
+speed: compression level, strategy, window size, memory level, hardware
+acceleration, and buffer sizing. This guide describes how each knob
+affects performance and when to use them.
+
+---
+
+## Compression Level
+
+The `level` parameter (0–9) selects the deflate strategy function and its
+internal tuning parameters via the `configuration_table`:
+
+```c
+static const config configuration_table[10] = {
+/* good_length lazy nice max_chain func */
+/* 0 */ {0, 0, 0, 0, deflate_stored}, // No compression
+/* 1 */ {0, 0, 0, 0, deflate_quick}, // Fastest (Intel)
+/* 2 */ {4, 4, 8, 4, deflate_fast}, // Fast greedy
+/* 3 */ {4, 6, 32, 32, deflate_fast},
+/* 4 */ {4, 4, 16, 16, deflate_medium}, // Balanced (Intel)
+/* 5 */ {8, 16, 32, 32, deflate_medium},
+/* 6 */ {8, 16,128, 128, deflate_medium}, // Default
+/* 7 */ {8, 32,128, 256, deflate_slow}, // Slow lazy
+/* 8 */ {32, 128,258, 1024, deflate_slow},
+/* 9 */ {32, 258,258, 4096, deflate_slow}, // Maximum
+};
+```
+
+| Parameter | Effect |
+|---|---|
+| `good_length` | Reduce match search when match ≥ this length |
+| `max_lazy` | Don't try lazy match if current match ≥ this |
+| `nice_length` | Stop searching once match ≥ this length |
+| `max_chain` | Maximum hash chain steps to search |
+
+### Level Selection Guide
+
+| Use Case | Recommended Level | Rationale |
+|---|---|---|
+| Real-time streaming | 1 | `deflate_quick`: static Huffman, minimal search |
+| Network compression | 2–3 | `deflate_fast`: greedy match, short chains |
+| General purpose | 6 (default) | `deflate_medium`: good ratio/speed balance |
+| Archival storage | 9 | `deflate_slow`: full lazy evaluation, deep chains |
+| Pre-compressed data | 0 | `deflate_stored`: passthrough with framing |
+
+### Speed vs. Ratio Tradeoffs
+
+Approximate throughput (x86_64 with AVX2, single core):
+
+| Level | Compression Speed | Ratio (typical) |
+|---|---|---|
+| 0 | ~5 GB/s | 1.00 (none) |
+| 1 | ~800 MB/s | 2.0–2.5:1 |
+| 3 | ~400 MB/s | 2.2–2.8:1 |
+| 6 | ~150 MB/s | 2.5–3.2:1 |
+| 9 | ~30 MB/s | 2.6–3.4:1 |
+
+Decompression speed is largely independent of the compression level
+(~1–2 GB/s), since it only depends on the encoded stream, not the search
+strategy.
+
+---
+
+## Strategy Selection
+
+### `Z_DEFAULT_STRATEGY` (0)
+
+Standard DEFLATE with adaptive Huffman coding and LZ77 matching.
+Best for most data types.
+
+### `Z_FILTERED` (1)
+
+Optimised for data produced by filters (e.g., delta encoding, integer
+sequences). Uses shorter hash chains and favours Huffman coding efficiency.
+
+### `Z_HUFFMAN_ONLY` (2)
+
+Disables LZ77 matching entirely. Every byte is encoded as a literal.
+Fast but poor compression ratio for most data. Useful when the data has
+already been transformed (e.g., BWT output).
+
+```c
+// deflate_huff.c: Only emits literals
+block_state deflate_huff(deflate_state *s, int flush) {
+ for (;;) {
+ // No match search — emit one literal per byte
+ zng_tr_tally_lit(s, s->window[s->strstart]);
+ s->strstart++;
+ s->lookahead--;
+ if (s->sym_next == s->sym_end) {
+ FLUSH_BLOCK(s, 0);
+ }
+ }
+}
+```
+
+### `Z_RLE` (3)
+
+Run-length encoding: only matches at distance 1. Very fast for data with
+repeated byte patterns:
+
+```c
+// deflate_rle.c
+block_state deflate_rle(deflate_state *s, int flush) {
+ // Only search for matches at distance == 1
+ // Uses compare256_rle for fast run detection
+ match_len = FUNCTABLE_CALL(compare256)(scan, scan - 1);
+}
+```
+
+### `Z_FIXED` (4)
+
+Forces use of static (fixed) Huffman tables for every block. Eliminates
+the overhead of dynamic tree transmission. Slightly faster for small
+blocks where the tree overhead dominates.
+
+### Strategy Selection Guide
+
+| Data Type | Strategy |
+|---|---|
+| General text/binary | `Z_DEFAULT_STRATEGY` |
+| Numeric arrays, deltas | `Z_FILTERED` |
+| Pre-transformed data | `Z_HUFFMAN_ONLY` |
+| Runs of repeated bytes | `Z_RLE` |
+| Very small blocks | `Z_FIXED` |
+| Random/encrypted data | Level 0 (skip entirely) |
+
+---
+
+## Window Size (`windowBits`)
+
+Controls the LZ77 sliding window (9–15, default 15; a request for 8 is
+rounded up to 9 by deflate):
+
+| windowBits | Window Size | Memory (deflate) |
+|---|---|---|
+| 9 | 512 B | ~4 KB |
+| 10 | 1 KB | ~8 KB |
+| 11 | 2 KB | ~16 KB |
+| 12 | 4 KB | ~32 KB |
+| 13 | 8 KB | ~64 KB |
+| 14 | 16 KB | ~128 KB |
+| 15 | 32 KB | ~256 KB |
+
+Smaller windows use less memory but find fewer long-distance matches,
+reducing compression ratio. For streaming protocols with tight memory
+budgets, windowBits=10–12 is a reasonable compromise.
+
+---
+
+## Memory Level (`memLevel`)
+
+Controls the internal hash table and buffer sizes (1–9, default 8):
+
+```c
+#define DEF_MEM_LEVEL 8
+
+// In deflateInit2:
+s->hash_size = 1 << (memLevel + 7); // hash_bits = memLevel + 7
+s->lit_bufsize = 1 << (memLevel + 6);
+```
+
+| memLevel | Hash Table Entries | Literal Buffer | Total Memory |
+|---|---|---|---|
+| 1 | 256 | 128 | ~1 KB |
+| 4 | 2048 | 1024 | ~16 KB |
+| 8 (default) | 32768 | 16384 | ~256 KB |
+| 9 | 65536 | 32768 | ~512 KB |
+
+Higher memLevel improves hash distribution (fewer collisions) and allows
+more symbols to accumulate before flushing, improving Huffman coding
+efficiency.
+
+---
+
+## Hardware Acceleration
+
+### Enabling SIMD
+
+**Runtime detection** (default, recommended for distributed binaries):
+```bash
+cmake .. -DWITH_RUNTIME_CPU_DETECTION=ON
+```
+
+**Native compilation** (fastest, for local/dedicated use):
+```bash
+cmake .. -DWITH_NATIVE_INSTRUCTIONS=ON
+```
+
+This passes `-march=native` to the compiler, enabling all instructions
+supported by the build machine.
+
+### Selective Feature Control
+
+Disable specific SIMD features:
+```bash
+cmake .. -DWITH_AVX512=OFF # Avoid AVX-512 (thermal throttling concern)
+cmake .. -DWITH_VPCLMULQDQ=OFF # Disable VPCLMULQDQ CRC
+cmake .. -DWITH_NEON=OFF # Disable NEON on ARM
+```
+
+### SIMD Impact by Operation
+
+| Operation | Scalar | Best SIMD | Speedup |
+|---|---|---|---|
+| Adler-32 | ~1 B/cycle | ~32 B/cycle (AVX-512+VNNI) | 32× |
+| CRC-32 | ~4 B/cycle | ~64 B/cycle (VPCLMULQDQ) | 16× |
+| Compare256 | ~1 B/cycle | ~16 B/cycle (AVX2) | 16× |
+| Slide Hash | ~1 entry/cycle | ~32 entries/cycle (AVX-512) | 32× |
+| Inflate Copy | ~1 B/cycle | ~32 B/cycle (AVX2 chunkmemset) | 32× |
+
+---
+
+## Buffer Sizing
+
+### Compression Buffers
+
+For streaming compression, the output buffer should be at least as large
+as `deflateBound(&strm, sourceLen)` for the expected input chunk size:
+
+```c
+size_t out_size = deflateBound(&strm, chunk_size);
+```
+
+Larger buffers reduce system call overhead and improve throughput.
+
+### Gzip Buffer
+
+```c
+gzbuffer(gz, size); // Set before first read/write
+```
+
+Default `GZBUFSIZE` is 131072 (128 KB). For sequential I/O, larger
+buffers (256 KB–1 MB) improve throughput by amortising I/O overhead.
+
+### Inflate Buffers
+
+The inflate engine benefits from output buffers ≥ 32 KB (the maximum
+window size). Buffers ≥ 64 KB keep the fast path active longer (the
+fast path requires ≥ 258 bytes of output space and ≥ 6 bytes of input).
+
+---
+
+## `deflateTune()`
+
+Fine-tune the `configuration_table` parameters at runtime without
+changing the level:
+
+```c
+int deflateTune(z_stream *strm, int good_length, int max_lazy,
+ int nice_length, int max_chain);
+```
+
+Example — high-speed level 6:
+```c
+deflateInit(&strm, 6);
+deflateTune(&strm, 4, 8, 32, 64); // Shorter chains than default
+```
+
+Example — deeper search at level 4:
+```c
+deflateInit(&strm, 4);
+deflateTune(&strm, 16, 64, 128, 512); // Deeper search
+```
+
+---
+
+## Profiling Tips
+
+### 1. Identify the Bottleneck
+
+Use `perf` or equivalent to identify whether compression is CPU-bound
+(expect: hash lookup, match search) or I/O-bound (expect: read/write
+syscalls):
+
+```bash
+perf record -g ./minigzip < large_file > /dev/null
+perf report
+```
+
+Look for hot functions:
+- `longest_match_*` — String matching (CPU-bound)
+- `adler32_*` / `crc32_*` — Checksumming (CPU-bound)
+- `slide_hash_*` — Window maintenance (CPU-bound)
+- `__write` / `__read` — I/O (I/O-bound)
+
+### 2. Verify SIMD Usage
+
+Check which implementations are selected:
+
+```bash
+# Check for SIMD symbols in the binary
+nm -D libz-ng.so | grep -E 'avx2|neon|sse|pclmul'
+```
+
+Or set a breakpoint in `init_functable()` during debugging.
+
+### 3. Benchmark Specific Functions
+
+Use the built-in benchmarks:
+```bash
+cmake .. -DWITH_BENCHMARKS=ON
+cmake --build .
+./benchmark_adler32 --benchmark_repetitions=5
+./benchmark_compress --benchmark_filter="BM_Compress/6"
+```
+
+---
+
+## Common Tuning Scenarios
+
+### High-Throughput Compression (Level 1)
+
+```c
+deflateInit2(&strm, 1, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY);
+```
+
+Level 1 uses `deflate_quick`: no hash chain walking, static Huffman tables,
+minimal overhead. Best for cases where compression speed matters more than
+ratio (real-time logging, network IPC).
+
+### Maximum Compression (Level 9)
+
+```c
+deflateInit2(&strm, 9, Z_DEFLATED, 15, 9, Z_DEFAULT_STRATEGY);
+```
+
+Level 9 + memLevel 9 provides the deepest search (`max_chain=4096`) and
+largest hash table. Use for archival where decompression speed matters
+but compression can be slow.
+
+### Memory-Constrained Environment
+
+```c
+deflateInit2(&strm, 6, Z_DEFLATED, 10, 4, Z_DEFAULT_STRATEGY);
+```
+
+windowBits=10 (1KB window) + memLevel=4 gives ~16KB total memory.
+Suitable for embedded systems.
+
+### Multiple Streams in Parallel
+
+Each `z_stream` is independent. For multi-threaded compression, create
+one stream per thread:
+
+```c
+// Thread-safe: each thread has its own z_stream
+#pragma omp parallel for
+for (int i = 0; i < num_chunks; i++) {
+ z_stream strm = {};
+ deflateInit(&strm, 6);
+ // compress chunk[i]
+ deflateEnd(&strm);
+}
+```
+
+The `functable` initialisation is thread-safe (atomic init flag), so
+the first call from any thread will safely initialise SIMD dispatch.
diff --git a/docs/handbook/neozip/testing.md b/docs/handbook/neozip/testing.md
new file mode 100644
index 0000000000..1c3b067ebb
--- /dev/null
+++ b/docs/handbook/neozip/testing.md
@@ -0,0 +1,317 @@
+# Testing
+
+## Overview
+
+Neozip has a comprehensive test suite covering correctness, fuzz testing,
+performance benchmarking, and regression testing for known CVEs. Testing
+is built with CMake (`BUILD_TESTING=ON`, default) and uses Google Test
+for structured test cases.
+
+---
+
+## Build Configuration
+
+```cmake
+option(BUILD_TESTING "Build test binaries" ON)
+option(WITH_GTEST "Build with GTest" ON)
+option(WITH_FUZZERS "Build fuzz targets" OFF)
+option(WITH_BENCHMARKS "Build benchmarks" OFF)
+option(WITH_SANITIZER "Build with sanitizer" OFF)
+```
+
+### Building Tests
+
+```bash
+mkdir build && cd build
+cmake .. -DBUILD_TESTING=ON -DWITH_GTEST=ON
+cmake --build .
+ctest --output-on-failure
+```
+
+### With Sanitizers
+
+```bash
+cmake .. -DWITH_SANITIZER=address # ASan
+cmake .. -DWITH_SANITIZER=memory # MSan
+cmake .. -DWITH_SANITIZER=undefined # UBSan
+cmake .. -DWITH_SANITIZER=thread # TSan
+```
+
+### With Code Coverage
+
+```bash
+cmake .. -DWITH_CODE_COVERAGE=ON
+cmake --build .
+ctest
+# Generate coverage report
+```
+
+---
+
+## Test Structure
+
+Tests reside in the `test/` directory:
+
+### Google Test Files
+
+| File | What It Tests |
+|---|---|
+| `test_adler32.cc` | Adler-32 correctness and edge cases |
+| `test_compare256.cc` | String comparison implementations |
+| `test_compress.cc` | compress/uncompress one-shot API |
+| `test_compress_bound.cc` | compressBound accuracy |
+| `test_crc32.cc` | CRC-32 correctness |
+| `test_cve.cc` | CVE regression tests |
+| `test_deflate_bound.cc` | deflateBound accuracy |
+| `test_deflate_copy.cc` | deflateCopy correctness |
+| `test_deflate_dict.cc` | Dictionary-based compression |
+| `test_deflate_hash_head_0.cc` | Hash table edge case |
+| `test_deflate_header.cc` | Gzip header handling |
+| `test_deflate_params.cc` | Dynamic level/strategy changes |
+| `test_deflate_pending.cc` | deflatePending correctness |
+| `test_deflate_prime.cc` | deflatePrime bit injection |
+| `test_deflate_quick_bi_valid.cc` | Quick deflate bi_valid edge case |
+| `test_deflate_tune.cc` | deflateTune parameter modification |
+| `test_dict.cc` | Dictionary compression/decompression |
+| `test_inflate_adler32.cc` | Inflate Adler-32 validation |
+| `test_inflate_sync.cc` | inflateSync recovery |
+| `test_infcover.cc` | Inflate code coverage |
+| `test_large_buffers.cc` | Large buffer handling |
+| `test_main.cc` | Test runner entry point |
+| `test_version.cc` | Version string checks |
+
+### Standalone Test Utilities
+
+| File | Purpose |
+|---|---|
+| `minigzip.c` | Minimal gzip compressor/decompressor |
+| `minideflate.c` | Minimal deflate stream tool |
+| `infcover.c` | Inflate code coverage driver |
+| `switchlevels.c` | Test dynamic level switching |
+
+### Test Data
+
+The `test/data/` directory contains test vectors:
+
+- Compressed files at various levels
+- Known-good decompression outputs
+- Edge-case inputs (empty, single-byte, very large)
+
+---
+
+## Google Test Details
+
+### Test Fixture Pattern
+
+Tests use parameterised fixtures for systematic coverage:
+
+```cpp
+class CompressTest : public testing::TestWithParam<std::tuple<int, int>> {
+ // param<0> = compression level (0-9)
+ // param<1> = strategy
+};
+
+TEST_P(CompressTest, RoundTrip) {
+ auto [level, strategy] = GetParam();
+ z_stream strm = {};
+ deflateInit2(&strm, level, Z_DEFLATED, 15, 8, strategy);
+ // Compress → decompress → verify
+}
+
+INSTANTIATE_TEST_SUITE_P(AllLevels, CompressTest,
+ testing::Combine(
+ testing::Range(0, 10),
+ testing::Values(Z_DEFAULT_STRATEGY, Z_FILTERED, Z_HUFFMAN_ONLY,
+ Z_RLE, Z_FIXED)));
+```
+
+### Adler-32 Tests
+
+```cpp
+TEST(Adler32, KnownVectors) {
+ // Test against known Adler-32 values
+ uint32_t adler = adler32(0L, Z_NULL, 0);
+ EXPECT_EQ(adler, 1U);
+
+ adler = adler32(adler, (const uint8_t *)"Hello", 5);
+ // Verify against expected value
+}
+
+TEST(Adler32, Combine) {
+ // Verify adler32_combine produces correct results
+ uint32_t a1 = adler32(0L, buf1, len1);
+ uint32_t a2 = adler32(0L, buf2, len2);
+ uint32_t combined = adler32_combine(a1, a2, len2);
+ uint32_t full = adler32(0L, full_buf, len1 + len2);
+ EXPECT_EQ(combined, full);
+}
+```
+
+### CRC-32 Tests
+
+```cpp
+TEST(CRC32, KnownVectors) {
+ uint32_t crc = crc32(0L, Z_NULL, 0);
+ EXPECT_EQ(crc, 0U);
+
+ crc = crc32(crc, (const uint8_t *)"123456789", 9);
+ EXPECT_EQ(crc, 0xCBF43926U); // Standard test vector
+}
+```
+
+### CVE Regression Tests
+
+`test_cve.cc` ensures previously discovered vulnerabilities remain fixed:
+
+```cpp
+TEST(CVE, TestHeapOverflow) {
+ // Reproduce specific malformed input that triggered a vulnerability
+ // Verify inflate returns Z_DATA_ERROR instead of crashing
+ z_stream strm = {};
+ inflateInit2(&strm, -15);
+ strm.next_in = malformed_data;
+ strm.avail_in = sizeof(malformed_data);
+ strm.next_out = output;
+ strm.avail_out = sizeof(output);
+ int ret = inflate(&strm, Z_NO_FLUSH);
+ EXPECT_EQ(ret, Z_DATA_ERROR);
+ inflateEnd(&strm);
+}
+```
+
+---
+
+## Fuzz Testing
+
+Fuzz targets are enabled with `-DWITH_FUZZERS=ON` and require a
+fuzzing-capable compiler (Clang with libFuzzer or AFL):
+
+### Fuzz Targets
+
+| File | Target |
+|---|---|
+| `test/fuzz/fuzzer_compress.c` | compress/uncompress round-trip |
+| `test/fuzz/fuzzer_deflate.c` | Deflate streaming API |
+| `test/fuzz/fuzzer_inflate.c` | Inflate with arbitrary input |
+| `test/fuzz/fuzzer_checksum.c` | Adler-32 and CRC-32 |
+| `test/fuzz/fuzzer_gzip.c` | Gzip file I/O |
+
+### Running Fuzzers
+
+```bash
+cmake .. -DWITH_FUZZERS=ON -DCMAKE_C_COMPILER=clang \
+ -DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link,address"
+cmake --build .
+
+# Run a fuzzer
+./fuzzer_inflate corpus/ -max_total_time=3600
+```
+
+Fuzz testing with AddressSanitizer catches:
+- Buffer overflows/underflows
+- Use-after-free
+- Double-free
+- Stack buffer overflows
+
+---
+
+## Benchmarks
+
+Enabled with `-DWITH_BENCHMARKS=ON`:
+
+### Benchmark Targets
+
+| File | What It Benchmarks |
+|---|---|
+| `test/benchmarks/benchmark_adler32.cc` | Adler-32 throughput |
+| `test/benchmarks/benchmark_compare256.cc` | String comparison throughput |
+| `test/benchmarks/benchmark_crc32.cc` | CRC-32 throughput |
+| `test/benchmarks/benchmark_compress.cc` | Compression throughput per level |
+| `test/benchmarks/benchmark_inflate.cc` | Decompression throughput |
+| `test/benchmarks/benchmark_slidehash.cc` | Hash table slide throughput |
+
+Uses Google Benchmark framework:
+
+```cpp
+static void BM_Adler32(benchmark::State& state) {
+ std::vector<uint8_t> data(state.range(0));
+ for (auto _ : state) {
+ adler32(1, data.data(), data.size());
+ }
+ state.SetBytesProcessed(state.iterations() * data.size());
+}
+BENCHMARK(BM_Adler32)->Range(64, 1 << 20);
+```
+
+### Running Benchmarks
+
+```bash
+cmake .. -DWITH_BENCHMARKS=ON
+cmake --build .
+./benchmark_adler32
+./benchmark_crc32
+./benchmark_compress --benchmark_filter=".*level6.*"
+```
+
+---
+
+## `minigzip` — Integration Test Tool
+
+`test/minigzip.c` is a minimal gzip-compatible utility for manual testing:
+
+```bash
+# Compress
+./minigzip < input.txt > input.txt.gz
+
+# Decompress
+./minigzip -d < input.txt.gz > output.txt
+
+# Verify
+diff input.txt output.txt
+```
+
+Options:
+- `-d` — Decompress mode
+- `-1` to `-9` — Compression level
+- `-f` — Z_FILTERED strategy
+- `-h` — Z_HUFFMAN_ONLY strategy
+- `-R` — Z_RLE strategy
+- `-F` — Z_FIXED strategy
+
+---
+
+## `minideflate` — Raw Deflate Test Tool
+
+`test/minideflate.c` tests raw deflate streams (no wrapper):
+
+```bash
+./minideflate -c -k < input > compressed
+./minideflate -d -k < compressed > output
+```
+
+---
+
+## Running the Full Test Suite
+
+```bash
+cd build
+ctest --output-on-failure -j$(nproc)
+```
+
+Individual tests can be run:
+```bash
+ctest -R test_adler32
+ctest -R test_crc32
+ctest -R test_compress
+ctest -R test_cve
+```
+
+### CI Integration
+
+The project uses CI for:
+- Multiple compiler versions (GCC, Clang, MSVC)
+- Multiple architectures (x86_64, AArch64, Power, s390x)
+- Multiple configurations (compat mode, native mode, sanitizers)
+- Multiple operating systems (Linux, macOS, Windows)
+
+Test results are reported via CTest and Google Test XML output.
diff --git a/docs/handbook/neozip/x86-optimizations.md b/docs/handbook/neozip/x86-optimizations.md
new file mode 100644
index 0000000000..21b1a711d9
--- /dev/null
+++ b/docs/handbook/neozip/x86-optimizations.md
@@ -0,0 +1,439 @@
+# x86 Optimizations
+
+## Overview
+
+Neozip provides extensive x86 SIMD optimizations spanning SSE2, SSSE3,
+SSE4.1, SSE4.2, PCLMULQDQ, AVX2, AVX-512, AVX-512+VNNI, and VPCLMULQDQ.
+All implementations live in `arch/x86/` and are selected at runtime by
+`functable.c` based on CPUID detection.
+
+---
+
+## Source Files
+
+| File | ISA | Function |
+|---|---|---|
+| `x86_features.c/h` | — | CPUID feature detection |
+| `adler32_avx2.c` | AVX2 | Adler-32 checksum |
+| `adler32_avx512.c` | AVX-512 | Adler-32 checksum |
+| `adler32_avx512_vnni.c` | AVX-512+VNNI | Adler-32 checksum |
+| `adler32_sse42.c` | SSE4.2 | Adler-32 checksum |
+| `adler32_ssse3.c` | SSSE3 | Adler-32 checksum |
+| `crc32_pclmulqdq.c` | PCLMULQDQ | CRC-32 (carry-less multiply) |
+| `crc32_vpclmulqdq.c` | VPCLMULQDQ | CRC-32 (AVX-512 CLMUL) |
+| `compare256_avx2.c` | AVX2 | 256-byte comparison |
+| `compare256_sse2.c` | SSE2 | 256-byte comparison |
+| `compare256_sse42.c` | SSE4.2 | 256-byte comparison |
+| `chunkset_avx2.c` | AVX2 | Pattern fill for inflate |
+| `chunkset_sse2.c` | SSE2 | Pattern fill for inflate |
+| `slide_hash_avx2.c` | AVX2 | Hash table slide |
+| `slide_hash_avx512.c` | AVX-512 | Hash table slide |
+| `slide_hash_sse2.c` | SSE2 | Hash table slide |
+| `insert_string_sse42.c` | SSE4.2 | CRC-based hash insertion |
+| `inffast_avx2.c` | AVX2 | Fast inflate inner loop |
+| `inffast_sse2.c` | SSE2 | Fast inflate inner loop |
+
+---
+
+## Feature Detection
+
+### CPUID Queries
+
+`x86_features.c` queries CPUID leaves 1 and 7:
+
+```c
+void Z_INTERNAL x86_check_features(struct cpu_features *features) {
+ unsigned eax, ebx, ecx, edx;
+
+ // Leaf 1 — basic features
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ features->x86.has_sse2 = !!(edx & (1 << 26));
+ features->x86.has_ssse3 = !!(ecx & (1 << 9));
+ features->x86.has_sse41 = !!(ecx & (1 << 19));
+ features->x86.has_sse42 = !!(ecx & (1 << 20));
+ features->x86.has_pclmulqdq = !!(ecx & (1 << 1));
+
+ // Check OS YMM/ZMM support via XSAVE/XGETBV
+ if (ecx & (1 << 27)) {
+ uint64_t xcr0 = xgetbv(0);
+ features->x86.has_os_save_ymm = ((xcr0 & 0x06) == 0x06);
+ features->x86.has_os_save_zmm = ((xcr0 & 0xe6) == 0xe6);
+ }
+
+ // Leaf 7, sub-leaf 0 — extended features
+ cpuidp(7, 0, &eax, &ebx, &ecx, &edx);
+ if (features->x86.has_os_save_ymm)
+ features->x86.has_avx2 = !!(ebx & (1 << 5));
+ if (features->x86.has_os_save_zmm) {
+ features->x86.has_avx512f = !!(ebx & (1 << 16));
+ features->x86.has_avx512dq = !!(ebx & (1 << 17));
+ features->x86.has_avx512bw = !!(ebx & (1 << 30));
+ features->x86.has_avx512vl = !!(ebx & (1 << 31));
+ features->x86.has_vpclmulqdq = !!(ecx & (1 << 10));
+ features->x86.has_avx512vnni = !!(ecx & (1 << 11));
+ }
+ features->x86.has_avx512_common =
+ features->x86.has_avx512f && features->x86.has_avx512dq &&
+ features->x86.has_avx512bw && features->x86.has_avx512vl;
+}
+```
+
+### `xgetbv()` — Reading Extended Control Register
+
+```c
+static inline uint64_t xgetbv(unsigned xcr) {
+ uint32_t eax, edx;
+ __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
+ return ((uint64_t)edx << 32) | eax;
+}
+```
+
+This verifies the OS has enabled the save/restore of wider register files.
+Without this check, using YMM/ZMM registers would cause a #UD fault.
+
+---
+
+## Adler-32 Implementations
+
+### SSSE3 (`adler32_ssse3.c`)
+
+Uses `_mm_maddubs_epi16` for weighted position sums on 16-byte vectors:
+
+```c
+Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len) {
+ __m128i vs1 = _mm_cvtsi32_si128(adler & 0xffff);
+ __m128i vs2 = _mm_cvtsi32_si128(adler >> 16);
+ const __m128i dot2v = _mm_setr_epi8(16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
+
+ while (len >= 16) {
+ __m128i vbuf = _mm_loadu_si128((__m128i *)buf);
+ // sum1 += bytes
+ vs1 = _mm_add_epi32(vs1, _mm_sad_epu8(vbuf, _mm_setzero_si128()));
+ // sum2 += position_weight * bytes
+ __m128i vtmp = _mm_maddubs_epi16(vbuf, dot2v);
+ vs2 = _mm_add_epi32(vs2, _mm_madd_epi16(vtmp, _mm_set1_epi16(1)));
+ // Accumulate 16 * prev_s1 into s2
+ vs2 = _mm_add_epi32(vs2, _mm_slli_epi32(vs1_0, 4));
+ buf += 16;
+ len -= 16;
+ }
+ // Horizontal reduction and MOD BASE
+}
+```
+
+### AVX2 (`adler32_avx2.c`)
+
+Processes 32 bytes per iteration using 256-bit registers:
+
+```c
+Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len) {
+ static const uint8_t dot2v_data[] = {32,31,30,...,2,1};
+ __m256i vdot2v = _mm256_loadu_si256((__m256i*)dot2v_data);
+ __m256i vs1 = _mm256_set_epi32(0,0,0,0,0,0,0, adler & 0xffff);
+ __m256i vs2 = _mm256_set_epi32(0,0,0,0,0,0,0, adler >> 16);
+
+ while (len >= 32) {
+ __m256i vbuf = _mm256_loadu_si256((__m256i *)buf);
+ // s1 += sum of all bytes (using SAD against zero)
+ vs1 = _mm256_add_epi32(vs1,
+ _mm256_sad_epu8(vbuf, _mm256_setzero_si256()));
+ // s2 += weighted sum (dot product approach)
+ __m256i vtmp = _mm256_maddubs_epi16(vbuf, vdot2v);
+ vs2 = _mm256_add_epi32(vs2,
+ _mm256_madd_epi16(vtmp, _mm256_set1_epi16(1)));
+ // s2 += 32 * prev_s1
+ vs2 = _mm256_add_epi32(vs2, _mm256_slli_epi32(vs1_0, 5));
+ buf += 32;
+ len -= 32;
+ }
+}
+```
+
+The `_mm256_maddubs_epi16` instruction multiplies unsigned bytes by signed
+bytes and sums adjacent pairs, computing the weighted position sum in one
+instruction. `_mm256_sad_epu8` computes the horizontal sum of bytes.
+
+### AVX-512 (`adler32_avx512.c`)
+
+Processes 64 bytes per iteration using 512-bit `__m512i` registers:
+
+```c
+__m512i vs1 = _mm512_set_epi32(0,...,0, adler & 0xffff);
+__m512i vs2 = _mm512_set_epi32(0,...,0, adler >> 16);
+
+while (len >= 64) {
+ __m512i vbuf = _mm512_loadu_si512(buf);
+ vs1 = _mm512_add_epi32(vs1, _mm512_sad_epu8(vbuf, _mm512_setzero_si512()));
+ __m512i vtmp = _mm512_maddubs_epi16(vbuf, vdot2v);
+ vs2 = _mm512_add_epi32(vs2, _mm512_madd_epi16(vtmp, vones));
+ vs2 = _mm512_add_epi32(vs2, _mm512_slli_epi32(vs1_0, 6));
+ buf += 64;
+ len -= 64;
+}
+```
+
+### AVX-512+VNNI (`adler32_avx512_vnni.c`)
+
+Uses `_mm512_dpbusd_epi32` (dot product of unsigned bytes and signed bytes),
+available with the VNNI extension:
+
+```c
+// VPDPBUSD replaces maddubs + madd sequence with a single instruction
+vs2 = _mm512_dpbusd_epi32(vs2, vbuf, vdot2v);
+```
+
+---
+
+## CRC-32 Implementations
+
+### PCLMULQDQ (`crc32_pclmulqdq.c`)
+
+Uses carry-less multiplication for CRC folding. Processes 64 bytes per
+iteration with four XMM accumulators:
+
+```c
+Z_INTERNAL uint32_t crc32_pclmulqdq(uint32_t crc, const uint8_t *buf, size_t len) {
+ __m128i xmm_crc0, xmm_crc1, xmm_crc2, xmm_crc3;
+ __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4,
+ 0x00000001, 0xc6e41596);
+
+ // Init: XOR CRC into first 16 bytes of data
+ xmm_crc0 = _mm_xor_si128(_mm_loadu_si128(buf), _mm_cvtsi32_si128(crc));
+ xmm_crc1 = _mm_loadu_si128(buf + 16);
+ xmm_crc2 = _mm_loadu_si128(buf + 32);
+ xmm_crc3 = _mm_loadu_si128(buf + 48);
+
+ // Main loop: fold 64 bytes per iteration
+ while (len >= 64) {
+ // For each accumulator:
+ // crc_n = clmul(crc_n, fold4, 0x01) ^ clmul(crc_n, fold4, 0x10) ^ next_data
+ __m128i xmm_t0 = _mm_clmulepi64_si128(xmm_crc0, xmm_fold4, 0x01);
+ __m128i xmm_t1 = _mm_clmulepi64_si128(xmm_crc0, xmm_fold4, 0x10);
+ xmm_crc0 = _mm_xor_si128(_mm_xor_si128(xmm_t0, xmm_t1),
+ _mm_loadu_si128(next++));
+ // repeat for crc1..crc3
+ }
+
+ // Fold 4→1, then Barrett reduction to 32-bit CRC
+ // ...
+}
+```
+
+### VPCLMULQDQ (`crc32_vpclmulqdq.c`)
+
+Uses AVX-512 carry-less multiply to process 256 bytes per iteration
+with four ZMM (512-bit) accumulators:
+
+```c
+__m512i zmm_crc0 = _mm512_loadu_si512(buf);
+zmm_crc0 = _mm512_xor_si512(zmm_crc0, _mm512_castsi128_si512(_mm_cvtsi32_si128(crc)));
+// ... 3 more accumulators
+
+while (len >= 256) {
+ __m512i zmm_t0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x01);
+ __m512i zmm_t1 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x10);
+ zmm_crc0 = _mm512_ternarylogic_epi64(zmm_t0, zmm_t1,
+ _mm512_loadu_si512(next++), 0x96);
+ // XOR three values in one instruction via ternarylogic
+}
+```
+
+`_mm512_ternarylogic_epi64(..., 0x96)` computes `A ^ B ^ C` in a single
+instruction, fusing two XOR operations.
+
+---
+
+## String Comparison (`compare256`)
+
+### SSE2 (`compare256_sse2.c`)
+
+```c
+Z_INTERNAL uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1) {
+ uint32_t len = 0;
+ do {
+ __m128i v0 = _mm_loadu_si128((__m128i *)(src0 + len));
+ __m128i v1 = _mm_loadu_si128((__m128i *)(src1 + len));
+ __m128i cmp = _mm_cmpeq_epi8(v0, v1);
+ unsigned mask = (unsigned)_mm_movemask_epi8(cmp);
+ if (mask != 0xffff) {
+ // Find first mismatch
+ return len + __builtin_ctz(~mask);
+ }
+ len += 16;
+ } while (len < 256);
+ return 256;
+}
+```
+
+### AVX2 (`compare256_avx2.c`)
+
+Same approach with 32-byte vectors:
+
+```c
+Z_INTERNAL uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1) {
+ uint32_t len = 0;
+ do {
+ __m256i v0 = _mm256_loadu_si256((__m256i *)(src0 + len));
+ __m256i v1 = _mm256_loadu_si256((__m256i *)(src1 + len));
+ __m256i cmp = _mm256_cmpeq_epi8(v0, v1);
+ unsigned mask = (unsigned)_mm256_movemask_epi8(cmp);
+ if (mask != 0xffffffff) {
+ return len + __builtin_ctz(~mask);
+ }
+ len += 32;
+ } while (len < 256);
+ return 256;
+}
+```
+
+### SSE4.2 (`compare256_sse42.c`)
+
+Uses `_mm_cmpistri` (string compare instruction):
+
+```c
+Z_INTERNAL uint32_t compare256_sse42(const uint8_t *src0, const uint8_t *src1) {
+ // _mm_cmpistri with EQUAL_EACH | NEGATIVE_POLARITY finds first mismatch
+ // in a 16-byte comparison
+}
+```
+
+---
+
+## Slide Hash
+
+### SSE2 (`slide_hash_sse2.c`)
+
+```c
+Z_INTERNAL void slide_hash_sse2(deflate_state *s) {
+ Pos *p;
+ unsigned n;
+ __m128i xmm_wsize = _mm_set1_epi16((uint16_t)s->w_size);
+
+ n = HASH_SIZE;
+ p = &s->head[n];
+ do {
+ p -= 8;
+ __m128i value = _mm_loadu_si128((__m128i *)p);
+ _mm_storeu_si128((__m128i *)p,
+ _mm_subs_epu16(value, xmm_wsize)); // Saturating subtract
+ n -= 8;
+ } while (n);
+ // Same for s->prev
+}
+```
+
+### AVX-512 (`slide_hash_avx512.c`)
+
+Processes 32 entries (64 bytes) per iteration:
+
+```c
+Z_INTERNAL void slide_hash_avx512(deflate_state *s) {
+ __m512i zmm_wsize = _mm512_set1_epi16((uint16_t)s->w_size);
+ // Process 32 uint16_t entries per iteration
+ for (...) {
+ __m512i v = _mm512_loadu_si512(p);
+ _mm512_storeu_si512(p, _mm512_subs_epu16(v, zmm_wsize));
+ }
+}
+```
+
+---
+
+## Hash Insertion (SSE4.2)
+
+`insert_string_sse42.c` uses the hardware CRC32 instruction for hashing:
+
+```c
+Z_INTERNAL Pos insert_string_sse42(deflate_state *s,
+ Pos str, unsigned count) {
+ Pos idx;
+ for (unsigned i = 0; i < count; i++) {
+ unsigned val = *(uint32_t *)(s->window + str + i);
+ uint32_t h = 0;
+ h = _mm_crc32_u32(h, val); // Hardware CRC32C
+ h &= s->hash_mask;
+ idx = s->head[h];
+        s->prev[(str + i) & s->w_mask] = idx;
+ s->head[h] = (Pos)(str + i);
+ }
+ return idx;
+}
+```
+
+The CRC32C instruction provides excellent hash distribution with near-zero
+cost.
+
+---
+
+## Chunkset (Inflate Copy)
+
+### SSE2 (`chunkset_sse2.c`)
+
+Used during inflate for back-reference copying:
+
+```c
+Z_INTERNAL uint8_t* chunkmemset_safe_sse2(uint8_t *out, uint8_t *from,
+ unsigned dist, unsigned len) {
+ if (dist >= 16) {
+ // Standard copy with SSE2 loads/stores
+ while (len >= 16) {
+ _mm_storeu_si128((__m128i *)out, _mm_loadu_si128((__m128i *)from));
+ out += 16;
+ from += 16;
+ len -= 16;
+ }
+ } else {
+ // Replicate pattern: broadcast dist-byte pattern into 16 bytes
+ // Handle dist=1 (memset), dist=2, dist=4, dist=8 specially
+ __m128i pattern = replicate_pattern(from, dist);
+ while (len >= 16) {
+ _mm_storeu_si128((__m128i *)out, pattern);
+ out += 16;
+ len -= 16;
+ }
+ }
+ return out;
+}
+```
+
+### AVX2 (`chunkset_avx2.c`)
+
+Same pattern with 32-byte chunks:
+
+```c
+// Replicate to 256-bit and store 32 bytes at a time
+__m256i pattern = _mm256_broadcastsi128_si256(pattern_128);
+while (len >= 32) {
+ _mm256_storeu_si256((__m256i *)out, pattern);
+ out += 32;
+ len -= 32;
+}
+```
+
+---
+
+## CMake Configuration
+
+Each x86 SIMD feature has a corresponding `WITH_` option:
+
+```cmake
+option(WITH_SSE2 "Build with SSE2" ON)
+option(WITH_SSSE3 "Build with SSSE3" ON)
+option(WITH_SSE42 "Build with SSE4.2" ON)
+option(WITH_PCLMULQDQ "Build with PCLMULQDQ" ON)
+option(WITH_AVX2 "Build with AVX2" ON)
+option(WITH_AVX512 "Build with AVX-512" ON)
+option(WITH_AVX512VNNI "Build with AVX512VNNI" ON)
+option(WITH_VPCLMULQDQ "Build with VPCLMULQDQ" ON)
+```
+
+Each source file is compiled with its minimum required flags:
+
+```cmake
+set_property(SOURCE arch/x86/adler32_avx2.c APPEND PROPERTY COMPILE_OPTIONS -mavx2)
+set_property(SOURCE arch/x86/crc32_pclmulqdq.c APPEND PROPERTY COMPILE_OPTIONS -mpclmul -msse4.2)
+set_property(SOURCE arch/x86/crc32_vpclmulqdq.c APPEND PROPERTY COMPILE_OPTIONS -mvpclmulqdq -mavx512f)
+```
+
+This ensures the main code compiles without SIMD requirements while
+individual acceleration files use their specific instruction sets.
diff --git a/docs/handbook/ofborg/amqp-infrastructure.md b/docs/handbook/ofborg/amqp-infrastructure.md
new file mode 100644
index 0000000000..4575da966a
--- /dev/null
+++ b/docs/handbook/ofborg/amqp-infrastructure.md
@@ -0,0 +1,631 @@
+# Tickborg — AMQP Infrastructure
+
+## Overview
+
+Tickborg uses **AMQP 0-9-1** (RabbitMQ) as the message bus connecting all
+services. The Rust crate `lapin` (v4.3.0) provides the low-level protocol
+client. Two abstraction layers — `easyamqp` and `easylapin` — provide
+higher-level APIs for declaring exchanges, binding queues, and running worker
+consumers.
+
+---
+
+## Key Source Files
+
+| File | Purpose |
+|------|---------|
+| `tickborg/src/easyamqp.rs` | Config types, traits, exchange/queue declarations |
+| `tickborg/src/easylapin.rs` | `lapin`-based implementations of the traits |
+| `tickborg/src/worker.rs` | `SimpleWorker` trait, `Action` enum |
+| `tickborg/src/notifyworker.rs` | `SimpleNotifyWorker`, `NotificationReceiver` |
+| `tickborg/src/config.rs` | `RabbitMqConfig` |
+
+---
+
+## Connection Configuration
+
+### `RabbitMqConfig`
+
+```rust
+// config.rs
+#[derive(Deserialize, Debug)]
+pub struct RabbitMqConfig {
+ pub ssl: bool,
+ pub host: String,
+ pub vhost: Option<String>,
+ pub username: String,
+ pub password_file: PathBuf,
+}
+```
+
+### Connection URI Construction
+
+```rust
+// easylapin.rs
+pub async fn from_config(cfg: &RabbitMqConfig) -> Result<lapin::Connection, lapin::Error> {
+ let password = std::fs::read_to_string(&cfg.password_file)
+ .expect("Failed to read RabbitMQ password file")
+ .trim()
+ .to_owned();
+
+ let vhost = cfg.vhost
+ .as_deref()
+ .unwrap_or("/")
+ .to_owned();
+
+ let scheme = if cfg.ssl { "amqps" } else { "amqp" };
+ let uri = format!(
+ "{scheme}://{user}:{pass}@{host}/{vhost}",
+ user = urlencoding::encode(&cfg.username),
+ pass = urlencoding::encode(&password),
+ host = cfg.host,
+ vhost = urlencoding::encode(&vhost),
+ );
+
+ lapin::Connection::connect(
+ &uri,
+ lapin::ConnectionProperties::default()
+ .with_tokio()
+ .with_default_executor(8),
+ ).await
+}
+```
+
+---
+
+## Exchange and Queue Configuration Types
+
+### `ExchangeType`
+
+```rust
+#[derive(Clone, Debug)]
+pub enum ExchangeType {
+ Topic,
+ Fanout,
+ Headers,
+ Direct,
+ Custom(String),
+}
+
+impl ExchangeType {
+ fn as_str(&self) -> &str {
+ match self {
+ ExchangeType::Topic => "topic",
+ ExchangeType::Fanout => "fanout",
+ ExchangeType::Headers => "headers",
+ ExchangeType::Direct => "direct",
+ ExchangeType::Custom(s) => s.as_ref(),
+ }
+ }
+}
+```
+
+### `ExchangeConfig`
+
+```rust
+#[derive(Clone, Debug)]
+pub struct ExchangeConfig {
+ pub exchange_name: String,
+ pub exchange_type: ExchangeType,
+ pub passive: bool,
+ pub durable: bool,
+ pub exclusive: bool,
+ pub auto_delete: bool,
+ pub no_wait: bool,
+ pub internal: bool,
+}
+
+impl Default for ExchangeConfig {
+ fn default() -> Self {
+ ExchangeConfig {
+ exchange_name: String::new(),
+ exchange_type: ExchangeType::Topic,
+ passive: false,
+ durable: true,
+ exclusive: false,
+ auto_delete: false,
+ no_wait: false,
+ internal: false,
+ }
+ }
+}
+```
+
+### `QueueConfig`
+
+```rust
+#[derive(Clone, Debug)]
+pub struct QueueConfig {
+ pub queue_name: String,
+ pub passive: bool,
+ pub durable: bool,
+ pub exclusive: bool,
+ pub auto_delete: bool,
+ pub no_wait: bool,
+}
+
+impl Default for QueueConfig {
+ fn default() -> Self {
+ QueueConfig {
+ queue_name: String::new(),
+ passive: false,
+ durable: true,
+ exclusive: false,
+ auto_delete: false,
+ no_wait: false,
+ }
+ }
+}
+```
+
+### `BindQueueConfig`
+
+```rust
+#[derive(Clone, Debug)]
+pub struct BindQueueConfig {
+ pub queue_name: String,
+ pub exchange_name: String,
+ pub routing_key: Option<String>,
+ pub no_wait: bool,
+ pub headers: Option<Vec<(String, String)>>,
+}
+```
+
+### `ConsumeConfig`
+
+```rust
+#[derive(Clone, Debug)]
+pub struct ConsumeConfig {
+ pub queue: String,
+ pub consumer_tag: String,
+ pub no_local: bool,
+ pub no_ack: bool,
+ pub no_wait: bool,
+ pub exclusive: bool,
+}
+```
+
+---
+
+## The `ChannelExt` Trait
+
+```rust
+// easyamqp.rs
+pub trait ChannelExt {
+ fn declare_exchange(
+ &mut self,
+ config: ExchangeConfig,
+ ) -> impl Future<Output = Result<(), String>>;
+
+ fn declare_queue(
+ &mut self,
+ config: QueueConfig,
+ ) -> impl Future<Output = Result<(), String>>;
+
+ fn bind_queue(
+ &mut self,
+ config: BindQueueConfig,
+ ) -> impl Future<Output = Result<(), String>>;
+}
+```
+
+### `lapin` Implementation
+
+```rust
+// easylapin.rs
+impl ChannelExt for lapin::Channel {
+ async fn declare_exchange(&mut self, config: ExchangeConfig) -> Result<(), String> {
+ let opts = ExchangeDeclareOptions {
+ passive: config.passive,
+ durable: config.durable,
+ auto_delete: config.auto_delete,
+ internal: config.internal,
+ nowait: config.no_wait,
+ };
+ self.exchange_declare(
+ &config.exchange_name,
+ lapin::ExchangeKind::Custom(
+ config.exchange_type.as_str().to_owned()
+ ),
+ opts,
+ FieldTable::default(),
+ ).await
+ .map_err(|e| format!("Failed to declare exchange: {e}"))?;
+ Ok(())
+ }
+
+ async fn declare_queue(&mut self, config: QueueConfig) -> Result<(), String> {
+ let opts = QueueDeclareOptions {
+ passive: config.passive,
+ durable: config.durable,
+ exclusive: config.exclusive,
+ auto_delete: config.auto_delete,
+ nowait: config.no_wait,
+ };
+ self.queue_declare(
+ &config.queue_name,
+ opts,
+ FieldTable::default(),
+ ).await
+ .map_err(|e| format!("Failed to declare queue: {e}"))?;
+ Ok(())
+ }
+
+ async fn bind_queue(&mut self, config: BindQueueConfig) -> Result<(), String> {
+ let routing_key = config.routing_key
+ .as_deref()
+ .unwrap_or("#");
+
+ let mut headers = FieldTable::default();
+ if let Some(hdr_vec) = &config.headers {
+ for (k, v) in hdr_vec {
+ headers.insert(
+ k.clone().into(),
+ AMQPValue::LongString(v.clone().into()),
+ );
+ }
+ }
+
+ self.queue_bind(
+ &config.queue_name,
+ &config.exchange_name,
+ routing_key,
+ QueueBindOptions { nowait: config.no_wait },
+ headers,
+ ).await
+ .map_err(|e| format!("Failed to bind queue: {e}"))?;
+ Ok(())
+ }
+}
+```
+
+---
+
+## The `ConsumerExt` Trait
+
+```rust
+// easyamqp.rs
+pub trait ConsumerExt {
+ fn consume<W: worker::SimpleWorker + 'static>(
+ &mut self,
+ worker: W,
+ config: ConsumeConfig,
+ ) -> impl Future<Output = Result<(), String>>;
+}
+```
+
+Three implementations exist in `easylapin.rs`:
+
+### 1. `Channel` — Simple Workers
+
+```rust
+impl ConsumerExt for lapin::Channel {
+ async fn consume<W: worker::SimpleWorker + 'static>(
+ &mut self,
+ mut worker: W,
+ config: ConsumeConfig,
+ ) -> Result<(), String> {
+ let consumer = self.basic_consume(
+ &config.queue,
+ &config.consumer_tag,
+ BasicConsumeOptions {
+ no_local: config.no_local,
+ no_ack: config.no_ack,
+ exclusive: config.exclusive,
+ nowait: config.no_wait,
+ },
+ FieldTable::default(),
+ ).await
+ .map_err(|e| format!("Failed to start consumer: {e}"))?;
+
+ // Message processing loop
+ while let Some(delivery) = consumer.next().await {
+ let delivery = delivery
+ .map_err(|e| format!("Consumer error: {e}"))?;
+
+ // Decode the message
+ let job = match worker.msg_to_job(
+ &delivery.routing_key,
+ &delivery.exchange,
+ &delivery.data,
+ ).await {
+ Ok(job) => job,
+ Err(err) => {
+ tracing::error!("Failed to decode message: {}", err);
+ delivery.ack(BasicAckOptions::default()).await?;
+ continue;
+ }
+ };
+
+ // Process the job
+ let actions = worker.consumer(&job).await;
+
+ // Execute resulting actions
+ for action in actions {
+ action_deliver(&self, &delivery, action).await?;
+ }
+ }
+ Ok(())
+ }
+}
+```
+
+### 2. `WorkerChannel` — Workers on a Dedicated Channel
+
+```rust
+pub struct WorkerChannel {
+ pub channel: lapin::Channel,
+ pub prefetch_count: u16,
+}
+
+impl ConsumerExt for WorkerChannel {
+ async fn consume<W: worker::SimpleWorker + 'static>(
+ &mut self,
+ worker: W,
+ config: ConsumeConfig,
+ ) -> Result<(), String> {
+ // Set QoS (prefetch count)
+ self.channel.basic_qos(
+ self.prefetch_count,
+ BasicQosOptions::default(),
+ ).await?;
+
+ // Delegate to Channel implementation
+ self.channel.consume(worker, config).await
+ }
+}
+```
+
+### 3. `NotifyChannel` — Notify Workers
+
+```rust
+pub struct NotifyChannel {
+ pub channel: lapin::Channel,
+}
+
+impl NotifyChannel {
+ pub async fn consume<W: notifyworker::SimpleNotifyWorker + 'static>(
+ &mut self,
+ mut worker: W,
+ config: ConsumeConfig,
+ ) -> Result<(), String> {
+ // Similar to Channel but creates a ChannelNotificationReceiver
+ // that allows the worker to report progress back to AMQP
+ let consumer = self.channel.basic_consume(/* ... */).await?;
+
+ while let Some(delivery) = consumer.next().await {
+ let delivery = delivery?;
+ let receiver = ChannelNotificationReceiver {
+ channel: self.channel.clone(),
+ delivery: &delivery,
+ };
+
+ let job = worker.msg_to_job(/* ... */).await?;
+ let actions = worker.consumer(&job, &receiver).await;
+
+ for action in actions {
+ action_deliver(&self.channel, &delivery, action).await?;
+ }
+ }
+ Ok(())
+ }
+}
+```
+
+---
+
+## Action Delivery
+
+```rust
+// easylapin.rs
+async fn action_deliver(
+ channel: &lapin::Channel,
+ delivery: &lapin::message::Delivery,
+ action: worker::Action,
+) -> Result<(), String> {
+ match action {
+ worker::Action::Ack => {
+ delivery.ack(BasicAckOptions::default()).await
+ .map_err(|e| format!("Failed to ack: {e}"))?;
+ }
+ worker::Action::NackRequeue => {
+ delivery.nack(BasicNackOptions {
+ requeue: true,
+ ..Default::default()
+ }).await
+ .map_err(|e| format!("Failed to nack: {e}"))?;
+ }
+ worker::Action::NackDump => {
+ delivery.nack(BasicNackOptions {
+ requeue: false,
+ ..Default::default()
+ }).await
+ .map_err(|e| format!("Failed to nack-dump: {e}"))?;
+ }
+ worker::Action::Publish(msg) => {
+ channel.basic_publish(
+ msg.exchange.as_deref().unwrap_or(""),
+ msg.routing_key.as_deref().unwrap_or(""),
+ BasicPublishOptions::default(),
+ &msg.content,
+ BasicProperties::default()
+ .with_delivery_mode(2), // persistent
+ ).await
+ .map_err(|e| format!("Failed to publish: {e}"))?;
+ }
+ }
+ Ok(())
+}
+```
+
+---
+
+## Notification Receiver
+
+```rust
+// easylapin.rs
+pub struct ChannelNotificationReceiver<'a> {
+ channel: lapin::Channel,
+ delivery: &'a lapin::message::Delivery,
+}
+
+impl<'a> notifyworker::NotificationReceiver for ChannelNotificationReceiver<'a> {
+ async fn tell(&mut self, action: worker::Action) {
+ if let Err(e) = action_deliver(&self.channel, self.delivery, action).await {
+ tracing::error!("Failed to deliver notification action: {}", e);
+ }
+ }
+}
+```
+
+Used by `BuildWorker` (which implements `SimpleNotifyWorker`) to publish
+incremental log messages while a build is in progress, without waiting for the
+build to complete.
+
+---
+
+## Exchange Topology
+
+### Declarations
+
+Every binary declares its own required exchanges/queues at startup.
+Here is the complete topology used across the system:
+
+| Exchange | Type | Purpose |
+|----------|------|---------|
+| `github-events` | Topic | GitHub webhooks → routing by event type |
+| `build-jobs` | Fanout | Evaluation → builders |
+| `build-results` | Fanout | Builder results → poster + stats |
+| `logs` | Topic | Build log lines → collector |
+| `stats` | Fanout | Metrics events → stats collector |
+
+### Queue Bindings
+
+| Queue | Exchange | Routing Key | Consumer |
+|-------|----------|-------------|----------|
+| `mass-rebuild-check-inputs` | `github-events` | `pull_request.*` | EvaluationFilterWorker |
+| `mass-rebuild-check-jobs` | _(direct publish)_ | — | EvaluationWorker |
+| `build-inputs-{system}` | `build-jobs` | — | BuildWorker |
+| `build-results` | `build-results` | — | GitHubCommentPoster |
+| `build-logs` | `logs` | `logs.*` | LogMessageCollector |
+| `comment-jobs` | `github-events` | `issue_comment.*` | GitHubCommentWorker |
+| `push-jobs` | `github-events` | `push.*` | PushFilterWorker |
+| `stats-events` | `stats` | — | StatCollectorWorker |
+
+### Topic Routing Keys
+
+For the `github-events` exchange, the routing key follows the pattern:
+
+```
+{event_type}.{action}
+```
+
+Examples:
+- `pull_request.opened`
+- `pull_request.synchronize`
+- `issue_comment.created`
+- `push.push`
+
+For the `logs` exchange:
+- `logs.{build_id}` — Each build's log lines are tagged with the build ID
+
+---
+
+## Message Persistence
+
+All published messages use `delivery_mode = 2` (persistent), which means
+messages survive RabbitMQ restarts:
+
+```rust
+BasicProperties::default()
+ .with_delivery_mode(2) // persistent
+```
+
+---
+
+## Prefetch / QoS
+
+Worker binaries configure `basic_qos` (prefetch count) to control how many
+messages are delivered to a consumer before it must acknowledge them:
+
+```rust
+let mut chan = WorkerChannel {
+ channel,
+ prefetch_count: 1, // Process one job at a time
+};
+```
+
+Setting `prefetch_count = 1` ensures fair dispatching across multiple worker
+instances and prevents a single slow worker from hoarding messages.
+
+---
+
+## Error Recovery
+
+### Message Processing Failures
+
+| Scenario | Action | Effect |
+|----------|--------|--------|
+| Decode error | `Ack` | Message discarded |
+| Processing error (retryable) | `NackRequeue` | Message requeued |
+| Processing error (permanent) | `NackDump` | Message discarded (dead-lettered only if a DLX is configured) |
+| Processing success | `Ack` | Message removed |
+| Worker publish | `Publish` | New message to exchange |
+
+### Connection Recovery
+
+`lapin` supports automatic connection recovery. If the TCP connection drops,
+the library will attempt to reconnect. However, tickborg binaries are designed
+to be restarted by their process supervisor (systemd) if the connection
+cannot be re-established.
+
+---
+
+## Usage Example: Declaring a Full Stack
+
+A typical binary does:
+
+```rust
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+ tickborg::setup_log();
+ let cfg = tickborg::config::load();
+
+ // 1. Connect to RabbitMQ
+ let conn = easylapin::from_config(&cfg.rabbitmq).await?;
+ let mut chan = conn.create_channel().await?;
+
+ // 2. Declare exchange
+ chan.declare_exchange(ExchangeConfig {
+ exchange_name: "github-events".to_owned(),
+ exchange_type: ExchangeType::Topic,
+ durable: true,
+ ..Default::default()
+ }).await?;
+
+ // 3. Declare queue
+ chan.declare_queue(QueueConfig {
+ queue_name: "mass-rebuild-check-inputs".to_owned(),
+ durable: true,
+ ..Default::default()
+ }).await?;
+
+ // 4. Bind queue to exchange
+ chan.bind_queue(BindQueueConfig {
+ queue_name: "mass-rebuild-check-inputs".to_owned(),
+ exchange_name: "github-events".to_owned(),
+ routing_key: Some("pull_request.*".to_owned()),
+ ..Default::default()
+ }).await?;
+
+ // 5. Start consume loop
+ let worker = EvaluationFilterWorker::new(cfg.acl());
+ chan.consume(worker, ConsumeConfig {
+ queue: "mass-rebuild-check-inputs".to_owned(),
+ consumer_tag: format!("evaluation-filter-{}", cfg.identity),
+ ..Default::default()
+ }).await?;
+
+ Ok(())
+}
+```
diff --git a/docs/handbook/ofborg/architecture.md b/docs/handbook/ofborg/architecture.md
new file mode 100644
index 0000000000..69b02cc4db
--- /dev/null
+++ b/docs/handbook/ofborg/architecture.md
@@ -0,0 +1,814 @@
+# Tickborg — Architecture
+
+## Workspace Structure
+
+The tickborg codebase is organized as a Cargo workspace with two member crates:
+
+```toml
+# ofborg/Cargo.toml
+[workspace]
+members = [
+ "tickborg",
+ "tickborg-simple-build"
+]
+resolver = "2"
+
+[profile.release]
+debug = true
+```
+
+The `debug = true` in the release profile ensures that production binaries
+include debug symbols, making crash backtraces and profiling useful without
+sacrificing optimization.
+
+---
+
+## Crate: `tickborg`
+
+This is the main crate. It compiles into a library (`lib.rs`) and **11 binary
+targets** under `src/bin/`.
+
+### Library Root (`src/lib.rs`)
+
+```rust
+#![recursion_limit = "512"]
+#![allow(clippy::redundant_closure)]
+
+pub mod acl;
+pub mod asynccmd;
+pub mod buildtool;
+pub mod checkout;
+pub mod clone;
+pub mod commentparser;
+pub mod commitstatus;
+pub mod config;
+pub mod easyamqp;
+pub mod easylapin;
+pub mod evalchecker;
+pub mod files;
+pub mod ghevent;
+pub mod locks;
+pub mod message;
+pub mod notifyworker;
+pub mod stats;
+pub mod systems;
+pub mod tagger;
+pub mod tasks;
+pub mod test_scratch;
+pub mod worker;
+pub mod writetoline;
+```
+
+Additionally, a `tickborg` sub-module re-exports everything for convenient
+access:
+
+```rust
+pub mod tickborg {
+ pub use crate::acl;
+ pub use crate::asynccmd;
+ pub use crate::buildtool;
+ pub use crate::checkout;
+ pub use crate::clone;
+ pub use crate::commentparser;
+ // ... all modules re-exported ...
+
+ pub const VERSION: &str = env!("CARGO_PKG_VERSION");
+
+ pub fn partition_result<A, B>(results: Vec<Result<A, B>>) -> (Vec<A>, Vec<B>) {
+ let mut ok = Vec::new();
+ let mut err = Vec::new();
+ for result in results.into_iter() {
+ match result {
+ Ok(x) => ok.push(x),
+ Err(x) => err.push(x),
+ }
+ }
+ (ok, err)
+ }
+}
+```
+
+### Logging Initialization
+
+```rust
+pub fn setup_log() {
+ let filter_layer = EnvFilter::try_from_default_env()
+ .or_else(|_| EnvFilter::try_new("info"))
+ .unwrap();
+
+ let log_json = env::var("RUST_LOG_JSON").is_ok_and(|s| s == "1");
+
+ if log_json {
+ let fmt_layer = tracing_subscriber::fmt::layer().json();
+ tracing_subscriber::registry()
+ .with(filter_layer)
+ .with(fmt_layer)
+ .init();
+ } else {
+ let fmt_layer = tracing_subscriber::fmt::layer();
+ tracing_subscriber::registry()
+ .with(filter_layer)
+ .with(fmt_layer)
+ .init();
+ }
+}
+```
+
+Every binary calls `tickborg::setup_log()` as its first action. The environment
+variable `RUST_LOG` controls the filter level. Setting `RUST_LOG_JSON=1`
+switches to JSON-structured output for log aggregation in production.
+
+---
+
+## Module Hierarchy
+
+### Core Worker Pattern
+
+```
+worker.rs
+├── SimpleWorker trait
+├── Action enum (Ack, NackRequeue, NackDump, Publish)
+├── QueueMsg struct
+└── publish_serde_action() helper
+
+notifyworker.rs
+├── SimpleNotifyWorker trait
+├── NotificationReceiver trait
+└── DummyNotificationReceiver (for testing)
+```
+
+### AMQP Layer
+
+```
+easyamqp.rs
+├── ConsumeConfig struct
+├── BindQueueConfig struct
+├── ExchangeConfig struct
+├── QueueConfig struct
+├── ExchangeType enum (Topic, Headers, Fanout, Direct, Custom)
+├── ChannelExt trait
+└── ConsumerExt trait
+
+easylapin.rs
+├── from_config() → Connection
+├── impl ChannelExt for Channel
+├── impl ConsumerExt for Channel
+├── WorkerChannel (with prefetch=1)
+├── NotifyChannel (with prefetch=1, for SimpleNotifyWorker)
+├── ChannelNotificationReceiver
+└── action_deliver() (Ack/Nack/Publish dispatch)
+```
+
+### Configuration
+
+```
+config.rs
+├── Config (top-level)
+├── GithubWebhookConfig
+├── LogApiConfig
+├── EvaluationFilter
+├── GithubCommentFilter
+├── GithubCommentPoster
+├── MassRebuilder
+├── Builder
+├── PushFilter
+├── LogMessageCollector
+├── Stats
+├── RabbitMqConfig
+├── BuildConfig
+├── GithubAppConfig
+├── RunnerConfig
+├── CheckoutConfig
+├── GithubAppVendingMachine
+└── load() → Config
+```
+
+### Message Types
+
+```
+message/
+├── mod.rs (re-exports)
+├── common.rs
+│ ├── Repo
+│ ├── Pr
+│ └── PushTrigger
+├── buildjob.rs
+│ ├── BuildJob
+│ ├── QueuedBuildJobs
+│ └── Actions
+├── buildresult.rs
+│ ├── BuildStatus enum
+│ ├── BuildResult enum (V1, Legacy)
+│ ├── LegacyBuildResult
+│ └── V1Tag
+├── buildlogmsg.rs
+│ ├── BuildLogMsg
+│ └── BuildLogStart
+└── evaluationjob.rs
+ ├── EvaluationJob
+ └── Actions
+```
+
+### GitHub Event Types
+
+```
+ghevent/
+├── mod.rs (re-exports)
+├── common.rs
+│ ├── Comment
+│ ├── User
+│ ├── Repository
+│ ├── Issue
+│ └── GenericWebhook
+├── issuecomment.rs
+│ ├── IssueComment
+│ └── IssueCommentAction enum
+├── pullrequestevent.rs
+│ ├── PullRequestEvent
+│ ├── PullRequest
+│ ├── PullRequestRef
+│ ├── PullRequestState enum
+│ ├── PullRequestAction enum
+│ ├── PullRequestChanges
+│ └── BaseChange, ChangeWas
+└── pushevent.rs
+ ├── PushEvent
+ ├── Pusher
+ └── HeadCommit
+```
+
+### Task Implementations
+
+```
+tasks/
+├── mod.rs
+├── build.rs
+│ ├── BuildWorker (SimpleNotifyWorker)
+│ └── JobActions (log streaming helper)
+├── eval/
+│ ├── mod.rs
+│ │ ├── EvaluationStrategy trait
+│ │ ├── EvaluationComplete
+│ │ └── Error enum
+│ └── monorepo.rs
+│ ├── MonorepoStrategy
+│ ├── label_from_title()
+│ └── parse_commit_scopes()
+├── evaluate.rs
+│ ├── EvaluationWorker (SimpleWorker)
+│ ├── OneEval (per-job evaluation context)
+│ └── update_labels()
+├── evaluationfilter.rs
+│ └── EvaluationFilterWorker (SimpleWorker)
+├── githubcommentfilter.rs
+│ └── GitHubCommentWorker (SimpleWorker)
+├── githubcommentposter.rs
+│ ├── GitHubCommentPoster (SimpleWorker)
+│ ├── PostableEvent enum
+│ ├── job_to_check()
+│ └── result_to_check()
+├── log_message_collector.rs
+│ ├── LogMessageCollector (SimpleWorker)
+│ ├── LogFrom
+│ └── LogMessage
+├── pushfilter.rs
+│ └── PushFilterWorker (SimpleWorker)
+└── statscollector.rs
+ └── StatCollectorWorker (SimpleWorker)
+```
+
+### Utility Modules
+
+```
+acl.rs — Access control (repos, trusted users, arch mapping)
+asynccmd.rs — Async subprocess execution with streaming output
+buildtool.rs — Build system detection and execution
+checkout.rs — Git checkout caching (CachedCloner, CachedProject)
+clone.rs — Git clone trait (GitClonable, file locking)
+commentparser.rs — @tickbot command parser (nom combinators)
+commitstatus.rs — GitHub commit status abstraction
+evalchecker.rs — Generic command execution checker
+files.rs — File utility functions
+locks.rs — File-based locking (fs2)
+stats.rs — Metrics events and RabbitMQ publisher
+systems.rs — Platform/architecture enum
+tagger.rs — PR label generation from changed files
+writetoline.rs — Random-access line writer for log files
+```
+
+---
+
+## Binary Targets
+
+### `github-webhook-receiver`
+
+**File:** `src/bin/github-webhook-receiver.rs`
+
+- Starts an HTTP server using `hyper 1.0`.
+- Validates `X-Hub-Signature-256` using HMAC-SHA256.
+- Reads the `X-Github-Event` header to determine the event type.
+- Parses the body as `GenericWebhook` to extract the repository name.
+- Publishes to the `github-events` topic exchange with routing key
+ `{event_type}.{owner}/{repo}`.
+- Declares queues: `build-inputs`, `github-events-unknown`,
+ `mass-rebuild-check-inputs`, `push-build-inputs`.
+
+### `evaluation-filter`
+
+**File:** `src/bin/evaluation-filter.rs`
+
+- Consumes from `mass-rebuild-check-inputs`.
+- Deserializes `PullRequestEvent`.
+- Checks if the repo is eligible via ACL.
+- Filters by action (Opened, Synchronize, Reopened, Edited with base change).
+- Produces `EvaluationJob` to `mass-rebuild-check-jobs`.
+
+### `github-comment-filter`
+
+**File:** `src/bin/github-comment-filter.rs`
+
+- Consumes from `build-inputs`.
+- Deserializes `IssueComment`.
+- Parses the comment body for `@tickbot` commands.
+- Looks up the PR via GitHub API to get the head SHA.
+- Produces `BuildJob` messages to architecture-specific queues.
+- Also produces `QueuedBuildJobs` to `build-results` for the comment poster.
+
+### `github-comment-poster`
+
+**File:** `src/bin/github-comment-poster.rs`
+
+- Consumes from `build-results`.
+- Accepts both `QueuedBuildJobs` (build queued) and `BuildResult` (build
+ finished).
+- Creates GitHub Check Runs via the Checks API.
+- Maps `BuildStatus` to `Conclusion` (Success, Failure, Skipped, Neutral).
+
+### `mass-rebuilder`
+
+**File:** `src/bin/mass-rebuilder.rs`
+
+- Consumes from `mass-rebuild-check-jobs`.
+- Uses `EvaluationWorker` with `MonorepoStrategy`.
+- Clones the repository, checks out the PR, detects changed files.
+- Uses build system detection to discover affected projects.
+- Creates `BuildJob` messages for each affected project/architecture.
+- Updates GitHub commit statuses throughout the process.
+
+### `builder`
+
+**File:** `src/bin/builder.rs`
+
+- Consumes from `build-inputs-{system}` (e.g., `build-inputs-x86_64-linux`).
+- Creates one channel per configured system.
+- Uses `BuildWorker` (a `SimpleNotifyWorker`) to execute builds.
+- Streams build log lines to the `logs` exchange in real-time.
+- Publishes `BuildResult` to `build-results` when done.
+
+### `push-filter`
+
+**File:** `src/bin/push-filter.rs`
+
+- Consumes from `push-build-inputs`.
+- Deserializes `PushEvent`.
+- Skips tag pushes, branch deletions, and zero-SHA events.
+- Detects changed projects from the push event's commit info.
+- Falls back to `default_attrs` when no projects are detected.
+- Creates `BuildJob::new_push()` and schedules on primary platforms.
+
+### `log-message-collector`
+
+**File:** `src/bin/log-message-collector.rs`
+
+- Consumes from `logs` (ephemeral queue bound to the `logs` exchange).
+- Writes build log lines to `{logs_path}/{routing_key}/{attempt_id}`.
+- Uses `LineWriter` for random-access line writing.
+- Also writes `.metadata.json` and `.result.json` files.
+
+### `logapi`
+
+**File:** `src/bin/logapi.rs`
+
+- HTTP server that serves build log metadata.
+- Endpoint: `GET /logs/{routing_key}`.
+- Returns JSON with attempt IDs, metadata, results, and log URLs.
+- Path traversal prevention via `canonicalize()` and `validate_path_segment()`.
+
+### `stats`
+
+**File:** `src/bin/stats.rs`
+
+- Consumes from `stats-events` (bound to the `stats` fanout exchange).
+- Collects `EventMessage` payloads.
+- Exposes Prometheus-compatible metrics on `0.0.0.0:9898`.
+- Runs an HTTP server in a separate thread.
+
+### `build-faker`
+
+**File:** `src/bin/build-faker.rs`
+
+- Development tool that publishes fake `BuildJob` messages.
+- Useful for testing the builder without a real GitHub webhook.
+
+---
+
+## The Worker Pattern in Detail
+
+### `SimpleWorker`
+
+```rust
+pub trait SimpleWorker: Send {
+ type J: Send;
+
+ fn consumer(&mut self, job: &Self::J) -> impl Future<Output = Actions>;
+
+ fn msg_to_job(
+ &mut self,
+ method: &str,
+ headers: &Option<String>,
+ body: &[u8],
+ ) -> impl Future<Output = Result<Self::J, String>>;
+}
+```
+
+Workers that implement `SimpleWorker` receive a message, process it, and return
+a `Vec<Action>`. The actions are applied in order:
+
+```rust
+pub enum Action {
+ Ack, // Acknowledge message (remove from queue)
+ NackRequeue, // Negative ack, requeue (retry later)
+ NackDump, // Negative ack, discard
+ Publish(Arc<QueueMsg>), // Publish a new message
+}
+```
+
+The `ConsumerExt` implementation on `Channel` drives the loop:
+
+```rust
+impl<'a, W: SimpleWorker + 'a> ConsumerExt<'a, W> for Channel {
+ async fn consume(self, mut worker: W, config: ConsumeConfig)
+ -> Result<Self::Handle, Self::Error>
+ {
+ let mut consumer = self.basic_consume(/* ... */).await?;
+ Ok(Box::pin(async move {
+ while let Some(Ok(deliver)) = consumer.next().await {
+ let job = worker.msg_to_job(/* ... */).await.expect("...");
+ for action in worker.consumer(&job).await {
+ action_deliver(&self, &deliver, action).await.expect("...");
+ }
+ }
+ }))
+ }
+}
+```
+
+### `SimpleNotifyWorker`
+
+```rust
+#[async_trait]
+pub trait SimpleNotifyWorker {
+ type J;
+
+ async fn consumer(
+ &self,
+ job: Self::J,
+ notifier: Arc<dyn NotificationReceiver + Send + Sync>,
+ );
+
+ fn msg_to_job(
+ &self,
+ routing_key: &str,
+ content_type: &Option<String>,
+ body: &[u8],
+ ) -> Result<Self::J, String>;
+}
+```
+
+The key difference: instead of returning `Actions`, the worker receives a
+`NotificationReceiver` that it can `tell()` at any point during processing.
+This enables streaming log lines back to RabbitMQ while a build is still
+running.
+
+```rust
+#[async_trait]
+pub trait NotificationReceiver {
+ async fn tell(&self, action: Action);
+}
+```
+
+The `ChannelNotificationReceiver` bridges this to a real AMQP channel:
+
+```rust
+pub struct ChannelNotificationReceiver {
+ channel: lapin::Channel,
+ deliver: Delivery,
+}
+
+#[async_trait]
+impl NotificationReceiver for ChannelNotificationReceiver {
+ async fn tell(&self, action: Action) {
+ action_deliver(&self.channel, &self.deliver, action)
+ .await
+ .expect("action deliver failure");
+ }
+}
+```
+
+### Channel Variants
+
+| Wrapper | Prefetch | Use Case |
+|---------|----------|----------|
+| `Channel` (raw) | None | Services with a single instance, or that do not need a prefetch limit |
+| `WorkerChannel(Channel)` | 1 | Multi-instance workers (fair dispatch) |
+| `NotifyChannel(Channel)` | 1 | Long-running workers with streaming notifications |
+
+---
+
+## Message Flow Through the System
+
+### PR Opened/Synchronised
+
+```
+GitHub ──POST──► webhook-receiver
+ │
+ ▼ publish to github-events
+ │ routing_key: pull_request.{owner}/{repo}
+ │
+ ┌──────────┴──────────┐
+ ▼ ▼
+ evaluation-filter (other consumers)
+ │
+ ▼ publish to mass-rebuild-check-jobs
+ │
+ mass-rebuilder
+ │
+ ├─► clone repo
+ ├─► checkout PR branch
+ ├─► detect changed files
+ ├─► map to projects
+ ├─► create BuildJob per project/arch
+ │
+ ├─► publish BuildJob to build-inputs-{system}
+ ├─► publish QueuedBuildJobs to build-results
+ └─► update commit status
+ │
+ ┌──────────┴──────────┐
+ ▼ ▼
+ builder comment-poster
+ │ │
+ ├─► clone & merge ├─► create check run (Queued)
+ ├─► build project │
+ ├─► stream logs ──► │
+ │ logs exchange │
+ │ │ │
+ │ log-collector │
+ │ │
+ ├─► publish result │
+ │ to build-results │
+ │ │ │
+ │ └──────────►├─► create check run (Completed)
+ └─► Ack └─► Ack
+```
+
+### Comment Command (`@tickbot build meshmc`)
+
+```
+GitHub ──POST──► webhook-receiver
+ │
+ ▼ publish to github-events
+ │ routing_key: issue_comment.{owner}/{repo}
+ │
+ comment-filter
+ │
+ ├─► parse @tickbot commands
+ ├─► lookup PR via GitHub API
+ ├─► determine build architectures from ACL
+ │
+ ├─► publish BuildJob to build-inputs-{system}
+ ├─► publish QueuedBuildJobs to build-results
+ └─► Ack
+```
+
+### Push to Branch
+
+```
+GitHub ──POST──► webhook-receiver
+ │
+ ▼ publish to github-events
+ │ routing_key: push.{owner}/{repo}
+ │
+ push-filter
+ │
+ ├─► check if branch push (not tag/delete)
+ ├─► detect changed projects from commit info
+ ├─► fallback to default_attrs if needed
+ │
+ ├─► create BuildJob::new_push()
+ ├─► publish to build-inputs-{system} (primary)
+ ├─► publish QueuedBuildJobs to build-results
+ └─► Ack
+```
+
+---
+
+## Concurrency Model
+
+Tickborg uses **Tokio** as its async runtime with multi-threaded scheduling:
+
+```rust
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn Error>> {
+ // ...
+}
+```
+
+Within the builder, multiple systems can be served simultaneously:
+
+```rust
+// builder.rs — main()
+let mut handles: Vec<Pin<Box<dyn Future<Output = ()> + Send>>> = Vec::new();
+for system in &cfg.build.system {
+ handles.push(self::create_handle(&conn, &cfg, system.to_string()).await?);
+}
+future::join_all(handles).await;
+```
+
+Each handle is a `Pin<Box<dyn Future>>` that runs a consumer loop for one
+architecture. The `basic_qos(1)` prefetch setting ensures that each builder
+instance only works on one job at a time from each queue, preventing resource
+starvation.
+
+Build subprocesses themselves are spawned via `std::process::Command` and
+monitored through the `AsyncCmd` abstraction which uses OS threads for I/O
+multiplexing:
+
+```rust
+pub struct AsyncCmd {
+ command: Command,
+}
+
+pub struct SpawnedAsyncCmd {
+ waiter: JoinHandle<Option<Result<ExitStatus, io::Error>>>,
+ rx: Receiver<String>,
+}
+```
+
+---
+
+## Git Operations
+
+### CachedCloner
+
+```rust
+pub struct CachedCloner {
+ root: PathBuf,
+}
+
+impl CachedCloner {
+ pub fn project(&self, name: &str, clone_url: String) -> CachedProject;
+}
+```
+
+The cached cloner maintains a local mirror of repositories under:
+```
+{root}/repo/{md5(name)}/clone — bare clone (shared by all checkouts)
+{root}/repo/{md5(name)}/{category}/ — working checkouts
+```
+
+### CachedProjectCo (Checkout)
+
+```rust
+pub struct CachedProjectCo {
+ root: PathBuf,
+ id: String,
+ clone_url: String,
+ local_reference: PathBuf,
+}
+
+impl CachedProjectCo {
+ pub fn checkout_origin_ref(&self, git_ref: &OsStr) -> Result<String, Error>;
+ pub fn checkout_ref(&self, git_ref: &OsStr) -> Result<String, Error>;
+ pub fn fetch_pr(&self, pr_id: u64) -> Result<(), Error>;
+ pub fn commit_exists(&self, commit: &OsStr) -> bool;
+ pub fn merge_commit(&self, commit: &OsStr) -> Result<(), Error>;
+ pub fn commit_messages_from_head(&self, commit: &str) -> Result<Vec<String>, Error>;
+ pub fn files_changed_from_head(&self, commit: &str) -> Result<Vec<String>, Error>;
+}
+```
+
+All git operations use file-based locking via `fs2::FileExt::lock_exclusive()`
+to prevent concurrent access to the same checkout directory.
+
+---
+
+## File Locking
+
+Two locking mechanisms exist:
+
+### `clone.rs` — Git-level locks
+
+```rust
+pub trait GitClonable {
+ fn lock_path(&self) -> PathBuf;
+ fn lock(&self) -> Result<Lock, Error>;
+ fn clone_repo(&self) -> Result<(), Error>;
+ fn fetch_repo(&self) -> Result<(), Error>;
+}
+```
+
+### `locks.rs` — Generic file locks
+
+```rust
+pub trait Lockable {
+ fn lock_path(&self) -> PathBuf;
+ fn lock(&self) -> Result<Lock, Error>;
+}
+
+pub struct Lock {
+ lock: Option<fs::File>,
+}
+
+impl Lock {
+ pub fn unlock(&mut self) { self.lock = None }
+}
+```
+
+Both use `fs2`'s `lock_exclusive()` which maps to `flock(2)` on Unix.
+
+---
+
+## Error Handling Strategy
+
+### CommitStatusError
+
+```rust
+pub enum CommitStatusError {
+ ExpiredCreds(hubcaps::Error),
+ MissingSha(hubcaps::Error),
+ Error(hubcaps::Error),
+ InternalError(String),
+}
+```
+
+This is used to determine retry behavior:
+- `ExpiredCreds` → `NackRequeue` (retry after token refresh)
+- `MissingSha` → `Ack` (commit was force-pushed away, skip)
+- `InternalError` → `Ack` + label `tickborg-internal-error`
+
+### EvalWorkerError
+
+```rust
+enum EvalWorkerError {
+ EvalError(eval::Error),
+ CommitStatusWrite(CommitStatusError),
+}
+```
+
+### eval::Error
+
+```rust
+pub enum Error {
+ CommitStatusWrite(CommitStatusError),
+ Fail(String),
+}
+```
+
+---
+
+## Testing Strategy
+
+- Unit tests are embedded in modules using `#[cfg(test)]`.
+- Test fixtures (JSON event payloads) are stored in `test-srcs/events/`.
+- Tests use `include_str!()` to load test data at compile time.
+- The `DummyNotificationReceiver` captures actions for assertion:
+
+```rust
+#[derive(Default)]
+pub struct DummyNotificationReceiver {
+ pub actions: parking_lot::Mutex<Vec<Action>>,
+}
+```
+
+Example test from `evaluationfilter.rs`:
+
+```rust
+#[tokio::test]
+async fn changed_base() {
+ let data = include_str!("../../test-srcs/events/pr-changed-base.json");
+ let job: PullRequestEvent = serde_json::from_str(data).expect("...");
+
+ let mut worker = EvaluationFilterWorker::new(
+ acl::Acl::new(vec!["project-tick/Project-Tick".to_owned()], Some(vec![]))
+ );
+
+ assert_eq!(worker.consumer(&job).await, vec![
+ worker::publish_serde_action(
+ None,
+ Some("mass-rebuild-check-jobs".to_owned()),
+ &evaluationjob::EvaluationJob { /* ... */ }
+ ),
+ worker::Action::Ack,
+ ]);
+}
+```
diff --git a/docs/handbook/ofborg/build-executor.md b/docs/handbook/ofborg/build-executor.md
new file mode 100644
index 0000000000..8b0cbcdac8
--- /dev/null
+++ b/docs/handbook/ofborg/build-executor.md
@@ -0,0 +1,657 @@
+# Tickborg — Build Executor
+
+## Overview
+
+The **build executor** is the component responsible for actually running builds
+of sub-projects in the Project Tick monorepo. Unlike the original ofborg which
+used `nix-build` exclusively, tickborg's build executor supports multiple build
+systems: CMake, Meson, Autotools, Cargo, Gradle, Make, and custom commands.
+
+The build executor is invoked by the **builder** binary
+(`tickborg/src/bin/builder.rs`) which consumes `BuildJob` messages from
+architecture-specific queues.
+
+---
+
+## Key Source Files
+
+| File | Purpose |
+|------|---------|
+| `tickborg/src/buildtool.rs` | Build system abstraction, `BuildExecutor`, `ProjectBuildConfig` |
+| `tickborg/src/tasks/build.rs` | `BuildWorker`, `JobActions` — the task implementation |
+| `tickborg/src/bin/builder.rs` | Binary entry point |
+| `tickborg/src/asynccmd.rs` | Async subprocess execution |
+
+---
+
+## Build System Abstraction
+
+### `BuildSystem` Enum
+
+```rust
+// tickborg/src/buildtool.rs
+#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
+pub enum BuildSystem {
+ CMake,
+ Meson,
+ Autotools,
+ Cargo,
+ Gradle,
+ Make,
+ Custom { command: String },
+}
+```
+
+Each variant corresponds to a well-known build system with a standard
+invocation pattern.
+
+### `ProjectBuildConfig`
+
+```rust
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct ProjectBuildConfig {
+ pub name: String,
+ pub path: String,
+ pub build_system: BuildSystem,
+ pub build_timeout_seconds: u16,
+ pub configure_args: Vec<String>,
+ pub build_args: Vec<String>,
+ pub test_command: Option<Vec<String>>,
+}
+```
+
+Each sub-project in the monorepo has a `ProjectBuildConfig` that specifies:
+- **name**: Human-readable project name (e.g., `"meshmc"`, `"mnv"`)
+- **path**: Relative path within the repository
+- **build_system**: Which build system to use
+- **build_timeout_seconds**: Maximum time allowed for the build
+- **configure_args**: Arguments passed to the configure step
+- **build_args**: Arguments passed to the build step
+- **test_command**: Custom test command (overrides the default for the build system)
+
+### `BuildExecutor`
+
+```rust
+#[derive(Clone, Debug)]
+pub struct BuildExecutor {
+ pub build_timeout: u16,
+}
+
+impl BuildExecutor {
+ pub fn new(build_timeout: u16) -> Self {
+ Self { build_timeout }
+ }
+}
+```
+
+The `BuildExecutor` is created from the configuration with a minimum timeout
+of 300 seconds:
+
+```rust
+// config.rs
+impl Config {
+ pub fn build_executor(&self) -> BuildExecutor {
+ if self.build.build_timeout_seconds < 300 {
+ error!(?self.build.build_timeout_seconds,
+ "Please set build_timeout_seconds to at least 300");
+ panic!();
+ }
+ BuildExecutor::new(self.build.build_timeout_seconds)
+ }
+}
+```
+
+---
+
+## Build Commands Per System
+
+### CMake
+
+```rust
+fn build_command(&self, project_dir: &Path, config: &ProjectBuildConfig) -> Command {
+ let build_dir = project_dir.join("build");
+ let mut cmd = Command::new("cmake");
+ cmd.arg("--build").arg(&build_dir);
+ cmd.args(["--config", "Release"]);
+ for arg in &config.build_args { cmd.arg(arg); }
+ cmd.current_dir(project_dir);
+ cmd
+}
+```
+
+Test command (default):
+```rust
+let mut cmd = Command::new("ctest");
+cmd.arg("--test-dir").arg("build");
+cmd.args(["--output-on-failure"]);
+```
+
+### Meson
+
+```rust
+let mut cmd = Command::new("meson");
+cmd.arg("compile");
+cmd.args(["-C", "build"]);
+```
+
+Test:
+```rust
+let mut cmd = Command::new("meson");
+cmd.arg("test").args(["-C", "build"]);
+```
+
+### Autotools / Make
+
+```rust
+let mut cmd = Command::new("make");
+cmd.args(["-j", &num_cpus().to_string()]);
+```
+
+Test:
+```rust
+let mut cmd = Command::new("make");
+cmd.arg("check");
+```
+
+### Cargo
+
+```rust
+let mut cmd = Command::new("cargo");
+cmd.arg("build").arg("--release");
+```
+
+Test:
+```rust
+let mut cmd = Command::new("cargo");
+cmd.arg("test");
+```
+
+### Gradle
+
+```rust
+let gradlew = project_dir.join("gradlew");
+let prog = if gradlew.exists() {
+ gradlew.to_string_lossy().to_string()
+} else {
+ "gradle".to_string()
+};
+let mut cmd = Command::new(prog);
+cmd.arg("build");
+```
+
+Gradle prefers the wrapper (`gradlew`) if present.
+
+### Custom
+
+```rust
+let mut cmd = Command::new("sh");
+cmd.args(["-c", command]);
+```
+
+---
+
+## Build Execution Methods
+
+### Synchronous Build
+
+```rust
+impl BuildExecutor {
+ pub fn build_project(
+ &self, project_root: &Path, config: &ProjectBuildConfig,
+ ) -> Result<fs::File, fs::File> {
+ let project_dir = project_root.join(&config.path);
+ let cmd = self.build_command(&project_dir, config);
+ self.run(cmd, true)
+ }
+}
+```
+
+Returns `Ok(File)` with stdout/stderr on success, `Err(File)` on failure.
+The `File` contains the captured output.
+
+### Asynchronous Build
+
+```rust
+impl BuildExecutor {
+ pub fn build_project_async(
+ &self, project_root: &Path, config: &ProjectBuildConfig,
+ ) -> SpawnedAsyncCmd {
+ let project_dir = project_root.join(&config.path);
+ let cmd = self.build_command(&project_dir, config);
+ AsyncCmd::new(cmd).spawn()
+ }
+}
+```
+
+Returns a `SpawnedAsyncCmd` that allows streaming output line-by-line.
+
+### Test Execution
+
+```rust
+impl BuildExecutor {
+ pub fn test_project(
+ &self, project_root: &Path, config: &ProjectBuildConfig,
+ ) -> Result<fs::File, fs::File> {
+ let project_dir = project_root.join(&config.path);
+ let cmd = self.test_command(&project_dir, config);
+ self.run(cmd, true)
+ }
+}
+```
+
+If `config.test_command` is set, it is used directly. Otherwise, the default
+test command for the build system is used.
+
+---
+
+## Async Command Execution (`asynccmd.rs`)
+
+The `AsyncCmd` abstraction wraps `std::process::Command` to provide:
+- Non-blocking output streaming via channels
+- Separate stderr/stdout capture
+- Exit status monitoring
+
+```rust
+pub struct AsyncCmd {
+ command: Command,
+}
+
+pub struct SpawnedAsyncCmd {
+ waiter: JoinHandle<Option<Result<ExitStatus, io::Error>>>,
+ rx: Receiver<String>,
+}
+```
+
+### Spawning
+
+```rust
+impl AsyncCmd {
+ pub fn new(cmd: Command) -> AsyncCmd {
+ AsyncCmd { command: cmd }
+ }
+
+ pub fn spawn(mut self) -> SpawnedAsyncCmd {
+ let mut child = self.command
+ .stdin(Stdio::null())
+ .stderr(Stdio::piped())
+ .stdout(Stdio::piped())
+ .spawn()
+ .unwrap();
+
+ // Sets up channels and monitoring threads
+ // ...
+ }
+}
+```
+
+The spawn implementation:
+1. Creates a `sync_channel` for output lines (buffer size: 30).
+2. Spawns a reader thread for stdout.
+3. Spawns a reader thread for stderr.
+4. Spawns a single waiter thread that joins the stdout and stderr reader threads and then waits on the child process for its exit status.
+5. Returns a `SpawnedAsyncCmd` whose `rx` receiver yields lines as they arrive.
+
+```rust
+fn reader_tx<R: 'static + Read + Send>(
+ read: R, tx: SyncSender<String>,
+) -> thread::JoinHandle<()> {
+ let read = BufReader::new(read);
+ thread::spawn(move || {
+ for line in read.lines() {
+ let to_send = match line {
+ Ok(line) => line,
+ Err(e) => {
+ error!("Error reading data in reader_tx: {:?}", e);
+ "Non-UTF8 data omitted from the log.".to_owned()
+ }
+ };
+ if let Err(e) = tx.send(to_send) {
+ error!("Failed to send log line: {:?}", e);
+ }
+ }
+ })
+}
+```
+
+The channel buffer size is intentionally small (30) to apply backpressure:
+
+```rust
+const OUT_CHANNEL_BUFFER_SIZE: usize = 30;
+```
+
+---
+
+## The Builder Binary
+
+### Entry Point
+
+```rust
+// src/bin/builder.rs
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn Error>> {
+ tickborg::setup_log();
+
+ let arg = env::args().nth(1).unwrap_or_else(|| panic!("usage: ..."));
+ let cfg = config::load(arg.as_ref());
+
+ let conn = easylapin::from_config(&cfg.rabbitmq).await?;
+ let mut handles: Vec<Pin<Box<dyn Future<Output = ()> + Send>>> = Vec::new();
+
+ for system in &cfg.build.system {
+ handles.push(create_handle(&conn, &cfg, system.to_string()).await?);
+ }
+
+ future::join_all(handles).await;
+ Ok(())
+}
+```
+
+The builder creates one consumer handle per configured system. This allows a
+single builder process to serve multiple architectures (e.g., `x86_64-linux`
+and `aarch64-linux`).
+
+### Channel Setup
+
+```rust
+async fn create_handle(
+ conn: &lapin::Connection, cfg: &config::Config, system: String,
+) -> Result<Pin<Box<dyn Future<Output = ()> + Send>>, Box<dyn Error>> {
+ let mut chan = conn.create_channel().await?;
+ let cloner = checkout::cached_cloner(Path::new(&cfg.checkout.root));
+ let build_executor = cfg.build_executor();
+
+ // Declare build-jobs exchange (Fanout)
+ chan.declare_exchange(/* build-jobs, Fanout */);
+
+ // Declare and bind the system-specific queue
+ let queue_name = format!("build-inputs-{system}");
+ chan.declare_queue(/* queue_name, durable */);
+ chan.bind_queue(/* queue_name ← build-jobs */);
+
+ // Start consuming
+ let handle = easylapin::NotifyChannel(chan).consume(
+ tasks::build::BuildWorker::new(
+ cloner, build_executor, system, cfg.runner.identity.clone()
+ ),
+ easyamqp::ConsumeConfig {
+ queue: queue_name,
+ consumer_tag: format!("{}-builder", cfg.whoami()),
+ no_local: false, no_ack: false, no_wait: false, exclusive: false,
+ },
+ ).await?;
+
+ Ok(handle)
+}
+```
+
+### Development Mode (`build_all_jobs`)
+
+When `runner.build_all_jobs` is set to `true`, the builder creates an
+exclusive, auto-delete queue instead of the named durable one:
+
+```rust
+if cfg.runner.build_all_jobs != Some(true) {
+ // Normal: named durable queue
+ let queue_name = format!("build-inputs-{system}");
+ chan.declare_queue(QueueConfig { durable: true, exclusive: false, ... });
+} else {
+ // Dev mode: ephemeral queue (receives ALL jobs)
+ warn!("Building all jobs, please don't use this unless ...");
+ chan.declare_queue(QueueConfig { durable: false, exclusive: true, auto_delete: true, ... });
+}
+```
+
+---
+
+## The `BuildWorker`
+
+```rust
+// tasks/build.rs
+pub struct BuildWorker {
+ cloner: checkout::CachedCloner,
+ build_executor: buildtool::BuildExecutor,
+ system: String,
+ identity: String,
+}
+
+impl BuildWorker {
+ pub fn new(
+ cloner: checkout::CachedCloner,
+ build_executor: buildtool::BuildExecutor,
+ system: String,
+ identity: String,
+ ) -> BuildWorker { ... }
+}
+```
+
+The `BuildWorker` implements `SimpleNotifyWorker`, meaning it receives a
+`NotificationReceiver` that allows it to stream log lines back during
+processing.
+
+---
+
+## `JobActions` — The Streaming Helper
+
+`JobActions` wraps the build job context and provides methods for logging and
+reporting:
+
+```rust
+pub struct JobActions {
+ system: String,
+ identity: String,
+ receiver: Arc<dyn NotificationReceiver + Send + Sync>,
+ job: buildjob::BuildJob,
+ line_counter: AtomicU64,
+ snippet_log: parking_lot::RwLock<VecDeque<String>>,
+ attempt_id: String,
+ log_exchange: Option<String>,
+ log_routing_key: Option<String>,
+ result_exchange: Option<String>,
+ result_routing_key: Option<String>,
+}
+```
+
+### Attempt ID
+
+Each build execution gets a unique UUID v4 `attempt_id`:
+
+```rust
+attempt_id: Uuid::new_v4().to_string(),
+```
+
+### Snippet Log
+
+The last 10 lines of output are kept in a ring buffer for inclusion in the
+build result:
+
+```rust
+snippet_log: parking_lot::RwLock::new(VecDeque::with_capacity(10)),
+```
+
+### Log Streaming
+
+```rust
+impl JobActions {
+ pub async fn log_line(&self, line: String) {
+ self.line_counter.fetch_add(1, Ordering::SeqCst);
+
+ // Update snippet ring buffer
+ {
+ let mut snippet_log = self.snippet_log.write();
+ if snippet_log.len() >= 10 {
+ snippet_log.pop_front();
+ }
+ snippet_log.push_back(line.clone());
+ }
+
+ let msg = buildlogmsg::BuildLogMsg {
+ identity: self.identity.clone(),
+ system: self.system.clone(),
+ attempt_id: self.attempt_id.clone(),
+ line_number: self.line_counter.load(Ordering::SeqCst),
+ output: line,
+ };
+
+ self.tell(worker::publish_serde_action(
+ self.log_exchange.clone(),
+ self.log_routing_key.clone(),
+ &msg,
+ )).await;
+ }
+}
+```
+
+Each log line is published as a `BuildLogMsg` to the `logs` exchange in
+real-time. The `line_counter` uses `AtomicU64` for thread-safe incrementing.
+
+### Build Start Notification
+
+```rust
+pub async fn log_started(&self, can_build: Vec<String>, cannot_build: Vec<String>) {
+ let msg = buildlogmsg::BuildLogStart {
+ identity: self.identity.clone(),
+ system: self.system.clone(),
+ attempt_id: self.attempt_id.clone(),
+ attempted_attrs: Some(can_build),
+ skipped_attrs: Some(cannot_build),
+ };
+ self.tell(worker::publish_serde_action(
+ self.log_exchange.clone(), self.log_routing_key.clone(), &msg,
+ )).await;
+}
+```
+
+### Build Result Reporting
+
+```rust
+pub async fn merge_failed(&self) {
+ let msg = BuildResult::V1 {
+ tag: V1Tag::V1,
+ repo: self.job.repo.clone(),
+ pr: self.job.pr.clone(),
+ system: self.system.clone(),
+ output: vec![String::from("Merge failed")],
+ attempt_id: self.attempt_id.clone(),
+ request_id: self.job.request_id.clone(),
+ attempted_attrs: None,
+ skipped_attrs: None,
+ status: BuildStatus::Failure,
+ push: self.job.push.clone(),
+ };
+
+ self.tell(worker::publish_serde_action(
+ self.result_exchange.clone(),
+ self.result_routing_key.clone(),
+ &msg,
+ )).await;
+ self.tell(worker::Action::Ack).await;
+}
+```
+
+### Other Status Methods
+
+```rust
+impl JobActions {
+ pub async fn pr_head_missing(&self) { self.tell(Action::Ack).await; }
+ pub async fn commit_missing(&self) { self.tell(Action::Ack).await; }
+ pub async fn nothing_to_do(&self) { self.tell(Action::Ack).await; }
+ pub async fn merge_failed(&self) { /* publish Failure + Ack */ }
+ pub async fn log_started(&self, ...) { /* publish BuildLogStart */ }
+ pub async fn log_line(&self, line) { /* publish BuildLogMsg */ }
+ pub async fn log_instantiation_errors(&self, ...) { /* log each error */ }
+ pub fn log_snippet(&self) -> Vec<String> { /* return last 10 lines */ }
+}
+```
+
+---
+
+## Build Flow
+
+1. **Receive** `BuildJob` from queue
+2. **Clone** repository (using `CachedCloner`)
+3. **Checkout** target branch
+4. **Fetch** PR (if PR-triggered)
+5. **Merge** PR into target branch
+6. **Determine** which attrs can build on this system
+7. **Log start** (`BuildLogStart` message)
+8. **For each attr**:
+ a. Execute build command
+ b. Stream output lines (`BuildLogMsg` messages)
+ c. Check exit status
+9. **Publish result** (`BuildResult` with `BuildStatus`)
+10. **Ack** the original message
+
+---
+
+## Project Detection
+
+The `detect_changed_projects` function in `buildtool.rs` maps changed files
+to project names:
+
+```rust
+pub fn detect_changed_projects(changed_files: &[String]) -> Vec<String>;
+```
+
+It examines the first path component of each changed file and matches it
+against known project directories in the monorepo.
+
+The `find_project` function looks up a project by name:
+
+```rust
+pub fn find_project(name: &str) -> Option<ProjectBuildConfig>;
+```
+
+---
+
+## Build Timeout
+
+The build timeout is enforced at the configuration level:
+
+```rust
+pub struct BuildConfig {
+ pub system: Vec<String>,
+ pub build_timeout_seconds: u16,
+ pub extra_env: Option<HashMap<String, String>>,
+}
+```
+
+The minimum is 300 seconds (5 minutes). This is validated at startup:
+
+```rust
+if self.build.build_timeout_seconds < 300 {
+ error!("Please set build_timeout_seconds to at least 300");
+ panic!();
+}
+```
+
+When a build times out, the result status is set to `BuildStatus::TimedOut`.
+
+---
+
+## NixOS Service Configuration
+
+The builder has special systemd resource limits:
+
+```nix
+# service.nix
+"tickborg-builder" = mkTickborgService "Builder" {
+ binary = "builder";
+ serviceConfig = {
+ MemoryMax = "8G";
+ CPUQuota = "400%";
+ };
+};
+```
+
+The `CPUQuota = "400%"` allows the builder to use up to 4 CPU cores.
+
+The service PATH includes build tools:
+
+```nix
+path = with pkgs; [
+ git bash cmake gnumake gcc pkg-config
+ meson ninja
+ autoconf automake libtool
+ jdk17
+ rustc cargo
+];
+```
diff --git a/docs/handbook/ofborg/building.md b/docs/handbook/ofborg/building.md
new file mode 100644
index 0000000000..622be96356
--- /dev/null
+++ b/docs/handbook/ofborg/building.md
@@ -0,0 +1,530 @@
+# Tickborg — Building
+
+## Prerequisites
+
+| Prerequisite | Minimum Version | Notes |
+|-------------|-----------------|-------|
+| Rust | Edition 2024 | `rustup default stable` |
+| Cargo | Latest stable | Comes with Rust |
+| Git | 2.x | For repository cloning |
+| pkg-config | Any | Native dependency resolution |
+| CMake | 3.x | If building CMake-based sub-projects |
+| OpenSSL / rustls | — | TLS for AMQP + GitHub API |
+
+---
+
+## Quick Build
+
+```bash
+cd ofborg
+cargo build --workspace
+```
+
+This compiles both workspace members:
+- `tickborg` (main crate — library + 11 binaries)
+- `tickborg-simple-build` (simplified build tool)
+
+### Release Build
+
+```bash
+cargo build --workspace --release
+```
+
+The release profile includes debug symbols (`debug = true` in workspace
+`Cargo.toml`) so that backtraces are readable in production.
+
+### Build Individual Binaries
+
+```bash
+# Build only the webhook receiver
+cargo build -p tickborg --bin github-webhook-receiver
+
+# Build only the builder
+cargo build -p tickborg --bin builder
+
+# Build only the mass rebuilder
+cargo build -p tickborg --bin mass-rebuilder
+```
+
+### List All Binary Targets
+
+```bash
+cargo build -p tickborg --bins 2>&1 | head -20
+# Or:
+ls tickborg/src/bin/
+```
+
+Available binaries:
+
+| Binary | Source File |
+|--------|-----------|
+| `build-faker` | `src/bin/build-faker.rs` |
+| `builder` | `src/bin/builder.rs` |
+| `evaluation-filter` | `src/bin/evaluation-filter.rs` |
+| `github-comment-filter` | `src/bin/github-comment-filter.rs` |
+| `github-comment-poster` | `src/bin/github-comment-poster.rs` |
+| `github-webhook-receiver` | `src/bin/github-webhook-receiver.rs` |
+| `log-message-collector` | `src/bin/log-message-collector.rs` |
+| `logapi` | `src/bin/logapi.rs` |
+| `mass-rebuilder` | `src/bin/mass-rebuilder.rs` |
+| `push-filter` | `src/bin/push-filter.rs` |
+| `stats` | `src/bin/stats.rs` |
+
+---
+
+## Cargo.toml — Dependencies Deep Dive
+
+### `tickborg/Cargo.toml`
+
+```toml
+[package]
+name = "tickborg"
+version = "0.1.0"
+authors = ["Project Tick Contributors"]
+build = "build.rs"
+edition = "2024"
+description = "Distributed CI bot for Project Tick monorepo"
+license = "MIT"
+```
+
+### Core Dependencies
+
+#### Async Runtime & Networking
+
+```toml
+tokio = { version = "1", features = ["rt-multi-thread", "net", "macros", "sync"] }
+tokio-stream = "0.1"
+futures = "0.3.31"
+futures-util = "0.3.31"
+async-trait = "0.1.89"
+```
+
+- **tokio**: The async runtime. `rt-multi-thread` enables the work-stealing
+ scheduler. `net` provides TCP listeners. `macros` enables `#[tokio::main]`.
+ `sync` provides `RwLock`, `Mutex`, etc.
+- **tokio-stream**: `StreamExt` for consuming lapin message streams.
+- **futures / futures-util**: `join_all`, `TryFutureExt`, and stream utilities.
+- **async-trait**: Enables `async fn` in trait definitions (used by
+ `SimpleNotifyWorker` and `NotificationReceiver`).
+
+#### AMQP Client
+
+```toml
+lapin = "4.3.0"
+```
+
+- **lapin**: Pure-Rust AMQP 0-9-1 client. Provides `Connection`, `Channel`,
+ `Consumer`, publish/consume/ack/nack operations. Built on tokio.
+
+#### HTTP Server
+
+```toml
+hyper = { version = "1.0", features = ["full", "server", "http1"] }
+hyper-util = { version = "0.1", features = ["server", "tokio", "http1"] }
+http = "1"
+http-body-util = "0.1"
+```
+
+- **hyper**: The webhook receiver and logapi/stats HTTP servers use hyper 1.0
+ directly (no framework). `http1` feature is sufficient — no HTTP/2 needed.
+- **hyper-util**: `TokioIo` adapter and server utilities.
+- **http**: Standard HTTP types (`StatusCode`, `Method`, `Request`, `Response`).
+- **http-body-util**: `Full<Bytes>` response body, `BodyExt` for collecting
+ incoming bodies.
+
+#### GitHub API
+
+```toml
+hubcaps = { git = "https://github.com/ofborg/hubcaps.git", rev = "0d7466e..." }
+```
+
+- **hubcaps**: GitHub REST API client. The custom fork adds
+ `Conclusion::Skipped` for check runs. Provides:
+ - `Github` client
+ - `Credentials` (Client OAuth, JWT, InstallationToken)
+ - `JWTCredentials`, `InstallationTokenGenerator`
+ - Repository, Pull Request, Issue, Statuses, Check Runs APIs
+
+#### Serialization
+
+```toml
+serde = { version = "1.0.217", features = ["derive"] }
+serde_json = "1.0.135"
+```
+
+All message types, configuration, and GitHub event payloads use serde for
+JSON serialization/deserialization.
+
+#### Cryptography
+
+```toml
+hmac = "0.13.0"
+sha2 = "0.11.0"
+hex = "0.4.3"
+md5 = "0.8.0"
+```
+
+- **hmac + sha2**: HMAC-SHA256 for GitHub webhook signature verification.
+- **hex**: Hex encoding/decoding for signature comparison.
+- **md5**: Hashing repository names for cache directory names (not security-critical).
+
+#### TLS
+
+```toml
+rustls-pki-types = "1.14"
+```
+
+- Reading PEM-encoded private keys for GitHub App JWT authentication.
+
+#### Parsing
+
+```toml
+nom = "8"
+regex = "1.11.1"
+brace-expand = "0.1.0"
+```
+
+- **nom**: Parser combinator library for the `@tickbot` comment command parser.
+- **regex**: Pattern matching for PR title label extraction and commit scope
+ parsing.
+- **brace-expand**: Shell-style brace expansion (e.g., `{meshmc,mnv}`).
+
+#### Logging
+
+```toml
+tracing = "0.1.41"
+tracing-subscriber = { version = "0.3.19", features = ["json", "env-filter"] }
+```
+
+- **tracing**: Structured logging with spans and events.
+- **tracing-subscriber**: `EnvFilter` for `RUST_LOG`-based filtering, JSON
+ formatter for production logging.
+
+#### Concurrency
+
+```toml
+parking_lot = "0.12.4"
+fs2 = "0.4.3"
+```
+
+- **parking_lot**: Fast `Mutex` and `RwLock` (used for the snippet log in
+ `BuildWorker` and the `DummyNotificationReceiver` in tests).
+- **fs2**: File-based exclusive locking (`flock`) for git operations.
+
+#### Utilities
+
+```toml
+chrono = { version = "0.4.38", default-features = false, features = ["clock", "std"] }
+either = "1.13.0"
+lru-cache = "0.1.2"
+mime = "0.3"
+tempfile = "3.15.0"
+uuid = { version = "1.12", features = ["v4"] }
+```
+
+- **chrono**: Timestamps for check run `started_at` / `completed_at`.
+- **lru-cache**: LRU eviction for open log file handles in the log collector.
+- **tempfile**: Temporary files for build output capture.
+- **uuid**: v4 UUIDs for `attempt_id` and `request_id`.
+
+---
+
+## Build Script (`build.rs`)
+
+The crate has a build script at `tickborg/build.rs` that generates event
+definitions at compile time:
+
+```rust
+// tickborg/src/stats.rs
+include!(concat!(env!("OUT_DIR"), "/events.rs"));
+```
+
+The build script generates a `events.rs` file into `OUT_DIR` containing the
+`Event` enum and related metric functions used by the stats system.
+
+---
+
+## Running Tests
+
+```bash
+# Run all tests
+cargo test --workspace
+
+# Run tests for tickborg only
+cargo test -p tickborg
+
+# Run a specific test
+cargo test -p tickborg -- evaluationfilter::tests::changed_base
+
+# Run tests with output
+cargo test -p tickborg -- --nocapture
+
+# Run tests with logging
+RUST_LOG=tickborg=debug cargo test -p tickborg -- --nocapture
+```
+
+### Test Data
+
+Test fixtures are located in:
+
+```
+tickborg/test-srcs/events/ — GitHub webhook JSON payloads
+tickborg/test-scratch/ — Scratch test data
+tickborg/test-nix/ — Legacy Nix test data
+```
+
+Tests load fixtures at compile time:
+
+```rust
+let data = include_str!("../../test-srcs/events/pr-changed-base.json");
+let job: PullRequestEvent = serde_json::from_str(data).expect("...");
+```
+
+---
+
+## Linting
+
+```bash
+# Check formatting
+cargo fmt --check
+
+# Run clippy
+cargo clippy --workspace
+
+# Both (as defined in the dev shell)
+cargo fmt && cargo clippy
+```
+
+The dev shell sets `RUSTFLAGS = "-D warnings"` so that all warnings are treated
+as errors in CI.
+
+Known clippy allowances in the codebase:
+
+```rust
+#![allow(clippy::redundant_closure)] // lib.rs — readability preference
+#[allow(clippy::cognitive_complexity)] // githubcommentfilter — complex match
+#[allow(clippy::too_many_arguments)] // OneEval::new
+#[allow(clippy::upper_case_acronyms)] // Subset::Project
+#[allow(clippy::vec_init_then_push)] // githubcommentposter — readability
+```
+
+---
+
+## Nix-Based Build
+
+### Dev Shell
+
+```bash
+nix develop ./ofborg
+```
+
+This provides:
+
+```nix
+nativeBuildInputs = with pkgs; [
+ bash rustc cargo clippy rustfmt pkg-config git cmake
+];
+
+RUSTFLAGS = "-D warnings";
+RUST_BACKTRACE = "1";
+RUST_LOG = "tickborg=debug";
+```
+
+The dev shell also defines a `checkPhase` function:
+
+```bash
+checkPhase() (
+ cd ofborg
+ set -x
+ cargo fmt
+ git diff --exit-code
+ cargo clippy
+ cargo build && cargo test
+)
+```
+
+### Nix Package
+
+```bash
+nix build ./ofborg#tickborg
+```
+
+The flake defines a `rustPlatform.buildRustPackage` derivation:
+
+```nix
+pkg = pkgs.rustPlatform.buildRustPackage {
+ name = "tickborg";
+ src = pkgs.nix-gitignore.gitignoreSource [ ] ./.;
+ nativeBuildInputs = with pkgs; [ pkg-config pkgs.rustPackages.clippy ];
+ preBuild = ''cargo clippy'';
+ doCheck = false;
+ cargoLock = {
+ lockFile = ./Cargo.lock;
+ outputHashes = {
+ "hubcaps-0.6.2" = "sha256-Vl4wQIKQVRxkpQxL8fL9rndAN3TKLV4OjgnZOpT6HRo=";
+ };
+ };
+};
+```
+
+The `outputHashes` entry pins the git-sourced `hubcaps` dependency for
+reproducible builds.
+
+---
+
+## Docker Build
+
+```bash
+cd ofborg
+docker build -t tickborg .
+```
+
+The `Dockerfile` performs a multi-stage build:
+
+1. **Builder stage**: Compiles all binaries in release mode.
+2. **Runtime stage**: Copies only the compiled binaries and necessary runtime
+ dependencies.
+
+For the full stack:
+
+```bash
+docker compose build
+docker compose up -d
+```
+
+See [deployment.md](deployment.md) for production Docker usage.
+
+---
+
+## Dependency Management
+
+### Updating Dependencies
+
+```bash
+cargo update # Update all deps within semver ranges
+cargo update -p lapin # Update a specific dependency
+```
+
+### The Lockfile
+
+`Cargo.lock` is checked into version control because tickborg produces binaries.
+This ensures reproducible builds across all environments.
+
+### Git Dependencies
+
+```toml
+hubcaps = { git = "https://github.com/ofborg/hubcaps.git", rev = "0d7466e..." }
+```
+
+This is pinned to a specific commit for stability. When the upstream fork is
+updated, change the `rev` and update the Nix `outputHashes` accordingly.
+
+### Patching Dependencies
+
+The workspace `Cargo.toml` has commented-out patch sections:
+
+```toml
+[patch.crates-io]
+#hubcaps = { path = "../hubcaps" }
+#amq-proto = { path = "rust-amq-proto" }
+```
+
+Uncomment these to develop against local checkouts of forked dependencies.
+
+---
+
+## Build Output
+
+After `cargo build --release`, binaries are located at:
+
+```
+ofborg/target/release/build-faker
+ofborg/target/release/builder
+ofborg/target/release/evaluation-filter
+ofborg/target/release/github-comment-filter
+ofborg/target/release/github-comment-poster
+ofborg/target/release/github-webhook-receiver
+ofborg/target/release/log-message-collector
+ofborg/target/release/logapi
+ofborg/target/release/mass-rebuilder
+ofborg/target/release/push-filter
+ofborg/target/release/stats
+```
+
+Each binary is self-contained and takes a single argument: the path to the
+configuration JSON file.
+
+```bash
+./target/release/builder /etc/tickborg/config.json
+```
+
+---
+
+## Cross-Compilation
+
+The flake supports building on:
+
+```nix
+supportedSystems = [
+ "aarch64-darwin"
+ "x86_64-darwin"
+ "x86_64-linux"
+ "aarch64-linux"
+];
+```
+
+On macOS, additional build inputs are needed:
+
+```nix
+buildInputs = with pkgs; lib.optionals stdenv.isDarwin [
+ darwin.Security
+ libiconv
+];
+```
+
+---
+
+## Incremental Compilation Tips
+
+1. **Use `cargo check` for fast feedback**: Skips codegen, only type-checks.
+2. **Set `CARGO_INCREMENTAL=1`**: Enabled by default in debug builds.
+3. **Use `sccache`**: `RUSTC_WRAPPER=sccache cargo build` for cached
+ compilation across clean builds.
+4. **Link with `mold`**: On Linux, add to `.cargo/config.toml`:
+ ```toml
+ [target.x86_64-unknown-linux-gnu]
+ linker = "clang"
+ rustflags = ["-C", "link-arg=-fuse-ld=mold"]
+ ```
+
+---
+
+## Troubleshooting
+
+### `error[E0554]: #![feature] may not be used on the stable release channel`
+
+Your stable toolchain is too old: on older toolchains Edition 2024 was still feature-gated, which surfaces as this error. Update with:
+```bash
+rustup update stable
+```
+
+### `hubcaps` build failure
+
+The git dependency needs network access on first build. Ensure the rev is
+reachable:
+```bash
+git ls-remote https://github.com/ofborg/hubcaps.git 0d7466e
+```
+
+### Linking errors on macOS
+
+Ensure Xcode Command Line Tools are installed:
+```bash
+xcode-select --install
+```
+
+### `lapin` connection failures at runtime
+
+This is a runtime issue, not a build issue. Ensure RabbitMQ is running and
+the config file points to the correct host. See
+[configuration.md](configuration.md).
diff --git a/docs/handbook/ofborg/code-style.md b/docs/handbook/ofborg/code-style.md
new file mode 100644
index 0000000000..25f0d228d3
--- /dev/null
+++ b/docs/handbook/ofborg/code-style.md
@@ -0,0 +1,332 @@
+# Tickborg — Code Style & Conventions
+
+## Rust Edition and Toolchain
+
+- **Edition**: 2024
+- **Resolver**: Cargo workspace resolver v2
+- **MSRV**: Not pinned — follows latest stable
+
+---
+
+## Module Organization
+
+### Top-Level Layout
+
+```
+tickborg/src/
+├── lib.rs # Public API, module declarations, setup_log()
+├── config.rs # Configuration loading and types
+├── worker.rs # SimpleWorker trait, Action enum
+├── notifyworker.rs # SimpleNotifyWorker trait
+├── easyamqp.rs # AMQP abstraction types
+├── easylapin.rs # lapin-based AMQP implementations
+├── acl.rs # Access control
+├── systems.rs # Platform/architecture definitions
+├── commentparser.rs # @tickbot command parser (nom)
+├── checkout.rs # Git clone/checkout/merge
+├── buildtool.rs # Build system detection
+├── commitstatus.rs # GitHub commit status wrapper
+├── tagger.rs # PR label generation
+├── clone.rs # Low-level git operations
+├── locks.rs # File-based locking
+├── asynccmd.rs # Async subprocess execution
+├── evalchecker.rs # Generic command runner
+├── stats.rs # Metrics collection trait
+├── writetoline.rs # Line-targeted file writing
+├── bin/ # Binary entry points (11 files)
+├── tasks/ # Worker implementations
+├── message/ # AMQP message types
+├── ghevent/ # GitHub webhook event types
+└── eval/ # Evaluation strategies
+```
+
+### Convention: One Trait Per File
+
+Worker-related traits each get their own file:
+- `worker.rs` → `SimpleWorker`
+- `notifyworker.rs` → `SimpleNotifyWorker`
+
+### Convention: `mod.rs` in Sub-Modules
+
+Sub-directories use `mod.rs` for re-exports:
+
+```rust
+// message/mod.rs
+pub mod buildjob;
+pub mod buildresult;
+pub mod evaluationjob;
+pub mod buildlogmsg;
+pub mod common;
+```
+
+---
+
+## Naming Conventions
+
+### Types
+
+| Pattern | Example |
+|---------|---------|
+| Worker structs | `BuildWorker`, `EvaluationFilterWorker` |
+| Config structs | `RabbitMqConfig`, `BuilderConfig` |
+| Message structs | `BuildJob`, `BuildResult`, `EvaluationJob` |
+| Event structs | `PullRequestEvent`, `IssueComment`, `PushEvent` |
+| Enums | `BuildStatus`, `ExchangeType`, `System` |
+
+### Functions
+
+| Pattern | Example |
+|---------|---------|
+| Constructors | `new()`, `from_config()` |
+| Predicates | `is_tag()`, `is_delete()`, `is_zero_sha()` |
+| Accessors | `branch()`, `name()` |
+| Actions | `set_with_description()`, `analyze_changes()` |
+
+### Constants
+
+```rust
+pub const VERSION: &str = env!("CARGO_PKG_VERSION");
+```
+
+Constants use `SCREAMING_SNAKE_CASE`.
+
+---
+
+## Async Patterns
+
+### `async fn` in Traits
+
+Tickborg uses Rust 2024 edition which supports `async fn` in traits natively
+via `impl Future` return types:
+
+```rust
+pub trait SimpleWorker: Send {
+ type J: Send;
+
+ fn msg_to_job(/* ... */) -> impl Future<Output = Result<Self::J, String>> + Send;
+ fn consumer(&mut self, job: &Self::J) -> impl Future<Output = Actions> + Send;
+}
+```
+
+### Tokio Runtime
+
+All binaries use the multi-threaded Tokio runtime:
+
+```rust
+#[tokio::main]
+async fn main() {
+ // ...
+}
+```
+
+### `RwLock` for Shared State
+
+The `GithubAppVendingMachine` is wrapped in `tokio::sync::RwLock` to allow
+concurrent read access to cached tokens:
+
+```rust
+pub struct EvaluationWorker<E> {
+ github_vend: tokio::sync::RwLock<GithubAppVendingMachine>,
+ // ...
+}
+```
+
+---
+
+## Error Handling
+
+### Pattern: Enum-Based Errors
+
+```rust
+#[derive(Debug)]
+pub enum CommitStatusError {
+ ExpiredCreds(String),
+ MissingSha(String),
+ InternalError(String),
+ Error(String),
+}
+```
+
+### Pattern: String Errors for Worker Actions
+
+Worker methods return `Result<_, String>` for simplicity — the error message
+is logged and the job is acked or nacked.
+
+### Pattern: `unwrap_or_else` with `panic!` for Config
+
+```rust
+let config_str = std::fs::read_to_string(&path)
+ .unwrap_or_else(|e| panic!("Failed to read: {e}"));
+```
+
+Configuration errors are unrecoverable — panic is appropriate at startup.
+
+---
+
+## Serialization
+
+### Serde Conventions
+
+```rust
+// snake_case field renaming
+#[derive(Deserialize, Debug)]
+#[serde(rename_all = "snake_case")]
+pub enum PullRequestAction {
+ Opened,
+ Closed,
+ Synchronize,
+ // ...
+}
+
+// Optional fields
+#[derive(Deserialize, Debug)]
+pub struct Config {
+ pub builder: Option<BuilderConfig>,
+ // ...
+}
+
+// Default values
+#[derive(Deserialize, Debug)]
+pub struct QueueConfig {
+ #[serde(default = "default_true")]
+ pub durable: bool,
+}
+```
+
+### JSON Message Format
+
+All AMQP messages are `serde_json::to_vec()`:
+
+```rust
+pub fn publish_serde_action<T: Serialize>(
+ exchange: Option<String>,
+ routing_key: Option<String>,
+ msg: &T,
+) -> Action {
+ Action::Publish(QueueMsg {
+ exchange,
+ routing_key,
+ content: serde_json::to_vec(msg).unwrap(),
+ })
+}
+```
+
+---
+
+## Testing Patterns
+
+### Unit Tests in Module Files
+
+```rust
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_parse_build_command() {
+ let result = parse("@tickbot build meshmc");
+ assert_eq!(result, vec![Instruction::Build(
+ vec!["meshmc".to_owned()],
+ Subset::Project,
+ )]);
+ }
+}
+```
+
+### The `build-faker` Binary
+
+```rust
+// bin/build-faker.rs
+```
+
+A test utility that simulates a builder without actually running builds.
+Useful for testing the AMQP pipeline end-to-end.
+
+---
+
+## Logging
+
+### `tracing` Macros
+
+```rust
+use tracing::{info, warn, error, debug, trace};
+
+info!("Starting webhook receiver on port {}", port);
+warn!("Token expired, refreshing");
+error!("Failed to decode message: {}", err);
+debug!(routing_key = %key, "Received message");
+```
+
+### Structured Fields
+
+```rust
+tracing::info!(
+ pr = %job.pr.number,
+ repo = %job.repo.full_name,
+ project = %project_name,
+ "Starting build"
+);
+```
+
+---
+
+## Git Operations
+
+### `CachedCloner` Pattern
+
+All git operations go through the `CachedCloner` → `CachedProject` →
+`CachedProjectCo` chain:
+
+```rust
+let cloner = CachedCloner::new(checkout_root, 3); // 3 concurrent clones max
+let project = cloner.project("owner/repo", clone_url);
+let co = project.clone_for("purpose".into(), identity.into())?;
+co.fetch_pr(42)?;
+co.merge_commit(OsStr::new("pr"))?;
+```
+
+### File Locking
+
+```rust
+// locks.rs
+pub struct LockFile {
+ path: PathBuf,
+ file: Option<File>,
+}
+
+impl LockFile {
+ pub fn lock(path: &Path) -> Result<Self, io::Error>;
+}
+
+impl Drop for LockFile {
+ fn drop(&mut self) {
+ // Release lock automatically
+ }
+}
+```
+
+---
+
+## Clippy and Formatting
+
+```bash
+# Format
+cargo fmt --all
+
+# Lint
+cargo clippy --all-targets --all-features -- -D warnings
+```
+
+The CI pipeline enforces both. The workspace `Cargo.toml` does not set custom
+clippy lints — the defaults plus `-D warnings` are used.
+
+---
+
+## Dependencies Policy
+
+- **Minimal external crates** — only well-maintained crates with clear purpose.
+- **Pinned git dependencies** — the `hubcaps` fork is pinned to a specific rev.
+- **Feature-gated Tokio** — only `rt-multi-thread`, `net`, `macros`, `sync`.
+- **No `unwrap()` in library code** — except config loading at startup and serializing already-validated message types (e.g. `publish_serde_action`).
+- **Release profile**: `debug = true` is set to include debug symbols in
+ release builds for better crash diagnostics.
diff --git a/docs/handbook/ofborg/configuration.md b/docs/handbook/ofborg/configuration.md
new file mode 100644
index 0000000000..143ac75f8e
--- /dev/null
+++ b/docs/handbook/ofborg/configuration.md
@@ -0,0 +1,472 @@
+# Tickborg — Configuration Reference
+
+## Overview
+
+Tickborg is configured via a single JSON file — `config.json` in the working
+directory by default, or the path given by the `CONFIG_PATH` environment variable.
+The file maps to the top-level `Config` struct in `tickborg/src/config.rs`.
+
+---
+
+## Loading Configuration
+
+```rust
+// config.rs
+pub fn load() -> Config {
+ let config_path = env::var("CONFIG_PATH")
+ .unwrap_or_else(|_| "config.json".to_owned());
+
+ let config_str = std::fs::read_to_string(&config_path)
+ .unwrap_or_else(|e| panic!("Failed to read config file {config_path}: {e}"));
+
+ serde_json::from_str(&config_str)
+ .unwrap_or_else(|e| panic!("Failed to parse config file {config_path}: {e}"))
+}
+```
+
+---
+
+## Top-Level `Config`
+
+```rust
+#[derive(Deserialize, Debug)]
+pub struct Config {
+ pub identity: String,
+ pub rabbitmq: RabbitMqConfig,
+ pub github_app: Option<GithubAppConfig>,
+
+ // Per-service configs — only the relevant one needs to be present
+ pub github_webhook: Option<GithubWebhookConfig>,
+ pub log_api: Option<LogApiConfig>,
+ pub evaluation_filter: Option<EvaluationFilterConfig>,
+ pub mass_rebuilder: Option<MassRebuilderConfig>,
+ pub builder: Option<BuilderConfig>,
+ pub github_comment_filter: Option<GithubCommentFilterConfig>,
+ pub github_comment_poster: Option<GithubCommentPosterConfig>,
+ pub log_message_collector: Option<LogMessageCollectorConfig>,
+ pub push_filter: Option<PushFilterConfig>,
+ pub stats: Option<StatsConfig>,
+}
+```
+
+### `identity`
+
+A unique string identifying this instance. Used as:
+- AMQP consumer tags (`evaluation-filter-{identity}`)
+- Exclusive queue names (`build-inputs-{identity}`)
+- GitHub Check Run external ID
+
+```json
+{
+ "identity": "prod-worker-01"
+}
+```
+
+---
+
+## `RabbitMqConfig`
+
+```rust
+#[derive(Deserialize, Debug)]
+pub struct RabbitMqConfig {
+ pub ssl: bool,
+ pub host: String,
+ pub vhost: Option<String>,
+ pub username: String,
+ pub password_file: PathBuf,
+}
+```
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `ssl` | bool | yes | Use `amqps://` instead of `amqp://` |
+| `host` | string | yes | RabbitMQ hostname (may include port) |
+| `vhost` | string | no | Virtual host (default: `/`) |
+| `username` | string | yes | AMQP username |
+| `password_file` | path | yes | File containing the password (not the password itself) |
+
+```json
+{
+ "rabbitmq": {
+ "ssl": true,
+ "host": "rabbitmq.example.com",
+ "vhost": "tickborg",
+ "username": "tickborg",
+ "password_file": "/run/secrets/rabbitmq-password"
+ }
+}
+```
+
+> **Security**: The password is read from a file rather than stored directly
+> in the config, allowing secure credential injection via systemd credentials,
+> Docker secrets, or similar mechanisms.
+
+---
+
+## `GithubAppConfig`
+
+```rust
+#[derive(Deserialize, Debug)]
+pub struct GithubAppConfig {
+ pub app_id: u64,
+ pub private_key_file: PathBuf,
+ pub owner: String,
+ pub repo: String,
+ pub installation_id: Option<u64>,
+}
+```
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `app_id` | u64 | yes | GitHub App ID |
+| `private_key_file` | path | yes | PEM-encoded RSA private key |
+| `owner` | string | yes | Repository owner |
+| `repo` | string | yes | Repository name |
+| `installation_id` | u64 | no | Installation ID (auto-detected if omitted) |
+
+```json
+{
+ "github_app": {
+ "app_id": 12345,
+ "private_key_file": "/run/secrets/github-app-key.pem",
+ "owner": "project-tick",
+ "repo": "Project-Tick",
+ "installation_id": 67890
+ }
+}
+```
+
+---
+
+## Service-Specific Configs
+
+### `GithubWebhookConfig`
+
+```rust
+#[derive(Deserialize, Debug)]
+pub struct GithubWebhookConfig {
+ pub bind_address: Option<String>,
+ pub port: u16,
+ pub webhook_secret: String,
+}
+```
+
+```json
+{
+ "github_webhook": {
+ "bind_address": "0.0.0.0",
+ "port": 8080,
+ "webhook_secret": "your-webhook-secret-here"
+ }
+}
+```
+
+### `LogApiConfig`
+
+```rust
+#[derive(Deserialize, Debug)]
+pub struct LogApiConfig {
+ pub bind_address: Option<String>,
+ pub port: u16,
+ pub log_storage_path: PathBuf,
+}
+```
+
+```json
+{
+ "log_api": {
+ "port": 8081,
+ "log_storage_path": "/var/log/tickborg/builds"
+ }
+}
+```
+
+### `EvaluationFilterConfig`
+
+```rust
+#[derive(Deserialize, Debug)]
+pub struct EvaluationFilterConfig {
+ pub repos: Vec<String>,
+}
+```
+
+```json
+{
+ "evaluation_filter": {
+ "repos": [
+ "project-tick/Project-Tick"
+ ]
+ }
+}
+```
+
+### `MassRebuilderConfig`
+
+```rust
+#[derive(Deserialize, Debug)]
+pub struct MassRebuilderConfig {
+ pub checkout: CheckoutConfig,
+}
+```
+
+```json
+{
+ "mass_rebuilder": {
+ "checkout": {
+ "root": "/var/cache/tickborg/checkout"
+ }
+ }
+}
+```
+
+### `BuilderConfig` / `RunnerConfig`
+
+```rust
+#[derive(Deserialize, Debug)]
+pub struct BuilderConfig {
+ pub runner: RunnerConfig,
+ pub checkout: CheckoutConfig,
+ pub build: BuildConfig,
+}
+
+#[derive(Deserialize, Debug)]
+pub struct RunnerConfig {
+ pub identity: Option<String>,
+ pub architectures: Vec<String>,
+}
+
+#[derive(Deserialize, Debug)]
+pub struct BuildConfig {
+ pub timeout_seconds: u64,
+ pub log_tail_lines: usize,
+}
+
+#[derive(Deserialize, Debug)]
+pub struct CheckoutConfig {
+ pub root: PathBuf,
+}
+```
+
+```json
+{
+ "builder": {
+ "runner": {
+ "identity": "builder-x86_64-linux",
+ "architectures": ["x86_64-linux"]
+ },
+ "checkout": {
+ "root": "/var/cache/tickborg/checkout"
+ },
+ "build": {
+ "timeout_seconds": 3600,
+ "log_tail_lines": 100
+ }
+ }
+}
+```
+
+### `GithubCommentFilterConfig`
+
+```rust
+#[derive(Deserialize, Debug)]
+pub struct GithubCommentFilterConfig {
+ pub repos: Vec<String>,
+ pub trusted_users: Option<Vec<String>>,
+}
+```
+
+```json
+{
+ "github_comment_filter": {
+ "repos": ["project-tick/Project-Tick"],
+ "trusted_users": ["maintainer1", "maintainer2"]
+ }
+}
+```
+
+### `LogMessageCollectorConfig`
+
+```rust
+#[derive(Deserialize, Debug)]
+pub struct LogMessageCollectorConfig {
+ pub log_storage_path: PathBuf,
+}
+```
+
+```json
+{
+ "log_message_collector": {
+ "log_storage_path": "/var/log/tickborg/builds"
+ }
+}
+```
+
+### `StatsConfig`
+
+```rust
+#[derive(Deserialize, Debug)]
+pub struct StatsConfig {
+ pub bind_address: Option<String>,
+ pub port: u16,
+}
+```
+
+```json
+{
+ "stats": {
+ "port": 9090
+ }
+}
+```
+
+---
+
+## Complete Example
+
+Based on `example.config.json`:
+
+```json
+{
+ "identity": "prod-01",
+ "rabbitmq": {
+ "ssl": false,
+ "host": "localhost",
+ "vhost": "tickborg",
+ "username": "tickborg",
+ "password_file": "/run/secrets/rabbitmq-password"
+ },
+ "github_app": {
+ "app_id": 12345,
+ "private_key_file": "/run/secrets/github-app-key.pem",
+ "owner": "project-tick",
+ "repo": "Project-Tick"
+ },
+ "github_webhook": {
+ "port": 8080,
+ "webhook_secret": "change-me"
+ },
+ "evaluation_filter": {
+ "repos": ["project-tick/Project-Tick"]
+ },
+ "mass_rebuilder": {
+ "checkout": {
+ "root": "/var/cache/tickborg/checkout"
+ }
+ },
+ "builder": {
+ "runner": {
+ "architectures": ["x86_64-linux"]
+ },
+ "checkout": {
+ "root": "/var/cache/tickborg/checkout"
+ },
+ "build": {
+ "timeout_seconds": 3600,
+ "log_tail_lines": 100
+ }
+ },
+ "github_comment_filter": {
+ "repos": ["project-tick/Project-Tick"]
+ },
+ "log_message_collector": {
+ "log_storage_path": "/var/log/tickborg/builds"
+ },
+ "log_api": {
+ "port": 8081,
+ "log_storage_path": "/var/log/tickborg/builds"
+ },
+ "stats": {
+ "port": 9090
+ }
+}
+```
+
+---
+
+## Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `CONFIG_PATH` | `config.json` | Path to the JSON config file |
+| `RUST_LOG` | `info` | `tracing` filter directive |
+| `RUST_LOG_JSON` | (unset) | Set to any value for structured JSON log output (the variable's presence, not its value, enables it) |
+
+### `RUST_LOG` Examples
+
+```bash
+# Default — info for everything
+RUST_LOG=info
+
+# Debug for tickborg, info for everything else
+RUST_LOG=info,tickborg=debug
+
+# Trace AMQP operations
+RUST_LOG=info,tickborg=debug,lapin=trace
+
+# Only errors
+RUST_LOG=error
+```
+
+### Logging Initialization
+
+```rust
+// lib.rs
+pub fn setup_log() {
+ let json = std::env::var("RUST_LOG_JSON").is_ok();
+
+ let subscriber = tracing_subscriber::fmt()
+ .with_env_filter(EnvFilter::from_default_env());
+
+ if json {
+ subscriber.json().init();
+ } else {
+ subscriber.init();
+ }
+}
+```
+
+---
+
+## ACL Configuration
+
+The ACL (Access Control List) is derived from the configuration and controls:
+
+- **Repository eligibility** — Which repos tickborg responds to
+- **Architecture access** — Which platforms a user can build on
+- **Unrestricted builds** — Whether a user can bypass project restrictions
+
+```rust
+// acl.rs
+pub struct Acl {
+ repos: Vec<String>,
+ trusted_users: Vec<String>,
+}
+
+impl Acl {
+ pub fn is_repo_eligible(&self, repo: &str) -> bool;
+ pub fn build_job_architectures_for_user_repo(
+ &self, user: &str, repo: &str
+ ) -> Vec<System>;
+ pub fn can_build_unrestricted(&self, user: &str, repo: &str) -> bool;
+}
+```
+
+---
+
+## Secrets Management
+
+Files containing secrets should be readable only by the tickborg service user:
+
+```bash
+# RabbitMQ password
+echo -n "secret-password" > /run/secrets/rabbitmq-password
+chmod 600 /run/secrets/rabbitmq-password
+
+# GitHub App private key
+cp github-app.pem /run/secrets/github-app-key.pem
+chmod 600 /run/secrets/github-app-key.pem
+```
+
+With NixOS and systemd `DynamicUser`, secrets can be placed in
+`/run/credentials/tickborg-*` using systemd's `LoadCredential` or
+`SetCredential` directives.
diff --git a/docs/handbook/ofborg/contributing.md b/docs/handbook/ofborg/contributing.md
new file mode 100644
index 0000000000..17d41ace76
--- /dev/null
+++ b/docs/handbook/ofborg/contributing.md
@@ -0,0 +1,326 @@
+# Tickborg — Contributing Guide
+
+## Getting Started
+
+### Prerequisites
+
+- **Rust** (latest stable) — via `rustup` or Nix
+- **RabbitMQ** — local instance for integration testing
+- **Git** — recent version with submodule support
+- **Nix** (optional) — provides a reproducible dev environment
+
+### Quick Setup with Nix
+
+```bash
+# Enter the development shell
+nix develop
+
+# This provides: cargo, rustc, clippy, rustfmt, pkg-config, openssl
+```
+
+### Manual Setup
+
+```bash
+# Install Rust
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+
+# Install system dependencies (Debian/Ubuntu)
+sudo apt install pkg-config libssl-dev
+
+# Install system dependencies (Fedora)
+sudo dnf install pkg-config openssl-devel
+
+# Install RabbitMQ (for integration testing)
+sudo apt install rabbitmq-server
+sudo systemctl start rabbitmq-server
+```
+
+---
+
+## Building
+
+```bash
+# Debug build (fast compilation)
+cargo build
+
+# Release build (optimized, includes debug symbols)
+cargo build --release
+
+# Build a specific binary
+cargo build --bin github-webhook-receiver
+```
+
+All 11 binaries are built from the `tickborg` crate. The workspace also
+includes `tickborg-simple-build` as a secondary crate.
+
+---
+
+## Running Tests
+
+```bash
+# Run all tests
+cargo test
+
+# Run tests for a specific module
+cargo test --lib commentparser
+
+# Run tests with output
+cargo test -- --nocapture
+
+# Run a specific test
+cargo test test_parse_build_command
+```
+
+---
+
+## Code Quality
+
+### Formatting
+
+```bash
+# Check formatting
+cargo fmt --all -- --check
+
+# Apply formatting
+cargo fmt --all
+```
+
+### Linting
+
+```bash
+# Run clippy with warnings as errors
+cargo clippy --all-targets --all-features -- -D warnings
+```
+
+Both checks run in CI. PRs with formatting or clippy violations will fail.
+
+---
+
+## Project Structure
+
+See [architecture.md](architecture.md) for the full module hierarchy.
+
+Key directories:
+
+| Directory | What goes here |
+|-----------|---------------|
+| `tickborg/src/bin/` | Binary entry points — one file per service |
+| `tickborg/src/tasks/` | Worker implementations |
+| `tickborg/src/message/` | AMQP message type definitions |
+| `tickborg/src/ghevent/` | GitHub webhook event types |
+| `tickborg/src/eval/` | Evaluation strategies |
+| `docs/handbook/ofborg/` | This documentation |
+
+---
+
+## Making Changes
+
+### Adding a New Worker
+
+1. Create the task implementation in `tickborg/src/tasks/`:
+
+```rust
+// tasks/myworker.rs
+pub struct MyWorker { /* ... */ }
+
+impl worker::SimpleWorker for MyWorker {
+ type J = MyMessageType;
+
+ async fn consumer(&mut self, job: &Self::J) -> worker::Actions {
+ // Process the job
+ vec![worker::Action::Ack]
+ }
+}
+```
+
+2. Create the binary entry point in `tickborg/src/bin/`:
+
+```rust
+// bin/my-worker.rs
+#[tokio::main]
+async fn main() {
+ tickborg::setup_log();
+ let cfg = tickborg::config::load();
+ // Connect to AMQP, declare queues, start consumer
+}
+```
+
+3. Add the binary to `tickborg/Cargo.toml`:
+
+```toml
+[[bin]]
+name = "my-worker"
+path = "src/bin/my-worker.rs"
+```
+
+4. Add any necessary config fields to `Config` in `config.rs`.
+
+5. Add the service to `service.nix` and `docker-compose.yml`.
+
+### Adding a New Message Type
+
+1. Create the message type in `tickborg/src/message/`:
+
+```rust
+// message/mymessage.rs
+#[derive(Serialize, Deserialize, Debug)]
+pub struct MyMessage {
+ pub field: String,
+}
+```
+
+2. Add the module to `message/mod.rs`:
+
+```rust
+pub mod mymessage;
+```
+
+### Adding a New GitHub Event Type
+
+1. Create the event type in `tickborg/src/ghevent/`:
+
+```rust
+// ghevent/myevent.rs
+#[derive(Deserialize, Debug)]
+pub struct MyEvent {
+ pub action: String,
+ pub repository: Repository,
+}
+```
+
+2. Add the module to `ghevent/mod.rs`.
+
+3. Add routing in the webhook receiver's `route_event` function.
+
+---
+
+## Testing Locally
+
+### With `build-faker`
+
+The `build-faker` binary simulates a builder without running actual builds:
+
+```bash
+# Terminal 1: Start RabbitMQ
+sudo systemctl start rabbitmq-server
+
+# Terminal 2: Start the webhook receiver
+CONFIG_PATH=example.config.json cargo run --bin github-webhook-receiver
+
+# Terminal 3: Start the build faker
+CONFIG_PATH=example.config.json cargo run --bin build-faker
+```
+
+### Sending Test Webhooks
+
+```bash
+# Compute HMAC signature
+BODY='{"action":"opened","pull_request":{...}}'
+SIG=$(echo -n "$BODY" | openssl dgst -sha256 -hmac "your-webhook-secret" | awk '{print $2}')
+
+# Send webhook
+curl -X POST http://localhost:8080/github-webhook \
+ -H "Content-Type: application/json" \
+ -H "X-GitHub-Event: pull_request" \
+ -H "X-Hub-Signature-256: sha256=$SIG" \
+ -d "$BODY"
+```
+
+---
+
+## Commit Messages
+
+Follow **Conventional Commits** format:
+
+```
+<type>(<scope>): <description>
+
+[optional body]
+
+[optional footer(s)]
+```
+
+### Types
+
+| Type | When to use |
+|------|-------------|
+| `feat` | New feature |
+| `fix` | Bug fix |
+| `docs` | Documentation changes |
+| `refactor` | Code change that neither fixes a bug nor adds a feature |
+| `test` | Adding or correcting tests |
+| `chore` | Maintenance tasks |
+| `ci` | CI/CD changes |
+
+### Scopes
+
+Use the sub-project or module name:
+
+```
+feat(meshmc): add block renderer
+fix(builder): handle timeout correctly
+docs(ofborg): add deployment guide
+ci(github): update workflow matrix
+```
+
+The evaluation system uses commit scopes to detect changed projects — see
+[evaluation-system.md](evaluation-system.md).
+
+---
+
+## Pull Request Workflow
+
+1. **Fork & branch** — Create a feature branch from `main`.
+2. **Develop** — Make changes, run tests locally.
+3. **Push** — Push to your fork.
+4. **Open PR** — Target the `main` branch.
+5. **CI** — Tickborg automatically evaluates the PR:
+ - Detects changed projects
+ - Adds `project: <name>` labels
+ - Schedules builds on eligible platforms
+6. **Review** — Maintainers review the code and build results.
+7. **Merge** — Squash-merge into `main`.
+
+### Bot Commands
+
+Maintainers can use `@tickbot` commands on PRs:
+
+```
+@tickbot build meshmc Build meshmc on all platforms
+@tickbot build meshmc neozip Build multiple projects
+@tickbot test mnv Run tests for mnv
+@tickbot eval Re-run evaluation
+```
+
+---
+
+## Documentation
+
+Documentation lives in `docs/handbook/ofborg/`. When making changes to
+tickborg:
+
+- Update relevant docs if the change affects architecture or configuration.
+- Reference real struct names, function signatures, and module paths.
+- Include code snippets from the actual source.
+
+---
+
+## Release Process
+
+Releases are built via the Nix flake:
+
+```bash
+nix build .#tickborg
+```
+
+The output includes all 11 binaries in a single package. Deploy by updating
+the NixOS module's `package` option or rebuilding the Docker image.
+
+---
+
+## Getting Help
+
+- Read the [overview](overview.md) for a high-level understanding.
+- Check [architecture](architecture.md) for the module structure.
+- See [data-flow](data-flow.md) for end-to-end message tracing.
+- Review [configuration](configuration.md) for config file reference.
diff --git a/docs/handbook/ofborg/data-flow.md b/docs/handbook/ofborg/data-flow.md
new file mode 100644
index 0000000000..528974d0ce
--- /dev/null
+++ b/docs/handbook/ofborg/data-flow.md
@@ -0,0 +1,346 @@
+# Tickborg — Data Flow
+
+## Overview
+
+This document traces the complete path of messages through the tickborg system
+for the three primary event types: **pull request**, **comment command**, and
+**push event**.
+
+---
+
+## Pull Request Flow
+
+A PR opened against the monorepo triggers evaluation and automatic builds.
+
+### Step-by-Step
+
+```
+GitHub Webhook Receiver RabbitMQ
+─────── ───────────────── ────────
+POST /github-webhook ───► HMAC verify ──────────► github-events exchange
+ X-Hub-Signature-256 route by event type routing_key: pull_request.opened
+ X-GitHub-Event: pull_request
+```
+
+```
+RabbitMQ Evaluation Filter RabbitMQ
+──────── ───────────────── ────────
+mass-rebuild-check-inputs PR filter logic ───────► mass-rebuild-check-jobs
+ ◄── github-events - Repo eligible? (direct queue publish)
+ pull_request.* - Action interesting?
+ - PR open?
+```
+
+```
+RabbitMQ Mass Rebuilder RabbitMQ / GitHub
+──────── ────────────── ─────────────────
+mass-rebuild-check-jobs EvaluationWorker - Commit status: pending
+ OneEval: - Clone + merge PR
+ 1. Check PR state - Detect changed projects
+ 2. Clone repo - Generate labels
+ 3. Fetch PR - Commit status: success
+ 4. Merge - Publish BuildJob(s)
+ 5. Detect changes ──► build-jobs exchange (fanout)
+ 6. Run eval checks
+ 7. Tag PR labels ──► GitHub API: add labels
+```
+
+```
+RabbitMQ Builder RabbitMQ / GitHub
+──────── ─────── ─────────────────
+build-inputs-{id} BuildWorker - Check Run: in_progress
+ ◄── build-jobs 1. Clone repo - Publish log lines ──► logs exchange
+ 2. Checkout PR - Check Run: completed
+ 3. Detect build system - Publish BuildResult ──► build-results
+ 4. Build
+ 5. Test (if requested)
+```
+
+```
+RabbitMQ Comment Poster GitHub
+──────── ────────────── ──────
+build-results Format result ───────► PR comment with build summary
+ ◄── build-results as markdown
+```
+
+```
+RabbitMQ Log Collector Disk
+──────── ───────────── ────
+build-logs LogMessageCollector ────► /var/log/tickborg/builds/{id}.log
+ ◄── logs exchange
+ logs.*
+```
+
+### Sequence Diagram
+
+```
+GitHub ──► Webhook Receiver ──► [github-events]
+ │
+ pull_request.*
+ ▼
+ Evaluation Filter
+ │
+ ▼
+ [mass-rebuild-check-jobs]
+ │
+ ▼
+ Mass Rebuilder ──► GitHub (status + labels)
+ │
+ BuildJob × N
+ ▼
+ [build-jobs]
+ │
+ ▼
+ Builder ──► GitHub (check run)
+ / \
+ [logs] [build-results]
+ │ │
+ ▼ ▼
+ Log Collector Comment Poster ──► GitHub (PR comment)
+```
+
+---
+
+## Comment Command Flow
+
+A user posts `@tickbot build meshmc` on a PR.
+
+### Step-by-Step
+
+```
+GitHub Webhook Receiver RabbitMQ
+─────── ───────────────── ────────
+POST /github-webhook ───► HMAC verify ──────────► github-events exchange
+ X-GitHub-Event: route: issue_comment routing_key: issue_comment.created
+ issue_comment
+```
+
+```
+RabbitMQ Comment Filter RabbitMQ
+──────── ────────────── ────────
+comment-jobs GitHubCommentWorker build-jobs exchange
+ ◄── github-events 1. Ignore !Created
+ issue_comment.* 2. Parse @tickbot
+ 3. Extract instruction
+ 4. ACL check
+ 5. Produce BuildJob(s) ──► build-jobs (fanout)
+```
+
+The rest of the flow (builder → log collector → comment poster) is identical
+to the PR flow.
+
+### Comment Parser Detail
+
+```
+Input: "@tickbot build meshmc neozip"
+
+commentparser::parse()
+ ┌──────────────────────────────────────────┐
+ │ nom parser pipeline: │
+ │ 1. tag("@tickbot") │
+ │ 2. space1 │
+ │ 3. alt((tag("build"), tag("test"), │
+ │ tag("eval"))) │
+ │ 4. space1 │
+ │ 5. separated_list1(space1, alphanumeric1) │
+ └──────────────────────────────────────────┘
+
+Output: [Instruction::Build(["meshmc", "neozip"], Subset::Project)]
+```
+
+### Message Expansion
+
+A single comment can generate multiple AMQP messages:
+
+```
+@tickbot build meshmc
+ │
+ ▼
+ACL: user allowed on [x86_64-linux, aarch64-linux, x86_64-darwin]
+ │
+ ▼
+3 BuildJob messages:
+ ├── BuildJob { project: "meshmc", system: "x86_64-linux", ... }
+ ├── BuildJob { project: "meshmc", system: "aarch64-linux", ... }
+ └── BuildJob { project: "meshmc", system: "x86_64-darwin", ... }
+```
+
+---
+
+## Push Event Flow
+
+A push to a tracked branch (e.g., `main`).
+
+### Step-by-Step
+
+```
+GitHub Webhook Receiver RabbitMQ
+─────── ───────────────── ────────
+POST /github-webhook ───► HMAC verify ──────────► github-events exchange
+ X-GitHub-Event: push route: push routing_key: push.push
+```
+
+```
+RabbitMQ Push Filter RabbitMQ / External
+──────── ─────────── ─────────────────
+push-jobs PushFilterWorker
+ ◄── github-events 1. Skip tags
+ push.* 2. Skip deletes
+ 3. Skip zero-SHA
+ 4. Check branch name
+ 5. Trigger rebuild ──► (future: deployment hooks)
+```
+
+### Push Event Guards
+
+```rust
+impl worker::SimpleWorker for PushFilterWorker {
+ async fn consumer(&mut self, job: &ghevent::PushEvent) -> worker::Actions {
+ // Skip tags
+ if job.is_tag() {
+ return vec![worker::Action::Ack];
+ }
+
+ // Skip branch deletions
+ if job.is_delete() {
+ return vec![worker::Action::Ack];
+ }
+
+ // Skip zero-SHA (orphan push)
+ if job.is_zero_sha() {
+ return vec![worker::Action::Ack];
+ }
+
+ // Only process main branch
+ if job.branch() != Some("main") {
+ return vec![worker::Action::Ack];
+ }
+
+ // Process the push event...
+ }
+}
+```
+
+---
+
+## Statistics Flow
+
+All services emit `EventMessage` events to the stats exchange.
+
+```
+Any Service
+ │
+ ├── worker::Action::Publish ──► [stats] exchange (fanout)
+ │ │
+ │ ▼
+ │ stats-events queue
+ │ │
+ │ ▼
+ │ StatCollectorWorker
+ │ │
+ └── Metrics: ▼
+ - JobReceived MetricCollector
+ - JobDecodeSuccess │
+ - JobDecodeFailure ▼
+ - BuildStarted HTTP endpoint (:9090)
+ - BuildCompleted /metrics
+ - EvalStarted
+ - EvalCompleted
+```
+
+### `SysEvents` Trait
+
+```rust
+// stats.rs
+pub trait SysEvents: Send {
+ fn notify(&mut self, event: Event)
+ -> impl Future<Output = ()>;
+}
+```
+
+Every worker is generic over `E: SysEvents`, allowing stats collection
+to be plugged in or replaced with a no-op.
+
+---
+
+## Log Collection Flow
+
+Build logs are streamed in real-time via the `logs` exchange.
+
+```
+Builder (BuildWorker)
+ │
+ │ During build execution, for each output line:
+ │
+ ├── BuildLogStart { /* ... */ } ──► [logs] routing_key: logs.{attempt_id}
+ ├── BuildLogMsg { line: "..." } ──► [logs] routing_key: logs.{attempt_id}
+ ├── BuildLogMsg { line: "..." } ──► [logs] routing_key: logs.{attempt_id}
+ └── BuildLogMsg { line: "..." } ──► [logs] routing_key: logs.{attempt_id}
+```
+
+```
+RabbitMQ Log Collector Disk
+──────── ───────────── ────
+build-logs LogMessageCollector
+ ◄── logs matches by attempt_id
+ logs.* writes to file:
+ {log_storage_path}/{attempt_id}.log
+```
+
+### `LogFrom` Enum
+
+```rust
+pub enum LogFrom {
+ Worker(BuildLogMsg),
+ Start(BuildLogStart),
+}
+```
+
+The collector distinguishes between log start (creates the file with metadata
+header) and log lines (appends to the file).
+
+---
+
+## Message Format Summary
+
+All messages are JSON-serialized via `serde_json`. Key message types and their
+flows:
+
+| Message Type | Producer | Consumer | Exchange |
+|-------------|----------|----------|----------|
+| `PullRequestEvent` | Webhook Receiver | Evaluation Filter | `github-events` |
+| `IssueComment` | Webhook Receiver | Comment Filter | `github-events` |
+| `PushEvent` | Webhook Receiver | Push Filter | `github-events` |
+| `EvaluationJob` | Eval Filter / Comment Filter | Mass Rebuilder | _(direct queue)_ |
+| `BuildJob` | Mass Rebuilder / Comment Filter | Builder | `build-jobs` |
+| `BuildResult` | Builder | Comment Poster, Stats | `build-results` |
+| `BuildLogMsg` | Builder | Log Collector | `logs` |
+| `EventMessage` | Any service | Stats Collector | `stats` |
+
+---
+
+## Failure Modes and Recovery
+
+### Transient Failures
+
+| Failure | Recovery Mechanism |
+|---------|-------------------|
+| GitHub API 401 (expired token) | `NackRequeue` → retry after token refresh |
+| GitHub API 5xx | `NackRequeue` → retry |
+| RabbitMQ connection lost | `lapin` reconnect / systemd restart |
+| Build timeout | `BuildStatus::TimedOut` → report to GitHub |
+
+### Permanent Failures
+
+| Failure | Handling |
+|---------|----------|
+| Invalid message JSON | `Ack` (discard) + log error |
+| PR force-pushed (SHA gone) | `Ack` (skip) — `MissingSha` |
+| GitHub API 4xx (not 401/422) | `Ack` + add `tickborg-internal-error` label |
+| Merge conflict | Report failure status to GitHub, `Ack` |
+
+### Dead Letter Behavior
+
+Messages `NackDump`'d (rejected without requeue) are discarded unless a
+dead-letter exchange is configured in RabbitMQ. This is used for permanently
+invalid messages that should not be retried.
diff --git a/docs/handbook/ofborg/deployment.md b/docs/handbook/ofborg/deployment.md
new file mode 100644
index 0000000000..4a9497b0c3
--- /dev/null
+++ b/docs/handbook/ofborg/deployment.md
@@ -0,0 +1,413 @@
+# Tickborg — Deployment
+
+## Overview
+
+Tickborg can be deployed via **NixOS modules**, **Docker Compose**, or manual
+systemd units. The preferred method is the NixOS module defined in
+`service.nix`, which orchestrates all eight binaries as individual systemd
+services.
+
+---
+
+## Key Files
+
+| File | Purpose |
+|------|---------|
+| `service.nix` | NixOS module — systemd services |
+| `docker-compose.yml` | Full-stack Docker Compose |
+| `flake.nix` | Nix flake — package + dev shell |
+| `example.config.json` | Reference configuration file |
+
+---
+
+## NixOS Deployment
+
+### Module Structure (`service.nix`)
+
+```nix
+{ config, pkgs, lib, ... }:
+let
+ cfg = config.services.tickborg;
+ tickborg = cfg.package;
+in
+{
+ options.services.tickborg = {
+ enable = lib.mkEnableOption "Enable tickborg CI services";
+
+ package = lib.mkOption {
+ type = lib.types.package;
+ description = "The tickborg package to use";
+ };
+
+ configFile = lib.mkOption {
+ type = lib.types.path;
+ description = "Path to the tickborg config.json";
+ };
+
+ logConfig = lib.mkOption {
+ type = lib.types.str;
+ default = "info";
+ description = "RUST_LOG filter string";
+ };
+
+ services = {
+ github-webhook-receiver = lib.mkEnableOption "webhook receiver";
+ evaluation-filter = lib.mkEnableOption "evaluation filter";
+ mass-rebuilder = lib.mkEnableOption "mass rebuilder (evaluation)";
+ builder = lib.mkEnableOption "build executor";
+ github-comment-filter = lib.mkEnableOption "comment filter";
+ github-comment-poster = lib.mkEnableOption "comment poster";
+ log-message-collector = lib.mkEnableOption "log collector";
+ stats = lib.mkEnableOption "stats collector";
+ };
+ };
+}
+```
+
+### Per-Service Configuration
+
+Each service is toggled independently. A common template generates systemd
+units:
+
+```nix
+commonServiceConfig = binary: {
+ description = "tickborg ${binary}";
+ wantedBy = [ "multi-user.target" ];
+ after = [ "network-online.target" "rabbitmq.service" ];
+ wants = [ "network-online.target" ];
+
+ environment = {
+ RUST_LOG = cfg.logConfig;
+ RUST_LOG_JSON = "1";
+ CONFIG_PATH = toString cfg.configFile;
+ };
+
+ serviceConfig = {
+ ExecStart = "${tickborg}/bin/${binary}";
+ Restart = "always";
+ RestartSec = "10s";
+ DynamicUser = true;
+
+ # Hardening
+ NoNewPrivileges = true;
+ ProtectSystem = "strict";
+ ProtectHome = true;
+ PrivateTmp = true;
+ PrivateDevices = true;
+ ProtectKernelTunables = true;
+ ProtectKernelModules = true;
+ ProtectKernelLogs = true;
+ ProtectControlGroups = true;
+ RestrictNamespaces = true;
+ LockPersonality = true;
+ MemoryDenyWriteExecute = true;
+ RestrictRealtime = true;
+ SystemCallFilter = [ "@system-service" "~@mount" ];
+ };
+};
+```
+
+### Applying the Module
+
+```nix
+# In your NixOS configuration.nix or flake:
+{
+ imports = [ ./service.nix ];
+
+ services.tickborg = {
+ enable = true;
+ package = tickborg-pkg;
+ configFile = /etc/tickborg/config.json;
+ logConfig = "info,tickborg=debug";
+
+ services = {
+ github-webhook-receiver = true;
+ evaluation-filter = true;
+ mass-rebuilder = true;
+ builder = true;
+ github-comment-filter = true;
+ github-comment-poster = true;
+ log-message-collector = true;
+ stats = true;
+ };
+ };
+}
+```
+
+### Service Management
+
+```bash
+# View all tickborg services
+systemctl list-units 'tickborg-*'
+
+# Restart a single service
+systemctl restart tickborg-builder
+
+# View logs
+journalctl -u tickborg-builder -f
+
+# Structured JSON logs (when RUST_LOG_JSON=1)
+journalctl -u tickborg-builder -o cat | jq .
+```
+
+---
+
+## Docker Compose Deployment
+
+### `docker-compose.yml`
+
+```yaml
+services:
+ rabbitmq:
+ image: rabbitmq:3-management
+ ports:
+ - "5672:5672"
+ - "15672:15672"
+ environment:
+ RABBITMQ_DEFAULT_USER: tickborg
+ RABBITMQ_DEFAULT_PASS: tickborg
+ volumes:
+ - rabbitmq-data:/var/lib/rabbitmq
+
+ webhook-receiver:
+ build: .
+ command: github-webhook-receiver
+ ports:
+ - "8080:8080"
+ environment:
+ CONFIG_PATH: /config/config.json
+ RUST_LOG: info
+ volumes:
+ - ./config:/config:ro
+ depends_on:
+ - rabbitmq
+
+ evaluation-filter:
+ build: .
+ command: evaluation-filter
+ environment:
+ CONFIG_PATH: /config/config.json
+ RUST_LOG: info
+ volumes:
+ - ./config:/config:ro
+ depends_on:
+ - rabbitmq
+
+ mass-rebuilder:
+ build: .
+ command: mass-rebuilder
+ environment:
+ CONFIG_PATH: /config/config.json
+ RUST_LOG: info
+ volumes:
+ - ./config:/config:ro
+ - checkout-cache:/var/cache/tickborg
+ depends_on:
+ - rabbitmq
+
+ builder:
+ build: .
+ command: builder
+ environment:
+ CONFIG_PATH: /config/config.json
+ RUST_LOG: info
+ volumes:
+ - ./config:/config:ro
+ - checkout-cache:/var/cache/tickborg
+ depends_on:
+ - rabbitmq
+
+ comment-filter:
+ build: .
+ command: github-comment-filter
+ environment:
+ CONFIG_PATH: /config/config.json
+ RUST_LOG: info
+ volumes:
+ - ./config:/config:ro
+ depends_on:
+ - rabbitmq
+
+ comment-poster:
+ build: .
+ command: github-comment-poster
+ environment:
+ CONFIG_PATH: /config/config.json
+ RUST_LOG: info
+ volumes:
+ - ./config:/config:ro
+ depends_on:
+ - rabbitmq
+
+ log-collector:
+ build: .
+ command: log-message-collector
+ environment:
+ CONFIG_PATH: /config/config.json
+ RUST_LOG: info
+ volumes:
+ - ./config:/config:ro
+ - log-data:/var/log/tickborg
+ depends_on:
+ - rabbitmq
+
+ stats:
+ build: .
+ command: stats
+ ports:
+ - "9090:9090"
+ environment:
+ CONFIG_PATH: /config/config.json
+ RUST_LOG: info
+ volumes:
+ - ./config:/config:ro
+ depends_on:
+ - rabbitmq
+
+volumes:
+ rabbitmq-data:
+ checkout-cache:
+ log-data:
+```
+
+### Running
+
+```bash
+# Start all services
+docker compose up -d
+
+# View webhook receiver logs
+docker compose logs -f webhook-receiver
+
+# Scale builders
+docker compose up -d --scale builder=3
+
+# Stop everything
+docker compose down
+```
+
+---
+
+## Nix Flake
+
+### `flake.nix` Outputs
+
+```nix
+{
+ outputs = { self, nixpkgs, ... }: {
+ packages.x86_64-linux.default = /* tickborg cargo build */ ;
+ packages.x86_64-linux.tickborg = self.packages.x86_64-linux.default;
+
+ devShells.x86_64-linux.default = pkgs.mkShell {
+ nativeBuildInputs = with pkgs; [
+ cargo
+ rustc
+ clippy
+ rustfmt
+ pkg-config
+ openssl
+ ];
+ RUST_SRC_PATH = "${pkgs.rust.packages.stable.rustPlatform.rustLibSrc}";
+ };
+
+ nixosModules.default = import ./service.nix;
+ };
+}
+```
+
+### Building with Nix
+
+```bash
+# Build the package
+nix build
+
+# Enter dev shell
+nix develop
+
+# Run directly
+nix run .#tickborg -- github-webhook-receiver
+```
+
+---
+
+## Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `CONFIG_PATH` | `./config.json` | Path to configuration file |
+| `RUST_LOG` | `info` | tracing filter directive |
+| `RUST_LOG_JSON` | (unset) | Set to `1` for JSON-formatted logs |
+
+---
+
+## Reverse Proxy
+
+The webhook receiver requires an HTTPS endpoint exposed to GitHub. Typical
+setup with nginx:
+
+```nginx
+server {
+ listen 443 ssl;
+ server_name ci.example.com;
+
+ ssl_certificate /etc/letsencrypt/live/ci.example.com/fullchain.pem;
+ ssl_certificate_key /etc/letsencrypt/live/ci.example.com/privkey.pem;
+
+ location /github-webhook {
+ proxy_pass http://127.0.0.1:8080;
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+
+ # GitHub sends large payloads
+ client_max_body_size 25m;
+ }
+
+ location /logs/ {
+ proxy_pass http://127.0.0.1:8081/;
+ }
+}
+```
+
+---
+
+## RabbitMQ Setup
+
+### Required Configuration
+
+```bash
+# Create vhost
+rabbitmqctl add_vhost tickborg
+
+# Create user
+rabbitmqctl add_user tickborg <password>
+
+# Grant permissions
+rabbitmqctl set_permissions -p tickborg tickborg ".*" ".*" ".*"
+```
+
+### Management UI
+
+Available at `http://localhost:15672` when using Docker Compose. Useful for
+monitoring queue depths and consumer counts.
+
+---
+
+## Health Checks
+
+Monitor these indicators:
+
+| Check | Healthy | Problem |
+|-------|---------|---------|
+| Queue depth `mass-rebuild-check-inputs` | < 50 | Evaluation filter slow/down |
+| Queue depth `build-inputs-*` | < 20 | Builder slow/down |
+| Consumer count per queue | ≥ 1 | No consumers (service down) |
+| `stats` HTTP endpoint | 200 OK | Stats collector down |
+| Webhook receiver `/health` | 200 OK | Webhook receiver down |
+
+### Systemd Watchdog
+
+Services configured with `Restart = "always"` will be automatically restarted
+on crash. The 10-second `RestartSec` prevents restart loops on persistent
+failures.
diff --git a/docs/handbook/ofborg/evaluation-system.md b/docs/handbook/ofborg/evaluation-system.md
new file mode 100644
index 0000000000..73d6898c30
--- /dev/null
+++ b/docs/handbook/ofborg/evaluation-system.md
@@ -0,0 +1,602 @@
+# Tickborg — Evaluation System
+
+## Overview
+
+The evaluation system determines **which sub-projects changed** in a pull
+request and schedules builds accordingly. It replaces the original ofborg's
+Nix expression evaluation with a monorepo-aware strategy that inspects changed
+files, commit messages, and PR metadata.
+
+---
+
+## Key Source Files
+
+| File | Purpose |
+|------|---------|
+| `tickborg/src/tasks/evaluate.rs` | `EvaluationWorker`, `OneEval` — orchestrates eval |
+| `tickborg/src/tasks/eval/mod.rs` | `EvaluationStrategy` trait, `EvaluationComplete` |
+| `tickborg/src/tasks/eval/monorepo.rs` | `MonorepoStrategy` — Project Tick specific |
+| `tickborg/src/tasks/evaluationfilter.rs` | `EvaluationFilterWorker` — PR event gating |
+| `tickborg/src/bin/evaluation-filter.rs` | Evaluation filter binary |
+| `tickborg/src/bin/mass-rebuilder.rs` | Mass rebuilder binary (runs evaluations) |
+| `tickborg/src/tagger.rs` | `ProjectTagger` — PR label generation |
+| `tickborg/src/evalchecker.rs` | `EvalChecker` — generic command runner |
+| `tickborg/src/buildtool.rs` | `detect_changed_projects()`, `find_project()` |
+
+---
+
+## Stage 1: Evaluation Filter
+
+The evaluation filter is the gateway that decides whether a PR event warrants
+full evaluation.
+
+### `EvaluationFilterWorker`
+
+```rust
+// tasks/evaluationfilter.rs
+pub struct EvaluationFilterWorker {
+ acl: acl::Acl,
+}
+
+impl worker::SimpleWorker for EvaluationFilterWorker {
+ type J = ghevent::PullRequestEvent;
+
+ async fn consumer(&mut self, job: &ghevent::PullRequestEvent) -> worker::Actions {
+ // Check 1: Is the repo eligible?
+ if !self.acl.is_repo_eligible(&job.repository.full_name) {
+ return vec![worker::Action::Ack];
+ }
+
+ // Check 2: Is the PR open?
+ if job.pull_request.state != ghevent::PullRequestState::Open {
+ return vec![worker::Action::Ack];
+ }
+
+ // Check 3: Is the action interesting?
+ let interesting = match job.action {
+ PullRequestAction::Opened => true,
+ PullRequestAction::Synchronize => true,
+ PullRequestAction::Reopened => true,
+ PullRequestAction::Edited => {
+ if let Some(ref changes) = job.changes {
+ changes.base.is_some() // base branch changed
+ } else {
+ false
+ }
+ }
+ _ => false,
+ };
+
+ if !interesting {
+ return vec![worker::Action::Ack];
+ }
+
+ // Produce an EvaluationJob
+ let msg = evaluationjob::EvaluationJob {
+ repo: Repo { /* ... */ },
+ pr: Pr { /* ... */ },
+ };
+
+ vec![
+ worker::publish_serde_action(
+ None, Some("mass-rebuild-check-jobs".to_owned()), &msg
+ ),
+ worker::Action::Ack,
+ ]
+ }
+}
+```
+
+### Filtering Rules
+
+| PR Action | Result |
+|-----------|--------|
+| `Opened` | Evaluate |
+| `Synchronize` (new commits pushed) | Evaluate |
+| `Reopened` | Evaluate |
+| `Edited` with base branch change | Evaluate |
+| `Edited` without base change | Skip |
+| `Closed` | Skip |
+| Any unknown action | Skip |
+
+### AMQP Flow
+
+```
+mass-rebuild-check-inputs (queue)
+ ← github-events (exchange), routing: pull_request.*
+ → EvaluationFilterWorker
+ → mass-rebuild-check-jobs (queue, direct publish)
+```
+
+---
+
+## Stage 2: The Evaluation Worker
+
+### `EvaluationWorker`
+
+```rust
+// tasks/evaluate.rs
+pub struct EvaluationWorker<E> {
+ cloner: checkout::CachedCloner,
+ github_vend: tokio::sync::RwLock<GithubAppVendingMachine>,
+ acl: Acl,
+ identity: String,
+ events: E,
+}
+```
+
+The `EvaluationWorker` implements `SimpleWorker` and orchestrates the full
+evaluation pipeline.
+
+### Message Decoding
+
+```rust
+impl<E: stats::SysEvents + 'static> worker::SimpleWorker for EvaluationWorker<E> {
+ type J = evaluationjob::EvaluationJob;
+
+ async fn msg_to_job(&mut self, _: &str, _: &Option<String>, body: &[u8])
+ -> Result<Self::J, String>
+ {
+ self.events.notify(Event::JobReceived).await;
+ match evaluationjob::from(body) {
+ Ok(job) => {
+ self.events.notify(Event::JobDecodeSuccess).await;
+ Ok(job)
+ }
+ Err(err) => {
+ self.events.notify(Event::JobDecodeFailure).await;
+ Err("Failed to decode message".to_owned())
+ }
+ }
+ }
+}
+```
+
+### Per-Job Evaluation (`OneEval`)
+
+```rust
+struct OneEval<'a, E> {
+ client_app: &'a hubcaps::Github,
+ repo: hubcaps::repositories::Repository,
+ acl: &'a Acl,
+ events: &'a mut E,
+ identity: &'a str,
+ cloner: &'a checkout::CachedCloner,
+ job: &'a evaluationjob::EvaluationJob,
+}
+```
+
+### Evaluation Pipeline
+
+The `evaluate_job` method executes these steps:
+
+#### 1. Check if PR is closed
+
+```rust
+match issue_ref.get().await {
+ Ok(iss) => {
+ if iss.state == "closed" {
+ self.events.notify(Event::IssueAlreadyClosed).await;
+ return Ok(self.actions().skip(job));
+ }
+ // ...
+ }
+}
+```
+
+#### 2. Determine auto-schedule architectures
+
+```rust
+if issue_is_wip(&iss) {
+ auto_schedule_build_archs = vec![];
+} else {
+ auto_schedule_build_archs = self.acl.build_job_architectures_for_user_repo(
+ &iss.user.login, &job.repo.full_name,
+ );
+}
+```
+
+WIP PRs get no automatic builds. The architecture list depends on whether the
+user is trusted (7 platforms) or not (3 primary platforms).
+
+#### 3. Create the evaluation strategy
+
+```rust
+let mut evaluation_strategy = eval::MonorepoStrategy::new(job, &issue_ref);
+```
+
+#### 4. Set commit status
+
+```rust
+let mut overall_status = CommitStatus::new(
+ repo.statuses(),
+ job.pr.head_sha.clone(),
+ format!("{prefix}-eval"),
+ "Starting".to_owned(),
+ None,
+);
+overall_status.set_with_description(
+ "Starting", hubcaps::statuses::State::Pending
+).await?;
+```
+
+#### 5. Pre-clone actions
+
+```rust
+evaluation_strategy.pre_clone().await?;
+```
+
+#### 6. Clone and checkout
+
+```rust
+let project = self.cloner.project(&job.repo.full_name, job.repo.clone_url.clone());
+let co = project.clone_for("mr-est".to_string(), self.identity.to_string())?;
+```
+
+#### 7. Checkout target branch, fetch PR, merge
+
+```rust
+evaluation_strategy.on_target_branch(&co_path, &mut overall_status).await?;
+co.fetch_pr(job.pr.number)?;
+evaluation_strategy.after_fetch(&co)?;
+co.merge_commit(OsStr::new("pr"))?;
+evaluation_strategy.after_merge(&mut overall_status).await?;
+```
+
+#### 8. Run evaluation checks
+
+```rust
+let checks = evaluation_strategy.evaluation_checks();
+// Execute each check and update commit status
+```
+
+#### 9. Complete evaluation
+
+```rust
+let eval_complete = evaluation_strategy.all_evaluations_passed(
+ &mut overall_status
+).await?;
+```
+
+### Error Handling
+
+```rust
+async fn worker_actions(&mut self) -> worker::Actions {
+ let eval_result = match self.evaluate_job().await {
+ Ok(v) => Ok(v),
+ Err(eval_error) => match eval_error {
+ EvalWorkerError::EvalError(eval::Error::Fail(msg)) =>
+ Err(self.update_status(msg, None, State::Failure).await),
+ EvalWorkerError::EvalError(eval::Error::CommitStatusWrite(e)) =>
+ Err(Err(e)),
+ EvalWorkerError::CommitStatusWrite(e) =>
+ Err(Err(e)),
+ },
+ };
+
+ match eval_result {
+ Ok(eval_actions) => {
+ // Remove tickborg-internal-error label
+ update_labels(&issue_ref, &[], &["tickborg-internal-error".into()]).await;
+ eval_actions
+ }
+ Err(Ok(())) => {
+ // Error, but PR updated successfully
+ self.actions().skip(self.job)
+ }
+ Err(Err(CommitStatusError::ExpiredCreds(_))) => {
+ self.actions().retry_later(self.job) // NackRequeue
+ }
+ Err(Err(CommitStatusError::MissingSha(_))) => {
+ self.actions().skip(self.job) // Ack (force pushed)
+ }
+ Err(Err(CommitStatusError::InternalError(_))) => {
+ self.actions().retry_later(self.job) // NackRequeue
+ }
+ Err(Err(CommitStatusError::Error(_))) => {
+ // Add tickborg-internal-error label
+ update_labels(&issue_ref, &["tickborg-internal-error".into()], &[]).await;
+ self.actions().skip(self.job)
+ }
+ }
+}
+```
+
+---
+
+## The `EvaluationStrategy` Trait
+
+```rust
+// tasks/eval/mod.rs
+pub trait EvaluationStrategy {
+ fn pre_clone(&mut self)
+ -> impl Future<Output = StepResult<()>>;
+
+ fn on_target_branch(&mut self, co: &Path, status: &mut CommitStatus)
+ -> impl Future<Output = StepResult<()>>;
+
+ fn after_fetch(&mut self, co: &CachedProjectCo)
+ -> StepResult<()>;
+
+ fn after_merge(&mut self, status: &mut CommitStatus)
+ -> impl Future<Output = StepResult<()>>;
+
+ fn evaluation_checks(&self) -> Vec<EvalChecker>;
+
+ fn all_evaluations_passed(&mut self, status: &mut CommitStatus)
+ -> impl Future<Output = StepResult<EvaluationComplete>>;
+}
+
+pub type StepResult<T> = Result<T, Error>;
+
+#[derive(Default)]
+pub struct EvaluationComplete {
+ pub builds: Vec<BuildJob>,
+}
+
+#[derive(Debug)]
+pub enum Error {
+ CommitStatusWrite(CommitStatusError),
+ Fail(String),
+}
+```
+
+---
+
+## The `MonorepoStrategy`
+
+### Title-Based Label Detection
+
+```rust
+// tasks/eval/monorepo.rs
+const TITLE_LABELS: [(&str, &str); 12] = [
+ ("meshmc", "project: meshmc"),
+ ("mnv", "project: mnv"),
+ ("neozip", "project: neozip"),
+ ("cmark", "project: cmark"),
+ ("cgit", "project: cgit"),
+ ("json4cpp", "project: json4cpp"),
+ ("tomlplusplus", "project: tomlplusplus"),
+ ("corebinutils", "project: corebinutils"),
+ ("forgewrapper", "project: forgewrapper"),
+ ("genqrcode", "project: genqrcode"),
+ ("darwin", "platform: macos"),
+ ("windows", "platform: windows"),
+];
+
+fn label_from_title(title: &str) -> Vec<String> {
+ let title_lower = title.to_lowercase();
+ TITLE_LABELS.iter()
+ .filter(|(word, _)| {
+ let re = Regex::new(&format!("\\b{word}\\b")).unwrap();
+ re.is_match(&title_lower)
+ })
+ .map(|(_, label)| (*label).into())
+ .collect()
+}
+```
+
+This uses word boundary regex (`\b`) to prevent substring false matches (e.g.,
+the keyword "cmark" won't match inside a longer word such as "cmarkdown").
+
+### Commit Scope Parsing
+
+```rust
+fn parse_commit_scopes(messages: &[String]) -> Vec<String> {
+ let scope_re = Regex::new(r"^[a-z]+\(([^)]+)\)").unwrap();
+ let colon_re = Regex::new(r"^([a-z0-9_-]+):").unwrap();
+
+ let mut projects: Vec<String> = messages.iter()
+ .filter_map(|line| {
+ let trimmed = line.trim();
+ // Conventional Commits: "feat(meshmc): add block renderer"
+ if let Some(caps) = scope_re.captures(trimmed) {
+ Some(caps[1].to_string())
+ }
+ // Simple: "meshmc: fix crash"
+ else if let Some(caps) = colon_re.captures(trimmed) {
+ let candidate = caps[1].to_string();
+ if crate::buildtool::find_project(&candidate).is_some() {
+ Some(candidate)
+ } else {
+ None
+ }
+ } else {
+ None
+ }
+ })
+ .collect();
+
+ projects.sort();
+ projects.dedup();
+ projects
+}
+```
+
+This recognises both Conventional Commits (`feat(meshmc): ...`) and simple
+scope prefixes (`meshmc: ...`).
+
+### File Change Detection
+
+The strategy uses `CachedProjectCo::files_changed_from_head()` to get the
+list of changed files, then passes them through
+`buildtool::detect_changed_projects()` which maps each file to its top-level
+directory and matches against known projects.
+
+---
+
+## The `EvalChecker`
+
+```rust
+// evalchecker.rs
+pub struct EvalChecker {
+ name: String,
+ command: String,
+ args: Vec<String>,
+}
+
+impl EvalChecker {
+ pub fn new(name: &str, command: &str, args: Vec<String>) -> EvalChecker;
+ pub fn name(&self) -> &str;
+ pub fn execute(&self, path: &Path) -> Result<File, File>;
+ pub fn cli_cmd(&self) -> String;
+}
+```
+
+`EvalChecker` is a generic command execution wrapper. It runs a command in the
+checkout directory and returns `Ok(File)` on success, `Err(File)` on failure.
+The `File` contains captured stdout + stderr.
+
+```rust
+pub fn execute(&self, path: &Path) -> Result<File, File> {
+ let output = Command::new(&self.command)
+ .args(&self.args)
+ .current_dir(path)
+ .output();
+
+ match output {
+ Ok(result) => {
+ // Write stdout + stderr to temp file
+ if result.status.success() {
+ Ok(file)
+ } else {
+ Err(file)
+ }
+ }
+ Err(e) => {
+ // Write error message to temp file
+ Err(file)
+ }
+ }
+}
+```
+
+---
+
+## The `ProjectTagger`
+
+```rust
+// tagger.rs
+pub struct ProjectTagger {
+ selected: Vec<String>,
+}
+
+impl ProjectTagger {
+ pub fn new() -> Self;
+
+ pub fn analyze_changes(&mut self, changed_files: &[String]) {
+ let projects = detect_changed_projects(changed_files);
+ for project in projects {
+ self.selected.push(format!("project: {project}"));
+ }
+
+ // Cross-cutting labels
+ let has_ci = changed_files.iter().any(|f|
+ f.starts_with(".github/") || f.starts_with("ci/")
+ );
+ let has_docs = changed_files.iter().any(|f|
+ f.starts_with("docs/") || f.ends_with(".md")
+ );
+ let has_root = changed_files.iter().any(|f|
+ !f.contains('/') && !f.ends_with(".md")
+ );
+
+ if has_ci { self.selected.push("scope: ci".into()); }
+ if has_docs { self.selected.push("scope: docs".into()); }
+ if has_root { self.selected.push("scope: root".into()); }
+ }
+
+ pub fn tags_to_add(&self) -> Vec<String>;
+ pub fn tags_to_remove(&self) -> Vec<String>;
+}
+```
+
+### Label Examples
+
+| Changed Files | Generated Labels |
+|--------------|------------------|
+| `meshmc/CMakeLists.txt` | `project: meshmc` |
+| `mnv/src/main.c` | `project: mnv` |
+| `.github/workflows/ci.yml` | `scope: ci` |
+| `README.md` | `scope: docs` |
+| `flake.nix` | `scope: root` |
+
+---
+
+## Commit Status Updates
+
+Throughout evaluation, the commit status is updated to reflect progress:
+
+```
+Starting → Cloning project → Checking out target → Fetching PR →
+Merging → Running checks → Evaluation complete
+```
+
+Or on failure:
+
+```
+Starting → ... → Merge failed (Failure)
+Starting → ... → Check 'xyz' failed (Failure)
+```
+
+The commit status context includes a prefix determined dynamically:
+
+```rust
+let prefix = get_prefix(repo.statuses(), &job.pr.head_sha).await?;
+let context = format!("{prefix}-eval");
+```
+
+---
+
+## Auto-Scheduled vs. Manual Builds
+
+### Auto-Scheduled (from PR evaluation)
+
+When a PR is evaluated, builds are automatically scheduled for the detected
+changed projects. The set of architectures depends on the ACL:
+
+- **Trusted users**: All 7 platforms
+- **Untrusted users**: 3 primary platforms (x86_64 Linux/macOS/Windows)
+- **WIP PRs**: No automatic builds
+
+### Manual (from `@tickbot` commands)
+
+Users can manually trigger builds or re-evaluations:
+
+```
+@tickbot build meshmc → Build meshmc on all eligible platforms
+@tickbot eval → Re-run evaluation
+@tickbot test mnv → Run tests for mnv
+@tickbot build meshmc neozip → Build multiple projects
+```
+
+These are handled by the `github-comment-filter`, not the evaluation system.
+
+---
+
+## Label Management
+
+The evaluation system manages PR labels via the GitHub API:
+
+```rust
+async fn update_labels(
+ issue_ref: &IssueRef,
+ add: &[String],
+ remove: &[String],
+) {
+ // Add labels
+ for label in add {
+ issue_ref.labels().add(vec![label.clone()]).await;
+ }
+ // Remove labels
+ for label in remove {
+ issue_ref.labels().remove(label).await;
+ }
+}
+```
+
+Labels managed:
+- `project: <name>` — Which sub-projects are affected
+- `scope: ci` / `scope: docs` / `scope: root` — Cross-cutting changes
+- `platform: macos` / `platform: windows` — Platform-specific changes
+- `tickborg-internal-error` — Added when tickborg encounters an internal error
diff --git a/docs/handbook/ofborg/github-integration.md b/docs/handbook/ofborg/github-integration.md
new file mode 100644
index 0000000000..4f33f77466
--- /dev/null
+++ b/docs/handbook/ofborg/github-integration.md
@@ -0,0 +1,603 @@
+# Tickborg — GitHub Integration
+
+## Overview
+
+Tickborg communicates with GitHub through the **GitHub App** model. A custom
+fork of the `hubcaps` crate provides the Rust API client. Integration covers
+webhook reception, commit statuses, check runs, issue/PR manipulation, and
+comment posting.
+
+---
+
+## GitHub App Authentication
+
+### `GithubAppVendingMachine`
+
+```rust
+// config.rs
+pub struct GithubAppVendingMachine {
+ conf: GithubAppConfig,
+ current_token: Option<String>,
+ token_expiry: Option<Instant>,
+}
+```
+
+Handles two-stage GitHub App auth:
+
+1. **JWT**: Signed with the App's private RSA key, valid for up to 10 minutes.
+2. **Installation token**: Obtained with the JWT, valid for ~1 hour.
+
+### Token Lifecycle
+
+```rust
+impl GithubAppVendingMachine {
+ pub fn new(conf: GithubAppConfig) -> Self {
+ GithubAppVendingMachine {
+ conf,
+ current_token: None,
+ token_expiry: None,
+ }
+ }
+
+ fn is_token_fresh(&self) -> bool {
+ match self.token_expiry {
+ Some(exp) => Instant::now() < exp,
+ None => false,
+ }
+ }
+
+ pub async fn get_token(&mut self) -> Result<String, String> {
+ if self.is_token_fresh() {
+ return Ok(self.current_token.clone().unwrap());
+ }
+ // Generate a fresh JWT
+ let jwt = self.make_jwt()?;
+ // Exchange JWT for installation token
+ let client = hubcaps::Github::new(
+ "tickborg".to_owned(),
+ hubcaps::Credentials::Jwt(hubcaps::JwtToken::new(jwt)),
+ )?;
+ let installation = client.app()
+ .find_repo_installation(&self.conf.owner, &self.conf.repo)
+ .await?;
+ let token_result = client.app()
+ .create_installation_token(installation.id)
+ .await?;
+
+ self.current_token = Some(token_result.token.clone());
+ // Refresh 10 minutes before GitHub's ~1-hour expiry (50-minute effective lifetime)
+ self.token_expiry = Some(
+ Instant::now() + Duration::from_secs(55 * 60) - Duration::from_secs(5 * 60)
+ );
+
+ Ok(token_result.token)
+ }
+
+ pub async fn github(&mut self) -> Result<hubcaps::Github, String> {
+ let token = self.get_token().await?;
+ Ok(hubcaps::Github::new(
+ "tickborg".to_owned(),
+ hubcaps::Credentials::Token(token),
+ )?)
+ }
+}
+```
+
+### JWT Generation
+
+```rust
+fn make_jwt(&self) -> Result<String, String> {
+ let now = SystemTime::now()
+ .duration_since(UNIX_EPOCH).unwrap()
+ .as_secs() as i64;
+
+ let payload = json!({
+ "iat": now - 60, // 1 minute in the past (clock skew)
+ "exp": now + (10 * 60), // 10 minutes from now
+ "iss": self.conf.app_id,
+ });
+
+ let key = EncodingKey::from_rsa_pem(
+ &std::fs::read(&self.conf.private_key_file)?
+ )?;
+
+ encode(&Header::new(Algorithm::RS256), &payload, &key)
+ .map_err(|e| format!("JWT encoding error: {}", e))
+}
+```
+
+### `GithubAppConfig`
+
+```rust
+#[derive(Deserialize, Debug)]
+pub struct GithubAppConfig {
+ pub app_id: u64,
+ pub private_key_file: PathBuf,
+ pub owner: String,
+ pub repo: String,
+ pub installation_id: Option<u64>,
+}
+```
+
+---
+
+## GitHub App Configuration
+
+The `GithubAppConfig` is nested under the top-level `Config`:
+
+```json
+{
+ "github_app": {
+ "app_id": 12345,
+ "private_key_file": "/etc/tickborg/private-key.pem",
+ "owner": "project-tick",
+ "repo": "Project-Tick",
+ "installation_id": 67890
+ }
+}
+```
+
+---
+
+## Commit Statuses
+
+### `CommitStatus`
+
+```rust
+// commitstatus.rs
+pub struct CommitStatus {
+ api: hubcaps::statuses::Statuses,
+ sha: String,
+ context: String,
+ description: String,
+ url: Option<String>,
+}
+```
+
+### State Machine
+
+```rust
+impl CommitStatus {
+ pub fn new(
+ statuses: hubcaps::statuses::Statuses,
+ sha: String,
+ context: String,
+ description: String,
+ url: Option<String>,
+ ) -> Self;
+
+ pub async fn set_url(&mut self, url: Option<String>);
+
+ pub async fn set_with_description(
+ &mut self,
+ description: &str,
+ state: hubcaps::statuses::State,
+ ) -> Result<(), CommitStatusError> {
+ self.description = description.to_owned();
+ self.send_status(state).await
+ }
+
+ pub async fn set(
+ &mut self,
+ state: hubcaps::statuses::State,
+ ) -> Result<(), CommitStatusError>;
+
+ async fn send_status(
+ &self,
+ state: hubcaps::statuses::State,
+ ) -> Result<(), CommitStatusError> {
+ let options = hubcaps::statuses::StatusOptions::builder(state)
+ .description(&self.description)
+ .context(&self.context);
+
+ let options = match &self.url {
+ Some(u) => options.target_url(u).build(),
+ None => options.build(),
+ };
+
+ self.api.create(&self.sha, &options)
+ .await
+ .map_err(|e| CommitStatusError::from(e))?;
+
+ Ok(())
+ }
+}
+```
+
+### Error Classification
+
+```rust
+#[derive(Debug)]
+pub enum CommitStatusError {
+ ExpiredCreds(String), // GitHub App token expired
+ MissingSha(String), // Commit was force-pushed away
+ InternalError(String), // 5xx from GitHub API
+ Error(String), // Other errors
+}
+```
+
+Error mapping from HTTP response:
+
+| HTTP Status | CommitStatusError Variant | Worker Action |
+|------------|--------------------------|---------------|
+| 401 | `ExpiredCreds` | `NackRequeue` (retry) |
+| 422 ("No commit found") | `MissingSha` | `Ack` (skip) |
+| 500-599 | `InternalError` | `NackRequeue` (retry) |
+| Other | `Error` | `Ack` + add error label |
+
+---
+
+## Check Runs
+
+### `job_to_check()`
+
+Creates a Check Run when a build job is started:
+
+```rust
+pub async fn job_to_check(
+ github: &hubcaps::Github,
+ repo_full_name: &str,
+ job: &BuildJob,
+ runner_identity: &str,
+) -> Result<(), String> {
+ let (owner, repo) = parse_repo_name(repo_full_name);
+ let checks = github.repo(owner, repo).check_runs();
+
+ checks.create(&hubcaps::checks::CheckRunOptions {
+ name: format!("build-{}-{}", job.project, job.system),
+ head_sha: job.pr.head_sha.clone(),
+ status: Some(hubcaps::checks::CheckRunStatus::InProgress),
+ external_id: Some(format!("{runner_identity}")),
+ started_at: Some(Utc::now()),
+ output: Some(hubcaps::checks::Output {
+ title: format!("Building {} on {}", job.project, job.system),
+ summary: format!("Runner: {runner_identity}"),
+ text: None,
+ annotations: vec![],
+ }),
+ ..Default::default()
+ }).await.map_err(|e| format!("Failed to create check run: {e}"))?;
+
+ Ok(())
+}
+```
+
+### `result_to_check()`
+
+Updates a Check Run when a build completes:
+
+```rust
+pub async fn result_to_check(
+ github: &hubcaps::Github,
+ repo_full_name: &str,
+ result: &BuildResult,
+) -> Result<(), String> {
+ let (owner, repo) = parse_repo_name(repo_full_name);
+ let checks = github.repo(owner, repo).check_runs();
+
+ let conclusion = match &result.status {
+ BuildStatus::Success => hubcaps::checks::Conclusion::Success,
+ BuildStatus::Failure => hubcaps::checks::Conclusion::Failure,
+ BuildStatus::TimedOut => hubcaps::checks::Conclusion::TimedOut,
+ BuildStatus::Skipped => hubcaps::checks::Conclusion::Skipped,
+ BuildStatus::UnexpectedError { .. } => hubcaps::checks::Conclusion::Failure,
+ };
+
+ // Find and update the existing check run
+ // ...
+}
+```
+
+---
+
+## GitHub Event Types (ghevent)
+
+### Common Types
+
+```rust
+// ghevent/common.rs
+#[derive(Deserialize, Debug)]
+pub struct GenericWebhook {
+ pub repository: Repository,
+}
+
+#[derive(Deserialize, Debug)]
+pub struct Repository {
+ pub owner: User,
+ pub name: String,
+ pub full_name: String,
+ pub clone_url: String,
+}
+
+#[derive(Deserialize, Debug)]
+pub struct User {
+ pub login: String,
+ pub id: u64,
+}
+
+#[derive(Deserialize, Debug)]
+pub struct Comment {
+ pub id: u64,
+ pub body: String,
+ pub user: User,
+}
+
+#[derive(Deserialize, Debug)]
+pub struct Issue {
+ pub number: u64,
+ pub title: String,
+ pub state: String,
+ pub user: User,
+ pub labels: Vec<Label>,
+}
+```
+
+### Pull Request Events
+
+```rust
+// ghevent/pullrequestevent.rs
+#[derive(Deserialize, Debug)]
+pub struct PullRequestEvent {
+ pub action: PullRequestAction,
+ pub number: u64,
+ pub pull_request: PullRequest,
+ pub repository: Repository,
+ pub changes: Option<PullRequestChanges>,
+}
+
+#[derive(Deserialize, Debug)]
+#[serde(rename_all = "snake_case")]
+pub enum PullRequestAction {
+ Opened,
+ Closed,
+ Synchronize,
+ Reopened,
+ Edited,
+ Labeled,
+ Unlabeled,
+ ReviewRequested,
+ Assigned,
+ Unassigned,
+ ReadyForReview,
+}
+
+#[derive(Deserialize, Debug)]
+pub enum PullRequestState {
+ #[serde(rename = "open")]
+ Open,
+ #[serde(rename = "closed")]
+ Closed,
+}
+
+#[derive(Deserialize, Debug)]
+pub struct PullRequest {
+ pub id: u64,
+ pub number: u64,
+ pub state: PullRequestState,
+ pub title: String,
+ pub head: PullRequestRef,
+ pub base: PullRequestRef,
+ pub user: User,
+ pub merged: Option<bool>,
+ pub mergeable: Option<bool>,
+}
+
+#[derive(Deserialize, Debug)]
+pub struct PullRequestRef {
+ pub sha: String,
+ #[serde(rename = "ref")]
+ pub git_ref: String,
+ pub repo: Repository,
+}
+```
+
+### Issue Comment Events
+
+```rust
+// ghevent/issuecomment.rs
+#[derive(Deserialize, Debug)]
+pub struct IssueComment {
+ pub action: IssueCommentAction,
+ pub comment: Comment,
+ pub issue: Issue,
+ pub repository: Repository,
+}
+
+#[derive(Deserialize, Debug)]
+#[serde(rename_all = "snake_case")]
+pub enum IssueCommentAction {
+ Created,
+ Edited,
+ Deleted,
+}
+```
+
+### Push Events
+
+```rust
+// ghevent/pushevent.rs
+#[derive(Deserialize, Debug)]
+pub struct PushEvent {
+ #[serde(rename = "ref")]
+ pub git_ref: String,
+ pub after: String,
+ pub before: String,
+ pub deleted: bool,
+ pub forced: bool,
+ pub created: bool,
+ pub pusher: Pusher,
+ pub head_commit: Option<HeadCommit>,
+ pub repository: Repository,
+ pub commits: Vec<HeadCommit>,
+}
+
+impl PushEvent {
+ pub fn branch(&self) -> Option<&str>;
+ pub fn is_tag(&self) -> bool;
+ pub fn is_delete(&self) -> bool;
+ pub fn is_zero_sha(&self) -> bool;
+}
+```
+
+---
+
+## Comment Posting
+
+### `GitHubCommentPoster`
+
+```rust
+// tasks/githubcommentposter.rs
+pub struct GitHubCommentPoster {
+ github_vend: tokio::sync::RwLock<GithubAppVendingMachine>,
+}
+
+pub trait PostableEvent: Send {
+ fn owner(&self) -> &str;
+ fn repo(&self) -> &str;
+ fn number(&self) -> u64;
+}
+```
+
+### Posting a Result
+
+```rust
+impl worker::SimpleWorker for GitHubCommentPoster {
+ type J = buildresult::BuildResult;
+
+ async fn consumer(&mut self, job: &buildresult::BuildResult) -> worker::Actions {
+ let github = self.github_vend.write().await.github().await;
+ let repo = github.repo(&job.repo.owner, &job.repo.name);
+ let issue = repo.issue(job.pr.number);
+
+ // Build a markdown summary
+ let comment_body = format_build_result(job);
+
+ issue.comments().create(&hubcaps::comments::CommentOptions {
+ body: comment_body,
+ }).await;
+
+ vec![worker::Action::Ack]
+ }
+}
+```
+
+---
+
+## Comment Filtering
+
+### `GitHubCommentWorker`
+
+```rust
+// tasks/githubcommentfilter.rs
+pub struct GitHubCommentWorker {
+ acl: Acl,
+ github_vend: tokio::sync::RwLock<GithubAppVendingMachine>,
+}
+```
+
+The comment filter processes incoming `IssueComment` events:
+
+1. **Ignore non-creation actions** — Only `Created` matters.
+2. **Parse command** — `commentparser::parse()` extracts `@tickbot` instructions.
+3. **ACL check** — Verifies the commenter is allowed to issue the command.
+4. **Generate build/eval jobs** — Creates `BuildJob` or `EvaluationJob` messages.
+5. **Publish to AMQP** — Routes to the appropriate exchange.
+
+```rust
+async fn consumer(&mut self, job: &ghevent::IssueComment) -> worker::Actions {
+ if job.action != IssueCommentAction::Created {
+ return vec![worker::Action::Ack];
+ }
+
+ let instructions = commentparser::parse(&job.comment.body);
+ if instructions.is_empty() {
+ return vec![worker::Action::Ack];
+ }
+
+ let mut actions = Vec::new();
+
+ for instruction in instructions {
+ match instruction {
+ Instruction::Build(projects, subset) => {
+ // Verify ACL
+ let architectures = self.acl.build_job_architectures_for_user_repo(
+ &job.comment.user.login,
+ &job.repository.full_name,
+ );
+ // Create BuildJob per project × architecture
+ for project in projects {
+ for arch in &architectures {
+ let build_job = BuildJob { /* ... */ };
+ actions.push(worker::publish_serde_action(
+ Some("build-jobs".to_owned()),
+ None,
+ &build_job,
+ ));
+ }
+ }
+ }
+ Instruction::Eval => {
+ let eval_job = EvaluationJob { /* ... */ };
+ actions.push(worker::publish_serde_action(
+ None,
+ Some("mass-rebuild-check-jobs".to_owned()),
+ &eval_job,
+ ));
+ }
+ Instruction::Test(projects) => { /* ... */ }
+ }
+ }
+
+ actions.push(worker::Action::Ack);
+ actions
+}
+```
+
+---
+
+## The `hubcaps` Fork
+
+Tickborg uses a forked version of `hubcaps` from:
+
+```toml
+[dependencies]
+hubcaps = { git = "https://github.com/ofborg/hubcaps.git", rev = "0d7466e..." }
+```
+
+Key differences from upstream:
+- **Check Runs API support** — Full CRUD for GitHub Checks API
+- **GitHub App authentication** — JWT + installation token flow
+- **Async/await** — Full Tokio-based async API
+- **App API** — `find_repo_installation()`, `create_installation_token()`
+
+---
+
+## Webhook Signature Verification
+
+See [webhook-receiver.md](webhook-receiver.md) for the full HMAC-SHA256
+verification flow.
+
+```rust
+fn verify_signature(secret: &[u8], signature: &str, body: &[u8]) -> bool {
+ let sig_bytes = match hex::decode(signature.trim_start_matches("sha256=")) {
+ Ok(b) => b,
+ Err(_) => return false,
+ };
+
+ let mut mac = Hmac::<Sha256>::new_from_slice(secret).unwrap();
+ mac.update(body);
+ mac.verify_slice(&sig_bytes).is_ok()
+}
+```
+
+---
+
+## Rate Limiting
+
+The GitHub API has rate limits (5000 requests/hour for GitHub App installations).
+Tickborg mitigates this by:
+
+1. **Caching installation tokens** — Reused until 5 minutes before expiry.
+2. **Minimal API calls** — Only essential status updates and label operations.
+3. **Batching** — Label additions batched into single API calls where possible.
+4. **Backoff on 403** — When rate-limited, jobs are `NackRequeue`'d for retry.
diff --git a/docs/handbook/ofborg/message-system.md b/docs/handbook/ofborg/message-system.md
new file mode 100644
index 0000000000..197152737d
--- /dev/null
+++ b/docs/handbook/ofborg/message-system.md
@@ -0,0 +1,731 @@
+# Tickborg — Message System
+
+## Overview
+
+Tickborg's entire architecture is built on **AMQP 0-9-1** messaging via
+**RabbitMQ**. Every component is a standalone binary that communicates
+exclusively through message queues. There is no shared database, no direct
+RPC between services, and no in-memory coupling.
+
+This document covers:
+- The AMQP topology (exchanges, queues, bindings)
+- Message types and their serialization
+- Publishing and consuming patterns
+- The worker abstraction layer
+
+---
+
+## Exchanges
+
+Tickborg uses five RabbitMQ exchanges:
+
+### `github-events` (Topic Exchange)
+
+**Declared by:** `github-webhook-receiver`
+
+The primary ingestion exchange. All GitHub webhook payloads are published here
+with routing keys of the form `{event_type}.{owner}/{repo}`.
+
+```rust
+chan.declare_exchange(easyamqp::ExchangeConfig {
+ exchange: "github-events".to_owned(),
+ exchange_type: easyamqp::ExchangeType::Topic,
+ passive: false,
+ durable: true,
+ auto_delete: false,
+ no_wait: false,
+ internal: false,
+}).await?;
+```
+
+**Routing key patterns:**
+
+| Pattern | Example | Consumer |
+|---------|---------|----------|
+| `pull_request.*` | `pull_request.project-tick/Project-Tick` | evaluation-filter |
+| `issue_comment.*` | `issue_comment.project-tick/Project-Tick` | github-comment-filter |
+| `push.*` | `push.project-tick/Project-Tick` | push-filter |
+| `unknown.*` | `unknown.project-tick/Project-Tick` | (monitoring) |
+
+### `build-jobs` (Fanout Exchange)
+
+**Declared by:** `github-comment-filter`, `builder`, `push-filter`
+
+Distributes build jobs to all connected builder instances. As a **fanout**
+exchange, every bound queue receives a copy of every message.
+
+```rust
+chan.declare_exchange(easyamqp::ExchangeConfig {
+ exchange: "build-jobs".to_owned(),
+ exchange_type: easyamqp::ExchangeType::Fanout,
+ passive: false,
+ durable: true,
+ auto_delete: false,
+ no_wait: false,
+ internal: false,
+}).await?;
+```
+
+### `build-results` (Fanout Exchange)
+
+**Declared by:** `github-comment-filter`, `github-comment-poster`, `push-filter`
+
+Collects build results (both "queued" notifications and "completed" results).
+The `github-comment-poster` consumes from this to create GitHub Check Runs.
+
+```rust
+chan.declare_exchange(easyamqp::ExchangeConfig {
+ exchange: "build-results".to_owned(),
+ exchange_type: easyamqp::ExchangeType::Fanout,
+ passive: false,
+ durable: true,
+ auto_delete: false,
+ no_wait: false,
+ internal: false,
+}).await?;
+```
+
+### `logs` (Topic Exchange)
+
+**Declared by:** `log-message-collector`
+
+Receives streaming build log messages from builders. The routing key encodes
+the repository and PR/push identifier.
+
+```rust
+chan.declare_exchange(easyamqp::ExchangeConfig {
+ exchange: "logs".to_owned(),
+ exchange_type: easyamqp::ExchangeType::Topic,
+ passive: false,
+ durable: true,
+ auto_delete: false,
+ no_wait: false,
+ internal: false,
+}).await?;
+```
+
+### `stats` (Fanout Exchange)
+
+**Declared by:** `stats`
+
+Receives operational metric events from all workers. The stats collector
+aggregates these into Prometheus-compatible metrics.
+
+```rust
+chan.declare_exchange(easyamqp::ExchangeConfig {
+ exchange: "stats".to_owned(),
+ exchange_type: easyamqp::ExchangeType::Fanout,
+ passive: false,
+ durable: true,
+ auto_delete: false,
+ no_wait: false,
+ internal: false,
+}).await?;
+```
+
+---
+
+## Queues
+
+### Durable Queues
+
+| Queue Name | Exchange | Routing Key | Consumer |
+|------------|----------|-------------|----------|
+| `build-inputs` | `github-events` | `issue_comment.*` | github-comment-filter |
+| `github-events-unknown` | `github-events` | `unknown.*` | (monitoring) |
+| `mass-rebuild-check-inputs` | `github-events` | `pull_request.*` | evaluation-filter |
+| `push-build-inputs` | `github-events` | `push.*` | push-filter |
+| `mass-rebuild-check-jobs` | (direct publish) | — | mass-rebuilder |
+| `build-inputs-x86_64-linux` | `build-jobs` | — | builder (x86_64-linux) |
+| `build-inputs-aarch64-linux` | `build-jobs` | — | builder (aarch64-linux) |
+| `build-inputs-x86_64-darwin` | `build-jobs` | — | builder (x86_64-darwin) |
+| `build-inputs-aarch64-darwin` | `build-jobs` | — | builder (aarch64-darwin) |
+| `build-inputs-x86_64-windows` | `build-jobs` | — | builder (x86_64-windows) |
+| `build-inputs-aarch64-windows` | `build-jobs` | — | builder (aarch64-windows) |
+| `build-inputs-x86_64-freebsd` | `build-jobs` | — | builder (x86_64-freebsd) |
+| `build-results` | `build-results` | — | github-comment-poster |
+| `stats-events` | `stats` | — | stats |
+
+### Ephemeral Queues
+
+| Queue Name | Exchange | Routing Key | Consumer |
+|------------|----------|-------------|----------|
+| `logs` | `logs` | `*.*` | log-message-collector |
+
+The `logs` queue is declared `durable: false, exclusive: true, auto_delete:
+true`. This means:
+- It only exists while the log collector is connected.
+- If the log collector disconnects, the queue is deleted.
+- Log messages published while no collector is connected are lost.
+- This is intentional: logs are not critical path data and the exchange itself
+ is durable.
+
+---
+
+## Message Types
+
+All messages are serialized as JSON using `serde_json`.
+
+### `EvaluationJob`
+
+**Published by:** evaluation-filter, github-comment-filter
+**Consumed by:** mass-rebuilder
+**Queue:** `mass-rebuild-check-jobs`
+
+```rust
+// message/evaluationjob.rs
+#[derive(Serialize, Deserialize, Debug)]
+pub struct EvaluationJob {
+ pub repo: Repo,
+ pub pr: Pr,
+}
+```
+
+Example JSON:
+
+```json
+{
+ "repo": {
+ "owner": "project-tick",
+ "name": "Project-Tick",
+ "full_name": "project-tick/Project-Tick",
+ "clone_url": "https://github.com/project-tick/Project-Tick.git"
+ },
+ "pr": {
+ "number": 42,
+ "head_sha": "abc123def456...",
+ "target_branch": "main"
+ }
+}
+```
+
+### `BuildJob`
+
+**Published by:** github-comment-filter, mass-rebuilder, push-filter
+**Consumed by:** builder
+**Queue:** `build-inputs-{system}`
+
+```rust
+// message/buildjob.rs
+#[derive(Serialize, Deserialize, Debug)]
+pub struct BuildJob {
+ pub repo: Repo,
+ pub pr: Pr,
+ pub subset: Option<Subset>,
+ pub attrs: Vec<String>,
+ pub request_id: String,
+ pub logs: Option<ExchangeQueue>,
+ pub statusreport: Option<ExchangeQueue>,
+ pub push: Option<PushTrigger>,
+}
+```
+
+The `logs` and `statusreport` fields are `Option<ExchangeQueue>` values, where
+an `ExchangeQueue` is a tuple of `(Option<Exchange>, Option<RoutingKey>)` that
+tells the builder where to send log messages and build results, respectively.
+
+Two constructors exist:
+
+```rust
+// For PR-triggered builds
+impl BuildJob {
+ pub fn new(
+ repo: Repo, pr: Pr, subset: Subset, attrs: Vec<String>,
+ logs: Option<ExchangeQueue>, statusreport: Option<ExchangeQueue>,
+ request_id: String,
+ ) -> BuildJob;
+
+ // For push-triggered builds
+ pub fn new_push(
+ repo: Repo, push: PushTrigger, attrs: Vec<String>,
+ request_id: String,
+ ) -> BuildJob;
+
+ pub fn is_push(&self) -> bool;
+}
+```
+
+### `QueuedBuildJobs`
+
+**Published by:** github-comment-filter, push-filter
+**Consumed by:** github-comment-poster
+**Exchange/Queue:** `build-results`
+
+```rust
+#[derive(Serialize, Deserialize, Debug)]
+pub struct QueuedBuildJobs {
+ pub job: BuildJob,
+ pub architectures: Vec<String>,
+}
+```
+
+This message tells the comment poster that builds have been queued so it can
+create "Queued" check runs on GitHub.
+
+### `BuildResult`
+
+**Published by:** builder
+**Consumed by:** github-comment-poster, log-message-collector
+**Exchange/Queue:** `build-results`, `logs`
+
+```rust
+// message/buildresult.rs
+#[derive(Serialize, Deserialize, Debug)]
+pub enum BuildResult {
+ V1 {
+ tag: V1Tag,
+ repo: Repo,
+ pr: Pr,
+ system: String,
+ output: Vec<String>,
+ attempt_id: String,
+ request_id: String,
+ status: BuildStatus,
+ skipped_attrs: Option<Vec<String>>,
+ attempted_attrs: Option<Vec<String>>,
+ push: Option<PushTrigger>,
+ },
+ Legacy { /* backward compat */ },
+}
+```
+
+```rust
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
+pub enum BuildStatus {
+ Skipped,
+ Success,
+ Failure,
+ TimedOut,
+ HashMismatch,
+ UnexpectedError { err: String },
+}
+```
+
+### `BuildLogMsg`
+
+**Published by:** builder
+**Consumed by:** log-message-collector
+**Exchange:** `logs`
+
+```rust
+// message/buildlogmsg.rs
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct BuildLogMsg {
+ pub system: String,
+ pub identity: String,
+ pub attempt_id: String,
+ pub line_number: u64,
+ pub output: String,
+}
+```
+
+### `BuildLogStart`
+
+**Published by:** builder
+**Consumed by:** log-message-collector
+**Exchange:** `logs`
+
+```rust
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct BuildLogStart {
+ pub system: String,
+ pub identity: String,
+ pub attempt_id: String,
+ pub attempted_attrs: Option<Vec<String>>,
+ pub skipped_attrs: Option<Vec<String>>,
+}
+```
+
+### `EventMessage`
+
+**Published by:** all workers (via `stats::RabbitMq`)
+**Consumed by:** stats
+**Exchange:** `stats`
+
+```rust
+// stats.rs
+#[derive(Serialize, Deserialize, Debug)]
+pub struct EventMessage {
+ pub sender: String,
+ pub events: Vec<Event>,
+}
+```
+
+---
+
+## Common Message Structures
+
+### `Repo`
+
+```rust
+// message/common.rs
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct Repo {
+ pub owner: String,
+ pub name: String,
+ pub full_name: String,
+ pub clone_url: String,
+}
+```
+
+### `Pr`
+
+```rust
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+pub struct Pr {
+ pub target_branch: Option<String>,
+ pub number: u64,
+ pub head_sha: String,
+}
+```
+
+For push-triggered builds, `pr.number` is set to `0` and `pr.head_sha`
+contains the push commit SHA.
+
+### `PushTrigger`
+
+```rust
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+pub struct PushTrigger {
+ pub head_sha: String,
+ pub branch: String,
+ pub before_sha: Option<String>,
+}
+```
+
+---
+
+## Publishing Messages
+
+### The `publish_serde_action` Helper
+
+```rust
+// worker.rs
+pub fn publish_serde_action<T: Serialize + ?Sized>(
+ exchange: Option<String>,
+ routing_key: Option<String>,
+ msg: &T,
+) -> Action {
+ Action::Publish(Arc::new(QueueMsg {
+ exchange,
+ routing_key,
+ mandatory: false,
+ immediate: false,
+ content_type: Some("application/json".to_owned()),
+ content: serde_json::to_string(&msg).unwrap().into_bytes(),
+ }))
+}
+```
+
+This is the primary way workers produce outgoing messages. The message is
+serialized to JSON and wrapped in a `QueueMsg` which is then wrapped in an
+`Action::Publish`.
+
+### Message Delivery
+
+The `action_deliver` function in `easylapin.rs` handles all action types:
+
+```rust
+async fn action_deliver(
+ chan: &Channel, deliver: &Delivery, action: Action,
+) -> Result<(), lapin::Error> {
+ match action {
+ Action::Ack => {
+ chan.basic_ack(deliver.delivery_tag, BasicAckOptions::default()).await
+ }
+ Action::NackRequeue => {
+ chan.basic_nack(deliver.delivery_tag,
+ BasicNackOptions { requeue: true, ..Default::default() }).await
+ }
+ Action::NackDump => {
+ chan.basic_nack(deliver.delivery_tag,
+ BasicNackOptions::default()).await
+ }
+ Action::Publish(msg) => {
+ let exch = msg.exchange.as_deref().unwrap_or("");
+ let key = msg.routing_key.as_deref().unwrap_or("");
+
+ let mut props = BasicProperties::default()
+ .with_delivery_mode(2); // persistent
+
+ if let Some(s) = msg.content_type.as_deref() {
+ props = props.with_content_type(s.into());
+ }
+
+ chan.basic_publish(
+ exch.into(), key.into(),
+ BasicPublishOptions::default(),
+ &msg.content, props,
+ ).await?.await?;
+ Ok(())
+ }
+ }
+}
+```
+
+Key details:
+- **delivery_mode = 2**: All published messages are persistent.
+- The double `.await` on `basic_publish`: the first await sends the message,
+ the second awaits the publisher confirm from the broker.
+- When `exchange` is `None`, an empty string is used (the default exchange).
+- When `routing_key` is `None`, an empty string is used.
+
+---
+
+## Consuming Messages
+
+### Consumer Loop (SimpleWorker)
+
+```rust
+// easylapin.rs
+impl<'a, W: SimpleWorker + 'a> ConsumerExt<'a, W> for Channel {
+ async fn consume(self, mut worker: W, config: ConsumeConfig)
+ -> Result<Self::Handle, Self::Error>
+ {
+ let mut consumer = self.basic_consume(
+ config.queue.into(),
+ config.consumer_tag.into(),
+ BasicConsumeOptions::default(),
+ FieldTable::default(),
+ ).await?;
+
+ Ok(Box::pin(async move {
+ while let Some(Ok(deliver)) = consumer.next().await {
+ let job = worker.msg_to_job(
+ deliver.routing_key.as_str(),
+ &content_type,
+ &deliver.data,
+ ).await.expect("worker unexpected message consumed");
+
+ for action in worker.consumer(&job).await {
+ action_deliver(&self, &deliver, action)
+ .await.expect("action deliver failure");
+ }
+ }
+ }))
+ }
+}
+```
+
+### Consumer Loop (SimpleNotifyWorker)
+
+```rust
+impl<'a, W: SimpleNotifyWorker + 'a + Send> ConsumerExt<'a, W> for NotifyChannel {
+ async fn consume(self, worker: W, config: ConsumeConfig)
+ -> Result<Self::Handle, Self::Error>
+ {
+ self.0.basic_qos(1, BasicQosOptions::default()).await?;
+
+ let mut consumer = self.0.basic_consume(/* ... */).await?;
+
+ Ok(Box::pin(async move {
+ while let Some(Ok(deliver)) = consumer.next().await {
+ let receiver = ChannelNotificationReceiver {
+ channel: chan.clone(),
+ deliver,
+ };
+
+ let job = worker.msg_to_job(
+ receiver.deliver.routing_key.as_str(),
+ &content_type,
+ &receiver.deliver.data,
+ ).expect("worker unexpected message consumed");
+
+ worker.consumer(job, Arc::new(receiver)).await;
+ }
+ }))
+ }
+}
+```
+
+### Prefetch (QoS)
+
+- **`WorkerChannel`** and **`NotifyChannel`** both set `basic_qos(1)`.
+ This means the broker will only deliver one unacknowledged message at a time
+ to each consumer. This provides fair dispatch when multiple instances consume
+ from the same queue.
+- **Raw `Channel`** has no prefetch limit set. This is used by the log
+ collector which benefits from prefetching many small messages.
+
+---
+
+## Message Routing Diagram
+
+```
+ github-events (Topic)
+ ┌───────────────────────────────────────────┐
+ │ │
+ │ issue_comment.* ──► build-inputs │
+ │ pull_request.* ──► mass-rebuild-check- │
+ │ inputs │
+ │ push.* ──► push-build-inputs │
+ │ unknown.* ──► github-events- │
+ │ unknown │
+ └───────────────────────────────────────────┘
+
+ build-jobs (Fanout)
+ ┌───────────────────────────────────────────┐
+ │ │
+ │ ──► build-inputs-x86_64-linux │
+ │ ──► build-inputs-aarch64-linux │
+ │ ──► build-inputs-x86_64-darwin │
+ │ ──► build-inputs-aarch64-darwin │
+ │ ──► build-inputs-x86_64-windows │
+ │ ──► build-inputs-aarch64-windows │
+ │ ──► build-inputs-x86_64-freebsd │
+ └───────────────────────────────────────────┘
+
+ build-results (Fanout)
+ ┌───────────────────────────────────────────┐
+ │ ──► build-results │
+ └───────────────────────────────────────────┘
+
+ logs (Topic)
+ ┌───────────────────────────────────────────┐
+ │ *.* ──► logs (ephemeral) │
+ └───────────────────────────────────────────┘
+
+ stats (Fanout)
+ ┌───────────────────────────────────────────┐
+ │ ──► stats-events │
+ └───────────────────────────────────────────┘
+```
+
+---
+
+## Direct Queue Publishing
+
+Some messages bypass exchanges and are published directly to queues:
+
+| Source | Target Queue | Method |
+|--------|-------------|--------|
+| evaluation-filter | `mass-rebuild-check-jobs` | `publish_serde_action(None, Some("mass-rebuild-check-jobs"))` |
+| github-comment-filter | `build-inputs-{system}` | `publish_serde_action(None, Some("build-inputs-x86_64-linux"))` |
+| push-filter | `build-inputs-{system}` | `publish_serde_action(None, Some("build-inputs-x86_64-linux"))` |
+
+When the exchange is `None` (empty string `""`), AMQP uses the **default
+exchange**, which routes messages directly to the queue named by the routing key.
+
+---
+
+## Message Acknowledgment Patterns
+
+### Typical Worker Flow
+
+```
+1. Receive message from queue
+2. Deserialize (msg_to_job)
+3. Process (consumer)
+4. Return [Action::Publish(...), Action::Publish(...), Action::Ack]
+5. All Publish actions are executed
+6. Final Ack removes the message from the queue
+```
+
+### Error Handling
+
+| Situation | Action | Effect |
+|-----------|--------|--------|
+| Job decoded, processed successfully | `Ack` | Message removed from queue |
+| Temporary error (e.g., expired creds) | `NackRequeue` | Message returned to queue for retry |
+| Permanent error (e.g., force-pushed) | `Ack` | Message discarded (no point retrying) |
+| Decode failure | `panic!` or `Err` | Consumer thread crashes (message stays in queue) |
+
+### Builder Flow (Notify Worker)
+
+```
+1. Receive message
+2. Deserialize (msg_to_job)
+3. Begin build
+4. notifier.tell(Publish(BuildLogStart)) → logs exchange
+5. For each line of build output:
+ notifier.tell(Publish(BuildLogMsg)) → logs exchange
+6. notifier.tell(Publish(BuildResult)) → build-results exchange
+7. notifier.tell(Ack) → acknowledge original message
+```
+
+---
+
+## Connection Management
+
+### Creating a Connection
+
+```rust
+// easylapin.rs
+pub async fn from_config(cfg: &RabbitMqConfig) -> Result<Connection, lapin::Error> {
+ let opts = ConnectionProperties::default()
+ .with_client_property("tickborg_version".into(), tickborg::VERSION.into());
+ Connection::connect(&cfg.as_uri()?, opts).await
+}
+```
+
+The connection URI is constructed from the config:
+
+```rust
+impl RabbitMqConfig {
+ pub fn as_uri(&self) -> Result<String, std::io::Error> {
+ let password = std::fs::read_to_string(&self.password_file)?;
+ Ok(format!(
+ "{}://{}:{}@{}/{}",
+ if self.ssl { "amqps" } else { "amqp" },
+ self.username, password, self.host,
+ self.virtualhost.clone().unwrap_or_else(|| "/".to_owned()),
+ ))
+ }
+}
+```
+
+### Channel Creation
+
+Each binary creates one or more channels from its connection:
+
+```rust
+let conn = easylapin::from_config(&cfg.rabbitmq).await?;
+let mut chan = conn.create_channel().await?;
+```
+
+The builder creates one channel per system architecture:
+
+```rust
+for system in &cfg.build.system {
+ handles.push(create_handle(&conn, &cfg, system.to_string()).await?);
+}
+// Each create_handle call does: conn.create_channel().await?
+```
+
+### Connection Lifecycle
+
+Connections are held for the lifetime of the process. When the main consumer
+future completes (all messages consumed or an error), the connection is dropped:
+
+```rust
+handle.await;
+drop(conn); // Close connection.
+info!("Closed the session... EOF");
+```
+
+---
+
+## Consumer Tags
+
+Each consumer is identified by a unique tag derived from the runner identity:
+
+```rust
+easyamqp::ConsumeConfig {
+ queue: queue_name.clone(),
+ consumer_tag: format!("{}-builder", cfg.whoami()),
+ // ...
+}
+```
+
+Where `whoami()` returns `"{identity}-{systems}"`, with the configured systems
+joined by commas:
+
+```rust
+impl Config {
+ pub fn whoami(&self) -> String {
+ format!("{}-{}", self.runner.identity, self.build.system.join(","))
+ }
+}
+```
+
+This ensures that consumer tags are unique across multiple instances and
+architectures.
diff --git a/docs/handbook/ofborg/overview.md b/docs/handbook/ofborg/overview.md
new file mode 100644
index 0000000000..51cc18cb83
--- /dev/null
+++ b/docs/handbook/ofborg/overview.md
@@ -0,0 +1,571 @@
+# Tickborg (ofborg) — Overview
+
+## What is Tickborg?
+
+Tickborg is the distributed Continuous Integration (CI) bot purpose-built for the
+**Project Tick monorepo**. It is a Rust-based system derived from the original
+[ofborg](https://github.com/NixOS/ofborg) — a CI system created for the NixOS
+project — and adapted for the multi-project, multi-language, multi-platform
+reality of Project Tick.
+
+Where the original ofborg was tightly coupled to Nix package evaluation, tickborg
+has been generalised to handle arbitrary build systems (CMake, Meson, Autotools,
+Cargo, Gradle, Make, and custom commands) while retaining the proven AMQP-based
+distributed worker architecture that made ofborg reliable at scale.
+
+The crate name remains **`tickborg`** in code, the workspace lives under
+`ofborg/` in the Project Tick tree, and the bot responds to the handle
+**`@tickbot`** in GitHub comments.
+
+---
+
+## High-Level Goals
+
+| Goal | How Tickborg achieves it |
+|------|--------------------------|
+| **Automated PR evaluation** | Every opened / synchronised PR is evaluated for which sub-projects changed and builds are scheduled automatically. |
+| **On-demand builds** | Maintainers comment `@tickbot build <attr>` or `@tickbot eval` on a PR to trigger builds or re-evaluations. |
+| **Push-triggered CI** | Direct pushes to protected branches (`main`, `staging`, etc.) are detected and build jobs are dispatched. |
+| **Multi-platform builds** | Builds can be fanned out to `x86_64-linux`, `aarch64-linux`, `x86_64-darwin`, `aarch64-darwin`, `x86_64-windows`, `aarch64-windows`, and `x86_64-freebsd`. |
+| **GitHub Check Runs** | Build results are reported back via the GitHub Checks API, giving inline status on every PR. |
+| **Build log collection** | Build output is streamed over AMQP to a central log collector and served via a log viewer web UI. |
+| **Prometheus metrics** | Operational statistics are published to RabbitMQ and exposed on a `/metrics`-compatible HTTP endpoint. |
+
+---
+
+## Design Principles
+
+### 1. Message-Oriented Architecture
+
+Every component communicates exclusively through **RabbitMQ (AMQP 0-9-1)**
+messages. There is no shared database, no direct RPC between services, and no
+in-memory coupling between workers. This means:
+
+- Each worker binary can be deployed, scaled, and restarted independently.
+- Work is durable — RabbitMQ queues are declared `durable: true` and messages
+ are published with `delivery_mode: 2` (persistent).
+- Load balancing is implicit: multiple builder instances consuming from the same
+ queue will each receive a fair share of jobs via `basic_qos(1)`.
+
+### 2. Worker Trait Abstraction
+
+All business logic is expressed through two traits:
+
+```rust
+// tickborg/src/worker.rs
+pub trait SimpleWorker: Send {
+ type J: Send;
+ fn consumer(&mut self, job: &Self::J) -> impl Future<Output = Actions>;
+ fn msg_to_job(
+ &mut self, method: &str, headers: &Option<String>, body: &[u8],
+ ) -> impl Future<Output = Result<Self::J, String>>;
+}
+```
+
+```rust
+// tickborg/src/notifyworker.rs
+#[async_trait]
+pub trait SimpleNotifyWorker {
+ type J;
+ async fn consumer(
+ &self, job: Self::J,
+ notifier: Arc<dyn NotificationReceiver + Send + Sync>,
+ );
+ fn msg_to_job(
+ &self, routing_key: &str, content_type: &Option<String>, body: &[u8],
+ ) -> Result<Self::J, String>;
+}
+```
+
+`SimpleWorker` is for purely functional message processors: receive a message,
+return a list of `Action`s. `SimpleNotifyWorker` is for long-running tasks (like
+builds) that need to stream intermediate results back during processing.
+
+### 3. One Binary per Concern
+
+Each responsibility is compiled into its own binary target under
+`tickborg/src/bin/`:
+
+| Binary | Role |
+|--------|------|
+| `github-webhook-receiver` | HTTP server that validates GitHub webhook payloads, verifies HMAC-SHA256 signatures, and publishes them to the `github-events` exchange. |
+| `evaluation-filter` | Consumes `pull_request.*` events and decides whether a PR warrants evaluation. Publishes `EvaluationJob` to `mass-rebuild-check-jobs`. |
+| `github-comment-filter` | Consumes `issue_comment.*` events, parses `@tickbot` commands, and publishes `BuildJob` messages. |
+| `github-comment-poster` | Consumes `build-results` and creates GitHub Check Runs. |
+| `mass-rebuilder` | Performs full monorepo evaluation on a PR checkout: detects changed projects, schedules builds. |
+| `builder` | Executes actual builds using the configured build system (CMake, Cargo, etc.) and reports results. |
+| `push-filter` | Consumes `push.*` events and creates build jobs for pushes to tracked branches. |
+| `log-message-collector` | Collects streaming build log messages and writes them to disk. |
+| `logapi` | HTTP server that serves collected build logs via a REST API. |
+| `stats` | Collects stat events from RabbitMQ and exposes Prometheus metrics on port 9898. |
+| `build-faker` | Development/testing tool that publishes fake build jobs. |
+
+---
+
+## Key Data Structures
+
+### Repo
+
+```rust
+// tickborg/src/message/common.rs
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct Repo {
+ pub owner: String,
+ pub name: String,
+ pub full_name: String,
+ pub clone_url: String,
+}
+```
+
+### Pr
+
+```rust
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+pub struct Pr {
+ pub target_branch: Option<String>,
+ pub number: u64,
+ pub head_sha: String,
+}
+```
+
+### PushTrigger
+
+```rust
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+pub struct PushTrigger {
+ pub head_sha: String,
+ pub branch: String,
+ pub before_sha: Option<String>,
+}
+```
+
+### BuildJob
+
+```rust
+// tickborg/src/message/buildjob.rs
+#[derive(Serialize, Deserialize, Debug)]
+pub struct BuildJob {
+ pub repo: Repo,
+ pub pr: Pr,
+ pub subset: Option<Subset>,
+ pub attrs: Vec<String>,
+ pub request_id: String,
+ pub logs: Option<ExchangeQueue>,
+ pub statusreport: Option<ExchangeQueue>,
+ pub push: Option<PushTrigger>,
+}
+```
+
+### BuildResult
+
+```rust
+// tickborg/src/message/buildresult.rs
+#[derive(Serialize, Deserialize, Debug)]
+pub enum BuildResult {
+ V1 {
+ tag: V1Tag,
+ repo: Repo,
+ pr: Pr,
+ system: String,
+ output: Vec<String>,
+ attempt_id: String,
+ request_id: String,
+ status: BuildStatus,
+ skipped_attrs: Option<Vec<String>>,
+ attempted_attrs: Option<Vec<String>>,
+ push: Option<PushTrigger>,
+ },
+ Legacy { /* ... backward compat ... */ },
+}
+```
+
+### BuildStatus
+
+```rust
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
+pub enum BuildStatus {
+ Skipped,
+ Success,
+ Failure,
+ TimedOut,
+ HashMismatch,
+ UnexpectedError { err: String },
+}
+```
+
+---
+
+## Supported Build Systems
+
+The `BuildExecutor` struct in `tickborg/src/buildtool.rs` supports:
+
+```rust
+pub enum BuildSystem {
+ CMake,
+ Meson,
+ Autotools,
+ Cargo,
+ Gradle,
+ Make,
+ Custom { command: String },
+}
+```
+
+For each build system, tickborg knows how to invoke the configure, build, and
+test phases. A `ProjectBuildConfig` ties a sub-project to its build system:
+
+```rust
+pub struct ProjectBuildConfig {
+ pub name: String,
+ pub path: String,
+ pub build_system: BuildSystem,
+ pub build_timeout_seconds: u16,
+ pub configure_args: Vec<String>,
+ pub build_args: Vec<String>,
+ pub test_command: Option<Vec<String>>,
+}
+```
+
+---
+
+## Supported Platforms (Systems)
+
+```rust
+// tickborg/src/systems.rs
+pub enum System {
+ X8664Linux,
+ Aarch64Linux,
+ X8664Darwin,
+ Aarch64Darwin,
+ X8664Windows,
+ Aarch64Windows,
+ X8664FreeBSD,
+}
+```
+
+Primary CI platforms (used for untrusted users):
+
+- `x86_64-linux`
+- `x86_64-darwin`
+- `x86_64-windows`
+
+Trusted users get access to all seven platforms, including ARM and FreeBSD.
+
+---
+
+## Comment Parser
+
+Users interact with tickborg by posting comments on GitHub PRs/issues:
+
+```
+@tickbot build meshmc
+@tickbot eval
+@tickbot test mnv
+@tickbot build meshmc json4cpp neozip
+```
+
+The parser is implemented in `tickborg/src/commentparser.rs` using the `nom`
+parser combinator library. It produces:
+
+```rust
+pub enum Instruction {
+ Build(Subset, Vec<String>),
+ Test(Vec<String>),
+ Eval,
+}
+
+pub enum Subset {
+ Project,
+}
+```
+
+Multiple commands can appear in a single comment, even interspersed with prose:
+
+```markdown
+I noticed the target was broken — let's re-eval:
+@tickbot eval
+
+Also, try building meshmc:
+@tickbot build meshmc
+```
+
+---
+
+## Access Control (ACL)
+
+```rust
+// tickborg/src/acl.rs
+pub struct Acl {
+ trusted_users: Option<Vec<String>>,
+ repos: Vec<String>,
+}
+```
+
+- `repos` — list of GitHub repositories tickborg is responsible for.
+- `trusted_users` — users who can build on *all* architectures (including ARM,
+ FreeBSD). When `None` (disabled), everyone gets unrestricted access.
+- Non-trusted users only build on primary platforms.
+
+```rust
+impl Acl {
+ pub fn is_repo_eligible(&self, name: &str) -> bool;
+ pub fn build_job_architectures_for_user_repo(
+ &self, user: &str, repo: &str
+ ) -> Vec<System>;
+ pub fn can_build_unrestricted(&self, user: &str, repo: &str) -> bool;
+}
+```
+
+---
+
+## Project Tagger
+
+The `ProjectTagger` in `tickborg/src/tagger.rs` analyses changed files in a PR
+and generates labels:
+
+```rust
+pub struct ProjectTagger {
+ selected: Vec<String>,
+}
+
+impl ProjectTagger {
+ pub fn analyze_changes(&mut self, changed_files: &[String]);
+ pub fn tags_to_add(&self) -> Vec<String>;
+}
+```
+
+It produces labels like:
+- `project: meshmc`
+- `project: mnv`
+- `scope: ci`
+- `scope: docs`
+- `scope: root`
+
+---
+
+## The Monorepo Evaluation Strategy
+
+When a PR is evaluated, the `MonorepoStrategy` in
+`tickborg/src/tasks/eval/monorepo.rs` implements the `EvaluationStrategy` trait:
+
+```rust
+pub trait EvaluationStrategy {
+ fn pre_clone(&mut self) -> impl Future<Output = StepResult<()>>;
+ fn on_target_branch(&mut self, co: &Path, status: &mut CommitStatus)
+ -> impl Future<Output = StepResult<()>>;
+ fn after_fetch(&mut self, co: &CachedProjectCo) -> StepResult<()>;
+ fn after_merge(&mut self, status: &mut CommitStatus)
+ -> impl Future<Output = StepResult<()>>;
+ fn evaluation_checks(&self) -> Vec<EvalChecker>;
+ fn all_evaluations_passed(&mut self, status: &mut CommitStatus)
+ -> impl Future<Output = StepResult<EvaluationComplete>>;
+}
+```
+
+The strategy:
+
+1. Labels the PR from its title (extracting project names like `meshmc`,
+ `mnv`, etc. using regex word boundaries).
+2. Parses Conventional Commit messages to find affected scopes.
+3. Uses file-change detection to identify which sub-projects changed.
+4. Returns an `EvaluationComplete` containing `BuildJob`s to be dispatched.
+
+---
+
+## How It All Fits Together
+
+```
+GitHub Webhook
+ │
+ ▼
+┌──────────────────┐
+│ Webhook Receiver │──► github-events (Topic Exchange)
+└──────────────────┘ │
+ ┌─────────────────┼──────────────────┐
+ ▼ ▼ ▼
+ ┌─────────────┐ ┌───────────────┐ ┌──────────────┐
+ │ Eval Filter │ │ Comment Filter│ │ Push Filter │
+ └──────┬──────┘ └──────┬────────┘ └──────┬───────┘
+ │ │ │
+ ▼ ▼ ▼
+ mass-rebuild- build-jobs build-inputs-*
+ check-jobs (Fanout) queues
+ │ │ │
+ ▼ ▼ ▼
+ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐
+ │Mass Rebuilder │ │ Builder │ │ Builder │
+ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘
+ │ │ │
+ └────────┬────────┘ │
+ ▼ ▼
+ build-results build-results
+ (Fanout Exchange) (Fanout Exchange)
+ │
+ ▼
+ ┌────────────────┐ ┌──────────────────┐
+ │ Comment Poster │ │ Log Collector │
+ └────────────────┘ └──────────────────┘
+ │ │
+ ▼ ▼
+ GitHub Checks API /var/log/tickborg/
+```
+
+---
+
+## Repository Layout
+
+```
+ofborg/
+├── Cargo.toml # Workspace root
+├── Cargo.lock # Pinned dependency versions
+├── docker-compose.yml # Full stack for local dev / production
+├── Dockerfile # Multi-stage build for all binaries
+├── service.nix # NixOS module for systemd services
+├── flake.nix # Nix flake for dev shell & building
+├── example.config.json # Example configuration file
+├── config.production.json # Production config template
+├── config.public.json # Public (non-secret) config
+├── deploy/ # Deployment scripts
+├── doc/ # Legacy upstream docs
+├── ofborg/ # Original ofborg crate (deprecated)
+├── ofborg-simple-build/ # Original simple build (deprecated)
+├── ofborg-viewer/ # Log viewer web UI (JavaScript)
+├── tickborg/ # Main crate
+│ ├── Cargo.toml # Crate manifest with all dependencies
+│ ├── build.rs # Build script (generates events.rs)
+│ ├── src/
+│ │ ├── lib.rs # Library root — module declarations
+│ │ ├── bin/ # Binary entry points (11 binaries)
+│ │ ├── acl.rs # Access control lists
+│ │ ├── asynccmd.rs # Async command execution
+│ │ ├── buildtool.rs # Build system abstraction
+│ │ ├── checkout.rs # Git checkout / caching
+│ │ ├── clone.rs # Git clone trait
+│ │ ├── commentparser.rs # @tickbot command parser (nom)
+│ │ ├── commitstatus.rs # GitHub commit status wrapper
+│ │ ├── config.rs # Configuration types & loading
+│ │ ├── easyamqp.rs # AMQP config types & traits
+│ │ ├── easylapin.rs # lapin (AMQP) integration layer
+│ │ ├── evalchecker.rs # Generic command checker
+│ │ ├── files.rs # File utilities
+│ │ ├── ghevent/ # GitHub event type definitions
+│ │ ├── locks.rs # File-based locking
+│ │ ├── message/ # Message types (jobs, results, logs)
+│ │ ├── notifyworker.rs # Streaming notification worker trait
+│ │ ├── stats.rs # Metrics / event system
+│ │ ├── systems.rs # Platform / architecture enum
+│ │ ├── tagger.rs # PR label tagger
+│ │ ├── tasks/ # Task implementations
+│ │ ├── worker.rs # Core worker trait
+│ │ └── writetoline.rs # Line-based file writer
+│ ├── test-nix/ # Test fixtures (Nix-era, kept)
+│ ├── test-scratch/ # Scratch test data
+│ └── test-srcs/ # Test source data (JSON events)
+└── tickborg-simple-build/ # Simplified build tool crate
+ ├── Cargo.toml
+ └── src/
+```
+
+---
+
+## Technology Stack
+
+| Component | Technology |
+|-----------|-----------|
+| Language | Rust (Edition 2024) |
+| Async runtime | Tokio (multi-thread) |
+| AMQP client | lapin 4.3 |
+| HTTP server | hyper 1.0 + hyper-util |
+| JSON | serde + serde_json |
+| GitHub API | hubcaps (custom fork) |
+| Logging | tracing + tracing-subscriber |
+| Parser | nom 8 |
+| Cryptography | hmac + sha2 (webhook verification) |
+| Concurrency | parking_lot, tokio::sync |
+| UUID | uuid v4 |
+| Caching | lru-cache |
+| File locking | fs2 |
+| Date/time | chrono |
+
+---
+
+## Versioning
+
+The crate version is declared in `tickborg/Cargo.toml`:
+
+```toml
+[package]
+name = "tickborg"
+version = "0.1.0"
+```
+
+The version is accessible at runtime via:
+
+```rust
+pub const VERSION: &str = env!("CARGO_PKG_VERSION");
+```
+
+It is also embedded in the RabbitMQ connection properties:
+
+```rust
+let opts = ConnectionProperties::default()
+ .with_client_property("tickborg_version".into(), tickborg::VERSION.into());
+```
+
+---
+
+## Relation to the Original ofborg
+
+Tickborg was forked from ofborg (NixOS/ofborg) and adapted:
+
+| Aspect | ofborg | tickborg |
+|--------|--------|----------|
+| Purpose | Nix package evaluation for nixpkgs | Monorepo CI for Project Tick |
+| Build system | `nix-build` only | CMake, Meson, Cargo, Gradle, Make, Custom |
+| Bot handle | `@ofborg` | `@tickbot` |
+| Platforms | Linux, macOS | Linux, macOS, Windows, FreeBSD |
+| Evaluation | Nix expression evaluation | File-change detection + project mapping |
+| Package crate | `ofborg` | `tickborg` |
+
+The `ofborg/` and `ofborg-simple-build/` directories are kept for reference but
+are no longer compiled as part of the workspace.
+
+---
+
+## Quick Start (for developers)
+
+```bash
+# Enter the dev shell (requires Nix)
+nix develop ./ofborg
+
+# Or without Nix, ensure Rust 2024+ is installed
+cd ofborg
+cargo build --workspace
+
+# Run tests
+cargo test --workspace
+
+# Start local infra
+docker compose up -d rabbitmq
+```
+
+See [building.md](building.md) for comprehensive build instructions and
+[configuration.md](configuration.md) for setting up a config file.
+
+---
+
+## Further Reading
+
+- [architecture.md](architecture.md) — Crate structure, module hierarchy, worker pattern
+- [building.md](building.md) — Cargo build, dependencies, features, build targets
+- [webhook-receiver.md](webhook-receiver.md) — GitHub webhook handling
+- [message-system.md](message-system.md) — AMQP/RabbitMQ messaging
+- [build-executor.md](build-executor.md) — Build execution, build system abstraction
+- [evaluation-system.md](evaluation-system.md) — Monorepo evaluation, project detection
+- [github-integration.md](github-integration.md) — GitHub API interaction
+- [amqp-infrastructure.md](amqp-infrastructure.md) — RabbitMQ connection management
+- [deployment.md](deployment.md) — NixOS module, Docker Compose
+- [configuration.md](configuration.md) — Config file format, environment variables
+- [data-flow.md](data-flow.md) — End-to-end data flow
+- [code-style.md](code-style.md) — Rust coding conventions
+- [contributing.md](contributing.md) — Contribution guide
diff --git a/docs/handbook/ofborg/webhook-receiver.md b/docs/handbook/ofborg/webhook-receiver.md
new file mode 100644
index 0000000000..7eddf7173b
--- /dev/null
+++ b/docs/handbook/ofborg/webhook-receiver.md
@@ -0,0 +1,470 @@
+# Tickborg — Webhook Receiver
+
+## Overview
+
+The **GitHub Webhook Receiver** (`github-webhook-receiver`) is the entry point
+for all GitHub events into the tickborg system. It is an HTTP server that:
+
+1. Listens for incoming POST requests from GitHub's webhook delivery system.
+2. Validates the HMAC-SHA256 signature of every payload.
+3. Extracts the event type from the `X-Github-Event` header.
+4. Parses the payload to determine the target repository.
+5. Publishes the raw payload to the `github-events` RabbitMQ topic exchange.
+6. Declares and binds the downstream queues that other workers consume from.
+
+**Source file:** `tickborg/src/bin/github-webhook-receiver.rs`
+
+---
+
+## HTTP Server
+
+The webhook receiver uses **hyper 1.0** directly — no web framework is
+involved. The server is configured to listen on the address specified in the
+configuration file:
+
+```rust
+let addr: SocketAddr = listen.parse().expect("Invalid listen address");
+let listener = TcpListener::bind(addr).await?;
+```
+
+The main accept loop:
+
+```rust
+loop {
+ let (stream, _) = listener.accept().await?;
+ let io = TokioIo::new(stream);
+
+ let secret = webhook_secret.clone();
+ let chan = chan.clone();
+
+ tokio::task::spawn(async move {
+ let service = service_fn(move |req| {
+ handle_request(req, secret.clone(), chan.clone())
+ });
+ http1::Builder::new().serve_connection(io, service).await
+ });
+}
+```
+
+Each incoming connection is spawned as an independent tokio task. The service
+function (`handle_request`) processes one request at a time per connection.
+
+---
+
+## Request Handling
+
+### HTTP Method Validation
+
+```rust
+if req.method() != Method::POST {
+ return Ok(empty_response(StatusCode::METHOD_NOT_ALLOWED));
+}
+```
+
+Only `POST` requests are accepted. Any other method receives a `405 Method Not
+Allowed`.
+
+### Header Extraction
+
+Three headers are extracted before consuming the request body:
+
+```rust
+let sig_header = req.headers().get("X-Hub-Signature-256")
+ .and_then(|v| v.to_str().ok())
+ .map(|s| s.to_string());
+
+let event_type = req.headers().get("X-Github-Event")
+ .and_then(|v| v.to_str().ok())
+ .map(|s| s.to_string());
+
+let content_type = req.headers().get("Content-Type")
+ .and_then(|v| v.to_str().ok())
+ .map(|s| s.to_string());
+```
+
+### Body Collection
+
+```rust
+let raw = match req.collect().await {
+ Ok(collected) => collected.to_bytes(),
+ Err(e) => {
+ warn!("Failed to read body from client: {e}");
+ return Ok(response(StatusCode::INTERNAL_SERVER_ERROR, "Failed to read body"));
+ }
+};
+```
+
+The full body is collected into a `Bytes` buffer using `http-body-util`'s
+`BodyExt::collect()`.
+
+---
+
+## HMAC-SHA256 Signature Verification
+
+GitHub sends a `X-Hub-Signature-256` header with the format:
+
+```
+sha256=<hex-encoded HMAC-SHA256>
+```
+
+The webhook receiver verifies this signature against the configured webhook
+secret:
+
+### Step 1: Parse the signature header
+
+```rust
+let Some(sig) = sig_header else {
+ return Ok(response(StatusCode::BAD_REQUEST, "Missing signature header"));
+};
+
+let mut components = sig.splitn(2, '=');
+let Some(algo) = components.next() else {
+ return Ok(response(StatusCode::BAD_REQUEST, "Signature hash method missing"));
+};
+let Some(hash) = components.next() else {
+ return Ok(response(StatusCode::BAD_REQUEST, "Signature hash missing"));
+};
+let Ok(hash) = hex::decode(hash) else {
+ return Ok(response(StatusCode::BAD_REQUEST, "Invalid signature hash hex"));
+};
+```
+
+### Step 2: Validate the algorithm
+
+```rust
+if algo != "sha256" {
+ return Ok(response(StatusCode::BAD_REQUEST, "Invalid signature hash method"));
+}
+```
+
+Only SHA-256 is accepted. GitHub also supports SHA-1 (`X-Hub-Signature`) but
+tickborg does not accept it.
+
+### Step 3: Compute and compare
+
+```rust
+let Ok(mut mac) = Hmac::<Sha256>::new_from_slice(webhook_secret.as_bytes()) else {
+ error!("Unable to create HMAC from secret");
+ return Ok(response(StatusCode::INTERNAL_SERVER_ERROR, "Internal error"));
+};
+
+mac.update(&raw);
+
+if mac.verify_slice(&hash).is_err() {
+ return Ok(response(StatusCode::FORBIDDEN, "Signature verification failed"));
+}
+```
+
+The HMAC is computed using `hmac::Hmac<sha2::Sha256>` from the `hmac` and `sha2`
+crates. `verify_slice` performs a constant-time comparison to prevent timing
+attacks.
+
+---
+
+## Event Type Routing
+
+After signature verification, the event type and repository are determined:
+
+```rust
+let event_type = event_type.unwrap_or_else(|| "unknown".to_owned());
+
+let body_json: GenericWebhook = match serde_json::from_slice(&raw) {
+ Ok(webhook) => webhook,
+ Err(_) => {
+ // If we can't parse the body, route to the unknown queue
+ // ...
+ }
+};
+
+let routing_key = format!("{}.{}", event_type, body_json.repository.full_name);
+```
+
+The `GenericWebhook` struct is minimal — it only extracts the `repository`
+field:
+
+```rust
+// ghevent/common.rs
+#[derive(Serialize, Deserialize, Debug)]
+pub struct GenericWebhook {
+ pub repository: Repository,
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct Repository {
+ pub owner: User,
+ pub name: String,
+ pub full_name: String,
+ pub clone_url: String,
+}
+```
+
+### Routing Key Format
+
+```
+{event_type}.{owner}/{repo}
+```
+
+Examples:
+- `pull_request.project-tick/Project-Tick`
+- `issue_comment.project-tick/Project-Tick`
+- `push.project-tick/Project-Tick`
+- `unknown.project-tick/Project-Tick`
+
+---
+
+## AMQP Setup
+
+The `setup_amqp` function declares the exchange and all downstream queues:
+
+### Exchange Declaration
+
+```rust
+chan.declare_exchange(easyamqp::ExchangeConfig {
+ exchange: "github-events".to_owned(),
+ exchange_type: easyamqp::ExchangeType::Topic,
+ passive: false,
+ durable: true,
+ auto_delete: false,
+ no_wait: false,
+ internal: false,
+}).await?;
+```
+
+The `github-events` exchange is a **topic** exchange. This means routing keys
+are matched against binding patterns using `.`-separated segments and `*`/`#`
+wildcards.
+
+### Queue Declarations and Bindings
+
+| Queue | Binding Pattern | Consumer |
+|-------|----------------|----------|
+| `build-inputs` | `issue_comment.*` | github-comment-filter |
+| `github-events-unknown` | `unknown.*` | (monitoring/debugging) |
+| `mass-rebuild-check-inputs` | `pull_request.*` | evaluation-filter |
+| `push-build-inputs` | `push.*` | push-filter |
+
+Each queue is declared with:
+
+```rust
+chan.declare_queue(easyamqp::QueueConfig {
+ queue: queue_name.clone(),
+ passive: false,
+ durable: true, // survive broker restart
+ exclusive: false, // accessible by other connections
+ auto_delete: false, // don't delete when last consumer disconnects
+ no_wait: false,
+}).await?;
+```
+
+And bound to the exchange:
+
+```rust
+chan.bind_queue(easyamqp::BindQueueConfig {
+ queue: queue_name.clone(),
+ exchange: "github-events".to_owned(),
+ routing_key: Some(String::from("issue_comment.*")),
+ no_wait: false,
+}).await?;
+```
+
+---
+
+## Message Publishing
+
+After validation and routing key construction, the raw GitHub payload is
+published:
+
+```rust
+let props = BasicProperties::default()
+ .with_content_type("application/json".into())
+ .with_delivery_mode(2); // persistent
+
+chan.lock().await.basic_publish(
+ "github-events".into(),
+ routing_key.into(),
+ BasicPublishOptions::default(),
+ &raw,
+ props,
+).await?;
+```
+
+Key properties:
+- **delivery_mode = 2**: Message is persisted to disk by RabbitMQ.
+- **content_type**: `application/json` — the raw GitHub payload.
+- The **entire raw body** is published, not a parsed/re-serialized version.
+ This preserves all fields that downstream consumers might need, even if the
+ webhook receiver itself doesn't parse them.
+
+---
+
+## Configuration
+
+The webhook receiver reads from the `github_webhook_receiver` section of the
+config:
+
+```rust
+#[derive(Serialize, Deserialize, Debug)]
+pub struct GithubWebhookConfig {
+ pub listen: String,
+ pub webhook_secret_file: String,
+ pub rabbitmq: RabbitMqConfig,
+}
+```
+
+Example configuration:
+
+```json
+{
+ "github_webhook_receiver": {
+ "listen": "0.0.0.0:9899",
+ "webhook_secret_file": "/run/secrets/tickborg/webhook-secret",
+ "rabbitmq": {
+ "ssl": false,
+ "host": "rabbitmq:5672",
+ "virtualhost": "tickborg",
+ "username": "tickborg",
+ "password_file": "/run/secrets/tickborg/rabbitmq-password"
+ }
+ }
+}
+```
+
+The webhook secret is read from a file (not inline in the config) to prevent
+accidental exposure in version control.
+
+---
+
+## Response Codes
+
+| Code | Meaning |
+|------|---------|
+| `200 OK` | Webhook received and published successfully |
+| `400 Bad Request` | Missing or malformed signature header |
+| `403 Forbidden` | Signature verification failed |
+| `405 Method Not Allowed` | Non-POST request |
+| `500 Internal Server Error` | Body read failure or HMAC creation failure |
+
+---
+
+## GitHub Webhook Configuration
+
+### Required Events
+
+The GitHub App or webhook should be configured to send:
+
+| Event | Used By |
+|-------|---------|
+| `pull_request` | evaluation-filter (auto-eval on PR open/sync) |
+| `issue_comment` | github-comment-filter (@tickbot commands) |
+| `push` | push-filter (branch push CI) |
+| `check_run` | (optional, for re-run triggers) |
+
+### Required Permissions (GitHub App)
+
+| Permission | Level | Purpose |
+|------------|-------|---------|
+| Pull requests | Read & Write | Read PR details, post comments |
+| Commit statuses | Read & Write | Set commit status checks |
+| Issues | Read & Write | Read comments, manage labels |
+| Contents | Read | Clone repository, read files |
+| Checks | Read & Write | Create/update check runs |
+
+### Webhook URL
+
+```
+https://<your-domain>:9899/github-webhooks
+```
+
+The receiver accepts POSTs on any path — the path segment is not validated.
+However, conventionally `/github-webhooks` is used.
+
+---
+
+## Security Considerations
+
+### Signature Verification
+
+**Every** request must have a valid `X-Hub-Signature-256` header. Requests
+without this header, or with an invalid signature, are rejected before any
+processing occurs. The HMAC comparison uses `verify_slice` which is
+constant-time.
+
+### Secret File
+
+The webhook secret is read from a file rather than an environment variable or
+inline config value. This:
+- Prevents accidental exposure in process listings (`/proc/*/environ`)
+- Allows secrets management via Docker secrets, Kubernetes secrets, or
+ NixOS `sops-nix`
+
+### No Path Traversal
+
+The webhook receiver does not serve files or interact with the filesystem beyond
+reading the config and secret files. There is no path traversal risk.
+
+### Rate Limiting
+
+The webhook receiver does **not** implement application-level rate limiting.
+This should be handled by:
+- An upstream reverse proxy (nginx, Caddy)
+- GitHub's own delivery rate limiting
+- RabbitMQ's flow control mechanisms
+
+---
+
+## Deployment
+
+### Docker Compose
+
+```yaml
+webhook-receiver:
+ build:
+ context: .
+ dockerfile: Dockerfile
+ command: ["github-webhook-receiver", "/etc/tickborg/config.json"]
+ ports:
+ - "9899:9899"
+ volumes:
+ - ./config.json:/etc/tickborg/config.json:ro
+ - ./secrets:/run/secrets/tickborg:ro
+ depends_on:
+ rabbitmq:
+ condition: service_healthy
+ restart: unless-stopped
+```
+
+### NixOS (`service.nix`)
+
+```nix
+systemd.services."tickborg-webhook-receiver" = mkTickborgService "Webhook Receiver" {
+ binary = "github_webhook_receiver";
+};
+```
+
+Note: The binary name uses underscores (`github_webhook_receiver`) while the
+Cargo target uses hyphens (`github-webhook-receiver`). Cargo generates both
+forms but the NixOS service uses the underscore variant.
+
+---
+
+## Monitoring
+
+The webhook receiver logs:
+- Every accepted webhook (event type, routing key)
+- Signature verification failures (at `warn` level)
+- AMQP publish errors (at `error` level)
+- Body read failures (at `warn` level)
+
+Check the `github-events-unknown` queue for events that couldn't be routed to
+a handler — these indicate new event types that may need new consumers.
+
+---
+
+## Event Type Reference
+
+| GitHub Event | Routing Key Pattern | Queue | Handler |
+|-------------|--------------------|---------|---------|
+| `pull_request` | `pull_request.{owner}/{repo}` | `mass-rebuild-check-inputs` | evaluation-filter |
+| `issue_comment` | `issue_comment.{owner}/{repo}` | `build-inputs` | github-comment-filter |
+| `push` | `push.{owner}/{repo}` | `push-build-inputs` | push-filter |
+| (any other) | `unknown.{owner}/{repo}` | `github-events-unknown` | none (monitoring) |
diff --git a/docs/handbook/tomlplusplus/architecture.md b/docs/handbook/tomlplusplus/architecture.md
new file mode 100644
index 0000000000..8ba979d200
--- /dev/null
+++ b/docs/handbook/tomlplusplus/architecture.md
@@ -0,0 +1,920 @@
+# toml++ — Architecture
+
+## Overview
+
+toml++ implements a tree-based data model for TOML documents. A parsed TOML document becomes a tree of `toml::node` objects, with `toml::table` as the root. The architecture centers on:
+
+1. A polymorphic node hierarchy (`node` → `table`, `array`, `value<T>`)
+2. A recursive-descent parser that builds trees
+3. Formatter classes that serialize trees back to text
+4. A path system for structured navigation
+
+All public types live in the `toml` namespace. Internal implementation details live in `toml::impl` (an ABI-namespaced detail namespace).
+
+---
+
+## Class Hierarchy
+
+```
+toml::node (abstract base)
+├── toml::table — ordered map of key → node*
+├── toml::array — vector of node*
+└── toml::value<T> — leaf node holding a value
+ ├── value<std::string>
+ ├── value<int64_t>
+ ├── value<double>
+ ├── value<bool>
+ ├── value<toml::date>
+ ├── value<toml::time>
+ └── value<toml::date_time>
+```
+
+Supporting types:
+```
+toml::node_view<T> — non-owning optional reference to a node
+toml::key — string + source_region metadata
+toml::path — vector of path_component
+toml::path_component — key string or array index
+toml::source_position — line + column
+toml::source_region — begin + end positions + path
+toml::parse_error — error description + source_region
+toml::parse_result — table | parse_error (no-exceptions mode)
+```
+
+Formatter hierarchy:
+```
+impl::formatter (base, protected)
+├── toml::toml_formatter — TOML output
+├── toml::json_formatter — JSON output
+└── toml::yaml_formatter — YAML output
+```
+
+---
+
+## `toml::node` — The Abstract Base Class
+
+Defined in `include/toml++/impl/node.hpp`, `toml::node` is the polymorphic base of all TOML tree nodes. It is declared as `TOML_ABSTRACT_INTERFACE`, meaning it has pure virtual methods and cannot be instantiated directly.
+
+### Private Members
+
+```cpp
+class node
+{
+ private:
+ source_region source_{};
+
+ template <typename T>
+ decltype(auto) get_value_exact() const noexcept(...);
+
+ // ref_type_ and ref_type — template aliases for ref() return types
+ // do_ref() — static helper for ref() implementation
+```
+
+The `source_` member records where this node was defined in the original TOML document (line, column, file path).
+
+### Protected Members
+
+```cpp
+ protected:
+ node() noexcept;
+ node(const node&) noexcept;
+ node(node&&) noexcept;
+ node& operator=(const node&) noexcept;
+ node& operator=(node&&) noexcept;
+
+ // ref_cast<T>() — unsafe downcast helpers (all four ref-qualifications)
+ template <typename T> ref_cast_type<T, node&> ref_cast() & noexcept;
+ template <typename T> ref_cast_type<T, node&&> ref_cast() && noexcept;
+ template <typename T> ref_cast_type<T, const node&> ref_cast() const& noexcept;
+ template <typename T> ref_cast_type<T, const node&&> ref_cast() const&& noexcept;
+```
+
+Constructors and assignment operators are `protected` to prevent direct instantiation. `ref_cast<T>()` performs `reinterpret_cast`-based downcasts, used internally by `ref<T>()`.
+
+### Public Interface — Type Checks
+
+Every `node` provides a complete set of virtual type-checking methods:
+
+```cpp
+ public:
+ virtual ~node() noexcept;
+
+ // Homogeneity checks
+ virtual bool is_homogeneous(node_type ntype, node*& first_nonmatch) noexcept = 0;
+ virtual bool is_homogeneous(node_type ntype, const node*& first_nonmatch) const noexcept = 0;
+ virtual bool is_homogeneous(node_type ntype) const noexcept = 0;
+ template <typename ElemType = void>
+ bool is_homogeneous() const noexcept;
+
+ // Type identity
+ virtual node_type type() const noexcept = 0;
+ virtual bool is_table() const noexcept = 0;
+ virtual bool is_array() const noexcept = 0;
+ virtual bool is_array_of_tables() const noexcept;
+ virtual bool is_value() const noexcept = 0;
+ virtual bool is_string() const noexcept = 0;
+ virtual bool is_integer() const noexcept = 0;
+ virtual bool is_floating_point() const noexcept = 0;
+ virtual bool is_number() const noexcept = 0;
+ virtual bool is_boolean() const noexcept = 0;
+ virtual bool is_date() const noexcept = 0;
+ virtual bool is_time() const noexcept = 0;
+ virtual bool is_date_time() const noexcept = 0;
+
+ // Template type check
+ template <typename T>
+ bool is() const noexcept;
+```
+
+The `is<T>()` template dispatches to the appropriate virtual method using `if constexpr`:
+
+```cpp
+template <typename T>
+bool is() const noexcept
+{
+ using type = impl::remove_cvref<impl::unwrap_node<T>>;
+ if constexpr (std::is_same_v<type, table>)
+ return is_table();
+ else if constexpr (std::is_same_v<type, array>)
+ return is_array();
+ else if constexpr (std::is_same_v<type, std::string>)
+ return is_string();
+ // ... etc for int64_t, double, bool, date, time, date_time
+}
+```
+
+### Public Interface — Type Casts
+
+```cpp
+ // Downcasts — return nullptr if type doesn't match
+ virtual table* as_table() noexcept = 0;
+ virtual array* as_array() noexcept = 0;
+ virtual toml::value<std::string>* as_string() noexcept = 0;
+ virtual toml::value<int64_t>* as_integer() noexcept = 0;
+ virtual toml::value<double>* as_floating_point() noexcept = 0;
+ virtual toml::value<bool>* as_boolean() noexcept = 0;
+ virtual toml::value<date>* as_date() noexcept = 0;
+ virtual toml::value<time>* as_time() noexcept = 0;
+ virtual toml::value<date_time>* as_date_time() noexcept = 0;
+ // + const overloads for all of the above
+
+ // Template downcast
+ template <typename T>
+ impl::wrap_node<T>* as() noexcept;
+ template <typename T>
+ const impl::wrap_node<T>* as() const noexcept;
+```
+
+`as<T>()` is the unified template that dispatches to `as_table()`, `as_string()`, etc.
+
+### Public Interface — Value Retrieval
+
+```cpp
+ // Exact-match value retrieval
+ template <typename T>
+ optional<T> value_exact() const noexcept(...);
+
+ // Permissive value retrieval (allows conversions)
+ template <typename T>
+ optional<T> value() const noexcept(...);
+
+ // Value with default
+ template <typename T>
+ auto value_or(T&& default_value) const noexcept(...);
+```
+
+`value_exact<T>()` only succeeds if the node contains exactly type `T`. `value<T>()` is more lenient, allowing integer-to-float conversions and the like. `value_or()` returns the value if present, otherwise the given default.
+
+### Public Interface — Reference Access
+
+```cpp
+ template <typename T>
+ decltype(auto) ref() & noexcept;
+ template <typename T>
+ decltype(auto) ref() && noexcept;
+ template <typename T>
+ decltype(auto) ref() const& noexcept;
+ template <typename T>
+ decltype(auto) ref() const&& noexcept;
+```
+
+`ref<T>()` provides direct reference access to the underlying value. It asserts the type matches and is UB if it doesn't.
+
+### Public Interface — Visitation
+
+```cpp
+ template <typename Func>
+ decltype(auto) visit(Func&& visitor) & noexcept(...);
+ // + &&, const&, const&& overloads
+```
+
+Calls the visitor with the concrete node type. The visitor receives the actual `table&`, `array&`, or `value<T>&`.
+
+### Source Region
+
+```cpp
+ const source_region& source() const noexcept;
+```
+
+Returns where this node was defined in the source document.
+
+---
+
+## `toml::table` — TOML Tables
+
+Declared in `include/toml++/impl/table.hpp`, `toml::table` extends `node` and models an ordered map of keys to nodes.
+
+### Internal Storage
+
+```cpp
+class table : public node
+{
+ private:
+ using map_type = std::map<toml::key, impl::node_ptr, std::less<>>;
+ map_type map_;
+ bool inline_ = false;
+```
+
+- The backing container is `std::map<toml::key, std::unique_ptr<node>, std::less<>>`.
+- `std::less<>` enables heterogeneous lookup (search by `std::string_view` without constructing a `key`).
+- `inline_` tracks whether the table should be serialized as an inline table `{ ... }`.
+- Entries are stored in lexicographic key order, not insertion order: `std::map` sorts by `toml::key`'s comparison operators, which compare the key string content.
+
+### Iterators
+
+The table uses custom `impl::table_iterator<IsConst>` which wraps the map iterator and produces `table_proxy_pair<IsConst>` references:
+
+```cpp
+template <bool IsConst>
+struct table_proxy_pair
+{
+ using value_type = std::conditional_t<IsConst, const node, node>;
+ const toml::key& first;
+ value_type& second;
+};
+```
+
+This means iterating a table yields `(const key&, node&)` pairs, not `(const key&, unique_ptr<node>&)`. The `unique_ptr` layer is hidden.
+
+Public type aliases:
+```cpp
+using table_iterator = impl::table_iterator<false>;
+using const_table_iterator = impl::table_iterator<true>;
+```
+
+### Construction
+
+```cpp
+table() noexcept; // default
+table(const table&); // deep copy
+table(table&& other) noexcept; // move
+explicit table(std::initializer_list<impl::table_init_pair> kvps);
+```
+
+The initializer-list constructor accepts `table_init_pair` objects, each containing a key and a value:
+
+```cpp
+struct table_init_pair
+{
+ mutable toml::key key;
+ mutable node_ptr value; // std::unique_ptr<node>
+
+ template <typename K, typename V>
+ table_init_pair(K&& k, V&& v, value_flags flags = preserve_source_value_flags);
+};
+```
+
+This enables the idiomatic construction:
+```cpp
+auto tbl = toml::table{
+ { "name", "toml++" },
+ { "version", 3 },
+ { "nested", toml::table{ { "key", true } } }
+};
+```
+
+### Key Operations
+
+| Method | Description |
+|--------|-------------|
+| `size()` | Number of key-value pairs |
+| `empty()` | Whether the table is empty |
+| `get(key)` | Get node pointer by key, or nullptr |
+| `get_as<T>(key)` | Get typed node pointer, or nullptr |
+| `contains(key)` | Check if key exists |
+| `operator[](key)` | Returns `node_view` (safe, never null-deref) |
+| `at(key)` | Returns node reference, throws if missing |
+| `insert(key, val)` | Insert if not present |
+| `insert_or_assign(key, val)` | Insert or replace |
+| `emplace<T>(key, args...)` | Construct in place if not present |
+| `erase(key)` | Remove by key |
+| `erase(iterator)` | Remove by iterator |
+| `clear()` | Remove all entries |
+
+### Metadata
+
+```cpp
+bool is_inline() const noexcept;
+void is_inline(bool val) noexcept;
+```
+
+Controls inline table formatting: `{ a = 1, b = 2 }` vs. multi-line.
+
+---
+
+## `toml::array` — TOML Arrays
+
+Declared in `include/toml++/impl/array.hpp`, `toml::array` extends `node` and models a heterogeneous sequence.
+
+### Internal Storage
+
+```cpp
+class array : public node
+{
+ private:
+ std::vector<impl::node_ptr> elems_;
+```
+
+Each element is a `std::unique_ptr<node>`. The array can contain any mix of value types, tables, and nested arrays.
+
+### Iterators
+
+`impl::array_iterator<IsConst>` wraps the vector iterator and dereferences the `unique_ptr`, yielding `node&` references:
+
+```cpp
+using array_iterator = impl::array_iterator<false>;
+using const_array_iterator = impl::array_iterator<true>;
+```
+
+It satisfies `RandomAccessIterator` requirements (unlike `table_iterator` which is `BidirectionalIterator`).
+
+### Key Operations
+
+| Method | Description |
+|--------|-------------|
+| `size()` | Number of elements |
+| `empty()` | Whether the array is empty |
+| `capacity()` | Reserved capacity |
+| `reserve(n)` | Reserve capacity |
+| `shrink_to_fit()` | Release excess capacity |
+| `operator[](index)` | Returns `node&` (no bounds check) |
+| `at(index)` | Returns `node&` (bounds-checked, throws) |
+| `front()` / `back()` | First / last element |
+| `get(index)` | Returns `node*` or nullptr |
+| `get_as<T>(index)` | Returns typed pointer or nullptr |
+| `push_back(val)` | Append element |
+| `emplace_back<T>(args...)` | Construct at end |
+| `insert(pos, val)` | Insert at position |
+| `emplace(pos, args...)` | Construct at position |
+| `erase(pos)` | Remove at position |
+| `erase(first, last)` | Remove range |
+| `pop_back()` | Remove last |
+| `clear()` | Remove all |
+| `resize(n)` | Resize (default-constructed elements) |
+| `truncate(n)` | Remove elements beyond index n |
+| `flatten()` | Flatten nested arrays |
+| `prune()` | Remove empty tables and arrays recursively |
+
+### Homogeneity
+
+```cpp
+bool is_homogeneous(node_type ntype) const noexcept;
+bool is_homogeneous(node_type ntype, node*& first_nonmatch) noexcept;
+template <typename ElemType = void>
+bool is_homogeneous() const noexcept;
+```
+
+Returns `true` if all elements are the same type. Returns `false` for empty arrays.
+
+### for_each
+
+```cpp
+template <typename Func>
+array& for_each(Func&& visitor) & noexcept(...);
+```
+
+Iterates elements, calling the visitor with each concrete element type. Supports early exit by returning `bool` from the visitor (on compilers without the GCC 7 bug).
+
+---
+
+## `toml::value<T>` — Leaf Values
+
+Declared in `include/toml++/impl/value.hpp`, `toml::value<T>` is a class template holding a single TOML value.
+
+### Template Parameter Constraints
+
+`T` must be one of the native TOML value types:
+- `std::string`
+- `int64_t`
+- `double`
+- `bool`
+- `toml::date`
+- `toml::time`
+- `toml::date_time`
+
+```cpp
+template <typename ValueType>
+class value : public node
+{
+ static_assert(impl::is_native<ValueType> && !impl::is_cvref<ValueType>);
+
+ private:
+ ValueType val_;
+ value_flags flags_ = value_flags::none;
+```
+
+### Type Aliases
+
+```cpp
+using value_type = ValueType;
+using value_arg = /* conditional type */;
+```
+
+`value_arg` differs by value type:
+- `int64_t`, `double`, `bool` → passed by value
+- `std::string` → passed as `std::string_view`
+- `date`, `time`, `date_time` → passed as `const value_type&`
+
+### Key Operations
+
+```cpp
+// Access the underlying value
+ValueType& get() & noexcept;
+ValueType&& get() && noexcept;
+const ValueType& get() const& noexcept;
+const ValueType&& get() const&& noexcept;
+
+// Implicit conversion operator
+operator ValueType&() noexcept;
+operator const ValueType&() const noexcept;
+
+// Value flags (integer format: binary, octal, hex)
+value_flags flags() const noexcept;
+value<ValueType>& flags(value_flags new_flags) noexcept;
+```
+
+### Construction
+
+`value<T>` supports variadic construction via `impl::native_value_maker`:
+
+```cpp
+template <typename... Args>
+explicit value(Args&&... args);
+
+value(const value& other) noexcept;
+value(const value& other, value_flags flags) noexcept;
+value(value&& other) noexcept;
+value(value&& other, value_flags flags) noexcept;
+```
+
+Special handling exists for:
+- `char8_t` strings (C++20): converted via `reinterpret_cast`
+- Wide strings (Windows): narrowed via `impl::narrow()`
+
+---
+
+## Date/Time Types
+
+Defined in `include/toml++/impl/date_time.hpp`:
+
+### `toml::date`
+
+```cpp
+struct date
+{
+ uint16_t year;
+ uint8_t month; // 1-12
+ uint8_t day; // 1-31
+
+ constexpr date(Y y, M m, D d) noexcept;
+ // Comparison operators: ==, !=, <, <=, >, >=
+ // Stream output: YYYY-MM-DD
+};
+```
+
+### `toml::time`
+
+```cpp
+struct time
+{
+ uint8_t hour; // 0-23
+ uint8_t minute; // 0-59
+ uint8_t second; // 0-59
+ uint32_t nanosecond; // 0-999999999
+
+ constexpr time(H h, M m, S s = 0, NS ns = 0) noexcept;
+ // Comparison operators, stream output: HH:MM:SS.nnnnnnnnn
+};
+```
+
+### `toml::time_offset`
+
+```cpp
+struct time_offset
+{
+ int16_t minutes; // -1440 to +1440
+
+ constexpr time_offset(H h, M m) noexcept;
+ // Comparison operators, stream output: +HH:MM or -HH:MM
+};
+```
+
+### `toml::date_time`
+
+```cpp
+struct date_time
+{
+ toml::date date;
+ toml::time time;
+ optional<toml::time_offset> offset;
+
+ // If offset is present, it's an offset date-time
+ // If offset is absent, it's a local date-time
+ bool is_local() const noexcept;
+};
+```
+
+---
+
+## `toml::key` — Table Keys
+
+Defined in `include/toml++/impl/key.hpp`:
+
+```cpp
+class key
+{
+ private:
+ std::string key_;
+ source_region source_;
+
+ public:
+ explicit key(std::string_view k, source_region&& src = {});
+ explicit key(std::string&& k, source_region&& src = {}) noexcept;
+ explicit key(const char* k, source_region&& src = {});
+
+ // String access
+ std::string_view str() const noexcept;
+ operator std::string_view() const noexcept;
+ bool empty() const noexcept;
+
+ // Source tracking
+ const source_region& source() const noexcept;
+
+ // Comparison — compares the string content, not source position
+ friend bool operator==(const key& lhs, const key& rhs) noexcept;
+ friend bool operator<(const key& lhs, const key& rhs) noexcept;
+ // + heterogeneous comparisons with std::string_view
+};
+```
+
+Keys carry source position metadata from parsing, but comparisons only consider the string content.
+
+---
+
+## Parser Design
+
+The parser is a recursive-descent UTF-8 parser implemented in `impl::parser` (defined in `parser.inl`). It operates on a stream of UTF-8 codepoints.
+
+### Parse Entry Points
+
+```cpp
+// Defined in parser.hpp
+parse_result parse(std::string_view doc, std::string_view source_path = {});
+parse_result parse(std::string_view doc, std::string&& source_path);
+parse_result parse_file(std::string_view file_path);
+
+// Stream overloads
+parse_result parse(std::istream& doc, std::string_view source_path = {});
+```
+
+### Return Type: `parse_result`
+
+When exceptions are enabled (`TOML_EXCEPTIONS=1`):
+```cpp
+using parse_result = table; // parse() throws on error
+```
+
+When exceptions are disabled (`TOML_EXCEPTIONS=0`):
+```cpp
+class parse_result // discriminated union: table or parse_error
+{
+ bool err_;
+ union { toml::table tbl_; parse_error err_; } storage_;
+
+ public:
+ explicit operator bool() const noexcept; // true = success
+ table& table() &;
+ parse_error& error() &;
+ // + iterator accessors for safe ranged-for on failure
+};
+```
+
+### Error Type
+
+```cpp
+class parse_error /* : public std::runtime_error (with exceptions) */
+{
+ std::string_view description() const noexcept;
+ const source_region& source() const noexcept;
+};
+```
+
+### Parser Internals
+
+The `impl::parser` class stores:
+- A UTF-8 byte stream reader
+- A source position tracker
+- The root table being built
+- The current implicit table stack (for dotted keys and `[section.headers]`)
+
+Parsing proceeds top-down:
+1. Skip BOM if present
+2. Parse key-value pairs, table headers (`[table]`), and array-of-tables headers (`[[array]]`)
+3. For each key-value pair, parse the key (bare or quoted), then the value
+4. Values are parsed based on the leading character(s): strings, numbers, booleans, dates/times, arrays, inline tables
+
+Node allocation uses `impl::make_node()` which dispatches through `impl::make_node_impl_specialized()`:
+```cpp
+template <typename T>
+auto* make_node_impl_specialized(T&& val, value_flags flags)
+{
+ using unwrapped_type = unwrap_node<remove_cvref<T>>;
+ if constexpr (is_one_of<unwrapped_type, array, table>)
+ return new unwrapped_type(static_cast<T&&>(val));
+ else
+ {
+ using native_type = native_type_of<unwrapped_type>;
+ using value_type = value<native_type>;
+ return new value_type{ static_cast<T&&>(val) };
+ }
+}
+```
+
+---
+
+## Formatter Design
+
+### Base Class: `impl::formatter`
+
+Defined in `include/toml++/impl/formatter.hpp`:
+
+```cpp
+class formatter
+{
+ private:
+ const node* source_;
+ const parse_result* result_; // for no-exceptions mode
+ const formatter_constants* constants_;
+ formatter_config config_;
+ size_t indent_columns_;
+ format_flags int_format_mask_;
+ std::ostream* stream_;
+ int indent_;
+ bool naked_newline_;
+
+ protected:
+ // Stream management
+ void attach(std::ostream& stream) noexcept;
+ void detach() noexcept;
+
+ // Output primitives
+ void print_newline(bool force = false);
+ void print_indent();
+ void print_unformatted(char);
+ void print_unformatted(std::string_view);
+
+ // Typed printing
+ void print_string(std::string_view str, bool allow_multi_line, bool allow_bare, bool allow_literal_whitespace);
+ void print(const value<std::string>&);
+ void print(const value<int64_t>&);
+ void print(const value<double>&);
+ void print(const value<bool>&);
+ void print(const value<date>&);
+ void print(const value<time>&);
+ void print(const value<date_time>&);
+ void print_value(const node&, node_type);
+
+ // Configuration queries
+ bool indent_array_elements() const noexcept;
+ bool indent_sub_tables() const noexcept;
+ bool literal_strings_allowed() const noexcept;
+ bool multi_line_strings_allowed() const noexcept;
+ bool unicode_strings_allowed() const noexcept;
+ bool terse_kvps() const noexcept;
+ bool force_multiline_arrays() const noexcept;
+};
+```
+
+### Formatter Constants
+
+Each concrete formatter defines its behavior via `formatter_constants`:
+
+```cpp
+struct formatter_constants
+{
+ format_flags mandatory_flags; // always-on flags
+ format_flags ignored_flags; // always-off flags
+ std::string_view float_pos_inf; // e.g., "inf", "Infinity", ".inf"
+ std::string_view float_neg_inf;
+ std::string_view float_nan;
+ std::string_view bool_true;
+ std::string_view bool_false;
+};
+```
+
+| Formatter | pos_inf | neg_inf | nan | bool_true | bool_false |
+|-----------|---------|---------|-----|-----------|------------|
+| `toml_formatter` | `"inf"` | `"-inf"` | `"nan"` | `"true"` | `"false"` |
+| `json_formatter` | `"Infinity"` | `"-Infinity"` | `"NaN"` | `"true"` | `"false"` |
+| `yaml_formatter` | `".inf"` | `"-.inf"` | `".NAN"` | `"true"` | `"false"` |
+
+### `toml::toml_formatter`
+
+Inherits `impl::formatter`. Serializes to valid TOML format.
+
+Key behaviors:
+- Tracks a `key_path_` vector for producing `[table.paths]`
+- Manages `pending_table_separator_` for blank lines between sections
+- Uses 4-space indentation by default (`"    "sv`)
+- Respects `is_inline()` on tables
+- Default flags include all `allow_*` flags and `indentation`
+
+### `toml::json_formatter`
+
+Outputs valid JSON. Key differences from TOML formatter:
+- Mandatory `quote_dates_and_times` (dates become quoted strings)
+- Ignores `allow_literal_strings` and `allow_multi_line_strings`
+- Default includes `quote_infinities_and_nans`
+- Uses 4-space indentation
+
+### `toml::yaml_formatter`
+
+Outputs YAML. Key differences:
+- Uses 2-space indentation (`"  "sv`)
+- Mandatory `quote_dates_and_times` and `indentation`
+- Ignores `allow_multi_line_strings`
+- Custom string printing via `print_yaml_string()`
+- YAML-style inf/nan representation (`.inf`, `-.inf`, `.NAN`)
+
+### Streaming Pattern
+
+All formatters use the same attach/print/detach pattern:
+
+```cpp
+friend std::ostream& operator<<(std::ostream& lhs, toml_formatter& rhs)
+{
+ rhs.attach(lhs);
+ rhs.key_path_.clear(); // (toml_formatter specific)
+ rhs.print();
+ rhs.detach();
+ return lhs;
+}
+```
+
+---
+
+## `toml::node_view<T>` — Safe Node References
+
+Defined in `include/toml++/impl/node_view.hpp`:
+
+```cpp
+template <typename ViewedType>
+class node_view
+{
+ static_assert(impl::is_one_of<ViewedType, toml::node, const toml::node>);
+
+ private:
+ mutable viewed_type* node_ = nullptr;
+
+ public:
+ using viewed_type = ViewedType;
+
+ node_view() noexcept = default;
+ explicit node_view(viewed_type* node) noexcept;
+ explicit node_view(viewed_type& node) noexcept;
+
+ explicit operator bool() const noexcept;
+ viewed_type* node() const noexcept;
+```
+
+`node_view` wraps a pointer to a `node` (or `const node`) and provides the same interface as `node` — type checks, casts, value retrieval — but safely handles null (returns empty optionals, false booleans, empty views on subscript).
+
+The key design feature is chainable subscript operators:
+
+```cpp
+node_view operator[](std::string_view key) const noexcept;
+node_view operator[](size_t index) const noexcept;
+node_view operator[](const path& p) const noexcept;
+```
+
+These return empty views when the key/index doesn't exist, so you can chain deeply without null checks:
+
+```cpp
+auto val = tbl["section"]["subsection"]["key"].value_or(42);
+// Safe even if any intermediate node is missing
+```
+
+---
+
+## Source Tracking
+
+### `toml::source_position`
+
+```cpp
+struct source_position
+{
+ source_index line; // 1-based
+ source_index column; // 1-based
+
+ explicit constexpr operator bool() const noexcept;
+ // Comparison operators
+};
+```
+
+### `toml::source_region`
+
+```cpp
+struct source_region
+{
+ source_position begin;
+ source_position end;
+ source_path_ptr path; // std::shared_ptr<const std::string>
+};
+```
+
+Every node carries a `source_region` accessible via `node::source()`. For programmatically-constructed nodes, the source region is default (zeroed). For parsed nodes, it tracks the exact file location.
+
+---
+
+## Ownership Model
+
+The ownership model is straightforward:
+- `toml::table` owns its child nodes via `std::map<key, std::unique_ptr<node>>`
+- `toml::array` owns its child nodes via `std::vector<std::unique_ptr<node>>`
+- `toml::value<T>` owns its stored value directly (by value)
+- `toml::node_view<T>` is non-owning (raw pointer)
+- `toml::parse_result` owns either a `table` or a `parse_error` (union storage)
+
+Copying a `table` or `array` performs a deep copy of the entire subtree. Moving transfers ownership with no allocation.
+
+---
+
+## Thread Safety
+
+toml++ does not provide internal synchronization. A parsed `table` tree can be read concurrently from multiple threads, but modification requires external synchronization. The parser itself is re-entrant — each call to `parse()` creates a fresh internal parser instance, so concurrent `parse()` calls are safe.
+
+---
+
+## Memory Allocation
+
+All heap allocation uses the global `operator new` (no custom allocators). Nodes are individually heap-allocated even in arrays. The `make_node.hpp` factory always calls `new`:
+
+```cpp
+return new unwrapped_type(static_cast<T&&>(val));
+// or
+return new value_type{ static_cast<T&&>(val) };
+```
+
+This makes the library simple but means that large TOML documents with many small values will create many small allocations.
+
+---
+
+## ABI Namespaces
+
+toml++ uses conditional inline namespaces to prevent ODR violations when mixing translation units compiled with different configuration macros:
+
+```cpp
+TOML_ABI_NAMESPACE_BOOL(TOML_EXCEPTIONS, ex, noex);
+// Creates either:
+// namespace ex { ... } // when TOML_EXCEPTIONS == 1
+// namespace noex { ... } // when TOML_EXCEPTIONS == 0
+
+TOML_ABI_NAMESPACE_BOOL(TOML_HAS_CUSTOM_OPTIONAL_TYPE, custopt, stdopt);
+```
+
+This means `toml::ex::parse_result` (exceptions enabled) and `toml::noex::parse_result` (exceptions disabled) are different types, preventing linker errors if you accidentally mix them.
+
+---
+
+## Conditional Compilation
+
+Large portions of the library can be compiled out:
+
+| Macro | What it removes |
+|-------|----------------|
+| `TOML_ENABLE_PARSER=0` | All parsing functions, `parse_error`, `parse_result`, parser implementation |
+| `TOML_ENABLE_FORMATTERS=0` | All formatter classes, `format_flags` |
+| `TOML_HEADER_ONLY=0` | Switches to compiled mode (link against `toml.cpp`) |
+
+This enables minimal builds for projects that only need, say, programmatic TOML construction and serialization (no parsing).
+
+---
+
+## Related Documentation
+
+- [node-system.md](node-system.md) — Detailed node API reference
+- [tables.md](tables.md) — Table operations in depth
+- [arrays.md](arrays.md) — Array operations in depth
+- [values.md](values.md) — Value template details
+- [parsing.md](parsing.md) — Parser behavior and error handling
+- [formatting.md](formatting.md) — Formatter usage and customization
diff --git a/docs/handbook/tomlplusplus/arrays.md b/docs/handbook/tomlplusplus/arrays.md
new file mode 100644
index 0000000000..3e916392fc
--- /dev/null
+++ b/docs/handbook/tomlplusplus/arrays.md
@@ -0,0 +1,625 @@
+# toml++ — Arrays
+
+## Overview
+
+`toml::array` extends `toml::node` and models a heterogeneous, ordered sequence of TOML nodes. Unlike C++ standard containers that store elements of a single type, a TOML array can contain any mix of value types, sub-tables, and nested arrays.
+
+Declared in `include/toml++/impl/array.hpp` with implementation in `array.inl`.
+
+---
+
+## Internal Storage
+
+```cpp
+class array : public node
+{
+ private:
+ std::vector<impl::node_ptr> elems_;
+ // impl::node_ptr = std::unique_ptr<node>
+};
+```
+
+- Each element is owned via `std::unique_ptr<node>`
+- The array owns all child nodes — destruction cascades
+- Elements can be any `node` subclass: `value<T>`, `table`, or nested `array`
+
+---
+
+## Construction
+
+### Default Construction
+
+```cpp
+toml::array arr; // empty array
+```
+
+### Initializer List Construction
+
+```cpp
+auto arr = toml::array{ 1, 2, 3 }; // array of integers
+auto mixed = toml::array{ 1, "hello", 3.14, true }; // mixed types
+auto nested = toml::array{
+ toml::array{ 1, 2 },
+ toml::array{ 3, 4 }
+};
+
+// Array of tables (array-of-tables syntax in TOML)
+auto aot = toml::array{
+ toml::table{ { "name", "Alice" }, { "age", 30 } },
+ toml::table{ { "name", "Bob" }, { "age", 25 } }
+};
+```
+
+Values are converted to nodes via `impl::make_node()`:
+- `int`, `int64_t` → `value<int64_t>`
+- `double`, `float` → `value<double>`
+- `const char*`, `std::string` → `value<std::string>`
+- `bool` → `value<bool>`
+- `toml::date` → `value<date>`
+- `toml::time` → `value<time>`
+- `toml::date_time` → `value<date_time>`
+- `toml::table` → `table` (moved)
+- `toml::array` → `array` (moved)
+
+### Copy and Move
+
+```cpp
+toml::array copy(original); // deep copy — all elements cloned
+toml::array moved(std::move(arr)); // move — no allocation
+```
+
+Copy is recursive: nested tables and arrays are deep-copied.
+
+---
+
+## Iterators
+
+### Types
+
+```cpp
+using array_iterator = impl::array_iterator<false>;
+using const_array_iterator = impl::array_iterator<true>;
+```
+
+`array_iterator` is a **RandomAccessIterator** (unlike `table_iterator` which is Bidirectional). This means it supports arithmetic, comparison, and random access:
+
+```cpp
+auto it = arr.begin();
+it += 3; // jump forward 3
+ptrdiff_t diff = arr.end() - it; // distance
+bool less = it < arr.end(); // comparison
+```
+
+Dereferencing yields `node&` (the `unique_ptr` is hidden):
+
+```cpp
+for (auto it = arr.begin(); it != arr.end(); ++it)
+{
+ toml::node& elem = *it;
+ std::cout << elem << "\n";
+}
+```
+
+### Iterator Methods
+
+```cpp
+iterator begin() noexcept;
+iterator end() noexcept;
+const_iterator begin() const noexcept;
+const_iterator end() const noexcept;
+const_iterator cbegin() const noexcept;
+const_iterator cend() const noexcept;
+```
+
+### Range-Based For
+
+```cpp
+for (auto& elem : arr)
+{
+ std::cout << elem.type() << ": " << elem << "\n";
+}
+```
+
+---
+
+## Capacity
+
+```cpp
+size_t size() const noexcept; // number of elements
+bool empty() const noexcept; // true if size() == 0
+size_t capacity() const noexcept; // reserved capacity
+size_t max_size() const noexcept; // maximum possible size
+
+void reserve(size_t new_cap); // reserve capacity
+void shrink_to_fit(); // release excess capacity
+```
+
+---
+
+## Element Access
+
+### `operator[]` — Unchecked Index Access
+
+```cpp
+node& operator[](size_t index) noexcept;
+const node& operator[](size_t index) const noexcept;
+```
+
+No bounds checking. UB if `index >= size()`.
+
+```cpp
+auto arr = toml::array{ 10, 20, 30 };
+std::cout << arr[1].value_or(0) << "\n"; // 20
+```
+
+### `at()` — Bounds-Checked Access
+
+```cpp
+node& at(size_t index);
+const node& at(size_t index) const;
+```
+
+Throws `std::out_of_range` if `index >= size()`.
+
+### `front()` / `back()`
+
+```cpp
+node& front() noexcept;
+node& back() noexcept;
+const node& front() const noexcept;
+const node& back() const noexcept;
+```
+
+### `get()` — Pointer Access
+
+```cpp
+node* get(size_t index) noexcept;
+const node* get(size_t index) const noexcept;
+```
+
+Returns `nullptr` if out of bounds (safe alternative to `operator[]`).
+
+### `get_as<T>()` — Typed Pointer Access
+
+```cpp
+template <typename T>
+impl::wrap_node<T>* get_as(size_t index) noexcept;
+```
+
+Returns a typed pointer if the element at `index` exists and matches type `T`:
+
+```cpp
+auto arr = toml::array{ "hello", 42, true };
+
+if (auto* s = arr.get_as<std::string>(0))
+ std::cout << "String: " << s->get() << "\n";
+
+if (auto* i = arr.get_as<int64_t>(1))
+ std::cout << "Integer: " << i->get() << "\n";
+```
+
+---
+
+## Insertion
+
+### `push_back()` — Append
+
+```cpp
+template <typename T>
+decltype(auto) push_back(T&& val, value_flags flags = preserve_source_value_flags);
+```
+
+Appends a new element to the end:
+
+```cpp
+arr.push_back(42);
+arr.push_back("hello");
+arr.push_back(toml::table{ { "key", "value" } });
+arr.push_back(toml::array{ 1, 2, 3 });
+```
+
+Returns a reference to the inserted node.
+
+### `emplace_back<T>()` — Construct at End
+
+```cpp
+template <typename T, typename... Args>
+decltype(auto) emplace_back(Args&&... args);
+```
+
+```cpp
+arr.emplace_back<std::string>("constructed in place");
+arr.emplace_back<int64_t>(42);
+arr.emplace_back<toml::table>(); // empty table
+```
+
+### `insert()` — Insert at Position
+
+```cpp
+template <typename T>
+iterator insert(const_iterator pos, T&& val, value_flags flags = preserve_source_value_flags);
+```
+
+```cpp
+arr.insert(arr.begin(), "first"); // insert at front
+arr.insert(arr.begin() + 2, 42); // insert at index 2
+arr.insert(arr.end(), toml::array{1,2}); // same as push_back
+```
+
+### `emplace()` — Construct at Position
+
+```cpp
+template <typename T, typename... Args>
+iterator emplace(const_iterator pos, Args&&... args);
+```
+
+```cpp
+arr.emplace<std::string>(arr.begin(), "inserted string");
+```
+
+---
+
+## Removal
+
+### `pop_back()`
+
+```cpp
+void pop_back() noexcept;
+```
+
+Removes the last element.
+
+### `erase()` — By Iterator
+
+```cpp
+iterator erase(const_iterator pos) noexcept;
+iterator erase(const_iterator first, const_iterator last) noexcept;
+```
+
+```cpp
+arr.erase(arr.begin()); // remove first
+arr.erase(arr.begin(), arr.begin() + 3); // remove first 3
+```
+
+### `clear()`
+
+```cpp
+void clear() noexcept;
+```
+
+Removes all elements.
+
+### `resize()`
+
+```cpp
+template <typename T>
+void resize(size_t new_size, T&& default_value, value_flags flags = preserve_source_value_flags);
+```
+
+If `new_size > size()`, appends copies of `default_value`. If `new_size < size()`, truncates.
+
+### `truncate()`
+
+```cpp
+void truncate(size_t new_size);
+```
+
+Removes elements beyond `new_size`. If `new_size >= size()`, does nothing.
+
+---
+
+## Array Transformation
+
+### `flatten()` — Flatten Nested Arrays
+
+```cpp
+array& flatten() &;
+array&& flatten() &&;
+```
+
+Recursively flattens nested arrays into a single-level array:
+
+```cpp
+auto arr = toml::array{
+ 1,
+ toml::array{ 2, 3 },
+ toml::array{ toml::array{ 4, 5 }, 6 }
+};
+
+arr.flatten();
+// arr is now: [1, 2, 3, 4, 5, 6]
+```
+
+Tables within nested arrays are **not** flattened — only arrays are unwrapped.
+
+### `prune()` — Remove Empty Containers
+
+```cpp
+array& prune(bool recursive = true) &;
+array&& prune(bool recursive = true) &&;
+```
+
+Removes empty tables and empty arrays. If `recursive` is true, prunes nested containers first, then removes them if they became empty:
+
+```cpp
+auto arr = toml::array{
+ 1,
+ toml::table{}, // empty table
+ toml::array{}, // empty array
+ toml::array{ toml::table{} } // array containing empty table
+};
+
+arr.prune();
+// arr is now: [1]
+```
+
+---
+
+## Homogeneity
+
+### Checking Type Uniformity
+
+```cpp
+bool is_homogeneous(node_type ntype) const noexcept;
+bool is_homogeneous(node_type ntype, node*& first_nonmatch) noexcept;
+bool is_homogeneous(node_type ntype, const node*& first_nonmatch) const noexcept;
+
+template <typename ElemType = void>
+bool is_homogeneous() const noexcept;
+```
+
+```cpp
+auto ints = toml::array{ 1, 2, 3 };
+auto mixed = toml::array{ 1, "two", 3.0 };
+
+ints.is_homogeneous<int64_t>(); // true
+ints.is_homogeneous<double>(); // false
+ints.is_homogeneous(); // true (all same type)
+
+mixed.is_homogeneous(); // false
+mixed.is_homogeneous(toml::node_type::none); // false
+
+// Find first mismatch:
+toml::node* bad = nullptr;
+mixed.is_homogeneous(toml::node_type::integer, bad);
+// bad points to the "two" string value
+```
+
+**Important:** Empty arrays return `false` for all homogeneity checks — they don't contain "any" type.
+
+### `is_array_of_tables()`
+
+```cpp
+bool is_array_of_tables() const noexcept;
+```
+
+Returns `true` only if the array is non-empty and every element is a `table`:
+
+```cpp
+auto aot = toml::array{
+ toml::table{ { "name", "Alice" } },
+ toml::table{ { "name", "Bob" } }
+};
+std::cout << aot.is_array_of_tables() << "\n"; // true
+
+auto mixed = toml::array{ toml::table{}, 42 };
+std::cout << mixed.is_array_of_tables() << "\n"; // false
+```
+
+---
+
+## `for_each()` — Type-Safe Iteration
+
+```cpp
+template <typename Func>
+array& for_each(Func&& visitor) &;
+template <typename Func>
+array&& for_each(Func&& visitor) &&;
+template <typename Func>
+const array& for_each(Func&& visitor) const&;
+```
+
+Iterates elements, calling the visitor with each element in its concrete type. The visitor can accept:
+- `(auto& element)` — element only
+- `(size_t index, auto& element)` — index + element
+
+```cpp
+auto arr = toml::array{ 1, "two", 3.0, true };
+
+arr.for_each([](size_t idx, auto& elem)
+{
+ using T = std::remove_cvref_t<decltype(elem)>;
+
+ std::cout << "[" << idx << "] ";
+
+ if constexpr (toml::is_integer<T>)
+ std::cout << "int: " << elem.get() << "\n";
+ else if constexpr (toml::is_string<T>)
+ std::cout << "string: " << elem.get() << "\n";
+ else if constexpr (toml::is_floating_point<T>)
+ std::cout << "float: " << elem.get() << "\n";
+ else if constexpr (toml::is_boolean<T>)
+ std::cout << "bool: " << elem.get() << "\n";
+});
+```
+
+Output:
+```
+[0] int: 1
+[1] string: two
+[2] float: 3
+[3] bool: true
+```
+
+### Early Exit (Non-GCC-7)
+
+On supported compilers, returning `bool` from the visitor enables early termination:
+
+```cpp
+arr.for_each([](auto& elem) -> bool
+{
+ if constexpr (toml::is_string<decltype(elem)>)
+ {
+ std::cout << "Found string: " << elem.get() << "\n";
+ return false; // stop
+ }
+ return true; // continue
+});
+```
+
+---
+
+## Array of Tables (TOML `[[syntax]]`)
+
+In TOML, `[[array_name]]` defines an array of tables:
+
+```toml
+[[servers]]
+name = "alpha"
+ip = "10.0.0.1"
+
+[[servers]]
+name = "beta"
+ip = "10.0.0.2"
+```
+
+This parses to a `table` containing key `"servers"` → `array` → `[table, table]`.
+
+Accessing:
+```cpp
+auto tbl = toml::parse(/* above TOML */);
+
+if (auto* servers = tbl["servers"].as_array())
+{
+ for (auto& server_node : *servers)
+ {
+ auto* server = server_node.as_table();
+ if (server)
+ {
+ auto name = (*server)["name"].value_or(""sv);
+ auto ip = (*server)["ip"].value_or(""sv);
+ std::cout << name << " @ " << ip << "\n";
+ }
+ }
+}
+```
+
+Creating programmatically:
+```cpp
+auto tbl = toml::table{
+ { "servers", toml::array{
+ toml::table{ { "name", "alpha" }, { "ip", "10.0.0.1" } },
+ toml::table{ { "name", "beta" }, { "ip", "10.0.0.2" } }
+ }}
+};
+```
+
+---
+
+## Comparison
+
+### Equality
+
+```cpp
+friend bool operator==(const array& lhs, const array& rhs) noexcept;
+friend bool operator!=(const array& lhs, const array& rhs) noexcept;
+```
+
+Deep structural equality: same size, same element types, same values in order.
+
+---
+
+## Printing
+
+Arrays are streamable:
+
+```cpp
+auto arr = toml::array{ 1, 2, 3, "four" };
+std::cout << arr << "\n";
+// Output: [1, 2, 3, "four"]
+```
+
+The output format depends on the formatter. The default `toml_formatter` uses inline array syntax for simple arrays and multiline for arrays of tables.
+
+---
+
+## Type Identity
+
+```cpp
+node_type type() const noexcept final; // returns node_type::array
+bool is_table() const noexcept final; // returns false
+bool is_array() const noexcept final; // returns true
+bool is_value() const noexcept final; // returns false
+// ... all other is_*() return false
+
+array* as_array() noexcept final; // returns this
+const array* as_array() const noexcept final; // returns this
+// ... all other as_*() return nullptr
+```
+
+---
+
+## Complete Example
+
+```cpp
+#include <toml++/toml.hpp>
+#include <iostream>
+
+using namespace std::string_view_literals; // for the ""sv literals used below
+
+int main()
+{
+ // Build an array
+ toml::array fruits;
+ fruits.push_back("apple");
+ fruits.push_back("banana");
+ fruits.push_back("cherry");
+
+ // Insert at position
+ fruits.insert(fruits.begin() + 1, "blueberry");
+
+ // Iterate
+ for (size_t i = 0; i < fruits.size(); i++)
+ {
+ std::cout << i << ": " << fruits[i].value_or(""sv) << "\n";
+ }
+
+ // Check homogeneity
+ std::cout << "All strings? " << fruits.is_homogeneous<std::string>() << "\n";
+
+ // Flatten nested arrays
+ auto nested = toml::array{
+ toml::array{ 1, 2 },
+ toml::array{ 3, toml::array{ 4, 5 } }
+ };
+ nested.flatten();
+ std::cout << "Flattened: " << nested << "\n";
+
+ // Array of tables
+ auto servers = toml::array{
+ toml::table{ { "host", "alpha" }, { "port", 8080 } },
+ toml::table{ { "host", "beta" }, { "port", 8081 } }
+ };
+ std::cout << "Is array of tables? " << servers.is_array_of_tables() << "\n";
+
+ // for_each with type dispatch
+ auto mixed = toml::array{ 42, "hello", 3.14 };
+ mixed.for_each([](auto& elem)
+ {
+ if constexpr (toml::is_integer<decltype(elem)>)
+ std::cout << "int: " << elem.get() << "\n";
+ else if constexpr (toml::is_string<decltype(elem)>)
+ std::cout << "str: " << elem.get() << "\n";
+ else if constexpr (toml::is_floating_point<decltype(elem)>)
+ std::cout << "flt: " << elem.get() << "\n";
+ });
+
+ return 0;
+}
+```
+
+---
+
+## Related Documentation
+
+- [node-system.md](node-system.md) — Base node interface
+- [tables.md](tables.md) — Table container details
+- [values.md](values.md) — Leaf value details
+- [formatting.md](formatting.md) — Array formatting options
diff --git a/docs/handbook/tomlplusplus/basic-usage.md b/docs/handbook/tomlplusplus/basic-usage.md
new file mode 100644
index 0000000000..e11d47c42c
--- /dev/null
+++ b/docs/handbook/tomlplusplus/basic-usage.md
@@ -0,0 +1,705 @@
+# toml++ — Basic Usage
+
+## Including the Library
+
+The simplest way to start using toml++ is with the default header-only mode:
+
+```cpp
+#include <toml++/toml.hpp>
+```
+
+Or with the single-header drop-in:
+
+```cpp
+#include "toml.hpp"
+```
+
+The library places everything in the `toml` namespace. Most examples use the string literal namespace:
+
+```cpp
+using namespace std::string_view_literals; // for "..."sv
+```
+
+---
+
+## Parsing TOML
+
+### Parsing a String
+
+```cpp
+#include <toml++/toml.hpp>
+#include <iostream>
+
+int main()
+{
+ auto tbl = toml::parse(R"(
+ title = "My Config"
+
+ [database]
+ server = "192.168.1.1"
+		ports = [ 8000, 8001, 8002 ]
+ enabled = true
+ )");
+
+ std::cout << tbl << "\n";
+ return 0;
+}
+```
+
+`toml::parse()` accepts a `std::string_view` and returns a `toml::table` (when exceptions are enabled) or a `toml::parse_result` (when exceptions are disabled).
+
+### Parsing a File
+
+```cpp
+auto tbl = toml::parse_file("config.toml");
+```
+
+`toml::parse_file()` takes a file path as `std::string_view`, opens the file, and parses its contents.
+
+### Parsing from a Stream
+
+```cpp
+#include <fstream>
+
+std::ifstream file("config.toml");
+auto tbl = toml::parse(file, "config.toml");
+```
+
+The second argument is the source path used for error messages and `source()` metadata.
+
+### Parsing with Source Path
+
+You can provide a source path for diagnostic purposes:
+
+```cpp
+auto tbl = toml::parse(toml_string, "my_config.toml");
+```
+
+This path is stored in each node's `source().path` and appears in error messages.
+
+---
+
+## Error Handling
+
+### With Exceptions (Default)
+
+When `TOML_EXCEPTIONS` is enabled (the default if you don't disable them), `toml::parse()` and `toml::parse_file()` throw `toml::parse_error` on failure:
+
+```cpp
+try
+{
+ auto tbl = toml::parse_file("config.toml");
+}
+catch (const toml::parse_error& err)
+{
+ std::cerr << "Parse error: " << err.description() << "\n";
+ std::cerr << " at " << err.source() << "\n";
+ // err.source().begin.line, err.source().begin.column
+}
+```
+
+`toml::parse_error` inherits from `std::runtime_error` in this mode.
+
+### Without Exceptions
+
+When compiled with `TOML_EXCEPTIONS=0` (or with `-fno-exceptions`), `parse()` returns a `toml::parse_result`:
+
+```cpp
+toml::parse_result result = toml::parse_file("config.toml");
+
+if (result)
+{
+ // Success — result implicitly converts to table&
+ toml::table& tbl = result;
+ std::cout << tbl << "\n";
+}
+else
+{
+ // Failure
+ std::cerr << "Parse error: " << result.error().description() << "\n";
+ std::cerr << " at " << result.error().source() << "\n";
+}
+```
+
+`parse_result` is a discriminated union that holds either a `toml::table` or a `toml::parse_error`. It converts to `bool` for success checking.
+
+---
+
+## Accessing Values
+
+### Using `operator[]` — The Easy Way
+
+`operator[]` on a `toml::table` returns a `toml::node_view`, which is a safe optional-like wrapper:
+
+```cpp
+auto tbl = toml::parse(R"(
+ [server]
+ host = "localhost"
+ port = 8080
+ debug = false
+ tags = ["web", "api"]
+)");
+
+// Chained access — never throws, returns empty view if path doesn't exist
+auto host_view = tbl["server"]["host"];
+auto port_view = tbl["server"]["port"];
+
+// Check if the view refers to a node
+if (host_view)
+ std::cout << "Host exists\n";
+```
+
+### Getting Values with `value<T>()`
+
+```cpp
+// Returns std::optional<T>
+std::optional<std::string_view> host = tbl["server"]["host"].value<std::string_view>();
+std::optional<int64_t> port = tbl["server"]["port"].value<int64_t>();
+std::optional<bool> debug = tbl["server"]["debug"].value<bool>();
+
+if (host)
+ std::cout << "Host: " << *host << "\n";
+```
+
+`value<T>()` is permissive — it allows some type conversions (e.g., reading an integer as a double).
+
+### Getting Values with `value_exact<T>()`
+
+```cpp
+// Strict — only returns a value if the types match exactly
+std::optional<int64_t> port = tbl["server"]["port"].value_exact<int64_t>();
+```
+
+`value_exact<T>()` only succeeds if the underlying node is exactly that type. No conversions.
+
+### Getting Values with `value_or()`
+
+The most convenient accessor — returns the value or a default:
+
+```cpp
+std::string_view host = tbl["server"]["host"].value_or("0.0.0.0"sv);
+int64_t port = tbl["server"]["port"].value_or(80);
+bool debug = tbl["server"]["debug"].value_or(true);
+
+// Safe even if the key doesn't exist:
+std::string_view missing = tbl["nonexistent"]["key"].value_or("default"sv);
+```
+
+### Using `as<T>()` for Pointer Access
+
+`as<T>()` returns a pointer to the node if it matches the type, or `nullptr`:
+
+```cpp
+if (auto* str_val = tbl["server"]["host"].as_string())
+ std::cout << "Host: " << str_val->get() << "\n";
+
+if (auto* port_val = tbl["server"]["port"].as_integer())
+ std::cout << "Port: " << port_val->get() << "\n";
+
+// Generic template version:
+if (auto* arr = tbl["server"]["tags"].as<toml::array>())
+ std::cout << "Tags count: " << arr->size() << "\n";
+```
+
+Specific convenience methods exist:
+- `as_table()` → `toml::table*`
+- `as_array()` → `toml::array*`
+- `as_string()` → `toml::value<std::string>*`
+- `as_integer()` → `toml::value<int64_t>*`
+- `as_floating_point()` → `toml::value<double>*`
+- `as_boolean()` → `toml::value<bool>*`
+- `as_date()` → `toml::value<toml::date>*`
+- `as_time()` → `toml::value<toml::time>*`
+- `as_date_time()` → `toml::value<toml::date_time>*`
+
+### Direct Node Access with `get()`
+
+On `toml::table`:
+```cpp
+toml::node* node = tbl.get("server");
+if (node && node->is_table())
+{
+ toml::table& server = *node->as_table();
+ // ...
+}
+```
+
+On `toml::array`:
+```cpp
+auto* arr = tbl["server"]["tags"].as_array();
+if (arr && arr->size() > 0)
+{
+ toml::node& first = (*arr)[0];
+ std::cout << first.value_or(""sv) << "\n";
+}
+```
+
+### Typed get with `get_as<T>()`
+
+```cpp
+// On table — returns pointer if key exists AND matches type
+if (auto* val = tbl.get_as<std::string>("title"))
+ std::cout << "Title: " << val->get() << "\n";
+
+// On array — returns pointer if index is valid AND matches type
+auto* arr = tbl["tags"].as_array();
+if (arr)
+{
+ if (auto* s = arr->get_as<std::string>(0))
+ std::cout << "First tag: " << s->get() << "\n";
+}
+```
+
+---
+
+## Iterating Tables
+
+### Range-based For Loop
+
+```cpp
+auto tbl = toml::parse(R"(
+ a = 1
+ b = "hello"
+ c = true
+)");
+
+for (auto&& [key, value] : tbl)
+{
+ std::cout << key << " = " << value << " (type: " << value.type() << ")\n";
+}
+```
+
+Output:
+```
+a = 1 (type: integer)
+b = "hello" (type: string)
+c = true (type: boolean)
+```
+
+### Using `for_each()`
+
+`for_each()` calls a visitor with each key-value pair. The value is passed as its concrete type:
+
+```cpp
+tbl.for_each([](auto& key, auto& value)
+{
+ std::cout << key << ": ";
+ if constexpr (toml::is_string<decltype(value)>)
+ std::cout << "string = " << value.get() << "\n";
+ else if constexpr (toml::is_integer<decltype(value)>)
+ std::cout << "integer = " << value.get() << "\n";
+ else if constexpr (toml::is_boolean<decltype(value)>)
+ std::cout << "boolean = " << value.get() << "\n";
+ else
+ std::cout << "(other)\n";
+});
+```
+
+---
+
+## Iterating Arrays
+
+### Range-based For Loop
+
+```cpp
+auto tbl = toml::parse(R"(
+ numbers = [1, 2, 3, 4, 5]
+)");
+
+auto& arr = *tbl["numbers"].as_array();
+for (auto& elem : arr)
+{
+ std::cout << elem.value_or(0) << " ";
+}
+// Output: 1 2 3 4 5
+```
+
+### Index-based Access
+
+```cpp
+for (size_t i = 0; i < arr.size(); i++)
+{
+ std::cout << arr[i].value_or(0) << " ";
+}
+```
+
+### Using `for_each()`
+
+```cpp
+arr.for_each([](size_t index, auto& elem)
+{
+ if constexpr (toml::is_integer<decltype(elem)>)
+ std::cout << "[" << index << "] = " << elem.get() << "\n";
+});
+```
+
+---
+
+## Creating TOML Programmatically
+
+### Constructing a Table
+
+```cpp
+auto tbl = toml::table{
+ { "title", "My Application" },
+ { "version", 2 },
+ { "debug", false },
+ { "database", toml::table{
+ { "host", "localhost" },
+ { "port", 5432 }
+ }},
+ { "tags", toml::array{ "web", "api", "rest" } }
+};
+
+std::cout << tbl << "\n";
+```
+
+Output:
+```toml
+title = "My Application"
+version = 2
+debug = false
+tags = ["web", "api", "rest"]
+
+[database]
+host = "localhost"
+port = 5432
+```
+
+### Inserting Values
+
+```cpp
+toml::table config;
+
+// insert() — only inserts if key doesn't exist
+config.insert("name", "MyApp");
+config.insert("name", "Overwritten"); // no-op, key already exists
+
+// insert_or_assign() — inserts or replaces
+config.insert_or_assign("name", "ReplacedApp");
+
+// emplace() — construct in place if key doesn't exist
+config.emplace<std::string>("greeting", "Hello, World!");
+```
+
+### Building Arrays
+
+```cpp
+toml::array arr;
+arr.push_back(1);
+arr.push_back(2);
+arr.push_back(3);
+arr.push_back("mixed types are fine");
+
+// Or construct directly:
+auto arr2 = toml::array{ 10, 20, 30 };
+
+// Emplace:
+arr2.emplace_back<std::string>("hello");
+```
+
+### Creating Date/Time Values
+
+```cpp
+auto tbl = toml::table{
+ { "birthday", toml::date{ 1990, 6, 15 } },
+ { "alarm", toml::time{ 7, 30 } },
+ { "event", toml::date_time{
+ toml::date{ 2024, 12, 25 },
+ toml::time{ 9, 0 },
+ toml::time_offset{ -5, 0 } // EST
+ }}
+};
+
+std::cout << tbl << "\n";
+```
+
+Output:
+```toml
+birthday = 1990-06-15
+alarm = 07:30:00
+event = 2024-12-25T09:00:00-05:00
+```
+
+---
+
+## Modifying Parsed Data
+
+```cpp
+auto tbl = toml::parse(R"(
+ [server]
+ host = "localhost"
+ port = 8080
+)");
+
+// Change a value
+tbl.insert_or_assign("server", toml::table{
+ { "host", "0.0.0.0" },
+ { "port", 443 },
+ { "ssl", true }
+});
+
+// Add a new section
+tbl.insert("logging", toml::table{
+ { "level", "info" },
+ { "file", "/var/log/app.log" }
+});
+
+// Remove a key
+if (auto* server = tbl["server"].as_table())
+ server->erase("ssl");
+
+// Modify array
+tbl.insert("features", toml::array{ "auth", "cache" });
+if (auto* features = tbl["features"].as_array())
+{
+ features->push_back("logging");
+ features->insert(features->begin(), "core");
+}
+
+std::cout << tbl << "\n";
+```
+
+---
+
+## Serialization
+
+### To TOML (Default)
+
+Simply stream a table or use `toml_formatter`:
+
+```cpp
+// These are equivalent:
+std::cout << tbl << "\n";
+std::cout << toml::toml_formatter{ tbl } << "\n";
+```
+
+### To JSON
+
+```cpp
+std::cout << toml::json_formatter{ tbl } << "\n";
+```
+
+### To YAML
+
+```cpp
+std::cout << toml::yaml_formatter{ tbl } << "\n";
+```
+
+### To a String
+
+```cpp
+#include <sstream>
+
+std::ostringstream ss;
+ss << tbl;
+std::string toml_string = ss.str();
+
+// Or as JSON:
+ss.str("");
+ss << toml::json_formatter{ tbl };
+std::string json_string = ss.str();
+```
+
+### To a File
+
+```cpp
+#include <fstream>
+
+std::ofstream file("output.toml");
+file << tbl;
+```
+
+---
+
+## Path-Based Access
+
+### Using `at_path()`
+
+```cpp
+auto tbl = toml::parse(R"(
+ [database]
+ servers = [
+ { host = "alpha", port = 5432 },
+ { host = "beta", port = 5433 }
+ ]
+)");
+
+// Dot-separated path with array indices
+auto host = toml::at_path(tbl, "database.servers[0].host");
+std::cout << host.value_or("unknown"sv) << "\n"; // "alpha"
+
+auto port = toml::at_path(tbl, "database.servers[1].port");
+std::cout << port.value_or(0) << "\n"; // 5433
+```
+
+### Using `toml::path`
+
+```cpp
+toml::path p("database.servers[0].host");
+auto view = tbl[p];
+std::cout << view.value_or("unknown"sv) << "\n";
+
+// Path manipulation
+toml::path parent = p.parent(); // "database.servers[0]"
+std::cout << tbl[parent] << "\n"; // { host = "alpha", port = 5432 }
+```
+
+---
+
+## The Visitor Pattern
+
+### Using `visit()`
+
+```cpp
+toml::node& some_node = *tbl.get("title");
+
+some_node.visit([](auto& val)
+{
+ // val is the concrete type: table&, array&, or value<T>&
+ using T = std::remove_cvref_t<decltype(val)>;
+
+ if constexpr (std::is_same_v<T, toml::table>)
+ std::cout << "It's a table\n";
+ else if constexpr (std::is_same_v<T, toml::array>)
+ std::cout << "It's an array\n";
+ else
+ std::cout << "It's a value: " << val.get() << "\n";
+});
+```
+
+### Using `for_each()` on Tables and Arrays
+
+`for_each()` iterates and visits each element with its concrete type:
+
+```cpp
+tbl.for_each([](const toml::key& key, auto& value)
+{
+ std::cout << key << " -> " << value << "\n";
+});
+```
+
+---
+
+## Source Information
+
+Every parsed node tracks where it was defined:
+
+```cpp
+auto tbl = toml::parse_file("config.toml");
+
+if (auto* name = tbl.get("name"))
+{
+ auto& src = name->source();
+ std::cout << "Defined at line " << src.begin.line
+ << ", column " << src.begin.column << "\n";
+
+ if (src.path)
+ std::cout << "In file: " << *src.path << "\n";
+}
+```
+
+---
+
+## Type Checking
+
+```cpp
+toml::node& node = /* some node */;
+
+// Virtual method checks
+if (node.is_string()) { /* ... */ }
+if (node.is_integer()) { /* ... */ }
+if (node.is_table()) { /* ... */ }
+
+// Template check
+if (node.is<double>()) { /* ... */ }
+if (node.is<toml::array>()) { /* ... */ }
+
+// Get the type enum
+switch (node.type())
+{
+ case toml::node_type::string: break;
+ case toml::node_type::integer: break;
+ case toml::node_type::floating_point: break;
+ case toml::node_type::boolean: break;
+ case toml::node_type::date: break;
+ case toml::node_type::time: break;
+ case toml::node_type::date_time: break;
+ case toml::node_type::table: break;
+ case toml::node_type::array: break;
+ default: break;
+}
+```
+
+---
+
+## Complete Example: Config File Reader
+
+```cpp
+#include <toml++/toml.hpp>
+#include <fstream>
+#include <iostream>
+#include <string_view>
+
+using namespace std::string_view_literals;
+
+int main()
+{
+ toml::table config;
+ try
+ {
+ config = toml::parse_file("app.toml");
+ }
+ catch (const toml::parse_error& err)
+ {
+ std::cerr << "Failed to parse config:\n" << err << "\n";
+ return 1;
+ }
+
+ // Read application settings
+ auto app_name = config["app"]["name"].value_or("Unknown"sv);
+ auto app_version = config["app"]["version"].value_or(1);
+ auto log_level = config["logging"]["level"].value_or("info"sv);
+ auto log_file = config["logging"]["file"].value_or("/tmp/app.log"sv);
+
+ std::cout << "Application: " << app_name << " v" << app_version << "\n";
+ std::cout << "Log level: " << log_level << "\n";
+ std::cout << "Log file: " << log_file << "\n";
+
+ // Read database connections
+ if (auto* dbs = config["databases"].as_array())
+ {
+ for (auto& db_node : *dbs)
+ {
+ if (auto* db = db_node.as_table())
+ {
+ auto host = (*db)["host"].value_or("localhost"sv);
+ auto port = (*db)["port"].value_or(5432);
+ auto name = (*db)["name"].value_or("mydb"sv);
+ std::cout << "DB: " << name << " @ " << host << ":" << port << "\n";
+ }
+ }
+ }
+
+ // Modify and write back
+ config.insert_or_assign("last_run", toml::date_time{
+ toml::date{ 2024, 1, 15 },
+ toml::time{ 14, 30, 0 }
+ });
+
+ std::ofstream out("app.toml");
+ out << config;
+
+ return 0;
+}
+```
+
+---
+
+## Related Documentation
+
+- [node-system.md](node-system.md) — Deep dive into node types and value retrieval
+- [tables.md](tables.md) — Table manipulation details
+- [arrays.md](arrays.md) — Array manipulation details
+- [parsing.md](parsing.md) — Parser internals and error handling
+- [formatting.md](formatting.md) — Serialization customization
+- [path-system.md](path-system.md) — Path-based navigation
diff --git a/docs/handbook/tomlplusplus/building.md b/docs/handbook/tomlplusplus/building.md
new file mode 100644
index 0000000000..c70c76c297
--- /dev/null
+++ b/docs/handbook/tomlplusplus/building.md
@@ -0,0 +1,474 @@
+# toml++ — Building
+
+## Overview
+
+toml++ supports multiple build modes and build systems. It can be consumed as a header-only library, a single-header drop-in, or a compiled static/shared library. The primary build system is Meson, with CMake as a first-class alternative and Visual Studio project files also provided.
+
+---
+
+## Build Modes
+
+### 1. Header-Only Mode (Default)
+
+The simplest way to use toml++. No compilation of the library itself is needed.
+
+**Setup:**
+1. Add `tomlplusplus/include` to your include paths
+2. `#include <toml++/toml.hpp>` in your source files
+3. Compile your project with C++17 or later
+
+This is the default mode. The macro `TOML_HEADER_ONLY` defaults to `1`.
+
+**Advantages:**
+- Zero build configuration
+- No separate library to link
+- Works with any build system
+
+**Disadvantages:**
+- Every translation unit that includes toml++ compiles the full implementation
+- Can increase compile times in large projects
+
+**Example CMake integration:**
+```cmake
+# Add tomlplusplus as a subdirectory or fetch it
+add_subdirectory(external/tomlplusplus)
+target_link_libraries(my_target PRIVATE tomlplusplus::tomlplusplus)
+```
+
+### 2. Single-Header Mode
+
+toml++ ships with a pre-amalgamated single-header file at the repository root: `toml.hpp`.
+
+**Setup:**
+1. Copy `toml.hpp` into your project
+2. `#include "toml.hpp"` in your source files
+3. Done
+
+This file contains the entire library — all headers, all `.inl` implementation files — concatenated into one file. The API is identical to the multi-header version.
+
+### 3. Compiled Library Mode
+
+For projects where compile time matters, toml++ can be built as a compiled library.
+
+**Setup:**
+
+In exactly **one** translation unit, compile `src/toml.cpp`:
+
+```cpp
+// src/toml.cpp — this is the entire compiled-library source
+#ifndef TOML_IMPLEMENTATION
+#define TOML_IMPLEMENTATION
+#endif
+#ifndef TOML_HEADER_ONLY
+#define TOML_HEADER_ONLY 0
+#endif
+
+#include <toml++/toml.hpp>
+```
+
+In all other translation units, define `TOML_HEADER_ONLY=0` before including the header:
+
+```cpp
+#define TOML_HEADER_ONLY 0
+#include <toml++/toml.hpp>
+```
+
+Or set it project-wide via compiler flags:
+```bash
+g++ -DTOML_HEADER_ONLY=0 -I/path/to/tomlplusplus/include ...
+```
+
+**Advantages:**
+- The parser and formatter implementations are compiled once
+- Faster incremental builds
+- Smaller binary size (fewer inlined copies)
+
+**Disadvantages:**
+- Requires linking the compiled translation unit
+- Need to ensure `TOML_HEADER_ONLY=0` is consistent across all TUs
+
+### 4. C++20 Modules Mode
+
+When using CMake 3.28+ and a C++20 compiler:
+
+```cmake
+cmake -DTOMLPLUSPLUS_BUILD_MODULES=ON ..
+```
+
+Then in your source:
+```cpp
+import tomlplusplus;
+```
+
+Module support is experimental and requires:
+- CMake ≥ 3.28
+- A compiler with C++20 module support (recent GCC, Clang, or MSVC)
+
+The module source files are in `src/modules/`.
+
+---
+
+## Meson Build System
+
+Meson is the primary build system for toml++. The project file is `meson.build` at the repository root.
+
+### Project Definition
+
+```meson
+project(
+ 'tomlplusplus',
+ 'cpp',
+ license: 'MIT',
+ version: '3.4.0',
+ meson_version: '>=0.61.0',
+ default_options: [
+ 'buildtype=release',
+ 'default_library=shared',
+ 'b_lto=false',
+ 'b_ndebug=if-release',
+ 'cpp_std=c++17'
+ ]
+)
+```
+
+### Meson Options
+
+Options are defined in `meson_options.txt`:
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `devel` | bool | `false` | Development build (implies `build_tests`, `build_examples`, `pedantic`) |
+| `build_lib` | bool | `false` | Compile as a library (implied by `devel`) |
+| `build_examples` | bool | `false` | Build example programs (implied by `devel`) |
+| `build_tests` | bool | `false` | Build test suite (implied by `devel`) |
+| `build_tt` | bool | `false` | Build toml-test encoder/decoder (implied by `devel`, disabled by `unreleased_features`) |
+| `pedantic` | bool | `false` | Enable maximum compiler warnings (implied by `devel`) |
+| `permissive` | bool | `false` | MSVC `/permissive` mode (default is `/permissive-`) |
+| `time_trace` | bool | `false` | Enable `-ftime-trace` (Clang only) |
+| `unreleased_features` | bool | `false` | Enable `TOML_UNRELEASED_FEATURES=1` |
+| `generate_cmake_config` | bool | `true` | Generate a CMake package config file |
+| `use_vendored_libs` | bool | `true` | Use vendored Catch2 for tests |
+
+### Building with Meson
+
+```bash
+# Configure
+meson setup build
+# Or with options:
+meson setup build -Dbuild_tests=true -Dbuild_examples=true
+
+# Compile
+meson compile -C build
+
+# Run tests
+meson test -C build
+
+# Development build (builds everything, enables warnings)
+meson setup build -Ddevel=true
+```
+
+### Using as a Meson Subproject
+
+Create `subprojects/tomlplusplus.wrap`:
+```ini
+[wrap-git]
+url = https://github.com/marzer/tomlplusplus.git
+revision = v3.4.0
+
+[provide]
+tomlplusplus = tomlplusplus_dep
+```
+
+Then in your `meson.build`:
+```meson
+tomlplusplus_dep = dependency('tomlplusplus', version: '>=3.4.0')
+executable('my_app', 'main.cpp', dependencies: [tomlplusplus_dep])
+```
+
+### Meson Library Target
+
+When `build_lib` is true (or implied), the library is compiled:
+
+```meson
+# In the meson.build, the library creates a tomlplusplus_dep dependency
+# that other targets consume
+```
+
+The compiled library defines:
+- `TOML_HEADER_ONLY=0`
+- `TOML_IMPLEMENTATION`
+
+### Compiler Flag Management
+
+The Meson build applies comprehensive compiler flags based on the detected compiler:
+
+**Common flags:**
+```
+-ferror-limit=5 # Clang: max errors
+-fmax-errors=5 # GCC: max errors
+-fchar8_t # Enable char8_t
+```
+
+**MSVC-specific:**
+```
+/bigobj # Large object file support
+/utf-8 # UTF-8 source encoding
+/Zc:__cplusplus # Correct __cplusplus value
+/Zc:inline # Remove unreferenced COMDAT
+/Zc:externConstexpr # External constexpr linkage
+/Zc:preprocessor # Standards-conforming preprocessor
+```
+
+**Pedantic mode** enables extensive warning flags for both GCC and Clang (`-Weverything`, `-Wcast-align`, `-Wshadow`, etc.) with targeted suppressions for unavoidable warnings (`-Wno-c++98-compat`, `-Wno-padded`, etc.).
+
+---
+
+## CMake Build System
+
+### CMake Project
+
+The `CMakeLists.txt` defines an interface (header-only) library:
+
+```cmake
+cmake_minimum_required(VERSION 3.14)
+
+project(
+ tomlplusplus
+ VERSION 3.4.0
+ DESCRIPTION "Header-only TOML config file parser and serializer for C++17"
+ HOMEPAGE_URL "https://marzer.github.io/tomlplusplus/"
+ LANGUAGES CXX
+)
+
+add_library(tomlplusplus_tomlplusplus INTERFACE)
+add_library(tomlplusplus::tomlplusplus ALIAS tomlplusplus_tomlplusplus)
+
+target_include_directories(
+ tomlplusplus_tomlplusplus
+ INTERFACE
+ "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
+)
+
+target_compile_features(tomlplusplus_tomlplusplus INTERFACE cxx_std_17)
+```
+
+### Using with CMake — Subdirectory
+
+```cmake
+add_subdirectory(path/to/tomlplusplus)
+target_link_libraries(my_target PRIVATE tomlplusplus::tomlplusplus)
+```
+
+### Using with CMake — FetchContent
+
+```cmake
+include(FetchContent)
+FetchContent_Declare(
+ tomlplusplus
+ GIT_REPOSITORY https://github.com/marzer/tomlplusplus.git
+ GIT_TAG v3.4.0
+)
+FetchContent_MakeAvailable(tomlplusplus)
+
+target_link_libraries(my_target PRIVATE tomlplusplus::tomlplusplus)
+```
+
+### Using with CMake — find_package
+
+If toml++ is installed system-wide or the CMake config was generated:
+
+```cmake
+find_package(tomlplusplus REQUIRED)
+target_link_libraries(my_target PRIVATE tomlplusplus::tomlplusplus)
+```
+
+### CMake Options
+
+```cmake
+option(BUILD_EXAMPLES "Build examples tree." OFF)
+option(BUILD_FUZZER "Build fuzzer." OFF)
+option(TOMLPLUSPLUS_BUILD_MODULES "Build C++ modules support" OFF)
+```
+
+### CMake Install
+
+When `tomlplusplus_INSTALL` is true, install rules are included from `cmake/install-rules.cmake`.
+
+---
+
+## Visual Studio
+
+The repository includes Visual Studio project files:
+- `toml++.sln` — Solution file
+- `toml++.vcxproj` — Main project file
+- `toml++.vcxproj.filters` — Filter definition
+- `toml++.props` — Property sheet
+- `toml++.natvis` — Natvis debugger visualizer for TOML node types
+
+Individual examples also have `.vcxproj` files:
+- `examples/simple_parser.vcxproj`
+- `examples/toml_to_json_transcoder.vcxproj`
+- `examples/toml_generator.vcxproj`
+- `examples/error_printer.vcxproj`
+- `examples/parse_benchmark.vcxproj`
+
+---
+
+## Package Managers
+
+### Vcpkg
+
+```bash
+vcpkg install tomlplusplus
+```
+
+### Conan
+
+In your `conanfile.txt`:
+```
+[requires]
+tomlplusplus/3.4.0
+```
+
+### DDS
+
+In your `package.json5`:
+```json5
+depends: [
+ 'tomlpp^3.4.0',
+]
+```
+
+### tipi.build
+
+In `.tipi/deps`:
+```json
+{
+ "marzer/tomlplusplus": {}
+}
+```
+
+---
+
+## Compiler Requirements
+
+### Minimum Versions
+
+| Compiler | Minimum Version |
+|----------|----------------|
+| GCC | 8+ |
+| Clang | 8+ |
+| Apple Clang | Xcode 10+ |
+| MSVC | VS2019 (19.20+) |
+| Intel C++ | ICC 19+, ICL 19+ |
+
+### Required Standard
+
+C++17 is required. The preprocessor enforces this:
+
+```cpp
+#if TOML_CPP < 17
+#error toml++ requires C++17 or higher.
+#endif
+```
+
+### Compiler Feature Detection
+
+The library detects compiler capabilities:
+
+```cpp
+// TOML_CPP — detected C++ standard version (11, 14, 17, 20, 23, 26, 29)
+// TOML_GCC — GCC major version (0 if not GCC)
+// TOML_CLANG — Clang major version (0 if not Clang)
+// TOML_MSVC — MSVC version (0 if not MSVC)
+// TOML_ICC — Intel compiler detection
+// TOML_NVCC — NVIDIA CUDA compiler detection
+// TOML_HAS_CHAR8 — char8_t available
+// TOML_HAS_EXCEPTIONS — exceptions enabled
+```
+
+---
+
+## Configuration Macros Reference
+
+Define these **before** including `<toml++/toml.hpp>`:
+
+### Core Configuration
+
+```cpp
+// Library mode
+#define TOML_HEADER_ONLY 1 // 1 = header-only (default), 0 = compiled
+
+// Feature toggles
+#define TOML_ENABLE_PARSER 1 // 1 = include parser (default), 0 = no parser
+#define TOML_ENABLE_FORMATTERS 1 // 1 = include formatters (default), 0 = no formatters
+
+// Exception handling
+// Auto-detected from compiler settings. Override with:
+#define TOML_EXCEPTIONS 1 // or 0
+
+// Unreleased TOML features
+#define TOML_UNRELEASED_FEATURES 0 // 1 = enable upcoming TOML spec features
+```
+
+### Platform Configuration
+
+```cpp
+// Windows wide-string support (auto-detected on Windows)
+#define TOML_ENABLE_WINDOWS_COMPAT 1
+
+// Custom optional type
+#define TOML_OPTIONAL_TYPE std::optional // or your custom type
+
+// Disable environment checks
+#define TOML_DISABLE_ENVIRONMENT_CHECKS
+```
+
+### Example: Minimal Parse-Only Build
+
+```cpp
+#define TOML_ENABLE_FORMATTERS 0 // Don't need serialization
+#include <toml++/toml.hpp>
+```
+
+### Example: Serialize-Only Build
+
+```cpp
+#define TOML_ENABLE_PARSER 0 // Don't need parsing
+#include <toml++/toml.hpp>
+```
+
+---
+
+## Build Troubleshooting
+
+### Common Issues
+
+**"toml++ requires C++17 or higher"**
+Ensure your compiler is invoked with `-std=c++17` (or later) or the equivalent flag.
+
+**Large object files on MSVC**
+Use `/bigobj` flag (the Meson build adds this automatically).
+
+**Long compile times**
+Switch to compiled library mode (`TOML_HEADER_ONLY=0` + compile `src/toml.cpp`).
+
+**ODR violations when mixing settings**
+Ensure all translation units use the same values for `TOML_EXCEPTIONS`, `TOML_ENABLE_PARSER`, etc. The ABI namespace system catches some mismatches at link time, but not all.
+
+**`char8_t` errors on older compilers**
+Add `-fchar8_t` flag if your compiler supports it, or compile with C++20 mode.
+
+**RTTI disabled**
+toml++ does not require RTTI. It uses virtual dispatch, not `dynamic_cast` or `typeid`.
+
+**Exceptions disabled**
+Set `TOML_EXCEPTIONS=0` or use `-fno-exceptions`. The API adapts: `parse()` returns `parse_result` instead of throwing.
+
+---
+
+## Related Documentation
+
+- [overview.md](overview.md) — Library feature list
+- [basic-usage.md](basic-usage.md) — Getting started with parsing and serialization
+- [testing.md](testing.md) — Running the test suite
diff --git a/docs/handbook/tomlplusplus/code-style.md b/docs/handbook/tomlplusplus/code-style.md
new file mode 100644
index 0000000000..43c16d3ce4
--- /dev/null
+++ b/docs/handbook/tomlplusplus/code-style.md
@@ -0,0 +1,277 @@
+# toml++ — Code Style
+
+## Overview
+
+This document describes the code conventions and formatting rules used in the toml++ project, derived from the `.clang-format` configuration and source code patterns.
+
+---
+
+## Formatting Rules (`.clang-format`)
+
+The project uses clang-format with these key settings:
+
+### Indentation
+
+- **IndentWidth**: 4 (tabs are used, tab width 4)
+- **UseTab**: `ForContinuationAndIndentation`
+- **TabWidth**: 4
+- **ContinuationIndentWidth**: 4
+- **ConstructorInitializerIndentWidth**: 4
+- **AccessModifierOffset**: -4 (access specifiers at class indent level)
+- **IndentCaseLabels**: true
+- **NamespaceIndentation**: All
+
+### Braces
+
+- **BreakBeforeBraces**: Allman style
+ - Functions, classes, structs, enums, namespaces, control statements — all open brace on new line:
+
+```cpp
+namespace toml
+{
+ class node
+ {
+ public:
+ void method()
+ {
+ if (condition)
+ {
+ // ...
+ }
+ }
+ };
+}
+```
+
+### Alignment
+
+- **AlignConsecutiveAssignments**: true
+- **AlignConsecutiveDeclarations**: true
+- **AlignTrailingComments**: true
+- **AlignOperands**: true
+- **AlignAfterOpenBracket**: Align
+
+### Line Length
+
+- **ColumnLimit**: 120
+
+### Other Settings
+
+- **AllowShortFunctionsOnASingleLine**: Empty (empty functions on one line)
+- **AllowShortIfStatementsOnASingleLine**: Never
+- **AllowShortLoopsOnASingleLine**: false
+- **AlwaysBreakTemplateDeclarations**: Yes
+- **BinPackArguments**: false
+- **BinPackParameters**: false
+- **PointerAlignment**: Left (`int* ptr`, not `int *ptr`)
+- **SpaceAfterTemplateKeyword**: true
+- **SortIncludes**: false (manual include ordering)
+
+---
+
+## Naming Conventions
+
+### Macros
+
+All macros use the `TOML_` prefix with `UPPER_SNAKE_CASE`:
+
+```cpp
+TOML_HEADER_ONLY
+TOML_EXCEPTIONS
+TOML_ENABLE_PARSER
+TOML_ENABLE_FORMATTERS
+TOML_ENABLE_WINDOWS_COMPAT
+TOML_UNRELEASED_FEATURES
+TOML_LIB_MAJOR
+TOML_NAMESPACE_START
+TOML_NAMESPACE_END
+TOML_EXPORTED_CLASS
+TOML_EXPORTED_MEMBER_FUNCTION
+TOML_EXPORTED_STATIC_FUNCTION
+TOML_EXPORTED_FREE_FUNCTION
+```
+
+### Namespaces
+
+- Public API: `toml` namespace (aliased from a versioned namespace `toml::vN`)
+- Internal implementation: `toml::impl` (aka `toml::vN::impl`)
+- Macro-managed namespace boundaries:
+
+```cpp
+TOML_NAMESPACE_START // opens toml::v3
+{
+ // public API
+}
+TOML_NAMESPACE_END // closes
+
+TOML_IMPL_NAMESPACE_START // opens toml::v3::impl
+{
+ // internal details
+}
+TOML_IMPL_NAMESPACE_END
+```
+
+### Types and Classes
+
+- `snake_case` for all types: `node`, `table`, `array`, `value`, `path`, `path_component`, `parse_result`, `parse_error`, `source_region`, `source_position`, `date_time`, `time_offset`, `node_view`, `key`
+- Template parameters: `PascalCase` (`ValueType`, `IsConst`, `ViewedType`, `ElemType`)
+
+### Member Variables
+
+- Private members use trailing underscore: `val_`, `flags_`, `elems_`, `map_`, `inline_`, `source_`, `components_`
+- No prefix for public struct fields: `year`, `month`, `day`, `line`, `column`, `begin`, `end`, `path`
+
+### Methods
+
+- `snake_case`: `is_table()`, `as_array()`, `value_or()`, `push_back()`, `emplace_back()`, `is_homogeneous()`, `for_each()`, `parse_file()`, `at_path()`
+
+### Enums
+
+- `snake_case` enum type names: `node_type`, `value_flags`, `format_flags`, `path_component_type`
+- `snake_case` enum values: `node_type::string`, `value_flags::format_as_hexadecimal`, `format_flags::indent_sub_tables`
+
+---
+
+## Header Organization
+
+### File Pairs
+
+Most features have a `.hpp` declaration header and a `.inl` implementation file:
+
+```
+node.hpp / node.inl
+table.hpp / table.inl
+array.hpp / array.inl
+parser.hpp / parser.inl
+formatter.hpp / formatter.inl
+```
+
+### Include Guards
+
+Headers use `#pragma once` (no traditional include guards).
+
+### Header Structure
+
+Typical header layout:
+
+```cpp
+// license header comment
+#pragma once
+
+#include "preprocessor.hpp" // macros and config
+#include "forward_declarations.hpp" // forward declarations
+// ... other includes
+
+// Header-only mode guard
+#if defined(TOML_IMPLEMENTATION) || !TOML_HEADER_ONLY
+
+TOML_NAMESPACE_START
+{
+ // declarations / implementations
+}
+TOML_NAMESPACE_END
+
+#endif // TOML_IMPLEMENTATION
+```
+
+### Export Annotations
+
+Exported symbols use macros for DLL visibility:
+
+```cpp
+TOML_EXPORTED_CLASS table : public node
+{
+ TOML_EXPORTED_MEMBER_FUNCTION void clear() noexcept;
+ TOML_EXPORTED_STATIC_FUNCTION static table parse(...);
+};
+
+TOML_EXPORTED_FREE_FUNCTION parse_result parse(std::string_view);
+```
+
+---
+
+## Preprocessor Conventions
+
+### Compiler Detection
+
+```cpp
+TOML_GCC // GCC
+TOML_CLANG // Clang
+TOML_MSVC // MSVC
+TOML_ICC // Intel C++
+TOML_ICC_CL // Intel C++ (MSVC frontend)
+```
+
+### Feature Detection
+
+```cpp
+TOML_HAS_CHAR8 // char8_t available
+TOML_HAS_CUSTOM_OPTIONAL_TYPE // user-provided optional
+TOML_INT_CHARCONV // charconv for integers
+TOML_FLOAT_CHARCONV // charconv for floats
+```
+
+### Warning Management
+
+Extensive `#pragma` blocks suppress known-benign warnings per compiler:
+
+```cpp
+TOML_PUSH_WARNINGS
+TOML_DISABLE_WARNINGS
+// ... code ...
+TOML_POP_WARNINGS
+
+TOML_DISABLE_ARITHMETIC_WARNINGS
+TOML_DISABLE_SPAM_WARNINGS
+```
+
+---
+
+## Conditional Compilation Patterns
+
+Major features are conditionally compiled:
+
+```cpp
+#if TOML_ENABLE_PARSER
+ // parser code
+#endif
+
+#if TOML_ENABLE_FORMATTERS
+ // formatter code
+#endif
+
+#if TOML_ENABLE_WINDOWS_COMPAT
+ // wchar_t / wstring overloads
+#endif
+
+#if TOML_EXCEPTIONS
+ // exception-based error handling
+#else
+ // return-code error handling
+#endif
+```
+
+---
+
+## Documentation Conventions
+
+- Source comments use `//` style (not `/* */`)
+- Doxygen is used for API documentation (the public `toml.hpp` single-header has `///` comments)
+- Internal implementation headers have minimal comments — the code is expected to be self-documenting
+
+---
+
+## Build System Conventions
+
+- Primary build: **Meson** (`meson.build`, `meson_options.txt`)
+- Secondary: **CMake** (`CMakeLists.txt`)
+- All configuration macros can be set via build system options or via `#define` before including the header
+- Meson option names mirror the macro names: `is_header_only` → `TOML_HEADER_ONLY`
+
+---
+
+## Related Documentation
+
+- [architecture.md](architecture.md) — Project structure and design
+- [building.md](building.md) — Build system details
+- [testing.md](testing.md) — Testing conventions
diff --git a/docs/handbook/tomlplusplus/formatting.md b/docs/handbook/tomlplusplus/formatting.md
new file mode 100644
index 0000000000..d46a433a86
--- /dev/null
+++ b/docs/handbook/tomlplusplus/formatting.md
@@ -0,0 +1,546 @@
+# toml++ — Formatting
+
+## Overview
+
+toml++ includes three formatters for serializing a TOML node tree to text:
+
+| Formatter | Output Format | Header |
+|-----------|--------------|--------|
+| `toml::toml_formatter` | Standard TOML | `toml_formatter.hpp` |
+| `toml::json_formatter` | JSON | `json_formatter.hpp` |
+| `toml::yaml_formatter` | YAML | `yaml_formatter.hpp` |
+
+All three inherit from the internal `impl::formatter` base class and share common indentation and streaming infrastructure.
+
+Formatters can be disabled entirely via `TOML_ENABLE_FORMATTERS=0`.
+
+---
+
+## Base Formatter (`impl::formatter`)
+
+Declared in `include/toml++/impl/formatter.hpp`. Not directly instantiable — used through the concrete subclasses.
+
+### `formatter_constants`
+
+Each formatter defines a set of string constants:
+
+```cpp
+struct formatter_constants
+{
+ format_flags mandatory_flags; // flags always applied
+ format_flags ignored_flags; // flags explicitly not applied
+
+ std::string_view float_pos_inf; // "+inf", "Infinity", ".inf"
+ std::string_view float_neg_inf; // "-inf", "-Infinity", "-.inf"
+ std::string_view float_nan; // "nan", "NaN", ".nan"
+
+ std::string_view bool_true; // "true"
+ std::string_view bool_false; // "false"
+};
+```
+
+### `formatter_config`
+
+```cpp
+struct formatter_config
+{
+ format_flags flags; // active formatting flags
+};
+```
+
+### Internal State
+
+The base class manages:
+- `const node* source_` — the node being formatted
+- `formatter_constants constants_` — string representations
+- `formatter_config config_` — user-supplied configuration
+- `int indent_` — current indentation level
+- `bool naked_newline_` — tracks newline state
+
+Helper methods:
+- `increase_indent()` / `decrease_indent()` — adjust indentation
+- `print_indent()` — emit current indentation
+- `print_newline()` — emit newline with proper tracking
+- `print_string()` — format a TOML string with proper escaping
+- `print_value()` — format a leaf value (delegates to constants for inf/nan/bool)
+
+---
+
+## `format_flags`
+
+Bitmask enum controlling formatting behavior:
+
+```cpp
+enum class format_flags : uint64_t
+{
+ none = 0,
+ quote_dates_and_times = (1ull << 0),
+ quote_infinities_and_nans = (1ull << 1),
+ allow_literal_strings = (1ull << 2),
+ allow_multi_line_strings = (1ull << 3),
+ allow_real_tabs_in_values = (1ull << 4),
+ allow_unicode_strings = (1ull << 5),
+ allow_binary_integers = (1ull << 6),
+ allow_octal_integers = (1ull << 7),
+ allow_hexadecimal_integers = (1ull << 8),
+ indent_sub_tables = (1ull << 9),
+ indent_array_elements = (1ull << 10),
+ indentation = indent_sub_tables | indent_array_elements,
+ relaxed_float_precision = (1ull << 11),
+ terse_key_value_pairs = (1ull << 12),
+};
+```
+
+### Flag Details
+
+| Flag | Effect |
+|------|--------|
+| `quote_dates_and_times` | Emit dates/times as `"2024-01-15"` instead of `2024-01-15` |
+| `quote_infinities_and_nans` | Emit `"inf"` / `"nan"` as quoted strings |
+| `allow_literal_strings` | Use `'single quotes'` where possible |
+| `allow_multi_line_strings` | Use `"""multi-line"""` where appropriate |
+| `allow_real_tabs_in_values` | Emit `\t` as literal tab instead of escape |
+| `allow_unicode_strings` | Keep Unicode characters instead of escaping to `\uXXXX` |
+| `allow_binary_integers` | Emit `0b1010` for binary-flagged integers |
+| `allow_octal_integers` | Emit `0o755` for octal-flagged integers |
+| `allow_hexadecimal_integers` | Emit `0xFF` for hex-flagged integers |
+| `indent_sub_tables` | Indent sub-table content |
+| `indent_array_elements` | Indent array elements on separate lines |
+| `relaxed_float_precision` | Use less precision for floats |
+| `terse_key_value_pairs` | Emit `key=value` instead of `key = value` |
+
+---
+
+## TOML Formatter
+
+### Constants
+
+```cpp
+static constexpr formatter_constants toml_formatter_constants = {
+ // mandatory_flags:
+ format_flags::allow_literal_strings
+ | format_flags::allow_multi_line_strings
+ | format_flags::allow_unicode_strings
+ | format_flags::allow_binary_integers
+ | format_flags::allow_octal_integers
+ | format_flags::allow_hexadecimal_integers,
+
+ // ignored_flags:
+ format_flags::quote_dates_and_times
+ | format_flags::quote_infinities_and_nans,
+
+ // float_pos_inf, float_neg_inf, float_nan:
+ "inf"sv, "-inf"sv, "nan"sv,
+
+ // bool_true, bool_false:
+ "true"sv, "false"sv
+};
+```
+
+### Default Flags
+
+```cpp
+static constexpr format_flags default_flags =
+ format_flags::allow_literal_strings
+ | format_flags::allow_multi_line_strings
+ | format_flags::allow_unicode_strings
+ | format_flags::allow_binary_integers
+ | format_flags::allow_octal_integers
+ | format_flags::allow_hexadecimal_integers
+ | format_flags::indentation;
+```
+
+### Construction
+
+```cpp
+// Format a table (most common)
+toml::toml_formatter fmt{ my_table };
+std::cout << fmt;
+
+// With custom flags
+toml::toml_formatter fmt2{ my_table, format_flags::indent_sub_tables };
+std::cout << fmt2;
+
+// Format any node (array, value, etc.)
+toml::toml_formatter fmt3{ my_array };
+std::cout << fmt3;
+```
+
+### Key Path Tracking
+
+The TOML formatter maintains a `key_path_` to correctly generate fully-qualified section headers:
+
+```cpp
+auto tbl = toml::parse(R"(
+ [server.database]
+ host = "localhost"
+ port = 5432
+)");
+
+std::cout << toml::toml_formatter{ tbl };
+```
+
+Output:
+```toml
+[server.database]
+host = "localhost"
+port = 5432
+```
+
+### Inline Tables
+
+Tables marked as inline are output as `{ key = val, ... }`:
+
+```cpp
+auto tbl = toml::table{
+ { "point", toml::table{ { "x", 1 }, { "y", 2 } } }
+};
+tbl["point"].as_table()->is_inline(true);
+
+std::cout << toml::toml_formatter{ tbl };
+// Output: point = { x = 1, y = 2 }
+```
+
+### Streaming
+
+The default `operator<<` for nodes uses `toml_formatter`:
+
+```cpp
+auto tbl = toml::parse("key = 42");
+std::cout << tbl << "\n";
+// Equivalent to: std::cout << toml::toml_formatter{ tbl } << "\n";
+```
+
+---
+
+## JSON Formatter
+
+### Constants
+
+```cpp
+static constexpr formatter_constants json_formatter_constants = {
+ // mandatory_flags:
+ format_flags::quote_dates_and_times
+ | format_flags::quote_infinities_and_nans,
+
+ // ignored_flags:
+ format_flags::allow_literal_strings
+ | format_flags::allow_multi_line_strings
+ | format_flags::allow_binary_integers
+ | format_flags::allow_octal_integers
+ | format_flags::allow_hexadecimal_integers,
+
+ // float_pos_inf, float_neg_inf, float_nan:
+ "Infinity"sv, "-Infinity"sv, "NaN"sv,
+
+ // bool_true, bool_false:
+ "true"sv, "false"sv
+};
+```
+
+### Key Differences from TOML Formatter
+
+- **Dates and times** are always quoted: `"2024-01-15"` instead of `2024-01-15`
+- **Infinity and NaN** are quoted: `"Infinity"`, `"-Infinity"`, `"NaN"`
+- **Integers** always in decimal (no `0xFF`, `0o777`, `0b1010`)
+- **Object keys** always quoted
+- **No section headers** — uses nested `{ }` structure
+- **Commas** separate elements
+
+### Default Flags
+
+```cpp
+static constexpr format_flags default_flags =
+ format_flags::quote_dates_and_times
+ | format_flags::quote_infinities_and_nans
+ | format_flags::indentation;
+```
+
+### Usage
+
+```cpp
+auto tbl = toml::parse(R"(
+ [server]
+ host = "localhost"
+ port = 8080
+ tags = ["web", "api"]
+)");
+
+std::cout << toml::json_formatter{ tbl } << "\n";
+```
+
+Output:
+```json
+{
+ "server": {
+ "host": "localhost",
+ "port": 8080,
+ "tags": [
+ "web",
+ "api"
+ ]
+ }
+}
+```
+
+### Transcoding Example
+
+From the `examples/toml_to_json_transcoder.cpp`:
+
+```cpp
+#include <toml++/toml.hpp>
+#include <iostream>
+
+using namespace std::string_view_literals;
+
+int main(int argc, char** argv)
+{
+ toml::table tbl;
+ try
+ {
+ tbl = toml::parse(std::cin, "stdin"sv);
+ }
+ catch (const toml::parse_error& err)
+ {
+ std::cerr << err << "\n";
+ return 1;
+ }
+
+ std::cout << toml::json_formatter{ tbl } << "\n";
+ return 0;
+}
+```
+
+---
+
+## YAML Formatter
+
+### Constants
+
+```cpp
+static constexpr formatter_constants yaml_formatter_constants = {
+ // mandatory_flags:
+ format_flags::quote_dates_and_times,
+
+ // ignored_flags:
+ format_flags::allow_literal_strings
+ | format_flags::allow_multi_line_strings
+ | format_flags::allow_binary_integers
+ | format_flags::allow_octal_integers
+ | format_flags::allow_hexadecimal_integers,
+
+ // float_pos_inf, float_neg_inf, float_nan:
+ ".inf"sv, "-.inf"sv, ".nan"sv,
+
+ // bool_true, bool_false:
+ "true"sv, "false"sv
+};
+```
+
+### Key Differences
+
+- **Indentation** uses 2 spaces (indent level 1 = 2 spaces)
+- **No braces or brackets** — uses YAML's indentation-based structure
+- **Dates quoted**: `"2024-01-15"`
+- **Inf/NaN**: `.inf`, `-.inf`, `.nan` (YAML style)
+- **Array elements** prefixed with `- `
+- **No commas**
+
+### Default Flags
+
+```cpp
+static constexpr format_flags default_flags =
+ format_flags::quote_dates_and_times
+ | format_flags::allow_unicode_strings
+ | format_flags::indentation;
+```
+
+### Usage
+
+```cpp
+auto tbl = toml::parse(R"(
+ [server]
+ host = "localhost"
+ port = 8080
+ tags = ["web", "api"]
+)");
+
+std::cout << toml::yaml_formatter{ tbl } << "\n";
+```
+
+Output:
+```yaml
+server:
+ host: "localhost"
+ port: 8080
+ tags:
+ - "web"
+ - "api"
+```
+
+---
+
+## Printing Individual Nodes
+
+Any node can be formatted, not just tables:
+
+```cpp
+auto arr = toml::array{ 1, 2, 3, "four" };
+std::cout << toml::toml_formatter{ arr } << "\n";
+// [1, 2, 3, "four"]
+
+std::cout << toml::json_formatter{ arr } << "\n";
+// [1, 2, 3, "four"]
+
+auto val = toml::value<std::string>{ "hello" };
+std::cout << toml::toml_formatter{ val } << "\n";
+// "hello"
+```
+
+---
+
+## Writing to Files
+
+```cpp
+#include <fstream>
+
+auto tbl = toml::parse_file("input.toml");
+
+// Write as TOML
+{
+ std::ofstream out("output.toml");
+ out << toml::toml_formatter{ tbl };
+}
+
+// Write as JSON
+{
+ std::ofstream out("output.json");
+ out << toml::json_formatter{ tbl };
+}
+
+// Write as YAML
+{
+ std::ofstream out("output.yaml");
+ out << toml::yaml_formatter{ tbl };
+}
+```
+
+---
+
+## Customizing Output
+
+### Disabling Indentation
+
+```cpp
+auto fmt = toml::toml_formatter{ tbl, format_flags::none };
+std::cout << fmt;
+```
+
+### Terse Key-Value Pairs
+
+```cpp
+auto fmt = toml::toml_formatter{
+ tbl,
+ format_flags::terse_key_value_pairs | format_flags::indentation
+};
+// Output: key=value instead of key = value
+```
+
+### Preserving Source Format
+
+By default, integer format flags from parsing are preserved. A value parsed from `0xFF` will serialize back as `0xFF`:
+
+```cpp
+auto tbl = toml::parse("mask = 0xFF");
+std::cout << tbl << "\n";
+// Output: mask = 0xFF
+```
+
+This works because the parser sets `value_flags::format_as_hexadecimal` on the value, and the TOML formatter has `allow_hexadecimal_integers` in its mandatory flags.
+
+---
+
+## Formatter Comparison
+
+| Feature | `toml_formatter` | `json_formatter` | `yaml_formatter` |
+|---------|-----------------|-----------------|-----------------|
+| Format | TOML v1.0 | JSON | YAML |
+| Indentation | Tab (default) | 4 spaces | 2 spaces |
+| Infinity | `inf` | `"Infinity"` | `.inf` |
+| NaN | `nan` | `"NaN"` | `.nan` |
+| Dates | Unquoted | Quoted | Quoted |
+| Integer formats | Hex/Oct/Bin | Decimal only | Decimal only |
+| Literal strings | Yes | No | No |
+| Multi-line strings | Yes | No | No |
+| Section headers | `[table]` | N/A | N/A |
+| Inline tables | `{ k = v }` | N/A | N/A |
+
+---
+
+## Complete Example
+
+```cpp
+#include <toml++/toml.hpp>
+#include <iostream>
+#include <sstream>
+
+int main()
+{
+ auto config = toml::parse(R"(
+ title = "My Config"
+ debug = true
+ max_connections = 0xFF
+
+ [server]
+ host = "localhost"
+ port = 8080
+ started = 2024-01-15T10:30:00Z
+
+ [server.ssl]
+ enabled = true
+ cert = "/etc/ssl/cert.pem"
+
+ [[server.routes]]
+ path = "/"
+ handler = "index"
+
+ [[server.routes]]
+ path = "/api"
+ handler = "api"
+ )");
+
+ // TOML output (the default)
+ std::cout << "=== TOML ===\n";
+ std::cout << config << "\n\n";
+
+ // JSON output
+ std::cout << "=== JSON ===\n";
+ std::cout << toml::json_formatter{ config } << "\n\n";
+
+ // YAML output
+ std::cout << "=== YAML ===\n";
+ std::cout << toml::yaml_formatter{ config } << "\n\n";
+
+ // Terse TOML
+ std::cout << "=== Terse TOML ===\n";
+ std::cout << toml::toml_formatter{
+ config,
+ toml::format_flags::terse_key_value_pairs
+ | toml::format_flags::indentation
+ } << "\n";
+
+ // Format to string
+ std::ostringstream ss;
+ ss << toml::json_formatter{ config };
+ std::string json_string = ss.str();
+
+ return 0;
+}
+```
+
+---
+
+## Related Documentation
+
+- [parsing.md](parsing.md) — Parsing TOML into node trees
+- [values.md](values.md) — Value flags affecting output format
+- [tables.md](tables.md) — Inline table formatting
+- [basic-usage.md](basic-usage.md) — Quick formatting examples
diff --git a/docs/handbook/tomlplusplus/node-system.md b/docs/handbook/tomlplusplus/node-system.md
new file mode 100644
index 0000000000..c34b531385
--- /dev/null
+++ b/docs/handbook/tomlplusplus/node-system.md
@@ -0,0 +1,625 @@
+# toml++ — Node System
+
+## Overview
+
+The node system is the core of toml++'s data model. Every element in a TOML document — tables, arrays, and leaf values — is represented as a `toml::node`. This document covers the base class interface, `node_view` for safe access, type checking mechanisms, value retrieval strategies, and visitation patterns.
+
+---
+
+## `toml::node` — The Base Class
+
+`toml::node` is an abstract base class (`TOML_ABSTRACT_INTERFACE`) declared in `include/toml++/impl/node.hpp`. It cannot be instantiated directly; only its derived classes (`table`, `array`, `value<T>`) can.
+
+### Source Tracking
+
+Every node stores a `source_region` tracking its origin in the parsed document:
+
+```cpp
+class node
+{
+ private:
+ source_region source_{};
+
+ public:
+ const source_region& source() const noexcept;
+};
+```
+
+For programmatically-constructed nodes, `source()` returns a default-constructed region (all zeros). For parsed nodes, it contains the file path and begin/end line/column.
+
+### Lifetime
+
+```cpp
+ protected:
+ node() noexcept;
+ node(const node&) noexcept; // copies source_region
+ node(node&&) noexcept; // moves source_region
+ node& operator=(const node&) noexcept;
+ node& operator=(node&&) noexcept;
+
+ public:
+ virtual ~node() noexcept;
+```
+
+Constructors are protected — you create nodes by constructing `table`, `array`, or `value<T>` objects.
+
+---
+
+## Type Checking
+
+### Virtual Type Checks
+
+Every `node` provides a full set of virtual type-checking methods:
+
+```cpp
+virtual node_type type() const noexcept = 0;
+
+virtual bool is_table() const noexcept = 0;
+virtual bool is_array() const noexcept = 0;
+virtual bool is_array_of_tables() const noexcept;
+virtual bool is_value() const noexcept = 0;
+virtual bool is_string() const noexcept = 0;
+virtual bool is_integer() const noexcept = 0;
+virtual bool is_floating_point() const noexcept = 0;
+virtual bool is_number() const noexcept = 0;
+virtual bool is_boolean() const noexcept = 0;
+virtual bool is_date() const noexcept = 0;
+virtual bool is_time() const noexcept = 0;
+virtual bool is_date_time() const noexcept = 0;
+```
+
+`is_number()` returns `true` for both integers and floating-point values.
+
+`is_array_of_tables()` returns `true` only for arrays where every element is a table.
+
+### Template Type Check: `is<T>()`
+
+```cpp
+template <typename T>
+bool is() const noexcept;
+```
+
+Accepts any TOML node or value type. Uses `if constexpr` internally to dispatch:
+
+```cpp
+node.is<toml::table>() // equivalent to node.is_table()
+node.is<toml::array>() // equivalent to node.is_array()
+node.is<std::string>() // equivalent to node.is_string()
+node.is<int64_t>() // equivalent to node.is_integer()
+node.is<double>() // equivalent to node.is_floating_point()
+node.is<bool>() // equivalent to node.is_boolean()
+node.is<toml::date>() // equivalent to node.is_date()
+node.is<toml::time>() // equivalent to node.is_time()
+node.is<toml::date_time>() // equivalent to node.is_date_time()
+```
+
+You can also use the wrapped `value<T>` type:
+```cpp
+node.is<toml::value<int64_t>>() // same as node.is<int64_t>()
+```
+
+The `impl::unwrap_node<T>` trait unwraps `value<T>` → `T` and `node_view<T>` → `T`.
+
+### Compile-Time Type Traits
+
+The `toml` namespace provides type traits usable with `if constexpr`:
+
+```cpp
+// Type traits for use in generic/template code
+toml::is_table<decltype(val)> // true if val is table or node_view of table
+toml::is_array<decltype(val)> // true if val is array or node_view of array
+toml::is_string<decltype(val)> // true if val is value<std::string>
+toml::is_integer<decltype(val)> // true if val is value<int64_t>
+toml::is_floating_point<decltype(val)> // true if val is value<double>
+toml::is_number<decltype(val)> // integer or floating-point
+toml::is_boolean<decltype(val)> // true if val is value<bool>
+toml::is_date<decltype(val)> // true if val is value<date>
+toml::is_time<decltype(val)> // true if val is value<time>
+toml::is_date_time<decltype(val)> // true if val is value<date_time>
+toml::is_value<T> // true for any native value type
+toml::is_container<T> // true for table or array
+```
+
+These are critical for `for_each()` visitors:
+
+```cpp
+tbl.for_each([](auto& key, auto& value)
+{
+ if constexpr (toml::is_string<decltype(value)>)
+ std::cout << key << " is a string: " << value.get() << "\n";
+ else if constexpr (toml::is_integer<decltype(value)>)
+ std::cout << key << " is an integer: " << value.get() << "\n";
+ else if constexpr (toml::is_table<decltype(value)>)
+ std::cout << key << " is a table with " << value.size() << " entries\n";
+});
+```
+
+### `node_type` Enum
+
+Runtime type identification uses the `node_type` enum:
+
+```cpp
+enum class node_type : uint8_t
+{
+ none, // sentinel / empty
+ table,
+ array,
+ string,
+ integer,
+ floating_point,
+ boolean,
+ date,
+ time,
+ date_time
+};
+```
+
+Usage:
+```cpp
+switch (node.type())
+{
+ case toml::node_type::string: /* ... */ break;
+ case toml::node_type::table: /* ... */ break;
+ // ...
+}
+```
+
+`node_type` is streamable:
+```cpp
+std::cout << node.type() << "\n"; // prints "string", "integer", etc.
+```
+
+---
+
+## Type Casts: `as<T>()` and Friends
+
+### Virtual Cast Methods
+
+```cpp
+// Return pointer if this node IS that type, nullptr otherwise
+virtual table* as_table() noexcept = 0;
+virtual array* as_array() noexcept = 0;
+virtual toml::value<std::string>* as_string() noexcept = 0;
+virtual toml::value<int64_t>* as_integer() noexcept = 0;
+virtual toml::value<double>* as_floating_point() noexcept = 0;
+virtual toml::value<bool>* as_boolean() noexcept = 0;
+virtual toml::value<date>* as_date() noexcept = 0;
+virtual toml::value<time>* as_time() noexcept = 0;
+virtual toml::value<date_time>* as_date_time() noexcept = 0;
+// + const overloads
+```
+
+Each derived class implements these: `table::as_table()` returns `this`, all others return `nullptr`; `value<int64_t>::as_integer()` returns `this`, all others return `nullptr`.
+
+### Template Cast: `as<T>()`
+
+```cpp
+template <typename T>
+impl::wrap_node<T>* as() noexcept;
+
+template <typename T>
+const impl::wrap_node<T>* as() const noexcept;
+```
+
+Dispatches to the appropriate `as_*()` method. `impl::wrap_node<T>` wraps native types in `value<T>`:
+- `as<int64_t>()` → `value<int64_t>*` (via `as_integer()`)
+- `as<toml::table>()` → `table*` (via `as_table()`)
+- `as<toml::value<int64_t>>()` → `value<int64_t>*` (same as above)
+
+Usage:
+```cpp
+if (auto* tbl = node.as<toml::table>())
+ std::cout << "Table with " << tbl->size() << " entries\n";
+
+if (auto* val = node.as<int64_t>())
+ std::cout << "Integer: " << val->get() << "\n";
+```
+
+### Reference Access: `ref<T>()`
+
+```cpp
+template <typename T>
+decltype(auto) ref() & noexcept;
+template <typename T>
+decltype(auto) ref() && noexcept;
+template <typename T>
+decltype(auto) ref() const& noexcept;
+template <typename T>
+decltype(auto) ref() const&& noexcept;
+```
+
+Returns a **direct reference** to the underlying value. Unlike `as<T>()`, this does not return a pointer and does not check the type at runtime — it is **undefined behavior** to call `ref<T>()` with the wrong type. It asserts in debug builds.
+
+```cpp
+// Only safe if you KNOW the type
+int64_t& val = node.ref<int64_t>();
+```
+
+---
+
+## Value Retrieval
+
+### `value<T>()` — Permissive Retrieval
+
+```cpp
+template <typename T>
+optional<T> value() const noexcept(...);
+```
+
+Returns the node's value, allowing type conversions:
+
+| Source Type | Target Type | Behavior |
+|-------------|-------------|----------|
+| `int64_t` | `int64_t` | Exact |
+| `int64_t` | `double` | Converts |
+| `int64_t` | `int32_t` | Converts if lossless (range check) |
+| `int64_t` | `uint32_t` | Converts if lossless (range check) |
+| `double` | `double` | Exact |
+| `double` | `int64_t` | No (returns empty) |
+| `bool` | `bool` | Exact |
+| `bool` | `int64_t` | Converts (0 or 1) |
+| `std::string` | `std::string_view` | Returns view |
+| `std::string` | `std::string` | Returns copy |
+| `date` | `date` | Exact |
+| `time` | `time` | Exact |
+| `date_time` | `date_time` | Exact |
+
+```cpp
+auto tbl = toml::parse("val = 42");
+
+// These all work:
+auto as_i64 = tbl["val"].value<int64_t>(); // 42
+auto as_dbl = tbl["val"].value<double>(); // 42.0
+auto as_i32 = tbl["val"].value<int32_t>(); // 42
+auto as_u16 = tbl["val"].value<uint16_t>(); // 42
+
+// Returns empty:
+auto as_str = tbl["val"].value<std::string>(); // nullopt (int != string)
+```
+
+### `value_exact<T>()` — Strict Retrieval
+
+```cpp
+template <typename T>
+optional<T> value_exact() const noexcept(...);
+```
+
+Only returns a value if the node's native type matches exactly:
+
+```cpp
+auto tbl = toml::parse("val = 42");
+
+auto exact = tbl["val"].value_exact<int64_t>(); // 42
+auto wrong = tbl["val"].value_exact<double>(); // nullopt (42 is integer, not float)
+auto wrong2 = tbl["val"].value_exact<int32_t>(); // nullopt (native type is int64_t)
+```
+
+Allowed target types for `value_exact<T>()`:
+- Native TOML types: `std::string`, `int64_t`, `double`, `bool`, `date`, `time`, `date_time`
+- Lossless representations: `std::string_view`, `const char*` (for strings), `std::wstring` (Windows)
+
+### `value_or()` — Retrieval with Default
+
+```cpp
+template <typename T>
+auto value_or(T&& default_value) const noexcept(...);
+```
+
+Returns the value if the node contains a compatible type, otherwise returns the default. The return type matches the default value's type.
+
+```cpp
+int64_t port = tbl["port"].value_or(8080); // 8080 if missing
+std::string_view host = tbl["host"].value_or("localhost"sv);
+
+// Works safely on missing paths:
+bool flag = tbl["section"]["missing"]["deep"].value_or(false);
+```
+
+---
+
+## `toml::node_view<T>` — Safe Optional Node Access
+
+### Purpose
+
+`node_view` wraps a `node*` (possibly null) and provides the full node interface with null safety. It's what `table::operator[]` returns.
+
+### Template Parameter
+
+```cpp
+template <typename ViewedType>
+class node_view
+{
+ static_assert(impl::is_one_of<ViewedType, toml::node, const toml::node>);
+ // ...
+ mutable viewed_type* node_ = nullptr;
+};
+```
+
+- `node_view<node>` — mutable view
+- `node_view<const node>` — const view
+
+### Construction
+
+```cpp
+node_view() noexcept = default; // empty view
+explicit node_view(viewed_type* node) noexcept; // from pointer
+explicit node_view(viewed_type& node) noexcept; // from reference
+```
+
+### Boolean Conversion
+
+```cpp
+explicit operator bool() const noexcept; // true if node_ != nullptr
+```
+
+### Chained Subscript
+
+The key design feature — subscript returns another `node_view`, enabling safe deep access:
+
+```cpp
+// operator[] on node_view
+node_view operator[](std::string_view key) const noexcept;
+node_view operator[](size_t index) const noexcept;
+node_view operator[](const toml::path& p) const noexcept;
+```
+
+If `node_` is null or isn't the right container type, returns an empty `node_view`. This makes arbitrarily deep access safe:
+
+```cpp
+// All of these are safe even if intermediate keys don't exist:
+auto v1 = tbl["a"]["b"]["c"].value_or(0);
+auto v2 = tbl["missing"]["doesn't"]["exist"].value_or("default"sv);
+```
+
+### Full Interface Mirror
+
+`node_view` provides all the same methods as `node`:
+- Type checks: `is_table()`, `is_string()`, `is<T>()`, etc.
+- Type casts: `as_table()`, `as_string()`, `as<T>()`, etc.
+- Value retrieval: `value<T>()`, `value_exact<T>()`, `value_or()`
+- Source access: `source()`
+- Visitation: `visit()`
+
+All safely return defaults/nullptr/empty-optionals when the view is empty.
+
+### `node()` Accessor
+
+```cpp
+viewed_type* node() const noexcept;
+```
+
+Returns the raw pointer, which may be `nullptr`.
+
+### Printing
+
+```cpp
+friend std::ostream& operator<<(std::ostream& os, const node_view& nv);
+```
+
+Prints the referenced node's value, or nothing if empty.
+
+---
+
+## Homogeneity Checking
+
+### `is_homogeneous()` — Check Element Type Uniformity
+
+Available on `node`, `table`, and `array`:
+
+```cpp
+// With node_type parameter:
+virtual bool is_homogeneous(node_type ntype) const noexcept = 0;
+
+// With out-parameter for first mismatch:
+virtual bool is_homogeneous(node_type ntype, node*& first_nonmatch) noexcept = 0;
+
+// Template version:
+template <typename ElemType = void>
+bool is_homogeneous() const noexcept;
+```
+
+**Behavior:**
+- `node_type::none` → "are all elements the same type?" (any type, as long as consistent)
+- Any specific type → "are all elements this type?"
+- Returns `false` for empty containers
+
+```cpp
+auto arr = toml::array{ 1, 2, 3 };
+
+arr.is_homogeneous(toml::node_type::integer); // true
+arr.is_homogeneous(toml::node_type::string); // false
+arr.is_homogeneous(toml::node_type::none); // true (all same type)
+arr.is_homogeneous<int64_t>(); // true
+arr.is_homogeneous<double>(); // false
+arr.is_homogeneous(); // true (void = any consistent type)
+
+// Find the first mismatch:
+auto mixed = toml::array{ 1, 2, "oops" };
+toml::node* mismatch = nullptr;
+if (!mixed.is_homogeneous(toml::node_type::integer, mismatch))
+{
+ std::cout << "Mismatch at " << mismatch->source() << "\n";
+ std::cout << "Type: " << mismatch->type() << "\n"; // "string"
+}
+```
+
+For `value<T>` nodes, `is_homogeneous()` trivially returns `true` (a single value is always homogeneous with itself).
+
+---
+
+## Visitation with `visit()`
+
+```cpp
+template <typename Func>
+decltype(auto) visit(Func&& visitor) & noexcept(...);
+template <typename Func>
+decltype(auto) visit(Func&& visitor) && noexcept(...);
+template <typename Func>
+decltype(auto) visit(Func&& visitor) const& noexcept(...);
+template <typename Func>
+decltype(auto) visit(Func&& visitor) const&& noexcept(...);
+```
+
+Calls the visitor with the concrete derived type. The visitor must accept all possible types (use a generic lambda or overload set):
+
+```cpp
+node.visit([](auto& concrete)
+{
+	using T = std::remove_cv_t<std::remove_reference_t<decltype(concrete)>>; // C++17 equivalent of C++20's std::remove_cvref_t
+
+ if constexpr (std::is_same_v<T, toml::table>)
+ std::cout << "table with " << concrete.size() << " keys\n";
+ else if constexpr (std::is_same_v<T, toml::array>)
+ std::cout << "array with " << concrete.size() << " elements\n";
+ else
+ std::cout << "value: " << concrete.get() << "\n";
+});
+```
+
+The visitor receives one of:
+- `table&` / `const table&`
+- `array&` / `const array&`
+- `value<std::string>&` / `const value<std::string>&`
+- `value<int64_t>&` / `const value<int64_t>&`
+- `value<double>&` / `const value<double>&`
+- `value<bool>&` / `const value<bool>&`
+- `value<date>&` / `const value<date>&`
+- `value<time>&` / `const value<time>&`
+- `value<date_time>&` / `const value<date_time>&`
+
+### Return Values
+
+If your visitor returns a value, `visit()` returns it. All branches must return the same type:
+
+```cpp
+std::string desc = node.visit([](auto& val) -> std::string
+{
+	using T = std::remove_cv_t<std::remove_reference_t<decltype(val)>>; // C++17 equivalent of C++20's std::remove_cvref_t
+ if constexpr (std::is_same_v<T, toml::table>)
+ return "table";
+ else if constexpr (std::is_same_v<T, toml::array>)
+ return "array";
+ else
+ return "value";
+});
+```
+
+---
+
+## `for_each()` Iteration
+
+Available on `table` and `array`:
+
+### On Tables
+
+```cpp
+template <typename Func>
+table& for_each(Func&& visitor) &;
+```
+
+The visitor receives `(const toml::key& key, auto& value)` where `value` is the concrete type:
+
+```cpp
+tbl.for_each([](const toml::key& key, auto& value)
+{
+ std::cout << key.str() << " (" << key.source().begin.line << "): ";
+
+ if constexpr (toml::is_string<decltype(value)>)
+ std::cout << '"' << value.get() << "\"\n";
+ else if constexpr (toml::is_integer<decltype(value)>)
+ std::cout << value.get() << "\n";
+ else if constexpr (toml::is_table<decltype(value)>)
+ std::cout << "{...}\n";
+ else
+ std::cout << value << "\n";
+});
+```
+
+### On Arrays
+
+```cpp
+template <typename Func>
+array& for_each(Func&& visitor) &;
+```
+
+The visitor receives `(size_t index, auto& element)` or just `(auto& element)`:
+
+```cpp
+arr.for_each([](size_t idx, auto& elem)
+{
+ std::cout << "[" << idx << "] " << elem << "\n";
+});
+```
+
+### Early Exit
+
+On compilers without the GCC 7 bug (`TOML_RETURN_BOOL_FROM_FOR_EACH_BROKEN == 0`), your visitor can return `bool` to stop iteration early:
+
+```cpp
+tbl.for_each([](const toml::key& key, auto& value) -> bool
+{
+ if (key.str() == "stop_here")
+ return false; // stop iteration
+ std::cout << key << "\n";
+ return true; // continue
+});
+```
+
+---
+
+## `impl::unwrap_node<T>` and `impl::wrap_node<T>`
+
+These internal type traits handle the mapping between user-facing types and internal node types:
+
+- `unwrap_node<value<int64_t>>` → `int64_t`
+- `unwrap_node<int64_t>` → `int64_t` (no change)
+- `unwrap_node<node_view<node>>` → `node` (extracted viewed type)
+- `wrap_node<int64_t>` → `value<int64_t>`
+- `wrap_node<table>` → `table` (no change)
+- `wrap_node<array>` → `array` (no change)
+
+These ensure that `as<int64_t>()` returns `value<int64_t>*` and `as<table>()` returns `table*`.
+
+---
+
+## Node Comparison
+
+Nodes support equality comparison:
+
+```cpp
+// Same type and value → equal
+toml::value<int64_t> a(42);
+toml::value<int64_t> b(42);
+bool eq = (a == b); // true
+
+// Cross-type comparison is always false
+toml::value<int64_t> i(42);
+toml::value<double> d(42.0);
+bool eq2 = (i == d); // false (different node types)
+
+// Tables and arrays compare structurally (deep equality)
+```
+
+---
+
+## Summary: Choosing the Right Accessor
+
+| Need | Method | Returns | Null Safe |
+|------|--------|---------|-----------|
+| Check if key exists | `tbl["key"]` then `operator bool()` | `node_view` | Yes |
+| Get value or default | `value_or(default)` | Value type | Yes |
+| Get value, might be absent | `value<T>()` | `optional<T>` | Yes |
+| Get exact-type value | `value_exact<T>()` | `optional<T>` | Yes |
+| Get typed pointer | `as<T>()` | `T*` or `nullptr` | Yes |
+| Direct reference (unsafe) | `ref<T>()` | `T&` | No (UB if wrong type) |
+| Raw node pointer | `get(key)` | `node*` | Yes (returns null) |
+| Typed node pointer | `get_as<T>(key)` | `wrap_node<T>*` | Yes (returns null) |
+
+---
+
+## Related Documentation
+
+- [architecture.md](architecture.md) — Overall class hierarchy
+- [tables.md](tables.md) — Table-specific operations
+- [arrays.md](arrays.md) — Array-specific operations
+- [values.md](values.md) — Value template details
diff --git a/docs/handbook/tomlplusplus/overview.md b/docs/handbook/tomlplusplus/overview.md
new file mode 100644
index 0000000000..d8ba7df5aa
--- /dev/null
+++ b/docs/handbook/tomlplusplus/overview.md
@@ -0,0 +1,474 @@
+# toml++ (tomlplusplus) — Overview
+
+## What Is toml++?
+
+toml++ is a header-only TOML v1.0 parser, serializer, and data model library for C++17 and later. It is authored by Mark Gillard and published under the MIT license. The library version as of this documentation is **3.4.0**, implementing TOML language specification version **1.0.0**.
+
+The library lives in the `toml` namespace and provides a complete object model for TOML documents: tables, arrays, and typed values. It can parse TOML from strings, streams, and files; manipulate the resulting tree programmatically; and serialize back to TOML, JSON, or YAML.
+
+Repository: [https://github.com/marzer/tomlplusplus](https://github.com/marzer/tomlplusplus)
+
+---
+
+## What Is TOML?
+
+TOML stands for **Tom's Obvious, Minimal Language**. It is a configuration file format designed to be easy to read, unambiguous, and map cleanly to a hash table (dictionary). A TOML document is fundamentally a collection of key-value pairs organized into tables.
+
+### TOML Data Types
+
+TOML defines the following native data types:
+
+| TOML Type | C++ Representation in toml++ | `node_type` Enum |
+|-----------------|------------------------------|-------------------------------|
+| String | `std::string` | `node_type::string` |
+| Integer | `int64_t` | `node_type::integer` |
+| Float | `double` | `node_type::floating_point` |
+| Boolean | `bool` | `node_type::boolean` |
+| Local Date | `toml::date` | `node_type::date` |
+| Local Time | `toml::time` | `node_type::time` |
+| Date-Time (offset or local) | `toml::date_time` (offset is optional) | `node_type::date_time` |
+| Array | `toml::array` | `node_type::array` |
+| Table | `toml::table` | `node_type::table` |
+
+### Example TOML Document
+
+```toml
+# This is a TOML document
+
+title = "TOML Example"
+
+[owner]
+name = "Tom Preston-Werner"
+dob = 1979-05-27T07:32:00-08:00
+
+[database]
+enabled = true
+ports = [ 8000, 8001, 8002 ]
+data = [ ["delta", "phi"], [3.14] ]
+temp_targets = { cpu = 79.5, case = 72.0 }
+
+[servers]
+
+[servers.alpha]
+ip = "10.0.0.1"
+role = "frontend"
+
+[servers.beta]
+ip = "10.0.0.2"
+role = "backend"
+```
+
+---
+
+## C++17 Features Used
+
+toml++ requires C++17 as its minimum standard. The version detection logic in `preprocessor.hpp` checks `__cplusplus` and `_MSVC_LANG`, rejecting anything below C++17:
+
+```cpp
+#if TOML_CPP < 17
+#error toml++ requires C++17 or higher.
+#endif
+```
+
+Key C++17 features utilized throughout the library:
+
+### `std::string_view`
+Used pervasively for zero-copy string references in parsing, key lookups, path parsing, and formatting. The parser accepts `std::string_view` for document content and source paths.
+
+### `std::optional<T>`
+Returned by value retrieval functions like `node::value<T>()` and `node::value_exact<T>()`. The library also supports a custom optional type via `TOML_HAS_CUSTOM_OPTIONAL_TYPE`.
+
+### `if constexpr`
+Used extensively in template code for compile-time type dispatch. For example, `node::is<T>()` and `node::as<T>()` use `if constexpr` chains to dispatch to the correct type check or cast:
+
+```cpp
+template <typename T>
+bool is() const noexcept
+{
+ using type = impl::remove_cvref<impl::unwrap_node<T>>;
+ if constexpr (std::is_same_v<type, table>)
+ return is_table();
+ else if constexpr (std::is_same_v<type, array>)
+ return is_array();
+ else if constexpr (std::is_same_v<type, std::string>)
+ return is_string();
+ // ...
+}
+```
+
+### Structured Bindings
+Table iteration supports structured bindings:
+
+```cpp
+for (auto&& [key, value] : my_table)
+{
+ std::cout << key << " = " << value << "\n";
+}
+```
+
+### Fold Expressions
+Used in template parameter packs throughout the implementation, such as in `impl::all_integral<>` constraints for date/time constructors.
+
+### `std::variant` / `std::any` Awareness
+While not directly depending on `std::variant`, the library includes `std_variant.hpp` for platforms that need it. The node hierarchy itself is polymorphic (virtual dispatch), not variant-based.
+
+### Inline Variables
+Used for constants like `impl::node_type_friendly_names[]` and `impl::control_char_escapes[]`, which are declared `inline constexpr` in header files.
+
+### Class Template Argument Deduction (CTAD)
+`toml::value` supports CTAD for constructing values from native types without explicitly specifying the template parameter.
+
+---
+
+## C++20 Feature Support
+
+When compiled under C++20 or later, toml++ optionally supports:
+
+- **`char8_t` strings**: When `TOML_HAS_CHAR8` is true, the parser accepts `std::u8string_view` and `std::u8string` inputs. File paths can also be `char8_t`-based.
+- **C++20 Modules**: The library ships with experimental module support via `import tomlplusplus;`. Enabled by setting `TOMLPLUSPLUS_BUILD_MODULES=ON` in CMake (requires CMake 3.28+).
+
+---
+
+## Complete Feature List
+
+### Parsing
+- Parse TOML from `std::string_view`, `std::istream`, or files (`toml::parse()`, `toml::parse_file()`)
+- Full TOML v1.0.0 conformance — passes all tests in the [toml-test](https://github.com/toml-lang/toml-test) suite
+- Optional support for unreleased TOML features (e.g., unicode bare keys from toml/pull/891)
+- Proper UTF-8 handling including BOM detection and skipping
+- Detailed error reporting with source positions (`toml::parse_error`, `toml::source_region`)
+- Works with or without exceptions (`TOML_EXCEPTIONS` macro)
+- Support for `char8_t` strings (C++20)
+- Windows wide string compatibility (`TOML_ENABLE_WINDOWS_COMPAT`)
+
+### Data Model
+- Complete node type hierarchy: `toml::node` (abstract base) → `toml::table`, `toml::array`, `toml::value<T>`
+- `toml::node_view<T>` for safe, optional-like node access with chained subscript operators
+- `toml::key` class preserving source location information for parsed keys
+- Type-safe value access: `node::value<T>()`, `node::value_exact<T>()`, `node::value_or(default)`
+- Template `as<T>()` for casting nodes to concrete types
+- `is<T>()` family for type checking
+- Visitor pattern via `node::visit()` and `for_each()`
+- Homogeneity checking with `is_homogeneous()`
+
+### Manipulation
+- Construct tables and arrays programmatically using initializer lists
+- `table::insert()`, `table::insert_or_assign()`, `table::emplace()`
+- `array::push_back()`, `array::emplace_back()`, `array::insert()`
+- `table::erase()`, `array::erase()`
+- Deep copy via copy constructors
+- All containers are movable
+
+### Navigation
+- `operator[]` subscript chaining: `tbl["section"]["key"]`
+- `toml::at_path()` free function for dot-separated path lookup: `at_path(tbl, "section.key[0]")`
+- `toml::path` class for programmatic path construction and manipulation
+- `path::parent_path()`, `path::leaf()`, `path::subpath()`
+- Path components are either keys (`std::string`) or array indices (`size_t`)
+
+### Serialization
+- `toml::toml_formatter` — serialize as valid TOML (default when streaming nodes)
+- `toml::json_formatter` — serialize as JSON
+- `toml::yaml_formatter` — serialize as YAML
+- All formatters support `format_flags` for fine-grained output control
+- Format flags include: `indent_array_elements`, `indent_sub_tables`, `allow_literal_strings`, `allow_multi_line_strings`, `allow_unicode_strings`, `allow_real_tabs_in_strings`, `allow_binary_integers`, `allow_octal_integers`, `allow_hexadecimal_integers`, `quote_dates_and_times`, `quote_infinities_and_nans`, `terse_key_value_pairs`, `force_multiline_arrays`
+- Inline table detection (`table::is_inline()`)
+- Value flags for controlling integer format representation (`value_flags::format_as_binary`, `format_as_octal`, `format_as_hexadecimal`)
+
+### Build Modes
+- **Header-only** (default): just `#include <toml++/toml.hpp>`
+- **Single-header**: drop `toml.hpp` (root-level amalgamated file) into your project
+- **Compiled library**: define `TOML_HEADER_ONLY=0` and compile `src/toml.cpp`
+- **C++20 Modules**: `import tomlplusplus;`
+
+### Build Systems
+- Meson (primary, with full option support)
+- CMake (interface library target `tomlplusplus::tomlplusplus`)
+- Visual Studio solution files (`.sln`, `.vcxproj`)
+- Package managers: Conan, Vcpkg, DDS, tipi.build
+
+### Compiler Support
+- GCC 8+
+- Clang 8+ (including Apple Clang)
+- MSVC (VS2019+)
+- Intel C++ Compiler (ICC/ICL)
+- NVIDIA CUDA Compiler (NVCC) with workarounds
+
+### Platform Support
+- x86, x64, ARM architectures
+- Windows, Linux, macOS
+- Does not require RTTI
+- Works with or without exceptions
+
+---
+
+## Version Information
+
+Version constants are defined in `include/toml++/impl/version.hpp`:
+
+```cpp
+#define TOML_LIB_MAJOR 3
+#define TOML_LIB_MINOR 4
+#define TOML_LIB_PATCH 0
+
+#define TOML_LANG_MAJOR 1
+#define TOML_LANG_MINOR 0
+#define TOML_LANG_PATCH 0
+```
+
+- `TOML_LIB_MAJOR/MINOR/PATCH` — the library version (3.4.0)
+- `TOML_LANG_MAJOR/MINOR/PATCH` — the TOML specification version implemented (1.0.0)
+
+---
+
+## Configuration Macros
+
+toml++ is heavily configurable via preprocessor macros. Key ones include:
+
+| Macro | Default | Description |
+|-------|---------|-------------|
+| `TOML_HEADER_ONLY` | `1` | When `1`, the library is header-only. Set to `0` for compiled mode. |
+| `TOML_EXCEPTIONS` | auto-detected | Whether to use exceptions. Auto-detected from compiler settings. |
+| `TOML_ENABLE_PARSER` | `1` | Set to `0` to disable the parser entirely (serialization only). |
+| `TOML_ENABLE_FORMATTERS` | `1` | Set to `0` to disable all formatters. |
+| `TOML_ENABLE_WINDOWS_COMPAT` | `1` on Windows | Enables `std::wstring` overloads for Windows. |
+| `TOML_UNRELEASED_FEATURES` | `0` | Enable support for unreleased TOML spec features. |
+| `TOML_HAS_CUSTOM_OPTIONAL_TYPE` | `0` | Define with a custom optional type to use instead of `std::optional`. |
+| `TOML_DISABLE_ENVIRONMENT_CHECKS` | undefined | Define to skip compile-time environment validation. |
+
+### Environment Ground-Truths
+
+The library validates its environment at compile time (unless `TOML_DISABLE_ENVIRONMENT_CHECKS` is defined):
+
+```cpp
+static_assert(CHAR_BIT == 8, TOML_ENV_MESSAGE);
+static_assert('A' == 65, TOML_ENV_MESSAGE); // ASCII
+static_assert(sizeof(double) == 8, TOML_ENV_MESSAGE);
+static_assert(std::numeric_limits<double>::is_iec559, TOML_ENV_MESSAGE); // IEEE 754
+```
+
+These ensure the library operates on platforms with 8-bit bytes, ASCII character encoding, and IEEE 754 double-precision floats.
+
+---
+
+## Namespace Organization
+
+The library uses a layered namespace structure:
+
+- **`toml`** — The root namespace containing all public API types: `table`, `array`, `value<T>`, `node`, `node_view`, `key`, `path`, `date`, `time`, `date_time`, `source_position`, `source_region`, `parse_error`, `parse_result`, etc.
+- **`toml::impl`** (internal) — Implementation details not part of the public API. Contains the parser, formatter base class, iterator implementations, and type trait utilities.
+- **ABI namespaces** — Conditional inline namespaces (e.g., `ex`/`noex` for exception mode, `custopt`/`stdopt` for optional type) ensure ABI compatibility when linking translation units compiled with different settings.
+
+---
+
+## Type Traits and Concepts
+
+The `toml` namespace provides several type trait utilities:
+
+```cpp
+toml::is_value<T> // true for native value types (std::string, int64_t, double, bool, date, time, date_time)
+toml::is_container<T> // true for table and array
+toml::is_string<T> // true if T is a toml::value<std::string> or node_view thereof
+toml::is_integer<T> // true if T is a toml::value<int64_t> or node_view thereof
+toml::is_floating_point<T>
+toml::is_number<T>
+toml::is_boolean<T>
+toml::is_date<T>
+toml::is_time<T>
+toml::is_date_time<T>
+toml::is_table<T>
+toml::is_array<T>
+```
+
+These are usable in `if constexpr` and `static_assert` contexts, making generic TOML-processing code straightforward.
+
+---
+
+## The `node_type` Enumeration
+
+Defined in the forward declarations, `node_type` identifies the kind of a TOML node:
+
+```cpp
+enum class node_type : uint8_t
+{
+ none, // Not a valid node type (used as nil sentinel)
+ table, // toml::table
+ array, // toml::array
+ string, // toml::value<std::string>
+ integer, // toml::value<int64_t>
+ floating_point, // toml::value<double>
+ boolean, // toml::value<bool>
+ date, // toml::value<toml::date>
+ time, // toml::value<toml::time>
+ date_time // toml::value<toml::date_time>
+};
+```
+
+Friendly display names are available via `impl::node_type_friendly_names[]`:
+`"none"`, `"table"`, `"array"`, `"string"`, `"integer"`, `"floating-point"`, `"boolean"`, `"date"`, `"time"`, `"date-time"`.
+
+---
+
+## The `value_flags` Enumeration
+
+Controls how integer values are formatted during serialization:
+
+```cpp
+enum class value_flags : uint16_t
+{
+ none = 0,
+ format_as_binary = 1, // 0b...
+ format_as_octal = 2, // 0o...
+ format_as_hexadecimal = 3 // 0x...
+};
+```
+
+The sentinel value `preserve_source_value_flags` tells the library to keep whatever format the parser originally detected.
+
+---
+
+## The `format_flags` Enumeration
+
+Controls formatter output behavior. It is a bitmask enum:
+
+```cpp
+enum class format_flags : uint64_t
+{
+ none = 0,
+ quote_dates_and_times = 1,
+ quote_infinities_and_nans = 2,
+ allow_literal_strings = 4,
+ allow_multi_line_strings = 8,
+ allow_real_tabs_in_strings = 16,
+ allow_unicode_strings = 32,
+ allow_binary_integers = 64,
+ allow_octal_integers = 128,
+ allow_hexadecimal_integers = 256,
+ indent_sub_tables = 512,
+ indent_array_elements = 1024,
+ indentation = indent_sub_tables | indent_array_elements,
+ terse_key_value_pairs = 2048,
+ force_multiline_arrays = 4096
+};
+```
+
+Each formatter has its own `default_flags` static constant and a set of mandatory/ignored flags defined in `formatter_constants`.
+
+---
+
+## Minimal Usage Example
+
+```cpp
+#include <toml++/toml.hpp>
+#include <iostream>
+
+using namespace std::string_view_literals; // enables the ""sv literals used below
+int main()
+{
+ // Parse a TOML string
+ auto config = toml::parse(R"(
+ [server]
+ host = "localhost"
+ port = 8080
+ debug = false
+ )");
+
+ // Read values
+ std::string_view host = config["server"]["host"].value_or("0.0.0.0"sv);
+ int64_t port = config["server"]["port"].value_or(80);
+ bool debug = config["server"]["debug"].value_or(true);
+
+ std::cout << "Host: " << host << "\n";
+ std::cout << "Port: " << port << "\n";
+ std::cout << "Debug: " << std::boolalpha << debug << "\n";
+
+ // Serialize back to TOML
+ std::cout << "\n" << config << "\n";
+
+ // Serialize as JSON
+ std::cout << toml::json_formatter{ config } << "\n";
+
+ return 0;
+}
+```
+
+---
+
+## File Organization
+
+```
+tomlplusplus/
+├── toml.hpp # Single-header amalgamation (drop-in)
+├── include/
+│ └── toml++/
+│ ├── toml.hpp # Main include (includes all impl headers)
+│ ├── toml.h # C-compatible header alias
+│ └── impl/
+│ ├── forward_declarations.hpp # All forward decls, type aliases
+│ ├── preprocessor.hpp # Compiler/platform detection, macros
+│ ├── version.hpp # Version constants
+│ ├── node.hpp # toml::node base class
+│ ├── node.inl # node method implementations
+│ ├── node_view.hpp # toml::node_view<T>
+│ ├── table.hpp # toml::table
+│ ├── table.inl # table method implementations
+│ ├── array.hpp # toml::array
+│ ├── array.inl # array method implementations
+│ ├── value.hpp # toml::value<T>
+│ ├── key.hpp # toml::key
+│ ├── date_time.hpp # toml::date, toml::time, toml::date_time
+│ ├── source_region.hpp # source_position, source_region
+│ ├── parser.hpp # toml::parse(), toml::parse_file()
+│ ├── parser.inl # Parser implementation
+│ ├── parse_error.hpp # toml::parse_error
+│ ├── parse_result.hpp # toml::parse_result (no-exceptions mode)
+│ ├── path.hpp # toml::path, toml::path_component
+│ ├── path.inl # Path implementation
+│ ├── at_path.hpp # toml::at_path() free function
+│ ├── at_path.inl # at_path implementation
+│ ├── formatter.hpp # impl::formatter base class
+│ ├── formatter.inl # formatter base implementation
+│ ├── toml_formatter.hpp # toml::toml_formatter
+│ ├── toml_formatter.inl # TOML formatter implementation
+│ ├── json_formatter.hpp # toml::json_formatter
+│ ├── json_formatter.inl # JSON formatter implementation
+│ ├── yaml_formatter.hpp # toml::yaml_formatter
+│ ├── yaml_formatter.inl # YAML formatter implementation
+│ ├── make_node.hpp # impl::make_node() factory
+│ ├── print_to_stream.hpp/.inl # Stream output helpers
+│ ├── unicode.hpp # Unicode utilities
+│ ├── unicode.inl # UTF-8 decoder
+│ ├── unicode_autogenerated.hpp # Auto-generated Unicode tables
+│ └── std_*.hpp # Standard library includes
+├── src/
+│ └── toml.cpp # Compiled-library translation unit
+├── tests/ # Catch2-based test suite
+├── examples/ # Example programs
+├── tools/ # Build/generation tools
+├── meson.build # Primary build system
+├── CMakeLists.txt # CMake build system
+└── toml-test/ # toml-test conformance suite integration
+```
+
+---
+
+## License
+
+toml++ is licensed under the **MIT License**. See `LICENSE` in the repository root for the full text.
+
+---
+
+## Related Documentation
+
+- [architecture.md](architecture.md) — Class hierarchy and internal design
+- [building.md](building.md) — Build instructions and integration
+- [basic-usage.md](basic-usage.md) — Common usage patterns
+- [node-system.md](node-system.md) — The node type system in depth
+- [tables.md](tables.md) — Working with toml::table
+- [arrays.md](arrays.md) — Working with toml::array
+- [values.md](values.md) — Working with toml::value<T>
+- [parsing.md](parsing.md) — Parser internals and error handling
+- [formatting.md](formatting.md) — Output formatters
+- [path-system.md](path-system.md) — Path-based navigation
+- [unicode-handling.md](unicode-handling.md) — UTF-8 and Unicode support
+- [code-style.md](code-style.md) — Code conventions
+- [testing.md](testing.md) — Test framework and conformance
diff --git a/docs/handbook/tomlplusplus/parsing.md b/docs/handbook/tomlplusplus/parsing.md
new file mode 100644
index 0000000000..ac97ba89e8
--- /dev/null
+++ b/docs/handbook/tomlplusplus/parsing.md
@@ -0,0 +1,494 @@
+# toml++ — Parsing
+
+## Overview
+
+toml++ provides a recursive-descent parser that converts TOML text into a `toml::table` tree of nodes. The parser handles the full TOML v1.0.0 specification, including all string types, numeric formats, date/time values, inline tables, and arrays of tables.
+
+Key entry points are `toml::parse()` and `toml::parse_file()`, declared in `include/toml++/impl/parser.hpp`.
+
+The parser can be disabled entirely via `TOML_ENABLE_PARSER=0`.
+
+---
+
+## Entry Points
+
+### `parse()` — Parse from String
+
+```cpp
+// From std::string_view (most common)
+parse_result parse(std::string_view doc, std::string_view source_path = {});
+parse_result parse(std::string_view doc, std::string&& source_path);
+
+// From std::istream
+parse_result parse(std::istream& doc, std::string_view source_path = {});
+parse_result parse(std::istream& doc, std::string&& source_path);
+
+// From char8_t (C++20 u8 strings)
+parse_result parse(std::u8string_view doc, std::string_view source_path = {});
+```
+
+The `source_path` parameter is stored in `source_region` data and appears in error messages. It does not affect parsing behavior.
+
+```cpp
+auto result = toml::parse(R"(
+ [server]
+ host = "localhost"
+ port = 8080
+)");
+
+// With source path for error messages:
+auto result2 = toml::parse(toml_string, "config.toml");
+```
+
+### `parse_file()` — Parse from File Path
+
+```cpp
+parse_result parse_file(std::string_view file_path);
+
+// Windows wstring overload (when TOML_ENABLE_WINDOWS_COMPAT=1)
+parse_result parse_file(std::wstring_view file_path);
+```
+
+Reads the entire file and parses it:
+
+```cpp
+auto config = toml::parse_file("settings.toml");
+auto config2 = toml::parse_file("/etc/myapp/config.toml");
+```
+
+---
+
+## parse_result
+
+`parse_result` is the return type from all parse functions. Its behavior depends on whether exceptions are enabled.
+
+Declared in `include/toml++/impl/parse_result.hpp`.
+
+### With Exceptions (`TOML_EXCEPTIONS=1`, the default)
+
+`parse_result` is simply a type alias for `toml::table`:
+
+```cpp
+// Effectively:
+using parse_result = table;
+```
+
+If parsing fails, `toml::parse_error` (derived from `std::runtime_error`) is thrown:
+
+```cpp
+try
+{
+ toml::table config = toml::parse("invalid [[[toml");
+}
+catch (const toml::parse_error& err)
+{
+ std::cerr << "Parse error: " << err.description() << "\n";
+ std::cerr << " at: " << err.source() << "\n";
+}
+```
+
+### Without Exceptions (`TOML_EXCEPTIONS=0`)
+
+`parse_result` is a discriminated union — it holds either a `toml::table` (success) or a `toml::parse_error` (failure):
+
+```cpp
+class parse_result
+{
+ public:
+ // Check success
+ bool succeeded() const noexcept;
+ bool failed() const noexcept;
+ explicit operator bool() const noexcept; // same as succeeded()
+
+ // Access the table (success)
+ table& get() & noexcept;
+ const table& get() const& noexcept;
+ table&& get() && noexcept;
+
+ table& operator*() & noexcept;
+ table* operator->() noexcept;
+
+ // Access the error (failure)
+ const parse_error& error() const& noexcept;
+
+ // Implicit conversion to table& (success only)
+ operator table&() noexcept;
+ operator const table&() const noexcept;
+};
+```
+
+Usage pattern:
+
+```cpp
+auto result = toml::parse("...");
+
+if (result)
+{
+ // Success — use the table
+ toml::table& config = result;
+ auto val = config["key"].value_or("default"sv);
+}
+else
+{
+ // Failure — inspect the error
+ std::cerr << "Error: " << result.error().description() << "\n";
+ std::cerr << " at " << result.error().source() << "\n";
+}
+```
+
+### Internal Storage (No-Exceptions Mode)
+
+`parse_result` uses aligned storage to hold either type:
+
+```cpp
+// Simplified internal layout:
+alignas(table) mutable unsigned char storage_[sizeof(table)];
+bool err_;
+parse_error err_val_; // only when err_ == true
+```
+
+The `table` is placement-new'd into `storage_` on success. On failure, `err_val_` is populated instead.
+
+---
+
+## parse_error
+
+Declared in `include/toml++/impl/parse_error.hpp`.
+
+### With Exceptions
+
+```cpp
+class parse_error : public std::runtime_error
+{
+ public:
+ std::string_view description() const noexcept;
+ const source_region& source() const noexcept;
+
+ // what() returns the description
+};
+```
+
+### Without Exceptions
+
+```cpp
+class parse_error
+{
+ public:
+ std::string_view description() const noexcept;
+ const source_region& source() const noexcept;
+
+ // Streaming
+ friend std::ostream& operator<<(std::ostream&, const parse_error&);
+};
+```
+
+### Error Information
+
+```cpp
+auto result = toml::parse("x = [1, 2,, 3]", "test.toml");
+
+if (!result)
+{
+ const auto& err = result.error();
+
+ // Human-readable description
+ std::cout << err.description() << "\n";
+ // e.g., "Unexpected character ',' (U+002C) in array"
+
+ // Source location
+ const auto& src = err.source();
+ std::cout << "File: " << *src.path << "\n"; // "test.toml"
+ std::cout << "Line: " << src.begin.line << "\n"; // 1
+ std::cout << "Col: " << src.begin.column << "\n"; // 11
+
+ // Full error with location (via operator<<)
+ std::cout << err << "\n";
+ // "Unexpected character ',' ... at line 1, column 11 of 'test.toml'"
+}
+```
+
+---
+
+## Source Tracking
+
+### `source_position`
+
+```cpp
+struct source_position
+{
+ source_index line; // 1-based line number
+ source_index column; // 1-based column number (byte offset, not codepoint)
+
+ explicit operator bool() const noexcept; // true if line > 0
+
+ friend bool operator==(const source_position&, const source_position&) noexcept;
+ friend bool operator!=(const source_position&, const source_position&) noexcept;
+ friend bool operator< (const source_position&, const source_position&) noexcept;
+ friend bool operator<=(const source_position&, const source_position&) noexcept;
+};
+```
+
+`source_index` is typically `uint32_t` (or `uint16_t` on small builds via `TOML_SMALL_INT_TYPE`).
+
+### `source_region`
+
+```cpp
+struct source_region
+{
+ source_position begin; // start of the element
+ source_position end; // end of the element
+ source_path_ptr path; // shared_ptr<const std::string>
+};
+```
+
+Every `node` in the parsed tree carries a `source_region`:
+
+```cpp
+auto tbl = toml::parse(R"(
+ name = "Alice"
+ age = 30
+)", "config.toml");
+
+const auto& src = tbl["name"].node()->source();
+std::cout << "Defined at "
+ << *src.path << ":"
+ << src.begin.line << ":"
+ << src.begin.column << "\n";
+// "Defined at config.toml:2:5"
+```
+
+`source_path_ptr` is `std::shared_ptr<const std::string>`, shared among all nodes parsed from the same file.
+
+---
+
+## Stream Parsing
+
+Parsing from `std::istream` allows reading from any stream source:
+
+```cpp
+#include <fstream>
+
+std::ifstream file("config.toml");
+auto config = toml::parse(file, "config.toml");
+
+// From stringstream
+std::istringstream ss(R"(key = "value")");
+auto tbl = toml::parse(ss);
+```
+
+The stream is consumed entirely during parsing.
+
+---
+
+## UTF-8 Handling During Parsing
+
+The parser expects UTF-8 encoded input. It handles:
+
+- **BOM stripping**: A leading UTF-8 BOM (`0xEF 0xBB 0xBF`) is silently consumed
+- **Multi-byte sequences**: Bare keys, strings, and comments can contain Unicode
+- **Escape sequences in strings**: `\uXXXX` and `\UXXXXXXXX` are decoded
+- **Validation**: Invalid UTF-8 sequences are rejected with parse errors
+- **Non-characters and surrogates**: Rejected as per the TOML specification
+
+### char8_t Support (C++20)
+
+```cpp
+auto tbl = toml::parse(u8R"(
+ greeting = "Hello, 世界"
+)"sv);
+```
+
+The `char8_t` overloads allow passing C++20 UTF-8 string literals directly.
+
+---
+
+## Windows Compatibility
+
+When `TOML_ENABLE_WINDOWS_COMPAT=1` (default on Windows):
+
+```cpp
+// Accept wstring file paths (converted to UTF-8 internally)
+auto config = toml::parse_file(L"C:\\config\\settings.toml");
+
+// Wide string values can be used with value()
+auto path = config["path"].value<std::wstring>();
+```
+
+---
+
+## Parser Configuration Macros
+
+| Macro | Default | Effect |
+|-------|---------|--------|
+| `TOML_ENABLE_PARSER` | `1` | Set to `0` to remove the parser entirely (serialize-only builds) |
+| `TOML_EXCEPTIONS` | Auto-detected | Controls exception vs. return-code error handling |
+| `TOML_UNRELEASED_FEATURES` | `0` | Enable parsing of TOML features not yet in a released spec |
+| `TOML_ENABLE_WINDOWS_COMPAT` | `1` on Windows | Enable wstring/wchar_t overloads |
+
+---
+
+## Parsing Specific TOML Features
+
+### Strings
+
+```toml
+basic = "hello\nworld" # basic (escape sequences)
+literal = 'no \escapes' # literal (no escapes)
+multi_basic = """
+multiline
+string""" # multi-line basic
+multi_literal = '''
+multiline
+literal''' # multi-line literal
+```
+
+### Numbers
+
+```toml
+int_dec = 42
+int_hex = 0xFF
+int_oct = 0o755
+int_bin = 0b11010110
+float_std = 3.14
+float_exp = 1e10
+float_inf = inf
+float_nan = nan
+underscore = 1_000_000
+```
+
+The parser records the integer format in `value_flags`:
+- `0xFF` → `value_flags::format_as_hexadecimal`
+- `0o755` → `value_flags::format_as_octal`
+- `0b1010` → `value_flags::format_as_binary`
+
+### Inline Tables
+
+```toml
+point = { x = 1, y = 2 } # single-line inline table
+```
+
+Parsed as a `toml::table` with `is_inline()` returning `true`.
+
+### Arrays of Tables
+
+```toml
+[[products]]
+name = "Hammer"
+price = 9.99
+
+[[products]]
+name = "Nail"
+price = 0.05
+```
+
+Parsed as `table["products"]` → `array` containing two `table` nodes.
+
+---
+
+## Error Recovery
+
+The parser does **not** attempt error recovery. Upon encountering the first error, parsing stops and the error is returned (or thrown). This design ensures:
+
+1. No partially-parsed trees with missing data
+2. Clear, unambiguous error messages
+3. The error source points to the exact location of the problem
+
+---
+
+## Thread Safety
+
+- Parsing is **thread-safe**: multiple threads can call `parse()` concurrently with different inputs
+- The parser uses no global state
+- The returned `parse_result` / `table` is independent and owned by the caller
+
+---
+
+## Complete Example
+
+```cpp
+#include <toml++/toml.hpp>
+#include <iostream>
+#include <fstream>
+#include <sstream> // std::istringstream (stream-parsing section below)
+
+using namespace std::string_view_literals; // enables the ""sv literals used below
+int main()
+{
+ // --- Parse from string ---
+ auto config = toml::parse(R"(
+ [database]
+ server = "192.168.1.1"
+ ports = [8001, 8001, 8002]
+ enabled = true
+ connection_max = 5000
+
+ [database.credentials]
+ username = "admin"
+ password_file = "/etc/db/pass"
+ )");
+
+#if TOML_EXCEPTIONS
+ // With exceptions, config is a toml::table directly
+ auto server = config["database"]["server"].value_or(""sv);
+ std::cout << "Server: " << server << "\n";
+
+#else
+ // Without exceptions, check for success first
+ if (!config)
+ {
+ std::cerr << "Parse failed:\n" << config.error() << "\n";
+ return 1;
+ }
+
+ auto& tbl = config.get();
+ auto server = tbl["database"]["server"].value_or(""sv);
+ std::cout << "Server: " << server << "\n";
+#endif
+
+ // --- Parse from file ---
+ try
+ {
+ auto file_config = toml::parse_file("app.toml");
+ std::cout << file_config << "\n";
+ }
+ catch (const toml::parse_error& err)
+ {
+ std::cerr << "Failed to parse app.toml:\n"
+ << err.description() << "\n"
+ << " at " << err.source() << "\n";
+ return 1;
+ }
+
+ // --- Source tracking ---
+ auto doc = toml::parse(R"(
+ title = "Example"
+ [owner]
+ name = "Tom"
+ )", "example.toml");
+
+    auto* owner_node = doc.get("owner");
+    if (owner_node)
+    {
+        const auto& src = owner_node->source();
+ std::cout << "owner defined at: "
+ << *src.path << ":"
+ << src.begin.line << ":"
+ << src.begin.column << "\n";
+ }
+
+ // --- Stream parsing ---
+ std::istringstream ss(R"(key = "from stream")");
+ auto stream_result = toml::parse(ss, "stream-input");
+ std::cout << stream_result["key"].value_or(""sv) << "\n";
+
+ return 0;
+}
+```
+
+---
+
+## Related Documentation
+
+- [basic-usage.md](basic-usage.md) — Quick start guide with parsing examples
+- [values.md](values.md) — Value types created by the parser
+- [tables.md](tables.md) — Root table structure
+- [formatting.md](formatting.md) — Serializing parsed data back to TOML
+- [unicode-handling.md](unicode-handling.md) — UTF-8 handling details
diff --git a/docs/handbook/tomlplusplus/path-system.md b/docs/handbook/tomlplusplus/path-system.md
new file mode 100644
index 0000000000..d9ed1a6262
--- /dev/null
+++ b/docs/handbook/tomlplusplus/path-system.md
@@ -0,0 +1,412 @@
+# toml++ — Path System
+
+## Overview
+
+`toml::path` provides structured navigation into a TOML document tree using dot-separated key names and array indices. Rather than chaining `operator[]` calls, a path can be constructed from a string and applied to a node tree in a single operation.
+
+Declared in `include/toml++/impl/path.hpp` with `at_path()` free functions in `at_path.hpp`.
+
+---
+
+## `path_component`
+
+Each segment of a path is a `path_component`, which is either a **key** (string) or an **array index** (integer):
+
+```cpp
+class path_component
+{
+ public:
+ // Type query
+ path_component_type type() const noexcept;
+ // Returns path_component_type::key or path_component_type::array_index
+
+ // Access (key)
+ const toml::key& key() const noexcept;
+
+ // Access (array index)
+ size_t array_index() const noexcept;
+
+ // Comparison
+ friend bool operator==(const path_component&, const path_component&) noexcept;
+ friend bool operator!=(const path_component&, const path_component&) noexcept;
+};
+```
+
+```cpp
+enum class path_component_type : uint8_t
+{
+ key = 0x1,
+ array_index = 0x2
+};
+```
+
+### Internal Storage
+
+`path_component` uses a union with type discrimination:
+
+```cpp
+// Simplified internal layout:
+union storage_t
+{
+ toml::key k; // for key components
+ size_t index; // for array_index components
+};
+
+path_component_type type_;
+storage_t storage_;
+```
+
+---
+
+## `toml::path`
+
+A path is a sequence of `path_component` values:
+
+```cpp
+class path
+{
+ private:
+ std::vector<path_component> components_;
+};
+```
+
+### Construction
+
+#### From String
+
+```cpp
+path(std::string_view str);
+path(std::wstring_view str); // Windows compat
+```
+
+Path string syntax:
+- Dot `.` separates keys: `"server.host"` → key("server"), key("host")
+- Brackets `[N]` denote array indices: `"servers[0].host"` → key("servers"), index(0), key("host")
+- Quoted keys for special chars: `"a.\"dotted.key\".b"`
+
+```cpp
+toml::path p1("server.host"); // 2 components: key, key
+toml::path p2("servers[0].name"); // 3 components: key, index, key
+toml::path p3("[0][1]"); // 2 components: index, index
+toml::path p4("database.\"dotted.key\""); // 2 components
+```
+
+#### From Components
+
+```cpp
+path(); // empty path
+path(const path& other); // copy
+path(path&& other) noexcept; // move
+```
+
+### Size and Emptiness
+
+```cpp
+size_t size() const noexcept; // number of components
+bool empty() const noexcept; // true if no components
+
+explicit operator bool() const noexcept; // true if non-empty
+
+void clear() noexcept; // remove all components
+```
+
+### Element Access
+
+```cpp
+path_component& operator[](size_t index) noexcept;
+const path_component& operator[](size_t index) const noexcept;
+
+// Iterator support
+auto begin() noexcept;
+auto end() noexcept;
+auto begin() const noexcept;
+auto end() const noexcept;
+auto cbegin() const noexcept;
+auto cend() const noexcept;
+```
+
+```cpp
+toml::path p("server.ports[0]");
+
+for (const auto& component : p)
+{
+ if (component.type() == toml::path_component_type::key)
+ std::cout << "key: " << component.key() << "\n";
+ else
+ std::cout << "index: " << component.array_index() << "\n";
+}
+// key: server
+// key: ports
+// index: 0
+```
+
+---
+
+## Path Operations
+
+### Subpath Extraction
+
+```cpp
+path parent_path() const; // all but last component
+path leaf() const; // last component only
+
+path subpath(size_t start, size_t length) const;
+path subpath(std::vector<path_component>::const_iterator start,
+ std::vector<path_component>::const_iterator end) const;
+
+path truncated(size_t n) const; // first n components
+```
+
+```cpp
+toml::path p("a.b.c.d");
+
+auto parent = p.parent_path(); // "a.b.c"
+auto leaf = p.leaf(); // "d"
+auto sub = p.subpath(1, 2); // "b.c"
+auto trunc = p.truncated(2); // "a.b"
+```
+
+### Concatenation
+
+```cpp
+path operator+(const path& rhs) const;
+path operator+(const path_component& rhs) const;
+path operator+(std::string_view rhs) const;
+
+path& operator+=(const path& rhs);
+path& operator+=(const path_component& rhs);
+path& operator+=(std::string_view rhs);
+
+// Prepend
+path& prepend(const path& source);
+path& prepend(path&& source);
+```
+
+```cpp
+toml::path base("server");
+toml::path full = base + "host";
+// full == "server.host"
+
+toml::path p("a.b");
+p += "c.d";
+// p == "a.b.c.d"
+```
+
+### Assignment
+
+```cpp
+path& assign(std::string_view str);
+path& assign(const path& other);
+path& assign(path&& other) noexcept;
+
+path& operator=(std::string_view str);
+path& operator=(const path& other);
+path& operator=(path&& other) noexcept;
+```
+
+---
+
+## Comparison
+
+```cpp
+friend bool operator==(const path& lhs, const path& rhs) noexcept;
+friend bool operator!=(const path& lhs, const path& rhs) noexcept;
+
+friend bool operator==(const path& lhs, std::string_view rhs);
+friend bool operator!=(const path& lhs, std::string_view rhs);
+```
+
+```cpp
+toml::path a("server.host");
+toml::path b("server.host");
+
+std::cout << (a == b) << "\n"; // true
+std::cout << (a == "server.host") << "\n"; // true
+```
+
+---
+
+## Hashing
+
+```cpp
+size_t hash() const noexcept;
+
+// std::hash specialization
+namespace std {
+ template<> struct hash<toml::path> { ... };
+}
+```
+
+Paths can be used as keys in `std::unordered_map` and `std::unordered_set`.
+
+---
+
+## String Conversion
+
+```cpp
+std::string str() const;
+explicit operator std::string() const;
+
+friend std::ostream& operator<<(std::ostream&, const path&);
+```
+
+```cpp
+toml::path p("servers[0].host");
+std::cout << p << "\n"; // servers[0].host
+std::string s = p.str(); // "servers[0].host"
+```
+
+---
+
+## `at_path()` — Path-Based Node Access
+
+Declared in `include/toml++/impl/at_path.hpp`. These free functions apply a path to a node tree:
+
+```cpp
+node_view<node> at_path(node& root, const toml::path& path) noexcept;
+node_view<const node> at_path(const node& root, const toml::path& path) noexcept;
+
+node_view<node> at_path(node& root, std::string_view path) noexcept;
+node_view<const node> at_path(const node& root, std::string_view path) noexcept;
+
+// Windows compat
+node_view<node> at_path(node& root, std::wstring_view path) noexcept;
+```
+
+Returns a `node_view` — null-safe wrapper that returns empty/default if the path doesn't resolve.
+
+```cpp
+auto tbl = toml::parse(R"(
+ [server]
+ host = "localhost"
+ ports = [8080, 8081, 8082]
+
+ [[servers]]
+ name = "alpha"
+
+ [[servers]]
+ name = "beta"
+)");
+
+// Access nested value
+auto host = toml::at_path(tbl, "server.host").value_or(""sv);
+// "localhost"
+
+// Access array element
+auto port = toml::at_path(tbl, "server.ports[1]").value_or(int64_t{0});
+// 8081
+
+// Access array-of-tables element
+auto name = toml::at_path(tbl, "servers[0].name").value_or(""sv);
+// "alpha"
+
+// Non-existent path returns empty node_view
+auto missing = toml::at_path(tbl, "nonexistent.path");
+std::cout << missing.value_or("default"sv) << "\n"; // "default"
+```
+
+### With `toml::path` Objects
+
+```cpp
+toml::path p("server.ports[0]");
+auto port = toml::at_path(tbl, p).value_or(int64_t{0});
+
+// Reuse path for multiple lookups
+for (size_t i = 0; i < 3; i++)
+{
+ toml::path elem_path = toml::path("server.ports") + toml::path("[" + std::to_string(i) + "]");
+ auto val = toml::at_path(tbl, elem_path).value_or(int64_t{0});
+ std::cout << val << "\n";
+}
+```
+
+---
+
+## `operator[]` with Path
+
+`table` and `node_view` also support path-like access via `operator[]`:
+
+```cpp
+auto tbl = toml::parse(R"(
+ [server]
+ host = "localhost"
+)");
+
+// Chained subscript (each [] does a single lookup)
+auto host = tbl["server"]["host"].value_or(""sv);
+
+// With toml::path (single lookup resolving the full path)
+toml::path p("server.host");
+auto host2 = toml::at_path(tbl, p).value_or(""sv);
+```
+
+Note: `operator[]` on `table` does single-key lookups only. `at_path()` resolves multi-component paths.
+
+---
+
+## Complete Example
+
+```cpp
+#include <toml++/toml.hpp>
+#include <iostream>
+
+using namespace std::string_view_literals; // enables the ""sv literals used below
+
+int main()
+{
+ auto config = toml::parse(R"(
+ [database]
+ host = "db.example.com"
+ port = 5432
+
+ [database.pools]
+ read = 10
+ write = 5
+
+ [[database.replicas]]
+ host = "replica1.example.com"
+ port = 5433
+
+ [[database.replicas]]
+ host = "replica2.example.com"
+ port = 5434
+ )");
+
+ // Construct paths
+ toml::path db_host("database.host");
+ toml::path db_port("database.port");
+ toml::path pool_read("database.pools.read");
+
+ // Use at_path for access
+ std::cout << "Host: " << toml::at_path(config, db_host).value_or(""sv) << "\n";
+ std::cout << "Port: " << toml::at_path(config, db_port).value_or(int64_t{0}) << "\n";
+ std::cout << "Read pool: " << toml::at_path(config, pool_read).value_or(int64_t{0}) << "\n";
+
+ // Array-of-tables access
+ for (size_t i = 0; i < 2; i++)
+ {
+ auto host_path = toml::path("database.replicas[" + std::to_string(i) + "].host");
+ auto port_path = toml::path("database.replicas[" + std::to_string(i) + "].port");
+
+ auto host = toml::at_path(config, host_path).value_or(""sv);
+ auto port = toml::at_path(config, port_path).value_or(int64_t{0});
+
+ std::cout << "Replica " << i << ": " << host << ":" << port << "\n";
+ }
+
+ // Path manipulation
+ toml::path base("database");
+ auto full = base + "host";
+ std::cout << "Full path: " << full << "\n"; // database.host
+ std::cout << "Parent: " << full.parent_path() << "\n"; // database
+ std::cout << "Leaf: " << full.leaf() << "\n"; // host
+
+ return 0;
+}
+```
+
+---
+
+## Related Documentation
+
+- [basic-usage.md](basic-usage.md) — Simple access patterns
+- [node-system.md](node-system.md) — node_view returned by at_path
+- [tables.md](tables.md) — Table subscript access
diff --git a/docs/handbook/tomlplusplus/tables.md b/docs/handbook/tomlplusplus/tables.md
new file mode 100644
index 0000000000..d573ec0da2
--- /dev/null
+++ b/docs/handbook/tomlplusplus/tables.md
@@ -0,0 +1,551 @@
+# toml++ — Tables
+
+## Overview
+
+`toml::table` is the primary container in toml++. It extends `toml::node` and models an ordered map from `toml::key` objects to child `toml::node` pointers. Every parsed TOML document has a `table` as its root.
+
+Declared in `include/toml++/impl/table.hpp` with implementation in `table.inl`.
+
+---
+
+## Internal Storage
+
+```cpp
+class table : public node
+{
+ private:
+ using map_type = std::map<toml::key, impl::node_ptr, std::less<>>;
+ map_type map_;
+ bool inline_ = false;
+};
+```
+
+- **`map_type`** = `std::map<toml::key, std::unique_ptr<node>, std::less<>>`
+- **`std::less<>`** enables heterogeneous lookup — you can search by `std::string_view` without constructing a `toml::key`
+- **`inline_`** controls whether the table serializes as `{ a = 1, b = 2 }` (inline) or with `[section]` headers (non-inline)
+- Ownership: the table owns all child nodes via `unique_ptr`
+
+---
+
+## Construction
+
+### Default Construction
+
+```cpp
+toml::table tbl; // empty table
+```
+
+### Initializer List Construction
+
+```cpp
+auto tbl = toml::table{
+ { "name", "toml++" },
+ { "version", 3 },
+ { "features", toml::array{ "parsing", "serialization" } },
+ { "metadata", toml::table{
+ { "author", "Mark Gillard" },
+ { "license", "MIT" }
+ }}
+};
+```
+
+This uses `impl::table_init_pair`:
+```cpp
+struct table_init_pair
+{
+ mutable toml::key key;
+ mutable node_ptr value;
+
+ template <typename K, typename V>
+ table_init_pair(K&& k, V&& v, value_flags flags = preserve_source_value_flags);
+};
+```
+
+Values are converted to nodes via `impl::make_node()`, which handles:
+- Native types (`int`, `double`, `const char*`, etc.) → `value<T>`
+- `toml::array` → `array` (moved)
+- `toml::table` → `table` (moved)
+
+### Copy and Move
+
+```cpp
+toml::table copy(original_table); // deep copy — all child nodes are cloned
+toml::table moved(std::move(tbl)); // move — no allocation, transfers ownership
+```
+
+Copy is deep: every child node in the tree is recursively copied.
+
+---
+
+## Iterators
+
+### Types
+
+```cpp
+using table_iterator = impl::table_iterator<false>;
+using const_table_iterator = impl::table_iterator<true>;
+```
+
+`table_iterator` is a **BidirectionalIterator**. Dereferencing yields `table_proxy_pair`:
+
+```cpp
+template <bool IsConst>
+struct table_proxy_pair
+{
+ using value_type = std::conditional_t<IsConst, const node, node>;
+ const toml::key& first;
+ value_type& second;
+};
+```
+
+The `unique_ptr` layer is hidden — you get `(const key&, node&)` pairs.
+
+### Iterator Methods
+
+```cpp
+iterator begin() noexcept;
+iterator end() noexcept;
+const_iterator begin() const noexcept;
+const_iterator end() const noexcept;
+const_iterator cbegin() const noexcept;
+const_iterator cend() const noexcept;
+```
+
+### Range-Based For
+
+```cpp
+for (auto&& [key, value] : tbl)
+{
+ std::cout << key << " = " << value << "\n";
+}
+```
+
+Structured bindings work because `table_proxy_pair` has public `first` and `second` members.
+
+### Iterator to Key String
+
+```cpp
+for (auto it = tbl.begin(); it != tbl.end(); ++it)
+{
+ const toml::key& k = it->first;
+ toml::node& v = it->second;
+ std::cout << k.str() << ": " << v.type() << "\n";
+}
+```
+
+---
+
+## Capacity
+
+```cpp
+size_t size() const noexcept; // number of key-value pairs
+bool empty() const noexcept; // true if size() == 0
+```
+
+---
+
+## Element Access
+
+### `operator[]` — Returns `node_view`
+
+```cpp
+node_view<node> operator[](std::string_view key) noexcept;
+node_view<const node> operator[](std::string_view key) const noexcept;
+```
+
+Returns a `node_view` that wraps the node at that key, or an empty view if the key doesn't exist. This is the safe, chainable accessor:
+
+```cpp
+auto val = tbl["section"]["subsection"]["key"].value_or(42);
+```
+
+### `at()` — Bounds-Checked Access
+
+```cpp
+node& at(std::string_view key);
+const node& at(std::string_view key) const;
+```
+
+Returns a reference to the node at the key. Throws `std::out_of_range` if the key doesn't exist.
+
+### `get()` — Raw Pointer Access
+
+```cpp
+node* get(std::string_view key) noexcept;
+const node* get(std::string_view key) const noexcept;
+```
+
+Returns a pointer to the node, or `nullptr` if not found:
+
+```cpp
+if (auto* n = tbl.get("name"))
+{
+ std::cout << "Found: " << *n << "\n";
+}
+```
+
+### `get_as<T>()` — Typed Pointer Access
+
+```cpp
+template <typename T>
+impl::wrap_node<T>* get_as(std::string_view key) noexcept;
+
+template <typename T>
+const impl::wrap_node<T>* get_as(std::string_view key) const noexcept;
+```
+
+Combines `get()` and `as<T>()`:
+
+```cpp
+if (auto* val = tbl.get_as<std::string>("name"))
+ std::cout << "Name: " << val->get() << "\n";
+
+if (auto* sub = tbl.get_as<toml::table>("database"))
+ std::cout << "Database has " << sub->size() << " keys\n";
+```
+
+### `contains()` — Key Existence Check
+
+```cpp
+bool contains(std::string_view key) const noexcept;
+```
+
+```cpp
+if (tbl.contains("database"))
+ std::cout << "Has database config\n";
+```
+
+---
+
+## Insertion
+
+### `insert()` — Insert If Not Present
+
+```cpp
+template <typename KeyType, typename ValueType>
+std::pair<iterator, bool> insert(KeyType&& key, ValueType&& val,
+ value_flags flags = preserve_source_value_flags);
+```
+
+Inserts a new key-value pair only if the key doesn't already exist. Returns `(iterator, true)` on success, `(iterator_to_existing, false)` if the key was already present:
+
+```cpp
+auto [it, inserted] = tbl.insert("name", "toml++");
+if (inserted)
+ std::cout << "Inserted: " << it->second << "\n";
+else
+ std::cout << "Key already exists\n";
+```
+
+### `insert_or_assign()` — Insert or Replace
+
+```cpp
+template <typename KeyType, typename ValueType>
+std::pair<iterator, bool> insert_or_assign(KeyType&& key, ValueType&& val,
+ value_flags flags = preserve_source_value_flags);
+```
+
+Always succeeds — inserts if new, replaces if existing:
+
+```cpp
+tbl.insert_or_assign("version", 4); // replaces any existing "version"
+```
+
+### `emplace<T>()` — Construct In Place
+
+```cpp
+template <typename ValueType, typename KeyType, typename... Args>
+std::pair<iterator, bool> emplace(KeyType&& key, Args&&... args);
+```
+
+Constructs a new node in place if the key doesn't exist:
+
+```cpp
+tbl.emplace<std::string>("greeting", "Hello, World!");
+tbl.emplace<toml::array>("empty_list");
+tbl.emplace<toml::table>("empty_section");
+```
+
+---
+
+## Removal
+
+### `erase()` — By Key
+
+```cpp
+size_t erase(std::string_view key) noexcept;
+```
+
+Returns 1 if the key was found and removed, 0 otherwise:
+
+```cpp
+tbl.erase("deprecated_key");
+```
+
+### `erase()` — By Iterator
+
+```cpp
+iterator erase(iterator pos) noexcept;
+iterator erase(const_iterator pos) noexcept;
+iterator erase(const_iterator first, const_iterator last) noexcept;
+```
+
+```cpp
+auto it = tbl.find("old_key");
+if (it != tbl.end())
+ tbl.erase(it);
+```
+
+### `clear()`
+
+```cpp
+void clear() noexcept;
+```
+
+Removes all key-value pairs.
+
+---
+
+## Search
+
+### `find()`
+
+```cpp
+iterator find(std::string_view key) noexcept;
+const_iterator find(std::string_view key) const noexcept;
+```
+
+Returns an iterator to the key-value pair, or `end()` if not found.
+
+### `lower_bound()` / `upper_bound()` / `equal_range()`
+
+These operate on the underlying `std::map` with heterogeneous lookup:
+
+```cpp
+iterator lower_bound(std::string_view key) noexcept;
+iterator upper_bound(std::string_view key) noexcept;
+std::pair<iterator, iterator> equal_range(std::string_view key) noexcept;
+// + const overloads
+```
+
+---
+
+## Metadata
+
+### `is_inline()`
+
+```cpp
+bool is_inline() const noexcept;
+void is_inline(bool val) noexcept;
+```
+
+Controls inline serialization. When `true`, the table formats as `{ a = 1, b = 2 }` instead of using `[section]` headers:
+
+```cpp
+auto tbl = toml::table{
+ { "a", 1 },
+ { "b", 2 },
+ { "nested", toml::table{ { "c", 3 } } }
+};
+
+std::cout << tbl << "\n";
+// Output:
+// a = 1
+// b = 2
+//
+// [nested]
+// c = 3
+
+tbl.is_inline(true);
+std::cout << tbl << "\n";
+// Output:
+// { a = 1, b = 2, nested = { c = 3 } }
+```
+
+Runtime-constructed tables default to non-inline. The parser sets `is_inline(true)` for tables parsed from inline syntax.
+
+---
+
+## `for_each()` — Type-Safe Iteration
+
+```cpp
+template <typename Func>
+table& for_each(Func&& visitor) &;
+```
+
+Visits each key-value pair, passing the value as its concrete type:
+
+```cpp
+tbl.for_each([](const toml::key& key, auto& value)
+{
+ std::cout << key << ": ";
+
+ using value_type = std::remove_cvref_t<decltype(value)>;
+ if constexpr (std::is_same_v<value_type, toml::table>)
+ std::cout << "table (" << value.size() << " entries)\n";
+ else if constexpr (std::is_same_v<value_type, toml::array>)
+ std::cout << "array (" << value.size() << " elements)\n";
+ else
+ std::cout << value.get() << "\n";
+});
+```
+
+The visitor is instantiated for all 9 possible node types (table, array, plus the 7 value types).
+
+---
+
+## Path-Based Access
+
+### `at_path()` Member
+
+```cpp
+node_view<node> at_path(std::string_view path) noexcept;
+node_view<const node> at_path(std::string_view path) const noexcept;
+node_view<node> at_path(const toml::path& path) noexcept;
+```
+
+Resolves dot-separated paths with array indices:
+
+```cpp
+auto tbl = toml::parse(R"(
+ [database]
+ servers = [
+ { host = "alpha", port = 5432 },
+ { host = "beta", port = 5433 }
+ ]
+)");
+
+std::cout << tbl.at_path("database.servers[0].host") << "\n"; // "alpha"
+std::cout << tbl.at_path("database.servers[1].port") << "\n"; // 5433
+```
+
+### `operator[]` with `toml::path`
+
+```cpp
+node_view<node> operator[](const toml::path& path) noexcept;
+```
+
+```cpp
+toml::path p("database.servers[0].host");
+std::cout << tbl[p] << "\n"; // "alpha"
+```
+
+---
+
+## Comparison
+
+### Equality
+
+```cpp
+friend bool operator==(const table& lhs, const table& rhs) noexcept;
+friend bool operator!=(const table& lhs, const table& rhs) noexcept;
+```
+
+Deep structural equality: two tables are equal if they have the same keys with equal values. Source regions and inline-ness are not compared.
+
+---
+
+## Printing
+
+Tables are streamable via the default `toml_formatter`:
+
+```cpp
+std::cout << tbl << "\n";
+```
+
+Equivalent to:
+```cpp
+std::cout << toml::toml_formatter{ tbl } << "\n";
+```
+
+---
+
+## Type Identity
+
+`table` overrides all type-check virtuals from `node`:
+
+```cpp
+node_type type() const noexcept final; // returns node_type::table
+bool is_table() const noexcept final; // returns true
+bool is_array() const noexcept final; // returns false
+bool is_value() const noexcept final; // returns false
+bool is_string() const noexcept final; // returns false
+// ... all other is_*() return false
+
+table* as_table() noexcept final; // returns this
+const table* as_table() const noexcept final; // returns this
+// ... all other as_*() return nullptr
+```
+
+---
+
+## Windows Compatibility
+
+When `TOML_ENABLE_WINDOWS_COMPAT` is enabled, additional overloads accept `std::wstring_view` for key parameters:
+
+```cpp
+node* get(std::wstring_view key);
+bool contains(std::wstring_view key) const;
+node_view<node> operator[](std::wstring_view key) noexcept;
+// etc.
+```
+
+Wide strings are internally narrowed via `impl::narrow()`.
+
+---
+
+## Complete Example
+
+```cpp
+#include <toml++/toml.hpp>
+#include <iostream>
+using namespace std::string_view_literals; // enables the "?"sv literal below
+int main()
+{
+ // Build a table programmatically
+ auto config = toml::table{
+ { "app", toml::table{
+ { "name", "MyApp" },
+ { "version", 2 }
+ }},
+ { "features", toml::array{ "auth", "logging" } }
+ };
+
+ // Navigate
+ std::cout << "App: " << config["app"]["name"].value_or("?"sv) << "\n";
+
+ // Insert
+ config["app"].as_table()->insert("debug", false);
+
+ // Modify
+ config.insert_or_assign("features",
+ toml::array{ "auth", "logging", "metrics" });
+
+ // Check
+ if (config.contains("app"))
+ {
+ auto* app = config.get_as<toml::table>("app");
+ std::cout << "App table has " << app->size() << " keys\n";
+ }
+
+ // Iterate
+ for (auto&& [key, value] : config)
+ {
+ std::cout << key << " (" << value.type() << ")\n";
+ }
+
+ // Serialize
+ std::cout << "\n" << config << "\n";
+
+ return 0;
+}
+```
+
+---
+
+## Related Documentation
+
+- [node-system.md](node-system.md) — Base node interface
+- [arrays.md](arrays.md) — Array container details
+- [values.md](values.md) — Value node details
+- [path-system.md](path-system.md) — Path-based navigation
diff --git a/docs/handbook/tomlplusplus/testing.md b/docs/handbook/tomlplusplus/testing.md
new file mode 100644
index 0000000000..d8469413a2
--- /dev/null
+++ b/docs/handbook/tomlplusplus/testing.md
@@ -0,0 +1,226 @@
+# toml++ — Testing
+
+## Overview
+
+toml++ uses the **Catch2** testing framework. Tests are organized in `tests/` and built via Meson. The test suite includes unit tests for every major feature, TOML specification conformance tests, and third-party test suites.
+
+---
+
+## Test Framework
+
+### Catch2
+
+- Used as the test runner and assertion library
+- Can be vendored (`extern/Catch2/`) or found as a system dependency
+- Tests use `TEST_CASE`, `SECTION`, `REQUIRE`, `CHECK` macros
+
+### Build Configuration
+
+Tests are built with Meson. The test build options from `meson_options.txt`:
+
+```
+option('build_tests', type: 'boolean', value: false)
+option('use_vendored_libs', type: 'boolean', value: true)
+```
+
+Build and run tests:
+
+```bash
+meson setup build -Dbuild_tests=true
+meson compile -C build
+meson test -C build
+```
+
+---
+
+## Test File Organization
+
+From `tests/meson.build`, the test suite consists of:
+
+### Conformance Tests
+
+Third-party test suites that validate the parser against the TOML specification:
+
+| Test Suite | Files | Description |
+|-----------|-------|-------------|
+| BurntSushi (valid) | `conformance_burntsushi_valid.cpp` | Validates that valid TOML parses correctly |
+| BurntSushi (invalid) | `conformance_burntsushi_invalid.cpp` | Validates that invalid TOML is rejected |
+| iarna (valid) | `conformance_iarna_valid.cpp` | Additional valid TOML test cases |
+| iarna (invalid) | `conformance_iarna_invalid.cpp` | Additional invalid TOML test cases |
+
+Test data files are in `tests/` subdirectories corresponding to each third-party suite.
+
+### Parsing Tests
+
+Unit tests for the parser:
+
+| File | Content |
+|------|---------|
+| `parsing_arrays.cpp` | Array parsing edge cases |
+| `parsing_booleans.cpp` | Boolean value parsing |
+| `parsing_comments.cpp` | Comment handling |
+| `parsing_dates_and_times.cpp` | Date/time value parsing |
+| `parsing_floats.cpp` | Float parsing (inf, nan, precision) |
+| `parsing_integers.cpp` | Integer parsing (hex, oct, bin, overflow) |
+| `parsing_key_value_pairs.cpp` | Key-value pair syntax |
+| `parsing_spec_example.cpp` | TOML spec example document |
+| `parsing_strings.cpp` | All 4 string types, escape sequences |
+| `parsing_tables.cpp` | Standard and inline tables |
+
+### Manipulation Tests
+
+Tests for programmatic construction and modification:
+
+| File | Content |
+|------|---------|
+| `manipulating_arrays.cpp` | Array push_back, erase, flatten, etc. |
+| `manipulating_tables.cpp` | Table insert, emplace, merge, etc. |
+| `manipulating_values.cpp` | Value construction, assignment, flags |
+| `manipulating_parse_result.cpp` | parse_result access patterns |
+
+### Formatter Tests
+
+| File | Content |
+|------|---------|
+| `formatters.cpp` | TOML, JSON, and YAML formatter output |
+
+### Path Tests
+
+| File | Content |
+|------|---------|
+| `path.cpp` | Path parsing, navigation, at_path() |
+
+### Other Tests
+
+| File | Content |
+|------|---------|
+| `at_path.cpp` | at_path() function specifically |
+| `for_each.cpp` | for_each() visitor pattern |
+| `user_feedback.cpp` | Tests from user-reported issues |
+| `windows_compat.cpp` | Windows wstring compatibility |
+| `using_iterators.cpp` | Iterator usage patterns |
+| `main.cpp` | Catch2 main entry point |
+| `tests.hpp` | Shared test utilities and macros |
+
+---
+
+## Running Tests
+
+### Full Test Suite
+
+```bash
+cd tomlplusplus
+meson setup build -Dbuild_tests=true
+meson compile -C build
+meson test -C build
+```
+
+### Verbose Output
+
+```bash
+meson test -C build -v
+```
+
+### Running Specific Tests
+
+Catch2 allows filtering by test name:
+
+```bash
+./build/tests/toml_test "parsing integers"
+./build/tests/toml_test "[arrays]"
+```
+
+### Exception / No-Exception Modes
+
+Tests are compiled in both modes when possible:
+
+```bash
+# With exceptions (default)
+meson setup build_exc -Dbuild_tests=true
+
+# Without exceptions
+meson setup build_noexc -Dbuild_tests=true -Dcpp_eh=none
+```
+
+---
+
+## Test Patterns
+
+### Parsing Roundtrip
+
+A common pattern: parse TOML, verify values, re-serialize, verify output:
+
+```cpp
+TEST_CASE("integers - hex")
+{
+ auto tbl = toml::parse("val = 0xFF");
+ CHECK(tbl["val"].value<int64_t>() == 255);
+ CHECK(tbl["val"].as_integer()->flags() == toml::value_flags::format_as_hexadecimal);
+}
+```
+
+### Invalid Input Rejection
+
+```cpp
+TEST_CASE("invalid - unterminated string")
+{
+ CHECK_THROWS_AS(toml::parse("val = \"unterminated"), toml::parse_error);
+}
+```
+
+Or without exceptions:
+
+```cpp
+TEST_CASE("invalid - unterminated string")
+{
+ auto result = toml::parse("val = \"unterminated");
+ CHECK(!result);
+ CHECK(result.error().description().find("unterminated") != std::string_view::npos);
+}
+```
+
+### Manipulation Verification
+
+```cpp
+TEST_CASE("array - push_back")
+{
+ toml::array arr;
+ arr.push_back(1);
+ arr.push_back("two");
+ arr.push_back(3.0);
+
+ REQUIRE(arr.size() == 3);
+ CHECK(arr[0].value<int64_t>() == 1);
+ CHECK(arr[1].value<std::string_view>() == "two");
+ CHECK(arr[2].value<double>() == 3.0);
+}
+```
+
+---
+
+## Adding New Tests
+
+1. Create a `.cpp` file in `tests/`
+2. Include `"tests.hpp"` for common utilities
+3. Add the file to the test source list in `tests/meson.build`
+4. Write `TEST_CASE` blocks using Catch2 macros
+5. Rebuild and run
+
+```cpp
+// tests/my_feature.cpp
+#include "tests.hpp"
+
+TEST_CASE("my feature - basic behavior")
+{
+ auto tbl = toml::parse(R"(key = "value")");
+ REQUIRE(tbl["key"].value<std::string_view>() == "value");
+}
+```
+
+---
+
+## Related Documentation
+
+- [building.md](building.md) — Build system setup
+- [code-style.md](code-style.md) — Code conventions
+- [parsing.md](parsing.md) — Parser being tested
diff --git a/docs/handbook/tomlplusplus/unicode-handling.md b/docs/handbook/tomlplusplus/unicode-handling.md
new file mode 100644
index 0000000000..6cafb3deff
--- /dev/null
+++ b/docs/handbook/tomlplusplus/unicode-handling.md
@@ -0,0 +1,335 @@
+# toml++ — Unicode Handling
+
+## Overview
+
+toml++ fully handles UTF-8 encoded input and output as required by the TOML specification. All TOML documents must be valid UTF-8, and the library validates, decodes, and encodes Unicode throughout parsing and formatting.
+
+Core Unicode utilities are in `include/toml++/impl/unicode.hpp` with auto-generated lookup tables in `unicode_autogenerated.hpp`.
+
+---
+
+## UTF-8 Input Requirements
+
+The parser expects all input to be valid UTF-8:
+
+- **BOM handling**: A leading UTF-8 BOM (`0xEF 0xBB 0xBF`) is silently stripped before parsing begins
+- **Validation**: Invalid byte sequences (overlong encodings, surrogate code points, truncated sequences) produce parse errors
+- **Multi-byte characters**: Fully supported in string values, comments, and bare keys (where permitted by TOML)
+
+```cpp
+// UTF-8 content works naturally
+auto tbl = toml::parse(R"(
+ greeting = "Hello, 世界!"
+ emoji = "🎉"
+ name = "Ñoño"
+)");
+```
+
+---
+
+## Character Classification
+
+The library classifies Unicode code points for parsing with functions in `unicode.hpp`:
+
+### `is_string_delimiter()`
+
+Identifies characters that can start/end strings: `"` (U+0022) and `'` (U+0027).
+
+### `is_ascii_letter()`
+
+`[A-Za-z]` — used in bare key validation and other ASCII-specific checks.
+
+### `is_ascii_whitespace()`
+
+Space (U+0020) and tab (U+0009).
+
+### `is_ascii_line_break()`
+
+LF (U+000A) and CR (U+000D).
+
+### `is_bare_key_character()`
+
+Characters permitted in TOML bare keys: `[A-Za-z0-9_-]` plus Unicode letters/digits when `TOML_ENABLE_UNRELEASED_FEATURES` is enabled.
+
+### `is_control_character()`
+
+Control characters (U+0000–U+001F, U+007F) excluding tab. These are forbidden in basic strings and must be escaped.
+
+### `is_non_ascii_letter()`
+
+Unicode letter code points outside ASCII — from auto-generated tables in `unicode_autogenerated.hpp`. Used for extended bare key support in unreleased TOML features.
+
+### `is_non_ascii_number()`
+
+Unicode digit code points outside ASCII (e.g., Arabic-Indic digits).
+
+### `is_non_ascii_whitespace()`
+
+Unicode whitespace beyond ASCII space/tab.
+
+---
+
+## Escape Sequences in Strings
+
+TOML basic strings (`"..."` and `"""..."""`) support escape sequences. The parser decodes these into their UTF-8 representations:
+
+| Escape | Meaning | Code Point |
+|--------|---------|------------|
+| `\b` | Backspace | U+0008 |
+| `\t` | Tab | U+0009 |
+| `\n` | Line Feed | U+000A |
+| `\f` | Form Feed | U+000C |
+| `\r` | Carriage Return | U+000D |
+| `\"` | Quote | U+0022 |
+| `\\` | Backslash | U+005C |
+| `\uXXXX` | Unicode (4 hex digits) | U+0000–U+FFFF |
+| `\UXXXXXXXX` | Unicode (8 hex digits) | U+00000000–U+0010FFFF |
+
+### `control_char_escapes` Table
+
+The formatter uses a lookup table for serializing control characters back to escape sequences:
+
+```cpp
+// In impl namespace:
+inline constexpr const char* control_char_escapes[] = {
+ "\\u0000", "\\u0001", "\\u0002", "\\u0003",
+ "\\u0004", "\\u0005", "\\u0006", "\\u0007",
+ "\\b", "\\t", "\\n", "\\u000B",
+ "\\f", "\\r", "\\u000E", "\\u000F",
+ "\\u0010", "\\u0011", "\\u0012", "\\u0013",
+ "\\u0014", "\\u0015", "\\u0016", "\\u0017",
+ "\\u0018", "\\u0019", "\\u001A", "\\u001B",
+ "\\u001C", "\\u001D", "\\u001E", "\\u001F",
+};
+```
+
+---
+
+## Unicode Escape Decoding
+
+The parser processes `\uXXXX` and `\UXXXXXXXX` escapes:
+
+1. Reads 4 or 8 hexadecimal digits
+2. Validates the code point:
+ - Must not be a surrogate (U+D800–U+DFFF)
+ - Must not exceed U+10FFFF
+   - Must not be a non-character (U+FDD0–U+FDEF, or the last two code points of any plane: U+xFFFE / U+xFFFF)
+3. Encodes to UTF-8 bytes (1–4 bytes depending on code point range)
+
+```toml
+# Valid Unicode escapes
+escape_a = "\u0041" # "A"
+escape_heart = "\u2764" # "❤"
+escape_emoji = "\U0001F600" # "😀"
+```
+
+```cpp
+auto tbl = toml::parse(R"(
+ a = "\u0041"
+ heart = "\u2764"
+)");
+
+std::cout << tbl["a"].value_or(""sv) << "\n"; // A
+std::cout << tbl["heart"].value_or(""sv) << "\n"; // ❤
+```
+
+---
+
+## UTF-8 Encoding in Output
+
+When formatting, the behavior depends on `format_flags::allow_unicode_strings`:
+
+### With `allow_unicode_strings` (default for TOML and YAML formatters)
+
+Non-ASCII characters pass through unescaped:
+
+```cpp
+auto tbl = toml::table{ { "name", "日本語" } };
+std::cout << tbl << "\n";
+// name = "日本語"
+```
+
+### Without `allow_unicode_strings`
+
+Non-ASCII characters are escaped to `\uXXXX` / `\UXXXXXXXX`:
+
+```cpp
+auto tbl = toml::table{ { "name", "日本語" } };
+auto fmt = toml::toml_formatter{
+ tbl,
+ toml::format_flags::indentation // no allow_unicode_strings
+};
+std::cout << fmt << "\n";
+// name = "\u65E5\u672C\u8A9E"
+```
+
+---
+
+## char8_t Support (C++20)
+
+When compiling with C++20, `char8_t` overloads are available for parsing:
+
+```cpp
+auto tbl = toml::parse(u8R"(
+ greeting = "Hello, 世界"
+)"sv);
+```
+
+The `char8_t` strings are internally treated as UTF-8 byte sequences. `std::u8string_view` is accepted by `parse()`.
+
+### `source_path` as u8string
+
+```cpp
+auto tbl = toml::parse(doc, u8"config.toml"sv);
+```
+
+---
+
+## Windows Compatibility (`TOML_ENABLE_WINDOWS_COMPAT`)
+
+When enabled (default on Windows), additional conversion overloads exist:
+
+- `parse_file(std::wstring_view)` — converts wide file path to UTF-8
+- `value<std::wstring>()` — converts stored UTF-8 string to wide string
+- String comparison with `wchar_t*` / `std::wstring_view`
+
+The conversions use Windows API (`MultiByteToWideChar` / `WideCharToMultiByte`) internally.
+
+---
+
+## Bare Key Unicode Rules
+
+Per TOML v1.0.0, bare keys are limited to ASCII letters, digits, hyphen, and underscore:
+
+```toml
+valid-key = "value"
+valid_key_2 = "value"
+# 日本語 = "value" # NOT valid as bare key in TOML v1.0
+```
+
+Non-ASCII keys must be quoted:
+
+```toml
+"日本語" = "value" # valid as quoted key
+```
+
+### Unreleased Features
+
+With `TOML_ENABLE_UNRELEASED_FEATURES=1`, the parser accepts Unicode letters and digits in bare keys as proposed for future TOML versions:
+
+```toml
+# Only with TOML_ENABLE_UNRELEASED_FEATURES=1:
+日本語 = "value" # bare key with Unicode letters
+```
+
+The `is_non_ascii_letter()` and `is_non_ascii_number()` functions from `unicode_autogenerated.hpp` provide the code point tables for this classification.
+
+---
+
+## Auto-Generated Unicode Tables
+
+`include/toml++/impl/unicode_autogenerated.hpp` contains machine-generated lookup tables derived from the Unicode Character Database. These tables classify code points by category:
+
+- **Letter** categories: Lu, Ll, Lt, Lm, Lo
+- **Number** categories: Nd, Nl
+- **Combining marks**: Mn, Mc
+- **Connector punctuation**: Pc
+
+The tables use range-based compression for efficiency:
+
+```cpp
+// Simplified representation:
+struct code_point_range
+{
+ char32_t first;
+ char32_t last;
+};
+
+// Function uses binary search over sorted ranges
+bool is_non_ascii_letter(char32_t cp) noexcept;
+```
+
+---
+
+## String Handling in Formatters
+
+Each formatter handles strings slightly differently:
+
+### TOML Formatter
+
+- Defaults to basic strings with escaping: `"hello\nworld"`
+- Uses literal strings when `allow_literal_strings` is set and string has no single quotes: `'no escapes needed'`
+- Uses multi-line strings when `allow_multi_line_strings` is set and string contains newlines
+- Preserves Unicode with `allow_unicode_strings` (default on)
+
+### JSON Formatter
+
+- Always uses double-quoted strings
+- Escapes all required JSON characters
+- Does not use literal or multi-line strings
+- Unicode behavior follows `allow_unicode_strings` flag
+
+### YAML Formatter
+
+- Uses double-quoted strings
+- `allow_unicode_strings` is on by default
+- Escapes control characters
+
+---
+
+## Complete Example
+
+```cpp
+#include <toml++/toml.hpp>
+#include <iostream>
+using namespace std::string_view_literals; // enables the ""sv literals below
+int main()
+{
+ // Parse document with Unicode content
+ auto config = toml::parse(R"(
+ title = "日本語テスト"
+ greeting = "Hello, 世界! 🌍"
+ escaped = "\u0048\u0065\u006C\u006C\u006F"
+ path = "C:\\Users\\名前\\config"
+
+ [metadata]
+ "quoted.key" = "value"
+ author = "José García"
+ )");
+
+ // Read values — Unicode is preserved
+ auto title = config["title"].value_or(""sv);
+ std::cout << "Title: " << title << "\n";
+ // Title: 日本語テスト
+
+ auto greeting = config["greeting"].value_or(""sv);
+ std::cout << "Greeting: " << greeting << "\n";
+ // Greeting: Hello, 世界! 🌍
+
+ // Escaped values are decoded
+ auto escaped = config["escaped"].value_or(""sv);
+ std::cout << "Escaped: " << escaped << "\n";
+ // Escaped: Hello
+
+ // Serialize back — Unicode preserved by default
+ std::cout << "\n=== TOML (Unicode) ===\n";
+ std::cout << config << "\n";
+
+ // Serialize with Unicode escaping
+ std::cout << "\n=== TOML (Escaped) ===\n";
+ std::cout << toml::toml_formatter{
+ config,
+ toml::format_flags::indentation // no allow_unicode_strings
+ } << "\n";
+
+ return 0;
+}
+```
+
+---
+
+## Related Documentation
+
+- [parsing.md](parsing.md) — Parser UTF-8 input handling
+- [formatting.md](formatting.md) — Unicode output control via format_flags
+- [values.md](values.md) — String value type
diff --git a/docs/handbook/tomlplusplus/values.md b/docs/handbook/tomlplusplus/values.md
new file mode 100644
index 0000000000..453140df89
--- /dev/null
+++ b/docs/handbook/tomlplusplus/values.md
@@ -0,0 +1,547 @@
+# toml++ — Values
+
+## Overview
+
+`toml::value<T>` represents leaf TOML values — the concrete data in a TOML document. Each `value` wraps one of seven native C++ types corresponding to the TOML data types.
+
+Declared in `include/toml++/impl/value.hpp` with supporting types in `forward_declarations.hpp` and `date_time.hpp`.
+
+---
+
+## Native Types
+
+The seven supported value types map to TOML types via `toml::value_type_of<T>`:
+
+| TOML Type | C++ Storage Type | `node_type` Enum | Alias |
+|-------------------|---------------------|---------------------------|--------------------|
+| String | `std::string` | `node_type::string` | `value<std::string>` |
+| Integer | `int64_t` | `node_type::integer` | `value<int64_t>` |
+| Float | `double` | `node_type::floating_point` | `value<double>` |
+| Boolean | `bool` | `node_type::boolean` | `value<bool>` |
+| Date | `toml::date` | `node_type::date` | `value<date>` |
+| Time | `toml::time` | `node_type::time` | `value<time>` |
+| Date-Time | `toml::date_time` | `node_type::date_time` | `value<date_time>` |
+
+Only these seven instantiations of `value<T>` exist. The template is constrained:
+
+```cpp
+template <typename T>
+class value : public node
+{
+ static_assert(
+ impl::is_native<T>,
+ "Template parameter must be one of the TOML native value types"
+ );
+
+ private:
+ value_type val_;
+ value_flags flags_ = value_flags::none;
+};
+```
+
+Where `value_type` is the `impl::native_type_of<T>` alias.
+
+---
+
+## Type Traits
+
+```cpp
+// Check at compile time
+toml::is_string<value<std::string>> // true
+toml::is_integer<value<int64_t>> // true
+toml::is_floating_point<value<double>> // true
+toml::is_boolean<value<bool>> // true
+toml::is_date<value<date>> // true
+toml::is_time<value<time>> // true
+toml::is_date_time<value<date_time>> // true
+
+// Works on the raw types too
+toml::is_integer<int64_t> // true
+toml::is_number<int64_t> // true (integer or float)
+toml::is_number<double> // true
+
+// Supertype checks
+toml::is_value<value<int64_t>> // true (any value<T>)
+toml::is_chronological<value<date>> // true (date, time, or date_time)
+```
+
+---
+
+## Construction
+
+### From Compatible Types
+
+```cpp
+toml::value<int64_t> i{ 42 };
+toml::value<double> f{ 3.14 };
+toml::value<std::string> s{ "hello" };
+toml::value<bool> b{ true };
+
+// Implicit promotion from smaller integer types
+toml::value<int64_t> from_int{ 42 }; // int → int64_t
+toml::value<int64_t> from_short{ short(5) }; // short → int64_t
+
+// Implicit promotion from float → double
+toml::value<double> from_float{ 1.5f }; // float → double
+```
+
+The `native_value_maker` mechanism handles promotions:
+
+```cpp
+// In impl namespace:
+template <typename T>
+struct native_value_maker;
+
+// For integer types (int, unsigned, short, etc.):
+// Promotes to int64_t
+
+// For floating-point (float):
+// Promotes to double
+
+// For string types (const char*, string_view, etc.):
+// Converts to std::string
+
+// For char8_t strings (u8"..."):
+// Transcodes to std::string
+```
+
+### Copy and Move
+
+```cpp
+toml::value<std::string> original{ "hello" };
+toml::value<std::string> copy{ original }; // deep copy
+toml::value<std::string> moved{ std::move(original) }; // move
+```
+
+### Assignment
+
+```cpp
+auto v = toml::value<int64_t>{ 10 };
+v = 42; // assign from raw value (operator=(ValueType))
+v = copy; // assign from another value (operator=(const value&))
+v = std::move(other); // move assign
+```
+
+---
+
+## Retrieving Values
+
+### `get()` — Direct Access
+
+```cpp
+ValueType& get() & noexcept;
+ValueType&& get() && noexcept;
+const ValueType& get() const& noexcept;
+```
+
+Returns a direct reference to the stored value:
+
+```cpp
+auto v = toml::value<std::string>{ "hello" };
+std::string& s = v.get();
+s += " world";
+std::cout << v.get() << "\n"; // "hello world"
+```
+
+### `operator ValueType&()` — Explicit Conversion
+
+```cpp
+explicit operator ValueType&() noexcept;
+explicit operator const ValueType&() const noexcept;
+```
+
+These operators are marked `explicit`, so the conversion must be requested with a `static_cast`:
+
+```cpp
+auto v = toml::value<int64_t>{ 42 };
+int64_t x = static_cast<int64_t>(v);
+```
+
+### `operator*()` / `operator->()`
+
+```cpp
+ValueType& operator*() & noexcept;
+const ValueType& operator*() const& noexcept;
+ValueType* operator->() noexcept;
+const ValueType* operator->() const noexcept;
+```
+
+Dereference-style access:
+
+```cpp
+auto v = toml::value<std::string>{ "hello" };
+std::cout << v->length() << "\n"; // 5
+std::cout << (*v).size() << "\n"; // 5
+```
+
+---
+
+## Value Flags
+
+`value_flags` is a bitmask controlling how values are formatted when serialized:
+
+```cpp
+enum class value_flags : uint16_t
+{
+ none = 0,
+ format_as_binary = 1, // 0b10101
+ format_as_octal = 2, // 0o755
+ format_as_hexadecimal = 4, // 0xFF
+
+ // Special sentinel (default behavior):
+ // preserve_source_value_flags
+};
+```
+
+### Getting / Setting Flags
+
+```cpp
+value_flags flags() const noexcept;
+value<T>& flags(value_flags new_flags) noexcept;
+```
+
+```cpp
+auto v = toml::value<int64_t>{ 255 };
+v.flags(toml::value_flags::format_as_hexadecimal);
+
+std::cout << toml::toml_formatter{ toml::table{ { "val", v } } };
+// Output: val = 0xFF
+```
+
+### Source Format Preservation
+
+When parsing, the library records the source format in the flags. When printing, if `preserve_source_value_flags` is used (the default), the original format is retained:
+
+```toml
+port = 0xFF
+mask = 0o777
+bits = 0b1010
+```
+
+After parsing and re-serializing, these retain their hex/octal/binary format.
+
+---
+
+## Date and Time Types
+
+Defined in `include/toml++/impl/date_time.hpp`.
+
+### `toml::date`
+
+```cpp
+struct date
+{
+ uint16_t year;
+ uint8_t month; // 1-12
+ uint8_t day; // 1-31
+
+ // Comparison operators
+ friend bool operator==(const date&, const date&) noexcept;
+ friend bool operator!=(const date&, const date&) noexcept;
+ friend bool operator< (const date&, const date&) noexcept;
+ friend bool operator<=(const date&, const date&) noexcept;
+ friend bool operator> (const date&, const date&) noexcept;
+ friend bool operator>=(const date&, const date&) noexcept;
+
+ // Streaming
+ friend std::ostream& operator<<(std::ostream&, const date&);
+};
+```
+
+```cpp
+auto d = toml::date{ 2024, 1, 15 };
+auto v = toml::value<toml::date>{ d };
+std::cout << v << "\n"; // 2024-01-15
+```
+
+### `toml::time`
+
+```cpp
+struct time
+{
+ uint8_t hour; // 0-23
+ uint8_t minute; // 0-59
+ uint8_t second; // 0-59 (0-60 for leap second)
+ uint32_t nanosecond; // 0-999999999
+
+ // Comparison and streaming operators
+};
+```
+
+```cpp
+auto t = toml::time{ 14, 30, 0 };
+auto v = toml::value<toml::time>{ t };
+std::cout << v << "\n"; // 14:30:00
+```
+
+### `toml::time_offset`
+
+```cpp
+struct time_offset
+{
+ int16_t minutes; // UTC offset: -720 to +840
+
+ // Convenience for UTC:
+ static constexpr time_offset utc() noexcept { return { 0 }; }
+
+ // Comparison operators
+};
+```
+
+### `toml::date_time`
+
+```cpp
+struct date_time
+{
+ toml::date date;
+ toml::time time;
+ optional<time_offset> offset; // nullopt = local date-time
+
+ // Constructor overloads:
+ constexpr date_time(const toml::date& d, const toml::time& t) noexcept;
+ constexpr date_time(const toml::date& d, const toml::time& t,
+ const toml::time_offset& off) noexcept;
+
+ bool is_local() const noexcept; // true if no offset
+
+ // Comparison and streaming operators
+};
+```
+
+#### TOML Date-Time Variants
+
+```toml
+# Offset date-time (has time zone)
+odt = 2024-01-15T14:30:00+05:30
+odt_utc = 2024-01-15T09:00:00Z
+
+# Local date-time (no time zone)
+ldt = 2024-01-15T14:30:00
+
+# Local date
+ld = 2024-01-15
+
+# Local time
+lt = 14:30:00
+```
+
+```cpp
+auto tbl = toml::parse(R"(
+ odt = 2024-01-15T14:30:00+05:30
+ ldt = 2024-01-15T14:30:00
+ ld = 2024-01-15
+ lt = 14:30:00
+)");
+
+auto odt = tbl["odt"].value<toml::date_time>();
+// odt->offset has value, odt->is_local() == false
+
+auto ldt = tbl["ldt"].value<toml::date_time>();
+// ldt->offset is nullopt, ldt->is_local() == true
+
+auto ld = tbl["ld"].value<toml::date>();
+// ld->year == 2024, month == 1, day == 15
+
+auto lt = tbl["lt"].value<toml::time>();
+// lt->hour == 14, minute == 30, second == 0
+```
+
+---
+
+## Type Identity
+
+```cpp
+// For value<int64_t>:
+node_type type() const noexcept final; // node_type::integer
+bool is_integer() const noexcept final; // true
+bool is_number() const noexcept final; // true
+bool is_value() const noexcept final; // true
+// All other is_*() return false
+
+value<int64_t>* as_integer() noexcept final; // returns this
+// All other as_*() return nullptr
+```
+
+---
+
+## Retrieving Values from Nodes
+
+From the base `node` or `node_view`, there are multiple retrieval patterns:
+
+### `value<T>()` — Get with Type Coercion
+
+```cpp
+// As node method:
+optional<T> value() const noexcept;
+```
+
+Attempts to return the value as `T`, with permitted coercions:
+- `int64_t` → `int`, `unsigned`, `size_t`, etc. (bounds-checked)
+- `double` → `float` (precision loss allowed)
+- `int64_t` → `double` (only when the integer is exactly representable as a double)
+- `double` → `int64_t` (only when the value has no fractional part and fits in the integer's range)
+
+```cpp
+auto tbl = toml::parse("x = 42");
+
+auto as_int = tbl["x"].value<int64_t>(); // optional(42)
+auto as_double = tbl["x"].value<double>(); // optional(42.0)
+auto as_int32 = tbl["x"].value<int>(); // optional(42)
+auto as_string = tbl["x"].value<std::string>(); // nullopt (type mismatch)
+```
+
+### `value_exact<T>()` — No Coercion
+
+```cpp
+optional<T> value_exact() const noexcept;
+```
+
+Only succeeds if the stored type exactly matches `T` (no int→double or similar coercions):
+
+```cpp
+auto tbl = toml::parse("x = 42");
+
+auto exact_int = tbl["x"].value_exact<int64_t>(); // optional(42)
+auto exact_dbl = tbl["x"].value_exact<double>(); // nullopt (it's an int)
+```
+
+### `value_or()` — With Default
+
+```cpp
+template <typename T>
+auto value_or(T&& default_value) const noexcept;
+```
+
+Returns the value or a default:
+
+```cpp
+auto tbl = toml::parse("name = \"Alice\"");
+
+auto name = tbl["name"].value_or("unknown"sv); // "Alice"
+auto age = tbl["age"].value_or(int64_t{ 0 }); // 0 (key missing)
+```
+
+---
+
+## Comparison
+
+```cpp
+// Between values of same type
+friend bool operator==(const value& lhs, const value& rhs) noexcept;
+
+// Between value and raw type
+friend bool operator==(const value<T>& lhs, const T& rhs) noexcept;
+```
+
+```cpp
+auto a = toml::value<int64_t>{ 42 };
+auto b = toml::value<int64_t>{ 42 };
+
+std::cout << (a == b) << "\n"; // true
+std::cout << (a == 42) << "\n"; // true
+std::cout << (a != 99) << "\n"; // true
+```
+
+For `value<std::string>`, comparison also works with `std::string_view` and `const char*`:
+
+```cpp
+auto s = toml::value<std::string>{ "hello" };
+std::cout << (s == "hello") << "\n"; // true
+std::cout << (s == "world"sv) << "\n"; // false
+```
+
+---
+
+## `make_value<T>`
+
+Utility function in `make_node.hpp` for constructing values:
+
+```cpp
+template <typename T, typename... Args>
+auto make_value(Args&&... args)
+ -> decltype(std::make_unique<impl::wrap_node<T>>(std::forward<Args>(args)...));
+```
+
+Returns `std::unique_ptr<value<T>>`:
+
+```cpp
+auto v = toml::make_value<int64_t>(42);
+// v is std::unique_ptr<toml::value<int64_t>>
+```
+
+---
+
+## Printing
+
+Values stream via the default formatter:
+
+```cpp
+auto v = toml::value<std::string>{ "hello world" };
+std::cout << v << "\n"; // "hello world"
+
+auto d = toml::value<toml::date>{ toml::date{ 2024, 6, 15 } };
+std::cout << d << "\n"; // 2024-06-15
+
+auto i = toml::value<int64_t>{ 255 };
+i.flags(toml::value_flags::format_as_hexadecimal);
+std::cout << i << "\n"; // 0xFF
+```
+
+---
+
+## Complete Example
+
+```cpp
+#include <toml++/toml.hpp>
+#include <iostream>
+
+using namespace std::string_view_literals; // enables the ""sv literal used below
+
+int main()
+{
+ auto config = toml::parse(R"(
+ title = "My App"
+ version = 3
+ debug = true
+ pi = 3.14159
+ created = 2024-01-15T10:30:00Z
+ expires = 2025-12-31
+ check_time = 08:00:00
+ )");
+
+ // Type-safe retrieval with defaults
+ auto title = config["title"].value_or("Untitled"sv);
+ auto version = config["version"].value_or(int64_t{ 1 });
+ auto debug = config["debug"].value_or(false);
+ auto pi = config["pi"].value_or(0.0);
+
+ std::cout << "Title: " << title << "\n";
+ std::cout << "Version: " << version << "\n";
+ std::cout << "Debug: " << debug << "\n";
+ std::cout << "Pi: " << pi << "\n";
+
+ // Date-time access
+ if (auto dt = config["created"].value<toml::date_time>())
+ {
+ std::cout << "Created: " << dt->date.year
+ << "-" << (int)dt->date.month
+ << "-" << (int)dt->date.day << "\n";
+
+ if (!dt->is_local())
+ std::cout << " Offset: " << dt->offset->minutes << " min\n";
+ }
+
+ // Modify and re-serialize
+ auto* v = config["version"].as_integer();
+ if (v) *v = 4;
+
+ std::cout << "\n" << config << "\n";
+
+ return 0;
+}
+```
+
+---
+
+## Related Documentation
+
+- [node-system.md](node-system.md) — Base node class and type dispatch
+- [arrays.md](arrays.md) — Array container
+- [tables.md](tables.md) — Table container
+- [parsing.md](parsing.md) — Parsing values from TOML text
+- [formatting.md](formatting.md) — Formatting values for output