summaryrefslogtreecommitdiff
path: root/docs/handbook/libnbtplusplus/io-system.md
diff options
context:
space:
mode:
Diffstat (limited to 'docs/handbook/libnbtplusplus/io-system.md')
-rw-r--r--docs/handbook/libnbtplusplus/io-system.md672
1 files changed, 672 insertions, 0 deletions
diff --git a/docs/handbook/libnbtplusplus/io-system.md b/docs/handbook/libnbtplusplus/io-system.md
new file mode 100644
index 0000000000..9f0d543a51
--- /dev/null
+++ b/docs/handbook/libnbtplusplus/io-system.md
@@ -0,0 +1,672 @@
+# I/O System
+
+## Overview
+
+The `nbt::io` namespace provides the binary serialization layer for reading and writing NBT data. The two central classes are `stream_reader` and `stream_writer`, both operating on standard C++ streams (`std::istream` / `std::ostream`).
+
+Defined in:
+- `include/io/stream_reader.h` / `src/io/stream_reader.cpp`
+- `include/io/stream_writer.h` / `src/io/stream_writer.cpp`
+
+---
+
+## stream_reader
+
+### Class Definition
+
+```cpp
+class NBT_EXPORT stream_reader
+{
+public:
+ explicit stream_reader(std::istream& is,
+ endian::endian e = endian::endian::big);
+
+ std::istream& get_istr() const { return is; }
+ endian::endian get_endian() const { return endian; }
+
+ // Read named + typed tags
+ std::pair<std::string, std::unique_ptr<tag>> read_tag();
+
+ // Read payload only (for tags whose type is already known)
+ std::unique_ptr<tag> read_payload(tag_type type);
+
+ // Read a type byte
+ tag_type read_type(bool allow_end);
+
+ // Read a length-prefixed UTF-8 string
+ std::string read_string();
+
+ // Read a numeric value in the configured endianness
+ template <class T> void read_num(T& x);
+
+ static const unsigned int MAX_DEPTH = 1024;
+
+private:
+ std::istream& is;
+ endian::endian endian;
+ unsigned int depth_ = 0;
+};
+```
+
+### Constructor
+
+```cpp
+stream_reader(std::istream& is, endian::endian e = endian::endian::big);
+```
+
+- `is`: The input stream to read from
+- `e`: Byte order — `endian::big` (default, Java edition NBT) or `endian::little` (Bedrock edition)
+
+### read_tag() — Read a Complete Named Tag
+
+```cpp
+std::pair<std::string, std::unique_ptr<tag>> read_tag();
+```
+
+Reads a complete tag from the stream:
+1. Reads the type byte
+2. If type is `End`, returns `{"", nullptr}` (end-of-compound sentinel)
+3. Reads the name string
+4. Reads the payload via `read_payload()`
+
+Returns a pair of `{name, tag_ptr}`.
+
+Implementation:
+```cpp
+std::pair<std::string, std::unique_ptr<tag>>
+stream_reader::read_tag()
+{
+ tag_type type = read_type(true);
+ if (type == tag_type::End)
+ return {"", nullptr};
+
+ std::string name = read_string();
+ auto tag = read_payload(type);
+ return {std::move(name), std::move(tag)};
+}
+```
+
+### read_payload() — Read a Tag Payload
+
+```cpp
+std::unique_ptr<tag> read_payload(tag_type type);
+```
+
+Creates a tag of the specified type, then calls its `read_payload()` virtual method. Tracks recursive nesting depth, throwing `io::input_error` if `MAX_DEPTH` (1024) is exceeded.
+
+Implementation:
+```cpp
+std::unique_ptr<tag> stream_reader::read_payload(tag_type type)
+{
+ if (++depth_ > MAX_DEPTH)
+ throw input_error("Maximum nesting depth exceeded");
+
+ auto ret = tag::create(type);
+ ret->read_payload(*this);
+
+ --depth_;
+ return ret;
+}
+```
+
+The `tag::create()` factory instantiates the correct concrete class:
+```cpp
+std::unique_ptr<tag> tag::create(tag_type type)
+{
+ switch (type) {
+ case tag_type::Byte: return make_unique<tag_byte>();
+ case tag_type::Short: return make_unique<tag_short>();
+ case tag_type::Int: return make_unique<tag_int>();
+ case tag_type::Long: return make_unique<tag_long>();
+ case tag_type::Float: return make_unique<tag_float>();
+ case tag_type::Double: return make_unique<tag_double>();
+ case tag_type::Byte_Array: return make_unique<tag_byte_array>();
+ case tag_type::String: return make_unique<tag_string>();
+ case tag_type::List: return make_unique<tag_list>();
+ case tag_type::Compound: return make_unique<tag_compound>();
+ case tag_type::Int_Array: return make_unique<tag_int_array>();
+ case tag_type::Long_Array: return make_unique<tag_long_array>();
+ default:
+ throw std::invalid_argument("Invalid tag type: "
+ + std::to_string(static_cast<int>(type)));
+ }
+}
+```
+
+### read_type() — Read and Validate Type Byte
+
+```cpp
+tag_type read_type(bool allow_end);
+```
+
+Reads a single byte, casts to `tag_type`, and validates:
+```cpp
+tag_type stream_reader::read_type(bool allow_end)
+{
+ int type = is.get();
+ if (!is)
+ throw input_error("Error reading tag type");
+ if (!is_valid_type(type, allow_end))
+ throw input_error("Invalid tag type: "
+ + std::to_string(type));
+ return static_cast<tag_type>(type);
+}
+```
+
+The `allow_end` parameter controls whether `tag_type::End` (0) is accepted — it's valid when reading list element types or compound children, but not at the top level of a standalone tag.
+
+### read_string() — Read Length-Prefixed String
+
+```cpp
+std::string read_string();
+```
+
+Reads a 2-byte unsigned length, then that many bytes of UTF-8 data:
+```cpp
+std::string stream_reader::read_string()
+{
+ uint16_t len;
+ read_num(len);
+ if (!is)
+ throw input_error("Error reading string length");
+ std::string str(len, '\0');
+ is.read(&str[0], len);
+ if (!is)
+ throw input_error("Error reading string");
+ return str;
+}
+```
+
+Maximum string length: 65535 bytes (uint16_t max).
+
+### read_num() — Read Numeric Value
+
+```cpp
+template <class T> void read_num(T& x)
+{
+ endian::read(is, x, endian);
+}
+```
+
+Delegates to the `endian` namespace for endianness-appropriate reading.
+
+---
+
+## stream_writer
+
+### Class Definition
+
+```cpp
+class NBT_EXPORT stream_writer
+{
+public:
+ explicit stream_writer(std::ostream& os,
+ endian::endian e = endian::endian::big);
+
+ std::ostream& get_ostr() const { return os; }
+ endian::endian get_endian() const { return endian; }
+
+ void write_type(tag_type type);
+ void write_string(const std::string& str);
+ void write_payload(const tag& t);
+ template <class T> void write_num(T x);
+
+ static constexpr size_t max_string_len = UINT16_MAX;
+ static constexpr int32_t max_array_len = INT32_MAX;
+
+private:
+ std::ostream& os;
+ endian::endian endian;
+};
+```
+
+### Constructor
+
+```cpp
+stream_writer(std::ostream& os, endian::endian e = endian::endian::big);
+```
+
+- `os`: The output stream to write to
+- `e`: Byte order — `endian::big` (default) or `endian::little`
+
+### write_tag() — Free Function
+
+```cpp
+void write_tag(const std::string& name, const tag& t,
+ std::ostream& os,
+ endian::endian e = endian::endian::big);
+```
+
+This is a **free function** (not a member). It writes a complete named tag:
+1. Writes the type byte
+2. Writes the name string
+3. Writes the payload
+
+```cpp
+void write_tag(const std::string& name, const tag& t,
+ std::ostream& os, endian::endian e)
+{
+ stream_writer writer(os, e);
+ writer.write_type(t.get_type());
+ writer.write_string(name);
+ t.write_payload(writer);
+}
+```
+
+### write_type() — Write Type Byte
+
+```cpp
+void stream_writer::write_type(tag_type type)
+{
+ os.put(static_cast<char>(type));
+ if (!os)
+ throw std::runtime_error("Error writing tag type");
+}
+```
+
+### write_string() — Write Length-Prefixed String
+
+```cpp
+void stream_writer::write_string(const std::string& str)
+{
+ if (str.size() > max_string_len) {
+ os.setstate(std::ios::failbit);
+ throw std::length_error("String is too long for NBT");
+ }
+ write_num(static_cast<uint16_t>(str.size()));
+ os.write(str.data(), str.size());
+ if (!os)
+ throw std::runtime_error("Error writing string");
+}
+```
+
+Strings longer than 65535 bytes trigger a `std::length_error`.
+
+### write_payload() — Write Tag Payload
+
+```cpp
+void stream_writer::write_payload(const tag& t)
+{
+ t.write_payload(*this);
+}
+```
+
+Delegates to the tag's virtual `write_payload()` method.
+
+### write_num() — Write Numeric Value
+
+```cpp
+template <class T> void write_num(T x)
+{
+ endian::write(os, x, endian);
+}
+```
+
+---
+
+## Free Functions
+
+### Reading
+
+```cpp
+// In nbt::io namespace
+
+std::pair<std::string, std::unique_ptr<tag>>
+read_compound(std::istream& is,
+ endian::endian e = endian::endian::big);
+
+std::pair<std::string, std::unique_ptr<tag>>
+read_tag(std::istream& is,
+ endian::endian e = endian::endian::big);
+```
+
+**`read_compound()`** reads and validates that the top-level tag is a compound:
+
+```cpp
+std::pair<std::string, std::unique_ptr<tag>>
+read_compound(std::istream& is, endian::endian e)
+{
+ stream_reader reader(is, e);
+ auto result = reader.read_tag();
+ if (!result.second || result.second->get_type() != tag_type::Compound)
+ throw input_error("Top-level tag is not a compound");
+ return result;
+}
+```
+
+**`read_tag()`** reads any tag without type restriction:
+
+```cpp
+std::pair<std::string, std::unique_ptr<tag>>
+read_tag(std::istream& is, endian::endian e)
+{
+ stream_reader reader(is, e);
+ return reader.read_tag();
+}
+```
+
+### Writing
+
+```cpp
+void write_tag(const std::string& name, const tag& t,
+ std::ostream& os,
+ endian::endian e = endian::endian::big);
+```
+
+Writes a complete named tag (type + name + payload). See above.
+
+---
+
+## Error Handling
+
+### input_error
+
+```cpp
+class input_error : public std::runtime_error
+{
+public:
+ using std::runtime_error::runtime_error;
+};
+```
+
+Thrown by `stream_reader` for all parse errors:
+- Invalid tag type bytes
+- Stream read failures
+- Negative array/list lengths
+- Maximum nesting depth exceeded
+- Corrupt or truncated data
+
+### Stream State Errors
+
+Write errors set stream failbit and throw:
+- `std::runtime_error` for general write failures
+- `std::length_error` for strings exceeding `max_string_len` (65535 bytes)
+- `std::length_error` for arrays/lists exceeding `max_array_len` (INT32_MAX elements)
+- `std::logic_error` for list type inconsistencies during write
+
+---
+
+## Payload Format Per Tag Type
+
+Each concrete tag class implements its own `read_payload()` and `write_payload()`:
+
+### Primitives (tag_byte, tag_short, tag_int, tag_long, tag_float, tag_double)
+
+```cpp
+// In tag_primitive.h (inline)
+void read_payload(io::stream_reader& reader) override
+{
+ reader.read_num(val);
+}
+
+void write_payload(io::stream_writer& writer) const override
+{
+ writer.write_num(val);
+}
+```
+
+Simply reads/writes the raw value in the configured endianness.
+
+| Type | Payload Size |
+|------|-------------|
+| tag_byte | 1 byte |
+| tag_short | 2 bytes |
+| tag_int | 4 bytes |
+| tag_long | 8 bytes |
+| tag_float | 4 bytes |
+| tag_double | 8 bytes |
+
+### tag_string
+
+Payload: 2-byte length + UTF-8 data.
+
+```cpp
+void tag_string::read_payload(io::stream_reader& reader)
+{
+ val = reader.read_string();
+}
+
+void tag_string::write_payload(io::stream_writer& writer) const
+{
+ writer.write_string(val);
+}
+```
+
+### tag_array<T>
+
+Payload: 4-byte signed length + elements.
+
+Specialized for different element types:
+
+**tag_byte_array** (int8_t) — raw block read/write:
+```cpp
+// Specialization for int8_t (byte array)
+void tag_array<int8_t>::read_payload(io::stream_reader& reader)
+{
+ int32_t length;
+ reader.read_num(length);
+ if (length < 0)
+ reader.get_istr().setstate(std::ios::failbit);
+ if (!reader.get_istr())
+ throw io::input_error("Error reading length of tag_byte_array");
+ data.resize(length);
+ reader.get_istr().read(reinterpret_cast<char*>(data.data()), length);
+ if (!reader.get_istr())
+ throw io::input_error("Error reading tag_byte_array");
+}
+```
+
+**tag_long_array** (int64_t) — element-by-element:
+```cpp
+// Specialization for int64_t (long array)
+void tag_array<int64_t>::read_payload(io::stream_reader& reader)
+{
+ int32_t length;
+ reader.read_num(length);
+ if (length < 0)
+ reader.get_istr().setstate(std::ios::failbit);
+ if (!reader.get_istr())
+ throw io::input_error("Error reading length of tag_long_array");
+ data.clear();
+ data.reserve(length);
+ for (int32_t i = 0; i < length; ++i) {
+ int64_t val;
+ reader.read_num(val);
+ data.push_back(val);
+ }
+ if (!reader.get_istr())
+ throw io::input_error("Error reading tag_long_array");
+}
+```
+
+**Generic T** (int32_t for tag_int_array):
+```cpp
+template <class T>
+void tag_array<T>::read_payload(io::stream_reader& reader)
+{
+ int32_t length;
+ reader.read_num(length);
+ if (length < 0)
+ reader.get_istr().setstate(std::ios::failbit);
+ if (!reader.get_istr())
+ throw io::input_error("Error reading length of tag_array");
+ data.clear();
+ data.reserve(length);
+ for (int32_t i = 0; i < length; ++i) {
+ T val;
+ reader.read_num(val);
+ data.push_back(val);
+ }
+ if (!reader.get_istr())
+ throw io::input_error("Error reading tag_array");
+}
+```
+
+### tag_compound
+
+Payload: sequence of complete named tags, terminated by `tag_type::End` (single 0 byte):
+
+```cpp
+void tag_compound::read_payload(io::stream_reader& reader)
+{
+ clear();
+ std::pair<std::string, std::unique_ptr<tag>> entry;
+ while ((entry = reader.read_tag()).second)
+ tags.emplace(std::move(entry.first), std::move(entry.second));
+ if (!reader.get_istr())
+ throw io::input_error("Error reading tag_compound");
+}
+
+void tag_compound::write_payload(io::stream_writer& writer) const
+{
+ for (const auto& pair : tags) {
+ writer.write_type(pair.second.get_type());
+ writer.write_string(pair.first);
+ pair.second.get().write_payload(writer);
+ }
+ writer.write_type(tag_type::End);
+}
+```
+
+### tag_list
+
+Payload: 1-byte element type + 4-byte signed length + element payloads (without type bytes):
+
+(See the [list-tags.md](list-tags.md) document for the full implementation.)
+
+---
+
+## Depth Tracking
+
+`stream_reader` tracks recursive depth to prevent stack overflow from maliciously crafted NBT data with deeply nested compounds or lists:
+
+```cpp
+static const unsigned int MAX_DEPTH = 1024;
+```
+
+Each call to `read_payload()` increments `depth_`, and decrements on return. If `depth_` exceeds 1024, an `io::input_error` is thrown.
+
+This is critical for security — without depth limits, a crafted file with thousands of nested compounds could cause a stack overflow.
+
+---
+
+## Endianness
+
+Both `stream_reader` and `stream_writer` take an `endian::endian` parameter:
+
+| Value | Use Case |
+|-------|----------|
+| `endian::big` | Java Edition NBT (default, per Minecraft specification) |
+| `endian::little` | Bedrock Edition NBT |
+
+The endianness affects all numeric reads/writes (lengths, primitive values, etc.) but not single bytes (type, byte values).
+
+---
+
+## Usage Examples
+
+### Reading a File
+
+```cpp
+#include <nbt_tags.h>
+#include <io/stream_reader.h>
+#include <fstream>
+
+std::ifstream file("level.dat", std::ios::binary);
+auto result = nbt::io::read_compound(file);
+
+std::string name = result.first; // Root tag name
+tag_compound& root = result.second->as<tag_compound>();
+
+int32_t version = static_cast<int32_t>(root.at("version"));
+```
+
+### Reading with zlib Decompression
+
+```cpp
+#include <io/izlibstream.h>
+
+std::ifstream file("level.dat", std::ios::binary);
+zlib::izlibstream zs(file);
+auto result = nbt::io::read_compound(zs);
+```
+
+### Writing a File
+
+```cpp
+#include <io/stream_writer.h>
+#include <fstream>
+
+tag_compound root{
+ {"Data", tag_compound{
+ {"version", int32_t(19133)},
+ {"LevelName", std::string("My World")}
+ }}
+};
+
+std::ofstream file("level.dat", std::ios::binary);
+nbt::io::write_tag("", root, file);
+```
+
+### Writing with zlib Compression
+
+```cpp
+#include <io/ozlibstream.h>
+
+std::ofstream file("level.dat", std::ios::binary);
+zlib::ozlibstream zs(file);
+nbt::io::write_tag("", root, zs);
+zs.close();
+```
+
+### Little-Endian (Bedrock)
+
+```cpp
+auto result = nbt::io::read_compound(file, endian::endian::little);
+nbt::io::write_tag("", root, file, endian::endian::little);
+```
+
+### Roundtrip Test
+
+```cpp
+// Write
+std::stringstream ss;
+nbt::io::write_tag("test", original_root, ss);
+
+// Read back
+ss.seekg(0);
+auto [name, tag] = nbt::io::read_tag(ss);
+assert(name == "test");
+assert(*tag == original_root);
+```
+
+---
+
+## Wire Format Summary
+
+```
+Named Tag:
+ [type: 1 byte] [name_length: 2 bytes] [name: N bytes] [payload: variable]
+
+Compound Payload:
+ [child_tag_1] [child_tag_2] ... [End: 0x00]
+
+List Payload:
+ [element_type: 1 byte] [length: 4 bytes] [payload_1] [payload_2] ...
+
+String Payload:
+ [length: 2 bytes] [data: N bytes, UTF-8]
+
+Array Payload (Byte/Int/Long):
+ [length: 4 bytes] [element_1] [element_2] ...
+
+Primitive Payloads:
+ Byte: 1 byte
+ Short: 2 bytes
+ Int: 4 bytes
+ Long: 8 bytes
+ Float: 4 bytes (IEEE 754)
+ Double: 8 bytes (IEEE 754)
+```
+
+All multi-byte values use the configured endianness (big-endian by default).