diff options
Diffstat (limited to 'json4cpp/include/nlohmann/detail/input')
| -rw-r--r-- | json4cpp/include/nlohmann/detail/input/binary_reader.hpp | 3087 | ||||
| -rw-r--r-- | json4cpp/include/nlohmann/detail/input/input_adapters.hpp | 550 | ||||
| -rw-r--r-- | json4cpp/include/nlohmann/detail/input/json_sax.hpp | 986 | ||||
| -rw-r--r-- | json4cpp/include/nlohmann/detail/input/lexer.hpp | 1647 | ||||
| -rw-r--r-- | json4cpp/include/nlohmann/detail/input/parser.hpp | 536 | ||||
| -rw-r--r-- | json4cpp/include/nlohmann/detail/input/position_t.hpp | 37 |
6 files changed, 6843 insertions, 0 deletions
diff --git a/json4cpp/include/nlohmann/detail/input/binary_reader.hpp b/json4cpp/include/nlohmann/detail/input/binary_reader.hpp new file mode 100644 index 0000000000..033cfebd78 --- /dev/null +++ b/json4cpp/include/nlohmann/detail/input/binary_reader.hpp @@ -0,0 +1,3087 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.12.0 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2026 Niels Lohmann <https://nlohmann.me> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <algorithm> // generate_n +#include <array> // array +#include <cmath> // ldexp +#include <cstddef> // size_t +#include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t +#include <cstdio> // snprintf +#include <cstring> // memcpy +#include <iterator> // back_inserter +#include <limits> // numeric_limits +#include <string> // char_traits, string +#include <utility> // make_pair, move +#include <vector> // vector +#ifdef __cpp_lib_byteswap + #include <bit> //byteswap +#endif + +#include <nlohmann/detail/exceptions.hpp> +#include <nlohmann/detail/input/input_adapters.hpp> +#include <nlohmann/detail/input/json_sax.hpp> +#include <nlohmann/detail/input/lexer.hpp> +#include <nlohmann/detail/macro_scope.hpp> +#include <nlohmann/detail/meta/is_sax.hpp> +#include <nlohmann/detail/meta/type_traits.hpp> +#include <nlohmann/detail/string_concat.hpp> +#include <nlohmann/detail/value_t.hpp> + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/// how to treat CBOR tags +enum class cbor_tag_handler_t +{ + error, ///< throw a parse_error exception in case of a tag + ignore, ///< ignore tags + store ///< store tags as binary type +}; + +/*! +@brief determine system byte order + +@return true if and only if system's byte order is little endian + +@note from https://stackoverflow.com/a/1001328/266378 +*/ +inline bool little_endianness(int num = 1) noexcept +{ + return *reinterpret_cast<char*>(&num) == 1; +} + +/////////////////// +// binary reader // +/////////////////// + +/*! +@brief deserialization of CBOR, MessagePack, and UBJSON values +*/ +template<typename BasicJsonType, typename InputAdapterType, typename SAX = json_sax_dom_parser<BasicJsonType, InputAdapterType>> +class binary_reader +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + using json_sax_t = SAX; + using char_type = typename InputAdapterType::char_type; + using char_int_type = typename char_traits<char_type>::int_type; + + public: + /*! + @brief create a binary reader + + @param[in] adapter input adapter to read from + */ + explicit binary_reader(InputAdapterType&& adapter, const input_format_t format = input_format_t::json) noexcept : ia(std::move(adapter)), input_format(format) + { + (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; + } + + // make class move-only + binary_reader(const binary_reader&) = delete; + binary_reader(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + binary_reader& operator=(const binary_reader&) = delete; + binary_reader& operator=(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~binary_reader() = default; + + /*! + @param[in] format the binary format to parse + @param[in] sax_ a SAX event processor + @param[in] strict whether to expect the input to be consumed completed + @param[in] tag_handler how to treat CBOR tags + + @return whether parsing was successful + */ + JSON_HEDLEY_NON_NULL(3) + bool sax_parse(const input_format_t format, + json_sax_t* sax_, + const bool strict = true, + const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) + { + sax = sax_; + bool result = false; + + switch (format) + { + case input_format_t::bson: + result = parse_bson_internal(); + break; + + case input_format_t::cbor: + result = parse_cbor_internal(true, tag_handler); + break; + + case input_format_t::msgpack: + result = parse_msgpack_internal(); + break; + + case input_format_t::ubjson: + case input_format_t::bjdata: + result = parse_ubjson_internal(); + break; + + case input_format_t::json: // LCOV_EXCL_LINE + default: // LCOV_EXCL_LINE + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE + } + + // strict mode: next byte must be EOF + if (result && strict) + { + if (input_format == input_format_t::ubjson || input_format == input_format_t::bjdata) + { + get_ignore_noop(); + } + else + { + get(); + } + + if (JSON_HEDLEY_UNLIKELY(current != char_traits<char_type>::eof())) + { + return sax->parse_error(chars_read, get_token_string(), parse_error::create(110, chars_read, + exception_message(input_format, concat("expected end of input; last byte: 0x", get_token_string()), "value"), nullptr)); + } + } + + return result; + } + + private: + ////////// + // BSON // + ////////// + + /*! + @brief Reads in a BSON-object and passes it to the SAX-parser. + @return whether a valid BSON-value was passed to the SAX parser + */ + bool parse_bson_internal() + { + std::int32_t document_size{}; + get_number<std::int32_t, true>(input_format_t::bson, document_size); + + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(detail::unknown_size()))) + { + return false; + } + + if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/false))) + { + return false; + } + + return sax->end_object(); + } + + /*! + @brief Parses a C-style string from the BSON input. + @param[in,out] result A reference to the string variable where the read + string is to be stored. + @return `true` if the \x00-byte indicating the end of the string was + encountered before the EOF; false` indicates an unexpected EOF. + */ + bool get_bson_cstr(string_t& result) + { + auto out = std::back_inserter(result); + while (true) + { + get(); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "cstring"))) + { + return false; + } + if (current == 0x00) + { + return true; + } + *out++ = static_cast<typename string_t::value_type>(current); + } + } + + /*! + @brief Parses a zero-terminated string of length @a len from the BSON + input. + @param[in] len The length (including the zero-byte at the end) of the + string to be read. + @param[in,out] result A reference to the string variable where the read + string is to be stored. + @tparam NumberType The type of the length @a len + @pre len >= 1 + @return `true` if the string was successfully parsed + */ + template<typename NumberType> + bool get_bson_string(const NumberType len, string_t& result) + { + if (JSON_HEDLEY_UNLIKELY(len < 1)) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, + exception_message(input_format_t::bson, concat("string length must be at least 1, is ", std::to_string(len)), "string"), nullptr)); + } + + return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) && get() != char_traits<char_type>::eof(); + } + + /*! + @brief Parses a byte array input of length @a len from the BSON input. + @param[in] len The length of the byte array to be read. + @param[in,out] result A reference to the binary variable where the read + array is to be stored. + @tparam NumberType The type of the length @a len + @pre len >= 0 + @return `true` if the byte array was successfully parsed + */ + template<typename NumberType> + bool get_bson_binary(const NumberType len, binary_t& result) + { + if (JSON_HEDLEY_UNLIKELY(len < 0)) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, + exception_message(input_format_t::bson, concat("byte array length cannot be negative, is ", std::to_string(len)), "binary"), nullptr)); + } + + // All BSON binary values have a subtype + std::uint8_t subtype{}; + get_number<std::uint8_t>(input_format_t::bson, subtype); + result.set_subtype(subtype); + + return get_binary(input_format_t::bson, len, result); + } + + /*! + @brief Read a BSON document element of the given @a element_type. + @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param[in] element_type_parse_position The position in the input stream, + where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported + @a element_type will give rise to a parse_error.114: + Unsupported BSON record type 0x... + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_internal(const char_int_type element_type, + const std::size_t element_type_parse_position) + { + switch (element_type) + { + case 0x01: // double + { + double number{}; + return get_number<double, true>(input_format_t::bson, number) && sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 0x02: // string + { + std::int32_t len{}; + string_t value; + return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value); + } + + case 0x03: // object + { + return parse_bson_internal(); + } + + case 0x04: // array + { + return parse_bson_array(); + } + + case 0x05: // binary + { + std::int32_t len{}; + binary_t value; + return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value); + } + + case 0x08: // boolean + { + return sax->boolean(get() != 0); + } + + case 0x0A: // null + { + return sax->null(); + } + + case 0x10: // int32 + { + std::int32_t value{}; + return get_number<std::int32_t, true>(input_format_t::bson, value) && sax->number_integer(value); + } + + case 0x12: // int64 + { + std::int64_t value{}; + return get_number<std::int64_t, true>(input_format_t::bson, value) && sax->number_integer(value); + } + + case 0x11: // uint64 + { + std::uint64_t value{}; + return get_number<std::uint64_t, true>(input_format_t::bson, value) && sax->number_unsigned(value); + } + + default: // anything else is not supported (yet) + { + std::array<char, 3> cr{{}}; + static_cast<void>((std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(element_type))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + const std::string cr_str{cr.data()}; + return sax->parse_error(element_type_parse_position, cr_str, + parse_error::create(114, element_type_parse_position, concat("Unsupported BSON record type 0x", cr_str), nullptr)); + } + } + } + + /*! + @brief Read a BSON element list (as specified in the BSON-spec) + + The same binary layout is used for objects and arrays, hence it must be + indicated with the argument @a is_array which one is expected + (true --> array, false --> object). + + @param[in] is_array Determines if the element list being read is to be + treated as an object (@a is_array == false), or as an + array (@a is_array == true). + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_list(const bool is_array) + { + string_t key; + + while (auto element_type = get()) + { + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "element list"))) + { + return false; + } + + const std::size_t element_type_parse_position = chars_read; + if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key))) + { + return false; + } + + if (!is_array && !sax->key(key)) + { + return false; + } + + if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position))) + { + return false; + } + + // get_bson_cstr only appends + key.clear(); + } + + return true; + } + + /*! + @brief Reads an array from the BSON input and passes it to the SAX-parser. + @return whether a valid BSON-array was passed to the SAX parser + */ + bool parse_bson_array() + { + std::int32_t document_size{}; + get_number<std::int32_t, true>(input_format_t::bson, document_size); + + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(detail::unknown_size()))) + { + return false; + } + + if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/true))) + { + return false; + } + + return sax->end_array(); + } + + ////////// + // CBOR // + ////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true) or whether the last read character should + be considered instead (false) + @param[in] tag_handler how CBOR tags should be treated + + @return whether a valid CBOR value was passed to the SAX parser + */ + + template<typename NumberType> + bool get_cbor_negative_integer() + { + NumberType number{}; + if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::cbor, number))) + { + return false; + } + const auto max_val = static_cast<NumberType>((std::numeric_limits<number_integer_t>::max)()); + if (number > max_val) + { + return sax->parse_error(chars_read, get_token_string(), + parse_error::create(112, chars_read, + exception_message(input_format_t::cbor, "negative integer overflow", "value"), nullptr)); + } + return sax->number_integer(static_cast<number_integer_t>(-1) - static_cast<number_integer_t>(number)); + } + + bool parse_cbor_internal(const bool get_char, + const cbor_tag_handler_t tag_handler) + { + switch (get_char ? get() : current) + { + // EOF + case char_traits<char_type>::eof(): + return unexpect_eof(input_format_t::cbor, "value"); + + // Integer 0x00..0x17 (0..23) + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0A: + case 0x0B: + case 0x0C: + case 0x0D: + case 0x0E: + case 0x0F: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + return sax->number_unsigned(static_cast<number_unsigned_t>(current)); + + case 0x18: // Unsigned integer (one-byte uint8_t follows) + { + std::uint8_t number{}; + return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); + } + + case 0x19: // Unsigned integer (two-byte uint16_t follows) + { + std::uint16_t number{}; + return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); + } + + case 0x1A: // Unsigned integer (four-byte uint32_t follows) + { + std::uint32_t number{}; + return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); + } + + case 0x1B: // Unsigned integer (eight-byte uint64_t follows) + { + std::uint64_t number{}; + return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); + } + + // Negative integer -1-0x00..-1-0x17 (-1..-24) + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2A: + case 0x2B: + case 0x2C: + case 0x2D: + case 0x2E: + case 0x2F: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current)); + + case 0x38: // Negative integer (one-byte uint8_t follows) + return get_cbor_negative_integer<std::uint8_t>(); + + case 0x39: // Negative integer -1-n (two-byte uint16_t follows) + return get_cbor_negative_integer<std::uint16_t>(); + + case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) + return get_cbor_negative_integer<std::uint32_t>(); + + case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) + return get_cbor_negative_integer<std::uint64_t>(); + + // Binary data (0x00..0x17 bytes follow) + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: // Binary data (one-byte uint8_t for n follows) + case 0x59: // Binary data (two-byte uint16_t for n follow) + case 0x5A: // Binary data (four-byte uint32_t for n follow) + case 0x5B: // Binary data (eight-byte uint64_t for n follow) + case 0x5F: // Binary data (indefinite length) + { + binary_t b; + return get_cbor_binary(b) && sax->binary(b); + } + + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) + case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) + case 0x7F: // UTF-8 string (indefinite length) + { + string_t s; + return get_cbor_string(s) && sax->string(s); + } + + // array (0x00..0x17 data items follow) + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + return get_cbor_array( + conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler); + + case 0x98: // array (one-byte uint8_t for n follows) + { + std::uint8_t len{}; + return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler); + } + + case 0x99: // array (two-byte uint16_t for n follow) + { + std::uint16_t len{}; + return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler); + } + + case 0x9A: // array (four-byte uint32_t for n follow) + { + std::uint32_t len{}; + return get_number(input_format_t::cbor, len) && get_cbor_array(conditional_static_cast<std::size_t>(len), tag_handler); + } + + case 0x9B: // array (eight-byte uint64_t for n follow) + { + std::uint64_t len{}; + return get_number(input_format_t::cbor, len) && get_cbor_array(conditional_static_cast<std::size_t>(len), tag_handler); + } + + case 0x9F: // array (indefinite length) + return get_cbor_array(detail::unknown_size(), tag_handler); + + // map (0x00..0x17 pairs of data items follow) + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + return get_cbor_object(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler); + + case 0xB8: // map (one-byte uint8_t for n follows) + { + std::uint8_t len{}; + return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler); + } + + case 0xB9: // map (two-byte uint16_t for n follow) + { + std::uint16_t len{}; + return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler); + } + + case 0xBA: // map (four-byte uint32_t for n follow) + { + std::uint32_t len{}; + return get_number(input_format_t::cbor, len) && get_cbor_object(conditional_static_cast<std::size_t>(len), tag_handler); + } + + case 0xBB: // map (eight-byte uint64_t for n follow) + { + std::uint64_t len{}; + return get_number(input_format_t::cbor, len) && get_cbor_object(conditional_static_cast<std::size_t>(len), tag_handler); + } + + case 0xBF: // map (indefinite length) + return get_cbor_object(detail::unknown_size(), tag_handler); + + case 0xC6: // tagged item + case 0xC7: + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD8: // tagged item (1 byte follows) + case 0xD9: // tagged item (2 bytes follow) + case 0xDA: // tagged item (4 bytes follow) + case 0xDB: // tagged item (8 bytes follow) + { + switch (tag_handler) + { + case cbor_tag_handler_t::error: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, + exception_message(input_format_t::cbor, concat("invalid byte: 0x", last_token), "value"), nullptr)); + } + + case cbor_tag_handler_t::ignore: + { + // ignore binary subtype + switch (current) + { + case 0xD8: + { + std::uint8_t subtype_to_ignore{}; + get_number(input_format_t::cbor, subtype_to_ignore); + break; + } + case 0xD9: + { + std::uint16_t subtype_to_ignore{}; + get_number(input_format_t::cbor, subtype_to_ignore); + break; + } + case 0xDA: + { + std::uint32_t subtype_to_ignore{}; + get_number(input_format_t::cbor, subtype_to_ignore); + break; + } + case 0xDB: + { + std::uint64_t subtype_to_ignore{}; + get_number(input_format_t::cbor, subtype_to_ignore); + break; + } + default: + break; + } + return parse_cbor_internal(true, tag_handler); + } + + case cbor_tag_handler_t::store: + { + binary_t b; + // use binary subtype and store in a binary container + switch (current) + { + case 0xD8: + { + std::uint8_t subtype{}; + get_number(input_format_t::cbor, subtype); + b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype)); + break; + } + case 0xD9: + { + std::uint16_t subtype{}; + get_number(input_format_t::cbor, subtype); + b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype)); + break; + } + case 0xDA: + { + std::uint32_t subtype{}; + get_number(input_format_t::cbor, subtype); + b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype)); + break; + } + case 0xDB: + { + std::uint64_t subtype{}; + get_number(input_format_t::cbor, subtype); + b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype)); + break; + } + default: + return parse_cbor_internal(true, tag_handler); + } + get(); + return get_cbor_binary(b) && sax->binary(b); + } + + default: // LCOV_EXCL_LINE + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE + return false; // LCOV_EXCL_LINE + } + } + + case 0xF4: // false + return sax->boolean(false); + + case 0xF5: // true + return sax->boolean(true); + + case 0xF6: // null + return sax->null(); + + case 0xF9: // Half-Precision Float (two-byte IEEE 754) + { + const auto byte1_raw = get(); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number"))) + { + return false; + } + const auto byte2_raw = get(); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number"))) + { + return false; + } + + const auto byte1 = static_cast<unsigned char>(byte1_raw); + const auto byte2 = static_cast<unsigned char>(byte2_raw); + + // Code from RFC 7049, Appendix D, Figure 3: + // As half-precision floating-point numbers were only added + // to IEEE 754 in 2008, today's programming platforms often + // still only have limited support for them. It is very + // easy to include at least decoding support for them even + // without such support. An example of a small decoder for + // half-precision floating-point numbers in the C language + // is shown in Fig. 3. + const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2); + const double val = [&half] + { + const int exp = (half >> 10u) & 0x1Fu; + const unsigned int mant = half & 0x3FFu; + JSON_ASSERT(0 <= exp&& exp <= 32); + JSON_ASSERT(mant <= 1024); + switch (exp) + { + case 0: + return std::ldexp(mant, -24); + case 31: + return (mant == 0) + ? std::numeric_limits<double>::infinity() + : std::numeric_limits<double>::quiet_NaN(); + default: + return std::ldexp(mant + 1024, exp - 25); + } + }(); + return sax->number_float((half & 0x8000u) != 0 + ? static_cast<number_float_t>(-val) + : static_cast<number_float_t>(val), ""); + } + + case 0xFA: // Single-Precision Float (four-byte IEEE 754) + { + float number{}; + return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 0xFB: // Double-Precision Float (eight-byte IEEE 754) + { + double number{}; + return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), ""); + } + + default: // anything else (0xFF is handled inside the other types) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, + exception_message(input_format_t::cbor, concat("invalid byte: 0x", last_token), "value"), nullptr)); + } + } + } + + /*! + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + Additionally, CBOR's strings with indefinite lengths are supported. + + @param[out] result created string + + @return whether string creation completed + */ + bool get_cbor_string(string_t& result) + { + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "string"))) + { + return false; + } + + switch (current) + { + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result); + } + + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + { + std::uint8_t len{}; + return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result); + } + + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + { + std::uint16_t len{}; + return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result); + } + + case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) + { + std::uint32_t len{}; + return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result); + } + + case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) + { + std::uint64_t len{}; + return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result); + } + + case 0x7F: // UTF-8 string (indefinite length) + { + while (get() != 0xFF) + { + string_t chunk; + if (!get_cbor_string(chunk)) + { + return false; + } + result.append(chunk); + } + return true; + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, + exception_message(input_format_t::cbor, concat("expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x", last_token), "string"), nullptr)); + } + } + } + + /*! + @brief reads a CBOR byte array + + This function first reads starting bytes to determine the expected + byte array length and then copies this number of bytes into the byte array. + Additionally, CBOR's byte arrays with indefinite lengths are supported. + + @param[out] result created byte array + + @return whether byte array creation completed + */ + bool get_cbor_binary(binary_t& result) + { + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "binary"))) + { + return false; + } + + switch (current) + { + // Binary data (0x00..0x17 bytes follow) + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + { + return get_binary(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result); + } + + case 0x58: // Binary data (one-byte uint8_t for n follows) + { + std::uint8_t len{}; + return get_number(input_format_t::cbor, len) && + get_binary(input_format_t::cbor, len, result); + } + + case 0x59: // Binary data (two-byte uint16_t for n follow) + { + std::uint16_t len{}; + return get_number(input_format_t::cbor, len) && + get_binary(input_format_t::cbor, len, result); + } + + case 0x5A: // Binary data (four-byte uint32_t for n follow) + { + std::uint32_t len{}; + return get_number(input_format_t::cbor, len) && + get_binary(input_format_t::cbor, len, result); + } + + case 0x5B: // Binary data (eight-byte uint64_t for n follow) + { + std::uint64_t len{}; + return get_number(input_format_t::cbor, len) && + get_binary(input_format_t::cbor, len, result); + } + + case 0x5F: // Binary data (indefinite length) + { + while (get() != 0xFF) + { + binary_t chunk; + if (!get_cbor_binary(chunk)) + { + return false; + } + result.insert(result.end(), chunk.begin(), chunk.end()); + } + return true; + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, + exception_message(input_format_t::cbor, concat("expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x", last_token), "binary"), nullptr)); + } + } + } + + /*! + @param[in] len the length of the array or detail::unknown_size() for an + array of indefinite size + @param[in] tag_handler how CBOR tags should be treated + @return whether array creation completed + */ + bool get_cbor_array(const std::size_t len, + const cbor_tag_handler_t tag_handler) + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len))) + { + return false; + } + + if (len != detail::unknown_size()) + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) + { + return false; + } + } + } + else + { + while (get() != 0xFF) + { + if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(false, tag_handler))) + { + return false; + } + } + } + + return sax->end_array(); + } + + /*! + @param[in] len the length of the object or detail::unknown_size() for an + object of indefinite size + @param[in] tag_handler how CBOR tags should be treated + @return whether object creation completed + */ + bool get_cbor_object(const std::size_t len, + const cbor_tag_handler_t tag_handler) + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len))) + { + return false; + } + + if (len != 0) + { + string_t key; + if (len != detail::unknown_size()) + { + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + { + return false; + } + + if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) + { + return false; + } + key.clear(); + } + } + else + { + while (get() != 0xFF) + { + if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + { + return false; + } + + if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) + { + return false; + } + key.clear(); + } + } + } + + return sax->end_object(); + } + + ///////////// + // MsgPack // + ///////////// + + /*! + @return whether a valid MessagePack value was passed to the SAX parser + */ + bool parse_msgpack_internal() + { + switch (get()) + { + // EOF + case char_traits<char_type>::eof(): + return unexpect_eof(input_format_t::msgpack, "value"); + + // positive fixint + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0A: + case 0x0B: + case 0x0C: + case 0x0D: + case 0x0E: + case 0x0F: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1A: + case 0x1B: + case 0x1C: + case 0x1D: + case 0x1E: + case 0x1F: + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2A: + case 0x2B: + case 0x2C: + case 0x2D: + case 0x2E: + case 0x2F: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3A: + case 0x3B: + case 0x3C: + case 0x3D: + case 0x3E: + case 0x3F: + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5A: + case 0x5B: + case 0x5C: + case 0x5D: + case 0x5E: + case 0x5F: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: + case 0x79: + case 0x7A: + case 0x7B: + case 0x7C: + case 0x7D: + case 0x7E: + case 0x7F: + return sax->number_unsigned(static_cast<number_unsigned_t>(current)); + + // fixmap + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + return get_msgpack_object(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu)); + + // fixarray + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + return get_msgpack_array(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu)); + + // fixstr + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: + case 0xD9: // str 8 + case 0xDA: // str 16 + case 0xDB: // str 32 + { + string_t s; + return get_msgpack_string(s) && sax->string(s); + } + + case 0xC0: // nil + return sax->null(); + + case 0xC2: // false + return sax->boolean(false); + + case 0xC3: // true + return sax->boolean(true); + + case 0xC4: // bin 8 + case 0xC5: // bin 16 + case 0xC6: // bin 32 + case 0xC7: // ext 8 + case 0xC8: // ext 16 + case 0xC9: // ext 32 + case 0xD4: // fixext 1 + case 0xD5: // fixext 2 + case 0xD6: // fixext 4 + case 0xD7: // fixext 8 + case 0xD8: // fixext 16 + { + binary_t b; + return get_msgpack_binary(b) && sax->binary(b); + } + + case 0xCA: // float 32 + { + float number{}; + return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 0xCB: // float 64 + { + double number{}; + return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 0xCC: // uint 8 + { + std::uint8_t number{}; + return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); + } + + case 0xCD: // uint 16 + { + std::uint16_t number{}; + return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); + } + + case 0xCE: // uint 32 + { + std::uint32_t number{}; + return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); + } + + case 0xCF: // uint 64 + { + std::uint64_t number{}; + return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); + } + + case 0xD0: // int 8 + { + std::int8_t number{}; + return get_number(input_format_t::msgpack, number) && sax->number_integer(number); + } + + case 0xD1: // int 16 + { + std::int16_t number{}; + return get_number(input_format_t::msgpack, number) && sax->number_integer(number); + } + + case 0xD2: // int 32 + { + std::int32_t number{}; + return get_number(input_format_t::msgpack, number) && sax->number_integer(number); + } + + case 0xD3: // int 64 + { + std::int64_t number{}; + return get_number(input_format_t::msgpack, number) && sax->number_integer(number); + } + + case 0xDC: // array 16 + { + std::uint16_t len{}; + return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len)); + } + + case 0xDD: // array 32 + { + std::uint32_t len{}; + return get_number(input_format_t::msgpack, len) && get_msgpack_array(conditional_static_cast<std::size_t>(len)); + } + + case 0xDE: // map 16 + { + std::uint16_t len{}; + return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len)); + } + + case 0xDF: // map 32 + { + std::uint32_t len{}; + return get_number(input_format_t::msgpack, len) && get_msgpack_object(conditional_static_cast<std::size_t>(len)); + } + + // negative fixint + case 0xE0: + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: + case 0xF0: + case 0xF1: + case 0xF2: + case 0xF3: + case 0xF4: + case 0xF5: + case 0xF6: + case 0xF7: + case 0xF8: + case 0xF9: + case 0xFA: + case 0xFB: + case 0xFC: + case 0xFD: + case 0xFE: + case 0xFF: + return sax->number_integer(static_cast<std::int8_t>(current)); + + default: // anything else + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, + exception_message(input_format_t::msgpack, concat("invalid byte: 0x", last_token), "value"), nullptr)); + } + } + } + + /*! + @brief reads a MessagePack string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + + @param[out] result created string + + @return whether string creation completed + */ + bool get_msgpack_string(string_t& result) + { + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::msgpack, "string"))) + { + return false; + } + + switch (current) + { + // fixstr + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: + { + return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result); + } + + case 0xD9: // str 8 + { + std::uint8_t len{}; + return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result); + } + + case 0xDA: // str 16 + { + std::uint16_t len{}; + return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result); + } + + case 0xDB: // str 32 + { + std::uint32_t len{}; + return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result); + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, + exception_message(input_format_t::msgpack, concat("expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x", last_token), "string"), nullptr)); + } + } + } + + /*! + @brief reads a MessagePack byte array + + This function first reads starting bytes to determine the expected + byte array length and then copies this number of bytes into a byte array. + + @param[out] result created byte array + + @return whether byte array creation completed + */ + bool get_msgpack_binary(binary_t& result) + { + // helper function to set the subtype + auto assign_and_return_true = [&result](std::int8_t subtype) + { + result.set_subtype(static_cast<std::uint8_t>(subtype)); + return true; + }; + + switch (current) + { + case 0xC4: // bin 8 + { + std::uint8_t len{}; + return get_number(input_format_t::msgpack, len) && + get_binary(input_format_t::msgpack, len, result); + } + + case 0xC5: // bin 16 + { + std::uint16_t len{}; + return get_number(input_format_t::msgpack, len) && + get_binary(input_format_t::msgpack, len, result); + } + + case 0xC6: // bin 32 + { + std::uint32_t len{}; + return get_number(input_format_t::msgpack, len) && + get_binary(input_format_t::msgpack, len, result); + } + + case 0xC7: // ext 8 + { + std::uint8_t len{}; + std::int8_t subtype{}; + return get_number(input_format_t::msgpack, len) && + get_number(input_format_t::msgpack, subtype) && + get_binary(input_format_t::msgpack, len, result) && + assign_and_return_true(subtype); + } + + case 0xC8: // ext 16 + { + std::uint16_t len{}; + std::int8_t subtype{}; + return get_number(input_format_t::msgpack, len) && + get_number(input_format_t::msgpack, subtype) && + get_binary(input_format_t::msgpack, len, result) && + assign_and_return_true(subtype); + } + + case 0xC9: // ext 32 + { + std::uint32_t len{}; + std::int8_t subtype{}; + return get_number(input_format_t::msgpack, len) && + get_number(input_format_t::msgpack, subtype) && + get_binary(input_format_t::msgpack, len, result) && + assign_and_return_true(subtype); + } + + case 0xD4: // fixext 1 + { + std::int8_t subtype{}; + return get_number(input_format_t::msgpack, subtype) && + get_binary(input_format_t::msgpack, 1, result) && + assign_and_return_true(subtype); + } + + case 0xD5: // fixext 2 + { + std::int8_t subtype{}; + return get_number(input_format_t::msgpack, subtype) && + get_binary(input_format_t::msgpack, 2, result) && + assign_and_return_true(subtype); + } + + case 0xD6: // fixext 4 + { + std::int8_t subtype{}; + return get_number(input_format_t::msgpack, subtype) && + get_binary(input_format_t::msgpack, 4, result) && + assign_and_return_true(subtype); + } + + case 0xD7: // fixext 8 + { + std::int8_t subtype{}; + return get_number(input_format_t::msgpack, subtype) && + get_binary(input_format_t::msgpack, 8, result) && + assign_and_return_true(subtype); + } + + case 0xD8: // fixext 16 + { + std::int8_t subtype{}; + return get_number(input_format_t::msgpack, subtype) && + get_binary(input_format_t::msgpack, 16, result) && + assign_and_return_true(subtype); + } + + default: // LCOV_EXCL_LINE + return false; // LCOV_EXCL_LINE + } + } + + /*! + @param[in] len the length of the array + @return whether array creation completed + */ + bool get_msgpack_array(const std::size_t len) + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len))) + { + return false; + } + + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal())) + { + return false; + } + } + + return sax->end_array(); + } + + /*! + @param[in] len the length of the object + @return whether object creation completed + */ + bool get_msgpack_object(const std::size_t len) + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len))) + { + return false; + } + + string_t key; + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key))) + { + return false; + } + + if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal())) + { + return false; + } + key.clear(); + } + + return sax->end_object(); + } + + //////////// + // UBJSON // + //////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether a valid UBJSON value was passed to the SAX parser + */ + bool parse_ubjson_internal(const bool get_char = true) + { + return get_ubjson_value(get_char ? get_ignore_noop() : current); + } + + /*! + @brief reads a UBJSON string + + This function is either called after reading the 'S' byte explicitly + indicating a string, or in case of an object key where the 'S' byte can be + left out. + + @param[out] result created string + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether string creation completed + */ + bool get_ubjson_string(string_t& result, const bool get_char = true) + { + if (get_char) + { + get(); // TODO(niels): may we ignore N here? + } + + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "value"))) + { + return false; + } + + switch (current) + { + case 'U': + { + std::uint8_t len{}; + return get_number(input_format, len) && get_string(input_format, len, result); + } + + case 'i': + { + std::int8_t len{}; + return get_number(input_format, len) && get_string(input_format, len, result); + } + + case 'I': + { + std::int16_t len{}; + return get_number(input_format, len) && get_string(input_format, len, result); + } + + case 'l': + { + std::int32_t len{}; + return get_number(input_format, len) && get_string(input_format, len, result); + } + + case 'L': + { + std::int64_t len{}; + return get_number(input_format, len) && get_string(input_format, len, result); + } + + case 'u': + { + if (input_format != input_format_t::bjdata) + { + break; + } + std::uint16_t len{}; + return get_number(input_format, len) && get_string(input_format, len, result); + } + + case 'm': + { + if (input_format != input_format_t::bjdata) + { + break; + } + std::uint32_t len{}; + return get_number(input_format, len) && get_string(input_format, len, result); + } + + case 'M': + { + if (input_format != input_format_t::bjdata) + { + break; + } + std::uint64_t len{}; + return get_number(input_format, len) && get_string(input_format, len, result); + } + + default: + break; + } + auto last_token = get_token_string(); + std::string message; + + if (input_format != input_format_t::bjdata) + { + message = "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token; + } + else + { + message = "expected length type specification (U, i, u, I, m, l, M, L); last byte: 0x" + last_token; + } + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format, message, "string"), nullptr)); + } + + /*! + @param[out] dim an integer vector storing the ND array dimensions + @return whether reading ND array size vector is successful + */ + bool get_ubjson_ndarray_size(std::vector<size_t>& dim) + { + std::pair<std::size_t, char_int_type> size_and_type; + size_t dimlen = 0; + bool no_ndarray = true; + + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type, no_ndarray))) + { + return false; + } + + if (size_and_type.first != npos) + { + if (size_and_type.second != 0) + { + if (size_and_type.second != 'N') + { + for (std::size_t i = 0; i < size_and_type.first; ++i) + { + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, no_ndarray, size_and_type.second))) + { + return false; + } + dim.push_back(dimlen); + } + } + } + else + { + for (std::size_t i = 0; i < size_and_type.first; ++i) + { + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, no_ndarray))) + { + return false; + } + dim.push_back(dimlen); + } + } + } + else + { + while (current != ']') + { + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, no_ndarray, current))) + { + return false; + } + dim.push_back(dimlen); + get_ignore_noop(); + } + } + return true; + } + + /*! + @param[out] result determined size + @param[in,out] is_ndarray for input, `true` means already inside an ndarray vector + or ndarray dimension is not allowed; `false` means ndarray + is allowed; for output, `true` means an ndarray is found; + is_ndarray can only return `true` when its initial value + is `false` + @param[in] prefix type marker if already read, otherwise set to 0 + + @return whether size determination completed + */ + bool get_ubjson_size_value(std::size_t& result, bool& is_ndarray, char_int_type prefix = 0) + { + if (prefix == 0) + { + prefix = get_ignore_noop(); + } + + switch (prefix) + { + case 'U': + { + std::uint8_t number{}; + if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number))) + { + return false; + } + result = static_cast<std::size_t>(number); + return true; + } + + case 'i': + { + std::int8_t number{}; + if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number))) + { + return false; + } + if (number < 0) + { + return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, + exception_message(input_format, "count in an optimized container must be positive", "size"), nullptr)); + } + result = static_cast<std::size_t>(number); // NOLINT(bugprone-signed-char-misuse,cert-str34-c): number is not a char + return true; + } + + case 'I': + { + std::int16_t number{}; + if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number))) + { + return false; + } + if (number < 0) + { + return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, + exception_message(input_format, "count in an optimized container must be positive", "size"), nullptr)); + } + result = static_cast<std::size_t>(number); + return true; + } + + case 'l': + { + std::int32_t number{}; + if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number))) + { + return false; + } + if (number < 0) + { + return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, + exception_message(input_format, "count in an optimized container must be positive", "size"), nullptr)); + } + result = static_cast<std::size_t>(number); + return true; + } + + case 'L': + { + std::int64_t number{}; + if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number))) + { + return false; + } + if (number < 0) + { + return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, + exception_message(input_format, "count in an optimized container must be positive", "size"), nullptr)); + } + if (!value_in_range_of<std::size_t>(number)) + { + return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, + exception_message(input_format, "integer value overflow", "size"), nullptr)); + } + result = static_cast<std::size_t>(number); + return true; + } + + case 'u': + { + if (input_format != input_format_t::bjdata) + { + break; + } + std::uint16_t number{}; + if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number))) + { + return false; + } + result = static_cast<std::size_t>(number); + return true; + } + + case 'm': + { + if (input_format != input_format_t::bjdata) + { + break; + } + std::uint32_t number{}; + if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number))) + { + return false; + } + result = conditional_static_cast<std::size_t>(number); + return true; + } + + case 'M': + { + if (input_format != input_format_t::bjdata) + { + break; + } + std::uint64_t number{}; + if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number))) + { + return false; + } + if (!value_in_range_of<std::size_t>(number)) + { + return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, + exception_message(input_format, "integer value overflow", "size"), nullptr)); + } + result = detail::conditional_static_cast<std::size_t>(number); + return true; + } + + case '[': + { + if (input_format != input_format_t::bjdata) + { + break; + } + if (is_ndarray) // ndarray dimensional vector can only contain integers and cannot embed another array + { + return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimensional vector is not allowed", "size"), nullptr)); + } + std::vector<size_t> dim; + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_ndarray_size(dim))) + { + return false; + } + if (dim.size() == 1 || (dim.size() == 2 && dim.at(0) == 1)) // return normal array size if 1D row vector + { + result = dim.at(dim.size() - 1); + return true; + } + if (!dim.empty()) // if ndarray, convert to an object in JData annotated array format + { + for (auto i : dim) // test if any dimension in an ndarray is 0, if so, return a 1D empty container + { + if ( i == 0 ) + { + result = 0; + return true; + } + } + + string_t key = "_ArraySize_"; + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(3) || !sax->key(key) || !sax->start_array(dim.size()))) + { + return false; + } + result = 1; + for (auto i : dim) + { + // Pre-multiplication overflow check: if i > 0 and result > SIZE_MAX/i, then result*i would overflow. + // This check must happen before multiplication since overflow detection after the fact is unreliable + // as modular arithmetic can produce any value, not just 0 or SIZE_MAX. + if (JSON_HEDLEY_UNLIKELY(i > 0 && result > (std::numeric_limits<std::size_t>::max)() / i)) + { + return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, exception_message(input_format, "excessive ndarray size caused overflow", "size"), nullptr)); + } + result *= i; + // Additional post-multiplication check to catch any edge cases the pre-check might miss + if (result == 0 || result == npos) + { + return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, exception_message(input_format, "excessive ndarray size caused overflow", "size"), nullptr)); + } + if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(static_cast<number_unsigned_t>(i)))) + { + return false; + } + } + is_ndarray = true; + return sax->end_array(); + } + result = 0; + return true; + } + + default: + break; + } + auto last_token = get_token_string(); + std::string message; + + if (input_format != input_format_t::bjdata) + { + message = "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token; + } + else + { + message = "expected length type specification (U, i, u, I, m, l, M, L) after '#'; last byte: 0x" + last_token; + } + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format, message, "size"), nullptr)); + } + + /*! + @brief determine the type and size for a container + + In the optimized UBJSON format, a type and a size can be provided to allow + for a more compact representation. + + @param[out] result pair of the size and the type + @param[in] inside_ndarray whether the parser is parsing an ND array dimensional vector + + @return whether pair creation completed + */ + bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result, bool inside_ndarray = false) + { + result.first = npos; // size + result.second = 0; // type + bool is_ndarray = false; + + get_ignore_noop(); + + if (current == '$') + { + result.second = get(); // must not ignore 'N', because 'N' maybe the type + if (input_format == input_format_t::bjdata + && JSON_HEDLEY_UNLIKELY(std::binary_search(bjd_optimized_type_markers.begin(), bjd_optimized_type_markers.end(), result.second))) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, + exception_message(input_format, concat("marker 0x", last_token, " is not a permitted optimized array type"), "type"), nullptr)); + } + + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type"))) + { + return false; + } + + get_ignore_noop(); + if (JSON_HEDLEY_UNLIKELY(current != '#')) + { + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "value"))) + { + return false; + } + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, + exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr)); + } + + const bool is_error = get_ubjson_size_value(result.first, is_ndarray); + if (input_format == input_format_t::bjdata && is_ndarray) + { + if (inside_ndarray) + { + return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read, + exception_message(input_format, "ndarray can not be recursive", "size"), nullptr)); + } + result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters + } + return is_error; + } + + if (current == '#') + { + const bool is_error = get_ubjson_size_value(result.first, is_ndarray); + if (input_format == input_format_t::bjdata && is_ndarray) + { + return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read, + exception_message(input_format, "ndarray requires both type and size", "size"), nullptr)); + } + return is_error; + } + + return true; + } + + /*! + @param prefix the previously read or set type prefix + @return whether value creation completed + */ + bool get_ubjson_value(const char_int_type prefix) + { + switch (prefix) + { + case char_traits<char_type>::eof(): // EOF + return unexpect_eof(input_format, "value"); + + case 'T': // true + return sax->boolean(true); + case 'F': // false + return sax->boolean(false); + + case 'Z': // null + return sax->null(); + + case 'B': // byte + { + if (input_format != input_format_t::bjdata) + { + break; + } + std::uint8_t number{}; + return get_number(input_format, number) && sax->number_unsigned(number); + } + + case 'U': + { + std::uint8_t number{}; + return get_number(input_format, number) && sax->number_unsigned(number); + } + + case 'i': + { + std::int8_t number{}; + return get_number(input_format, number) && sax->number_integer(number); + } + + case 'I': + { + std::int16_t number{}; + return get_number(input_format, number) && sax->number_integer(number); + } + + case 'l': + { + std::int32_t number{}; + return get_number(input_format, number) && sax->number_integer(number); + } + + case 'L': + { + std::int64_t number{}; + return get_number(input_format, number) && sax->number_integer(number); + } + + case 'u': + { + if (input_format != input_format_t::bjdata) + { + break; + } + std::uint16_t number{}; + return get_number(input_format, number) && sax->number_unsigned(number); + } + + case 'm': + { + if (input_format != input_format_t::bjdata) + { + break; + } + std::uint32_t number{}; + return get_number(input_format, number) && sax->number_unsigned(number); + } + + case 'M': + { + if (input_format != input_format_t::bjdata) + { + break; + } + std::uint64_t number{}; + return get_number(input_format, number) && sax->number_unsigned(number); + } + + case 'h': + { + if (input_format != input_format_t::bjdata) + { + break; + } + const auto byte1_raw = get(); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "number"))) + { + return false; + } + const auto byte2_raw = get(); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "number"))) + { + return false; + } + + const auto byte1 = static_cast<unsigned char>(byte1_raw); + const auto byte2 = static_cast<unsigned char>(byte2_raw); + + // Code from RFC 7049, Appendix D, Figure 3: + // As half-precision floating-point numbers were only added + // to IEEE 754 in 2008, today's programming platforms often + // still only have limited support for them. It is very + // easy to include at least decoding support for them even + // without such support. An example of a small decoder for + // half-precision floating-point numbers in the C language + // is shown in Fig. 3. + const auto half = static_cast<unsigned int>((byte2 << 8u) + byte1); + const double val = [&half] + { + const int exp = (half >> 10u) & 0x1Fu; + const unsigned int mant = half & 0x3FFu; + JSON_ASSERT(0 <= exp&& exp <= 32); + JSON_ASSERT(mant <= 1024); + switch (exp) + { + case 0: + return std::ldexp(mant, -24); + case 31: + return (mant == 0) + ? std::numeric_limits<double>::infinity() + : std::numeric_limits<double>::quiet_NaN(); + default: + return std::ldexp(mant + 1024, exp - 25); + } + }(); + return sax->number_float((half & 0x8000u) != 0 + ? static_cast<number_float_t>(-val) + : static_cast<number_float_t>(val), ""); + } + + case 'd': + { + float number{}; + return get_number(input_format, number) && sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 'D': + { + double number{}; + return get_number(input_format, number) && sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 'H': + { + return get_ubjson_high_precision_number(); + } + + case 'C': // char + { + get(); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "char"))) + { + return false; + } + if (JSON_HEDLEY_UNLIKELY(current > 127)) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, + exception_message(input_format, concat("byte after 'C' must be in range 0x00..0x7F; last byte: 0x", last_token), "char"), nullptr)); + } + string_t s(1, static_cast<typename string_t::value_type>(current)); + return sax->string(s); + } + + case 'S': // string + { + string_t s; + return get_ubjson_string(s) && sax->string(s); + } + + case '[': // array + return get_ubjson_array(); + + case '{': // object + return get_ubjson_object(); + + default: // anything else + break; + } + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format, "invalid byte: 0x" + last_token, "value"), nullptr)); + } + + /*! + @return whether array creation completed + */ + bool get_ubjson_array() + { + std::pair<std::size_t, char_int_type> size_and_type; + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) + { + return false; + } + + // if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata): + // {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]} + + if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0) + { + size_and_type.second &= ~(static_cast<char_int_type>(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker + auto it = std::lower_bound(bjd_types_map.begin(), bjd_types_map.end(), size_and_type.second, [](const bjd_type & p, char_int_type t) + { + return p.first < t; + }); + string_t key = "_ArrayType_"; + if (JSON_HEDLEY_UNLIKELY(it == bjd_types_map.end() || it->first != size_and_type.second)) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, + exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr)); + } + + string_t type = it->second; // sax->string() takes a reference + if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type))) + { + return false; + } + + if (size_and_type.second == 'C' || size_and_type.second == 'B') + { + size_and_type.second = 'U'; + } + + key = "_ArrayData_"; + if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) )) + { + return false; + } + + for (std::size_t i = 0; i < size_and_type.first; ++i) + { + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second))) + { + return false; + } + } + + return (sax->end_array() && sax->end_object()); + } + + // If BJData type marker is 'B' decode as binary + if (input_format == input_format_t::bjdata && size_and_type.first != npos && size_and_type.second == 'B') + { + binary_t result; + return get_binary(input_format, size_and_type.first, result) && sax->binary(result); + } + + if (size_and_type.first != npos) + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first))) + { + return false; + } + + if (size_and_type.second != 0) + { + if (size_and_type.second != 'N') + { + for (std::size_t i = 0; i < size_and_type.first; ++i) + { + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second))) + { + return false; + } + } + } + } + else + { + for (std::size_t i = 0; i < size_and_type.first; ++i) + { + if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal())) + { + return false; + } + } + } + } + else + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(detail::unknown_size()))) + { + return false; + } + + while (current != ']') + { + if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal(false))) + { + return false; + } + get_ignore_noop(); + } + } + + return sax->end_array(); + } + + /*! + @return whether object creation completed + */ + bool get_ubjson_object() + { + std::pair<std::size_t, char_int_type> size_and_type; + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) + { + return false; + } + + // do not accept ND-array size in objects in BJData + if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, + exception_message(input_format, "BJData object does not support ND-array size in optimized format", "object"), nullptr)); + } + + string_t key; + if (size_and_type.first != npos) + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first))) + { + return false; + } + + if (size_and_type.second != 0) + { + for (std::size_t i = 0; i < size_and_type.first; ++i) + { + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key))) + { + return false; + } + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second))) + { + return false; + } + key.clear(); + } + } + else + { + for (std::size_t i = 0; i < size_and_type.first; ++i) + { + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key))) + { + return false; + } + if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal())) + { + return false; + } + key.clear(); + } + } + } + else + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(detail::unknown_size()))) + { + return false; + } + + while (current != '}') + { + if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key))) + { + return false; + } + if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal())) + { + return false; + } + get_ignore_noop(); + key.clear(); + } + } + + return sax->end_object(); + } + + // Note, no reader for UBJSON binary types is implemented because they do + // not exist + + bool get_ubjson_high_precision_number() + { + // get the size of the following number string + std::size_t size{}; + bool no_ndarray = true; + auto res = get_ubjson_size_value(size, no_ndarray); + if (JSON_HEDLEY_UNLIKELY(!res)) + { + return res; + } + + // get number string + std::vector<char> number_vector; + for (std::size_t i = 0; i < size; ++i) + { + get(); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "number"))) + { + return false; + } + number_vector.push_back(static_cast<char>(current)); + } + + // parse number string + using ia_type = decltype(detail::input_adapter(number_vector)); + auto number_lexer = detail::lexer<BasicJsonType, ia_type>(detail::input_adapter(number_vector), false); + const auto result_number = number_lexer.scan(); + const auto number_string = number_lexer.get_token_string(); + const auto result_remainder = number_lexer.scan(); + + using token_type = typename detail::lexer_base<BasicJsonType>::token_type; + + if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input)) + { + return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, + exception_message(input_format, concat("invalid number text: ", number_lexer.get_token_string()), "high-precision number"), nullptr)); + } + + switch (result_number) + { + case token_type::value_integer: + return sax->number_integer(number_lexer.get_number_integer()); + case token_type::value_unsigned: + return sax->number_unsigned(number_lexer.get_number_unsigned()); + case token_type::value_float: + return sax->number_float(number_lexer.get_number_float(), std::move(number_string)); + case token_type::uninitialized: + case token_type::literal_true: + case token_type::literal_false: + case token_type::literal_null: + case token_type::value_string: + case token_type::begin_array: + case token_type::begin_object: + case token_type::end_array: + case token_type::end_object: + case token_type::name_separator: + case token_type::value_separator: + case token_type::parse_error: + case token_type::end_of_input: + case token_type::literal_or_value: + default: + return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, + exception_message(input_format, concat("invalid number text: ", number_lexer.get_token_string()), "high-precision number"), nullptr)); + } + } + + /////////////////////// + // Utility functions // + /////////////////////// + + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a -'ve valued + `char_traits<char_type>::eof()` in that case. + + @return character read from the input + */ + char_int_type get() + { + ++chars_read; + return current = ia.get_character(); + } + + /*! + @brief get_to read into a primitive type + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns false instead + + @return bool, whether the read was successful + */ + template<class T> + bool get_to(T& dest, const input_format_t format, const char* context) + { + auto new_chars_read = ia.get_elements(&dest); + chars_read += new_chars_read; + if (JSON_HEDLEY_UNLIKELY(new_chars_read < sizeof(T))) + { + // in case of failure, advance position by 1 to report the failing location + ++chars_read; + sax->parse_error(chars_read, "<end of file>", parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr)); + return false; + } + return true; + } + + /*! + @return character read from the input after ignoring all 'N' entries + */ + char_int_type get_ignore_noop() + { + do + { + get(); + } + while (current == 'N'); + + return current; + } + + template<class NumberType> + static void byte_swap(NumberType& number) + { + constexpr std::size_t sz = sizeof(number); +#ifdef __cpp_lib_byteswap + if constexpr (sz == 1) + { + return; + } + else if constexpr(std::is_integral_v<NumberType>) + { + number = std::byteswap(number); + return; + } + else + { +#endif + auto* ptr = reinterpret_cast<std::uint8_t*>(&number); + for (std::size_t i = 0; i < sz / 2; ++i) + { + std::swap(ptr[i], ptr[sz - i - 1]); + } +#ifdef __cpp_lib_byteswap + } +#endif + } + + /* + @brief read a number from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[out] result number of type @a NumberType + + @return whether conversion completed + + @note This function needs to respect the system's endianness, because + bytes in CBOR, MessagePack, and UBJSON are stored in network order + (big endian) and therefore need reordering on little endian systems. + On the other hand, BSON and BJData use little endian and should reorder + on big endian systems. + */ + template<typename NumberType, bool InputIsLittleEndian = false> + bool get_number(const input_format_t format, NumberType& result) + { + // read in the original format + + if (JSON_HEDLEY_UNLIKELY(!get_to(result, format, "number"))) + { + return false; + } + if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata)) + { + byte_swap(result); + } + return true; + } + + /*! + @brief create a string by reading characters from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[in] len number of characters to read + @param[out] result string created by reading @a len bytes + + @return whether string creation completed + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref unexpect_eof() detects the end of + the input before we run out of string memory. + */ + template<typename NumberType> + bool get_string(const input_format_t format, + const NumberType len, + string_t& result) + { + bool success = true; + for (NumberType i = 0; i < len; i++) + { + get(); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "string"))) + { + success = false; + break; + } + result.push_back(static_cast<typename string_t::value_type>(current)); + } + return success; + } + + /*! + @brief create a byte array by reading bytes from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[in] len number of bytes to read + @param[out] result byte array created by reading @a len bytes + + @return whether byte array creation completed + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref unexpect_eof() detects the end of + the input before we run out of memory. + */ + template<typename NumberType> + bool get_binary(const input_format_t format, + const NumberType len, + binary_t& result) + { + bool success = true; + for (NumberType i = 0; i < len; i++) + { + get(); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "binary"))) + { + success = false; + break; + } + result.push_back(static_cast<typename binary_t::value_type>(current)); + } + return success; + } + + /*! + @param[in] format the current format (for diagnostics) + @param[in] context further context information (for diagnostics) + @return whether the last read character is not EOF + */ + JSON_HEDLEY_NON_NULL(3) + bool unexpect_eof(const input_format_t format, const char* context) const + { + if (JSON_HEDLEY_UNLIKELY(current == char_traits<char_type>::eof())) + { + return sax->parse_error(chars_read, "<end of file>", + parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr)); + } + return true; + } + + /*! + @return a string representation of the last read byte + */ + std::string get_token_string() const + { + std::array<char, 3> cr{{}}; + static_cast<void>((std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(current))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + return std::string{cr.data()}; + } + + /*! + @param[in] format the current format + @param[in] detail a detailed error message + @param[in] context further context information + @return a message string to use in the parse_error exceptions + */ + std::string exception_message(const input_format_t format, + const std::string& detail, + const std::string& context) const + { + std::string error_msg = "syntax error while parsing "; + + switch (format) + { + case input_format_t::cbor: + error_msg += "CBOR"; + break; + + case input_format_t::msgpack: + error_msg += "MessagePack"; + break; + + case input_format_t::ubjson: + error_msg += "UBJSON"; + break; + + case input_format_t::bson: + error_msg += "BSON"; + break; + + case input_format_t::bjdata: + error_msg += "BJData"; + break; + + case input_format_t::json: // LCOV_EXCL_LINE + default: // LCOV_EXCL_LINE + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE + } + + return concat(error_msg, ' ', context, ": ", detail); + } + + private: + static JSON_INLINE_VARIABLE constexpr std::size_t npos = detail::unknown_size(); + + /// input adapter + InputAdapterType ia; + + /// the current character + char_int_type current = char_traits<char_type>::eof(); + + /// the number of characters read + std::size_t chars_read = 0; + + /// whether we can assume little endianness + const bool is_little_endian = little_endianness(); + + /// input format + const input_format_t input_format = input_format_t::json; + + /// the SAX parser + json_sax_t* sax = nullptr; + + // excluded markers in bjdata optimized type +#define JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_ \ + make_array<char_int_type>('F', 'H', 'N', 'S', 'T', 'Z', '[', '{') + +#define JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ \ + make_array<bjd_type>( \ + bjd_type{'B', "byte"}, \ + bjd_type{'C', "char"}, \ + bjd_type{'D', "double"}, \ + bjd_type{'I', "int16"}, \ + bjd_type{'L', "int64"}, \ + bjd_type{'M', "uint64"}, \ + bjd_type{'U', "uint8"}, \ + bjd_type{'d', "single"}, \ + bjd_type{'i', "int8"}, \ + bjd_type{'l', "int32"}, \ + bjd_type{'m', "uint32"}, \ + bjd_type{'u', "uint16"}) + + JSON_PRIVATE_UNLESS_TESTED: + // lookup tables + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const decltype(JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_) bjd_optimized_type_markers = + JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_; + + using bjd_type = std::pair<char_int_type, string_t>; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const decltype(JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_) bjd_types_map = + JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_; + +#undef JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_ +#undef JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ +}; + +#ifndef JSON_HAS_CPP_17 + template<typename BasicJsonType, typename InputAdapterType, typename SAX> + constexpr std::size_t binary_reader<BasicJsonType, InputAdapterType, SAX>::npos; +#endif + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END diff --git a/json4cpp/include/nlohmann/detail/input/input_adapters.hpp b/json4cpp/include/nlohmann/detail/input/input_adapters.hpp new file mode 100644 index 0000000000..be58b1110f --- /dev/null +++ b/json4cpp/include/nlohmann/detail/input/input_adapters.hpp @@ -0,0 +1,550 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.12.0 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2026 Niels Lohmann <https://nlohmann.me> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <array> // array +#include <cstddef> // size_t +#include <cstring> // strlen +#include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next +#include <memory> // shared_ptr, make_shared, addressof +#include <numeric> // accumulate +#include <streambuf> // streambuf +#include <string> // string, char_traits +#include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer +#include <utility> // pair, declval + +#ifndef JSON_NO_IO + #include <cstdio> // FILE * + #include <istream> // istream +#endif // JSON_NO_IO + +#include <nlohmann/detail/exceptions.hpp> +#include <nlohmann/detail/iterators/iterator_traits.hpp> +#include <nlohmann/detail/macro_scope.hpp> +#include <nlohmann/detail/meta/type_traits.hpp> + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/// the supported input formats +enum class input_format_t { json, cbor, msgpack, ubjson, bson, bjdata }; + +//////////////////// +// input adapters // +//////////////////// + +#ifndef JSON_NO_IO +/*! +Input adapter for stdio file access. This adapter read only 1 byte and do not use any + buffer. This adapter is a very low level adapter. +*/ +class file_input_adapter +{ + public: + using char_type = char; + + JSON_HEDLEY_NON_NULL(2) + explicit file_input_adapter(std::FILE* f) noexcept + : m_file(f) + { + JSON_ASSERT(m_file != nullptr); + } + + // make class move-only + file_input_adapter(const file_input_adapter&) = delete; + file_input_adapter(file_input_adapter&&) noexcept = default; + file_input_adapter& operator=(const file_input_adapter&) = delete; + file_input_adapter& operator=(file_input_adapter&&) = delete; + ~file_input_adapter() = default; + + std::char_traits<char>::int_type get_character() noexcept + { + return std::fgetc(m_file); + } + + // returns the number of characters successfully read + template<class T> + std::size_t get_elements(T* dest, std::size_t count = 1) + { + return fread(dest, 1, sizeof(T) * count, m_file); + } + + private: + /// the file pointer to read from + std::FILE* m_file; +}; + +/*! +Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at +beginning of input. Does not support changing the underlying std::streambuf +in mid-input. Maintains underlying std::istream and std::streambuf to support +subsequent use of standard std::istream operations to process any input +characters following those used in parsing the JSON input. Clears the +std::istream flags; any input errors (e.g., EOF) will be detected by the first +subsequent call for input from the std::istream. +*/ +class input_stream_adapter +{ + public: + using char_type = char; + + ~input_stream_adapter() + { + // clear stream flags; we use underlying streambuf I/O, do not + // maintain ifstream flags, except eof + if (is != nullptr) + { + is->clear(is->rdstate() & std::ios::eofbit); + } + } + + explicit input_stream_adapter(std::istream& i) + : is(&i), sb(i.rdbuf()) + {} + + // deleted because of pointer members + input_stream_adapter(const input_stream_adapter&) = delete; + input_stream_adapter& operator=(input_stream_adapter&) = delete; + input_stream_adapter& operator=(input_stream_adapter&&) = delete; + + input_stream_adapter(input_stream_adapter&& rhs) noexcept + : is(rhs.is), sb(rhs.sb) + { + rhs.is = nullptr; + rhs.sb = nullptr; + } + + // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to + // ensure that std::char_traits<char>::eof() and the character 0xFF do not + // end up as the same value, e.g., 0xFFFFFFFF. + std::char_traits<char>::int_type get_character() + { + auto res = sb->sbumpc(); + // set eof manually, as we don't use the istream interface. + if (JSON_HEDLEY_UNLIKELY(res == std::char_traits<char>::eof())) + { + is->clear(is->rdstate() | std::ios::eofbit); + } + return res; + } + + template<class T> + std::size_t get_elements(T* dest, std::size_t count = 1) + { + auto res = static_cast<std::size_t>(sb->sgetn(reinterpret_cast<char*>(dest), static_cast<std::streamsize>(count * sizeof(T)))); + if (JSON_HEDLEY_UNLIKELY(res < count * sizeof(T))) + { + is->clear(is->rdstate() | std::ios::eofbit); + } + return res; + } + + private: + /// the associated input stream + std::istream* is = nullptr; + std::streambuf* sb = nullptr; +}; +#endif // JSON_NO_IO + +// General-purpose iterator-based adapter. It might not be as fast as +// theoretically possible for some containers, but it is extremely versatile. +template<typename IteratorType> +class iterator_input_adapter +{ + public: + using char_type = typename std::iterator_traits<IteratorType>::value_type; + + iterator_input_adapter(IteratorType first, IteratorType last) + : current(std::move(first)), end(std::move(last)) + {} + + typename char_traits<char_type>::int_type get_character() + { + if (JSON_HEDLEY_LIKELY(current != end)) + { + auto result = char_traits<char_type>::to_int_type(*current); + std::advance(current, 1); + return result; + } + + return char_traits<char_type>::eof(); + } + + // for general iterators, we cannot really do something better than falling back to processing the range one-by-one + template<class T> + std::size_t get_elements(T* dest, std::size_t count = 1) + { + auto* ptr = reinterpret_cast<char*>(dest); + for (std::size_t read_index = 0; read_index < count * sizeof(T); ++read_index) + { + if (JSON_HEDLEY_LIKELY(current != end)) + { + ptr[read_index] = static_cast<char>(*current); + std::advance(current, 1); + } + else + { + return read_index; + } + } + return count * sizeof(T); + } + + private: + IteratorType current; + IteratorType end; + + template<typename BaseInputAdapter, size_t T> + friend struct wide_string_input_helper; + + bool empty() const + { + return current == end; + } +}; + +template<typename BaseInputAdapter, size_t T> +struct wide_string_input_helper; + +template<typename BaseInputAdapter> +struct wide_string_input_helper<BaseInputAdapter, 4> +{ + // UTF-32 + static void fill_buffer(BaseInputAdapter& input, + std::array<std::char_traits<char>::int_type, 4>& utf8_bytes, + size_t& utf8_bytes_index, + size_t& utf8_bytes_filled) + { + utf8_bytes_index = 0; + + if (JSON_HEDLEY_UNLIKELY(input.empty())) + { + utf8_bytes[0] = std::char_traits<char>::eof(); + utf8_bytes_filled = 1; + } + else + { + // get the current character + const auto wc = input.get_character(); + + // UTF-32 to UTF-8 encoding + if (wc < 0x80) + { + utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc); + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u) & 0x1Fu)); + utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu)); + utf8_bytes_filled = 2; + } + else if (wc <= 0xFFFF) + { + utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u) & 0x0Fu)); + utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu)); + utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu)); + utf8_bytes_filled = 3; + } + else if (wc <= 0x10FFFF) + { + utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | ((static_cast<unsigned int>(wc) >> 18u) & 0x07u)); + utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 12u) & 0x3Fu)); + utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu)); + utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu)); + utf8_bytes_filled = 4; + } + else + { + // unknown character + utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc); + utf8_bytes_filled = 1; + } + } + } +}; + +template<typename BaseInputAdapter> +struct wide_string_input_helper<BaseInputAdapter, 2> +{ + // UTF-16 + static void fill_buffer(BaseInputAdapter& input, + std::array<std::char_traits<char>::int_type, 4>& utf8_bytes, + size_t& utf8_bytes_index, + size_t& utf8_bytes_filled) + { + utf8_bytes_index = 0; + + if (JSON_HEDLEY_UNLIKELY(input.empty())) + { + utf8_bytes[0] = std::char_traits<char>::eof(); + utf8_bytes_filled = 1; + } + else + { + // get the current character + const auto wc = input.get_character(); + + // UTF-16 to UTF-8 encoding + if (wc < 0x80) + { + utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc); + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u))); + utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu)); + utf8_bytes_filled = 2; + } + else if (0xD800 > wc || wc >= 0xE000) + { + utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u))); + utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu)); + utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu)); + utf8_bytes_filled = 3; + } + else + { + if (JSON_HEDLEY_UNLIKELY(!input.empty())) + { + const auto wc2 = static_cast<unsigned int>(input.get_character()); + const auto charcode = 0x10000u + (((static_cast<unsigned int>(wc) & 0x3FFu) << 10u) | (wc2 & 0x3FFu)); + utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u)); + utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu)); + utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu)); + utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu)); + utf8_bytes_filled = 4; + } + else + { + utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc); + utf8_bytes_filled = 1; + } + } + } + } +}; + +// Wraps another input adapter to convert wide character types into individual bytes. +template<typename BaseInputAdapter, typename WideCharType> +class wide_string_input_adapter +{ + public: + using char_type = char; + + wide_string_input_adapter(BaseInputAdapter base) + : base_adapter(base) {} + + typename std::char_traits<char>::int_type get_character() noexcept + { + // check if the buffer needs to be filled + if (utf8_bytes_index == utf8_bytes_filled) + { + fill_buffer<sizeof(WideCharType)>(); + + JSON_ASSERT(utf8_bytes_filled > 0); + JSON_ASSERT(utf8_bytes_index == 0); + } + + // use buffer + JSON_ASSERT(utf8_bytes_filled > 0); + JSON_ASSERT(utf8_bytes_index < utf8_bytes_filled); + return utf8_bytes[utf8_bytes_index++]; + } + + // parsing binary with wchar doesn't make sense, but since the parsing mode can be runtime, we need something here + template<class T> + std::size_t get_elements(T* /*dest*/, std::size_t /*count*/ = 1) + { + JSON_THROW(parse_error::create(112, 1, "wide string type cannot be interpreted as binary data", nullptr)); + } + + private: + BaseInputAdapter base_adapter; + + template<size_t T> + void fill_buffer() + { + wide_string_input_helper<BaseInputAdapter, T>::fill_buffer(base_adapter, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); + } + + /// a buffer for UTF-8 bytes + std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; + + /// index to the utf8_codes array for the next valid byte + std::size_t utf8_bytes_index = 0; + /// number of valid bytes in the utf8_codes array + std::size_t utf8_bytes_filled = 0; +}; + +template<typename IteratorType, typename Enable = void> +struct iterator_input_adapter_factory +{ + using iterator_type = IteratorType; + using char_type = typename std::iterator_traits<iterator_type>::value_type; + using adapter_type = iterator_input_adapter<iterator_type>; + + static adapter_type create(IteratorType first, IteratorType last) + { + return adapter_type(std::move(first), std::move(last)); + } +}; + +template<typename T> +struct is_iterator_of_multibyte +{ + using value_type = typename std::iterator_traits<T>::value_type; + enum // NOLINT(cppcoreguidelines-use-enum-class) + { + value = sizeof(value_type) > 1 + }; +}; + +template<typename IteratorType> +struct iterator_input_adapter_factory<IteratorType, enable_if_t<is_iterator_of_multibyte<IteratorType>::value>> +{ + using iterator_type = IteratorType; + using char_type = typename std::iterator_traits<iterator_type>::value_type; + using base_adapter_type = iterator_input_adapter<iterator_type>; + using adapter_type = wide_string_input_adapter<base_adapter_type, char_type>; + + static adapter_type create(IteratorType first, IteratorType last) + { + return adapter_type(base_adapter_type(std::move(first), std::move(last))); + } +}; + +// General purpose iterator-based input +template<typename IteratorType> +typename iterator_input_adapter_factory<IteratorType>::adapter_type input_adapter(IteratorType first, IteratorType last) +{ + using factory_type = iterator_input_adapter_factory<IteratorType>; + return factory_type::create(first, last); +} + +// Convenience shorthand from container to iterator +// Enables ADL on begin(container) and end(container) +// Encloses the using declarations in namespace for not to leak them to outside scope + +namespace container_input_adapter_factory_impl +{ + +using std::begin; +using std::end; + +template<typename ContainerType, typename Enable = void> +struct container_input_adapter_factory {}; + +template<typename ContainerType> +struct container_input_adapter_factory< ContainerType, + void_t<decltype(begin(std::declval<ContainerType>()), end(std::declval<ContainerType>()))>> + { + using adapter_type = decltype(input_adapter(begin(std::declval<ContainerType>()), end(std::declval<ContainerType>()))); + + static adapter_type create(const ContainerType& container) +{ + return input_adapter(begin(container), end(container)); +} + }; + +} // namespace container_input_adapter_factory_impl + +template<typename ContainerType> +typename container_input_adapter_factory_impl::container_input_adapter_factory<ContainerType>::adapter_type input_adapter(const ContainerType& container) +{ + return container_input_adapter_factory_impl::container_input_adapter_factory<ContainerType>::create(container); +} + +// specialization for std::string +using string_input_adapter_type = decltype(input_adapter(std::declval<std::string>())); + +#ifndef JSON_NO_IO +// Special cases with fast paths +inline file_input_adapter input_adapter(std::FILE* file) +{ + if (file == nullptr) + { + JSON_THROW(parse_error::create(101, 0, "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr)); + } + return file_input_adapter(file); +} + +inline input_stream_adapter input_adapter(std::istream& stream) +{ + return input_stream_adapter(stream); +} + +inline input_stream_adapter input_adapter(std::istream&& stream) +{ + return input_stream_adapter(stream); +} +#endif // JSON_NO_IO + +using contiguous_bytes_input_adapter = decltype(input_adapter(std::declval<const char*>(), std::declval<const char*>())); + +// Null-delimited strings, and the like. +template < typename CharT, + typename std::enable_if < + std::is_pointer<CharT>::value&& + !std::is_array<CharT>::value&& + std::is_integral<typename std::remove_pointer<CharT>::type>::value&& + sizeof(typename std::remove_pointer<CharT>::type) == 1, + int >::type = 0 > +contiguous_bytes_input_adapter input_adapter(CharT b) +{ + if (b == nullptr) + { + JSON_THROW(parse_error::create(101, 0, "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr)); + } + auto length = std::strlen(reinterpret_cast<const char*>(b)); + const auto* ptr = reinterpret_cast<const char*>(b); + return input_adapter(ptr, ptr + length); // cppcheck-suppress[nullPointerArithmeticRedundantCheck] +} + +template<typename T, std::size_t N> +auto input_adapter(T (&array)[N]) -> decltype(input_adapter(array, array + N)) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) +{ + return input_adapter(array, array + N); +} + +// This class only handles inputs of input_buffer_adapter type. +// It's required so that expressions like {ptr, len} can be implicitly cast +// to the correct adapter. +class span_input_adapter +{ + public: + template < typename CharT, + typename std::enable_if < + std::is_pointer<CharT>::value&& + std::is_integral<typename std::remove_pointer<CharT>::type>::value&& + sizeof(typename std::remove_pointer<CharT>::type) == 1, + int >::type = 0 > + span_input_adapter(CharT b, std::size_t l) + : ia(reinterpret_cast<const char*>(b), reinterpret_cast<const char*>(b) + l) {} + + template<class IteratorType, + typename std::enable_if< + std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value, + int>::type = 0> + span_input_adapter(IteratorType first, IteratorType last) + : ia(input_adapter(first, last)) {} + + contiguous_bytes_input_adapter&& get() + { + return std::move(ia); // NOLINT(hicpp-move-const-arg,performance-move-const-arg) + } + + private: + contiguous_bytes_input_adapter ia; +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END diff --git a/json4cpp/include/nlohmann/detail/input/json_sax.hpp b/json4cpp/include/nlohmann/detail/input/json_sax.hpp new file mode 100644 index 0000000000..ea41322326 --- /dev/null +++ b/json4cpp/include/nlohmann/detail/input/json_sax.hpp @@ -0,0 +1,986 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.12.0 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2026 Niels Lohmann <https://nlohmann.me> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <cstddef> +#include <string> // string +#include <type_traits> // enable_if_t +#include <utility> // move +#include <vector> // vector + +#include <nlohmann/detail/exceptions.hpp> +#include <nlohmann/detail/input/lexer.hpp> +#include <nlohmann/detail/macro_scope.hpp> +#include <nlohmann/detail/string_concat.hpp> +NLOHMANN_JSON_NAMESPACE_BEGIN + +/*! +@brief SAX interface + +This class describes the SAX interface used by @ref nlohmann::json::sax_parse. +Each function is called in different situations while the input is parsed. The +boolean return value informs the parser whether to continue processing the +input. +*/ +template<typename BasicJsonType> +struct json_sax +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + + /*! + @brief a null value was read + @return whether parsing should proceed + */ + virtual bool null() = 0; + + /*! + @brief a boolean value was read + @param[in] val boolean value + @return whether parsing should proceed + */ + virtual bool boolean(bool val) = 0; + + /*! + @brief an integer number was read + @param[in] val integer value + @return whether parsing should proceed + */ + virtual bool number_integer(number_integer_t val) = 0; + + /*! + @brief an unsigned integer number was read + @param[in] val unsigned integer value + @return whether parsing should proceed + */ + virtual bool number_unsigned(number_unsigned_t val) = 0; + + /*! + @brief a floating-point number was read + @param[in] val floating-point value + @param[in] s raw token value + @return whether parsing should proceed + */ + virtual bool number_float(number_float_t val, const string_t& s) = 0; + + /*! + @brief a string value was read + @param[in] val string value + @return whether parsing should proceed + @note It is safe to move the passed string value. + */ + virtual bool string(string_t& val) = 0; + + /*! + @brief a binary value was read + @param[in] val binary value + @return whether parsing should proceed + @note It is safe to move the passed binary value. + */ + virtual bool binary(binary_t& val) = 0; + + /*! + @brief the beginning of an object was read + @param[in] elements number of object elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_object(std::size_t elements) = 0; + + /*! + @brief an object key was read + @param[in] val object key + @return whether parsing should proceed + @note It is safe to move the passed string. + */ + virtual bool key(string_t& val) = 0; + + /*! + @brief the end of an object was read + @return whether parsing should proceed + */ + virtual bool end_object() = 0; + + /*! + @brief the beginning of an array was read + @param[in] elements number of array elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_array(std::size_t elements) = 0; + + /*! + @brief the end of an array was read + @return whether parsing should proceed + */ + virtual bool end_array() = 0; + + /*! + @brief a parse error occurred + @param[in] position the position in the input where the error occurs + @param[in] last_token the last read token + @param[in] ex an exception object describing the error + @return whether parsing should proceed (must return false) + */ + virtual bool parse_error(std::size_t position, + const std::string& last_token, + const detail::exception& ex) = 0; + + json_sax() = default; + json_sax(const json_sax&) = default; + json_sax(json_sax&&) noexcept = default; + json_sax& operator=(const json_sax&) = default; + json_sax& operator=(json_sax&&) noexcept = default; + virtual ~json_sax() = default; +}; + +namespace detail +{ +constexpr std::size_t unknown_size() +{ + return (std::numeric_limits<std::size_t>::max)(); +} + +/*! +@brief SAX implementation to create a JSON value from SAX events + +This class implements the @ref json_sax interface and processes the SAX events +to create a JSON value which makes it basically a DOM parser. The structure or +hierarchy of the JSON value is managed by the stack `ref_stack` which contains +a pointer to the respective array or object for each recursion depth. + +After successful parsing, the value that is passed by reference to the +constructor contains the parsed value. + +@tparam BasicJsonType the JSON type +*/ +template<typename BasicJsonType, typename InputAdapterType> +class json_sax_dom_parser +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + using lexer_t = lexer<BasicJsonType, InputAdapterType>; + + /*! + @param[in,out] r reference to a JSON value that is manipulated while + parsing + @param[in] allow_exceptions_ whether parse errors yield exceptions + */ + explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, lexer_t* lexer_ = nullptr) + : root(r), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) + {} + + // make class move-only + json_sax_dom_parser(const json_sax_dom_parser&) = delete; + json_sax_dom_parser(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + json_sax_dom_parser& operator=(const json_sax_dom_parser&) = delete; + json_sax_dom_parser& operator=(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~json_sax_dom_parser() = default; + + bool null() + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const string_t& /*unused*/) + { + handle_value(val); + return true; + } + + bool string(string_t& val) + { + handle_value(val); + return true; + } + + bool binary(binary_t& val) + { + handle_value(std::move(val)); + return true; + } + + bool start_object(std::size_t len) + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the object here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the object, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + if (JSON_HEDLEY_UNLIKELY(len != detail::unknown_size() && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); + } + + return true; + } + + bool key(string_t& val) + { + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_object()); + + // add null at the given key and store the reference for later + object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val)); + return true; + } + + bool end_object() + { + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_object()); + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing brace, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + + ref_stack.back()->set_parents(); + ref_stack.pop_back(); + return true; + } + + bool start_array(std::size_t len) + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the array here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + if (JSON_HEDLEY_UNLIKELY(len != detail::unknown_size() && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + } + + return true; + } + + bool end_array() + { + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_array()); + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing bracket, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + + ref_stack.back()->set_parents(); + ref_stack.pop_back(); + return true; + } + + template<class Exception> + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, + const Exception& ex) + { + errored = true; + static_cast<void>(ex); + if (allow_exceptions) + { + JSON_THROW(ex); + } + return false; + } + + constexpr bool is_errored() const + { + return errored; + } + + private: + +#if JSON_DIAGNOSTIC_POSITIONS + void handle_diagnostic_positions_for_json_value(BasicJsonType& v) + { + if (m_lexer_ref) + { + // Lexer has read past the current field value, so set the end position to the current position. + // The start position will be set below based on the length of the string representation + // of the value. + v.end_position = m_lexer_ref->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + // 4 and 5 are the string length of "true" and "false" + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + // 4 is the string length of "null" + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + // include the length of the quotes, which is 2 + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + // As we handle the start and end positions for values created during parsing, + // we do not expect the following value type to be called. Regardless, set the positions + // in case this is created manually or through a different constructor. Exclude from lcov + // since the exact condition of this switch is esoteric. + // LCOV_EXCL_START + case value_t::discarded: + { + v.end_position = std::string::npos; + v.start_position = v.end_position; + break; + } + // LCOV_EXCL_STOP + case value_t::binary: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::number_float: + { + v.start_position = v.end_position - m_lexer_ref->get_string().size(); + break; + } + case value_t::object: + case value_t::array: + { + // object and array are handled in start_object() and start_array() handlers + // skip setting the values here. + break; + } + default: // LCOV_EXCL_LINE + // Handle all possible types discretely, default handler should never be reached. + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE + } + } + } +#endif + + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template<typename Value> + JSON_HEDLEY_RETURNS_NON_NULL + BasicJsonType* handle_value(Value&& v) + { + if (ref_stack.empty()) + { + root = BasicJsonType(std::forward<Value>(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(root); +#endif + + return &root; + } + + JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); + + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_data.m_value.array->emplace_back(std::forward<Value>(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(ref_stack.back()->m_data.m_value.array->back()); +#endif + + return &(ref_stack.back()->m_data.m_value.array->back()); + } + + JSON_ASSERT(ref_stack.back()->is_object()); + JSON_ASSERT(object_element); + *object_element = BasicJsonType(std::forward<Value>(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(*object_element); +#endif + + return object_element; + } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector<BasicJsonType*> ref_stack {}; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// the lexer reference to obtain the current position + lexer_t* m_lexer_ref = nullptr; +}; + +template<typename BasicJsonType, typename InputAdapterType> +class json_sax_dom_callback_parser +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + using parser_callback_t = typename BasicJsonType::parser_callback_t; + using parse_event_t = typename BasicJsonType::parse_event_t; + using lexer_t = lexer<BasicJsonType, InputAdapterType>; + + json_sax_dom_callback_parser(BasicJsonType& r, + parser_callback_t cb, + const bool allow_exceptions_ = true, + lexer_t* lexer_ = nullptr) + : root(r), callback(std::move(cb)), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) + { + keep_stack.push_back(true); + } + + // make class move-only + json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = delete; + json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_parser&) = delete; + json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~json_sax_dom_callback_parser() = default; + + bool null() + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const string_t& /*unused*/) + { + handle_value(val); + return true; + } + + bool string(string_t& val) + { + handle_value(val); + return true; + } + + bool binary(binary_t& val) + { + handle_value(std::move(val)); + return true; + } + + bool start_object(std::size_t len) + { + // check callback for object start + const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::object_start, discarded); + keep_stack.push_back(keep); + + auto val = handle_value(BasicJsonType::value_t::object, true); + ref_stack.push_back(val.second); + + if (ref_stack.back()) + { + +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the object here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the object, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + // check object limit + if (JSON_HEDLEY_UNLIKELY(len != detail::unknown_size() && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); + } + } + return true; + } + + bool key(string_t& val) + { + BasicJsonType k = BasicJsonType(val); + + // check callback for the key + const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::key, k); + key_keep_stack.push_back(keep); + + // add discarded value at the given key and store the reference for later + if (keep && ref_stack.back()) + { + object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val) = discarded); + } + + return true; + } + + bool end_object() + { + if (ref_stack.back()) + { + if (!callback(static_cast<int>(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back())) + { + // discard object + *ref_stack.back() = discarded; + +#if JSON_DIAGNOSTIC_POSITIONS + // Set start/end positions for discarded object. + handle_diagnostic_positions_for_json_value(*ref_stack.back()); +#endif + } + else + { + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing brace, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + + ref_stack.back()->set_parents(); + } + } + + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(!keep_stack.empty()); + ref_stack.pop_back(); + keep_stack.pop_back(); + + if (!ref_stack.empty() && ref_stack.back() && ref_stack.back()->is_structured()) + { + // remove discarded value + for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) + { + if (it->is_discarded()) + { + ref_stack.back()->erase(it); + break; + } + } + } + + return true; + } + + bool start_array(std::size_t len) + { + const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::array_start, discarded); + keep_stack.push_back(keep); + + auto val = handle_value(BasicJsonType::value_t::array, true); + ref_stack.push_back(val.second); + + if (ref_stack.back()) + { + +#if JSON_DIAGNOSTIC_POSITIONS + // Manually set the start position of the array here. + // Ensure this is after the call to handle_value to ensure correct start position. + if (m_lexer_ref) + { + // Lexer has read the first character of the array, so + // subtract 1 from the position to get the correct start position. + ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; + } +#endif + + // check array limit + if (JSON_HEDLEY_UNLIKELY(len != detail::unknown_size() && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + } + } + + return true; + } + + bool end_array() + { + bool keep = true; + + if (ref_stack.back()) + { + keep = callback(static_cast<int>(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); + if (keep) + { + +#if JSON_DIAGNOSTIC_POSITIONS + if (m_lexer_ref) + { + // Lexer's position is past the closing bracket, so set that as the end position. + ref_stack.back()->end_position = m_lexer_ref->get_position(); + } +#endif + + ref_stack.back()->set_parents(); + } + else + { + // discard array + *ref_stack.back() = discarded; + +#if JSON_DIAGNOSTIC_POSITIONS + // Set start/end positions for discarded array. + handle_diagnostic_positions_for_json_value(*ref_stack.back()); +#endif + } + } + + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(!keep_stack.empty()); + ref_stack.pop_back(); + keep_stack.pop_back(); + + // remove discarded value + if (!keep && !ref_stack.empty() && ref_stack.back()->is_array()) + { + ref_stack.back()->m_data.m_value.array->pop_back(); + } + + return true; + } + + template<class Exception> + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, + const Exception& ex) + { + errored = true; + static_cast<void>(ex); + if (allow_exceptions) + { + JSON_THROW(ex); + } + return false; + } + + constexpr bool is_errored() const + { + return errored; + } + + private: + +#if JSON_DIAGNOSTIC_POSITIONS + void handle_diagnostic_positions_for_json_value(BasicJsonType& v) + { + if (m_lexer_ref) + { + // Lexer has read past the current field value, so set the end position to the current position. + // The start position will be set below based on the length of the string representation + // of the value. + v.end_position = m_lexer_ref->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + // 4 and 5 are the string length of "true" and "false" + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + // 4 is the string length of "null" + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + // include the length of the quotes, which is 2 + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + case value_t::discarded: + { + v.end_position = std::string::npos; + v.start_position = v.end_position; + break; + } + + case value_t::binary: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::number_float: + { + v.start_position = v.end_position - m_lexer_ref->get_string().size(); + break; + } + + case value_t::object: + case value_t::array: + { + // object and array are handled in start_object() and start_array() handlers + // skip setting the values here. + break; + } + default: // LCOV_EXCL_LINE + // Handle all possible types discretely, default handler should never be reached. + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE + } + } + } +#endif + + /*! + @param[in] v value to add to the JSON value we build during parsing + @param[in] skip_callback whether we should skip calling the callback + function; this is required after start_array() and + start_object() SAX events, because otherwise we would call the + callback function with an empty array or object, respectively. + + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + + @return pair of boolean (whether value should be kept) and pointer (to the + passed value in the ref_stack hierarchy; nullptr if not kept) + */ + template<typename Value> + std::pair<bool, BasicJsonType*> handle_value(Value&& v, const bool skip_callback = false) + { + JSON_ASSERT(!keep_stack.empty()); + + // do not handle this value if we know it would be added to a discarded + // container + if (!keep_stack.back()) + { + return {false, nullptr}; + } + + // create value + auto value = BasicJsonType(std::forward<Value>(v)); + +#if JSON_DIAGNOSTIC_POSITIONS + handle_diagnostic_positions_for_json_value(value); +#endif + + // check callback + const bool keep = skip_callback || callback(static_cast<int>(ref_stack.size()), parse_event_t::value, value); + + // do not handle this value if we just learnt it shall be discarded + if (!keep) + { + return {false, nullptr}; + } + + if (ref_stack.empty()) + { + root = std::move(value); + return {true, & root}; + } + + // skip this value if we already decided to skip the parent + // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) + if (!ref_stack.back()) + { + return {false, nullptr}; + } + + // we now only expect arrays and objects + JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); + + // array + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_data.m_value.array->emplace_back(std::move(value)); + return {true, & (ref_stack.back()->m_data.m_value.array->back())}; + } + + // object + JSON_ASSERT(ref_stack.back()->is_object()); + // check if we should store an element for the current key + JSON_ASSERT(!key_keep_stack.empty()); + const bool store_element = key_keep_stack.back(); + key_keep_stack.pop_back(); + + if (!store_element) + { + return {false, nullptr}; + } + + JSON_ASSERT(object_element); + *object_element = std::move(value); + return {true, object_element}; + } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector<BasicJsonType*> ref_stack {}; + /// stack to manage which values to keep + std::vector<bool> keep_stack {}; // NOLINT(readability-redundant-member-init) + /// stack to manage which object keys to keep + std::vector<bool> key_keep_stack {}; // NOLINT(readability-redundant-member-init) + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// callback function + const parser_callback_t callback = nullptr; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// a discarded value for the callback + BasicJsonType discarded = BasicJsonType::value_t::discarded; + /// the lexer reference to obtain the current position + lexer_t* m_lexer_ref = nullptr; +}; + +template<typename BasicJsonType> +class json_sax_acceptor +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + + bool null() + { + return true; + } + + bool boolean(bool /*unused*/) + { + return true; + } + + bool number_integer(number_integer_t /*unused*/) + { + return true; + } + + bool number_unsigned(number_unsigned_t /*unused*/) + { + return true; + } + + bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) + { + return true; + } + + bool string(string_t& /*unused*/) + { + return true; + } + + bool binary(binary_t& /*unused*/) + { + return true; + } + + bool start_object(std::size_t /*unused*/ = detail::unknown_size()) + { + return true; + } + + bool key(string_t& /*unused*/) + { + return true; + } + + bool end_object() + { + return true; + } + + bool start_array(std::size_t /*unused*/ = detail::unknown_size()) + { + return true; + } + + bool end_array() + { + return true; + } + + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) + { + return false; + } +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END diff --git a/json4cpp/include/nlohmann/detail/input/lexer.hpp b/json4cpp/include/nlohmann/detail/input/lexer.hpp new file mode 100644 index 0000000000..4742d889b5 --- /dev/null +++ b/json4cpp/include/nlohmann/detail/input/lexer.hpp @@ -0,0 +1,1647 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.12.0 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2026 Niels Lohmann <https://nlohmann.me> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <array> // array +#include <clocale> // localeconv +#include <cstddef> // size_t +#include <cstdio> // snprintf +#include <cstdlib> // strtof, strtod, strtold, strtoll, strtoull +#include <initializer_list> // initializer_list +#include <string> // char_traits, string +#include <utility> // move +#include <vector> // vector + +#include <nlohmann/detail/input/input_adapters.hpp> +#include <nlohmann/detail/input/position_t.hpp> +#include <nlohmann/detail/macro_scope.hpp> +#include <nlohmann/detail/meta/type_traits.hpp> + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/////////// +// lexer // +/////////// + +template<typename BasicJsonType> +class lexer_base +{ + public: + /// token types for the parser + enum class token_type + { + uninitialized, ///< indicating the scanner is uninitialized + literal_true, ///< the `true` literal + literal_false, ///< the `false` literal + literal_null, ///< the `null` literal + value_string, ///< a string -- use get_string() for actual value + value_unsigned, ///< an unsigned integer -- use get_number_unsigned() for actual value + value_integer, ///< a signed integer -- use get_number_integer() for actual value + value_float, ///< an floating point number -- use get_number_float() for actual value + begin_array, ///< the character for array begin `[` + begin_object, ///< the character for object begin `{` + end_array, ///< the character for array end `]` + end_object, ///< the character for object end `}` + name_separator, ///< the name separator `:` + value_separator, ///< the value separator `,` + parse_error, ///< indicating a parse error + end_of_input, ///< indicating the end of the input buffer + literal_or_value ///< a literal or the begin of a value (only for diagnostics) + }; + + /// return name of values of type token_type (only used for errors) + JSON_HEDLEY_RETURNS_NON_NULL + JSON_HEDLEY_CONST + static const char* token_type_name(const token_type t) noexcept + { + switch (t) + { + case token_type::uninitialized: + return "<uninitialized>"; + case token_type::literal_true: + return "true literal"; + case token_type::literal_false: + return "false literal"; + case token_type::literal_null: + return "null literal"; + case token_type::value_string: + return "string literal"; + case token_type::value_unsigned: + case token_type::value_integer: + case token_type::value_float: + return "number literal"; + case token_type::begin_array: + return "'['"; + case token_type::begin_object: + return "'{'"; + case token_type::end_array: + return "']'"; + case token_type::end_object: + return "'}'"; + case token_type::name_separator: + return "':'"; + case token_type::value_separator: + return "','"; + case token_type::parse_error: + return "<parse error>"; + case token_type::end_of_input: + return "end of input"; + case token_type::literal_or_value: + return "'[', '{', or a literal"; + // LCOV_EXCL_START + default: // catch non-enum values + return "unknown token"; + // LCOV_EXCL_STOP + } + } +}; +/*! +@brief lexical analysis + +This class organizes the lexical analysis during JSON deserialization. +*/ +template<typename BasicJsonType, typename InputAdapterType> +class lexer : public lexer_base<BasicJsonType> +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using char_type = typename InputAdapterType::char_type; + using char_int_type = typename char_traits<char_type>::int_type; + + public: + using token_type = typename lexer_base<BasicJsonType>::token_type; + + explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) noexcept + : ia(std::move(adapter)) + , ignore_comments(ignore_comments_) + , decimal_point_char(static_cast<char_int_type>(get_decimal_point())) + {} + + // deleted because of pointer members + lexer(const lexer&) = delete; + lexer(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + lexer& operator=(lexer&) = delete; + lexer& operator=(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~lexer() = default; + + private: + ///////////////////// + // locales + ///////////////////// + + /// return the locale-dependent decimal point + JSON_HEDLEY_PURE + static char get_decimal_point() noexcept + { + const auto* loc = localeconv(); + JSON_ASSERT(loc != nullptr); + return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point); + } + + ///////////////////// + // scan functions + ///////////////////// + + /*! + @brief get codepoint from 4 hex characters following `\u` + + For input "\u c1 c2 c3 c4" the codepoint is: + (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4 + = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0) + + Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f' + must be converted to the integers 0x0..0x9, 0xA..0xF, 0xA..0xF, resp. The + conversion is done by subtracting the offset (0x30, 0x37, and 0x57) + between the ASCII value of the character and the desired integer value. + + @return codepoint (0x0000..0xFFFF) or -1 in case of an error (e.g. EOF or + non-hex character) + */ + int get_codepoint() + { + // this function only makes sense after reading `\u` + JSON_ASSERT(current == 'u'); + int codepoint = 0; + + const auto factors = { 12u, 8u, 4u, 0u }; + for (const auto factor : factors) + { + get(); + + if (current >= '0' && current <= '9') + { + codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x30u) << factor); + } + else if (current >= 'A' && current <= 'F') + { + codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x37u) << factor); + } + else if (current >= 'a' && current <= 'f') + { + codepoint += static_cast<int>((static_cast<unsigned int>(current) - 0x57u) << factor); + } + else + { + return -1; + } + } + + JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF); + return codepoint; + } + + /*! + @brief check if the next byte(s) are inside a given range + + Adds the current byte and, for each passed range, reads a new byte and + checks if it is inside the range. If a violation was detected, set up an + error message and return false. Otherwise, return true. + + @param[in] ranges list of integers; interpreted as list of pairs of + inclusive lower and upper bound, respectively + + @pre The passed list @a ranges must have 2, 4, or 6 elements; that is, + 1, 2, or 3 pairs. This precondition is enforced by an assertion. + + @return true if and only if no range violation was detected + */ + bool next_byte_in_range(std::initializer_list<char_int_type> ranges) + { + JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6); + add(current); + + for (auto range = ranges.begin(); range != ranges.end(); ++range) + { + get(); + if (JSON_HEDLEY_LIKELY(*range <= current && current <= *(++range))) // NOLINT(bugprone-inc-dec-in-conditions) + { + add(current); + } + else + { + error_message = "invalid string: ill-formed UTF-8 byte"; + return false; + } + } + + return true; + } + + /*! + @brief scan a string literal + + This function scans a string according to Sect. 7 of RFC 8259. While + scanning, bytes are escaped and copied into buffer token_buffer. Then the + function returns successfully, token_buffer is *not* null-terminated (as it + may contain \0 bytes), and token_buffer.size() is the number of bytes in the + string. + + @return token_type::value_string if string could be successfully scanned, + token_type::parse_error otherwise + + @note In case of errors, variable error_message contains a textual + description. + */ + token_type scan_string() + { + // reset token_buffer (ignore opening quote) + reset(); + + // we entered the function by reading an open quote + JSON_ASSERT(current == '\"'); + + while (true) + { + // get the next character + switch (get()) + { + // end of file while parsing the string + case char_traits<char_type>::eof(): + { + error_message = "invalid string: missing closing quote"; + return token_type::parse_error; + } + + // closing quote + case '\"': + { + return token_type::value_string; + } + + // escapes + case '\\': + { + switch (get()) + { + // quotation mark + case '\"': + add('\"'); + break; + // reverse solidus + case '\\': + add('\\'); + break; + // solidus + case '/': + add('/'); + break; + // backspace + case 'b': + add('\b'); + break; + // form feed + case 'f': + add('\f'); + break; + // line feed + case 'n': + add('\n'); + break; + // carriage return + case 'r': + add('\r'); + break; + // tab + case 't': + add('\t'); + break; + + // unicode escapes + case 'u': + { + const int codepoint1 = get_codepoint(); + int codepoint = codepoint1; // start with codepoint1 + + if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1)) + { + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; + } + + // check if code point is a high surrogate + if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF) + { + // expect next \uxxxx entry + if (JSON_HEDLEY_LIKELY(get() == '\\' && get() == 'u')) + { + const int codepoint2 = get_codepoint(); + + if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1)) + { + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; + } + + // check if codepoint2 is a low surrogate + if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF)) + { + // overwrite codepoint + codepoint = static_cast<int>( + // high surrogate occupies the most significant 22 bits + (static_cast<unsigned int>(codepoint1) << 10u) + // low surrogate occupies the least significant 15 bits + + static_cast<unsigned int>(codepoint2) + // there is still the 0xD800, 0xDC00, and 0x10000 noise + // in the result, so we have to subtract with: + // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 + - 0x35FDC00u); + } + else + { + error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; + return token_type::parse_error; + } + } + else + { + error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; + return token_type::parse_error; + } + } + else + { + if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF)) + { + error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"; + return token_type::parse_error; + } + } + + // the result of the above calculation yields a proper codepoint + JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF); + + // translate codepoint into bytes + if (codepoint < 0x80) + { + // 1-byte characters: 0xxxxxxx (ASCII) + add(static_cast<char_int_type>(codepoint)); + } + else if (codepoint <= 0x7FF) + { + // 2-byte characters: 110xxxxx 10xxxxxx + add(static_cast<char_int_type>(0xC0u | (static_cast<unsigned int>(codepoint) >> 6u))); + add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu))); + } + else if (codepoint <= 0xFFFF) + { + // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx + add(static_cast<char_int_type>(0xE0u | (static_cast<unsigned int>(codepoint) >> 12u))); + add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu))); + add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu))); + } + else + { + // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + add(static_cast<char_int_type>(0xF0u | (static_cast<unsigned int>(codepoint) >> 18u))); + add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 12u) & 0x3Fu))); + add(static_cast<char_int_type>(0x80u | ((static_cast<unsigned int>(codepoint) >> 6u) & 0x3Fu))); + add(static_cast<char_int_type>(0x80u | (static_cast<unsigned int>(codepoint) & 0x3Fu))); + } + + break; + } + + // other characters after escape + default: + error_message = "invalid string: forbidden character after backslash"; + return token_type::parse_error; + } + + break; + } + + // invalid control characters + case 0x00: + { + error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000"; + return token_type::parse_error; + } + + case 0x01: + { + error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001"; + return token_type::parse_error; + } + + case 0x02: + { + error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002"; + return token_type::parse_error; + } + + case 0x03: + { + error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003"; + return token_type::parse_error; + } + + case 0x04: + { + error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004"; + return token_type::parse_error; + } + + case 0x05: + { + error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005"; + return token_type::parse_error; + } + + case 0x06: + { + error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006"; + return token_type::parse_error; + } + + case 0x07: + { + error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007"; + return token_type::parse_error; + } + + case 0x08: + { + error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b"; + return token_type::parse_error; + } + + case 0x09: + { + error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t"; + return token_type::parse_error; + } + + case 0x0A: + { + error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n"; + return token_type::parse_error; + } + + case 0x0B: + { + error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B"; + return token_type::parse_error; + } + + case 0x0C: + { + error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f"; + return token_type::parse_error; + } + + case 0x0D: + { + error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r"; + return token_type::parse_error; + } + + case 0x0E: + { + error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E"; + return token_type::parse_error; + } + + case 0x0F: + { + error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F"; + return token_type::parse_error; + } + + case 0x10: + { + error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010"; + return token_type::parse_error; + } + + case 0x11: + { + error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011"; + return token_type::parse_error; + } + + case 0x12: + { + error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012"; + return token_type::parse_error; + } + + case 0x13: + { + error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013"; + return token_type::parse_error; + } + + case 0x14: + { + error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014"; + return token_type::parse_error; + } + + case 0x15: + { + error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015"; + return token_type::parse_error; + } + + case 0x16: + { + error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016"; + return token_type::parse_error; + } + + case 0x17: + { + error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017"; + return token_type::parse_error; + } + + case 0x18: + { + error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018"; + return token_type::parse_error; + } + + case 0x19: + { + error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019"; + return token_type::parse_error; + } + + case 0x1A: + { + error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A"; + return token_type::parse_error; + } + + case 0x1B: + { + error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B"; + return token_type::parse_error; + } + + case 0x1C: + { + error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C"; + return token_type::parse_error; + } + + case 0x1D: + { + error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D"; + return token_type::parse_error; + } + + case 0x1E: + { + error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E"; + return token_type::parse_error; + } + + case 0x1F: + { + error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F"; + return token_type::parse_error; + } + + // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) + case 0x20: + case 0x21: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2A: + case 0x2B: + case 0x2C: + case 0x2D: + case 0x2E: + case 0x2F: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3A: + case 0x3B: + case 0x3C: + case 0x3D: + case 0x3E: + case 0x3F: + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5A: + case 0x5B: + case 0x5D: + case 0x5E: + case 0x5F: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: + case 0x79: + case 0x7A: + case 0x7B: + case 0x7C: + case 0x7D: + case 0x7E: + case 0x7F: + { + add(current); + break; + } + + // U+0080..U+07FF: bytes C2..DF 80..BF + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + { + if (JSON_HEDLEY_UNLIKELY(!next_byte_in_range({0x80, 0xBF}))) + { + return token_type::parse_error; + } + break; + } + + // U+0800..U+0FFF: bytes E0 A0..BF 80..BF + case 0xE0: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF + // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xEE: + case 0xEF: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+D000..U+D7FF: bytes ED 80..9F 80..BF + case 0xED: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x9F, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF + case 0xF0: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF + case 0xF1: + case 0xF2: + case 0xF3: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF + case 0xF4: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // the remaining bytes (80..C1 and F5..FF) are ill-formed + default: + { + error_message = "invalid string: ill-formed UTF-8 byte"; + return token_type::parse_error; + } + } + } + } + + /*! + * @brief scan a comment + * @return whether comment could be scanned successfully + */ + bool scan_comment() + { + switch (get()) + { + // single-line comments skip input until a newline or EOF is read + case '/': + { + while (true) + { + switch (get()) + { + case '\n': + case '\r': + case char_traits<char_type>::eof(): + case '\0': + return true; + + default: + break; + } + } + + JSON_HEDLEY_UNREACHABLE(); + } + + // multi-line comments skip input until */ is read + case '*': + { + while (true) + { + switch (get()) + { + case char_traits<char_type>::eof(): + case '\0': + { + error_message = "invalid comment; missing closing '*/'"; + return false; + } + + case '*': + { + switch (get()) + { + case '/': + return true; + + default: + { + unget(); + continue; + } + } + } + + default: + continue; + } + } + + JSON_HEDLEY_UNREACHABLE(); + } + + // unexpected character after reading '/' + default: + { + error_message = "invalid comment; expecting '/' or '*' after '/'"; + return false; + } + } + } + + JSON_HEDLEY_NON_NULL(2) + static void strtof(float& f, const char* str, char** endptr) noexcept + { + f = std::strtof(str, endptr); + } + + JSON_HEDLEY_NON_NULL(2) + static void strtof(double& f, const char* str, char** endptr) noexcept + { + f = std::strtod(str, endptr); + } + + JSON_HEDLEY_NON_NULL(2) + static void strtof(long double& f, const char* str, char** endptr) noexcept + { + f = std::strtold(str, endptr); + } + + /*! + @brief scan a number literal + + This function scans a string according to Sect. 6 of RFC 8259. + + The function is realized with a deterministic finite state machine derived + from the grammar described in RFC 8259. Starting in state "init", the + input is read and used to determined the next state. Only state "done" + accepts the number. State "error" is a trap state to model errors. In the + table below, "anything" means any character but the ones listed before. + + state | 0 | 1-9 | e E | + | - | . | anything + ---------|----------|----------|----------|---------|---------|----------|----------- + init | zero | any1 | [error] | [error] | minus | [error] | [error] + minus | zero | any1 | [error] | [error] | [error] | [error] | [error] + zero | done | done | exponent | done | done | decimal1 | done + any1 | any1 | any1 | exponent | done | done | decimal1 | done + decimal1 | decimal2 | decimal2 | [error] | [error] | [error] | [error] | [error] + decimal2 | decimal2 | decimal2 | exponent | done | done | done | done + exponent | any2 | any2 | [error] | sign | sign | [error] | [error] + sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] + any2 | any2 | any2 | done | done | done | done | done + + The state machine is realized with one label per state (prefixed with + "scan_number_") and `goto` statements between them. The state machine + contains cycles, but any cycle can be left when EOF is read. Therefore, + the function is guaranteed to terminate. + + During scanning, the read bytes are stored in token_buffer. This string is + then converted to a signed integer, an unsigned integer, or a + floating-point number. + + @return token_type::value_unsigned, token_type::value_integer, or + token_type::value_float if number could be successfully scanned, + token_type::parse_error otherwise + + @note The scanner is independent of the current locale. Internally, the + locale's decimal point is used instead of `.` to work with the + locale-dependent converters. + */ + token_type scan_number() // lgtm [cpp/use-of-goto] `goto` is used in this function to implement the number-parsing state machine described above. By design, any finite input will eventually reach the "done" state or return token_type::parse_error. In each intermediate state, 1 byte of the input is appended to the token_buffer vector, and only the already initialized variables token_buffer, number_type, and error_message are manipulated. + { + // reset token_buffer to store the number's bytes + reset(); + + // the type of the parsed number; initially set to unsigned; will be + // changed if minus sign, decimal point, or exponent is read + token_type number_type = token_type::value_unsigned; + + // state (init): we just found out we need to scan a number + switch (current) + { + case '-': + { + add(current); + goto scan_number_minus; + } + + case '0': + { + add(current); + goto scan_number_zero; + } + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + // all other characters are rejected outside scan_number() + default: // LCOV_EXCL_LINE + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE + } + +scan_number_minus: + // state: we just parsed a leading minus sign + number_type = token_type::value_integer; + switch (get()) + { + case '0': + { + add(current); + goto scan_number_zero; + } + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + default: + { + error_message = "invalid number; expected digit after '-'"; + return token_type::parse_error; + } + } + +scan_number_zero: + // state: we just parse a zero (maybe with a leading minus sign) + switch (get()) + { + case '.': + { + add(decimal_point_char); + decimal_point_position = token_buffer.size() - 1; + goto scan_number_decimal1; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + goto scan_number_done; + } + +scan_number_any1: + // state: we just parsed a number 0-9 (maybe with a leading minus sign) + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + case '.': + { + add(decimal_point_char); + decimal_point_position = token_buffer.size() - 1; + goto scan_number_decimal1; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + goto scan_number_done; + } + +scan_number_decimal1: + // state: we just parsed a decimal point + number_type = token_type::value_float; + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } + + default: + { + error_message = "invalid number; expected digit after '.'"; + return token_type::parse_error; + } + } + +scan_number_decimal2: + // we just parsed at least one number after a decimal point + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + goto scan_number_done; + } + +scan_number_exponent: + // we just parsed an exponent + number_type = token_type::value_float; + switch (get()) + { + case '+': + case '-': + { + add(current); + goto scan_number_sign; + } + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + { + error_message = + "invalid number; expected '+', '-', or digit after exponent"; + return token_type::parse_error; + } + } + +scan_number_sign: + // we just parsed an exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + { + error_message = "invalid number; expected digit after exponent sign"; + return token_type::parse_error; + } + } + +scan_number_any2: + // we just parsed a number after the exponent or exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + goto scan_number_done; + } + +scan_number_done: + // unget the character after the number (we only read it to know that + // we are done scanning a number) + unget(); + + char* endptr = nullptr; // NOLINT(misc-const-correctness,cppcoreguidelines-pro-type-vararg,hicpp-vararg) + errno = 0; + + // try to parse integers first and fall back to floats + if (number_type == token_type::value_unsigned) + { + const auto x = std::strtoull(token_buffer.data(), &endptr, 10); + + // we checked the number format before + JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); + + if (errno != ERANGE) + { + value_unsigned = static_cast<number_unsigned_t>(x); + if (value_unsigned == x) + { + return token_type::value_unsigned; + } + } + } + else if (number_type == token_type::value_integer) + { + const auto x = std::strtoll(token_buffer.data(), &endptr, 10); + + // we checked the number format before + JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); + + if (errno != ERANGE) + { + value_integer = static_cast<number_integer_t>(x); + if (value_integer == x) + { + return token_type::value_integer; + } + } + } + + // this code is reached if we parse a floating-point number or if an + // integer conversion above failed + strtof(value_float, token_buffer.data(), &endptr); + + // we checked the number format before + JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); + + return token_type::value_float; + } + + /*! + @param[in] literal_text the literal text to expect + @param[in] length the length of the passed literal text + @param[in] return_type the token type to return on success + */ + JSON_HEDLEY_NON_NULL(2) + token_type scan_literal(const char_type* literal_text, const std::size_t length, + token_type return_type) + { + JSON_ASSERT(char_traits<char_type>::to_char_type(current) == literal_text[0]); + for (std::size_t i = 1; i < length; ++i) + { + if (JSON_HEDLEY_UNLIKELY(char_traits<char_type>::to_char_type(get()) != literal_text[i])) + { + error_message = "invalid literal"; + return token_type::parse_error; + } + } + return return_type; + } + + ///////////////////// + // input management + ///////////////////// + + /// reset token_buffer; current character is beginning of token + void reset() noexcept + { + token_buffer.clear(); + token_string.clear(); + decimal_point_position = std::string::npos; + token_string.push_back(char_traits<char_type>::to_char_type(current)); + } + + /* + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a + `char_traits<char>::eof()` in that case. Stores the scanned characters + for use in error messages. + + @return character read from the input + */ + char_int_type get() + { + ++position.chars_read_total; + ++position.chars_read_current_line; + + if (next_unget) + { + // only reset the next_unget variable and work with current + next_unget = false; + } + else + { + current = ia.get_character(); + } + + if (JSON_HEDLEY_LIKELY(current != char_traits<char_type>::eof())) + { + token_string.push_back(char_traits<char_type>::to_char_type(current)); + } + + if (current == '\n') + { + ++position.lines_read; + position.chars_read_current_line = 0; + } + + return current; + } + + /*! + @brief unget current character (read it again on next get) + + We implement unget by setting variable next_unget to true. The input is not + changed - we just simulate ungetting by modifying chars_read_total, + chars_read_current_line, and token_string. The next call to get() will + behave as if the unget character is read again. + */ + void unget() + { + next_unget = true; + + --position.chars_read_total; + + // in case we "unget" a newline, we have to also decrement the lines_read + if (position.chars_read_current_line == 0) + { + if (position.lines_read > 0) + { + --position.lines_read; + } + } + else + { + --position.chars_read_current_line; + } + + if (JSON_HEDLEY_LIKELY(current != char_traits<char_type>::eof())) + { + JSON_ASSERT(!token_string.empty()); + token_string.pop_back(); + } + } + + /// add a character to token_buffer + void add(char_int_type c) + { + token_buffer.push_back(static_cast<typename string_t::value_type>(c)); + } + + public: + ///////////////////// + // value getters + ///////////////////// + + /// return integer value + constexpr number_integer_t get_number_integer() const noexcept + { + return value_integer; + } + + /// return unsigned integer value + constexpr number_unsigned_t get_number_unsigned() const noexcept + { + return value_unsigned; + } + + /// return floating-point value + constexpr number_float_t get_number_float() const noexcept + { + return value_float; + } + + /// return current string value (implicitly resets the token; useful only once) + string_t& get_string() + { + // translate decimal points from locale back to '.' (#4084) + if (decimal_point_char != '.' && decimal_point_position != std::string::npos) + { + token_buffer[decimal_point_position] = '.'; + } + return token_buffer; + } + + ///////////////////// + // diagnostics + ///////////////////// + + /// return position of last read token + constexpr position_t get_position() const noexcept + { + return position; + } + + /// return the last read token (for errors only). Will never contain EOF + /// (an arbitrary value that is not a valid char value, often -1), because + /// 255 may legitimately occur. May contain NUL, which should be escaped. + std::string get_token_string() const + { + // escape control characters + std::string result; + for (const auto c : token_string) + { + if (static_cast<unsigned char>(c) <= '\x1F') + { + // escape control characters + std::array<char, 9> cs{{}}; + static_cast<void>((std::snprintf)(cs.data(), cs.size(), "<U+%.4X>", static_cast<unsigned char>(c))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + result += cs.data(); + } + else + { + // add character as is + result.push_back(static_cast<std::string::value_type>(c)); + } + } + + return result; + } + + /// return syntax error message + JSON_HEDLEY_RETURNS_NON_NULL + constexpr const char* get_error_message() const noexcept + { + return error_message; + } + + ///////////////////// + // actual scanner + ///////////////////// + + /*! + @brief skip the UTF-8 byte order mark + @return true iff there is no BOM or the correct BOM has been skipped + */ + bool skip_bom() + { + if (get() == 0xEF) + { + // check if we completely parse the BOM + return get() == 0xBB && get() == 0xBF; + } + + // the first character is not the beginning of the BOM; unget it to + // process is later + unget(); + return true; + } + + void skip_whitespace() + { + do + { + get(); + } + while (current == ' ' || current == '\t' || current == '\n' || current == '\r'); + } + + token_type scan() + { + // initially, skip the BOM + if (position.chars_read_total == 0 && !skip_bom()) + { + error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; + return token_type::parse_error; + } + + // read the next character and ignore whitespace + skip_whitespace(); + + // ignore comments + while (ignore_comments && current == '/') + { + if (!scan_comment()) + { + return token_type::parse_error; + } + + // skip following whitespace + skip_whitespace(); + } + + switch (current) + { + // structural characters + case '[': + return token_type::begin_array; + case ']': + return token_type::end_array; + case '{': + return token_type::begin_object; + case '}': + return token_type::end_object; + case ':': + return token_type::name_separator; + case ',': + return token_type::value_separator; + + // literals + case 't': + { + std::array<char_type, 4> true_literal = {{static_cast<char_type>('t'), static_cast<char_type>('r'), static_cast<char_type>('u'), static_cast<char_type>('e')}}; + return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true); + } + case 'f': + { + std::array<char_type, 5> false_literal = {{static_cast<char_type>('f'), static_cast<char_type>('a'), static_cast<char_type>('l'), static_cast<char_type>('s'), static_cast<char_type>('e')}}; + return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false); + } + case 'n': + { + std::array<char_type, 4> null_literal = {{static_cast<char_type>('n'), static_cast<char_type>('u'), static_cast<char_type>('l'), static_cast<char_type>('l')}}; + return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null); + } + + // string + case '\"': + return scan_string(); + + // number + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return scan_number(); + + // end of input (the null byte is needed when parsing from + // string literals) + case '\0': + case char_traits<char_type>::eof(): + return token_type::end_of_input; + + // error + default: + error_message = "invalid literal"; + return token_type::parse_error; + } + } + + private: + /// input adapter + InputAdapterType ia; + + /// whether comments should be ignored (true) or signaled as errors (false) + const bool ignore_comments = false; + + /// the current character + char_int_type current = char_traits<char_type>::eof(); + + /// whether the next get() call should just return current + bool next_unget = false; + + /// the start position of the current token + position_t position {}; + + /// raw input token string (for error messages) + std::vector<char_type> token_string {}; + + /// buffer for variable-length tokens (numbers, strings) + string_t token_buffer {}; + + /// a description of occurred lexer errors + const char* error_message = ""; + + // number values + number_integer_t value_integer = 0; + number_unsigned_t value_unsigned = 0; + number_float_t value_float = 0; + + /// the decimal point + const char_int_type decimal_point_char = '.'; + /// the position of the decimal point in the input + std::size_t decimal_point_position = std::string::npos; +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END diff --git a/json4cpp/include/nlohmann/detail/input/parser.hpp b/json4cpp/include/nlohmann/detail/input/parser.hpp new file mode 100644 index 0000000000..4b1963f4a7 --- /dev/null +++ b/json4cpp/include/nlohmann/detail/input/parser.hpp @@ -0,0 +1,536 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.12.0 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2026 Niels Lohmann <https://nlohmann.me> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <cmath> // isfinite +#include <cstdint> // uint8_t +#include <functional> // function +#include <string> // string +#include <utility> // move +#include <vector> // vector + +#include <nlohmann/detail/exceptions.hpp> +#include <nlohmann/detail/input/input_adapters.hpp> +#include <nlohmann/detail/input/json_sax.hpp> +#include <nlohmann/detail/input/lexer.hpp> +#include <nlohmann/detail/macro_scope.hpp> +#include <nlohmann/detail/meta/is_sax.hpp> +#include <nlohmann/detail/string_concat.hpp> +#include <nlohmann/detail/value_t.hpp> + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ +//////////// +// parser // +//////////// + +enum class parse_event_t : std::uint8_t +{ + /// the parser read `{` and started to process a JSON object + object_start, + /// the parser read `}` and finished processing a JSON object + object_end, + /// the parser read `[` and started to process a JSON array + array_start, + /// the parser read `]` and finished processing a JSON array + array_end, + /// the parser read a key of a value in an object + key, + /// the parser finished reading a JSON value + value +}; + +template<typename BasicJsonType> +using parser_callback_t = + std::function<bool(int /*depth*/, parse_event_t /*event*/, BasicJsonType& /*parsed*/)>; + +/*! +@brief syntax analysis + +This class implements a recursive descent parser. +*/ +template<typename BasicJsonType, typename InputAdapterType> +class parser +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using lexer_t = lexer<BasicJsonType, InputAdapterType>; + using token_type = typename lexer_t::token_type; + + public: + /// a parser reading from an input adapter + explicit parser(InputAdapterType&& adapter, + parser_callback_t<BasicJsonType> cb = nullptr, + const bool allow_exceptions_ = true, + const bool ignore_comments = false, + const bool ignore_trailing_commas_ = false) + : callback(std::move(cb)) + , m_lexer(std::move(adapter), ignore_comments) + , allow_exceptions(allow_exceptions_) + , ignore_trailing_commas(ignore_trailing_commas_) + { + // read first token + get_token(); + } + + /*! + @brief public parser interface + + @param[in] strict whether to expect the last token to be EOF + @param[in,out] result parsed JSON value + + @throw parse_error.101 in case of an unexpected token + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + */ + void parse(const bool strict, BasicJsonType& result) + { + if (callback) + { + json_sax_dom_callback_parser<BasicJsonType, InputAdapterType> sdp(result, callback, allow_exceptions, &m_lexer); + sax_parse_internal(&sdp); + + // in strict mode, input must be completely read + if (strict && (get_token() != token_type::end_of_input)) + { + sdp.parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::end_of_input, "value"), nullptr)); + } + + // in case of an error, return a discarded value + if (sdp.is_errored()) + { + result = value_t::discarded; + return; + } + + // set top-level value to null if it was discarded by the callback + // function + if (result.is_discarded()) + { + result = nullptr; + } + } + else + { + json_sax_dom_parser<BasicJsonType, InputAdapterType> sdp(result, allow_exceptions, &m_lexer); + sax_parse_internal(&sdp); + + // in strict mode, input must be completely read + if (strict && (get_token() != token_type::end_of_input)) + { + sdp.parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr)); + } + + // in case of an error, return a discarded value + if (sdp.is_errored()) + { + result = value_t::discarded; + return; + } + } + + result.assert_invariant(); + } + + /*! + @brief public accept interface + + @param[in] strict whether to expect the last token to be EOF + @return whether the input is a proper JSON text + */ + bool accept(const bool strict = true) + { + json_sax_acceptor<BasicJsonType> sax_acceptor; + return sax_parse(&sax_acceptor, strict); + } + + template<typename SAX> + JSON_HEDLEY_NON_NULL(2) + bool sax_parse(SAX* sax, const bool strict = true) + { + (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; + const bool result = sax_parse_internal(sax); + + // strict mode: next byte must be EOF + if (result && strict && (get_token() != token_type::end_of_input)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr)); + } + + return result; + } + + private: + template<typename SAX> + JSON_HEDLEY_NON_NULL(2) + bool sax_parse_internal(SAX* sax) + { + // stack to remember the hierarchy of structured values we are parsing + // true = array; false = object + std::vector<bool> states; + // value to avoid a goto (see comment where set to true) + bool skip_to_state_evaluation = false; + + while (true) + { + if (!skip_to_state_evaluation) + { + // invariant: get_token() was called before each iteration + switch (last_token) + { + case token_type::begin_object: + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(detail::unknown_size()))) + { + return false; + } + + // closing } -> we are done + if (get_token() == token_type::end_object) + { + if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) + { + return false; + } + break; + } + + // parse key + if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr)); + } + if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) + { + return false; + } + + // parse separator (:) + if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr)); + } + + // remember we are now inside an object + states.push_back(false); + + // parse values + get_token(); + continue; + } + + case token_type::begin_array: + { + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(detail::unknown_size()))) + { + return false; + } + + // closing ] -> we are done + if (get_token() == token_type::end_array) + { + if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) + { + return false; + } + break; + } + + // remember we are now inside an array + states.push_back(true); + + // parse values (no need to call get_token) + continue; + } + + case token_type::value_float: + { + const auto res = m_lexer.get_number_float(); + + if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res))) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + out_of_range::create(406, concat("number overflow parsing '", m_lexer.get_token_string(), '\''), nullptr)); + } + + if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string()))) + { + return false; + } + + break; + } + + case token_type::literal_false: + { + if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false))) + { + return false; + } + break; + } + + case token_type::literal_null: + { + if (JSON_HEDLEY_UNLIKELY(!sax->null())) + { + return false; + } + break; + } + + case token_type::literal_true: + { + if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true))) + { + return false; + } + break; + } + + case token_type::value_integer: + { + if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer()))) + { + return false; + } + break; + } + + case token_type::value_string: + { + if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string()))) + { + return false; + } + break; + } + + case token_type::value_unsigned: + { + if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned()))) + { + return false; + } + break; + } + + case token_type::parse_error: + { + // using "uninitialized" to avoid an "expected" message + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), nullptr)); + } + case token_type::end_of_input: + { + if (JSON_HEDLEY_UNLIKELY(m_lexer.get_position().chars_read_total == 1)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr)); + } + + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr)); + } + case token_type::uninitialized: + case token_type::end_array: + case token_type::end_object: + case token_type::name_separator: + case token_type::value_separator: + case token_type::literal_or_value: + default: // the last token was unexpected + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr)); + } + } + } + else + { + skip_to_state_evaluation = false; + } + + // we reached this line after we successfully parsed a value + if (states.empty()) + { + // empty stack: we reached the end of the hierarchy: done + return true; + } + + if (states.back()) // array + { + // comma -> next value + // or end of array (ignore_trailing_commas = true) + if (get_token() == token_type::value_separator) + { + // parse a new value + get_token(); + + // if ignore_trailing_commas and last_token is ], we can continue to "closing ]" + if (!(ignore_trailing_commas && last_token == token_type::end_array)) + { + continue; + } + } + + // closing ] + if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array)) + { + if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) + { + return false; + } + + // We are done with this array. Before we can parse a + // new value, we need to evaluate the new state first. + // By setting skip_to_state_evaluation to false, we + // are effectively jumping to the beginning of this if. + JSON_ASSERT(!states.empty()); + states.pop_back(); + skip_to_state_evaluation = true; + continue; + } + + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array, "array"), nullptr)); + } + + // states.back() is false -> object + + // comma -> next value + // or end of object (ignore_trailing_commas = true) + if (get_token() == token_type::value_separator) + { + get_token(); + + // if ignore_trailing_commas and last_token is }, we can continue to "closing }" + if (!(ignore_trailing_commas && last_token == token_type::end_object)) + { + // parse key + if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr)); + } + + if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) + { + return false; + } + + // parse separator (:) + if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr)); + } + + // parse values + get_token(); + continue; + } + } + + // closing } + if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object)) + { + if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) + { + return false; + } + + // We are done with this object. Before we can parse a + // new value, we need to evaluate the new state first. + // By setting skip_to_state_evaluation to false, we + // are effectively jumping to the beginning of this if. + JSON_ASSERT(!states.empty()); + states.pop_back(); + skip_to_state_evaluation = true; + continue; + } + + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object, "object"), nullptr)); + } + } + + /// get next token from lexer + token_type get_token() + { + return last_token = m_lexer.scan(); + } + + std::string exception_message(const token_type expected, const std::string& context) + { + std::string error_msg = "syntax error "; + + if (!context.empty()) + { + error_msg += concat("while parsing ", context, ' '); + } + + error_msg += "- "; + + if (last_token == token_type::parse_error) + { + error_msg += concat(m_lexer.get_error_message(), "; last read: '", + m_lexer.get_token_string(), '\''); + } + else + { + error_msg += concat("unexpected ", lexer_t::token_type_name(last_token)); + } + + if (expected != token_type::uninitialized) + { + error_msg += concat("; expected ", lexer_t::token_type_name(expected)); + } + + return error_msg; + } + + private: + /// callback function + const parser_callback_t<BasicJsonType> callback = nullptr; + /// the type of the last read token + token_type last_token = token_type::uninitialized; + /// the lexer + lexer_t m_lexer; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// whether trailing commas in objects and arrays should be ignored (true) or signaled as errors (false) + const bool ignore_trailing_commas = false; +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END diff --git a/json4cpp/include/nlohmann/detail/input/position_t.hpp b/json4cpp/include/nlohmann/detail/input/position_t.hpp new file mode 100644 index 0000000000..25b0a6a840 --- /dev/null +++ b/json4cpp/include/nlohmann/detail/input/position_t.hpp @@ -0,0 +1,37 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.12.0 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2026 Niels Lohmann <https://nlohmann.me> +// SPDX-License-Identifier: MIT + +#pragma once + +#include <cstddef> // size_t + +#include <nlohmann/detail/abi_macros.hpp> + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/// struct to capture the start position of the current token +struct position_t +{ + /// the total number of characters read + std::size_t chars_read_total = 0; + /// the number of characters read in the current line + std::size_t chars_read_current_line = 0; + /// the number of lines read + std::size_t lines_read = 0; + + /// conversion to size_t to preserve SAX interface + constexpr operator size_t() const + { + return chars_read_total; + } +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END |
