diff --git a/include/jsonv/encode.hpp b/include/jsonv/encode.hpp index c7b34a3..bc87e06 100644 --- a/include/jsonv/encode.hpp +++ b/include/jsonv/encode.hpp @@ -21,8 +21,7 @@ namespace jsonv { -/** An encoder is responsible for writing values to some form of output. -**/ +/** An encoder is responsible for writing values to some form of output. **/ class JSONV_PUBLIC encoder { public: @@ -148,6 +147,17 @@ class JSONV_PUBLIC ostream_encoder : virtual ~ostream_encoder() noexcept; + /** If set to true (the default), then all non-ASCII characters in strings will be replaced with their numeric + * encodings. Since JSON allows for encoded text to be contained in a document, this is inefficient if you have + * many non-ASCII characters. If you know that your decoding side can properly handle UTF-8 encoding, then you + * should turn this off. + * + * \note + * This functionality cannot be used to pass through malformed UTF-8 encoded strings. If a given string is invalid + * UTF-8, it will still get replaced with a numeric encoding. + **/ + void ensure_ascii(bool value); + protected: virtual void write_null() override; @@ -179,6 +189,7 @@ class JSONV_PUBLIC ostream_encoder : private: std::ostream& _output; + bool _ensure_ascii; }; /** Like \c ostream_encoder, but pretty prints output to an \c std::ostream. 
diff --git a/src/jsonv/char_convert.cpp b/src/jsonv/char_convert.cpp index 5225fff..c0624e2 100644 --- a/src/jsonv/char_convert.cpp +++ b/src/jsonv/char_convert.cpp @@ -197,7 +197,7 @@ static void utf16_create_surrogates(char32_t codepoint, uint16_t* high, uint16_t *low = uint16_t(val & 0x03ff) | 0xdc00; } -std::ostream& string_encode(std::ostream& stream, string_view source) +std::ostream& string_encode(std::ostream& stream, string_view source, bool ensure_ascii) { typedef string_view::size_type size_type; @@ -232,8 +232,14 @@ std::ostream& string_encode(std::ostream& stream, string_view source) length = 1; code = char32_t(current) & 0xff; } - - if (code < 0x10000) + + // if the input string is valid UTF-8, let it pass through + if (valid_utf8 && !ensure_ascii) + { + stream.write(&current, length); + } + // basic multilingual plane points are encoded in hex + else if (code < 0x10000) { stream << "\\u"; to_hex(stream, uint16_t(code)); diff --git a/src/jsonv/char_convert.hpp b/src/jsonv/char_convert.hpp index 2356848..866dd0c 100644 --- a/src/jsonv/char_convert.hpp +++ b/src/jsonv/char_convert.hpp @@ -43,7 +43,7 @@ class decode_error : /** Encodes C++ string \a source into a fully-escaped JSON string into \a stream ready for sending over the wire. **/ -std::ostream& string_encode(std::ostream& stream, string_view source); +std::ostream& string_encode(std::ostream& stream, string_view source, bool ensure_ascii = true); /** A function that decodes an over the wire character sequence \c source into a C++ string. 
**/ typedef std::string (*string_decode_fn)(string_view source); diff --git a/src/jsonv/detail.cpp b/src/jsonv/detail.cpp index 9cd4b5d..a7585b4 100644 --- a/src/jsonv/detail.cpp +++ b/src/jsonv/detail.cpp @@ -96,10 +96,10 @@ void check_type(std::initializer_list<kind> expected, kind actual) // Printing // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -std::ostream& stream_escaped_string(std::ostream& stream, string_view str) +std::ostream& stream_escaped_string(std::ostream& stream, string_view str, bool ensure_ascii) { stream << "\""; - detail::string_encode(stream, str); + detail::string_encode(stream, str, ensure_ascii); stream << "\""; return stream; } diff --git a/src/jsonv/detail.hpp b/src/jsonv/detail.hpp index bf3802f..797214b 100644 --- a/src/jsonv/detail.hpp +++ b/src/jsonv/detail.hpp @@ -41,7 +41,7 @@ const char* kind_desc(kind type); bool kind_valid(kind k); void check_type(kind expected, kind actual); void check_type(std::initializer_list<kind> expected, kind actual); -std::ostream& stream_escaped_string(std::ostream& stream, string_view str); +std::ostream& stream_escaped_string(std::ostream& stream, string_view str, bool require_ascii); } diff --git a/src/jsonv/encode.cpp b/src/jsonv/encode.cpp index bc5e57a..84cd4b3 100644 --- a/src/jsonv/encode.cpp +++ b/src/jsonv/encode.cpp @@ -84,7 +84,8 @@ void encoder::encode(const value& source) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ostream_encoder::ostream_encoder(std::ostream& output) : - _output(output) + _output(output), + _ensure_ascii(true) { } ostream_encoder::~ostream_encoder() noexcept = default; @@ -151,7 +152,7 @@ void ostream_encoder::write_object_key(string_view key) void ostream_encoder::write_string(string_view value) { - stream_escaped_string(_output, value); + stream_escaped_string(_output, value, _ensure_ascii); } std::ostream& 
ostream_encoder::output()