Skip to content

Commit

Permalink
Issue #11: Allow for properly-formed UTF-8 strings to pass through to…
Browse files Browse the repository at this point in the history
… the output ostream.
  • Loading branch information
tgockel committed Nov 8, 2015
1 parent f5cdf4b commit 0c07da8
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 11 deletions.
15 changes: 13 additions & 2 deletions include/jsonv/encode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
namespace jsonv
{

/** An encoder is responsible for writing values to some form of output.
**/
/** An encoder is responsible for writing values to some form of output. **/
class JSONV_PUBLIC encoder
{
public:
Expand Down Expand Up @@ -148,6 +147,17 @@ class JSONV_PUBLIC ostream_encoder :

virtual ~ostream_encoder() noexcept;

/** If set to true (the default), then all non-ASCII characters in strings will be replaced with their numeric
* encodings. Since JSON allows UTF-8 encoded text to be contained in a document directly, escaping is inefficient if
* you have many non-ASCII characters. If you know that your decoding side can properly handle UTF-8 encoding, then
* you should turn this off (set it to \c false) so that valid UTF-8 is written to the output unchanged.
*
* \note
* This functionality cannot be used to pass through malformed UTF-8 encoded strings. If a given string is invalid
* UTF-8, it will still get replaced with a numeric encoding.
**/
void ensure_ascii(bool value);

This comment has been minimized.

Copy link
@mvshyvk

mvshyvk Apr 12, 2020

Contributor

Method ensure_ascii is not implemented yet. Is it by design ?

This comment has been minimized.

Copy link
@tgockel

tgockel Apr 12, 2020

Author Owner

Nope, I think I accidentally removed that some time before putting it in mainline. I opened #163 to fix this.


protected:
virtual void write_null() override;

Expand Down Expand Up @@ -179,6 +189,7 @@ class JSONV_PUBLIC ostream_encoder :

private:
std::ostream& _output;
bool _ensure_ascii;
};

/** Like \c ostream_encoder, but pretty prints output to an \c std::ostream.
Expand Down
12 changes: 9 additions & 3 deletions src/jsonv/char_convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ static void utf16_create_surrogates(char32_t codepoint, uint16_t* high, uint16_t
*low = uint16_t(val & 0x03ff) | 0xdc00;
}

std::ostream& string_encode(std::ostream& stream, string_view source)
std::ostream& string_encode(std::ostream& stream, string_view source, bool ensure_ascii)
{
typedef string_view::size_type size_type;

Expand Down Expand Up @@ -232,8 +232,14 @@ std::ostream& string_encode(std::ostream& stream, string_view source)
length = 1;
code = char32_t(current) & 0xff;
}

if (code < 0x10000)

// if the input string is valid UTF-8, let it pass through
if (valid_utf8 && !ensure_ascii)
{
stream.write(&current, length);
}
// basic multilingual plane points are encoded in hex
else if (code < 0x10000)
{
stream << "\\u";
to_hex(stream, uint16_t(code));
Expand Down
2 changes: 1 addition & 1 deletion src/jsonv/char_convert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class decode_error :

/** Encodes C++ string \a source into a fully-escaped JSON string into \a stream ready for sending over the wire.
**/
std::ostream& string_encode(std::ostream& stream, string_view source);
std::ostream& string_encode(std::ostream& stream, string_view source, bool ensure_ascii = true);

/** A function that decodes an over the wire character sequence \c source into a C++ string. **/
typedef std::string (*string_decode_fn)(string_view source);
Expand Down
4 changes: 2 additions & 2 deletions src/jsonv/detail.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,10 @@ void check_type(std::initializer_list<kind> expected, kind actual)
// Printing //
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

std::ostream& stream_escaped_string(std::ostream& stream, string_view str)
std::ostream& stream_escaped_string(std::ostream& stream, string_view str, bool ensure_ascii)
{
stream << "\"";
detail::string_encode(stream, str);
detail::string_encode(stream, str, ensure_ascii);
stream << "\"";
return stream;
}
Expand Down
2 changes: 1 addition & 1 deletion src/jsonv/detail.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ const char* kind_desc(kind type);
bool kind_valid(kind k);
void check_type(kind expected, kind actual);
void check_type(std::initializer_list<kind> expected, kind actual);
std::ostream& stream_escaped_string(std::ostream& stream, string_view str);
/** Writes \a str to \a stream surrounded by double quotes, encoding the contents with \c detail::string_encode.
 *  The \a ensure_ascii flag is forwarded to \c string_encode unchanged.
**/
std::ostream& stream_escaped_string(std::ostream& stream, string_view str, bool ensure_ascii);

}

Expand Down
5 changes: 3 additions & 2 deletions src/jsonv/encode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ void encoder::encode(const value& source)
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

ostream_encoder::ostream_encoder(std::ostream& output) :
_output(output)
_output(output),
_ensure_ascii(true)
{ }

ostream_encoder::~ostream_encoder() noexcept = default;
Expand Down Expand Up @@ -151,7 +152,7 @@ void ostream_encoder::write_object_key(string_view key)

void ostream_encoder::write_string(string_view value)
{
stream_escaped_string(_output, value);
stream_escaped_string(_output, value, _ensure_ascii);
}

std::ostream& ostream_encoder::output()
Expand Down

0 comments on commit 0c07da8

Please sign in to comment.