diff --git a/include/flatbuffers/flexbuffers.h b/include/flatbuffers/flexbuffers.h index 023654a60..abccd6c73 100644 --- a/include/flatbuffers/flexbuffers.h +++ b/include/flatbuffers/flexbuffers.h @@ -495,14 +495,14 @@ class Reference { if (type_ == TYPE_STRING) { String str(Indirect(), byte_width_); if (strings_quoted) { - flatbuffers::EscapeString(str.c_str(), str.length(), &s, true); + flatbuffers::EscapeString(str.c_str(), str.length(), &s, true, false); } else { s.append(str.c_str(), str.length()); } } else if (IsKey()) { auto str = AsKey(); if (keys_quoted) { - flatbuffers::EscapeString(str, strlen(str), &s, true); + flatbuffers::EscapeString(str, strlen(str), &s, true, false); } else { s += str; } diff --git a/include/flatbuffers/idl.h b/include/flatbuffers/idl.h index ca24ef445..4e0b08d92 100644 --- a/include/flatbuffers/idl.h +++ b/include/flatbuffers/idl.h @@ -379,6 +379,7 @@ struct IDLOptions { std::string object_suffix; bool union_value_namespacing; bool allow_non_utf8; + bool natural_utf8; std::string include_prefix; bool keep_include_path; bool binary_schema_comments; @@ -439,6 +440,7 @@ struct IDLOptions { object_suffix("T"), union_value_namespacing(true), allow_non_utf8(false), + natural_utf8(false), keep_include_path(false), binary_schema_comments(false), binary_schema_builtins(false), diff --git a/include/flatbuffers/minireflect.h b/include/flatbuffers/minireflect.h index 4fb536e7c..fdafdeddb 100644 --- a/include/flatbuffers/minireflect.h +++ b/include/flatbuffers/minireflect.h @@ -314,7 +314,7 @@ struct ToStringVisitor : public IterationVisitor { void Float(float x) { s += NumToString(x); } void Double(double x) { s += NumToString(x); } void String(const struct String *str) { - EscapeString(str->c_str(), str->size(), &s, true); + EscapeString(str->c_str(), str->size(), &s, true, false); } void Unknown(const uint8_t *) { s += "(?)"; } void StartVector() { s += "[ "; } diff --git a/include/flatbuffers/util.h b/include/flatbuffers/util.h index 
51f83cb22..9fd83354a 100644 --- a/include/flatbuffers/util.h +++ b/include/flatbuffers/util.h @@ -381,7 +381,7 @@ inline std::string WordWrap(const std::string in, size_t max_length, } inline bool EscapeString(const char *s, size_t length, std::string *_text, - bool allow_non_utf8) { + bool allow_non_utf8, bool natural_utf8) { std::string &text = *_text; text += "\""; for (uoffset_t i = 0; i < length; i++) { @@ -421,7 +421,10 @@ inline bool EscapeString(const char *s, size_t length, std::string *_text, return false; } } else { - if (ucc <= 0xFFFF) { + if (natural_utf8) { + // utf8 points to past all utf-8 bytes parsed + text.append(s + i, static_cast<size_t>(utf8 - s - i)); + } else if (ucc <= 0xFFFF) { // Parses as Unicode within JSON's \uXXXX range, so use that. text += "\\u"; text += IntToStringHex(ucc, 4); diff --git a/src/flatc.cpp b/src/flatc.cpp index 60fabd50b..a2f3a7dcd 100644 --- a/src/flatc.cpp +++ b/src/flatc.cpp @@ -69,6 +69,8 @@ std::string FlatCompiler::GetUsageString(const char *program_name) const { " --allow-non-utf8 Pass non-UTF-8 input through parser and emit nonstandard\n" " \\x escapes in JSON. 
(Default is to raise parse error on\n" " non-UTF-8 input.)\n" + " --natural-utf8 Output strings with UTF-8 as human-readable strings.\n" + " By default, UTF-8 characters are printed as \\uXXXX escapes.\n" " --defaults-json Output fields whose value is the default when\n" " writing JSON\n" " --unknown-json Allow fields in JSON that are not defined in the\n" @@ -182,6 +184,8 @@ int FlatCompiler::Compile(int argc, const char **argv) { opts.strict_json = true; } else if (arg == "--allow-non-utf8") { opts.allow_non_utf8 = true; + } else if (arg == "--natural-utf8") { + opts.natural_utf8 = true; } else if (arg == "--no-js-exports") { opts.skip_js_exports = true; } else if (arg == "--goog-js-export") { diff --git a/src/idl_gen_text.cpp b/src/idl_gen_text.cpp index cb2525bf8..41d19125e 100644 --- a/src/idl_gen_text.cpp +++ b/src/idl_gen_text.cpp @@ -119,7 +119,8 @@ bool Print(const void *val, Type type, int indent, break; case BASE_TYPE_STRING: { auto s = reinterpret_cast<const String *>(val); - if (!EscapeString(s->c_str(), s->Length(), _text, opts.allow_non_utf8)) { + if (!EscapeString(s->c_str(), s->Length(), _text, opts.allow_non_utf8, + opts.natural_utf8)) { return false; } break; diff --git a/src/reflection.cpp b/src/reflection.cpp index 4a6c64aef..18a47b2d6 100644 --- a/src/reflection.cpp +++ b/src/reflection.cpp @@ -91,7 +91,8 @@ std::string GetAnyValueS(reflection::BaseType type, const uint8_t *data, auto val = GetAnyFieldS(*table_field, fielddef, schema); if (fielddef.type()->base_type() == reflection::String) { std::string esc; - flatbuffers::EscapeString(val.c_str(), val.length(), &esc, true); + flatbuffers::EscapeString(val.c_str(), val.length(), &esc, true, + false); val = esc; } s += fielddef.name()->str(); diff --git a/tests/test.cpp b/tests/test.cpp index 4977878a3..875e45d28 100644 --- a/tests/test.cpp +++ b/tests/test.cpp @@ -1,4 +1,4 @@ -/* +/* * Copyright 2014 Google Inc. All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); @@ -645,6 +645,22 @@ void ParseAndGenerateTextTest() { // If this fails, check registry.lasterror_. TEST_EQ(ok, true); TEST_EQ_STR(text.c_str(), jsonfile.c_str()); + + // Generate text for UTF-8 strings without escapes. + std::string jsonfile_utf8; + TEST_EQ(flatbuffers::LoadFile((test_data_path + "unicode_test.json").c_str(), + false, &jsonfile_utf8), + true); + TEST_EQ(parser.Parse(jsonfile_utf8.c_str(), include_directories), true); + // To ensure it is correct, generate utf-8 text back from the binary. + std::string jsongen_utf8; + // request natural printing for utf-8 strings + parser.opts.natural_utf8 = true; + parser.opts.strict_json = true; + TEST_EQ( + GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen_utf8), + true); + TEST_EQ_STR(jsongen_utf8.c_str(), jsonfile_utf8.c_str()); } void ReflectionTest(uint8_t *flatbuf, size_t length) { diff --git a/tests/unicode_test.json b/tests/unicode_test.json index 75e467a51..2894f0c85 100644 --- a/tests/unicode_test.json +++ b/tests/unicode_test.json @@ -1,13 +1,5 @@ { "name": "unicode_test", - "testarrayoftables": [ - { "name": "Цлїςσδε" }, - { "name": "フムアムカモケモ" }, - { "name": "フムヤムカモケモ" }, - { "name": "㊀㊁㊂㊃㊄" }, - { "name": "☳☶☲" }, - { "name": "𡇙𝌆" } - ], "testarrayofstring": [ "Цлїςσδε", "フムアムカモケモ", @@ -15,5 +7,25 @@ "㊀㊁㊂㊃㊄", "☳☶☲", "𡇙𝌆" + ], + "testarrayoftables": [ + { + "name": "Цлїςσδε" + }, + { + "name": "フムアムカモケモ" + }, + { + "name": "フムヤムカモケモ" + }, + { + "name": "㊀㊁㊂㊃㊄" + }, + { + "name": "☳☶☲" + }, + { + "name": "𡇙𝌆" + } ] }