diff --git a/.gitignore b/.gitignore index db8b067fe..9e00828a3 100644 --- a/.gitignore +++ b/.gitignore @@ -109,3 +109,5 @@ dart/.dart_tool/ dart/build/ dart/doc/api/ Cargo.lock +.corpus** +.seed** diff --git a/docs/source/Grammar.md b/docs/source/Grammar.md index bf79596fa..724137e41 100644 --- a/docs/source/Grammar.md +++ b/docs/source/Grammar.md @@ -49,11 +49,26 @@ file_extension_decl = `file_extension` string\_constant `;` file_identifier_decl = `file_identifier` string\_constant `;` -integer\_constant = `-?[0-9]+` | `true` | `false` - -float\_constant = `-?[0-9]+.[0-9]+((e|E)(+|-)?[0-9]+)?` - string\_constant = `\".*?\"` ident = `[a-zA-Z_][a-zA-Z0-9_]*` +`[:digit:]` = `[0-9]` + +`[:xdigit:]` = `[0-9a-fA-F]` + +dec\_integer\_constant = `[-+]?[:digit:]+` + +hex\_integer\_constant = `[-+]?0[xX][:xdigit:]+` + +integer\_constant = dec\_integer\_constant | hex\_integer\_constant + +dec\_float\_constant = `[-+]?(([.][:digit:]+)|([:digit:]+[.][:digit:]*)|([:digit:]+))([eE][-+]?[:digit:]+)?` + +hex\_float\_constant = `[-+]?0[xX](([.][:xdigit:]+)|([:xdigit:]+[.][:xdigit:]*)|([:xdigit:]+))([pP][-+]?[:digit:]+)` + +special\_float\_constant = `[-+]?(nan|inf|infinity)` + +float\_constant = dec\_float\_constant | hex\_float\_constant | special\_float\_constant + +boolean\_constant = `(true|false)` | (integer\_constant ? `true` : `false`) diff --git a/docs/source/Schemas.md b/docs/source/Schemas.md index a05b00273..9647f7cbf 100644 --- a/docs/source/Schemas.md +++ b/docs/source/Schemas.md @@ -385,6 +385,31 @@ When parsing JSON, it recognizes the following escape codes in strings: It also generates these escape codes back again when generating JSON from a binary representation. +When parsing numbers, the parser is more flexible than JSON. +The format of numeric literals is closer to that of C/C++. 
+According to the [grammar](@ref flatbuffers_grammar), it accepts the following +numerical literals: + +- An integer literal can have any number of leading zero `0` digits. + Unlike C/C++, the parser ignores a leading zero, not interpreting it as the + beginning of an octal number. + The numbers `[081, -00094]` are equal to `[81, -94]` decimal integers. +- The parser accepts unsigned and signed hexadecimal integer numbers. + For example: `[0x123, +0x45, -0x67]` are equal to `[291, 69, -103]` decimals. +- The format of floating-point numbers is fully compatible with C/C++ format. + If a modern C++ compiler is used, the parser accepts hexadecimal and special + floating-point literals as well: + `[-1.0, 2., .3e0, 3.e4, 0x21.34p-5, -inf, nan]`. + The exponent suffix of a hexadecimal floating-point number is mandatory. + + Extended floating-point support was tested with: + - x64 Windows: `MSVC2015` and higher. + - x64 Linux: `LLVM 6.0`, `GCC 4.9` and higher. + +- For compatibility with a JSON lint tool all numeric literals of scalar + fields can be wrapped in a quoted string: + `"1", "2.0", "0x48A", "0x0C.0Ep-1", "-inf", "true"`. + ## Guidelines ### Efficiency diff --git a/include/flatbuffers/base.h b/include/flatbuffers/base.h index c755d6f8f..2656f602e 100644 --- a/include/flatbuffers/base.h +++ b/include/flatbuffers/base.h @@ -180,6 +180,17 @@ #endif // __has_include #endif // !FLATBUFFERS_HAS_STRING_VIEW +#ifndef FLATBUFFERS_HAS_NEW_STRTOD + // Modern (C++11) strtod and strtof functions are available for use. + // 1) nan/inf strings as argument of strtod; + // 2) hex-float as argument of strtod/strtof. 
+ #if (defined(_MSC_VER) && _MSC_VER >= 1900) || \ + (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)) || \ + (defined(__clang__)) + #define FLATBUFFERS_HAS_NEW_STRTOD 1 + #endif +#endif // !FLATBUFFERS_HAS_NEW_STRTOD + /// @endcond /// @file diff --git a/include/flatbuffers/idl.h b/include/flatbuffers/idl.h index cf5446a2d..024f324ee 100644 --- a/include/flatbuffers/idl.h +++ b/include/flatbuffers/idl.h @@ -484,7 +484,11 @@ struct IDLOptions { // This encapsulates where the parser is in the current source file. struct ParserState { ParserState() - : cursor_(nullptr), line_start_(nullptr), line_(0), token_(-1) {} + : cursor_(nullptr), + line_start_(nullptr), + line_(0), + token_(-1), + attr_is_trivial_ascii_string_(true) {} protected: void ResetState(const char *source) { @@ -508,6 +512,10 @@ struct ParserState { int line_; // the current line being parsed int token_; + // Flag: text in attribute_ is true ASCII string without escape + // sequences. Only printable ASCII (without [\t\r\n]). + // Used for number-in-string (and base64 string in future). 
+ bool attr_is_trivial_ascii_string_; std::string attribute_; std::vector doc_comment_; }; @@ -644,7 +652,8 @@ class Parser : public ParserState { bool ParseFlexBuffer(const char *source, const char *source_filename, flexbuffers::Builder *builder); - FLATBUFFERS_CHECKED_ERROR CheckInRange(int64_t val, int64_t min, int64_t max); + FLATBUFFERS_CHECKED_ERROR InvalidNumber(const char *number, + const std::string &msg); StructDef *LookupStruct(const std::string &id) const; @@ -711,7 +720,7 @@ class Parser : public ParserState { BaseType req, bool *destmatch); FLATBUFFERS_CHECKED_ERROR ParseHash(Value &e, FieldDef* field); FLATBUFFERS_CHECKED_ERROR TokenError(); - FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e); + FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e, bool check_now); FLATBUFFERS_CHECKED_ERROR ParseEnumFromString(Type &type, int64_t *result); StructDef *LookupCreateStruct(const std::string &name, bool create_if_new = true, diff --git a/include/flatbuffers/stl_emulation.h b/include/flatbuffers/stl_emulation.h index ff537cd4b..028cc6fd2 100644 --- a/include/flatbuffers/stl_emulation.h +++ b/include/flatbuffers/stl_emulation.h @@ -37,9 +37,9 @@ // Not possible if Microsoft Compiler before 2012 // Possible is the language feature __cpp_alias_templates is defined well // Or possible if the C++ std is C+11 or newer -#if !(defined(_MSC_VER) && _MSC_VER <= 1700 /* MSVC2012 */) \ - && ((defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \ - || (defined(__cplusplus) && __cplusplus >= 201103L)) +#if (defined(_MSC_VER) && _MSC_VER > 1700 /* MSVC2012 */) \ + || (defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \ + || (defined(__cplusplus) && __cplusplus >= 201103L) #define FLATBUFFERS_TEMPLATES_ALIASES #endif @@ -88,12 +88,33 @@ inline void vector_emplace_back(std::vector *vector, V &&data) { #endif // defined(FLATBUFFERS_TEMPLATES_ALIASES) #else template class numeric_limits : - 
public std::numeric_limits {}; + public std::numeric_limits { + public: + // Android NDK fix. + static T lowest() { + return std::numeric_limits::min(); + } + }; + + template <> class numeric_limits : + public std::numeric_limits { + public: + static float lowest() { return -FLT_MAX; } + }; + + template <> class numeric_limits : + public std::numeric_limits { + public: + static double lowest() { return -DBL_MAX; } + }; template <> class numeric_limits { public: static unsigned long long min() { return 0ULL; } static unsigned long long max() { return ~0ULL; } + static unsigned long long lowest() { + return numeric_limits::min(); + } }; template <> class numeric_limits { @@ -105,6 +126,9 @@ inline void vector_emplace_back(std::vector *vector, V &&data) { return static_cast( (1ULL << ((sizeof(long long) << 3) - 1)) - 1); } + static long long lowest() { + return numeric_limits::min(); + } }; #endif // FLATBUFFERS_CPP98_STL @@ -114,6 +138,7 @@ inline void vector_emplace_back(std::vector *vector, V &&data) { template using is_same = std::is_same; template using is_floating_point = std::is_floating_point; template using is_unsigned = std::is_unsigned; + template using make_unsigned = std::make_unsigned; #else // Map C++ TR1 templates defined by stlport. template using is_scalar = std::tr1::is_scalar; @@ -121,6 +146,13 @@ inline void vector_emplace_back(std::vector *vector, V &&data) { template using is_floating_point = std::tr1::is_floating_point; template using is_unsigned = std::tr1::is_unsigned; + // Android NDK doesn't have std::make_unsigned or std::tr1::make_unsigned. + template struct make_unsigned { + static_assert(is_unsigned::value, "Specialization not impelented!"); + using type = T; + }; + template<> struct make_unsigned { using type = unsigned char; }; + template<> struct make_unsigned { using type = unsigned int; }; #endif // !FLATBUFFERS_CPP98_STL #else // MSVC 2010 doesn't support C++11 aliases. 
@@ -129,6 +161,7 @@ inline void vector_emplace_back(std::vector *vector, V &&data) { template struct is_floating_point : public std::is_floating_point {}; template struct is_unsigned : public std::is_unsigned {}; + template struct make_unsigned : public std::make_unsigned {}; #endif // defined(FLATBUFFERS_TEMPLATES_ALIASES) #ifndef FLATBUFFERS_CPP98_STL diff --git a/include/flatbuffers/util.h b/include/flatbuffers/util.h index cf2949a20..01e1b23f8 100644 --- a/include/flatbuffers/util.h +++ b/include/flatbuffers/util.h @@ -17,7 +17,7 @@ #ifndef FLATBUFFERS_UTIL_H_ #define FLATBUFFERS_UTIL_H_ -#include +#include #include #include #include @@ -50,6 +50,52 @@ namespace flatbuffers { +// Avoid `#pragma warning(disable: 4127) // C4127: expression is constant`. +template FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(const T &t) { + return !!t; +} + +// @locale-independent functions for ASCII characters set. + +// Check that integer scalar is in closed range: (a <= x <= b) +// using one compare (conditional branch) operator. +template inline bool check_in_range(T x, T a, T b) { + // (Hacker's Delight): `a <= x <= b` <=> `(x-a) <={u} (b-a)`. + FLATBUFFERS_ASSERT(a <= b); // static_assert only if 'a' & 'b' templated + typedef typename flatbuffers::make_unsigned::type U; + return (static_cast(x - a) <= static_cast(b - a)); +} + +// Case-insensitive isalpha +static inline bool is_alpha(char c) { + // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF). + return check_in_range(c & 0xDF, 'a' & 0xDF, 'z' & 0xDF); +} + +// Check (case-insensitive) that `c` is equal to alpha. +static inline bool is_alpha_char(char c, char alpha) { + FLATBUFFERS_ASSERT(is_alpha(alpha)); + // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF). 
+ return ((c & 0xDF) == (alpha & 0xDF)); +} + +// https://en.cppreference.com/w/cpp/string/byte/isxdigit +// isdigit and isxdigit are the only standard narrow character classification +// functions that are not affected by the currently installed C locale. although +// some implementations (e.g. Microsoft in 1252 codepage) may classify +// additional single-byte characters as digits. +static inline bool is_digit(char c) { return check_in_range(c, '0', '9'); } + +static inline bool is_xdigit(char c) { + // Replace by look-up table. + return is_digit(c) | check_in_range(c & 0xDF, 'a' & 0xDF, 'f' & 0xDF); +} + +// Case-insensitive isalnum +static inline bool is_alnum(char c) { return is_alpha(c) || is_digit(c); } + +// @end-locale-independent functions for ASCII character set + #ifdef FLATBUFFERS_PREFER_PRINTF template size_t IntToDigitCount(T t) { size_t digit_count = 0; @@ -158,6 +204,7 @@ template<> inline std::string NumToString(float t) { // The returned string length is always xdigits long, prefixed by 0 digits. // For example, IntToStringHex(0x23, 8) returns the string "00000023". inline std::string IntToStringHex(int i, int xdigits) { + FLATBUFFERS_ASSERT(i >= 0); // clang-format off #ifndef FLATBUFFERS_PREFER_PRINTF std::stringstream ss; @@ -170,28 +217,178 @@ inline std::string IntToStringHex(int i, int xdigits) { // clang-format on } -// Portable implementation of strtoll(). -inline int64_t StringToInt(const char *str, char **endptr = nullptr, - int base = 10) { +static inline double strtod_impl(const char *str, char **str_end) { + // Result of strtod (printf, etc) depends from current C-locale. + return strtod(str, str_end); +} + +static inline float strtof_impl(const char *str, char **str_end) { + // Use "strtof" for float and strtod for double to avoid double=>float + // rounding problems (see + // https://en.cppreference.com/w/cpp/numeric/fenv/feround) or problems with + // std::numeric_limits::is_iec559==false. 
Example: + // for (int mode : { FE_DOWNWARD, FE_TONEAREST, FE_TOWARDZERO, FE_UPWARD }){ + // const char *s = "-4e38"; + // std::fesetround(mode); + // std::cout << strtof(s, nullptr) << "; " << strtod(s, nullptr) << "; " + // << static_cast(strtod(s, nullptr)) << "\n"; + // } + // Gives: + // -inf; -4e+38; -inf + // -inf; -4e+38; -inf + // -inf; -4e+38; -3.40282e+38 + // -inf; -4e+38; -3.40282e+38 + // clang-format off - #ifdef _MSC_VER - return _strtoi64(str, endptr, base); + #ifdef FLATBUFFERS_HAS_NEW_STRTOD + return strtof(str, str_end); #else - return strtoll(str, endptr, base); - #endif + return static_cast(strtod_impl(str, str_end)); + #endif // !FLATBUFFERS_HAS_NEW_STRTOD // clang-format on } -// Portable implementation of strtoull(). -inline uint64_t StringToUInt(const char *str, char **endptr = nullptr, +// Adaptor for strtoull()/strtoll(). +// Flatbuffers accepts numbers with any count of leading zeros (-009 is -9), +// while strtoll with base=0 interprets first leading zero as octal prefix. +// In future, it is possible to add prefixed 0b0101. +// 1) Checks errno code for overflow condition (out of range). +// 2) If base <= 0, function try to detect base of number by prefix. +// +// Return value (like strtoull and strtoll, but reject partial result): +// - If successful, an integer value corresponding to the str is returned. +// - If full string conversion can't be performed, ​0​ is returned. +// - If the converted value falls out of range of corresponding return type, a +// range error occurs. In this case value MAX(T)/MIN(T) is returned. 
+template +inline T StringToInteger64Impl(const char *const str, const char **endptr, + const int base, const bool check_errno = true) { + static_assert(flatbuffers::is_same::value || + flatbuffers::is_same::value, + "Type T must be either int64_t or uint64_t"); + FLATBUFFERS_ASSERT(str && endptr); // endptr must be not null + if (base <= 0) { + auto s = str; + while (*s && !is_digit(*s)) s++; + if (s[0] == '0' && is_alpha_char(s[1], 'X')) + return StringToInteger64Impl(str, endptr, 16, check_errno); + // if a prefix not match, try base=10 + return StringToInteger64Impl(str, endptr, 10, check_errno); + } else { + if (check_errno) errno = 0; // clear thread-local errno + // calculate result + T result; + if (IsConstTrue(flatbuffers::is_same::value)) { + // clang-format off + #ifdef _MSC_VER + result = _strtoi64(str, const_cast(endptr), base); + #else + result = strtoll(str, const_cast(endptr), base); + #endif + // clang-format on + } else { // T is uint64_t + // clang-format off + #ifdef _MSC_VER + result = _strtoui64(str, const_cast(endptr), base); + #else + result = strtoull(str, const_cast(endptr), base); + #endif + // clang-format on + + // The strtoull accepts negative numbers: + // If the minus sign was part of the input sequence, the numeric value + // calculated from the sequence of digits is negated as if by unary minus + // in the result type, which applies unsigned integer wraparound rules. + // Fix this behaviour (except -0). + if ((**endptr == '\0') && (0 != result)) { + auto s = str; + while (*s && !is_digit(*s)) s++; + s = (s > str) ? (s - 1) : s; // step back to one symbol + if (*s == '-') { + // For unsigned types return max to distinguish from + // "no conversion can be performed". 
+ result = flatbuffers::numeric_limits::max(); + // point to the start of string, like errno + *endptr = str; + } + } + } + // check for overflow + if (check_errno && errno) *endptr = str; // point it to start of input + // erase partial result, but save an overflow + if ((*endptr != str) && (**endptr != '\0')) result = 0; + return result; + } +} + +// Convert a string to an instance of T. +// Return value (matched with StringToInteger64Impl and strtod): +// - If successful, a numeric value corresponding to the str is returned. +// - If full string conversion can't be performed, ​0​ is returned. +// - If the converted value falls out of range of corresponding return type, a +// range error occurs. In this case value MAX(T)/MIN(T) is returned. +template inline bool StringToNumber(const char *s, T *val) { + FLATBUFFERS_ASSERT(s && val); + const char *end = nullptr; + // The errno check isn't needed. strtoll will return MAX/MIN on overlow. + const int64_t i = StringToInteger64Impl(s, &end, -1, false); + *val = static_cast(i); + const auto done = (s != end) && (*end == '\0'); + if (done) { + const int64_t max = flatbuffers::numeric_limits::max(); + const int64_t min = flatbuffers::numeric_limits::lowest(); + if (i > max) { + *val = static_cast(max); + return false; + } + if (i < min) { + // For unsigned types return max to distinguish from + // "no conversion can be performed" when 0 is returned. + *val = static_cast(flatbuffers::is_unsigned::value ? 
max : min); + return false; + } + } + return done; +} +template<> inline bool StringToNumber(const char *s, int64_t *val) { + const char *end = s; // request errno checking + *val = StringToInteger64Impl(s, &end, -1); + return (s != end) && (*end == '\0'); +} +template<> inline bool StringToNumber(const char *s, uint64_t *val) { + const char *end = s; // request errno checking + *val = StringToInteger64Impl(s, &end, -1); + return (s != end) && (*end == '\0'); +} + +template<> inline bool StringToNumber(const char *s, double *val) { + FLATBUFFERS_ASSERT(s && val); + char *end = nullptr; + *val = strtod_impl(s, &end); + auto done = (s != end) && (*end == '\0'); + if (!done) *val = 0; // erase partial result + return done; +} + +template<> inline bool StringToNumber(const char *s, float *val) { + FLATBUFFERS_ASSERT(s && val); + char *end = nullptr; + *val = strtof_impl(s, &end); + auto done = (s != end) && (*end == '\0'); + if (!done) *val = 0; // erase partial result + return done; +} + +inline int64_t StringToInt(const char *str, const char **endptr = nullptr, + int base = 10) { + const char *ep = nullptr; + return StringToInteger64Impl(str, endptr ? endptr : &ep, base); +} + +inline uint64_t StringToUInt(const char *str, const char **endptr = nullptr, int base = 10) { - // clang-format off - #ifdef _MSC_VER - return _strtoui64(str, endptr, base); - #else - return strtoull(str, endptr, base); - #endif - // clang-format on + const char *ep = nullptr; + return StringToInteger64Impl(str, endptr ? endptr : &ep, base); } typedef bool (*LoadFileFunction)(const char *filename, bool binary, diff --git a/src/idl_gen_cpp.cpp b/src/idl_gen_cpp.cpp index 17838132c..9e39339f2 100644 --- a/src/idl_gen_cpp.cpp +++ b/src/idl_gen_cpp.cpp @@ -145,7 +145,7 @@ class CppGenerator : public BaseGenerator { std::string guard = file_name_; // Remove any non-alpha-numeric characters that may appear in a filename. 
struct IsAlnum { - bool operator()(char c) const { return !isalnum(c); } + bool operator()(char c) const { return !is_alnum(c); } }; guard.erase(std::remove_if(guard.begin(), guard.end(), IsAlnum()), guard.end()); diff --git a/src/idl_gen_text.cpp b/src/idl_gen_text.cpp index 563f69013..c12948865 100644 --- a/src/idl_gen_text.cpp +++ b/src/idl_gen_text.cpp @@ -149,19 +149,23 @@ bool Print(const void *val, Type type, int indent, return true; } +template static T GetFieldDefault(const FieldDef &fd) { + T val; + auto check = StringToNumber(fd.value.constant.c_str(), &val); + (void)check; + FLATBUFFERS_ASSERT(check); + return val; +} + // Generate text for a scalar field. -template static bool GenField(const FieldDef &fd, - const Table *table, bool fixed, - const IDLOptions &opts, - int indent, - std::string *_text) { - return Print(fixed ? - reinterpret_cast(table)->GetField(fd.value.offset) : - table->GetField(fd.value.offset, - IsFloat(fd.value.type.base_type) ? - static_cast(strtod(fd.value.constant.c_str(), nullptr)) : - static_cast(StringToInt(fd.value.constant.c_str()))), - fd.value.type, indent, nullptr, opts, _text); +template +static bool GenField(const FieldDef &fd, const Table *table, bool fixed, + const IDLOptions &opts, int indent, std::string *_text) { + return Print( + fixed ? reinterpret_cast(table)->GetField( + fd.value.offset) + : table->GetField(fd.value.offset, GetFieldDefault(fd)), + fd.value.type, indent, nullptr, opts, _text); } static bool GenStruct(const StructDef &struct_def, const Table *table, diff --git a/src/idl_parser.cpp b/src/idl_parser.cpp index e686f628f..fff320a71 100644 --- a/src/idl_parser.cpp +++ b/src/idl_parser.cpp @@ -1,4 +1,4 @@ -/* +/* * Copyright 2014 Google Inc. All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); @@ -119,57 +119,22 @@ CheckedError Parser::RecurseError() { NumToString(FLATBUFFERS_MAX_PARSING_DEPTH) + " reached"); } -inline std::string OutOfRangeErrorMsg(int64_t val, const std::string &op, - int64_t limit) { - const std::string cause = NumToString(val) + op + NumToString(limit); - return "constant does not fit (" + cause + ")"; +CheckedError Parser::InvalidNumber(const char *number, const std::string &msg) { + return Error("invalid number: \"" + std::string(number) + "\"" + msg); } - -// Ensure that integer values we parse fit inside the declared integer type. -CheckedError Parser::CheckInRange(int64_t val, int64_t min, int64_t max) { - if (val < min) - return Error(OutOfRangeErrorMsg(val, " < ", min)); - else if (val > max) - return Error(OutOfRangeErrorMsg(val, " > ", max)); - else - return NoError(); -} - // atot: templated version of atoi/atof: convert a string to an instance of T. template inline CheckedError atot(const char *s, Parser &parser, T *val) { - int64_t i = StringToInt(s); - const int64_t min = flatbuffers::numeric_limits::min(); - const int64_t max = flatbuffers::numeric_limits::max(); - *val = (T)i; // Assign this first to make ASAN happy. 
- return parser.CheckInRange(i, min, max); -} -template<> -inline CheckedError atot(const char *s, Parser &parser, - uint64_t *val) { - (void)parser; - *val = StringToUInt(s); - return NoError(); -} -template<> -inline CheckedError atot(const char *s, Parser &parser, bool *val) { - (void)parser; - *val = 0 != atoi(s); - return NoError(); -} -template<> -inline CheckedError atot(const char *s, Parser &parser, float *val) { - (void)parser; - *val = static_cast(strtod(s, nullptr)); - return NoError(); -} -template<> -inline CheckedError atot(const char *s, Parser &parser, double *val) { - (void)parser; - *val = strtod(s, nullptr); - return NoError(); -} + auto done = StringToNumber(s, val); + if (done) return NoError(); + return parser.InvalidNumber( + s, (0 == *val) + ? "" + : (", constant does not fit [" + + NumToString(flatbuffers::numeric_limits::lowest()) + "; " + + NumToString(flatbuffers::numeric_limits::max()) + "]")); +} template<> inline CheckedError atot>(const char *s, Parser &parser, Offset *val) { @@ -239,8 +204,9 @@ std::string Parser::TokenToStringId(int t) const { // Parses exactly nibbles worth of hex digits into a number, or error. 
CheckedError Parser::ParseHexNum(int nibbles, uint64_t *val) { + FLATBUFFERS_ASSERT(nibbles > 0); for (int i = 0; i < nibbles; i++) - if (!isxdigit(static_cast(cursor_[i]))) + if (!is_xdigit(cursor_[i])) return Error("escape code must be followed by " + NumToString(nibbles) + " hex digits"); std::string target(cursor_, cursor_ + nibbles); @@ -261,14 +227,15 @@ CheckedError Parser::SkipByteOrderMark() { return NoError(); } -bool IsIdentifierStart(char c) { - return isalpha(static_cast(c)) || c == '_'; +static inline bool IsIdentifierStart(char c) { + return is_alpha(c) || (c == '_'); } CheckedError Parser::Next() { doc_comment_.clear(); bool seen_newline = cursor_ == source_; attribute_.clear(); + attr_is_trivial_ascii_string_ = true; for (;;) { char c = *cursor_++; token_ = c; @@ -294,10 +261,6 @@ CheckedError Parser::Next() { case ':': case ';': case '=': return NoError(); - case '.': - if (!isdigit(static_cast(*cursor_))) - return NoError(); - return Error("floating point constant can\'t start with \".\""); case '\"': case '\'': { int unicode_high_surrogate = -1; @@ -306,6 +269,7 @@ CheckedError Parser::Next() { if (*cursor_ < ' ' && static_cast(*cursor_) >= 0) return Error("illegal character in string constant"); if (*cursor_ == '\\') { + attr_is_trivial_ascii_string_ = false; // has escape sequence cursor_++; if (unicode_high_surrogate != -1 && *cursor_ != 'u') { return Error( @@ -393,6 +357,9 @@ CheckedError Parser::Next() { return Error( "illegal Unicode sequence (unpaired high surrogate)"); } + // reset if non-printable + attr_is_trivial_ascii_string_ &= check_in_range(*cursor_, ' ', '~'); + attribute_ += *cursor_++; } } @@ -400,7 +367,8 @@ CheckedError Parser::Next() { return Error("illegal Unicode sequence (unpaired high surrogate)"); } cursor_++; - if (!opts.allow_non_utf8 && !ValidateUTF8(attribute_)) { + if (!attr_is_trivial_ascii_string_ && !opts.allow_non_utf8 && + !ValidateUTF8(attribute_)) { return Error("illegal UTF-8 sequence"); } token_ = 
kTokenStringConstant; @@ -430,55 +398,69 @@ CheckedError Parser::Next() { } // fall thru default: - if (IsIdentifierStart(c)) { + const auto has_sign = (c == '+') || (c == '-'); + // '-'/'+' and following identifier - can be a predefined constant like: + // NAN, INF, PI, etc. + if (IsIdentifierStart(c) || (has_sign && IsIdentifierStart(*cursor_))) { // Collect all chars of an identifier: const char *start = cursor_ - 1; - while (isalnum(static_cast(*cursor_)) || *cursor_ == '_') - cursor_++; + while (IsIdentifierStart(*cursor_) || is_digit(*cursor_)) cursor_++; attribute_.append(start, cursor_); - token_ = kTokenIdentifier; + token_ = has_sign ? kTokenStringConstant : kTokenIdentifier; return NoError(); - } else if (isdigit(static_cast(c)) || c == '-') { - const char *start = cursor_ - 1; - if (c == '-' && *cursor_ == '0' && - (cursor_[1] == 'x' || cursor_[1] == 'X')) { - ++start; - ++cursor_; - attribute_.append(&c, &c + 1); - c = '0'; + } + + auto dot_lvl = (c == '.') ? 0 : 1; // dot_lvl==0 <=> exactly one '.' seen + if (!dot_lvl && !is_digit(*cursor_)) return NoError(); // enum? + // Parser accepts hexadecimal-floating-literal (see C++ 5.13.4). + if (is_digit(c) || has_sign || !dot_lvl) { + const auto start = cursor_ - 1; + auto start_digits = !is_digit(c) ? cursor_ : cursor_ - 1; + if (!is_digit(c) && is_digit(*cursor_)){ + start_digits = cursor_; // see digit in cursor_ position + c = *cursor_++; } - if (c == '0' && (*cursor_ == 'x' || *cursor_ == 'X')) { - cursor_++; - while (isxdigit(static_cast(*cursor_))) cursor_++; - attribute_.append(start + 2, cursor_); - attribute_ = NumToString(static_cast( - StringToUInt(attribute_.c_str(), nullptr, 16))); - token_ = kTokenIntegerConstant; - return NoError(); - } - while (isdigit(static_cast(*cursor_))) cursor_++; - if (*cursor_ == '.' || *cursor_ == 'e' || *cursor_ == 'E') { - if (*cursor_ == '.') { - cursor_++; - while (isdigit(static_cast(*cursor_))) cursor_++; + // hex-float can't begind with '.' 
+ auto use_hex = dot_lvl && (c == '0') && is_alpha_char(*cursor_, 'X'); + if (use_hex) start_digits = ++cursor_; // '0x' is the prefix, skip it + // Read an integer number or mantisa of float-point number. + do { + if (use_hex) { + while (is_xdigit(*cursor_)) cursor_++; + } else { + while (is_digit(*cursor_)) cursor_++; } - // See if this float has a scientific notation suffix. Both JSON - // and C++ (through strtod() we use) have the same format: - if (*cursor_ == 'e' || *cursor_ == 'E') { + } while ((*cursor_ == '.') && (++cursor_) && (--dot_lvl >= 0)); + // Exponent of float-point number. + if ((dot_lvl >= 0) && (cursor_ > start_digits)) { + // The exponent suffix of hexadecimal float number is mandatory. + if (use_hex && !dot_lvl) start_digits = cursor_; + if ((use_hex && is_alpha_char(*cursor_, 'P')) || + is_alpha_char(*cursor_, 'E')) { + dot_lvl = 0; // Emulate dot to signal about float-point number. cursor_++; if (*cursor_ == '+' || *cursor_ == '-') cursor_++; - while (isdigit(static_cast(*cursor_))) cursor_++; + start_digits = cursor_; // the exponent-part has to have digits + // Exponent is decimal integer number + while (is_digit(*cursor_)) cursor_++; + if (*cursor_ == '.') { + cursor_++; // If see a dot treat it as part of invalid number. + dot_lvl = -1; // Fall thru to Error(). + } } - token_ = kTokenFloatConstant; - } else { - token_ = kTokenIntegerConstant; } - attribute_.append(start, cursor_); - return NoError(); + // Finalize. + if ((dot_lvl >= 0) && (cursor_ > start_digits)) { + attribute_.append(start, cursor_); + token_ = dot_lvl ? 
kTokenIntegerConstant : kTokenFloatConstant; + return NoError(); + } else { + return Error("invalid number: " + std::string(start, cursor_)); + } } std::string ch; ch = c; - if (c < ' ' || c > '~') ch = "code: " + NumToString(c); + if (false == check_in_range(c, ' ', '~')) ch = "code: " + NumToString(c); return Error("illegal character: " + ch); } } @@ -674,7 +656,7 @@ CheckedError Parser::ParseField(StructDef &struct_def) { (struct_def.fixed && field->value.constant != "0")) return Error( "default values currently only supported for scalars in tables"); - ECHECK(ParseSingleValue(&field->name, field->value)); + ECHECK(ParseSingleValue(&field->name, field->value, true)); } if (type.enum_def && !type.enum_def->is_union && @@ -684,9 +666,20 @@ CheckedError Parser::ParseField(StructDef &struct_def) { return Error("default value of " + field->value.constant + " for field " + name + " is not part of enum " + type.enum_def->name); } + // Append .0 if the value has not it (skip hex and scientific floats). + // This suffix needed for generated C++ code. if (IsFloat(type.base_type)) { - if (!strpbrk(field->value.constant.c_str(), ".eE")) + auto &text = field->value.constant; + FLATBUFFERS_ASSERT(false == text.empty()); + auto s = text.c_str(); + while(*s == ' ') s++; + if (*s == '-' || *s == '+') s++; + // 1) A float constants (nan, inf, pi, etc) is a kind of identifier. + // 2) A float number needn't ".0" at the end if it has exponent. + if ((false == IsIdentifierStart(*s)) && + (std::string::npos == field->value.constant.find_first_of(".eEpP"))) { field->value.constant += ".0"; + } } if (type.enum_def && IsScalar(type.base_type) && !struct_def.fixed && @@ -915,11 +908,13 @@ CheckedError Parser::ParseAnyValue(Value &val, FieldDef *field, (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) { ECHECK(ParseHash(val, field)); } else { - ECHECK(ParseSingleValue(field ? &field->name : nullptr, val)); + ECHECK(ParseSingleValue(field ? 
&field->name : nullptr, val, false)); } break; } - default: ECHECK(ParseSingleValue(field ? &field->name : nullptr, val)); break; + default: + ECHECK(ParseSingleValue(field ? &field->name : nullptr, val, false)); + break; } return NoError(); } @@ -994,7 +989,8 @@ CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value, ECHECK(parser->SkipAnyJsonValue()); } } else { - if (parser->IsIdent("null")) { + if (parser->IsIdent("null") && + !IsScalar(field->value.type.base_type)) { ECHECK(parser->Next()); // Ignore this field. } else { Value val = field->value; @@ -1252,7 +1248,7 @@ CheckedError Parser::ParseMetaData(SymbolTable *attributes) { attributes->Add(name, e); if (Is(':')) { NEXT(); - ECHECK(ParseSingleValue(&name, *e)); + ECHECK(ParseSingleValue(&name, *e, true)); } if (Is(')')) { NEXT(); @@ -1264,23 +1260,40 @@ CheckedError Parser::ParseMetaData(SymbolTable *attributes) { return NoError(); } -CheckedError Parser::TryTypedValue(const std::string *name, int dtoken, bool check, Value &e, - BaseType req, bool *destmatch) { +CheckedError Parser::TryTypedValue(const std::string *name, int dtoken, + bool check, Value &e, BaseType req, + bool *destmatch) { bool match = dtoken == token_; if (match) { + FLATBUFFERS_ASSERT(*destmatch == false); *destmatch = true; e.constant = attribute_; + // Check token match if (!check) { if (e.type.base_type == BASE_TYPE_NONE) { e.type.base_type = req; } else { - return Error(std::string("type mismatch: expecting: ") + - kTypeNames[e.type.base_type] + - ", found: " + kTypeNames[req] + - ", name: " + (name ? *name : "") + - ", value: " + e.constant); + return Error( + std::string("type mismatch: expecting: ") + + kTypeNames[e.type.base_type] + ", found: " + kTypeNames[req] + + ", name: " + (name ? *name : "") + ", value: " + e.constant); } } + // The exponent suffix of hexadecimal float-point number is mandatory. + // A hex-integer constant is forbidden as an initializer of float number. 
+ if ((kTokenFloatConstant != dtoken) && IsFloat(e.type.base_type)) { + const auto &s = e.constant; + const auto k = s.find_first_of("0123456789."); + if ((std::string::npos != k) && (s.length() > (k + 1)) && + (s.at(k) == '0' && is_alpha_char(s.at(k + 1), 'X')) && + (std::string::npos == s.find_first_of("pP", k + 2))) { + return Error( + "invalid number, the exponent suffix of hexadecimal " + "floating-point literals is mandatory: \"" + + s + "\""); + } + } + NEXT(); } return NoError(); @@ -1375,20 +1388,29 @@ CheckedError Parser::TokenError() { return Error("cannot parse value starting with: " + TokenToStringId(token_)); } -CheckedError Parser::ParseSingleValue(const std::string *name, Value &e) { +CheckedError Parser::ParseSingleValue(const std::string *name, Value &e, + bool check_now) { // First see if this could be a conversion function: if (token_ == kTokenIdentifier && *cursor_ == '(') { - auto functionname = attribute_; + // todo: Extract processing of conversion functions to ParseFunction. + const auto functionname = attribute_; + if (!IsFloat(e.type.base_type)) { + return Error(functionname + ": type of argument mismatch, expecting: " + + kTypeNames[BASE_TYPE_DOUBLE] + + ", found: " + kTypeNames[e.type.base_type] + + ", name: " + (name ? 
*name : "") + ", value: " + e.constant); + } NEXT(); EXPECT('('); - ECHECK(ParseSingleValue(name, e)); + ECHECK(Recurse([&]() { return ParseSingleValue(name, e, false); })); EXPECT(')'); + // calculate with double precision + double x, y = 0.0; + ECHECK(atot(e.constant.c_str(), *this, &x)); + auto func_match = false; // clang-format off #define FLATBUFFERS_FN_DOUBLE(name, op) \ - if (functionname == name) { \ - auto x = strtod(e.constant.c_str(), nullptr); \ - e.constant = NumToString(op); \ - } + if (!func_match && functionname == name) { y = op; func_match = true; } FLATBUFFERS_FN_DOUBLE("deg", x / kPi * 180); FLATBUFFERS_FN_DOUBLE("rad", x * kPi / 180); FLATBUFFERS_FN_DOUBLE("sin", sin(x)); @@ -1400,47 +1422,108 @@ CheckedError Parser::ParseSingleValue(const std::string *name, Value &e) { // TODO(wvo): add more useful conversion functions here. #undef FLATBUFFERS_FN_DOUBLE // clang-format on - // Then check if this could be a string/identifier enum value: - } else if (e.type.base_type != BASE_TYPE_STRING && - e.type.base_type != BASE_TYPE_BOOL && - e.type.base_type != BASE_TYPE_NONE && - (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) { - if (IsIdentifierStart(attribute_[0])) { // Enum value. + if (true != func_match) { + return Error(std::string("Unknown conversion function: ") + functionname + + ", field name: " + (name ? *name : "") + + ", value: " + e.constant); + } + e.constant = NumToString(y); + return NoError(); + } + + auto match = false; + // clang-format off + #define TRY_ECHECK(force, dtoken, check, req) \ + if (!match && ((check) || IsConstTrue(force))) \ + ECHECK(TryTypedValue(name, dtoken, check, e, req, &match)) + // clang-format on + + if (token_ == kTokenStringConstant || token_ == kTokenIdentifier) { + const auto kTokenStringOrIdent = token_; + // The string type is a most probable type, check it first. 
+ TRY_ECHECK(false, kTokenStringConstant, + e.type.base_type == BASE_TYPE_STRING, BASE_TYPE_STRING); + + // avoid escaped and non-ascii in the string + if ((token_ == kTokenStringConstant) && IsScalar(e.type.base_type) && + !attr_is_trivial_ascii_string_) { + return Error( + std::string("type mismatch or invalid value, an initializer of " + "non-string field must be trivial ASCII string: type: ") + + kTypeNames[e.type.base_type] + ", name: " + (name ? *name : "") + + ", value: " + attribute_); + } + + // A boolean as true/false. Boolean as Integer check below. + if (!match && IsBool(e.type.base_type)) { + auto is_true = attribute_ == "true"; + if (is_true || attribute_ == "false") { + attribute_ = is_true ? "1" : "0"; + // accepts both kTokenStringConstant and kTokenIdentifier + TRY_ECHECK(false, kTokenStringOrIdent, IsBool(e.type.base_type), + BASE_TYPE_BOOL); + } + } + // Check if this could be a string/identifier enum value. + // Enum can have only true integer base type. + if (!match && IsInteger(e.type.base_type) && !IsBool(e.type.base_type) && + IsIdentifierStart(*attribute_.c_str())) { int64_t val; ECHECK(ParseEnumFromString(e.type, &val)); e.constant = NumToString(val); NEXT(); - } else { // Numeric constant in string. - if (IsInteger(e.type.base_type)) { - char *end; - e.constant = NumToString(StringToInt(attribute_.c_str(), &end)); - if (*end) return Error("invalid integer: " + attribute_); - } else if (IsFloat(e.type.base_type)) { - char *end; - e.constant = NumToString(strtod(attribute_.c_str(), &end)); - if (*end) return Error("invalid float: " + attribute_); - } else { - FLATBUFFERS_ASSERT(0); // Shouldn't happen, we covered all types. 
- e.constant = "0"; - } - NEXT(); + match = true; } + // float/integer number in string + if ((token_ == kTokenStringConstant) && IsScalar(e.type.base_type)) { + // remove trailing whitespaces from attribute_ + auto last = attribute_.find_last_not_of(' '); + if (std::string::npos != last) // has non-whitespace + attribute_.resize(last + 1); + } + // Float numbers or nan, inf, pi, etc. + TRY_ECHECK(false, kTokenStringOrIdent, IsFloat(e.type.base_type), + BASE_TYPE_FLOAT); + // An integer constant in string. + TRY_ECHECK(false, kTokenStringOrIdent, IsInteger(e.type.base_type), + BASE_TYPE_INT); + // Unknown tokens will be interpreted as string type. + TRY_ECHECK(true, kTokenStringConstant, e.type.base_type == BASE_TYPE_STRING, + BASE_TYPE_STRING); } else { - bool match = false; - ECHECK(TryTypedValue(name, kTokenIntegerConstant, IsScalar(e.type.base_type), e, - BASE_TYPE_INT, &match)); - ECHECK(TryTypedValue(name, kTokenFloatConstant, IsFloat(e.type.base_type), e, - BASE_TYPE_FLOAT, &match)); - ECHECK(TryTypedValue(name, kTokenStringConstant, - e.type.base_type == BASE_TYPE_STRING, e, - BASE_TYPE_STRING, &match)); - auto istrue = IsIdent("true"); - if (istrue || IsIdent("false")) { - attribute_ = NumToString(istrue); - ECHECK(TryTypedValue(name, kTokenIdentifier, IsBool(e.type.base_type), e, - BASE_TYPE_BOOL, &match)); + // Try a float number. + TRY_ECHECK(false, kTokenFloatConstant, IsFloat(e.type.base_type), + BASE_TYPE_FLOAT); + // Integer token can init any scalar (integer of float). + TRY_ECHECK(true, kTokenIntegerConstant, IsScalar(e.type.base_type), + BASE_TYPE_INT); + } + #undef TRY_ECHECK + + if (!match) return TokenError(); + + // The check_now flag must be true when parse a fbs-schema. + // This flag forces to check default scalar values or metadata of field. + // For JSON parser the flag should be false. + // If it is set for JSON each value will be checked twice (see ParseTable). 
+ if (check_now && IsScalar(e.type.base_type)) { + // "re-pack" an integer scalar to remove any ambiguities like leading zeros + // which can be treated as octal-literal (idl_gen_cpp/GenDefaultConstant). + const auto repack = IsInteger(e.type.base_type); + switch (e.type.base_type) { + // clang-format off + #define FLATBUFFERS_TD(ENUM, IDLTYPE, \ + CTYPE, JTYPE, GTYPE, NTYPE, PTYPE, RTYPE) \ + case BASE_TYPE_ ## ENUM: {\ + CTYPE val; \ + ECHECK(atot(e.constant.c_str(), *this, &val)); \ + if(repack) e.constant = NumToString(val); \ + break; } + FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD); + #undef FLATBUFFERS_TD + default: break; + // clang-format on } - if (!match) return TokenError(); } return NoError(); } @@ -1565,7 +1648,7 @@ CheckedError Parser::ParseEnum(bool is_union, EnumDef **dest) { } if (Is('=')) { NEXT(); - ev.value = StringToInt(attribute_.c_str()); + ECHECK(atot(attribute_.c_str(), *this, &ev.value)); EXPECT(kTokenIntegerConstant); if (!opts.proto_mode && prevsize && enum_def->vals.vec[prevsize - 1]->value >= ev.value) @@ -2451,6 +2534,9 @@ CheckedError Parser::DoParse(const char *source, const char **include_paths, ? file_identifier_.c_str() : nullptr); } + // Check that JSON file doesn't contain more objects or IDL directives. + // Comments after JSON are allowed. + EXPECT(kTokenEof); } else if (IsIdent("enum")) { ECHECK(ParseEnum(false, nullptr)); } else if (IsIdent("union")) { @@ -2606,7 +2692,9 @@ Offset FieldDef::Serialize(FlatBufferBuilder *builder, return reflection::CreateField( *builder, builder->CreateString(name), value.type.Serialize(builder), id, value.offset, + // Is uint64>max(int64) tested? IsInteger(value.type.base_type) ? StringToInt(value.constant.c_str()) : 0, + // result may be platform-dependent if underlying is float (not double) IsFloat(value.type.base_type) ? 
strtod(value.constant.c_str(), nullptr) : 0.0, deprecated, required, key, SerializeAttributes(builder, parser), diff --git a/tests/test.cpp b/tests/test.cpp index a02547655..e860e09dd 100644 --- a/tests/test.cpp +++ b/tests/test.cpp @@ -1,4 +1,4 @@ -/* +/* * Copyright 2014 Google Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - +#include #include "flatbuffers/flatbuffers.h" #include "flatbuffers/idl.h" #include "flatbuffers/minireflect.h" @@ -35,11 +35,6 @@ #include "union_vector/union_vector_generated.h" #include "test_assert.h" -// clang-format off -#ifndef FLATBUFFERS_CPP98_STL - #include -#endif - #include "flatbuffers/flexbuffers.h" using namespace MyGame::Example; @@ -1200,7 +1195,6 @@ void TestError_(const char *src, const char *error_substr, const char *file, void ErrorTest() { // In order they appear in idl_parser.cpp TestError("table X { Y:byte; } root_type X; { Y: 999 }", "does not fit"); - TestError(".0", "floating point"); TestError("\"\0", "illegal"); TestError("\"\\q", "escape code"); TestError("table ///", "documentation"); @@ -1238,25 +1232,42 @@ void ErrorTest() { TestError("table X { Y:int; } table X {", "datatype already"); TestError("struct X (force_align: 7) { Y:int; }", "force_align"); TestError("{}", "no root"); - TestError("table X { Y:byte; } root_type X; { Y:1 } { Y:1 }", "one json"); + TestError("table X { Y:byte; } root_type X; { Y:1 } { Y:1 }", "end of file"); + TestError("table X { Y:byte; } root_type X; { Y:1 } table Y{ Z:int }", + "end of file"); TestError("root_type X;", "unknown root"); TestError("struct X { Y:int; } root_type X;", "a table"); TestError("union X { Y }", "referenced"); TestError("union Z { X } struct X { Y:int; }", "only tables"); TestError("table X { Y:[int]; YLength:int; }", "clash"); TestError("table X { Y:byte; } root_type X; { Y:1, Y:2 }", 
"more than once"); + // float to integer conversion is forbidden + TestError("table X { Y:int; } root_type X; { Y:1.0 }", "float"); + TestError("table X { Y:bool; } root_type X; { Y:1.0 }", "float"); } template T TestValue(const char *json, const char *type_name) { flatbuffers::Parser parser; - + parser.builder_.ForceDefaults(true); // return defaults + auto check_default = json ? false : true; + if (check_default) { parser.opts.output_default_scalars_in_json = true; } // Simple schema. - TEST_EQ(parser.Parse(std::string("table X { Y:" + std::string(type_name) + - "; } root_type X;") - .c_str()), - true); + std::string schema = + "table X { Y:" + std::string(type_name) + "; } root_type X;"; + TEST_EQ(parser.Parse(schema.c_str()), true); + + auto done = parser.Parse(check_default ? "{}" : json); + TEST_EQ_STR(parser.error_.c_str(), ""); + TEST_EQ(done, true); + + // Check with print. + std::string print_back; + parser.opts.indent_step = -1; + TEST_EQ(GenerateText(parser, parser.builder_.GetBufferPointer(), &print_back), + true); + // restore value from its default + if (check_default) { TEST_EQ(parser.Parse(print_back.c_str()), true); } - TEST_EQ(parser.Parse(json), true); auto root = flatbuffers::GetRoot( parser.builder_.GetBufferPointer()); return root->GetField(flatbuffers::FieldIndexToOffset(0), 0); @@ -1270,17 +1281,44 @@ void ValueTest() { TEST_EQ(FloatCompare(TestValue("{ Y:0.0314159e+2 }", "float"), (float)3.14159), true); + // number in string + TEST_EQ(FloatCompare(TestValue("{ Y:\"0.0314159e+2\" }", "float"), + (float)3.14159), + true); // Test conversion functions. TEST_EQ(FloatCompare(TestValue("{ Y:cos(rad(180)) }", "float"), -1), true); + // int embedded to string + TEST_EQ(TestValue("{ Y:\"-876\" }", "int=-123"), -876); + TEST_EQ(TestValue("{ Y:\"876\" }", "int=-123"), 876); + // Test negative hex constant. 
- TEST_EQ(TestValue("{ Y:-0x80 }", "int"), -128); + TEST_EQ(TestValue("{ Y:-0x8ea0 }", "int=-0x8ea0"), -36512); + TEST_EQ(TestValue(nullptr, "int=-0x8ea0"), -36512); + + // positive hex constant + TEST_EQ(TestValue("{ Y:0x1abcdef }", "int=0x1"), 0x1abcdef); + // with optional '+' sign + TEST_EQ(TestValue("{ Y:+0x1abcdef }", "int=+0x1"), 0x1abcdef); + // hex in string + TEST_EQ(TestValue("{ Y:\"0x1abcdef\" }", "int=+0x1"), 0x1abcdef); // Make sure we do unsigned 64bit correctly. TEST_EQ(TestValue("{ Y:12335089644688340133 }", "ulong"), 12335089644688340133ULL); + + // bool in string + TEST_EQ(TestValue("{ Y:\"false\" }", "bool=true"), false); + TEST_EQ(TestValue("{ Y:\"true\" }", "bool=\"true\""), true); + TEST_EQ(TestValue("{ Y:'false' }", "bool=true"), false); + TEST_EQ(TestValue("{ Y:'true' }", "bool=\"true\""), true); + + // check comments before and after json object + TEST_EQ(TestValue("/*before*/ { Y:1 } /*after*/", "int"), 1); + TEST_EQ(TestValue("//before \n { Y:1 } //after", "int"), 1); + } void NestedListTest() { @@ -1337,6 +1375,47 @@ void IntegerOutOfRangeTest() { "constant does not fit"); TestError("table T { F:uint; } root_type T; { F:-1 }", "constant does not fit"); + // Check fixed width aliases + TestError("table X { Y:uint8; } root_type X; { Y: -1 }", "does not fit"); + TestError("table X { Y:uint8; } root_type X; { Y: 256 }", "does not fit"); + TestError("table X { Y:uint16; } root_type X; { Y: -1 }", "does not fit"); + TestError("table X { Y:uint16; } root_type X; { Y: 65536 }", "does not fit"); + TestError("table X { Y:uint32; } root_type X; { Y: -1 }", ""); + TestError("table X { Y:uint32; } root_type X; { Y: 4294967296 }", + "does not fit"); + TestError("table X { Y:uint64; } root_type X; { Y: -1 }", ""); + TestError("table X { Y:uint64; } root_type X; { Y: -9223372036854775809 }", + "does not fit"); + TestError("table X { Y:uint64; } root_type X; { Y: 18446744073709551616 }", + "does not fit"); + + TestError("table X { Y:int8; } root_type X; 
{ Y: -129 }", "does not fit"); + TestError("table X { Y:int8; } root_type X; { Y: 128 }", "does not fit"); + TestError("table X { Y:int16; } root_type X; { Y: -32769 }", "does not fit"); + TestError("table X { Y:int16; } root_type X; { Y: 32768 }", "does not fit"); + TestError("table X { Y:int32; } root_type X; { Y: -2147483649 }", ""); + TestError("table X { Y:int32; } root_type X; { Y: 2147483648 }", + "does not fit"); + TestError("table X { Y:int64; } root_type X; { Y: -9223372036854775809 }", + "does not fit"); + TestError("table X { Y:int64; } root_type X; { Y: 9223372036854775808 }", + "does not fit"); + // check out-of-int64 as int8 + TestError("table X { Y:int8; } root_type X; { Y: -9223372036854775809 }", + "does not fit"); + TestError("table X { Y:int8; } root_type X; { Y: 9223372036854775808 }", + "does not fit"); + + // Check default values + TestError("table X { Y:int64=-9223372036854775809; } root_type X; {}", + "does not fit"); + TestError("table X { Y:int64= 9223372036854775808; } root_type X; {}", + "does not fit"); + TestError("table X { Y:uint64; } root_type X; { Y: -1 }", ""); + TestError("table X { Y:uint64=-9223372036854775809; } root_type X; {}", + "does not fit"); + TestError("table X { Y:uint64= 18446744073709551616; } root_type X; {}", + "does not fit"); } void IntegerBoundaryTest() { @@ -1359,6 +1438,207 @@ void IntegerBoundaryTest() { TEST_EQ(TestValue("{ Y:18446744073709551615 }", "ulong"), 18446744073709551615U); TEST_EQ(TestValue("{ Y:0 }", "ulong"), 0); + TEST_EQ(TestValue("{ Y: 18446744073709551615 }", "uint64"), + 18446744073709551615ULL); + // check that the default works + TEST_EQ(TestValue(nullptr, "uint64 = 18446744073709551615"), + 18446744073709551615ULL); +} + +void ValidFloatTest() { + const auto infinityf = flatbuffers::numeric_limits::infinity(); + const auto infinityd = flatbuffers::numeric_limits::infinity(); + // check rounding to infinity + TEST_EQ(TestValue("{ Y:+3.4029e+38 }", "float"), +infinityf); + 
TEST_EQ(TestValue("{ Y:-3.4029e+38 }", "float"), -infinityf); + TEST_EQ(TestValue("{ Y:+1.7977e+308 }", "double"), +infinityd); + TEST_EQ(TestValue("{ Y:-1.7977e+308 }", "double"), -infinityd); + + TEST_EQ(FloatCompare(TestValue("{ Y:0.0314159e+2 }", "float"), + (float)3.14159), + true); + // float in string + TEST_EQ(FloatCompare(TestValue("{ Y:\" 0.0314159e+2 \" }", "float"), + (float)3.14159), + true); + + TEST_EQ(TestValue("{ Y:1 }", "float"), 1.0f); + TEST_EQ(TestValue("{ Y:1.0 }", "float"), 1.0f); + TEST_EQ(TestValue("{ Y:1. }", "float"), 1.0f); + TEST_EQ(TestValue("{ Y:+1. }", "float"), 1.0f); + TEST_EQ(TestValue("{ Y:-1. }", "float"), -1.0f); + TEST_EQ(TestValue("{ Y:1.e0 }", "float"), 1.0f); + TEST_EQ(TestValue("{ Y:1.e+0 }", "float"), 1.0f); + TEST_EQ(TestValue("{ Y:1.e-0 }", "float"), 1.0f); + TEST_EQ(TestValue("{ Y:0.125 }", "float"), 0.125f); + TEST_EQ(TestValue("{ Y:.125 }", "float"), 0.125f); + TEST_EQ(TestValue("{ Y:-.125 }", "float"), -0.125f); + TEST_EQ(TestValue("{ Y:+.125 }", "float"), +0.125f); + TEST_EQ(TestValue("{ Y:5 }", "float"), 5.0f); + TEST_EQ(TestValue("{ Y:\"5\" }", "float"), 5.0f); + + #if defined(FLATBUFFERS_HAS_NEW_STRTOD) + // Old MSVC versions may have problem with this check. 
+ // https://www.exploringbinary.com/visual-c-plus-plus-strtod-still-broken/ + TEST_EQ(TestValue("{ Y:6.9294956446009195e15 }", "double"), + 6929495644600920); + // check nan's + TEST_EQ(std::isnan(TestValue("{ Y:nan }", "double")), true); + TEST_EQ(std::isnan(TestValue("{ Y:nan }", "float")), true); + TEST_EQ(std::isnan(TestValue("{ Y:\"nan\" }", "float")), true); + TEST_EQ(std::isnan(TestValue("{ Y:+nan }", "float")), true); + TEST_EQ(std::isnan(TestValue("{ Y:-nan }", "float")), true); + TEST_EQ(std::isnan(TestValue(nullptr, "float=nan")), true); + TEST_EQ(std::isnan(TestValue(nullptr, "float=-nan")), true); + // check inf + TEST_EQ(TestValue("{ Y:inf }", "float"), infinityf); + TEST_EQ(TestValue("{ Y:\"inf\" }", "float"), infinityf); + TEST_EQ(TestValue("{ Y:+inf }", "float"), infinityf); + TEST_EQ(TestValue("{ Y:-inf }", "float"), -infinityf); + TEST_EQ(TestValue(nullptr, "float=inf"), infinityf); + TEST_EQ(TestValue(nullptr, "float=-inf"), -infinityf); + TestValue( + "{ Y : [0.2, .2, 1.0, -1.0, -2., 2., 1e0, -1e0, 1.0e0, -1.0e0, -3.e2, " + "3.0e2] }", + "[double]"); + TestValue( + "{ Y : [0.2, .2, 1.0, -1.0, -2., 2., 1e0, -1e0, 1.0e0, -1.0e0, -3.e2, " + "3.0e2] }", + "[float]"); + + // Test binary format of float point. 
+ // https://en.cppreference.com/w/cpp/language/floating_literal + // 0x11.12p-1 = (1*16^1 + 2*16^0 + 3*16^-1 + 4*16^-2) * 2^-1 = + TEST_EQ(TestValue("{ Y:0x12.34p-1 }", "double"), 9.1015625); + // hex fraction 1.2 (decimal 1.125) scaled by 2^3, that is 9.0 + TEST_EQ(TestValue("{ Y:-0x0.2p0 }", "float"), -0.125f); + TEST_EQ(TestValue("{ Y:-0x.2p1 }", "float"), -0.25f); + TEST_EQ(TestValue("{ Y:0x1.2p3 }", "float"), 9.0f); + TEST_EQ(TestValue("{ Y:0x10.1p0 }", "float"), 16.0625f); + TEST_EQ(TestValue("{ Y:0x1.2p3 }", "double"), 9.0); + TEST_EQ(TestValue("{ Y:0x10.1p0 }", "double"), 16.0625); + TEST_EQ(TestValue("{ Y:0xC.68p+2 }", "double"), 49.625); + TestValue("{ Y : [0x20.4ep1, +0x20.4ep1, -0x20.4ep1] }", "[double]"); + TestValue("{ Y : [0x20.4ep1, +0x20.4ep1, -0x20.4ep1] }", "[float]"); + +#else // FLATBUFFERS_HAS_NEW_STRTOD + TEST_OUTPUT_LINE("FLATBUFFERS_HAS_NEW_STRTOD tests skipped"); +#endif // FLATBUFFERS_HAS_NEW_STRTOD +} + +void InvalidFloatTest() { + auto invalid_msg = "invalid number"; + auto comma_msg = "expecting: ,"; + TestError("table T { F:float; } root_type T; { F:1,0 }", ""); + TestError("table T { F:float; } root_type T; { F:. }", ""); + TestError("table T { F:float; } root_type T; { F:- }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:+ }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:-. }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:+. 
}", invalid_msg); + TestError("table T { F:float; } root_type T; { F:.e }", ""); + TestError("table T { F:float; } root_type T; { F:-e }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:+e }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:-.e }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:+.e }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:-e1 }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:+e1 }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:1.0e+ }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:1.0e- }", invalid_msg); + // exponent pP is mandatory for hex-float + TestError("table T { F:float; } root_type T; { F:0x0 }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:-0x. }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:0x. }", invalid_msg); + // eE not exponent in hex-float! + TestError("table T { F:float; } root_type T; { F:0x0.0e+ }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:0x0.0e- }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:0x0.0p }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:0x0.0p+ }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:0x0.0p- }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:0x0.0pa1 }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:0x0.0e+ }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:0x0.0e- }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:0x0.0e+0 }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:0x0.0e-0 }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:0x0.0ep+ }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:0x0.0ep- }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:1.2.3 }", invalid_msg); + TestError("table T { 
F:float; } root_type T; { F:1.2.e3 }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:1.2e.3 }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:1.2e0.3 }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:1.2e3. }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:1.2e3.0 }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:+-1.0 }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:1.0e+-1 }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:\"1.0e+-1\" }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:1.e0e }", comma_msg); + TestError("table T { F:float; } root_type T; { F:0x1.p0e }", comma_msg); + TestError("table T { F:float; } root_type T; { F:\" 0x10 \" }", invalid_msg); + // floats in string + TestError("table T { F:float; } root_type T; { F:\"1,2.\" }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:\"1.2e3.\" }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:\"0x1.p0e\" }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:\"0x1.0\" }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:\" 0x1.0\" }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:\"+ 0\" }", invalid_msg); + // disable escapes for "number-in-string" + TestError("table T { F:float; } root_type T; { F:\"\\f1.2e3.\" }", "invalid"); + TestError("table T { F:float; } root_type T; { F:\"\\t1.2e3.\" }", "invalid"); + TestError("table T { F:float; } root_type T; { F:\"\\n1.2e3.\" }", "invalid"); + TestError("table T { F:float; } root_type T; { F:\"\\r1.2e3.\" }", "invalid"); + TestError("table T { F:float; } root_type T; { F:\"4\\x005\" }", "invalid"); + TestError("table T { F:float; } root_type T; { F:\"\'12\'\" }", invalid_msg); + // null is not a number constant! 
+ TestError("table T { F:float; } root_type T; { F:\"null\" }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:null }", invalid_msg); +} + +template +void NumericUtilsTestInteger(const char *lower, const char *upper) { + T x; + TEST_EQ(flatbuffers::StringToNumber("1q", &x), false); + TEST_EQ(x, 0); + TEST_EQ(flatbuffers::StringToNumber(upper, &x), false); + TEST_EQ(x, flatbuffers::numeric_limits::max()); + TEST_EQ(flatbuffers::StringToNumber(lower, &x), false); + auto expval = flatbuffers::is_unsigned::value + ? flatbuffers::numeric_limits::max() + : flatbuffers::numeric_limits::lowest(); + TEST_EQ(x, expval); +} + +template +void NumericUtilsTestFloat(const char *lower, const char *upper) { + T f; + TEST_EQ(flatbuffers::StringToNumber("1q", &f), false); + TEST_EQ(f, 0); + TEST_EQ(flatbuffers::StringToNumber(upper, &f), true); + TEST_EQ(f, +flatbuffers::numeric_limits::infinity()); + TEST_EQ(flatbuffers::StringToNumber(lower, &f), true); + TEST_EQ(f, -flatbuffers::numeric_limits::infinity()); +} + +void NumericUtilsTest() { + NumericUtilsTestInteger("-1", "18446744073709551616"); + NumericUtilsTestInteger("-1", "256"); + NumericUtilsTestInteger("-9223372036854775809", + "9223372036854775808"); + NumericUtilsTestInteger("-129", "128"); + NumericUtilsTestFloat("-3.4029e+38", "+3.4029e+38"); + NumericUtilsTestFloat("-1.7977e+308", "+1.7977e+308"); +} + +void IsAsciiUtilsTest() { + char c = -128; + for (int cnt = 0; cnt < 256; cnt++) { + auto alpha = (('a' <= c) && (c <= 'z')) || (('A' <= c) && (c <= 'Z')); + auto dec = (('0' <= c) && (c <= '9')); + auto hex = (('a' <= c) && (c <= 'f')) || (('A' <= c) && (c <= 'F')); + TEST_EQ(flatbuffers::is_alpha(c), alpha); + TEST_EQ(flatbuffers::is_alnum(c), alpha || dec); + TEST_EQ(flatbuffers::is_digit(c), dec); + TEST_EQ(flatbuffers::is_xdigit(c), dec || hex); + c += 1; + } } void UnicodeTest() { @@ -1572,6 +1852,15 @@ void InvalidUTF8Test() { // U+10400 "encoded" as U+D801 U+DC00 "{ 
F:\"\xED\xA0\x81\xED\xB0\x80\"}", "illegal UTF-8 sequence"); + + // Check independence of identifier from locale. + std::string locale_ident; + locale_ident += "table T { F"; + locale_ident += static_cast(-32); // unsigned 0xE0 + locale_ident += " :string; }"; + locale_ident += "root_type T;"; + locale_ident += "{}"; + TestError(locale_ident.c_str(), ""); } void UnknownFieldsTest() { @@ -2100,13 +2389,14 @@ int FlatBufferTests() { ParseProtoBufAsciiTest(); TypeAliasesTest(); EndianSwapTest(); - JsonDefaultTest(); - FlexBuffersTest(); UninitializedVectorTest(); EqualOperatorTest(); - + NumericUtilsTest(); + IsAsciiUtilsTest(); + ValidFloatTest(); + InvalidFloatTest(); return 0; }