Refactoring of idl_parser (#4948)

* Refactoring of numbers parser

More accurate parse of float and double.
Hexadecimal floats.
Check "out-of-range" of uint64 fields.
Check correctness of default values and metadata.

* Remove locale-independent code strtod/strtof from PR #4948.

* small optimization

* Add is_(ascii) functions

* is_ascii cleanup

* Fix format conversion

* Refine number parser

* Make code compatible with Android build

* Remove unnecessary suppression of warning C4127
This commit is contained in:
Vladimir Glavnyy 2018-10-12 00:37:47 +07:00 committed by Wouter van Oortmerssen
parent 53ce80ce91
commit 4ed6fafdfa
11 changed files with 880 additions and 206 deletions

2
.gitignore vendored
View File

@ -109,3 +109,5 @@ dart/.dart_tool/
dart/build/
dart/doc/api/
Cargo.lock
.corpus**
.seed**

View File

@ -49,11 +49,26 @@ file_extension_decl = `file_extension` string\_constant `;`
file_identifier_decl = `file_identifier` string\_constant `;`
integer\_constant = `-?[0-9]+` | `true` | `false`
float\_constant = `-?[0-9]+.[0-9]+((e|E)(+|-)?[0-9]+)?`
string\_constant = `\".*?\"`
ident = `[a-zA-Z_][a-zA-Z0-9_]*`
`[:digit:]` = `[0-9]`
`[:xdigit:]` = `[0-9a-fA-F]`
dec\_integer\_constant = `[-+]?[:digit:]+`
hex\_integer\_constant = `[-+]?0[xX][:xdigit:]+`
integer\_constant = dec\_integer\_constant | hex\_integer\_constant
dec\_float\_constant = `[-+]?(([.][:digit:]+)|([:digit:]+[.][:digit:]*)|([:digit:]+))([eE][-+]?[:digit:]+)?`
hex\_float\_constant = `[-+]?0[xX](([.][:xdigit:]+)|([:xdigit:]+[.][:xdigit:]*)|([:xdigit:]+))([pP][-+]?[:digit:]+)`
special\_float\_constant = `[-+]?(nan|inf|infinity)`
float\_constant = dec\_float\_constant | hex\_float\_constant | special\_float\_constant
boolean\_constant = `(true|false)` | (integer\_constant ? `true` : `false`)

View File

@ -385,6 +385,31 @@ When parsing JSON, it recognizes the following escape codes in strings:
It also generates these escape codes back again when generating JSON from a
binary representation.
When parsing numbers, the parser is more flexible than JSON.
The format of numeric literals is closer to that of C/C++.
According to the [grammar](@ref flatbuffers_grammar), it accepts the following
numerical literals:
- An integer literal can have any number of leading zero `0` digits.
Unlike C/C++, the parser ignores a leading zero, not interpreting it as the
beginning of the octal number.
The numbers `[081, -00094]` are equal to `[81, -94]` decimal integers.
- The parser accepts unsigned and signed hexadecimal integer numbers.
For example: `[0x123, +0x45, -0x67]` are equal to `[291, 69, -103]` decimals.
- The format of floating-point numbers is fully compatible with C/C++ format.
If a modern C++ compiler is used the parser accepts hexadecimal and special
floating-point literals as well:
`[-1.0, 2., .3e0, 3.e4, 0x21.34p-5, -inf, nan]`.
The exponent suffix of a hexadecimal floating-point number is mandatory.
Extended floating-point support was tested with:
- x64 Windows: `MSVC2015` and higher.
- x64 Linux: `LLVM 6.0`, `GCC 4.9` and higher.
- For compatibility with a JSON lint tool all numeric literals of scalar
fields can be wrapped in a quoted string:
`"1", "2.0", "0x48A", "0x0C.0Ep-1", "-inf", "true"`.
## Guidelines
### Efficiency

View File

@ -180,6 +180,17 @@
#endif // __has_include
#endif // !FLATBUFFERS_HAS_STRING_VIEW
// FLATBUFFERS_HAS_NEW_STRTOD: set to 1 when the toolchain is expected to ship
// modern (C++11) strtod and strtof functions, i.e. ones that accept:
// 1) nan/inf strings as argument of strtod;
// 2) hex-float as argument of strtod/strtof.
// The macro is only auto-detected if it has not been defined already.
// Auto-detection enables it for MSVC with _MSC_VER >= 1900, for GCC-compatible
// compilers reporting version 4.9 or newer, and for any clang.
#ifndef FLATBUFFERS_HAS_NEW_STRTOD
#if (defined(_MSC_VER) && _MSC_VER >= 1900) || \
(defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)) || \
(defined(__clang__))
#define FLATBUFFERS_HAS_NEW_STRTOD 1
#endif
#endif // !FLATBUFFERS_HAS_NEW_STRTOD
/// @endcond
/// @file

View File

@ -484,7 +484,11 @@ struct IDLOptions {
// This encapsulates where the parser is in the current source file.
struct ParserState {
ParserState()
: cursor_(nullptr), line_start_(nullptr), line_(0), token_(-1) {}
: cursor_(nullptr),
line_start_(nullptr),
line_(0),
token_(-1),
attr_is_trivial_ascii_string_(true) {}
protected:
void ResetState(const char *source) {
@ -508,6 +512,10 @@ struct ParserState {
int line_; // the current line being parsed
int token_;
// Flag: text in attribute_ is true ASCII string without escape
// sequences. Only printable ASCII (without [\t\r\n]).
// Used for number-in-string (and base64 string in future).
bool attr_is_trivial_ascii_string_;
std::string attribute_;
std::vector<std::string> doc_comment_;
};
@ -644,7 +652,8 @@ class Parser : public ParserState {
bool ParseFlexBuffer(const char *source, const char *source_filename,
flexbuffers::Builder *builder);
FLATBUFFERS_CHECKED_ERROR CheckInRange(int64_t val, int64_t min, int64_t max);
FLATBUFFERS_CHECKED_ERROR InvalidNumber(const char *number,
const std::string &msg);
StructDef *LookupStruct(const std::string &id) const;
@ -711,7 +720,7 @@ class Parser : public ParserState {
BaseType req, bool *destmatch);
FLATBUFFERS_CHECKED_ERROR ParseHash(Value &e, FieldDef* field);
FLATBUFFERS_CHECKED_ERROR TokenError();
FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e);
FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e, bool check_now);
FLATBUFFERS_CHECKED_ERROR ParseEnumFromString(Type &type, int64_t *result);
StructDef *LookupCreateStruct(const std::string &name,
bool create_if_new = true,

View File

@ -37,9 +37,9 @@
// Not possible if Microsoft Compiler before 2012
// Possible if the language feature __cpp_alias_templates is defined well
// Or possible if the C++ std is C++11 or newer
#if !(defined(_MSC_VER) && _MSC_VER <= 1700 /* MSVC2012 */) \
&& ((defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \
|| (defined(__cplusplus) && __cplusplus >= 201103L))
#if (defined(_MSC_VER) && _MSC_VER > 1700 /* MSVC2012 */) \
|| (defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \
|| (defined(__cplusplus) && __cplusplus >= 201103L)
#define FLATBUFFERS_TEMPLATES_ALIASES
#endif
@ -88,12 +88,33 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
#endif // defined(FLATBUFFERS_TEMPLATES_ALIASES)
#else
template <typename T> class numeric_limits :
public std::numeric_limits<T> {};
public std::numeric_limits<T> {
public:
// Android NDK fix.
static T lowest() {
return std::numeric_limits<T>::min();
}
};
// float limits: everything from std::numeric_limits<float> plus lowest(),
// the most negative finite value (the negation of FLT_MAX).
template <> class numeric_limits<float> : public std::numeric_limits<float> {
 public:
  static float lowest() {
    const float most_negative = -FLT_MAX;
    return most_negative;
  }
};
// double limits: everything from std::numeric_limits<double> plus lowest(),
// the most negative finite value (the negation of DBL_MAX).
template <> class numeric_limits<double> : public std::numeric_limits<double> {
 public:
  static double lowest() {
    const double most_negative = -DBL_MAX;
    return most_negative;
  }
};
// Stand-alone limits for unsigned long long: min() is zero, max() is the
// all-ones bit pattern, and lowest() is the same value as min().
template <> class numeric_limits<unsigned long long> {
 public:
  static unsigned long long min() {
    return 0ULL;
  }
  static unsigned long long max() {
    return ~0ULL;
  }
  static unsigned long long lowest() {
    return min();
  }
};
template <> class numeric_limits<long long> {
@ -105,6 +126,9 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
return static_cast<long long>(
(1ULL << ((sizeof(long long) << 3) - 1)) - 1);
}
static long long lowest() {
return numeric_limits<long long>::min();
}
};
#endif // FLATBUFFERS_CPP98_STL
@ -114,6 +138,7 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
template <typename T, typename U> using is_same = std::is_same<T,U>;
template <typename T> using is_floating_point = std::is_floating_point<T>;
template <typename T> using is_unsigned = std::is_unsigned<T>;
template <typename T> using make_unsigned = std::make_unsigned<T>;
#else
// Map C++ TR1 templates defined by stlport.
template <typename T> using is_scalar = std::tr1::is_scalar<T>;
@ -121,6 +146,13 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
template <typename T> using is_floating_point =
std::tr1::is_floating_point<T>;
template <typename T> using is_unsigned = std::tr1::is_unsigned<T>;
// Android NDK doesn't have std::make_unsigned or std::tr1::make_unsigned.
// Minimal replacement: the primary template maps an already-unsigned type to
// itself and rejects anything else at compile time; every built-in signed
// integer type (and plain char) gets an explicit specialization below.
template<typename T> struct make_unsigned {
  static_assert(is_unsigned<T>::value, "Specialization not implemented!");
  using type = T;
};
template<> struct make_unsigned<char> { using type = unsigned char; };
template<> struct make_unsigned<signed char> { using type = unsigned char; };
template<> struct make_unsigned<short> { using type = unsigned short; };
template<> struct make_unsigned<int> { using type = unsigned int; };
template<> struct make_unsigned<long> { using type = unsigned long; };
template<> struct make_unsigned<long long> {
  using type = unsigned long long;
};
#endif // !FLATBUFFERS_CPP98_STL
#else
// MSVC 2010 doesn't support C++11 aliases.
@ -129,6 +161,7 @@ inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
template <typename T> struct is_floating_point :
public std::is_floating_point<T> {};
template <typename T> struct is_unsigned : public std::is_unsigned<T> {};
template <typename T> struct make_unsigned : public std::make_unsigned<T> {};
#endif // defined(FLATBUFFERS_TEMPLATES_ALIASES)
#ifndef FLATBUFFERS_CPP98_STL

View File

@ -17,7 +17,7 @@
#ifndef FLATBUFFERS_UTIL_H_
#define FLATBUFFERS_UTIL_H_
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <fstream>
@ -50,6 +50,52 @@
namespace flatbuffers {
// Routes a constant through a function call so that a test on it does not
// need `#pragma warning(disable: 4127) // C4127: expression is constant`.
// Returns the double logical negation of `t`.
template<typename T>
FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(const T &t) {
  return !(!t);
}
// @locale-independent functions for ASCII characters set.
// Check that integer scalar is in closed range: (a <= x <= b)
// using one compare (conditional branch) operator.
template<typename T> inline bool check_in_range(T x, T a, T b) {
// (Hacker's Delight): `a <= x <= b` <=> `(x-a) <={u} (b-a)`.
FLATBUFFERS_ASSERT(a <= b); // static_assert only if 'a' & 'b' templated
typedef typename flatbuffers::make_unsigned<T>::type U;
return (static_cast<U>(x - a) <= static_cast<U>(b - a));
}
// Case-insensitive isalpha for the ASCII character set.
static inline bool is_alpha(char c) {
  // ASCII only: clearing bit 0x20 (mask ~0x20 = 0xDF) folds a lower-case
  // letter onto its upper-case twin, so one range test covers both cases.
  const int fold_mask = 0xDF;
  const int folded = c & fold_mask;
  return check_in_range(folded, 'a' & fold_mask, 'z' & fold_mask);
}
// Case-insensitive comparison of `c` with the letter `alpha`.
// `alpha` must itself be a letter (asserted).
static inline bool is_alpha_char(char c, char alpha) {
  FLATBUFFERS_ASSERT(is_alpha(alpha));
  // ASCII only: both sides are folded to upper case by clearing bit 0x20
  // (mask ~0x20 = 0xDF) before they are compared.
  const int folded_c = c & 0xDF;
  const int folded_alpha = alpha & 0xDF;
  return folded_c == folded_alpha;
}
// Decimal digit test ('0'..'9') implemented as a plain range check.
// Background (https://en.cppreference.com/w/cpp/string/byte/isxdigit):
// isdigit and isxdigit are the only standard narrow character classification
// functions that are not affected by the currently installed C locale,
// although some implementations (e.g. Microsoft in 1252 codepage) may classify
// additional single-byte characters as digits.
static inline bool is_digit(char c) {
  const char first_digit = '0';
  const char last_digit = '9';
  return check_in_range(c, first_digit, last_digit);
}
// Hexadecimal digit test: a decimal digit or a letter 'a'..'f' of either case.
static inline bool is_xdigit(char c) {
  // Replace by look-up table.
  const bool decimal = is_digit(c);
  // Clearing bit 0x20 (mask 0xDF) makes the letter test case-insensitive.
  const bool hex_letter = check_in_range(c & 0xDF, 'a' & 0xDF, 'f' & 0xDF);
  return decimal | hex_letter;
}
// Case-insensitive isalnum: true for a letter or a decimal digit.
static inline bool is_alnum(char c) {
  if (is_alpha(c)) return true;
  return is_digit(c);
}
// @end-locale-independent functions for ASCII character set
#ifdef FLATBUFFERS_PREFER_PRINTF
template<typename T> size_t IntToDigitCount(T t) {
size_t digit_count = 0;
@ -158,6 +204,7 @@ template<> inline std::string NumToString<float>(float t) {
// The returned string length is always xdigits long, prefixed by 0 digits.
// For example, IntToStringHex(0x23, 8) returns the string "00000023".
inline std::string IntToStringHex(int i, int xdigits) {
FLATBUFFERS_ASSERT(i >= 0);
// clang-format off
#ifndef FLATBUFFERS_PREFER_PRINTF
std::stringstream ss;
@ -170,28 +217,178 @@ inline std::string IntToStringHex(int i, int xdigits) {
// clang-format on
}
// Portable implementation of strtoll().
inline int64_t StringToInt(const char *str, char **endptr = nullptr,
int base = 10) {
// Thin wrapper around strtod().
//   str     - text to convert.
//   str_end - forwarded to strtod() unchanged.
// Returns whatever strtod() returns for the given arguments.
static inline double strtod_impl(const char *str, char **str_end) {
  // The result of strtod (like printf, etc) depends on the current C-locale.
  const double value = strtod(str, str_end);
  return value;
}
static inline float strtof_impl(const char *str, char **str_end) {
// Use "strtof" for float and strtod for double to avoid double=>float
// rounding problems (see
// https://en.cppreference.com/w/cpp/numeric/fenv/feround) or problems with
// std::numeric_limits<float>::is_iec559==false. Example:
// for (int mode : { FE_DOWNWARD, FE_TONEAREST, FE_TOWARDZERO, FE_UPWARD }){
// const char *s = "-4e38";
// std::fesetround(mode);
// std::cout << strtof(s, nullptr) << "; " << strtod(s, nullptr) << "; "
// << static_cast<float>(strtod(s, nullptr)) << "\n";
// }
// Gives:
// -inf; -4e+38; -inf
// -inf; -4e+38; -inf
// -inf; -4e+38; -3.40282e+38
// -inf; -4e+38; -3.40282e+38
// clang-format off
#ifdef _MSC_VER
return _strtoi64(str, endptr, base);
#ifdef FLATBUFFERS_HAS_NEW_STRTOD
return strtof(str, str_end);
#else
return strtoll(str, endptr, base);
#endif
return static_cast<float>(strtod_impl(str, str_end));
#endif // !FLATBUFFERS_HAS_NEW_STRTOD
// clang-format on
}
// Portable implementation of strtoull().
inline uint64_t StringToUInt(const char *str, char **endptr = nullptr,
// Adaptor for strtoull()/strtoll() (_strtoui64()/_strtoi64() under MSVC).
// Flatbuffers accepts numbers with any count of leading zeros (-009 is -9),
// while strtoll with base=0 interprets the first leading zero as an octal
// prefix. In future, it is possible to add prefixed 0b0101.
// 1) Checks the errno code for an overflow condition (out of range), unless
//    `check_errno` is false.
// 2) If base <= 0, the function tries to detect the base of the number by its
//    prefix: "0x"/"0X" selects base 16, anything else selects base 10.
//
// Parameters:
// - str: the text to convert; must not be null.
// - endptr: must not be null; receives the end position of the conversion.
//   It is reset to `str` when errno reports an error (with check_errno) or
//   when a negative number is given for an unsigned T.
// - base: numeric base, or <= 0 to request prefix detection.
// - check_errno: if true, errno is cleared before and inspected after the
//   conversion.
//
// Return value (like strtoull and strtoll, but reject partial result):
// - If successful, an integer value corresponding to the str is returned.
// - If full string conversion can't be performed, 0 is returned.
// - If the converted value falls out of range of corresponding return type, a
// range error occurs. In this case value MAX(T)/MIN(T) is returned.
template<typename T>
inline T StringToInteger64Impl(const char *const str, const char **endptr,
const int base, const bool check_errno = true) {
static_assert(flatbuffers::is_same<T, int64_t>::value ||
flatbuffers::is_same<T, uint64_t>::value,
"Type T must be either int64_t or uint64_t");
FLATBUFFERS_ASSERT(str && endptr); // endptr must not be null
if (base <= 0) {
// Base detection: skip every leading non-digit character, then look for
// a "0x"/"0X" prefix at the first digit.
auto s = str;
while (*s && !is_digit(*s)) s++;
if (s[0] == '0' && is_alpha_char(s[1], 'X'))
return StringToInteger64Impl<T>(str, endptr, 16, check_errno);
// if the prefix does not match, try base=10
return StringToInteger64Impl<T>(str, endptr, 10, check_errno);
} else {
if (check_errno) errno = 0; // clear thread-local errno
// calculate result
T result;
if (IsConstTrue(flatbuffers::is_same<T, int64_t>::value)) {
// clang-format off
#ifdef _MSC_VER
result = _strtoi64(str, const_cast<char**>(endptr), base);
#else
result = strtoll(str, const_cast<char**>(endptr), base);
#endif
// clang-format on
} else { // T is uint64_t
// clang-format off
#ifdef _MSC_VER
result = _strtoui64(str, const_cast<char**>(endptr), base);
#else
result = strtoull(str, const_cast<char**>(endptr), base);
#endif
// clang-format on
// The strtoull accepts negative numbers:
// If the minus sign was part of the input sequence, the numeric value
// calculated from the sequence of digits is negated as if by unary minus
// in the result type, which applies unsigned integer wraparound rules.
// Fix this behaviour (except -0).
// Only a fully consumed string with a non-zero result is inspected here.
if ((**endptr == '\0') && (0 != result)) {
auto s = str;
while (*s && !is_digit(*s)) s++;
s = (s > str) ? (s - 1) : s; // step back one symbol, to just before the first digit
if (*s == '-') {
// For unsigned types return max to distinguish from
// "no conversion can be performed".
result = flatbuffers::numeric_limits<T>::max();
// point to the start of string, like errno
*endptr = str;
}
}
}
// check for overflow
if (check_errno && errno) *endptr = str; // point it to start of input
// erase partial result, but keep an overflow value
// (after an error *endptr == str, so `result` is left untouched)
if ((*endptr != str) && (**endptr != '\0')) result = 0;
return result;
}
}
// Convert a string to an instance of T.
// Return value (matched with StringToInteger64Impl and strtod):
// - If successful, a numeric value corresponding to the str is returned.
// - If full string conversion can't be performed, 0 is returned.
// - If the converted value falls out of range of corresponding return type, a
// range error occurs. In this case value MAX(T)/MIN(T) is returned.
template<typename T> inline bool StringToNumber(const char *s, T *val) {
FLATBUFFERS_ASSERT(s && val);
const char *end = nullptr;
// The errno check isn't needed. strtoll will return MAX/MIN on overlow.
const int64_t i = StringToInteger64Impl<int64_t>(s, &end, -1, false);
*val = static_cast<T>(i);
const auto done = (s != end) && (*end == '\0');
if (done) {
const int64_t max = flatbuffers::numeric_limits<T>::max();
const int64_t min = flatbuffers::numeric_limits<T>::lowest();
if (i > max) {
*val = static_cast<T>(max);
return false;
}
if (i < min) {
// For unsigned types return max to distinguish from
// "no conversion can be performed" when 0 is returned.
*val = static_cast<T>(flatbuffers::is_unsigned<T>::value ? max : min);
return false;
}
}
return done;
}
// int64_t version: parses with StringToInteger64Impl<int64_t>, leaving its
// `check_errno` argument at the default, and stores the result in *val.
// Returns true only if some input was consumed and the end of the string
// was reached.
template<> inline bool StringToNumber<int64_t>(const char *s, int64_t *val) {
  const char *stop = s;  // request errno checking
  *val = StringToInteger64Impl<int64_t>(s, &stop, -1);
  if (stop == s) return false;
  return *stop == '\0';
}
// uint64_t version: parses with StringToInteger64Impl<uint64_t>, leaving its
// `check_errno` argument at the default, and stores the result in *val.
// Returns true only if some input was consumed and the end of the string
// was reached.
template<> inline bool StringToNumber<uint64_t>(const char *s, uint64_t *val) {
  const char *stop = s;  // request errno checking
  *val = StringToInteger64Impl<uint64_t>(s, &stop, -1);
  if (stop == s) return false;
  return *stop == '\0';
}
// double version: converts with strtod_impl and stores the result in *val.
// Returns true only if some input was consumed and the end of the string
// was reached; otherwise *val is reset to 0.
template<> inline bool StringToNumber<double>(const char *s, double *val) {
  FLATBUFFERS_ASSERT(s && val);
  char *stop = nullptr;
  *val = strtod_impl(s, &stop);
  const bool whole_string = (s != stop) && (*stop == '\0');
  if (whole_string) return true;
  *val = 0;  // erase partial result
  return false;
}
// float version: converts with strtof_impl and stores the result in *val.
// Returns true only if some input was consumed and the end of the string
// was reached; otherwise *val is reset to 0.
template<> inline bool StringToNumber<float>(const char *s, float *val) {
  FLATBUFFERS_ASSERT(s && val);
  char *stop = nullptr;
  *val = strtof_impl(s, &stop);
  const bool whole_string = (s != stop) && (*stop == '\0');
  if (whole_string) return true;
  *val = 0;  // erase partial result
  return false;
}
// Converts `str` to int64_t via StringToInteger64Impl<int64_t>.
//   endptr - optional; when null, a local pointer is passed on instead, so
//            the implementation always receives a non-null end pointer.
//   base   - forwarded unchanged (default 10).
inline int64_t StringToInt(const char *str, const char **endptr = nullptr,
                           int base = 10) {
  const char *unused_end = nullptr;
  const char **end_slot = endptr;
  if (!end_slot) end_slot = &unused_end;
  return StringToInteger64Impl<int64_t>(str, end_slot, base);
}
inline uint64_t StringToUInt(const char *str, const char **endptr = nullptr,
int base = 10) {
// clang-format off
#ifdef _MSC_VER
return _strtoui64(str, endptr, base);
#else
return strtoull(str, endptr, base);
#endif
// clang-format on
const char *ep = nullptr;
return StringToInteger64Impl<uint64_t>(str, endptr ? endptr : &ep, base);
}
typedef bool (*LoadFileFunction)(const char *filename, bool binary,

View File

@ -145,7 +145,7 @@ class CppGenerator : public BaseGenerator {
std::string guard = file_name_;
// Remove any non-alpha-numeric characters that may appear in a filename.
struct IsAlnum {
bool operator()(char c) const { return !isalnum(c); }
bool operator()(char c) const { return !is_alnum(c); }
};
guard.erase(std::remove_if(guard.begin(), guard.end(), IsAlnum()),
guard.end());

View File

@ -149,19 +149,23 @@ bool Print<const void *>(const void *val, Type type, int indent,
return true;
}
// Parses the textual default value of a field (fd.value.constant) into a T
// using StringToNumber. A failed conversion trips FLATBUFFERS_ASSERT.
template<typename T> static T GetFieldDefault(const FieldDef &fd) {
  T parsed;
  const char *const text = fd.value.constant.c_str();
  auto ok = StringToNumber(text, &parsed);
  // Keeps `ok` referenced even if the assert below expands to nothing
  // (presumably the reason for the cast; confirm against FLATBUFFERS_ASSERT).
  (void)ok;
  FLATBUFFERS_ASSERT(ok);
  return parsed;
}
// Generate text for a scalar field.
template<typename T> static bool GenField(const FieldDef &fd,
const Table *table, bool fixed,
const IDLOptions &opts,
int indent,
std::string *_text) {
return Print(fixed ?
reinterpret_cast<const Struct *>(table)->GetField<T>(fd.value.offset) :
table->GetField<T>(fd.value.offset,
IsFloat(fd.value.type.base_type) ?
static_cast<T>(strtod(fd.value.constant.c_str(), nullptr)) :
static_cast<T>(StringToInt(fd.value.constant.c_str()))),
fd.value.type, indent, nullptr, opts, _text);
template<typename T>
static bool GenField(const FieldDef &fd, const Table *table, bool fixed,
const IDLOptions &opts, int indent, std::string *_text) {
return Print(
fixed ? reinterpret_cast<const Struct *>(table)->GetField<T>(
fd.value.offset)
: table->GetField<T>(fd.value.offset, GetFieldDefault<T>(fd)),
fd.value.type, indent, nullptr, opts, _text);
}
static bool GenStruct(const StructDef &struct_def, const Table *table,

View File

@ -1,4 +1,4 @@
/*
/*
* Copyright 2014 Google Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -119,57 +119,22 @@ CheckedError Parser::RecurseError() {
NumToString(FLATBUFFERS_MAX_PARSING_DEPTH) + " reached");
}
inline std::string OutOfRangeErrorMsg(int64_t val, const std::string &op,
int64_t limit) {
const std::string cause = NumToString(val) + op + NumToString(limit);
return "constant does not fit (" + cause + ")";
CheckedError Parser::InvalidNumber(const char *number, const std::string &msg) {
return Error("invalid number: \"" + std::string(number) + "\"" + msg);
}
// Ensure that integer values we parse fit inside the declared integer type.
CheckedError Parser::CheckInRange(int64_t val, int64_t min, int64_t max) {
if (val < min)
return Error(OutOfRangeErrorMsg(val, " < ", min));
else if (val > max)
return Error(OutOfRangeErrorMsg(val, " > ", max));
else
return NoError();
}
// atot: templated version of atoi/atof: convert a string to an instance of T.
template<typename T>
inline CheckedError atot(const char *s, Parser &parser, T *val) {
int64_t i = StringToInt(s);
const int64_t min = flatbuffers::numeric_limits<T>::min();
const int64_t max = flatbuffers::numeric_limits<T>::max();
*val = (T)i; // Assign this first to make ASAN happy.
return parser.CheckInRange(i, min, max);
}
template<>
inline CheckedError atot<uint64_t>(const char *s, Parser &parser,
uint64_t *val) {
(void)parser;
*val = StringToUInt(s);
return NoError();
}
template<>
inline CheckedError atot<bool>(const char *s, Parser &parser, bool *val) {
(void)parser;
*val = 0 != atoi(s);
return NoError();
}
template<>
inline CheckedError atot<float>(const char *s, Parser &parser, float *val) {
(void)parser;
*val = static_cast<float>(strtod(s, nullptr));
return NoError();
}
template<>
inline CheckedError atot<double>(const char *s, Parser &parser, double *val) {
(void)parser;
*val = strtod(s, nullptr);
return NoError();
}
auto done = StringToNumber(s, val);
if (done) return NoError();
return parser.InvalidNumber(
s, (0 == *val)
? ""
: (", constant does not fit [" +
NumToString(flatbuffers::numeric_limits<T>::lowest()) + "; " +
NumToString(flatbuffers::numeric_limits<T>::max()) + "]"));
}
template<>
inline CheckedError atot<Offset<void>>(const char *s, Parser &parser,
Offset<void> *val) {
@ -239,8 +204,9 @@ std::string Parser::TokenToStringId(int t) const {
// Parses exactly nibbles worth of hex digits into a number, or error.
CheckedError Parser::ParseHexNum(int nibbles, uint64_t *val) {
FLATBUFFERS_ASSERT(nibbles > 0);
for (int i = 0; i < nibbles; i++)
if (!isxdigit(static_cast<unsigned char>(cursor_[i])))
if (!is_xdigit(cursor_[i]))
return Error("escape code must be followed by " + NumToString(nibbles) +
" hex digits");
std::string target(cursor_, cursor_ + nibbles);
@ -261,14 +227,15 @@ CheckedError Parser::SkipByteOrderMark() {
return NoError();
}
bool IsIdentifierStart(char c) {
return isalpha(static_cast<unsigned char>(c)) || c == '_';
static inline bool IsIdentifierStart(char c) {
return is_alpha(c) || (c == '_');
}
CheckedError Parser::Next() {
doc_comment_.clear();
bool seen_newline = cursor_ == source_;
attribute_.clear();
attr_is_trivial_ascii_string_ = true;
for (;;) {
char c = *cursor_++;
token_ = c;
@ -294,10 +261,6 @@ CheckedError Parser::Next() {
case ':':
case ';':
case '=': return NoError();
case '.':
if (!isdigit(static_cast<unsigned char>(*cursor_)))
return NoError();
return Error("floating point constant can\'t start with \".\"");
case '\"':
case '\'': {
int unicode_high_surrogate = -1;
@ -306,6 +269,7 @@ CheckedError Parser::Next() {
if (*cursor_ < ' ' && static_cast<signed char>(*cursor_) >= 0)
return Error("illegal character in string constant");
if (*cursor_ == '\\') {
attr_is_trivial_ascii_string_ = false; // has escape sequence
cursor_++;
if (unicode_high_surrogate != -1 && *cursor_ != 'u') {
return Error(
@ -393,6 +357,9 @@ CheckedError Parser::Next() {
return Error(
"illegal Unicode sequence (unpaired high surrogate)");
}
// reset if non-printable
attr_is_trivial_ascii_string_ &= check_in_range(*cursor_, ' ', '~');
attribute_ += *cursor_++;
}
}
@ -400,7 +367,8 @@ CheckedError Parser::Next() {
return Error("illegal Unicode sequence (unpaired high surrogate)");
}
cursor_++;
if (!opts.allow_non_utf8 && !ValidateUTF8(attribute_)) {
if (!attr_is_trivial_ascii_string_ && !opts.allow_non_utf8 &&
!ValidateUTF8(attribute_)) {
return Error("illegal UTF-8 sequence");
}
token_ = kTokenStringConstant;
@ -430,55 +398,69 @@ CheckedError Parser::Next() {
}
// fall thru
default:
if (IsIdentifierStart(c)) {
const auto has_sign = (c == '+') || (c == '-');
// '-'/'+' and following identifier - can be a predefined constant like:
// NAN, INF, PI, etc.
if (IsIdentifierStart(c) || (has_sign && IsIdentifierStart(*cursor_))) {
// Collect all chars of an identifier:
const char *start = cursor_ - 1;
while (isalnum(static_cast<unsigned char>(*cursor_)) || *cursor_ == '_')
cursor_++;
while (IsIdentifierStart(*cursor_) || is_digit(*cursor_)) cursor_++;
attribute_.append(start, cursor_);
token_ = kTokenIdentifier;
token_ = has_sign ? kTokenStringConstant : kTokenIdentifier;
return NoError();
} else if (isdigit(static_cast<unsigned char>(c)) || c == '-') {
const char *start = cursor_ - 1;
if (c == '-' && *cursor_ == '0' &&
(cursor_[1] == 'x' || cursor_[1] == 'X')) {
++start;
++cursor_;
attribute_.append(&c, &c + 1);
c = '0';
}
auto dot_lvl = (c == '.') ? 0 : 1; // dot_lvl==0 <=> exactly one '.' seen
if (!dot_lvl && !is_digit(*cursor_)) return NoError(); // enum?
// Parser accepts hexadecimal-floating-literal (see C++ 5.13.4).
if (is_digit(c) || has_sign || !dot_lvl) {
const auto start = cursor_ - 1;
auto start_digits = !is_digit(c) ? cursor_ : cursor_ - 1;
if (!is_digit(c) && is_digit(*cursor_)){
start_digits = cursor_; // see digit in cursor_ position
c = *cursor_++;
}
if (c == '0' && (*cursor_ == 'x' || *cursor_ == 'X')) {
cursor_++;
while (isxdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
attribute_.append(start + 2, cursor_);
attribute_ = NumToString(static_cast<int64_t>(
StringToUInt(attribute_.c_str(), nullptr, 16)));
token_ = kTokenIntegerConstant;
return NoError();
}
while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
if (*cursor_ == '.' || *cursor_ == 'e' || *cursor_ == 'E') {
if (*cursor_ == '.') {
cursor_++;
while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
// hex-float can't begin with '.'
auto use_hex = dot_lvl && (c == '0') && is_alpha_char(*cursor_, 'X');
if (use_hex) start_digits = ++cursor_; // '0x' is the prefix, skip it
// Read an integer number or the mantissa of a floating-point number.
do {
if (use_hex) {
while (is_xdigit(*cursor_)) cursor_++;
} else {
while (is_digit(*cursor_)) cursor_++;
}
// See if this float has a scientific notation suffix. Both JSON
// and C++ (through strtod() we use) have the same format:
if (*cursor_ == 'e' || *cursor_ == 'E') {
} while ((*cursor_ == '.') && (++cursor_) && (--dot_lvl >= 0));
// Exponent of float-point number.
if ((dot_lvl >= 0) && (cursor_ > start_digits)) {
// The exponent suffix of hexadecimal float number is mandatory.
if (use_hex && !dot_lvl) start_digits = cursor_;
if ((use_hex && is_alpha_char(*cursor_, 'P')) ||
is_alpha_char(*cursor_, 'E')) {
dot_lvl = 0; // Emulate dot to signal about float-point number.
cursor_++;
if (*cursor_ == '+' || *cursor_ == '-') cursor_++;
while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
start_digits = cursor_; // the exponent-part has to have digits
// Exponent is decimal integer number
while (is_digit(*cursor_)) cursor_++;
if (*cursor_ == '.') {
cursor_++; // If see a dot treat it as part of invalid number.
dot_lvl = -1; // Fall thru to Error().
}
}
token_ = kTokenFloatConstant;
} else {
token_ = kTokenIntegerConstant;
}
attribute_.append(start, cursor_);
return NoError();
// Finalize.
if ((dot_lvl >= 0) && (cursor_ > start_digits)) {
attribute_.append(start, cursor_);
token_ = dot_lvl ? kTokenIntegerConstant : kTokenFloatConstant;
return NoError();
} else {
return Error("invalid number: " + std::string(start, cursor_));
}
}
std::string ch;
ch = c;
if (c < ' ' || c > '~') ch = "code: " + NumToString(c);
if (false == check_in_range(c, ' ', '~')) ch = "code: " + NumToString(c);
return Error("illegal character: " + ch);
}
}
@ -674,7 +656,7 @@ CheckedError Parser::ParseField(StructDef &struct_def) {
(struct_def.fixed && field->value.constant != "0"))
return Error(
"default values currently only supported for scalars in tables");
ECHECK(ParseSingleValue(&field->name, field->value));
ECHECK(ParseSingleValue(&field->name, field->value, true));
}
if (type.enum_def &&
!type.enum_def->is_union &&
@ -684,9 +666,20 @@ CheckedError Parser::ParseField(StructDef &struct_def) {
return Error("default value of " + field->value.constant + " for field " +
name + " is not part of enum " + type.enum_def->name);
}
// Append .0 if the value doesn't have it (skip hex and scientific floats).
// This suffix is needed for generated C++ code.
if (IsFloat(type.base_type)) {
if (!strpbrk(field->value.constant.c_str(), ".eE"))
auto &text = field->value.constant;
FLATBUFFERS_ASSERT(false == text.empty());
auto s = text.c_str();
while(*s == ' ') s++;
if (*s == '-' || *s == '+') s++;
// 1) A float constants (nan, inf, pi, etc) is a kind of identifier.
// 2) A float number needn't ".0" at the end if it has exponent.
if ((false == IsIdentifierStart(*s)) &&
(std::string::npos == field->value.constant.find_first_of(".eEpP"))) {
field->value.constant += ".0";
}
}
if (type.enum_def && IsScalar(type.base_type) && !struct_def.fixed &&
@ -915,11 +908,13 @@ CheckedError Parser::ParseAnyValue(Value &val, FieldDef *field,
(token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
ECHECK(ParseHash(val, field));
} else {
ECHECK(ParseSingleValue(field ? &field->name : nullptr, val));
ECHECK(ParseSingleValue(field ? &field->name : nullptr, val, false));
}
break;
}
default: ECHECK(ParseSingleValue(field ? &field->name : nullptr, val)); break;
default:
ECHECK(ParseSingleValue(field ? &field->name : nullptr, val, false));
break;
}
return NoError();
}
@ -994,7 +989,8 @@ CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value,
ECHECK(parser->SkipAnyJsonValue());
}
} else {
if (parser->IsIdent("null")) {
if (parser->IsIdent("null") &&
!IsScalar(field->value.type.base_type)) {
ECHECK(parser->Next()); // Ignore this field.
} else {
Value val = field->value;
@ -1252,7 +1248,7 @@ CheckedError Parser::ParseMetaData(SymbolTable<Value> *attributes) {
attributes->Add(name, e);
if (Is(':')) {
NEXT();
ECHECK(ParseSingleValue(&name, *e));
ECHECK(ParseSingleValue(&name, *e, true));
}
if (Is(')')) {
NEXT();
@ -1264,23 +1260,40 @@ CheckedError Parser::ParseMetaData(SymbolTable<Value> *attributes) {
return NoError();
}
CheckedError Parser::TryTypedValue(const std::string *name, int dtoken, bool check, Value &e,
BaseType req, bool *destmatch) {
CheckedError Parser::TryTypedValue(const std::string *name, int dtoken,
bool check, Value &e, BaseType req,
bool *destmatch) {
bool match = dtoken == token_;
if (match) {
FLATBUFFERS_ASSERT(*destmatch == false);
*destmatch = true;
e.constant = attribute_;
// Check token match
if (!check) {
if (e.type.base_type == BASE_TYPE_NONE) {
e.type.base_type = req;
} else {
return Error(std::string("type mismatch: expecting: ") +
kTypeNames[e.type.base_type] +
", found: " + kTypeNames[req] +
", name: " + (name ? *name : "") +
", value: " + e.constant);
return Error(
std::string("type mismatch: expecting: ") +
kTypeNames[e.type.base_type] + ", found: " + kTypeNames[req] +
", name: " + (name ? *name : "") + ", value: " + e.constant);
}
}
// The exponent suffix of hexadecimal float-point number is mandatory.
// A hex-integer constant is forbidden as an initializer of float number.
if ((kTokenFloatConstant != dtoken) && IsFloat(e.type.base_type)) {
const auto &s = e.constant;
const auto k = s.find_first_of("0123456789.");
if ((std::string::npos != k) && (s.length() > (k + 1)) &&
(s.at(k) == '0' && is_alpha_char(s.at(k + 1), 'X')) &&
(std::string::npos == s.find_first_of("pP", k + 2))) {
return Error(
"invalid number, the exponent suffix of hexadecimal "
"floating-point literals is mandatory: \"" +
s + "\"");
}
}
NEXT();
}
return NoError();
@ -1375,20 +1388,29 @@ CheckedError Parser::TokenError() {
return Error("cannot parse value starting with: " + TokenToStringId(token_));
}
CheckedError Parser::ParseSingleValue(const std::string *name, Value &e) {
CheckedError Parser::ParseSingleValue(const std::string *name, Value &e,
bool check_now) {
// First see if this could be a conversion function:
if (token_ == kTokenIdentifier && *cursor_ == '(') {
auto functionname = attribute_;
// todo: Extract processing of conversion functions to ParseFunction.
const auto functionname = attribute_;
if (!IsFloat(e.type.base_type)) {
return Error(functionname + ": type of argument mismatch, expecting: " +
kTypeNames[BASE_TYPE_DOUBLE] +
", found: " + kTypeNames[e.type.base_type] +
", name: " + (name ? *name : "") + ", value: " + e.constant);
}
NEXT();
EXPECT('(');
ECHECK(ParseSingleValue(name, e));
ECHECK(Recurse([&]() { return ParseSingleValue(name, e, false); }));
EXPECT(')');
// calculate with double precision
double x, y = 0.0;
ECHECK(atot(e.constant.c_str(), *this, &x));
auto func_match = false;
// clang-format off
#define FLATBUFFERS_FN_DOUBLE(name, op) \
if (functionname == name) { \
auto x = strtod(e.constant.c_str(), nullptr); \
e.constant = NumToString(op); \
}
if (!func_match && functionname == name) { y = op; func_match = true; }
FLATBUFFERS_FN_DOUBLE("deg", x / kPi * 180);
FLATBUFFERS_FN_DOUBLE("rad", x * kPi / 180);
FLATBUFFERS_FN_DOUBLE("sin", sin(x));
@ -1400,47 +1422,108 @@ CheckedError Parser::ParseSingleValue(const std::string *name, Value &e) {
// TODO(wvo): add more useful conversion functions here.
#undef FLATBUFFERS_FN_DOUBLE
// clang-format on
// Then check if this could be a string/identifier enum value:
} else if (e.type.base_type != BASE_TYPE_STRING &&
e.type.base_type != BASE_TYPE_BOOL &&
e.type.base_type != BASE_TYPE_NONE &&
(token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
if (IsIdentifierStart(attribute_[0])) { // Enum value.
if (true != func_match) {
return Error(std::string("Unknown conversion function: ") + functionname +
", field name: " + (name ? *name : "") +
", value: " + e.constant);
}
e.constant = NumToString(y);
return NoError();
}
auto match = false;
// clang-format off
#define TRY_ECHECK(force, dtoken, check, req) \
if (!match && ((check) || IsConstTrue(force))) \
ECHECK(TryTypedValue(name, dtoken, check, e, req, &match))
// clang-format on
if (token_ == kTokenStringConstant || token_ == kTokenIdentifier) {
const auto kTokenStringOrIdent = token_;
// The string type is a most probable type, check it first.
TRY_ECHECK(false, kTokenStringConstant,
e.type.base_type == BASE_TYPE_STRING, BASE_TYPE_STRING);
// avoid escaped and non-ascii in the string
if ((token_ == kTokenStringConstant) && IsScalar(e.type.base_type) &&
!attr_is_trivial_ascii_string_) {
return Error(
std::string("type mismatch or invalid value, an initializer of "
"non-string field must be trivial ASCII string: type: ") +
kTypeNames[e.type.base_type] + ", name: " + (name ? *name : "") +
", value: " + attribute_);
}
// A boolean as true/false. Boolean as Integer check below.
if (!match && IsBool(e.type.base_type)) {
auto is_true = attribute_ == "true";
if (is_true || attribute_ == "false") {
attribute_ = is_true ? "1" : "0";
// accepts both kTokenStringConstant and kTokenIdentifier
TRY_ECHECK(false, kTokenStringOrIdent, IsBool(e.type.base_type),
BASE_TYPE_BOOL);
}
}
// Check if this could be a string/identifier enum value.
// Enum can have only true integer base type.
if (!match && IsInteger(e.type.base_type) && !IsBool(e.type.base_type) &&
IsIdentifierStart(*attribute_.c_str())) {
int64_t val;
ECHECK(ParseEnumFromString(e.type, &val));
e.constant = NumToString(val);
NEXT();
} else { // Numeric constant in string.
if (IsInteger(e.type.base_type)) {
char *end;
e.constant = NumToString(StringToInt(attribute_.c_str(), &end));
if (*end) return Error("invalid integer: " + attribute_);
} else if (IsFloat(e.type.base_type)) {
char *end;
e.constant = NumToString(strtod(attribute_.c_str(), &end));
if (*end) return Error("invalid float: " + attribute_);
} else {
FLATBUFFERS_ASSERT(0); // Shouldn't happen, we covered all types.
e.constant = "0";
}
NEXT();
match = true;
}
// float/integer number in string
if ((token_ == kTokenStringConstant) && IsScalar(e.type.base_type)) {
// remove trailing whitespaces from attribute_
auto last = attribute_.find_last_not_of(' ');
if (std::string::npos != last) // has non-whitespace
attribute_.resize(last + 1);
}
// Float numbers or nan, inf, pi, etc.
TRY_ECHECK(false, kTokenStringOrIdent, IsFloat(e.type.base_type),
BASE_TYPE_FLOAT);
// An integer constant in string.
TRY_ECHECK(false, kTokenStringOrIdent, IsInteger(e.type.base_type),
BASE_TYPE_INT);
// Unknown tokens will be interpreted as string type.
TRY_ECHECK(true, kTokenStringConstant, e.type.base_type == BASE_TYPE_STRING,
BASE_TYPE_STRING);
} else {
bool match = false;
ECHECK(TryTypedValue(name, kTokenIntegerConstant, IsScalar(e.type.base_type), e,
BASE_TYPE_INT, &match));
ECHECK(TryTypedValue(name, kTokenFloatConstant, IsFloat(e.type.base_type), e,
BASE_TYPE_FLOAT, &match));
ECHECK(TryTypedValue(name, kTokenStringConstant,
e.type.base_type == BASE_TYPE_STRING, e,
BASE_TYPE_STRING, &match));
auto istrue = IsIdent("true");
if (istrue || IsIdent("false")) {
attribute_ = NumToString(istrue);
ECHECK(TryTypedValue(name, kTokenIdentifier, IsBool(e.type.base_type), e,
BASE_TYPE_BOOL, &match));
// Try a float number.
TRY_ECHECK(false, kTokenFloatConstant, IsFloat(e.type.base_type),
BASE_TYPE_FLOAT);
// Integer token can init any scalar (integer or float).
TRY_ECHECK(true, kTokenIntegerConstant, IsScalar(e.type.base_type),
BASE_TYPE_INT);
}
#undef TRY_ECHECK
if (!match) return TokenError();
// The check_now flag must be true when parsing an fbs-schema.
// It forces an immediate check of default scalar values and field metadata.
// For the JSON parser the flag should be false.
// If it were set for JSON, each value would be checked twice (see ParseTable).
if (check_now && IsScalar(e.type.base_type)) {
// "re-pack" an integer scalar to remove any ambiguities like leading zeros
// which can be treated as octal-literal (idl_gen_cpp/GenDefaultConstant).
const auto repack = IsInteger(e.type.base_type);
switch (e.type.base_type) {
// clang-format off
#define FLATBUFFERS_TD(ENUM, IDLTYPE, \
CTYPE, JTYPE, GTYPE, NTYPE, PTYPE, RTYPE) \
case BASE_TYPE_ ## ENUM: {\
CTYPE val; \
ECHECK(atot(e.constant.c_str(), *this, &val)); \
if(repack) e.constant = NumToString(val); \
break; }
FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD);
#undef FLATBUFFERS_TD
default: break;
// clang-format on
}
if (!match) return TokenError();
}
return NoError();
}
@ -1565,7 +1648,7 @@ CheckedError Parser::ParseEnum(bool is_union, EnumDef **dest) {
}
if (Is('=')) {
NEXT();
ev.value = StringToInt(attribute_.c_str());
ECHECK(atot(attribute_.c_str(), *this, &ev.value));
EXPECT(kTokenIntegerConstant);
if (!opts.proto_mode && prevsize &&
enum_def->vals.vec[prevsize - 1]->value >= ev.value)
@ -2451,6 +2534,9 @@ CheckedError Parser::DoParse(const char *source, const char **include_paths,
? file_identifier_.c_str()
: nullptr);
}
// Check that JSON file doesn't contain more objects or IDL directives.
// Comments after JSON are allowed.
EXPECT(kTokenEof);
} else if (IsIdent("enum")) {
ECHECK(ParseEnum(false, nullptr));
} else if (IsIdent("union")) {
@ -2606,7 +2692,9 @@ Offset<reflection::Field> FieldDef::Serialize(FlatBufferBuilder *builder,
return reflection::CreateField(
*builder, builder->CreateString(name), value.type.Serialize(builder), id,
value.offset,
// Is uint64>max(int64) tested?
IsInteger(value.type.base_type) ? StringToInt(value.constant.c_str()) : 0,
// result may be platform-dependent if underlying is float (not double)
IsFloat(value.type.base_type) ? strtod(value.constant.c_str(), nullptr)
: 0.0,
deprecated, required, key, SerializeAttributes(builder, parser),

View File

@ -1,4 +1,4 @@
/*
/*
* Copyright 2014 Google Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cmath>
#include "flatbuffers/flatbuffers.h"
#include "flatbuffers/idl.h"
#include "flatbuffers/minireflect.h"
@ -35,11 +35,6 @@
#include "union_vector/union_vector_generated.h"
#include "test_assert.h"
// clang-format off
#ifndef FLATBUFFERS_CPP98_STL
#include <random>
#endif
#include "flatbuffers/flexbuffers.h"
using namespace MyGame::Example;
@ -1200,7 +1195,6 @@ void TestError_(const char *src, const char *error_substr, const char *file,
void ErrorTest() {
// In order they appear in idl_parser.cpp
TestError("table X { Y:byte; } root_type X; { Y: 999 }", "does not fit");
TestError(".0", "floating point");
TestError("\"\0", "illegal");
TestError("\"\\q", "escape code");
TestError("table ///", "documentation");
@ -1238,25 +1232,42 @@ void ErrorTest() {
TestError("table X { Y:int; } table X {", "datatype already");
TestError("struct X (force_align: 7) { Y:int; }", "force_align");
TestError("{}", "no root");
TestError("table X { Y:byte; } root_type X; { Y:1 } { Y:1 }", "one json");
TestError("table X { Y:byte; } root_type X; { Y:1 } { Y:1 }", "end of file");
TestError("table X { Y:byte; } root_type X; { Y:1 } table Y{ Z:int }",
"end of file");
TestError("root_type X;", "unknown root");
TestError("struct X { Y:int; } root_type X;", "a table");
TestError("union X { Y }", "referenced");
TestError("union Z { X } struct X { Y:int; }", "only tables");
TestError("table X { Y:[int]; YLength:int; }", "clash");
TestError("table X { Y:byte; } root_type X; { Y:1, Y:2 }", "more than once");
// float to integer conversion is forbidden
TestError("table X { Y:int; } root_type X; { Y:1.0 }", "float");
TestError("table X { Y:bool; } root_type X; { Y:1.0 }", "float");
}
template<typename T> T TestValue(const char *json, const char *type_name) {
flatbuffers::Parser parser;
parser.builder_.ForceDefaults(true); // return defaults
auto check_default = json ? false : true;
if (check_default) { parser.opts.output_default_scalars_in_json = true; }
// Simple schema.
TEST_EQ(parser.Parse(std::string("table X { Y:" + std::string(type_name) +
"; } root_type X;")
.c_str()),
true);
std::string schema =
"table X { Y:" + std::string(type_name) + "; } root_type X;";
TEST_EQ(parser.Parse(schema.c_str()), true);
auto done = parser.Parse(check_default ? "{}" : json);
TEST_EQ_STR(parser.error_.c_str(), "");
TEST_EQ(done, true);
// Check with print.
std::string print_back;
parser.opts.indent_step = -1;
TEST_EQ(GenerateText(parser, parser.builder_.GetBufferPointer(), &print_back),
true);
// restore value from its default
if (check_default) { TEST_EQ(parser.Parse(print_back.c_str()), true); }
TEST_EQ(parser.Parse(json), true);
auto root = flatbuffers::GetRoot<flatbuffers::Table>(
parser.builder_.GetBufferPointer());
return root->GetField<T>(flatbuffers::FieldIndexToOffset(0), 0);
@ -1270,17 +1281,44 @@ void ValueTest() {
TEST_EQ(FloatCompare(TestValue<float>("{ Y:0.0314159e+2 }", "float"),
(float)3.14159),
true);
// number in string
TEST_EQ(FloatCompare(TestValue<float>("{ Y:\"0.0314159e+2\" }", "float"),
(float)3.14159),
true);
// Test conversion functions.
TEST_EQ(FloatCompare(TestValue<float>("{ Y:cos(rad(180)) }", "float"), -1),
true);
// int embedded to string
TEST_EQ(TestValue<int>("{ Y:\"-876\" }", "int=-123"), -876);
TEST_EQ(TestValue<int>("{ Y:\"876\" }", "int=-123"), 876);
// Test negative hex constant.
TEST_EQ(TestValue<int>("{ Y:-0x80 }", "int"), -128);
TEST_EQ(TestValue<int>("{ Y:-0x8ea0 }", "int=-0x8ea0"), -36512);
TEST_EQ(TestValue<int>(nullptr, "int=-0x8ea0"), -36512);
// positive hex constant
TEST_EQ(TestValue<int>("{ Y:0x1abcdef }", "int=0x1"), 0x1abcdef);
// with optional '+' sign
TEST_EQ(TestValue<int>("{ Y:+0x1abcdef }", "int=+0x1"), 0x1abcdef);
// hex in string
TEST_EQ(TestValue<int>("{ Y:\"0x1abcdef\" }", "int=+0x1"), 0x1abcdef);
// Make sure we do unsigned 64bit correctly.
TEST_EQ(TestValue<uint64_t>("{ Y:12335089644688340133 }", "ulong"),
12335089644688340133ULL);
// bool in string
TEST_EQ(TestValue<bool>("{ Y:\"false\" }", "bool=true"), false);
TEST_EQ(TestValue<bool>("{ Y:\"true\" }", "bool=\"true\""), true);
TEST_EQ(TestValue<bool>("{ Y:'false' }", "bool=true"), false);
TEST_EQ(TestValue<bool>("{ Y:'true' }", "bool=\"true\""), true);
// check comments before and after json object
TEST_EQ(TestValue<int>("/*before*/ { Y:1 } /*after*/", "int"), 1);
TEST_EQ(TestValue<int>("//before \n { Y:1 } //after", "int"), 1);
}
void NestedListTest() {
@ -1337,6 +1375,47 @@ void IntegerOutOfRangeTest() {
"constant does not fit");
TestError("table T { F:uint; } root_type T; { F:-1 }",
"constant does not fit");
// Check fixed width aliases
TestError("table X { Y:uint8; } root_type X; { Y: -1 }", "does not fit");
TestError("table X { Y:uint8; } root_type X; { Y: 256 }", "does not fit");
TestError("table X { Y:uint16; } root_type X; { Y: -1 }", "does not fit");
TestError("table X { Y:uint16; } root_type X; { Y: 65536 }", "does not fit");
TestError("table X { Y:uint32; } root_type X; { Y: -1 }", "");
TestError("table X { Y:uint32; } root_type X; { Y: 4294967296 }",
"does not fit");
TestError("table X { Y:uint64; } root_type X; { Y: -1 }", "");
TestError("table X { Y:uint64; } root_type X; { Y: -9223372036854775809 }",
"does not fit");
TestError("table X { Y:uint64; } root_type X; { Y: 18446744073709551616 }",
"does not fit");
TestError("table X { Y:int8; } root_type X; { Y: -129 }", "does not fit");
TestError("table X { Y:int8; } root_type X; { Y: 128 }", "does not fit");
TestError("table X { Y:int16; } root_type X; { Y: -32769 }", "does not fit");
TestError("table X { Y:int16; } root_type X; { Y: 32768 }", "does not fit");
TestError("table X { Y:int32; } root_type X; { Y: -2147483649 }", "");
TestError("table X { Y:int32; } root_type X; { Y: 2147483648 }",
"does not fit");
TestError("table X { Y:int64; } root_type X; { Y: -9223372036854775809 }",
"does not fit");
TestError("table X { Y:int64; } root_type X; { Y: 9223372036854775808 }",
"does not fit");
// check out-of-int64 as int8
TestError("table X { Y:int8; } root_type X; { Y: -9223372036854775809 }",
"does not fit");
TestError("table X { Y:int8; } root_type X; { Y: 9223372036854775808 }",
"does not fit");
// Check default values
TestError("table X { Y:int64=-9223372036854775809; } root_type X; {}",
"does not fit");
TestError("table X { Y:int64= 9223372036854775808; } root_type X; {}",
"does not fit");
TestError("table X { Y:uint64; } root_type X; { Y: -1 }", "");
TestError("table X { Y:uint64=-9223372036854775809; } root_type X; {}",
"does not fit");
TestError("table X { Y:uint64= 18446744073709551616; } root_type X; {}",
"does not fit");
}
void IntegerBoundaryTest() {
@ -1359,6 +1438,207 @@ void IntegerBoundaryTest() {
TEST_EQ(TestValue<uint64_t>("{ Y:18446744073709551615 }", "ulong"),
18446744073709551615U);
TEST_EQ(TestValue<uint64_t>("{ Y:0 }", "ulong"), 0);
TEST_EQ(TestValue<uint64_t>("{ Y: 18446744073709551615 }", "uint64"),
18446744073709551615ULL);
// check that the default works
TEST_EQ(TestValue<uint64_t>(nullptr, "uint64 = 18446744073709551615"),
18446744073709551615ULL);
}
// Feeds floating-point literals that the parser must accept through
// TestValue<T>() and compares the value read back from the built buffer.
// Covers decimal forms, numbers embedded in strings, and (only when
// FLATBUFFERS_HAS_NEW_STRTOD is defined) nan/inf and hexadecimal floats.
void ValidFloatTest() {
const auto infinityf = flatbuffers::numeric_limits<float>::infinity();
const auto infinityd = flatbuffers::numeric_limits<double>::infinity();
// check rounding to infinity: each literal is slightly beyond the largest
// finite value of the field type, so +/-infinity is expected.
TEST_EQ(TestValue<float>("{ Y:+3.4029e+38 }", "float"), +infinityf);
TEST_EQ(TestValue<float>("{ Y:-3.4029e+38 }", "float"), -infinityf);
TEST_EQ(TestValue<double>("{ Y:+1.7977e+308 }", "double"), +infinityd);
TEST_EQ(TestValue<double>("{ Y:-1.7977e+308 }", "double"), -infinityd);
TEST_EQ(FloatCompare(TestValue<float>("{ Y:0.0314159e+2 }", "float"),
(float)3.14159),
true);
// float in string (with leading and trailing spaces inside the quotes)
TEST_EQ(FloatCompare(TestValue<float>("{ Y:\" 0.0314159e+2 \" }", "float"),
(float)3.14159),
true);
// Decimal forms: integer token, missing fraction digits, missing integer
// digits, optional sign and optional exponent.
TEST_EQ(TestValue<float>("{ Y:1 }", "float"), 1.0f);
TEST_EQ(TestValue<float>("{ Y:1.0 }", "float"), 1.0f);
TEST_EQ(TestValue<float>("{ Y:1. }", "float"), 1.0f);
TEST_EQ(TestValue<float>("{ Y:+1. }", "float"), 1.0f);
TEST_EQ(TestValue<float>("{ Y:-1. }", "float"), -1.0f);
TEST_EQ(TestValue<float>("{ Y:1.e0 }", "float"), 1.0f);
TEST_EQ(TestValue<float>("{ Y:1.e+0 }", "float"), 1.0f);
TEST_EQ(TestValue<float>("{ Y:1.e-0 }", "float"), 1.0f);
TEST_EQ(TestValue<float>("{ Y:0.125 }", "float"), 0.125f);
TEST_EQ(TestValue<float>("{ Y:.125 }", "float"), 0.125f);
TEST_EQ(TestValue<float>("{ Y:-.125 }", "float"), -0.125f);
TEST_EQ(TestValue<float>("{ Y:+.125 }", "float"), +0.125f);
TEST_EQ(TestValue<float>("{ Y:5 }", "float"), 5.0f);
TEST_EQ(TestValue<float>("{ Y:\"5\" }", "float"), 5.0f);
// Everything from here to the #else is compiled only when
// FLATBUFFERS_HAS_NEW_STRTOD is defined.
#if defined(FLATBUFFERS_HAS_NEW_STRTOD)
// Old MSVC versions may have problem with this check.
// https://www.exploringbinary.com/visual-c-plus-plus-strtod-still-broken/
TEST_EQ(TestValue<double>("{ Y:6.9294956446009195e15 }", "double"),
6929495644600920);
// check nan's (bare, signed, inside a string, and as a schema default;
// a nullptr JSON makes TestValue exercise the schema default)
TEST_EQ(std::isnan(TestValue<double>("{ Y:nan }", "double")), true);
TEST_EQ(std::isnan(TestValue<float>("{ Y:nan }", "float")), true);
TEST_EQ(std::isnan(TestValue<float>("{ Y:\"nan\" }", "float")), true);
TEST_EQ(std::isnan(TestValue<float>("{ Y:+nan }", "float")), true);
TEST_EQ(std::isnan(TestValue<float>("{ Y:-nan }", "float")), true);
TEST_EQ(std::isnan(TestValue<float>(nullptr, "float=nan")), true);
TEST_EQ(std::isnan(TestValue<float>(nullptr, "float=-nan")), true);
// check inf (same set of forms as for nan)
TEST_EQ(TestValue<float>("{ Y:inf }", "float"), infinityf);
TEST_EQ(TestValue<float>("{ Y:\"inf\" }", "float"), infinityf);
TEST_EQ(TestValue<float>("{ Y:+inf }", "float"), infinityf);
TEST_EQ(TestValue<float>("{ Y:-inf }", "float"), -infinityf);
TEST_EQ(TestValue<float>(nullptr, "float=inf"), infinityf);
TEST_EQ(TestValue<float>(nullptr, "float=-inf"), -infinityf);
// Vectors of floats: the returned value is ignored here; presumably only
// the checks performed inside TestValue (successful parse) matter.
TestValue<double>(
"{ Y : [0.2, .2, 1.0, -1.0, -2., 2., 1e0, -1e0, 1.0e0, -1.0e0, -3.e2, "
"3.0e2] }",
"[double]");
TestValue<float>(
"{ Y : [0.2, .2, 1.0, -1.0, -2., 2., 1e0, -1e0, 1.0e0, -1.0e0, -3.e2, "
"3.0e2] }",
"[float]");
// Test binary format of float point.
// https://en.cppreference.com/w/cpp/language/floating_literal
// 0x12.34p-1 = (1*16^1 + 2*16^0 + 3*16^-1 + 4*16^-2) * 2^-1 = 9.1015625
TEST_EQ(TestValue<double>("{ Y:0x12.34p-1 }", "double"), 9.1015625);
// hex fraction 0.2 is 2/16 = 0.125; the 'p' exponent scales by a power of 2
TEST_EQ(TestValue<float>("{ Y:-0x0.2p0 }", "float"), -0.125f);
TEST_EQ(TestValue<float>("{ Y:-0x.2p1 }", "float"), -0.25f);
// hex 1.2 (decimal 1.125) scaled by 2^3, that is 9.0
TEST_EQ(TestValue<float>("{ Y:0x1.2p3 }", "float"), 9.0f);
TEST_EQ(TestValue<float>("{ Y:0x10.1p0 }", "float"), 16.0625f);
TEST_EQ(TestValue<double>("{ Y:0x1.2p3 }", "double"), 9.0);
TEST_EQ(TestValue<double>("{ Y:0x10.1p0 }", "double"), 16.0625);
TEST_EQ(TestValue<double>("{ Y:0xC.68p+2 }", "double"), 49.625);
// Vectors of hex floats, with and without an explicit sign.
TestValue<double>("{ Y : [0x20.4ep1, +0x20.4ep1, -0x20.4ep1] }", "[double]");
TestValue<float>("{ Y : [0x20.4ep1, +0x20.4ep1, -0x20.4ep1] }", "[float]");
#else // FLATBUFFERS_HAS_NEW_STRTOD
TEST_OUTPUT_LINE("FLATBUFFERS_HAS_NEW_STRTOD tests skipped");
#endif // FLATBUFFERS_HAS_NEW_STRTOD
}
// Feeds malformed floating-point values for a float field F to TestError()
// together with a substring expected in the resulting error message.
// NOTE(review): several cases pass "" as the expected substring; presumably
// that accepts any error text - confirm against TestError_.
void InvalidFloatTest() {
auto invalid_msg = "invalid number";
auto comma_msg = "expecting: ,";
TestError("table T { F:float; } root_type T; { F:1,0 }", "");
TestError("table T { F:float; } root_type T; { F:. }", "");
// A sign, a dot and/or an exponent marker without any digits.
TestError("table T { F:float; } root_type T; { F:- }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:+ }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:-. }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:+. }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:.e }", "");
TestError("table T { F:float; } root_type T; { F:-e }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:+e }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:-.e }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:+.e }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:-e1 }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:+e1 }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:1.0e+ }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:1.0e- }", invalid_msg);
// exponent pP is mandatory for hex-float
TestError("table T { F:float; } root_type T; { F:0x0 }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:-0x. }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:0x. }", invalid_msg);
// 'e'/'E' is a hex digit, not an exponent marker, in a hex-float!
TestError("table T { F:float; } root_type T; { F:0x0.0e+ }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:0x0.0e- }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:0x0.0p }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:0x0.0p+ }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:0x0.0p- }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:0x0.0pa1 }", invalid_msg);
// NOTE(review): the next two cases repeat the "0x0.0e+" / "0x0.0e-" inputs
// already checked a few lines above.
TestError("table T { F:float; } root_type T; { F:0x0.0e+ }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:0x0.0e- }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:0x0.0e+0 }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:0x0.0e-0 }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:0x0.0ep+ }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:0x0.0ep- }", invalid_msg);
// More than one dot, a dot inside the exponent, or doubled signs.
TestError("table T { F:float; } root_type T; { F:1.2.3 }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:1.2.e3 }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:1.2e.3 }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:1.2e0.3 }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:1.2e3. }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:1.2e3.0 }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:+-1.0 }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:1.0e+-1 }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:\"1.0e+-1\" }", invalid_msg);
// A trailing 'e' after a complete number: the expected error here is the
// missing-comma message rather than "invalid number".
TestError("table T { F:float; } root_type T; { F:1.e0e }", comma_msg);
TestError("table T { F:float; } root_type T; { F:0x1.p0e }", comma_msg);
TestError("table T { F:float; } root_type T; { F:\" 0x10 \" }", invalid_msg);
// floats in string
TestError("table T { F:float; } root_type T; { F:\"1,2.\" }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:\"1.2e3.\" }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:\"0x1.p0e\" }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:\"0x1.0\" }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:\" 0x1.0\" }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:\"+ 0\" }", invalid_msg);
// disable escapes for "number-in-string"
TestError("table T { F:float; } root_type T; { F:\"\\f1.2e3.\" }", "invalid");
TestError("table T { F:float; } root_type T; { F:\"\\t1.2e3.\" }", "invalid");
TestError("table T { F:float; } root_type T; { F:\"\\n1.2e3.\" }", "invalid");
TestError("table T { F:float; } root_type T; { F:\"\\r1.2e3.\" }", "invalid");
TestError("table T { F:float; } root_type T; { F:\"4\\x005\" }", "invalid");
TestError("table T { F:float; } root_type T; { F:\"\'12\'\" }", invalid_msg);
// null is not a number constant!
TestError("table T { F:float; } root_type T; { F:\"null\" }", invalid_msg);
TestError("table T { F:float; } root_type T; { F:null }", invalid_msg);
}
template<typename T>
void NumericUtilsTestInteger(const char *lower, const char *upper) {
T x;
TEST_EQ(flatbuffers::StringToNumber("1q", &x), false);
TEST_EQ(x, 0);
TEST_EQ(flatbuffers::StringToNumber(upper, &x), false);
TEST_EQ(x, flatbuffers::numeric_limits<T>::max());
TEST_EQ(flatbuffers::StringToNumber(lower, &x), false);
auto expval = flatbuffers::is_unsigned<T>::value
? flatbuffers::numeric_limits<T>::max()
: flatbuffers::numeric_limits<T>::lowest();
TEST_EQ(x, expval);
}
// Exercises flatbuffers::StringToNumber() for a floating-point type T.
//   lower - text of a negative value whose magnitude is too large for T;
//   upper - text of a positive value whose magnitude is too large for T.
// Malformed text is expected to fail and leave 0 in the output, while the
// out-of-range inputs are expected to succeed and yield -/+ infinity.
template<typename T>
void NumericUtilsTestFloat(const char *lower, const char *upper) {
T f;
// Trailing garbage after the digits.
TEST_EQ(flatbuffers::StringToNumber("1q", &f), false);
TEST_EQ(f, 0);
// Overflow is accepted and reported as +infinity.
TEST_EQ(flatbuffers::StringToNumber(upper, &f), true);
TEST_EQ(f, +flatbuffers::numeric_limits<T>::infinity());
// Negative overflow is accepted and reported as -infinity.
TEST_EQ(flatbuffers::StringToNumber(lower, &f), true);
TEST_EQ(f, -flatbuffers::numeric_limits<T>::infinity());
}
void NumericUtilsTest() {
NumericUtilsTestInteger<uint64_t>("-1", "18446744073709551616");
NumericUtilsTestInteger<uint8_t>("-1", "256");
NumericUtilsTestInteger<int64_t>("-9223372036854775809",
"9223372036854775808");
NumericUtilsTestInteger<int8_t>("-129", "128");
NumericUtilsTestFloat<float>("-3.4029e+38", "+3.4029e+38");
NumericUtilsTestFloat<float>("-1.7977e+308", "+1.7977e+308");
}
// Compares the flatbuffers::is_alpha/is_alnum/is_digit/is_xdigit helpers
// with plain ASCII range checks for all 256 possible values of `char`.
void IsAsciiUtilsTest() {
  // Walk the codes -128..127 and convert each one to char, so the same
  // sequence of char values is visited whether plain char is signed or not.
  for (int code = -128; code < 128; ++code) {
    const char c = static_cast<char>(code);
    const bool lower_case = (c >= 'a') && (c <= 'z');
    const bool upper_case = (c >= 'A') && (c <= 'Z');
    const bool alpha = lower_case || upper_case;
    const bool dec = (c >= '0') && (c <= '9');
    const bool hex =
        ((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F'));
    TEST_EQ(flatbuffers::is_alpha(c), alpha);
    TEST_EQ(flatbuffers::is_alnum(c), alpha || dec);
    TEST_EQ(flatbuffers::is_digit(c), dec);
    TEST_EQ(flatbuffers::is_xdigit(c), dec || hex);
  }
}
void UnicodeTest() {
@ -1572,6 +1852,15 @@ void InvalidUTF8Test() {
// U+10400 "encoded" as U+D801 U+DC00
"{ F:\"\xED\xA0\x81\xED\xB0\x80\"}",
"illegal UTF-8 sequence");
// Check independence of identifier from locale.
std::string locale_ident;
locale_ident += "table T { F";
locale_ident += static_cast<char>(-32); // unsigned 0xE0
locale_ident += " :string; }";
locale_ident += "root_type T;";
locale_ident += "{}";
TestError(locale_ident.c_str(), "");
}
void UnknownFieldsTest() {
@ -2100,13 +2389,14 @@ int FlatBufferTests() {
ParseProtoBufAsciiTest();
TypeAliasesTest();
EndianSwapTest();
JsonDefaultTest();
FlexBuffersTest();
UninitializedVectorTest();
EqualOperatorTest();
NumericUtilsTest();
IsAsciiUtilsTest();
ValidFloatTest();
InvalidFloatTest();
return 0;
}