From 9b034eee126f837e4cf2eabbeace87507337e4b0 Mon Sep 17 00:00:00 2001 From: Vladimir Glavnyy <31897320+vglavnyy@users.noreply.github.com> Date: Tue, 17 Mar 2020 01:59:34 +0700 Subject: [PATCH] Fix interpretation of 'nan(number)' by the idl_parser (#5810) * Parser reject "nan(n)" string as it does with nan(n) * Adjust scalar fuzzer to ignore '$schema' substrings - Scalar fuzzer ignores '$schema' substrings at the input - Added 'scalar_debug' target to simplify research of fuzzed cases * Improve formatting of './tests/fuzzer/CMakeLists.txt' --- include/flatbuffers/flatbuffers.h | 3 +- include/flatbuffers/idl.h | 1 + src/idl_parser.cpp | 195 ++++++++++++---------- tests/fuzzer/CMakeLists.txt | 183 ++++++++++++-------- tests/fuzzer/flatbuffers_parser_fuzzer.cc | 3 +- tests/fuzzer/flatbuffers_scalar_fuzzer.cc | 14 +- tests/fuzzer/scalar_debug.cpp | 28 ++++ tests/test.cpp | 3 + 8 files changed, 266 insertions(+), 164 deletions(-) create mode 100644 tests/fuzzer/scalar_debug.cpp diff --git a/include/flatbuffers/flatbuffers.h b/include/flatbuffers/flatbuffers.h index c4dc5bcd0..6ab60ee30 100644 --- a/include/flatbuffers/flatbuffers.h +++ b/include/flatbuffers/flatbuffers.h @@ -1795,8 +1795,7 @@ class FlatBufferBuilder { return a.KeyCompareLessThan(&b); } - private: - StructKeyComparator &operator=(const StructKeyComparator &); + FLATBUFFERS_DELETE_FUNC(StructKeyComparator &operator=(const StructKeyComparator &)) }; /// @endcond diff --git a/include/flatbuffers/idl.h b/include/flatbuffers/idl.h index 12b2b142f..eff2058bb 100644 --- a/include/flatbuffers/idl.h +++ b/include/flatbuffers/idl.h @@ -891,6 +891,7 @@ class Parser : public ParserState { FLATBUFFERS_CHECKED_ERROR TokenError(); FLATBUFFERS_CHECKED_ERROR ParseSingleValue(const std::string *name, Value &e, bool check_now); + FLATBUFFERS_CHECKED_ERROR ParseFunction(const std::string *name, Value &e); FLATBUFFERS_CHECKED_ERROR ParseEnumFromString(const Type &type, std::string *result); StructDef 
*LookupCreateStruct(const std::string &name, diff --git a/src/idl_parser.cpp b/src/idl_parser.cpp index 28ff3ec05..76c56de4e 100644 --- a/src/idl_parser.cpp +++ b/src/idl_parser.cpp @@ -436,7 +436,7 @@ CheckedError Parser::Next() { default: const auto has_sign = (c == '+') || (c == '-'); // '-'/'+' and following identifier - can be a predefined constant like: - // NAN, INF, PI, etc. + // NAN, INF, PI, etc or it can be a function name like cos/sin/deg. if (IsIdentifierStart(c) || (has_sign && IsIdentifierStart(*cursor_))) { // Collect all chars of an identifier: const char *start = cursor_ - 1; @@ -1511,45 +1511,6 @@ CheckedError Parser::ParseMetaData(SymbolTable *attributes) { return NoError(); } -CheckedError Parser::TryTypedValue(const std::string *name, int dtoken, - bool check, Value &e, BaseType req, - bool *destmatch) { - bool match = dtoken == token_; - if (match) { - FLATBUFFERS_ASSERT(*destmatch == false); - *destmatch = true; - e.constant = attribute_; - // Check token match - if (!check) { - if (e.type.base_type == BASE_TYPE_NONE) { - e.type.base_type = req; - } else { - return Error( - std::string("type mismatch: expecting: ") + - kTypeNames[e.type.base_type] + ", found: " + kTypeNames[req] + - ", name: " + (name ? *name : "") + ", value: " + e.constant); - } - } - // The exponent suffix of hexadecimal float-point number is mandatory. - // A hex-integer constant is forbidden as an initializer of float number. 
- if ((kTokenFloatConstant != dtoken) && IsFloat(e.type.base_type)) { - const auto &s = e.constant; - const auto k = s.find_first_of("0123456789."); - if ((std::string::npos != k) && (s.length() > (k + 1)) && - (s[k] == '0' && is_alpha_char(s[k + 1], 'X')) && - (std::string::npos == s.find_first_of("pP", k + 2))) { - return Error( - "invalid number, the exponent suffix of hexadecimal " - "floating-point literals is mandatory: \"" + - s + "\""); - } - } - - NEXT(); - } - return NoError(); -} - CheckedError Parser::ParseEnumFromString(const Type &type, std::string *result) { const auto base_type = @@ -1638,7 +1599,8 @@ template inline void SingleValueRepack(Value &e, T val) { if (IsInteger(e.type.base_type)) { e.constant = NumToString(val); } } #if defined(FLATBUFFERS_HAS_NEW_STRTOD) && (FLATBUFFERS_HAS_NEW_STRTOD > 0) -// Normilaze defaults NaN to unsigned quiet-NaN(0). +// Normalize defaults NaN to unsigned quiet-NaN(0) if value was parsed from +// hex-float literal. static inline void SingleValueRepack(Value &e, float val) { if (val != val) e.constant = "nan"; } @@ -1647,52 +1609,98 @@ static inline void SingleValueRepack(Value &e, double val) { } #endif -CheckedError Parser::ParseSingleValue(const std::string *name, Value &e, - bool check_now) { - // First see if this could be a conversion function: - if (token_ == kTokenIdentifier && *cursor_ == '(') { - // todo: Extract processing of conversion functions to ParseFunction. - const auto functionname = attribute_; - if (!IsFloat(e.type.base_type)) { - return Error(functionname + ": type of argument mismatch, expecting: " + - kTypeNames[BASE_TYPE_DOUBLE] + - ", found: " + kTypeNames[e.type.base_type] + - ", name: " + (name ? *name : "") + ", value: " + e.constant); +CheckedError Parser::ParseFunction(const std::string *name, Value &e) { + // Copy name, attribute will be changed on NEXT(). 
+ const auto functionname = attribute_; + if (!IsFloat(e.type.base_type)) { + return Error(functionname + ": type of argument mismatch, expecting: " + + kTypeNames[BASE_TYPE_DOUBLE] + + ", found: " + kTypeNames[e.type.base_type] + + ", name: " + (name ? *name : "") + ", value: " + e.constant); + } + NEXT(); + EXPECT('('); + ECHECK(Recurse([&]() { return ParseSingleValue(name, e, false); })); + EXPECT(')'); + // calculate with double precision + double x, y = 0.0; + ECHECK(atot(e.constant.c_str(), *this, &x)); + // clang-format off + auto func_match = false; + #define FLATBUFFERS_FN_DOUBLE(name, op) \ + if (!func_match && functionname == name) { y = op; func_match = true; } + FLATBUFFERS_FN_DOUBLE("deg", x / kPi * 180); + FLATBUFFERS_FN_DOUBLE("rad", x * kPi / 180); + FLATBUFFERS_FN_DOUBLE("sin", sin(x)); + FLATBUFFERS_FN_DOUBLE("cos", cos(x)); + FLATBUFFERS_FN_DOUBLE("tan", tan(x)); + FLATBUFFERS_FN_DOUBLE("asin", asin(x)); + FLATBUFFERS_FN_DOUBLE("acos", acos(x)); + FLATBUFFERS_FN_DOUBLE("atan", atan(x)); + // TODO(wvo): add more useful conversion functions here. + #undef FLATBUFFERS_FN_DOUBLE + // clang-format on + if (true != func_match) { + return Error(std::string("Unknown conversion function: ") + functionname + + ", field name: " + (name ? *name : "") + + ", value: " + e.constant); + } + e.constant = NumToString(y); + return NoError(); +} + +CheckedError Parser::TryTypedValue(const std::string *name, int dtoken, + bool check, Value &e, BaseType req, + bool *destmatch) { + bool match = dtoken == token_; + if (match) { + FLATBUFFERS_ASSERT(*destmatch == false); + *destmatch = true; + e.constant = attribute_; + // Check token match + if (!check) { + if (e.type.base_type == BASE_TYPE_NONE) { + e.type.base_type = req; + } else { + return Error( + std::string("type mismatch: expecting: ") + + kTypeNames[e.type.base_type] + ", found: " + kTypeNames[req] + + ", name: " + (name ? 
*name : "") + ", value: " + e.constant); + } + } + // The exponent suffix of hexadecimal float-point number is mandatory. + // A hex-integer constant is forbidden as an initializer of float number. + if ((kTokenFloatConstant != dtoken) && IsFloat(e.type.base_type)) { + const auto &s = e.constant; + const auto k = s.find_first_of("0123456789."); + if ((std::string::npos != k) && (s.length() > (k + 1)) && + (s[k] == '0' && is_alpha_char(s[k + 1], 'X')) && + (std::string::npos == s.find_first_of("pP", k + 2))) { + return Error( + "invalid number, the exponent suffix of hexadecimal " + "floating-point literals is mandatory: \"" + + s + "\""); + } } NEXT(); - EXPECT('('); - ECHECK(Recurse([&]() { return ParseSingleValue(name, e, false); })); - EXPECT(')'); - // calculate with double precision - double x, y = 0.0; - ECHECK(atot(e.constant.c_str(), *this, &x)); - auto func_match = false; - // clang-format off - #define FLATBUFFERS_FN_DOUBLE(name, op) \ - if (!func_match && functionname == name) { y = op; func_match = true; } - FLATBUFFERS_FN_DOUBLE("deg", x / kPi * 180); - FLATBUFFERS_FN_DOUBLE("rad", x * kPi / 180); - FLATBUFFERS_FN_DOUBLE("sin", sin(x)); - FLATBUFFERS_FN_DOUBLE("cos", cos(x)); - FLATBUFFERS_FN_DOUBLE("tan", tan(x)); - FLATBUFFERS_FN_DOUBLE("asin", asin(x)); - FLATBUFFERS_FN_DOUBLE("acos", acos(x)); - FLATBUFFERS_FN_DOUBLE("atan", atan(x)); - // TODO(wvo): add more useful conversion functions here. - #undef FLATBUFFERS_FN_DOUBLE - // clang-format on - if (true != func_match) { - return Error(std::string("Unknown conversion function: ") + functionname + - ", field name: " + (name ? 
*name : "") + - ", value: " + e.constant); - } - e.constant = NumToString(y); - return NoError(); + } + return NoError(); +} + +CheckedError Parser::ParseSingleValue(const std::string *name, Value &e, + bool check_now) { + const auto in_type = e.type.base_type; + const auto is_tok_ident = (token_ == kTokenIdentifier); + const auto is_tok_string = (token_ == kTokenStringConstant); + + // First see if this could be a conversion function: + if (is_tok_ident && *cursor_ == '(') { + return ParseFunction(name, e); } - auto match = false; - const auto in_type = e.type.base_type; // clang-format off + auto match = false; + #define IF_ECHECK_(force, dtoken, check, req) \ if (!match && ((check) || IsConstTrue(force))) \ ECHECK(TryTypedValue(name, dtoken, check, e, req, &match)) @@ -1700,14 +1708,14 @@ CheckedError Parser::ParseSingleValue(const std::string *name, Value &e, #define FORCE_ECHECK(dtoken, check, req) IF_ECHECK_(true, dtoken, check, req) // clang-format on - if (token_ == kTokenStringConstant || token_ == kTokenIdentifier) { + if (is_tok_ident || is_tok_string) { const auto kTokenStringOrIdent = token_; // The string type is a most probable type, check it first. TRY_ECHECK(kTokenStringConstant, in_type == BASE_TYPE_STRING, BASE_TYPE_STRING); // avoid escaped and non-ascii in the string - if (!match && (token_ == kTokenStringConstant) && IsScalar(in_type) && + if (!match && is_tok_string && IsScalar(in_type) && !attr_is_trivial_ascii_string_) { return Error( std::string("type mismatch or invalid value, an initializer of " @@ -1735,11 +1743,20 @@ CheckedError Parser::ParseSingleValue(const std::string *name, Value &e, } // Parse a float/integer number from the string. if (!match) check_now = true; // Re-pack if parsed from string literal. 
- if (!match && (token_ == kTokenStringConstant) && IsScalar(in_type)) { - // remove trailing whitespaces from attribute_ - auto last = attribute_.find_last_not_of(' '); - if (std::string::npos != last) // has non-whitespace - attribute_.resize(last + 1); + // A "scalar-in-string" value needs extra checks. + if (!match && is_tok_string && IsScalar(in_type)) { + // Strip trailing whitespaces from attribute_. + auto last_non_ws = attribute_.find_last_not_of(' '); + if (std::string::npos != last_non_ws) + attribute_.resize(last_non_ws + 1); + if (IsFloat(e.type.base_type)) { + // The functions strtod() and strtof() accept both 'nan' and + // 'nan(number)' literals. While 'nan(number)' is rejected by the parser + // as an unsupported function if is_tok_ident is true. + if (attribute_.find_last_of(')') != std::string::npos) { + return Error("invalid number: " + attribute_); + } + } } // Float numbers or nan, inf, pi, etc. TRY_ECHECK(kTokenStringOrIdent, IsFloat(in_type), BASE_TYPE_FLOAT); diff --git a/tests/fuzzer/CMakeLists.txt b/tests/fuzzer/CMakeLists.txt index de1626f38..88366720a 100644 --- a/tests/fuzzer/CMakeLists.txt +++ b/tests/fuzzer/CMakeLists.txt @@ -1,89 +1,138 @@ cmake_minimum_required(VERSION 3.9) set(CMAKE_VERBOSE_MAKEFILE ON) - set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_POSITION_INDEPENDENT_CODE ON) project(FlatBuffersFuzzerTests) -set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -std=c++14 -Wall -pedantic -Werror -Wextra -Wno-unused-parameter -fsigned-char") - -set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -g -fsigned-char -fno-omit-frame-pointer") - -# Typical slowdown introduced by MemorySanitizer (memory) is 3x. 
-# '-fsanitize=address' not allowed with '-fsanitize=memory' -if(YES) - set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer,address,undefined") -else() - set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer,memory,undefined -fsanitize-memory-track-origins=2") -endif() - -set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fsanitize-coverage=edge,trace-cmp") - -# enable link-time optimisation -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto") - -# https://llvm.org/docs/Passes.html -# save IR to see call graph -# make one bitcode file:> llvm-link *.bc -o out.bc -# print call-graph:> opt out.bc -analyze -print-callgraph &> callgraph.txt -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -save-temps -flto") - -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld") - -set(FLATBUFFERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../") - -set(FlatBuffers_Library_SRCS - ${FLATBUFFERS_DIR}/include/flatbuffers/base.h - ${FLATBUFFERS_DIR}/include/flatbuffers/flatbuffers.h - ${FLATBUFFERS_DIR}/include/flatbuffers/hash.h - ${FLATBUFFERS_DIR}/include/flatbuffers/idl.h - ${FLATBUFFERS_DIR}/include/flatbuffers/util.h - ${FLATBUFFERS_DIR}/include/flatbuffers/reflection.h - ${FLATBUFFERS_DIR}/include/flatbuffers/reflection_generated.h - ${FLATBUFFERS_DIR}/include/flatbuffers/stl_emulation.h - ${FLATBUFFERS_DIR}/include/flatbuffers/flexbuffers.h - ${FLATBUFFERS_DIR}/include/flatbuffers/registry.h - ${FLATBUFFERS_DIR}/include/flatbuffers/minireflect.h - ${FLATBUFFERS_DIR}/src/idl_parser.cpp - ${FLATBUFFERS_DIR}/src/idl_gen_text.cpp - ${FLATBUFFERS_DIR}/src/reflection.cpp - ${FLATBUFFERS_DIR}/src/util.cpp - ${FLATBUFFERS_DIR}/tests/test_assert.cpp -) - -include_directories(${FLATBUFFERS_DIR}/include) -include_directories(${FLATBUFFERS_DIR}/tests) -add_library(flatbuffers STATIC ${FlatBuffers_Library_SRCS}) - -# FLATBUFFERS_ASSERT should assert in Release as well. -# Redefine FLATBUFFERS_ASSERT macro definition. -# Declare as PUBLIC to cover asserts in all included header files. 
-target_compile_definitions(flatbuffers PUBLIC - FLATBUFFERS_ASSERT=fuzzer_assert_impl) -target_compile_definitions(flatbuffers PUBLIC - FLATBUFFERS_ASSERT_INCLUDE="${CMAKE_CURRENT_SOURCE_DIR}/fuzzer_assert.h") +option(BUILD_DEBUGGER "Compile a debugger with main() and without libFuzzer" OFF) if(NOT DEFINED FLATBUFFERS_MAX_PARSING_DEPTH) # Force checking of RecursionError in the test set(FLATBUFFERS_MAX_PARSING_DEPTH 8) endif() message(STATUS "FLATBUFFERS_MAX_PARSING_DEPTH: ${FLATBUFFERS_MAX_PARSING_DEPTH}") -target_compile_definitions(flatbuffers PRIVATE FLATBUFFERS_MAX_PARSING_DEPTH=8) + +# '-fsanitize=address' is not allowed together with '-fsanitize=memory'. +# MemorySanitizer will not work out-of-the-box, and will instead report false +# positives coming from uninstrumented code. The C++ standard library must be +# rebuilt: https://github.com/google/sanitizers/wiki/MemorySanitizerLibcxxHowTo +option(USE_MSAN "Use MSAN instead of ASASN" OFF) + +# Use Clang linker. +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld") + +# add_link_options(-stdlib=libc++) + +add_compile_options( + # -stdlib=libc++ # Use Clang libc++ instead of GNU. + -std=c++14 + -Wall + -pedantic + -Werror + -Wextra + -Wno-unused-parameter + -fsigned-char + -fno-omit-frame-pointer + -g # Generate source-level debug information + # -flto # enable link-time optimisation +) + +# https://llvm.org/docs/Passes.html - save IR to see call graph: +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -save-temps -flto") +# make one bitcode file:> llvm-link *.bc -o out.bc +# print call-graph:> opt out.bc -analyze -print-callgraph &> callgraph.txt + +# A special target with fuzzer+sanitizer flags. 
+add_library(fuzzer_config INTERFACE) + +target_compile_options( + fuzzer_config + INTERFACE + -fsanitize-coverage=edge,trace-cmp + $<$: + -fsanitize=fuzzer,undefined,address + > + $<$: + -fsanitize=fuzzer,undefined,memory + -fsanitize-memory-track-origins=2 + > +) + +target_link_libraries( + fuzzer_config + INTERFACE + $<$: + -fsanitize=fuzzer,undefined,address + > + $<$: + -fsanitize=fuzzer,undefined,memory + > +) + +set(FLATBUFFERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../") + +set(FlatBuffers_Library_SRCS + ${FLATBUFFERS_DIR}/include/flatbuffers/base.h + ${FLATBUFFERS_DIR}/include/flatbuffers/flatbuffers.h + ${FLATBUFFERS_DIR}/include/flatbuffers/hash.h + ${FLATBUFFERS_DIR}/include/flatbuffers/idl.h + ${FLATBUFFERS_DIR}/include/flatbuffers/util.h + ${FLATBUFFERS_DIR}/include/flatbuffers/reflection.h + ${FLATBUFFERS_DIR}/include/flatbuffers/reflection_generated.h + ${FLATBUFFERS_DIR}/include/flatbuffers/stl_emulation.h + ${FLATBUFFERS_DIR}/include/flatbuffers/flexbuffers.h + ${FLATBUFFERS_DIR}/include/flatbuffers/registry.h + ${FLATBUFFERS_DIR}/include/flatbuffers/minireflect.h + ${FLATBUFFERS_DIR}/src/idl_parser.cpp + ${FLATBUFFERS_DIR}/src/idl_gen_text.cpp + ${FLATBUFFERS_DIR}/src/reflection.cpp + ${FLATBUFFERS_DIR}/src/util.cpp + ${FLATBUFFERS_DIR}/tests/test_assert.cpp +) + +include_directories(${FLATBUFFERS_DIR}/include) +include_directories(${FLATBUFFERS_DIR}/tests) + +add_library(flatbuffers_fuzzed STATIC ${FlatBuffers_Library_SRCS}) +# Use PUBLIC to force 'fuzzer_config' for all dependent targets +target_link_libraries(flatbuffers_fuzzed PUBLIC fuzzer_config) + +# FLATBUFFERS_ASSERT should assert in Release as well. Redefine +# FLATBUFFERS_ASSERT macro definition. Declare as PUBLIC to cover asserts in all +# included header files. 
+target_compile_definitions( + flatbuffers_fuzzed + PUBLIC + FLATBUFFERS_ASSERT=fuzzer_assert_impl + FLATBUFFERS_ASSERT_INCLUDE="${CMAKE_CURRENT_SOURCE_DIR}/fuzzer_assert.h" + PRIVATE + FLATBUFFERS_MAX_PARSING_DEPTH=${FLATBUFFERS_MAX_PARSING_DEPTH} +) # Setup fuzzer tests. add_executable(scalar_fuzzer flatbuffers_scalar_fuzzer.cc) -target_link_libraries(scalar_fuzzer PRIVATE flatbuffers) +target_link_libraries(scalar_fuzzer PRIVATE flatbuffers_fuzzed) add_executable(parser_fuzzer flatbuffers_parser_fuzzer.cc) -target_link_libraries(parser_fuzzer PRIVATE flatbuffers) +target_link_libraries(parser_fuzzer PRIVATE flatbuffers_fuzzed) add_executable(verifier_fuzzer flatbuffers_verifier_fuzzer.cc) -target_link_libraries(verifier_fuzzer PRIVATE flatbuffers) +target_link_libraries(verifier_fuzzer PRIVATE flatbuffers_fuzzed) + +# Build debugger for weird cases found with fuzzer. +if(BUILD_DEBUGGER) + add_library(flatbuffers_nonfuzz STATIC ${FlatBuffers_Library_SRCS}) + target_compile_definitions( + flatbuffers_nonfuzz + PUBLIC + FLATBUFFERS_ASSERT=fuzzer_assert_impl + FLATBUFFERS_ASSERT_INCLUDE="${CMAKE_CURRENT_SOURCE_DIR}/fuzzer_assert.h" + PRIVATE + FLATBUFFERS_MAX_PARSING_DEPTH=${FLATBUFFERS_MAX_PARSING_DEPTH} + ) + add_executable(scalar_debug flatbuffers_scalar_fuzzer.cc scalar_debug.cpp) + target_link_libraries(scalar_debug PRIVATE flatbuffers_nonfuzz) +endif(BUILD_DEBUGGER) diff --git a/tests/fuzzer/flatbuffers_parser_fuzzer.cc b/tests/fuzzer/flatbuffers_parser_fuzzer.cc index 87dd2d251..6646cd6d0 100644 --- a/tests/fuzzer/flatbuffers_parser_fuzzer.cc +++ b/tests/fuzzer/flatbuffers_parser_fuzzer.cc @@ -26,7 +26,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { if (size < 3) return 0; const uint8_t flags = data[0]; // normalize to ascii alphabet - const int extra_rep_number = data[1] >= '0' ? (data[1] - '0') : 0; + const int extra_rep_number = + std::max(5, (data[1] >= '0' ?
(data[1] - '0') : 0)); data += 2; size -= 2; // bypass diff --git a/tests/fuzzer/flatbuffers_scalar_fuzzer.cc b/tests/fuzzer/flatbuffers_scalar_fuzzer.cc index 074a48885..ea8878a3d 100644 --- a/tests/fuzzer/flatbuffers_scalar_fuzzer.cc +++ b/tests/fuzzer/flatbuffers_scalar_fuzzer.cc @@ -101,8 +101,8 @@ class IntegerRegex : public RegexMatcher { static const std::vector<std::regex> re_list = { std::regex{ R"(^[-+]?[0-9]+$)", std::regex_constants::optimize }, - std::regex{ - R"(^[-+]?0[xX][0-9a-fA-F]+$)", std::regex_constants::optimize } + std::regex{ R"(^[-+]?0[xX][0-9a-fA-F]+$)", + std::regex_constants::optimize } }; return MatchRegexList(input, re_list); } @@ -117,8 +117,8 @@ class UIntegerRegex : public RegexMatcher { bool MatchNumber(const std::string &input) const override { static const std::vector<std::regex> re_list = { std::regex{ R"(^[+]?[0-9]+$)", std::regex_constants::optimize }, - std::regex{ - R"(^[+]?0[xX][0-9a-fA-F]+$)", std::regex_constants::optimize }, + std::regex{ R"(^[+]?0[xX][0-9a-fA-F]+$)", + std::regex_constants::optimize }, // accept -0 number std::regex{ R"(^[-](?:0[xX])?0+$)", std::regex_constants::optimize } }; @@ -216,7 +216,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { if (size < 3) return 0; const uint8_t flags = data[0]; // normalize to ascii alphabet - const int extra_rep_number = data[1] >= '0' ? (data[1] - '0') : 0; + const int extra_rep_number = + std::max(5, (data[1] >= '0' ? (data[1] - '0') : 0)); data += 2; size -= 2; // bypass @@ -232,6 +233,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { // We reject this by transform "/* text */ 12345" to "@* text */ 12345". BreakSequence(input, "//", '@'); // "//" -> "@/" BreakSequence(input, "/*", '@'); // "/*" -> "@*" + // { "$schema": "text" } is an exceptional case. + // This key:value pair is ignored by the parser. Numbers can not contain '$'. 
+ BreakSequence(input, "$schema", '@'); // "$schema" -> "@schema" // Break all known scalar functions (todo: add them to regex?): for (auto f : { "deg", "rad", "sin", "cos", "tan", "asin", "acos", "atan" }) { BreakSequence(input, f, '_'); // ident -> ident diff --git a/tests/fuzzer/scalar_debug.cpp b/tests/fuzzer/scalar_debug.cpp new file mode 100644 index 000000000..9ce9e5e18 --- /dev/null +++ b/tests/fuzzer/scalar_debug.cpp @@ -0,0 +1,28 @@ +#include <iostream> +#include "flatbuffers/util.h" + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); + +int main(int argc, char *argv[]) { + if (argc < 2) { + std::cerr << "Usage: scalar_debug \n"; + return 0; + } + std::string crash_file_name(argv[1]); + std::string crash_file_data; + auto done = + flatbuffers::LoadFile(crash_file_name.c_str(), true, &crash_file_data); + if (!done) { + std::cerr << "Can not load file: '" << crash_file_name << "'"; + return -1; + } + if (crash_file_data.size() < 3) { + std::cerr << "Invalid file data: '" << crash_file_data << "'"; + return -2; + } + auto rc = LLVMFuzzerTestOneInput( + reinterpret_cast<const uint8_t *>(crash_file_data.data()), + crash_file_data.size()); + std::cout << "LLVMFuzzerTestOneInput finished with code " << rc; + return rc; +} diff --git a/tests/test.cpp b/tests/test.cpp index 13bd66d5e..408194482 100644 --- a/tests/test.cpp +++ b/tests/test.cpp @@ -2019,6 +2019,9 @@ void InvalidFloatTest() { TestError("table T { F:float; } root_type T; { F:0x0 }", invalid_msg); TestError("table T { F:float; } root_type T; { F:-0x. }", invalid_msg); TestError("table T { F:float; } root_type T; { F:0x. }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:0Xe }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:\"0Xe\" }", invalid_msg); + TestError("table T { F:float; } root_type T; { F:\"nan(1)\" }", invalid_msg); // eE not exponent in hex-float! 
TestError("table T { F:float; } root_type T; { F:0x0.0e+ }", invalid_msg); TestError("table T { F:float; } root_type T; { F:0x0.0e- }", invalid_msg);