377 lines
13 KiB
C++
377 lines
13 KiB
C++
/*
|
|
* Copyright 2014 Google Inc. All rights reserved.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
|
|
#include <algorithm>
|
|
#include <clocale>
|
|
#include <memory>
|
|
#include <regex>
|
|
#include <string>
|
|
|
|
#include "flatbuffers/idl.h"
|
|
#include "test_init.h"
|
|
|
|
static constexpr size_t kMinInputLength = 1;
|
|
static constexpr size_t kMaxInputLength = 3000;
|
|
|
|
static constexpr uint8_t flags_scalar_type = 0x0F; // type of scalar value
|
|
static constexpr uint8_t flags_quotes_kind = 0x10; // quote " or '
|
|
// reserved for future: json {named} or [unnamed]
|
|
// static constexpr uint8_t flags_json_bracer = 0x20;
|
|
|
|
// Find all 'subj' sub-strings and replace first character of sub-string.
|
|
// BreakSequence("testest","tes", 'X') -> "XesXest".
|
|
// BreakSequence("xxx","xx", 'Y') -> "YYx".
|
|
static void BreakSequence(std::string &s, const char *subj, char repl) {
|
|
size_t pos = 0;
|
|
while (pos = s.find(subj, pos), pos != std::string::npos) {
|
|
s.at(pos) = repl;
|
|
pos++;
|
|
}
|
|
}
|
|
|
|
// Remove all leading and trailing symbols matched with pattern set.
|
|
// StripString("xy{xy}y", "xy") -> "{xy}"
|
|
static std::string StripString(const std::string &s, const char *pattern,
|
|
size_t *pos = nullptr) {
|
|
if (pos) *pos = 0;
|
|
// leading
|
|
auto first = s.find_first_not_of(pattern);
|
|
if (std::string::npos == first) return "";
|
|
if (pos) *pos = first;
|
|
// trailing
|
|
auto last = s.find_last_not_of(pattern);
|
|
assert(last < s.length());
|
|
assert(first <= last);
|
|
return s.substr(first, last - first + 1);
|
|
}
|
|
|
|
class RegexMatcher {
|
|
protected:
|
|
virtual bool MatchNumber(const std::string &input) const = 0;
|
|
|
|
public:
|
|
virtual ~RegexMatcher() = default;
|
|
|
|
struct MatchResult {
|
|
size_t pos{ 0 };
|
|
size_t len{ 0 };
|
|
bool res{ false };
|
|
bool quoted{ false };
|
|
};
|
|
|
|
MatchResult Match(const std::string &input) const {
|
|
MatchResult r;
|
|
// strip leading and trailing "spaces" accepted by flatbuffer
|
|
auto test = StripString(input, "\t\r\n ", &r.pos);
|
|
r.len = test.size();
|
|
// check quotes
|
|
if (test.size() >= 2) {
|
|
auto fch = test.front();
|
|
auto lch = test.back();
|
|
r.quoted = (fch == lch) && (fch == '\'' || fch == '\"');
|
|
if (r.quoted) {
|
|
// remove quotes for regex test
|
|
test = test.substr(1, test.size() - 2);
|
|
}
|
|
}
|
|
// Fast check:
|
|
if (test.empty()) return r;
|
|
// A string with a valid scalar shouldn't have non-ascii or non-printable
|
|
// symbols.
|
|
for (auto c : test) {
|
|
if ((c < ' ') || (c > '~')) return r;
|
|
}
|
|
// Check with regex
|
|
r.res = MatchNumber(test);
|
|
return r;
|
|
}
|
|
|
|
bool MatchRegexList(const std::string &input,
|
|
const std::vector<std::regex> &re_list) const {
|
|
auto str = StripString(input, " ");
|
|
if (str.empty()) return false;
|
|
for (auto &re : re_list) {
|
|
std::smatch match;
|
|
if (std::regex_match(str, match, re)) return true;
|
|
}
|
|
return false;
|
|
}
|
|
};
|
|
|
|
class IntegerRegex : public RegexMatcher {
|
|
protected:
|
|
bool MatchNumber(const std::string &input) const override {
|
|
static const std::vector<std::regex> re_list = {
|
|
std::regex{ R"(^[-+]?[0-9]+$)", std::regex_constants::optimize },
|
|
|
|
std::regex{ R"(^[-+]?0[xX][0-9a-fA-F]+$)",
|
|
std::regex_constants::optimize }
|
|
};
|
|
return MatchRegexList(input, re_list);
|
|
}
|
|
|
|
public:
|
|
IntegerRegex() = default;
|
|
virtual ~IntegerRegex() = default;
|
|
};
|
|
|
|
class UIntegerRegex : public RegexMatcher {
|
|
protected:
|
|
bool MatchNumber(const std::string &input) const override {
|
|
static const std::vector<std::regex> re_list = {
|
|
std::regex{ R"(^[+]?[0-9]+$)", std::regex_constants::optimize },
|
|
std::regex{ R"(^[+]?0[xX][0-9a-fA-F]+$)",
|
|
std::regex_constants::optimize },
|
|
// accept -0 number
|
|
std::regex{ R"(^[-](?:0[xX])?0+$)", std::regex_constants::optimize }
|
|
};
|
|
return MatchRegexList(input, re_list);
|
|
}
|
|
|
|
public:
|
|
UIntegerRegex() = default;
|
|
virtual ~UIntegerRegex() = default;
|
|
};
|
|
|
|
class BooleanRegex : public IntegerRegex {
|
|
protected:
|
|
bool MatchNumber(const std::string &input) const override {
|
|
if (input == "true" || input == "false") return true;
|
|
return IntegerRegex::MatchNumber(input);
|
|
}
|
|
|
|
public:
|
|
BooleanRegex() = default;
|
|
virtual ~BooleanRegex() = default;
|
|
};
|
|
|
|
class FloatRegex : public RegexMatcher {
|
|
protected:
|
|
bool MatchNumber(const std::string &input) const override {
|
|
static const std::vector<std::regex> re_list = {
|
|
// hex-float
|
|
std::regex{
|
|
R"(^[-+]?0[xX](?:(?:[.][0-9a-fA-F]+)|(?:[0-9a-fA-F]+[.][0-9a-fA-F]*)|(?:[0-9a-fA-F]+))[pP][-+]?[0-9]+$)",
|
|
std::regex_constants::optimize },
|
|
// dec-float
|
|
std::regex{
|
|
R"(^[-+]?(?:(?:[.][0-9]+)|(?:[0-9]+[.][0-9]*)|(?:[0-9]+))(?:[eE][-+]?[0-9]+)?$)",
|
|
std::regex_constants::optimize },
|
|
|
|
std::regex{ R"(^[-+]?(?:nan|inf|infinity)$)",
|
|
std::regex_constants::optimize | std::regex_constants::icase }
|
|
};
|
|
return MatchRegexList(input, re_list);
|
|
}
|
|
|
|
public:
|
|
FloatRegex() = default;
|
|
virtual ~FloatRegex() = default;
|
|
};
|
|
|
|
class ScalarReferenceResult {
|
|
private:
|
|
ScalarReferenceResult(const char *_type, RegexMatcher::MatchResult _matched)
|
|
: type(_type), matched(_matched) {}
|
|
|
|
public:
|
|
// Decode scalar type and check if the input string satisfies the scalar type.
|
|
static ScalarReferenceResult Check(uint8_t code, const std::string &input) {
|
|
switch (code) {
|
|
case 0x0: return { "double", FloatRegex().Match(input) };
|
|
case 0x1: return { "float", FloatRegex().Match(input) };
|
|
case 0x2: return { "int8", IntegerRegex().Match(input) };
|
|
case 0x3: return { "int16", IntegerRegex().Match(input) };
|
|
case 0x4: return { "int32", IntegerRegex().Match(input) };
|
|
case 0x5: return { "int64", IntegerRegex().Match(input) };
|
|
case 0x6: return { "uint8", UIntegerRegex().Match(input) };
|
|
case 0x7: return { "uint16", UIntegerRegex().Match(input) };
|
|
case 0x8: return { "uint32", UIntegerRegex().Match(input) };
|
|
case 0x9: return { "uint64", UIntegerRegex().Match(input) };
|
|
case 0xA: return { "bool", BooleanRegex().Match(input) };
|
|
default: return { "float", FloatRegex().Match(input) };
|
|
};
|
|
}
|
|
|
|
const char *type;
|
|
const RegexMatcher::MatchResult matched;
|
|
};
|
|
|
|
bool Parse(flatbuffers::Parser &parser, const std::string &json,
|
|
std::string *_text) {
|
|
auto done = parser.ParseJson(json.c_str());
|
|
if (done) {
|
|
TEST_EQ(GenerateText(parser, parser.builder_.GetBufferPointer(), _text),
|
|
true);
|
|
} else {
|
|
*_text = parser.error_;
|
|
}
|
|
return done;
|
|
}
|
|
|
|
// Utility for test run.
|
|
OneTimeTestInit OneTimeTestInit::one_time_init_;
|
|
|
|
// llvm std::regex have problem with stack overflow, limit maximum length.
|
|
// ./scalar_fuzzer -max_len=3000
|
|
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|
// Reserve one byte for Parser flags and one byte for repetition counter.
|
|
if (size < 3) return 0;
|
|
const uint8_t flags = data[0];
|
|
// normalize to ascii alphabet
|
|
const int extra_rep_number =
|
|
std::max(5, (data[1] > '0' ? (data[1] - '0') : 0));
|
|
data += 2;
|
|
size -= 2; // bypass
|
|
|
|
// Guarantee 0-termination.
|
|
const std::string original(reinterpret_cast<const char *>(data), size);
|
|
auto input = std::string(original.c_str()); // until '\0'
|
|
if (input.size() < kMinInputLength || input.size() > kMaxInputLength)
|
|
return 0;
|
|
|
|
// Break comments in json to avoid complexity with regex matcher.
|
|
// The string " 12345 /* text */" will be accepted if insert it to string
|
|
// expression: "table X { Y: " + " 12345 /* text */" + "; }.
|
|
// But strings like this will complicate regex matcher.
|
|
// We reject this by transform "/* text */ 12345" to "@* text */ 12345".
|
|
BreakSequence(input, "//", '@'); // "//" -> "@/"
|
|
BreakSequence(input, "/*", '@'); // "/*" -> "@*"
|
|
// { "$schema: "text" } is exceptional case.
|
|
// This key:value ignored by the parser. Numbers can not have $.
|
|
BreakSequence(input, "$schema", '@'); // "$schema" -> "@schema"
|
|
// Break all known scalar functions (todo: add them to regex?):
|
|
for (auto f : { "deg", "rad", "sin", "cos", "tan", "asin", "acos", "atan" }) {
|
|
BreakSequence(input, f, '_'); // ident -> ident
|
|
}
|
|
|
|
// Extract type of scalar from 'flags' and check if the input string satisfies
|
|
// the scalar type.
|
|
const auto ref_res =
|
|
ScalarReferenceResult::Check(flags & flags_scalar_type, input);
|
|
auto &recheck = ref_res.matched;
|
|
|
|
// Create parser
|
|
flatbuffers::IDLOptions opts;
|
|
opts.force_defaults = true;
|
|
opts.output_default_scalars_in_json = true;
|
|
opts.indent_step = -1;
|
|
opts.strict_json = true;
|
|
|
|
flatbuffers::Parser parser(opts);
|
|
auto schema =
|
|
"table X { Y: " + std::string(ref_res.type) + "; } root_type X;";
|
|
TEST_EQ_FUNC(parser.Parse(schema.c_str()), true);
|
|
|
|
// The fuzzer can adjust the number repetition if a side-effects have found.
|
|
// Each test should pass at least two times to ensure that the parser doesn't
|
|
// have any hidden-states or locale-depended effects.
|
|
for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
|
|
// Each even run (0,2,4..) will test locale independed code.
|
|
auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2));
|
|
// Set new locale.
|
|
if (use_locale) {
|
|
FLATBUFFERS_ASSERT(setlocale(LC_ALL, OneTimeTestInit::test_locale()));
|
|
}
|
|
|
|
// Parse original input as-is.
|
|
auto orig_scalar = "{\"Y\" : " + input + "}";
|
|
std::string orig_back;
|
|
auto orig_done = Parse(parser, orig_scalar, &orig_back);
|
|
|
|
if (recheck.res != orig_done) {
|
|
// look for "does not fit" or "doesn't fit" or "out of range"
|
|
auto not_fit =
|
|
(true == recheck.res)
|
|
? ((orig_back.find("does not fit") != std::string::npos) ||
|
|
(orig_back.find("out of range") != std::string::npos))
|
|
: false;
|
|
|
|
if (false == not_fit) {
|
|
TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done,
|
|
recheck.res);
|
|
TEST_EQ_STR(orig_back.c_str(),
|
|
input.substr(recheck.pos, recheck.len).c_str());
|
|
TEST_EQ_FUNC(orig_done, recheck.res);
|
|
}
|
|
}
|
|
|
|
// Try to make quoted string and test it.
|
|
std::string qouted_input;
|
|
if (true == recheck.quoted) {
|
|
// we can't simply remove quotes, they may be nested "'12'".
|
|
// Original string "\'12\'" converted to "'12'".
|
|
// The string can be an invalid string by JSON rules, but after quotes
|
|
// removed can transform to valid.
|
|
assert(recheck.len >= 2);
|
|
} else {
|
|
const auto quote = (flags & flags_quotes_kind) ? '\"' : '\'';
|
|
qouted_input = input; // copy
|
|
qouted_input.insert(recheck.pos + recheck.len, 1, quote);
|
|
qouted_input.insert(recheck.pos, 1, quote);
|
|
}
|
|
|
|
// Test quoted version of the string
|
|
if (!qouted_input.empty()) {
|
|
auto fix_scalar = "{\"Y\" : " + qouted_input + "}";
|
|
std::string fix_back;
|
|
auto fix_done = Parse(parser, fix_scalar, &fix_back);
|
|
|
|
if (orig_done != fix_done) {
|
|
TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done,
|
|
orig_done);
|
|
TEST_EQ_STR(fix_back.c_str(), orig_back.c_str());
|
|
}
|
|
if (orig_done) { TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); }
|
|
TEST_EQ_FUNC(fix_done, orig_done);
|
|
}
|
|
|
|
// Create new parser and test default value
|
|
if (true == orig_done) {
|
|
flatbuffers::Parser def_parser(opts); // re-use options
|
|
auto def_schema = "table X { Y: " + std::string(ref_res.type) + " = " +
|
|
input + "; } root_type X;" +
|
|
"{}"; // <- with empty json {}!
|
|
|
|
auto def_done = def_parser.Parse(def_schema.c_str());
|
|
if (false == def_done) {
|
|
TEST_OUTPUT_LINE("Stage 3.1 failed with _error = %s",
|
|
def_parser.error_.c_str());
|
|
FLATBUFFERS_ASSERT(false);
|
|
}
|
|
// Compare with print.
|
|
std::string ref_string, def_string;
|
|
FLATBUFFERS_ASSERT(GenerateText(
|
|
parser, parser.builder_.GetBufferPointer(), &ref_string));
|
|
FLATBUFFERS_ASSERT(GenerateText(
|
|
def_parser, def_parser.builder_.GetBufferPointer(), &def_string));
|
|
if (ref_string != def_string) {
|
|
TEST_OUTPUT_LINE("Stage 3.2 failed: '%s' != '%s'", def_string.c_str(),
|
|
ref_string.c_str());
|
|
FLATBUFFERS_ASSERT(false);
|
|
}
|
|
}
|
|
|
|
// Restore locale.
|
|
if (use_locale) { FLATBUFFERS_ASSERT(setlocale(LC_ALL, "C")); }
|
|
}
|
|
return 0;
|
|
}
|