From c91a9f64ee0f9e3086d55f2dbb2e63df1d5d6e0d Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Tue, 27 Jul 2021 14:46:13 +0100 Subject: [PATCH] tidy-html5: add more extensive fuzzing suite. (#6116) * tidy-html5: add more extensive fuzzing suite. * tidy-html5: cleanup general fuzzer. * we need a few more bytes than the size of the array. --- projects/tidy-html5/build.sh | 2 +- projects/tidy-html5/tidy_general_fuzzer.c | 160 ++++++++++++++++++ projects/tidy-html5/tidy_parse_file_fuzzer.c | 64 +++++++ .../tidy-html5/tidy_parse_string_fuzzer.c | 57 +++++++ projects/tidy-html5/tidy_xml_fuzzer.c | 66 ++++++++ 5 files changed, 348 insertions(+), 1 deletion(-) create mode 100644 projects/tidy-html5/tidy_general_fuzzer.c create mode 100644 projects/tidy-html5/tidy_parse_file_fuzzer.c create mode 100644 projects/tidy-html5/tidy_parse_string_fuzzer.c create mode 100644 projects/tidy-html5/tidy_xml_fuzzer.c diff --git a/projects/tidy-html5/build.sh b/projects/tidy-html5/build.sh index 518669f1b..7b75ab68e 100644 --- a/projects/tidy-html5/build.sh +++ b/projects/tidy-html5/build.sh @@ -22,7 +22,7 @@ cd ${WORK}/tidy-html5 cmake -GNinja ${SRC}/tidy-html5/ ninja -for fuzzer in tidy_config_fuzzer tidy_fuzzer; do +for fuzzer in tidy_config_fuzzer tidy_fuzzer tidy_xml_fuzzer tidy_parse_string_fuzzer tidy_parse_file_fuzzer tidy_general_fuzzer; do ${CC} ${CFLAGS} -c -I${SRC}/tidy-html5/include \ $SRC/${fuzzer}.c -o ${fuzzer}.o ${CXX} ${CXXFLAGS} -std=c++11 ${fuzzer}.o \ diff --git a/projects/tidy-html5/tidy_general_fuzzer.c b/projects/tidy-html5/tidy_general_fuzzer.c new file mode 100644 index 000000000..67a064c7f --- /dev/null +++ b/projects/tidy-html5/tidy_general_fuzzer.c @@ -0,0 +1,160 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "tidybuffio.h"
+#include "tidy.h"
+
+// All boolean options. These will be set randomly
+// based on the fuzzer data.
+TidyOptionId bool_options[] = {
+  TidyJoinClasses,
+  TidyJoinStyles,
+  TidyKeepFileTimes,
+  TidyKeepTabs,
+  TidyLiteralAttribs,
+  TidyLogicalEmphasis,
+  TidyLowerLiterals,
+  TidyMakeBare,
+  TidyFixUri,
+  TidyForceOutput,
+  TidyGDocClean,
+  TidyHideComments,
+  TidyMark,
+  TidyXmlTags,
+  TidyMakeClean,
+  TidyAnchorAsName,
+  TidyMergeEmphasis,
+  TidyMakeBare,  // repeated from above; each occurrence consumes its own input byte
+  TidyMetaCharset,
+  TidyMuteShow,
+  TidyNCR,
+  TidyNumEntities,
+  TidyOmitOptionalTags,
+  TidyPunctWrap,
+  TidyQuiet,
+  TidyQuoteAmpersand,
+  TidyQuoteMarks,
+  TidyQuoteNbsp,
+  TidyReplaceColor,
+  TidyShowFilename,
+  TidyShowInfo,
+  TidyShowMarkup,
+  TidyShowMetaChange,
+  TidyShowWarnings,
+  TidySkipNested,
+  TidyUpperCaseTags,
+  TidyWarnPropAttrs,
+  TidyWord2000,
+  TidyWrapAsp,
+  TidyWrapAttVals,
+  TidyWrapJste,
+  TidyWrapPhp,
+  TidyWrapScriptlets,
+  TidyWrapSection,
+  TidyWriteBack,
+};
+// Reads one byte from *data, advances *data and decrements *size, then sets tboolID to yes (even byte) or no (odd byte).
+void set_option(const uint8_t** data, size_t *size, TidyDoc *tdoc, TidyOptionId tboolID) {
+  uint8_t decider;
+  decider = **data;  // no bounds check here; TidyXhtml verifies the input length up front
+  *data += 1;
+  *size -= 1;
+  if (decider % 2 == 0) tidyOptSetBool( *tdoc, tboolID, yes );
+  else { tidyOptSetBool( *tdoc, tboolID, no ); }
+}
+// Configures a TidyDoc from the leading bytes of data, parses the rest via a temp file or a string, and returns 0; output is unused.
+int TidyXhtml(const uint8_t* data, size_t size, TidyBuffer* output, TidyBuffer* errbuf) {
+  uint8_t decider;
+
+  // We need enough data for picking all of the options. One byte per option.
+  if (size < 5+(sizeof(bool_options)/sizeof(bool_options[0]))) {
+    return 0;
+  }
+
+  TidyDoc tdoc = tidyCreate();
+
+  // Decide output format
+  decider = *data;
+  data++; size--;
+  if (decider % 3 == 0) tidyOptSetBool( tdoc, TidyXhtmlOut, yes );
+  else { tidyOptSetBool( tdoc, TidyXhtmlOut, no ); }
+
+  if (decider % 3 == 1) tidyOptSetBool( tdoc, TidyHtmlOut, yes );
+  else { tidyOptSetBool( tdoc, TidyHtmlOut, no ); }
+
+  if (decider % 3 == 2) tidyOptSetBool( tdoc, TidyXmlOut, yes );
+  else { tidyOptSetBool( tdoc, TidyXmlOut, no ); }
+
+  // Set options
+  for (size_t i = 0; i < sizeof(bool_options)/sizeof(bool_options[0]); i++) {
+    set_option(&data, &size, &tdoc, bool_options[i]);
+  }
+
+  // Set an error buffer.
+  tidySetErrorBuffer(tdoc, errbuf);
+
+  // Parse the data
+  decider = *data;
+  data++; size--;
+  switch (decider % 2) {
+    case 0: {
+      char filename[256];
+      snprintf(filename, sizeof(filename), "/tmp/libfuzzer.%d", (int)getpid());
+
+      FILE *fp = fopen(filename, "wb");
+      if (!fp) {
+        break;  // skip parsing but still reach tidyRelease() below instead of leaking tdoc
+      }
+      fwrite(data, size, 1, fp);
+      fclose(fp);
+
+      tidyParseFile(tdoc, filename);
+      unlink(filename);
+    }
+    break;
+    case 1: {
+      char *inp = calloc(size + 1, 1);  // zero-filled, so the copy below stays NUL-terminated
+      if (inp == NULL) break;  // allocation failed: nothing to parse, go to cleanup
+      memcpy(inp, data, size);
+      tidyParseString(tdoc, inp);
+      free(inp);
+    }
+  }
+
+  // Cleanup
+  tidyRelease( tdoc );
+
+  return 0;
+}
+// libFuzzer entry point: initialises the output and error buffers, runs TidyXhtml on the input, then frees both buffers.
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  TidyBuffer fuzz_toutput;
+  TidyBuffer fuzz_terror;
+
+  tidyBufInit(&fuzz_toutput);
+  tidyBufInit(&fuzz_terror);
+
+  TidyXhtml(data, size, &fuzz_toutput, &fuzz_terror);
+
+  tidyBufFree(&fuzz_toutput);
+  tidyBufFree(&fuzz_terror);
+
+  return 0;
+}
diff --git a/projects/tidy-html5/tidy_parse_file_fuzzer.c b/projects/tidy-html5/tidy_parse_file_fuzzer.c
new file mode 100644
index 000000000..febeaa6ca
--- /dev/null
+++ b/projects/tidy-html5/tidy_parse_file_fuzzer.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with
the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "tidybuffio.h"
+#include "tidy.h"
+
+// Writes data to a per-process temp file and parses it with tidyParseFile() in XHTML-output mode; returns 0; output is unused.
+int TidyXhtml(const uint8_t* data, size_t size, TidyBuffer* output, TidyBuffer* errbuf) {
+  Bool ok;
+
+  TidyDoc tdoc = tidyCreate();
+
+  ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );
+  if (ok) tidySetErrorBuffer(tdoc, errbuf);
+
+  char filename[256];
+  snprintf(filename, sizeof(filename), "/tmp/libfuzzer.%d", (int)getpid());
+
+  FILE *fp = fopen(filename, "wb");
+  if (fp) {
+    fwrite(data, size, 1, fp);
+    fclose(fp);
+
+    tidyParseFile(tdoc, filename);
+    unlink(filename);
+  }
+
+  // Release the document on every path, including when the temp file could not be created.
+  tidyRelease( tdoc );
+
+  return 0;
+}
+// libFuzzer entry point: initialises the output and error buffers, runs TidyXhtml on the input, then frees both buffers.
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  TidyBuffer fuzz_toutput;
+  TidyBuffer fuzz_terror;
+
+  tidyBufInit(&fuzz_toutput);
+  tidyBufInit(&fuzz_terror);
+
+  TidyXhtml(data, size, &fuzz_toutput, &fuzz_terror);
+
+  tidyBufFree(&fuzz_toutput);
+  tidyBufFree(&fuzz_terror);
+  return 0;
+}
+
diff --git a/projects/tidy-html5/tidy_parse_string_fuzzer.c b/projects/tidy-html5/tidy_parse_string_fuzzer.c
new file mode 100644
index 000000000..3d1f7e27d
--- /dev/null
+++ b/projects/tidy-html5/tidy_parse_string_fuzzer.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "tidybuffio.h"
+#include "tidy.h"
+
+// Parses the NUL-terminated input in XHTML-output mode, then cleans, runs diagnostics and saves into output; returns 0.
+int TidyXhtml(const char* input, TidyBuffer* output, TidyBuffer* errbuf) {
+  TidyDoc tdoc = tidyCreate();
+  tidyOptSetBool( tdoc, TidyXhtmlOut, yes );
+  tidySetErrorBuffer(tdoc, errbuf);
+
+  tidyParseString(tdoc, input);
+
+  tidyCleanAndRepair(tdoc);
+  tidyRunDiagnostics(tdoc);
+  tidyOptSetBool(tdoc, TidyForceOutput, yes);
+  tidySaveBuffer(tdoc, output);
+  tidyRelease( tdoc );
+  return 0;
+}
+// libFuzzer entry point: copies the input into a NUL-terminated heap string and hands it to TidyXhtml.
+int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  char *fuzz_inp = calloc(size + 1, 1);  // zero-filled, so the copy below stays NUL-terminated
+  if (fuzz_inp == NULL) return 0;  // allocation failed: skip this input rather than dereference NULL
+  memcpy(fuzz_inp, data, size);
+
+  TidyBuffer fuzz_toutput;
+  TidyBuffer fuzz_terror;
+
+  tidyBufInit(&fuzz_toutput);
+  tidyBufInit(&fuzz_terror);
+
+  TidyXhtml(fuzz_inp, &fuzz_toutput, &fuzz_terror);
+
+  tidyBufFree(&fuzz_toutput);
+  tidyBufFree(&fuzz_terror);
+  free(fuzz_inp);
+  return 0;
+}
+
diff --git a/projects/tidy-html5/tidy_xml_fuzzer.c b/projects/tidy-html5/tidy_xml_fuzzer.c
new file mode 100644
index 000000000..c40551193
--- /dev/null
+++ b/projects/tidy-html5/tidy_xml_fuzzer.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "tidy.h"
+#include "tidybuffio.h"
+#include "tidyenum.h"
+#include "tidyplatform.h"
+// Parses fuzz_inp with XML input/output options set and saves the result into toutput; errors are routed to terror.
+void TidyXml(char *fuzz_inp, TidyBuffer *toutput,
+                TidyBuffer *terror) {
+  TidyDoc tdoc = tidyCreate();
+  tidyBufClear(toutput);
+  tidyBufClear(terror);
+  if (tidyOptSetBool(tdoc, TidyXmlOut, yes)) {  // the rest runs only if this option could be set
+    tidySetCharEncoding(tdoc, "utf8");
+    tidySetErrorBuffer(tdoc, terror);
+    tidyOptSetInt(tdoc, TidyWrapLen, 0);
+    tidyOptSetBool(tdoc, TidyXmlTags, yes);
+    tidyOptSetBool(tdoc, TidyQuoteNbsp, no);
+    tidyOptSetBool(tdoc, TidyNumEntities, yes);
+    tidyOptSetBool(tdoc, TidyQuiet, yes);
+    tidyOptSetBool(tdoc, TidyMark, no);
+    tidyOptSetBool(tdoc, TidyShowWarnings, no);
+    tidyParseString(tdoc, fuzz_inp);
+    tidyCleanAndRepair(tdoc);
+    tidyRunDiagnostics(tdoc);
+    tidySaveBuffer(tdoc, toutput);
+  }
+
+  tidyRelease(tdoc);
+}
+// libFuzzer entry point: copies the input into a NUL-terminated heap string and hands it to TidyXml.
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  char *fuzz_inp = calloc(size + 1, 1);  // zero-filled, so the copy below stays NUL-terminated
+  if (fuzz_inp == NULL) return 0;  // allocation failed: skip this input rather than dereference NULL
+  memcpy(fuzz_inp, data, size);
+
+  TidyBuffer fuzz_toutput;
+  TidyBuffer fuzz_terror;
+
+  tidyBufInit(&fuzz_toutput);
+  tidyBufInit(&fuzz_terror);
+
+  TidyXml(fuzz_inp, &fuzz_toutput, &fuzz_terror);
+
+  free(fuzz_inp);
+  tidyBufFree(&fuzz_toutput);
+  tidyBufFree(&fuzz_terror);
+  return 0;
+}