From b6aefa51cbce6dcef23f07721e91a2dd0bcf02b1 Mon Sep 17 00:00:00 2001 From: Stefan Bucur <281483+stefanbucur@users.noreply.github.com> Date: Wed, 19 Dec 2018 23:18:01 -0500 Subject: [PATCH] Integration of tidy-html5 into OSS-Fuzz (#2038) * Basic skeleton for tidy-html5 OSS-Fuzz integration. * Added two tidy-html5 fuzzers: One that exercises the HTML parser and one that exercises the configuration file parser. * Added the copyright headers. * Clarified why the tidy config fuzzer uses a file interface. --- projects/tidy-html5/Dockerfile | 28 +++++++ projects/tidy-html5/build.sh | 33 ++++++++ projects/tidy-html5/fuzzer_temp_file.h | 81 +++++++++++++++++++ projects/tidy-html5/project.yaml | 12 +++ projects/tidy-html5/tidy_config_fuzzer.c | 31 +++++++ .../tidy-html5/tidy_config_fuzzer.options | 2 + projects/tidy-html5/tidy_fuzzer.c | 69 ++++++++++++++++ 7 files changed, 256 insertions(+) create mode 100644 projects/tidy-html5/Dockerfile create mode 100644 projects/tidy-html5/build.sh create mode 100644 projects/tidy-html5/fuzzer_temp_file.h create mode 100644 projects/tidy-html5/project.yaml create mode 100644 projects/tidy-html5/tidy_config_fuzzer.c create mode 100644 projects/tidy-html5/tidy_config_fuzzer.options create mode 100644 projects/tidy-html5/tidy_fuzzer.c diff --git a/projects/tidy-html5/Dockerfile b/projects/tidy-html5/Dockerfile new file mode 100644 index 000000000..e9a181725 --- /dev/null +++ b/projects/tidy-html5/Dockerfile @@ -0,0 +1,28 @@ +# Copyright 2018 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +FROM gcr.io/oss-fuzz-base/base-builder +MAINTAINER sbucur@google.com + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + cmake ninja-build && \ + apt-get clean + +RUN git clone https://github.com/htacg/tidy-html5.git tidy-html5 +WORKDIR tidy-html5 +COPY build.sh $SRC/ +COPY *.c *.h *.options $SRC/ diff --git a/projects/tidy-html5/build.sh b/projects/tidy-html5/build.sh new file mode 100644 index 000000000..840e9a505 --- /dev/null +++ b/projects/tidy-html5/build.sh @@ -0,0 +1,33 @@ +#!/bin/bash -eu +# +# Copyright 2018 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ + +mkdir -p ${WORK}/tidy-html5 +cd ${WORK}/tidy-html5 + +cmake -GNinja ${SRC}/tidy-html5/ +ninja + +for fuzzer in tidy_config_fuzzer tidy_fuzzer; do + ${CC} ${CFLAGS} -c -I${SRC}/tidy-html5/include \ + $SRC/${fuzzer}.c -o ${fuzzer}.o + ${CXX} ${CXXFLAGS} -std=c++11 ${fuzzer}.o \ + -o $OUT/${fuzzer} \ + -lFuzzingEngine libtidys.a +done + +cp ${SRC}/*.options ${OUT}/ diff --git a/projects/tidy-html5/fuzzer_temp_file.h b/projects/tidy-html5/fuzzer_temp_file.h new file mode 100644 index 000000000..fe25cabae --- /dev/null +++ b/projects/tidy-html5/fuzzer_temp_file.h @@ -0,0 +1,81 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Adapter utility from fuzzer input to a temporary file, for fuzzing APIs that +// require a file instead of an input buffer. + +#ifndef FUZZER_TEMP_FILE_H_ +#define FUZZER_TEMP_FILE_H_ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +// Pure-C interface for creating and cleaning up temporary files. 
// Writes `size` bytes from `data` to a newly created temporary file and
// returns the file's path.
//
// The returned path is a heap-allocated string owned by the caller; hand it to
// fuzzer_release_tmpfile() to delete the file and free the string. Every
// failure (allocation, file creation, short write, flush/close error) is
// fatal: a message is printed to stderr and the process aborts, so this
// function never returns NULL.
static char* fuzzer_get_tmpfile(const uint8_t* data, size_t size) {
  char* filename_buffer = strdup("/tmp/generate_temporary_file.XXXXXX");
  if (!filename_buffer) {
    perror("Failed to allocate file name buffer.");
    abort();
  }
  // mkstemp() rewrites the trailing XXXXXX in place with a unique suffix.
  const int file_descriptor = mkstemp(filename_buffer);
  if (file_descriptor < 0) {
    perror("Failed to make temporary file.");
    abort();
  }
  FILE* file = fdopen(file_descriptor, "wb");
  if (!file) {
    perror("Failed to open file descriptor.");
    // No stream exists yet, so the raw descriptor is closed directly.
    close(file_descriptor);
    abort();
  }
  // Skip fwrite() for an empty input so that a NULL `data` pointer is never
  // handed to it; the result is an empty file either way.
  const size_t bytes_written =
      (size > 0) ? fwrite(data, sizeof(uint8_t), size, file) : 0;
  if (bytes_written < size) {
    // After fdopen() the stream owns the descriptor, so it is released via
    // fclose() instead of close().
    fclose(file);
    fprintf(stderr, "Failed to write all bytes to file (%zu out of %zu)",
            bytes_written, size);
    abort();
  }
  // fwrite() may only have filled the stdio buffer; the bytes reach the file
  // when fclose() flushes. A failure here would leave a truncated file, so it
  // is treated like a short write.
  if (fclose(file) != 0) {
    perror("Failed to flush and close temporary file.");
    abort();
  }
  return filename_buffer;
}

// Deletes the temporary file named by `filename` and frees the string.
//
// `filename` must be a path returned by fuzzer_get_tmpfile(). A failed
// unlink() only produces a warning on stderr; the string is freed either way.
static void fuzzer_release_tmpfile(char* filename) {
  if (unlink(filename) != 0) {
    perror("WARNING: Failed to delete temporary file.");
  }
  free(filename);
}

// C++ RAII object for creating temporary files.
+ +#ifdef __cplusplus +class FuzzerTemporaryFile { /* C++ wrapper around fuzzer_get_tmpfile()/fuzzer_release_tmpfile(): the constructor writes `data` to a temporary file, the destructor passes the stored name to fuzzer_release_tmpfile(). NOTE(review): no copy constructor or copy assignment is declared here, so a copied object would release the same filename_ twice -- confirm instances are never copied. */ + public: + FuzzerTemporaryFile(const uint8_t* data, size_t size) + : filename_(fuzzer_get_tmpfile(data, size)) {} + + ~FuzzerTemporaryFile() { fuzzer_release_tmpfile(filename_); } + + const char* filename() const { return filename_; } /* path of the temporary file; the string stays owned by this object */ + + private: + char* filename_; /* name returned by fuzzer_get_tmpfile(); released in the destructor */ +}; +#endif + +#endif // FUZZER_TEMP_FILE_H_ diff --git a/projects/tidy-html5/project.yaml b/projects/tidy-html5/project.yaml new file mode 100644 index 000000000..c0fc8f490 --- /dev/null +++ b/projects/tidy-html5/project.yaml @@ -0,0 +1,12 @@ +homepage: "http://www.html-tidy.org/" +primary_contact: "sbucur@google.com" +auto_ccs: + - "nmarrow@google.com" + - "pmokati@google.com" +sanitizers: + - address + - memory + - undefined +labels: + tidy_fuzzer: + - sundew diff --git a/projects/tidy-html5/tidy_config_fuzzer.c b/projects/tidy-html5/tidy_config_fuzzer.c new file mode 100644 index 000000000..87b05a43f --- /dev/null +++ b/projects/tidy-html5/tidy_config_fuzzer.c @@ -0,0 +1,31 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include /* NOTE(review): the <...> header names on these bare #include lines are missing from this copy of the patch */ +#include + +#include "fuzzer_temp_file.h" +#include "tidy.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { /* libFuzzer entry point for the configuration parser: writes the raw input to a temporary file, passes that file to tidyLoadConfig() on a fresh TidyDoc, then cleans up. Always returns 0. */ + TidyDoc tdoc = tidyCreate(); + + // At the time this fuzzer was written, the configuration parser could + // only be exercised via a file interface. 
+ char* tmpfile = fuzzer_get_tmpfile(data, size); /* path of a temporary file holding the input; released below */ + tidyLoadConfig(tdoc, tmpfile); /* return value is not inspected */ + fuzzer_release_tmpfile(tmpfile); /* deletes the file and frees the path string */ + tidyRelease(tdoc); + return 0; +} diff --git a/projects/tidy-html5/tidy_config_fuzzer.options b/projects/tidy-html5/tidy_config_fuzzer.options new file mode 100644 index 000000000..329a6e27b --- /dev/null +++ b/projects/tidy-html5/tidy_config_fuzzer.options @@ -0,0 +1,2 @@ +[libfuzzer] +close_fd_mask = 3 diff --git a/projects/tidy-html5/tidy_fuzzer.c b/projects/tidy-html5/tidy_fuzzer.c new file mode 100644 index 000000000..3cf2732dd --- /dev/null +++ b/projects/tidy-html5/tidy_fuzzer.c @@ -0,0 +1,69 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include /* NOTE(review): the <...> header names on these bare #include lines are missing from this copy of the patch */ +#include +#include +#include + +#include "tidy.h" +#include "tidybuffio.h" + +void run_tidy_parser(TidyBuffer* data_buffer, + TidyBuffer* output_buffer, + TidyBuffer* error_buffer) { /* Creates a TidyDoc, installs error_buffer via tidySetErrorBuffer() (aborting on a negative result), enables TidyXhtmlOut and TidyForceOutput, then parses data_buffer and runs clean-and-repair and diagnostics. The document is saved to output_buffer only when all three calls return >= 0; it is released in every case. */ + TidyDoc tdoc = tidyCreate(); + if (tidySetErrorBuffer(tdoc, error_buffer) < 0) { + abort(); + } + tidyOptSetBool(tdoc, TidyXhtmlOut, yes); /* results of the two option calls are not checked */ + tidyOptSetBool(tdoc, TidyForceOutput, yes); + + if (tidyParseBuffer(tdoc, data_buffer) >= 0 && + tidyCleanAndRepair(tdoc) >= 0 && + tidyRunDiagnostics(tdoc) >= 0) { + tidySaveBuffer(tdoc, output_buffer); /* return value is not inspected */ + } + tidyRelease(tdoc); +} + +void attach_string_to_buffer(const uint8_t* data, + size_t size, + TidyBuffer* buffer) { /* Duplicates the input with strndup() and attaches the copy to `buffer` with tidyBufAttach(), passing strlen(copy) + 1 as the size; aborts if the allocation fails. */ + // Use a NULL-terminated copy to make it more likely to expose + // buffer overflows. 
+ char *data_string = strndup((const char*)data, size); /* NOTE(review): strndup() and the strlen() below stop at the first NUL byte, so input bytes after an embedded NUL are never attached */ + if (data_string == NULL) { + perror("Could not allocate string buffer."); + abort(); + } + tidyBufAttach(buffer, (byte*)data_string, strlen(data_string) + 1); /* the + 1 includes the terminating NUL in the attached size */ +} + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { /* libFuzzer entry point for the HTML parser: initialises three TidyBuffers, attaches a copy of the input to data_buffer, runs run_tidy_parser(), then frees all three buffers. Always returns 0. */ + TidyBuffer data_buffer; + TidyBuffer output_buffer; + TidyBuffer error_buffer; + tidyBufInit(&data_buffer); + tidyBufInit(&output_buffer); + tidyBufInit(&error_buffer); + + attach_string_to_buffer(data, size, &data_buffer); + run_tidy_parser(&data_buffer, &output_buffer, &error_buffer); + + tidyBufFree(&error_buffer); + tidyBufFree(&output_buffer); + tidyBufFree(&data_buffer); /* NOTE(review): presumably this also frees the strndup() copy attached in attach_string_to_buffer() -- confirm against the tidy buffer API */ + return 0; +}