Integration of tidy-html5 into OSS-Fuzz (#2038)

* Basic skeleton for tidy-html5 OSS-Fuzz integration.

* Added two tidy-html5 fuzzers: One that exercises the HTML parser and one that exercises the configuration file parser.

* Added the copyright headers.

* Clarified why the tidy config fuzzer uses a file interface.
This commit is contained in:
Stefan Bucur 2018-12-19 23:18:01 -05:00 committed by jonathanmetzman
parent f9971c4914
commit b6aefa51cb
7 changed files with 256 additions and 0 deletions

View File

@ -0,0 +1,28 @@
# Copyright 2018 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
# Build image for the tidy-html5 fuzzers, based on the OSS-Fuzz base builder.
FROM gcr.io/oss-fuzz-base/base-builder
# NOTE(review): MAINTAINER is deprecated in current Docker releases in favour
# of a LABEL entry -- confirm against the project's conventions before changing.
MAINTAINER sbucur@google.com
# Install CMake and Ninja, which the build script uses to build the library.
RUN apt-get update && \
apt-get install -y --no-install-recommends \
cmake ninja-build && \
apt-get clean
# Fetch the upstream tidy-html5 sources into a "tidy-html5" directory and make
# it the working directory for subsequent steps.
RUN git clone https://github.com/htacg/tidy-html5.git tidy-html5
WORKDIR tidy-html5
# Copy the build script, the fuzzer sources/headers and the libFuzzer option
# files from the build context into $SRC.
COPY build.sh $SRC/
COPY *.c *.h *.options $SRC/

View File

@ -0,0 +1,33 @@
#!/bin/bash -eu
#
# Copyright 2018 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
# Configure and build tidy-html5 out-of-tree under $WORK using CMake + Ninja.
mkdir -p ${WORK}/tidy-html5
cd ${WORK}/tidy-html5
cmake -GNinja ${SRC}/tidy-html5/
ninja
# Build each fuzzer in two steps: compile the C harness with $CC, then link
# with $CXX against the fuzzing engine and the static tidy library.
# libtidys.a is referenced relative to the current (build) directory.
# NOTE(review): linking with $CXX is presumably needed because the fuzzing
# engine is C++ -- confirm. $CFLAGS/$CXXFLAGS are left unquoted so that they
# word-split into individual flags.
for fuzzer in tidy_config_fuzzer tidy_fuzzer; do
${CC} ${CFLAGS} -c -I${SRC}/tidy-html5/include \
$SRC/${fuzzer}.c -o ${fuzzer}.o
${CXX} ${CXXFLAGS} -std=c++11 ${fuzzer}.o \
-o $OUT/${fuzzer} \
-lFuzzingEngine libtidys.a
done
# Ship the libFuzzer option files alongside the fuzzer binaries.
cp ${SRC}/*.options ${OUT}/

View File

@ -0,0 +1,81 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Adapter utility from fuzzer input to a temporary file, for fuzzing APIs that
// require a file instead of an input buffer.
#ifndef FUZZER_TEMP_FILE_H_
#define FUZZER_TEMP_FILE_H_
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
// Pure-C interface for creating and cleaning up temporary files.
// Writes `size` bytes starting at `data` into a newly created temporary file
// and returns the file's path.
//
// The path is created from the template "/tmp/generate_temporary_file.XXXXXX"
// via mkstemp(). The returned string is heap-allocated and owned by the
// caller; pass it to fuzzer_release_tmpfile() to delete the file and free the
// string.
//
// Any failure (allocation, file creation, short write, or a failed
// flush/close) prints a diagnostic to stderr and calls abort(), so the
// function never returns NULL and never returns a path to a partially
// written file.
static char* fuzzer_get_tmpfile(const uint8_t* data, size_t size) {
  char* filename_buffer = strdup("/tmp/generate_temporary_file.XXXXXX");
  if (!filename_buffer) {
    perror("Failed to allocate file name buffer.");
    abort();
  }

  // mkstemp() replaces the trailing XXXXXX in place and opens the file.
  const int file_descriptor = mkstemp(filename_buffer);
  if (file_descriptor < 0) {
    perror("Failed to make temporary file.");
    abort();
  }

  FILE* file = fdopen(file_descriptor, "wb");
  if (!file) {
    perror("Failed to open file descriptor.");
    close(file_descriptor);
    abort();
  }

  const size_t bytes_written = fwrite(data, sizeof(uint8_t), size, file);
  if (bytes_written < size) {
    fprintf(stderr, "Failed to write all bytes to file (%zu out of %zu)\n",
            bytes_written, size);
    // The stream owns the descriptor after fdopen(); close it via the stream.
    fclose(file);
    abort();
  }

  // fwrite() only fills the stdio buffer; the data reaches the file when the
  // stream is flushed by fclose(). A failure here means the file on disk may
  // be truncated, so it must not be ignored.
  if (fclose(file) != 0) {
    perror("Failed to flush and close temporary file.");
    abort();
  }
  return filename_buffer;
}
// Deletes the file named by `filename` and frees the string itself.
//
// A failed unlink() is reported on stderr as a warning only; the string is
// freed in either case, so `filename` must not be used after this call.
static void fuzzer_release_tmpfile(char* filename) {
  const int unlink_status = unlink(filename);
  if (unlink_status != 0) {
    perror("WARNING: Failed to delete temporary file.");
  }
  free(filename);
}
// C++ RAII object for creating temporary files.
#ifdef __cplusplus
// Owns a temporary file holding a copy of the fuzzer input.
//
// The constructor writes `size` bytes from `data` to a new temporary file via
// fuzzer_get_tmpfile(); the destructor deletes the file and frees the path via
// fuzzer_release_tmpfile().
class FuzzerTemporaryFile {
 public:
  FuzzerTemporaryFile(const uint8_t* data, size_t size)
      : filename_(fuzzer_get_tmpfile(data, size)) {}

  ~FuzzerTemporaryFile() { fuzzer_release_tmpfile(filename_); }

  // Not copyable: a copy would share filename_, and both destructors would
  // unlink the same file and free the same pointer.
  FuzzerTemporaryFile(const FuzzerTemporaryFile&) = delete;
  FuzzerTemporaryFile& operator=(const FuzzerTemporaryFile&) = delete;

  // Path of the temporary file; valid for the lifetime of this object.
  const char* filename() const { return filename_; }

 private:
  char* filename_;  // Heap-allocated path, released in the destructor.
};
#endif
#endif // FUZZER_TEMP_FILE_H_

View File

@ -0,0 +1,12 @@
# Project metadata for the tidy-html5 fuzzing integration.
homepage: "http://www.html-tidy.org/"
# Primary contact address, followed by additional addresses under auto_ccs.
primary_contact: "sbucur@google.com"
auto_ccs:
- "nmarrow@google.com"
- "pmokati@google.com"
# Sanitizers listed for this project.
sanitizers:
- address
- memory
- undefined
# NOTE(review): label applied to the tidy_fuzzer target; the meaning of
# "sundew" is not visible from this file -- confirm with the infrastructure docs.
labels:
tidy_fuzzer:
- sundew

View File

@ -0,0 +1,31 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stddef.h>
#include <stdint.h>
#include "fuzzer_temp_file.h"
#include "tidy.h"
// Fuzzer entry point: loads the raw fuzzer input as a tidy configuration file.
//
// Always returns 0.
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  TidyDoc doc = tidyCreate();

  // When this fuzzer was written the configuration parser was reachable only
  // through a file-based API, so the input is first written to a temporary
  // file and loaded from there.
  char *config_path = fuzzer_get_tmpfile(data, size);
  tidyLoadConfig(doc, config_path);
  fuzzer_release_tmpfile(config_path);

  tidyRelease(doc);
  return 0;
}

View File

@ -0,0 +1,2 @@
[libfuzzer]
close_fd_mask = 3

View File

@ -0,0 +1,69 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "tidy.h"
#include "tidybuffio.h"
// Runs one parse / clean-and-repair / diagnostics / save cycle on
// `data_buffer` using a fresh tidy document.
//
// Diagnostics are directed to `error_buffer`; the serialized result is written
// to `output_buffer` only if every preceding stage returned a non-negative
// status. Aborts if the error buffer cannot be installed. The document is
// released before returning in all non-aborting cases.
void run_tidy_parser(TidyBuffer* data_buffer,
                     TidyBuffer* output_buffer,
                     TidyBuffer* error_buffer) {
  TidyDoc doc = tidyCreate();

  const int error_buffer_status = tidySetErrorBuffer(doc, error_buffer);
  if (error_buffer_status < 0) {
    abort();
  }

  tidyOptSetBool(doc, TidyXhtmlOut, yes);
  tidyOptSetBool(doc, TidyForceOutput, yes);

  // Each stage runs only if the previous one did not report a negative status.
  int status = tidyParseBuffer(doc, data_buffer);
  if (status >= 0) {
    status = tidyCleanAndRepair(doc);
  }
  if (status >= 0) {
    status = tidyRunDiagnostics(doc);
  }
  if (status >= 0) {
    tidySaveBuffer(doc, output_buffer);
  }

  tidyRelease(doc);
}
// Attaches a heap-allocated, NUL-terminated copy of the fuzzer input to
// `buffer`.
//
// The copy is made with strndup(), so it ends at the first NUL byte in `data`
// or after `size` bytes, whichever comes first. The attached length includes
// the terminating NUL. An exactly-sized, terminated copy makes it more likely
// that buffer overflows are exposed. Aborts if the copy cannot be allocated.
void attach_string_to_buffer(const uint8_t* data,
                             size_t size,
                             TidyBuffer* buffer) {
  char *input_copy = strndup((const char*)data, size);
  if (!input_copy) {
    perror("Could not allocate string buffer.");
    abort();
  }

  const size_t attached_size = strlen(input_copy) + 1;
  tidyBufAttach(buffer, (byte*)input_copy, attached_size);
}
// Fuzzer entry point: runs the tidy HTML parser pipeline on the raw input.
//
// Three buffers are initialized (input, output, errors), the input is attached
// to the first one, run_tidy_parser() is invoked, and the buffers are then
// freed in reverse order of initialization. Always returns 0.
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  TidyBuffer input;
  TidyBuffer output;
  TidyBuffer errors;

  tidyBufInit(&input);
  tidyBufInit(&output);
  tidyBufInit(&errors);

  attach_string_to_buffer(data, size, &input);
  run_tidy_parser(&input, &output, &errors);

  tidyBufFree(&errors);
  tidyBufFree(&output);
  tidyBufFree(&input);
  return 0;
}