From 06224212901a9d9b63fe6cab11e9d6d7de4c8153 Mon Sep 17 00:00:00 2001 From: Stefan Bucur <281483+stefanbucur@users.noreply.github.com> Date: Wed, 21 Nov 2018 15:21:56 -0500 Subject: [PATCH] [libxml2] Add a libXML fuzzer that exercises its file-based parsing interface. (#1967) --- projects/libxml2/Dockerfile | 4 +- projects/libxml2/build.sh | 2 +- projects/libxml2/byte_stream.h | 128 ++++++++++++++++++ projects/libxml2/fuzzer_temp_file.h | 81 +++++++++++ .../libxml2_xml_reader_for_file_fuzzer.cc | 50 +++++++ 5 files changed, 261 insertions(+), 4 deletions(-) create mode 100644 projects/libxml2/byte_stream.h create mode 100644 projects/libxml2/fuzzer_temp_file.h create mode 100644 projects/libxml2/libxml2_xml_reader_for_file_fuzzer.cc diff --git a/projects/libxml2/Dockerfile b/projects/libxml2/Dockerfile index 3f144d2ab..fc821c806 100644 --- a/projects/libxml2/Dockerfile +++ b/projects/libxml2/Dockerfile @@ -22,6 +22,4 @@ RUN git clone --depth 1 https://gitlab.gnome.org/GNOME/libxml2.git WORKDIR libxml2 COPY build.sh $SRC/ -COPY libxml2_xml_read_memory_fuzzer.* \ - libxml2_xml_regexp_compile_fuzzer.* \ - xml.dict $SRC/ +COPY *.cc *.h *.options *.dict $SRC/ diff --git a/projects/libxml2/build.sh b/projects/libxml2/build.sh index bbfcf4181..da97cad6c 100755 --- a/projects/libxml2/build.sh +++ b/projects/libxml2/build.sh @@ -21,7 +21,7 @@ make -j$(nproc) clean make -j$(nproc) all -for fuzzer in libxml2_xml_read_memory_fuzzer libxml2_xml_regexp_compile_fuzzer; do +for fuzzer in libxml2_xml_read_memory_fuzzer libxml2_xml_reader_for_file_fuzzer libxml2_xml_regexp_compile_fuzzer; do $CXX $CXXFLAGS -std=c++11 -Iinclude/ \ $SRC/$fuzzer.cc -o $OUT/$fuzzer \ -lFuzzingEngine .libs/libxml2.a diff --git a/projects/libxml2/byte_stream.h b/projects/libxml2/byte_stream.h new file mode 100644 index 000000000..6a4257891 --- /dev/null +++ b/projects/libxml2/byte_stream.h @@ -0,0 +1,128 @@ +// Copyright 2018 Google Inc. 
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef BYTE_STREAM_H_
#define BYTE_STREAM_H_

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <string>

// Wrapper for fuzzer input strings that helps consume and interpret the data
// as a sequence of values, such as strings and PODs.
//
// The stream never reads past `size` bytes: every getter either consumes the
// bytes it needs or, when too few remain, consumes whatever is left and
// returns a default value.
class ByteStream {
 public:
  // Does not take ownership of data; `data` must stay valid for as long as
  // this object is used.
  ByteStream(const uint8_t* data, size_t size)
      : data_(data), size_(size), position_(0) {}

  ByteStream(const ByteStream&) = delete;
  ByteStream& operator=(const ByteStream&) = delete;

  // Returns a string. Strings are obtained from the byte stream by reading a
  // size_t N followed by N char elements. If there are fewer than N bytes left
  // in the stream, this returns as many bytes as are available.
  std::string GetNextString();

  // The following GetNext{integer type} functions all return the next
  // sizeof(integer type) bytes in the stream or 0 if there is insufficient
  // capacity. In the insufficient case the remaining bytes are still consumed.
  size_t GetNextSizeT() { return ConsumeCopyOrDefault<size_t>(0); }
  int GetNextInt() { return ConsumeCopyOrDefault<int>(0); }
  uint8_t GetNextUint8() { return ConsumeCopyOrDefault<uint8_t>(0); }
  int64_t GetNextInt64() { return ConsumeCopyOrDefault<int64_t>(0); }

  // Returns an integer in the range [0,n) for n > 0 and consumes up to
  // sizeof(int) bytes. For n<=0, returns 0 and consumes 0 bytes.
  int GetNextInt(int n);

 private:
  // The remaining capacity of the ByteStream.
  size_t capacity() const { return size_ - position_; }

  // Returns data_ + position_ and then advances position_ by requested bytes.
  //
  // This is the canonical way for the class to request regions of memory
  // or to advance the position by requested bytes. This operation is unchecked
  // for maintaining that position_ <= size_. Requesting 0 bytes always
  // succeeds.
  const uint8_t* UncheckedConsume(size_t requested) {
    const uint8_t* region = data_ + position_;
    position_ += requested;
    return region;
  }

  // Directly initialize T by copying sizeof(T) bytes into results if there is
  // sufficient capacity in the stream. If there is not sufficient capacity
  // result is unmodified and the rest of the stream is discarded.
  template <typename T>
  void ConsumeBytesByCopy(T* result) {
    constexpr size_t type_size = sizeof(T);
    if (type_size <= capacity()) {
      const uint8_t* region = UncheckedConsume(type_size);
      // memcpy (rather than a pointer cast) keeps unaligned reads well-defined.
      std::memcpy(static_cast<void*>(result), region, type_size);
    } else {
      // Consume the remainder of data_.
      UncheckedConsume(capacity());
    }
  }

  // A helper function for using ConsumeBytesByCopy and returning a default
  // value `t` if there is insufficient capacity to read a full `T`. T should
  // probably be a primitive type.
  template <typename T>
  T ConsumeCopyOrDefault(T t) {
    ConsumeBytesByCopy(&t);
    return t;
  }

  const uint8_t* data_;
  const size_t size_;
  size_t position_;
};

inline std::string ByteStream::GetNextString() {
  const size_t requested_size = GetNextSizeT();
  const size_t consumed_size = std::min(requested_size, capacity());
  if (consumed_size == 0) {
    // Nothing to copy; also avoids building a string from a possibly null
    // data pointer when the stream was constructed over an empty input.
    return std::string();
  }
  const uint8_t* selection = UncheckedConsume(consumed_size);
  return std::string(reinterpret_cast<const char*>(selection), consumed_size);
}

inline int ByteStream::GetNextInt(int n) {
  if (n <= 0) {
    return 0;
  }
  // We grab as few bytes as possible as n will often be fixed.
  int selection = 0;
  if (n <= std::numeric_limits<uint8_t>::max()) {
    selection = static_cast<int>(GetNextUint8());
  } else if (n <= std::numeric_limits<uint16_t>::max()) {
    selection = ConsumeCopyOrDefault<uint16_t>(0);
  } else {
    selection = GetNextInt();
  }

  // Take the absolute value of selection w/o undefined behavior.
  // If selection is INT_MIN, return 0.
  if (selection == std::numeric_limits<int>::min()) {
    selection = 0;
  } else if (selection < 0) {
    selection = -selection;
  }
  return selection % n;
}

#endif  // BYTE_STREAM_H_

// diff --git a/projects/libxml2/fuzzer_temp_file.h b/projects/libxml2/fuzzer_temp_file.h
// new file mode 100644
// index 000000000..fe25cabae
// --- /dev/null
// +++ b/projects/libxml2/fuzzer_temp_file.h
// @@ -0,0 +1,81 @@

// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Adapter utility from fuzzer input to a temporary file, for fuzzing APIs that
// require a file instead of an input buffer.

#ifndef FUZZER_TEMP_FILE_H_
#define FUZZER_TEMP_FILE_H_

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

// Pure-C interface for creating and cleaning up temporary files.

// Writes `size` bytes from `data` into a newly created file under /tmp and
// returns the heap-allocated file name. Any failure (allocation, file
// creation, short write, failed flush/close) aborts the process. Release the
// result with fuzzer_release_tmpfile().
static char* fuzzer_get_tmpfile(const uint8_t* data, size_t size) {
  char* filename_buffer = strdup("/tmp/generate_temporary_file.XXXXXX");
  if (!filename_buffer) {
    perror("Failed to allocate file name buffer.");
    abort();
  }
  const int file_descriptor = mkstemp(filename_buffer);
  if (file_descriptor < 0) {
    perror("Failed to make temporary file.");
    abort();
  }
  FILE* file = fdopen(file_descriptor, "wb");
  if (!file) {
    perror("Failed to open file descriptor.");
    close(file_descriptor);
    abort();
  }
  const size_t bytes_written = fwrite(data, sizeof(uint8_t), size, file);
  if (bytes_written < size) {
    // The stream owns the descriptor once fdopen() succeeded, so release it
    // through the stream instead of calling close() underneath it.
    fclose(file);
    fprintf(stderr, "Failed to write all bytes to file (%zu out of %zu)",
            bytes_written, size);
    abort();
  }
  // fwrite() only fills the stdio buffer; the data reaches the file when the
  // stream is flushed by fclose(), so a failure here means the file on disk
  // may be incomplete.
  if (fclose(file) != 0) {
    perror("Failed to flush and close temporary file.");
    abort();
  }
  return filename_buffer;
}

// Deletes the file named by `filename` (warning on stderr if that fails) and
// frees the name buffer returned by fuzzer_get_tmpfile().
static void fuzzer_release_tmpfile(char* filename) {
  if (unlink(filename) != 0) {
    perror("WARNING: Failed to delete temporary file.");
  }
  free(filename);
}

// C++ RAII object for creating temporary files.

#ifdef __cplusplus
// Creates the temporary file on construction and removes it on destruction.
class FuzzerTemporaryFile {
 public:
  FuzzerTemporaryFile(const uint8_t* data, size_t size)
      : filename_(fuzzer_get_tmpfile(data, size)) {}

  // The object owns both the file and the name buffer; a copy would unlink
  // and free them a second time, so copying is disabled.
  FuzzerTemporaryFile(const FuzzerTemporaryFile&) = delete;
  FuzzerTemporaryFile& operator=(const FuzzerTemporaryFile&) = delete;

  ~FuzzerTemporaryFile() { fuzzer_release_tmpfile(filename_); }

  // Path of the temporary file; valid until this object is destroyed.
  const char* filename() const { return filename_; }

 private:
  char* filename_;
};
#endif

#endif  // FUZZER_TEMP_FILE_H_

// diff --git a/projects/libxml2/libxml2_xml_reader_for_file_fuzzer.cc b/projects/libxml2/libxml2_xml_reader_for_file_fuzzer.cc
// new file mode 100644
// index 000000000..4f4cf6c35
// --- /dev/null
// +++ b/projects/libxml2/libxml2_xml_reader_for_file_fuzzer.cc
// @@ -0,0 +1,50 @@

// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "byte_stream.h" +#include "fuzzer_temp_file.h" + +#include "libxml/xmlreader.h" + +void ignore (void* ctx, const char* msg, ...) { + // Error handler to avoid spam of error messages from libxml parser. +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + xmlSetGenericErrorFunc(NULL, &ignore); + + ByteStream stream(data, size); + const int options = stream.GetNextInt(); + const std::string encoding = stream.GetNextString(); + const std::string file_contents = stream.GetNextString(); + FuzzerTemporaryFile file( + reinterpret_cast(file_contents.c_str()), + file_contents.size()); + + xmlTextReaderPtr xmlReader = + xmlReaderForFile(file.filename(), encoding.c_str(), options); + + constexpr int kReadSuccessful = 1; + while (xmlTextReaderRead(xmlReader) == kReadSuccessful) { + xmlTextReaderNodeType(xmlReader); + xmlTextReaderConstValue(xmlReader); + } + + xmlFreeTextReader(xmlReader); + return EXIT_SUCCESS; +}