From 16125ac2bce61d29aa5e418444dd328a555c3d90 Mon Sep 17 00:00:00 2001 From: Max Moroz Date: Fri, 6 Sep 2019 09:56:42 -0700 Subject: [PATCH] [libxml2] Use FuzzedDataProvider instead of byte_stream. (#2809) --- projects/libxml2/byte_stream.h | 142 ------------------ .../libxml2_xml_reader_for_file_fuzzer.cc | 17 ++- 2 files changed, 9 insertions(+), 150 deletions(-) delete mode 100644 projects/libxml2/byte_stream.h diff --git a/projects/libxml2/byte_stream.h b/projects/libxml2/byte_stream.h deleted file mode 100644 index da72d5573..000000000 --- a/projects/libxml2/byte_stream.h +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2018 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef BYTE_STREAM_H_ -#define BYTE_STREAM_H_ - -#include -#include -#include -#include -#include - -// Wrapper for fuzzer input strings that helps consume and interpret the data -// as a sequence of values, such as strings and PODs. -class ByteStream { - public: - // Does not take ownership of data. - ByteStream(const uint8_t* data, size_t size) - : data_(data), size_(size), position_(0) {} - - ByteStream(const ByteStream&) = delete; - ByteStream& operator=(const ByteStream&) = delete; - - // Returns a pointer to the chunk of data of |size| bytes, where |size| is - // either a requested value or all the bytes that are available. If the - // requested |size| is 0, return all the bytes that are available. 
- const uint8_t* GetNextChunk(size_t* size); - - // Returns a string. Strings are obtained from the byte stream by reading a - // size_t N followed by N char elements. If there are fewer than N bytes left - // in the stream, this returns as many bytes as are available. - std::string GetNextString(); - - // The following GetNext{integer type} functions all return the next - // sizeof(integer type) bytes in the stream or 0 if there is insufficient - // capacity. - size_t GetNextSizeT() { return ConsumeCopyOrDefault(0); } - int GetNextInt() { return ConsumeCopyOrDefault(0); } - uint8_t GetNextUint8() { return ConsumeCopyOrDefault(0); } - int64_t GetNextInt64() { return ConsumeCopyOrDefault(0); } - - // Returns an integer in the range [0,n) for n > 0 and consumes up to - // sizeof(int) bytes. For n<=0, returns 0 and consumes 0 bytes. - int GetNextInt(int n); - - private: - // The remaining capacity of the ByteStream. - size_t capacity() const { return size_ - position_; } - - // Returns data_ + position_ and then advances position_ by requested bytes. - // - // This is the canonical way for the class to request regions of memory - // or to advance the position by requested bytes. This operation is unchecked - // for maintaining that position_ <= size_. Requesting 0 bytes always - // succeeds. - const uint8_t* UncheckedConsume(size_t requested) { - const uint8_t* region = data_ + position_; - position_ += requested; - return region; - } - - // Directly initialize T by copying sizeof(T) bytes into results if there is - // sufficient capacity in the stream. If there is not sufficient capacity - // result is unmodified. - template - void ConsumeBytesByCopy(T* result) { - constexpr size_t type_size = sizeof(T); - if (type_size <= capacity()) { - const uint8_t* region = UncheckedConsume(type_size); - memcpy(static_cast(result), region, type_size); - } else { - // Consume the remainder of data_. 
- UncheckedConsume(capacity()); - } - } - - // A helper function for using ConsumeBytesByCopy and returning a default - // value `t` if there is insufficient capacity to read a full `T`. T should - // probably be a primitive type. - template - T ConsumeCopyOrDefault(T t) { - ConsumeBytesByCopy(&t); - return t; - } - - const uint8_t* data_; - const size_t size_; - size_t position_; -}; - -inline const uint8_t* ByteStream::GetNextChunk(size_t* size) { - if (*size) - *size = std::min(*size, capacity()); - else - *size = capacity(); - - return UncheckedConsume(*size); -} - -inline std::string ByteStream::GetNextString() { - const size_t requested_size = GetNextSizeT(); - const size_t consumed_size = std::min(requested_size, capacity()); - const uint8_t* selection = UncheckedConsume(consumed_size); - return std::string(reinterpret_cast(selection), consumed_size); -} - -inline int ByteStream::GetNextInt(int n) { - if (n <= 0) { - return 0; - } - // We grab as few bytes as possible as n will often be fixed. - int selection = 0; - if (n <= std::numeric_limits::max()) { - selection = static_cast(GetNextUint8()); - } else if (n <= std::numeric_limits::max()) { - selection = ConsumeCopyOrDefault(0); - } else { - selection = GetNextInt(); - } - - // Take the absolute value of selection w/o undefined behavior. - // If selection is INT_MIN, return 0. - if (selection == std::numeric_limits::min()) { - selection = 0; - } else if (selection < 0) { - selection = -selection; - } - return selection % n; -} - -#endif // BYTE_STREAM_H_ diff --git a/projects/libxml2/libxml2_xml_reader_for_file_fuzzer.cc b/projects/libxml2/libxml2_xml_reader_for_file_fuzzer.cc index ab296d834..4a71aa5a9 100644 --- a/projects/libxml2/libxml2_xml_reader_for_file_fuzzer.cc +++ b/projects/libxml2/libxml2_xml_reader_for_file_fuzzer.cc @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include + #include #include #include -#include "byte_stream.h" #include "fuzzer_temp_file.h" #include "libxml/xmlreader.h" @@ -28,14 +29,14 @@ void ignore (void* ctx, const char* msg, ...) { extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { xmlSetGenericErrorFunc(NULL, &ignore); - ByteStream stream(data, size); - const int options = stream.GetNextInt(); - const std::string encoding = stream.GetNextString(); - size_t file_contents_size = 0; - const uint8_t* file_contents = stream.GetNextChunk(&file_contents_size); + FuzzedDataProvider provider(data, size); + const int options = provider.ConsumeIntegral(); - // Intentionally pass raw data as the API does not require trailing \0. - FuzzerTemporaryFile file(file_contents, file_contents_size); + // libxml does not expect more than 100 characters, let's go beyond that. + const std::string encoding = provider.ConsumeRandomLengthString(128); + auto file_contents = provider.ConsumeRemainingBytes(); + + FuzzerTemporaryFile file(file_contents.data(), file_contents.size()); xmlTextReaderPtr xmlReader = xmlReaderForFile(file.filename(), encoding.c_str(), options);