Add mpg123_decode fuzzer (#2037)

* Add fuzzer for mpg123_decode. This fuzzes a different and non-filebased API than read_fuzzer.c. mpg123_decode is, however, streaming based so we need to chunk the fuzzer's input into multiple pieces, and pass them to the library. This is pretty annoying to do by hand so I've introduced byte_stream.h to do this automatically. Sadly, byte_stream.h is very C++ so the decode_fuzzer is also C++ now. This isn't ideal since mpg123 is a C library but this is the easiest way to do it. * Do not carry next input within std::string. * Malloc a new buffer for each decode invocation. I used malloc instead of new since ideally this fuzz target would be written in C.
2018-12-21 14:20:00 -05:00 · 2018-12-21 14:20:00 -05:00 · 2ae303bc3b
parent 194c7de657
commit 2ae303bc3b
5 changed files with 195 additions and 0 deletions
--- a/projects/mpg123/Dockerfile
+++ b/projects/mpg123/Dockerfile
@ -22,4 +22,6 @@ RUN tar -xvf snapshot
 RUN mv mpg123* mpg123
 WORKDIR $SRC
 COPY read_fuzzer.c $SRC/
+COPY decode_fuzzer.cc $SRC/
+COPY byte_stream.h $SRC/
 COPY build.sh $SRC/
--- a/projects/mpg123/build.sh
+++ b/projects/mpg123/build.sh
@ -23,3 +23,6 @@ popd

 $CC $CXXFLAGS read_fuzzer.c -I$WORK/include $WORK/lib/libmpg123.a \
  -lFuzzingEngine -lc++ -o $OUT/read_fuzzer
+
+$CXX $CXXFLAGS decode_fuzzer.cc -I$WORK/include $WORK/lib/libmpg123.a \
+  -lFuzzingEngine -o $OUT/decode_fuzzer
--- a/projects/mpg123/byte_stream.h
+++ b/projects/mpg123/byte_stream.h
@ -0,0 +1,129 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BYTE_STREAM_H_
+#define BYTE_STREAM_H_
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <string>
+
+// Wrapper for fuzzer input strings that helps consume and interpret the data
+// as a sequence of values, such as strings and PODs.
+//
+// A ByteStream is a forward-only read cursor over a caller-owned buffer. Every
+// consuming call advances the cursor; nothing is ever rewound. The buffer is
+// not copied, so it must stay valid for as long as the stream is used.
+class ByteStream {
+ public:
+  // Does not take ownership of data.
+  ByteStream(const uint8_t* data, size_t size)
+      : data_(data), size_(size), position_(0) {}
+
+  // Non-copyable: two cursors over the same buffer would drift apart.
+  ByteStream(const ByteStream&) = delete;
+  ByteStream& operator=(const ByteStream&) = delete;
+
+  // Returns a string. Strings are obtained from the byte stream by reading a
+  // size_t N followed by N char elements. If there are fewer than N bytes left
+  // in the stream, this returns as many bytes as are available.
+  std::string GetNextString();
+
+  // The following GetNext{integer type} functions all return the next
+  // sizeof(integer type) bytes in the stream or 0 if there is insufficient
+  // capacity. In the insufficient case the remaining bytes are still consumed,
+  // so capacity() is 0 afterwards.
+  size_t GetNextSizeT() { return ConsumeCopyOrDefault<size_t>(0); }
+  int GetNextInt() { return ConsumeCopyOrDefault<int>(0); }
+  uint8_t GetNextUint8() { return ConsumeCopyOrDefault<uint8_t>(0); }
+  int64_t GetNextInt64() { return ConsumeCopyOrDefault<int64_t>(0); }
+
+  // Returns an integer in the range [0,n) for n > 0 and consumes up to
+  // sizeof(int) bytes. For n<=0, returns 0 and consumes 0 bytes.
+  int GetNextInt(int n);
+
+  // The remaining capacity of the ByteStream, i.e. the number of bytes that
+  // have not been consumed yet.
+  size_t capacity() const { return size_ - position_; }
+
+  // Returns data_ + position_ and then advances position_ by requested bytes.
+  //
+  // This is the canonical way for the class to request regions of memory
+  // or to advance the position by requested bytes. This operation is unchecked
+  // for maintaining that position_ <= size_: callers must clamp `requested` to
+  // capacity() themselves. Requesting 0 bytes always succeeds.
+  const uint8_t* UncheckedConsume(size_t requested) {
+    const uint8_t* region = data_ + position_;
+    position_ += requested;
+    return region;
+  }
+
+ private:
+
+  // Directly initialize T by copying sizeof(T) bytes into result if there is
+  // sufficient capacity in the stream. If there is not sufficient capacity
+  // result is unmodified and the rest of the stream is discarded.
+  template <class T>
+  void ConsumeBytesByCopy(T* result) {
+    constexpr size_t type_size = sizeof(T);
+    if (type_size <= capacity()) {
+      const uint8_t* region = UncheckedConsume(type_size);
+      // Copy bytewise rather than casting the pointer: the region has no
+      // alignment guarantee for T.
+      std::memcpy(static_cast<void*>(result), region, type_size);
+    } else {
+      // Consume the remainder of data_.
+      UncheckedConsume(capacity());
+    }
+  }
+
+  // A helper function for using ConsumeBytesByCopy and returning a default
+  // value `t` if there is insufficient capacity to read a full `T`. T should
+  // probably be a primitive type.
+  template <class T>
+  T ConsumeCopyOrDefault(T t) {
+    ConsumeBytesByCopy(&t);
+    return t;
+  }
+
+  const uint8_t* data_;  // Start of the caller-owned buffer.
+  const size_t size_;    // Total length of the buffer in bytes.
+  size_t position_;      // Offset of the next unread byte.
+};
+
+// Reads a size_t length prefix, then returns that many bytes as a string,
+// truncated to whatever is left in the stream.
+inline std::string ByteStream::GetNextString() {
+  // The prefix must be read before capacity() is queried: reading it shrinks
+  // the remaining capacity.
+  const size_t wanted = GetNextSizeT();
+  const size_t available = capacity();
+  const size_t length = wanted < available ? wanted : available;
+  const char* begin = reinterpret_cast<const char*>(UncheckedConsume(length));
+  return std::string(begin, length);
+}
+
+// Returns a value in [0, n) derived from the next bytes of the stream, or 0
+// without consuming anything when n is not positive.
+inline int ByteStream::GetNextInt(int n) {
+  if (n <= 0) {
+    return 0;
+  }
+
+  // Read the narrowest integer that can still cover n, since n will often be
+  // fixed and input bytes are precious.
+  int raw = 0;
+  if (n <= std::numeric_limits<uint8_t>::max()) {
+    raw = static_cast<int>(GetNextUint8());
+  } else if (n <= std::numeric_limits<uint16_t>::max()) {
+    raw = ConsumeCopyOrDefault<uint16_t>(0);
+  } else {
+    raw = GetNextInt();
+  }
+
+  // Absolute value without undefined behavior: INT_MIN cannot be negated, so
+  // it maps to 0 instead.
+  const bool is_int_min = (raw == std::numeric_limits<int>::min());
+  const int magnitude = is_int_min ? 0 : (raw < 0 ? -raw : raw);
+  return magnitude % n;
+}
+
+#endif  // BYTE_STREAM_H_
--- a/projects/mpg123/decode_fuzzer.cc
+++ b/projects/mpg123/decode_fuzzer.cc
@ -0,0 +1,59 @@
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <vector>
+
+#include "mpg123.h"
+#include "byte_stream.h"
+
+// libFuzzer entry point exercising the feed-based mpg123_decode() API.
+//
+// The fuzzer input is interpreted as a sequence of chunks, each made of a
+// size_t length prefix followed by that many bytes (clamped to what is left).
+// A new chunk is handed to the decoder whenever the previous call returned
+// MPG123_NEED_MORE; for any other non-error result the decoder is called again
+// without new input. The loop stops on MPG123_ERR, when the input is
+// exhausted, or if a chunk buffer cannot be allocated.
+//
+// Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  static bool initialized = false;
+  if (!initialized) {
+    mpg123_init();
+    initialized = true;
+  }
+  int ret = MPG123_OK;
+  mpg123_handle* handle = mpg123_new(nullptr, &ret);
+  if (handle == nullptr) {
+    return 0;
+  }
+
+  ret = mpg123_open_feed(handle);
+  if (ret != MPG123_OK) {
+    mpg123_delete(handle);
+    return 0;
+  }
+
+  std::vector<uint8_t> output_buffer(mpg123_outblock(handle));
+
+  size_t output_written = 0;
+  // Initially, start by feeding the decoder more data.
+  int decode_ret = MPG123_NEED_MORE;
+  ByteStream stream(data, size);
+  while (decode_ret != MPG123_ERR) {
+    if (decode_ret != MPG123_NEED_MORE) {
+      // The decoder did not ask for input: call it again with nothing new.
+      decode_ret = mpg123_decode(handle, nullptr, 0, output_buffer.data(),
+                                 output_buffer.size(), &output_written);
+      continue;
+    }
+
+    if (stream.capacity() == 0) {
+      break;
+    }
+
+    // Read the length prefix in its own statement. Function arguments are
+    // evaluated in an unspecified order, so passing GetNextSizeT() and
+    // capacity() to std::min in one call could clamp against the capacity
+    // from *before* the prefix was consumed and over-read the input.
+    const size_t requested_size = stream.GetNextSizeT();
+    const size_t next_size = std::min(requested_size, stream.capacity());
+
+    // Each chunk gets its own exactly-sized heap buffer (malloc rather than
+    // new, as the target would ideally be plain C) -- presumably so that a
+    // sanitizer can flag any read past the end of the chunk.
+    uint8_t* next_input = nullptr;
+    if (next_size > 0) {
+      next_input = static_cast<uint8_t*>(malloc(next_size));
+      if (next_input == nullptr) {
+        // Out of memory: stop quietly instead of copying into a null buffer.
+        break;
+      }
+      memcpy(next_input, stream.UncheckedConsume(next_size), next_size);
+    }
+
+    decode_ret = mpg123_decode(
+        handle, reinterpret_cast<const unsigned char*>(next_input), next_size,
+        output_buffer.data(), output_buffer.size(), &output_written);
+    free(next_input);  // free(nullptr) is a no-op for empty chunks.
+  }
+
+  mpg123_delete(handle);
+
+  return 0;
+}
--- a/projects/mpg123/project.yaml
+++ b/projects/mpg123/project.yaml
@ -11,3 +11,5 @@ sanitizers:
 labels:
  read_fuzzer:
    - sundew
+  decode_fuzzer:
+    - sundew