diff --git a/projects/arrow-java/Dockerfile b/projects/arrow-java/Dockerfile
new file mode 100644
index 000000000..5fcb6b4a0
--- /dev/null
+++ b/projects/arrow-java/Dockerfile
@@ -0,0 +1,40 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+FROM gcr.io/oss-fuzz-base/base-builder-jvm
+
+# Maven is pinned to 3.6.3. downloads.apache.org only carries current releases, so use the
+# permanent archive; -f makes curl fail on an HTTP error instead of saving the error page.
+RUN curl -fL https://archive.apache.org/dist/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.zip -o maven.zip && \
+    unzip maven.zip -d $SRC/maven && \
+    rm -rf maven.zip
+
+# build.sh invokes Maven through $MVN.
+ENV MVN=$SRC/maven/apache-maven-3.6.3/bin/mvn
+
+# Shallow clones: only the checked-out tree is used, so the history is not fetched.
+RUN git clone --depth 1 https://github.com/apache/arrow.git $SRC/arrow
+
+RUN git clone --depth 1 https://github.com/apache/arrow-testing.git $SRC/arrow-testing
+
+# Seed corpus
+RUN zip -j $SRC/FuzzIpcFile_seed_corpus.zip $SRC/arrow-testing/data/arrow-ipc-file/clusterfuzz-testcase-arrow-ipc-file-fuzz-* && \
+    zip -j $SRC/FuzzIpcStream_seed_corpus.zip $SRC/arrow-testing/data/arrow-ipc-stream/clusterfuzz-testcase-arrow-ipc-stream-fuzz-*
+
+# Copy build script and all fuzzers
+COPY build.sh $SRC/
+COPY Fuzz*.java $SRC/
+WORKDIR $SRC/
diff --git a/projects/arrow-java/FuzzIpcFile.java b/projects/arrow-java/FuzzIpcFile.java
new file mode 100644
index 000000000..6d1b5e004
--- /dev/null
+++ b/projects/arrow-java/FuzzIpcFile.java
@@ -0,0 +1,44 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file
except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.InvalidArrowFileException;
+import org.apache.arrow.vector.ipc.SeekableReadChannel;
+import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/** Fuzz target for the Arrow IPC file reader; build.sh wraps it in a Jazzer launcher script. */
+public class FuzzIpcFile {
+  /** Reads {@code data} as an Arrow IPC file and validates the root after every loaded batch. */
+  public static void fuzzerTestOneInput(byte[] data) {
+    // The allocator is a managed resource as well. Resources close in reverse order, so the
+    // reader is closed (and can release its buffers) before the allocator itself is closed.
+    try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+        SeekableReadChannel channel = new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(data));
+        ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+      VectorSchemaRoot root = reader.getVectorSchemaRoot();
+      ValueVectorUtility.validate(root); // validate schema
+      while (reader.loadNextBatch()) {
+        ValueVectorUtility.validateFull(root);
+      }
+    } catch (IOException | InvalidArrowFileException ignored) { /* rejected input, not a crash */ }
+  }
+}
diff --git a/projects/arrow-java/FuzzIpcStream.java b/projects/arrow-java/FuzzIpcStream.java
new file mode 100644
index 000000000..379c12c36
--- /dev/null
+++ b/projects/arrow-java/FuzzIpcStream.java
@@ -0,0 +1,40 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may
not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/** Fuzz target for the Arrow IPC stream reader; build.sh wraps it in a Jazzer launcher script. */
+public class FuzzIpcStream {
+  /** Reads {@code data} as an Arrow IPC stream and validates the root after every loaded batch. */
+  public static void fuzzerTestOneInput(byte[] data) {
+    // The allocator is a managed resource as well. Resources close in reverse order, so the
+    // reader is closed (and can release its buffers) before the allocator itself is closed.
+    try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+        ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayInputStream(data), allocator)) {
+      VectorSchemaRoot root = reader.getVectorSchemaRoot();
+      ValueVectorUtility.validate(root); // validate schema
+      while (reader.loadNextBatch()) {
+        ValueVectorUtility.validateFull(root);
+      }
+    } catch (IOException ignored) { /* rejected input, not a crash */ }
+  }
+}
diff --git a/projects/arrow-java/build.sh b/projects/arrow-java/build.sh
new file mode 100755
index 000000000..49805f8d6
--- /dev/null
+++ b/projects/arrow-java/build.sh
@@ -0,0 +1,54 @@
+#!/bin/bash -eu
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+# Only build the Java module (tests are skipped).
+MAVEN_ARGS="-DskipTests"
+cd "$SRC/arrow/java"
+"$MVN" install ${MAVEN_ARGS}
+
+# Fill $OUT: the dependency jars, plus the seed corpora that the Dockerfile zipped into $SRC.
+"$MVN" dependency:copy-dependencies -DoutputDirectory="$OUT"
+cp "$SRC"/*_seed_corpus.zip "$OUT/"
+
+# setup class path: collect the base name of every jar in $OUT
+ALL_JARS=""
+for jar in $(find "$OUT" -name '*.jar'); do
+  ALL_JARS="$ALL_JARS $(basename "$jar")"
+done
+cd "$SRC"
+
+# Build path uses $OUT; the runtime path keeps a literal \$this_dir for the wrapper to resolve.
+BUILD_CLASSPATH=$(echo $ALL_JARS | xargs printf -- "$OUT/%s:"):$JAZZER_API_PATH
+RUNTIME_CLASSPATH=$(echo $ALL_JARS | xargs printf -- "\$this_dir/%s:"):.:\$this_dir
+
+# -maxdepth 1: only the fuzzers copied into $SRC itself, not sources inside the cloned repos.
+for fuzzer in $(find "$SRC" -maxdepth 1 -name 'Fuzz*.java'); do
+  fuzzer_basename=$(basename -s .java "$fuzzer")
+  javac -cp "$BUILD_CLASSPATH" "$fuzzer"
+  cp "$SRC/$fuzzer_basename.class" "$OUT/"
+  # Create an execution wrapper that executes Jazzer with the correct arguments.
+  echo "#!/bin/sh
+# LLVMFuzzerTestOneInput for fuzzer detection.
+this_dir=\$(dirname \"\$0\")
+LD_LIBRARY_PATH=\"$JVM_LD_LIBRARY_PATH\":\$this_dir \
+\$this_dir/jazzer_driver --agent_path=\$this_dir/jazzer_agent_deploy.jar \
+--cp=$RUNTIME_CLASSPATH \
+--target_class=$fuzzer_basename \
+--jvm_args=\"-Xmx2048m\" \
+\"\$@\"" > "$OUT/$fuzzer_basename"
+  chmod u+x "$OUT/$fuzzer_basename"
+done
diff --git a/projects/arrow-java/project.yaml b/projects/arrow-java/project.yaml
new file mode 100644
index 000000000..5acc523a3
--- /dev/null
+++ b/projects/arrow-java/project.yaml
@@ -0,0 +1,12 @@
+homepage: "https://arrow.apache.org/"
+language: jvm
+primary_contact: "liya.fan03@gmail.com"
+auto_ccs:
+  - "emkornfield@gmail.com"
+  - "jacques@apache.org"
+  - "wesmckinn@gmail.com"
+fuzzing_engines:
+  - libfuzzer
+main_repo: "https://github.com/apache/arrow.git"
+sanitizers:
+  - address