diff --git a/infra/base-images/base-builder/compile b/infra/base-images/base-builder/compile
index 78453c98c..00868b323 100755
--- a/infra/base-images/base-builder/compile
+++ b/infra/base-images/base-builder/compile
@@ -27,7 +27,7 @@ if [ "$FUZZING_LANGUAGE" = "jvm" ]; then
     echo "ERROR: JVM projects can be fuzzed with libFuzzer engine only."
     exit 1
   fi
-  if [ "$SANITIZER" != "address" ]; then
-    echo "ERROR: JVM projects can be fuzzed with AddressSanitizer only."
+  if [ "$SANITIZER" != "address" ] && [ "$SANITIZER" != "coverage" ]; then
+    echo "ERROR: JVM projects can be built with SANITIZER=address or SANITIZER=coverage only."
     exit 1
   fi
diff --git a/infra/base-images/base-runner/Dockerfile b/infra/base-images/base-runner/Dockerfile
index c6a63eb0c..b25e4e87d 100755
--- a/infra/base-images/base-runner/Dockerfile
+++ b/infra/base-images/base-runner/Dockerfile
@@ -94,6 +94,12 @@ RUN wget https://download.java.net/java/GA/jdk15.0.2/0d1cfde4252546c6931946de8db
 # Install Java AWT dependencies.
 RUN apt-get install libxext-dev -y
 
+# Install JaCoCo for JVM coverage.
+RUN wget https://repo1.maven.org/maven2/org/jacoco/org.jacoco.cli/0.8.7/org.jacoco.cli-0.8.7-nodeps.jar -O /opt/jacoco-cli.jar && \
+    wget https://repo1.maven.org/maven2/org/jacoco/org.jacoco.agent/0.8.7/org.jacoco.agent-0.8.7-runtime.jar -O /opt/jacoco-agent.jar && \
+    echo "37df187b76888101ecd745282e9cd1ad4ea508d6  /opt/jacoco-agent.jar" | shasum --check && \
+    echo "c1814e7bba5fd8786224b09b43c84fd6156db690  /opt/jacoco-cli.jar" | shasum --check
+
 # Do this last to make developing these files easier/faster due to caching.
 COPY bad_build_check \
     collect_dft \
@@ -101,6 +107,7 @@ COPY bad_build_check \
     coverage_helper \
     dataflow_tracer.py \
     download_corpus \
+    jacoco_report_converter.py \
     rcfilt \
     reproduce \
     run_fuzzer \
diff --git a/infra/base-images/base-runner/coverage b/infra/base-images/base-runner/coverage
index a86b00dec..3a72b2fe6 100755
--- a/infra/base-images/base-runner/coverage
+++ b/infra/base-images/base-runner/coverage
@@ -125,6 +125,40 @@ function run_go_fuzz_target {
   $SYSGOPATH/bin/gocovsum $DUMPS_DIR/$target.profdata > $FUZZER_STATS_DIR/$target.json
 }
 
+# Runs one JVM fuzz target over its corpus with the JaCoCo agent attached.
+# Coverage data goes to $DUMPS_DIR/<target>.exec and the classes seen by the
+# agent are dumped to $DUMPS_DIR/<target>_classes/. $1 is the target name.
+function run_java_fuzz_target {
+  local target=$1
+
+  local exec_file="$DUMPS_DIR/$target.exec"
+  local class_dump_dir="$DUMPS_DIR/${target}_classes/"
+  mkdir -p "$class_dump_dir"
+  local corpus_real="/corpus/${target}"
+
+  # -merge=1 requires an output directory, create a new, empty dir for that.
+  local corpus_dummy="$OUT/dummy_corpus_dir_for_${target}"
+  rm -rf "$corpus_dummy" && mkdir -p "$corpus_dummy"
+
+  # Use 100s timeout instead of 25s as code coverage builds can be very slow.
+  local jacoco_args="destfile=$exec_file,classdumpdir=$class_dump_dir"
+  local args="-merge=1 -timeout=100 -close_fd_mask=3 --nohooks \
+      --jvm_args=-javaagent:/opt/jacoco-agent.jar=$jacoco_args \
+      $corpus_dummy $corpus_real"
+
+  # $args is intentionally unquoted so that it splits into separate arguments.
+  timeout $TIMEOUT "$OUT/$target" $args &> "$LOGS_DIR/$target.log"
+  if (( $? != 0 )); then
+    echo "Error occured while running $target:"
+    cat "$LOGS_DIR/$target.log"
+  fi
+
+  if (( $(du -c "$exec_file" | tail -n 1 | cut -f 1) == 0 )); then
+    # Skip fuzz targets that failed to produce .exec files.
+    return 0
+  fi
+}
+
 export SYSGOPATH=$GOPATH
 export GOPATH=$OUT/$GOPATH
 # Run each fuzz target, generate raw coverage dumps.
@@ -136,6 +170,14 @@ for fuzz_target in $FUZZ_TARGETS; do
       grep "FUZZ_CORPUS_DIR" $fuzz_target > /dev/null 2>&1 || continue
     fi
     run_go_fuzz_target $fuzz_target &
+  elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then
+    # Continue if not a fuzz target.
+    if [[ $FUZZING_ENGINE != "none" ]]; then
+      grep "LLVMFuzzerTestOneInput" $fuzz_target > /dev/null 2>&1 || continue
+    fi
+
+    echo "Running $fuzz_target"
+    run_java_fuzz_target $fuzz_target &
   else
     # Continue if not a fuzz target.
     if [[ $FUZZING_ENGINE != "none" ]]; then
@@ -175,6 +217,47 @@ if [[ $FUZZING_LANGUAGE == "go" ]]; then
   mv merged.data $REPORT_ROOT_DIR/heap.prof
   #TODO some proxy for go tool pprof -http=127.0.0.1:8001 $DUMPS_DIR/cpu.prof
   echo "Finished generating code coverage report for Go fuzz targets."
+elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then
+
+  # From this point on the script does not tolerate any errors.
+  set -e
+
+  # Merge .exec files from the individual targets.
+  jacoco_merged_exec=$DUMPS_DIR/jacoco.merged.exec
+  java -jar /opt/jacoco-cli.jar merge $DUMPS_DIR/*.exec \
+      --destfile $jacoco_merged_exec
+
+  # Merge .class files from the individual targets.
+  classes_dir=$DUMPS_DIR/classes
+  mkdir -p $classes_dir
+  for fuzz_target in $FUZZ_TARGETS; do
+    # Entries of $FUZZ_TARGETS that were skipped as non-targets never got a
+    # class dump directory; skip them so the copy cannot abort under `set -e`.
+    if [[ -d $DUMPS_DIR/${fuzz_target}_classes ]]; then
+      cp -r $DUMPS_DIR/${fuzz_target}_classes/. $classes_dir/
+    fi
+  done
+
+  # Heuristically determine source directories based on Maven structure.
+  # Always include the $SRC root as it likely contains the fuzzer sources.
+  sourcefiles_args=(--sourcefiles $OUT/$SRC)
+  source_dirs=$(find $OUT/$SRC -type d -name 'java')
+  for source_dir in $source_dirs; do
+    sourcefiles_args+=(--sourcefiles "$source_dir")
+  done
+
+  # Generate HTML and XML reports.
+  xml_report=$REPORT_PLATFORM_DIR/index.xml
+  java -jar /opt/jacoco-cli.jar report $jacoco_merged_exec \
+      --html $REPORT_PLATFORM_DIR \
+      --xml $xml_report \
+      --classfiles $classes_dir \
+      "${sourcefiles_args[@]}"
+
+  # Write llvm-cov summary file.
+  jacoco_report_converter.py $xml_report $SUMMARY_FILE
+
+  set +e
 else
 
   # From this point on the script does not tolerate any errors.
#!/usr/bin/env python3
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
"""Helper script for creating an llvm-cov style JSON summary from a JaCoCo XML
report."""
import json
import sys
import xml.etree.ElementTree as ET

# Key in the llvm-cov "totals" object -> JaCoCo counter type reported under it.
# Branch coverage is what this script reports in the "regions" slot.
_COUNTER_TYPES = (
    ("functions", "METHOD"),
    ("lines", "LINE"),
    ("regions", "BRANCH"),
)


def convert(xml):
    """Turns a JaCoCo XML report into an llvm-cov JSON summary.

    Only the report-wide counters (direct children of the root element) are
    used; per-package and per-class counters are ignored.

    Args:
        xml: The JaCoCo XML report, as `str` or `bytes`.

    Returns:
        The summary serialized as a JSON string.

    Raises:
        xml.etree.ElementTree.ParseError: If `xml` is not well-formed XML.
    """
    root = ET.fromstring(xml)

    totals = {}
    for totals_key, counter_type in _COUNTER_TYPES:
        counter = root.find("./counter[@type='%s']" % counter_type)
        totals[totals_key] = make_counter_summary(counter)

    summary = {
        "type": "oss-fuzz.java.coverage.json.export",
        "version": "1.0.0",
        "data": [{
            "totals": totals,
        }],
    }
    return json.dumps(summary)


def make_counter_summary(counter_element):
    """Turns a JaCoCo <counter> tag into an llvm-cov totals entry.

    Args:
        counter_element: An element carrying integer `covered` and `missed`
            attributes, or `None` when the report has no counter of the
            requested type (a report may omit a counter whose total is zero,
            e.g. BRANCH for code without branches).

    Returns:
        A dict with the keys `covered`, `notcovered`, `count` and `percent`.
        `percent` is 0.0 when `count` is 0, so an empty counter never causes a
        division by zero.
    """
    if counter_element is None:
        covered = missed = 0
    else:
        covered = int(counter_element.attrib["covered"])
        missed = int(counter_element.attrib["missed"])

    count = covered + missed
    return {
        "covered": covered,
        "notcovered": missed,
        "count": count,
        "percent": (100.0 * covered) / count if count else 0.0,
    }


def main():
    """Produces an llvm-cov style JSON summary from a JaCoCo XML report.

    Expects exactly two command-line arguments: the path of the JaCoCo XML
    report to read and the path of the JSON summary to write.

    Returns:
        The process exit code: 0 on success, 1 on wrong usage.
    """
    if len(sys.argv) != 3:
        sys.stderr.write('Usage: %s <path_to_jacoco_xml> <out_path_json>\n' %
                         sys.argv[0])
        return 1

    xml_path, json_path = sys.argv[1:]

    # Read raw bytes so the XML parser applies the encoding declared in the
    # report rather than the locale's default text encoding.
    with open(xml_path, 'rb') as xml_file:
        xml_report = xml_file.read()
    json_summary = convert(xml_report)
    with open(json_path, 'w') as json_file:
        json_file.write(json_summary)

    return 0


if __name__ == "__main__":
    sys.exit(main())

# ------------------------------------------------------------------------------
# Patch context preserved from the original diff (not part of this module).
#
# This module was added by the patch as
#   infra/base-images/base-runner/jacoco_report_converter.py
#   (new file mode 100755, index 000000000..c5b5fc1e7)
#
# The same patch also changed infra/helper.py:
#
#   diff --git a/infra/helper.py b/infra/helper.py
#   index 812e2d1c2..5a96c949d 100755
#   --- a/infra/helper.py
#   +++ b/infra/helper.py
#   @@ -58,7 +58,7 @@ CORPUS_BACKUP_URL_FORMAT = (
#    PROJECT_LANGUAGE_REGEX = re.compile(r'\s*language\s*:\s*([^\s]+)')
#
#    # Languages from project.yaml that have code coverage support.
#   -LANGUAGES_WITH_COVERAGE_SUPPORT = ['c', 'c++', 'go', 'rust']
#   +LANGUAGES_WITH_COVERAGE_SUPPORT = ['c', 'c++', 'go', 'jvm', 'rust']
#
#    WORKDIR_REGEX = re.compile(r'\s*WORKDIR\s*([^\s]+)')
# ------------------------------------------------------------------------------