Add Java coverage support (#5733)

Coverage is collected by running the fuzz targets with the JaCoCo agent
in Jazzer's no instrumentation mode.

Since JaCoCo does not support llvm-cov style coverage reports, a simple
Python helper creates an llvm-cov style JSON summary from the information
contained in the JaCoCo XML report.

As the Java build process does not maintain a mapping between source
files and build artifacts and JaCoCo needs to be passed the root folder
of the package tree, we use the Maven directory layout convention to
heuristically detect these roots.
This commit is contained in:
Fabian Meumertzheim 2021-05-11 05:33:07 +02:00 committed by GitHub
parent a83b7c9d18
commit 5cfb074ae5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 162 additions and 2 deletions

View File

@ -27,7 +27,7 @@ if [ "$FUZZING_LANGUAGE" = "jvm" ]; then
echo "ERROR: JVM projects can be fuzzed with libFuzzer engine only."
exit 1
fi
if [ "$SANITIZER" != "address" ]; then
if [ "$SANITIZER" != "address" ] && [ "$SANITIZER" != "coverage" ]; then
echo "ERROR: JVM projects can be fuzzed with AddressSanitizer only."
exit 1
fi

View File

@ -94,6 +94,12 @@ RUN wget https://download.java.net/java/GA/jdk15.0.2/0d1cfde4252546c6931946de8db
# Install Java AWT dependencies.
RUN apt-get install libxext-dev -y
# Install JaCoCo for JVM coverage.
RUN wget https://repo1.maven.org/maven2/org/jacoco/org.jacoco.cli/0.8.7/org.jacoco.cli-0.8.7-nodeps.jar -O /opt/jacoco-cli.jar && \
wget https://repo1.maven.org/maven2/org/jacoco/org.jacoco.agent/0.8.7/org.jacoco.agent-0.8.7-runtime.jar -O /opt/jacoco-agent.jar && \
echo "37df187b76888101ecd745282e9cd1ad4ea508d6 /opt/jacoco-agent.jar" | shasum --check && \
echo "c1814e7bba5fd8786224b09b43c84fd6156db690 /opt/jacoco-cli.jar" | shasum --check
# Do this last to make developing these files easier/faster due to caching.
COPY bad_build_check \
collect_dft \
@ -101,6 +107,7 @@ COPY bad_build_check \
coverage_helper \
dataflow_tracer.py \
download_corpus \
jacoco_report_converter.py \
rcfilt \
reproduce \
run_fuzzer \

View File

@ -125,6 +125,36 @@ function run_go_fuzz_target {
$SYSGOPATH/bin/gocovsum $DUMPS_DIR/$target.profdata > $FUZZER_STATS_DIR/$target.json
}
function run_java_fuzz_target {
  # Runs a single JVM fuzz target over its corpus with the JaCoCo agent
  # attached, leaving the execution data in $DUMPS_DIR/<target>.exec and the
  # dumped class files in $DUMPS_DIR/<target>_classes/.
  #
  # Arguments:
  #   $1 - name of the fuzz target executable inside $OUT.
  local target=$1

  # Per-target JaCoCo outputs.
  local exec_file="$DUMPS_DIR/$target.exec"
  local class_dump_dir="$DUMPS_DIR/${target}_classes/"
  mkdir "$class_dump_dir"

  local corpus_dir="/corpus/${target}"
  # -merge=1 needs an output directory, so hand it a fresh, empty one.
  local merge_out_dir="$OUT/dummy_corpus_dir_for_${target}"
  rm -rf $merge_out_dir && mkdir -p $merge_out_dir

  # Per-input timeout is 100s rather than 25s because coverage builds can be
  # very slow.
  local agent_options="destfile=$exec_file,classdumpdir=$class_dump_dir"
  local fuzzer_args="-merge=1 -timeout=100 -close_fd_mask=3 --nohooks"
  fuzzer_args="$fuzzer_args --jvm_args=-javaagent:/opt/jacoco-agent.jar=$agent_options"
  fuzzer_args="$fuzzer_args $merge_out_dir $corpus_dir"

  # $fuzzer_args is deliberately left unquoted so that it is split into
  # individual command-line arguments.
  if ! timeout $TIMEOUT $OUT/$target $fuzzer_args &> $LOGS_DIR/$target.log; then
    echo "Error occured while running $target:"
    cat $LOGS_DIR/$target.log
  fi

  if (( $(du -c $exec_file | tail -n 1 | cut -f 1) == 0 )); then
    # Skip fuzz targets that failed to produce .exec files.
    return 0
  fi
}
export SYSGOPATH=$GOPATH
export GOPATH=$OUT/$GOPATH
# Run each fuzz target, generate raw coverage dumps.
@ -136,6 +166,14 @@ for fuzz_target in $FUZZ_TARGETS; do
grep "FUZZ_CORPUS_DIR" $fuzz_target > /dev/null 2>&1 || continue
fi
run_go_fuzz_target $fuzz_target &
elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then
# Continue if not a fuzz target.
if [[ $FUZZING_ENGINE != "none" ]]; then
grep "LLVMFuzzerTestOneInput" $fuzz_target > /dev/null 2>&1 || continue
fi
echo "Running $fuzz_target"
run_java_fuzz_target $fuzz_target &
else
# Continue if not a fuzz target.
if [[ $FUZZING_ENGINE != "none" ]]; then
@ -175,6 +213,43 @@ if [[ $FUZZING_LANGUAGE == "go" ]]; then
mv merged.data $REPORT_ROOT_DIR/heap.prof
#TODO some proxy for go tool pprof -http=127.0.0.1:8001 $DUMPS_DIR/cpu.prof
echo "Finished generating code coverage report for Go fuzz targets."
elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then
# From this point on the script does not tolerate any errors.
set -e

# Merge .exec files from the individual targets.
jacoco_merged_exec=$DUMPS_DIR/jacoco.merged.exec
java -jar /opt/jacoco-cli.jar merge $DUMPS_DIR/*.exec \
    --destfile $jacoco_merged_exec

# Merge .class files from the individual targets.
classes_dir=$DUMPS_DIR/classes
mkdir $classes_dir
for fuzz_target in $FUZZ_TARGETS; do
  # $FUZZ_TARGETS can contain entries that were skipped as "not a fuzz
  # target" and therefore have no class dump directory. With `set -e` active a
  # failing cp would abort the whole report, so only copy what exists.
  target_classes_dir=$DUMPS_DIR/${fuzz_target}_classes
  if [ -d "$target_classes_dir" ]; then
    # Copy "<dir>/." instead of "<dir>/*": an unmatched glob on an empty
    # directory is passed to cp literally and makes it fail.
    cp -r "$target_classes_dir/." $classes_dir/
  fi
done

# Heuristically determine source directories based on Maven structure.
# Always include the $SRC root as it likely contains the fuzzer sources.
sourcefiles_args=(--sourcefiles $OUT/$SRC)
source_dirs=$(find $OUT/$SRC -type d -name 'java')
for source_dir in $source_dirs; do
  sourcefiles_args+=(--sourcefiles "$source_dir")
done

# Generate HTML and XML reports.
xml_report=$REPORT_PLATFORM_DIR/index.xml
java -jar /opt/jacoco-cli.jar report $jacoco_merged_exec \
    --html $REPORT_PLATFORM_DIR \
    --xml $xml_report \
    --classfiles $classes_dir \
    "${sourcefiles_args[@]}"

# Write llvm-cov summary file.
jacoco_report_converter.py $xml_report $SUMMARY_FILE

# Error tolerance is restored once the JVM report has been written.
set +e
else
# From this point on the script does not tolerate any errors.

View File

@ -0,0 +1,78 @@
#!/usr/bin/env python3
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
"""Helper script for creating an llvm-cov style JSON summary from a JaCoCo XML
report."""
import json
import sys
import xml.etree.ElementTree as ET
def convert(xml):
    """Turns a JaCoCo XML report into an llvm-cov JSON summary.

    Args:
      xml: The text of the JaCoCo XML report.

    Returns:
      The llvm-cov style summary serialized as a JSON string.
    """
    report = ET.fromstring(xml)

    # Maps each llvm-cov totals key to the JaCoCo counter type that is read
    # from the direct <counter> children of the report's root element.
    counter_types = (
        ("functions", "METHOD"),
        ("lines", "LINE"),
        ("regions", "BRANCH"),
    )
    totals = {}
    for totals_key, counter_type in counter_types:
        counter = report.find("./counter[@type='%s']" % counter_type)
        totals[totals_key] = make_counter_summary(counter)

    return json.dumps({
        "type": "oss-fuzz.java.coverage.json.export",
        "version": "1.0.0",
        "data": [{
            "totals": totals,
        }],
    })
def make_counter_summary(counter_element):
    """Turns a JaCoCo <counter> tag into an llvm-cov totals entry.

    Args:
      counter_element: The <counter> element to summarize, or None when no
        counter of the requested type was found (Element.find() returns None
        if nothing matches).

    Returns:
      A dict with the keys "covered", "notcovered", "count" and "percent". A
      missing counter, or one without any items, yields zero for every value.
    """
    covered = 0
    missed = 0
    # A report is not guaranteed to contain every counter type; treat an
    # absent counter as "nothing to cover" instead of failing on None.attrib.
    if counter_element is not None:
        covered = int(counter_element.attrib["covered"])
        missed = int(counter_element.attrib["missed"])

    count = covered + missed
    # Avoid a ZeroDivisionError when the counter tracks no items at all.
    percent = (100.0 * covered) / count if count else 0.0
    return {
        "covered": covered,
        "notcovered": missed,
        "count": count,
        "percent": percent,
    }
def main():
    """Produces an llvm-cov style JSON summary from a JaCoCo XML report.

    Expects exactly two command-line arguments: the path of the JaCoCo XML
    report to read and the path of the JSON summary to write.

    Returns:
      0 on success, 1 if the wrong number of arguments was given (a usage
      message is then written to stderr).
    """
    if len(sys.argv) != 3:
        sys.stderr.write('Usage: %s <path_to_jacoco_xml> <out_path_json>\n' %
                         sys.argv[0])
        return 1

    xml_path, json_path = sys.argv[1:]

    with open(xml_path, 'r') as xml_file:
        report_text = xml_file.read()
    summary_text = convert(report_text)
    with open(json_path, 'w') as json_file:
        json_file.write(summary_text)

    return 0


# Run the converter only when executed as a script; the process exit status
# is main()'s return value.
if __name__ == "__main__":
    sys.exit(main())

View File

@ -58,7 +58,7 @@ CORPUS_BACKUP_URL_FORMAT = (
PROJECT_LANGUAGE_REGEX = re.compile(r'\s*language\s*:\s*([^\s]+)')
# Languages from project.yaml that have code coverage support.
LANGUAGES_WITH_COVERAGE_SUPPORT = ['c', 'c++', 'go', 'rust']
LANGUAGES_WITH_COVERAGE_SUPPORT = ['c', 'c++', 'go', 'jvm', 'rust']
WORKDIR_REGEX = re.compile(r'\s*WORKDIR\s*([^\s]+)')