[infra] File-level coverage for JVM projects (#5822)

* Exclude Jazzer classes from coverage reports

* Generate file-level coverage data for Java

Adds per-file coverage information to llvm-cov style *.json files for
Java targets.

This provides full CI Fuzz support for "jvm" projects.
This commit is contained in:
Fabian Meumertzheim 2021-05-25 17:05:16 +02:00 committed by GitHub
parent f43928cf47
commit 6defe8c725
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 98 additions and 18 deletions

View File

@ -143,7 +143,7 @@ function run_java_fuzz_target {
rm -rf $corpus_dummy && mkdir -p $corpus_dummy
# Use 100s timeout instead of 25s as code coverage builds can be very slow.
local jacoco_args="destfile=$exec_file,classdumpdir=$class_dump_dir"
local jacoco_args="destfile=$exec_file,classdumpdir=$class_dump_dir,excludes=com.code_intelligence.jazzer.*"
local args="-merge=1 -timeout=100 -close_fd_mask=3 --nohooks \
--jvm_args=-javaagent:/opt/jacoco-agent.jar=$jacoco_args \
$corpus_dummy $corpus_real"

View File

@ -17,6 +17,7 @@
"""Helper script for creating an llvm-cov style JSON summary from a JaCoCo XML
report."""
import json
import os
import sys
import xml.etree.ElementTree as ET
@ -28,33 +29,112 @@ def convert(xml):
"version": "1.0.0",
"data": [{
"totals": {},
"files": [],
}],
}
root = ET.fromstring(xml)
totals = {}
function_counter = root.find("./counter[@type='METHOD']")
totals["functions"] = make_counter_summary(function_counter)
line_counter = root.find("./counter[@type='LINE']")
totals["lines"] = make_counter_summary(line_counter)
region_counter = root.find("./counter[@type='BRANCH']")
totals["regions"] = make_counter_summary(region_counter)
report = ET.fromstring(xml)
totals = make_element_summary(report)
summary["data"][0]["totals"] = totals
# Since Java compilation does not track source file location, we match
# coverage info to source files via the full class name, e.g. we search for
# a path in /out/src ending in foo/bar/Baz.java for the class foo.bar.Baz.
# Under the assumptions that a given project only ever contains a single
# version of a class and that no class name appears as a suffix of another
# class name, we can assign coverage info to every source file matched in that
# way.
src_files = list_src_files()
for class_element in report.findall("./package/class"):
class_name = class_element.attrib["name"]
package_name = os.path.dirname(class_name)
if "sourcefilename" not in class_element.attrib:
continue
basename = class_element.attrib["sourcefilename"]
# This path is "foo/Bar.java" for the class element
# <class name="foo/Bar" sourcefilename="Bar.java">.
canonical_path = os.path.join(package_name, basename)
class_summary = make_element_summary(class_element)
summary["data"][0]["files"].append({
"filename": relative_to_src_path(src_files, canonical_path),
"summary": class_summary,
})
return json.dumps(summary)
def make_counter_summary(counter_element):
"""Turns a JaCoCo <counter> tag into an llvm-cov totals entry."""
def list_src_files():
    """Returns a map from basename to the paths of all files in $OUT/$SRC.

    Walks the directory $OUT + "/" + $SRC and groups every file found there by
    its basename. Each recorded path has the leading "$OUT/" prefix removed,
    e.g. a file found at /out//src/foo/Bar.java is recorded as
    /src/foo/Bar.java.

    The directory tree is traversed in sorted order so that the list stored
    for each basename is deterministic across runs and file systems.

    Returns:
      A dict mapping each basename to the list of prefix-stripped paths of all
      files with that name.

    Raises:
      KeyError: If the OUT or SRC environment variable is not set.
    """
    out_prefix = os.environ["OUT"] + "/"
    src_in_out = out_prefix + os.environ["SRC"]
    filename_to_paths = {}
    for dirpath, dirnames, filenames in os.walk(src_in_out):
        # Sorting in place makes os.walk descend into subdirectories in a
        # stable order (it honors in-place edits when walking top-down).
        dirnames.sort()
        for filename in sorted(filenames):
            full_path = dirpath + "/" + filename
            # Map /out//src/... to /src/... by dropping the "$OUT/" prefix.
            stripped_path = full_path[len(out_prefix):]
            filename_to_paths.setdefault(filename, []).append(stripped_path)
    return filename_to_paths
def relative_to_src_path(src_files, canonical_path):
    """Returns all paths in src_files ending in canonical_path.

    Only the paths stored under the basename of canonical_path are examined. A
    path matches when it ends with "/" followed by canonical_path, so the
    match always starts at a directory boundary.

    Args:
      src_files: A dict mapping a basename to a list of paths.
      canonical_path: The path suffix to look for, e.g. "foo/Bar.java".

    Returns:
      The list of matching paths; empty if the basename is unknown or no path
      matches.
    """
    wanted_suffix = "/" + canonical_path
    matches = []
    for candidate in src_files.get(os.path.basename(canonical_path), []):
        if candidate.endswith(wanted_suffix):
            matches.append(candidate)
    return matches
def make_element_summary(element):
    """Returns a coverage summary for an element in the XML report.

    Args:
      element: An XML element of the report whose direct <counter> children
        are looked up by their "type" attribute (METHOD, LINE, INSTRUCTION and
        BRANCH).

    Returns:
      A dict with the keys "functions", "lines" and "regions", each holding
      the entry produced by make_counter_summary for the METHOD, LINE and
      BRANCH counter respectively. A missing counter is passed on as None.
    """
    summary = {}

    function_counter = element.find("./counter[@type='METHOD']")
    summary["functions"] = make_counter_summary(function_counter)

    line_counter = element.find("./counter[@type='LINE']")
    summary["lines"] = make_counter_summary(line_counter)

    # JaCoCo tracks branch coverage, which counts the covered control-flow
    # edges between llvm-cov's regions instead of the covered regions
    # themselves. For non-trivial code parts, the difference is usually
    # negligible. However, if all methods of a class consist of a single region
    # only (no branches), JaCoCo does not report any branch coverage even if
    # there is instruction coverage. Since this would give incorrect results
    # for CI Fuzz purposes, we increase the regions counter by 1 if there is
    # any amount of instruction coverage.
    instruction_counter = element.find("./counter[@type='INSTRUCTION']")
    has_some_coverage = (instruction_counter is not None and
                         int(instruction_counter.attrib["covered"]) > 0)
    branch_covered_adjustment = 1 if has_some_coverage else 0

    region_counter = element.find("./counter[@type='BRANCH']")
    summary["regions"] = make_counter_summary(
        region_counter, covered_adjustment=branch_covered_adjustment)

    return summary
def make_counter_summary(counter_element, covered_adjustment=0):
    """Turns a JaCoCo <counter> element into an llvm-cov totals entry.

    Args:
      counter_element: An XML element with integer-valued "covered" and
        "missed" attributes, or None if the report has no such counter.
      covered_adjustment: A number added to the covered count (and therefore
        also to the total count).

    Returns:
      A dict with the keys "covered", "notcovered", "count" (the sum of the
      two) and "percent" (covered as a percentage of count, or 0 when count is
      zero).
    """
    if counter_element is None:
        # No counter in the report: only the adjustment contributes.
        hit = covered_adjustment
        miss = 0
    else:
        attributes = counter_element.attrib
        hit = covered_adjustment + int(attributes["covered"])
        miss = int(attributes["missed"])

    total = hit + miss
    percent = (100.0 * hit) / total if total != 0 else 0
    return {
        "covered": hit,
        "notcovered": miss,
        "count": total,
        "percent": percent,
    }