Add per-file coverage summaries to the JaCoCo -> llvm-cov JSON conversion.

* coverage: pass an "excludes" filter to the JaCoCo agent so that classes
  under com.code_intelligence.jazzer.* are left out of the coverage data.
* jacoco_report_converter.py: emit a "files" list next to "totals". Each
  <class> element that carries a "sourcefilename" attribute is matched to
  source files found under $OUT/$SRC by path suffix. Missing counters are
  treated as zero, and the regions counter is bumped by one whenever an
  element has any covered instruction.

diff --git a/infra/base-images/base-runner/coverage b/infra/base-images/base-runner/coverage
index 936e55765..57b537c44 100755
--- a/infra/base-images/base-runner/coverage
+++ b/infra/base-images/base-runner/coverage
@@ -143,7 +143,7 @@ function run_java_fuzz_target {
   rm -rf $corpus_dummy && mkdir -p $corpus_dummy
 
   # Use 100s timeout instead of 25s as code coverage builds can be very slow.
-  local jacoco_args="destfile=$exec_file,classdumpdir=$class_dump_dir"
+  local jacoco_args="destfile=$exec_file,classdumpdir=$class_dump_dir,excludes=com.code_intelligence.jazzer.*"
   local args="-merge=1 -timeout=100 -close_fd_mask=3 --nohooks \
       --jvm_args=-javaagent:/opt/jacoco-agent.jar=$jacoco_args \
       $corpus_dummy $corpus_real"
diff --git a/infra/base-images/base-runner/jacoco_report_converter.py b/infra/base-images/base-runner/jacoco_report_converter.py
index c5b5fc1e7..3c36065f1 100755
--- a/infra/base-images/base-runner/jacoco_report_converter.py
+++ b/infra/base-images/base-runner/jacoco_report_converter.py
@@ -17,6 +17,7 @@
 """Helper script for creating an llvm-cov style JSON summary from a JaCoCo XML
 report."""
 import json
+import os
 import sys
 import xml.etree.ElementTree as ET
 
@@ -28,33 +29,112 @@ def convert(xml):
       "version": "1.0.0",
       "data": [{
           "totals": {},
+          "files": [],
       }],
   }
 
-  root = ET.fromstring(xml)
-  totals = {}
-
-  function_counter = root.find("./counter[@type='METHOD']")
-  totals["functions"] = make_counter_summary(function_counter)
-
-  line_counter = root.find("./counter[@type='LINE']")
-  totals["lines"] = make_counter_summary(line_counter)
-
-  region_counter = root.find("./counter[@type='BRANCH']")
-  totals["regions"] = make_counter_summary(region_counter)
-
+  report = ET.fromstring(xml)
+  totals = make_element_summary(report)
   summary["data"][0]["totals"] = totals
 
+  # Since Java compilation does not track source file location, we match
+  # coverage info to source files via the full class name, e.g. we search for
+  # a path in /out/src ending in foo/bar/Baz.java for the class foo.bar.Baz.
+  # Under the assumptions that a given project only ever contains a single
+  # version of a class and that no class name appears as a suffix of another
+  # class name, we can assign coverage info to every source file matched in that
+  # way.
+  src_files = list_src_files()
+
+  for class_element in report.findall("./package/class"):
+    class_name = class_element.attrib["name"]
+    package_name = os.path.dirname(class_name)
+    if "sourcefilename" not in class_element.attrib:
+      continue
+    basename = class_element.attrib["sourcefilename"]
+    # This path is "foo/Bar.java" for the class element
+    # <class name="foo/Bar" sourcefilename="Bar.java">.
+    canonical_path = os.path.join(package_name, basename)
+
+    class_summary = make_element_summary(class_element)
+    summary["data"][0]["files"].append({
+        "filename": relative_to_src_path(src_files, canonical_path),
+        "summary": class_summary,
+    })
+
   return json.dumps(summary)
 
 
-def make_counter_summary(counter_element):
-  """Turns a JaCoCo <counter> tag into an llvm-cov totals entry."""
+def list_src_files():
+  """Returns a map from basename to full path for all files in $OUT/$SRC."""
+  filename_to_paths = {}
+  out_path = os.environ["OUT"] + "/"
+  src_path = os.environ["SRC"]
+  src_in_out = out_path + src_path
+  for dirpath, _, filenames in os.walk(src_in_out):
+    for filename in filenames:
+      full_path = dirpath + "/" + filename
+      # Map /out//src/... to /src/...
+      src_path = full_path[len(out_path):]
+      filename_to_paths.setdefault(filename, []).append(src_path)
+  return filename_to_paths
+
+
+def relative_to_src_path(src_files, canonical_path):
+  """Returns all paths in src_files ending in canonical_path."""
+  basename = os.path.basename(canonical_path)
+  if basename not in src_files:
+    return []
+  candidate_paths = src_files[basename]
+  return [
+      path for path in candidate_paths if path.endswith("/" + canonical_path)
+  ]
+
+
+def make_element_summary(element):
+  """Returns a coverage summary for an element in the XML report."""
   summary = {}
-  summary["covered"] = int(counter_element.attrib["covered"])
-  summary["notcovered"] = int(counter_element.attrib["missed"])
+
+  function_counter = element.find("./counter[@type='METHOD']")
+  summary["functions"] = make_counter_summary(function_counter)
+
+  line_counter = element.find("./counter[@type='LINE']")
+  summary["lines"] = make_counter_summary(line_counter)
+
+  # JaCoCo tracks branch coverage, which counts the covered control-flow edges
+  # between llvm-cov's regions instead of the covered regions themselves. For
+  # non-trivial code parts, the difference is usually negligible. However, if
+  # all methods of a class consist of a single region only (no branches),
+  # JaCoCo does not report any branch coverage even if there is instruction
+  # coverage. Since this would give incorrect results for CI Fuzz purposes, we
+  # increase the regions counter by 1 if there is any amount of instruction
+  # coverage.
+  instruction_counter = element.find("./counter[@type='INSTRUCTION']")
+  has_some_coverage = instruction_counter is not None and int(
+      instruction_counter.attrib["covered"]) > 0
+  branch_covered_adjustment = 1 if has_some_coverage else 0
+  region_counter = element.find("./counter[@type='BRANCH']")
+  summary["regions"] = make_counter_summary(
+      region_counter, covered_adjustment=branch_covered_adjustment)
+
+  return summary
+
+
+def make_counter_summary(counter_element, covered_adjustment=0):
+  """Turns a JaCoCo <counter> element into an llvm-cov totals entry."""
+  summary = {}
+  covered = covered_adjustment
+  missed = 0
+  if counter_element is not None:
+    covered += int(counter_element.attrib["covered"])
+    missed += int(counter_element.attrib["missed"])
+  summary["covered"] = covered
+  summary["notcovered"] = missed
   summary["count"] = summary["covered"] + summary["notcovered"]
-  summary["percent"] = (100.0 * summary["covered"]) / summary["count"]
+  if summary["count"] != 0:
+    summary["percent"] = (100.0 * summary["covered"]) / summary["count"]
+  else:
+    summary["percent"] = 0
   return summary
 
 