# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for determining coverage of fuzz targets."""
import json
import logging
import os
import sys

import http_utils

# pylint: disable=wrong-import-position,import-error
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import utils

# The path to get OSS-Fuzz project's latest report json file.
OSS_FUZZ_LATEST_COVERAGE_INFO_PATH = 'oss-fuzz-coverage/latest_report_info/'


# pylint: disable=too-few-public-methods
class CoverageError(Exception):
    """Exceptions for project coverage."""


class BaseCoverage:
    """Gets coverage data for a project.

    Subclasses supply the per-target coverage report by implementing
    get_target_coverage(); this base class turns such a report into a list
    of covered source files relative to the repository root.
    """

    def __init__(self, repo_path):
        """Stores |repo_path| in normalized form (always ends with '/').

        Args:
          repo_path: Path of the project's repository as it appears in the
            filenames of the coverage report.
        """
        self.repo_path = _normalize_repo_path(repo_path)

    def get_files_covered_by_target(self, target):
        """Returns a list of source files covered by the specific fuzz target.

        Files located outside of the repository and files in which no region
        is ever executed are left out of the result.

        Args:
          target: The name of the fuzz target whose coverage is requested.

        Returns:
          A list of files (paths relative to the repository root) that the
          fuzz target covers, or None if no coverage data is available.
        """
        target_cov = self.get_target_coverage(target)
        if not target_cov:
            logging.info('No coverage available for %s.', target)
            return None

        coverage_per_file = get_coverage_per_file(target_cov)
        if not coverage_per_file:
            logging.info('No files found in coverage report.')
            return None

        affected_file_list = []
        for file_cov in coverage_per_file:
            norm_file_path = os.path.normpath(file_cov['filename'])
            if not norm_file_path.startswith(self.repo_path):
                # Exclude files outside of the main repo.
                continue

            if not is_file_covered(file_cov):
                # Don't consider a file affected if code in it is never
                # executed.
                continue

            # Strip the prefix from the *normalized* path, i.e. the same
            # string that passed the startswith() check above. The raw
            # filename may contain '//' or '..' segments, in which case the
            # prefix would not line up and the result would not be a clean
            # repo-relative path.
            relative_path = utils.remove_prefix(norm_file_path,
                                                self.repo_path)
            affected_file_list.append(relative_path)

        return affected_file_list

    def get_target_coverage(self, target):
        """Get the coverage report for a specific fuzz target.

        Args:
          target: The name of the fuzz target whose coverage is requested.

        Returns:
          The target's coverage json dict or None on failure.

        Raises:
          NotImplementedError: Always; child classes must override this.
        """
        raise NotImplementedError('Child class must implement method.')


class OSSFuzzCoverage(BaseCoverage):
    """Gets coverage data for a project from OSS-Fuzz."""

    def __init__(self, repo_path, oss_fuzz_project_name):
        """Constructor for OSSFuzzCoverage.

        Args:
          repo_path: Path of the project's repository as it appears in the
            filenames of the coverage report.
          oss_fuzz_project_name: The name of the project in OSS-Fuzz.

        Raises:
          CoverageError: If the URL of the project's fuzzer stats directory
            could not be determined.
        """
        super().__init__(repo_path)
        self.oss_fuzz_project_name = oss_fuzz_project_name
        self.fuzzer_stats_url = _get_oss_fuzz_fuzzer_stats_dir_url(
            self.oss_fuzz_project_name)
        if self.fuzzer_stats_url is None:
            raise CoverageError('Could not get latest coverage.')

    def get_target_coverage(self, target):
        """Get the coverage report for a specific fuzz target.

        Args:
          target: The name of the fuzz target whose coverage is requested.

        Returns:
          The target's coverage json dict or None on failure.
        """
        if not self.fuzzer_stats_url:
            return None

        # Each target's report is stored as <target>.json in the stats dir.
        target_url = utils.url_join(self.fuzzer_stats_url, target + '.json')
        return http_utils.get_json_from_url(target_url)


def _get_oss_fuzz_latest_cov_report_info(oss_fuzz_project_name):
    """Gets and returns a dictionary containing the latest coverage report
    info for |oss_fuzz_project_name|, or None if it could not be fetched."""
    latest_report_info_url = utils.url_join(utils.GCS_BASE_URL,
                                            OSS_FUZZ_LATEST_COVERAGE_INFO_PATH,
                                            oss_fuzz_project_name + '.json')
    latest_cov_info = http_utils.get_json_from_url(latest_report_info_url)
    if latest_cov_info is None:
        logging.error('Could not get the coverage report json from url: %s.',
                      latest_report_info_url)
        return None
    return latest_cov_info


def _get_oss_fuzz_fuzzer_stats_dir_url(oss_fuzz_project_name):
    """Gets the URL of the fuzzer stats directory of the latest coverage
    report for a specific OSS-Fuzz project.

    Args:
      oss_fuzz_project_name: The name of the project.

    Returns:
      The result of converting the report's 'fuzzer_stats_dir' gs:// URL with
      utils.gs_url_to_https, or None if the latest report info is unavailable
      or has no 'fuzzer_stats_dir' entry.
    """
    latest_cov_info = _get_oss_fuzz_latest_cov_report_info(
        oss_fuzz_project_name)

    if not latest_cov_info:
        return None

    if 'fuzzer_stats_dir' not in latest_cov_info:
        logging.error('fuzzer_stats_dir not in latest coverage info.')
        return None

    fuzzer_stats_dir_gs_url = latest_cov_info['fuzzer_stats_dir']
    fuzzer_stats_dir_url = utils.gs_url_to_https(fuzzer_stats_dir_gs_url)
    return fuzzer_stats_dir_url


class FilesystemCoverage(BaseCoverage):
    """Class that gets a project's coverage from the filesystem."""

    def __init__(self, repo_path, project_coverage_dir):
        """Constructor for FilesystemCoverage.

        Args:
          repo_path: Path of the project's repository as it appears in the
            filenames of the coverage report.
          project_coverage_dir: Directory that contains the 'fuzzer_stats'
            subdirectory with one <target>.json file per fuzz target.
        """
        super().__init__(repo_path)
        self.project_coverage_dir = project_coverage_dir

    def get_target_coverage(self, target):
        """Get the coverage report for a specific fuzz target.

        Args:
          target: The name of the fuzz target whose coverage is requested.

        Returns:
          The target's coverage json dict or None on failure (the report
          file does not exist or does not contain valid JSON).
        """
        logging.info('Getting coverage for %s from filesystem.', target)
        fuzzer_stats_json_path = os.path.join(self.project_coverage_dir,
                                              'fuzzer_stats',
                                              target + '.json')
        if not os.path.exists(fuzzer_stats_json_path):
            logging.warning('%s does not exist.', fuzzer_stats_json_path)
            return None

        # Read as UTF-8 explicitly so that decoding does not depend on the
        # locale of the machine running this code.
        with open(fuzzer_stats_json_path,
                  encoding='utf-8') as fuzzer_stats_json_file_handle:
            try:
                return json.load(fuzzer_stats_json_file_handle)
            except json.decoder.JSONDecodeError as err:
                logging.error('Could not decode: %s. Error: %s.',
                              fuzzer_stats_json_path, err)
                return None


def is_file_covered(file_cov):
    """Returns whether the file is covered, i.e. True if the covered-region
    count in the file's coverage summary is non-zero."""
    return bool(file_cov['summary']['regions']['covered'])


def get_coverage_per_file(target_cov):
    """Returns the coverage per file within |target_cov|, or None if
    |target_cov| does not have the expected structure."""
    try:
        return target_cov['data'][0]['files']
    except (IndexError, TypeError, KeyError):
        logging.error('target_cov: %s is malformed.', target_cov)
        return None


def _normalize_repo_path(repo_path):
    """Normalizes and returns |repo_path| to make sure cases like /src/curl
    and /src/curl/ are both handled."""
    repo_path = os.path.normpath(repo_path)
    # normpath() strips a trailing slash; add it back so that prefix checks
    # only match whole path components (/src/curl/ must not match
    # /src/curl2/...).
    if not repo_path.endswith('/'):
        repo_path += '/'
    return repo_path