From 7f150fe75e2a50cde54169fd638b942b92556fe2 Mon Sep 17 00:00:00 2001 From: jonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com> Date: Wed, 3 Feb 2021 12:46:19 -0800 Subject: [PATCH] [cifuzz] Abstract-away OSS-Fuzz specific bits (#5088) Abstract away OSS-Fuzz specific bits into the OSSFuzz implementation of the ClusterFuzzDeployment class. This will make it easier to implement support for other deployments of ClusterFuzz (including ClusterFuzzLite). --- infra/cifuzz/actions/run_fuzzers/action.yml | 7 + infra/cifuzz/affected_fuzz_targets.py | 1 + infra/cifuzz/clusterfuzz_deployment.py | 218 ++++++++++++++++++ infra/cifuzz/clusterfuzz_deployment_test.py | 161 +++++++++++++ infra/cifuzz/config_utils.py | 2 +- infra/cifuzz/continuous_integration.py | 5 + infra/cifuzz/fuzz_target.py | 240 ++++--------------- infra/cifuzz/fuzz_target_test.py | 243 +++++--------------- infra/cifuzz/run_fuzzers.py | 10 +- infra/cifuzz/run_fuzzers_test.py | 6 +- 10 files changed, 501 insertions(+), 392 deletions(-) create mode 100644 infra/cifuzz/clusterfuzz_deployment.py create mode 100644 infra/cifuzz/clusterfuzz_deployment_test.py diff --git a/infra/cifuzz/actions/run_fuzzers/action.yml b/infra/cifuzz/actions/run_fuzzers/action.yml index 42cb2ddaf..582133c74 100644 --- a/infra/cifuzz/actions/run_fuzzers/action.yml +++ b/infra/cifuzz/actions/run_fuzzers/action.yml @@ -15,6 +15,9 @@ inputs: sanitizer: description: 'The sanitizer to run the fuzzers with.' default: 'address' + build-integration-path: + description: "The path to the the project's build integration." + required: false run-fuzzers-mode: description: | The mode to run the fuzzers with ("ci" or "batch"). @@ -32,3 +35,7 @@ runs: DRY_RUN: ${{ inputs.dry-run}} SANITIZER: ${{ inputs.sanitizer }} RUN_FUZZERS_MODE: ${{ inputs.run-fuzzers-mode }} + # TODO(metzman): Even though this param is used for building, it's needed + # for running because we use it to distinguish OSS-Fuzz from non-OSS-Fuzz. 
+ # We should do something explicit instead. + BUILD_INTEGRATION_PATH: ${{ inputs.build-integration-path }} diff --git a/infra/cifuzz/affected_fuzz_targets.py b/infra/cifuzz/affected_fuzz_targets.py index fb09eb644..f9f2242a3 100644 --- a/infra/cifuzz/affected_fuzz_targets.py +++ b/infra/cifuzz/affected_fuzz_targets.py @@ -38,6 +38,7 @@ def remove_unaffected_fuzz_targets(project_name, out_dir, files_changed, targets are unaffected. For example, this means that fuzz targets which don't have coverage data on will not be deleted. """ + # TODO(metzman): Make this use clusterfuzz deployment. if not files_changed: # Don't remove any fuzz targets if there is no difference from HEAD. logging.info('No files changed compared to HEAD.') diff --git a/infra/cifuzz/clusterfuzz_deployment.py b/infra/cifuzz/clusterfuzz_deployment.py new file mode 100644 index 000000000..60b1c31a0 --- /dev/null +++ b/infra/cifuzz/clusterfuzz_deployment.py @@ -0,0 +1,218 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Module for interacting with the ClusterFuzz deployment.""" +import logging +import os +import sys +import tempfile +import time +import urllib.error +import urllib.request +import zipfile + +# pylint: disable=wrong-import-position,import-error +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import utils + + +class BaseClusterFuzzDeployment: + """Base class for ClusterFuzz deployments.""" + + CORPUS_DIR_NAME = 'cifuzz-corpus' + BUILD_DIR_NAME = 'cifuzz-latest-build' + + def __init__(self, config): + self.config = config + + def download_latest_build(self, out_dir): + """Downloads the latest build from ClusterFuzz. + + Returns: + A path to where the OSS-Fuzz build was stored, or None if it wasn't. + """ + raise NotImplementedError('Child class must implement method.') + + def download_corpus(self, target_name, out_dir): + """Downloads the corpus for |target_name| from ClusterFuzz to |out_dir|. + + Returns: + A path to where the corpus was stored, or None if it wasn't. + """ + raise NotImplementedError('Child class must implement method.') + + +class ClusterFuzzLite(BaseClusterFuzzDeployment): + """Class representing a deployment of ClusterFuzzLite.""" + + def download_latest_build(self, out_dir): + logging.info('download_latest_build not implemented for ClusterFuzzLite.') + + def download_corpus(self, target_name, out_dir): + logging.info('download_corpus not implemented for ClusterFuzzLite.') + + +class OSSFuzz(BaseClusterFuzzDeployment): + """The OSS-Fuzz ClusterFuzz deployment.""" + + # Location of clusterfuzz builds on GCS. + CLUSTERFUZZ_BUILDS = 'clusterfuzz-builds' + + # Format string for the latest version of a project's build. + VERSION_STRING = '{project_name}-{sanitizer}-latest.version' + + # Zip file name containing the corpus. + CORPUS_ZIP_NAME = 'public.zip' + + def get_latest_build_name(self): + """Gets the name of the latest OSS-Fuzz build of a project. 
+ + Returns: + A string with the latest build version or None. + """ + version_file = self.VERSION_STRING.format( + project_name=self.config.project_name, sanitizer=self.config.sanitizer) + version_url = utils.url_join(utils.GCS_BASE_URL, self.CLUSTERFUZZ_BUILDS, + self.config.project_name, version_file) + try: + response = urllib.request.urlopen(version_url) + except urllib.error.HTTPError: + logging.error('Error getting latest build version for %s from: %s.', + self.config.project_name, version_url) + return None + return response.read().decode() + + def download_latest_build(self, out_dir): + """Downloads the latest OSS-Fuzz build from GCS. + + Returns: + A path to where the OSS-Fuzz build was stored, or None if it wasn't. + """ + build_dir = os.path.join(out_dir, self.BUILD_DIR_NAME) + if os.path.exists(build_dir): + return build_dir + + os.makedirs(build_dir, exist_ok=True) + + latest_build_name = self.get_latest_build_name() + if not latest_build_name: + return None + + oss_fuzz_build_url = utils.url_join(utils.GCS_BASE_URL, + self.CLUSTERFUZZ_BUILDS, + self.config.project_name, + latest_build_name) + if download_and_unpack_zip(oss_fuzz_build_url, build_dir): + return build_dir + + return None + + def download_corpus(self, target_name, out_dir): + """Downloads the latest OSS-Fuzz corpus for the target. + + Returns: + The local path to the corpus or None if download failed. + """ + corpus_dir = os.path.join(out_dir, self.CORPUS_DIR_NAME, target_name) + os.makedirs(corpus_dir, exist_ok=True) + # TODO(metzman): Clean up this code. 
+ project_qualified_fuzz_target_name = target_name + qualified_name_prefix = self.config.project_name + '_' + + if not target_name.startswith(qualified_name_prefix): + project_qualified_fuzz_target_name = qualified_name_prefix + target_name + + corpus_url = utils.url_join( + utils.GCS_BASE_URL, + '{0}-backup.clusterfuzz-external.appspot.com/corpus/libFuzzer/'.format( + self.config.project_name), project_qualified_fuzz_target_name, + self.CORPUS_ZIP_NAME) + + if download_and_unpack_zip(corpus_url, corpus_dir): + return corpus_dir + + return None + + +def download_url(url, filename, num_attempts=3): + """Downloads the file located at |url|, using HTTP to |filename|. + + Args: + url: A url to a file to download. + filename: The path the file should be downloaded to. + num_attempts: The maximum number of download attempts; only + ConnectionResetError is retried. + + Returns: + True on success. + """ + sleep_time = 1 + + # TODO(metzman): Use retry.wrap here. + for _ in range(num_attempts): + try: + urllib.request.urlretrieve(url, filename) + return True + except urllib.error.HTTPError: + # In these cases, retrying probably won't work since the error probably + # means there is nothing at the URL to download. + logging.error('Unable to download from: %s.', url) + return False + except ConnectionResetError: + # These errors are more likely to be transient. Retry. + pass + time.sleep(sleep_time) + + logging.error('Failed to download %s, %d times.', url, num_attempts) + + return False + + +def download_and_unpack_zip(url, extract_directory): + """Downloads and unpacks a zip file from an HTTP URL. + + Args: + url: A url to the zip file to be downloaded and unpacked. + extract_directory: The path where the zip file should be extracted to. + + Returns: + True on success. 
+ """ + if not os.path.exists(extract_directory): + logging.error('Extract directory: %s does not exist.', extract_directory) + return False + + # Gives the temporary zip file a unique identifier in the case that + # download_and_unpack_zip is done in parallel. + with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file: + if not download_url(url, tmp_file.name): + return False + + try: + with zipfile.ZipFile(tmp_file.name, 'r') as zip_file: + zip_file.extractall(extract_directory) + except zipfile.BadZipFile: + logging.error('Error unpacking zip from %s. Bad Zipfile.', url) + return False + + return True + + +def get_clusterfuzz_deployment(config): + """Returns object representing deployment of ClusterFuzz used by |config|.""" + if (config.platform == config.Platform.INTERNAL_GENERIC_CI or + config.platform == config.Platform.INTERNAL_GITHUB): + logging.info('Using OSS-Fuzz as ClusterFuzz deployment.') + return OSSFuzz(config) + logging.info('Using ClusterFuzzLite as ClusterFuzz deployment.') + return ClusterFuzzLite(config) diff --git a/infra/cifuzz/clusterfuzz_deployment_test.py b/infra/cifuzz/clusterfuzz_deployment_test.py new file mode 100644 index 000000000..a7c088126 --- /dev/null +++ b/infra/cifuzz/clusterfuzz_deployment_test.py @@ -0,0 +1,161 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for clusterfuzz_deployment.py""" + +import os +import tempfile +import unittest +from unittest import mock +import urllib.error + +from pyfakefs import fake_filesystem_unittest + +import clusterfuzz_deployment +import config_utils + +# NOTE: This integration test relies on +# https://github.com/google/oss-fuzz/tree/master/projects/example project. +EXAMPLE_PROJECT = 'example' + +# An example fuzzer that triggers an error. +EXAMPLE_FUZZER = 'example_crash_fuzzer' + + +def _create_config(**kwargs): + """Creates a config object and then sets every attribute that is a key in + |kwargs| to the corresponding value. Asserts that each key in |kwargs| is an + attribute of Config.""" + defaults = {'is_github': True, 'project_name': EXAMPLE_PROJECT} + for default_key, default_value in defaults.items(): + if default_key not in kwargs: + kwargs[default_key] = default_value + + with mock.patch('os.path.basename', return_value=None), mock.patch( + 'config_utils.get_project_src_path', + return_value=None), mock.patch('config_utils._is_dry_run', + return_value=True): + config = config_utils.RunFuzzersConfig() + + for key, value in kwargs.items(): + assert hasattr(config, key), 'Config doesn\'t have attribute: ' + key + setattr(config, key, value) + return config + + +def _create_deployment(**kwargs): + config = _create_config(**kwargs) + return clusterfuzz_deployment.get_clusterfuzz_deployment(config) + + +class OSSFuzzTest(unittest.TestCase): + """Tests OSSFuzz.""" + + def test_download_corpus(self): + """Tests that we can download a corpus for a valid project.""" + deployment = _create_deployment() + with tempfile.TemporaryDirectory() as tmp_dir: + with mock.patch('clusterfuzz_deployment.download_and_unpack_zip', + return_value=False) as mocked_download_and_unpack_zip: + deployment.download_corpus(EXAMPLE_FUZZER, tmp_dir) + (url, out_dir), _ = mocked_download_and_unpack_zip.call_args + self.assertEqual( + url, 'https://storage.googleapis.com/example-backup.' 
+ 'clusterfuzz-external.appspot.com/corpus/libFuzzer/' + 'example_crash_fuzzer/public.zip') + self.assertEqual(out_dir, + os.path.join(tmp_dir, 'cifuzz-corpus', EXAMPLE_FUZZER)) + + def test_download_fail(self): + """Tests that when downloading fails, None is returned.""" + deployment = _create_deployment() + with tempfile.TemporaryDirectory() as tmp_dir: + with mock.patch('clusterfuzz_deployment.download_and_unpack_zip', + return_value=False): + corpus_path = deployment.download_corpus(EXAMPLE_FUZZER, tmp_dir) + self.assertIsNone(corpus_path) + + def test_download_latest_build(self): + """Tests that the build directory is downloaded once and no more.""" + deployment = _create_deployment() + with tempfile.TemporaryDirectory() as tmp_dir: + latest_name = deployment.get_latest_build_name() + with mock.patch('clusterfuzz_deployment.OSSFuzz.get_latest_build_name', + return_value=latest_name): + latest_build_path = deployment.download_latest_build(tmp_dir) + self.assertNotEqual(len(os.listdir(latest_build_path)), 0) + + def test_get_latest_build_name(self): + """Tests that the latest build name can be retrieved from GCS.""" + deployment = _create_deployment() + latest_build_name = deployment.get_latest_build_name() + self.assertTrue(latest_build_name.endswith('.zip')) + self.assertTrue('address' in latest_build_name) + + +class DownloadUrlTest(unittest.TestCase): + """Tests that download_url works.""" + URL = 'example.com/file' + FILE_PATH = '/tmp/file' + + @mock.patch('time.sleep') + @mock.patch('urllib.request.urlretrieve', return_value=True) + def test_download_url_no_error(self, mocked_urlretrieve, _): + """Tests that download_url works when there is no error.""" + self.assertTrue( + clusterfuzz_deployment.download_url(self.URL, self.FILE_PATH)) + self.assertEqual(1, mocked_urlretrieve.call_count) + + @mock.patch('time.sleep') + @mock.patch('logging.error') + @mock.patch('urllib.request.urlretrieve', + side_effect=urllib.error.HTTPError(None, None, None, None, 
None)) + def test_download_url_http_error(self, mocked_urlretrieve, mocked_error, _): + """Tests that download_url doesn't retry when there is an HTTP error.""" + self.assertFalse( + clusterfuzz_deployment.download_url(self.URL, self.FILE_PATH)) + mocked_error.assert_called_with('Unable to download from: %s.', self.URL) + self.assertEqual(1, mocked_urlretrieve.call_count) + + @mock.patch('time.sleep') + @mock.patch('logging.error') + @mock.patch('urllib.request.urlretrieve', side_effect=ConnectionResetError) + def test_download_url_connection_error(self, mocked_urlretrieve, mocked_error, + mocked_sleep): + """Tests that download_url retries when there is a ConnectionResetError.""" + self.assertFalse( + clusterfuzz_deployment.download_url(self.URL, self.FILE_PATH)) + self.assertEqual(3, mocked_urlretrieve.call_count) + self.assertEqual(3, mocked_sleep.call_count) + mocked_error.assert_called_with('Failed to download %s, %d times.', + self.URL, 3) + + +class DownloadAndUnpackZipTest(fake_filesystem_unittest.TestCase): + """Tests download_and_unpack_zip.""" + + def setUp(self): + self.setUpPyfakefs() + + def test_bad_zip_download(self): + """Tests download_and_unpack_zip returns False when a bad zip is passed.""" + with open('/url_tmp.zip', 'w') as file_handle: + file_handle.write('Test file.') + with mock.patch('urllib.request.urlretrieve', return_value=True): + self.assertFalse( + clusterfuzz_deployment.download_and_unpack_zip( + '/not/a/real/url', '/extract-directory')) + + +if __name__ == '__main__': + unittest.main() diff --git a/infra/cifuzz/config_utils.py b/infra/cifuzz/config_utils.py index e2093b5b1..fd1871497 100644 --- a/infra/cifuzz/config_utils.py +++ b/infra/cifuzz/config_utils.py @@ -127,7 +127,7 @@ class BuildFuzzersConfig(BaseConfig): event_data['pull_request']['number']) logging.debug('pr_ref: %s', self.pr_ref) - self.git_url = event_data['repository']['ssh_url'] + self.git_url = event_data['repository']['html_url'] def __init__(self): """Get the 
configuration from CIFuzz from the environment. These variables diff --git a/infra/cifuzz/continuous_integration.py b/infra/cifuzz/continuous_integration.py index 75a9e2245..b2e8af28e 100644 --- a/infra/cifuzz/continuous_integration.py +++ b/infra/cifuzz/continuous_integration.py @@ -48,6 +48,11 @@ class BaseCi: def __init__(self, config): self.config = config + def prepare_for_fuzzer_build(self): + """Builds the fuzzer builder image and gets the source code we need to + fuzz.""" + raise NotImplementedError('Children must implement this method.') + def get_diff_base(self): """Returns the base to diff against with git to get the change under test.""" diff --git a/infra/cifuzz/fuzz_target.py b/infra/cifuzz/fuzz_target.py index 6d42563e1..3f2d7f8d7 100644 --- a/infra/cifuzz/fuzz_target.py +++ b/infra/cifuzz/fuzz_target.py @@ -18,11 +18,6 @@ import re import stat import subprocess import sys -import tempfile -import time -import urllib.error -import urllib.request -import zipfile # pylint: disable=wrong-import-position,import-error sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -35,30 +30,17 @@ logging.basicConfig( LIBFUZZER_OPTIONS = '-seed=1337 -len_control=0' -# Location of cluster fuzz builds on GCS. -CLUSTERFUZZ_BUILDS = 'clusterfuzz-builds' - -# The get request for the latest version of a project's build. -VERSION_STRING = '{project_name}-{sanitizer}-latest.version' - -# The name to store the latest OSS-Fuzz build at. -BUILD_ARCHIVE_NAME = 'oss_fuzz_latest.zip' - -# Zip file name containing the corpus. -CORPUS_ZIP_NAME = 'public.zip' - # The number of reproduce attempts for a crash. REPRODUCE_ATTEMPTS = 10 # Seconds on top of duration until a timeout error is raised. BUFFER_TIME = 10 -# Log message for is_crash_reportable if it can't check if crash reproduces on -# an OSS-Fuzz build. -COULD_NOT_TEST_ON_OSS_FUZZ_MESSAGE = ( - 'Crash is reproducible. 
Could not run OSS-Fuzz build of ' - 'target to determine if this pull request introduced crash. ' - 'Assuming this pull request introduced crash.') +# Log message if we can't check if crash reproduces on an recent build. +COULD_NOT_TEST_ON_RECENT_MESSAGE = ( + 'Crash is reproducible. Could not run recent build of ' + 'target to determine if this code change (pr/commit) introduced crash. ' + 'Assuming this code change introduced crash.') class ReproduceError(Exception): @@ -73,36 +55,27 @@ class FuzzTarget: duration: The length of time in seconds that the target should run. target_path: The location of the fuzz target binary. out_dir: The location of where output artifacts are stored. - project_name: The name of the relevant OSS-Fuzz project. """ # pylint: disable=too-many-arguments - def __init__(self, - target_path, - duration, - out_dir, - project_name=None, - sanitizer='address'): + def __init__(self, target_path, duration, out_dir, clusterfuzz_deployment, + config): """Represents a single fuzz target. - Note: project_name should be none when the fuzzer being run is not - associated with a specific OSS-Fuzz project. - Args: target_path: The location of the fuzz target binary. duration: The length of time in seconds the target should run. out_dir: The location of where the output from crashes should be stored. - project_name: The name of the relevant OSS-Fuzz project. + clusterfuzz_deployment: The object representing the ClusterFuzz + deployment. + config: The config of this project. """ - # TODO(metzman): Get rid of sanitizer defaulting to address. config_utils - # implements this functionality. Also look into why project_name defaults to - # None. Maybe accept config and get those values from there. 
self.target_path = target_path self.target_name = os.path.basename(self.target_path) self.duration = int(duration) self.out_dir = out_dir - self.project_name = project_name - self.sanitizer = sanitizer + self.clusterfuzz_deployment = clusterfuzz_deployment + self.config = config def fuzz(self): """Starts the fuzz target run for the length of time specified by duration. @@ -123,9 +96,10 @@ class FuzzTarget: command += ['-v', '%s:%s' % (self.out_dir, '/out')] command += [ - '-e', 'FUZZING_ENGINE=libfuzzer', '-e', 'SANITIZER=' + self.sanitizer, - '-e', 'CIFUZZ=True', '-e', 'RUN_FUZZER_MODE=interactive', - 'gcr.io/oss-fuzz-base/base-runner', 'bash', '-c' + '-e', 'FUZZING_ENGINE=libfuzzer', '-e', + 'SANITIZER=' + self.config.sanitizer, '-e', 'CIFUZZ=True', '-e', + 'RUN_FUZZER_MODE=interactive', 'gcr.io/oss-fuzz-base/base-runner', + 'bash', '-c' ] run_fuzzer_command = 'run_fuzzer {fuzz_target} {options}'.format( @@ -133,7 +107,8 @@ class FuzzTarget: options=LIBFUZZER_OPTIONS + ' -max_total_time=' + str(self.duration)) # If corpus can be downloaded use it for fuzzing. - latest_corpus_path = self.download_latest_corpus() + latest_corpus_path = self.clusterfuzz_deployment.download_corpus( + self.target_name, self.out_dir) if latest_corpus_path: run_fuzzer_command = run_fuzzer_command + ' ' + latest_corpus_path command.append(run_fuzzer_command) @@ -220,11 +195,8 @@ class FuzzTarget: def is_crash_reportable(self, testcase): """Returns True if a crash is reportable. This means the crash is - reproducible but not reproducible on a build from OSS-Fuzz (meaning the - crash was introduced by this PR). - - NOTE: If no project is specified the crash is assumed introduced - by the pull request if it is reproducible. + reproducible but not reproducible on a build from the ClusterFuzz deployment + (meaning the crash was introduced by this PR/commit/code change). Args: testcase: The path to the testcase that triggered the crash. 
@@ -239,44 +211,46 @@ class FuzzTarget: raise ReproduceError('Testcase %s not found.' % testcase) try: - reproducible_on_pr_build = self.is_reproducible(testcase, - self.target_path) + reproducible_on_code_change = self.is_reproducible( + testcase, self.target_path) except ReproduceError as error: logging.error('Could not run target when checking for reproducibility.' 'Please file an issue:' 'https://github.com/google/oss-fuzz/issues/new.') raise error - if not self.project_name: - return reproducible_on_pr_build - - if not reproducible_on_pr_build: + if not reproducible_on_code_change: logging.info('Failed to reproduce the crash using the obtained testcase.') return False - oss_fuzz_build_dir = self.download_oss_fuzz_build() - if not oss_fuzz_build_dir: - # Crash is reproducible on PR build and we can't test on OSS-Fuzz build. - logging.info(COULD_NOT_TEST_ON_OSS_FUZZ_MESSAGE) + clusterfuzz_build_dir = self.clusterfuzz_deployment.download_latest_build( + self.out_dir) + if not clusterfuzz_build_dir: + # Crash is reproducible on PR build and we can't test on a recent + # ClusterFuzz/OSS-Fuzz build. + logging.info(COULD_NOT_TEST_ON_RECENT_MESSAGE) return True - oss_fuzz_target_path = os.path.join(oss_fuzz_build_dir, self.target_name) + clusterfuzz_target_path = os.path.join(clusterfuzz_build_dir, + self.target_name) try: - reproducible_on_oss_fuzz_build = self.is_reproducible( - testcase, oss_fuzz_target_path) + reproducible_on_clusterfuzz_build = self.is_reproducible( + testcase, clusterfuzz_target_path) except ReproduceError: - # This happens if the project has OSS-Fuzz builds, but the fuzz target + # This happens if the project has ClusterFuzz builds, but the fuzz target # is not in it (e.g. because the fuzz target is new). - logging.info(COULD_NOT_TEST_ON_OSS_FUZZ_MESSAGE) + logging.info(COULD_NOT_TEST_ON_RECENT_MESSAGE) return True - if not reproducible_on_oss_fuzz_build: + if not reproducible_on_clusterfuzz_build: logging.info('The crash is reproducible. 
The crash doesn\'t reproduce ' - 'on old builds. This pull request probably introduced the ' + 'on old builds. This code change probably introduced the ' 'crash.') + return True - logging.info('The crash is reproducible without the current pull request.') + logging.info('The crash is reproducible on old builds ' + '(without the current code change).') return False def get_testcase(self, error_bytes): @@ -292,137 +266,3 @@ class FuzzTarget: if match: return os.path.join(self.out_dir, match.group(1).decode('utf-8')) return None - - def get_latest_build_version(self): - """Gets the latest OSS-Fuzz build version for a projects' fuzzers. - - Returns: - A string with the latest build version or None. - """ - if not self.project_name: - return None - - version = VERSION_STRING.format(project_name=self.project_name, - sanitizer=self.sanitizer) - version_url = utils.url_join(utils.GCS_BASE_URL, CLUSTERFUZZ_BUILDS, - self.project_name, version) - try: - response = urllib.request.urlopen(version_url) - except urllib.error.HTTPError: - logging.error('Error getting latest build version for %s with url %s.', - self.project_name, version_url) - return None - return response.read().decode() - - def download_oss_fuzz_build(self): - """Downloads the latest OSS-Fuzz build from GCS. - - Returns: - A path to where the OSS-Fuzz build is located, or None. 
- """ - if not os.path.exists(self.out_dir): - logging.error('Out directory %s does not exist.', self.out_dir) - return None - if not self.project_name: - return None - - build_dir = os.path.join(self.out_dir, 'oss_fuzz_latest', self.project_name) - if os.path.exists(os.path.join(build_dir, self.target_name)): - return build_dir - os.makedirs(build_dir, exist_ok=True) - latest_build_str = self.get_latest_build_version() - if not latest_build_str: - return None - - oss_fuzz_build_url = utils.url_join(utils.GCS_BASE_URL, CLUSTERFUZZ_BUILDS, - self.project_name, latest_build_str) - return download_and_unpack_zip(oss_fuzz_build_url, build_dir) - - def download_latest_corpus(self): - """Downloads the latest OSS-Fuzz corpus for the target from google cloud. - - Returns: - The local path to to corpus or None if download failed. - """ - if not self.project_name: - return None - if not os.path.exists(self.out_dir): - logging.error('Out directory %s does not exist.', self.out_dir) - return None - - corpus_dir = os.path.join(self.out_dir, 'backup_corpus', self.target_name) - os.makedirs(corpus_dir, exist_ok=True) - project_qualified_fuzz_target_name = self.target_name - qualified_name_prefix = '%s_' % self.project_name - if not self.target_name.startswith(qualified_name_prefix): - project_qualified_fuzz_target_name = qualified_name_prefix + \ - self.target_name - corpus_url = utils.url_join( - utils.GCS_BASE_URL, - '{0}-backup.clusterfuzz-external.appspot.com/corpus/libFuzzer/'.format( - self.project_name), project_qualified_fuzz_target_name, - CORPUS_ZIP_NAME) - return download_and_unpack_zip(corpus_url, corpus_dir) - - -def download_url(url, filename, num_retries=3): - """Downloads the file located at |url|, using HTTP to |filename|. - - Args: - url: A url to a file to download. - filename: The path the file should be downloaded to. - num_retries: The number of times to retry the download on - ConnectionResetError. - - Returns: - True on success. 
- """ - sleep_time = 1 - - for _ in range(num_retries): - try: - urllib.request.urlretrieve(url, filename) - return True - except urllib.error.HTTPError: - # In these cases, retrying probably wont work since the error probably - # means there is nothing at the URL to download. - logging.error('Unable to download from: %s.', url) - return False - except ConnectionResetError: - # These errors are more likely to be transient. Retry. - pass - time.sleep(sleep_time) - - logging.error('Failed to download %s, %d times.', url, num_retries) - - return False - - -def download_and_unpack_zip(url, out_dir): - """Downloads and unpacks a zip file from an HTTP URL. - - Args: - url: A url to the zip file to be downloaded and unpacked. - out_dir: The path where the zip file should be extracted to. - - Returns: - A path to the extracted file or None on failure. - """ - if not os.path.exists(out_dir): - logging.error('Out directory %s does not exist.', out_dir) - return None - - # Gives the temporary zip file a unique identifier in the case that - # that download_and_unpack_zip is done in parallel. - with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file: - result = download_url(url, tmp_file.name) - if not result: - return None - - try: - with zipfile.ZipFile(tmp_file.name, 'r') as zip_file: - zip_file.extractall(out_dir) - except zipfile.BadZipFile: - logging.error('Error unpacking zip from %s. Bad Zipfile.', url) - return None - return out_dir diff --git a/infra/cifuzz/fuzz_target_test.py b/infra/cifuzz/fuzz_target_test.py index e8ec1241e..21fa3b437 100644 --- a/infra/cifuzz/fuzz_target_test.py +++ b/infra/cifuzz/fuzz_target_test.py @@ -14,18 +14,15 @@ """Tests the functionality of the fuzz_target module.""" import os -import sys import tempfile import unittest -import unittest.mock -import urllib.error +from unittest import mock import parameterized from pyfakefs import fake_filesystem_unittest -# Pylint has an issue importing utils which is why error suppression is needed. 
-# pylint: disable=wrong-import-position,import-error -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import clusterfuzz_deployment +import config_utils import fuzz_target # NOTE: This integration test relies on @@ -42,9 +39,35 @@ EXECUTE_SUCCESS_RETVAL = ('', '', 0) EXECUTE_FAILURE_RETVAL = ('', '', 1) +def _create_config(**kwargs): + """Creates a config object and then sets every attribute that is a key in + |kwargs| to the corresponding value. Asserts that each key in |kwargs| is an + attribute of Config.""" + defaults = {'is_github': True, 'project_name': EXAMPLE_PROJECT} + for default_key, default_value in defaults.items(): + if default_key not in kwargs: + kwargs[default_key] = default_value + + with mock.patch('os.path.basename', return_value=None), mock.patch( + 'config_utils.get_project_src_path', + return_value=None), mock.patch('config_utils._is_dry_run', + return_value=True): + config = config_utils.RunFuzzersConfig() + + for key, value in kwargs.items(): + assert hasattr(config, key), 'Config doesn\'t have attribute: ' + key + setattr(config, key, value) + return config + + +def _create_deployment(**kwargs): + config = _create_config(**kwargs) + return clusterfuzz_deployment.get_clusterfuzz_deployment(config) + + # TODO(metzman): Use patch from test_libs/helpers.py in clusterfuzz so that we # don't need to accept this as an argument in every test method. 
-@unittest.mock.patch('utils.get_container_name', return_value='container') +@mock.patch('utils.get_container_name', return_value='container') class IsReproducibleTest(fake_filesystem_unittest.TestCase): """Tests the is_reproducible method in the fuzz_target.FuzzTarget class.""" @@ -52,17 +75,18 @@ class IsReproducibleTest(fake_filesystem_unittest.TestCase): """Sets up example fuzz target to test is_reproducible method.""" self.fuzz_target_path = '/example/path' self.testcase_path = '/testcase' + deployment = _create_deployment() self.test_target = fuzz_target.FuzzTarget(self.fuzz_target_path, fuzz_target.REPRODUCE_ATTEMPTS, - '/example/outdir') + '/example/outdir', deployment, + deployment.config) def test_reproducible(self, _): """Tests that is_reproducible returns True if crash is detected and that is_reproducible uses the correct command to reproduce a crash.""" self._set_up_fakefs() all_repro = [EXECUTE_FAILURE_RETVAL] * fuzz_target.REPRODUCE_ATTEMPTS - with unittest.mock.patch('utils.execute', - side_effect=all_repro) as mocked_execute: + with mock.patch('utils.execute', side_effect=all_repro) as mocked_execute: result = self.test_target.is_reproducible(self.testcase_path, self.fuzz_target_path) mocked_execute.assert_called_once_with([ @@ -86,8 +110,8 @@ class IsReproducibleTest(fake_filesystem_unittest.TestCase): attempt.""" self._set_up_fakefs() last_time_repro = [EXECUTE_SUCCESS_RETVAL] * 9 + [EXECUTE_FAILURE_RETVAL] - with unittest.mock.patch('utils.execute', - side_effect=last_time_repro) as mocked_execute: + with mock.patch('utils.execute', + side_effect=last_time_repro) as mocked_execute: self.assertTrue( self.test_target.is_reproducible(self.testcase_path, self.fuzz_target_path)) @@ -105,7 +129,7 @@ class IsReproducibleTest(fake_filesystem_unittest.TestCase): reproduce.""" all_unrepro = [EXECUTE_SUCCESS_RETVAL] * fuzz_target.REPRODUCE_ATTEMPTS self._set_up_fakefs() - with unittest.mock.patch('utils.execute', side_effect=all_unrepro): + with 
mock.patch('utils.execute', side_effect=all_unrepro): result = self.test_target.is_reproducible(self.testcase_path, self.fuzz_target_path) self.assertFalse(result) @@ -116,8 +140,10 @@ class GetTestCaseTest(unittest.TestCase): def setUp(self): """Sets up example fuzz target to test get_testcase method.""" + deployment = _create_deployment() self.test_target = fuzz_target.FuzzTarget('/example/path', 10, - '/example/outdir') + '/example/outdir', deployment, + deployment.config) def test_valid_error_string(self): """Tests that get_testcase returns the correct testcase give an error.""" @@ -142,48 +168,16 @@ class GetTestCaseTest(unittest.TestCase): self.assertTrue(isinstance(result, str)) -class DownloadLatestCorpusTest(unittest.TestCase): - """Tests parse_fuzzer_output.""" - - def test_download_valid_projects_corpus(self): - """Tests that a valid fuzz target returns a corpus directory.""" - with tempfile.TemporaryDirectory() as tmp_dir: - test_target = fuzz_target.FuzzTarget('testfuzzer', 3, 'test_out') - test_target.project_name = EXAMPLE_PROJECT - test_target.target_name = EXAMPLE_FUZZER - test_target.out_dir = tmp_dir - with unittest.mock.patch( - 'fuzz_target.download_and_unpack_zip', - return_value=tmp_dir) as mocked_download_and_unpack_zip: - test_target.download_latest_corpus() - (url, out_dir), _ = mocked_download_and_unpack_zip.call_args - self.assertEqual( - url, 'https://storage.googleapis.com/example-backup.' 
- 'clusterfuzz-external.appspot.com/corpus/libFuzzer/' - 'example_crash_fuzzer/public.zip') - self.assertEqual(out_dir, - os.path.join(tmp_dir, 'backup_corpus', EXAMPLE_FUZZER)) - - def test_download_invalid_projects_corpus(self): - """Tests that a invade fuzz target does not return None.""" - with tempfile.TemporaryDirectory() as tmp_dir: - test_target = fuzz_target.FuzzTarget('test fuzzer', 3, tmp_dir) - corpus_path = test_target.download_latest_corpus() - self.assertIsNone(corpus_path) - test_target = fuzz_target.FuzzTarget('not_a_fuzzer', 3, tmp_dir, - 'not_a_project') - corpus_path = test_target.download_latest_corpus() - self.assertIsNone(corpus_path) - - class IsCrashReportableTest(fake_filesystem_unittest.TestCase): """Tests the is_crash_reportable method of FuzzTarget.""" def setUp(self): """Sets up example fuzz target to test is_crash_reportable method.""" self.fuzz_target_path = '/example/do_stuff_fuzzer' + deployment = _create_deployment() self.test_target = fuzz_target.FuzzTarget(self.fuzz_target_path, 100, - '/example/outdir', 'example') + '/example/outdir', deployment, + deployment.config) self.oss_fuzz_build_path = '/oss-fuzz-build' self.setUpPyfakefs() self.fs.create_file(self.fuzz_target_path) @@ -193,19 +187,19 @@ class IsCrashReportableTest(fake_filesystem_unittest.TestCase): self.testcase_path = '/testcase' self.fs.create_file(self.testcase_path, contents='') - @unittest.mock.patch('logging.info') + @mock.patch('logging.info') def test_new_reproducible_crash(self, mocked_info): """Tests that a new reproducible crash returns True.""" - with unittest.mock.patch('fuzz_target.FuzzTarget.is_reproducible', - side_effect=[True, False]): + with mock.patch('fuzz_target.FuzzTarget.is_reproducible', + side_effect=[True, False]): with tempfile.TemporaryDirectory() as tmp_dir: self.test_target.out_dir = tmp_dir self.assertTrue(self.test_target.is_crash_reportable( self.testcase_path)) mocked_info.assert_called_with( 'The crash is reproducible. 
The crash doesn\'t reproduce ' - 'on old builds. This pull request probably introduced the ' + 'on old builds. This code change probably introduced the ' 'crash.') # yapf: disable @@ -223,17 +217,16 @@ class IsCrashReportableTest(fake_filesystem_unittest.TestCase): # yapf: enable def test_invalid_crash(self, is_reproducible_retvals): """Tests that a nonreportable crash causes the method to return False.""" - with unittest.mock.patch('fuzz_target.FuzzTarget.is_reproducible', - side_effect=is_reproducible_retvals): + with mock.patch('fuzz_target.FuzzTarget.is_reproducible', + side_effect=is_reproducible_retvals): - with unittest.mock.patch('fuzz_target.FuzzTarget.download_oss_fuzz_build', - return_value=self.oss_fuzz_build_path): + with mock.patch('clusterfuzz_deployment.OSSFuzz.download_latest_build', + return_value=self.oss_fuzz_build_path): self.assertFalse( self.test_target.is_crash_reportable(self.testcase_path)) - @unittest.mock.patch('logging.info') - @unittest.mock.patch('fuzz_target.FuzzTarget.is_reproducible', - return_value=[True]) + @mock.patch('logging.info') + @mock.patch('fuzz_target.FuzzTarget.is_reproducible', return_value=[True]) def test_reproducible_no_oss_fuzz_target(self, _, mocked_info): """Tests that is_crash_reportable returns True when a crash reproduces on the PR build but the target is not in the OSS-Fuzz build (usually because it @@ -245,137 +238,19 @@ class IsCrashReportableTest(fake_filesystem_unittest.TestCase): raise fuzz_target.ReproduceError() return True - with unittest.mock.patch( + with mock.patch( 'fuzz_target.FuzzTarget.is_reproducible', side_effect=is_reproducible_side_effect) as mocked_is_reproducible: - with unittest.mock.patch('fuzz_target.FuzzTarget.download_oss_fuzz_build', - return_value=self.oss_fuzz_build_path): + with mock.patch('clusterfuzz_deployment.OSSFuzz.download_latest_build', + return_value=self.oss_fuzz_build_path): self.assertTrue(self.test_target.is_crash_reportable( self.testcase_path)) 
mocked_is_reproducible.assert_any_call(self.testcase_path, self.oss_fuzz_target_path) mocked_info.assert_called_with( - 'Crash is reproducible. Could not run OSS-Fuzz build of ' - 'target to determine if this pull request introduced crash. ' - 'Assuming this pull request introduced crash.') - - -class GetLatestBuildVersionTest(unittest.TestCase): - """Tests the get_latest_build_version function.""" - - def test_get_valid_project(self): - """Tests that the latest build can be retrieved from GCS.""" - test_target = fuzz_target.FuzzTarget('/example/path', 10, '/example/outdir', - 'example') - latest_build = test_target.get_latest_build_version() - self.assertIsNotNone(latest_build) - self.assertTrue(latest_build.endswith('.zip')) - self.assertTrue('address' in latest_build) - - def test_get_invalid_project(self): - """Tests that the latest build returns None when project doesn't exist.""" - test_target = fuzz_target.FuzzTarget('/example/path', 10, '/example/outdir', - 'not-a-proj') - self.assertIsNone(test_target.get_latest_build_version()) - test_target = fuzz_target.FuzzTarget('/example/path', 10, '/example/outdir') - self.assertIsNone(test_target.get_latest_build_version()) - - -class DownloadOSSFuzzBuildDirIntegrationTest(unittest.TestCase): - """Tests download_oss_fuzz_build.""" - - def test_single_download(self): - """Tests that the build directory was only downloaded once.""" - with tempfile.TemporaryDirectory() as tmp_dir: - test_target = fuzz_target.FuzzTarget('/example/do_stuff_fuzzer', 10, - tmp_dir, 'example') - latest_version = test_target.get_latest_build_version() - with unittest.mock.patch( - 'fuzz_target.FuzzTarget.get_latest_build_version', - return_value=latest_version) as mocked_get_latest_build_version: - for _ in range(5): - oss_fuzz_build_path = test_target.download_oss_fuzz_build() - self.assertEqual(1, mocked_get_latest_build_version.call_count) - self.assertIsNotNone(oss_fuzz_build_path) - self.assertTrue(os.listdir(oss_fuzz_build_path)) - - 
def test_get_valid_project(self): - """Tests the latest build can be retrieved from GCS.""" - with tempfile.TemporaryDirectory() as tmp_dir: - test_target = fuzz_target.FuzzTarget('/example/do_stuff_fuzzer', 10, - tmp_dir, 'example') - oss_fuzz_build_path = test_target.download_oss_fuzz_build() - self.assertIsNotNone(oss_fuzz_build_path) - self.assertTrue(os.listdir(oss_fuzz_build_path)) - - def test_get_invalid_project(self): - """Tests the latest build returns None when project doesn't exist.""" - with tempfile.TemporaryDirectory() as tmp_dir: - test_target = fuzz_target.FuzzTarget('/example/do_stuff_fuzzer', 10, - tmp_dir) - self.assertIsNone(test_target.download_oss_fuzz_build()) - test_target = fuzz_target.FuzzTarget('/example/do_stuff_fuzzer', 10, - tmp_dir, 'not-a-proj') - self.assertIsNone(test_target.download_oss_fuzz_build()) - - def test_invalid_build_dir(self): - """Tests the download returns None when out_dir doesn't exist.""" - with tempfile.TemporaryDirectory() as tmp_dir: - invalid_dir = os.path.join(tmp_dir, 'not/a/dir') - test_target = fuzz_target.FuzzTarget('/example/do_stuff_fuzzer', 10, - invalid_dir, 'example') - self.assertIsNone(test_target.download_oss_fuzz_build()) - - -class DownloadUrlTest(unittest.TestCase): - """Tests that download_url works.""" - URL = 'example.com/file' - FILE_PATH = '/tmp/file' - - @unittest.mock.patch('time.sleep') - @unittest.mock.patch('urllib.request.urlretrieve', return_value=True) - def test_download_url_no_error(self, mocked_urlretrieve, _): - """Tests that download_url works when there is no error.""" - self.assertTrue(fuzz_target.download_url(self.URL, self.FILE_PATH)) - self.assertEqual(1, mocked_urlretrieve.call_count) - - @unittest.mock.patch('time.sleep') - @unittest.mock.patch('logging.error') - @unittest.mock.patch('urllib.request.urlretrieve', - side_effect=urllib.error.HTTPError( - None, None, None, None, None)) - def test_download_url_http_error(self, mocked_urlretrieve, mocked_error, _): - 
"""Tests that download_url doesn't retry when there is an HTTP error.""" - self.assertFalse(fuzz_target.download_url(self.URL, self.FILE_PATH)) - mocked_error.assert_called_with('Unable to download from: %s.', self.URL) - self.assertEqual(1, mocked_urlretrieve.call_count) - - @unittest.mock.patch('time.sleep') - @unittest.mock.patch('logging.error') - @unittest.mock.patch('urllib.request.urlretrieve', - side_effect=ConnectionResetError) - def test_download_url_connection_error(self, mocked_urlretrieve, mocked_error, - mocked_sleep): - """Tests that download_url doesn't retry when there is an HTTP error.""" - self.assertFalse(fuzz_target.download_url(self.URL, self.FILE_PATH)) - self.assertEqual(3, mocked_urlretrieve.call_count) - self.assertEqual(3, mocked_sleep.call_count) - mocked_error.assert_called_with('Failed to download %s, %d times.', - self.URL, 3) - - -class DownloadAndUnpackZipTest(unittest.TestCase): - """Tests download_and_unpack_zip.""" - - def test_bad_zip_download(self): - """Tests download_and_unpack_zip returns none when a bad zip is passed.""" - with tempfile.TemporaryDirectory() as tmp_dir, unittest.mock.patch( - 'urllib.request.urlretrieve', return_value=True): - file_handle = open(os.path.join(tmp_dir, 'url_tmp.zip'), 'w') - file_handle.write('Test file.') - file_handle.close() - self.assertIsNone( - fuzz_target.download_and_unpack_zip('/not/a/real/url', tmp_dir)) + 'Crash is reproducible. Could not run recent build of ' + 'target to determine if this code change (pr/commit) introduced crash. 
' + 'Assuming this code change introduced crash.') if __name__ == '__main__': diff --git a/infra/cifuzz/run_fuzzers.py b/infra/cifuzz/run_fuzzers.py index 1ba6865c4..f3e4e5284 100644 --- a/infra/cifuzz/run_fuzzers.py +++ b/infra/cifuzz/run_fuzzers.py @@ -18,6 +18,7 @@ import shutil import sys import time +import clusterfuzz_deployment import fuzz_target import stack_parser @@ -32,6 +33,8 @@ class BaseFuzzTargetRunner: def __init__(self, config): self.config = config + self.clusterfuzz_deployment = ( + clusterfuzz_deployment.get_clusterfuzz_deployment(self.config)) # Set by the initialize method. self.out_dir = None self.fuzz_target_paths = None @@ -96,11 +99,8 @@ class BaseFuzzTargetRunner: def create_fuzz_target_obj(self, target_path, run_seconds): """Returns a fuzz target object.""" - return fuzz_target.FuzzTarget(target_path, - run_seconds, - self.out_dir, - self.config.project_name, - sanitizer=self.config.sanitizer) + return fuzz_target.FuzzTarget(target_path, run_seconds, self.out_dir, + self.clusterfuzz_deployment, self.config) def run_fuzz_targets(self): """Runs fuzz targets. 
Returns True if a bug was found.""" diff --git a/infra/cifuzz/run_fuzzers_test.py b/infra/cifuzz/run_fuzzers_test.py index a55ed8495..5038e00fc 100644 --- a/infra/cifuzz/run_fuzzers_test.py +++ b/infra/cifuzz/run_fuzzers_test.py @@ -317,6 +317,8 @@ class RunAddressFuzzersIntegrationTest(RunFuzzerIntegrationTestMixin, unittest.TestCase): """Integration tests for build_fuzzers with an ASAN build.""" + BUILD_DIR_NAME = 'cifuzz-latest-build' + @unittest.skipIf(not os.getenv('INTEGRATION_TESTS'), 'INTEGRATION_TESTS=1 not set') def test_new_bug_found(self): @@ -335,7 +337,7 @@ class RunAddressFuzzersIntegrationTest(RunFuzzerIntegrationTestMixin, run_success, bug_found = run_fuzzers.run_fuzzers(config) self.assertTrue(run_success) self.assertTrue(bug_found) - build_dir = os.path.join(workspace, 'out', 'oss_fuzz_latest') + build_dir = os.path.join(workspace, 'out', self.BUILD_DIR_NAME) self.assertNotEqual(0, len(os.listdir(build_dir))) @unittest.skipIf(not os.getenv('INTEGRATION_TESTS'), @@ -354,7 +356,7 @@ class RunAddressFuzzersIntegrationTest(RunFuzzerIntegrationTestMixin, workspace=TEST_FILES_PATH, project_name=EXAMPLE_PROJECT) run_success, bug_found = run_fuzzers.run_fuzzers(config) - build_dir = os.path.join(TEST_FILES_PATH, 'out', 'oss_fuzz_latest') + build_dir = os.path.join(TEST_FILES_PATH, 'out', self.BUILD_DIR_NAME) self.assertTrue(os.path.exists(build_dir)) self.assertNotEqual(0, len(os.listdir(build_dir))) self.assertTrue(run_success)