From 2aa7a0d558a7b672fcbc97eb0cfcf0995bd3d42f Mon Sep 17 00:00:00 2001 From: Leo Neat Date: Wed, 26 Feb 2020 08:47:13 -0800 Subject: [PATCH] [CIFuzz] Use backup corpus for fuzzing (#3380) This allows fuzzing using fuzz target's backup corpora. It will make fuzzing more efficient for targets that have a backup OSS-Fuzz corpus avaliable to the public. It will not effect targets that don't have backups. --- infra/cifuzz/cifuzz_test.py | 2 +- infra/cifuzz/fuzz_target.py | 115 ++++++++++++++++++++++++------- infra/cifuzz/fuzz_target_test.py | 59 +++++++++++++++- 3 files changed, 150 insertions(+), 26 deletions(-) diff --git a/infra/cifuzz/cifuzz_test.py b/infra/cifuzz/cifuzz_test.py index 3060cea7a..1d4e2a68c 100644 --- a/infra/cifuzz/cifuzz_test.py +++ b/infra/cifuzz/cifuzz_test.py @@ -136,7 +136,7 @@ class RunFuzzersIntegrationTest(unittest.TestCase): def test_new_bug_found(self): """Test run_fuzzers with a valid build.""" - # Setting the first return value to True, then the second to False to + # Set the first return value to True, then the second to False to # emulate a bug existing in the current PR but not on the downloaded # OSS-Fuzz build. with unittest.mock.patch.object(fuzz_target.FuzzTarget, diff --git a/infra/cifuzz/fuzz_target.py b/infra/cifuzz/fuzz_target.py index 304f57cb1..795ec15fb 100644 --- a/infra/cifuzz/fuzz_target.py +++ b/infra/cifuzz/fuzz_target.py @@ -18,6 +18,8 @@ import posixpath import re import subprocess import sys +import tempfile +import urllib.error import urllib.request import zipfile @@ -26,7 +28,7 @@ import zipfile sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import utils -# TODO: Turn default logging to WARNING when CIFuzz is stable +# TODO: Turn default logging to WARNING when CIFuzz is stable. logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.DEBUG) @@ -34,19 +36,26 @@ logging.basicConfig( LIBFUZZER_OPTIONS = '-seed=1337 -len_control=0' # Location of google cloud storage for latest OSS-Fuzz builds. -GCS_BASE_URL = 'https://storage.googleapis.com/clusterfuzz-builds' +GCS_BASE_URL = 'https://storage.googleapis.com/' -# The number of reproduce attempts for a crash. -REPRODUCE_ATTEMPTS = 10 - -# The name to store the latest OSS-Fuzz build at. -BUILD_ARCHIVE_NAME = 'oss_fuzz_latest.zip' +# Location of cluster fuzz builds on GCS. +CLUSTERFUZZ_BUILDS = 'clusterfuzz-builds' # The get request for the latest version of a project's build. VERSION_STRING = '{project_name}-{sanitizer}-latest.version' +# The name to store the latest OSS-Fuzz build at. +BUILD_ARCHIVE_NAME = 'oss_fuzz_latest.zip' + +# Zip file name containing the corpus. +CORPUS_ZIP_NAME = 'public.zip' + +# The sanitizer build to download. SANITIZER = 'address' +# The number of reproduce attempts for a crash. +REPRODUCE_ATTEMPTS = 10 + class FuzzTarget: """A class to manage a single fuzz target. @@ -55,6 +64,7 @@ class FuzzTarget: target_name: The name of the fuzz target. duration: The length of time in seconds that the target should run. target_path: The location of the fuzz target binary. + out_dir: The location of where output artifacts are stored. project_name: The name of the relevant OSS-Fuzz project. """ @@ -95,9 +105,17 @@ class FuzzTarget: command += [ '-e', 'FUZZING_ENGINE=libfuzzer', '-e', 'SANITIZER=address', '-e', 'RUN_FUZZER_MODE=interactive', 'gcr.io/oss-fuzz-base/base-runner', - 'bash', '-c', 'run_fuzzer {fuzz_target} {options}'.format( - fuzz_target=self.target_name, options=LIBFUZZER_OPTIONS) + 'bash', '-c' ] + run_fuzzer_command = 'run_fuzzer {fuzz_target} {options}'.format( + fuzz_target=self.target_name, options=LIBFUZZER_OPTIONS) + + # If corpus can be downloaded use it for fuzzing. + latest_corpus_path = self.download_latest_corpus() + if latest_corpus_path: + run_fuzzer_command = run_fuzzer_command + ' ' + latest_corpus_path + command.append(run_fuzzer_command) + logging.info('Running command: %s', ' '.join(command)) process = subprocess.Popen(command, stdout=subprocess.PIPE, @@ -203,13 +221,13 @@ class FuzzTarget: version = VERSION_STRING.format(project_name=self.project_name, sanitizer=SANITIZER) - version_url = url_join(GCS_BASE_URL, self.project_name, version) + version_url = url_join(GCS_BASE_URL, CLUSTERFUZZ_BUILDS, self.project_name, + version) try: response = urllib.request.urlopen(version_url) except urllib.error.HTTPError: - logging.error( - 'Error getting the lastest build version for %s from url %s.', - self.project_name, version_url) + logging.error('Error getting latest build version for %s with url %s.', + self.project_name, version_url) return None return response.read().decode() @@ -224,6 +242,7 @@ class FuzzTarget: return None if not self.project_name: return None + build_dir = os.path.join(self.out_dir, 'oss_fuzz_latest', self.project_name) if os.path.exists(os.path.join(build_dir, self.target_name)): return build_dir @@ -232,17 +251,67 @@ class FuzzTarget: if not latest_build_str: return None - oss_fuzz_build_url = url_join(GCS_BASE_URL, self.project_name, - latest_build_str) - try: - urllib.request.urlretrieve(oss_fuzz_build_url, BUILD_ARCHIVE_NAME) - except urllib.error.HTTPError: - logging.error('Unable to download build from: %s.', oss_fuzz_build_url) + oss_fuzz_build_url = url_join(GCS_BASE_URL, CLUSTERFUZZ_BUILDS, + self.project_name, latest_build_str) + return download_and_unpack_zip(oss_fuzz_build_url, build_dir) + + def download_latest_corpus(self): + """Downloads the latest OSS-Fuzz corpus for the target from google cloud. + + Returns: + The local path to to corpus or None if download failed. + """ + if not self.project_name: return None - with zipfile.ZipFile(BUILD_ARCHIVE_NAME, 'r') as zip_file: - zip_file.extractall(build_dir) - os.remove(BUILD_ARCHIVE_NAME) - return build_dir + if not os.path.exists(self.out_dir): + logging.error('Out directory %s does not exist.', self.out_dir) + return None + + corpus_dir = os.path.join(self.out_dir, 'backup_corpus', self.target_name) + os.makedirs(corpus_dir, exist_ok=True) + project_qualified_fuzz_target_name = self.target_name + qualified_name_prefix = '%s_' % self.project_name + if not self.target_name.startswith(qualified_name_prefix): + project_qualified_fuzz_target_name = qualified_name_prefix + \ + self.target_name + corpus_url = url_join( + GCS_BASE_URL, + '{0}-backup.clusterfuzz-external.appspot.com/corpus/libFuzzer/'.format( + self.project_name), project_qualified_fuzz_target_name, + CORPUS_ZIP_NAME) + return download_and_unpack_zip(corpus_url, corpus_dir) + + +def download_and_unpack_zip(http_url, out_dir): + """Downloads and unpacks a zip file from an http url. + + Args: + http_url: A url to the zip file to be downloaded and unpacked. + out_dir: The path where the zip file should be extracted to. + + Returns: + A path to the extracted file or None on failure. + """ + if not os.path.exists(out_dir): + logging.error('Out directory %s does not exist.', out_dir) + return None + + # Gives the temporary zip file a unique identifier in the case that + # that download_and_unpack_zip is done in parallel. + with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file: + try: + urllib.request.urlretrieve(http_url, tmp_file.name) + except urllib.error.HTTPError: + logging.error('Unable to download build from: %s.', http_url) + return None + + try: + with zipfile.ZipFile(tmp_file.name, 'r') as zip_file: + zip_file.extractall(out_dir) + except zipfile.BadZipFile: + logging.error('Error unpacking zip from %s. Bad Zipfile.', http_url) + return None + return out_dir def url_join(*argv): diff --git a/infra/cifuzz/fuzz_target_test.py b/infra/cifuzz/fuzz_target_test.py index 14f869624..a5dae0e32 100644 --- a/infra/cifuzz/fuzz_target_test.py +++ b/infra/cifuzz/fuzz_target_test.py @@ -18,18 +18,23 @@ import sys import tempfile import unittest import unittest.mock +import urllib # Pylint has issue importing utils which is why error suppression is required. # pylint: disable=wrong-import-position # pylint: disable=import-error sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import fuzz_target + import utils # NOTE: This integration test relies on -# https://github.com/google/oss-fuzz/tree/master/projects/example project +# https://github.com/google/oss-fuzz/tree/master/projects/example project. EXAMPLE_PROJECT = 'example' +# An example fuzzer that triggers an error. +EXAMPLE_FUZZER = 'do_stuff_fuzzer' + class IsReproducibleUnitTest(unittest.TestCase): """Test is_reproducible function in the fuzz_target module.""" @@ -94,12 +99,48 @@ class GetTestCaseUnitTest(unittest.TestCase): self.assertIsNone(self.test_target.get_test_case(' Example crash string.')) +class DownloadLatestCorpusUnitTest(unittest.TestCase): + """Test parse_fuzzer_output function in the cifuzz module.""" + + def test_download_valid_projects_corpus(self): + """Tests that a vaild fuzz target will return a corpus directory.""" + with tempfile.TemporaryDirectory() as tmp_dir: + test_target = fuzz_target.FuzzTarget('testfuzzer', 3, 'test_out') + test_target.project_name = EXAMPLE_PROJECT + test_target.target_name = EXAMPLE_FUZZER + test_target.out_dir = tmp_dir + with unittest.mock.patch.object(fuzz_target, + 'download_and_unpack_zip', + return_value=tmp_dir) as mock: + test_target.download_latest_corpus() + (url, out_dir), _ = mock.call_args + self.assertEqual( + url, + 'https://storage.googleapis.com/example-backup.' \ + 'clusterfuzz-external.appspot.com/corpus/libFuzzer/' \ + 'example_do_stuff_fuzzer/public.zip' + ) + self.assertEqual(out_dir, + os.path.join(tmp_dir, 'backup_corpus', EXAMPLE_FUZZER)) + + def test_download_invalid_projects_corpus(self): + """Tests that a invaild fuzz target will not return None.""" + with tempfile.TemporaryDirectory() as tmp_dir: + test_target = fuzz_target.FuzzTarget('testfuzzer', 3, tmp_dir) + corpus_path = test_target.download_latest_corpus() + self.assertIsNone(corpus_path) + test_target = fuzz_target.FuzzTarget('not_a_fuzzer', 3, tmp_dir, + 'not_a_project') + corpus_path = test_target.download_latest_corpus() + self.assertIsNone(corpus_path) + + class CheckReproducibilityAndRegressionUnitTest(unittest.TestCase): """Test check_reproducibility_and_regression function fuzz_target module.""" def setUp(self): """Sets up dummy fuzz target to test is_reproducible method.""" - self.test_target = fuzz_target.FuzzTarget('/example/do_stuff_fuzzer', 10, + self.test_target = fuzz_target.FuzzTarget('/example/do_stuff_fuzzer', 100, '/example/outdir', 'example') def test_with_valid_crash(self): @@ -202,5 +243,19 @@ class DownloadOSSFuzzBuildDirIntegrationTests(unittest.TestCase): self.assertIsNone(test_target.download_oss_fuzz_build()) +class DownloadAndUnpackZipUnitTests(unittest.TestCase): + """Test the download and unpack functionality in the fuzz_target module.""" + + def test_bad_zip_download(self): + """Tests download_and_unpack_zip returns none when a bad zip is passed.""" + with tempfile.TemporaryDirectory() as tmp_dir, unittest.mock.patch.object( + urllib.request, 'urlretrieve', return_value=True): + file_handle = open(os.path.join(tmp_dir, 'url_tmp.zip'), 'w') + file_handle.write('Test file.') + file_handle.close() + self.assertIsNone( + fuzz_target.download_and_unpack_zip('/not/a/real/url', tmp_dir)) + + if __name__ == '__main__': unittest.main()