[CIFuzz] Use backup corpus for fuzzing (#3380)

This allows fuzzing using fuzz targets' backup corpora. It will make fuzzing more efficient for targets that have a backup OSS-Fuzz corpus available to the public. It will not affect targets that don't have backups.
This commit is contained in:
Leo Neat 2020-02-26 08:47:13 -08:00 committed by GitHub
parent 7e4bf8831c
commit 2aa7a0d558
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 150 additions and 26 deletions

View File

@ -136,7 +136,7 @@ class RunFuzzersIntegrationTest(unittest.TestCase):
def test_new_bug_found(self):
"""Test run_fuzzers with a valid build."""
# Setting the first return value to True, then the second to False to
# Set the first return value to True, then the second to False to
# emulate a bug existing in the current PR but not on the downloaded
# OSS-Fuzz build.
with unittest.mock.patch.object(fuzz_target.FuzzTarget,

View File

@ -18,6 +18,8 @@ import posixpath
import re
import subprocess
import sys
import tempfile
import urllib.error
import urllib.request
import zipfile
@ -26,7 +28,7 @@ import zipfile
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import utils
# TODO: Turn default logging to WARNING when CIFuzz is stable
# TODO: Turn default logging to WARNING when CIFuzz is stable.
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
@ -34,19 +36,26 @@ logging.basicConfig(
LIBFUZZER_OPTIONS = '-seed=1337 -len_control=0'
# Location of google cloud storage for latest OSS-Fuzz builds.
GCS_BASE_URL = 'https://storage.googleapis.com/clusterfuzz-builds'
GCS_BASE_URL = 'https://storage.googleapis.com/'
# The number of reproduce attempts for a crash.
REPRODUCE_ATTEMPTS = 10
# The name to store the latest OSS-Fuzz build at.
BUILD_ARCHIVE_NAME = 'oss_fuzz_latest.zip'
# Location of cluster fuzz builds on GCS.
CLUSTERFUZZ_BUILDS = 'clusterfuzz-builds'
# The get request for the latest version of a project's build.
VERSION_STRING = '{project_name}-{sanitizer}-latest.version'
# The name to store the latest OSS-Fuzz build at.
BUILD_ARCHIVE_NAME = 'oss_fuzz_latest.zip'
# Zip file name containing the corpus.
CORPUS_ZIP_NAME = 'public.zip'
# The sanitizer build to download.
SANITIZER = 'address'
# The number of reproduce attempts for a crash.
REPRODUCE_ATTEMPTS = 10
class FuzzTarget:
"""A class to manage a single fuzz target.
@ -55,6 +64,7 @@ class FuzzTarget:
target_name: The name of the fuzz target.
duration: The length of time in seconds that the target should run.
target_path: The location of the fuzz target binary.
out_dir: The location of where output artifacts are stored.
project_name: The name of the relevant OSS-Fuzz project.
"""
@ -95,9 +105,17 @@ class FuzzTarget:
command += [
'-e', 'FUZZING_ENGINE=libfuzzer', '-e', 'SANITIZER=address', '-e',
'RUN_FUZZER_MODE=interactive', 'gcr.io/oss-fuzz-base/base-runner',
'bash', '-c', 'run_fuzzer {fuzz_target} {options}'.format(
fuzz_target=self.target_name, options=LIBFUZZER_OPTIONS)
'bash', '-c'
]
run_fuzzer_command = 'run_fuzzer {fuzz_target} {options}'.format(
fuzz_target=self.target_name, options=LIBFUZZER_OPTIONS)
# If corpus can be downloaded use it for fuzzing.
latest_corpus_path = self.download_latest_corpus()
if latest_corpus_path:
run_fuzzer_command = run_fuzzer_command + ' ' + latest_corpus_path
command.append(run_fuzzer_command)
logging.info('Running command: %s', ' '.join(command))
process = subprocess.Popen(command,
stdout=subprocess.PIPE,
@ -203,13 +221,13 @@ class FuzzTarget:
version = VERSION_STRING.format(project_name=self.project_name,
sanitizer=SANITIZER)
version_url = url_join(GCS_BASE_URL, self.project_name, version)
version_url = url_join(GCS_BASE_URL, CLUSTERFUZZ_BUILDS, self.project_name,
version)
try:
response = urllib.request.urlopen(version_url)
except urllib.error.HTTPError:
logging.error(
'Error getting the lastest build version for %s from url %s.',
self.project_name, version_url)
logging.error('Error getting latest build version for %s with url %s.',
self.project_name, version_url)
return None
return response.read().decode()
@ -224,6 +242,7 @@ class FuzzTarget:
return None
if not self.project_name:
return None
build_dir = os.path.join(self.out_dir, 'oss_fuzz_latest', self.project_name)
if os.path.exists(os.path.join(build_dir, self.target_name)):
return build_dir
@ -232,17 +251,67 @@ class FuzzTarget:
if not latest_build_str:
return None
oss_fuzz_build_url = url_join(GCS_BASE_URL, self.project_name,
latest_build_str)
try:
urllib.request.urlretrieve(oss_fuzz_build_url, BUILD_ARCHIVE_NAME)
except urllib.error.HTTPError:
logging.error('Unable to download build from: %s.', oss_fuzz_build_url)
oss_fuzz_build_url = url_join(GCS_BASE_URL, CLUSTERFUZZ_BUILDS,
self.project_name, latest_build_str)
return download_and_unpack_zip(oss_fuzz_build_url, build_dir)
def download_latest_corpus(self):
"""Downloads the latest OSS-Fuzz corpus for the target from google cloud.
Returns:
The local path to the corpus or None if download failed.
"""
if not self.project_name:
return None
with zipfile.ZipFile(BUILD_ARCHIVE_NAME, 'r') as zip_file:
zip_file.extractall(build_dir)
os.remove(BUILD_ARCHIVE_NAME)
return build_dir
if not os.path.exists(self.out_dir):
logging.error('Out directory %s does not exist.', self.out_dir)
return None
corpus_dir = os.path.join(self.out_dir, 'backup_corpus', self.target_name)
os.makedirs(corpus_dir, exist_ok=True)
project_qualified_fuzz_target_name = self.target_name
qualified_name_prefix = '%s_' % self.project_name
if not self.target_name.startswith(qualified_name_prefix):
project_qualified_fuzz_target_name = qualified_name_prefix + \
self.target_name
corpus_url = url_join(
GCS_BASE_URL,
'{0}-backup.clusterfuzz-external.appspot.com/corpus/libFuzzer/'.format(
self.project_name), project_qualified_fuzz_target_name,
CORPUS_ZIP_NAME)
return download_and_unpack_zip(corpus_url, corpus_dir)
def download_and_unpack_zip(http_url, out_dir):
    """Downloads a zip archive from a url and extracts it into out_dir.

    Args:
      http_url: A url to the zip file to be downloaded and unpacked.
      out_dir: The path of an existing directory to extract the archive into.

    Returns:
      out_dir on success. None if out_dir does not exist, the download fails,
      or the downloaded file is not a valid zip archive.
    """
    if not os.path.exists(out_dir):
        logging.error('Out directory %s does not exist.', out_dir)
        return None

    # Give the temporary zip file a unique name in case
    # download_and_unpack_zip is run in parallel.
    with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file:
        try:
            urllib.request.urlretrieve(http_url, tmp_file.name)
        except urllib.error.URLError:
            # URLError is the base class of HTTPError and ContentTooShortError,
            # so connection/DNS failures and truncated downloads are reported
            # as a failed download instead of escaping as an exception.
            logging.error('Unable to download zip from: %s.', http_url)
            return None

        try:
            with zipfile.ZipFile(tmp_file.name, 'r') as zip_file:
                zip_file.extractall(out_dir)
        except zipfile.BadZipFile:
            logging.error('Error unpacking zip from %s. Bad Zipfile.', http_url)
            return None

    return out_dir
def url_join(*argv):

View File

@ -18,18 +18,23 @@ import sys
import tempfile
import unittest
import unittest.mock
import urllib
# Pylint has issue importing utils which is why error suppression is required.
# pylint: disable=wrong-import-position
# pylint: disable=import-error
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import fuzz_target
import utils
# NOTE: This integration test relies on
# https://github.com/google/oss-fuzz/tree/master/projects/example project
# https://github.com/google/oss-fuzz/tree/master/projects/example project.
EXAMPLE_PROJECT = 'example'
# An example fuzzer that triggers an error.
EXAMPLE_FUZZER = 'do_stuff_fuzzer'
class IsReproducibleUnitTest(unittest.TestCase):
"""Test is_reproducible function in the fuzz_target module."""
@ -94,12 +99,48 @@ class GetTestCaseUnitTest(unittest.TestCase):
self.assertIsNone(self.test_target.get_test_case(' Example crash string.'))
class DownloadLatestCorpusUnitTest(unittest.TestCase):
    """Test download_latest_corpus method in the fuzz_target module."""

    def test_download_valid_projects_corpus(self):
        """Tests that a valid fuzz target will return a corpus directory."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            test_target = fuzz_target.FuzzTarget('testfuzzer', 3, 'test_out')
            test_target.project_name = EXAMPLE_PROJECT
            test_target.target_name = EXAMPLE_FUZZER
            test_target.out_dir = tmp_dir
            # The download helper is patched out so no network access happens;
            # only the url and destination that would be used are checked.
            with unittest.mock.patch.object(fuzz_target,
                                            'download_and_unpack_zip',
                                            return_value=tmp_dir) as mock:
                corpus_path = test_target.download_latest_corpus()
                (url, out_dir), _ = mock.call_args
                self.assertEqual(
                    url, 'https://storage.googleapis.com/example-backup.'
                    'clusterfuzz-external.appspot.com/corpus/libFuzzer/'
                    'example_do_stuff_fuzzer/public.zip')
                self.assertEqual(
                    out_dir,
                    os.path.join(tmp_dir, 'backup_corpus', EXAMPLE_FUZZER))
                # The value produced by the download helper is passed through.
                self.assertEqual(corpus_path, tmp_dir)

    def test_download_invalid_projects_corpus(self):
        """Tests that an invalid fuzz target will return None."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            # No project name given.
            test_target = fuzz_target.FuzzTarget('testfuzzer', 3, tmp_dir)
            corpus_path = test_target.download_latest_corpus()
            self.assertIsNone(corpus_path)

            # A project name and fuzzer that do not exist.
            test_target = fuzz_target.FuzzTarget('not_a_fuzzer', 3, tmp_dir,
                                                 'not_a_project')
            corpus_path = test_target.download_latest_corpus()
            self.assertIsNone(corpus_path)
class CheckReproducibilityAndRegressionUnitTest(unittest.TestCase):
"""Test check_reproducibility_and_regression function fuzz_target module."""
def setUp(self):
"""Sets up dummy fuzz target to test is_reproducible method."""
self.test_target = fuzz_target.FuzzTarget('/example/do_stuff_fuzzer', 10,
self.test_target = fuzz_target.FuzzTarget('/example/do_stuff_fuzzer', 100,
'/example/outdir', 'example')
def test_with_valid_crash(self):
@ -202,5 +243,19 @@ class DownloadOSSFuzzBuildDirIntegrationTests(unittest.TestCase):
self.assertIsNone(test_target.download_oss_fuzz_build())
class DownloadAndUnpackZipUnitTests(unittest.TestCase):
    """Test the download and unpack functionality in the fuzz_target module."""

    def test_bad_zip_download(self):
        """Tests download_and_unpack_zip returns None when a bad zip is passed."""

        def write_bad_zip(unused_url, filename):
            """Stands in for urlretrieve by writing non-zip data to filename."""
            with open(filename, 'w') as file_handle:
                file_handle.write('Test file.')

        # The fake download puts the junk content at the path the function
        # asked for, so the bad-zip handling is exercised on real content.
        with tempfile.TemporaryDirectory() as tmp_dir, \
                unittest.mock.patch.object(urllib.request,
                                           'urlretrieve',
                                           side_effect=write_bad_zip):
            self.assertIsNone(
                fuzz_target.download_and_unpack_zip('/not/a/real/url', tmp_dir))
if __name__ == '__main__':
unittest.main()