diff --git a/infra/bisector.py b/infra/bisector.py index ff8366db7..f8d2e7d4f 100644 --- a/infra/bisector.py +++ b/infra/bisector.py @@ -32,9 +32,6 @@ This is done with the following steps: import argparse import collections -import datetime -from distutils import spawn -import json import logging import os import sys @@ -121,34 +118,6 @@ def main(): return 0 -def _load_base_builder_repo(): - """Get base-image digests.""" - gcloud_path = spawn.find_executable('gcloud') - if not gcloud_path: - logging.warning('gcloud not found in PATH.') - return None - - result, _, _ = utils.execute([ - gcloud_path, - 'container', - 'images', - 'list-tags', - 'gcr.io/oss-fuzz-base/base-builder', - '--format=json', - '--sort-by=timestamp', - ], - check_result=True) - result = json.loads(result) - - repo = build_specified_commit.BaseBuilderRepo() - for image in result: - timestamp = datetime.datetime.fromisoformat( - image['timestamp']['datetime']).astimezone(datetime.timezone.utc) - repo.add_digest(timestamp, image['digest']) - - return repo - - def _get_dedup_token(output): """Get dedup token.""" for line in output.splitlines(): @@ -200,7 +169,7 @@ def _bisect(bisect_type, old_commit, new_commit, test_case_path, fuzz_target, build_data): """Perform the bisect.""" # pylint: disable=too-many-branches - base_builder_repo = _load_base_builder_repo() + base_builder_repo = build_specified_commit.load_base_builder_repo() with tempfile.TemporaryDirectory() as tmp_dir: repo_url, repo_path = build_specified_commit.detect_main_repo( diff --git a/infra/build_specified_commit.py b/infra/build_specified_commit.py index 72c3bad8e..eef671b70 100644 --- a/infra/build_specified_commit.py +++ b/infra/build_specified_commit.py @@ -17,12 +17,17 @@ This module is allows each of the OSS Fuzz projects fuzzers to be built from a specific point in time. This feature can be used for implementations like continuious integration fuzzing and bisection to find errors """ +import argparse import bisect +import datetime +from distutils import spawn import os import collections +import json import logging import re import shutil +import tempfile import time import helper @@ -152,6 +157,34 @@ def _build_image_with_retries(project_name): return result +def get_required_post_checkout_steps(dockerfile_path): + """Get required post checkout steps (best effort).""" + + checkout_pattern = re.compile(r'\s*RUN\s*(git|svn|hg)') + + # If the build.sh is copied from upstream, we need to copy it again after + # changing the revision to ensure correct building. + post_run_pattern = re.compile(r'\s*RUN\s*(.*build\.sh.*(\$SRC|/src).*)') + + with open(dockerfile_path) as handle: + lines = handle.readlines() + + subsequent_run_cmds = [] + for i, line in enumerate(lines): + if checkout_pattern.match(line): + subsequent_run_cmds = [] + continue + + match = post_run_pattern.match(line) + if match: + workdir = helper.workdir_from_lines(lines[:i]) + command = match.group(1) + subsequent_run_cmds.append((workdir, command)) + + return subsequent_run_cmds + + +# pylint: disable=too-many-locals def build_fuzzers_from_commit(commit, build_repo_manager, host_src_path, @@ -175,8 +208,27 @@ def build_fuzzers_from_commit(commit, copy_src_from_docker(build_data.project_name, os.path.dirname(host_src_path)) + projects_dir = os.path.join('projects', build_data.project_name) + dockerfile_path = os.path.join(projects_dir, 'Dockerfile') + for i in range(num_retry + 1): build_repo_manager.checkout_commit(commit, clean=False) + + post_checkout_steps = get_required_post_checkout_steps(dockerfile_path) + for workdir, post_checkout_step in post_checkout_steps: + logging.info('Running post-checkout step `%s` in %s.', post_checkout_step, + workdir) + helper.docker_run([ + '-w', + workdir, + '-v', + host_src_path + ':' + '/src', + 'gcr.io/oss-fuzz/' + build_data.project_name, + '/bin/bash', + '-c', + post_checkout_step, + ]) + result = helper.build_fuzzers_impl(project_name=build_data.project_name, clean=True, engine=build_data.engine, @@ -191,7 +243,6 @@ def build_fuzzers_from_commit(commit, # Retry with an OSS-Fuzz builder container that's closer to the project # commit date. commit_date = build_repo_manager.commit_date(commit) - projects_dir = os.path.join('projects', build_data.project_name) # Find first change in the projects/ directory before the project # commit date. @@ -216,8 +267,7 @@ def build_fuzzers_from_commit(commit, if base_builder_repo: base_builder_digest = base_builder_repo.find_digest(commit_date) logging.info('Using base-builder with digest %s.', base_builder_digest) - _replace_base_builder_digest(os.path.join(projects_dir, 'Dockerfile'), - base_builder_digest) + _replace_base_builder_digest(dockerfile_path, base_builder_digest) # Rebuild image and re-copy src dir since things in /src could have changed. if not _build_image_with_retries(build_data.project_name): @@ -273,3 +323,80 @@ def detect_main_repo(project_name, repo_name=None, commit=None): logging.error('Failed to detect repo:\n%s', out) return None, None + + +def load_base_builder_repo(): + """Get base-image digests.""" + gcloud_path = spawn.find_executable('gcloud') + if not gcloud_path: + logging.warning('gcloud not found in PATH.') + return None + + result, _, _ = utils.execute([ + gcloud_path, + 'container', + 'images', + 'list-tags', + 'gcr.io/oss-fuzz-base/base-builder', + '--format=json', + '--sort-by=timestamp', + ], + check_result=True) + result = json.loads(result) + + repo = BaseBuilderRepo() + for image in result: + timestamp = datetime.datetime.fromisoformat( + image['timestamp']['datetime']).astimezone(datetime.timezone.utc) + repo.add_digest(timestamp, image['digest']) + + return repo + + +def main(): + """Main function.""" + logging.getLogger().setLevel(logging.INFO) + + parser = argparse.ArgumentParser( + description='Build fuzzers at a specific commit') + parser.add_argument('--project_name', + help='The name of the project where the bug occurred.', + required=True) + parser.add_argument('--commit', + help='The newest commit SHA to be bisected.', + required=True) + parser.add_argument('--engine', + help='The default is "libfuzzer".', + default='libfuzzer') + parser.add_argument('--sanitizer', + default='address', + help='The default is "address".') + parser.add_argument('--architecture', default='x86_64') + + args = parser.parse_args() + + repo_url, repo_path = detect_main_repo(args.project_name, commit=args.commit) + + if not repo_url or not repo_path: + raise ValueError('Main git repo can not be determined.') + + with tempfile.TemporaryDirectory() as tmp_dir: + host_src_dir = copy_src_from_docker(args.project_name, tmp_dir) + build_repo_manager = repo_manager.BaseRepoManager( + os.path.join(host_src_dir, os.path.basename(repo_path))) + base_builder_repo = load_base_builder_repo() + + build_data = BuildData(project_name=args.project_name, + engine=args.engine, + sanitizer=args.sanitizer, + architecture=args.architecture) + if not build_fuzzers_from_commit(args.commit, + build_repo_manager, + host_src_dir, + build_data, + base_builder_repo=base_builder_repo): + raise RuntimeError('Failed to build.') + + +if __name__ == '__main__': + main() diff --git a/infra/helper.py b/infra/helper.py index 668f31d0b..22c81984d 100755 --- a/infra/helper.py +++ b/infra/helper.py @@ -363,13 +363,8 @@ def _env_to_docker_args(env_list): WORKDIR_REGEX = re.compile(r'\s*WORKDIR\s*([^\s]+)') -def _workdir_from_dockerfile(project_name): - """Parse WORKDIR from the Dockerfile for the given project.""" - dockerfile_path = get_dockerfile_path(project_name) - - with open(dockerfile_path) as file_handle: - lines = file_handle.readlines() - +def workdir_from_lines(lines, default='/src'): + """Get the WORKDIR from the given lines.""" for line in reversed(lines): # reversed to get last WORKDIR. match = re.match(WORKDIR_REGEX, line) if match: @@ -381,7 +376,17 @@ def _workdir_from_dockerfile(project_name): return os.path.normpath(workdir) - return os.path.join('/src', project_name) + return default + + +def _workdir_from_dockerfile(project_name): + """Parse WORKDIR from the Dockerfile for the given project.""" + dockerfile_path = get_dockerfile_path(project_name) + + with open(dockerfile_path) as file_handle: + lines = file_handle.readlines() + + return workdir_from_lines(lines, default=os.path.join('/src', project_name)) def docker_run(run_args, print_output=True):