build_specified_commit: Handle build.sh which are part of upstream repo. (#3932)

This is a best effort attempt to parse the relevant copy command from
the Dockerfile.

Also add a main function to build_specified_commit to make it easier to
test.
This commit is contained in:
Oliver Chang 2020-06-10 17:12:28 +10:00 committed by GitHub
parent 778d61c291
commit 4a51ecb61f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 144 additions and 43 deletions

View File

@ -32,9 +32,6 @@ This is done with the following steps:
import argparse
import collections
import datetime
from distutils import spawn
import json
import logging
import os
import sys
@ -121,34 +118,6 @@ def main():
return 0
def _load_base_builder_repo():
  """Fetch base-builder image digests, ordered by timestamp.

  Asks gcloud for every tag of gcr.io/oss-fuzz-base/base-builder and
  records each digest together with its UTC timestamp.

  Returns:
    A build_specified_commit.BaseBuilderRepo, or None when the gcloud
    binary cannot be located on PATH.
  """
  gcloud = spawn.find_executable('gcloud')
  if not gcloud:
    logging.warning('gcloud not found in PATH.')
    return None

  list_tags_cmd = [
      gcloud,
      'container',
      'images',
      'list-tags',
      'gcr.io/oss-fuzz-base/base-builder',
      '--format=json',
      '--sort-by=timestamp',
  ]
  stdout, _, _ = utils.execute(list_tags_cmd, check_result=True)

  repo = build_specified_commit.BaseBuilderRepo()
  for image_info in json.loads(stdout):
    # Normalize the gcloud-reported timestamp to UTC before storing.
    pushed_at = datetime.datetime.fromisoformat(
        image_info['timestamp']['datetime']).astimezone(datetime.timezone.utc)
    repo.add_digest(pushed_at, image_info['digest'])
  return repo
def _get_dedup_token(output):
"""Get dedup token."""
for line in output.splitlines():
@ -200,7 +169,7 @@ def _bisect(bisect_type, old_commit, new_commit, test_case_path, fuzz_target,
build_data):
"""Perform the bisect."""
# pylint: disable=too-many-branches
base_builder_repo = _load_base_builder_repo()
base_builder_repo = build_specified_commit.load_base_builder_repo()
with tempfile.TemporaryDirectory() as tmp_dir:
repo_url, repo_path = build_specified_commit.detect_main_repo(

View File

@ -17,12 +17,17 @@ This module allows each of the OSS-Fuzz projects' fuzzers to be built
from a specific point in time. This feature can be used for implementations
like continuous integration fuzzing and bisection to find errors
"""
import argparse
import bisect
import datetime
from distutils import spawn
import os
import collections
import json
import logging
import re
import shutil
import tempfile
import time
import helper
@ -152,6 +157,34 @@ def _build_image_with_retries(project_name):
return result
def get_required_post_checkout_steps(dockerfile_path):
  """Best-effort scan of a Dockerfile for steps to re-run after checkout.

  If build.sh (or similar) is copied from the upstream repo by a RUN
  command, that copy must be repeated after the source revision is
  changed, so the rebuilt tree uses the matching build script.

  Args:
    dockerfile_path: Path to the project's Dockerfile.

  Returns:
    A list of (workdir, command) tuples for RUN commands that appear
    after the last source checkout and touch build.sh under $SRC//src.
  """
  vcs_checkout_re = re.compile(r'\s*RUN\s*(git|svn|hg)')
  # RUN commands that reference build.sh inside the source tree.
  build_sh_run_re = re.compile(r'\s*RUN\s*(.*build\.sh.*(\$SRC|/src).*)')

  with open(dockerfile_path) as dockerfile:
    dockerfile_lines = dockerfile.readlines()

  steps = []
  for index, current_line in enumerate(dockerfile_lines):
    if vcs_checkout_re.match(current_line):
      # A fresh checkout supersedes any previously collected steps.
      steps = []
      continue
    found = build_sh_run_re.match(current_line)
    if found:
      # The step must run in whatever WORKDIR was in effect at this point.
      workdir = helper.workdir_from_lines(dockerfile_lines[:index])
      steps.append((workdir, found.group(1)))
  return steps
# pylint: disable=too-many-locals
def build_fuzzers_from_commit(commit,
build_repo_manager,
host_src_path,
@ -175,8 +208,27 @@ def build_fuzzers_from_commit(commit,
copy_src_from_docker(build_data.project_name,
os.path.dirname(host_src_path))
projects_dir = os.path.join('projects', build_data.project_name)
dockerfile_path = os.path.join(projects_dir, 'Dockerfile')
for i in range(num_retry + 1):
build_repo_manager.checkout_commit(commit, clean=False)
post_checkout_steps = get_required_post_checkout_steps(dockerfile_path)
for workdir, post_checkout_step in post_checkout_steps:
logging.info('Running post-checkout step `%s` in %s.', post_checkout_step,
workdir)
helper.docker_run([
'-w',
workdir,
'-v',
host_src_path + ':' + '/src',
'gcr.io/oss-fuzz/' + build_data.project_name,
'/bin/bash',
'-c',
post_checkout_step,
])
result = helper.build_fuzzers_impl(project_name=build_data.project_name,
clean=True,
engine=build_data.engine,
@ -191,7 +243,6 @@ def build_fuzzers_from_commit(commit,
# Retry with an OSS-Fuzz builder container that's closer to the project
# commit date.
commit_date = build_repo_manager.commit_date(commit)
projects_dir = os.path.join('projects', build_data.project_name)
# Find first change in the projects/<PROJECT> directory before the project
# commit date.
@ -216,8 +267,7 @@ def build_fuzzers_from_commit(commit,
if base_builder_repo:
base_builder_digest = base_builder_repo.find_digest(commit_date)
logging.info('Using base-builder with digest %s.', base_builder_digest)
_replace_base_builder_digest(os.path.join(projects_dir, 'Dockerfile'),
base_builder_digest)
_replace_base_builder_digest(dockerfile_path, base_builder_digest)
# Rebuild image and re-copy src dir since things in /src could have changed.
if not _build_image_with_retries(build_data.project_name):
@ -273,3 +323,80 @@ def detect_main_repo(project_name, repo_name=None, commit=None):
logging.error('Failed to detect repo:\n%s', out)
return None, None
def load_base_builder_repo():
  """Get base-image digests for gcr.io/oss-fuzz-base/base-builder.

  Queries gcloud for all tags of the base-builder image (sorted by
  timestamp) and records each digest with its UTC push timestamp.

  Returns:
    A BaseBuilderRepo with one (timestamp, digest) entry per image, or
    None if the gcloud binary is not available on PATH.
  """
  # shutil.which replaces distutils.spawn.find_executable: distutils is
  # deprecated (PEP 632) and removed in Python 3.12; shutil is already
  # imported by this module.
  gcloud_path = shutil.which('gcloud')
  if not gcloud_path:
    logging.warning('gcloud not found in PATH.')
    return None

  result, _, _ = utils.execute([
      gcloud_path,
      'container',
      'images',
      'list-tags',
      'gcr.io/oss-fuzz-base/base-builder',
      '--format=json',
      '--sort-by=timestamp',
  ],
                               check_result=True)
  result = json.loads(result)

  repo = BaseBuilderRepo()
  for image in result:
    # gcloud reports a local-timezone ISO timestamp; normalize to UTC.
    timestamp = datetime.datetime.fromisoformat(
        image['timestamp']['datetime']).astimezone(datetime.timezone.utc)
    repo.add_digest(timestamp, image['digest'])
  return repo
def main():
  """CLI entry point: build a project's fuzzers at a given commit.

  Parses command-line flags, locates the project's main repo, checks the
  source out of the project image into a temp dir, and runs the build.

  Raises:
    ValueError: if the main repo cannot be determined.
    RuntimeError: if the build fails.
  """
  logging.getLogger().setLevel(logging.INFO)

  parser = argparse.ArgumentParser(
      description='Build fuzzers at a specific commit')
  parser.add_argument('--project_name',
                      required=True,
                      help='The name of the project where the bug occurred.')
  parser.add_argument('--commit',
                      required=True,
                      help='The newest commit SHA to be bisected.')
  parser.add_argument('--engine',
                      default='libfuzzer',
                      help='The default is "libfuzzer".')
  parser.add_argument('--sanitizer',
                      default='address',
                      help='The default is "address".')
  parser.add_argument('--architecture', default='x86_64')
  args = parser.parse_args()

  repo_url, repo_path = detect_main_repo(args.project_name, commit=args.commit)
  if not (repo_url and repo_path):
    raise ValueError('Main git repo can not be determined.')

  with tempfile.TemporaryDirectory() as tmp_dir:
    # Work on a copy of /src extracted from the project's Docker image.
    host_src_dir = copy_src_from_docker(args.project_name, tmp_dir)
    checkout_dir = os.path.join(host_src_dir, os.path.basename(repo_path))
    build_repo_manager = repo_manager.BaseRepoManager(checkout_dir)
    base_builder_repo = load_base_builder_repo()

    build_data = BuildData(project_name=args.project_name,
                           engine=args.engine,
                           sanitizer=args.sanitizer,
                           architecture=args.architecture)
    built = build_fuzzers_from_commit(args.commit,
                                      build_repo_manager,
                                      host_src_dir,
                                      build_data,
                                      base_builder_repo=base_builder_repo)
    if not built:
      raise RuntimeError('Failed to build.')


if __name__ == '__main__':
  main()

View File

@ -363,13 +363,8 @@ def _env_to_docker_args(env_list):
WORKDIR_REGEX = re.compile(r'\s*WORKDIR\s*([^\s]+)')
def _workdir_from_dockerfile(project_name):
"""Parse WORKDIR from the Dockerfile for the given project."""
dockerfile_path = get_dockerfile_path(project_name)
with open(dockerfile_path) as file_handle:
lines = file_handle.readlines()
def workdir_from_lines(lines, default='/src'):
"""Get the WORKDIR from the given lines."""
for line in reversed(lines): # reversed to get last WORKDIR.
match = re.match(WORKDIR_REGEX, line)
if match:
@ -381,7 +376,17 @@ def _workdir_from_dockerfile(project_name):
return os.path.normpath(workdir)
return os.path.join('/src', project_name)
return default
def _workdir_from_dockerfile(project_name):
  """Return the effective WORKDIR from the project's Dockerfile.

  Falls back to /src/<project_name> when no WORKDIR directive is present.
  """
  with open(get_dockerfile_path(project_name)) as dockerfile:
    dockerfile_lines = dockerfile.readlines()
  fallback = os.path.join('/src', project_name)
  return workdir_from_lines(dockerfile_lines, default=fallback)
def docker_run(run_args, print_output=True):