# Copyright 2020 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ################################################################################ #!/usr/bin/env python3 """Starts project build on Google Cloud Builder. Usage: build_project.py """ from __future__ import print_function import argparse import collections import datetime import json import logging import os import posixpath import re import sys import oauth2client.client import six import yaml import build_lib FUZZING_BUILD_TYPE = 'fuzzing' GCB_LOGS_BUCKET = 'oss-fuzz-gcb-logs' DEFAULT_ARCHITECTURES = ['x86_64'] DEFAULT_ENGINES = ['libfuzzer', 'afl', 'honggfuzz'] DEFAULT_SANITIZERS = ['address', 'undefined'] LATEST_VERSION_FILENAME = 'latest.version' LATEST_VERSION_CONTENT_TYPE = 'text/plain' QUEUE_TTL_SECONDS = 60 * 60 * 24 # 24 hours. 
# The 'projects' directory, located three levels above the directory that
# contains this file.
PROJECTS_DIR = os.path.abspath(
    os.path.join(__file__, os.path.pardir, os.path.pardir, os.path.pardir,
                 os.path.pardir, 'projects'))

DEFAULT_OSS_FUZZ_REPO = 'https://github.com/google/oss-fuzz.git'

# Options controlling how build steps are generated:
#   testing:           forwarded to build_lib.get_upload_bucket().
#   test_image_suffix: forwarded to build_lib.get_runner_image_name().
#   repo, branch:      not read in this file; the whole config is forwarded to
#                      build_lib.get_project_image_steps().
#   parallel:          when true, compile and build-check steps get 'waitFor'.
#   upload:            when true, upload steps are appended for every build.
Config = collections.namedtuple(
    'Config',
    ['testing', 'test_image_suffix', 'repo', 'branch', 'parallel', 'upload'],
    defaults=(False, None, DEFAULT_OSS_FUZZ_REPO, None, False, True))

# Captures the argument of a Dockerfile WORKDIR instruction.
WORKDIR_REGEX = re.compile(r'\s*WORKDIR\s*([^\s]+)')


class Build:  # pylint: disable=too-few-public-methods
    """Class representing the configuration for a build."""

    def __init__(self, fuzzing_engine, sanitizer, architecture):
        """Stores the engine/sanitizer/architecture triple of the build."""
        self.fuzzing_engine = fuzzing_engine
        self.sanitizer = sanitizer
        self.architecture = architecture
        self.targets_list_filename = build_lib.get_targets_list_filename(
            self.sanitizer)

    @property
    def is_arm(self):
        """Returns True if CPU architecture is ARM-based."""
        return self.architecture == 'aarch64'

    @property
    def out(self):
        """Returns the out directory for the build."""
        return posixpath.join(
            '/workspace/out/',
            f'{self.fuzzing_engine}-{self.sanitizer}-{self.architecture}')


def get_project_data(project_name):
    """(Local only) Returns a tuple containing the contents of the project.yaml
    and Dockerfile of |project_name|.

    Returns:
        (project_yaml, dockerfile): the parsed project.yaml and the Dockerfile
        text.

    Raises:
        FileNotFoundError: if there is no Dockerfile for |project_name| (the
            error is logged and re-raised) or if project.yaml is missing.
    """
    project_dir = os.path.join(PROJECTS_DIR, project_name)
    dockerfile_path = os.path.join(project_dir, 'Dockerfile')
    try:
        with open(dockerfile_path, encoding='utf-8') as dockerfile_handle:
            dockerfile = dockerfile_handle.read()
    except FileNotFoundError:
        logging.error('Project "%s" does not have a dockerfile.', project_name)
        raise

    project_yaml_path = os.path.join(project_dir, 'project.yaml')
    with open(project_yaml_path, 'r',
              encoding='utf-8') as project_yaml_file_handle:
        project_yaml_contents = project_yaml_file_handle.read()
    project_yaml = yaml.safe_load(project_yaml_contents)
    return project_yaml, dockerfile


def get_sanitizer_strings(sanitizers):
    """Accepts the sanitizers field from project.yaml where some sanitizers can
    be defined as experimental. Returns a list of sanitizers.

    String entries are kept as they are, dict entries contribute their keys and
    entries of any other type are ignored.
    """
    processed_sanitizers = []
    for sanitizer in sanitizers:
        if isinstance(sanitizer, str):
            processed_sanitizers.append(sanitizer)
        elif isinstance(sanitizer, dict):
            processed_sanitizers.extend(sanitizer)
    return processed_sanitizers


def set_default_sanitizer_for_centipede(project_yaml):
    """Adds none as a sanitizer for centipede in yaml if it does not exist yet.

    Mutates |project_yaml| by rebinding its 'sanitizers' entry to a new list.
    The previous list object is left untouched because it may be shared with
    the caller's original yaml or with the module-level defaults.
    """
    sanitizers = project_yaml['sanitizers']
    # Centipede requires a separate unsanitized binary to use sanitized ones.
    if ('centipede' in project_yaml['fuzzing_engines'] and sanitizers and
            'none' not in sanitizers):
        project_yaml['sanitizers'] = [*sanitizers, 'none']


class Project:  # pylint: disable=too-many-instance-attributes
    """Class representing an OSS-Fuzz project."""

    def __init__(self, name, project_yaml, dockerfile, image_project):
        """Initializes the project from its parsed project.yaml and Dockerfile.

        |project_yaml| is copied before defaults are filled in, so the caller's
        dict does not gain keys. A 'language' key is required (KeyError
        otherwise); every other field read here has a default.
        """
        project_yaml = project_yaml.copy()
        set_yaml_defaults(project_yaml)

        self.name = name
        self.image_project = image_project
        self.workdir = workdir_from_dockerfile(dockerfile)
        self._sanitizers = project_yaml['sanitizers']
        self.disabled = project_yaml['disabled']
        self.architectures = project_yaml['architectures']
        self.fuzzing_engines = project_yaml['fuzzing_engines']
        self.coverage_extra_args = project_yaml['coverage_extra_args']
        self.labels = project_yaml['labels']
        self.fuzzing_language = project_yaml['language']
        self.run_tests = project_yaml['run_tests']
        self.main_repo = project_yaml.get('main_repo', '')

    @property
    def sanitizers(self):
        """Returns processed sanitizers."""
        assert isinstance(self._sanitizers, list)
        return get_sanitizer_strings(self._sanitizers)

    @property
    def image(self):
        """Returns the docker image for the project."""
        return f'gcr.io/{self.image_project}/{self.name}'


def get_last_step_id(steps):
    """Returns the id of the last step in |steps|."""
    return steps[-1]['id']


def set_yaml_defaults(project_yaml):
    """Sets project.yaml's default parameters. Mutates |project_yaml|."""
    project_yaml.setdefault('disabled', False)
    # Store copies so that later changes to a project's values can never leak
    # into the module-level default lists.
    project_yaml.setdefault('architectures', list(DEFAULT_ARCHITECTURES))
    project_yaml.setdefault('sanitizers', list(DEFAULT_SANITIZERS))
    project_yaml.setdefault('fuzzing_engines', list(DEFAULT_ENGINES))
    project_yaml.setdefault('run_tests', True)
    project_yaml.setdefault('coverage_extra_args', '')
    project_yaml.setdefault('labels', {})
    # Adds 'none' as a sanitizer for centipede to the project yaml by default,
    # because Centipede always requires a separate build of unsanitized binary.
    set_default_sanitizer_for_centipede(project_yaml)


def is_supported_configuration(build):
    """Check if the given configuration is supported."""
    fuzzing_engine_info = build_lib.ENGINE_INFO[build.fuzzing_engine]
    if build.architecture == 'i386' and build.sanitizer != 'address':
        return False

    # TODO(jonathanmetzman): UBSan should be easy to support.
    if build.architecture == 'aarch64' and (build.sanitizer
                                            not in {'address', 'hwaddress'}):
        return False

    return (build.sanitizer in fuzzing_engine_info.supported_sanitizers and
            build.architecture in fuzzing_engine_info.supported_architectures)


def workdir_from_dockerfile(dockerfile):
    """Parses WORKDIR from the Dockerfile.

    Returns the argument of the first WORKDIR line of |dockerfile| with every
    '$' doubled, or '/src' if there is no WORKDIR line.
    """
    for line in dockerfile.split('\n'):
        match = WORKDIR_REGEX.match(line)
        if match:
            # We need to escape '$' since they're used for substitutions in
            # Container Builder builds.
            return match.group(1).replace('$', '$$')

    return '/src'


def get_datetime_now():
    """Returns datetime.datetime.now(). Used for mocking."""
    return datetime.datetime.now()


def get_env(fuzzing_language, build):
    """Returns an environment for building. The environment is returned as a
    list and is suitable for use as the "env" parameter in a GCB build step.
    The environment variables are based on the values of |fuzzing_language| and
    |build|. Entries have the form 'KEY=value' and are sorted."""
    env_dict = {
        'FUZZING_LANGUAGE': fuzzing_language,
        'FUZZING_ENGINE': build.fuzzing_engine,
        'SANITIZER': build.sanitizer,
        'ARCHITECTURE': build.architecture,
        # Set HOME so that it doesn't point to a persisted volume (see
        # https://github.com/google/oss-fuzz/issues/6035).
        'HOME': '/root',
        'OUT': build.out,
    }
    return sorted(f'{key}={value}' for key, value in env_dict.items())


def get_compile_step(project, build, env, parallel):
    """Returns the GCB step for compiling |project|'s fuzzers using |env|. The
    type of build is specified by |build|. If |parallel|, the step is set to
    wait only for the srcmap step."""
    failure_msg = (
        '*' * 80 + '\nFailed to build.\nTo reproduce, run:\n'
        f'python infra/helper.py build_image {project.name}\n'
        'python infra/helper.py build_fuzzers --sanitizer '
        f'{build.sanitizer} --engine {build.fuzzing_engine} --architecture '
        f'{build.architecture} {project.name}\n' + '*' * 80)
    compile_step = {
        'name': project.image,
        'env': env,
        'args': [
            'bash',
            '-c',
            # Remove /out first (presumably so that no stale, non-instrumented
            # binaries from the image end up in the build).
            # `cd /src && cd {workdir}` (where {workdir} is parsed from the
            # Dockerfile). Container Builder overrides our workdir so we need
            # to add this step to set it back.
            (f'rm -r /out && cd /src && cd {project.workdir} && '
             f'mkdir -p {build.out} && compile || '
             f'(echo "{failure_msg}" && false)'),
        ],
        'id': get_id('compile', build),
    }
    # The return value is not used; the helper is relied on to update
    # |compile_step| in place.
    build_lib.dockerify_run_step(compile_step,
                                 build,
                                 use_architecture_image_name=build.is_arm)
    maybe_add_parallel(compile_step, build_lib.get_srcmap_step_id(), parallel)
    return compile_step


def maybe_add_parallel(step, wait_for_id, parallel):
    """Makes |step| run immediately after |wait_for_id| if |parallel|. Mutates
    |step|.

    |wait_for_id| is a single step id or an iterable of step ids. 'waitFor' is
    always stored as a list because the Cloud Build schema defines it as a
    list of step ids.
    """
    if not parallel:
        return
    if isinstance(wait_for_id, str):
        step['waitFor'] = [wait_for_id]
    else:
        step['waitFor'] = list(wait_for_id)


def get_id(step_type, build):
    """Returns a unique step id based on |step_type| and |build|. Useful for
    parallelizing builds."""
    return (f'{step_type}-{build.fuzzing_engine}-{build.sanitizer}'
            f'-{build.architecture}')


def _get_build_check_step(project, build, env, base_images_project,
                          test_image_suffix):
    """Returns the step that runs test_all.py on the fuzz targets of |build|."""
    failure_msg = (
        '*' * 80 + '\nBuild checks failed.\n'
        'To reproduce, run:\n'
        f'python infra/helper.py build_image {project.name}\n'
        'python infra/helper.py build_fuzzers --sanitizer '
        f'{build.sanitizer} --engine {build.fuzzing_engine} '
        f'--architecture {build.architecture} {project.name}\n'
        'python infra/helper.py check_build --sanitizer '
        f'{build.sanitizer} --engine {build.fuzzing_engine} '
        f'--architecture {build.architecture} {project.name}\n' + '*' * 80)
    test_step = {
        'name':
            build_lib.get_runner_image_name(base_images_project,
                                            test_image_suffix),
        'env':
            env,
        'args': [
            'bash', '-c', f'test_all.py || (echo "{failure_msg}" && false)'
        ],
        'id':
            get_id('build-check', build)
    }
    build_lib.dockerify_run_step(test_step, build)
    return test_step


def _get_write_labels_step(project, build, env):
    """Returns the step that writes |project|'s target labels to |build|.out."""
    return {
        'name': project.image,
        'env': env,
        'args': [
            '/usr/local/bin/write_labels.py',
            json.dumps(project.labels),
            build.out,
        ],
    }


def _get_targets_list_step(build, env, base_images_project, test_image_suffix):
    """Returns the step that writes the targets list of |build| to /workspace."""
    return {
        'name':
            build_lib.get_runner_image_name(base_images_project,
                                            test_image_suffix),
        'env':
            env,
        'args': [
            'bash', '-c',
            f'targets_list > /workspace/{build.targets_list_filename}'
        ],
    }


def get_build_steps(  # pylint: disable=too-many-locals, too-many-statements, too-many-branches, too-many-arguments
        project_name, project_yaml, dockerfile, image_project,
        base_images_project, config):
    """Returns build steps for project.

    The result starts with the project image steps from build_lib and then
    contains, for every supported engine/sanitizer/architecture combination:
    a compile step, a build-check step (if the project runs tests), a
    write-labels step (if the project has labels), a targets-list step and,
    if |config.upload|, the upload steps. Returns [] for a disabled project.
    """
    project = Project(project_name, project_yaml, dockerfile, image_project)

    if project.disabled:
        logging.info('Project "%s" is disabled.', project.name)
        return []

    timestamp = get_datetime_now().strftime('%Y%m%d%H%M')

    build_steps = build_lib.get_project_image_steps(
        project.name,
        project.image,
        project.fuzzing_language,
        config=config,
        architectures=project.architectures)

    # Sort engines to make AFL first to test if libFuzzer has an advantage in
    # finding bugs first since it is generally built first.
    for fuzzing_engine in sorted(project.fuzzing_engines):
        # Sort sanitizers and architectures so order is deterministic (good for
        # tests).
        for sanitizer in sorted(project.sanitizers):
            # Build x86_64 before i386.
            for architecture in sorted(project.architectures, reverse=True):
                build = Build(fuzzing_engine, sanitizer, architecture)
                if not is_supported_configuration(build):
                    continue

                env = get_env(project.fuzzing_language, build)
                build_steps.append(
                    get_compile_step(project, build, env, config.parallel))

                if project.run_tests:
                    # Test fuzz targets. In parallel mode the check waits for
                    # the compile step that was appended just above.
                    test_step = _get_build_check_step(
                        project, build, env, base_images_project,
                        config.test_image_suffix)
                    maybe_add_parallel(test_step,
                                       get_last_step_id(build_steps),
                                       config.parallel)
                    build_steps.append(test_step)

                if project.labels:
                    # Write target labels.
                    build_steps.append(
                        _get_write_labels_step(project, build, env))

                # Generate targets list.
                build_steps.append(
                    _get_targets_list_step(build, env, base_images_project,
                                           config.test_image_suffix))

                if config.upload:
                    build_steps.extend(
                        get_upload_steps(project, build, timestamp,
                                         base_images_project, config.testing))

    return build_steps


def get_targets_list_upload_step(bucket, project, build, uploader_image):
    """Returns the step to upload targets_list for |build| of |project| to
    |bucket|."""
    targets_list_url = build_lib.get_signed_url(
        build_lib.get_targets_list_url(bucket, project.name, build.sanitizer))
    return {
        'name': uploader_image,
        'args': [
            f'/workspace/{build.targets_list_filename}',
            targets_list_url,
        ],
    }


def get_uploader_image(base_images_project):
    """Returns the uploader base image in |base_images_project|."""
    return f'gcr.io/{base_images_project}/uploader'


def get_upload_steps(project, build, timestamp, base_images_project, testing):
    """Returns the steps for uploading the fuzzer build specified by |project|
    and |build|. Uses |timestamp| for naming the uploads. Uses
    |base_images_project| and |testing| for determining which image to use for
    the upload."""
    bucket = build_lib.get_upload_bucket(build.fuzzing_engine,
                                         build.architecture, testing)
    stamped_name = '-'.join([project.name, build.sanitizer, timestamp])
    zip_file = stamped_name + '.zip'
    upload_url = build_lib.get_signed_url(
        build_lib.GCS_UPLOAD_URL_FORMAT.format(bucket, project.name, zip_file))
    stamped_srcmap_file = stamped_name + '.srcmap.json'
    srcmap_url = build_lib.get_signed_url(
        build_lib.GCS_UPLOAD_URL_FORMAT.format(bucket, project.name,
                                               stamped_srcmap_file))
    latest_version_file = '-'.join(
        [project.name, build.sanitizer, LATEST_VERSION_FILENAME])
    latest_version_url = build_lib.GCS_UPLOAD_URL_FORMAT.format(
        bucket, project.name, latest_version_file)
    latest_version_url = build_lib.get_signed_url(
        latest_version_url, content_type=LATEST_VERSION_CONTENT_TYPE)
    uploader_image = get_uploader_image(base_images_project)

    upload_steps = [
        # Zip binaries.
        {
            'name': project.image,
            'args': ['bash', '-c', f'cd {build.out} && zip -r {zip_file} *'],
        },
        # Upload srcmap.
        {
            'name': uploader_image,
            'args': [
                '/workspace/srcmap.json',
                srcmap_url,
            ],
        },
        # Upload binaries.
        {
            'name': uploader_image,
            'args': [
                # This path is used inside the build container, so it is
                # joined with posixpath (like Build.out), not with the host's
                # os.path.
                posixpath.join(build.out, zip_file),
                upload_url,
            ],
        },
        # Upload targets list.
        get_targets_list_upload_step(bucket, project, build, uploader_image),
        # Upload the latest.version file.
        build_lib.http_upload_step(zip_file, latest_version_url,
                                   LATEST_VERSION_CONTENT_TYPE),
        # Cleanup.
        get_cleanup_step(project, build),
    ]
    return upload_steps


def get_cleanup_step(project, build):
    """Returns the step for cleaning up after doing |build| of |project|."""
    return {
        'name': project.image,
        'args': [
            'bash',
            '-c',
            'rm -r ' + build.out,
        ],
    }


# pylint: disable=no-member,too-many-arguments
def run_build(oss_fuzz_project,
              build_steps,
              credentials,
              build_type,
              cloud_project='oss-fuzz',
              extra_tags=None):
    """Run the build for given steps on cloud build. |build_steps| are the steps
    to run. |credentials| are used to authenticate to GCB and build in
    |cloud_project|. |oss_fuzz_project| and |build_type| are used to tag the
    build in GCB so the build can be queried for debugging purposes.
    |extra_tags| are appended to those tags. Returns the result of
    build_lib.run_build()."""
    if extra_tags is None:
        extra_tags = []
    tags = [oss_fuzz_project + '-' + build_type, build_type, oss_fuzz_project]
    tags.extend(extra_tags)
    timeout = build_lib.BUILD_TIMEOUT
    body_overrides = {
        'logsBucket': GCB_LOGS_BUCKET,
        'queueTtl': str(QUEUE_TTL_SECONDS) + 's',
    }
    return build_lib.run_build(build_steps,
                               credentials,
                               cloud_project,
                               timeout,
                               body_overrides=body_overrides,
                               tags=tags)


def get_args(description):
    """Parses command line arguments and returns them. Suitable for a build
    script."""
    parser = argparse.ArgumentParser(sys.argv[0], description=description)
    parser.add_argument('projects', help='Projects.', nargs='+')
    parser.add_argument('--testing',
                        action='store_true',
                        required=False,
                        default=False,
                        help='Upload to testing buckets.')
    parser.add_argument('--test-image-suffix',
                        required=False,
                        default=None,
                        help='Use testing base-images.')
    parser.add_argument('--branch',
                        required=False,
                        default=None,
                        help='Use specified OSS-Fuzz branch.')
    parser.add_argument('--parallel',
                        action='store_true',
                        required=False,
                        default=False,
                        help='Do builds in parallel.')
    return parser.parse_args()


def build_script_main(script_description, get_build_steps_func, build_type):
    """Gets arguments from command line using |script_description| as helpstring
    description. Gets build_steps using |get_build_steps_func| and then runs
    those steps on GCB, tagging the builds with |build_type|. Returns 0 on
    success, 1 on failure.

    A project whose data cannot be read or that yields no steps is skipped and
    counted as a failure; the remaining projects are still built.
    """
    args = get_args(script_description)
    logging.basicConfig(level=logging.INFO)

    image_project = 'oss-fuzz'

    credentials = oauth2client.client.GoogleCredentials.get_application_default(
    )
    error = False
    config = Config(testing=args.testing,
                    test_image_suffix=args.test_image_suffix,
                    branch=args.branch,
                    parallel=args.parallel,
                    upload=True)
    for project_name in args.projects:
        logging.info('Getting steps for: "%s".', project_name)
        try:
            project_yaml, dockerfile_contents = get_project_data(project_name)
        except FileNotFoundError:
            logging.error('Couldn\'t get project data. Skipping %s.',
                          project_name)
            error = True
            continue

        steps = get_build_steps_func(project_name, project_yaml,
                                     dockerfile_contents, image_project,
                                     build_lib.BASE_IMAGES_PROJECT, config)
        if not steps:
            logging.error('No steps. Skipping %s.', project_name)
            error = True
            continue

        run_build(project_name, steps, credentials, build_type)
    return 0 if not error else 1


def main():
    """Build and run projects."""
    return build_script_main('Builds a project on GCB.', get_build_steps,
                             FUZZING_BUILD_TYPE)


if __name__ == '__main__':
    sys.exit(main())