# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
"""Utility module for Google Cloud Build scripts."""
import base64
import collections
import logging
import os
import six.moves.urllib.parse as urlparse
import sys
import time

from googleapiclient.discovery import build as cloud_build
import googleapiclient.discovery
from google.api_core.client_options import ClientOptions
import google.auth
from oauth2client.service_account import ServiceAccountCredentials
import requests
import yaml

BASE_IMAGES_PROJECT = 'oss-fuzz-base'

# Also used as the lifetime (in seconds) of signed URLs, see get_signed_url.
BUILD_TIMEOUT = 20 * 60 * 60

# Needed for reading public targets.list.* files.
GCS_URL_BASENAME = 'https://storage.googleapis.com/'

GCS_UPLOAD_URL_FORMAT = '/{0}/{1}/{2}'

# Where corpus backups can be downloaded from.
CORPUS_BACKUP_URL = ('/{project}-backup.clusterfuzz-external.appspot.com/'
                     'corpus/libFuzzer/{fuzzer}/latest.zip')

# Cloud Builder has a limit of 100 build steps and 100 arguments for each step.
CORPUS_DOWNLOAD_BATCH_SIZE = 100

TARGETS_LIST_BASENAME = 'targets.list'

# Seconds to wait for GCS when fetching the targets list. Without a timeout a
# stalled connection would block the caller indefinitely.
_TARGETS_LIST_REQUEST_TIMEOUT = 60

EngineInfo = collections.namedtuple(
    'EngineInfo',
    ['upload_bucket', 'supported_sanitizers', 'supported_architectures'])

ENGINE_INFO = {
    'libfuzzer':
        EngineInfo(upload_bucket='clusterfuzz-builds',
                   supported_sanitizers=['address', 'memory', 'undefined'],
                   supported_architectures=['x86_64', 'i386', 'aarch64']),
    'afl':
        EngineInfo(upload_bucket='clusterfuzz-builds-afl',
                   supported_sanitizers=['address'],
                   supported_architectures=['x86_64']),
    'honggfuzz':
        EngineInfo(upload_bucket='clusterfuzz-builds-honggfuzz',
                   supported_sanitizers=['address'],
                   supported_architectures=['x86_64']),
    'none':
        EngineInfo(upload_bucket='clusterfuzz-builds-no-engine',
                   supported_sanitizers=['address'],
                   supported_architectures=['x86_64']),
    'wycheproof':
        EngineInfo(upload_bucket='clusterfuzz-builds-wycheproof',
                   supported_sanitizers=['none'],
                   supported_architectures=['x86_64']),
    'centipede':
        EngineInfo(upload_bucket='clusterfuzz-builds-centipede',
                   supported_sanitizers=['address', 'none'],
                   supported_architectures=['x86_64']),
}

OSS_FUZZ_BUILDPOOL_NAME = os.getenv(
    'GCB_BUILDPOOL_NAME', 'projects/oss-fuzz/locations/us-central1/'
    'workerPools/buildpool')

US_CENTRAL_CLIENT_OPTIONS = ClientOptions(
    api_endpoint='https://us-central1-cloudbuild.googleapis.com/')

DOCKER_TOOL_IMAGE = 'gcr.io/cloud-builders/docker'

_ARM64 = 'aarch64'


def get_targets_list_filename(sanitizer):
    """Returns the targets list filename for |sanitizer|.

    The result is TARGETS_LIST_BASENAME followed by '.' and |sanitizer|.
    """
    return f'{TARGETS_LIST_BASENAME}.{sanitizer}'


def get_targets_list_url(bucket, project, sanitizer):
    """Returns the targets list URL path for |project| in |bucket|.

    The result is a host-less path built from GCS_UPLOAD_URL_FORMAT
    ('/<bucket>/<project>/<filename>').
    """
    filename = get_targets_list_filename(sanitizer)
    return GCS_UPLOAD_URL_FORMAT.format(bucket, project, filename)


def dockerify_run_step(step, build, use_architecture_image_name=False):
    """Modify a docker run step to run using gcr.io/cloud-builders/docker.

    This allows us to specify which architecture to run the image on.

    Args:
      step: Build step dict with at least 'name' and 'args'; an optional 'env'
        list is forwarded to the container with '-e'. Modified in place.
      build: Object exposing |is_arm| and |architecture|.
      use_architecture_image_name: If true, the step's image name gets an
        architecture-specific suffix before being passed to `docker run`.

    Returns:
      The same |step| object, after modification.
    """
    image = step['name']
    if use_architecture_image_name:
        image = _make_image_name_architecture_specific(image,
                                                       build.architecture)
    step['name'] = DOCKER_TOOL_IMAGE

    platform = 'linux/arm64' if build.is_arm else 'linux/amd64'
    new_args = [
        'run', '--platform', platform, '-v', '/workspace:/workspace',
        '--privileged', '--cap-add=all'
    ]
    # 'env' is a list of strings, each passed through as a '-e' argument.
    for env_var in step.get('env', []):
        new_args.extend(['-e', env_var])
    new_args += ['-t', image]
    new_args += step['args']
    step['args'] = new_args
    return step


def get_upload_bucket(engine, architecture, testing):
    """Returns the upload bucket for |engine| and architecture. Returns the
    testing bucket if |testing|.

    The engine's base bucket gets a '-<architecture>' suffix for anything
    other than 'x86_64', followed by '-testing' when |testing| is truthy.
    """
    bucket = ENGINE_INFO[engine].upload_bucket
    if architecture != 'x86_64':
        bucket += '-' + architecture
    if testing:
        bucket += '-testing'
    return bucket


def _get_targets_list(project_name):
    """Returns the list of fuzz target names for |project_name|.

    Returns:
      The whitespace-separated entries of the targets list file, or None if
      the HTTP status was not 200 (details are written to stderr).

    Raises:
      requests.exceptions.RequestException: If the request fails at the
        transport level, including exceeding the request timeout.
    """
    # libFuzzer ASan 'x86_64' is the default configuration, get list of targets
    # from it.
    # We never want the target list from the testing bucket, the testing bucket
    # is only for uploading.
    bucket = get_upload_bucket('libfuzzer', 'x86_64', testing=False)

    url = get_targets_list_url(bucket, project_name, 'address')
    url = urlparse.urljoin(GCS_URL_BASENAME, url)
    response = requests.get(url, timeout=_TARGETS_LIST_REQUEST_TIMEOUT)
    if response.status_code != 200:
        sys.stderr.write('Failed to get list of targets from "%s".\n' % url)
        sys.stderr.write('Status code: %d \t\tText:\n%s\n' %
                         (response.status_code, response.text))
        return None

    return response.text.split()


# pylint: disable=no-member
def get_signed_url(path, method='PUT', content_type=''):
    """Returns a signed https://storage.googleapis.com URL for |path|.

    The URL expires BUILD_TIMEOUT seconds from now. If the
    GOOGLE_APPLICATION_CREDENTIALS environment variable is set, the key file it
    points to is used for signing. Otherwise the IAM credentials signBlob API
    is called for the '<project>@appspot.gserviceaccount.com' account of the
    default credentials' project.

    Args:
      path: Object path starting with '/', appended directly after the host.
      method: HTTP method the URL is signed for.
      content_type: Content type included in the signed string.
    """
    timestamp = int(time.time() + BUILD_TIMEOUT)
    blob = f'{method}\n\n{content_type}\n{timestamp}\n{path}'

    service_account_path = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
    if service_account_path:
        creds = ServiceAccountCredentials.from_json_keyfile_name(
            service_account_path)
        client_id = creds.service_account_email
        # sign_blob's result is indexed at [1] for the signature bytes.
        signature = base64.b64encode(creds.sign_blob(blob)[1])
    else:
        credentials, project = google.auth.default()
        iam = googleapiclient.discovery.build('iamcredentials',
                                              'v1',
                                              credentials=credentials,
                                              cache_discovery=False)
        client_id = project + '@appspot.gserviceaccount.com'
        service_account = f'projects/-/serviceAccounts/{client_id}'
        response = iam.projects().serviceAccounts().signBlob(
            name=service_account,
            body={
                'delegates': [],
                'payload': base64.b64encode(
                    blob.encode('utf-8')).decode('utf-8'),
            }).execute()
        signature = response['signedBlob']

    values = {
        'GoogleAccessId': client_id,
        'Expires': timestamp,
        'Signature': signature,
    }
    return f'https://storage.googleapis.com{path}?{urlparse.urlencode(values)}'


def download_corpora_steps(project_name, test_image_suffix):
    """Returns GCB steps for downloading corpora backups for the given project.

    One step is produced per batch of at most CORPUS_DOWNLOAD_BATCH_SIZE fuzz
    targets. Each step argument has the form '<archive path> <signed url>'.

    Returns:
      A list of build step dicts, or None if no fuzz targets were found.
    """
    fuzz_targets = _get_targets_list(project_name)
    if not fuzz_targets:
        sys.stderr.write('No fuzz targets found for project "%s".\n' %
                         project_name)
        return None

    # Neither value depends on the target, so compute them once.
    qualified_name_prefix = '%s_' % project_name
    runner_image = get_runner_image_name(BASE_IMAGES_PROJECT,
                                         test_image_suffix)

    steps = []
    # Split fuzz targets into batches of CORPUS_DOWNLOAD_BATCH_SIZE.
    for i in range(0, len(fuzz_targets), CORPUS_DOWNLOAD_BATCH_SIZE):
        download_corpus_args = []
        for binary_name in fuzz_targets[i:i + CORPUS_DOWNLOAD_BATCH_SIZE]:
            # Backups are stored under the project-qualified target name.
            qualified_name = binary_name
            if not binary_name.startswith(qualified_name_prefix):
                qualified_name = qualified_name_prefix + binary_name

            url = get_signed_url(CORPUS_BACKUP_URL.format(
                project=project_name, fuzzer=qualified_name),
                                 method='GET')

            corpus_archive_path = os.path.join('/corpus',
                                               binary_name + '.zip')
            download_corpus_args.append('%s %s' % (corpus_archive_path, url))

        steps.append({
            'name': runner_image,
            'entrypoint': 'download_corpus',
            'args': download_corpus_args,
            'volumes': [{
                'name': 'corpus',
                'path': '/corpus'
            }],
        })

    return steps


def download_coverage_data_steps(project_name, latest, bucket_name, out_dir):
    """Returns GCB steps to download coverage data for the given project.

    The steps create |out_dir|/textcov_reports, download one
    '<target>.covreport' per fuzz target in batches of at most
    CORPUS_DOWNLOAD_BATCH_SIZE, and finally list the directory.

    Args:
      project_name: Project whose fuzz targets are looked up.
      latest: Path component placed after 'textcov_reports/' in the bucket.
      bucket_name: GCS bucket holding the coverage reports.
      out_dir: Directory under which 'textcov_reports' is created.

    Returns:
      A list of build step dicts, or None if no fuzz targets were found.
    """
    fuzz_targets = _get_targets_list(project_name)
    if not fuzz_targets:
        sys.stderr.write('No fuzz targets found for project "%s".\n' %
                         project_name)
        return None

    runner_image = 'gcr.io/oss-fuzz-base/base-runner'
    steps = [{
        'name': runner_image,
        'args': ['bash', '-c', (f'mkdir -p {out_dir}/textcov_reports')]
    }]

    # Split fuzz targets into batches of CORPUS_DOWNLOAD_BATCH_SIZE.
    for i in range(0, len(fuzz_targets), CORPUS_DOWNLOAD_BATCH_SIZE):
        download_coverage_args = []
        for target_name in fuzz_targets[i:i + CORPUS_DOWNLOAD_BATCH_SIZE]:
            bucket_path = (f'/{bucket_name}/{project_name}/textcov_reports/'
                           f'{latest}/{target_name}.covreport')
            url = 'https://storage.googleapis.com' + bucket_path
            coverage_data_path = os.path.join(f'{out_dir}/textcov_reports',
                                              target_name + '.covreport')
            download_coverage_args.append('%s %s' % (coverage_data_path, url))

        steps.append({
            'name': runner_image,
            'entrypoint': 'download_corpus',
            'args': download_coverage_args
        })

    steps.append({
        'name': runner_image,
        'args': ['bash', '-c', f'ls -lrt {out_dir}/textcov_reports']
    })

    return steps


def http_upload_step(data, signed_url, content_type):
    """Returns a GCB step to upload data to the given URL via GCS HTTP API.

    The step runs curl with a PUT request, a Content-Type header of
    |content_type| and |data| passed through '-d'.
    """
    return {
        'name': 'gcr.io/cloud-builders/curl',
        'args': [
            '-H',
            'Content-Type: ' + content_type,
            '-X',
            'PUT',
            '-d',
            data,
            signed_url,
        ],
    }


def gsutil_rm_rf_step(url):
    """Returns a GCB step to recursively delete the object with given GCS url.

    The command ends with '|| exit 0', so a failed delete does not fail the
    step.
    """
    return {
        'name': 'gcr.io/cloud-builders/gsutil',
        'entrypoint': 'sh',
        'args': [
            '-c',
            'gsutil -m rm -rf %s || exit 0' % url,
        ],
    }


def get_pull_test_images_steps(test_image_suffix):
    """Returns steps to pull testing versions of base-images and tag them so
    that they are used in builds.

    For every base image, '<image>-<test_image_suffix>' is pulled and then
    tagged as '<image>'.
    """
    images = [
        'gcr.io/oss-fuzz-base/base-builder',
        'gcr.io/oss-fuzz-base/base-builder-swift',
        'gcr.io/oss-fuzz-base/base-builder-javascript',
        'gcr.io/oss-fuzz-base/base-builder-jvm',
        'gcr.io/oss-fuzz-base/base-builder-go',
        'gcr.io/oss-fuzz-base/base-builder-python',
        'gcr.io/oss-fuzz-base/base-builder-rust',
        'gcr.io/oss-fuzz-base/base-runner',
    ]
    steps = []
    for image in images:
        test_image = image + '-' + test_image_suffix
        steps.append({
            'name': DOCKER_TOOL_IMAGE,
            'args': [
                'pull',
                test_image,
            ],
            # Start this immediately, don't wait for previous step.
            # NOTE(review): the Cloud Build reference describes waitFor as a
            # list of step ids (['-']); confirm the bare string is intended.
            'waitFor': '-'
        })

        # This step is hacky but gives us great flexibility. OSS-Fuzz has
        # hardcoded references to gcr.io/oss-fuzz-base/base-builder (in
        # dockerfiles, for example) and gcr.io/oss-fuzz-base/base-runner (in
        # this build code). But the testing versions of those images are called
        # e.g. gcr.io/oss-fuzz-base/base-builder-testing and
        # gcr.io/oss-fuzz-base/base-runner-testing. How can we get the build to
        # use the testing images instead of the real ones? By doing this step:
        # tagging the test image with the non-test version, so that the test
        # version is used instead of pulling the real one.
        steps.append({
            'name': DOCKER_TOOL_IMAGE,
            'args': ['tag', test_image, image],
        })
    return steps


def get_srcmap_step_id():
    """Returns the id for the srcmap step."""
    return 'srcmap'


def get_git_clone_step(repo_url='https://github.com/google/oss-fuzz.git',
                       branch=None):
    """Returns the git clone step.

    The clone is shallow ('--depth 1'). If |branch| is given, it is selected
    with '--branch'.
    """
    clone_args = ['clone', repo_url, '--depth', '1']
    if branch:
        # Do this to support testing other branches.
        clone_args.extend(['--branch', branch])

    return {
        'args': clone_args,
        'name': 'gcr.io/cloud-builders/git',
    }


def _make_image_name_architecture_specific(image_name, architecture):
    """Returns an architecture-specific name for |image_name|, formed by
    appending '-' and the lower-cased |architecture|."""
    return f'{image_name}-{architecture.lower()}'


def get_docker_build_step(image_names,
                          directory,
                          buildkit_cache_image=None,
                          src_root='oss-fuzz',
                          architecture='x86_64'):
    """Returns the docker build step.

    Args:
      image_names: Non-empty list of tags for the built image. For the ARM
        architecture each name gets an architecture-specific suffix.
      directory: Build directory, relative to |src_root|.
      buildkit_cache_image: If not None, BuildKit is enabled and this image is
        passed to '--cache-from'. It must be one of the (possibly suffixed)
        tags.
      src_root: Directory that |directory| is joined onto.
      architecture: Target architecture. _ARM64 selects a
        `docker buildx build` for linux/arm64, anything else a plain
        `docker build`.

    Returns:
      A build step dict with 'name', 'args', 'dir' and optionally 'env'.
    """
    assert len(image_names) >= 1
    directory = os.path.join(src_root, directory)

    if architecture != _ARM64:
        args = ['build']
    else:
        args = [
            'buildx', 'build', '--platform', 'linux/arm64', '--progress',
            'plain', '--load'
        ]
        # TODO(metzman): This won't work when we want to build the base-images.
        image_names = [
            _make_image_name_architecture_specific(image_name, architecture)
            for image_name in image_names
        ]
    for image_name in image_names:
        args.extend(['--tag', image_name])

    step = {
        'name': DOCKER_TOOL_IMAGE,
        'args': args,
        'dir': directory,
    }
    # Note that we mutate "args" after making it a value in step.

    if buildkit_cache_image is not None:
        step['env'] = ['DOCKER_BUILDKIT=1']
        assert buildkit_cache_image in args
        args.extend([
            '--build-arg', 'BUILDKIT_INLINE_CACHE=1', '--cache-from',
            buildkit_cache_image
        ])
    # The build context ('.', relative to 'dir') must be the last argument.
    args.append('.')
    return step


def has_arm_build(architectures):
    """Returns True if project has an ARM build."""
    return _ARM64 in architectures


def get_project_image_steps(  # pylint: disable=too-many-arguments
        name,
        image,
        language,
        config,
        architectures=None):
    """Returns GCB steps to build OSS-Fuzz project image.

    The steps clone the repository, optionally pull and retag the test base
    images, build the project image and produce the srcmap. If |architectures|
    includes ARM, steps that set up a buildx builder and build the ARM image
    are appended.

    Args:
      name: Project name; the image is built from 'projects/<name>'.
      image: Name (tag) of the project image.
      language: Value for the FUZZING_LANGUAGE env var of the srcmap step.
      config: Object exposing |repo|, |branch| and |test_image_suffix|.
      architectures: Architectures the project builds for; None means none.
    """
    if architectures is None:
        architectures = []

    # TODO(metzman): Pass the URL to clone.
    steps = [get_git_clone_step(repo_url=config.repo, branch=config.branch)]
    if config.test_image_suffix:
        steps.extend(get_pull_test_images_steps(config.test_image_suffix))

    project_dir = os.path.join('projects', name)
    steps.append(get_docker_build_step([image], project_dir))

    steps.append({
        'name': image,
        'args': [
            'bash', '-c',
            'srcmap > /workspace/srcmap.json && cat /workspace/srcmap.json'
        ],
        'env': [
            'OSSFUZZ_REVISION=$REVISION_ID',
            'FUZZING_LANGUAGE=%s' % language,
        ],
        'id': get_srcmap_step_id()
    })

    if has_arm_build(architectures):
        builder_name = 'buildxbuilder'
        steps.extend([
            {
                'name': DOCKER_TOOL_IMAGE,
                'args': ['run', '--privileged', 'linuxkit/binfmt:v0.8']
            },
            {
                'name': DOCKER_TOOL_IMAGE,
                'args': ['buildx', 'create', '--name', builder_name]
            },
            {
                'name': DOCKER_TOOL_IMAGE,
                'args': ['buildx', 'use', builder_name]
            },
        ])
        steps.append(
            get_docker_build_step([image], project_dir, architecture=_ARM64))

    return steps


def get_logs_url(build_id, project_id='oss-fuzz-base'):
    """Returns url that displays the build logs."""
    return ('https://console.developers.google.com/logs/viewer?'
            f'resource=build%2Fbuild_id%2F{build_id}&project={project_id}')


def get_gcb_url(build_id, cloud_project='oss-fuzz'):
    """Returns url where logs are displayed for the build."""
    return (
        'https://console.cloud.google.com/cloud-build/builds;region=us-central1/'
        f'{build_id}?project={cloud_project}')


def get_runner_image_name(base_images_project, test_image_suffix):
    """Returns the runner image that should be used, based on
    |base_images_project|. Returns the testing image if |test_image_suffix|."""
    image = f'gcr.io/{base_images_project}/base-runner'
    if test_image_suffix:
        image += '-' + test_image_suffix
    return image


def get_build_body(steps,
                   timeout,
                   body_overrides,
                   build_tags,
                   use_build_pool=True):
    """Helper function to create a build from |steps|.

    Args:
      steps: List of build step dicts.
      timeout: Build timeout in seconds; sent as '<timeout>s'.
      body_overrides: Optional dict whose entries replace top-level keys of
        the build body. None means no overrides.
      build_tags: Optional tags for the build; omitted when falsy.
      use_build_pool: If true, the build runs in OSS_FUZZ_BUILDPOOL_NAME.

    Returns:
      The build body dict. Its 'options' start from the YAML in the GCB_OPTIONS
      environment variable when that variable is set.
    """
    options = {}
    if 'GCB_OPTIONS' in os.environ:
        # An empty GCB_OPTIONS parses to None; fall back to an empty dict so
        # that the pool assignment below does not fail.
        options = yaml.safe_load(os.environ['GCB_OPTIONS']) or {}

    if use_build_pool:
        options['pool'] = {'name': OSS_FUZZ_BUILDPOOL_NAME}

    build_body = {
        'steps': steps,
        'timeout': str(timeout) + 's',
        'options': options,
    }
    if build_tags:
        build_body['tags'] = build_tags

    if body_overrides:
        build_body.update(body_overrides)
    return build_body


def run_build(  # pylint: disable=too-many-arguments
        steps,
        credentials,
        cloud_project,
        timeout,
        body_overrides=None,
        tags=None,
        use_build_pool=True):
    """Runs the build.

    Creates a Cloud Build build in |cloud_project| through the us-central1
    endpoint and logs the build id together with its log and build page URLs.

    Returns:
      The id of the created build.
    """
    build_body = get_build_body(steps,
                                timeout,
                                body_overrides,
                                tags,
                                use_build_pool=use_build_pool)

    cloudbuild = cloud_build('cloudbuild',
                             'v1',
                             credentials=credentials,
                             cache_discovery=False,
                             client_options=US_CENTRAL_CLIENT_OPTIONS)

    build_info = cloudbuild.projects().builds().create(
        projectId=cloud_project, body=build_body).execute()

    build_id = build_info['metadata']['build']['id']

    logging.info('Build ID: %s', build_id)
    logging.info('Logs: %s', get_logs_url(build_id, cloud_project))
    logging.info('Cloud build page: %s', get_gcb_url(build_id, cloud_project))
    return build_id