"""Utility module for Google Cloud Build scripts."""

import base64
import collections
import os
import sys
import time
import urllib
import urlparse

import requests
from oauth2client.service_account import ServiceAccountCredentials

# Build timeout in seconds (12 hours); also used as the signed URL lifetime.
BUILD_TIMEOUT = 12 * 60 * 60

# Needed for reading public targets.list.* files.
GCS_URL_BASENAME = 'https://storage.googleapis.com/'

# GCS object path format: /<bucket>/<project>/<filename>.
GCS_UPLOAD_URL_FORMAT = '/{0}/{1}/{2}'

# Where corpus backups can be downloaded from.
CORPUS_BACKUP_URL = ('/{project}-backup.clusterfuzz-external.appspot.com/'
                     'corpus/libFuzzer/{fuzzer}/latest.zip')
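
# Illustrative expansion (hypothetical project and target names): with
# project='zlib' and fuzzer='zlib_uncompress_fuzzer', this becomes
# '/zlib-backup.clusterfuzz-external.appspot.com/corpus/libFuzzer/'
# 'zlib_uncompress_fuzzer/latest.zip'.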

# Cloud Builder has a limit of 100 build steps and 100 arguments for each step.
CORPUS_DOWNLOAD_BATCH_SIZE = 100

TARGETS_LIST_BASENAME = 'targets.list'

EngineInfo = collections.namedtuple(
    'EngineInfo',
    ['upload_bucket', 'supported_sanitizers', 'supported_architectures'])

ENGINE_INFO = {
    'libfuzzer':
        EngineInfo(upload_bucket='clusterfuzz-builds',
                   supported_sanitizers=['address', 'memory', 'undefined'],
                   supported_architectures=['x86_64', 'i386']),
    'afl':
        EngineInfo(upload_bucket='clusterfuzz-builds-afl',
                   supported_sanitizers=['address'],
                   supported_architectures=['x86_64']),
    'honggfuzz':
        EngineInfo(upload_bucket='clusterfuzz-builds-honggfuzz',
                   supported_sanitizers=['address', 'memory', 'undefined'],
                   supported_architectures=['x86_64']),
    'dataflow':
        EngineInfo(upload_bucket='clusterfuzz-builds-dataflow',
                   supported_sanitizers=['dataflow'],
                   supported_architectures=['x86_64']),
    'none':
        EngineInfo(upload_bucket='clusterfuzz-builds-no-engine',
                   supported_sanitizers=['address'],
                   supported_architectures=['x86_64']),
}
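
# Illustrative lookup (hypothetical caller code): build scripts can validate a
# requested configuration against this table, e.g.:
#   info = ENGINE_INFO['libfuzzer']
#   assert 'address' in info.supported_sanitizers
#   assert 'x86_64' in info.supported_architectures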


def get_targets_list_filename(sanitizer):
  """Returns the name of the targets list file for the given sanitizer."""
  return TARGETS_LIST_BASENAME + '.' + sanitizer
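
# For example, get_targets_list_filename('address') returns
# 'targets.list.address'.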


def get_targets_list_url(bucket, project, sanitizer):
  """Returns the GCS path of the targets list for the given configuration."""
  filename = get_targets_list_filename(sanitizer)
  url = GCS_UPLOAD_URL_FORMAT.format(bucket, project, filename)
  return url
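
# Illustrative result (hypothetical project name):
#   get_targets_list_url('clusterfuzz-builds', 'zlib', 'address')
#   -> '/clusterfuzz-builds/zlib/targets.list.address'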


def _get_targets_list(project_name):
  """Returns the list of fuzz targets for the project, or None on failure."""
  # libFuzzer ASan is the default configuration, get list of targets from it.
  url = get_targets_list_url(ENGINE_INFO['libfuzzer'].upload_bucket,
                             project_name, 'address')

  url = urlparse.urljoin(GCS_URL_BASENAME, url)
  response = requests.get(url)
  if response.status_code != 200:
    sys.stderr.write('Failed to get list of targets from "%s".\n' % url)
    sys.stderr.write('Status code: %d \t\tText:\n%s\n' %
                     (response.status_code, response.text))
    return None

  return response.text.split()
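
# Illustrative targets list contents (hypothetical target names): the file is
# a whitespace-separated list of fuzz target binary names, e.g.
#   zlib_uncompress_fuzzer
#   zlib_compress_fuzzer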


def get_signed_url(path, method='PUT', content_type=''):
  """Returns a signed GCS URL for |path|, valid for BUILD_TIMEOUT seconds."""
  timestamp = int(time.time() + BUILD_TIMEOUT)
  blob = '{0}\n\n{1}\n{2}\n{3}'.format(method, content_type, timestamp, path)

  creds = ServiceAccountCredentials.from_json_keyfile_name(
      os.environ['GOOGLE_APPLICATION_CREDENTIALS'])
  client_id = creds.service_account_email
  signature = base64.b64encode(creds.sign_blob(blob)[1])
  values = {
      'GoogleAccessId': client_id,
      'Expires': timestamp,
      'Signature': signature,
  }

  return ('https://storage.googleapis.com{0}?'.format(path) +
          urllib.urlencode(values))
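
# Illustrative usage (hypothetical object path): requesting a download URL for
# a corpus backup might look like
#   get_signed_url('/some-bucket/some/object.zip', method='GET')
# The returned URL embeds the signature and expires BUILD_TIMEOUT seconds from
# the time of the call. GOOGLE_APPLICATION_CREDENTIALS must point to a service
# account JSON key file.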


def download_corpora_steps(project_name):
  """Returns GCB steps for downloading corpora backups for the given project."""
  fuzz_targets = _get_targets_list(project_name)
  if not fuzz_targets:
    sys.stderr.write('No fuzz targets found for project "%s".\n' % project_name)
    return None

  steps = []
  # Split fuzz targets into batches of CORPUS_DOWNLOAD_BATCH_SIZE so that no
  # step exceeds the Cloud Builder argument limit.
  for i in range(0, len(fuzz_targets), CORPUS_DOWNLOAD_BATCH_SIZE):
    download_corpus_args = []
    for binary_name in fuzz_targets[i:i + CORPUS_DOWNLOAD_BATCH_SIZE]:
      qualified_name = binary_name
      qualified_name_prefix = '%s_' % project_name
      if not binary_name.startswith(qualified_name_prefix):
        qualified_name = qualified_name_prefix + binary_name

      url = get_signed_url(CORPUS_BACKUP_URL.format(project=project_name,
                                                    fuzzer=qualified_name),
                           method='GET')

      corpus_archive_path = os.path.join('/corpus', binary_name + '.zip')
      download_corpus_args.append('%s %s' % (corpus_archive_path, url))

    # One download_corpus step per batch; all steps share the 'corpus' volume.
    steps.append({
        'name': 'gcr.io/oss-fuzz-base/base-runner',
        'entrypoint': 'download_corpus',
        'args': download_corpus_args,
        'volumes': [{
            'name': 'corpus',
            'path': '/corpus'
        }],
    })

  return steps
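
# Illustrative caller sketch (hypothetical): the returned steps are meant to be
# appended to a Cloud Build configuration's step list, e.g.:
#   corpora_steps = download_corpora_steps('zlib')
#   if corpora_steps:
#     build_steps.extend(corpora_steps)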