oss-fuzz/infra/cifuzz/clusterfuzz_deployment.py

# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for interacting with the "ClusterFuzz deployment."""
import logging
import os
import sys
import tempfile
import time
import urllib.error
import urllib.request
import zipfile

# pylint: disable=wrong-import-position,import-error
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import utils


class BaseClusterFuzzDeployment:
  """Base class for ClusterFuzz deployments."""

  CORPUS_DIR_NAME = 'cifuzz-corpus'
  BUILD_DIR_NAME = 'cifuzz-latest-build'

  def __init__(self, config):
    self.config = config

  def download_latest_build(self, out_dir):
    """Downloads the latest build from ClusterFuzz.

    Returns:
      A path to where the OSS-Fuzz build was stored, or None if it wasn't.
    """
    raise NotImplementedError('Child class must implement method.')

  def download_corpus(self, target_name, out_dir):
    """Downloads the corpus for |target_name| from ClusterFuzz to |out_dir|.

    Returns:
      A path to where the OSS-Fuzz build was stored, or None if it wasn't.
    """
    raise NotImplementedError('Child class must implement method.')


class ClusterFuzzLite(BaseClusterFuzzDeployment):
  """Class representing a deployment of ClusterFuzzLite."""

  def download_latest_build(self, out_dir):
    logging.info('download_latest_build not implemented for ClusterFuzzLite.')

  def download_corpus(self, target_name, out_dir):
    logging.info('download_corpus not implemented for ClusterFuzzLite.')


class OSSFuzz(BaseClusterFuzzDeployment):
  """The OSS-Fuzz ClusterFuzz deployment."""

  # Location of clusterfuzz builds on GCS.
  CLUSTERFUZZ_BUILDS = 'clusterfuzz-builds'

  # Format string for the latest version of a project's build.
  VERSION_STRING = '{project_name}-{sanitizer}-latest.version'

  # Zip file name containing the corpus.
  CORPUS_ZIP_NAME = 'public.zip'

  def get_latest_build_name(self):
    """Gets the name of the latest OSS-Fuzz build of a project.

    Returns:
      A string with the latest build version or None.
    """
    version_file = self.VERSION_STRING.format(
        project_name=self.config.project_name, sanitizer=self.config.sanitizer)
    version_url = utils.url_join(utils.GCS_BASE_URL, self.CLUSTERFUZZ_BUILDS,
                                 self.config.project_name, version_file)
    try:
      response = urllib.request.urlopen(version_url)
    except urllib.error.HTTPError:
      logging.error('Error getting latest build version for %s from: %s.',
                    self.config.project_name, version_url)
      return None
    return response.read().decode()

  def download_latest_build(self, out_dir):
    """Downloads the latest OSS-Fuzz build from GCS.

    Returns:
      A path to where the OSS-Fuzz build was stored, or None if it wasn't.
    """
    build_dir = os.path.join(out_dir, self.BUILD_DIR_NAME)
    if os.path.exists(build_dir):
      return build_dir

    os.makedirs(build_dir, exist_ok=True)

    latest_build_name = self.get_latest_build_name()
    if not latest_build_name:
      return None

    oss_fuzz_build_url = utils.url_join(utils.GCS_BASE_URL,
                                        self.CLUSTERFUZZ_BUILDS,
                                        self.config.project_name,
                                        latest_build_name)
    if download_and_unpack_zip(oss_fuzz_build_url, build_dir):
      return build_dir

    return None

  def download_corpus(self, target_name, out_dir):
    """Downloads the latest OSS-Fuzz corpus for the target.

    Returns:
      The local path to to corpus or None if download failed.
    """
    corpus_dir = os.path.join(out_dir, self.CORPUS_DIR_NAME, target_name)
    os.makedirs(corpus_dir, exist_ok=True)
    # TODO(metzman): Clean up this code.
    project_qualified_fuzz_target_name = target_name
    qualified_name_prefix = self.config.project_name + '_'

    if not target_name.startswith(qualified_name_prefix):
      project_qualified_fuzz_target_name = qualified_name_prefix + target_name

    corpus_url = utils.url_join(
        utils.GCS_BASE_URL,
        '{0}-backup.clusterfuzz-external.appspot.com/corpus/libFuzzer/'.format(
            self.config.project_name), project_qualified_fuzz_target_name,
        self.CORPUS_ZIP_NAME)

    if download_and_unpack_zip(corpus_url, corpus_dir):
      return corpus_dir

    return None


def download_url(url, filename, num_attempts=3):
  """Downloads the file located at |url|, using HTTP to |filename|.

  Args:
    url: A url to a file to download.
    filename: The path the file should be downloaded to.
    num_retries: The number of times to retry the download on
       ConnectionResetError.

  Returns:
    True on success.
  """
  sleep_time = 1

  # Don't use retry wrapper since we don't want this to raise any exceptions.
  for _ in range(num_attempts):
    try:
      urllib.request.urlretrieve(url, filename)
      return True
    except urllib.error.HTTPError:
      # In these cases, retrying probably wont work since the error probably
      # means there is nothing at the URL to download.
      logging.error('Unable to download from: %s.', url)
      return False
    except ConnectionResetError:
      # These errors are more likely to be transient. Retry.
      pass
    time.sleep(sleep_time)

  logging.error('Failed to download %s, %d times.', url, num_attempts)

  return False


def download_and_unpack_zip(url, extract_directory):
  """Downloads and unpacks a zip file from an HTTP URL.

  Args:
    url: A url to the zip file to be downloaded and unpacked.
    out_dir: The path where the zip file should be extracted to.

  Returns:
    True on success.
  """
  if not os.path.exists(extract_directory):
    logging.error('Extract directory: %s does not exist.', extract_directory)
    return False

  # Gives the temporary zip file a unique identifier in the case that
  # that download_and_unpack_zip is done in parallel.
  with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file:
    if not download_url(url, tmp_file.name):
      return False

    try:
      with zipfile.ZipFile(tmp_file.name, 'r') as zip_file:
        zip_file.extractall(extract_directory)
    except zipfile.BadZipFile:
      logging.error('Error unpacking zip from %s. Bad Zipfile.', url)
      return False

  return True


def get_clusterfuzz_deployment(config):
  """Returns object reprsenting deployment of ClusterFuzz used by |config|."""
  if (config.platform == config.Platform.INTERNAL_GENERIC_CI or
      config.platform == config.Platform.INTERNAL_GITHUB):
    logging.info('Using OSS-Fuzz as ClusterFuzz deployment.')
    return OSSFuzz(config)
  logging.info('Using ClusterFuzzLite as ClusterFuzz deployment.')
  return ClusterFuzzLite(config)