From afd8a78b0b938542c94ad55569ca78a153cbbe30 Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Wed, 14 Dec 2022 10:53:00 +0000 Subject: [PATCH] infra: build: provide correct corpus name (#9189) This is a follow-up to https://github.com/google/oss-fuzz/pull/9167 -- the change should also be applied in the build functions that are responsible for downloading the corpus. Signed-off-by: David Korczynski --- infra/build/functions/build_lib.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/infra/build/functions/build_lib.py b/infra/build/functions/build_lib.py index a33deff3e..311ab74d8 100644 --- a/infra/build/functions/build_lib.py +++ b/infra/build/functions/build_lib.py @@ -18,6 +18,7 @@ import base64 import collections import logging import os +import re import six.moves.urllib.parse as urlparse import sys import time @@ -43,6 +44,9 @@ GCS_UPLOAD_URL_FORMAT = '/{0}/{1}/{2}' CORPUS_BACKUP_URL = ('/{project}-backup.clusterfuzz-external.appspot.com/' 'corpus/libFuzzer/{fuzzer}/latest.zip') +# Regex to match special chars in project name. +SPECIAL_CHARS_REGEX = re.compile('[^a-zA-Z0-9_-]') + # Cloud Builder has a limit of 100 build steps and 100 arguments for each step. CORPUS_DOWNLOAD_BATCH_SIZE = 100 @@ -193,6 +197,13 @@ def get_signed_url(path, method='PUT', content_type=''): return f'https://storage.googleapis.com{path}?{urlparse.urlencode(values)}' +def _normalized_name(name): + """Return normalized name with special chars like slash, colon, etc normalized + to hyphen(-). This is important as otherwise these chars break local and cloud + storage paths.""" + return SPECIAL_CHARS_REGEX.sub('-', name).strip('-') + + def download_corpora_steps(project_name, test_image_suffix): """Returns GCB steps for downloading corpora backups for the given project. 
""" @@ -211,6 +222,9 @@ def download_corpora_steps(project_name, test_image_suffix): if not binary_name.startswith(qualified_name_prefix): qualified_name = qualified_name_prefix + binary_name + # Normalize qualified_name. + qualified_name = _normalized_name(qualified_name) + url = get_signed_url(CORPUS_BACKUP_URL.format(project=project_name, fuzzer=qualified_name), method='GET')