Don't use GitHub API for project syncs. (#8589)

Download the zip and unzip it instead. There should be enough memory for
this on the tmpfs environment on Cloud Functions, but bump up the memory
to 4GB just in case.

Create an `OssFuzzRepo` class which replicates the GitHub client library
interface to avoid changing too many things.

Fixes #8565
This commit is contained in:
Oliver Chang 2022-09-26 17:46:18 +10:00 committed by GitHub
parent 4f2e7adc87
commit c278d2a190
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 57 additions and 25 deletions

View File

@ -79,7 +79,7 @@ function deploy_cloud_function {
--region us-central1 \
--set-env-vars GCP_PROJECT=$project,FUNCTION_REGION=us-central1 \
--max-instances 1 \
--memory 2048MB
--memory 4096MB
}
if [ $# == 1 ]; then

View File

@ -16,19 +16,21 @@
"""Cloud functions for build scheduling."""
from collections import namedtuple
import io
import logging
import os
import re
import yaml
import tempfile
import urllib.request
import zipfile
from github import Github
from google.api_core import exceptions
from google.cloud import ndb
from google.cloud import scheduler_v1
import yaml
import build_and_run_coverage
import build_project
from datastore_entities import GithubCreds
from datastore_entities import Project
VALID_PROJECT_NAME = re.compile(r'^[a-zA-Z0-9_-]+$')
@ -43,9 +45,54 @@ INTROSPECTOR_BUILD_TOPIC = 'request-introspector-build'
ProjectMetadata = namedtuple(
'ProjectMetadata', 'schedule project_yaml_contents dockerfile_contents')
# Record describing one directory entry returned by OssFuzzRepo.get_contents.
Content = namedtuple('Content', 'type path name decoded_content')

logging.basicConfig(level=logging.INFO)


# pylint: disable=too-few-public-methods
class OssFuzzRepo:
  """Local snapshot of the OSS-Fuzz repo, unpacked from the master zip.

  The constructor downloads and extracts the archive; `get_contents` then
  lists a directory of the extracted tree as `Content` tuples.
  """

  _MASTER_ZIP_LINK = (
      'https://github.com/google/oss-fuzz/archive/refs/heads/master.zip')
  # Top-level directory the archive is expected to unpack into.
  _ARCHIVE_ROOT = 'oss-fuzz-master'
  # Socket timeout in seconds, so a stalled download fails instead of hanging.
  _DOWNLOAD_TIMEOUT_SECONDS = 300

  def __init__(self, out_dir):
    """Downloads the master zip and extracts it into out_dir.

    Args:
      out_dir: Directory the archive is extracted into.
    """
    with urllib.request.urlopen(
        self._MASTER_ZIP_LINK,
        timeout=self._DOWNLOAD_TIMEOUT_SECONDS) as response:
      zip_contents = response.read()

    # The archive is held fully in memory and unpacked from there.
    with zipfile.ZipFile(io.BytesIO(zip_contents)) as zip_file:
      zip_file.extractall(out_dir)

    self._out_dir = out_dir

  @property
  def _repo_dir(self):
    """Path of the extracted repository root inside the output directory."""
    return os.path.join(self._out_dir, self._ARCHIVE_ROOT)

  def get_contents(self, path):
    """Gets contents of path.

    Args:
      path: Directory to list, relative to the repository root.

    Returns:
      A list of `Content` tuples, one per directory entry, sorted by entry
      name. Directories have type 'dir' and `decoded_content` of None; every
      other entry has type 'file' and its raw bytes as `decoded_content`.

    Raises:
      OSError: If path cannot be listed (e.g. it is missing or not a
        directory) or an entry cannot be read.
    """
    contents = []
    list_path = os.path.join(self._repo_dir, path)
    # os.listdir returns entries in arbitrary order; sort so that callers see
    # the same listing on every run and filesystem.
    for item in sorted(os.listdir(list_path)):
      full_path = os.path.join(list_path, item)
      rel_path = os.path.relpath(full_path, self._repo_dir)
      if os.path.isdir(full_path):
        file_type = 'dir'
        decoded_content = None
      else:
        file_type = 'file'
        with open(full_path, mode='rb') as file:
          decoded_content = file.read()

      contents.append(Content(file_type, rel_path, item, decoded_content))

    return contents
class ProjectYamlError(Exception):
"""Error in project.yaml format."""
@ -222,22 +269,13 @@ def get_projects(repo):
return projects
def get_github_creds():
  """Fetches the stored GitHub client credentials.

  Returns:
    The entity returned by the GithubCreds query.

  Raises:
    RuntimeError: If the query yields no entity.
  """
  stored_creds = GithubCreds.query().get()
  if stored_creds is not None:
    return stored_creds

  raise RuntimeError('Git credentials not available.')
def sync(event, context):
"""Sync projects with cloud datastore."""
# Neither argument is used; both names are deleted on the next line.
del event, context # Unused.
# Everything below is meant to run inside an ndb client context.
with ndb.Client().context():
# NOTE(review): the statements below look like two alternative ways of
# building `repo` shown back to back: first through the GitHub client using
# stored credentials, then by constructing OssFuzzRepo inside a temporary
# directory. Presumably only one of the two is meant to be live; confirm
# against the raw diff before editing.
git_creds = get_github_creds()
github_client = Github(git_creds.client_id, git_creds.client_secret)
repo = github_client.get_repo('google/oss-fuzz')
projects = get_projects(repo)
cloud_scheduler_client = scheduler_v1.CloudSchedulerClient()
sync_projects(cloud_scheduler_client, projects)
# The temporary directory receives the extracted repository and is removed
# when the `with` block exits.
with tempfile.TemporaryDirectory() as temp_dir:
repo = OssFuzzRepo(temp_dir)
projects = get_projects(repo)
cloud_scheduler_client = scheduler_v1.CloudSchedulerClient()
sync_projects(cloud_scheduler_client, projects)

View File

@ -26,7 +26,6 @@ sys.path.append(os.path.dirname(__file__))
# pylint: disable=wrong-import-position
from datastore_entities import Project
from project_sync import get_github_creds
from project_sync import get_projects
from project_sync import ProjectMetadata
from project_sync import sync_projects
@ -354,11 +353,6 @@ class TestDataSync(unittest.TestCase):
self.assertEqual(get_projects(repo), {})
def test_get_github_creds(self):
"""Testing get_github_creds()."""
# Expects get_github_creds to raise RuntimeError when called inside an ndb
# client context; presumably the test datastore holds no GithubCreds
# entity. TODO confirm.
with ndb.Client().context():
self.assertRaises(RuntimeError, get_github_creds)
@classmethod
def tearDownClass(cls):
test_utils.cleanup_emulator(cls.ds_emulator)

View File

@ -18,7 +18,7 @@ Brotli==1.0.9
hiredis==1.1.0
PyYaml==5.4
PyGithub==1.51
grpcio==1.29.0
grpcio==1.49.1
google-auth==1.21.1
google-cloud-datastore<2.0
google-cloud-ndb==1.7.1