From 181812050dc1be74dcdebd39c919dc90e0cda3a3 Mon Sep 17 00:00:00 2001
From: kabeer27 <32016558+kabeer27@users.noreply.github.com>
Date: Thu, 25 Jun 2020 03:41:04 +0000
Subject: [PATCH] Cloud function for syncing OSS-Fuzz projects (#3538). (#4023)

Integrating the first cloud function I implemented, which syncs the project
list from GitHub and uploads it to Cloud Datastore, where it will be used by
another cloud function to request builds.

Co-authored-by: Kabeer Seth
---
 .github/workflows/presubmit.yml        |   4 +-
 .gitignore                             |   2 +-
 infra/build/functions/main.py          | 136 ++++++++++++++++
 infra/build/functions/requirements.txt |  18 +++
 infra/build/functions/test_sync.py     | 208 +++++++++++++++++++++++++
 5 files changed, 366 insertions(+), 2 deletions(-)
 create mode 100644 infra/build/functions/main.py
 create mode 100644 infra/build/functions/requirements.txt
 create mode 100644 infra/build/functions/test_sync.py

diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml
index a2e85ce70..e45dca7ee 100644
--- a/.github/workflows/presubmit.yml
+++ b/.github/workflows/presubmit.yml
@@ -17,7 +17,9 @@ jobs:
         python-version: 3.7
 
     - name: Install dependencies
-      run: pip install -r infra/ci/requirements.txt
+      run: |
+        pip install -r infra/ci/requirements.txt
+        pip install -r infra/build/functions/requirements.txt
 
     - name: Run presubmit checks
       run: python infra/presubmit.py
diff --git a/.gitignore b/.gitignore
index 63a2602ff..0b41c101a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
 .vscode/
 *.pyc
-build/
+/build/
 *~
\ No newline at end of file
diff --git a/infra/build/functions/main.py b/infra/build/functions/main.py
new file mode 100644
index 000000000..64b8a3699
--- /dev/null
+++ b/infra/build/functions/main.py
@@ -0,0 +1,136 @@
+# Copyright 2020 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+"""Cloud functions for build scheduling."""
+
+import re
+
+from github import Github
+from google.cloud import ndb
+
+VALID_PROJECT_NAME = re.compile(r'^[a-zA-Z0-9_-]+$')
+
+
+# pylint: disable=too-few-public-methods
+class Project(ndb.Model):
+  """Represents an integrated OSS-Fuzz project."""
+  name = ndb.StringProperty()
+
+
+# pylint: disable=too-few-public-methods
+class GitAuth(ndb.Model):
+  """Represents Github access token entity."""
+  access_token = ndb.StringProperty()
+
+
+def sync_projects(projects):
+  """Sync the Project entities in Cloud Datastore with |projects|.
+
+  Entities whose name is not in |projects| are deleted, and an entity is
+  created for every name that has none yet. sync() calls this inside an ndb
+  client context.
+
+  Args:
+    projects: Collection of project names that should remain stored.
+  """
+  # Run the query once: names removed below are never in |projects|, so
+  # the same snapshot is also valid for deciding what to insert.
+  entities = list(Project.query())
+
+  projects_to_remove = [
+      project.key for project in entities if project.name not in projects
+  ]
+  ndb.delete_multi(projects_to_remove)
+
+  existing_projects = {project.name for project in entities}
+  new_projects = [
+      Project(name=project)
+      for project in projects
+      if project not in existing_projects
+  ]
+  ndb.put_multi(new_projects)
+
+
+def _has_docker_file(repo, project_path):
+  """Checks if project has a Dockerfile.
+
+  Args:
+    repo: Repository object providing get_contents(path).
+    project_path: Path of the project directory inside the repository.
+
+  Returns:
+    True if an entry named 'Dockerfile' is listed for |project_path|.
+  """
+  return any(content_file.name == 'Dockerfile'
+             for content_file in repo.get_contents(project_path))
+
+
+def get_projects(repo):
+  """Get project list from git repository.
+
+  Args:
+    repo: Repository object providing get_contents(path).
+
+  Returns:
+    Set of names of the directories under projects/ that have a valid project
+    name and contain a Dockerfile.
+  """
+  contents = repo.get_contents('projects')
+  # The name check runs before _has_docker_file() so that entries with an
+  # invalid name are rejected without another get_contents() request.
+  projects = {
+      content_file.name
+      for content_file in contents
+      if content_file.type == 'dir' and
+      VALID_PROJECT_NAME.match(content_file.name) and
+      _has_docker_file(repo, content_file.path)
+  }
+  return projects
+
+
+def get_access_token():
+  """Retrieves Github's Access token from Cloud Datastore.
+
+  Returns:
+    The access_token of the GitAuth entity returned by the query.
+
+  Raises:
+    RuntimeError: If no GitAuth entity is stored.
+  """
+  token = GitAuth.query().get()
+  if token is None:
+    raise RuntimeError('No access token available')
+  return token.access_token
+
+
+def sync(event, context):
+  """Sync projects with cloud datastore.
+
+  Reads the project list from the google/oss-fuzz GitHub repository and
+  stores it as Project entities.
+
+  Args:
+    event: Unused.
+    context: Unused.
+  """
+  del event, context  # Unused.
+  client = ndb.Client()
+
+  with client.context():
+    github_client = Github(get_access_token())
+    repo = github_client.get_repo('google/oss-fuzz')
+    projects = get_projects(repo)
+
+    sync_projects(projects)
diff --git a/infra/build/functions/requirements.txt b/infra/build/functions/requirements.txt
new file mode 100644
index 000000000..e2526bedb
--- /dev/null
+++ b/infra/build/functions/requirements.txt
@@ -0,0 +1,18 @@
+# Copyright 2020 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+PyGithub==1.51
+google-cloud-ndb==1.3.0
diff --git a/infra/build/functions/test_sync.py b/infra/build/functions/test_sync.py
new file mode 100644
index 000000000..8f54b9e88
--- /dev/null
+++ b/infra/build/functions/test_sync.py
@@ -0,0 +1,208 @@
+# Copyright 2020 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+"""Unit tests for Cloud Function sync, which syncs the list of github projects
+and uploads them to the Cloud Datastore."""
+
+import os
+import unittest
+import subprocess
+import threading
+
+from google.cloud import ndb
+
+from main import sync_projects
+from main import get_projects
+from main import get_access_token
+from main import Project
+
+_EMULATOR_TIMEOUT = 20
+_DATASTORE_READY_INDICATOR = b'is now running'
+
+
+def start_datastore_emulator():
+  """Start Datastore emulator.
+
+  The port and project id are read from the DATASTORE_EMULATOR_PORT and
+  DATASTORE_PROJECT_ID environment variables.
+
+  Returns:
+    The subprocess.Popen object of the emulator. Its stderr is merged into
+    stdout, which is exposed as a pipe.
+
+  Raises:
+    KeyError: If one of the environment variables is not set.
+  """
+  return subprocess.Popen([
+      'gcloud',
+      'beta',
+      'emulators',
+      'datastore',
+      'start',
+      '--consistency=1.0',
+      '--host-port=localhost:' + os.environ['DATASTORE_EMULATOR_PORT'],
+      '--project=' + os.environ['DATASTORE_PROJECT_ID'],
+      '--no-store-on-disk',
+  ],
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.STDOUT)
+
+
+def _wait_for_emulator_ready(proc,
+                             emulator,
+                             indicator,
+                             timeout=_EMULATOR_TIMEOUT):
+  """Wait for emulator to be ready.
+
+  Args:
+    proc: Emulator process whose stdout is a readable pipe.
+    emulator: Emulator name used in the error message.
+    indicator: Bytes whose presence in an output line signals readiness.
+    timeout: Maximum number of seconds to wait.
+
+  Returns:
+    The daemon thread that keeps reading the process output.
+
+  Raises:
+    RuntimeError: If the indicator is not seen within |timeout| seconds.
+  """
+
+  def _read_thread(proc, ready_event):
+    """Thread to continuously read from the process stdout."""
+    ready = False
+    while True:
+      line = proc.stdout.readline()
+      if not line:
+        break
+      if not ready and indicator in line:
+        ready = True
+        ready_event.set()
+
+  # Wait for process to become ready.
+  ready_event = threading.Event()
+  thread = threading.Thread(target=_read_thread, args=(proc, ready_event))
+  thread.daemon = True
+  thread.start()
+  if not ready_event.wait(timeout):
+    raise RuntimeError(
+        '{} emulator did not get ready in time.'.format(emulator))
+  return thread
+
+
+# pylint: disable=too-few-public-methods
+class Repository:
+  """Mocking Github Repository."""
+
+  def __init__(self, name, file_type, path, contents=None):
+    self.contents = contents or []
+    self.name = name
+    self.type = file_type
+    self.path = path
+
+  def get_contents(self, path):
+    """Get contents of repository."""
+    if self.path == path:
+      return self.contents
+
+    for content_file in self.contents:
+      if content_file.path == path:
+        return content_file.contents
+
+    return None
+
+
+class TestDataSync(unittest.TestCase):
+  """Unit tests for sync."""
+
+  def test_sync_projects(self):
+    """Testing sync_projects()."""
+    client = ndb.Client()
+
+    with client.context():
+      Project(name='test1').put()
+      Project(name='test2').put()
+
+      projects = {'test1', 'test3'}
+      sync_projects(projects)
+
+      projects_query = Project.query()
+      self.assertEqual(projects, {project.name for project in projects_query})
+
+  def test_get_projects(self):
+    """Testing get_projects()."""
+
+    repo = Repository('oss-fuzz', 'dir', 'projects', [
+        Repository(
+            'test0', 'dir', 'projects/test0',
+            [Repository('Dockerfile', 'file', 'projects/test0/Dockerfile')]),
+        Repository(
+            'test1', 'dir', 'projects/test1',
+            [Repository('Dockerfile', 'file', 'projects/test1/Dockerfile')])
+    ])
+
+    self.assertEqual(get_projects(repo), {'test0', 'test1'})
+
+  def test_get_projects_no_docker_file(self):
+    """Testing get_projects() with missing dockerfile"""
+
+    repo = Repository('oss-fuzz', 'dir', 'projects', [
+        Repository(
+            'test0', 'dir', 'projects/test0',
+            [Repository('Dockerfile', 'file', 'projects/test0/Dockerfile')]),
+        Repository('test1', 'dir', 'projects/test1')
+    ])
+
+    self.assertEqual(get_projects(repo), {'test0'})
+
+  def test_get_projects_invalid_project_name(self):
+    """Testing get_projects() with invalid project name"""
+
+    repo = Repository('oss-fuzz', 'dir', 'projects', [
+        Repository(
+            'test0', 'dir', 'projects/test0',
+            [Repository('Dockerfile', 'file', 'projects/test0/Dockerfile')]),
+        Repository(
+            'test1@', 'dir', 'projects/test1',
+            [Repository('Dockerfile', 'file', 'projects/test1/Dockerfile')])
+    ])
+
+    self.assertEqual(get_projects(repo), {'test0'})
+
+  def test_get_projects_non_directory_type_project(self):
+    """Testing get_projects() when a file in projects/ is not of type 'dir'."""
+
+    repo = Repository('oss-fuzz', 'dir', 'projects', [
+        Repository(
+            'test0', 'dir', 'projects/test0',
+            [Repository('Dockerfile', 'file', 'projects/test0/Dockerfile')]),
+        Repository('test1', 'file', 'projects/test1')
+    ])
+
+    self.assertEqual(get_projects(repo), {'test0'})
+
+  def test_get_access_token(self):
+    """Testing get_access_token()."""
+    client = ndb.Client()
+
+    with client.context():
+      self.assertRaises(RuntimeError, get_access_token)
+
+
+if __name__ == '__main__':
+  DS_EMULATOR = start_datastore_emulator()
+  _wait_for_emulator_ready(DS_EMULATOR, 'datastore', _DATASTORE_READY_INDICATOR)
+  unittest.main(exit=False)
+  # TODO: replace this with a cleaner way of killing the process
+  os.system('pkill -f datastore')