Cloud function for syncing OSS-Fuzz projects (#3538). (#4023)

Integrates the first cloud function I implemented, which syncs the project list from GitHub and uploads it to Cloud Datastore, where another cloud function will use it to request builds.

Co-authored-by: Kabeer Seth <kabeerseth@google.com>
This commit is contained in:
kabeer27 2020-06-25 03:41:04 +00:00 committed by GitHub
parent d647392eda
commit 181812050d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 301 additions and 2 deletions

View File

@ -17,7 +17,9 @@ jobs:
python-version: 3.7
- name: Install dependencies
run: pip install -r infra/ci/requirements.txt
run:
pip install -r infra/ci/requirements.txt
pip install -r infra/build/functions/requirements.txt
- name: Run presubmit checks
run: python infra/presubmit.py

2
.gitignore vendored
View File

@ -1,4 +1,4 @@
.vscode/
*.pyc
build/
/build/
*~

View File

@ -0,0 +1,95 @@
# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
"""Cloud functions for build scheduling."""
import re
from github import Github
from google.cloud import ndb
# Pattern a project directory name must match: anchored at both ends and
# limited to ASCII letters, digits, underscores and hyphens.
VALID_PROJECT_NAME = re.compile(r'^[a-zA-Z0-9_-]+$')
# pylint: disable=too-few-public-methods
class Project(ndb.Model):
    """Datastore entity recording one project integrated with OSS-Fuzz."""

    # Name of the project; sync_projects() stores the project's name here.
    name = ndb.StringProperty()
# pylint: disable=too-few-public-methods
class GitAuth(ndb.Model):
    """Datastore entity that stores the GitHub access token."""

    # Token string; this is the value get_access_token() hands back.
    access_token = ndb.StringProperty()
def sync_projects(projects):
    """Make the Project entities in Cloud Datastore match ``projects``.

    Entities whose name is not in ``projects`` are deleted, and a new entity
    is created for every name that is not stored yet. Callers in this file
    invoke it inside an ``ndb.Client().context()`` block.

    Args:
        projects: Iterable of project names that should exist afterwards.
    """
    # Deduplicate up front so a repeated name can never create two entities
    # and the membership tests below are O(1).
    wanted_names = set(projects)

    # Iterating the query object executes it, so materialise it once and
    # reuse the result for both the delete pass and the insert pass instead
    # of querying Datastore twice.
    stored_projects = list(Project.query())

    stale_keys = [
        stored.key
        for stored in stored_projects
        if stored.name not in wanted_names
    ]
    ndb.delete_multi(stale_keys)

    stored_names = {stored.name for stored in stored_projects}
    ndb.put_multi(
        [Project(name=name) for name in wanted_names - stored_names])
def _has_docker_file(repo, project_path):
    """Return True when the listing of project_path includes 'Dockerfile'.

    Args:
        repo: Object exposing get_contents(path), which yields entries that
            carry a ``name`` attribute.
        project_path: Path of the project directory inside the repository.
    """
    for entry in repo.get_contents(project_path):
        if entry.name == 'Dockerfile':
            return True
    return False
def get_projects(repo):
    """Collect the names of the valid projects found in the repository.

    An entry of the top-level 'projects' directory counts as a project when
    it is a directory, its name matches VALID_PROJECT_NAME, and its listing
    contains a Dockerfile.

    Args:
        repo: Repository object exposing get_contents(path).

    Returns:
        Set of project names.
    """
    projects = set()
    for content_file in repo.get_contents('projects'):
        # Cheap local checks come first: _has_docker_file() performs another
        # get_contents() lookup per directory, so skip it whenever the entry
        # is already known to be ineligible.
        if content_file.type != 'dir':
            continue
        if not VALID_PROJECT_NAME.match(content_file.name):
            continue
        if _has_docker_file(repo, content_file.path):
            projects.add(content_file.name)
    return projects
def get_access_token():
    """Fetch the GitHub access token stored in Cloud Datastore.

    Returns:
        The access_token value of the GitAuth entity the query returns.

    Raises:
        RuntimeError: If the query finds no GitAuth entity.
    """
    auth_entity = GitAuth.query().get()
    if auth_entity is not None:
        return auth_entity.access_token
    raise RuntimeError('No access token available')
def sync(event, context):
    """Mirror the google/oss-fuzz project list into Cloud Datastore.

    Args:
        event: Trigger payload; ignored.
        context: Trigger metadata; ignored.
    """
    del event, context  # unused
    with ndb.Client().context():
        # The token lookup needs the ndb context, so it happens in here.
        access_token = get_access_token()
        oss_fuzz_repo = Github(access_token).get_repo('google/oss-fuzz')
        sync_projects(get_projects(oss_fuzz_repo))

View File

@ -0,0 +1,18 @@
# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
PyGithub==1.51
google-cloud-ndb==1.3.0

View File

@ -0,0 +1,184 @@
# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
"""Unit tests for Cloud Function sync, which syncs the list of github projects
and uploads them to the Cloud Datastore."""
import os
import unittest
import subprocess
import threading
from google.cloud import ndb
from main import sync_projects
from main import get_projects
from main import get_access_token
from main import Project
# Default time, in seconds, that _wait_for_emulator_ready() waits for the
# readiness marker before giving up.
_EMULATOR_TIMEOUT = 20
# Byte string searched for in each line of emulator output to detect that the
# emulator has started.
_DATASTORE_READY_INDICATOR = b'is now running'
def start_datastore_emulator():
    """Launch the gcloud Datastore emulator as a child process.

    The listening port and the project id are taken from the
    DATASTORE_EMULATOR_PORT and DATASTORE_PROJECT_ID environment variables.

    Returns:
        The subprocess.Popen handle; stderr is merged into the stdout pipe.

    Raises:
        RuntimeError: If either environment variable is unset or empty.
    """
    settings = {}
    for variable in ('DATASTORE_EMULATOR_PORT', 'DATASTORE_PROJECT_ID'):
        value = os.environ.get(variable)
        if not value:
            # Fail early with a clear message: otherwise a missing port would
            # silently become 'localhost:None' and a missing project id would
            # surface as a TypeError from string concatenation.
            raise RuntimeError(
                'Environment variable {} must be set.'.format(variable))
        settings[variable] = value

    return subprocess.Popen([
        'gcloud',
        'beta',
        'emulators',
        'datastore',
        'start',
        '--consistency=1.0',
        '--host-port=localhost:' + str(settings['DATASTORE_EMULATOR_PORT']),
        '--project=' + settings['DATASTORE_PROJECT_ID'],
        '--no-store-on-disk',
    ],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
def _wait_for_emulator_ready(proc,
                             emulator,
                             indicator,
                             timeout=_EMULATOR_TIMEOUT):
    """Block until the emulator prints its readiness marker.

    Args:
        proc: Process whose stdout is read line by line.
        emulator: Emulator name, used only in the error message.
        indicator: Marker searched for in each output line.
        timeout: How long to wait for the marker before giving up.

    Returns:
        The daemon thread that keeps reading proc.stdout.

    Raises:
        RuntimeError: If the marker is not seen within ``timeout``.
    """
    became_ready = threading.Event()

    def _drain_output(process, ready_signal):
        """Read stdout until EOF, flagging the first line with the marker."""
        line = process.stdout.readline()
        while line:
            # Keep reading after readiness too; only the first match matters.
            if not ready_signal.is_set() and indicator in line:
                ready_signal.set()
            line = process.stdout.readline()

    reader = threading.Thread(target=_drain_output,
                              args=(proc, became_ready),
                              daemon=True)
    reader.start()

    if became_ready.wait(timeout):
        return reader
    raise RuntimeError(
        '{} emulator did not get ready in time.'.format(emulator))
# pylint: disable=too-few-public-methods
class Repository:
    """Minimal stand-in for a GitHub repository / content-file tree."""

    def __init__(self, name, file_type, path, contents=None):
        self.name = name
        self.type = file_type
        self.path = path
        # A falsy ``contents`` (None or empty) becomes a fresh empty list.
        self.contents = contents if contents else []

    def get_contents(self, path):
        """Return the children stored at ``path``, or None if it is unknown.

        Only this node and its direct children are searched.
        """
        if path == self.path:
            return self.contents
        return next(
            (child.contents for child in self.contents if child.path == path),
            None)
class TestDataSync(unittest.TestCase):
    """Tests for the project-sync helpers, using fakes and the emulator."""

    @staticmethod
    def _project_with_dockerfile(name, path):
        """Build a fake project directory that holds a single Dockerfile."""
        dockerfile = Repository('Dockerfile', 'file', path + '/Dockerfile')
        return Repository(name, 'dir', path, [dockerfile])

    @classmethod
    def _projects_root(cls, *other_entries):
        """Build a fake 'projects' tree: valid 'test0' plus other_entries."""
        valid_project = cls._project_with_dockerfile('test0', 'projects/test0')
        return Repository('oss-fuzz', 'dir', 'projects',
                          [valid_project, *other_entries])

    def test_sync_projects(self):
        """sync_projects() drops stale entities and adds missing ones."""
        with ndb.Client().context():
            for stored_name in ('test1', 'test2'):
                Project(name=stored_name).put()

            projects = {'test1', 'test3'}
            sync_projects(projects)

            stored_names = {entity.name for entity in Project.query()}
            self.assertEqual(projects, stored_names)

    def test_get_projects(self):
        """get_projects() returns every directory that has a Dockerfile."""
        repo = self._projects_root(
            self._project_with_dockerfile('test1', 'projects/test1'))
        self.assertEqual(get_projects(repo), {'test0', 'test1'})

    def test_get_projects_no_docker_file(self):
        """get_projects() skips a directory that has no Dockerfile."""
        repo = self._projects_root(
            Repository('test1', 'dir', 'projects/test1'))
        self.assertEqual(get_projects(repo), {'test0'})

    def test_get_projects_invalid_project_name(self):
        """get_projects() skips a directory whose name is invalid."""
        repo = self._projects_root(
            self._project_with_dockerfile('test1@', 'projects/test1'))
        self.assertEqual(get_projects(repo), {'test0'})

    def test_get_projects_non_directory_type_project(self):
        """get_projects() skips an entry of projects/ that is not a 'dir'."""
        repo = self._projects_root(
            Repository('test1', 'file', 'projects/test1'))
        self.assertEqual(get_projects(repo), {'test0'})

    def test_get_access_token(self):
        """get_access_token() raises when no token entity is stored."""
        with ndb.Client().context():
            with self.assertRaises(RuntimeError):
                get_access_token()
if __name__ == '__main__':
    # Local import: only the script entry point needs sys.
    import sys

    DS_EMULATOR = start_datastore_emulator()
    try:
        _wait_for_emulator_ready(DS_EMULATOR, 'datastore',
                                 _DATASTORE_READY_INDICATOR)
        # exit=False keeps the interpreter alive so the emulator can be shut
        # down below; the outcome is read from the returned program object.
        TEST_PROGRAM = unittest.main(exit=False)
    finally:
        # Runs even when the emulator never became ready or the test run
        # raised, so the emulator process is not left behind.
        # TODO: replace this with a cleaner way of killing the process
        os.system('pkill -f datastore')

    # Propagate the test outcome; without this the script exits 0 even when
    # tests fail.
    sys.exit(0 if TEST_PROGRAM.result.wasSuccessful() else 1)