2020-06-25 03:41:04 +00:00
|
|
|
# Copyright 2020 Google Inc.
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
#
|
|
|
|
################################################################################
|
|
|
|
"""Cloud functions for build scheduling."""
|
|
|
|
|
2020-07-06 07:52:24 +00:00
|
|
|
from collections import namedtuple
|
|
|
|
import logging
|
|
|
|
import os
|
2020-06-25 03:41:04 +00:00
|
|
|
import re
|
2020-07-06 07:52:24 +00:00
|
|
|
import yaml
|
2020-06-25 03:41:04 +00:00
|
|
|
|
|
|
|
from github import Github
|
2020-07-06 07:52:24 +00:00
|
|
|
from google.api_core import exceptions
|
2020-06-25 03:41:04 +00:00
|
|
|
from google.cloud import ndb
|
2020-07-06 07:52:24 +00:00
|
|
|
from google.cloud import scheduler_v1
|
2020-06-25 03:41:04 +00:00
|
|
|
|
2020-07-31 00:56:18 +00:00
|
|
|
import build_and_run_coverage
|
|
|
|
import build_project
|
2020-07-27 03:19:44 +00:00
|
|
|
from datastore_entities import GithubCreds
|
2020-07-16 01:11:18 +00:00
|
|
|
from datastore_entities import Project
|
|
|
|
|
2020-06-25 03:41:04 +00:00
|
|
|
# Project directory names are accepted only if they consist solely of ASCII
# letters, digits, '_' and '-'.
VALID_PROJECT_NAME = re.compile(r'^[a-zA-Z0-9_-]+$')

# `builds_per_day` value used when project.yaml does not set one, and the
# largest value that is accepted.
DEFAULT_BUILDS_PER_DAY = 1
MAX_BUILDS_PER_DAY = 4

# Cron expression passed as the schedule of every coverage scheduler job.
COVERAGE_SCHEDULE = '0 6 * * *'

# Pub/Sub topic names the scheduler jobs publish to.
FUZZING_BUILD_TOPIC = 'request-build'
COVERAGE_BUILD_TOPIC = 'request-coverage-build'
|
2020-07-06 07:52:24 +00:00
|
|
|
|
2020-07-13 04:21:56 +00:00
|
|
|
# Per-project data gathered from the repository: the cron schedule derived
# from project.yaml plus the raw text of project.yaml and of the Dockerfile.
ProjectMetadata = namedtuple(
    'ProjectMetadata',
    ['schedule', 'project_yaml_contents', 'dockerfile_contents'],
)
|
2020-07-06 07:52:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
class ProjectYamlError(Exception):
    """Raised when the contents of a project.yaml file are not acceptable."""
|
2020-06-25 03:41:04 +00:00
|
|
|
|
|
|
|
|
2020-07-31 00:56:18 +00:00
|
|
|
def create_scheduler(cloud_scheduler_client, project_name, schedule, tag,
                     topic):
    """Creates one Cloud Scheduler job that publishes to a Pub/Sub topic.

    The job is named '<project_name>-scheduler-<tag>' under the location built
    from the GCP_PROJECT and FUNCTION_REGION environment variables, and its
    message payload is the encoded project name.

    Args:
        cloud_scheduler_client: Client exposing `location_path` and
            `create_job`.
        project_name: Name of the project the job belongs to.
        schedule: Cron expression stored as the job's schedule.
        tag: Suffix appended to the job name.
        topic: Name of the Pub/Sub topic the job publishes to.
    """
    gcp_project = os.environ.get('GCP_PROJECT')
    region = os.environ.get('FUNCTION_REGION')
    parent = cloud_scheduler_client.location_path(gcp_project, region)

    job_name = parent + '/jobs/' + project_name + '-scheduler-' + tag
    topic_name = 'projects/' + gcp_project + '/topics/' + topic
    pubsub_target = {
        'topic_name': topic_name,
        'data': project_name.encode(),
    }
    job = {
        'name': job_name,
        'pubsub_target': pubsub_target,
        'schedule': schedule,
    }

    cloud_scheduler_client.create_job(parent, job)
|
|
|
|
|
|
|
|
|
|
|
|
def delete_scheduler(cloud_scheduler_client, project_name, tag=None):
    """Deletes the Cloud Scheduler job(s) of a project that was removed.

    Jobs are created by create_scheduler() under the name
    '<project_name>-scheduler-<tag>', so the same suffix has to be used here;
    a name without the tag never matches a created job.

    Args:
        cloud_scheduler_client: Client exposing `job_path` and `delete_job`.
        project_name: Name of the project whose job(s) are deleted.
        tag: Optional job-name suffix. When given, only that job is deleted.
            When omitted, both the fuzzing and the coverage job of the project
            are deleted, which keeps two-argument callers working.

    Raises:
        Whatever `delete_job` raises; the remaining jobs are then not
        attempted.
    """
    project_id = os.environ.get('GCP_PROJECT')
    location_id = os.environ.get('FUNCTION_REGION')

    if tag is None:
        # One fuzzing and one coverage job exist per project, so a tag-less
        # call has to remove both of them.
        tags = (build_project.FUZZING_BUILD_TAG,
                build_and_run_coverage.COVERAGE_BUILD_TAG)
    else:
        tags = (tag,)

    for job_tag in tags:
        name = cloud_scheduler_client.job_path(
            project_id, location_id, project_name + '-scheduler-' + job_tag)
        cloud_scheduler_client.delete_job(name)
|
|
|
|
|
|
|
|
|
|
|
|
def update_scheduler(cloud_scheduler_client, project, schedule, tag=None):
    """Updates the schedule of a project's existing Cloud Scheduler job.

    Args:
        cloud_scheduler_client: Client exposing `location_path` and
            `update_job`.
        project: Object with a `name` attribute identifying the project.
        schedule: New cron expression to store on the job.
        tag: Optional job-name suffix. Defaults to the fuzzing build tag,
            because the job this function describes publishes to the
            'request-build' topic, i.e. the fuzzing build job.
    """
    project_id = os.environ.get('GCP_PROJECT')
    location_id = os.environ.get('FUNCTION_REGION')
    if tag is None:
        tag = build_project.FUZZING_BUILD_TAG

    parent = cloud_scheduler_client.location_path(project_id, location_id)
    job = {
        # Must match the name used by create_scheduler(), which appends the
        # tag; without it the update targets a job that was never created.
        'name': parent + '/jobs/' + project.name + '-scheduler-' + tag,
        'pubsub_target': {
            'topic_name': 'projects/' + project_id + '/topics/request-build',
            'data': project.name.encode()
        },
        # The new value, not project.schedule: the latter still holds the
        # outdated schedule at the time this function is called.
        'schedule': schedule,
    }

    # A field mask lists the paths of the fields to overwrite; the new value
    # itself is taken from `job`.
    update_mask = {'paths': ['schedule']}
    cloud_scheduler_client.update_job(job, update_mask)
|
2020-07-06 07:52:24 +00:00
|
|
|
|
|
|
|
|
2020-07-13 04:21:56 +00:00
|
|
|
# pylint: disable=too-many-branches
|
2020-07-06 07:52:24 +00:00
|
|
|
def sync_projects(cloud_scheduler_client, projects):
    """Brings datastore Project entities and scheduler jobs in line with
    `projects`.

    Works in three passes over the stored Project entities:
      1. Entities whose name is not in `projects` get their scheduler deleted
         and are then removed from datastore.
      2. Names in `projects` without a stored entity get a fuzzing and a
         coverage scheduler job, followed by a new Project entity.
      3. Entities whose name is in `projects` have their schedule,
         project.yaml contents and Dockerfile contents refreshed when they
         differ, and are written back only if something changed.

    A GoogleAPICallError raised inside a pass is logged; the steps of that
    try block that had not run yet are skipped for the affected project.

    Args:
        cloud_scheduler_client: Client handed to the scheduler helpers.
        projects: Mapping of project name to its metadata (`schedule`,
            `project_yaml_contents`, `dockerfile_contents`).
    """
    # Pass 1: remove projects that are no longer listed.
    for stale in Project.query():
        if stale.name not in projects:
            logging.info('Deleting project %s', stale.name)
            try:
                delete_scheduler(cloud_scheduler_client, stale.name)
                stale.key.delete()
            except exceptions.GoogleAPICallError as error:
                logging.error('Scheduler deletion for %s failed with %s',
                              stale.name, error)

    # Pass 2: set up projects that are listed but not stored yet.
    stored_names = {stored.name for stored in Project.query()}
    for project_name in projects:
        if project_name in stored_names:
            continue

        new_metadata = projects[project_name]
        try:
            create_scheduler(cloud_scheduler_client, project_name,
                             new_metadata.schedule,
                             build_project.FUZZING_BUILD_TAG,
                             FUZZING_BUILD_TOPIC)
            create_scheduler(cloud_scheduler_client, project_name,
                             COVERAGE_SCHEDULE,
                             build_and_run_coverage.COVERAGE_BUILD_TAG,
                             COVERAGE_BUILD_TOPIC)
            new_entity = Project(
                name=project_name,
                schedule=new_metadata.schedule,
                project_yaml_contents=new_metadata.project_yaml_contents,
                dockerfile_contents=new_metadata.dockerfile_contents)
            new_entity.put()
        except exceptions.GoogleAPICallError as error:
            logging.error('Scheduler creation for %s failed with %s',
                          project_name, error)

    # Pass 3: refresh stored projects that are still listed.
    for project in Project.query():
        if project.name not in projects:
            continue

        logging.info('Setting up project %s', project.name)
        latest = projects[project.name]
        needs_write = False

        if project.schedule != latest.schedule:
            logging.info('Schedule changed.')
            try:
                update_scheduler(cloud_scheduler_client, project,
                                 latest.schedule)
                project.schedule = latest.schedule
                needs_write = True
            except exceptions.GoogleAPICallError as error:
                logging.error('Updating scheduler for %s failed with %s',
                              project.name, error)

        # Plain text fields are copied over whenever they differ.
        for field in ('project_yaml_contents', 'dockerfile_contents'):
            latest_value = getattr(latest, field)
            if getattr(project, field) != latest_value:
                setattr(project, field, latest_value)
                needs_write = True

        if needs_write:
            project.put()
|
|
|
|
|
|
|
|
|
|
|
|
def _has_docker_file(project_contents):
    """Returns True if any entry of `project_contents` is named 'Dockerfile'.

    Args:
        project_contents: Iterable of entries exposing a `name` attribute.
    """
    for entry in project_contents:
        if entry.name == 'Dockerfile':
            return True
    return False
|
2020-06-25 03:41:04 +00:00
|
|
|
|
|
|
|
|
2020-07-13 04:21:56 +00:00
|
|
|
def get_project_metadata(project_contents):
    """Builds the ProjectMetadata for one project directory.

    Reads project.yaml and the Dockerfile out of `project_contents` and turns
    the optional `builds_per_day` setting of project.yaml (default
    DEFAULT_BUILDS_PER_DAY) into a cron schedule.

    Args:
        project_contents: Iterable of file entries exposing `name` and
            `decoded_content` (bytes).

    Returns:
        ProjectMetadata(schedule, project_yaml_contents, dockerfile_contents).
        `dockerfile_contents` is None when no Dockerfile entry is present.

    Raises:
        ProjectYamlError: project.yaml is missing, is not parseable YAML, does
            not contain a mapping, or has a `builds_per_day` that is not an
            integer in [1, MAX_BUILDS_PER_DAY].
    """
    # Start from None so that an absent file is detected explicitly instead
    # of surfacing as an UnboundLocalError further down.
    project_yaml_contents = None
    dockerfile_contents = None
    for content_file in project_contents:
        if content_file.name == 'project.yaml':
            project_yaml_contents = content_file.decoded_content.decode('utf-8')
        elif content_file.name == 'Dockerfile':
            dockerfile_contents = content_file.decoded_content.decode('utf-8')

    if project_yaml_contents is None:
        raise ProjectYamlError('project.yaml is missing')

    # Report every problem with the file as ProjectYamlError, the one error
    # type this module defines for project.yaml, so callers can handle a
    # single broken project without a generic crash.
    try:
        project_yaml = yaml.safe_load(project_yaml_contents)
    except yaml.YAMLError as error:
        raise ProjectYamlError(
            'project.yaml is not valid YAML: %s' % error) from error

    # An empty file loads as None and a scalar or list has no .get().
    if not isinstance(project_yaml, dict):
        raise ProjectYamlError('project.yaml does not contain a mapping')

    builds_per_day = project_yaml.get('builds_per_day', DEFAULT_BUILDS_PER_DAY)
    if not isinstance(builds_per_day, int) or builds_per_day not in range(
            1, MAX_BUILDS_PER_DAY + 1):
        raise ProjectYamlError('Parameter is not an integer in range [1-4]')

    # Starting at 6:00 am, further builds are added every 'interval' hours.
    # Example: 2 builds per day give interval 12, hours = [6, 18] and
    # schedule = '0 6,18 * * *'.
    interval = 24 // builds_per_day
    hours = [hour % 24 for hour in range(6, 30, interval)]
    schedule = '0 ' + ','.join(str(hour) for hour in hours) + ' * * *'

    return ProjectMetadata(schedule, project_yaml_contents, dockerfile_contents)
|
2020-06-25 03:41:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_projects(repo):
    """Collects the metadata of every project found in the repository.

    Looks at the entries of the repository's 'projects' directory and keeps
    those that are directories with a valid project name and a Dockerfile.
    Projects whose project.yaml is rejected are logged and left out.

    Args:
        repo: Repository object exposing `get_contents(path)`.

    Returns:
        Dict mapping project name to the value returned by
        get_project_metadata() for that project.
    """
    found = {}
    for entry in repo.get_contents('projects'):
        if entry.type != 'dir':
            continue
        if not VALID_PROJECT_NAME.match(entry.name):
            continue

        entry_files = repo.get_contents(entry.path)
        if not _has_docker_file(entry_files):
            continue

        try:
            found[entry.name] = get_project_metadata(entry_files)
        except ProjectYamlError as error:
            logging.error(
                'Incorrect format for project.yaml file of %s with error %s',
                entry.name, error)

    return found
|
|
|
|
|
|
|
|
|
2020-07-27 03:19:44 +00:00
|
|
|
def get_github_creds():
    """Fetches the stored GitHub client credentials.

    Returns:
        The first entity returned by the GithubCreds query.

    Raises:
        RuntimeError: No GithubCreds entity exists.
    """
    creds = GithubCreds.query().get()
    if creds is not None:
        return creds

    raise RuntimeError('Git credentials not available.')
|
2020-06-25 03:41:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
def sync(event, context):
    """Cloud function entry point: syncs projects with cloud datastore.

    Inside an ndb client context, reads the project list from the
    'google/oss-fuzz' GitHub repository and reconciles it with datastore and
    the scheduler jobs through sync_projects().

    Args:
        event: Unused.
        context: Unused.
    """
    del event, context  # unused

    datastore_client = ndb.Client()
    with datastore_client.context():
        creds = get_github_creds()
        github_client = Github(creds.client_id, creds.client_secret)
        oss_fuzz_repo = github_client.get_repo('google/oss-fuzz')
        latest_projects = get_projects(oss_fuzz_repo)

        scheduler_client = scheduler_v1.CloudSchedulerClient()
        sync_projects(scheduler_client, latest_projects)
|