From 9413d10e086ecb776c511de80a1531fae2b5dd8f Mon Sep 17 00:00:00 2001 From: kabeer27 <32016558+kabeer27@users.noreply.github.com> Date: Mon, 13 Jul 2020 04:21:56 +0000 Subject: [PATCH] [Sync cloud function] Cache file contents (#4104) * Storing contents of project.yaml and dockerfile, to reduce api calls * Fixing comment indentation * Requested changes * Minor changes * Importing projectmetadata Co-authored-by: Kabeer Seth --- infra/build/functions/sync/main.py | 80 ++++++++++++++++--------- infra/build/functions/sync/main_test.py | 61 ++++++++++++------- 2 files changed, 92 insertions(+), 49 deletions(-) diff --git a/infra/build/functions/sync/main.py b/infra/build/functions/sync/main.py index 3fea68b09..5524bea4b 100644 --- a/infra/build/functions/sync/main.py +++ b/infra/build/functions/sync/main.py @@ -30,7 +30,8 @@ VALID_PROJECT_NAME = re.compile(r'^[a-zA-Z0-9_-]+$') DEFAULT_BUILDS_PER_DAY = 1 MAX_BUILDS_PER_DAY = 4 -ProjectMetadata = namedtuple('ProjectMetadata', 'schedule') +ProjectMetadata = namedtuple( + 'ProjectMetadata', 'schedule project_yaml_contents dockerfile_contents') class ProjectYamlError(Exception): @@ -42,6 +43,8 @@ class Project(ndb.Model): """Represents an integrated OSS-Fuzz project.""" name = ndb.StringProperty() schedule = ndb.StringProperty() + project_yaml_contents = ndb.TextProperty() + dockerfile_contents = ndb.TextProperty() # pylint: disable=too-few-public-methods @@ -94,6 +97,7 @@ def update_scheduler(cloud_scheduler_client, project, schedule): cloud_scheduler_client.update(job, update_mask) +# pylint: disable=too-many-branches def sync_projects(cloud_scheduler_client, projects): """Sync projects with cloud datastore.""" for project in Project.query(): @@ -115,23 +119,39 @@ def sync_projects(cloud_scheduler_client, projects): try: create_scheduler(cloud_scheduler_client, project_name, projects[project_name].schedule) - Project(name=project_name, schedule=projects[project_name].schedule).put() + project_metadata = projects[project_name] + Project(name=project_name, + schedule=project_metadata.schedule, + project_yaml_contents=project_metadata.project_yaml_contents, + dockerfile_contents=project_metadata.dockerfile_contents).put() except exceptions.GoogleAPICallError as error: logging.error('Scheduler creation for %s failed with %s', project_name, error) for project in Project.query(): - if project.name not in projects or project.schedule == projects[ - project.name]: + if project.name not in projects: continue - try: - update_scheduler(cloud_scheduler_client, project, - projects[project.name].schedule) - project.schedule = projects[project.name].schedule + project_metadata = projects[project.name] + project_changed = False + if project.schedule != project_metadata.schedule: + try: + update_scheduler(cloud_scheduler_client, project, + projects[project.name].schedule) + project.schedule = project_metadata.schedule + project_changed = True + except exceptions.GoogleAPICallError as error: + logging.error('Updating scheduler for %s failed with %s', project.name, + error) + if project.project_yaml_contents != project_metadata.project_yaml_contents: + project.project_yaml_contents = project_metadata.project_yaml_contents + project_changed = True + + if project.dockerfile_contents != project_metadata.dockerfile_contents: + project.dockerfile_contents = project_metadata.dockerfile_contents + project_changed = True + + if project_changed: project.put() - except exceptions.GoogleAPICallError as error: - logging.error('Updating scheduler for %s failed with %s', project.name, - error) def _has_docker_file(project_contents): @@ -140,27 +160,30 @@ def _has_docker_file(project_contents): content_file.name == 'Dockerfile' for content_file in project_contents) -def get_schedule(project_contents): +def get_project_metadata(project_contents): """Checks for schedule parameter in yaml file else uses DEFAULT_SCHEDULE.""" for content_file in project_contents: - if content_file.name != 'project.yaml': - continue - project_yaml = yaml.safe_load(content_file.decoded_content.decode('utf-8')) - builds_per_day = project_yaml.get('builds_per_day', DEFAULT_BUILDS_PER_DAY) - if not isinstance(builds_per_day, int) or builds_per_day not in range( - 1, MAX_BUILDS_PER_DAY + 1): - raise ProjectYamlError('Parameter is not an integer in range [1-4]') + if content_file.name == 'project.yaml': + project_yaml_contents = content_file.decoded_content.decode('utf-8') - # Starting at 6:00 am, next build schedules are added at 'interval' slots - # Example for interval 2, hours = [6, 18] and schedule = '0 6,18 * * *' + if content_file.name == 'Dockerfile': + dockerfile_contents = content_file.decoded_content.decode('utf-8') - interval = 24 // builds_per_day - hours = [] - for hour in range(6, 30, interval): - hours.append(hour % 24) - schedule = '0 ' + ','.join(str(hour) for hour in hours) + ' * * *' + project_yaml = yaml.safe_load(project_yaml_contents) + builds_per_day = project_yaml.get('builds_per_day', DEFAULT_BUILDS_PER_DAY) + if not isinstance(builds_per_day, int) or builds_per_day not in range( + 1, MAX_BUILDS_PER_DAY + 1): + raise ProjectYamlError('Parameter is not an integer in range [1-4]') - return schedule + # Starting at 6:00 am, next build schedules are added at 'interval' slots + # Example for interval 2, hours = [6, 18] and schedule = '0 6,18 * * *' + interval = 24 // builds_per_day + hours = [] + for hour in range(6, 30, interval): + hours.append(hour % 24) + schedule = '0 ' + ','.join(str(hour) for hour in hours) + ' * * *' + + return ProjectMetadata(schedule, project_yaml_contents, dockerfile_contents) def get_projects(repo): @@ -177,8 +200,7 @@ def get_projects(repo): continue try: - projects[content_file.name] = ProjectMetadata( - schedule=get_schedule(project_contents)) + projects[content_file.name] = get_project_metadata(project_contents) except ProjectYamlError as error: logging.error( 'Incorrect format for project.yaml file of %s with error %s', diff --git a/infra/build/functions/sync/main_test.py b/infra/build/functions/sync/main_test.py index b52a8a0c1..315f0e901 100644 --- a/infra/build/functions/sync/main_test.py +++ b/infra/build/functions/sync/main_test.py @@ -16,7 +16,6 @@ """Unit tests for Cloud Function sync, which syncs the list of github projects and uploads them to the Cloud Datastore.""" -from collections import namedtuple import os import subprocess import threading @@ -30,12 +29,12 @@ from main import get_access_token from main import get_projects from main import sync_projects from main import Project +from main import ProjectMetadata _EMULATOR_TIMEOUT = 20 _DATASTORE_READY_INDICATOR = b'is now running' _DATASTORE_EMULATOR_PORT = 8432 _TEST_PROJECT_ID = 'test-project' -ProjectMetadata = namedtuple('ProjectMetadata', 'schedule') def start_datastore_emulator(): @@ -173,12 +172,18 @@ class TestDataSync(unittest.TestCase): cloud_scheduler_client = CloudSchedulerClient() with client.context(): - Project(name='test1', schedule='0 8 * * *').put() - Project(name='test2', schedule='0 9 * * *').put() + Project(name='test1', + schedule='0 8 * * *', + project_yaml_contents='', + dockerfile_contents='').put() + Project(name='test2', + schedule='0 9 * * *', + project_yaml_contents='', + dockerfile_contents='').put() projects = { - 'test1': ProjectMetadata('0 8 * * *'), - 'test2': ProjectMetadata('0 7 * * *') + 'test1': ProjectMetadata('0 8 * * *', '', ''), + 'test2': ProjectMetadata('0 7 * * *', '', '') } sync_projects(cloud_scheduler_client, projects) @@ -194,11 +199,14 @@ class TestDataSync(unittest.TestCase): cloud_scheduler_client = CloudSchedulerClient() with client.context(): - Project(name='test1', schedule='0 8 * * *').put() + Project(name='test1', + schedule='0 8 * * *', + project_yaml_contents='', + dockerfile_contents='').put() projects = { - 'test1': ProjectMetadata('0 8 * * *'), - 'test2': ProjectMetadata('0 7 * * *') + 'test1': ProjectMetadata('0 8 * * *', '', ''), + 'test2': ProjectMetadata('0 7 * * *', '', '') } sync_projects(cloud_scheduler_client, projects) @@ -214,10 +222,16 @@ class TestDataSync(unittest.TestCase): cloud_scheduler_client = CloudSchedulerClient() with client.context(): - Project(name='test1', schedule='0 8 * * *').put() - Project(name='test2', schedule='0 9 * * *').put() + Project(name='test1', + schedule='0 8 * * *', + project_yaml_contents='', + dockerfile_contents='').put() + Project(name='test2', + schedule='0 9 * * *', + project_yaml_contents='', + dockerfile_contents='').put() - projects = {'test1': ProjectMetadata('0 8 * * *')} + projects = {'test1': ProjectMetadata('0 8 * * *', '', '')} sync_projects(cloud_scheduler_client, projects) projects_query = Project.query() @@ -243,8 +257,12 @@ class TestDataSync(unittest.TestCase): self.assertEqual( get_projects(repo), { - 'test0': ProjectMetadata('0 6,18 * * *'), - 'test1': ProjectMetadata('0 6,14,22 * * *') + 'test0': + ProjectMetadata('0 6,18 * * *', 'builds_per_day: 2', + 'name: test'), + 'test1': + ProjectMetadata('0 6,14,22 * * *', 'builds_per_day: 3', + 'name: test') }) def test_get_projects_no_docker_file(self): @@ -258,8 +276,9 @@ class TestDataSync(unittest.TestCase): Repository('test1', 'dir', 'projects/test1') ]) - self.assertEqual(get_projects(repo), - {'test0': ProjectMetadata('0 6 * * *')}) + self.assertEqual( + get_projects(repo), + {'test0': ProjectMetadata('0 6 * * *', 'name: test', 'name: test')}) def test_get_projects_invalid_project_name(self): """Testing get_projects() with invalid project name""" @@ -275,8 +294,9 @@ class TestDataSync(unittest.TestCase): ]) ]) - self.assertEqual(get_projects(repo), - {'test0': ProjectMetadata('0 6 * * *')}) + self.assertEqual( + get_projects(repo), + {'test0': ProjectMetadata('0 6 * * *', 'name: test', 'name: test')}) def test_get_projects_non_directory_type_project(self): """Testing get_projects() when a file in projects/ is not of type 'dir'.""" @@ -289,8 +309,9 @@ class TestDataSync(unittest.TestCase): Repository('test1', 'file', 'projects/test1') ]) - self.assertEqual(get_projects(repo), - {'test0': ProjectMetadata('0 6 * * *')}) + self.assertEqual( + get_projects(repo), + {'test0': ProjectMetadata('0 6 * * *', 'name: test', 'name: test')}) def test_invalid_yaml_format(self): """Testing invalid yaml schedule parameter argument."""