2019-12-12 17:26:37 +00:00
|
|
|
# Copyright 2019 Google LLC
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
"""Class to manage a git repository via python.
|
|
|
|
|
|
|
|
This class is to be used to implement git commands over
|
|
|
|
a python API and manage the current state of the git repo.
|
|
|
|
|
|
|
|
Typical usage example:
|
|
|
|
|
|
|
|
  r_man = clone_repo_and_get_manager(
      'https://github.com/google/oss-fuzz.git', '/tmp')
  r_man.checkout_commit('5668cc422c2c92d38a370545d3591039fb5bb8d4')
|
|
|
|
"""
|
2020-04-21 00:11:29 +00:00
|
|
|
import datetime
|
2020-02-28 16:30:42 +00:00
|
|
|
import logging
|
2019-12-12 17:26:37 +00:00
|
|
|
import os
|
|
|
|
import shutil
|
2020-01-06 20:17:26 +00:00
|
|
|
|
2020-01-31 18:19:12 +00:00
|
|
|
import utils
|
2019-12-12 17:26:37 +00:00
|
|
|
|
|
|
|
|
2020-12-07 18:50:11 +00:00
|
|
|
class RepoManager:
  """Wraps git operations on one local repository checkout.

  Attributes:
    repo_dir: Path of the repository's working tree. It is passed as the
      `location` of every git command issued through this class.
  """

  def __init__(self, repo_dir):
    """Constructs a manager for the checkout at |repo_dir|.

    Args:
      repo_dir: Path of the repository's working tree. It is stored as-is;
        no validation or cloning happens here.
    """
    self.repo_dir = repo_dir

  def _is_git_repo(self):
    """Test if the current repo dir is a git repo or not.

    Returns:
      True if |repo_dir| contains a '.git' directory.
    """
    git_path = os.path.join(self.repo_dir, '.git')
    return os.path.isdir(git_path)

  def git(self, cmd, check_result=False):
    """Run a git command.

    Args:
      cmd: The git command as a list of arguments, without the leading 'git'.
      check_result: Should an exception be thrown on failed command.

    Returns:
      stdout, stderr, error code.
    """
    return utils.execute(['git'] + cmd,
                         location=self.repo_dir,
                         check_result=check_result)

  def commit_exists(self, commit):
    """Checks to see if a commit exists in the project repo.

    Args:
      commit: The commit SHA you are checking. None, empty and
        whitespace-only values are reported as not existing.

    Returns:
      True if the commit exists in the project.
    """
    # Guard against None as well as blank strings so that callers such as
    # get_commit_list and checkout_commit raise their documented ValueError
    # instead of an AttributeError from calling a str method on None.
    if not commit or not commit.strip():
      return False

    _, _, err_code = self.git(['cat-file', '-e', commit])
    return not err_code

  def commit_date(self, commit):
    """Get the date of a commit.

    Args:
      commit: The commit hash.

    Returns:
      A timezone-aware (UTC) datetime representing the date of the commit.
    """
    # '%ct' makes git print the committer date as a UNIX timestamp.
    out, _, _ = self.git(['show', '-s', '--format=%ct', commit],
                         check_result=True)
    return datetime.datetime.fromtimestamp(int(out), tz=datetime.timezone.utc)

  def get_git_diff(self, base='origin...'):
    """Gets a list of files that have changed relative to |base|.

    Args:
      base: The revision or revision range handed to `git diff`.

    Returns:
      A list of changed file paths, or None on error or when nothing changed.
    """
    self.fetch_unshallow()
    # Add '--' so that git knows we aren't talking about files.
    command = ['diff', '--name-only', base, '--']
    out, err_msg, err_code = self.git(command)
    if err_code:
      logging.error('Git diff failed with error message %s.', err_msg)
      return None
    if not out:
      logging.error('No diff was found.')
      return None
    return [line for line in out.splitlines() if line]

  def get_current_commit(self):
    """Gets the current commit SHA of the repo.

    Returns:
      The current active commit SHA.
    """
    out, _, _ = self.git(['rev-parse', 'HEAD'], check_result=True)
    return out.strip()

  def get_parent(self, commit, count):
    """Gets the count'th parent of the given commit.

    Args:
      commit: The commit whose ancestor is wanted.
      count: How many generations to go back (resolved as commit~count).

    Returns:
      The parent commit SHA, or None if it could not be resolved.
    """
    self.fetch_unshallow()
    out, _, err_code = self.git(['rev-parse', commit + '~' + str(count)],
                                check_result=False)
    if err_code:
      return None

    return out.strip()

  def fetch_all_remotes(self):
    """Fetch all remotes for checkouts that track a single branch."""
    # Widen the fetch refspec to every branch of origin before updating.
    self.git([
        'config', 'remote.origin.fetch', '+refs/heads/*:refs/remotes/origin/*'
    ],
             check_result=True)
    self.git(['remote', 'update'], check_result=True)

  def get_commit_list(self, newest_commit, oldest_commit=None):
    """Gets the list of commits(inclusive) between the old and new commits.

    Args:
      newest_commit: The newest commit to be in the list.
      oldest_commit: The (optional) oldest commit to be in the list.

    Returns:
      The list of commit SHAs from newest to oldest.

    Raises:
      ValueError: When either the oldest or newest commit does not exist.
      RuntimeError: When there is an error getting the commit list.
    """
    self.fetch_unshallow()
    if oldest_commit and not self.commit_exists(oldest_commit):
      raise ValueError('The oldest commit %s does not exist' % oldest_commit)
    if not self.commit_exists(newest_commit):
      raise ValueError('The newest commit %s does not exist' % newest_commit)
    if oldest_commit == newest_commit:
      return [oldest_commit]

    if oldest_commit:
      commit_range = oldest_commit + '..' + newest_commit
    else:
      commit_range = newest_commit

    out, _, err_code = self.git(['rev-list', commit_range])
    commits = [commit for commit in out.split('\n') if commit]
    if err_code or not commits:
      raise RuntimeError('Error getting commit list between %s and %s ' %
                         (oldest_commit, newest_commit))

    # Make sure result is inclusive: an 'old..new' range leaves out 'old'.
    if oldest_commit:
      commits.append(oldest_commit)
    return commits

  def fetch_branch(self, branch):
    """Fetches a remote branch from origin.

    Args:
      branch: The branch name; it is fetched into a local branch of the same
        name.

    Returns:
      stdout, stderr, error code of the fetch command.
    """
    return self.git(
        ['fetch', 'origin', '{branch}:{branch}'.format(branch=branch)])

  def fetch_unshallow(self):
    """Gets the current git repository history.

    Only does work when '.git/shallow' exists. A failing fetch is logged
    rather than raised.
    """
    shallow_file = os.path.join(self.repo_dir, '.git', 'shallow')
    if os.path.exists(shallow_file):
      _, err, err_code = self.git(['fetch', '--unshallow'], check_result=False)
      if err_code:
        logging.error('Unshallow returned non-zero code: %s', err)

  def checkout_pr(self, pr_ref):
    """Checks out a remote pull request.

    Args:
      pr_ref: The pull request reference to be checked out.
    """
    self.fetch_unshallow()
    self.git(['fetch', 'origin', pr_ref], check_result=True)
    self.git(['checkout', '-f', 'FETCH_HEAD'], check_result=True)

  def checkout_commit(self, commit, clean=True):
    """Checks out a specific commit from the repo.

    Args:
      commit: The commit SHA to be checked out. It is compared verbatim with
        the output of `git rev-parse HEAD` after the checkout, so it has to
        be spelled exactly the way git prints it.
      clean: Whether to run `git clean -fxd` after the checkout.

    Raises:
      RuntimeError: when checkout is not successful.
      ValueError: when commit does not exist.
    """
    self.fetch_unshallow()
    if not self.commit_exists(commit):
      raise ValueError('Commit %s does not exist in current branch' % commit)
    self.git(['checkout', '-f', commit], check_result=True)
    if clean:
      self.git(['clean', '-fxd'], check_result=True)
    if self.get_current_commit() != commit:
      raise RuntimeError('Error checking out commit %s' % commit)

  def remove_repo(self):
    """Removes the git repo from disk, if its directory exists."""
    if os.path.isdir(self.repo_dir):
      shutil.rmtree(self.repo_dir)
|
2020-04-21 00:11:29 +00:00
|
|
|
|
|
|
|
|
2020-12-07 18:50:11 +00:00
|
|
|
def clone_repo_and_get_manager(repo_url, base_dir, repo_name=None):
  """Clones a repo and constructs a repo manager class.

  The clone is skipped when the target directory already exists.

  Args:
    repo_url: The github url needed to clone.
    base_dir: The full file-path where the git repo is located.
    repo_name: The name of the directory the repo is cloned to. When omitted
      it is the last path component of |repo_url| without a '.git' suffix.

  Returns:
    A RepoManager for the directory |repo_name| inside |base_dir|.
  """
  if repo_name is None:
    # Drop trailing slashes first: basename of 'host/repo/' would be empty,
    # which would make repo_dir equal to base_dir and silently skip the clone.
    repo_name = os.path.basename(repo_url.rstrip('/'))
    # Strip only a '.git' suffix; names such as 'my.github.io' must keep
    # their embedded '.git'.
    if repo_name.endswith('.git'):
      repo_name = repo_name[:-len('.git')]
  repo_dir = os.path.join(base_dir, repo_name)
  manager = RepoManager(repo_dir)

  if not os.path.exists(repo_dir):
    _clone(repo_url, base_dir, repo_name)

  return manager
|
2020-04-21 00:11:29 +00:00
|
|
|
|
2020-12-07 18:50:11 +00:00
|
|
|
|
|
|
|
def _clone(repo_url, base_dir, repo_name):
  """Runs `git clone` for |repo_url| from inside |base_dir|.

  Args:
    repo_url: The url of the repository to clone.
    base_dir: The directory the clone command is executed in.
    repo_name: The directory name handed to `git clone` as its target.

  Raises:
    Whatever utils.execute raises for a failed command when called with
    check_result=True (previously documented as ValueError -- TODO confirm
    against utils.execute).
  """
  clone_command = ['git', 'clone', repo_url, repo_name]
  utils.execute(clone_command, location=base_dir, check_result=True)
|