From c498bfee813d8660954006946b4778bfce509d25 Mon Sep 17 00:00:00 2001 From: Felipe Monteiro Date: Thu, 25 Oct 2018 13:42:48 -0400 Subject: [PATCH] fix: Parse revision out of SSH repo url This patch set adds additional logic to properly handle parsing out the revision from an SSH repo url. The issue was being masked by unit tests whose automated logic for calculating the expected revision mirrored the actual implementation. Thus, the unit tests have also been refactored to take in hardcoded expected values to ensure that the assertions are foolproof around validating expected revisions. Change-Id: I7aacb4792f6b2dfc08d3a7bb4c3f18bbcfc95b8a --- pegleg/cli.py | 11 +- pegleg/engine/repository.py | 47 +++++-- pegleg/engine/util/git.py | 32 +++-- tests/unit/conftest.py | 1 + tests/unit/engine/test_site_repository.py | 152 +++++++++++++++++----- tests/unit/test_cli.py | 40 +++--- 6 files changed, 192 insertions(+), 91 deletions(-) diff --git a/pegleg/cli.py b/pegleg/cli.py index cfcc574e..8c4d3b2e 100644 --- a/pegleg/cli.py +++ b/pegleg/cli.py @@ -69,8 +69,7 @@ REPOSITORY_CLONE_PATH_OPTION = click.option( '-p', '--clone-path', 'clone_path', - help= - 'The path where the repo will be cloned. By default the repo will be ' + help='The path where the repo will be cloned. By default the repo will be ' 'cloned to the /tmp path. If this option is included and the repo already ' 'exists, then the repo will not be cloned again and the user must specify ' 'a new clone path or pass in the local copy of the repository as the site ' @@ -188,8 +187,8 @@ def lint_repo(*, fail_on_missing_sub_src, exclude_lint, warn_lint): @EXTRA_REPOSITORY_OPTION @REPOSITORY_USERNAME_OPTION @REPOSITORY_KEY_OPTION -def site(*, site_repository, clone_path, extra_repositories, - repo_key, repo_username): +def site(*, site_repository, clone_path, extra_repositories, repo_key, + repo_username): """Group for site-level actions, which include: * list: list available sites in a manifests repo @@ -324,8 +323,8 @@ def lint_site(*, fail_on_missing_sub_src, exclude_lint, warn_lint, site_name): @EXTRA_REPOSITORY_OPTION @REPOSITORY_USERNAME_OPTION @REPOSITORY_KEY_OPTION -def type(*, site_repository, clone_path, extra_repositories, - repo_key, repo_username): +def type(*, site_repository, clone_path, extra_repositories, repo_key, + repo_username): """Group for repo-level actions, which include: * list: list all types across the repository diff --git a/pegleg/engine/repository.py b/pegleg/engine/repository.py index 00c09c49..c65e1b11 100644 --- a/pegleg/engine/repository.py +++ b/pegleg/engine/repository.py @@ -15,6 +15,7 @@ import atexit import logging import os +import re import shutil import tempfile @@ -87,15 +88,7 @@ def process_repositories(site_name): repo_url_or_path = site_def_repos[repo_alias]['url'] repo_revision = site_def_repos[repo_alias]['revision'] - # If a repo user is provided, do the necessary replacements. - if repo_user: - if "REPO_USERNAME" not in repo_url_or_path: - LOG.warning( - "A repository username was specified but no REPO_USERNAME " - "string found in repository url %s", repo_url_or_path) - else: - repo_url_or_path = repo_url_or_path.replace( - 'REPO_USERNAME', repo_user) + repo_url_or_path = _format_url_with_repo_username(repo_url_or_path) LOG.info("Processing repository %s with url=%s, repo_key=%s, " "repo_username=%s, revision=%s", repo_alias, repo_url_or_path, @@ -129,6 +122,7 @@ def process_site_repository(update_config=False): repo_url_or_path, repo_revision = _extract_repo_url_and_revision( site_repo_or_path) + repo_url_or_path = _format_url_with_repo_username(repo_url_or_path) new_repo_path = _process_repository(repo_url_or_path, repo_revision) if update_config: @@ -284,16 +278,27 @@ def _extract_repo_url_and_revision(repo_url_or_path): """ + ssh_username_pattern = re.compile('ssh:\/\/.+@.+\/.+') + + def has_revision(repo_url_or_path): + if repo_url_or_path.lower().startswith('ssh'): + if ssh_username_pattern.match(repo_url_or_path): + return repo_url_or_path.count('@') == 2 + return repo_url_or_path.count('@') == 1 + else: + return '@' in repo_url_or_path + # they've forced a revision using @revision - careful not to confuse # this with auth revision = None try: - if '@' in repo_url_or_path: - # extract revision from repo URL or path + if has_revision(repo_url_or_path): repo_url_or_path, revision = repo_url_or_path.rsplit('@', 1) - revision = revision[:-1] if revision.endswith('/') else revision + revision = revision.rstrip('/') if revision.endswith(".git"): revision = revision[:-4] + if repo_url_or_path.endswith(".git"): + repo_url_or_path = repo_url_or_path[:-4] else: repo_url_or_path = repo_url_or_path except Exception: @@ -303,6 +308,20 @@ def _extract_repo_url_and_revision(repo_url_or_path): return repo_url_or_path, revision +def _format_url_with_repo_username(repo_url_or_path): + # If a repo user is provided, do the necessary replacements. + repo_user = config.get_repo_username() + if repo_user: + if "REPO_USERNAME" not in repo_url_or_path: + LOG.warning( + "A repository username was specified but no REPO_USERNAME " + "string found in repository url %s", repo_url_or_path) + else: + repo_url_or_path = repo_url_or_path.replace( + 'REPO_USERNAME', repo_user) + return repo_url_or_path + + def _handle_repository(repo_url_or_path, *args, **kwargs): """Clone remote remote (if ``repo_url_or_path`` is a remote URL) and checkout specified reference . @@ -312,8 +331,8 @@ def _handle_repository(repo_url_or_path, *args, **kwargs): clone_path = config.get_clone_path() try: - return util.git.git_handler(repo_url_or_path, clone_path=clone_path, - *args, **kwargs) + return util.git.git_handler( + repo_url_or_path, clone_path=clone_path, *args, **kwargs) except exceptions.GitException as e: raise click.ClickException(e) except Exception as e: diff --git a/pegleg/engine/util/git.py b/pegleg/engine/util/git.py index e638d7c7..f6b0d26b 100644 --- a/pegleg/engine/util/git.py +++ b/pegleg/engine/util/git.py @@ -22,6 +22,7 @@ from git import exc as git_exc from git import Git from git import Repo +from pegleg import config from pegleg.engine import exceptions LOG = logging.getLogger(__name__) @@ -29,8 +30,11 @@ LOG = logging.getLogger(__name__) __all__ = ('git_handler', ) -def git_handler(repo_url, ref=None, proxy_server=None, - auth_key=None, clone_path=None): +def git_handler(repo_url, + ref=None, + proxy_server=None, + auth_key=None, + clone_path=None): """Handle directories that are Git repositories. If ``repo_url`` is a valid URL for which a local repository doesn't @@ -75,8 +79,8 @@ def git_handler(repo_url, ref=None, proxy_server=None, # we need to clone the repo_url first since it doesn't exist and then # checkout the appropriate reference - and return the tmpdir if parsed_url.scheme in supported_clone_protocols: - return _try_git_clone(repo_url, ref, proxy_server, - auth_key, clone_path) + return _try_git_clone(repo_url, ref, proxy_server, auth_key, + clone_path) else: raise ValueError('repo_url=%s must use one of the following ' 'protocols: %s' % @@ -142,8 +146,11 @@ def _get_current_ref(repo_url): return None -def _try_git_clone(repo_url, ref=None, proxy_server=None, - auth_key=None, clone_path=None): +def _try_git_clone(repo_url, + ref=None, + proxy_server=None, + auth_key=None, + clone_path=None): """Try cloning Git repo from ``repo_url`` using the reference ``ref``. :param repo_url: URL of remote Git repo or path to local Git repo. @@ -177,7 +184,7 @@ def _try_git_clone(repo_url, ref=None, proxy_server=None, LOG.error(msg) raise - env_vars = _get_clone_env_vars(repo_url, ref, auth_key) + env_vars = _get_remote_env_vars(auth_key) ssh_cmd = env_vars.get('GIT_SSH_COMMAND') try: @@ -213,11 +220,9 @@ def _try_git_clone(repo_url, ref=None, proxy_server=None, return temp_dir -def _get_clone_env_vars(repo_url, ref, auth_key): +def _get_remote_env_vars(auth_key=None): """Generate environment variables include SSH command for Git clone. - :param repo_url: URL of remote Git repo or path to local Git repo. - :param ref: branch, commit or reference in the repo to clone. :param auth_key: If supplied results in using SSH to clone the repository with the specified key. If the value is None, SSH is not used. :returns: Dictionary of key-value pairs for Git clone. @@ -226,13 +231,11 @@ def _get_clone_env_vars(repo_url, ref, auth_key): could not be found and ``auth_method`` is "SSH". """ - ssh_cmd = None + auth_key = auth_key or config.get_repo_key() env_vars = {'GIT_TERMINAL_PROMPT': '0'} if auth_key: if os.path.exists(auth_key): - LOG.debug('Attempting to clone the repo at %s using reference %s ' - 'with SSH authentication.', repo_url, ref) # Ensure that host checking is ignored, to avoid unnecessary # required CLI input. ssh_cmd = ( @@ -326,6 +329,7 @@ def _create_local_ref(g, branches, ref, newref, reftype=None): branches.append(newref) +# TODO(felipemonteiro): Memoize this using beaker. def is_repository(repo_url_or_path, *args, **kwargs): """Checks whether the directory ``repo_url_or_path`` is a Git repository. @@ -344,7 +348,7 @@ def is_repository(repo_url_or_path, *args, **kwargs): else: try: g = Git() - g.ls_remote(repo_url_or_path) + g.ls_remote(repo_url_or_path, env=_get_remote_env_vars()) return True except git_exc.CommandError: return False diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index a05f82b4..fc0b171e 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -50,6 +50,7 @@ def clean_temporary_git_repos(): if os.path.isdir(path) and os.access(path, os.R_OK): if any(p.startswith('airship') for p in os.listdir(path)): yield path + try: yield finally: diff --git a/tests/unit/engine/test_site_repository.py b/tests/unit/engine/test_site_repository.py index ebfe3c6c..7fb48ece 100644 --- a/tests/unit/engine/test_site_repository.py +++ b/tests/unit/engine/test_site_repository.py @@ -86,7 +86,10 @@ def _test_process_repositories_inner(site_name="test_site", def _test_process_repositories(site_repo=None, repo_username=None, - repo_overrides=None): + repo_overrides=None, + expected_repo_url=None, + expected_repo_revision=None, + expected_repo_overrides=None): """Validate :func:`repository.process_repositories`. :param site_repo: Primary site repository. @@ -117,14 +120,11 @@ def _test_process_repositories(site_repo=None, if site_repo: # Validate that the primary site repository is cloned, in addition # to the extra repositories. - repo_revision = None - repo_url = site_repo.rsplit('@', 1) - if len(repo_url) == 1: # Case: local repo path. - repo_url = repo_url[0] - elif len(repo_url) == 2: # Case: remote repo URL. - repo_url, repo_revision = repo_url mock_calls = [ - mock.call(repo_url, ref=repo_revision, auth_key=None) + mock.call( + expected_repo_url, + ref=expected_repo_revision, + auth_key=None) ] mock_calls.extend([ mock.call(r['url'], ref=r['revision'], auth_key=None) @@ -148,15 +148,12 @@ def _test_process_repositories(site_repo=None, assert (expected_call_count == m_clone_repo.call_count) for x, r in TEST_REPOSITORIES['repositories'].items(): - if x in repo_overrides: - ref = None - repo_url = repo_overrides[x].rsplit('@', 1) - if len(repo_url) == 1: # Case: local repo path. - repo_url = repo_url[0] - elif len(repo_url) == 2: # Case: remote repo URL. - repo_url, ref = repo_url - repo_url = repo_url.split('=')[-1] + if x in expected_repo_overrides: + repo_url = expected_repo_overrides[x]['url'] + ref = expected_repo_overrides[x]['revision'] else: + # Handles default values in TEST_REPOSITORIES -- which + # represents defaults in site-definition.yaml. repo_url = r['url'] ref = r['revision'] m_clone_repo.assert_any_call(repo_url, ref=ref, auth_key=None) @@ -197,19 +194,36 @@ def test_process_repositories(): def test_process_repositories_with_site_repo_url(): """Test process_repository when site repo is a remote URL.""" + # With REPO_USERNAME. site_repo = ( 'ssh://REPO_USERNAME@gerrit:29418/aic-clcp-site-manifests.git@333') - _test_process_repositories(site_repo=site_repo) + _test_process_repositories( + site_repo=site_repo, + expected_repo_url=( + "ssh://REPO_USERNAME@gerrit:29418/aic-clcp-site-manifests"), + expected_repo_revision="333") + # Without REPO_USERNAME. + site_repo = 'ssh://gerrit:29418/aic-clcp-site-manifests.git@333' + _test_process_repositories( + site_repo=site_repo, + expected_repo_url="ssh://gerrit:29418/aic-clcp-site-manifests", + expected_repo_revision="333") def test_process_repositories_handles_local_site_repo_path(): site_repo = '/opt/aic-clcp-site-manifests' - _test_process_repositories(site_repo=site_repo) + _test_process_repositories( + site_repo=site_repo, + expected_repo_url='/opt/aic-clcp-site-manifests', + expected_repo_revision=None) def test_process_repositories_handles_local_site_repo_path_with_revision(): site_repo = '/opt/aic-clcp-site-manifests@333' - _test_process_repositories(site_repo=site_repo) + _test_process_repositories( + site_repo=site_repo, + expected_repo_url="/opt/aic-clcp-site-manifests", + expected_repo_revision="333") @mock.patch.object( @@ -240,33 +254,77 @@ def test_process_repositories_with_repo_overrides_remote_urls(): 'global': 'global=ssh://REPO_USERNAME@gerrit:29418/aic-clcp-manifests.git@12345' } - _test_process_repositories(repo_overrides=overrides) + expected_repo_overrides = { + 'global': { + 'url': 'ssh://REPO_USERNAME@gerrit:29418/aic-clcp-manifests', + 'revision': '12345' + }, + } + _test_process_repositories( + repo_overrides=overrides, + expected_repo_overrides=expected_repo_overrides) # Different URL, different revision (than TEST_REPOSITORIES). overrides = { 'global': 'global=https://gerrit/aic-clcp-manifests.git@12345' } - _test_process_repositories(repo_overrides=overrides) + expected_repo_overrides = { + 'global': { + 'url': 'https://gerrit/aic-clcp-manifests', + 'revision': '12345' + }, + } + _test_process_repositories( + repo_overrides=overrides, + expected_repo_overrides=expected_repo_overrides) def test_process_repositories_with_repo_overrides_local_paths(): # Local path without revision. overrides = {'global': 'global=/opt/aic-clcp-manifests'} - _test_process_repositories(repo_overrides=overrides) + expected_repo_overrides = { + 'global': { + 'url': '/opt/aic-clcp-manifests', + 'revision': None + }, + } + _test_process_repositories( + repo_overrides=overrides, + expected_repo_overrides=expected_repo_overrides) # Local path with revision. overrides = {'global': 'global=/opt/aic-clcp-manifests@12345'} - _test_process_repositories(repo_overrides=overrides) + expected_repo_overrides = { + 'global': { + 'url': '/opt/aic-clcp-manifests', + 'revision': '12345' + }, + } + _test_process_repositories( + repo_overrides=overrides, + expected_repo_overrides=expected_repo_overrides) def test_process_repositories_with_multiple_repo_overrides_remote_urls(): overrides = { 'global': - 'global=ssh://errit:29418/aic-clcp-manifests.git@12345', + 'global=ssh://gerrit:29418/aic-clcp-manifests.git@12345', 'secrets': 'secrets=ssh://gerrit:29418/aic-clcp-security-manifests.git@54321' } - _test_process_repositories(repo_overrides=overrides) + expected_repo_overrides = { + 'global': { + 'url': 'ssh://gerrit:29418/aic-clcp-manifests', + 'revision': '12345' + }, + 'secrets': { + 'url': 'ssh://gerrit:29418/aic-clcp-security-manifests', + 'revision': '54321' + }, + } + _test_process_repositories( + repo_overrides=overrides, + expected_repo_overrides=expected_repo_overrides) def test_process_repositories_with_multiple_repo_overrides_local_paths(): @@ -274,7 +332,19 @@ def test_process_repositories_with_multiple_repo_overrides_local_paths(): 'global': 'global=/opt/aic-clcp-manifests@12345', 'secrets': 'secrets=/opt/aic-clcp-security-manifests.git@54321' } - _test_process_repositories(repo_overrides=overrides) + expected_repo_overrides = { + 'global': { + 'url': '/opt/aic-clcp-manifests', + 'revision': '12345' + }, + 'secrets': { + 'url': '/opt/aic-clcp-security-manifests', + 'revision': '54321' + }, + } + _test_process_repositories( + repo_overrides=overrides, + expected_repo_overrides=expected_repo_overrides) @mock.patch.object( @@ -415,23 +485,33 @@ def test_process_extra_repositories_malformed_format_raises_exception( @mock.patch.object(util.git, 'is_repository', autospec=True, return_value=True) def test_process_site_repository(_): - def _do_test(site_repo): - expected = site_repo.rsplit('@', 1)[0] - + def _do_test(site_repo, expected): config.set_site_repo(site_repo) with mock.patch.object( repository, '_handle_repository', autospec=True, - return_value=expected): + side_effect=lambda x, *a, **k: x): result = repository.process_site_repository() assert os.path.normpath(expected) == os.path.normpath(result) # Ensure that the reference is always pruned. - _do_test('http://github.com/openstack/treasuremap@master') - _do_test('http://github.com/openstack/treasuremap') - _do_test('https://github.com/openstack/treasuremap@master') - _do_test('https://github.com/openstack/treasuremap') - _do_test('ssh://foo@github.com/openstack/treasuremap:12345@master') - _do_test('ssh://foo@github.com/openstack/treasuremap:12345') + _do_test( + 'http://github.com/openstack/treasuremap@master', + expected='http://github.com/openstack/treasuremap') + _do_test( + 'http://github.com/openstack/treasuremap', + expected='http://github.com/openstack/treasuremap') + _do_test( + 'https://github.com/openstack/treasuremap@master', + expected='https://github.com/openstack/treasuremap') + _do_test( + 'https://github.com/openstack/treasuremap', + expected='https://github.com/openstack/treasuremap') + _do_test( + 'ssh://foo@github.com/openstack/treasuremap:12345@master', + expected='ssh://foo@github.com/openstack/treasuremap:12345') + _do_test( + 'ssh://foo@github.com/openstack/treasuremap:12345', + expected='ssh://foo@github.com/openstack/treasuremap:12345') diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 2bc37fc6..ad4c34c8 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -26,6 +26,7 @@ from pegleg.engine.util import git from tests.unit import test_utils from tests.unit.fixtures import temp_clone_path + @pytest.mark.skipif( not test_utils.is_connected(), reason='git clone requires network connectivity.') @@ -59,8 +60,7 @@ class TestSiteCLIOptions(BaseCLIActionTest): ### clone_path tests ### def test_list_sites_using_remote_repo_and_clone_path_option( - self, - temp_clone_path): + self, temp_clone_path): """Validates clone_path (-p) option is working properly with site list action when using remote repo. Verify that the repo was cloned in the clone_path @@ -74,18 +74,16 @@ class TestSiteCLIOptions(BaseCLIActionTest): self.repo_rev) # Note that the -p option is used to specify the clone_folder - site_list = self.runner.invoke(cli.site, ['-p', temp_clone_path, - '-r', repo_url, 'list']) + site_list = self.runner.invoke( + cli.site, ['-p', temp_clone_path, '-r', repo_url, 'list']) assert site_list.exit_code == 0 # Verify that the repo was cloned into the clone_path - assert os.path.exists(os.path.join(temp_clone_path, - self.repo_name)) - assert git.is_repository(os.path.join(temp_clone_path, - self.repo_name)) + assert os.path.exists(os.path.join(temp_clone_path, self.repo_name)) + assert git.is_repository(os.path.join(temp_clone_path, self.repo_name)) - def test_list_sites_using_local_repo_and_clone_path_option(self, - temp_clone_path): + def test_list_sites_using_local_repo_and_clone_path_option( + self, temp_clone_path): """Validates clone_path (-p) option is working properly with site list action when using a local repo. Verify that the clone_path has NO effect when using a local repo @@ -98,12 +96,13 @@ class TestSiteCLIOptions(BaseCLIActionTest): repo_path = self.treasuremap_path # Note that the -p option is used to specify the clone_folder - site_list = self.runner.invoke(cli.site, ['-p', temp_clone_path, - '-r', repo_path, 'list']) + site_list = self.runner.invoke( + cli.site, ['-p', temp_clone_path, '-r', repo_path, 'list']) assert site_list.exit_code == 0 # Verify that passing in clone_path when using local repo has no effect - assert not os.path.exists(os.path.join(temp_clone_path, self.repo_name)) + assert not os.path.exists( + os.path.join(temp_clone_path, self.repo_name)) class TestSiteCLIOptionsNegative(BaseCLIActionTest): @@ -111,8 +110,8 @@ class TestSiteCLIOptionsNegative(BaseCLIActionTest): ### Negative clone_path tests ### - def test_list_sites_using_remote_repo_and_reuse_clone_path_option(self, - temp_clone_path): + def test_list_sites_using_remote_repo_and_reuse_clone_path_option( + self, temp_clone_path): """Validates clone_path (-p) option is working properly with site list action when using remote repo. Verify that the same repo can't be cloned in the same clone_path if it already exists @@ -126,16 +125,15 @@ class TestSiteCLIOptionsNegative(BaseCLIActionTest): self.repo_rev) # Note that the -p option is used to specify the clone_folder - site_list = self.runner.invoke(cli.site, ['-p', temp_clone_path, - '-r', repo_url, 'list']) + site_list = self.runner.invoke( + cli.site, ['-p', temp_clone_path, '-r', repo_url, 'list']) - assert git.is_repository(os.path.join(temp_clone_path, - self.repo_name)) + assert git.is_repository(os.path.join(temp_clone_path, self.repo_name)) # Run site list for a second time to validate that the repo can't be # cloned twice in the same clone_path - site_list = self.runner.invoke(cli.site, ['-p', temp_clone_path, - '-r', repo_url, 'list']) + site_list = self.runner.invoke( + cli.site, ['-p', temp_clone_path, '-r', repo_url, 'list']) assert site_list.exit_code == 1 msg = "The repository already exists in the given path. Either " \