From e3d37db45eb7d2b518c71e61e223c877ec7ffd48 Mon Sep 17 00:00:00 2001 From: Rick Bartra Date: Sat, 20 Oct 2018 14:15:30 -0400 Subject: [PATCH] Allow the repository clone path to be specified in the CLI As it currently stands, Pegleg clones site repositories into the /tmp directory. Even if the site repository already exists in the /tmp directory it is still cloned there which results in wasted disk space. This commit allows users to pass in a `clone_path` (-p) option to Pegleg CLI commands that specify where to clone a site repository. If the clone path matches the path of an existing repository, then a error message is logged stating so. If the repository already exists in the clone path, the user can either specify to use that local repo by passing it as the site repository or they proceed by passing in a different clone path. This commit also updates the logic that deletes the copy of the repo that is created in the temporary folder to also delete the parent folder that contains the copied repo. This scenario happens when using a local repository as the site repository. Addionally, this commit adds a cleanup fixture that removes files and directories created in the temporary folder by the unit tests. Change-Id: I1b2943493b8f201f337ea60006c009973dd941b3 --- doc/source/cli/cli.rst | 28 +++++++++ pegleg/cli.py | 28 ++++++++- pegleg/config.py | 9 +++ pegleg/engine/repository.py | 11 +++- pegleg/engine/util/git.py | 27 +++++++-- tests/unit/conftest.py | 25 ++++++++ tests/unit/engine/util/test_git.py | 16 +---- tests/unit/fixtures.py | 11 ++++ tests/unit/test_cli.py | 93 +++++++++++++++++++++++++++++- 9 files changed, 220 insertions(+), 28 deletions(-) diff --git a/doc/source/cli/cli.rst b/doc/source/cli/cli.rst index 5b00d057..98f3283f 100644 --- a/doc/source/cli/cli.rst +++ b/doc/source/cli/cli.rst @@ -103,6 +103,20 @@ The revision can also be specified via (for example): -r /opt/aic-site-clcp-manifests@revision +**-p / --clone-path** (Optional). + +The path where the repo will be cloned. By default the repo will be cloned to +the /tmp path. If this option is included and the repo already exists, then the +repo will not be cloned again and the user must specify a new clone path or +pass in the local copy of the repository as the site repository. Suppose the +repo name is airship-treasuremap and the clone path is /tmp/mypath then the +following directory is created /tmp/mypath/airship-treasuremap which will +contain the contents of the repo. Example of using clone path: + +:: + + -p /tmp/mypath + .. _cli-repo-lint: Lint @@ -148,6 +162,20 @@ These should be named per the site-definition file, e.g.: -e global=/opt/global -e secrets=/opt/secrets +**-p / --clone-path** (Optional). + +The path where the repo will be cloned. By default the repo will be cloned to +the /tmp path. If this option is included and the repo already exists, then the +repo will not be cloned again and the user must specify a new clone path or +pass in the local copy of the repository as the site repository. Suppose the +repo name is airship-treasuremap and the clone path is /tmp/mypath then the +following directory is created /tmp/mypath/airship-treasuremap which will +contain the contents of the repo. Example of using clone path: + +:: + + -p /tmp/mypath + Repository Overrides ^^^^^^^^^^^^^^^^^^^^ diff --git a/pegleg/cli.py b/pegleg/cli.py index 9035794e..cfcc574e 100644 --- a/pegleg/cli.py +++ b/pegleg/cli.py @@ -65,6 +65,20 @@ REPOSITORY_USERNAME_OPTION = click.option( 'specified in the site-definition file. Any occurrences of REPO_USERNAME ' 'will be replaced with this value.') +REPOSITORY_CLONE_PATH_OPTION = click.option( + '-p', + '--clone-path', + 'clone_path', + help= + 'The path where the repo will be cloned. By default the repo will be ' + 'cloned to the /tmp path. If this option is included and the repo already ' + 'exists, then the repo will not be cloned again and the user must specify ' + 'a new clone path or pass in the local copy of the repository as the site ' + 'repository. Suppose the repo name is airship-treasuremap and the clone ' + 'path is /tmp/mypath then the following directory is created ' + '/tmp/mypath/airship-treasuremap which will contain the contents of the ' + 'repo') + ALLOW_MISSING_SUBSTITUTIONS_OPTION = click.option( '-f', '--fail-on-missing-sub-src', @@ -116,11 +130,12 @@ def main(*, verbose): @main.group(help='Commands related to repositories') @MAIN_REPOSITORY_OPTION +@REPOSITORY_CLONE_PATH_OPTION # TODO(felipemonteiro): Support EXTRA_REPOSITORY_OPTION as well to be # able to lint multiple repos together. @REPOSITORY_USERNAME_OPTION @REPOSITORY_KEY_OPTION -def repo(*, site_repository, repo_key, repo_username): +def repo(*, site_repository, clone_path, repo_key, repo_username): """Group for repo-level actions, which include: * lint: lint all sites across the repository @@ -128,6 +143,7 @@ def repo(*, site_repository, repo_key, repo_username): """ config.set_site_repo(site_repository) + config.set_clone_path(clone_path) config.set_repo_key(repo_key) config.set_repo_username(repo_username) @@ -168,10 +184,12 @@ def lint_repo(*, fail_on_missing_sub_src, exclude_lint, warn_lint): @main.group(help='Commands related to sites') @MAIN_REPOSITORY_OPTION +@REPOSITORY_CLONE_PATH_OPTION @EXTRA_REPOSITORY_OPTION @REPOSITORY_USERNAME_OPTION @REPOSITORY_KEY_OPTION -def site(*, site_repository, extra_repositories, repo_key, repo_username): +def site(*, site_repository, clone_path, extra_repositories, + repo_key, repo_username): """Group for site-level actions, which include: * list: list available sites in a manifests repo @@ -182,6 +200,7 @@ def site(*, site_repository, extra_repositories, repo_key, repo_username): """ config.set_site_repo(site_repository) + config.set_clone_path(clone_path) config.set_extra_repo_store(extra_repositories or []) config.set_repo_key(repo_key) config.set_repo_username(repo_username) @@ -301,16 +320,19 @@ def lint_site(*, fail_on_missing_sub_src, exclude_lint, warn_lint, site_name): @main.group(help='Commands related to types') @MAIN_REPOSITORY_OPTION +@REPOSITORY_CLONE_PATH_OPTION @EXTRA_REPOSITORY_OPTION @REPOSITORY_USERNAME_OPTION @REPOSITORY_KEY_OPTION -def type(*, site_repository, extra_repositories, repo_key, repo_username): +def type(*, site_repository, clone_path, extra_repositories, + repo_key, repo_username): """Group for repo-level actions, which include: * list: list all types across the repository """ config.set_site_repo(site_repository) + config.set_clone_path(clone_path) config.set_extra_repo_store(extra_repositories or []) config.set_repo_key(repo_key) config.set_repo_username(repo_username) diff --git a/pegleg/config.py b/pegleg/config.py index 4f099fde..ffc09c1c 100644 --- a/pegleg/config.py +++ b/pegleg/config.py @@ -19,6 +19,7 @@ except NameError: GLOBAL_CONTEXT = { 'site_repo': './', 'extra_repos': [], + 'clone_path': None, 'site_path': 'site', 'type_path': 'type' } @@ -32,6 +33,14 @@ def set_site_repo(r): GLOBAL_CONTEXT['site_repo'] = r.rstrip('/') + '/' +def get_clone_path(): + return GLOBAL_CONTEXT['clone_path'] + + +def set_clone_path(p): + GLOBAL_CONTEXT['clone_path'] = p + + def get_extra_repo_store(): return GLOBAL_CONTEXT.get('extra_repo_store', []) diff --git a/pegleg/engine/repository.py b/pegleg/engine/repository.py index b98d4706..fcea6047 100644 --- a/pegleg/engine/repository.py +++ b/pegleg/engine/repository.py @@ -152,7 +152,9 @@ def _process_repository(repo_url_or_path, repo_revision): if os.path.exists(repo_url_or_path): repo_name = util.git.repo_name(repo_url_or_path) - new_temp_path = os.path.join(tempfile.mkdtemp(), repo_name) + parent_temp_path = tempfile.mkdtemp() + __REPO_FOLDERS.setdefault(repo_name, parent_temp_path) + new_temp_path = os.path.join(parent_temp_path, repo_name) norm_path, sub_path = util.git.normalize_repo_path(repo_url_or_path) shutil.copytree(src=norm_path, dst=new_temp_path, symlinks=True) __REPO_FOLDERS.setdefault(repo_name, new_temp_path) @@ -169,7 +171,7 @@ def _process_site_repository(repo_url_or_path, repo_revision): Also validate that the provided ``repo_url_or_path`` is a valid Git repository. If ``repo_url_or_path`` doesn't already exist, clone it. - If it does, extra the appropriate revision and check it out. + If it does, extract the appropriate revision and check it out. :param repo_url_or_path: Repo path or URL and associated auth information. If URL, examples include: @@ -306,9 +308,12 @@ def _handle_repository(repo_url_or_path, *args, **kwargs): checkout specified reference . """ + # Retrieve the clone path where the repo will be cloned + clone_path = config.get_clone_path() try: - return util.git.git_handler(repo_url_or_path, *args, **kwargs) + return util.git.git_handler(repo_url_or_path, clone_path=clone_path, + *args, **kwargs) except exceptions.GitException as e: raise click.ClickException(e) except Exception as e: diff --git a/pegleg/engine/util/git.py b/pegleg/engine/util/git.py index d6d94fbf..e638d7c7 100644 --- a/pegleg/engine/util/git.py +++ b/pegleg/engine/util/git.py @@ -29,7 +29,8 @@ LOG = logging.getLogger(__name__) __all__ = ('git_handler', ) -def git_handler(repo_url, ref=None, proxy_server=None, auth_key=None): +def git_handler(repo_url, ref=None, proxy_server=None, + auth_key=None, clone_path=None): """Handle directories that are Git repositories. If ``repo_url`` is a valid URL for which a local repository doesn't @@ -50,6 +51,8 @@ def git_handler(repo_url, ref=None, proxy_server=None, auth_key=None): :param proxy_server: optional, HTTP proxy to use while cloning the repo. :param auth_key: If supplied results in using SSH to clone the repository with the specified key. If the value is None, SSH is not used. + :param clone_path: The path where the repo will be cloned. By default the + repo will be cloned to the /tmp path. :returns: Path to the cloned repo if a repo was cloned, else absolute path to ``repo_url``. :raises ValueError: If ``repo_url`` isn't a valid URL or doesn't begin @@ -72,7 +75,8 @@ def git_handler(repo_url, ref=None, proxy_server=None, auth_key=None): # we need to clone the repo_url first since it doesn't exist and then # checkout the appropriate reference - and return the tmpdir if parsed_url.scheme in supported_clone_protocols: - return _try_git_clone(repo_url, ref, proxy_server, auth_key) + return _try_git_clone(repo_url, ref, proxy_server, + auth_key, clone_path) else: raise ValueError('repo_url=%s must use one of the following ' 'protocols: %s' % @@ -138,7 +142,8 @@ def _get_current_ref(repo_url): return None -def _try_git_clone(repo_url, ref=None, proxy_server=None, auth_key=None): +def _try_git_clone(repo_url, ref=None, proxy_server=None, + auth_key=None, clone_path=None): """Try cloning Git repo from ``repo_url`` using the reference ``ref``. :param repo_url: URL of remote Git repo or path to local Git repo. @@ -146,6 +151,8 @@ def _try_git_clone(repo_url, ref=None, proxy_server=None, auth_key=None): :param proxy_server: optional, HTTP proxy to use while cloning the repo. :param auth_key: If supplied results in using SSH to clone the repository with the specified key. If the value is None, SSH is not used. + :param clone_path: The path where the repo will be cloned. By default the + repo will be cloned to the /tmp path. :returns: Path to the cloned repo. :rtype: str :raises GitException: If ``repo_url`` is invalid or could not be found. @@ -155,12 +162,20 @@ def _try_git_clone(repo_url, ref=None, proxy_server=None, auth_key=None): """ + if clone_path is None: + clone_path = tempfile.mkdtemp() # the name here is important as it bubbles back up to the output filename # and ensure we handle url/foo.git/ cases. prefix is 'tmp' by default. - root_temp_dir = tempfile.mkdtemp() repo_name = repo_url.rstrip('/').split('/')[-1] - temp_dir = os.path.join(root_temp_dir, repo_name) - os.makedirs(temp_dir) + temp_dir = os.path.join(clone_path, repo_name) + try: + os.makedirs(temp_dir) + except FileExistsError: + msg = "The repository already exists in the given path. Either " \ + "provide a new clone path or pass in the path of the local " \ + "repository as the site repository (-r)." + LOG.error(msg) + raise env_vars = _get_clone_env_vars(repo_url, ref, auth_key) ssh_cmd = env_vars.get('GIT_SSH_COMMAND') diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 8a43f2ae..a05f82b4 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -14,7 +14,10 @@ import copy +import os import pytest +import shutil +import tempfile from pegleg import config """Fixtures that are applied to all unit tests.""" @@ -30,3 +33,25 @@ def restore_config(): yield finally: config.GLOBAL_CONTEXT = original_global_context + + +@pytest.fixture(scope="class", autouse=True) +def clean_temporary_git_repos(): + """Iterates through all temporarily created directories and deletes each + one that was created for testing. + + """ + + def temporary_git_repos(): + root_tempdir = tempfile.gettempdir() + tempdirs = os.listdir(root_tempdir) + for tempdir in tempdirs: + path = os.path.join(root_tempdir, tempdir) + if os.path.isdir(path) and os.access(path, os.R_OK): + if any(p.startswith('airship') for p in os.listdir(path)): + yield path + try: + yield + finally: + for tempdir in temporary_git_repos(): + shutil.rmtree(tempdir, ignore_errors=True) diff --git a/tests/unit/engine/util/test_git.py b/tests/unit/engine/util/test_git.py index 72ff7e06..e732814f 100644 --- a/tests/unit/engine/util/test_git.py +++ b/tests/unit/engine/util/test_git.py @@ -26,20 +26,6 @@ from pegleg.engine.util import git from tests.unit import test_utils -@pytest.fixture(autouse=True) -def clean_git_repos(): - """Iterates through all temporarily created directories and deletes each - one that was created for testing. - - """ - - root_tempdir = tempfile.gettempdir() - for tempdir in os.listdir(root_tempdir): - path = os.path.join(root_tempdir, tempdir) - if git.is_repository(path): - shutil.rmtree(path, ignore_errors=True) - - def _validate_git_clone(repo_dir, fetched_ref=None, checked_out_ref=None): """Validate that git clone/checkout work. @@ -121,7 +107,7 @@ def test_git_clone_behind_proxy(mock_log): url = 'https://github.com/openstack/airship-armada' commit = 'cba78d1d03e4910f6ab1691bae633c5bddce893d' - for proxy_server in _PROXY_SERVERS.values(): + for proxy_server in test_utils._PROXY_SERVERS.values(): git_dir = git.git_handler(url, commit, proxy_server=proxy_server) _validate_git_clone(git_dir, commit) diff --git a/tests/unit/fixtures.py b/tests/unit/fixtures.py index 5fec4ca4..e9b0c605 100644 --- a/tests/unit/fixtures.py +++ b/tests/unit/fixtures.py @@ -15,6 +15,7 @@ from __future__ import absolute_import import copy import os +import shutil import tempfile import pytest @@ -154,3 +155,13 @@ schema: pegleg/SiteDefinition/v1 files._create_tree(cicd_path, tree=test_structure) yield + + +@pytest.fixture() +def temp_clone_path(): + temp_folder = tempfile.mkdtemp() + try: + yield temp_folder + finally: + if os.path.exists(temp_folder): + shutil.rmtree(temp_folder, ignore_errors=True) diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index b303b3b3..2bc37fc6 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -24,7 +24,7 @@ from pegleg import cli from pegleg.engine import errorcodes from pegleg.engine.util import git from tests.unit import test_utils - +from tests.unit.fixtures import temp_clone_path @pytest.mark.skipif( not test_utils.is_connected(), @@ -52,6 +52,97 @@ class BaseCLIActionTest(object): repo_url = "https://github.com/openstack/%s.git" % cls.repo_name cls.treasuremap_path = git.git_handler(repo_url, ref=cls.repo_rev) + +class TestSiteCLIOptions(BaseCLIActionTest): + """Tests site-level CLI options.""" + + ### clone_path tests ### + + def test_list_sites_using_remote_repo_and_clone_path_option( + self, + temp_clone_path): + """Validates clone_path (-p) option is working properly with site list + action when using remote repo. Verify that the repo was cloned in the + clone_path + """ + # Scenario: + # + # 1) List sites (should clone repo automatically to `clone_path` + # location if `clone_path` is set) + + repo_url = 'https://github.com/openstack/%s@%s' % (self.repo_name, + self.repo_rev) + + # Note that the -p option is used to specify the clone_folder + site_list = self.runner.invoke(cli.site, ['-p', temp_clone_path, + '-r', repo_url, 'list']) + + assert site_list.exit_code == 0 + # Verify that the repo was cloned into the clone_path + assert os.path.exists(os.path.join(temp_clone_path, + self.repo_name)) + assert git.is_repository(os.path.join(temp_clone_path, + self.repo_name)) + + def test_list_sites_using_local_repo_and_clone_path_option(self, + temp_clone_path): + """Validates clone_path (-p) option is working properly with site list + action when using a local repo. Verify that the clone_path has NO + effect when using a local repo + """ + # Scenario: + # + # 1) List sites (when using local repo there should be not cloning + # even if the clone_path is passed in) + + repo_path = self.treasuremap_path + + # Note that the -p option is used to specify the clone_folder + site_list = self.runner.invoke(cli.site, ['-p', temp_clone_path, + '-r', repo_path, 'list']) + + assert site_list.exit_code == 0 + # Verify that passing in clone_path when using local repo has no effect + assert not os.path.exists(os.path.join(temp_clone_path, self.repo_name)) + + +class TestSiteCLIOptionsNegative(BaseCLIActionTest): + """Negative Tests for site-level CLI options.""" + + ### Negative clone_path tests ### + + def test_list_sites_using_remote_repo_and_reuse_clone_path_option(self, + temp_clone_path): + """Validates clone_path (-p) option is working properly with site list + action when using remote repo. Verify that the same repo can't be + cloned in the same clone_path if it already exists + """ + # Scenario: + # + # 1) List sites (should clone repo automatically to `clone_path` + # location if `clone_path` is set) + + repo_url = 'https://github.com/openstack/%s@%s' % (self.repo_name, + self.repo_rev) + + # Note that the -p option is used to specify the clone_folder + site_list = self.runner.invoke(cli.site, ['-p', temp_clone_path, + '-r', repo_url, 'list']) + + assert git.is_repository(os.path.join(temp_clone_path, + self.repo_name)) + + # Run site list for a second time to validate that the repo can't be + # cloned twice in the same clone_path + site_list = self.runner.invoke(cli.site, ['-p', temp_clone_path, + '-r', repo_url, 'list']) + + assert site_list.exit_code == 1 + msg = "The repository already exists in the given path. Either " \ + "provide a new clone path or pass in the path of the local " \ + "repository as the site repository (-r)." + assert msg in site_list.output + @classmethod def teardown_class(cls): # Cleanup temporary Git repos.