From 6b96bbf28dd867b764046e220f3a639734a5e167 Mon Sep 17 00:00:00 2001 From: Sean Eagan Date: Tue, 16 Oct 2018 15:42:51 -0500 Subject: [PATCH] Correctly identify latest release This fixes the following issues with listing releases from tiller, which could cause Armada to be confused about the state of the latest release, and do the wrong thing. - Was not filtering out old releases, so we could find both a FAILED and DEPLOYED release for the same chart. When this is the case it likely means the FAILED release is the latest, since otherwise armada would have purged the release (and all its history) upon seeing the FAILED release in a previous run. The issue is that after the purge it would try to upgrade rather than re-install, since it also sees the old DEPLOYED release. Also if a release gets manually fixed (DEPLOYED) outside of armada, armada still sees the old FAILED release, and will purge the fixed release. - Was only fetching DEPLOYED and FAILED releases from tiller, so if the latest release has another status Armada won't see it at all. This changes to: - Fetch releases with all statuses. - Filter out old releases. - Raise an error if latest release has status other than DEPLOYED or FAILED, since it's not clear what other action to take in this scenario. Change-Id: I84712c1486c19d2bba302bf3420df916265ba70c --- armada/const.py | 11 +++ armada/exceptions/armada_exceptions.py | 12 ++++ armada/handlers/armada.py | 26 +------ armada/handlers/chart_deploy.py | 70 +++++++++++-------- armada/handlers/tiller.py | 28 ++++++-- armada/tests/unit/handlers/test_armada.py | 52 ++++++++------ armada/tests/unit/handlers/test_tiller.py | 41 +++++++++-- armada/utils/release.py | 10 +++ .../exceptions/armada-exceptions.inc | 5 ++ 9 files changed, 170 insertions(+), 85 deletions(-) diff --git a/armada/const.py b/armada/const.py index 089509b6..e2538812 100644 --- a/armada/const.py +++ b/armada/const.py @@ -27,8 +27,19 @@ DEFAULT_CHART_TIMEOUT = 900 # Tiller DEFAULT_TILLER_TIMEOUT = 300 +STATUS_UNKNOWN = 'UNKNOWN' STATUS_DEPLOYED = 'DEPLOYED' +STATUS_DELETED = 'DELETED' +STATUS_DELETING = 'DELETING' STATUS_FAILED = 'FAILED' +STATUS_PENDING_INSTALL = 'PENDING_INSTALL' +STATUS_PENDING_UPGRADE = 'PENDING_UPGRADE' +STATUS_PENDING_ROLLBACK = 'PENDING_ROLLBACK' +STATUS_ALL = [ + STATUS_UNKNOWN, STATUS_DEPLOYED, STATUS_DELETED, STATUS_DELETING, + STATUS_FAILED, STATUS_PENDING_INSTALL, STATUS_PENDING_UPGRADE, + STATUS_PENDING_ROLLBACK +] # Kubernetes DEFAULT_K8S_TIMEOUT = 300 diff --git a/armada/exceptions/armada_exceptions.py b/armada/exceptions/armada_exceptions.py index 93410cad..052a66e6 100644 --- a/armada/exceptions/armada_exceptions.py +++ b/armada/exceptions/armada_exceptions.py @@ -86,3 +86,15 @@ class WaitException(ArmadaException): def __init__(self, message): self._message = message super(WaitException, self).__init__(message) + + +class UnexpectedReleaseStatusException(ArmadaException): + ''' + Exception that occurs when armada encounters an existing release for a + chart with an unexpected status which armada does not know what to do with. + ''' + + def __init__(self, release_name, status): + self._message = "Found release {} in unexpected status {}".format( + release_name, status) + super(UnexpectedReleaseStatusException, self).__init__(self._message) diff --git a/armada/handlers/armada.py b/armada/handlers/armada.py index 357f8902..fa6afcd2 100644 --- a/armada/handlers/armada.py +++ b/armada/handlers/armada.py @@ -177,26 +177,6 @@ class Armada(object): chart_name = chart.get('chart_name') raise source_exceptions.ChartSourceException(ct_type, chart_name) - def _get_releases_by_status(self): - ''' - Return a list of current releases with DEPLOYED or FAILED status - ''' - deployed_releases = [] - failed_releases = [] - known_releases = self.tiller.list_charts() - for release in known_releases: - if release[4] == const.STATUS_DEPLOYED: - deployed_releases.append(release) - elif release[4] == const.STATUS_FAILED: - failed_releases.append(release) - else: - # tiller.list_charts() only looks at DEPLOYED/FAILED so - # this should be unreachable - LOG.debug('Ignoring release %s in status %s.', release[0], - release[4]) - - return deployed_releases, failed_releases - def sync(self): ''' Synchronize Helm with the Armada Config(s) @@ -216,8 +196,7 @@ class Armada(object): # a more cleaner format self.pre_flight_ops() - # extract known charts on tiller right now - deployed_releases, failed_releases = self._get_releases_by_status() + known_releases = self.tiller.list_releases() manifest_data = self.manifest.get(const.KEYWORD_ARMADA, {}) prefix = manifest_data.get(const.KEYWORD_PREFIX) @@ -250,8 +229,7 @@ class Armada(object): set_current_chart(chart) try: return self.chart_deploy.execute(chart, cg_test_all_charts, - prefix, deployed_releases, - failed_releases) + prefix, known_releases) finally: set_current_chart(None) diff --git a/armada/handlers/chart_deploy.py b/armada/handlers/chart_deploy.py index 435a0e77..c8d20add 100644 --- a/armada/handlers/chart_deploy.py +++ b/armada/handlers/chart_deploy.py @@ -16,13 +16,14 @@ from oslo_log import log as logging import time import yaml +from armada import const from armada.exceptions import armada_exceptions from armada.handlers.chartbuilder import ChartBuilder from armada.handlers.test import test_release_for_success from armada.handlers.release_diff import ReleaseDiff from armada.handlers.wait import ChartWait from armada.exceptions import tiller_exceptions -from armada.utils.release import release_prefixer +from armada.utils.release import release_prefixer, get_release_status LOG = logging.getLogger(__name__) @@ -39,8 +40,7 @@ class ChartDeploy(object): self.timeout = timeout self.tiller = tiller - def execute(self, chart, cg_test_all_charts, prefix, deployed_releases, - failed_releases): + def execute(self, chart, cg_test_all_charts, prefix, known_releases): namespace = chart.get('namespace') release = chart.get('release') release_name = release_prefixer(prefix, release) @@ -55,8 +55,35 @@ class ChartDeploy(object): protected = chart.get('protected', {}) p_continue = protected.get('continue_processing', False) + old_release = self.find_chart_release(known_releases, release_name) + + status = None + if old_release: + status = get_release_status(old_release) + + if status not in [const.STATUS_FAILED, const.STATUS_DEPLOYED]: + raise armada_exceptions.UnexpectedReleaseStatusException( + release_name, status) + + chart_wait = ChartWait( + self.tiller.k8s, + release_name, + chart, + namespace, + k8s_wait_attempts=self.k8s_wait_attempts, + k8s_wait_attempt_sleep=self.k8s_wait_attempt_sleep, + timeout=self.timeout) + + native_wait_enabled = chart_wait.is_native_enabled() + + # Begin Chart timeout deadline + deadline = time.time() + chart_wait.get_timeout() + + chartbuilder = ChartBuilder(chart) + new_chart = chartbuilder.get_helm_chart() + # Check for existing FAILED release, and purge - if release_name in [rel[0] for rel in failed_releases]: + if status == const.STATUS_FAILED: LOG.info('Purging FAILED release %s before deployment.', release_name) if protected: @@ -78,35 +105,19 @@ class ChartDeploy(object): self.tiller.uninstall_release(release_name) result['purge'] = release_name - chart_wait = ChartWait( - self.tiller.k8s, - release_name, - chart, - namespace, - k8s_wait_attempts=self.k8s_wait_attempts, - k8s_wait_attempt_sleep=self.k8s_wait_attempt_sleep, - timeout=self.timeout) - - native_wait_enabled = chart_wait.is_native_enabled() - - # Begin Chart timeout deadline - deadline = time.time() + chart_wait.get_timeout() - - chartbuilder = ChartBuilder(chart) - new_chart = chartbuilder.get_helm_chart() - # TODO(mark-burnett): It may be more robust to directly call # tiller status to decide whether to install/upgrade rather # than checking for list membership. - if release_name in [rel[0] for rel in deployed_releases]: + if status == const.STATUS_DEPLOYED: # indicate to the end user what path we are taking LOG.info("Upgrading release %s in namespace %s", release_name, namespace) + # extract the installed chart and installed values from the # latest release so we can compare to the intended state - old_chart, old_values_string = self.find_release_chart( - deployed_releases, release_name) + old_chart = old_release.chart + old_values_string = old_release.config.raw upgrade = chart.get('upgrade', {}) disable_hooks = upgrade.get('no_hooks', False) @@ -236,10 +247,13 @@ class ChartDeploy(object): def get_diff(self, old_chart, old_values, new_chart, values): return ReleaseDiff(old_chart, old_values, new_chart, values).get_diff() - def find_release_chart(self, known_releases, release_name): + def find_chart_release(self, known_releases, release_name): ''' Find a release given a list of known_releases and a release name ''' - for release, _, chart, values, _ in known_releases: - if release == release_name: - return chart, values + for release in known_releases: + if release.name == release_name: + return release + LOG.info("known: %s, release_name: %s", + list(map(lambda r: r.name, known_releases)), release_name) + return None diff --git a/armada/handlers/tiller.py b/armada/handlers/tiller.py index 75db317d..338a387c 100644 --- a/armada/handlers/tiller.py +++ b/armada/handlers/tiller.py @@ -33,7 +33,7 @@ from armada import const from armada.exceptions import tiller_exceptions as ex from armada.handlers.k8s import K8s from armada.handlers import test -from armada.utils.release import label_selectors +from armada.utils.release import label_selectors, get_release_status TILLER_VERSION = b'2.10.0' GRPC_EPSILON = 60 @@ -202,7 +202,7 @@ class Tiller(object): req = ListReleasesRequest( offset=next_release_expected, limit=LIST_RELEASES_PAGE_SIZE, - status_codes=[const.STATUS_DEPLOYED, const.STATUS_FAILED]) + status_codes=const.STATUS_ALL) LOG.debug('Tiller ListReleases() with timeout=%s, request=%s', self.timeout, req) @@ -242,12 +242,32 @@ class Tiller(object): for index in range(LIST_RELEASES_ATTEMPTS): attempt = index + 1 try: - return get_results() + releases = get_results() except ex.TillerListReleasesPagingException: LOG.warning('List releases paging failed on attempt %s/%s', attempt, LIST_RELEASES_ATTEMPTS) if attempt == LIST_RELEASES_ATTEMPTS: raise + else: + # Filter out old releases, similar to helm cli: + # https://github.com/helm/helm/blob/1e26b5300b5166fabb90002535aacd2f9cc7d787/cmd/helm/list.go#L196 + latest_versions = {} + + for r in releases: + max = latest_versions.get(r.name) + if max is not None: + if max > r.version: + continue + latest_versions[r.name] = r.version + + latest_releases = [] + for r in releases: + if latest_versions[r.name] == r.version: + LOG.debug('Found release %s, version %s, status: %s', + r.name, r.version, get_release_status(r)) + latest_releases.append(r) + + return latest_releases def get_chart_templates(self, template_name, name, release_name, namespace, chart, disable_hooks, values): @@ -328,8 +348,6 @@ class Tiller(object): latest_release.info.status.Code.Name( latest_release.info.status.code)) charts.append(release) - LOG.debug('Found release %s, version %s, status: %s', - release[0], release[1], release[4]) except (AttributeError, IndexError) as e: LOG.debug('%s while getting releases: %s, ex=%s', e.__class__.__name__, latest_release, e) diff --git a/armada/tests/unit/handlers/test_armada.py b/armada/tests/unit/handlers/test_armada.py index 4d593bf7..bfecb309 100644 --- a/armada/tests/unit/handlers/test_armada.py +++ b/armada/tests/unit/handlers/test_armada.py @@ -20,7 +20,7 @@ from armada.handlers import armada from armada.handlers import chart_deploy from armada.tests.unit import base from armada.tests.test_utils import AttrDict, makeMockThreadSafe -from armada.utils.release import release_prefixer +from armada.utils.release import release_prefixer, get_release_status from armada.exceptions import ManifestException from armada.exceptions.override_exceptions import InvalidOverrideValueException from armada.exceptions.validate_exceptions import InvalidManifestException @@ -354,7 +354,7 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase): cg_test_all_charts = chart_group.get('test_charts', True) m_tiller = mock_tiller.return_value - m_tiller.list_charts.return_value = known_releases + m_tiller.list_releases.return_value = known_releases if test_failure_to_run: @@ -393,7 +393,7 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase): 'enabled', True)) expected_apply = True - if release_name not in [x[0] for x in known_releases]: + if release_name not in [x.name for x in known_releases]: expected_install_release_calls.append( mock.call( mock_chartbuilder().get_helm_chart(), @@ -407,11 +407,11 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase): else: target_release = None for known_release in known_releases: - if known_release[0] == release_name: + if known_release.name == release_name: target_release = known_release break if target_release: - status = target_release[4] + status = get_release_status(target_release) if status == const.STATUS_FAILED: protected = chart.get('protected', {}) if not protected: @@ -527,6 +527,19 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase): c for c in yaml_documents if c['data'].get('chart_name') == name ][0] + def get_mock_release(self, name, status): + status_mock = mock.Mock() + status_mock.return_value = status + chart = self._get_chart_by_name(name) + mock_release = mock.Mock( + version=1, + chart=chart, + config=mock.Mock(raw="{}"), + info=mock.Mock( + status=mock.Mock(Code=mock.MagicMock(Name=status_mock)))) + mock_release.name = name + return mock_release + def test_armada_sync_with_no_deployed_releases(self): known_releases = [] self._test_sync(known_releases) @@ -534,50 +547,45 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase): def test_armada_sync_with_one_deployed_release(self): c1 = 'armada-test_chart_1' - known_releases = [[c1, None, None, "{}", const.STATUS_DEPLOYED]] + known_releases = [self.get_mock_release(c1, const.STATUS_DEPLOYED)] self._test_sync(known_releases) def test_armada_sync_with_one_deployed_release_no_diff(self): c1 = 'armada-test_chart_1' - known_releases = [[c1, None, None, "{}", const.STATUS_DEPLOYED]] + known_releases = [self.get_mock_release(c1, const.STATUS_DEPLOYED)] self._test_sync(known_releases, diff=set()) def test_armada_sync_with_both_deployed_releases(self): c1 = 'armada-test_chart_1' c2 = 'armada-test_chart_2' - known_releases = [[c1, None, None, "{}", const.STATUS_DEPLOYED], - [c2, None, None, "{}", const.STATUS_DEPLOYED]] + known_releases = [ + self.get_mock_release(c1, const.STATUS_DEPLOYED), + self.get_mock_release(c2, const.STATUS_DEPLOYED) + ] self._test_sync(known_releases) def test_armada_sync_with_unprotected_releases(self): c1 = 'armada-test_chart_1' - known_releases = [[ - c1, None, - self._get_chart_by_name(c1), None, const.STATUS_FAILED - ]] + known_releases = [self.get_mock_release(c1, const.STATUS_FAILED)] self._test_sync(known_releases) def test_armada_sync_with_protected_releases_continue(self): c1 = 'armada-test_chart_1' c2 = 'armada-test_chart_2' - known_releases = [[ - c2, None, - self._get_chart_by_name(c2), None, const.STATUS_FAILED - ], [c1, None, - self._get_chart_by_name(c1), None, const.STATUS_FAILED]] + known_releases = [ + self.get_mock_release(c2, const.STATUS_FAILED), + self.get_mock_release(c1, const.STATUS_FAILED) + ] self._test_sync(known_releases) def test_armada_sync_with_protected_releases_halt(self): c3 = 'armada-test_chart_3' - known_releases = [[ - c3, None, - self._get_chart_by_name(c3), None, const.STATUS_FAILED - ]] + known_releases = [self.get_mock_release(c3, const.STATUS_FAILED)] def _test_method(): self._test_sync(known_releases) diff --git a/armada/tests/unit/handlers/test_tiller.py b/armada/tests/unit/handlers/test_tiller.py index 5671dcd1..a85d0e8e 100644 --- a/armada/tests/unit/handlers/test_tiller.py +++ b/armada/tests/unit/handlers/test_tiller.py @@ -201,9 +201,39 @@ class TillerTestCase(base.ArmadaTestCase): mock_list_releases_request.assert_called_once_with( offset="", limit=tiller.LIST_RELEASES_PAGE_SIZE, - status_codes=[ - tiller.const.STATUS_DEPLOYED, tiller.const.STATUS_FAILED - ]) + status_codes=tiller.const.STATUS_ALL) + + @mock.patch('armada.handlers.tiller.K8s') + @mock.patch('armada.handlers.tiller.grpc') + @mock.patch.object(tiller, 'ListReleasesRequest') + @mock.patch.object(tiller, 'ReleaseServiceStub') + def test_list_releases_returns_latest_only( + self, mock_stub, mock_list_releases_request, mock_grpc, _): + latest = mock.Mock(version=3) + releases = [mock.Mock(version=2), latest, mock.Mock(version=1)] + for r in releases: + r.name = 'test' + mock_stub.return_value.ListReleases.return_value = [ + mock.Mock( + next='', + count=len(releases), + total=len(releases), + releases=releases) + ] + + tiller_obj = tiller.Tiller('host', '8080', None) + self.assertEqual([latest], tiller_obj.list_releases()) + + mock_stub.assert_called_once_with(tiller_obj.channel) + mock_stub.return_value.ListReleases.assert_called_once_with( + mock_list_releases_request.return_value, + tiller_obj.timeout, + metadata=tiller_obj.metadata) + + mock_list_releases_request.assert_called_once_with( + offset="", + limit=tiller.LIST_RELEASES_PAGE_SIZE, + status_codes=tiller.const.STATUS_ALL) @mock.patch('armada.handlers.tiller.K8s') @mock.patch('armada.handlers.tiller.grpc') @@ -251,9 +281,8 @@ class TillerTestCase(base.ArmadaTestCase): offset='' if i == 0 else str(tiller.LIST_RELEASES_PAGE_SIZE * i), limit=tiller.LIST_RELEASES_PAGE_SIZE, - status_codes=[ - tiller.const.STATUS_DEPLOYED, tiller.const.STATUS_FAILED - ]) for i in range(page_count) + status_codes=tiller.const.STATUS_ALL) + for i in range(page_count) ] mock_list_releases_request.assert_has_calls(list_release_request_calls) diff --git a/armada/utils/release.py b/armada/utils/release.py index 27b74003..c40f5ad0 100644 --- a/armada/utils/release.py +++ b/armada/utils/release.py @@ -27,3 +27,13 @@ def label_selectors(labels): :return: string of k8s labels """ return ",".join(["%s=%s" % (k, v) for k, v in labels.items()]) + + +def get_release_status(release): + """ + :param release: protobuf release object + + :return: status name of release + """ + + return release.info.status.Code.Name(release.info.status.code) diff --git a/doc/source/operations/exceptions/armada-exceptions.inc b/doc/source/operations/exceptions/armada-exceptions.inc index 7515c44e..a86679a3 100644 --- a/doc/source/operations/exceptions/armada-exceptions.inc +++ b/doc/source/operations/exceptions/armada-exceptions.inc @@ -46,3 +46,8 @@ Armada Exceptions :members: :show-inheritance: :undoc-members: + +.. autoexception:: armada.exceptions.armada_exceptions.UnexpectedReleaseStatusException + :members: + :show-inheritance: + :undoc-members: