diff --git a/armada/exceptions/armada_exceptions.py b/armada/exceptions/armada_exceptions.py index 052a66e6..4438e9c0 100644 --- a/armada/exceptions/armada_exceptions.py +++ b/armada/exceptions/armada_exceptions.py @@ -31,14 +31,15 @@ class ArmadaTimeoutException(ArmadaException): class ProtectedReleaseException(ArmadaException): ''' - Exception that occurs when Armada encounters a FAILED release that is - designated `protected` in the Chart and `continue_processing` is False. + Exception that occurs when Armada encounters a release with status other + than DEPLOYED that is designated `protected` in the Chart and + `continue_processing` is False. ''' - def __init__(self, reason): + def __init__(self, release, status): self._message = ( - 'Armada encountered protected release %s in FAILED status' % - reason) + 'Armada encountered protected release {} in {} status'.format( + release, status)) super(ProtectedReleaseException, self).__init__(self._message) @@ -88,13 +89,16 @@ class WaitException(ArmadaException): super(WaitException, self).__init__(message) -class UnexpectedReleaseStatusException(ArmadaException): +class DeploymentLikelyPendingException(ArmadaException): ''' - Exception that occurs when armada encounters an existing release for a - chart with an unexpected status which armada does not know what to do with. + Exception that occurs when it is detected that an existing release + operation (e.g. install, update, rollback, delete) is likely still pending. ''' - def __init__(self, release_name, status): - self._message = "Found release {} in unexpected status {}".format( - release_name, status) - super(UnexpectedReleaseStatusException, self).__init__(self._message) + def __init__(self, release, status, last_deployment_age, timeout): + self._message = ( + 'Existing deployment likely pending ' + 'release={}, status={}, ' + '(last deployment age={}s) < (chart wait timeout={}s)'.format( + release, status, last_deployment_age, timeout)) + super(DeploymentLikelyPendingException, self).__init__(self._message) diff --git a/armada/handlers/chart_deploy.py b/armada/handlers/chart_deploy.py index 203a1459..76a3b128 100644 --- a/armada/handlers/chart_deploy.py +++ b/armada/handlers/chart_deploy.py @@ -52,19 +52,12 @@ class ChartDeploy(object): result = {} - protected = chart.get('protected', {}) - p_continue = protected.get('continue_processing', False) - old_release = self.find_chart_release(known_releases, release_name) status = None if old_release: status = r.get_release_status(old_release) - if status not in [const.STATUS_FAILED, const.STATUS_DEPLOYED]: - raise armada_exceptions.UnexpectedReleaseStatusException( - release_name, status) - chart_wait = ChartWait( self.tiller.k8s, release_name, @@ -82,29 +75,6 @@ class ChartDeploy(object): chartbuilder = ChartBuilder(chart) new_chart = chartbuilder.get_helm_chart() - # Check for existing FAILED release, and purge - if status == const.STATUS_FAILED: - LOG.info('Purging FAILED release %s before deployment.', - release_name) - if protected: - if p_continue: - LOG.warn( - 'Release %s is `protected`, ' - 'continue_processing=True. Operator must ' - 'handle FAILED release manually.', release_name) - result['protected'] = release_name - return result - else: - LOG.error( - 'Release %s is `protected`, ' - 'continue_processing=False.', release_name) - raise armada_exceptions.ProtectedReleaseException( - release_name) - else: - # Purge the release - self.tiller.uninstall_release(release_name) - result['purge'] = release_name - # TODO(mark-burnett): It may be more robust to directly call # tiller status to decide whether to install/upgrade rather # than checking for list membership. @@ -181,6 +151,62 @@ class ChartDeploy(object): tiller_result.__dict__) result['upgrade'] = release_name else: + # Check for release with status other than DEPLOYED + if status: + if status != const.STATUS_FAILED: + LOG.warn( + 'Unexpected release status encountered ' + 'release=%s, status=%s', release_name, status) + + # Make best effort to determine whether a deployment is + # likely pending, by checking if the last deployment + # was started within the timeout window of the chart. + last_deployment_age = r.get_last_deployment_age( + old_release) + wait_timeout = chart_wait.get_timeout() + likely_pending = last_deployment_age <= wait_timeout + if likely_pending: + # Give up if a deployment is likely pending, we do not + # want to have multiple operations going on for the + # same release at the same time. + raise armada_exceptions.\ + DeploymentLikelyPendingException( + release_name, status, last_deployment_age, + wait_timeout) + else: + # Release is likely stuck in an unintended (by tiller) + # state. Log and continue on with remediation steps + # below. + LOG.info( + 'Old release %s likely stuck in status %s, ' + '(last deployment age=%ss) >= ' + '(chart wait timeout=%ss)', release, status, + last_deployment_age, wait_timeout) + + protected = chart.get('protected', {}) + if protected: + p_continue = protected.get('continue_processing', False) + if p_continue: + LOG.warn( + 'Release %s is `protected`, ' + 'continue_processing=True. Operator must ' + 'handle %s release manually.', release_name, + status) + result['protected'] = release_name + return result + else: + LOG.error( + 'Release %s is `protected`, ' + 'continue_processing=False.', release_name) + raise armada_exceptions.ProtectedReleaseException( + release_name, status) + else: + # Purge the release + LOG.info('Purging release %s with status %s', release_name, + status) + self.tiller.uninstall_release(release_name) + result['purge'] = release_name + timer = int(round(deadline - time.time())) LOG.info( "Installing release %s in namespace %s, wait=%s, " diff --git a/armada/utils/release.py b/armada/utils/release.py index dbd51d9b..55fdda75 100644 --- a/armada/utils/release.py +++ b/armada/utils/release.py @@ -14,6 +14,8 @@ from armada.handlers.test import Test +import time + def release_prefixer(prefix, release): ''' @@ -53,3 +55,17 @@ def get_last_test_result(release): if not status.HasField('last_test_suite_run'): return None return Test.get_test_suite_run_success(status.last_test_suite_run) + + +def get_last_deployment_age(release): + """ + :param release: protobuf release object + + :return: age in seconds of last deployment of release + """ + + last_deployed = release.info.last_deployed.seconds + now = int(time.time()) + last_deployment_age = now - last_deployed + + return last_deployment_age diff --git a/doc/source/operations/exceptions/armada-exceptions.inc b/doc/source/operations/exceptions/armada-exceptions.inc index a86679a3..87ca4989 100644 --- a/doc/source/operations/exceptions/armada-exceptions.inc +++ b/doc/source/operations/exceptions/armada-exceptions.inc @@ -47,7 +47,7 @@ Armada Exceptions :show-inheritance: :undoc-members: -.. autoexception:: armada.exceptions.armada_exceptions.UnexpectedReleaseStatusException +.. autoexception:: armada.exceptions.armada_exceptions.DeploymentLikelyPendingException :members: :show-inheritance: :undoc-members: