summary refs log tree commit diff
diff options
context:
space:
mode:
author Zuul <zuul@review.openstack.org> 2019-01-28 18:15:50 +0000
committer Gerrit Code Review <review@openstack.org> 2019-01-28 18:15:50 +0000
commit 52f29ddf732f50fe457c518587c0fc115c8eb188 (patch)
tree 538aec35d2a036245f07c5283a70277b835b9a8f
parent ab3d68d2708fadeca89830777f6eb052226e8367 (diff)
parent 2310ddbc2cd7b676ea171ecb88b317f44bc1d05b (diff)
Merge "Remediate releases stuck in non-DEPLOYED statuses"
-rw-r--r-- armada/exceptions/armada_exceptions.py 28
-rw-r--r-- armada/handlers/chart_deploy.py 86
-rw-r--r-- armada/utils/release.py 16
-rw-r--r-- doc/source/operations/exceptions/armada-exceptions.inc 2
4 files changed, 89 insertions, 43 deletions
diff --git a/armada/exceptions/armada_exceptions.py b/armada/exceptions/armada_exceptions.py
index 052a66e..4438e9c 100644
--- a/armada/exceptions/armada_exceptions.py
+++ b/armada/exceptions/armada_exceptions.py
@@ -31,14 +31,15 @@ class ArmadaTimeoutException(ArmadaException):
31 31
32class ProtectedReleaseException(ArmadaException): 32class ProtectedReleaseException(ArmadaException):
33 ''' 33 '''
34 Exception that occurs when Armada encounters a FAILED release that is 34 Exception that occurs when Armada encounters a release with status other
35 designated `protected` in the Chart and `continue_processing` is False. 35 than DEPLOYED that is designated `protected` in the Chart and
36 `continue_processing` is False.
36 ''' 37 '''
37 38
38 def __init__(self, reason): 39 def __init__(self, release, status):
39 self._message = ( 40 self._message = (
40 'Armada encountered protected release %s in FAILED status' % 41 'Armada encountered protected release {} in {} status'.format(
41 reason) 42 release, status))
42 super(ProtectedReleaseException, self).__init__(self._message) 43 super(ProtectedReleaseException, self).__init__(self._message)
43 44
44 45
@@ -88,13 +89,16 @@ class WaitException(ArmadaException):
88 super(WaitException, self).__init__(message) 89 super(WaitException, self).__init__(message)
89 90
90 91
91class UnexpectedReleaseStatusException(ArmadaException): 92class DeploymentLikelyPendingException(ArmadaException):
92 ''' 93 '''
93 Exception that occurs when armada encounters an existing release for a 94 Exception that occurs when it is detected that an existing release
94 chart with an unexpected status which armada does not know what to do with. 95 operation (e.g. install, update, rollback, delete) is likely still pending.
95 ''' 96 '''
96 97
97 def __init__(self, release_name, status): 98 def __init__(self, release, status, last_deployment_age, timeout):
98 self._message = "Found release {} in unexpected status {}".format( 99 self._message = (
99 release_name, status) 100 'Existing deployment likely pending '
100 super(UnexpectedReleaseStatusException, self).__init__(self._message) 101 'release={}, status={}, '
102 '(last deployment age={}s) < (chart wait timeout={}s)'.format(
103 release, status, last_deployment_age, timeout))
104 super(DeploymentLikelyPendingException, self).__init__(self._message)
diff --git a/armada/handlers/chart_deploy.py b/armada/handlers/chart_deploy.py
index 203a145..76a3b12 100644
--- a/armada/handlers/chart_deploy.py
+++ b/armada/handlers/chart_deploy.py
@@ -52,19 +52,12 @@ class ChartDeploy(object):
52 52
53 result = {} 53 result = {}
54 54
55 protected = chart.get('protected', {})
56 p_continue = protected.get('continue_processing', False)
57
58 old_release = self.find_chart_release(known_releases, release_name) 55 old_release = self.find_chart_release(known_releases, release_name)
59 56
60 status = None 57 status = None
61 if old_release: 58 if old_release:
62 status = r.get_release_status(old_release) 59 status = r.get_release_status(old_release)
63 60
64 if status not in [const.STATUS_FAILED, const.STATUS_DEPLOYED]:
65 raise armada_exceptions.UnexpectedReleaseStatusException(
66 release_name, status)
67
68 chart_wait = ChartWait( 61 chart_wait = ChartWait(
69 self.tiller.k8s, 62 self.tiller.k8s,
70 release_name, 63 release_name,
@@ -82,29 +75,6 @@ class ChartDeploy(object):
82 chartbuilder = ChartBuilder(chart) 75 chartbuilder = ChartBuilder(chart)
83 new_chart = chartbuilder.get_helm_chart() 76 new_chart = chartbuilder.get_helm_chart()
84 77
85 # Check for existing FAILED release, and purge
86 if status == const.STATUS_FAILED:
87 LOG.info('Purging FAILED release %s before deployment.',
88 release_name)
89 if protected:
90 if p_continue:
91 LOG.warn(
92 'Release %s is `protected`, '
93 'continue_processing=True. Operator must '
94 'handle FAILED release manually.', release_name)
95 result['protected'] = release_name
96 return result
97 else:
98 LOG.error(
99 'Release %s is `protected`, '
100 'continue_processing=False.', release_name)
101 raise armada_exceptions.ProtectedReleaseException(
102 release_name)
103 else:
104 # Purge the release
105 self.tiller.uninstall_release(release_name)
106 result['purge'] = release_name
107
108 # TODO(mark-burnett): It may be more robust to directly call 78 # TODO(mark-burnett): It may be more robust to directly call
109 # tiller status to decide whether to install/upgrade rather 79 # tiller status to decide whether to install/upgrade rather
110 # than checking for list membership. 80 # than checking for list membership.
@@ -181,6 +151,62 @@ class ChartDeploy(object):
181 tiller_result.__dict__) 151 tiller_result.__dict__)
182 result['upgrade'] = release_name 152 result['upgrade'] = release_name
183 else: 153 else:
154 # Check for release with status other than DEPLOYED
155 if status:
156 if status != const.STATUS_FAILED:
157 LOG.warn(
158 'Unexpected release status encountered '
159 'release=%s, status=%s', release_name, status)
160
161 # Make best effort to determine whether a deployment is
162 # likely pending, by checking if the last deployment
163 # was started within the timeout window of the chart.
164 last_deployment_age = r.get_last_deployment_age(
165 old_release)
166 wait_timeout = chart_wait.get_timeout()
167 likely_pending = last_deployment_age <= wait_timeout
168 if likely_pending:
169 # Give up if a deployment is likely pending, we do not
170 # want to have multiple operations going on for the
171 # same release at the same time.
172 raise armada_exceptions.\
173 DeploymentLikelyPendingException(
174 release_name, status, last_deployment_age,
175 wait_timeout)
176 else:
177 # Release is likely stuck in an unintended (by tiller)
178 # state. Log and continue on with remediation steps
179 # below.
180 LOG.info(
181 'Old release %s likely stuck in status %s, '
182 '(last deployment age=%ss) >= '
183 '(chart wait timeout=%ss)', release, status,
184 last_deployment_age, wait_timeout)
185
186 protected = chart.get('protected', {})
187 if protected:
188 p_continue = protected.get('continue_processing', False)
189 if p_continue:
190 LOG.warn(
191 'Release %s is `protected`, '
192 'continue_processing=True. Operator must '
193 'handle %s release manually.', release_name,
194 status)
195 result['protected'] = release_name
196 return result
197 else:
198 LOG.error(
199 'Release %s is `protected`, '
200 'continue_processing=False.', release_name)
201 raise armada_exceptions.ProtectedReleaseException(
202 release_name, status)
203 else:
204 # Purge the release
205 LOG.info('Purging release %s with status %s', release_name,
206 status)
207 self.tiller.uninstall_release(release_name)
208 result['purge'] = release_name
209
184 timer = int(round(deadline - time.time())) 210 timer = int(round(deadline - time.time()))
185 LOG.info( 211 LOG.info(
186 "Installing release %s in namespace %s, wait=%s, " 212 "Installing release %s in namespace %s, wait=%s, "
diff --git a/armada/utils/release.py b/armada/utils/release.py
index dbd51d9..55fdda7 100644
--- a/armada/utils/release.py
+++ b/armada/utils/release.py
@@ -14,6 +14,8 @@
14 14
15from armada.handlers.test import Test 15from armada.handlers.test import Test
16 16
17import time
18
17 19
18def release_prefixer(prefix, release): 20def release_prefixer(prefix, release):
19 ''' 21 '''
@@ -53,3 +55,17 @@ def get_last_test_result(release):
53 if not status.HasField('last_test_suite_run'): 55 if not status.HasField('last_test_suite_run'):
54 return None 56 return None
55 return Test.get_test_suite_run_success(status.last_test_suite_run) 57 return Test.get_test_suite_run_success(status.last_test_suite_run)
58
59
60def get_last_deployment_age(release):
61 """
62 :param release: protobuf release object
63
64 :return: age in seconds of last deployment of release
65 """
66
67 last_deployed = release.info.last_deployed.seconds
68 now = int(time.time())
69 last_deployment_age = now - last_deployed
70
71 return last_deployment_age
diff --git a/doc/source/operations/exceptions/armada-exceptions.inc b/doc/source/operations/exceptions/armada-exceptions.inc
index a86679a..87ca498 100644
--- a/doc/source/operations/exceptions/armada-exceptions.inc
+++ b/doc/source/operations/exceptions/armada-exceptions.inc
@@ -47,7 +47,7 @@ Armada Exceptions
47 :show-inheritance: 47 :show-inheritance:
48 :undoc-members: 48 :undoc-members:
49 49
50.. autoexception:: armada.exceptions.armada_exceptions.UnexpectedReleaseStatusException 50.. autoexception:: armada.exceptions.armada_exceptions.DeploymentLikelyPendingException
51 :members: 51 :members:
52 :show-inheritance: 52 :show-inheritance:
53 :undoc-members: 53 :undoc-members: