Merge "[chart_deploy] Handle unknown chart status in a more resilient manner"
This commit is contained in:
commit
55cf2bc92b
|
@ -185,68 +185,6 @@ class ChartDeploy(object):
|
||||||
|
|
||||||
deploy = upgrade
|
deploy = upgrade
|
||||||
else:
|
else:
|
||||||
# Check for release with status other than DEPLOYED
|
|
||||||
if status:
|
|
||||||
if status != const.STATUS_FAILED:
|
|
||||||
LOG.warn(
|
|
||||||
'Unexpected release status encountered '
|
|
||||||
'release=%s, status=%s', release_name, status)
|
|
||||||
|
|
||||||
# Make best effort to determine whether a deployment is
|
|
||||||
# likely pending, by checking if the last deployment
|
|
||||||
# was started within the timeout window of the chart.
|
|
||||||
last_deployment_age = r.get_last_deployment_age(
|
|
||||||
old_release)
|
|
||||||
likely_pending = last_deployment_age <= wait_timeout
|
|
||||||
if likely_pending:
|
|
||||||
# Give up if a deployment is likely pending, we do not
|
|
||||||
# want to have multiple operations going on for the
|
|
||||||
# same release at the same time.
|
|
||||||
raise armada_exceptions.\
|
|
||||||
DeploymentLikelyPendingException(
|
|
||||||
release_name, status, last_deployment_age,
|
|
||||||
wait_timeout)
|
|
||||||
else:
|
|
||||||
# Release is likely stuck in an unintended (by tiller)
|
|
||||||
# state. Log and continue on with remediation steps
|
|
||||||
# below.
|
|
||||||
LOG.info(
|
|
||||||
'Old release %s likely stuck in status %s, '
|
|
||||||
'(last deployment age=%ss) >= '
|
|
||||||
'(chart wait timeout=%ss)', release, status,
|
|
||||||
last_deployment_age, wait_timeout)
|
|
||||||
|
|
||||||
protected = chart.get('protected', {})
|
|
||||||
if protected:
|
|
||||||
p_continue = protected.get('continue_processing', False)
|
|
||||||
if p_continue:
|
|
||||||
LOG.warn(
|
|
||||||
'Release %s is `protected`, '
|
|
||||||
'continue_processing=True. Operator must '
|
|
||||||
'handle %s release manually.', release_name,
|
|
||||||
status)
|
|
||||||
result['protected'] = release_name
|
|
||||||
return result
|
|
||||||
else:
|
|
||||||
LOG.error(
|
|
||||||
'Release %s is `protected`, '
|
|
||||||
'continue_processing=False.', release_name)
|
|
||||||
raise armada_exceptions.ProtectedReleaseException(
|
|
||||||
release_name, status)
|
|
||||||
else:
|
|
||||||
# Purge the release
|
|
||||||
with metrics.CHART_DELETE.get_context(manifest_name,
|
|
||||||
chart_name):
|
|
||||||
|
|
||||||
LOG.info(
|
|
||||||
'Purging release %s with status %s', release_name,
|
|
||||||
status)
|
|
||||||
chart_delete = ChartDelete(
|
|
||||||
chart, release_name, self.tiller)
|
|
||||||
chart_delete.delete()
|
|
||||||
result['purge'] = release_name
|
|
||||||
|
|
||||||
action = metrics.ChartDeployAction.INSTALL
|
|
||||||
|
|
||||||
def install():
|
def install():
|
||||||
timer = int(round(deadline - time.time()))
|
timer = int(round(deadline - time.time()))
|
||||||
|
@ -267,6 +205,55 @@ class ChartDeploy(object):
|
||||||
tiller_result.__dict__)
|
tiller_result.__dict__)
|
||||||
result['install'] = release_name
|
result['install'] = release_name
|
||||||
|
|
||||||
|
# Check for release with status other than DEPLOYED
|
||||||
|
if status:
|
||||||
|
if status != const.STATUS_FAILED:
|
||||||
|
LOG.warn(
|
||||||
|
'Unexpected release status encountered '
|
||||||
|
'release=%s, status=%s', release_name, status)
|
||||||
|
|
||||||
|
# Make best effort to determine whether a deployment is
|
||||||
|
# likely pending, by checking if the last deployment
|
||||||
|
# was started within the timeout window of the chart.
|
||||||
|
last_deployment_age = r.get_last_deployment_age(
|
||||||
|
old_release)
|
||||||
|
likely_pending = last_deployment_age <= wait_timeout
|
||||||
|
if likely_pending:
|
||||||
|
# We don't take any deploy action and wait for the
|
||||||
|
# release to get deployed.
|
||||||
|
deploy = noop
|
||||||
|
deadline = deadline - last_deployment_age
|
||||||
|
else:
|
||||||
|
# Release is likely stuck in an unintended (by tiller)
|
||||||
|
# state. Log and continue on with remediation steps
|
||||||
|
# below.
|
||||||
|
LOG.info(
|
||||||
|
'Old release %s likely stuck in status %s, '
|
||||||
|
'(last deployment age=%ss) >= '
|
||||||
|
'(chart wait timeout=%ss)', release, status,
|
||||||
|
last_deployment_age, wait_timeout)
|
||||||
|
res = self.purge_release(
|
||||||
|
chart, release_name, status, manifest_name,
|
||||||
|
chart_name, result)
|
||||||
|
if isinstance(res, dict):
|
||||||
|
if 'protected' in res:
|
||||||
|
return res
|
||||||
|
action = metrics.ChartDeployAction.INSTALL
|
||||||
|
deploy = install
|
||||||
|
else:
|
||||||
|
# The chart is in Failed state, hence we purge
|
||||||
|
# the chart and attempt to install it again.
|
||||||
|
res = self.purge_release(
|
||||||
|
chart, release_name, status, manifest_name, chart_name,
|
||||||
|
result)
|
||||||
|
if isinstance(res, dict):
|
||||||
|
if 'protected' in res:
|
||||||
|
return res
|
||||||
|
action = metrics.ChartDeployAction.INSTALL
|
||||||
|
deploy = install
|
||||||
|
|
||||||
|
if status is None:
|
||||||
|
action = metrics.ChartDeployAction.INSTALL
|
||||||
deploy = install
|
deploy = install
|
||||||
|
|
||||||
# Deploy
|
# Deploy
|
||||||
|
@ -298,6 +285,35 @@ class ChartDeploy(object):
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def purge_release(
        self, chart, release_name, status, manifest_name, chart_name,
        result):
    """Purge a release unless its chart is marked as ``protected``.

    :param chart: Chart data; its optional ``protected`` mapping is read
        to decide whether the release may be deleted.
    :param release_name: Name of the release to purge.
    :param status: Status of the release; used in log messages and passed
        to the raised exception.
    :param manifest_name: Passed to the chart-delete metrics context.
    :param chart_name: Passed to the chart-delete metrics context.
    :param result: Dict updated in place. ``result['protected']`` is set
        when a protected release is skipped; ``result['purge']`` is set
        when the release is deleted.
    :returns: ``result`` when the release is protected and
        ``continue_processing`` is truthy; ``None`` after a purge.
        Callers distinguish the two cases by checking for a dict that
        contains the ``protected`` key.
    :raises armada_exceptions.ProtectedReleaseException: When the release
        is protected and ``continue_processing`` is falsy.
    """
    protected = chart.get('protected', {})

    if not protected:
        # Purge the release
        with metrics.CHART_DELETE.get_context(manifest_name, chart_name):
            LOG.info(
                'Purging release %s with status %s', release_name, status)
            chart_delete = ChartDelete(chart, release_name, self.tiller)
            chart_delete.delete()
            result['purge'] = release_name
        return None

    if not protected.get('continue_processing', False):
        LOG.error(
            'Release %s is `protected`, '
            'continue_processing=False.', release_name)
        raise armada_exceptions.ProtectedReleaseException(
            release_name, status)

    # Protected release, but the operator asked to keep going: record it
    # and leave the release untouched. ``warning`` is used because
    # ``warn`` is a deprecated alias in the standard logging API.
    LOG.warning(
        'Release %s is `protected`, '
        'continue_processing=True. Operator must '
        'handle %s release manually.', release_name, status)
    result['protected'] = release_name
    return result
|
||||||
|
|
||||||
def _test_chart(self, release_name, test_handler):
|
def _test_chart(self, release_name, test_handler):
|
||||||
success = test_handler.test_release_for_success()
|
success = test_handler.test_release_for_success()
|
||||||
if not success:
|
if not success:
|
||||||
|
|
Loading…
Reference in New Issue