Correctly identify latest release

This fixes the following issues with listing releases from tiller,
which could cause Armada to be confused about the state of the
latest release, and do the wrong thing.

- Was not filtering out old releases, so we could find both a
  FAILED and DEPLOYED release for the same chart. When this is the
  case it likely means the FAILED release is the latest, since
  otherwise armada would have purged the release (and all its
  history) upon seeing the FAILED release in a previous run.
  The issue is that after the purge it would try to upgrade
  rather than re-install, since it also sees the old DEPLOYED
  release. Also if a release gets manually fixed (DEPLOYED)
  outside of armada, armada still sees the old FAILED release,
  and will purge the fixed release.
- Was only fetching DEPLOYED and FAILED releases from tiller, so if
  the latest release has another status Armada won't see it at all.

This changes to:

- Fetch releases with all statuses.
- Filter out old releases.
- Raise an error if latest release has status other than DEPLOYED
  or FAILED, since it's not clear what other action to take in
  this scenario.

Change-Id: I84712c1486c19d2bba302bf3420df916265ba70c
This commit is contained in:
Sean Eagan 2018-10-16 15:42:51 -05:00
parent f30c68e627
commit 6b96bbf28d
9 changed files with 170 additions and 85 deletions

View File

@ -27,8 +27,19 @@ DEFAULT_CHART_TIMEOUT = 900
# Tiller
DEFAULT_TILLER_TIMEOUT = 300
STATUS_UNKNOWN = 'UNKNOWN'
STATUS_DEPLOYED = 'DEPLOYED'
STATUS_DELETED = 'DELETED'
STATUS_DELETING = 'DELETING'
STATUS_FAILED = 'FAILED'
STATUS_PENDING_INSTALL = 'PENDING_INSTALL'
STATUS_PENDING_UPGRADE = 'PENDING_UPGRADE'
STATUS_PENDING_ROLLBACK = 'PENDING_ROLLBACK'
STATUS_ALL = [
STATUS_UNKNOWN, STATUS_DEPLOYED, STATUS_DELETED, STATUS_DELETING,
STATUS_FAILED, STATUS_PENDING_INSTALL, STATUS_PENDING_UPGRADE,
STATUS_PENDING_ROLLBACK
]
# Kubernetes
DEFAULT_K8S_TIMEOUT = 300

View File

@ -86,3 +86,15 @@ class WaitException(ArmadaException):
def __init__(self, message):
self._message = message
super(WaitException, self).__init__(message)
class UnexpectedReleaseStatusException(ArmadaException):
'''
Exception that occurs when armada encounters an existing release for a
chart with an unexpected status which armada does not know what to do with.
'''
def __init__(self, release_name, status):
self._message = "Found release {} in unexpected status {}".format(
release_name, status)
super(UnexpectedReleaseStatusException, self).__init__(self._message)

View File

@ -177,26 +177,6 @@ class Armada(object):
chart_name = chart.get('chart_name')
raise source_exceptions.ChartSourceException(ct_type, chart_name)
def _get_releases_by_status(self):
'''
Return a list of current releases with DEPLOYED or FAILED status
'''
deployed_releases = []
failed_releases = []
known_releases = self.tiller.list_charts()
for release in known_releases:
if release[4] == const.STATUS_DEPLOYED:
deployed_releases.append(release)
elif release[4] == const.STATUS_FAILED:
failed_releases.append(release)
else:
# tiller.list_charts() only looks at DEPLOYED/FAILED so
# this should be unreachable
LOG.debug('Ignoring release %s in status %s.', release[0],
release[4])
return deployed_releases, failed_releases
def sync(self):
'''
Synchronize Helm with the Armada Config(s)
@ -216,8 +196,7 @@ class Armada(object):
# a more cleaner format
self.pre_flight_ops()
# extract known charts on tiller right now
deployed_releases, failed_releases = self._get_releases_by_status()
known_releases = self.tiller.list_releases()
manifest_data = self.manifest.get(const.KEYWORD_ARMADA, {})
prefix = manifest_data.get(const.KEYWORD_PREFIX)
@ -250,8 +229,7 @@ class Armada(object):
set_current_chart(chart)
try:
return self.chart_deploy.execute(chart, cg_test_all_charts,
prefix, deployed_releases,
failed_releases)
prefix, known_releases)
finally:
set_current_chart(None)

View File

@ -16,13 +16,14 @@ from oslo_log import log as logging
import time
import yaml
from armada import const
from armada.exceptions import armada_exceptions
from armada.handlers.chartbuilder import ChartBuilder
from armada.handlers.test import test_release_for_success
from armada.handlers.release_diff import ReleaseDiff
from armada.handlers.wait import ChartWait
from armada.exceptions import tiller_exceptions
from armada.utils.release import release_prefixer
from armada.utils.release import release_prefixer, get_release_status
LOG = logging.getLogger(__name__)
@ -39,8 +40,7 @@ class ChartDeploy(object):
self.timeout = timeout
self.tiller = tiller
def execute(self, chart, cg_test_all_charts, prefix, deployed_releases,
failed_releases):
def execute(self, chart, cg_test_all_charts, prefix, known_releases):
namespace = chart.get('namespace')
release = chart.get('release')
release_name = release_prefixer(prefix, release)
@ -55,8 +55,35 @@ class ChartDeploy(object):
protected = chart.get('protected', {})
p_continue = protected.get('continue_processing', False)
old_release = self.find_chart_release(known_releases, release_name)
status = None
if old_release:
status = get_release_status(old_release)
if status not in [const.STATUS_FAILED, const.STATUS_DEPLOYED]:
raise armada_exceptions.UnexpectedReleaseStatusException(
release_name, status)
chart_wait = ChartWait(
self.tiller.k8s,
release_name,
chart,
namespace,
k8s_wait_attempts=self.k8s_wait_attempts,
k8s_wait_attempt_sleep=self.k8s_wait_attempt_sleep,
timeout=self.timeout)
native_wait_enabled = chart_wait.is_native_enabled()
# Begin Chart timeout deadline
deadline = time.time() + chart_wait.get_timeout()
chartbuilder = ChartBuilder(chart)
new_chart = chartbuilder.get_helm_chart()
# Check for existing FAILED release, and purge
if release_name in [rel[0] for rel in failed_releases]:
if status == const.STATUS_FAILED:
LOG.info('Purging FAILED release %s before deployment.',
release_name)
if protected:
@ -78,35 +105,19 @@ class ChartDeploy(object):
self.tiller.uninstall_release(release_name)
result['purge'] = release_name
chart_wait = ChartWait(
self.tiller.k8s,
release_name,
chart,
namespace,
k8s_wait_attempts=self.k8s_wait_attempts,
k8s_wait_attempt_sleep=self.k8s_wait_attempt_sleep,
timeout=self.timeout)
native_wait_enabled = chart_wait.is_native_enabled()
# Begin Chart timeout deadline
deadline = time.time() + chart_wait.get_timeout()
chartbuilder = ChartBuilder(chart)
new_chart = chartbuilder.get_helm_chart()
# TODO(mark-burnett): It may be more robust to directly call
# tiller status to decide whether to install/upgrade rather
# than checking for list membership.
if release_name in [rel[0] for rel in deployed_releases]:
if status == const.STATUS_DEPLOYED:
# indicate to the end user what path we are taking
LOG.info("Upgrading release %s in namespace %s", release_name,
namespace)
# extract the installed chart and installed values from the
# latest release so we can compare to the intended state
old_chart, old_values_string = self.find_release_chart(
deployed_releases, release_name)
old_chart = old_release.chart
old_values_string = old_release.config.raw
upgrade = chart.get('upgrade', {})
disable_hooks = upgrade.get('no_hooks', False)
@ -236,10 +247,13 @@ class ChartDeploy(object):
def get_diff(self, old_chart, old_values, new_chart, values):
return ReleaseDiff(old_chart, old_values, new_chart, values).get_diff()
def find_release_chart(self, known_releases, release_name):
def find_chart_release(self, known_releases, release_name):
'''
Find a release given a list of known_releases and a release name
'''
for release, _, chart, values, _ in known_releases:
if release == release_name:
return chart, values
for release in known_releases:
if release.name == release_name:
return release
LOG.info("known: %s, release_name: %s",
list(map(lambda r: r.name, known_releases)), release_name)
return None

View File

@ -33,7 +33,7 @@ from armada import const
from armada.exceptions import tiller_exceptions as ex
from armada.handlers.k8s import K8s
from armada.handlers import test
from armada.utils.release import label_selectors
from armada.utils.release import label_selectors, get_release_status
TILLER_VERSION = b'2.10.0'
GRPC_EPSILON = 60
@ -202,7 +202,7 @@ class Tiller(object):
req = ListReleasesRequest(
offset=next_release_expected,
limit=LIST_RELEASES_PAGE_SIZE,
status_codes=[const.STATUS_DEPLOYED, const.STATUS_FAILED])
status_codes=const.STATUS_ALL)
LOG.debug('Tiller ListReleases() with timeout=%s, request=%s',
self.timeout, req)
@ -242,12 +242,32 @@ class Tiller(object):
for index in range(LIST_RELEASES_ATTEMPTS):
attempt = index + 1
try:
return get_results()
releases = get_results()
except ex.TillerListReleasesPagingException:
LOG.warning('List releases paging failed on attempt %s/%s',
attempt, LIST_RELEASES_ATTEMPTS)
if attempt == LIST_RELEASES_ATTEMPTS:
raise
else:
# Filter out old releases, similar to helm cli:
# https://github.com/helm/helm/blob/1e26b5300b5166fabb90002535aacd2f9cc7d787/cmd/helm/list.go#L196
latest_versions = {}
for r in releases:
max = latest_versions.get(r.name)
if max is not None:
if max > r.version:
continue
latest_versions[r.name] = r.version
latest_releases = []
for r in releases:
if latest_versions[r.name] == r.version:
LOG.debug('Found release %s, version %s, status: %s',
r.name, r.version, get_release_status(r))
latest_releases.append(r)
return latest_releases
def get_chart_templates(self, template_name, name, release_name, namespace,
chart, disable_hooks, values):
@ -328,8 +348,6 @@ class Tiller(object):
latest_release.info.status.Code.Name(
latest_release.info.status.code))
charts.append(release)
LOG.debug('Found release %s, version %s, status: %s',
release[0], release[1], release[4])
except (AttributeError, IndexError) as e:
LOG.debug('%s while getting releases: %s, ex=%s',
e.__class__.__name__, latest_release, e)

View File

@ -20,7 +20,7 @@ from armada.handlers import armada
from armada.handlers import chart_deploy
from armada.tests.unit import base
from armada.tests.test_utils import AttrDict, makeMockThreadSafe
from armada.utils.release import release_prefixer
from armada.utils.release import release_prefixer, get_release_status
from armada.exceptions import ManifestException
from armada.exceptions.override_exceptions import InvalidOverrideValueException
from armada.exceptions.validate_exceptions import InvalidManifestException
@ -354,7 +354,7 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
cg_test_all_charts = chart_group.get('test_charts', True)
m_tiller = mock_tiller.return_value
m_tiller.list_charts.return_value = known_releases
m_tiller.list_releases.return_value = known_releases
if test_failure_to_run:
@ -393,7 +393,7 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
'enabled', True))
expected_apply = True
if release_name not in [x[0] for x in known_releases]:
if release_name not in [x.name for x in known_releases]:
expected_install_release_calls.append(
mock.call(
mock_chartbuilder().get_helm_chart(),
@ -407,11 +407,11 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
else:
target_release = None
for known_release in known_releases:
if known_release[0] == release_name:
if known_release.name == release_name:
target_release = known_release
break
if target_release:
status = target_release[4]
status = get_release_status(target_release)
if status == const.STATUS_FAILED:
protected = chart.get('protected', {})
if not protected:
@ -527,6 +527,19 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
c for c in yaml_documents if c['data'].get('chart_name') == name
][0]
def get_mock_release(self, name, status):
status_mock = mock.Mock()
status_mock.return_value = status
chart = self._get_chart_by_name(name)
mock_release = mock.Mock(
version=1,
chart=chart,
config=mock.Mock(raw="{}"),
info=mock.Mock(
status=mock.Mock(Code=mock.MagicMock(Name=status_mock))))
mock_release.name = name
return mock_release
def test_armada_sync_with_no_deployed_releases(self):
known_releases = []
self._test_sync(known_releases)
@ -534,50 +547,45 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
def test_armada_sync_with_one_deployed_release(self):
c1 = 'armada-test_chart_1'
known_releases = [[c1, None, None, "{}", const.STATUS_DEPLOYED]]
known_releases = [self.get_mock_release(c1, const.STATUS_DEPLOYED)]
self._test_sync(known_releases)
def test_armada_sync_with_one_deployed_release_no_diff(self):
c1 = 'armada-test_chart_1'
known_releases = [[c1, None, None, "{}", const.STATUS_DEPLOYED]]
known_releases = [self.get_mock_release(c1, const.STATUS_DEPLOYED)]
self._test_sync(known_releases, diff=set())
def test_armada_sync_with_both_deployed_releases(self):
c1 = 'armada-test_chart_1'
c2 = 'armada-test_chart_2'
known_releases = [[c1, None, None, "{}", const.STATUS_DEPLOYED],
[c2, None, None, "{}", const.STATUS_DEPLOYED]]
known_releases = [
self.get_mock_release(c1, const.STATUS_DEPLOYED),
self.get_mock_release(c2, const.STATUS_DEPLOYED)
]
self._test_sync(known_releases)
def test_armada_sync_with_unprotected_releases(self):
c1 = 'armada-test_chart_1'
known_releases = [[
c1, None,
self._get_chart_by_name(c1), None, const.STATUS_FAILED
]]
known_releases = [self.get_mock_release(c1, const.STATUS_FAILED)]
self._test_sync(known_releases)
def test_armada_sync_with_protected_releases_continue(self):
c1 = 'armada-test_chart_1'
c2 = 'armada-test_chart_2'
known_releases = [[
c2, None,
self._get_chart_by_name(c2), None, const.STATUS_FAILED
], [c1, None,
self._get_chart_by_name(c1), None, const.STATUS_FAILED]]
known_releases = [
self.get_mock_release(c2, const.STATUS_FAILED),
self.get_mock_release(c1, const.STATUS_FAILED)
]
self._test_sync(known_releases)
def test_armada_sync_with_protected_releases_halt(self):
c3 = 'armada-test_chart_3'
known_releases = [[
c3, None,
self._get_chart_by_name(c3), None, const.STATUS_FAILED
]]
known_releases = [self.get_mock_release(c3, const.STATUS_FAILED)]
def _test_method():
self._test_sync(known_releases)

View File

@ -201,9 +201,39 @@ class TillerTestCase(base.ArmadaTestCase):
mock_list_releases_request.assert_called_once_with(
offset="",
limit=tiller.LIST_RELEASES_PAGE_SIZE,
status_codes=[
tiller.const.STATUS_DEPLOYED, tiller.const.STATUS_FAILED
])
status_codes=tiller.const.STATUS_ALL)
@mock.patch('armada.handlers.tiller.K8s')
@mock.patch('armada.handlers.tiller.grpc')
@mock.patch.object(tiller, 'ListReleasesRequest')
@mock.patch.object(tiller, 'ReleaseServiceStub')
def test_list_releases_returns_latest_only(
self, mock_stub, mock_list_releases_request, mock_grpc, _):
latest = mock.Mock(version=3)
releases = [mock.Mock(version=2), latest, mock.Mock(version=1)]
for r in releases:
r.name = 'test'
mock_stub.return_value.ListReleases.return_value = [
mock.Mock(
next='',
count=len(releases),
total=len(releases),
releases=releases)
]
tiller_obj = tiller.Tiller('host', '8080', None)
self.assertEqual([latest], tiller_obj.list_releases())
mock_stub.assert_called_once_with(tiller_obj.channel)
mock_stub.return_value.ListReleases.assert_called_once_with(
mock_list_releases_request.return_value,
tiller_obj.timeout,
metadata=tiller_obj.metadata)
mock_list_releases_request.assert_called_once_with(
offset="",
limit=tiller.LIST_RELEASES_PAGE_SIZE,
status_codes=tiller.const.STATUS_ALL)
@mock.patch('armada.handlers.tiller.K8s')
@mock.patch('armada.handlers.tiller.grpc')
@ -251,9 +281,8 @@ class TillerTestCase(base.ArmadaTestCase):
offset=''
if i == 0 else str(tiller.LIST_RELEASES_PAGE_SIZE * i),
limit=tiller.LIST_RELEASES_PAGE_SIZE,
status_codes=[
tiller.const.STATUS_DEPLOYED, tiller.const.STATUS_FAILED
]) for i in range(page_count)
status_codes=tiller.const.STATUS_ALL)
for i in range(page_count)
]
mock_list_releases_request.assert_has_calls(list_release_request_calls)

View File

@ -27,3 +27,13 @@ def label_selectors(labels):
:return: string of k8s labels
"""
return ",".join(["%s=%s" % (k, v) for k, v in labels.items()])
def get_release_status(release):
"""
:param release: protobuf release object
:return: status name of release
"""
return release.info.status.Code.Name(release.info.status.code)

View File

@ -46,3 +46,8 @@ Armada Exceptions
:members:
:show-inheritance:
:undoc-members:
.. autoexception:: armada.exceptions.armada_exceptions.UnexpectedReleaseStatusException
:members:
:show-inheritance:
:undoc-members: