Automate deletion of test pods

When running helm tests for a chart release multiple times in a site,
if the previous test pod is not deleted, then the test pod creation
can fail due to a name conflict. Armada/helm support immediate test pod
cleanup, but using this means that upon test failure, the test pod logs will
not be available for debugging purposes. Due to this, the recommended approach
for deleting test pods in Armada has been using `upgrade.pre.delete` actions.
Chart authors can accomplish test pod deletion using this feature;
however, it often takes a while — usually not until they test upgrading
the chart — for them to realize that this is necessary and to get it
implemented.

This patchset automates deletion of test pods directly before running tests by
using the `wait.labels` field in the chart doc when they exist to find all pods
in the release and then using their annotations to determine if they are test
pods and deleting them if so.

A later patchset is planned to implement defaulting of the wait labels when
they are not defined.

Change-Id: I2092f448acb88b5ade3b31b397f9c874c0061668
This commit is contained in:
Sean Eagan 2019-01-08 15:06:36 -06:00
parent 52f29ddf73
commit c31a961bf1
13 changed files with 202 additions and 108 deletions

View File

@ -39,7 +39,7 @@ class TestReleasesReleaseNameController(api.BaseResource):
with self.get_tiller(req, resp) as tiller:
cleanup = req.get_param_as_bool('cleanup')
test_handler = Test(release, tiller, cleanup=cleanup)
test_handler = Test({}, release, tiller, cleanup=cleanup)
success = test_handler.test_release_for_success()
if success:
@ -136,15 +136,14 @@ class TestReleasesManifestController(api.BaseResource):
cleanup = req.get_param_as_bool('cleanup')
enable_all = req.get_param_as_bool('enable_all')
cg_test_charts = group.get('test_charts')
test_values = chart.get('test', {})
test_handler = Test(
chart,
release_name,
tiller,
cg_test_charts=cg_test_charts,
cleanup=cleanup,
enable_all=enable_all,
test_values=test_values)
enable_all=enable_all)
if test_handler.test_enabled:
success = test_handler.test_release_for_success()

View File

@ -124,7 +124,10 @@ class TestChartManifest(CliAction):
if self.release:
if not self.ctx.obj.get('api', False):
test_handler = Test(self.release, tiller, cleanup=self.cleanup)
test_handler = Test({},
self.release,
tiller,
cleanup=self.cleanup)
test_handler.test_release_for_success()
else:
client = self.ctx.obj.get('CLIENT')
@ -156,14 +159,12 @@ class TestChartManifest(CliAction):
release_name = release_prefixer(
prefix, chart.get('release'))
if release_name in known_release_names:
test_values = chart.get('test', {})
test_handler = Test(
chart,
release_name,
tiller,
cleanup=self.cleanup,
enable_all=self.enable_all,
test_values=test_values)
enable_all=self.enable_all)
if test_handler.test_enabled:
test_handler.test_release_for_success()

View File

@ -27,8 +27,6 @@ DEFAULT_CHART_TIMEOUT = 900
# Tiller
DEFAULT_TILLER_TIMEOUT = 300
HELM_HOOK_ANNOTATION = 'helm.sh/hook'
HELM_TEST_HOOKS = ['test-success', 'test-failure']
STATUS_UNKNOWN = 'UNKNOWN'
STATUS_DEPLOYED = 'DEPLOYED'
STATUS_DELETED = 'DELETED'

View File

@ -232,12 +232,11 @@ class ChartDeploy(object):
just_deployed = ('install' in result) or ('upgrade' in result)
last_test_passed = old_release and r.get_last_test_result(old_release)
test_values = chart.get('test')
test_handler = Test(
chart,
release_name,
self.tiller,
cg_test_charts=cg_test_all_charts,
test_values=test_values)
cg_test_charts=cg_test_all_charts)
run_test = test_handler.test_enabled and (just_deployed or
not last_test_passed)

View File

@ -16,10 +16,9 @@ from oslo_log import log as logging
from armada import const
TESTRUN_STATUS_UNKNOWN = 0
TESTRUN_STATUS_SUCCESS = 1
TESTRUN_STATUS_FAILURE = 2
TESTRUN_STATUS_RUNNING = 3
from armada.handlers.wait import get_wait_labels
from armada.utils.release import label_selectors
from armada.utils.helm import get_test_suite_run_success, is_test_pod
LOG = logging.getLogger(__name__)
@ -27,33 +26,37 @@ LOG = logging.getLogger(__name__)
class Test(object):
def __init__(self,
chart,
release_name,
tiller,
cg_test_charts=None,
cleanup=None,
enable_all=False,
test_values=None):
enable_all=False):
"""Initialize a test handler to run Helm tests corresponding to a
release.
:param chart: The armada chart document
:param release_name: Name of a Helm release
:param tiller: Tiller object
:param cg_test_charts: Chart group `test_charts` key
:param cleanup: Triggers cleanup; overrides `test.options.cleanup`
:param enable_all: Run tests regardless of the value of `test.enabled`
:param test_values: Test values retrieved from a chart's `test` key
:type chart: dict
:type release_name: str
:type tiller: Tiller object
:type cg_test_charts: bool
:type cleanup: bool
:type enable_all: bool
:type test_values: dict or bool (deprecated)
"""
self.chart = chart
self.release_name = release_name
self.tiller = tiller
self.cleanup = cleanup
self.k8s_timeout = const.DEFAULT_K8S_TIMEOUT
test_values = self.chart.get('test', None)
# NOTE(drewwalters96): Support the chart_group `test_charts` key until
# its deprecation period ends. The `test.enabled`, `enable_all` flag,
@ -106,10 +109,16 @@ class Test(object):
"""
LOG.info('RUNNING: %s tests', self.release_name)
try:
self.delete_test_pods()
except Exception:
LOG.exception("Exception when deleting test pods for release: %s",
self.release_name)
test_suite_run = self.tiller.test_release(
self.release_name, timeout=timeout, cleanup=self.cleanup)
success = self.get_test_suite_run_success(test_suite_run)
success = get_test_suite_run_success(test_suite_run)
if success:
LOG.info('PASSED: %s', self.release_name)
else:
@ -117,7 +126,37 @@ class Test(object):
return success
@classmethod
def get_test_suite_run_success(self, test_suite_run):
return all(
r.status == TESTRUN_STATUS_SUCCESS for r in test_suite_run.results)
def delete_test_pods(self):
    """Delete any existing test pods for the release.

    Test pods are located by listing pods matching the chart's
    ``wait.labels`` and keeping those whose ``helm.sh/hook`` annotation
    marks them as Helm test hooks. Deleting them avoids test pod name
    conflicts when creating the new test pod, as well as providing
    general cleanup, since the new test pod should supersede them.
    """
    labels = get_wait_labels(self.chart)

    # Guard against labels being left empty, so we don't delete other
    # charts' test pods.
    if labels:
        label_selector = label_selectors(labels)
        namespace = self.chart['namespace']
        list_args = {
            'namespace': namespace,
            'label_selector': label_selector,
            'timeout_seconds': self.k8s_timeout
        }
        pod_list = self.tiller.k8s.client.list_namespaced_pod(**list_args)
        # NOTE: Materialize to a list here. A generator object is always
        # truthy, so the previous `if test_pods:` check logged "Found
        # existing test pods" even when there were none.
        test_pods = [pod for pod in pod_list.items if is_test_pod(pod)]
        if test_pods:
            LOG.info(
                'Found existing test pods for release with '
                'namespace=%s, labels=(%s)', namespace, label_selector)

        for test_pod in test_pods:
            pod_name = test_pod.metadata.name
            LOG.info('Deleting existing test pod: %s', pod_name)
            self.tiller.k8s.delete_pod_action(
                pod_name, namespace, timeout=self.k8s_timeout)

View File

@ -32,7 +32,7 @@ from oslo_log import log as logging
from armada import const
from armada.exceptions import tiller_exceptions as ex
from armada.handlers.k8s import K8s
from armada.handlers import test
from armada.utils import helm
from armada.utils.release import label_selectors, get_release_status
TILLER_VERSION = b'2.12.1'
@ -502,7 +502,7 @@ class Tiller(object):
failed = 0
for test_message in test_message_stream:
if test_message.status == test.TESTRUN_STATUS_FAILURE:
if test_message.status == helm.TESTRUN_STATUS_FAILURE:
failed += 1
LOG.info(test_message.msg)
if failed:

View File

@ -21,6 +21,7 @@ import time
from oslo_log import log as logging
from armada import const
from armada.utils.helm import is_test_pod
from armada.utils.release import label_selectors
from armada.exceptions import k8s_exceptions
from armada.exceptions import manifest_exceptions
@ -32,6 +33,11 @@ LOG = logging.getLogger(__name__)
ROLLING_UPDATE_STRATEGY_TYPE = 'RollingUpdate'
def get_wait_labels(chart):
    """Return the ``wait.labels`` mapping of a chart doc, or ``{}`` when
    either the ``wait`` key or its ``labels`` key is absent.
    """
    return chart.get('wait', {}).get('labels', {})
# TODO: Validate this object up front in armada validate flow.
class ChartWait():
@ -46,7 +52,7 @@ class ChartWait():
self.k8s_wait_attempt_sleep = max(k8s_wait_attempt_sleep, 1)
resources = self.wait_config.get('resources')
labels = self.wait_config.get('labels', {})
labels = get_wait_labels(self.chart)
if resources is not None:
waits = []
@ -349,25 +355,16 @@ class PodWait(ResourceWait):
def include_resource(self, resource):
pod = resource
annotations = pod.metadata.annotations
# Retrieve pod's Helm test hooks
test_hooks = None
if annotations:
hook_string = annotations.get(const.HELM_HOOK_ANNOTATION)
if hook_string:
hooks = hook_string.split(',')
test_hooks = [h for h in hooks if h in const.HELM_TEST_HOOKS]
include = not is_test_pod(pod)
# NOTE(drewwalters96): Test pods may cause wait operations to fail
# when old resources remain from previous upgrades/tests. Indicate that
# test pods should not be included in wait operations.
if test_hooks:
LOG.debug('Pod %s will be skipped during wait operations.',
if not include:
LOG.debug('Test pod %s will be skipped during wait operations.',
pod.metadata.name)
return False
else:
return True
return include
def is_resource_ready(self, resource):
pod = resource

View File

@ -17,7 +17,7 @@ import yaml
from armada import const
from armada.handlers import armada
from armada.handlers.test import TESTRUN_STATUS_SUCCESS, TESTRUN_STATUS_FAILURE
from armada.utils.helm import TESTRUN_STATUS_SUCCESS, TESTRUN_STATUS_FAILURE
from armada.tests.unit import base
from armada.tests.test_utils import AttrDict, makeMockThreadSafe
from armada.utils.release import release_prefixer, get_release_status
@ -459,13 +459,12 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
wait=native_wait_enabled,
timeout=mock.ANY))
test_chart_override = chart.get('test')
expected_test_constructor_calls.append(
mock.call(
chart,
release_name,
m_tiller,
cg_test_charts=cg_test_all_charts,
test_values=test_chart_override))
cg_test_charts=cg_test_all_charts))
any_order = not chart_group['sequenced']
# Verify that at least 1 release is either installed or updated.

View File

@ -18,6 +18,7 @@ from armada.handlers import test
from armada.handlers import tiller
from armada.tests.unit import base
from armada.tests.test_utils import AttrDict
from armada.utils import helm
class TestHandlerTestCase(base.ArmadaTestCase):
@ -33,7 +34,7 @@ class TestHandlerTestCase(base.ArmadaTestCase):
tiller_obj.test_release.return_value = AttrDict(
**{'results': results})
test_handler = test.Test(release, tiller_obj)
test_handler = test.Test({}, release, tiller_obj)
success = test_handler.test_release_for_success()
self.assertEqual(expected_success, success)
@ -45,24 +46,24 @@ class TestHandlerTestCase(base.ArmadaTestCase):
def test_unknown(self):
self._test_test_release_for_success(False, [
AttrDict(**{'status': test.TESTRUN_STATUS_SUCCESS}),
AttrDict(**{'status': test.TESTRUN_STATUS_UNKNOWN})
AttrDict(**{'status': helm.TESTRUN_STATUS_SUCCESS}),
AttrDict(**{'status': helm.TESTRUN_STATUS_UNKNOWN})
])
def test_success(self):
self._test_test_release_for_success(
True, [AttrDict(**{'status': test.TESTRUN_STATUS_SUCCESS})])
True, [AttrDict(**{'status': helm.TESTRUN_STATUS_SUCCESS})])
def test_failure(self):
self._test_test_release_for_success(False, [
AttrDict(**{'status': test.TESTRUN_STATUS_SUCCESS}),
AttrDict(**{'status': test.TESTRUN_STATUS_FAILURE})
AttrDict(**{'status': helm.TESTRUN_STATUS_SUCCESS}),
AttrDict(**{'status': helm.TESTRUN_STATUS_FAILURE})
])
def test_running(self):
self._test_test_release_for_success(False, [
AttrDict(**{'status': test.TESTRUN_STATUS_SUCCESS}),
AttrDict(**{'status': test.TESTRUN_STATUS_RUNNING})
AttrDict(**{'status': helm.TESTRUN_STATUS_SUCCESS}),
AttrDict(**{'status': helm.TESTRUN_STATUS_RUNNING})
])
def test_cg_disabled(self):
@ -70,7 +71,10 @@ class TestHandlerTestCase(base.ArmadaTestCase):
tests.
"""
test_handler = test.Test(
release_name='release', tiller=mock.Mock(), cg_test_charts=False)
chart={},
release_name='release',
tiller=mock.Mock(),
cg_test_charts=False)
assert test_handler.test_enabled is False
@ -79,10 +83,10 @@ class TestHandlerTestCase(base.ArmadaTestCase):
tests and the deprecated, boolean `test` key is enabled.
"""
test_handler = test.Test(
chart={'test': True},
release_name='release',
tiller=mock.Mock(),
cg_test_charts=False,
test_values=True)
cg_test_charts=False)
assert test_handler.test_enabled is True
@ -90,13 +94,13 @@ class TestHandlerTestCase(base.ArmadaTestCase):
"""Test that tests are enabled when a chart group disables all
tests and the `test.enabled` key is False.
"""
test_values = {'enabled': True}
test_handler = test.Test(
chart={'test': {
'enabled': True
}},
release_name='release',
tiller=mock.Mock(),
cg_test_charts=False,
test_values=test_values)
cg_test_charts=False)
assert test_handler.test_enabled is True
@ -105,10 +109,10 @@ class TestHandlerTestCase(base.ArmadaTestCase):
tests and the deprecated, boolean `test` key is disabled.
"""
test_handler = test.Test(
chart={'test': False},
release_name='release',
tiller=mock.Mock(),
cg_test_charts=True,
test_values=False)
cg_test_charts=True)
assert test_handler.test_enabled is False
@ -116,13 +120,13 @@ class TestHandlerTestCase(base.ArmadaTestCase):
"""Test that tests are disabled when a chart group enables all
tests and the deprecated, boolean `test` key is disabled.
"""
test_values = {'enabled': False}
test_handler = test.Test(
chart={'test': {
'enabled': False
}},
release_name='release',
tiller=mock.Mock(),
cg_test_charts=True,
test_values=test_values)
cg_test_charts=True)
assert test_handler.test_enabled is False
@ -131,6 +135,7 @@ class TestHandlerTestCase(base.ArmadaTestCase):
True and the chart group `test_enabled` key is disabled.
"""
test_handler = test.Test(
chart={},
release_name='release',
tiller=mock.Mock(),
cg_test_charts=False,
@ -143,10 +148,10 @@ class TestHandlerTestCase(base.ArmadaTestCase):
True and the deprecated, boolean `test` key is disabled.
"""
test_handler = test.Test(
chart={'test': True},
release_name='release',
tiller=mock.Mock(),
enable_all=True,
test_values=False)
enable_all=True)
assert test_handler.test_enabled is True
@ -154,13 +159,13 @@ class TestHandlerTestCase(base.ArmadaTestCase):
"""Test that tests are enabled when the `enable_all` parameter is
True and the `test.enabled` key is False.
"""
test_values = {'enabled': False}
test_handler = test.Test(
chart={'test': {
'enabled': False
}},
release_name='release',
tiller=mock.Mock(),
enable_all=True,
test_values=test_values)
enable_all=True)
assert test_handler.test_enabled is True
@ -169,16 +174,16 @@ class TestHandlerTestCase(base.ArmadaTestCase):
for a chart's test key.
"""
test_handler = test.Test(
release_name='release', tiller=mock.Mock(), test_values=True)
chart={'test': False}, release_name='release', tiller=mock.Mock())
assert test_handler.test_enabled
assert not test_handler.test_enabled
def test_deprecated_test_key_true(self):
"""Test that cleanup is enabled by default when tests are enabled using
the deprecated, boolean value for a chart's `test` key.
"""
test_handler = test.Test(
release_name='release', tiller=mock.Mock(), test_values=True)
chart={'test': True}, release_name='release', tiller=mock.Mock())
assert test_handler.test_enabled is True
assert test_handler.cleanup is True
@ -187,11 +192,12 @@ class TestHandlerTestCase(base.ArmadaTestCase):
"""Test that tests are disabled by a chart's values using the
`test.enabled` path.
"""
test_values = {'enabled': False}
test_handler = test.Test(
chart={'test': {
'enabled': False
}},
release_name='release',
tiller=mock.Mock(),
test_values=test_values)
tiller=mock.Mock())
assert test_handler.test_enabled is False
@ -199,11 +205,12 @@ class TestHandlerTestCase(base.ArmadaTestCase):
"""Test that cleanup is disabled (by default) when tests are enabled by
a chart's values using the `test.enabled` path.
"""
test_values = {'enabled': True}
test_handler = test.Test(
chart={'test': {
'enabled': True
}},
release_name='release',
tiller=mock.Mock(),
test_values=test_values)
tiller=mock.Mock())
assert test_handler.test_enabled is True
assert test_handler.cleanup is False
@ -212,12 +219,15 @@ class TestHandlerTestCase(base.ArmadaTestCase):
"""Test that the test handler uses the values provided by a chart's
`test` key.
"""
test_values = {'enabled': True, 'options': {'cleanup': True}}
test_handler = test.Test(
chart={'test': {
'enabled': True,
'options': {
'cleanup': True
}
}},
release_name='release',
tiller=mock.Mock(),
test_values=test_values)
tiller=mock.Mock())
assert test_handler.test_enabled is True
assert test_handler.cleanup is True
@ -226,12 +236,15 @@ class TestHandlerTestCase(base.ArmadaTestCase):
"""Test that the test handler uses the values provided by a chart's
`test` key.
"""
test_values = {'enabled': True, 'options': {'cleanup': False}}
test_handler = test.Test(
chart={'test': {
'enabled': True,
'options': {
'cleanup': False
}
}},
release_name='release',
tiller=mock.Mock(),
test_values=test_values)
tiller=mock.Mock())
assert test_handler.test_enabled is True
assert test_handler.cleanup is False
@ -240,7 +253,8 @@ class TestHandlerTestCase(base.ArmadaTestCase):
"""Test that the default values are enforced when no chart `test`
values are provided (i.e. tests are enabled and cleanup is disabled).
"""
test_handler = test.Test(release_name='release', tiller=mock.Mock())
test_handler = test.Test(
chart={}, release_name='release', tiller=mock.Mock())
assert test_handler.test_enabled is True
assert test_handler.cleanup is False
@ -249,13 +263,16 @@ class TestHandlerTestCase(base.ArmadaTestCase):
"""Test that a cleanup value passed to the Test handler (i.e. from the
API/CLI) takes precedence over a chart's `test.cleanup` value.
"""
test_values = {'enabled': True, 'options': {'cleanup': False}}
test_handler = test.Test(
chart={'test': {
'enabled': True,
'options': {
'cleanup': False
}
}},
release_name='release',
tiller=mock.Mock(),
cleanup=True,
test_values=test_values)
cleanup=True)
assert test_handler.test_enabled is True
assert test_handler.cleanup is True

View File

@ -17,7 +17,7 @@ from mock import MagicMock
from armada.exceptions import tiller_exceptions as ex
from armada.handlers import tiller
from armada.handlers import test
from armada.utils import helm
from armada.tests.unit import base
from armada.tests.test_utils import AttrDict
@ -552,7 +552,7 @@ class TillerTestCase(base.ArmadaTestCase):
self._test_test_release([
AttrDict(**{
'msg': 'No Tests Found',
'status': test.TESTRUN_STATUS_UNKNOWN
'status': helm.TESTRUN_STATUS_UNKNOWN
})
])
@ -560,11 +560,11 @@ class TillerTestCase(base.ArmadaTestCase):
self._test_test_release([
AttrDict(**{
'msg': 'RUNNING: ...',
'status': test.TESTRUN_STATUS_RUNNING
'status': helm.TESTRUN_STATUS_RUNNING
}),
AttrDict(**{
'msg': 'SUCCESS: ...',
'status': test.TESTRUN_STATUS_SUCCESS
'status': helm.TESTRUN_STATUS_SUCCESS
})
])
@ -572,11 +572,11 @@ class TillerTestCase(base.ArmadaTestCase):
self._test_test_release([
AttrDict(**{
'msg': 'RUNNING: ...',
'status': test.TESTRUN_STATUS_RUNNING
'status': helm.TESTRUN_STATUS_RUNNING
}),
AttrDict(**{
'msg': 'FAILURE: ...',
'status': test.TESTRUN_STATUS_FAILURE
'status': helm.TESTRUN_STATUS_FAILURE
})
])

40
armada/utils/helm.py Normal file
View File

@ -0,0 +1,40 @@
# Copyright 2019 The Armada Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Helm test run result statuses, as reported in Tiller's
# TestSuite results.
TESTRUN_STATUS_UNKNOWN = 0
TESTRUN_STATUS_SUCCESS = 1
TESTRUN_STATUS_FAILURE = 2
TESTRUN_STATUS_RUNNING = 3

# Annotation key Helm uses to mark hook resources, and the hook names
# that identify a resource as a test pod.
HELM_HOOK_ANNOTATION = 'helm.sh/hook'
HELM_TEST_HOOKS = ['test-success', 'test-failure']


def is_test_pod(pod):
    """Return True if ``pod`` is a Helm test pod.

    A pod counts as a test pod when its ``helm.sh/hook`` annotation
    names at least one of the Helm test hooks.
    """
    annotations = pod.metadata.annotations or {}
    hook_string = annotations.get(HELM_HOOK_ANNOTATION)
    if not hook_string:
        return False
    return any(hook in HELM_TEST_HOOKS for hook in hook_string.split(','))


def get_test_suite_run_success(test_suite_run):
    """Return True if every result in ``test_suite_run`` succeeded."""
    for result in test_suite_run.results:
        if result.status != TESTRUN_STATUS_SUCCESS:
            return False
    return True

View File

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from armada.handlers.test import Test
from armada.utils.helm import get_test_suite_run_success
import time
@ -54,7 +54,7 @@ def get_last_test_result(release):
status = release.info.status
if not status.HasField('last_test_suite_run'):
return None
return Test.get_test_suite_run_success(status.last_test_suite_run)
return get_test_suite_run_success(status.last_test_suite_run)
def get_last_deployment_age(release):

View File

@ -203,10 +203,15 @@ Test options to pass through directly to helm.
.. note::
The preferred way to achieve test cleanup is to add a pre-upgrade delete
action on the test pod, which allows for debugging the test pod up until the
next upgrade.
If cleanup is ``true`` this prevents being able to debug a test in the event of failure.
Historically, the preferred way to achieve test cleanup has been to add a pre-upgrade delete
action on the test pod.
This still works; however, it is usually no longer necessary, as Armada now automatically
cleans up any test pods which match the ``wait.labels`` of the chart, immediately before
running tests. Similar suggestions have been made for how ``helm test --cleanup`` itself
ought to work (https://github.com/helm/helm/issues/3279).
Upgrade - Pre
^^^^^^^^^^^^^