diff --git a/armada/cli/apply.py b/armada/cli/apply.py index 1cb6d4a5..9b740432 100644 --- a/armada/cli/apply.py +++ b/armada/cli/apply.py @@ -190,6 +190,10 @@ class ApplyManifest(CliAction): for ch in resp[result]: if not result == 'diff': msg = 'Chart {} took action: {}'.format(ch, result) + if result == 'protected': + msg += ' and requires operator attention.' + elif result == 'purge': + msg += ' before install/upgrade.' self.logger.info(msg) else: self.logger.info('Chart/values diff: %s', ch) diff --git a/armada/exceptions/armada_exceptions.py b/armada/exceptions/armada_exceptions.py index c133610f..51db8359 100644 --- a/armada/exceptions/armada_exceptions.py +++ b/armada/exceptions/armada_exceptions.py @@ -27,3 +27,16 @@ class ArmadaTimeoutException(ArmadaException): def __init__(self, reason): self._message = 'Armada timed out waiting on: %s' % (reason) super(ArmadaTimeoutException, self).__init__(self._message) + + +class ProtectedReleaseException(ArmadaException): + ''' + Exception that occurs when Armada encounters a FAILED release that is + designated `protected` in the Chart and `continue_processing` is False. + ''' + + def __init__(self, reason): + self._message = ( + 'Armada encountered protected release %s in FAILED status' % reason + ) + super(ProtectedReleaseException, self).__init__(self._message) diff --git a/armada/handlers/armada.py b/armada/handlers/armada.py index 4fee0fdc..8d329dd1 100644 --- a/armada/handlers/armada.py +++ b/armada/handlers/armada.py @@ -20,7 +20,7 @@ from oslo_config import cfg from oslo_log import log as logging from armada import const -from armada.exceptions.armada_exceptions import ArmadaTimeoutException +from armada.exceptions import armada_exceptions from armada.exceptions import source_exceptions from armada.exceptions import tiller_exceptions from armada.exceptions import validate_exceptions @@ -99,13 +99,11 @@ class Armada(object): tiller_host=tiller_host, tiller_port=tiller_port, tiller_namespace=tiller_namespace, dry_run=dry_run) self.documents = Override( - documents, overrides=set_ovr, - values=values).update_manifests() + documents, overrides=set_ovr, values=values).update_manifests() self.k8s_wait_attempts = k8s_wait_attempts self.k8s_wait_attempt_sleep = k8s_wait_attempt_sleep self.manifest = Manifest( - self.documents, - target_manifest=target_manifest).get_manifest() + self.documents, target_manifest=target_manifest).get_manifest() def find_release_chart(self, known_releases, release_name): ''' @@ -137,31 +135,16 @@ class Armada(object): raise validate_exceptions.InvalidManifestException( error_messages=details) + # TODO(MarshM): this is duplicated inside validate_armada_documents() + # L126, and should be unreachable code if it has errors result, msg_list = validate.validate_armada_manifests(self.documents) if not result: raise validate_exceptions.InvalidArmadaObjectException( details=','.join([m.get('message') for m in msg_list])) - # Purge known releases that have failed and are in the current yaml - manifest_data = self.manifest.get(const.KEYWORD_ARMADA, {}) - prefix = manifest_data.get(const.KEYWORD_PREFIX, '') - failed_releases = self.get_releases_by_status(const.STATUS_FAILED) - - for release in failed_releases: - for group in manifest_data.get(const.KEYWORD_GROUPS, []): - for ch in group.get(const.KEYWORD_CHARTS, []): - ch_release_name = release_prefixer( - prefix, ch.get('chart', {}).get('release')) - if release[0] == ch_release_name: - LOG.info('Purging failed release %s ' - 'before deployment.', release[0]) - self.tiller.uninstall_release(release[0]) - # Clone the chart sources - # - # We only support a git source type right now, which can also - # handle git:// local paths as well repos = {} + manifest_data = self.manifest.get(const.KEYWORD_ARMADA, {}) for group in manifest_data.get(const.KEYWORD_GROUPS, []): for ch in group.get(const.KEYWORD_CHARTS, []): self.tag_cloned_repo(ch, repos) @@ -217,19 +200,25 @@ class Armada(object): chart_name = chart.get('chart_name') raise source_exceptions.ChartSourceException(ct_type, chart_name) - def get_releases_by_status(self, status): + def _get_releases_by_status(self): ''' - :params status - status string to filter releases on - - Return a list of current releases with a specified status + Return a list of current releases with DEPLOYED or FAILED status ''' - filtered_releases = [] + deployed_releases = [] + failed_releases = [] known_releases = self.tiller.list_charts() for release in known_releases: - if release[4] == status: - filtered_releases.append(release) + if release[4] == const.STATUS_DEPLOYED: + deployed_releases.append(release) + elif release[4] == const.STATUS_FAILED: + failed_releases.append(release) + else: + # tiller.list_charts() only looks at DEPLOYED/FAILED so + # this should be unreachable + LOG.debug('Ignoring release %s in status %s.', + release[0], release[4]) - return filtered_releases + return deployed_releases, failed_releases def sync(self): ''' @@ -238,16 +227,23 @@ class Armada(object): if self.dry_run: LOG.info('Armada is in DRY RUN mode, no changes being made.') - msg = {'install': [], 'upgrade': [], 'diff': []} + msg = { + 'install': [], + 'upgrade': [], + 'diff': [], + 'purge': [], + 'protected': [] + } # TODO: (gardlt) we need to break up this func into # a more cleaner format self.pre_flight_ops() # extract known charts on tiller right now - known_releases = self.tiller.list_charts() + deployed_releases, failed_releases = self._get_releases_by_status() + manifest_data = self.manifest.get(const.KEYWORD_ARMADA, {}) - prefix = manifest_data.get(const.KEYWORD_PREFIX, '') + prefix = manifest_data.get(const.KEYWORD_PREFIX) for chartgroup in manifest_data.get(const.KEYWORD_GROUPS, []): cg_name = chartgroup.get('name', '') @@ -270,15 +266,39 @@ class Armada(object): chart = chart_entry.get('chart', {}) namespace = chart.get('namespace') release = chart.get('release') + release_name = release_prefixer(prefix, release) values = chart.get('values', {}) pre_actions = {} post_actions = {} + protected = chart.get('protected', {}) + p_continue = protected.get('continue_processing', False) + + # Check for existing FAILED release, and purge + if release_name in [rel[0] for rel in failed_releases]: + LOG.info('Purging FAILED release %s before deployment.', + release_name) + if protected: + if p_continue: + LOG.warn('Release %s is `protected`, ' + 'continue_processing=True. Operator must ' + 'handle FAILED release manually.', + release_name) + msg['protected'].append(release_name) + continue + else: + LOG.error('Release %s is `protected`, ' + 'continue_processing=False.', + release_name) + raise armada_exceptions.ProtectedReleaseException( + release_name) + else: + # Purge the release + self.tiller.uninstall_release(release_name) + msg['purge'].append(release_name) + wait_timeout = self.timeout wait_labels = {} - - release_name = release_prefixer(prefix, release) - # Retrieve appropriate timeout value if wait_timeout <= 0: # TODO(MarshM): chart's `data.timeout` should be deprecated @@ -308,15 +328,13 @@ class Armada(object): chartbuilder = ChartBuilder(chart) protoc_chart = chartbuilder.get_helm_chart() - deployed_releases = [x[0] for x in known_releases] - # Begin Chart timeout deadline deadline = time.time() + wait_timeout # TODO(mark-burnett): It may be more robust to directly call # tiller status to decide whether to install/upgrade rather # than checking for list membership. - if release_name in deployed_releases: + if release_name in [rel[0] for rel in deployed_releases]: # indicate to the end user what path we are taking LOG.info("Upgrading release %s in namespace %s", @@ -324,7 +342,7 @@ class Armada(object): # extract the installed chart and installed values from the # latest release so we can compare to the intended state apply_chart, apply_values = self.find_release_chart( - known_releases, release_name) + deployed_releases, release_name) upgrade = chart.get('upgrade', {}) disable_hooks = upgrade.get('no_hooks', False) @@ -424,7 +442,7 @@ class Armada(object): reason = ('Timeout expired before testing sequenced ' 'release %s' % release_name) LOG.error(reason) - raise ArmadaTimeoutException(reason) + raise armada_exceptions.ArmadaTimeoutException(reason) self._test_chart(release_name, timer) # TODO(MarshM): handle test failure or timeout @@ -453,7 +471,7 @@ class Armada(object): reason = ('Timeout expired waiting on namespace: %s, ' 'labels: (%s)' % (ns, labels_dict)) LOG.error(reason) - raise ArmadaTimeoutException(reason) + raise armada_exceptions.ArmadaTimeoutException(reason) self._wait_until_ready( release_name=None, wait_labels=labels_dict, @@ -470,7 +488,8 @@ class Armada(object): if self.enable_chart_cleanup: self._chart_cleanup( prefix, - self.manifest[const.KEYWORD_ARMADA][const.KEYWORD_GROUPS]) + self.manifest[const.KEYWORD_ARMADA][const.KEYWORD_GROUPS], + msg) LOG.info('Done applying manifest.') return msg @@ -568,7 +587,7 @@ class Armada(object): return result - def _chart_cleanup(self, prefix, charts): + def _chart_cleanup(self, prefix, charts, msg): LOG.info('Processing chart cleanup to remove unspecified releases.') valid_releases = [] @@ -585,3 +604,4 @@ class Armada(object): LOG.info('Purging release %s as part of chart cleanup.', release) self.tiller.uninstall_release(release) + msg['purge'].append(release) diff --git a/armada/handlers/tiller.py b/armada/handlers/tiller.py index 4ff094b5..6cfb7533 100644 --- a/armada/handlers/tiller.py +++ b/armada/handlers/tiller.py @@ -594,6 +594,13 @@ class Tiller(object): get_jobs = self.k8s.get_namespace_job(namespace, label_selector) for jb in get_jobs.items: jb_name = jb.metadata.name + + if self.dry_run: + LOG.info('Skipping delete job during `dry-run`, would ' + 'have deleted job %s in namespace=%s.', + jb_name, namespace) + continue + LOG.info("Deleting job %s in namespace: %s", jb_name, namespace) self.k8s.delete_job_action(jb_name, namespace, timeout=timeout) @@ -604,22 +611,36 @@ class Tiller(object): namespace, label_selector) for jb in get_jobs.items: jb_name = jb.metadata.name - LOG.info("Deleting cron job %s in namespace: %s", - jb_name, namespace) + if resource_type == 'job': # TODO: Eventually disallow this, allowing initially since # some existing clients were expecting this behavior. - LOG.warning("Deleting cron jobs via `type: job` is " - "deprecated, use `type: cronjob` instead") + LOG.warn("Deleting cronjobs via `type: job` is " + "deprecated, use `type: cronjob` instead") + + if self.dry_run: + LOG.info('Skipping delete cronjob during `dry-run`, would ' + 'have deleted cronjob %s in namespace=%s.', + jb_name, namespace) + continue + + LOG.info("Deleting cronjob %s in namespace: %s", + jb_name, namespace) self.k8s.delete_cron_job_action(jb_name, namespace) handled = True if resource_type == 'pod': release_pods = self.k8s.get_namespace_pod( namespace, label_selector) - for pod in release_pods.items: pod_name = pod.metadata.name + + if self.dry_run: + LOG.info('Skipping delete pod during `dry-run`, would ' + 'have deleted pod %s in namespace=%s.', + pod_name, namespace) + continue + LOG.info("Deleting pod %s in namespace: %s", pod_name, namespace) self.k8s.delete_namespace_pod(pod_name, namespace) diff --git a/armada/schemas/armada-chart-schema.yaml b/armada/schemas/armada-chart-schema.yaml index 10efa428..639a9340 100644 --- a/armada/schemas/armada-chart-schema.yaml +++ b/armada/schemas/armada-chart-schema.yaml @@ -52,6 +52,12 @@ data: type: array items: type: string + protected: + type: object + properties: + continue_processing: + type: boolean + additionalProperties: false test: type: boolean timeout: diff --git a/armada/tests/unit/handlers/test_armada.py b/armada/tests/unit/handlers/test_armada.py index 0c6e0d53..57d2f201 100644 --- a/armada/tests/unit/handlers/test_armada.py +++ b/armada/tests/unit/handlers/test_armada.py @@ -19,7 +19,7 @@ from armada import const from armada.handlers import armada from armada.tests.unit import base from armada.utils.release import release_prefixer - +from armada.exceptions.armada_exceptions import ProtectedReleaseException TEST_YAML = """ --- @@ -42,6 +42,31 @@ data: chart_group: - example-chart-1 - example-chart-2 + - example-chart-3 +--- +schema: armada/Chart/v1 +metadata: + schema: metadata/Document/v1 + name: example-chart-3 +data: + chart_name: test_chart_3 + release: test_chart_3 + namespace: test + values: {} + source: + type: local + location: /tmp/dummy/armada + subpath: chart_3 + dependencies: [] + protected: + continue_processing: false + wait: + timeout: 10 + upgrade: + no_hooks: false + options: + force: true + recreate_pods: true --- schema: armada/Chart/v1 metadata: @@ -57,6 +82,8 @@ data: location: /tmp/dummy/armada subpath: chart_2 dependencies: [] + protected: + continue_processing: true wait: timeout: 10 upgrade: @@ -85,7 +112,8 @@ data: """ CHART_SOURCES = [('git://github.com/dummy/armada', 'chart_1'), - ('/tmp/dummy/armada', 'chart_2')] + ('/tmp/dummy/armada', 'chart_2'), + ('/tmp/dummy/armada', 'chart_3')] class ArmadaHandlerTestCase(base.ArmadaTestCase): @@ -124,6 +152,9 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase): 'dependencies': [], 'chart_name': 'test_chart_2', 'namespace': 'test', + 'protected': { + 'continue_processing': True + }, 'release': 'test_chart_2', 'source': { 'location': '/tmp/dummy/armada', @@ -143,6 +174,34 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase): } } } + }, + { + 'chart': { + 'dependencies': [], + 'chart_name': 'test_chart_3', + 'namespace': 'test', + 'protected': { + 'continue_processing': False + }, + 'release': 'test_chart_3', + 'source': { + 'location': '/tmp/dummy/armada', + 'subpath': 'chart_3', + 'type': 'local' + }, + 'source_dir': CHART_SOURCES[2], + 'values': {}, + 'wait': { + 'timeout': 10 + }, + 'upgrade': { + 'no_hooks': False, + 'options': { + 'force': True, + 'recreate_pods': True + } + } + } } ], 'description': 'this is a test', @@ -180,43 +239,6 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase): 'git://github.com/dummy/armada', 'master', auth_method=None, proxy_server=None) - @mock.patch.object(armada, 'source') - @mock.patch('armada.handlers.armada.Tiller') - def test_pre_flight_ops_with_failed_releases(self, mock_tiller, - mock_source): - """Test pre-flight functions uninstalls failed Tiller releases.""" - yaml_documents = list(yaml.safe_load_all(TEST_YAML)) - armada_obj = armada.Armada(yaml_documents) - - # Mock methods called by `pre_flight_ops()`. - m_tiller = mock_tiller.return_value - m_tiller.tiller_status.return_value = True - mock_source.git_clone.return_value = CHART_SOURCES[0][0] - - # Only the first two releases failed and should be uninstalled. Armada - # looks at index [4] for each release to determine the status. - m_tiller.list_charts.return_value = [ - ['armada-test_chart_1', None, None, None, const.STATUS_FAILED], - ['armada-test_chart_2', None, None, None, const.STATUS_FAILED], - [None, None, None, None, const.STATUS_DEPLOYED] - ] - - self._test_pre_flight_ops(armada_obj) - - # Assert both failed releases were uninstalled. - m_tiller.uninstall_release.assert_has_calls([ - mock.call('armada-test_chart_1'), - mock.call('armada-test_chart_2') - ]) - - mock_tiller.assert_called_once_with(tiller_host=None, - tiller_namespace='kube-system', - tiller_port=44134, - dry_run=False) - mock_source.git_clone.assert_called_once_with( - 'git://github.com/dummy/armada', 'master', auth_method=None, - proxy_server=None) - @mock.patch.object(armada, 'source') @mock.patch('armada.handlers.armada.Tiller') def test_post_flight_ops(self, mock_tiller, mock_source): @@ -267,6 +289,7 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase): expected_install_release_calls = [] expected_update_release_calls = [] + expected_uninstall_release_calls = [] for c in charts: chart = c['chart'] @@ -291,28 +314,63 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase): ) ) else: - upgrade = chart.get('upgrade', {}) - disable_hooks = upgrade.get('no_hooks', False) - force = upgrade.get('force', False) - recreate_pods = upgrade.get('recreate_pods', False) + target_release = None + for known_release in known_releases: + if known_release[0] == release_name: + target_release = known_release + break + if target_release: + status = target_release[4] + if status == const.STATUS_FAILED: + protected = chart.get('protected', {}) + if not protected: + expected_uninstall_release_calls.append( + mock.call(release_name)) + expected_install_release_calls.append( + mock.call( + mock_chartbuilder().get_helm_chart(), + "{}-{}".format( + armada_obj.manifest['armada'][ + 'release_prefix'], + chart['release']), + chart['namespace'], + values=yaml.safe_dump(chart['values']), + wait=this_chart_should_wait, + timeout=chart['wait']['timeout'] + ) + ) + else: + p_continue = protected.get( + 'continue_processing', False) + if p_continue: + continue + else: + break - expected_update_release_calls.append( - mock.call( - mock_chartbuilder().get_helm_chart(), - "{}-{}".format(armada_obj.manifest['armada'][ - 'release_prefix'], - chart['release']), - chart['namespace'], - pre_actions={}, - post_actions={}, - disable_hooks=disable_hooks, - force=force, - recreate_pods=recreate_pods, - values=yaml.safe_dump(chart['values']), - wait=this_chart_should_wait, - timeout=chart['wait']['timeout'] - ) - ) + if status == const.STATUS_DEPLOYED: + upgrade = chart.get('upgrade', {}) + disable_hooks = upgrade.get('no_hooks', False) + force = upgrade.get('force', False) + recreate_pods = upgrade.get('recreate_pods', False) + + expected_update_release_calls.append( + mock.call( + mock_chartbuilder().get_helm_chart(), + "{}-{}".format( + armada_obj.manifest['armada'][ + 'release_prefix'], + chart['release']), + chart['namespace'], + pre_actions={}, + post_actions={}, + disable_hooks=disable_hooks, + force=force, + recreate_pods=recreate_pods, + values=yaml.safe_dump(chart['values']), + wait=this_chart_should_wait, + timeout=chart['wait']['timeout'] + ) + ) # Verify that at least 1 release is either installed or updated. self.assertTrue( @@ -364,6 +422,42 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase): ] self._test_sync(known_releases) + def test_armada_sync_with_unprotected_releases(self): + c1 = 'armada-test_chart_1' + + known_releases = [ + [c1, None, self._get_chart_by_name(c1), None, + const.STATUS_FAILED] + ] + self._test_sync(known_releases) + + def test_armada_sync_with_protected_releases_continue(self): + c1 = 'armada-test_chart_1' + c2 = 'armada-test_chart_2' + + known_releases = [ + [c2, None, self._get_chart_by_name(c2), None, + const.STATUS_FAILED], + [c1, None, self._get_chart_by_name(c1), None, + const.STATUS_FAILED] + ] + self._test_sync(known_releases) + + def test_armada_sync_with_protected_releases_halt(self): + c3 = 'armada-test_chart_3' + + known_releases = [ + [c3, None, self._get_chart_by_name(c3), None, + const.STATUS_FAILED] + ] + + def _test_method(): + self._test_sync(known_releases) + + self.assertRaises( + ProtectedReleaseException, + _test_method) + @mock.patch.object(armada.Armada, 'post_flight_ops') @mock.patch.object(armada.Armada, 'pre_flight_ops') @mock.patch('armada.handlers.armada.ChartBuilder') diff --git a/doc/source/operations/exceptions/armada-exceptions.inc b/doc/source/operations/exceptions/armada-exceptions.inc index df747f38..42a23bab 100644 --- a/doc/source/operations/exceptions/armada-exceptions.inc +++ b/doc/source/operations/exceptions/armada-exceptions.inc @@ -25,3 +25,8 @@ :members: :show-inheritance: :undoc-members: + * - ProtectedReleaseException + - .. autoexception:: armada.exceptions.armada_exceptions.ProtectedReleaseException + :members: + :show-inheritance: + :undoc-members: diff --git a/doc/source/operations/guide-build-armada-yaml.rst b/doc/source/operations/guide-build-armada-yaml.rst index fab90291..51cef92d 100644 --- a/doc/source/operations/guide-build-armada-yaml.rst +++ b/doc/source/operations/guide-build-armada-yaml.rst @@ -95,6 +95,9 @@ Chart +-----------------+----------+---------------------------------------------------------------------------------------+ | wait | object | contains wait information such as (timeout, labels) | +-----------------+----------+---------------------------------------------------------------------------------------+ +| protected | object | do not delete FAILED releases when encountered from previous run (provide the | +| | | 'continue_processing' bool to continue or halt execution (default: halt)) | ++-----------------+----------+---------------------------------------------------------------------------------------+ | test | bool | run pre-defined helm tests helm in a chart | +-----------------+----------+---------------------------------------------------------------------------------------+ | install | object | install the chart into your Kubernetes cluster | @@ -176,6 +179,8 @@ Chart Example namespace: default wait: timeout: 100 + protected: + continue_processing: false install: no_hooks: false upgrade: @@ -237,7 +242,7 @@ Source Example wait: timeout: 100 labels: - component: blog + component: blog install: no_hooks: false upgrade: