From c838b2def00bc9fc40e0b83013207db89a994b05 Mon Sep 17 00:00:00 2001 From: Sean Eagan Date: Mon, 4 Mar 2019 16:04:33 -0600 Subject: [PATCH] Exclude generated objects from wait logic This excludes the following generated objects from wait logic: 1. cronjob-generated jobs: these are not directly part of the release, so better not to wait on them. if there is a desire to wait on initial cronjob success, we can add a separate "type: cronjob" wait for this for that purpose. 2. job-generated pods: for the purposes of waiting on jobs, one should ensure their configuration includes a "type: job" wait. Once controller-based waits are included by default we can also consider excluding controller-owned pods from the "type: pod" wait, as those will be handled by the controller-based waits then. Change-Id: Ibf56c6fef9ef72b62da0b066c92c5f29ee4ecb5f --- armada/handlers/wait.py | 60 +++++++++++++++------ armada/tests/unit/handlers/test_wait.py | 70 +++++++++++++++++++++---- 2 files changed, 106 insertions(+), 24 deletions(-) diff --git a/armada/handlers/wait.py b/armada/handlers/wait.py index 721d6a1f..4dd9d444 100644 --- a/armada/handlers/wait.py +++ b/armada/handlers/wait.py @@ -154,15 +154,23 @@ class ResourceWait(ABC): ''' pass - def include_resource(self, resource): + def get_exclude_reason(self, resource): ''' - Test to include or exclude a resource in a wait operation. This method - can be used to exclude resources that should not be included in wait - operations (e.g. test pods). + If a resource should be excluded from the wait, returns a message + to explain why. Unless overridden, all resources are included. :param resource: resource to test - :returns: boolean representing test result + :returns: string representing exclude reason ''' - return True + return None + + def include_resource(self, resource): + exclude_reason = self.get_exclude_reason(resource) + + if exclude_reason: + LOG.debug('Excluding %s %s from wait: %s', self.resource_type, + resource.metadata.name, exclude_reason) + + return not exclude_reason def handle_resource(self, resource): resource_name = resource.metadata.name @@ -353,18 +361,21 @@ class PodWait(ResourceWait): resource_type, chart_wait, labels, chart_wait.k8s.client.list_namespaced_pod, **kwargs) - def include_resource(self, resource): + def get_exclude_reason(self, resource): pod = resource - include = not is_test_pod(pod) - # NOTE(drewwalters96): Test pods may cause wait operations to fail - # when old resources remain from previous upgrades/tests. Indicate that - # test pods should not be included in wait operations. - if not include: - LOG.debug('Test pod %s will be skipped during wait operations.', - pod.metadata.name) + # Exclude helm test pods + # TODO: Possibly exclude other helm hook pods/jobs (besides tests)? + if is_test_pod(pod): + return 'helm test pod' - return include + # Exclude job pods + # TODO: Once controller-based waits are enabled by default, ignore + # controller-owned pods as well. + if has_owner(pod, 'Job'): + return 'generated by job (wait on that instead if not already)' + + return None def is_resource_ready(self, resource): pod = resource @@ -392,6 +403,15 @@ class JobWait(ResourceWait): resource_type, chart_wait, labels, chart_wait.k8s.batch_api.list_namespaced_job, **kwargs) + def get_exclude_reason(self, resource): + job = resource + + # Exclude cronjob jobs + if has_owner(job, 'CronJob'): + return 'generated by cronjob (not part of release)' + + return None + def is_resource_ready(self, resource): job = resource name = job.metadata.name @@ -406,6 +426,16 @@ class JobWait(ResourceWait): return (msg.format(name), True) +def has_owner(resource, kind=None): + owner_references = resource.metadata.owner_references or [] + + for owner in owner_references: + if not kind or kind == owner.kind: + return True + + return False + + CountOrPercent = collections.namedtuple('CountOrPercent', 'number is_percent source') diff --git a/armada/tests/unit/handlers/test_wait.py b/armada/tests/unit/handlers/test_wait.py index b64a437d..5a04c0c1 100644 --- a/armada/tests/unit/handlers/test_wait.py +++ b/armada/tests/unit/handlers/test_wait.py @@ -194,12 +194,13 @@ class PodWaitTestCase(base.ArmadaTestCase): def test_include_resource(self): - def mock_resource(annotations): + def mock_resource(annotations={}, owner_references=None): resource = mock.Mock() resource.metadata.annotations = annotations + resource.metadata.owner_references = owner_references return resource - test_resources = [ + test_pods = [ mock_resource({ 'key': 'value', 'helm.sh/hook': 'test-success' @@ -207,18 +208,69 @@ class PodWaitTestCase(base.ArmadaTestCase): mock_resource({'helm.sh/hook': 'test-failure'}), mock_resource({'helm.sh/hook': 'test-success,pre-install'}) ] - non_test_resources = [ + job_pods = [ + mock_resource(owner_references=[mock.Mock(kind='Job')]), + mock_resource(owner_references=[ + mock.Mock(kind='NotAJob'), + mock.Mock(kind='Job') + ]) + ] + included_pods = [ + mock_resource(), + mock_resource(owner_references=[]), mock_resource({'helm.sh/hook': 'pre-install'}), mock_resource({'key': 'value'}), - mock_resource({}) + mock_resource(owner_references=[mock.Mock(kind='NotAJob')]) ] unit = self.get_unit({}) - # Validate test resources excluded - for resource in test_resources: - self.assertFalse(unit.include_resource(resource)) + # Validate test pods excluded + for pod in test_pods: + self.assertFalse(unit.include_resource(pod)) + + # Validate test pods excluded + for pod in job_pods: + self.assertFalse(unit.include_resource(pod)) # Validate other resources included - for resource in non_test_resources: - self.assertTrue(unit.include_resource(resource)) + for pod in included_pods: + self.assertTrue(unit.include_resource(pod)) + + +class JobWaitTestCase(base.ArmadaTestCase): + + def get_unit(self, labels): + return wait.JobWait( + resource_type='job', chart_wait=mock.MagicMock(), labels=labels) + + def test_include_resource(self): + + def mock_resource(annotations={}, owner_references=None): + resource = mock.Mock() + resource.metadata.annotations = annotations + resource.metadata.owner_references = owner_references + return resource + + cronjob_jobs = [ + mock_resource(owner_references=[mock.Mock(kind='CronJob')]), + mock_resource(owner_references=[ + mock.Mock(kind='NotACronJob'), + mock.Mock(kind='CronJob') + ]) + ] + included_jobs = [ + mock_resource(), + mock_resource(owner_references=[]), + mock_resource(owner_references=[mock.Mock(kind='NotAJob')]) + ] + + unit = self.get_unit({}) + + # Validate test pods excluded + for job in cronjob_jobs: + self.assertFalse(unit.include_resource(job)) + + # Validate other resources included + for job in included_jobs: + self.assertTrue(unit.include_resource(job))