diff --git a/src/bin/shipyard_airflow/shipyard_airflow/plugins/drydock_base_operator.py b/src/bin/shipyard_airflow/shipyard_airflow/plugins/drydock_base_operator.py index 8ce5f75a..cb338150 100644 --- a/src/bin/shipyard_airflow/shipyard_airflow/plugins/drydock_base_operator.py +++ b/src/bin/shipyard_airflow/shipyard_airflow/plugins/drydock_base_operator.py @@ -231,6 +231,12 @@ class DrydockBaseOperator(UcpBaseOperator): # Raise Time Out Exception if task_status == 'running' and i == end_range: + # TODO(bryan-strassner) If Shipyard has timed out waiting for + # this task to complete, and Drydock has provided a means + # to cancel a task, that cancellation should be done here. + + # task_failure only exits with an exception, so this is the + # end of processing in the case of a timeout. self.task_failure(False) # Exit 'for' loop if the task is in 'complete' or 'terminated' diff --git a/src/bin/shipyard_airflow/shipyard_airflow/plugins/drydock_nodes.py b/src/bin/shipyard_airflow/shipyard_airflow/plugins/drydock_nodes.py index ee7c70e8..cc720f0e 100644 --- a/src/bin/shipyard_airflow/shipyard_airflow/plugins/drydock_nodes.py +++ b/src/bin/shipyard_airflow/shipyard_airflow/plugins/drydock_nodes.py @@ -126,15 +126,17 @@ class DrydockNodesOperator(DrydockBaseOperator): self.prep_interval, self.prep_timeout) - def _execute_deployment(self, group): + def _execute_deployment(self, group, successful_prepared_nodes): """Execute the deployment of nodes for the group. :param group: The DeploymentGroup to deploy + :param successful_prepared_nodes: Nodes for this group that are + successfully prepared by the prepare nodes step. Returns a QueryTaskResult object """ LOG.info("Group %s is deploying nodes", group.name) - self.node_filter = gen_node_name_filter(group.actionable_nodes) + self.node_filter = gen_node_name_filter(successful_prepared_nodes) task_result = self._execute_task('deploy_nodes', self.dep_interval, self.dep_timeout) @@ -375,12 +377,24 @@ def _process_deployment_groups(dgm, prepare_func, deploy_func): # been marked as failed. continue - # Continue with deployment - dep_qtr = deploy_func(group) - # Mark successes as deployed - for node_name in dep_qtr.successes: - dgm.mark_node_deployed(node_name) - dgm.fail_unsuccessful_nodes(group, dep_qtr.successes) + if prep_qtr.successes: + # Continue with deployment, only for successfully prepared nodes + dep_qtr = deploy_func(group, prep_qtr.successes) + # Mark successes as deployed + for node_name in dep_qtr.successes: + dgm.mark_node_deployed(node_name) + dgm.fail_unsuccessful_nodes(group, dep_qtr.successes) + else: + # TODO(bryan-strassner) Update this message if Drydock provides + # a way to cancel a task, and that method is employed by + # Shipyard upon timeout. + LOG.info("There were no nodes successfully prepared. " + "Deployment will not be attempted for group %s. " + "Success criteria will be immediately checked. " + "If a timeout in the prepare step has occured, it is " + "possible that Drydock is still attempting the prepare " + "task.", + group.name) dgm.evaluate_group_succ_criteria(group.name, Stage.DEPLOYED) diff --git a/src/bin/shipyard_airflow/tests/unit/plugins/test_drydock_nodes_operator.py b/src/bin/shipyard_airflow/tests/unit/plugins/test_drydock_nodes_operator.py index d774fddb..83964781 100644 --- a/src/bin/shipyard_airflow/tests/unit/plugins/test_drydock_nodes_operator.py +++ b/src/bin/shipyard_airflow/tests/unit/plugins/test_drydock_nodes_operator.py @@ -196,7 +196,7 @@ def _gen_pe_func(mode, stand_alone=False): object, it needs to be false, so that the right amount of "self" matches the invocation. """ - def _func(group): + def _func(group, *args): qtr = QueryTaskResult('ti', 'tn') if mode == 'all-success': qtr.successes.extend(group.actionable_nodes) @@ -205,7 +205,7 @@ def _gen_pe_func(mode, stand_alone=False): pass return qtr - def _func_self(self, group): + def _func_self(self, group, *args): return _func(group) if stand_alone: @@ -346,7 +346,8 @@ class TestDrydockNodesOperator: op.join_wait = 0 group = DeploymentGroup(GROUP_DICT, mock.MagicMock()) group.actionable_nodes = ['node1', 'node2', 'node3'] - op._execute_deployment(group) + succ_prep_nodes = ['node1', 'node2', 'node3'] + op._execute_deployment(group, succ_prep_nodes) assert op._execute_task.call_count == 1 assert cns.call_count == 1 @@ -364,7 +365,8 @@ class TestDrydockNodesOperator: op.join_wait = 0 group = DeploymentGroup(GROUP_DICT, mock.MagicMock()) group.actionable_nodes = ['node1', 'node2', 'node3'] - task_res = op._execute_deployment(group) + succ_prep_nodes = ['node1', 'node2', 'node3'] + task_res = op._execute_deployment(group, succ_prep_nodes) assert op._execute_task.call_count == 1 assert cns.call_count == 1 assert 'node4 failed to join Kubernetes' in caplog.text