344 lines
12 KiB
Python
344 lines
12 KiB
Python
# Copyright 2018 AT&T Intellectual Property. All other rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
"""Deployment group manager module
|
|
|
|
Encapsulates classes and functions related to the management and use of
|
|
deployment groups used during baremetal provisioning.
|
|
"""
|
|
import logging
|
|
|
|
import networkx as nx
|
|
|
|
from .deployment_group import DeploymentGroup
|
|
from .deployment_group import Stage
|
|
from .errors import DeploymentGroupCycleError
|
|
from .errors import DeploymentGroupStageError
|
|
from .errors import UnknownDeploymentGroupError
|
|
from .errors import UnknownNodeError
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
class DeploymentGroupManager:
|
|
"""Manager object to control ordering and cross-group interactions
|
|
|
|
:param group_dict_list: list of group entries translated from a
|
|
DeploymentStrategy document.
|
|
:param node_lookup: function to lookup nodes based on group selectors
|
|
"""
|
|
|
|
def __init__(self, group_dict_list, node_lookup):
|
|
LOG.debug("Initializing DeploymentGroupManager")
|
|
|
|
# the raw input
|
|
self._group_dict_list = group_dict_list
|
|
|
|
# A dictionary of all groups by group name. E.g.:
|
|
# {
|
|
# 'group-1': DeploymentGroup(...),
|
|
# }
|
|
self._all_groups = {}
|
|
for group_dict in group_dict_list:
|
|
group = DeploymentGroup(group_dict, node_lookup)
|
|
self._all_groups[group.name] = group
|
|
|
|
self._group_graph = _generate_group_graph(
|
|
self._all_groups.values()
|
|
)
|
|
self._group_order = list(nx.topological_sort(self._group_graph))
|
|
|
|
# Setup nodes.
|
|
# self.all_nodes is a dictionary of all nodes by node name,
|
|
# representing each node's status of deployment. E.g.:
|
|
# { 'node-01' : Stage.NOT_STARTED}
|
|
#
|
|
# each group is also updated with group.actionable_nodes based on group
|
|
# ordering (deduplication)
|
|
self._all_nodes = {}
|
|
self._calculate_nodes()
|
|
|
|
def get_next_group(self, stage):
|
|
"""Get the next eligible group name to use for the provided stage
|
|
|
|
Finds the next group that has as status eligible for the stage
|
|
provided.
|
|
Returns None if there are no groups ready for the stage
|
|
"""
|
|
prev_stage = Stage.previous_stage(stage)
|
|
for group in self._group_order:
|
|
if self._all_groups[group].stage in prev_stage:
|
|
return self._all_groups[group]
|
|
return None
|
|
|
|
def group_list(self):
|
|
"""Return a list of DeploymentGroup objects in group order"""
|
|
summary = []
|
|
for group_nm in self._group_order:
|
|
group = self._all_groups[group_nm]
|
|
summary.append(group)
|
|
return summary
|
|
|
|
def critical_groups_failed(self):
|
|
"""Return True if any critical groups have failed"""
|
|
for group in self._all_groups.values():
|
|
if group.stage == Stage.FAILED and group.critical:
|
|
return True
|
|
return False
|
|
|
|
def evaluate_group_succ_criteria(self, group_name, stage):
|
|
"""Checks a group against its success criteria for a stage
|
|
|
|
:param group_name: the name of the group to check
|
|
:param stage: Stage.PREPARED or Stage.DEPLOYED
|
|
Returns a boolean: True = success, False = failure.
|
|
"""
|
|
failed_criteria = self.get_group_failures_for_stage(group_name, stage)
|
|
if failed_criteria:
|
|
# Logging of criteria has already occurred during checking.
|
|
self.mark_group_failed(group_name)
|
|
LOG.info("Group %s has failed to meet its success criteria while "
|
|
"trying to move to stage: %s",
|
|
group_name, stage)
|
|
return False
|
|
elif stage == Stage.DEPLOYED:
|
|
self.mark_group_deployed(group_name)
|
|
LOG.info("Group %s has met its success criteria and is "
|
|
"successfully deployed (%s)", group_name, stage)
|
|
return True
|
|
elif stage == Stage.PREPARED:
|
|
self.mark_group_prepared(group_name)
|
|
LOG.info("Group %s has met its success criteria and is "
|
|
"now set to stage %s", group_name, stage)
|
|
return True
|
|
|
|
def report_group_summary(self):
|
|
"""Reports the status of all groups handled by this deployment"""
|
|
LOG.info("===== Group Summary =====")
|
|
for group in self.group_list():
|
|
LOG.info(" Group %s%s ended with stage: %s",
|
|
group.name,
|
|
" [Critical]" if group.critical else "",
|
|
group.stage)
|
|
LOG.info("===== End Group Summary =====")
|
|
|
|
def report_node_summary(self):
|
|
"""Reports the status of all nodes handled by this deployment"""
|
|
# Ordered stages
|
|
stages = [Stage.NOT_STARTED,
|
|
Stage.PREPARED,
|
|
Stage.DEPLOYED,
|
|
Stage.FAILED]
|
|
|
|
LOG.info("===== Node Summary =====")
|
|
for stage in stages:
|
|
nodes = self.get_nodes(stage=stage)
|
|
LOG.info(" Nodes %s: %s", stage, ", ".join(nodes))
|
|
LOG.info("===== End Node Summary =====")
|
|
|
|
#
|
|
# Methods that support setup of the nodes in groups
|
|
#
|
|
|
|
def _calculate_nodes(self):
|
|
"""Calculate the mapping of all compute nodes
|
|
|
|
Uses self.group_order, self.all_groups
|
|
"""
|
|
for name in self._group_order:
|
|
group = self._all_groups[name]
|
|
known_nodes = set(self._all_nodes.keys())
|
|
_update_group_actionable_nodes(group, known_nodes)
|
|
for node in group.full_nodes:
|
|
self._all_nodes[node] = Stage.NOT_STARTED
|
|
|
|
#
|
|
# Methods for managing marking the stage of processing for a group
|
|
#
|
|
|
|
def mark_group_failed(self, group_name):
|
|
"""Sets status for a group and all successors(dependents) to failed
|
|
|
|
:param group_name: The name of the group to fail
|
|
"""
|
|
group = self._find_group(group_name)
|
|
group.stage = Stage.FAILED
|
|
successors = list(self._group_graph.successors(group_name))
|
|
if successors:
|
|
LOG.info("Group %s (now FAILED) has dependent groups %s",
|
|
group_name, ", ".join(successors))
|
|
for name in successors:
|
|
self.mark_group_failed(name)
|
|
|
|
def mark_group_prepared(self, group_name):
|
|
"""Sets a group to the Stage.PREPARED stage"""
|
|
group = self._find_group(group_name)
|
|
group.stage = Stage.PREPARED
|
|
|
|
def mark_group_deployed(self, group_name):
|
|
"""Sets a group to the Stage.DEPLOYED stage"""
|
|
group = self._find_group(group_name)
|
|
group.stage = Stage.DEPLOYED
|
|
|
|
def _find_group(self, group_name):
|
|
"""Wrapper for accessing groups from self.all_groups"""
|
|
group = self._all_groups.get(group_name)
|
|
if group is None:
|
|
raise UnknownDeploymentGroupError(
|
|
"Group name {} does not refer to a known group".format(
|
|
group_name)
|
|
)
|
|
return group
|
|
|
|
def get_group_failures_for_stage(self, group_name, stage):
|
|
"""Check if the nodes of a group cause the group to fail
|
|
|
|
Returns the list of failed success criteria, or [] if the group is
|
|
successful
|
|
This is only for checking transitions to PREPARED and DEPLOYED. The
|
|
valid stages for input to this method are Stage.PREPARED and
|
|
Stage.DEPLOYED.
|
|
Note that nodes that are DEPLOYED count as PREPARED, but not
|
|
the other way around.
|
|
"""
|
|
if stage not in [Stage.DEPLOYED, Stage.PREPARED]:
|
|
raise DeploymentGroupStageError(
|
|
"The stage {} is not valid for checking group"
|
|
" failures.".format(stage))
|
|
success_nodes = set()
|
|
# deployed nodes count as success for prepared and deployed
|
|
success_nodes.update(self.get_nodes(Stage.DEPLOYED))
|
|
if stage == Stage.PREPARED:
|
|
success_nodes.update(self.get_nodes(Stage.PREPARED))
|
|
group = self._find_group(group_name)
|
|
return group.get_failed_success_criteria(success_nodes)
|
|
|
|
#
|
|
# Methods for handling nodes
|
|
#
|
|
|
|
def fail_unsuccessful_nodes(self, group, successes):
|
|
"""Fail nodes that were not successful in a group's actionable list
|
|
|
|
:param group: the group to check
|
|
:param successes: the list of successful nodes from processing
|
|
|
|
This makes an assumption that all actionable nodes should be in a list
|
|
of successes if they are to be considered successful. If the success
|
|
list is empty, all the actionable nodes in the group would be
|
|
considered failed.
|
|
"""
|
|
# Mark non-successes as failed
|
|
failed_nodes = set(group.actionable_nodes).difference(set(successes))
|
|
for node_name in failed_nodes:
|
|
self.mark_node_failed(node_name)
|
|
|
|
def mark_node_deployed(self, node_name):
|
|
"""Mark a node as deployed"""
|
|
self._set_node_stage(node_name, Stage.DEPLOYED)
|
|
|
|
def mark_node_prepared(self, node_name):
|
|
"""Mark a node as prepared"""
|
|
self._set_node_stage(node_name, Stage.PREPARED)
|
|
|
|
def mark_node_failed(self, node_name):
|
|
"""Mark a node as failed"""
|
|
self._set_node_stage(node_name, Stage.FAILED)
|
|
|
|
def _set_node_stage(self, node_name, stage):
|
|
"""Find and set a node's stage to the specified stage"""
|
|
if node_name in self._all_nodes:
|
|
self._all_nodes[node_name] = stage
|
|
else:
|
|
raise UnknownNodeError("The specified node {} does not"
|
|
" exist in this manager".format(node_name))
|
|
|
|
def get_nodes(self, stage=None):
|
|
"""Get a list of nodes that have the specified status"""
|
|
if stage is None:
|
|
return [name for name in self._all_nodes]
|
|
|
|
return [name for name, n_stage
|
|
in self._all_nodes.items()
|
|
if n_stage == stage]
|
|
|
|
|
|
def _update_group_actionable_nodes(group, known_nodes):
|
|
"""Updates a group's actionable nodes
|
|
|
|
Acitonable nodes is the group's (full_nodes - known_nodes)
|
|
"""
|
|
LOG.debug("Known nodes before processing group %s is %s",
|
|
group.name,
|
|
", ".join(known_nodes))
|
|
|
|
group_nodes = set(group.full_nodes)
|
|
group.actionable_nodes = list(group_nodes.difference(known_nodes))
|
|
LOG.debug("Group %s set actionable_nodes to %s. "
|
|
"Full node list for this group is %s",
|
|
group.name,
|
|
", ".join(group.actionable_nodes),
|
|
", ".join(group.full_nodes))
|
|
|
|
|
|
def _generate_group_graph(groups):
|
|
"""Create the directed graph of groups
|
|
|
|
:param groups: An iterable of DeploymentGroup objects
|
|
returns a directed graph of group names
|
|
"""
|
|
LOG.debug("Generating directed graph of groups based on dependencies")
|
|
graph = nx.DiGraph()
|
|
# Add all groups as graph nodes. It is not strictly necessary to do two
|
|
# loops here, but n is small and for obviousness.
|
|
for group in groups:
|
|
graph.add_node(group.name)
|
|
|
|
# Add all edges
|
|
for group in groups:
|
|
if group.depends_on:
|
|
for parent in group.depends_on:
|
|
LOG.debug("%s has parent %s", group.name, parent)
|
|
graph.add_edge(parent, group.name)
|
|
else:
|
|
LOG.debug("%s is not dependent upon any other groups", group.name)
|
|
|
|
_detect_cycles(graph)
|
|
return graph
|
|
|
|
|
|
def _detect_cycles(graph):
|
|
"""Detect if there are cycles between the groups
|
|
|
|
Raise a DeploymentGroupCycleError if there are any circular
|
|
dependencies
|
|
"""
|
|
LOG.debug("Detecting cycles in graph")
|
|
circ_deps = []
|
|
try:
|
|
circ_deps = list(nx.find_cycle(graph))
|
|
except nx.NetworkXNoCycle:
|
|
LOG.info('There are no cycles detected in the graph')
|
|
pass
|
|
|
|
if circ_deps:
|
|
involved_nodes = set()
|
|
# a value in this list is like: ('group1', 'group2')
|
|
for dep in circ_deps:
|
|
involved_nodes.update(dep)
|
|
raise DeploymentGroupCycleError(
|
|
"The following are involved in a circular dependency:"
|
|
" {}".format(", ".join(involved_nodes))
|
|
)
|