362 lines
13 KiB
Python
362 lines
13 KiB
Python
# Copyright 2018 AT&T Intellectual Property. All other rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
"""Deployment group module
|
|
|
|
Encapsulates classes and functions that provide core deployment group
|
|
functionality used during baremetal provisioning.
|
|
"""
|
|
from enum import Enum
|
|
import logging
|
|
import operator
|
|
|
|
from .errors import DeploymentGroupLabelFormatError
|
|
from .errors import DeploymentGroupStageError
|
|
from .errors import InvalidDeploymentGroupError
|
|
from .errors import InvalidDeploymentGroupNodeLookupError
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
def check_label_format(label_string):
|
|
"""Validates that a label_string is in key:value format.
|
|
|
|
Raises DeploymentGroupLabelFormatError if the value is not compliant.
|
|
"""
|
|
split = label_string.split(":")
|
|
if not len(split) == 2:
|
|
raise DeploymentGroupLabelFormatError(
|
|
"Label {} is formatted incorrectly. One : (colon) character is "
|
|
"required, and the label must be in key:value format".format(
|
|
label_string)
|
|
)
|
|
for v in split:
|
|
if v.strip() == "":
|
|
raise DeploymentGroupLabelFormatError(
|
|
"Label {} is formatted incorrectly. The values on either side "
|
|
"of the colon character must not be empty.".format(
|
|
label_string)
|
|
)
|
|
|
|
|
|
class Stage(Enum):
|
|
"""Valid values for baremetal node and deployment group stages of
|
|
deployment
|
|
"""
|
|
# A node that has not yet started deployment. The default.
|
|
NOT_STARTED = 'NOT_STARTED'
|
|
# A node that has finished the prepare_node stage successfully
|
|
PREPARED = 'PREPARED'
|
|
# A node that has finished the deploy_node stage successfully
|
|
DEPLOYED = 'DEPLOYED'
|
|
# A node that has failed to complete in any step.
|
|
FAILED = 'FAILED'
|
|
|
|
@classmethod
|
|
def is_complete(cls, stage):
|
|
return stage in [cls.DEPLOYED, cls.FAILED]
|
|
|
|
@classmethod
|
|
def previous_stage(cls, stage):
|
|
"""The valid states before the supplied state"""
|
|
if stage == cls.NOT_STARTED:
|
|
return []
|
|
if stage == cls.PREPARED:
|
|
return [cls.NOT_STARTED]
|
|
if stage == cls.DEPLOYED:
|
|
return [cls.PREPARED]
|
|
if stage == cls.FAILED:
|
|
return [cls.NOT_STARTED, cls.PREPARED]
|
|
else:
|
|
raise DeploymentGroupStageError("{} is not a valid stage".format(
|
|
str(stage)))
|
|
|
|
|
|
class GroupNodeSelector:
|
|
"""GroupNodeSelector object
|
|
|
|
:param selector_dict: dictionary representing the possible selector values
|
|
|
|
Encapsulates the criteria defining the selector for a deployment group.
|
|
Example selector_dict::
|
|
|
|
{
|
|
'node_names': [],
|
|
'node_labels': [],
|
|
'node_tags': ['control'],
|
|
'rack_names': ['rack03'],
|
|
}
|
|
"""
|
|
def __init__(self, selector_dict):
|
|
self.node_names = selector_dict.get('node_names', [])
|
|
self.node_labels = selector_dict.get('node_labels', [])
|
|
self.node_tags = selector_dict.get('node_tags', [])
|
|
self.rack_names = selector_dict.get('rack_names', [])
|
|
|
|
for label in self.node_labels:
|
|
check_label_format(label)
|
|
|
|
# A selector is an "all_selector" if there are no criteria specified.
|
|
self.all_selector = not any([self.node_names, self.node_labels,
|
|
self.node_tags, self.rack_names])
|
|
if self.all_selector:
|
|
LOG.debug("Selector values select all available nodes")
|
|
|
|
def get_node_labels_as_dict(self):
|
|
return {label.split(':')[0].strip(): label.split(':')[1].strip()
|
|
for label in self.node_labels}
|
|
|
|
|
|
class SuccessCriteria:
|
|
"""Defines the success criteria for a deployment group
|
|
|
|
:param criteria: a dictionary containing up to 3 fields in
|
|
percent_successful_nodes, minimum_successful_nodes,
|
|
maximum_failed_nodes
|
|
|
|
If no criteria are specified, all results are considered a success
|
|
"""
|
|
def __init__(self, criteria):
|
|
if not criteria:
|
|
self._always_succeed = True
|
|
return
|
|
|
|
self._always_succeed = False
|
|
# set the criteria or let them be None
|
|
self.pct_succ_nodes = criteria.get('percent_successful_nodes')
|
|
self.min_succ_nodes = criteria.get('minimum_successful_nodes')
|
|
self.max_failed_nodes = criteria.get('maximum_failed_nodes')
|
|
|
|
def get_failed(self, succ_list, all_nodes_list):
|
|
"""Determine which criteria have failed.
|
|
|
|
:param succ_list: A list of names of nodes that have successfully
|
|
completed a stage
|
|
:param all_nodes_list: A list of all node names that are to be
|
|
evaluated against.
|
|
|
|
Using the provided list of successful nodes, and the list of all
|
|
nodes, check which of the success criteria have failed to have been
|
|
met.
|
|
"""
|
|
failures = []
|
|
|
|
# If no criteria, or list of all nodes is empty, return empty list
|
|
if self._always_succeed or len(all_nodes_list) == 0:
|
|
return failures
|
|
|
|
succ_set = set(succ_list)
|
|
all_set = set(all_nodes_list)
|
|
|
|
all_size = len(all_set)
|
|
succ_size = len(succ_set.intersection(all_set))
|
|
fail_size = len(all_set.difference(succ_set))
|
|
actual_pct_succ = succ_size / all_size * 100
|
|
|
|
failures.extend(self._check("percent_successful_nodes",
|
|
actual_pct_succ, operator.ge,
|
|
self.pct_succ_nodes))
|
|
failures.extend(self._check("minimum_successful_nodes", succ_size,
|
|
operator.ge, self.min_succ_nodes))
|
|
|
|
failures.extend(self._check("maximum_failed_nodes", fail_size,
|
|
operator.le, self.max_failed_nodes))
|
|
return failures
|
|
|
|
def _check(self, name, actual, op, needed):
|
|
"""Evaluates a single criteria
|
|
|
|
:param name: name of the check
|
|
:param actual: the result that was achieved (LHS)
|
|
:param op: operator used for comparison
|
|
:param needed: the threshold of success (RHS). If this parameter
|
|
is None, the criteria is ignored as "successful" because it
|
|
was not set as a needed criteria
|
|
|
|
Returns a list containing the failure dictionary if the comparison
|
|
fails or and empty list if check is successful.
|
|
"""
|
|
if needed is None:
|
|
LOG.info(" - %s criteria not specified, not evaluated", name)
|
|
return []
|
|
|
|
if op(actual, needed):
|
|
LOG.info(" - %s succeeded, %s %s %s", name, actual, op.__name__,
|
|
needed)
|
|
return []
|
|
else:
|
|
fail = {"criteria": name, "needed": needed, "actual": actual}
|
|
LOG.info(" - %s failed, %s %s %s", name, actual, op.__name__,
|
|
needed)
|
|
return [fail]
|
|
|
|
|
|
class DeploymentGroup:
|
|
"""DeploymentGroup object representing a deployment group
|
|
|
|
:param group_dict: dictionary representing a group
|
|
:param node_lookup: an injected function that will perform node lookup for
|
|
a group. Function must accept an iterable of GroupNodeSelector and
|
|
return a string iterable of node names (or empty iterable if there are
|
|
no node names)
|
|
|
|
Example group_dict::
|
|
|
|
{
|
|
'name': 'control-nodes',
|
|
'critical': True,
|
|
'depends_on': ['ntp-node'],
|
|
'selectors': [
|
|
{
|
|
'node_names': [],
|
|
'node_labels': [],
|
|
'node_tags': ['control'],
|
|
'rack_names': ['rack03'],
|
|
},
|
|
],
|
|
'success_criteria': {
|
|
'percent_successful_nodes': 90,
|
|
'minimum_successful_nodes': 3,
|
|
'maximum_failed_nodes': 1,
|
|
},
|
|
}
|
|
"""
|
|
def __init__(self, group_dict, node_lookup):
|
|
# store the original dictionary
|
|
self._group_dict = group_dict
|
|
|
|
# fields required by schema
|
|
self._check_required_fields()
|
|
|
|
self.critical = group_dict['critical']
|
|
self.depends_on = group_dict['depends_on']
|
|
self.name = group_dict['name']
|
|
|
|
self.selectors = []
|
|
for selector_dict in group_dict['selectors']:
|
|
self.selectors.append(GroupNodeSelector(selector_dict))
|
|
if not self.selectors:
|
|
# no selectors means add an "all" selector
|
|
self.selectors.append(GroupNodeSelector({}))
|
|
|
|
self.success_criteria = SuccessCriteria(
|
|
group_dict.get('success_criteria', {})
|
|
)
|
|
|
|
# all groups start as NOT_STARTED
|
|
self.__stage = None
|
|
self.stage = Stage.NOT_STARTED
|
|
|
|
# node_lookup function for use with this deployment group
|
|
# lookup the full list of nodes for this group's selectors
|
|
self.node_lookup = node_lookup
|
|
self.full_nodes = self._calculate_all_nodes()
|
|
|
|
# actionable_nodes is set up based on multi-group interaction.
|
|
# Only declaring the field here. Used for deduplicaiton.
|
|
self.actionable_nodes = []
|
|
|
|
@property
|
|
def stage(self):
|
|
return self.__stage
|
|
|
|
@stage.setter
|
|
def stage(self, stage):
|
|
valid_prior = Stage.previous_stage(stage)
|
|
pre_change_stage = self.__stage
|
|
if self.__stage == stage:
|
|
return
|
|
elif self.__stage is None and not valid_prior:
|
|
self.__stage = stage
|
|
elif self.__stage in valid_prior:
|
|
self.__stage = stage
|
|
else:
|
|
raise DeploymentGroupStageError(
|
|
"{} is not a valid stage for a group in stage {}".format(
|
|
stage, self.__stage
|
|
))
|
|
LOG.info("Setting group %s with %s -> %s",
|
|
self.name,
|
|
pre_change_stage,
|
|
stage)
|
|
|
|
def _check_required_fields(self):
|
|
"""Checks for required input fields and errors if any are missing"""
|
|
for attr in ['critical', 'depends_on', 'name', 'selectors']:
|
|
try:
|
|
value = self._group_dict[attr]
|
|
LOG.debug("Attribute %s has value %s", attr, str(value))
|
|
except KeyError:
|
|
raise InvalidDeploymentGroupError(
|
|
"Attribute '{}' is required as input to create a "
|
|
"DeploymentGroup".format(attr))
|
|
|
|
def _calculate_all_nodes(self):
|
|
"""Invoke the node_lookup to retrieve nodes
|
|
|
|
After construction of the DeploymentGroup, this method is generally
|
|
not useful as the results are stored in self.full_nodes
|
|
"""
|
|
LOG.debug("Beginning lookup of nodes for group %s", self.name)
|
|
nodes = self.node_lookup(self.selectors)
|
|
if nodes is None:
|
|
nodes = []
|
|
try:
|
|
node_list = list(nodes)
|
|
except TypeError:
|
|
raise InvalidDeploymentGroupNodeLookupError(
|
|
"The node lookup function supplied to the DeploymentGroup "
|
|
"is not an iterable"
|
|
)
|
|
if not all(isinstance(node, str) for node in node_list):
|
|
raise InvalidDeploymentGroupNodeLookupError(
|
|
"The node lookup function supplied to the DeploymentGroup "
|
|
"is not all strings"
|
|
)
|
|
LOG.info("Group %s selectors have resolved to nodes: %s",
|
|
self.name, ", ".join(node_list))
|
|
return node_list
|
|
|
|
def get_failed_success_criteria(self, success_node_list):
|
|
"""Check the success criteria for this group.
|
|
|
|
:param success_node_list: list of nodes that are deemed successful
|
|
to be compared to the success criteria
|
|
|
|
Using the list of all nodes, and the provided success_node_list,
|
|
use the SuccessCriteria for this group to see if that list of
|
|
successes meets the criteria.
|
|
Note that this is not checking for any particular stage of deployment,
|
|
simply the comparison of the total list of nodes to the provided list.
|
|
Returns a list of failures. An empty list indicates successful
|
|
comparison with all criteria.
|
|
|
|
A good pattern for use of this method is to provide a list of all
|
|
nodes being deployed across all groups that are successful for a
|
|
given stage of deployment (e.g. all prepared, all deployed).
|
|
Calculations are done using set comparisons, so nodes that are not
|
|
important for this group will be ignored. It is important *not* to
|
|
provide only a list of nodes that were recently acted upon as part of
|
|
this group, as deduplication from overlapping groups may cause the
|
|
calculations to be skewed and report false failures.
|
|
"""
|
|
LOG.info('Assessing success criteria for group %s', self.name)
|
|
sc = self.success_criteria.get_failed(success_node_list,
|
|
self.full_nodes)
|
|
if sc:
|
|
LOG.info('Group %s failed success criteria', self.name)
|
|
else:
|
|
LOG.info('Group %s success criteria passed', self.name)
|
|
return sc
|