Fix: Make layering more performant.

This is to make Deckhand layering more performant. Layering is
currently the main bottleneck in the rendered-documents endpoint.
The bottleneck is specifically related to calculating document
children in the layering module. The runtime was O(N^2) but
has been decreased to ~O(N) resulting in much faster performance
overall. Using local testing against the lab deployment YAML,
runtime for layering is decreased to 15 seconds or so, down
from 55 seconds, which is roughly 4 times faster. This
performance shouldn't increase by much given even larger
YAMLs due to the linear-time performance change.

Change-Id: Ib5f7fd08a38d05ae79d18227f8aafc25bd13f7ca
This commit is contained in:
Felipe Monteiro 2018-01-27 00:11:01 -05:00 committed by Mark Burnett
parent 3bdebba4bb
commit e42ff5e8e3
5 changed files with 119 additions and 116 deletions

View File

@ -53,6 +53,10 @@ class DocumentDict(dict):
def name(self):
return utils.jsonpath_parse(self, 'metadata.name')
@property
def layering_definition(self):
return utils.jsonpath_parse(self, 'metadata.layeringDefinition')
@property
def layer(self):
return utils.jsonpath_parse(

View File

@ -44,7 +44,36 @@ class DocumentLayering(object):
SUPPORTED_METHODS = ('merge', 'replace', 'delete')
def _calc_document_children(self):
def _is_actual_child_document(self, document, potential_child,
target_layer):
# Documents with different schemas are never layered together,
# so consider only documents with same schema as candidates.
is_potential_child = (
potential_child.layer == target_layer and
potential_child.schema == document.schema
)
if is_potential_child:
parent_selector = potential_child.parent_selector
labels = document.labels
return parent_selector == labels
return False
def _calc_document_children(self, document):
try:
document_layer_idx = self._layer_order.index(document.layer)
child_layer = self._layer_order[document_layer_idx + 1]
except IndexError:
# The lowest layer has been reached, so no children.
return
potential_children = self._documents_by_labels.get(
str(document.labels), [])
for potential_child in potential_children:
if self._is_actual_child_document(document, potential_child,
child_layer):
yield potential_child
def _calc_all_document_children(self):
"""Determine each document's children.
For each document, attempts to find the document's children. Adds a new
@ -65,65 +94,30 @@ class DocumentLayering(object):
:raises IndeterminateDocumentParent: If more than one parent document
was found for a document.
"""
# ``all_children`` is a counter utility for verifying that each
# document has exactly one parent.
all_children = collections.Counter()
# Mapping of (doc.name, doc.metadata.name) => children, where children
# are the documents whose `parentSelector` references the doc.
self._children = {}
def _get_children(doc):
children = []
doc_layer = doc.layer
try:
next_layer_idx = self._layer_order.index(doc_layer) + 1
children_doc_layer = self._layer_order[next_layer_idx]
except IndexError:
# The lowest layer has been reached, so no children. Return
# empty list.
return children
for other_doc in self._layered_docs:
# Documents with different schemas are never layered together,
# so consider only documents with same schema as candidates.
is_potential_child = (
other_doc.layer == children_doc_layer and
other_doc.schema == doc.schema
)
if is_potential_child:
# A document can have many labels but should only have one
# explicit label for the parentSelector.
parent_sel = other_doc.parent_selector
try:
parent_sel_key = list(parent_sel.keys())[0]
parent_sel_val = list(parent_sel.values())[0]
except IndexError:
continue
if (parent_sel_key in doc.labels and
parent_sel_val == doc.labels[parent_sel_key]):
children.append(other_doc)
return children
self._parentless_documents = []
for layer in self._layer_order:
docs_by_layer = list(filter(
(lambda x: x.layer == layer), self._layered_docs))
for doc in docs_by_layer:
children = _get_children(doc)
documents_in_layer = self._documents_by_layer.get(layer, [])
for document in documents_in_layer:
children = list(self._calc_document_children(document))
if children:
all_children.update(children)
self._children.setdefault((doc.name, doc.schema),
children)
self._children.setdefault(
(document.name, document.schema), children)
all_children_elements = list(all_children.elements())
secondary_docs = list(
filter(lambda d: d.layer != self._layer_order[0],
self._layered_docs)
) if self._layer_order else []
for doc in secondary_docs:
secondary_documents = []
for layer, documents in self._documents_by_layer.items():
if self._layer_order and layer != self._layer_order[0]:
secondary_documents.extend(documents)
for doc in secondary_documents:
# Unless the document is the topmost document in the
# `layerOrder` of the LayeringPolicy, it should be a child document
# of another document.
@ -142,16 +136,13 @@ class DocumentLayering(object):
doc.parent_selector)
raise errors.IndeterminateDocumentParent(document=doc)
return self._layered_docs
def _get_layering_order(self, layering_policy):
# Pre-processing stage that removes empty layers from the
# ``layerOrder`` in the layering policy.
layer_order = list(layering_policy.layer_order)
for layer in layer_order[:]:
docs_by_layer = list(filter(
(lambda x: x.layer == layer), self._layered_docs))
if not docs_by_layer:
documents_by_layer = self._documents_by_layer.get(layer, [])
if not documents_by_layer:
LOG.info('%s is an empty layer with no documents. It will be '
'discarded from the layerOrder during the layering '
'process.', layer)
@ -163,17 +154,6 @@ class DocumentLayering(object):
'will be performed.')
return layer_order
def _extract_layering_policy(self, documents):
for doc in documents:
if doc['schema'].startswith(types.LAYERING_POLICY_SCHEMA):
layering_policy = doc
return (
document_wrapper.DocumentDict(layering_policy),
[document_wrapper.DocumentDict(d) for d in documents
if d is not layering_policy]
)
return None, [document_wrapper.DocumentDict(d) for d in documents]
def __init__(self, documents, substitution_sources=None):
"""Contructor for ``DocumentLayering``.
@ -187,22 +167,50 @@ class DocumentLayering(object):
sources for substitution. Should only include concrete documents.
:type substitution_sources: List[dict]
"""
self._layering_policy, self._documents = self._extract_layering_policy(
documents)
self._documents_to_layer = []
self._documents_by_layer = {}
self._documents_by_labels = {}
self._layering_policy = None
for document in documents:
document = document_wrapper.DocumentDict(document)
if document.schema.startswith(types.LAYERING_POLICY_SCHEMA):
if self._layering_policy:
LOG.warning('More than one layering policy document was '
'passed in. Using the first one found: [%s] '
'%s.', document.schema, document.name)
else:
self._layering_policy = document
continue
if document.layering_definition:
self._documents_to_layer.append(document)
if document.layer:
self._documents_by_layer.setdefault(document.layer, [])
self._documents_by_layer[document.layer].append(document)
if document.parent_selector:
self._documents_by_labels.setdefault(
str(document.parent_selector), [])
self._documents_by_labels[
str(document.parent_selector)].append(document)
if self._layering_policy is None:
error_msg = (
'No layering policy found in the system so could not reder '
'documents.')
LOG.error(error_msg)
raise errors.LayeringPolicyNotFound()
self._layered_docs = list(
filter(lambda x: 'layeringDefinition' in x.metadata,
self._documents))
self._layer_order = self._get_layering_order(self._layering_policy)
self._parentless_documents = []
self._layered_documents = self._calc_document_children()
self._calc_all_document_children()
self._substitution_sources = substitution_sources or []
self.secrets_substitution = secrets_manager.SecretsSubstitution(
self._substitution_sources)
del self._documents_by_layer
del self._documents_by_labels
def _apply_action(self, action, child_data, overall_data):
"""Apply actions to each layer that is rendered.
@ -219,7 +227,7 @@ class DocumentLayering(object):
raise errors.UnsupportedActionMethod(
action=action, document=child_data)
# Use copy prevent these data from being updated referentially.
# Use copy to prevent these data from being updated referentially.
overall_data = copy.deepcopy(overall_data)
child_data = copy.deepcopy(child_data)
rendered_data = overall_data
@ -293,15 +301,6 @@ class DocumentLayering(object):
for grandchild in grandchildren:
yield grandchild
def _apply_substitutions(self, document):
try:
secrets_substitution = secrets_manager.SecretsSubstitution(
document, self._substitution_sources)
return secrets_substitution.substitute_all()
except errors.SubstitutionDependencyNotFound:
LOG.error('Failed to render the documents because a secret '
'document could not be found.')
def render(self):
"""Perform layering on the list of documents passed to ``__init__``.
@ -313,7 +312,7 @@ class DocumentLayering(object):
:returns: The list of rendered documents (does not include layering
policy document).
:rtype: list[dict]
:rtype: List[dict]
"""
# ``rendered_data_by_layer`` tracks the set of changes across all
# actions across each layer for a specific document.
@ -323,14 +322,15 @@ class DocumentLayering(object):
# the system. It should probably be impossible for more than 1
# top-level doc to exist, but handle multiple for now.
global_docs = [
doc for doc in self._layered_documents
doc for doc in self._documents_to_layer
if self._layer_order and doc.layer == self._layer_order[0]
]
for doc in global_docs:
layer_idx = self._layer_order.index(doc.layer)
if doc.substitutions:
substituted_data = self._apply_substitutions(doc)
substituted_data = list(
self.secrets_substitution.substitute_all(doc))
if substituted_data:
rendered_data_by_layer[layer_idx] = substituted_data[0]
else:
@ -354,11 +354,12 @@ class DocumentLayering(object):
# Update the actual document data if concrete.
if not child.is_abstract:
child.data = rendered_data.data
substituted_data = self._apply_substitutions(child)
substituted_data = list(
self.secrets_substitution.substitute_all(child))
if substituted_data:
rendered_data = substituted_data[0]
child_index = self._layered_documents.index(child)
self._layered_documents[child_index].data = (
child_index = self._documents_to_layer.index(child)
self._documents_to_layer[child_index].data = (
rendered_data.data)
# Update ``rendered_data_by_layer`` for this layer so that
@ -372,8 +373,9 @@ class DocumentLayering(object):
# parentless documents.
for doc in self._parentless_documents:
if not doc.is_abstract and doc.substitutions:
substituted_data = self._apply_substitutions(doc)
substituted_data = list(
self.secrets_substitution.substitute_all(doc))
if substituted_data:
doc = substituted_data[0]
return self._layered_documents + [self._layering_policy]
return self._documents_to_layer + [self._layering_policy]

View File

@ -97,34 +97,20 @@ class SecretsManager(object):
class SecretsSubstitution(object):
"""Class for document substitution logic for YAML files."""
def __init__(self, documents, substitution_sources=None):
def __init__(self, substitution_sources=None):
"""SecretSubstitution constructor.
This class will automatically detect documents that require
substitution; documents need not be filtered prior to being passed to
the constructor.
:param documents: List of documents that are candidates for
substitution.
:type documents: List[dict]
:param substitution_sources: List of documents that are potential
sources for substitution. Should only include concrete documents.
:type substitution_sources: List[dict]
"""
if not isinstance(documents, list):
documents = [documents]
self._documents = []
self._substitution_sources = {}
for document in documents:
if not isinstance(document, document_wrapper.DocumentDict):
document = document_wrapper.DocumentDict(document)
# If the document has substitutions include it.
if document.substitutions:
self._documents.append(document)
for document in substitution_sources:
if not isinstance(document, document_wrapper.DocumentDict):
document = document_wrapper.DocumentDict(document)
@ -132,7 +118,7 @@ class SecretsSubstitution(object):
self._substitution_sources.setdefault(
(document.schema, document.name), document)
def substitute_all(self):
def substitute_all(self, documents):
"""Substitute all documents that have a `metadata.substitutions` field.
Concrete (non-abstract) documents can be used as a source of
@ -140,17 +126,29 @@ class SecretsSubstitution(object):
layer-independent, a document in the region layer could insert data
from a document in the site layer.
:param documents: List of documents that are candidates for
substitution.
:type documents: dict or List[dict]
:returns: List of fully substituted documents.
:rtype: List[:class:`DocumentDict`]
:raises SubstitutionDependencyNotFound: If a substitution source wasn't
found or something else went wrong during substitution.
:rtype: Generator[:class:`DocumentDict`]
"""
documents_to_substitute = []
if not isinstance(documents, list):
documents = [documents]
for document in documents:
if not isinstance(document, document_wrapper.DocumentDict):
document = document_wrapper.DocumentDict(document)
# If the document has substitutions include it.
if document.substitutions:
documents_to_substitute.append(document)
LOG.debug('Performing substitution on following documents: %s',
', '.join(['[%s] %s' % (d.schema, d.name)
for d in self._documents]))
substituted_docs = []
for d in documents_to_substitute]))
for document in self._documents:
for document in documents_to_substitute:
LOG.debug('Checking for substitutions for document [%s] %s.',
document.schema, document.name)
for sub in document.substitutions:
@ -214,8 +212,7 @@ class SecretsSubstitution(object):
raise errors.SubstitutionDependencyNotFound(
details=six.text_type(e))
substituted_docs.append(document)
return substituted_docs
yield document
@staticmethod
def sanitize_potential_secrets(document):

View File

@ -105,7 +105,7 @@ class TestDocumentLayeringNegative(
for parent_label in ({'key2': 'value2'}, {'key1': 'value2'}):
# Second doc is the global doc, or parent.
documents[1]['metadata']['labels'] = [parent_label]
documents[1]['metadata']['labels'] = parent_label
layering.DocumentLayering(documents)
self.assertRegexpMatches(mock_log.info.mock_calls[0][1][0],

View File

@ -95,8 +95,8 @@ class TestSecretsSubstitution(test_base.TestDbBase):
**{'metadata.layeringDefinition.abstract': False})
secret_substitution = secrets_manager.SecretsSubstitution(
documents, substitution_sources)
substituted_docs = secret_substitution.substitute_all()
substitution_sources)
substituted_docs = secret_substitution.substitute_all(documents)
self.assertIn(expected_document, substituted_docs)