From e42ff5e8e31852394f06f2248706b6548db3d1f6 Mon Sep 17 00:00:00 2001 From: Felipe Monteiro Date: Sat, 27 Jan 2018 00:11:01 -0500 Subject: [PATCH] Fix: Make layering more performant. This is to make Deckhand layering more performant. Layering is currently the main bottleneck in the rendered-documents endpoint. The bottleneck is specifically related to calculating document children in the layering module. The runtime was O(N^2) but has been decreased to ~O(N) resulting in much faster performance overall. Using local testing against the lab deployment YAML, runtime for layering is decreased to 15 seconds or so, down from 55 seconds, which is roughly 4 times faster. This performance shouldn't increase by much given even larger YAMLs due to the linear-time performance change. Change-Id: Ib5f7fd08a38d05ae79d18227f8aafc25bd13f7ca --- deckhand/engine/document_wrapper.py | 4 + deckhand/engine/layering.py | 180 +++++++++--------- deckhand/engine/secrets_manager.py | 45 ++--- .../engine/test_document_layering_negative.py | 2 +- .../tests/unit/engine/test_secrets_manager.py | 4 +- 5 files changed, 119 insertions(+), 116 deletions(-) diff --git a/deckhand/engine/document_wrapper.py b/deckhand/engine/document_wrapper.py index 9c82daa6..1b33fdba 100644 --- a/deckhand/engine/document_wrapper.py +++ b/deckhand/engine/document_wrapper.py @@ -53,6 +53,10 @@ class DocumentDict(dict): def name(self): return utils.jsonpath_parse(self, 'metadata.name') + @property + def layering_definition(self): + return utils.jsonpath_parse(self, 'metadata.layeringDefinition') + @property def layer(self): return utils.jsonpath_parse( diff --git a/deckhand/engine/layering.py b/deckhand/engine/layering.py index e3d835f2..63bbea41 100644 --- a/deckhand/engine/layering.py +++ b/deckhand/engine/layering.py @@ -44,7 +44,36 @@ class DocumentLayering(object): SUPPORTED_METHODS = ('merge', 'replace', 'delete') - def _calc_document_children(self): + def _is_actual_child_document(self, document, potential_child, + target_layer): + # Documents with different schemas are never layered together, + # so consider only documents with same schema as candidates. + is_potential_child = ( + potential_child.layer == target_layer and + potential_child.schema == document.schema + ) + if is_potential_child: + parent_selector = potential_child.parent_selector + labels = document.labels + return parent_selector == labels + return False + + def _calc_document_children(self, document): + try: + document_layer_idx = self._layer_order.index(document.layer) + child_layer = self._layer_order[document_layer_idx + 1] + except IndexError: + # The lowest layer has been reached, so no children. + return + + potential_children = self._documents_by_labels.get( + str(document.labels), []) + for potential_child in potential_children: + if self._is_actual_child_document(document, potential_child, + child_layer): + yield potential_child + + def _calc_all_document_children(self): """Determine each document's children. For each document, attempts to find the document's children. Adds a new @@ -65,65 +94,30 @@ class DocumentLayering(object): :raises IndeterminateDocumentParent: If more than one parent document was found for a document. """ - # ``all_children`` is a counter utility for verifying that each # document has exactly one parent. all_children = collections.Counter() - # Mapping of (doc.name, doc.metadata.name) => children, where children # are the documents whose `parentSelector` references the doc. self._children = {} - - def _get_children(doc): - children = [] - doc_layer = doc.layer - try: - next_layer_idx = self._layer_order.index(doc_layer) + 1 - children_doc_layer = self._layer_order[next_layer_idx] - except IndexError: - # The lowest layer has been reached, so no children. Return - # empty list. - return children - - for other_doc in self._layered_docs: - # Documents with different schemas are never layered together, - # so consider only documents with same schema as candidates. - is_potential_child = ( - other_doc.layer == children_doc_layer and - other_doc.schema == doc.schema - ) - if is_potential_child: - # A document can have many labels but should only have one - # explicit label for the parentSelector. - parent_sel = other_doc.parent_selector - try: - parent_sel_key = list(parent_sel.keys())[0] - parent_sel_val = list(parent_sel.values())[0] - except IndexError: - continue - - if (parent_sel_key in doc.labels and - parent_sel_val == doc.labels[parent_sel_key]): - children.append(other_doc) - - return children + self._parentless_documents = [] for layer in self._layer_order: - docs_by_layer = list(filter( - (lambda x: x.layer == layer), self._layered_docs)) - for doc in docs_by_layer: - children = _get_children(doc) + documents_in_layer = self._documents_by_layer.get(layer, []) + for document in documents_in_layer: + children = list(self._calc_document_children(document)) if children: all_children.update(children) - self._children.setdefault((doc.name, doc.schema), - children) + self._children.setdefault( + (document.name, document.schema), children) all_children_elements = list(all_children.elements()) - secondary_docs = list( - filter(lambda d: d.layer != self._layer_order[0], - self._layered_docs) - ) if self._layer_order else [] - for doc in secondary_docs: + secondary_documents = [] + for layer, documents in self._documents_by_layer.items(): + if self._layer_order and layer != self._layer_order[0]: + secondary_documents.extend(documents) + + for doc in secondary_documents: # Unless the document is the topmost document in the # `layerOrder` of the LayeringPolicy, it should be a child document # of another document. @@ -142,16 +136,13 @@ class DocumentLayering(object): doc.parent_selector) raise errors.IndeterminateDocumentParent(document=doc) - return self._layered_docs - def _get_layering_order(self, layering_policy): # Pre-processing stage that removes empty layers from the # ``layerOrder`` in the layering policy. layer_order = list(layering_policy.layer_order) for layer in layer_order[:]: - docs_by_layer = list(filter( - (lambda x: x.layer == layer), self._layered_docs)) - if not docs_by_layer: + documents_by_layer = self._documents_by_layer.get(layer, []) + if not documents_by_layer: LOG.info('%s is an empty layer with no documents. It will be ' 'discarded from the layerOrder during the layering ' 'process.', layer) @@ -163,17 +154,6 @@ class DocumentLayering(object): 'will be performed.') return layer_order - def _extract_layering_policy(self, documents): - for doc in documents: - if doc['schema'].startswith(types.LAYERING_POLICY_SCHEMA): - layering_policy = doc - return ( - document_wrapper.DocumentDict(layering_policy), - [document_wrapper.DocumentDict(d) for d in documents - if d is not layering_policy] - ) - return None, [document_wrapper.DocumentDict(d) for d in documents] - def __init__(self, documents, substitution_sources=None): """Contructor for ``DocumentLayering``. @@ -187,22 +167,50 @@ class DocumentLayering(object): sources for substitution. Should only include concrete documents. :type substitution_sources: List[dict] """ - self._layering_policy, self._documents = self._extract_layering_policy( - documents) + self._documents_to_layer = [] + self._documents_by_layer = {} + self._documents_by_labels = {} + self._layering_policy = None + + for document in documents: + document = document_wrapper.DocumentDict(document) + if document.schema.startswith(types.LAYERING_POLICY_SCHEMA): + if self._layering_policy: + LOG.warning('More than one layering policy document was ' + 'passed in. Using the first one found: [%s] ' + '%s.', document.schema, document.name) + else: + self._layering_policy = document + continue + + if document.layering_definition: + self._documents_to_layer.append(document) + if document.layer: + self._documents_by_layer.setdefault(document.layer, []) + self._documents_by_layer[document.layer].append(document) + if document.parent_selector: + self._documents_by_labels.setdefault( + str(document.parent_selector), []) + self._documents_by_labels[ + str(document.parent_selector)].append(document) + if self._layering_policy is None: error_msg = ( 'No layering policy found in the system so could not reder ' 'documents.') LOG.error(error_msg) raise errors.LayeringPolicyNotFound() - self._layered_docs = list( - filter(lambda x: 'layeringDefinition' in x.metadata, - self._documents)) + self._layer_order = self._get_layering_order(self._layering_policy) - self._parentless_documents = [] - self._layered_documents = self._calc_document_children() + self._calc_all_document_children() self._substitution_sources = substitution_sources or [] + self.secrets_substitution = secrets_manager.SecretsSubstitution( + self._substitution_sources) + + del self._documents_by_layer + del self._documents_by_labels + def _apply_action(self, action, child_data, overall_data): """Apply actions to each layer that is rendered. @@ -219,7 +227,7 @@ class DocumentLayering(object): raise errors.UnsupportedActionMethod( action=action, document=child_data) - # Use copy prevent these data from being updated referentially. + # Use copy to prevent these data from being updated referentially. overall_data = copy.deepcopy(overall_data) child_data = copy.deepcopy(child_data) rendered_data = overall_data @@ -293,15 +301,6 @@ class DocumentLayering(object): for grandchild in grandchildren: yield grandchild - def _apply_substitutions(self, document): - try: - secrets_substitution = secrets_manager.SecretsSubstitution( - document, self._substitution_sources) - return secrets_substitution.substitute_all() - except errors.SubstitutionDependencyNotFound: - LOG.error('Failed to render the documents because a secret ' - 'document could not be found.') - def render(self): """Perform layering on the list of documents passed to ``__init__``. @@ -313,7 +312,7 @@ class DocumentLayering(object): :returns: The list of rendered documents (does not include layering policy document). - :rtype: list[dict] + :rtype: List[dict] """ # ``rendered_data_by_layer`` tracks the set of changes across all # actions across each layer for a specific document. @@ -323,14 +322,15 @@ class DocumentLayering(object): # the system. It should probably be impossible for more than 1 # top-level doc to exist, but handle multiple for now. global_docs = [ - doc for doc in self._layered_documents + doc for doc in self._documents_to_layer if self._layer_order and doc.layer == self._layer_order[0] ] for doc in global_docs: layer_idx = self._layer_order.index(doc.layer) if doc.substitutions: - substituted_data = self._apply_substitutions(doc) + substituted_data = list( + self.secrets_substitution.substitute_all(doc)) if substituted_data: rendered_data_by_layer[layer_idx] = substituted_data[0] else: @@ -354,11 +354,12 @@ class DocumentLayering(object): # Update the actual document data if concrete. if not child.is_abstract: child.data = rendered_data.data - substituted_data = self._apply_substitutions(child) + substituted_data = list( + self.secrets_substitution.substitute_all(child)) if substituted_data: rendered_data = substituted_data[0] - child_index = self._layered_documents.index(child) - self._layered_documents[child_index].data = ( + child_index = self._documents_to_layer.index(child) + self._documents_to_layer[child_index].data = ( rendered_data.data) # Update ``rendered_data_by_layer`` for this layer so that @@ -372,8 +373,9 @@ class DocumentLayering(object): # parentless documents. for doc in self._parentless_documents: if not doc.is_abstract and doc.substitutions: - substituted_data = self._apply_substitutions(doc) + substituted_data = list( + self.secrets_substitution.substitute_all(doc)) if substituted_data: doc = substituted_data[0] - return self._layered_documents + [self._layering_policy] + return self._documents_to_layer + [self._layering_policy] diff --git a/deckhand/engine/secrets_manager.py b/deckhand/engine/secrets_manager.py index 3b5ec8cb..3291cc8e 100644 --- a/deckhand/engine/secrets_manager.py +++ b/deckhand/engine/secrets_manager.py @@ -97,34 +97,20 @@ class SecretsManager(object): class SecretsSubstitution(object): """Class for document substitution logic for YAML files.""" - def __init__(self, documents, substitution_sources=None): + def __init__(self, substitution_sources=None): """SecretSubstitution constructor. This class will automatically detect documents that require substitution; documents need not be filtered prior to being passed to the constructor. - :param documents: List of documents that are candidates for - substitution. - :type documents: List[dict] :param substitution_sources: List of documents that are potential sources for substitution. Should only include concrete documents. :type substitution_sources: List[dict] """ - if not isinstance(documents, list): - documents = [documents] - - self._documents = [] self._substitution_sources = {} - for document in documents: - if not isinstance(document, document_wrapper.DocumentDict): - document = document_wrapper.DocumentDict(document) - # If the document has substitutions include it. - if document.substitutions: - self._documents.append(document) - for document in substitution_sources: if not isinstance(document, document_wrapper.DocumentDict): document = document_wrapper.DocumentDict(document) @@ -132,7 +118,7 @@ class SecretsSubstitution(object): self._substitution_sources.setdefault( (document.schema, document.name), document) - def substitute_all(self): + def substitute_all(self, documents): """Substitute all documents that have a `metadata.substitutions` field. Concrete (non-abstract) documents can be used as a source of @@ -140,17 +126,29 @@ class SecretsSubstitution(object): layer-independent, a document in the region layer could insert data from a document in the site layer. + :param documents: List of documents that are candidates for + substitution. + :type documents: dict or List[dict] :returns: List of fully substituted documents. - :rtype: List[:class:`DocumentDict`] - :raises SubstitutionDependencyNotFound: If a substitution source wasn't - found or something else went wrong during substitution. + :rtype: Generator[:class:`DocumentDict`] """ + + documents_to_substitute = [] + if not isinstance(documents, list): + documents = [documents] + + for document in documents: + if not isinstance(document, document_wrapper.DocumentDict): + document = document_wrapper.DocumentDict(document) + # If the document has substitutions include it. + if document.substitutions: + documents_to_substitute.append(document) + LOG.debug('Performing substitution on following documents: %s', ', '.join(['[%s] %s' % (d.schema, d.name) - for d in self._documents])) - substituted_docs = [] + for d in documents_to_substitute])) - for document in self._documents: + for document in documents_to_substitute: LOG.debug('Checking for substitutions for document [%s] %s.', document.schema, document.name) for sub in document.substitutions: @@ -214,8 +212,7 @@ class SecretsSubstitution(object): raise errors.SubstitutionDependencyNotFound( details=six.text_type(e)) - substituted_docs.append(document) - return substituted_docs + yield document @staticmethod def sanitize_potential_secrets(document): diff --git a/deckhand/tests/unit/engine/test_document_layering_negative.py b/deckhand/tests/unit/engine/test_document_layering_negative.py index b941a0b6..9f8fb0b3 100644 --- a/deckhand/tests/unit/engine/test_document_layering_negative.py +++ b/deckhand/tests/unit/engine/test_document_layering_negative.py @@ -105,7 +105,7 @@ class TestDocumentLayeringNegative( for parent_label in ({'key2': 'value2'}, {'key1': 'value2'}): # Second doc is the global doc, or parent. - documents[1]['metadata']['labels'] = [parent_label] + documents[1]['metadata']['labels'] = parent_label layering.DocumentLayering(documents) self.assertRegexpMatches(mock_log.info.mock_calls[0][1][0], diff --git a/deckhand/tests/unit/engine/test_secrets_manager.py b/deckhand/tests/unit/engine/test_secrets_manager.py index 0315072a..51ca0047 100644 --- a/deckhand/tests/unit/engine/test_secrets_manager.py +++ b/deckhand/tests/unit/engine/test_secrets_manager.py @@ -95,8 +95,8 @@ class TestSecretsSubstitution(test_base.TestDbBase): **{'metadata.layeringDefinition.abstract': False}) secret_substitution = secrets_manager.SecretsSubstitution( - documents, substitution_sources) - substituted_docs = secret_substitution.substitute_all() + substitution_sources) + substituted_docs = secret_substitution.substitute_all(documents) self.assertIn(expected_document, substituted_docs)