From 18ae85a2292b34fecaa85bee8c59ab1c84875d18 Mon Sep 17 00:00:00 2001 From: Felipe Monteiro Date: Mon, 27 Aug 2018 00:31:34 +0100 Subject: [PATCH] optimization: Skip post-validation for rendered document cache hit This patch set adds logic to optimize performance around rendered documents caching further by implementing logic to skip over post-rendering validation when cache hits occur. This works because: * if the result set isn't cached, then post-validation is performed; if the post-validation fails the just-cached result set is invalidated * if the result set is cached, then post-validation for that result set has already been performed successfully, so it can be safely skipped over It was discovered via profiler anaylsis that document validation accounts for an appreciable amount of run time, so optimizing around this makes a noticeable difference. Change-Id: I18344ea750cf1028a377028f80ecdd931d9672c0 --- deckhand/control/common.py | 3 +- deckhand/control/revision_documents.py | 61 +++++--------------- deckhand/engine/__init__.py | 5 +- deckhand/engine/cache.py | 38 ++++++++++++- deckhand/engine/render.py | 71 +++++++++++++++++++++++- deckhand/engine/revision_diff.py | 2 +- deckhand/tests/unit/engine/test_cache.py | 22 ++++++-- 7 files changed, 140 insertions(+), 62 deletions(-) diff --git a/deckhand/control/common.py b/deckhand/control/common.py index 80650e7a..75a2a8d2 100644 --- a/deckhand/control/common.py +++ b/deckhand/control/common.py @@ -149,11 +149,10 @@ def get_rendered_docs(revision_id, **filters): documents = document_wrapper.DocumentDict.from_list(data) encryption_sources = _resolve_encrypted_data(documents) try: - rendered_documents = engine.render( + return engine.render( revision_id, documents, encryption_sources=encryption_sources) - return rendered_documents except (errors.BarbicanClientException, errors.BarbicanServerException, errors.InvalidDocumentLayer, diff --git a/deckhand/control/revision_documents.py b/deckhand/control/revision_documents.py index c86a358d..78e5e7a8 100644 --- a/deckhand/control/revision_documents.py +++ b/deckhand/control/revision_documents.py @@ -17,11 +17,11 @@ from oslo_log import log as logging import six from deckhand.common import utils -from deckhand.common import validation_message as vm from deckhand.control import base as api_base from deckhand.control import common from deckhand.control.views import document as document_view from deckhand.db.sqlalchemy import api as db_api +from deckhand import engine from deckhand.engine import document_validation from deckhand import errors from deckhand import policy @@ -107,7 +107,18 @@ class RenderedDocumentsResource(api_base.BaseResource): if include_encrypted: filters['metadata.storagePolicy'].append('encrypted') - rendered_documents = common.get_rendered_docs(revision_id, **filters) + rendered_documents, cache_hit = common.get_rendered_docs( + revision_id, **filters) + + # If the rendered documents result set is cached, then post-validation + # for that result set has already been performed successfully, so it + # can be safely skipped over as an optimization. + if not cache_hit: + data_schemas = db_api.revision_documents_get( + schema=types.DATA_SCHEMA_SCHEMA, deleted=False) + validator = document_validation.DocumentValidation( + rendered_documents, data_schemas, pre_validate=False) + engine.validate_render(revision_id, rendered_documents, validator) # Filters to be applied post-rendering, because many documents are # involved in rendering. User filters can only be applied once all @@ -130,50 +141,4 @@ class RenderedDocumentsResource(api_base.BaseResource): rendered_documents = rendered_documents[:limit] resp.status = falcon.HTTP_200 - self._post_validate(rendered_documents) resp.body = self.view_builder.list(rendered_documents) - - def _post_validate(self, rendered_documents): - # Perform schema validation post-rendering to ensure that rendering - # and substitution didn't break anything. - data_schemas = db_api.revision_documents_get( - schema=types.DATA_SCHEMA_SCHEMA, deleted=False) - doc_validator = document_validation.DocumentValidation( - rendered_documents, data_schemas, pre_validate=False) - try: - validations = doc_validator.validate_all() - except errors.InvalidDocumentFormat as e: - # Post-rendering validation errors likely indicate an internal - # rendering bug, so override the default code to 500. - e.code = 500 - LOG.error('Failed to post-validate rendered documents.') - LOG.exception(e.format_message()) - raise e - else: - error_list = [] - - for validation in validations: - if validation['status'] == 'failure': - error_list.extend([ - vm.ValidationMessage( - message=error['message'], - name=vm.DOCUMENT_POST_RENDERING_FAILURE, - doc_schema=error['schema'], - doc_name=error['name'], - doc_layer=error['layer'], - diagnostic={ - k: v for k, v in error.items() if k in ( - 'schema_path', - 'validation_schema', - 'error_section' - ) - } - ) - for error in validation['errors'] - ]) - - if error_list: - raise errors.InvalidDocumentFormat( - error_list=error_list, - reason='Validation' - ) diff --git a/deckhand/engine/__init__.py b/deckhand/engine/__init__.py index db31ec50..5e3a819c 100644 --- a/deckhand/engine/__init__.py +++ b/deckhand/engine/__init__.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from deckhand.engine.render import render +from deckhand.engine.render import * # noqa -__all__ = ('render',) +__all__ = ('render', + 'validate_render') diff --git a/deckhand/engine/cache.py b/deckhand/engine/cache.py index e218378e..fad99974 100644 --- a/deckhand/engine/cache.py +++ b/deckhand/engine/cache.py @@ -31,19 +31,51 @@ _DOCUMENT_RENDERING_CACHE = _CACHE.get_cache('rendered_documents_cache') def lookup_by_revision_id(revision_id, documents, **kwargs): - """Look up rendered documents by ``revision_id``.""" + """Look up rendered documents by ``revision_id``. + + :param revision_id: Revision ID for which to render documents. Used as key + in cache. + :type revision_id: int + :param documents: List of raw documents to render. + :type documents: List[dict] + :param kwargs: Kwargs to pass to ``render``. + :returns: Tuple, where first arg is rendered documents and second arg + indicates whether cache was hit. + :rtype: Tuple[dict, boolean] + + """ def do_render(): """Perform document rendering for the revision.""" document_layering = layering.DocumentLayering(documents, **kwargs) return document_layering.render() + def contains_revision(): + try: + _DOCUMENT_RENDERING_CACHE.get(key=revision_id) + return True + except KeyError: + return False + if CONF.engine.enable_cache: + cache_hit = contains_revision() return _DOCUMENT_RENDERING_CACHE.get(key=revision_id, - createfunc=do_render) + createfunc=do_render), cache_hit else: - return do_render() + # The cache is disabled, so this is necessarily false. + return do_render(), False def invalidate(): + """Invalidate the entire cache.""" _DOCUMENT_RENDERING_CACHE.clear() + + +def invalidate_one(revision_id): + """Invalidate single entry in cache. + + :param revision_id: Revision to invalidate. + :type revision_id: int + + """ + _DOCUMENT_RENDERING_CACHE.remove_value(key=revision_id) diff --git a/deckhand/engine/render.py b/deckhand/engine/render.py index a84a2f2f..86f75487 100644 --- a/deckhand/engine/render.py +++ b/deckhand/engine/render.py @@ -12,9 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from deckhand.engine import cache +from oslo_log import log as logging -__all__ = ('render',) +from deckhand.common import validation_message as vm +from deckhand.engine import cache +from deckhand import errors + +LOG = logging.getLogger(__name__) + +__all__ = ('render', + 'validate_render') def render(revision_id, documents, encryption_sources=None): @@ -43,3 +50,63 @@ def render(revision_id, documents, encryption_sources=None): documents, encryption_sources=encryption_sources, validate=False) + + +def validate_render(revision_id, rendered_documents, validator): + """Validate rendered documents using ``validator``. + + :param revision_id: Key used for caching rendered documents by. + :type revision_id: int + :param documents: List of rendered documents corresponding to + ``revision_id``. + :type documents: List[dict] + :param validator: Validation object used for validating + ``rendered_documents``. + :type validator: deckhand.engine.document_validation.DocumentValidation + :raises: InvalidDocumentFormat if validation fails. + + """ + + # Perform schema validation post-rendering to ensure that rendering + # and substitution didn't break anything. + try: + validations = validator.validate_all() + except errors.InvalidDocumentFormat as e: + # Invalidate cache entry so that future lookups also fail. + cache.invalidate_one(revision_id) + # Post-rendering validation errors likely indicate an internal + # rendering bug, so override the default code to 500. + e.code = 500 + LOG.error('Failed to post-validate rendered documents.') + LOG.exception(e.format_message()) + raise e + + error_list = [] + + for validation in validations: + if validation['status'] == 'failure': + error_list.extend([ + vm.ValidationMessage( + message=error['message'], + name=vm.DOCUMENT_POST_RENDERING_FAILURE, + doc_schema=error['schema'], + doc_name=error['name'], + doc_layer=error['layer'], + diagnostic={ + k: v for k, v in error.items() if k in ( + 'schema_path', + 'validation_schema', + 'error_section' + ) + } + ) + for error in validation['errors'] + ]) + + if error_list: + # Invalidate cache entry so that future lookups also fail. + cache.invalidate_one(revision_id) + raise errors.InvalidDocumentFormat( + error_list=error_list, + reason='Validation', + ) diff --git a/deckhand/engine/revision_diff.py b/deckhand/engine/revision_diff.py index d8af473a..51f5c2c0 100644 --- a/deckhand/engine/revision_diff.py +++ b/deckhand/engine/revision_diff.py @@ -292,5 +292,5 @@ def _format_diff_result(dr): def _rendered_doc(revision_id): """Provides rendered document by given revision id.""" filters = {'deleted': False} - rendered_documents = common.get_rendered_docs(revision_id, **filters) + rendered_documents, _ = common.get_rendered_docs(revision_id, **filters) return rendered_documents diff --git a/deckhand/tests/unit/engine/test_cache.py b/deckhand/tests/unit/engine/test_cache.py index 376aaa7b..e808479e 100644 --- a/deckhand/tests/unit/engine/test_cache.py +++ b/deckhand/tests/unit/engine/test_cache.py @@ -13,6 +13,7 @@ # limitations under the License. from threading import Thread +import time import testtools @@ -39,12 +40,16 @@ class RenderedDocumentsCacheTest(test_base.DeckhandTestCase): documents = document_factory.gen_test({}) # Validate that caching the ref returns expected payload. - rendered_documents = cache.lookup_by_revision_id(1, documents) + rendered_documents, cache_hit = cache.lookup_by_revision_id( + 1, documents) self.assertIsInstance(rendered_documents, list) + self.assertFalse(cache_hit) # Validate that the cache actually works. - next_rendered_documents = cache.lookup_by_revision_id(1, None) + next_rendered_documents, cache_hit = cache.lookup_by_revision_id( + 1, None) self.assertEqual(rendered_documents, next_rendered_documents) + self.assertTrue(cache_hit) # No documents passed in and revision ID 2 isn't cached - so expect # this to blow up. @@ -69,18 +74,25 @@ class RenderedDocumentsCacheTest(test_base.DeckhandTestCase): self.assertNotEqual(documents1, documents2) rendered_documents_by_thread = [] + cache_hit_by_thread = [] def threaded_function(documents): # Validate that caching the ref returns expected payload. - rendered_documents = cache.lookup_by_revision_id(1, documents) + rendered_documents, cache_hit = cache.lookup_by_revision_id( + 1, documents) rendered_documents_by_thread.append(rendered_documents) - return rendered_documents + cache_hit_by_thread.append(cache_hit) thread1 = Thread(target=threaded_function, kwargs={'documents': documents1}) thread2 = Thread(target=threaded_function, kwargs={'documents': documents2}) thread1.start() + # NOTE(felipemonteiro): Add a sleep here to avoid a data race where the + # cache might not be populated fast enough before the second thread + # checks the cache -- and finds nothing thereby proceeding with another + # render request. In real scenarios, though, this is highly unlikely. + time.sleep(1) thread2.start() thread1.join() thread2.join() @@ -90,3 +102,5 @@ class RenderedDocumentsCacheTest(test_base.DeckhandTestCase): self.assertEqual(2, len(rendered_documents_by_thread)) self.assertEqual(rendered_documents_by_thread[0], rendered_documents_by_thread[1]) + self.assertFalse(cache_hit_by_thread[0]) # 1st time missing in cache. + self.assertTrue(cache_hit_by_thread[1]) # 2nd time should hit cache.