From 9345035522aa038615b0816c4c77b4b9a428689f Mon Sep 17 00:00:00 2001 From: pallav Date: Thu, 23 Aug 2018 02:26:19 +0530 Subject: [PATCH] Adding api for revisions deep diffing GET /revisions/{{revision_id}}/deepdiff/{{comparison_revision_id}} - Added deepdiff api for generating diff between two rendered documents. - Deep diffing for data and metadata - Refactor diff functions - Client update - Added unit testcases - Added funtional testcases - Doc update Change-Id: Ib60fa60a3b33e9125a1595a999272ca595721b38 --- charts/deckhand/values.yaml | 1 + deckhand/client/revisions.py | 6 + deckhand/control/common.py | 121 +++++++ deckhand/control/revision_deepdiffing.py | 50 +++ deckhand/control/revision_diffing.py | 17 +- deckhand/control/revision_documents.py | 111 +------ deckhand/db/sqlalchemy/api.py | 207 +----------- deckhand/engine/revision_diff.py | 296 ++++++++++++++++++ deckhand/engine/utils.py | 58 ++++ deckhand/errors.py | 12 + deckhand/factories.py | 56 ++++ deckhand/policies/revision.py | 11 + deckhand/service.py | 3 + .../revision-deepdiff-success.yaml | 287 +++++++++++++++++ .../unit/control/test_api_initialization.py | 11 +- .../test_revisions_deepdiff_controller.py | 32 ++ .../unit/engine/test_revision_deepdiffing.py | 143 +++++++++ .../{db => engine}/test_revision_diffing.py | 4 +- doc/source/api_ref.rst | 90 ++++++ doc/source/developer-overview.rst | 6 +- doc/source/exceptions.rst | 10 + doc/source/revision-history.rst | 9 + etc/deckhand/policy.yaml.sample | 4 + requirements.txt | 1 + 24 files changed, 1224 insertions(+), 322 deletions(-) create mode 100644 deckhand/control/revision_deepdiffing.py create mode 100644 deckhand/engine/revision_diff.py create mode 100644 deckhand/tests/functional/gabbits/revision-deepdiff/revision-deepdiff-success.yaml create mode 100644 deckhand/tests/unit/control/test_revisions_deepdiff_controller.py create mode 100644 deckhand/tests/unit/engine/test_revision_deepdiffing.py rename deckhand/tests/unit/{db => engine}/test_revision_diffing.py (99%) diff --git a/charts/deckhand/values.yaml b/charts/deckhand/values.yaml index 14f6f635..673dced1 100644 --- a/charts/deckhand/values.yaml +++ b/charts/deckhand/values.yaml @@ -234,6 +234,7 @@ conf: deckhand:show_revision: rule:admin_api deckhand:list_revisions: rule:admin_api deckhand:delete_revisions: rule:admin_api + deckhand:show_revision_deepdiff: rule:admin_api deckhand:show_revision_diff: rule:admin_api deckhand:create_tag: rule:admin_api deckhand:show_tag: rule:admin_api diff --git a/deckhand/client/revisions.py b/deckhand/client/revisions.py index 650ae308..54e51bba 100644 --- a/deckhand/client/revisions.py +++ b/deckhand/client/revisions.py @@ -56,6 +56,12 @@ class RevisionManager(base.Manager): revision_id, comparison_revision_id) return self._get(url) + def deepdiff(self, revision_id, comparison_revision_id): + """Get revision deepdiff between two revisions.""" + url = '/revisions/%s/deepdiff/%s' % ( + revision_id, comparison_revision_id) + return self._get(url) + def rollback(self, revision_id): """Rollback to a previous revision, effectively creating a new one.""" url = '/rollback/%s' % revision_id diff --git a/deckhand/control/common.py b/deckhand/control/common.py index 9168ecaf..80650e7a 100644 --- a/deckhand/control/common.py +++ b/deckhand/control/common.py @@ -12,12 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import concurrent.futures import functools import falcon +from oslo_config import cfg +from oslo_log import log as logging +from oslo_utils import excutils +import six from deckhand.barbican import cache as barbican_cache +from deckhand.common import document as document_wrapper +from deckhand.db.sqlalchemy import api as db_api +from deckhand import engine from deckhand.engine import cache as engine_cache +from deckhand.engine import secrets_manager +from deckhand import errors +from deckhand import types + +CONF = cfg.CONF +LOG = logging.getLogger(__name__) class ViewBuilder(object): @@ -128,3 +142,110 @@ def invalidate_cache_data(): """Invalidate all data associated with document rendering.""" barbican_cache.invalidate() engine_cache.invalidate() + + +def get_rendered_docs(revision_id, **filters): + data = _retrieve_documents_for_rendering(revision_id, **filters) + documents = document_wrapper.DocumentDict.from_list(data) + encryption_sources = _resolve_encrypted_data(documents) + try: + rendered_documents = engine.render( + revision_id, + documents, + encryption_sources=encryption_sources) + return rendered_documents + except (errors.BarbicanClientException, + errors.BarbicanServerException, + errors.InvalidDocumentLayer, + errors.InvalidDocumentParent, + errors.InvalidDocumentReplacement, + errors.IndeterminateDocumentParent, + errors.LayeringPolicyNotFound, + errors.MissingDocumentKey, + errors.SubstitutionSourceDataNotFound, + errors.SubstitutionSourceNotFound, + errors.UnknownSubstitutionError, + errors.UnsupportedActionMethod) as e: + with excutils.save_and_reraise_exception(): + LOG.exception(e.format_message()) + except errors.EncryptionSourceNotFound as e: + # This branch should be unreachable, but if an encryption source + # wasn't found, then this indicates the controller fed bad data + # to the engine, in which case this is a 500. + e.code = 500 + raise e + + +def _retrieve_documents_for_rendering(revision_id, **filters): + """Retrieve all necessary documents needed for rendering. If a layering + policy isn't found in the current revision, retrieve it in a subsequent + call and add it to the list of documents. + """ + try: + documents = db_api.revision_documents_get(revision_id, **filters) + except errors.RevisionNotFound as e: + LOG.exception(six.text_type(e)) + raise falcon.HTTPNotFound(description=e.format_message()) + + if not any([d['schema'].startswith(types.LAYERING_POLICY_SCHEMA) + for d in documents]): + try: + layering_policy_filters = { + 'deleted': False, + 'schema': types.LAYERING_POLICY_SCHEMA + } + layering_policy = db_api.document_get( + **layering_policy_filters) + except errors.DocumentNotFound as e: + LOG.exception(e.format_message()) + else: + documents.append(layering_policy) + + return documents + + +def _resolve_encrypted_data(documents): + """Resolve unencrypted data from the secret storage backend. + + Submits concurrent requests to the secret storage backend for all + secret references for which unecrypted data is required for future + substitutions during the rendering process. + + :param documents: List of all documents for the current revision. + :type documents: List[dict] + :returns: Dictionary keyed with secret references, whose values are + the corresponding unencrypted data. 
+ :rtype: dict + + """ + encryption_sources = {} + secret_ref = lambda x: x.data + is_encrypted = lambda x: x.is_encrypted and x.has_barbican_ref + encrypted_documents = (d for d in documents if is_encrypted(d)) + + with concurrent.futures.ThreadPoolExecutor( + max_workers=CONF.barbican.max_workers) as executor: + future_to_document = { + executor.submit(secrets_manager.SecretsManager.get, + secret_ref=secret_ref(d), + src_doc=d): d for d in encrypted_documents + } + for future in concurrent.futures.as_completed(future_to_document): + document = future_to_document[future] + try: + unecrypted_data = future.result() + except Exception as exc: + msg = ('Failed to retrieve a required secret from the ' + 'configured secret storage service. Document: [%s,' + ' %s] %s. Secret ref: %s' % ( + document.schema, + document.layer, + document.name, + secret_ref(document))) + LOG.error(msg + '. Details: %s', exc) + raise falcon.HTTPInternalServerError(description=msg) + else: + encryption_sources.setdefault(secret_ref(document), + unecrypted_data) + + return encryption_sources diff --git a/deckhand/control/revision_deepdiffing.py b/deckhand/control/revision_deepdiffing.py new file mode 100644 index 00000000..20fff464 --- /dev/null +++ b/deckhand/control/revision_deepdiffing.py @@ -0,0 +1,50 @@ +# Copyright 2018 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import falcon +from oslo_log import log as logging +from oslo_utils import excutils + +from deckhand.control import base as api_base +from deckhand.engine.revision_diff import revision_diff +from deckhand import errors +from deckhand import policy + +LOG = logging.getLogger(__name__) + + +class RevisionDeepDiffingResource(api_base.BaseResource): + """API resource for realizing revision deepdiffing.""" + + @policy.authorize('deckhand:show_revision_deepdiff') + def on_get(self, req, resp, revision_id, comparison_revision_id): + try: + revision_id = int(revision_id) + except ValueError: + raise errors.InvalidInputException(input_var=str(revision_id)) + try: + comparison_revision_id = int(comparison_revision_id) + except ValueError: + raise errors.InvalidInputException( + input_var=str(comparison_revision_id)) + + try: + resp_body = revision_diff( + revision_id, comparison_revision_id, deepdiff=True) + except errors.RevisionNotFound as e: + with excutils.save_and_reraise_exception(): + LOG.exception(e.format_message()) + + resp.status = falcon.HTTP_200 + resp.body = resp_body diff --git a/deckhand/control/revision_diffing.py b/deckhand/control/revision_diffing.py index 6a33fbb4..6eeb6bb8 100644 --- a/deckhand/control/revision_diffing.py +++ b/deckhand/control/revision_diffing.py @@ -17,7 +17,7 @@ from oslo_log import log as logging from oslo_utils import excutils from deckhand.control import base as api_base -from deckhand.db.sqlalchemy import api as db_api +from deckhand.engine.revision_diff import revision_diff from deckhand import errors from deckhand import policy @@ -29,13 +29,18 @@ class RevisionDiffingResource(api_base.BaseResource): @policy.authorize('deckhand:show_revision_diff') def on_get(self, req, resp, revision_id, comparison_revision_id): - if revision_id == '0': - revision_id = 0 - if comparison_revision_id == '0': - comparison_revision_id = 0 + try: + revision_id = int(revision_id) + except ValueError: + raise errors.InvalidInputException(input_var=str(revision_id)) + try: + comparison_revision_id = int(comparison_revision_id) + except ValueError: + raise errors.InvalidInputException( + input_var=str(comparison_revision_id)) try: - resp_body = db_api.revision_diff( + resp_body = revision_diff( revision_id, comparison_revision_id) except errors.RevisionNotFound as e: with excutils.save_and_reraise_exception(): diff --git a/deckhand/control/revision_documents.py b/deckhand/control/revision_documents.py index 1348b97e..c86a358d 100644 --- a/deckhand/control/revision_documents.py +++ b/deckhand/control/revision_documents.py @@ -12,29 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import concurrent.futures - import falcon -from oslo_config import cfg from oslo_log import log as logging -from oslo_utils import excutils import six -from deckhand.common import document as document_wrapper from deckhand.common import utils from deckhand.common import validation_message as vm from deckhand.control import base as api_base from deckhand.control import common from deckhand.control.views import document as document_view from deckhand.db.sqlalchemy import api as db_api -from deckhand import engine from deckhand.engine import document_validation -from deckhand.engine import secrets_manager from deckhand import errors from deckhand import policy from deckhand import types -CONF = cfg.CONF LOG = logging.getLogger(__name__) @@ -115,35 +107,7 @@ class RenderedDocumentsResource(api_base.BaseResource): if include_encrypted: filters['metadata.storagePolicy'].append('encrypted') - data = self._retrieve_documents_for_rendering(revision_id, **filters) - documents = document_wrapper.DocumentDict.from_list(data) - encryption_sources = self._resolve_encrypted_data(documents) - try: - rendered_documents = engine.render( - revision_id, - documents, - encryption_sources=encryption_sources) - except (errors.BarbicanClientException, - errors.BarbicanServerException, - errors.InvalidDocumentLayer, - errors.InvalidDocumentParent, - errors.InvalidDocumentReplacement, - errors.IndeterminateDocumentParent, - errors.LayeringPolicyNotFound, - errors.MissingDocumentKey, - errors.MissingDocumentPattern, - errors.SubstitutionSourceDataNotFound, - errors.SubstitutionSourceNotFound, - errors.UnknownSubstitutionError, - errors.UnsupportedActionMethod) as e: - with excutils.save_and_reraise_exception(): - LOG.exception(e.format_message()) - except errors.EncryptionSourceNotFound as e: - # This branch should be unreachable, but if an encryption source - # wasn't found, then this indicates the controller fed bad data - # to the engine, in which case this is a 500. - e.code = 500 - raise e + rendered_documents = common.get_rendered_docs(revision_id, **filters) # Filters to be applied post-rendering, because many documents are # involved in rendering. User filters can only be applied once all @@ -169,79 +133,6 @@ class RenderedDocumentsResource(api_base.BaseResource): self._post_validate(rendered_documents) resp.body = self.view_builder.list(rendered_documents) - def _retrieve_documents_for_rendering(self, revision_id, **filters): - """Retrieve all necessary documents needed for rendering. If a layering - policy isn't found in the current revision, retrieve it in a subsequent - call and add it to the list of documents. - """ - try: - documents = db_api.revision_documents_get(revision_id, **filters) - except errors.RevisionNotFound as e: - LOG.exception(six.text_type(e)) - raise falcon.HTTPNotFound(description=e.format_message()) - - if not any([d['schema'].startswith(types.LAYERING_POLICY_SCHEMA) - for d in documents]): - try: - layering_policy_filters = { - 'deleted': False, - 'schema': types.LAYERING_POLICY_SCHEMA - } - layering_policy = db_api.document_get( - **layering_policy_filters) - except errors.DocumentNotFound as e: - LOG.exception(e.format_message()) - else: - documents.append(layering_policy) - - return documents - - def _resolve_encrypted_data(self, documents): - """Resolve unencrypted data from the secret storage backend. 
- - Submits concurrent requests to the secret storage backend for all - secret references for which unecrypted data is required for future - substitutions during the rendering process. - - :param documents: List of all documents for the current revision. - :type documents: List[dict] - :returns: Dictionary keyed with secret references, whose values are - the corresponding unencrypted data. - :rtype: dict - - """ - encryption_sources = {} - secret_ref = lambda x: x.data - is_encrypted = lambda x: x.is_encrypted and x.has_barbican_ref - encrypted_documents = (d for d in documents if is_encrypted(d)) - - with concurrent.futures.ThreadPoolExecutor( - max_workers=CONF.barbican.max_workers) as executor: - future_to_document = { - executor.submit(secrets_manager.SecretsManager.get, - secret_ref=secret_ref(d), - src_doc=d): d for d in encrypted_documents - } - for future in concurrent.futures.as_completed(future_to_document): - document = future_to_document[future] - try: - unecrypted_data = future.result() - except Exception as exc: - msg = ('Failed to retrieve a required secret from the ' - 'configured secret storage service. Document: [%s,' - ' %s] %s. Secret ref: %s' % ( - document.schema, - document.layer, - document.name, - secret_ref(document))) - LOG.error(msg + '. Details: %s', exc) - raise falcon.HTTPInternalServerError(description=msg) - else: - encryption_sources.setdefault(secret_ref(document), - unecrypted_data) - - return encryption_sources - def _post_validate(self, rendered_documents): # Perform schema validation post-rendering to ensure that rendering # and substitution didn't break anything. diff --git a/deckhand/db/sqlalchemy/api.py b/deckhand/db/sqlalchemy/api.py index 0de2749c..f6425876 100644 --- a/deckhand/db/sqlalchemy/api.py +++ b/deckhand/db/sqlalchemy/api.py @@ -30,6 +30,7 @@ from sqlalchemy import text from deckhand.common import utils from deckhand.db.sqlalchemy import models +from deckhand.engine import utils as eng_utils from deckhand import errors from deckhand import types @@ -91,14 +92,6 @@ def raw_query(query, **kwargs): return get_engine().execute(stmt) -def _meta(document): - return ( - document['schema'], - document['metadata'].get('layeringDefinition', {}).get('layer'), - document['metadata'].get('name') - ) - - def require_unique_document_schema(schema=None): """Decorator to enforce only one singleton document exists in the system. 
@@ -129,11 +122,11 @@ def require_unique_document_schema(schema=None): existing_documents = revision_documents_get( schema=schema, deleted=False, include_history=False) existing_document_names = [ - _meta(x) for x in existing_documents + eng_utils.meta(x) for x in existing_documents ] conflicting_names = [ - _meta(x) for x in documents - if _meta(x) not in existing_document_names and + eng_utils.meta(x) for x in documents + if eng_utils.meta(x) not in existing_document_names and x['schema'].startswith(schema) ] if existing_document_names and conflicting_names: @@ -179,8 +172,8 @@ def documents_create(bucket_name, documents, session=None): session=session) ] documents_to_delete = [ - h for h in document_history if _meta(h) not in [ - _meta(d) for d in documents] + h for h in document_history if eng_utils.meta(h) not in [ + eng_utils.meta(d) for d in documents] ] # Only create a revision if any docs have been created, changed or @@ -191,7 +184,7 @@ def documents_create(bucket_name, documents, session=None): if documents_to_delete: LOG.debug('Deleting documents: %s.', - [_meta(d) for d in documents_to_delete]) + [eng_utils.meta(d) for d in documents_to_delete]) deleted_documents = [] for d in documents_to_delete: @@ -597,57 +590,6 @@ def revision_delete_all(): raw_query("DELETE FROM revisions;") -def _exclude_deleted_documents(documents): - """Excludes all documents that have been deleted including all documents - earlier in the revision history with the same ``metadata.name`` and - ``schema`` from ``documents``. - """ - documents_map = {} # (document.meta) => should be included? - - for doc in sorted(documents, key=lambda x: x['created_at']): - if doc['deleted'] is True: - previous_doc = documents_map.get(_meta(doc)) - if previous_doc: - if doc['deleted_at'] >= previous_doc['created_at']: - documents_map[_meta(doc)] = None - else: - documents_map[_meta(doc)] = doc - - return [d for d in documents_map.values() if d is not None] - - -def _filter_revision_documents(documents, unique_only, **filters): - """Return the list of documents that match filters. - - :param documents: List of documents to apply ``filters`` to. - :param unique_only: Return only unique documents if ``True``. - :param filters: Dictionary attributes (including nested) used to filter - out revision documents. - :returns: List of documents that match specified filters. - """ - # TODO(fmontei): Implement this as an sqlalchemy query. - filtered_documents = {} - unique_filters = ('schema', 'name', 'layer') - exclude_deleted = filters.pop('deleted', None) is False - - if exclude_deleted: - documents = _exclude_deleted_documents(documents) - - for document in documents: - if utils.deepfilter(document, **filters): - # Filter out redundant documents from previous revisions, i.e. - # documents schema and metadata.name are repeated. 
- if unique_only: - unique_key = tuple( - [document[filter] for filter in unique_filters]) - else: - unique_key = document['id'] - if unique_key not in filtered_documents: - filtered_documents[unique_key] = document - - return list(filtered_documents.values()) - - @require_revision_exists def revision_documents_get(revision_id=None, include_history=True, unique_only=True, session=None, **filters): @@ -695,143 +637,12 @@ def revision_documents_get(revision_id=None, include_history=True, revision_documents = _update_revision_history(revision_documents) - filtered_documents = _filter_revision_documents( + filtered_documents = eng_utils.filter_revision_documents( revision_documents, unique_only, **filters) return filtered_documents -# NOTE(fmontei): No need to include `@require_revision_exists` decorator as -# this function immediately calls `revision_documents_get` for both -# revision IDs, which has the decorator applied to it. -def revision_diff(revision_id, comparison_revision_id): - """Generate the diff between two revisions. - - Generate the diff between the two revisions: `revision_id` and - `comparison_revision_id`. A basic comparison of the revisions in terms of - how the buckets involved have changed is generated. Only buckets with - existing documents in either of the two revisions in question will be - reported. - - The ordering of the two revision IDs is interchangeable, i.e. no matter - the order, the same result is generated. - - The differences include: - - - "created": A bucket has been created between the revisions. - - "deleted": A bucket has been deleted between the revisions. - - "modified": A bucket has been modified between the revisions. - - "unmodified": A bucket remains unmodified between the revisions. - - :param revision_id: ID of the first revision. - :param comparison_revision_id: ID of the second revision. - :returns: A dictionary, keyed with the bucket IDs, containing any of the - differences enumerated above. - - Examples:: - - # GET /api/v1.0/revisions/6/diff/3 - bucket_a: created - bucket_b: deleted - bucket_c: modified - bucket_d: unmodified - - # GET /api/v1.0/revisions/0/diff/6 - bucket_a: created - bucket_c: created - bucket_d: created - - # GET /api/v1.0/revisions/6/diff/6 - bucket_a: unmodified - bucket_c: unmodified - bucket_d: unmodified - - # GET /api/v1.0/revisions/0/diff/0 - {} - """ - # Retrieve document history for each revision. Since `revision_id` of 0 - # doesn't exist, treat it as a special case: empty list. - docs = (revision_documents_get(revision_id, - include_history=True, - unique_only=False) - if revision_id != 0 else []) - comparison_docs = (revision_documents_get(comparison_revision_id, - include_history=True, - unique_only=False) - if comparison_revision_id != 0 else []) - - # Remove each deleted document and its older counterparts because those - # documents technically don't exist. - docs = _exclude_deleted_documents(docs) - comparison_docs = _exclude_deleted_documents(comparison_docs) - - revision = revision_get(revision_id) if revision_id != 0 else None - comparison_revision = (revision_get(comparison_revision_id) - if comparison_revision_id != 0 else None) - - # Each dictionary below, keyed with the bucket's name, references the list - # of documents related to each bucket. 
- buckets = {} - comparison_buckets = {} - for doc in docs: - buckets.setdefault(doc['bucket_name'], []) - buckets[doc['bucket_name']].append(doc) - for doc in comparison_docs: - comparison_buckets.setdefault(doc['bucket_name'], []) - comparison_buckets[doc['bucket_name']].append(doc) - - # `shared_buckets` references buckets shared by both `revision_id` and - # `comparison_revision_id` -- i.e. their intersection. - shared_buckets = set(buckets.keys()).intersection( - comparison_buckets.keys()) - # `unshared_buckets` references buckets not shared by both `revision_id` - # and `comparison_revision_id` -- i.e. their non-intersection. - unshared_buckets = set(buckets.keys()).union( - comparison_buckets.keys()) - shared_buckets - - result = {} - - def _compare_buckets(b1, b2): - # Checks whether buckets' documents are identical. - return (sorted([(d['data_hash'], d['metadata_hash']) for d in b1]) == - sorted([(d['data_hash'], d['metadata_hash']) for d in b2])) - - # If the list of documents for each bucket is identical, then the result - # is "unmodified", else "modified". - for bucket_name in shared_buckets: - unmodified = _compare_buckets(buckets[bucket_name], - comparison_buckets[bucket_name]) - result[bucket_name] = 'unmodified' if unmodified else 'modified' - - for bucket_name in unshared_buckets: - # If neither revision has documents, then there's nothing to compare. - # This is always True for revision_id == comparison_revision_id == 0. - if not any([revision, comparison_revision]): - break - # Else if one revision == 0 and the other revision != 0, then the - # bucket has been created. Which is zero or non-zero doesn't matter. - elif not all([revision, comparison_revision]): - result[bucket_name] = 'created' - # Else if `revision` is newer than `comparison_revision`, then if the - # `bucket_name` isn't in the `revision` buckets, then it has been - # deleted. Otherwise it has been created. - elif revision['created_at'] > comparison_revision['created_at']: - if bucket_name not in buckets: - result[bucket_name] = 'deleted' - elif bucket_name not in comparison_buckets: - result[bucket_name] = 'created' - # Else if `comparison_revision` is newer than `revision`, then if the - # `bucket_name` isn't in the `revision` buckets, then it has been - # created. Otherwise it has been deleted. - else: - if bucket_name not in buckets: - result[bucket_name] = 'created' - elif bucket_name not in comparison_buckets: - result[bucket_name] = 'deleted' - - return result - - #################### @@ -1175,7 +986,7 @@ def _check_validation_entries_against_validation_policies( entry['status'] = 'ignored [%s]' % original_status entry.setdefault('errors', []) - msg_args = _meta(vp) + ( + msg_args = eng_utils.meta(vp) + ( ', '.join(v['name'] for v in vp['data'].get( 'validations', [])), ) diff --git a/deckhand/engine/revision_diff.py b/deckhand/engine/revision_diff.py new file mode 100644 index 00000000..d8af473a --- /dev/null +++ b/deckhand/engine/revision_diff.py @@ -0,0 +1,296 @@ +# Copyright 2018 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from deepdiff import DeepDiff
+from deepdiff.helper import RemapDict
+import jsonpickle
+
+from deckhand.control import common
+from deckhand.db.sqlalchemy import api as db_api
+from deckhand.engine import utils
+from deckhand import errors
+
+
+def revision_diff(revision_id, comparison_revision_id, deepdiff=False):
+    """Generate the diff between two revisions.
+
+    Generate the diff between the two revisions: `revision_id` and
+    `comparison_revision_id`.
+
+    a. When deepdiff=False: A basic comparison of the revisions in terms of
+       how the buckets involved have changed is generated. Only buckets with
+       existing documents in either of the two revisions in question will be
+       reported.
+    b. When deepdiff=True: Along with the basic comparison, a deep diff is
+       generated for the buckets modified between the revisions.
+
+    For the basic diff, the ordering of the two revision IDs is
+    interchangeable, i.e. no matter the order, the same result is generated.
+
+    The differences include:
+
+        - "created": A bucket has been created between the revisions.
+        - "deleted": A bucket has been deleted between the revisions.
+        - "modified": A bucket has been modified between the revisions.
+                      When deepdiff is enabled, the deep difference between
+                      the revisions is also included.
+        - "unmodified": A bucket remains unmodified between the revisions.
+
+    :param revision_id: ID of the first revision.
+    :param comparison_revision_id: ID of the second revision.
+    :param deepdiff: Whether to perform a deep diff.
+    :returns: A dictionary, keyed with the bucket IDs, containing any of the
+        differences enumerated above.
+
+    Examples Diff::
+
+        # GET /api/v1.0/revisions/6/diff/3
+        bucket_a: created
+        bucket_b: deleted
+        bucket_c: modified
+        bucket_d: unmodified
+
+        # GET /api/v1.0/revisions/0/diff/6
+        bucket_a: created
+        bucket_c: created
+        bucket_d: created
+
+        # GET /api/v1.0/revisions/6/diff/6
+        bucket_a: unmodified
+        bucket_c: unmodified
+        bucket_d: unmodified
+
+        # GET /api/v1.0/revisions/0/diff/0
+        {}
+
+    Examples DeepDiff::
+
+        # GET /api/v1.0/revisions/3/deepdiff/4
+        bucket_a: modified
+        bucket_a diff:
+            document_changed:
+                count: 1
+                details:
+                    ('example/Kind/v1', 'doc-b'):
+                        data_changed:
+                            values_changed:
+                                root['foo']: {new_value: 3, old_value: 2}
+                        metadata_changed: {}
+
+        # GET /api/v1.0/revisions/2/deepdiff/3
+        bucket_a: modified
+        bucket_a diff:
+            document_added:
+                count: 1
+                details:
+                    - [example/Kind/v1, doc-c]
+
+        # GET /api/v1.0/revisions/0/deepdiff/0
+        {}
+
+        # GET /api/v1.0/revisions/0/deepdiff/3
+        bucket_a: created
+    """
+    if deepdiff:
+        docs = (_rendered_doc(revision_id) if revision_id != 0 else [])
+        comparison_docs = (_rendered_doc(comparison_revision_id)
+                           if comparison_revision_id != 0 else [])
+    else:
+        # Retrieve document history for each revision. Since `revision_id`
+        # of 0 doesn't exist, treat it as a special case: empty list.
+        docs = (db_api.revision_documents_get(revision_id,
+                                              include_history=True,
+                                              unique_only=False)
+                if revision_id != 0 else [])
+        comparison_docs = (db_api.revision_documents_get(
+            comparison_revision_id,
+            include_history=True,
+            unique_only=False
+        ) if comparison_revision_id != 0 else [])
+
+    # Remove each deleted document and its older counterparts because those
+    # documents technically don't exist.
+ docs = utils.exclude_deleted_documents(docs) + comparison_docs = utils.exclude_deleted_documents(comparison_docs) + + revision = db_api.revision_get(revision_id) if revision_id != 0 else None + comparison_revision = (db_api.revision_get(comparison_revision_id) + if comparison_revision_id != 0 else None) + + # Each dictionary below, keyed with the bucket's name, references the list + # of documents related to each bucket. + buckets = {} + comparison_buckets = {} + for doc in docs: + buckets.setdefault(doc['bucket_name'], []) + buckets[doc['bucket_name']].append(doc) + for doc in comparison_docs: + comparison_buckets.setdefault(doc['bucket_name'], []) + comparison_buckets[doc['bucket_name']].append(doc) + + # `shared_buckets` references buckets shared by both `revision_id` and + # `comparison_revision_id` -- i.e. their intersection. + shared_buckets = set(buckets.keys()).intersection( + comparison_buckets.keys()) + # `unshared_buckets` references buckets not shared by both `revision_id` + # and `comparison_revision_id` -- i.e. their non-intersection. + unshared_buckets = set(buckets.keys()).union( + comparison_buckets.keys()) - shared_buckets + + result = {} + + def _compare_buckets(b1, b2): + # Checks whether buckets' documents are identical. + return (sorted([(d['data_hash'], d['metadata_hash']) for d in b1]) == + sorted([(d['data_hash'], d['metadata_hash']) for d in b2])) + + # If the list of documents for each bucket is identical, then the result + # is "unmodified", else "modified". + for bucket_name in shared_buckets: + unmodified = _compare_buckets(buckets[bucket_name], + comparison_buckets[bucket_name]) + if unmodified: + result[bucket_name] = 'unmodified' + else: + result[bucket_name] = 'modified' + # If deepdiff enabled + if deepdiff: + # find out diff between buckets + bucket_diff = _diff_buckets(buckets[bucket_name], + comparison_buckets[bucket_name]) + result[bucket_name + ' diff'] = bucket_diff + + for bucket_name in unshared_buckets: + # If neither revision has documents, then there's nothing to compare. + # This is always True for revision_id == comparison_revision_id == 0. + if not any([revision, comparison_revision]): + break + # Else if one revision == 0 and the other revision != 0, then the + # bucket has been created. Which is zero or non-zero doesn't matter. + elif not all([revision, comparison_revision]): + result[bucket_name] = 'created' + # Else if `revision` is newer than `comparison_revision`, then if the + # `bucket_name` isn't in the `revision` buckets, then it has been + # deleted. Otherwise it has been created. + elif revision['created_at'] > comparison_revision['created_at']: + if bucket_name not in buckets: + result[bucket_name] = 'deleted' + elif bucket_name not in comparison_buckets: + result[bucket_name] = 'created' + # Else if `comparison_revision` is newer than `revision`, then if the + # `bucket_name` isn't in the `revision` buckets, then it has been + # created. Otherwise it has been deleted. 
+ else: + if bucket_name not in buckets: + result[bucket_name] = 'created' + elif bucket_name not in comparison_buckets: + result[bucket_name] = 'deleted' + + return result + + +def _diff_buckets(b1, b2): + """Function to provide deep diff between two revisions""" + b1_tmp = {} + b2_tmp = {} + change_count = 0 + change_details = {} + diff_result = {} + alias = lambda d: (d['schema'], d['name']) + + b1_tmp.update({ + alias(d): d + for d in b1 + }) + b2_tmp.update({ + alias(d): d + for d in b2 + }) + + # doc deleted + doc_deleted = list(set(b1_tmp.keys()) - set(b2_tmp.keys())) + # new doc added + doc_added = list(set(b2_tmp.keys()) - set(b1_tmp.keys())) + + if len(doc_added) > 0: + diff_result.update({'document_added': { + 'count': len(doc_added), + 'details': doc_added}}) + if len(doc_deleted) > 0: + diff_result.update({'document_deleted': { + 'count': len(doc_deleted), + 'details': doc_deleted}}) + + # find modified documents + for k, d in b1_tmp.items(): + if k in b2_tmp: + # matched document, lets see changes + if (d['data_hash'], d['metadata_hash']) != ( + b2_tmp[k]['data_hash'], b2_tmp[k]['metadata_hash']): + # document change counter + change_count += 1 + + data_changed = {'encrypted': True} + # if document is not encrypted then show diff else hide diff + # data. + if not b2_tmp[k].is_encrypted: + try: + # deepdiff returns deepdiff object. So first + # serializing to json then deserializing + # to dict. + data_changed = jsonpickle.decode( + DeepDiff(d['data'], b2_tmp[k]['data']).json) + # deepdiff doesn't provide custom exceptions; + # have to use Exception. + except Exception as ex: + raise errors.DeepDiffException(details=str(ex)) + try: + metadata_changed = jsonpickle.decode( + DeepDiff(d['metadata'], + b2_tmp[k]['metadata']).json) + except Exception as ex: + raise errors.DeepDiffException(details=str(ex)) + + change_details.update({ + str(k): {'data_changed': data_changed, + 'metadata_changed': metadata_changed}}) + + if change_count > 0: + diff_result.update({'document_changed': { + 'count': change_count, + 'details': change_details + }}) + # yaml_safedump failed to parse python objects; + # need diff result format before pass it yaml_safedump + return _format_diff_result(diff_result) + + +def _format_diff_result(dr): + """Formats diff result per yaml_safedump parsing.""" + if isinstance(dr, dict): + for k, v in dr.items(): + if isinstance(v, RemapDict): + v = dict(v) + dr.update({k: v}) + if isinstance(v, type): + dr.update({k: str(v)}) + if isinstance(v, dict): + _format_diff_result(v) + return dr + + +def _rendered_doc(revision_id): + """Provides rendered document by given revision id.""" + filters = {'deleted': False} + rendered_documents = common.get_rendered_docs(revision_id, **filters) + return rendered_documents diff --git a/deckhand/engine/utils.py b/deckhand/engine/utils.py index a8fbf787..32e85eb6 100644 --- a/deckhand/engine/utils.py +++ b/deckhand/engine/utils.py @@ -14,6 +14,8 @@ import collections +from deckhand.common import utils + def deep_merge(dct, merge_dct): """Recursive dict merge. Inspired by :meth:``dict.update()``, instead of @@ -94,3 +96,59 @@ def deep_scrub(value, parent): elif isinstance(value, dict): for v in value.values(): deep_scrub(v, value) + + +def exclude_deleted_documents(documents): + """Excludes all documents that have been deleted including all documents + earlier in the revision history with the same ``metadata.name`` and + ``schema`` from ``documents``. + """ + documents_map = {} # (document.meta) => should be included? 
+ + for doc in sorted(documents, key=lambda x: x['created_at']): + if doc['deleted'] is True: + previous_doc = documents_map.get(meta(doc)) + if previous_doc: + if doc['deleted_at'] >= previous_doc['created_at']: + documents_map[meta(doc)] = None + else: + documents_map[meta(doc)] = doc + return [d for d in documents_map.values() if d is not None] + + +def filter_revision_documents(documents, unique_only, **filters): + """Return the list of documents that match filters. + + :param documents: List of documents to apply ``filters`` to. + :param unique_only: Return only unique documents if ``True``. + :param filters: Dictionary attributes (including nested) used to filter + out revision documents. + :returns: List of documents that match specified filters. + """ + filtered_documents = {} + unique_filters = ('schema', 'name', 'layer') + exclude_deleted = filters.pop('deleted', None) is False + + if exclude_deleted: + documents = exclude_deleted_documents(documents) + + for document in documents: + if utils.deepfilter(document, **filters): + # Filter out redundant documents from previous revisions, i.e. + # documents schema and metadata.name are repeated. + if unique_only: + unique_key = tuple( + [document[filter] for filter in unique_filters]) + else: + unique_key = document['id'] + if unique_key not in filtered_documents: + filtered_documents[unique_key] = document + return list(filtered_documents.values()) + + +def meta(document): + return ( + document['schema'], + document['metadata'].get('layeringDefinition', {}).get('layer'), + document['metadata'].get('name') + ) diff --git a/deckhand/errors.py b/deckhand/errors.py index ad92c15f..8d4d3a37 100644 --- a/deckhand/errors.py +++ b/deckhand/errors.py @@ -472,6 +472,18 @@ class BarbicanServerException(DeckhandException): code = 500 +class InvalidInputException(DeckhandException): + """An Invalid Input provided due to which unable to process request.""" + msg_fmt = ('Failed to process request due to invalid input: %(input_var)s') + code = 400 + + +class DeepDiffException(DeckhandException): + """An Exception occurred while deep diffing""" + msg_fmt = 'An Exception occurred while deep diffing. Details: %(details)s' + code = 500 + + class UnknownSubstitutionError(DeckhandException): """An unknown error occurred during substitution. diff --git a/deckhand/factories.py b/deckhand/factories.py index ca0d592f..231e7c62 100644 --- a/deckhand/factories.py +++ b/deckhand/factories.py @@ -18,6 +18,8 @@ import six from oslo_log import log as logging +from deckhand.common import document as document_wrapper +from deckhand.db.sqlalchemy import api from deckhand.tests import test_utils LOG = logging.getLogger(__name__) @@ -371,3 +373,57 @@ class DocumentSecretFactory(DeckhandFactory): document_secret_template['metadata']['name'] = name return document_secret_template + + +class RenderedDocumentFactory(DeckhandFactory): + """Class for auto-generating Rendered document for testing. + """ + RENDERED_DOCUMENT_TEMPLATE = { + "data": { + }, + "data_hash": "", + "metadata": { + "schema": "metadata/Document/v1", + "name": "", + "layeringDefinition": { + "abstract": False, + "layer": "site" + }, + "storagePolicy": "", + }, + "metadata_hash": "", + "name": "", + "schema": "deckhand/%s/v1", + "status": { + "bucket": "", + "revision": "" + } + } + + def __init__(self, bucket, revision): + """Constructor for ``RenderedDocumentFactory``. 
+ """ + self.doc = [] + self.bucket = bucket + self.revision = revision + + def gen_test(self, schema, name, storagePolicy, data, doc_no=1): + """Generate Test Rendered Document. + """ + for x in range(doc_no): + rendered_doc = copy.deepcopy(self.RENDERED_DOCUMENT_TEMPLATE) + rendered_doc['metadata']['storagePolicy'] = storagePolicy + rendered_doc['metadata']['name'] = name[x] + rendered_doc['name'] = name[x] + rendered_doc['schema'] = ( + rendered_doc['schema'] % schema[x]) + rendered_doc['status']['bucket'] = self.bucket + rendered_doc['status']['revision'] = self.revision + rendered_doc['data'] = copy.deepcopy(data[x]) + rendered_doc['data_hash'] = api._make_hash(rendered_doc['data']) + rendered_doc['metadata_hash'] = api._make_hash( + rendered_doc['metadata']) + + self.doc.append(rendered_doc) + + return document_wrapper.DocumentDict.from_list(self.doc) diff --git a/deckhand/policies/revision.py b/deckhand/policies/revision.py index ecc14e8a..bfa265ef 100644 --- a/deckhand/policies/revision.py +++ b/deckhand/policies/revision.py @@ -49,6 +49,17 @@ database.""", 'path': '/api/v1.0/revisions' } ]), + policy.DocumentedRuleDefault( + base.POLICY_ROOT % 'show_revision_deepdiff', + base.RULE_ADMIN_API, + "Show revision deep diff between two revisions.", + [ + { + 'method': 'GET', + 'path': ('/api/v1.0/revisions/{revision_id}/deepdiff/' + '{comparison_revision_id}') + } + ]), policy.DocumentedRuleDefault( base.POLICY_ROOT % 'show_revision_diff', base.RULE_ADMIN_API, diff --git a/deckhand/service.py b/deckhand/service.py index 10df3152..3d0daff3 100644 --- a/deckhand/service.py +++ b/deckhand/service.py @@ -23,6 +23,7 @@ from deckhand.control import base from deckhand.control import buckets from deckhand.control import health from deckhand.control import middleware +from deckhand.control import revision_deepdiffing from deckhand.control import revision_diffing from deckhand.control import revision_documents from deckhand.control import revision_tags @@ -43,6 +44,8 @@ def configure_app(app, version=''): ('health', health.HealthResource()), ('revisions', revisions.RevisionsResource()), ('revisions/{revision_id}', revisions.RevisionsResource()), + ('revisions/{revision_id}/deepdiff/{comparison_revision_id}', + revision_deepdiffing.RevisionDeepDiffingResource()), ('revisions/{revision_id}/diff/{comparison_revision_id}', revision_diffing.RevisionDiffingResource()), ('revisions/{revision_id}/documents', diff --git a/deckhand/tests/functional/gabbits/revision-deepdiff/revision-deepdiff-success.yaml b/deckhand/tests/functional/gabbits/revision-deepdiff/revision-deepdiff-success.yaml new file mode 100644 index 00000000..18e3ab25 --- /dev/null +++ b/deckhand/tests/functional/gabbits/revision-deepdiff/revision-deepdiff-success.yaml @@ -0,0 +1,287 @@ +# Tests deepdiff functionality +# +# 1. Purges existing data to ensure test isolation +# 2. Verify deepdiff of modified data value +# - Create documents in bucket_a +# - Modify data value of doc-a document in bucket_a +# - Verify deepdiff of modified data value of doc-a +# 3. Verify deepdiff when revision=0 comparision_revision=0 +# 4. Verify deepdiff when revision=0 +# 5. Verify deepdiff when comparision_revision=0 +# 6. Verify deepdiff of newly added document +# - Add new document doc-b in bucket_a +# - Verify deepdiff of newly added doc-b document +# 7. Verify deepdiff of deleted document +# - Delete document doc-b in bucket_a +# - Verify deepdiff of deleted doc-b document +# 8. Verify deepdiff of the same revisions +# 9. 
Verify deepdiff of different revisions of same document +# 10. Verify deepdiff of addition of new key in a document +# - Adding a new key in doc-a +# - Verify deepdiff of addition of a new key in doc-a +# 11. Verify removal of a key in a document +# - Removing a key in doc-a +# - Verify removal of a key in doc-a +# 12. Verify deepdiff when invalid revision id pass in input +# 13. Verify deepdiff when input revision id is not found + +defaults: + request_headers: + content-type: application/x-yaml + response_headers: + content-type: application/x-yaml + verbose: true + +tests: + - name: purge + desc: Begin testing from known state. + DELETE: /api/v1.0/revisions + status: 204 + response_headers: null + + - name: create_a + desc: Create documents in bucket_a + PUT: /api/v1.0/buckets/bucket_a/documents + status: 200 + data: |- + --- + schema: deckhand/LayeringPolicy/v1 + metadata: + schema: metadata/Control/v1 + name: layering-policy + data: + layerOrder: + - site + ... + --- + schema: example/Kind/v1 + metadata: + schema: metadata/Document/v1 + name: doc-a + storagePolicy: cleartext + layeringDefinition: + abstract: false + layer: site + data: + value: 1 + ... + + - name: modify_data_value_doc-a + desc: Modify data value of doc-a document in bucket_a + PUT: /api/v1.0/buckets/bucket_a/documents + status: 200 + data: |- + --- + schema: example/Kind/v1 + metadata: + schema: metadata/Document/v1 + name: doc-a + storagePolicy: cleartext + layeringDefinition: + abstract: false + layer: site + data: + value: 2 + ... + + - name: verify_doc-a_modified_value + desc: Verify deepdiff of modified data value of doc-a document + GET: /api/v1.0/revisions/1/deepdiff/2 + status: 200 + response_multidoc_jsonpaths: + $.`len`: 1 + $.[0].'bucket_a': modified + $.[0].'bucket_a diff'.document_changed.count: 1 + $.[0].'bucket_a diff'.document_changed.details: + ('example/Kind/v1', 'doc-a'): + data_changed: + values_changed: + root['value']: + old_value: 1 + new_value: 2 + metadata_changed: {} + + - name: verify_both_rev_0 + desc: Verify deepdiff when revision=0 comparision_revision=0 + GET: /api/v1.0/revisions/0/deepdiff/0 + status: 200 + response_multidoc_jsonpaths: + $.`len`: 1 + $.[0]: {} + + - name: verify_rev_0 + desc: Verify deepdiff when revision=0 + GET: /api/v1.0/revisions/0/deepdiff/2 + status: 200 + response_multidoc_jsonpaths: + $.`len`: 1 + $.[0]: + bucket_a: created + + - name: verify_comp_rev_0 + desc: Verify deepdiff when comparision_revision=0 + GET: /api/v1.0/revisions/2/deepdiff/0 + status: 200 + response_multidoc_jsonpaths: + $.`len`: 1 + $.[0]: + bucket_a: created + + - name: add_new_doc-b + desc: Add new document doc-b in bucket_a + PUT: /api/v1.0/buckets/bucket_a/documents + status: 200 + data: |- + --- + schema: example/Kind/v1 + metadata: + schema: metadata/Document/v1 + name: doc-a + storagePolicy: cleartext + layeringDefinition: + abstract: false + layer: site + data: + value: 2 + ... + --- + schema: example/Kind/v1 + metadata: + schema: metadata/Document/v1 + name: doc-b + storagePolicy: cleartext + layeringDefinition: + abstract: false + layer: site + data: + foo: val1 + ... 
+ + - name: verify_added_doc-b + desc: Verify deepdiff of newly added doc-b document + GET: /api/v1.0/revisions/2/deepdiff/3 + status: 200 + response_multidoc_jsonpaths: + $.`len`: 1 + $.[0].'bucket_a': modified + $.[0].'bucket_a diff'.document_added.count: 1 + $.[0].'bucket_a diff'.document_added.details[0]: ['example/Kind/v1', 'doc-b'] + + - name: delete_doc-b + desc: Delete document doc-b in bucket_a + PUT: /api/v1.0/buckets/bucket_a/documents + status: 200 + data: |- + --- + schema: example/Kind/v1 + metadata: + schema: metadata/Document/v1 + name: doc-a + storagePolicy: cleartext + layeringDefinition: + abstract: false + layer: site + data: + value: 2 + ... + + - name: verify_deleted_doc-b + desc: Verify deepdiff of deleted doc-b document + GET: /api/v1.0/revisions/3/deepdiff/4 + status: 200 + response_multidoc_jsonpaths: + $.`len`: 1 + $.[0].'bucket_a': modified + $.[0].'bucket_a diff'.document_deleted.count: 1 + $.[0].'bucket_a diff'.document_deleted.details[0]: ['example/Kind/v1', 'doc-b'] + + - name: verify_revision_equality + desc: Verify deepdiff of the same revisions + GET: /api/v1.0/revisions/3/deepdiff/3 + status: 200 + response_multidoc_jsonpaths: + $.`len`: 1 + $.[0].'bucket_a': unmodified + + - name: verify_docs_equality + desc: Verify deepdiff of different revisions of same document + GET: /api/v1.0/revisions/2/deepdiff/4 + status: 200 + response_multidoc_jsonpaths: + $.`len`: 1 + $.[0].'bucket_a': unmodified + + - name: adding_new_key_in_doc-a + desc: Adding a new key in doc-a + PUT: /api/v1.0/buckets/bucket_a/documents + status: 200 + data: |- + --- + schema: example/Kind/v1 + metadata: + schema: metadata/Document/v1 + name: doc-a + storagePolicy: cleartext + layeringDefinition: + abstract: false + layer: site + data: + value: 2 + foo: 100 + ... + + - name: verify_doc-a_data_new_key + desc: Verify deepdiff of addition of a new key in doc-a + GET: /api/v1.0/revisions/4/deepdiff/5 + status: 200 + response_multidoc_jsonpaths: + $.`len`: 1 + $.[0].'bucket_a': modified + $.[0].'bucket_a diff'.document_changed.count: 1 + $.[0].'bucket_a diff'.document_changed.details: + ('example/Kind/v1', 'doc-a'): + data_changed: + dictionary_item_added: !!set {"root['foo']"} + metadata_changed: {} + + - name: removing_key_in_doc-a + desc: Removing a key in doc-a + PUT: /api/v1.0/buckets/bucket_a/documents + status: 200 + data: |- + --- + schema: example/Kind/v1 + metadata: + schema: metadata/Document/v1 + name: doc-a + storagePolicy: cleartext + layeringDefinition: + abstract: false + layer: site + data: + value: 2 + ... 
+ + - name: verify_doc-a_data_remove_key + desc: Verify deepdiff of removal of a key in doc-a + GET: /api/v1.0/revisions/5/deepdiff/6 + status: 200 + response_multidoc_jsonpaths: + $.`len`: 1 + $.[0].'bucket_a': modified + $.[0].'bucket_a diff'.document_changed.count: 1 + $.[0].'bucket_a diff'.document_changed.details: + ('example/Kind/v1', 'doc-a'): + data_changed: + dictionary_item_removed: !!set {"root['foo']"} + metadata_changed: {} + + - name: verify_invalid_input + desc: Verify deepdiff when invalid revision id pass in input + GET: /api/v1.0/revisions/test/deepdiff/2 + status: 400 + + - name: verify_revision_not_found + desc: Verify deepdiff when input revision id is not found + GET: /api/v1.0/revisions/1000/deepdiff/2 + status: 404 diff --git a/deckhand/tests/unit/control/test_api_initialization.py b/deckhand/tests/unit/control/test_api_initialization.py index a9ebc832..3eb82f65 100644 --- a/deckhand/tests/unit/control/test_api_initialization.py +++ b/deckhand/tests/unit/control/test_api_initialization.py @@ -21,6 +21,7 @@ from deckhand.common import utils from deckhand.control import api from deckhand.control import buckets from deckhand.control import health +from deckhand.control import revision_deepdiffing from deckhand.control import revision_diffing from deckhand.control import revision_documents from deckhand.control import revision_tags @@ -36,9 +37,10 @@ class TestApi(test_base.DeckhandTestCase): def setUp(self): super(TestApi, self).setUp() # Mock the API resources. - for resource in (buckets, health, revision_diffing, revision_documents, - revision_tags, revisions, rollback, validations, - versions): + for resource in ( + buckets, health, revision_deepdiffing, revision_diffing, + revision_documents, revision_tags, revisions, rollback, + validations, versions): class_names = self._get_module_class_names(resource) for class_name in class_names: resource_obj = self.patchobject( @@ -83,6 +85,9 @@ class TestApi(test_base.DeckhandTestCase): mock.call('/api/v1.0/revisions', self.revisions_resource()), mock.call('/api/v1.0/revisions/{revision_id}', self.revisions_resource()), + mock.call('/api/v1.0/revisions/{revision_id}/deepdiff/' + '{comparison_revision_id}', + self.revision_deep_diffing_resource()), mock.call('/api/v1.0/revisions/{revision_id}/diff/' '{comparison_revision_id}', self.revision_diffing_resource()), diff --git a/deckhand/tests/unit/control/test_revisions_deepdiff_controller.py b/deckhand/tests/unit/control/test_revisions_deepdiff_controller.py new file mode 100644 index 00000000..6ea57ca1 --- /dev/null +++ b/deckhand/tests/unit/control/test_revisions_deepdiff_controller.py @@ -0,0 +1,32 @@ +# Copyright 2018 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from deckhand.tests.unit.control import base as test_base + + +class TestRevisionsDeepDiffControllerNegativeRBAC( + test_base.BaseControllerTest): + + """Test suite for validating negative RBAC scenarios for revisions deepdiff + controller. 
+ """ + + def test_show_revision_deepdiff_except_forbidden(self): + rules = {'deckhand:show_revision_deepdiff': 'rule:admin_api'} + self.policy.set_rules(rules) + + resp = self.app.simulate_get( + '/api/v1.0/revisions/0/deepdiff/0', + headers={'Content-Type': 'application/x-yaml'}) + self.assertEqual(403, resp.status_code) diff --git a/deckhand/tests/unit/engine/test_revision_deepdiffing.py b/deckhand/tests/unit/engine/test_revision_deepdiffing.py new file mode 100644 index 00000000..6d394548 --- /dev/null +++ b/deckhand/tests/unit/engine/test_revision_deepdiffing.py @@ -0,0 +1,143 @@ +# Copyright 2018 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +from deckhand.engine import revision_diff +from deckhand import factories +from deckhand.tests.unit.db import base + + +class TestRevisionDeepDiffing(base.TestDbBase): + + def _test_data(self): + return { + 'data': [{'users': ['usr1', 'usr2'], 'description': 'normal user'}, + {'hostname': 'ubuntubox', 'ip': '192.168.0.1'}, + {'project_name': 'test1', 'region_name': 'reg01'}], + 'schema': ['user', 'host', 'project'], + 'doc_name': ['user1', 'host1', 'project1'], + 'policy': 'cleartext' + } + + def test_revision_deepdiff_no_diff(self): + test_data = copy.deepcopy(self._test_data()) + data = copy.deepcopy(test_data['data']) + (schema, doc_name, policy) = ( + test_data['schema'], test_data['doc_name'], test_data['policy']) + expected = {} + rdf1 = factories.RenderedDocumentFactory('bucket_1', 1) + rdoc1 = rdf1.gen_test(schema, doc_name, policy, data, 3) + # both the rendered doc are same. 
+ rdoc2 = copy.deepcopy(rdoc1) + actual = revision_diff._diff_buckets(rdoc1, rdoc2) + self.assertEqual(expected, actual) + + def test_revision_deepdiff_show_diff(self): + test_data = copy.deepcopy(self._test_data()) + data = copy.deepcopy(test_data['data']) + (schema, doc_name, policy) = ( + test_data['schema'], test_data['doc_name'], test_data['policy']) + expected_changed_doc = [( + 'deckhand/user/v1', 'user1'), ('deckhand/project/v1', 'project1')] + rdf1 = factories.RenderedDocumentFactory('bucket_1', 1) + rdoc1 = rdf1.gen_test(schema, doc_name, policy, data, 3) + rdf2 = factories.RenderedDocumentFactory('bucket_1', 2) + # change data + data[0]['users'].append('usr3') + data[2]['project_name'] = "test2" + rdoc2 = rdf2.gen_test(schema, doc_name, policy, data, 3) + actual = revision_diff._diff_buckets(rdoc1, rdoc2) + # verify change document count + self.assertEqual(2, actual['document_changed']['count']) + # verify type of document changed + expected_changed_doc = ["('deckhand/user/v1', 'user1')", + "('deckhand/project/v1', 'project1')"] + actual_data = actual['document_changed']['details'] + actual_changed_doc = [k for k, v in actual_data.items()] + self.assertEqual( + [], list(set(expected_changed_doc) - set(actual_changed_doc))) + + def test_revision_deepdiff_doc_added(self): + test_data = copy.deepcopy(self._test_data()) + data = copy.deepcopy(test_data['data']) + (schema, doc_name, policy) = ( + test_data['schema'], test_data['doc_name'], test_data['policy']) + expected_added_doc = [('deckhand/application/v1', 'application1')] + rdf1 = factories.RenderedDocumentFactory('bucket_1', 1) + rdoc1 = rdf1.gen_test(schema, doc_name, policy, data, 3) + rdf2 = factories.RenderedDocumentFactory('bucket_1', 2) + # add new document + data.append({"application": "mysql", "port": "3306"}) + schema.append("application") + doc_name.append("application1") + rdoc2 = rdf2.gen_test(schema, doc_name, policy, data, 4) + actual = revision_diff._diff_buckets(rdoc1, rdoc2) + # verify added document count + self.assertEqual(1, actual['document_added']['count']) + # verify type of document added + actual_added_doc = [d for d in actual['document_added']['details']] + self.assertEqual( + [], list(set(expected_added_doc) - set(actual_added_doc))) + + def test_revision_deepdiff_doc_deleted(self): + test_data = copy.deepcopy(self._test_data()) + data = copy.deepcopy(test_data['data']) + (schema, doc_name, policy) = ( + test_data['schema'], test_data['doc_name'], test_data['policy']) + rdf1 = factories.RenderedDocumentFactory('bucket_1', 1) + rdoc1 = rdf1.gen_test(schema, doc_name, policy, data, 3) + rdf2 = factories.RenderedDocumentFactory('bucket_1', 2) + # delete a document + del data[2] + del schema[2] + del doc_name[2] + rdoc2 = rdf2.gen_test(schema, doc_name, policy, data, 2) + actual = revision_diff._diff_buckets(rdoc1, rdoc2) + # verify deleted document count + self.assertEqual(1, actual['document_deleted']['count']) + # verify type of document deleted + expected_deleted_doc = [('deckhand/project/v1', 'project1')] + actual_deleted_doc = [d for d in actual['document_deleted']['details']] + self.assertEqual( + [], list(set(expected_deleted_doc) - set(actual_deleted_doc))) + + def test_revision_deepdiff_hide_password_diff(self): + test_data = copy.deepcopy(self._test_data()) + data = copy.deepcopy(test_data['data']) + (schema, doc_name, policy) = ( + test_data['schema'], test_data['doc_name'], test_data['policy']) + rdf1 = factories.RenderedDocumentFactory('bucket_1', 1) + # rdoc1: add encrypt type document + 
+        (dt1, sc, do, po) = ([{"password": "ABC123"}], [
+            'node_password'], ['node1'], 'encrypted')
+        rdf1.gen_test(sc, do, po, dt1)
+        rdoc1 = rdf1.gen_test(schema, doc_name, policy, data, 3)
+        rdf2 = factories.RenderedDocumentFactory('bucket_1', 2)
+        # change password
+        dt2 = [{"password": "xyz123"}]
+        rdf2.gen_test(sc, do, po, dt2)
+        rdoc2 = rdf2.gen_test(schema, doc_name, policy, data, 3)
+        actual = revision_diff._diff_buckets(rdoc1, rdoc2)
+        # verify changed document count
+        self.assertEqual(1, actual['document_changed']['count'])
+        # verify type of document changed
+        expected_changed_doc = ["('deckhand/node_password/v1', 'node1')"]
+        actual_data = actual['document_changed']['details']
+        actual_changed_doc = [k for k, v in actual_data.items()]
+        self.assertEqual(
+            [], list(set(expected_changed_doc) - set(actual_changed_doc)))
+        # Ensure no password shows up in the diff
+        self.assertTrue(
+            actual_data[expected_changed_doc[0]]['data_changed']['encrypted'])
diff --git a/deckhand/tests/unit/db/test_revision_diffing.py b/deckhand/tests/unit/engine/test_revision_diffing.py
similarity index 99%
rename from deckhand/tests/unit/db/test_revision_diffing.py
rename to deckhand/tests/unit/engine/test_revision_diffing.py
index 6baf2d2e..52e8cf95 100644
--- a/deckhand/tests/unit/db/test_revision_diffing.py
+++ b/deckhand/tests/unit/engine/test_revision_diffing.py
@@ -14,7 +14,7 @@
 
 import copy
 
-from deckhand.db.sqlalchemy import api as db_api
+from deckhand.engine.revision_diff import revision_diff
 from deckhand.tests import test_utils
 from deckhand.tests.unit.db import base
 
@@ -27,7 +27,7 @@ class TestRevisionDiffing(base.TestDbBase):
         # `comparison_revision_id` and `revision_id` args.
         revision_ids = [revision_id, comparison_revision_id]
         for rev_ids in (revision_ids, reversed(revision_ids)):
-            actual = db_api.revision_diff(*rev_ids)
+            actual = revision_diff(*rev_ids)
             self.assertEqual(expected, actual)
 
     def test_revision_diff_null(self):
diff --git a/doc/source/api_ref.rst b/doc/source/api_ref.rst
index c7fd34a6..4f22c4a7 100644
--- a/doc/source/api_ref.rst
+++ b/doc/source/api_ref.rst
@@ -277,6 +277,96 @@ Diffing two revisions that contain the same documents, ``GET /api/v1.0/revisions
 
 Diffing revision zero with itself, ``GET /api/v1.0/revisions/0/diff/0``:
 
+.. code-block:: yaml
+
+    ---
+    {}
+
+GET ``/revisions/{{revision_id}}/deepdiff/{{comparison_revision_id}}``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This is an advanced version of the ``diff`` API. It provides a deep diff of
+the modified buckets between two revisions.
+
+The response contains deep diff details for ``added``, ``deleted`` and
+``modified`` documents. The diff of a modified document covers both data and
+metadata changes. When a document's ``storagePolicy`` is ``encrypted``, the
+deep diff hides the document data and returns only ``{'encrypted': True}``.
+
+Examples
+""""""""
+
+A response for a typical case, ``GET /api/v1.0/revisions/3/deepdiff/4``:
+
+.. code-block:: yaml
+
+    ---
+    bucket_a: created
+    bucket_b: deleted
+    bucket_c: modified
+    bucket_c diff:
+      document_changed:
+        count: 1
+        details:
+          ('example/Kind/v1', 'doc-b'):
+            data_changed:
+              values_changed:
+                root['foo']: {new_value: 3, old_value: 2}
+            metadata_changed: {}
+
+Document added deepdiff response, ``GET /api/v1.0/revisions/4/deepdiff/5``:
+
+.. code-block:: yaml
+
+    ---
+    bucket_a: created
+    bucket_c: modified
+    bucket_c diff:
+      document_added:
+        count: 1
+        details:
+        - [example/Kind/v1, doc-c]
+
+Document deleted deepdiff response, ``GET /api/v1.0/revisions/5/deepdiff/6``:
+
+.. code-block:: yaml
+
+    ---
+    bucket_a: created
+    bucket_c: modified
+    bucket_c diff:
+      document_deleted:
+        count: 1
+        details:
+        - [example/Kind/v1, doc-c]
+
+A response for deepdiffing against an empty revision, ``GET /api/v1.0/revisions/0/deepdiff/2``:
+
+.. code-block:: yaml
+
+    ---
+    bucket_a: created
+    bucket_b: created
+
+A response for deepdiffing a revision against itself, ``GET /api/v1.0/revisions/6/deepdiff/6``:
+
+.. code-block:: yaml
+
+    ---
+    bucket_a: unmodified
+    bucket_c: unmodified
+    bucket_d: unmodified
+
+Deep diffing two revisions that contain the same documents, ``GET /api/v1.0/revisions/1/deepdiff/2``:
+
+.. code-block:: yaml
+
+    ---
+    bucket_a: unmodified
+    bucket_b: unmodified
+
+Deep diffing revision zero with itself, ``GET /api/v1.0/revisions/0/deepdiff/0``:
+
 .. code-block:: yaml
 
     ---
diff --git a/doc/source/developer-overview.rst b/doc/source/developer-overview.rst
index 9ab67c92..083bfadc 100644
--- a/doc/source/developer-overview.rst
+++ b/doc/source/developer-overview.rst
@@ -69,14 +69,14 @@ engine
 The ``engine`` module is the interface responsible for all :ref:`rendering`.
 Rendering consists of applying a series of algorithms to the documents,
 including: topological sorting, :ref:`layering`,
-:ref:`substitution`, and :ref:`replacement`.
+:ref:`substitution`, and :ref:`replacement`. This module also realizes
+revision-diffing and revision-deepdiffing functionality.
 
 db
 --
 
 The ``db`` module is responsible for implementing the database tables needed
-to store all Airship documents. The module also realizes version control and
-:ref:`revision-diffing` functionality.
+to store all Airship documents. This module also realizes version control.
 
 client
 ------
diff --git a/doc/source/exceptions.rst b/doc/source/exceptions.rst
index ed1c2f45..ce49eee5 100644
--- a/doc/source/exceptions.rst
+++ b/doc/source/exceptions.rst
@@ -34,6 +34,11 @@ Deckhand Exceptions
        :members:
        :show-inheritance:
        :undoc-members:
+  * - DeepDiffException
+    - .. autoexception:: deckhand.errors.DeepDiffException
+         :members:
+         :show-inheritance:
+         :undoc-members:
   * - DocumentNotFound
     - .. autoexception:: deckhand.errors.DocumentNotFound
         :members:
         :show-inheritance:
         :undoc-members:
@@ -59,6 +64,11 @@ Deckhand Exceptions
        :members:
       :show-inheritance:
       :undoc-members:
+  * - InvalidInputException
+    - .. autoexception:: deckhand.errors.InvalidInputException
+         :members:
+         :show-inheritance:
+         :undoc-members:
   * - LayeringPolicyNotFound
     - .. autoexception:: deckhand.errors.LayeringPolicyNotFound
         :members:
         :show-inheritance:
         :undoc-members:
diff --git a/doc/source/revision-history.rst b/doc/source/revision-history.rst
index 20188868..ded3f8d7 100644
--- a/doc/source/revision-history.rst
+++ b/doc/source/revision-history.rst
@@ -48,6 +48,15 @@ one of those two documents has been modified, the bucket itself is tagged as
 ``modified``. For more information about revision diffing, please reference
 the :ref:`api-ref`.
 
+Revision DeepDiffing
+--------------------
+
+Revision deep diffing is an extended version of the existing revision diff
+API. Whenever a bucket's state changes to ``modified``, the deep diff shows
+the detailed differences between the two revisions. The result set contains
+``document_added``, ``document_deleted`` and ``document_changed`` counts and
+details. For more information, please reference the :ref:`api-ref`.
+
 Revision Rollback
 -----------------
 
diff --git a/etc/deckhand/policy.yaml.sample b/etc/deckhand/policy.yaml.sample
index 965774c5..f95a0782 100644
--- a/etc/deckhand/policy.yaml.sample
+++ b/etc/deckhand/policy.yaml.sample
@@ -65,6 +65,10 @@
 # DELETE /api/v1.0/revisions
 #"deckhand:delete_revisions": "rule:admin_api"
 
+# Show revision deepdiff between two revisions.
+# GET /api/v1.0/revisions/{revision_id}/deepdiff/{comparison_revision_id}
+#"deckhand:show_revision_deepdiff": "rule:admin_api"
+
 # Show revision diff between two revisions.
 # GET /api/v1.0/revisions/{revision_id}/diff/{comparison_revision_id}
 #"deckhand:show_revision_diff": "rule:admin_api"
diff --git a/requirements.txt b/requirements.txt
index ad7731e8..0833a3ed 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,6 +25,7 @@ jsonpath-ng==1.4.3
 jsonschema==2.6.0
 beaker==1.9.1
 cryptography>=2.1 # BSD/Apache-2.0
+deepdiff==3.3.0
 oslo.cache>=1.30.1 # Apache-2.0
 oslo.concurrency>=3.27.0 # Apache-2.0
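
Reviewer note (not part of the patch): the ``deepdiff==3.3.0`` requirement added above is the library that the new per-document diffing appears to build on, and its output format is what the ``data_changed`` examples in ``api_ref.rst`` mirror. The sketch below is a minimal illustration of that behaviour; the document contents are made up, and only the public ``DeepDiff`` class is assumed.

.. code-block:: python

    from deepdiff import DeepDiff

    # Hypothetical "data" sections of one document across two revisions.
    old_data = {'foo': 2, 'bar': ['baz']}
    new_data = {'foo': 3, 'bar': ['baz']}

    # DeepDiff reports each change keyed by its path inside the document, e.g.
    # {'values_changed': {"root['foo']": {'new_value': 3, 'old_value': 2}}}
    print(DeepDiff(old_data, new_data))

This is the same ``values_changed`` structure shown in the typical-case example for ``GET /api/v1.0/revisions/3/deepdiff/4``.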